diff options
Diffstat (limited to '')
307 files changed, 77999 insertions, 3851 deletions
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index 61ae899cfc17..2f9d378edec3 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -1,32 +1,56 @@ # SPDX-License-Identifier: GPL-2.0-only +bind_bhash +bind_timewait +bind_wildcard +csum +cmsg_sender +diag_uid +fin_ack_lat +gro +hwtstamp_config +io_uring_zerocopy_tx +ioam6_parser +ip_defrag +ip_local_port_range ipsec +ipv6_flowlabel +ipv6_flowlabel_mgr +log.txt msg_zerocopy -socket +nettest psock_fanout psock_snd psock_tpacket +reuseaddr_conflict +reuseaddr_ports_exhausted reuseport_addr_any reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa reuseport_dualstack -reuseaddr_conflict -tcp_mmap -udpgso -udpgso_bench_rx -udpgso_bench_tx -tcp_inq -tls -txring_overwrite -ip_defrag -ipv6_flowlabel -ipv6_flowlabel_mgr +rxtimestamp +sctp_hello +scm_pidfd +sk_bind_sendto_listen +sk_connect_zero_addr +socket +so_incoming_cpu +so_netns_cookie so_txtime +stress_reuseport_listen +tap tcp_fastopen_backup_key -nettest -fin_ack_lat -reuseaddr_ports_exhausted -hwtstamp_config -rxtimestamp +tcp_inq +tcp_mmap +test_unix_oob timestamping +tls +toeplitz +tools +tun +txring_overwrite txtimestamp +udpgso +udpgso_bench_rx +udpgso_bench_tx +unix_connect diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index ef352477cac6..7b6918d5f4af 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -2,7 +2,9 @@ # Makefile for net selftests CFLAGS = -Wall -Wl,--no-as-needed -O2 -g -CFLAGS += -I../../../../usr/include/ +CFLAGS += -I../../../../usr/include/ $(KHDR_INCLUDES) +# Additional include paths needed by kselftest.h +CFLAGS += -I../ TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh \ rtnetlink.sh xfrm_policy.sh test_blackhole_dev.sh @@ -11,8 +13,8 @@ TEST_PROGS += udpgso_bench.sh fib_rule_tests.sh msg_zerocopy.sh psock_snd.sh TEST_PROGS += udpgro_bench.sh udpgro.sh test_vxlan_under_vrf.sh reuseport_addr_any.sh TEST_PROGS += test_vxlan_fdb_changelink.sh so_txtime.sh ipv6_flowlabel.sh TEST_PROGS += tcp_fastopen_backup_key.sh fcnal-test.sh l2tp.sh traceroute.sh -TEST_PROGS += fin_ack_lat.sh fib_nexthop_multiprefix.sh fib_nexthops.sh -TEST_PROGS += altnames.sh icmp_redirect.sh ip6_gre_headroom.sh +TEST_PROGS += fin_ack_lat.sh fib_nexthop_multiprefix.sh fib_nexthops.sh fib_nexthop_nongw.sh +TEST_PROGS += altnames.sh icmp.sh icmp_redirect.sh ip6_gre_headroom.sh TEST_PROGS += route_localnet.sh TEST_PROGS += reuseaddr_ports_exhausted.sh TEST_PROGS += txtimestamp.sh @@ -21,23 +23,133 @@ TEST_PROGS += rxtimestamp.sh TEST_PROGS += devlink_port_split.py TEST_PROGS += drop_monitor_tests.sh TEST_PROGS += vrf_route_leaking.sh -TEST_PROGS_EXTENDED := in_netns.sh +TEST_PROGS += bareudp.sh +TEST_PROGS += amt.sh +TEST_PROGS += unicast_extensions.sh +TEST_PROGS += udpgro_fwd.sh +TEST_PROGS += udpgro_frglist.sh +TEST_PROGS += veth.sh +TEST_PROGS += ioam6.sh +TEST_PROGS += gro.sh +TEST_PROGS += gre_gso.sh +TEST_PROGS += cmsg_so_mark.sh +TEST_PROGS += cmsg_time.sh cmsg_ipv6.sh +TEST_PROGS += netns-name.sh +TEST_PROGS += srv6_end_dt46_l3vpn_test.sh +TEST_PROGS += srv6_end_dt4_l3vpn_test.sh +TEST_PROGS += srv6_end_dt6_l3vpn_test.sh +TEST_PROGS += srv6_hencap_red_l3vpn_test.sh +TEST_PROGS += srv6_hl2encap_red_l2vpn_test.sh +TEST_PROGS += srv6_end_next_csid_l3vpn_test.sh +TEST_PROGS += srv6_end_x_next_csid_l3vpn_test.sh +TEST_PROGS += srv6_end_flavors_test.sh +TEST_PROGS += vrf_strict_mode_test.sh +TEST_PROGS += arp_ndisc_evict_nocarrier.sh +TEST_PROGS += ndisc_unsolicited_na_test.sh +TEST_PROGS += arp_ndisc_untracked_subnets.sh +TEST_PROGS += stress_reuseport_listen.sh +TEST_PROGS += l2_tos_ttl_inherit.sh +TEST_PROGS += bind_bhash.sh +TEST_PROGS += ip_local_port_range.sh +TEST_PROGS += rps_default_mask.sh +TEST_PROGS += big_tcp.sh +TEST_PROGS_EXTENDED := toeplitz_client.sh toeplitz.sh TEST_GEN_FILES = socket nettest TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any TEST_GEN_FILES += tcp_mmap tcp_inq psock_snd txring_overwrite TEST_GEN_FILES += udpgso udpgso_bench_tx udpgso_bench_rx ip_defrag -TEST_GEN_FILES += so_txtime ipv6_flowlabel ipv6_flowlabel_mgr +TEST_GEN_FILES += so_txtime ipv6_flowlabel ipv6_flowlabel_mgr so_netns_cookie TEST_GEN_FILES += tcp_fastopen_backup_key TEST_GEN_FILES += fin_ack_lat TEST_GEN_FILES += reuseaddr_ports_exhausted TEST_GEN_FILES += hwtstamp_config rxtimestamp timestamping txtimestamp TEST_GEN_FILES += ipsec +TEST_GEN_FILES += ioam6_parser +TEST_GEN_FILES += gro TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa -TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls +TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls tun tap +TEST_GEN_FILES += toeplitz +TEST_GEN_FILES += cmsg_sender +TEST_GEN_FILES += stress_reuseport_listen +TEST_PROGS += test_vxlan_vnifiltering.sh +TEST_GEN_FILES += io_uring_zerocopy_tx +TEST_PROGS += io_uring_zerocopy_tx.sh +TEST_GEN_FILES += bind_bhash +TEST_GEN_PROGS += sk_bind_sendto_listen +TEST_GEN_PROGS += sk_connect_zero_addr +TEST_PROGS += test_ingress_egress_chaining.sh +TEST_GEN_PROGS += so_incoming_cpu +TEST_PROGS += sctp_vrf.sh +TEST_GEN_FILES += sctp_hello +TEST_GEN_FILES += csum +TEST_GEN_FILES += nat6to4.o +TEST_GEN_FILES += xdp_dummy.o +TEST_GEN_FILES += ip_local_port_range +TEST_GEN_FILES += bind_wildcard +TEST_PROGS += test_vxlan_mdb.sh +TEST_PROGS += test_bridge_neigh_suppress.sh +TEST_PROGS += test_vxlan_nolocalbypass.sh +TEST_PROGS += test_bridge_backup_port.sh +TEST_PROGS += fdb_flush.sh +TEST_PROGS += fq_band_pktlimit.sh +TEST_PROGS += vlan_hw_filter.sh + +TEST_FILES := settings +TEST_FILES += in_netns.sh lib.sh net_helper.sh setup_loopback.sh setup_veth.sh + +TEST_INCLUDES := forwarding/lib.sh -KSFT_KHDR_INSTALL := 1 include ../lib.mk $(OUTPUT)/reuseport_bpf_numa: LDLIBS += -lnuma -$(OUTPUT)/tcp_mmap: LDLIBS += -lpthread +$(OUTPUT)/tcp_mmap: LDLIBS += -lpthread -lcrypto $(OUTPUT)/tcp_inq: LDLIBS += -lpthread +$(OUTPUT)/bind_bhash: LDLIBS += -lpthread +$(OUTPUT)/io_uring_zerocopy_tx: CFLAGS += -I../../../include/ + +# Rules to generate bpf objs +CLANG ?= clang +SCRATCH_DIR := $(OUTPUT)/tools +BUILD_DIR := $(SCRATCH_DIR)/build +BPFDIR := $(abspath ../../../lib/bpf) +APIDIR := $(abspath ../../../include/uapi) + +CCINCLUDE += -I../bpf +CCINCLUDE += -I../../../../usr/include/ +CCINCLUDE += -I$(SCRATCH_DIR)/include + +BPFOBJ := $(BUILD_DIR)/libbpf/libbpf.a + +MAKE_DIRS := $(BUILD_DIR)/libbpf +$(MAKE_DIRS): + mkdir -p $@ + +# Get Clang's default includes on this system, as opposed to those seen by +# '--target=bpf'. This fixes "missing" files on some architectures/distros, +# such as asm/byteorder.h, asm/socket.h, asm/sockios.h, sys/cdefs.h etc. +# +# Use '-idirafter': Don't interfere with include mechanics except where the +# build would have failed anyways. +define get_sys_includes +$(shell $(1) $(2) -v -E - </dev/null 2>&1 \ + | sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }') \ +$(shell $(1) $(2) -dM -E - </dev/null | grep '__riscv_xlen ' | awk '{printf("-D__riscv_xlen=%d -D__BITS_PER_LONG=%d", $$3, $$3)}') +endef + +ifneq ($(CROSS_COMPILE),) +CLANG_TARGET_ARCH = --target=$(notdir $(CROSS_COMPILE:%-=%)) +endif + +CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG),$(CLANG_TARGET_ARCH)) + +$(OUTPUT)/nat6to4.o $(OUTPUT)/xdp_dummy.o: $(OUTPUT)/%.o : %.c $(BPFOBJ) | $(MAKE_DIRS) + $(CLANG) -O2 --target=bpf -c $< $(CCINCLUDE) $(CLANG_SYS_INCLUDES) -o $@ + +$(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \ + $(APIDIR)/linux/bpf.h \ + | $(BUILD_DIR)/libbpf + $(MAKE) $(submake_extras) -C $(BPFDIR) OUTPUT=$(BUILD_DIR)/libbpf/ \ + EXTRA_CFLAGS='-g -O0' \ + DESTDIR=$(SCRATCH_DIR) prefix= all install_headers + +EXTRA_CLEAN := $(SCRATCH_DIR) diff --git a/tools/testing/selftests/net/af_unix/Makefile b/tools/testing/selftests/net/af_unix/Makefile new file mode 100644 index 000000000000..221c387a7d7f --- /dev/null +++ b/tools/testing/selftests/net/af_unix/Makefile @@ -0,0 +1,4 @@ +CFLAGS += $(KHDR_INCLUDES) +TEST_GEN_PROGS := diag_uid test_unix_oob unix_connect scm_pidfd + +include ../../lib.mk diff --git a/tools/testing/selftests/net/af_unix/diag_uid.c b/tools/testing/selftests/net/af_unix/diag_uid.c new file mode 100644 index 000000000000..79a3dd75590e --- /dev/null +++ b/tools/testing/selftests/net/af_unix/diag_uid.c @@ -0,0 +1,177 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright Amazon.com Inc. or its affiliates. */ + +#define _GNU_SOURCE +#include <sched.h> + +#include <unistd.h> +#include <linux/netlink.h> +#include <linux/rtnetlink.h> +#include <linux/sock_diag.h> +#include <linux/unix_diag.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <sys/un.h> + +#include "../../kselftest_harness.h" + +FIXTURE(diag_uid) +{ + int netlink_fd; + int unix_fd; + __u32 inode; + __u64 cookie; +}; + +FIXTURE_VARIANT(diag_uid) +{ + int unshare; + int udiag_show; +}; + +FIXTURE_VARIANT_ADD(diag_uid, uid) +{ + .unshare = 0, + .udiag_show = UDIAG_SHOW_UID +}; + +FIXTURE_VARIANT_ADD(diag_uid, uid_unshare) +{ + .unshare = CLONE_NEWUSER, + .udiag_show = UDIAG_SHOW_UID +}; + +FIXTURE_SETUP(diag_uid) +{ + struct stat file_stat; + socklen_t optlen; + int ret; + + if (variant->unshare) + ASSERT_EQ(unshare(variant->unshare), 0); + + self->netlink_fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_SOCK_DIAG); + ASSERT_NE(self->netlink_fd, -1); + + self->unix_fd = socket(AF_UNIX, SOCK_STREAM, 0); + ASSERT_NE(self->unix_fd, -1); + + ret = fstat(self->unix_fd, &file_stat); + ASSERT_EQ(ret, 0); + + self->inode = file_stat.st_ino; + + optlen = sizeof(self->cookie); + ret = getsockopt(self->unix_fd, SOL_SOCKET, SO_COOKIE, &self->cookie, &optlen); + ASSERT_EQ(ret, 0); +} + +FIXTURE_TEARDOWN(diag_uid) +{ + close(self->netlink_fd); + close(self->unix_fd); +} + +int send_request(struct __test_metadata *_metadata, + FIXTURE_DATA(diag_uid) *self, + const FIXTURE_VARIANT(diag_uid) *variant) +{ + struct { + struct nlmsghdr nlh; + struct unix_diag_req udr; + } req = { + .nlh = { + .nlmsg_len = sizeof(req), + .nlmsg_type = SOCK_DIAG_BY_FAMILY, + .nlmsg_flags = NLM_F_REQUEST + }, + .udr = { + .sdiag_family = AF_UNIX, + .udiag_ino = self->inode, + .udiag_cookie = { + (__u32)self->cookie, + (__u32)(self->cookie >> 32) + }, + .udiag_show = variant->udiag_show + } + }; + struct sockaddr_nl nladdr = { + .nl_family = AF_NETLINK + }; + struct iovec iov = { + .iov_base = &req, + .iov_len = sizeof(req) + }; + struct msghdr msg = { + .msg_name = &nladdr, + .msg_namelen = sizeof(nladdr), + .msg_iov = &iov, + .msg_iovlen = 1 + }; + + return sendmsg(self->netlink_fd, &msg, 0); +} + +void render_response(struct __test_metadata *_metadata, + struct unix_diag_req *udr, __u32 len) +{ + unsigned int rta_len = len - NLMSG_LENGTH(sizeof(*udr)); + struct rtattr *attr; + uid_t uid; + + ASSERT_GT(len, sizeof(*udr)); + ASSERT_EQ(udr->sdiag_family, AF_UNIX); + + attr = (struct rtattr *)(udr + 1); + ASSERT_NE(RTA_OK(attr, rta_len), 0); + ASSERT_EQ(attr->rta_type, UNIX_DIAG_UID); + + uid = *(uid_t *)RTA_DATA(attr); + ASSERT_EQ(uid, getuid()); +} + +void receive_response(struct __test_metadata *_metadata, + FIXTURE_DATA(diag_uid) *self) +{ + long buf[8192 / sizeof(long)]; + struct sockaddr_nl nladdr = { + .nl_family = AF_NETLINK + }; + struct iovec iov = { + .iov_base = buf, + .iov_len = sizeof(buf) + }; + struct msghdr msg = { + .msg_name = &nladdr, + .msg_namelen = sizeof(nladdr), + .msg_iov = &iov, + .msg_iovlen = 1 + }; + struct nlmsghdr *nlh; + int ret; + + ret = recvmsg(self->netlink_fd, &msg, 0); + ASSERT_GT(ret, 0); + + nlh = (struct nlmsghdr *)buf; + ASSERT_NE(NLMSG_OK(nlh, ret), 0); + ASSERT_EQ(nlh->nlmsg_type, SOCK_DIAG_BY_FAMILY); + + render_response(_metadata, NLMSG_DATA(nlh), nlh->nlmsg_len); + + nlh = NLMSG_NEXT(nlh, ret); + ASSERT_EQ(NLMSG_OK(nlh, ret), 0); +} + +TEST_F(diag_uid, 1) +{ + int ret; + + ret = send_request(_metadata, self, variant); + ASSERT_GT(ret, 0); + + receive_response(_metadata, self); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/net/af_unix/scm_pidfd.c b/tools/testing/selftests/net/af_unix/scm_pidfd.c new file mode 100644 index 000000000000..7e534594167e --- /dev/null +++ b/tools/testing/selftests/net/af_unix/scm_pidfd.c @@ -0,0 +1,429 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +#define _GNU_SOURCE +#include <error.h> +#include <limits.h> +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/socket.h> +#include <linux/socket.h> +#include <unistd.h> +#include <string.h> +#include <errno.h> +#include <sys/un.h> +#include <sys/signal.h> +#include <sys/types.h> +#include <sys/wait.h> + +#include "../../kselftest_harness.h" + +#define clean_errno() (errno == 0 ? "None" : strerror(errno)) +#define log_err(MSG, ...) \ + fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", __FILE__, __LINE__, \ + clean_errno(), ##__VA_ARGS__) + +#ifndef SCM_PIDFD +#define SCM_PIDFD 0x04 +#endif + +static void child_die() +{ + exit(1); +} + +static int safe_int(const char *numstr, int *converted) +{ + char *err = NULL; + long sli; + + errno = 0; + sli = strtol(numstr, &err, 0); + if (errno == ERANGE && (sli == LONG_MAX || sli == LONG_MIN)) + return -ERANGE; + + if (errno != 0 && sli == 0) + return -EINVAL; + + if (err == numstr || *err != '\0') + return -EINVAL; + + if (sli > INT_MAX || sli < INT_MIN) + return -ERANGE; + + *converted = (int)sli; + return 0; +} + +static int char_left_gc(const char *buffer, size_t len) +{ + size_t i; + + for (i = 0; i < len; i++) { + if (buffer[i] == ' ' || buffer[i] == '\t') + continue; + + return i; + } + + return 0; +} + +static int char_right_gc(const char *buffer, size_t len) +{ + int i; + + for (i = len - 1; i >= 0; i--) { + if (buffer[i] == ' ' || buffer[i] == '\t' || + buffer[i] == '\n' || buffer[i] == '\0') + continue; + + return i + 1; + } + + return 0; +} + +static char *trim_whitespace_in_place(char *buffer) +{ + buffer += char_left_gc(buffer, strlen(buffer)); + buffer[char_right_gc(buffer, strlen(buffer))] = '\0'; + return buffer; +} + +/* borrowed (with all helpers) from pidfd/pidfd_open_test.c */ +static pid_t get_pid_from_fdinfo_file(int pidfd, const char *key, size_t keylen) +{ + int ret; + char path[512]; + FILE *f; + size_t n = 0; + pid_t result = -1; + char *line = NULL; + + snprintf(path, sizeof(path), "/proc/self/fdinfo/%d", pidfd); + + f = fopen(path, "re"); + if (!f) + return -1; + + while (getline(&line, &n, f) != -1) { + char *numstr; + + if (strncmp(line, key, keylen)) + continue; + + numstr = trim_whitespace_in_place(line + 4); + ret = safe_int(numstr, &result); + if (ret < 0) + goto out; + + break; + } + +out: + free(line); + fclose(f); + return result; +} + +static int cmsg_check(int fd) +{ + struct msghdr msg = { 0 }; + struct cmsghdr *cmsg; + struct iovec iov; + struct ucred *ucred = NULL; + int data = 0; + char control[CMSG_SPACE(sizeof(struct ucred)) + + CMSG_SPACE(sizeof(int))] = { 0 }; + int *pidfd = NULL; + pid_t parent_pid; + int err; + + iov.iov_base = &data; + iov.iov_len = sizeof(data); + + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = control; + msg.msg_controllen = sizeof(control); + + err = recvmsg(fd, &msg, 0); + if (err < 0) { + log_err("recvmsg"); + return 1; + } + + if (msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC)) { + log_err("recvmsg: truncated"); + return 1; + } + + for (cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL; + cmsg = CMSG_NXTHDR(&msg, cmsg)) { + if (cmsg->cmsg_level == SOL_SOCKET && + cmsg->cmsg_type == SCM_PIDFD) { + if (cmsg->cmsg_len < sizeof(*pidfd)) { + log_err("CMSG parse: SCM_PIDFD wrong len"); + return 1; + } + + pidfd = (void *)CMSG_DATA(cmsg); + } + + if (cmsg->cmsg_level == SOL_SOCKET && + cmsg->cmsg_type == SCM_CREDENTIALS) { + if (cmsg->cmsg_len < sizeof(*ucred)) { + log_err("CMSG parse: SCM_CREDENTIALS wrong len"); + return 1; + } + + ucred = (void *)CMSG_DATA(cmsg); + } + } + + /* send(pfd, "x", sizeof(char), 0) */ + if (data != 'x') { + log_err("recvmsg: data corruption"); + return 1; + } + + if (!pidfd) { + log_err("CMSG parse: SCM_PIDFD not found"); + return 1; + } + + if (!ucred) { + log_err("CMSG parse: SCM_CREDENTIALS not found"); + return 1; + } + + /* pidfd from SCM_PIDFD should point to the parent process PID */ + parent_pid = + get_pid_from_fdinfo_file(*pidfd, "Pid:", sizeof("Pid:") - 1); + if (parent_pid != getppid()) { + log_err("wrong SCM_PIDFD %d != %d", parent_pid, getppid()); + return 1; + } + + return 0; +} + +struct sock_addr { + char sock_name[32]; + struct sockaddr_un listen_addr; + socklen_t addrlen; +}; + +FIXTURE(scm_pidfd) +{ + int server; + pid_t client_pid; + int startup_pipe[2]; + struct sock_addr server_addr; + struct sock_addr *client_addr; +}; + +FIXTURE_VARIANT(scm_pidfd) +{ + int type; + bool abstract; +}; + +FIXTURE_VARIANT_ADD(scm_pidfd, stream_pathname) +{ + .type = SOCK_STREAM, + .abstract = 0, +}; + +FIXTURE_VARIANT_ADD(scm_pidfd, stream_abstract) +{ + .type = SOCK_STREAM, + .abstract = 1, +}; + +FIXTURE_VARIANT_ADD(scm_pidfd, dgram_pathname) +{ + .type = SOCK_DGRAM, + .abstract = 0, +}; + +FIXTURE_VARIANT_ADD(scm_pidfd, dgram_abstract) +{ + .type = SOCK_DGRAM, + .abstract = 1, +}; + +FIXTURE_SETUP(scm_pidfd) +{ + self->client_addr = mmap(NULL, sizeof(*self->client_addr), PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, -1, 0); + ASSERT_NE(MAP_FAILED, self->client_addr); +} + +FIXTURE_TEARDOWN(scm_pidfd) +{ + close(self->server); + + kill(self->client_pid, SIGKILL); + waitpid(self->client_pid, NULL, 0); + + if (!variant->abstract) { + unlink(self->server_addr.sock_name); + unlink(self->client_addr->sock_name); + } +} + +static void fill_sockaddr(struct sock_addr *addr, bool abstract) +{ + char *sun_path_buf = (char *)&addr->listen_addr.sun_path; + + addr->listen_addr.sun_family = AF_UNIX; + addr->addrlen = offsetof(struct sockaddr_un, sun_path); + snprintf(addr->sock_name, sizeof(addr->sock_name), "scm_pidfd_%d", getpid()); + addr->addrlen += strlen(addr->sock_name); + if (abstract) { + *sun_path_buf = '\0'; + addr->addrlen++; + sun_path_buf++; + } else { + unlink(addr->sock_name); + } + memcpy(sun_path_buf, addr->sock_name, strlen(addr->sock_name)); +} + +static void client(FIXTURE_DATA(scm_pidfd) *self, + const FIXTURE_VARIANT(scm_pidfd) *variant) +{ + int cfd; + socklen_t len; + struct ucred peer_cred; + int peer_pidfd; + pid_t peer_pid; + int on = 0; + + cfd = socket(AF_UNIX, variant->type, 0); + if (cfd < 0) { + log_err("socket"); + child_die(); + } + + if (variant->type == SOCK_DGRAM) { + fill_sockaddr(self->client_addr, variant->abstract); + + if (bind(cfd, (struct sockaddr *)&self->client_addr->listen_addr, self->client_addr->addrlen)) { + log_err("bind"); + child_die(); + } + } + + if (connect(cfd, (struct sockaddr *)&self->server_addr.listen_addr, + self->server_addr.addrlen) != 0) { + log_err("connect"); + child_die(); + } + + on = 1; + if (setsockopt(cfd, SOL_SOCKET, SO_PASSCRED, &on, sizeof(on))) { + log_err("Failed to set SO_PASSCRED"); + child_die(); + } + + if (setsockopt(cfd, SOL_SOCKET, SO_PASSPIDFD, &on, sizeof(on))) { + log_err("Failed to set SO_PASSPIDFD"); + child_die(); + } + + close(self->startup_pipe[1]); + + if (cmsg_check(cfd)) { + log_err("cmsg_check failed"); + child_die(); + } + + /* skip further for SOCK_DGRAM as it's not applicable */ + if (variant->type == SOCK_DGRAM) + return; + + len = sizeof(peer_cred); + if (getsockopt(cfd, SOL_SOCKET, SO_PEERCRED, &peer_cred, &len)) { + log_err("Failed to get SO_PEERCRED"); + child_die(); + } + + len = sizeof(peer_pidfd); + if (getsockopt(cfd, SOL_SOCKET, SO_PEERPIDFD, &peer_pidfd, &len)) { + log_err("Failed to get SO_PEERPIDFD"); + child_die(); + } + + /* pid from SO_PEERCRED should point to the parent process PID */ + if (peer_cred.pid != getppid()) { + log_err("peer_cred.pid != getppid(): %d != %d", peer_cred.pid, getppid()); + child_die(); + } + + peer_pid = get_pid_from_fdinfo_file(peer_pidfd, + "Pid:", sizeof("Pid:") - 1); + if (peer_pid != peer_cred.pid) { + log_err("peer_pid != peer_cred.pid: %d != %d", peer_pid, peer_cred.pid); + child_die(); + } +} + +TEST_F(scm_pidfd, test) +{ + int err; + int pfd; + int child_status = 0; + + self->server = socket(AF_UNIX, variant->type, 0); + ASSERT_NE(-1, self->server); + + fill_sockaddr(&self->server_addr, variant->abstract); + + err = bind(self->server, (struct sockaddr *)&self->server_addr.listen_addr, self->server_addr.addrlen); + ASSERT_EQ(0, err); + + if (variant->type == SOCK_STREAM) { + err = listen(self->server, 1); + ASSERT_EQ(0, err); + } + + err = pipe(self->startup_pipe); + ASSERT_NE(-1, err); + + self->client_pid = fork(); + ASSERT_NE(-1, self->client_pid); + if (self->client_pid == 0) { + close(self->server); + close(self->startup_pipe[0]); + client(self, variant); + exit(0); + } + close(self->startup_pipe[1]); + + if (variant->type == SOCK_STREAM) { + pfd = accept(self->server, NULL, NULL); + ASSERT_NE(-1, pfd); + } else { + pfd = self->server; + } + + /* wait until the child arrives at checkpoint */ + read(self->startup_pipe[0], &err, sizeof(int)); + close(self->startup_pipe[0]); + + if (variant->type == SOCK_DGRAM) { + err = sendto(pfd, "x", sizeof(char), 0, (struct sockaddr *)&self->client_addr->listen_addr, self->client_addr->addrlen); + ASSERT_NE(-1, err); + } else { + err = send(pfd, "x", sizeof(char), 0); + ASSERT_NE(-1, err); + } + + close(pfd); + waitpid(self->client_pid, &child_status, 0); + ASSERT_EQ(0, WIFEXITED(child_status) ? WEXITSTATUS(child_status) : 1); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/net/af_unix/test_unix_oob.c b/tools/testing/selftests/net/af_unix/test_unix_oob.c new file mode 100644 index 000000000000..a7c51889acd5 --- /dev/null +++ b/tools/testing/selftests/net/af_unix/test_unix_oob.c @@ -0,0 +1,436 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +#include <stdio.h> +#include <stdlib.h> +#include <sys/socket.h> +#include <arpa/inet.h> +#include <unistd.h> +#include <string.h> +#include <fcntl.h> +#include <sys/ioctl.h> +#include <errno.h> +#include <netinet/tcp.h> +#include <sys/un.h> +#include <sys/signal.h> +#include <sys/poll.h> + +static int pipefd[2]; +static int signal_recvd; +static pid_t producer_id; +static char sock_name[32]; + +static void sig_hand(int sn, siginfo_t *si, void *p) +{ + signal_recvd = sn; +} + +static int set_sig_handler(int signal) +{ + struct sigaction sa; + + sa.sa_sigaction = sig_hand; + sigemptyset(&sa.sa_mask); + sa.sa_flags = SA_SIGINFO | SA_RESTART; + + return sigaction(signal, &sa, NULL); +} + +static void set_filemode(int fd, int set) +{ + int flags = fcntl(fd, F_GETFL, 0); + + if (set) + flags &= ~O_NONBLOCK; + else + flags |= O_NONBLOCK; + fcntl(fd, F_SETFL, flags); +} + +static void signal_producer(int fd) +{ + char cmd; + + cmd = 'S'; + write(fd, &cmd, sizeof(cmd)); +} + +static void wait_for_signal(int fd) +{ + char buf[5]; + + read(fd, buf, 5); +} + +static void die(int status) +{ + fflush(NULL); + unlink(sock_name); + kill(producer_id, SIGTERM); + exit(status); +} + +int is_sioctatmark(int fd) +{ + int ans = -1; + + if (ioctl(fd, SIOCATMARK, &ans, sizeof(ans)) < 0) { +#ifdef DEBUG + perror("SIOCATMARK Failed"); +#endif + } + return ans; +} + +void read_oob(int fd, char *c) +{ + + *c = ' '; + if (recv(fd, c, sizeof(*c), MSG_OOB) < 0) { +#ifdef DEBUG + perror("Reading MSG_OOB Failed"); +#endif + } +} + +int read_data(int pfd, char *buf, int size) +{ + int len = 0; + + memset(buf, size, '0'); + len = read(pfd, buf, size); +#ifdef DEBUG + if (len < 0) + perror("read failed"); +#endif + return len; +} + +static void wait_for_data(int pfd, int event) +{ + struct pollfd pfds[1]; + + pfds[0].fd = pfd; + pfds[0].events = event; + poll(pfds, 1, -1); +} + +void producer(struct sockaddr_un *consumer_addr) +{ + int cfd; + char buf[64]; + int i; + + memset(buf, 'x', sizeof(buf)); + cfd = socket(AF_UNIX, SOCK_STREAM, 0); + + wait_for_signal(pipefd[0]); + if (connect(cfd, (struct sockaddr *)consumer_addr, + sizeof(*consumer_addr)) != 0) { + perror("Connect failed"); + kill(0, SIGTERM); + exit(1); + } + + for (i = 0; i < 2; i++) { + /* Test 1: Test for SIGURG and OOB */ + wait_for_signal(pipefd[0]); + memset(buf, 'x', sizeof(buf)); + buf[63] = '@'; + send(cfd, buf, sizeof(buf), MSG_OOB); + + wait_for_signal(pipefd[0]); + + /* Test 2: Test for OOB being overwitten */ + memset(buf, 'x', sizeof(buf)); + buf[63] = '%'; + send(cfd, buf, sizeof(buf), MSG_OOB); + + memset(buf, 'x', sizeof(buf)); + buf[63] = '#'; + send(cfd, buf, sizeof(buf), MSG_OOB); + + wait_for_signal(pipefd[0]); + + /* Test 3: Test for SIOCATMARK */ + memset(buf, 'x', sizeof(buf)); + buf[63] = '@'; + send(cfd, buf, sizeof(buf), MSG_OOB); + + memset(buf, 'x', sizeof(buf)); + buf[63] = '%'; + send(cfd, buf, sizeof(buf), MSG_OOB); + + memset(buf, 'x', sizeof(buf)); + send(cfd, buf, sizeof(buf), 0); + + wait_for_signal(pipefd[0]); + + /* Test 4: Test for 1byte OOB msg */ + memset(buf, 'x', sizeof(buf)); + buf[0] = '@'; + send(cfd, buf, 1, MSG_OOB); + } +} + +int +main(int argc, char **argv) +{ + int lfd, pfd; + struct sockaddr_un consumer_addr, paddr; + socklen_t len = sizeof(consumer_addr); + char buf[1024]; + int on = 0; + char oob; + int atmark; + + lfd = socket(AF_UNIX, SOCK_STREAM, 0); + memset(&consumer_addr, 0, sizeof(consumer_addr)); + consumer_addr.sun_family = AF_UNIX; + sprintf(sock_name, "unix_oob_%d", getpid()); + unlink(sock_name); + strcpy(consumer_addr.sun_path, sock_name); + + if ((bind(lfd, (struct sockaddr *)&consumer_addr, + sizeof(consumer_addr))) != 0) { + perror("socket bind failed"); + exit(1); + } + + pipe(pipefd); + + listen(lfd, 1); + + producer_id = fork(); + if (producer_id == 0) { + producer(&consumer_addr); + exit(0); + } + + set_sig_handler(SIGURG); + signal_producer(pipefd[1]); + + pfd = accept(lfd, (struct sockaddr *) &paddr, &len); + fcntl(pfd, F_SETOWN, getpid()); + + signal_recvd = 0; + signal_producer(pipefd[1]); + + /* Test 1: + * veriyf that SIGURG is + * delivered, 63 bytes are + * read, oob is '@', and POLLPRI works. + */ + wait_for_data(pfd, POLLPRI); + read_oob(pfd, &oob); + len = read_data(pfd, buf, 1024); + if (!signal_recvd || len != 63 || oob != '@') { + fprintf(stderr, "Test 1 failed sigurg %d len %d %c\n", + signal_recvd, len, oob); + die(1); + } + + signal_recvd = 0; + signal_producer(pipefd[1]); + + /* Test 2: + * Verify that the first OOB is over written by + * the 2nd one and the first OOB is returned as + * part of the read, and sigurg is received. + */ + wait_for_data(pfd, POLLIN | POLLPRI); + len = 0; + while (len < 70) + len = recv(pfd, buf, 1024, MSG_PEEK); + len = read_data(pfd, buf, 1024); + read_oob(pfd, &oob); + if (!signal_recvd || len != 127 || oob != '#') { + fprintf(stderr, "Test 2 failed, sigurg %d len %d OOB %c\n", + signal_recvd, len, oob); + die(1); + } + + signal_recvd = 0; + signal_producer(pipefd[1]); + + /* Test 3: + * verify that 2nd oob over writes + * the first one and read breaks at + * oob boundary returning 127 bytes + * and sigurg is received and atmark + * is set. + * oob is '%' and second read returns + * 64 bytes. + */ + len = 0; + wait_for_data(pfd, POLLIN | POLLPRI); + while (len < 150) + len = recv(pfd, buf, 1024, MSG_PEEK); + len = read_data(pfd, buf, 1024); + atmark = is_sioctatmark(pfd); + read_oob(pfd, &oob); + + if (!signal_recvd || len != 127 || oob != '%' || atmark != 1) { + fprintf(stderr, + "Test 3 failed, sigurg %d len %d OOB %c atmark %d\n", + signal_recvd, len, oob, atmark); + die(1); + } + + signal_recvd = 0; + + len = read_data(pfd, buf, 1024); + if (len != 64) { + fprintf(stderr, "Test 3.1 failed, sigurg %d len %d OOB %c\n", + signal_recvd, len, oob); + die(1); + } + + signal_recvd = 0; + signal_producer(pipefd[1]); + + /* Test 4: + * verify that a single byte + * oob message is delivered. + * set non blocking mode and + * check proper error is + * returned and sigurg is + * received and correct + * oob is read. + */ + + set_filemode(pfd, 0); + + wait_for_data(pfd, POLLIN | POLLPRI); + len = read_data(pfd, buf, 1024); + if ((len == -1) && (errno == 11)) + len = 0; + + read_oob(pfd, &oob); + + if (!signal_recvd || len != 0 || oob != '@') { + fprintf(stderr, "Test 4 failed, sigurg %d len %d OOB %c\n", + signal_recvd, len, oob); + die(1); + } + + set_filemode(pfd, 1); + + /* Inline Testing */ + + on = 1; + if (setsockopt(pfd, SOL_SOCKET, SO_OOBINLINE, &on, sizeof(on))) { + perror("SO_OOBINLINE"); + die(1); + } + + signal_recvd = 0; + signal_producer(pipefd[1]); + + /* Test 1 -- Inline: + * Check that SIGURG is + * delivered and 63 bytes are + * read and oob is '@' + */ + + wait_for_data(pfd, POLLIN | POLLPRI); + len = read_data(pfd, buf, 1024); + + if (!signal_recvd || len != 63) { + fprintf(stderr, "Test 1 Inline failed, sigurg %d len %d\n", + signal_recvd, len); + die(1); + } + + len = read_data(pfd, buf, 1024); + + if (len != 1) { + fprintf(stderr, + "Test 1.1 Inline failed, sigurg %d len %d oob %c\n", + signal_recvd, len, oob); + die(1); + } + + signal_recvd = 0; + signal_producer(pipefd[1]); + + /* Test 2 -- Inline: + * Verify that the first OOB is over written by + * the 2nd one and read breaks correctly on + * 2nd OOB boundary with the first OOB returned as + * part of the read, and sigurg is delivered and + * siocatmark returns true. + * next read returns one byte, the oob byte + * and siocatmark returns false. + */ + len = 0; + wait_for_data(pfd, POLLIN | POLLPRI); + while (len < 70) + len = recv(pfd, buf, 1024, MSG_PEEK); + len = read_data(pfd, buf, 1024); + atmark = is_sioctatmark(pfd); + if (len != 127 || atmark != 1 || !signal_recvd) { + fprintf(stderr, "Test 2 Inline failed, len %d atmark %d\n", + len, atmark); + die(1); + } + + len = read_data(pfd, buf, 1024); + atmark = is_sioctatmark(pfd); + if (len != 1 || buf[0] != '#' || atmark == 1) { + fprintf(stderr, "Test 2.1 Inline failed, len %d data %c atmark %d\n", + len, buf[0], atmark); + die(1); + } + + signal_recvd = 0; + signal_producer(pipefd[1]); + + /* Test 3 -- Inline: + * verify that 2nd oob over writes + * the first one and read breaks at + * oob boundary returning 127 bytes + * and sigurg is received and siocatmark + * is true after the read. + * subsequent read returns 65 bytes + * because of oob which should be '%'. + */ + len = 0; + wait_for_data(pfd, POLLIN | POLLPRI); + while (len < 126) + len = recv(pfd, buf, 1024, MSG_PEEK); + len = read_data(pfd, buf, 1024); + atmark = is_sioctatmark(pfd); + if (!signal_recvd || len != 127 || !atmark) { + fprintf(stderr, + "Test 3 Inline failed, sigurg %d len %d data %c\n", + signal_recvd, len, buf[0]); + die(1); + } + + len = read_data(pfd, buf, 1024); + atmark = is_sioctatmark(pfd); + if (len != 65 || buf[0] != '%' || atmark != 0) { + fprintf(stderr, + "Test 3.1 Inline failed, len %d oob %c atmark %d\n", + len, buf[0], atmark); + die(1); + } + + signal_recvd = 0; + signal_producer(pipefd[1]); + + /* Test 4 -- Inline: + * verify that a single + * byte oob message is delivered + * and read returns one byte, the oob + * byte and sigurg is received + */ + wait_for_data(pfd, POLLIN | POLLPRI); + len = read_data(pfd, buf, 1024); + if (!signal_recvd || len != 1 || buf[0] != '@') { + fprintf(stderr, + "Test 4 Inline failed, signal %d len %d data %c\n", + signal_recvd, len, buf[0]); + die(1); + } + die(0); +} diff --git a/tools/testing/selftests/net/af_unix/unix_connect.c b/tools/testing/selftests/net/af_unix/unix_connect.c new file mode 100644 index 000000000000..d799fd8f5c7c --- /dev/null +++ b/tools/testing/selftests/net/af_unix/unix_connect.c @@ -0,0 +1,148 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE +#include <sched.h> + +#include <stddef.h> +#include <stdio.h> +#include <unistd.h> + +#include <sys/socket.h> +#include <sys/un.h> + +#include "../../kselftest_harness.h" + +FIXTURE(unix_connect) +{ + int server, client; + int family; +}; + +FIXTURE_VARIANT(unix_connect) +{ + int type; + char sun_path[8]; + int len; + int flags; + int err; +}; + +FIXTURE_VARIANT_ADD(unix_connect, stream_pathname) +{ + .type = SOCK_STREAM, + .sun_path = "test", + .len = 4 + 1, + .flags = 0, + .err = 0, +}; + +FIXTURE_VARIANT_ADD(unix_connect, stream_abstract) +{ + .type = SOCK_STREAM, + .sun_path = "\0test", + .len = 5, + .flags = 0, + .err = 0, +}; + +FIXTURE_VARIANT_ADD(unix_connect, stream_pathname_netns) +{ + .type = SOCK_STREAM, + .sun_path = "test", + .len = 4 + 1, + .flags = CLONE_NEWNET, + .err = 0, +}; + +FIXTURE_VARIANT_ADD(unix_connect, stream_abstract_netns) +{ + .type = SOCK_STREAM, + .sun_path = "\0test", + .len = 5, + .flags = CLONE_NEWNET, + .err = ECONNREFUSED, +}; + +FIXTURE_VARIANT_ADD(unix_connect, dgram_pathname) +{ + .type = SOCK_DGRAM, + .sun_path = "test", + .len = 4 + 1, + .flags = 0, + .err = 0, +}; + +FIXTURE_VARIANT_ADD(unix_connect, dgram_abstract) +{ + .type = SOCK_DGRAM, + .sun_path = "\0test", + .len = 5, + .flags = 0, + .err = 0, +}; + +FIXTURE_VARIANT_ADD(unix_connect, dgram_pathname_netns) +{ + .type = SOCK_DGRAM, + .sun_path = "test", + .len = 4 + 1, + .flags = CLONE_NEWNET, + .err = 0, +}; + +FIXTURE_VARIANT_ADD(unix_connect, dgram_abstract_netns) +{ + .type = SOCK_DGRAM, + .sun_path = "\0test", + .len = 5, + .flags = CLONE_NEWNET, + .err = ECONNREFUSED, +}; + +FIXTURE_SETUP(unix_connect) +{ + self->family = AF_UNIX; +} + +FIXTURE_TEARDOWN(unix_connect) +{ + close(self->server); + close(self->client); + + if (variant->sun_path[0]) + remove("test"); +} + +TEST_F(unix_connect, test) +{ + socklen_t addrlen; + struct sockaddr_un addr = { + .sun_family = self->family, + }; + int err; + + self->server = socket(self->family, variant->type, 0); + ASSERT_NE(-1, self->server); + + addrlen = offsetof(struct sockaddr_un, sun_path) + variant->len; + memcpy(&addr.sun_path, variant->sun_path, variant->len); + + err = bind(self->server, (struct sockaddr *)&addr, addrlen); + ASSERT_EQ(0, err); + + if (variant->type == SOCK_STREAM) { + err = listen(self->server, 32); + ASSERT_EQ(0, err); + } + + err = unshare(variant->flags); + ASSERT_EQ(0, err); + + self->client = socket(self->family, variant->type, 0); + ASSERT_LT(0, self->client); + + err = connect(self->client, (struct sockaddr *)&addr, addrlen); + ASSERT_EQ(variant->err, err == -1 ? errno : 0); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/net/altnames.sh b/tools/testing/selftests/net/altnames.sh index 4254ddc3f70b..1ef9e4159bba 100755 --- a/tools/testing/selftests/net/altnames.sh +++ b/tools/testing/selftests/net/altnames.sh @@ -45,7 +45,7 @@ altnames_test() check_err $? "Got unexpected long alternative name from link show JSON" ip link property del $DUMMY_DEV altname $SHORT_NAME - check_err $? "Failed to add short alternative name" + check_err $? "Failed to delete short alternative name" ip -j -p link show $SHORT_NAME &>/dev/null check_fail $? "Unexpected success while trying to do link show with deleted short alternative name" diff --git a/tools/testing/selftests/net/amt.sh b/tools/testing/selftests/net/amt.sh new file mode 100755 index 000000000000..75528788cb95 --- /dev/null +++ b/tools/testing/selftests/net/amt.sh @@ -0,0 +1,284 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +# Author: Taehee Yoo <ap420073@gmail.com> +# +# This script evaluates the AMT driver. +# There are four network-namespaces, LISTENER, SOURCE, GATEWAY, RELAY. +# The role of LISTENER is to listen multicast traffic. +# In order to do that, it send IGMP group join message. +# The role of SOURCE is to send multicast traffic to listener. +# The role of GATEWAY is to work Gateway role of AMT interface. +# The role of RELAY is to work Relay role of AMT interface. +# +# +# +------------------------+ +# | LISTENER netns | +# | | +# | +------------------+ | +# | | l_gw | | +# | | 192.168.0.2/24 | | +# | | 2001:db8::2/64 | | +# | +------------------+ | +# | . | +# +------------------------+ +# . +# . +# +-----------------------------------------------------+ +# | . GATEWAY netns | +# | . | +# |+---------------------------------------------------+| +# || . br0 || +# || +------------------+ +------------------+ || +# || | gw_l | | amtg | || +# || | 192.168.0.1/24 | +--------+---------+ || +# || | 2001:db8::1/64 | | || +# || +------------------+ | || +# |+-------------------------------------|-------------+| +# | | | +# | +--------+---------+ | +# | | gw_relay | | +# | | 10.0.0.1/24 | | +# | +------------------+ | +# | . | +# +-----------------------------------------------------+ +# . +# . +# +-----------------------------------------------------+ +# | RELAY netns . | +# | +------------------+ | +# | | relay_gw | | +# | | 10.0.0.2/24 | | +# | +--------+---------+ | +# | | | +# | | | +# | +------------------+ +--------+---------+ | +# | | relay_src | | amtr | | +# | | 172.17.0.1/24 | +------------------+ | +# | | 2001:db8:3::1/64 | | +# | +------------------+ | +# | . | +# | . | +# +-----------------------------------------------------+ +# . +# . +# +------------------------+ +# | . | +# | +------------------+ | +# | | src_relay | | +# | | 172.17.0.2/24 | | +# | | 2001:db8:3::2/64 | | +# | +------------------+ | +# | SOURCE netns | +# +------------------------+ +#============================================================================== + +readonly LISTENER=$(mktemp -u listener-XXXXXXXX) +readonly GATEWAY=$(mktemp -u gateway-XXXXXXXX) +readonly RELAY=$(mktemp -u relay-XXXXXXXX) +readonly SOURCE=$(mktemp -u source-XXXXXXXX) +ERR=4 +err=0 + +exit_cleanup() +{ + for ns in "$@"; do + ip netns delete "${ns}" 2>/dev/null || true + done + + exit $ERR +} + +create_namespaces() +{ + ip netns add "${LISTENER}" || exit_cleanup + ip netns add "${GATEWAY}" || exit_cleanup "${LISTENER}" + ip netns add "${RELAY}" || exit_cleanup "${LISTENER}" "${GATEWAY}" + ip netns add "${SOURCE}" || exit_cleanup "${LISTENER}" "${GATEWAY}" \ + "${RELAY}" +} + +# The trap function handler +# +exit_cleanup_all() +{ + exit_cleanup "${LISTENER}" "${GATEWAY}" "${RELAY}" "${SOURCE}" +} + +setup_interface() +{ + for ns in "${LISTENER}" "${GATEWAY}" "${RELAY}" "${SOURCE}"; do + ip -netns "${ns}" link set dev lo up + done; + + ip link add l_gw type veth peer name gw_l + ip link add gw_relay type veth peer name relay_gw + ip link add relay_src type veth peer name src_relay + + ip link set l_gw netns "${LISTENER}" up + ip link set gw_l netns "${GATEWAY}" up + ip link set gw_relay netns "${GATEWAY}" up + ip link set relay_gw netns "${RELAY}" up + ip link set relay_src netns "${RELAY}" up + ip link set src_relay netns "${SOURCE}" up mtu 1400 + + ip netns exec "${LISTENER}" ip a a 192.168.0.2/24 dev l_gw + ip netns exec "${LISTENER}" ip r a default via 192.168.0.1 dev l_gw + ip netns exec "${LISTENER}" ip a a 2001:db8::2/64 dev l_gw + ip netns exec "${LISTENER}" ip r a default via 2001:db8::1 dev l_gw + ip netns exec "${LISTENER}" ip a a 239.0.0.1/32 dev l_gw autojoin + ip netns exec "${LISTENER}" ip a a ff0e::5:6/128 dev l_gw autojoin + + ip netns exec "${GATEWAY}" ip a a 192.168.0.1/24 dev gw_l + ip netns exec "${GATEWAY}" ip a a 2001:db8::1/64 dev gw_l + ip netns exec "${GATEWAY}" ip a a 10.0.0.1/24 dev gw_relay + ip netns exec "${GATEWAY}" ip link add br0 type bridge + ip netns exec "${GATEWAY}" ip link set br0 up + ip netns exec "${GATEWAY}" ip link set gw_l master br0 + ip netns exec "${GATEWAY}" ip link set gw_l up + ip netns exec "${GATEWAY}" ip link add amtg master br0 type amt \ + mode gateway local 10.0.0.1 discovery 10.0.0.2 dev gw_relay \ + gateway_port 2268 relay_port 2268 + ip netns exec "${RELAY}" ip a a 10.0.0.2/24 dev relay_gw + ip netns exec "${RELAY}" ip link add amtr type amt mode relay \ + local 10.0.0.2 dev relay_gw relay_port 2268 max_tunnels 4 + ip netns exec "${RELAY}" ip a a 172.17.0.1/24 dev relay_src + ip netns exec "${RELAY}" ip a a 2001:db8:3::1/64 dev relay_src + ip netns exec "${SOURCE}" ip a a 172.17.0.2/24 dev src_relay + ip netns exec "${SOURCE}" ip a a 2001:db8:3::2/64 dev src_relay + ip netns exec "${SOURCE}" ip r a default via 172.17.0.1 dev src_relay + ip netns exec "${SOURCE}" ip r a default via 2001:db8:3::1 dev src_relay + ip netns exec "${RELAY}" ip link set amtr up + ip netns exec "${GATEWAY}" ip link set amtg up +} + +setup_sysctl() +{ + ip netns exec "${RELAY}" sysctl net.ipv4.ip_forward=1 -w -q +} + +setup_iptables() +{ + ip netns exec "${RELAY}" iptables -t mangle -I PREROUTING \ + -d 239.0.0.1 -j TTL --ttl-set 2 + ip netns exec "${RELAY}" ip6tables -t mangle -I PREROUTING \ + -j HL --hl-set 2 +} + +setup_mcast_routing() +{ + ip netns exec "${RELAY}" smcrouted + ip netns exec "${RELAY}" smcroutectl a relay_src \ + 172.17.0.2 239.0.0.1 amtr + ip netns exec "${RELAY}" smcroutectl a relay_src \ + 2001:db8:3::2 ff0e::5:6 amtr +} + +test_remote_ip() +{ + REMOTE=$(ip netns exec "${GATEWAY}" \ + ip -d -j link show amtg | jq .[0].linkinfo.info_data.remote) + if [ $REMOTE == "\"10.0.0.2\"" ]; then + printf "TEST: %-60s [ OK ]\n" "amt discovery" + else + printf "TEST: %-60s [FAIL]\n" "amt discovery" + ERR=1 + fi +} + +send_mcast_torture4() +{ + ip netns exec "${SOURCE}" bash -c \ + 'cat /dev/urandom | head -c 1G | nc -w 1 -u 239.0.0.1 4001' +} + + +send_mcast_torture6() +{ + ip netns exec "${SOURCE}" bash -c \ + 'cat /dev/urandom | head -c 1G | nc -w 1 -u ff0e::5:6 6001' +} + +check_features() +{ + ip link help 2>&1 | grep -q amt + if [ $? -ne 0 ]; then + echo "Missing amt support in iproute2" >&2 + exit_cleanup + fi +} + +test_ipv4_forward() +{ + RESULT4=$(ip netns exec "${LISTENER}" nc -w 1 -l -u 239.0.0.1 4000) + if [ "$RESULT4" == "172.17.0.2" ]; then + printf "TEST: %-60s [ OK ]\n" "IPv4 amt multicast forwarding" + exit 0 + else + printf "TEST: %-60s [FAIL]\n" "IPv4 amt multicast forwarding" + exit 1 + fi +} + +test_ipv6_forward() +{ + RESULT6=$(ip netns exec "${LISTENER}" nc -w 1 -l -u ff0e::5:6 6000) + if [ "$RESULT6" == "2001:db8:3::2" ]; then + printf "TEST: %-60s [ OK ]\n" "IPv6 amt multicast forwarding" + exit 0 + else + printf "TEST: %-60s [FAIL]\n" "IPv6 amt multicast forwarding" + exit 1 + fi +} + +send_mcast4() +{ + sleep 2 + ip netns exec "${SOURCE}" bash -c \ + 'echo 172.17.0.2 | nc -w 1 -u 239.0.0.1 4000' & +} + +send_mcast6() +{ + sleep 2 + ip netns exec "${SOURCE}" bash -c \ + 'echo 2001:db8:3::2 | nc -w 1 -u ff0e::5:6 6000' & +} + +check_features + +create_namespaces + +set -e +trap exit_cleanup_all EXIT + +setup_interface +setup_sysctl +setup_iptables +setup_mcast_routing +test_remote_ip +test_ipv4_forward & +pid=$! +send_mcast4 +wait $pid || err=$? +if [ $err -eq 1 ]; then + ERR=1 +fi +test_ipv6_forward & +pid=$! +send_mcast6 +wait $pid || err=$? +if [ $err -eq 1 ]; then + ERR=1 +fi +send_mcast_torture4 +printf "TEST: %-60s [ OK ]\n" "IPv4 amt traffic forwarding torture" +send_mcast_torture6 +printf "TEST: %-60s [ OK ]\n" "IPv6 amt traffic forwarding torture" +sleep 5 +if [ "${ERR}" -eq 1 ]; then + echo "Some tests failed." >&2 +else + ERR=0 +fi diff --git a/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh b/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh new file mode 100755 index 000000000000..92eb880c52f2 --- /dev/null +++ b/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh @@ -0,0 +1,213 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Tests sysctl options {arp,ndisc}_evict_nocarrier={0,1} +# +# Create a veth pair and set IPs/routes on both. Then ping to establish +# an entry in the ARP/ND table. Depending on the test set sysctl option to +# 1 or 0. Set remote veth down which will cause local veth to go into a no +# carrier state. Depending on the test check the ARP/ND table: +# +# {arp,ndisc}_evict_nocarrier=1 should contain no ARP/ND after no carrier +# {arp,ndisc}_evict_nocarrer=0 should still contain the single ARP/ND entry +# + +source lib.sh + +readonly V4_ADDR0=10.0.10.1 +readonly V4_ADDR1=10.0.10.2 +readonly V6_ADDR0=2001:db8:91::1 +readonly V6_ADDR1=2001:db8:91::2 +nsid=100 +ret=0 + +cleanup_v6() +{ + cleanup_ns ${me} ${peer} + + sysctl -w net.ipv6.conf.veth1.ndisc_evict_nocarrier=1 >/dev/null 2>&1 + sysctl -w net.ipv6.conf.all.ndisc_evict_nocarrier=1 >/dev/null 2>&1 +} + +setup_v6() { + setup_ns me peer + + IP="ip -netns ${me}" + + $IP li add veth1 type veth peer name veth2 + $IP li set veth1 up + $IP -6 addr add $V6_ADDR0/64 dev veth1 nodad + $IP li set veth2 netns ${peer} up + ip -netns ${peer} -6 addr add $V6_ADDR1/64 dev veth2 nodad + + ip netns exec ${me} sysctl -w $1 >/dev/null 2>&1 + + # Establish an ND cache entry + ip netns exec ${me} ping -6 -c1 -Iveth1 $V6_ADDR1 >/dev/null 2>&1 + # Should have the veth1 entry in ND table + ip netns exec ${me} ip -6 neigh get $V6_ADDR1 dev veth1 >/dev/null 2>&1 + if [ $? -ne 0 ]; then + cleanup_v6 + echo "failed" + exit 1 + fi + + # Set veth2 down, which will put veth1 in NOCARRIER state + ip netns exec ${peer} ip link set veth2 down +} + +setup_v4() { + setup_ns PEER_NS + ip link add name veth0 type veth peer name veth1 + ip link set dev veth0 up + ip link set dev veth1 netns "${PEER_NS}" + ip netns exec "${PEER_NS}" ip link set dev veth1 up + ip addr add $V4_ADDR0/24 dev veth0 + ip netns exec "${PEER_NS}" ip addr add $V4_ADDR1/24 dev veth1 + ip netns exec ${PEER_NS} ip route add default via $V4_ADDR1 dev veth1 + ip route add default via $V4_ADDR0 dev veth0 + + sysctl -w "$1" >/dev/null 2>&1 + + # Establish an ARP cache entry + ping -c1 -I veth0 $V4_ADDR1 -q >/dev/null 2>&1 + # Should have the veth1 entry in ARP table + ip neigh get $V4_ADDR1 dev veth0 >/dev/null 2>&1 + if [ $? -ne 0 ]; then + cleanup_v4 + echo "failed" + exit 1 + fi + + # Set veth1 down, which will put veth0 in NOCARRIER state + ip netns exec "${PEER_NS}" ip link set veth1 down +} + +cleanup_v4() { + ip neigh flush dev veth0 + ip link del veth0 + cleanup_ns $PEER_NS + + sysctl -w net.ipv4.conf.veth0.arp_evict_nocarrier=1 >/dev/null 2>&1 + sysctl -w net.ipv4.conf.all.arp_evict_nocarrier=1 >/dev/null 2>&1 +} + +# Run test when arp_evict_nocarrier = 1 (default). +run_arp_evict_nocarrier_enabled() { + echo "run arp_evict_nocarrier=1 test" + setup_v4 "net.ipv4.conf.veth0.arp_evict_nocarrier=1" + + # ARP table should be empty + ip neigh get $V4_ADDR1 dev veth0 >/dev/null 2>&1 + + if [ $? -eq 0 ];then + echo "failed" + ret=1 + else + echo "ok" + fi + + cleanup_v4 +} + +# Run test when arp_evict_nocarrier = 0 +run_arp_evict_nocarrier_disabled() { + echo "run arp_evict_nocarrier=0 test" + setup_v4 "net.ipv4.conf.veth0.arp_evict_nocarrier=0" + + # ARP table should still contain the entry + ip neigh get $V4_ADDR1 dev veth0 >/dev/null 2>&1 + + if [ $? -eq 0 ];then + echo "ok" + else + echo "failed" + ret=1 + fi + + cleanup_v4 +} + +run_arp_evict_nocarrier_disabled_all() { + echo "run all.arp_evict_nocarrier=0 test" + setup_v4 "net.ipv4.conf.all.arp_evict_nocarrier=0" + + # ARP table should still contain the entry + ip neigh get $V4_ADDR1 dev veth0 >/dev/null 2>&1 + + if [ $? -eq 0 ];then + echo "ok" + else + echo "failed" + fi + + cleanup_v4 +} + +run_ndisc_evict_nocarrier_enabled() { + echo "run ndisc_evict_nocarrier=1 test" + + setup_v6 "net.ipv6.conf.veth1.ndisc_evict_nocarrier=1" + + ip netns exec ${me} ip -6 neigh get $V6_ADDR1 dev veth1 >/dev/null 2>&1 + + if [ $? -eq 0 ];then + echo "failed" + ret=1 + else + echo "ok" + fi + + cleanup_v6 +} + +run_ndisc_evict_nocarrier_disabled() { + echo "run ndisc_evict_nocarrier=0 test" + + setup_v6 "net.ipv6.conf.veth1.ndisc_evict_nocarrier=0" + + ip netns exec ${me} ip -6 neigh get $V6_ADDR1 dev veth1 >/dev/null 2>&1 + + if [ $? -eq 0 ];then + echo "ok" + else + echo "failed" + ret=1 + fi + + cleanup_v6 +} + +run_ndisc_evict_nocarrier_disabled_all() { + echo "run all.ndisc_evict_nocarrier=0 test" + + setup_v6 "net.ipv6.conf.all.ndisc_evict_nocarrier=0" + + ip netns exec ${me} ip -6 neigh get $V6_ADDR1 dev veth1 >/dev/null 2>&1 + + if [ $? -eq 0 ];then + echo "ok" + else + echo "failed" + ret=1 + fi + + cleanup_v6 +} + +run_all_tests() { + run_arp_evict_nocarrier_enabled + run_arp_evict_nocarrier_disabled + run_arp_evict_nocarrier_disabled_all + run_ndisc_evict_nocarrier_enabled + run_ndisc_evict_nocarrier_disabled + run_ndisc_evict_nocarrier_disabled_all +} + +if [ "$(id -u)" -ne 0 ];then + echo "SKIP: Need root privileges" + exit $ksft_skip; +fi + +run_all_tests +exit $ret diff --git a/tools/testing/selftests/net/arp_ndisc_untracked_subnets.sh b/tools/testing/selftests/net/arp_ndisc_untracked_subnets.sh new file mode 100755 index 000000000000..a40c0e9bd023 --- /dev/null +++ b/tools/testing/selftests/net/arp_ndisc_untracked_subnets.sh @@ -0,0 +1,302 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# 2 namespaces: one host and one router. Use arping from the host to send a +# garp to the router. Router accepts or ignores based on its arp_accept +# or accept_untracked_na configuration. + +source lib.sh + +TESTS="arp ndisc" + +ROUTER_INTF="veth-router" +ROUTER_ADDR="10.0.10.1" +ROUTER_ADDR_V6="2001:db8:abcd:0012::1" + +HOST_INTF="veth-host" +HOST_ADDR="10.0.10.2" +HOST_ADDR_V6="2001:db8:abcd:0012::2" + +SUBNET_WIDTH=24 +PREFIX_WIDTH_V6=64 + +cleanup() { + cleanup_ns ${HOST_NS} ${ROUTER_NS} +} + +cleanup_v6() { + cleanup_ns ${HOST_NS_V6} ${ROUTER_NS_V6} +} + +setup() { + set -e + local arp_accept=$1 + + # Set up two namespaces + setup_ns HOST_NS ROUTER_NS + + # Set up interfaces veth0 and veth1, which are pairs in separate + # namespaces. veth0 is veth-router, veth1 is veth-host. + # first, set up the inteface's link to the namespace + # then, set the interface "up" + ip netns exec ${ROUTER_NS} ip link add name ${ROUTER_INTF} \ + type veth peer name ${HOST_INTF} + + ip netns exec ${ROUTER_NS} ip link set dev ${ROUTER_INTF} up + ip netns exec ${ROUTER_NS} ip link set dev ${HOST_INTF} netns ${HOST_NS} + + ip netns exec ${HOST_NS} ip link set dev ${HOST_INTF} up + ip netns exec ${ROUTER_NS} ip addr add ${ROUTER_ADDR}/${SUBNET_WIDTH} \ + dev ${ROUTER_INTF} + + ip netns exec ${HOST_NS} ip addr add ${HOST_ADDR}/${SUBNET_WIDTH} \ + dev ${HOST_INTF} + ip netns exec ${HOST_NS} ip route add default via ${HOST_ADDR} \ + dev ${HOST_INTF} + ip netns exec ${ROUTER_NS} ip route add default via ${ROUTER_ADDR} \ + dev ${ROUTER_INTF} + + ROUTER_CONF=net.ipv4.conf.${ROUTER_INTF} + ip netns exec ${ROUTER_NS} sysctl -w \ + ${ROUTER_CONF}.arp_accept=${arp_accept} >/dev/null 2>&1 + set +e +} + +setup_v6() { + set -e + local accept_untracked_na=$1 + + # Set up two namespaces + setup_ns HOST_NS_V6 ROUTER_NS_V6 + + # Set up interfaces veth0 and veth1, which are pairs in separate + # namespaces. veth0 is veth-router, veth1 is veth-host. + # first, set up the inteface's link to the namespace + # then, set the interface "up" + ip -6 -netns ${ROUTER_NS_V6} link add name ${ROUTER_INTF} \ + type veth peer name ${HOST_INTF} + + ip -6 -netns ${ROUTER_NS_V6} link set dev ${ROUTER_INTF} up + ip -6 -netns ${ROUTER_NS_V6} link set dev ${HOST_INTF} netns \ + ${HOST_NS_V6} + + ip -6 -netns ${HOST_NS_V6} link set dev ${HOST_INTF} up + ip -6 -netns ${ROUTER_NS_V6} addr add \ + ${ROUTER_ADDR_V6}/${PREFIX_WIDTH_V6} dev ${ROUTER_INTF} nodad + + HOST_CONF=net.ipv6.conf.${HOST_INTF} + ip netns exec ${HOST_NS_V6} sysctl -qw ${HOST_CONF}.ndisc_notify=1 + ip netns exec ${HOST_NS_V6} sysctl -qw ${HOST_CONF}.disable_ipv6=0 + ip -6 -netns ${HOST_NS_V6} addr add ${HOST_ADDR_V6}/${PREFIX_WIDTH_V6} \ + dev ${HOST_INTF} + + ROUTER_CONF=net.ipv6.conf.${ROUTER_INTF} + + ip netns exec ${ROUTER_NS_V6} sysctl -w \ + ${ROUTER_CONF}.forwarding=1 >/dev/null 2>&1 + ip netns exec ${ROUTER_NS_V6} sysctl -w \ + ${ROUTER_CONF}.drop_unsolicited_na=0 >/dev/null 2>&1 + ip netns exec ${ROUTER_NS_V6} sysctl -w \ + ${ROUTER_CONF}.accept_untracked_na=${accept_untracked_na} \ + >/dev/null 2>&1 + set +e +} + +verify_arp() { + local arp_accept=$1 + local same_subnet=$2 + + neigh_show_output=$(ip netns exec ${ROUTER_NS} ip neigh get \ + ${HOST_ADDR} dev ${ROUTER_INTF} 2>/dev/null) + + if [ ${arp_accept} -eq 1 ]; then + # Neighbor entries expected + [[ ${neigh_show_output} ]] + elif [ ${arp_accept} -eq 2 ]; then + if [ ${same_subnet} -eq 1 ]; then + # Neighbor entries expected + [[ ${neigh_show_output} ]] + else + [[ -z "${neigh_show_output}" ]] + fi + else + [[ -z "${neigh_show_output}" ]] + fi + } + +arp_test_gratuitous() { + set -e + local arp_accept=$1 + local same_subnet=$2 + + if [ ${arp_accept} -eq 2 ]; then + test_msg=("test_arp: " + "accept_arp=$1 " + "same_subnet=$2") + if [ ${same_subnet} -eq 0 ]; then + HOST_ADDR=10.0.11.3 + else + HOST_ADDR=10.0.10.3 + fi + else + test_msg=("test_arp: " + "accept_arp=$1") + fi + # Supply arp_accept option to set up which sets it in sysctl + setup ${arp_accept} + ip netns exec ${HOST_NS} arping -A -I ${HOST_INTF} -U ${HOST_ADDR} -c1 2>&1 >/dev/null + + if verify_arp $1 $2; then + printf " TEST: %-60s [ OK ]\n" "${test_msg[*]}" + else + printf " TEST: %-60s [FAIL]\n" "${test_msg[*]}" + fi + cleanup + set +e +} + +arp_test_gratuitous_combinations() { + arp_test_gratuitous 0 + arp_test_gratuitous 1 + arp_test_gratuitous 2 0 # Second entry indicates subnet or not + arp_test_gratuitous 2 1 +} + +cleanup_tcpdump() { + set -e + [[ ! -z ${tcpdump_stdout} ]] && rm -f ${tcpdump_stdout} + [[ ! -z ${tcpdump_stderr} ]] && rm -f ${tcpdump_stderr} + tcpdump_stdout= + tcpdump_stderr= + set +e +} + +start_tcpdump() { + set -e + tcpdump_stdout=`mktemp` + tcpdump_stderr=`mktemp` + ip netns exec ${ROUTER_NS_V6} timeout 15s \ + tcpdump --immediate-mode -tpni ${ROUTER_INTF} -c 1 \ + "icmp6 && icmp6[0] == 136 && src ${HOST_ADDR_V6}" \ + > ${tcpdump_stdout} 2> /dev/null + set +e +} + +verify_ndisc() { + local accept_untracked_na=$1 + local same_subnet=$2 + + neigh_show_output=$(ip -6 -netns ${ROUTER_NS_V6} neigh show \ + to ${HOST_ADDR_V6} dev ${ROUTER_INTF} nud stale) + + if [ ${accept_untracked_na} -eq 1 ]; then + # Neighbour entry expected to be present + [[ ${neigh_show_output} ]] + elif [ ${accept_untracked_na} -eq 2 ]; then + if [ ${same_subnet} -eq 1 ]; then + [[ ${neigh_show_output} ]] + else + [[ -z "${neigh_show_output}" ]] + fi + else + # Neighbour entry expected to be absent for all other cases + [[ -z "${neigh_show_output}" ]] + fi +} + +ndisc_test_untracked_advertisements() { + set -e + test_msg=("test_ndisc: " + "accept_untracked_na=$1") + + local accept_untracked_na=$1 + local same_subnet=$2 + if [ ${accept_untracked_na} -eq 2 ]; then + test_msg=("test_ndisc: " + "accept_untracked_na=$1 " + "same_subnet=$2") + if [ ${same_subnet} -eq 0 ]; then + # Not same subnet + HOST_ADDR_V6=2000:db8:abcd:0013::4 + else + HOST_ADDR_V6=2001:db8:abcd:0012::3 + fi + fi + setup_v6 $1 $2 + start_tcpdump + + if verify_ndisc $1 $2; then + printf " TEST: %-60s [ OK ]\n" "${test_msg[*]}" + else + printf " TEST: %-60s [FAIL]\n" "${test_msg[*]}" + fi + + cleanup_tcpdump + cleanup_v6 + set +e +} + +ndisc_test_untracked_combinations() { + ndisc_test_untracked_advertisements 0 + ndisc_test_untracked_advertisements 1 + ndisc_test_untracked_advertisements 2 0 + ndisc_test_untracked_advertisements 2 1 +} + +################################################################################ +# usage + +usage() +{ + cat <<EOF +usage: ${0##*/} OPTS + + -t <test> Test(s) to run (default: all) + (options: $TESTS) +EOF +} + +################################################################################ +# main + +while getopts ":t:h" opt; do + case $opt in + t) TESTS=$OPTARG;; + h) usage; exit 0;; + *) usage; exit 1;; + esac +done + +if [ "$(id -u)" -ne 0 ];then + echo "SKIP: Need root privileges" + exit $ksft_skip; +fi + +if [ ! -x "$(command -v ip)" ]; then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +if [ ! -x "$(command -v tcpdump)" ]; then + echo "SKIP: Could not run test without tcpdump tool" + exit $ksft_skip +fi + +if [ ! -x "$(command -v arping)" ]; then + echo "SKIP: Could not run test without arping tool" + exit $ksft_skip +fi + +# start clean +cleanup &> /dev/null +cleanup_v6 &> /dev/null + +for t in $TESTS +do + case $t in + arp_test_gratuitous_combinations|arp) arp_test_gratuitous_combinations;; + ndisc_test_untracked_combinations|ndisc) \ + ndisc_test_untracked_combinations;; + help) echo "Test names: $TESTS"; exit 0;; +esac +done diff --git a/tools/testing/selftests/net/bareudp.sh b/tools/testing/selftests/net/bareudp.sh new file mode 100755 index 000000000000..f366cadbc5e8 --- /dev/null +++ b/tools/testing/selftests/net/bareudp.sh @@ -0,0 +1,546 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +# Test various bareudp tunnel configurations. +# +# The bareudp module allows to tunnel network protocols like IP or MPLS over +# UDP, without adding any intermediate header. This scripts tests several +# configurations of bareudp (using IPv4 or IPv6 as underlay and transporting +# IPv4, IPv6 or MPLS packets on the overlay). +# +# Network topology: +# +# * A chain of 4 network namespaces, connected with veth pairs. Each veth +# is assigned an IPv4 and an IPv6 address. A host-route allows a veth to +# join its peer. +# +# * NS0 and NS3 are at the extremities of the chain. They have additional +# IPv4 and IPv6 addresses on their loopback device. Routes are added in NS0 +# and NS3, so that they can communicate using these overlay IP addresses. +# For IPv4 and IPv6 reachability tests, the route simply sets the peer's +# veth address as gateway. For MPLS reachability tests, an MPLS header is +# also pushed before the IP header. +# +# * NS1 and NS2 are the intermediate namespaces. They use a bareudp device to +# encapsulate the traffic into UDP. +# +# +-----------------------------------------------------------------------+ +# | NS0 | +# | | +# | lo: | +# | * IPv4 address: 192.0.2.100/32 | +# | * IPv6 address: 2001:db8::100/128 | +# | * IPv6 address: 2001:db8::200/128 | +# | * IPv4 route: 192.0.2.103/32 reachable via 192.0.2.11 | +# | * IPv6 route: 2001:db8::103/128 reachable via 2001:db8::11 | +# | * IPv6 route: 2001:db8::203/128 reachable via 2001:db8::11 | +# | (encapsulated with MPLS label 203) | +# | | +# | veth01: | +# | ^ * IPv4 address: 192.0.2.10, peer 192.0.2.11/32 | +# | | * IPv6 address: 2001:db8::10, peer 2001:db8::11/128 | +# | | | +# +---+-------------------------------------------------------------------+ +# | +# | Traffic type: IP or MPLS (depending on test) +# | +# +---+-------------------------------------------------------------------+ +# | | NS1 | +# | | | +# | v | +# | veth10: | +# | * IPv4 address: 192.0.2.11, peer 192.0.2.10/32 | +# | * IPv6 address: 2001:db8::11, peer 2001:db8::10/128 | +# | | +# | bareudp_ns1: | +# | * Encapsulate IP or MPLS packets received on veth10 into UDP | +# | and send the resulting packets through veth12. | +# | * Decapsulate bareudp packets (either IP or MPLS, over UDP) | +# | received on veth12 and send the inner packets through veth10. | +# | | +# | veth12: | +# | ^ * IPv4 address: 192.0.2.21, peer 192.0.2.22/32 | +# | | * IPv6 address: 2001:db8::21, peer 2001:db8::22/128 | +# | | | +# +---+-------------------------------------------------------------------+ +# | +# | Traffic type: IP or MPLS (depending on test), over UDP +# | +# +---+-------------------------------------------------------------------+ +# | | NS2 | +# | | | +# | v | +# | veth21: | +# | * IPv4 address: 192.0.2.22, peer 192.0.2.21/32 | +# | * IPv6 address: 2001:db8::22, peer 2001:db8::21/128 | +# | | +# | bareudp_ns2: | +# | * Decapsulate bareudp packets (either IP or MPLS, over UDP) | +# | received on veth21 and send the inner packets through veth23. | +# | * Encapsulate IP or MPLS packets received on veth23 into UDP | +# | and send the resulting packets through veth21. | +# | | +# | veth23: | +# | ^ * IPv4 address: 192.0.2.32, peer 192.0.2.33/32 | +# | | * IPv6 address: 2001:db8::32, peer 2001:db8::33/128 | +# | | | +# +---+-------------------------------------------------------------------+ +# | +# | Traffic type: IP or MPLS (depending on test) +# | +# +---+-------------------------------------------------------------------+ +# | | NS3 | +# | v | +# | veth32: | +# | * IPv4 address: 192.0.2.33, peer 192.0.2.32/32 | +# | * IPv6 address: 2001:db8::33, peer 2001:db8::32/128 | +# | | +# | lo: | +# | * IPv4 address: 192.0.2.103/32 | +# | * IPv6 address: 2001:db8::103/128 | +# | * IPv6 address: 2001:db8::203/128 | +# | * IPv4 route: 192.0.2.100/32 reachable via 192.0.2.32 | +# | * IPv6 route: 2001:db8::100/128 reachable via 2001:db8::32 | +# | * IPv6 route: 2001:db8::200/128 reachable via 2001:db8::32 | +# | (encapsulated with MPLS label 200) | +# | | +# +-----------------------------------------------------------------------+ + +ERR=4 # Return 4 by default, which is the SKIP code for kselftest +PING6="ping" +PAUSE_ON_FAIL="no" + +readonly NS0=$(mktemp -u ns0-XXXXXXXX) +readonly NS1=$(mktemp -u ns1-XXXXXXXX) +readonly NS2=$(mktemp -u ns2-XXXXXXXX) +readonly NS3=$(mktemp -u ns3-XXXXXXXX) + +# Exit the script after having removed the network namespaces it created +# +# Parameters: +# +# * The list of network namespaces to delete before exiting. +# +exit_cleanup() +{ + for ns in "$@"; do + ip netns delete "${ns}" 2>/dev/null || true + done + + if [ "${ERR}" -eq 4 ]; then + echo "Error: Setting up the testing environment failed." >&2 + fi + + exit "${ERR}" +} + +# Create the four network namespaces used by the script (NS0, NS1, NS2 and NS3) +# +# New namespaces are cleaned up manually in case of error, to ensure that only +# namespaces created by this script are deleted. +create_namespaces() +{ + ip netns add "${NS0}" || exit_cleanup + ip netns add "${NS1}" || exit_cleanup "${NS0}" + ip netns add "${NS2}" || exit_cleanup "${NS0}" "${NS1}" + ip netns add "${NS3}" || exit_cleanup "${NS0}" "${NS1}" "${NS2}" +} + +# The trap function handler +# +exit_cleanup_all() +{ + exit_cleanup "${NS0}" "${NS1}" "${NS2}" "${NS3}" +} + +# Configure a network interface using a host route +# +# Parameters +# +# * $1: the netns the network interface resides in, +# * $2: the network interface name, +# * $3: the local IPv4 address to assign to this interface, +# * $4: the IPv4 address of the remote network interface, +# * $5: the local IPv6 address to assign to this interface, +# * $6: the IPv6 address of the remote network interface. +# +iface_config() +{ + local NS="${1}"; readonly NS + local DEV="${2}"; readonly DEV + local LOCAL_IP4="${3}"; readonly LOCAL_IP4 + local PEER_IP4="${4}"; readonly PEER_IP4 + local LOCAL_IP6="${5}"; readonly LOCAL_IP6 + local PEER_IP6="${6}"; readonly PEER_IP6 + + ip -netns "${NS}" link set dev "${DEV}" up + ip -netns "${NS}" address add dev "${DEV}" "${LOCAL_IP4}" peer "${PEER_IP4}" + ip -netns "${NS}" address add dev "${DEV}" "${LOCAL_IP6}" peer "${PEER_IP6}" nodad +} + +# Create base networking topology: +# +# * set up the loopback device in all network namespaces (NS0..NS3), +# * set up a veth pair to connect each netns in sequence (NS0 with NS1, +# NS1 with NS2, etc.), +# * add and IPv4 and an IPv6 address on each veth interface, +# * prepare the ingress qdiscs in the intermediate namespaces. +# +setup_underlay() +{ + for ns in "${NS0}" "${NS1}" "${NS2}" "${NS3}"; do + ip -netns "${ns}" link set dev lo up + done; + + ip link add name veth01 netns "${NS0}" type veth peer name veth10 netns "${NS1}" + ip link add name veth12 netns "${NS1}" type veth peer name veth21 netns "${NS2}" + ip link add name veth23 netns "${NS2}" type veth peer name veth32 netns "${NS3}" + iface_config "${NS0}" veth01 192.0.2.10 192.0.2.11/32 2001:db8::10 2001:db8::11/128 + iface_config "${NS1}" veth10 192.0.2.11 192.0.2.10/32 2001:db8::11 2001:db8::10/128 + iface_config "${NS1}" veth12 192.0.2.21 192.0.2.22/32 2001:db8::21 2001:db8::22/128 + iface_config "${NS2}" veth21 192.0.2.22 192.0.2.21/32 2001:db8::22 2001:db8::21/128 + iface_config "${NS2}" veth23 192.0.2.32 192.0.2.33/32 2001:db8::32 2001:db8::33/128 + iface_config "${NS3}" veth32 192.0.2.33 192.0.2.32/32 2001:db8::33 2001:db8::32/128 + + tc -netns "${NS1}" qdisc add dev veth10 ingress + tc -netns "${NS2}" qdisc add dev veth23 ingress +} + +# Set up the IPv4, IPv6 and MPLS overlays. +# +# Configuration is similar for all protocols: +# +# * add an overlay IP address on the loopback interface of each edge +# namespace, +# * route these IP addresses via the intermediate namespaces (for the MPLS +# tests, this is also where MPLS encapsulation is done), +# * add routes for these IP addresses (or MPLS labels) in the intermediate +# namespaces. +# +# The bareudp encapsulation isn't configured in setup_overlay_*(). That will be +# done just before running the reachability tests. + +setup_overlay_ipv4() +{ + # Add the overlay IP addresses and route them through the veth devices + ip -netns "${NS0}" address add 192.0.2.100/32 dev lo + ip -netns "${NS3}" address add 192.0.2.103/32 dev lo + ip -netns "${NS0}" route add 192.0.2.103/32 src 192.0.2.100 via 192.0.2.11 + ip -netns "${NS3}" route add 192.0.2.100/32 src 192.0.2.103 via 192.0.2.32 + + # Route the overlay addresses in the intermediate namespaces + # (used after bareudp decapsulation) + ip netns exec "${NS1}" sysctl -qw net.ipv4.ip_forward=1 + ip netns exec "${NS2}" sysctl -qw net.ipv4.ip_forward=1 + ip -netns "${NS1}" route add 192.0.2.100/32 via 192.0.2.10 + ip -netns "${NS2}" route add 192.0.2.103/32 via 192.0.2.33 + + # The intermediate namespaces don't have routes for the reverse path, + # as it will be handled by tc. So we need to ensure that rp_filter is + # not going to block the traffic. + ip netns exec "${NS1}" sysctl -qw net.ipv4.conf.all.rp_filter=0 + ip netns exec "${NS2}" sysctl -qw net.ipv4.conf.all.rp_filter=0 + ip netns exec "${NS1}" sysctl -qw net.ipv4.conf.default.rp_filter=0 + ip netns exec "${NS2}" sysctl -qw net.ipv4.conf.default.rp_filter=0 +} + +setup_overlay_ipv6() +{ + # Add the overlay IP addresses and route them through the veth devices + ip -netns "${NS0}" address add 2001:db8::100/128 dev lo + ip -netns "${NS3}" address add 2001:db8::103/128 dev lo + ip -netns "${NS0}" route add 2001:db8::103/128 src 2001:db8::100 via 2001:db8::11 + ip -netns "${NS3}" route add 2001:db8::100/128 src 2001:db8::103 via 2001:db8::32 + + # Route the overlay addresses in the intermediate namespaces + # (used after bareudp decapsulation) + ip netns exec "${NS1}" sysctl -qw net.ipv6.conf.all.forwarding=1 + ip netns exec "${NS2}" sysctl -qw net.ipv6.conf.all.forwarding=1 + ip -netns "${NS1}" route add 2001:db8::100/128 via 2001:db8::10 + ip -netns "${NS2}" route add 2001:db8::103/128 via 2001:db8::33 +} + +setup_overlay_mpls() +{ + # Add specific overlay IP addresses, routed over MPLS + ip -netns "${NS0}" address add 2001:db8::200/128 dev lo + ip -netns "${NS3}" address add 2001:db8::203/128 dev lo + ip -netns "${NS0}" route add 2001:db8::203/128 src 2001:db8::200 encap mpls 203 via 2001:db8::11 + ip -netns "${NS3}" route add 2001:db8::200/128 src 2001:db8::203 encap mpls 200 via 2001:db8::32 + + # Route the MPLS packets in the intermediate namespaces + # (used after bareudp decapsulation) + ip netns exec "${NS1}" sysctl -qw net.mpls.platform_labels=256 + ip netns exec "${NS2}" sysctl -qw net.mpls.platform_labels=256 + ip -netns "${NS1}" -family mpls route add 200 via inet6 2001:db8::10 + ip -netns "${NS2}" -family mpls route add 203 via inet6 2001:db8::33 +} + +# Run "ping" from NS0 and print the result +# +# Parameters: +# +# * $1: the variant of ping to use (normally either "ping" or "ping6"), +# * $2: the IP address to ping, +# * $3: a human readable description of the purpose of the test. +# +# If the test fails and PAUSE_ON_FAIL is active, the user is given the +# possibility to continue with the next test or to quit immediately. +# +ping_test_one() +{ + local PING="$1"; readonly PING + local IP="$2"; readonly IP + local MSG="$3"; readonly MSG + local RET + + printf "TEST: %-60s " "${MSG}" + + set +e + ip netns exec "${NS0}" "${PING}" -w 5 -c 1 "${IP}" > /dev/null 2>&1 + RET=$? + set -e + + if [ "${RET}" -eq 0 ]; then + printf "[ OK ]\n" + else + ERR=1 + printf "[FAIL]\n" + if [ "${PAUSE_ON_FAIL}" = "yes" ]; then + printf "\nHit enter to continue, 'q' to quit\n" + read a + if [ "$a" = "q" ]; then + exit 1 + fi + fi + fi +} + +# Run reachability tests +# +# Parameters: +# +# * $1: human readable string describing the underlay protocol. +# +# $IPV4, $IPV6, $MPLS_UC and $MULTIPROTO are inherited from the calling +# function. +# +ping_test() +{ + local UNDERLAY="$1"; readonly UNDERLAY + local MODE + local MSG + + if [ "${MULTIPROTO}" = "multiproto" ]; then + MODE=" (multiproto mode)" + else + MODE="" + fi + + if [ $IPV4 ]; then + ping_test_one "ping" "192.0.2.103" "IPv4 packets over ${UNDERLAY}${MODE}" + fi + if [ $IPV6 ]; then + ping_test_one "${PING6}" "2001:db8::103" "IPv6 packets over ${UNDERLAY}${MODE}" + fi + if [ $MPLS_UC ]; then + ping_test_one "${PING6}" "2001:db8::203" "Unicast MPLS packets over ${UNDERLAY}${MODE}" + fi +} + +# Set up a bareudp overlay and run reachability tests over IPv4 and IPv6 +# +# Parameters: +# +# * $1: the packet type (protocol) to be handled by bareudp, +# * $2: a flag to activate or deactivate bareudp's "multiproto" mode. +# +test_overlay() +{ + local ETHERTYPE="$1"; readonly ETHERTYPE + local MULTIPROTO="$2"; readonly MULTIPROTO + local IPV4 + local IPV6 + local MPLS_UC + + case "${ETHERTYPE}" in + "ipv4") + IPV4="ipv4" + if [ "${MULTIPROTO}" = "multiproto" ]; then + IPV6="ipv6" + else + IPV6="" + fi + MPLS_UC="" + ;; + "ipv6") + IPV6="ipv6" + IPV4="" + MPLS_UC="" + ;; + "mpls_uc") + MPLS_UC="mpls_uc" + IPV4="" + IPV6="" + ;; + *) + exit 1 + ;; + esac + readonly IPV4 + readonly IPV6 + readonly MPLS_UC + + # Create the bareudp devices in the intermediate namespaces + ip -netns "${NS1}" link add name bareudp_ns1 up type bareudp dstport 6635 ethertype "${ETHERTYPE}" "${MULTIPROTO}" + ip -netns "${NS2}" link add name bareudp_ns2 up type bareudp dstport 6635 ethertype "${ETHERTYPE}" "${MULTIPROTO}" + + # IPv4 over UDPv4 + if [ $IPV4 ]; then + # Encapsulation instructions for bareudp over IPv4 + tc -netns "${NS1}" filter add dev veth10 ingress protocol ipv4 \ + flower dst_ip 192.0.2.103/32 \ + action tunnel_key set src_ip 192.0.2.21 dst_ip 192.0.2.22 id 0 \ + action mirred egress redirect dev bareudp_ns1 + tc -netns "${NS2}" filter add dev veth23 ingress protocol ipv4 \ + flower dst_ip 192.0.2.100/32 \ + action tunnel_key set src_ip 192.0.2.22 dst_ip 192.0.2.21 id 0 \ + action mirred egress redirect dev bareudp_ns2 + fi + + # IPv6 over UDPv4 + if [ $IPV6 ]; then + # Encapsulation instructions for bareudp over IPv4 + tc -netns "${NS1}" filter add dev veth10 ingress protocol ipv6 \ + flower dst_ip 2001:db8::103/128 \ + action tunnel_key set src_ip 192.0.2.21 dst_ip 192.0.2.22 id 0 \ + action mirred egress redirect dev bareudp_ns1 + tc -netns "${NS2}" filter add dev veth23 ingress protocol ipv6 \ + flower dst_ip 2001:db8::100/128 \ + action tunnel_key set src_ip 192.0.2.22 dst_ip 192.0.2.21 id 0 \ + action mirred egress redirect dev bareudp_ns2 + fi + + # MPLS (unicast) over UDPv4 + if [ $MPLS_UC ]; then + ip netns exec "${NS1}" sysctl -qw net.mpls.conf.bareudp_ns1.input=1 + ip netns exec "${NS2}" sysctl -qw net.mpls.conf.bareudp_ns2.input=1 + + # Encapsulation instructions for bareudp over IPv4 + tc -netns "${NS1}" filter add dev veth10 ingress protocol mpls_uc \ + flower mpls_label 203 \ + action tunnel_key set src_ip 192.0.2.21 dst_ip 192.0.2.22 id 0 \ + action mirred egress redirect dev bareudp_ns1 + tc -netns "${NS2}" filter add dev veth23 ingress protocol mpls_uc \ + flower mpls_label 200 \ + action tunnel_key set src_ip 192.0.2.22 dst_ip 192.0.2.21 id 0 \ + action mirred egress redirect dev bareudp_ns2 + fi + + # Test IPv4 underlay + ping_test "UDPv4" + + # Cleanup bareudp encapsulation instructions, as they were specific to + # the IPv4 underlay, before setting up and testing the IPv6 underlay + tc -netns "${NS1}" filter delete dev veth10 ingress + tc -netns "${NS2}" filter delete dev veth23 ingress + + # IPv4 over UDPv6 + if [ $IPV4 ]; then + # New encapsulation instructions for bareudp over IPv6 + tc -netns "${NS1}" filter add dev veth10 ingress protocol ipv4 \ + flower dst_ip 192.0.2.103/32 \ + action tunnel_key set src_ip 2001:db8::21 dst_ip 2001:db8::22 id 0 \ + action mirred egress redirect dev bareudp_ns1 + tc -netns "${NS2}" filter add dev veth23 ingress protocol ipv4 \ + flower dst_ip 192.0.2.100/32 \ + action tunnel_key set src_ip 2001:db8::22 dst_ip 2001:db8::21 id 0 \ + action mirred egress redirect dev bareudp_ns2 + fi + + # IPv6 over UDPv6 + if [ $IPV6 ]; then + # New encapsulation instructions for bareudp over IPv6 + tc -netns "${NS1}" filter add dev veth10 ingress protocol ipv6 \ + flower dst_ip 2001:db8::103/128 \ + action tunnel_key set src_ip 2001:db8::21 dst_ip 2001:db8::22 id 0 \ + action mirred egress redirect dev bareudp_ns1 + tc -netns "${NS2}" filter add dev veth23 ingress protocol ipv6 \ + flower dst_ip 2001:db8::100/128 \ + action tunnel_key set src_ip 2001:db8::22 dst_ip 2001:db8::21 id 0 \ + action mirred egress redirect dev bareudp_ns2 + fi + + # MPLS (unicast) over UDPv6 + if [ $MPLS_UC ]; then + # New encapsulation instructions for bareudp over IPv6 + tc -netns "${NS1}" filter add dev veth10 ingress protocol mpls_uc \ + flower mpls_label 203 \ + action tunnel_key set src_ip 2001:db8::21 dst_ip 2001:db8::22 id 0 \ + action mirred egress redirect dev bareudp_ns1 + tc -netns "${NS2}" filter add dev veth23 ingress protocol mpls_uc \ + flower mpls_label 200 \ + action tunnel_key set src_ip 2001:db8::22 dst_ip 2001:db8::21 id 0 \ + action mirred egress redirect dev bareudp_ns2 + fi + + # Test IPv6 underlay + ping_test "UDPv6" + + tc -netns "${NS1}" filter delete dev veth10 ingress + tc -netns "${NS2}" filter delete dev veth23 ingress + ip -netns "${NS1}" link delete bareudp_ns1 + ip -netns "${NS2}" link delete bareudp_ns2 +} + +check_features() +{ + ip link help 2>&1 | grep -q bareudp + if [ $? -ne 0 ]; then + echo "Missing bareudp support in iproute2" >&2 + exit_cleanup + fi + + # Use ping6 on systems where ping doesn't handle IPv6 + ping -w 1 -c 1 ::1 > /dev/null 2>&1 || PING6="ping6" +} + +usage() +{ + echo "Usage: $0 [-p]" + exit 1 +} + +while getopts :p o +do + case $o in + p) PAUSE_ON_FAIL="yes";; + *) usage;; + esac +done + +check_features + +# Create namespaces before setting up the exit trap. +# Otherwise, exit_cleanup_all() could delete namespaces that were not created +# by this script. +create_namespaces + +set -e +trap exit_cleanup_all EXIT + +setup_underlay +setup_overlay_ipv4 +setup_overlay_ipv6 +setup_overlay_mpls + +test_overlay ipv4 nomultiproto +test_overlay ipv6 nomultiproto +test_overlay ipv4 multiproto +test_overlay mpls_uc nomultiproto + +if [ "${ERR}" -eq 1 ]; then + echo "Some tests failed." >&2 +else + ERR=0 +fi diff --git a/tools/testing/selftests/net/big_tcp.sh b/tools/testing/selftests/net/big_tcp.sh new file mode 100755 index 000000000000..2db9d15cd45f --- /dev/null +++ b/tools/testing/selftests/net/big_tcp.sh @@ -0,0 +1,182 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Testing For IPv4 and IPv6 BIG TCP. +# TOPO: CLIENT_NS (link0)<--->(link1) ROUTER_NS (link2)<--->(link3) SERVER_NS + +CLIENT_NS=$(mktemp -u client-XXXXXXXX) +CLIENT_IP4="198.51.100.1" +CLIENT_IP6="2001:db8:1::1" + +SERVER_NS=$(mktemp -u server-XXXXXXXX) +SERVER_IP4="203.0.113.1" +SERVER_IP6="2001:db8:2::1" + +ROUTER_NS=$(mktemp -u router-XXXXXXXX) +SERVER_GW4="203.0.113.2" +CLIENT_GW4="198.51.100.2" +SERVER_GW6="2001:db8:2::2" +CLIENT_GW6="2001:db8:1::2" + +MAX_SIZE=128000 +CHK_SIZE=65535 + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +setup() { + ip netns add $CLIENT_NS + ip netns add $SERVER_NS + ip netns add $ROUTER_NS + ip -net $ROUTER_NS link add link1 type veth peer name link0 netns $CLIENT_NS + ip -net $ROUTER_NS link add link2 type veth peer name link3 netns $SERVER_NS + + ip -net $CLIENT_NS link set link0 up + ip -net $CLIENT_NS link set link0 mtu 1442 + ip -net $CLIENT_NS addr add $CLIENT_IP4/24 dev link0 + ip -net $CLIENT_NS addr add $CLIENT_IP6/64 dev link0 nodad + ip -net $CLIENT_NS route add $SERVER_IP4 dev link0 via $CLIENT_GW4 + ip -net $CLIENT_NS route add $SERVER_IP6 dev link0 via $CLIENT_GW6 + ip -net $CLIENT_NS link set dev link0 \ + gro_ipv4_max_size $MAX_SIZE gso_ipv4_max_size $MAX_SIZE + ip -net $CLIENT_NS link set dev link0 \ + gro_max_size $MAX_SIZE gso_max_size $MAX_SIZE + ip net exec $CLIENT_NS sysctl -wq net.ipv4.tcp_window_scaling=10 + + ip -net $ROUTER_NS link set link1 up + ip -net $ROUTER_NS link set link2 up + ip -net $ROUTER_NS addr add $CLIENT_GW4/24 dev link1 + ip -net $ROUTER_NS addr add $CLIENT_GW6/64 dev link1 nodad + ip -net $ROUTER_NS addr add $SERVER_GW4/24 dev link2 + ip -net $ROUTER_NS addr add $SERVER_GW6/64 dev link2 nodad + ip -net $ROUTER_NS link set dev link1 \ + gro_ipv4_max_size $MAX_SIZE gso_ipv4_max_size $MAX_SIZE + ip -net $ROUTER_NS link set dev link2 \ + gro_ipv4_max_size $MAX_SIZE gso_ipv4_max_size $MAX_SIZE + ip -net $ROUTER_NS link set dev link1 \ + gro_max_size $MAX_SIZE gso_max_size $MAX_SIZE + ip -net $ROUTER_NS link set dev link2 \ + gro_max_size $MAX_SIZE gso_max_size $MAX_SIZE + # test for nf_ct_skb_network_trim in nf_conntrack_ovs used by TC ct action. + ip net exec $ROUTER_NS tc qdisc add dev link1 ingress + ip net exec $ROUTER_NS tc filter add dev link1 ingress \ + proto ip flower ip_proto tcp action ct + ip net exec $ROUTER_NS tc filter add dev link1 ingress \ + proto ipv6 flower ip_proto tcp action ct + ip net exec $ROUTER_NS sysctl -wq net.ipv4.ip_forward=1 + ip net exec $ROUTER_NS sysctl -wq net.ipv6.conf.all.forwarding=1 + + ip -net $SERVER_NS link set link3 up + ip -net $SERVER_NS addr add $SERVER_IP4/24 dev link3 + ip -net $SERVER_NS addr add $SERVER_IP6/64 dev link3 nodad + ip -net $SERVER_NS route add $CLIENT_IP4 dev link3 via $SERVER_GW4 + ip -net $SERVER_NS route add $CLIENT_IP6 dev link3 via $SERVER_GW6 + ip -net $SERVER_NS link set dev link3 \ + gro_ipv4_max_size $MAX_SIZE gso_ipv4_max_size $MAX_SIZE + ip -net $SERVER_NS link set dev link3 \ + gro_max_size $MAX_SIZE gso_max_size $MAX_SIZE + ip net exec $SERVER_NS sysctl -wq net.ipv4.tcp_window_scaling=10 + ip net exec $SERVER_NS netserver 2>&1 >/dev/null +} + +cleanup() { + ip net exec $SERVER_NS pkill netserver + ip -net $ROUTER_NS link del link1 + ip -net $ROUTER_NS link del link2 + ip netns del "$CLIENT_NS" + ip netns del "$SERVER_NS" + ip netns del "$ROUTER_NS" +} + +start_counter() { + local ipt="iptables" + local iface=$1 + local netns=$2 + + [ "$NF" = "6" ] && ipt="ip6tables" + ip net exec $netns $ipt -t raw -A PREROUTING -i $iface \ + -m length ! --length 0:$CHK_SIZE -j ACCEPT +} + +check_counter() { + local ipt="iptables" + local iface=$1 + local netns=$2 + + [ "$NF" = "6" ] && ipt="ip6tables" + test `ip net exec $netns $ipt -t raw -L -v |grep $iface | awk '{print $1}'` != "0" +} + +stop_counter() { + local ipt="iptables" + local iface=$1 + local netns=$2 + + [ "$NF" = "6" ] && ipt="ip6tables" + ip net exec $netns $ipt -t raw -D PREROUTING -i $iface \ + -m length ! --length 0:$CHK_SIZE -j ACCEPT +} + +do_netperf() { + local serip=$SERVER_IP4 + local netns=$1 + + [ "$NF" = "6" ] && serip=$SERVER_IP6 + + # use large write to be sure to generate big tcp packets + ip net exec $netns netperf -$NF -t TCP_STREAM -l 1 -H $serip -- -m 262144 2>&1 >/dev/null +} + +do_test() { + local cli_tso=$1 + local gw_gro=$2 + local gw_tso=$3 + local ser_gro=$4 + local ret="PASS" + + ip net exec $CLIENT_NS ethtool -K link0 tso $cli_tso + ip net exec $ROUTER_NS ethtool -K link1 gro $gw_gro + ip net exec $ROUTER_NS ethtool -K link2 tso $gw_tso + ip net exec $SERVER_NS ethtool -K link3 gro $ser_gro + + start_counter link1 $ROUTER_NS + start_counter link3 $SERVER_NS + do_netperf $CLIENT_NS + + if check_counter link1 $ROUTER_NS; then + check_counter link3 $SERVER_NS || ret="FAIL_on_link3" + else + ret="FAIL_on_link1" + fi + + stop_counter link1 $ROUTER_NS + stop_counter link3 $SERVER_NS + printf "%-9s %-8s %-8s %-8s: [%s]\n" \ + $cli_tso $gw_gro $gw_tso $ser_gro $ret + test $ret = "PASS" +} + +testup() { + echo "CLI GSO | GW GRO | GW GSO | SER GRO" && \ + do_test "on" "on" "on" "on" && \ + do_test "on" "off" "on" "off" && \ + do_test "off" "on" "on" "on" && \ + do_test "on" "on" "off" "on" && \ + do_test "off" "on" "off" "on" +} + +if ! netperf -V &> /dev/null; then + echo "SKIP: Could not run test without netperf tool" + exit $ksft_skip +fi + +if ! ip link help 2>&1 | grep gso_ipv4_max_size &> /dev/null; then + echo "SKIP: Could not run test without gso/gro_ipv4_max_size supported in ip-link" + exit $ksft_skip +fi + +trap cleanup EXIT +setup && echo "Testing for BIG TCP:" && \ +NF=4 testup && echo "***v4 Tests Done***" && \ +NF=6 testup && echo "***v6 Tests Done***" +exit $? diff --git a/tools/testing/selftests/net/bind_bhash.c b/tools/testing/selftests/net/bind_bhash.c new file mode 100644 index 000000000000..57ff67a3751e --- /dev/null +++ b/tools/testing/selftests/net/bind_bhash.c @@ -0,0 +1,144 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * This times how long it takes to bind to a port when the port already + * has multiple sockets in its bhash table. + * + * In the setup(), we populate the port's bhash table with + * MAX_THREADS * MAX_CONNECTIONS number of entries. + */ + +#include <unistd.h> +#include <stdio.h> +#include <netdb.h> +#include <pthread.h> +#include <string.h> +#include <stdbool.h> + +#define MAX_THREADS 600 +#define MAX_CONNECTIONS 40 + +static const char *setup_addr_v6 = "::1"; +static const char *setup_addr_v4 = "127.0.0.1"; +static const char *setup_addr; +static const char *bind_addr; +static const char *port; +bool use_v6; +int ret; + +static int fd_array[MAX_THREADS][MAX_CONNECTIONS]; + +static int bind_socket(int opt, const char *addr) +{ + struct addrinfo *res, hint = {}; + int sock_fd, reuse = 1, err; + int domain = use_v6 ? AF_INET6 : AF_INET; + + sock_fd = socket(domain, SOCK_STREAM, 0); + if (sock_fd < 0) { + perror("socket fd err"); + return sock_fd; + } + + hint.ai_family = domain; + hint.ai_socktype = SOCK_STREAM; + + err = getaddrinfo(addr, port, &hint, &res); + if (err) { + perror("getaddrinfo failed"); + goto cleanup; + } + + if (opt) { + err = setsockopt(sock_fd, SOL_SOCKET, opt, &reuse, sizeof(reuse)); + if (err) { + perror("setsockopt failed"); + goto cleanup; + } + } + + err = bind(sock_fd, res->ai_addr, res->ai_addrlen); + if (err) { + perror("failed to bind to port"); + goto cleanup; + } + + return sock_fd; + +cleanup: + close(sock_fd); + return err; +} + +static void *setup(void *arg) +{ + int sock_fd, i; + int *array = (int *)arg; + + for (i = 0; i < MAX_CONNECTIONS; i++) { + sock_fd = bind_socket(SO_REUSEADDR | SO_REUSEPORT, setup_addr); + if (sock_fd < 0) { + ret = sock_fd; + pthread_exit(&ret); + } + array[i] = sock_fd; + } + + return NULL; +} + +int main(int argc, const char *argv[]) +{ + int listener_fd, sock_fd, i, j; + pthread_t tid[MAX_THREADS]; + clock_t begin, end; + + if (argc != 4) { + printf("Usage: listener <port> <ipv6 | ipv4> <bind-addr>\n"); + return -1; + } + + port = argv[1]; + use_v6 = strcmp(argv[2], "ipv6") == 0; + bind_addr = argv[3]; + + setup_addr = use_v6 ? setup_addr_v6 : setup_addr_v4; + + listener_fd = bind_socket(SO_REUSEADDR | SO_REUSEPORT, setup_addr); + if (listen(listener_fd, 100) < 0) { + perror("listen failed"); + return -1; + } + + /* Set up threads to populate the bhash table entry for the port */ + for (i = 0; i < MAX_THREADS; i++) + pthread_create(&tid[i], NULL, setup, fd_array[i]); + + for (i = 0; i < MAX_THREADS; i++) + pthread_join(tid[i], NULL); + + if (ret) + goto done; + + begin = clock(); + + /* Bind to the same port on a different address */ + sock_fd = bind_socket(0, bind_addr); + if (sock_fd < 0) + goto done; + + end = clock(); + + printf("time spent = %f\n", (double)(end - begin) / CLOCKS_PER_SEC); + + /* clean up */ + close(sock_fd); + +done: + close(listener_fd); + for (i = 0; i < MAX_THREADS; i++) { + for (j = 0; i < MAX_THREADS; i++) + close(fd_array[i][j]); + } + + return 0; +} diff --git a/tools/testing/selftests/net/bind_bhash.sh b/tools/testing/selftests/net/bind_bhash.sh new file mode 100755 index 000000000000..a28563bdaae0 --- /dev/null +++ b/tools/testing/selftests/net/bind_bhash.sh @@ -0,0 +1,68 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +NR_FILES=32768 +readonly NETNS="ns-$(mktemp -u XXXXXX)" + +# default values +port=443 +addr_v6="2001:0db8:0:f101::1" +addr_v4="10.8.8.8" +use_v6=true +addr="" + +usage() { + echo "Usage: $0 [-6 | -4] [-p port] [-a address]" + echo -e "\t6: use ipv6" + echo -e "\t4: use ipv4" + echo -e "\tport: Port number" + echo -e "\taddress: ip address" +} + +while getopts "ha:p:64" opt; do + case ${opt} in + h) + usage $0 + exit 0 + ;; + a) addr=$OPTARG;; + p) + port=$OPTARG;; + 6) + use_v6=true;; + 4) + use_v6=false;; + esac +done + +setup() { + ip netns add "${NETNS}" + ip -netns "${NETNS}" link add veth0 type veth peer name veth1 + ip -netns "${NETNS}" link set lo up + ip -netns "${NETNS}" link set veth0 up + ip -netns "${NETNS}" link set veth1 up + + if [[ "$use_v6" == true ]]; then + ip -netns "${NETNS}" addr add $addr_v6 nodad dev veth0 + else + ip -netns "${NETNS}" addr add $addr_v4 dev lo + fi +} + +cleanup() { + ip netns del "${NETNS}" +} + +if [[ "$addr" != "" ]]; then + addr_v4=$addr; + addr_v6=$addr; +fi +setup +if [[ "$use_v6" == true ]] ; then + ip netns exec "${NETNS}" sh -c \ + "ulimit -n ${NR_FILES};./bind_bhash ${port} ipv6 ${addr_v6}" +else + ip netns exec "${NETNS}" sh -c \ + "ulimit -n ${NR_FILES};./bind_bhash ${port} ipv4 ${addr_v4}" +fi +cleanup diff --git a/tools/testing/selftests/net/bind_timewait.c b/tools/testing/selftests/net/bind_timewait.c new file mode 100644 index 000000000000..cb9fdf51ea59 --- /dev/null +++ b/tools/testing/selftests/net/bind_timewait.c @@ -0,0 +1,92 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright Amazon.com Inc. or its affiliates. */ + +#include <sys/socket.h> +#include <netinet/in.h> + +#include "../kselftest_harness.h" + +FIXTURE(bind_timewait) +{ + struct sockaddr_in addr; + socklen_t addrlen; +}; + +FIXTURE_VARIANT(bind_timewait) +{ + __u32 addr_const; +}; + +FIXTURE_VARIANT_ADD(bind_timewait, localhost) +{ + .addr_const = INADDR_LOOPBACK +}; + +FIXTURE_VARIANT_ADD(bind_timewait, addrany) +{ + .addr_const = INADDR_ANY +}; + +FIXTURE_SETUP(bind_timewait) +{ + self->addr.sin_family = AF_INET; + self->addr.sin_port = 0; + self->addr.sin_addr.s_addr = htonl(variant->addr_const); + self->addrlen = sizeof(self->addr); +} + +FIXTURE_TEARDOWN(bind_timewait) +{ +} + +void create_timewait_socket(struct __test_metadata *_metadata, + FIXTURE_DATA(bind_timewait) *self) +{ + int server_fd, client_fd, child_fd, ret; + struct sockaddr_in addr; + socklen_t addrlen; + + server_fd = socket(AF_INET, SOCK_STREAM, 0); + ASSERT_GT(server_fd, 0); + + ret = bind(server_fd, (struct sockaddr *)&self->addr, self->addrlen); + ASSERT_EQ(ret, 0); + + ret = listen(server_fd, 1); + ASSERT_EQ(ret, 0); + + ret = getsockname(server_fd, (struct sockaddr *)&self->addr, &self->addrlen); + ASSERT_EQ(ret, 0); + + client_fd = socket(AF_INET, SOCK_STREAM, 0); + ASSERT_GT(client_fd, 0); + + ret = connect(client_fd, (struct sockaddr *)&self->addr, self->addrlen); + ASSERT_EQ(ret, 0); + + addrlen = sizeof(addr); + child_fd = accept(server_fd, (struct sockaddr *)&addr, &addrlen); + ASSERT_GT(child_fd, 0); + + close(child_fd); + close(client_fd); + close(server_fd); +} + +TEST_F(bind_timewait, 1) +{ + int fd, ret; + + create_timewait_socket(_metadata, self); + + fd = socket(AF_INET, SOCK_STREAM, 0); + ASSERT_GT(fd, 0); + + ret = bind(fd, (struct sockaddr *)&self->addr, self->addrlen); + ASSERT_EQ(ret, -1); + ASSERT_EQ(errno, EADDRINUSE); + + close(fd); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/net/bind_wildcard.c b/tools/testing/selftests/net/bind_wildcard.c new file mode 100644 index 000000000000..a2662348cdb1 --- /dev/null +++ b/tools/testing/selftests/net/bind_wildcard.c @@ -0,0 +1,160 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright Amazon.com Inc. or its affiliates. */ + +#include <sys/socket.h> +#include <netinet/in.h> + +#include "../kselftest_harness.h" + +struct in6_addr in6addr_v4mapped_any = { + .s6_addr = { + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 255, 255, + 0, 0, 0, 0 + } +}; + +struct in6_addr in6addr_v4mapped_loopback = { + .s6_addr = { + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 255, 255, + 127, 0, 0, 1 + } +}; + +FIXTURE(bind_wildcard) +{ + struct sockaddr_in addr4; + struct sockaddr_in6 addr6; +}; + +FIXTURE_VARIANT(bind_wildcard) +{ + const __u32 addr4_const; + const struct in6_addr *addr6_const; + int expected_errno; +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v4_any_v6_any) +{ + .addr4_const = INADDR_ANY, + .addr6_const = &in6addr_any, + .expected_errno = EADDRINUSE, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v4_any_v6_local) +{ + .addr4_const = INADDR_ANY, + .addr6_const = &in6addr_loopback, + .expected_errno = 0, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v4_any_v6_v4mapped_any) +{ + .addr4_const = INADDR_ANY, + .addr6_const = &in6addr_v4mapped_any, + .expected_errno = EADDRINUSE, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v4_any_v6_v4mapped_local) +{ + .addr4_const = INADDR_ANY, + .addr6_const = &in6addr_v4mapped_loopback, + .expected_errno = EADDRINUSE, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v4_local_v6_any) +{ + .addr4_const = INADDR_LOOPBACK, + .addr6_const = &in6addr_any, + .expected_errno = EADDRINUSE, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v4_local_v6_local) +{ + .addr4_const = INADDR_LOOPBACK, + .addr6_const = &in6addr_loopback, + .expected_errno = 0, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v4_local_v6_v4mapped_any) +{ + .addr4_const = INADDR_LOOPBACK, + .addr6_const = &in6addr_v4mapped_any, + .expected_errno = EADDRINUSE, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v4_local_v6_v4mapped_local) +{ + .addr4_const = INADDR_LOOPBACK, + .addr6_const = &in6addr_v4mapped_loopback, + .expected_errno = EADDRINUSE, +}; + +FIXTURE_SETUP(bind_wildcard) +{ + self->addr4.sin_family = AF_INET; + self->addr4.sin_port = htons(0); + self->addr4.sin_addr.s_addr = htonl(variant->addr4_const); + + self->addr6.sin6_family = AF_INET6; + self->addr6.sin6_port = htons(0); + self->addr6.sin6_addr = *variant->addr6_const; +} + +FIXTURE_TEARDOWN(bind_wildcard) +{ +} + +void bind_sockets(struct __test_metadata *_metadata, + FIXTURE_DATA(bind_wildcard) *self, + int expected_errno, + struct sockaddr *addr1, socklen_t addrlen1, + struct sockaddr *addr2, socklen_t addrlen2) +{ + int fd[2]; + int ret; + + fd[0] = socket(addr1->sa_family, SOCK_STREAM, 0); + ASSERT_GT(fd[0], 0); + + ret = bind(fd[0], addr1, addrlen1); + ASSERT_EQ(ret, 0); + + ret = getsockname(fd[0], addr1, &addrlen1); + ASSERT_EQ(ret, 0); + + ((struct sockaddr_in *)addr2)->sin_port = ((struct sockaddr_in *)addr1)->sin_port; + + fd[1] = socket(addr2->sa_family, SOCK_STREAM, 0); + ASSERT_GT(fd[1], 0); + + ret = bind(fd[1], addr2, addrlen2); + if (expected_errno) { + ASSERT_EQ(ret, -1); + ASSERT_EQ(errno, expected_errno); + } else { + ASSERT_EQ(ret, 0); + } + + close(fd[1]); + close(fd[0]); +} + +TEST_F(bind_wildcard, v4_v6) +{ + bind_sockets(_metadata, self, variant->expected_errno, + (struct sockaddr *)&self->addr4, sizeof(self->addr4), + (struct sockaddr *)&self->addr6, sizeof(self->addr6)); +} + +TEST_F(bind_wildcard, v6_v4) +{ + bind_sockets(_metadata, self, variant->expected_errno, + (struct sockaddr *)&self->addr6, sizeof(self->addr6), + (struct sockaddr *)&self->addr4, sizeof(self->addr4)); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/net/cmsg_ipv6.sh b/tools/testing/selftests/net/cmsg_ipv6.sh new file mode 100755 index 000000000000..8bc23fb4c82b --- /dev/null +++ b/tools/testing/selftests/net/cmsg_ipv6.sh @@ -0,0 +1,154 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source lib.sh + +IP6=2001:db8:1::1/64 +TGT6=2001:db8:1::2 +TMPF=$(mktemp --suffix ".pcap") + +cleanup() +{ + rm -f $TMPF + cleanup_ns $NS +} + +trap cleanup EXIT + +tcpdump -h | grep immediate-mode >> /dev/null +if [ $? -ne 0 ]; then + echo "SKIP - tcpdump with --immediate-mode option required" + exit $ksft_skip +fi + +# Namespaces +setup_ns NS +NSEXE="ip netns exec $NS" + +$NSEXE sysctl -w net.ipv4.ping_group_range='0 2147483647' > /dev/null + +# Connectivity +ip -netns $NS link add type dummy +ip -netns $NS link set dev dummy0 up +ip -netns $NS addr add $IP6 dev dummy0 + +# Test +BAD=0 +TOTAL=0 + +check_result() { + ((TOTAL++)) + if [ $1 -ne $2 ]; then + echo " Case $3 returned $1, expected $2" + ((BAD++)) + fi +} + +# IPV6_DONTFRAG +for ovr in setsock cmsg both diff; do + for df in 0 1; do + for p in u i r; do + [ $p == "u" ] && prot=UDP + [ $p == "i" ] && prot=ICMP + [ $p == "r" ] && prot=RAW + + [ $ovr == "setsock" ] && m="-F $df" + [ $ovr == "cmsg" ] && m="-f $df" + [ $ovr == "both" ] && m="-F $df -f $df" + [ $ovr == "diff" ] && m="-F $((1 - df)) -f $df" + + $NSEXE ./cmsg_sender -s -S 2000 -6 -p $p $m $TGT6 1234 + check_result $? $df "DONTFRAG $prot $ovr" + done + done +done + +# IPV6_TCLASS +TOS=0x10 +TOS2=0x20 + +ip -6 -netns $NS rule add tos $TOS lookup 300 +ip -6 -netns $NS route add table 300 prohibit any + +for ovr in setsock cmsg both diff; do + for p in u i r; do + [ $p == "u" ] && prot=UDP + [ $p == "i" ] && prot=ICMP + [ $p == "r" ] && prot=RAW + + [ $ovr == "setsock" ] && m="-C" + [ $ovr == "cmsg" ] && m="-c" + [ $ovr == "both" ] && m="-C $((TOS2)) -c" + [ $ovr == "diff" ] && m="-C $((TOS )) -c" + + $NSEXE nohup tcpdump --immediate-mode -p -ni dummy0 -w $TMPF -c 4 2> /dev/null & + BG=$! + sleep 0.05 + + $NSEXE ./cmsg_sender -6 -p $p $m $((TOS2)) $TGT6 1234 + check_result $? 0 "TCLASS $prot $ovr - pass" + + while [ -d /proc/$BG ]; do + $NSEXE ./cmsg_sender -6 -p $p $m $((TOS2)) $TGT6 1234 + done + + tcpdump -r $TMPF -v 2>&1 | grep "class $TOS2" >> /dev/null + check_result $? 0 "TCLASS $prot $ovr - packet data" + rm $TMPF + + [ $ovr == "both" ] && m="-C $((TOS )) -c" + [ $ovr == "diff" ] && m="-C $((TOS2)) -c" + + $NSEXE ./cmsg_sender -6 -p $p $m $((TOS)) -s $TGT6 1234 + check_result $? 1 "TCLASS $prot $ovr - rejection" + done +done + +# IPV6_HOPLIMIT +LIM=4 + +for ovr in setsock cmsg both diff; do + for p in u i r; do + [ $p == "u" ] && prot=UDP + [ $p == "i" ] && prot=ICMP + [ $p == "r" ] && prot=RAW + + [ $ovr == "setsock" ] && m="-L" + [ $ovr == "cmsg" ] && m="-l" + [ $ovr == "both" ] && m="-L $LIM -l" + [ $ovr == "diff" ] && m="-L $((LIM + 1)) -l" + + $NSEXE nohup tcpdump --immediate-mode -p -ni dummy0 -w $TMPF -c 4 2> /dev/null & + BG=$! + sleep 0.05 + + $NSEXE ./cmsg_sender -6 -p $p $m $LIM $TGT6 1234 + check_result $? 0 "HOPLIMIT $prot $ovr - pass" + + while [ -d /proc/$BG ]; do + $NSEXE ./cmsg_sender -6 -p $p $m $LIM $TGT6 1234 + done + + tcpdump -r $TMPF -v 2>&1 | grep "hlim $LIM[^0-9]" >> /dev/null + check_result $? 0 "HOPLIMIT $prot $ovr - packet data" + rm $TMPF + done +done + +# IPV6 exthdr +for p in u i r; do + # Very basic "does it crash" test + for h in h d r; do + $NSEXE ./cmsg_sender -p $p -6 -H $h $TGT6 1234 + check_result $? 0 "ExtHdr $prot $ovr - pass" + done +done + +# Summary +if [ $BAD -ne 0 ]; then + echo "FAIL - $BAD/$TOTAL cases failed" + exit 1 +else + echo "OK" + exit 0 +fi diff --git a/tools/testing/selftests/net/cmsg_sender.c b/tools/testing/selftests/net/cmsg_sender.c new file mode 100644 index 000000000000..c79e65581dc3 --- /dev/null +++ b/tools/testing/selftests/net/cmsg_sender.c @@ -0,0 +1,522 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +#include <errno.h> +#include <error.h> +#include <netdb.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> +#include <unistd.h> +#include <linux/errqueue.h> +#include <linux/icmp.h> +#include <linux/icmpv6.h> +#include <linux/net_tstamp.h> +#include <linux/types.h> +#include <linux/udp.h> +#include <sys/socket.h> + +#include "../kselftest.h" + +enum { + ERN_SUCCESS = 0, + /* Well defined errors, callers may depend on these */ + ERN_SEND = 1, + /* Informational, can reorder */ + ERN_HELP, + ERN_SEND_SHORT, + ERN_SOCK_CREATE, + ERN_RESOLVE, + ERN_CMSG_WR, + ERN_SOCKOPT, + ERN_GETTIME, + ERN_RECVERR, + ERN_CMSG_RD, + ERN_CMSG_RCV, +}; + +struct option_cmsg_u32 { + bool ena; + unsigned int val; +}; + +struct options { + bool silent_send; + const char *host; + const char *service; + unsigned int size; + unsigned int num_pkt; + struct { + unsigned int mark; + unsigned int dontfrag; + unsigned int tclass; + unsigned int hlimit; + unsigned int priority; + } sockopt; + struct { + unsigned int family; + unsigned int type; + unsigned int proto; + } sock; + struct option_cmsg_u32 mark; + struct { + bool ena; + unsigned int delay; + } txtime; + struct { + bool ena; + } ts; + struct { + struct option_cmsg_u32 dontfrag; + struct option_cmsg_u32 tclass; + struct option_cmsg_u32 hlimit; + struct option_cmsg_u32 exthdr; + } v6; +} opt = { + .size = 13, + .num_pkt = 1, + .sock = { + .family = AF_UNSPEC, + .type = SOCK_DGRAM, + .proto = IPPROTO_UDP, + }, +}; + +static struct timespec time_start_real; +static struct timespec time_start_mono; + +static void __attribute__((noreturn)) cs_usage(const char *bin) +{ + printf("Usage: %s [opts] <dst host> <dst port / service>\n", bin); + printf("Options:\n" + "\t\t-s Silent send() failures\n" + "\t\t-S send() size\n" + "\t\t-4/-6 Force IPv4 / IPv6 only\n" + "\t\t-p prot Socket protocol\n" + "\t\t (u = UDP (default); i = ICMP; r = RAW)\n" + "\n" + "\t\t-m val Set SO_MARK with given value\n" + "\t\t-M val Set SO_MARK via setsockopt\n" + "\t\t-d val Set SO_TXTIME with given delay (usec)\n" + "\t\t-t Enable time stamp reporting\n" + "\t\t-f val Set don't fragment via cmsg\n" + "\t\t-F val Set don't fragment via setsockopt\n" + "\t\t-c val Set TCLASS via cmsg\n" + "\t\t-C val Set TCLASS via setsockopt\n" + "\t\t-l val Set HOPLIMIT via cmsg\n" + "\t\t-L val Set HOPLIMIT via setsockopt\n" + "\t\t-H type Add an IPv6 header option\n" + "\t\t (h = HOP; d = DST; r = RTDST)" + ""); + exit(ERN_HELP); +} + +static void cs_parse_args(int argc, char *argv[]) +{ + int o; + + while ((o = getopt(argc, argv, "46sS:p:P:m:M:n:d:tf:F:c:C:l:L:H:")) != -1) { + switch (o) { + case 's': + opt.silent_send = true; + break; + case 'S': + opt.size = atoi(optarg); + break; + case '4': + opt.sock.family = AF_INET; + break; + case '6': + opt.sock.family = AF_INET6; + break; + case 'p': + if (*optarg == 'u' || *optarg == 'U') { + opt.sock.proto = IPPROTO_UDP; + } else if (*optarg == 'i' || *optarg == 'I') { + opt.sock.proto = IPPROTO_ICMP; + } else if (*optarg == 'r') { + opt.sock.type = SOCK_RAW; + } else { + printf("Error: unknown protocol: %s\n", optarg); + cs_usage(argv[0]); + } + break; + case 'P': + opt.sockopt.priority = atoi(optarg); + break; + case 'm': + opt.mark.ena = true; + opt.mark.val = atoi(optarg); + break; + case 'M': + opt.sockopt.mark = atoi(optarg); + break; + case 'n': + opt.num_pkt = atoi(optarg); + break; + case 'd': + opt.txtime.ena = true; + opt.txtime.delay = atoi(optarg); + break; + case 't': + opt.ts.ena = true; + break; + case 'f': + opt.v6.dontfrag.ena = true; + opt.v6.dontfrag.val = atoi(optarg); + break; + case 'F': + opt.sockopt.dontfrag = atoi(optarg); + break; + case 'c': + opt.v6.tclass.ena = true; + opt.v6.tclass.val = atoi(optarg); + break; + case 'C': + opt.sockopt.tclass = atoi(optarg); + break; + case 'l': + opt.v6.hlimit.ena = true; + opt.v6.hlimit.val = atoi(optarg); + break; + case 'L': + opt.sockopt.hlimit = atoi(optarg); + break; + case 'H': + opt.v6.exthdr.ena = true; + switch (optarg[0]) { + case 'h': + opt.v6.exthdr.val = IPV6_HOPOPTS; + break; + case 'd': + opt.v6.exthdr.val = IPV6_DSTOPTS; + break; + case 'r': + opt.v6.exthdr.val = IPV6_RTHDRDSTOPTS; + break; + default: + printf("Error: hdr type: %s\n", optarg); + break; + } + break; + } + } + + if (optind != argc - 2) + cs_usage(argv[0]); + + opt.host = argv[optind]; + opt.service = argv[optind + 1]; +} + +static void memrnd(void *s, size_t n) +{ + int *dword = s; + char *byte; + + for (; n >= 4; n -= 4) + *dword++ = rand(); + byte = (void *)dword; + while (n--) + *byte++ = rand(); +} + +static void +ca_write_cmsg_u32(char *cbuf, size_t cbuf_sz, size_t *cmsg_len, + int level, int optname, struct option_cmsg_u32 *uopt) +{ + struct cmsghdr *cmsg; + + if (!uopt->ena) + return; + + cmsg = (struct cmsghdr *)(cbuf + *cmsg_len); + *cmsg_len += CMSG_SPACE(sizeof(__u32)); + if (cbuf_sz < *cmsg_len) + error(ERN_CMSG_WR, EFAULT, "cmsg buffer too small"); + + cmsg->cmsg_level = level; + cmsg->cmsg_type = optname; + cmsg->cmsg_len = CMSG_LEN(sizeof(__u32)); + *(__u32 *)CMSG_DATA(cmsg) = uopt->val; +} + +static void +cs_write_cmsg(int fd, struct msghdr *msg, char *cbuf, size_t cbuf_sz) +{ + struct cmsghdr *cmsg; + size_t cmsg_len; + + msg->msg_control = cbuf; + cmsg_len = 0; + + ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len, + SOL_SOCKET, SO_MARK, &opt.mark); + ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len, + SOL_IPV6, IPV6_DONTFRAG, &opt.v6.dontfrag); + ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len, + SOL_IPV6, IPV6_TCLASS, &opt.v6.tclass); + ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len, + SOL_IPV6, IPV6_HOPLIMIT, &opt.v6.hlimit); + + if (opt.txtime.ena) { + struct sock_txtime so_txtime = { + .clockid = CLOCK_MONOTONIC, + }; + __u64 txtime; + + if (setsockopt(fd, SOL_SOCKET, SO_TXTIME, + &so_txtime, sizeof(so_txtime))) + error(ERN_SOCKOPT, errno, "setsockopt TXTIME"); + + txtime = time_start_mono.tv_sec * (1000ULL * 1000 * 1000) + + time_start_mono.tv_nsec + + opt.txtime.delay * 1000; + + cmsg = (struct cmsghdr *)(cbuf + cmsg_len); + cmsg_len += CMSG_SPACE(sizeof(txtime)); + if (cbuf_sz < cmsg_len) + error(ERN_CMSG_WR, EFAULT, "cmsg buffer too small"); + + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_TXTIME; + cmsg->cmsg_len = CMSG_LEN(sizeof(txtime)); + memcpy(CMSG_DATA(cmsg), &txtime, sizeof(txtime)); + } + if (opt.ts.ena) { + __u32 val = SOF_TIMESTAMPING_SOFTWARE | + SOF_TIMESTAMPING_OPT_TSONLY; + + if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING, + &val, sizeof(val))) + error(ERN_SOCKOPT, errno, "setsockopt TIMESTAMPING"); + + cmsg = (struct cmsghdr *)(cbuf + cmsg_len); + cmsg_len += CMSG_SPACE(sizeof(__u32)); + if (cbuf_sz < cmsg_len) + error(ERN_CMSG_WR, EFAULT, "cmsg buffer too small"); + + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SO_TIMESTAMPING; + cmsg->cmsg_len = CMSG_LEN(sizeof(__u32)); + *(__u32 *)CMSG_DATA(cmsg) = SOF_TIMESTAMPING_TX_SCHED | + SOF_TIMESTAMPING_TX_SOFTWARE; + } + if (opt.v6.exthdr.ena) { + cmsg = (struct cmsghdr *)(cbuf + cmsg_len); + cmsg_len += CMSG_SPACE(8); + if (cbuf_sz < cmsg_len) + error(ERN_CMSG_WR, EFAULT, "cmsg buffer too small"); + + cmsg->cmsg_level = SOL_IPV6; + cmsg->cmsg_type = opt.v6.exthdr.val; + cmsg->cmsg_len = CMSG_LEN(8); + *(__u64 *)CMSG_DATA(cmsg) = 0; + } + + if (cmsg_len) + msg->msg_controllen = cmsg_len; + else + msg->msg_control = NULL; +} + +static const char *cs_ts_info2str(unsigned int info) +{ + static const char *names[] = { + [SCM_TSTAMP_SND] = "SND", + [SCM_TSTAMP_SCHED] = "SCHED", + [SCM_TSTAMP_ACK] = "ACK", + }; + + if (info < ARRAY_SIZE(names)) + return names[info]; + return "unknown"; +} + +static void +cs_read_cmsg(int fd, struct msghdr *msg, char *cbuf, size_t cbuf_sz) +{ + struct sock_extended_err *see; + struct scm_timestamping *ts; + struct cmsghdr *cmsg; + int i, err; + + if (!opt.ts.ena) + return; + msg->msg_control = cbuf; + msg->msg_controllen = cbuf_sz; + + while (true) { + ts = NULL; + see = NULL; + memset(cbuf, 0, cbuf_sz); + + err = recvmsg(fd, msg, MSG_ERRQUEUE); + if (err < 0) { + if (errno == EAGAIN) + break; + error(ERN_RECVERR, errno, "recvmsg ERRQ"); + } + + for (cmsg = CMSG_FIRSTHDR(msg); cmsg != NULL; + cmsg = CMSG_NXTHDR(msg, cmsg)) { + if (cmsg->cmsg_level == SOL_SOCKET && + cmsg->cmsg_type == SO_TIMESTAMPING_OLD) { + if (cmsg->cmsg_len < sizeof(*ts)) + error(ERN_CMSG_RD, EINVAL, "TS cmsg"); + + ts = (void *)CMSG_DATA(cmsg); + } + if ((cmsg->cmsg_level == SOL_IP && + cmsg->cmsg_type == IP_RECVERR) || + (cmsg->cmsg_level == SOL_IPV6 && + cmsg->cmsg_type == IPV6_RECVERR)) { + if (cmsg->cmsg_len < sizeof(*see)) + error(ERN_CMSG_RD, EINVAL, "sock_err cmsg"); + + see = (void *)CMSG_DATA(cmsg); + } + } + + if (!ts) + error(ERN_CMSG_RCV, ENOENT, "TS cmsg not found"); + if (!see) + error(ERN_CMSG_RCV, ENOENT, "sock_err cmsg not found"); + + for (i = 0; i < 3; i++) { + unsigned long long rel_time; + + if (!ts->ts[i].tv_sec && !ts->ts[i].tv_nsec) + continue; + + rel_time = (ts->ts[i].tv_sec - time_start_real.tv_sec) * + (1000ULL * 1000) + + (ts->ts[i].tv_nsec - time_start_real.tv_nsec) / + 1000; + printf(" %5s ts%d %lluus\n", + cs_ts_info2str(see->ee_info), + i, rel_time); + } + } +} + +static void ca_set_sockopts(int fd) +{ + if (opt.sockopt.mark && + setsockopt(fd, SOL_SOCKET, SO_MARK, + &opt.sockopt.mark, sizeof(opt.sockopt.mark))) + error(ERN_SOCKOPT, errno, "setsockopt SO_MARK"); + if (opt.sockopt.dontfrag && + setsockopt(fd, SOL_IPV6, IPV6_DONTFRAG, + &opt.sockopt.dontfrag, sizeof(opt.sockopt.dontfrag))) + error(ERN_SOCKOPT, errno, "setsockopt IPV6_DONTFRAG"); + if (opt.sockopt.tclass && + setsockopt(fd, SOL_IPV6, IPV6_TCLASS, + &opt.sockopt.tclass, sizeof(opt.sockopt.tclass))) + error(ERN_SOCKOPT, errno, "setsockopt IPV6_TCLASS"); + if (opt.sockopt.hlimit && + setsockopt(fd, SOL_IPV6, IPV6_UNICAST_HOPS, + &opt.sockopt.hlimit, sizeof(opt.sockopt.hlimit))) + error(ERN_SOCKOPT, errno, "setsockopt IPV6_HOPLIMIT"); + if (opt.sockopt.priority && + setsockopt(fd, SOL_SOCKET, SO_PRIORITY, + &opt.sockopt.priority, sizeof(opt.sockopt.priority))) + error(ERN_SOCKOPT, errno, "setsockopt SO_PRIORITY"); +} + +int main(int argc, char *argv[]) +{ + struct addrinfo hints, *ai; + struct iovec iov[1]; + unsigned char *buf; + struct msghdr msg; + char cbuf[1024]; + int err; + int fd; + int i; + + cs_parse_args(argc, argv); + + buf = malloc(opt.size); + memrnd(buf, opt.size); + + memset(&hints, 0, sizeof(hints)); + hints.ai_family = opt.sock.family; + + ai = NULL; + err = getaddrinfo(opt.host, opt.service, &hints, &ai); + if (err) { + fprintf(stderr, "Can't resolve address [%s]:%s\n", + opt.host, opt.service); + return ERN_SOCK_CREATE; + } + + if (ai->ai_family == AF_INET6 && opt.sock.proto == IPPROTO_ICMP) + opt.sock.proto = IPPROTO_ICMPV6; + + fd = socket(ai->ai_family, opt.sock.type, opt.sock.proto); + if (fd < 0) { + fprintf(stderr, "Can't open socket: %s\n", strerror(errno)); + freeaddrinfo(ai); + return ERN_RESOLVE; + } + + if (opt.sock.proto == IPPROTO_ICMP) { + buf[0] = ICMP_ECHO; + buf[1] = 0; + } else if (opt.sock.proto == IPPROTO_ICMPV6) { + buf[0] = ICMPV6_ECHO_REQUEST; + buf[1] = 0; + } else if (opt.sock.type == SOCK_RAW) { + struct udphdr hdr = { 1, 2, htons(opt.size), 0 }; + struct sockaddr_in6 *sin6 = (void *)ai->ai_addr; + + memcpy(buf, &hdr, sizeof(hdr)); + sin6->sin6_port = htons(opt.sock.proto); + } + + ca_set_sockopts(fd); + + if (clock_gettime(CLOCK_REALTIME, &time_start_real)) + error(ERN_GETTIME, errno, "gettime REALTIME"); + if (clock_gettime(CLOCK_MONOTONIC, &time_start_mono)) + error(ERN_GETTIME, errno, "gettime MONOTONIC"); + + iov[0].iov_base = buf; + iov[0].iov_len = opt.size; + + memset(&msg, 0, sizeof(msg)); + msg.msg_name = ai->ai_addr; + msg.msg_namelen = ai->ai_addrlen; + msg.msg_iov = iov; + msg.msg_iovlen = 1; + + cs_write_cmsg(fd, &msg, cbuf, sizeof(cbuf)); + + for (i = 0; i < opt.num_pkt; i++) { + err = sendmsg(fd, &msg, 0); + if (err < 0) { + if (!opt.silent_send) + fprintf(stderr, "send failed: %s\n", strerror(errno)); + err = ERN_SEND; + goto err_out; + } else if (err != (int)opt.size) { + fprintf(stderr, "short send\n"); + err = ERN_SEND_SHORT; + goto err_out; + } + } + err = ERN_SUCCESS; + + if (opt.ts.ena) { + /* Make sure all timestamps have time to loop back */ + usleep(opt.txtime.delay); + + cs_read_cmsg(fd, &msg, cbuf, sizeof(cbuf)); + } + +err_out: + close(fd); + freeaddrinfo(ai); + return err; +} diff --git a/tools/testing/selftests/net/cmsg_so_mark.sh b/tools/testing/selftests/net/cmsg_so_mark.sh new file mode 100755 index 000000000000..772ad0cc2630 --- /dev/null +++ b/tools/testing/selftests/net/cmsg_so_mark.sh @@ -0,0 +1,78 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source lib.sh + +IP4=172.16.0.1/24 +TGT4=172.16.0.2 +IP6=2001:db8:1::1/64 +TGT6=2001:db8:1::2 +MARK=1000 + +cleanup() +{ + cleanup_ns $NS +} + +trap cleanup EXIT + +# Namespaces +setup_ns NS + +ip netns exec $NS sysctl -w net.ipv4.ping_group_range='0 2147483647' > /dev/null + +# Connectivity +ip -netns $NS link add type dummy +ip -netns $NS link set dev dummy0 up +ip -netns $NS addr add $IP4 dev dummy0 +ip -netns $NS addr add $IP6 dev dummy0 + +ip -netns $NS rule add fwmark $MARK lookup 300 +ip -6 -netns $NS rule add fwmark $MARK lookup 300 +ip -netns $NS route add prohibit any table 300 +ip -6 -netns $NS route add prohibit any table 300 + +# Test +BAD=0 +TOTAL=0 + +check_result() { + ((TOTAL++)) + if [ $1 -ne $2 ]; then + echo " Case $3 returned $1, expected $2" + ((BAD++)) + fi +} + +for ovr in setsock cmsg both; do + for i in 4 6; do + [ $i == 4 ] && TGT=$TGT4 || TGT=$TGT6 + + for p in u i r; do + [ $p == "u" ] && prot=UDP + [ $p == "i" ] && prot=ICMP + [ $p == "r" ] && prot=RAW + + [ $ovr == "setsock" ] && m="-M" + [ $ovr == "cmsg" ] && m="-m" + [ $ovr == "both" ] && m="-M $MARK -m" + + ip netns exec $NS ./cmsg_sender -$i -p $p $m $((MARK + 1)) $TGT 1234 + check_result $? 0 "$prot $ovr - pass" + + [ $ovr == "diff" ] && m="-M $((MARK + 1)) -m" + + ip netns exec $NS ./cmsg_sender -$i -p $p $m $MARK -s $TGT 1234 + check_result $? 1 "$prot $ovr - rejection" + done + done +done + +# Summary +if [ $BAD -ne 0 ]; then + echo "FAIL - $BAD/$TOTAL cases failed" + exit 1 +else + echo "OK" + exit 0 +fi diff --git a/tools/testing/selftests/net/cmsg_time.sh b/tools/testing/selftests/net/cmsg_time.sh new file mode 100755 index 000000000000..af85267ad1e3 --- /dev/null +++ b/tools/testing/selftests/net/cmsg_time.sh @@ -0,0 +1,84 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source lib.sh + +IP4=172.16.0.1/24 +TGT4=172.16.0.2 +IP6=2001:db8:1::1/64 +TGT6=2001:db8:1::2 + +cleanup() +{ + cleanup_ns $NS +} + +trap cleanup EXIT + +# Namespaces +setup_ns NS + +ip netns exec $NS sysctl -w net.ipv4.ping_group_range='0 2147483647' > /dev/null + +# Connectivity +ip -netns $NS link add type dummy +ip -netns $NS link set dev dummy0 up +ip -netns $NS addr add $IP4 dev dummy0 +ip -netns $NS addr add $IP6 dev dummy0 + +# Need FQ for TXTIME +ip netns exec $NS tc qdisc replace dev dummy0 root fq + +# Test +BAD=0 +TOTAL=0 + +check_result() { + ((TOTAL++)) + if [ $1 -ne 0 ]; then + echo " Case $4 returned $1, expected 0" + ((BAD++)) + elif [ "$2" != "$3" ]; then + echo " Case $4 returned '$2', expected '$3'" + ((BAD++)) + fi +} + +for i in "-4 $TGT4" "-6 $TGT6"; do + for p in u i r; do + [ $p == "u" ] && prot=UDPv${i:1:2} + [ $p == "i" ] && prot=ICMPv${i:1:2} + [ $p == "r" ] && prot=RAWv${i:1:2} + + ts=$(ip netns exec $NS ./cmsg_sender -p $p $i 1234) + check_result $? "$ts" "" "$prot - no options" + + ts=$(ip netns exec $NS ./cmsg_sender -p $p $i 1234 -t | wc -l) + check_result $? "$ts" "2" "$prot - ts cnt" + ts=$(ip netns exec $NS ./cmsg_sender -p $p $i 1234 -t | + sed -n "s/.*SCHED ts0 [0-9].*/OK/p") + check_result $? "$ts" "OK" "$prot - ts0 SCHED" + ts=$(ip netns exec $NS ./cmsg_sender -p $p $i 1234 -t | + sed -n "s/.*SND ts0 [0-9].*/OK/p") + check_result $? "$ts" "OK" "$prot - ts0 SND" + + ts=$(ip netns exec $NS ./cmsg_sender -p $p $i 1234 -t -d 1000 | + awk '/SND/ { if ($3 > 1000) print "OK"; }') + check_result $? "$ts" "OK" "$prot - TXTIME abs" + + ts=$(ip netns exec $NS ./cmsg_sender -p $p $i 1234 -t -d 1000 | + awk '/SND/ {snd=$3} + /SCHED/ {sch=$3} + END { if (snd - sch > 500) print "OK"; }') + check_result $? "$ts" "OK" "$prot - TXTIME rel" + done +done + +# Summary +if [ $BAD -ne 0 ]; then + echo "FAIL - $BAD/$TOTAL cases failed" + exit 1 +else + echo "OK" + exit 0 +fi diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index 4d5df8e1eee7..5e4390cac17e 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -1,7 +1,12 @@ CONFIG_USER_NS=y +CONFIG_NET_NS=y +CONFIG_BONDING=m CONFIG_BPF_SYSCALL=y CONFIG_TEST_BPF=m CONFIG_NUMA=y +CONFIG_RPS=y +CONFIG_SYSFS=y +CONFIG_PROC_SYSCTL=y CONFIG_NET_VRF=y CONFIG_NET_L3_MASTER_DEV=y CONFIG_IPV6=y @@ -10,27 +15,88 @@ CONFIG_VETH=y CONFIG_NET_IPVTI=y CONFIG_IPV6_VTI=y CONFIG_DUMMY=y +CONFIG_BRIDGE_VLAN_FILTERING=y CONFIG_BRIDGE=y +CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_VLAN_8021Q=y +CONFIG_GENEVE=m CONFIG_IFB=y +CONFIG_INET_DIAG=y +CONFIG_INET_ESP=y +CONFIG_INET_ESP_OFFLOAD=y +CONFIG_NET_FOU=y +CONFIG_NET_FOU_IP_TUNNELS=y +CONFIG_IP_GRE=m CONFIG_NETFILTER=y CONFIG_NETFILTER_ADVANCED=y CONFIG_NF_CONNTRACK=m +CONFIG_IPV6_SIT=y +CONFIG_IP_DCCP=m CONFIG_NF_NAT=m CONFIG_IP6_NF_IPTABLES=m CONFIG_IP_NF_IPTABLES=m CONFIG_IP6_NF_NAT=m +CONFIG_IP6_NF_RAW=m CONFIG_IP_NF_NAT=m +CONFIG_IP_NF_RAW=m +CONFIG_IP_NF_TARGET_TTL=m +CONFIG_IPV6_GRE=m +CONFIG_IPV6_SEG6_LWTUNNEL=y +CONFIG_L2TP_ETH=m +CONFIG_L2TP_IP=m +CONFIG_L2TP=m +CONFIG_L2TP_V3=y +CONFIG_MACSEC=m +CONFIG_MACVLAN=y +CONFIG_MACVTAP=y +CONFIG_MPLS=y +CONFIG_MPTCP=y CONFIG_NF_TABLES=m CONFIG_NF_TABLES_IPV6=y CONFIG_NF_TABLES_IPV4=y CONFIG_NFT_NAT=m +CONFIG_NETFILTER_XT_MATCH_LENGTH=m +CONFIG_NET_ACT_CSUM=m +CONFIG_NET_ACT_CT=m +CONFIG_NET_ACT_GACT=m +CONFIG_NET_ACT_PEDIT=m +CONFIG_NET_CLS_BASIC=m +CONFIG_NET_CLS_BPF=m +CONFIG_NET_CLS_MATCHALL=m +CONFIG_NET_CLS_U32=m +CONFIG_NET_IPGRE_DEMUX=m +CONFIG_NET_IPGRE=m +CONFIG_NET_IPIP=y +CONFIG_NET_SCH_FQ_CODEL=m +CONFIG_NET_SCH_HTB=m CONFIG_NET_SCH_FQ=m CONFIG_NET_SCH_ETF=m CONFIG_NET_SCH_NETEM=y +CONFIG_NET_SCH_PRIO=m +CONFIG_NFT_COMPAT=m +CONFIG_NF_FLOW_TABLE=m +CONFIG_PSAMPLE=m +CONFIG_TCP_MD5SIG=y CONFIG_TEST_BLACKHOLE_DEV=m CONFIG_KALLSYMS=y +CONFIG_TLS=m CONFIG_TRACEPOINTS=y CONFIG_NET_DROP_MONITOR=m CONFIG_NETDEVSIM=m -CONFIG_NET_FOU=m +CONFIG_MPLS_ROUTING=m +CONFIG_MPLS_IPTUNNEL=m +CONFIG_NET_SCH_INGRESS=m +CONFIG_NET_CLS_FLOWER=m +CONFIG_NET_ACT_TUNNEL_KEY=m +CONFIG_NET_ACT_MIRRED=m +CONFIG_BAREUDP=m +CONFIG_IPV6_IOAM6_LWTUNNEL=y +CONFIG_CRYPTO_SM4_GENERIC=y +CONFIG_AMT=m +CONFIG_TUN=y +CONFIG_VXLAN=m +CONFIG_IP_SCTP=m +CONFIG_NETFILTER_XT_MATCH_POLICY=m +CONFIG_CRYPTO_ARIA=y +CONFIG_XFRM_INTERFACE=m +CONFIG_XFRM_USER=m diff --git a/tools/testing/selftests/net/csum.c b/tools/testing/selftests/net/csum.c new file mode 100644 index 000000000000..90eb06fefa59 --- /dev/null +++ b/tools/testing/selftests/net/csum.c @@ -0,0 +1,988 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* Test hardware checksum offload: Rx + Tx, IPv4 + IPv6, TCP + UDP. + * + * The test runs on two machines to exercise the NIC. For this reason it + * is not integrated in kselftests. + * + * CMD=$((./csum -[46] -[tu] -S $SADDR -D $DADDR -[RT] -r 1 $EXTRA_ARGS)) + * + * Rx: + * + * The sender sends packets with a known checksum field using PF_INET(6) + * SOCK_RAW sockets. + * + * good packet: $CMD [-t] + * bad packet: $CMD [-t] -E + * + * The receiver reads UDP packets with a UDP socket. This is not an + * option for TCP packets ('-t'). Optionally insert an iptables filter + * to avoid these entering the real protocol stack. + * + * The receiver also reads all packets with a PF_PACKET socket, to + * observe whether both good and bad packets arrive on the host. And to + * read the optional TP_STATUS_CSUM_VALID bit. This requires setting + * option PACKET_AUXDATA, and works only for CHECKSUM_UNNECESSARY. + * + * Tx: + * + * The sender needs to build CHECKSUM_PARTIAL packets to exercise tx + * checksum offload. + * + * The sender can sends packets with a UDP socket. + * + * Optionally crafts a packet that sums up to zero to verify that the + * device writes negative zero 0xFFFF in this case to distinguish from + * 0x0000 (checksum disabled), as required by RFC 768. Hit this case + * by choosing a specific source port. + * + * good packet: $CMD -U + * zero csum: $CMD -U -Z + * + * The sender can also build packets with PF_PACKET with PACKET_VNET_HDR, + * to cover more protocols. PF_PACKET requires passing src and dst mac + * addresses. + * + * good packet: $CMD -s $smac -d $dmac -p [-t] + * + * Argument '-z' sends UDP packets with a 0x000 checksum disabled field, + * to verify that the NIC passes these packets unmodified. + * + * Argument '-e' adds a transport mode encapsulation header between + * network and transport header. This will fail for devices that parse + * headers. Should work on devices that implement protocol agnostic tx + * checksum offload (NETIF_F_HW_CSUM). + * + * Argument '-r $SEED' optionally randomizes header, payload and length + * to increase coverage between packets sent. SEED 1 further chooses a + * different seed for each run (and logs this for reproducibility). It + * is advised to enable this for extra coverage in continuous testing. + */ + +#define _GNU_SOURCE + +#include <arpa/inet.h> +#include <asm/byteorder.h> +#include <errno.h> +#include <error.h> +#include <linux/filter.h> +#include <linux/if_packet.h> +#include <linux/ipv6.h> +#include <linux/virtio_net.h> +#include <net/ethernet.h> +#include <net/if.h> +#include <netinet/if_ether.h> +#include <netinet/in.h> +#include <netinet/ip.h> +#include <netinet/ip6.h> +#include <netinet/tcp.h> +#include <netinet/udp.h> +#include <poll.h> +#include <sched.h> +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/time.h> +#include <sys/types.h> +#include <unistd.h> + +#include "kselftest.h" + +static bool cfg_bad_csum; +static int cfg_family = PF_INET6; +static int cfg_num_pkt = 4; +static bool cfg_do_rx = true; +static bool cfg_do_tx = true; +static bool cfg_encap; +static char *cfg_ifname = "eth0"; +static char *cfg_mac_dst; +static char *cfg_mac_src; +static int cfg_proto = IPPROTO_UDP; +static int cfg_payload_char = 'a'; +static int cfg_payload_len = 100; +static uint16_t cfg_port_dst = 34000; +static uint16_t cfg_port_src = 33000; +static uint16_t cfg_port_src_encap = 33001; +static unsigned int cfg_random_seed; +static int cfg_rcvbuf = 1 << 22; /* be able to queue large cfg_num_pkt */ +static bool cfg_send_pfpacket; +static bool cfg_send_udp; +static int cfg_timeout_ms = 2000; +static bool cfg_zero_disable; /* skip checksum: set to zero (udp only) */ +static bool cfg_zero_sum; /* create packet that adds up to zero */ + +static struct sockaddr_in cfg_daddr4 = {.sin_family = AF_INET}; +static struct sockaddr_in cfg_saddr4 = {.sin_family = AF_INET}; +static struct sockaddr_in6 cfg_daddr6 = {.sin6_family = AF_INET6}; +static struct sockaddr_in6 cfg_saddr6 = {.sin6_family = AF_INET6}; + +#define ENC_HEADER_LEN (sizeof(struct udphdr) + sizeof(struct udp_encap_hdr)) +#define MAX_HEADER_LEN (sizeof(struct ipv6hdr) + ENC_HEADER_LEN + sizeof(struct tcphdr)) +#define MAX_PAYLOAD_LEN 1024 + +/* Trivial demo encap. Stand-in for transport layer protocols like ESP or PSP */ +struct udp_encap_hdr { + uint8_t nexthdr; + uint8_t padding[3]; +}; + +/* Ipaddrs, for pseudo csum. Global var is ugly, pass through funcs was worse */ +static void *iph_addr_p; + +static unsigned long gettimeofday_ms(void) +{ + struct timeval tv; + + gettimeofday(&tv, NULL); + return (tv.tv_sec * 1000UL) + (tv.tv_usec / 1000UL); +} + +static uint32_t checksum_nofold(char *data, size_t len, uint32_t sum) +{ + uint16_t *words = (uint16_t *)data; + int i; + + for (i = 0; i < len / 2; i++) + sum += words[i]; + + if (len & 1) + sum += ((unsigned char *)data)[len - 1]; + + return sum; +} + +static uint16_t checksum_fold(void *data, size_t len, uint32_t sum) +{ + sum = checksum_nofold(data, len, sum); + + while (sum > 0xFFFF) + sum = (sum & 0xFFFF) + (sum >> 16); + + return ~sum; +} + +static uint16_t checksum(void *th, uint16_t proto, size_t len) +{ + uint32_t sum; + int alen; + + alen = cfg_family == PF_INET6 ? 32 : 8; + + sum = checksum_nofold(iph_addr_p, alen, 0); + sum += htons(proto); + sum += htons(len); + + /* With CHECKSUM_PARTIAL kernel expects non-inverted pseudo csum */ + if (cfg_do_tx && cfg_send_pfpacket) + return ~checksum_fold(NULL, 0, sum); + else + return checksum_fold(th, len, sum); +} + +static void *build_packet_ipv4(void *_iph, uint8_t proto, unsigned int len) +{ + struct iphdr *iph = _iph; + + memset(iph, 0, sizeof(*iph)); + + iph->version = 4; + iph->ihl = 5; + iph->ttl = 8; + iph->protocol = proto; + iph->saddr = cfg_saddr4.sin_addr.s_addr; + iph->daddr = cfg_daddr4.sin_addr.s_addr; + iph->tot_len = htons(sizeof(*iph) + len); + iph->check = checksum_fold(iph, sizeof(*iph), 0); + + iph_addr_p = &iph->saddr; + + return iph + 1; +} + +static void *build_packet_ipv6(void *_ip6h, uint8_t proto, unsigned int len) +{ + struct ipv6hdr *ip6h = _ip6h; + + memset(ip6h, 0, sizeof(*ip6h)); + + ip6h->version = 6; + ip6h->payload_len = htons(len); + ip6h->nexthdr = proto; + ip6h->hop_limit = 64; + ip6h->saddr = cfg_saddr6.sin6_addr; + ip6h->daddr = cfg_daddr6.sin6_addr; + + iph_addr_p = &ip6h->saddr; + + return ip6h + 1; +} + +static void *build_packet_udp(void *_uh) +{ + struct udphdr *uh = _uh; + + uh->source = htons(cfg_port_src); + uh->dest = htons(cfg_port_dst); + uh->len = htons(sizeof(*uh) + cfg_payload_len); + uh->check = 0; + + /* choose source port so that uh->check adds up to zero */ + if (cfg_zero_sum) { + uh->source = 0; + uh->source = checksum(uh, IPPROTO_UDP, sizeof(*uh) + cfg_payload_len); + + fprintf(stderr, "tx: changing sport: %hu -> %hu\n", + cfg_port_src, ntohs(uh->source)); + cfg_port_src = ntohs(uh->source); + } + + if (cfg_zero_disable) + uh->check = 0; + else + uh->check = checksum(uh, IPPROTO_UDP, sizeof(*uh) + cfg_payload_len); + + if (cfg_bad_csum) + uh->check = ~uh->check; + + fprintf(stderr, "tx: sending checksum: 0x%x\n", uh->check); + return uh + 1; +} + +static void *build_packet_tcp(void *_th) +{ + struct tcphdr *th = _th; + + th->source = htons(cfg_port_src); + th->dest = htons(cfg_port_dst); + th->doff = 5; + th->check = 0; + + th->check = checksum(th, IPPROTO_TCP, sizeof(*th) + cfg_payload_len); + + if (cfg_bad_csum) + th->check = ~th->check; + + fprintf(stderr, "tx: sending checksum: 0x%x\n", th->check); + return th + 1; +} + +static char *build_packet_udp_encap(void *_uh) +{ + struct udphdr *uh = _uh; + struct udp_encap_hdr *eh = _uh + sizeof(*uh); + + /* outer dst == inner dst, to simplify BPF filter + * outer src != inner src, to demultiplex on recv + */ + uh->dest = htons(cfg_port_dst); + uh->source = htons(cfg_port_src_encap); + uh->check = 0; + uh->len = htons(sizeof(*uh) + + sizeof(*eh) + + sizeof(struct tcphdr) + + cfg_payload_len); + + eh->nexthdr = IPPROTO_TCP; + + return build_packet_tcp(eh + 1); +} + +static char *build_packet(char *buf, int max_len, int *len) +{ + uint8_t proto; + char *off; + int tlen; + + if (cfg_random_seed) { + int *buf32 = (void *)buf; + int i; + + for (i = 0; i < (max_len / sizeof(int)); i++) + buf32[i] = rand(); + } else { + memset(buf, cfg_payload_char, max_len); + } + + if (cfg_proto == IPPROTO_UDP) + tlen = sizeof(struct udphdr) + cfg_payload_len; + else + tlen = sizeof(struct tcphdr) + cfg_payload_len; + + if (cfg_encap) { + proto = IPPROTO_UDP; + tlen += ENC_HEADER_LEN; + } else { + proto = cfg_proto; + } + + if (cfg_family == PF_INET) + off = build_packet_ipv4(buf, proto, tlen); + else + off = build_packet_ipv6(buf, proto, tlen); + + if (cfg_encap) + off = build_packet_udp_encap(off); + else if (cfg_proto == IPPROTO_UDP) + off = build_packet_udp(off); + else + off = build_packet_tcp(off); + + /* only pass the payload, but still compute headers for cfg_zero_sum */ + if (cfg_send_udp) { + *len = cfg_payload_len; + return off; + } + + *len = off - buf + cfg_payload_len; + return buf; +} + +static int open_inet(int ipproto, int protocol) +{ + int fd; + + fd = socket(cfg_family, ipproto, protocol); + if (fd == -1) + error(1, errno, "socket inet"); + + if (cfg_family == PF_INET6) { + /* may have been updated by cfg_zero_sum */ + cfg_saddr6.sin6_port = htons(cfg_port_src); + + if (bind(fd, (void *)&cfg_saddr6, sizeof(cfg_saddr6))) + error(1, errno, "bind dgram 6"); + if (connect(fd, (void *)&cfg_daddr6, sizeof(cfg_daddr6))) + error(1, errno, "connect dgram 6"); + } else { + /* may have been updated by cfg_zero_sum */ + cfg_saddr4.sin_port = htons(cfg_port_src); + + if (bind(fd, (void *)&cfg_saddr4, sizeof(cfg_saddr4))) + error(1, errno, "bind dgram 4"); + if (connect(fd, (void *)&cfg_daddr4, sizeof(cfg_daddr4))) + error(1, errno, "connect dgram 4"); + } + + return fd; +} + +static int open_packet(void) +{ + int fd, one = 1; + + fd = socket(PF_PACKET, SOCK_RAW, 0); + if (fd == -1) + error(1, errno, "socket packet"); + + if (setsockopt(fd, SOL_PACKET, PACKET_VNET_HDR, &one, sizeof(one))) + error(1, errno, "setsockopt packet_vnet_ndr"); + + return fd; +} + +static void send_inet(int fd, const char *buf, int len) +{ + int ret; + + ret = write(fd, buf, len); + if (ret == -1) + error(1, errno, "write"); + if (ret != len) + error(1, 0, "write: %d", ret); +} + +static void eth_str_to_addr(const char *str, unsigned char *eth) +{ + if (sscanf(str, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx", + ð[0], ð[1], ð[2], ð[3], ð[4], ð[5]) != 6) + error(1, 0, "cannot parse mac addr %s", str); +} + +static void send_packet(int fd, const char *buf, int len) +{ + struct virtio_net_hdr vh = {0}; + struct sockaddr_ll addr = {0}; + struct msghdr msg = {0}; + struct ethhdr eth; + struct iovec iov[3]; + int ret; + + addr.sll_family = AF_PACKET; + addr.sll_halen = ETH_ALEN; + addr.sll_ifindex = if_nametoindex(cfg_ifname); + if (!addr.sll_ifindex) + error(1, errno, "if_nametoindex %s", cfg_ifname); + + vh.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; + if (cfg_family == PF_INET6) { + vh.csum_start = sizeof(struct ethhdr) + sizeof(struct ipv6hdr); + addr.sll_protocol = htons(ETH_P_IPV6); + } else { + vh.csum_start = sizeof(struct ethhdr) + sizeof(struct iphdr); + addr.sll_protocol = htons(ETH_P_IP); + } + + if (cfg_encap) + vh.csum_start += ENC_HEADER_LEN; + + if (cfg_proto == IPPROTO_TCP) { + vh.csum_offset = __builtin_offsetof(struct tcphdr, check); + vh.hdr_len = vh.csum_start + sizeof(struct tcphdr); + } else { + vh.csum_offset = __builtin_offsetof(struct udphdr, check); + vh.hdr_len = vh.csum_start + sizeof(struct udphdr); + } + + eth_str_to_addr(cfg_mac_src, eth.h_source); + eth_str_to_addr(cfg_mac_dst, eth.h_dest); + eth.h_proto = addr.sll_protocol; + + iov[0].iov_base = &vh; + iov[0].iov_len = sizeof(vh); + + iov[1].iov_base = ð + iov[1].iov_len = sizeof(eth); + + iov[2].iov_base = (void *)buf; + iov[2].iov_len = len; + + msg.msg_iov = iov; + msg.msg_iovlen = ARRAY_SIZE(iov); + + msg.msg_name = &addr; + msg.msg_namelen = sizeof(addr); + + ret = sendmsg(fd, &msg, 0); + if (ret == -1) + error(1, errno, "sendmsg packet"); + if (ret != sizeof(vh) + sizeof(eth) + len) + error(1, errno, "sendmsg packet: %u", ret); +} + +static int recv_prepare_udp(void) +{ + int fd; + + fd = socket(cfg_family, SOCK_DGRAM, 0); + if (fd == -1) + error(1, errno, "socket r"); + + if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, + &cfg_rcvbuf, sizeof(cfg_rcvbuf))) + error(1, errno, "setsockopt SO_RCVBUF r"); + + if (cfg_family == PF_INET6) { + if (bind(fd, (void *)&cfg_daddr6, sizeof(cfg_daddr6))) + error(1, errno, "bind r"); + } else { + if (bind(fd, (void *)&cfg_daddr4, sizeof(cfg_daddr4))) + error(1, errno, "bind r"); + } + + return fd; +} + +/* Filter out all traffic that is not cfg_proto with our destination port. + * + * Otherwise background noise may cause PF_PACKET receive queue overflow, + * dropping the expected packets and failing the test. + */ +static void __recv_prepare_packet_filter(int fd, int off_nexthdr, int off_dport) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_LD + BPF_B + BPF_ABS, SKF_AD_OFF + SKF_AD_PKTTYPE), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, PACKET_HOST, 0, 4), + BPF_STMT(BPF_LD + BPF_B + BPF_ABS, off_nexthdr), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, cfg_encap ? IPPROTO_UDP : cfg_proto, 0, 2), + BPF_STMT(BPF_LD + BPF_H + BPF_ABS, off_dport), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, cfg_port_dst, 1, 0), + BPF_STMT(BPF_RET + BPF_K, 0), + BPF_STMT(BPF_RET + BPF_K, 0xFFFF), + }; + struct sock_fprog prog = {}; + + prog.filter = filter; + prog.len = ARRAY_SIZE(filter); + if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog))) + error(1, errno, "setsockopt filter"); +} + +static void recv_prepare_packet_filter(int fd) +{ + const int off_dport = offsetof(struct tcphdr, dest); /* same for udp */ + + if (cfg_family == AF_INET) + __recv_prepare_packet_filter(fd, offsetof(struct iphdr, protocol), + sizeof(struct iphdr) + off_dport); + else + __recv_prepare_packet_filter(fd, offsetof(struct ipv6hdr, nexthdr), + sizeof(struct ipv6hdr) + off_dport); +} + +static void recv_prepare_packet_bind(int fd) +{ + struct sockaddr_ll laddr = {0}; + + laddr.sll_family = AF_PACKET; + + if (cfg_family == PF_INET) + laddr.sll_protocol = htons(ETH_P_IP); + else + laddr.sll_protocol = htons(ETH_P_IPV6); + + laddr.sll_ifindex = if_nametoindex(cfg_ifname); + if (!laddr.sll_ifindex) + error(1, 0, "if_nametoindex %s", cfg_ifname); + + if (bind(fd, (void *)&laddr, sizeof(laddr))) + error(1, errno, "bind pf_packet"); +} + +static int recv_prepare_packet(void) +{ + int fd, one = 1; + + fd = socket(PF_PACKET, SOCK_DGRAM, 0); + if (fd == -1) + error(1, errno, "socket p"); + + if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, + &cfg_rcvbuf, sizeof(cfg_rcvbuf))) + error(1, errno, "setsockopt SO_RCVBUF p"); + + /* enable auxdata to recv checksum status (valid vs unknown) */ + if (setsockopt(fd, SOL_PACKET, PACKET_AUXDATA, &one, sizeof(one))) + error(1, errno, "setsockopt auxdata"); + + /* install filter to restrict packet flow to match */ + recv_prepare_packet_filter(fd); + + /* bind to address family to start packet flow */ + recv_prepare_packet_bind(fd); + + return fd; +} + +static int recv_udp(int fd) +{ + static char buf[MAX_PAYLOAD_LEN]; + int ret, count = 0; + + while (1) { + ret = recv(fd, buf, sizeof(buf), MSG_DONTWAIT); + if (ret == -1 && errno == EAGAIN) + break; + if (ret == -1) + error(1, errno, "recv r"); + + fprintf(stderr, "rx: udp: len=%u\n", ret); + count++; + } + + return count; +} + +static int recv_verify_csum(void *th, int len, uint16_t sport, uint16_t csum_field) +{ + uint16_t csum; + + csum = checksum(th, cfg_proto, len); + + fprintf(stderr, "rx: pkt: sport=%hu len=%u csum=0x%hx verify=0x%hx\n", + sport, len, csum_field, csum); + + /* csum must be zero unless cfg_bad_csum indicates bad csum */ + if (csum && !cfg_bad_csum) { + fprintf(stderr, "pkt: bad csum\n"); + return 1; + } else if (cfg_bad_csum && !csum) { + fprintf(stderr, "pkt: good csum, while bad expected\n"); + return 1; + } + + if (cfg_zero_sum && csum_field != 0xFFFF) { + fprintf(stderr, "pkt: zero csum: field should be 0xFFFF, is 0x%hx\n", csum_field); + return 1; + } + + return 0; +} + +static int recv_verify_packet_tcp(void *th, int len) +{ + struct tcphdr *tcph = th; + + if (len < sizeof(*tcph) || tcph->dest != htons(cfg_port_dst)) + return -1; + + return recv_verify_csum(th, len, ntohs(tcph->source), tcph->check); +} + +static int recv_verify_packet_udp_encap(void *th, int len) +{ + struct udp_encap_hdr *eh = th; + + if (len < sizeof(*eh) || eh->nexthdr != IPPROTO_TCP) + return -1; + + return recv_verify_packet_tcp(eh + 1, len - sizeof(*eh)); +} + +static int recv_verify_packet_udp(void *th, int len) +{ + struct udphdr *udph = th; + + if (len < sizeof(*udph)) + return -1; + + if (udph->dest != htons(cfg_port_dst)) + return -1; + + if (udph->source == htons(cfg_port_src_encap)) + return recv_verify_packet_udp_encap(udph + 1, + len - sizeof(*udph)); + + return recv_verify_csum(th, len, ntohs(udph->source), udph->check); +} + +static int recv_verify_packet_ipv4(void *nh, int len) +{ + struct iphdr *iph = nh; + uint16_t proto = cfg_encap ? IPPROTO_UDP : cfg_proto; + + if (len < sizeof(*iph) || iph->protocol != proto) + return -1; + + iph_addr_p = &iph->saddr; + if (proto == IPPROTO_TCP) + return recv_verify_packet_tcp(iph + 1, len - sizeof(*iph)); + else + return recv_verify_packet_udp(iph + 1, len - sizeof(*iph)); +} + +static int recv_verify_packet_ipv6(void *nh, int len) +{ + struct ipv6hdr *ip6h = nh; + uint16_t proto = cfg_encap ? IPPROTO_UDP : cfg_proto; + + if (len < sizeof(*ip6h) || ip6h->nexthdr != proto) + return -1; + + iph_addr_p = &ip6h->saddr; + + if (proto == IPPROTO_TCP) + return recv_verify_packet_tcp(ip6h + 1, len - sizeof(*ip6h)); + else + return recv_verify_packet_udp(ip6h + 1, len - sizeof(*ip6h)); +} + +/* return whether auxdata includes TP_STATUS_CSUM_VALID */ +static bool recv_verify_packet_csum(struct msghdr *msg) +{ + struct tpacket_auxdata *aux = NULL; + struct cmsghdr *cm; + + if (msg->msg_flags & MSG_CTRUNC) + error(1, 0, "cmsg: truncated"); + + for (cm = CMSG_FIRSTHDR(msg); cm; cm = CMSG_NXTHDR(msg, cm)) { + if (cm->cmsg_level != SOL_PACKET || + cm->cmsg_type != PACKET_AUXDATA) + error(1, 0, "cmsg: level=%d type=%d\n", + cm->cmsg_level, cm->cmsg_type); + + if (cm->cmsg_len != CMSG_LEN(sizeof(struct tpacket_auxdata))) + error(1, 0, "cmsg: len=%lu expected=%lu", + cm->cmsg_len, CMSG_LEN(sizeof(struct tpacket_auxdata))); + + aux = (void *)CMSG_DATA(cm); + } + + if (!aux) + error(1, 0, "cmsg: no auxdata"); + + return aux->tp_status & TP_STATUS_CSUM_VALID; +} + +static int recv_packet(int fd) +{ + static char _buf[MAX_HEADER_LEN + MAX_PAYLOAD_LEN]; + unsigned long total = 0, bad_csums = 0, bad_validations = 0; + char ctrl[CMSG_SPACE(sizeof(struct tpacket_auxdata))]; + struct pkt *buf = (void *)_buf; + struct msghdr msg = {0}; + struct iovec iov; + int len, ret; + + iov.iov_base = _buf; + iov.iov_len = sizeof(_buf); + + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + msg.msg_control = ctrl; + msg.msg_controllen = sizeof(ctrl); + + while (1) { + msg.msg_flags = 0; + + len = recvmsg(fd, &msg, MSG_DONTWAIT); + if (len == -1 && errno == EAGAIN) + break; + if (len == -1) + error(1, errno, "recv p"); + + if (cfg_family == PF_INET6) + ret = recv_verify_packet_ipv6(buf, len); + else + ret = recv_verify_packet_ipv4(buf, len); + + if (ret == -1 /* skip: non-matching */) + continue; + + total++; + if (ret == 1) + bad_csums++; + + /* Fail if kernel returns valid for known bad csum. + * Do not fail if kernel does not validate a good csum: + * Absence of validation does not imply invalid. + */ + if (recv_verify_packet_csum(&msg) && cfg_bad_csum) { + fprintf(stderr, "cmsg: expected bad csum, pf_packet returns valid\n"); + bad_validations++; + } + } + + if (bad_csums || bad_validations) + error(1, 0, "rx: errors at pf_packet: total=%lu bad_csums=%lu bad_valids=%lu\n", + total, bad_csums, bad_validations); + + return total; +} + +static void parse_args(int argc, char *const argv[]) +{ + const char *daddr = NULL, *saddr = NULL; + int c; + + while ((c = getopt(argc, argv, "46d:D:eEi:l:L:n:r:PRs:S:tTuUzZ")) != -1) { + switch (c) { + case '4': + cfg_family = PF_INET; + break; + case '6': + cfg_family = PF_INET6; + break; + case 'd': + cfg_mac_dst = optarg; + break; + case 'D': + daddr = optarg; + break; + case 'e': + cfg_encap = true; + break; + case 'E': + cfg_bad_csum = true; + break; + case 'i': + cfg_ifname = optarg; + break; + case 'l': + cfg_payload_len = strtol(optarg, NULL, 0); + break; + case 'L': + cfg_timeout_ms = strtol(optarg, NULL, 0) * 1000; + break; + case 'n': + cfg_num_pkt = strtol(optarg, NULL, 0); + break; + case 'r': + cfg_random_seed = strtol(optarg, NULL, 0); + break; + case 'P': + cfg_send_pfpacket = true; + break; + case 'R': + /* only Rx: used with two machine tests */ + cfg_do_tx = false; + break; + case 's': + cfg_mac_src = optarg; + break; + case 'S': + saddr = optarg; + break; + case 't': + cfg_proto = IPPROTO_TCP; + break; + case 'T': + /* only Tx: used with two machine tests */ + cfg_do_rx = false; + break; + case 'u': + cfg_proto = IPPROTO_UDP; + break; + case 'U': + /* send using real udp socket, + * to exercise tx checksum offload + */ + cfg_send_udp = true; + break; + case 'z': + cfg_zero_disable = true; + break; + case 'Z': + cfg_zero_sum = true; + break; + default: + error(1, 0, "unknown arg %c", c); + } + } + + if (!daddr || !saddr) + error(1, 0, "Must pass -D <daddr> and -S <saddr>"); + + if (cfg_do_tx && cfg_send_pfpacket && (!cfg_mac_src || !cfg_mac_dst)) + error(1, 0, "Transmit with pf_packet requires mac addresses"); + + if (cfg_payload_len > MAX_PAYLOAD_LEN) + error(1, 0, "Payload length exceeds max"); + + if (cfg_proto != IPPROTO_UDP && (cfg_zero_sum || cfg_zero_disable)) + error(1, 0, "Only UDP supports zero csum"); + + if (cfg_zero_sum && !cfg_send_udp) + error(1, 0, "Zero checksum conversion requires -U for tx csum offload"); + if (cfg_zero_sum && cfg_bad_csum) + error(1, 0, "Cannot combine zero checksum conversion and invalid checksum"); + if (cfg_zero_sum && cfg_random_seed) + error(1, 0, "Cannot combine zero checksum conversion with randomization"); + + if (cfg_family == PF_INET6) { + cfg_saddr6.sin6_port = htons(cfg_port_src); + cfg_daddr6.sin6_port = htons(cfg_port_dst); + + if (inet_pton(cfg_family, daddr, &cfg_daddr6.sin6_addr) != 1) + error(1, errno, "Cannot parse ipv6 -D"); + if (inet_pton(cfg_family, saddr, &cfg_saddr6.sin6_addr) != 1) + error(1, errno, "Cannot parse ipv6 -S"); + } else { + cfg_saddr4.sin_port = htons(cfg_port_src); + cfg_daddr4.sin_port = htons(cfg_port_dst); + + if (inet_pton(cfg_family, daddr, &cfg_daddr4.sin_addr) != 1) + error(1, errno, "Cannot parse ipv4 -D"); + if (inet_pton(cfg_family, saddr, &cfg_saddr4.sin_addr) != 1) + error(1, errno, "Cannot parse ipv4 -S"); + } + + if (cfg_do_tx && cfg_random_seed) { + /* special case: time-based seed */ + if (cfg_random_seed == 1) + cfg_random_seed = (unsigned int)gettimeofday_ms(); + srand(cfg_random_seed); + fprintf(stderr, "randomization seed: %u\n", cfg_random_seed); + } +} + +static void do_tx(void) +{ + static char _buf[MAX_HEADER_LEN + MAX_PAYLOAD_LEN]; + char *buf; + int fd, len, i; + + buf = build_packet(_buf, sizeof(_buf), &len); + + if (cfg_send_pfpacket) + fd = open_packet(); + else if (cfg_send_udp) + fd = open_inet(SOCK_DGRAM, 0); + else + fd = open_inet(SOCK_RAW, IPPROTO_RAW); + + for (i = 0; i < cfg_num_pkt; i++) { + if (cfg_send_pfpacket) + send_packet(fd, buf, len); + else + send_inet(fd, buf, len); + + /* randomize each packet individually to increase coverage */ + if (cfg_random_seed) { + cfg_payload_len = rand() % MAX_PAYLOAD_LEN; + buf = build_packet(_buf, sizeof(_buf), &len); + } + } + + if (close(fd)) + error(1, errno, "close tx"); +} + +static void do_rx(int fdp, int fdr) +{ + unsigned long count_udp = 0, count_pkt = 0; + long tleft, tstop; + struct pollfd pfd; + + tstop = gettimeofday_ms() + cfg_timeout_ms; + tleft = cfg_timeout_ms; + + do { + pfd.events = POLLIN; + pfd.fd = fdp; + if (poll(&pfd, 1, tleft) == -1) + error(1, errno, "poll"); + + if (pfd.revents & POLLIN) + count_pkt += recv_packet(fdp); + + if (cfg_proto == IPPROTO_UDP) + count_udp += recv_udp(fdr); + + tleft = tstop - gettimeofday_ms(); + } while (tleft > 0); + + if (close(fdr)) + error(1, errno, "close r"); + if (close(fdp)) + error(1, errno, "close p"); + + if (count_pkt < cfg_num_pkt) + error(1, 0, "rx: missing packets at pf_packet: %lu < %u", + count_pkt, cfg_num_pkt); + + if (cfg_proto == IPPROTO_UDP) { + if (cfg_bad_csum && count_udp) + error(1, 0, "rx: unexpected packets at udp"); + if (!cfg_bad_csum && !count_udp) + error(1, 0, "rx: missing packets at udp"); + } +} + +int main(int argc, char *const argv[]) +{ + int fdp = -1, fdr = -1; /* -1 to silence -Wmaybe-uninitialized */ + + parse_args(argc, argv); + + /* open receive sockets before transmitting */ + if (cfg_do_rx) { + fdp = recv_prepare_packet(); + fdr = recv_prepare_udp(); + } + + if (cfg_do_tx) + do_tx(); + + if (cfg_do_rx) + do_rx(fdp, fdr); + + fprintf(stderr, "OK\n"); + return 0; +} diff --git a/tools/testing/selftests/net/devlink_port_split.py b/tools/testing/selftests/net/devlink_port_split.py index 58bb7e9b88ce..2d84c7a0be6b 100755 --- a/tools/testing/selftests/net/devlink_port_split.py +++ b/tools/testing/selftests/net/devlink_port_split.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 # SPDX-License-Identifier: GPL-2.0 from subprocess import PIPE, Popen @@ -18,6 +18,8 @@ import sys # +# Kselftest framework requirement - SKIP code is 4 +KSFT_SKIP=4 Port = collections.namedtuple('Port', 'bus_info name') @@ -57,6 +59,8 @@ class devlink_ports(object): assert stderr == "" ports = json.loads(stdout)['port'] + validate_devlink_output(ports, 'flavour') + for port in ports: if dev in port: if ports[port]['flavour'] == 'physical': @@ -218,6 +222,27 @@ def split_splittable_port(port, k, lanes, dev): unsplit(port.bus_info) +def validate_devlink_output(devlink_data, target_property=None): + """ + Determine if test should be skipped by checking: + 1. devlink_data contains values + 2. The target_property exist in devlink_data + """ + skip_reason = None + if any(devlink_data.values()): + if target_property: + skip_reason = "{} not found in devlink output, test skipped".format(target_property) + for key in devlink_data: + if target_property in devlink_data[key]: + skip_reason = None + else: + skip_reason = 'devlink output is empty, test skipped' + + if skip_reason: + print(skip_reason) + sys.exit(KSFT_SKIP) + + def make_parser(): parser = argparse.ArgumentParser(description='A test for port splitting.') parser.add_argument('--dev', @@ -238,6 +263,7 @@ def main(cmdline=None): stdout, stderr = run_command(cmd) assert stderr == "" + validate_devlink_output(json.loads(stdout)) devs = json.loads(stdout)['dev'] dev = list(devs.keys())[0] @@ -249,6 +275,7 @@ def main(cmdline=None): ports = devlink_ports(dev) + found_max_lanes = False for port in ports.if_names: max_lanes = get_max_lanes(port.name) @@ -271,6 +298,11 @@ def main(cmdline=None): split_splittable_port(port, lane, max_lanes, dev) lane //= 2 + found_max_lanes = True + + if not found_max_lanes: + print(f"Test not started, no port of device {dev} reports max_lanes") + sys.exit(KSFT_SKIP) if __name__ == "__main__": diff --git a/tools/testing/selftests/net/drop_monitor_tests.sh b/tools/testing/selftests/net/drop_monitor_tests.sh index b7650e30d18b..7c4818c971fc 100755 --- a/tools/testing/selftests/net/drop_monitor_tests.sh +++ b/tools/testing/selftests/net/drop_monitor_tests.sh @@ -2,10 +2,8 @@ # SPDX-License-Identifier: GPL-2.0 # This test is for checking drop monitor functionality. - +source lib.sh ret=0 -# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 # all tests in this script. Can be overridden with -t option TESTS=" @@ -13,10 +11,6 @@ TESTS=" hw_drops " -IP="ip -netns ns1" -TC="tc -netns ns1" -DEVLINK="devlink -N ns1" -NS_EXEC="ip netns exec ns1" NETDEVSIM_PATH=/sys/bus/netdevsim/ DEV_ADDR=1337 DEV=netdevsim${DEV_ADDR} @@ -43,7 +37,7 @@ setup() modprobe netdevsim &> /dev/null set -e - ip netns add ns1 + setup_ns NS1 $IP link add dummy10 up type dummy $NS_EXEC echo "$DEV_ADDR 1" > ${NETDEVSIM_PATH}/new_device @@ -57,7 +51,7 @@ setup() cleanup() { $NS_EXEC echo "$DEV_ADDR" > ${NETDEVSIM_PATH}/del_device - ip netns del ns1 + cleanup_ns ${NS1} } sw_drops_test() @@ -194,8 +188,15 @@ if [ $? -ne 0 ]; then exit $ksft_skip fi -# start clean +# create netns first so we can get the namespace name +setup_ns NS1 cleanup &> /dev/null +trap cleanup EXIT + +IP="ip -netns ${NS1}" +TC="tc -netns ${NS1}" +DEVLINK="devlink -N ${NS1}" +NS_EXEC="ip netns exec ${NS1}" for t in $TESTS do diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh index fb5c55dd6df8..386ebd829df5 100755 --- a/tools/testing/selftests/net/fcnal-test.sh +++ b/tools/testing/selftests/net/fcnal-test.sh @@ -37,6 +37,10 @@ # # server / client nomenclature relative to ns-A +source lib.sh + +PATH=$PWD:$PWD/tools/testing/selftests/net:$PATH + VERBOSE=0 NSA_DEV=eth1 @@ -63,6 +67,14 @@ NSB_LO_IP=172.16.2.2 NSA_LO_IP6=2001:db8:2::1 NSB_LO_IP6=2001:db8:2::2 +# non-local addresses for freebind tests +NL_IP=172.17.1.1 +NL_IP6=2001:db8:4::1 + +# multicast and broadcast addresses +MCAST_IP=224.0.0.1 +BCAST_IP=255.255.255.255 + MD5_PW=abc123 MD5_WRONG_PW=abc1234 @@ -71,16 +83,15 @@ MCAST=ff02::1 NSA_LINKIP6= NSB_LINKIP6= -NSA=ns-A -NSB=ns-B -NSC=ns-C - -NSA_CMD="ip netns exec ${NSA}" -NSB_CMD="ip netns exec ${NSB}" -NSC_CMD="ip netns exec ${NSC}" - which ping6 > /dev/null 2>&1 && ping6=$(which ping6) || ping6=$(which ping) +# Check if FIPS mode is enabled +if [ -f /proc/sys/crypto/fips_enabled ]; then + fips_enabled=`cat /proc/sys/crypto/fips_enabled` +else + fips_enabled=0 +fi + ################################################################################ # utilities @@ -89,6 +100,7 @@ log_test() local rc=$1 local expected=$2 local msg="$3" + local ans [ "${VERBOSE}" = "1" ] && echo @@ -98,19 +110,20 @@ log_test() else nfail=$((nfail+1)) printf "TEST: %-70s [FAIL]\n" "${msg}" + echo " expected rc $expected; actual rc $rc" if [ "${PAUSE_ON_FAIL}" = "yes" ]; then echo echo "hit enter to continue, 'q' to quit" - read a - [ "$a" = "q" ] && exit 1 + read ans + [ "$ans" = "q" ] && exit 1 fi fi if [ "${PAUSE}" = "yes" ]; then echo echo "hit enter to continue, 'q' to quit" - read a - [ "$a" = "q" ] && exit 1 + read ans + [ "$ans" = "q" ] && exit 1 fi kill_procs @@ -179,6 +192,15 @@ kill_procs() sleep 1 } +set_ping_group() +{ + if [ "$VERBOSE" = "1" ]; then + echo "COMMAND: ${NSA_CMD} sysctl -q -w net.ipv4.ping_group_range='0 2147483647'" + fi + + ${NSA_CMD} sysctl -q -w net.ipv4.ping_group_range='0 2147483647' +} + do_run_cmd() { local cmd="$*" @@ -256,6 +278,28 @@ setup_cmd_nsb() fi } +setup_cmd_nsc() +{ + local cmd="$*" + local rc + + run_cmd_nsc ${cmd} + rc=$? + if [ $rc -ne 0 ]; then + # show user the command if not done so already + if [ "$VERBOSE" = "0" ]; then + echo "setup command: $cmd" + fi + echo "failed. stopping tests" + if [ "${PAUSE_ON_FAIL}" = "yes" ]; then + echo + echo "hit enter to continue" + read a + fi + exit $rc + fi +} + # set sysctl values in NS-A set_sysctl() { @@ -264,6 +308,12 @@ set_sysctl() run_cmd sysctl -q -w $* } +# get sysctl values in NS-A +get_sysctl() +{ + ${NSA_CMD} sysctl -n $* +} + ################################################################################ # Setup for tests @@ -273,6 +323,9 @@ addr2str() 127.0.0.1) echo "loopback";; ::1) echo "IPv6 loopback";; + ${BCAST_IP}) echo "broadcast";; + ${MCAST_IP}) echo "multicast";; + ${NSA_IP}) echo "ns-A IP";; ${NSA_IP6}) echo "ns-A IPv6";; ${NSA_LO_IP}) echo "ns-A loopback IP";; @@ -285,6 +338,9 @@ addr2str() ${NSB_LO_IP6}) echo "ns-B loopback IPv6";; ${NSB_LINKIP6}|${NSB_LINKIP6}%*) echo "ns-B IPv6 LLA";; + ${NL_IP}) echo "nonlocal IP";; + ${NL_IP6}) echo "nonlocal IPv6";; + ${VRF_IP}) echo "VRF IP";; ${VRF_IP6}) echo "VRF IPv6";; @@ -354,9 +410,6 @@ create_ns() local addr=$2 local addr6=$3 - ip netns add ${ns} - - ip -netns ${ns} link set lo up if [ "${addr}" != "-" ]; then ip -netns ${ns} addr add dev lo ${addr} fi @@ -414,13 +467,33 @@ cleanup() ip -netns ${NSA} link set dev ${NSA_DEV} down ip -netns ${NSA} link del dev ${NSA_DEV} - ip netns del ${NSA} + ip netns pids ${NSA} | xargs kill 2>/dev/null + cleanup_ns ${NSA} fi - ip netns del ${NSB} + ip netns pids ${NSB} | xargs kill 2>/dev/null + ip netns pids ${NSC} | xargs kill 2>/dev/null + cleanup_ns ${NSB} ${NSC} +} + +cleanup_vrf_dup() +{ + ip link del ${NSA_DEV2} >/dev/null 2>&1 + ip netns pids ${NSC} | xargs kill 2>/dev/null ip netns del ${NSC} >/dev/null 2>&1 } +setup_vrf_dup() +{ + # some VRF tests use ns-C which has the same config as + # ns-B but for a device NOT in the VRF + setup_ns NSC + NSC_CMD="ip netns exec ${NSC}" + create_ns ${NSC} "-" "-" + connect_ns ${NSA} ${NSA_DEV2} ${NSA_IP}/24 ${NSA_IP6}/64 \ + ${NSC} ${NSC_DEV} ${NSB_IP}/24 ${NSB_IP6}/64 +} + setup() { local with_vrf=${1} @@ -432,6 +505,10 @@ setup() log_debug "Configuring network namespaces" set -e + setup_ns NSA NSB + NSA_CMD="ip netns exec ${NSA}" + NSB_CMD="ip netns exec ${NSB}" + create_ns ${NSA} ${NSA_LO_IP}/32 ${NSA_LO_IP6}/128 create_ns ${NSB} ${NSB_LO_IP}/32 ${NSB_LO_IP6}/128 connect_ns ${NSA} ${NSA_DEV} ${NSA_IP}/24 ${NSA_IP6}/64 \ @@ -450,12 +527,6 @@ setup() ip -netns ${NSB} ro add ${VRF_IP}/32 via ${NSA_IP} dev ${NSB_DEV} ip -netns ${NSB} -6 ro add ${VRF_IP6}/128 via ${NSA_IP6} dev ${NSB_DEV} - - # some VRF tests use ns-C which has the same config as - # ns-B but for a device NOT in the VRF - create_ns ${NSC} "-" "-" - connect_ns ${NSA} ${NSA_DEV2} ${NSA_IP}/24 ${NSA_IP6}/64 \ - ${NSC} ${NSC_DEV} ${NSB_IP}/24 ${NSB_IP6}/64 else ip -netns ${NSA} ro add ${NSB_LO_IP}/32 via ${NSB_IP} dev ${NSA_DEV} ip -netns ${NSA} ro add ${NSB_LO_IP6}/128 via ${NSB_IP6} dev ${NSA_DEV} @@ -471,6 +542,40 @@ setup() sleep 1 } +setup_lla_only() +{ + # make sure we are starting with a clean slate + kill_procs + cleanup 2>/dev/null + + log_debug "Configuring network namespaces" + set -e + + setup_ns NSA NSB NSC + NSA_CMD="ip netns exec ${NSA}" + NSB_CMD="ip netns exec ${NSB}" + NSC_CMD="ip netns exec ${NSC}" + create_ns ${NSA} "-" "-" + create_ns ${NSB} "-" "-" + create_ns ${NSC} "-" "-" + connect_ns ${NSA} ${NSA_DEV} "-" "-" \ + ${NSB} ${NSB_DEV} "-" "-" + connect_ns ${NSA} ${NSA_DEV2} "-" "-" \ + ${NSC} ${NSC_DEV} "-" "-" + + NSA_LINKIP6=$(get_linklocal ${NSA} ${NSA_DEV}) + NSB_LINKIP6=$(get_linklocal ${NSB} ${NSB_DEV}) + NSC_LINKIP6=$(get_linklocal ${NSC} ${NSC_DEV}) + + create_vrf ${NSA} ${VRF} ${VRF_TABLE} "-" "-" + ip -netns ${NSA} link set dev ${NSA_DEV} vrf ${VRF} + ip -netns ${NSA} link set dev ${NSA_DEV2} vrf ${VRF} + + set +e + + sleep 1 +} + ################################################################################ # IPv4 @@ -497,6 +602,20 @@ ipv4_ping_novrf() done # + # out, but don't use gateway if peer is not on link + # + a=${NSB_IP} + log_start + run_cmd ping -c 1 -w 1 -r ${a} + log_test_addr ${a} $? 0 "ping out (don't route), peer on link" + + a=${NSB_LO_IP} + log_start + show_hint "Fails since peer is not on link" + run_cmd ping -c 1 -w 1 -r ${a} + log_test_addr ${a} $? 1 "ping out (don't route), peer not on link" + + # # in # for a in ${NSA_IP} ${NSA_LO_IP} @@ -669,7 +788,7 @@ ipv4_ping_vrf() log_start show_hint "Fails since address on vrf device is out of device scope" run_cmd ping -c1 -w1 -I ${NSA_DEV} ${a} - log_test_addr ${a} $? 1 "ping local, device bind" + log_test_addr ${a} $? 2 "ping local, device bind" done # @@ -729,10 +848,16 @@ ipv4_ping() setup set_sysctl net.ipv4.raw_l3mdev_accept=1 2>/dev/null ipv4_ping_novrf + setup + set_ping_group + ipv4_ping_novrf log_subsection "With VRF" setup "yes" ipv4_ping_vrf + setup "yes" + set_ping_group + ipv4_ping_vrf } ################################################################################ @@ -749,9 +874,9 @@ ipv4_tcp_md5_novrf() # basic use case log_start - run_cmd nettest -s -M ${MD5_PW} -r ${NSB_IP} & + run_cmd nettest -s -M ${MD5_PW} -m ${NSB_IP} & sleep 1 - run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_PW} + run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 0 "MD5: Single address config" # client sends MD5, server not configured @@ -759,23 +884,23 @@ ipv4_tcp_md5_novrf() show_hint "Should timeout due to MD5 mismatch" run_cmd nettest -s & sleep 1 - run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_PW} + run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 2 "MD5: Server no config, client uses password" # wrong password log_start show_hint "Should timeout since client uses wrong password" - run_cmd nettest -s -M ${MD5_PW} -r ${NSB_IP} & + run_cmd nettest -s -M ${MD5_PW} -m ${NSB_IP} & sleep 1 - run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_WRONG_PW} + run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_WRONG_PW} log_test $? 2 "MD5: Client uses wrong password" # client from different address log_start show_hint "Should timeout due to MD5 mismatch" - run_cmd nettest -s -M ${MD5_PW} -r ${NSB_LO_IP} & + run_cmd nettest -s -M ${MD5_PW} -m ${NSB_LO_IP} & sleep 1 - run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_PW} + run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 2 "MD5: Client address does not match address configured with password" # @@ -786,7 +911,7 @@ ipv4_tcp_md5_novrf() log_start run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} & sleep 1 - run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_PW} + run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 0 "MD5: Prefix config" # client in prefix, wrong password @@ -794,7 +919,7 @@ ipv4_tcp_md5_novrf() show_hint "Should timeout since client uses wrong password" run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} & sleep 1 - run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_WRONG_PW} + run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_WRONG_PW} log_test $? 2 "MD5: Prefix config, client uses wrong password" # client outside of prefix @@ -802,7 +927,7 @@ ipv4_tcp_md5_novrf() show_hint "Should timeout due to MD5 mismatch" run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} & sleep 1 - run_cmd_nsb nettest -l ${NSB_LO_IP} -r ${NSA_IP} -M ${MD5_PW} + run_cmd_nsb nettest -c ${NSB_LO_IP} -r ${NSA_IP} -X ${MD5_PW} log_test $? 2 "MD5: Prefix config, client address not in configured prefix" } @@ -817,33 +942,33 @@ ipv4_tcp_md5() # basic use case log_start - run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -r ${NSB_IP} & + run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP} & sleep 1 - run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_PW} + run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 0 "MD5: VRF: Single address config" # client sends MD5, server not configured log_start show_hint "Should timeout since server does not have MD5 auth" - run_cmd nettest -s -d ${VRF} & + run_cmd nettest -s -I ${VRF} & sleep 1 - run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_PW} + run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 2 "MD5: VRF: Server no config, client uses password" # wrong password log_start show_hint "Should timeout since client uses wrong password" - run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -r ${NSB_IP} & + run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP} & sleep 1 - run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_WRONG_PW} + run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_WRONG_PW} log_test $? 2 "MD5: VRF: Client uses wrong password" # client from different address log_start show_hint "Should timeout since server config differs from client" - run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -r ${NSB_LO_IP} & + run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NSB_LO_IP} & sleep 1 - run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_PW} + run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 2 "MD5: VRF: Client address does not match address configured with password" # @@ -852,25 +977,25 @@ ipv4_tcp_md5() # client in prefix log_start - run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET} & + run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} & sleep 1 - run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_PW} + run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 0 "MD5: VRF: Prefix config" # client in prefix, wrong password log_start show_hint "Should timeout since client uses wrong password" - run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET} & + run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} & sleep 1 - run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_WRONG_PW} + run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_WRONG_PW} log_test $? 2 "MD5: VRF: Prefix config, client uses wrong password" # client outside of prefix log_start show_hint "Should timeout since client address is outside of prefix" - run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET} & + run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} & sleep 1 - run_cmd_nsb nettest -l ${NSB_LO_IP} -r ${NSA_IP} -M ${MD5_PW} + run_cmd_nsb nettest -c ${NSB_LO_IP} -r ${NSA_IP} -X ${MD5_PW} log_test $? 2 "MD5: VRF: Prefix config, client address not in configured prefix" # @@ -878,76 +1003,183 @@ ipv4_tcp_md5() # log_start - run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -r ${NSB_IP} & - run_cmd nettest -s -M ${MD5_WRONG_PW} -r ${NSB_IP} & + run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP} & + run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NSB_IP} & sleep 1 - run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_PW} + run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 0 "MD5: VRF: Single address config in default VRF and VRF, conn in VRF" log_start - run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -r ${NSB_IP} & - run_cmd nettest -s -M ${MD5_WRONG_PW} -r ${NSB_IP} & + run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP} & + run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NSB_IP} & sleep 1 - run_cmd_nsc nettest -r ${NSA_IP} -M ${MD5_WRONG_PW} + run_cmd_nsc nettest -r ${NSA_IP} -X ${MD5_WRONG_PW} log_test $? 0 "MD5: VRF: Single address config in default VRF and VRF, conn in default VRF" log_start show_hint "Should timeout since client in default VRF uses VRF password" - run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -r ${NSB_IP} & - run_cmd nettest -s -M ${MD5_WRONG_PW} -r ${NSB_IP} & + run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP} & + run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NSB_IP} & sleep 1 - run_cmd_nsc nettest -r ${NSA_IP} -M ${MD5_PW} + run_cmd_nsc nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 2 "MD5: VRF: Single address config in default VRF and VRF, conn in default VRF with VRF pw" log_start show_hint "Should timeout since client in VRF uses default VRF password" - run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -r ${NSB_IP} & - run_cmd nettest -s -M ${MD5_WRONG_PW} -r ${NSB_IP} & + run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP} & + run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NSB_IP} & sleep 1 - run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_WRONG_PW} + run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_WRONG_PW} log_test $? 2 "MD5: VRF: Single address config in default VRF and VRF, conn in VRF with default VRF pw" log_start - run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET} & + run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} & run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NS_NET} & sleep 1 - run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_PW} + run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 0 "MD5: VRF: Prefix config in default VRF and VRF, conn in VRF" log_start - run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET} & + run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} & run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NS_NET} & sleep 1 - run_cmd_nsc nettest -r ${NSA_IP} -M ${MD5_WRONG_PW} + run_cmd_nsc nettest -r ${NSA_IP} -X ${MD5_WRONG_PW} log_test $? 0 "MD5: VRF: Prefix config in default VRF and VRF, conn in default VRF" log_start show_hint "Should timeout since client in default VRF uses VRF password" - run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET} & + run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} & run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NS_NET} & sleep 1 - run_cmd_nsc nettest -r ${NSA_IP} -M ${MD5_PW} + run_cmd_nsc nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 2 "MD5: VRF: Prefix config in default VRF and VRF, conn in default VRF with VRF pw" log_start show_hint "Should timeout since client in VRF uses default VRF password" - run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET} & + run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} & run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NS_NET} & sleep 1 - run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_WRONG_PW} + run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_WRONG_PW} log_test $? 2 "MD5: VRF: Prefix config in default VRF and VRF, conn in VRF with default VRF pw" # # negative tests # log_start - run_cmd nettest -s -d ${NSA_DEV} -M ${MD5_PW} -r ${NSB_IP} + run_cmd nettest -s -I ${NSA_DEV} -M ${MD5_PW} -m ${NSB_IP} log_test $? 1 "MD5: VRF: Device must be a VRF - single address" log_start - run_cmd nettest -s -d ${NSA_DEV} -M ${MD5_PW} -m ${NS_NET} + run_cmd nettest -s -I ${NSA_DEV} -M ${MD5_PW} -m ${NS_NET} log_test $? 1 "MD5: VRF: Device must be a VRF - prefix" + test_ipv4_md5_vrf__vrf_server__no_bind_ifindex + test_ipv4_md5_vrf__global_server__bind_ifindex0 +} + +test_ipv4_md5_vrf__vrf_server__no_bind_ifindex() +{ + log_start + show_hint "Simulates applications using VRF without TCP_MD5SIG_FLAG_IFINDEX" + run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} --no-bind-key-ifindex & + sleep 1 + run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} + log_test $? 0 "MD5: VRF: VRF-bound server, unbound key accepts connection" + + log_start + show_hint "Binding both the socket and the key is not required but it works" + run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} --force-bind-key-ifindex & + sleep 1 + run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} + log_test $? 0 "MD5: VRF: VRF-bound server, bound key accepts connection" +} + +test_ipv4_md5_vrf__global_server__bind_ifindex0() +{ + # This particular test needs tcp_l3mdev_accept=1 for Global server to accept VRF connections + local old_tcp_l3mdev_accept + old_tcp_l3mdev_accept=$(get_sysctl net.ipv4.tcp_l3mdev_accept) + set_sysctl net.ipv4.tcp_l3mdev_accept=1 + + log_start + run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} --force-bind-key-ifindex & + sleep 1 + run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} + log_test $? 2 "MD5: VRF: Global server, Key bound to ifindex=0 rejects VRF connection" + + log_start + run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} --force-bind-key-ifindex & + sleep 1 + run_cmd_nsc nettest -r ${NSA_IP} -X ${MD5_PW} + log_test $? 0 "MD5: VRF: Global server, key bound to ifindex=0 accepts non-VRF connection" + log_start + + run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} --no-bind-key-ifindex & + sleep 1 + run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} + log_test $? 0 "MD5: VRF: Global server, key not bound to ifindex accepts VRF connection" + + log_start + run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} --no-bind-key-ifindex & + sleep 1 + run_cmd_nsc nettest -r ${NSA_IP} -X ${MD5_PW} + log_test $? 0 "MD5: VRF: Global server, key not bound to ifindex accepts non-VRF connection" + + # restore value + set_sysctl net.ipv4.tcp_l3mdev_accept="$old_tcp_l3mdev_accept" +} + +ipv4_tcp_dontroute() +{ + local syncookies=$1 + local nsa_syncookies + local nsb_syncookies + local a + + # + # Link local connection tests (SO_DONTROUTE). + # Connections should succeed only when the remote IP address is + # on link (doesn't need to be routed through a gateway). + # + + nsa_syncookies=$(ip netns exec "${NSA}" sysctl -n net.ipv4.tcp_syncookies) + nsb_syncookies=$(ip netns exec "${NSB}" sysctl -n net.ipv4.tcp_syncookies) + ip netns exec "${NSA}" sysctl -wq net.ipv4.tcp_syncookies=${syncookies} + ip netns exec "${NSB}" sysctl -wq net.ipv4.tcp_syncookies=${syncookies} + + # Test with eth1 address (on link). + + a=${NSB_IP} + log_start + do_run_cmd nettest -B -N "${NSA}" -O "${NSB}" -r ${a} --client-dontroute + log_test_addr ${a} $? 0 "SO_DONTROUTE client, syncookies=${syncookies}" + + a=${NSB_IP} + log_start + do_run_cmd nettest -B -N "${NSA}" -O "${NSB}" -r ${a} --server-dontroute + log_test_addr ${a} $? 0 "SO_DONTROUTE server, syncookies=${syncookies}" + + # Test with loopback address (routed). + # + # The client would use the eth1 address as source IP by default. + # Therefore, we need to use the -c option here, to force the use of the + # routed (loopback) address as source IP (so that the server will try + # to respond to a routed address and not a link local one). + + a=${NSB_LO_IP} + log_start + show_hint "Should fail 'Network is unreachable' since server is not on link" + do_run_cmd nettest -B -N "${NSA}" -O "${NSB}" -c "${NSA_LO_IP}" -r ${a} --client-dontroute + log_test_addr ${a} $? 1 "SO_DONTROUTE client, syncookies=${syncookies}" + + a=${NSB_LO_IP} + log_start + show_hint "Should timeout since server cannot respond (client is not on link)" + do_run_cmd nettest -B -N "${NSA}" -O "${NSB}" -c "${NSA_LO_IP}" -r ${a} --server-dontroute + log_test_addr ${a} $? 2 "SO_DONTROUTE server, syncookies=${syncookies}" + + ip netns exec "${NSB}" sysctl -wq net.ipv4.tcp_syncookies=${nsb_syncookies} + ip netns exec "${NSA}" sysctl -wq net.ipv4.tcp_syncookies=${nsa_syncookies} } ipv4_tcp_novrf() @@ -968,7 +1200,7 @@ ipv4_tcp_novrf() a=${NSA_IP} log_start - run_cmd nettest -s -d ${NSA_DEV} & + run_cmd nettest -s -I ${NSA_DEV} & sleep 1 run_cmd_nsb nettest -r ${a} log_test_addr ${a} $? 0 "Device server" @@ -1024,7 +1256,7 @@ ipv4_tcp_novrf() a=${NSA_IP} log_start - run_cmd nettest -s -d ${NSA_DEV} & + run_cmd nettest -s -I ${NSA_DEV} & sleep 1 run_cmd nettest -r ${a} -0 ${a} log_test_addr ${a} $? 0 "Device server, unbound client, local connection" @@ -1033,7 +1265,7 @@ ipv4_tcp_novrf() do log_start show_hint "Should fail 'Connection refused' since addresses on loopback are out of device scope" - run_cmd nettest -s -d ${NSA_DEV} & + run_cmd nettest -s -I ${NSA_DEV} & sleep 1 run_cmd nettest -r ${a} log_test_addr ${a} $? 1 "Device server, unbound client, local connection" @@ -1058,7 +1290,7 @@ ipv4_tcp_novrf() a=${NSA_IP} log_start - run_cmd nettest -s -d ${NSA_DEV} -2 ${NSA_DEV} & + run_cmd nettest -s -I ${NSA_DEV} -3 ${NSA_DEV} & sleep 1 run_cmd nettest -d ${NSA_DEV} -r ${a} -0 ${a} log_test_addr ${a} $? 0 "Device server, device client, local connection" @@ -1068,7 +1300,10 @@ ipv4_tcp_novrf() run_cmd nettest -d ${NSA_DEV} -r ${a} log_test_addr ${a} $? 1 "No server, device client, local conn" - ipv4_tcp_md5_novrf + [ "$fips_enabled" = "1" ] || ipv4_tcp_md5_novrf + + ipv4_tcp_dontroute 0 + ipv4_tcp_dontroute 2 } ipv4_tcp_vrf() @@ -1093,13 +1328,13 @@ ipv4_tcp_vrf() log_test_addr ${a} $? 1 "Global server" log_start - run_cmd nettest -s -d ${VRF} -2 ${VRF} & + run_cmd nettest -s -I ${VRF} -3 ${VRF} & sleep 1 run_cmd_nsb nettest -r ${a} log_test_addr ${a} $? 0 "VRF server" log_start - run_cmd nettest -s -d ${NSA_DEV} -2 ${NSA_DEV} & + run_cmd nettest -s -I ${NSA_DEV} -3 ${NSA_DEV} & sleep 1 run_cmd_nsb nettest -r ${a} log_test_addr ${a} $? 0 "Device server" @@ -1122,7 +1357,11 @@ ipv4_tcp_vrf() log_test_addr ${a} $? 1 "Global server, local connection" # run MD5 tests - ipv4_tcp_md5 + if [ "$fips_enabled" = "0" ]; then + setup_vrf_dup + ipv4_tcp_md5 + cleanup_vrf_dup + fi # # enable VRF global server @@ -1134,14 +1373,14 @@ ipv4_tcp_vrf() do log_start show_hint "client socket should be bound to VRF" - run_cmd nettest -s -2 ${VRF} & + run_cmd nettest -s -3 ${VRF} & sleep 1 run_cmd_nsb nettest -r ${a} log_test_addr ${a} $? 0 "Global server" log_start show_hint "client socket should be bound to VRF" - run_cmd nettest -s -d ${VRF} -2 ${VRF} & + run_cmd nettest -s -I ${VRF} -3 ${VRF} & sleep 1 run_cmd_nsb nettest -r ${a} log_test_addr ${a} $? 0 "VRF server" @@ -1156,7 +1395,7 @@ ipv4_tcp_vrf() a=${NSA_IP} log_start show_hint "client socket should be bound to device" - run_cmd nettest -s -d ${NSA_DEV} -2 ${NSA_DEV} & + run_cmd nettest -s -I ${NSA_DEV} -3 ${NSA_DEV} & sleep 1 run_cmd_nsb nettest -r ${a} log_test_addr ${a} $? 0 "Device server" @@ -1166,7 +1405,7 @@ ipv4_tcp_vrf() do log_start show_hint "Should fail 'Connection refused' since client is not bound to VRF" - run_cmd nettest -s -d ${VRF} & + run_cmd nettest -s -I ${VRF} & sleep 1 run_cmd nettest -r ${a} log_test_addr ${a} $? 1 "Global server, local connection" @@ -1203,7 +1442,7 @@ ipv4_tcp_vrf() for a in ${NSA_IP} ${VRF_IP} 127.0.0.1 do log_start - run_cmd nettest -s -d ${VRF} -2 ${VRF} & + run_cmd nettest -s -I ${VRF} -3 ${VRF} & sleep 1 run_cmd nettest -r ${a} -d ${VRF} -0 ${a} log_test_addr ${a} $? 0 "VRF server, VRF client, local connection" @@ -1211,26 +1450,26 @@ ipv4_tcp_vrf() a=${NSA_IP} log_start - run_cmd nettest -s -d ${VRF} -2 ${VRF} & + run_cmd nettest -s -I ${VRF} -3 ${VRF} & sleep 1 run_cmd nettest -r ${a} -d ${NSA_DEV} -0 ${a} log_test_addr ${a} $? 0 "VRF server, device client, local connection" log_start show_hint "Should fail 'No route to host' since client is out of VRF scope" - run_cmd nettest -s -d ${VRF} & + run_cmd nettest -s -I ${VRF} & sleep 1 run_cmd nettest -r ${a} log_test_addr ${a} $? 1 "VRF server, unbound client, local connection" log_start - run_cmd nettest -s -d ${NSA_DEV} -2 ${NSA_DEV} & + run_cmd nettest -s -I ${NSA_DEV} -3 ${NSA_DEV} & sleep 1 run_cmd nettest -r ${a} -d ${VRF} -0 ${a} log_test_addr ${a} $? 0 "Device server, VRF client, local connection" log_start - run_cmd nettest -s -d ${NSA_DEV} -2 ${NSA_DEV} & + run_cmd nettest -s -I ${NSA_DEV} -3 ${NSA_DEV} & sleep 1 run_cmd nettest -r ${a} -d ${NSA_DEV} -0 ${a} log_test_addr ${a} $? 0 "Device server, device client, local connection" @@ -1269,7 +1508,7 @@ ipv4_udp_novrf() for a in ${NSA_IP} ${NSA_LO_IP} do log_start - run_cmd nettest -D -s -2 ${NSA_DEV} & + run_cmd nettest -D -s -3 ${NSA_DEV} & sleep 1 run_cmd_nsb nettest -D -r ${a} log_test_addr ${a} $? 0 "Global server" @@ -1282,7 +1521,7 @@ ipv4_udp_novrf() a=${NSA_IP} log_start - run_cmd nettest -D -d ${NSA_DEV} -s -2 ${NSA_DEV} & + run_cmd nettest -D -I ${NSA_DEV} -s -3 ${NSA_DEV} & sleep 1 run_cmd_nsb nettest -D -r ${a} log_test_addr ${a} $? 0 "Device server" @@ -1317,6 +1556,13 @@ ipv4_udp_novrf() log_test_addr ${a} $? 0 "Client, device bind via IP_UNICAST_IF" log_start + run_cmd_nsb nettest -D -s & + sleep 1 + run_cmd nettest -D -r ${a} -d ${NSA_DEV} -S -0 ${NSA_IP} -U + log_test_addr ${a} $? 0 "Client, device bind via IP_UNICAST_IF, with connect()" + + + log_start show_hint "Should fail 'Connection refused'" run_cmd nettest -D -r ${a} log_test_addr ${a} $? 1 "No server, unbound client" @@ -1341,7 +1587,7 @@ ipv4_udp_novrf() a=${NSA_IP} log_start - run_cmd nettest -s -D -d ${NSA_DEV} -2 ${NSA_DEV} & + run_cmd nettest -s -D -I ${NSA_DEV} -3 ${NSA_DEV} & sleep 1 run_cmd nettest -D -r ${a} log_test_addr ${a} $? 0 "Device server, unbound client, local connection" @@ -1350,7 +1596,7 @@ ipv4_udp_novrf() do log_start show_hint "Should fail 'Connection refused' since address is out of device scope" - run_cmd nettest -s -D -d ${NSA_DEV} & + run_cmd nettest -s -D -I ${NSA_DEV} & sleep 1 run_cmd nettest -D -r ${a} log_test_addr ${a} $? 1 "Device server, unbound client, local connection" @@ -1375,6 +1621,13 @@ ipv4_udp_novrf() run_cmd nettest -D -d ${NSA_DEV} -S -r ${a} log_test_addr ${a} $? 0 "Global server, device client via IP_UNICAST_IF, local connection" + log_start + run_cmd nettest -s -D & + sleep 1 + run_cmd nettest -D -d ${NSA_DEV} -S -r ${a} -U + log_test_addr ${a} $? 0 "Global server, device client via IP_UNICAST_IF, local connection, with connect()" + + # IPv4 with device bind has really weird behavior - it overrides the # fib lookup, generates an rtable and tries to send the packet. This # causes failures for local traffic at different places @@ -1400,11 +1653,20 @@ ipv4_udp_novrf() sleep 1 run_cmd nettest -D -r ${a} -d ${NSA_DEV} -S log_test_addr ${a} $? 1 "Global server, device client via IP_UNICAST_IF, local connection" + + log_start + show_hint "Should fail since addresses on loopback are out of device scope" + run_cmd nettest -D -s & + sleep 1 + run_cmd nettest -D -r ${a} -d ${NSA_DEV} -S -U + log_test_addr ${a} $? 1 "Global server, device client via IP_UNICAST_IF, local connection, with connect()" + + done a=${NSA_IP} log_start - run_cmd nettest -D -s -d ${NSA_DEV} -2 ${NSA_DEV} & + run_cmd nettest -D -s -I ${NSA_DEV} -3 ${NSA_DEV} & sleep 1 run_cmd nettest -D -d ${NSA_DEV} -r ${a} -0 ${a} log_test_addr ${a} $? 0 "Device server, device client, local conn" @@ -1412,6 +1674,23 @@ ipv4_udp_novrf() log_start run_cmd nettest -D -d ${NSA_DEV} -r ${a} log_test_addr ${a} $? 2 "No server, device client, local conn" + + # + # Link local connection tests (SO_DONTROUTE). + # Connections should succeed only when the remote IP address is + # on link (doesn't need to be routed through a gateway). + # + + a=${NSB_IP} + log_start + do_run_cmd nettest -B -D -N "${NSA}" -O "${NSB}" -r ${a} --client-dontroute + log_test_addr ${a} $? 0 "SO_DONTROUTE client" + + a=${NSB_LO_IP} + log_start + show_hint "Should fail 'Network is unreachable' since server is not on link" + do_run_cmd nettest -B -D -N "${NSA}" -O "${NSB}" -r ${a} --client-dontroute + log_test_addr ${a} $? 1 "SO_DONTROUTE client" } ipv4_udp_vrf() @@ -1435,13 +1714,13 @@ ipv4_udp_vrf() log_test_addr ${a} $? 1 "Global server" log_start - run_cmd nettest -D -d ${VRF} -s -2 ${NSA_DEV} & + run_cmd nettest -D -I ${VRF} -s -3 ${NSA_DEV} & sleep 1 run_cmd_nsb nettest -D -r ${a} log_test_addr ${a} $? 0 "VRF server" log_start - run_cmd nettest -D -d ${NSA_DEV} -s -2 ${NSA_DEV} & + run_cmd nettest -D -I ${NSA_DEV} -s -3 ${NSA_DEV} & sleep 1 run_cmd_nsb nettest -D -r ${a} log_test_addr ${a} $? 0 "Enslaved device server" @@ -1461,26 +1740,26 @@ ipv4_udp_vrf() a=${NSA_IP} log_start - run_cmd nettest -s -D -d ${VRF} -2 ${NSA_DEV} & + run_cmd nettest -s -D -I ${VRF} -3 ${NSA_DEV} & sleep 1 run_cmd nettest -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "VRF server, VRF client, local conn" log_start - run_cmd nettest -s -D -d ${VRF} -2 ${NSA_DEV} & + run_cmd nettest -s -D -I ${VRF} -3 ${NSA_DEV} & sleep 1 run_cmd nettest -D -d ${NSA_DEV} -r ${a} log_test_addr ${a} $? 0 "VRF server, enslaved device client, local connection" a=${NSA_IP} log_start - run_cmd nettest -s -D -d ${NSA_DEV} -2 ${NSA_DEV} & + run_cmd nettest -s -D -I ${NSA_DEV} -3 ${NSA_DEV} & sleep 1 run_cmd nettest -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "Enslaved device server, VRF client, local conn" log_start - run_cmd nettest -s -D -d ${NSA_DEV} -2 ${NSA_DEV} & + run_cmd nettest -s -D -I ${NSA_DEV} -3 ${NSA_DEV} & sleep 1 run_cmd nettest -D -d ${NSA_DEV} -r ${a} log_test_addr ${a} $? 0 "Enslaved device server, device client, local conn" @@ -1495,19 +1774,19 @@ ipv4_udp_vrf() for a in ${NSA_IP} ${VRF_IP} do log_start - run_cmd nettest -D -s -2 ${NSA_DEV} & + run_cmd nettest -D -s -3 ${NSA_DEV} & sleep 1 run_cmd_nsb nettest -D -r ${a} log_test_addr ${a} $? 0 "Global server" log_start - run_cmd nettest -D -d ${VRF} -s -2 ${NSA_DEV} & + run_cmd nettest -D -I ${VRF} -s -3 ${NSA_DEV} & sleep 1 run_cmd_nsb nettest -D -r ${a} log_test_addr ${a} $? 0 "VRF server" log_start - run_cmd nettest -D -d ${NSA_DEV} -s -2 ${NSA_DEV} & + run_cmd nettest -D -I ${NSA_DEV} -s -3 ${NSA_DEV} & sleep 1 run_cmd_nsb nettest -D -r ${a} log_test_addr ${a} $? 0 "Enslaved device server" @@ -1549,31 +1828,31 @@ ipv4_udp_vrf() # a=${NSA_IP} log_start - run_cmd nettest -D -s -2 ${NSA_DEV} & + run_cmd nettest -D -s -3 ${NSA_DEV} & sleep 1 run_cmd nettest -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "Global server, VRF client, local conn" log_start - run_cmd nettest -s -D -d ${VRF} -2 ${NSA_DEV} & + run_cmd nettest -s -D -I ${VRF} -3 ${NSA_DEV} & sleep 1 run_cmd nettest -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "VRF server, VRF client, local conn" log_start - run_cmd nettest -s -D -d ${VRF} -2 ${NSA_DEV} & + run_cmd nettest -s -D -I ${VRF} -3 ${NSA_DEV} & sleep 1 run_cmd nettest -D -d ${NSA_DEV} -r ${a} log_test_addr ${a} $? 0 "VRF server, device client, local conn" log_start - run_cmd nettest -s -D -d ${NSA_DEV} -2 ${NSA_DEV} & + run_cmd nettest -s -D -I ${NSA_DEV} -3 ${NSA_DEV} & sleep 1 run_cmd nettest -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "Enslaved device server, VRF client, local conn" log_start - run_cmd nettest -s -D -d ${NSA_DEV} -2 ${NSA_DEV} & + run_cmd nettest -s -D -I ${NSA_DEV} -3 ${NSA_DEV} & sleep 1 run_cmd nettest -D -d ${NSA_DEV} -r ${a} log_test_addr ${a} $? 0 "Enslaved device server, device client, local conn" @@ -1581,7 +1860,7 @@ ipv4_udp_vrf() for a in ${VRF_IP} 127.0.0.1 do log_start - run_cmd nettest -D -s -2 ${VRF} & + run_cmd nettest -D -s -3 ${VRF} & sleep 1 run_cmd nettest -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "Global server, VRF client, local conn" @@ -1590,7 +1869,7 @@ ipv4_udp_vrf() for a in ${VRF_IP} 127.0.0.1 do log_start - run_cmd nettest -s -D -d ${VRF} -2 ${VRF} & + run_cmd nettest -s -D -I ${VRF} -3 ${VRF} & sleep 1 run_cmd nettest -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "VRF server, VRF client, local conn" @@ -1645,20 +1924,49 @@ ipv4_addr_bind_novrf() log_test_addr ${a} $? 0 "Raw socket bind to local address" log_start - run_cmd nettest -s -R -P icmp -l ${a} -d ${NSA_DEV} -b + run_cmd nettest -s -R -P icmp -l ${a} -I ${NSA_DEV} -b log_test_addr ${a} $? 0 "Raw socket bind to local address after device bind" done # + # tests for nonlocal bind + # + a=${NL_IP} + log_start + run_cmd nettest -s -R -f -l ${a} -b + log_test_addr ${a} $? 0 "Raw socket bind to nonlocal address" + + log_start + run_cmd nettest -s -f -l ${a} -b + log_test_addr ${a} $? 0 "TCP socket bind to nonlocal address" + + log_start + run_cmd nettest -s -D -P icmp -f -l ${a} -b + log_test_addr ${a} $? 0 "ICMP socket bind to nonlocal address" + + # + # check that ICMP sockets cannot bind to broadcast and multicast addresses + # + a=${BCAST_IP} + log_start + run_cmd nettest -s -D -P icmp -l ${a} -b + log_test_addr ${a} $? 1 "ICMP socket bind to broadcast address" + + a=${MCAST_IP} + log_start + run_cmd nettest -s -D -P icmp -l ${a} -b + log_test_addr ${a} $? 1 "ICMP socket bind to multicast address" + + # # tcp sockets # a=${NSA_IP} log_start - run_cmd nettest -l ${a} -r ${NSB_IP} -t1 -b + run_cmd nettest -c ${a} -r ${NSB_IP} -t1 -b log_test_addr ${a} $? 0 "TCP socket bind to local address" log_start - run_cmd nettest -l ${a} -r ${NSB_IP} -d ${NSA_DEV} -t1 -b + run_cmd nettest -c ${a} -r ${NSB_IP} -d ${NSA_DEV} -t1 -b log_test_addr ${a} $? 0 "TCP socket bind to local address after device bind" # Sadly, the kernel allows binding a socket to a device and then @@ -1668,7 +1976,7 @@ ipv4_addr_bind_novrf() #a=${NSA_LO_IP} #log_start #show_hint "Should fail with 'Cannot assign requested address'" - #run_cmd nettest -s -l ${a} -d ${NSA_DEV} -t1 -b + #run_cmd nettest -s -l ${a} -I ${NSA_DEV} -t1 -b #log_test_addr ${a} $? 1 "TCP socket bind to out of scope local address" } @@ -1680,46 +1988,76 @@ ipv4_addr_bind_vrf() for a in ${NSA_IP} ${VRF_IP} do log_start + show_hint "Socket not bound to VRF, but address is in VRF" run_cmd nettest -s -R -P icmp -l ${a} -b - log_test_addr ${a} $? 0 "Raw socket bind to local address" + log_test_addr ${a} $? 1 "Raw socket bind to local address" log_start - run_cmd nettest -s -R -P icmp -l ${a} -d ${NSA_DEV} -b + run_cmd nettest -s -R -P icmp -l ${a} -I ${NSA_DEV} -b log_test_addr ${a} $? 0 "Raw socket bind to local address after device bind" log_start - run_cmd nettest -s -R -P icmp -l ${a} -d ${VRF} -b + run_cmd nettest -s -R -P icmp -l ${a} -I ${VRF} -b log_test_addr ${a} $? 0 "Raw socket bind to local address after VRF bind" done a=${NSA_LO_IP} log_start show_hint "Address on loopback is out of VRF scope" - run_cmd nettest -s -R -P icmp -l ${a} -d ${VRF} -b + run_cmd nettest -s -R -P icmp -l ${a} -I ${VRF} -b log_test_addr ${a} $? 1 "Raw socket bind to out of scope address after VRF bind" # + # tests for nonlocal bind + # + a=${NL_IP} + log_start + run_cmd nettest -s -R -f -l ${a} -I ${VRF} -b + log_test_addr ${a} $? 0 "Raw socket bind to nonlocal address after VRF bind" + + log_start + run_cmd nettest -s -f -l ${a} -I ${VRF} -b + log_test_addr ${a} $? 0 "TCP socket bind to nonlocal address after VRF bind" + + log_start + run_cmd nettest -s -D -P icmp -f -l ${a} -I ${VRF} -b + log_test_addr ${a} $? 0 "ICMP socket bind to nonlocal address after VRF bind" + + # + # check that ICMP sockets cannot bind to broadcast and multicast addresses + # + a=${BCAST_IP} + log_start + run_cmd nettest -s -D -P icmp -l ${a} -I ${VRF} -b + log_test_addr ${a} $? 1 "ICMP socket bind to broadcast address after VRF bind" + + a=${MCAST_IP} + log_start + run_cmd nettest -s -D -P icmp -l ${a} -I ${VRF} -b + log_test_addr ${a} $? 1 "ICMP socket bind to multicast address after VRF bind" + + # # tcp sockets # for a in ${NSA_IP} ${VRF_IP} do log_start - run_cmd nettest -s -l ${a} -d ${VRF} -t1 -b + run_cmd nettest -s -l ${a} -I ${VRF} -t1 -b log_test_addr ${a} $? 0 "TCP socket bind to local address" log_start - run_cmd nettest -s -l ${a} -d ${NSA_DEV} -t1 -b + run_cmd nettest -s -l ${a} -I ${NSA_DEV} -t1 -b log_test_addr ${a} $? 0 "TCP socket bind to local address after device bind" done a=${NSA_LO_IP} log_start show_hint "Address on loopback out of scope for VRF" - run_cmd nettest -s -l ${a} -d ${VRF} -t1 -b + run_cmd nettest -s -l ${a} -I ${VRF} -t1 -b log_test_addr ${a} $? 1 "TCP socket bind to invalid local address for VRF" log_start show_hint "Address on loopback out of scope for device in VRF" - run_cmd nettest -s -l ${a} -d ${NSA_DEV} -t1 -b + run_cmd nettest -s -l ${a} -I ${NSA_DEV} -t1 -b log_test_addr ${a} $? 1 "TCP socket bind to invalid local address for device bind" } @@ -1729,10 +2067,12 @@ ipv4_addr_bind() log_subsection "No VRF" setup + set_ping_group ipv4_addr_bind_novrf log_subsection "With VRF" setup "yes" + set_ping_group ipv4_addr_bind_vrf } @@ -1766,7 +2106,7 @@ ipv4_rt() for a in ${NSA_IP} ${VRF_IP} do log_start - run_cmd nettest ${varg} -s -d ${VRF} & + run_cmd nettest ${varg} -s -I ${VRF} & sleep 1 run_cmd_nsb nettest ${varg} -r ${a} & sleep 3 @@ -1779,7 +2119,7 @@ ipv4_rt() a=${NSA_IP} log_start - run_cmd nettest ${varg} -s -d ${NSA_DEV} & + run_cmd nettest ${varg} -s -I ${NSA_DEV} & sleep 1 run_cmd_nsb nettest ${varg} -r ${a} & sleep 3 @@ -1834,7 +2174,7 @@ ipv4_rt() for a in ${NSA_IP} ${VRF_IP} do log_start - run_cmd nettest ${varg} -d ${VRF} -s & + run_cmd nettest ${varg} -I ${VRF} -s & sleep 1 run_cmd nettest ${varg} -d ${VRF} -r ${a} & sleep 3 @@ -1847,6 +2187,7 @@ ipv4_rt() a=${NSA_IP} log_start + run_cmd nettest ${varg} -s & sleep 1 run_cmd nettest ${varg} -d ${NSA_DEV} -r ${a} & @@ -1858,7 +2199,7 @@ ipv4_rt() setup ${with_vrf} log_start - run_cmd nettest ${varg} -d ${VRF} -s & + run_cmd nettest ${varg} -I ${VRF} -s & sleep 1 run_cmd nettest ${varg} -d ${NSA_DEV} -r ${a} & sleep 3 @@ -1869,7 +2210,7 @@ ipv4_rt() setup ${with_vrf} log_start - run_cmd nettest ${varg} -d ${NSA_DEV} -s & + run_cmd nettest ${varg} -I ${NSA_DEV} -s & sleep 1 run_cmd nettest ${varg} -d ${NSA_DEV} -r ${a} & sleep 3 @@ -2073,7 +2414,7 @@ ipv6_ping_vrf() log_start show_hint "Fails since VRF device does not support linklocal or multicast" run_cmd ${ping6} -c1 -w1 ${a} - log_test_addr ${a} $? 2 "ping out, VRF bind" + log_test_addr ${a} $? 1 "ping out, VRF bind" done for a in ${NSB_IP6} ${NSB_LO_IP6} ${NSB_LINKIP6}%${NSA_DEV} ${MCAST}%${NSA_DEV} @@ -2193,10 +2534,16 @@ ipv6_ping() log_subsection "No VRF" setup ipv6_ping_novrf + setup + set_ping_group + ipv6_ping_novrf log_subsection "With VRF" setup "yes" ipv6_ping_vrf + setup "yes" + set_ping_group + ipv6_ping_vrf } ################################################################################ @@ -2213,9 +2560,9 @@ ipv6_tcp_md5_novrf() # basic use case log_start - run_cmd nettest -6 -s -M ${MD5_PW} -r ${NSB_IP6} & + run_cmd nettest -6 -s -M ${MD5_PW} -m ${NSB_IP6} & sleep 1 - run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_PW} + run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 0 "MD5: Single address config" # client sends MD5, server not configured @@ -2223,23 +2570,23 @@ ipv6_tcp_md5_novrf() show_hint "Should timeout due to MD5 mismatch" run_cmd nettest -6 -s & sleep 1 - run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_PW} + run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 2 "MD5: Server no config, client uses password" # wrong password log_start show_hint "Should timeout since client uses wrong password" - run_cmd nettest -6 -s -M ${MD5_PW} -r ${NSB_IP6} & + run_cmd nettest -6 -s -M ${MD5_PW} -m ${NSB_IP6} & sleep 1 - run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_WRONG_PW} + run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW} log_test $? 2 "MD5: Client uses wrong password" # client from different address log_start show_hint "Should timeout due to MD5 mismatch" - run_cmd nettest -6 -s -M ${MD5_PW} -r ${NSB_LO_IP6} & + run_cmd nettest -6 -s -M ${MD5_PW} -m ${NSB_LO_IP6} & sleep 1 - run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_PW} + run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 2 "MD5: Client address does not match address configured with password" # @@ -2250,7 +2597,7 @@ ipv6_tcp_md5_novrf() log_start run_cmd nettest -6 -s -M ${MD5_PW} -m ${NS_NET6} & sleep 1 - run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_PW} + run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 0 "MD5: Prefix config" # client in prefix, wrong password @@ -2258,7 +2605,7 @@ ipv6_tcp_md5_novrf() show_hint "Should timeout since client uses wrong password" run_cmd nettest -6 -s -M ${MD5_PW} -m ${NS_NET6} & sleep 1 - run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_WRONG_PW} + run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW} log_test $? 2 "MD5: Prefix config, client uses wrong password" # client outside of prefix @@ -2266,7 +2613,7 @@ ipv6_tcp_md5_novrf() show_hint "Should timeout due to MD5 mismatch" run_cmd nettest -6 -s -M ${MD5_PW} -m ${NS_NET6} & sleep 1 - run_cmd_nsb nettest -6 -l ${NSB_LO_IP6} -r ${NSA_IP6} -M ${MD5_PW} + run_cmd_nsb nettest -6 -c ${NSB_LO_IP6} -r ${NSA_IP6} -X ${MD5_PW} log_test $? 2 "MD5: Prefix config, client address not in configured prefix" } @@ -2281,33 +2628,33 @@ ipv6_tcp_md5() # basic use case log_start - run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -r ${NSB_IP6} & + run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP6} & sleep 1 - run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_PW} + run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 0 "MD5: VRF: Single address config" # client sends MD5, server not configured log_start show_hint "Should timeout since server does not have MD5 auth" - run_cmd nettest -6 -s -d ${VRF} & + run_cmd nettest -6 -s -I ${VRF} & sleep 1 - run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_PW} + run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 2 "MD5: VRF: Server no config, client uses password" # wrong password log_start show_hint "Should timeout since client uses wrong password" - run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -r ${NSB_IP6} & + run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP6} & sleep 1 - run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_WRONG_PW} + run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW} log_test $? 2 "MD5: VRF: Client uses wrong password" # client from different address log_start show_hint "Should timeout since server config differs from client" - run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -r ${NSB_LO_IP6} & + run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NSB_LO_IP6} & sleep 1 - run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_PW} + run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 2 "MD5: VRF: Client address does not match address configured with password" # @@ -2316,25 +2663,25 @@ ipv6_tcp_md5() # client in prefix log_start - run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET6} & + run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET6} & sleep 1 - run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_PW} + run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 0 "MD5: VRF: Prefix config" # client in prefix, wrong password log_start show_hint "Should timeout since client uses wrong password" - run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET6} & + run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET6} & sleep 1 - run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_WRONG_PW} + run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW} log_test $? 2 "MD5: VRF: Prefix config, client uses wrong password" # client outside of prefix log_start show_hint "Should timeout since client address is outside of prefix" - run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET6} & + run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET6} & sleep 1 - run_cmd_nsb nettest -6 -l ${NSB_LO_IP6} -r ${NSA_IP6} -M ${MD5_PW} + run_cmd_nsb nettest -6 -c ${NSB_LO_IP6} -r ${NSA_IP6} -X ${MD5_PW} log_test $? 2 "MD5: VRF: Prefix config, client address not in configured prefix" # @@ -2342,74 +2689,74 @@ ipv6_tcp_md5() # log_start - run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -r ${NSB_IP6} & - run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -r ${NSB_IP6} & + run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP6} & + run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NSB_IP6} & sleep 1 - run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_PW} + run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 0 "MD5: VRF: Single address config in default VRF and VRF, conn in VRF" log_start - run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -r ${NSB_IP6} & - run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -r ${NSB_IP6} & + run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP6} & + run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NSB_IP6} & sleep 1 - run_cmd_nsc nettest -6 -r ${NSA_IP6} -M ${MD5_WRONG_PW} + run_cmd_nsc nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW} log_test $? 0 "MD5: VRF: Single address config in default VRF and VRF, conn in default VRF" log_start show_hint "Should timeout since client in default VRF uses VRF password" - run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -r ${NSB_IP6} & - run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -r ${NSB_IP6} & + run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP6} & + run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NSB_IP6} & sleep 1 - run_cmd_nsc nettest -6 -r ${NSA_IP6} -M ${MD5_PW} + run_cmd_nsc nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 2 "MD5: VRF: Single address config in default VRF and VRF, conn in default VRF with VRF pw" log_start show_hint "Should timeout since client in VRF uses default VRF password" - run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -r ${NSB_IP6} & - run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -r ${NSB_IP6} & + run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP6} & + run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NSB_IP6} & sleep 1 - run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_WRONG_PW} + run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW} log_test $? 2 "MD5: VRF: Single address config in default VRF and VRF, conn in VRF with default VRF pw" log_start - run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET6} & + run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET6} & run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NS_NET6} & sleep 1 - run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_PW} + run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 0 "MD5: VRF: Prefix config in default VRF and VRF, conn in VRF" log_start - run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET6} & + run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET6} & run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NS_NET6} & sleep 1 - run_cmd_nsc nettest -6 -r ${NSA_IP6} -M ${MD5_WRONG_PW} + run_cmd_nsc nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW} log_test $? 0 "MD5: VRF: Prefix config in default VRF and VRF, conn in default VRF" log_start show_hint "Should timeout since client in default VRF uses VRF password" - run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET6} & + run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET6} & run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NS_NET6} & sleep 1 - run_cmd_nsc nettest -6 -r ${NSA_IP6} -M ${MD5_PW} + run_cmd_nsc nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 2 "MD5: VRF: Prefix config in default VRF and VRF, conn in default VRF with VRF pw" log_start show_hint "Should timeout since client in VRF uses default VRF password" - run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET6} & + run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET6} & run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NS_NET6} & sleep 1 - run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_WRONG_PW} + run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW} log_test $? 2 "MD5: VRF: Prefix config in default VRF and VRF, conn in VRF with default VRF pw" # # negative tests # log_start - run_cmd nettest -6 -s -d ${NSA_DEV} -M ${MD5_PW} -r ${NSB_IP6} + run_cmd nettest -6 -s -I ${NSA_DEV} -M ${MD5_PW} -m ${NSB_IP6} log_test $? 1 "MD5: VRF: Device must be a VRF - single address" log_start - run_cmd nettest -6 -s -d ${NSA_DEV} -M ${MD5_PW} -m ${NS_NET6} + run_cmd nettest -6 -s -I ${NSA_DEV} -M ${MD5_PW} -m ${NS_NET6} log_test $? 1 "MD5: VRF: Device must be a VRF - prefix" } @@ -2482,7 +2829,7 @@ ipv6_tcp_novrf() a=${NSA_IP6} log_start - run_cmd nettest -6 -s -d ${NSA_DEV} -2 ${NSA_DEV} & + run_cmd nettest -6 -s -I ${NSA_DEV} -3 ${NSA_DEV} & sleep 1 run_cmd nettest -6 -r ${a} -0 ${a} log_test_addr ${a} $? 0 "Device server, unbound client, local connection" @@ -2491,7 +2838,7 @@ ipv6_tcp_novrf() do log_start show_hint "Should fail 'Connection refused' since addresses on loopback are out of device scope" - run_cmd nettest -6 -s -d ${NSA_DEV} & + run_cmd nettest -6 -s -I ${NSA_DEV} & sleep 1 run_cmd nettest -6 -r ${a} log_test_addr ${a} $? 1 "Device server, unbound client, local connection" @@ -2517,7 +2864,7 @@ ipv6_tcp_novrf() for a in ${NSA_IP6} ${NSA_LINKIP6} do log_start - run_cmd nettest -6 -s -d ${NSA_DEV} -2 ${NSA_DEV} & + run_cmd nettest -6 -s -I ${NSA_DEV} -3 ${NSA_DEV} & sleep 1 run_cmd nettest -6 -d ${NSA_DEV} -r ${a} log_test_addr ${a} $? 0 "Device server, device client, local conn" @@ -2531,7 +2878,7 @@ ipv6_tcp_novrf() log_test_addr ${a} $? 1 "No server, device client, local conn" done - ipv6_tcp_md5_novrf + [ "$fips_enabled" = "1" ] || ipv6_tcp_md5_novrf } ipv6_tcp_vrf() @@ -2559,7 +2906,7 @@ ipv6_tcp_vrf() for a in ${NSA_IP6} ${VRF_IP6} do log_start - run_cmd nettest -6 -s -d ${VRF} -2 ${VRF} & + run_cmd nettest -6 -s -I ${VRF} -3 ${VRF} & sleep 1 run_cmd_nsb nettest -6 -r ${a} log_test_addr ${a} $? 0 "VRF server" @@ -2568,7 +2915,7 @@ ipv6_tcp_vrf() # link local is always bound to ingress device a=${NSA_LINKIP6}%${NSB_DEV} log_start - run_cmd nettest -6 -s -d ${VRF} -2 ${NSA_DEV} & + run_cmd nettest -6 -s -I ${VRF} -3 ${NSA_DEV} & sleep 1 run_cmd_nsb nettest -6 -r ${a} log_test_addr ${a} $? 0 "VRF server" @@ -2576,7 +2923,7 @@ ipv6_tcp_vrf() for a in ${NSA_IP6} ${VRF_IP6} ${NSA_LINKIP6}%${NSB_DEV} do log_start - run_cmd nettest -6 -s -d ${NSA_DEV} -2 ${NSA_DEV} & + run_cmd nettest -6 -s -I ${NSA_DEV} -3 ${NSA_DEV} & sleep 1 run_cmd_nsb nettest -6 -r ${a} log_test_addr ${a} $? 0 "Device server" @@ -2601,7 +2948,11 @@ ipv6_tcp_vrf() log_test_addr ${a} $? 1 "Global server, local connection" # run MD5 tests - ipv6_tcp_md5 + if [ "$fips_enabled" = "0" ]; then + setup_vrf_dup + ipv6_tcp_md5 + cleanup_vrf_dup + fi # # enable VRF global server @@ -2612,7 +2963,7 @@ ipv6_tcp_vrf() for a in ${NSA_IP6} ${VRF_IP6} do log_start - run_cmd nettest -6 -s -2 ${VRF} & + run_cmd nettest -6 -s -3 ${VRF} & sleep 1 run_cmd_nsb nettest -6 -r ${a} log_test_addr ${a} $? 0 "Global server" @@ -2621,7 +2972,7 @@ ipv6_tcp_vrf() for a in ${NSA_IP6} ${VRF_IP6} do log_start - run_cmd nettest -6 -s -d ${VRF} -2 ${VRF} & + run_cmd nettest -6 -s -I ${VRF} -3 ${VRF} & sleep 1 run_cmd_nsb nettest -6 -r ${a} log_test_addr ${a} $? 0 "VRF server" @@ -2630,13 +2981,13 @@ ipv6_tcp_vrf() # For LLA, child socket is bound to device a=${NSA_LINKIP6}%${NSB_DEV} log_start - run_cmd nettest -6 -s -2 ${NSA_DEV} & + run_cmd nettest -6 -s -3 ${NSA_DEV} & sleep 1 run_cmd_nsb nettest -6 -r ${a} log_test_addr ${a} $? 0 "Global server" log_start - run_cmd nettest -6 -s -d ${VRF} -2 ${NSA_DEV} & + run_cmd nettest -6 -s -I ${VRF} -3 ${NSA_DEV} & sleep 1 run_cmd_nsb nettest -6 -r ${a} log_test_addr ${a} $? 0 "VRF server" @@ -2644,7 +2995,7 @@ ipv6_tcp_vrf() for a in ${NSA_IP6} ${NSA_LINKIP6}%${NSB_DEV} do log_start - run_cmd nettest -6 -s -d ${NSA_DEV} -2 ${NSA_DEV} & + run_cmd nettest -6 -s -I ${NSA_DEV} -3 ${NSA_DEV} & sleep 1 run_cmd_nsb nettest -6 -r ${a} log_test_addr ${a} $? 0 "Device server" @@ -2664,7 +3015,7 @@ ipv6_tcp_vrf() do log_start show_hint "Fails 'Connection refused' since client is not in VRF" - run_cmd nettest -6 -s -d ${VRF} & + run_cmd nettest -6 -s -I ${VRF} & sleep 1 run_cmd nettest -6 -r ${a} log_test_addr ${a} $? 1 "Global server, local connection" @@ -2719,7 +3070,7 @@ ipv6_tcp_vrf() for a in ${NSA_IP6} ${VRF_IP6} ::1 do log_start - run_cmd nettest -6 -s -d ${VRF} -2 ${VRF} & + run_cmd nettest -6 -s -I ${VRF} -3 ${VRF} & sleep 1 run_cmd nettest -6 -r ${a} -d ${VRF} -0 ${a} log_test_addr ${a} $? 0 "VRF server, VRF client, local connection" @@ -2727,7 +3078,7 @@ ipv6_tcp_vrf() a=${NSA_IP6} log_start - run_cmd nettest -6 -s -d ${VRF} -2 ${VRF} & + run_cmd nettest -6 -s -I ${VRF} -3 ${VRF} & sleep 1 run_cmd nettest -6 -r ${a} -d ${NSA_DEV} -0 ${a} log_test_addr ${a} $? 0 "VRF server, device client, local connection" @@ -2735,13 +3086,13 @@ ipv6_tcp_vrf() a=${NSA_IP6} log_start show_hint "Should fail since unbound client is out of VRF scope" - run_cmd nettest -6 -s -d ${VRF} & + run_cmd nettest -6 -s -I ${VRF} & sleep 1 run_cmd nettest -6 -r ${a} log_test_addr ${a} $? 1 "VRF server, unbound client, local connection" log_start - run_cmd nettest -6 -s -d ${NSA_DEV} -2 ${NSA_DEV} & + run_cmd nettest -6 -s -I ${NSA_DEV} -3 ${NSA_DEV} & sleep 1 run_cmd nettest -6 -r ${a} -d ${VRF} -0 ${a} log_test_addr ${a} $? 0 "Device server, VRF client, local connection" @@ -2749,7 +3100,7 @@ ipv6_tcp_vrf() for a in ${NSA_IP6} ${NSA_LINKIP6} do log_start - run_cmd nettest -6 -s -d ${NSA_DEV} -2 ${NSA_DEV} & + run_cmd nettest -6 -s -I ${NSA_DEV} -3 ${NSA_DEV} & sleep 1 run_cmd nettest -6 -r ${a} -d ${NSA_DEV} -0 ${a} log_test_addr ${a} $? 0 "Device server, device client, local connection" @@ -2789,13 +3140,13 @@ ipv6_udp_novrf() for a in ${NSA_IP6} ${NSA_LINKIP6}%${NSB_DEV} do log_start - run_cmd nettest -6 -D -s -2 ${NSA_DEV} & + run_cmd nettest -6 -D -s -3 ${NSA_DEV} & sleep 1 run_cmd_nsb nettest -6 -D -r ${a} log_test_addr ${a} $? 0 "Global server" log_start - run_cmd nettest -6 -D -d ${NSA_DEV} -s -2 ${NSA_DEV} & + run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} & sleep 1 run_cmd_nsb nettest -6 -D -r ${a} log_test_addr ${a} $? 0 "Device server" @@ -2803,7 +3154,7 @@ ipv6_udp_novrf() a=${NSA_LO_IP6} log_start - run_cmd nettest -6 -D -s -2 ${NSA_DEV} & + run_cmd nettest -6 -D -s -3 ${NSA_DEV} & sleep 1 run_cmd_nsb nettest -6 -D -r ${a} log_test_addr ${a} $? 0 "Global server" @@ -2813,7 +3164,7 @@ ipv6_udp_novrf() # behavior. #log_start #show_hint "Should fail since loopback address is out of scope" - #run_cmd nettest -6 -D -d ${NSA_DEV} -s -2 ${NSA_DEV} & + #run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} & #sleep 1 #run_cmd_nsb nettest -6 -D -r ${a} #log_test_addr ${a} $? 1 "Device server" @@ -2881,7 +3232,7 @@ ipv6_udp_novrf() a=${NSA_IP6} log_start - run_cmd nettest -6 -s -D -d ${NSA_DEV} -2 ${NSA_DEV} & + run_cmd nettest -6 -s -D -I ${NSA_DEV} -3 ${NSA_DEV} & sleep 1 run_cmd nettest -6 -D -r ${a} log_test_addr ${a} $? 0 "Device server, unbound client, local connection" @@ -2890,7 +3241,7 @@ ipv6_udp_novrf() do log_start show_hint "Should fail 'Connection refused' since address is out of device scope" - run_cmd nettest -6 -s -D -d ${NSA_DEV} & + run_cmd nettest -6 -s -D -I ${NSA_DEV} & sleep 1 run_cmd nettest -6 -D -r ${a} log_test_addr ${a} $? 1 "Device server, local connection" @@ -2937,11 +3288,18 @@ ipv6_udp_novrf() sleep 1 run_cmd nettest -6 -D -r ${a} -d ${NSA_DEV} -S log_test_addr ${a} $? 1 "Global server, device client via IP_UNICAST_IF, local connection" + + log_start + show_hint "Should fail 'No route to host' since addresses on loopback are out of device scope" + run_cmd nettest -6 -D -s & + sleep 1 + run_cmd nettest -6 -D -r ${a} -d ${NSA_DEV} -S -U + log_test_addr ${a} $? 1 "Global server, device client via IP_UNICAST_IF, local connection, with connect()" done a=${NSA_IP6} log_start - run_cmd nettest -6 -D -s -d ${NSA_DEV} -2 ${NSA_DEV} & + run_cmd nettest -6 -D -s -I ${NSA_DEV} -3 ${NSA_DEV} & sleep 1 run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a} -0 ${a} log_test_addr ${a} $? 0 "Device server, device client, local conn" @@ -2988,7 +3346,7 @@ ipv6_udp_vrf() for a in ${NSA_IP6} ${VRF_IP6} do log_start - run_cmd nettest -6 -D -d ${VRF} -s -2 ${NSA_DEV} & + run_cmd nettest -6 -D -I ${VRF} -s -3 ${NSA_DEV} & sleep 1 run_cmd_nsb nettest -6 -D -r ${a} log_test_addr ${a} $? 0 "VRF server" @@ -2997,7 +3355,7 @@ ipv6_udp_vrf() for a in ${NSA_IP6} ${VRF_IP6} do log_start - run_cmd nettest -6 -D -d ${NSA_DEV} -s -2 ${NSA_DEV} & + run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} & sleep 1 run_cmd_nsb nettest -6 -D -r ${a} log_test_addr ${a} $? 0 "Enslaved device server" @@ -3028,7 +3386,7 @@ ipv6_udp_vrf() for a in ${NSA_IP6} ${VRF_IP6} do log_start - run_cmd nettest -6 -D -d ${VRF} -s & + run_cmd nettest -6 -D -I ${VRF} -s & sleep 1 run_cmd nettest -6 -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "VRF server, VRF client, local conn" @@ -3043,19 +3401,19 @@ ipv6_udp_vrf() log_test_addr ${a} $? 1 "Global server, device client, local conn" log_start - run_cmd nettest -6 -D -d ${VRF} -s -2 ${NSA_DEV} & + run_cmd nettest -6 -D -I ${VRF} -s -3 ${NSA_DEV} & sleep 1 run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a} log_test_addr ${a} $? 0 "VRF server, device client, local conn" log_start - run_cmd nettest -6 -D -d ${NSA_DEV} -s -2 ${NSA_DEV} & + run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} & sleep 1 run_cmd nettest -6 -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "Enslaved device server, VRF client, local conn" log_start - run_cmd nettest -6 -D -d ${NSA_DEV} -s -2 ${NSA_DEV} & + run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} & sleep 1 run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a} log_test_addr ${a} $? 0 "Enslaved device server, device client, local conn" @@ -3070,7 +3428,7 @@ ipv6_udp_vrf() for a in ${NSA_IP6} ${VRF_IP6} do log_start - run_cmd nettest -6 -D -s -2 ${NSA_DEV} & + run_cmd nettest -6 -D -s -3 ${NSA_DEV} & sleep 1 run_cmd_nsb nettest -6 -D -r ${a} log_test_addr ${a} $? 0 "Global server" @@ -3079,7 +3437,7 @@ ipv6_udp_vrf() for a in ${NSA_IP6} ${VRF_IP6} do log_start - run_cmd nettest -6 -D -d ${VRF} -s -2 ${NSA_DEV} & + run_cmd nettest -6 -D -I ${VRF} -s -3 ${NSA_DEV} & sleep 1 run_cmd_nsb nettest -6 -D -r ${a} log_test_addr ${a} $? 0 "VRF server" @@ -3088,7 +3446,7 @@ ipv6_udp_vrf() for a in ${NSA_IP6} ${VRF_IP6} do log_start - run_cmd nettest -6 -D -d ${NSA_DEV} -s -2 ${NSA_DEV} & + run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} & sleep 1 run_cmd_nsb nettest -6 -D -r ${a} log_test_addr ${a} $? 0 "Enslaved device server" @@ -3132,13 +3490,13 @@ ipv6_udp_vrf() # a=${NSA_IP6} log_start - run_cmd nettest -6 -D -s -2 ${NSA_DEV} & + run_cmd nettest -6 -D -s -3 ${NSA_DEV} & sleep 1 run_cmd nettest -6 -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "Global server, VRF client, local conn" #log_start - run_cmd nettest -6 -D -d ${VRF} -s -2 ${NSA_DEV} & + run_cmd nettest -6 -D -I ${VRF} -s -3 ${NSA_DEV} & sleep 1 run_cmd nettest -6 -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "VRF server, VRF client, local conn" @@ -3146,13 +3504,13 @@ ipv6_udp_vrf() a=${VRF_IP6} log_start - run_cmd nettest -6 -D -s -2 ${VRF} & + run_cmd nettest -6 -D -s -3 ${VRF} & sleep 1 run_cmd nettest -6 -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "Global server, VRF client, local conn" log_start - run_cmd nettest -6 -D -d ${VRF} -s -2 ${VRF} & + run_cmd nettest -6 -D -I ${VRF} -s -3 ${VRF} & sleep 1 run_cmd nettest -6 -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "VRF server, VRF client, local conn" @@ -3168,25 +3526,25 @@ ipv6_udp_vrf() # device to global IP a=${NSA_IP6} log_start - run_cmd nettest -6 -D -s -2 ${NSA_DEV} & + run_cmd nettest -6 -D -s -3 ${NSA_DEV} & sleep 1 run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a} log_test_addr ${a} $? 0 "Global server, device client, local conn" log_start - run_cmd nettest -6 -D -d ${VRF} -s -2 ${NSA_DEV} & + run_cmd nettest -6 -D -I ${VRF} -s -3 ${NSA_DEV} & sleep 1 run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a} log_test_addr ${a} $? 0 "VRF server, device client, local conn" log_start - run_cmd nettest -6 -D -d ${NSA_DEV} -s -2 ${NSA_DEV} & + run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} & sleep 1 run_cmd nettest -6 -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "Device server, VRF client, local conn" log_start - run_cmd nettest -6 -D -d ${NSA_DEV} -s -2 ${NSA_DEV} & + run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} & sleep 1 run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a} log_test_addr ${a} $? 0 "Device server, device client, local conn" @@ -3280,11 +3638,19 @@ ipv6_addr_bind_novrf() log_test_addr ${a} $? 0 "Raw socket bind to local address" log_start - run_cmd nettest -6 -s -R -P ipv6-icmp -l ${a} -d ${NSA_DEV} -b + run_cmd nettest -6 -s -R -P ipv6-icmp -l ${a} -I ${NSA_DEV} -b log_test_addr ${a} $? 0 "Raw socket bind to local address after device bind" done # + # raw socket with nonlocal bind + # + a=${NL_IP6} + log_start + run_cmd nettest -6 -s -R -P icmp -f -l ${a} -I ${NSA_DEV} -b + log_test_addr ${a} $? 0 "Raw socket bind to nonlocal address" + + # # tcp sockets # a=${NSA_IP6} @@ -3293,14 +3659,17 @@ ipv6_addr_bind_novrf() log_test_addr ${a} $? 0 "TCP socket bind to local address" log_start - run_cmd nettest -6 -s -l ${a} -d ${NSA_DEV} -t1 -b + run_cmd nettest -6 -s -l ${a} -I ${NSA_DEV} -t1 -b log_test_addr ${a} $? 0 "TCP socket bind to local address after device bind" + # Sadly, the kernel allows binding a socket to a device and then + # binding to an address not on the device. So this test passes + # when it really should not a=${NSA_LO_IP6} log_start - show_hint "Should fail with 'Cannot assign requested address'" - run_cmd nettest -6 -s -l ${a} -d ${NSA_DEV} -t1 -b - log_test_addr ${a} $? 1 "TCP socket bind to out of scope local address" + show_hint "Tecnically should fail since address is not on device but kernel allows" + run_cmd nettest -6 -s -l ${a} -I ${NSA_DEV} -t1 -b + log_test_addr ${a} $? 0 "TCP socket bind to out of scope local address" } ipv6_addr_bind_vrf() @@ -3311,50 +3680,63 @@ ipv6_addr_bind_vrf() for a in ${NSA_IP6} ${VRF_IP6} do log_start - run_cmd nettest -6 -s -R -P ipv6-icmp -l ${a} -d ${VRF} -b + run_cmd nettest -6 -s -R -P ipv6-icmp -l ${a} -I ${VRF} -b log_test_addr ${a} $? 0 "Raw socket bind to local address after vrf bind" log_start - run_cmd nettest -6 -s -R -P ipv6-icmp -l ${a} -d ${NSA_DEV} -b + run_cmd nettest -6 -s -R -P ipv6-icmp -l ${a} -I ${NSA_DEV} -b log_test_addr ${a} $? 0 "Raw socket bind to local address after device bind" done a=${NSA_LO_IP6} log_start show_hint "Address on loopback is out of VRF scope" - run_cmd nettest -6 -s -R -P ipv6-icmp -l ${a} -d ${VRF} -b + run_cmd nettest -6 -s -R -P ipv6-icmp -l ${a} -I ${VRF} -b log_test_addr ${a} $? 1 "Raw socket bind to invalid local address after vrf bind" # + # raw socket with nonlocal bind + # + a=${NL_IP6} + log_start + run_cmd nettest -6 -s -R -P icmp -f -l ${a} -I ${VRF} -b + log_test_addr ${a} $? 0 "Raw socket bind to nonlocal address after VRF bind" + + # # tcp sockets # # address on enslaved device is valid for the VRF or device in a VRF for a in ${NSA_IP6} ${VRF_IP6} do log_start - run_cmd nettest -6 -s -l ${a} -d ${VRF} -t1 -b + run_cmd nettest -6 -s -l ${a} -I ${VRF} -t1 -b log_test_addr ${a} $? 0 "TCP socket bind to local address with VRF bind" done a=${NSA_IP6} log_start - run_cmd nettest -6 -s -l ${a} -d ${NSA_DEV} -t1 -b + run_cmd nettest -6 -s -l ${a} -I ${NSA_DEV} -t1 -b log_test_addr ${a} $? 0 "TCP socket bind to local address with device bind" + # Sadly, the kernel allows binding a socket to a device and then + # binding to an address not on the device. The only restriction + # is that the address is valid in the L3 domain. So this test + # passes when it really should not a=${VRF_IP6} log_start - run_cmd nettest -6 -s -l ${a} -d ${NSA_DEV} -t1 -b - log_test_addr ${a} $? 1 "TCP socket bind to VRF address with device bind" + show_hint "Tecnically should fail since address is not on device but kernel allows" + run_cmd nettest -6 -s -l ${a} -I ${NSA_DEV} -t1 -b + log_test_addr ${a} $? 0 "TCP socket bind to VRF address with device bind" a=${NSA_LO_IP6} log_start show_hint "Address on loopback out of scope for VRF" - run_cmd nettest -6 -s -l ${a} -d ${VRF} -t1 -b + run_cmd nettest -6 -s -l ${a} -I ${VRF} -t1 -b log_test_addr ${a} $? 1 "TCP socket bind to invalid local address for VRF" log_start show_hint "Address on loopback out of scope for device in VRF" - run_cmd nettest -6 -s -l ${a} -d ${NSA_DEV} -t1 -b + run_cmd nettest -6 -s -l ${a} -I ${NSA_DEV} -t1 -b log_test_addr ${a} $? 1 "TCP socket bind to invalid local address for device bind" } @@ -3402,7 +3784,7 @@ ipv6_rt() for a in ${NSA_IP6} ${VRF_IP6} do log_start - run_cmd nettest ${varg} -d ${VRF} -s & + run_cmd nettest ${varg} -I ${VRF} -s & sleep 1 run_cmd_nsb nettest ${varg} -r ${a} & sleep 3 @@ -3416,7 +3798,7 @@ ipv6_rt() for a in ${NSA_IP6} ${VRF_IP6} do log_start - run_cmd nettest ${varg} -d ${NSA_DEV} -s & + run_cmd nettest ${varg} -I ${NSA_DEV} -s & sleep 1 run_cmd_nsb nettest ${varg} -r ${a} & sleep 3 @@ -3473,7 +3855,7 @@ ipv6_rt() for a in ${NSA_IP6} ${VRF_IP6} do log_start - run_cmd nettest ${varg} -d ${VRF} -s & + run_cmd nettest ${varg} -I ${VRF} -s & sleep 1 run_cmd nettest ${varg} -d ${VRF} -r ${a} & sleep 3 @@ -3497,7 +3879,7 @@ ipv6_rt() setup ${with_vrf} log_start - run_cmd nettest ${varg} -d ${VRF} -s & + run_cmd nettest ${varg} -I ${VRF} -s & sleep 1 run_cmd nettest ${varg} -d ${NSA_DEV} -r ${a} & sleep 3 @@ -3508,7 +3890,7 @@ ipv6_rt() setup ${with_vrf} log_start - run_cmd nettest ${varg} -d ${NSA_DEV} -s & + run_cmd nettest ${varg} -I ${NSA_DEV} -s & sleep 1 run_cmd nettest ${varg} -d ${NSA_DEV} -r ${a} & sleep 3 @@ -3787,10 +4169,81 @@ use_case_br() setup_cmd_nsb ip li del vlan100 2>/dev/null } +# VRF only. +# ns-A device is connected to both ns-B and ns-C on a single VRF but only has +# LLA on the interfaces +use_case_ping_lla_multi() +{ + setup_lla_only + # only want reply from ns-A + setup_cmd_nsb sysctl -qw net.ipv6.icmp.echo_ignore_multicast=1 + setup_cmd_nsc sysctl -qw net.ipv6.icmp.echo_ignore_multicast=1 + + log_start + run_cmd_nsb ping -c1 -w1 ${MCAST}%${NSB_DEV} + log_test_addr ${MCAST}%${NSB_DEV} $? 0 "Pre cycle, ping out ns-B" + + run_cmd_nsc ping -c1 -w1 ${MCAST}%${NSC_DEV} + log_test_addr ${MCAST}%${NSC_DEV} $? 0 "Pre cycle, ping out ns-C" + + # cycle/flap the first ns-A interface + setup_cmd ip link set ${NSA_DEV} down + setup_cmd ip link set ${NSA_DEV} up + sleep 1 + + log_start + run_cmd_nsb ping -c1 -w1 ${MCAST}%${NSB_DEV} + log_test_addr ${MCAST}%${NSB_DEV} $? 0 "Post cycle ${NSA} ${NSA_DEV}, ping out ns-B" + run_cmd_nsc ping -c1 -w1 ${MCAST}%${NSC_DEV} + log_test_addr ${MCAST}%${NSC_DEV} $? 0 "Post cycle ${NSA} ${NSA_DEV}, ping out ns-C" + + # cycle/flap the second ns-A interface + setup_cmd ip link set ${NSA_DEV2} down + setup_cmd ip link set ${NSA_DEV2} up + sleep 1 + + log_start + run_cmd_nsb ping -c1 -w1 ${MCAST}%${NSB_DEV} + log_test_addr ${MCAST}%${NSB_DEV} $? 0 "Post cycle ${NSA} ${NSA_DEV2}, ping out ns-B" + run_cmd_nsc ping -c1 -w1 ${MCAST}%${NSC_DEV} + log_test_addr ${MCAST}%${NSC_DEV} $? 0 "Post cycle ${NSA} ${NSA_DEV2}, ping out ns-C" +} + +# Perform IPv{4,6} SNAT on ns-A, and verify TCP connection is successfully +# established with ns-B. +use_case_snat_on_vrf() +{ + setup "yes" + + local port="12345" + + run_cmd iptables -t nat -A POSTROUTING -p tcp -m tcp --dport ${port} -j SNAT --to-source ${NSA_LO_IP} -o ${VRF} + run_cmd ip6tables -t nat -A POSTROUTING -p tcp -m tcp --dport ${port} -j SNAT --to-source ${NSA_LO_IP6} -o ${VRF} + + run_cmd_nsb nettest -s -l ${NSB_IP} -p ${port} & + sleep 1 + run_cmd nettest -d ${VRF} -r ${NSB_IP} -p ${port} + log_test $? 0 "IPv4 TCP connection over VRF with SNAT" + + run_cmd_nsb nettest -6 -s -l ${NSB_IP6} -p ${port} & + sleep 1 + run_cmd nettest -6 -d ${VRF} -r ${NSB_IP6} -p ${port} + log_test $? 0 "IPv6 TCP connection over VRF with SNAT" + + # Cleanup + run_cmd iptables -t nat -D POSTROUTING -p tcp -m tcp --dport ${port} -j SNAT --to-source ${NSA_LO_IP} -o ${VRF} + run_cmd ip6tables -t nat -D POSTROUTING -p tcp -m tcp --dport ${port} -j SNAT --to-source ${NSA_LO_IP6} -o ${VRF} +} + use_cases() { log_section "Use cases" + log_subsection "Device enslaved to bridge" use_case_br + log_subsection "Ping LLA with multiple interfaces" + use_case_ping_lla_multi + log_subsection "SNAT on VRF" + use_case_snat_on_vrf } ################################################################################ @@ -3807,14 +4260,17 @@ usage: ${0##*/} OPTS -p Pause on fail -P Pause after each test -v Be verbose + +Tests: + $TESTS_IPV4 $TESTS_IPV6 $TESTS_OTHER EOF } ################################################################################ # main -TESTS_IPV4="ipv4_ping ipv4_tcp ipv4_udp ipv4_addr_bind ipv4_runtime ipv4_netfilter" -TESTS_IPV6="ipv6_ping ipv6_tcp ipv6_udp ipv6_addr_bind ipv6_runtime ipv6_netfilter" +TESTS_IPV4="ipv4_ping ipv4_tcp ipv4_udp ipv4_bind ipv4_runtime ipv4_netfilter" +TESTS_IPV6="ipv6_ping ipv6_tcp ipv6_udp ipv6_bind ipv6_runtime ipv6_netfilter" TESTS_OTHER="use_cases" PAUSE_ON_FAIL=no @@ -3848,10 +4304,13 @@ elif [ "$TESTS" = "ipv6" ]; then TESTS="$TESTS_IPV6" fi -which nettest >/dev/null -if [ $? -ne 0 ]; then - echo "'nettest' command not found; skipping tests" - exit 0 +# nettest can be run from PATH or from same directory as this selftest +if ! which nettest >/dev/null; then + PATH=$PWD:$PATH + if ! which nettest >/dev/null; then + echo "'nettest' command not found; skipping tests" + exit $ksft_skip + fi fi declare -i nfail=0 @@ -3879,8 +4338,6 @@ do # setup namespaces and config, but do not run any tests setup) setup; exit 0;; vrf_setup) setup "yes"; exit 0;; - - help) echo "Test names: $TESTS"; exit 0;; esac done @@ -3888,3 +4345,11 @@ cleanup 2>/dev/null printf "\nTests passed: %3d\n" ${nsuccess} printf "Tests failed: %3d\n" ${nfail} + +if [ $nfail -ne 0 ]; then + exit 1 # KSFT_FAIL +elif [ $nsuccess -eq 0 ]; then + exit $ksft_skip +fi + +exit 0 # KSFT_PASS diff --git a/tools/testing/selftests/net/fdb_flush.sh b/tools/testing/selftests/net/fdb_flush.sh new file mode 100755 index 000000000000..d5e3abb8658c --- /dev/null +++ b/tools/testing/selftests/net/fdb_flush.sh @@ -0,0 +1,813 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# This test is for checking functionality of flushing FDB entries. +# Check that flush works as expected with all the supported arguments and verify +# some combinations of arguments. + +source lib.sh + +FLUSH_BY_STATE_TESTS=" + vxlan_test_flush_by_permanent + vxlan_test_flush_by_nopermanent + vxlan_test_flush_by_static + vxlan_test_flush_by_nostatic + vxlan_test_flush_by_dynamic + vxlan_test_flush_by_nodynamic +" + +FLUSH_BY_FLAG_TESTS=" + vxlan_test_flush_by_extern_learn + vxlan_test_flush_by_noextern_learn + vxlan_test_flush_by_router + vxlan_test_flush_by_norouter +" + +TESTS=" + vxlan_test_flush_by_dev + vxlan_test_flush_by_vni + vxlan_test_flush_by_src_vni + vxlan_test_flush_by_port + vxlan_test_flush_by_dst_ip + vxlan_test_flush_by_nhid + $FLUSH_BY_STATE_TESTS + $FLUSH_BY_FLAG_TESTS + vxlan_test_flush_by_several_args + vxlan_test_flush_by_remote_attributes + bridge_test_flush_by_dev + bridge_test_flush_by_vlan + bridge_vxlan_test_flush +" + +: ${VERBOSE:=0} +: ${PAUSE_ON_FAIL:=no} +: ${PAUSE:=no} +: ${VXPORT:=4789} + +run_cmd() +{ + local cmd="$1" + local out + local rc + local stderr="2>/dev/null" + + if [ "$VERBOSE" = "1" ]; then + printf "COMMAND: $cmd\n" + stderr= + fi + + out=$(eval $cmd $stderr) + rc=$? + if [ "$VERBOSE" = "1" -a -n "$out" ]; then + echo " $out" + fi + + return $rc +} + +log_test() +{ + local rc=$1 + local expected=$2 + local msg="$3" + local nsuccess + local nfail + local ret + + if [ ${rc} -eq ${expected} ]; then + printf "TEST: %-60s [ OK ]\n" "${msg}" + nsuccess=$((nsuccess+1)) + else + ret=1 + nfail=$((nfail+1)) + printf "TEST: %-60s [FAIL]\n" "${msg}" + if [ "$VERBOSE" = "1" ]; then + echo " rc=$rc, expected $expected" + fi + + if [ "${PAUSE_ON_FAIL}" = "yes" ]; then + echo + echo "hit enter to continue, 'q' to quit" + read a + [ "$a" = "q" ] && exit 1 + fi + fi + + if [ "${PAUSE}" = "yes" ]; then + echo + echo "hit enter to continue, 'q' to quit" + read a + [ "$a" = "q" ] && exit 1 + fi + + [ "$VERBOSE" = "1" ] && echo +} + +MAC_POOL_1=" + de:ad:be:ef:13:10 + de:ad:be:ef:13:11 + de:ad:be:ef:13:12 + de:ad:be:ef:13:13 + de:ad:be:ef:13:14 +" +mac_pool_1_len=$(echo "$MAC_POOL_1" | grep -c .) + +MAC_POOL_2=" + ca:fe:be:ef:13:10 + ca:fe:be:ef:13:11 + ca:fe:be:ef:13:12 + ca:fe:be:ef:13:13 + ca:fe:be:ef:13:14 +" +mac_pool_2_len=$(echo "$MAC_POOL_2" | grep -c .) + +fdb_add_mac_pool_1() +{ + local dev=$1; shift + local args="$@" + + for mac in $MAC_POOL_1 + do + $BRIDGE fdb add $mac dev $dev $args + done +} + +fdb_add_mac_pool_2() +{ + local dev=$1; shift + local args="$@" + + for mac in $MAC_POOL_2 + do + $BRIDGE fdb add $mac dev $dev $args + done +} + +fdb_check_n_entries_by_dev_filter() +{ + local dev=$1; shift + local exp_entries=$1; shift + local filter="$@" + + local entries=$($BRIDGE fdb show dev $dev | grep "$filter" | wc -l) + + [[ $entries -eq $exp_entries ]] + rc=$? + + log_test $rc 0 "$dev: Expected $exp_entries FDB entries, got $entries" + return $rc +} + +vxlan_test_flush_by_dev() +{ + local vni=3000 + local dst_ip=192.0.2.1 + + fdb_add_mac_pool_1 vx10 vni $vni dst $dst_ip + fdb_add_mac_pool_2 vx20 vni $vni dst $dst_ip + + fdb_check_n_entries_by_dev_filter vx10 $mac_pool_1_len + fdb_check_n_entries_by_dev_filter vx20 $mac_pool_2_len + + run_cmd "$BRIDGE fdb flush dev vx10" + log_test $? 0 "Flush FDB by dev vx10" + + fdb_check_n_entries_by_dev_filter vx10 0 + log_test $? 0 "Flush FDB by dev vx10 - test vx10 entries" + + fdb_check_n_entries_by_dev_filter vx20 $mac_pool_2_len + log_test $? 0 "Flush FDB by dev vx10 - test vx20 entries" +} + +vxlan_test_flush_by_vni() +{ + local vni_1=3000 + local vni_2=4000 + local dst_ip=192.0.2.1 + + fdb_add_mac_pool_1 vx10 vni $vni_1 dst $dst_ip + fdb_add_mac_pool_2 vx10 vni $vni_2 dst $dst_ip + + fdb_check_n_entries_by_dev_filter vx10 $mac_pool_1_len vni $vni_1 + fdb_check_n_entries_by_dev_filter vx10 $mac_pool_2_len vni $vni_2 + + run_cmd "$BRIDGE fdb flush dev vx10 vni $vni_2" + log_test $? 0 "Flush FDB by dev vx10 and vni $vni_2" + + fdb_check_n_entries_by_dev_filter vx10 $mac_pool_1_len vni $vni_1 + log_test $? 0 "Test entries with vni $vni_1" + + fdb_check_n_entries_by_dev_filter vx10 0 vni $vni_2 + log_test $? 0 "Test entries with vni $vni_2" +} + +vxlan_test_flush_by_src_vni() +{ + # Set some entries with {vni=x,src_vni=y} and some with the opposite - + # {vni=y,src_vni=x}, to verify that when we flush by src_vni=x, entries + # with vni=x are not flused. + local vni_1=3000 + local vni_2=4000 + local src_vni_1=4000 + local src_vni_2=3000 + local dst_ip=192.0.2.1 + + # Reconfigure vx10 with 'external' to get 'src_vni' details in + # 'bridge fdb' output + $IP link del dev vx10 + $IP link add name vx10 type vxlan dstport "$VXPORT" external + + fdb_add_mac_pool_1 vx10 vni $vni_1 src_vni $src_vni_1 dst $dst_ip + fdb_add_mac_pool_2 vx10 vni $vni_2 src_vni $src_vni_2 dst $dst_ip + + fdb_check_n_entries_by_dev_filter vx10 $mac_pool_1_len \ + src_vni $src_vni_1 + fdb_check_n_entries_by_dev_filter vx10 $mac_pool_2_len \ + src_vni $src_vni_2 + + run_cmd "$BRIDGE fdb flush dev vx10 src_vni $src_vni_2" + log_test $? 0 "Flush FDB by dev vx10 and src_vni $src_vni_2" + + fdb_check_n_entries_by_dev_filter vx10 $mac_pool_1_len \ + src_vni $src_vni_1 + log_test $? 0 "Test entries with src_vni $src_vni_1" + + fdb_check_n_entries_by_dev_filter vx10 0 src_vni $src_vni_2 + log_test $? 0 "Test entries with src_vni $src_vni_2" +} + +vxlan_test_flush_by_port() +{ + local port_1=1234 + local port_2=4321 + local dst_ip=192.0.2.1 + + fdb_add_mac_pool_1 vx10 port $port_1 dst $dst_ip + fdb_add_mac_pool_2 vx10 port $port_2 dst $dst_ip + + fdb_check_n_entries_by_dev_filter vx10 $mac_pool_1_len port $port_1 + fdb_check_n_entries_by_dev_filter vx10 $mac_pool_2_len port $port_2 + + run_cmd "$BRIDGE fdb flush dev vx10 port $port_2" + log_test $? 0 "Flush FDB by dev vx10 and port $port_2" + + fdb_check_n_entries_by_dev_filter vx10 $mac_pool_1_len port $port_1 + log_test $? 0 "Test entries with port $port_1" + + fdb_check_n_entries_by_dev_filter vx10 0 port $port_2 + log_test $? 0 "Test entries with port $port_2" +} + +vxlan_test_flush_by_dst_ip() +{ + local dst_ip_1=192.0.2.1 + local dst_ip_2=192.0.2.2 + + fdb_add_mac_pool_1 vx10 dst $dst_ip_1 + fdb_add_mac_pool_2 vx10 dst $dst_ip_2 + + fdb_check_n_entries_by_dev_filter vx10 $mac_pool_1_len dst $dst_ip_1 + fdb_check_n_entries_by_dev_filter vx10 $mac_pool_2_len dst $dst_ip_2 + + run_cmd "$BRIDGE fdb flush dev vx10 dst $dst_ip_2" + log_test $? 0 "Flush FDB by dev vx10 and dst $dst_ip_2" + + fdb_check_n_entries_by_dev_filter vx10 $mac_pool_1_len dst $dst_ip_1 + log_test $? 0 "Test entries with dst $dst_ip_1" + + fdb_check_n_entries_by_dev_filter vx10 0 dst $dst_ip_2 + log_test $? 0 "Test entries with dst $dst_ip_2" +} + +nexthops_add() +{ + local nhid_1=$1; shift + local nhid_2=$1; shift + + $IP nexthop add id 10 via 192.0.2.1 fdb + $IP nexthop add id $nhid_1 group 10 fdb + + $IP nexthop add id 20 via 192.0.2.2 fdb + $IP nexthop add id $nhid_2 group 20 fdb +} + +vxlan_test_flush_by_nhid() +{ + local nhid_1=100 + local nhid_2=200 + + nexthops_add $nhid_1 $nhid_2 + + fdb_add_mac_pool_1 vx10 nhid $nhid_1 + fdb_add_mac_pool_2 vx10 nhid $nhid_2 + + fdb_check_n_entries_by_dev_filter vx10 $mac_pool_1_len nhid $nhid_1 + fdb_check_n_entries_by_dev_filter vx10 $mac_pool_2_len nhid $nhid_2 + + run_cmd "$BRIDGE fdb flush dev vx10 nhid $nhid_2" + log_test $? 0 "Flush FDB by dev vx10 and nhid $nhid_2" + + fdb_check_n_entries_by_dev_filter vx10 $mac_pool_1_len nhid $nhid_1 + log_test $? 0 "Test entries with nhid $nhid_1" + + fdb_check_n_entries_by_dev_filter vx10 0 nhid $nhid_2 + log_test $? 0 "Test entries with nhid $nhid_2" + + # Flush also entries with $nhid_1, and then verify that flushing by + # 'nhid' does not return an error when there are no entries with + # nexthops. + run_cmd "$BRIDGE fdb flush dev vx10 nhid $nhid_1" + log_test $? 0 "Flush FDB by dev vx10 and nhid $nhid_1" + + fdb_check_n_entries_by_dev_filter vx10 0 nhid + log_test $? 0 "Test entries with 'nhid' keyword" + + run_cmd "$BRIDGE fdb flush dev vx10 nhid $nhid_1" + log_test $? 0 "Flush FDB by nhid when there are no entries with nexthop" +} + +vxlan_test_flush_by_state() +{ + local flush_by_state=$1; shift + local state_1=$1; shift + local exp_state_1=$1; shift + local state_2=$1; shift + local exp_state_2=$1; shift + + local dst_ip_1=192.0.2.1 + local dst_ip_2=192.0.2.2 + + fdb_add_mac_pool_1 vx10 dst $dst_ip_1 $state_1 + fdb_add_mac_pool_2 vx10 dst $dst_ip_2 $state_2 + + # Check the entries by dst_ip as not all states appear in 'bridge fdb' + # output. + fdb_check_n_entries_by_dev_filter vx10 $mac_pool_1_len dst $dst_ip_1 + fdb_check_n_entries_by_dev_filter vx10 $mac_pool_2_len dst $dst_ip_2 + + run_cmd "$BRIDGE fdb flush dev vx10 $flush_by_state" + log_test $? 0 "Flush FDB by dev vx10 and state $flush_by_state" + + fdb_check_n_entries_by_dev_filter vx10 $exp_state_1 dst $dst_ip_1 + log_test $? 0 "Test entries with state $state_1" + + fdb_check_n_entries_by_dev_filter vx10 $exp_state_2 dst $dst_ip_2 + log_test $? 0 "Test entries with state $state_2" +} + +vxlan_test_flush_by_permanent() +{ + # Entries that are added without state get 'permanent' state by + # default, add some entries with flag 'extern_learn' instead of state, + # so they will be added with 'permanent' and should be flushed also. + local flush_by_state="permanent" + local state_1="permanent" + local exp_state_1=0 + local state_2="extern_learn" + local exp_state_2=0 + + vxlan_test_flush_by_state $flush_by_state $state_1 $exp_state_1 \ + $state_2 $exp_state_2 +} + +vxlan_test_flush_by_nopermanent() +{ + local flush_by_state="nopermanent" + local state_1="permanent" + local exp_state_1=$mac_pool_1_len + local state_2="static" + local exp_state_2=0 + + vxlan_test_flush_by_state $flush_by_state $state_1 $exp_state_1 \ + $state_2 $exp_state_2 +} + +vxlan_test_flush_by_static() +{ + local flush_by_state="static" + local state_1="static" + local exp_state_1=0 + local state_2="dynamic" + local exp_state_2=$mac_pool_2_len + + vxlan_test_flush_by_state $flush_by_state $state_1 $exp_state_1 \ + $state_2 $exp_state_2 +} + +vxlan_test_flush_by_nostatic() +{ + local flush_by_state="nostatic" + local state_1="permanent" + local exp_state_1=$mac_pool_1_len + local state_2="dynamic" + local exp_state_2=0 + + vxlan_test_flush_by_state $flush_by_state $state_1 $exp_state_1 \ + $state_2 $exp_state_2 +} + +vxlan_test_flush_by_dynamic() +{ + local flush_by_state="dynamic" + local state_1="dynamic" + local exp_state_1=0 + local state_2="static" + local exp_state_2=$mac_pool_2_len + + vxlan_test_flush_by_state $flush_by_state $state_1 $exp_state_1 \ + $state_2 $exp_state_2 +} + +vxlan_test_flush_by_nodynamic() +{ + local flush_by_state="nodynamic" + local state_1="permanent" + local exp_state_1=0 + local state_2="dynamic" + local exp_state_2=$mac_pool_2_len + + vxlan_test_flush_by_state $flush_by_state $state_1 $exp_state_1 \ + $state_2 $exp_state_2 +} + +vxlan_test_flush_by_flag() +{ + local flush_by_flag=$1; shift + local flag_1=$1; shift + local exp_flag_1=$1; shift + local flag_2=$1; shift + local exp_flag_2=$1; shift + + local dst_ip_1=192.0.2.1 + local dst_ip_2=192.0.2.2 + + fdb_add_mac_pool_1 vx10 dst $dst_ip_1 $flag_1 + fdb_add_mac_pool_2 vx10 dst $dst_ip_2 $flag_2 + + fdb_check_n_entries_by_dev_filter vx10 $mac_pool_1_len $flag_1 + fdb_check_n_entries_by_dev_filter vx10 $mac_pool_2_len $flag_2 + + run_cmd "$BRIDGE fdb flush dev vx10 $flush_by_flag" + log_test $? 0 "Flush FDB by dev vx10 and flag $flush_by_flag" + + fdb_check_n_entries_by_dev_filter vx10 $exp_flag_1 dst $dst_ip_1 + log_test $? 0 "Test entries with flag $flag_1" + + fdb_check_n_entries_by_dev_filter vx10 $exp_flag_2 dst $dst_ip_2 + log_test $? 0 "Test entries with flag $flag_2" +} + +vxlan_test_flush_by_extern_learn() +{ + local flush_by_flag="extern_learn" + local flag_1="extern_learn" + local exp_flag_1=0 + local flag_2="router" + local exp_flag_2=$mac_pool_2_len + + vxlan_test_flush_by_flag $flush_by_flag $flag_1 $exp_flag_1 \ + $flag_2 $exp_flag_2 +} + +vxlan_test_flush_by_noextern_learn() +{ + local flush_by_flag="noextern_learn" + local flag_1="extern_learn" + local exp_flag_1=$mac_pool_1_len + local flag_2="router" + local exp_flag_2=0 + + vxlan_test_flush_by_flag $flush_by_flag $flag_1 $exp_flag_1 \ + $flag_2 $exp_flag_2 +} + +vxlan_test_flush_by_router() +{ + local flush_by_flag="router" + local flag_1="router" + local exp_flag_1=0 + local flag_2="extern_learn" + local exp_flag_2=$mac_pool_2_len + + vxlan_test_flush_by_flag $flush_by_flag $flag_1 $exp_flag_1 \ + $flag_2 $exp_flag_2 +} + +vxlan_test_flush_by_norouter() +{ + + local flush_by_flag="norouter" + local flag_1="router" + local exp_flag_1=$mac_pool_1_len + local flag_2="extern_learn" + local exp_flag_2=0 + + vxlan_test_flush_by_flag $flush_by_flag $flag_1 $exp_flag_1 \ + $flag_2 $exp_flag_2 +} + +vxlan_test_flush_by_several_args() +{ + local dst_ip_1=192.0.2.1 + local dst_ip_2=192.0.2.2 + local state_1=permanent + local state_2=static + local vni=3000 + local port=1234 + local nhid=100 + local flag=router + local flush_args + + ################### Flush by 2 args - nhid and flag #################### + $IP nexthop add id 10 via 192.0.2.1 fdb + $IP nexthop add id $nhid group 10 fdb + + fdb_add_mac_pool_1 vx10 nhid $nhid $flag $state_1 + fdb_add_mac_pool_2 vx10 nhid $nhid $flag $state_2 + + fdb_check_n_entries_by_dev_filter vx10 $mac_pool_1_len $state_1 + fdb_check_n_entries_by_dev_filter vx10 $mac_pool_2_len $state_2 + + run_cmd "$BRIDGE fdb flush dev vx10 nhid $nhid $flag" + log_test $? 0 "Flush FDB by dev vx10 nhid $nhid $flag" + + # All entries should be flushed as 'state' is not an argument for flush + # filtering. + fdb_check_n_entries_by_dev_filter vx10 0 $state_1 + log_test $? 0 "Test entries with state $state_1" + + fdb_check_n_entries_by_dev_filter vx10 0 $state_2 + log_test $? 0 "Test entries with state $state_2" + + ################ Flush by 3 args - VNI, port and dst_ip ################ + fdb_add_mac_pool_1 vx10 vni $vni port $port dst $dst_ip_1 + fdb_add_mac_pool_2 vx10 vni $vni port $port dst $dst_ip_2 + + fdb_check_n_entries_by_dev_filter vx10 $mac_pool_1_len dst $dst_ip_1 + fdb_check_n_entries_by_dev_filter vx10 $mac_pool_2_len dst $dst_ip_2 + + flush_args="vni $vni port $port dst $dst_ip_2" + run_cmd "$BRIDGE fdb flush dev vx10 $flush_args" + log_test $? 0 "Flush FDB by dev vx10 $flush_args" + + # Only entries with $dst_ip_2 should be flushed, even the rest arguments + # match the filter, the flush should be AND of all the arguments. + fdb_check_n_entries_by_dev_filter vx10 $mac_pool_1_len dst $dst_ip_1 + log_test $? 0 "Test entries with dst $dst_ip_1" + + fdb_check_n_entries_by_dev_filter vx10 0 dst $dst_ip_2 + log_test $? 0 "Test entries with dst $dst_ip_2" +} + +multicast_fdb_entries_add() +{ + mac=00:00:00:00:00:00 + vnis=(2000 3000) + + for vni in "${vnis[@]}"; do + $BRIDGE fdb append $mac dev vx10 dst 192.0.2.1 vni $vni \ + src_vni 5000 + $BRIDGE fdb append $mac dev vx10 dst 192.0.2.1 vni $vni \ + port 1111 + $BRIDGE fdb append $mac dev vx10 dst 192.0.2.2 vni $vni \ + port 2222 + done +} + +vxlan_test_flush_by_remote_attributes() +{ + local flush_args + + # Reconfigure vx10 with 'external' to get 'src_vni' details in + # 'bridge fdb' output + $IP link del dev vx10 + $IP link add name vx10 type vxlan dstport "$VXPORT" external + + # For multicat FDB entries, the VXLAN driver stores a linked list of + # remotes for a given key. Verify that only the expected remotes are + # flushed. + multicast_fdb_entries_add + + ## Flush by 3 remote's attributes - destination IP, port and VNI ## + flush_args="dst 192.0.2.1 port 1111 vni 2000" + fdb_check_n_entries_by_dev_filter vx10 1 $flush_args + + t0_n_entries=$($BRIDGE fdb show dev vx10 | wc -l) + run_cmd "$BRIDGE fdb flush dev vx10 $flush_args" + log_test $? 0 "Flush FDB by dev vx10 $flush_args" + + fdb_check_n_entries_by_dev_filter vx10 0 $flush_args + + exp_n_entries=$((t0_n_entries - 1)) + t1_n_entries=$($BRIDGE fdb show dev vx10 | wc -l) + [[ $t1_n_entries -eq $exp_n_entries ]] + log_test $? 0 "Check how many entries were flushed" + + ## Flush by 2 remote's attributes - destination IP and port ## + flush_args="dst 192.0.2.2 port 2222" + + fdb_check_n_entries_by_dev_filter vx10 2 $flush_args + + t0_n_entries=$($BRIDGE fdb show dev vx10 | wc -l) + run_cmd "$BRIDGE fdb flush dev vx10 $flush_args" + log_test $? 0 "Flush FDB by dev vx10 $flush_args" + + fdb_check_n_entries_by_dev_filter vx10 0 $flush_args + + exp_n_entries=$((t0_n_entries - 2)) + t1_n_entries=$($BRIDGE fdb show dev vx10 | wc -l) + [[ $t1_n_entries -eq $exp_n_entries ]] + log_test $? 0 "Check how many entries were flushed" + + ## Flush by source VNI, which is not remote's attribute and VNI ## + flush_args="vni 3000 src_vni 5000" + + fdb_check_n_entries_by_dev_filter vx10 1 $flush_args + + t0_n_entries=$($BRIDGE fdb show dev vx10 | wc -l) + run_cmd "$BRIDGE fdb flush dev vx10 $flush_args" + log_test $? 0 "Flush FDB by dev vx10 $flush_args" + + fdb_check_n_entries_by_dev_filter vx10 0 $flush_args + + exp_n_entries=$((t0_n_entries -1)) + t1_n_entries=$($BRIDGE fdb show dev vx10 | wc -l) + [[ $t1_n_entries -eq $exp_n_entries ]] + log_test $? 0 "Check how many entries were flushed" + + # Flush by 1 remote's attribute - destination IP ## + flush_args="dst 192.0.2.1" + + fdb_check_n_entries_by_dev_filter vx10 2 $flush_args + + t0_n_entries=$($BRIDGE fdb show dev vx10 | wc -l) + run_cmd "$BRIDGE fdb flush dev vx10 $flush_args" + log_test $? 0 "Flush FDB by dev vx10 $flush_args" + + fdb_check_n_entries_by_dev_filter vx10 0 $flush_args + + exp_n_entries=$((t0_n_entries -2)) + t1_n_entries=$($BRIDGE fdb show dev vx10 | wc -l) + [[ $t1_n_entries -eq $exp_n_entries ]] + log_test $? 0 "Check how many entries were flushed" +} + +bridge_test_flush_by_dev() +{ + local dst_ip=192.0.2.1 + local br0_n_ent_t0=$($BRIDGE fdb show dev br0 | wc -l) + local br1_n_ent_t0=$($BRIDGE fdb show dev br1 | wc -l) + + fdb_add_mac_pool_1 br0 dst $dst_ip + fdb_add_mac_pool_2 br1 dst $dst_ip + + # Each 'fdb add' command adds one extra entry in the bridge with the + # default vlan. + local exp_br0_n_ent=$(($br0_n_ent_t0 + 2 * $mac_pool_1_len)) + local exp_br1_n_ent=$(($br1_n_ent_t0 + 2 * $mac_pool_2_len)) + + fdb_check_n_entries_by_dev_filter br0 $exp_br0_n_ent + fdb_check_n_entries_by_dev_filter br1 $exp_br1_n_ent + + run_cmd "$BRIDGE fdb flush dev br0" + log_test $? 0 "Flush FDB by dev br0" + + # The default entry should not be flushed + fdb_check_n_entries_by_dev_filter br0 1 + log_test $? 0 "Flush FDB by dev br0 - test br0 entries" + + fdb_check_n_entries_by_dev_filter br1 $exp_br1_n_ent + log_test $? 0 "Flush FDB by dev br0 - test br1 entries" +} + +bridge_test_flush_by_vlan() +{ + local vlan_1=10 + local vlan_2=20 + local vlan_1_ent_t0 + local vlan_2_ent_t0 + + $BRIDGE vlan add vid $vlan_1 dev br0 self + $BRIDGE vlan add vid $vlan_2 dev br0 self + + vlan_1_ent_t0=$($BRIDGE fdb show dev br0 | grep "vlan $vlan_1" | wc -l) + vlan_2_ent_t0=$($BRIDGE fdb show dev br0 | grep "vlan $vlan_2" | wc -l) + + fdb_add_mac_pool_1 br0 vlan $vlan_1 + fdb_add_mac_pool_2 br0 vlan $vlan_2 + + local exp_vlan_1_ent=$(($vlan_1_ent_t0 + $mac_pool_1_len)) + local exp_vlan_2_ent=$(($vlan_2_ent_t0 + $mac_pool_2_len)) + + fdb_check_n_entries_by_dev_filter br0 $exp_vlan_1_ent vlan $vlan_1 + fdb_check_n_entries_by_dev_filter br0 $exp_vlan_2_ent vlan $vlan_2 + + run_cmd "$BRIDGE fdb flush dev br0 vlan $vlan_1" + log_test $? 0 "Flush FDB by dev br0 and vlan $vlan_1" + + fdb_check_n_entries_by_dev_filter br0 0 vlan $vlan_1 + log_test $? 0 "Test entries with vlan $vlan_1" + + fdb_check_n_entries_by_dev_filter br0 $exp_vlan_2_ent vlan $vlan_2 + log_test $? 0 "Test entries with vlan $vlan_2" +} + +bridge_vxlan_test_flush() +{ + local vlan_1=10 + local dst_ip=192.0.2.1 + + $IP link set dev vx10 master br0 + $BRIDGE vlan add vid $vlan_1 dev br0 self + $BRIDGE vlan add vid $vlan_1 dev vx10 + + fdb_add_mac_pool_1 vx10 vni 3000 dst $dst_ip self master + + fdb_check_n_entries_by_dev_filter vx10 $mac_pool_1_len vlan $vlan_1 + fdb_check_n_entries_by_dev_filter vx10 $mac_pool_1_len vni 3000 + + # Such command should fail in VXLAN driver as vlan is not supported, + # but the command should flush the entries in the bridge + run_cmd "$BRIDGE fdb flush dev vx10 vlan $vlan_1 master self" + log_test $? 255 \ + "Flush FDB by dev vx10, vlan $vlan_1, master and self" + + fdb_check_n_entries_by_dev_filter vx10 0 vlan $vlan_1 + log_test $? 0 "Test entries with vlan $vlan_1" + + fdb_check_n_entries_by_dev_filter vx10 $mac_pool_1_len dst $dst_ip + log_test $? 0 "Test entries with dst $dst_ip" +} + +setup() +{ + setup_ns NS + IP="ip -netns ${NS}" + BRIDGE="bridge -netns ${NS}" + + $IP link add name vx10 type vxlan id 1000 dstport "$VXPORT" + $IP link add name vx20 type vxlan id 2000 dstport "$VXPORT" + + $IP link add br0 type bridge vlan_filtering 1 + $IP link add br1 type bridge vlan_filtering 1 +} + +cleanup() +{ + $IP link del dev br1 + $IP link del dev br0 + + $IP link del dev vx20 + $IP link del dev vx10 + + cleanup_ns ${NS} +} + +################################################################################ +# main + +while getopts :t:pPhvw: o +do + case $o in + t) TESTS=$OPTARG;; + p) PAUSE_ON_FAIL=yes;; + P) PAUSE=yes;; + v) VERBOSE=$(($VERBOSE + 1));; + w) PING_TIMEOUT=$OPTARG;; + h) usage; exit 0;; + *) usage; exit 1;; + esac +done + +# make sure we don't pause twice +[ "${PAUSE}" = "yes" ] && PAUSE_ON_FAIL=no + +if [ "$(id -u)" -ne 0 ];then + echo "SKIP: Need root privileges" + exit $ksft_skip; +fi + +if [ ! -x "$(command -v ip)" ]; then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +# Check a flag that is added to flush command as part of VXLAN flush support +bridge fdb help 2>&1 | grep -q "\[no\]router" +if [ $? -ne 0 ]; then + echo "SKIP: iproute2 too old, missing flush command for VXLAN" + exit $ksft_skip +fi + +ip link add dev vx10 type vxlan id 1000 2> /dev/null +out=$(bridge fdb flush dev vx10 2>&1 | grep -q "Operation not supported") +if [ $? -eq 0 ]; then + echo "SKIP: kernel lacks vxlan flush support" + exit $ksft_skip +fi +ip link del dev vx10 + +for t in $TESTS +do + setup; $t; cleanup; +done diff --git a/tools/testing/selftests/net/fib-onlink-tests.sh b/tools/testing/selftests/net/fib-onlink-tests.sh index c287b90b8af8..ec2d6ceb1f08 100755 --- a/tools/testing/selftests/net/fib-onlink-tests.sh +++ b/tools/testing/selftests/net/fib-onlink-tests.sh @@ -3,6 +3,7 @@ # IPv4 and IPv6 onlink tests +source lib.sh PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no} VERBOSE=0 @@ -74,9 +75,6 @@ TEST_NET4IN6[2]=10.2.1.254 # mcast address MCAST6=ff02::1 - -PEER_NS=bart -PEER_CMD="ip netns exec ${PEER_NS}" VRF=lisa VRF_TABLE=1101 PBR_TABLE=101 @@ -176,8 +174,7 @@ setup() set -e # create namespace - ip netns add ${PEER_NS} - ip -netns ${PEER_NS} li set lo up + setup_ns PEER_NS # add vrf table ip li add ${VRF} type vrf table ${VRF_TABLE} @@ -219,7 +216,7 @@ setup() cleanup() { # make sure we start from a clean slate - ip netns del ${PEER_NS} 2>/dev/null + cleanup_ns ${PEER_NS} 2>/dev/null for n in 1 3 5 7; do ip link del ${NETIFS[p${n}]} 2>/dev/null done diff --git a/tools/testing/selftests/net/fib_nexthop_multiprefix.sh b/tools/testing/selftests/net/fib_nexthop_multiprefix.sh index 51df5e305855..e85248609af4 100755 --- a/tools/testing/selftests/net/fib_nexthop_multiprefix.sh +++ b/tools/testing/selftests/net/fib_nexthop_multiprefix.sh @@ -12,6 +12,7 @@ # # routing in h0 to hN is done with nexthop objects. +source lib.sh PAUSE_ON_FAIL=no VERBOSE=0 @@ -72,12 +73,6 @@ create_ns() { local ns=${1} - ip netns del ${ns} 2>/dev/null - - ip netns add ${ns} - ip -netns ${ns} addr add 127.0.0.1/8 dev lo - ip -netns ${ns} link set lo up - ip netns exec ${ns} sysctl -q -w net.ipv6.conf.all.keep_addr_on_down=1 case ${ns} in h*) @@ -97,7 +92,13 @@ setup() #set -e - for ns in h0 r1 h1 h2 h3 + setup_ns h0 r1 h1 h2 h3 + h[0]=$h0 + h[1]=$h1 + h[2]=$h2 + h[3]=$h3 + r[1]=$r1 + for ns in ${h[0]} ${r[1]} ${h[1]} ${h[2]} ${h[3]} do create_ns ${ns} done @@ -108,35 +109,35 @@ setup() for i in 0 1 2 3 do - ip -netns h${i} li add eth0 type veth peer name r1h${i} - ip -netns h${i} li set eth0 up - ip -netns h${i} li set r1h${i} netns r1 name eth${i} up - - ip -netns h${i} addr add dev eth0 172.16.10${i}.1/24 - ip -netns h${i} -6 addr add dev eth0 2001:db8:10${i}::1/64 - ip -netns r1 addr add dev eth${i} 172.16.10${i}.254/24 - ip -netns r1 -6 addr add dev eth${i} 2001:db8:10${i}::64/64 + ip -netns ${h[$i]} li add eth0 type veth peer name r1h${i} + ip -netns ${h[$i]} li set eth0 up + ip -netns ${h[$i]} li set r1h${i} netns ${r[1]} name eth${i} up + + ip -netns ${h[$i]} addr add dev eth0 172.16.10${i}.1/24 + ip -netns ${h[$i]} -6 addr add dev eth0 2001:db8:10${i}::1/64 + ip -netns ${r[1]} addr add dev eth${i} 172.16.10${i}.254/24 + ip -netns ${r[1]} -6 addr add dev eth${i} 2001:db8:10${i}::64/64 done - ip -netns h0 nexthop add id 4 via 172.16.100.254 dev eth0 - ip -netns h0 nexthop add id 6 via 2001:db8:100::64 dev eth0 + ip -netns ${h[0]} nexthop add id 4 via 172.16.100.254 dev eth0 + ip -netns ${h[0]} nexthop add id 6 via 2001:db8:100::64 dev eth0 - # routing from h0 to h1-h3 and back + # routing from ${h[0]} to h1-h3 and back for i in 1 2 3 do - ip -netns h0 ro add 172.16.10${i}.0/24 nhid 4 - ip -netns h${i} ro add 172.16.100.0/24 via 172.16.10${i}.254 + ip -netns ${h[0]} ro add 172.16.10${i}.0/24 nhid 4 + ip -netns ${h[$i]} ro add 172.16.100.0/24 via 172.16.10${i}.254 - ip -netns h0 -6 ro add 2001:db8:10${i}::/64 nhid 6 - ip -netns h${i} -6 ro add 2001:db8:100::/64 via 2001:db8:10${i}::64 + ip -netns ${h[0]} -6 ro add 2001:db8:10${i}::/64 nhid 6 + ip -netns ${h[$i]} -6 ro add 2001:db8:100::/64 via 2001:db8:10${i}::64 done if [ "$VERBOSE" = "1" ]; then echo echo "host 1 config" - ip -netns h0 li sh - ip -netns h0 ro sh - ip -netns h0 -6 ro sh + ip -netns ${h[0]} li sh + ip -netns ${h[0]} ro sh + ip -netns ${h[0]} -6 ro sh fi #set +e @@ -144,10 +145,7 @@ setup() cleanup() { - for n in h0 r1 h1 h2 h3 - do - ip netns del ${n} 2>/dev/null - done + cleanup_all_ns } change_mtu() @@ -156,7 +154,7 @@ change_mtu() local mtu=$2 run_cmd ip -netns h${hostid} li set eth0 mtu ${mtu} - run_cmd ip -netns r1 li set eth${hostid} mtu ${mtu} + run_cmd ip -netns ${r1} li set eth${hostid} mtu ${mtu} } ################################################################################ @@ -168,23 +166,23 @@ validate_v4_exception() local mtu=$2 local ping_sz=$3 local dst="172.16.10${i}.1" - local h0=172.16.100.1 - local r1=172.16.100.254 + local h0_ip=172.16.100.1 + local r1_ip=172.16.100.254 local rc if [ ${ping_sz} != "0" ]; then - run_cmd ip netns exec h0 ping -s ${ping_sz} -c5 -w5 ${dst} + run_cmd ip netns exec ${h0} ping -s ${ping_sz} -c5 -w5 ${dst} fi if [ "$VERBOSE" = "1" ]; then echo "Route get" - ip -netns h0 ro get ${dst} + ip -netns ${h0} ro get ${dst} echo "Searching for:" echo " cache .* mtu ${mtu}" echo fi - ip -netns h0 ro get ${dst} | \ + ip -netns ${h0} ro get ${dst} | \ grep -q "cache .* mtu ${mtu}" rc=$? @@ -197,24 +195,24 @@ validate_v6_exception() local mtu=$2 local ping_sz=$3 local dst="2001:db8:10${i}::1" - local h0=2001:db8:100::1 - local r1=2001:db8:100::64 + local h0_ip=2001:db8:100::1 + local r1_ip=2001:db8:100::64 local rc if [ ${ping_sz} != "0" ]; then - run_cmd ip netns exec h0 ${ping6} -s ${ping_sz} -c5 -w5 ${dst} + run_cmd ip netns exec ${h0} ${ping6} -s ${ping_sz} -c5 -w5 ${dst} fi if [ "$VERBOSE" = "1" ]; then echo "Route get" - ip -netns h0 -6 ro get ${dst} + ip -netns ${h0} -6 ro get ${dst} echo "Searching for:" - echo " ${dst} from :: via ${r1} dev eth0 src ${h0} .* mtu ${mtu}" + echo " ${dst}.* via ${r1_ip} dev eth0 src ${h0_ip} .* mtu ${mtu}" echo fi - ip -netns h0 -6 ro get ${dst} | \ - grep -q "${dst} from :: via ${r1} dev eth0 src ${h0} .* mtu ${mtu}" + ip -netns ${h0} -6 ro get ${dst} | \ + grep -q "${dst}.* via ${r1_ip} dev eth0 src ${h0_ip} .* mtu ${mtu}" rc=$? log_test $rc 0 "IPv6: host 0 to host ${i}, mtu ${mtu}" @@ -242,11 +240,11 @@ for i in 1 2 3 do # generate a cached route per-cpu for c in ${cpus}; do - run_cmd taskset -c ${c} ip netns exec h0 ping -c1 -w1 172.16.10${i}.1 - [ $? -ne 0 ] && printf "\nERROR: ping to h${i} failed\n" && ret=1 + run_cmd taskset -c ${c} ip netns exec ${h0} ping -c1 -w1 172.16.10${i}.1 + [ $? -ne 0 ] && printf "\nERROR: ping to ${h[$i]} failed\n" && ret=1 - run_cmd taskset -c ${c} ip netns exec h0 ${ping6} -c1 -w1 2001:db8:10${i}::1 - [ $? -ne 0 ] && printf "\nERROR: ping6 to h${i} failed\n" && ret=1 + run_cmd taskset -c ${c} ip netns exec ${h0} ${ping6} -c1 -w1 2001:db8:10${i}::1 + [ $? -ne 0 ] && printf "\nERROR: ping6 to ${h[$i]} failed\n" && ret=1 [ $ret -ne 0 ] && break done @@ -282,11 +280,11 @@ if [ $ret -eq 0 ]; then validate_v6_exception 3 1400 0 # targeted deletes to trigger cleanup paths in kernel - ip -netns h0 ro del 172.16.102.0/24 nhid 4 - ip -netns h0 -6 ro del 2001:db8:102::/64 nhid 6 + ip -netns ${h0} ro del 172.16.102.0/24 nhid 4 + ip -netns ${h0} -6 ro del 2001:db8:102::/64 nhid 6 - ip -netns h0 nexthop del id 4 - ip -netns h0 nexthop del id 6 + ip -netns ${h0} nexthop del id 4 + ip -netns ${h0} nexthop del id 6 fi cleanup diff --git a/tools/testing/selftests/net/fib_nexthop_nongw.sh b/tools/testing/selftests/net/fib_nexthop_nongw.sh new file mode 100755 index 000000000000..1ccf56f10171 --- /dev/null +++ b/tools/testing/selftests/net/fib_nexthop_nongw.sh @@ -0,0 +1,115 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# ns: h1 | ns: h2 +# 192.168.0.1/24 | +# eth0 | +# | 192.168.1.1/32 +# veth0 <---|---> veth1 +# Validate source address selection for route without gateway + +source lib.sh +PAUSE_ON_FAIL=no +VERBOSE=0 +ret=0 + +################################################################################ +# helpers + +log_test() +{ + local rc=$1 + local expected=$2 + local msg="$3" + + if [ ${rc} -eq ${expected} ]; then + printf "TEST: %-60s [ OK ]\n" "${msg}" + nsuccess=$((nsuccess+1)) + else + ret=1 + nfail=$((nfail+1)) + printf "TEST: %-60s [FAIL]\n" "${msg}" + if [ "${PAUSE_ON_FAIL}" = "yes" ]; then + echo + echo "hit enter to continue, 'q' to quit" + read a + [ "$a" = "q" ] && exit 1 + fi + fi + + [ "$VERBOSE" = "1" ] && echo +} + +run_cmd() +{ + local cmd="$*" + local out + local rc + + if [ "$VERBOSE" = "1" ]; then + echo "COMMAND: $cmd" + fi + + out=$(eval $cmd 2>&1) + rc=$? + if [ "$VERBOSE" = "1" -a -n "$out" ]; then + echo "$out" + fi + + [ "$VERBOSE" = "1" ] && echo + + return $rc +} + +################################################################################ +# config +setup() +{ + setup_ns h1 h2 + + # Add a fake eth0 to support an ip address + ip -n $h1 link add name eth0 type dummy + ip -n $h1 link set eth0 up + ip -n $h1 address add 192.168.0.1/24 dev eth0 + + # Configure veths (same @mac, arp off) + ip -n $h1 link add name veth0 type veth peer name veth1 netns $h2 + ip -n $h1 link set veth0 up + + ip -n $h2 link set veth1 up + + # Configure @IP in the peer netns + ip -n $h2 address add 192.168.1.1/32 dev veth1 + ip -n $h2 route add default dev veth1 + + # Add a nexthop without @gw and use it in a route + ip -n $h1 nexthop add id 1 dev veth0 + ip -n $h1 route add 192.168.1.1 nhid 1 +} + +cleanup() +{ + cleanup_ns $h1 $h2 +} + +trap cleanup EXIT + +################################################################################ +# main + +while getopts :pv o +do + case $o in + p) PAUSE_ON_FAIL=yes;; + v) VERBOSE=1;; + esac +done + +setup + +run_cmd ip -netns $h1 route get 192.168.1.1 +log_test $? 0 "nexthop: get route with nexthop without gw" +run_cmd ip netns exec $h1 ping -c1 192.168.1.1 +log_test $? 0 "nexthop: ping through nexthop without gw" + +exit $ret diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh index eb693a3b7b4a..ac0b2c6a5761 100755 --- a/tools/testing/selftests/net/fib_nexthops.sh +++ b/tools/testing/selftests/net/fib_nexthops.sh @@ -14,19 +14,52 @@ # objects. Device reference counts and network namespace cleanup tested # by use of network namespace for peer. +source lib.sh ret=0 # Kselftest framework requirement - SKIP code is 4. ksft_skip=4 # all tests in this script. Can be overridden with -t option -IPV4_TESTS="ipv4_fcnal ipv4_grp_fcnal ipv4_withv6_fcnal ipv4_fcnal_runtime ipv4_large_grp ipv4_compat_mode ipv4_fdb_grp_fcnal ipv4_torture" -IPV6_TESTS="ipv6_fcnal ipv6_grp_fcnal ipv6_fcnal_runtime ipv6_large_grp ipv6_compat_mode ipv6_fdb_grp_fcnal ipv6_torture" - -ALL_TESTS="basic ${IPV4_TESTS} ${IPV6_TESTS}" +IPV4_TESTS=" + ipv4_fcnal + ipv4_grp_fcnal + ipv4_res_grp_fcnal + ipv4_withv6_fcnal + ipv4_fcnal_runtime + ipv4_large_grp + ipv4_large_res_grp + ipv4_compat_mode + ipv4_fdb_grp_fcnal + ipv4_mpath_select + ipv4_torture + ipv4_res_torture +" + +IPV6_TESTS=" + ipv6_fcnal + ipv6_grp_fcnal + ipv6_res_grp_fcnal + ipv6_fcnal_runtime + ipv6_large_grp + ipv6_large_res_grp + ipv6_compat_mode + ipv6_fdb_grp_fcnal + ipv6_mpath_select + ipv6_torture + ipv6_res_torture +" + +ALL_TESTS=" + basic + basic_res + ${IPV4_TESTS} + ${IPV6_TESTS} +" TESTS="${ALL_TESTS}" VERBOSE=0 PAUSE_ON_FAIL=no PAUSE=no +PING_TIMEOUT=5 nsid=100 @@ -116,13 +149,7 @@ create_ns() { local n=${1} - ip netns del ${n} 2>/dev/null - set -e - ip netns add ${n} - ip netns set ${n} $((nsid++)) - ip -netns ${n} addr add 127.0.0.1/8 dev lo - ip -netns ${n} link set lo up ip netns exec ${n} sysctl -qw net.ipv4.ip_forward=1 ip netns exec ${n} sysctl -qw net.ipv4.fib_multipath_use_neigh=1 @@ -141,12 +168,13 @@ setup() { cleanup - create_ns me - create_ns peer - create_ns remote + setup_ns me peer remote + create_ns $me + create_ns $peer + create_ns $remote - IP="ip -netns me" - BRIDGE="bridge -netns me" + IP="ip -netns $me" + BRIDGE="bridge -netns $me" set -e $IP li add veth1 type veth peer name veth2 $IP li set veth1 up @@ -158,24 +186,24 @@ setup() $IP addr add 172.16.2.1/24 dev veth3 $IP -6 addr add 2001:db8:92::1/64 dev veth3 nodad - $IP li set veth2 netns peer up - ip -netns peer addr add 172.16.1.2/24 dev veth2 - ip -netns peer -6 addr add 2001:db8:91::2/64 dev veth2 nodad + $IP li set veth2 netns $peer up + ip -netns $peer addr add 172.16.1.2/24 dev veth2 + ip -netns $peer -6 addr add 2001:db8:91::2/64 dev veth2 nodad - $IP li set veth4 netns peer up - ip -netns peer addr add 172.16.2.2/24 dev veth4 - ip -netns peer -6 addr add 2001:db8:92::2/64 dev veth4 nodad + $IP li set veth4 netns $peer up + ip -netns $peer addr add 172.16.2.2/24 dev veth4 + ip -netns $peer -6 addr add 2001:db8:92::2/64 dev veth4 nodad - ip -netns remote li add veth5 type veth peer name veth6 - ip -netns remote li set veth5 up - ip -netns remote addr add dev veth5 172.16.101.1/24 - ip -netns remote -6 addr add dev veth5 2001:db8:101::1/64 nodad - ip -netns remote ro add 172.16.0.0/22 via 172.16.101.2 - ip -netns remote -6 ro add 2001:db8:90::/40 via 2001:db8:101::2 + ip -netns $remote li add veth5 type veth peer name veth6 + ip -netns $remote li set veth5 up + ip -netns $remote addr add dev veth5 172.16.101.1/24 + ip -netns $remote -6 addr add dev veth5 2001:db8:101::1/64 nodad + ip -netns $remote ro add 172.16.0.0/22 via 172.16.101.2 + ip -netns $remote -6 ro add 2001:db8:90::/40 via 2001:db8:101::2 - ip -netns remote li set veth6 netns peer up - ip -netns peer addr add dev veth6 172.16.101.2/24 - ip -netns peer -6 addr add dev veth6 2001:db8:101::2/64 nodad + ip -netns $remote li set veth6 netns $peer up + ip -netns $peer addr add dev veth6 172.16.101.2/24 + ip -netns $peer -6 addr add dev veth6 2001:db8:101::2/64 nodad set +e } @@ -183,7 +211,7 @@ cleanup() { local ns - for ns in me peer remote; do + for ns in $me $peer $remote; do ip netns del ${ns} 2>/dev/null done } @@ -232,6 +260,19 @@ check_nexthop() check_output "${out}" "${expected}" } +check_nexthop_bucket() +{ + local nharg="$1" + local expected="$2" + local out + + # remove the idle time since we cannot match it + out=$($IP nexthop bucket ${nharg} \ + | sed s/idle_time\ [0-9.]*\ // 2>/dev/null) + + check_output "${out}" "${expected}" +} + check_route() { local pfx="$1" @@ -308,6 +349,46 @@ check_large_grp() log_test $? 0 "Dump large (x$ecmp) ecmp groups" } +check_large_res_grp() +{ + local ipv=$1 + local buckets=$2 + local ipstr="" + + if [ $ipv -eq 4 ]; then + ipstr="172.16.1.2" + else + ipstr="2001:db8:91::2" + fi + + # create a resilient group with $buckets buckets and dump them + run_cmd "$IP nexthop add id 100 via $ipstr dev veth1" + run_cmd "$IP nexthop add id 1000 group 100 type resilient buckets $buckets" + run_cmd "$IP nexthop bucket list" + log_test $? 0 "Dump large (x$buckets) nexthop buckets" +} + +get_route_dev() +{ + local pfx="$1" + local out + + if out=$($IP -j route get "$pfx" | jq -re ".[0].dev"); then + echo "$out" + fi +} + +check_route_dev() +{ + local pfx="$1" + local expected="$2" + local out + + out=$(get_route_dev "$pfx") + + check_output "$out" "$expected" +} + start_ip_monitor() { local mtype=$1 @@ -344,6 +425,15 @@ check_nexthop_fdb_support() fi } +check_nexthop_res_support() +{ + $IP nexthop help 2>&1 | grep -q resilient + if [ $? -ne 0 ]; then + echo "SKIP: iproute2 too old, missing resilient nexthop group support" + return $ksft_skip + fi +} + ipv6_fdb_grp_fcnal() { local rc @@ -504,6 +594,112 @@ ipv4_fdb_grp_fcnal() $IP link del dev vx10 } +ipv4_mpath_select() +{ + local rc dev match h addr + + echo + echo "IPv4 multipath selection" + echo "------------------------" + if [ ! -x "$(command -v jq)" ]; then + echo "SKIP: Could not run test; need jq tool" + return $ksft_skip + fi + + # Use status of existing neighbor entry when determining nexthop for + # multipath routes. + local -A gws + gws=([veth1]=172.16.1.2 [veth3]=172.16.2.2) + local -A other_dev + other_dev=([veth1]=veth3 [veth3]=veth1) + + run_cmd "$IP nexthop add id 1 via ${gws["veth1"]} dev veth1" + run_cmd "$IP nexthop add id 2 via ${gws["veth3"]} dev veth3" + run_cmd "$IP nexthop add id 1001 group 1/2" + run_cmd "$IP ro add 172.16.101.0/24 nhid 1001" + rc=0 + for dev in veth1 veth3; do + match=0 + for h in {1..254}; do + addr="172.16.101.$h" + if [ "$(get_route_dev "$addr")" = "$dev" ]; then + match=1 + break + fi + done + if (( match == 0 )); then + echo "SKIP: Did not find a route using device $dev" + return $ksft_skip + fi + run_cmd "$IP neigh add ${gws[$dev]} dev $dev nud failed" + if ! check_route_dev "$addr" "${other_dev[$dev]}"; then + rc=1 + break + fi + run_cmd "$IP neigh del ${gws[$dev]} dev $dev" + done + log_test $rc 0 "Use valid neighbor during multipath selection" + + run_cmd "$IP neigh add 172.16.1.2 dev veth1 nud incomplete" + run_cmd "$IP neigh add 172.16.2.2 dev veth3 nud incomplete" + run_cmd "$IP route get 172.16.101.1" + # if we did not crash, success + log_test $rc 0 "Multipath selection with no valid neighbor" +} + +ipv6_mpath_select() +{ + local rc dev match h addr + + echo + echo "IPv6 multipath selection" + echo "------------------------" + if [ ! -x "$(command -v jq)" ]; then + echo "SKIP: Could not run test; need jq tool" + return $ksft_skip + fi + + # Use status of existing neighbor entry when determining nexthop for + # multipath routes. + local -A gws + gws=([veth1]=2001:db8:91::2 [veth3]=2001:db8:92::2) + local -A other_dev + other_dev=([veth1]=veth3 [veth3]=veth1) + + run_cmd "$IP nexthop add id 1 via ${gws["veth1"]} dev veth1" + run_cmd "$IP nexthop add id 2 via ${gws["veth3"]} dev veth3" + run_cmd "$IP nexthop add id 1001 group 1/2" + run_cmd "$IP ro add 2001:db8:101::/64 nhid 1001" + rc=0 + for dev in veth1 veth3; do + match=0 + for h in {1..65535}; do + addr=$(printf "2001:db8:101::%x" $h) + if [ "$(get_route_dev "$addr")" = "$dev" ]; then + match=1 + break + fi + done + if (( match == 0 )); then + echo "SKIP: Did not find a route using device $dev" + return $ksft_skip + fi + run_cmd "$IP neigh add ${gws[$dev]} dev $dev nud failed" + if ! check_route_dev "$addr" "${other_dev[$dev]}"; then + rc=1 + break + fi + run_cmd "$IP neigh del ${gws[$dev]} dev $dev" + done + log_test $rc 0 "Use valid neighbor during multipath selection" + + run_cmd "$IP neigh add 2001:db8:91::2 dev veth1 nud incomplete" + run_cmd "$IP neigh add 2001:db8:92::2 dev veth3 nud incomplete" + run_cmd "$IP route get 2001:db8:101::1" + # if we did not crash, success + log_test $rc 0 "Multipath selection with no valid neighbor" +} + ################################################################################ # basic operations (add, delete, replace) on nexthops and nexthop groups # @@ -559,6 +755,66 @@ ipv6_fcnal() log_test $? 0 "Nexthops removed on admin down" } +ipv6_grp_refs() +{ + if [ ! -x "$(command -v mausezahn)" ]; then + echo "SKIP: Could not run test; need mausezahn tool" + return + fi + + run_cmd "$IP link set dev veth1 up" + run_cmd "$IP link add veth1.10 link veth1 up type vlan id 10" + run_cmd "$IP link add veth1.20 link veth1 up type vlan id 20" + run_cmd "$IP -6 addr add 2001:db8:91::1/64 dev veth1.10" + run_cmd "$IP -6 addr add 2001:db8:92::1/64 dev veth1.20" + run_cmd "$IP -6 neigh add 2001:db8:91::2 lladdr 00:11:22:33:44:55 dev veth1.10" + run_cmd "$IP -6 neigh add 2001:db8:92::2 lladdr 00:11:22:33:44:55 dev veth1.20" + run_cmd "$IP nexthop add id 100 via 2001:db8:91::2 dev veth1.10" + run_cmd "$IP nexthop add id 101 via 2001:db8:92::2 dev veth1.20" + run_cmd "$IP nexthop add id 102 group 100" + run_cmd "$IP route add 2001:db8:101::1/128 nhid 102" + + # create per-cpu dsts through nh 100 + run_cmd "ip netns exec $me mausezahn -6 veth1.10 -B 2001:db8:101::1 -A 2001:db8:91::1 -c 5 -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1" + + # remove nh 100 from the group to delete the route potentially leaving + # a stale per-cpu dst which holds a reference to the nexthop's net + # device and to the IPv6 route + run_cmd "$IP nexthop replace id 102 group 101" + run_cmd "$IP route del 2001:db8:101::1/128" + + # add both nexthops to the group so a reference is taken on them + run_cmd "$IP nexthop replace id 102 group 100/101" + + # if the bug described in commit "net: nexthop: release IPv6 per-cpu + # dsts when replacing a nexthop group" exists at this point we have + # an unlinked IPv6 route (but not freed due to stale dst) with a + # reference over the group so we delete the group which will again + # only unlink it due to the route reference + run_cmd "$IP nexthop del id 102" + + # delete the nexthop with stale dst, since we have an unlinked + # group with a ref to it and an unlinked IPv6 route with ref to the + # group, the nh will only be unlinked and not freed so the stale dst + # remains forever and we get a net device refcount imbalance + run_cmd "$IP nexthop del id 100" + + # if a reference was lost this command will hang because the net device + # cannot be removed + timeout -s KILL 5 ip netns exec $me ip link del veth1.10 >/dev/null 2>&1 + + # we can't cleanup if the command is hung trying to delete the netdev + if [ $? -eq 137 ]; then + return 1 + fi + + # cleanup + run_cmd "$IP link del veth1.20" + run_cmd "$IP nexthop flush" + + return 0 +} + ipv6_grp_fcnal() { local rc @@ -664,6 +920,73 @@ ipv6_grp_fcnal() run_cmd "$IP nexthop add id 108 group 31/24" log_test $? 2 "Nexthop group can not have a blackhole and another nexthop" + + ipv6_grp_refs + log_test $? 0 "Nexthop group replace refcounts" +} + +ipv6_res_grp_fcnal() +{ + local rc + + echo + echo "IPv6 resilient groups functional" + echo "--------------------------------" + + check_nexthop_res_support + if [ $? -eq $ksft_skip ]; then + return $ksft_skip + fi + + # + # migration of nexthop buckets - equal weights + # + run_cmd "$IP nexthop add id 62 via 2001:db8:91::2 dev veth1" + run_cmd "$IP nexthop add id 63 via 2001:db8:91::3 dev veth1" + run_cmd "$IP nexthop add id 102 group 62/63 type resilient buckets 2 idle_timer 0" + + run_cmd "$IP nexthop del id 63" + check_nexthop "id 102" \ + "id 102 group 62 type resilient buckets 2 idle_timer 0 unbalanced_timer 0 unbalanced_time 0" + log_test $? 0 "Nexthop group updated when entry is deleted" + check_nexthop_bucket "list id 102" \ + "id 102 index 0 nhid 62 id 102 index 1 nhid 62" + log_test $? 0 "Nexthop buckets updated when entry is deleted" + + run_cmd "$IP nexthop add id 63 via 2001:db8:91::3 dev veth1" + run_cmd "$IP nexthop replace id 102 group 62/63 type resilient buckets 2 idle_timer 0" + check_nexthop "id 102" \ + "id 102 group 62/63 type resilient buckets 2 idle_timer 0 unbalanced_timer 0 unbalanced_time 0" + log_test $? 0 "Nexthop group updated after replace" + check_nexthop_bucket "list id 102" \ + "id 102 index 0 nhid 63 id 102 index 1 nhid 62" + log_test $? 0 "Nexthop buckets updated after replace" + + $IP nexthop flush >/dev/null 2>&1 + + # + # migration of nexthop buckets - unequal weights + # + run_cmd "$IP nexthop add id 62 via 2001:db8:91::2 dev veth1" + run_cmd "$IP nexthop add id 63 via 2001:db8:91::3 dev veth1" + run_cmd "$IP nexthop add id 102 group 62,3/63,1 type resilient buckets 4 idle_timer 0" + + run_cmd "$IP nexthop del id 63" + check_nexthop "id 102" \ + "id 102 group 62,3 type resilient buckets 4 idle_timer 0 unbalanced_timer 0 unbalanced_time 0" + log_test $? 0 "Nexthop group updated when entry is deleted - nECMP" + check_nexthop_bucket "list id 102" \ + "id 102 index 0 nhid 62 id 102 index 1 nhid 62 id 102 index 2 nhid 62 id 102 index 3 nhid 62" + log_test $? 0 "Nexthop buckets updated when entry is deleted - nECMP" + + run_cmd "$IP nexthop add id 63 via 2001:db8:91::3 dev veth1" + run_cmd "$IP nexthop replace id 102 group 62,3/63,1 type resilient buckets 4 idle_timer 0" + check_nexthop "id 102" \ + "id 102 group 62,3/63 type resilient buckets 4 idle_timer 0 unbalanced_timer 0 unbalanced_time 0" + log_test $? 0 "Nexthop group updated after replace - nECMP" + check_nexthop_bucket "list id 102" \ + "id 102 index 0 nhid 63 id 102 index 1 nhid 62 id 102 index 2 nhid 62 id 102 index 3 nhid 62" + log_test $? 0 "Nexthop buckets updated after replace - nECMP" } ipv6_fcnal_runtime() @@ -685,13 +1008,13 @@ ipv6_fcnal_runtime() log_test $? 0 "Route delete" run_cmd "$IP ro add 2001:db8:101::1/128 nhid 81" - run_cmd "ip netns exec me ping -c1 -w1 2001:db8:101::1" + run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1" log_test $? 0 "Ping with nexthop" run_cmd "$IP nexthop add id 82 via 2001:db8:92::2 dev veth3" run_cmd "$IP nexthop add id 122 group 81/82" run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 122" - run_cmd "ip netns exec me ping -c1 -w1 2001:db8:101::1" + run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1" log_test $? 0 "Ping - multipath" # @@ -699,26 +1022,26 @@ ipv6_fcnal_runtime() # run_cmd "$IP -6 nexthop add id 83 blackhole" run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 83" - run_cmd "ip netns exec me ping -c1 -w1 2001:db8:101::1" + run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1" log_test $? 2 "Ping - blackhole" run_cmd "$IP nexthop replace id 83 via 2001:db8:91::2 dev veth1" - run_cmd "ip netns exec me ping -c1 -w1 2001:db8:101::1" + run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1" log_test $? 0 "Ping - blackhole replaced with gateway" run_cmd "$IP -6 nexthop replace id 83 blackhole" - run_cmd "ip netns exec me ping -c1 -w1 2001:db8:101::1" + run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1" log_test $? 2 "Ping - gateway replaced by blackhole" run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 122" - run_cmd "ip netns exec me ping -c1 -w1 2001:db8:101::1" + run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1" if [ $? -eq 0 ]; then run_cmd "$IP nexthop replace id 122 group 83" - run_cmd "ip netns exec me ping -c1 -w1 2001:db8:101::1" + run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1" log_test $? 2 "Ping - group with blackhole" run_cmd "$IP nexthop replace id 122 group 81/82" - run_cmd "ip netns exec me ping -c1 -w1 2001:db8:101::1" + run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1" log_test $? 0 "Ping - group blackhole replaced with gateways" else log_test 2 0 "Ping - multipath failed" @@ -791,17 +1114,25 @@ ipv6_fcnal_runtime() run_cmd "$IP nexthop add id 86 via 2001:db8:91::2 dev veth1" run_cmd "$IP ro add 2001:db8:101::1/128 nhid 81" + # route can not use prefsrc with nexthops + run_cmd "$IP ro add 2001:db8:101::2/128 nhid 86 from 2001:db8:91::1" + log_test $? 2 "IPv6 route can not use src routing with external nexthop" + + # check cleanup path on invalid metric + run_cmd "$IP ro add 2001:db8:101::2/128 nhid 86 congctl lock foo" + log_test $? 2 "IPv6 route with invalid metric" + # rpfilter and default route $IP nexthop flush >/dev/null 2>&1 - run_cmd "ip netns exec me ip6tables -t mangle -I PREROUTING 1 -m rpfilter --invert -j DROP" + run_cmd "ip netns exec $me ip6tables -t mangle -I PREROUTING 1 -m rpfilter --invert -j DROP" run_cmd "$IP nexthop add id 91 via 2001:db8:91::2 dev veth1" run_cmd "$IP nexthop add id 92 via 2001:db8:92::2 dev veth3" run_cmd "$IP nexthop add id 93 group 91/92" run_cmd "$IP -6 ro add default nhid 91" - run_cmd "ip netns exec me ping -c1 -w1 2001:db8:101::1" + run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1" log_test $? 0 "Nexthop with default route and rpfilter" run_cmd "$IP -6 ro replace default nhid 93" - run_cmd "ip netns exec me ping -c1 -w1 2001:db8:101::1" + run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1" log_test $? 0 "Nexthop with multipath default route and rpfilter" # TO-DO: @@ -824,6 +1155,22 @@ ipv6_large_grp() $IP nexthop flush >/dev/null 2>&1 } +ipv6_large_res_grp() +{ + echo + echo "IPv6 large resilient group (128k buckets)" + echo "-----------------------------------------" + + check_nexthop_res_support + if [ $? -eq $ksft_skip ]; then + return $ksft_skip + fi + + check_large_res_grp 6 $((128 * 1024)) + + $IP nexthop flush >/dev/null 2>&1 +} + ipv6_del_add_loop1() { while :; do @@ -865,20 +1212,76 @@ ipv6_torture() pid1=$! ipv6_grp_replace_loop & pid2=$! - ip netns exec me ping -f 2001:db8:101::1 >/dev/null 2>&1 & + ip netns exec $me ping -f 2001:db8:101::1 >/dev/null 2>&1 & pid3=$! - ip netns exec me ping -f 2001:db8:101::2 >/dev/null 2>&1 & + ip netns exec $me ping -f 2001:db8:101::2 >/dev/null 2>&1 & pid4=$! - ip netns exec me mausezahn veth1 -B 2001:db8:101::2 -A 2001:db8:91::1 -c 0 -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1 & + ip netns exec $me mausezahn -6 veth1 -B 2001:db8:101::2 -A 2001:db8:91::1 -c 0 -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1 & pid5=$! sleep 300 kill -9 $pid1 $pid2 $pid3 $pid4 $pid5 + wait $pid1 $pid2 $pid3 $pid4 $pid5 2>/dev/null # if we did not crash, success log_test 0 0 "IPv6 torture test" } +ipv6_res_grp_replace_loop() +{ + while :; do + $IP nexthop replace id 102 group 100/101 type resilient + done >/dev/null 2>&1 +} + +ipv6_res_torture() +{ + local pid1 + local pid2 + local pid3 + local pid4 + local pid5 + + echo + echo "IPv6 runtime resilient nexthop group torture" + echo "--------------------------------------------" + + check_nexthop_res_support + if [ $? -eq $ksft_skip ]; then + return $ksft_skip + fi + + if [ ! -x "$(command -v mausezahn)" ]; then + echo "SKIP: Could not run test; need mausezahn tool" + return + fi + + run_cmd "$IP nexthop add id 100 via 2001:db8:91::2 dev veth1" + run_cmd "$IP nexthop add id 101 via 2001:db8:92::2 dev veth3" + run_cmd "$IP nexthop add id 102 group 100/101 type resilient buckets 512 idle_timer 0" + run_cmd "$IP route add 2001:db8:101::1 nhid 102" + run_cmd "$IP route add 2001:db8:101::2 nhid 102" + + ipv6_del_add_loop1 & + pid1=$! + ipv6_res_grp_replace_loop & + pid2=$! + ip netns exec $me ping -f 2001:db8:101::1 >/dev/null 2>&1 & + pid3=$! + ip netns exec $me ping -f 2001:db8:101::2 >/dev/null 2>&1 & + pid4=$! + ip netns exec $me mausezahn -6 veth1 \ + -B 2001:db8:101::2 -A 2001:db8:91::1 -c 0 \ + -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1 & + pid5=$! + + sleep 300 + kill -9 $pid1 $pid2 $pid3 $pid4 $pid5 + wait $pid1 $pid2 $pid3 $pid4 $pid5 2>/dev/null + + # if we did not crash, success + log_test 0 0 "IPv6 resilient nexthop group torture test" +} ipv4_fcnal() { @@ -931,6 +1334,36 @@ ipv4_fcnal() set +e check_nexthop "dev veth1" "" log_test $? 0 "Nexthops removed on admin down" + + # nexthop route delete warning: route add with nhid and delete + # using device + run_cmd "$IP li set dev veth1 up" + run_cmd "$IP nexthop add id 12 via 172.16.1.3 dev veth1" + out1=`dmesg | grep "WARNING:.*fib_nh_match.*" | wc -l` + run_cmd "$IP route add 172.16.101.1/32 nhid 12" + run_cmd "$IP route delete 172.16.101.1/32 dev veth1" + out2=`dmesg | grep "WARNING:.*fib_nh_match.*" | wc -l` + [ $out1 -eq $out2 ] + rc=$? + log_test $rc 0 "Delete nexthop route warning" + run_cmd "$IP route delete 172.16.101.1/32 nhid 12" + run_cmd "$IP nexthop del id 12" + + run_cmd "$IP nexthop add id 21 via 172.16.1.6 dev veth1" + run_cmd "$IP ro add 172.16.101.0/24 nhid 21" + run_cmd "$IP ro del 172.16.101.0/24 nexthop via 172.16.1.7 dev veth1 nexthop via 172.16.1.8 dev veth1" + log_test $? 2 "Delete multipath route with only nh id based entry" + + run_cmd "$IP nexthop add id 22 via 172.16.1.6 dev veth1" + run_cmd "$IP ro add 172.16.102.0/24 nhid 22" + run_cmd "$IP ro del 172.16.102.0/24 dev veth1" + log_test $? 2 "Delete route when specifying only nexthop device" + + run_cmd "$IP ro del 172.16.102.0/24 via 172.16.1.6" + log_test $? 2 "Delete route when specifying only gateway" + + run_cmd "$IP ro del 172.16.102.0/24" + log_test $? 0 "Delete route when not specifying nexthop attributes" } ipv4_grp_fcnal() @@ -1038,12 +1471,76 @@ ipv4_grp_fcnal() log_test $? 2 "Nexthop group can not have a blackhole and another nexthop" } +ipv4_res_grp_fcnal() +{ + local rc + + echo + echo "IPv4 resilient groups functional" + echo "--------------------------------" + + check_nexthop_res_support + if [ $? -eq $ksft_skip ]; then + return $ksft_skip + fi + + # + # migration of nexthop buckets - equal weights + # + run_cmd "$IP nexthop add id 12 via 172.16.1.2 dev veth1" + run_cmd "$IP nexthop add id 13 via 172.16.1.3 dev veth1" + run_cmd "$IP nexthop add id 102 group 12/13 type resilient buckets 2 idle_timer 0" + + run_cmd "$IP nexthop del id 13" + check_nexthop "id 102" \ + "id 102 group 12 type resilient buckets 2 idle_timer 0 unbalanced_timer 0 unbalanced_time 0" + log_test $? 0 "Nexthop group updated when entry is deleted" + check_nexthop_bucket "list id 102" \ + "id 102 index 0 nhid 12 id 102 index 1 nhid 12" + log_test $? 0 "Nexthop buckets updated when entry is deleted" + + run_cmd "$IP nexthop add id 13 via 172.16.1.3 dev veth1" + run_cmd "$IP nexthop replace id 102 group 12/13 type resilient buckets 2 idle_timer 0" + check_nexthop "id 102" \ + "id 102 group 12/13 type resilient buckets 2 idle_timer 0 unbalanced_timer 0 unbalanced_time 0" + log_test $? 0 "Nexthop group updated after replace" + check_nexthop_bucket "list id 102" \ + "id 102 index 0 nhid 13 id 102 index 1 nhid 12" + log_test $? 0 "Nexthop buckets updated after replace" + + $IP nexthop flush >/dev/null 2>&1 + + # + # migration of nexthop buckets - unequal weights + # + run_cmd "$IP nexthop add id 12 via 172.16.1.2 dev veth1" + run_cmd "$IP nexthop add id 13 via 172.16.1.3 dev veth1" + run_cmd "$IP nexthop add id 102 group 12,3/13,1 type resilient buckets 4 idle_timer 0" + + run_cmd "$IP nexthop del id 13" + check_nexthop "id 102" \ + "id 102 group 12,3 type resilient buckets 4 idle_timer 0 unbalanced_timer 0 unbalanced_time 0" + log_test $? 0 "Nexthop group updated when entry is deleted - nECMP" + check_nexthop_bucket "list id 102" \ + "id 102 index 0 nhid 12 id 102 index 1 nhid 12 id 102 index 2 nhid 12 id 102 index 3 nhid 12" + log_test $? 0 "Nexthop buckets updated when entry is deleted - nECMP" + + run_cmd "$IP nexthop add id 13 via 172.16.1.3 dev veth1" + run_cmd "$IP nexthop replace id 102 group 12,3/13,1 type resilient buckets 4 idle_timer 0" + check_nexthop "id 102" \ + "id 102 group 12,3/13 type resilient buckets 4 idle_timer 0 unbalanced_timer 0 unbalanced_time 0" + log_test $? 0 "Nexthop group updated after replace - nECMP" + check_nexthop_bucket "list id 102" \ + "id 102 index 0 nhid 13 id 102 index 1 nhid 12 id 102 index 2 nhid 12 id 102 index 3 nhid 12" + log_test $? 0 "Nexthop buckets updated after replace - nECMP" +} + ipv4_withv6_fcnal() { local lladdr set -e - lladdr=$(get_linklocal veth2 peer) + lladdr=$(get_linklocal veth2 $peer) run_cmd "$IP nexthop add id 11 via ${lladdr} dev veth1" set +e run_cmd "$IP ro add 172.16.101.1/32 nhid 11" @@ -1096,18 +1593,22 @@ ipv4_fcnal_runtime() run_cmd "$IP nexthop replace id 22 via 172.16.2.2 dev veth3" log_test $? 2 "Nexthop replace with invalid scope for existing route" + # check cleanup path on invalid metric + run_cmd "$IP ro add 172.16.101.2/32 nhid 22 congctl lock foo" + log_test $? 2 "IPv4 route with invalid metric" + # # add route with nexthop and check traffic # run_cmd "$IP nexthop replace id 21 via 172.16.1.2 dev veth1" run_cmd "$IP ro replace 172.16.101.1/32 nhid 21" - run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1" + run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 172.16.101.1" log_test $? 0 "Basic ping" run_cmd "$IP nexthop replace id 22 via 172.16.2.2 dev veth3" run_cmd "$IP nexthop add id 122 group 21/22" run_cmd "$IP ro replace 172.16.101.1/32 nhid 122" - run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1" + run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 172.16.101.1" log_test $? 0 "Ping - multipath" run_cmd "$IP ro delete 172.16.101.1/32 nhid 122" @@ -1118,7 +1619,7 @@ ipv4_fcnal_runtime() run_cmd "$IP nexthop add id 501 via 172.16.1.2 dev veth1" run_cmd "$IP ro add default nhid 501" run_cmd "$IP ro add default via 172.16.1.3 dev veth1 metric 20" - run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1" + run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 172.16.101.1" log_test $? 0 "Ping - multiple default routes, nh first" # flip the order @@ -1127,7 +1628,7 @@ ipv4_fcnal_runtime() run_cmd "$IP ro add default via 172.16.1.2 dev veth1 metric 20" run_cmd "$IP nexthop replace id 501 via 172.16.1.3 dev veth1" run_cmd "$IP ro add default nhid 501 metric 20" - run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1" + run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 172.16.101.1" log_test $? 0 "Ping - multiple default routes, nh second" run_cmd "$IP nexthop delete nhid 501" @@ -1138,26 +1639,26 @@ ipv4_fcnal_runtime() # run_cmd "$IP nexthop add id 23 blackhole" run_cmd "$IP ro replace 172.16.101.1/32 nhid 23" - run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1" + run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 172.16.101.1" log_test $? 2 "Ping - blackhole" run_cmd "$IP nexthop replace id 23 via 172.16.1.2 dev veth1" - run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1" + run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 172.16.101.1" log_test $? 0 "Ping - blackhole replaced with gateway" run_cmd "$IP nexthop replace id 23 blackhole" - run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1" + run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 172.16.101.1" log_test $? 2 "Ping - gateway replaced by blackhole" run_cmd "$IP ro replace 172.16.101.1/32 nhid 122" - run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1" + run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 172.16.101.1" if [ $? -eq 0 ]; then run_cmd "$IP nexthop replace id 122 group 23" - run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1" + run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 172.16.101.1" log_test $? 2 "Ping - group with blackhole" run_cmd "$IP nexthop replace id 122 group 21/22" - run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1" + run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 172.16.101.1" log_test $? 0 "Ping - group blackhole replaced with gateways" else log_test 2 0 "Ping - multipath failed" @@ -1180,11 +1681,11 @@ ipv4_fcnal_runtime() # IPv4 with IPv6 # set -e - lladdr=$(get_linklocal veth2 peer) + lladdr=$(get_linklocal veth2 $peer) run_cmd "$IP nexthop add id 24 via ${lladdr} dev veth1" set +e run_cmd "$IP ro replace 172.16.101.1/32 nhid 24" - run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1" + run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 172.16.101.1" log_test $? 0 "IPv6 nexthop with IPv4 route" $IP neigh sh | grep -q "${lladdr} dev veth1" @@ -1208,11 +1709,11 @@ ipv4_fcnal_runtime() check_route "172.16.101.1" "172.16.101.1 nhid 101 nexthop via inet6 ${lladdr} dev veth1 weight 1 nexthop via 172.16.1.2 dev veth1 weight 1" - run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1" + run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 172.16.101.1" log_test $? 0 "IPv6 nexthop with IPv4 route" run_cmd "$IP ro replace 172.16.101.1/32 via inet6 ${lladdr} dev veth1" - run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1" + run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 172.16.101.1" log_test $? 0 "IPv4 route with IPv6 gateway" $IP neigh sh | grep -q "${lladdr} dev veth1" @@ -1229,7 +1730,7 @@ ipv4_fcnal_runtime() run_cmd "$IP ro del 172.16.101.1/32 via inet6 ${lladdr} dev veth1" run_cmd "$IP -4 ro add default via inet6 ${lladdr} dev veth1" - run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1" + run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 172.16.101.1" log_test $? 0 "IPv4 default route with IPv6 gateway" # @@ -1259,12 +1760,28 @@ ipv4_large_grp() $IP nexthop flush >/dev/null 2>&1 } +ipv4_large_res_grp() +{ + echo + echo "IPv4 large resilient group (128k buckets)" + echo "-----------------------------------------" + + check_nexthop_res_support + if [ $? -eq $ksft_skip ]; then + return $ksft_skip + fi + + check_large_res_grp 4 $((128 * 1024)) + + $IP nexthop flush >/dev/null 2>&1 +} + sysctl_nexthop_compat_mode_check() { local sysctlname="net.ipv4.nexthop_compat_mode" local lprefix=$1 - IPE="ip netns exec me" + IPE="ip netns exec $me" $IPE sysctl -q $sysctlname 2>&1 >/dev/null if [ $? -ne 0 ]; then @@ -1283,7 +1800,7 @@ sysctl_nexthop_compat_mode_set() local mode=$1 local lprefix=$2 - IPE="ip netns exec me" + IPE="ip netns exec $me" out=$($IPE sysctl -w $sysctlname=$mode) log_test $? 0 "$lprefix set compat mode - $mode" @@ -1467,20 +1984,77 @@ ipv4_torture() pid1=$! ipv4_grp_replace_loop & pid2=$! - ip netns exec me ping -f 172.16.101.1 >/dev/null 2>&1 & + ip netns exec $me ping -f 172.16.101.1 >/dev/null 2>&1 & pid3=$! - ip netns exec me ping -f 172.16.101.2 >/dev/null 2>&1 & + ip netns exec $me ping -f 172.16.101.2 >/dev/null 2>&1 & pid4=$! - ip netns exec me mausezahn veth1 -B 172.16.101.2 -A 172.16.1.1 -c 0 -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1 & + ip netns exec $me mausezahn veth1 -B 172.16.101.2 -A 172.16.1.1 -c 0 -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1 & pid5=$! sleep 300 kill -9 $pid1 $pid2 $pid3 $pid4 $pid5 + wait $pid1 $pid2 $pid3 $pid4 $pid5 2>/dev/null # if we did not crash, success log_test 0 0 "IPv4 torture test" } +ipv4_res_grp_replace_loop() +{ + while :; do + $IP nexthop replace id 102 group 100/101 type resilient + done >/dev/null 2>&1 +} + +ipv4_res_torture() +{ + local pid1 + local pid2 + local pid3 + local pid4 + local pid5 + + echo + echo "IPv4 runtime resilient nexthop group torture" + echo "--------------------------------------------" + + check_nexthop_res_support + if [ $? -eq $ksft_skip ]; then + return $ksft_skip + fi + + if [ ! -x "$(command -v mausezahn)" ]; then + echo "SKIP: Could not run test; need mausezahn tool" + return + fi + + run_cmd "$IP nexthop add id 100 via 172.16.1.2 dev veth1" + run_cmd "$IP nexthop add id 101 via 172.16.2.2 dev veth3" + run_cmd "$IP nexthop add id 102 group 100/101 type resilient buckets 512 idle_timer 0" + run_cmd "$IP route add 172.16.101.1 nhid 102" + run_cmd "$IP route add 172.16.101.2 nhid 102" + + ipv4_del_add_loop1 & + pid1=$! + ipv4_res_grp_replace_loop & + pid2=$! + ip netns exec $me ping -f 172.16.101.1 >/dev/null 2>&1 & + pid3=$! + ip netns exec $me ping -f 172.16.101.2 >/dev/null 2>&1 & + pid4=$! + ip netns exec $me mausezahn veth1 \ + -B 172.16.101.2 -A 172.16.1.1 -c 0 \ + -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1 & + pid5=$! + + sleep 300 + kill -9 $pid1 $pid2 $pid3 $pid4 $pid5 + wait $pid1 $pid2 $pid3 $pid4 $pid5 2>/dev/null + + # if we did not crash, success + log_test 0 0 "IPv4 resilient nexthop group torture test" +} + basic() { echo @@ -1492,6 +2066,12 @@ basic() run_cmd "$IP nexthop get id 1" log_test $? 2 "Nexthop get on non-existent id" + run_cmd "$IP nexthop del id 1" + log_test $? 2 "Nexthop del with non-existent id" + + run_cmd "$IP nexthop del id 1 group 1/2/3/4/5/6/7/8" + log_test $? 2 "Nexthop del with non-existent id and extra attributes" + # attempt to create nh without a device or gw - fails run_cmd "$IP nexthop add id 1" log_test $? 2 "Nexthop with no device or gateway" @@ -1503,10 +2083,10 @@ basic() # create nh with linkdown device - fails $IP li set veth1 up - ip -netns peer li set veth2 down + ip -netns $peer li set veth2 down run_cmd "$IP nexthop add id 1 dev veth1" log_test $? 2 "Nexthop with device that is linkdown" - ip -netns peer li set veth2 up + ip -netns $peer li set veth2 up # device only run_cmd "$IP nexthop add id 1 dev veth1" @@ -1524,6 +2104,19 @@ basic() run_cmd "$IP nexthop replace id 2 blackhole dev veth1" log_test $? 2 "Blackhole nexthop with other attributes" + # blackhole nexthop should not be affected by the state of the loopback + # device + run_cmd "$IP link set dev lo down" + check_nexthop "id 2" "id 2 blackhole" + log_test $? 0 "Blackhole nexthop with loopback device down" + + run_cmd "$IP link set dev lo up" + + # Dump should not loop endlessly when maximum nexthop ID is configured. + run_cmd "$IP nexthop add id $((2**32-1)) blackhole" + run_cmd "timeout 5 $IP nexthop" + log_test $? 0 "Maximum nexthop ID dump" + # # groups # @@ -1582,6 +2175,225 @@ basic() log_test $? 2 "Nexthop group and blackhole" $IP nexthop flush >/dev/null 2>&1 + + # Test to ensure that flushing with a multi-part nexthop dump works as + # expected. + local batch_file=$(mktemp) + + for i in $(seq 1 $((64 * 1024))); do + echo "nexthop add id $i blackhole" >> $batch_file + done + + $IP -b $batch_file + $IP nexthop flush >/dev/null 2>&1 + [[ $($IP nexthop | wc -l) -eq 0 ]] + log_test $? 0 "Large scale nexthop flushing" + + rm $batch_file +} + +check_nexthop_buckets_balance() +{ + local nharg=$1; shift + local ret + + while (($# > 0)); do + local selector=$1; shift + local condition=$1; shift + local count + + count=$($IP -j nexthop bucket ${nharg} ${selector} | jq length) + (( $count $condition )) + ret=$? + if ((ret != 0)); then + return $ret + fi + done + + return 0 +} + +basic_res() +{ + echo + echo "Basic resilient nexthop group functional tests" + echo "----------------------------------------------" + + check_nexthop_res_support + if [ $? -eq $ksft_skip ]; then + return $ksft_skip + fi + + run_cmd "$IP nexthop add id 1 dev veth1" + + # + # resilient nexthop group addition + # + + run_cmd "$IP nexthop add id 101 group 1 type resilient buckets 8" + log_test $? 0 "Add a nexthop group with default parameters" + + run_cmd "$IP nexthop get id 101" + check_nexthop "id 101" \ + "id 101 group 1 type resilient buckets 8 idle_timer 120 unbalanced_timer 0 unbalanced_time 0" + log_test $? 0 "Get a nexthop group with default parameters" + + run_cmd "$IP nexthop add id 102 group 1 type resilient + buckets 4 idle_timer 100 unbalanced_timer 5" + run_cmd "$IP nexthop get id 102" + check_nexthop "id 102" \ + "id 102 group 1 type resilient buckets 4 idle_timer 100 unbalanced_timer 5 unbalanced_time 0" + log_test $? 0 "Get a nexthop group with non-default parameters" + + run_cmd "$IP nexthop add id 103 group 1 type resilient buckets 0" + log_test $? 2 "Add a nexthop group with 0 buckets" + + # + # resilient nexthop group replacement + # + + run_cmd "$IP nexthop replace id 101 group 1 type resilient + buckets 8 idle_timer 240 unbalanced_timer 80" + log_test $? 0 "Replace nexthop group parameters" + check_nexthop "id 101" \ + "id 101 group 1 type resilient buckets 8 idle_timer 240 unbalanced_timer 80 unbalanced_time 0" + log_test $? 0 "Get a nexthop group after replacing parameters" + + run_cmd "$IP nexthop replace id 101 group 1 type resilient idle_timer 512" + log_test $? 0 "Replace idle timer" + check_nexthop "id 101" \ + "id 101 group 1 type resilient buckets 8 idle_timer 512 unbalanced_timer 80 unbalanced_time 0" + log_test $? 0 "Get a nexthop group after replacing idle timer" + + run_cmd "$IP nexthop replace id 101 group 1 type resilient unbalanced_timer 256" + log_test $? 0 "Replace unbalanced timer" + check_nexthop "id 101" \ + "id 101 group 1 type resilient buckets 8 idle_timer 512 unbalanced_timer 256 unbalanced_time 0" + log_test $? 0 "Get a nexthop group after replacing unbalanced timer" + + run_cmd "$IP nexthop replace id 101 group 1 type resilient" + log_test $? 0 "Replace with no parameters" + check_nexthop "id 101" \ + "id 101 group 1 type resilient buckets 8 idle_timer 512 unbalanced_timer 256 unbalanced_time 0" + log_test $? 0 "Get a nexthop group after replacing no parameters" + + run_cmd "$IP nexthop replace id 101 group 1" + log_test $? 2 "Replace nexthop group type - implicit" + + run_cmd "$IP nexthop replace id 101 group 1 type mpath" + log_test $? 2 "Replace nexthop group type - explicit" + + run_cmd "$IP nexthop replace id 101 group 1 type resilient buckets 1024" + log_test $? 2 "Replace number of nexthop buckets" + + check_nexthop "id 101" \ + "id 101 group 1 type resilient buckets 8 idle_timer 512 unbalanced_timer 256 unbalanced_time 0" + log_test $? 0 "Get a nexthop group after replacing with invalid parameters" + + # + # resilient nexthop buckets dump + # + + $IP nexthop flush >/dev/null 2>&1 + run_cmd "$IP nexthop add id 1 dev veth1" + run_cmd "$IP nexthop add id 2 dev veth3" + run_cmd "$IP nexthop add id 101 group 1/2 type resilient buckets 4" + run_cmd "$IP nexthop add id 201 group 1/2" + + check_nexthop_bucket "" \ + "id 101 index 0 nhid 2 id 101 index 1 nhid 2 id 101 index 2 nhid 1 id 101 index 3 nhid 1" + log_test $? 0 "Dump all nexthop buckets" + + check_nexthop_bucket "list id 101" \ + "id 101 index 0 nhid 2 id 101 index 1 nhid 2 id 101 index 2 nhid 1 id 101 index 3 nhid 1" + log_test $? 0 "Dump all nexthop buckets in a group" + + sleep 0.1 + (( $($IP -j nexthop bucket list id 101 | + jq '[.[] | select(.bucket.idle_time > 0 and + .bucket.idle_time < 2)] | length') == 4 )) + log_test $? 0 "All nexthop buckets report a positive near-zero idle time" + + check_nexthop_bucket "list dev veth1" \ + "id 101 index 2 nhid 1 id 101 index 3 nhid 1" + log_test $? 0 "Dump all nexthop buckets with a specific nexthop device" + + check_nexthop_bucket "list nhid 2" \ + "id 101 index 0 nhid 2 id 101 index 1 nhid 2" + log_test $? 0 "Dump all nexthop buckets with a specific nexthop identifier" + + run_cmd "$IP nexthop bucket list id 111" + log_test $? 2 "Dump all nexthop buckets in a non-existent group" + + run_cmd "$IP nexthop bucket list id 201" + log_test $? 2 "Dump all nexthop buckets in a non-resilient group" + + run_cmd "$IP nexthop bucket list dev bla" + log_test $? 255 "Dump all nexthop buckets using a non-existent device" + + run_cmd "$IP nexthop bucket list groups" + log_test $? 255 "Dump all nexthop buckets with invalid 'groups' keyword" + + run_cmd "$IP nexthop bucket list fdb" + log_test $? 255 "Dump all nexthop buckets with invalid 'fdb' keyword" + + # Dump should not loop endlessly when maximum nexthop ID is configured. + run_cmd "$IP nexthop add id $((2**32-1)) group 1/2 type resilient buckets 4" + run_cmd "timeout 5 $IP nexthop bucket" + log_test $? 0 "Maximum nexthop ID dump" + + # + # resilient nexthop buckets get requests + # + + check_nexthop_bucket "get id 101 index 0" "id 101 index 0 nhid 2" + log_test $? 0 "Get a valid nexthop bucket" + + run_cmd "$IP nexthop bucket get id 101 index 999" + log_test $? 2 "Get a nexthop bucket with valid group, but invalid index" + + run_cmd "$IP nexthop bucket get id 201 index 0" + log_test $? 2 "Get a nexthop bucket from a non-resilient group" + + run_cmd "$IP nexthop bucket get id 999 index 0" + log_test $? 2 "Get a nexthop bucket from a non-existent group" + + # + # tests for bucket migration + # + + $IP nexthop flush >/dev/null 2>&1 + + run_cmd "$IP nexthop add id 1 dev veth1" + run_cmd "$IP nexthop add id 2 dev veth3" + run_cmd "$IP nexthop add id 101 + group 1/2 type resilient buckets 10 + idle_timer 1 unbalanced_timer 20" + + check_nexthop_buckets_balance "list id 101" \ + "nhid 1" "== 5" \ + "nhid 2" "== 5" + log_test $? 0 "Initial bucket allocation" + + run_cmd "$IP nexthop replace id 101 + group 1,2/2,3 type resilient" + check_nexthop_buckets_balance "list id 101" \ + "nhid 1" "== 4" \ + "nhid 2" "== 6" + log_test $? 0 "Bucket allocation after replace" + + # Check that increase in idle timer does not make buckets appear busy. + run_cmd "$IP nexthop replace id 101 + group 1,2/2,3 type resilient + idle_timer 10" + run_cmd "$IP nexthop replace id 101 + group 1/2 type resilient" + check_nexthop_buckets_balance "list id 101" \ + "nhid 1" "== 5" \ + "nhid 2" "== 5" + log_test $? 0 "Buckets migrated after idle timer change" + + $IP nexthop flush >/dev/null 2>&1 } ################################################################################ @@ -1599,6 +2411,7 @@ usage: ${0##*/} OPTS -p Pause on fail -P Pause after each test before cleanup -v verbose mode (show commands and output) + -w Timeout for ping Runtime test -n num Number of nexthops to target @@ -1611,7 +2424,7 @@ EOF ################################################################################ # main -while getopts :t:pP46hv o +while getopts :t:pP46hvw: o do case $o in t) TESTS=$OPTARG;; @@ -1620,6 +2433,7 @@ do p) PAUSE_ON_FAIL=yes;; P) PAUSE=yes;; v) VERBOSE=$(($VERBOSE + 1));; + w) PING_TIMEOUT=$OPTARG;; h) usage; exit 0;; *) usage; exit 1;; esac @@ -1653,7 +2467,7 @@ fi for t in $TESTS do case $t in - none) IP="ip -netns peer"; setup; exit 0;; + none) IP="ip -netns $peer"; setup; exit 0;; *) setup; $t; cleanup;; esac done diff --git a/tools/testing/selftests/net/fib_rule_tests.sh b/tools/testing/selftests/net/fib_rule_tests.sh index a93e6b690e06..51157a5559b7 100755 --- a/tools/testing/selftests/net/fib_rule_tests.sh +++ b/tools/testing/selftests/net/fib_rule_tests.sh @@ -3,12 +3,12 @@ # This test is for checking IPv4 and IPv6 FIB rules API +source lib.sh ret=0 - PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no} -IP="ip -netns testns" RTABLE=100 +RTABLE_PEER=101 GW_IP4=192.51.100.2 SRC_IP=192.51.100.3 GW_IP6=2001:db8:1::2 @@ -17,6 +17,9 @@ SRC_IP6=2001:db8:1::3 DEV_ADDR=192.51.100.1 DEV_ADDR6=2001:db8:1::1 DEV=dummy0 +TESTS="fib_rule6 fib_rule4 fib_rule6_connect fib_rule4_connect" + +SELFTEST_PATH="" log_test() { @@ -48,11 +51,36 @@ log_section() echo "######################################################################" } +check_nettest() +{ + if which nettest > /dev/null 2>&1; then + return 0 + fi + + # Add the selftest directory to PATH if not already done + if [ "${SELFTEST_PATH}" = "" ]; then + SELFTEST_PATH="$(dirname $0)" + PATH="${PATH}:${SELFTEST_PATH}" + + # Now retry with the new path + if which nettest > /dev/null 2>&1; then + return 0 + fi + + if [ "${ret}" -eq 0 ]; then + ret="${ksft_skip}" + fi + echo "nettest not found (try 'make -C ${SELFTEST_PATH} nettest')" + fi + + return 1 +} + setup() { set -e - ip netns add testns - $IP link set dev lo up + setup_ns testns + IP="ip -netns $testns" $IP link add dummy0 type dummy $IP link set dev dummy0 up @@ -65,7 +93,41 @@ setup() cleanup() { $IP link del dev dummy0 &> /dev/null - ip netns del testns + cleanup_ns $testns +} + +setup_peer() +{ + set -e + + setup_ns peerns + IP_PEER="ip -netns $peerns" + $IP_PEER link set dev lo up + + ip link add name veth0 netns $testns type veth \ + peer name veth1 netns $peerns + $IP link set dev veth0 up + $IP_PEER link set dev veth1 up + + $IP address add 192.0.2.10 peer 192.0.2.11/32 dev veth0 + $IP_PEER address add 192.0.2.11 peer 192.0.2.10/32 dev veth1 + + $IP address add 2001:db8::10 peer 2001:db8::11/128 dev veth0 nodad + $IP_PEER address add 2001:db8::11 peer 2001:db8::10/128 dev veth1 nodad + + $IP_PEER address add 198.51.100.11/32 dev lo + $IP route add table $RTABLE_PEER 198.51.100.11/32 via 192.0.2.11 + + $IP_PEER address add 2001:db8::1:11/128 dev lo + $IP route add table $RTABLE_PEER 2001:db8::1:11/128 via 2001:db8::11 + + set +e +} + +cleanup_peer() +{ + $IP link del dev veth0 + ip netns del $peerns } fib_check_iproute_support() @@ -93,7 +155,7 @@ fib_rule6_del() fib_rule6_del_by_pref() { - pref=$($IP -6 rule show | grep "$1 lookup $TABLE" | cut -d ":" -f 1) + pref=$($IP -6 rule show $1 table $RTABLE | cut -d ":" -f 1) $IP -6 rule del pref $pref } @@ -101,17 +163,36 @@ fib_rule6_test_match_n_redirect() { local match="$1" local getmatch="$2" + local description="$3" $IP -6 rule add $match table $RTABLE $IP -6 route get $GW_IP6 $getmatch | grep -q "table $RTABLE" - log_test $? 0 "rule6 check: $1" + log_test $? 0 "rule6 check: $description" fib_rule6_del_by_pref "$match" - log_test $? 0 "rule6 del by pref: $match" + log_test $? 0 "rule6 del by pref: $description" +} + +fib_rule6_test_reject() +{ + local match="$1" + local rc + + $IP -6 rule add $match table $RTABLE 2>/dev/null + rc=$? + log_test $rc 2 "rule6 check: $match" + + if [ $rc -eq 0 ]; then + $IP -6 rule del $match table $RTABLE + fi } fib_rule6_test() { + local getmatch + local match + local cnt + # setup the fib rule redirect route $IP -6 route add table $RTABLE default via $GW_IP6 dev $DEV onlink @@ -121,8 +202,21 @@ fib_rule6_test() match="from $SRC_IP6 iif $DEV" fib_rule6_test_match_n_redirect "$match" "$match" "iif redirect to table" + # Reject dsfield (tos) options which have ECN bits set + for cnt in $(seq 1 3); do + match="dsfield $cnt" + fib_rule6_test_reject "$match" + done + + # Don't take ECN bits into account when matching on dsfield match="tos 0x10" - fib_rule6_test_match_n_redirect "$match" "$match" "tos redirect to table" + for cnt in "0x10" "0x11" "0x12" "0x13"; do + # Using option 'tos' instead of 'dsfield' as old iproute2 + # versions don't support 'dsfield' in ip rule show. + getmatch="tos $cnt" + fib_rule6_test_match_n_redirect "$match" "$getmatch" \ + "$getmatch redirect to table" + done match="fwmark 0x64" getmatch="mark 0x64" @@ -154,6 +248,37 @@ fib_rule6_test() fi } +# Verify that the IPV6_TCLASS option of UDPv6 and TCPv6 sockets is properly +# taken into account when connecting the socket and when sending packets. +fib_rule6_connect_test() +{ + local dsfield + + if ! check_nettest; then + echo "SKIP: Could not run test without nettest tool" + return + fi + + setup_peer + $IP -6 rule add dsfield 0x04 table $RTABLE_PEER + + # Combine the base DS Field value (0x04) with all possible ECN values + # (Not-ECT: 0, ECT(1): 1, ECT(0): 2, CE: 3). + # The ECN bits shouldn't influence the result of the test. + for dsfield in 0x04 0x05 0x06 0x07; do + nettest -q -6 -B -t 5 -N $testns -O $peerns -U -D \ + -Q "${dsfield}" -l 2001:db8::1:11 -r 2001:db8::1:11 + log_test $? 0 "rule6 dsfield udp connect (dsfield ${dsfield})" + + nettest -q -6 -B -t 5 -N $testns -O $peerns -Q "${dsfield}" \ + -l 2001:db8::1:11 -r 2001:db8::1:11 + log_test $? 0 "rule6 dsfield tcp connect (dsfield ${dsfield})" + done + + $IP -6 rule del dsfield 0x04 table $RTABLE_PEER + cleanup_peer +} + fib_rule4_del() { $IP rule del $1 @@ -162,7 +287,7 @@ fib_rule4_del() fib_rule4_del_by_pref() { - pref=$($IP rule show | grep "$1 lookup $TABLE" | cut -d ":" -f 1) + pref=$($IP rule show $1 table $RTABLE | cut -d ":" -f 1) $IP rule del pref $pref } @@ -170,17 +295,36 @@ fib_rule4_test_match_n_redirect() { local match="$1" local getmatch="$2" + local description="$3" $IP rule add $match table $RTABLE $IP route get $GW_IP4 $getmatch | grep -q "table $RTABLE" - log_test $? 0 "rule4 check: $1" + log_test $? 0 "rule4 check: $description" fib_rule4_del_by_pref "$match" - log_test $? 0 "rule4 del by pref: $match" + log_test $? 0 "rule4 del by pref: $description" +} + +fib_rule4_test_reject() +{ + local match="$1" + local rc + + $IP rule add $match table $RTABLE 2>/dev/null + rc=$? + log_test $rc 2 "rule4 check: $match" + + if [ $rc -eq 0 ]; then + $IP rule del $match table $RTABLE + fi } fib_rule4_test() { + local getmatch + local match + local cnt + # setup the fib rule redirect route $IP route add table $RTABLE default via $GW_IP4 dev $DEV onlink @@ -189,14 +333,27 @@ fib_rule4_test() # need enable forwarding and disable rp_filter temporarily as all the # addresses are in the same subnet and egress device == ingress device. - ip netns exec testns sysctl -w net.ipv4.ip_forward=1 - ip netns exec testns sysctl -w net.ipv4.conf.$DEV.rp_filter=0 + ip netns exec $testns sysctl -qw net.ipv4.ip_forward=1 + ip netns exec $testns sysctl -qw net.ipv4.conf.$DEV.rp_filter=0 match="from $SRC_IP iif $DEV" fib_rule4_test_match_n_redirect "$match" "$match" "iif redirect to table" - ip netns exec testns sysctl -w net.ipv4.ip_forward=0 + ip netns exec $testns sysctl -qw net.ipv4.ip_forward=0 + + # Reject dsfield (tos) options which have ECN bits set + for cnt in $(seq 1 3); do + match="dsfield $cnt" + fib_rule4_test_reject "$match" + done + # Don't take ECN bits into account when matching on dsfield match="tos 0x10" - fib_rule4_test_match_n_redirect "$match" "$match" "tos redirect to table" + for cnt in "0x10" "0x11" "0x12" "0x13"; do + # Using option 'tos' instead of 'dsfield' as old iproute2 + # versions don't support 'dsfield' in ip rule show. + getmatch="tos $cnt" + fib_rule4_test_match_n_redirect "$match" "$getmatch" \ + "$getmatch redirect to table" + done match="fwmark 0x64" getmatch="mark 0x64" @@ -228,6 +385,37 @@ fib_rule4_test() fi } +# Verify that the IP_TOS option of UDPv4 and TCPv4 sockets is properly taken +# into account when connecting the socket and when sending packets. +fib_rule4_connect_test() +{ + local dsfield + + if ! check_nettest; then + echo "SKIP: Could not run test without nettest tool" + return + fi + + setup_peer + $IP -4 rule add dsfield 0x04 table $RTABLE_PEER + + # Combine the base DS Field value (0x04) with all possible ECN values + # (Not-ECT: 0, ECT(1): 1, ECT(0): 2, CE: 3). + # The ECN bits shouldn't influence the result of the test. + for dsfield in 0x04 0x05 0x06 0x07; do + nettest -q -B -t 5 -N $testns -O $peerns -D -U -Q "${dsfield}" \ + -l 198.51.100.11 -r 198.51.100.11 + log_test $? 0 "rule4 dsfield udp connect (dsfield ${dsfield})" + + nettest -q -B -t 5 -N $testns -O $peerns -Q "${dsfield}" \ + -l 198.51.100.11 -r 198.51.100.11 + log_test $? 0 "rule4 dsfield tcp connect (dsfield ${dsfield})" + done + + $IP -4 rule del dsfield 0x04 table $RTABLE_PEER + cleanup_peer +} + run_fibrule_tests() { log_section "IPv4 fib rule" @@ -235,21 +423,55 @@ run_fibrule_tests() log_section "IPv6 fib rule" fib_rule6_test } +################################################################################ +# usage + +usage() +{ + cat <<EOF +usage: ${0##*/} OPTS + + -t <test> Test(s) to run (default: all) + (options: $TESTS) +EOF +} + +################################################################################ +# main + +while getopts ":t:h" opt; do + case $opt in + t) TESTS=$OPTARG;; + h) usage; exit 0;; + *) usage; exit 1;; + esac +done if [ "$(id -u)" -ne 0 ];then echo "SKIP: Need root privileges" - exit 0 + exit $ksft_skip fi if [ ! -x "$(command -v ip)" ]; then echo "SKIP: Could not run test without ip tool" - exit 0 + exit $ksft_skip fi # start clean cleanup &> /dev/null setup -run_fibrule_tests +for t in $TESTS +do + case $t in + fib_rule6_test|fib_rule6) fib_rule6_test;; + fib_rule4_test|fib_rule4) fib_rule4_test;; + fib_rule6_connect_test|fib_rule6_connect) fib_rule6_connect_test;; + fib_rule4_connect_test|fib_rule4_connect) fib_rule4_connect_test;; + + help) echo "Test names: $TESTS"; exit 0;; + + esac +done cleanup if [ "$TESTS" != "none" ]; then diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh index 84205c3a55eb..73895711cdf4 100755 --- a/tools/testing/selftests/net/fib_tests.sh +++ b/tools/testing/selftests/net/fib_tests.sh @@ -3,19 +3,19 @@ # This test is for checking IPv4 and IPv6 FIB behavior in response to # different events. - +source lib.sh ret=0 -# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 # all tests in this script. Can be overridden with -t option -TESTS="unregister down carrier nexthop suppress ipv6_rt ipv4_rt ipv6_addr_metric ipv4_addr_metric ipv6_route_metrics ipv4_route_metrics ipv4_route_v6_gw rp_filter ipv4_del_addr" +TESTS="unregister down carrier nexthop suppress ipv6_notify ipv4_notify \ + ipv6_rt ipv4_rt ipv6_addr_metric ipv4_addr_metric ipv6_route_metrics \ + ipv4_route_metrics ipv4_route_v6_gw rp_filter ipv4_del_addr \ + ipv6_del_addr ipv4_mangle ipv6_mangle ipv4_bcast_neigh fib6_gc_test \ + ipv4_mpath_list ipv6_mpath_list" VERBOSE=0 PAUSE_ON_FAIL=no PAUSE=no -IP="ip -netns ns1" -NS_EXEC="ip netns exec ns1" which ping6 > /dev/null 2>&1 && ping6=$(which ping6) || ping6=$(which ping) @@ -51,11 +51,11 @@ log_test() setup() { set -e - ip netns add ns1 - ip netns set ns1 auto - $IP link set dev lo up - ip netns exec ns1 sysctl -qw net.ipv4.ip_forward=1 - ip netns exec ns1 sysctl -qw net.ipv6.conf.all.forwarding=1 + setup_ns ns1 + IP="$(which ip) -netns $ns1" + NS_EXEC="$(which ip) netns exec $ns1" + ip netns exec $ns1 sysctl -qw net.ipv4.ip_forward=1 + ip netns exec $ns1 sysctl -qw net.ipv6.conf.all.forwarding=1 $IP link add dummy0 type dummy $IP link set dev dummy0 up @@ -68,8 +68,7 @@ setup() cleanup() { $IP link del dev dummy0 &> /dev/null - ip netns del ns1 - ip netns del ns2 &> /dev/null + cleanup_ns $ns1 $ns2 } get_linklocal() @@ -444,24 +443,60 @@ fib_rp_filter_test() setup set -e + setup_ns ns2 + + $IP link add name veth1 type veth peer name veth2 + $IP link set dev veth2 netns $ns2 + $IP address add 192.0.2.1/24 dev veth1 + ip -netns $ns2 address add 192.0.2.1/24 dev veth2 + $IP link set dev veth1 up + ip -netns $ns2 link set dev veth2 up + $IP link set dev lo address 52:54:00:6a:c7:5e - $IP link set dummy0 address 52:54:00:6a:c7:5e - $IP link add dummy1 type dummy - $IP link set dummy1 address 52:54:00:6a:c7:5e - $IP link set dev dummy1 up + $IP link set dev veth1 address 52:54:00:6a:c7:5e + ip -netns $ns2 link set dev lo address 52:54:00:6a:c7:5e + ip -netns $ns2 link set dev veth2 address 52:54:00:6a:c7:5e + + # 1. (ns2) redirect lo's egress to veth2's egress + ip netns exec $ns2 tc qdisc add dev lo parent root handle 1: fq_codel + ip netns exec $ns2 tc filter add dev lo parent 1: protocol arp basic \ + action mirred egress redirect dev veth2 + ip netns exec $ns2 tc filter add dev lo parent 1: protocol ip basic \ + action mirred egress redirect dev veth2 + + # 2. (ns1) redirect veth1's ingress to lo's ingress + $NS_EXEC tc qdisc add dev veth1 ingress + $NS_EXEC tc filter add dev veth1 ingress protocol arp basic \ + action mirred ingress redirect dev lo + $NS_EXEC tc filter add dev veth1 ingress protocol ip basic \ + action mirred ingress redirect dev lo + + # 3. (ns1) redirect lo's egress to veth1's egress + $NS_EXEC tc qdisc add dev lo parent root handle 1: fq_codel + $NS_EXEC tc filter add dev lo parent 1: protocol arp basic \ + action mirred egress redirect dev veth1 + $NS_EXEC tc filter add dev lo parent 1: protocol ip basic \ + action mirred egress redirect dev veth1 + + # 4. (ns2) redirect veth2's ingress to lo's ingress + ip netns exec $ns2 tc qdisc add dev veth2 ingress + ip netns exec $ns2 tc filter add dev veth2 ingress protocol arp basic \ + action mirred ingress redirect dev lo + ip netns exec $ns2 tc filter add dev veth2 ingress protocol ip basic \ + action mirred ingress redirect dev lo + $NS_EXEC sysctl -qw net.ipv4.conf.all.rp_filter=1 $NS_EXEC sysctl -qw net.ipv4.conf.all.accept_local=1 $NS_EXEC sysctl -qw net.ipv4.conf.all.route_localnet=1 - - $NS_EXEC tc qd add dev dummy1 parent root handle 1: fq_codel - $NS_EXEC tc filter add dev dummy1 parent 1: protocol arp basic action mirred egress redirect dev lo - $NS_EXEC tc filter add dev dummy1 parent 1: protocol ip basic action mirred egress redirect dev lo + ip netns exec $ns2 sysctl -qw net.ipv4.conf.all.rp_filter=1 + ip netns exec $ns2 sysctl -qw net.ipv4.conf.all.accept_local=1 + ip netns exec $ns2 sysctl -qw net.ipv4.conf.all.route_localnet=1 set +e - run_cmd "ip netns exec ns1 ping -I dummy1 -w1 -c1 198.51.100.1" + run_cmd "ip netns exec $ns2 ping -w1 -c1 192.0.2.1" log_test $? 0 "rp_filter passes local packets" - run_cmd "ip netns exec ns1 ping -I dummy1 -w1 -c1 127.0.0.1" + run_cmd "ip netns exec $ns2 ping -w1 -c1 127.0.0.1" log_test $? 0 "rp_filter passes loopback packets" cleanup @@ -616,6 +651,264 @@ fib_nexthop_test() cleanup } +fib6_notify_test() +{ + setup + + echo + echo "Fib6 info length calculation in route notify test" + set -e + + for i in 10 20 30 40 50 60 70; + do + $IP link add dummy_$i type dummy + $IP link set dev dummy_$i up + $IP -6 address add 2001:$i::1/64 dev dummy_$i + done + + $NS_EXEC ip monitor route &> errors.txt & + sleep 2 + + $IP -6 route add 2001::/64 \ + nexthop via 2001:10::2 dev dummy_10 \ + nexthop encap ip6 dst 2002::20 via 2001:20::2 dev dummy_20 \ + nexthop encap ip6 dst 2002::30 via 2001:30::2 dev dummy_30 \ + nexthop encap ip6 dst 2002::40 via 2001:40::2 dev dummy_40 \ + nexthop encap ip6 dst 2002::50 via 2001:50::2 dev dummy_50 \ + nexthop encap ip6 dst 2002::60 via 2001:60::2 dev dummy_60 \ + nexthop encap ip6 dst 2002::70 via 2001:70::2 dev dummy_70 + + set +e + + err=`cat errors.txt |grep "Message too long"` + if [ -z "$err" ];then + ret=0 + else + ret=1 + fi + + log_test $ret 0 "ipv6 route add notify" + + { kill %% && wait %%; } 2>/dev/null + + #rm errors.txt + + cleanup &> /dev/null +} + + +fib_notify_test() +{ + setup + + echo + echo "Fib4 info length calculation in route notify test" + + set -e + + for i in 10 20 30 40 50 60 70; + do + $IP link add dummy_$i type dummy + $IP link set dev dummy_$i up + $IP address add 20.20.$i.2/24 dev dummy_$i + done + + $NS_EXEC ip monitor route &> errors.txt & + sleep 2 + + $IP route add 10.0.0.0/24 \ + nexthop via 20.20.10.1 dev dummy_10 \ + nexthop encap ip dst 192.168.10.20 via 20.20.20.1 dev dummy_20 \ + nexthop encap ip dst 192.168.10.30 via 20.20.30.1 dev dummy_30 \ + nexthop encap ip dst 192.168.10.40 via 20.20.40.1 dev dummy_40 \ + nexthop encap ip dst 192.168.10.50 via 20.20.50.1 dev dummy_50 \ + nexthop encap ip dst 192.168.10.60 via 20.20.60.1 dev dummy_60 \ + nexthop encap ip dst 192.168.10.70 via 20.20.70.1 dev dummy_70 + + set +e + + err=`cat errors.txt |grep "Message too long"` + if [ -z "$err" ];then + ret=0 + else + ret=1 + fi + + log_test $ret 0 "ipv4 route add notify" + + { kill %% && wait %%; } 2>/dev/null + + rm errors.txt + + cleanup &> /dev/null +} + +# Create a new dummy_10 to remove all associated routes. +reset_dummy_10() +{ + $IP link del dev dummy_10 + + $IP link add dummy_10 type dummy + $IP link set dev dummy_10 up + $IP -6 address add 2001:10::1/64 dev dummy_10 +} + +check_rt_num() +{ + local expected=$1 + local num=$2 + + if [ $num -ne $expected ]; then + echo "FAIL: Expected $expected routes, got $num" + ret=1 + else + ret=0 + fi +} + +check_rt_num_clean() +{ + local expected=$1 + local num=$2 + + if [ $num -ne $expected ]; then + log_test 1 0 "expected $expected routes, got $num" + set +e + cleanup &> /dev/null + return 1 + fi + return 0 +} + +fib6_gc_test() +{ + setup + + echo + echo "Fib6 garbage collection test" + set -e + + EXPIRE=5 + GC_WAIT_TIME=$((EXPIRE * 2 + 2)) + + # Check expiration of routes every $EXPIRE seconds (GC) + $NS_EXEC sysctl -wq net.ipv6.route.gc_interval=$EXPIRE + + $IP link add dummy_10 type dummy + $IP link set dev dummy_10 up + $IP -6 address add 2001:10::1/64 dev dummy_10 + + $NS_EXEC sysctl -wq net.ipv6.route.flush=1 + + # Temporary routes + for i in $(seq 1 5); do + # Expire route after $EXPIRE seconds + $IP -6 route add 2001:20::$i \ + via 2001:10::2 dev dummy_10 expires $EXPIRE + done + sleep $GC_WAIT_TIME + $NS_EXEC sysctl -wq net.ipv6.route.flush=1 + check_rt_num 0 $($IP -6 route list |grep expires|wc -l) + log_test $ret 0 "ipv6 route garbage collection" + + reset_dummy_10 + + # Permanent routes + for i in $(seq 1 5); do + $IP -6 route add 2001:30::$i \ + via 2001:10::2 dev dummy_10 + done + # Temporary routes + for i in $(seq 1 5); do + # Expire route after $EXPIRE seconds + $IP -6 route add 2001:20::$i \ + via 2001:10::2 dev dummy_10 expires $EXPIRE + done + # Wait for GC + sleep $GC_WAIT_TIME + check_rt_num 0 $($IP -6 route list |grep expires|wc -l) + log_test $ret 0 "ipv6 route garbage collection (with permanent routes)" + + reset_dummy_10 + + # Permanent routes + for i in $(seq 1 5); do + $IP -6 route add 2001:20::$i \ + via 2001:10::2 dev dummy_10 + done + # Replace with temporary routes + for i in $(seq 1 5); do + # Expire route after $EXPIRE seconds + $IP -6 route replace 2001:20::$i \ + via 2001:10::2 dev dummy_10 expires $EXPIRE + done + # Wait for GC + sleep $GC_WAIT_TIME + check_rt_num 0 $($IP -6 route list |grep expires|wc -l) + log_test $ret 0 "ipv6 route garbage collection (replace with expires)" + + reset_dummy_10 + + # Temporary routes + for i in $(seq 1 5); do + # Expire route after $EXPIRE seconds + $IP -6 route add 2001:20::$i \ + via 2001:10::2 dev dummy_10 expires $EXPIRE + done + # Replace with permanent routes + for i in $(seq 1 5); do + $IP -6 route replace 2001:20::$i \ + via 2001:10::2 dev dummy_10 + done + check_rt_num_clean 0 $($IP -6 route list |grep expires|wc -l) || return + + # Wait for GC + sleep $GC_WAIT_TIME + check_rt_num 5 $($IP -6 route list |grep -v expires|grep 2001:20::|wc -l) + log_test $ret 0 "ipv6 route garbage collection (replace with permanent)" + + # ra6 is required for the next test. (ipv6toolkit) + if [ ! -x "$(command -v ra6)" ]; then + echo "SKIP: ra6 not found." + set +e + cleanup &> /dev/null + return + fi + + # Delete dummy_10 and remove all routes + $IP link del dev dummy_10 + + # Create a pair of veth devices to send a RA message from one + # device to another. + $IP link add veth1 type veth peer name veth2 + $IP link set dev veth1 up + $IP link set dev veth2 up + $IP -6 address add 2001:10::1/64 dev veth1 nodad + $IP -6 address add 2001:10::2/64 dev veth2 nodad + + # Make veth1 ready to receive RA messages. + $NS_EXEC sysctl -wq net.ipv6.conf.veth1.accept_ra=2 + + # Send a RA message with a route from veth2 to veth1. + $NS_EXEC ra6 -i veth2 -d 2001:10::1 -t $EXPIRE + + # Wait for the RA message. + sleep 1 + + # systemd may mess up the test. You syould make sure that + # systemd-networkd.service and systemd-networkd.socket are stopped. + check_rt_num_clean 1 $($IP -6 route list|grep expires|wc -l) || return + + # Wait for GC + sleep $GC_WAIT_TIME + check_rt_num 0 $($IP -6 route list |grep expires|wc -l) + log_test $ret 0 "ipv6 route garbage collection (RA message)" + + set +e + + cleanup &> /dev/null +} + fib_suppress_test() { echo @@ -762,34 +1055,32 @@ route_setup() [ "${VERBOSE}" = "1" ] && set -x set -e - ip netns add ns2 - ip netns set ns2 auto - ip -netns ns2 link set dev lo up - ip netns exec ns2 sysctl -qw net.ipv4.ip_forward=1 - ip netns exec ns2 sysctl -qw net.ipv6.conf.all.forwarding=1 + setup_ns ns2 + ip netns exec $ns2 sysctl -qw net.ipv4.ip_forward=1 + ip netns exec $ns2 sysctl -qw net.ipv6.conf.all.forwarding=1 $IP li add veth1 type veth peer name veth2 $IP li add veth3 type veth peer name veth4 $IP li set veth1 up $IP li set veth3 up - $IP li set veth2 netns ns2 up - $IP li set veth4 netns ns2 up - ip -netns ns2 li add dummy1 type dummy - ip -netns ns2 li set dummy1 up + $IP li set veth2 netns $ns2 up + $IP li set veth4 netns $ns2 up + ip -netns $ns2 li add dummy1 type dummy + ip -netns $ns2 li set dummy1 up $IP -6 addr add 2001:db8:101::1/64 dev veth1 nodad $IP -6 addr add 2001:db8:103::1/64 dev veth3 nodad $IP addr add 172.16.101.1/24 dev veth1 $IP addr add 172.16.103.1/24 dev veth3 - ip -netns ns2 -6 addr add 2001:db8:101::2/64 dev veth2 nodad - ip -netns ns2 -6 addr add 2001:db8:103::2/64 dev veth4 nodad - ip -netns ns2 -6 addr add 2001:db8:104::1/64 dev dummy1 nodad + ip -netns $ns2 -6 addr add 2001:db8:101::2/64 dev veth2 nodad + ip -netns $ns2 -6 addr add 2001:db8:103::2/64 dev veth4 nodad + ip -netns $ns2 -6 addr add 2001:db8:104::1/64 dev dummy1 nodad - ip -netns ns2 addr add 172.16.101.2/24 dev veth2 - ip -netns ns2 addr add 172.16.103.2/24 dev veth4 - ip -netns ns2 addr add 172.16.104.1/24 dev dummy1 + ip -netns $ns2 addr add 172.16.101.2/24 dev veth2 + ip -netns $ns2 addr add 172.16.103.2/24 dev veth4 + ip -netns $ns2 addr add 172.16.104.1/24 dev dummy1 set +e } @@ -949,12 +1240,25 @@ ipv6_rt_replace() ipv6_rt_replace_mpath } +ipv6_rt_dsfield() +{ + echo + echo "IPv6 route with dsfield tests" + + run_cmd "$IP -6 route flush 2001:db8:102::/64" + + # IPv6 doesn't support routing based on dsfield + run_cmd "$IP -6 route add 2001:db8:102::/64 dsfield 0x04 via 2001:db8:101::2" + log_test $? 2 "Reject route with dsfield" +} + ipv6_route_test() { route_setup ipv6_rt_add ipv6_rt_replace + ipv6_rt_dsfield route_cleanup } @@ -1028,7 +1332,7 @@ ipv6_addr_metric_test() log_test $rc 0 "Modify metric of address" # verify prefix route removed on down - run_cmd "ip netns exec ns1 sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1" + run_cmd "ip netns exec $ns1 sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1" run_cmd "$IP li set dev dummy2 down" rc=$? if [ $rc -eq 0 ]; then @@ -1055,7 +1359,6 @@ ipv6_addr_metric_test() check_route6 "2001:db8:104::1 dev dummy2 proto kernel metric 260" log_test $? 0 "Set metric with peer route on local side" - log_test $? 0 "User specified metric on local address" check_route6 "2001:db8:104::2 dev dummy2 proto kernel metric 260" log_test $? 0 "Set metric with peer route on peer side" @@ -1135,7 +1438,7 @@ ipv6_route_metrics_test() log_test $rc 0 "Multipath route with mtu metric" $IP -6 ro add 2001:db8:104::/64 via 2001:db8:101::2 mtu 1300 - run_cmd "ip netns exec ns1 ${ping6} -w1 -c1 -s 1500 2001:db8:104::1" + run_cmd "ip netns exec $ns1 ${ping6} -w1 -c1 -s 1500 2001:db8:104::1" log_test $? 0 "Using route with mtu metric" run_cmd "$IP -6 ro add 2001:db8:114::/64 via 2001:db8:101::2 congctl lock foo" @@ -1385,12 +1688,113 @@ ipv4_rt_replace() ipv4_rt_replace_mpath } +# checks that cached input route on VRF port is deleted +# when VRF is deleted +ipv4_local_rt_cache() +{ + run_cmd "ip addr add 10.0.0.1/32 dev lo" + run_cmd "setup_ns test-ns" + run_cmd "ip link add veth-outside type veth peer name veth-inside" + run_cmd "ip link add vrf-100 type vrf table 1100" + run_cmd "ip link set veth-outside master vrf-100" + run_cmd "ip link set veth-inside netns $test-ns" + run_cmd "ip link set veth-outside up" + run_cmd "ip link set vrf-100 up" + run_cmd "ip route add 10.1.1.1/32 dev veth-outside table 1100" + run_cmd "ip netns exec $test-ns ip link set veth-inside up" + run_cmd "ip netns exec $test-ns ip addr add 10.1.1.1/32 dev veth-inside" + run_cmd "ip netns exec $test-ns ip route add 10.0.0.1/32 dev veth-inside" + run_cmd "ip netns exec $test-ns ip route add default via 10.0.0.1" + run_cmd "ip netns exec $test-ns ping 10.0.0.1 -c 1 -i 1" + run_cmd "ip link delete vrf-100" + + # if we do not hang test is a success + log_test $? 0 "Cached route removed from VRF port device" +} + +ipv4_rt_dsfield() +{ + echo + echo "IPv4 route with dsfield tests" + + run_cmd "$IP route flush 172.16.102.0/24" + + # New routes should reject dsfield options that interfere with ECN + run_cmd "$IP route add 172.16.102.0/24 dsfield 0x01 via 172.16.101.2" + log_test $? 2 "Reject route with dsfield 0x01" + + run_cmd "$IP route add 172.16.102.0/24 dsfield 0x02 via 172.16.101.2" + log_test $? 2 "Reject route with dsfield 0x02" + + run_cmd "$IP route add 172.16.102.0/24 dsfield 0x03 via 172.16.101.2" + log_test $? 2 "Reject route with dsfield 0x03" + + # A generic route that doesn't take DSCP into account + run_cmd "$IP route add 172.16.102.0/24 via 172.16.101.2" + + # A more specific route for DSCP 0x10 + run_cmd "$IP route add 172.16.102.0/24 dsfield 0x10 via 172.16.103.2" + + # DSCP 0x10 should match the specific route, no matter the ECN bits + $IP route get fibmatch 172.16.102.1 dsfield 0x10 | \ + grep -q "via 172.16.103.2" + log_test $? 0 "IPv4 route with DSCP and ECN:Not-ECT" + + $IP route get fibmatch 172.16.102.1 dsfield 0x11 | \ + grep -q "via 172.16.103.2" + log_test $? 0 "IPv4 route with DSCP and ECN:ECT(1)" + + $IP route get fibmatch 172.16.102.1 dsfield 0x12 | \ + grep -q "via 172.16.103.2" + log_test $? 0 "IPv4 route with DSCP and ECN:ECT(0)" + + $IP route get fibmatch 172.16.102.1 dsfield 0x13 | \ + grep -q "via 172.16.103.2" + log_test $? 0 "IPv4 route with DSCP and ECN:CE" + + # Unknown DSCP should match the generic route, no matter the ECN bits + $IP route get fibmatch 172.16.102.1 dsfield 0x14 | \ + grep -q "via 172.16.101.2" + log_test $? 0 "IPv4 route with unknown DSCP and ECN:Not-ECT" + + $IP route get fibmatch 172.16.102.1 dsfield 0x15 | \ + grep -q "via 172.16.101.2" + log_test $? 0 "IPv4 route with unknown DSCP and ECN:ECT(1)" + + $IP route get fibmatch 172.16.102.1 dsfield 0x16 | \ + grep -q "via 172.16.101.2" + log_test $? 0 "IPv4 route with unknown DSCP and ECN:ECT(0)" + + $IP route get fibmatch 172.16.102.1 dsfield 0x17 | \ + grep -q "via 172.16.101.2" + log_test $? 0 "IPv4 route with unknown DSCP and ECN:CE" + + # Null DSCP should match the generic route, no matter the ECN bits + $IP route get fibmatch 172.16.102.1 dsfield 0x00 | \ + grep -q "via 172.16.101.2" + log_test $? 0 "IPv4 route with no DSCP and ECN:Not-ECT" + + $IP route get fibmatch 172.16.102.1 dsfield 0x01 | \ + grep -q "via 172.16.101.2" + log_test $? 0 "IPv4 route with no DSCP and ECN:ECT(1)" + + $IP route get fibmatch 172.16.102.1 dsfield 0x02 | \ + grep -q "via 172.16.101.2" + log_test $? 0 "IPv4 route with no DSCP and ECN:ECT(0)" + + $IP route get fibmatch 172.16.102.1 dsfield 0x03 | \ + grep -q "via 172.16.101.2" + log_test $? 0 "IPv4 route with no DSCP and ECN:CE" +} + ipv4_route_test() { route_setup ipv4_rt_add ipv4_rt_replace + ipv4_local_rt_cache + ipv4_rt_dsfield route_cleanup } @@ -1531,7 +1935,7 @@ ipv4_route_metrics_test() log_test $rc 0 "Multipath route with mtu metric" $IP ro add 172.16.104.0/24 via 172.16.101.2 mtu 1300 - run_cmd "ip netns exec ns1 ping -w1 -c1 -s 1500 172.16.104.1" + run_cmd "ip netns exec $ns1 ping -w1 -c1 -s 1500 172.16.104.1" log_test $? 0 "Using route with mtu metric" run_cmd "$IP ro add 172.16.111.0/24 via 172.16.101.2 congctl lock foo" @@ -1559,13 +1963,21 @@ ipv4_del_addr_test() $IP addr add dev dummy1 172.16.104.1/24 $IP addr add dev dummy1 172.16.104.11/24 + $IP addr add dev dummy1 172.16.104.12/24 + $IP addr add dev dummy1 172.16.104.13/24 $IP addr add dev dummy2 172.16.104.1/24 $IP addr add dev dummy2 172.16.104.11/24 + $IP addr add dev dummy2 172.16.104.12/24 $IP route add 172.16.105.0/24 via 172.16.104.2 src 172.16.104.11 + $IP route add 172.16.106.0/24 dev lo src 172.16.104.12 + $IP route add table 0 172.16.107.0/24 via 172.16.104.2 src 172.16.104.13 $IP route add vrf red 172.16.105.0/24 via 172.16.104.2 src 172.16.104.11 + $IP route add vrf red 172.16.106.0/24 dev lo src 172.16.104.12 set +e # removing address from device in vrf should only remove route from vrf table + echo " Regular FIB info" + $IP addr del dev dummy2 172.16.104.11/24 $IP ro ls vrf red | grep -q 172.16.105.0/24 log_test $? 1 "Route removed from VRF when source address deleted" @@ -1583,11 +1995,189 @@ ipv4_del_addr_test() $IP ro ls vrf red | grep -q 172.16.105.0/24 log_test $? 0 "Route in VRF is not removed by address delete" + # removing address from device in vrf should only remove route from vrf + # table even when the associated fib info only differs in table ID + echo " Identical FIB info with different table ID" + + $IP addr del dev dummy2 172.16.104.12/24 + $IP ro ls vrf red | grep -q 172.16.106.0/24 + log_test $? 1 "Route removed from VRF when source address deleted" + + $IP ro ls | grep -q 172.16.106.0/24 + log_test $? 0 "Route in default VRF not removed" + + $IP addr add dev dummy2 172.16.104.12/24 + $IP route add vrf red 172.16.106.0/24 dev lo src 172.16.104.12 + + $IP addr del dev dummy1 172.16.104.12/24 + $IP ro ls | grep -q 172.16.106.0/24 + log_test $? 1 "Route removed in default VRF when source address deleted" + + $IP ro ls vrf red | grep -q 172.16.106.0/24 + log_test $? 0 "Route in VRF is not removed by address delete" + + # removing address from device in default vrf should remove route from + # the default vrf even when route was inserted with a table ID of 0. + echo " Table ID 0" + + $IP addr del dev dummy1 172.16.104.13/24 + $IP ro ls | grep -q 172.16.107.0/24 + log_test $? 1 "Route removed in default VRF when source address deleted" + $IP li del dummy1 $IP li del dummy2 cleanup } +ipv6_del_addr_test() +{ + echo + echo "IPv6 delete address route tests" + + setup + + set -e + for i in $(seq 6); do + $IP li add dummy${i} up type dummy + done + + $IP li add red up type vrf table 1111 + $IP ro add vrf red unreachable default + for i in $(seq 4 6); do + $IP li set dummy${i} vrf red + done + + $IP addr add dev dummy1 fe80::1/128 + $IP addr add dev dummy1 2001:db8:101::1/64 + $IP addr add dev dummy1 2001:db8:101::10/64 + $IP addr add dev dummy1 2001:db8:101::11/64 + $IP addr add dev dummy1 2001:db8:101::12/64 + $IP addr add dev dummy1 2001:db8:101::13/64 + $IP addr add dev dummy1 2001:db8:101::14/64 + $IP addr add dev dummy1 2001:db8:101::15/64 + $IP addr add dev dummy2 fe80::1/128 + $IP addr add dev dummy2 2001:db8:101::1/64 + $IP addr add dev dummy2 2001:db8:101::11/64 + $IP addr add dev dummy3 fe80::1/128 + + $IP addr add dev dummy4 2001:db8:101::1/64 + $IP addr add dev dummy4 2001:db8:101::10/64 + $IP addr add dev dummy4 2001:db8:101::11/64 + $IP addr add dev dummy4 2001:db8:101::12/64 + $IP addr add dev dummy4 2001:db8:101::13/64 + $IP addr add dev dummy4 2001:db8:101::14/64 + $IP addr add dev dummy5 2001:db8:101::1/64 + $IP addr add dev dummy5 2001:db8:101::11/64 + + # Single device using src address + $IP route add 2001:db8:110::/64 dev dummy3 src 2001:db8:101::10 + # Two devices with the same source address + $IP route add 2001:db8:111::/64 dev dummy3 src 2001:db8:101::11 + # VRF with single device using src address + $IP route add vrf red 2001:db8:110::/64 dev dummy6 src 2001:db8:101::10 + # VRF with two devices using src address + $IP route add vrf red 2001:db8:111::/64 dev dummy6 src 2001:db8:101::11 + # src address and nexthop dev in same VRF + $IP route add 2001:db8:112::/64 dev dummy3 src 2001:db8:101::12 + $IP route add vrf red 2001:db8:112::/64 dev dummy6 src 2001:db8:101::12 + # src address and nexthop device in different VRF + $IP route add 2001:db8:113::/64 dev lo src 2001:db8:101::13 + $IP route add vrf red 2001:db8:113::/64 dev lo src 2001:db8:101::13 + # table ID 0 + $IP route add table 0 2001:db8:115::/64 via 2001:db8:101::2 src 2001:db8:101::15 + # Link local source route + $IP route add 2001:db8:116::/64 dev dummy2 src fe80::1 + $IP route add 2001:db8:117::/64 dev dummy3 src fe80::1 + set +e + + echo " Single device using src address" + + $IP addr del dev dummy1 2001:db8:101::10/64 + $IP -6 route show | grep -q "src 2001:db8:101::10 " + log_test $? 1 "Prefsrc removed when src address removed on other device" + + echo " Two devices with the same source address" + + $IP addr del dev dummy1 2001:db8:101::11/64 + $IP -6 route show | grep -q "src 2001:db8:101::11 " + log_test $? 0 "Prefsrc not removed when src address exist on other device" + + $IP addr del dev dummy2 2001:db8:101::11/64 + $IP -6 route show | grep -q "src 2001:db8:101::11 " + log_test $? 1 "Prefsrc removed when src address removed on all devices" + + echo " VRF with single device using src address" + + $IP addr del dev dummy4 2001:db8:101::10/64 + $IP -6 route show vrf red | grep -q "src 2001:db8:101::10 " + log_test $? 1 "Prefsrc removed when src address removed on other device" + + echo " VRF with two devices using src address" + + $IP addr del dev dummy4 2001:db8:101::11/64 + $IP -6 route show vrf red | grep -q "src 2001:db8:101::11 " + log_test $? 0 "Prefsrc not removed when src address exist on other device" + + $IP addr del dev dummy5 2001:db8:101::11/64 + $IP -6 route show vrf red | grep -q "src 2001:db8:101::11 " + log_test $? 1 "Prefsrc removed when src address removed on all devices" + + echo " src address and nexthop dev in same VRF" + + $IP addr del dev dummy4 2001:db8:101::12/64 + $IP -6 route show vrf red | grep -q "src 2001:db8:101::12 " + log_test $? 1 "Prefsrc removed from VRF when source address deleted" + $IP -6 route show | grep -q " src 2001:db8:101::12 " + log_test $? 0 "Prefsrc in default VRF not removed" + + $IP addr add dev dummy4 2001:db8:101::12/64 + $IP route replace vrf red 2001:db8:112::/64 dev dummy6 src 2001:db8:101::12 + $IP addr del dev dummy1 2001:db8:101::12/64 + $IP -6 route show vrf red | grep -q "src 2001:db8:101::12 " + log_test $? 0 "Prefsrc not removed from VRF when source address exist" + $IP -6 route show | grep -q " src 2001:db8:101::12 " + log_test $? 1 "Prefsrc in default VRF removed" + + echo " src address and nexthop device in different VRF" + + $IP addr del dev dummy4 2001:db8:101::13/64 + $IP -6 route show vrf red | grep -q "src 2001:db8:101::13 " + log_test $? 0 "Prefsrc not removed from VRF when nexthop dev in diff VRF" + $IP -6 route show | grep -q "src 2001:db8:101::13 " + log_test $? 0 "Prefsrc not removed in default VRF" + + $IP addr add dev dummy4 2001:db8:101::13/64 + $IP addr del dev dummy1 2001:db8:101::13/64 + $IP -6 route show vrf red | grep -q "src 2001:db8:101::13 " + log_test $? 1 "Prefsrc removed from VRF when nexthop dev in diff VRF" + $IP -6 route show | grep -q "src 2001:db8:101::13 " + log_test $? 1 "Prefsrc removed in default VRF" + + echo " Table ID 0" + + $IP addr del dev dummy1 2001:db8:101::15/64 + $IP -6 route show | grep -q "src 2001:db8:101::15" + log_test $? 1 "Prefsrc removed from default VRF when source address deleted" + + echo " Link local source route" + $IP addr del dev dummy1 fe80::1/128 + $IP -6 route show | grep -q "2001:db8:116::/64 dev dummy2 src fe80::1" + log_test $? 0 "Prefsrc not removed when delete ll addr from other dev" + $IP addr del dev dummy2 fe80::1/128 + $IP -6 route show | grep -q "2001:db8:116::/64 dev dummy2 src fe80::1" + log_test $? 1 "Prefsrc removed when delete ll addr" + $IP -6 route show | grep -q "2001:db8:117::/64 dev dummy3 src fe80::1" + log_test $? 0 "Prefsrc not removed when delete ll addr from other dev" + $IP addr add dev dummy1 fe80::1/128 + $IP addr del dev dummy3 fe80::1/128 + $IP -6 route show | grep -q "2001:db8:117::/64 dev dummy3 src fe80::1" + log_test $? 1 "Prefsrc removed even ll addr still exist on other dev" + + for i in $(seq 6); do + $IP li del dummy${i} + done + cleanup +} ipv4_route_v6_gw_test() { @@ -1609,7 +2199,7 @@ ipv4_route_v6_gw_test() check_route "172.16.104.0/24 via inet6 2001:db8:101::2 dev veth1" fi - run_cmd "ip netns exec ns1 ping -w1 -c1 172.16.104.1" + run_cmd "ip netns exec $ns1 ping -w1 -c1 172.16.104.1" log_test $rc 0 "Single path route with IPv6 gateway - ping" run_cmd "$IP ro del 172.16.104.0/24 via inet6 2001:db8:101::2" @@ -1654,6 +2244,362 @@ ipv4_route_v6_gw_test() route_cleanup } +socat_check() +{ + if [ ! -x "$(command -v socat)" ]; then + echo "socat command not found. Skipping test" + return 1 + fi + + return 0 +} + +iptables_check() +{ + iptables -t mangle -L OUTPUT &> /dev/null + if [ $? -ne 0 ]; then + echo "iptables configuration not supported. Skipping test" + return 1 + fi + + return 0 +} + +ip6tables_check() +{ + ip6tables -t mangle -L OUTPUT &> /dev/null + if [ $? -ne 0 ]; then + echo "ip6tables configuration not supported. Skipping test" + return 1 + fi + + return 0 +} + +ipv4_mangle_test() +{ + local rc + + echo + echo "IPv4 mangling tests" + + socat_check || return 1 + iptables_check || return 1 + + route_setup + sleep 2 + + local tmp_file=$(mktemp) + ip netns exec $ns2 socat UDP4-LISTEN:54321,fork $tmp_file & + + # Add a FIB rule and a route that will direct our connection to the + # listening server. + $IP rule add pref 100 ipproto udp sport 12345 dport 54321 table 123 + $IP route add table 123 172.16.101.0/24 dev veth1 + + # Add an unreachable route to the main table that will block our + # connection in case the FIB rule is not hit. + $IP route add unreachable 172.16.101.2/32 + + run_cmd "echo a | $NS_EXEC socat STDIN UDP4:172.16.101.2:54321,sourceport=12345" + log_test $? 0 " Connection with correct parameters" + + run_cmd "echo a | $NS_EXEC socat STDIN UDP4:172.16.101.2:54321,sourceport=11111" + log_test $? 1 " Connection with incorrect parameters" + + # Add a mangling rule and make sure connection is still successful. + $NS_EXEC iptables -t mangle -A OUTPUT -j MARK --set-mark 1 + + run_cmd "echo a | $NS_EXEC socat STDIN UDP4:172.16.101.2:54321,sourceport=12345" + log_test $? 0 " Connection with correct parameters - mangling" + + # Delete the mangling rule and make sure connection is still + # successful. + $NS_EXEC iptables -t mangle -D OUTPUT -j MARK --set-mark 1 + + run_cmd "echo a | $NS_EXEC socat STDIN UDP4:172.16.101.2:54321,sourceport=12345" + log_test $? 0 " Connection with correct parameters - no mangling" + + # Verify connections were indeed successful on server side. + [[ $(cat $tmp_file | wc -l) -eq 3 ]] + log_test $? 0 " Connection check - server side" + + $IP route del unreachable 172.16.101.2/32 + $IP route del table 123 172.16.101.0/24 dev veth1 + $IP rule del pref 100 + + { kill %% && wait %%; } 2>/dev/null + rm $tmp_file + + route_cleanup +} + +ipv6_mangle_test() +{ + local rc + + echo + echo "IPv6 mangling tests" + + socat_check || return 1 + ip6tables_check || return 1 + + route_setup + sleep 2 + + local tmp_file=$(mktemp) + ip netns exec $ns2 socat UDP6-LISTEN:54321,fork $tmp_file & + + # Add a FIB rule and a route that will direct our connection to the + # listening server. + $IP -6 rule add pref 100 ipproto udp sport 12345 dport 54321 table 123 + $IP -6 route add table 123 2001:db8:101::/64 dev veth1 + + # Add an unreachable route to the main table that will block our + # connection in case the FIB rule is not hit. + $IP -6 route add unreachable 2001:db8:101::2/128 + + run_cmd "echo a | $NS_EXEC socat STDIN UDP6:[2001:db8:101::2]:54321,sourceport=12345" + log_test $? 0 " Connection with correct parameters" + + run_cmd "echo a | $NS_EXEC socat STDIN UDP6:[2001:db8:101::2]:54321,sourceport=11111" + log_test $? 1 " Connection with incorrect parameters" + + # Add a mangling rule and make sure connection is still successful. + $NS_EXEC ip6tables -t mangle -A OUTPUT -j MARK --set-mark 1 + + run_cmd "echo a | $NS_EXEC socat STDIN UDP6:[2001:db8:101::2]:54321,sourceport=12345" + log_test $? 0 " Connection with correct parameters - mangling" + + # Delete the mangling rule and make sure connection is still + # successful. + $NS_EXEC ip6tables -t mangle -D OUTPUT -j MARK --set-mark 1 + + run_cmd "echo a | $NS_EXEC socat STDIN UDP6:[2001:db8:101::2]:54321,sourceport=12345" + log_test $? 0 " Connection with correct parameters - no mangling" + + # Verify connections were indeed successful on server side. + [[ $(cat $tmp_file | wc -l) -eq 3 ]] + log_test $? 0 " Connection check - server side" + + $IP -6 route del unreachable 2001:db8:101::2/128 + $IP -6 route del table 123 2001:db8:101::/64 dev veth1 + $IP -6 rule del pref 100 + + { kill %% && wait %%; } 2>/dev/null + rm $tmp_file + + route_cleanup +} + +ip_neigh_get_check() +{ + ip neigh help 2>&1 | grep -q 'ip neigh get' + if [ $? -ne 0 ]; then + echo "iproute2 command does not support neigh get. Skipping test" + return 1 + fi + + return 0 +} + +ipv4_bcast_neigh_test() +{ + local rc + + echo + echo "IPv4 broadcast neighbour tests" + + ip_neigh_get_check || return 1 + + setup + + set -e + run_cmd "$IP neigh add 192.0.2.111 lladdr 00:11:22:33:44:55 nud perm dev dummy0" + run_cmd "$IP neigh add 192.0.2.255 lladdr 00:11:22:33:44:55 nud perm dev dummy0" + + run_cmd "$IP neigh get 192.0.2.111 dev dummy0" + run_cmd "$IP neigh get 192.0.2.255 dev dummy0" + + run_cmd "$IP address add 192.0.2.1/24 broadcast 192.0.2.111 dev dummy0" + + run_cmd "$IP neigh add 203.0.113.111 nud failed dev dummy0" + run_cmd "$IP neigh add 203.0.113.255 nud failed dev dummy0" + + run_cmd "$IP neigh get 203.0.113.111 dev dummy0" + run_cmd "$IP neigh get 203.0.113.255 dev dummy0" + + run_cmd "$IP address add 203.0.113.1/24 broadcast 203.0.113.111 dev dummy0" + set +e + + run_cmd "$IP neigh get 192.0.2.111 dev dummy0" + log_test $? 0 "Resolved neighbour for broadcast address" + + run_cmd "$IP neigh get 192.0.2.255 dev dummy0" + log_test $? 0 "Resolved neighbour for network broadcast address" + + run_cmd "$IP neigh get 203.0.113.111 dev dummy0" + log_test $? 2 "Unresolved neighbour for broadcast address" + + run_cmd "$IP neigh get 203.0.113.255 dev dummy0" + log_test $? 2 "Unresolved neighbour for network broadcast address" + + cleanup +} + +mpath_dep_check() +{ + if [ ! -x "$(command -v mausezahn)" ]; then + echo "mausezahn command not found. Skipping test" + return 1 + fi + + if [ ! -x "$(command -v jq)" ]; then + echo "jq command not found. Skipping test" + return 1 + fi + + if [ ! -x "$(command -v bc)" ]; then + echo "bc command not found. Skipping test" + return 1 + fi + + if [ ! -x "$(command -v perf)" ]; then + echo "perf command not found. Skipping test" + return 1 + fi + + perf list fib:* | grep -q fib_table_lookup + if [ $? -ne 0 ]; then + echo "IPv4 FIB tracepoint not found. Skipping test" + return 1 + fi + + perf list fib6:* | grep -q fib6_table_lookup + if [ $? -ne 0 ]; then + echo "IPv6 FIB tracepoint not found. Skipping test" + return 1 + fi + + return 0 +} + +link_stats_get() +{ + local ns=$1; shift + local dev=$1; shift + local dir=$1; shift + local stat=$1; shift + + ip -n $ns -j -s link show dev $dev \ + | jq '.[]["stats64"]["'$dir'"]["'$stat'"]' +} + +list_rcv_eval() +{ + local file=$1; shift + local expected=$1; shift + + local count=$(tail -n 1 $file | jq '.["counter-value"] | tonumber | floor') + local ratio=$(echo "scale=2; $count / $expected" | bc -l) + local res=$(echo "$ratio >= 0.95" | bc) + [[ $res -eq 1 ]] + log_test $? 0 "Multipath route hit ratio ($ratio)" +} + +ipv4_mpath_list_test() +{ + echo + echo "IPv4 multipath list receive tests" + + mpath_dep_check || return 1 + + route_setup + + set -e + run_cmd "ip netns exec $ns1 ethtool -K veth1 tcp-segmentation-offload off" + + run_cmd "ip netns exec $ns2 bash -c \"echo 20000 > /sys/class/net/veth2/gro_flush_timeout\"" + run_cmd "ip netns exec $ns2 bash -c \"echo 1 > /sys/class/net/veth2/napi_defer_hard_irqs\"" + run_cmd "ip netns exec $ns2 ethtool -K veth2 generic-receive-offload on" + run_cmd "ip -n $ns2 link add name nh1 up type dummy" + run_cmd "ip -n $ns2 link add name nh2 up type dummy" + run_cmd "ip -n $ns2 address add 172.16.201.1/24 dev nh1" + run_cmd "ip -n $ns2 address add 172.16.202.1/24 dev nh2" + run_cmd "ip -n $ns2 neigh add 172.16.201.2 lladdr 00:11:22:33:44:55 nud perm dev nh1" + run_cmd "ip -n $ns2 neigh add 172.16.202.2 lladdr 00:aa:bb:cc:dd:ee nud perm dev nh2" + run_cmd "ip -n $ns2 route add 203.0.113.0/24 + nexthop via 172.16.201.2 nexthop via 172.16.202.2" + run_cmd "ip netns exec $ns2 sysctl -qw net.ipv4.fib_multipath_hash_policy=1" + run_cmd "ip netns exec $ns2 sysctl -qw net.ipv4.conf.veth2.rp_filter=0" + run_cmd "ip netns exec $ns2 sysctl -qw net.ipv4.conf.all.rp_filter=0" + run_cmd "ip netns exec $ns2 sysctl -qw net.ipv4.conf.default.rp_filter=0" + set +e + + local dmac=$(ip -n $ns2 -j link show dev veth2 | jq -r '.[]["address"]') + local tmp_file=$(mktemp) + local cmd="ip netns exec $ns1 mausezahn veth1 -a own -b $dmac + -A 172.16.101.1 -B 203.0.113.1 -t udp 'sp=12345,dp=0-65535' -q" + + # Packets forwarded in a list using a multipath route must not reuse a + # cached result so that a flow always hits the same nexthop. In other + # words, the FIB lookup tracepoint needs to be triggered for every + # packet. + local t0_rx_pkts=$(link_stats_get $ns2 veth2 rx packets) + run_cmd "perf stat -a -e fib:fib_table_lookup --filter 'err == 0' -j -o $tmp_file -- $cmd" + local t1_rx_pkts=$(link_stats_get $ns2 veth2 rx packets) + local diff=$(echo $t1_rx_pkts - $t0_rx_pkts | bc -l) + list_rcv_eval $tmp_file $diff + + rm $tmp_file + route_cleanup +} + +ipv6_mpath_list_test() +{ + echo + echo "IPv6 multipath list receive tests" + + mpath_dep_check || return 1 + + route_setup + + set -e + run_cmd "ip netns exec $ns1 ethtool -K veth1 tcp-segmentation-offload off" + + run_cmd "ip netns exec $ns2 bash -c \"echo 20000 > /sys/class/net/veth2/gro_flush_timeout\"" + run_cmd "ip netns exec $ns2 bash -c \"echo 1 > /sys/class/net/veth2/napi_defer_hard_irqs\"" + run_cmd "ip netns exec $ns2 ethtool -K veth2 generic-receive-offload on" + run_cmd "ip -n $ns2 link add name nh1 up type dummy" + run_cmd "ip -n $ns2 link add name nh2 up type dummy" + run_cmd "ip -n $ns2 -6 address add 2001:db8:201::1/64 dev nh1" + run_cmd "ip -n $ns2 -6 address add 2001:db8:202::1/64 dev nh2" + run_cmd "ip -n $ns2 -6 neigh add 2001:db8:201::2 lladdr 00:11:22:33:44:55 nud perm dev nh1" + run_cmd "ip -n $ns2 -6 neigh add 2001:db8:202::2 lladdr 00:aa:bb:cc:dd:ee nud perm dev nh2" + run_cmd "ip -n $ns2 -6 route add 2001:db8:301::/64 + nexthop via 2001:db8:201::2 nexthop via 2001:db8:202::2" + run_cmd "ip netns exec $ns2 sysctl -qw net.ipv6.fib_multipath_hash_policy=1" + set +e + + local dmac=$(ip -n $ns2 -j link show dev veth2 | jq -r '.[]["address"]') + local tmp_file=$(mktemp) + local cmd="ip netns exec $ns1 mausezahn -6 veth1 -a own -b $dmac + -A 2001:db8:101::1 -B 2001:db8:301::1 -t udp 'sp=12345,dp=0-65535' -q" + + # Packets forwarded in a list using a multipath route must not reuse a + # cached result so that a flow always hits the same nexthop. In other + # words, the FIB lookup tracepoint needs to be triggered for every + # packet. + local t0_rx_pkts=$(link_stats_get $ns2 veth2 rx packets) + run_cmd "perf stat -a -e fib6:fib6_table_lookup --filter 'err == 0' -j -o $tmp_file -- $cmd" + local t1_rx_pkts=$(link_stats_get $ns2 veth2 rx packets) + local diff=$(echo $t1_rx_pkts - $t0_rx_pkts | bc -l) + list_rcv_eval $tmp_file $diff + + rm $tmp_file + route_cleanup +} + ################################################################################ # usage @@ -1673,6 +2619,8 @@ EOF ################################################################################ # main +trap cleanup EXIT + while getopts :t:pPhv o do case $o in @@ -1717,15 +2665,24 @@ do fib_carrier_test|carrier) fib_carrier_test;; fib_rp_filter_test|rp_filter) fib_rp_filter_test;; fib_nexthop_test|nexthop) fib_nexthop_test;; + fib_notify_test|ipv4_notify) fib_notify_test;; + fib6_notify_test|ipv6_notify) fib6_notify_test;; fib_suppress_test|suppress) fib_suppress_test;; ipv6_route_test|ipv6_rt) ipv6_route_test;; ipv4_route_test|ipv4_rt) ipv4_route_test;; ipv6_addr_metric) ipv6_addr_metric_test;; ipv4_addr_metric) ipv4_addr_metric_test;; ipv4_del_addr) ipv4_del_addr_test;; + ipv6_del_addr) ipv6_del_addr_test;; ipv6_route_metrics) ipv6_route_metrics_test;; ipv4_route_metrics) ipv4_route_metrics_test;; ipv4_route_v6_gw) ipv4_route_v6_gw_test;; + ipv4_mangle) ipv4_mangle_test;; + ipv6_mangle) ipv6_mangle_test;; + ipv4_bcast_neigh) ipv4_bcast_neigh_test;; + fib6_gc_test|ipv6_gc) fib6_gc_test;; + ipv4_mpath_list) ipv4_mpath_list_test;; + ipv6_mpath_list) ipv6_mpath_list_test;; help) echo "Test names: $TESTS"; exit 0;; esac diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile index 250fbb2d1625..535865b3d1d6 100644 --- a/tools/testing/selftests/net/forwarding/Makefile +++ b/tools/testing/selftests/net/forwarding/Makefile @@ -1,14 +1,40 @@ # SPDX-License-Identifier: GPL-2.0+ OR MIT -TEST_PROGS = bridge_igmp.sh \ +TEST_PROGS = bridge_fdb_learning_limit.sh \ + bridge_igmp.sh \ + bridge_locked_port.sh \ + bridge_mdb.sh \ + bridge_mdb_host.sh \ + bridge_mdb_max.sh \ + bridge_mdb_port_down.sh \ + bridge_mld.sh \ bridge_port_isolation.sh \ bridge_sticky_fdb.sh \ bridge_vlan_aware.sh \ + bridge_vlan_mcast.sh \ bridge_vlan_unaware.sh \ + custom_multipath_hash.sh \ + dual_vxlan_bridge.sh \ + ethtool_extended_state.sh \ + ethtool_mm.sh \ + ethtool_rmon.sh \ ethtool.sh \ + gre_custom_multipath_hash.sh \ gre_inner_v4_multipath.sh \ gre_inner_v6_multipath.sh \ + gre_multipath_nh_res.sh \ + gre_multipath_nh.sh \ gre_multipath.sh \ + hw_stats_l3.sh \ + hw_stats_l3_gre.sh \ + ip6_forward_instats_vrf.sh \ + ip6gre_custom_multipath_hash.sh \ + ip6gre_flat_key.sh \ + ip6gre_flat_keys.sh \ + ip6gre_flat.sh \ + ip6gre_hier_key.sh \ + ip6gre_hier_keys.sh \ + ip6gre_hier.sh \ ip6gre_inner_v4_multipath.sh \ ip6gre_inner_v6_multipath.sh \ ipip_flat_gre_key.sh \ @@ -17,6 +43,7 @@ TEST_PROGS = bridge_igmp.sh \ ipip_hier_gre_key.sh \ ipip_hier_gre_keys.sh \ ipip_hier_gre.sh \ + local_termination.sh \ loopback.sh \ mirror_gre_bound.sh \ mirror_gre_bridge_1d.sh \ @@ -32,45 +59,78 @@ TEST_PROGS = bridge_igmp.sh \ mirror_gre_vlan_bridge_1q.sh \ mirror_gre_vlan.sh \ mirror_vlan.sh \ + no_forwarding.sh \ + pedit_dsfield.sh \ + pedit_ip.sh \ + pedit_l4port.sh \ + q_in_vni_ipv6.sh \ + q_in_vni.sh \ router_bridge.sh \ + router_bridge_1d.sh \ + router_bridge_1d_lag.sh \ + router_bridge_lag.sh \ router_bridge_vlan.sh \ + router_bridge_vlan_upper.sh \ + router_bridge_pvid_vlan_upper.sh \ + router_bridge_vlan_upper_pvid.sh \ router_broadcast.sh \ + router_mpath_nh_res.sh \ router_mpath_nh.sh \ router_multicast.sh \ router_multipath.sh \ + router_nh.sh \ router.sh \ router_vid_1.sh \ sch_ets.sh \ + sch_red.sh \ sch_tbf_ets.sh \ sch_tbf_prio.sh \ sch_tbf_root.sh \ + skbedit_priority.sh \ tc_actions.sh \ tc_chains.sh \ tc_flower_router.sh \ tc_flower.sh \ + tc_flower_l2_miss.sh \ + tc_flower_cfm.sh \ + tc_flower_port_range.sh \ + tc_mpls_l2vpn.sh \ + tc_police.sh \ tc_shblocks.sh \ + tc_tunnel_key.sh \ tc_vlan_modify.sh \ + vxlan_asymmetric_ipv6.sh \ vxlan_asymmetric.sh \ + vxlan_bridge_1d_ipv6.sh \ + vxlan_bridge_1d_port_8472_ipv6.sh \ vxlan_bridge_1d_port_8472.sh \ vxlan_bridge_1d.sh \ + vxlan_bridge_1q_ipv6.sh \ + vxlan_bridge_1q_port_8472_ipv6.sh \ vxlan_bridge_1q_port_8472.sh \ vxlan_bridge_1q.sh \ + vxlan_symmetric_ipv6.sh \ vxlan_symmetric.sh -TEST_PROGS_EXTENDED := devlink_lib.sh \ +TEST_FILES := devlink_lib.sh \ ethtool_lib.sh \ fib_offload_lib.sh \ forwarding.config.sample \ + ip6gre_lib.sh \ ipip_lib.sh \ lib.sh \ mirror_gre_lib.sh \ mirror_gre_topo_lib.sh \ mirror_lib.sh \ mirror_topo_lib.sh \ + router_mpath_nh_lib.sh \ sch_ets_core.sh \ sch_ets_tests.sh \ sch_tbf_core.sh \ sch_tbf_etsprio.sh \ tc_common.sh +TEST_INCLUDES := \ + ../lib.sh + include ../../lib.mk diff --git a/tools/testing/selftests/net/forwarding/bridge_fdb_learning_limit.sh b/tools/testing/selftests/net/forwarding/bridge_fdb_learning_limit.sh new file mode 100755 index 000000000000..0760a34b7114 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/bridge_fdb_learning_limit.sh @@ -0,0 +1,283 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# ShellCheck incorrectly believes that most of the code here is unreachable +# because it's invoked by variable name following ALL_TESTS. +# +# shellcheck disable=SC2317 + +ALL_TESTS="check_accounting check_limit" +NUM_NETIFS=6 +source lib.sh + +TEST_MAC_BASE=de:ad:be:ef:42: + +NUM_PKTS=16 +FDB_LIMIT=8 + +FDB_TYPES=( + # name is counted? overrides learned? + 'learned 1 0' + 'static 0 1' + 'user 0 1' + 'extern_learn 0 1' + 'local 0 1' +) + +mac() +{ + printf "${TEST_MAC_BASE}%02x" "$1" +} + +H1_DEFAULT_MAC=$(mac 42) + +switch_create() +{ + ip link add dev br0 type bridge + + ip link set dev "$swp1" master br0 + ip link set dev "$swp2" master br0 + # swp3 is used to add local MACs, so do not add it to the bridge yet. + + # swp2 is only used for replying when learning on swp1, its MAC should not be learned. + ip link set dev "$swp2" type bridge_slave learning off + + ip link set dev br0 up + + ip link set dev "$swp1" up + ip link set dev "$swp2" up + ip link set dev "$swp3" up +} + +switch_destroy() +{ + ip link set dev "$swp3" down + ip link set dev "$swp2" down + ip link set dev "$swp1" down + + ip link del dev br0 +} + +h_create() +{ + ip link set "$h1" addr "$H1_DEFAULT_MAC" + + simple_if_init "$h1" 192.0.2.1/24 + simple_if_init "$h2" 192.0.2.2/24 +} + +h_destroy() +{ + simple_if_fini "$h1" 192.0.2.1/24 + simple_if_fini "$h2" 192.0.2.2/24 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + h2=${NETIFS[p3]} + swp2=${NETIFS[p4]} + + swp3=${NETIFS[p6]} + + vrf_prepare + + h_create + + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + + h_destroy + + vrf_cleanup +} + +fdb_get_n_learned() +{ + ip -d -j link show dev br0 type bridge | \ + jq '.[]["linkinfo"]["info_data"]["fdb_n_learned"]' +} + +fdb_get_n_mac() +{ + local mac=${1} + + bridge -j fdb show br br0 | \ + jq "map(select(.mac == \"${mac}\" and (has(\"vlan\") | not))) | length" +} + +fdb_fill_learned() +{ + local i + + for i in $(seq 1 "$NUM_PKTS"); do + fdb_add learned "$(mac "$i")" + done +} + +fdb_reset() +{ + bridge fdb flush dev br0 + + # Keep the default MAC address of h1 in the table. We set it to a different one when + # testing dynamic learning. + bridge fdb add "$H1_DEFAULT_MAC" dev "$swp1" master static use +} + +fdb_add() +{ + local type=$1 mac=$2 + + case "$type" in + learned) + ip link set "$h1" addr "$mac" + # Wait for a reply so we implicitly wait until after the forwarding + # code finished and the FDB entry was created. + PING_COUNT=1 ping_do "$h1" 192.0.2.2 + check_err $? "Failed to ping another bridge port" + ip link set "$h1" addr "$H1_DEFAULT_MAC" + ;; + local) + ip link set dev "$swp3" addr "$mac" && ip link set "$swp3" master br0 + ;; + static) + bridge fdb replace "$mac" dev "$swp1" master static + ;; + user) + bridge fdb replace "$mac" dev "$swp1" master static use + ;; + extern_learn) + bridge fdb replace "$mac" dev "$swp1" master extern_learn + ;; + esac + + check_err $? "Failed to add a FDB entry of type ${type}" +} + +fdb_del() +{ + local type=$1 mac=$2 + + case "$type" in + local) + ip link set "$swp3" nomaster + ;; + *) + bridge fdb del "$mac" dev "$swp1" master + ;; + esac + + check_err $? "Failed to remove a FDB entry of type ${type}" +} + +check_accounting_one_type() +{ + local type=$1 is_counted=$2 overrides_learned=$3 + shift 3 + RET=0 + + fdb_reset + fdb_add "$type" "$(mac 0)" + learned=$(fdb_get_n_learned) + [ "$learned" -ne "$is_counted" ] + check_fail $? "Inserted FDB type ${type}: Expected the count ${is_counted}, but got ${learned}" + + fdb_del "$type" "$(mac 0)" + learned=$(fdb_get_n_learned) + [ "$learned" -ne 0 ] + check_fail $? "Removed FDB type ${type}: Expected the count 0, but got ${learned}" + + if [ "$overrides_learned" -eq 1 ]; then + fdb_reset + fdb_add learned "$(mac 0)" + fdb_add "$type" "$(mac 0)" + learned=$(fdb_get_n_learned) + [ "$learned" -ne "$is_counted" ] + check_fail $? "Set a learned entry to FDB type ${type}: Expected the count ${is_counted}, but got ${learned}" + fdb_del "$type" "$(mac 0)" + fi + + log_test "FDB accounting interacting with FDB type ${type}" +} + +check_accounting() +{ + local type_args learned + RET=0 + + fdb_reset + learned=$(fdb_get_n_learned) + [ "$learned" -ne 0 ] + check_fail $? "Flushed the FDB table: Expected the count 0, but got ${learned}" + + fdb_fill_learned + sleep 1 + + learned=$(fdb_get_n_learned) + [ "$learned" -ne "$NUM_PKTS" ] + check_fail $? "Filled the FDB table: Expected the count ${NUM_PKTS}, but got ${learned}" + + log_test "FDB accounting" + + for type_args in "${FDB_TYPES[@]}"; do + # This is intentional use of word splitting. + # shellcheck disable=SC2086 + check_accounting_one_type $type_args + done +} + +check_limit_one_type() +{ + local type=$1 is_counted=$2 + local n_mac expected=$((1 - is_counted)) + RET=0 + + fdb_reset + fdb_fill_learned + + fdb_add "$type" "$(mac 0)" + n_mac=$(fdb_get_n_mac "$(mac 0)") + [ "$n_mac" -ne "$expected" ] + check_fail $? "Inserted FDB type ${type} at limit: Expected the count ${expected}, but got ${n_mac}" + + log_test "FDB limits interacting with FDB type ${type}" +} + +check_limit() +{ + local learned + RET=0 + + ip link set br0 type bridge fdb_max_learned "$FDB_LIMIT" + + fdb_reset + fdb_fill_learned + + learned=$(fdb_get_n_learned) + [ "$learned" -ne "$FDB_LIMIT" ] + check_fail $? "Filled the limited FDB table: Expected the count ${FDB_LIMIT}, but got ${learned}" + + log_test "FDB limits" + + for type_args in "${FDB_TYPES[@]}"; do + # This is intentional use of word splitting. + # shellcheck disable=SC2086 + check_limit_one_type $type_args + done +} + +trap cleanup EXIT + +setup_prepare + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/bridge_igmp.sh b/tools/testing/selftests/net/forwarding/bridge_igmp.sh index 88d2472ba151..2aa66d2a1702 100755 --- a/tools/testing/selftests/net/forwarding/bridge_igmp.sh +++ b/tools/testing/selftests/net/forwarding/bridge_igmp.sh @@ -1,11 +1,37 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -ALL_TESTS="reportleave_test" +ALL_TESTS="v2reportleave_test v3include_test v3inc_allow_test v3inc_is_include_test \ + v3inc_is_exclude_test v3inc_to_exclude_test v3exc_allow_test v3exc_is_include_test \ + v3exc_is_exclude_test v3exc_to_exclude_test v3inc_block_test v3exc_block_test \ + v3exc_timeout_test v3star_ex_auto_add_test" NUM_NETIFS=4 CHECK_TC="yes" TEST_GROUP="239.10.10.10" TEST_GROUP_MAC="01:00:5e:0a:0a:0a" + +ALL_GROUP="224.0.0.1" +ALL_MAC="01:00:5e:00:00:01" + +# IGMPv3 is_in report: grp 239.10.10.10 is_include 192.0.2.1,192.0.2.2,192.0.2.3 +MZPKT_IS_INC="22:00:9d:de:00:00:00:01:01:00:00:03:ef:0a:0a:0a:c0:00:02:01:c0:00:02:02:c0:00:02:03" +# IGMPv3 is_in report: grp 239.10.10.10 is_include 192.0.2.10,192.0.2.11,192.0.2.12 +MZPKT_IS_INC2="22:00:9d:c3:00:00:00:01:01:00:00:03:ef:0a:0a:0a:c0:00:02:0a:c0:00:02:0b:c0:00:02:0c" +# IGMPv3 is_in report: grp 239.10.10.10 is_include 192.0.2.20,192.0.2.30 +MZPKT_IS_INC3="22:00:5f:b4:00:00:00:01:01:00:00:02:ef:0a:0a:0a:c0:00:02:14:c0:00:02:1e" +# IGMPv3 allow report: grp 239.10.10.10 allow 192.0.2.10,192.0.2.11,192.0.2.12 +MZPKT_ALLOW="22:00:99:c3:00:00:00:01:05:00:00:03:ef:0a:0a:0a:c0:00:02:0a:c0:00:02:0b:c0:00:02:0c" +# IGMPv3 allow report: grp 239.10.10.10 allow 192.0.2.20,192.0.2.30 +MZPKT_ALLOW2="22:00:5b:b4:00:00:00:01:05:00:00:02:ef:0a:0a:0a:c0:00:02:14:c0:00:02:1e" +# IGMPv3 is_ex report: grp 239.10.10.10 is_exclude 192.0.2.1,192.0.2.2,192.0.2.20,192.0.2.21 +MZPKT_IS_EXC="22:00:da:b6:00:00:00:01:02:00:00:04:ef:0a:0a:0a:c0:00:02:01:c0:00:02:02:c0:00:02:14:c0:00:02:15" +# IGMPv3 is_ex report: grp 239.10.10.10 is_exclude 192.0.2.20,192.0.2.30 +MZPKT_IS_EXC2="22:00:5e:b4:00:00:00:01:02:00:00:02:ef:0a:0a:0a:c0:00:02:14:c0:00:02:1e" +# IGMPv3 to_ex report: grp 239.10.10.10 to_exclude 192.0.2.1,192.0.2.20,192.0.2.30 +MZPKT_TO_EXC="22:00:9a:b1:00:00:00:01:04:00:00:03:ef:0a:0a:0a:c0:00:02:01:c0:00:02:14:c0:00:02:1e" +# IGMPv3 block report: grp 239.10.10.10 block 192.0.2.1,192.0.2.20,192.0.2.30 +MZPKT_BLOCK="22:00:98:b1:00:00:00:01:06:00:00:03:ef:0a:0a:0a:c0:00:02:01:c0:00:02:14:c0:00:02:1e" + source lib.sh h1_create() @@ -70,47 +96,13 @@ cleanup() switch_destroy - # Always cleanup the mcast group - ip address del dev $h2 $TEST_GROUP/32 2>&1 1>/dev/null - h2_destroy h1_destroy vrf_cleanup } -# return 0 if the packet wasn't seen on host2_if or 1 if it was -mcast_packet_test() -{ - local mac=$1 - local ip=$2 - local host1_if=$3 - local host2_if=$4 - local seen=0 - - # Add an ACL on `host2_if` which will tell us whether the packet - # was received by it or not. - tc qdisc add dev $host2_if ingress - tc filter add dev $host2_if ingress protocol ip pref 1 handle 101 \ - flower dst_mac $mac action drop - - $MZ $host1_if -c 1 -p 64 -b $mac -B $ip -t udp "dp=4096,sp=2048" -q - sleep 1 - - tc -j -s filter show dev $host2_if ingress \ - | jq -e ".[] | select(.options.handle == 101) \ - | select(.options.actions[0].stats.packets == 1)" &> /dev/null - if [[ $? -eq 0 ]]; then - seen=1 - fi - - tc filter del dev $host2_if ingress protocol ip pref 1 handle 101 flower - tc qdisc del dev $host2_if ingress - - return $seen -} - -reportleave_test() +v2reportleave_test() { RET=0 ip address add dev $h2 $TEST_GROUP/32 autojoin @@ -118,12 +110,12 @@ reportleave_test() sleep 5 bridge mdb show dev br0 | grep $TEST_GROUP 1>/dev/null - check_err $? "Report didn't create mdb entry for $TEST_GROUP" + check_err $? "IGMPv2 report didn't create mdb entry for $TEST_GROUP" - mcast_packet_test $TEST_GROUP_MAC $TEST_GROUP $h1 $h2 + mcast_packet_test $TEST_GROUP_MAC 192.0.2.1 $TEST_GROUP $h1 $h2 check_fail $? "Traffic to $TEST_GROUP wasn't forwarded" - log_test "IGMP report $TEST_GROUP" + log_test "IGMPv2 report $TEST_GROUP" RET=0 bridge mdb show dev br0 | grep $TEST_GROUP 1>/dev/null @@ -136,10 +128,430 @@ reportleave_test() bridge mdb show dev br0 | grep $TEST_GROUP 1>/dev/null check_fail $? "Leave didn't delete mdb entry for $TEST_GROUP" - mcast_packet_test $TEST_GROUP_MAC $TEST_GROUP $h1 $h2 + mcast_packet_test $TEST_GROUP_MAC 192.0.2.1 $TEST_GROUP $h1 $h2 check_err $? "Traffic to $TEST_GROUP was forwarded without mdb entry" - log_test "IGMP leave $TEST_GROUP" + log_test "IGMPv2 leave $TEST_GROUP" +} + +v3include_prepare() +{ + local host1_if=$1 + local mac=$2 + local group=$3 + local X=("192.0.2.1" "192.0.2.2" "192.0.2.3") + + ip link set dev br0 type bridge mcast_igmp_version 3 + check_err $? "Could not change bridge IGMP version to 3" + + $MZ $host1_if -b $mac -c 1 -B $group -t ip "proto=2,p=$MZPKT_IS_INC" -q + sleep 1 + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and .source_list != null)" &>/dev/null + check_err $? "Missing *,G entry with source list" + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and \ + .source_list != null and .filter_mode == \"include\")" &>/dev/null + check_err $? "Wrong *,G entry filter mode" + brmcast_check_sg_entries "is_include" "${X[@]}" +} + +v3exclude_prepare() +{ + local host1_if=$1 + local mac=$2 + local group=$3 + local pkt=$4 + local X=("192.0.2.1" "192.0.2.2") + local Y=("192.0.2.20" "192.0.2.21") + + v3include_prepare $host1_if $mac $group + + $MZ $host1_if -c 1 -b $mac -B $group -t ip "proto=2,p=$MZPKT_IS_EXC" -q + sleep 1 + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and \ + .source_list != null and .filter_mode == \"exclude\")" &>/dev/null + check_err $? "Wrong *,G entry filter mode" + + brmcast_check_sg_entries "is_exclude" "${X[@]}" "${Y[@]}" + + brmcast_check_sg_state 0 "${X[@]}" + brmcast_check_sg_state 1 "${Y[@]}" + + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and \ + .source_list != null and + .source_list[].address == \"192.0.2.3\")" &>/dev/null + check_fail $? "Wrong *,G entry source list, 192.0.2.3 entry still exists" +} + +v3cleanup() +{ + local port=$1 + local group=$2 + + bridge mdb del dev br0 port $port grp $group + ip link set dev br0 type bridge mcast_igmp_version 2 +} + +v3include_test() +{ + RET=0 + local X=("192.0.2.1" "192.0.2.2" "192.0.2.3") + + v3include_prepare $h1 $ALL_MAC $ALL_GROUP + + brmcast_check_sg_state 0 "${X[@]}" + + brmcast_check_sg_fwding 1 "${X[@]}" + brmcast_check_sg_fwding 0 "192.0.2.100" + + log_test "IGMPv3 report $TEST_GROUP is_include" + + v3cleanup $swp1 $TEST_GROUP +} + +v3inc_allow_test() +{ + RET=0 + local X=("192.0.2.10" "192.0.2.11" "192.0.2.12") + + v3include_prepare $h1 $ALL_MAC $ALL_GROUP + + $MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_ALLOW" -q + sleep 1 + brmcast_check_sg_entries "allow" "${X[@]}" + + brmcast_check_sg_state 0 "${X[@]}" + + brmcast_check_sg_fwding 1 "${X[@]}" + brmcast_check_sg_fwding 0 "192.0.2.100" + + log_test "IGMPv3 report $TEST_GROUP include -> allow" + + v3cleanup $swp1 $TEST_GROUP +} + +v3inc_is_include_test() +{ + RET=0 + local X=("192.0.2.10" "192.0.2.11" "192.0.2.12") + + v3include_prepare $h1 $ALL_MAC $ALL_GROUP + + $MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_IS_INC2" -q + sleep 1 + brmcast_check_sg_entries "is_include" "${X[@]}" + + brmcast_check_sg_state 0 "${X[@]}" + + brmcast_check_sg_fwding 1 "${X[@]}" + brmcast_check_sg_fwding 0 "192.0.2.100" + + log_test "IGMPv3 report $TEST_GROUP include -> is_include" + + v3cleanup $swp1 $TEST_GROUP +} + +v3inc_is_exclude_test() +{ + RET=0 + + v3exclude_prepare $h1 $ALL_MAC $ALL_GROUP + + brmcast_check_sg_fwding 1 "${X[@]}" 192.0.2.100 + brmcast_check_sg_fwding 0 "${Y[@]}" + + log_test "IGMPv3 report $TEST_GROUP include -> is_exclude" + + v3cleanup $swp1 $TEST_GROUP +} + +v3inc_to_exclude_test() +{ + RET=0 + local X=("192.0.2.1") + local Y=("192.0.2.20" "192.0.2.30") + + v3include_prepare $h1 $ALL_MAC $ALL_GROUP + + ip link set dev br0 type bridge mcast_last_member_interval 500 + check_err $? "Could not change mcast_last_member_interval to 5s" + + $MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_TO_EXC" -q + sleep 1 + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and \ + .source_list != null and .filter_mode == \"exclude\")" &>/dev/null + check_err $? "Wrong *,G entry filter mode" + + brmcast_check_sg_entries "to_exclude" "${X[@]}" "${Y[@]}" + + brmcast_check_sg_state 0 "${X[@]}" + brmcast_check_sg_state 1 "${Y[@]}" + + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and \ + .source_list != null and + .source_list[].address == \"192.0.2.2\")" &>/dev/null + check_fail $? "Wrong *,G entry source list, 192.0.2.2 entry still exists" + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and \ + .source_list != null and + .source_list[].address == \"192.0.2.21\")" &>/dev/null + check_fail $? "Wrong *,G entry source list, 192.0.2.21 entry still exists" + + brmcast_check_sg_fwding 1 "${X[@]}" 192.0.2.100 + brmcast_check_sg_fwding 0 "${Y[@]}" + + log_test "IGMPv3 report $TEST_GROUP include -> to_exclude" + + ip link set dev br0 type bridge mcast_last_member_interval 100 + + v3cleanup $swp1 $TEST_GROUP +} + +v3exc_allow_test() +{ + RET=0 + local X=("192.0.2.1" "192.0.2.2" "192.0.2.20" "192.0.2.30") + local Y=("192.0.2.21") + + v3exclude_prepare $h1 $ALL_MAC $ALL_GROUP + + $MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_ALLOW2" -q + sleep 1 + brmcast_check_sg_entries "allow" "${X[@]}" "${Y[@]}" + + brmcast_check_sg_state 0 "${X[@]}" + brmcast_check_sg_state 1 "${Y[@]}" + + brmcast_check_sg_fwding 1 "${X[@]}" 192.0.2.100 + brmcast_check_sg_fwding 0 "${Y[@]}" + + log_test "IGMPv3 report $TEST_GROUP exclude -> allow" + + v3cleanup $swp1 $TEST_GROUP +} + +v3exc_is_include_test() +{ + RET=0 + local X=("192.0.2.1" "192.0.2.2" "192.0.2.20" "192.0.2.30") + local Y=("192.0.2.21") + + v3exclude_prepare $h1 $ALL_MAC $ALL_GROUP + + $MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_IS_INC3" -q + sleep 1 + brmcast_check_sg_entries "is_include" "${X[@]}" "${Y[@]}" + + brmcast_check_sg_state 0 "${X[@]}" + brmcast_check_sg_state 1 "${Y[@]}" + + brmcast_check_sg_fwding 1 "${X[@]}" 192.0.2.100 + brmcast_check_sg_fwding 0 "${Y[@]}" + + log_test "IGMPv3 report $TEST_GROUP exclude -> is_include" + + v3cleanup $swp1 $TEST_GROUP +} + +v3exc_is_exclude_test() +{ + RET=0 + local X=("192.0.2.30") + local Y=("192.0.2.20") + + v3exclude_prepare $h1 $ALL_MAC $ALL_GROUP + + $MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_IS_EXC2" -q + sleep 1 + brmcast_check_sg_entries "is_exclude" "${X[@]}" "${Y[@]}" + + brmcast_check_sg_state 0 "${X[@]}" + brmcast_check_sg_state 1 "${Y[@]}" + + brmcast_check_sg_fwding 1 "${X[@]}" 192.0.2.100 + brmcast_check_sg_fwding 0 "${Y[@]}" + + log_test "IGMPv3 report $TEST_GROUP exclude -> is_exclude" + + v3cleanup $swp1 $TEST_GROUP +} + +v3exc_to_exclude_test() +{ + RET=0 + local X=("192.0.2.1" "192.0.2.30") + local Y=("192.0.2.20") + + v3exclude_prepare $h1 $ALL_MAC $ALL_GROUP + + ip link set dev br0 type bridge mcast_last_member_interval 500 + check_err $? "Could not change mcast_last_member_interval to 5s" + + $MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_TO_EXC" -q + sleep 1 + brmcast_check_sg_entries "to_exclude" "${X[@]}" "${Y[@]}" + + brmcast_check_sg_state 0 "${X[@]}" + brmcast_check_sg_state 1 "${Y[@]}" + + brmcast_check_sg_fwding 1 "${X[@]}" 192.0.2.100 + brmcast_check_sg_fwding 0 "${Y[@]}" + + log_test "IGMPv3 report $TEST_GROUP exclude -> to_exclude" + + ip link set dev br0 type bridge mcast_last_member_interval 100 + + v3cleanup $swp1 $TEST_GROUP +} + +v3inc_block_test() +{ + RET=0 + local X=("192.0.2.2" "192.0.2.3") + + v3include_prepare $h1 $ALL_MAC $ALL_GROUP + + $MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_BLOCK" -q + # make sure the lowered timers have expired (by default 2 seconds) + sleep 3 + brmcast_check_sg_entries "block" "${X[@]}" + + brmcast_check_sg_state 0 "${X[@]}" + + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and \ + .source_list != null and + .source_list[].address == \"192.0.2.1\")" &>/dev/null + check_fail $? "Wrong *,G entry source list, 192.0.2.1 entry still exists" + + brmcast_check_sg_fwding 1 "${X[@]}" + brmcast_check_sg_fwding 0 "192.0.2.100" + + log_test "IGMPv3 report $TEST_GROUP include -> block" + + v3cleanup $swp1 $TEST_GROUP +} + +v3exc_block_test() +{ + RET=0 + local X=("192.0.2.1" "192.0.2.2" "192.0.2.30") + local Y=("192.0.2.20" "192.0.2.21") + + v3exclude_prepare $h1 $ALL_MAC $ALL_GROUP + + ip link set dev br0 type bridge mcast_last_member_interval 500 + check_err $? "Could not change mcast_last_member_interval to 5s" + + $MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_BLOCK" -q + sleep 1 + brmcast_check_sg_entries "block" "${X[@]}" "${Y[@]}" + + brmcast_check_sg_state 0 "${X[@]}" + brmcast_check_sg_state 1 "${Y[@]}" + + brmcast_check_sg_fwding 1 "${X[@]}" 192.0.2.100 + brmcast_check_sg_fwding 0 "${Y[@]}" + + log_test "IGMPv3 report $TEST_GROUP exclude -> block" + + ip link set dev br0 type bridge mcast_last_member_interval 100 + + v3cleanup $swp1 $TEST_GROUP +} + +v3exc_timeout_test() +{ + RET=0 + local X=("192.0.2.20" "192.0.2.30") + + # GMI should be 3 seconds + ip link set dev br0 type bridge mcast_query_interval 100 \ + mcast_query_response_interval 100 \ + mcast_membership_interval 300 + + v3exclude_prepare $h1 $ALL_MAC $ALL_GROUP + ip link set dev br0 type bridge mcast_query_interval 500 \ + mcast_query_response_interval 500 \ + mcast_membership_interval 1500 + + $MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_ALLOW2" -q + sleep 3 + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and \ + .source_list != null and .filter_mode == \"include\")" &>/dev/null + check_err $? "Wrong *,G entry filter mode" + + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and \ + .source_list != null and + .source_list[].address == \"192.0.2.1\")" &>/dev/null + check_fail $? "Wrong *,G entry source list, 192.0.2.1 entry still exists" + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and \ + .source_list != null and + .source_list[].address == \"192.0.2.2\")" &>/dev/null + check_fail $? "Wrong *,G entry source list, 192.0.2.2 entry still exists" + + brmcast_check_sg_entries "allow" "${X[@]}" + + brmcast_check_sg_state 0 "${X[@]}" + + brmcast_check_sg_fwding 1 "${X[@]}" + brmcast_check_sg_fwding 0 192.0.2.100 + + log_test "IGMPv3 group $TEST_GROUP exclude timeout" + + ip link set dev br0 type bridge mcast_query_interval 12500 \ + mcast_query_response_interval 1000 \ + mcast_membership_interval 26000 + + v3cleanup $swp1 $TEST_GROUP +} + +v3star_ex_auto_add_test() +{ + RET=0 + + v3exclude_prepare $h1 $ALL_MAC $ALL_GROUP + + $MZ $h2 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_IS_INC" -q + sleep 1 + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and .src == \"192.0.2.3\" and \ + .port == \"$swp1\")" &>/dev/null + check_err $? "S,G entry for *,G port doesn't exist" + + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and .src == \"192.0.2.3\" and \ + .port == \"$swp1\" and \ + .flags[] == \"added_by_star_ex\")" &>/dev/null + check_err $? "Auto-added S,G entry doesn't have added_by_star_ex flag" + + brmcast_check_sg_fwding 1 192.0.2.3 + + log_test "IGMPv3 S,G port entry automatic add to a *,G port" + + v3cleanup $swp1 $TEST_GROUP + v3cleanup $swp2 $TEST_GROUP } trap cleanup EXIT diff --git a/tools/testing/selftests/net/forwarding/bridge_locked_port.sh b/tools/testing/selftests/net/forwarding/bridge_locked_port.sh new file mode 100755 index 000000000000..c62331b2e006 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/bridge_locked_port.sh @@ -0,0 +1,365 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +ALL_TESTS=" + locked_port_ipv4 + locked_port_ipv6 + locked_port_vlan + locked_port_mab + locked_port_mab_roam + locked_port_mab_config + locked_port_mab_flush + locked_port_mab_redirect +" + +NUM_NETIFS=4 +CHECK_TC="no" +source lib.sh + +h1_create() +{ + simple_if_init $h1 192.0.2.1/24 2001:db8:1::1/64 + vlan_create $h1 100 v$h1 198.51.100.1/24 +} + +h1_destroy() +{ + vlan_destroy $h1 100 + simple_if_fini $h1 192.0.2.1/24 2001:db8:1::1/64 +} + +h2_create() +{ + simple_if_init $h2 192.0.2.2/24 2001:db8:1::2/64 + vlan_create $h2 100 v$h2 198.51.100.2/24 +} + +h2_destroy() +{ + vlan_destroy $h2 100 + simple_if_fini $h2 192.0.2.2/24 2001:db8:1::2/64 +} + +switch_create() +{ + ip link add dev br0 type bridge vlan_filtering 1 + + ip link set dev $swp1 master br0 + ip link set dev $swp2 master br0 + + bridge link set dev $swp1 learning off + + ip link set dev br0 up + ip link set dev $swp1 up + ip link set dev $swp2 up +} + +switch_destroy() +{ + ip link set dev $swp2 down + ip link set dev $swp1 down + + ip link del dev br0 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + vrf_prepare + + h1_create + h2_create + + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + + h2_destroy + h1_destroy + + vrf_cleanup +} + +locked_port_ipv4() +{ + RET=0 + + check_locked_port_support || return 0 + + ping_do $h1 192.0.2.2 + check_err $? "Ping did not work before locking port" + + bridge link set dev $swp1 locked on + + ping_do $h1 192.0.2.2 + check_fail $? "Ping worked after locking port, but before adding FDB entry" + + bridge fdb add `mac_get $h1` dev $swp1 master static + + ping_do $h1 192.0.2.2 + check_err $? "Ping did not work after locking port and adding FDB entry" + + bridge link set dev $swp1 locked off + bridge fdb del `mac_get $h1` dev $swp1 master static + + ping_do $h1 192.0.2.2 + check_err $? "Ping did not work after unlocking port and removing FDB entry." + + log_test "Locked port ipv4" +} + +locked_port_vlan() +{ + RET=0 + + check_locked_port_support || return 0 + + bridge vlan add vid 100 dev $swp1 + bridge vlan add vid 100 dev $swp2 + + ping_do $h1.100 198.51.100.2 + check_err $? "Ping through vlan did not work before locking port" + + bridge link set dev $swp1 locked on + ping_do $h1.100 198.51.100.2 + check_fail $? "Ping through vlan worked after locking port, but before adding FDB entry" + + bridge fdb add `mac_get $h1` dev $swp1 vlan 100 master static + + ping_do $h1.100 198.51.100.2 + check_err $? "Ping through vlan did not work after locking port and adding FDB entry" + + bridge link set dev $swp1 locked off + bridge fdb del `mac_get $h1` dev $swp1 vlan 100 master static + + ping_do $h1.100 198.51.100.2 + check_err $? "Ping through vlan did not work after unlocking port and removing FDB entry" + + bridge vlan del vid 100 dev $swp1 + bridge vlan del vid 100 dev $swp2 + log_test "Locked port vlan" +} + +locked_port_ipv6() +{ + RET=0 + check_locked_port_support || return 0 + + ping6_do $h1 2001:db8:1::2 + check_err $? "Ping6 did not work before locking port" + + bridge link set dev $swp1 locked on + + ping6_do $h1 2001:db8:1::2 + check_fail $? "Ping6 worked after locking port, but before adding FDB entry" + + bridge fdb add `mac_get $h1` dev $swp1 master static + ping6_do $h1 2001:db8:1::2 + check_err $? "Ping6 did not work after locking port and adding FDB entry" + + bridge link set dev $swp1 locked off + bridge fdb del `mac_get $h1` dev $swp1 master static + + ping6_do $h1 2001:db8:1::2 + check_err $? "Ping6 did not work after unlocking port and removing FDB entry" + + log_test "Locked port ipv6" +} + +locked_port_mab() +{ + RET=0 + check_port_mab_support || return 0 + + ping_do $h1 192.0.2.2 + check_err $? "Ping did not work before locking port" + + bridge link set dev $swp1 learning on locked on + + ping_do $h1 192.0.2.2 + check_fail $? "Ping worked on a locked port without an FDB entry" + + bridge fdb get `mac_get $h1` br br0 vlan 1 &> /dev/null + check_fail $? "FDB entry created before enabling MAB" + + bridge link set dev $swp1 learning on locked on mab on + + ping_do $h1 192.0.2.2 + check_fail $? "Ping worked on MAB enabled port without an FDB entry" + + bridge fdb get `mac_get $h1` br br0 vlan 1 | grep "dev $swp1" | grep -q "locked" + check_err $? "Locked FDB entry not created" + + bridge fdb replace `mac_get $h1` dev $swp1 master static + + ping_do $h1 192.0.2.2 + check_err $? "Ping did not work after replacing FDB entry" + + bridge fdb get `mac_get $h1` br br0 vlan 1 | grep "dev $swp1" | grep -q "locked" + check_fail $? "FDB entry marked as locked after replacement" + + bridge fdb del `mac_get $h1` dev $swp1 master + bridge link set dev $swp1 learning off locked off mab off + + log_test "Locked port MAB" +} + +# Check that entries cannot roam to a locked port, but that entries can roam +# to an unlocked port. +locked_port_mab_roam() +{ + local mac=a0:b0:c0:c0:b0:a0 + + RET=0 + check_port_mab_support || return 0 + + bridge link set dev $swp1 learning on locked on mab on + + $MZ $h1 -q -c 5 -d 100msec -t udp -a $mac -b rand + bridge fdb get $mac br br0 vlan 1 | grep "dev $swp1" | grep -q "locked" + check_err $? "No locked entry on first injection" + + $MZ $h2 -q -c 5 -d 100msec -t udp -a $mac -b rand + bridge fdb get $mac br br0 vlan 1 | grep -q "dev $swp2" + check_err $? "Entry did not roam to an unlocked port" + + bridge fdb get $mac br br0 vlan 1 | grep -q "locked" + check_fail $? "Entry roamed with locked flag on" + + $MZ $h1 -q -c 5 -d 100msec -t udp -a $mac -b rand + bridge fdb get $mac br br0 vlan 1 | grep -q "dev $swp1" + check_fail $? "Entry roamed back to locked port" + + bridge fdb del $mac vlan 1 dev $swp2 master + bridge link set dev $swp1 learning off locked off mab off + + log_test "Locked port MAB roam" +} + +# Check that MAB can only be enabled on a port that is both locked and has +# learning enabled. +locked_port_mab_config() +{ + RET=0 + check_port_mab_support || return 0 + + bridge link set dev $swp1 learning on locked off mab on &> /dev/null + check_fail $? "MAB enabled while port is unlocked" + + bridge link set dev $swp1 learning off locked on mab on &> /dev/null + check_fail $? "MAB enabled while port has learning disabled" + + bridge link set dev $swp1 learning on locked on mab on + check_err $? "Failed to enable MAB when port is locked and has learning enabled" + + bridge link set dev $swp1 learning off locked off mab off + + log_test "Locked port MAB configuration" +} + +# Check that locked FDB entries are flushed from a port when MAB is disabled. +locked_port_mab_flush() +{ + local locked_mac1=00:01:02:03:04:05 + local unlocked_mac1=00:01:02:03:04:06 + local locked_mac2=00:01:02:03:04:07 + local unlocked_mac2=00:01:02:03:04:08 + + RET=0 + check_port_mab_support || return 0 + + bridge link set dev $swp1 learning on locked on mab on + bridge link set dev $swp2 learning on locked on mab on + + # Create regular and locked FDB entries on each port. + bridge fdb add $unlocked_mac1 dev $swp1 vlan 1 master static + bridge fdb add $unlocked_mac2 dev $swp2 vlan 1 master static + + $MZ $h1 -q -c 5 -d 100msec -t udp -a $locked_mac1 -b rand + bridge fdb get $locked_mac1 br br0 vlan 1 | grep "dev $swp1" | \ + grep -q "locked" + check_err $? "Failed to create locked FDB entry on first port" + + $MZ $h2 -q -c 5 -d 100msec -t udp -a $locked_mac2 -b rand + bridge fdb get $locked_mac2 br br0 vlan 1 | grep "dev $swp2" | \ + grep -q "locked" + check_err $? "Failed to create locked FDB entry on second port" + + # Disable MAB on the first port and check that only the first locked + # FDB entry was flushed. + bridge link set dev $swp1 mab off + + bridge fdb get $unlocked_mac1 br br0 vlan 1 &> /dev/null + check_err $? "Regular FDB entry on first port was flushed after disabling MAB" + + bridge fdb get $unlocked_mac2 br br0 vlan 1 &> /dev/null + check_err $? "Regular FDB entry on second port was flushed after disabling MAB" + + bridge fdb get $locked_mac1 br br0 vlan 1 &> /dev/null + check_fail $? "Locked FDB entry on first port was not flushed after disabling MAB" + + bridge fdb get $locked_mac2 br br0 vlan 1 &> /dev/null + check_err $? "Locked FDB entry on second port was flushed after disabling MAB" + + bridge fdb del $unlocked_mac2 dev $swp2 vlan 1 master static + bridge fdb del $unlocked_mac1 dev $swp1 vlan 1 master static + + bridge link set dev $swp2 learning on locked off mab off + bridge link set dev $swp1 learning off locked off mab off + + log_test "Locked port MAB FDB flush" +} + +# Check that traffic can be redirected from a locked bridge port and that it +# does not create locked FDB entries. +locked_port_mab_redirect() +{ + RET=0 + check_port_mab_support || return 0 + + tc qdisc add dev $swp1 clsact + tc filter add dev $swp1 ingress protocol all pref 1 handle 101 flower \ + action mirred egress redirect dev $swp2 + bridge link set dev $swp1 learning on locked on mab on + + ping_do $h1 192.0.2.2 + check_err $? "Ping did not work with redirection" + + bridge fdb get `mac_get $h1` br br0 vlan 1 2> /dev/null | \ + grep "dev $swp1" | grep -q "locked" + check_fail $? "Locked entry created for redirected traffic" + + tc filter del dev $swp1 ingress protocol all pref 1 handle 101 flower + + ping_do $h1 192.0.2.2 + check_fail $? "Ping worked without redirection" + + bridge fdb get `mac_get $h1` br br0 vlan 1 2> /dev/null | \ + grep "dev $swp1" | grep -q "locked" + check_err $? "Locked entry not created after deleting filter" + + bridge fdb del `mac_get $h1` vlan 1 dev $swp1 master + bridge link set dev $swp1 learning off locked off mab off + tc qdisc del dev $swp1 clsact + + log_test "Locked port MAB redirect" +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/bridge_mdb.sh b/tools/testing/selftests/net/forwarding/bridge_mdb.sh new file mode 100755 index 000000000000..d9d587454d20 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/bridge_mdb.sh @@ -0,0 +1,1371 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# +-----------------------+ +------------------------+ +# | H1 (vrf) | | H2 (vrf) | +# | + $h1.10 | | + $h2.10 | +# | | 192.0.2.1/28 | | | 192.0.2.2/28 | +# | | 2001:db8:1::1/64 | | | 2001:db8:1::2/64 | +# | | | | | | +# | | + $h1.20 | | | + $h2.20 | +# | \ | 198.51.100.1/24 | | \ | 198.51.100.2/24 | +# | \ | 2001:db8:2::1/64 | | \ | 2001:db8:2::2/64 | +# | \| | | \| | +# | + $h1 | | + $h2 | +# +----|------------------+ +----|-------------------+ +# | | +# +----|--------------------------------------------------|-------------------+ +# | SW | | | +# | +--|--------------------------------------------------|-----------------+ | +# | | + $swp1 BR0 (802.1q) + $swp2 | | +# | | vid 10 vid 10 | | +# | | vid 20 vid 20 | | +# | | | | +# | +-----------------------------------------------------------------------+ | +# +---------------------------------------------------------------------------+ + +ALL_TESTS=" + cfg_test + fwd_test + ctrl_test +" + +NUM_NETIFS=4 +source lib.sh +source tc_common.sh + +h1_create() +{ + simple_if_init $h1 + vlan_create $h1 10 v$h1 192.0.2.1/28 2001:db8:1::1/64 + vlan_create $h1 20 v$h1 198.51.100.1/24 2001:db8:2::1/64 +} + +h1_destroy() +{ + vlan_destroy $h1 20 + vlan_destroy $h1 10 + simple_if_fini $h1 +} + +h2_create() +{ + simple_if_init $h2 + vlan_create $h2 10 v$h2 192.0.2.2/28 + vlan_create $h2 20 v$h2 198.51.100.2/24 +} + +h2_destroy() +{ + vlan_destroy $h2 20 + vlan_destroy $h2 10 + simple_if_fini $h2 +} + +switch_create() +{ + ip link add name br0 type bridge vlan_filtering 1 vlan_default_pvid 0 \ + mcast_snooping 1 mcast_igmp_version 3 mcast_mld_version 2 + bridge vlan add vid 10 dev br0 self + bridge vlan add vid 20 dev br0 self + ip link set dev br0 up + + ip link set dev $swp1 master br0 + ip link set dev $swp1 up + bridge vlan add vid 10 dev $swp1 + bridge vlan add vid 20 dev $swp1 + + ip link set dev $swp2 master br0 + ip link set dev $swp2 up + bridge vlan add vid 10 dev $swp2 + bridge vlan add vid 20 dev $swp2 + + tc qdisc add dev br0 clsact + tc qdisc add dev $h2 clsact +} + +switch_destroy() +{ + tc qdisc del dev $h2 clsact + tc qdisc del dev br0 clsact + + bridge vlan del vid 20 dev $swp2 + bridge vlan del vid 10 dev $swp2 + ip link set dev $swp2 down + ip link set dev $swp2 nomaster + + bridge vlan del vid 20 dev $swp1 + bridge vlan del vid 10 dev $swp1 + ip link set dev $swp1 down + ip link set dev $swp1 nomaster + + ip link set dev br0 down + bridge vlan del vid 20 dev br0 self + bridge vlan del vid 10 dev br0 self + ip link del dev br0 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + vrf_prepare + forwarding_enable + + h1_create + h2_create + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + h2_destroy + h1_destroy + + forwarding_restore + vrf_cleanup +} + +cfg_test_host_common() +{ + local name=$1; shift + local grp=$1; shift + local src=$1; shift + local state=$1; shift + local invalid_state=$1; shift + + RET=0 + + # Check basic add, replace and delete behavior. + bridge mdb add dev br0 port br0 grp $grp $state vid 10 + bridge mdb get dev br0 grp $grp vid 10 &> /dev/null + check_err $? "Failed to add $name host entry" + + bridge mdb replace dev br0 port br0 grp $grp $state vid 10 &> /dev/null + check_fail $? "Managed to replace $name host entry" + + bridge mdb del dev br0 port br0 grp $grp $state vid 10 + bridge mdb get dev br0 grp $grp vid 10 &> /dev/null + check_fail $? "Failed to delete $name host entry" + + # Check error cases. + bridge mdb add dev br0 port br0 grp $grp $invalid_state vid 10 \ + &> /dev/null + check_fail $? "Managed to add $name host entry with a $invalid_state state" + + bridge mdb add dev br0 port br0 grp $grp src $src $state vid 10 \ + &> /dev/null + check_fail $? "Managed to add $name host entry with a source" + + bridge mdb add dev br0 port br0 grp $grp $state vid 10 \ + filter_mode exclude &> /dev/null + check_fail $? "Managed to add $name host entry with a filter mode" + + bridge mdb add dev br0 port br0 grp $grp $state vid 10 \ + source_list $src &> /dev/null + check_fail $? "Managed to add $name host entry with a source list" + + bridge mdb add dev br0 port br0 grp $grp $state vid 10 \ + proto 123 &> /dev/null + check_fail $? "Managed to add $name host entry with a protocol" + + log_test "Common host entries configuration tests ($name)" +} + +# Check configuration of host entries from all types. +cfg_test_host() +{ + echo + log_info "# Host entries configuration tests" + + cfg_test_host_common "IPv4" "239.1.1.1" "192.0.2.1" "temp" "permanent" + cfg_test_host_common "IPv6" "ff0e::1" "2001:db8:1::1" "temp" "permanent" + cfg_test_host_common "L2" "01:02:03:04:05:06" "00:00:00:00:00:01" \ + "permanent" "temp" +} + +cfg_test_port_common() +{ + local name=$1;shift + local grp_key=$1; shift + + RET=0 + + # Check basic add, replace and delete behavior. + bridge mdb add dev br0 port $swp1 $grp_key permanent vid 10 + bridge mdb get dev br0 $grp_key vid 10 &> /dev/null + check_err $? "Failed to add $name entry" + + bridge mdb replace dev br0 port $swp1 $grp_key permanent vid 10 \ + &> /dev/null + check_err $? "Failed to replace $name entry" + + bridge mdb del dev br0 port $swp1 $grp_key permanent vid 10 + bridge mdb get dev br0 $grp_key vid 10 &> /dev/null + check_fail $? "Failed to delete $name entry" + + # Check default protocol and replacement. + bridge mdb add dev br0 port $swp1 $grp_key permanent vid 10 + bridge -d mdb get dev br0 $grp_key vid 10 | grep -q "static" + check_err $? "$name entry not added with default \"static\" protocol" + + bridge mdb replace dev br0 port $swp1 $grp_key permanent vid 10 \ + proto 123 + bridge -d mdb get dev br0 $grp_key vid 10 | grep -q "123" + check_err $? "Failed to replace protocol of $name entry" + bridge mdb del dev br0 port $swp1 $grp_key permanent vid 10 + + # Check behavior when VLAN is not specified. + bridge mdb add dev br0 port $swp1 $grp_key permanent + bridge mdb get dev br0 $grp_key vid 10 &> /dev/null + check_err $? "$name entry with VLAN 10 not added when VLAN was not specified" + bridge mdb get dev br0 $grp_key vid 20 &> /dev/null + check_err $? "$name entry with VLAN 20 not added when VLAN was not specified" + + bridge mdb del dev br0 port $swp1 $grp_key permanent + bridge mdb get dev br0 $grp_key vid 10 &> /dev/null + check_fail $? "$name entry with VLAN 10 not deleted when VLAN was not specified" + bridge mdb get dev br0 $grp_key vid 20 &> /dev/null + check_fail $? "$name entry with VLAN 20 not deleted when VLAN was not specified" + + # Check behavior when bridge port is down. + ip link set dev $swp1 down + + bridge mdb add dev br0 port $swp1 $grp_key permanent vid 10 + check_err $? "Failed to add $name permanent entry when bridge port is down" + + bridge mdb del dev br0 port $swp1 $grp_key permanent vid 10 + + bridge mdb add dev br0 port $swp1 $grp_key temp vid 10 &> /dev/null + check_fail $? "Managed to add $name temporary entry when bridge port is down" + + ip link set dev $swp1 up + setup_wait_dev $swp1 + + # Check error cases. + ip link set dev br0 down + bridge mdb add dev br0 port $swp1 $grp_key permanent vid 10 \ + &> /dev/null + check_fail $? "Managed to add $name entry when bridge is down" + ip link set dev br0 up + + ip link set dev br0 type bridge mcast_snooping 0 + bridge mdb add dev br0 port $swp1 $grp_key permanent vid \ + 10 &> /dev/null + check_fail $? "Managed to add $name entry when multicast snooping is disabled" + ip link set dev br0 type bridge mcast_snooping 1 + + bridge mdb add dev br0 port $swp1 $grp_key permanent vid 5000 \ + &> /dev/null + check_fail $? "Managed to add $name entry with an invalid VLAN" + + log_test "Common port group entries configuration tests ($name)" +} + +src_list_create() +{ + local src_prefix=$1; shift + local num_srcs=$1; shift + local src_list + local i + + for i in $(seq 1 $num_srcs); do + src_list=${src_list},${src_prefix}${i} + done + + echo $src_list | cut -c 2- +} + +__cfg_test_port_ip_star_g() +{ + local name=$1; shift + local grp=$1; shift + local invalid_grp=$1; shift + local src_prefix=$1; shift + local src1=${src_prefix}1 + local src2=${src_prefix}2 + local src3=${src_prefix}3 + local max_srcs=31 + local num_srcs + + RET=0 + + bridge mdb add dev br0 port $swp1 grp $grp vid 10 + bridge -d mdb get dev br0 grp $grp vid 10 | grep -q "exclude" + check_err $? "Default filter mode is not \"exclude\"" + bridge mdb del dev br0 port $swp1 grp $grp vid 10 + + # Check basic add and delete behavior. + bridge mdb add dev br0 port $swp1 grp $grp vid 10 filter_mode exclude \ + source_list $src1 + bridge -d mdb get dev br0 grp $grp vid 10 &> /dev/null + check_err $? "(*, G) entry not created" + bridge -d mdb get dev br0 grp $grp src $src1 vid 10 &> /dev/null + check_err $? "(S, G) entry not created" + bridge mdb del dev br0 port $swp1 grp $grp vid 10 + bridge -d mdb get dev br0 grp $grp vid 10 &> /dev/null + check_fail $? "(*, G) entry not deleted" + bridge -d mdb get dev br0 grp $grp src $src1 vid 10 &> /dev/null + check_fail $? "(S, G) entry not deleted" + + ## State (permanent / temp) tests. + + # Check that group and source timer are not set for permanent entries. + bridge mdb add dev br0 port $swp1 grp $grp permanent vid 10 \ + filter_mode exclude source_list $src1 + + bridge -d mdb get dev br0 grp $grp vid 10 | grep -q "permanent" + check_err $? "(*, G) entry not added as \"permanent\" when should" + bridge -d mdb get dev br0 grp $grp src $src1 vid 10 | \ + grep -q "permanent" + check_err $? "(S, G) entry not added as \"permanent\" when should" + + bridge -d -s mdb get dev br0 grp $grp vid 10 | grep -q " 0.00" + check_err $? "(*, G) \"permanent\" entry has a pending group timer" + bridge -d -s mdb get dev br0 grp $grp vid 10 | grep -q "/0.00" + check_err $? "\"permanent\" source entry has a pending source timer" + + bridge mdb del dev br0 port $swp1 grp $grp vid 10 + + # Check that group timer is set for temporary (*, G) EXCLUDE, but not + # the source timer. + bridge mdb add dev br0 port $swp1 grp $grp temp vid 10 \ + filter_mode exclude source_list $src1 + + bridge -d mdb get dev br0 grp $grp vid 10 | grep -q "temp" + check_err $? "(*, G) EXCLUDE entry not added as \"temp\" when should" + bridge -d mdb get dev br0 grp $grp src $src1 vid 10 | grep -q "temp" + check_err $? "(S, G) \"blocked\" entry not added as \"temp\" when should" + + bridge -d -s mdb get dev br0 grp $grp vid 10 | grep -q " 0.00" + check_fail $? "(*, G) EXCLUDE entry does not have a pending group timer" + bridge -d -s mdb get dev br0 grp $grp vid 10 | grep -q "/0.00" + check_err $? "\"blocked\" source entry has a pending source timer" + + bridge mdb del dev br0 port $swp1 grp $grp vid 10 + + # Check that group timer is not set for temporary (*, G) INCLUDE, but + # that the source timer is set. + bridge mdb add dev br0 port $swp1 grp $grp temp vid 10 \ + filter_mode include source_list $src1 + + bridge -d mdb get dev br0 grp $grp vid 10 | grep -q "temp" + check_err $? "(*, G) INCLUDE entry not added as \"temp\" when should" + bridge -d mdb get dev br0 grp $grp src $src1 vid 10 | grep -q "temp" + check_err $? "(S, G) entry not added as \"temp\" when should" + + bridge -d -s mdb get dev br0 grp $grp vid 10 | grep -q " 0.00" + check_err $? "(*, G) INCLUDE entry has a pending group timer" + bridge -d -s mdb get dev br0 grp $grp vid 10 | grep -q "/0.00" + check_fail $? "Source entry does not have a pending source timer" + + bridge mdb del dev br0 port $swp1 grp $grp vid 10 + + # Check that group timer is never set for (S, G) entries. + bridge mdb add dev br0 port $swp1 grp $grp temp vid 10 \ + filter_mode include source_list $src1 + + bridge -d -s mdb get dev br0 grp $grp src $src1 vid 10 | grep -q " 0.00" + check_err $? "(S, G) entry has a pending group timer" + + bridge mdb del dev br0 port $swp1 grp $grp vid 10 + + ## Filter mode (include / exclude) tests. + + # Check that (*, G) INCLUDE entries are added with correct filter mode + # and that (S, G) entries are not marked as "blocked". + bridge mdb add dev br0 port $swp1 grp $grp vid 10 \ + filter_mode include source_list $src1 + + bridge -d mdb get dev br0 grp $grp vid 10 | grep -q "include" + check_err $? "(*, G) INCLUDE not added with \"include\" filter mode" + bridge -d mdb get dev br0 grp $grp src $src1 vid 10 | grep -q "blocked" + check_fail $? "(S, G) entry marked as \"blocked\" when should not" + + bridge mdb del dev br0 port $swp1 grp $grp vid 10 + + # Check that (*, G) EXCLUDE entries are added with correct filter mode + # and that (S, G) entries are marked as "blocked". + bridge mdb add dev br0 port $swp1 grp $grp vid 10 \ + filter_mode exclude source_list $src1 + + bridge -d mdb get dev br0 grp $grp vid 10 | grep -q "exclude" + check_err $? "(*, G) EXCLUDE not added with \"exclude\" filter mode" + bridge -d mdb get dev br0 grp $grp src $src1 vid 10 | grep -q "blocked" + check_err $? "(S, G) entry not marked as \"blocked\" when should" + + bridge mdb del dev br0 port $swp1 grp $grp vid 10 + + ## Protocol tests. + + # Check that (*, G) and (S, G) entries are added with the specified + # protocol. + bridge mdb add dev br0 port $swp1 grp $grp vid 10 \ + filter_mode exclude source_list $src1 proto zebra + + bridge -d mdb get dev br0 grp $grp vid 10 | grep -q "zebra" + check_err $? "(*, G) entry not added with \"zebra\" protocol" + bridge -d mdb get dev br0 grp $grp src $src1 vid 10 | grep -q "zebra" + check_err $? "(S, G) entry not marked added with \"zebra\" protocol" + + bridge mdb del dev br0 port $swp1 grp $grp vid 10 + + ## Replace tests. + + # Check that state can be modified. + bridge mdb add dev br0 port $swp1 grp $grp temp vid 10 \ + filter_mode exclude source_list $src1 + + bridge mdb replace dev br0 port $swp1 grp $grp permanent vid 10 \ + filter_mode exclude source_list $src1 + bridge -d mdb get dev br0 grp $grp vid 10 | grep -q "permanent" + check_err $? "(*, G) entry not marked as \"permanent\" after replace" + bridge -d mdb get dev br0 grp $grp src $src1 vid 10 | grep -q "permanent" + check_err $? "(S, G) entry not marked as \"permanent\" after replace" + + bridge mdb replace dev br0 port $swp1 grp $grp temp vid 10 \ + filter_mode exclude source_list $src1 + bridge -d mdb get dev br0 grp $grp vid 10 | grep -q "temp" + check_err $? "(*, G) entry not marked as \"temp\" after replace" + bridge -d mdb get dev br0 grp $grp src $src1 vid 10 | grep -q "temp" + check_err $? "(S, G) entry not marked as \"temp\" after replace" + + bridge mdb del dev br0 port $swp1 grp $grp vid 10 + + # Check that filter mode can be modified. + bridge mdb add dev br0 port $swp1 grp $grp temp vid 10 \ + filter_mode exclude source_list $src1 + + bridge mdb replace dev br0 port $swp1 grp $grp temp vid 10 \ + filter_mode include source_list $src1 + bridge -d mdb get dev br0 grp $grp vid 10 | grep -q "include" + check_err $? "(*, G) not marked with \"include\" filter mode after replace" + bridge -d mdb get dev br0 grp $grp src $src1 vid 10 | grep -q "blocked" + check_fail $? "(S, G) marked as \"blocked\" after replace" + + bridge mdb replace dev br0 port $swp1 grp $grp temp vid 10 \ + filter_mode exclude source_list $src1 + bridge -d mdb get dev br0 grp $grp vid 10 | grep -q "exclude" + check_err $? "(*, G) not marked with \"exclude\" filter mode after replace" + bridge -d mdb get dev br0 grp $grp src $src1 vid 10 | grep -q "blocked" + check_err $? "(S, G) not marked as \"blocked\" after replace" + + bridge mdb del dev br0 port $swp1 grp $grp vid 10 + + # Check that sources can be added to and removed from the source list. + bridge mdb add dev br0 port $swp1 grp $grp temp vid 10 \ + filter_mode exclude source_list $src1 + + bridge mdb replace dev br0 port $swp1 grp $grp temp vid 10 \ + filter_mode exclude source_list $src1,$src2,$src3 + bridge -d mdb get dev br0 grp $grp src $src1 vid 10 &> /dev/null + check_err $? "(S, G) entry for source $src1 not created after replace" + bridge -d mdb get dev br0 grp $grp src $src2 vid 10 &> /dev/null + check_err $? "(S, G) entry for source $src2 not created after replace" + bridge -d mdb get dev br0 grp $grp src $src3 vid 10 &> /dev/null + check_err $? "(S, G) entry for source $src3 not created after replace" + + bridge mdb replace dev br0 port $swp1 grp $grp temp vid 10 \ + filter_mode exclude source_list $src1,$src3 + bridge -d mdb get dev br0 grp $grp src $src1 vid 10 &> /dev/null + check_err $? "(S, G) entry for source $src1 not created after second replace" + bridge -d mdb get dev br0 grp $grp src $src2 vid 10 &> /dev/null + check_fail $? "(S, G) entry for source $src2 created after second replace" + bridge -d mdb get dev br0 grp $grp src $src3 vid 10 &> /dev/null + check_err $? "(S, G) entry for source $src3 not created after second replace" + + bridge mdb del dev br0 port $swp1 grp $grp vid 10 + + # Check that protocol can be modified. + bridge mdb add dev br0 port $swp1 grp $grp temp vid 10 \ + filter_mode exclude source_list $src1 proto zebra + + bridge mdb replace dev br0 port $swp1 grp $grp temp vid 10 \ + filter_mode exclude source_list $src1 proto bgp + bridge -d mdb get dev br0 grp $grp vid 10 | grep -q "bgp" + check_err $? "(*, G) protocol not changed to \"bgp\" after replace" + bridge -d mdb get dev br0 grp $grp src $src1 vid 10 | grep -q "bgp" + check_err $? "(S, G) protocol not changed to \"bgp\" after replace" + + bridge mdb del dev br0 port $swp1 grp $grp vid 10 + + ## Star exclude tests. + + # Check star exclude functionality. When adding a new EXCLUDE (*, G), + # it needs to be also added to all (S, G) entries for proper + # replication. + bridge mdb add dev br0 port $swp2 grp $grp vid 10 \ + filter_mode include source_list $src1 + bridge mdb add dev br0 port $swp1 grp $grp vid 10 + bridge -d mdb get dev br0 grp $grp src $src1 vid 10 | grep "$swp1" | \ + grep -q "added_by_star_ex" + check_err $? "\"added_by_star_ex\" entry not created after adding (*, G) entry" + bridge mdb del dev br0 port $swp1 grp $grp vid 10 + bridge mdb del dev br0 port $swp2 grp $grp src $src1 vid 10 + + ## Error cases tests. + + bridge mdb add dev br0 port $swp1 grp $invalid_grp vid 10 &> /dev/null + check_fail $? "Managed to add an entry with an invalid group" + + bridge mdb add dev br0 port $swp1 grp $grp vid 10 filter_mode include \ + &> /dev/null + check_fail $? "Managed to add an INCLUDE entry with an empty source list" + + bridge mdb add dev br0 port $swp1 grp $grp vid 10 filter_mode include \ + source_list $grp &> /dev/null + check_fail $? "Managed to add an entry with an invalid source in source list" + + bridge mdb add dev br0 port $swp1 grp $grp vid 10 \ + source_list $src &> /dev/null + check_fail $? "Managed to add an entry with a source list and no filter mode" + + bridge mdb add dev br0 port $swp1 grp $grp vid 10 filter_mode include \ + source_list $src1 + bridge mdb add dev br0 port $swp1 grp $grp vid 10 filter_mode exclude \ + source_list $src1 &> /dev/null + check_fail $? "Managed to replace an entry without using replace" + bridge mdb del dev br0 port $swp1 grp $grp vid 10 + + bridge mdb add dev br0 port $swp1 grp $grp src $src2 vid 10 + bridge mdb add dev br0 port $swp1 grp $grp vid 10 filter_mode include \ + source_list $src1,$src2,$src3 &> /dev/null + check_fail $? "Managed to add a source that already has a forwarding entry" + bridge mdb del dev br0 port $swp1 grp $grp src $src2 vid 10 + + # Check maximum number of sources. + bridge mdb add dev br0 port $swp1 grp $grp vid 10 filter_mode exclude \ + source_list $(src_list_create $src_prefix $max_srcs) + num_srcs=$(bridge -d mdb show dev br0 vid 10 | grep "$grp" | \ + grep "src" | wc -l) + [[ $num_srcs -eq $max_srcs ]] + check_err $? "Failed to configure maximum number of sources ($max_srcs)" + bridge mdb del dev br0 port $swp1 grp $grp vid 10 + + bridge mdb add dev br0 port $swp1 grp $grp vid 10 filter_mode exclude \ + source_list $(src_list_create $src_prefix $((max_srcs + 1))) \ + &> /dev/null + check_fail $? "Managed to exceed maximum number of sources ($max_srcs)" + + log_test "$name (*, G) port group entries configuration tests" +} + +cfg_test_port_ip_star_g() +{ + echo + log_info "# Port group entries configuration tests - (*, G)" + + cfg_test_port_common "IPv4 (*, G)" "grp 239.1.1.1" + cfg_test_port_common "IPv6 (*, G)" "grp ff0e::1" + __cfg_test_port_ip_star_g "IPv4" "239.1.1.1" "224.0.0.1" "192.0.2." + __cfg_test_port_ip_star_g "IPv6" "ff0e::1" "ff02::1" "2001:db8:1::" +} + +__cfg_test_port_ip_sg() +{ + local name=$1; shift + local grp=$1; shift + local src=$1; shift + local grp_key="grp $grp src $src" + + RET=0 + + bridge mdb add dev br0 port $swp1 $grp_key vid 10 + bridge -d mdb get dev br0 $grp_key vid 10 | grep -q "include" + check_err $? "Default filter mode is not \"include\"" + bridge mdb del dev br0 port $swp1 $grp_key vid 10 + + # Check that entries can be added as both permanent and temp and that + # group timer is set correctly. + bridge mdb add dev br0 port $swp1 $grp_key permanent vid 10 + bridge -d mdb get dev br0 $grp_key vid 10 | grep -q "permanent" + check_err $? "Entry not added as \"permanent\" when should" + bridge -d -s mdb get dev br0 $grp_key vid 10 | grep -q " 0.00" + check_err $? "\"permanent\" entry has a pending group timer" + bridge mdb del dev br0 port $swp1 $grp_key vid 10 + + bridge mdb add dev br0 port $swp1 $grp_key temp vid 10 + bridge -d mdb get dev br0 $grp_key vid 10 | grep -q "temp" + check_err $? "Entry not added as \"temp\" when should" + bridge -d -s mdb get dev br0 $grp_key vid 10 | grep -q " 0.00" + check_fail $? "\"temp\" entry has an unpending group timer" + bridge mdb del dev br0 port $swp1 $grp_key vid 10 + + # Check error cases. + bridge mdb add dev br0 port $swp1 $grp_key vid 10 \ + filter_mode include &> /dev/null + check_fail $? "Managed to add an entry with a filter mode" + + bridge mdb add dev br0 port $swp1 $grp_key vid 10 \ + filter_mode include source_list $src &> /dev/null + check_fail $? "Managed to add an entry with a source list" + + bridge mdb add dev br0 port $swp1 grp $grp src $grp vid 10 &> /dev/null + check_fail $? "Managed to add an entry with an invalid source" + + bridge mdb add dev br0 port $swp1 $grp_key vid 10 temp + bridge mdb add dev br0 port $swp1 $grp_key vid 10 permanent &> /dev/null + check_fail $? "Managed to replace an entry without using replace" + bridge mdb del dev br0 port $swp1 $grp_key vid 10 + + # Check that we can replace available attributes. + bridge mdb add dev br0 port $swp1 $grp_key vid 10 proto 123 + bridge mdb replace dev br0 port $swp1 $grp_key vid 10 proto 111 + bridge -d mdb get dev br0 $grp_key vid 10 | grep -q "111" + check_err $? "Failed to replace protocol" + + bridge mdb replace dev br0 port $swp1 $grp_key vid 10 permanent + bridge -d mdb get dev br0 $grp_key vid 10 | grep -q "permanent" + check_err $? "Entry not marked as \"permanent\" after replace" + bridge -d -s mdb get dev br0 $grp_key vid 10 | grep -q " 0.00" + check_err $? "Entry has a pending group timer after replace" + + bridge mdb replace dev br0 port $swp1 $grp_key vid 10 temp + bridge -d mdb get dev br0 $grp_key vid 10 | grep -q "temp" + check_err $? "Entry not marked as \"temp\" after replace" + bridge -d -s mdb get dev br0 $grp_key vid 10 | grep -q " 0.00" + check_fail $? "Entry has an unpending group timer after replace" + bridge mdb del dev br0 port $swp1 $grp_key vid 10 + + # Check star exclude functionality. When adding a (S, G), all matching + # (*, G) ports need to be added to it. + bridge mdb add dev br0 port $swp2 grp $grp vid 10 + bridge mdb add dev br0 port $swp1 $grp_key vid 10 + bridge mdb get dev br0 $grp_key vid 10 | grep $swp2 | \ + grep -q "added_by_star_ex" + check_err $? "\"added_by_star_ex\" entry not created after adding (S, G) entry" + bridge mdb del dev br0 port $swp1 $grp_key vid 10 + bridge mdb del dev br0 port $swp2 grp $grp vid 10 + + log_test "$name (S, G) port group entries configuration tests" +} + +cfg_test_port_ip_sg() +{ + echo + log_info "# Port group entries configuration tests - (S, G)" + + cfg_test_port_common "IPv4 (S, G)" "grp 239.1.1.1 src 192.0.2.1" + cfg_test_port_common "IPv6 (S, G)" "grp ff0e::1 src 2001:db8:1::1" + __cfg_test_port_ip_sg "IPv4" "239.1.1.1" "192.0.2.1" + __cfg_test_port_ip_sg "IPv6" "ff0e::1" "2001:db8:1::1" +} + +cfg_test_port_ip() +{ + cfg_test_port_ip_star_g + cfg_test_port_ip_sg +} + +__cfg_test_port_l2() +{ + local grp="01:02:03:04:05:06" + + RET=0 + + bridge meb add dev br0 port $swp grp 00:01:02:03:04:05 \ + permanent vid 10 &> /dev/null + check_fail $? "Managed to add an entry with unicast MAC" + + bridge mdb add dev br0 port $swp grp $grp src 00:01:02:03:04:05 \ + permanent vid 10 &> /dev/null + check_fail $? "Managed to add an entry with a source" + + bridge mdb add dev br0 port $swp1 grp $grp permanent vid 10 \ + filter_mode include &> /dev/null + check_fail $? "Managed to add an entry with a filter mode" + + bridge mdb add dev br0 port $swp1 grp $grp permanent vid 10 \ + source_list 00:01:02:03:04:05 &> /dev/null + check_fail $? "Managed to add an entry with a source list" + + log_test "L2 (*, G) port group entries configuration tests" +} + +cfg_test_port_l2() +{ + echo + log_info "# Port group entries configuration tests - L2" + + cfg_test_port_common "L2 (*, G)" "grp 01:02:03:04:05:06" + __cfg_test_port_l2 +} + +# Check configuration of regular (port) entries of all types. +cfg_test_port() +{ + cfg_test_port_ip + cfg_test_port_l2 +} + +ipv4_grps_get() +{ + local max_grps=$1; shift + local i + + for i in $(seq 0 $((max_grps - 1))); do + echo "239.1.1.$i" + done +} + +ipv6_grps_get() +{ + local max_grps=$1; shift + local i + + for i in $(seq 0 $((max_grps - 1))); do + echo "ff0e::$(printf %x $i)" + done +} + +l2_grps_get() +{ + local max_grps=$1; shift + local i + + for i in $(seq 0 $((max_grps - 1))); do + echo "01:00:00:00:00:$(printf %02x $i)" + done +} + +cfg_test_dump_common() +{ + local name=$1; shift + local fn=$1; shift + local max_bridges=2 + local max_grps=256 + local max_ports=32 + local num_entries + local batch_file + local grp + local i j + + RET=0 + + # Create net devices. + for i in $(seq 1 $max_bridges); do + ip link add name br-test${i} up type bridge vlan_filtering 1 \ + mcast_snooping 1 + for j in $(seq 1 $max_ports); do + ip link add name br-test${i}-du${j} up \ + master br-test${i} type dummy + done + done + + # Create batch file with MDB entries. + batch_file=$(mktemp) + for i in $(seq 1 $max_bridges); do + for j in $(seq 1 $max_ports); do + for grp in $($fn $max_grps); do + echo "mdb add dev br-test${i} \ + port br-test${i}-du${j} grp $grp \ + permanent vid 1" >> $batch_file + done + done + done + + # Program the batch file and check for expected number of entries. + bridge -b $batch_file + for i in $(seq 1 $max_bridges); do + num_entries=$(bridge mdb show dev br-test${i} | \ + grep "permanent" | wc -l) + [[ $num_entries -eq $((max_grps * max_ports)) ]] + check_err $? "Wrong number of entries in br-test${i}" + done + + # Cleanup. + rm $batch_file + for i in $(seq 1 $max_bridges); do + ip link del dev br-test${i} + for j in $(seq $max_ports); do + ip link del dev br-test${i}-du${j} + done + done + + log_test "$name large scale dump tests" +} + +# Check large scale dump. +cfg_test_dump() +{ + echo + log_info "# Large scale dump tests" + + cfg_test_dump_common "IPv4" ipv4_grps_get + cfg_test_dump_common "IPv6" ipv6_grps_get + cfg_test_dump_common "L2" l2_grps_get +} + +# Check flush functionality with different parameters. +cfg_test_flush() +{ + local num_entries + + # Add entries with different attributes and check that they are all + # flushed when the flush command is given with no parameters. + + # Different port. + bridge mdb add dev br0 port $swp1 grp 239.1.1.1 vid 10 + bridge mdb add dev br0 port $swp2 grp 239.1.1.2 vid 10 + + # Different VLAN ID. + bridge mdb add dev br0 port $swp1 grp 239.1.1.3 vid 10 + bridge mdb add dev br0 port $swp1 grp 239.1.1.4 vid 20 + + # Different routing protocol. + bridge mdb add dev br0 port $swp1 grp 239.1.1.5 vid 10 proto bgp + bridge mdb add dev br0 port $swp1 grp 239.1.1.6 vid 10 proto zebra + + # Different state. + bridge mdb add dev br0 port $swp1 grp 239.1.1.7 vid 10 permanent + bridge mdb add dev br0 port $swp1 grp 239.1.1.8 vid 10 temp + + bridge mdb flush dev br0 + num_entries=$(bridge mdb show dev br0 | wc -l) + [[ $num_entries -eq 0 ]] + check_err $? 0 "Not all entries flushed after flush all" + + # Check that when flushing by port only entries programmed with the + # specified port are flushed and the rest are not. + + bridge mdb add dev br0 port $swp1 grp 239.1.1.1 vid 10 + bridge mdb add dev br0 port $swp2 grp 239.1.1.1 vid 10 + bridge mdb add dev br0 port br0 grp 239.1.1.1 vid 10 + + bridge mdb flush dev br0 port $swp1 + + bridge mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q "port $swp1" + check_fail $? "Entry not flushed by specified port" + bridge mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q "port $swp2" + check_err $? "Entry flushed by wrong port" + bridge mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q "port br0" + check_err $? "Host entry flushed by wrong port" + + bridge mdb flush dev br0 port br0 + + bridge mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q "port br0" + check_fail $? "Host entry not flushed by specified port" + + bridge mdb flush dev br0 + + # Check that when flushing by VLAN ID only entries programmed with the + # specified VLAN ID are flushed and the rest are not. + + bridge mdb add dev br0 port $swp1 grp 239.1.1.1 vid 10 + bridge mdb add dev br0 port $swp2 grp 239.1.1.1 vid 10 + bridge mdb add dev br0 port $swp1 grp 239.1.1.1 vid 20 + bridge mdb add dev br0 port $swp2 grp 239.1.1.1 vid 20 + + bridge mdb flush dev br0 vid 10 + + bridge mdb get dev br0 grp 239.1.1.1 vid 10 &> /dev/null + check_fail $? "Entry not flushed by specified VLAN ID" + bridge mdb get dev br0 grp 239.1.1.1 vid 20 &> /dev/null + check_err $? "Entry flushed by wrong VLAN ID" + + bridge mdb flush dev br0 + + # Check that all permanent entries are flushed when "permanent" is + # specified and that temporary entries are not. + + bridge mdb add dev br0 port $swp1 grp 239.1.1.1 permanent vid 10 + bridge mdb add dev br0 port $swp2 grp 239.1.1.1 temp vid 10 + + bridge mdb flush dev br0 permanent + + bridge mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q "port $swp1" + check_fail $? "Entry not flushed by \"permanent\" state" + bridge mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q "port $swp2" + check_err $? "Entry flushed by wrong state (\"permanent\")" + + bridge mdb flush dev br0 + + # Check that all temporary entries are flushed when "nopermanent" is + # specified and that permanent entries are not. + + bridge mdb add dev br0 port $swp1 grp 239.1.1.1 permanent vid 10 + bridge mdb add dev br0 port $swp2 grp 239.1.1.1 temp vid 10 + + bridge mdb flush dev br0 nopermanent + + bridge mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q "port $swp1" + check_err $? "Entry flushed by wrong state (\"nopermanent\")" + bridge mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q "port $swp2" + check_fail $? "Entry not flushed by \"nopermanent\" state" + + bridge mdb flush dev br0 + + # Check that L2 host entries are not flushed when "nopermanent" is + # specified, but flushed when "permanent" is specified. + + bridge mdb add dev br0 port br0 grp 01:02:03:04:05:06 permanent vid 10 + + bridge mdb flush dev br0 nopermanent + + bridge mdb get dev br0 grp 01:02:03:04:05:06 vid 10 &> /dev/null + check_err $? "L2 host entry flushed by wrong state (\"nopermanent\")" + + bridge mdb flush dev br0 permanent + + bridge mdb get dev br0 grp 01:02:03:04:05:06 vid 10 &> /dev/null + check_fail $? "L2 host entry not flushed by \"permanent\" state" + + bridge mdb flush dev br0 + + # Check that IPv4 host entries are not flushed when "permanent" is + # specified, but flushed when "nopermanent" is specified. + + bridge mdb add dev br0 port br0 grp 239.1.1.1 temp vid 10 + + bridge mdb flush dev br0 permanent + + bridge mdb get dev br0 grp 239.1.1.1 vid 10 &> /dev/null + check_err $? "IPv4 host entry flushed by wrong state (\"permanent\")" + + bridge mdb flush dev br0 nopermanent + + bridge mdb get dev br0 grp 239.1.1.1 vid 10 &> /dev/null + check_fail $? "IPv4 host entry not flushed by \"nopermanent\" state" + + bridge mdb flush dev br0 + + # Check that IPv6 host entries are not flushed when "permanent" is + # specified, but flushed when "nopermanent" is specified. + + bridge mdb add dev br0 port br0 grp ff0e::1 temp vid 10 + + bridge mdb flush dev br0 permanent + + bridge mdb get dev br0 grp ff0e::1 vid 10 &> /dev/null + check_err $? "IPv6 host entry flushed by wrong state (\"permanent\")" + + bridge mdb flush dev br0 nopermanent + + bridge mdb get dev br0 grp ff0e::1 vid 10 &> /dev/null + check_fail $? "IPv6 host entry not flushed by \"nopermanent\" state" + + bridge mdb flush dev br0 + + # Check that when flushing by routing protocol only entries programmed + # with the specified routing protocol are flushed and the rest are not. + + bridge mdb add dev br0 port $swp1 grp 239.1.1.1 vid 10 proto bgp + bridge mdb add dev br0 port $swp2 grp 239.1.1.1 vid 10 proto zebra + bridge mdb add dev br0 port br0 grp 239.1.1.1 vid 10 + + bridge mdb flush dev br0 proto bgp + + bridge mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q "port $swp1" + check_fail $? "Entry not flushed by specified routing protocol" + bridge mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q "port $swp2" + check_err $? "Entry flushed by wrong routing protocol" + bridge mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q "port br0" + check_err $? "Host entry flushed by wrong routing protocol" + + bridge mdb flush dev br0 + + # Test that an error is returned when trying to flush using unsupported + # parameters. + + bridge mdb flush dev br0 src_vni 10 &> /dev/null + check_fail $? "Managed to flush by source VNI" + + bridge mdb flush dev br0 dst 198.51.100.1 &> /dev/null + check_fail $? "Managed to flush by destination IP" + + bridge mdb flush dev br0 dst_port 4789 &> /dev/null + check_fail $? "Managed to flush by UDP destination port" + + bridge mdb flush dev br0 vni 10 &> /dev/null + check_fail $? "Managed to flush by destination VNI" + + log_test "Flush tests" +} + +cfg_test() +{ + cfg_test_host + cfg_test_port + cfg_test_dump + cfg_test_flush +} + +__fwd_test_host_ip() +{ + local grp=$1; shift + local dmac=$1; shift + local src=$1; shift + local mode=$1; shift + local name + local eth_type + + RET=0 + + if [[ $mode == "-4" ]]; then + name="IPv4" + eth_type="ipv4" + else + name="IPv6" + eth_type="ipv6" + fi + + tc filter add dev br0 ingress protocol 802.1q pref 1 handle 1 flower \ + vlan_ethtype $eth_type vlan_id 10 dst_ip $grp src_ip $src \ + action drop + + # Packet should only be flooded to multicast router ports when there is + # no matching MDB entry. The bridge is not configured as a multicast + # router port. + $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $src -B $grp -t udp -q + tc_check_packets "dev br0 ingress" 1 0 + check_err $? "Packet locally received after flood" + + # Install a regular port group entry and expect the packet to not be + # locally received. + bridge mdb add dev br0 port $swp2 grp $grp temp vid 10 + $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $src -B $grp -t udp -q + tc_check_packets "dev br0 ingress" 1 0 + check_err $? "Packet locally received after installing a regular entry" + + # Add a host entry and expect the packet to be locally received. + bridge mdb add dev br0 port br0 grp $grp temp vid 10 + $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $src -B $grp -t udp -q + tc_check_packets "dev br0 ingress" 1 1 + check_err $? "Packet not locally received after adding a host entry" + + # Remove the host entry and expect the packet to not be locally + # received. + bridge mdb del dev br0 port br0 grp $grp vid 10 + $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $src -B $grp -t udp -q + tc_check_packets "dev br0 ingress" 1 1 + check_err $? "Packet locally received after removing a host entry" + + bridge mdb del dev br0 port $swp2 grp $grp vid 10 + + tc filter del dev br0 ingress protocol 802.1q pref 1 handle 1 flower + + log_test "$name host entries forwarding tests" +} + +fwd_test_host_ip() +{ + __fwd_test_host_ip "239.1.1.1" "01:00:5e:01:01:01" "192.0.2.1" "-4" + __fwd_test_host_ip "ff0e::1" "33:33:00:00:00:01" "2001:db8:1::1" "-6" +} + +fwd_test_host_l2() +{ + local dmac=01:02:03:04:05:06 + + RET=0 + + tc filter add dev br0 ingress protocol all pref 1 handle 1 flower \ + dst_mac $dmac action drop + + # Packet should be flooded and locally received when there is no + # matching MDB entry. + $MZ $h1.10 -c 1 -p 128 -a own -b $dmac -q + tc_check_packets "dev br0 ingress" 1 1 + check_err $? "Packet not locally received after flood" + + # Install a regular port group entry and expect the packet to not be + # locally received. + bridge mdb add dev br0 port $swp2 grp $dmac permanent vid 10 + $MZ $h1.10 -c 1 -p 128 -a own -b $dmac -q + tc_check_packets "dev br0 ingress" 1 1 + check_err $? "Packet locally received after installing a regular entry" + + # Add a host entry and expect the packet to be locally received. + bridge mdb add dev br0 port br0 grp $dmac permanent vid 10 + $MZ $h1.10 -c 1 -p 128 -a own -b $dmac -q + tc_check_packets "dev br0 ingress" 1 2 + check_err $? "Packet not locally received after adding a host entry" + + # Remove the host entry and expect the packet to not be locally + # received. + bridge mdb del dev br0 port br0 grp $dmac permanent vid 10 + $MZ $h1.10 -c 1 -p 128 -a own -b $dmac -q + tc_check_packets "dev br0 ingress" 1 2 + check_err $? "Packet locally received after removing a host entry" + + bridge mdb del dev br0 port $swp2 grp $dmac permanent vid 10 + + tc filter del dev br0 ingress protocol all pref 1 handle 1 flower + + log_test "L2 host entries forwarding tests" +} + +fwd_test_host() +{ + # Disable multicast router on the bridge to ensure that packets are + # only locally received when a matching host entry is present. + ip link set dev br0 type bridge mcast_router 0 + + fwd_test_host_ip + fwd_test_host_l2 + + ip link set dev br0 type bridge mcast_router 1 +} + +__fwd_test_port_ip() +{ + local grp=$1; shift + local dmac=$1; shift + local valid_src=$1; shift + local invalid_src=$1; shift + local mode=$1; shift + local filter_mode=$1; shift + local name + local eth_type + local src_list + + RET=0 + + if [[ $mode == "-4" ]]; then + name="IPv4" + eth_type="ipv4" + else + name="IPv6" + eth_type="ipv6" + fi + + # The valid source is the one we expect to get packets from after + # adding the entry. + if [[ $filter_mode == "include" ]]; then + src_list=$valid_src + else + src_list=$invalid_src + fi + + tc filter add dev $h2 ingress protocol 802.1q pref 1 handle 1 flower \ + vlan_ethtype $eth_type vlan_id 10 dst_ip $grp \ + src_ip $valid_src action drop + tc filter add dev $h2 ingress protocol 802.1q pref 1 handle 2 flower \ + vlan_ethtype $eth_type vlan_id 10 dst_ip $grp \ + src_ip $invalid_src action drop + + $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $valid_src -B $grp -t udp -q + tc_check_packets "dev $h2 ingress" 1 0 + check_err $? "Packet from valid source received on H2 before adding entry" + + $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $invalid_src -B $grp -t udp -q + tc_check_packets "dev $h2 ingress" 2 0 + check_err $? "Packet from invalid source received on H2 before adding entry" + + bridge mdb add dev br0 port $swp2 grp $grp vid 10 \ + filter_mode $filter_mode source_list $src_list + + $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $valid_src -B $grp -t udp -q + tc_check_packets "dev $h2 ingress" 1 1 + check_err $? "Packet from valid source not received on H2 after adding entry" + + $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $invalid_src -B $grp -t udp -q + tc_check_packets "dev $h2 ingress" 2 0 + check_err $? "Packet from invalid source received on H2 after adding entry" + + bridge mdb replace dev br0 port $swp2 grp $grp vid 10 \ + filter_mode exclude + + $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $valid_src -B $grp -t udp -q + tc_check_packets "dev $h2 ingress" 1 2 + check_err $? "Packet from valid source not received on H2 after allowing all sources" + + $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $invalid_src -B $grp -t udp -q + tc_check_packets "dev $h2 ingress" 2 1 + check_err $? "Packet from invalid source not received on H2 after allowing all sources" + + bridge mdb del dev br0 port $swp2 grp $grp vid 10 + + $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $valid_src -B $grp -t udp -q + tc_check_packets "dev $h2 ingress" 1 2 + check_err $? "Packet from valid source received on H2 after deleting entry" + + $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $invalid_src -B $grp -t udp -q + tc_check_packets "dev $h2 ingress" 2 1 + check_err $? "Packet from invalid source received on H2 after deleting entry" + + tc filter del dev $h2 ingress protocol 802.1q pref 1 handle 2 flower + tc filter del dev $h2 ingress protocol 802.1q pref 1 handle 1 flower + + log_test "$name port group \"$filter_mode\" entries forwarding tests" +} + +fwd_test_port_ip() +{ + __fwd_test_port_ip "239.1.1.1" "01:00:5e:01:01:01" "192.0.2.1" "192.0.2.2" "-4" "exclude" + __fwd_test_port_ip "ff0e::1" "33:33:00:00:00:01" "2001:db8:1::1" "2001:db8:1::2" "-6" \ + "exclude" + __fwd_test_port_ip "239.1.1.1" "01:00:5e:01:01:01" "192.0.2.1" "192.0.2.2" "-4" "include" + __fwd_test_port_ip "ff0e::1" "33:33:00:00:00:01" "2001:db8:1::1" "2001:db8:1::2" "-6" \ + "include" +} + +fwd_test_port_l2() +{ + local dmac=01:02:03:04:05:06 + + RET=0 + + tc filter add dev $h2 ingress protocol all pref 1 handle 1 flower \ + dst_mac $dmac action drop + + $MZ $h1.10 -c 1 -p 128 -a own -b $dmac -q + tc_check_packets "dev $h2 ingress" 1 0 + check_err $? "Packet received on H2 before adding entry" + + bridge mdb add dev br0 port $swp2 grp $dmac permanent vid 10 + $MZ $h1.10 -c 1 -p 128 -a own -b $dmac -q + tc_check_packets "dev $h2 ingress" 1 1 + check_err $? "Packet not received on H2 after adding entry" + + bridge mdb del dev br0 port $swp2 grp $dmac permanent vid 10 + $MZ $h1.10 -c 1 -p 128 -a own -b $dmac -q + tc_check_packets "dev $h2 ingress" 1 1 + check_err $? "Packet received on H2 after deleting entry" + + tc filter del dev $h2 ingress protocol all pref 1 handle 1 flower + + log_test "L2 port entries forwarding tests" +} + +fwd_test_port() +{ + # Disable multicast flooding to ensure that packets are only forwarded + # out of a port when a matching port group entry is present. + bridge link set dev $swp2 mcast_flood off + + fwd_test_port_ip + fwd_test_port_l2 + + bridge link set dev $swp2 mcast_flood on +} + +fwd_test() +{ + echo + log_info "# Forwarding tests" + + # Set the Max Response Delay to 100 centiseconds (1 second) so that the + # bridge will start forwarding according to its MDB soon after a + # multicast querier is enabled. + ip link set dev br0 type bridge mcast_query_response_interval 100 + + # Forwarding according to MDB entries only takes place when the bridge + # detects that there is a valid querier in the network. Set the bridge + # as the querier and assign it a valid IPv6 link-local address to be + # used as the source address for MLD queries. + ip -6 address add fe80::1/64 nodad dev br0 + ip link set dev br0 type bridge mcast_querier 1 + sleep 10 + + fwd_test_host + fwd_test_port + + ip link set dev br0 type bridge mcast_querier 0 + ip -6 address del fe80::1/64 dev br0 + ip link set dev br0 type bridge mcast_query_response_interval 1000 +} + +ctrl_igmpv3_is_in_test() +{ + RET=0 + + # Add a permanent entry and check that it is not affected by the + # received IGMP packet. + bridge mdb add dev br0 port $swp1 grp 239.1.1.1 permanent vid 10 \ + filter_mode include source_list 192.0.2.1 + + # IS_IN ( 192.0.2.2 ) + $MZ $h1.10 -c 1 -a own -b 01:00:5e:01:01:01 -A 192.0.2.1 -B 239.1.1.1 \ + -t ip proto=2,p=$(igmpv3_is_in_get 239.1.1.1 192.0.2.2) -q + + bridge mdb get dev br0 grp 239.1.1.1 src 192.0.2.2 vid 10 &> /dev/null + check_fail $? "Permanent entry affected by IGMP packet" + + # Replace the permanent entry with a temporary one and check that after + # processing the IGMP packet, a new source is added to the list along + # with a new forwarding entry. + bridge mdb replace dev br0 port $swp1 grp 239.1.1.1 temp vid 10 \ + filter_mode include source_list 192.0.2.1 + + # IS_IN ( 192.0.2.2 ) + $MZ $h1.10 -a own -b 01:00:5e:01:01:01 -c 1 -A 192.0.2.1 -B 239.1.1.1 \ + -t ip proto=2,p=$(igmpv3_is_in_get 239.1.1.1 192.0.2.2) -q + + bridge -d mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q 192.0.2.2 + check_err $? "Source not add to source list" + + bridge mdb get dev br0 grp 239.1.1.1 src 192.0.2.2 vid 10 &> /dev/null + check_err $? "(S, G) entry not created for new source" + + bridge mdb del dev br0 port $swp1 grp 239.1.1.1 vid 10 + + log_test "IGMPv3 MODE_IS_INCLUDE tests" +} + +ctrl_mldv2_is_in_test() +{ + RET=0 + + # Add a permanent entry and check that it is not affected by the + # received MLD packet. + bridge mdb add dev br0 port $swp1 grp ff0e::1 permanent vid 10 \ + filter_mode include source_list 2001:db8:1::1 + + # IS_IN ( 2001:db8:1::2 ) + local p=$(mldv2_is_in_get fe80::1 ff0e::1 2001:db8:1::2) + $MZ -6 $h1.10 -a own -b 33:33:00:00:00:01 -c 1 -A fe80::1 -B ff0e::1 \ + -t ip hop=1,next=0,p="$p" -q + + bridge mdb get dev br0 grp ff0e::1 src 2001:db8:1::2 vid 10 &> /dev/null + check_fail $? "Permanent entry affected by MLD packet" + + # Replace the permanent entry with a temporary one and check that after + # processing the MLD packet, a new source is added to the list along + # with a new forwarding entry. + bridge mdb replace dev br0 port $swp1 grp ff0e::1 temp vid 10 \ + filter_mode include source_list 2001:db8:1::1 + + # IS_IN ( 2001:db8:1::2 ) + $MZ -6 $h1.10 -a own -b 33:33:00:00:00:01 -c 1 -A fe80::1 -B ff0e::1 \ + -t ip hop=1,next=0,p="$p" -q + + bridge -d mdb get dev br0 grp ff0e::1 vid 10 | grep -q 2001:db8:1::2 + check_err $? "Source not add to source list" + + bridge mdb get dev br0 grp ff0e::1 src 2001:db8:1::2 vid 10 &> /dev/null + check_err $? "(S, G) entry not created for new source" + + bridge mdb del dev br0 port $swp1 grp ff0e::1 vid 10 + + log_test "MLDv2 MODE_IS_INCLUDE tests" +} + +ctrl_test() +{ + echo + log_info "# Control packets tests" + + ctrl_igmpv3_is_in_test + ctrl_mldv2_is_in_test +} + +if ! bridge mdb help 2>&1 | grep -q "flush"; then + echo "SKIP: iproute2 too old, missing bridge mdb flush support" + exit $ksft_skip +fi + +trap cleanup EXIT + +setup_prepare +setup_wait +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/bridge_mdb_host.sh b/tools/testing/selftests/net/forwarding/bridge_mdb_host.sh new file mode 100755 index 000000000000..b1ba6876dd86 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/bridge_mdb_host.sh @@ -0,0 +1,103 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Verify that adding host mdb entries work as intended for all types of +# multicast filters: ipv4, ipv6, and mac + +ALL_TESTS="mdb_add_del_test" +NUM_NETIFS=2 + +TEST_GROUP_IP4="225.1.2.3" +TEST_GROUP_IP6="ff02::42" +TEST_GROUP_MAC="01:00:01:c0:ff:ee" + +source lib.sh + +h1_create() +{ + simple_if_init $h1 192.0.2.1/24 2001:db8:1::1/64 +} + +h1_destroy() +{ + simple_if_fini $h1 192.0.2.1/24 2001:db8:1::1/64 +} + +switch_create() +{ + # Enable multicast filtering + ip link add dev br0 type bridge mcast_snooping 1 + + ip link set dev $swp1 master br0 + + ip link set dev br0 up + ip link set dev $swp1 up +} + +switch_destroy() +{ + ip link set dev $swp1 down + ip link del dev br0 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + vrf_prepare + + h1_create + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + h1_destroy + + vrf_cleanup +} + +do_mdb_add_del() +{ + local group=$1 + local flag=$2 + + RET=0 + bridge mdb add dev br0 port br0 grp $group $flag 2>/dev/null + check_err $? "Failed adding $group to br0, port br0" + + if [ -z "$flag" ]; then + flag="temp" + fi + + bridge mdb show dev br0 | grep $group | grep -q $flag 2>/dev/null + check_err $? "$group not added with $flag flag" + + bridge mdb del dev br0 port br0 grp $group 2>/dev/null + check_err $? "Failed deleting $group from br0, port br0" + + bridge mdb show dev br0 | grep -q $group >/dev/null + check_err_fail 1 $? "$group still in mdb after delete" + + log_test "MDB add/del group $group to bridge port br0" +} + +mdb_add_del_test() +{ + do_mdb_add_del $TEST_GROUP_MAC permanent + do_mdb_add_del $TEST_GROUP_IP4 + do_mdb_add_del $TEST_GROUP_IP6 +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/bridge_mdb_max.sh b/tools/testing/selftests/net/forwarding/bridge_mdb_max.sh new file mode 100755 index 000000000000..3da9d93ab36f --- /dev/null +++ b/tools/testing/selftests/net/forwarding/bridge_mdb_max.sh @@ -0,0 +1,1347 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# +-----------------------+ +------------------------+ +# | H1 (vrf) | | H2 (vrf) | +# | + $h1.10 | | + $h2.10 | +# | | 192.0.2.1/28 | | | 192.0.2.2/28 | +# | | 2001:db8:1::1/64 | | | 2001:db8:1::2/64 | +# | | | | | | +# | | + $h1.20 | | | + $h2.20 | +# | \ | 198.51.100.1/24 | | \ | 198.51.100.2/24 | +# | \ | 2001:db8:2::1/64 | | \ | 2001:db8:2::2/64 | +# | \| | | \| | +# | + $h1 | | + $h2 | +# +----|------------------+ +----|-------------------+ +# | | +# +----|--------------------------------------------------|-------------------+ +# | SW | | | +# | +--|--------------------------------------------------|-----------------+ | +# | | + $swp1 BR0 (802.1q) + $swp2 | | +# | | vid 10 vid 10 | | +# | | vid 20 vid 20 | | +# | | | | +# | +-----------------------------------------------------------------------+ | +# +---------------------------------------------------------------------------+ + +ALL_TESTS=" + test_8021d + test_8021q + test_8021qvs +" + +NUM_NETIFS=4 +source lib.sh +source tc_common.sh + +h1_create() +{ + simple_if_init $h1 + vlan_create $h1 10 v$h1 192.0.2.1/28 2001:db8:1::1/64 + vlan_create $h1 20 v$h1 198.51.100.1/24 2001:db8:2::1/64 +} + +h1_destroy() +{ + vlan_destroy $h1 20 + vlan_destroy $h1 10 + simple_if_fini $h1 +} + +h2_create() +{ + simple_if_init $h2 + vlan_create $h2 10 v$h2 192.0.2.2/28 + vlan_create $h2 20 v$h2 198.51.100.2/24 +} + +h2_destroy() +{ + vlan_destroy $h2 20 + vlan_destroy $h2 10 + simple_if_fini $h2 +} + +switch_create_8021d() +{ + log_info "802.1d tests" + + ip link add name br0 type bridge vlan_filtering 0 \ + mcast_snooping 1 \ + mcast_igmp_version 3 mcast_mld_version 2 + ip link set dev br0 up + + ip link set dev $swp1 master br0 + ip link set dev $swp1 up + bridge link set dev $swp1 fastleave on + + ip link set dev $swp2 master br0 + ip link set dev $swp2 up +} + +switch_create_8021q() +{ + local br_flags=$1; shift + + log_info "802.1q $br_flags${br_flags:+ }tests" + + ip link add name br0 type bridge vlan_filtering 1 vlan_default_pvid 0 \ + mcast_snooping 1 $br_flags \ + mcast_igmp_version 3 mcast_mld_version 2 + bridge vlan add vid 10 dev br0 self + bridge vlan add vid 20 dev br0 self + ip link set dev br0 up + + ip link set dev $swp1 master br0 + ip link set dev $swp1 up + bridge link set dev $swp1 fastleave on + bridge vlan add vid 10 dev $swp1 + bridge vlan add vid 20 dev $swp1 + + ip link set dev $swp2 master br0 + ip link set dev $swp2 up + bridge vlan add vid 10 dev $swp2 + bridge vlan add vid 20 dev $swp2 +} + +switch_create_8021qvs() +{ + switch_create_8021q "mcast_vlan_snooping 1" + bridge vlan global set dev br0 vid 10 mcast_igmp_version 3 + bridge vlan global set dev br0 vid 10 mcast_mld_version 2 + bridge vlan global set dev br0 vid 20 mcast_igmp_version 3 + bridge vlan global set dev br0 vid 20 mcast_mld_version 2 +} + +switch_destroy() +{ + ip link set dev $swp2 down + ip link set dev $swp2 nomaster + + ip link set dev $swp1 down + ip link set dev $swp1 nomaster + + ip link set dev br0 down + ip link del dev br0 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + vrf_prepare + forwarding_enable + + h1_create + h2_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy 2>/dev/null + h2_destroy + h1_destroy + + forwarding_restore + vrf_cleanup +} + +cfg_src_list() +{ + local IPs=("$@") + local IPstr=$(echo ${IPs[@]} | tr '[:space:]' , | sed 's/,$//') + + echo ${IPstr:+source_list }${IPstr} +} + +cfg_group_op() +{ + local op=$1; shift + local locus=$1; shift + local GRP=$1; shift + local state=$1; shift + local IPs=("$@") + + local source_list=$(cfg_src_list ${IPs[@]}) + + # Everything besides `bridge mdb' uses the "dev X vid Y" syntax, + # so we use it here as well and convert. + local br_locus=$(echo "$locus" | sed 's/^dev /port /') + + bridge mdb $op dev br0 $br_locus grp $GRP $state \ + filter_mode include $source_list +} + +cfg4_entries_op() +{ + local op=$1; shift + local locus=$1; shift + local state=$1; shift + local n=$1; shift + local grp=${1:-1}; shift + + local GRP=239.1.1.${grp} + local IPs=$(seq -f 192.0.2.%g 1 $((n - 1))) + cfg_group_op "$op" "$locus" "$GRP" "$state" ${IPs[@]} +} + +cfg4_entries_add() +{ + cfg4_entries_op add "$@" +} + +cfg4_entries_del() +{ + cfg4_entries_op del "$@" +} + +cfg6_entries_op() +{ + local op=$1; shift + local locus=$1; shift + local state=$1; shift + local n=$1; shift + local grp=${1:-1}; shift + + local GRP=ff0e::${grp} + local IPs=$(printf "2001:db8:1::%x\n" $(seq 1 $((n - 1)))) + cfg_group_op "$op" "$locus" "$GRP" "$state" ${IPs[@]} +} + +cfg6_entries_add() +{ + cfg6_entries_op add "$@" +} + +cfg6_entries_del() +{ + cfg6_entries_op del "$@" +} + +locus_dev_peer() +{ + local dev_kw=$1; shift + local dev=$1; shift + local vid_kw=$1; shift + local vid=$1; shift + + echo "$h1.${vid:-10}" +} + +locus_dev() +{ + local dev_kw=$1; shift + local dev=$1; shift + + echo $dev +} + +ctl4_entries_add() +{ + local locus=$1; shift + local state=$1; shift + local n=$1; shift + local grp=${1:-1}; shift + + local IPs=$(seq -f 192.0.2.%g 1 $((n - 1))) + local peer=$(locus_dev_peer $locus) + local GRP=239.1.1.${grp} + local dmac=01:00:5e:01:01:$(printf "%02x" $grp) + $MZ $peer -a own -b $dmac -c 1 -A 192.0.2.1 -B $GRP \ + -t ip proto=2,p=$(igmpv3_is_in_get $GRP $IPs) -q + sleep 1 + + local nn=$(bridge mdb show dev br0 | grep $GRP | wc -l) + if ((nn != n)); then + echo mcast_max_groups > /dev/stderr + false + fi +} + +ctl4_entries_del() +{ + local locus=$1; shift + local state=$1; shift + local n=$1; shift + local grp=${1:-1}; shift + + local peer=$(locus_dev_peer $locus) + local GRP=239.1.1.${grp} + local dmac=01:00:5e:00:00:02 + $MZ $peer -a own -b $dmac -c 1 -A 192.0.2.1 -B 224.0.0.2 \ + -t ip proto=2,p=$(igmpv2_leave_get $GRP) -q + sleep 1 + ! bridge mdb show dev br0 | grep -q $GRP +} + +ctl6_entries_add() +{ + local locus=$1; shift + local state=$1; shift + local n=$1; shift + local grp=${1:-1}; shift + + local IPs=$(printf "2001:db8:1::%x\n" $(seq 1 $((n - 1)))) + local peer=$(locus_dev_peer $locus) + local SIP=fe80::1 + local GRP=ff0e::${grp} + local dmac=33:33:00:00:00:$(printf "%02x" $grp) + local p=$(mldv2_is_in_get $SIP $GRP $IPs) + $MZ -6 $peer -a own -b $dmac -c 1 -A $SIP -B $GRP \ + -t ip hop=1,next=0,p="$p" -q + sleep 1 + + local nn=$(bridge mdb show dev br0 | grep $GRP | wc -l) + if ((nn != n)); then + echo mcast_max_groups > /dev/stderr + false + fi +} + +ctl6_entries_del() +{ + local locus=$1; shift + local state=$1; shift + local n=$1; shift + local grp=${1:-1}; shift + + local peer=$(locus_dev_peer $locus) + local SIP=fe80::1 + local GRP=ff0e::${grp} + local dmac=33:33:00:00:00:$(printf "%02x" $grp) + local p=$(mldv1_done_get $SIP $GRP) + $MZ -6 $peer -a own -b $dmac -c 1 -A $SIP -B $GRP \ + -t ip hop=1,next=0,p="$p" -q + sleep 1 + ! bridge mdb show dev br0 | grep -q $GRP +} + +bridge_maxgroups_errmsg_check_cfg() +{ + local msg=$1; shift + local needle=$1; shift + + echo "$msg" | grep -q mcast_max_groups + check_err $? "Adding MDB entries failed for the wrong reason: $msg" +} + +bridge_maxgroups_errmsg_check_cfg4() +{ + bridge_maxgroups_errmsg_check_cfg "$@" +} + +bridge_maxgroups_errmsg_check_cfg6() +{ + bridge_maxgroups_errmsg_check_cfg "$@" +} + +bridge_maxgroups_errmsg_check_ctl4() +{ + : +} + +bridge_maxgroups_errmsg_check_ctl6() +{ + : +} + +bridge_port_ngroups_get() +{ + local locus=$1; shift + + bridge -j -d link show $locus | + jq '.[].mcast_n_groups' +} + +bridge_port_maxgroups_get() +{ + local locus=$1; shift + + bridge -j -d link show $locus | + jq '.[].mcast_max_groups' +} + +bridge_port_maxgroups_set() +{ + local locus=$1; shift + local max=$1; shift + + bridge link set dev $(locus_dev $locus) mcast_max_groups $max +} + +bridge_port_vlan_ngroups_get() +{ + local locus=$1; shift + + bridge -j -d vlan show $locus | + jq '.[].vlans[].mcast_n_groups' +} + +bridge_port_vlan_maxgroups_get() +{ + local locus=$1; shift + + bridge -j -d vlan show $locus | + jq '.[].vlans[].mcast_max_groups' +} + +bridge_port_vlan_maxgroups_set() +{ + local locus=$1; shift + local max=$1; shift + + bridge vlan set $locus mcast_max_groups $max +} + +test_ngroups_reporting() +{ + local CFG=$1; shift + local context=$1; shift + local locus=$1; shift + + RET=0 + + local n0=$(bridge_${context}_ngroups_get "$locus") + ${CFG}_entries_add "$locus" temp 5 + check_err $? "Couldn't add MDB entries" + local n1=$(bridge_${context}_ngroups_get "$locus") + + ((n1 == n0 + 5)) + check_err $? "Number of groups was $n0, now is $n1, but $((n0 + 5)) expected" + + ${CFG}_entries_del "$locus" temp 5 + check_err $? "Couldn't delete MDB entries" + local n2=$(bridge_${context}_ngroups_get "$locus") + + ((n2 == n0)) + check_err $? "Number of groups was $n0, now is $n2, but should be back to $n0" + + log_test "$CFG: $context: ngroups reporting" +} + +test_8021d_ngroups_reporting_cfg4() +{ + test_ngroups_reporting cfg4 port "dev $swp1" +} + +test_8021d_ngroups_reporting_ctl4() +{ + test_ngroups_reporting ctl4 port "dev $swp1" +} + +test_8021d_ngroups_reporting_cfg6() +{ + test_ngroups_reporting cfg6 port "dev $swp1" +} + +test_8021d_ngroups_reporting_ctl6() +{ + test_ngroups_reporting ctl6 port "dev $swp1" +} + +test_8021q_ngroups_reporting_cfg4() +{ + test_ngroups_reporting cfg4 port "dev $swp1 vid 10" +} + +test_8021q_ngroups_reporting_ctl4() +{ + test_ngroups_reporting ctl4 port "dev $swp1 vid 10" +} + +test_8021q_ngroups_reporting_cfg6() +{ + test_ngroups_reporting cfg6 port "dev $swp1 vid 10" +} + +test_8021q_ngroups_reporting_ctl6() +{ + test_ngroups_reporting ctl6 port "dev $swp1 vid 10" +} + +test_8021qvs_ngroups_reporting_cfg4() +{ + test_ngroups_reporting cfg4 port_vlan "dev $swp1 vid 10" +} + +test_8021qvs_ngroups_reporting_ctl4() +{ + test_ngroups_reporting ctl4 port_vlan "dev $swp1 vid 10" +} + +test_8021qvs_ngroups_reporting_cfg6() +{ + test_ngroups_reporting cfg6 port_vlan "dev $swp1 vid 10" +} + +test_8021qvs_ngroups_reporting_ctl6() +{ + test_ngroups_reporting ctl6 port_vlan "dev $swp1 vid 10" +} + +test_ngroups_cross_vlan() +{ + local CFG=$1; shift + + local locus1="dev $swp1 vid 10" + local locus2="dev $swp1 vid 20" + + RET=0 + + local n10=$(bridge_port_vlan_ngroups_get "$locus1") + local n20=$(bridge_port_vlan_ngroups_get "$locus2") + ${CFG}_entries_add "$locus1" temp 5 111 + check_err $? "Couldn't add MDB entries to VLAN 10" + local n11=$(bridge_port_vlan_ngroups_get "$locus1") + local n21=$(bridge_port_vlan_ngroups_get "$locus2") + + ((n11 == n10 + 5)) + check_err $? "Number of groups at VLAN 10 was $n10, now is $n11, but 5 entries added on VLAN 10, $((n10 + 5)) expected" + + ((n21 == n20)) + check_err $? "Number of groups at VLAN 20 was $n20, now is $n21, but no change expected on VLAN 20" + + ${CFG}_entries_add "$locus2" temp 5 112 + check_err $? "Couldn't add MDB entries to VLAN 20" + local n12=$(bridge_port_vlan_ngroups_get "$locus1") + local n22=$(bridge_port_vlan_ngroups_get "$locus2") + + ((n12 == n11)) + check_err $? "Number of groups at VLAN 10 was $n11, now is $n12, but no change expected on VLAN 10" + + ((n22 == n21 + 5)) + check_err $? "Number of groups at VLAN 20 was $n21, now is $n22, but 5 entries added on VLAN 20, $((n21 + 5)) expected" + + ${CFG}_entries_del "$locus1" temp 5 111 + check_err $? "Couldn't delete MDB entries from VLAN 10" + ${CFG}_entries_del "$locus2" temp 5 112 + check_err $? "Couldn't delete MDB entries from VLAN 20" + local n13=$(bridge_port_vlan_ngroups_get "$locus1") + local n23=$(bridge_port_vlan_ngroups_get "$locus2") + + ((n13 == n10)) + check_err $? "Number of groups at VLAN 10 was $n10, now is $n13, but should be back to $n10" + + ((n23 == n20)) + check_err $? "Number of groups at VLAN 20 was $n20, now is $n23, but should be back to $n20" + + log_test "$CFG: port_vlan: isolation of port and per-VLAN ngroups" +} + +test_8021qvs_ngroups_cross_vlan_cfg4() +{ + test_ngroups_cross_vlan cfg4 +} + +test_8021qvs_ngroups_cross_vlan_ctl4() +{ + test_ngroups_cross_vlan ctl4 +} + +test_8021qvs_ngroups_cross_vlan_cfg6() +{ + test_ngroups_cross_vlan cfg6 +} + +test_8021qvs_ngroups_cross_vlan_ctl6() +{ + test_ngroups_cross_vlan ctl6 +} + +test_maxgroups_zero() +{ + local CFG=$1; shift + local context=$1; shift + local locus=$1; shift + + RET=0 + local max + + max=$(bridge_${context}_maxgroups_get "$locus") + ((max == 0)) + check_err $? "Max groups on $locus should be 0, but $max reported" + + bridge_${context}_maxgroups_set "$locus" 100 + check_err $? "Failed to set max to 100" + max=$(bridge_${context}_maxgroups_get "$locus") + ((max == 100)) + check_err $? "Max groups expected to be 100, but $max reported" + + bridge_${context}_maxgroups_set "$locus" 0 + check_err $? "Couldn't set maximum to 0" + + # Test that setting 0 explicitly still serves as infinity. + ${CFG}_entries_add "$locus" temp 5 + check_err $? "Adding 5 MDB entries failed but should have passed" + ${CFG}_entries_del "$locus" temp 5 + check_err $? "Couldn't delete MDB entries" + + log_test "$CFG: $context maxgroups: reporting and treatment of 0" +} + +test_8021d_maxgroups_zero_cfg4() +{ + test_maxgroups_zero cfg4 port "dev $swp1" +} + +test_8021d_maxgroups_zero_ctl4() +{ + test_maxgroups_zero ctl4 port "dev $swp1" +} + +test_8021d_maxgroups_zero_cfg6() +{ + test_maxgroups_zero cfg6 port "dev $swp1" +} + +test_8021d_maxgroups_zero_ctl6() +{ + test_maxgroups_zero ctl6 port "dev $swp1" +} + +test_8021q_maxgroups_zero_cfg4() +{ + test_maxgroups_zero cfg4 port "dev $swp1 vid 10" +} + +test_8021q_maxgroups_zero_ctl4() +{ + test_maxgroups_zero ctl4 port "dev $swp1 vid 10" +} + +test_8021q_maxgroups_zero_cfg6() +{ + test_maxgroups_zero cfg6 port "dev $swp1 vid 10" +} + +test_8021q_maxgroups_zero_ctl6() +{ + test_maxgroups_zero ctl6 port "dev $swp1 vid 10" +} + +test_8021qvs_maxgroups_zero_cfg4() +{ + test_maxgroups_zero cfg4 port_vlan "dev $swp1 vid 10" +} + +test_8021qvs_maxgroups_zero_ctl4() +{ + test_maxgroups_zero ctl4 port_vlan "dev $swp1 vid 10" +} + +test_8021qvs_maxgroups_zero_cfg6() +{ + test_maxgroups_zero cfg6 port_vlan "dev $swp1 vid 10" +} + +test_8021qvs_maxgroups_zero_ctl6() +{ + test_maxgroups_zero ctl6 port_vlan "dev $swp1 vid 10" +} + +test_maxgroups_zero_cross_vlan() +{ + local CFG=$1; shift + + local locus0="dev $swp1" + local locus1="dev $swp1 vid 10" + local locus2="dev $swp1 vid 20" + local max + + RET=0 + + bridge_port_vlan_maxgroups_set "$locus1" 100 + check_err $? "$locus1: Failed to set max to 100" + + max=$(bridge_port_maxgroups_get "$locus0") + ((max == 0)) + check_err $? "$locus0: Max groups expected to be 0, but $max reported" + + max=$(bridge_port_vlan_maxgroups_get "$locus2") + ((max == 0)) + check_err $? "$locus2: Max groups expected to be 0, but $max reported" + + bridge_port_vlan_maxgroups_set "$locus2" 100 + check_err $? "$locus2: Failed to set max to 100" + + max=$(bridge_port_maxgroups_get "$locus0") + ((max == 0)) + check_err $? "$locus0: Max groups expected to be 0, but $max reported" + + max=$(bridge_port_vlan_maxgroups_get "$locus2") + ((max == 100)) + check_err $? "$locus2: Max groups expected to be 100, but $max reported" + + bridge_port_maxgroups_set "$locus0" 100 + check_err $? "$locus0: Failed to set max to 100" + + max=$(bridge_port_maxgroups_get "$locus0") + ((max == 100)) + check_err $? "$locus0: Max groups expected to be 100, but $max reported" + + max=$(bridge_port_vlan_maxgroups_get "$locus2") + ((max == 100)) + check_err $? "$locus2: Max groups expected to be 100, but $max reported" + + bridge_port_vlan_maxgroups_set "$locus1" 0 + check_err $? "$locus1: Failed to set max to 0" + + max=$(bridge_port_maxgroups_get "$locus0") + ((max == 100)) + check_err $? "$locus0: Max groups expected to be 100, but $max reported" + + max=$(bridge_port_vlan_maxgroups_get "$locus2") + ((max == 100)) + check_err $? "$locus2: Max groups expected to be 100, but $max reported" + + bridge_port_vlan_maxgroups_set "$locus2" 0 + check_err $? "$locus2: Failed to set max to 0" + + max=$(bridge_port_maxgroups_get "$locus0") + ((max == 100)) + check_err $? "$locus0: Max groups expected to be 100, but $max reported" + + max=$(bridge_port_vlan_maxgroups_get "$locus2") + ((max == 0)) + check_err $? "$locus2: Max groups expected to be 0 but $max reported" + + bridge_port_maxgroups_set "$locus0" 0 + check_err $? "$locus0: Failed to set max to 0" + + max=$(bridge_port_maxgroups_get "$locus0") + ((max == 0)) + check_err $? "$locus0: Max groups expected to be 0, but $max reported" + + max=$(bridge_port_vlan_maxgroups_get "$locus2") + ((max == 0)) + check_err $? "$locus2: Max groups expected to be 0, but $max reported" + + log_test "$CFG: port_vlan maxgroups: isolation of port and per-VLAN maximums" +} + +test_8021qvs_maxgroups_zero_cross_vlan_cfg4() +{ + test_maxgroups_zero_cross_vlan cfg4 +} + +test_8021qvs_maxgroups_zero_cross_vlan_ctl4() +{ + test_maxgroups_zero_cross_vlan ctl4 +} + +test_8021qvs_maxgroups_zero_cross_vlan_cfg6() +{ + test_maxgroups_zero_cross_vlan cfg6 +} + +test_8021qvs_maxgroups_zero_cross_vlan_ctl6() +{ + test_maxgroups_zero_cross_vlan ctl6 +} + +test_maxgroups_too_low() +{ + local CFG=$1; shift + local context=$1; shift + local locus=$1; shift + + RET=0 + + local n=$(bridge_${context}_ngroups_get "$locus") + local msg + + ${CFG}_entries_add "$locus" temp 5 111 + check_err $? "$locus: Couldn't add MDB entries" + + bridge_${context}_maxgroups_set "$locus" $((n+2)) + check_err $? "$locus: Setting maxgroups to $((n+2)) failed" + + msg=$(${CFG}_entries_add "$locus" temp 2 112 2>&1) + check_fail $? "$locus: Adding more entries passed when max<n" + bridge_maxgroups_errmsg_check_cfg "$msg" + + ${CFG}_entries_del "$locus" temp 5 111 + check_err $? "$locus: Couldn't delete MDB entries" + + ${CFG}_entries_add "$locus" temp 2 112 + check_err $? "$locus: Adding more entries failed" + + ${CFG}_entries_del "$locus" temp 2 112 + check_err $? "$locus: Deleting more entries failed" + + bridge_${context}_maxgroups_set "$locus" 0 + check_err $? "$locus: Couldn't set maximum to 0" + + log_test "$CFG: $context maxgroups: configure below ngroups" +} + +test_8021d_maxgroups_too_low_cfg4() +{ + test_maxgroups_too_low cfg4 port "dev $swp1" +} + +test_8021d_maxgroups_too_low_ctl4() +{ + test_maxgroups_too_low ctl4 port "dev $swp1" +} + +test_8021d_maxgroups_too_low_cfg6() +{ + test_maxgroups_too_low cfg6 port "dev $swp1" +} + +test_8021d_maxgroups_too_low_ctl6() +{ + test_maxgroups_too_low ctl6 port "dev $swp1" +} + +test_8021q_maxgroups_too_low_cfg4() +{ + test_maxgroups_too_low cfg4 port "dev $swp1 vid 10" +} + +test_8021q_maxgroups_too_low_ctl4() +{ + test_maxgroups_too_low ctl4 port "dev $swp1 vid 10" +} + +test_8021q_maxgroups_too_low_cfg6() +{ + test_maxgroups_too_low cfg6 port "dev $swp1 vid 10" +} + +test_8021q_maxgroups_too_low_ctl6() +{ + test_maxgroups_too_low ctl6 port "dev $swp1 vid 10" +} + +test_8021qvs_maxgroups_too_low_cfg4() +{ + test_maxgroups_too_low cfg4 port_vlan "dev $swp1 vid 10" +} + +test_8021qvs_maxgroups_too_low_ctl4() +{ + test_maxgroups_too_low ctl4 port_vlan "dev $swp1 vid 10" +} + +test_8021qvs_maxgroups_too_low_cfg6() +{ + test_maxgroups_too_low cfg6 port_vlan "dev $swp1 vid 10" +} + +test_8021qvs_maxgroups_too_low_ctl6() +{ + test_maxgroups_too_low ctl6 port_vlan "dev $swp1 vid 10" +} + +test_maxgroups_too_many_entries() +{ + local CFG=$1; shift + local context=$1; shift + local locus=$1; shift + + RET=0 + + local n=$(bridge_${context}_ngroups_get "$locus") + local msg + + # Configure a low maximum + bridge_${context}_maxgroups_set "$locus" $((n+1)) + check_err $? "$locus: Couldn't set maximum" + + # Try to add more entries than the configured maximum + msg=$(${CFG}_entries_add "$locus" temp 5 2>&1) + check_fail $? "Adding 5 MDB entries passed, but should have failed" + bridge_maxgroups_errmsg_check_${CFG} "$msg" + + # When adding entries through the control path, as many as possible + # get created. That's consistent with the mcast_hash_max behavior. + # So there, drop the entries explicitly. + if [[ ${CFG%[46]} == ctl ]]; then + ${CFG}_entries_del "$locus" temp 17 2>&1 + fi + + local n2=$(bridge_${context}_ngroups_get "$locus") + ((n2 == n)) + check_err $? "Number of groups was $n, but after a failed attempt to add MDB entries it changed to $n2" + + bridge_${context}_maxgroups_set "$locus" 0 + check_err $? "$locus: Couldn't set maximum to 0" + + log_test "$CFG: $context maxgroups: add too many MDB entries" +} + +test_8021d_maxgroups_too_many_entries_cfg4() +{ + test_maxgroups_too_many_entries cfg4 port "dev $swp1" +} + +test_8021d_maxgroups_too_many_entries_ctl4() +{ + test_maxgroups_too_many_entries ctl4 port "dev $swp1" +} + +test_8021d_maxgroups_too_many_entries_cfg6() +{ + test_maxgroups_too_many_entries cfg6 port "dev $swp1" +} + +test_8021d_maxgroups_too_many_entries_ctl6() +{ + test_maxgroups_too_many_entries ctl6 port "dev $swp1" +} + +test_8021q_maxgroups_too_many_entries_cfg4() +{ + test_maxgroups_too_many_entries cfg4 port "dev $swp1 vid 10" +} + +test_8021q_maxgroups_too_many_entries_ctl4() +{ + test_maxgroups_too_many_entries ctl4 port "dev $swp1 vid 10" +} + +test_8021q_maxgroups_too_many_entries_cfg6() +{ + test_maxgroups_too_many_entries cfg6 port "dev $swp1 vid 10" +} + +test_8021q_maxgroups_too_many_entries_ctl6() +{ + test_maxgroups_too_many_entries ctl6 port "dev $swp1 vid 10" +} + +test_8021qvs_maxgroups_too_many_entries_cfg4() +{ + test_maxgroups_too_many_entries cfg4 port_vlan "dev $swp1 vid 10" +} + +test_8021qvs_maxgroups_too_many_entries_ctl4() +{ + test_maxgroups_too_many_entries ctl4 port_vlan "dev $swp1 vid 10" +} + +test_8021qvs_maxgroups_too_many_entries_cfg6() +{ + test_maxgroups_too_many_entries cfg6 port_vlan "dev $swp1 vid 10" +} + +test_8021qvs_maxgroups_too_many_entries_ctl6() +{ + test_maxgroups_too_many_entries ctl6 port_vlan "dev $swp1 vid 10" +} + +test_maxgroups_too_many_cross_vlan() +{ + local CFG=$1; shift + + RET=0 + + local locus0="dev $swp1" + local locus1="dev $swp1 vid 10" + local locus2="dev $swp1 vid 20" + local n1=$(bridge_port_vlan_ngroups_get "$locus1") + local n2=$(bridge_port_vlan_ngroups_get "$locus2") + local msg + + if ((n1 > n2)); then + local tmp=$n1 + n1=$n2 + n2=$tmp + + tmp="$locus1" + locus1="$locus2" + locus2="$tmp" + fi + + # Now 0 <= n1 <= n2. + ${CFG}_entries_add "$locus2" temp 5 112 + check_err $? "Couldn't add 5 entries" + + n2=$(bridge_port_vlan_ngroups_get "$locus2") + # Now 0 <= n1 < n2-1. + + # Setting locus1'maxgroups to n2-1 should pass. The number is + # smaller than both the absolute number of MDB entries, and in + # particular than number of locus2's number of entries, but it is + # large enough to cover locus1's entries. Thus we check that + # individual VLAN's ngroups are independent. + bridge_port_vlan_maxgroups_set "$locus1" $((n2-1)) + check_err $? "Setting ${locus1}'s maxgroups to $((n2-1)) failed" + + msg=$(${CFG}_entries_add "$locus1" temp $n2 111 2>&1) + check_fail $? "$locus1: Adding $n2 MDB entries passed, but should have failed" + bridge_maxgroups_errmsg_check_${CFG} "$msg" + + bridge_port_maxgroups_set "$locus0" $((n1 + n2 + 2)) + check_err $? "$locus0: Couldn't set maximum" + + msg=$(${CFG}_entries_add "$locus1" temp 5 111 2>&1) + check_fail $? "$locus1: Adding 5 MDB entries passed, but should have failed" + bridge_maxgroups_errmsg_check_${CFG} "$msg" + + # IGMP/MLD packets can cause several entries to be added, before + # the maximum is hit and the rest is then bounced. Remove what was + # committed, if anything. + ${CFG}_entries_del "$locus1" temp 5 111 2>/dev/null + + ${CFG}_entries_add "$locus1" temp 2 111 + check_err $? "$locus1: Adding 2 MDB entries failed, but should have passed" + + ${CFG}_entries_del "$locus1" temp 2 111 + check_err $? "Couldn't delete MDB entries" + + ${CFG}_entries_del "$locus2" temp 5 112 + check_err $? "Couldn't delete MDB entries" + + bridge_port_vlan_maxgroups_set "$locus1" 0 + check_err $? "$locus1: Couldn't set maximum to 0" + + bridge_port_maxgroups_set "$locus0" 0 + check_err $? "$locus0: Couldn't set maximum to 0" + + log_test "$CFG: port_vlan maxgroups: isolation of port and per-VLAN ngroups" +} + +test_8021qvs_maxgroups_too_many_cross_vlan_cfg4() +{ + test_maxgroups_too_many_cross_vlan cfg4 +} + +test_8021qvs_maxgroups_too_many_cross_vlan_ctl4() +{ + test_maxgroups_too_many_cross_vlan ctl4 +} + +test_8021qvs_maxgroups_too_many_cross_vlan_cfg6() +{ + test_maxgroups_too_many_cross_vlan cfg6 +} + +test_8021qvs_maxgroups_too_many_cross_vlan_ctl6() +{ + test_maxgroups_too_many_cross_vlan ctl6 +} + +test_vlan_attributes() +{ + local locus=$1; shift + local expect=$1; shift + + RET=0 + + local max=$(bridge_port_vlan_maxgroups_get "$locus") + local n=$(bridge_port_vlan_ngroups_get "$locus") + + eval "[[ $max $expect ]]" + check_err $? "$locus: maxgroups attribute expected to be $expect, but was $max" + + eval "[[ $n $expect ]]" + check_err $? "$locus: ngroups attribute expected to be $expect, but was $n" + + log_test "port_vlan: presence of ngroups and maxgroups attributes" +} + +test_8021q_vlan_attributes() +{ + test_vlan_attributes "dev $swp1 vid 10" "== null" +} + +test_8021qvs_vlan_attributes() +{ + test_vlan_attributes "dev $swp1 vid 10" "-ge 0" +} + +test_toggle_vlan_snooping() +{ + local mode=$1; shift + + RET=0 + + local CFG=cfg4 + local context=port_vlan + local locus="dev $swp1 vid 10" + + ${CFG}_entries_add "$locus" $mode 5 + check_err $? "Couldn't add MDB entries" + + bridge_${context}_maxgroups_set "$locus" 100 + check_err $? "Failed to set max to 100" + + ip link set dev br0 type bridge mcast_vlan_snooping 0 + sleep 1 + ip link set dev br0 type bridge mcast_vlan_snooping 1 + + local n=$(bridge_${context}_ngroups_get "$locus") + local nn=$(bridge mdb show dev br0 | grep $swp1 | wc -l) + ((nn == n)) + check_err $? "mcast_n_groups expected to be $nn, but $n reported" + + local max=$(bridge_${context}_maxgroups_get "$locus") + ((max == 100)) + check_err $? "Max groups expected to be 100 but $max reported" + + bridge_${context}_maxgroups_set "$locus" 0 + check_err $? "Failed to set max to 0" + + log_test "$CFG: $context: $mode: mcast_vlan_snooping toggle" +} + +test_toggle_vlan_snooping_temp() +{ + test_toggle_vlan_snooping temp +} + +test_toggle_vlan_snooping_permanent() +{ + test_toggle_vlan_snooping permanent +} + +# ngroup test suites + +test_8021d_ngroups_cfg4() +{ + test_8021d_ngroups_reporting_cfg4 +} + +test_8021d_ngroups_ctl4() +{ + test_8021d_ngroups_reporting_ctl4 +} + +test_8021d_ngroups_cfg6() +{ + test_8021d_ngroups_reporting_cfg6 +} + +test_8021d_ngroups_ctl6() +{ + test_8021d_ngroups_reporting_ctl6 +} + +test_8021q_ngroups_cfg4() +{ + test_8021q_ngroups_reporting_cfg4 +} + +test_8021q_ngroups_ctl4() +{ + test_8021q_ngroups_reporting_ctl4 +} + +test_8021q_ngroups_cfg6() +{ + test_8021q_ngroups_reporting_cfg6 +} + +test_8021q_ngroups_ctl6() +{ + test_8021q_ngroups_reporting_ctl6 +} + +test_8021qvs_ngroups_cfg4() +{ + test_8021qvs_ngroups_reporting_cfg4 + test_8021qvs_ngroups_cross_vlan_cfg4 +} + +test_8021qvs_ngroups_ctl4() +{ + test_8021qvs_ngroups_reporting_ctl4 + test_8021qvs_ngroups_cross_vlan_ctl4 +} + +test_8021qvs_ngroups_cfg6() +{ + test_8021qvs_ngroups_reporting_cfg6 + test_8021qvs_ngroups_cross_vlan_cfg6 +} + +test_8021qvs_ngroups_ctl6() +{ + test_8021qvs_ngroups_reporting_ctl6 + test_8021qvs_ngroups_cross_vlan_ctl6 +} + +# maxgroups test suites + +test_8021d_maxgroups_cfg4() +{ + test_8021d_maxgroups_zero_cfg4 + test_8021d_maxgroups_too_low_cfg4 + test_8021d_maxgroups_too_many_entries_cfg4 +} + +test_8021d_maxgroups_ctl4() +{ + test_8021d_maxgroups_zero_ctl4 + test_8021d_maxgroups_too_low_ctl4 + test_8021d_maxgroups_too_many_entries_ctl4 +} + +test_8021d_maxgroups_cfg6() +{ + test_8021d_maxgroups_zero_cfg6 + test_8021d_maxgroups_too_low_cfg6 + test_8021d_maxgroups_too_many_entries_cfg6 +} + +test_8021d_maxgroups_ctl6() +{ + test_8021d_maxgroups_zero_ctl6 + test_8021d_maxgroups_too_low_ctl6 + test_8021d_maxgroups_too_many_entries_ctl6 +} + +test_8021q_maxgroups_cfg4() +{ + test_8021q_maxgroups_zero_cfg4 + test_8021q_maxgroups_too_low_cfg4 + test_8021q_maxgroups_too_many_entries_cfg4 +} + +test_8021q_maxgroups_ctl4() +{ + test_8021q_maxgroups_zero_ctl4 + test_8021q_maxgroups_too_low_ctl4 + test_8021q_maxgroups_too_many_entries_ctl4 +} + +test_8021q_maxgroups_cfg6() +{ + test_8021q_maxgroups_zero_cfg6 + test_8021q_maxgroups_too_low_cfg6 + test_8021q_maxgroups_too_many_entries_cfg6 +} + +test_8021q_maxgroups_ctl6() +{ + test_8021q_maxgroups_zero_ctl6 + test_8021q_maxgroups_too_low_ctl6 + test_8021q_maxgroups_too_many_entries_ctl6 +} + +test_8021qvs_maxgroups_cfg4() +{ + test_8021qvs_maxgroups_zero_cfg4 + test_8021qvs_maxgroups_zero_cross_vlan_cfg4 + test_8021qvs_maxgroups_too_low_cfg4 + test_8021qvs_maxgroups_too_many_entries_cfg4 + test_8021qvs_maxgroups_too_many_cross_vlan_cfg4 +} + +test_8021qvs_maxgroups_ctl4() +{ + test_8021qvs_maxgroups_zero_ctl4 + test_8021qvs_maxgroups_zero_cross_vlan_ctl4 + test_8021qvs_maxgroups_too_low_ctl4 + test_8021qvs_maxgroups_too_many_entries_ctl4 + test_8021qvs_maxgroups_too_many_cross_vlan_ctl4 +} + +test_8021qvs_maxgroups_cfg6() +{ + test_8021qvs_maxgroups_zero_cfg6 + test_8021qvs_maxgroups_zero_cross_vlan_cfg6 + test_8021qvs_maxgroups_too_low_cfg6 + test_8021qvs_maxgroups_too_many_entries_cfg6 + test_8021qvs_maxgroups_too_many_cross_vlan_cfg6 +} + +test_8021qvs_maxgroups_ctl6() +{ + test_8021qvs_maxgroups_zero_ctl6 + test_8021qvs_maxgroups_zero_cross_vlan_ctl6 + test_8021qvs_maxgroups_too_low_ctl6 + test_8021qvs_maxgroups_too_many_entries_ctl6 + test_8021qvs_maxgroups_too_many_cross_vlan_ctl6 +} + +# other test suites + +test_8021qvs_toggle_vlan_snooping() +{ + test_toggle_vlan_snooping_temp + test_toggle_vlan_snooping_permanent +} + +# test groups + +test_8021d() +{ + # Tests for vlan_filtering 0 mcast_vlan_snooping 0. + + switch_create_8021d + setup_wait + + test_8021d_ngroups_cfg4 + test_8021d_ngroups_ctl4 + test_8021d_ngroups_cfg6 + test_8021d_ngroups_ctl6 + test_8021d_maxgroups_cfg4 + test_8021d_maxgroups_ctl4 + test_8021d_maxgroups_cfg6 + test_8021d_maxgroups_ctl6 + + switch_destroy +} + +test_8021q() +{ + # Tests for vlan_filtering 1 mcast_vlan_snooping 0. + + switch_create_8021q + setup_wait + + test_8021q_vlan_attributes + test_8021q_ngroups_cfg4 + test_8021q_ngroups_ctl4 + test_8021q_ngroups_cfg6 + test_8021q_ngroups_ctl6 + test_8021q_maxgroups_cfg4 + test_8021q_maxgroups_ctl4 + test_8021q_maxgroups_cfg6 + test_8021q_maxgroups_ctl6 + + switch_destroy +} + +test_8021qvs() +{ + # Tests for vlan_filtering 1 mcast_vlan_snooping 1. + + switch_create_8021qvs + setup_wait + + test_8021qvs_vlan_attributes + test_8021qvs_ngroups_cfg4 + test_8021qvs_ngroups_ctl4 + test_8021qvs_ngroups_cfg6 + test_8021qvs_ngroups_ctl6 + test_8021qvs_maxgroups_cfg4 + test_8021qvs_maxgroups_ctl4 + test_8021qvs_maxgroups_cfg6 + test_8021qvs_maxgroups_ctl6 + test_8021qvs_toggle_vlan_snooping + + switch_destroy +} + +if ! bridge link help 2>&1 | grep -q "mcast_max_groups"; then + echo "SKIP: iproute2 too old, missing bridge \"mcast_max_groups\" support" + exit $ksft_skip +fi + +trap cleanup EXIT + +setup_prepare +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/bridge_mdb_port_down.sh b/tools/testing/selftests/net/forwarding/bridge_mdb_port_down.sh new file mode 100755 index 000000000000..1a0480e71d83 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/bridge_mdb_port_down.sh @@ -0,0 +1,118 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Verify that permanent mdb entries can be added to and deleted from bridge +# interfaces that are down, and works correctly when done so. + +ALL_TESTS="add_del_to_port_down" +NUM_NETIFS=4 + +TEST_GROUP="239.10.10.10" +TEST_GROUP_MAC="01:00:5e:0a:0a:0a" + +source lib.sh + + +add_del_to_port_down() { + RET=0 + + ip link set dev $swp2 down + bridge mdb add dev br0 port "$swp2" grp $TEST_GROUP permanent 2>/dev/null + check_err $? "Failed adding mdb entry" + + ip link set dev $swp2 up + setup_wait_dev $swp2 + mcast_packet_test $TEST_GROUP_MAC 192.0.2.1 $TEST_GROUP $h1 $h2 + check_fail $? "Traffic to $TEST_GROUP wasn't forwarded" + + ip link set dev $swp2 down + bridge mdb show dev br0 | grep -q "$TEST_GROUP permanent" 2>/dev/null + check_err $? "MDB entry did not persist after link up/down" + + bridge mdb del dev br0 port "$swp2" grp $TEST_GROUP 2>/dev/null + check_err $? "Failed deleting mdb entry" + + ip link set dev $swp2 up + setup_wait_dev $swp2 + mcast_packet_test $TEST_GROUP_MAC 192.0.2.1 $TEST_GROUP $h1 $h2 + check_err $? "Traffic to $TEST_GROUP was forwarded after entry removed" + + log_test "MDB add/del entry to port with state down " +} + +h1_create() +{ + simple_if_init $h1 192.0.2.1/24 2001:db8:1::1/64 +} + +h1_destroy() +{ + simple_if_fini $h1 192.0.2.1/24 2001:db8:1::1/64 +} + +h2_create() +{ + simple_if_init $h2 192.0.2.2/24 2001:db8:1::2/64 +} + +h2_destroy() +{ + simple_if_fini $h2 192.0.2.2/24 2001:db8:1::2/64 +} + +switch_create() +{ + # Enable multicast filtering + ip link add dev br0 type bridge mcast_snooping 1 mcast_querier 1 + + ip link set dev $swp1 master br0 + ip link set dev $swp2 master br0 + + ip link set dev br0 up + ip link set dev $swp1 up + + bridge link set dev $swp2 mcast_flood off + # Bridge currently has a "grace time" at creation time before it + # forwards multicast according to the mdb. Since we disable the + # mcast_flood setting per port + sleep 10 +} + +switch_destroy() +{ + ip link set dev $swp1 down + ip link set dev $swp2 down + ip link del dev br0 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + vrf_prepare + + h1_create + h2_create + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + h1_destroy + h2_destroy + + vrf_cleanup +} + +trap cleanup EXIT + +setup_prepare +tests_run +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/bridge_mld.sh b/tools/testing/selftests/net/forwarding/bridge_mld.sh new file mode 100755 index 000000000000..e2b9ff773c6b --- /dev/null +++ b/tools/testing/selftests/net/forwarding/bridge_mld.sh @@ -0,0 +1,564 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +ALL_TESTS="mldv2include_test mldv2inc_allow_test mldv2inc_is_include_test mldv2inc_is_exclude_test \ + mldv2inc_to_exclude_test mldv2exc_allow_test mldv2exc_is_include_test \ + mldv2exc_is_exclude_test mldv2exc_to_exclude_test mldv2inc_block_test \ + mldv2exc_block_test mldv2exc_timeout_test mldv2star_ex_auto_add_test" +NUM_NETIFS=4 +CHECK_TC="yes" +TEST_GROUP="ff02::cc" +TEST_GROUP_MAC="33:33:00:00:00:cc" + +# MLDv2 is_in report: grp ff02::cc is_include 2001:db8:1::1,2001:db8:1::2,2001:db8:1::3 +MZPKT_IS_INC="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:54:00:01:fe:80:00:\ +00:00:00:00:00:fc:54:00:ff:fe:04:5e:ba:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:01:3a:\ +00:05:02:00:00:00:00:8f:00:8e:d9:00:00:00:01:01:00:00:03:ff:02:00:00:00:00:00:00:00:00:00:\ +00:00:00:00:cc:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:01:20:01:0d:b8:00:01:00:00:00:\ +00:00:00:00:00:00:02:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:03" +# MLDv2 is_in report: grp ff02::cc is_include 2001:db8:1::10,2001:db8:1::11,2001:db8:1::12 +MZPKT_IS_INC2="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:54:00:01:fe:80:00:\ +00:00:00:00:00:fc:54:00:ff:fe:04:5e:ba:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:01:3a:00:\ +05:02:00:00:00:00:8f:00:8e:ac:00:00:00:01:01:00:00:03:ff:02:00:00:00:00:00:00:00:00:00:00:00:\ +00:00:cc:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:10:20:01:0d:b8:00:01:00:00:00:00:00:00:\ +00:00:00:11:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:12" +# MLDv2 is_in report: grp ff02::cc is_include 2001:db8:1::20,2001:db8:1::30 +MZPKT_IS_INC3="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:44:00:01:fe:80:00:00:00:\ +00:00:00:fc:54:00:ff:fe:04:5e:ba:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:01:3a:00:05:02:00:\ +00:00:00:8f:00:bc:5a:00:00:00:01:01:00:00:02:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:cc:20:\ +01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:20:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:30" +# MLDv2 allow report: grp ff02::cc allow 2001:db8:1::10,2001:db8:1::11,2001:db8:1::12 +MZPKT_ALLOW="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:54:00:01:fe:80:00:00:\ +00:00:00:00:fc:54:00:ff:fe:04:5e:ba:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:01:3a:00:05:\ +02:00:00:00:00:8f:00:8a:ac:00:00:00:01:05:00:00:03:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:\ +00:cc:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:10:20:01:0d:b8:00:01:00:00:00:00:00:00:00:\ +00:00:11:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:12" +# MLDv2 allow report: grp ff02::cc allow 2001:db8:1::20,2001:db8:1::30 +MZPKT_ALLOW2="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:44:00:01:fe:80:00:00:00:\ +00:00:00:fc:54:00:ff:fe:04:5e:ba:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:01:3a:00:05:02:00:\ +00:00:00:8f:00:b8:5a:00:00:00:01:05:00:00:02:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:cc:20:\ +01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:20:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:30" +# MLDv2 is_ex report: grp ff02::cc is_exclude 2001:db8:1::1,2001:db8:1::2,2001:db8:1::20,2001:db8:1::21 +MZPKT_IS_EXC="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:64:00:01:fe:80:00:00:00:\ +00:00:00:fc:54:00:ff:fe:04:5e:ba:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:01:3a:00:05:02:00:\ +00:00:00:8f:00:5f:d0:00:00:00:01:02:00:00:04:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:cc:20:\ +01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:01:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:02:20:\ +01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:20:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:21" +# MLDv2 is_ex report: grp ff02::cc is_exclude 2001:db8:1::20,2001:db8:1::30 +MZPKT_IS_EXC2="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:44:00:01:fe:80:00:00:00:\ +00:00:00:fc:54:00:ff:fe:04:5e:ba:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:01:3a:00:05:02:00:\ +00:00:00:8f:00:bb:5a:00:00:00:01:02:00:00:02:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:cc:20:\ +01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:20:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:30" +# MLDv2 to_ex report: grp ff02::cc to_exclude 2001:db8:1::1,2001:db8:1::20,2001:db8:1::30 +MZPKT_TO_EXC="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:54:00:01:fe:80:00:00:00:\ +00:00:00:fc:54:00:ff:fe:04:5e:ba:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:01:3a:00:05:02:00:\ +00:00:00:8f:00:8b:8e:00:00:00:01:04:00:00:03:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:cc:20:\ +01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:01:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:20:20:\ +01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:30" +# MLDv2 block report: grp ff02::cc block 2001:db8:1::1,2001:db8:1::20,2001:db8:1::30 +MZPKT_BLOCK="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:54:00:01:fe:80:00:00:00:00:\ +00:00:fc:54:00:ff:fe:04:5e:ba:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:01:3a:00:05:02:00:00:\ +00:00:8f:00:89:8e:00:00:00:01:06:00:00:03:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:cc:20:01:\ +0d:b8:00:01:00:00:00:00:00:00:00:00:00:01:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:20:20:01:\ +0d:b8:00:01:00:00:00:00:00:00:00:00:00:30" + +source lib.sh + +h1_create() +{ + simple_if_init $h1 2001:db8:1::1/64 +} + +h1_destroy() +{ + simple_if_fini $h1 2001:db8:1::1/64 +} + +h2_create() +{ + simple_if_init $h2 2001:db8:1::2/64 +} + +h2_destroy() +{ + simple_if_fini $h2 2001:db8:1::2/64 +} + +switch_create() +{ + ip link add dev br0 type bridge mcast_snooping 1 mcast_query_response_interval 100 \ + mcast_mld_version 2 mcast_startup_query_interval 300 \ + mcast_querier 1 + + ip link set dev $swp1 master br0 + ip link set dev $swp2 master br0 + + ip link set dev br0 up + ip link set dev $swp1 up + ip link set dev $swp2 up + + # make sure a query has been generated + sleep 5 +} + +switch_destroy() +{ + ip link set dev $swp2 down + ip link set dev $swp1 down + + ip link del dev br0 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + vrf_prepare + + h1_create + h2_create + + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + + h2_destroy + h1_destroy + + vrf_cleanup +} + +mldv2include_prepare() +{ + local host1_if=$1 + local X=("2001:db8:1::1" "2001:db8:1::2" "2001:db8:1::3") + + ip link set dev br0 type bridge mcast_mld_version 2 + check_err $? "Could not change bridge MLD version to 2" + + $MZ $host1_if $MZPKT_IS_INC -q + sleep 1 + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and .source_list != null)" &>/dev/null + check_err $? "Missing *,G entry with source list" + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and \ + .source_list != null and .filter_mode == \"include\")" &>/dev/null + check_err $? "Wrong *,G entry filter mode" + brmcast_check_sg_entries "is_include" "${X[@]}" +} + +mldv2exclude_prepare() +{ + local host1_if=$1 + local mac=$2 + local group=$3 + local pkt=$4 + local X=("2001:db8:1::1" "2001:db8:1::2") + local Y=("2001:db8:1::20" "2001:db8:1::21") + + mldv2include_prepare $h1 + + $MZ $host1_if -c 1 $MZPKT_IS_EXC -q + sleep 1 + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and \ + .source_list != null and .filter_mode == \"exclude\")" &>/dev/null + check_err $? "Wrong *,G entry filter mode" + + brmcast_check_sg_entries "is_exclude" "${X[@]}" "${Y[@]}" + + brmcast_check_sg_state 0 "${X[@]}" + brmcast_check_sg_state 1 "${Y[@]}" + + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and \ + .source_list != null and + .source_list[].address == \"2001:db8:1::3\")" &>/dev/null + check_fail $? "Wrong *,G entry source list, 2001:db8:1::3 entry still exists" +} + +mldv2cleanup() +{ + local port=$1 + + bridge mdb del dev br0 port $port grp $TEST_GROUP + ip link set dev br0 type bridge mcast_mld_version 1 +} + +mldv2include_test() +{ + RET=0 + local X=("2001:db8:1::1" "2001:db8:1::2" "2001:db8:1::3") + + mldv2include_prepare $h1 + + brmcast_check_sg_state 0 "${X[@]}" + + brmcast_check_sg_fwding 1 "${X[@]}" + brmcast_check_sg_fwding 0 "2001:db8:1::100" + + log_test "MLDv2 report $TEST_GROUP is_include" + + mldv2cleanup $swp1 +} + +mldv2inc_allow_test() +{ + RET=0 + local X=("2001:db8:1::10" "2001:db8:1::11" "2001:db8:1::12") + + mldv2include_prepare $h1 + + $MZ $h1 -c 1 $MZPKT_ALLOW -q + sleep 1 + brmcast_check_sg_entries "allow" "${X[@]}" + + brmcast_check_sg_state 0 "${X[@]}" + + brmcast_check_sg_fwding 1 "${X[@]}" + brmcast_check_sg_fwding 0 "2001:db8:1::100" + + log_test "MLDv2 report $TEST_GROUP include -> allow" + + mldv2cleanup $swp1 +} + +mldv2inc_is_include_test() +{ + RET=0 + local X=("2001:db8:1::10" "2001:db8:1::11" "2001:db8:1::12") + + mldv2include_prepare $h1 + + $MZ $h1 -c 1 $MZPKT_IS_INC2 -q + sleep 1 + brmcast_check_sg_entries "is_include" "${X[@]}" + + brmcast_check_sg_state 0 "${X[@]}" + + brmcast_check_sg_fwding 1 "${X[@]}" + brmcast_check_sg_fwding 0 "2001:db8:1::100" + + log_test "MLDv2 report $TEST_GROUP include -> is_include" + + mldv2cleanup $swp1 +} + +mldv2inc_is_exclude_test() +{ + RET=0 + + mldv2exclude_prepare $h1 + + brmcast_check_sg_fwding 1 "${X[@]}" 2001:db8:1::100 + brmcast_check_sg_fwding 0 "${Y[@]}" + + log_test "MLDv2 report $TEST_GROUP include -> is_exclude" + + mldv2cleanup $swp1 +} + +mldv2inc_to_exclude_test() +{ + RET=0 + local X=("2001:db8:1::1") + local Y=("2001:db8:1::20" "2001:db8:1::30") + + mldv2include_prepare $h1 + + ip link set dev br0 type bridge mcast_last_member_interval 500 + check_err $? "Could not change mcast_last_member_interval to 5s" + + $MZ $h1 -c 1 $MZPKT_TO_EXC -q + sleep 1 + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and \ + .source_list != null and .filter_mode == \"exclude\")" &>/dev/null + check_err $? "Wrong *,G entry filter mode" + + brmcast_check_sg_entries "to_exclude" "${X[@]}" "${Y[@]}" + + brmcast_check_sg_state 0 "${X[@]}" + brmcast_check_sg_state 1 "${Y[@]}" + + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and \ + .source_list != null and + .source_list[].address == \"2001:db8:1::2\")" &>/dev/null + check_fail $? "Wrong *,G entry source list, 2001:db8:1::2 entry still exists" + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and \ + .source_list != null and + .source_list[].address == \"2001:db8:1::21\")" &>/dev/null + check_fail $? "Wrong *,G entry source list, 2001:db8:1::21 entry still exists" + + brmcast_check_sg_fwding 1 "${X[@]}" 2001:db8:1::100 + brmcast_check_sg_fwding 0 "${Y[@]}" + + log_test "MLDv2 report $TEST_GROUP include -> to_exclude" + + ip link set dev br0 type bridge mcast_last_member_interval 100 + + mldv2cleanup $swp1 +} + +mldv2exc_allow_test() +{ + RET=0 + local X=("2001:db8:1::1" "2001:db8:1::2" "2001:db8:1::20" "2001:db8:1::30") + local Y=("2001:db8:1::21") + + mldv2exclude_prepare $h1 + + $MZ $h1 -c 1 $MZPKT_ALLOW2 -q + sleep 1 + brmcast_check_sg_entries "allow" "${X[@]}" "${Y[@]}" + + brmcast_check_sg_state 0 "${X[@]}" + brmcast_check_sg_state 1 "${Y[@]}" + + brmcast_check_sg_fwding 1 "${X[@]}" 2001:db8:1::100 + brmcast_check_sg_fwding 0 "${Y[@]}" + + log_test "MLDv2 report $TEST_GROUP exclude -> allow" + + mldv2cleanup $swp1 +} + +mldv2exc_is_include_test() +{ + RET=0 + local X=("2001:db8:1::1" "2001:db8:1::2" "2001:db8:1::20" "2001:db8:1::30") + local Y=("2001:db8:1::21") + + mldv2exclude_prepare $h1 + + $MZ $h1 -c 1 $MZPKT_IS_INC3 -q + sleep 1 + brmcast_check_sg_entries "is_include" "${X[@]}" "${Y[@]}" + + brmcast_check_sg_state 0 "${X[@]}" + brmcast_check_sg_state 1 "${Y[@]}" + + brmcast_check_sg_fwding 1 "${X[@]}" 2001:db8:1::100 + brmcast_check_sg_fwding 0 "${Y[@]}" + + log_test "MLDv2 report $TEST_GROUP exclude -> is_include" + + mldv2cleanup $swp1 +} + +mldv2exc_is_exclude_test() +{ + RET=0 + local X=("2001:db8:1::30") + local Y=("2001:db8:1::20") + + mldv2exclude_prepare $h1 + + $MZ $h1 -c 1 $MZPKT_IS_EXC2 -q + sleep 1 + brmcast_check_sg_entries "is_exclude" "${X[@]}" "${Y[@]}" + + brmcast_check_sg_state 0 "${X[@]}" + brmcast_check_sg_state 1 "${Y[@]}" + + brmcast_check_sg_fwding 1 "${X[@]}" 2001:db8:1::100 + brmcast_check_sg_fwding 0 "${Y[@]}" + + log_test "MLDv2 report $TEST_GROUP exclude -> is_exclude" + + mldv2cleanup $swp1 +} + +mldv2exc_to_exclude_test() +{ + RET=0 + local X=("2001:db8:1::1" "2001:db8:1::30") + local Y=("2001:db8:1::20") + + mldv2exclude_prepare $h1 + + ip link set dev br0 type bridge mcast_last_member_interval 500 + check_err $? "Could not change mcast_last_member_interval to 5s" + + $MZ $h1 -c 1 $MZPKT_TO_EXC -q + sleep 1 + brmcast_check_sg_entries "to_exclude" "${X[@]}" "${Y[@]}" + + brmcast_check_sg_state 0 "${X[@]}" + brmcast_check_sg_state 1 "${Y[@]}" + + brmcast_check_sg_fwding 1 "${X[@]}" 2001:db8:1::100 + brmcast_check_sg_fwding 0 "${Y[@]}" + + log_test "MLDv2 report $TEST_GROUP exclude -> to_exclude" + + ip link set dev br0 type bridge mcast_last_member_interval 100 + + mldv2cleanup $swp1 +} + +mldv2inc_block_test() +{ + RET=0 + local X=("2001:db8:1::2" "2001:db8:1::3") + + mldv2include_prepare $h1 + + $MZ $h1 -c 1 $MZPKT_BLOCK -q + # make sure the lowered timers have expired (by default 2 seconds) + sleep 3 + brmcast_check_sg_entries "block" "${X[@]}" + + brmcast_check_sg_state 0 "${X[@]}" + + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and \ + .source_list != null and + .source_list[].address == \"2001:db8:1::1\")" &>/dev/null + check_fail $? "Wrong *,G entry source list, 2001:db8:1::1 entry still exists" + + brmcast_check_sg_fwding 1 "${X[@]}" + brmcast_check_sg_fwding 0 2001:db8:1::100 + + log_test "MLDv2 report $TEST_GROUP include -> block" + + mldv2cleanup $swp1 +} + +mldv2exc_block_test() +{ + RET=0 + local X=("2001:db8:1::1" "2001:db8:1::2" "2001:db8:1::30") + local Y=("2001:db8:1::20" "2001:db8:1::21") + + mldv2exclude_prepare $h1 + + ip link set dev br0 type bridge mcast_last_member_interval 500 + check_err $? "Could not change mcast_last_member_interval to 5s" + + $MZ $h1 -c 1 $MZPKT_BLOCK -q + sleep 1 + brmcast_check_sg_entries "block" "${X[@]}" "${Y[@]}" + + brmcast_check_sg_state 0 "${X[@]}" + brmcast_check_sg_state 1 "${Y[@]}" + + brmcast_check_sg_fwding 1 "${X[@]}" 2001:db8:1::100 + brmcast_check_sg_fwding 0 "${Y[@]}" + + log_test "MLDv2 report $TEST_GROUP exclude -> block" + + ip link set dev br0 type bridge mcast_last_member_interval 100 + + mldv2cleanup $swp1 +} + +mldv2exc_timeout_test() +{ + RET=0 + local X=("2001:db8:1::20" "2001:db8:1::30") + + # GMI should be 3 seconds + ip link set dev br0 type bridge mcast_query_interval 100 \ + mcast_query_response_interval 100 \ + mcast_membership_interval 300 + + mldv2exclude_prepare $h1 + ip link set dev br0 type bridge mcast_query_interval 500 \ + mcast_query_response_interval 500 \ + mcast_membership_interval 1500 + + $MZ $h1 -c 1 $MZPKT_ALLOW2 -q + sleep 3 + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and \ + .source_list != null and .filter_mode == \"include\")" &>/dev/null + check_err $? "Wrong *,G entry filter mode" + + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and \ + .source_list != null and + .source_list[].address == \"2001:db8:1::1\")" &>/dev/null + check_fail $? "Wrong *,G entry source list, 2001:db8:1::1 entry still exists" + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and \ + .source_list != null and + .source_list[].address == \"2001:db8:1::2\")" &>/dev/null + check_fail $? "Wrong *,G entry source list, 2001:db8:1::2 entry still exists" + + brmcast_check_sg_entries "allow" "${X[@]}" + + brmcast_check_sg_state 0 "${X[@]}" + + brmcast_check_sg_fwding 1 "${X[@]}" + brmcast_check_sg_fwding 0 2001:db8:1::100 + + log_test "MLDv2 group $TEST_GROUP exclude timeout" + + ip link set dev br0 type bridge mcast_query_interval 12500 \ + mcast_query_response_interval 1000 \ + mcast_membership_interval 26000 + + mldv2cleanup $swp1 +} + +mldv2star_ex_auto_add_test() +{ + RET=0 + + mldv2exclude_prepare $h1 + + $MZ $h2 -c 1 $MZPKT_IS_INC -q + sleep 1 + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and .src == \"2001:db8:1::3\" and \ + .port == \"$swp1\")" &>/dev/null + check_err $? "S,G entry for *,G port doesn't exist" + + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and .src == \"2001:db8:1::3\" and \ + .port == \"$swp1\" and \ + .flags[] == \"added_by_star_ex\")" &>/dev/null + check_err $? "Auto-added S,G entry doesn't have added_by_star_ex flag" + + brmcast_check_sg_fwding 1 2001:db8:1::3 + + log_test "MLDv2 S,G port entry automatic add to a *,G port" + + mldv2cleanup $swp1 + mldv2cleanup $swp2 +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh index b90dff8d3a94..64bd00fe9a4f 100755 --- a/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh +++ b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh @@ -28,8 +28,9 @@ h2_destroy() switch_create() { - # 10 Seconds ageing time. - ip link add dev br0 type bridge vlan_filtering 1 ageing_time 1000 \ + ip link add dev br0 type bridge \ + vlan_filtering 1 \ + ageing_time $LOW_AGEING_TIME \ mcast_snooping 0 ip link set dev $swp1 master br0 diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_mcast.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_mcast.sh new file mode 100755 index 000000000000..72dfbeaf56b9 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/bridge_vlan_mcast.sh @@ -0,0 +1,546 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +ALL_TESTS="vlmc_control_test vlmc_querier_test vlmc_igmp_mld_version_test \ + vlmc_last_member_test vlmc_startup_query_test vlmc_membership_test \ + vlmc_querier_intvl_test vlmc_query_intvl_test vlmc_query_response_intvl_test \ + vlmc_router_port_test vlmc_filtering_test" +NUM_NETIFS=4 +CHECK_TC="yes" +TEST_GROUP="239.10.10.10" + +source lib.sh + +h1_create() +{ + simple_if_init $h1 192.0.2.1/24 2001:db8:1::1/64 + ip link add l $h1 $h1.10 up type vlan id 10 +} + +h1_destroy() +{ + ip link del $h1.10 + simple_if_fini $h1 192.0.2.1/24 2001:db8:1::1/64 +} + +h2_create() +{ + simple_if_init $h2 192.0.2.2/24 2001:db8:1::2/64 + ip link add l $h2 $h2.10 up type vlan id 10 +} + +h2_destroy() +{ + ip link del $h2.10 + simple_if_fini $h2 192.0.2.2/24 2001:db8:1::2/64 +} + +switch_create() +{ + ip link add dev br0 type bridge mcast_snooping 1 mcast_querier 1 vlan_filtering 1 + + ip link set dev $swp1 master br0 + ip link set dev $swp2 master br0 + + ip link set dev br0 up + ip link set dev $swp1 up + ip link set dev $swp2 up + + tc qdisc add dev $swp1 clsact + tc qdisc add dev $swp2 clsact + + bridge vlan add vid 10-11 dev $swp1 master + bridge vlan add vid 10-11 dev $swp2 master + + ip link set dev br0 type bridge mcast_vlan_snooping 1 + check_err $? "Could not enable global vlan multicast snooping" + log_test "Vlan multicast snooping enable" +} + +switch_destroy() +{ + tc qdisc del dev $swp2 clsact + tc qdisc del dev $swp1 clsact + + ip link set dev $swp2 down + ip link set dev $swp1 down + + ip link del dev br0 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + vrf_prepare + + h1_create + h2_create + + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + + h2_destroy + h1_destroy + + vrf_cleanup +} + +vlmc_v2join_test() +{ + local expect=$1 + + RET=0 + ip address add dev $h2.10 $TEST_GROUP/32 autojoin + check_err $? "Could not join $TEST_GROUP" + + sleep 5 + bridge -j mdb show dev br0 | + jq -e ".[].mdb[] | select(.grp == \"$TEST_GROUP\" and .vid == 10)" &>/dev/null + if [ $expect -eq 0 ]; then + check_err $? "IGMPv2 report didn't create mdb entry for $TEST_GROUP" + else + check_fail $? "IGMPv2 report shouldn't have created mdb entry for $TEST_GROUP" + fi + + # check if we need to cleanup + if [ $RET -eq 0 ]; then + ip address del dev $h2.10 $TEST_GROUP/32 2>&1 1>/dev/null + sleep 5 + bridge -j mdb show dev br0 | + jq -e ".[].mdb[] | select(.grp == \"$TEST_GROUP\" and \ + .vid == 10)" &>/dev/null + check_fail $? "IGMPv2 leave didn't remove mdb entry for $TEST_GROUP" + fi +} + +vlmc_control_test() +{ + RET=0 + local goutput=`bridge -j vlan global show` + echo -n $goutput | + jq -e ".[].vlans[] | select(.vlan == 10)" &>/dev/null + check_err $? "Could not find vlan 10's global options" + log_test "Vlan global options existence" + + RET=0 + echo -n $goutput | + jq -e ".[].vlans[] | select(.vlan == 10 and .mcast_snooping == 1) " &>/dev/null + check_err $? "Wrong default mcast_snooping global option value" + log_test "Vlan mcast_snooping global option default value" + + RET=0 + vlmc_v2join_test 0 + bridge vlan global set vid 10 dev br0 mcast_snooping 0 + check_err $? "Could not disable multicast snooping in vlan 10" + vlmc_v2join_test 1 + log_test "Vlan 10 multicast snooping control" +} + +# setup for general query counting +vlmc_query_cnt_xstats() +{ + local type=$1 + local version=$2 + local dev=$3 + + ip -j link xstats type bridge_slave dev $dev | \ + jq -e ".[].multicast.${type}_queries.tx_v${version}" +} + +vlmc_query_cnt_setup() +{ + local type=$1 + local dev=$2 + + if [[ $type == "igmp" ]]; then + tc filter add dev $dev egress pref 10 prot 802.1Q \ + flower vlan_id 10 vlan_ethtype ipv4 dst_ip 224.0.0.1 ip_proto 2 \ + action pass + else + tc filter add dev $dev egress pref 10 prot 802.1Q \ + flower vlan_id 10 vlan_ethtype ipv6 dst_ip ff02::1 ip_proto icmpv6 \ + action pass + fi + + ip link set dev br0 type bridge mcast_stats_enabled 1 +} + +vlmc_query_cnt_cleanup() +{ + local dev=$1 + + ip link set dev br0 type bridge mcast_stats_enabled 0 + tc filter del dev $dev egress pref 10 +} + +vlmc_check_query() +{ + local type=$1 + local version=$2 + local dev=$3 + local expect=$4 + local time=$5 + local ret=0 + + vlmc_query_cnt_setup $type $dev + + local pre_tx_xstats=$(vlmc_query_cnt_xstats $type $version $dev) + bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_querier 1 + ret=$? + if [[ $ret -eq 0 ]]; then + sleep $time + + local tcstats=$(tc_rule_stats_get $dev 10 egress) + local post_tx_xstats=$(vlmc_query_cnt_xstats $type $version $dev) + + if [[ $tcstats != $expect || \ + $(($post_tx_xstats-$pre_tx_xstats)) != $expect || \ + $tcstats != $(($post_tx_xstats-$pre_tx_xstats)) ]]; then + ret=1 + fi + fi + + bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_querier 0 + vlmc_query_cnt_cleanup $dev + + return $ret +} + +vlmc_querier_test() +{ + RET=0 + local goutput=`bridge -j vlan global show` + echo -n $goutput | + jq -e ".[].vlans[] | select(.vlan == 10)" &>/dev/null + check_err $? "Could not find vlan 10's global options" + + echo -n $goutput | + jq -e ".[].vlans[] | select(.vlan == 10 and .mcast_querier == 0) " &>/dev/null + check_err $? "Wrong default mcast_querier global vlan option value" + log_test "Vlan mcast_querier global option default value" + + RET=0 + bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_querier 1 + check_err $? "Could not enable querier in vlan 10" + log_test "Vlan 10 multicast querier enable" + bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_querier 0 + + RET=0 + vlmc_check_query igmp 2 $swp1 1 1 + check_err $? "No vlan tagged IGMPv2 general query packets sent" + log_test "Vlan 10 tagged IGMPv2 general query sent" + + RET=0 + vlmc_check_query mld 1 $swp1 1 1 + check_err $? "No vlan tagged MLD general query packets sent" + log_test "Vlan 10 tagged MLD general query sent" +} + +vlmc_igmp_mld_version_test() +{ + RET=0 + local goutput=`bridge -j vlan global show` + echo -n $goutput | + jq -e ".[].vlans[] | select(.vlan == 10)" &>/dev/null + check_err $? "Could not find vlan 10's global options" + + echo -n $goutput | + jq -e ".[].vlans[] | select(.vlan == 10 and .mcast_igmp_version == 2) " &>/dev/null + check_err $? "Wrong default mcast_igmp_version global vlan option value" + log_test "Vlan mcast_igmp_version global option default value" + + RET=0 + echo -n $goutput | + jq -e ".[].vlans[] | select(.vlan == 10 and .mcast_mld_version == 1) " &>/dev/null + check_err $? "Wrong default mcast_mld_version global vlan option value" + log_test "Vlan mcast_mld_version global option default value" + + RET=0 + bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_igmp_version 3 + check_err $? "Could not set mcast_igmp_version in vlan 10" + log_test "Vlan 10 mcast_igmp_version option changed to 3" + + RET=0 + vlmc_check_query igmp 3 $swp1 1 1 + check_err $? "No vlan tagged IGMPv3 general query packets sent" + log_test "Vlan 10 tagged IGMPv3 general query sent" + + RET=0 + bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_mld_version 2 + check_err $? "Could not set mcast_mld_version in vlan 10" + log_test "Vlan 10 mcast_mld_version option changed to 2" + + RET=0 + vlmc_check_query mld 2 $swp1 1 1 + check_err $? "No vlan tagged MLDv2 general query packets sent" + log_test "Vlan 10 tagged MLDv2 general query sent" + + bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_igmp_version 2 + bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_mld_version 1 +} + +vlmc_last_member_test() +{ + RET=0 + local goutput=`bridge -j vlan global show` + echo -n $goutput | + jq -e ".[].vlans[] | select(.vlan == 10)" &>/dev/null + check_err $? "Could not find vlan 10's global options" + + echo -n $goutput | + jq -e ".[].vlans[] | select(.vlan == 10 and \ + .mcast_last_member_count == 2) " &>/dev/null + check_err $? "Wrong default mcast_last_member_count global vlan option value" + log_test "Vlan mcast_last_member_count global option default value" + + RET=0 + echo -n $goutput | + jq -e ".[].vlans[] | select(.vlan == 10 and \ + .mcast_last_member_interval == 100) " &>/dev/null + check_err $? "Wrong default mcast_last_member_interval global vlan option value" + log_test "Vlan mcast_last_member_interval global option default value" + + RET=0 + bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_last_member_count 3 + check_err $? "Could not set mcast_last_member_count in vlan 10" + log_test "Vlan 10 mcast_last_member_count option changed to 3" + bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_last_member_count 2 + + RET=0 + bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_last_member_interval 200 + check_err $? "Could not set mcast_last_member_interval in vlan 10" + log_test "Vlan 10 mcast_last_member_interval option changed to 200" + bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_last_member_interval 100 +} + +vlmc_startup_query_test() +{ + RET=0 + local goutput=`bridge -j vlan global show` + echo -n $goutput | + jq -e ".[].vlans[] | select(.vlan == 10)" &>/dev/null + check_err $? "Could not find vlan 10's global options" + + echo -n $goutput | + jq -e ".[].vlans[] | select(.vlan == 10 and \ + .mcast_startup_query_interval == 3125) " &>/dev/null + check_err $? "Wrong default mcast_startup_query_interval global vlan option value" + log_test "Vlan mcast_startup_query_interval global option default value" + + RET=0 + echo -n $goutput | + jq -e ".[].vlans[] | select(.vlan == 10 and \ + .mcast_startup_query_count == 2) " &>/dev/null + check_err $? "Wrong default mcast_startup_query_count global vlan option value" + log_test "Vlan mcast_startup_query_count global option default value" + + RET=0 + bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_startup_query_interval 100 + check_err $? "Could not set mcast_startup_query_interval in vlan 10" + vlmc_check_query igmp 2 $swp1 2 3 + check_err $? "Wrong number of tagged IGMPv2 general queries sent" + log_test "Vlan 10 mcast_startup_query_interval option changed to 100" + + RET=0 + bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_startup_query_count 3 + check_err $? "Could not set mcast_startup_query_count in vlan 10" + vlmc_check_query igmp 2 $swp1 3 4 + check_err $? "Wrong number of tagged IGMPv2 general queries sent" + log_test "Vlan 10 mcast_startup_query_count option changed to 3" + + bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_startup_query_interval 3125 + bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_startup_query_count 2 +} + +vlmc_membership_test() +{ + RET=0 + local goutput=`bridge -j vlan global show` + echo -n $goutput | + jq -e ".[].vlans[] | select(.vlan == 10)" &>/dev/null + check_err $? "Could not find vlan 10's global options" + + echo -n $goutput | + jq -e ".[].vlans[] | select(.vlan == 10 and \ + .mcast_membership_interval == 26000) " &>/dev/null + check_err $? "Wrong default mcast_membership_interval global vlan option value" + log_test "Vlan mcast_membership_interval global option default value" + + RET=0 + bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_membership_interval 200 + check_err $? "Could not set mcast_membership_interval in vlan 10" + log_test "Vlan 10 mcast_membership_interval option changed to 200" + + RET=0 + vlmc_v2join_test 1 + log_test "Vlan 10 mcast_membership_interval mdb entry expire" + + bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_membership_interval 26000 +} + +vlmc_querier_intvl_test() +{ + RET=0 + local goutput=`bridge -j vlan global show` + echo -n $goutput | + jq -e ".[].vlans[] | select(.vlan == 10)" &>/dev/null + check_err $? "Could not find vlan 10's global options" + + echo -n $goutput | + jq -e ".[].vlans[] | select(.vlan == 10 and \ + .mcast_querier_interval == 25500) " &>/dev/null + check_err $? "Wrong default mcast_querier_interval global vlan option value" + log_test "Vlan mcast_querier_interval global option default value" + + RET=0 + bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_querier_interval 100 + check_err $? "Could not set mcast_querier_interval in vlan 10" + log_test "Vlan 10 mcast_querier_interval option changed to 100" + + RET=0 + ip link add dev br1 type bridge mcast_snooping 1 mcast_querier 1 vlan_filtering 1 \ + mcast_vlan_snooping 1 + bridge vlan add vid 10 dev br1 self pvid untagged + ip link set dev $h1 master br1 + ip link set dev br1 up + bridge vlan add vid 10 dev $h1 master + bridge vlan global set vid 10 dev br1 mcast_snooping 1 mcast_querier 1 + sleep 2 + ip link del dev br1 + ip addr replace 2001:db8:1::1/64 dev $h1 + vlmc_check_query igmp 2 $swp1 1 1 + check_err $? "Wrong number of IGMPv2 general queries after querier interval" + log_test "Vlan 10 mcast_querier_interval expire after outside query" + + bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_querier_interval 25500 +} + +vlmc_query_intvl_test() +{ + RET=0 + local goutput=`bridge -j vlan global show` + echo -n $goutput | + jq -e ".[].vlans[] | select(.vlan == 10)" &>/dev/null + check_err $? "Could not find vlan 10's global options" + + echo -n $goutput | + jq -e ".[].vlans[] | select(.vlan == 10 and \ + .mcast_query_interval == 12500) " &>/dev/null + check_err $? "Wrong default mcast_query_interval global vlan option value" + log_test "Vlan mcast_query_interval global option default value" + + RET=0 + bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_startup_query_count 0 + bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_query_interval 200 + check_err $? "Could not set mcast_query_interval in vlan 10" + # 1 is sent immediately, then 2 more in the next 5 seconds + vlmc_check_query igmp 2 $swp1 3 5 + check_err $? "Wrong number of tagged IGMPv2 general queries sent" + log_test "Vlan 10 mcast_query_interval option changed to 200" + + bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_startup_query_count 2 + bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_query_interval 12500 +} + +vlmc_query_response_intvl_test() +{ + RET=0 + local goutput=`bridge -j vlan global show` + echo -n $goutput | + jq -e ".[].vlans[] | select(.vlan == 10)" &>/dev/null + check_err $? "Could not find vlan 10's global options" + + echo -n $goutput | + jq -e ".[].vlans[] | select(.vlan == 10 and \ + .mcast_query_response_interval == 1000) " &>/dev/null + check_err $? "Wrong default mcast_query_response_interval global vlan option value" + log_test "Vlan mcast_query_response_interval global option default value" + + RET=0 + bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_query_response_interval 200 + check_err $? "Could not set mcast_query_response_interval in vlan 10" + log_test "Vlan 10 mcast_query_response_interval option changed to 200" + + bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_query_response_interval 1000 +} + +vlmc_router_port_test() +{ + RET=0 + local goutput=`bridge -j -d vlan show` + echo -n $goutput | + jq -e ".[] | select(.ifname == \"$swp1\" and \ + .vlans[].vlan == 10)" &>/dev/null + check_err $? "Could not find port vlan 10's options" + + echo -n $goutput | + jq -e ".[] | select(.ifname == \"$swp1\" and \ + .vlans[].vlan == 10 and \ + .vlans[].mcast_router == 1)" &>/dev/null + check_err $? "Wrong default port mcast_router option value" + log_test "Port vlan 10 option mcast_router default value" + + RET=0 + bridge vlan set vid 10 dev $swp1 mcast_router 2 + check_err $? "Could not set port vlan 10's mcast_router option" + log_test "Port vlan 10 mcast_router option changed to 2" + + RET=0 + tc filter add dev $swp1 egress pref 10 prot 802.1Q \ + flower vlan_id 10 vlan_ethtype ipv4 dst_ip 239.1.1.1 ip_proto udp action pass + tc filter add dev $swp2 egress pref 10 prot 802.1Q \ + flower vlan_id 10 vlan_ethtype ipv4 dst_ip 239.1.1.1 ip_proto udp action pass + bridge vlan set vid 10 dev $swp2 mcast_router 0 + # we need to enable querier and disable query response interval to + # make sure packets are flooded only to router ports + bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_querier 1 \ + mcast_query_response_interval 0 + bridge vlan add vid 10 dev br0 self + sleep 1 + mausezahn br0 -Q 10 -c 10 -p 128 -b 01:00:5e:01:01:01 -B 239.1.1.1 \ + -t udp "dp=1024" &>/dev/null + local swp1_tcstats=$(tc_rule_stats_get $swp1 10 egress) + if [[ $swp1_tcstats != 10 ]]; then + check_err 1 "Wrong number of vlan 10 multicast packets flooded" + fi + local swp2_tcstats=$(tc_rule_stats_get $swp2 10 egress) + check_err $swp2_tcstats "Vlan 10 multicast packets flooded to non-router port" + log_test "Flood unknown vlan multicast packets to router port only" + + tc filter del dev $swp2 egress pref 10 + tc filter del dev $swp1 egress pref 10 + bridge vlan del vid 10 dev br0 self + bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_query_response_interval 1000 + bridge vlan set vid 10 dev $swp2 mcast_router 1 + bridge vlan set vid 10 dev $swp1 mcast_router 1 +} + +vlmc_filtering_test() +{ + RET=0 + ip link set dev br0 type bridge vlan_filtering 0 + ip -j -d link show dev br0 | \ + jq -e "select(.[0].linkinfo.info_data.mcast_vlan_snooping == 1)" &>/dev/null + check_fail $? "Vlan filtering is disabled but multicast vlan snooping is still enabled" + log_test "Disable multicast vlan snooping when vlan filtering is disabled" +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh index c15c6c85c984..1c8a26046589 100755 --- a/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh +++ b/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh @@ -27,8 +27,9 @@ h2_destroy() switch_create() { - # 10 Seconds ageing time. - ip link add dev br0 type bridge ageing_time 1000 mcast_snooping 0 + ip link add dev br0 type bridge \ + ageing_time $LOW_AGEING_TIME \ + mcast_snooping 0 ip link set dev $swp1 master br0 ip link set dev $swp2 master br0 diff --git a/tools/testing/selftests/net/forwarding/config b/tools/testing/selftests/net/forwarding/config index da96eff72a8e..8d7a1a004b7c 100644 --- a/tools/testing/selftests/net/forwarding/config +++ b/tools/testing/selftests/net/forwarding/config @@ -6,9 +6,49 @@ CONFIG_IPV6_MULTIPLE_TABLES=y CONFIG_NET_VRF=m CONFIG_BPF_SYSCALL=y CONFIG_CGROUP_BPF=y +CONFIG_DUMMY=m +CONFIG_IPV6=y +CONFIG_IPV6_GRE=m +CONFIG_IPV6_MROUTE=y +CONFIG_IPV6_MROUTE_MULTIPLE_TABLES=y +CONFIG_IPV6_PIMSM_V2=y +CONFIG_IP_MROUTE=y +CONFIG_IP_MROUTE_MULTIPLE_TABLES=y +CONFIG_IP_PIMSM_V1=y +CONFIG_IP_PIMSM_V2=y +CONFIG_MACVLAN=m +CONFIG_NET_ACT_CT=m +CONFIG_NET_ACT_MIRRED=m +CONFIG_NET_ACT_MPLS=m +CONFIG_NET_ACT_PEDIT=m +CONFIG_NET_ACT_POLICE=m +CONFIG_NET_ACT_SAMPLE=m +CONFIG_NET_ACT_SKBEDIT=m +CONFIG_NET_ACT_TUNNEL_KEY=m +CONFIG_NET_ACT_VLAN=m CONFIG_NET_CLS_FLOWER=m +CONFIG_NET_CLS_MATCHALL=m +CONFIG_NET_CLS_BASIC=m +CONFIG_NET_EMATCH=y +CONFIG_NET_EMATCH_META=m +CONFIG_NET_IPGRE=m +CONFIG_NET_IPGRE_DEMUX=m +CONFIG_NET_IPIP=m +CONFIG_NET_SCH_ETS=m CONFIG_NET_SCH_INGRESS=m CONFIG_NET_ACT_GACT=m +CONFIG_NET_SCH_PRIO=m +CONFIG_NET_SCH_RED=m +CONFIG_NET_SCH_TBF=m +CONFIG_NET_TC_SKB_EXT=y +CONFIG_NET_TEAM=y +CONFIG_NET_TEAM_MODE_LOADBALANCE=y +CONFIG_NETFILTER=y +CONFIG_NF_CONNTRACK=m +CONFIG_NF_FLOW_TABLE=m +CONFIG_NF_TABLES=m CONFIG_VETH=m CONFIG_NAMESPACES=y CONFIG_NET_NS=y +CONFIG_VXLAN=m +CONFIG_XFRM_USER=m diff --git a/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh b/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh new file mode 100755 index 000000000000..1783c10215e5 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh @@ -0,0 +1,372 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test traffic distribution between two paths when using custom hash policy. +# +# +--------------------------------+ +# | H1 | +# | $h1 + | +# | 198.51.100.{2-253}/24 | | +# | 2001:db8:1::{2-fd}/64 | | +# +-------------------------|------+ +# | +# +-------------------------|-------------------------+ +# | SW1 | | +# | $rp1 + | +# | 198.51.100.1/24 | +# | 2001:db8:1::1/64 | +# | | +# | | +# | $rp11 + + $rp12 | +# | 192.0.2.1/28 | | 192.0.2.17/28 | +# | 2001:db8:2::1/64 | | 2001:db8:3::1/64 | +# +------------------|-------------|------------------+ +# | | +# +------------------|-------------|------------------+ +# | SW2 | | | +# | | | | +# | $rp21 + + $rp22 | +# | 192.0.2.2/28 192.0.2.18/28 | +# | 2001:db8:2::2/64 2001:db8:3::2/64 | +# | | +# | | +# | $rp2 + | +# | 203.0.113.1/24 | | +# | 2001:db8:4::1/64 | | +# +-------------------------|-------------------------+ +# | +# +-------------------------|------+ +# | H2 | | +# | $h2 + | +# | 203.0.113.{2-253}/24 | +# | 2001:db8:4::{2-fd}/64 | +# +--------------------------------+ + +ALL_TESTS=" + ping_ipv4 + ping_ipv6 + custom_hash +" + +NUM_NETIFS=8 +source lib.sh + +h1_create() +{ + simple_if_init $h1 198.51.100.2/24 2001:db8:1::2/64 + ip route add vrf v$h1 default via 198.51.100.1 dev $h1 + ip -6 route add vrf v$h1 default via 2001:db8:1::1 dev $h1 +} + +h1_destroy() +{ + ip -6 route del vrf v$h1 default + ip route del vrf v$h1 default + simple_if_fini $h1 198.51.100.2/24 2001:db8:1::2/64 +} + +sw1_create() +{ + simple_if_init $rp1 198.51.100.1/24 2001:db8:1::1/64 + __simple_if_init $rp11 v$rp1 192.0.2.1/28 2001:db8:2::1/64 + __simple_if_init $rp12 v$rp1 192.0.2.17/28 2001:db8:3::1/64 + + ip route add vrf v$rp1 203.0.113.0/24 \ + nexthop via 192.0.2.2 dev $rp11 \ + nexthop via 192.0.2.18 dev $rp12 + + ip -6 route add vrf v$rp1 2001:db8:4::/64 \ + nexthop via 2001:db8:2::2 dev $rp11 \ + nexthop via 2001:db8:3::2 dev $rp12 +} + +sw1_destroy() +{ + ip -6 route del vrf v$rp1 2001:db8:4::/64 + + ip route del vrf v$rp1 203.0.113.0/24 + + __simple_if_fini $rp12 192.0.2.17/28 2001:db8:3::1/64 + __simple_if_fini $rp11 192.0.2.1/28 2001:db8:2::1/64 + simple_if_fini $rp1 198.51.100.1/24 2001:db8:1::1/64 +} + +sw2_create() +{ + simple_if_init $rp2 203.0.113.1/24 2001:db8:4::1/64 + __simple_if_init $rp21 v$rp2 192.0.2.2/28 2001:db8:2::2/64 + __simple_if_init $rp22 v$rp2 192.0.2.18/28 2001:db8:3::2/64 + + ip route add vrf v$rp2 198.51.100.0/24 \ + nexthop via 192.0.2.1 dev $rp21 \ + nexthop via 192.0.2.17 dev $rp22 + + ip -6 route add vrf v$rp2 2001:db8:1::/64 \ + nexthop via 2001:db8:2::1 dev $rp21 \ + nexthop via 2001:db8:3::1 dev $rp22 +} + +sw2_destroy() +{ + ip -6 route del vrf v$rp2 2001:db8:1::/64 + + ip route del vrf v$rp2 198.51.100.0/24 + + __simple_if_fini $rp22 192.0.2.18/28 2001:db8:3::2/64 + __simple_if_fini $rp21 192.0.2.2/28 2001:db8:2::2/64 + simple_if_fini $rp2 203.0.113.1/24 2001:db8:4::1/64 +} + +h2_create() +{ + simple_if_init $h2 203.0.113.2/24 2001:db8:4::2/64 + ip route add vrf v$h2 default via 203.0.113.1 dev $h2 + ip -6 route add vrf v$h2 default via 2001:db8:4::1 dev $h2 +} + +h2_destroy() +{ + ip -6 route del vrf v$h2 default + ip route del vrf v$h2 default + simple_if_fini $h2 203.0.113.2/24 2001:db8:4::2/64 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + + rp1=${NETIFS[p2]} + + rp11=${NETIFS[p3]} + rp21=${NETIFS[p4]} + + rp12=${NETIFS[p5]} + rp22=${NETIFS[p6]} + + rp2=${NETIFS[p7]} + + h2=${NETIFS[p8]} + + vrf_prepare + h1_create + sw1_create + sw2_create + h2_create + + forwarding_enable +} + +cleanup() +{ + pre_cleanup + + forwarding_restore + + h2_destroy + sw2_destroy + sw1_destroy + h1_destroy + vrf_cleanup +} + +ping_ipv4() +{ + ping_test $h1 203.0.113.2 +} + +ping_ipv6() +{ + ping6_test $h1 2001:db8:4::2 +} + +send_src_ipv4() +{ + ip vrf exec v$h1 $MZ $h1 -q -p 64 \ + -A "198.51.100.2-198.51.100.253" -B 203.0.113.2 \ + -d $MZ_DELAY -c 50 -t udp "sp=20000,dp=30000" +} + +send_dst_ipv4() +{ + ip vrf exec v$h1 $MZ $h1 -q -p 64 \ + -A 198.51.100.2 -B "203.0.113.2-203.0.113.253" \ + -d $MZ_DELAY -c 50 -t udp "sp=20000,dp=30000" +} + +send_src_udp4() +{ + ip vrf exec v$h1 $MZ $h1 -q -p 64 \ + -A 198.51.100.2 -B 203.0.113.2 \ + -d $MZ_DELAY -t udp "sp=0-32768,dp=30000" +} + +send_dst_udp4() +{ + ip vrf exec v$h1 $MZ $h1 -q -p 64 \ + -A 198.51.100.2 -B 203.0.113.2 \ + -d $MZ_DELAY -t udp "sp=20000,dp=0-32768" +} + +send_src_ipv6() +{ + ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \ + -A "2001:db8:1::2-2001:db8:1::fd" -B 2001:db8:4::2 \ + -d $MZ_DELAY -c 50 -t udp "sp=20000,dp=30000" +} + +send_dst_ipv6() +{ + ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \ + -A 2001:db8:1::2 -B "2001:db8:4::2-2001:db8:4::fd" \ + -d $MZ_DELAY -c 50 -t udp "sp=20000,dp=30000" +} + +send_flowlabel() +{ + # Generate 16384 echo requests, each with a random flow label. + for _ in $(seq 1 16384); do + ip vrf exec v$h1 \ + $PING6 2001:db8:4::2 -F 0 -c 1 -q >/dev/null 2>&1 + done +} + +send_src_udp6() +{ + ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \ + -A 2001:db8:1::2 -B 2001:db8:4::2 \ + -d $MZ_DELAY -t udp "sp=0-32768,dp=30000" +} + +send_dst_udp6() +{ + ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \ + -A 2001:db8:1::2 -B 2001:db8:4::2 \ + -d $MZ_DELAY -t udp "sp=20000,dp=0-32768" +} + +custom_hash_test() +{ + local field="$1"; shift + local balanced="$1"; shift + local send_flows="$@" + + RET=0 + + local t0_rp11=$(link_stats_tx_packets_get $rp11) + local t0_rp12=$(link_stats_tx_packets_get $rp12) + + $send_flows + + local t1_rp11=$(link_stats_tx_packets_get $rp11) + local t1_rp12=$(link_stats_tx_packets_get $rp12) + + local d_rp11=$((t1_rp11 - t0_rp11)) + local d_rp12=$((t1_rp12 - t0_rp12)) + + local diff=$((d_rp12 - d_rp11)) + local sum=$((d_rp11 + d_rp12)) + + local pct=$(echo "$diff / $sum * 100" | bc -l) + local is_balanced=$(echo "-20 <= $pct && $pct <= 20" | bc) + + [[ ( $is_balanced -eq 1 && $balanced == "balanced" ) || + ( $is_balanced -eq 0 && $balanced == "unbalanced" ) ]] + check_err $? "Expected traffic to be $balanced, but it is not" + + log_test "Multipath hash field: $field ($balanced)" + log_info "Packets sent on path1 / path2: $d_rp11 / $d_rp12" +} + +custom_hash_v4() +{ + log_info "Running IPv4 custom multipath hash tests" + + sysctl_set net.ipv4.fib_multipath_hash_policy 3 + + # Prevent the neighbour table from overflowing, as different neighbour + # entries will be created on $ol4 when using different destination IPs. + sysctl_set net.ipv4.neigh.default.gc_thresh1 1024 + sysctl_set net.ipv4.neigh.default.gc_thresh2 1024 + sysctl_set net.ipv4.neigh.default.gc_thresh3 1024 + + sysctl_set net.ipv4.fib_multipath_hash_fields 0x0001 + custom_hash_test "Source IP" "balanced" send_src_ipv4 + custom_hash_test "Source IP" "unbalanced" send_dst_ipv4 + + sysctl_set net.ipv4.fib_multipath_hash_fields 0x0002 + custom_hash_test "Destination IP" "balanced" send_dst_ipv4 + custom_hash_test "Destination IP" "unbalanced" send_src_ipv4 + + sysctl_set net.ipv4.fib_multipath_hash_fields 0x0010 + custom_hash_test "Source port" "balanced" send_src_udp4 + custom_hash_test "Source port" "unbalanced" send_dst_udp4 + + sysctl_set net.ipv4.fib_multipath_hash_fields 0x0020 + custom_hash_test "Destination port" "balanced" send_dst_udp4 + custom_hash_test "Destination port" "unbalanced" send_src_udp4 + + sysctl_restore net.ipv4.neigh.default.gc_thresh3 + sysctl_restore net.ipv4.neigh.default.gc_thresh2 + sysctl_restore net.ipv4.neigh.default.gc_thresh1 + + sysctl_restore net.ipv4.fib_multipath_hash_policy +} + +custom_hash_v6() +{ + log_info "Running IPv6 custom multipath hash tests" + + sysctl_set net.ipv6.fib_multipath_hash_policy 3 + + # Prevent the neighbour table from overflowing, as different neighbour + # entries will be created on $ol4 when using different destination IPs. + sysctl_set net.ipv6.neigh.default.gc_thresh1 1024 + sysctl_set net.ipv6.neigh.default.gc_thresh2 1024 + sysctl_set net.ipv6.neigh.default.gc_thresh3 1024 + + sysctl_set net.ipv6.fib_multipath_hash_fields 0x0001 + custom_hash_test "Source IP" "balanced" send_src_ipv6 + custom_hash_test "Source IP" "unbalanced" send_dst_ipv6 + + sysctl_set net.ipv6.fib_multipath_hash_fields 0x0002 + custom_hash_test "Destination IP" "balanced" send_dst_ipv6 + custom_hash_test "Destination IP" "unbalanced" send_src_ipv6 + + sysctl_set net.ipv6.fib_multipath_hash_fields 0x0008 + custom_hash_test "Flowlabel" "balanced" send_flowlabel + custom_hash_test "Flowlabel" "unbalanced" send_src_ipv6 + + sysctl_set net.ipv6.fib_multipath_hash_fields 0x0010 + custom_hash_test "Source port" "balanced" send_src_udp6 + custom_hash_test "Source port" "unbalanced" send_dst_udp6 + + sysctl_set net.ipv6.fib_multipath_hash_fields 0x0020 + custom_hash_test "Destination port" "balanced" send_dst_udp6 + custom_hash_test "Destination port" "unbalanced" send_src_udp6 + + sysctl_restore net.ipv6.neigh.default.gc_thresh3 + sysctl_restore net.ipv6.neigh.default.gc_thresh2 + sysctl_restore net.ipv6.neigh.default.gc_thresh1 + + sysctl_restore net.ipv6.fib_multipath_hash_policy +} + +custom_hash() +{ + # Test that when the hash policy is set to custom, traffic is + # distributed only according to the fields set in the + # fib_multipath_hash_fields sysctl. + # + # Each time set a different field and make sure traffic is only + # distributed when the field is changed in the packet stream. + custom_hash_v4 + custom_hash_v6 +} + +trap cleanup EXIT + +setup_prepare +setup_wait +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/devlink_lib.sh b/tools/testing/selftests/net/forwarding/devlink_lib.sh index 9c12c4fd3afc..f1de525cfa55 100644 --- a/tools/testing/selftests/net/forwarding/devlink_lib.sh +++ b/tools/testing/selftests/net/forwarding/devlink_lib.sh @@ -1,6 +1,9 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + ############################################################################## # Defines @@ -9,15 +12,21 @@ if [[ ! -v DEVLINK_DEV ]]; then | jq -r '.port | keys[]' | cut -d/ -f-2) if [ -z "$DEVLINK_DEV" ]; then echo "SKIP: ${NETIFS[p1]} has no devlink device registered for it" - exit 1 + exit $ksft_skip fi if [[ "$(echo $DEVLINK_DEV | grep -c pci)" -eq 0 ]]; then echo "SKIP: devlink device's bus is not PCI" - exit 1 + exit $ksft_skip fi DEVLINK_VIDDID=$(lspci -s $(echo $DEVLINK_DEV | cut -d"/" -f2) \ -n | cut -d" " -f3) +elif [[ ! -z "$DEVLINK_DEV" ]]; then + devlink dev show $DEVLINK_DEV &> /dev/null + if [ $? -ne 0 ]; then + echo "SKIP: devlink device \"$DEVLINK_DEV\" not found" + exit $ksft_skip + fi fi ############################################################################## @@ -26,19 +35,19 @@ fi devlink help 2>&1 | grep resource &> /dev/null if [ $? -ne 0 ]; then echo "SKIP: iproute2 too old, missing devlink resource support" - exit 1 + exit $ksft_skip fi devlink help 2>&1 | grep trap &> /dev/null if [ $? -ne 0 ]; then echo "SKIP: iproute2 too old, missing devlink trap support" - exit 1 + exit $ksft_skip fi devlink dev help 2>&1 | grep info &> /dev/null if [ $? -ne 0 ]; then echo "SKIP: iproute2 too old, missing devlink dev info support" - exit 1 + exit $ksft_skip fi ############################################################################## @@ -318,6 +327,14 @@ devlink_trap_rx_bytes_get() | jq '.[][][]["stats"]["rx"]["bytes"]' } +devlink_trap_drop_packets_get() +{ + local trap_name=$1; shift + + devlink -js trap show $DEVLINK_DEV trap $trap_name \ + | jq '.[][][]["stats"]["rx"]["dropped"]' +} + devlink_trap_stats_idle_test() { local trap_name=$1; shift @@ -339,6 +356,24 @@ devlink_trap_stats_idle_test() fi } +devlink_trap_drop_stats_idle_test() +{ + local trap_name=$1; shift + local t0_packets t0_bytes + + t0_packets=$(devlink_trap_drop_packets_get $trap_name) + + sleep 1 + + t1_packets=$(devlink_trap_drop_packets_get $trap_name) + + if [[ $t0_packets -eq $t1_packets ]]; then + return 0 + else + return 1 + fi +} + devlink_traps_enable_all() { local trap_name @@ -468,25 +503,30 @@ devlink_trap_drop_cleanup() tc filter del dev $dev egress protocol $proto pref $pref handle $handle flower } -devlink_trap_stats_test() +devlink_trap_stats_check() { - local test_name=$1; shift local trap_name=$1; shift local send_one="$@" local t0_packets local t1_packets - RET=0 - t0_packets=$(devlink_trap_rx_packets_get $trap_name) $send_one && sleep 1 t1_packets=$(devlink_trap_rx_packets_get $trap_name) - if [[ $t1_packets -eq $t0_packets ]]; then - check_err 1 "Trap stats did not increase" - fi + [[ $t1_packets -ne $t0_packets ]] +} + +devlink_trap_stats_test() +{ + local test_name=$1; shift + + RET=0 + + devlink_trap_stats_check "$@" + check_err $? "Trap stats did not increase" log_test "$test_name" } @@ -528,12 +568,6 @@ devlink_trap_group_policer_get() | jq '.[][][]["policer"]' } -devlink_trap_policer_ids_get() -{ - devlink -j -p trap policer show \ - | jq '.[]["'$DEVLINK_DEV'"][]["policer"]' -} - devlink_port_by_netdev() { local if_name=$1 @@ -555,3 +589,8 @@ devlink_cell_size_get() devlink sb pool show "$DEVLINK_DEV" pool 0 -j \ | jq '.pool[][].cell_size' } + +devlink_pool_size_get() +{ + devlink sb show "$DEVLINK_DEV" -j | jq '.[][][]["size"]' +} diff --git a/tools/testing/selftests/net/forwarding/dual_vxlan_bridge.sh b/tools/testing/selftests/net/forwarding/dual_vxlan_bridge.sh new file mode 100755 index 000000000000..68ee92df3e07 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/dual_vxlan_bridge.sh @@ -0,0 +1,367 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# +--------------------+ +----------------------+ +# | H1 (vrf) | | H2 (vrf) | +# | + h1.10 | | + h2.20 | +# | | 192.0.2.1/28 | | | 192.0.2.2/28 | +# | | | | | | +# | + $h1 | | + $h2 | +# | | | | | | +# +----|---------------+ +--|-------------------+ +# | | +# +----|--------------------------------------------------|--------------------+ +# | SW | | | +# | +--|-------------------------------+ +----------------|------------------+ | +# | | + $swp1 BR1 (802.1ad) | | BR2 (802.1d) + $swp2 | | +# | | vid 100 pvid untagged | | | | | +# | | | | + $swp2.20 | | +# | | | | | | +# | | + vx100 (vxlan) | | + vx200 (vxlan) | | +# | | local 192.0.2.17 | | local 192.0.2.17 | | +# | | remote 192.0.2.34 | | remote 192.0.2.50 | | +# | | id 1000 dstport $VXPORT | | id 2000 dstport $VXPORT | | +# | | vid 100 pvid untagged | | | | +# | +--------------------------------- + +-----------------------------------+ | +# | | +# | 192.0.2.32/28 via 192.0.2.18 | +# | 192.0.2.48/28 via 192.0.2.18 | +# | | +# | + $rp1 | +# | | 192.0.2.17/28 | +# +----|-----------------------------------------------------------------------+ +# | +# +----|--------------------------------------------------------+ +# | | VRP2 (vrf) | +# | + $rp2 | +# | 192.0.2.18/28 | +# | | (maybe) HW +# ============================================================================= +# | | (likely) SW +# | + v1 (veth) + v3 (veth) | +# | | 192.0.2.33/28 | 192.0.2.49/28 | +# +----|---------------------------------------|----------------+ +# | | +# +----|------------------------------+ +----|------------------------------+ +# | + v2 (veth) NS1 (netns) | | + v4 (veth) NS2 (netns) | +# | 192.0.2.34/28 | | 192.0.2.50/28 | +# | | | | +# | 192.0.2.16/28 via 192.0.2.33 | | 192.0.2.16/28 via 192.0.2.49 | +# | 192.0.2.50/32 via 192.0.2.33 | | 192.0.2.34/32 via 192.0.2.49 | +# | | | | +# | +-------------------------------+ | | +-------------------------------+ | +# | | BR3 (802.1ad) | | | | BR3 (802.1d) | | +# | | + vx100 (vxlan) | | | | + vx200 (vxlan) | | +# | | local 192.0.2.34 | | | | local 192.0.2.50 | | +# | | remote 192.0.2.17 | | | | remote 192.0.2.17 | | +# | | remote 192.0.2.50 | | | | remote 192.0.2.34 | | +# | | id 1000 dstport $VXPORT | | | | id 2000 dstport $VXPORT | | +# | | vid 100 pvid untagged | | | | | | +# | | | | | | + w1.20 | | +# | | | | | | | | | +# | | + w1 (veth) | | | | + w1 (veth) | | +# | | | vid 100 pvid untagged | | | | | | | +# | +--|----------------------------+ | | +--|----------------------------+ | +# | | | | | | +# | +--|----------------------------+ | | +--|----------------------------+ | +# | | | VW2 (vrf) | | | | | VW2 (vrf) | | +# | | + w2 (veth) | | | | + w2 (veth) | | +# | | | | | | | | | | +# | | | | | | | | | | +# | | + w2.10 | | | | + w2.20 | | +# | | 192.0.2.3/28 | | | | 192.0.2.4/28 | | +# | +-------------------------------+ | | +-------------------------------+ | +# +-----------------------------------+ +-----------------------------------+ + +: ${VXPORT:=4789} +export VXPORT + +: ${ALL_TESTS:=" + ping_ipv4 + "} + +NUM_NETIFS=6 +source lib.sh + +h1_create() +{ + simple_if_init $h1 + tc qdisc add dev $h1 clsact + vlan_create $h1 10 v$h1 192.0.2.1/28 +} + +h1_destroy() +{ + vlan_destroy $h1 10 + tc qdisc del dev $h1 clsact + simple_if_fini $h1 +} + +h2_create() +{ + simple_if_init $h2 + tc qdisc add dev $h2 clsact + vlan_create $h2 20 v$h2 192.0.2.2/28 +} + +h2_destroy() +{ + vlan_destroy $h2 20 + tc qdisc del dev $h2 clsact + simple_if_fini $h2 +} + +rp1_set_addr() +{ + ip address add dev $rp1 192.0.2.17/28 + + ip route add 192.0.2.32/28 nexthop via 192.0.2.18 + ip route add 192.0.2.48/28 nexthop via 192.0.2.18 +} + +rp1_unset_addr() +{ + ip route del 192.0.2.48/28 nexthop via 192.0.2.18 + ip route del 192.0.2.32/28 nexthop via 192.0.2.18 + + ip address del dev $rp1 192.0.2.17/28 +} + +switch_create() +{ + #### BR1 #### + ip link add name br1 type bridge vlan_filtering 1 \ + vlan_protocol 802.1ad vlan_default_pvid 0 mcast_snooping 0 + ip link set dev br1 addrgenmode none + # Make sure the bridge uses the MAC address of the local port and not + # that of the VxLAN's device. + ip link set dev br1 address $(mac_get $swp1) + ip link set dev br1 up + + #### BR2 #### + ip link add name br2 type bridge vlan_filtering 0 mcast_snooping 0 + # Make sure the bridge uses the MAC address of the local port and not + # that of the VxLAN's device. + ip link set dev br2 address $(mac_get $swp2) + ip link set dev br2 up + + ip link set dev $rp1 up + rp1_set_addr + + #### VX100 #### + ip link add name vx100 type vxlan id 1000 local 192.0.2.17 \ + dstport "$VXPORT" nolearning noudpcsum tos inherit ttl 100 + ip link set dev vx100 up + + ip link set dev vx100 master br1 + bridge vlan add vid 100 dev vx100 pvid untagged + + ip link set dev $swp1 master br1 + ip link set dev $swp1 up + bridge vlan add vid 100 dev $swp1 pvid untagged + + #### VX200 #### + ip link add name vx200 type vxlan id 2000 local 192.0.2.17 \ + dstport "$VXPORT" nolearning noudpcsum tos inherit ttl 100 + ip link set dev vx200 up + + ip link set dev vx200 master br2 + + ip link set dev $swp2 up + ip link add name $swp2.20 link $swp2 type vlan id 20 + ip link set dev $swp2.20 master br2 + ip link set dev $swp2.20 up + + bridge fdb append dev vx100 00:00:00:00:00:00 dst 192.0.2.34 self + bridge fdb append dev vx200 00:00:00:00:00:00 dst 192.0.2.50 self +} + +switch_destroy() +{ + bridge fdb del dev vx200 00:00:00:00:00:00 dst 192.0.2.50 self + bridge fdb del dev vx100 00:00:00:00:00:00 dst 192.0.2.34 self + + ip link set dev vx200 nomaster + ip link set dev vx200 down + ip link del dev vx200 + + ip link del dev $swp2.20 + ip link set dev $swp2 down + ip link set dev $swp2 nomaster + + bridge vlan del vid 100 dev $swp1 + ip link set dev $swp1 down + ip link set dev $swp1 nomaster + + ip link set dev vx100 nomaster + ip link set dev vx100 down + ip link del dev vx100 + + rp1_unset_addr + ip link set dev $rp1 down + + ip link set dev br2 down + ip link del dev br2 + + ip link set dev br1 down + ip link del dev br1 +} + +vrp2_create() +{ + simple_if_init $rp2 192.0.2.18/28 + __simple_if_init v1 v$rp2 192.0.2.33/28 + __simple_if_init v3 v$rp2 192.0.2.49/28 + tc qdisc add dev v1 clsact +} + +vrp2_destroy() +{ + tc qdisc del dev v1 clsact + __simple_if_fini v3 192.0.2.49/28 + __simple_if_fini v1 192.0.2.33/28 + simple_if_fini $rp2 192.0.2.18/28 +} + +ns_init_common() +{ + local in_if=$1; shift + local in_addr=$1; shift + local other_in_addr=$1; shift + local vxlan_name=$1; shift + local vxlan_id=$1; shift + local vlan_id=$1; shift + local host_addr=$1; shift + local nh_addr=$1; shift + + ip link set dev $in_if up + ip address add dev $in_if $in_addr/28 + tc qdisc add dev $in_if clsact + + ip link add name br3 type bridge vlan_filtering 0 + ip link set dev br3 up + + ip link add name w1 type veth peer name w2 + + ip link set dev w1 master br3 + ip link set dev w1 up + + ip link add name $vxlan_name type vxlan id $vxlan_id local $in_addr \ + dstport "$VXPORT" + ip link set dev $vxlan_name up + bridge fdb append dev $vxlan_name 00:00:00:00:00:00 dst 192.0.2.17 self + bridge fdb append dev $vxlan_name 00:00:00:00:00:00 dst $other_in_addr self + + ip link set dev $vxlan_name master br3 + tc qdisc add dev $vxlan_name clsact + + simple_if_init w2 + vlan_create w2 $vlan_id vw2 $host_addr/28 + + ip route add 192.0.2.16/28 nexthop via $nh_addr + ip route add $other_in_addr/32 nexthop via $nh_addr +} +export -f ns_init_common + +ns1_create() +{ + ip netns add ns1 + ip link set dev v2 netns ns1 + in_ns ns1 \ + ns_init_common v2 192.0.2.34 192.0.2.50 vx100 1000 10 192.0.2.3 \ + 192.0.2.33 + + in_ns ns1 bridge vlan add vid 100 dev vx100 pvid untagged +} + +ns1_destroy() +{ + ip netns exec ns1 ip link set dev v2 netns 1 + ip netns del ns1 +} + +ns2_create() +{ + ip netns add ns2 + ip link set dev v4 netns ns2 + in_ns ns2 \ + ns_init_common v4 192.0.2.50 192.0.2.34 vx200 2000 20 192.0.2.4 \ + 192.0.2.49 + + in_ns ns2 ip link add name w1.20 link w1 type vlan id 20 + in_ns ns2 ip link set dev w1.20 master br3 + in_ns ns2 ip link set dev w1.20 up +} + +ns2_destroy() +{ + ip netns exec ns2 ip link set dev v4 netns 1 + ip netns del ns2 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + rp1=${NETIFS[p5]} + rp2=${NETIFS[p6]} + + vrf_prepare + forwarding_enable + + h1_create + h2_create + switch_create + + ip link add name v1 type veth peer name v2 + ip link add name v3 type veth peer name v4 + vrp2_create + ns1_create + ns2_create + + r1_mac=$(in_ns ns1 mac_get w2) + r2_mac=$(in_ns ns2 mac_get w2) + h2_mac=$(mac_get $h2) +} + +cleanup() +{ + pre_cleanup + + ns2_destroy + ns1_destroy + vrp2_destroy + ip link del dev v3 + ip link del dev v1 + + switch_destroy + h2_destroy + h1_destroy + + forwarding_restore + vrf_cleanup +} + +ping_ipv4() +{ + ping_test $h1 192.0.2.3 ": local->remote 1 through VxLAN with an 802.1ad bridge" + ping_test $h2 192.0.2.4 ": local->remote 2 through VxLAN with an 802.1d bridge" +} + +test_all() +{ + echo "Running tests with UDP port $VXPORT" + tests_run +} + +trap cleanup EXIT + +setup_prepare +setup_wait +test_all + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/ethtool.sh b/tools/testing/selftests/net/forwarding/ethtool.sh index dbb9fcf759e0..aa2eafb7b243 100755 --- a/tools/testing/selftests/net/forwarding/ethtool.sh +++ b/tools/testing/selftests/net/forwarding/ethtool.sh @@ -286,6 +286,8 @@ different_speeds_autoneg_on() ethtool -s $h1 autoneg on } +skip_on_veth + trap cleanup EXIT setup_prepare diff --git a/tools/testing/selftests/net/forwarding/ethtool_extended_state.sh b/tools/testing/selftests/net/forwarding/ethtool_extended_state.sh index 4b42dfd4efd1..17f89c3b7c02 100755 --- a/tools/testing/selftests/net/forwarding/ethtool_extended_state.sh +++ b/tools/testing/selftests/net/forwarding/ethtool_extended_state.sh @@ -11,6 +11,8 @@ NUM_NETIFS=2 source lib.sh source ethtool_lib.sh +TIMEOUT=$((WAIT_TIMEOUT * 1000)) # ms + setup_prepare() { swp1=${NETIFS[p1]} @@ -18,7 +20,7 @@ setup_prepare() swp3=$NETIF_NO_CABLE } -ethtool_extended_state_check() +ethtool_ext_state() { local dev=$1; shift local expected_ext_state=$1; shift @@ -30,21 +32,27 @@ ethtool_extended_state_check() | sed -e 's/^[[:space:]]*//') ext_state=$(echo $ext_state | cut -d "," -f1) - [[ $ext_state == $expected_ext_state ]] - check_err $? "Expected \"$expected_ext_state\", got \"$ext_state\"" - - [[ $ext_substate == $expected_ext_substate ]] - check_err $? "Expected \"$expected_ext_substate\", got \"$ext_substate\"" + if [[ $ext_state != $expected_ext_state ]]; then + echo "Expected \"$expected_ext_state\", got \"$ext_state\"" + return 1 + fi + if [[ $ext_substate != $expected_ext_substate ]]; then + echo "Expected \"$expected_ext_substate\", got \"$ext_substate\"" + return 1 + fi } autoneg() { + local msg + RET=0 ip link set dev $swp1 up - sleep 4 - ethtool_extended_state_check $swp1 "Autoneg" "No partner detected" + msg=$(busywait $TIMEOUT ethtool_ext_state $swp1 \ + "Autoneg" "No partner detected") + check_err $? "$msg" log_test "Autoneg, No partner detected" @@ -53,6 +61,8 @@ autoneg() autoneg_force_mode() { + local msg + RET=0 ip link set dev $swp1 up @@ -65,12 +75,13 @@ autoneg_force_mode() ethtool_set $swp1 speed $speed1 autoneg off ethtool_set $swp2 speed $speed2 autoneg off - sleep 4 - ethtool_extended_state_check $swp1 "Autoneg" \ - "No partner detected during force mode" + msg=$(busywait $TIMEOUT ethtool_ext_state $swp1 \ + "Autoneg" "No partner detected during force mode") + check_err $? "$msg" - ethtool_extended_state_check $swp2 "Autoneg" \ - "No partner detected during force mode" + msg=$(busywait $TIMEOUT ethtool_ext_state $swp2 \ + "Autoneg" "No partner detected during force mode") + check_err $? "$msg" log_test "Autoneg, No partner detected during force mode" @@ -83,18 +94,22 @@ autoneg_force_mode() no_cable() { + local msg + RET=0 ip link set dev $swp3 up - sleep 1 - ethtool_extended_state_check $swp3 "No cable" + msg=$(busywait $TIMEOUT ethtool_ext_state $swp3 "No cable") + check_err $? "$msg" log_test "No cable" ip link set dev $swp3 down } +skip_on_veth + setup_prepare tests_run diff --git a/tools/testing/selftests/net/forwarding/ethtool_lib.sh b/tools/testing/selftests/net/forwarding/ethtool_lib.sh index 9188e624dec0..b9bfb45085af 100644 --- a/tools/testing/selftests/net/forwarding/ethtool_lib.sh +++ b/tools/testing/selftests/net/forwarding/ethtool_lib.sh @@ -22,6 +22,40 @@ ethtool_set() check_err $out "error in configuration. $cmd" } +dev_linkmodes_params_get() +{ + local dev=$1; shift + local adver=$1; shift + local -a linkmodes_params + local param_count + local arr + + if (($adver)); then + mode="Advertised link modes" + else + mode="Supported link modes" + fi + + local -a dev_linkmodes=($(dev_speeds_get $dev 1 $adver)) + for ((i=0; i<${#dev_linkmodes[@]}; i++)); do + linkmodes_params[$i]=$(echo -e "${dev_linkmodes[$i]}" | \ + # Replaces all non numbers with spaces + sed -e 's/[^0-9]/ /g' | \ + # Squeeze spaces in sequence to 1 space + tr -s ' ') + # Count how many numbers were found in the linkmode + param_count=$(echo "${linkmodes_params[$i]}" | wc -w) + if [[ $param_count -eq 1 ]]; then + linkmodes_params[$i]="${linkmodes_params[$i]} 1" + elif [[ $param_count -ge 3 ]]; then + arr=(${linkmodes_params[$i]}) + # Take only first two params + linkmodes_params[$i]=$(echo "${arr[@]:0:2}") + fi + done + echo ${linkmodes_params[@]} +} + dev_speeds_get() { local dev=$1; shift diff --git a/tools/testing/selftests/net/forwarding/ethtool_mm.sh b/tools/testing/selftests/net/forwarding/ethtool_mm.sh new file mode 100755 index 000000000000..50d5bfb17ef1 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/ethtool_mm.sh @@ -0,0 +1,340 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +ALL_TESTS=" + manual_with_verification_h1_to_h2 + manual_with_verification_h2_to_h1 + manual_without_verification_h1_to_h2 + manual_without_verification_h2_to_h1 + manual_failed_verification_h1_to_h2 + manual_failed_verification_h2_to_h1 + lldp +" + +NUM_NETIFS=2 +REQUIRE_MZ=no +PREEMPTIBLE_PRIO=0 +source lib.sh + +traffic_test() +{ + local if=$1; shift + local src=$1; shift + local num_pkts=10000 + local before= + local after= + local delta= + + if [ ${has_pmac_stats[$if]} = false ]; then + src="aggregate" + fi + + before=$(ethtool_std_stats_get $if "eth-mac" "FramesTransmittedOK" $src) + + $MZ $if -q -c $num_pkts -p 64 -b bcast -t ip -R $PREEMPTIBLE_PRIO + + after=$(ethtool_std_stats_get $if "eth-mac" "FramesTransmittedOK" $src) + + delta=$((after - before)) + + # Allow an extra 1% tolerance for random packets sent by the stack + [ $delta -ge $num_pkts ] && [ $delta -le $((num_pkts + 100)) ] +} + +manual_with_verification() +{ + local tx=$1; shift + local rx=$1; shift + + RET=0 + + # It isn't completely clear from IEEE 802.3-2018 Figure 99-5: Transmit + # Processing state diagram whether the "send_r" variable (send response + # to verification frame) should be taken into consideration while the + # MAC Merge TX direction is disabled. That being said, at least the + # NXP ENETC does not, and requires tx-enabled on in order to respond to + # the link partner's verification frames. + ethtool --set-mm $rx tx-enabled on + ethtool --set-mm $tx verify-enabled on tx-enabled on + + # Wait for verification to finish + sleep 1 + + ethtool --json --show-mm $tx | jq -r '.[]."verify-status"' | \ + grep -q 'SUCCEEDED' + check_err "$?" "Verification did not succeed" + + ethtool --json --show-mm $tx | jq -r '.[]."tx-active"' | grep -q 'true' + check_err "$?" "pMAC TX is not active" + + traffic_test $tx "pmac" + check_err "$?" "Traffic did not get sent through $tx's pMAC" + + ethtool --set-mm $tx verify-enabled off tx-enabled off + ethtool --set-mm $rx tx-enabled off + + log_test "Manual configuration with verification: $tx to $rx" +} + +manual_with_verification_h1_to_h2() +{ + manual_with_verification $h1 $h2 +} + +manual_with_verification_h2_to_h1() +{ + manual_with_verification $h2 $h1 +} + +manual_without_verification() +{ + local tx=$1; shift + local rx=$1; shift + + RET=0 + + ethtool --set-mm $tx verify-enabled off tx-enabled on + + ethtool --json --show-mm $tx | jq -r '.[]."verify-status"' | \ + grep -q 'DISABLED' + check_err "$?" "Verification is not disabled" + + ethtool --json --show-mm $tx | jq -r '.[]."tx-active"' | grep -q 'true' + check_err "$?" "pMAC TX is not active" + + traffic_test $tx "pmac" + check_err "$?" "Traffic did not get sent through $tx's pMAC" + + ethtool --set-mm $tx verify-enabled off tx-enabled off + + log_test "Manual configuration without verification: $tx to $rx" +} + +manual_without_verification_h1_to_h2() +{ + manual_without_verification $h1 $h2 +} + +manual_without_verification_h2_to_h1() +{ + manual_without_verification $h2 $h1 +} + +manual_failed_verification() +{ + local tx=$1; shift + local rx=$1; shift + + RET=0 + + ethtool --set-mm $rx pmac-enabled off + ethtool --set-mm $tx verify-enabled on tx-enabled on + + # Wait for verification to time out + sleep 1 + + ethtool --json --show-mm $tx | jq -r '.[]."verify-status"' | \ + grep -q 'SUCCEEDED' + check_fail "$?" "Verification succeeded when it shouldn't have" + + ethtool --json --show-mm $tx | jq -r '.[]."tx-active"' | grep -q 'true' + check_fail "$?" "pMAC TX is active when it shouldn't have" + + traffic_test $tx "emac" + check_err "$?" "Traffic did not get sent through $tx's eMAC" + + ethtool --set-mm $tx verify-enabled off tx-enabled off + ethtool --set-mm $rx pmac-enabled on + + log_test "Manual configuration with failed verification: $tx to $rx" +} + +manual_failed_verification_h1_to_h2() +{ + manual_failed_verification $h1 $h2 +} + +manual_failed_verification_h2_to_h1() +{ + manual_failed_verification $h2 $h1 +} + +smallest_supported_add_frag_size() +{ + local iface=$1 + local rx_min_frag_size= + + rx_min_frag_size=$(ethtool --json --show-mm $iface | \ + jq '.[]."rx-min-frag-size"') + + if [ $rx_min_frag_size -le 60 ]; then + echo 0 + elif [ $rx_min_frag_size -le 124 ]; then + echo 1 + elif [ $rx_min_frag_size -le 188 ]; then + echo 2 + elif [ $rx_min_frag_size -le 252 ]; then + echo 3 + else + echo "$iface: RX min frag size $rx_min_frag_size cannot be advertised over LLDP" + exit 1 + fi +} + +expected_add_frag_size() +{ + local iface=$1 + local requested=$2 + local min=$(smallest_supported_add_frag_size $iface) + + [ $requested -le $min ] && echo $min || echo $requested +} + +lldp_change_add_frag_size() +{ + local add_frag_size=$1 + local pattern= + + lldptool -T -i $h1 -V addEthCaps addFragSize=$add_frag_size >/dev/null + # Wait for TLVs to be received + sleep 2 + pattern=$(printf "Additional fragment size: %d" \ + $(expected_add_frag_size $h1 $add_frag_size)) + lldptool -i $h2 -t -n -V addEthCaps | grep -q "$pattern" +} + +lldp() +{ + RET=0 + + systemctl start lldpad + + # Configure the interfaces to receive and transmit LLDPDUs + lldptool -L -i $h1 adminStatus=rxtx >/dev/null + lldptool -L -i $h2 adminStatus=rxtx >/dev/null + + # Enable the transmission of Additional Ethernet Capabilities TLV + lldptool -T -i $h1 -V addEthCaps enableTx=yes >/dev/null + lldptool -T -i $h2 -V addEthCaps enableTx=yes >/dev/null + + # Wait for TLVs to be received + sleep 2 + + lldptool -i $h1 -t -n -V addEthCaps | \ + grep -q "Preemption capability active" + check_err "$?" "$h1 pMAC TX is not active" + + lldptool -i $h2 -t -n -V addEthCaps | \ + grep -q "Preemption capability active" + check_err "$?" "$h2 pMAC TX is not active" + + lldp_change_add_frag_size 3 + check_err "$?" "addFragSize 3" + + lldp_change_add_frag_size 2 + check_err "$?" "addFragSize 2" + + lldp_change_add_frag_size 1 + check_err "$?" "addFragSize 1" + + lldp_change_add_frag_size 0 + check_err "$?" "addFragSize 0" + + traffic_test $h1 "pmac" + check_err "$?" "Traffic did not get sent through $h1's pMAC" + + traffic_test $h2 "pmac" + check_err "$?" "Traffic did not get sent through $h2's pMAC" + + systemctl stop lldpad + + log_test "LLDP" +} + +h1_create() +{ + ip link set dev $h1 up + + tc qdisc add dev $h1 root mqprio num_tc 4 map 0 1 2 3 \ + queues 1@0 1@1 1@2 1@3 \ + fp P E E E \ + hw 1 + + ethtool --set-mm $h1 pmac-enabled on tx-enabled off verify-enabled off +} + +h2_create() +{ + ip link set dev $h2 up + + ethtool --set-mm $h2 pmac-enabled on tx-enabled off verify-enabled off + + tc qdisc add dev $h2 root mqprio num_tc 4 map 0 1 2 3 \ + queues 1@0 1@1 1@2 1@3 \ + fp P E E E \ + hw 1 +} + +h1_destroy() +{ + ethtool --set-mm $h1 pmac-enabled off tx-enabled off verify-enabled off + + tc qdisc del dev $h1 root + + ip link set dev $h1 down +} + +h2_destroy() +{ + tc qdisc del dev $h2 root + + ethtool --set-mm $h2 pmac-enabled off tx-enabled off verify-enabled off + + ip link set dev $h2 down +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + h2=${NETIFS[p2]} + + h1_create + h2_create +} + +cleanup() +{ + pre_cleanup + + h2_destroy + h1_destroy +} + +check_ethtool_mm_support +check_tc_fp_support +require_command lldptool +bail_on_lldpad "autoconfigure the MAC Merge layer" "configure it manually" + +for netif in ${NETIFS[@]}; do + ethtool --show-mm $netif 2>&1 &> /dev/null + if [[ $? -ne 0 ]]; then + echo "SKIP: $netif does not support MAC Merge" + exit $ksft_skip + fi + + if check_ethtool_pmac_std_stats_support $netif eth-mac; then + has_pmac_stats[$netif]=true + else + has_pmac_stats[$netif]=false + echo "$netif does not report pMAC statistics, falling back to aggregate" + fi +done + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/ethtool_rmon.sh b/tools/testing/selftests/net/forwarding/ethtool_rmon.sh new file mode 100755 index 000000000000..41a34a61f763 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/ethtool_rmon.sh @@ -0,0 +1,143 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +ALL_TESTS=" + rmon_rx_histogram + rmon_tx_histogram +" + +NUM_NETIFS=2 +source lib.sh + +ETH_FCS_LEN=4 +ETH_HLEN=$((6+6+2)) + +declare -A netif_mtu + +ensure_mtu() +{ + local iface=$1; shift + local len=$1; shift + local current=$(ip -j link show dev $iface | jq -r '.[0].mtu') + local required=$((len - ETH_HLEN - ETH_FCS_LEN)) + + if [ $current -lt $required ]; then + ip link set dev $iface mtu $required || return 1 + fi +} + +bucket_test() +{ + local iface=$1; shift + local neigh=$1; shift + local set=$1; shift + local bucket=$1; shift + local len=$1; shift + local num_rx=10000 + local num_tx=20000 + local expected= + local before= + local after= + local delta= + + # Mausezahn does not include FCS bytes in its length - but the + # histogram counters do + len=$((len - ETH_FCS_LEN)) + + before=$(ethtool --json -S $iface --groups rmon | \ + jq -r ".[0].rmon[\"${set}-pktsNtoM\"][$bucket].val") + + # Send 10k one way and 20k in the other, to detect counters + # mapped to the wrong direction + $MZ $neigh -q -c $num_rx -p $len -a own -b bcast -d 10us + $MZ $iface -q -c $num_tx -p $len -a own -b bcast -d 10us + + after=$(ethtool --json -S $iface --groups rmon | \ + jq -r ".[0].rmon[\"${set}-pktsNtoM\"][$bucket].val") + + delta=$((after - before)) + + expected=$([ $set = rx ] && echo $num_rx || echo $num_tx) + + # Allow some extra tolerance for other packets sent by the stack + [ $delta -ge $expected ] && [ $delta -le $((expected + 100)) ] +} + +rmon_histogram() +{ + local iface=$1; shift + local neigh=$1; shift + local set=$1; shift + local nbuckets=0 + local step= + + RET=0 + + while read -r -a bucket; do + step="$set-pkts${bucket[0]}to${bucket[1]} on $iface" + + for if in $iface $neigh; do + if ! ensure_mtu $if ${bucket[0]}; then + log_test_skip "$if does not support the required MTU for $step" + return + fi + done + + if ! bucket_test $iface $neigh $set $nbuckets ${bucket[0]}; then + check_err 1 "$step failed" + return 1 + fi + log_test "$step" + nbuckets=$((nbuckets + 1)) + done < <(ethtool --json -S $iface --groups rmon | \ + jq -r ".[0].rmon[\"${set}-pktsNtoM\"][]|[.low, .high]|@tsv" 2>/dev/null) + + if [ $nbuckets -eq 0 ]; then + log_test_skip "$iface does not support $set histogram counters" + return + fi +} + +rmon_rx_histogram() +{ + rmon_histogram $h1 $h2 rx + rmon_histogram $h2 $h1 rx +} + +rmon_tx_histogram() +{ + rmon_histogram $h1 $h2 tx + rmon_histogram $h2 $h1 tx +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + h2=${NETIFS[p2]} + + for iface in $h1 $h2; do + netif_mtu[$iface]=$(ip -j link show dev $iface | jq -r '.[0].mtu') + ip link set dev $iface up + done +} + +cleanup() +{ + pre_cleanup + + for iface in $h2 $h1; do + ip link set dev $iface \ + mtu ${netif_mtu[$iface]} \ + down + done +} + +check_ethtool_counter_group_support +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/fib_offload_lib.sh b/tools/testing/selftests/net/forwarding/fib_offload_lib.sh index 66496659bea7..1b3b46292179 100644 --- a/tools/testing/selftests/net/forwarding/fib_offload_lib.sh +++ b/tools/testing/selftests/net/forwarding/fib_offload_lib.sh @@ -99,15 +99,15 @@ fib_ipv4_tos_test() fib4_trap_check $ns "192.0.2.0/24 dev dummy1 tos 0 metric 1024" false check_err $? "Route not in hardware when should" - ip -n $ns route add 192.0.2.0/24 dev dummy1 tos 2 metric 1024 - fib4_trap_check $ns "192.0.2.0/24 dev dummy1 tos 2 metric 1024" false + ip -n $ns route add 192.0.2.0/24 dev dummy1 tos 8 metric 1024 + fib4_trap_check $ns "192.0.2.0/24 dev dummy1 tos 8 metric 1024" false check_err $? "Highest TOS route not in hardware when should" fib4_trap_check $ns "192.0.2.0/24 dev dummy1 tos 0 metric 1024" true check_err $? "Lowest TOS route still in hardware when should not" - ip -n $ns route add 192.0.2.0/24 dev dummy1 tos 1 metric 1024 - fib4_trap_check $ns "192.0.2.0/24 dev dummy1 tos 1 metric 1024" true + ip -n $ns route add 192.0.2.0/24 dev dummy1 tos 4 metric 1024 + fib4_trap_check $ns "192.0.2.0/24 dev dummy1 tos 4 metric 1024" true check_err $? "Middle TOS route in hardware when should not" log_test "IPv4 routes with TOS" @@ -224,7 +224,7 @@ fib_ipv4_plen_test() ip -n $ns link set dev dummy1 up # Add two routes with the same key and different prefix length and - # make sure both are in hardware. It can be verfied that both are + # make sure both are in hardware. It can be verified that both are # sharing the same leaf by checking the /proc/net/fib_trie ip -n $ns route add 192.0.2.0/24 dev dummy1 ip -n $ns route add 192.0.2.0/25 dev dummy1 @@ -277,11 +277,11 @@ fib_ipv4_replay_tos_test() ip -n $ns link set dev dummy1 up ip -n $ns route add 192.0.2.0/24 dev dummy1 tos 0 - ip -n $ns route add 192.0.2.0/24 dev dummy1 tos 1 + ip -n $ns route add 192.0.2.0/24 dev dummy1 tos 4 devlink -N $ns dev reload $devlink_dev - fib4_trap_check $ns "192.0.2.0/24 dev dummy1 tos 1" false + fib4_trap_check $ns "192.0.2.0/24 dev dummy1 tos 4" false check_err $? "Highest TOS route not in hardware when should" fib4_trap_check $ns "192.0.2.0/24 dev dummy1 tos 0" true diff --git a/tools/testing/selftests/net/forwarding/forwarding.config.sample b/tools/testing/selftests/net/forwarding/forwarding.config.sample index b802c14d2950..1fc4f0242fc5 100644 --- a/tools/testing/selftests/net/forwarding/forwarding.config.sample +++ b/tools/testing/selftests/net/forwarding/forwarding.config.sample @@ -13,6 +13,8 @@ NETIFS[p5]=veth4 NETIFS[p6]=veth5 NETIFS[p7]=veth6 NETIFS[p8]=veth7 +NETIFS[p9]=veth8 +NETIFS[p10]=veth9 # Port that does not have a cable connected. NETIF_NO_CABLE=eth8 @@ -26,6 +28,8 @@ PING=ping PING6=ping6 # Packet generator. Some distributions use 'mz'. MZ=mausezahn +# mausezahn delay between transmissions in microseconds. +MZ_DELAY=0 # Time to wait after interfaces participating in the test are all UP WAIT_TIME=5 # Whether to pause on failure or not. @@ -39,3 +43,11 @@ NETIF_CREATE=yes # Timeout (in seconds) before ping exits regardless of how many packets have # been sent or received PING_TIMEOUT=5 +# Minimum ageing_time (in centiseconds) supported by hardware +LOW_AGEING_TIME=1000 +# Flag for tc match, supposed to be skip_sw/skip_hw which means do not process +# filter by software/hardware +TC_FLAG=skip_hw +# IPv6 traceroute utility name. +TROUTE6=traceroute6 + diff --git a/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh b/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh new file mode 100755 index 000000000000..9788bd0f6e8b --- /dev/null +++ b/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh @@ -0,0 +1,464 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test traffic distribution when there are multiple paths between an IPv4 GRE +# tunnel. The tunnel carries IPv4 and IPv6 traffic between multiple hosts. +# Multiple routes are in the underlay network. With the default multipath +# policy, SW2 will only look at the outer IP addresses, hence only a single +# route would be used. +# +# +--------------------------------+ +# | H1 | +# | $h1 + | +# | 198.51.100.{2-253}/24 | | +# | 2001:db8:1::{2-fd}/64 | | +# +-------------------------|------+ +# | +# +-------------------------|------------------+ +# | SW1 | | +# | $ol1 + | +# | 198.51.100.1/24 | +# | 2001:db8:1::1/64 | +# | | +# | + g1 (gre) | +# | loc=192.0.2.1 | +# | rem=192.0.2.2 --. | +# | tos=inherit | | +# | v | +# | + $ul1 | +# | | 192.0.2.17/28 | +# +---------------------|----------------------+ +# | +# +---------------------|----------------------+ +# | SW2 | | +# | $ul21 + | +# | 192.0.2.18/28 | | +# | | | +# ! __________________+___ | +# | / \ | +# | | | | +# | + $ul22.111 (vlan) + $ul22.222 (vlan) | +# | | 192.0.2.33/28 | 192.0.2.49/28 | +# | | | | +# +--|----------------------|------------------+ +# | | +# +--|----------------------|------------------+ +# | | | | +# | + $ul32.111 (vlan) + $ul32.222 (vlan) | +# | | 192.0.2.34/28 | 192.0.2.50/28 | +# | | | | +# | \__________________+___/ | +# | | | +# | | | +# | $ul31 + | +# | 192.0.2.65/28 | SW3 | +# +---------------------|----------------------+ +# | +# +---------------------|----------------------+ +# | + $ul4 | +# | ^ 192.0.2.66/28 | +# | | | +# | + g2 (gre) | | +# | loc=192.0.2.2 | | +# | rem=192.0.2.1 --' | +# | tos=inherit | +# | | +# | $ol4 + | +# | 203.0.113.1/24 | | +# | 2001:db8:2::1/64 | SW4 | +# +-------------------------|------------------+ +# | +# +-------------------------|------+ +# | | | +# | $h2 + | +# | 203.0.113.{2-253}/24 | +# | 2001:db8:2::{2-fd}/64 H2 | +# +--------------------------------+ + +ALL_TESTS=" + ping_ipv4 + ping_ipv6 + custom_hash +" + +NUM_NETIFS=10 +source lib.sh + +h1_create() +{ + simple_if_init $h1 198.51.100.2/24 2001:db8:1::2/64 + ip route add vrf v$h1 default via 198.51.100.1 dev $h1 + ip -6 route add vrf v$h1 default via 2001:db8:1::1 dev $h1 +} + +h1_destroy() +{ + ip -6 route del vrf v$h1 default + ip route del vrf v$h1 default + simple_if_fini $h1 198.51.100.2/24 2001:db8:1::2/64 +} + +sw1_create() +{ + simple_if_init $ol1 198.51.100.1/24 2001:db8:1::1/64 + __simple_if_init $ul1 v$ol1 192.0.2.17/28 + + tunnel_create g1 gre 192.0.2.1 192.0.2.2 tos inherit dev v$ol1 + __simple_if_init g1 v$ol1 192.0.2.1/32 + ip route add vrf v$ol1 192.0.2.2/32 via 192.0.2.18 + + ip route add vrf v$ol1 203.0.113.0/24 dev g1 + ip -6 route add vrf v$ol1 2001:db8:2::/64 dev g1 +} + +sw1_destroy() +{ + ip -6 route del vrf v$ol1 2001:db8:2::/64 + ip route del vrf v$ol1 203.0.113.0/24 + + ip route del vrf v$ol1 192.0.2.2/32 + __simple_if_fini g1 192.0.2.1/32 + tunnel_destroy g1 + + __simple_if_fini $ul1 192.0.2.17/28 + simple_if_fini $ol1 198.51.100.1/24 2001:db8:1::1/64 +} + +sw2_create() +{ + simple_if_init $ul21 192.0.2.18/28 + __simple_if_init $ul22 v$ul21 + vlan_create $ul22 111 v$ul21 192.0.2.33/28 + vlan_create $ul22 222 v$ul21 192.0.2.49/28 + + ip route add vrf v$ul21 192.0.2.1/32 via 192.0.2.17 + ip route add vrf v$ul21 192.0.2.2/32 \ + nexthop via 192.0.2.34 \ + nexthop via 192.0.2.50 +} + +sw2_destroy() +{ + ip route del vrf v$ul21 192.0.2.2/32 + ip route del vrf v$ul21 192.0.2.1/32 + + vlan_destroy $ul22 222 + vlan_destroy $ul22 111 + __simple_if_fini $ul22 + simple_if_fini $ul21 192.0.2.18/28 +} + +sw3_create() +{ + simple_if_init $ul31 192.0.2.65/28 + __simple_if_init $ul32 v$ul31 + vlan_create $ul32 111 v$ul31 192.0.2.34/28 + vlan_create $ul32 222 v$ul31 192.0.2.50/28 + + ip route add vrf v$ul31 192.0.2.2/32 via 192.0.2.66 + ip route add vrf v$ul31 192.0.2.1/32 \ + nexthop via 192.0.2.33 \ + nexthop via 192.0.2.49 + + tc qdisc add dev $ul32 clsact + tc filter add dev $ul32 ingress pref 111 prot 802.1Q \ + flower vlan_id 111 action pass + tc filter add dev $ul32 ingress pref 222 prot 802.1Q \ + flower vlan_id 222 action pass +} + +sw3_destroy() +{ + tc qdisc del dev $ul32 clsact + + ip route del vrf v$ul31 192.0.2.1/32 + ip route del vrf v$ul31 192.0.2.2/32 + + vlan_destroy $ul32 222 + vlan_destroy $ul32 111 + __simple_if_fini $ul32 + simple_if_fini $ul31 192.0.2.65/28 +} + +sw4_create() +{ + simple_if_init $ol4 203.0.113.1/24 2001:db8:2::1/64 + __simple_if_init $ul4 v$ol4 192.0.2.66/28 + + tunnel_create g2 gre 192.0.2.2 192.0.2.1 tos inherit dev v$ol4 + __simple_if_init g2 v$ol4 192.0.2.2/32 + ip route add vrf v$ol4 192.0.2.1/32 via 192.0.2.65 + + ip route add vrf v$ol4 198.51.100.0/24 dev g2 + ip -6 route add vrf v$ol4 2001:db8:1::/64 dev g2 +} + +sw4_destroy() +{ + ip -6 route del vrf v$ol4 2001:db8:1::/64 + ip route del vrf v$ol4 198.51.100.0/24 + + ip route del vrf v$ol4 192.0.2.1/32 + __simple_if_fini g2 192.0.2.2/32 + tunnel_destroy g2 + + __simple_if_fini $ul4 192.0.2.66/28 + simple_if_fini $ol4 203.0.113.1/24 2001:db8:2::1/64 +} + +h2_create() +{ + simple_if_init $h2 203.0.113.2/24 2001:db8:2::2/64 + ip route add vrf v$h2 default via 203.0.113.1 dev $h2 + ip -6 route add vrf v$h2 default via 2001:db8:2::1 dev $h2 +} + +h2_destroy() +{ + ip -6 route del vrf v$h2 default + ip route del vrf v$h2 default + simple_if_fini $h2 203.0.113.2/24 2001:db8:2::2/64 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + + ol1=${NETIFS[p2]} + ul1=${NETIFS[p3]} + + ul21=${NETIFS[p4]} + ul22=${NETIFS[p5]} + + ul32=${NETIFS[p6]} + ul31=${NETIFS[p7]} + + ul4=${NETIFS[p8]} + ol4=${NETIFS[p9]} + + h2=${NETIFS[p10]} + + vrf_prepare + h1_create + sw1_create + sw2_create + sw3_create + sw4_create + h2_create + + forwarding_enable +} + +cleanup() +{ + pre_cleanup + + forwarding_restore + + h2_destroy + sw4_destroy + sw3_destroy + sw2_destroy + sw1_destroy + h1_destroy + vrf_cleanup +} + +ping_ipv4() +{ + ping_test $h1 203.0.113.2 +} + +ping_ipv6() +{ + ping6_test $h1 2001:db8:2::2 +} + +send_src_ipv4() +{ + ip vrf exec v$h1 $MZ $h1 -q -p 64 \ + -A "198.51.100.2-198.51.100.253" -B 203.0.113.2 \ + -d $MZ_DELAY -c 50 -t udp "sp=20000,dp=30000" +} + +send_dst_ipv4() +{ + ip vrf exec v$h1 $MZ $h1 -q -p 64 \ + -A 198.51.100.2 -B "203.0.113.2-203.0.113.253" \ + -d $MZ_DELAY -c 50 -t udp "sp=20000,dp=30000" +} + +send_src_udp4() +{ + ip vrf exec v$h1 $MZ $h1 -q -p 64 \ + -A 198.51.100.2 -B 203.0.113.2 \ + -d $MZ_DELAY -t udp "sp=0-32768,dp=30000" +} + +send_dst_udp4() +{ + ip vrf exec v$h1 $MZ $h1 -q -p 64 \ + -A 198.51.100.2 -B 203.0.113.2 \ + -d $MZ_DELAY -t udp "sp=20000,dp=0-32768" +} + +send_src_ipv6() +{ + ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \ + -A "2001:db8:1::2-2001:db8:1::fd" -B 2001:db8:2::2 \ + -d $MZ_DELAY -c 50 -t udp "sp=20000,dp=30000" +} + +send_dst_ipv6() +{ + ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \ + -A 2001:db8:1::2 -B "2001:db8:2::2-2001:db8:2::fd" \ + -d $MZ_DELAY -c 50 -t udp "sp=20000,dp=30000" +} + +send_flowlabel() +{ + # Generate 16384 echo requests, each with a random flow label. + for _ in $(seq 1 16384); do + ip vrf exec v$h1 \ + $PING6 2001:db8:2::2 -F 0 -c 1 -q >/dev/null 2>&1 + done +} + +send_src_udp6() +{ + ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \ + -A 2001:db8:1::2 -B 2001:db8:2::2 \ + -d $MZ_DELAY -t udp "sp=0-32768,dp=30000" +} + +send_dst_udp6() +{ + ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \ + -A 2001:db8:1::2 -B 2001:db8:2::2 \ + -d $MZ_DELAY -t udp "sp=20000,dp=0-32768" +} + +custom_hash_test() +{ + local field="$1"; shift + local balanced="$1"; shift + local send_flows="$@" + + RET=0 + + local t0_111=$(tc_rule_stats_get $ul32 111 ingress) + local t0_222=$(tc_rule_stats_get $ul32 222 ingress) + + $send_flows + + local t1_111=$(tc_rule_stats_get $ul32 111 ingress) + local t1_222=$(tc_rule_stats_get $ul32 222 ingress) + + local d111=$((t1_111 - t0_111)) + local d222=$((t1_222 - t0_222)) + + local diff=$((d222 - d111)) + local sum=$((d111 + d222)) + + local pct=$(echo "$diff / $sum * 100" | bc -l) + local is_balanced=$(echo "-20 <= $pct && $pct <= 20" | bc) + + [[ ( $is_balanced -eq 1 && $balanced == "balanced" ) || + ( $is_balanced -eq 0 && $balanced == "unbalanced" ) ]] + check_err $? "Expected traffic to be $balanced, but it is not" + + log_test "Multipath hash field: $field ($balanced)" + log_info "Packets sent on path1 / path2: $d111 / $d222" +} + +custom_hash_v4() +{ + log_info "Running IPv4 overlay custom multipath hash tests" + + # Prevent the neighbour table from overflowing, as different neighbour + # entries will be created on $ol4 when using different destination IPs. + sysctl_set net.ipv4.neigh.default.gc_thresh1 1024 + sysctl_set net.ipv4.neigh.default.gc_thresh2 1024 + sysctl_set net.ipv4.neigh.default.gc_thresh3 1024 + + sysctl_set net.ipv4.fib_multipath_hash_fields 0x0040 + custom_hash_test "Inner source IP" "balanced" send_src_ipv4 + custom_hash_test "Inner source IP" "unbalanced" send_dst_ipv4 + + sysctl_set net.ipv4.fib_multipath_hash_fields 0x0080 + custom_hash_test "Inner destination IP" "balanced" send_dst_ipv4 + custom_hash_test "Inner destination IP" "unbalanced" send_src_ipv4 + + sysctl_set net.ipv4.fib_multipath_hash_fields 0x0400 + custom_hash_test "Inner source port" "balanced" send_src_udp4 + custom_hash_test "Inner source port" "unbalanced" send_dst_udp4 + + sysctl_set net.ipv4.fib_multipath_hash_fields 0x0800 + custom_hash_test "Inner destination port" "balanced" send_dst_udp4 + custom_hash_test "Inner destination port" "unbalanced" send_src_udp4 + + sysctl_restore net.ipv4.neigh.default.gc_thresh3 + sysctl_restore net.ipv4.neigh.default.gc_thresh2 + sysctl_restore net.ipv4.neigh.default.gc_thresh1 +} + +custom_hash_v6() +{ + log_info "Running IPv6 overlay custom multipath hash tests" + + # Prevent the neighbour table from overflowing, as different neighbour + # entries will be created on $ol4 when using different destination IPs. + sysctl_set net.ipv6.neigh.default.gc_thresh1 1024 + sysctl_set net.ipv6.neigh.default.gc_thresh2 1024 + sysctl_set net.ipv6.neigh.default.gc_thresh3 1024 + + sysctl_set net.ipv4.fib_multipath_hash_fields 0x0040 + custom_hash_test "Inner source IP" "balanced" send_src_ipv6 + custom_hash_test "Inner source IP" "unbalanced" send_dst_ipv6 + + sysctl_set net.ipv4.fib_multipath_hash_fields 0x0080 + custom_hash_test "Inner destination IP" "balanced" send_dst_ipv6 + custom_hash_test "Inner destination IP" "unbalanced" send_src_ipv6 + + sysctl_set net.ipv4.fib_multipath_hash_fields 0x0200 + custom_hash_test "Inner flowlabel" "balanced" send_flowlabel + custom_hash_test "Inner flowlabel" "unbalanced" send_src_ipv6 + + sysctl_set net.ipv4.fib_multipath_hash_fields 0x0400 + custom_hash_test "Inner source port" "balanced" send_src_udp6 + custom_hash_test "Inner source port" "unbalanced" send_dst_udp6 + + sysctl_set net.ipv4.fib_multipath_hash_fields 0x0800 + custom_hash_test "Inner destination port" "balanced" send_dst_udp6 + custom_hash_test "Inner destination port" "unbalanced" send_src_udp6 + + sysctl_restore net.ipv6.neigh.default.gc_thresh3 + sysctl_restore net.ipv6.neigh.default.gc_thresh2 + sysctl_restore net.ipv6.neigh.default.gc_thresh1 +} + +custom_hash() +{ + # Test that when the hash policy is set to custom, traffic is + # distributed only according to the fields set in the + # fib_multipath_hash_fields sysctl. + # + # Each time set a different field and make sure traffic is only + # distributed when the field is changed in the packet stream. + + sysctl_set net.ipv4.fib_multipath_hash_policy 3 + + custom_hash_v4 + custom_hash_v6 + + sysctl_restore net.ipv4.fib_multipath_hash_policy +} + +trap cleanup EXIT + +setup_prepare +setup_wait +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/gre_inner_v4_multipath.sh b/tools/testing/selftests/net/forwarding/gre_inner_v4_multipath.sh index e4009f658003..efca6114a3ce 100755 --- a/tools/testing/selftests/net/forwarding/gre_inner_v4_multipath.sh +++ b/tools/testing/selftests/net/forwarding/gre_inner_v4_multipath.sh @@ -267,7 +267,7 @@ multipath4_test() ip vrf exec v$h1 \ $MZ $h1 -q -p 64 -A "192.0.3.2-192.0.3.62" -B "192.0.4.2-192.0.4.62" \ - -d 1msec -c 50 -t udp "sp=1024,dp=1024" + -d $MZ_DELAY -c 50 -t udp "sp=1024,dp=1024" sleep 1 local t1_111=$(tc_rule_stats_get $ul32 111 ingress) diff --git a/tools/testing/selftests/net/forwarding/gre_inner_v6_multipath.sh b/tools/testing/selftests/net/forwarding/gre_inner_v6_multipath.sh index e449475c4d3e..a71ad39fc0c3 100755 --- a/tools/testing/selftests/net/forwarding/gre_inner_v6_multipath.sh +++ b/tools/testing/selftests/net/forwarding/gre_inner_v6_multipath.sh @@ -266,9 +266,9 @@ multipath6_test() local t0_222=$(tc_rule_stats_get $ul32 222 ingress) ip vrf exec v$h1 \ - $MZ $h1 -6 -q -p 64 -A "2001:db8:1::2-2001:db8:1::1e" \ - -B "2001:db8:2::2-2001:db8:2::1e" \ - -d 1msec -c 50 -t udp "sp=1024,dp=1024" + $MZ $h1 -6 -q -p 64 -A "2001:db8:1::2-2001:db8:1::3e" \ + -B "2001:db8:2::2-2001:db8:2::3e" \ + -d $MZ_DELAY -c 50 -t udp "sp=1024,dp=1024" sleep 1 local t1_111=$(tc_rule_stats_get $ul32 111 ingress) diff --git a/tools/testing/selftests/net/forwarding/gre_multipath.sh b/tools/testing/selftests/net/forwarding/gre_multipath.sh index a8d8e8b3dc81..57531c1d884d 100755 --- a/tools/testing/selftests/net/forwarding/gre_multipath.sh +++ b/tools/testing/selftests/net/forwarding/gre_multipath.sh @@ -220,7 +220,7 @@ multipath4_test() ip vrf exec v$h1 \ $MZ $h1 -q -p 64 -A 192.0.2.1 -B 192.0.2.18 \ - -d 1msec -t udp "sp=1024,dp=0-32768" + -d $MZ_DELAY -t udp "sp=1024,dp=0-32768" local t1_111=$(tc_rule_stats_get $ul2 111 ingress) local t1_222=$(tc_rule_stats_get $ul2 222 ingress) diff --git a/tools/testing/selftests/net/forwarding/gre_multipath_nh.sh b/tools/testing/selftests/net/forwarding/gre_multipath_nh.sh new file mode 100755 index 000000000000..7d5b2b9cc133 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/gre_multipath_nh.sh @@ -0,0 +1,319 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Test traffic distribution when a wECMP route forwards traffic to two GRE +# tunnels. +# +# +-------------------------+ +# | H1 | +# | $h1 + | +# | 192.0.2.1/28 | | +# | 2001:db8:1::1/64 | | +# +-------------------|-----+ +# | +# +-------------------|------------------------+ +# | SW1 | | +# | $ol1 + | +# | 192.0.2.2/28 | +# | 2001:db8:1::2/64 | +# | | +# | + g1a (gre) + g1b (gre) | +# | loc=192.0.2.65 loc=192.0.2.81 | +# | rem=192.0.2.66 --. rem=192.0.2.82 --. | +# | tos=inherit | tos=inherit | | +# | .------------------' | | +# | | .------------------' | +# | v v | +# | + $ul1.111 (vlan) + $ul1.222 (vlan) | +# | | 192.0.2.129/28 | 192.0.2.145/28 | +# | \ / | +# | \________________/ | +# | | | +# | + $ul1 | +# +------------|-------------------------------+ +# | +# +------------|-------------------------------+ +# | SW2 + $ul2 | +# | _______|________ | +# | / \ | +# | / \ | +# | + $ul2.111 (vlan) + $ul2.222 (vlan) | +# | ^ 192.0.2.130/28 ^ 192.0.2.146/28 | +# | | | | +# | | '------------------. | +# | '------------------. | | +# | + g2a (gre) | + g2b (gre) | | +# | loc=192.0.2.66 | loc=192.0.2.82 | | +# | rem=192.0.2.65 --' rem=192.0.2.81 --' | +# | tos=inherit tos=inherit | +# | | +# | $ol2 + | +# | 192.0.2.17/28 | | +# | 2001:db8:2::1/64 | | +# +-------------------|------------------------+ +# | +# +-------------------|-----+ +# | H2 | | +# | $h2 + | +# | 192.0.2.18/28 | +# | 2001:db8:2::2/64 | +# +-------------------------+ + +ALL_TESTS=" + ping_ipv4 + ping_ipv6 + multipath_ipv4 + multipath_ipv6 +" + +NUM_NETIFS=6 +source lib.sh + +h1_create() +{ + simple_if_init $h1 192.0.2.1/28 2001:db8:1::1/64 + ip route add vrf v$h1 192.0.2.16/28 via 192.0.2.2 + ip route add vrf v$h1 2001:db8:2::/64 via 2001:db8:1::2 +} + +h1_destroy() +{ + ip route del vrf v$h1 2001:db8:2::/64 via 2001:db8:1::2 + ip route del vrf v$h1 192.0.2.16/28 via 192.0.2.2 + simple_if_fini $h1 192.0.2.1/28 +} + +sw1_create() +{ + simple_if_init $ol1 192.0.2.2/28 2001:db8:1::2/64 + __simple_if_init $ul1 v$ol1 + vlan_create $ul1 111 v$ol1 192.0.2.129/28 + vlan_create $ul1 222 v$ol1 192.0.2.145/28 + + tunnel_create g1a gre 192.0.2.65 192.0.2.66 tos inherit dev v$ol1 + __simple_if_init g1a v$ol1 192.0.2.65/32 + ip route add vrf v$ol1 192.0.2.66/32 via 192.0.2.130 + + tunnel_create g1b gre 192.0.2.81 192.0.2.82 tos inherit dev v$ol1 + __simple_if_init g1b v$ol1 192.0.2.81/32 + ip route add vrf v$ol1 192.0.2.82/32 via 192.0.2.146 + + ip -6 nexthop add id 101 dev g1a + ip -6 nexthop add id 102 dev g1b + ip nexthop add id 103 group 101/102 + + ip route add vrf v$ol1 192.0.2.16/28 nhid 103 + ip route add vrf v$ol1 2001:db8:2::/64 nhid 103 +} + +sw1_destroy() +{ + ip route del vrf v$ol1 2001:db8:2::/64 + ip route del vrf v$ol1 192.0.2.16/28 + + ip nexthop del id 103 + ip -6 nexthop del id 102 + ip -6 nexthop del id 101 + + ip route del vrf v$ol1 192.0.2.82/32 via 192.0.2.146 + __simple_if_fini g1b 192.0.2.81/32 + tunnel_destroy g1b + + ip route del vrf v$ol1 192.0.2.66/32 via 192.0.2.130 + __simple_if_fini g1a 192.0.2.65/32 + tunnel_destroy g1a + + vlan_destroy $ul1 222 + vlan_destroy $ul1 111 + __simple_if_fini $ul1 + simple_if_fini $ol1 192.0.2.2/28 2001:db8:1::2/64 +} + +sw2_create() +{ + simple_if_init $ol2 192.0.2.17/28 2001:db8:2::1/64 + __simple_if_init $ul2 v$ol2 + vlan_create $ul2 111 v$ol2 192.0.2.130/28 + vlan_create $ul2 222 v$ol2 192.0.2.146/28 + + tunnel_create g2a gre 192.0.2.66 192.0.2.65 tos inherit dev v$ol2 + __simple_if_init g2a v$ol2 192.0.2.66/32 + ip route add vrf v$ol2 192.0.2.65/32 via 192.0.2.129 + + tunnel_create g2b gre 192.0.2.82 192.0.2.81 tos inherit dev v$ol2 + __simple_if_init g2b v$ol2 192.0.2.82/32 + ip route add vrf v$ol2 192.0.2.81/32 via 192.0.2.145 + + ip -6 nexthop add id 201 dev g2a + ip -6 nexthop add id 202 dev g2b + ip nexthop add id 203 group 201/202 + + ip route add vrf v$ol2 192.0.2.0/28 nhid 203 + ip route add vrf v$ol2 2001:db8:1::/64 nhid 203 + + tc qdisc add dev $ul2 clsact + tc filter add dev $ul2 ingress pref 111 prot 802.1Q \ + flower vlan_id 111 action pass + tc filter add dev $ul2 ingress pref 222 prot 802.1Q \ + flower vlan_id 222 action pass +} + +sw2_destroy() +{ + tc qdisc del dev $ul2 clsact + + ip route del vrf v$ol2 2001:db8:1::/64 + ip route del vrf v$ol2 192.0.2.0/28 + + ip nexthop del id 203 + ip -6 nexthop del id 202 + ip -6 nexthop del id 201 + + ip route del vrf v$ol2 192.0.2.81/32 via 192.0.2.145 + __simple_if_fini g2b 192.0.2.82/32 + tunnel_destroy g2b + + ip route del vrf v$ol2 192.0.2.65/32 via 192.0.2.129 + __simple_if_fini g2a 192.0.2.66/32 + tunnel_destroy g2a + + vlan_destroy $ul2 222 + vlan_destroy $ul2 111 + __simple_if_fini $ul2 + simple_if_fini $ol2 192.0.2.17/28 2001:db8:2::1/64 +} + +h2_create() +{ + simple_if_init $h2 192.0.2.18/28 2001:db8:2::2/64 + ip route add vrf v$h2 192.0.2.0/28 via 192.0.2.17 + ip route add vrf v$h2 2001:db8:1::/64 via 2001:db8:2::1 +} + +h2_destroy() +{ + ip route del vrf v$h2 2001:db8:1::/64 via 2001:db8:2::1 + ip route del vrf v$h2 192.0.2.0/28 via 192.0.2.17 + simple_if_fini $h2 192.0.2.18/28 2001:db8:2::2/64 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + ol1=${NETIFS[p2]} + + ul1=${NETIFS[p3]} + ul2=${NETIFS[p4]} + + ol2=${NETIFS[p5]} + h2=${NETIFS[p6]} + + vrf_prepare + h1_create + sw1_create + sw2_create + h2_create + + forwarding_enable +} + +cleanup() +{ + pre_cleanup + + forwarding_restore + + h2_destroy + sw2_destroy + sw1_destroy + h1_destroy + vrf_cleanup +} + +multipath4_test() +{ + local what=$1; shift + local weight1=$1; shift + local weight2=$1; shift + + sysctl_set net.ipv4.fib_multipath_hash_policy 1 + ip nexthop replace id 103 group 101,$weight1/102,$weight2 + + local t0_111=$(tc_rule_stats_get $ul2 111 ingress) + local t0_222=$(tc_rule_stats_get $ul2 222 ingress) + + ip vrf exec v$h1 \ + $MZ $h1 -q -p 64 -A 192.0.2.1 -B 192.0.2.18 \ + -d $MZ_DELAY -t udp "sp=1024,dp=0-32768" + + local t1_111=$(tc_rule_stats_get $ul2 111 ingress) + local t1_222=$(tc_rule_stats_get $ul2 222 ingress) + + local d111=$((t1_111 - t0_111)) + local d222=$((t1_222 - t0_222)) + multipath_eval "$what" $weight1 $weight2 $d111 $d222 + + ip nexthop replace id 103 group 101/102 + sysctl_restore net.ipv4.fib_multipath_hash_policy +} + +multipath6_test() +{ + local what=$1; shift + local weight1=$1; shift + local weight2=$1; shift + + sysctl_set net.ipv6.fib_multipath_hash_policy 1 + ip nexthop replace id 103 group 101,$weight1/102,$weight2 + + local t0_111=$(tc_rule_stats_get $ul2 111 ingress) + local t0_222=$(tc_rule_stats_get $ul2 222 ingress) + + ip vrf exec v$h1 \ + $MZ $h1 -6 -q -p 64 -A 2001:db8:1::1 -B 2001:db8:2::2 \ + -d $MZ_DELAY -t udp "sp=1024,dp=0-32768" + + local t1_111=$(tc_rule_stats_get $ul2 111 ingress) + local t1_222=$(tc_rule_stats_get $ul2 222 ingress) + + local d111=$((t1_111 - t0_111)) + local d222=$((t1_222 - t0_222)) + multipath_eval "$what" $weight1 $weight2 $d111 $d222 + + ip nexthop replace id 103 group 101/102 + sysctl_restore net.ipv6.fib_multipath_hash_policy +} + +ping_ipv4() +{ + ping_test $h1 192.0.2.18 +} + +ping_ipv6() +{ + ping6_test $h1 2001:db8:2::2 +} + +multipath_ipv4() +{ + log_info "Running IPv4 multipath tests" + multipath4_test "ECMP" 1 1 + multipath4_test "Weighted MP 2:1" 2 1 + multipath4_test "Weighted MP 11:45" 11 45 +} + +multipath_ipv6() +{ + log_info "Running IPv6 multipath tests" + multipath6_test "ECMP" 1 1 + multipath6_test "Weighted MP 2:1" 2 1 + multipath6_test "Weighted MP 11:45" 11 45 +} + +trap cleanup EXIT + +setup_prepare +setup_wait +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/gre_multipath_nh_res.sh b/tools/testing/selftests/net/forwarding/gre_multipath_nh_res.sh new file mode 100755 index 000000000000..370f9925302d --- /dev/null +++ b/tools/testing/selftests/net/forwarding/gre_multipath_nh_res.sh @@ -0,0 +1,323 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Test traffic distribution when a wECMP route forwards traffic to two GRE +# tunnels. +# +# +-------------------------+ +# | H1 | +# | $h1 + | +# | 192.0.2.1/28 | | +# | 2001:db8:1::1/64 | | +# +-------------------|-----+ +# | +# +-------------------|------------------------+ +# | SW1 | | +# | $ol1 + | +# | 192.0.2.2/28 | +# | 2001:db8:1::2/64 | +# | | +# | + g1a (gre) + g1b (gre) | +# | loc=192.0.2.65 loc=192.0.2.81 | +# | rem=192.0.2.66 --. rem=192.0.2.82 --. | +# | tos=inherit | tos=inherit | | +# | .------------------' | | +# | | .------------------' | +# | v v | +# | + $ul1.111 (vlan) + $ul1.222 (vlan) | +# | | 192.0.2.129/28 | 192.0.2.145/28 | +# | \ / | +# | \________________/ | +# | | | +# | + $ul1 | +# +------------|-------------------------------+ +# | +# +------------|-------------------------------+ +# | SW2 + $ul2 | +# | _______|________ | +# | / \ | +# | / \ | +# | + $ul2.111 (vlan) + $ul2.222 (vlan) | +# | ^ 192.0.2.130/28 ^ 192.0.2.146/28 | +# | | | | +# | | '------------------. | +# | '------------------. | | +# | + g2a (gre) | + g2b (gre) | | +# | loc=192.0.2.66 | loc=192.0.2.82 | | +# | rem=192.0.2.65 --' rem=192.0.2.81 --' | +# | tos=inherit tos=inherit | +# | | +# | $ol2 + | +# | 192.0.2.17/28 | | +# | 2001:db8:2::1/64 | | +# +-------------------|------------------------+ +# | +# +-------------------|-----+ +# | H2 | | +# | $h2 + | +# | 192.0.2.18/28 | +# | 2001:db8:2::2/64 | +# +-------------------------+ + +ALL_TESTS=" + ping_ipv4 + ping_ipv6 + multipath_ipv4 + multipath_ipv6 +" + +NUM_NETIFS=6 +source lib.sh + +h1_create() +{ + simple_if_init $h1 192.0.2.1/28 2001:db8:1::1/64 + ip route add vrf v$h1 192.0.2.16/28 via 192.0.2.2 + ip route add vrf v$h1 2001:db8:2::/64 via 2001:db8:1::2 +} + +h1_destroy() +{ + ip route del vrf v$h1 2001:db8:2::/64 via 2001:db8:1::2 + ip route del vrf v$h1 192.0.2.16/28 via 192.0.2.2 + simple_if_fini $h1 192.0.2.1/28 +} + +sw1_create() +{ + simple_if_init $ol1 192.0.2.2/28 2001:db8:1::2/64 + __simple_if_init $ul1 v$ol1 + vlan_create $ul1 111 v$ol1 192.0.2.129/28 + vlan_create $ul1 222 v$ol1 192.0.2.145/28 + + tunnel_create g1a gre 192.0.2.65 192.0.2.66 tos inherit dev v$ol1 + __simple_if_init g1a v$ol1 192.0.2.65/32 + ip route add vrf v$ol1 192.0.2.66/32 via 192.0.2.130 + + tunnel_create g1b gre 192.0.2.81 192.0.2.82 tos inherit dev v$ol1 + __simple_if_init g1b v$ol1 192.0.2.81/32 + ip route add vrf v$ol1 192.0.2.82/32 via 192.0.2.146 + + ip -6 nexthop add id 101 dev g1a + ip -6 nexthop add id 102 dev g1b + ip nexthop add id 103 group 101/102 type resilient buckets 512 \ + idle_timer 0 + + ip route add vrf v$ol1 192.0.2.16/28 nhid 103 + ip route add vrf v$ol1 2001:db8:2::/64 nhid 103 +} + +sw1_destroy() +{ + ip route del vrf v$ol1 2001:db8:2::/64 + ip route del vrf v$ol1 192.0.2.16/28 + + ip nexthop del id 103 + ip -6 nexthop del id 102 + ip -6 nexthop del id 101 + + ip route del vrf v$ol1 192.0.2.82/32 via 192.0.2.146 + __simple_if_fini g1b 192.0.2.81/32 + tunnel_destroy g1b + + ip route del vrf v$ol1 192.0.2.66/32 via 192.0.2.130 + __simple_if_fini g1a 192.0.2.65/32 + tunnel_destroy g1a + + vlan_destroy $ul1 222 + vlan_destroy $ul1 111 + __simple_if_fini $ul1 + simple_if_fini $ol1 192.0.2.2/28 2001:db8:1::2/64 +} + +sw2_create() +{ + simple_if_init $ol2 192.0.2.17/28 2001:db8:2::1/64 + __simple_if_init $ul2 v$ol2 + vlan_create $ul2 111 v$ol2 192.0.2.130/28 + vlan_create $ul2 222 v$ol2 192.0.2.146/28 + + tunnel_create g2a gre 192.0.2.66 192.0.2.65 tos inherit dev v$ol2 + __simple_if_init g2a v$ol2 192.0.2.66/32 + ip route add vrf v$ol2 192.0.2.65/32 via 192.0.2.129 + + tunnel_create g2b gre 192.0.2.82 192.0.2.81 tos inherit dev v$ol2 + __simple_if_init g2b v$ol2 192.0.2.82/32 + ip route add vrf v$ol2 192.0.2.81/32 via 192.0.2.145 + + ip -6 nexthop add id 201 dev g2a + ip -6 nexthop add id 202 dev g2b + ip nexthop add id 203 group 201/202 type resilient buckets 512 \ + idle_timer 0 + + ip route add vrf v$ol2 192.0.2.0/28 nhid 203 + ip route add vrf v$ol2 2001:db8:1::/64 nhid 203 + + tc qdisc add dev $ul2 clsact + tc filter add dev $ul2 ingress pref 111 prot 802.1Q \ + flower vlan_id 111 action pass + tc filter add dev $ul2 ingress pref 222 prot 802.1Q \ + flower vlan_id 222 action pass +} + +sw2_destroy() +{ + tc qdisc del dev $ul2 clsact + + ip route del vrf v$ol2 2001:db8:1::/64 + ip route del vrf v$ol2 192.0.2.0/28 + + ip nexthop del id 203 + ip -6 nexthop del id 202 + ip -6 nexthop del id 201 + + ip route del vrf v$ol2 192.0.2.81/32 via 192.0.2.145 + __simple_if_fini g2b 192.0.2.82/32 + tunnel_destroy g2b + + ip route del vrf v$ol2 192.0.2.65/32 via 192.0.2.129 + __simple_if_fini g2a 192.0.2.66/32 + tunnel_destroy g2a + + vlan_destroy $ul2 222 + vlan_destroy $ul2 111 + __simple_if_fini $ul2 + simple_if_fini $ol2 192.0.2.17/28 2001:db8:2::1/64 +} + +h2_create() +{ + simple_if_init $h2 192.0.2.18/28 2001:db8:2::2/64 + ip route add vrf v$h2 192.0.2.0/28 via 192.0.2.17 + ip route add vrf v$h2 2001:db8:1::/64 via 2001:db8:2::1 +} + +h2_destroy() +{ + ip route del vrf v$h2 2001:db8:1::/64 via 2001:db8:2::1 + ip route del vrf v$h2 192.0.2.0/28 via 192.0.2.17 + simple_if_fini $h2 192.0.2.18/28 2001:db8:2::2/64 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + ol1=${NETIFS[p2]} + + ul1=${NETIFS[p3]} + ul2=${NETIFS[p4]} + + ol2=${NETIFS[p5]} + h2=${NETIFS[p6]} + + vrf_prepare + h1_create + sw1_create + sw2_create + h2_create + + forwarding_enable +} + +cleanup() +{ + pre_cleanup + + forwarding_restore + + h2_destroy + sw2_destroy + sw1_destroy + h1_destroy + vrf_cleanup +} + +multipath4_test() +{ + local what=$1; shift + local weight1=$1; shift + local weight2=$1; shift + + sysctl_set net.ipv4.fib_multipath_hash_policy 1 + ip nexthop replace id 103 group 101,$weight1/102,$weight2 \ + type resilient + + local t0_111=$(tc_rule_stats_get $ul2 111 ingress) + local t0_222=$(tc_rule_stats_get $ul2 222 ingress) + + ip vrf exec v$h1 \ + $MZ $h1 -q -p 64 -A 192.0.2.1 -B 192.0.2.18 \ + -d $MZ_DELAY -t udp "sp=1024,dp=0-32768" + + local t1_111=$(tc_rule_stats_get $ul2 111 ingress) + local t1_222=$(tc_rule_stats_get $ul2 222 ingress) + + local d111=$((t1_111 - t0_111)) + local d222=$((t1_222 - t0_222)) + multipath_eval "$what" $weight1 $weight2 $d111 $d222 + + ip nexthop replace id 103 group 101/102 type resilient + sysctl_restore net.ipv4.fib_multipath_hash_policy +} + +multipath6_test() +{ + local what=$1; shift + local weight1=$1; shift + local weight2=$1; shift + + sysctl_set net.ipv6.fib_multipath_hash_policy 1 + ip nexthop replace id 103 group 101,$weight1/102,$weight2 \ + type resilient + + local t0_111=$(tc_rule_stats_get $ul2 111 ingress) + local t0_222=$(tc_rule_stats_get $ul2 222 ingress) + + ip vrf exec v$h1 \ + $MZ $h1 -6 -q -p 64 -A 2001:db8:1::1 -B 2001:db8:2::2 \ + -d $MZ_DELAY -t udp "sp=1024,dp=0-32768" + + local t1_111=$(tc_rule_stats_get $ul2 111 ingress) + local t1_222=$(tc_rule_stats_get $ul2 222 ingress) + + local d111=$((t1_111 - t0_111)) + local d222=$((t1_222 - t0_222)) + multipath_eval "$what" $weight1 $weight2 $d111 $d222 + + ip nexthop replace id 103 group 101/102 type resilient + sysctl_restore net.ipv6.fib_multipath_hash_policy +} + +ping_ipv4() +{ + ping_test $h1 192.0.2.18 +} + +ping_ipv6() +{ + ping6_test $h1 2001:db8:2::2 +} + +multipath_ipv4() +{ + log_info "Running IPv4 multipath tests" + multipath4_test "ECMP" 1 1 + multipath4_test "Weighted MP 2:1" 2 1 + multipath4_test "Weighted MP 11:45" 11 45 +} + +multipath_ipv6() +{ + log_info "Running IPv6 multipath tests" + multipath6_test "ECMP" 1 1 + multipath6_test "Weighted MP 2:1" 2 1 + multipath6_test "Weighted MP 11:45" 11 45 +} + +trap cleanup EXIT + +setup_prepare +setup_wait +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/hw_stats_l3.sh b/tools/testing/selftests/net/forwarding/hw_stats_l3.sh new file mode 100755 index 000000000000..48584a51388f --- /dev/null +++ b/tools/testing/selftests/net/forwarding/hw_stats_l3.sh @@ -0,0 +1,340 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# +--------------------+ +----------------------+ +# | H1 | | H2 | +# | | | | +# | $h1.200 + | | + $h2.200 | +# | 192.0.2.1/28 | | | | 192.0.2.18/28 | +# | 2001:db8:1::1/64 | | | | 2001:db8:2::1/64 | +# | | | | | | +# | $h1 + | | + $h2 | +# | | | | | | +# +------------------|-+ +-|--------------------+ +# | | +# +------------------|-------------------------|--------------------+ +# | SW | | | +# | | | | +# | $rp1 + + $rp2 | +# | | | | +# | $rp1.200 + + $rp2.200 | +# | 192.0.2.2/28 192.0.2.17/28 | +# | 2001:db8:1::2/64 2001:db8:2::2/64 | +# | | +# +-----------------------------------------------------------------+ + +ALL_TESTS=" + ping_ipv4 + ping_ipv6 + test_stats_rx_ipv4 + test_stats_tx_ipv4 + test_stats_rx_ipv6 + test_stats_tx_ipv6 + respin_enablement + test_stats_rx_ipv4 + test_stats_tx_ipv4 + test_stats_rx_ipv6 + test_stats_tx_ipv6 + reapply_config + ping_ipv4 + ping_ipv6 + test_stats_rx_ipv4 + test_stats_tx_ipv4 + test_stats_rx_ipv6 + test_stats_tx_ipv6 + test_stats_report_rx + test_stats_report_tx + test_destroy_enabled + test_double_enable +" +NUM_NETIFS=4 +source lib.sh + +h1_create() +{ + simple_if_init $h1 + vlan_create $h1 200 v$h1 192.0.2.1/28 2001:db8:1::1/64 + ip route add 192.0.2.16/28 vrf v$h1 nexthop via 192.0.2.2 + ip -6 route add 2001:db8:2::/64 vrf v$h1 nexthop via 2001:db8:1::2 +} + +h1_destroy() +{ + ip -6 route del 2001:db8:2::/64 vrf v$h1 nexthop via 2001:db8:1::2 + ip route del 192.0.2.16/28 vrf v$h1 nexthop via 192.0.2.2 + vlan_destroy $h1 200 + simple_if_fini $h1 +} + +h2_create() +{ + simple_if_init $h2 + vlan_create $h2 200 v$h2 192.0.2.18/28 2001:db8:2::1/64 + ip route add 192.0.2.0/28 vrf v$h2 nexthop via 192.0.2.17 + ip -6 route add 2001:db8:1::/64 vrf v$h2 nexthop via 2001:db8:2::2 +} + +h2_destroy() +{ + ip -6 route del 2001:db8:1::/64 vrf v$h2 nexthop via 2001:db8:2::2 + ip route del 192.0.2.0/28 vrf v$h2 nexthop via 192.0.2.17 + vlan_destroy $h2 200 + simple_if_fini $h2 +} + +router_rp1_200_create() +{ + ip link add name $rp1.200 link $rp1 type vlan id 200 + ip link set dev $rp1.200 addrgenmode eui64 + ip link set dev $rp1.200 up + ip address add dev $rp1.200 192.0.2.2/28 + ip address add dev $rp1.200 2001:db8:1::2/64 + ip stats set dev $rp1.200 l3_stats on +} + +router_rp1_200_destroy() +{ + ip stats set dev $rp1.200 l3_stats off + ip address del dev $rp1.200 2001:db8:1::2/64 + ip address del dev $rp1.200 192.0.2.2/28 + ip link del dev $rp1.200 +} + +router_create() +{ + ip link set dev $rp1 up + router_rp1_200_create + + ip link set dev $rp2 up + vlan_create $rp2 200 "" 192.0.2.17/28 2001:db8:2::2/64 +} + +router_destroy() +{ + vlan_destroy $rp2 200 + ip link set dev $rp2 down + + router_rp1_200_destroy + ip link set dev $rp1 down +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + rp1=${NETIFS[p2]} + + rp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + rp1mac=$(mac_get $rp1) + rp2mac=$(mac_get $rp2) + + vrf_prepare + + h1_create + h2_create + + router_create + + forwarding_enable +} + +cleanup() +{ + pre_cleanup + + forwarding_restore + + router_destroy + + h2_destroy + h1_destroy + + vrf_cleanup +} + +ping_ipv4() +{ + ping_test $h1.200 192.0.2.18 " IPv4" +} + +ping_ipv6() +{ + ping_test $h1.200 2001:db8:2::1 " IPv6" +} + +send_packets_rx_ipv4() +{ + # Send 21 packets instead of 20, because the first one might trap and go + # through the SW datapath, which might not bump the HW counter. + $MZ $h1.200 -c 21 -d 20msec -p 100 \ + -a own -b $rp1mac -A 192.0.2.1 -B 192.0.2.18 \ + -q -t udp sp=54321,dp=12345 +} + +send_packets_rx_ipv6() +{ + $MZ $h1.200 -6 -c 21 -d 20msec -p 100 \ + -a own -b $rp1mac -A 2001:db8:1::1 -B 2001:db8:2::1 \ + -q -t udp sp=54321,dp=12345 +} + +send_packets_tx_ipv4() +{ + $MZ $h2.200 -c 21 -d 20msec -p 100 \ + -a own -b $rp2mac -A 192.0.2.18 -B 192.0.2.1 \ + -q -t udp sp=54321,dp=12345 +} + +send_packets_tx_ipv6() +{ + $MZ $h2.200 -6 -c 21 -d 20msec -p 100 \ + -a own -b $rp2mac -A 2001:db8:2::1 -B 2001:db8:1::1 \ + -q -t udp sp=54321,dp=12345 +} + +___test_stats() +{ + local dir=$1; shift + local prot=$1; shift + + local a + local b + + a=$(hw_stats_get l3_stats $rp1.200 ${dir} packets) + send_packets_${dir}_${prot} + "$@" + b=$(busywait "$TC_HIT_TIMEOUT" until_counter_is ">= $a + 20" \ + hw_stats_get l3_stats $rp1.200 ${dir} packets) + check_err $? "Traffic not reflected in the counter: $a -> $b" +} + +__test_stats() +{ + local dir=$1; shift + local prot=$1; shift + + RET=0 + ___test_stats "$dir" "$prot" + log_test "Test $dir packets: $prot" +} + +test_stats_rx_ipv4() +{ + __test_stats rx ipv4 +} + +test_stats_tx_ipv4() +{ + __test_stats tx ipv4 +} + +test_stats_rx_ipv6() +{ + __test_stats rx ipv6 +} + +test_stats_tx_ipv6() +{ + __test_stats tx ipv6 +} + +# Make sure everything works well even after stats have been disabled and +# reenabled on the same device without touching the L3 configuration. +respin_enablement() +{ + log_info "Turning stats off and on again" + ip stats set dev $rp1.200 l3_stats off + ip stats set dev $rp1.200 l3_stats on +} + +# For the initial run, l3_stats is enabled on a completely set up netdevice. Now +# do it the other way around: enabling the L3 stats on an L2 netdevice, and only +# then apply the L3 configuration. +reapply_config() +{ + log_info "Reapplying configuration" + + router_rp1_200_destroy + + ip link add name $rp1.200 link $rp1 type vlan id 200 + ip link set dev $rp1.200 addrgenmode none + ip stats set dev $rp1.200 l3_stats on + ip link set dev $rp1.200 addrgenmode eui64 + ip link set dev $rp1.200 up + ip address add dev $rp1.200 192.0.2.2/28 + ip address add dev $rp1.200 2001:db8:1::2/64 +} + +__test_stats_report() +{ + local dir=$1; shift + local prot=$1; shift + + local a + local b + + RET=0 + + a=$(hw_stats_get l3_stats $rp1.200 ${dir} packets) + send_packets_${dir}_${prot} + ip address flush dev $rp1.200 + b=$(busywait "$TC_HIT_TIMEOUT" until_counter_is ">= $a + 20" \ + hw_stats_get l3_stats $rp1.200 ${dir} packets) + check_err $? "Traffic not reflected in the counter: $a -> $b" + log_test "Test ${dir} packets: stats pushed on loss of L3" + + ip stats set dev $rp1.200 l3_stats off + ip link del dev $rp1.200 + router_rp1_200_create +} + +test_stats_report_rx() +{ + __test_stats_report rx ipv4 +} + +test_stats_report_tx() +{ + __test_stats_report tx ipv4 +} + +test_destroy_enabled() +{ + RET=0 + + ip link del dev $rp1.200 + router_rp1_200_create + + log_test "Destroy l3_stats-enabled netdev" +} + +test_double_enable() +{ + RET=0 + ___test_stats rx ipv4 \ + ip stats set dev $rp1.200 l3_stats on + log_test "Test stat retention across a spurious enablement" +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +used=$(ip -j stats show dev $rp1.200 group offload subgroup hw_stats_info | + jq '.[].info.l3_stats.used') +kind=$(ip -j -d link show dev $rp1 | + jq -r '.[].linkinfo.info_kind') +if [[ $used != true ]]; then + if [[ $kind == veth ]]; then + log_test_skip "l3_stats not offloaded on veth interface" + EXIT_STATUS=$ksft_skip + else + RET=1 log_test "l3_stats not offloaded" + fi +else + tests_run +fi + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/hw_stats_l3_gre.sh b/tools/testing/selftests/net/forwarding/hw_stats_l3_gre.sh new file mode 100755 index 000000000000..7594bbb49029 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/hw_stats_l3_gre.sh @@ -0,0 +1,111 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Test L3 stats on IP-in-IP GRE tunnel without key. + +# This test uses flat topology for IP tunneling tests. See ipip_lib.sh for more +# details. + +ALL_TESTS=" + ping_ipv4 + test_stats_rx + test_stats_tx +" +NUM_NETIFS=6 +source lib.sh +source ipip_lib.sh + +setup_prepare() +{ + h1=${NETIFS[p1]} + ol1=${NETIFS[p2]} + + ul1=${NETIFS[p3]} + ul2=${NETIFS[p4]} + + ol2=${NETIFS[p5]} + h2=${NETIFS[p6]} + + ol1mac=$(mac_get $ol1) + + forwarding_enable + vrf_prepare + h1_create + h2_create + sw1_flat_create gre $ol1 $ul1 + sw2_flat_create gre $ol2 $ul2 + ip stats set dev g1a l3_stats on + ip stats set dev g2a l3_stats on +} + +cleanup() +{ + pre_cleanup + + ip stats set dev g1a l3_stats off + ip stats set dev g2a l3_stats off + + sw2_flat_destroy $ol2 $ul2 + sw1_flat_destroy $ol1 $ul1 + h2_destroy + h1_destroy + + vrf_cleanup + forwarding_restore +} + +ping_ipv4() +{ + RET=0 + + ping_test $h1 192.0.2.18 " gre flat" +} + +send_packets_ipv4() +{ + # Send 21 packets instead of 20, because the first one might trap and go + # through the SW datapath, which might not bump the HW counter. + $MZ $h1 -c 21 -d 20msec -p 100 \ + -a own -b $ol1mac -A 192.0.2.1 -B 192.0.2.18 \ + -q -t udp sp=54321,dp=12345 +} + +test_stats() +{ + local dev=$1; shift + local dir=$1; shift + + local a + local b + + RET=0 + + a=$(hw_stats_get l3_stats $dev $dir packets) + send_packets_ipv4 + b=$(busywait "$TC_HIT_TIMEOUT" until_counter_is ">= $a + 20" \ + hw_stats_get l3_stats $dev $dir packets) + check_err $? "Traffic not reflected in the counter: $a -> $b" + + log_test "Test $dir packets: $prot" +} + +test_stats_tx() +{ + test_stats g1a tx +} + +test_stats_rx() +{ + test_stats g2a rx +} + +skip_on_veth + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/ip6_forward_instats_vrf.sh b/tools/testing/selftests/net/forwarding/ip6_forward_instats_vrf.sh new file mode 100755 index 000000000000..49fa94b53a1c --- /dev/null +++ b/tools/testing/selftests/net/forwarding/ip6_forward_instats_vrf.sh @@ -0,0 +1,174 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Test ipv6 stats on the incoming if when forwarding with VRF + +ALL_TESTS=" + ipv6_ping + ipv6_in_too_big_err + ipv6_in_hdr_err + ipv6_in_addr_err + ipv6_in_discard +" + +NUM_NETIFS=4 +source lib.sh + +require_command $TROUTE6 + +h1_create() +{ + simple_if_init $h1 2001:1:1::2/64 + ip -6 route add vrf v$h1 2001:1:2::/64 via 2001:1:1::1 +} + +h1_destroy() +{ + ip -6 route del vrf v$h1 2001:1:2::/64 via 2001:1:1::1 + simple_if_fini $h1 2001:1:1::2/64 +} + +router_create() +{ + vrf_create router + __simple_if_init $rtr1 router 2001:1:1::1/64 + __simple_if_init $rtr2 router 2001:1:2::1/64 + mtu_set $rtr2 1280 +} + +router_destroy() +{ + mtu_restore $rtr2 + __simple_if_fini $rtr2 2001:1:2::1/64 + __simple_if_fini $rtr1 2001:1:1::1/64 + vrf_destroy router +} + +h2_create() +{ + simple_if_init $h2 2001:1:2::2/64 + ip -6 route add vrf v$h2 2001:1:1::/64 via 2001:1:2::1 + mtu_set $h2 1280 +} + +h2_destroy() +{ + mtu_restore $h2 + ip -6 route del vrf v$h2 2001:1:1::/64 via 2001:1:2::1 + simple_if_fini $h2 2001:1:2::2/64 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + rtr1=${NETIFS[p2]} + + rtr2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + vrf_prepare + h1_create + router_create + h2_create + + forwarding_enable +} + +cleanup() +{ + pre_cleanup + + forwarding_restore + + h2_destroy + router_destroy + h1_destroy + vrf_cleanup +} + +ipv6_in_too_big_err() +{ + RET=0 + + local t0=$(ipv6_stats_get $rtr1 Ip6InTooBigErrors) + local vrf_name=$(master_name_get $h1) + + # Send too big packets + ip vrf exec $vrf_name \ + $PING6 -s 1300 2001:1:2::2 -c 1 -w $PING_TIMEOUT &> /dev/null + + local t1=$(ipv6_stats_get $rtr1 Ip6InTooBigErrors) + test "$((t1 - t0))" -ne 0 + check_err $? + log_test "Ip6InTooBigErrors" +} + +ipv6_in_hdr_err() +{ + RET=0 + + local t0=$(ipv6_stats_get $rtr1 Ip6InHdrErrors) + local vrf_name=$(master_name_get $h1) + + # Send packets with hop limit 1, easiest with traceroute6 as some ping6 + # doesn't allow hop limit to be specified + ip vrf exec $vrf_name \ + $TROUTE6 2001:1:2::2 &> /dev/null + + local t1=$(ipv6_stats_get $rtr1 Ip6InHdrErrors) + test "$((t1 - t0))" -ne 0 + check_err $? + log_test "Ip6InHdrErrors" +} + +ipv6_in_addr_err() +{ + RET=0 + + local t0=$(ipv6_stats_get $rtr1 Ip6InAddrErrors) + local vrf_name=$(master_name_get $h1) + + # Disable forwarding temporary while sending the packet + sysctl -qw net.ipv6.conf.all.forwarding=0 + ip vrf exec $vrf_name \ + $PING6 2001:1:2::2 -c 1 -w $PING_TIMEOUT &> /dev/null + sysctl -qw net.ipv6.conf.all.forwarding=1 + + local t1=$(ipv6_stats_get $rtr1 Ip6InAddrErrors) + test "$((t1 - t0))" -ne 0 + check_err $? + log_test "Ip6InAddrErrors" +} + +ipv6_in_discard() +{ + RET=0 + + local t0=$(ipv6_stats_get $rtr1 Ip6InDiscards) + local vrf_name=$(master_name_get $h1) + + # Add a policy to discard + ip xfrm policy add dst 2001:1:2::2/128 dir fwd action block + ip vrf exec $vrf_name \ + $PING6 2001:1:2::2 -c 1 -w $PING_TIMEOUT &> /dev/null + ip xfrm policy del dst 2001:1:2::2/128 dir fwd + + local t1=$(ipv6_stats_get $rtr1 Ip6InDiscards) + test "$((t1 - t0))" -ne 0 + check_err $? + log_test "Ip6InDiscards" +} +ipv6_ping() +{ + RET=0 + + ping6_test $h1 2001:1:2::2 +} + +trap cleanup EXIT + +setup_prepare +setup_wait +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh b/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh new file mode 100755 index 000000000000..2ab9eaaa5532 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh @@ -0,0 +1,466 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test traffic distribution when there are multiple paths between an IPv6 GRE +# tunnel. The tunnel carries IPv4 and IPv6 traffic between multiple hosts. +# Multiple routes are in the underlay network. With the default multipath +# policy, SW2 will only look at the outer IP addresses, hence only a single +# route would be used. +# +# +--------------------------------+ +# | H1 | +# | $h1 + | +# | 198.51.100.{2-253}/24 | | +# | 2001:db8:1::{2-fd}/64 | | +# +-------------------------|------+ +# | +# +-------------------------|-------------------+ +# | SW1 | | +# | $ol1 + | +# | 198.51.100.1/24 | +# | 2001:db8:1::1/64 | +# | | +# |+ g1 (ip6gre) | +# | loc=2001:db8:3::1 | +# | rem=2001:db8:3::2 -. | +# | tos=inherit | | +# | v | +# | + $ul1 | +# | | 2001:db8:10::1/64 | +# +---------------------|-----------------------+ +# | +# +---------------------|-----------------------+ +# | SW2 | | +# | $ul21 + | +# | 2001:db8:10::2/64 | | +# | | | +# ! __________________+___ | +# | / \ | +# | | | | +# | + $ul22.111 (vlan) + $ul22.222 (vlan) | +# | | 2001:db8:11::1/64 | 2001:db8:12::1/64 | +# | | | | +# +--|----------------------|-------------------+ +# | | +# +--|----------------------|-------------------+ +# | | | | +# | + $ul32.111 (vlan) + $ul32.222 (vlan) | +# | | 2001:db8:11::2/64 | 2001:db8:12::2/64 | +# | | | | +# | \__________________+___/ | +# | | | +# | | | +# | $ul31 + | +# | 2001:db8:13::1/64 | SW3 | +# +---------------------|-----------------------+ +# | +# +---------------------|-----------------------+ +# | + $ul4 | +# | ^ 2001:db8:13::2/64 | +# | | | +# |+ g2 (ip6gre) | | +# | loc=2001:db8:3::2 | | +# | rem=2001:db8:3::1 -' | +# | tos=inherit | +# | | +# | $ol4 + | +# | 203.0.113.1/24 | | +# | 2001:db8:2::1/64 | SW4 | +# +-------------------------|-------------------+ +# | +# +-------------------------|------+ +# | | | +# | $h2 + | +# | 203.0.113.{2-253}/24 | +# | 2001:db8:2::{2-fd}/64 H2 | +# +--------------------------------+ + +ALL_TESTS=" + ping_ipv4 + ping_ipv6 + custom_hash +" + +NUM_NETIFS=10 +source lib.sh + +h1_create() +{ + simple_if_init $h1 198.51.100.2/24 2001:db8:1::2/64 + ip route add vrf v$h1 default via 198.51.100.1 dev $h1 + ip -6 route add vrf v$h1 default via 2001:db8:1::1 dev $h1 +} + +h1_destroy() +{ + ip -6 route del vrf v$h1 default + ip route del vrf v$h1 default + simple_if_fini $h1 198.51.100.2/24 2001:db8:1::2/64 +} + +sw1_create() +{ + simple_if_init $ol1 198.51.100.1/24 2001:db8:1::1/64 + __simple_if_init $ul1 v$ol1 2001:db8:10::1/64 + + tunnel_create g1 ip6gre 2001:db8:3::1 2001:db8:3::2 tos inherit \ + dev v$ol1 + __simple_if_init g1 v$ol1 2001:db8:3::1/128 + ip route add vrf v$ol1 2001:db8:3::2/128 via 2001:db8:10::2 + + ip route add vrf v$ol1 203.0.113.0/24 dev g1 + ip -6 route add vrf v$ol1 2001:db8:2::/64 dev g1 +} + +sw1_destroy() +{ + ip -6 route del vrf v$ol1 2001:db8:2::/64 + ip route del vrf v$ol1 203.0.113.0/24 + + ip route del vrf v$ol1 2001:db8:3::2/128 + __simple_if_fini g1 2001:db8:3::1/128 + tunnel_destroy g1 + + __simple_if_fini $ul1 2001:db8:10::1/64 + simple_if_fini $ol1 198.51.100.1/24 2001:db8:1::1/64 +} + +sw2_create() +{ + simple_if_init $ul21 2001:db8:10::2/64 + __simple_if_init $ul22 v$ul21 + vlan_create $ul22 111 v$ul21 2001:db8:11::1/64 + vlan_create $ul22 222 v$ul21 2001:db8:12::1/64 + + ip -6 route add vrf v$ul21 2001:db8:3::1/128 via 2001:db8:10::1 + ip -6 route add vrf v$ul21 2001:db8:3::2/128 \ + nexthop via 2001:db8:11::2 \ + nexthop via 2001:db8:12::2 +} + +sw2_destroy() +{ + ip -6 route del vrf v$ul21 2001:db8:3::2/128 + ip -6 route del vrf v$ul21 2001:db8:3::1/128 + + vlan_destroy $ul22 222 + vlan_destroy $ul22 111 + __simple_if_fini $ul22 + simple_if_fini $ul21 2001:db8:10::2/64 +} + +sw3_create() +{ + simple_if_init $ul31 2001:db8:13::1/64 + __simple_if_init $ul32 v$ul31 + vlan_create $ul32 111 v$ul31 2001:db8:11::2/64 + vlan_create $ul32 222 v$ul31 2001:db8:12::2/64 + + ip -6 route add vrf v$ul31 2001:db8:3::2/128 via 2001:db8:13::2 + ip -6 route add vrf v$ul31 2001:db8:3::1/128 \ + nexthop via 2001:db8:11::1 \ + nexthop via 2001:db8:12::1 + + tc qdisc add dev $ul32 clsact + tc filter add dev $ul32 ingress pref 111 prot 802.1Q \ + flower vlan_id 111 action pass + tc filter add dev $ul32 ingress pref 222 prot 802.1Q \ + flower vlan_id 222 action pass +} + +sw3_destroy() +{ + tc qdisc del dev $ul32 clsact + + ip -6 route del vrf v$ul31 2001:db8:3::1/128 + ip -6 route del vrf v$ul31 2001:db8:3::2/128 + + vlan_destroy $ul32 222 + vlan_destroy $ul32 111 + __simple_if_fini $ul32 + simple_if_fini $ul31 2001:db8:13::1/64 +} + +sw4_create() +{ + simple_if_init $ol4 203.0.113.1/24 2001:db8:2::1/64 + __simple_if_init $ul4 v$ol4 2001:db8:13::2/64 + + tunnel_create g2 ip6gre 2001:db8:3::2 2001:db8:3::1 tos inherit \ + dev v$ol4 + __simple_if_init g2 v$ol4 2001:db8:3::2/128 + ip -6 route add vrf v$ol4 2001:db8:3::1/128 via 2001:db8:13::1 + + ip route add vrf v$ol4 198.51.100.0/24 dev g2 + ip -6 route add vrf v$ol4 2001:db8:1::/64 dev g2 +} + +sw4_destroy() +{ + ip -6 route del vrf v$ol4 2001:db8:1::/64 + ip route del vrf v$ol4 198.51.100.0/24 + + ip -6 route del vrf v$ol4 2001:db8:3::1/128 + __simple_if_fini g2 2001:db8:3::2/128 + tunnel_destroy g2 + + __simple_if_fini $ul4 2001:db8:13::2/64 + simple_if_fini $ol4 203.0.113.1/24 2001:db8:2::1/64 +} + +h2_create() +{ + simple_if_init $h2 203.0.113.2/24 2001:db8:2::2/64 + ip route add vrf v$h2 default via 203.0.113.1 dev $h2 + ip -6 route add vrf v$h2 default via 2001:db8:2::1 dev $h2 +} + +h2_destroy() +{ + ip -6 route del vrf v$h2 default + ip route del vrf v$h2 default + simple_if_fini $h2 203.0.113.2/24 2001:db8:2::2/64 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + + ol1=${NETIFS[p2]} + ul1=${NETIFS[p3]} + + ul21=${NETIFS[p4]} + ul22=${NETIFS[p5]} + + ul32=${NETIFS[p6]} + ul31=${NETIFS[p7]} + + ul4=${NETIFS[p8]} + ol4=${NETIFS[p9]} + + h2=${NETIFS[p10]} + + vrf_prepare + h1_create + sw1_create + sw2_create + sw3_create + sw4_create + h2_create + + forwarding_enable +} + +cleanup() +{ + pre_cleanup + + forwarding_restore + + h2_destroy + sw4_destroy + sw3_destroy + sw2_destroy + sw1_destroy + h1_destroy + vrf_cleanup +} + +ping_ipv4() +{ + ping_test $h1 203.0.113.2 +} + +ping_ipv6() +{ + ping6_test $h1 2001:db8:2::2 +} + +send_src_ipv4() +{ + ip vrf exec v$h1 $MZ $h1 -q -p 64 \ + -A "198.51.100.2-198.51.100.253" -B 203.0.113.2 \ + -d $MZ_DELAY -c 50 -t udp "sp=20000,dp=30000" +} + +send_dst_ipv4() +{ + ip vrf exec v$h1 $MZ $h1 -q -p 64 \ + -A 198.51.100.2 -B "203.0.113.2-203.0.113.253" \ + -d $MZ_DELAY -c 50 -t udp "sp=20000,dp=30000" +} + +send_src_udp4() +{ + ip vrf exec v$h1 $MZ $h1 -q -p 64 \ + -A 198.51.100.2 -B 203.0.113.2 \ + -d $MZ_DELAY -t udp "sp=0-32768,dp=30000" +} + +send_dst_udp4() +{ + ip vrf exec v$h1 $MZ $h1 -q -p 64 \ + -A 198.51.100.2 -B 203.0.113.2 \ + -d $MZ_DELAY -t udp "sp=20000,dp=0-32768" +} + +send_src_ipv6() +{ + ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \ + -A "2001:db8:1::2-2001:db8:1::fd" -B 2001:db8:2::2 \ + -d $MZ_DELAY -c 50 -t udp "sp=20000,dp=30000" +} + +send_dst_ipv6() +{ + ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \ + -A 2001:db8:1::2 -B "2001:db8:2::2-2001:db8:2::fd" \ + -d $MZ_DELAY -c 50 -t udp "sp=20000,dp=30000" +} + +send_flowlabel() +{ + # Generate 16384 echo requests, each with a random flow label. + for _ in $(seq 1 16384); do + ip vrf exec v$h1 \ + $PING6 2001:db8:2::2 -F 0 -c 1 -q >/dev/null 2>&1 + done +} + +send_src_udp6() +{ + ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \ + -A 2001:db8:1::2 -B 2001:db8:2::2 \ + -d $MZ_DELAY -t udp "sp=0-32768,dp=30000" +} + +send_dst_udp6() +{ + ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \ + -A 2001:db8:1::2 -B 2001:db8:2::2 \ + -d $MZ_DELAY -t udp "sp=20000,dp=0-32768" +} + +custom_hash_test() +{ + local field="$1"; shift + local balanced="$1"; shift + local send_flows="$@" + + RET=0 + + local t0_111=$(tc_rule_stats_get $ul32 111 ingress) + local t0_222=$(tc_rule_stats_get $ul32 222 ingress) + + $send_flows + + local t1_111=$(tc_rule_stats_get $ul32 111 ingress) + local t1_222=$(tc_rule_stats_get $ul32 222 ingress) + + local d111=$((t1_111 - t0_111)) + local d222=$((t1_222 - t0_222)) + + local diff=$((d222 - d111)) + local sum=$((d111 + d222)) + + local pct=$(echo "$diff / $sum * 100" | bc -l) + local is_balanced=$(echo "-20 <= $pct && $pct <= 20" | bc) + + [[ ( $is_balanced -eq 1 && $balanced == "balanced" ) || + ( $is_balanced -eq 0 && $balanced == "unbalanced" ) ]] + check_err $? "Expected traffic to be $balanced, but it is not" + + log_test "Multipath hash field: $field ($balanced)" + log_info "Packets sent on path1 / path2: $d111 / $d222" +} + +custom_hash_v4() +{ + log_info "Running IPv4 overlay custom multipath hash tests" + + # Prevent the neighbour table from overflowing, as different neighbour + # entries will be created on $ol4 when using different destination IPs. + sysctl_set net.ipv4.neigh.default.gc_thresh1 1024 + sysctl_set net.ipv4.neigh.default.gc_thresh2 1024 + sysctl_set net.ipv4.neigh.default.gc_thresh3 1024 + + sysctl_set net.ipv6.fib_multipath_hash_fields 0x0040 + custom_hash_test "Inner source IP" "balanced" send_src_ipv4 + custom_hash_test "Inner source IP" "unbalanced" send_dst_ipv4 + + sysctl_set net.ipv6.fib_multipath_hash_fields 0x0080 + custom_hash_test "Inner destination IP" "balanced" send_dst_ipv4 + custom_hash_test "Inner destination IP" "unbalanced" send_src_ipv4 + + sysctl_set net.ipv6.fib_multipath_hash_fields 0x0400 + custom_hash_test "Inner source port" "balanced" send_src_udp4 + custom_hash_test "Inner source port" "unbalanced" send_dst_udp4 + + sysctl_set net.ipv6.fib_multipath_hash_fields 0x0800 + custom_hash_test "Inner destination port" "balanced" send_dst_udp4 + custom_hash_test "Inner destination port" "unbalanced" send_src_udp4 + + sysctl_restore net.ipv4.neigh.default.gc_thresh3 + sysctl_restore net.ipv4.neigh.default.gc_thresh2 + sysctl_restore net.ipv4.neigh.default.gc_thresh1 +} + +custom_hash_v6() +{ + log_info "Running IPv6 overlay custom multipath hash tests" + + # Prevent the neighbour table from overflowing, as different neighbour + # entries will be created on $ol4 when using different destination IPs. + sysctl_set net.ipv6.neigh.default.gc_thresh1 1024 + sysctl_set net.ipv6.neigh.default.gc_thresh2 1024 + sysctl_set net.ipv6.neigh.default.gc_thresh3 1024 + + sysctl_set net.ipv6.fib_multipath_hash_fields 0x0040 + custom_hash_test "Inner source IP" "balanced" send_src_ipv6 + custom_hash_test "Inner source IP" "unbalanced" send_dst_ipv6 + + sysctl_set net.ipv6.fib_multipath_hash_fields 0x0080 + custom_hash_test "Inner destination IP" "balanced" send_dst_ipv6 + custom_hash_test "Inner destination IP" "unbalanced" send_src_ipv6 + + sysctl_set net.ipv6.fib_multipath_hash_fields 0x0200 + custom_hash_test "Inner flowlabel" "balanced" send_flowlabel + custom_hash_test "Inner flowlabel" "unbalanced" send_src_ipv6 + + sysctl_set net.ipv6.fib_multipath_hash_fields 0x0400 + custom_hash_test "Inner source port" "balanced" send_src_udp6 + custom_hash_test "Inner source port" "unbalanced" send_dst_udp6 + + sysctl_set net.ipv6.fib_multipath_hash_fields 0x0800 + custom_hash_test "Inner destination port" "balanced" send_dst_udp6 + custom_hash_test "Inner destination port" "unbalanced" send_src_udp6 + + sysctl_restore net.ipv6.neigh.default.gc_thresh3 + sysctl_restore net.ipv6.neigh.default.gc_thresh2 + sysctl_restore net.ipv6.neigh.default.gc_thresh1 +} + +custom_hash() +{ + # Test that when the hash policy is set to custom, traffic is + # distributed only according to the fields set in the + # fib_multipath_hash_fields sysctl. + # + # Each time set a different field and make sure traffic is only + # distributed when the field is changed in the packet stream. + + sysctl_set net.ipv6.fib_multipath_hash_policy 3 + + custom_hash_v4 + custom_hash_v6 + + sysctl_restore net.ipv6.fib_multipath_hash_policy +} + +trap cleanup EXIT + +setup_prepare +setup_wait +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/ip6gre_flat.sh b/tools/testing/selftests/net/forwarding/ip6gre_flat.sh new file mode 100755 index 000000000000..96c97064f2d3 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/ip6gre_flat.sh @@ -0,0 +1,65 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Test IP-in-IP GRE tunnel without key. +# This test uses flat topology for IP tunneling tests. See ip6gre_lib.sh for +# more details. + +ALL_TESTS=" + gre_flat + gre_mtu_change +" + +NUM_NETIFS=6 +source lib.sh +source ip6gre_lib.sh + +setup_prepare() +{ + h1=${NETIFS[p1]} + ol1=${NETIFS[p2]} + + ul1=${NETIFS[p3]} + ul2=${NETIFS[p4]} + + ol2=${NETIFS[p5]} + h2=${NETIFS[p6]} + + forwarding_enable + vrf_prepare + h1_create + h2_create + sw1_flat_create $ol1 $ul1 + sw2_flat_create $ol2 $ul2 +} + +gre_flat() +{ + test_traffic_ip4ip6 "GRE flat IPv4-in-IPv6" + test_traffic_ip6ip6 "GRE flat IPv6-in-IPv6" +} + +gre_mtu_change() +{ + test_mtu_change +} + +cleanup() +{ + pre_cleanup + + sw2_flat_destroy $ol2 $ul2 + sw1_flat_destroy $ol1 $ul1 + h2_destroy + h1_destroy + vrf_cleanup + forwarding_restore +} + +trap cleanup EXIT + +setup_prepare +setup_wait +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/ip6gre_flat_key.sh b/tools/testing/selftests/net/forwarding/ip6gre_flat_key.sh new file mode 100755 index 000000000000..ff9fb0db9bd1 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/ip6gre_flat_key.sh @@ -0,0 +1,65 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Test IP-in-IP GRE tunnel with key. +# This test uses flat topology for IP tunneling tests. See ip6gre_lib.sh for +# more details. + +ALL_TESTS=" + gre_flat + gre_mtu_change +" + +NUM_NETIFS=6 +source lib.sh +source ip6gre_lib.sh + +setup_prepare() +{ + h1=${NETIFS[p1]} + ol1=${NETIFS[p2]} + + ul1=${NETIFS[p3]} + ul2=${NETIFS[p4]} + + ol2=${NETIFS[p5]} + h2=${NETIFS[p6]} + + forwarding_enable + vrf_prepare + h1_create + h2_create + sw1_flat_create $ol1 $ul1 key 233 + sw2_flat_create $ol2 $ul2 key 233 +} + +gre_flat() +{ + test_traffic_ip4ip6 "GRE flat IPv4-in-IPv6 with key" + test_traffic_ip6ip6 "GRE flat IPv6-in-IPv6 with key" +} + +gre_mtu_change() +{ + test_mtu_change +} + +cleanup() +{ + pre_cleanup + + sw2_flat_destroy $ol2 $ul2 + sw1_flat_destroy $ol1 $ul1 + h2_destroy + h1_destroy + vrf_cleanup + forwarding_restore +} + +trap cleanup EXIT + +setup_prepare +setup_wait +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/ip6gre_flat_keys.sh b/tools/testing/selftests/net/forwarding/ip6gre_flat_keys.sh new file mode 100755 index 000000000000..12c138785242 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/ip6gre_flat_keys.sh @@ -0,0 +1,65 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Test IP-in-IP GRE tunnel with keys. +# This test uses flat topology for IP tunneling tests. See ip6gre_lib.sh for +# more details. + +ALL_TESTS=" + gre_flat + gre_mtu_change +" + +NUM_NETIFS=6 +source lib.sh +source ip6gre_lib.sh + +setup_prepare() +{ + h1=${NETIFS[p1]} + ol1=${NETIFS[p2]} + + ul1=${NETIFS[p3]} + ul2=${NETIFS[p4]} + + ol2=${NETIFS[p5]} + h2=${NETIFS[p6]} + + forwarding_enable + vrf_prepare + h1_create + h2_create + sw1_flat_create $ol1 $ul1 ikey 111 okey 222 + sw2_flat_create $ol2 $ul2 ikey 222 okey 111 +} + +gre_flat() +{ + test_traffic_ip4ip6 "GRE flat IPv4-in-IPv6 with ikey/okey" + test_traffic_ip6ip6 "GRE flat IPv6-in-IPv6 with ikey/okey" +} + +gre_mtu_change() +{ + test_mtu_change gre +} + +cleanup() +{ + pre_cleanup + + sw2_flat_destroy $ol2 $ul2 + sw1_flat_destroy $ol1 $ul1 + h2_destroy + h1_destroy + vrf_cleanup + forwarding_restore +} + +trap cleanup EXIT + +setup_prepare +setup_wait +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/ip6gre_hier.sh b/tools/testing/selftests/net/forwarding/ip6gre_hier.sh new file mode 100755 index 000000000000..83b55c30a5c3 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/ip6gre_hier.sh @@ -0,0 +1,65 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Test IP-in-IP GRE tunnels without key. +# This test uses hierarchical topology for IP tunneling tests. See +# ip6gre_lib.sh for more details. + +ALL_TESTS=" + gre_hier + gre_mtu_change +" + +NUM_NETIFS=6 +source lib.sh +source ip6gre_lib.sh + +setup_prepare() +{ + h1=${NETIFS[p1]} + ol1=${NETIFS[p2]} + + ul1=${NETIFS[p3]} + ul2=${NETIFS[p4]} + + ol2=${NETIFS[p5]} + h2=${NETIFS[p6]} + + forwarding_enable + vrf_prepare + h1_create + h2_create + sw1_hierarchical_create $ol1 $ul1 + sw2_hierarchical_create $ol2 $ul2 +} + +gre_hier() +{ + test_traffic_ip4ip6 "GRE hierarchical IPv4-in-IPv6" + test_traffic_ip6ip6 "GRE hierarchical IPv6-in-IPv6" +} + +gre_mtu_change() +{ + test_mtu_change gre +} + +cleanup() +{ + pre_cleanup + + sw2_hierarchical_destroy $ol2 $ul2 + sw1_hierarchical_destroy $ol1 $ul1 + h2_destroy + h1_destroy + vrf_cleanup + forwarding_restore +} + +trap cleanup EXIT + +setup_prepare +setup_wait +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/ip6gre_hier_key.sh b/tools/testing/selftests/net/forwarding/ip6gre_hier_key.sh new file mode 100755 index 000000000000..256607916d92 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/ip6gre_hier_key.sh @@ -0,0 +1,65 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Test IP-in-IP GRE tunnels without key. +# This test uses hierarchical topology for IP tunneling tests. See +# ip6gre_lib.sh for more details. + +ALL_TESTS=" + gre_hier + gre_mtu_change +" + +NUM_NETIFS=6 +source lib.sh +source ip6gre_lib.sh + +setup_prepare() +{ + h1=${NETIFS[p1]} + ol1=${NETIFS[p2]} + + ul1=${NETIFS[p3]} + ul2=${NETIFS[p4]} + + ol2=${NETIFS[p5]} + h2=${NETIFS[p6]} + + forwarding_enable + vrf_prepare + h1_create + h2_create + sw1_hierarchical_create $ol1 $ul1 key 22 + sw2_hierarchical_create $ol2 $ul2 key 22 +} + +gre_hier() +{ + test_traffic_ip4ip6 "GRE hierarchical IPv4-in-IPv6 with key" + test_traffic_ip6ip6 "GRE hierarchical IPv6-in-IPv6 with key" +} + +gre_mtu_change() +{ + test_mtu_change gre +} + +cleanup() +{ + pre_cleanup + + sw2_hierarchical_destroy $ol2 $ul2 + sw1_hierarchical_destroy $ol1 $ul1 + h2_destroy + h1_destroy + vrf_cleanup + forwarding_restore +} + +trap cleanup EXIT + +setup_prepare +setup_wait +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/ip6gre_hier_keys.sh b/tools/testing/selftests/net/forwarding/ip6gre_hier_keys.sh new file mode 100755 index 000000000000..ad1bcd6334a8 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/ip6gre_hier_keys.sh @@ -0,0 +1,65 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Test IP-in-IP GRE tunnels without key. +# This test uses hierarchical topology for IP tunneling tests. See +# ip6gre_lib.sh for more details. + +ALL_TESTS=" + gre_hier + gre_mtu_change +" + +NUM_NETIFS=6 +source lib.sh +source ip6gre_lib.sh + +setup_prepare() +{ + h1=${NETIFS[p1]} + ol1=${NETIFS[p2]} + + ul1=${NETIFS[p3]} + ul2=${NETIFS[p4]} + + ol2=${NETIFS[p5]} + h2=${NETIFS[p6]} + + forwarding_enable + vrf_prepare + h1_create + h2_create + sw1_hierarchical_create $ol1 $ul1 ikey 111 okey 222 + sw2_hierarchical_create $ol2 $ul2 ikey 222 okey 111 +} + +gre_hier() +{ + test_traffic_ip4ip6 "GRE hierarchical IPv4-in-IPv6 with ikey/okey" + test_traffic_ip6ip6 "GRE hierarchical IPv6-in-IPv6 with ikey/okey" +} + +gre_mtu_change() +{ + test_mtu_change gre +} + +cleanup() +{ + pre_cleanup + + sw2_hierarchical_destroy $ol2 $ul2 + sw1_hierarchical_destroy $ol1 $ul1 + h2_destroy + h1_destroy + vrf_cleanup + forwarding_restore +} + +trap cleanup EXIT + +setup_prepare +setup_wait +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/ip6gre_inner_v4_multipath.sh b/tools/testing/selftests/net/forwarding/ip6gre_inner_v4_multipath.sh index a257979d3fc5..32d1461f37b7 100755 --- a/tools/testing/selftests/net/forwarding/ip6gre_inner_v4_multipath.sh +++ b/tools/testing/selftests/net/forwarding/ip6gre_inner_v4_multipath.sh @@ -266,7 +266,7 @@ multipath4_test() ip vrf exec v$h1 \ $MZ $h1 -q -p 64 -A "192.0.3.2-192.0.3.62" -B "192.0.4.2-192.0.4.62" \ - -d 1msec -c 50 -t udp "sp=1024,dp=1024" + -d $MZ_DELAY -c 50 -t udp "sp=1024,dp=1024" sleep 1 local t1_111=$(tc_rule_stats_get $ul32 111 ingress) diff --git a/tools/testing/selftests/net/forwarding/ip6gre_inner_v6_multipath.sh b/tools/testing/selftests/net/forwarding/ip6gre_inner_v6_multipath.sh index d208f5243ade..e1a4b50505f5 100755 --- a/tools/testing/selftests/net/forwarding/ip6gre_inner_v6_multipath.sh +++ b/tools/testing/selftests/net/forwarding/ip6gre_inner_v6_multipath.sh @@ -265,9 +265,9 @@ multipath6_test() local t0_222=$(tc_rule_stats_get $ul32 222 ingress) ip vrf exec v$h1 \ - $MZ $h1 -6 -q -p 64 -A "2001:db8:1::2-2001:db8:1::1e" \ - -B "2001:db8:2::2-2001:db8:2::1e" \ - -d 1msec -c 50 -t udp "sp=1024,dp=1024" + $MZ $h1 -6 -q -p 64 -A "2001:db8:1::2-2001:db8:1::3e" \ + -B "2001:db8:2::2-2001:db8:2::3e" \ + -d $MZ_DELAY -c 50 -t udp "sp=1024,dp=1024" sleep 1 local t1_111=$(tc_rule_stats_get $ul32 111 ingress) diff --git a/tools/testing/selftests/net/forwarding/ip6gre_lib.sh b/tools/testing/selftests/net/forwarding/ip6gre_lib.sh new file mode 100644 index 000000000000..24f4ab328bd2 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/ip6gre_lib.sh @@ -0,0 +1,438 @@ +# SPDX-License-Identifier: GPL-2.0 +#!/bin/bash + +# Handles creation and destruction of IP-in-IP or GRE tunnels over the given +# topology. Supports both flat and hierarchical models. +# +# Flat Model: +# Overlay and underlay share the same VRF. +# SW1 uses default VRF so tunnel has no bound dev. +# SW2 uses non-default VRF tunnel has a bound dev. +# +--------------------------------+ +# | H1 | +# | $h1 + | +# | 198.51.100.1/24 | | +# | 2001:db8:1::1/64 | | +# +-------------------------|------+ +# | +# +-------------------------|-------------------+ +# | SW1 | | +# | $ol1 + | +# | 198.51.100.2/24 | +# | 2001:db8:1::2/64 | +# | | +# | + g1a (ip6gre) | +# | loc=2001:db8:3::1 | +# | rem=2001:db8:3::2 --. | +# | tos=inherit | | +# | . | +# | .--------------------- | +# | | | +# | v | +# | + $ul1.111 (vlan) | +# | | 2001:db8:10::1/64 | +# | \ | +# | \____________ | +# | | | +# | VRF default + $ul1 | +# +---------------------|-----------------------+ +# | +# +---------------------|-----------------------+ +# | SW2 | | +# | $ul2 + | +# | ___________| | +# | / | +# | / | +# | + $ul2.111 (vlan) | +# | ^ 2001:db8:10::2/64 | +# | | | +# | | | +# | '----------------------. | +# | + g2a (ip6gre) | | +# | loc=2001:db8:3::2 | | +# | rem=2001:db8:3::1 --' | +# | tos=inherit | +# | | +# | + $ol2 | +# | | 203.0.113.2/24 | +# | VRF v$ol2 | 2001:db8:2::2/64 | +# +---------------------|-----------------------+ +# +---------------------|----------+ +# | H2 | | +# | $h2 + | +# | 203.0.113.1/24 | +# | 2001:db8:2::1/64 | +# +--------------------------------+ +# +# Hierarchical model: +# The tunnel is bound to a device in a different VRF +# +# +--------------------------------+ +# | H1 | +# | $h1 + | +# | 198.51.100.1/24 | | +# | 2001:db8:1::1/64 | | +# +-------------------------|------+ +# | +# +-------------------------|-------------------+ +# | SW1 | | +# | +-----------------------|-----------------+ | +# | | $ol1 + | | +# | | 198.51.100.2/24 | | +# | | 2001:db8:1::2/64 | | +# | | | | +# | | + g1a (ip6gre) | | +# | | loc=2001:db8:3::1 | | +# | | rem=2001:db8:3::2 | | +# | | tos=inherit | | +# | | ^ | | +# | | VRF v$ol1 | | | +# | +--------------------|--------------------+ | +# | | | +# | +--------------------|--------------------+ | +# | | VRF v$ul1 | | | +# | | | | | +# | | v | | +# | | dummy1 + | | +# | | 2001:db8:3::1/64 | | +# | | .-----------' | | +# | | | | | +# | | v | | +# | | + $ul1.111 (vlan) | | +# | | | 2001:db8:10::1/64 | | +# | | \ | | +# | | \__________ | | +# | | | | | +# | | + $ul1 | | +# | +---------------------|-------------------+ | +# +-----------------------|---------------------+ +# | +# +-----------------------|---------------------+ +# | SW2 | | +# | +---------------------|-------------------+ | +# | | + $ul2 | | +# | | _____| | | +# | | / | | +# | | / | | +# | | | $ul2.111 (vlan) | | +# | | + 2001:db8:10::2/64 | | +# | | ^ | | +# | | | | | +# | | '------. | | +# | | dummy2 + | | +# | | 2001:db8:3::2/64 | | +# | | ^ | | +# | | | | | +# | | | | | +# | | VRF v$ul2 | | | +# | +---------------------|-------------------+ | +# | | | +# | +---------------------|-------------------+ | +# | | VRF v$ol2 | | | +# | | | | | +# | | v | | +# | | g2a (ip6gre) + | | +# | | loc=2001:db8:3::2 | | +# | | rem=2001:db8:3::1 | | +# | | tos=inherit | | +# | | | | +# | | $ol2 + | | +# | | 203.0.113.2/24 | | | +# | | 2001:db8:2::2/64 | | | +# | +---------------------|-------------------+ | +# +-----------------------|---------------------+ +# | +# +-----------------------|--------+ +# | H2 | | +# | $h2 + | +# | 203.0.113.1/24 | +# | 2001:db8:2::1/64 | +# +--------------------------------+ + +source lib.sh +source tc_common.sh + +h1_create() +{ + simple_if_init $h1 198.51.100.1/24 2001:db8:1::1/64 + ip route add vrf v$h1 203.0.113.0/24 via 198.51.100.2 + ip -6 route add vrf v$h1 2001:db8:2::/64 via 2001:db8:1::2 +} + +h1_destroy() +{ + ip -6 route del vrf v$h1 2001:db8:2::/64 via 2001:db8:1::2 + ip route del vrf v$h1 203.0.113.0/24 via 198.51.100.2 + simple_if_fini $h1 198.51.100.1/24 2001:db8:1::1/64 +} + +h2_create() +{ + simple_if_init $h2 203.0.113.1/24 2001:db8:2::1/64 + ip route add vrf v$h2 198.51.100.0/24 via 203.0.113.2 + ip -6 route add vrf v$h2 2001:db8:1::/64 via 2001:db8:2::2 +} + +h2_destroy() +{ + ip -6 route del vrf v$h2 2001:db8:1::/64 via 2001:db8:2::2 + ip route del vrf v$h2 198.51.100.0/24 via 203.0.113.2 + simple_if_fini $h2 203.0.113.1/24 2001:db8:2::1/64 +} + +sw1_flat_create() +{ + local ol1=$1; shift + local ul1=$1; shift + + ip link set dev $ol1 up + __addr_add_del $ol1 add 198.51.100.2/24 2001:db8:1::2/64 + + ip link set dev $ul1 up + vlan_create $ul1 111 "" 2001:db8:10::1/64 + + tunnel_create g1a ip6gre 2001:db8:3::1 2001:db8:3::2 tos inherit \ + ttl inherit "$@" + ip link set dev g1a up + __addr_add_del g1a add "2001:db8:3::1/128" + + ip -6 route add 2001:db8:3::2/128 via 2001:db8:10::2 + ip route add 203.0.113.0/24 dev g1a + ip -6 route add 2001:db8:2::/64 dev g1a +} + +sw1_flat_destroy() +{ + local ol1=$1; shift + local ul1=$1; shift + + ip -6 route del 2001:db8:2::/64 + ip route del 203.0.113.0/24 + ip -6 route del 2001:db8:3::2/128 via 2001:db8:10::2 + + __simple_if_fini g1a 2001:db8:3::1/128 + tunnel_destroy g1a + + vlan_destroy $ul1 111 + __simple_if_fini $ul1 + __simple_if_fini $ol1 198.51.100.2/24 2001:db8:1::2/64 +} + +sw2_flat_create() +{ + local ol2=$1; shift + local ul2=$1; shift + + simple_if_init $ol2 203.0.113.2/24 2001:db8:2::2/64 + __simple_if_init $ul2 v$ol2 + vlan_create $ul2 111 v$ol2 2001:db8:10::2/64 + + tunnel_create g2a ip6gre 2001:db8:3::2 2001:db8:3::1 tos inherit \ + ttl inherit dev v$ol2 "$@" + __simple_if_init g2a v$ol2 2001:db8:3::2/128 + + # Replace neighbor to avoid 1 dropped packet due to "unresolved neigh" + ip neigh replace dev $ol2 203.0.113.1 lladdr $(mac_get $h2) + ip -6 neigh replace dev $ol2 2001:db8:2::1 lladdr $(mac_get $h2) + + ip -6 route add vrf v$ol2 2001:db8:3::1/128 via 2001:db8:10::1 + ip route add vrf v$ol2 198.51.100.0/24 dev g2a + ip -6 route add vrf v$ol2 2001:db8:1::/64 dev g2a +} + +sw2_flat_destroy() +{ + local ol2=$1; shift + local ul2=$1; shift + + ip -6 route del vrf v$ol2 2001:db8:2::/64 + ip route del vrf v$ol2 198.51.100.0/24 + ip -6 route del vrf v$ol2 2001:db8:3::1/128 via 2001:db8:10::1 + + __simple_if_fini g2a 2001:db8:3::2/128 + tunnel_destroy g2a + + vlan_destroy $ul2 111 + __simple_if_fini $ul2 + simple_if_fini $ol2 203.0.113.2/24 2001:db8:2::2/64 +} + +sw1_hierarchical_create() +{ + local ol1=$1; shift + local ul1=$1; shift + + simple_if_init $ol1 198.51.100.2/24 2001:db8:1::2/64 + simple_if_init $ul1 + ip link add name dummy1 type dummy + __simple_if_init dummy1 v$ul1 2001:db8:3::1/64 + + vlan_create $ul1 111 v$ul1 2001:db8:10::1/64 + tunnel_create g1a ip6gre 2001:db8:3::1 2001:db8:3::2 tos inherit \ + ttl inherit dev dummy1 "$@" + ip link set dev g1a master v$ol1 + + ip -6 route add vrf v$ul1 2001:db8:3::2/128 via 2001:db8:10::2 + ip route add vrf v$ol1 203.0.113.0/24 dev g1a + ip -6 route add vrf v$ol1 2001:db8:2::/64 dev g1a +} + +sw1_hierarchical_destroy() +{ + local ol1=$1; shift + local ul1=$1; shift + + ip -6 route del vrf v$ol1 2001:db8:2::/64 + ip route del vrf v$ol1 203.0.113.0/24 + ip -6 route del vrf v$ul1 2001:db8:3::2/128 + + tunnel_destroy g1a + vlan_destroy $ul1 111 + + __simple_if_fini dummy1 2001:db8:3::1/64 + ip link del dev dummy1 + + simple_if_fini $ul1 + simple_if_fini $ol1 198.51.100.2/24 2001:db8:1::2/64 +} + +sw2_hierarchical_create() +{ + local ol2=$1; shift + local ul2=$1; shift + + simple_if_init $ol2 203.0.113.2/24 2001:db8:2::2/64 + simple_if_init $ul2 + + ip link add name dummy2 type dummy + __simple_if_init dummy2 v$ul2 2001:db8:3::2/64 + + vlan_create $ul2 111 v$ul2 2001:db8:10::2/64 + tunnel_create g2a ip6gre 2001:db8:3::2 2001:db8:3::1 tos inherit \ + ttl inherit dev dummy2 "$@" + ip link set dev g2a master v$ol2 + + # Replace neighbor to avoid 1 dropped packet due to "unresolved neigh" + ip neigh replace dev $ol2 203.0.113.1 lladdr $(mac_get $h2) + ip -6 neigh replace dev $ol2 2001:db8:2::1 lladdr $(mac_get $h2) + + ip -6 route add vrf v$ul2 2001:db8:3::1/128 via 2001:db8:10::1 + ip route add vrf v$ol2 198.51.100.0/24 dev g2a + ip -6 route add vrf v$ol2 2001:db8:1::/64 dev g2a +} + +sw2_hierarchical_destroy() +{ + local ol2=$1; shift + local ul2=$1; shift + + ip -6 route del vrf v$ol2 2001:db8:2::/64 + ip route del vrf v$ol2 198.51.100.0/24 + ip -6 route del vrf v$ul2 2001:db8:3::1/128 + + tunnel_destroy g2a + vlan_destroy $ul2 111 + + __simple_if_fini dummy2 2001:db8:3::2/64 + ip link del dev dummy2 + + simple_if_fini $ul2 + simple_if_fini $ol2 203.0.113.2/24 2001:db8:2::2/64 +} + +test_traffic_ip4ip6() +{ + RET=0 + + h1mac=$(mac_get $h1) + ol1mac=$(mac_get $ol1) + + tc qdisc add dev $ul1 clsact + tc filter add dev $ul1 egress proto all pref 1 handle 101 \ + flower $TC_FLAG action pass + + tc qdisc add dev $ol2 clsact + tc filter add dev $ol2 egress protocol ipv4 pref 1 handle 101 \ + flower $TC_FLAG dst_ip 203.0.113.1 action pass + + $MZ $h1 -c 1000 -p 64 -a $h1mac -b $ol1mac -A 198.51.100.1 \ + -B 203.0.113.1 -t ip -q -d $MZ_DELAY + + # Check ports after encap and after decap. + tc_check_at_least_x_packets "dev $ul1 egress" 101 1000 + check_err $? "Packets did not go through $ul1, tc_flag = $TC_FLAG" + + tc_check_at_least_x_packets "dev $ol2 egress" 101 1000 + check_err $? "Packets did not go through $ol2, tc_flag = $TC_FLAG" + + log_test "$@" + + tc filter del dev $ol2 egress protocol ipv4 pref 1 handle 101 flower + tc qdisc del dev $ol2 clsact + tc filter del dev $ul1 egress proto all pref 1 handle 101 flower + tc qdisc del dev $ul1 clsact +} + +test_traffic_ip6ip6() +{ + RET=0 + + h1mac=$(mac_get $h1) + ol1mac=$(mac_get $ol1) + + tc qdisc add dev $ul1 clsact + tc filter add dev $ul1 egress proto all pref 1 handle 101 \ + flower $TC_FLAG action pass + + tc qdisc add dev $ol2 clsact + tc filter add dev $ol2 egress protocol ipv6 pref 1 handle 101 \ + flower $TC_FLAG dst_ip 2001:db8:2::1 action pass + + $MZ -6 $h1 -c 1000 -p 64 -a $h1mac -b $ol1mac -A 2001:db8:1::1 \ + -B 2001:db8:2::1 -t ip -q -d $MZ_DELAY + + # Check ports after encap and after decap. + tc_check_at_least_x_packets "dev $ul1 egress" 101 1000 + check_err $? "Packets did not go through $ul1, tc_flag = $TC_FLAG" + + tc_check_at_least_x_packets "dev $ol2 egress" 101 1000 + check_err $? "Packets did not go through $ol2, tc_flag = $TC_FLAG" + + log_test "$@" + + tc filter del dev $ol2 egress protocol ipv6 pref 1 handle 101 flower + tc qdisc del dev $ol2 clsact + tc filter del dev $ul1 egress proto all pref 1 handle 101 flower + tc qdisc del dev $ul1 clsact +} + +topo_mtu_change() +{ + local mtu=$1 + + ip link set mtu $mtu dev $h1 + ip link set mtu $mtu dev $ol1 + ip link set mtu $mtu dev g1a + ip link set mtu $mtu dev $ul1 + ip link set mtu $mtu dev $ul1.111 + ip link set mtu $mtu dev $h2 + ip link set mtu $mtu dev $ol2 + ip link set mtu $mtu dev g2a + ip link set mtu $mtu dev $ul2 + ip link set mtu $mtu dev $ul2.111 +} + +test_mtu_change() +{ + RET=0 + + ping6_do $h1 2001:db8:2::1 "-s 1800 -w 3" + check_fail $? "ping GRE IPv6 should not pass with packet size 1800" + + RET=0 + + topo_mtu_change 2000 + ping6_do $h1 2001:db8:2::1 "-s 1800 -w 3" + check_err $? + log_test "ping GRE IPv6, packet size 1800 after MTU change" +} diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index 927f9ba49e08..e579c2e0c462 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -8,6 +8,7 @@ PING=${PING:=ping} PING6=${PING6:=ping6} MZ=${MZ:=mausezahn} +MZ_DELAY=${MZ_DELAY:=0} ARPING=${ARPING:=arping} TEAMD=${TEAMD:=teamd} WAIT_TIME=${WAIT_TIME:=5} @@ -17,18 +18,51 @@ NETIF_TYPE=${NETIF_TYPE:=veth} NETIF_CREATE=${NETIF_CREATE:=yes} MCD=${MCD:=smcrouted} MC_CLI=${MC_CLI:=smcroutectl} +PING_COUNT=${PING_COUNT:=10} PING_TIMEOUT=${PING_TIMEOUT:=5} WAIT_TIMEOUT=${WAIT_TIMEOUT:=20} INTERFACE_TIMEOUT=${INTERFACE_TIMEOUT:=600} - -relative_path="${BASH_SOURCE%/*}" -if [[ "$relative_path" == "${BASH_SOURCE}" ]]; then - relative_path="." +LOW_AGEING_TIME=${LOW_AGEING_TIME:=1000} +REQUIRE_JQ=${REQUIRE_JQ:=yes} +REQUIRE_MZ=${REQUIRE_MZ:=yes} +REQUIRE_MTOOLS=${REQUIRE_MTOOLS:=no} +STABLE_MAC_ADDRS=${STABLE_MAC_ADDRS:=no} +TCPDUMP_EXTRA_FLAGS=${TCPDUMP_EXTRA_FLAGS:=} +TROUTE6=${TROUTE6:=traceroute6} + +net_forwarding_dir=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")") + +if [[ -f $net_forwarding_dir/forwarding.config ]]; then + source "$net_forwarding_dir/forwarding.config" fi -if [[ -f $relative_path/forwarding.config ]]; then - source "$relative_path/forwarding.config" -fi +source "$net_forwarding_dir/../lib.sh" + +# timeout in seconds +slowwait() +{ + local timeout=$1; shift + + local start_time="$(date -u +%s)" + while true + do + local out + out=$("$@") + local ret=$? + if ((!ret)); then + echo -n "$out" + return 0 + fi + + local current_time="$(date -u +%s)" + if ((current_time - start_time > timeout)); then + echo -n "$out" + return 1 + fi + + sleep 0.1 + done +} ############################################################################## # Sanity checks @@ -38,7 +72,48 @@ check_tc_version() tc -j &> /dev/null if [[ $? -ne 0 ]]; then echo "SKIP: iproute2 too old; tc is missing JSON support" - exit 1 + exit $ksft_skip + fi +} + +# Old versions of tc don't understand "mpls_uc" +check_tc_mpls_support() +{ + local dev=$1; shift + + tc filter add dev $dev ingress protocol mpls_uc pref 1 handle 1 \ + matchall action pipe &> /dev/null + if [[ $? -ne 0 ]]; then + echo "SKIP: iproute2 too old; tc is missing MPLS support" + return $ksft_skip + fi + tc filter del dev $dev ingress protocol mpls_uc pref 1 handle 1 \ + matchall +} + +# Old versions of tc produce invalid json output for mpls lse statistics +check_tc_mpls_lse_stats() +{ + local dev=$1; shift + local ret; + + tc filter add dev $dev ingress protocol mpls_uc pref 1 handle 1 \ + flower mpls lse depth 2 \ + action continue &> /dev/null + + if [[ $? -ne 0 ]]; then + echo "SKIP: iproute2 too old; tc-flower is missing extended MPLS support" + return $ksft_skip + fi + + tc -j filter show dev $dev ingress protocol mpls_uc | jq . &> /dev/null + ret=$? + tc filter del dev $dev ingress protocol mpls_uc pref 1 handle 1 \ + flower + + if [[ $ret -ne 0 ]]; then + echo "SKIP: iproute2 too old; tc-flower produces invalid json output for extended MPLS filters" + return $ksft_skip fi } @@ -47,7 +122,7 @@ check_tc_shblock_support() tc filter help 2>&1 | grep block &> /dev/null if [[ $? -ne 0 ]]; then echo "SKIP: iproute2 too old; tc is missing shared block support" - exit 1 + exit $ksft_skip fi } @@ -56,7 +131,7 @@ check_tc_chain_support() tc help 2>&1|grep chain &> /dev/null if [[ $? -ne 0 ]]; then echo "SKIP: iproute2 too old; tc is missing chain support" - exit 1 + exit $ksft_skip fi } @@ -65,13 +140,85 @@ check_tc_action_hw_stats_support() tc actions help 2>&1 | grep -q hw_stats if [[ $? -ne 0 ]]; then echo "SKIP: iproute2 too old; tc is missing action hw_stats support" - exit 1 + exit $ksft_skip + fi +} + +check_tc_fp_support() +{ + tc qdisc add dev lo mqprio help 2>&1 | grep -q "fp " + if [[ $? -ne 0 ]]; then + echo "SKIP: iproute2 too old; tc is missing frame preemption support" + exit $ksft_skip + fi +} + +check_ethtool_lanes_support() +{ + ethtool --help 2>&1| grep lanes &> /dev/null + if [[ $? -ne 0 ]]; then + echo "SKIP: ethtool too old; it is missing lanes support" + exit $ksft_skip + fi +} + +check_ethtool_mm_support() +{ + ethtool --help 2>&1| grep -- '--show-mm' &> /dev/null + if [[ $? -ne 0 ]]; then + echo "SKIP: ethtool too old; it is missing MAC Merge layer support" + exit $ksft_skip + fi +} + +check_ethtool_counter_group_support() +{ + ethtool --help 2>&1| grep -- '--all-groups' &> /dev/null + if [[ $? -ne 0 ]]; then + echo "SKIP: ethtool too old; it is missing standard counter group support" + exit $ksft_skip + fi +} + +check_ethtool_pmac_std_stats_support() +{ + local dev=$1; shift + local grp=$1; shift + + [ 0 -ne $(ethtool --json -S $dev --all-groups --src pmac 2>/dev/null \ + | jq ".[].\"$grp\" | length") ] +} + +check_locked_port_support() +{ + if ! bridge -d link show | grep -q " locked"; then + echo "SKIP: iproute2 too old; Locked port feature not supported." + return $ksft_skip + fi +} + +check_port_mab_support() +{ + if ! bridge -d link show | grep -q "mab"; then + echo "SKIP: iproute2 too old; MacAuth feature not supported." + return $ksft_skip + fi +} + +skip_on_veth() +{ + local kind=$(ip -j -d link show dev ${NETIFS[p1]} | + jq -r '.[].linkinfo.info_kind') + + if [[ $kind == veth ]]; then + echo "SKIP: Test cannot be run with veth pairs" + exit $ksft_skip fi } if [[ "$(id -u)" -ne 0 ]]; then echo "SKIP: need root privileges" - exit 0 + exit $ksft_skip fi if [[ "$CHECK_TC" = "yes" ]]; then @@ -84,16 +231,26 @@ require_command() if [[ ! -x "$(command -v "$cmd")" ]]; then echo "SKIP: $cmd not installed" - exit 1 + exit $ksft_skip fi } -require_command jq -require_command $MZ +if [[ "$REQUIRE_JQ" = "yes" ]]; then + require_command jq +fi +if [[ "$REQUIRE_MZ" = "yes" ]]; then + require_command $MZ +fi +if [[ "$REQUIRE_MTOOLS" = "yes" ]]; then + # https://github.com/vladimiroltean/mtools/ + # patched for IPv6 support + require_command msend + require_command mreceive +fi if [[ ! -v NUM_NETIFS ]]; then echo "SKIP: importer does not define \"NUM_NETIFS\"" - exit 1 + exit $ksft_skip fi ############################################################################## @@ -121,6 +278,11 @@ create_netif_veth() for ((i = 1; i <= NUM_NETIFS; ++i)); do local j=$((i+1)) + if [ -z ${NETIFS[p$i]} ]; then + echo "SKIP: Cannot create interface. Name not specified" + exit $ksft_skip + fi + ip link show dev ${NETIFS[p$i]} &> /dev/null if [[ $? -ne 0 ]]; then ip link add ${NETIFS[p$i]} type veth \ @@ -145,15 +307,46 @@ create_netif() esac } +declare -A MAC_ADDR_ORIG +mac_addr_prepare() +{ + local new_addr= + local dev= + + for ((i = 1; i <= NUM_NETIFS; ++i)); do + dev=${NETIFS[p$i]} + new_addr=$(printf "00:01:02:03:04:%02x" $i) + + MAC_ADDR_ORIG["$dev"]=$(ip -j link show dev $dev | jq -e '.[].address') + # Strip quotes + MAC_ADDR_ORIG["$dev"]=${MAC_ADDR_ORIG["$dev"]//\"/} + ip link set dev $dev address $new_addr + done +} + +mac_addr_restore() +{ + local dev= + + for ((i = 1; i <= NUM_NETIFS; ++i)); do + dev=${NETIFS[p$i]} + ip link set dev $dev address ${MAC_ADDR_ORIG["$dev"]} + done +} + if [[ "$NETIF_CREATE" = "yes" ]]; then create_netif fi +if [[ "$STABLE_MAC_ADDRS" = "yes" ]]; then + mac_addr_prepare +fi + for ((i = 1; i <= NUM_NETIFS; ++i)); do ip link show dev ${NETIFS[p$i]} &> /dev/null if [[ $? -ne 0 ]]; then echo "SKIP: could not find all required interfaces" - exit 1 + exit $ksft_skip fi done @@ -227,6 +420,15 @@ log_test() return 0 } +log_test_skip() +{ + local test_name=$1 + local opt_str=$2 + + printf "TEST: %-60s [SKIP]\n" "$test_name $opt_str" + return 0 +} + log_info() { local msg=$1 @@ -234,33 +436,24 @@ log_info() echo "INFO: $msg" } -busywait() +not() { - local timeout=$1; shift + "$@" + [[ $? != 0 ]] +} - local start_time="$(date -u +%s%3N)" - while true - do - local out - out=$("$@") - local ret=$? - if ((!ret)); then - echo -n "$out" - return 0 - fi +get_max() +{ + local arr=("$@") - local current_time="$(date -u +%s%3N)" - if ((current_time - start_time > timeout)); then - echo -n "$out" - return 1 + max=${arr[0]} + for cur in ${arr[@]}; do + if [[ $cur -gt $max ]]; then + max=$cur fi done -} -not() -{ - "$@" - [[ $? != 0 ]] + echo $max } grep_bridge_fdb() @@ -279,11 +472,21 @@ grep_bridge_fdb() $@ | grep $addr | grep $flag "$word" } +wait_for_port_up() +{ + "$@" | grep -q "Link detected: yes" +} + wait_for_offload() { "$@" | grep -q offload } +wait_for_trap() +{ + "$@" | grep -q trap +} + until_counter_is() { local expr=$1; shift @@ -302,6 +505,15 @@ busywait_for_counter() busywait "$timeout" until_counter_is ">= $((base + delta))" "$@" } +slowwait_for_counter() +{ + local timeout=$1; shift + local delta=$1; shift + + local base=$("$@") + slowwait "$timeout" until_counter_is ">= $((base + delta))" "$@" +} + setup_wait_dev() { local dev=$1; shift @@ -374,33 +586,16 @@ cmd_jq() [ ! -z "$output" ] } -lldpad_app_wait_set() -{ - local dev=$1; shift - - while lldptool -t -i $dev -V APP -c app | grep -Eq "pending|unknown"; do - echo "$dev: waiting for lldpad to push pending APP updates" - sleep 5 - done -} - -lldpad_app_wait_del() -{ - # Give lldpad a chance to push down the changes. If the device is downed - # too soon, the updates will be left pending. However, they will have - # been struck off the lldpad's DB already, so we won't be able to tell - # they are pending. Then on next test iteration this would cause - # weirdness as newly-added APP rules conflict with the old ones, - # sometimes getting stuck in an "unknown" state. - sleep 5 -} - pre_cleanup() { if [ "${PAUSE_ON_CLEANUP}" = "yes" ]; then echo "Pausing before cleanup, hit any key to continue" read fi + + if [[ "$STABLE_MAC_ADDRS" = "yes" ]]; then + mac_addr_restore + fi } vrf_prepare() @@ -640,8 +835,9 @@ tc_rule_handle_stats_get() local id=$1; shift local handle=$1; shift local selector=${1:-.packets}; shift + local netns=${1:-""}; shift - tc -j -s filter show $id \ + tc $netns -j -s filter show $id \ | jq ".[] | select(.options.handle == $handle) | \ .options.actions[0].stats$selector" } @@ -654,6 +850,17 @@ ethtool_stats_get() ethtool -S $dev | grep "^ *$stat:" | head -n 1 | cut -d: -f2 } +ethtool_std_stats_get() +{ + local dev=$1; shift + local grp=$1; shift + local name=$1; shift + local src=$1; shift + + ethtool --json -S $dev --groups $grp -- --src $src | \ + jq '.[]."'"$grp"'"."'$name'"' +} + qdisc_stats_get() { local dev=$1; shift @@ -674,6 +881,52 @@ qdisc_parent_stats_get() | jq '.[] | select(.parent == "'"$parent"'") | '"$selector" } +ipv6_stats_get() +{ + local dev=$1; shift + local stat=$1; shift + + cat /proc/net/dev_snmp6/$dev | grep "^$stat" | cut -f2 +} + +hw_stats_get() +{ + local suite=$1; shift + local if_name=$1; shift + local dir=$1; shift + local stat=$1; shift + + ip -j stats show dev $if_name group offload subgroup $suite | + jq ".[0].stats64.$dir.$stat" +} + +__nh_stats_get() +{ + local key=$1; shift + local group_id=$1; shift + local member_id=$1; shift + + ip -j -s -s nexthop show id $group_id | + jq --argjson member_id "$member_id" --arg key "$key" \ + '.[].group_stats[] | select(.id == $member_id) | .[$key]' +} + +nh_stats_get() +{ + local group_id=$1; shift + local member_id=$1; shift + + __nh_stats_get packets "$group_id" "$member_id" +} + +nh_stats_get_hw() +{ + local group_id=$1; shift + local member_id=$1; shift + + __nh_stats_get packets_hw "$group_id" "$member_id" +} + humanize() { local speed=$1; shift @@ -698,6 +951,15 @@ rate() echo $((8 * (t1 - t0) / interval)) } +packets_rate() +{ + local t0=$1; shift + local t1=$1; shift + local interval=$1; shift + + echo $(((t1 - t0) / interval)) +} + mac_get() { local if_name=$1 @@ -705,6 +967,15 @@ mac_get() ip -j link show dev $if_name | jq -r '.[]["address"]' } +ipv6_lladdr_get() +{ + local if_name=$1 + + ip -j addr show dev $if_name | \ + jq -r '.[]["addr_info"][] | select(.scope == "link").local' | \ + head -1 +} + bridge_ageing_time_get() { local bridge=$1 @@ -723,14 +994,14 @@ sysctl_set() local value=$1; shift SYSCTL_ORIG[$key]=$(sysctl -n $key) - sysctl -qw $key=$value + sysctl -qw $key="$value" } sysctl_restore() { local key=$1; shift - sysctl -qw $key=${SYSCTL_ORIG["$key"]} + sysctl -qw $key="${SYSCTL_ORIG[$key]}" } forwarding_enable() @@ -1002,7 +1273,8 @@ ping_do() vrf_name=$(master_name_get $if_name) ip vrf exec $vrf_name \ - $PING $args $dip -c 10 -i 0.1 -w $PING_TIMEOUT &> /dev/null + $PING $args $dip -c $PING_COUNT -i 0.1 \ + -w $PING_TIMEOUT &> /dev/null } ping_test() @@ -1014,6 +1286,15 @@ ping_test() log_test "ping$3" } +ping_test_fails() +{ + RET=0 + + ping_do $1 $2 + check_fail $? + log_test "ping fails$3" +} + ping6_do() { local if_name=$1 @@ -1023,7 +1304,8 @@ ping6_do() vrf_name=$(master_name_get $if_name) ip vrf exec $vrf_name \ - $PING6 $args $dip -c 10 -i 0.1 -w $PING_TIMEOUT &> /dev/null + $PING6 $args $dip -c $PING_COUNT -i 0.1 \ + -w $PING_TIMEOUT &> /dev/null } ping6_test() @@ -1035,6 +1317,15 @@ ping6_test() log_test "ping6$3" } +ping6_test_fails() +{ + RET=0 + + ping6_do $1 $2 + check_fail $? + log_test "ping6 fails$3" +} + learning_test() { local bridge=$1 @@ -1055,6 +1346,7 @@ learning_test() # FDB entry was installed. bridge link set dev $br_port1 flood off + ip link set $host1_if promisc on tc qdisc add dev $host1_if ingress tc filter add dev $host1_if ingress protocol ip pref 1 handle 101 \ flower dst_mac $mac action drop @@ -1065,7 +1357,7 @@ learning_test() tc -j -s filter show dev $host1_if ingress \ | jq -e ".[] | select(.options.handle == 101) \ | select(.options.actions[0].stats.packets == 1)" &> /dev/null - check_fail $? "Packet reached second host when should not" + check_fail $? "Packet reached first host when should not" $MZ $host1_if -c 1 -p 64 -a $mac -t ip -q sleep 1 @@ -1104,6 +1396,7 @@ learning_test() tc filter del dev $host1_if ingress protocol ip pref 1 handle 101 flower tc qdisc del dev $host1_if ingress + ip link set $host1_if promisc off bridge link set dev $br_port1 flood on @@ -1121,6 +1414,7 @@ flood_test_do() # Add an ACL on `host2_if` which will tell us whether the packet # was flooded to it or not. + ip link set $host2_if promisc on tc qdisc add dev $host2_if ingress tc filter add dev $host2_if ingress protocol ip pref 1 handle 101 \ flower dst_mac $mac action drop @@ -1138,6 +1432,7 @@ flood_test_do() tc filter del dev $host2_if ingress protocol ip pref 1 handle 101 flower tc qdisc del dev $host2_if ingress + ip link set $host2_if promisc off return $err } @@ -1201,25 +1496,40 @@ flood_test() __start_traffic() { + local pktsize=$1; shift local proto=$1; shift local h_in=$1; shift # Where the traffic egresses the host local sip=$1; shift local dip=$1; shift local dmac=$1; shift - $MZ $h_in -p 8000 -A $sip -B $dip -c 0 \ + $MZ $h_in -p $pktsize -A $sip -B $dip -c 0 \ -a own -b $dmac -t "$proto" -q "$@" & sleep 1 } +start_traffic_pktsize() +{ + local pktsize=$1; shift + + __start_traffic $pktsize udp "$@" +} + +start_tcp_traffic_pktsize() +{ + local pktsize=$1; shift + + __start_traffic $pktsize tcp "$@" +} + start_traffic() { - __start_traffic udp "$@" + start_traffic_pktsize 8000 "$@" } start_tcp_traffic() { - __start_traffic tcp "$@" + start_tcp_traffic_pktsize 8000 "$@" } stop_traffic() @@ -1228,13 +1538,17 @@ stop_traffic() { kill %% && wait %%; } 2>/dev/null } +declare -A cappid +declare -A capfile +declare -A capout + tcpdump_start() { local if_name=$1; shift local ns=$1; shift - capfile=$(mktemp) - capout=$(mktemp) + capfile[$if_name]=$(mktemp) + capout[$if_name]=$(mktemp) if [ -z $ns ]; then ns_cmd="" @@ -1248,25 +1562,485 @@ tcpdump_start() capuser="-Z $SUDO_USER" fi - $ns_cmd tcpdump -e -n -Q in -i $if_name \ - -s 65535 -B 32768 $capuser -w $capfile > "$capout" 2>&1 & - cappid=$! + $ns_cmd tcpdump $TCPDUMP_EXTRA_FLAGS -e -n -Q in -i $if_name \ + -s 65535 -B 32768 $capuser -w ${capfile[$if_name]} \ + > "${capout[$if_name]}" 2>&1 & + cappid[$if_name]=$! sleep 1 } tcpdump_stop() { - $ns_cmd kill $cappid + local if_name=$1 + local pid=${cappid[$if_name]} + + $ns_cmd kill "$pid" && wait "$pid" sleep 1 } tcpdump_cleanup() { - rm $capfile $capout + local if_name=$1 + + rm ${capfile[$if_name]} ${capout[$if_name]} } tcpdump_show() { - tcpdump -e -n -r $capfile 2>&1 + local if_name=$1 + + tcpdump -e -n -r ${capfile[$if_name]} 2>&1 +} + +# return 0 if the packet wasn't seen on host2_if or 1 if it was +mcast_packet_test() +{ + local mac=$1 + local src_ip=$2 + local ip=$3 + local host1_if=$4 + local host2_if=$5 + local seen=0 + local tc_proto="ip" + local mz_v6arg="" + + # basic check to see if we were passed an IPv4 address, if not assume IPv6 + if [[ ! $ip =~ ^[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}$ ]]; then + tc_proto="ipv6" + mz_v6arg="-6" + fi + + # Add an ACL on `host2_if` which will tell us whether the packet + # was received by it or not. + tc qdisc add dev $host2_if ingress + tc filter add dev $host2_if ingress protocol $tc_proto pref 1 handle 101 \ + flower ip_proto udp dst_mac $mac action drop + + $MZ $host1_if $mz_v6arg -c 1 -p 64 -b $mac -A $src_ip -B $ip -t udp "dp=4096,sp=2048" -q + sleep 1 + + tc -j -s filter show dev $host2_if ingress \ + | jq -e ".[] | select(.options.handle == 101) \ + | select(.options.actions[0].stats.packets == 1)" &> /dev/null + if [[ $? -eq 0 ]]; then + seen=1 + fi + + tc filter del dev $host2_if ingress protocol $tc_proto pref 1 handle 101 flower + tc qdisc del dev $host2_if ingress + + return $seen +} + +brmcast_check_sg_entries() +{ + local report=$1; shift + local slist=("$@") + local sarg="" + + for src in "${slist[@]}"; do + sarg="${sarg} and .source_list[].address == \"$src\"" + done + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and .source_list != null $sarg)" &>/dev/null + check_err $? "Wrong *,G entry source list after $report report" + + for sgent in "${slist[@]}"; do + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and .src == \"$sgent\")" &>/dev/null + check_err $? "Missing S,G entry ($sgent, $TEST_GROUP)" + done +} + +brmcast_check_sg_fwding() +{ + local should_fwd=$1; shift + local sources=("$@") + + for src in "${sources[@]}"; do + local retval=0 + + mcast_packet_test $TEST_GROUP_MAC $src $TEST_GROUP $h2 $h1 + retval=$? + if [ $should_fwd -eq 1 ]; then + check_fail $retval "Didn't forward traffic from S,G ($src, $TEST_GROUP)" + else + check_err $retval "Forwarded traffic for blocked S,G ($src, $TEST_GROUP)" + fi + done +} + +brmcast_check_sg_state() +{ + local is_blocked=$1; shift + local sources=("$@") + local should_fail=1 + + if [ $is_blocked -eq 1 ]; then + should_fail=0 + fi + + for src in "${sources[@]}"; do + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and .source_list != null) | + .source_list[] | + select(.address == \"$src\") | + select(.timer == \"0.00\")" &>/dev/null + check_err_fail $should_fail $? "Entry $src has zero timer" + + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and .src == \"$src\" and \ + .flags[] == \"blocked\")" &>/dev/null + check_err_fail $should_fail $? "Entry $src has blocked flag" + done +} + +mc_join() +{ + local if_name=$1 + local group=$2 + local vrf_name=$(master_name_get $if_name) + + # We don't care about actual reception, just about joining the + # IP multicast group and adding the L2 address to the device's + # MAC filtering table + ip vrf exec $vrf_name \ + mreceive -g $group -I $if_name > /dev/null 2>&1 & + mreceive_pid=$! + + sleep 1 +} + +mc_leave() +{ + kill "$mreceive_pid" && wait "$mreceive_pid" +} + +mc_send() +{ + local if_name=$1 + local groups=$2 + local vrf_name=$(master_name_get $if_name) + + ip vrf exec $vrf_name \ + msend -g $groups -I $if_name -c 1 > /dev/null 2>&1 +} + +start_ip_monitor() +{ + local mtype=$1; shift + local ip=${1-ip}; shift + + # start the monitor in the background + tmpfile=`mktemp /var/run/nexthoptestXXX` + mpid=`($ip monitor $mtype > $tmpfile & echo $!) 2>/dev/null` + sleep 0.2 + echo "$mpid $tmpfile" +} + +stop_ip_monitor() +{ + local mpid=$1; shift + local tmpfile=$1; shift + local el=$1; shift + local what=$1; shift + + sleep 0.2 + kill $mpid + local lines=`grep '^\w' $tmpfile | wc -l` + test $lines -eq $el + check_err $? "$what: $lines lines of events, expected $el" + rm -rf $tmpfile +} + +hw_stats_monitor_test() +{ + local dev=$1; shift + local type=$1; shift + local make_suitable=$1; shift + local make_unsuitable=$1; shift + local ip=${1-ip}; shift + + RET=0 + + # Expect a notification about enablement. + local ipmout=$(start_ip_monitor stats "$ip") + $ip stats set dev $dev ${type}_stats on + stop_ip_monitor $ipmout 1 "${type}_stats enablement" + + # Expect a notification about offload. + local ipmout=$(start_ip_monitor stats "$ip") + $make_suitable + stop_ip_monitor $ipmout 1 "${type}_stats installation" + + # Expect a notification about loss of offload. + local ipmout=$(start_ip_monitor stats "$ip") + $make_unsuitable + stop_ip_monitor $ipmout 1 "${type}_stats deinstallation" + + # Expect a notification about disablement + local ipmout=$(start_ip_monitor stats "$ip") + $ip stats set dev $dev ${type}_stats off + stop_ip_monitor $ipmout 1 "${type}_stats disablement" + + log_test "${type}_stats notifications" +} + +ipv4_to_bytes() +{ + local IP=$1; shift + + printf '%02x:' ${IP//./ } | + sed 's/:$//' +} + +# Convert a given IPv6 address, `IP' such that the :: token, if present, is +# expanded, and each 16-bit group is padded with zeroes to be 4 hexadecimal +# digits. An optional `BYTESEP' parameter can be given to further separate +# individual bytes of each 16-bit group. +expand_ipv6() +{ + local IP=$1; shift + local bytesep=$1; shift + + local cvt_ip=${IP/::/_} + local colons=${cvt_ip//[^:]/} + local allcol=::::::: + # IP where :: -> the appropriate number of colons: + local allcol_ip=${cvt_ip/_/${allcol:${#colons}}} + + echo $allcol_ip | tr : '\n' | + sed s/^/0000/ | + sed 's/.*\(..\)\(..\)/\1'"$bytesep"'\2/' | + tr '\n' : | + sed 's/:$//' +} + +ipv6_to_bytes() +{ + local IP=$1; shift + + expand_ipv6 "$IP" : +} + +u16_to_bytes() +{ + local u16=$1; shift + + printf "%04x" $u16 | sed 's/^/000/;s/^.*\(..\)\(..\)$/\1:\2/' +} + +# Given a mausezahn-formatted payload (colon-separated bytes given as %02x), +# possibly with a keyword CHECKSUM stashed where a 16-bit checksum should be, +# calculate checksum as per RFC 1071, assuming the CHECKSUM field (if any) +# stands for 00:00. +payload_template_calc_checksum() +{ + local payload=$1; shift + + ( + # Set input radix. + echo "16i" + # Push zero for the initial checksum. + echo 0 + + # Pad the payload with a terminating 00: in case we get an odd + # number of bytes. + echo "${payload%:}:00:" | + sed 's/CHECKSUM/00:00/g' | + tr '[:lower:]' '[:upper:]' | + # Add the word to the checksum. + sed 's/\(..\):\(..\):/\1\2+\n/g' | + # Strip the extra odd byte we pushed if left unconverted. + sed 's/\(..\):$//' + + echo "10000 ~ +" # Calculate and add carry. + echo "FFFF r - p" # Bit-flip and print. + ) | + dc | + tr '[:upper:]' '[:lower:]' +} + +payload_template_expand_checksum() +{ + local payload=$1; shift + local checksum=$1; shift + + local ckbytes=$(u16_to_bytes $checksum) + + echo "$payload" | sed "s/CHECKSUM/$ckbytes/g" +} + +payload_template_nbytes() +{ + local payload=$1; shift + + payload_template_expand_checksum "${payload%:}" 0 | + sed 's/:/\n/g' | wc -l +} + +igmpv3_is_in_get() +{ + local GRP=$1; shift + local sources=("$@") + + local igmpv3 + local nsources=$(u16_to_bytes ${#sources[@]}) + + # IS_IN ( $sources ) + igmpv3=$(: + )"22:"$( : Type - Membership Report + )"00:"$( : Reserved + )"CHECKSUM:"$( : Checksum + )"00:00:"$( : Reserved + )"00:01:"$( : Number of Group Records + )"01:"$( : Record Type - IS_IN + )"00:"$( : Aux Data Len + )"${nsources}:"$( : Number of Sources + )"$(ipv4_to_bytes $GRP):"$( : Multicast Address + )"$(for src in "${sources[@]}"; do + ipv4_to_bytes $src + echo -n : + done)"$( : Source Addresses + ) + local checksum=$(payload_template_calc_checksum "$igmpv3") + + payload_template_expand_checksum "$igmpv3" $checksum +} + +igmpv2_leave_get() +{ + local GRP=$1; shift + + local payload=$(: + )"17:"$( : Type - Leave Group + )"00:"$( : Max Resp Time - not meaningful + )"CHECKSUM:"$( : Checksum + )"$(ipv4_to_bytes $GRP)"$( : Group Address + ) + local checksum=$(payload_template_calc_checksum "$payload") + + payload_template_expand_checksum "$payload" $checksum +} + +mldv2_is_in_get() +{ + local SIP=$1; shift + local GRP=$1; shift + local sources=("$@") + + local hbh + local icmpv6 + local nsources=$(u16_to_bytes ${#sources[@]}) + + hbh=$(: + )"3a:"$( : Next Header - ICMPv6 + )"00:"$( : Hdr Ext Len + )"00:00:00:00:00:00:"$( : Options and Padding + ) + + icmpv6=$(: + )"8f:"$( : Type - MLDv2 Report + )"00:"$( : Code + )"CHECKSUM:"$( : Checksum + )"00:00:"$( : Reserved + )"00:01:"$( : Number of Group Records + )"01:"$( : Record Type - IS_IN + )"00:"$( : Aux Data Len + )"${nsources}:"$( : Number of Sources + )"$(ipv6_to_bytes $GRP):"$( : Multicast address + )"$(for src in "${sources[@]}"; do + ipv6_to_bytes $src + echo -n : + done)"$( : Source Addresses + ) + + local len=$(u16_to_bytes $(payload_template_nbytes $icmpv6)) + local sudohdr=$(: + )"$(ipv6_to_bytes $SIP):"$( : SIP + )"$(ipv6_to_bytes $GRP):"$( : DIP is multicast address + )"${len}:"$( : Upper-layer length + )"00:3a:"$( : Zero and next-header + ) + local checksum=$(payload_template_calc_checksum ${sudohdr}${icmpv6}) + + payload_template_expand_checksum "$hbh$icmpv6" $checksum +} + +mldv1_done_get() +{ + local SIP=$1; shift + local GRP=$1; shift + + local hbh + local icmpv6 + + hbh=$(: + )"3a:"$( : Next Header - ICMPv6 + )"00:"$( : Hdr Ext Len + )"00:00:00:00:00:00:"$( : Options and Padding + ) + + icmpv6=$(: + )"84:"$( : Type - MLDv1 Done + )"00:"$( : Code + )"CHECKSUM:"$( : Checksum + )"00:00:"$( : Max Resp Delay - not meaningful + )"00:00:"$( : Reserved + )"$(ipv6_to_bytes $GRP):"$( : Multicast address + ) + + local len=$(u16_to_bytes $(payload_template_nbytes $icmpv6)) + local sudohdr=$(: + )"$(ipv6_to_bytes $SIP):"$( : SIP + )"$(ipv6_to_bytes $GRP):"$( : DIP is multicast address + )"${len}:"$( : Upper-layer length + )"00:3a:"$( : Zero and next-header + ) + local checksum=$(payload_template_calc_checksum ${sudohdr}${icmpv6}) + + payload_template_expand_checksum "$hbh$icmpv6" $checksum +} + +bail_on_lldpad() +{ + local reason1="$1"; shift + local reason2="$1"; shift + + if systemctl is-active --quiet lldpad; then + + cat >/dev/stderr <<-EOF + WARNING: lldpad is running + + lldpad will likely $reason1, and this test will + $reason2. Both are not supported at the same time, + one of them is arbitrarily going to overwrite the + other. That will cause spurious failures (or, unlikely, + passes) of this test. + EOF + + if [[ -z $ALLOW_LLDPAD ]]; then + cat >/dev/stderr <<-EOF + + If you want to run the test anyway, please set + an environment variable ALLOW_LLDPAD to a + non-empty string. + EOF + exit 1 + else + return + fi + fi +} + +absval() +{ + local v=$1; shift + + echo $((v > 0 ? v : -v)) } diff --git a/tools/testing/selftests/net/forwarding/local_termination.sh b/tools/testing/selftests/net/forwarding/local_termination.sh new file mode 100755 index 000000000000..c5b0cbc85b3e --- /dev/null +++ b/tools/testing/selftests/net/forwarding/local_termination.sh @@ -0,0 +1,299 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +ALL_TESTS="standalone bridge" +NUM_NETIFS=2 +PING_COUNT=1 +REQUIRE_MTOOLS=yes +REQUIRE_MZ=no + +source lib.sh + +H1_IPV4="192.0.2.1" +H2_IPV4="192.0.2.2" +H1_IPV6="2001:db8:1::1" +H2_IPV6="2001:db8:1::2" + +BRIDGE_ADDR="00:00:de:ad:be:ee" +MACVLAN_ADDR="00:00:de:ad:be:ef" +UNKNOWN_UC_ADDR1="de:ad:be:ef:ee:03" +UNKNOWN_UC_ADDR2="de:ad:be:ef:ee:04" +UNKNOWN_UC_ADDR3="de:ad:be:ef:ee:05" +JOINED_IPV4_MC_ADDR="225.1.2.3" +UNKNOWN_IPV4_MC_ADDR1="225.1.2.4" +UNKNOWN_IPV4_MC_ADDR2="225.1.2.5" +UNKNOWN_IPV4_MC_ADDR3="225.1.2.6" +JOINED_IPV6_MC_ADDR="ff2e::0102:0304" +UNKNOWN_IPV6_MC_ADDR1="ff2e::0102:0305" +UNKNOWN_IPV6_MC_ADDR2="ff2e::0102:0306" +UNKNOWN_IPV6_MC_ADDR3="ff2e::0102:0307" + +JOINED_MACV4_MC_ADDR="01:00:5e:01:02:03" +UNKNOWN_MACV4_MC_ADDR1="01:00:5e:01:02:04" +UNKNOWN_MACV4_MC_ADDR2="01:00:5e:01:02:05" +UNKNOWN_MACV4_MC_ADDR3="01:00:5e:01:02:06" +JOINED_MACV6_MC_ADDR="33:33:01:02:03:04" +UNKNOWN_MACV6_MC_ADDR1="33:33:01:02:03:05" +UNKNOWN_MACV6_MC_ADDR2="33:33:01:02:03:06" +UNKNOWN_MACV6_MC_ADDR3="33:33:01:02:03:07" + +NON_IP_MC="01:02:03:04:05:06" +NON_IP_PKT="00:04 48:45:4c:4f" +BC="ff:ff:ff:ff:ff:ff" + +# Disable promisc to ensure we don't receive unknown MAC DA packets +export TCPDUMP_EXTRA_FLAGS="-pl" + +h1=${NETIFS[p1]} +h2=${NETIFS[p2]} + +send_non_ip() +{ + local if_name=$1 + local smac=$2 + local dmac=$3 + + $MZ -q $if_name "$dmac $smac $NON_IP_PKT" +} + +send_uc_ipv4() +{ + local if_name=$1 + local dmac=$2 + + ip neigh add $H2_IPV4 lladdr $dmac dev $if_name + ping_do $if_name $H2_IPV4 + ip neigh del $H2_IPV4 dev $if_name +} + +check_rcv() +{ + local if_name=$1 + local type=$2 + local pattern=$3 + local should_receive=$4 + local should_fail= + + [ $should_receive = true ] && should_fail=0 || should_fail=1 + RET=0 + + tcpdump_show $if_name | grep -q "$pattern" + + check_err_fail "$should_fail" "$?" "reception" + + log_test "$if_name: $type" +} + +mc_route_prepare() +{ + local if_name=$1 + local vrf_name=$(master_name_get $if_name) + + ip route add 225.100.1.0/24 dev $if_name vrf $vrf_name + ip -6 route add ff2e::/64 dev $if_name vrf $vrf_name +} + +mc_route_destroy() +{ + local if_name=$1 + local vrf_name=$(master_name_get $if_name) + + ip route del 225.100.1.0/24 dev $if_name vrf $vrf_name + ip -6 route del ff2e::/64 dev $if_name vrf $vrf_name +} + +run_test() +{ + local rcv_if_name=$1 + local smac=$(mac_get $h1) + local rcv_dmac=$(mac_get $rcv_if_name) + + tcpdump_start $rcv_if_name + + mc_route_prepare $h1 + mc_route_prepare $rcv_if_name + + send_uc_ipv4 $h1 $rcv_dmac + send_uc_ipv4 $h1 $MACVLAN_ADDR + send_uc_ipv4 $h1 $UNKNOWN_UC_ADDR1 + + ip link set dev $rcv_if_name promisc on + send_uc_ipv4 $h1 $UNKNOWN_UC_ADDR2 + mc_send $h1 $UNKNOWN_IPV4_MC_ADDR2 + mc_send $h1 $UNKNOWN_IPV6_MC_ADDR2 + ip link set dev $rcv_if_name promisc off + + mc_join $rcv_if_name $JOINED_IPV4_MC_ADDR + mc_send $h1 $JOINED_IPV4_MC_ADDR + mc_leave + + mc_join $rcv_if_name $JOINED_IPV6_MC_ADDR + mc_send $h1 $JOINED_IPV6_MC_ADDR + mc_leave + + mc_send $h1 $UNKNOWN_IPV4_MC_ADDR1 + mc_send $h1 $UNKNOWN_IPV6_MC_ADDR1 + + ip link set dev $rcv_if_name allmulticast on + send_uc_ipv4 $h1 $UNKNOWN_UC_ADDR3 + mc_send $h1 $UNKNOWN_IPV4_MC_ADDR3 + mc_send $h1 $UNKNOWN_IPV6_MC_ADDR3 + ip link set dev $rcv_if_name allmulticast off + + mc_route_destroy $rcv_if_name + mc_route_destroy $h1 + + sleep 1 + + tcpdump_stop $rcv_if_name + + check_rcv $rcv_if_name "Unicast IPv4 to primary MAC address" \ + "$smac > $rcv_dmac, ethertype IPv4 (0x0800)" \ + true + + check_rcv $rcv_if_name "Unicast IPv4 to macvlan MAC address" \ + "$smac > $MACVLAN_ADDR, ethertype IPv4 (0x0800)" \ + true + + check_rcv $rcv_if_name "Unicast IPv4 to unknown MAC address" \ + "$smac > $UNKNOWN_UC_ADDR1, ethertype IPv4 (0x0800)" \ + false + + check_rcv $rcv_if_name "Unicast IPv4 to unknown MAC address, promisc" \ + "$smac > $UNKNOWN_UC_ADDR2, ethertype IPv4 (0x0800)" \ + true + + check_rcv $rcv_if_name "Unicast IPv4 to unknown MAC address, allmulti" \ + "$smac > $UNKNOWN_UC_ADDR3, ethertype IPv4 (0x0800)" \ + false + + check_rcv $rcv_if_name "Multicast IPv4 to joined group" \ + "$smac > $JOINED_MACV4_MC_ADDR, ethertype IPv4 (0x0800)" \ + true + + check_rcv $rcv_if_name "Multicast IPv4 to unknown group" \ + "$smac > $UNKNOWN_MACV4_MC_ADDR1, ethertype IPv4 (0x0800)" \ + false + + check_rcv $rcv_if_name "Multicast IPv4 to unknown group, promisc" \ + "$smac > $UNKNOWN_MACV4_MC_ADDR2, ethertype IPv4 (0x0800)" \ + true + + check_rcv $rcv_if_name "Multicast IPv4 to unknown group, allmulti" \ + "$smac > $UNKNOWN_MACV4_MC_ADDR3, ethertype IPv4 (0x0800)" \ + true + + check_rcv $rcv_if_name "Multicast IPv6 to joined group" \ + "$smac > $JOINED_MACV6_MC_ADDR, ethertype IPv6 (0x86dd)" \ + true + + check_rcv $rcv_if_name "Multicast IPv6 to unknown group" \ + "$smac > $UNKNOWN_MACV6_MC_ADDR1, ethertype IPv6 (0x86dd)" \ + false + + check_rcv $rcv_if_name "Multicast IPv6 to unknown group, promisc" \ + "$smac > $UNKNOWN_MACV6_MC_ADDR2, ethertype IPv6 (0x86dd)" \ + true + + check_rcv $rcv_if_name "Multicast IPv6 to unknown group, allmulti" \ + "$smac > $UNKNOWN_MACV6_MC_ADDR3, ethertype IPv6 (0x86dd)" \ + true + + tcpdump_cleanup $rcv_if_name +} + +h1_create() +{ + simple_if_init $h1 $H1_IPV4/24 $H1_IPV6/64 +} + +h1_destroy() +{ + simple_if_fini $h1 $H1_IPV4/24 $H1_IPV6/64 +} + +h2_create() +{ + simple_if_init $h2 $H2_IPV4/24 $H2_IPV6/64 +} + +h2_destroy() +{ + simple_if_fini $h2 $H2_IPV4/24 $H2_IPV6/64 +} + +bridge_create() +{ + ip link add br0 type bridge + ip link set br0 address $BRIDGE_ADDR + ip link set br0 up + + ip link set $h2 master br0 + ip link set $h2 up + + simple_if_init br0 $H2_IPV4/24 $H2_IPV6/64 +} + +bridge_destroy() +{ + simple_if_fini br0 $H2_IPV4/24 $H2_IPV6/64 + + ip link del br0 +} + +standalone() +{ + h1_create + h2_create + + ip link add link $h2 name macvlan0 type macvlan mode private + ip link set macvlan0 address $MACVLAN_ADDR + ip link set macvlan0 up + + run_test $h2 + + ip link del macvlan0 + + h2_destroy + h1_destroy +} + +bridge() +{ + h1_create + bridge_create + + ip link add link br0 name macvlan0 type macvlan mode private + ip link set macvlan0 address $MACVLAN_ADDR + ip link set macvlan0 up + + run_test br0 + + ip link del macvlan0 + + bridge_destroy + h1_destroy +} + +cleanup() +{ + pre_cleanup + vrf_cleanup +} + +setup_prepare() +{ + vrf_prepare + # setup_wait() needs this + ip link set $h1 up + ip link set $h2 up +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bound.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bound.sh index 360ca133bead..6c257ec03756 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre_bound.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_bound.sh @@ -98,6 +98,7 @@ switch_create() # Bridge between H1 and H2. ip link add name br1 type bridge vlan_filtering 1 + ip link set dev br1 addrgenmode none ip link set dev br1 up ip link set dev $swp1 master br1 diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d.sh index c5095da7f6bf..04fd14b0a9b7 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d.sh @@ -65,7 +65,8 @@ setup_prepare() vrf_prepare mirror_gre_topo_create - ip link add name br2 type bridge vlan_filtering 0 + ip link add name br2 address $(mac_get $swp3) \ + type bridge vlan_filtering 0 ip link set dev br2 up ip link set dev $swp3 master br2 @@ -93,12 +94,16 @@ cleanup() test_gretap() { + ip neigh replace 192.0.2.130 lladdr $(mac_get $h3) \ + nud permanent dev br2 full_test_span_gre_dir gt4 ingress 8 0 "mirror to gretap" full_test_span_gre_dir gt4 egress 0 8 "mirror to gretap" } test_ip6gretap() { + ip neigh replace 2001:db8:2::2 lladdr $(mac_get $h3) \ + nud permanent dev br2 full_test_span_gre_dir gt6 ingress 8 0 "mirror to ip6gretap" full_test_span_gre_dir gt6 egress 0 8 "mirror to ip6gretap" } diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh index 197e769c2ed1..f35313c76fac 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh @@ -35,7 +35,8 @@ setup_prepare() vrf_prepare mirror_gre_topo_create - ip link add name br2 type bridge vlan_filtering 0 + ip link add name br2 address $(mac_get $swp3) \ + type bridge vlan_filtering 0 ip link set dev br2 up vlan_create $swp3 555 @@ -80,17 +81,26 @@ test_gretap() test_ip6gretap() { - test_vlan_match gt6 'skip_hw vlan_id 555 vlan_ethtype ip' \ + test_vlan_match gt6 'skip_hw vlan_id 555 vlan_ethtype ipv6' \ "mirror to ip6gretap" } test_gretap_stp() { + # Sometimes after mirror installation, the neighbor's state is not valid. + # The reason is that there is no SW datapath activity related to the + # neighbor for the remote GRE address. Therefore whether the corresponding + # neighbor will be valid is a matter of luck, and the test is thus racy. + # Set the neighbor's state to permanent, so it would be always valid. + ip neigh replace 192.0.2.130 lladdr $(mac_get $h3) \ + nud permanent dev br2 full_test_span_gre_stp gt4 $swp3.555 "mirror to gretap" } test_ip6gretap_stp() { + ip neigh replace 2001:db8:2::2 lladdr $(mac_get $h3) \ + nud permanent dev br2 full_test_span_gre_stp gt6 $swp3.555 "mirror to ip6gretap" } diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh index a3402cd8d5b6..0cf4c47a46f9 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh @@ -61,9 +61,12 @@ setup_prepare() vrf_prepare mirror_gre_topo_create + # Avoid changing br1's PVID while it is operational as a L3 interface. + ip link set dev br1 down ip link set dev $swp3 master br1 bridge vlan add dev br1 vid 555 pvid untagged self + ip link set dev br1 up ip address add dev br1 192.0.2.129/28 ip address add dev br1 2001:db8:2::1/64 @@ -87,12 +90,16 @@ cleanup() test_gretap() { + ip neigh replace 192.0.2.130 lladdr $(mac_get $h3) \ + nud permanent dev br1 full_test_span_gre_dir gt4 ingress 8 0 "mirror to gretap" full_test_span_gre_dir gt4 egress 0 8 "mirror to gretap" } test_ip6gretap() { + ip neigh replace 2001:db8:2::2 lladdr $(mac_get $h3) \ + nud permanent dev br1 full_test_span_gre_dir gt6 ingress 8 0 "mirror to ip6gretap" full_test_span_gre_dir gt6 egress 0 8 "mirror to ip6gretap" } diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh index 28d568c48a73..c53148b1dc63 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh @@ -140,13 +140,15 @@ switch_create() ip link set dev $swp3 up ip link set dev $swp4 up - ip link add name br1 type bridge vlan_filtering 1 - ip link set dev br1 up - __addr_add_del br1 add 192.0.2.129/32 - ip -4 route add 192.0.2.130/32 dev br1 + ip link add name br1 address $(mac_get $swp3) \ + type bridge vlan_filtering 1 team_create lag loadbalance $swp3 $swp4 ip link set dev lag master br1 + + ip link set dev br1 up + __addr_add_del br1 add 192.0.2.129/32 + ip -4 route add 192.0.2.130/32 dev br1 } switch_destroy() diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh b/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh index 472bd023e2a5..5ea9d63915f7 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh @@ -72,9 +72,10 @@ test_span_gre_ttl() RET=0 - mirror_install $swp1 ingress $tundev "matchall $tcflags" + mirror_install $swp1 ingress $tundev \ + "prot ip flower $tcflags ip_prot icmp" tc filter add dev $h3 ingress pref 77 prot $prot \ - flower ip_ttl 50 action pass + flower skip_hw ip_ttl 50 action pass mirror_test v$h1 192.0.2.1 192.0.2.2 $h3 77 0 diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh b/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh index fac486178ef7..0c36546e131e 100644 --- a/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 -source "$relative_path/mirror_lib.sh" +source "$net_forwarding_dir/mirror_lib.sh" quick_test_span_gre_dir_ips() { diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_topo_lib.sh b/tools/testing/selftests/net/forwarding/mirror_gre_topo_lib.sh index 39c03e2867f4..6e615fffa4ef 100644 --- a/tools/testing/selftests/net/forwarding/mirror_gre_topo_lib.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_topo_lib.sh @@ -33,7 +33,7 @@ # | | # +-------------------------------------------------------------------------+ -source "$relative_path/mirror_topo_lib.sh" +source "$net_forwarding_dir/mirror_topo_lib.sh" mirror_gre_topo_h3_create() { diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh b/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh index c02291e9841e..c8a9b5bd841f 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh @@ -141,7 +141,7 @@ test_gretap() test_ip6gretap() { - test_vlan_match gt6 'skip_hw vlan_id 555 vlan_ethtype ip' \ + test_vlan_match gt6 'skip_hw vlan_id 555 vlan_ethtype ipv6' \ "mirror to ip6gretap" } @@ -218,6 +218,7 @@ test_ip6gretap_forbidden_egress() test_span_gre_untagged_egress() { local tundev=$1; shift + local ul_proto=$1; shift local what=$1; shift RET=0 @@ -225,7 +226,7 @@ test_span_gre_untagged_egress() mirror_install $swp1 ingress $tundev "matchall $tcflags" quick_test_span_gre_dir $tundev ingress - quick_test_span_vlan_dir $h3 555 ingress + quick_test_span_vlan_dir $h3 555 ingress "$ul_proto" h3_addr_add_del del $h3.555 bridge vlan add dev $swp3 vid 555 pvid untagged @@ -233,7 +234,7 @@ test_span_gre_untagged_egress() sleep 5 quick_test_span_gre_dir $tundev ingress - fail_test_span_vlan_dir $h3 555 ingress + fail_test_span_vlan_dir $h3 555 ingress "$ul_proto" h3_addr_add_del del $h3 bridge vlan add dev $swp3 vid 555 @@ -241,7 +242,7 @@ test_span_gre_untagged_egress() sleep 5 quick_test_span_gre_dir $tundev ingress - quick_test_span_vlan_dir $h3 555 ingress + quick_test_span_vlan_dir $h3 555 ingress "$ul_proto" mirror_uninstall $swp1 ingress @@ -250,12 +251,12 @@ test_span_gre_untagged_egress() test_gretap_untagged_egress() { - test_span_gre_untagged_egress gt4 "mirror to gretap" + test_span_gre_untagged_egress gt4 ip "mirror to gretap" } test_ip6gretap_untagged_egress() { - test_span_gre_untagged_egress gt6 "mirror to ip6gretap" + test_span_gre_untagged_egress gt6 ipv6 "mirror to ip6gretap" } test_span_gre_fdb_roaming() @@ -271,7 +272,7 @@ test_span_gre_fdb_roaming() while ((RET == 0)); do bridge fdb del dev $swp3 $h3mac vlan 555 master 2>/dev/null - bridge fdb add dev $swp2 $h3mac vlan 555 master + bridge fdb add dev $swp2 $h3mac vlan 555 master static sleep 1 fail_test_span_gre_dir $tundev ingress diff --git a/tools/testing/selftests/net/forwarding/mirror_lib.sh b/tools/testing/selftests/net/forwarding/mirror_lib.sh index 13db1cb50e57..3e8ebeff3019 100644 --- a/tools/testing/selftests/net/forwarding/mirror_lib.sh +++ b/tools/testing/selftests/net/forwarding/mirror_lib.sh @@ -20,6 +20,13 @@ mirror_uninstall() tc filter del dev $swp1 $direction pref 1000 } +is_ipv6() +{ + local addr=$1; shift + + [[ -z ${addr//[0-9a-fA-F:]/} ]] +} + mirror_test() { local vrf_name=$1; shift @@ -29,9 +36,17 @@ mirror_test() local pref=$1; shift local expect=$1; shift + if is_ipv6 $dip; then + local proto=-6 + local type="icmp6 type=128" # Echo request. + else + local proto= + local type="icmp echoreq" + fi + local t0=$(tc_rule_stats_get $dev $pref) - $MZ $vrf_name ${sip:+-A $sip} -B $dip -a own -b bc -q \ - -c 10 -d 100msec -t icmp type=8 + $MZ $proto $vrf_name ${sip:+-A $sip} -B $dip -a own -b bc -q \ + -c 10 -d 100msec -t $type sleep 0.5 local t1=$(tc_rule_stats_get $dev $pref) local delta=$((t1 - t0)) @@ -100,13 +115,14 @@ do_test_span_vlan_dir_ips() local dev=$1; shift local vid=$1; shift local direction=$1; shift + local ul_proto=$1; shift local ip1=$1; shift local ip2=$1; shift # Install the capture as skip_hw to avoid double-counting of packets. # The traffic is meant for local box anyway, so will be trapped to # kernel. - vlan_capture_install $dev "skip_hw vlan_id $vid vlan_ethtype ip" + vlan_capture_install $dev "skip_hw vlan_id $vid vlan_ethtype $ul_proto" mirror_test v$h1 $ip1 $ip2 $dev 100 $expect mirror_test v$h2 $ip2 $ip1 $dev 100 $expect vlan_capture_uninstall $dev diff --git a/tools/testing/selftests/net/forwarding/mirror_topo_lib.sh b/tools/testing/selftests/net/forwarding/mirror_topo_lib.sh index 04979e5962e7..bb1adbb7b98a 100644 --- a/tools/testing/selftests/net/forwarding/mirror_topo_lib.sh +++ b/tools/testing/selftests/net/forwarding/mirror_topo_lib.sh @@ -60,6 +60,7 @@ mirror_topo_switch_create() ip link set dev $swp3 up ip link add name br1 type bridge vlan_filtering 1 + ip link set dev br1 addrgenmode none ip link set dev br1 up ip link set dev $swp1 master br1 diff --git a/tools/testing/selftests/net/forwarding/mirror_vlan.sh b/tools/testing/selftests/net/forwarding/mirror_vlan.sh index 9ab2ce77b332..0b44e148235e 100755 --- a/tools/testing/selftests/net/forwarding/mirror_vlan.sh +++ b/tools/testing/selftests/net/forwarding/mirror_vlan.sh @@ -85,9 +85,9 @@ test_tagged_vlan_dir() RET=0 mirror_install $swp1 $direction $swp3.555 "matchall $tcflags" - do_test_span_vlan_dir_ips 10 "$h3.555" 111 "$direction" \ + do_test_span_vlan_dir_ips 10 "$h3.555" 111 "$direction" ip \ 192.0.2.17 192.0.2.18 - do_test_span_vlan_dir_ips 0 "$h3.555" 555 "$direction" \ + do_test_span_vlan_dir_ips 0 "$h3.555" 555 "$direction" ip \ 192.0.2.17 192.0.2.18 mirror_uninstall $swp1 $direction diff --git a/tools/testing/selftests/net/forwarding/no_forwarding.sh b/tools/testing/selftests/net/forwarding/no_forwarding.sh new file mode 100755 index 000000000000..af3b398d13f0 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/no_forwarding.sh @@ -0,0 +1,261 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +ALL_TESTS="standalone two_bridges one_bridge_two_pvids" +NUM_NETIFS=4 + +source lib.sh + +h1=${NETIFS[p1]} +h2=${NETIFS[p3]} +swp1=${NETIFS[p2]} +swp2=${NETIFS[p4]} + +H1_IPV4="192.0.2.1" +H2_IPV4="192.0.2.2" +H1_IPV6="2001:db8:1::1" +H2_IPV6="2001:db8:1::2" + +IPV4_ALLNODES="224.0.0.1" +IPV6_ALLNODES="ff02::1" +MACV4_ALLNODES="01:00:5e:00:00:01" +MACV6_ALLNODES="33:33:00:00:00:01" +NON_IP_MC="01:02:03:04:05:06" +NON_IP_PKT="00:04 48:45:4c:4f" +BC="ff:ff:ff:ff:ff:ff" + +# The full 4K VLAN space is too much to check, so strategically pick some +# values which should provide reasonable coverage +vids=(0 1 2 5 10 20 50 100 200 500 1000 1000 2000 4000 4094) + +send_non_ip() +{ + local if_name=$1 + local smac=$2 + local dmac=$3 + + $MZ -q $if_name "$dmac $smac $NON_IP_PKT" +} + +send_uc_ipv4() +{ + local if_name=$1 + local dmac=$2 + + ip neigh add $H2_IPV4 lladdr $dmac dev $if_name + ping_do $if_name $H2_IPV4 + ip neigh del $H2_IPV4 dev $if_name +} + +send_mc_ipv4() +{ + local if_name=$1 + + ping_do $if_name $IPV4_ALLNODES "-I $if_name" +} + +send_uc_ipv6() +{ + local if_name=$1 + local dmac=$2 + + ip -6 neigh add $H2_IPV6 lladdr $dmac dev $if_name + ping6_do $if_name $H2_IPV6 + ip -6 neigh del $H2_IPV6 dev $if_name +} + +send_mc_ipv6() +{ + local if_name=$1 + + ping6_do $if_name $IPV6_ALLNODES%$if_name +} + +check_rcv() +{ + local if_name=$1 + local type=$2 + local pattern=$3 + local should_fail=1 + + RET=0 + + tcpdump_show $if_name | grep -q "$pattern" + + check_err_fail "$should_fail" "$?" "reception" + + log_test "$type" +} + +run_test() +{ + local test_name="$1" + local smac=$(mac_get $h1) + local dmac=$(mac_get $h2) + local h1_ipv6_lladdr=$(ipv6_lladdr_get $h1) + local vid= + + echo "$test_name: Sending packets" + + tcpdump_start $h2 + + send_non_ip $h1 $smac $dmac + send_non_ip $h1 $smac $NON_IP_MC + send_non_ip $h1 $smac $BC + send_uc_ipv4 $h1 $dmac + send_mc_ipv4 $h1 + send_uc_ipv6 $h1 $dmac + send_mc_ipv6 $h1 + + for vid in "${vids[@]}"; do + vlan_create $h1 $vid + simple_if_init $h1.$vid $H1_IPV4/24 $H1_IPV6/64 + + send_non_ip $h1.$vid $smac $dmac + send_non_ip $h1.$vid $smac $NON_IP_MC + send_non_ip $h1.$vid $smac $BC + send_uc_ipv4 $h1.$vid $dmac + send_mc_ipv4 $h1.$vid + send_uc_ipv6 $h1.$vid $dmac + send_mc_ipv6 $h1.$vid + + simple_if_fini $h1.$vid $H1_IPV4/24 $H1_IPV6/64 + vlan_destroy $h1 $vid + done + + sleep 1 + + echo "$test_name: Checking which packets were received" + + tcpdump_stop $h2 + + check_rcv $h2 "$test_name: Unicast non-IP untagged" \ + "$smac > $dmac, 802.3, length 4:" + + check_rcv $h2 "$test_name: Multicast non-IP untagged" \ + "$smac > $NON_IP_MC, 802.3, length 4:" + + check_rcv $h2 "$test_name: Broadcast non-IP untagged" \ + "$smac > $BC, 802.3, length 4:" + + check_rcv $h2 "$test_name: Unicast IPv4 untagged" \ + "$smac > $dmac, ethertype IPv4 (0x0800)" + + check_rcv $h2 "$test_name: Multicast IPv4 untagged" \ + "$smac > $MACV4_ALLNODES, ethertype IPv4 (0x0800).*: $H1_IPV4 > $IPV4_ALLNODES" + + check_rcv $h2 "$test_name: Unicast IPv6 untagged" \ + "$smac > $dmac, ethertype IPv6 (0x86dd).*8: $H1_IPV6 > $H2_IPV6" + + check_rcv $h2 "$test_name: Multicast IPv6 untagged" \ + "$smac > $MACV6_ALLNODES, ethertype IPv6 (0x86dd).*: $h1_ipv6_lladdr > $IPV6_ALLNODES" + + for vid in "${vids[@]}"; do + check_rcv $h2 "$test_name: Unicast non-IP VID $vid" \ + "$smac > $dmac, ethertype 802.1Q (0x8100).*vlan $vid,.*length 4" + + check_rcv $h2 "$test_name: Multicast non-IP VID $vid" \ + "$smac > $NON_IP_MC, ethertype 802.1Q (0x8100).*vlan $vid,.*length 4" + + check_rcv $h2 "$test_name: Broadcast non-IP VID $vid" \ + "$smac > $BC, ethertype 802.1Q (0x8100).*vlan $vid,.*length 4" + + check_rcv $h2 "$test_name: Unicast IPv4 VID $vid" \ + "$smac > $dmac, ethertype 802.1Q (0x8100).*vlan $vid,.*ethertype IPv4 (0x0800), $H1_IPV4 > $H2_IPV4" + + check_rcv $h2 "$test_name: Multicast IPv4 VID $vid" \ + "$smac > $MACV4_ALLNODES, ethertype 802.1Q (0x8100).*vlan $vid,.*ethertype IPv4 (0x0800), $H1_IPV4 > $IPV4_ALLNODES" + + check_rcv $h2 "$test_name: Unicast IPv6 VID $vid" \ + "$smac > $dmac, ethertype 802.1Q (0x8100).*vlan $vid,.*ethertype IPv6 (0x86dd), $H1_IPV6 > $H2_IPV6" + + check_rcv $h2 "$test_name: Multicast IPv6 VID $vid" \ + "$smac > $MACV6_ALLNODES, ethertype 802.1Q (0x8100).*vlan $vid,.*ethertype IPv6 (0x86dd), $h1_ipv6_lladdr > $IPV6_ALLNODES" + done + + tcpdump_cleanup $h2 +} + +standalone() +{ + run_test "Standalone switch ports" +} + +two_bridges() +{ + ip link add br0 type bridge && ip link set br0 up + ip link add br1 type bridge && ip link set br1 up + ip link set $swp1 master br0 + ip link set $swp2 master br1 + + run_test "Switch ports in different bridges" + + ip link del br1 + ip link del br0 +} + +one_bridge_two_pvids() +{ + ip link add br0 type bridge vlan_filtering 1 vlan_default_pvid 0 + ip link set br0 up + ip link set $swp1 master br0 + ip link set $swp2 master br0 + + bridge vlan add dev $swp1 vid 1 pvid untagged + bridge vlan add dev $swp1 vid 2 pvid untagged + + run_test "Switch ports in VLAN-aware bridge with different PVIDs" + + ip link del br0 +} + +h1_create() +{ + simple_if_init $h1 $H1_IPV4/24 $H1_IPV6/64 +} + +h1_destroy() +{ + simple_if_fini $h1 $H1_IPV4/24 $H1_IPV6/64 +} + +h2_create() +{ + simple_if_init $h2 $H2_IPV4/24 $H2_IPV6/64 +} + +h2_destroy() +{ + simple_if_fini $h2 $H2_IPV4/24 $H2_IPV6/64 +} + +cleanup() +{ + pre_cleanup + + h2_destroy + h1_destroy + + vrf_cleanup +} + +setup_prepare() +{ + vrf_prepare + + h1_create + h2_create + # we call simple_if_init from the test itself, but setup_wait expects + # that we call it from here, and waits until the interfaces are up + ip link set dev $swp1 up + ip link set dev $swp2 up +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/pedit_dsfield.sh b/tools/testing/selftests/net/forwarding/pedit_dsfield.sh index 55eeacf59241..af008fbf2725 100755 --- a/tools/testing/selftests/net/forwarding/pedit_dsfield.sh +++ b/tools/testing/selftests/net/forwarding/pedit_dsfield.sh @@ -60,7 +60,9 @@ h2_destroy() switch_create() { - ip link add name br1 up type bridge vlan_filtering 1 + ip link add name br1 type bridge vlan_filtering 1 + ip link set dev br1 addrgenmode none + ip link set dev br1 up ip link set dev $swp1 master br1 ip link set dev $swp1 up ip link set dev $swp2 master br1 @@ -75,7 +77,9 @@ switch_destroy() tc qdisc del dev $swp2 clsact tc qdisc del dev $swp1 clsact + ip link set dev $swp2 down ip link set dev $swp2 nomaster + ip link set dev $swp1 down ip link set dev $swp1 nomaster ip link del dev br1 } diff --git a/tools/testing/selftests/net/forwarding/pedit_ip.sh b/tools/testing/selftests/net/forwarding/pedit_ip.sh new file mode 100755 index 000000000000..d14efb2d23b2 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/pedit_ip.sh @@ -0,0 +1,201 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# This test sends traffic from H1 to H2. Either on ingress of $swp1, or on +# egress of $swp2, the traffic is acted upon by a pedit action. An ingress +# filter installed on $h2 verifies that the packet looks like expected. +# +# +----------------------+ +----------------------+ +# | H1 | | H2 | +# | + $h1 | | $h2 + | +# | | 192.0.2.1/28 | | 192.0.2.2/28 | | +# +----|-----------------+ +----------------|-----+ +# | | +# +----|----------------------------------------------------------------|-----+ +# | SW | | | +# | +-|----------------------------------------------------------------|-+ | +# | | + $swp1 BR $swp2 + | | +# | +--------------------------------------------------------------------+ | +# +---------------------------------------------------------------------------+ + +ALL_TESTS=" + ping_ipv4 + ping_ipv6 + test_ip4_src + test_ip4_dst + test_ip6_src + test_ip6_dst +" + +NUM_NETIFS=4 +source lib.sh +source tc_common.sh + +h1_create() +{ + simple_if_init $h1 192.0.2.1/28 2001:db8:1::1/64 +} + +h1_destroy() +{ + simple_if_fini $h1 192.0.2.1/28 2001:db8:1::1/64 +} + +h2_create() +{ + simple_if_init $h2 192.0.2.2/28 2001:db8:1::2/64 + tc qdisc add dev $h2 clsact +} + +h2_destroy() +{ + tc qdisc del dev $h2 clsact + simple_if_fini $h2 192.0.2.2/28 2001:db8:1::2/64 +} + +switch_create() +{ + ip link add name br1 up type bridge vlan_filtering 1 + ip link set dev $swp1 master br1 + ip link set dev $swp1 up + ip link set dev $swp2 master br1 + ip link set dev $swp2 up + + tc qdisc add dev $swp1 clsact + tc qdisc add dev $swp2 clsact +} + +switch_destroy() +{ + tc qdisc del dev $swp2 clsact + tc qdisc del dev $swp1 clsact + + ip link set dev $swp2 down + ip link set dev $swp2 nomaster + ip link set dev $swp1 down + ip link set dev $swp1 nomaster + ip link del dev br1 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + h2mac=$(mac_get $h2) + + vrf_prepare + h1_create + h2_create + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + h2_destroy + h1_destroy + vrf_cleanup +} + +ping_ipv4() +{ + ping_test $h1 192.0.2.2 +} + +ping_ipv6() +{ + ping6_test $h1 2001:db8:1::2 +} + +do_test_pedit_ip() +{ + local pedit_locus=$1; shift + local pedit_action=$1; shift + local match_prot=$1; shift + local match_flower=$1; shift + local mz_flags=$1; shift + + tc filter add $pedit_locus handle 101 pref 1 \ + flower action pedit ex munge $pedit_action + tc filter add dev $h2 ingress handle 101 pref 1 prot $match_prot \ + flower skip_hw $match_flower action pass + + RET=0 + + $MZ $mz_flags $h1 -c 10 -d 20msec -p 100 -a own -b $h2mac -q -t ip + + local pkts + pkts=$(busywait "$TC_HIT_TIMEOUT" until_counter_is ">= 10" \ + tc_rule_handle_stats_get "dev $h2 ingress" 101) + check_err $? "Expected to get 10 packets, but got $pkts." + + pkts=$(tc_rule_handle_stats_get "$pedit_locus" 101) + ((pkts >= 10)) + check_err $? "Expected to get 10 packets on pedit rule, but got $pkts." + + log_test "$pedit_locus pedit $pedit_action" + + tc filter del dev $h2 ingress pref 1 + tc filter del $pedit_locus pref 1 +} + +do_test_pedit_ip6() +{ + local locus=$1; shift + local pedit_addr=$1; shift + local flower_addr=$1; shift + + do_test_pedit_ip "$locus" "$pedit_addr set 2001:db8:2::1" ipv6 \ + "$flower_addr 2001:db8:2::1" \ + "-6 -A 2001:db8:1::1 -B 2001:db8:1::2" +} + +do_test_pedit_ip4() +{ + local locus=$1; shift + local pedit_addr=$1; shift + local flower_addr=$1; shift + + do_test_pedit_ip "$locus" "$pedit_addr set 198.51.100.1" ip \ + "$flower_addr 198.51.100.1" \ + "-A 192.0.2.1 -B 192.0.2.2" +} + +test_ip4_src() +{ + do_test_pedit_ip4 "dev $swp1 ingress" "ip src" src_ip + do_test_pedit_ip4 "dev $swp2 egress" "ip src" src_ip +} + +test_ip4_dst() +{ + do_test_pedit_ip4 "dev $swp1 ingress" "ip dst" dst_ip + do_test_pedit_ip4 "dev $swp2 egress" "ip dst" dst_ip +} + +test_ip6_src() +{ + do_test_pedit_ip6 "dev $swp1 ingress" "ip6 src" src_ip + do_test_pedit_ip6 "dev $swp2 egress" "ip6 src" src_ip +} + +test_ip6_dst() +{ + do_test_pedit_ip6 "dev $swp1 ingress" "ip6 dst" dst_ip + do_test_pedit_ip6 "dev $swp2 egress" "ip6 dst" dst_ip +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/pedit_l4port.sh b/tools/testing/selftests/net/forwarding/pedit_l4port.sh index 5f20d289ee43..10e594c55117 100755 --- a/tools/testing/selftests/net/forwarding/pedit_l4port.sh +++ b/tools/testing/selftests/net/forwarding/pedit_l4port.sh @@ -71,7 +71,9 @@ switch_destroy() tc qdisc del dev $swp2 clsact tc qdisc del dev $swp1 clsact + ip link set dev $swp2 down ip link set dev $swp2 nomaster + ip link set dev $swp1 down ip link set dev $swp1 nomaster ip link del dev br1 } diff --git a/tools/testing/selftests/net/forwarding/q_in_vni.sh b/tools/testing/selftests/net/forwarding/q_in_vni.sh new file mode 100755 index 000000000000..798b13525c02 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/q_in_vni.sh @@ -0,0 +1,348 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# +-----------------------+ +------------------------+ +# | H1 (vrf) | | H2 (vrf) | +# | + $h1.10 | | + $h2.10 | +# | | 192.0.2.1/28 | | | 192.0.2.2/28 | +# | | | | | | +# | | + $h1.20 | | | + $h2.20 | +# | \ | 198.51.100.1/24 | | \ | 198.51.100.2/24 | +# | \| | | \| | +# | + $h1 | | + $h2 | +# +----|------------------+ +----|-------------------+ +# | | +# +----|--------------------------------------------------|-------------------+ +# | SW | | | +# | +--|--------------------------------------------------|-----------------+ | +# | | + $swp1 BR1 (802.1ad) + $swp2 | | +# | | vid 100 pvid untagged vid 100 pvid | | +# | | untagged | | +# | | + vx100 (vxlan) | | +# | | local 192.0.2.17 | | +# | | remote 192.0.2.34 192.0.2.50 | | +# | | id 1000 dstport $VXPORT | | +# | | vid 100 pvid untagged | | +# | +-----------------------------------------------------------------------+ | +# | | +# | 192.0.2.32/28 via 192.0.2.18 | +# | 192.0.2.48/28 via 192.0.2.18 | +# | | +# | + $rp1 | +# | | 192.0.2.17/28 | +# +----|----------------------------------------------------------------------+ +# | +# +----|--------------------------------------------------------+ +# | | VRP2 (vrf) | +# | + $rp2 | +# | 192.0.2.18/28 | +# | | (maybe) HW +# ============================================================================= +# | | (likely) SW +# | + v1 (veth) + v3 (veth) | +# | | 192.0.2.33/28 | 192.0.2.49/28 | +# +----|---------------------------------------|----------------+ +# | | +# +----|------------------------------+ +----|------------------------------+ +# | + v2 (veth) NS1 (netns) | | + v4 (veth) NS2 (netns) | +# | 192.0.2.34/28 | | 192.0.2.50/28 | +# | | | | +# | 192.0.2.16/28 via 192.0.2.33 | | 192.0.2.16/28 via 192.0.2.49 | +# | 192.0.2.50/32 via 192.0.2.33 | | 192.0.2.34/32 via 192.0.2.49 | +# | | | | +# | +-------------------------------+ | | +-------------------------------+ | +# | | BR2 (802.1ad) | | | | BR2 (802.1ad) | | +# | | + vx100 (vxlan) | | | | + vx100 (vxlan) | | +# | | local 192.0.2.34 | | | | local 192.0.2.50 | | +# | | remote 192.0.2.17 | | | | remote 192.0.2.17 | | +# | | remote 192.0.2.50 | | | | remote 192.0.2.34 | | +# | | id 1000 dstport $VXPORT | | | | id 1000 dstport $VXPORT | | +# | | vid 100 pvid untagged | | | | vid 100 pvid untagged | | +# | | | | | | | | +# | | + w1 (veth) | | | | + w1 (veth) | | +# | | | vid 100 pvid untagged | | | | | vid 100 pvid untagged | | +# | +--|----------------------------+ | | +--|----------------------------+ | +# | | | | | | +# | +--|----------------------------+ | | +--|----------------------------+ | +# | | | VW2 (vrf) | | | | | VW2 (vrf) | | +# | | + w2 (veth) | | | | + w2 (veth) | | +# | | |\ | | | | |\ | | +# | | | + w2.10 | | | | | + w2.10 | | +# | | | 192.0.2.3/28 | | | | | 192.0.2.4/28 | | +# | | | | | | | | | | +# | | + w2.20 | | | | + w2.20 | | +# | | 198.51.100.3/24 | | | | 198.51.100.4/24 | | +# | +-------------------------------+ | | +-------------------------------+ | +# +-----------------------------------+ +-----------------------------------+ + +: ${VXPORT:=4789} +export VXPORT + +: ${ALL_TESTS:=" + ping_ipv4 + "} + +NUM_NETIFS=6 +source lib.sh + +h1_create() +{ + simple_if_init $h1 + tc qdisc add dev $h1 clsact + vlan_create $h1 10 v$h1 192.0.2.1/28 + vlan_create $h1 20 v$h1 198.51.100.1/24 +} + +h1_destroy() +{ + vlan_destroy $h1 20 + vlan_destroy $h1 10 + tc qdisc del dev $h1 clsact + simple_if_fini $h1 +} + +h2_create() +{ + simple_if_init $h2 + tc qdisc add dev $h2 clsact + vlan_create $h2 10 v$h2 192.0.2.2/28 + vlan_create $h2 20 v$h2 198.51.100.2/24 +} + +h2_destroy() +{ + vlan_destroy $h2 20 + vlan_destroy $h2 10 + tc qdisc del dev $h2 clsact + simple_if_fini $h2 +} + +rp1_set_addr() +{ + ip address add dev $rp1 192.0.2.17/28 + + ip route add 192.0.2.32/28 nexthop via 192.0.2.18 + ip route add 192.0.2.48/28 nexthop via 192.0.2.18 +} + +rp1_unset_addr() +{ + ip route del 192.0.2.48/28 nexthop via 192.0.2.18 + ip route del 192.0.2.32/28 nexthop via 192.0.2.18 + + ip address del dev $rp1 192.0.2.17/28 +} + +switch_create() +{ + ip link add name br1 type bridge vlan_filtering 1 vlan_protocol 802.1ad \ + vlan_default_pvid 0 mcast_snooping 0 + ip link set dev br1 addrgenmode none + # Make sure the bridge uses the MAC address of the local port and not + # that of the VxLAN's device. + ip link set dev br1 address $(mac_get $swp1) + ip link set dev br1 up + + ip link set dev $rp1 up + rp1_set_addr + + ip link add name vx100 type vxlan id 1000 \ + local 192.0.2.17 dstport "$VXPORT" \ + nolearning noudpcsum tos inherit ttl 100 + ip link set dev vx100 up + + ip link set dev vx100 master br1 + bridge vlan add vid 100 dev vx100 pvid untagged + + ip link set dev $swp1 master br1 + ip link set dev $swp1 up + bridge vlan add vid 100 dev $swp1 pvid untagged + + ip link set dev $swp2 master br1 + ip link set dev $swp2 up + bridge vlan add vid 100 dev $swp2 pvid untagged + + bridge fdb append dev vx100 00:00:00:00:00:00 dst 192.0.2.34 self + bridge fdb append dev vx100 00:00:00:00:00:00 dst 192.0.2.50 self +} + +switch_destroy() +{ + bridge fdb del dev vx100 00:00:00:00:00:00 dst 192.0.2.50 self + bridge fdb del dev vx100 00:00:00:00:00:00 dst 192.0.2.34 self + + bridge vlan del vid 100 dev $swp2 + ip link set dev $swp2 down + ip link set dev $swp2 nomaster + + bridge vlan del vid 100 dev $swp1 + ip link set dev $swp1 down + ip link set dev $swp1 nomaster + + ip link set dev vx100 nomaster + ip link set dev vx100 down + ip link del dev vx100 + + rp1_unset_addr + ip link set dev $rp1 down + + ip link set dev br1 down + ip link del dev br1 +} + +vrp2_create() +{ + simple_if_init $rp2 192.0.2.18/28 + __simple_if_init v1 v$rp2 192.0.2.33/28 + __simple_if_init v3 v$rp2 192.0.2.49/28 + tc qdisc add dev v1 clsact +} + +vrp2_destroy() +{ + tc qdisc del dev v1 clsact + __simple_if_fini v3 192.0.2.49/28 + __simple_if_fini v1 192.0.2.33/28 + simple_if_fini $rp2 192.0.2.18/28 +} + +ns_init_common() +{ + local in_if=$1; shift + local in_addr=$1; shift + local other_in_addr=$1; shift + local nh_addr=$1; shift + local host_addr1=$1; shift + local host_addr2=$1; shift + + ip link set dev $in_if up + ip address add dev $in_if $in_addr/28 + tc qdisc add dev $in_if clsact + + ip link add name br2 type bridge vlan_filtering 1 vlan_protocol 802.1ad \ + vlan_default_pvid 0 + ip link set dev br2 up + + ip link add name w1 type veth peer name w2 + + ip link set dev w1 master br2 + ip link set dev w1 up + bridge vlan add vid 100 dev w1 pvid untagged + + ip link add name vx100 type vxlan id 1000 local $in_addr \ + dstport "$VXPORT" + ip link set dev vx100 up + bridge fdb append dev vx100 00:00:00:00:00:00 dst 192.0.2.17 self + bridge fdb append dev vx100 00:00:00:00:00:00 dst $other_in_addr self + + ip link set dev vx100 master br2 + tc qdisc add dev vx100 clsact + + bridge vlan add vid 100 dev vx100 pvid untagged + + simple_if_init w2 + vlan_create w2 10 vw2 $host_addr1/28 + vlan_create w2 20 vw2 $host_addr2/24 + + ip route add 192.0.2.16/28 nexthop via $nh_addr + ip route add $other_in_addr/32 nexthop via $nh_addr +} +export -f ns_init_common + +ns1_create() +{ + ip netns add ns1 + ip link set dev v2 netns ns1 + in_ns ns1 \ + ns_init_common v2 192.0.2.34 192.0.2.50 192.0.2.33 \ + 192.0.2.3 198.51.100.3 +} + +ns1_destroy() +{ + ip netns exec ns1 ip link set dev v2 netns 1 + ip netns del ns1 +} + +ns2_create() +{ + ip netns add ns2 + ip link set dev v4 netns ns2 + in_ns ns2 \ + ns_init_common v4 192.0.2.50 192.0.2.34 192.0.2.49 \ + 192.0.2.4 198.51.100.4 +} + +ns2_destroy() +{ + ip netns exec ns2 ip link set dev v4 netns 1 + ip netns del ns2 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + rp1=${NETIFS[p5]} + rp2=${NETIFS[p6]} + + vrf_prepare + forwarding_enable + + h1_create + h2_create + switch_create + + ip link add name v1 type veth peer name v2 + ip link add name v3 type veth peer name v4 + vrp2_create + ns1_create + ns2_create + + r1_mac=$(in_ns ns1 mac_get w2) + r2_mac=$(in_ns ns2 mac_get w2) + h2_mac=$(mac_get $h2) +} + +cleanup() +{ + pre_cleanup + + ns2_destroy + ns1_destroy + vrp2_destroy + ip link del dev v3 + ip link del dev v1 + + switch_destroy + h2_destroy + h1_destroy + + forwarding_restore + vrf_cleanup +} + +ping_ipv4() +{ + ping_test $h1 192.0.2.2 ": local->local" + ping_test $h1 192.0.2.3 ": local->remote 1" + ping_test $h1 192.0.2.4 ": local->remote 2" +} + +test_all() +{ + echo "Running tests with UDP port $VXPORT" + tests_run +} + +trap cleanup EXIT + +setup_prepare +setup_wait +test_all + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/q_in_vni_ipv6.sh b/tools/testing/selftests/net/forwarding/q_in_vni_ipv6.sh new file mode 100755 index 000000000000..0548b2b0d416 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/q_in_vni_ipv6.sh @@ -0,0 +1,347 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# +-----------------------+ +------------------------+ +# | H1 (vrf) | | H2 (vrf) | +# | + $h1.10 | | + $h2.10 | +# | | 2001:db8:1::1/64 | | | 2001:db8:1::2/64 | +# | | | | | | +# | | + $h1.20 | | | + $h2.20 | +# | \ | 2001:db8:2::1/64 | | \ | 2001:db8:2::2/64 | +# | \| | | \| | +# | + $h1 | | + $h2 | +# +----|------------------+ +----|-------------------+ +# | | +# +----|--------------------------------------------------|-------------------+ +# | SW | | | +# | +--|--------------------------------------------------|-----------------+ | +# | | + $swp1 BR1 (802.1ad) + $swp2 | | +# | | vid 100 pvid untagged vid 100 pvid | | +# | | untagged | | +# | | + vx100 (vxlan) | | +# | | local 2001:db8:3::1 | | +# | | remote 2001:db8:4::1 2001:db8:5::1 | | +# | | id 1000 dstport $VXPORT | | +# | | vid 100 pvid untagged | | +# | +-----------------------------------------------------------------------+ | +# | | +# | 2001:db8:4::0/64 via 2001:db8:3::2 | +# | 2001:db8:5::0/64 via 2001:db8:3::2 | +# | | +# | + $rp1 | +# | | 2001:db8:3::1/64 | +# +----|----------------------------------------------------------------------+ +# | +# +----|----------------------------------------------------------+ +# | | VRP2 (vrf) | +# | + $rp2 | +# | 2001:db8:3::2/64 | +# | | (maybe) HW +# ============================================================================= +# | | (likely) SW +# | + v1 (veth) + v3 (veth) | +# | | 2001:db8:4::2/64 | 2001:db8:5::2/64 | +# +----|---------------------------------------|------------------+ +# | | +# +----|--------------------------------+ +----|-------------------------------+ +# | + v2 (veth) NS1 (netns) | | + v4 (veth) NS2 (netns) | +# | 2001:db8:4::1/64 | | 2001:db8:5::1/64 | +# | | | | +# | 2001:db8:3::0/64 via 2001:db8:4::2 | | 2001:db8:3::0/64 via 2001:db8:5::2 | +# | 2001:db8:5::1/128 via 2001:db8:4::2 | | 2001:db8:4::1/128 via | +# | | | 2001:db8:5::2 | +# | +-------------------------------+ | | +-------------------------------+ | +# | | BR2 (802.1ad) | | | | BR2 (802.1ad) | | +# | | + vx100 (vxlan) | | | | + vx100 (vxlan) | | +# | | local 2001:db8:4::1 | | | | local 2001:db8:5::1 | | +# | | remote 2001:db8:3::1 | | | | remote 2001:db8:3::1 | | +# | | remote 2001:db8:5::1 | | | | remote 2001:db8:4::1 | | +# | | id 1000 dstport $VXPORT | | | | id 1000 dstport $VXPORT | | +# | | vid 100 pvid untagged | | | | vid 100 pvid untagged | | +# | | | | | | | | +# | | + w1 (veth) | | | | + w1 (veth) | | +# | | | vid 100 pvid untagged | | | | | vid 100 pvid untagged | | +# | +--|----------------------------+ | | +--|----------------------------+ | +# | | | | | | +# | +--|----------------------------+ | | +--|----------------------------+ | +# | | | VW2 (vrf) | | | | | VW2 (vrf) | | +# | | + w2 (veth) | | | | + w2 (veth) | | +# | | |\ | | | | |\ | | +# | | | + w2.10 | | | | | + w2.10 | | +# | | | 2001:db8:1::3/64 | | | | | 2001:db8:1::4/64 | | +# | | | | | | | | | | +# | | + w2.20 | | | | + w2.20 | | +# | | 2001:db8:2::3/64 | | | | 2001:db8:2::4/64 | | +# | +-------------------------------+ | | +-------------------------------+ | +# +-------------------------------------+ +------------------------------------+ + +: ${VXPORT:=4789} +export VXPORT + +: ${ALL_TESTS:=" + ping_ipv6 + "} + +NUM_NETIFS=6 +source lib.sh + +h1_create() +{ + simple_if_init $h1 + tc qdisc add dev $h1 clsact + vlan_create $h1 10 v$h1 2001:db8:1::1/64 + vlan_create $h1 20 v$h1 2001:db8:2::1/64 +} + +h1_destroy() +{ + vlan_destroy $h1 20 + vlan_destroy $h1 10 + tc qdisc del dev $h1 clsact + simple_if_fini $h1 +} + +h2_create() +{ + simple_if_init $h2 + tc qdisc add dev $h2 clsact + vlan_create $h2 10 v$h2 2001:db8:1::2/64 + vlan_create $h2 20 v$h2 2001:db8:2::2/64 +} + +h2_destroy() +{ + vlan_destroy $h2 20 + vlan_destroy $h2 10 + tc qdisc del dev $h2 clsact + simple_if_fini $h2 +} + +rp1_set_addr() +{ + ip address add dev $rp1 2001:db8:3::1/64 + + ip route add 2001:db8:4::0/64 nexthop via 2001:db8:3::2 + ip route add 2001:db8:5::0/64 nexthop via 2001:db8:3::2 +} + +rp1_unset_addr() +{ + ip route del 2001:db8:5::0/64 nexthop via 2001:db8:3::2 + ip route del 2001:db8:4::0/64 nexthop via 2001:db8:3::2 + + ip address del dev $rp1 2001:db8:3::1/64 +} + +switch_create() +{ + ip link add name br1 type bridge vlan_filtering 1 vlan_protocol 802.1ad \ + vlan_default_pvid 0 mcast_snooping 0 + # Make sure the bridge uses the MAC address of the local port and not + # that of the VxLAN's device. + ip link set dev br1 address $(mac_get $swp1) + ip link set dev br1 up + + ip link set dev $rp1 up + rp1_set_addr + + ip link add name vx100 type vxlan id 1000 \ + local 2001:db8:3::1 dstport "$VXPORT" \ + nolearning udp6zerocsumrx udp6zerocsumtx tos inherit ttl 100 + ip link set dev vx100 up + + ip link set dev vx100 master br1 + bridge vlan add vid 100 dev vx100 pvid untagged + + ip link set dev $swp1 master br1 + ip link set dev $swp1 up + bridge vlan add vid 100 dev $swp1 pvid untagged + + ip link set dev $swp2 master br1 + ip link set dev $swp2 up + bridge vlan add vid 100 dev $swp2 pvid untagged + + bridge fdb append dev vx100 00:00:00:00:00:00 dst 2001:db8:4::1 self + bridge fdb append dev vx100 00:00:00:00:00:00 dst 2001:db8:5::1 self +} + +switch_destroy() +{ + bridge fdb del dev vx100 00:00:00:00:00:00 dst 2001:db8:5::1 self + bridge fdb del dev vx100 00:00:00:00:00:00 dst 2001:db8:4::1 self + + bridge vlan del vid 100 dev $swp2 + ip link set dev $swp2 down + ip link set dev $swp2 nomaster + + bridge vlan del vid 100 dev $swp1 + ip link set dev $swp1 down + ip link set dev $swp1 nomaster + + ip link set dev vx100 nomaster + ip link set dev vx100 down + ip link del dev vx100 + + rp1_unset_addr + ip link set dev $rp1 down + + ip link set dev br1 down + ip link del dev br1 +} + +vrp2_create() +{ + simple_if_init $rp2 2001:db8:3::2/64 + __simple_if_init v1 v$rp2 2001:db8:4::2/64 + __simple_if_init v3 v$rp2 2001:db8:5::2/64 + tc qdisc add dev v1 clsact +} + +vrp2_destroy() +{ + tc qdisc del dev v1 clsact + __simple_if_fini v3 2001:db8:5::2/64 + __simple_if_fini v1 2001:db8:4::2/64 + simple_if_fini $rp2 2001:db8:3::2/64 +} + +ns_init_common() +{ + local in_if=$1; shift + local in_addr=$1; shift + local other_in_addr=$1; shift + local nh_addr=$1; shift + local host_addr1=$1; shift + local host_addr2=$1; shift + + ip link set dev $in_if up + ip address add dev $in_if $in_addr/64 + tc qdisc add dev $in_if clsact + + ip link add name br2 type bridge vlan_filtering 1 vlan_protocol 802.1ad \ + vlan_default_pvid 0 + ip link set dev br2 up + + ip link add name w1 type veth peer name w2 + + ip link set dev w1 master br2 + ip link set dev w1 up + bridge vlan add vid 100 dev w1 pvid untagged + + ip link add name vx100 type vxlan id 1000 local $in_addr \ + dstport "$VXPORT" udp6zerocsumrx + ip link set dev vx100 up + bridge fdb append dev vx100 00:00:00:00:00:00 dst 2001:db8:3::1 self + bridge fdb append dev vx100 00:00:00:00:00:00 dst $other_in_addr self + + ip link set dev vx100 master br2 + tc qdisc add dev vx100 clsact + + bridge vlan add vid 100 dev vx100 pvid untagged + + simple_if_init w2 + vlan_create w2 10 vw2 $host_addr1/64 + vlan_create w2 20 vw2 $host_addr2/64 + + ip route add 2001:db8:3::0/64 nexthop via $nh_addr + ip route add $other_in_addr/128 nexthop via $nh_addr +} +export -f ns_init_common + +ns1_create() +{ + ip netns add ns1 + ip link set dev v2 netns ns1 + in_ns ns1 \ + ns_init_common v2 2001:db8:4::1 2001:db8:5::1 2001:db8:4::2 \ + 2001:db8:1::3 2001:db8:2::3 +} + +ns1_destroy() +{ + ip netns exec ns1 ip link set dev v2 netns 1 + ip netns del ns1 +} + +ns2_create() +{ + ip netns add ns2 + ip link set dev v4 netns ns2 + in_ns ns2 \ + ns_init_common v4 2001:db8:5::1 2001:db8:4::1 2001:db8:5::2 \ + 2001:db8:1::4 2001:db8:2::4 +} + +ns2_destroy() +{ + ip netns exec ns2 ip link set dev v4 netns 1 + ip netns del ns2 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + rp1=${NETIFS[p5]} + rp2=${NETIFS[p6]} + + vrf_prepare + forwarding_enable + + h1_create + h2_create + switch_create + + ip link add name v1 type veth peer name v2 + ip link add name v3 type veth peer name v4 + vrp2_create + ns1_create + ns2_create + + r1_mac=$(in_ns ns1 mac_get w2) + r2_mac=$(in_ns ns2 mac_get w2) + h2_mac=$(mac_get $h2) +} + +cleanup() +{ + pre_cleanup + + ns2_destroy + ns1_destroy + vrp2_destroy + ip link del dev v3 + ip link del dev v1 + + switch_destroy + h2_destroy + h1_destroy + + forwarding_restore + vrf_cleanup +} + +ping_ipv6() +{ + ping6_test $h1 2001:db8:1::2 ": local->local" + ping6_test $h1 2001:db8:1::3 ": local->remote 1" + ping6_test $h1 2001:db8:1::4 ": local->remote 2" +} + +test_all() +{ + echo "Running tests with UDP port $VXPORT" + tests_run +} + +trap cleanup EXIT + +setup_prepare +setup_wait +test_all + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/router.sh b/tools/testing/selftests/net/forwarding/router.sh index 057f91b05098..b98ea9449b8b 100755 --- a/tools/testing/selftests/net/forwarding/router.sh +++ b/tools/testing/selftests/net/forwarding/router.sh @@ -1,6 +1,24 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +# +--------------------+ +----------------------+ +# | H1 | | H2 | +# | | | | +# | $h1 + | | + $h2 | +# | 192.0.2.2/24 | | | | 198.51.100.2/24 | +# | 2001:db8:1::2/64 | | | | 2001:db8:2::2/64 | +# | | | | | | +# +------------------|-+ +-|--------------------+ +# | | +# +------------------|-------------------------|--------------------+ +# | SW | | | +# | | | | +# | $rp1 + + $rp2 | +# | 192.0.2.1/24 198.51.100.1/24 | +# | 2001:db8:1::1/64 2001:db8:2::1/64 | +# | | +# +-----------------------------------------------------------------+ + ALL_TESTS=" ping_ipv4 ping_ipv6 diff --git a/tools/testing/selftests/net/forwarding/router_bridge.sh b/tools/testing/selftests/net/forwarding/router_bridge.sh index ebc596a272f7..0182eb2abfa6 100755 --- a/tools/testing/selftests/net/forwarding/router_bridge.sh +++ b/tools/testing/selftests/net/forwarding/router_bridge.sh @@ -1,9 +1,39 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +# +------------------------+ +----------------------+ +# | H1 (vrf) | | H2 (vrf) | +# | + $h1 | | + $h2 | +# | | 192.0.2.1/28 | | | 192.0.2.130/28 | +# | | 2001:db8:1::1/64 | | | 2001:db8:2::2/64 | +# | | | | | | +# +----|-------------------+ +--|-------------------+ +# | | +# +----|--------------------------------------------------|-------------------+ +# | SW | | | +# | +--|-----------------------------+ + $swp2 | +# | | + $swp1 BR1 (802.1q) | 192.0.2.129/28 | +# | | 192.0.2.2/28 | 2001:db8:2::1/64 | +# | | 2001:db8:1::1/64 | | +# | | | | +# | +--------------------------------+ | +# +---------------------------------------------------------------------------+ + ALL_TESTS=" ping_ipv4 ping_ipv6 + config_remaster + ping_ipv4 + ping_ipv6 + config_remove_pvid + ping_ipv4_fails + ping_ipv6_fails + config_add_pvid + ping_ipv4 + ping_ipv6 + config_late_pvid + ping_ipv4 + ping_ipv6 " NUM_NETIFS=4 source lib.sh @@ -38,7 +68,8 @@ h2_destroy() router_create() { - ip link add name br1 type bridge vlan_filtering 1 + ip link add name br1 address $(mac_get $swp1) \ + type bridge vlan_filtering 1 ip link set dev br1 up ip link set dev $swp1 master br1 @@ -61,6 +92,42 @@ router_destroy() ip link del dev br1 } +config_remaster() +{ + log_info "Remaster bridge slave" + + ip link set dev $swp1 nomaster + sleep 2 + ip link set dev $swp1 master br1 +} + +config_remove_pvid() +{ + log_info "Remove PVID from the bridge" + + bridge vlan add dev br1 vid 1 self + sleep 2 +} + +config_add_pvid() +{ + log_info "Add PVID to the bridge" + + bridge vlan add dev br1 vid 1 self pvid untagged + sleep 2 +} + +config_late_pvid() +{ + log_info "Add bridge PVID after enslaving port" + + ip link set dev $swp1 nomaster + ip link set dev br1 type bridge vlan_default_pvid 0 + sleep 2 + ip link set dev $swp1 master br1 + ip link set dev br1 type bridge vlan_default_pvid 1 +} + setup_prepare() { h1=${NETIFS[p1]} @@ -103,6 +170,16 @@ ping_ipv6() ping6_test $h1 2001:db8:2::2 } +ping_ipv4_fails() +{ + ping_test_fails $h1 192.0.2.130 +} + +ping_ipv6_fails() +{ + ping6_test_fails $h1 2001:db8:2::2 +} + trap cleanup EXIT setup_prepare diff --git a/tools/testing/selftests/net/forwarding/router_bridge_1d.sh b/tools/testing/selftests/net/forwarding/router_bridge_1d.sh new file mode 100755 index 000000000000..6d51f2ca72a2 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/router_bridge_1d.sh @@ -0,0 +1,185 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# +---------------------------------------------+ +----------------------+ +# | H1 (vrf) | | H2 (vrf) | +# | + $h1.100 + $h1.200 | | + $h2 | +# | | 192.0.2.1/28 | 192.0.2.17/28 | | | 192.0.2.130/28 | +# | | 2001:db8:1::1/64 | 2001:db8:3::1/64 | | | 192.0.2.146/28 | +# | \_________ __________/ | | | 2001:db8:2::2/64 | +# | V | | | 2001:db8:4::2/64 | +# | + $h1 | | | | +# +--------------|------------------------------+ +--|-------------------+ +# | | +# +--------------|----------------------------------------|-------------------+ +# | SW + $swp1 + $swp2 | +# | | 192.0.2.129/28 | +# | | 192.0.2.145/28 | +# | | 2001:db8:2::1/64 | +# | ________^___________________________ 2001:db8:4::1/64 | +# | / \ | +# | +---|------------------------------+ +---|------------------------------+ | +# | | + $swp1.100 BR1 (802.1d) | | + $swp1.200 BR2 (802.1d) | | +# | | 192.0.2.2/28 | | 192.0.2.18/28 | | +# | | 2001:db8:1::2/64 | | 2001:db8:3::2/64 | | +# | | | | | | +# | +----------------------------------+ +----------------------------------+ | +# +---------------------------------------------------------------------------+ + +ALL_TESTS=" + ping_ipv4 + ping_ipv6 + config_remaster + ping_ipv4 + ping_ipv6 +" +NUM_NETIFS=4 +source lib.sh + +h1_create() +{ + simple_if_init $h1 + vlan_create $h1 100 v$h1 192.0.2.1/28 2001:db8:1::1/64 + vlan_create $h1 200 v$h1 192.0.2.17/28 2001:db8:3::1/64 + ip -4 route add 192.0.2.128/28 vrf v$h1 nexthop via 192.0.2.2 + ip -4 route add 192.0.2.144/28 vrf v$h1 nexthop via 192.0.2.18 + ip -6 route add 2001:db8:2::/64 vrf v$h1 nexthop via 2001:db8:1::2 + ip -6 route add 2001:db8:4::/64 vrf v$h1 nexthop via 2001:db8:3::2 +} + +h1_destroy() +{ + ip -6 route del 2001:db8:4::/64 vrf v$h1 + ip -6 route del 2001:db8:2::/64 vrf v$h1 + ip -4 route del 192.0.2.144/28 vrf v$h1 + ip -4 route del 192.0.2.128/28 vrf v$h1 + vlan_destroy $h1 200 + vlan_destroy $h1 100 + simple_if_fini $h1 +} + +h2_create() +{ + simple_if_init $h2 192.0.2.130/28 2001:db8:2::2/64 \ + 192.0.2.146/28 2001:db8:4::2/64 + ip -4 route add 192.0.2.0/28 vrf v$h2 nexthop via 192.0.2.129 + ip -4 route add 192.0.2.16/28 vrf v$h2 nexthop via 192.0.2.145 + ip -6 route add 2001:db8:1::/64 vrf v$h2 nexthop via 2001:db8:2::1 + ip -6 route add 2001:db8:3::/64 vrf v$h2 nexthop via 2001:db8:4::1 +} + +h2_destroy() +{ + ip -6 route del 2001:db8:3::/64 vrf v$h2 + ip -6 route del 2001:db8:1::/64 vrf v$h2 + ip -4 route del 192.0.2.16/28 vrf v$h2 + ip -4 route del 192.0.2.0/28 vrf v$h2 + simple_if_fini $h2 192.0.2.130/28 2001:db8:2::2/64 \ + 192.0.2.146/28 2001:db8:4::2/64 +} + +router_create() +{ + ip link set dev $swp1 up + + vlan_create $swp1 100 + ip link add name br1 type bridge vlan_filtering 0 + ip link set dev br1 address $(mac_get $swp1.100) + ip link set dev $swp1.100 master br1 + __addr_add_del br1 add 192.0.2.2/28 2001:db8:1::2/64 + ip link set dev br1 up + + vlan_create $swp1 200 + ip link add name br2 type bridge vlan_filtering 0 + ip link set dev br2 address $(mac_get $swp1.200) + ip link set dev $swp1.200 master br2 + __addr_add_del br2 add 192.0.2.18/28 2001:db8:3::2/64 + ip link set dev br2 up + + ip link set dev $swp2 up + __addr_add_del $swp2 add 192.0.2.129/28 2001:db8:2::1/64 \ + 192.0.2.145/28 2001:db8:4::1/64 +} + +router_destroy() +{ + __addr_add_del $swp2 del 192.0.2.129/28 2001:db8:2::1/64 \ + 192.0.2.145/28 2001:db8:4::1/64 + ip link set dev $swp2 down + + __addr_add_del br2 del 192.0.2.18/28 2001:db8:3::2/64 + ip link set dev $swp1.200 nomaster + ip link del dev br2 + vlan_destroy $swp1 200 + + __addr_add_del br1 del 192.0.2.2/28 2001:db8:1::2/64 + ip link set dev $swp1.100 nomaster + ip link del dev br1 + vlan_destroy $swp1 100 + + ip link set dev $swp1 down +} + +config_remaster() +{ + log_info "Remaster bridge slaves" + + ip link set dev $swp1.100 nomaster + ip link set dev $swp1.200 nomaster + sleep 2 + ip link set dev $swp1.200 master br2 + ip link set dev $swp1.100 master br1 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + vrf_prepare + + h1_create + h2_create + + router_create + + forwarding_enable +} + +cleanup() +{ + pre_cleanup + + forwarding_restore + + router_destroy + + h2_destroy + h1_destroy + + vrf_cleanup +} + +ping_ipv4() +{ + ping_test $h1 192.0.2.130 ": via 100" + ping_test $h1 192.0.2.146 ": via 200" +} + +ping_ipv6() +{ + ping6_test $h1 2001:db8:2::2 ": via 100" + ping6_test $h1 2001:db8:4::2 ": via 200" +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/router_bridge_1d_lag.sh b/tools/testing/selftests/net/forwarding/router_bridge_1d_lag.sh new file mode 100755 index 000000000000..e064b946e821 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/router_bridge_1d_lag.sh @@ -0,0 +1,408 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# +--------------------------------------------+ +# | H1 (vrf) | +# | | +# | + LAG1.100 + LAG1.200 | +# | | 192.0.2.1/28 | 192.0.2.17/28 | +# | | 2001:db8:1::1/64 | 2001:db8:3:1/64 | +# | \___________ _______/ | +# | v | +# | + LAG1 (team) | +# | | | +# | ____^____ | +# | / \ | +# | + $h1 + $h4 | +# | | | | +# +----------|-----------|---------------------+ +# | | +# +----------|-----------|---------------------+ +# | SW | | | +# | + $swp1 + $swp4 | +# | \____ ____/ | +# | v | +# | LAG2 (team) + | +# | | | +# | _______^______________ | +# | / \ | +# | +------|------------+ +-------|----------+ | +# | | + LAG2.100 | | + LAG2.200 | | +# | | | | | | +# | | BR1 (802.1d) | | BR2 (802.1d) | | +# | | 192.0.2.2/28 | | 192.0.2.18/28 | | +# | | 2001:db8:1::2/64 | | 2001:db8:3:2/64 | | +# | | | | | | +# | +-------------------+ +------------------+ | +# | | +# | + LAG3.100 + LAG3.200 | +# | | 192.0.2.129/28 | 192.0.2.145/28 | +# | | 2001:db8:2::1/64 | 2001:db8:4::1/64 | +# | | | | +# | \_________ ___________/ | +# | v | +# | + LAG3 (team) | +# | ____|____ | +# | / \ | +# | + $swp2 + $swp3 | +# | | | | +# +-------|---------|--------------------------+ +# | | +# +-------|---------|--------------------------+ +# | | | | +# | + $h2 + $h3 | +# | \____ ___/ | +# | | | +# | + LAG4 (team) | +# | | | +# | __________^__________ | +# | / \ | +# | | | | +# | + LAG4.100 + LAG4.200 | +# | 192.0.2.130/28 192.0.2.146/28 | +# | 2001:db8:2::2/64 2001:db8:4::2/64 | +# | | +# | H2 (vrf) | +# +--------------------------------------------+ + +ALL_TESTS=" + ping_ipv4 + ping_ipv6 + + $(: exercise remastering of LAG2 slaves ) + config_deslave_swp4 + config_wait + ping_ipv4 + ping_ipv6 + config_enslave_swp4 + config_deslave_swp1 + config_wait + ping_ipv4 + ping_ipv6 + config_deslave_swp4 + config_enslave_swp1 + config_enslave_swp4 + config_wait + ping_ipv4 + ping_ipv6 + + $(: exercise remastering of LAG2 itself ) + config_remaster_lag2 + config_wait + ping_ipv4 + ping_ipv6 + + $(: exercise remastering of LAG3 slaves ) + config_deslave_swp2 + config_wait + ping_ipv4 + ping_ipv6 + config_enslave_swp2 + config_deslave_swp3 + config_wait + ping_ipv4 + ping_ipv6 + config_deslave_swp2 + config_enslave_swp3 + config_enslave_swp2 + config_wait + ping_ipv4 + ping_ipv6 +" +NUM_NETIFS=8 +source lib.sh + +h1_create() +{ + team_create lag1 lacp + ip link set dev lag1 addrgenmode none + ip link set dev lag1 address $(mac_get $h1) + ip link set dev $h1 master lag1 + ip link set dev $h4 master lag1 + simple_if_init lag1 + ip link set dev $h1 up + ip link set dev $h4 up + + vlan_create lag1 100 vlag1 192.0.2.1/28 2001:db8:1::1/64 + vlan_create lag1 200 vlag1 192.0.2.17/28 2001:db8:3::1/64 + + ip -4 route add 192.0.2.128/28 vrf vlag1 nexthop via 192.0.2.2 + ip -6 route add 2001:db8:2::/64 vrf vlag1 nexthop via 2001:db8:1::2 + + ip -4 route add 192.0.2.144/28 vrf vlag1 nexthop via 192.0.2.18 + ip -6 route add 2001:db8:4::/64 vrf vlag1 nexthop via 2001:db8:3::2 +} + +h1_destroy() +{ + ip -6 route del 2001:db8:4::/64 vrf vlag1 + ip -4 route del 192.0.2.144/28 vrf vlag1 + + ip -6 route del 2001:db8:2::/64 vrf vlag1 + ip -4 route del 192.0.2.128/28 vrf vlag1 + + vlan_destroy lag1 200 + vlan_destroy lag1 100 + + ip link set dev $h4 down + ip link set dev $h1 down + simple_if_fini lag1 + ip link set dev $h4 nomaster + ip link set dev $h1 nomaster + team_destroy lag1 +} + +h2_create() +{ + team_create lag4 lacp + ip link set dev lag4 addrgenmode none + ip link set dev lag4 address $(mac_get $h2) + ip link set dev $h2 master lag4 + ip link set dev $h3 master lag4 + simple_if_init lag4 + ip link set dev $h2 up + ip link set dev $h3 up + + vlan_create lag4 100 vlag4 192.0.2.130/28 2001:db8:2::2/64 + vlan_create lag4 200 vlag4 192.0.2.146/28 2001:db8:4::2/64 + + ip -4 route add 192.0.2.0/28 vrf vlag4 nexthop via 192.0.2.129 + ip -6 route add 2001:db8:1::/64 vrf vlag4 nexthop via 2001:db8:2::1 + + ip -4 route add 192.0.2.16/28 vrf vlag4 nexthop via 192.0.2.145 + ip -6 route add 2001:db8:3::/64 vrf vlag4 nexthop via 2001:db8:4::1 +} + +h2_destroy() +{ + ip -6 route del 2001:db8:3::/64 vrf vlag4 + ip -4 route del 192.0.2.16/28 vrf vlag4 + + ip -6 route del 2001:db8:1::/64 vrf vlag4 + ip -4 route del 192.0.2.0/28 vrf vlag4 + + vlan_destroy lag4 200 + vlan_destroy lag4 100 + + ip link set dev $h3 down + ip link set dev $h2 down + simple_if_fini lag4 + ip link set dev $h3 nomaster + ip link set dev $h2 nomaster + team_destroy lag4 +} + +router_create() +{ + team_create lag2 lacp + ip link set dev lag2 addrgenmode none + ip link set dev lag2 address $(mac_get $swp1) + ip link set dev $swp1 master lag2 + ip link set dev $swp4 master lag2 + + vlan_create lag2 100 + vlan_create lag2 200 + + ip link add name br1 type bridge vlan_filtering 0 + ip link set dev br1 address $(mac_get lag2.100) + ip link set dev lag2.100 master br1 + + ip link add name br2 type bridge vlan_filtering 0 + ip link set dev br2 address $(mac_get lag2.200) + ip link set dev lag2.200 master br2 + + ip link set dev $swp1 up + ip link set dev $swp4 up + ip link set dev br1 up + ip link set dev br2 up + + __addr_add_del br1 add 192.0.2.2/28 2001:db8:1::2/64 + __addr_add_del br2 add 192.0.2.18/28 2001:db8:3::2/64 + + team_create lag3 lacp + ip link set dev lag3 addrgenmode none + ip link set dev lag3 address $(mac_get $swp2) + ip link set dev $swp2 master lag3 + ip link set dev $swp3 master lag3 + ip link set dev $swp2 up + ip link set dev $swp3 up + + vlan_create lag3 100 + vlan_create lag3 200 + + __addr_add_del lag3.100 add 192.0.2.129/28 2001:db8:2::1/64 + __addr_add_del lag3.200 add 192.0.2.145/28 2001:db8:4::1/64 +} + +router_destroy() +{ + __addr_add_del lag3.200 del 192.0.2.145/28 2001:db8:4::1/64 + __addr_add_del lag3.100 del 192.0.2.129/28 2001:db8:2::1/64 + + vlan_destroy lag3 200 + vlan_destroy lag3 100 + + ip link set dev $swp3 down + ip link set dev $swp2 down + ip link set dev $swp3 nomaster + ip link set dev $swp2 nomaster + team_destroy lag3 + + __addr_add_del br2 del 192.0.2.18/28 2001:db8:3::2/64 + __addr_add_del br1 del 192.0.2.2/28 2001:db8:1::2/64 + + ip link set dev br2 down + ip link set dev br1 down + ip link set dev $swp4 down + ip link set dev $swp1 down + + ip link set dev lag2.200 nomaster + ip link del dev br2 + + ip link set dev lag2.100 nomaster + ip link del dev br1 + + vlan_destroy lag2 200 + vlan_destroy lag2 100 + + ip link set dev $swp4 nomaster + ip link set dev $swp1 nomaster + team_destroy lag2 +} + +config_remaster_lag2() +{ + log_info "Remaster bridge slaves" + + ip link set dev lag2.200 nomaster + ip link set dev lag2.100 nomaster + sleep 2 + ip link set dev lag2.100 master br1 + ip link set dev lag2.200 master br2 +} + +config_deslave() +{ + local netdev=$1; shift + + log_info "Deslave $netdev" + ip link set dev $netdev down + ip link set dev $netdev nomaster + ip link set dev $netdev up +} + +config_deslave_swp1() +{ + config_deslave $swp1 +} + +config_deslave_swp2() +{ + config_deslave $swp2 +} + +config_deslave_swp3() +{ + config_deslave $swp3 +} + +config_deslave_swp4() +{ + config_deslave $swp4 +} + +config_enslave() +{ + local netdev=$1; shift + local master=$1; shift + + log_info "Enslave $netdev to $master" + ip link set dev $netdev down + ip link set dev $netdev master $master + ip link set dev $netdev up +} + +config_enslave_swp1() +{ + config_enslave $swp1 lag2 +} + +config_enslave_swp2() +{ + config_enslave $swp2 lag3 +} + +config_enslave_swp3() +{ + config_enslave $swp3 lag3 +} + +config_enslave_swp4() +{ + config_enslave $swp4 lag2 +} + +config_wait() +{ + setup_wait_dev lag2 + setup_wait_dev lag3 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + swp3=${NETIFS[p5]} + h3=${NETIFS[p6]} + + h4=${NETIFS[p7]} + swp4=${NETIFS[p8]} + + vrf_prepare + + h1_create + h2_create + + router_create + + forwarding_enable +} + +cleanup() +{ + pre_cleanup + + forwarding_restore + + router_destroy + + h2_destroy + h1_destroy + + vrf_cleanup +} + +ping_ipv4() +{ + ping_test lag1.100 192.0.2.130 ": via 100" + ping_test lag1.200 192.0.2.146 ": via 200" +} + +ping_ipv6() +{ + ping6_test lag1.100 2001:db8:2::2 ": via 100" + ping6_test lag1.200 2001:db8:4::2 ": via 200" +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/router_bridge_lag.sh b/tools/testing/selftests/net/forwarding/router_bridge_lag.sh new file mode 100755 index 000000000000..f05ffe213c46 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/router_bridge_lag.sh @@ -0,0 +1,323 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# +----------------------------+ +--------------------------+ +# | H1 (vrf) | | H2 (vrf) | +# | | | | +# | + LAG1 (team) | | + LAG4 (team) | +# | | 192.0.2.1/28 | | | 192.0.2.130/28 | +# | | 2001:db8:1::1/64 | | | 2001:db8:2::2/64 | +# | __^___ | | __^_____ | +# | / \ | | / \ | +# | + $h1 + $h4 | | + $h2 + $h3 | +# | | | | | | | | +# +----|--------|--------------+ +-|----------|-------------+ +# | | | | +# +----|--------|------------------------------------|----------|-------------+ +# | SW | | | | | +# | + $swp1 + $swp4 + $swp2 + $swp3 | +# | \__ ___/ \__ _____/ | +# | v v | +# | +------|-------------------------------+ | | +# | | + LAG2 BR1 (802.1q) | + LAG3 (team) | +# | | (team) 192.0.2.2/28 | 192.0.2.129/28 | +# | | 2001:db8:1::2/64 | 2001:db8:2::1/64 | +# | | | | +# | +--------------------------------------+ | +# +---------------------------------------------------------------------------+ + +: ${ALL_TESTS:=" + ping_ipv4 + ping_ipv6 + + $(: exercise remastering of LAG2 slaves ) + config_deslave_swp4 + config_wait + ping_ipv4 + ping_ipv6 + config_enslave_swp4 + config_deslave_swp1 + config_wait + ping_ipv4 + ping_ipv6 + config_deslave_swp4 + config_enslave_swp1 + config_enslave_swp4 + config_wait + ping_ipv4 + ping_ipv6 + + $(: exercise remastering of LAG2 itself ) + config_remaster_lag2 + config_wait + ping_ipv4 + ping_ipv6 + + $(: exercise remastering of LAG3 slaves ) + config_deslave_swp2 + config_wait + ping_ipv4 + ping_ipv6 + config_enslave_swp2 + config_deslave_swp3 + config_wait + ping_ipv4 + ping_ipv6 + config_deslave_swp2 + config_enslave_swp3 + config_enslave_swp2 + config_wait + ping_ipv4 + ping_ipv6 + + $(: move LAG3 to a bridge and then out ) + config_remaster_lag3 + config_wait + ping_ipv4 + ping_ipv6 + "} +NUM_NETIFS=8 +: ${lib_dir:=.} +source $lib_dir/lib.sh +$EXTRA_SOURCE + +h1_create() +{ + team_create lag1 lacp + ip link set dev lag1 address $(mac_get $h1) + ip link set dev $h1 master lag1 + ip link set dev $h4 master lag1 + simple_if_init lag1 192.0.2.1/28 2001:db8:1::1/64 + ip link set dev $h1 up + ip link set dev $h4 up + ip -4 route add 192.0.2.128/28 vrf vlag1 nexthop via 192.0.2.2 + ip -6 route add 2001:db8:2::/64 vrf vlag1 nexthop via 2001:db8:1::2 +} + +h1_destroy() +{ + ip -6 route del 2001:db8:2::/64 vrf vlag1 + ip -4 route del 192.0.2.128/28 vrf vlag1 + ip link set dev $h4 down + ip link set dev $h1 down + simple_if_fini lag1 192.0.2.1/28 2001:db8:1::1/64 + ip link set dev $h4 nomaster + ip link set dev $h1 nomaster + team_destroy lag1 +} + +h2_create() +{ + team_create lag4 lacp + ip link set dev lag4 address $(mac_get $h2) + ip link set dev $h2 master lag4 + ip link set dev $h3 master lag4 + simple_if_init lag4 192.0.2.130/28 2001:db8:2::2/64 + ip link set dev $h2 up + ip link set dev $h3 up + ip -4 route add 192.0.2.0/28 vrf vlag4 nexthop via 192.0.2.129 + ip -6 route add 2001:db8:1::/64 vrf vlag4 nexthop via 2001:db8:2::1 +} + +h2_destroy() +{ + ip -6 route del 2001:db8:1::/64 vrf vlag4 + ip -4 route del 192.0.2.0/28 vrf vlag4 + ip link set dev $h3 down + ip link set dev $h2 down + simple_if_fini lag4 192.0.2.130/28 2001:db8:2::2/64 + ip link set dev $h3 nomaster + ip link set dev $h2 nomaster + team_destroy lag4 +} + +router_create() +{ + team_create lag2 lacp + ip link set dev lag2 address $(mac_get $swp1) + ip link set dev $swp1 master lag2 + ip link set dev $swp4 master lag2 + + ip link add name br1 address $(mac_get lag2) \ + type bridge vlan_filtering 1 + ip link set dev lag2 master br1 + + ip link set dev $swp1 up + ip link set dev $swp4 up + ip link set dev br1 up + + __addr_add_del br1 add 192.0.2.2/28 2001:db8:1::2/64 + + team_create lag3 lacp + ip link set dev lag3 address $(mac_get $swp2) + ip link set dev $swp2 master lag3 + ip link set dev $swp3 master lag3 + ip link set dev $swp2 up + ip link set dev $swp3 up + __addr_add_del lag3 add 192.0.2.129/28 2001:db8:2::1/64 +} + +router_destroy() +{ + __addr_add_del lag3 del 192.0.2.129/28 2001:db8:2::1/64 + ip link set dev $swp3 down + ip link set dev $swp2 down + ip link set dev $swp3 nomaster + ip link set dev $swp2 nomaster + team_destroy lag3 + + __addr_add_del br1 del 192.0.2.2/28 2001:db8:1::2/64 + + ip link set dev $swp4 down + ip link set dev $swp1 down + ip link set dev br1 down + + ip link set dev lag2 nomaster + ip link del dev br1 + + ip link set dev $swp4 nomaster + ip link set dev $swp1 nomaster + team_destroy lag2 +} + +config_remaster_lag2() +{ + log_info "Remaster bridge slave" + + ip link set dev lag2 nomaster + sleep 2 + ip link set dev lag2 master br1 +} + +config_remaster_lag3() +{ + log_info "Move lag3 to the bridge, then out again" + + ip link set dev lag3 master br1 + sleep 2 + ip link set dev lag3 nomaster +} + +config_deslave() +{ + local netdev=$1; shift + + log_info "Deslave $netdev" + ip link set dev $netdev down + ip link set dev $netdev nomaster + ip link set dev $netdev up +} + +config_deslave_swp1() +{ + config_deslave $swp1 +} + +config_deslave_swp2() +{ + config_deslave $swp2 +} + +config_deslave_swp3() +{ + config_deslave $swp3 +} + +config_deslave_swp4() +{ + config_deslave $swp4 +} + +config_enslave() +{ + local netdev=$1; shift + local master=$1; shift + + log_info "Enslave $netdev to $master" + ip link set dev $netdev down + ip link set dev $netdev master $master + ip link set dev $netdev up +} + +config_enslave_swp1() +{ + config_enslave $swp1 lag2 +} + +config_enslave_swp2() +{ + config_enslave $swp2 lag3 +} + +config_enslave_swp3() +{ + config_enslave $swp3 lag3 +} + +config_enslave_swp4() +{ + config_enslave $swp4 lag2 +} + +config_wait() +{ + setup_wait_dev lag2 + setup_wait_dev lag3 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + swp3=${NETIFS[p5]} + h3=${NETIFS[p6]} + + h4=${NETIFS[p7]} + swp4=${NETIFS[p8]} + + vrf_prepare + + h1_create + h2_create + + router_create + + forwarding_enable +} + +cleanup() +{ + pre_cleanup + + forwarding_restore + + router_destroy + + h2_destroy + h1_destroy + + vrf_cleanup +} + +ping_ipv4() +{ + ping_test lag1 192.0.2.130 +} + +ping_ipv6() +{ + ping6_test lag1 2001:db8:2::2 +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/router_bridge_pvid_vlan_upper.sh b/tools/testing/selftests/net/forwarding/router_bridge_pvid_vlan_upper.sh new file mode 100755 index 000000000000..76e4941fef73 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/router_bridge_pvid_vlan_upper.sh @@ -0,0 +1,155 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# +----------------------------+ +# | H1 (vrf) | +# | + $h1.10 | +----------------------+ +# | | 192.0.2.1/28 | | H2 (vrf) | +# | | 2001:db8:1::1/64 | | + $h2 | +# | | | | | 192.0.2.130/28 | +# | + $h1 | | | 2001:db8:2::2/64 | +# +---|------------------------+ +--|-------------------+ +# | | +# +---|--------------------------------------------------|-------------------+ +# | | router (main VRF) | | +# | +-|----------------------------------+ + $swp2 | +# | | + $swp1 BR1 (802.1q, pvid=10) | 192.0.2.129/28 | +# | | 192.0.2.2/28 | 2001:db8:2::1/64 | +# | | 2001:db8:1::2/64 | | +# | +------------------------------------+ | +# +--------------------------------------------------------------------------+ + +ALL_TESTS=" + ping_ipv4 + ping_ipv6 + shuffle_pvid + ping_ipv4 + ping_ipv6 +" +NUM_NETIFS=4 +source lib.sh + +h1_create() +{ + simple_if_init $h1 + vlan_create $h1 10 v$h1 192.0.2.1/28 2001:db8:1::1/64 + ip -4 route add 192.0.2.128/28 vrf v$h1 nexthop via 192.0.2.2 + ip -6 route add 2001:db8:2::/64 vrf v$h1 nexthop via 2001:db8:1::2 +} + +h1_destroy() +{ + ip -6 route del 2001:db8:2::/64 vrf v$h1 + ip -4 route del 192.0.2.128/28 vrf v$h1 + vlan_destroy $h1 10 + simple_if_fini $h1 +} + +h2_create() +{ + simple_if_init $h2 192.0.2.130/28 2001:db8:2::2/64 + ip -4 route add 192.0.2.0/28 vrf v$h2 nexthop via 192.0.2.129 + ip -6 route add 2001:db8:1::/64 vrf v$h2 nexthop via 2001:db8:2::1 +} + +h2_destroy() +{ + ip -6 route del 2001:db8:1::/64 vrf v$h2 + ip -4 route del 192.0.2.0/28 vrf v$h2 + simple_if_fini $h2 192.0.2.130/28 2001:db8:2::2/64 +} + +router_create() +{ + ip link add name br1 address $(mac_get $swp1) \ + type bridge vlan_filtering 1 vlan_default_pvid 0 + ip link set dev br1 up + __addr_add_del br1 add 192.0.2.2/28 2001:db8:1::2/64 + + ip link set dev $swp1 master br1 + ip link set dev $swp1 up + + ip link set dev $swp2 up + __addr_add_del $swp2 add 192.0.2.129/28 2001:db8:2::1/64 + + bridge vlan add dev br1 vid 10 pvid untagged self + bridge vlan add dev $swp1 vid 10 +} + +router_destroy() +{ + bridge vlan del dev $swp1 vid 10 + bridge vlan del dev br1 vid 10 self + + __addr_add_del $swp2 del 192.0.2.129/28 2001:db8:2::1/64 + ip link set dev $swp2 down + + ip link set dev $swp1 down + ip link set dev $swp1 nomaster + + __addr_add_del br1 del 192.0.2.2/28 2001:db8:1::2/64 + ip link del dev br1 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + vrf_prepare + + h1_create + h2_create + + router_create + + forwarding_enable +} + +shuffle_pvid() +{ + log_info "Add and remove VLAN upper for PVID VLAN" + + # Adding and removing a VLAN upper for the PVID VLAN shouldn't change + # anything. The address is arbitrary, just to make sure it will be an L3 + # netdevice. + vlan_create br1 10 "" 192.0.2.33/28 + sleep 1 + vlan_destroy br1 10 +} + +cleanup() +{ + pre_cleanup + + forwarding_restore + + router_destroy + + h2_destroy + h1_destroy + + vrf_cleanup +} + +ping_ipv4() +{ + ping_test $h1 192.0.2.130 +} + +ping_ipv6() +{ + ping6_test $h1 2001:db8:2::2 +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/router_bridge_vlan.sh b/tools/testing/selftests/net/forwarding/router_bridge_vlan.sh index fa6a88c50750..b76a4a707a5b 100755 --- a/tools/testing/selftests/net/forwarding/router_bridge_vlan.sh +++ b/tools/testing/selftests/net/forwarding/router_bridge_vlan.sh @@ -1,10 +1,43 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +# +------------------------------------------------+ +----------------------+ +# | H1 (vrf) | | H2 (vrf) | +# | + $h1.555 + $h1.777 | | + $h2 | +# | | 192.0.2.1/28 | 192.0.2.17/28 | | | 192.0.2.130/28 | +# | | 2001:db8:1::1/64 | 2001:db8:3::1/64 | | | 192.0.2.146/28 | +# | | .-----------------' | | | 2001:db8:2::2/64 | +# | |/ | | | 2001:db8:4::2/64 | +# | + $h1 | | | | +# +----|-------------------------------------------+ +--|-------------------+ +# | | +# +----|--------------------------------------------------|-------------------+ +# | SW | | | +# | +--|-------------------------------+ + $swp2 | +# | | + $swp1 | 192.0.2.129/28 | +# | | vid 555 777 | 192.0.2.145/28 | +# | | | 2001:db8:2::1/64 | +# | | + BR1 (802.1q) | 2001:db8:4::1/64 | +# | | vid 555 pvid untagged | | +# | | 192.0.2.2/28 | | +# | | 192.0.2.18/28 | | +# | | 2001:db8:1::2/64 | | +# | | 2001:db8:3::2/64 | | +# | +----------------------------------+ | +# +---------------------------------------------------------------------------+ + ALL_TESTS=" ping_ipv4 ping_ipv6 vlan + config_777 + ping_ipv4_fails + ping_ipv6_fails + ping_ipv4_777 + ping_ipv6_777 + config_555 + ping_ipv4 + ping_ipv6 " NUM_NETIFS=4 source lib.sh @@ -12,36 +45,52 @@ source lib.sh h1_create() { simple_if_init $h1 + vlan_create $h1 555 v$h1 192.0.2.1/28 2001:db8:1::1/64 ip -4 route add 192.0.2.128/28 vrf v$h1 nexthop via 192.0.2.2 ip -6 route add 2001:db8:2::/64 vrf v$h1 nexthop via 2001:db8:1::2 + + vlan_create $h1 777 v$h1 192.0.2.17/28 2001:db8:3::1/64 + ip -4 route add 192.0.2.144/28 vrf v$h1 nexthop via 192.0.2.18 + ip -6 route add 2001:db8:4::/64 vrf v$h1 nexthop via 2001:db8:3::2 } h1_destroy() { + ip -6 route del 2001:db8:4::/64 vrf v$h1 + ip -4 route del 192.0.2.144/28 vrf v$h1 + vlan_destroy $h1 777 + ip -6 route del 2001:db8:2::/64 vrf v$h1 ip -4 route del 192.0.2.128/28 vrf v$h1 vlan_destroy $h1 555 + simple_if_fini $h1 } h2_create() { - simple_if_init $h2 192.0.2.130/28 2001:db8:2::2/64 + simple_if_init $h2 192.0.2.130/28 2001:db8:2::2/64 \ + 192.0.2.146/28 2001:db8:4::2/64 ip -4 route add 192.0.2.0/28 vrf v$h2 nexthop via 192.0.2.129 + ip -4 route add 192.0.2.16/28 vrf v$h2 nexthop via 192.0.2.145 ip -6 route add 2001:db8:1::/64 vrf v$h2 nexthop via 2001:db8:2::1 + ip -6 route add 2001:db8:3::/64 vrf v$h2 nexthop via 2001:db8:4::1 } h2_destroy() { + ip -6 route del 2001:db8:3::/64 vrf v$h2 ip -6 route del 2001:db8:1::/64 vrf v$h2 + ip -4 route del 192.0.2.16/28 vrf v$h2 ip -4 route del 192.0.2.0/28 vrf v$h2 - simple_if_fini $h2 192.0.2.130/28 2001:db8:2::2/64 + simple_if_fini $h2 192.0.2.146/28 2001:db8:4::2/64 \ + 192.0.2.130/28 2001:db8:2::2/64 } router_create() { - ip link add name br1 type bridge vlan_filtering 1 + ip link add name br1 type bridge vlan_filtering 1 vlan_default_pvid 0 ip link set dev br1 up ip link set dev $swp1 master br1 @@ -49,18 +98,23 @@ router_create() bridge vlan add dev br1 vid 555 self pvid untagged bridge vlan add dev $swp1 vid 555 + bridge vlan add dev $swp1 vid 777 __addr_add_del br1 add 192.0.2.2/28 2001:db8:1::2/64 + __addr_add_del br1 add 192.0.2.18/28 2001:db8:3::2/64 ip link set dev $swp2 up __addr_add_del $swp2 add 192.0.2.129/28 2001:db8:2::1/64 + __addr_add_del $swp2 add 192.0.2.145/28 2001:db8:4::1/64 } router_destroy() { + __addr_add_del $swp2 del 192.0.2.145/28 2001:db8:4::1/64 __addr_add_del $swp2 del 192.0.2.129/28 2001:db8:2::1/64 ip link set dev $swp2 down + __addr_add_del br1 del 192.0.2.18/28 2001:db8:3::2/64 __addr_add_del br1 del 192.0.2.2/28 2001:db8:1::2/64 ip link set dev $swp1 down ip link set dev $swp1 nomaster @@ -86,6 +140,24 @@ setup_prepare() forwarding_enable } +config_555() +{ + log_info "Configure VLAN 555 as PVID" + + bridge vlan add dev br1 vid 555 self pvid untagged + bridge vlan del dev br1 vid 777 self + sleep 2 +} + +config_777() +{ + log_info "Configure VLAN 777 as PVID" + + bridge vlan add dev br1 vid 777 self pvid untagged + bridge vlan del dev br1 vid 555 self + sleep 2 +} + cleanup() { pre_cleanup @@ -114,12 +186,32 @@ vlan() ping_ipv4() { - ping_test $h1 192.0.2.130 + ping_test $h1.555 192.0.2.130 } ping_ipv6() { - ping6_test $h1 2001:db8:2::2 + ping6_test $h1.555 2001:db8:2::2 +} + +ping_ipv4_fails() +{ + ping_test_fails $h1.555 192.0.2.130 ": via 555" +} + +ping_ipv6_fails() +{ + ping6_test_fails $h1.555 2001:db8:2::2 ": via 555" +} + +ping_ipv4_777() +{ + ping_test $h1.777 192.0.2.146 ": via 777" +} + +ping_ipv6_777() +{ + ping6_test $h1.777 2001:db8:4::2 ": via 777" } trap cleanup EXIT diff --git a/tools/testing/selftests/net/forwarding/router_bridge_vlan_upper.sh b/tools/testing/selftests/net/forwarding/router_bridge_vlan_upper.sh new file mode 100755 index 000000000000..215309ea1c8c --- /dev/null +++ b/tools/testing/selftests/net/forwarding/router_bridge_vlan_upper.sh @@ -0,0 +1,169 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# +------------------------+ +----------------------+ +# | H1 (vrf) | | H2 (vrf) | +# | + $h1.555 | | + $h2.777 | +# | | 192.0.2.1/28 | | | 192.0.2.18/28 | +# | | 2001:db8:1::1/64 | | | 2001:db8:2::2/64 | +# | | | | | | +# | + $h1 | | + $h2 | +# +----|-------------------+ +--|-------------------+ +# | | +# +----|--------------------------------------------------|-------------------+ +# | SW | | | +# | +--|--------------------------------------------------|-----------------+ | +# | | + $swp1 BR1 (802.1q) + $swp2 | | +# | | | | +# | +------+------------------------------------------+---------------------+ | +# | | | | +# | + br1.555 + br1.777 | +# | 192.0.2.2/28 192.0.2.17/28 | +# | 2001:db8:1::2/64 2001:db8:2::1/64 | +# +---------------------------------------------------------------------------+ + +ALL_TESTS=" + ping_ipv4 + ping_ipv6 + respin_config + ping_ipv4 + ping_ipv6 +" +NUM_NETIFS=4 +source lib.sh + +h1_create() +{ + simple_if_init $h1 + vlan_create $h1 555 v$h1 192.0.2.1/28 2001:db8:1::1/64 + ip -4 route add 192.0.2.16/28 vrf v$h1 nexthop via 192.0.2.2 + ip -6 route add 2001:db8:2::/64 vrf v$h1 nexthop via 2001:db8:1::2 +} + +h1_destroy() +{ + ip -6 route del 2001:db8:2::/64 vrf v$h1 + ip -4 route del 192.0.2.16/28 vrf v$h1 + vlan_destroy $h1 555 + simple_if_fini $h1 +} + +h2_create() +{ + simple_if_init $h2 + vlan_create $h2 777 v$h2 192.0.2.18/28 2001:db8:2::2/64 + ip -4 route add 192.0.2.0/28 vrf v$h2 nexthop via 192.0.2.17 + ip -6 route add 2001:db8:1::/64 vrf v$h2 nexthop via 2001:db8:2::1 +} + +h2_destroy() +{ + ip -6 route del 2001:db8:1::/64 vrf v$h2 + ip -4 route del 192.0.2.0/28 vrf v$h2 + vlan_destroy $h2 777 + simple_if_fini $h2 +} + +router_create() +{ + ip link add name br1 address $(mac_get $swp1) \ + type bridge vlan_filtering 1 + ip link set dev br1 up + + ip link set dev $swp1 master br1 + ip link set dev $swp2 master br1 + ip link set dev $swp1 up + ip link set dev $swp2 up + + bridge vlan add dev br1 vid 555 self + bridge vlan add dev br1 vid 777 self + bridge vlan add dev $swp1 vid 555 + bridge vlan add dev $swp2 vid 777 + + vlan_create br1 555 "" 192.0.2.2/28 2001:db8:1::2/64 + vlan_create br1 777 "" 192.0.2.17/28 2001:db8:2::1/64 +} + +router_destroy() +{ + vlan_destroy br1 777 + vlan_destroy br1 555 + + bridge vlan del dev $swp2 vid 777 + bridge vlan del dev $swp1 vid 555 + bridge vlan del dev br1 vid 777 self + bridge vlan del dev br1 vid 555 self + + ip link set dev $swp2 down nomaster + ip link set dev $swp1 down nomaster + + ip link set dev br1 down + ip link del dev br1 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + vrf_prepare + + h1_create + h2_create + + router_create + + forwarding_enable +} + +cleanup() +{ + pre_cleanup + + forwarding_restore + + router_destroy + + h2_destroy + h1_destroy + + vrf_cleanup +} + +ping_ipv4() +{ + ping_test $h1 192.0.2.18 +} + +ping_ipv6() +{ + ping6_test $h1 2001:db8:2::2 +} + +respin_config() +{ + log_info "Remaster bridge slave" + + ip link set dev $swp2 nomaster + ip link set dev $swp1 nomaster + + sleep 2 + + ip link set dev $swp1 master br1 + ip link set dev $swp2 master br1 + + bridge vlan add dev $swp1 vid 555 + bridge vlan add dev $swp2 vid 777 +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/router_bridge_vlan_upper_pvid.sh b/tools/testing/selftests/net/forwarding/router_bridge_vlan_upper_pvid.sh new file mode 100755 index 000000000000..138558452402 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/router_bridge_vlan_upper_pvid.sh @@ -0,0 +1,171 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# +----------------------------+ +# | H1 (vrf) | +# | + $h1.10 | +----------------------+ +# | | 192.0.2.1/28 | | H2 (vrf) | +# | | 2001:db8:1::1/64 | | + $h2 | +# | | | | | 192.0.2.130/28 | +# | + $h1 | | | 2001:db8:2::2/64 | +# +---|------------------------+ +--|-------------------+ +# | | +# +---|--------------------------------------------------|-------------------+ +# | | router (main VRF) | | +# | +-|--------------------------+ + $swp2 | +# | | + $swp1 BR1 (802.1q) | 192.0.2.129/28 | +# | +-----+----------------------+ 2001:db8:2::1/64 | +# | | | +# | + br1.10 | +# | 192.0.2.2/28 | +# | 2001:db8:1::2/64 | +# +--------------------------------------------------------------------------+ + +ALL_TESTS=" + ping_ipv4 + ping_ipv6 + pvid_set_unset + ping_ipv4 + ping_ipv6 + pvid_set_move + ping_ipv4 + ping_ipv6 +" +NUM_NETIFS=4 +source lib.sh + +h1_create() +{ + simple_if_init $h1 + vlan_create $h1 10 v$h1 192.0.2.1/28 2001:db8:1::1/64 + ip -4 route add 192.0.2.128/28 vrf v$h1 nexthop via 192.0.2.2 + ip -6 route add 2001:db8:2::/64 vrf v$h1 nexthop via 2001:db8:1::2 +} + +h1_destroy() +{ + ip -6 route del 2001:db8:2::/64 vrf v$h1 + ip -4 route del 192.0.2.128/28 vrf v$h1 + vlan_destroy $h1 10 + simple_if_fini $h1 +} + +h2_create() +{ + simple_if_init $h2 192.0.2.130/28 2001:db8:2::2/64 + ip -4 route add 192.0.2.0/28 vrf v$h2 nexthop via 192.0.2.129 + ip -6 route add 2001:db8:1::/64 vrf v$h2 nexthop via 2001:db8:2::1 +} + +h2_destroy() +{ + ip -6 route del 2001:db8:1::/64 vrf v$h2 + ip -4 route del 192.0.2.0/28 vrf v$h2 + simple_if_fini $h2 192.0.2.130/28 2001:db8:2::2/64 +} + +router_create() +{ + ip link add name br1 address $(mac_get $swp1) \ + type bridge vlan_filtering 1 vlan_default_pvid 0 + ip link set dev br1 up + + ip link set dev $swp1 master br1 + ip link set dev $swp1 up + + ip link set dev $swp2 up + __addr_add_del $swp2 add 192.0.2.129/28 2001:db8:2::1/64 + + bridge vlan add dev br1 vid 10 self + bridge vlan add dev $swp1 vid 10 + vlan_create br1 10 "" 192.0.2.2/28 2001:db8:1::2/64 +} + +router_destroy() +{ + vlan_destroy br1 10 + bridge vlan del dev $swp1 vid 10 + bridge vlan del dev br1 vid 10 self + + __addr_add_del $swp2 del 192.0.2.129/28 2001:db8:2::1/64 + ip link set dev $swp2 down + + ip link set dev $swp1 down + ip link set dev $swp1 nomaster + + ip link del dev br1 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + vrf_prepare + + h1_create + h2_create + + router_create + + forwarding_enable +} + +pvid_set_unset() +{ + log_info "Set and unset PVID on VLAN 10" + + bridge vlan add dev br1 vid 10 pvid self + sleep 1 + bridge vlan add dev br1 vid 10 self +} + +pvid_set_move() +{ + log_info "Set PVID on VLAN 10, then move it to VLAN 20" + + bridge vlan add dev br1 vid 10 pvid self + sleep 1 + bridge vlan add dev br1 vid 20 pvid self +} + +shuffle_vlan() +{ + log_info "" +} + +cleanup() +{ + pre_cleanup + + forwarding_restore + + router_destroy + + h2_destroy + h1_destroy + + vrf_cleanup +} + +ping_ipv4() +{ + ping_test $h1 192.0.2.130 +} + +ping_ipv6() +{ + ping6_test $h1 2001:db8:2::2 +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh.sh index cf3d26c233e8..3f0f5dc95542 100755 --- a/tools/testing/selftests/net/forwarding/router_mpath_nh.sh +++ b/tools/testing/selftests/net/forwarding/router_mpath_nh.sh @@ -1,9 +1,18 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -ALL_TESTS="ping_ipv4 ping_ipv6 multipath_test" +ALL_TESTS=" + ping_ipv4 + ping_ipv6 + multipath_test + ping_ipv4_blackhole + ping_ipv6_blackhole + nh_stats_test_v4 + nh_stats_test_v6 +" NUM_NETIFS=8 source lib.sh +source router_mpath_nh_lib.sh h1_create() { @@ -197,8 +206,8 @@ multipath4_test() t0_rp12=$(link_stats_tx_packets_get $rp12) t0_rp13=$(link_stats_tx_packets_get $rp13) - ip vrf exec vrf-h1 $MZ -q -p 64 -A 192.0.2.2 -B 198.51.100.2 \ - -d 1msec -t udp "sp=1024,dp=0-32768" + ip vrf exec vrf-h1 $MZ $h1 -q -p 64 -A 192.0.2.2 -B 198.51.100.2 \ + -d $MZ_DELAY -t udp "sp=1024,dp=0-32768" t1_rp12=$(link_stats_tx_packets_get $rp12) t1_rp13=$(link_stats_tx_packets_get $rp13) @@ -212,7 +221,7 @@ multipath4_test() sysctl_restore net.ipv4.fib_multipath_hash_policy } -multipath6_l4_test() +multipath6_test() { local desc="$1" local weight_rp12=$2 @@ -231,7 +240,7 @@ multipath6_l4_test() t0_rp13=$(link_stats_tx_packets_get $rp13) $MZ $h1 -6 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \ - -d 1msec -t udp "sp=1024,dp=0-32768" + -d $MZ_DELAY -t udp "sp=1024,dp=0-32768" t1_rp12=$(link_stats_tx_packets_get $rp12) t1_rp13=$(link_stats_tx_packets_get $rp13) @@ -245,34 +254,6 @@ multipath6_l4_test() sysctl_restore net.ipv6.fib_multipath_hash_policy } -multipath6_test() -{ - local desc="$1" - local weight_rp12=$2 - local weight_rp13=$3 - local t0_rp12 t0_rp13 t1_rp12 t1_rp13 - local packets_rp12 packets_rp13 - - ip nexthop replace id 106 group 104,$weight_rp12/105,$weight_rp13 - - t0_rp12=$(link_stats_tx_packets_get $rp12) - t0_rp13=$(link_stats_tx_packets_get $rp13) - - # Generate 16384 echo requests, each with a random flow label. - for _ in $(seq 1 16384); do - ip vrf exec vrf-h1 $PING6 2001:db8:2::2 -F 0 -c 1 -q >/dev/null 2>&1 - done - - t1_rp12=$(link_stats_tx_packets_get $rp12) - t1_rp13=$(link_stats_tx_packets_get $rp13) - - let "packets_rp12 = $t1_rp12 - $t0_rp12" - let "packets_rp13 = $t1_rp13 - $t0_rp13" - multipath_eval "$desc" $weight_rp12 $weight_rp13 $packets_rp12 $packets_rp13 - - ip nexthop replace id 106 group 104/105 -} - multipath_test() { log_info "Running IPv4 multipath tests" @@ -280,15 +261,81 @@ multipath_test() multipath4_test "Weighted MP 2:1" 2 1 multipath4_test "Weighted MP 11:45" 11 45 + log_info "Running IPv4 multipath tests with IPv6 link-local nexthops" + ip nexthop replace id 101 via fe80:2::22 dev $rp12 + ip nexthop replace id 102 via fe80:3::23 dev $rp13 + + multipath4_test "ECMP" 1 1 + multipath4_test "Weighted MP 2:1" 2 1 + multipath4_test "Weighted MP 11:45" 11 45 + + ip nexthop replace id 102 via 169.254.3.23 dev $rp13 + ip nexthop replace id 101 via 169.254.2.22 dev $rp12 + log_info "Running IPv6 multipath tests" multipath6_test "ECMP" 1 1 multipath6_test "Weighted MP 2:1" 2 1 multipath6_test "Weighted MP 11:45" 11 45 +} + +ping_ipv4_blackhole() +{ + RET=0 + + ip nexthop add id 1001 blackhole + ip nexthop add id 1002 group 1001 + + ip route replace 198.51.100.0/24 vrf vrf-r1 nhid 1001 + ping_do $h1 198.51.100.2 + check_fail $? "ping did not fail when using a blackhole nexthop" + + ip route replace 198.51.100.0/24 vrf vrf-r1 nhid 1002 + ping_do $h1 198.51.100.2 + check_fail $? "ping did not fail when using a blackhole nexthop group" + + ip route replace 198.51.100.0/24 vrf vrf-r1 nhid 103 + ping_do $h1 198.51.100.2 + check_err $? "ping failed with a valid nexthop" + + log_test "IPv4 blackhole ping" + + ip nexthop del id 1002 + ip nexthop del id 1001 +} - log_info "Running IPv6 L4 hash multipath tests" - multipath6_l4_test "ECMP" 1 1 - multipath6_l4_test "Weighted MP 2:1" 2 1 - multipath6_l4_test "Weighted MP 11:45" 11 45 +ping_ipv6_blackhole() +{ + RET=0 + + ip -6 nexthop add id 1001 blackhole + ip nexthop add id 1002 group 1001 + + ip route replace 2001:db8:2::/64 vrf vrf-r1 nhid 1001 + ping6_do $h1 2001:db8:2::2 + check_fail $? "ping did not fail when using a blackhole nexthop" + + ip route replace 2001:db8:2::/64 vrf vrf-r1 nhid 1002 + ping6_do $h1 2001:db8:2::2 + check_fail $? "ping did not fail when using a blackhole nexthop group" + + ip route replace 2001:db8:2::/64 vrf vrf-r1 nhid 106 + ping6_do $h1 2001:db8:2::2 + check_err $? "ping failed with a valid nexthop" + + log_test "IPv6 blackhole ping" + + ip nexthop del id 1002 + ip -6 nexthop del id 1001 +} + +nh_stats_test_v4() +{ + __nh_stats_test_v4 mpath +} + +nh_stats_test_v6() +{ + __nh_stats_test_v6 mpath } setup_prepare() @@ -312,7 +359,6 @@ setup_prepare() router1_create router2_create - routing_nh_obj forwarding_enable } @@ -345,7 +391,7 @@ ping_ipv6() ip nexthop ls >/dev/null 2>&1 if [ $? -ne 0 ]; then echo "Nexthop objects not supported; skipping tests" - exit 0 + exit $ksft_skip fi trap cleanup EXIT diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh_lib.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh_lib.sh new file mode 100644 index 000000000000..7e7d62161c34 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/router_mpath_nh_lib.sh @@ -0,0 +1,129 @@ +# SPDX-License-Identifier: GPL-2.0 + +nh_stats_do_test() +{ + local what=$1; shift + local nh1_id=$1; shift + local nh2_id=$1; shift + local group_id=$1; shift + local stats_get=$1; shift + local mz="$@" + + local dp + + RET=0 + + sleep 2 + for ((dp=0; dp < 60000; dp += 10000)); do + local dd + local t0_rp12=$(link_stats_tx_packets_get $rp12) + local t0_rp13=$(link_stats_tx_packets_get $rp13) + local t0_nh1=$($stats_get $group_id $nh1_id) + local t0_nh2=$($stats_get $group_id $nh2_id) + + ip vrf exec vrf-h1 \ + $mz -q -p 64 -d 0 -t udp \ + "sp=1024,dp=$((dp))-$((dp + 10000))" + sleep 2 + + local t1_rp12=$(link_stats_tx_packets_get $rp12) + local t1_rp13=$(link_stats_tx_packets_get $rp13) + local t1_nh1=$($stats_get $group_id $nh1_id) + local t1_nh2=$($stats_get $group_id $nh2_id) + + local d_rp12=$((t1_rp12 - t0_rp12)) + local d_rp13=$((t1_rp13 - t0_rp13)) + local d_nh1=$((t1_nh1 - t0_nh1)) + local d_nh2=$((t1_nh2 - t0_nh2)) + + dd=$(absval $((d_rp12 - d_nh1))) + ((dd < 10)) + check_err $? "Discrepancy between link and $stats_get: d_rp12=$d_rp12 d_nh1=$d_nh1" + + dd=$(absval $((d_rp13 - d_nh2))) + ((dd < 10)) + check_err $? "Discrepancy between link and $stats_get: d_rp13=$d_rp13 d_nh2=$d_nh2" + done + + log_test "NH stats test $what" +} + +nh_stats_test_dispatch_swhw() +{ + local what=$1; shift + local nh1_id=$1; shift + local nh2_id=$1; shift + local group_id=$1; shift + local mz="$@" + + local used + + nh_stats_do_test "$what" "$nh1_id" "$nh2_id" "$group_id" \ + nh_stats_get "${mz[@]}" + + used=$(ip -s -j -d nexthop show id $group_id | + jq '.[].hw_stats.used') + kind=$(ip -j -d link show dev $rp11 | + jq -r '.[].linkinfo.info_kind') + if [[ $used == true ]]; then + nh_stats_do_test "HW $what" "$nh1_id" "$nh2_id" "$group_id" \ + nh_stats_get_hw "${mz[@]}" + elif [[ $kind == veth ]]; then + log_test_skip "HW stats not offloaded on veth topology" + fi +} + +nh_stats_test_dispatch() +{ + local nhgtype=$1; shift + local what=$1; shift + local nh1_id=$1; shift + local nh2_id=$1; shift + local group_id=$1; shift + local mz="$@" + + local enabled + local kind + + if ! ip nexthop help 2>&1 | grep -q hw_stats; then + log_test_skip "NH stats test: ip doesn't support HW stats" + return + fi + + ip nexthop replace id $group_id group $nh1_id/$nh2_id \ + hw_stats on type $nhgtype + enabled=$(ip -s -j -d nexthop show id $group_id | + jq '.[].hw_stats.enabled') + if [[ $enabled == true ]]; then + nh_stats_test_dispatch_swhw "$what" "$nh1_id" "$nh2_id" \ + "$group_id" "${mz[@]}" + elif [[ $enabled == false ]]; then + check_err 1 "HW stats still disabled after enabling" + log_test "NH stats test" + else + log_test_skip "NH stats test: ip doesn't report hw_stats info" + fi + + ip nexthop replace id $group_id group $nh1_id/$nh2_id \ + hw_stats off type $nhgtype +} + +__nh_stats_test_v4() +{ + local nhgtype=$1; shift + + sysctl_set net.ipv4.fib_multipath_hash_policy 1 + nh_stats_test_dispatch $nhgtype "IPv4" 101 102 103 \ + $MZ $h1 -A 192.0.2.2 -B 198.51.100.2 + sysctl_restore net.ipv4.fib_multipath_hash_policy +} + +__nh_stats_test_v6() +{ + local nhgtype=$1; shift + + sysctl_set net.ipv6.fib_multipath_hash_policy 1 + nh_stats_test_dispatch $nhgtype "IPv6" 104 105 106 \ + $MZ -6 $h1 -A 2001:db8:1::2 -B 2001:db8:2::2 + sysctl_restore net.ipv6.fib_multipath_hash_policy +} diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh new file mode 100755 index 000000000000..4b483d24ad00 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh @@ -0,0 +1,413 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +ALL_TESTS=" + ping_ipv4 + ping_ipv6 + multipath_test + nh_stats_test_v4 + nh_stats_test_v6 +" +NUM_NETIFS=8 +source lib.sh +source router_mpath_nh_lib.sh + +h1_create() +{ + vrf_create "vrf-h1" + ip link set dev $h1 master vrf-h1 + + ip link set dev vrf-h1 up + ip link set dev $h1 up + + ip address add 192.0.2.2/24 dev $h1 + ip address add 2001:db8:1::2/64 dev $h1 + + ip route add 198.51.100.0/24 vrf vrf-h1 nexthop via 192.0.2.1 + ip route add 2001:db8:2::/64 vrf vrf-h1 nexthop via 2001:db8:1::1 +} + +h1_destroy() +{ + ip route del 2001:db8:2::/64 vrf vrf-h1 + ip route del 198.51.100.0/24 vrf vrf-h1 + + ip address del 2001:db8:1::2/64 dev $h1 + ip address del 192.0.2.2/24 dev $h1 + + ip link set dev $h1 down + vrf_destroy "vrf-h1" +} + +h2_create() +{ + vrf_create "vrf-h2" + ip link set dev $h2 master vrf-h2 + + ip link set dev vrf-h2 up + ip link set dev $h2 up + + ip address add 198.51.100.2/24 dev $h2 + ip address add 2001:db8:2::2/64 dev $h2 + + ip route add 192.0.2.0/24 vrf vrf-h2 nexthop via 198.51.100.1 + ip route add 2001:db8:1::/64 vrf vrf-h2 nexthop via 2001:db8:2::1 +} + +h2_destroy() +{ + ip route del 2001:db8:1::/64 vrf vrf-h2 + ip route del 192.0.2.0/24 vrf vrf-h2 + + ip address del 2001:db8:2::2/64 dev $h2 + ip address del 198.51.100.2/24 dev $h2 + + ip link set dev $h2 down + vrf_destroy "vrf-h2" +} + +router1_create() +{ + vrf_create "vrf-r1" + ip link set dev $rp11 master vrf-r1 + ip link set dev $rp12 master vrf-r1 + ip link set dev $rp13 master vrf-r1 + + ip link set dev vrf-r1 up + ip link set dev $rp11 up + ip link set dev $rp12 up + ip link set dev $rp13 up + + ip address add 192.0.2.1/24 dev $rp11 + ip address add 2001:db8:1::1/64 dev $rp11 + + ip address add 169.254.2.12/24 dev $rp12 + ip address add fe80:2::12/64 dev $rp12 + + ip address add 169.254.3.13/24 dev $rp13 + ip address add fe80:3::13/64 dev $rp13 +} + +router1_destroy() +{ + ip route del 2001:db8:2::/64 vrf vrf-r1 + ip route del 198.51.100.0/24 vrf vrf-r1 + + ip address del fe80:3::13/64 dev $rp13 + ip address del 169.254.3.13/24 dev $rp13 + + ip address del fe80:2::12/64 dev $rp12 + ip address del 169.254.2.12/24 dev $rp12 + + ip address del 2001:db8:1::1/64 dev $rp11 + ip address del 192.0.2.1/24 dev $rp11 + + ip nexthop del id 103 + ip nexthop del id 101 + ip nexthop del id 102 + ip nexthop del id 106 + ip nexthop del id 104 + ip nexthop del id 105 + + ip link set dev $rp13 down + ip link set dev $rp12 down + ip link set dev $rp11 down + + vrf_destroy "vrf-r1" +} + +router2_create() +{ + vrf_create "vrf-r2" + ip link set dev $rp21 master vrf-r2 + ip link set dev $rp22 master vrf-r2 + ip link set dev $rp23 master vrf-r2 + + ip link set dev vrf-r2 up + ip link set dev $rp21 up + ip link set dev $rp22 up + ip link set dev $rp23 up + + ip address add 198.51.100.1/24 dev $rp21 + ip address add 2001:db8:2::1/64 dev $rp21 + + ip address add 169.254.2.22/24 dev $rp22 + ip address add fe80:2::22/64 dev $rp22 + + ip address add 169.254.3.23/24 dev $rp23 + ip address add fe80:3::23/64 dev $rp23 +} + +router2_destroy() +{ + ip route del 2001:db8:1::/64 vrf vrf-r2 + ip route del 192.0.2.0/24 vrf vrf-r2 + + ip address del fe80:3::23/64 dev $rp23 + ip address del 169.254.3.23/24 dev $rp23 + + ip address del fe80:2::22/64 dev $rp22 + ip address del 169.254.2.22/24 dev $rp22 + + ip address del 2001:db8:2::1/64 dev $rp21 + ip address del 198.51.100.1/24 dev $rp21 + + ip nexthop del id 201 + ip nexthop del id 202 + ip nexthop del id 204 + ip nexthop del id 205 + + ip link set dev $rp23 down + ip link set dev $rp22 down + ip link set dev $rp21 down + + vrf_destroy "vrf-r2" +} + +routing_nh_obj() +{ + ip nexthop add id 101 via 169.254.2.22 dev $rp12 + ip nexthop add id 102 via 169.254.3.23 dev $rp13 + ip nexthop add id 103 group 101/102 type resilient buckets 512 \ + idle_timer 0 + ip route add 198.51.100.0/24 vrf vrf-r1 nhid 103 + + ip nexthop add id 104 via fe80:2::22 dev $rp12 + ip nexthop add id 105 via fe80:3::23 dev $rp13 + ip nexthop add id 106 group 104/105 type resilient buckets 512 \ + idle_timer 0 + ip route add 2001:db8:2::/64 vrf vrf-r1 nhid 106 + + ip nexthop add id 201 via 169.254.2.12 dev $rp22 + ip nexthop add id 202 via 169.254.3.13 dev $rp23 + ip nexthop add id 203 group 201/202 type resilient buckets 512 \ + idle_timer 0 + ip route add 192.0.2.0/24 vrf vrf-r2 nhid 203 + + ip nexthop add id 204 via fe80:2::12 dev $rp22 + ip nexthop add id 205 via fe80:3::13 dev $rp23 + ip nexthop add id 206 group 204/205 type resilient buckets 512 \ + idle_timer 0 + ip route add 2001:db8:1::/64 vrf vrf-r2 nhid 206 +} + +multipath4_test() +{ + local desc="$1" + local weight_rp12=$2 + local weight_rp13=$3 + local t0_rp12 t0_rp13 t1_rp12 t1_rp13 + local packets_rp12 packets_rp13 + + # Transmit multiple flows from h1 to h2 and make sure they are + # distributed between both multipath links (rp12 and rp13) + # according to the provided weights. + sysctl_set net.ipv4.fib_multipath_hash_policy 1 + + t0_rp12=$(link_stats_tx_packets_get $rp12) + t0_rp13=$(link_stats_tx_packets_get $rp13) + + ip vrf exec vrf-h1 $MZ $h1 -q -p 64 -A 192.0.2.2 -B 198.51.100.2 \ + -d $MZ_DELAY -t udp "sp=1024,dp=0-32768" + + t1_rp12=$(link_stats_tx_packets_get $rp12) + t1_rp13=$(link_stats_tx_packets_get $rp13) + + let "packets_rp12 = $t1_rp12 - $t0_rp12" + let "packets_rp13 = $t1_rp13 - $t0_rp13" + multipath_eval "$desc" $weight_rp12 $weight_rp13 $packets_rp12 $packets_rp13 + + # Restore settings. + sysctl_restore net.ipv4.fib_multipath_hash_policy +} + +multipath6_l4_test() +{ + local desc="$1" + local weight_rp12=$2 + local weight_rp13=$3 + local t0_rp12 t0_rp13 t1_rp12 t1_rp13 + local packets_rp12 packets_rp13 + + # Transmit multiple flows from h1 to h2 and make sure they are + # distributed between both multipath links (rp12 and rp13) + # according to the provided weights. + sysctl_set net.ipv6.fib_multipath_hash_policy 1 + + t0_rp12=$(link_stats_tx_packets_get $rp12) + t0_rp13=$(link_stats_tx_packets_get $rp13) + + $MZ $h1 -6 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \ + -d $MZ_DELAY -t udp "sp=1024,dp=0-32768" + + t1_rp12=$(link_stats_tx_packets_get $rp12) + t1_rp13=$(link_stats_tx_packets_get $rp13) + + let "packets_rp12 = $t1_rp12 - $t0_rp12" + let "packets_rp13 = $t1_rp13 - $t0_rp13" + multipath_eval "$desc" $weight_rp12 $weight_rp13 $packets_rp12 $packets_rp13 + + sysctl_restore net.ipv6.fib_multipath_hash_policy +} + +multipath_test() +{ + # Without an idle timer, weight replacement should happen immediately. + log_info "Running multipath tests without an idle timer" + ip nexthop replace id 103 group 101/102 type resilient idle_timer 0 + ip nexthop replace id 106 group 104/105 type resilient idle_timer 0 + + log_info "Running IPv4 multipath tests" + ip nexthop replace id 103 group 101,1/102,1 type resilient + multipath4_test "ECMP" 1 1 + ip nexthop replace id 103 group 101,2/102,1 type resilient + multipath4_test "Weighted MP 2:1" 2 1 + ip nexthop replace id 103 group 101,11/102,45 type resilient + multipath4_test "Weighted MP 11:45" 11 45 + + ip nexthop replace id 103 group 101,1/102,1 type resilient + + log_info "Running IPv6 L4 hash multipath tests" + ip nexthop replace id 106 group 104,1/105,1 type resilient + multipath6_l4_test "ECMP" 1 1 + ip nexthop replace id 106 group 104,2/105,1 type resilient + multipath6_l4_test "Weighted MP 2:1" 2 1 + ip nexthop replace id 106 group 104,11/105,45 type resilient + multipath6_l4_test "Weighted MP 11:45" 11 45 + + ip nexthop replace id 106 group 104,1/105,1 type resilient + + # With an idle timer, weight replacement should not happen, so the + # expected ratio should always be the initial one (1:1). + log_info "Running multipath tests with an idle timer of 120 seconds" + ip nexthop replace id 103 group 101/102 type resilient idle_timer 120 + ip nexthop replace id 106 group 104/105 type resilient idle_timer 120 + + log_info "Running IPv4 multipath tests" + ip nexthop replace id 103 group 101,1/102,1 type resilient + multipath4_test "ECMP" 1 1 + ip nexthop replace id 103 group 101,2/102,1 type resilient + multipath4_test "Weighted MP 2:1" 1 1 + ip nexthop replace id 103 group 101,11/102,45 type resilient + multipath4_test "Weighted MP 11:45" 1 1 + + ip nexthop replace id 103 group 101,1/102,1 type resilient + + log_info "Running IPv6 L4 hash multipath tests" + ip nexthop replace id 106 group 104,1/105,1 type resilient + multipath6_l4_test "ECMP" 1 1 + ip nexthop replace id 106 group 104,2/105,1 type resilient + multipath6_l4_test "Weighted MP 2:1" 1 1 + ip nexthop replace id 106 group 104,11/105,45 type resilient + multipath6_l4_test "Weighted MP 11:45" 1 1 + + ip nexthop replace id 106 group 104,1/105,1 type resilient + + # With a short idle timer and enough idle time, weight replacement + # should happen. + log_info "Running multipath tests with an idle timer of 5 seconds" + ip nexthop replace id 103 group 101/102 type resilient idle_timer 5 + ip nexthop replace id 106 group 104/105 type resilient idle_timer 5 + + log_info "Running IPv4 multipath tests" + sleep 10 + ip nexthop replace id 103 group 101,1/102,1 type resilient + multipath4_test "ECMP" 1 1 + sleep 10 + ip nexthop replace id 103 group 101,2/102,1 type resilient + multipath4_test "Weighted MP 2:1" 2 1 + sleep 10 + ip nexthop replace id 103 group 101,11/102,45 type resilient + multipath4_test "Weighted MP 11:45" 11 45 + + ip nexthop replace id 103 group 101,1/102,1 type resilient + + log_info "Running IPv6 L4 hash multipath tests" + sleep 10 + ip nexthop replace id 106 group 104,1/105,1 type resilient + multipath6_l4_test "ECMP" 1 1 + sleep 10 + ip nexthop replace id 106 group 104,2/105,1 type resilient + multipath6_l4_test "Weighted MP 2:1" 2 1 + sleep 10 + ip nexthop replace id 106 group 104,11/105,45 type resilient + multipath6_l4_test "Weighted MP 11:45" 11 45 + + ip nexthop replace id 106 group 104,1/105,1 type resilient +} + +nh_stats_test_v4() +{ + __nh_stats_test_v4 resilient +} + +nh_stats_test_v6() +{ + __nh_stats_test_v6 resilient +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + rp11=${NETIFS[p2]} + + rp12=${NETIFS[p3]} + rp22=${NETIFS[p4]} + + rp13=${NETIFS[p5]} + rp23=${NETIFS[p6]} + + rp21=${NETIFS[p7]} + h2=${NETIFS[p8]} + + vrf_prepare + + h1_create + h2_create + + router1_create + router2_create + + forwarding_enable +} + +cleanup() +{ + pre_cleanup + + forwarding_restore + + router2_destroy + router1_destroy + + h2_destroy + h1_destroy + + vrf_cleanup +} + +ping_ipv4() +{ + ping_test $h1 198.51.100.2 +} + +ping_ipv6() +{ + ping6_test $h1 2001:db8:2::2 +} + +ip nexthop ls >/dev/null 2>&1 +if [ $? -ne 0 ]; then + echo "Nexthop objects not supported; skipping tests" + exit $ksft_skip +fi + +trap cleanup EXIT + +setup_prepare +setup_wait +routing_nh_obj + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/router_multicast.sh b/tools/testing/selftests/net/forwarding/router_multicast.sh index 57e90c873a2c..5a58b1ec8aef 100755 --- a/tools/testing/selftests/net/forwarding/router_multicast.sh +++ b/tools/testing/selftests/net/forwarding/router_multicast.sh @@ -28,7 +28,7 @@ # +------------------+ +------------------+ # -ALL_TESTS="mcast_v4 mcast_v6 rpf_v4 rpf_v6" +ALL_TESTS="mcast_v4 mcast_v6 rpf_v4 rpf_v6 unres_v4 unres_v6" NUM_NETIFS=6 source lib.sh source tc_common.sh @@ -406,6 +406,96 @@ rpf_v6() log_test "RPF IPv6" } +unres_v4() +{ + # Send a multicast packet not corresponding to an installed route, + # causing the kernel to queue the packet for resolution and emit an + # IGMPMSG_NOCACHE notification. smcrouted will react to this + # notification by consulting its (*, G) list and installing an (S, G) + # route, which will be used to forward the queued packet. + + RET=0 + + tc filter add dev $h2 ingress protocol ip pref 1 handle 1 flower \ + dst_ip 225.1.2.3 ip_proto udp dst_port 12345 action drop + tc filter add dev $h3 ingress protocol ip pref 1 handle 1 flower \ + dst_ip 225.1.2.3 ip_proto udp dst_port 12345 action drop + + # Forwarding should fail before installing a matching (*, G). + $MZ $h1 -c 1 -p 128 -t udp "ttl=10,sp=54321,dp=12345" \ + -a 00:11:22:33:44:55 -b 01:00:5e:01:02:03 \ + -A 198.51.100.2 -B 225.1.2.3 -q + + tc_check_packets "dev $h2 ingress" 1 0 + check_err $? "Multicast received on first host when should not" + tc_check_packets "dev $h3 ingress" 1 0 + check_err $? "Multicast received on second host when should not" + + # Create (*, G). Will not be installed in the kernel. + create_mcast_sg $rp1 0.0.0.0 225.1.2.3 $rp2 $rp3 + + $MZ $h1 -c 1 -p 128 -t udp "ttl=10,sp=54321,dp=12345" \ + -a 00:11:22:33:44:55 -b 01:00:5e:01:02:03 \ + -A 198.51.100.2 -B 225.1.2.3 -q + + tc_check_packets "dev $h2 ingress" 1 1 + check_err $? "Multicast not received on first host" + tc_check_packets "dev $h3 ingress" 1 1 + check_err $? "Multicast not received on second host" + + delete_mcast_sg $rp1 0.0.0.0 225.1.2.3 $rp2 $rp3 + + tc filter del dev $h3 ingress protocol ip pref 1 handle 1 flower + tc filter del dev $h2 ingress protocol ip pref 1 handle 1 flower + + log_test "Unresolved queue IPv4" +} + +unres_v6() +{ + # Send a multicast packet not corresponding to an installed route, + # causing the kernel to queue the packet for resolution and emit an + # MRT6MSG_NOCACHE notification. smcrouted will react to this + # notification by consulting its (*, G) list and installing an (S, G) + # route, which will be used to forward the queued packet. + + RET=0 + + tc filter add dev $h2 ingress protocol ipv6 pref 1 handle 1 flower \ + dst_ip ff0e::3 ip_proto udp dst_port 12345 action drop + tc filter add dev $h3 ingress protocol ipv6 pref 1 handle 1 flower \ + dst_ip ff0e::3 ip_proto udp dst_port 12345 action drop + + # Forwarding should fail before installing a matching (*, G). + $MZ $h1 -6 -c 1 -p 128 -t udp "ttl=10,sp=54321,dp=12345" \ + -a 00:11:22:33:44:55 -b 33:33:00:00:00:03 \ + -A 2001:db8:1::2 -B ff0e::3 -q + + tc_check_packets "dev $h2 ingress" 1 0 + check_err $? "Multicast received on first host when should not" + tc_check_packets "dev $h3 ingress" 1 0 + check_err $? "Multicast received on second host when should not" + + # Create (*, G). Will not be installed in the kernel. + create_mcast_sg $rp1 :: ff0e::3 $rp2 $rp3 + + $MZ $h1 -6 -c 1 -p 128 -t udp "ttl=10,sp=54321,dp=12345" \ + -a 00:11:22:33:44:55 -b 33:33:00:00:00:03 \ + -A 2001:db8:1::2 -B ff0e::3 -q + + tc_check_packets "dev $h2 ingress" 1 1 + check_err $? "Multicast not received on first host" + tc_check_packets "dev $h3 ingress" 1 1 + check_err $? "Multicast not received on second host" + + delete_mcast_sg $rp1 :: ff0e::3 $rp2 $rp3 + + tc filter del dev $h3 ingress protocol ipv6 pref 1 handle 1 flower + tc filter del dev $h2 ingress protocol ipv6 pref 1 handle 1 flower + + log_test "Unresolved queue IPv6" +} + trap cleanup EXIT setup_prepare diff --git a/tools/testing/selftests/net/forwarding/router_multipath.sh b/tools/testing/selftests/net/forwarding/router_multipath.sh index 79a209927962..e2be354167a1 100755 --- a/tools/testing/selftests/net/forwarding/router_multipath.sh +++ b/tools/testing/selftests/net/forwarding/router_multipath.sh @@ -178,8 +178,8 @@ multipath4_test() t0_rp12=$(link_stats_tx_packets_get $rp12) t0_rp13=$(link_stats_tx_packets_get $rp13) - ip vrf exec vrf-h1 $MZ -q -p 64 -A 192.0.2.2 -B 198.51.100.2 \ - -d 1msec -t udp "sp=1024,dp=0-32768" + ip vrf exec vrf-h1 $MZ $h1 -q -p 64 -A 192.0.2.2 -B 198.51.100.2 \ + -d $MZ_DELAY -t udp "sp=1024,dp=0-32768" t1_rp12=$(link_stats_tx_packets_get $rp12) t1_rp13=$(link_stats_tx_packets_get $rp13) @@ -195,7 +195,7 @@ multipath4_test() sysctl_restore net.ipv4.fib_multipath_hash_policy } -multipath6_l4_test() +multipath6_test() { local desc="$1" local weight_rp12=$2 @@ -216,7 +216,7 @@ multipath6_l4_test() t0_rp13=$(link_stats_tx_packets_get $rp13) $MZ $h1 -6 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \ - -d 1msec -t udp "sp=1024,dp=0-32768" + -d $MZ_DELAY -t udp "sp=1024,dp=0-32768" t1_rp12=$(link_stats_tx_packets_get $rp12) t1_rp13=$(link_stats_tx_packets_get $rp13) @@ -232,38 +232,6 @@ multipath6_l4_test() sysctl_restore net.ipv6.fib_multipath_hash_policy } -multipath6_test() -{ - local desc="$1" - local weight_rp12=$2 - local weight_rp13=$3 - local t0_rp12 t0_rp13 t1_rp12 t1_rp13 - local packets_rp12 packets_rp13 - - ip route replace 2001:db8:2::/64 vrf vrf-r1 \ - nexthop via fe80:2::22 dev $rp12 weight $weight_rp12 \ - nexthop via fe80:3::23 dev $rp13 weight $weight_rp13 - - t0_rp12=$(link_stats_tx_packets_get $rp12) - t0_rp13=$(link_stats_tx_packets_get $rp13) - - # Generate 16384 echo requests, each with a random flow label. - for _ in $(seq 1 16384); do - ip vrf exec vrf-h1 $PING6 2001:db8:2::2 -F 0 -c 1 -q &> /dev/null - done - - t1_rp12=$(link_stats_tx_packets_get $rp12) - t1_rp13=$(link_stats_tx_packets_get $rp13) - - let "packets_rp12 = $t1_rp12 - $t0_rp12" - let "packets_rp13 = $t1_rp13 - $t0_rp13" - multipath_eval "$desc" $weight_rp12 $weight_rp13 $packets_rp12 $packets_rp13 - - ip route replace 2001:db8:2::/64 vrf vrf-r1 \ - nexthop via fe80:2::22 dev $rp12 \ - nexthop via fe80:3::23 dev $rp13 -} - multipath_test() { log_info "Running IPv4 multipath tests" @@ -275,11 +243,6 @@ multipath_test() multipath6_test "ECMP" 1 1 multipath6_test "Weighted MP 2:1" 2 1 multipath6_test "Weighted MP 11:45" 11 45 - - log_info "Running IPv6 L4 hash multipath tests" - multipath6_l4_test "ECMP" 1 1 - multipath6_l4_test "Weighted MP 2:1" 2 1 - multipath6_l4_test "Weighted MP 11:45" 11 45 } setup_prepare() diff --git a/tools/testing/selftests/net/forwarding/router_nh.sh b/tools/testing/selftests/net/forwarding/router_nh.sh new file mode 100755 index 000000000000..f3a53738bdcc --- /dev/null +++ b/tools/testing/selftests/net/forwarding/router_nh.sh @@ -0,0 +1,160 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +ALL_TESTS=" + ping_ipv4 + ping_ipv6 +" + +NUM_NETIFS=4 +source lib.sh +source tc_common.sh + +h1_create() +{ + vrf_create "vrf-h1" + ip link set dev $h1 master vrf-h1 + + ip link set dev vrf-h1 up + ip link set dev $h1 up + + ip address add 192.0.2.2/24 dev $h1 + ip address add 2001:db8:1::2/64 dev $h1 + + ip route add 198.51.100.0/24 vrf vrf-h1 nexthop via 192.0.2.1 + ip route add 2001:db8:2::/64 vrf vrf-h1 nexthop via 2001:db8:1::1 +} + +h1_destroy() +{ + ip route del 2001:db8:2::/64 vrf vrf-h1 + ip route del 198.51.100.0/24 vrf vrf-h1 + + ip address del 2001:db8:1::2/64 dev $h1 + ip address del 192.0.2.2/24 dev $h1 + + ip link set dev $h1 down + vrf_destroy "vrf-h1" +} + +h2_create() +{ + vrf_create "vrf-h2" + ip link set dev $h2 master vrf-h2 + + ip link set dev vrf-h2 up + ip link set dev $h2 up + + ip address add 198.51.100.2/24 dev $h2 + ip address add 2001:db8:2::2/64 dev $h2 + + ip route add 192.0.2.0/24 vrf vrf-h2 nexthop via 198.51.100.1 + ip route add 2001:db8:1::/64 vrf vrf-h2 nexthop via 2001:db8:2::1 +} + +h2_destroy() +{ + ip route del 2001:db8:1::/64 vrf vrf-h2 + ip route del 192.0.2.0/24 vrf vrf-h2 + + ip address del 2001:db8:2::2/64 dev $h2 + ip address del 198.51.100.2/24 dev $h2 + + ip link set dev $h2 down + vrf_destroy "vrf-h2" +} + +router_create() +{ + ip link set dev $rp1 up + ip link set dev $rp2 up + + tc qdisc add dev $rp2 clsact + + ip address add 192.0.2.1/24 dev $rp1 + ip address add 2001:db8:1::1/64 dev $rp1 + + ip address add 198.51.100.1/24 dev $rp2 + ip address add 2001:db8:2::1/64 dev $rp2 +} + +router_destroy() +{ + ip address del 2001:db8:2::1/64 dev $rp2 + ip address del 198.51.100.1/24 dev $rp2 + + ip address del 2001:db8:1::1/64 dev $rp1 + ip address del 192.0.2.1/24 dev $rp1 + + tc qdisc del dev $rp2 clsact + + ip link set dev $rp2 down + ip link set dev $rp1 down +} + +routing_nh_obj() +{ + # Create the nexthops as AF_INET6, so that IPv4 and IPv6 routes could + # use them. + ip -6 nexthop add id 101 dev $rp1 + ip -6 nexthop add id 102 dev $rp2 + + ip route replace 192.0.2.0/24 nhid 101 + ip route replace 2001:db8:1::/64 nhid 101 + ip route replace 198.51.100.0/24 nhid 102 + ip route replace 2001:db8:2::/64 nhid 102 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + rp1=${NETIFS[p2]} + + rp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + rp1mac=$(mac_get $rp1) + + vrf_prepare + + h1_create + h2_create + + router_create + + forwarding_enable +} + +cleanup() +{ + pre_cleanup + + forwarding_restore + + router_destroy + + h2_destroy + h1_destroy + + vrf_cleanup +} + +ping_ipv4() +{ + ping_test $h1 198.51.100.2 +} + +ping_ipv6() +{ + ping6_test $h1 2001:db8:2::2 +} + +trap cleanup EXIT + +setup_prepare +setup_wait +routing_nh_obj + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/router_vid_1.sh b/tools/testing/selftests/net/forwarding/router_vid_1.sh index a7306c7ac06d..865c9f7d8143 100755 --- a/tools/testing/selftests/net/forwarding/router_vid_1.sh +++ b/tools/testing/selftests/net/forwarding/router_vid_1.sh @@ -1,7 +1,32 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -ALL_TESTS="ping_ipv4 ping_ipv6" +# +--------------------+ +----------------------+ +# | H1 | | H2 | +# | | | | +# | $h1.1 + | | + $h2.1 | +# | 192.0.2.2/24 | | | | 198.51.100.2/24 | +# | 2001:db8:1::2/64 | | | | 2001:db8:2::2/64 | +# | | | | | | +# | $h1 + | | + $h2 | +# | | | | | | +# +------------------|-+ +-|--------------------+ +# | | +# +------------------|-------------------------|--------------------+ +# | SW | | | +# | | | | +# | $rp1 + + $rp2 | +# | | | | +# | $rp1.1 + + $rp2.1 | +# | 192.0.2.1/24 198.51.100.1/24 | +# | 2001:db8:1::1/64 2001:db8:2::1/64 | +# | | +# +-----------------------------------------------------------------+ + +ALL_TESTS=" + ping_ipv4 + ping_ipv6 +" NUM_NETIFS=4 source lib.sh diff --git a/tools/testing/selftests/net/forwarding/sch_red.sh b/tools/testing/selftests/net/forwarding/sch_red.sh index e714bae473fb..81f31179ac88 100755 --- a/tools/testing/selftests/net/forwarding/sch_red.sh +++ b/tools/testing/selftests/net/forwarding/sch_red.sh @@ -1,3 +1,4 @@ +#!/bin/bash # SPDX-License-Identifier: GPL-2.0 # This test sends one stream of traffic from H1 through a TBF shaper, to a RED diff --git a/tools/testing/selftests/net/forwarding/sch_tbf_etsprio.sh b/tools/testing/selftests/net/forwarding/sch_tbf_etsprio.sh index 8bd85da1905a..df9bcd6a811a 100644 --- a/tools/testing/selftests/net/forwarding/sch_tbf_etsprio.sh +++ b/tools/testing/selftests/net/forwarding/sch_tbf_etsprio.sh @@ -4,9 +4,12 @@ ALL_TESTS=" ping_ipv4 tbf_test + tbf_root_test " source $lib_dir/sch_tbf_core.sh +QDISC_TYPE=${QDISC% *} + tbf_test_one() { local bs=$1; shift @@ -22,6 +25,8 @@ tbf_test_one() tbf_test() { + log_info "Testing root-$QDISC_TYPE-tbf" + # This test is used for both ETS and PRIO. Even though we only need two # bands, PRIO demands a minimum of three. tc qdisc add dev $swp2 root handle 10: $QDISC 3 priomap 2 1 0 @@ -29,6 +34,33 @@ tbf_test() tc qdisc del dev $swp2 root } +tbf_root_test() +{ + local bs=128K + + log_info "Testing root-tbf-$QDISC_TYPE" + + tc qdisc replace dev $swp2 root handle 1: \ + tbf rate 400Mbit burst $bs limit 1M + tc qdisc replace dev $swp2 parent 1:1 handle 10: \ + $QDISC 3 priomap 2 1 0 + tc qdisc replace dev $swp2 parent 10:3 handle 103: \ + bfifo limit 1M + tc qdisc replace dev $swp2 parent 10:2 handle 102: \ + bfifo limit 1M + tc qdisc replace dev $swp2 parent 10:1 handle 101: \ + bfifo limit 1M + + do_tbf_test 10 400 $bs + do_tbf_test 11 400 $bs + + tc qdisc del dev $swp2 root +} + +if type -t sch_tbf_pre_hook >/dev/null; then + sch_tbf_pre_hook +fi + trap cleanup EXIT setup_prepare diff --git a/tools/testing/selftests/net/forwarding/sch_tbf_root.sh b/tools/testing/selftests/net/forwarding/sch_tbf_root.sh index 72aa21ba88c7..96c997be0d03 100755 --- a/tools/testing/selftests/net/forwarding/sch_tbf_root.sh +++ b/tools/testing/selftests/net/forwarding/sch_tbf_root.sh @@ -23,6 +23,10 @@ tbf_test() tc qdisc del dev $swp2 root } +if type -t sch_tbf_pre_hook >/dev/null; then + sch_tbf_pre_hook +fi + trap cleanup EXIT setup_prepare diff --git a/tools/testing/selftests/powerpc/dscr/settings b/tools/testing/selftests/net/forwarding/settings index e7b9417537fb..e7b9417537fb 100644 --- a/tools/testing/selftests/powerpc/dscr/settings +++ b/tools/testing/selftests/net/forwarding/settings diff --git a/tools/testing/selftests/net/forwarding/skbedit_priority.sh b/tools/testing/selftests/net/forwarding/skbedit_priority.sh index e3bd8a6bb8b4..3dd5fcbd3eaa 100755 --- a/tools/testing/selftests/net/forwarding/skbedit_priority.sh +++ b/tools/testing/selftests/net/forwarding/skbedit_priority.sh @@ -54,7 +54,9 @@ h2_destroy() switch_create() { - ip link add name br1 up type bridge vlan_filtering 1 + ip link add name br1 type bridge vlan_filtering 1 + ip link set dev br1 addrgenmode none + ip link set dev br1 up ip link set dev $swp1 master br1 ip link set dev $swp1 up ip link set dev $swp2 master br1 @@ -72,7 +74,9 @@ switch_destroy() tc qdisc del dev $swp2 clsact tc qdisc del dev $swp1 clsact + ip link set dev $swp2 down ip link set dev $swp2 nomaster + ip link set dev $swp1 down ip link set dev $swp1 nomaster ip link del dev br1 } diff --git a/tools/testing/selftests/net/forwarding/tc_actions.sh b/tools/testing/selftests/net/forwarding/tc_actions.sh index d9eca227136b..589629636502 100755 --- a/tools/testing/selftests/net/forwarding/tc_actions.sh +++ b/tools/testing/selftests/net/forwarding/tc_actions.sh @@ -3,20 +3,25 @@ ALL_TESTS="gact_drop_and_ok_test mirred_egress_redirect_test \ mirred_egress_mirror_test matchall_mirred_egress_mirror_test \ - gact_trap_test" + gact_trap_test mirred_egress_to_ingress_test \ + mirred_egress_to_ingress_tcp_test" NUM_NETIFS=4 source tc_common.sh source lib.sh +require_command ncat + tcflags="skip_hw" h1_create() { simple_if_init $h1 192.0.2.1/24 + tc qdisc add dev $h1 clsact } h1_destroy() { + tc qdisc del dev $h1 clsact simple_if_fini $h1 192.0.2.1/24 } @@ -58,7 +63,7 @@ mirred_egress_test() RET=0 tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \ - $tcflags dst_ip 192.0.2.2 action drop + dst_ip 192.0.2.2 action drop $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ -t ip -q @@ -153,6 +158,92 @@ gact_trap_test() log_test "trap ($tcflags)" } +mirred_egress_to_ingress_test() +{ + RET=0 + + tc filter add dev $h1 protocol ip pref 100 handle 100 egress flower \ + ip_proto icmp src_ip 192.0.2.1 dst_ip 192.0.2.2 type 8 action \ + ct commit nat src addr 192.0.2.2 pipe \ + ct clear pipe \ + ct commit nat dst addr 192.0.2.1 pipe \ + mirred ingress redirect dev $h1 + + tc filter add dev $swp1 protocol ip pref 11 handle 111 ingress flower \ + ip_proto icmp src_ip 192.0.2.1 dst_ip 192.0.2.2 type 8 action drop + tc filter add dev $swp1 protocol ip pref 12 handle 112 ingress flower \ + ip_proto icmp src_ip 192.0.2.1 dst_ip 192.0.2.2 type 0 action pass + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t icmp "ping,id=42,seq=10" -q + + tc_check_packets "dev $h1 egress" 100 1 + check_err $? "didn't mirror first packet" + + tc_check_packets "dev $swp1 ingress" 111 1 + check_fail $? "didn't redirect first packet" + tc_check_packets "dev $swp1 ingress" 112 1 + check_err $? "didn't receive reply to first packet" + + ping 192.0.2.2 -I$h1 -c1 -w1 -q 1>/dev/null 2>&1 + + tc_check_packets "dev $h1 egress" 100 2 + check_err $? "didn't mirror second packet" + tc_check_packets "dev $swp1 ingress" 111 1 + check_fail $? "didn't redirect second packet" + tc_check_packets "dev $swp1 ingress" 112 2 + check_err $? "didn't receive reply to second packet" + + tc filter del dev $h1 egress protocol ip pref 100 handle 100 flower + tc filter del dev $swp1 ingress protocol ip pref 11 handle 111 flower + tc filter del dev $swp1 ingress protocol ip pref 12 handle 112 flower + + log_test "mirred_egress_to_ingress ($tcflags)" +} + +mirred_egress_to_ingress_tcp_test() +{ + mirred_e2i_tf1=$(mktemp) mirred_e2i_tf2=$(mktemp) + + RET=0 + dd conv=sparse status=none if=/dev/zero bs=1M count=2 of=$mirred_e2i_tf1 + tc filter add dev $h1 protocol ip pref 100 handle 100 egress flower \ + $tcflags ip_proto tcp src_ip 192.0.2.1 dst_ip 192.0.2.2 \ + action ct commit nat src addr 192.0.2.2 pipe \ + action ct clear pipe \ + action ct commit nat dst addr 192.0.2.1 pipe \ + action ct clear pipe \ + action skbedit ptype host pipe \ + action mirred ingress redirect dev $h1 + tc filter add dev $h1 protocol ip pref 101 handle 101 egress flower \ + $tcflags ip_proto icmp \ + action mirred ingress redirect dev $h1 + tc filter add dev $h1 protocol ip pref 102 handle 102 ingress flower \ + ip_proto icmp \ + action drop + + ip vrf exec v$h1 ncat --recv-only -w10 -l -p 12345 -o $mirred_e2i_tf2 & + local rpid=$! + ip vrf exec v$h1 ncat -w1 --send-only 192.0.2.2 12345 <$mirred_e2i_tf1 + wait -n $rpid + cmp -s $mirred_e2i_tf1 $mirred_e2i_tf2 + check_err $? "server output check failed" + + $MZ $h1 -c 10 -p 64 -a $h1mac -b $h1mac -A 192.0.2.1 -B 192.0.2.1 \ + -t icmp "ping,id=42,seq=5" -q + tc_check_packets "dev $h1 egress" 101 10 + check_err $? "didn't mirred redirect ICMP" + tc_check_packets "dev $h1 ingress" 102 10 + check_err $? "didn't drop mirred ICMP" + + tc filter del dev $h1 egress protocol ip pref 100 handle 100 flower + tc filter del dev $h1 egress protocol ip pref 101 handle 101 flower + tc filter del dev $h1 ingress protocol ip pref 102 handle 102 flower + + rm -f $mirred_e2i_tf1 $mirred_e2i_tf2 + log_test "mirred_egress_to_ingress_tcp ($tcflags)" +} + setup_prepare() { h1=${NETIFS[p1]} @@ -178,6 +269,8 @@ setup_prepare() cleanup() { + local tf + pre_cleanup switch_destroy @@ -188,6 +281,8 @@ cleanup() ip link set $swp2 address $swp2origmac ip link set $swp1 address $swp1origmac + + for tf in $mirred_e2i_tf1 $mirred_e2i_tf2; do rm -f $tf; done } mirred_egress_redirect_test() diff --git a/tools/testing/selftests/net/forwarding/tc_chains.sh b/tools/testing/selftests/net/forwarding/tc_chains.sh index 2934fb5ed2a2..b95de0463ebd 100755 --- a/tools/testing/selftests/net/forwarding/tc_chains.sh +++ b/tools/testing/selftests/net/forwarding/tc_chains.sh @@ -136,7 +136,7 @@ template_filter_fits() tc filter add dev $h2 ingress protocol ip pref 1 handle 1102 \ flower src_mac $h2mac action drop &> /dev/null - check_fail $? "Incorrectly succeded to insert filter which does not template" + check_fail $? "Incorrectly succeeded to insert filter which does not template" tc filter add dev $h2 ingress chain 1 protocol ip pref 1 handle 1101 \ flower src_mac $h2mac action drop @@ -144,7 +144,7 @@ template_filter_fits() tc filter add dev $h2 ingress chain 1 protocol ip pref 1 handle 1102 \ flower dst_mac $h2mac action drop &> /dev/null - check_fail $? "Incorrectly succeded to insert filter which does not template" + check_fail $? "Incorrectly succeeded to insert filter which does not template" tc filter del dev $h2 ingress chain 1 protocol ip pref 1 handle 1102 \ flower &> /dev/null diff --git a/tools/testing/selftests/net/forwarding/tc_common.sh b/tools/testing/selftests/net/forwarding/tc_common.sh index 0e18e8be6e2a..bce8bb8d2b6f 100644 --- a/tools/testing/selftests/net/forwarding/tc_common.sh +++ b/tools/testing/selftests/net/forwarding/tc_common.sh @@ -16,6 +16,16 @@ tc_check_packets() tc_rule_handle_stats_get "$id" "$handle" > /dev/null } +tc_check_at_least_x_packets() +{ + local id=$1 + local handle=$2 + local count=$3 + + busywait "$TC_HIT_TIMEOUT" until_counter_is ">= $count" \ + tc_rule_handle_stats_get "$id" "$handle" > /dev/null +} + tc_check_packets_hitting() { local id=$1 diff --git a/tools/testing/selftests/net/forwarding/tc_flower.sh b/tools/testing/selftests/net/forwarding/tc_flower.sh index 058c746ee300..b1daad19b01e 100755 --- a/tools/testing/selftests/net/forwarding/tc_flower.sh +++ b/tools/testing/selftests/net/forwarding/tc_flower.sh @@ -3,7 +3,10 @@ ALL_TESTS="match_dst_mac_test match_src_mac_test match_dst_ip_test \ match_src_ip_test match_ip_flags_test match_pcp_test match_vlan_test \ - match_ip_tos_test match_indev_test" + match_ip_tos_test match_indev_test match_ip_ttl_test + match_mpls_label_test \ + match_mpls_tc_test match_mpls_bos_test match_mpls_ttl_test \ + match_mpls_lse_test" NUM_NETIFS=2 source tc_common.sh source lib.sh @@ -49,8 +52,8 @@ match_dst_mac_test() tc_check_packets "dev $h2 ingress" 101 1 check_fail $? "Matched on a wrong filter" - tc_check_packets "dev $h2 ingress" 102 1 - check_err $? "Did not match on correct filter" + tc_check_packets "dev $h2 ingress" 102 0 + check_fail $? "Did not match on correct filter" tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower @@ -75,8 +78,8 @@ match_src_mac_test() tc_check_packets "dev $h2 ingress" 101 1 check_fail $? "Matched on a wrong filter" - tc_check_packets "dev $h2 ingress" 102 1 - check_err $? "Did not match on correct filter" + tc_check_packets "dev $h2 ingress" 102 0 + check_fail $? "Did not match on correct filter" tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower @@ -310,6 +313,42 @@ match_ip_tos_test() log_test "ip_tos match ($tcflags)" } +match_ip_ttl_test() +{ + RET=0 + + tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \ + $tcflags dst_ip 192.0.2.2 ip_ttl 63 action drop + tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \ + $tcflags dst_ip 192.0.2.2 action drop + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip "ttl=63" -q + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip "ttl=63,mf,frag=256" -q + + tc_check_packets "dev $h2 ingress" 102 1 + check_fail $? "Matched on the wrong filter (no check on ttl)" + + tc_check_packets "dev $h2 ingress" 101 2 + check_err $? "Did not match on correct filter (ttl=63)" + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip "ttl=255" -q + + tc_check_packets "dev $h2 ingress" 101 3 + check_fail $? "Matched on a wrong filter (ttl=63)" + + tc_check_packets "dev $h2 ingress" 102 1 + check_err $? "Did not match on correct filter (no check on ttl)" + + tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower + tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower + + log_test "ip_ttl match ($tcflags)" +} + match_indev_test() { RET=0 @@ -334,6 +373,309 @@ match_indev_test() log_test "indev match ($tcflags)" } +# Unfortunately, mausezahn can't build MPLS headers when used in L2 +# mode, so we have this function to build Label Stack Entries. +mpls_lse() +{ + local label=$1 + local tc=$2 + local bos=$3 + local ttl=$4 + + printf "%02x %02x %02x %02x" \ + $((label >> 12)) \ + $((label >> 4 & 0xff)) \ + $((((label & 0xf) << 4) + (tc << 1) + bos)) \ + $ttl +} + +match_mpls_label_test() +{ + local ethtype="88 47"; readonly ethtype + local pkt + + RET=0 + + check_tc_mpls_support $h2 || return 0 + + tc filter add dev $h2 ingress protocol mpls_uc pref 1 handle 101 \ + flower $tcflags mpls_label 0 action drop + tc filter add dev $h2 ingress protocol mpls_uc pref 2 handle 102 \ + flower $tcflags mpls_label 1048575 action drop + + pkt="$ethtype $(mpls_lse 1048575 0 1 255)" + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q + + tc_check_packets "dev $h2 ingress" 101 1 + check_fail $? "Matched on a wrong filter (1048575)" + + tc_check_packets "dev $h2 ingress" 102 1 + check_err $? "Did not match on correct filter (1048575)" + + pkt="$ethtype $(mpls_lse 0 0 1 255)" + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q + + tc_check_packets "dev $h2 ingress" 102 2 + check_fail $? "Matched on a wrong filter (0)" + + tc_check_packets "dev $h2 ingress" 101 1 + check_err $? "Did not match on correct filter (0)" + + tc filter del dev $h2 ingress protocol mpls_uc pref 2 handle 102 flower + tc filter del dev $h2 ingress protocol mpls_uc pref 1 handle 101 flower + + log_test "mpls_label match ($tcflags)" +} + +match_mpls_tc_test() +{ + local ethtype="88 47"; readonly ethtype + local pkt + + RET=0 + + check_tc_mpls_support $h2 || return 0 + + tc filter add dev $h2 ingress protocol mpls_uc pref 1 handle 101 \ + flower $tcflags mpls_tc 0 action drop + tc filter add dev $h2 ingress protocol mpls_uc pref 2 handle 102 \ + flower $tcflags mpls_tc 7 action drop + + pkt="$ethtype $(mpls_lse 0 7 1 255)" + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q + + tc_check_packets "dev $h2 ingress" 101 1 + check_fail $? "Matched on a wrong filter (7)" + + tc_check_packets "dev $h2 ingress" 102 1 + check_err $? "Did not match on correct filter (7)" + + pkt="$ethtype $(mpls_lse 0 0 1 255)" + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q + + tc_check_packets "dev $h2 ingress" 102 2 + check_fail $? "Matched on a wrong filter (0)" + + tc_check_packets "dev $h2 ingress" 101 1 + check_err $? "Did not match on correct filter (0)" + + tc filter del dev $h2 ingress protocol mpls_uc pref 2 handle 102 flower + tc filter del dev $h2 ingress protocol mpls_uc pref 1 handle 101 flower + + log_test "mpls_tc match ($tcflags)" +} + +match_mpls_bos_test() +{ + local ethtype="88 47"; readonly ethtype + local pkt + + RET=0 + + check_tc_mpls_support $h2 || return 0 + + tc filter add dev $h2 ingress protocol mpls_uc pref 1 handle 101 \ + flower $tcflags mpls_bos 0 action drop + tc filter add dev $h2 ingress protocol mpls_uc pref 2 handle 102 \ + flower $tcflags mpls_bos 1 action drop + + pkt="$ethtype $(mpls_lse 0 0 1 255)" + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q + + tc_check_packets "dev $h2 ingress" 101 1 + check_fail $? "Matched on a wrong filter (1)" + + tc_check_packets "dev $h2 ingress" 102 1 + check_err $? "Did not match on correct filter (1)" + + # Need to add a second label to properly mark the Bottom of Stack + pkt="$ethtype $(mpls_lse 0 0 0 255) $(mpls_lse 0 0 1 255)" + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q + + tc_check_packets "dev $h2 ingress" 102 2 + check_fail $? "Matched on a wrong filter (0)" + + tc_check_packets "dev $h2 ingress" 101 1 + check_err $? "Did not match on correct filter (0)" + + tc filter del dev $h2 ingress protocol mpls_uc pref 2 handle 102 flower + tc filter del dev $h2 ingress protocol mpls_uc pref 1 handle 101 flower + + log_test "mpls_bos match ($tcflags)" +} + +match_mpls_ttl_test() +{ + local ethtype="88 47"; readonly ethtype + local pkt + + RET=0 + + check_tc_mpls_support $h2 || return 0 + + tc filter add dev $h2 ingress protocol mpls_uc pref 1 handle 101 \ + flower $tcflags mpls_ttl 0 action drop + tc filter add dev $h2 ingress protocol mpls_uc pref 2 handle 102 \ + flower $tcflags mpls_ttl 255 action drop + + pkt="$ethtype $(mpls_lse 0 0 1 255)" + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q + + tc_check_packets "dev $h2 ingress" 101 1 + check_fail $? "Matched on a wrong filter (255)" + + tc_check_packets "dev $h2 ingress" 102 1 + check_err $? "Did not match on correct filter (255)" + + pkt="$ethtype $(mpls_lse 0 0 1 0)" + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q + + tc_check_packets "dev $h2 ingress" 102 2 + check_fail $? "Matched on a wrong filter (0)" + + tc_check_packets "dev $h2 ingress" 101 1 + check_err $? "Did not match on correct filter (0)" + + tc filter del dev $h2 ingress protocol mpls_uc pref 2 handle 102 flower + tc filter del dev $h2 ingress protocol mpls_uc pref 1 handle 101 flower + + log_test "mpls_ttl match ($tcflags)" +} + +match_mpls_lse_test() +{ + local ethtype="88 47"; readonly ethtype + local pkt + + RET=0 + + check_tc_mpls_lse_stats $h2 || return 0 + + # Match on first LSE (minimal values for each field) + tc filter add dev $h2 ingress protocol mpls_uc pref 1 handle 101 \ + flower $tcflags mpls lse depth 1 label 0 action continue + tc filter add dev $h2 ingress protocol mpls_uc pref 2 handle 102 \ + flower $tcflags mpls lse depth 1 tc 0 action continue + tc filter add dev $h2 ingress protocol mpls_uc pref 3 handle 103 \ + flower $tcflags mpls lse depth 1 bos 0 action continue + tc filter add dev $h2 ingress protocol mpls_uc pref 4 handle 104 \ + flower $tcflags mpls lse depth 1 ttl 0 action continue + + # Match on second LSE (maximal values for each field) + tc filter add dev $h2 ingress protocol mpls_uc pref 5 handle 105 \ + flower $tcflags mpls lse depth 2 label 1048575 action continue + tc filter add dev $h2 ingress protocol mpls_uc pref 6 handle 106 \ + flower $tcflags mpls lse depth 2 tc 7 action continue + tc filter add dev $h2 ingress protocol mpls_uc pref 7 handle 107 \ + flower $tcflags mpls lse depth 2 bos 1 action continue + tc filter add dev $h2 ingress protocol mpls_uc pref 8 handle 108 \ + flower $tcflags mpls lse depth 2 ttl 255 action continue + + # Match on LSE depth + tc filter add dev $h2 ingress protocol mpls_uc pref 9 handle 109 \ + flower $tcflags mpls lse depth 1 action continue + tc filter add dev $h2 ingress protocol mpls_uc pref 10 handle 110 \ + flower $tcflags mpls lse depth 2 action continue + tc filter add dev $h2 ingress protocol mpls_uc pref 11 handle 111 \ + flower $tcflags mpls lse depth 3 action continue + + # Base packet, matched by all filters (except for stack depth 3) + pkt="$ethtype $(mpls_lse 0 0 0 0) $(mpls_lse 1048575 7 1 255)" + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q + + # Make a variant of the above packet, with a non-matching value + # for each LSE field + + # Wrong label at depth 1 + pkt="$ethtype $(mpls_lse 1 0 0 0) $(mpls_lse 1048575 7 1 255)" + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q + + # Wrong TC at depth 1 + pkt="$ethtype $(mpls_lse 0 1 0 0) $(mpls_lse 1048575 7 1 255)" + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q + + # Wrong BOS at depth 1 (not adding a second LSE here since BOS is set + # in the first label, so anything that'd follow wouldn't be considered) + pkt="$ethtype $(mpls_lse 0 0 1 0)" + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q + + # Wrong TTL at depth 1 + pkt="$ethtype $(mpls_lse 0 0 0 1) $(mpls_lse 1048575 7 1 255)" + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q + + # Wrong label at depth 2 + pkt="$ethtype $(mpls_lse 0 0 0 0) $(mpls_lse 1048574 7 1 255)" + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q + + # Wrong TC at depth 2 + pkt="$ethtype $(mpls_lse 0 0 0 0) $(mpls_lse 1048575 6 1 255)" + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q + + # Wrong BOS at depth 2 (adding a third LSE here since BOS isn't set in + # the second label) + pkt="$ethtype $(mpls_lse 0 0 0 0) $(mpls_lse 1048575 7 0 255)" + pkt="$pkt $(mpls_lse 0 0 1 255)" + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q + + # Wrong TTL at depth 2 + pkt="$ethtype $(mpls_lse 0 0 0 0) $(mpls_lse 1048575 7 1 254)" + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q + + # Filters working at depth 1 should match all packets but one + + tc_check_packets "dev $h2 ingress" 101 8 + check_err $? "Did not match on correct filter" + + tc_check_packets "dev $h2 ingress" 102 8 + check_err $? "Did not match on correct filter" + + tc_check_packets "dev $h2 ingress" 103 8 + check_err $? "Did not match on correct filter" + + tc_check_packets "dev $h2 ingress" 104 8 + check_err $? "Did not match on correct filter" + + # Filters working at depth 2 should match all packets but two (because + # of the test packet where the label stack depth is just one) + + tc_check_packets "dev $h2 ingress" 105 7 + check_err $? "Did not match on correct filter" + + tc_check_packets "dev $h2 ingress" 106 7 + check_err $? "Did not match on correct filter" + + tc_check_packets "dev $h2 ingress" 107 7 + check_err $? "Did not match on correct filter" + + tc_check_packets "dev $h2 ingress" 108 7 + check_err $? "Did not match on correct filter" + + # Finally, verify the filters that only match on LSE depth + + tc_check_packets "dev $h2 ingress" 109 9 + check_err $? "Did not match on correct filter" + + tc_check_packets "dev $h2 ingress" 110 8 + check_err $? "Did not match on correct filter" + + tc_check_packets "dev $h2 ingress" 111 1 + check_err $? "Did not match on correct filter" + + tc filter del dev $h2 ingress protocol mpls_uc pref 11 handle 111 flower + tc filter del dev $h2 ingress protocol mpls_uc pref 10 handle 110 flower + tc filter del dev $h2 ingress protocol mpls_uc pref 9 handle 109 flower + tc filter del dev $h2 ingress protocol mpls_uc pref 8 handle 108 flower + tc filter del dev $h2 ingress protocol mpls_uc pref 7 handle 107 flower + tc filter del dev $h2 ingress protocol mpls_uc pref 6 handle 106 flower + tc filter del dev $h2 ingress protocol mpls_uc pref 5 handle 105 flower + tc filter del dev $h2 ingress protocol mpls_uc pref 4 handle 104 flower + tc filter del dev $h2 ingress protocol mpls_uc pref 3 handle 103 flower + tc filter del dev $h2 ingress protocol mpls_uc pref 2 handle 102 flower + tc filter del dev $h2 ingress protocol mpls_uc pref 1 handle 101 flower + + log_test "mpls lse match ($tcflags)" +} + setup_prepare() { h1=${NETIFS[p1]} diff --git a/tools/testing/selftests/net/forwarding/tc_flower_cfm.sh b/tools/testing/selftests/net/forwarding/tc_flower_cfm.sh new file mode 100755 index 000000000000..3ca20df952eb --- /dev/null +++ b/tools/testing/selftests/net/forwarding/tc_flower_cfm.sh @@ -0,0 +1,206 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +ALL_TESTS="match_cfm_opcode match_cfm_level match_cfm_level_and_opcode" +NUM_NETIFS=2 +source tc_common.sh +source lib.sh + +h1_create() +{ + simple_if_init $h1 +} + +h1_destroy() +{ + simple_if_fini $h1 +} + +h2_create() +{ + simple_if_init $h2 + tc qdisc add dev $h2 clsact +} + +h2_destroy() +{ + tc qdisc del dev $h2 clsact + simple_if_fini $h2 +} + +u8_to_hex() +{ + local u8=$1; shift + + printf "%02x" $u8 +} + +generate_cfm_hdr() +{ + local mdl=$1; shift + local op=$1; shift + local flags=$1; shift + local tlv_offset=$1; shift + + local cfm_hdr=$(: + )"$(u8_to_hex $((mdl << 5))):"$( : MD level and Version + )"$(u8_to_hex $op):"$( : OpCode + )"$(u8_to_hex $flags):"$( : Flags + )"$(u8_to_hex $tlv_offset)"$( : TLV offset + ) + + echo $cfm_hdr +} + +match_cfm_opcode() +{ + local ethtype="89 02"; readonly ethtype + RET=0 + + tc filter add dev $h2 ingress protocol cfm pref 1 handle 101 \ + flower cfm op 47 action drop + tc filter add dev $h2 ingress protocol cfm pref 1 handle 102 \ + flower cfm op 43 action drop + + pkt="$ethtype $(generate_cfm_hdr 7 47 0 32)" + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q + pkt="$ethtype $(generate_cfm_hdr 6 5 0 4)" + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q + + tc_check_packets "dev $h2 ingress" 101 1 + check_err $? "Did not match on correct opcode" + + tc_check_packets "dev $h2 ingress" 102 0 + check_err $? "Matched on the wrong opcode" + + pkt="$ethtype $(generate_cfm_hdr 0 43 0 12)" + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q + + tc_check_packets "dev $h2 ingress" 101 1 + check_err $? "Matched on the wrong opcode" + + tc_check_packets "dev $h2 ingress" 102 1 + check_err $? "Did not match on correct opcode" + + tc filter del dev $h2 ingress protocol cfm pref 1 handle 101 flower + tc filter del dev $h2 ingress protocol cfm pref 1 handle 102 flower + + log_test "CFM opcode match test" +} + +match_cfm_level() +{ + local ethtype="89 02"; readonly ethtype + RET=0 + + tc filter add dev $h2 ingress protocol cfm pref 1 handle 101 \ + flower cfm mdl 5 action drop + tc filter add dev $h2 ingress protocol cfm pref 1 handle 102 \ + flower cfm mdl 3 action drop + tc filter add dev $h2 ingress protocol cfm pref 1 handle 103 \ + flower cfm mdl 0 action drop + + pkt="$ethtype $(generate_cfm_hdr 5 42 0 12)" + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q + pkt="$ethtype $(generate_cfm_hdr 6 1 0 70)" + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q + pkt="$ethtype $(generate_cfm_hdr 0 1 0 70)" + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q + + tc_check_packets "dev $h2 ingress" 101 1 + check_err $? "Did not match on correct level" + + tc_check_packets "dev $h2 ingress" 102 0 + check_err $? "Matched on the wrong level" + + tc_check_packets "dev $h2 ingress" 103 1 + check_err $? "Did not match on correct level" + + pkt="$ethtype $(generate_cfm_hdr 3 0 0 4)" + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q + + tc_check_packets "dev $h2 ingress" 101 1 + check_err $? "Matched on the wrong level" + + tc_check_packets "dev $h2 ingress" 102 1 + check_err $? "Did not match on correct level" + + tc_check_packets "dev $h2 ingress" 103 1 + check_err $? "Matched on the wrong level" + + tc filter del dev $h2 ingress protocol cfm pref 1 handle 101 flower + tc filter del dev $h2 ingress protocol cfm pref 1 handle 102 flower + tc filter del dev $h2 ingress protocol cfm pref 1 handle 103 flower + + log_test "CFM level match test" +} + +match_cfm_level_and_opcode() +{ + local ethtype="89 02"; readonly ethtype + RET=0 + + tc filter add dev $h2 ingress protocol cfm pref 1 handle 101 \ + flower cfm mdl 5 op 41 action drop + tc filter add dev $h2 ingress protocol cfm pref 1 handle 102 \ + flower cfm mdl 7 op 42 action drop + + pkt="$ethtype $(generate_cfm_hdr 5 41 0 4)" + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q + pkt="$ethtype $(generate_cfm_hdr 7 3 0 4)" + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q + pkt="$ethtype $(generate_cfm_hdr 3 42 0 12)" + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q + + tc_check_packets "dev $h2 ingress" 101 1 + check_err $? "Did not match on correct level and opcode" + + tc_check_packets "dev $h2 ingress" 102 0 + check_err $? "Matched on the wrong level and opcode" + + pkt="$ethtype $(generate_cfm_hdr 7 42 0 12)" + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q + + tc_check_packets "dev $h2 ingress" 101 1 + check_err $? "Matched on the wrong level and opcode" + + tc_check_packets "dev $h2 ingress" 102 1 + check_err $? "Did not match on correct level and opcode" + + tc filter del dev $h2 ingress protocol cfm pref 1 handle 101 flower + tc filter del dev $h2 ingress protocol cfm pref 1 handle 102 flower + + log_test "CFM opcode and level match test" +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + h2=${NETIFS[p2]} + h1mac=$(mac_get $h1) + h2mac=$(mac_get $h2) + + vrf_prepare + + h1_create + h2_create +} + +cleanup() +{ + pre_cleanup + + h2_destroy + h1_destroy + + vrf_cleanup +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/tc_flower_l2_miss.sh b/tools/testing/selftests/net/forwarding/tc_flower_l2_miss.sh new file mode 100755 index 000000000000..c2420bb72c12 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/tc_flower_l2_miss.sh @@ -0,0 +1,357 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# +-----------------------+ +----------------------+ +# | H1 (vrf) | | H2 (vrf) | +# | + $h1 | | $h2 + | +# | | 192.0.2.1/28 | | 192.0.2.2/28 | | +# | | 2001:db8:1::1/64 | | 2001:db8:1::2/64 | | +# +----|------------------+ +------------------|---+ +# | | +# +----|-------------------------------------------------------------------|---+ +# | SW | | | +# | +-|-------------------------------------------------------------------|-+ | +# | | + $swp1 BR $swp2 + | | +# | +-----------------------------------------------------------------------+ | +# +----------------------------------------------------------------------------+ + +ALL_TESTS=" + test_l2_miss_unicast + test_l2_miss_multicast + test_l2_miss_ll_multicast + test_l2_miss_broadcast +" + +NUM_NETIFS=4 +source lib.sh +source tc_common.sh + +h1_create() +{ + simple_if_init $h1 192.0.2.1/28 2001:db8:1::1/64 +} + +h1_destroy() +{ + simple_if_fini $h1 192.0.2.1/28 2001:db8:1::1/64 +} + +h2_create() +{ + simple_if_init $h2 192.0.2.2/28 2001:db8:1::2/64 +} + +h2_destroy() +{ + simple_if_fini $h2 192.0.2.2/28 2001:db8:1::2/64 +} + +switch_create() +{ + ip link add name br1 up type bridge + ip link set dev $swp1 master br1 + ip link set dev $swp1 up + ip link set dev $swp2 master br1 + ip link set dev $swp2 up + + tc qdisc add dev $swp2 clsact +} + +switch_destroy() +{ + tc qdisc del dev $swp2 clsact + + ip link set dev $swp2 down + ip link set dev $swp2 nomaster + ip link set dev $swp1 down + ip link set dev $swp1 nomaster + ip link del dev br1 +} + +test_l2_miss_unicast() +{ + local dmac=00:01:02:03:04:05 + local dip=192.0.2.2 + local sip=192.0.2.1 + + RET=0 + + # Unknown unicast. + tc filter add dev $swp2 egress protocol ipv4 handle 101 pref 1 \ + flower indev $swp1 l2_miss 1 dst_mac $dmac src_ip $sip \ + dst_ip $dip action pass + # Known unicast. + tc filter add dev $swp2 egress protocol ipv4 handle 102 pref 1 \ + flower indev $swp1 l2_miss 0 dst_mac $dmac src_ip $sip \ + dst_ip $dip action pass + + # Before adding FDB entry. + $MZ $h1 -a own -b $dmac -t ip -A $sip -B $dip -c 1 -p 100 -q + + tc_check_packets "dev $swp2 egress" 101 1 + check_err $? "Unknown unicast filter was not hit before adding FDB entry" + + tc_check_packets "dev $swp2 egress" 102 0 + check_err $? "Known unicast filter was hit before adding FDB entry" + + # Adding FDB entry. + bridge fdb replace $dmac dev $swp2 master static + + $MZ $h1 -a own -b $dmac -t ip -A $sip -B $dip -c 1 -p 100 -q + + tc_check_packets "dev $swp2 egress" 101 1 + check_err $? "Unknown unicast filter was hit after adding FDB entry" + + tc_check_packets "dev $swp2 egress" 102 1 + check_err $? "Known unicast filter was not hit after adding FDB entry" + + # Deleting FDB entry. + bridge fdb del $dmac dev $swp2 master static + + $MZ $h1 -a own -b $dmac -t ip -A $sip -B $dip -c 1 -p 100 -q + + tc_check_packets "dev $swp2 egress" 101 2 + check_err $? "Unknown unicast filter was not hit after deleting FDB entry" + + tc_check_packets "dev $swp2 egress" 102 1 + check_err $? "Known unicast filter was hit after deleting FDB entry" + + tc filter del dev $swp2 egress protocol ipv4 pref 1 handle 102 flower + tc filter del dev $swp2 egress protocol ipv4 pref 1 handle 101 flower + + log_test "L2 miss - Unicast" +} + +test_l2_miss_multicast_common() +{ + local proto=$1; shift + local sip=$1; shift + local dip=$1; shift + local dmac=$1; shift + local mode=$1; shift + local name=$1; shift + + RET=0 + + # Unregistered multicast. + tc filter add dev $swp2 egress protocol $proto handle 101 pref 1 \ + flower indev $swp1 l2_miss 1 src_ip $sip dst_ip $dip \ + action pass + # Registered multicast. + tc filter add dev $swp2 egress protocol $proto handle 102 pref 1 \ + flower indev $swp1 l2_miss 0 src_ip $sip dst_ip $dip \ + action pass + + # Before adding MDB entry. + $MZ $mode $h1 -a own -b $dmac -t ip -A $sip -B $dip -c 1 -p 100 -q + + tc_check_packets "dev $swp2 egress" 101 1 + check_err $? "Unregistered multicast filter was not hit before adding MDB entry" + + tc_check_packets "dev $swp2 egress" 102 0 + check_err $? "Registered multicast filter was hit before adding MDB entry" + + # Adding MDB entry. + bridge mdb replace dev br1 port $swp2 grp $dip permanent + + $MZ $mode $h1 -a own -b $dmac -t ip -A $sip -B $dip -c 1 -p 100 -q + + tc_check_packets "dev $swp2 egress" 101 1 + check_err $? "Unregistered multicast filter was hit after adding MDB entry" + + tc_check_packets "dev $swp2 egress" 102 1 + check_err $? "Registered multicast filter was not hit after adding MDB entry" + + # Deleting MDB entry. + bridge mdb del dev br1 port $swp2 grp $dip + + $MZ $mode $h1 -a own -b $dmac -t ip -A $sip -B $dip -c 1 -p 100 -q + + tc_check_packets "dev $swp2 egress" 101 2 + check_err $? "Unregistered multicast filter was not hit after deleting MDB entry" + + tc_check_packets "dev $swp2 egress" 102 1 + check_err $? "Registered multicast filter was hit after deleting MDB entry" + + tc filter del dev $swp2 egress protocol $proto pref 1 handle 102 flower + tc filter del dev $swp2 egress protocol $proto pref 1 handle 101 flower + + log_test "L2 miss - Multicast ($name)" +} + +test_l2_miss_multicast_ipv4() +{ + local proto="ipv4" + local sip=192.0.2.1 + local dip=239.1.1.1 + local dmac=01:00:5e:01:01:01 + local mode="-4" + local name="IPv4" + + test_l2_miss_multicast_common $proto $sip $dip $dmac $mode $name +} + +test_l2_miss_multicast_ipv6() +{ + local proto="ipv6" + local sip=2001:db8:1::1 + local dip=ff0e::1 + local dmac=33:33:00:00:00:01 + local mode="-6" + local name="IPv6" + + test_l2_miss_multicast_common $proto $sip $dip $dmac $mode $name +} + +test_l2_miss_multicast() +{ + # Configure $swp2 as a multicast router port so that it will forward + # both registered and unregistered multicast traffic. + bridge link set dev $swp2 mcast_router 2 + + # Set the Max Response Delay to 100 centiseconds (1 second) so that the + # bridge will start forwarding according to its MDB soon after a + # multicast querier is enabled. + ip link set dev br1 type bridge mcast_query_response_interval 100 + + # Forwarding according to MDB entries only takes place when the bridge + # detects that there is a valid querier in the network. Set the bridge + # as the querier and assign it a valid IPv6 link-local address to be + # used as the source address for MLD queries. + ip link set dev br1 type bridge mcast_querier 1 + ip -6 address add fe80::1/64 nodad dev br1 + sleep 10 + + test_l2_miss_multicast_ipv4 + test_l2_miss_multicast_ipv6 + + ip -6 address del fe80::1/64 dev br1 + ip link set dev br1 type bridge mcast_querier 0 + ip link set dev br1 type bridge mcast_query_response_interval 1000 + bridge link set dev $swp2 mcast_router 1 +} + +test_l2_miss_multicast_common2() +{ + local name=$1; shift + local dmac=$1; shift + local dip=224.0.0.1 + local sip=192.0.2.1 + +} + +test_l2_miss_ll_multicast_common() +{ + local proto=$1; shift + local dmac=$1; shift + local sip=$1; shift + local dip=$1; shift + local mode=$1; shift + local name=$1; shift + + RET=0 + + tc filter add dev $swp2 egress protocol $proto handle 101 pref 1 \ + flower indev $swp1 l2_miss 1 dst_mac $dmac src_ip $sip \ + dst_ip $dip action pass + + $MZ $mode $h1 -a own -b $dmac -t ip -A $sip -B $dip -c 1 -p 100 -q + + tc_check_packets "dev $swp2 egress" 101 1 + check_err $? "Filter was not hit" + + tc filter del dev $swp2 egress protocol $proto pref 1 handle 101 flower + + log_test "L2 miss - Link-local multicast ($name)" +} + +test_l2_miss_ll_multicast_ipv4() +{ + local proto=ipv4 + local dmac=01:00:5e:00:00:01 + local sip=192.0.2.1 + local dip=224.0.0.1 + local mode="-4" + local name="IPv4" + + test_l2_miss_ll_multicast_common $proto $dmac $sip $dip $mode $name +} + +test_l2_miss_ll_multicast_ipv6() +{ + local proto=ipv6 + local dmac=33:33:00:00:00:01 + local sip=2001:db8:1::1 + local dip=ff02::1 + local mode="-6" + local name="IPv6" + + test_l2_miss_ll_multicast_common $proto $dmac $sip $dip $mode $name +} + +test_l2_miss_ll_multicast() +{ + test_l2_miss_ll_multicast_ipv4 + test_l2_miss_ll_multicast_ipv6 +} + +test_l2_miss_broadcast() +{ + local dmac=ff:ff:ff:ff:ff:ff + local smac=00:01:02:03:04:05 + + RET=0 + + tc filter add dev $swp2 egress protocol all handle 101 pref 1 \ + flower l2_miss 1 dst_mac $dmac src_mac $smac \ + action pass + tc filter add dev $swp2 egress protocol all handle 102 pref 1 \ + flower l2_miss 0 dst_mac $dmac src_mac $smac \ + action pass + + $MZ $h1 -a $smac -b $dmac -c 1 -p 100 -q + + tc_check_packets "dev $swp2 egress" 101 0 + check_err $? "L2 miss filter was hit when should not" + + tc_check_packets "dev $swp2 egress" 102 1 + check_err $? "L2 no miss filter was not hit when should" + + tc filter del dev $swp2 egress protocol all pref 1 handle 102 flower + tc filter del dev $swp2 egress protocol all pref 1 handle 101 flower + + log_test "L2 miss - Broadcast" +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + vrf_prepare + h1_create + h2_create + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + h2_destroy + h1_destroy + vrf_cleanup +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/tc_flower_port_range.sh b/tools/testing/selftests/net/forwarding/tc_flower_port_range.sh new file mode 100755 index 000000000000..3885a2a91f7d --- /dev/null +++ b/tools/testing/selftests/net/forwarding/tc_flower_port_range.sh @@ -0,0 +1,228 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# +-----------------------+ +----------------------+ +# | H1 (vrf) | | H2 (vrf) | +# | + $h1 | | $h2 + | +# | | 192.0.2.1/28 | | 192.0.2.2/28 | | +# | | 2001:db8:1::1/64 | | 2001:db8:1::2/64 | | +# +----|------------------+ +------------------|---+ +# | | +# +----|-------------------------------------------------------------------|---+ +# | SW | | | +# | +-|-------------------------------------------------------------------|-+ | +# | | + $swp1 BR $swp2 + | | +# | +-----------------------------------------------------------------------+ | +# +----------------------------------------------------------------------------+ + +ALL_TESTS=" + test_port_range_ipv4_udp + test_port_range_ipv4_tcp + test_port_range_ipv6_udp + test_port_range_ipv6_tcp +" + +NUM_NETIFS=4 +source lib.sh +source tc_common.sh + +h1_create() +{ + simple_if_init $h1 192.0.2.1/28 2001:db8:1::1/64 +} + +h1_destroy() +{ + simple_if_fini $h1 192.0.2.1/28 2001:db8:1::1/64 +} + +h2_create() +{ + simple_if_init $h2 192.0.2.2/28 2001:db8:1::2/64 +} + +h2_destroy() +{ + simple_if_fini $h2 192.0.2.2/28 2001:db8:1::2/64 +} + +switch_create() +{ + ip link add name br1 type bridge + ip link set dev $swp1 master br1 + ip link set dev $swp1 up + ip link set dev $swp2 master br1 + ip link set dev $swp2 up + ip link set dev br1 up + + tc qdisc add dev $swp1 clsact + tc qdisc add dev $swp2 clsact +} + +switch_destroy() +{ + tc qdisc del dev $swp2 clsact + tc qdisc del dev $swp1 clsact + + ip link set dev br1 down + ip link set dev $swp2 down + ip link set dev $swp2 nomaster + ip link set dev $swp1 down + ip link set dev $swp1 nomaster + ip link del dev br1 +} + +__test_port_range() +{ + local proto=$1; shift + local ip_proto=$1; shift + local sip=$1; shift + local dip=$1; shift + local mode=$1; shift + local name=$1; shift + local dmac=$(mac_get $h2) + local smac=$(mac_get $h1) + local sport_min=100 + local sport_max=200 + local sport_mid=$((sport_min + (sport_max - sport_min) / 2)) + local dport_min=300 + local dport_max=400 + local dport_mid=$((dport_min + (dport_max - dport_min) / 2)) + + RET=0 + + tc filter add dev $swp1 ingress protocol $proto handle 101 pref 1 \ + flower src_ip $sip dst_ip $dip ip_proto $ip_proto \ + src_port $sport_min-$sport_max \ + dst_port $dport_min-$dport_max \ + action pass + tc filter add dev $swp2 egress protocol $proto handle 101 pref 1 \ + flower src_ip $sip dst_ip $dip ip_proto $ip_proto \ + src_port $sport_min-$sport_max \ + dst_port $dport_min-$dport_max \ + action drop + + $MZ $mode $h1 -c 1 -q -p 100 -a $smac -b $dmac -A $sip -B $dip \ + -t $ip_proto "sp=$sport_min,dp=$dport_min" + tc_check_packets "dev $swp1 ingress" 101 1 + check_err $? "Ingress filter not hit with minimum ports" + tc_check_packets "dev $swp2 egress" 101 1 + check_err $? "Egress filter not hit with minimum ports" + + $MZ $mode $h1 -c 1 -q -p 100 -a $smac -b $dmac -A $sip -B $dip \ + -t $ip_proto "sp=$sport_mid,dp=$dport_mid" + tc_check_packets "dev $swp1 ingress" 101 2 + check_err $? "Ingress filter not hit with middle ports" + tc_check_packets "dev $swp2 egress" 101 2 + check_err $? "Egress filter not hit with middle ports" + + $MZ $mode $h1 -c 1 -q -p 100 -a $smac -b $dmac -A $sip -B $dip \ + -t $ip_proto "sp=$sport_max,dp=$dport_max" + tc_check_packets "dev $swp1 ingress" 101 3 + check_err $? "Ingress filter not hit with maximum ports" + tc_check_packets "dev $swp2 egress" 101 3 + check_err $? "Egress filter not hit with maximum ports" + + # Send traffic when both ports are out of range and when only one port + # is out of range. + $MZ $mode $h1 -c 1 -q -p 100 -a $smac -b $dmac -A $sip -B $dip \ + -t $ip_proto "sp=$((sport_min - 1)),dp=$dport_min" + $MZ $mode $h1 -c 1 -q -p 100 -a $smac -b $dmac -A $sip -B $dip \ + -t $ip_proto "sp=$((sport_max + 1)),dp=$dport_min" + $MZ $mode $h1 -c 1 -q -p 100 -a $smac -b $dmac -A $sip -B $dip \ + -t $ip_proto "sp=$sport_min,dp=$((dport_min - 1))" + $MZ $mode $h1 -c 1 -q -p 100 -a $smac -b $dmac -A $sip -B $dip \ + -t $ip_proto "sp=$sport_min,dp=$((dport_max + 1))" + $MZ $mode $h1 -c 1 -q -p 100 -a $smac -b $dmac -A $sip -B $dip \ + -t $ip_proto "sp=$((sport_max + 1)),dp=$((dport_max + 1))" + tc_check_packets "dev $swp1 ingress" 101 3 + check_err $? "Ingress filter was hit when should not" + tc_check_packets "dev $swp2 egress" 101 3 + check_err $? "Egress filter was hit when should not" + + tc filter del dev $swp2 egress protocol $proto pref 1 handle 101 flower + tc filter del dev $swp1 ingress protocol $proto pref 1 handle 101 flower + + log_test "Port range matching - $name" +} + +test_port_range_ipv4_udp() +{ + local proto=ipv4 + local ip_proto=udp + local sip=192.0.2.1 + local dip=192.0.2.2 + local mode="-4" + local name="IPv4 UDP" + + __test_port_range $proto $ip_proto $sip $dip $mode "$name" +} + +test_port_range_ipv4_tcp() +{ + local proto=ipv4 + local ip_proto=tcp + local sip=192.0.2.1 + local dip=192.0.2.2 + local mode="-4" + local name="IPv4 TCP" + + __test_port_range $proto $ip_proto $sip $dip $mode "$name" +} + +test_port_range_ipv6_udp() +{ + local proto=ipv6 + local ip_proto=udp + local sip=2001:db8:1::1 + local dip=2001:db8:1::2 + local mode="-6" + local name="IPv6 UDP" + + __test_port_range $proto $ip_proto $sip $dip $mode "$name" +} + +test_port_range_ipv6_tcp() +{ + local proto=ipv6 + local ip_proto=tcp + local sip=2001:db8:1::1 + local dip=2001:db8:1::2 + local mode="-6" + local name="IPv6 TCP" + + __test_port_range $proto $ip_proto $sip $dip $mode "$name" +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + vrf_prepare + h1_create + h2_create + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + h2_destroy + h1_destroy + vrf_cleanup +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/tc_mpls_l2vpn.sh b/tools/testing/selftests/net/forwarding/tc_mpls_l2vpn.sh new file mode 100755 index 000000000000..03743f04e178 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/tc_mpls_l2vpn.sh @@ -0,0 +1,192 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# +-----------------------+ +# | H1 (v$h1) | +# | 192.0.2.1/24 | +# | 2001:db8::1/124 | +# | + $h1 | +# +-----------------|-----+ +# | +# | (Plain Ethernet traffic) +# | +# +-----------------|-----------------------------------------+ +# | LER1 + $edge1 | +# | -ingress: | +# | -encapsulate Ethernet into MPLS | +# | -add outer Ethernet header | +# | -redirect to $mpls1 (egress) | +# | | +# | + $mpls1 | +# | | -ingress: | +# | | -remove outer Ethernet header | +# | | -remove MPLS header | +# | | -redirect to $edge1 (egress) | +# +-----------------|-----------------------------------------+ +# | +# | (Ethernet over MPLS traffic) +# | +# +-----------------|-----------------------------------------+ +# | LER2 + $mpls2 | +# | -ingress: | +# | -remove outer Ethernet header | +# | -remove MPLS header | +# | -redirect to $edge2 (egress) | +# | | +# | + $edge2 | +# | | -ingress: | +# | | -encapsulate Ethernet into MPLS | +# | | -add outer Ethernet header | +# | | -redirect to $mpls2 (egress) | +# +-----------------|-----------------------------------------| +# | +# | (Plain Ethernet traffic) +# | +# +-----------------|-----+ +# | H2 (v$h2) | | +# | + $h2 | +# | 192.0.2.2/24 | +# | 2001:db8::2/124 | +# +-----------------------+ +# +# LER1 and LER2 logically represent two different routers. However, no VRF is +# created for them, as they don't do any IP routing. + +ALL_TESTS="mpls_forward_eth" +NUM_NETIFS=6 +source lib.sh + +h1_create() +{ + simple_if_init $h1 192.0.2.1/24 2001:db8::1/124 +} + +h1_destroy() +{ + simple_if_fini $h1 192.0.2.1/24 2001:db8::1/124 +} + +h2_create() +{ + simple_if_init $h2 192.0.2.2/24 2001:db8::2/124 +} + +h2_destroy() +{ + simple_if_fini $h2 192.0.2.2/24 2001:db8::2/124 +} + +ler1_create() +{ + tc qdisc add dev $edge1 ingress + tc filter add dev $edge1 ingress \ + matchall \ + action mpls mac_push label 102 \ + action vlan push_eth dst_mac $mpls2mac src_mac $mpls1mac \ + action mirred egress redirect dev $mpls1 + ip link set dev $edge1 up + + tc qdisc add dev $mpls1 ingress + tc filter add dev $mpls1 ingress \ + protocol mpls_uc \ + flower mpls_label 101 \ + action vlan pop_eth \ + action mpls pop protocol teb \ + action mirred egress redirect dev $edge1 + ip link set dev $mpls1 up +} + +ler1_destroy() +{ + ip link set dev $mpls1 down + tc qdisc del dev $mpls1 ingress + + ip link set dev $edge1 down + tc qdisc del dev $edge1 ingress +} + +ler2_create() +{ + tc qdisc add dev $edge2 ingress + tc filter add dev $edge2 ingress \ + matchall \ + action mpls mac_push label 101 \ + action vlan push_eth dst_mac $mpls1mac src_mac $mpls2mac \ + action mirred egress redirect dev $mpls2 + ip link set dev $edge2 up + + tc qdisc add dev $mpls2 ingress + tc filter add dev $mpls2 ingress \ + protocol mpls_uc \ + flower mpls_label 102 \ + action vlan pop_eth \ + action mpls pop protocol teb \ + action mirred egress redirect dev $edge2 + ip link set dev $mpls2 up +} + +ler2_destroy() +{ + ip link set dev $mpls2 down + tc qdisc del dev $mpls2 ingress + + ip link set dev $edge2 down + tc qdisc del dev $edge2 ingress +} + +mpls_forward_eth() +{ + ping_test $h1 192.0.2.2 + ping6_test $h1 2001:db8::2 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + edge1=${NETIFS[p2]} + + mpls1=${NETIFS[p3]} + mpls2=${NETIFS[p4]} + + edge2=${NETIFS[p5]} + h2=${NETIFS[p6]} + + mpls1mac=$(mac_get $mpls1) + mpls2mac=$(mac_get $mpls2) + + vrf_prepare + + h1_create + h2_create + ler1_create + ler2_create +} + +cleanup() +{ + pre_cleanup + + ler2_destroy + ler1_destroy + h2_destroy + h1_destroy + + vrf_cleanup +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +tc_offload_check +if [[ $? -ne 0 ]]; then + log_info "Could not test offloaded functionality" +else + tcflags="skip_sw" + tests_run +fi + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/tc_police.sh b/tools/testing/selftests/net/forwarding/tc_police.sh index 160f9cccdfb7..5103f64a71d6 100755 --- a/tools/testing/selftests/net/forwarding/tc_police.sh +++ b/tools/testing/selftests/net/forwarding/tc_police.sh @@ -35,6 +35,10 @@ ALL_TESTS=" police_shared_test police_rx_mirror_test police_tx_mirror_test + police_pps_rx_test + police_pps_tx_test + police_mtu_rx_test + police_mtu_tx_test " NUM_NETIFS=6 source tc_common.sh @@ -136,7 +140,7 @@ police_common_test() sleep 10 local t1=$(tc_rule_stats_get $h2 1 ingress .bytes) - local er=$((80 * 1000 * 1000)) + local er=$((10 * 1000 * 1000)) local nr=$(rate $t0 $t1 10) local nr_pct=$((100 * (nr - er) / er)) ((-10 <= nr_pct && nr_pct <= 10)) @@ -153,7 +157,7 @@ police_rx_test() # Rule to police traffic destined to $h2 on ingress of $rp1 tc filter add dev $rp1 ingress protocol ip pref 1 handle 101 flower \ dst_ip 198.51.100.1 ip_proto udp dst_port 54321 \ - action police rate 80mbit burst 16k conform-exceed drop/ok + action police rate 10mbit burst 16k conform-exceed drop/ok police_common_test "police on rx" @@ -165,7 +169,7 @@ police_tx_test() # Rule to police traffic destined to $h2 on egress of $rp2 tc filter add dev $rp2 egress protocol ip pref 1 handle 101 flower \ dst_ip 198.51.100.1 ip_proto udp dst_port 54321 \ - action police rate 80mbit burst 16k conform-exceed drop/ok + action police rate 10mbit burst 16k conform-exceed drop/ok police_common_test "police on tx" @@ -186,7 +190,7 @@ police_shared_common_test() sleep 10 local t1=$(tc_rule_stats_get $h2 1 ingress .bytes) - local er=$((80 * 1000 * 1000)) + local er=$((10 * 1000 * 1000)) local nr=$(rate $t0 $t1 10) local nr_pct=$((100 * (nr - er) / er)) ((-10 <= nr_pct && nr_pct <= 10)) @@ -207,7 +211,7 @@ police_shared_test() # Rule to police traffic destined to $h2 on ingress of $rp1 tc filter add dev $rp1 ingress protocol ip pref 1 handle 101 flower \ dst_ip 198.51.100.1 ip_proto udp dst_port 54321 \ - action police rate 80mbit burst 16k conform-exceed drop/ok \ + action police rate 10mbit burst 16k conform-exceed drop/ok \ index 10 # Rule to police a different flow destined to $h2 on egress of $rp2 @@ -246,7 +250,7 @@ police_mirror_common_test() # Rule to police traffic destined to $h2 and mirror to $h3 tc filter add dev $pol_if $dir protocol ip pref 1 handle 101 flower \ dst_ip 198.51.100.1 ip_proto udp dst_port 54321 \ - action police rate 80mbit burst 16k conform-exceed drop/pipe \ + action police rate 10mbit burst 16k conform-exceed drop/pipe \ action mirred egress mirror dev $rp3 mausezahn $h1 -a own -b $(mac_get $rp1) -A 192.0.2.1 -B 198.51.100.1 \ @@ -256,7 +260,7 @@ police_mirror_common_test() sleep 10 local t1=$(tc_rule_stats_get $h2 1 ingress .bytes) - local er=$((80 * 1000 * 1000)) + local er=$((10 * 1000 * 1000)) local nr=$(rate $t0 $t1 10) local nr_pct=$((100 * (nr - er) / er)) ((-10 <= nr_pct && nr_pct <= 10)) @@ -266,7 +270,7 @@ police_mirror_common_test() sleep 10 local t1=$(tc_rule_stats_get $h3 1 ingress .bytes) - local er=$((80 * 1000 * 1000)) + local er=$((10 * 1000 * 1000)) local nr=$(rate $t0 $t1 10) local nr_pct=$((100 * (nr - er) / er)) ((-10 <= nr_pct && nr_pct <= 10)) @@ -290,6 +294,110 @@ police_tx_mirror_test() police_mirror_common_test $rp2 egress "police tx and mirror" } +police_pps_common_test() +{ + local test_name=$1; shift + + RET=0 + + # Rule to measure bandwidth on ingress of $h2 + tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \ + dst_ip 198.51.100.1 ip_proto udp dst_port 54321 \ + action drop + + mausezahn $h1 -a own -b $(mac_get $rp1) -A 192.0.2.1 -B 198.51.100.1 \ + -t udp sp=12345,dp=54321 -p 1000 -c 0 -q & + + local t0=$(tc_rule_stats_get $h2 1 ingress .packets) + sleep 10 + local t1=$(tc_rule_stats_get $h2 1 ingress .packets) + + local er=$((2000)) + local nr=$(packets_rate $t0 $t1 10) + local nr_pct=$((100 * (nr - er) / er)) + ((-10 <= nr_pct && nr_pct <= 10)) + check_err $? "Expected rate $(humanize $er), got $(humanize $nr), which is $nr_pct% off. Required accuracy is +-10%." + + log_test "$test_name" + + { kill %% && wait %%; } 2>/dev/null + tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower +} + +police_pps_rx_test() +{ + # Rule to police traffic destined to $h2 on ingress of $rp1 + tc filter add dev $rp1 ingress protocol ip pref 1 handle 101 flower \ + dst_ip 198.51.100.1 ip_proto udp dst_port 54321 \ + action police pkts_rate 2000 pkts_burst 400 conform-exceed drop/ok + + police_pps_common_test "police pps on rx" + + tc filter del dev $rp1 ingress protocol ip pref 1 handle 101 flower +} + +police_pps_tx_test() +{ + # Rule to police traffic destined to $h2 on egress of $rp2 + tc filter add dev $rp2 egress protocol ip pref 1 handle 101 flower \ + dst_ip 198.51.100.1 ip_proto udp dst_port 54321 \ + action police pkts_rate 2000 pkts_burst 400 conform-exceed drop/ok + + police_pps_common_test "police pps on tx" + + tc filter del dev $rp2 egress protocol ip pref 1 handle 101 flower +} + +police_mtu_common_test() { + RET=0 + + local test_name=$1; shift + local dev=$1; shift + local direction=$1; shift + + tc filter add dev $dev $direction protocol ip pref 1 handle 101 flower \ + dst_ip 198.51.100.1 ip_proto udp dst_port 54321 \ + action police mtu 1042 conform-exceed drop/ok + + # to count "conform" packets + tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \ + dst_ip 198.51.100.1 ip_proto udp dst_port 54321 \ + action drop + + mausezahn $h1 -a own -b $(mac_get $rp1) -A 192.0.2.1 -B 198.51.100.1 \ + -t udp sp=12345,dp=54321 -p 1001 -c 10 -q + + mausezahn $h1 -a own -b $(mac_get $rp1) -A 192.0.2.1 -B 198.51.100.1 \ + -t udp sp=12345,dp=54321 -p 1000 -c 3 -q + + tc_check_packets "dev $dev $direction" 101 13 + check_err $? "wrong packet counter" + + # "exceed" packets + local overlimits_t0=$(tc_rule_stats_get ${dev} 1 ${direction} .overlimits) + test ${overlimits_t0} = 10 + check_err $? "wrong overlimits, expected 10 got ${overlimits_t0}" + + # "conform" packets + tc_check_packets "dev $h2 ingress" 101 3 + check_err $? "forwarding error" + + tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower + tc filter del dev $dev $direction protocol ip pref 1 handle 101 flower + + log_test "$test_name" +} + +police_mtu_rx_test() +{ + police_mtu_common_test "police mtu (rx)" $rp1 ingress +} + +police_mtu_tx_test() +{ + police_mtu_common_test "police mtu (tx)" $rp2 egress +} + setup_prepare() { h1=${NETIFS[p1]} diff --git a/tools/testing/selftests/net/forwarding/tc_tunnel_key.sh b/tools/testing/selftests/net/forwarding/tc_tunnel_key.sh new file mode 100755 index 000000000000..5a5dd9034819 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/tc_tunnel_key.sh @@ -0,0 +1,164 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +ALL_TESTS="tunnel_key_nofrag_test" + +NUM_NETIFS=4 +source tc_common.sh +source lib.sh + +tcflags="skip_hw" + +h1_create() +{ + simple_if_init $h1 192.0.2.1/24 + forwarding_enable + mtu_set $h1 1500 + tunnel_create h1-et vxlan 192.0.2.1 192.0.2.2 dev $h1 dstport 0 external + tc qdisc add dev h1-et clsact + mtu_set h1-et 1230 + mtu_restore $h1 + mtu_set $h1 1000 +} + +h1_destroy() +{ + tc qdisc del dev h1-et clsact + tunnel_destroy h1-et + forwarding_restore + mtu_restore $h1 + simple_if_fini $h1 192.0.2.1/24 +} + +h2_create() +{ + simple_if_init $h2 192.0.2.2/24 +} + +h2_destroy() +{ + simple_if_fini $h2 192.0.2.2/24 +} + +switch_create() +{ + simple_if_init $swp1 192.0.2.2/24 + tc qdisc add dev $swp1 clsact + simple_if_init $swp2 192.0.2.1/24 +} + +switch_destroy() +{ + simple_if_fini $swp2 192.0.2.1/24 + tc qdisc del dev $swp1 clsact + simple_if_fini $swp1 192.0.2.2/24 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + h1mac=$(mac_get $h1) + h2mac=$(mac_get $h2) + + swp1origmac=$(mac_get $swp1) + swp2origmac=$(mac_get $swp2) + ip link set $swp1 address $h2mac + ip link set $swp2 address $h1mac + + vrf_prepare + + h1_create + h2_create + switch_create + + if ! tc action add action tunnel_key help 2>&1 | grep -q nofrag; then + log_test "SKIP: iproute doesn't support nofrag" + exit $ksft_skip + fi +} + +cleanup() +{ + pre_cleanup + + switch_destroy + h2_destroy + h1_destroy + + vrf_cleanup + + ip link set $swp2 address $swp2origmac + ip link set $swp1 address $swp1origmac +} + +tunnel_key_nofrag_test() +{ + RET=0 + local i + + tc filter add dev $swp1 ingress protocol ip pref 100 handle 100 \ + flower src_ip 192.0.2.1 dst_ip 192.0.2.2 ip_proto udp \ + ip_flags nofrag action drop + tc filter add dev $swp1 ingress protocol ip pref 101 handle 101 \ + flower src_ip 192.0.2.1 dst_ip 192.0.2.2 ip_proto udp \ + ip_flags firstfrag action drop + tc filter add dev $swp1 ingress protocol ip pref 102 handle 102 \ + flower src_ip 192.0.2.1 dst_ip 192.0.2.2 ip_proto udp \ + ip_flags nofirstfrag action drop + + # test 'nofrag' set + tc filter add dev h1-et egress protocol all pref 1 handle 1 matchall $tcflags \ + action tunnel_key set src_ip 192.0.2.1 dst_ip 192.0.2.2 id 42 nofrag index 10 + $MZ h1-et -c 1 -p 930 -a 00:aa:bb:cc:dd:ee -b 00:ee:dd:cc:bb:aa -t ip -q + tc_check_packets "dev $swp1 ingress" 100 1 + check_err $? "packet smaller than MTU was not tunneled" + + $MZ h1-et -c 1 -p 931 -a 00:aa:bb:cc:dd:ee -b 00:ee:dd:cc:bb:aa -t ip -q + tc_check_packets "dev $swp1 ingress" 100 1 + check_err $? "packet bigger than MTU matched nofrag (nofrag was set)" + tc_check_packets "dev $swp1 ingress" 101 0 + check_err $? "packet bigger than MTU matched firstfrag (nofrag was set)" + tc_check_packets "dev $swp1 ingress" 102 0 + check_err $? "packet bigger than MTU matched nofirstfrag (nofrag was set)" + + # test 'nofrag' cleared + tc actions change action tunnel_key set src_ip 192.0.2.1 dst_ip 192.0.2.2 id 42 index 10 + $MZ h1-et -c 1 -p 931 -a 00:aa:bb:cc:dd:ee -b 00:ee:dd:cc:bb:aa -t ip -q + tc_check_packets "dev $swp1 ingress" 100 1 + check_err $? "packet bigger than MTU matched nofrag (nofrag was unset)" + tc_check_packets "dev $swp1 ingress" 101 1 + check_err $? "packet bigger than MTU didn't match firstfrag (nofrag was unset) " + tc_check_packets "dev $swp1 ingress" 102 1 + check_err $? "packet bigger than MTU didn't match nofirstfrag (nofrag was unset) " + + for i in 100 101 102; do + tc filter del dev $swp1 ingress protocol ip pref $i handle $i flower + done + tc filter del dev h1-et egress pref 1 handle 1 matchall + + log_test "tunnel_key nofrag ($tcflags)" +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +tc_offload_check +if [[ $? -ne 0 ]]; then + log_info "Could not test offloaded functionality" +else + tcflags="skip_sw" + tests_run +fi + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/tsn_lib.sh b/tools/testing/selftests/net/forwarding/tsn_lib.sh new file mode 100644 index 000000000000..b91bcd8008a9 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/tsn_lib.sh @@ -0,0 +1,249 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# Copyright 2021-2022 NXP + +REQUIRE_ISOCHRON=${REQUIRE_ISOCHRON:=yes} +REQUIRE_LINUXPTP=${REQUIRE_LINUXPTP:=yes} + +# Tunables +UTC_TAI_OFFSET=37 +ISOCHRON_CPU=1 + +if [[ "$REQUIRE_ISOCHRON" = "yes" ]]; then + # https://github.com/vladimiroltean/tsn-scripts + # WARNING: isochron versions pre-1.0 are unstable, + # always use the latest version + require_command isochron +fi +if [[ "$REQUIRE_LINUXPTP" = "yes" ]]; then + require_command phc2sys + require_command ptp4l +fi + +phc2sys_start() +{ + local uds_address=$1 + local extra_args="" + + if ! [ -z "${uds_address}" ]; then + extra_args="${extra_args} -z ${uds_address}" + fi + + phc2sys_log="$(mktemp)" + + chrt -f 10 phc2sys -m \ + -a -rr \ + --step_threshold 0.00002 \ + --first_step_threshold 0.00002 \ + ${extra_args} \ + > "${phc2sys_log}" 2>&1 & + phc2sys_pid=$! + + echo "phc2sys logs to ${phc2sys_log} and has pid ${phc2sys_pid}" + + sleep 1 +} + +phc2sys_stop() +{ + { kill ${phc2sys_pid} && wait ${phc2sys_pid}; } 2> /dev/null + rm "${phc2sys_log}" 2> /dev/null +} + +# Replace space separators from interface list with underscores +if_names_to_label() +{ + local if_name_list="$1" + + echo "${if_name_list/ /_}" +} + +ptp4l_start() +{ + local if_names="$1" + local slave_only=$2 + local uds_address=$3 + local log="ptp4l_log_$(if_names_to_label ${if_names})" + local pid="ptp4l_pid_$(if_names_to_label ${if_names})" + local extra_args="" + + for if_name in ${if_names}; do + extra_args="${extra_args} -i ${if_name}" + done + + if [ "${slave_only}" = true ]; then + extra_args="${extra_args} -s" + fi + + # declare dynamic variables ptp4l_log_${if_name} and ptp4l_pid_${if_name} + # as global, so that they can be referenced later + declare -g "${log}=$(mktemp)" + + chrt -f 10 ptp4l -m -2 -P \ + --step_threshold 0.00002 \ + --first_step_threshold 0.00002 \ + --tx_timestamp_timeout 100 \ + --uds_address="${uds_address}" \ + ${extra_args} \ + > "${!log}" 2>&1 & + declare -g "${pid}=$!" + + echo "ptp4l for interfaces ${if_names} logs to ${!log} and has pid ${!pid}" + + sleep 1 +} + +ptp4l_stop() +{ + local if_names="$1" + local log="ptp4l_log_$(if_names_to_label ${if_names})" + local pid="ptp4l_pid_$(if_names_to_label ${if_names})" + + { kill ${!pid} && wait ${!pid}; } 2> /dev/null + rm "${!log}" 2> /dev/null +} + +cpufreq_max() +{ + local cpu=$1 + local freq="cpu${cpu}_freq" + local governor="cpu${cpu}_governor" + + # Kernel may be compiled with CONFIG_CPU_FREQ disabled + if ! [ -d /sys/bus/cpu/devices/cpu${cpu}/cpufreq ]; then + return + fi + + # declare dynamic variables cpu${cpu}_freq and cpu${cpu}_governor as + # global, so they can be referenced later + declare -g "${freq}=$(cat /sys/bus/cpu/devices/cpu${cpu}/cpufreq/scaling_min_freq)" + declare -g "${governor}=$(cat /sys/bus/cpu/devices/cpu${cpu}/cpufreq/scaling_governor)" + + cat /sys/bus/cpu/devices/cpu${cpu}/cpufreq/scaling_max_freq > \ + /sys/bus/cpu/devices/cpu${cpu}/cpufreq/scaling_min_freq + echo -n "performance" > \ + /sys/bus/cpu/devices/cpu${cpu}/cpufreq/scaling_governor +} + +cpufreq_restore() +{ + local cpu=$1 + local freq="cpu${cpu}_freq" + local governor="cpu${cpu}_governor" + + if ! [ -d /sys/bus/cpu/devices/cpu${cpu}/cpufreq ]; then + return + fi + + echo "${!freq}" > /sys/bus/cpu/devices/cpu${cpu}/cpufreq/scaling_min_freq + echo -n "${!governor}" > \ + /sys/bus/cpu/devices/cpu${cpu}/cpufreq/scaling_governor +} + +isochron_recv_start() +{ + local if_name=$1 + local uds=$2 + local stats_port=$3 + local extra_args=$4 + local pid="isochron_pid_${stats_port}" + + if ! [ -z "${uds}" ]; then + extra_args="${extra_args} --unix-domain-socket ${uds}" + fi + + isochron rcv \ + --interface ${if_name} \ + --sched-priority 98 \ + --sched-fifo \ + --utc-tai-offset ${UTC_TAI_OFFSET} \ + --stats-port ${stats_port} \ + --quiet \ + ${extra_args} & \ + declare -g "${pid}=$!" + + sleep 1 +} + +isochron_recv_stop() +{ + local stats_port=$1 + local pid="isochron_pid_${stats_port}" + + { kill ${!pid} && wait ${!pid}; } 2> /dev/null +} + +isochron_do() +{ + local sender_if_name=$1; shift + local receiver_if_name=$1; shift + local sender_uds=$1; shift + local receiver_uds=$1; shift + local base_time=$1; shift + local cycle_time=$1; shift + local shift_time=$1; shift + local num_pkts=$1; shift + local vid=$1; shift + local priority=$1; shift + local dst_ip=$1; shift + local isochron_dat=$1; shift + local extra_args="" + local receiver_extra_args="" + local vrf="$(master_name_get ${sender_if_name})" + local use_l2="true" + + if ! [ -z "${dst_ip}" ]; then + use_l2="false" + fi + + if ! [ -z "${vrf}" ]; then + dst_ip="${dst_ip}%${vrf}" + fi + + if ! [ -z "${vid}" ]; then + vid="--vid=${vid}" + fi + + if [ -z "${receiver_uds}" ]; then + extra_args="${extra_args} --omit-remote-sync" + fi + + if ! [ -z "${shift_time}" ]; then + extra_args="${extra_args} --shift-time=${shift_time}" + fi + + if [ "${use_l2}" = "true" ]; then + extra_args="${extra_args} --l2 --etype=0xdead ${vid}" + receiver_extra_args="--l2 --etype=0xdead" + else + extra_args="${extra_args} --l4 --ip-destination=${dst_ip}" + receiver_extra_args="--l4" + fi + + cpufreq_max ${ISOCHRON_CPU} + + isochron_recv_start "${h2}" "${receiver_uds}" 5000 "${receiver_extra_args}" + + isochron send \ + --interface ${sender_if_name} \ + --unix-domain-socket ${sender_uds} \ + --priority ${priority} \ + --base-time ${base_time} \ + --cycle-time ${cycle_time} \ + --num-frames ${num_pkts} \ + --frame-size 64 \ + --txtime \ + --utc-tai-offset ${UTC_TAI_OFFSET} \ + --cpu-mask $((1 << ${ISOCHRON_CPU})) \ + --sched-fifo \ + --sched-priority 98 \ + --client 127.0.0.1 \ + --sync-threshold 5000 \ + --output-file ${isochron_dat} \ + ${extra_args} \ + --quiet + + isochron_recv_stop 5000 + + cpufreq_restore ${ISOCHRON_CPU} +} diff --git a/tools/testing/selftests/net/forwarding/vxlan_asymmetric.sh b/tools/testing/selftests/net/forwarding/vxlan_asymmetric.sh index 0727e2012b68..43469c7de118 100755 --- a/tools/testing/selftests/net/forwarding/vxlan_asymmetric.sh +++ b/tools/testing/selftests/net/forwarding/vxlan_asymmetric.sh @@ -525,7 +525,7 @@ arp_suppression() log_test "neigh_suppress: on / neigh exists: yes" - # Delete the neighbour from the the SVI. A single ARP request should be + # Delete the neighbour from the SVI. A single ARP request should be # received by the remote VTEP RET=0 diff --git a/tools/testing/selftests/net/forwarding/vxlan_asymmetric_ipv6.sh b/tools/testing/selftests/net/forwarding/vxlan_asymmetric_ipv6.sh new file mode 100755 index 000000000000..f4930098974f --- /dev/null +++ b/tools/testing/selftests/net/forwarding/vxlan_asymmetric_ipv6.sh @@ -0,0 +1,504 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# +--------------------------------+ +-----------------------------+ +# | vrf-h1 | | vrf-h2 | +# | + $h1 | | + $h2 | +# | | 2001:db8:1::1/64 | | | 2001:db8:2::1/64 | +# | | default via 2001:db8:1::3 | | | default via 2001:db8:2::3 | +# +----|---------------------------+ +-|---------------------------+ +# | | +# +----|------------------------------------------|---------------------------+ +# | SW | | | +# | +--|------------------------------------------|-------------------------+ | +# | | + $swp1 br1 + $swp2 | | +# | | vid 10 pvid untagged vid 20 pvid untagged | | +# | | | | +# | | + vx10 + vx20 | | +# | | local 2001:db8:3::1 local 2001:db8:3::1 | | +# | | remote 2001:db8:3::2 remote 2001:db8:3::2 | | +# | | id 1000 id 2000 | | +# | | dstport 4789 dstport 4789 | | +# | | vid 10 pvid untagged vid 20 pvid untagged | | +# | | | | +# | +-----------------------------------+-----------------------------------+ | +# | | | +# | +-----------------------------------|-----------------------------------+ | +# | | | | | +# | | +--------------------------------+--------------------------------+ | | +# | | | | | | +# | | + vlan10 vlan20 + | | +# | | | 2001:db8:1::2/64 2001:db8:2::2/64 | | | +# | | | | | | +# | | + vlan10-v (macvlan) vlan20-v (macvlan) + | | +# | | 2001:db8:1::3/64 2001:db8:2::3/64 | | +# | | 00:00:5e:00:01:01 00:00:5e:00:01:01 | | +# | | vrf-green | | +# | +-----------------------------------------------------------------------+ | +# | | +# | + $rp1 +lo | +# | | 2001:db8:4::1/64 2001:db8:3::1/128 | +# +----|----------------------------------------------------------------------+ +# | +# +----|--------------------------------------------------------+ +# | | vrf-spine | +# | + $rp2 | +# | 2001:db8:4::2/64 | +# | | (maybe) HW +# ============================================================================= +# | | (likely) SW +# | | +# | + v1 (veth) | +# | | 2001:db8:5::2/64 | +# +----|--------------------------------------------------------+ +# | +# +----|----------------------------------------------------------------------+ +# | + v2 (veth) +lo NS1 (netns) | +# | 2001:db8:5::1/64 2001:db8:3::2/128 | +# | | +# | +-----------------------------------------------------------------------+ | +# | | vrf-green | | +# | | + vlan10-v (macvlan) vlan20-v (macvlan) + | | +# | | | 2001:db8:1::3/64 2001:db8:2::3/64 | | | +# | | | 00:00:5e:00:01:01 00:00:5e:00:01:01 | | | +# | | | | | | +# | | + vlan10 vlan20 + | | +# | | | 2001:db8:1::3/64 2001:db8:2::3/64 | | | +# | | | | | | +# | | +--------------------------------+--------------------------------+ | | +# | | | | | +# | +-----------------------------------|-----------------------------------+ | +# | | | +# | +-----------------------------------+-----------------------------------+ | +# | | | | +# | | + vx10 + vx20 | | +# | | local 2001:db8:3::2 local 2001:db8:3::2 | | +# | | remote 2001:db8:3::1 remote 2001:db8:3::1 | | +# | | id 1000 id 2000 | | +# | | dstport 4789 dstport 4789 | | +# | | vid 10 pvid untagged vid 20 pvid untagged | | +# | | | | +# | | + w1 (veth) + w3 (veth) | | +# | | | vid 10 pvid untagged br1 | vid 20 pvid untagged | | +# | +--|------------------------------------------|-------------------------+ | +# | | | | +# | | | | +# | +--|----------------------+ +--|-------------------------+ | +# | | | vrf-h1 | | | vrf-h2 | | +# | | + w2 (veth) | | + w4 (veth) | | +# | | 2001:db8:1::4/64 | | 2001:db8:2::4/64 | | +# | | default via | | default via | | +# | | 2001:db8:1::3/64 | | 2001:db8:2::3/64 | | +# | +-------------------------+ +----------------------------+ | +# +---------------------------------------------------------------------------+ + +ALL_TESTS=" + ping_ipv6 + arp_decap +" +NUM_NETIFS=6 +source lib.sh + +require_command $ARPING + +hx_create() +{ + local vrf_name=$1; shift + local if_name=$1; shift + local ip_addr=$1; shift + local gw_ip=$1; shift + + vrf_create $vrf_name + ip link set dev $if_name master $vrf_name + ip link set dev $vrf_name up + ip link set dev $if_name up + + ip address add $ip_addr/64 dev $if_name + ip neigh replace $gw_ip lladdr 00:00:5e:00:01:01 nud permanent \ + dev $if_name + ip route add default vrf $vrf_name nexthop via $gw_ip +} +export -f hx_create + +hx_destroy() +{ + local vrf_name=$1; shift + local if_name=$1; shift + local ip_addr=$1; shift + local gw_ip=$1; shift + + ip route del default vrf $vrf_name nexthop via $gw_ip + ip neigh del $gw_ip dev $if_name + ip address del $ip_addr/64 dev $if_name + + ip link set dev $if_name down + vrf_destroy $vrf_name +} + +h1_create() +{ + hx_create "vrf-h1" $h1 2001:db8:1::1 2001:db8:1::3 +} + +h1_destroy() +{ + hx_destroy "vrf-h1" $h1 2001:db8:1::1 2001:db8:1::3 +} + +h2_create() +{ + hx_create "vrf-h2" $h2 2001:db8:2::1 2001:db8:2::3 +} + +h2_destroy() +{ + hx_destroy "vrf-h2" $h2 2001:db8:2::1 2001:db8:2::3 +} + +switch_create() +{ + ip link add name br1 type bridge vlan_filtering 1 vlan_default_pvid 0 \ + mcast_snooping 0 + # Make sure the bridge uses the MAC address of the local port and not + # that of the VxLAN's device. + ip link set dev br1 address $(mac_get $swp1) + ip link set dev br1 up + + ip link set dev $rp1 up + ip address add dev $rp1 2001:db8:4::1/64 + ip route add 2001:db8:3::2/128 nexthop via 2001:db8:4::2 + + ip link add name vx10 type vxlan id 1000 \ + local 2001:db8:3::1 remote 2001:db8:3::2 dstport 4789 \ + nolearning udp6zerocsumrx udp6zerocsumtx tos inherit ttl 100 + ip link set dev vx10 up + + ip link set dev vx10 master br1 + bridge vlan add vid 10 dev vx10 pvid untagged + + ip link add name vx20 type vxlan id 2000 \ + local 2001:db8:3::1 remote 2001:db8:3::2 dstport 4789 \ + nolearning udp6zerocsumrx udp6zerocsumtx tos inherit ttl 100 + ip link set dev vx20 up + + ip link set dev vx20 master br1 + bridge vlan add vid 20 dev vx20 pvid untagged + + ip link set dev $swp1 master br1 + ip link set dev $swp1 up + bridge vlan add vid 10 dev $swp1 pvid untagged + + ip link set dev $swp2 master br1 + ip link set dev $swp2 up + bridge vlan add vid 20 dev $swp2 pvid untagged + + ip address add 2001:db8:3::1/128 dev lo + + # Create SVIs + vrf_create "vrf-green" + ip link set dev vrf-green up + + ip link add link br1 name vlan10 up master vrf-green type vlan id 10 + ip address add 2001:db8:1::2/64 dev vlan10 + ip link add link vlan10 name vlan10-v up master vrf-green \ + address 00:00:5e:00:01:01 type macvlan mode private + ip address add 2001:db8:1::3/64 dev vlan10-v + + ip link add link br1 name vlan20 up master vrf-green type vlan id 20 + ip address add 2001:db8:2::2/64 dev vlan20 + ip link add link vlan20 name vlan20-v up master vrf-green \ + address 00:00:5e:00:01:01 type macvlan mode private + ip address add 2001:db8:2::3/64 dev vlan20-v + + bridge vlan add vid 10 dev br1 self + bridge vlan add vid 20 dev br1 self + + bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 10 + bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 20 + +} + +switch_destroy() +{ + bridge fdb del 00:00:5e:00:01:01 dev br1 self local vlan 20 + bridge fdb del 00:00:5e:00:01:01 dev br1 self local vlan 10 + + bridge vlan del vid 20 dev br1 self + bridge vlan del vid 10 dev br1 self + + ip link del dev vlan20 + + ip link del dev vlan10 + + vrf_destroy "vrf-green" + + ip address del 2001:db8:3::1/128 dev lo + + bridge vlan del vid 20 dev $swp2 + ip link set dev $swp2 down + ip link set dev $swp2 nomaster + + bridge vlan del vid 10 dev $swp1 + ip link set dev $swp1 down + ip link set dev $swp1 nomaster + + bridge vlan del vid 20 dev vx20 + ip link set dev vx20 nomaster + + ip link set dev vx20 down + ip link del dev vx20 + + bridge vlan del vid 10 dev vx10 + ip link set dev vx10 nomaster + + ip link set dev vx10 down + ip link del dev vx10 + + ip route del 2001:db8:3::2 nexthop via 2001:db8:4::2 + ip address del dev $rp1 2001:db8:4::1/64 + ip link set dev $rp1 down + + ip link set dev br1 down + ip link del dev br1 +} + +spine_create() +{ + vrf_create "vrf-spine" + ip link set dev $rp2 master vrf-spine + ip link set dev v1 master vrf-spine + ip link set dev vrf-spine up + ip link set dev $rp2 up + ip link set dev v1 up + + ip address add 2001:db8:4::2/64 dev $rp2 + ip address add 2001:db8:5::2/64 dev v1 + + ip route add 2001:db8:3::1/128 vrf vrf-spine nexthop via \ + 2001:db8:4::1 + ip route add 2001:db8:3::2/128 vrf vrf-spine nexthop via \ + 2001:db8:5::1 +} + +spine_destroy() +{ + ip route del 2001:db8:3::2/128 vrf vrf-spine nexthop via \ + 2001:db8:5::1 + ip route del 2001:db8:3::1/128 vrf vrf-spine nexthop via \ + 2001:db8:4::1 + + ip address del 2001:db8:5::2/64 dev v1 + ip address del 2001:db8:4::2/64 dev $rp2 + + ip link set dev v1 down + ip link set dev $rp2 down + vrf_destroy "vrf-spine" +} + +ns_h1_create() +{ + hx_create "vrf-h1" w2 2001:db8:1::4 2001:db8:1::3 +} +export -f ns_h1_create + +ns_h2_create() +{ + hx_create "vrf-h2" w4 2001:db8:2::4 2001:db8:2::3 +} +export -f ns_h2_create + +ns_switch_create() +{ + ip link add name br1 type bridge vlan_filtering 1 vlan_default_pvid 0 \ + mcast_snooping 0 + ip link set dev br1 up + + ip link set dev v2 up + ip address add dev v2 2001:db8:5::1/64 + ip route add 2001:db8:3::1 nexthop via 2001:db8:5::2 + + ip link add name vx10 type vxlan id 1000 \ + local 2001:db8:3::2 remote 2001:db8:3::1 dstport 4789 \ + nolearning udp6zerocsumrx udp6zerocsumtx tos inherit ttl 100 + ip link set dev vx10 up + + ip link set dev vx10 master br1 + bridge vlan add vid 10 dev vx10 pvid untagged + + ip link add name vx20 type vxlan id 2000 \ + local 2001:db8:3::2 remote 2001:db8:3::1 dstport 4789 \ + nolearning udp6zerocsumrx udp6zerocsumtx tos inherit ttl 100 + ip link set dev vx20 up + + ip link set dev vx20 master br1 + bridge vlan add vid 20 dev vx20 pvid untagged + + ip link set dev w1 master br1 + ip link set dev w1 up + bridge vlan add vid 10 dev w1 pvid untagged + + ip link set dev w3 master br1 + ip link set dev w3 up + bridge vlan add vid 20 dev w3 pvid untagged + + ip address add 2001:db8:3::2/128 dev lo + + # Create SVIs + vrf_create "vrf-green" + ip link set dev vrf-green up + + ip link add link br1 name vlan10 up master vrf-green type vlan id 10 + ip address add 2001:db8:1::3/64 dev vlan10 + ip link add link vlan10 name vlan10-v up master vrf-green \ + address 00:00:5e:00:01:01 type macvlan mode private + ip address add 2001:db8:1::3/64 dev vlan10-v + + ip link add link br1 name vlan20 up master vrf-green type vlan id 20 + ip address add 2001:db8:2::3/64 dev vlan20 + ip link add link vlan20 name vlan20-v up master vrf-green \ + address 00:00:5e:00:01:01 type macvlan mode private + ip address add 2001:db8:2::3/64 dev vlan20-v + + bridge vlan add vid 10 dev br1 self + bridge vlan add vid 20 dev br1 self + + bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 10 + bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 20 +} +export -f ns_switch_create + +ns_init() +{ + ip link add name w1 type veth peer name w2 + ip link add name w3 type veth peer name w4 + + ip link set dev lo up + + ns_h1_create + ns_h2_create + ns_switch_create +} +export -f ns_init + +ns1_create() +{ + ip netns add ns1 + ip link set dev v2 netns ns1 + in_ns ns1 ns_init +} + +ns1_destroy() +{ + ip netns exec ns1 ip link set dev v2 netns 1 + ip netns del ns1 +} + +macs_populate() +{ + local mac1=$1; shift + local mac2=$1; shift + local ip1=$1; shift + local ip2=$1; shift + local dst=$1; shift + + bridge fdb add $mac1 dev vx10 self master extern_learn static \ + dst $dst vlan 10 + bridge fdb add $mac2 dev vx20 self master extern_learn static \ + dst $dst vlan 20 + + ip neigh add $ip1 lladdr $mac1 nud noarp dev vlan10 \ + extern_learn + ip neigh add $ip2 lladdr $mac2 nud noarp dev vlan20 \ + extern_learn +} +export -f macs_populate + +macs_initialize() +{ + local h1_ns_mac=$(in_ns ns1 mac_get w2) + local h2_ns_mac=$(in_ns ns1 mac_get w4) + local h1_mac=$(mac_get $h1) + local h2_mac=$(mac_get $h2) + + macs_populate $h1_ns_mac $h2_ns_mac 2001:db8:1::4 2001:db8:2::4 \ + 2001:db8:3::2 + in_ns ns1 macs_populate $h1_mac $h2_mac 2001:db8:1::1 2001:db8:2::1 \ + 2001:db8:3::1 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + rp1=${NETIFS[p5]} + rp2=${NETIFS[p6]} + + vrf_prepare + forwarding_enable + + h1_create + h2_create + switch_create + + ip link add name v1 type veth peer name v2 + spine_create + ns1_create + in_ns ns1 forwarding_enable + + macs_initialize +} + +cleanup() +{ + pre_cleanup + + ns1_destroy + spine_destroy + ip link del dev v1 + + switch_destroy + h2_destroy + h1_destroy + + forwarding_restore + vrf_cleanup +} + +ping_ipv6() +{ + ping6_test $h1 2001:db8:2::1 ": local->local vid 10->vid 20" + ping6_test $h1 2001:db8:1::4 ": local->remote vid 10->vid 10" + ping6_test $h2 2001:db8:2::4 ": local->remote vid 20->vid 20" + ping6_test $h1 2001:db8:2::4 ": local->remote vid 10->vid 20" + ping6_test $h2 2001:db8:1::4 ": local->remote vid 20->vid 10" +} + +arp_decap() +{ + # Repeat the ping tests, but without populating the neighbours. This + # makes sure we correctly decapsulate ARP packets + log_info "deleting neighbours from vlan interfaces" + + ip neigh del 2001:db8:1::4 dev vlan10 + ip neigh del 2001:db8:2::4 dev vlan20 + + ping_ipv6 + + ip neigh replace 2001:db8:1::4 lladdr $(in_ns ns1 mac_get w2) \ + nud noarp dev vlan10 extern_learn + ip neigh replace 2001:db8:2::4 lladdr $(in_ns ns1 mac_get w4) \ + nud noarp dev vlan20 extern_learn +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh index ce6bea9675c0..6f0a2e452ba1 100755 --- a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh +++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh @@ -495,7 +495,7 @@ vxlan_ping_test() local delta=$((t1 - t0)) # Tolerate a couple stray extra packets. - ((expect <= delta && delta <= expect + 2)) + ((expect <= delta && delta <= expect + 5)) check_err $? "$capture_dev: Expected to capture $expect packets, got $delta." } @@ -532,7 +532,7 @@ __test_ecn_encap() RET=0 tc filter add dev v1 egress pref 77 prot ip \ - flower ip_tos $tos action pass + flower ip_tos $tos ip_proto udp dst_port $VXPORT action pass sleep 1 vxlan_ping_test $h1 192.0.2.3 "-Q $q" v1 egress 77 10 tc filter del dev v1 egress pref 77 prot ip @@ -657,10 +657,21 @@ test_ecn_decap() { # In accordance with INET_ECN_decapsulate() __test_ecn_decap 00 00 0x00 + __test_ecn_decap 00 01 0x00 + __test_ecn_decap 00 02 0x00 + # 00 03 is tested in test_ecn_decap_error() + __test_ecn_decap 01 00 0x01 __test_ecn_decap 01 01 0x01 - __test_ecn_decap 02 01 0x02 + __test_ecn_decap 01 02 0x01 __test_ecn_decap 01 03 0x03 + __test_ecn_decap 02 00 0x02 + __test_ecn_decap 02 01 0x01 + __test_ecn_decap 02 02 0x02 __test_ecn_decap 02 03 0x03 + __test_ecn_decap 03 00 0x03 + __test_ecn_decap 03 01 0x03 + __test_ecn_decap 03 02 0x03 + __test_ecn_decap 03 03 0x03 test_ecn_decap_error } diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d_ipv6.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d_ipv6.sh new file mode 100755 index 000000000000..a603f7b0a08f --- /dev/null +++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d_ipv6.sh @@ -0,0 +1,804 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# +-----------------------+ +------------------------+ +# | H1 (vrf) | | H2 (vrf) | +# | + $h1 | | + $h2 | +# | | 192.0.2.1/28 | | | 192.0.2.2/28 | +# | | 2001:db8:1::1/64 | | | 2001:db8:1::2/64 | +# +----|------------------+ +----|-------------------+ +# | | +# +----|--------------------------------------------------|-------------------+ +# | SW | | | +# | +--|--------------------------------------------------|-----------------+ | +# | | + $swp1 BR1 (802.1d) + $swp2 | | +# | | | | +# | | + vx1 (vxlan) | | +# | | local 2001:db8:3::1 | | +# | | remote 2001:db8:4::1 2001:db8:5::1 | | +# | | id 1000 dstport $VXPORT | | +# | +-----------------------------------------------------------------------+ | +# | | +# | 2001:db8:4::0/64 via 2001:db8:3::2 | +# | 2001:db8:5::0/64 via 2001:db8:3::2 | +# | | +# | + $rp1 | +# | | 2001:db8:3::1/64 | +# +----|----------------------------------------------------------------------+ +# | +# +----|----------------------------------------------------------+ +# | | VRP2 (vrf) | +# | + $rp2 | +# | 2001:db8:3::2/64 | +# | | (maybe) HW +# ============================================================================= +# | | (likely) SW +# | + v1 (veth) + v3 (veth) | +# | | 2001:db8:4::2/64 | 2001:db8:5::2/64 | +# +----|---------------------------------------|------------------+ +# | | +# +----|--------------------------------+ +----|-------------------------------+ +# | + v2 (veth) NS1 (netns) | | + v4 (veth) NS2 (netns) | +# | 2001:db8:4::1/64 | | 2001:db8:5::1/64 | +# | | | | +# | 2001:db8:3::0/64 via 2001:db8:4::2 | | 2001:db8:3::0/64 via 2001:db8:5::2 | +# | 2001:db8:5::1/128 via 2001:db8:4::2 | | 2001:db8:4::1/128 via | +# | | | 2001:db8:5::2 | +# | | | | +# | +-------------------------------+ | | +-------------------------------+ | +# | | BR2 (802.1d) | | | | BR2 (802.1d) | | +# | | + vx2 (vxlan) | | | | + vx2 (vxlan) | | +# | | local 2001:db8:4::1 | | | | local 2001:db8:5::1 | | +# | | remote 2001:db8:3::1 | | | | remote 2001:db8:3::1 | | +# | | remote 2001:db8:5::1 | | | | remote 2001:db8:4::1 | | +# | | id 1000 dstport $VXPORT | | | | id 1000 dstport $VXPORT | | +# | | | | | | | | +# | | + w1 (veth) | | | | + w1 (veth) | | +# | +--|----------------------------+ | | +--|----------------------------+ | +# | | | | | | +# | +--|----------------------------+ | | +--|----------------------------+ | +# | | + w2 (veth) VW2 (vrf) | | | | + w2 (veth) VW2 (vrf) | | +# | | 192.0.2.3/28 | | | | 192.0.2.4/28 | | +# | | 2001:db8:1::3/64 | | | | 2001:db8:1::4/64 | | +# | +-------------------------------+ | | +-------------------------------+ | +# +-------------------------------------+ +------------------------------------+ + +: ${VXPORT:=4789} +export VXPORT + +: ${ALL_TESTS:=" + ping_ipv4 + ping_ipv6 + test_flood + test_unicast + test_ttl + test_tos + test_ecn_encap + test_ecn_decap + reapply_config + ping_ipv4 + ping_ipv6 + test_flood + test_unicast +"} + +NUM_NETIFS=6 +source lib.sh +source tc_common.sh + +h1_create() +{ + simple_if_init $h1 192.0.2.1/28 2001:db8:1::1/64 + tc qdisc add dev $h1 clsact +} + +h1_destroy() +{ + tc qdisc del dev $h1 clsact + simple_if_fini $h1 192.0.2.1/28 2001:db8:1::1/64 +} + +h2_create() +{ + simple_if_init $h2 192.0.2.2/28 2001:db8:1::2/64 + tc qdisc add dev $h2 clsact +} + +h2_destroy() +{ + tc qdisc del dev $h2 clsact + simple_if_fini $h2 192.0.2.2/28 2001:db8:1::2/64 +} + +rp1_set_addr() +{ + ip address add dev $rp1 2001:db8:3::1/64 + + ip route add 2001:db8:4::0/64 nexthop via 2001:db8:3::2 + ip route add 2001:db8:5::0/64 nexthop via 2001:db8:3::2 +} + +rp1_unset_addr() +{ + ip route del 2001:db8:5::0/64 nexthop via 2001:db8:3::2 + ip route del 2001:db8:4::0/64 nexthop via 2001:db8:3::2 + + ip address del dev $rp1 2001:db8:3::1/64 +} + +switch_create() +{ + ip link add name br1 type bridge vlan_filtering 0 mcast_snooping 0 + # Make sure the bridge uses the MAC address of the local port and not + # that of the VxLAN's device. + ip link set dev br1 address $(mac_get $swp1) + ip link set dev br1 up + + ip link set dev $rp1 up + rp1_set_addr + tc qdisc add dev $rp1 clsact + + ip link add name vx1 type vxlan id 1000 local 2001:db8:3::1 \ + dstport "$VXPORT" nolearning udp6zerocsumrx udp6zerocsumtx \ + tos inherit ttl 100 + ip link set dev vx1 up + + ip link set dev vx1 master br1 + ip link set dev $swp1 master br1 + ip link set dev $swp1 up + tc qdisc add dev $swp1 clsact + + ip link set dev $swp2 master br1 + ip link set dev $swp2 up + + bridge fdb append dev vx1 00:00:00:00:00:00 dst 2001:db8:4::1 self + bridge fdb append dev vx1 00:00:00:00:00:00 dst 2001:db8:5::1 self +} + +switch_destroy() +{ + bridge fdb del dev vx1 00:00:00:00:00:00 dst 2001:db8:5::1 self + bridge fdb del dev vx1 00:00:00:00:00:00 dst 2001:db8:4::1 self + + ip link set dev $swp2 down + ip link set dev $swp2 nomaster + + tc qdisc del dev $swp1 clsact + ip link set dev $swp1 down + ip link set dev $swp1 nomaster + + ip link set dev vx1 nomaster + ip link set dev vx1 down + ip link del dev vx1 + + tc qdisc del dev $rp1 clsact + rp1_unset_addr + ip link set dev $rp1 down + + ip link set dev br1 down + ip link del dev br1 +} + +vrp2_create() +{ + simple_if_init $rp2 2001:db8:3::2/64 + __simple_if_init v1 v$rp2 2001:db8:4::2/64 + __simple_if_init v3 v$rp2 2001:db8:5::2/64 + tc qdisc add dev v1 clsact +} + +vrp2_destroy() +{ + tc qdisc del dev v1 clsact + __simple_if_fini v3 2001:db8:5::2/64 + __simple_if_fini v1 2001:db8:4::2/64 + simple_if_fini $rp2 2001:db8:3::2/64 +} + +ns_init_common() +{ + local in_if=$1; shift + local in_addr=$1; shift + local other_in_addr=$1; shift + local nh_addr=$1; shift + local host_addr_ipv4=$1; shift + local host_addr_ipv6=$1; shift + + ip link set dev $in_if up + ip address add dev $in_if $in_addr/64 + tc qdisc add dev $in_if clsact + + ip link add name br2 type bridge vlan_filtering 0 + ip link set dev br2 up + + ip link add name w1 type veth peer name w2 + + ip link set dev w1 master br2 + ip link set dev w1 up + + ip link add name vx2 type vxlan id 1000 local $in_addr \ + dstport "$VXPORT" udp6zerocsumrx + ip link set dev vx2 up + bridge fdb append dev vx2 00:00:00:00:00:00 dst 2001:db8:3::1 self + bridge fdb append dev vx2 00:00:00:00:00:00 dst $other_in_addr self + + ip link set dev vx2 master br2 + tc qdisc add dev vx2 clsact + + simple_if_init w2 $host_addr_ipv4/28 $host_addr_ipv6/64 + + ip route add 2001:db8:3::0/64 nexthop via $nh_addr + ip route add $other_in_addr/128 nexthop via $nh_addr +} +export -f ns_init_common + +ns1_create() +{ + ip netns add ns1 + ip link set dev v2 netns ns1 + in_ns ns1 \ + ns_init_common v2 2001:db8:4::1 2001:db8:5::1 2001:db8:4::2 \ + 192.0.2.3 2001:db8:1::3 +} + +ns1_destroy() +{ + ip netns exec ns1 ip link set dev v2 netns 1 + ip netns del ns1 +} + +ns2_create() +{ + ip netns add ns2 + ip link set dev v4 netns ns2 + in_ns ns2 \ + ns_init_common v4 2001:db8:5::1 2001:db8:4::1 2001:db8:5::2 \ + 192.0.2.4 2001:db8:1::4 +} + +ns2_destroy() +{ + ip netns exec ns2 ip link set dev v4 netns 1 + ip netns del ns2 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + rp1=${NETIFS[p5]} + rp2=${NETIFS[p6]} + + vrf_prepare + forwarding_enable + + h1_create + h2_create + switch_create + + ip link add name v1 type veth peer name v2 + ip link add name v3 type veth peer name v4 + vrp2_create + ns1_create + ns2_create + + r1_mac=$(in_ns ns1 mac_get w2) + r2_mac=$(in_ns ns2 mac_get w2) + h2_mac=$(mac_get $h2) +} + +cleanup() +{ + pre_cleanup + + ns2_destroy + ns1_destroy + vrp2_destroy + ip link del dev v3 + ip link del dev v1 + + switch_destroy + h2_destroy + h1_destroy + + forwarding_restore + vrf_cleanup +} + +# For the first round of tests, vx1 is the first device to get +# attached to the bridge, and at that point the local IP is already +# configured. Try the other scenario of attaching the devices to a an +# already-offloaded bridge, and only then assign the local IP. +reapply_config() +{ + log_info "Reapplying configuration" + + bridge fdb del dev vx1 00:00:00:00:00:00 dst 2001:db8:5::1 self + bridge fdb del dev vx1 00:00:00:00:00:00 dst 2001:db8:4::1 self + ip link set dev vx1 nomaster + rp1_unset_addr + sleep 5 + + ip link set dev vx1 master br1 + bridge fdb append dev vx1 00:00:00:00:00:00 dst 2001:db8:4::1 self + bridge fdb append dev vx1 00:00:00:00:00:00 dst 2001:db8:5::1 self + sleep 1 + rp1_set_addr + sleep 5 +} + +__ping_ipv4() +{ + local vxlan_local_ip=$1; shift + local vxlan_remote_ip=$1; shift + local src_ip=$1; shift + local dst_ip=$1; shift + local dev=$1; shift + local info=$1; shift + + RET=0 + + tc filter add dev $rp1 egress protocol ipv6 pref 1 handle 101 \ + flower ip_proto udp src_ip $vxlan_local_ip \ + dst_ip $vxlan_remote_ip dst_port $VXPORT $TC_FLAG action pass + # Match ICMP-reply packets after decapsulation, so source IP is + # destination IP of the ping and destination IP is source IP of the + # ping. + tc filter add dev $swp1 egress protocol ip pref 1 handle 101 \ + flower src_ip $dst_ip dst_ip $src_ip \ + $TC_FLAG action pass + + # Send 100 packets and verify that at least 100 packets hit the rule, + # to overcome ARP noise. + PING_COUNT=100 PING_TIMEOUT=20 ping_do $dev $dst_ip + check_err $? "Ping failed" + + tc_check_at_least_x_packets "dev $rp1 egress" 101 10 100 + check_err $? "Encapsulated packets did not go through router" + + tc_check_at_least_x_packets "dev $swp1 egress" 101 10 100 + check_err $? "Decapsulated packets did not go through switch" + + log_test "ping: $info" + + tc filter del dev $swp1 egress + tc filter del dev $rp1 egress +} + +ping_ipv4() +{ + RET=0 + + local local_sw_ip=2001:db8:3::1 + local remote_ns1_ip=2001:db8:4::1 + local remote_ns2_ip=2001:db8:5::1 + local h1_ip=192.0.2.1 + local w2_ns1_ip=192.0.2.3 + local w2_ns2_ip=192.0.2.4 + + ping_test $h1 192.0.2.2 ": local->local" + + __ping_ipv4 $local_sw_ip $remote_ns1_ip $h1_ip $w2_ns1_ip $h1 \ + "local->remote 1" + __ping_ipv4 $local_sw_ip $remote_ns2_ip $h1_ip $w2_ns2_ip $h1 \ + "local->remote 2" +} + +__ping_ipv6() +{ + local vxlan_local_ip=$1; shift + local vxlan_remote_ip=$1; shift + local src_ip=$1; shift + local dst_ip=$1; shift + local dev=$1; shift + local info=$1; shift + + RET=0 + + tc filter add dev $rp1 egress protocol ipv6 pref 1 handle 101 \ + flower ip_proto udp src_ip $vxlan_local_ip \ + dst_ip $vxlan_remote_ip dst_port $VXPORT $TC_FLAG action pass + # Match ICMP-reply packets after decapsulation, so source IP is + # destination IP of the ping and destination IP is source IP of the + # ping. + tc filter add dev $swp1 egress protocol ipv6 pref 1 handle 101 \ + flower src_ip $dst_ip dst_ip $src_ip $TC_FLAG action pass + + # Send 100 packets and verify that at least 100 packets hit the rule, + # to overcome neighbor discovery noise. + PING_COUNT=100 PING_TIMEOUT=20 ping6_do $dev $dst_ip + check_err $? "Ping failed" + + tc_check_at_least_x_packets "dev $rp1 egress" 101 100 + check_err $? "Encapsulated packets did not go through router" + + tc_check_at_least_x_packets "dev $swp1 egress" 101 100 + check_err $? "Decapsulated packets did not go through switch" + + log_test "ping6: $info" + + tc filter del dev $swp1 egress + tc filter del dev $rp1 egress +} + +ping_ipv6() +{ + RET=0 + + local local_sw_ip=2001:db8:3::1 + local remote_ns1_ip=2001:db8:4::1 + local remote_ns2_ip=2001:db8:5::1 + local h1_ip=2001:db8:1::1 + local w2_ns1_ip=2001:db8:1::3 + local w2_ns2_ip=2001:db8:1::4 + + ping6_test $h1 2001:db8:1::2 ": local->local" + + __ping_ipv6 $local_sw_ip $remote_ns1_ip $h1_ip $w2_ns1_ip $h1 \ + "local->remote 1" + __ping_ipv6 $local_sw_ip $remote_ns2_ip $h1_ip $w2_ns2_ip $h1 \ + "local->remote 2" +} + +maybe_in_ns() +{ + echo ${1:+in_ns} $1 +} + +__flood_counter_add_del() +{ + local add_del=$1; shift + local dst_ip=$1; shift + local dev=$1; shift + local ns=$1; shift + + # Putting the ICMP capture both to HW and to SW will end up + # double-counting the packets that are trapped to slow path, such as for + # the unicast test. Adding either skip_hw or skip_sw fixes this problem, + # but with skip_hw, the flooded packets are not counted at all, because + # those are dropped due to MAC address mismatch; and skip_sw is a no-go + # for veth-based topologies. + # + # So try to install with skip_sw and fall back to skip_sw if that fails. + + $(maybe_in_ns $ns) tc filter $add_del dev "$dev" ingress \ + proto ipv6 pref 100 flower dst_ip $dst_ip ip_proto \ + icmpv6 skip_sw action pass 2>/dev/null || \ + $(maybe_in_ns $ns) tc filter $add_del dev "$dev" ingress \ + proto ipv6 pref 100 flower dst_ip $dst_ip ip_proto \ + icmpv6 skip_hw action pass +} + +flood_counter_install() +{ + __flood_counter_add_del add "$@" +} + +flood_counter_uninstall() +{ + __flood_counter_add_del del "$@" +} + +flood_fetch_stat() +{ + local dev=$1; shift + local ns=$1; shift + + $(maybe_in_ns $ns) tc_rule_stats_get $dev 100 ingress +} + +flood_fetch_stats() +{ + local counters=("${@}") + local counter + + for counter in "${counters[@]}"; do + flood_fetch_stat $counter + done +} + +vxlan_flood_test() +{ + local mac=$1; shift + local dst=$1; shift + local -a expects=("${@}") + + local -a counters=($h2 "vx2 ns1" "vx2 ns2") + local counter + local key + + for counter in "${counters[@]}"; do + flood_counter_install $dst $counter + done + + local -a t0s=($(flood_fetch_stats "${counters[@]}")) + $MZ -6 $h1 -c 10 -d 100msec -p 64 -b $mac -B $dst -t icmp6 type=128 -q + sleep 1 + local -a t1s=($(flood_fetch_stats "${counters[@]}")) + + for key in ${!t0s[@]}; do + local delta=$((t1s[$key] - t0s[$key])) + local expect=${expects[$key]} + + ((expect == delta)) + check_err $? "${counters[$key]}: Expected to capture $expect packets, got $delta." + done + + for counter in "${counters[@]}"; do + flood_counter_uninstall $dst $counter + done +} + +__test_flood() +{ + local mac=$1; shift + local dst=$1; shift + local what=$1; shift + + RET=0 + + vxlan_flood_test $mac $dst 10 10 10 + + log_test "VXLAN: $what" +} + +test_flood() +{ + __test_flood de:ad:be:ef:13:37 2001:db8:1::100 "flood" +} + +vxlan_fdb_add_del() +{ + local add_del=$1; shift + local mac=$1; shift + local dev=$1; shift + local dst=$1; shift + + bridge fdb $add_del dev $dev $mac self static permanent \ + ${dst:+dst} $dst 2>/dev/null + bridge fdb $add_del dev $dev $mac master static 2>/dev/null +} + +__test_unicast() +{ + local mac=$1; shift + local dst=$1; shift + local hit_idx=$1; shift + local what=$1; shift + + RET=0 + + local -a expects=(0 0 0) + expects[$hit_idx]=10 + + vxlan_flood_test $mac $dst "${expects[@]}" + + log_test "VXLAN: $what" +} + +test_unicast() +{ + local -a targets=("$h2_mac $h2" + "$r1_mac vx1 2001:db8:4::1" + "$r2_mac vx1 2001:db8:5::1") + local target + + for target in "${targets[@]}"; do + vxlan_fdb_add_del add $target + done + + __test_unicast $h2_mac 2001:db8:1::2 0 "local MAC unicast" + __test_unicast $r1_mac 2001:db8:1::3 1 "remote MAC 1 unicast" + __test_unicast $r2_mac 2001:db8:1::4 2 "remote MAC 2 unicast" + + for target in "${targets[@]}"; do + vxlan_fdb_add_del del $target + done +} + +vxlan_ping_test() +{ + local ping_dev=$1; shift + local ping_dip=$1; shift + local ping_args=$1; shift + local capture_dev=$1; shift + local capture_dir=$1; shift + local capture_pref=$1; shift + local expect=$1; shift + + local t0=$(tc_rule_stats_get $capture_dev $capture_pref $capture_dir) + ping6_do $ping_dev $ping_dip "$ping_args" + local t1=$(tc_rule_stats_get $capture_dev $capture_pref $capture_dir) + local delta=$((t1 - t0)) + + # Tolerate a couple stray extra packets. + ((expect <= delta && delta <= expect + 5)) + check_err $? "$capture_dev: Expected to capture $expect packets, got $delta." +} + +test_ttl() +{ + RET=0 + + tc filter add dev v1 egress pref 77 protocol ipv6 \ + flower ip_ttl 99 action pass + vxlan_ping_test $h1 2001:db8:1::3 "" v1 egress 77 10 + tc filter del dev v1 egress pref 77 protocol ipv6 + + log_test "VXLAN: envelope TTL" +} + +test_tos() +{ + RET=0 + + tc filter add dev v1 egress pref 77 protocol ipv6 \ + flower ip_tos 0x14 action pass + vxlan_ping_test $h1 2001:db8:1::3 "-Q 0x14" v1 egress 77 10 + vxlan_ping_test $h1 2001:db8:1::3 "-Q 0x18" v1 egress 77 0 + tc filter del dev v1 egress pref 77 protocol ipv6 + + log_test "VXLAN: envelope TOS inheritance" +} + +__test_ecn_encap() +{ + local q=$1; shift + local tos=$1; shift + + RET=0 + + tc filter add dev v1 egress pref 77 protocol ipv6 \ + flower ip_tos $tos ip_proto udp dst_port $VXPORT action pass + sleep 1 + vxlan_ping_test $h1 2001:db8:1::3 "-Q $q" v1 egress 77 10 + tc filter del dev v1 egress pref 77 protocol ipv6 + + log_test "VXLAN: ECN encap: $q->$tos" +} + +test_ecn_encap() +{ + # In accordance with INET_ECN_encapsulate() + __test_ecn_encap 0x00 0x00 + __test_ecn_encap 0x01 0x01 + __test_ecn_encap 0x02 0x02 + __test_ecn_encap 0x03 0x02 +} + +vxlan_encapped_ping_do() +{ + local count=$1; shift + local dev=$1; shift + local next_hop_mac=$1; shift + local dest_ip=$1; shift + local dest_mac=$1; shift + local inner_tos=$1; shift + local outer_tos=$1; shift + local saddr="20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:03" + local daddr="20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:01" + + $MZ -6 $dev -c $count -d 100msec -q \ + -b $next_hop_mac -B $dest_ip \ + -t udp tos=$outer_tos,sp=23456,dp=$VXPORT,p=$(: + )"08:"$( : VXLAN flags + )"00:00:00:"$( : VXLAN reserved + )"00:03:e8:"$( : VXLAN VNI + )"00:"$( : VXLAN reserved + )"$dest_mac:"$( : ETH daddr + )"$(mac_get w2):"$( : ETH saddr + )"86:dd:"$( : ETH type + )"6"$( : IP version + )"$inner_tos"$( : Traffic class + )"0:00:00:"$( : Flow label + )"00:08:"$( : Payload length + )"3a:"$( : Next header + )"04:"$( : Hop limit + )"$saddr:"$( : IP saddr + )"$daddr:"$( : IP daddr + )"80:"$( : ICMPv6.type + )"00:"$( : ICMPv6.code + )"00:"$( : ICMPv6.checksum + ) +} +export -f vxlan_encapped_ping_do + +vxlan_encapped_ping_test() +{ + local ping_dev=$1; shift + local nh_dev=$1; shift + local ping_dip=$1; shift + local inner_tos=$1; shift + local outer_tos=$1; shift + local stat_get=$1; shift + local expect=$1; shift + + local t0=$($stat_get) + + in_ns ns1 \ + vxlan_encapped_ping_do 10 $ping_dev $(mac_get $nh_dev) \ + $ping_dip $(mac_get $h1) \ + $inner_tos $outer_tos + sleep 1 + local t1=$($stat_get) + local delta=$((t1 - t0)) + + # Tolerate a couple stray extra packets. + ((expect <= delta && delta <= expect + 2)) + check_err $? "Expected to capture $expect packets, got $delta." +} +export -f vxlan_encapped_ping_test + +__test_ecn_decap() +{ + local orig_inner_tos=$1; shift + local orig_outer_tos=$1; shift + local decapped_tos=$1; shift + + RET=0 + + tc filter add dev $h1 ingress pref 77 protocol ipv6 \ + flower src_ip 2001:db8:1::3 dst_ip 2001:db8:1::1 \ + ip_tos $decapped_tos action drop + sleep 1 + vxlan_encapped_ping_test v2 v1 2001:db8:3::1 \ + $orig_inner_tos $orig_outer_tos \ + "tc_rule_stats_get $h1 77 ingress" 10 + tc filter del dev $h1 ingress pref 77 + + log_test "VXLAN: ECN decap: $orig_outer_tos/$orig_inner_tos->$decapped_tos" +} + +test_ecn_decap_error() +{ + local orig_inner_tos="0:0" + local orig_outer_tos=03 + + RET=0 + + vxlan_encapped_ping_test v2 v1 2001:db8:3::1 \ + $orig_inner_tos $orig_outer_tos \ + "link_stats_rx_errors_get vx1" 10 + + log_test "VXLAN: ECN decap: $orig_outer_tos/$orig_inner_tos->error" +} + +test_ecn_decap() +{ + # In accordance with INET_ECN_decapsulate() + __test_ecn_decap "0:0" 00 0x00 + __test_ecn_decap "0:0" 01 0x00 + __test_ecn_decap "0:0" 02 0x00 + # 00 03 is tested in test_ecn_decap_error() + __test_ecn_decap "0:1" 00 0x01 + __test_ecn_decap "0:1" 01 0x01 + __test_ecn_decap "0:1" 02 0x01 + __test_ecn_decap "0:1" 03 0x03 + __test_ecn_decap "0:2" 00 0x02 + __test_ecn_decap "0:2" 01 0x01 + __test_ecn_decap "0:2" 02 0x02 + __test_ecn_decap "0:2" 03 0x03 + __test_ecn_decap "0:3" 00 0x03 + __test_ecn_decap "0:3" 01 0x03 + __test_ecn_decap "0:3" 02 0x03 + __test_ecn_decap "0:3" 03 0x03 + test_ecn_decap_error +} + +test_all() +{ + log_info "Running tests with UDP port $VXPORT" + tests_run +} + +trap cleanup EXIT + +setup_prepare +setup_wait +test_all + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d_port_8472_ipv6.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d_port_8472_ipv6.sh new file mode 100755 index 000000000000..00540317737a --- /dev/null +++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d_port_8472_ipv6.sh @@ -0,0 +1,11 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# A wrapper to run VXLAN tests with an unusual port number. + +VXPORT=8472 +ALL_TESTS=" + ping_ipv4 + ping_ipv6 +" +source vxlan_bridge_1d_ipv6.sh diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh index a5789721ba92..fb9a34cb50c6 100755 --- a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh +++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh @@ -680,26 +680,6 @@ test_pvid() log_test "VXLAN: flood after vlan re-add" } -vxlan_ping_test() -{ - local ping_dev=$1; shift - local ping_dip=$1; shift - local ping_args=$1; shift - local capture_dev=$1; shift - local capture_dir=$1; shift - local capture_pref=$1; shift - local expect=$1; shift - - local t0=$(tc_rule_stats_get $capture_dev $capture_pref $capture_dir) - ping_do $ping_dev $ping_dip "$ping_args" - local t1=$(tc_rule_stats_get $capture_dev $capture_pref $capture_dir) - local delta=$((t1 - t0)) - - # Tolerate a couple stray extra packets. - ((expect <= delta && delta <= expect + 2)) - check_err $? "$capture_dev: Expected to capture $expect packets, got $delta." -} - __test_learning() { local -a expects=(0 0 0 0 0) @@ -770,7 +750,7 @@ __test_learning() expects[0]=0; expects[$idx1]=10; expects[$idx2]=0 vxlan_flood_test $mac $dst $vid "${expects[@]}" - sleep 20 + sleep 60 bridge fdb show brport $vx | grep $mac | grep -q self check_fail $? @@ -816,11 +796,11 @@ test_learning() local dst=192.0.2.100 local vid=10 - # Enable learning on the VxLAN devices and set ageing time to 10 seconds - ip link set dev br1 type bridge ageing_time 1000 - ip link set dev vx10 type vxlan ageing 10 + # Enable learning on the VxLAN devices and set ageing time to 30 seconds + ip link set dev br1 type bridge ageing_time 3000 + ip link set dev vx10 type vxlan ageing 30 ip link set dev vx10 type vxlan learning - ip link set dev vx20 type vxlan ageing 10 + ip link set dev vx20 type vxlan ageing 30 ip link set dev vx20 type vxlan learning reapply_config diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_ipv6.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_ipv6.sh new file mode 100755 index 000000000000..e83fde79f40d --- /dev/null +++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_ipv6.sh @@ -0,0 +1,837 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# +-----------------------+ +------------------------+ +# | H1 (vrf) | | H2 (vrf) | +# | + $h1.10 | | + $h2.10 | +# | | 192.0.2.1/28 | | | 192.0.2.2/28 | +# | | 2001:db8:1::1/64 | | | 2001:db8:1::2/64 | +# | | | | | | +# | | + $h1.20 | | | + $h2.20 | +# | \ | 198.51.100.1/24 | | \ | 198.51.100.2/24 | +# | \ | 2001:db8:2::1/64 | | \ | 2001:db8:2::2/64 | +# | \| | | \| | +# | + $h1 | | + $h2 | +# +----|------------------+ +----|-------------------+ +# | | +# +----|--------------------------------------------------|-------------------+ +# | SW | | | +# | +--|--------------------------------------------------|-----------------+ | +# | | + $swp1 BR1 (802.1q) + $swp2 | | +# | | vid 10 vid 10 | | +# | | vid 20 vid 20 | | +# | | | | +# | | + vx10 (vxlan) + vx20 (vxlan) | | +# | | local: local: | | +# | | 2001:db8:3::1 2001:db8:3::1 | | +# | | remote: remote: | | +# | | 2001:db8:4::1 2001:db8:5::1 2001:db8:4::1 2001:db8:5::1 | | +# | | id 1000 dstport $VXPORT id 2000 dstport $VXPORT | | +# | | vid 10 pvid untagged vid 20 pvid untagged | | +# | +-----------------------------------------------------------------------+ | +# | | +# | 2001:db8:4::0/64 via 2001:db8:3::2 | +# | 2001:db8:5::0/64 via 2001:db8:3::2 | +# | | +# | + $rp1 | +# | | 2001:db8:3::1/64 | +# +----|----------------------------------------------------------------------+ +# | +# +----|----------------------------------------------------------+ +# | | VRP2 (vrf) | +# | + $rp2 | +# | 2001:db8:3::2/64 | +# | | (maybe) HW +# ============================================================================= +# | | (likely) SW +# | + v1 (veth) + v3 (veth) | +# | | 2001:db8:4::2/64 | 2001:db8:5::2/64 | +# +----|---------------------------------------|------------------+ +# | | +# +----|--------------------------------+ +----|-------------------------------+ +# | + v2 (veth) NS1 (netns) | | + v4 (veth) NS2 (netns) | +# | 2001:db8:4::1/64 | | 2001:db8:5::1/64 | +# | | | | +# | 2001:db8:3::0/64 via 2001:db8:4::2 | | 2001:db8:3::0/64 via 2001:db8:5::2 | +# | 2001:db8:5::1/128 via 2001:db8:4::2 | | 2001:db8:4::1/128 via | +# | | | 2001:db8:5::2 | +# | | | | +# | +-------------------------------+ | | +-------------------------------+ | +# | | BR2 (802.1q) | | | | BR2 (802.1q) | | +# | | + vx10 (vxlan) | | | | + vx10 (vxlan) | | +# | | local 2001:db8:4::1 | | | | local 2001:db8:5::1 | | +# | | remote 2001:db8:3::1 | | | | remote 2001:db8:3::1 | | +# | | remote 2001:db8:5::1 | | | | remote 2001:db8:4::1 | | +# | | id 1000 dstport $VXPORT | | | | id 1000 dstport $VXPORT | | +# | | vid 10 pvid untagged | | | | vid 10 pvid untagged | | +# | | | | | | | | +# | | + vx20 (vxlan) | | | | + vx20 (vxlan) | | +# | | local 2001:db8:4::1 | | | | local 2001:db8:5::1 | | +# | | remote 2001:db8:3::1 | | | | remote 2001:db8:3::1 | | +# | | remote 2001:db8:5::1 | | | | remote 2001:db8:4::1 | | +# | | id 2000 dstport $VXPORT | | | | id 2000 dstport $VXPORT | | +# | | vid 20 pvid untagged | | | | vid 20 pvid untagged | | +# | | | | | | | | +# | | + w1 (veth) | | | | + w1 (veth) | | +# | | | vid 10 | | | | | vid 10 | | +# | | | vid 20 | | | | | vid 20 | | +# | +--|----------------------------+ | | +--|----------------------------+ | +# | | | | | | +# | +--|----------------------------+ | | +--|----------------------------+ | +# | | + w2 (veth) VW2 (vrf) | | | | + w2 (veth) VW2 (vrf) | | +# | | |\ | | | | |\ | | +# | | | + w2.10 | | | | | + w2.10 | | +# | | | 192.0.2.3/28 | | | | | 192.0.2.4/28 | | +# | | | 2001:db8:1::3/64 | | | | | 2001:db8:1::4/64 | | +# | | | | | | | | | | +# | | + w2.20 | | | | + w2.20 | | +# | | 198.51.100.3/24 | | | | 198.51.100.4/24 | | +# | | 2001:db8:2::3/64 | | | | 2001:db8:2::4/64 | | +# | +-------------------------------+ | | +-------------------------------+ | +# +-------------------------------------+ +------------------------------------+ + +: ${VXPORT:=4789} +export VXPORT + +: ${ALL_TESTS:=" + ping_ipv4 + ping_ipv6 + test_flood + test_unicast + reapply_config + ping_ipv4 + ping_ipv6 + test_flood + test_unicast + test_pvid + ping_ipv4 + ping_ipv6 + test_flood + test_pvid +"} + +NUM_NETIFS=6 +source lib.sh +source tc_common.sh + +h1_create() +{ + simple_if_init $h1 + tc qdisc add dev $h1 clsact + vlan_create $h1 10 v$h1 192.0.2.1/28 2001:db8:1::1/64 + vlan_create $h1 20 v$h1 198.51.100.1/24 2001:db8:2::1/64 +} + +h1_destroy() +{ + vlan_destroy $h1 20 + vlan_destroy $h1 10 + tc qdisc del dev $h1 clsact + simple_if_fini $h1 +} + +h2_create() +{ + simple_if_init $h2 + tc qdisc add dev $h2 clsact + vlan_create $h2 10 v$h2 192.0.2.2/28 2001:db8:1::2/64 + vlan_create $h2 20 v$h2 198.51.100.2/24 2001:db8:2::2/64 +} + +h2_destroy() +{ + vlan_destroy $h2 20 + vlan_destroy $h2 10 + tc qdisc del dev $h2 clsact + simple_if_fini $h2 +} + +rp1_set_addr() +{ + ip address add dev $rp1 2001:db8:3::1/64 + + ip route add 2001:db8:4::0/64 nexthop via 2001:db8:3::2 + ip route add 2001:db8:5::0/64 nexthop via 2001:db8:3::2 +} + +rp1_unset_addr() +{ + ip route del 2001:db8:5::0/64 nexthop via 2001:db8:3::2 + ip route del 2001:db8:4::0/64 nexthop via 2001:db8:3::2 + + ip address del dev $rp1 2001:db8:3::1/64 +} + +switch_create() +{ + ip link add name br1 type bridge vlan_filtering 1 vlan_default_pvid 0 \ + mcast_snooping 0 + # Make sure the bridge uses the MAC address of the local port and not + # that of the VxLAN's device. + ip link set dev br1 address $(mac_get $swp1) + ip link set dev br1 up + + ip link set dev $rp1 up + rp1_set_addr + tc qdisc add dev $rp1 clsact + + ip link add name vx10 type vxlan id 1000 local 2001:db8:3::1 \ + dstport "$VXPORT" nolearning udp6zerocsumrx udp6zerocsumtx \ + tos inherit ttl 100 + ip link set dev vx10 up + + ip link set dev vx10 master br1 + bridge vlan add vid 10 dev vx10 pvid untagged + + ip link add name vx20 type vxlan id 2000 local 2001:db8:3::1 \ + dstport "$VXPORT" nolearning udp6zerocsumrx udp6zerocsumtx \ + tos inherit ttl 100 + ip link set dev vx20 up + + ip link set dev vx20 master br1 + bridge vlan add vid 20 dev vx20 pvid untagged + + ip link set dev $swp1 master br1 + ip link set dev $swp1 up + tc qdisc add dev $swp1 clsact + bridge vlan add vid 10 dev $swp1 + bridge vlan add vid 20 dev $swp1 + + ip link set dev $swp2 master br1 + ip link set dev $swp2 up + bridge vlan add vid 10 dev $swp2 + bridge vlan add vid 20 dev $swp2 + + bridge fdb append dev vx10 00:00:00:00:00:00 dst 2001:db8:4::1 self + bridge fdb append dev vx10 00:00:00:00:00:00 dst 2001:db8:5::1 self + + bridge fdb append dev vx20 00:00:00:00:00:00 dst 2001:db8:4::1 self + bridge fdb append dev vx20 00:00:00:00:00:00 dst 2001:db8:5::1 self +} + +switch_destroy() +{ + bridge fdb del dev vx20 00:00:00:00:00:00 dst 2001:db8:5::1 self + bridge fdb del dev vx20 00:00:00:00:00:00 dst 2001:db8:4::1 self + + bridge fdb del dev vx10 00:00:00:00:00:00 dst 2001:db8:5::1 self + bridge fdb del dev vx10 00:00:00:00:00:00 dst 2001:db8:4::1 self + + bridge vlan del vid 20 dev $swp2 + bridge vlan del vid 10 dev $swp2 + ip link set dev $swp2 down + ip link set dev $swp2 nomaster + + bridge vlan del vid 20 dev $swp1 + bridge vlan del vid 10 dev $swp1 + tc qdisc del dev $swp1 clsact + ip link set dev $swp1 down + ip link set dev $swp1 nomaster + + bridge vlan del vid 20 dev vx20 + ip link set dev vx20 nomaster + + ip link set dev vx20 down + ip link del dev vx20 + + bridge vlan del vid 10 dev vx10 + ip link set dev vx10 nomaster + + ip link set dev vx10 down + ip link del dev vx10 + + tc qdisc del dev $rp1 clsact + rp1_unset_addr + ip link set dev $rp1 down + + ip link set dev br1 down + ip link del dev br1 +} + +vrp2_create() +{ + simple_if_init $rp2 2001:db8:3::2/64 + __simple_if_init v1 v$rp2 2001:db8:4::2/64 + __simple_if_init v3 v$rp2 2001:db8:5::2/64 + tc qdisc add dev v1 clsact +} + +vrp2_destroy() +{ + tc qdisc del dev v1 clsact + __simple_if_fini v3 2001:db8:5::2/64 + __simple_if_fini v1 2001:db8:4::2/64 + simple_if_fini $rp2 2001:db8:3::2/64 +} + +ns_init_common() +{ + local in_if=$1; shift + local in_addr=$1; shift + local other_in_addr=$1; shift + local nh_addr=$1; shift + local host_addr1_ipv4=$1; shift + local host_addr1_ipv6=$1; shift + local host_addr2_ipv4=$1; shift + local host_addr2_ipv6=$1; shift + + ip link set dev $in_if up + ip address add dev $in_if $in_addr/64 + tc qdisc add dev $in_if clsact + + ip link add name br2 type bridge vlan_filtering 1 vlan_default_pvid 0 + ip link set dev br2 up + + ip link add name w1 type veth peer name w2 + + ip link set dev w1 master br2 + ip link set dev w1 up + + bridge vlan add vid 10 dev w1 + bridge vlan add vid 20 dev w1 + + ip link add name vx10 type vxlan id 1000 local $in_addr \ + dstport "$VXPORT" udp6zerocsumrx + ip link set dev vx10 up + bridge fdb append dev vx10 00:00:00:00:00:00 dst 2001:db8:3::1 self + bridge fdb append dev vx10 00:00:00:00:00:00 dst $other_in_addr self + + ip link set dev vx10 master br2 + tc qdisc add dev vx10 clsact + + bridge vlan add vid 10 dev vx10 pvid untagged + + ip link add name vx20 type vxlan id 2000 local $in_addr \ + dstport "$VXPORT" udp6zerocsumrx + ip link set dev vx20 up + bridge fdb append dev vx20 00:00:00:00:00:00 dst 2001:db8:3::1 self + bridge fdb append dev vx20 00:00:00:00:00:00 dst $other_in_addr self + + ip link set dev vx20 master br2 + tc qdisc add dev vx20 clsact + + bridge vlan add vid 20 dev vx20 pvid untagged + + simple_if_init w2 + vlan_create w2 10 vw2 $host_addr1_ipv4/28 $host_addr1_ipv6/64 + vlan_create w2 20 vw2 $host_addr2_ipv4/24 $host_addr2_ipv6/64 + + ip route add 2001:db8:3::0/64 nexthop via $nh_addr + ip route add $other_in_addr/128 nexthop via $nh_addr +} +export -f ns_init_common + +ns1_create() +{ + ip netns add ns1 + ip link set dev v2 netns ns1 + in_ns ns1 \ + ns_init_common v2 2001:db8:4::1 2001:db8:5::1 2001:db8:4::2 \ + 192.0.2.3 2001:db8:1::3 198.51.100.3 2001:db8:2::3 +} + +ns1_destroy() +{ + ip netns exec ns1 ip link set dev v2 netns 1 + ip netns del ns1 +} + +ns2_create() +{ + ip netns add ns2 + ip link set dev v4 netns ns2 + in_ns ns2 \ + ns_init_common v4 2001:db8:5::1 2001:db8:4::1 2001:db8:5::2 \ + 192.0.2.4 2001:db8:1::4 198.51.100.4 2001:db8:2::4 +} + +ns2_destroy() +{ + ip netns exec ns2 ip link set dev v4 netns 1 + ip netns del ns2 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + rp1=${NETIFS[p5]} + rp2=${NETIFS[p6]} + + vrf_prepare + forwarding_enable + + h1_create + h2_create + switch_create + + ip link add name v1 type veth peer name v2 + ip link add name v3 type veth peer name v4 + vrp2_create + ns1_create + ns2_create + + r1_mac=$(in_ns ns1 mac_get w2) + r2_mac=$(in_ns ns2 mac_get w2) + h2_mac=$(mac_get $h2) +} + +cleanup() +{ + pre_cleanup + + ns2_destroy + ns1_destroy + vrp2_destroy + ip link del dev v3 + ip link del dev v1 + + switch_destroy + h2_destroy + h1_destroy + + forwarding_restore + vrf_cleanup +} + +# For the first round of tests, vx10 and vx20 were the first devices to get +# attached to the bridge, and at that point the local IP is already +# configured. Try the other scenario of attaching these devices to a bridge +# that already has local ports members, and only then assign the local IP. +reapply_config() +{ + log_info "Reapplying configuration" + + bridge fdb del dev vx20 00:00:00:00:00:00 dst 2001:db8:5::1 self + bridge fdb del dev vx20 00:00:00:00:00:00 dst 2001:db8:4::1 self + + bridge fdb del dev vx10 00:00:00:00:00:00 dst 2001:db8:5::1 self + bridge fdb del dev vx10 00:00:00:00:00:00 dst 2001:db8:4::1 self + + ip link set dev vx20 nomaster + ip link set dev vx10 nomaster + + rp1_unset_addr + sleep 5 + + ip link set dev vx10 master br1 + bridge vlan add vid 10 dev vx10 pvid untagged + + ip link set dev vx20 master br1 + bridge vlan add vid 20 dev vx20 pvid untagged + + bridge fdb append dev vx10 00:00:00:00:00:00 dst 2001:db8:4::1 self + bridge fdb append dev vx10 00:00:00:00:00:00 dst 2001:db8:5::1 self + + bridge fdb append dev vx20 00:00:00:00:00:00 dst 2001:db8:4::1 self + bridge fdb append dev vx20 00:00:00:00:00:00 dst 2001:db8:5::1 self + + rp1_set_addr + sleep 5 +} + +__ping_ipv4() +{ + local vxlan_local_ip=$1; shift + local vxlan_remote_ip=$1; shift + local src_ip=$1; shift + local dst_ip=$1; shift + local dev=$1; shift + local info=$1; shift + + RET=0 + + tc filter add dev $rp1 egress protocol ipv6 pref 1 handle 101 \ + flower ip_proto udp src_ip $vxlan_local_ip \ + dst_ip $vxlan_remote_ip dst_port $VXPORT $TC_FLAG action pass + # Match ICMP-reply packets after decapsulation, so source IP is + # destination IP of the ping and destination IP is source IP of the + # ping. + tc filter add dev $swp1 egress protocol 802.1q pref 1 handle 101 \ + flower vlan_ethtype ipv4 src_ip $dst_ip dst_ip $src_ip \ + $TC_FLAG action pass + + # Send 100 packets and verify that at least 100 packets hit the rule, + # to overcome ARP noise. + PING_COUNT=100 PING_TIMEOUT=20 ping_do $dev $dst_ip + check_err $? "Ping failed" + + tc_check_at_least_x_packets "dev $rp1 egress" 101 10 100 + check_err $? "Encapsulated packets did not go through router" + + tc_check_at_least_x_packets "dev $swp1 egress" 101 10 100 + check_err $? "Decapsulated packets did not go through switch" + + log_test "ping: $info" + + tc filter del dev $swp1 egress + tc filter del dev $rp1 egress +} + +ping_ipv4() +{ + RET=0 + + local local_sw_ip=2001:db8:3::1 + local remote_ns1_ip=2001:db8:4::1 + local remote_ns2_ip=2001:db8:5::1 + local h1_10_ip=192.0.2.1 + local h1_20_ip=198.51.100.1 + local w2_10_ns1_ip=192.0.2.3 + local w2_10_ns2_ip=192.0.2.4 + local w2_20_ns1_ip=198.51.100.3 + local w2_20_ns2_ip=198.51.100.4 + + ping_test $h1.10 192.0.2.2 ": local->local vid 10" + ping_test $h1.20 198.51.100.2 ": local->local vid 20" + + __ping_ipv4 $local_sw_ip $remote_ns1_ip $h1_10_ip $w2_10_ns1_ip $h1.10 \ + "local->remote 1 vid 10" + __ping_ipv4 $local_sw_ip $remote_ns2_ip $h1_10_ip $w2_10_ns2_ip $h1.10 \ + "local->remote 2 vid 10" + __ping_ipv4 $local_sw_ip $remote_ns1_ip $h1_20_ip $w2_20_ns1_ip $h1.20 \ + "local->remote 1 vid 20" + __ping_ipv4 $local_sw_ip $remote_ns2_ip $h1_20_ip $w2_20_ns2_ip $h1.20 \ + "local->remote 2 vid 20" +} + +__ping_ipv6() +{ + local vxlan_local_ip=$1; shift + local vxlan_remote_ip=$1; shift + local src_ip=$1; shift + local dst_ip=$1; shift + local dev=$1; shift + local info=$1; shift + + RET=0 + + tc filter add dev $rp1 egress protocol ipv6 pref 1 handle 101 \ + flower ip_proto udp src_ip $vxlan_local_ip \ + dst_ip $vxlan_remote_ip dst_port $VXPORT $TC_FLAG action pass + # Match ICMP-reply packets after decapsulation, so source IP is + # destination IP of the ping and destination IP is source IP of the + # ping. + tc filter add dev $swp1 egress protocol 802.1q pref 1 handle 101 \ + flower vlan_ethtype ipv6 src_ip $dst_ip dst_ip $src_ip \ + $TC_FLAG action pass + + # Send 100 packets and verify that at least 100 packets hit the rule, + # to overcome neighbor discovery noise. + PING_COUNT=100 PING_TIMEOUT=20 ping6_do $dev $dst_ip + check_err $? "Ping failed" + + tc_check_at_least_x_packets "dev $rp1 egress" 101 100 + check_err $? "Encapsulated packets did not go through router" + + tc_check_at_least_x_packets "dev $swp1 egress" 101 100 + check_err $? "Decapsulated packets did not go through switch" + + log_test "ping6: $info" + + tc filter del dev $swp1 egress + tc filter del dev $rp1 egress +} + +ping_ipv6() +{ + RET=0 + + local local_sw_ip=2001:db8:3::1 + local remote_ns1_ip=2001:db8:4::1 + local remote_ns2_ip=2001:db8:5::1 + local h1_10_ip=2001:db8:1::1 + local h1_20_ip=2001:db8:2::1 + local w2_10_ns1_ip=2001:db8:1::3 + local w2_10_ns2_ip=2001:db8:1::4 + local w2_20_ns1_ip=2001:db8:2::3 + local w2_20_ns2_ip=2001:db8:2::4 + + ping6_test $h1.10 2001:db8:1::2 ": local->local vid 10" + ping6_test $h1.20 2001:db8:2::2 ": local->local vid 20" + + __ping_ipv6 $local_sw_ip $remote_ns1_ip $h1_10_ip $w2_10_ns1_ip $h1.10 \ + "local->remote 1 vid 10" + __ping_ipv6 $local_sw_ip $remote_ns2_ip $h1_10_ip $w2_10_ns2_ip $h1.10 \ + "local->remote 2 vid 10" + __ping_ipv6 $local_sw_ip $remote_ns1_ip $h1_20_ip $w2_20_ns1_ip $h1.20 \ + "local->remote 1 vid 20" + __ping_ipv6 $local_sw_ip $remote_ns2_ip $h1_20_ip $w2_20_ns2_ip $h1.20 \ + "local->remote 2 vid 20" +} + +maybe_in_ns() +{ + echo ${1:+in_ns} $1 +} + +__flood_counter_add_del() +{ + local add_del=$1; shift + local dst_ip=$1; shift + local dev=$1; shift + local ns=$1; shift + + # Putting the ICMP capture both to HW and to SW will end up + # double-counting the packets that are trapped to slow path, such as for + # the unicast test. Adding either skip_hw or skip_sw fixes this problem, + # but with skip_hw, the flooded packets are not counted at all, because + # those are dropped due to MAC address mismatch; and skip_sw is a no-go + # for veth-based topologies. + # + # So try to install with skip_sw and fall back to skip_sw if that fails. + + $(maybe_in_ns $ns) tc filter $add_del dev "$dev" ingress \ + proto ipv6 pref 100 flower dst_ip $dst_ip ip_proto \ + icmpv6 skip_sw action pass 2>/dev/null || \ + $(maybe_in_ns $ns) tc filter $add_del dev "$dev" ingress \ + proto ipv6 pref 100 flower dst_ip $dst_ip ip_proto \ + icmpv6 skip_hw action pass +} + +flood_counter_install() +{ + __flood_counter_add_del add "$@" +} + +flood_counter_uninstall() +{ + __flood_counter_add_del del "$@" +} + +flood_fetch_stat() +{ + local dev=$1; shift + local ns=$1; shift + + $(maybe_in_ns $ns) tc_rule_stats_get $dev 100 ingress +} + +flood_fetch_stats() +{ + local counters=("${@}") + local counter + + for counter in "${counters[@]}"; do + flood_fetch_stat $counter + done +} + +vxlan_flood_test() +{ + local mac=$1; shift + local dst=$1; shift + local vid=$1; shift + local -a expects=("${@}") + + local -a counters=($h2 "vx10 ns1" "vx20 ns1" "vx10 ns2" "vx20 ns2") + local counter + local key + + # Packets reach the local host tagged whereas they reach the VxLAN + # devices untagged. In order to be able to use the same filter for + # all counters, make sure the packets also reach the local host + # untagged + bridge vlan add vid $vid dev $swp2 untagged + for counter in "${counters[@]}"; do + flood_counter_install $dst $counter + done + + local -a t0s=($(flood_fetch_stats "${counters[@]}")) + $MZ -6 $h1 -Q $vid -c 10 -d 100msec -p 64 -b $mac -B $dst -t icmp6 type=128 -q + sleep 1 + local -a t1s=($(flood_fetch_stats "${counters[@]}")) + + for key in ${!t0s[@]}; do + local delta=$((t1s[$key] - t0s[$key])) + local expect=${expects[$key]} + + ((expect == delta)) + check_err $? "${counters[$key]}: Expected to capture $expect packets, got $delta." + done + + for counter in "${counters[@]}"; do + flood_counter_uninstall $dst $counter + done + bridge vlan add vid $vid dev $swp2 +} + +__test_flood() +{ + local mac=$1; shift + local dst=$1; shift + local vid=$1; shift + local what=$1; shift + local -a expects=("${@}") + + RET=0 + + vxlan_flood_test $mac $dst $vid "${expects[@]}" + + log_test "VXLAN: $what" +} + +test_flood() +{ + __test_flood de:ad:be:ef:13:37 2001:db8:1::100 10 "flood vlan 10" \ + 10 10 0 10 0 + __test_flood ca:fe:be:ef:13:37 2001:db8:2::100 20 "flood vlan 20" \ + 10 0 10 0 10 +} + +vxlan_fdb_add_del() +{ + local add_del=$1; shift + local vid=$1; shift + local mac=$1; shift + local dev=$1; shift + local dst=$1; shift + + bridge fdb $add_del dev $dev $mac self static permanent \ + ${dst:+dst} $dst 2>/dev/null + bridge fdb $add_del dev $dev $mac master static vlan $vid 2>/dev/null +} + +__test_unicast() +{ + local mac=$1; shift + local dst=$1; shift + local hit_idx=$1; shift + local vid=$1; shift + local what=$1; shift + + RET=0 + + local -a expects=(0 0 0 0 0) + expects[$hit_idx]=10 + + vxlan_flood_test $mac $dst $vid "${expects[@]}" + + log_test "VXLAN: $what" +} + +test_unicast() +{ + local -a targets=("$h2_mac $h2" + "$r1_mac vx10 2001:db8:4::1" + "$r2_mac vx10 2001:db8:5::1") + local target + + log_info "unicast vlan 10" + + for target in "${targets[@]}"; do + vxlan_fdb_add_del add 10 $target + done + + __test_unicast $h2_mac 2001:db8:1::2 0 10 "local MAC unicast" + __test_unicast $r1_mac 2001:db8:1::3 1 10 "remote MAC 1 unicast" + __test_unicast $r2_mac 2001:db8:1::4 3 10 "remote MAC 2 unicast" + + for target in "${targets[@]}"; do + vxlan_fdb_add_del del 10 $target + done + + log_info "unicast vlan 20" + + targets=("$h2_mac $h2" "$r1_mac vx20 2001:db8:4::1" \ + "$r2_mac vx20 2001:db8:5::1") + + for target in "${targets[@]}"; do + vxlan_fdb_add_del add 20 $target + done + + __test_unicast $h2_mac 2001:db8:2::2 0 20 "local MAC unicast" + __test_unicast $r1_mac 2001:db8:2::3 2 20 "remote MAC 1 unicast" + __test_unicast $r2_mac 2001:db8:2::4 4 20 "remote MAC 2 unicast" + + for target in "${targets[@]}"; do + vxlan_fdb_add_del del 20 $target + done +} + +test_pvid() +{ + local -a expects=(0 0 0 0 0) + local mac=de:ad:be:ef:13:37 + local dst=2001:db8:1::100 + local vid=10 + + # Check that flooding works + RET=0 + + expects[0]=10; expects[1]=10; expects[3]=10 + vxlan_flood_test $mac $dst $vid "${expects[@]}" + + log_test "VXLAN: flood before pvid off" + + # Toggle PVID off and test that flood to remote hosts does not work + RET=0 + + bridge vlan add vid 10 dev vx10 + + expects[0]=10; expects[1]=0; expects[3]=0 + vxlan_flood_test $mac $dst $vid "${expects[@]}" + + log_test "VXLAN: flood after pvid off" + + # Toggle PVID on and test that flood to remote hosts does work + RET=0 + + bridge vlan add vid 10 dev vx10 pvid untagged + + expects[0]=10; expects[1]=10; expects[3]=10 + vxlan_flood_test $mac $dst $vid "${expects[@]}" + + log_test "VXLAN: flood after pvid on" + + # Add a new VLAN and test that it does not affect flooding + RET=0 + + bridge vlan add vid 30 dev vx10 + + expects[0]=10; expects[1]=10; expects[3]=10 + vxlan_flood_test $mac $dst $vid "${expects[@]}" + + bridge vlan del vid 30 dev vx10 + + log_test "VXLAN: flood after vlan add" + + # Remove currently mapped VLAN and test that flood to remote hosts does + # not work + RET=0 + + bridge vlan del vid 10 dev vx10 + + expects[0]=10; expects[1]=0; expects[3]=0 + vxlan_flood_test $mac $dst $vid "${expects[@]}" + + log_test "VXLAN: flood after vlan delete" + + # Re-add the VLAN and test that flood to remote hosts does work + RET=0 + + bridge vlan add vid 10 dev vx10 pvid untagged + + expects[0]=10; expects[1]=10; expects[3]=10 + vxlan_flood_test $mac $dst $vid "${expects[@]}" + + log_test "VXLAN: flood after vlan re-add" +} + +test_all() +{ + log_info "Running tests with UDP port $VXPORT" + tests_run +} + +trap cleanup EXIT + +setup_prepare +setup_wait +test_all + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_port_8472_ipv6.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_port_8472_ipv6.sh new file mode 100755 index 000000000000..344f43ccb755 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_port_8472_ipv6.sh @@ -0,0 +1,11 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# A wrapper to run VXLAN tests with an unusual port number. + +VXPORT=8472 +ALL_TESTS=" + ping_ipv4 + ping_ipv6 +" +source vxlan_bridge_1q_ipv6.sh diff --git a/tools/testing/selftests/net/forwarding/vxlan_symmetric_ipv6.sh b/tools/testing/selftests/net/forwarding/vxlan_symmetric_ipv6.sh new file mode 100755 index 000000000000..904633427fd0 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/vxlan_symmetric_ipv6.sh @@ -0,0 +1,563 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + + +# +--------------------------------+ +-----------------------------+ +# | vrf-h1 | | vrf-h2 | +# | + $h1 | | + $h2 | +# | | 2001:db8:1::1/64 | | | 2001:db8:2::1/64 | +# | | default via 2001:db8:1::3 | | | default via 2001:db8:2::3 | +# +----|---------------------------+ +-|---------------------------+ +# | | +# +----|------------------------------------------|---------------------------+ +# | SW | | | +# | +--|------------------------------------------|-------------------------+ | +# | | + $swp1 br1 + $swp2 | | +# | | vid 10 pvid untagged vid 20 pvid untagged | | +# | | | | +# | | + vx10 + vx20 | | +# | | local 2001:db8:3::1 local 2001:db8:3::1 | | +# | | remote 2001:db8:3::2 remote 2001:db8:3::2 | | +# | | id 1010 id 1020 | | +# | | dstport 4789 dstport 4789 | | +# | | vid 10 pvid untagged vid 20 pvid untagged | | +# | | | | +# | | + vx4001 | | +# | | local 2001:db8:3::1 | | +# | | remote 2001:db8:3::2 | | +# | | id 104001 | | +# | | dstport 4789 | | +# | | vid 4001 pvid untagged | | +# | | | | +# | +-----------------------------------+-----------------------------------+ | +# | | | +# | +-----------------------------------|-----------------------------------+ | +# | | | | | +# | | +--------------------------------+--------------------------------+ | | +# | | | | | | | +# | | + vlan10 | vlan20 + | | +# | | | 2001:db8:1::2/64 | 2001:db8:2::2/64 | | | +# | | | | | | | +# | | + vlan10-v (macvlan) + vlan20-v (macvlan) + | | +# | | 2001:db8:1::3/64 vlan4001 2001:db8:2::3/64 | | +# | | 00:00:5e:00:01:01 00:00:5e:00:01:01 | | +# | | vrf-green | | +# | +-----------------------------------------------------------------------+ | +# | | +# | + $rp1 +lo | +# | | 2001:db8:4::1/64 2001:db8:3::1 | +# +----|----------------------------------------------------------------------+ +# | +# +----|--------------------------------------------------------+ +# | | vrf-spine | +# | + $rp2 | +# | 2001:db8:4::2/64 | +# | | (maybe) HW +# ============================================================================= +# | | (likely) SW +# | | +# | + v1 (veth) | +# | | 2001:db8:5::2/64 | +# +----|--------------------------------------------------------+ +# | +# +----|----------------------------------------------------------------------+ +# | + v2 (veth) +lo NS1 (netns) | +# | 2001:db8:5::1/64 2001:db8:3::2/128 | +# | | +# | +-----------------------------------------------------------------------+ | +# | | vrf-green | | +# | | + vlan10-v (macvlan) vlan20-v (macvlan) + | | +# | | | 2001:db8:1::3/64 2001:db8:2::3/64 | | | +# | | | 00:00:5e:00:01:01 00:00:5e:00:01:01 | | | +# | | | vlan4001 | | | +# | | + vlan10 + vlan20 + | | +# | | | 2001:db8:1::3/64 | 2001:db8:2::3/64 | | | +# | | | | | | | +# | | +--------------------------------+--------------------------------+ | | +# | | | | | +# | +-----------------------------------|-----------------------------------+ | +# | | | +# | +-----------------------------------+-----------------------------------+ | +# | | | | +# | | + vx10 + vx20 | | +# | | local 2001:db8:3::2 local 2001:db8:3::2 | | +# | | remote 2001:db8:3::1 remote 2001:db8:3::1 | | +# | | id 1010 id 1020 | | +# | | dstport 4789 dstport 4789 | | +# | | vid 10 pvid untagged vid 20 pvid untagged | | +# | | | | +# | | + vx4001 | | +# | | local 2001:db8:3::2 | | +# | | remote 2001:db8:3::1 | | +# | | id 104001 | | +# | | dstport 4789 | | +# | | vid 4001 pvid untagged | | +# | | | | +# | | + w1 (veth) + w3 (veth) | | +# | | | vid 10 pvid untagged br1 | vid 20 pvid untagged | | +# | +--|------------------------------------------|-------------------------+ | +# | | | | +# | | | | +# | +--|----------------------+ +--|-------------------------+ | +# | | | vrf-h1 | | | vrf-h2 | | +# | | + w2 (veth) | | + w4 (veth) | | +# | | 2001:db8:1::4/64 | | 2001:db8:2::4/64 | | +# | | default via | | default via | | +# | | 2001:db8:1::3/64 | | 2001:db8:2::3/64 | | +# | +-------------------------+ +----------------------------+ | +# +---------------------------------------------------------------------------+ + +ALL_TESTS=" + ping_ipv6 +" +NUM_NETIFS=6 +source lib.sh + +hx_create() +{ + local vrf_name=$1; shift + local if_name=$1; shift + local ip_addr=$1; shift + local gw_ip=$1; shift + + vrf_create $vrf_name + ip link set dev $if_name master $vrf_name + ip link set dev $vrf_name up + ip link set dev $if_name up + + ip address add $ip_addr/64 dev $if_name + ip neigh replace $gw_ip lladdr 00:00:5e:00:01:01 nud permanent \ + dev $if_name + ip route add default vrf $vrf_name nexthop via $gw_ip +} +export -f hx_create + +hx_destroy() +{ + local vrf_name=$1; shift + local if_name=$1; shift + local ip_addr=$1; shift + local gw_ip=$1; shift + + ip route del default vrf $vrf_name nexthop via $gw_ip + ip neigh del $gw_ip dev $if_name + ip address del $ip_addr/64 dev $if_name + + ip link set dev $if_name down + vrf_destroy $vrf_name +} + +h1_create() +{ + hx_create "vrf-h1" $h1 2001:db8:1::1 2001:db8:1::3 +} + +h1_destroy() +{ + hx_destroy "vrf-h1" $h1 2001:db8:1::1 2001:db8:1::3 +} + +h2_create() +{ + hx_create "vrf-h2" $h2 2001:db8:2::1 2001:db8:2::3 +} + +h2_destroy() +{ + hx_destroy "vrf-h2" $h2 2001:db8:2::1 2001:db8:2::3 +} + +switch_create() +{ + ip link add name br1 type bridge vlan_filtering 1 vlan_default_pvid 0 \ + mcast_snooping 0 + # Make sure the bridge uses the MAC address of the local port and not + # that of the VxLAN's device. + ip link set dev br1 address $(mac_get $swp1) + ip link set dev br1 up + + ip link set dev $rp1 up + ip address add dev $rp1 2001:db8:4::1/64 + ip route add 2001:db8:3::2/128 nexthop via 2001:db8:4::2 + + ip link add name vx10 type vxlan id 1010 \ + local 2001:db8:3::1 remote 2001:db8:3::2 dstport 4789 \ + nolearning udp6zerocsumrx udp6zerocsumtx tos inherit ttl 100 + ip link set dev vx10 up + + ip link set dev vx10 master br1 + bridge vlan add vid 10 dev vx10 pvid untagged + + ip link add name vx20 type vxlan id 1020 \ + local 2001:db8:3::1 remote 2001:db8:3::2 dstport 4789 \ + nolearning udp6zerocsumrx udp6zerocsumtx tos inherit ttl 100 + ip link set dev vx20 up + + ip link set dev vx20 master br1 + bridge vlan add vid 20 dev vx20 pvid untagged + + ip link set dev $swp1 master br1 + ip link set dev $swp1 up + + ip link set dev $swp2 master br1 + ip link set dev $swp2 up + + ip link add name vx4001 type vxlan id 104001 \ + local 2001:db8:3::1 dstport 4789 \ + nolearning udp6zerocsumrx udp6zerocsumtx tos inherit ttl 100 + ip link set dev vx4001 up + + ip link set dev vx4001 master br1 + bridge vlan add vid 4001 dev vx4001 pvid untagged + + ip address add 2001:db8:3::1/128 dev lo + + # Create SVIs + vrf_create "vrf-green" + ip link set dev vrf-green up + + ip link add link br1 name vlan10 up master vrf-green type vlan id 10 + ip address add 2001:db8:1::2/64 dev vlan10 + ip link add link vlan10 name vlan10-v up master vrf-green \ + address 00:00:5e:00:01:01 type macvlan mode private + ip address add 2001:db8:1::3/64 dev vlan10-v + + ip link add link br1 name vlan20 up master vrf-green type vlan id 20 + ip address add 2001:db8:2::2/64 dev vlan20 + ip link add link vlan20 name vlan20-v up master vrf-green \ + address 00:00:5e:00:01:01 type macvlan mode private + ip address add 2001:db8:2::3/64 dev vlan20-v + + ip link add link br1 name vlan4001 up master vrf-green \ + type vlan id 4001 + + bridge vlan add vid 10 dev br1 self + bridge vlan add vid 20 dev br1 self + bridge vlan add vid 4001 dev br1 self + + bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 10 + bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 20 + + bridge vlan add vid 10 dev $swp1 pvid untagged + bridge vlan add vid 20 dev $swp2 pvid untagged +} + +switch_destroy() +{ + bridge vlan del vid 20 dev br1 self + bridge vlan del vid 10 dev br1 self + + bridge fdb del 00:00:5e:00:01:01 dev br1 self local vlan 20 + bridge fdb del 00:00:5e:00:01:01 dev br1 self local vlan 10 + + bridge vlan del vid 4001 dev br1 self + ip link del dev vlan4001 + + ip link del dev vlan20 + + ip link del dev vlan10 + + vrf_destroy "vrf-green" + + ip address del 2001:db8:3::1/128 dev lo + + bridge vlan del vid 20 dev $swp2 + ip link set dev $swp2 down + ip link set dev $swp2 nomaster + + bridge vlan del vid 10 dev $swp1 + ip link set dev $swp1 down + ip link set dev $swp1 nomaster + + bridge vlan del vid 4001 dev vx4001 + ip link set dev vx4001 nomaster + + ip link set dev vx4001 down + ip link del dev vx4001 + + bridge vlan del vid 20 dev vx20 + ip link set dev vx20 nomaster + + ip link set dev vx20 down + ip link del dev vx20 + + bridge vlan del vid 10 dev vx10 + ip link set dev vx10 nomaster + + ip link set dev vx10 down + ip link del dev vx10 + + ip route del 2001:db8:3::2 nexthop via 2001:db8:4::2 + ip address del dev $rp1 2001:db8:4::1/64 + ip link set dev $rp1 down + + ip link set dev br1 down + ip link del dev br1 +} + +spine_create() +{ + vrf_create "vrf-spine" + ip link set dev $rp2 master vrf-spine + ip link set dev v1 master vrf-spine + ip link set dev vrf-spine up + ip link set dev $rp2 up + ip link set dev v1 up + + ip address add 2001:db8:4::2/64 dev $rp2 + ip address add 2001:db8:5::2/64 dev v1 + + ip route add 2001:db8:3::1/128 vrf vrf-spine nexthop via \ + 2001:db8:4::1 + ip route add 2001:db8:3::2/128 vrf vrf-spine nexthop via \ + 2001:db8:5::1 +} + +spine_destroy() +{ + ip route del 2001:db8:3::2/128 vrf vrf-spine nexthop via \ + 2001:db8:5::1 + ip route del 2001:db8:3::1/128 vrf vrf-spine nexthop via \ + 2001:db8:4::1 + + ip address del 2001:db8:5::2/64 dev v1 + ip address del 2001:db8:4::2/64 dev $rp2 + + ip link set dev v1 down + ip link set dev $rp2 down + vrf_destroy "vrf-spine" +} + +ns_h1_create() +{ + hx_create "vrf-h1" w2 2001:db8:1::4 2001:db8:1::3 +} +export -f ns_h1_create + +ns_h2_create() +{ + hx_create "vrf-h2" w4 2001:db8:2::4 2001:db8:2::3 +} +export -f ns_h2_create + +ns_switch_create() +{ + ip link add name br1 type bridge vlan_filtering 1 vlan_default_pvid 0 \ + mcast_snooping 0 + ip link set dev br1 up + + ip link set dev v2 up + ip address add dev v2 2001:db8:5::1/64 + ip route add 2001:db8:3::1 nexthop via 2001:db8:5::2 + + ip link add name vx10 type vxlan id 1010 \ + local 2001:db8:3::2 remote 2001:db8:3::1 dstport 4789 \ + nolearning udp6zerocsumrx udp6zerocsumtx tos inherit ttl 100 + ip link set dev vx10 up + + ip link set dev vx10 master br1 + bridge vlan add vid 10 dev vx10 pvid untagged + + ip link add name vx20 type vxlan id 1020 \ + local 2001:db8:3::2 remote 2001:db8:3::1 dstport 4789 \ + nolearning udp6zerocsumrx udp6zerocsumtx tos inherit ttl 100 + ip link set dev vx20 up + + ip link set dev vx20 master br1 + bridge vlan add vid 20 dev vx20 pvid untagged + + ip link add name vx4001 type vxlan id 104001 \ + local 2001:db8:3::2 dstport 4789 \ + nolearning udp6zerocsumrx udp6zerocsumtx tos inherit ttl 100 + ip link set dev vx4001 up + + ip link set dev vx4001 master br1 + bridge vlan add vid 4001 dev vx4001 pvid untagged + + ip link set dev w1 master br1 + ip link set dev w1 up + bridge vlan add vid 10 dev w1 pvid untagged + + ip link set dev w3 master br1 + ip link set dev w3 up + bridge vlan add vid 20 dev w3 pvid untagged + + ip address add 2001:db8:3::2/128 dev lo + + # Create SVIs + vrf_create "vrf-green" + ip link set dev vrf-green up + + ip link add link br1 name vlan10 up master vrf-green type vlan id 10 + ip address add 2001:db8:1::3/64 dev vlan10 + ip link add link vlan10 name vlan10-v up master vrf-green \ + address 00:00:5e:00:01:01 type macvlan mode private + ip address add 2001:db8:1::3/64 dev vlan10-v + + ip link add link br1 name vlan20 up master vrf-green type vlan id 20 + ip address add 2001:db8:2::3/64 dev vlan20 + ip link add link vlan20 name vlan20-v up master vrf-green \ + address 00:00:5e:00:01:01 type macvlan mode private + ip address add 2001:db8:2::3/64 dev vlan20-v + + ip link add link br1 name vlan4001 up master vrf-green \ + type vlan id 4001 + + bridge vlan add vid 10 dev br1 self + bridge vlan add vid 20 dev br1 self + bridge vlan add vid 4001 dev br1 self + + bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 10 + bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 20 +} +export -f ns_switch_create + +ns_init() +{ + ip link add name w1 type veth peer name w2 + ip link add name w3 type veth peer name w4 + + ip link set dev lo up + + ns_h1_create + ns_h2_create + ns_switch_create +} +export -f ns_init + +ns1_create() +{ + ip netns add ns1 + ip link set dev v2 netns ns1 + in_ns ns1 ns_init +} + +ns1_destroy() +{ + ip netns exec ns1 ip link set dev v2 netns 1 + ip netns del ns1 +} + +__l2_vni_init() +{ + local mac1=$1; shift + local mac2=$1; shift + local ip1=$1; shift + local ip2=$1; shift + local dst=$1; shift + + bridge fdb add $mac1 dev vx10 self master extern_learn static \ + dst $dst vlan 10 + bridge fdb add $mac2 dev vx20 self master extern_learn static \ + dst $dst vlan 20 + + ip neigh add $ip1 lladdr $mac1 nud noarp dev vlan10 \ + extern_learn + ip neigh add $ip2 lladdr $mac2 nud noarp dev vlan20 \ + extern_learn +} +export -f __l2_vni_init + +l2_vni_init() +{ + local h1_ns_mac=$(in_ns ns1 mac_get w2) + local h2_ns_mac=$(in_ns ns1 mac_get w4) + local h1_mac=$(mac_get $h1) + local h2_mac=$(mac_get $h2) + + __l2_vni_init $h1_ns_mac $h2_ns_mac 2001:db8:1::4 2001:db8:2::4 \ + 2001:db8:3::2 + in_ns ns1 __l2_vni_init $h1_mac $h2_mac 2001:db8:1::1 2001:db8:2::1 \ + 2001:db8:3::1 +} + +__l3_vni_init() +{ + local mac=$1; shift + local vtep_ip=$1; shift + local host1_ip=$1; shift + local host2_ip=$1; shift + + bridge fdb add $mac dev vx4001 self master extern_learn static \ + dst $vtep_ip vlan 4001 + + ip neigh add $vtep_ip lladdr $mac nud noarp dev vlan4001 extern_learn + + ip route add $host1_ip/128 vrf vrf-green nexthop via $vtep_ip \ + dev vlan4001 onlink + ip route add $host2_ip/128 vrf vrf-green nexthop via $vtep_ip \ + dev vlan4001 onlink +} +export -f __l3_vni_init + +l3_vni_init() +{ + local vlan4001_ns_mac=$(in_ns ns1 mac_get vlan4001) + local vlan4001_mac=$(mac_get vlan4001) + + __l3_vni_init $vlan4001_ns_mac 2001:db8:3::2 2001:db8:1::4 \ + 2001:db8:2::4 + in_ns ns1 __l3_vni_init $vlan4001_mac 2001:db8:3::1 2001:db8:1::1 \ + 2001:db8:2::1 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + rp1=${NETIFS[p5]} + rp2=${NETIFS[p6]} + + vrf_prepare + forwarding_enable + + h1_create + h2_create + switch_create + + ip link add name v1 type veth peer name v2 + spine_create + ns1_create + in_ns ns1 forwarding_enable + + l2_vni_init + l3_vni_init +} + +cleanup() +{ + pre_cleanup + + ns1_destroy + spine_destroy + ip link del dev v1 + + switch_destroy + h2_destroy + h1_destroy + + forwarding_restore + vrf_cleanup +} + +ping_ipv6() +{ + ping6_test $h1 2001:db8:2::1 ": local->local vid 10->vid 20" + ping6_test $h1 2001:db8:1::4 ": local->remote vid 10->vid 10" + ping6_test $h2 2001:db8:2::4 ": local->remote vid 20->vid 20" + ping6_test $h1 2001:db8:2::4 ": local->remote vid 10->vid 20" + ping6_test $h2 2001:db8:1::4 ": local->remote vid 20->vid 10" +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/fq_band_pktlimit.sh b/tools/testing/selftests/net/fq_band_pktlimit.sh new file mode 100755 index 000000000000..977070ed42b3 --- /dev/null +++ b/tools/testing/selftests/net/fq_band_pktlimit.sh @@ -0,0 +1,59 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Verify that FQ has a packet limit per band: +# +# 1. set the limit to 10 per band +# 2. send 20 pkts on band A: verify that 10 are queued, 10 dropped +# 3. send 20 pkts on band A: verify that 0 are queued, 20 dropped +# 4. send 20 pkts on band B: verify that 10 are queued, 10 dropped +# +# Send packets with a delay to ensure that previously sent +# packets are still queued when later ones are sent. +# Use SO_TXTIME for this. + +die() { + echo "$1" + exit 1 +} + +# run inside private netns +if [[ $# -eq 0 ]]; then + ./in_netns.sh "$0" __subprocess + exit +fi + +ip link add type dummy +ip link set dev dummy0 up +ip -6 addr add fdaa::1/128 dev dummy0 +ip -6 route add fdaa::/64 dev dummy0 +tc qdisc replace dev dummy0 root handle 1: fq quantum 1514 initial_quantum 1514 limit 10 + +DELAY=400000 + +./cmsg_sender -6 -p u -d "${DELAY}" -n 20 fdaa::2 8000 +OUT1="$(tc -s qdisc show dev dummy0 | grep '^\ Sent')" + +./cmsg_sender -6 -p u -d "${DELAY}" -n 20 fdaa::2 8000 +OUT2="$(tc -s qdisc show dev dummy0 | grep '^\ Sent')" + +./cmsg_sender -6 -p u -d "${DELAY}" -n 20 -P 7 fdaa::2 8000 +OUT3="$(tc -s qdisc show dev dummy0 | grep '^\ Sent')" + +# Initial stats will report zero sent, as all packets are still +# queued in FQ. Sleep for at least the delay period and see that +# twenty are now sent. +sleep 0.6 +OUT4="$(tc -s qdisc show dev dummy0 | grep '^\ Sent')" + +# Log the output after the test +echo "${OUT1}" +echo "${OUT2}" +echo "${OUT3}" +echo "${OUT4}" + +# Test the output for expected values +echo "${OUT1}" | grep -q '0\ pkt\ (dropped\ 10' || die "unexpected drop count at 1" +echo "${OUT2}" | grep -q '0\ pkt\ (dropped\ 30' || die "unexpected drop count at 2" +echo "${OUT3}" | grep -q '0\ pkt\ (dropped\ 40' || die "unexpected drop count at 3" +echo "${OUT4}" | grep -q '20\ pkt\ (dropped\ 40' || die "unexpected accept count at 4" diff --git a/tools/testing/selftests/net/gre_gso.sh b/tools/testing/selftests/net/gre_gso.sh new file mode 100755 index 000000000000..5100d90f92d2 --- /dev/null +++ b/tools/testing/selftests/net/gre_gso.sh @@ -0,0 +1,235 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# This test is for checking GRE GSO. +source lib.sh +ret=0 + +# all tests in this script. Can be overridden with -t option +TESTS="gre_gso" + +VERBOSE=0 +PAUSE_ON_FAIL=no +PAUSE=no +TMPFILE=`mktemp` +PID= + +log_test() +{ + local rc=$1 + local expected=$2 + local msg="$3" + + if [ ${rc} -eq ${expected} ]; then + printf " TEST: %-60s [ OK ]\n" "${msg}" + nsuccess=$((nsuccess+1)) + else + ret=1 + nfail=$((nfail+1)) + printf " TEST: %-60s [FAIL]\n" "${msg}" + if [ "${PAUSE_ON_FAIL}" = "yes" ]; then + echo + echo "hit enter to continue, 'q' to quit" + read a + [ "$a" = "q" ] && exit 1 + fi + fi + + if [ "${PAUSE}" = "yes" ]; then + echo + echo "hit enter to continue, 'q' to quit" + read a + [ "$a" = "q" ] && exit 1 + fi +} + +setup() +{ + set -e + setup_ns ns1 + IP="ip -netns $ns1" + NS_EXEC="ip netns exec $ns1" + + ip link add veth0 type veth peer name veth1 + ip link set veth0 up + ip link set veth1 netns $ns1 + $IP link set veth1 name veth0 + $IP link set veth0 up + + dd if=/dev/urandom of=$TMPFILE bs=1024 count=2048 &>/dev/null + set +e +} + +cleanup() +{ + rm -rf $TMPFILE + [ -n "$PID" ] && kill $PID + ip link del dev gre1 &> /dev/null + ip link del dev veth0 &> /dev/null + cleanup_ns $ns1 +} + +get_linklocal() +{ + local dev=$1 + local ns=$2 + local addr + + [ -n "$ns" ] && ns="-netns $ns" + + addr=$(ip -6 -br $ns addr show dev ${dev} | \ + awk '{ + for (i = 3; i <= NF; ++i) { + if ($i ~ /^fe80/) + print $i + } + }' + ) + addr=${addr/\/*} + + [ -z "$addr" ] && return 1 + + echo $addr + + return 0 +} + +gre_create_tun() +{ + local a1=$1 + local a2=$2 + local mode + + [[ $a1 =~ ^[0-9.]*$ ]] && mode=gre || mode=ip6gre + + ip tunnel add gre1 mode $mode local $a1 remote $a2 dev veth0 + ip link set gre1 up + $IP tunnel add gre1 mode $mode local $a2 remote $a1 dev veth0 + $IP link set gre1 up +} + +gre_gst_test_checks() +{ + local name=$1 + local addr=$2 + local proto=$3 + + [ "$proto" == 6 ] && addr="[$addr]" + + $NS_EXEC socat - tcp${proto}-listen:$port,reuseaddr,fork >/dev/null & + PID=$! + while ! $NS_EXEC ss -ltn | grep -q $port; do ((i++)); sleep 0.01; done + + cat $TMPFILE | timeout 1 socat -u STDIN TCP:$addr:$port + log_test $? 0 "$name - copy file w/ TSO" + + ethtool -K veth0 tso off + + cat $TMPFILE | timeout 1 socat -u STDIN TCP:$addr:$port + log_test $? 0 "$name - copy file w/ GSO" + + ethtool -K veth0 tso on + + kill $PID + PID= +} + +gre6_gso_test() +{ + local port=7777 + + setup + + a1=$(get_linklocal veth0) + a2=$(get_linklocal veth0 $ns1) + + gre_create_tun $a1 $a2 + + ip addr add 172.16.2.1/24 dev gre1 + $IP addr add 172.16.2.2/24 dev gre1 + + ip -6 addr add 2001:db8:1::1/64 dev gre1 nodad + $IP -6 addr add 2001:db8:1::2/64 dev gre1 nodad + + sleep 2 + + gre_gst_test_checks GREv6/v4 172.16.2.2 4 + gre_gst_test_checks GREv6/v6 2001:db8:1::2 6 + + cleanup +} + +gre_gso_test() +{ + gre6_gso_test +} + +################################################################################ +# usage + +usage() +{ + cat <<EOF +usage: ${0##*/} OPTS + + -t <test> Test(s) to run (default: all) + (options: $TESTS) + -p Pause on fail + -P Pause after each test before cleanup + -v verbose mode (show commands and output) +EOF +} + +################################################################################ +# main + +while getopts :t:pPhv o +do + case $o in + t) TESTS=$OPTARG;; + p) PAUSE_ON_FAIL=yes;; + P) PAUSE=yes;; + v) VERBOSE=$(($VERBOSE + 1));; + h) usage; exit 0;; + *) usage; exit 1;; + esac +done + +PEER_CMD="ip netns exec ${PEER_NS}" + +# make sure we don't pause twice +[ "${PAUSE}" = "yes" ] && PAUSE_ON_FAIL=no + +if [ "$(id -u)" -ne 0 ];then + echo "SKIP: Need root privileges" + exit $ksft_skip; +fi + +if [ ! -x "$(command -v ip)" ]; then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +if [ ! -x "$(command -v socat)" ]; then + echo "SKIP: Could not run test without socat tool" + exit $ksft_skip +fi + +# start clean +cleanup &> /dev/null + +for t in $TESTS +do + case $t in + gre_gso) gre_gso_test;; + + help) echo "Test names: $TESTS"; exit 0;; + esac +done + +if [ "$TESTS" != "none" ]; then + printf "\nTests passed: %3d\n" ${nsuccess} + printf "Tests failed: %3d\n" ${nfail} +fi + +exit $ret diff --git a/tools/testing/selftests/net/gro.c b/tools/testing/selftests/net/gro.c new file mode 100644 index 000000000000..353e1e867fbb --- /dev/null +++ b/tools/testing/selftests/net/gro.c @@ -0,0 +1,1187 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * This testsuite provides conformance testing for GRO coalescing. + * + * Test cases: + * 1.data + * Data packets of the same size and same header setup with correct + * sequence numbers coalesce. The one exception being the last data + * packet coalesced: it can be smaller than the rest and coalesced + * as long as it is in the same flow. + * 2.ack + * Pure ACK does not coalesce. + * 3.flags + * Specific test cases: no packets with PSH, SYN, URG, RST set will + * be coalesced. + * 4.tcp + * Packets with incorrect checksum, non-consecutive seqno and + * different TCP header options shouldn't coalesce. Nit: given that + * some extension headers have paddings, such as timestamp, headers + * that are padding differently would not be coalesced. + * 5.ip: + * Packets with different (ECN, TTL, TOS) header, ip options or + * ip fragments (ipv6) shouldn't coalesce. + * 6.large: + * Packets larger than GRO_MAX_SIZE packets shouldn't coalesce. + * + * MSS is defined as 4096 - header because if it is too small + * (i.e. 1500 MTU - header), it will result in many packets, + * increasing the "large" test case's flakiness. This is because + * due to time sensitivity in the coalescing window, the receiver + * may not coalesce all of the packets. + * + * Note the timing issue applies to all of the test cases, so some + * flakiness is to be expected. + * + */ + +#define _GNU_SOURCE + +#include <arpa/inet.h> +#include <errno.h> +#include <error.h> +#include <getopt.h> +#include <linux/filter.h> +#include <linux/if_packet.h> +#include <linux/ipv6.h> +#include <net/ethernet.h> +#include <net/if.h> +#include <netinet/in.h> +#include <netinet/ip.h> +#include <netinet/ip6.h> +#include <netinet/tcp.h> +#include <stdbool.h> +#include <stddef.h> +#include <stdio.h> +#include <stdarg.h> +#include <string.h> +#include <unistd.h> + +#include "../kselftest.h" + +#define DPORT 8000 +#define SPORT 1500 +#define PAYLOAD_LEN 100 +#define NUM_PACKETS 4 +#define START_SEQ 100 +#define START_ACK 100 +#define ETH_P_NONE 0 +#define TOTAL_HDR_LEN (ETH_HLEN + sizeof(struct ipv6hdr) + sizeof(struct tcphdr)) +#define MSS (4096 - sizeof(struct tcphdr) - sizeof(struct ipv6hdr)) +#define MAX_PAYLOAD (IP_MAXPACKET - sizeof(struct tcphdr) - sizeof(struct ipv6hdr)) +#define NUM_LARGE_PKT (MAX_PAYLOAD / MSS) +#define MAX_HDR_LEN (ETH_HLEN + sizeof(struct ipv6hdr) + sizeof(struct tcphdr)) +#define MIN_EXTHDR_SIZE 8 +#define EXT_PAYLOAD_1 "\x00\x00\x00\x00\x00\x00" +#define EXT_PAYLOAD_2 "\x11\x11\x11\x11\x11\x11" + +#define ipv6_optlen(p) (((p)->hdrlen+1) << 3) /* calculate IPv6 extension header len */ +#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)])) + +static const char *addr6_src = "fdaa::2"; +static const char *addr6_dst = "fdaa::1"; +static const char *addr4_src = "192.168.1.200"; +static const char *addr4_dst = "192.168.1.100"; +static int proto = -1; +static uint8_t src_mac[ETH_ALEN], dst_mac[ETH_ALEN]; +static char *testname = "data"; +static char *ifname = "eth0"; +static char *smac = "aa:00:00:00:00:02"; +static char *dmac = "aa:00:00:00:00:01"; +static bool verbose; +static bool tx_socket = true; +static int tcp_offset = -1; +static int total_hdr_len = -1; +static int ethhdr_proto = -1; + +static void vlog(const char *fmt, ...) +{ + va_list args; + + if (verbose) { + va_start(args, fmt); + vfprintf(stderr, fmt, args); + va_end(args); + } +} + +static void setup_sock_filter(int fd) +{ + const int dport_off = tcp_offset + offsetof(struct tcphdr, dest); + const int ethproto_off = offsetof(struct ethhdr, h_proto); + int optlen = 0; + int ipproto_off, opt_ipproto_off; + int next_off; + + if (proto == PF_INET) + next_off = offsetof(struct iphdr, protocol); + else + next_off = offsetof(struct ipv6hdr, nexthdr); + ipproto_off = ETH_HLEN + next_off; + + if (strcmp(testname, "ip") == 0) { + if (proto == PF_INET) + optlen = sizeof(struct ip_timestamp); + else { + BUILD_BUG_ON(sizeof(struct ip6_hbh) > MIN_EXTHDR_SIZE); + BUILD_BUG_ON(sizeof(struct ip6_dest) > MIN_EXTHDR_SIZE); + BUILD_BUG_ON(sizeof(struct ip6_frag) > MIN_EXTHDR_SIZE); + + /* same size for HBH and Fragment extension header types */ + optlen = MIN_EXTHDR_SIZE; + opt_ipproto_off = ETH_HLEN + sizeof(struct ipv6hdr) + + offsetof(struct ip6_ext, ip6e_nxt); + } + } + + /* this filter validates the following: + * - packet is IPv4/IPv6 according to the running test. + * - packet is TCP. Also handles the case of one extension header and then TCP. + * - checks the packet tcp dport equals to DPORT. Also handles the case of one + * extension header and then TCP. + */ + struct sock_filter filter[] = { + BPF_STMT(BPF_LD + BPF_H + BPF_ABS, ethproto_off), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, ntohs(ethhdr_proto), 0, 9), + BPF_STMT(BPF_LD + BPF_B + BPF_ABS, ipproto_off), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, IPPROTO_TCP, 2, 0), + BPF_STMT(BPF_LD + BPF_B + BPF_ABS, opt_ipproto_off), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, IPPROTO_TCP, 0, 5), + BPF_STMT(BPF_LD + BPF_H + BPF_ABS, dport_off), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, DPORT, 2, 0), + BPF_STMT(BPF_LD + BPF_H + BPF_ABS, dport_off + optlen), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, DPORT, 0, 1), + BPF_STMT(BPF_RET + BPF_K, 0xFFFFFFFF), + BPF_STMT(BPF_RET + BPF_K, 0), + }; + + struct sock_fprog bpf = { + .len = ARRAY_SIZE(filter), + .filter = filter, + }; + + if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &bpf, sizeof(bpf)) < 0) + error(1, errno, "error setting filter"); +} + +static uint32_t checksum_nofold(void *data, size_t len, uint32_t sum) +{ + uint16_t *words = data; + int i; + + for (i = 0; i < len / 2; i++) + sum += words[i]; + if (len & 1) + sum += ((char *)data)[len - 1]; + return sum; +} + +static uint16_t checksum_fold(void *data, size_t len, uint32_t sum) +{ + sum = checksum_nofold(data, len, sum); + while (sum > 0xFFFF) + sum = (sum & 0xFFFF) + (sum >> 16); + return ~sum; +} + +static uint16_t tcp_checksum(void *buf, int payload_len) +{ + struct pseudo_header6 { + struct in6_addr saddr; + struct in6_addr daddr; + uint16_t protocol; + uint16_t payload_len; + } ph6; + struct pseudo_header4 { + struct in_addr saddr; + struct in_addr daddr; + uint16_t protocol; + uint16_t payload_len; + } ph4; + uint32_t sum = 0; + + if (proto == PF_INET6) { + if (inet_pton(AF_INET6, addr6_src, &ph6.saddr) != 1) + error(1, errno, "inet_pton6 source ip pseudo"); + if (inet_pton(AF_INET6, addr6_dst, &ph6.daddr) != 1) + error(1, errno, "inet_pton6 dest ip pseudo"); + ph6.protocol = htons(IPPROTO_TCP); + ph6.payload_len = htons(sizeof(struct tcphdr) + payload_len); + + sum = checksum_nofold(&ph6, sizeof(ph6), 0); + } else if (proto == PF_INET) { + if (inet_pton(AF_INET, addr4_src, &ph4.saddr) != 1) + error(1, errno, "inet_pton source ip pseudo"); + if (inet_pton(AF_INET, addr4_dst, &ph4.daddr) != 1) + error(1, errno, "inet_pton dest ip pseudo"); + ph4.protocol = htons(IPPROTO_TCP); + ph4.payload_len = htons(sizeof(struct tcphdr) + payload_len); + + sum = checksum_nofold(&ph4, sizeof(ph4), 0); + } + + return checksum_fold(buf, sizeof(struct tcphdr) + payload_len, sum); +} + +static void read_MAC(uint8_t *mac_addr, char *mac) +{ + if (sscanf(mac, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx", + &mac_addr[0], &mac_addr[1], &mac_addr[2], + &mac_addr[3], &mac_addr[4], &mac_addr[5]) != 6) + error(1, 0, "sscanf"); +} + +static void fill_datalinklayer(void *buf) +{ + struct ethhdr *eth = buf; + + memcpy(eth->h_dest, dst_mac, ETH_ALEN); + memcpy(eth->h_source, src_mac, ETH_ALEN); + eth->h_proto = ethhdr_proto; +} + +static void fill_networklayer(void *buf, int payload_len) +{ + struct ipv6hdr *ip6h = buf; + struct iphdr *iph = buf; + + if (proto == PF_INET6) { + memset(ip6h, 0, sizeof(*ip6h)); + + ip6h->version = 6; + ip6h->payload_len = htons(sizeof(struct tcphdr) + payload_len); + ip6h->nexthdr = IPPROTO_TCP; + ip6h->hop_limit = 8; + if (inet_pton(AF_INET6, addr6_src, &ip6h->saddr) != 1) + error(1, errno, "inet_pton source ip6"); + if (inet_pton(AF_INET6, addr6_dst, &ip6h->daddr) != 1) + error(1, errno, "inet_pton dest ip6"); + } else if (proto == PF_INET) { + memset(iph, 0, sizeof(*iph)); + + iph->version = 4; + iph->ihl = 5; + iph->ttl = 8; + iph->protocol = IPPROTO_TCP; + iph->tot_len = htons(sizeof(struct tcphdr) + + payload_len + sizeof(struct iphdr)); + iph->frag_off = htons(0x4000); /* DF = 1, MF = 0 */ + if (inet_pton(AF_INET, addr4_src, &iph->saddr) != 1) + error(1, errno, "inet_pton source ip"); + if (inet_pton(AF_INET, addr4_dst, &iph->daddr) != 1) + error(1, errno, "inet_pton dest ip"); + iph->check = checksum_fold(buf, sizeof(struct iphdr), 0); + } +} + +static void fill_transportlayer(void *buf, int seq_offset, int ack_offset, + int payload_len, int fin) +{ + struct tcphdr *tcph = buf; + + memset(tcph, 0, sizeof(*tcph)); + + tcph->source = htons(SPORT); + tcph->dest = htons(DPORT); + tcph->seq = ntohl(START_SEQ + seq_offset); + tcph->ack_seq = ntohl(START_ACK + ack_offset); + tcph->ack = 1; + tcph->fin = fin; + tcph->doff = 5; + tcph->window = htons(TCP_MAXWIN); + tcph->urg_ptr = 0; + tcph->check = tcp_checksum(tcph, payload_len); +} + +static void write_packet(int fd, char *buf, int len, struct sockaddr_ll *daddr) +{ + int ret = -1; + + ret = sendto(fd, buf, len, 0, (struct sockaddr *)daddr, sizeof(*daddr)); + if (ret == -1) + error(1, errno, "sendto failure"); + if (ret != len) + error(1, errno, "sendto wrong length"); +} + +static void create_packet(void *buf, int seq_offset, int ack_offset, + int payload_len, int fin) +{ + memset(buf, 0, total_hdr_len); + memset(buf + total_hdr_len, 'a', payload_len); + fill_transportlayer(buf + tcp_offset, seq_offset, ack_offset, + payload_len, fin); + fill_networklayer(buf + ETH_HLEN, payload_len); + fill_datalinklayer(buf); +} + +/* send one extra flag, not first and not last pkt */ +static void send_flags(int fd, struct sockaddr_ll *daddr, int psh, int syn, + int rst, int urg) +{ + static char flag_buf[MAX_HDR_LEN + PAYLOAD_LEN]; + static char buf[MAX_HDR_LEN + PAYLOAD_LEN]; + int payload_len, pkt_size, flag, i; + struct tcphdr *tcph; + + payload_len = PAYLOAD_LEN * psh; + pkt_size = total_hdr_len + payload_len; + flag = NUM_PACKETS / 2; + + create_packet(flag_buf, flag * payload_len, 0, payload_len, 0); + + tcph = (struct tcphdr *)(flag_buf + tcp_offset); + tcph->psh = psh; + tcph->syn = syn; + tcph->rst = rst; + tcph->urg = urg; + tcph->check = 0; + tcph->check = tcp_checksum(tcph, payload_len); + + for (i = 0; i < NUM_PACKETS + 1; i++) { + if (i == flag) { + write_packet(fd, flag_buf, pkt_size, daddr); + continue; + } + create_packet(buf, i * PAYLOAD_LEN, 0, PAYLOAD_LEN, 0); + write_packet(fd, buf, total_hdr_len + PAYLOAD_LEN, daddr); + } +} + +/* Test for data of same length, smaller than previous + * and of different lengths + */ +static void send_data_pkts(int fd, struct sockaddr_ll *daddr, + int payload_len1, int payload_len2) +{ + static char buf[ETH_HLEN + IP_MAXPACKET]; + + create_packet(buf, 0, 0, payload_len1, 0); + write_packet(fd, buf, total_hdr_len + payload_len1, daddr); + create_packet(buf, payload_len1, 0, payload_len2, 0); + write_packet(fd, buf, total_hdr_len + payload_len2, daddr); +} + +/* If incoming segments make tracked segment length exceed + * legal IP datagram length, do not coalesce + */ +static void send_large(int fd, struct sockaddr_ll *daddr, int remainder) +{ + static char pkts[NUM_LARGE_PKT][TOTAL_HDR_LEN + MSS]; + static char last[TOTAL_HDR_LEN + MSS]; + static char new_seg[TOTAL_HDR_LEN + MSS]; + int i; + + for (i = 0; i < NUM_LARGE_PKT; i++) + create_packet(pkts[i], i * MSS, 0, MSS, 0); + create_packet(last, NUM_LARGE_PKT * MSS, 0, remainder, 0); + create_packet(new_seg, (NUM_LARGE_PKT + 1) * MSS, 0, remainder, 0); + + for (i = 0; i < NUM_LARGE_PKT; i++) + write_packet(fd, pkts[i], total_hdr_len + MSS, daddr); + write_packet(fd, last, total_hdr_len + remainder, daddr); + write_packet(fd, new_seg, total_hdr_len + remainder, daddr); +} + +/* Pure acks and dup acks don't coalesce */ +static void send_ack(int fd, struct sockaddr_ll *daddr) +{ + static char buf[MAX_HDR_LEN]; + + create_packet(buf, 0, 0, 0, 0); + write_packet(fd, buf, total_hdr_len, daddr); + write_packet(fd, buf, total_hdr_len, daddr); + create_packet(buf, 0, 1, 0, 0); + write_packet(fd, buf, total_hdr_len, daddr); +} + +static void recompute_packet(char *buf, char *no_ext, int extlen) +{ + struct tcphdr *tcphdr = (struct tcphdr *)(buf + tcp_offset); + struct ipv6hdr *ip6h = (struct ipv6hdr *)(buf + ETH_HLEN); + struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN); + + memmove(buf, no_ext, total_hdr_len); + memmove(buf + total_hdr_len + extlen, + no_ext + total_hdr_len, PAYLOAD_LEN); + + tcphdr->doff = tcphdr->doff + (extlen / 4); + tcphdr->check = 0; + tcphdr->check = tcp_checksum(tcphdr, PAYLOAD_LEN + extlen); + if (proto == PF_INET) { + iph->tot_len = htons(ntohs(iph->tot_len) + extlen); + iph->check = 0; + iph->check = checksum_fold(iph, sizeof(struct iphdr), 0); + } else { + ip6h->payload_len = htons(ntohs(ip6h->payload_len) + extlen); + } +} + +static void tcp_write_options(char *buf, int kind, int ts) +{ + struct tcp_option_ts { + uint8_t kind; + uint8_t len; + uint32_t tsval; + uint32_t tsecr; + } *opt_ts = (void *)buf; + struct tcp_option_window { + uint8_t kind; + uint8_t len; + uint8_t shift; + } *opt_window = (void *)buf; + + switch (kind) { + case TCPOPT_NOP: + buf[0] = TCPOPT_NOP; + break; + case TCPOPT_WINDOW: + memset(opt_window, 0, sizeof(struct tcp_option_window)); + opt_window->kind = TCPOPT_WINDOW; + opt_window->len = TCPOLEN_WINDOW; + opt_window->shift = 0; + break; + case TCPOPT_TIMESTAMP: + memset(opt_ts, 0, sizeof(struct tcp_option_ts)); + opt_ts->kind = TCPOPT_TIMESTAMP; + opt_ts->len = TCPOLEN_TIMESTAMP; + opt_ts->tsval = ts; + opt_ts->tsecr = 0; + break; + default: + error(1, 0, "unimplemented TCP option"); + break; + } +} + +/* TCP with options is always a permutation of {TS, NOP, NOP}. + * Implement different orders to verify coalescing stops. + */ +static void add_standard_tcp_options(char *buf, char *no_ext, int ts, int order) +{ + switch (order) { + case 0: + tcp_write_options(buf + total_hdr_len, TCPOPT_NOP, 0); + tcp_write_options(buf + total_hdr_len + 1, TCPOPT_NOP, 0); + tcp_write_options(buf + total_hdr_len + 2 /* two NOP opts */, + TCPOPT_TIMESTAMP, ts); + break; + case 1: + tcp_write_options(buf + total_hdr_len, TCPOPT_NOP, 0); + tcp_write_options(buf + total_hdr_len + 1, + TCPOPT_TIMESTAMP, ts); + tcp_write_options(buf + total_hdr_len + 1 + TCPOLEN_TIMESTAMP, + TCPOPT_NOP, 0); + break; + case 2: + tcp_write_options(buf + total_hdr_len, TCPOPT_TIMESTAMP, ts); + tcp_write_options(buf + total_hdr_len + TCPOLEN_TIMESTAMP + 1, + TCPOPT_NOP, 0); + tcp_write_options(buf + total_hdr_len + TCPOLEN_TIMESTAMP + 2, + TCPOPT_NOP, 0); + break; + default: + error(1, 0, "unknown order"); + break; + } + recompute_packet(buf, no_ext, TCPOLEN_TSTAMP_APPA); +} + +/* Packets with invalid checksum don't coalesce. */ +static void send_changed_checksum(int fd, struct sockaddr_ll *daddr) +{ + static char buf[MAX_HDR_LEN + PAYLOAD_LEN]; + struct tcphdr *tcph = (struct tcphdr *)(buf + tcp_offset); + int pkt_size = total_hdr_len + PAYLOAD_LEN; + + create_packet(buf, 0, 0, PAYLOAD_LEN, 0); + write_packet(fd, buf, pkt_size, daddr); + + create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0); + tcph->check = tcph->check - 1; + write_packet(fd, buf, pkt_size, daddr); +} + + /* Packets with non-consecutive sequence number don't coalesce.*/ +static void send_changed_seq(int fd, struct sockaddr_ll *daddr) +{ + static char buf[MAX_HDR_LEN + PAYLOAD_LEN]; + struct tcphdr *tcph = (struct tcphdr *)(buf + tcp_offset); + int pkt_size = total_hdr_len + PAYLOAD_LEN; + + create_packet(buf, 0, 0, PAYLOAD_LEN, 0); + write_packet(fd, buf, pkt_size, daddr); + + create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0); + tcph->seq = ntohl(htonl(tcph->seq) + 1); + tcph->check = 0; + tcph->check = tcp_checksum(tcph, PAYLOAD_LEN); + write_packet(fd, buf, pkt_size, daddr); +} + + /* Packet with different timestamp option or different timestamps + * don't coalesce. + */ +static void send_changed_ts(int fd, struct sockaddr_ll *daddr) +{ + static char buf[MAX_HDR_LEN + PAYLOAD_LEN]; + static char extpkt[sizeof(buf) + TCPOLEN_TSTAMP_APPA]; + int pkt_size = total_hdr_len + PAYLOAD_LEN + TCPOLEN_TSTAMP_APPA; + + create_packet(buf, 0, 0, PAYLOAD_LEN, 0); + add_standard_tcp_options(extpkt, buf, 0, 0); + write_packet(fd, extpkt, pkt_size, daddr); + + create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0); + add_standard_tcp_options(extpkt, buf, 0, 0); + write_packet(fd, extpkt, pkt_size, daddr); + + create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0); + add_standard_tcp_options(extpkt, buf, 100, 0); + write_packet(fd, extpkt, pkt_size, daddr); + + create_packet(buf, PAYLOAD_LEN * 3, 0, PAYLOAD_LEN, 0); + add_standard_tcp_options(extpkt, buf, 100, 1); + write_packet(fd, extpkt, pkt_size, daddr); + + create_packet(buf, PAYLOAD_LEN * 4, 0, PAYLOAD_LEN, 0); + add_standard_tcp_options(extpkt, buf, 100, 2); + write_packet(fd, extpkt, pkt_size, daddr); +} + +/* Packet with different tcp options don't coalesce. */ +static void send_diff_opt(int fd, struct sockaddr_ll *daddr) +{ + static char buf[MAX_HDR_LEN + PAYLOAD_LEN]; + static char extpkt1[sizeof(buf) + TCPOLEN_TSTAMP_APPA]; + static char extpkt2[sizeof(buf) + TCPOLEN_MAXSEG]; + int extpkt1_size = total_hdr_len + PAYLOAD_LEN + TCPOLEN_TSTAMP_APPA; + int extpkt2_size = total_hdr_len + PAYLOAD_LEN + TCPOLEN_MAXSEG; + + create_packet(buf, 0, 0, PAYLOAD_LEN, 0); + add_standard_tcp_options(extpkt1, buf, 0, 0); + write_packet(fd, extpkt1, extpkt1_size, daddr); + + create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0); + add_standard_tcp_options(extpkt1, buf, 0, 0); + write_packet(fd, extpkt1, extpkt1_size, daddr); + + create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0); + tcp_write_options(extpkt2 + MAX_HDR_LEN, TCPOPT_NOP, 0); + tcp_write_options(extpkt2 + MAX_HDR_LEN + 1, TCPOPT_WINDOW, 0); + recompute_packet(extpkt2, buf, TCPOLEN_WINDOW + 1); + write_packet(fd, extpkt2, extpkt2_size, daddr); +} + +static void add_ipv4_ts_option(void *buf, void *optpkt) +{ + struct ip_timestamp *ts = (struct ip_timestamp *)(optpkt + tcp_offset); + int optlen = sizeof(struct ip_timestamp); + struct iphdr *iph; + + if (optlen % 4) + error(1, 0, "ipv4 timestamp length is not a multiple of 4B"); + + ts->ipt_code = IPOPT_TS; + ts->ipt_len = optlen; + ts->ipt_ptr = 5; + ts->ipt_flg = IPOPT_TS_TSONLY; + + memcpy(optpkt, buf, tcp_offset); + memcpy(optpkt + tcp_offset + optlen, buf + tcp_offset, + sizeof(struct tcphdr) + PAYLOAD_LEN); + + iph = (struct iphdr *)(optpkt + ETH_HLEN); + iph->ihl = 5 + (optlen / 4); + iph->tot_len = htons(ntohs(iph->tot_len) + optlen); + iph->check = 0; + iph->check = checksum_fold(iph, sizeof(struct iphdr) + optlen, 0); +} + +static void add_ipv6_exthdr(void *buf, void *optpkt, __u8 exthdr_type, char *ext_payload) +{ + struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr *)(optpkt + tcp_offset); + struct ipv6hdr *iph = (struct ipv6hdr *)(optpkt + ETH_HLEN); + char *exthdr_payload_start = (char *)(exthdr + 1); + + exthdr->hdrlen = 0; + exthdr->nexthdr = IPPROTO_TCP; + + memcpy(exthdr_payload_start, ext_payload, MIN_EXTHDR_SIZE - sizeof(*exthdr)); + + memcpy(optpkt, buf, tcp_offset); + memcpy(optpkt + tcp_offset + MIN_EXTHDR_SIZE, buf + tcp_offset, + sizeof(struct tcphdr) + PAYLOAD_LEN); + + iph->nexthdr = exthdr_type; + iph->payload_len = htons(ntohs(iph->payload_len) + MIN_EXTHDR_SIZE); +} + +static void send_ipv6_exthdr(int fd, struct sockaddr_ll *daddr, char *ext_data1, char *ext_data2) +{ + static char buf[MAX_HDR_LEN + PAYLOAD_LEN]; + static char exthdr_pck[sizeof(buf) + MIN_EXTHDR_SIZE]; + + create_packet(buf, 0, 0, PAYLOAD_LEN, 0); + add_ipv6_exthdr(buf, exthdr_pck, IPPROTO_HOPOPTS, ext_data1); + write_packet(fd, exthdr_pck, total_hdr_len + PAYLOAD_LEN + MIN_EXTHDR_SIZE, daddr); + + create_packet(buf, PAYLOAD_LEN * 1, 0, PAYLOAD_LEN, 0); + add_ipv6_exthdr(buf, exthdr_pck, IPPROTO_HOPOPTS, ext_data2); + write_packet(fd, exthdr_pck, total_hdr_len + PAYLOAD_LEN + MIN_EXTHDR_SIZE, daddr); +} + +/* IPv4 options shouldn't coalesce */ +static void send_ip_options(int fd, struct sockaddr_ll *daddr) +{ + static char buf[MAX_HDR_LEN + PAYLOAD_LEN]; + static char optpkt[sizeof(buf) + sizeof(struct ip_timestamp)]; + int optlen = sizeof(struct ip_timestamp); + int pkt_size = total_hdr_len + PAYLOAD_LEN + optlen; + + create_packet(buf, 0, 0, PAYLOAD_LEN, 0); + write_packet(fd, buf, total_hdr_len + PAYLOAD_LEN, daddr); + + create_packet(buf, PAYLOAD_LEN * 1, 0, PAYLOAD_LEN, 0); + add_ipv4_ts_option(buf, optpkt); + write_packet(fd, optpkt, pkt_size, daddr); + + create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0); + write_packet(fd, buf, total_hdr_len + PAYLOAD_LEN, daddr); +} + +/* IPv4 fragments shouldn't coalesce */ +static void send_fragment4(int fd, struct sockaddr_ll *daddr) +{ + static char buf[IP_MAXPACKET]; + struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN); + int pkt_size = total_hdr_len + PAYLOAD_LEN; + + create_packet(buf, 0, 0, PAYLOAD_LEN, 0); + write_packet(fd, buf, pkt_size, daddr); + + /* Once fragmented, packet would retain the total_len. + * Tcp header is prepared as if rest of data is in follow-up frags, + * but follow up frags aren't actually sent. + */ + memset(buf + total_hdr_len, 'a', PAYLOAD_LEN * 2); + fill_transportlayer(buf + tcp_offset, PAYLOAD_LEN, 0, PAYLOAD_LEN * 2, 0); + fill_networklayer(buf + ETH_HLEN, PAYLOAD_LEN); + fill_datalinklayer(buf); + + iph->frag_off = htons(0x6000); // DF = 1, MF = 1 + iph->check = 0; + iph->check = checksum_fold(iph, sizeof(struct iphdr), 0); + write_packet(fd, buf, pkt_size, daddr); +} + +/* IPv4 packets with different ttl don't coalesce.*/ +static void send_changed_ttl(int fd, struct sockaddr_ll *daddr) +{ + int pkt_size = total_hdr_len + PAYLOAD_LEN; + static char buf[MAX_HDR_LEN + PAYLOAD_LEN]; + struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN); + + create_packet(buf, 0, 0, PAYLOAD_LEN, 0); + write_packet(fd, buf, pkt_size, daddr); + + create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0); + iph->ttl = 7; + iph->check = 0; + iph->check = checksum_fold(iph, sizeof(struct iphdr), 0); + write_packet(fd, buf, pkt_size, daddr); +} + +/* Packets with different tos don't coalesce.*/ +static void send_changed_tos(int fd, struct sockaddr_ll *daddr) +{ + int pkt_size = total_hdr_len + PAYLOAD_LEN; + static char buf[MAX_HDR_LEN + PAYLOAD_LEN]; + struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN); + struct ipv6hdr *ip6h = (struct ipv6hdr *)(buf + ETH_HLEN); + + create_packet(buf, 0, 0, PAYLOAD_LEN, 0); + write_packet(fd, buf, pkt_size, daddr); + + create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0); + if (proto == PF_INET) { + iph->tos = 1; + iph->check = 0; + iph->check = checksum_fold(iph, sizeof(struct iphdr), 0); + } else if (proto == PF_INET6) { + ip6h->priority = 0xf; + } + write_packet(fd, buf, pkt_size, daddr); +} + +/* Packets with different ECN don't coalesce.*/ +static void send_changed_ECN(int fd, struct sockaddr_ll *daddr) +{ + int pkt_size = total_hdr_len + PAYLOAD_LEN; + static char buf[MAX_HDR_LEN + PAYLOAD_LEN]; + struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN); + + create_packet(buf, 0, 0, PAYLOAD_LEN, 0); + write_packet(fd, buf, pkt_size, daddr); + + create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0); + if (proto == PF_INET) { + buf[ETH_HLEN + 1] ^= 0x2; // ECN set to 10 + iph->check = 0; + iph->check = checksum_fold(iph, sizeof(struct iphdr), 0); + } else { + buf[ETH_HLEN + 1] ^= 0x20; // ECN set to 10 + } + write_packet(fd, buf, pkt_size, daddr); +} + +/* IPv6 fragments and packets with extensions don't coalesce.*/ +static void send_fragment6(int fd, struct sockaddr_ll *daddr) +{ + static char buf[MAX_HDR_LEN + PAYLOAD_LEN]; + static char extpkt[MAX_HDR_LEN + PAYLOAD_LEN + + sizeof(struct ip6_frag)]; + struct ipv6hdr *ip6h = (struct ipv6hdr *)(buf + ETH_HLEN); + struct ip6_frag *frag = (void *)(extpkt + tcp_offset); + int extlen = sizeof(struct ip6_frag); + int bufpkt_len = total_hdr_len + PAYLOAD_LEN; + int extpkt_len = bufpkt_len + extlen; + int i; + + for (i = 0; i < 2; i++) { + create_packet(buf, PAYLOAD_LEN * i, 0, PAYLOAD_LEN, 0); + write_packet(fd, buf, bufpkt_len, daddr); + } + sleep(1); + create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0); + memset(extpkt, 0, extpkt_len); + + ip6h->nexthdr = IPPROTO_FRAGMENT; + ip6h->payload_len = htons(ntohs(ip6h->payload_len) + extlen); + frag->ip6f_nxt = IPPROTO_TCP; + + memcpy(extpkt, buf, tcp_offset); + memcpy(extpkt + tcp_offset + extlen, buf + tcp_offset, + sizeof(struct tcphdr) + PAYLOAD_LEN); + write_packet(fd, extpkt, extpkt_len, daddr); + + create_packet(buf, PAYLOAD_LEN * 3, 0, PAYLOAD_LEN, 0); + write_packet(fd, buf, bufpkt_len, daddr); +} + +static void bind_packetsocket(int fd) +{ + struct sockaddr_ll daddr = {}; + + daddr.sll_family = AF_PACKET; + daddr.sll_protocol = ethhdr_proto; + daddr.sll_ifindex = if_nametoindex(ifname); + if (daddr.sll_ifindex == 0) + error(1, errno, "if_nametoindex"); + + if (bind(fd, (void *)&daddr, sizeof(daddr)) < 0) + error(1, errno, "could not bind socket"); +} + +static void set_timeout(int fd) +{ + struct timeval timeout; + + timeout.tv_sec = 3; + timeout.tv_usec = 0; + if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, (char *)&timeout, + sizeof(timeout)) < 0) + error(1, errno, "cannot set timeout, setsockopt failed"); +} + +static void check_recv_pkts(int fd, int *correct_payload, + int correct_num_pkts) +{ + static char buffer[IP_MAXPACKET + ETH_HLEN + 1]; + struct iphdr *iph = (struct iphdr *)(buffer + ETH_HLEN); + struct ipv6hdr *ip6h = (struct ipv6hdr *)(buffer + ETH_HLEN); + struct tcphdr *tcph; + bool bad_packet = false; + int tcp_ext_len = 0; + int ip_ext_len = 0; + int pkt_size = -1; + int data_len = 0; + int num_pkt = 0; + int i; + + vlog("Expected {"); + for (i = 0; i < correct_num_pkts; i++) + vlog("%d ", correct_payload[i]); + vlog("}, Total %d packets\nReceived {", correct_num_pkts); + + while (1) { + ip_ext_len = 0; + pkt_size = recv(fd, buffer, IP_MAXPACKET + ETH_HLEN + 1, 0); + if (pkt_size < 0) + error(1, errno, "could not receive"); + + if (iph->version == 4) + ip_ext_len = (iph->ihl - 5) * 4; + else if (ip6h->version == 6 && ip6h->nexthdr != IPPROTO_TCP) + ip_ext_len = MIN_EXTHDR_SIZE; + + tcph = (struct tcphdr *)(buffer + tcp_offset + ip_ext_len); + + if (tcph->fin) + break; + + tcp_ext_len = (tcph->doff - 5) * 4; + data_len = pkt_size - total_hdr_len - tcp_ext_len - ip_ext_len; + /* Min ethernet frame payload is 46(ETH_ZLEN - ETH_HLEN) by RFC 802.3. + * Ipv4/tcp packets without at least 6 bytes of data will be padded. + * Packet sockets are protocol agnostic, and will not trim the padding. + */ + if (pkt_size == ETH_ZLEN && iph->version == 4) { + data_len = ntohs(iph->tot_len) + - sizeof(struct tcphdr) - sizeof(struct iphdr); + } + vlog("%d ", data_len); + if (data_len != correct_payload[num_pkt]) { + vlog("[!=%d]", correct_payload[num_pkt]); + bad_packet = true; + } + num_pkt++; + } + vlog("}, Total %d packets.\n", num_pkt); + if (num_pkt != correct_num_pkts) + error(1, 0, "incorrect number of packets"); + if (bad_packet) + error(1, 0, "incorrect packet geometry"); + + printf("Test succeeded\n\n"); +} + +static void gro_sender(void) +{ + static char fin_pkt[MAX_HDR_LEN]; + struct sockaddr_ll daddr = {}; + int txfd = -1; + + txfd = socket(PF_PACKET, SOCK_RAW, IPPROTO_RAW); + if (txfd < 0) + error(1, errno, "socket creation"); + + memset(&daddr, 0, sizeof(daddr)); + daddr.sll_ifindex = if_nametoindex(ifname); + if (daddr.sll_ifindex == 0) + error(1, errno, "if_nametoindex"); + daddr.sll_family = AF_PACKET; + memcpy(daddr.sll_addr, dst_mac, ETH_ALEN); + daddr.sll_halen = ETH_ALEN; + create_packet(fin_pkt, PAYLOAD_LEN * 2, 0, 0, 1); + + if (strcmp(testname, "data") == 0) { + send_data_pkts(txfd, &daddr, PAYLOAD_LEN, PAYLOAD_LEN); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + + send_data_pkts(txfd, &daddr, PAYLOAD_LEN, PAYLOAD_LEN / 2); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + + send_data_pkts(txfd, &daddr, PAYLOAD_LEN / 2, PAYLOAD_LEN); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + } else if (strcmp(testname, "ack") == 0) { + send_ack(txfd, &daddr); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + } else if (strcmp(testname, "flags") == 0) { + send_flags(txfd, &daddr, 1, 0, 0, 0); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + + send_flags(txfd, &daddr, 0, 1, 0, 0); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + + send_flags(txfd, &daddr, 0, 0, 1, 0); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + + send_flags(txfd, &daddr, 0, 0, 0, 1); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + } else if (strcmp(testname, "tcp") == 0) { + send_changed_checksum(txfd, &daddr); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + + send_changed_seq(txfd, &daddr); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + + send_changed_ts(txfd, &daddr); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + + send_diff_opt(txfd, &daddr); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + } else if (strcmp(testname, "ip") == 0) { + send_changed_ECN(txfd, &daddr); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + + send_changed_tos(txfd, &daddr); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + if (proto == PF_INET) { + /* Modified packets may be received out of order. + * Sleep function added to enforce test boundaries + * so that fin pkts are not received prior to other pkts. + */ + sleep(1); + send_changed_ttl(txfd, &daddr); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + + sleep(1); + send_ip_options(txfd, &daddr); + sleep(1); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + + sleep(1); + send_fragment4(txfd, &daddr); + sleep(1); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + } else if (proto == PF_INET6) { + sleep(1); + send_fragment6(txfd, &daddr); + sleep(1); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + + sleep(1); + /* send IPv6 packets with ext header with same payload */ + send_ipv6_exthdr(txfd, &daddr, EXT_PAYLOAD_1, EXT_PAYLOAD_1); + sleep(1); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + + sleep(1); + /* send IPv6 packets with ext header with different payload */ + send_ipv6_exthdr(txfd, &daddr, EXT_PAYLOAD_1, EXT_PAYLOAD_2); + sleep(1); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + } + } else if (strcmp(testname, "large") == 0) { + /* 20 is the difference between min iphdr size + * and min ipv6hdr size. Like MAX_HDR_SIZE, + * MAX_PAYLOAD is defined with the larger header of the two. + */ + int offset = proto == PF_INET ? 20 : 0; + int remainder = (MAX_PAYLOAD + offset) % MSS; + + send_large(txfd, &daddr, remainder); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + + send_large(txfd, &daddr, remainder + 1); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + } else { + error(1, 0, "Unknown testcase"); + } + + if (close(txfd)) + error(1, errno, "socket close"); +} + +static void gro_receiver(void) +{ + static int correct_payload[NUM_PACKETS]; + int rxfd = -1; + + rxfd = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_NONE)); + if (rxfd < 0) + error(1, 0, "socket creation"); + setup_sock_filter(rxfd); + set_timeout(rxfd); + bind_packetsocket(rxfd); + + memset(correct_payload, 0, sizeof(correct_payload)); + + if (strcmp(testname, "data") == 0) { + printf("pure data packet of same size: "); + correct_payload[0] = PAYLOAD_LEN * 2; + check_recv_pkts(rxfd, correct_payload, 1); + + printf("large data packets followed by a smaller one: "); + correct_payload[0] = PAYLOAD_LEN * 1.5; + check_recv_pkts(rxfd, correct_payload, 1); + + printf("small data packets followed by a larger one: "); + correct_payload[0] = PAYLOAD_LEN / 2; + correct_payload[1] = PAYLOAD_LEN; + check_recv_pkts(rxfd, correct_payload, 2); + } else if (strcmp(testname, "ack") == 0) { + printf("duplicate ack and pure ack: "); + check_recv_pkts(rxfd, correct_payload, 3); + } else if (strcmp(testname, "flags") == 0) { + correct_payload[0] = PAYLOAD_LEN * 3; + correct_payload[1] = PAYLOAD_LEN * 2; + + printf("psh flag ends coalescing: "); + check_recv_pkts(rxfd, correct_payload, 2); + + correct_payload[0] = PAYLOAD_LEN * 2; + correct_payload[1] = 0; + correct_payload[2] = PAYLOAD_LEN * 2; + printf("syn flag ends coalescing: "); + check_recv_pkts(rxfd, correct_payload, 3); + + printf("rst flag ends coalescing: "); + check_recv_pkts(rxfd, correct_payload, 3); + + printf("urg flag ends coalescing: "); + check_recv_pkts(rxfd, correct_payload, 3); + } else if (strcmp(testname, "tcp") == 0) { + correct_payload[0] = PAYLOAD_LEN; + correct_payload[1] = PAYLOAD_LEN; + correct_payload[2] = PAYLOAD_LEN; + correct_payload[3] = PAYLOAD_LEN; + + printf("changed checksum does not coalesce: "); + check_recv_pkts(rxfd, correct_payload, 2); + + printf("Wrong Seq number doesn't coalesce: "); + check_recv_pkts(rxfd, correct_payload, 2); + + printf("Different timestamp doesn't coalesce: "); + correct_payload[0] = PAYLOAD_LEN * 2; + check_recv_pkts(rxfd, correct_payload, 4); + + printf("Different options doesn't coalesce: "); + correct_payload[0] = PAYLOAD_LEN * 2; + check_recv_pkts(rxfd, correct_payload, 2); + } else if (strcmp(testname, "ip") == 0) { + correct_payload[0] = PAYLOAD_LEN; + correct_payload[1] = PAYLOAD_LEN; + + printf("different ECN doesn't coalesce: "); + check_recv_pkts(rxfd, correct_payload, 2); + + printf("different tos doesn't coalesce: "); + check_recv_pkts(rxfd, correct_payload, 2); + + if (proto == PF_INET) { + printf("different ttl doesn't coalesce: "); + check_recv_pkts(rxfd, correct_payload, 2); + + printf("ip options doesn't coalesce: "); + correct_payload[2] = PAYLOAD_LEN; + check_recv_pkts(rxfd, correct_payload, 3); + + printf("fragmented ip4 doesn't coalesce: "); + check_recv_pkts(rxfd, correct_payload, 2); + } else if (proto == PF_INET6) { + /* GRO doesn't check for ipv6 hop limit when flushing. + * Hence no corresponding test to the ipv4 case. + */ + printf("fragmented ip6 doesn't coalesce: "); + correct_payload[0] = PAYLOAD_LEN * 2; + correct_payload[1] = PAYLOAD_LEN; + correct_payload[2] = PAYLOAD_LEN; + check_recv_pkts(rxfd, correct_payload, 3); + + printf("ipv6 with ext header does coalesce: "); + correct_payload[0] = PAYLOAD_LEN * 2; + check_recv_pkts(rxfd, correct_payload, 1); + + printf("ipv6 with ext header with different payloads doesn't coalesce: "); + correct_payload[0] = PAYLOAD_LEN; + correct_payload[1] = PAYLOAD_LEN; + check_recv_pkts(rxfd, correct_payload, 2); + } + } else if (strcmp(testname, "large") == 0) { + int offset = proto == PF_INET ? 20 : 0; + int remainder = (MAX_PAYLOAD + offset) % MSS; + + correct_payload[0] = (MAX_PAYLOAD + offset); + correct_payload[1] = remainder; + printf("Shouldn't coalesce if exceed IP max pkt size: "); + check_recv_pkts(rxfd, correct_payload, 2); + + /* last segment sent individually, doesn't start new segment */ + correct_payload[0] = correct_payload[0] - remainder; + correct_payload[1] = remainder + 1; + correct_payload[2] = remainder + 1; + check_recv_pkts(rxfd, correct_payload, 3); + } else { + error(1, 0, "Test case error, should never trigger"); + } + + if (close(rxfd)) + error(1, 0, "socket close"); +} + +static void parse_args(int argc, char **argv) +{ + static const struct option opts[] = { + { "daddr", required_argument, NULL, 'd' }, + { "dmac", required_argument, NULL, 'D' }, + { "iface", required_argument, NULL, 'i' }, + { "ipv4", no_argument, NULL, '4' }, + { "ipv6", no_argument, NULL, '6' }, + { "rx", no_argument, NULL, 'r' }, + { "saddr", required_argument, NULL, 's' }, + { "smac", required_argument, NULL, 'S' }, + { "test", required_argument, NULL, 't' }, + { "verbose", no_argument, NULL, 'v' }, + { 0, 0, 0, 0 } + }; + int c; + + while ((c = getopt_long(argc, argv, "46d:D:i:rs:S:t:v", opts, NULL)) != -1) { + switch (c) { + case '4': + proto = PF_INET; + ethhdr_proto = htons(ETH_P_IP); + break; + case '6': + proto = PF_INET6; + ethhdr_proto = htons(ETH_P_IPV6); + break; + case 'd': + addr4_dst = addr6_dst = optarg; + break; + case 'D': + dmac = optarg; + break; + case 'i': + ifname = optarg; + break; + case 'r': + tx_socket = false; + break; + case 's': + addr4_src = addr6_src = optarg; + break; + case 'S': + smac = optarg; + break; + case 't': + testname = optarg; + break; + case 'v': + verbose = true; + break; + default: + error(1, 0, "%s invalid option %c\n", __func__, c); + break; + } + } +} + +int main(int argc, char **argv) +{ + parse_args(argc, argv); + + if (proto == PF_INET) { + tcp_offset = ETH_HLEN + sizeof(struct iphdr); + total_hdr_len = tcp_offset + sizeof(struct tcphdr); + } else if (proto == PF_INET6) { + tcp_offset = ETH_HLEN + sizeof(struct ipv6hdr); + total_hdr_len = MAX_HDR_LEN; + } else { + error(1, 0, "Protocol family is not ipv4 or ipv6"); + } + + read_MAC(src_mac, smac); + read_MAC(dst_mac, dmac); + + if (tx_socket) + gro_sender(); + else + gro_receiver(); + + fprintf(stderr, "Gro::%s test passed.\n", testname); + return 0; +} diff --git a/tools/testing/selftests/net/gro.sh b/tools/testing/selftests/net/gro.sh new file mode 100755 index 000000000000..02c21ff4ca81 --- /dev/null +++ b/tools/testing/selftests/net/gro.sh @@ -0,0 +1,104 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +readonly SERVER_MAC="aa:00:00:00:00:02" +readonly CLIENT_MAC="aa:00:00:00:00:01" +readonly TESTS=("data" "ack" "flags" "tcp" "ip" "large") +readonly PROTOS=("ipv4" "ipv6") +dev="" +test="all" +proto="ipv4" + +run_test() { + local server_pid=0 + local exit_code=0 + local protocol=$1 + local test=$2 + local ARGS=( "--${protocol}" "--dmac" "${SERVER_MAC}" \ + "--smac" "${CLIENT_MAC}" "--test" "${test}" "--verbose" ) + + setup_ns + # Each test is run 3 times to deflake, because given the receive timing, + # not all packets that should coalesce will be considered in the same flow + # on every try. + for tries in {1..3}; do + # Actual test starts here + ip netns exec $server_ns ./gro "${ARGS[@]}" "--rx" "--iface" "server" \ + 1>>log.txt & + server_pid=$! + sleep 0.5 # to allow for socket init + ip netns exec $client_ns ./gro "${ARGS[@]}" "--iface" "client" \ + 1>>log.txt + wait "${server_pid}" + exit_code=$? + if [[ ${test} == "large" && -n "${KSFT_MACHINE_SLOW}" && \ + ${exit_code} -ne 0 ]]; then + echo "Ignoring errors due to slow environment" 1>&2 + exit_code=0 + fi + if [[ "${exit_code}" -eq 0 ]]; then + break; + fi + done + cleanup_ns + echo ${exit_code} +} + +run_all_tests() { + local failed_tests=() + for proto in "${PROTOS[@]}"; do + for test in "${TESTS[@]}"; do + echo "running test ${proto} ${test}" >&2 + exit_code=$(run_test $proto $test) + if [[ "${exit_code}" -ne 0 ]]; then + failed_tests+=("${proto}_${test}") + fi; + done; + done + if [[ ${#failed_tests[@]} -ne 0 ]]; then + echo "failed tests: ${failed_tests[*]}. \ + Please see log.txt for more logs" + exit 1 + else + echo "All Tests Succeeded!" + fi; +} + +usage() { + echo "Usage: $0 \ + [-i <DEV>] \ + [-t data|ack|flags|tcp|ip|large] \ + [-p <ipv4|ipv6>]" 1>&2; + exit 1; +} + +while getopts "i:t:p:" opt; do + case "${opt}" in + i) + dev="${OPTARG}" + ;; + t) + test="${OPTARG}" + ;; + p) + proto="${OPTARG}" + ;; + *) + usage + ;; + esac +done + +if [ -n "$dev" ]; then + source setup_loopback.sh +else + source setup_veth.sh +fi + +setup +trap cleanup EXIT +if [[ "${test}" == "all" ]]; then + run_all_tests +else + run_test "${proto}" "${test}" +fi; diff --git a/tools/testing/selftests/net/hsr/Makefile b/tools/testing/selftests/net/hsr/Makefile new file mode 100644 index 000000000000..92c1d9d080cd --- /dev/null +++ b/tools/testing/selftests/net/hsr/Makefile @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0 + +top_srcdir = ../../../../.. + +TEST_PROGS := hsr_ping.sh + +include ../../lib.mk diff --git a/tools/testing/selftests/net/hsr/config b/tools/testing/selftests/net/hsr/config new file mode 100644 index 000000000000..22061204fb69 --- /dev/null +++ b/tools/testing/selftests/net/hsr/config @@ -0,0 +1,4 @@ +CONFIG_IPV6=y +CONFIG_NET_SCH_NETEM=m +CONFIG_HSR=y +CONFIG_VETH=y diff --git a/tools/testing/selftests/net/hsr/hsr_ping.sh b/tools/testing/selftests/net/hsr/hsr_ping.sh new file mode 100755 index 000000000000..1c6457e54625 --- /dev/null +++ b/tools/testing/selftests/net/hsr/hsr_ping.sh @@ -0,0 +1,276 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +ret=0 +ksft_skip=4 +ipv6=true + +optstring="h4" +usage() { + echo "Usage: $0 [OPTION]" + echo -e "\t-4: IPv4 only: disable IPv6 tests (default: test both IPv4 and IPv6)" +} + +while getopts "$optstring" option;do + case "$option" in + "h") + usage $0 + exit 0 + ;; + "4") + ipv6=false + ;; + "?") + usage $0 + exit 1 + ;; +esac +done + +sec=$(date +%s) +rndh=$(printf %x $sec)-$(mktemp -u XXXXXX) +ns1="ns1-$rndh" +ns2="ns2-$rndh" +ns3="ns3-$rndh" + +cleanup() +{ + local netns + for netns in "$ns1" "$ns2" "$ns3" ;do + ip netns del $netns + done +} + +# $1: IP address +is_v6() +{ + [ -z "${1##*:*}" ] +} + +do_ping() +{ + local netns="$1" + local connect_addr="$2" + local ping_args="-q -c 2" + + if is_v6 "${connect_addr}"; then + $ipv6 || return 0 + ping_args="${ping_args} -6" + fi + + ip netns exec ${netns} ping ${ping_args} $connect_addr >/dev/null + if [ $? -ne 0 ] ; then + echo "$netns -> $connect_addr connectivity [ FAIL ]" 1>&2 + ret=1 + return 1 + fi + + return 0 +} + +do_ping_long() +{ + local netns="$1" + local connect_addr="$2" + local ping_args="-q -c 10" + + if is_v6 "${connect_addr}"; then + $ipv6 || return 0 + ping_args="${ping_args} -6" + fi + + OUT="$(LANG=C ip netns exec ${netns} ping ${ping_args} $connect_addr | grep received)" + if [ $? -ne 0 ] ; then + echo "$netns -> $connect_addr ping [ FAIL ]" 1>&2 + ret=1 + return 1 + fi + + VAL="$(echo $OUT | cut -d' ' -f1-8)" + if [ "$VAL" != "10 packets transmitted, 10 received, 0% packet loss," ] + then + echo "$netns -> $connect_addr ping TEST [ FAIL ]" + echo "Expect to send and receive 10 packets and no duplicates." + echo "Full message: ${OUT}." + ret=1 + return 1 + fi + + return 0 +} + +stop_if_error() +{ + local msg="$1" + + if [ ${ret} -ne 0 ]; then + echo "FAIL: ${msg}" 1>&2 + exit ${ret} + fi +} + +do_complete_ping_test() +{ + echo "INFO: Initial validation ping." + # Each node has to be able each one. + do_ping "$ns1" 100.64.0.2 + do_ping "$ns2" 100.64.0.1 + do_ping "$ns3" 100.64.0.1 + stop_if_error "Initial validation failed." + + do_ping "$ns1" 100.64.0.3 + do_ping "$ns2" 100.64.0.3 + do_ping "$ns3" 100.64.0.2 + + do_ping "$ns1" dead:beef:1::2 + do_ping "$ns1" dead:beef:1::3 + do_ping "$ns2" dead:beef:1::1 + do_ping "$ns2" dead:beef:1::2 + do_ping "$ns3" dead:beef:1::1 + do_ping "$ns3" dead:beef:1::2 + + stop_if_error "Initial validation failed." + +# Wait until supervisor all supervision frames have been processed and the node +# entries have been merged. Otherwise duplicate frames will be observed which is +# valid at this stage. + WAIT=5 + while [ ${WAIT} -gt 0 ] + do + grep 00:00:00:00:00:00 /sys/kernel/debug/hsr/hsr*/node_table + if [ $? -ne 0 ] + then + break + fi + sleep 1 + let "WAIT = WAIT - 1" + done + +# Just a safety delay in case the above check didn't handle it. + sleep 1 + + echo "INFO: Longer ping test." + do_ping_long "$ns1" 100.64.0.2 + do_ping_long "$ns1" dead:beef:1::2 + do_ping_long "$ns1" 100.64.0.3 + do_ping_long "$ns1" dead:beef:1::3 + + stop_if_error "Longer ping test failed." + + do_ping_long "$ns2" 100.64.0.1 + do_ping_long "$ns2" dead:beef:1::1 + do_ping_long "$ns2" 100.64.0.3 + do_ping_long "$ns2" dead:beef:1::2 + stop_if_error "Longer ping test failed." + + do_ping_long "$ns3" 100.64.0.1 + do_ping_long "$ns3" dead:beef:1::1 + do_ping_long "$ns3" 100.64.0.2 + do_ping_long "$ns3" dead:beef:1::2 + stop_if_error "Longer ping test failed." + + echo "INFO: Cutting one link." + do_ping_long "$ns1" 100.64.0.3 & + + sleep 3 + ip -net "$ns3" link set ns3eth1 down + wait + + ip -net "$ns3" link set ns3eth1 up + + stop_if_error "Failed with one link down." + + echo "INFO: Delay the link and drop a few packages." + tc -net "$ns3" qdisc add dev ns3eth1 root netem delay 50ms + tc -net "$ns2" qdisc add dev ns2eth1 root netem delay 5ms loss 25% + + do_ping_long "$ns1" 100.64.0.2 + do_ping_long "$ns1" 100.64.0.3 + + stop_if_error "Failed with delay and packetloss." + + do_ping_long "$ns2" 100.64.0.1 + do_ping_long "$ns2" 100.64.0.3 + + stop_if_error "Failed with delay and packetloss." + + do_ping_long "$ns3" 100.64.0.1 + do_ping_long "$ns3" 100.64.0.2 + stop_if_error "Failed with delay and packetloss." + + echo "INFO: All good." +} + +setup_hsr_interfaces() +{ + local HSRv="$1" + + echo "INFO: preparing interfaces for HSRv${HSRv}." +# Three HSR nodes. Each node has one link to each of its neighbour, two links in total. +# +# ns1eth1 ----- ns2eth1 +# hsr1 hsr2 +# ns1eth2 ns2eth2 +# | | +# ns3eth1 ns3eth2 +# \ / +# hsr3 +# + # Interfaces + ip link add ns1eth1 netns "$ns1" type veth peer name ns2eth1 netns "$ns2" + ip link add ns1eth2 netns "$ns1" type veth peer name ns3eth1 netns "$ns3" + ip link add ns3eth2 netns "$ns3" type veth peer name ns2eth2 netns "$ns2" + + # HSRv0/1 + ip -net "$ns1" link add name hsr1 type hsr slave1 ns1eth1 slave2 ns1eth2 supervision 45 version $HSRv proto 0 + ip -net "$ns2" link add name hsr2 type hsr slave1 ns2eth1 slave2 ns2eth2 supervision 45 version $HSRv proto 0 + ip -net "$ns3" link add name hsr3 type hsr slave1 ns3eth1 slave2 ns3eth2 supervision 45 version $HSRv proto 0 + + # IP for HSR + ip -net "$ns1" addr add 100.64.0.1/24 dev hsr1 + ip -net "$ns1" addr add dead:beef:1::1/64 dev hsr1 nodad + ip -net "$ns2" addr add 100.64.0.2/24 dev hsr2 + ip -net "$ns2" addr add dead:beef:1::2/64 dev hsr2 nodad + ip -net "$ns3" addr add 100.64.0.3/24 dev hsr3 + ip -net "$ns3" addr add dead:beef:1::3/64 dev hsr3 nodad + + # All Links up + ip -net "$ns1" link set ns1eth1 up + ip -net "$ns1" link set ns1eth2 up + ip -net "$ns1" link set hsr1 up + + ip -net "$ns2" link set ns2eth1 up + ip -net "$ns2" link set ns2eth2 up + ip -net "$ns2" link set hsr2 up + + ip -net "$ns3" link set ns3eth1 up + ip -net "$ns3" link set ns3eth2 up + ip -net "$ns3" link set hsr3 up +} + +ip -Version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +trap cleanup EXIT + +for i in "$ns1" "$ns2" "$ns3" ;do + ip netns add $i || exit $ksft_skip + ip -net $i link set lo up +done + +setup_hsr_interfaces 0 +do_complete_ping_test +cleanup + +for i in "$ns1" "$ns2" "$ns3" ;do + ip netns add $i || exit $ksft_skip + ip -net $i link set lo up +done + +setup_hsr_interfaces 1 +do_complete_ping_test + +exit $ret diff --git a/tools/testing/selftests/net/hwtstamp_config.c b/tools/testing/selftests/net/hwtstamp_config.c index e1fdee841021..170728c96c46 100644 --- a/tools/testing/selftests/net/hwtstamp_config.c +++ b/tools/testing/selftests/net/hwtstamp_config.c @@ -16,6 +16,8 @@ #include <linux/net_tstamp.h> #include <linux/sockios.h> +#include "kselftest.h" + static int lookup_value(const char **names, int size, const char *name) { @@ -50,7 +52,7 @@ static const char *tx_types[] = { TX_TYPE(ONESTEP_SYNC) #undef TX_TYPE }; -#define N_TX_TYPES ((int)(sizeof(tx_types) / sizeof(tx_types[0]))) +#define N_TX_TYPES ((int)(ARRAY_SIZE(tx_types))) static const char *rx_filters[] = { #define RX_FILTER(name) [HWTSTAMP_FILTER_ ## name] = #name @@ -71,7 +73,7 @@ static const char *rx_filters[] = { RX_FILTER(PTP_V2_DELAY_REQ), #undef RX_FILTER }; -#define N_RX_FILTERS ((int)(sizeof(rx_filters) / sizeof(rx_filters[0]))) +#define N_RX_FILTERS ((int)(ARRAY_SIZE(rx_filters))) static void usage(void) { diff --git a/tools/testing/selftests/net/icmp.sh b/tools/testing/selftests/net/icmp.sh new file mode 100755 index 000000000000..824cb0e35eff --- /dev/null +++ b/tools/testing/selftests/net/icmp.sh @@ -0,0 +1,72 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Test for checking ICMP response with dummy address instead of 0.0.0.0. +# Sets up two namespaces like: +# +----------------------+ +--------------------+ +# | ns1 | v4-via-v6 routes: | ns2 | +# | | ' | | +# | +--------+ -> 172.16.1.0/24 -> +--------+ | +# | | veth0 +--------------------------+ veth0 | | +# | +--------+ <- 172.16.0.0/24 <- +--------+ | +# | 172.16.0.1 | | 2001:db8:1::2/64 | +# | 2001:db8:1::2/64 | | | +# +----------------------+ +--------------------+ +# +# And then tries to ping 172.16.1.1 from ns1. This results in a "net +# unreachable" message being sent from ns2, but there is no IPv4 address set in +# that address space, so the kernel should substitute the dummy address +# 192.0.0.8 defined in RFC7600. + +source lib.sh + +H1_IP=172.16.0.1/32 +H1_IP6=2001:db8:1::1 +RT1=172.16.1.0/24 +PINGADDR=172.16.1.1 +RT2=172.16.0.0/24 +H2_IP6=2001:db8:1::2 + +TMPFILE=$(mktemp) + +cleanup() +{ + rm -f "$TMPFILE" + cleanup_ns $NS1 $NS2 +} + +trap cleanup EXIT + +# Namespaces +setup_ns NS1 NS2 + +# Connectivity +ip -netns $NS1 link add veth0 type veth peer name veth0 netns $NS2 +ip -netns $NS1 link set dev veth0 up +ip -netns $NS2 link set dev veth0 up +ip -netns $NS1 addr add $H1_IP dev veth0 +ip -netns $NS1 addr add $H1_IP6/64 dev veth0 nodad +ip -netns $NS2 addr add $H2_IP6/64 dev veth0 nodad +ip -netns $NS1 route add $RT1 via inet6 $H2_IP6 +ip -netns $NS2 route add $RT2 via inet6 $H1_IP6 + +# Make sure ns2 will respond with ICMP unreachable +ip netns exec $NS2 sysctl -qw net.ipv4.icmp_ratelimit=0 net.ipv4.ip_forward=1 + +# Run the test - a ping runs in the background, and we capture ICMP responses +# with tcpdump; -c 1 means it should exit on the first ping, but add a timeout +# in case something goes wrong +ip netns exec $NS1 ping -w 3 -i 0.5 $PINGADDR >/dev/null & +ip netns exec $NS1 timeout 10 tcpdump -tpni veth0 -c 1 'icmp and icmp[icmptype] != icmp-echo' > $TMPFILE 2>/dev/null + +# Parse response and check for dummy address +# tcpdump output looks like: +# IP 192.0.0.8 > 172.16.0.1: ICMP net 172.16.1.1 unreachable, length 92 +RESP_IP=$(awk '{print $2}' < $TMPFILE) +if [[ "$RESP_IP" != "192.0.0.8" ]]; then + echo "FAIL - got ICMP response from $RESP_IP, should be 192.0.0.8" + exit 1 +else + echo "OK" + exit 0 +fi diff --git a/tools/testing/selftests/net/icmp_redirect.sh b/tools/testing/selftests/net/icmp_redirect.sh index bf361f30d6ef..d6f0e449c029 100755 --- a/tools/testing/selftests/net/icmp_redirect.sh +++ b/tools/testing/selftests/net/icmp_redirect.sh @@ -19,6 +19,7 @@ # Route on r1 changed to go to r2 via eth0. This causes a redirect to be sent # from r1 to h1 telling h1 to use r2 when talking to h2. +source lib.sh VERBOSE=0 PAUSE_ON_FAIL=no @@ -63,10 +64,14 @@ log_test() local rc=$1 local expected=$2 local msg="$3" + local xfail=$4 if [ ${rc} -eq ${expected} ]; then printf "TEST: %-60s [ OK ]\n" "${msg}" nsuccess=$((nsuccess+1)) + elif [ ${rc} -eq ${xfail} ]; then + printf "TEST: %-60s [XFAIL]\n" "${msg}" + nxfail=$((nxfail+1)) else ret=1 nfail=$((nfail+1)) @@ -136,11 +141,7 @@ get_linklocal() cleanup() { - local ns - - for ns in h1 h2 r1 r2; do - ip netns del $ns 2>/dev/null - done + cleanup_ns $h1 $h2 $r1 $r2 } create_vrf() @@ -167,102 +168,99 @@ setup() # # create nodes as namespaces - # - for ns in h1 h2 r1 r2; do - ip netns add $ns - ip -netns $ns li set lo up - - case "${ns}" in - h[12]) ip netns exec $ns sysctl -q -w net.ipv4.conf.all.accept_redirects=1 - ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=0 - ip netns exec $ns sysctl -q -w net.ipv6.conf.all.accept_redirects=1 - ip netns exec $ns sysctl -q -w net.ipv6.conf.all.keep_addr_on_down=1 - ;; - r[12]) ip netns exec $ns sysctl -q -w net.ipv4.ip_forward=1 - ip netns exec $ns sysctl -q -w net.ipv4.conf.all.send_redirects=1 - ip netns exec $ns sysctl -q -w net.ipv4.conf.default.rp_filter=0 - ip netns exec $ns sysctl -q -w net.ipv4.conf.all.rp_filter=0 - - ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=1 - ip netns exec $ns sysctl -q -w net.ipv6.route.mtu_expires=10 - esac + setup_ns h1 h2 r1 r2 + for ns in $h1 $h2 $r1 $r2; do + if echo $ns | grep -q h[12]-; then + ip netns exec $ns sysctl -q -w net.ipv4.conf.all.accept_redirects=1 + ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=0 + ip netns exec $ns sysctl -q -w net.ipv6.conf.all.accept_redirects=1 + ip netns exec $ns sysctl -q -w net.ipv6.conf.all.keep_addr_on_down=1 + else + ip netns exec $ns sysctl -q -w net.ipv4.ip_forward=1 + ip netns exec $ns sysctl -q -w net.ipv4.conf.all.send_redirects=1 + ip netns exec $ns sysctl -q -w net.ipv4.conf.default.rp_filter=0 + ip netns exec $ns sysctl -q -w net.ipv4.conf.all.rp_filter=0 + + ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=1 + ip netns exec $ns sysctl -q -w net.ipv6.route.mtu_expires=10 + fi done # # create interconnects # - ip -netns h1 li add eth0 type veth peer name r1h1 - ip -netns h1 li set r1h1 netns r1 name eth0 up + ip -netns $h1 li add eth0 type veth peer name r1h1 + ip -netns $h1 li set r1h1 netns $r1 name eth0 up - ip -netns h1 li add eth1 type veth peer name r2h1 - ip -netns h1 li set r2h1 netns r2 name eth0 up + ip -netns $h1 li add eth1 type veth peer name r2h1 + ip -netns $h1 li set r2h1 netns $r2 name eth0 up - ip -netns h2 li add eth0 type veth peer name r2h2 - ip -netns h2 li set eth0 up - ip -netns h2 li set r2h2 netns r2 name eth2 up + ip -netns $h2 li add eth0 type veth peer name r2h2 + ip -netns $h2 li set eth0 up + ip -netns $h2 li set r2h2 netns $r2 name eth2 up - ip -netns r1 li add eth1 type veth peer name r2r1 - ip -netns r1 li set eth1 up - ip -netns r1 li set r2r1 netns r2 name eth1 up + ip -netns $r1 li add eth1 type veth peer name r2r1 + ip -netns $r1 li set eth1 up + ip -netns $r1 li set r2r1 netns $r2 name eth1 up # # h1 # if [ "${WITH_VRF}" = "yes" ]; then - create_vrf "h1" + create_vrf "$h1" H1_VRF_ARG="vrf ${VRF}" H1_PING_ARG="-I ${VRF}" else H1_VRF_ARG= H1_PING_ARG= fi - ip -netns h1 li add br0 type bridge + ip -netns $h1 li add br0 type bridge if [ "${WITH_VRF}" = "yes" ]; then - ip -netns h1 li set br0 vrf ${VRF} up + ip -netns $h1 li set br0 vrf ${VRF} up else - ip -netns h1 li set br0 up + ip -netns $h1 li set br0 up fi - ip -netns h1 addr add dev br0 ${H1_N1_IP}/24 - ip -netns h1 -6 addr add dev br0 ${H1_N1_IP6}/64 nodad - ip -netns h1 li set eth0 master br0 up - ip -netns h1 li set eth1 master br0 up + ip -netns $h1 addr add dev br0 ${H1_N1_IP}/24 + ip -netns $h1 -6 addr add dev br0 ${H1_N1_IP6}/64 nodad + ip -netns $h1 li set eth0 master br0 up + ip -netns $h1 li set eth1 master br0 up # # h2 # - ip -netns h2 addr add dev eth0 ${H2_N2_IP}/24 - ip -netns h2 ro add default via ${R2_N2_IP} dev eth0 - ip -netns h2 -6 addr add dev eth0 ${H2_N2_IP6}/64 nodad - ip -netns h2 -6 ro add default via ${R2_N2_IP6} dev eth0 + ip -netns $h2 addr add dev eth0 ${H2_N2_IP}/24 + ip -netns $h2 ro add default via ${R2_N2_IP} dev eth0 + ip -netns $h2 -6 addr add dev eth0 ${H2_N2_IP6}/64 nodad + ip -netns $h2 -6 ro add default via ${R2_N2_IP6} dev eth0 # # r1 # - ip -netns r1 addr add dev eth0 ${R1_N1_IP}/24 - ip -netns r1 -6 addr add dev eth0 ${R1_N1_IP6}/64 nodad - ip -netns r1 addr add dev eth1 ${R1_R2_N1_IP}/30 - ip -netns r1 -6 addr add dev eth1 ${R1_R2_N1_IP6}/126 nodad + ip -netns $r1 addr add dev eth0 ${R1_N1_IP}/24 + ip -netns $r1 -6 addr add dev eth0 ${R1_N1_IP6}/64 nodad + ip -netns $r1 addr add dev eth1 ${R1_R2_N1_IP}/30 + ip -netns $r1 -6 addr add dev eth1 ${R1_R2_N1_IP6}/126 nodad # # r2 # - ip -netns r2 addr add dev eth0 ${R2_N1_IP}/24 - ip -netns r2 -6 addr add dev eth0 ${R2_N1_IP6}/64 nodad - ip -netns r2 addr add dev eth1 ${R2_R1_N1_IP}/30 - ip -netns r2 -6 addr add dev eth1 ${R2_R1_N1_IP6}/126 nodad - ip -netns r2 addr add dev eth2 ${R2_N2_IP}/24 - ip -netns r2 -6 addr add dev eth2 ${R2_N2_IP6}/64 nodad + ip -netns $r2 addr add dev eth0 ${R2_N1_IP}/24 + ip -netns $r2 -6 addr add dev eth0 ${R2_N1_IP6}/64 nodad + ip -netns $r2 addr add dev eth1 ${R2_R1_N1_IP}/30 + ip -netns $r2 -6 addr add dev eth1 ${R2_R1_N1_IP6}/126 nodad + ip -netns $r2 addr add dev eth2 ${R2_N2_IP}/24 + ip -netns $r2 -6 addr add dev eth2 ${R2_N2_IP6}/64 nodad sleep 2 - R1_LLADDR=$(get_linklocal r1 eth0) + R1_LLADDR=$(get_linklocal $r1 eth0) if [ $? -ne 0 ]; then echo "Error: Failed to get link-local address of r1's eth0" exit 1 fi log_debug "initial gateway is R1's lladdr = ${R1_LLADDR}" - R2_LLADDR=$(get_linklocal r2 eth0) + R2_LLADDR=$(get_linklocal $r2 eth0) if [ $? -ne 0 ]; then echo "Error: Failed to get link-local address of r2's eth0" exit 1 @@ -274,8 +272,8 @@ change_h2_mtu() { local mtu=$1 - run_cmd ip -netns h2 li set eth0 mtu ${mtu} - run_cmd ip -netns r2 li set eth2 mtu ${mtu} + run_cmd ip -netns $h2 li set eth0 mtu ${mtu} + run_cmd ip -netns $r2 li set eth2 mtu ${mtu} } check_exception() @@ -287,63 +285,64 @@ check_exception() # From 172.16.1.101: icmp_seq=1 Redirect Host(New nexthop: 172.16.1.102) if [ "$VERBOSE" = "1" ]; then echo "Commands to check for exception:" - run_cmd ip -netns h1 ro get ${H1_VRF_ARG} ${H2_N2_IP} - run_cmd ip -netns h1 -6 ro get ${H1_VRF_ARG} ${H2_N2_IP6} + run_cmd ip -netns $h1 ro get ${H1_VRF_ARG} ${H2_N2_IP} + run_cmd ip -netns $h1 -6 ro get ${H1_VRF_ARG} ${H2_N2_IP6} fi if [ -n "${mtu}" ]; then mtu=" mtu ${mtu}" fi if [ "$with_redirect" = "yes" ]; then - ip -netns h1 ro get ${H1_VRF_ARG} ${H2_N2_IP} | \ + ip -netns $h1 ro get ${H1_VRF_ARG} ${H2_N2_IP} | \ grep -q "cache <redirected> expires [0-9]*sec${mtu}" elif [ -n "${mtu}" ]; then - ip -netns h1 ro get ${H1_VRF_ARG} ${H2_N2_IP} | \ + ip -netns $h1 ro get ${H1_VRF_ARG} ${H2_N2_IP} | \ grep -q "cache expires [0-9]*sec${mtu}" else # want to verify that neither mtu nor redirected appears in # the route get output. The -v will wipe out the cache line # if either are set so the last grep -q will not find a match - ip -netns h1 ro get ${H1_VRF_ARG} ${H2_N2_IP} | \ + ip -netns $h1 ro get ${H1_VRF_ARG} ${H2_N2_IP} | \ grep -E -v 'mtu|redirected' | grep -q "cache" fi - log_test $? 0 "IPv4: ${desc}" + log_test $? 0 "IPv4: ${desc}" 0 - if [ "$with_redirect" = "yes" ]; then - ip -netns h1 -6 ro get ${H1_VRF_ARG} ${H2_N2_IP6} | \ - grep -q "${H2_N2_IP6} from :: via ${R2_LLADDR} dev br0.*${mtu}" + # No PMTU info for test "redirect" and "mtu exception plus redirect" + if [ "$with_redirect" = "yes" ] && [ "$desc" != "redirect exception plus mtu" ]; then + ip -netns $h1 -6 ro get ${H1_VRF_ARG} ${H2_N2_IP6} | \ + grep -v "mtu" | grep -q "${H2_N2_IP6} .*via ${R2_LLADDR} dev br0" elif [ -n "${mtu}" ]; then - ip -netns h1 -6 ro get ${H1_VRF_ARG} ${H2_N2_IP6} | \ + ip -netns $h1 -6 ro get ${H1_VRF_ARG} ${H2_N2_IP6} | \ grep -q "${mtu}" else # IPv6 is a bit harder. First strip out the match if it # contains an mtu exception and then look for the first # gateway - R1's lladdr - ip -netns h1 -6 ro get ${H1_VRF_ARG} ${H2_N2_IP6} | \ + ip -netns $h1 -6 ro get ${H1_VRF_ARG} ${H2_N2_IP6} | \ grep -v "mtu" | grep -q "${R1_LLADDR}" fi - log_test $? 0 "IPv6: ${desc}" + log_test $? 0 "IPv6: ${desc}" 1 } run_ping() { local sz=$1 - run_cmd ip netns exec h1 ping -q -M want -i 0.5 -c 10 -w 2 -s ${sz} ${H1_PING_ARG} ${H2_N2_IP} - run_cmd ip netns exec h1 ${ping6} -q -M want -i 0.5 -c 10 -w 2 -s ${sz} ${H1_PING_ARG} ${H2_N2_IP6} + run_cmd ip netns exec $h1 ping -q -M want -i 0.5 -c 10 -w 2 -s ${sz} ${H1_PING_ARG} ${H2_N2_IP} + run_cmd ip netns exec $h1 ${ping6} -q -M want -i 0.5 -c 10 -w 2 -s ${sz} ${H1_PING_ARG} ${H2_N2_IP6} } replace_route_new() { # r1 to h2 via r2 and eth0 - run_cmd ip -netns r1 nexthop replace id 1 via ${R2_N1_IP} dev eth0 - run_cmd ip -netns r1 nexthop replace id 2 via ${R2_LLADDR} dev eth0 + run_cmd ip -netns $r1 nexthop replace id 1 via ${R2_N1_IP} dev eth0 + run_cmd ip -netns $r1 nexthop replace id 2 via ${R2_LLADDR} dev eth0 } reset_route_new() { - run_cmd ip -netns r1 nexthop flush - run_cmd ip -netns h1 nexthop flush + run_cmd ip -netns $r1 nexthop flush + run_cmd ip -netns $h1 nexthop flush initial_route_new } @@ -351,34 +350,34 @@ reset_route_new() initial_route_new() { # r1 to h2 via r2 and eth1 - run_cmd ip -netns r1 nexthop add id 1 via ${R2_R1_N1_IP} dev eth1 - run_cmd ip -netns r1 ro add ${H2_N2} nhid 1 + run_cmd ip -netns $r1 nexthop add id 1 via ${R2_R1_N1_IP} dev eth1 + run_cmd ip -netns $r1 ro add ${H2_N2} nhid 1 - run_cmd ip -netns r1 nexthop add id 2 via ${R2_R1_N1_IP6} dev eth1 - run_cmd ip -netns r1 -6 ro add ${H2_N2_6} nhid 2 + run_cmd ip -netns $r1 nexthop add id 2 via ${R2_R1_N1_IP6} dev eth1 + run_cmd ip -netns $r1 -6 ro add ${H2_N2_6} nhid 2 # h1 to h2 via r1 - run_cmd ip -netns h1 nexthop add id 1 via ${R1_N1_IP} dev br0 - run_cmd ip -netns h1 ro add ${H1_VRF_ARG} ${H2_N2} nhid 1 + run_cmd ip -netns $h1 nexthop add id 1 via ${R1_N1_IP} dev br0 + run_cmd ip -netns $h1 ro add ${H1_VRF_ARG} ${H2_N2} nhid 1 - run_cmd ip -netns h1 nexthop add id 2 via ${R1_LLADDR} dev br0 - run_cmd ip -netns h1 -6 ro add ${H1_VRF_ARG} ${H2_N2_6} nhid 2 + run_cmd ip -netns $h1 nexthop add id 2 via ${R1_LLADDR} dev br0 + run_cmd ip -netns $h1 -6 ro add ${H1_VRF_ARG} ${H2_N2_6} nhid 2 } replace_route_legacy() { # r1 to h2 via r2 and eth0 - run_cmd ip -netns r1 ro replace ${H2_N2} via ${R2_N1_IP} dev eth0 - run_cmd ip -netns r1 -6 ro replace ${H2_N2_6} via ${R2_LLADDR} dev eth0 + run_cmd ip -netns $r1 ro replace ${H2_N2} via ${R2_N1_IP} dev eth0 + run_cmd ip -netns $r1 -6 ro replace ${H2_N2_6} via ${R2_LLADDR} dev eth0 } reset_route_legacy() { - run_cmd ip -netns r1 ro del ${H2_N2} - run_cmd ip -netns r1 -6 ro del ${H2_N2_6} + run_cmd ip -netns $r1 ro del ${H2_N2} + run_cmd ip -netns $r1 -6 ro del ${H2_N2_6} - run_cmd ip -netns h1 ro del ${H1_VRF_ARG} ${H2_N2} - run_cmd ip -netns h1 -6 ro del ${H1_VRF_ARG} ${H2_N2_6} + run_cmd ip -netns $h1 ro del ${H1_VRF_ARG} ${H2_N2} + run_cmd ip -netns $h1 -6 ro del ${H1_VRF_ARG} ${H2_N2_6} initial_route_legacy } @@ -386,22 +385,22 @@ reset_route_legacy() initial_route_legacy() { # r1 to h2 via r2 and eth1 - run_cmd ip -netns r1 ro add ${H2_N2} via ${R2_R1_N1_IP} dev eth1 - run_cmd ip -netns r1 -6 ro add ${H2_N2_6} via ${R2_R1_N1_IP6} dev eth1 + run_cmd ip -netns $r1 ro add ${H2_N2} via ${R2_R1_N1_IP} dev eth1 + run_cmd ip -netns $r1 -6 ro add ${H2_N2_6} via ${R2_R1_N1_IP6} dev eth1 # h1 to h2 via r1 # - IPv6 redirect only works if gateway is the LLA - run_cmd ip -netns h1 ro add ${H1_VRF_ARG} ${H2_N2} via ${R1_N1_IP} dev br0 - run_cmd ip -netns h1 -6 ro add ${H1_VRF_ARG} ${H2_N2_6} via ${R1_LLADDR} dev br0 + run_cmd ip -netns $h1 ro add ${H1_VRF_ARG} ${H2_N2} via ${R1_N1_IP} dev br0 + run_cmd ip -netns $h1 -6 ro add ${H1_VRF_ARG} ${H2_N2_6} via ${R1_LLADDR} dev br0 } check_connectivity() { local rc - run_cmd ip netns exec h1 ping -c1 -w1 ${H1_PING_ARG} ${H2_N2_IP} + run_cmd ip netns exec $h1 ping -c1 -w1 ${H1_PING_ARG} ${H2_N2_IP} rc=$? - run_cmd ip netns exec h1 ${ping6} -c1 -w1 ${H1_PING_ARG} ${H2_N2_IP6} + run_cmd ip netns exec $h1 ${ping6} -c1 -w1 ${H1_PING_ARG} ${H2_N2_IP6} [ $? -ne 0 ] && rc=$? return $rc @@ -488,6 +487,7 @@ which ping6 > /dev/null 2>&1 && ping6=$(which ping6) || ping6=$(which ping) ret=0 nsuccess=0 nfail=0 +nxfail=0 while getopts :pv o do @@ -532,5 +532,6 @@ fi printf "\nTests passed: %3d\n" ${nsuccess} printf "Tests failed: %3d\n" ${nfail} +printf "Tests xfailed: %3d\n" ${nxfail} exit $ret diff --git a/tools/testing/selftests/net/io_uring_zerocopy_tx.c b/tools/testing/selftests/net/io_uring_zerocopy_tx.c new file mode 100644 index 000000000000..76e604e4810e --- /dev/null +++ b/tools/testing/selftests/net/io_uring_zerocopy_tx.c @@ -0,0 +1,320 @@ +/* SPDX-License-Identifier: MIT */ +/* based on linux-kernel/tools/testing/selftests/net/msg_zerocopy.c */ +#include <assert.h> +#include <errno.h> +#include <error.h> +#include <fcntl.h> +#include <limits.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include <arpa/inet.h> +#include <linux/errqueue.h> +#include <linux/if_packet.h> +#include <linux/io_uring.h> +#include <linux/ipv6.h> +#include <linux/socket.h> +#include <linux/sockios.h> +#include <net/ethernet.h> +#include <net/if.h> +#include <netinet/in.h> +#include <netinet/ip.h> +#include <netinet/ip6.h> +#include <netinet/tcp.h> +#include <netinet/udp.h> +#include <sys/ioctl.h> +#include <sys/mman.h> +#include <sys/resource.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/time.h> +#include <sys/types.h> +#include <sys/un.h> +#include <sys/wait.h> + +#include <io_uring/mini_liburing.h> + +#define NOTIF_TAG 0xfffffffULL +#define NONZC_TAG 0 +#define ZC_TAG 1 + +enum { + MODE_NONZC = 0, + MODE_ZC = 1, + MODE_ZC_FIXED = 2, + MODE_MIXED = 3, +}; + +static bool cfg_cork = false; +static int cfg_mode = MODE_ZC_FIXED; +static int cfg_nr_reqs = 8; +static int cfg_family = PF_UNSPEC; +static int cfg_payload_len; +static int cfg_port = 8000; +static int cfg_runtime_ms = 4200; + +static socklen_t cfg_alen; +static struct sockaddr_storage cfg_dst_addr; + +static char payload[IP_MAXPACKET] __attribute__((aligned(4096))); + +static unsigned long gettimeofday_ms(void) +{ + struct timeval tv; + + gettimeofday(&tv, NULL); + return (tv.tv_sec * 1000) + (tv.tv_usec / 1000); +} + +static void do_setsockopt(int fd, int level, int optname, int val) +{ + if (setsockopt(fd, level, optname, &val, sizeof(val))) + error(1, errno, "setsockopt %d.%d: %d", level, optname, val); +} + +static int do_setup_tx(int domain, int type, int protocol) +{ + int fd; + + fd = socket(domain, type, protocol); + if (fd == -1) + error(1, errno, "socket t"); + + do_setsockopt(fd, SOL_SOCKET, SO_SNDBUF, 1 << 21); + + if (connect(fd, (void *) &cfg_dst_addr, cfg_alen)) + error(1, errno, "connect"); + return fd; +} + +static void do_tx(int domain, int type, int protocol) +{ + struct io_uring_sqe *sqe; + struct io_uring_cqe *cqe; + unsigned long packets = 0, bytes = 0; + struct io_uring ring; + struct iovec iov; + uint64_t tstop; + int i, fd, ret; + int compl_cqes = 0; + + fd = do_setup_tx(domain, type, protocol); + + ret = io_uring_queue_init(512, &ring, 0); + if (ret) + error(1, ret, "io_uring: queue init"); + + iov.iov_base = payload; + iov.iov_len = cfg_payload_len; + + ret = io_uring_register_buffers(&ring, &iov, 1); + if (ret) + error(1, ret, "io_uring: buffer registration"); + + tstop = gettimeofday_ms() + cfg_runtime_ms; + do { + if (cfg_cork) + do_setsockopt(fd, IPPROTO_UDP, UDP_CORK, 1); + + for (i = 0; i < cfg_nr_reqs; i++) { + unsigned zc_flags = 0; + unsigned buf_idx = 0; + unsigned mode = cfg_mode; + unsigned msg_flags = MSG_WAITALL; + + if (cfg_mode == MODE_MIXED) + mode = rand() % 3; + + sqe = io_uring_get_sqe(&ring); + + if (mode == MODE_NONZC) { + io_uring_prep_send(sqe, fd, payload, + cfg_payload_len, msg_flags); + sqe->user_data = NONZC_TAG; + } else { + io_uring_prep_sendzc(sqe, fd, payload, + cfg_payload_len, + msg_flags, zc_flags); + if (mode == MODE_ZC_FIXED) { + sqe->ioprio |= IORING_RECVSEND_FIXED_BUF; + sqe->buf_index = buf_idx; + } + sqe->user_data = ZC_TAG; + } + } + + ret = io_uring_submit(&ring); + if (ret != cfg_nr_reqs) + error(1, ret, "submit"); + + if (cfg_cork) + do_setsockopt(fd, IPPROTO_UDP, UDP_CORK, 0); + for (i = 0; i < cfg_nr_reqs; i++) { + ret = io_uring_wait_cqe(&ring, &cqe); + if (ret) + error(1, ret, "wait cqe"); + + if (cqe->user_data != NONZC_TAG && + cqe->user_data != ZC_TAG) + error(1, -EINVAL, "invalid cqe->user_data"); + + if (cqe->flags & IORING_CQE_F_NOTIF) { + if (cqe->flags & IORING_CQE_F_MORE) + error(1, -EINVAL, "invalid notif flags"); + if (compl_cqes <= 0) + error(1, -EINVAL, "notification mismatch"); + compl_cqes--; + i--; + io_uring_cqe_seen(&ring); + continue; + } + if (cqe->flags & IORING_CQE_F_MORE) { + if (cqe->user_data != ZC_TAG) + error(1, cqe->res, "unexpected F_MORE"); + compl_cqes++; + } + if (cqe->res >= 0) { + packets++; + bytes += cqe->res; + } else if (cqe->res != -EAGAIN) { + error(1, cqe->res, "send failed"); + } + io_uring_cqe_seen(&ring); + } + } while (gettimeofday_ms() < tstop); + + while (compl_cqes) { + ret = io_uring_wait_cqe(&ring, &cqe); + if (ret) + error(1, ret, "wait cqe"); + if (cqe->flags & IORING_CQE_F_MORE) + error(1, -EINVAL, "invalid notif flags"); + if (!(cqe->flags & IORING_CQE_F_NOTIF)) + error(1, -EINVAL, "missing notif flag"); + + io_uring_cqe_seen(&ring); + compl_cqes--; + } + + fprintf(stderr, "tx=%lu (MB=%lu), tx/s=%lu (MB/s=%lu)\n", + packets, bytes >> 20, + packets / (cfg_runtime_ms / 1000), + (bytes >> 20) / (cfg_runtime_ms / 1000)); + + if (close(fd)) + error(1, errno, "close"); +} + +static void do_test(int domain, int type, int protocol) +{ + int i; + + for (i = 0; i < IP_MAXPACKET; i++) + payload[i] = 'a' + (i % 26); + do_tx(domain, type, protocol); +} + +static void usage(const char *filepath) +{ + error(1, 0, "Usage: %s (-4|-6) (udp|tcp) -D<dst_ip> [-s<payload size>] " + "[-t<time s>] [-n<batch>] [-p<port>] [-m<mode>]", filepath); +} + +static void parse_opts(int argc, char **argv) +{ + const int max_payload_len = sizeof(payload) - + sizeof(struct ipv6hdr) - + sizeof(struct tcphdr) - + 40 /* max tcp options */; + struct sockaddr_in6 *addr6 = (void *) &cfg_dst_addr; + struct sockaddr_in *addr4 = (void *) &cfg_dst_addr; + char *daddr = NULL; + int c; + + if (argc <= 1) + usage(argv[0]); + cfg_payload_len = max_payload_len; + + while ((c = getopt(argc, argv, "46D:p:s:t:n:c:m:")) != -1) { + switch (c) { + case '4': + if (cfg_family != PF_UNSPEC) + error(1, 0, "Pass one of -4 or -6"); + cfg_family = PF_INET; + cfg_alen = sizeof(struct sockaddr_in); + break; + case '6': + if (cfg_family != PF_UNSPEC) + error(1, 0, "Pass one of -4 or -6"); + cfg_family = PF_INET6; + cfg_alen = sizeof(struct sockaddr_in6); + break; + case 'D': + daddr = optarg; + break; + case 'p': + cfg_port = strtoul(optarg, NULL, 0); + break; + case 's': + cfg_payload_len = strtoul(optarg, NULL, 0); + break; + case 't': + cfg_runtime_ms = 200 + strtoul(optarg, NULL, 10) * 1000; + break; + case 'n': + cfg_nr_reqs = strtoul(optarg, NULL, 0); + break; + case 'c': + cfg_cork = strtol(optarg, NULL, 0); + break; + case 'm': + cfg_mode = strtol(optarg, NULL, 0); + break; + } + } + + switch (cfg_family) { + case PF_INET: + memset(addr4, 0, sizeof(*addr4)); + addr4->sin_family = AF_INET; + addr4->sin_port = htons(cfg_port); + if (daddr && + inet_pton(AF_INET, daddr, &(addr4->sin_addr)) != 1) + error(1, 0, "ipv4 parse error: %s", daddr); + break; + case PF_INET6: + memset(addr6, 0, sizeof(*addr6)); + addr6->sin6_family = AF_INET6; + addr6->sin6_port = htons(cfg_port); + if (daddr && + inet_pton(AF_INET6, daddr, &(addr6->sin6_addr)) != 1) + error(1, 0, "ipv6 parse error: %s", daddr); + break; + default: + error(1, 0, "illegal domain"); + } + + if (cfg_payload_len > max_payload_len) + error(1, 0, "-s: payload exceeds max (%d)", max_payload_len); + if (optind != argc - 1) + usage(argv[0]); +} + +int main(int argc, char **argv) +{ + const char *cfg_test = argv[argc - 1]; + + parse_opts(argc, argv); + + if (!strcmp(cfg_test, "tcp")) + do_test(cfg_family, SOCK_STREAM, 0); + else if (!strcmp(cfg_test, "udp")) + do_test(cfg_family, SOCK_DGRAM, 0); + else + error(1, 0, "unknown cfg_test %s", cfg_test); + return 0; +} diff --git a/tools/testing/selftests/net/io_uring_zerocopy_tx.sh b/tools/testing/selftests/net/io_uring_zerocopy_tx.sh new file mode 100755 index 000000000000..123439545013 --- /dev/null +++ b/tools/testing/selftests/net/io_uring_zerocopy_tx.sh @@ -0,0 +1,126 @@ +#!/bin/bash +# +# Send data between two processes across namespaces +# Run twice: once without and once with zerocopy + +set -e + +readonly DEV="veth0" +readonly DEV_MTU=65535 +readonly BIN_TX="./io_uring_zerocopy_tx" +readonly BIN_RX="./msg_zerocopy" + +readonly RAND="$(mktemp -u XXXXXX)" +readonly NSPREFIX="ns-${RAND}" +readonly NS1="${NSPREFIX}1" +readonly NS2="${NSPREFIX}2" + +readonly SADDR4='192.168.1.1' +readonly DADDR4='192.168.1.2' +readonly SADDR6='fd::1' +readonly DADDR6='fd::2' + +readonly path_sysctl_mem="net.core.optmem_max" + +# No arguments: automated test +if [[ "$#" -eq "0" ]]; then + IPs=( "4" "6" ) + + for IP in "${IPs[@]}"; do + for mode in $(seq 1 3); do + $0 "$IP" udp -m "$mode" -t 1 -n 32 + $0 "$IP" tcp -m "$mode" -t 1 -n 1 + done + done + + echo "OK. All tests passed" + exit 0 +fi + +# Argument parsing +if [[ "$#" -lt "2" ]]; then + echo "Usage: $0 [4|6] [tcp|udp|raw|raw_hdrincl|packet|packet_dgram] <args>" + exit 1 +fi + +readonly IP="$1" +shift +readonly TXMODE="$1" +shift +readonly EXTRA_ARGS="$@" + +# Argument parsing: configure addresses +if [[ "${IP}" == "4" ]]; then + readonly SADDR="${SADDR4}" + readonly DADDR="${DADDR4}" +elif [[ "${IP}" == "6" ]]; then + readonly SADDR="${SADDR6}" + readonly DADDR="${DADDR6}" +else + echo "Invalid IP version ${IP}" + exit 1 +fi + +# Argument parsing: select receive mode +# +# This differs from send mode for +# - packet: use raw recv, because packet receives skb clones +# - raw_hdrinc: use raw recv, because hdrincl is a tx-only option +case "${TXMODE}" in +'packet' | 'packet_dgram' | 'raw_hdrincl') + RXMODE='raw' + ;; +*) + RXMODE="${TXMODE}" + ;; +esac + +# Start of state changes: install cleanup handler + +cleanup() { + ip netns del "${NS2}" + ip netns del "${NS1}" +} + +trap cleanup EXIT + +# Create virtual ethernet pair between network namespaces +ip netns add "${NS1}" +ip netns add "${NS2}" + +# Configure system settings +ip netns exec "${NS1}" sysctl -w -q "${path_sysctl_mem}=1000000" +ip netns exec "${NS2}" sysctl -w -q "${path_sysctl_mem}=1000000" + +ip link add "${DEV}" mtu "${DEV_MTU}" netns "${NS1}" type veth \ + peer name "${DEV}" mtu "${DEV_MTU}" netns "${NS2}" + +# Bring the devices up +ip -netns "${NS1}" link set "${DEV}" up +ip -netns "${NS2}" link set "${DEV}" up + +# Set fixed MAC addresses on the devices +ip -netns "${NS1}" link set dev "${DEV}" address 02:02:02:02:02:02 +ip -netns "${NS2}" link set dev "${DEV}" address 06:06:06:06:06:06 + +# Add fixed IP addresses to the devices +ip -netns "${NS1}" addr add 192.168.1.1/24 dev "${DEV}" +ip -netns "${NS2}" addr add 192.168.1.2/24 dev "${DEV}" +ip -netns "${NS1}" addr add fd::1/64 dev "${DEV}" nodad +ip -netns "${NS2}" addr add fd::2/64 dev "${DEV}" nodad + +# Optionally disable sg or csum offload to test edge cases +# ip netns exec "${NS1}" ethtool -K "${DEV}" sg off + +do_test() { + local readonly ARGS="$1" + + echo "ipv${IP} ${TXMODE} ${ARGS}" + ip netns exec "${NS2}" "${BIN_RX}" "-${IP}" -t 2 -C 2 -S "${SADDR}" -D "${DADDR}" -r "${RXMODE}" & + sleep 0.2 + ip netns exec "${NS1}" "${BIN_TX}" "-${IP}" -t 1 -D "${DADDR}" ${ARGS} "${TXMODE}" + wait +} + +do_test "${EXTRA_ARGS}" +echo ok diff --git a/tools/testing/selftests/net/ioam6.sh b/tools/testing/selftests/net/ioam6.sh new file mode 100755 index 000000000000..12491850ae98 --- /dev/null +++ b/tools/testing/selftests/net/ioam6.sh @@ -0,0 +1,771 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0+ +# +# Author: Justin Iurman <justin.iurman@uliege.be> +# +# This script evaluates the IOAM insertion for IPv6 by checking the IOAM data +# consistency directly inside packets on the receiver side. Tests are divided +# into three categories: OUTPUT (evaluates the IOAM processing by the sender), +# INPUT (evaluates the IOAM processing by a receiver) and GLOBAL (evaluates +# wider use cases that do not fall into the other two categories). Both OUTPUT +# and INPUT tests only use a two-node topology (alpha and beta), while GLOBAL +# tests use the entire three-node topology (alpha, beta, gamma). Each test is +# documented inside its own handler in the code below. +# +# An IOAM domain is configured from Alpha to Gamma but not on the reverse path. +# When either Beta or Gamma is the destination (depending on the test category), +# Alpha adds an IOAM option (Pre-allocated Trace) inside a Hop-by-hop. +# +# +# +-------------------+ +-------------------+ +# | | | | +# | Alpha netns | | Gamma netns | +# | | | | +# | +-------------+ | | +-------------+ | +# | | veth0 | | | | veth0 | | +# | | db01::2/64 | | | | db02::2/64 | | +# | +-------------+ | | +-------------+ | +# | . | | . | +# +-------------------+ +-------------------+ +# . . +# . . +# . . +# +----------------------------------------------------+ +# | . . | +# | +-------------+ +-------------+ | +# | | veth0 | | veth1 | | +# | | db01::1/64 | ................ | db02::1/64 | | +# | +-------------+ +-------------+ | +# | | +# | Beta netns | +# | | +# +----------------------------------------------------+ +# +# +# +# ============================================================= +# | Alpha - IOAM configuration | +# +===========================================================+ +# | Node ID | 1 | +# +-----------------------------------------------------------+ +# | Node Wide ID | 11111111 | +# +-----------------------------------------------------------+ +# | Ingress ID | 0xffff (default value) | +# +-----------------------------------------------------------+ +# | Ingress Wide ID | 0xffffffff (default value) | +# +-----------------------------------------------------------+ +# | Egress ID | 101 | +# +-----------------------------------------------------------+ +# | Egress Wide ID | 101101 | +# +-----------------------------------------------------------+ +# | Namespace Data | 0xdeadbee0 | +# +-----------------------------------------------------------+ +# | Namespace Wide Data | 0xcafec0caf00dc0de | +# +-----------------------------------------------------------+ +# | Schema ID | 777 | +# +-----------------------------------------------------------+ +# | Schema Data | something that will be 4n-aligned | +# +-----------------------------------------------------------+ +# +# +# ============================================================= +# | Beta - IOAM configuration | +# +===========================================================+ +# | Node ID | 2 | +# +-----------------------------------------------------------+ +# | Node Wide ID | 22222222 | +# +-----------------------------------------------------------+ +# | Ingress ID | 201 | +# +-----------------------------------------------------------+ +# | Ingress Wide ID | 201201 | +# +-----------------------------------------------------------+ +# | Egress ID | 202 | +# +-----------------------------------------------------------+ +# | Egress Wide ID | 202202 | +# +-----------------------------------------------------------+ +# | Namespace Data | 0xdeadbee1 | +# +-----------------------------------------------------------+ +# | Namespace Wide Data | 0xcafec0caf11dc0de | +# +-----------------------------------------------------------+ +# | Schema ID | 666 | +# +-----------------------------------------------------------+ +# | Schema Data | Hello there -Obi | +# +-----------------------------------------------------------+ +# +# +# ============================================================= +# | Gamma - IOAM configuration | +# +===========================================================+ +# | Node ID | 3 | +# +-----------------------------------------------------------+ +# | Node Wide ID | 33333333 | +# +-----------------------------------------------------------+ +# | Ingress ID | 301 | +# +-----------------------------------------------------------+ +# | Ingress Wide ID | 301301 | +# +-----------------------------------------------------------+ +# | Egress ID | 0xffff (default value) | +# +-----------------------------------------------------------+ +# | Egress Wide ID | 0xffffffff (default value) | +# +-----------------------------------------------------------+ +# | Namespace Data | 0xdeadbee2 | +# +-----------------------------------------------------------+ +# | Namespace Wide Data | 0xcafec0caf22dc0de | +# +-----------------------------------------------------------+ +# | Schema ID | 0xffffff (= None) | +# +-----------------------------------------------------------+ +# | Schema Data | | +# +-----------------------------------------------------------+ + +source lib.sh + +################################################################################ +# # +# WARNING: Be careful if you modify the block below - it MUST be kept # +# synchronized with configurations inside ioam6_parser.c and always # +# reflect the same. # +# # +################################################################################ + +ALPHA=( + 1 # ID + 11111111 # Wide ID + 0xffff # Ingress ID + 0xffffffff # Ingress Wide ID + 101 # Egress ID + 101101 # Egress Wide ID + 0xdeadbee0 # Namespace Data + 0xcafec0caf00dc0de # Namespace Wide Data + 777 # Schema ID (0xffffff = None) + "something that will be 4n-aligned" # Schema Data +) + +BETA=( + 2 + 22222222 + 201 + 201201 + 202 + 202202 + 0xdeadbee1 + 0xcafec0caf11dc0de + 666 + "Hello there -Obi" +) + +GAMMA=( + 3 + 33333333 + 301 + 301301 + 0xffff + 0xffffffff + 0xdeadbee2 + 0xcafec0caf22dc0de + 0xffffff + "" +) + +TESTS_OUTPUT=" + out_undef_ns + out_no_room + out_bits + out_full_supp_trace +" + +TESTS_INPUT=" + in_undef_ns + in_no_room + in_oflag + in_bits + in_full_supp_trace +" + +TESTS_GLOBAL=" + fwd_full_supp_trace +" + + +################################################################################ +# # +# LIBRARY # +# # +################################################################################ + +check_kernel_compatibility() +{ + setup_ns ioam_tmp_node + ip link add name veth0 netns $ioam_tmp_node type veth \ + peer name veth1 netns $ioam_tmp_node + + ip -netns $ioam_tmp_node link set veth0 up + ip -netns $ioam_tmp_node link set veth1 up + + ip -netns $ioam_tmp_node ioam namespace add 0 + ns_ad=$? + + ip -netns $ioam_tmp_node ioam namespace show | grep -q "namespace 0" + ns_sh=$? + + if [[ $ns_ad != 0 || $ns_sh != 0 ]] + then + echo "SKIP: kernel version probably too old, missing ioam support" + ip link del veth0 2>/dev/null || true + cleanup_ns $ioam_tmp_node || true + exit $ksft_skip + fi + + ip -netns $ioam_tmp_node route add db02::/64 encap ioam6 mode inline \ + trace prealloc type 0x800000 ns 0 size 4 dev veth0 + tr_ad=$? + + ip -netns $ioam_tmp_node -6 route | grep -q "encap ioam6" + tr_sh=$? + + if [[ $tr_ad != 0 || $tr_sh != 0 ]] + then + echo "SKIP: cannot attach an ioam trace to a route, did you compile" \ + "without CONFIG_IPV6_IOAM6_LWTUNNEL?" + ip link del veth0 2>/dev/null || true + cleanup_ns $ioam_tmp_node || true + exit $ksft_skip + fi + + ip link del veth0 2>/dev/null || true + cleanup_ns $ioam_tmp_node || true + + lsmod | grep -q "ip6_tunnel" + ip6tnl_loaded=$? + + if [ $ip6tnl_loaded = 0 ] + then + encap_tests=0 + else + modprobe ip6_tunnel &>/dev/null + lsmod | grep -q "ip6_tunnel" + encap_tests=$? + + if [ $encap_tests != 0 ] + then + ip a | grep -q "ip6tnl0" + encap_tests=$? + + if [ $encap_tests != 0 ] + then + echo "Note: ip6_tunnel not found neither as a module nor inside the" \ + "kernel, tests that require it (encap mode) will be omitted" + fi + fi + fi +} + +cleanup() +{ + ip link del ioam-veth-alpha 2>/dev/null || true + ip link del ioam-veth-gamma 2>/dev/null || true + + cleanup_ns $ioam_node_alpha $ioam_node_beta $ioam_node_gamma || true + + if [ $ip6tnl_loaded != 0 ] + then + modprobe -r ip6_tunnel 2>/dev/null || true + fi +} + +setup() +{ + setup_ns ioam_node_alpha ioam_node_beta ioam_node_gamma + + ip link add name ioam-veth-alpha netns $ioam_node_alpha type veth \ + peer name ioam-veth-betaL netns $ioam_node_beta + ip link add name ioam-veth-betaR netns $ioam_node_beta type veth \ + peer name ioam-veth-gamma netns $ioam_node_gamma + + ip -netns $ioam_node_alpha link set ioam-veth-alpha name veth0 + ip -netns $ioam_node_beta link set ioam-veth-betaL name veth0 + ip -netns $ioam_node_beta link set ioam-veth-betaR name veth1 + ip -netns $ioam_node_gamma link set ioam-veth-gamma name veth0 + + ip -netns $ioam_node_alpha addr add db01::2/64 dev veth0 + ip -netns $ioam_node_alpha link set veth0 up + ip -netns $ioam_node_alpha link set lo up + ip -netns $ioam_node_alpha route add db02::/64 via db01::1 dev veth0 + ip -netns $ioam_node_alpha route del db01::/64 + ip -netns $ioam_node_alpha route add db01::/64 dev veth0 + + ip -netns $ioam_node_beta addr add db01::1/64 dev veth0 + ip -netns $ioam_node_beta addr add db02::1/64 dev veth1 + ip -netns $ioam_node_beta link set veth0 up + ip -netns $ioam_node_beta link set veth1 up + ip -netns $ioam_node_beta link set lo up + + ip -netns $ioam_node_gamma addr add db02::2/64 dev veth0 + ip -netns $ioam_node_gamma link set veth0 up + ip -netns $ioam_node_gamma link set lo up + ip -netns $ioam_node_gamma route add db01::/64 via db02::1 dev veth0 + + # - IOAM config - + ip netns exec $ioam_node_alpha sysctl -wq net.ipv6.ioam6_id=${ALPHA[0]} + ip netns exec $ioam_node_alpha sysctl -wq net.ipv6.ioam6_id_wide=${ALPHA[1]} + ip netns exec $ioam_node_alpha sysctl -wq net.ipv6.conf.veth0.ioam6_id=${ALPHA[4]} + ip netns exec $ioam_node_alpha sysctl -wq net.ipv6.conf.veth0.ioam6_id_wide=${ALPHA[5]} + ip -netns $ioam_node_alpha ioam namespace add 123 data ${ALPHA[6]} wide ${ALPHA[7]} + ip -netns $ioam_node_alpha ioam schema add ${ALPHA[8]} "${ALPHA[9]}" + ip -netns $ioam_node_alpha ioam namespace set 123 schema ${ALPHA[8]} + + ip netns exec $ioam_node_beta sysctl -wq net.ipv6.conf.all.forwarding=1 + ip netns exec $ioam_node_beta sysctl -wq net.ipv6.ioam6_id=${BETA[0]} + ip netns exec $ioam_node_beta sysctl -wq net.ipv6.ioam6_id_wide=${BETA[1]} + ip netns exec $ioam_node_beta sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=1 + ip netns exec $ioam_node_beta sysctl -wq net.ipv6.conf.veth0.ioam6_id=${BETA[2]} + ip netns exec $ioam_node_beta sysctl -wq net.ipv6.conf.veth0.ioam6_id_wide=${BETA[3]} + ip netns exec $ioam_node_beta sysctl -wq net.ipv6.conf.veth1.ioam6_id=${BETA[4]} + ip netns exec $ioam_node_beta sysctl -wq net.ipv6.conf.veth1.ioam6_id_wide=${BETA[5]} + ip -netns $ioam_node_beta ioam namespace add 123 data ${BETA[6]} wide ${BETA[7]} + ip -netns $ioam_node_beta ioam schema add ${BETA[8]} "${BETA[9]}" + ip -netns $ioam_node_beta ioam namespace set 123 schema ${BETA[8]} + + ip netns exec $ioam_node_gamma sysctl -wq net.ipv6.ioam6_id=${GAMMA[0]} + ip netns exec $ioam_node_gamma sysctl -wq net.ipv6.ioam6_id_wide=${GAMMA[1]} + ip netns exec $ioam_node_gamma sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=1 + ip netns exec $ioam_node_gamma sysctl -wq net.ipv6.conf.veth0.ioam6_id=${GAMMA[2]} + ip netns exec $ioam_node_gamma sysctl -wq net.ipv6.conf.veth0.ioam6_id_wide=${GAMMA[3]} + ip -netns $ioam_node_gamma ioam namespace add 123 data ${GAMMA[6]} wide ${GAMMA[7]} + + sleep 1 + + ip netns exec $ioam_node_alpha ping6 -c 5 -W 1 db02::2 &>/dev/null + if [ $? != 0 ] + then + echo "Setup FAILED" + cleanup &>/dev/null + exit 0 + fi +} + +log_test_passed() +{ + local desc=$1 + printf "TEST: %-60s [ OK ]\n" "${desc}" +} + +log_test_failed() +{ + local desc=$1 + printf "TEST: %-60s [FAIL]\n" "${desc}" +} + +log_results() +{ + echo "- Tests passed: ${npassed}" + echo "- Tests failed: ${nfailed}" +} + +run_test() +{ + local name=$1 + local desc=$2 + local node_src=$3 + local node_dst=$4 + local ip6_dst=$5 + local trace_type=$6 + local ioam_ns=$7 + local type=$8 + + ip netns exec $node_dst ./ioam6_parser $name $trace_type $ioam_ns $type & + local spid=$! + sleep 0.1 + + ip netns exec $node_src ping6 -t 64 -c 1 -W 1 $ip6_dst &>/dev/null + if [ $? != 0 ] + then + nfailed=$((nfailed+1)) + log_test_failed "${desc}" + kill -2 $spid &>/dev/null + else + wait $spid + if [ $? = 0 ] + then + npassed=$((npassed+1)) + log_test_passed "${desc}" + else + nfailed=$((nfailed+1)) + log_test_failed "${desc}" + fi + fi +} + +run() +{ + echo + printf "%0.s-" {1..74} + echo + echo "OUTPUT tests" + printf "%0.s-" {1..74} + echo + + # set OUTPUT settings + ip netns exec $ioam_node_beta sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=0 + + for t in $TESTS_OUTPUT + do + $t "inline" + [ $encap_tests = 0 ] && $t "encap" + done + + # clean OUTPUT settings + ip netns exec $ioam_node_beta sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=1 + ip -netns $ioam_node_alpha route change db01::/64 dev veth0 + + + echo + printf "%0.s-" {1..74} + echo + echo "INPUT tests" + printf "%0.s-" {1..74} + echo + + # set INPUT settings + ip -netns $ioam_node_alpha ioam namespace del 123 + + for t in $TESTS_INPUT + do + $t "inline" + [ $encap_tests = 0 ] && $t "encap" + done + + # clean INPUT settings + ip -netns $ioam_node_alpha ioam namespace add 123 \ + data ${ALPHA[6]} wide ${ALPHA[7]} + ip -netns $ioam_node_alpha ioam namespace set 123 schema ${ALPHA[8]} + ip -netns $ioam_node_alpha route change db01::/64 dev veth0 + + echo + printf "%0.s-" {1..74} + echo + echo "GLOBAL tests" + printf "%0.s-" {1..74} + echo + + for t in $TESTS_GLOBAL + do + $t "inline" + [ $encap_tests = 0 ] && $t "encap" + done + + echo + log_results +} + +bit2type=( + 0x800000 0x400000 0x200000 0x100000 0x080000 0x040000 0x020000 0x010000 + 0x008000 0x004000 0x002000 0x001000 0x000800 0x000400 0x000200 0x000100 + 0x000080 0x000040 0x000020 0x000010 0x000008 0x000004 0x000002 +) +bit2size=( 4 4 4 4 4 4 4 4 8 8 8 4 4 4 4 4 4 4 4 4 4 4 4 ) + + +################################################################################ +# # +# OUTPUT tests # +# # +# Two nodes (sender/receiver), IOAM disabled on ingress for the receiver. # +################################################################################ + +out_undef_ns() +{ + ############################################################################## + # Make sure that the encap node won't fill the trace if the chosen IOAM # + # namespace is not configured locally. # + ############################################################################## + local desc="Unknown IOAM namespace" + + [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1" + [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up + + ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \ + trace prealloc type 0x800000 ns 0 size 4 dev veth0 + + run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \ + db01::1 0x800000 0 $1 + + [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down +} + +out_no_room() +{ + ############################################################################## + # Make sure that the encap node won't fill the trace and will set the # + # Overflow flag since there is no room enough for its data. # + ############################################################################## + local desc="Missing trace room" + + [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1" + [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up + + ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \ + trace prealloc type 0xc00000 ns 123 size 4 dev veth0 + + run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \ + db01::1 0xc00000 123 $1 + + [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down +} + +out_bits() +{ + ############################################################################## + # Make sure that, for each trace type bit, the encap node will either: # + # (i) fill the trace with its data when it is a supported bit # + # (ii) not fill the trace with its data when it is an unsupported bit # + ############################################################################## + local desc="Trace type with bit <n> only" + + local tmp=${bit2size[22]} + bit2size[22]=$(( $tmp + ${#ALPHA[9]} + ((4 - (${#ALPHA[9]} % 4)) % 4) )) + + [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1" + [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up + + for i in {0..22} + do + ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \ + trace prealloc type ${bit2type[$i]} ns 123 size ${bit2size[$i]} \ + dev veth0 &>/dev/null + + local cmd_res=$? + local descr="${desc/<n>/$i}" + + if [[ $i -ge 12 && $i -le 21 ]] + then + if [ $cmd_res != 0 ] + then + npassed=$((npassed+1)) + log_test_passed "$descr ($1 mode)" + else + nfailed=$((nfailed+1)) + log_test_failed "$descr ($1 mode)" + fi + else + run_test "out_bit$i" "$descr ($1 mode)" $ioam_node_alpha \ + $ioam_node_beta db01::1 ${bit2type[$i]} 123 $1 + fi + done + + [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down + + bit2size[22]=$tmp +} + +out_full_supp_trace() +{ + ############################################################################## + # Make sure that the encap node will correctly fill a full trace. Be careful,# + # "full trace" here does NOT mean all bits (only supported ones). # + ############################################################################## + local desc="Full supported trace" + + [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1" + [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up + + ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \ + trace prealloc type 0xfff002 ns 123 size 100 dev veth0 + + run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \ + db01::1 0xfff002 123 $1 + + [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down +} + + +################################################################################ +# # +# INPUT tests # +# # +# Two nodes (sender/receiver), the sender MUST NOT fill the trace upon # +# insertion -> the IOAM namespace configured on the sender is removed # +# and is used in the inserted trace to force the sender not to fill it. # +################################################################################ + +in_undef_ns() +{ + ############################################################################## + # Make sure that the receiving node won't fill the trace if the related IOAM # + # namespace is not configured locally. # + ############################################################################## + local desc="Unknown IOAM namespace" + + [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1" + [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up + + ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \ + trace prealloc type 0x800000 ns 0 size 4 dev veth0 + + run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \ + db01::1 0x800000 0 $1 + + [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down +} + +in_no_room() +{ + ############################################################################## + # Make sure that the receiving node won't fill the trace and will set the # + # Overflow flag if there is no room enough for its data. # + ############################################################################## + local desc="Missing trace room" + + [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1" + [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up + + ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \ + trace prealloc type 0xc00000 ns 123 size 4 dev veth0 + + run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \ + db01::1 0xc00000 123 $1 + + [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down +} + +in_bits() +{ + ############################################################################## + # Make sure that, for each trace type bit, the receiving node will either: # + # (i) fill the trace with its data when it is a supported bit # + # (ii) not fill the trace with its data when it is an unsupported bit # + ############################################################################## + local desc="Trace type with bit <n> only" + + local tmp=${bit2size[22]} + bit2size[22]=$(( $tmp + ${#BETA[9]} + ((4 - (${#BETA[9]} % 4)) % 4) )) + + [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1" + [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up + + for i in {0..11} {22..22} + do + ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \ + trace prealloc type ${bit2type[$i]} ns 123 size ${bit2size[$i]} \ + dev veth0 + + run_test "in_bit$i" "${desc/<n>/$i} ($1 mode)" $ioam_node_alpha \ + $ioam_node_beta db01::1 ${bit2type[$i]} 123 $1 + done + + [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down + + bit2size[22]=$tmp +} + +in_oflag() +{ + ############################################################################## + # Make sure that the receiving node won't fill the trace since the Overflow # + # flag is set. # + ############################################################################## + local desc="Overflow flag is set" + + # Exception: + # Here, we need the sender to set the Overflow flag. For that, we will add + # back the IOAM namespace that was previously configured on the sender. + ip -netns $ioam_node_alpha ioam namespace add 123 + + [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1" + [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up + + ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \ + trace prealloc type 0xc00000 ns 123 size 4 dev veth0 + + run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \ + db01::1 0xc00000 123 $1 + + [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down + + # And we clean the exception for this test to get things back to normal for + # other INPUT tests + ip -netns $ioam_node_alpha ioam namespace del 123 +} + +in_full_supp_trace() +{ + ############################################################################## + # Make sure that the receiving node will correctly fill a full trace. Be # + # careful, "full trace" here does NOT mean all bits (only supported ones). # + ############################################################################## + local desc="Full supported trace" + + [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1" + [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up + + ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \ + trace prealloc type 0xfff002 ns 123 size 80 dev veth0 + + run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \ + db01::1 0xfff002 123 $1 + + [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down +} + + +################################################################################ +# # +# GLOBAL tests # +# # +# Three nodes (sender/router/receiver), IOAM fully enabled on every node. # +################################################################################ + +fwd_full_supp_trace() +{ + ############################################################################## + # Make sure that all three nodes correctly filled the full supported trace # + # by checking that the trace data is consistent with the predefined config. # + ############################################################################## + local desc="Forward - Full supported trace" + + [ "$1" = "encap" ] && mode="$1 tundst db02::2" || mode="$1" + [ "$1" = "encap" ] && ip -netns $ioam_node_gamma link set ip6tnl0 up + + ip -netns $ioam_node_alpha route change db02::/64 encap ioam6 mode $mode \ + trace prealloc type 0xfff002 ns 123 size 244 via db01::1 dev veth0 + + run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_gamma \ + db02::2 0xfff002 123 $1 + + [ "$1" = "encap" ] && ip -netns $ioam_node_gamma link set ip6tnl0 down +} + + +################################################################################ +# # +# MAIN # +# # +################################################################################ + +npassed=0 +nfailed=0 + +if [ "$(id -u)" -ne 0 ] +then + echo "SKIP: Need root privileges" + exit $ksft_skip +fi + +if [ ! -x "$(command -v ip)" ] +then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +ip ioam &>/dev/null +if [ $? = 1 ] +then + echo "SKIP: iproute2 too old, missing ioam command" + exit $ksft_skip +fi + +check_kernel_compatibility + +cleanup &>/dev/null +setup +run +cleanup &>/dev/null diff --git a/tools/testing/selftests/net/ioam6_parser.c b/tools/testing/selftests/net/ioam6_parser.c new file mode 100644 index 000000000000..895e5bb5044b --- /dev/null +++ b/tools/testing/selftests/net/ioam6_parser.c @@ -0,0 +1,674 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Author: Justin Iurman (justin.iurman@uliege.be) + * + * IOAM tester for IPv6, see ioam6.sh for details on each test case. + */ +#include <arpa/inet.h> +#include <errno.h> +#include <limits.h> +#include <linux/const.h> +#include <linux/ioam6.h> +#include <linux/ipv6.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +struct ioam_config { + __u32 id; + __u64 wide; + __u16 ingr_id; + __u16 egr_id; + __u32 ingr_wide; + __u32 egr_wide; + __u32 ns_data; + __u64 ns_wide; + __u32 sc_id; + __u8 hlim; + char *sc_data; +}; + +/* + * Be careful if you modify structs below - everything MUST be kept synchronized + * with configurations inside ioam6.sh and always reflect the same. + */ + +static struct ioam_config node1 = { + .id = 1, + .wide = 11111111, + .ingr_id = 0xffff, /* default value */ + .egr_id = 101, + .ingr_wide = 0xffffffff, /* default value */ + .egr_wide = 101101, + .ns_data = 0xdeadbee0, + .ns_wide = 0xcafec0caf00dc0de, + .sc_id = 777, + .sc_data = "something that will be 4n-aligned", + .hlim = 64, +}; + +static struct ioam_config node2 = { + .id = 2, + .wide = 22222222, + .ingr_id = 201, + .egr_id = 202, + .ingr_wide = 201201, + .egr_wide = 202202, + .ns_data = 0xdeadbee1, + .ns_wide = 0xcafec0caf11dc0de, + .sc_id = 666, + .sc_data = "Hello there -Obi", + .hlim = 63, +}; + +static struct ioam_config node3 = { + .id = 3, + .wide = 33333333, + .ingr_id = 301, + .egr_id = 0xffff, /* default value */ + .ingr_wide = 301301, + .egr_wide = 0xffffffff, /* default value */ + .ns_data = 0xdeadbee2, + .ns_wide = 0xcafec0caf22dc0de, + .sc_id = 0xffffff, /* default value */ + .sc_data = NULL, + .hlim = 62, +}; + +enum { + /********** + * OUTPUT * + **********/ + TEST_OUT_UNDEF_NS, + TEST_OUT_NO_ROOM, + TEST_OUT_BIT0, + TEST_OUT_BIT1, + TEST_OUT_BIT2, + TEST_OUT_BIT3, + TEST_OUT_BIT4, + TEST_OUT_BIT5, + TEST_OUT_BIT6, + TEST_OUT_BIT7, + TEST_OUT_BIT8, + TEST_OUT_BIT9, + TEST_OUT_BIT10, + TEST_OUT_BIT11, + TEST_OUT_BIT22, + TEST_OUT_FULL_SUPP_TRACE, + + /********* + * INPUT * + *********/ + TEST_IN_UNDEF_NS, + TEST_IN_NO_ROOM, + TEST_IN_OFLAG, + TEST_IN_BIT0, + TEST_IN_BIT1, + TEST_IN_BIT2, + TEST_IN_BIT3, + TEST_IN_BIT4, + TEST_IN_BIT5, + TEST_IN_BIT6, + TEST_IN_BIT7, + TEST_IN_BIT8, + TEST_IN_BIT9, + TEST_IN_BIT10, + TEST_IN_BIT11, + TEST_IN_BIT22, + TEST_IN_FULL_SUPP_TRACE, + + /********** + * GLOBAL * + **********/ + TEST_FWD_FULL_SUPP_TRACE, + + __TEST_MAX, +}; + +static int check_ioam_header(int tid, struct ioam6_trace_hdr *ioam6h, + __u32 trace_type, __u16 ioam_ns) +{ + if (__be16_to_cpu(ioam6h->namespace_id) != ioam_ns || + __be32_to_cpu(ioam6h->type_be32) != (trace_type << 8)) + return 1; + + switch (tid) { + case TEST_OUT_UNDEF_NS: + case TEST_IN_UNDEF_NS: + return ioam6h->overflow || + ioam6h->nodelen != 1 || + ioam6h->remlen != 1; + + case TEST_OUT_NO_ROOM: + case TEST_IN_NO_ROOM: + case TEST_IN_OFLAG: + return !ioam6h->overflow || + ioam6h->nodelen != 2 || + ioam6h->remlen != 1; + + case TEST_OUT_BIT0: + case TEST_IN_BIT0: + case TEST_OUT_BIT1: + case TEST_IN_BIT1: + case TEST_OUT_BIT2: + case TEST_IN_BIT2: + case TEST_OUT_BIT3: + case TEST_IN_BIT3: + case TEST_OUT_BIT4: + case TEST_IN_BIT4: + case TEST_OUT_BIT5: + case TEST_IN_BIT5: + case TEST_OUT_BIT6: + case TEST_IN_BIT6: + case TEST_OUT_BIT7: + case TEST_IN_BIT7: + case TEST_OUT_BIT11: + case TEST_IN_BIT11: + return ioam6h->overflow || + ioam6h->nodelen != 1 || + ioam6h->remlen; + + case TEST_OUT_BIT8: + case TEST_IN_BIT8: + case TEST_OUT_BIT9: + case TEST_IN_BIT9: + case TEST_OUT_BIT10: + case TEST_IN_BIT10: + return ioam6h->overflow || + ioam6h->nodelen != 2 || + ioam6h->remlen; + + case TEST_OUT_BIT22: + case TEST_IN_BIT22: + return ioam6h->overflow || + ioam6h->nodelen || + ioam6h->remlen; + + case TEST_OUT_FULL_SUPP_TRACE: + case TEST_IN_FULL_SUPP_TRACE: + case TEST_FWD_FULL_SUPP_TRACE: + return ioam6h->overflow || + ioam6h->nodelen != 15 || + ioam6h->remlen; + + default: + break; + } + + return 1; +} + +static int check_ioam6_data(__u8 **p, struct ioam6_trace_hdr *ioam6h, + const struct ioam_config cnf) +{ + unsigned int len; + __u8 aligned; + __u64 raw64; + __u32 raw32; + + if (ioam6h->type.bit0) { + raw32 = __be32_to_cpu(*((__u32 *)*p)); + if (cnf.hlim != (raw32 >> 24) || cnf.id != (raw32 & 0xffffff)) + return 1; + *p += sizeof(__u32); + } + + if (ioam6h->type.bit1) { + raw32 = __be32_to_cpu(*((__u32 *)*p)); + if (cnf.ingr_id != (raw32 >> 16) || + cnf.egr_id != (raw32 & 0xffff)) + return 1; + *p += sizeof(__u32); + } + + if (ioam6h->type.bit2) + *p += sizeof(__u32); + + if (ioam6h->type.bit3) + *p += sizeof(__u32); + + if (ioam6h->type.bit4) { + if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + return 1; + *p += sizeof(__u32); + } + + if (ioam6h->type.bit5) { + if (__be32_to_cpu(*((__u32 *)*p)) != cnf.ns_data) + return 1; + *p += sizeof(__u32); + } + + if (ioam6h->type.bit6) + *p += sizeof(__u32); + + if (ioam6h->type.bit7) { + if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + return 1; + *p += sizeof(__u32); + } + + if (ioam6h->type.bit8) { + raw64 = __be64_to_cpu(*((__u64 *)*p)); + if (cnf.hlim != (raw64 >> 56) || + cnf.wide != (raw64 & 0xffffffffffffff)) + return 1; + *p += sizeof(__u64); + } + + if (ioam6h->type.bit9) { + if (__be32_to_cpu(*((__u32 *)*p)) != cnf.ingr_wide) + return 1; + *p += sizeof(__u32); + + if (__be32_to_cpu(*((__u32 *)*p)) != cnf.egr_wide) + return 1; + *p += sizeof(__u32); + } + + if (ioam6h->type.bit10) { + if (__be64_to_cpu(*((__u64 *)*p)) != cnf.ns_wide) + return 1; + *p += sizeof(__u64); + } + + if (ioam6h->type.bit11) { + if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + return 1; + *p += sizeof(__u32); + } + + if (ioam6h->type.bit12) { + if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + return 1; + *p += sizeof(__u32); + } + + if (ioam6h->type.bit13) { + if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + return 1; + *p += sizeof(__u32); + } + + if (ioam6h->type.bit14) { + if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + return 1; + *p += sizeof(__u32); + } + + if (ioam6h->type.bit15) { + if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + return 1; + *p += sizeof(__u32); + } + + if (ioam6h->type.bit16) { + if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + return 1; + *p += sizeof(__u32); + } + + if (ioam6h->type.bit17) { + if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + return 1; + *p += sizeof(__u32); + } + + if (ioam6h->type.bit18) { + if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + return 1; + *p += sizeof(__u32); + } + + if (ioam6h->type.bit19) { + if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + return 1; + *p += sizeof(__u32); + } + + if (ioam6h->type.bit20) { + if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + return 1; + *p += sizeof(__u32); + } + + if (ioam6h->type.bit21) { + if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + return 1; + *p += sizeof(__u32); + } + + if (ioam6h->type.bit22) { + len = cnf.sc_data ? strlen(cnf.sc_data) : 0; + aligned = cnf.sc_data ? __ALIGN_KERNEL(len, 4) : 0; + + raw32 = __be32_to_cpu(*((__u32 *)*p)); + if (aligned != (raw32 >> 24) * 4 || + cnf.sc_id != (raw32 & 0xffffff)) + return 1; + *p += sizeof(__u32); + + if (cnf.sc_data) { + if (strncmp((char *)*p, cnf.sc_data, len)) + return 1; + + *p += len; + aligned -= len; + + while (aligned--) { + if (**p != '\0') + return 1; + *p += sizeof(__u8); + } + } + } + + return 0; +} + +static int check_ioam_header_and_data(int tid, struct ioam6_trace_hdr *ioam6h, + __u32 trace_type, __u16 ioam_ns) +{ + __u8 *p; + + if (check_ioam_header(tid, ioam6h, trace_type, ioam_ns)) + return 1; + + p = ioam6h->data + ioam6h->remlen * 4; + + switch (tid) { + case TEST_OUT_BIT0: + case TEST_OUT_BIT1: + case TEST_OUT_BIT2: + case TEST_OUT_BIT3: + case TEST_OUT_BIT4: + case TEST_OUT_BIT5: + case TEST_OUT_BIT6: + case TEST_OUT_BIT7: + case TEST_OUT_BIT8: + case TEST_OUT_BIT9: + case TEST_OUT_BIT10: + case TEST_OUT_BIT11: + case TEST_OUT_BIT22: + case TEST_OUT_FULL_SUPP_TRACE: + return check_ioam6_data(&p, ioam6h, node1); + + case TEST_IN_BIT0: + case TEST_IN_BIT1: + case TEST_IN_BIT2: + case TEST_IN_BIT3: + case TEST_IN_BIT4: + case TEST_IN_BIT5: + case TEST_IN_BIT6: + case TEST_IN_BIT7: + case TEST_IN_BIT8: + case TEST_IN_BIT9: + case TEST_IN_BIT10: + case TEST_IN_BIT11: + case TEST_IN_BIT22: + case TEST_IN_FULL_SUPP_TRACE: + { + __u32 tmp32 = node2.egr_wide; + __u16 tmp16 = node2.egr_id; + int res; + + node2.egr_id = 0xffff; + node2.egr_wide = 0xffffffff; + + res = check_ioam6_data(&p, ioam6h, node2); + + node2.egr_id = tmp16; + node2.egr_wide = tmp32; + + return res; + } + + case TEST_FWD_FULL_SUPP_TRACE: + if (check_ioam6_data(&p, ioam6h, node3)) + return 1; + if (check_ioam6_data(&p, ioam6h, node2)) + return 1; + return check_ioam6_data(&p, ioam6h, node1); + + default: + break; + } + + return 1; +} + +static int str2id(const char *tname) +{ + if (!strcmp("out_undef_ns", tname)) + return TEST_OUT_UNDEF_NS; + if (!strcmp("out_no_room", tname)) + return TEST_OUT_NO_ROOM; + if (!strcmp("out_bit0", tname)) + return TEST_OUT_BIT0; + if (!strcmp("out_bit1", tname)) + return TEST_OUT_BIT1; + if (!strcmp("out_bit2", tname)) + return TEST_OUT_BIT2; + if (!strcmp("out_bit3", tname)) + return TEST_OUT_BIT3; + if (!strcmp("out_bit4", tname)) + return TEST_OUT_BIT4; + if (!strcmp("out_bit5", tname)) + return TEST_OUT_BIT5; + if (!strcmp("out_bit6", tname)) + return TEST_OUT_BIT6; + if (!strcmp("out_bit7", tname)) + return TEST_OUT_BIT7; + if (!strcmp("out_bit8", tname)) + return TEST_OUT_BIT8; + if (!strcmp("out_bit9", tname)) + return TEST_OUT_BIT9; + if (!strcmp("out_bit10", tname)) + return TEST_OUT_BIT10; + if (!strcmp("out_bit11", tname)) + return TEST_OUT_BIT11; + if (!strcmp("out_bit22", tname)) + return TEST_OUT_BIT22; + if (!strcmp("out_full_supp_trace", tname)) + return TEST_OUT_FULL_SUPP_TRACE; + if (!strcmp("in_undef_ns", tname)) + return TEST_IN_UNDEF_NS; + if (!strcmp("in_no_room", tname)) + return TEST_IN_NO_ROOM; + if (!strcmp("in_oflag", tname)) + return TEST_IN_OFLAG; + if (!strcmp("in_bit0", tname)) + return TEST_IN_BIT0; + if (!strcmp("in_bit1", tname)) + return TEST_IN_BIT1; + if (!strcmp("in_bit2", tname)) + return TEST_IN_BIT2; + if (!strcmp("in_bit3", tname)) + return TEST_IN_BIT3; + if (!strcmp("in_bit4", tname)) + return TEST_IN_BIT4; + if (!strcmp("in_bit5", tname)) + return TEST_IN_BIT5; + if (!strcmp("in_bit6", tname)) + return TEST_IN_BIT6; + if (!strcmp("in_bit7", tname)) + return TEST_IN_BIT7; + if (!strcmp("in_bit8", tname)) + return TEST_IN_BIT8; + if (!strcmp("in_bit9", tname)) + return TEST_IN_BIT9; + if (!strcmp("in_bit10", tname)) + return TEST_IN_BIT10; + if (!strcmp("in_bit11", tname)) + return TEST_IN_BIT11; + if (!strcmp("in_bit22", tname)) + return TEST_IN_BIT22; + if (!strcmp("in_full_supp_trace", tname)) + return TEST_IN_FULL_SUPP_TRACE; + if (!strcmp("fwd_full_supp_trace", tname)) + return TEST_FWD_FULL_SUPP_TRACE; + + return -1; +} + +static int get_u32(__u32 *val, const char *arg, int base) +{ + unsigned long res; + char *ptr; + + if (!arg || !*arg) + return -1; + res = strtoul(arg, &ptr, base); + + if (!ptr || ptr == arg || *ptr) + return -1; + + if (res == ULONG_MAX && errno == ERANGE) + return -1; + + if (res > 0xFFFFFFFFUL) + return -1; + + *val = res; + return 0; +} + +static int get_u16(__u16 *val, const char *arg, int base) +{ + unsigned long res; + char *ptr; + + if (!arg || !*arg) + return -1; + res = strtoul(arg, &ptr, base); + + if (!ptr || ptr == arg || *ptr) + return -1; + + if (res == ULONG_MAX && errno == ERANGE) + return -1; + + if (res > 0xFFFFUL) + return -1; + + *val = res; + return 0; +} + +static int (*func[__TEST_MAX])(int, struct ioam6_trace_hdr *, __u32, __u16) = { + [TEST_OUT_UNDEF_NS] = check_ioam_header, + [TEST_OUT_NO_ROOM] = check_ioam_header, + [TEST_OUT_BIT0] = check_ioam_header_and_data, + [TEST_OUT_BIT1] = check_ioam_header_and_data, + [TEST_OUT_BIT2] = check_ioam_header_and_data, + [TEST_OUT_BIT3] = check_ioam_header_and_data, + [TEST_OUT_BIT4] = check_ioam_header_and_data, + [TEST_OUT_BIT5] = check_ioam_header_and_data, + [TEST_OUT_BIT6] = check_ioam_header_and_data, + [TEST_OUT_BIT7] = check_ioam_header_and_data, + [TEST_OUT_BIT8] = check_ioam_header_and_data, + [TEST_OUT_BIT9] = check_ioam_header_and_data, + [TEST_OUT_BIT10] = check_ioam_header_and_data, + [TEST_OUT_BIT11] = check_ioam_header_and_data, + [TEST_OUT_BIT22] = check_ioam_header_and_data, + [TEST_OUT_FULL_SUPP_TRACE] = check_ioam_header_and_data, + [TEST_IN_UNDEF_NS] = check_ioam_header, + [TEST_IN_NO_ROOM] = check_ioam_header, + [TEST_IN_OFLAG] = check_ioam_header, + [TEST_IN_BIT0] = check_ioam_header_and_data, + [TEST_IN_BIT1] = check_ioam_header_and_data, + [TEST_IN_BIT2] = check_ioam_header_and_data, + [TEST_IN_BIT3] = check_ioam_header_and_data, + [TEST_IN_BIT4] = check_ioam_header_and_data, + [TEST_IN_BIT5] = check_ioam_header_and_data, + [TEST_IN_BIT6] = check_ioam_header_and_data, + [TEST_IN_BIT7] = check_ioam_header_and_data, + [TEST_IN_BIT8] = check_ioam_header_and_data, + [TEST_IN_BIT9] = check_ioam_header_and_data, + [TEST_IN_BIT10] = check_ioam_header_and_data, + [TEST_IN_BIT11] = check_ioam_header_and_data, + [TEST_IN_BIT22] = check_ioam_header_and_data, + [TEST_IN_FULL_SUPP_TRACE] = check_ioam_header_and_data, + [TEST_FWD_FULL_SUPP_TRACE] = check_ioam_header_and_data, +}; + +int main(int argc, char **argv) +{ + int fd, size, hoplen, tid, ret = 1, on = 1; + struct ioam6_hdr *opt; + struct cmsghdr *cmsg; + struct msghdr msg; + struct iovec iov; + __u8 buffer[512]; + __u32 tr_type; + __u16 ioam_ns; + __u8 *ptr; + + if (argc != 5) + goto out; + + tid = str2id(argv[1]); + if (tid < 0 || !func[tid]) + goto out; + + if (get_u32(&tr_type, argv[2], 16) || + get_u16(&ioam_ns, argv[3], 0)) + goto out; + + fd = socket(PF_INET6, SOCK_RAW, + !strcmp(argv[4], "encap") ? IPPROTO_IPV6 : IPPROTO_ICMPV6); + if (fd < 0) + goto out; + + setsockopt(fd, IPPROTO_IPV6, IPV6_RECVHOPOPTS, &on, sizeof(on)); + + iov.iov_len = 1; + iov.iov_base = malloc(CMSG_SPACE(sizeof(buffer))); + if (!iov.iov_base) + goto close; +recv: + memset(&msg, 0, sizeof(msg)); + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = buffer; + msg.msg_controllen = CMSG_SPACE(sizeof(buffer)); + + size = recvmsg(fd, &msg, 0); + if (size <= 0) + goto close; + + for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) { + if (cmsg->cmsg_level != IPPROTO_IPV6 || + cmsg->cmsg_type != IPV6_HOPOPTS || + cmsg->cmsg_len < sizeof(struct ipv6_hopopt_hdr)) + continue; + + ptr = (__u8 *)CMSG_DATA(cmsg); + + hoplen = (ptr[1] + 1) << 3; + ptr += sizeof(struct ipv6_hopopt_hdr); + + while (hoplen > 0) { + opt = (struct ioam6_hdr *)ptr; + + if (opt->opt_type == IPV6_TLV_IOAM && + opt->type == IOAM6_TYPE_PREALLOC) { + ptr += sizeof(*opt); + ret = func[tid](tid, + (struct ioam6_trace_hdr *)ptr, + tr_type, ioam_ns); + goto close; + } + + ptr += opt->opt_len + 2; + hoplen -= opt->opt_len + 2; + } + } + + goto recv; +close: + free(iov.iov_base); + close(fd); +out: + return ret; +} diff --git a/tools/testing/selftests/net/ip_local_port_range.c b/tools/testing/selftests/net/ip_local_port_range.c new file mode 100644 index 000000000000..193b82745fd8 --- /dev/null +++ b/tools/testing/selftests/net/ip_local_port_range.c @@ -0,0 +1,463 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause +// Copyright (c) 2023 Cloudflare + +/* Test IP_LOCAL_PORT_RANGE socket option: IPv4 + IPv6, TCP + UDP. + * + * Tests assume that net.ipv4.ip_local_port_range is [40000, 49999]. + * Don't run these directly but with ip_local_port_range.sh script. + */ + +#include <fcntl.h> +#include <netinet/ip.h> + +#include "../kselftest_harness.h" + +#ifndef IP_LOCAL_PORT_RANGE +#define IP_LOCAL_PORT_RANGE 51 +#endif + +#ifndef IPPROTO_MPTCP +#define IPPROTO_MPTCP 262 +#endif + +static __u32 pack_port_range(__u16 lo, __u16 hi) +{ + return (hi << 16) | (lo << 0); +} + +static void unpack_port_range(__u32 range, __u16 *lo, __u16 *hi) +{ + *lo = range & 0xffff; + *hi = range >> 16; +} + +static int get_so_domain(int fd) +{ + int domain, err; + socklen_t len; + + len = sizeof(domain); + err = getsockopt(fd, SOL_SOCKET, SO_DOMAIN, &domain, &len); + if (err) + return -1; + + return domain; +} + +static int bind_to_loopback_any_port(int fd) +{ + union { + struct sockaddr sa; + struct sockaddr_in v4; + struct sockaddr_in6 v6; + } addr; + socklen_t addr_len; + + memset(&addr, 0, sizeof(addr)); + switch (get_so_domain(fd)) { + case AF_INET: + addr.v4.sin_family = AF_INET; + addr.v4.sin_port = htons(0); + addr.v4.sin_addr.s_addr = htonl(INADDR_LOOPBACK); + addr_len = sizeof(addr.v4); + break; + case AF_INET6: + addr.v6.sin6_family = AF_INET6; + addr.v6.sin6_port = htons(0); + addr.v6.sin6_addr = in6addr_loopback; + addr_len = sizeof(addr.v6); + break; + default: + return -1; + } + + return bind(fd, &addr.sa, addr_len); +} + +static int get_sock_port(int fd) +{ + union { + struct sockaddr sa; + struct sockaddr_in v4; + struct sockaddr_in6 v6; + } addr; + socklen_t addr_len; + int err; + + addr_len = sizeof(addr); + memset(&addr, 0, sizeof(addr)); + err = getsockname(fd, &addr.sa, &addr_len); + if (err) + return -1; + + switch (addr.sa.sa_family) { + case AF_INET: + return ntohs(addr.v4.sin_port); + case AF_INET6: + return ntohs(addr.v6.sin6_port); + default: + errno = EAFNOSUPPORT; + return -1; + } +} + +static int get_ip_local_port_range(int fd, __u32 *range) +{ + socklen_t len; + __u32 val; + int err; + + len = sizeof(val); + err = getsockopt(fd, SOL_IP, IP_LOCAL_PORT_RANGE, &val, &len); + if (err) + return -1; + + *range = val; + return 0; +} + +FIXTURE(ip_local_port_range) {}; + +FIXTURE_SETUP(ip_local_port_range) +{ +} + +FIXTURE_TEARDOWN(ip_local_port_range) +{ +} + +FIXTURE_VARIANT(ip_local_port_range) { + int so_domain; + int so_type; + int so_protocol; +}; + +FIXTURE_VARIANT_ADD(ip_local_port_range, ip4_tcp) { + .so_domain = AF_INET, + .so_type = SOCK_STREAM, + .so_protocol = 0, +}; + +FIXTURE_VARIANT_ADD(ip_local_port_range, ip4_udp) { + .so_domain = AF_INET, + .so_type = SOCK_DGRAM, + .so_protocol = 0, +}; + +FIXTURE_VARIANT_ADD(ip_local_port_range, ip4_stcp) { + .so_domain = AF_INET, + .so_type = SOCK_STREAM, + .so_protocol = IPPROTO_SCTP, +}; + +FIXTURE_VARIANT_ADD(ip_local_port_range, ip4_mptcp) { + .so_domain = AF_INET, + .so_type = SOCK_STREAM, + .so_protocol = IPPROTO_MPTCP, +}; + +FIXTURE_VARIANT_ADD(ip_local_port_range, ip6_tcp) { + .so_domain = AF_INET6, + .so_type = SOCK_STREAM, + .so_protocol = 0, +}; + +FIXTURE_VARIANT_ADD(ip_local_port_range, ip6_udp) { + .so_domain = AF_INET6, + .so_type = SOCK_DGRAM, + .so_protocol = 0, +}; + +FIXTURE_VARIANT_ADD(ip_local_port_range, ip6_stcp) { + .so_domain = AF_INET6, + .so_type = SOCK_STREAM, + .so_protocol = IPPROTO_SCTP, +}; + +FIXTURE_VARIANT_ADD(ip_local_port_range, ip6_mptcp) { + .so_domain = AF_INET6, + .so_type = SOCK_STREAM, + .so_protocol = IPPROTO_MPTCP, +}; + +TEST_F(ip_local_port_range, invalid_option_value) +{ + __u16 val16; + __u32 val32; + __u64 val64; + int fd, err; + + fd = socket(variant->so_domain, variant->so_type, variant->so_protocol); + ASSERT_GE(fd, 0) TH_LOG("socket failed"); + + /* Too few bytes */ + val16 = 40000; + err = setsockopt(fd, SOL_IP, IP_LOCAL_PORT_RANGE, &val16, sizeof(val16)); + EXPECT_TRUE(err) TH_LOG("expected setsockopt(IP_LOCAL_PORT_RANGE) to fail"); + EXPECT_EQ(errno, EINVAL); + + /* Empty range: low port > high port */ + val32 = pack_port_range(40222, 40111); + err = setsockopt(fd, SOL_IP, IP_LOCAL_PORT_RANGE, &val32, sizeof(val32)); + EXPECT_TRUE(err) TH_LOG("expected setsockopt(IP_LOCAL_PORT_RANGE) to fail"); + EXPECT_EQ(errno, EINVAL); + + /* Too many bytes */ + val64 = pack_port_range(40333, 40444); + err = setsockopt(fd, SOL_IP, IP_LOCAL_PORT_RANGE, &val64, sizeof(val64)); + EXPECT_TRUE(err) TH_LOG("expected setsockopt(IP_LOCAL_PORT_RANGE) to fail"); + EXPECT_EQ(errno, EINVAL); + + err = close(fd); + ASSERT_TRUE(!err) TH_LOG("close failed"); +} + +TEST_F(ip_local_port_range, port_range_out_of_netns_range) +{ + const struct test { + __u16 range_lo; + __u16 range_hi; + } tests[] = { + { 30000, 39999 }, /* socket range below netns range */ + { 50000, 59999 }, /* socket range above netns range */ + }; + const struct test *t; + + for (t = tests; t < tests + ARRAY_SIZE(tests); t++) { + /* Bind a couple of sockets, not just one, to check + * that the range wasn't clamped to a single port from + * the netns range. That is [40000, 40000] or [49999, + * 49999], respectively for each test case. + */ + int fds[2], i; + + TH_LOG("lo %5hu, hi %5hu", t->range_lo, t->range_hi); + + for (i = 0; i < ARRAY_SIZE(fds); i++) { + int fd, err, port; + __u32 range; + + fd = socket(variant->so_domain, variant->so_type, variant->so_protocol); + ASSERT_GE(fd, 0) TH_LOG("#%d: socket failed", i); + + range = pack_port_range(t->range_lo, t->range_hi); + err = setsockopt(fd, SOL_IP, IP_LOCAL_PORT_RANGE, &range, sizeof(range)); + ASSERT_TRUE(!err) TH_LOG("#%d: setsockopt(IP_LOCAL_PORT_RANGE) failed", i); + + err = bind_to_loopback_any_port(fd); + ASSERT_TRUE(!err) TH_LOG("#%d: bind failed", i); + + /* Check that socket port range outside of ephemeral range is ignored */ + port = get_sock_port(fd); + ASSERT_GE(port, 40000) TH_LOG("#%d: expected port within netns range", i); + ASSERT_LE(port, 49999) TH_LOG("#%d: expected port within netns range", i); + + fds[i] = fd; + } + + for (i = 0; i < ARRAY_SIZE(fds); i++) + ASSERT_TRUE(close(fds[i]) == 0) TH_LOG("#%d: close failed", i); + } +} + +TEST_F(ip_local_port_range, single_port_range) +{ + const struct test { + __u16 range_lo; + __u16 range_hi; + __u16 expected; + } tests[] = { + /* single port range within ephemeral range */ + { 45000, 45000, 45000 }, + /* first port in the ephemeral range (clamp from above) */ + { 0, 40000, 40000 }, + /* last port in the ephemeral range (clamp from below) */ + { 49999, 0, 49999 }, + }; + const struct test *t; + + for (t = tests; t < tests + ARRAY_SIZE(tests); t++) { + int fd, err, port; + __u32 range; + + TH_LOG("lo %5hu, hi %5hu, expected %5hu", + t->range_lo, t->range_hi, t->expected); + + fd = socket(variant->so_domain, variant->so_type, variant->so_protocol); + ASSERT_GE(fd, 0) TH_LOG("socket failed"); + + range = pack_port_range(t->range_lo, t->range_hi); + err = setsockopt(fd, SOL_IP, IP_LOCAL_PORT_RANGE, &range, sizeof(range)); + ASSERT_TRUE(!err) TH_LOG("setsockopt(IP_LOCAL_PORT_RANGE) failed"); + + err = bind_to_loopback_any_port(fd); + ASSERT_TRUE(!err) TH_LOG("bind failed"); + + port = get_sock_port(fd); + ASSERT_EQ(port, t->expected) TH_LOG("unexpected local port"); + + err = close(fd); + ASSERT_TRUE(!err) TH_LOG("close failed"); + } +} + +TEST_F(ip_local_port_range, exhaust_8_port_range) +{ + __u8 port_set = 0; + int i, fd, err; + __u32 range; + __u16 port; + int fds[8]; + + for (i = 0; i < ARRAY_SIZE(fds); i++) { + fd = socket(variant->so_domain, variant->so_type, variant->so_protocol); + ASSERT_GE(fd, 0) TH_LOG("socket failed"); + + range = pack_port_range(40000, 40007); + err = setsockopt(fd, SOL_IP, IP_LOCAL_PORT_RANGE, &range, sizeof(range)); + ASSERT_TRUE(!err) TH_LOG("setsockopt(IP_LOCAL_PORT_RANGE) failed"); + + err = bind_to_loopback_any_port(fd); + ASSERT_TRUE(!err) TH_LOG("bind failed"); + + port = get_sock_port(fd); + ASSERT_GE(port, 40000) TH_LOG("expected port within sockopt range"); + ASSERT_LE(port, 40007) TH_LOG("expected port within sockopt range"); + + port_set |= 1 << (port - 40000); + fds[i] = fd; + } + + /* Check that all every port from the test range is in use */ + ASSERT_EQ(port_set, 0xff) TH_LOG("expected all ports to be busy"); + + /* Check that bind() fails because the whole range is busy */ + fd = socket(variant->so_domain, variant->so_type, variant->so_protocol); + ASSERT_GE(fd, 0) TH_LOG("socket failed"); + + range = pack_port_range(40000, 40007); + err = setsockopt(fd, SOL_IP, IP_LOCAL_PORT_RANGE, &range, sizeof(range)); + ASSERT_TRUE(!err) TH_LOG("setsockopt(IP_LOCAL_PORT_RANGE) failed"); + + err = bind_to_loopback_any_port(fd); + ASSERT_TRUE(err) TH_LOG("expected bind to fail"); + ASSERT_EQ(errno, EADDRINUSE); + + err = close(fd); + ASSERT_TRUE(!err) TH_LOG("close failed"); + + for (i = 0; i < ARRAY_SIZE(fds); i++) { + err = close(fds[i]); + ASSERT_TRUE(!err) TH_LOG("close failed"); + } +} + +TEST_F(ip_local_port_range, late_bind) +{ + union { + struct sockaddr sa; + struct sockaddr_in v4; + struct sockaddr_in6 v6; + } addr; + socklen_t addr_len; + const int one = 1; + int fd, err; + __u32 range; + __u16 port; + + fd = socket(variant->so_domain, variant->so_type, 0); + ASSERT_GE(fd, 0) TH_LOG("socket failed"); + + range = pack_port_range(40100, 40199); + err = setsockopt(fd, SOL_IP, IP_LOCAL_PORT_RANGE, &range, sizeof(range)); + ASSERT_TRUE(!err) TH_LOG("setsockopt(IP_LOCAL_PORT_RANGE) failed"); + + err = setsockopt(fd, SOL_IP, IP_BIND_ADDRESS_NO_PORT, &one, sizeof(one)); + ASSERT_TRUE(!err) TH_LOG("setsockopt(IP_BIND_ADDRESS_NO_PORT) failed"); + + err = bind_to_loopback_any_port(fd); + ASSERT_TRUE(!err) TH_LOG("bind failed"); + + port = get_sock_port(fd); + ASSERT_EQ(port, 0) TH_LOG("getsockname failed"); + + /* Invalid destination */ + memset(&addr, 0, sizeof(addr)); + switch (variant->so_domain) { + case AF_INET: + addr.v4.sin_family = AF_INET; + addr.v4.sin_port = htons(0); + addr.v4.sin_addr.s_addr = htonl(INADDR_ANY); + addr_len = sizeof(addr.v4); + break; + case AF_INET6: + addr.v6.sin6_family = AF_INET6; + addr.v6.sin6_port = htons(0); + addr.v6.sin6_addr = in6addr_any; + addr_len = sizeof(addr.v6); + break; + default: + ASSERT_TRUE(false) TH_LOG("unsupported socket domain"); + } + + /* connect() doesn't need to succeed for late bind to happen */ + connect(fd, &addr.sa, addr_len); + + port = get_sock_port(fd); + ASSERT_GE(port, 40100); + ASSERT_LE(port, 40199); + + err = close(fd); + ASSERT_TRUE(!err) TH_LOG("close failed"); +} + +XFAIL_ADD(ip_local_port_range, ip4_stcp, late_bind); +XFAIL_ADD(ip_local_port_range, ip6_stcp, late_bind); + +TEST_F(ip_local_port_range, get_port_range) +{ + __u16 lo, hi; + __u32 range; + int fd, err; + + fd = socket(variant->so_domain, variant->so_type, variant->so_protocol); + ASSERT_GE(fd, 0) TH_LOG("socket failed"); + + /* Get range before it will be set */ + err = get_ip_local_port_range(fd, &range); + ASSERT_TRUE(!err) TH_LOG("getsockopt(IP_LOCAL_PORT_RANGE) failed"); + + unpack_port_range(range, &lo, &hi); + ASSERT_EQ(lo, 0) TH_LOG("unexpected low port"); + ASSERT_EQ(hi, 0) TH_LOG("unexpected high port"); + + range = pack_port_range(12345, 54321); + err = setsockopt(fd, SOL_IP, IP_LOCAL_PORT_RANGE, &range, sizeof(range)); + ASSERT_TRUE(!err) TH_LOG("setsockopt(IP_LOCAL_PORT_RANGE) failed"); + + /* Get range after it has been set */ + err = get_ip_local_port_range(fd, &range); + ASSERT_TRUE(!err) TH_LOG("getsockopt(IP_LOCAL_PORT_RANGE) failed"); + + unpack_port_range(range, &lo, &hi); + ASSERT_EQ(lo, 12345) TH_LOG("unexpected low port"); + ASSERT_EQ(hi, 54321) TH_LOG("unexpected high port"); + + /* Unset the port range */ + range = pack_port_range(0, 0); + err = setsockopt(fd, SOL_IP, IP_LOCAL_PORT_RANGE, &range, sizeof(range)); + ASSERT_TRUE(!err) TH_LOG("setsockopt(IP_LOCAL_PORT_RANGE) failed"); + + /* Get range after it has been unset */ + err = get_ip_local_port_range(fd, &range); + ASSERT_TRUE(!err) TH_LOG("getsockopt(IP_LOCAL_PORT_RANGE) failed"); + + unpack_port_range(range, &lo, &hi); + ASSERT_EQ(lo, 0) TH_LOG("unexpected low port"); + ASSERT_EQ(hi, 0) TH_LOG("unexpected high port"); + + err = close(fd); + ASSERT_TRUE(!err) TH_LOG("close failed"); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/net/ip_local_port_range.sh b/tools/testing/selftests/net/ip_local_port_range.sh new file mode 100755 index 000000000000..6c6ad346eaa0 --- /dev/null +++ b/tools/testing/selftests/net/ip_local_port_range.sh @@ -0,0 +1,5 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +./in_netns.sh \ + sh -c 'sysctl -q -w net.ipv4.ip_local_port_range="40000 49999" && ./ip_local_port_range' diff --git a/tools/testing/selftests/net/ipsec.c b/tools/testing/selftests/net/ipsec.c index 17ced7d6ce25..be4a30a0d02a 100644 --- a/tools/testing/selftests/net/ipsec.c +++ b/tools/testing/selftests/net/ipsec.c @@ -41,7 +41,6 @@ #define pr_err(fmt, ...) printk(fmt ": %m", ##__VA_ARGS__) -#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) #define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)])) #define IPV4_STR_SZ 16 /* xxx.xxx.xxx.xxx is longest + \0 */ @@ -59,6 +58,8 @@ #define VETH_FMT "ktst-%d" #define VETH_LEN 12 +#define XFRM_ALGO_NR_KEYS 29 + static int nsfd_parent = -1; static int nsfd_childa = -1; static int nsfd_childb = -1; @@ -76,6 +77,43 @@ const unsigned int ping_timeout = 300; const unsigned int ping_count = 100; const unsigned int ping_success = 80; +struct xfrm_key_entry { + char algo_name[35]; + int key_len; +}; + +struct xfrm_key_entry xfrm_key_entries[] = { + {"digest_null", 0}, + {"ecb(cipher_null)", 0}, + {"cbc(des)", 64}, + {"hmac(md5)", 128}, + {"cmac(aes)", 128}, + {"xcbc(aes)", 128}, + {"cbc(cast5)", 128}, + {"cbc(serpent)", 128}, + {"hmac(sha1)", 160}, + {"hmac(rmd160)", 160}, + {"cbc(des3_ede)", 192}, + {"hmac(sha256)", 256}, + {"cbc(aes)", 256}, + {"cbc(camellia)", 256}, + {"cbc(twofish)", 256}, + {"rfc3686(ctr(aes))", 288}, + {"hmac(sha384)", 384}, + {"cbc(blowfish)", 448}, + {"hmac(sha512)", 512}, + {"rfc4106(gcm(aes))-128", 160}, + {"rfc4543(gcm(aes))-128", 160}, + {"rfc4309(ccm(aes))-128", 152}, + {"rfc4106(gcm(aes))-192", 224}, + {"rfc4543(gcm(aes))-192", 224}, + {"rfc4309(ccm(aes))-192", 216}, + {"rfc4106(gcm(aes))-256", 288}, + {"rfc4543(gcm(aes))-256", 288}, + {"rfc4309(ccm(aes))-256", 280}, + {"rfc7539(chacha20,poly1305)-128", 0} +}; + static void randomize_buffer(void *buf, size_t buflen) { int *p = (int *)buf; @@ -484,13 +522,16 @@ enum desc_type { MONITOR_ACQUIRE, EXPIRE_STATE, EXPIRE_POLICY, + SPDINFO_ATTRS, }; const char *desc_name[] = { "create tunnel", "alloc spi", "monitor acquire", "expire state", - "expire policy" + "expire policy", + "spdinfo attributes", + "" }; struct xfrm_desc { enum desc_type type; @@ -765,65 +806,12 @@ static int do_ping(int cmd_fd, char *buf, size_t buf_len, struct in_addr from, static int xfrm_fill_key(char *name, char *buf, size_t buf_len, unsigned int *key_len) { - /* TODO: use set/map instead */ - if (strncmp(name, "digest_null", ALGO_LEN) == 0) - *key_len = 0; - else if (strncmp(name, "ecb(cipher_null)", ALGO_LEN) == 0) - *key_len = 0; - else if (strncmp(name, "cbc(des)", ALGO_LEN) == 0) - *key_len = 64; - else if (strncmp(name, "hmac(md5)", ALGO_LEN) == 0) - *key_len = 128; - else if (strncmp(name, "cmac(aes)", ALGO_LEN) == 0) - *key_len = 128; - else if (strncmp(name, "xcbc(aes)", ALGO_LEN) == 0) - *key_len = 128; - else if (strncmp(name, "cbc(cast5)", ALGO_LEN) == 0) - *key_len = 128; - else if (strncmp(name, "cbc(serpent)", ALGO_LEN) == 0) - *key_len = 128; - else if (strncmp(name, "hmac(sha1)", ALGO_LEN) == 0) - *key_len = 160; - else if (strncmp(name, "hmac(rmd160)", ALGO_LEN) == 0) - *key_len = 160; - else if (strncmp(name, "cbc(des3_ede)", ALGO_LEN) == 0) - *key_len = 192; - else if (strncmp(name, "hmac(sha256)", ALGO_LEN) == 0) - *key_len = 256; - else if (strncmp(name, "cbc(aes)", ALGO_LEN) == 0) - *key_len = 256; - else if (strncmp(name, "cbc(camellia)", ALGO_LEN) == 0) - *key_len = 256; - else if (strncmp(name, "cbc(twofish)", ALGO_LEN) == 0) - *key_len = 256; - else if (strncmp(name, "rfc3686(ctr(aes))", ALGO_LEN) == 0) - *key_len = 288; - else if (strncmp(name, "hmac(sha384)", ALGO_LEN) == 0) - *key_len = 384; - else if (strncmp(name, "cbc(blowfish)", ALGO_LEN) == 0) - *key_len = 448; - else if (strncmp(name, "hmac(sha512)", ALGO_LEN) == 0) - *key_len = 512; - else if (strncmp(name, "rfc4106(gcm(aes))-128", ALGO_LEN) == 0) - *key_len = 160; - else if (strncmp(name, "rfc4543(gcm(aes))-128", ALGO_LEN) == 0) - *key_len = 160; - else if (strncmp(name, "rfc4309(ccm(aes))-128", ALGO_LEN) == 0) - *key_len = 152; - else if (strncmp(name, "rfc4106(gcm(aes))-192", ALGO_LEN) == 0) - *key_len = 224; - else if (strncmp(name, "rfc4543(gcm(aes))-192", ALGO_LEN) == 0) - *key_len = 224; - else if (strncmp(name, "rfc4309(ccm(aes))-192", ALGO_LEN) == 0) - *key_len = 216; - else if (strncmp(name, "rfc4106(gcm(aes))-256", ALGO_LEN) == 0) - *key_len = 288; - else if (strncmp(name, "rfc4543(gcm(aes))-256", ALGO_LEN) == 0) - *key_len = 288; - else if (strncmp(name, "rfc4309(ccm(aes))-256", ALGO_LEN) == 0) - *key_len = 280; - else if (strncmp(name, "rfc7539(chacha20,poly1305)-128", ALGO_LEN) == 0) - *key_len = 0; + int i; + + for (i = 0; i < XFRM_ALGO_NR_KEYS; i++) { + if (strncmp(name, xfrm_key_entries[i].algo_name, ALGO_LEN) == 0) + *key_len = xfrm_key_entries[i].key_len; + } if (*key_len > buf_len) { printk("Can't pack a key - too big for buffer"); @@ -1593,6 +1581,155 @@ out_close: return ret; } +static int xfrm_spdinfo_set_thresh(int xfrm_sock, uint32_t *seq, + unsigned thresh4_l, unsigned thresh4_r, + unsigned thresh6_l, unsigned thresh6_r, + bool add_bad_attr) + +{ + struct { + struct nlmsghdr nh; + union { + uint32_t unused; + int error; + }; + char attrbuf[MAX_PAYLOAD]; + } req; + struct xfrmu_spdhthresh thresh; + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.unused)); + req.nh.nlmsg_type = XFRM_MSG_NEWSPDINFO; + req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; + req.nh.nlmsg_seq = (*seq)++; + + thresh.lbits = thresh4_l; + thresh.rbits = thresh4_r; + if (rtattr_pack(&req.nh, sizeof(req), XFRMA_SPD_IPV4_HTHRESH, &thresh, sizeof(thresh))) + return -1; + + thresh.lbits = thresh6_l; + thresh.rbits = thresh6_r; + if (rtattr_pack(&req.nh, sizeof(req), XFRMA_SPD_IPV6_HTHRESH, &thresh, sizeof(thresh))) + return -1; + + if (add_bad_attr) { + BUILD_BUG_ON(XFRMA_IF_ID <= XFRMA_SPD_MAX + 1); + if (rtattr_pack(&req.nh, sizeof(req), XFRMA_IF_ID, NULL, 0)) { + pr_err("adding attribute failed: no space"); + return -1; + } + } + + if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) { + pr_err("send()"); + return -1; + } + + if (recv(xfrm_sock, &req, sizeof(req), 0) < 0) { + pr_err("recv()"); + return -1; + } else if (req.nh.nlmsg_type != NLMSG_ERROR) { + printk("expected NLMSG_ERROR, got %d", (int)req.nh.nlmsg_type); + return -1; + } + + if (req.error) { + printk("NLMSG_ERROR: %d: %s", req.error, strerror(-req.error)); + return -1; + } + + return 0; +} + +static int xfrm_spdinfo_attrs(int xfrm_sock, uint32_t *seq) +{ + struct { + struct nlmsghdr nh; + union { + uint32_t unused; + int error; + }; + char attrbuf[MAX_PAYLOAD]; + } req; + + if (xfrm_spdinfo_set_thresh(xfrm_sock, seq, 32, 31, 120, 16, false)) { + pr_err("Can't set SPD HTHRESH"); + return KSFT_FAIL; + } + + memset(&req, 0, sizeof(req)); + + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.unused)); + req.nh.nlmsg_type = XFRM_MSG_GETSPDINFO; + req.nh.nlmsg_flags = NLM_F_REQUEST; + req.nh.nlmsg_seq = (*seq)++; + if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) { + pr_err("send()"); + return KSFT_FAIL; + } + + if (recv(xfrm_sock, &req, sizeof(req), 0) < 0) { + pr_err("recv()"); + return KSFT_FAIL; + } else if (req.nh.nlmsg_type == XFRM_MSG_NEWSPDINFO) { + size_t len = NLMSG_PAYLOAD(&req.nh, sizeof(req.unused)); + struct rtattr *attr = (void *)req.attrbuf; + int got_thresh = 0; + + for (; RTA_OK(attr, len); attr = RTA_NEXT(attr, len)) { + if (attr->rta_type == XFRMA_SPD_IPV4_HTHRESH) { + struct xfrmu_spdhthresh *t = RTA_DATA(attr); + + got_thresh++; + if (t->lbits != 32 || t->rbits != 31) { + pr_err("thresh differ: %u, %u", + t->lbits, t->rbits); + return KSFT_FAIL; + } + } + if (attr->rta_type == XFRMA_SPD_IPV6_HTHRESH) { + struct xfrmu_spdhthresh *t = RTA_DATA(attr); + + got_thresh++; + if (t->lbits != 120 || t->rbits != 16) { + pr_err("thresh differ: %u, %u", + t->lbits, t->rbits); + return KSFT_FAIL; + } + } + } + if (got_thresh != 2) { + pr_err("only %d thresh returned by XFRM_MSG_GETSPDINFO", got_thresh); + return KSFT_FAIL; + } + } else if (req.nh.nlmsg_type != NLMSG_ERROR) { + printk("expected NLMSG_ERROR, got %d", (int)req.nh.nlmsg_type); + return KSFT_FAIL; + } else { + printk("NLMSG_ERROR: %d: %s", req.error, strerror(-req.error)); + return -1; + } + + /* Restore the default */ + if (xfrm_spdinfo_set_thresh(xfrm_sock, seq, 32, 32, 128, 128, false)) { + pr_err("Can't restore SPD HTHRESH"); + return KSFT_FAIL; + } + + /* + * At this moment xfrm uses nlmsg_parse_deprecated(), which + * implies NL_VALIDATE_LIBERAL - ignoring attributes with + * (type > maxtype). nla_parse_depricated_strict() would enforce + * it. Or even stricter nla_parse(). + * Right now it's not expected to fail, but to be ignored. + */ + if (xfrm_spdinfo_set_thresh(xfrm_sock, seq, 32, 32, 128, 128, true)) + return KSFT_PASS; + + return KSFT_PASS; +} + static int child_serv(int xfrm_sock, uint32_t *seq, unsigned int nr, int cmd_fd, void *buf, struct xfrm_desc *desc) { @@ -1717,6 +1854,9 @@ static int child_f(unsigned int nr, int test_desc_fd, int cmd_fd, void *buf) case EXPIRE_POLICY: ret = xfrm_expire_policy(xfrm_sock, &seq, nr, &desc); break; + case SPDINFO_ATTRS: + ret = xfrm_spdinfo_attrs(xfrm_sock, &seq); + break; default: printk("Unknown desc type %d", desc.type); exit(KSFT_FAIL); @@ -1785,7 +1925,7 @@ static void grand_child_serv(unsigned int nr, int cmd_fd, void *buf, break; default: printk("got unknown msg type %d", msg->type); - }; + } } static int grand_child_f(unsigned int nr, int cmd_fd, void *buf) @@ -1994,8 +2134,10 @@ static int write_proto_plan(int fd, int proto) * sizeof(xfrm_user_polexpire) = 168 | sizeof(xfrm_user_polexpire) = 176 * * Check the affected by the UABI difference structures. + * Also, check translation for xfrm_set_spdinfo: it has it's own attributes + * which needs to be correctly copied, but not translated. */ -const unsigned int compat_plan = 4; +const unsigned int compat_plan = 5; static int write_compat_struct_tests(int test_desc_fd) { struct xfrm_desc desc = {}; @@ -2019,6 +2161,10 @@ static int write_compat_struct_tests(int test_desc_fd) if (__write_desc(test_desc_fd, &desc)) return -1; + desc.type = SPDINFO_ATTRS; + if (__write_desc(test_desc_fd, &desc)) + return -1; + return 0; } @@ -2117,7 +2263,7 @@ static int check_results(void) int main(int argc, char **argv) { - unsigned int nr_process = 1; + long nr_process = 1; int route_sock = -1, ret = KSFT_SKIP; int test_desc_fd[2]; uint32_t route_seq; @@ -2138,7 +2284,7 @@ int main(int argc, char **argv) exit_usage(argv); } - if (nr_process > MAX_PROCESSES || !nr_process) { + if (nr_process > MAX_PROCESSES || nr_process < 1) { printk("nr_process should be between [1; %u]", MAX_PROCESSES); exit_usage(argv); diff --git a/tools/testing/selftests/net/ipv6_flowlabel.c b/tools/testing/selftests/net/ipv6_flowlabel.c index a7c41375374f..708a9822259d 100644 --- a/tools/testing/selftests/net/ipv6_flowlabel.c +++ b/tools/testing/selftests/net/ipv6_flowlabel.c @@ -9,6 +9,7 @@ #include <errno.h> #include <fcntl.h> #include <limits.h> +#include <linux/icmpv6.h> #include <linux/in6.h> #include <stdbool.h> #include <stdio.h> @@ -29,26 +30,48 @@ #ifndef IPV6_FLOWLABEL_MGR #define IPV6_FLOWLABEL_MGR 32 #endif +#ifndef IPV6_FLOWINFO_SEND +#define IPV6_FLOWINFO_SEND 33 +#endif #define FLOWLABEL_WILDCARD ((uint32_t) -1) static const char cfg_data[] = "a"; static uint32_t cfg_label = 1; +static bool use_ping; +static bool use_flowinfo_send; + +static struct icmp6hdr icmp6 = { + .icmp6_type = ICMPV6_ECHO_REQUEST +}; + +static struct sockaddr_in6 addr = { + .sin6_family = AF_INET6, + .sin6_addr = IN6ADDR_LOOPBACK_INIT, +}; static void do_send(int fd, bool with_flowlabel, uint32_t flowlabel) { char control[CMSG_SPACE(sizeof(flowlabel))] = {0}; struct msghdr msg = {0}; - struct iovec iov = {0}; + struct iovec iov = { + .iov_base = (char *)cfg_data, + .iov_len = sizeof(cfg_data) + }; int ret; - iov.iov_base = (char *)cfg_data; - iov.iov_len = sizeof(cfg_data); + if (use_ping) { + iov.iov_base = &icmp6; + iov.iov_len = sizeof(icmp6); + } msg.msg_iov = &iov; msg.msg_iovlen = 1; - if (with_flowlabel) { + if (use_flowinfo_send) { + msg.msg_name = &addr; + msg.msg_namelen = sizeof(addr); + } else if (with_flowlabel) { struct cmsghdr *cm; cm = (void *)control; @@ -94,6 +117,8 @@ static void do_recv(int fd, bool with_flowlabel, uint32_t expect) ret = recvmsg(fd, &msg, 0); if (ret == -1) error(1, errno, "recv"); + if (use_ping) + goto parse_cmsg; if (msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC)) error(1, 0, "recv: truncated"); if (ret != sizeof(cfg_data)) @@ -101,6 +126,7 @@ static void do_recv(int fd, bool with_flowlabel, uint32_t expect) if (memcmp(data, cfg_data, sizeof(data))) error(1, 0, "recv: data mismatch"); +parse_cmsg: cm = CMSG_FIRSTHDR(&msg); if (with_flowlabel) { if (!cm) @@ -114,9 +140,11 @@ static void do_recv(int fd, bool with_flowlabel, uint32_t expect) flowlabel = ntohl(*(uint32_t *)CMSG_DATA(cm)); fprintf(stderr, "recv with label %u\n", flowlabel); - if (expect != FLOWLABEL_WILDCARD && expect != flowlabel) + if (expect != FLOWLABEL_WILDCARD && expect != flowlabel) { fprintf(stderr, "recv: incorrect flowlabel %u != %u\n", flowlabel, expect); + error(1, 0, "recv: flowlabel is wrong"); + } } else { fprintf(stderr, "recv without label\n"); @@ -165,11 +193,17 @@ static void parse_opts(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "l:")) != -1) { + while ((c = getopt(argc, argv, "l:ps")) != -1) { switch (c) { case 'l': cfg_label = strtoul(optarg, NULL, 0); break; + case 'p': + use_ping = true; + break; + case 's': + use_flowinfo_send = true; + break; default: error(1, 0, "%s: parse error", argv[0]); } @@ -178,27 +212,30 @@ static void parse_opts(int argc, char **argv) int main(int argc, char **argv) { - struct sockaddr_in6 addr = { - .sin6_family = AF_INET6, - .sin6_port = htons(8000), - .sin6_addr = IN6ADDR_LOOPBACK_INIT, - }; const int one = 1; int fdt, fdr; + int prot = 0; + + addr.sin6_port = htons(8000); parse_opts(argc, argv); - fdt = socket(PF_INET6, SOCK_DGRAM, 0); + if (use_ping) { + fprintf(stderr, "attempting to use ping sockets\n"); + prot = IPPROTO_ICMPV6; + } + + fdt = socket(PF_INET6, SOCK_DGRAM, prot); if (fdt == -1) error(1, errno, "socket t"); - fdr = socket(PF_INET6, SOCK_DGRAM, 0); + fdr = use_ping ? fdt : socket(PF_INET6, SOCK_DGRAM, 0); if (fdr == -1) error(1, errno, "socket r"); if (connect(fdt, (void *)&addr, sizeof(addr))) error(1, errno, "connect"); - if (bind(fdr, (void *)&addr, sizeof(addr))) + if (!use_ping && bind(fdr, (void *)&addr, sizeof(addr))) error(1, errno, "bind"); flowlabel_get(fdt, cfg_label, IPV6_FL_S_EXCL, IPV6_FL_F_CREATE); @@ -216,13 +253,21 @@ int main(int argc, char **argv) do_recv(fdr, false, 0); } + if (use_flowinfo_send) { + fprintf(stderr, "using IPV6_FLOWINFO_SEND to send label\n"); + addr.sin6_flowinfo = htonl(cfg_label); + if (setsockopt(fdt, SOL_IPV6, IPV6_FLOWINFO_SEND, &one, + sizeof(one)) == -1) + error(1, errno, "setsockopt flowinfo_send"); + } + fprintf(stderr, "send label\n"); do_send(fdt, true, cfg_label); do_recv(fdr, true, cfg_label); if (close(fdr)) error(1, errno, "close r"); - if (close(fdt)) + if (!use_ping && close(fdt)) error(1, errno, "close t"); return 0; diff --git a/tools/testing/selftests/net/ipv6_flowlabel.sh b/tools/testing/selftests/net/ipv6_flowlabel.sh index d3bc6442704e..cee95e252bee 100755 --- a/tools/testing/selftests/net/ipv6_flowlabel.sh +++ b/tools/testing/selftests/net/ipv6_flowlabel.sh @@ -18,4 +18,20 @@ echo "TEST datapath (with auto-flowlabels)" ./in_netns.sh \ sh -c 'sysctl -q -w net.ipv6.auto_flowlabels=1 && ./ipv6_flowlabel -l 1' +echo "TEST datapath (with ping-sockets)" +./in_netns.sh \ + sh -c 'sysctl -q -w net.ipv6.flowlabel_reflect=4 && \ + sysctl -q -w net.ipv4.ping_group_range="0 2147483647" && \ + ./ipv6_flowlabel -l 1 -p' + +echo "TEST datapath (with flowinfo-send)" +./in_netns.sh \ + sh -c './ipv6_flowlabel -l 1 -s' + +echo "TEST datapath (with ping-sockets flowinfo-send)" +./in_netns.sh \ + sh -c 'sysctl -q -w net.ipv6.flowlabel_reflect=4 && \ + sysctl -q -w net.ipv4.ping_group_range="0 2147483647" && \ + ./ipv6_flowlabel -l 1 -p -s' + echo OK. All tests passed diff --git a/tools/testing/selftests/net/l2_tos_ttl_inherit.sh b/tools/testing/selftests/net/l2_tos_ttl_inherit.sh new file mode 100755 index 000000000000..f11756e7df2f --- /dev/null +++ b/tools/testing/selftests/net/l2_tos_ttl_inherit.sh @@ -0,0 +1,446 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +# Author: Matthias May <matthias.may@westermo.com> +# +# This script evaluates ip tunnels that are capable of carrying L2 traffic +# if they inherit or set the inheritable fields. +# Namely these tunnels are: 'gretap', 'vxlan' and 'geneve'. +# Checked inheritable fields are: TOS and TTL. +# The outer tunnel protocol of 'IPv4' or 'IPv6' is verified- +# As payload frames of type 'IPv4', 'IPv6' and 'other'(ARP) are verified. +# In addition this script also checks if forcing a specific field in the +# outer header is working. + +# Return 4 by default (Kselftest SKIP code) +ERR=4 + +if [ "$(id -u)" != "0" ]; then + echo "Please run as root." + exit $ERR +fi +if ! which tcpdump > /dev/null 2>&1; then + echo "No tcpdump found. Required for this test." + exit $ERR +fi + +expected_tos="0x00" +expected_ttl="0" +failed=false + +readonly NS0=$(mktemp -u ns0-XXXXXXXX) +readonly NS1=$(mktemp -u ns1-XXXXXXXX) + +RUN_NS0="ip netns exec ${NS0}" + +get_random_tos() { + # Get a random hex tos value between 0x00 and 0xfc, a multiple of 4 + echo "0x$(tr -dc '0-9a-f' < /dev/urandom | head -c 1)\ +$(tr -dc '048c' < /dev/urandom | head -c 1)" +} +get_random_ttl() { + # Get a random dec value between 0 and 255 + printf "%d" "0x$(tr -dc '0-9a-f' < /dev/urandom | head -c 2)" +} +get_field() { + # Expects to get the 'head -n 1' of a captured frame by tcpdump. + # Parses this first line and returns the specified field. + local field="$1" + local input="$2" + local found=false + input="$(echo "$input" | tr -d '(),')" + for input_field in $input; do + if $found; then + echo "$input_field" + return + fi + # The next field that we iterate over is the looked for value + if [ "$input_field" = "$field" ]; then + found=true + fi + done + echo "0" +} +setup() { + local type="$1" + local outer="$2" + local inner="$3" + local tos_ttl="$4" + local vlan="$5" + local test_tos="0x00" + local test_ttl="0" + + # We don't want a test-tos of 0x00, + # because this is the value that we get when no tos is set. + expected_tos="$(get_random_tos)" + while [ "$expected_tos" = "0x00" ]; do + expected_tos="$(get_random_tos)" + done + if [ "$tos_ttl" = "random" ]; then + test_tos="$expected_tos" + tos="fixed $test_tos" + elif [ "$tos_ttl" = "inherit" ]; then + test_tos="$tos_ttl" + tos="inherit $expected_tos" + fi + + # We don't want a test-ttl of 64 or 0, + # because 64 is when no ttl is set and 0 is not a valid ttl. + expected_ttl="$(get_random_ttl)" + while [ "$expected_ttl" = "64" ] || [ "$expected_ttl" = "0" ]; do + expected_ttl="$(get_random_ttl)" + done + + if [ "$tos_ttl" = "random" ]; then + test_ttl="$expected_ttl" + ttl="fixed $test_ttl" + elif [ "$tos_ttl" = "inherit" ]; then + test_ttl="$tos_ttl" + ttl="inherit $expected_ttl" + fi + printf "│%7s │%6s │%6s │%13s │%13s │%6s │" \ + "$type" "$outer" "$inner" "$tos" "$ttl" "$vlan" + + # Create netns NS0 and NS1 and connect them with a veth pair + ip netns add "${NS0}" + ip netns add "${NS1}" + ip link add name veth0 netns "${NS0}" type veth \ + peer name veth1 netns "${NS1}" + ip -netns "${NS0}" link set dev veth0 up + ip -netns "${NS1}" link set dev veth1 up + ip -netns "${NS0}" address flush dev veth0 + ip -netns "${NS1}" address flush dev veth1 + + local local_addr1="" + local local_addr2="" + if [ "$type" = "gre" ] || [ "$type" = "vxlan" ]; then + if [ "$outer" = "4" ]; then + local_addr1="local 198.18.0.1" + local_addr2="local 198.18.0.2" + elif [ "$outer" = "6" ]; then + local_addr1="local fdd1:ced0:5d88:3fce::1" + local_addr2="local fdd1:ced0:5d88:3fce::2" + fi + fi + local vxlan="" + if [ "$type" = "vxlan" ]; then + vxlan="vni 100 dstport 4789" + fi + local geneve="" + if [ "$type" = "geneve" ]; then + geneve="vni 100" + fi + # Create tunnel and assign outer IPv4/IPv6 addresses + if [ "$outer" = "4" ]; then + if [ "$type" = "gre" ]; then + type="gretap" + fi + ip -netns "${NS0}" address add 198.18.0.1/24 dev veth0 + ip -netns "${NS1}" address add 198.18.0.2/24 dev veth1 + ip -netns "${NS0}" link add name tep0 type $type $local_addr1 \ + remote 198.18.0.2 tos $test_tos ttl $test_ttl \ + $vxlan $geneve + ip -netns "${NS1}" link add name tep1 type $type $local_addr2 \ + remote 198.18.0.1 tos $test_tos ttl $test_ttl \ + $vxlan $geneve + elif [ "$outer" = "6" ]; then + if [ "$type" = "gre" ]; then + type="ip6gretap" + fi + ip -netns "${NS0}" address add fdd1:ced0:5d88:3fce::1/64 \ + dev veth0 nodad + ip -netns "${NS1}" address add fdd1:ced0:5d88:3fce::2/64 \ + dev veth1 nodad + ip -netns "${NS0}" link add name tep0 type $type $local_addr1 \ + remote fdd1:ced0:5d88:3fce::2 tos $test_tos \ + ttl $test_ttl $vxlan $geneve + ip -netns "${NS1}" link add name tep1 type $type $local_addr2 \ + remote fdd1:ced0:5d88:3fce::1 tos $test_tos \ + ttl $test_ttl $vxlan $geneve + fi + + # Bring L2-tunnel link up and create VLAN on top + ip -netns "${NS0}" link set tep0 up + ip -netns "${NS1}" link set tep1 up + ip -netns "${NS0}" address flush dev tep0 + ip -netns "${NS1}" address flush dev tep1 + local parent + if $vlan; then + parent="vlan99-" + ip -netns "${NS0}" link add link tep0 name ${parent}0 \ + type vlan id 99 + ip -netns "${NS1}" link add link tep1 name ${parent}1 \ + type vlan id 99 + ip -netns "${NS0}" link set dev ${parent}0 up + ip -netns "${NS1}" link set dev ${parent}1 up + ip -netns "${NS0}" address flush dev ${parent}0 + ip -netns "${NS1}" address flush dev ${parent}1 + else + parent="tep" + fi + + # Assign inner IPv4/IPv6 addresses + if [ "$inner" = "4" ] || [ "$inner" = "other" ]; then + ip -netns "${NS0}" address add 198.19.0.1/24 brd + dev ${parent}0 + ip -netns "${NS1}" address add 198.19.0.2/24 brd + dev ${parent}1 + elif [ "$inner" = "6" ]; then + ip -netns "${NS0}" address add fdd4:96cf:4eae:443b::1/64 \ + dev ${parent}0 nodad + ip -netns "${NS1}" address add fdd4:96cf:4eae:443b::2/64 \ + dev ${parent}1 nodad + fi +} + +verify() { + local outer="$1" + local inner="$2" + local tos_ttl="$3" + local vlan="$4" + + local ping_pid out captured_tos captured_ttl result + + local ping_dst + if [ "$inner" = "4" ]; then + ping_dst="198.19.0.2" + elif [ "$inner" = "6" ]; then + ping_dst="fdd4:96cf:4eae:443b::2" + elif [ "$inner" = "other" ]; then + ping_dst="198.19.0.3" # Generates ARPs which are not IPv4/IPv6 + fi + if [ "$tos_ttl" = "inherit" ]; then + ${RUN_NS0} ping -i 0.1 $ping_dst -Q "$expected_tos" \ + -t "$expected_ttl" 2>/dev/null 1>&2 & ping_pid="$!" + else + ${RUN_NS0} ping -i 0.1 $ping_dst 2>/dev/null 1>&2 & ping_pid="$!" + fi + local tunnel_type_offset tunnel_type_proto req_proto_offset req_offset + if [ "$type" = "gre" ]; then + tunnel_type_proto="0x2f" + elif [ "$type" = "vxlan" ] || [ "$type" = "geneve" ]; then + tunnel_type_proto="0x11" + fi + if [ "$outer" = "4" ]; then + tunnel_type_offset="9" + if [ "$inner" = "4" ]; then + req_proto_offset="47" + req_offset="58" + if [ "$type" = "vxlan" ] || [ "$type" = "geneve" ]; then + req_proto_offset="$((req_proto_offset + 12))" + req_offset="$((req_offset + 12))" + fi + if $vlan; then + req_proto_offset="$((req_proto_offset + 4))" + req_offset="$((req_offset + 4))" + fi + out="$(${RUN_NS0} tcpdump --immediate-mode -p -c 1 -v \ + -i veth0 -n \ + ip[$tunnel_type_offset] = $tunnel_type_proto and \ + ip[$req_proto_offset] = 0x01 and \ + ip[$req_offset] = 0x08 2>/dev/null \ + | head -n 1)" + elif [ "$inner" = "6" ]; then + req_proto_offset="44" + req_offset="78" + if [ "$type" = "vxlan" ] || [ "$type" = "geneve" ]; then + req_proto_offset="$((req_proto_offset + 12))" + req_offset="$((req_offset + 12))" + fi + if $vlan; then + req_proto_offset="$((req_proto_offset + 4))" + req_offset="$((req_offset + 4))" + fi + out="$(${RUN_NS0} tcpdump --immediate-mode -p -c 1 -v \ + -i veth0 -n \ + ip[$tunnel_type_offset] = $tunnel_type_proto and \ + ip[$req_proto_offset] = 0x3a and \ + ip[$req_offset] = 0x80 2>/dev/null \ + | head -n 1)" + elif [ "$inner" = "other" ]; then + req_proto_offset="36" + req_offset="45" + if [ "$type" = "vxlan" ] || [ "$type" = "geneve" ]; then + req_proto_offset="$((req_proto_offset + 12))" + req_offset="$((req_offset + 12))" + fi + if $vlan; then + req_proto_offset="$((req_proto_offset + 4))" + req_offset="$((req_offset + 4))" + fi + if [ "$tos_ttl" = "inherit" ]; then + expected_tos="0x00" + expected_ttl="64" + fi + out="$(${RUN_NS0} tcpdump --immediate-mode -p -c 1 -v \ + -i veth0 -n \ + ip[$tunnel_type_offset] = $tunnel_type_proto and \ + ip[$req_proto_offset] = 0x08 and \ + ip[$((req_proto_offset + 1))] = 0x06 and \ + ip[$req_offset] = 0x01 2>/dev/null \ + | head -n 1)" + fi + elif [ "$outer" = "6" ]; then + if [ "$type" = "gre" ]; then + tunnel_type_offset="40" + elif [ "$type" = "vxlan" ] || [ "$type" = "geneve" ]; then + tunnel_type_offset="6" + fi + if [ "$inner" = "4" ]; then + local req_proto_offset="75" + local req_offset="86" + if [ "$type" = "vxlan" ] || [ "$type" = "geneve" ]; then + req_proto_offset="$((req_proto_offset + 4))" + req_offset="$((req_offset + 4))" + fi + if $vlan; then + req_proto_offset="$((req_proto_offset + 4))" + req_offset="$((req_offset + 4))" + fi + out="$(${RUN_NS0} tcpdump --immediate-mode -p -c 1 -v \ + -i veth0 -n \ + ip6[$tunnel_type_offset] = $tunnel_type_proto and \ + ip6[$req_proto_offset] = 0x01 and \ + ip6[$req_offset] = 0x08 2>/dev/null \ + | head -n 1)" + elif [ "$inner" = "6" ]; then + local req_proto_offset="72" + local req_offset="106" + if [ "$type" = "vxlan" ] || [ "$type" = "geneve" ]; then + req_proto_offset="$((req_proto_offset + 4))" + req_offset="$((req_offset + 4))" + fi + if $vlan; then + req_proto_offset="$((req_proto_offset + 4))" + req_offset="$((req_offset + 4))" + fi + out="$(${RUN_NS0} tcpdump --immediate-mode -p -c 1 -v \ + -i veth0 -n \ + ip6[$tunnel_type_offset] = $tunnel_type_proto and \ + ip6[$req_proto_offset] = 0x3a and \ + ip6[$req_offset] = 0x80 2>/dev/null \ + | head -n 1)" + elif [ "$inner" = "other" ]; then + local req_proto_offset="64" + local req_offset="73" + if [ "$type" = "vxlan" ] || [ "$type" = "geneve" ]; then + req_proto_offset="$((req_proto_offset + 4))" + req_offset="$((req_offset + 4))" + fi + if $vlan; then + req_proto_offset="$((req_proto_offset + 4))" + req_offset="$((req_offset + 4))" + fi + if [ "$tos_ttl" = "inherit" ]; then + expected_tos="0x00" + expected_ttl="64" + fi + out="$(${RUN_NS0} tcpdump --immediate-mode -p -c 1 -v \ + -i veth0 -n \ + ip6[$tunnel_type_offset] = $tunnel_type_proto and \ + ip6[$req_proto_offset] = 0x08 and \ + ip6[$((req_proto_offset + 1))] = 0x06 and \ + ip6[$req_offset] = 0x01 2>/dev/null \ + | head -n 1)" + fi + fi + kill -9 $ping_pid + wait $ping_pid 2>/dev/null || true + result="FAIL" + if [ "$outer" = "4" ]; then + captured_ttl="$(get_field "ttl" "$out")" + captured_tos="$(printf "0x%02x" "$(get_field "tos" "$out")")" + if [ "$captured_tos" = "$expected_tos" ] && + [ "$captured_ttl" = "$expected_ttl" ]; then + result="OK" + fi + elif [ "$outer" = "6" ]; then + captured_ttl="$(get_field "hlim" "$out")" + captured_tos="$(printf "0x%02x" "$(get_field "class" "$out")")" + if [ "$captured_tos" = "$expected_tos" ] && + [ "$captured_ttl" = "$expected_ttl" ]; then + result="OK" + fi + fi + + printf "%7s │\n" "$result" + if [ "$result" = "FAIL" ]; then + failed=true + if [ "$captured_tos" != "$expected_tos" ]; then + printf "│%43s%27s │\n" \ + "Expected TOS value: $expected_tos" \ + "Captured TOS value: $captured_tos" + fi + if [ "$captured_ttl" != "$expected_ttl" ]; then + printf "│%43s%27s │\n" \ + "Expected TTL value: $expected_ttl" \ + "Captured TTL value: $captured_ttl" + fi + printf "│%71s│\n" " " + fi +} + +cleanup() { + ip netns del "${NS0}" 2>/dev/null + ip netns del "${NS1}" 2>/dev/null +} + +exit_handler() { + # Don't exit immediately if one of the intermediate commands fails. + # We might be called at the end of the script, when the network + # namespaces have already been deleted. So cleanup() may fail, but we + # still need to run until 'exit $ERR' or the script won't return the + # correct error code. + set +e + + cleanup + + exit $ERR +} + +# Restore the default SIGINT handler (just in case) and exit. +# The exit handler will take care of cleaning everything up. +interrupted() { + trap - INT + + exit $ERR +} + +set -e +trap exit_handler EXIT +trap interrupted INT + +printf "┌────────┬───────┬───────┬──────────────┬" +printf "──────────────┬───────┬────────┐\n" +for type in gre vxlan geneve; do + if ! $(modprobe "$type" 2>/dev/null); then + continue + fi + for outer in 4 6; do + printf "├────────┼───────┼───────┼──────────────┼" + printf "──────────────┼───────┼────────┤\n" + printf "│ Type │ outer | inner │ tos │" + printf " ttl │ vlan │ result │\n" + for inner in 4 6 other; do + printf "├────────┼───────┼───────┼──────────────┼" + printf "──────────────┼───────┼────────┤\n" + for tos_ttl in inherit random; do + for vlan in false true; do + setup "$type" "$outer" "$inner" \ + "$tos_ttl" "$vlan" + verify "$outer" "$inner" "$tos_ttl" \ + "$vlan" + cleanup + done + done + done + done +done +printf "└────────┴───────┴───────┴──────────────┴" +printf "──────────────┴───────┴────────┘\n" + +# All tests done. +# Set ERR appropriately: it will be returned by the exit handler. +if $failed; then + ERR=1 +else + ERR=0 +fi diff --git a/tools/testing/selftests/net/l2tp.sh b/tools/testing/selftests/net/l2tp.sh index 5782433886fc..88de7166c8ae 100755 --- a/tools/testing/selftests/net/l2tp.sh +++ b/tools/testing/selftests/net/l2tp.sh @@ -13,6 +13,7 @@ # 10.1.1.1 | | 10.1.2.1 # 2001:db8:1::1 | | 2001:db8:2::1 +source lib.sh VERBOSE=0 PAUSE_ON_FAIL=no @@ -80,9 +81,6 @@ create_ns() [ -z "${addr}" ] && addr="-" [ -z "${addr6}" ] && addr6="-" - ip netns add ${ns} - - ip -netns ${ns} link set lo up if [ "${addr}" != "-" ]; then ip -netns ${ns} addr add dev lo ${addr} fi @@ -133,12 +131,7 @@ connect_ns() cleanup() { - local ns - - for ns in host-1 host-2 router - do - ip netns del ${ns} 2>/dev/null - done + cleanup_ns $host_1 $host_2 $router } setup_l2tp_ipv4() @@ -146,28 +139,28 @@ setup_l2tp_ipv4() # # configure l2tpv3 tunnel on host-1 # - ip -netns host-1 l2tp add tunnel tunnel_id 1041 peer_tunnel_id 1042 \ + ip -netns $host_1 l2tp add tunnel tunnel_id 1041 peer_tunnel_id 1042 \ encap ip local 10.1.1.1 remote 10.1.2.1 - ip -netns host-1 l2tp add session name l2tp4 tunnel_id 1041 \ + ip -netns $host_1 l2tp add session name l2tp4 tunnel_id 1041 \ session_id 1041 peer_session_id 1042 - ip -netns host-1 link set dev l2tp4 up - ip -netns host-1 addr add dev l2tp4 172.16.1.1 peer 172.16.1.2 + ip -netns $host_1 link set dev l2tp4 up + ip -netns $host_1 addr add dev l2tp4 172.16.1.1 peer 172.16.1.2 # # configure l2tpv3 tunnel on host-2 # - ip -netns host-2 l2tp add tunnel tunnel_id 1042 peer_tunnel_id 1041 \ + ip -netns $host_2 l2tp add tunnel tunnel_id 1042 peer_tunnel_id 1041 \ encap ip local 10.1.2.1 remote 10.1.1.1 - ip -netns host-2 l2tp add session name l2tp4 tunnel_id 1042 \ + ip -netns $host_2 l2tp add session name l2tp4 tunnel_id 1042 \ session_id 1042 peer_session_id 1041 - ip -netns host-2 link set dev l2tp4 up - ip -netns host-2 addr add dev l2tp4 172.16.1.2 peer 172.16.1.1 + ip -netns $host_2 link set dev l2tp4 up + ip -netns $host_2 addr add dev l2tp4 172.16.1.2 peer 172.16.1.1 # # add routes to loopback addresses # - ip -netns host-1 ro add 172.16.101.2/32 via 172.16.1.2 - ip -netns host-2 ro add 172.16.101.1/32 via 172.16.1.1 + ip -netns $host_1 ro add 172.16.101.2/32 via 172.16.1.2 + ip -netns $host_2 ro add 172.16.101.1/32 via 172.16.1.1 } setup_l2tp_ipv6() @@ -175,28 +168,28 @@ setup_l2tp_ipv6() # # configure l2tpv3 tunnel on host-1 # - ip -netns host-1 l2tp add tunnel tunnel_id 1061 peer_tunnel_id 1062 \ + ip -netns $host_1 l2tp add tunnel tunnel_id 1061 peer_tunnel_id 1062 \ encap ip local 2001:db8:1::1 remote 2001:db8:2::1 - ip -netns host-1 l2tp add session name l2tp6 tunnel_id 1061 \ + ip -netns $host_1 l2tp add session name l2tp6 tunnel_id 1061 \ session_id 1061 peer_session_id 1062 - ip -netns host-1 link set dev l2tp6 up - ip -netns host-1 addr add dev l2tp6 fc00:1::1 peer fc00:1::2 + ip -netns $host_1 link set dev l2tp6 up + ip -netns $host_1 addr add dev l2tp6 fc00:1::1 peer fc00:1::2 # # configure l2tpv3 tunnel on host-2 # - ip -netns host-2 l2tp add tunnel tunnel_id 1062 peer_tunnel_id 1061 \ + ip -netns $host_2 l2tp add tunnel tunnel_id 1062 peer_tunnel_id 1061 \ encap ip local 2001:db8:2::1 remote 2001:db8:1::1 - ip -netns host-2 l2tp add session name l2tp6 tunnel_id 1062 \ + ip -netns $host_2 l2tp add session name l2tp6 tunnel_id 1062 \ session_id 1062 peer_session_id 1061 - ip -netns host-2 link set dev l2tp6 up - ip -netns host-2 addr add dev l2tp6 fc00:1::2 peer fc00:1::1 + ip -netns $host_2 link set dev l2tp6 up + ip -netns $host_2 addr add dev l2tp6 fc00:1::2 peer fc00:1::1 # # add routes to loopback addresses # - ip -netns host-1 -6 ro add fc00:101::2/128 via fc00:1::2 - ip -netns host-2 -6 ro add fc00:101::1/128 via fc00:1::1 + ip -netns $host_1 -6 ro add fc00:101::2/128 via fc00:1::2 + ip -netns $host_2 -6 ro add fc00:101::1/128 via fc00:1::1 } setup() @@ -205,21 +198,22 @@ setup() cleanup set -e - create_ns host-1 172.16.101.1/32 fc00:101::1/128 - create_ns host-2 172.16.101.2/32 fc00:101::2/128 - create_ns router + setup_ns host_1 host_2 router + create_ns $host_1 172.16.101.1/32 fc00:101::1/128 + create_ns $host_2 172.16.101.2/32 fc00:101::2/128 + create_ns $router - connect_ns host-1 eth0 10.1.1.1/24 2001:db8:1::1/64 \ - router eth1 10.1.1.2/24 2001:db8:1::2/64 + connect_ns $host_1 eth0 10.1.1.1/24 2001:db8:1::1/64 \ + $router eth1 10.1.1.2/24 2001:db8:1::2/64 - connect_ns host-2 eth0 10.1.2.1/24 2001:db8:2::1/64 \ - router eth2 10.1.2.2/24 2001:db8:2::2/64 + connect_ns $host_2 eth0 10.1.2.1/24 2001:db8:2::1/64 \ + $router eth2 10.1.2.2/24 2001:db8:2::2/64 - ip -netns host-1 ro add 10.1.2.0/24 via 10.1.1.2 - ip -netns host-1 -6 ro add 2001:db8:2::/64 via 2001:db8:1::2 + ip -netns $host_1 ro add 10.1.2.0/24 via 10.1.1.2 + ip -netns $host_1 -6 ro add 2001:db8:2::/64 via 2001:db8:1::2 - ip -netns host-2 ro add 10.1.1.0/24 via 10.1.2.2 - ip -netns host-2 -6 ro add 2001:db8:1::/64 via 2001:db8:2::2 + ip -netns $host_2 ro add 10.1.1.0/24 via 10.1.2.2 + ip -netns $host_2 -6 ro add 2001:db8:1::/64 via 2001:db8:2::2 setup_l2tp_ipv4 setup_l2tp_ipv6 @@ -231,38 +225,38 @@ setup_ipsec() # # IPv4 # - run_cmd host-1 ip xfrm policy add \ + run_cmd $host_1 ip xfrm policy add \ src 10.1.1.1 dst 10.1.2.1 dir out \ tmpl proto esp mode transport - run_cmd host-1 ip xfrm policy add \ + run_cmd $host_1 ip xfrm policy add \ src 10.1.2.1 dst 10.1.1.1 dir in \ tmpl proto esp mode transport - run_cmd host-2 ip xfrm policy add \ + run_cmd $host_2 ip xfrm policy add \ src 10.1.1.1 dst 10.1.2.1 dir in \ tmpl proto esp mode transport - run_cmd host-2 ip xfrm policy add \ + run_cmd $host_2 ip xfrm policy add \ src 10.1.2.1 dst 10.1.1.1 dir out \ tmpl proto esp mode transport - ip -netns host-1 xfrm state add \ + ip -netns $host_1 xfrm state add \ src 10.1.1.1 dst 10.1.2.1 \ spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' \ 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode transport - ip -netns host-1 xfrm state add \ + ip -netns $host_1 xfrm state add \ src 10.1.2.1 dst 10.1.1.1 \ spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' \ 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode transport - ip -netns host-2 xfrm state add \ + ip -netns $host_2 xfrm state add \ src 10.1.1.1 dst 10.1.2.1 \ spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' \ 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode transport - ip -netns host-2 xfrm state add \ + ip -netns $host_2 xfrm state add \ src 10.1.2.1 dst 10.1.1.1 \ spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' \ 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode transport @@ -270,38 +264,38 @@ setup_ipsec() # # IPV6 # - run_cmd host-1 ip -6 xfrm policy add \ + run_cmd $host_1 ip -6 xfrm policy add \ src 2001:db8:1::1 dst 2001:db8:2::1 dir out \ tmpl proto esp mode transport - run_cmd host-1 ip -6 xfrm policy add \ + run_cmd $host_1 ip -6 xfrm policy add \ src 2001:db8:2::1 dst 2001:db8:1::1 dir in \ tmpl proto esp mode transport - run_cmd host-2 ip -6 xfrm policy add \ + run_cmd $host_2 ip -6 xfrm policy add \ src 2001:db8:1::1 dst 2001:db8:2::1 dir in \ tmpl proto esp mode transport - run_cmd host-2 ip -6 xfrm policy add \ + run_cmd $host_2 ip -6 xfrm policy add \ src 2001:db8:2::1 dst 2001:db8:1::1 dir out \ tmpl proto esp mode transport - ip -netns host-1 -6 xfrm state add \ + ip -netns $host_1 -6 xfrm state add \ src 2001:db8:1::1 dst 2001:db8:2::1 \ spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' \ 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode transport - ip -netns host-1 -6 xfrm state add \ + ip -netns $host_1 -6 xfrm state add \ src 2001:db8:2::1 dst 2001:db8:1::1 \ spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' \ 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode transport - ip -netns host-2 -6 xfrm state add \ + ip -netns $host_2 -6 xfrm state add \ src 2001:db8:1::1 dst 2001:db8:2::1 \ spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' \ 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode transport - ip -netns host-2 -6 xfrm state add \ + ip -netns $host_2 -6 xfrm state add \ src 2001:db8:2::1 dst 2001:db8:1::1 \ spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' \ 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode transport @@ -309,10 +303,10 @@ setup_ipsec() teardown_ipsec() { - run_cmd host-1 ip xfrm state flush - run_cmd host-1 ip xfrm policy flush - run_cmd host-2 ip xfrm state flush - run_cmd host-2 ip xfrm policy flush + run_cmd $host_1 ip xfrm state flush + run_cmd $host_1 ip xfrm policy flush + run_cmd $host_2 ip xfrm state flush + run_cmd $host_2 ip xfrm policy flush } ################################################################################ @@ -322,16 +316,16 @@ run_ping() { local desc="$1" - run_cmd host-1 ping -c1 -w1 172.16.1.2 + run_cmd $host_1 ping -c1 -w1 172.16.1.2 log_test $? 0 "IPv4 basic L2TP tunnel ${desc}" - run_cmd host-1 ping -c1 -w1 -I 172.16.101.1 172.16.101.2 + run_cmd $host_1 ping -c1 -w1 -I 172.16.101.1 172.16.101.2 log_test $? 0 "IPv4 route through L2TP tunnel ${desc}" - run_cmd host-1 ${ping6} -c1 -w1 fc00:1::2 + run_cmd $host_1 ${ping6} -c1 -w1 fc00:1::2 log_test $? 0 "IPv6 basic L2TP tunnel ${desc}" - run_cmd host-1 ${ping6} -c1 -w1 -I fc00:101::1 fc00:101::2 + run_cmd $host_1 ${ping6} -c1 -w1 -I fc00:101::1 fc00:101::2 log_test $? 0 "IPv6 route through L2TP tunnel ${desc}" } @@ -344,16 +338,16 @@ run_tests() setup_ipsec run_ping "- with IPsec" - run_cmd host-1 ping -c1 -w1 172.16.1.2 + run_cmd $host_1 ping -c1 -w1 172.16.1.2 log_test $? 0 "IPv4 basic L2TP tunnel ${desc}" - run_cmd host-1 ping -c1 -w1 -I 172.16.101.1 172.16.101.2 + run_cmd $host_1 ping -c1 -w1 -I 172.16.101.1 172.16.101.2 log_test $? 0 "IPv4 route through L2TP tunnel ${desc}" - run_cmd host-1 ${ping6} -c1 -w1 fc00:1::2 + run_cmd $host_1 ${ping6} -c1 -w1 fc00:1::2 log_test $? 0 "IPv6 basic L2TP tunnel - with IPsec" - run_cmd host-1 ${ping6} -c1 -w1 -I fc00:101::1 fc00:101::2 + run_cmd $host_1 ${ping6} -c1 -w1 -I fc00:101::1 fc00:101::2 log_test $? 0 "IPv6 route through L2TP tunnel - with IPsec" teardown_ipsec diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh new file mode 100644 index 000000000000..f9fe182dfbd4 --- /dev/null +++ b/tools/testing/selftests/net/lib.sh @@ -0,0 +1,96 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +############################################################################## +# Defines + +WAIT_TIMEOUT=${WAIT_TIMEOUT:=20} +BUSYWAIT_TIMEOUT=$((WAIT_TIMEOUT * 1000)) # ms + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 +# namespace list created by setup_ns +NS_LIST="" + +############################################################################## +# Helpers +busywait() +{ + local timeout=$1; shift + + local start_time="$(date -u +%s%3N)" + while true + do + local out + out=$("$@") + local ret=$? + if ((!ret)); then + echo -n "$out" + return 0 + fi + + local current_time="$(date -u +%s%3N)" + if ((current_time - start_time > timeout)); then + echo -n "$out" + return 1 + fi + done +} + +cleanup_ns() +{ + local ns="" + local errexit=0 + local ret=0 + + # disable errexit temporary + if [[ $- =~ "e" ]]; then + errexit=1 + set +e + fi + + for ns in "$@"; do + ip netns delete "${ns}" &> /dev/null + if ! busywait $BUSYWAIT_TIMEOUT ip netns list \| grep -vq "^$ns$" &> /dev/null; then + echo "Warn: Failed to remove namespace $ns" + ret=1 + fi + done + + [ $errexit -eq 1 ] && set -e + return $ret +} + +cleanup_all_ns() +{ + cleanup_ns $NS_LIST +} + +# setup netns with given names as prefix. e.g +# setup_ns local remote +setup_ns() +{ + local ns="" + local ns_name="" + local ns_list="" + for ns_name in "$@"; do + # Some test may setup/remove same netns multi times + if unset ${ns_name} 2> /dev/null; then + ns="${ns_name,,}-$(mktemp -u XXXXXX)" + eval readonly ${ns_name}="$ns" + else + eval ns='$'${ns_name} + cleanup_ns "$ns" + + fi + + if ! ip netns add "$ns"; then + echo "Failed to create namespace $ns_name" + cleanup_ns "$ns_list" + return $ksft_skip + fi + ip -n "$ns" link set lo up + ns_list="$ns_list $ns" + done + NS_LIST="$NS_LIST $ns_list" +} diff --git a/tools/testing/selftests/net/mptcp/.gitignore b/tools/testing/selftests/net/mptcp/.gitignore index 260336d5f0b1..49daae73c41e 100644 --- a/tools/testing/selftests/net/mptcp/.gitignore +++ b/tools/testing/selftests/net/mptcp/.gitignore @@ -1,4 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only mptcp_connect +mptcp_inq +mptcp_sockopt pm_nl_ctl *.pcap diff --git a/tools/testing/selftests/net/mptcp/Makefile b/tools/testing/selftests/net/mptcp/Makefile index 00bb158b4a5d..7b936a926859 100644 --- a/tools/testing/selftests/net/mptcp/Makefile +++ b/tools/testing/selftests/net/mptcp/Makefile @@ -1,16 +1,15 @@ # SPDX-License-Identifier: GPL-2.0 top_srcdir = ../../../../.. -KSFT_KHDR_INSTALL := 1 -CFLAGS = -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include +CFLAGS = -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include $(KHDR_INCLUDES) TEST_PROGS := mptcp_connect.sh pm_netlink.sh mptcp_join.sh diag.sh \ - simult_flows.sh + simult_flows.sh mptcp_sockopt.sh userspace_pm.sh -TEST_GEN_FILES = mptcp_connect pm_nl_ctl +TEST_GEN_FILES = mptcp_connect pm_nl_ctl mptcp_sockopt mptcp_inq -TEST_FILES := settings +TEST_FILES := mptcp_lib.sh settings EXTRA_CLEAN := *.pcap diff --git a/tools/testing/selftests/net/mptcp/config b/tools/testing/selftests/net/mptcp/config index 741a1c4f4ae8..4f80014cae49 100644 --- a/tools/testing/selftests/net/mptcp/config +++ b/tools/testing/selftests/net/mptcp/config @@ -1,3 +1,4 @@ +CONFIG_KALLSYMS=y CONFIG_MPTCP=y CONFIG_IPV6=y CONFIG_MPTCP_IPV6=y @@ -5,3 +6,29 @@ CONFIG_INET_DIAG=m CONFIG_INET_MPTCP_DIAG=m CONFIG_VETH=y CONFIG_NET_SCH_NETEM=m +CONFIG_SYN_COOKIES=y +CONFIG_NETFILTER=y +CONFIG_NETFILTER_ADVANCED=y +CONFIG_NETFILTER_NETLINK=m +CONFIG_NF_TABLES=m +CONFIG_NFT_COMPAT=m +CONFIG_NETFILTER_XTABLES=m +CONFIG_NETFILTER_XT_MATCH_BPF=m +CONFIG_NETFILTER_XT_MATCH_LENGTH=m +CONFIG_NETFILTER_XT_MATCH_STATISTIC=m +CONFIG_NETFILTER_XT_TARGET_MARK=m +CONFIG_NF_TABLES_INET=y +CONFIG_NFT_TPROXY=m +CONFIG_NFT_SOCKET=m +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_NF_FILTER=m +CONFIG_IP_NF_MANGLE=m +CONFIG_IP_NF_TARGET_REJECT=m +CONFIG_IPV6_MULTIPLE_TABLES=y +CONFIG_IP6_NF_FILTER=m +CONFIG_NET_ACT_CSUM=m +CONFIG_NET_ACT_PEDIT=m +CONFIG_NET_CLS_ACT=y +CONFIG_NET_CLS_FW=m +CONFIG_NET_SCH_INGRESS=m diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh index 39edce4f541c..bc97ab33a00e 100755 --- a/tools/testing/selftests/net/mptcp/diag.sh +++ b/tools/testing/selftests/net/mptcp/diag.sh @@ -1,12 +1,17 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -rndh=$(printf %x $sec)-$(mktemp -u XXXXXX) -ns="ns1-$rndh" -ksft_skip=4 -test_cnt=1 +# Double quotes to prevent globbing and word splitting is recommended in new +# code but we accept it, especially because there were too many before having +# address all other issues detected by shellcheck. +#shellcheck disable=SC2086 + +. "$(dirname "${0}")/mptcp_lib.sh" + +ns="" +timeout_poll=30 +timeout_test=$((timeout_poll * 2 + 1)) ret=0 -pids=() flush_pids() { @@ -14,108 +19,290 @@ flush_pids() # give it some time sleep 1.1 - for pid in ${pids[@]}; do - [ -d /proc/$pid ] && kill -SIGUSR1 $pid >/dev/null 2>&1 + ip netns pids "${ns}" | xargs --no-run-if-empty kill -SIGUSR1 &>/dev/null + + for _ in $(seq $((timeout_poll * 10))); do + [ -z "$(ip netns pids "${ns}")" ] && break + sleep 0.1 done - pids=() } +# This function is used in the cleanup trap +#shellcheck disable=SC2317 cleanup() { - ip netns del $ns - for pid in ${pids[@]}; do - [ -d /proc/$pid ] && kill -9 $pid >/dev/null 2>&1 - done + ip netns pids "${ns}" | xargs --no-run-if-empty kill -SIGKILL &>/dev/null + + mptcp_lib_ns_exit "${ns}" } -ip -Version > /dev/null 2>&1 -if [ $? -ne 0 ];then - echo "SKIP: Could not run test without ip tool" - exit $ksft_skip -fi -ss -h | grep -q MPTCP -if [ $? -ne 0 ];then - echo "SKIP: ss tool does not support MPTCP" - exit $ksft_skip -fi +mptcp_lib_check_mptcp +mptcp_lib_check_tools ip ss + +get_msk_inuse() +{ + ip netns exec $ns cat /proc/net/protocols | awk '$1~/^MPTCP$/{print $3}' +} __chk_nr() { - local condition="$1" + local command="$1" local expected=$2 - local msg nr + local msg="$3" + local skip="${4-SKIP}" + local nr - shift 2 - msg=$* - nr=$(ss -inmHMN $ns | $condition) + nr=$(eval $command) - printf "%-50s" "$msg" - if [ $nr != $expected ]; then - echo "[ fail ] expected $expected found $nr" - ret=$test_cnt + mptcp_lib_print_title "$msg" + if [ "$nr" != "$expected" ]; then + if [ "$nr" = "$skip" ] && ! mptcp_lib_expect_all_features; then + mptcp_lib_pr_skip "Feature probably not supported" + mptcp_lib_result_skip "${msg}" + else + mptcp_lib_pr_fail "expected $expected found $nr" + mptcp_lib_result_fail "${msg}" + ret=${KSFT_FAIL} + fi else - echo "[ ok ]" + mptcp_lib_pr_ok + mptcp_lib_result_pass "${msg}" fi - test_cnt=$((test_cnt+1)) +} + +__chk_msk_nr() +{ + local condition=$1 + shift 1 + + __chk_nr "ss -inmHMN $ns | $condition" "$@" } chk_msk_nr() { - __chk_nr "grep -c token:" $* + __chk_msk_nr "grep -c token:" "$@" +} + +chk_listener_nr() +{ + local expected=$1 + local msg="$2" + + __chk_nr "ss -nlHMON $ns | wc -l" "$expected" "$msg - mptcp" 0 + __chk_nr "ss -nlHtON $ns | wc -l" "$expected" "$msg - subflows" +} + +wait_msk_nr() +{ + local condition="grep -c token:" + local expected=$1 + local timeout=20 + local msg nr + local max=0 + local i=0 + + shift 1 + msg=$* + + while [ $i -lt $timeout ]; do + nr=$(ss -inmHMN $ns | $condition) + [ $nr == $expected ] && break; + [ $nr -gt $max ] && max=$nr + i=$((i + 1)) + sleep 1 + done + + mptcp_lib_print_title "$msg" + if [ $i -ge $timeout ]; then + mptcp_lib_pr_fail "timeout while expecting $expected max $max last $nr" + mptcp_lib_result_fail "${msg} # timeout" + ret=${KSFT_FAIL} + elif [ $nr != $expected ]; then + mptcp_lib_pr_fail "expected $expected found $nr" + mptcp_lib_result_fail "${msg} # unexpected result" + ret=${KSFT_FAIL} + else + mptcp_lib_pr_ok + mptcp_lib_result_pass "${msg}" + fi } chk_msk_fallback_nr() { - __chk_nr "grep -c fallback" $* + __chk_msk_nr "grep -c fallback" "$@" } chk_msk_remote_key_nr() { - __chk_nr "grep -c remote_key" $* + __chk_msk_nr "grep -c remote_key" "$@" +} + +__chk_listen() +{ + local filter="$1" + local expected=$2 + local msg="$3" + + __chk_nr "ss -N $ns -Ml '$filter' | grep -c LISTEN" "$expected" "$msg" 0 +} + +chk_msk_listen() +{ + lport=$1 + + # destination port search should always return empty list + __chk_listen "dport $lport" 0 "listen match for dport $lport" + + # should return 'our' mptcp listen socket + __chk_listen "sport $lport" 1 "listen match for sport $lport" + + __chk_listen "src inet:0.0.0.0:$lport" 1 "listen match for saddr and sport" + + __chk_listen "" 1 "all listen sockets" + + nr=$(ss -Ml $filter | wc -l) +} + +chk_msk_inuse() +{ + local expected=$1 + local msg="....chk ${2:-${expected}} msk in use" + local listen_nr + + if [ "${expected}" -eq 0 ]; then + msg+=" after flush" + fi + + listen_nr=$(ss -N "${ns}" -Ml | grep -c LISTEN) + expected=$((expected + listen_nr)) + + for _ in $(seq 10); do + if [ "$(get_msk_inuse)" -eq $expected ]; then + break + fi + sleep 0.1 + done + + __chk_nr get_msk_inuse $expected "${msg}" 0 } +# $1: cestab nr +chk_msk_cestab() +{ + local expected=$1 + local msg="....chk ${2:-${expected}} cestab" + + if [ "${expected}" -eq 0 ]; then + msg+=" after flush" + fi + + __chk_nr "mptcp_lib_get_counter ${ns} MPTcpExtMPCurrEstab" \ + "${expected}" "${msg}" "" +} + +wait_connected() +{ + local listener_ns="${1}" + local port="${2}" + + local port_hex i + + port_hex="$(printf "%04X" "${port}")" + for i in $(seq 10); do + ip netns exec ${listener_ns} grep -q " 0100007F:${port_hex} " /proc/net/tcp && break + sleep 0.1 + done +} trap cleanup EXIT -ip netns add $ns -ip -n $ns link set dev lo up +mptcp_lib_ns_init ns -echo "a" | ip netns exec $ns ./mptcp_connect -p 10000 -l 0.0.0.0 -t 100 >/dev/null & -sleep 0.1 -pids[0]=$! +echo "a" | \ + timeout ${timeout_test} \ + ip netns exec $ns \ + ./mptcp_connect -p 10000 -l -t ${timeout_poll} -w 20 \ + 0.0.0.0 >/dev/null & +mptcp_lib_wait_local_port_listen $ns 10000 chk_msk_nr 0 "no msk on netns creation" +chk_msk_listen 10000 -echo "b" | ip netns exec $ns ./mptcp_connect -p 10000 127.0.0.1 -j -t 100 >/dev/null & -sleep 0.1 -pids[1]=$! +echo "b" | \ + timeout ${timeout_test} \ + ip netns exec $ns \ + ./mptcp_connect -p 10000 -r 0 -t ${timeout_poll} -w 20 \ + 127.0.0.1 >/dev/null & +wait_connected $ns 10000 chk_msk_nr 2 "after MPC handshake " chk_msk_remote_key_nr 2 "....chk remote_key" chk_msk_fallback_nr 0 "....chk no fallback" +chk_msk_inuse 2 +chk_msk_cestab 2 flush_pids +chk_msk_inuse 0 "2->0" +chk_msk_cestab 0 "2->0" -echo "a" | ip netns exec $ns ./mptcp_connect -p 10001 -s TCP -l 0.0.0.0 -t 100 >/dev/null & -pids[0]=$! -sleep 0.1 -echo "b" | ip netns exec $ns ./mptcp_connect -p 10001 127.0.0.1 -j -t 100 >/dev/null & -pids[1]=$! -sleep 0.1 +echo "a" | \ + timeout ${timeout_test} \ + ip netns exec $ns \ + ./mptcp_connect -p 10001 -l -s TCP -t ${timeout_poll} -w 20 \ + 0.0.0.0 >/dev/null & +mptcp_lib_wait_local_port_listen $ns 10001 +echo "b" | \ + timeout ${timeout_test} \ + ip netns exec $ns \ + ./mptcp_connect -p 10001 -r 0 -t ${timeout_poll} -w 20 \ + 127.0.0.1 >/dev/null & +wait_connected $ns 10001 chk_msk_fallback_nr 1 "check fallback" +chk_msk_inuse 1 +chk_msk_cestab 1 flush_pids +chk_msk_inuse 0 "1->0" +chk_msk_cestab 0 "1->0" + NR_CLIENTS=100 -for I in `seq 1 $NR_CLIENTS`; do - echo "a" | ip netns exec $ns ./mptcp_connect -p $((I+10001)) -l 0.0.0.0 -t 100 -w 10 >/dev/null & - pids[$((I*2))]=$! +for I in $(seq 1 $NR_CLIENTS); do + echo "a" | \ + timeout ${timeout_test} \ + ip netns exec $ns \ + ./mptcp_connect -p $((I+10001)) -l -w 20 \ + -t ${timeout_poll} 0.0.0.0 >/dev/null & +done +mptcp_lib_wait_local_port_listen $ns $((NR_CLIENTS + 10001)) + +for I in $(seq 1 $NR_CLIENTS); do + echo "b" | \ + timeout ${timeout_test} \ + ip netns exec $ns \ + ./mptcp_connect -p $((I+10001)) -w 20 \ + -t ${timeout_poll} 127.0.0.1 >/dev/null & done -sleep 0.1 -for I in `seq 1 $NR_CLIENTS`; do - echo "b" | ip netns exec $ns ./mptcp_connect -p $((I+10001)) 127.0.0.1 -t 100 -w 10 >/dev/null & - pids[$((I*2 + 1))]=$! +wait_msk_nr $((NR_CLIENTS*2)) "many msk socket present" +chk_msk_inuse $((NR_CLIENTS*2)) "many" +chk_msk_cestab $((NR_CLIENTS*2)) "many" +flush_pids + +chk_msk_inuse 0 "many->0" +chk_msk_cestab 0 "many->0" + +chk_listener_nr 0 "no listener sockets" +NR_SERVERS=100 +for I in $(seq 1 $NR_SERVERS); do + ip netns exec $ns ./mptcp_connect -p $((I + 20001)) \ + -t ${timeout_poll} -l 0.0.0.0 >/dev/null 2>&1 & done -sleep 1.5 +mptcp_lib_wait_local_port_listen $ns $((NR_SERVERS + 20001)) -chk_msk_nr $((NR_CLIENTS*2)) "many msk socket present" +chk_listener_nr $NR_SERVERS "many listener sockets" + +# graceful termination +for I in $(seq 1 $NR_SERVERS); do + echo a | ip netns exec $ns ./mptcp_connect -p $((I + 20001)) 127.0.0.1 >/dev/null 2>&1 & +done flush_pids +mptcp_lib_result_print_all_tap exit $ret diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c index 77bb62feb872..d2043ec3bf6d 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.c +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c @@ -6,6 +6,7 @@ #include <limits.h> #include <fcntl.h> #include <string.h> +#include <stdarg.h> #include <stdbool.h> #include <stdint.h> #include <stdio.h> @@ -13,8 +14,11 @@ #include <strings.h> #include <signal.h> #include <unistd.h> +#include <time.h> +#include <sys/ioctl.h> #include <sys/poll.h> +#include <sys/random.h> #include <sys/sendfile.h> #include <sys/stat.h> #include <sys/socket.h> @@ -25,6 +29,8 @@ #include <netinet/in.h> #include <linux/tcp.h> +#include <linux/time_types.h> +#include <linux/sockios.h> extern int optind; @@ -45,34 +51,103 @@ enum cfg_mode { CFG_MODE_SENDFILE, }; +enum cfg_peek { + CFG_NONE_PEEK, + CFG_WITH_PEEK, + CFG_AFTER_PEEK, +}; + static enum cfg_mode cfg_mode = CFG_MODE_POLL; +static enum cfg_peek cfg_peek = CFG_NONE_PEEK; static const char *cfg_host; static const char *cfg_port = "12000"; static int cfg_sock_proto = IPPROTO_MPTCP; -static bool tcpulp_audit; static int pf = AF_INET; static int cfg_sndbuf; static int cfg_rcvbuf; static bool cfg_join; static bool cfg_remove; +static unsigned int cfg_time; +static unsigned int cfg_do_w; static int cfg_wait; +static uint32_t cfg_mark; +static char *cfg_input; +static int cfg_repeat = 1; +static int cfg_truncate; +static int cfg_rcv_trunc; + +struct cfg_cmsg_types { + unsigned int cmsg_enabled:1; + unsigned int timestampns:1; + unsigned int tcp_inq:1; +}; + +struct cfg_sockopt_types { + unsigned int transparent:1; + unsigned int mptfo:1; +}; + +struct tcp_inq_state { + unsigned int last; + bool expect_eof; +}; + +struct wstate { + char buf[8192]; + unsigned int len; + unsigned int off; + unsigned int total_len; +}; + +static struct tcp_inq_state tcp_inq; + +static struct cfg_cmsg_types cfg_cmsg_types; +static struct cfg_sockopt_types cfg_sockopt_types; static void die_usage(void) { - fprintf(stderr, "Usage: mptcp_connect [-6] [-u] [-s MPTCP|TCP] [-p port] [-m mode]" - "[-l] [-w sec] connect_address\n"); + fprintf(stderr, "Usage: mptcp_connect [-6] [-c cmsg] [-f offset] [-i file] [-I num] [-j] [-l] " + "[-m mode] [-M mark] [-o option] [-p port] [-P mode] [-r num] [-R num] " + "[-s MPTCP|TCP] [-S num] [-t num] [-T num] [-w sec] connect_address\n"); fprintf(stderr, "\t-6 use ipv6\n"); - fprintf(stderr, "\t-t num -- set poll timeout to num\n"); - fprintf(stderr, "\t-S num -- set SO_SNDBUF to num\n"); - fprintf(stderr, "\t-R num -- set SO_RCVBUF to num\n"); + fprintf(stderr, "\t-c cmsg -- test cmsg type <cmsg>\n"); + fprintf(stderr, "\t-f offset -- stop the I/O after receiving and sending the specified amount " + "of bytes. If there are unread bytes in the receive queue, that will cause a MPTCP " + "fastclose at close/shutdown. If offset is negative, expect the peer to close before " + "all the local data as been sent, thus toleration errors on write and EPIPE signals\n"); + fprintf(stderr, "\t-i file -- read the data to send from the given file instead of stdin"); + fprintf(stderr, "\t-I num -- repeat the transfer 'num' times. In listen mode accepts num " + "incoming connections, in client mode, disconnect and reconnect to the server\n"); + fprintf(stderr, "\t-j -- add additional sleep at connection start and tear down " + "-- for MPJ tests\n"); + fprintf(stderr, "\t-l -- listens mode, accepts incoming connection\n"); + fprintf(stderr, "\t-m [poll|mmap|sendfile] -- use poll(default)/mmap+write/sendfile\n"); + fprintf(stderr, "\t-M mark -- set socket packet mark\n"); + fprintf(stderr, "\t-o option -- test sockopt <option>\n"); fprintf(stderr, "\t-p num -- use port num\n"); + fprintf(stderr, + "\t-P [saveWithPeek|saveAfterPeek] -- save data with/after MSG_PEEK form tcp socket\n"); + fprintf(stderr, "\t-r num -- enable slow mode, limiting each write to num bytes " + "-- for remove addr tests\n"); + fprintf(stderr, "\t-R num -- set SO_RCVBUF to num\n"); fprintf(stderr, "\t-s [MPTCP|TCP] -- use mptcp(default) or tcp sockets\n"); - fprintf(stderr, "\t-m [poll|mmap|sendfile] -- use poll(default)/mmap+write/sendfile\n"); - fprintf(stderr, "\t-u -- check mptcp ulp\n"); + fprintf(stderr, "\t-S num -- set SO_SNDBUF to num\n"); + fprintf(stderr, "\t-t num -- set poll timeout to num\n"); + fprintf(stderr, "\t-T num -- set expected runtime to num ms\n"); fprintf(stderr, "\t-w num -- wait num sec before closing the socket\n"); exit(1); } +static void xerror(const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + exit(1); +} + static void handle_signal(int nr) { quit = true; @@ -139,10 +214,81 @@ static void set_sndbuf(int fd, unsigned int size) } } +static void set_mark(int fd, uint32_t mark) +{ + int err; + + err = setsockopt(fd, SOL_SOCKET, SO_MARK, &mark, sizeof(mark)); + if (err) { + perror("set SO_MARK"); + exit(1); + } +} + +static void set_transparent(int fd, int pf) +{ + int one = 1; + + switch (pf) { + case AF_INET: + if (-1 == setsockopt(fd, SOL_IP, IP_TRANSPARENT, &one, sizeof(one))) + perror("IP_TRANSPARENT"); + break; + case AF_INET6: + if (-1 == setsockopt(fd, IPPROTO_IPV6, IPV6_TRANSPARENT, &one, sizeof(one))) + perror("IPV6_TRANSPARENT"); + break; + } +} + +static void set_mptfo(int fd, int pf) +{ + int qlen = 25; + + if (setsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN, &qlen, sizeof(qlen)) == -1) + perror("TCP_FASTOPEN"); +} + +static int do_ulp_so(int sock, const char *name) +{ + return setsockopt(sock, IPPROTO_TCP, TCP_ULP, name, strlen(name)); +} + +#define X(m) xerror("%s:%u: %s: failed for proto %d at line %u", __FILE__, __LINE__, (m), proto, line) +static void sock_test_tcpulp(int sock, int proto, unsigned int line) +{ + socklen_t buflen = 8; + char buf[8] = ""; + int ret = getsockopt(sock, IPPROTO_TCP, TCP_ULP, buf, &buflen); + + if (ret != 0) + X("getsockopt"); + + if (buflen > 0) { + if (strcmp(buf, "mptcp") != 0) + xerror("unexpected ULP '%s' for proto %d at line %u", buf, proto, line); + ret = do_ulp_so(sock, "tls"); + if (ret == 0) + X("setsockopt"); + } else if (proto == IPPROTO_MPTCP) { + ret = do_ulp_so(sock, "tls"); + if (ret != -1) + X("setsockopt"); + } + + ret = do_ulp_so(sock, "mptcp"); + if (ret != -1) + X("setsockopt"); + +#undef X +} + +#define SOCK_TEST_TCPULP(s, p) sock_test_tcpulp((s), (p), __LINE__) + static int sock_listen_mptcp(const char * const listenaddr, const char * const port) { - int sock; + int sock = -1; struct addrinfo hints = { .ai_protocol = IPPROTO_TCP, .ai_socktype = SOCK_STREAM, @@ -162,10 +308,18 @@ static int sock_listen_mptcp(const char * const listenaddr, if (sock < 0) continue; + SOCK_TEST_TCPULP(sock, cfg_sock_proto); + if (-1 == setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one))) perror("setsockopt"); + if (cfg_sockopt_types.transparent) + set_transparent(sock, pf); + + if (cfg_sockopt_types.mptfo) + set_mptfo(sock, pf); + if (bind(sock, a->ai_addr, a->ai_addrlen) == 0) break; /* success */ @@ -181,60 +335,30 @@ static int sock_listen_mptcp(const char * const listenaddr, return sock; } + SOCK_TEST_TCPULP(sock, cfg_sock_proto); + if (listen(sock, 20)) { perror("listen"); close(sock); return -1; } - return sock; -} + SOCK_TEST_TCPULP(sock, cfg_sock_proto); -static bool sock_test_tcpulp(const char * const remoteaddr, - const char * const port) -{ - struct addrinfo hints = { - .ai_protocol = IPPROTO_TCP, - .ai_socktype = SOCK_STREAM, - }; - struct addrinfo *a, *addr; - int sock = -1, ret = 0; - bool test_pass = false; - - hints.ai_family = AF_INET; - - xgetaddrinfo(remoteaddr, port, &hints, &addr); - for (a = addr; a; a = a->ai_next) { - sock = socket(a->ai_family, a->ai_socktype, IPPROTO_TCP); - if (sock < 0) { - perror("socket"); - continue; - } - ret = setsockopt(sock, IPPROTO_TCP, TCP_ULP, "mptcp", - sizeof("mptcp")); - if (ret == -1 && errno == EOPNOTSUPP) - test_pass = true; - close(sock); - - if (test_pass) - break; - if (!ret) - fprintf(stderr, - "setsockopt(TCP_ULP) returned 0\n"); - else - perror("setsockopt(TCP_ULP)"); - } - return test_pass; + return sock; } static int sock_connect_mptcp(const char * const remoteaddr, - const char * const port, int proto) + const char * const port, int proto, + struct addrinfo **peer, + int infd, struct wstate *winfo) { struct addrinfo hints = { .ai_protocol = IPPROTO_TCP, .ai_socktype = SOCK_STREAM, }; struct addrinfo *a, *addr; + int syn_copied = 0; int sock = -1; hints.ai_family = pf; @@ -247,15 +371,44 @@ static int sock_connect_mptcp(const char * const remoteaddr, continue; } - if (connect(sock, a->ai_addr, a->ai_addrlen) == 0) - break; /* success */ + SOCK_TEST_TCPULP(sock, proto); - perror("connect()"); - close(sock); - sock = -1; + if (cfg_mark) + set_mark(sock, cfg_mark); + + if (cfg_sockopt_types.mptfo) { + if (!winfo->total_len) + winfo->total_len = winfo->len = read(infd, winfo->buf, + sizeof(winfo->buf)); + + syn_copied = sendto(sock, winfo->buf, winfo->len, MSG_FASTOPEN, + a->ai_addr, a->ai_addrlen); + if (syn_copied >= 0) { + winfo->off = syn_copied; + winfo->len -= syn_copied; + *peer = a; + break; /* success */ + } + } else { + if (connect(sock, a->ai_addr, a->ai_addrlen) == 0) { + *peer = a; + break; /* success */ + } + } + if (cfg_sockopt_types.mptfo) { + perror("sendto()"); + close(sock); + sock = -1; + } else { + perror("connect()"); + close(sock); + sock = -1; + } } freeaddrinfo(addr); + if (sock != -1) + SOCK_TEST_TCPULP(sock, proto); return sock; } @@ -272,12 +425,12 @@ static size_t do_rnd_write(const int fd, char *buf, const size_t len) if (cfg_join && first && do_w > 100) do_w = 100; - if (cfg_remove && do_w > 50) - do_w = 50; + if (cfg_remove && do_w > cfg_do_w) + do_w = cfg_do_w; bw = write(fd, buf, do_w); if (bw < 0) - perror("write"); + return bw; /* let the join handshake complete, before going on */ if (cfg_join && first) { @@ -312,8 +465,105 @@ static size_t do_write(const int fd, char *buf, const size_t len) return offset; } +static void process_cmsg(struct msghdr *msgh) +{ + struct __kernel_timespec ts; + bool inq_found = false; + bool ts_found = false; + unsigned int inq = 0; + struct cmsghdr *cmsg; + + for (cmsg = CMSG_FIRSTHDR(msgh); cmsg ; cmsg = CMSG_NXTHDR(msgh, cmsg)) { + if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SO_TIMESTAMPNS_NEW) { + memcpy(&ts, CMSG_DATA(cmsg), sizeof(ts)); + ts_found = true; + continue; + } + if (cmsg->cmsg_level == IPPROTO_TCP && cmsg->cmsg_type == TCP_CM_INQ) { + memcpy(&inq, CMSG_DATA(cmsg), sizeof(inq)); + inq_found = true; + continue; + } + + } + + if (cfg_cmsg_types.timestampns) { + if (!ts_found) + xerror("TIMESTAMPNS not present\n"); + } + + if (cfg_cmsg_types.tcp_inq) { + if (!inq_found) + xerror("TCP_INQ not present\n"); + + if (inq > 1024) + xerror("tcp_inq %u is larger than one kbyte\n", inq); + tcp_inq.last = inq; + } +} + +static ssize_t do_recvmsg_cmsg(const int fd, char *buf, const size_t len) +{ + char msg_buf[8192]; + struct iovec iov = { + .iov_base = buf, + .iov_len = len, + }; + struct msghdr msg = { + .msg_iov = &iov, + .msg_iovlen = 1, + .msg_control = msg_buf, + .msg_controllen = sizeof(msg_buf), + }; + int flags = 0; + unsigned int last_hint = tcp_inq.last; + int ret = recvmsg(fd, &msg, flags); + + if (ret <= 0) { + if (ret == 0 && tcp_inq.expect_eof) + return ret; + + if (ret == 0 && cfg_cmsg_types.tcp_inq) + if (last_hint != 1 && last_hint != 0) + xerror("EOF but last tcp_inq hint was %u\n", last_hint); + + return ret; + } + + if (tcp_inq.expect_eof) + xerror("expected EOF, last_hint %u, now %u\n", + last_hint, tcp_inq.last); + + if (msg.msg_controllen && !cfg_cmsg_types.cmsg_enabled) + xerror("got %lu bytes of cmsg data, expected 0\n", + (unsigned long)msg.msg_controllen); + + if (msg.msg_controllen == 0 && cfg_cmsg_types.cmsg_enabled) + xerror("%s\n", "got no cmsg data"); + + if (msg.msg_controllen) + process_cmsg(&msg); + + if (cfg_cmsg_types.tcp_inq) { + if ((size_t)ret < len && last_hint > (unsigned int)ret) { + if (ret + 1 != (int)last_hint) { + int next = read(fd, msg_buf, sizeof(msg_buf)); + + xerror("read %u of %u, last_hint was %u tcp_inq hint now %u next_read returned %d/%m\n", + ret, (unsigned int)len, last_hint, tcp_inq.last, next); + } else { + tcp_inq.expect_eof = true; + } + } + } + + return ret; +} + static ssize_t do_rnd_read(const int fd, char *buf, const size_t len) { + int ret = 0; + char tmp[16384]; size_t cap = rand(); cap &= 0xffff; @@ -323,35 +573,62 @@ static ssize_t do_rnd_read(const int fd, char *buf, const size_t len) else if (cap > len) cap = len; - return read(fd, buf, cap); + if (cfg_peek == CFG_WITH_PEEK) { + ret = recv(fd, buf, cap, MSG_PEEK); + ret = (ret < 0) ? ret : read(fd, tmp, ret); + } else if (cfg_peek == CFG_AFTER_PEEK) { + ret = recv(fd, buf, cap, MSG_PEEK); + ret = (ret < 0) ? ret : read(fd, buf, cap); + } else if (cfg_cmsg_types.cmsg_enabled) { + ret = do_recvmsg_cmsg(fd, buf, cap); + } else { + ret = read(fd, buf, cap); + } + + return ret; } -static void set_nonblock(int fd) +static void set_nonblock(int fd, bool nonblock) { int flags = fcntl(fd, F_GETFL); if (flags == -1) return; - fcntl(fd, F_SETFL, flags | O_NONBLOCK); + if (nonblock) + fcntl(fd, F_SETFL, flags | O_NONBLOCK); + else + fcntl(fd, F_SETFL, flags & ~O_NONBLOCK); +} + +static void shut_wr(int fd) +{ + /* Close our write side, ev. give some time + * for address notification and/or checking + * the current status + */ + if (cfg_wait) + usleep(cfg_wait); + + shutdown(fd, SHUT_WR); } -static int copyfd_io_poll(int infd, int peerfd, int outfd) +static int copyfd_io_poll(int infd, int peerfd, int outfd, + bool *in_closed_after_out, struct wstate *winfo) { struct pollfd fds = { .fd = peerfd, .events = POLLIN | POLLOUT, }; - unsigned int woff = 0, wlen = 0; - char wbuf[8192]; + unsigned int total_wlen = 0, total_rlen = 0; - set_nonblock(peerfd); + set_nonblock(peerfd, true); for (;;) { char rbuf[8192]; ssize_t len; - if (fds.events == 0) + if (fds.events == 0 || quit) break; switch (poll(&fds, 1, poll_timeout)) { @@ -368,42 +645,65 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd) } if (fds.revents & POLLIN) { - len = do_rnd_read(peerfd, rbuf, sizeof(rbuf)); + ssize_t rb = sizeof(rbuf); + + /* limit the total amount of read data to the trunc value*/ + if (cfg_truncate > 0) { + if (rb + total_rlen > cfg_truncate) + rb = cfg_truncate - total_rlen; + len = read(peerfd, rbuf, rb); + } else { + len = do_rnd_read(peerfd, rbuf, sizeof(rbuf)); + } if (len == 0) { /* no more data to receive: * peer has closed its write side */ fds.events &= ~POLLIN; - if ((fds.events & POLLOUT) == 0) + if ((fds.events & POLLOUT) == 0) { + *in_closed_after_out = true; /* and nothing more to send */ break; + } /* Else, still have data to transmit */ } else if (len < 0) { + if (cfg_rcv_trunc) + return 0; perror("read"); return 3; } + total_rlen += len; do_write(outfd, rbuf, len); } if (fds.revents & POLLOUT) { - if (wlen == 0) { - woff = 0; - wlen = read(infd, wbuf, sizeof(wbuf)); + if (winfo->len == 0) { + winfo->off = 0; + winfo->len = read(infd, winfo->buf, sizeof(winfo->buf)); } - if (wlen > 0) { + if (winfo->len > 0) { ssize_t bw; - bw = do_rnd_write(peerfd, wbuf + woff, wlen); - if (bw < 0) + /* limit the total amount of written data to the trunc value */ + if (cfg_truncate > 0 && winfo->len + total_wlen > cfg_truncate) + winfo->len = cfg_truncate - total_wlen; + + bw = do_rnd_write(peerfd, winfo->buf + winfo->off, winfo->len); + if (bw < 0) { + if (cfg_rcv_trunc) + return 0; + perror("write"); return 111; + } - woff += bw; - wlen -= bw; - } else if (wlen == 0) { + winfo->off += bw; + winfo->len -= bw; + total_wlen += bw; + } else if (winfo->len == 0) { /* We have no more data to send. */ fds.events &= ~POLLOUT; @@ -411,14 +711,7 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd) /* ... and peer also closed already */ break; - /* ... but we still receive. - * Close our write side, ev. give some time - * for address notification and/or checking - * the current status - */ - if (cfg_wait) - usleep(cfg_wait); - shutdown(peerfd, SHUT_WR); + shut_wr(peerfd); } else { if (errno == EINTR) continue; @@ -428,17 +721,22 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd) } if (fds.revents & (POLLERR | POLLNVAL)) { + if (cfg_rcv_trunc) + return 0; fprintf(stderr, "Unexpected revents: " "POLLERR/POLLNVAL(%x)\n", fds.revents); return 5; } + + if (cfg_truncate > 0 && total_wlen >= cfg_truncate && + total_rlen >= cfg_truncate) + break; } /* leave some time for late join/announce */ - if (cfg_join || cfg_remove) + if (cfg_remove && !quit) usleep(cfg_wait); - close(peerfd); return 0; } @@ -461,10 +759,26 @@ static int do_recvfile(int infd, int outfd) return (int)r; } -static int do_mmap(int infd, int outfd, unsigned int size) +static int spool_buf(int fd, struct wstate *winfo) +{ + while (winfo->len) { + int ret = write(fd, winfo->buf + winfo->off, winfo->len); + + if (ret < 0) { + perror("write"); + return 4; + } + winfo->off += ret; + winfo->len -= ret; + } + return 0; +} + +static int do_mmap(int infd, int outfd, unsigned int size, + struct wstate *winfo) { char *inbuf = mmap(NULL, size, PROT_READ, MAP_SHARED, infd, 0); - ssize_t ret = 0, off = 0; + ssize_t ret = 0, off = winfo->total_len; size_t rem; if (inbuf == MAP_FAILED) { @@ -472,7 +786,11 @@ static int do_mmap(int infd, int outfd, unsigned int size) return 1; } - rem = size; + ret = spool_buf(outfd, winfo); + if (ret < 0) + return ret; + + rem = size - winfo->total_len; while (rem > 0) { ret = write(outfd, inbuf + off, rem); @@ -516,8 +834,16 @@ static int get_infd_size(int fd) return (int)count; } -static int do_sendfile(int infd, int outfd, unsigned int count) +static int do_sendfile(int infd, int outfd, unsigned int count, + struct wstate *winfo) { + int ret = spool_buf(outfd, winfo); + + if (ret < 0) + return ret; + + count -= winfo->total_len; + while (count > 0) { ssize_t r; @@ -534,7 +860,8 @@ static int do_sendfile(int infd, int outfd, unsigned int count) } static int copyfd_io_mmap(int infd, int peerfd, int outfd, - unsigned int size) + unsigned int size, bool *in_closed_after_out, + struct wstate *winfo) { int err; @@ -543,22 +870,23 @@ static int copyfd_io_mmap(int infd, int peerfd, int outfd, if (err) return err; - err = do_mmap(infd, peerfd, size); + err = do_mmap(infd, peerfd, size, winfo); } else { - err = do_mmap(infd, peerfd, size); + err = do_mmap(infd, peerfd, size, winfo); if (err) return err; - shutdown(peerfd, SHUT_WR); + shut_wr(peerfd); err = do_recvfile(peerfd, outfd); + *in_closed_after_out = true; } return err; } static int copyfd_io_sendfile(int infd, int peerfd, int outfd, - unsigned int size) + unsigned int size, bool *in_closed_after_out, struct wstate *winfo) { int err; @@ -567,40 +895,85 @@ static int copyfd_io_sendfile(int infd, int peerfd, int outfd, if (err) return err; - err = do_sendfile(infd, peerfd, size); + err = do_sendfile(infd, peerfd, size, winfo); } else { - err = do_sendfile(infd, peerfd, size); + err = do_sendfile(infd, peerfd, size, winfo); if (err) return err; + + shut_wr(peerfd); + err = do_recvfile(peerfd, outfd); + *in_closed_after_out = true; } return err; } -static int copyfd_io(int infd, int peerfd, int outfd) +static int copyfd_io(int infd, int peerfd, int outfd, bool close_peerfd, struct wstate *winfo) { + bool in_closed_after_out = false; + struct timespec start, end; int file_size; + int ret; + + if (cfg_time && (clock_gettime(CLOCK_MONOTONIC, &start) < 0)) + xerror("can not fetch start time %d", errno); switch (cfg_mode) { case CFG_MODE_POLL: - return copyfd_io_poll(infd, peerfd, outfd); + ret = copyfd_io_poll(infd, peerfd, outfd, &in_closed_after_out, + winfo); + break; + case CFG_MODE_MMAP: file_size = get_infd_size(infd); if (file_size < 0) return file_size; - return copyfd_io_mmap(infd, peerfd, outfd, file_size); + ret = copyfd_io_mmap(infd, peerfd, outfd, file_size, + &in_closed_after_out, winfo); + break; + case CFG_MODE_SENDFILE: file_size = get_infd_size(infd); if (file_size < 0) return file_size; - return copyfd_io_sendfile(infd, peerfd, outfd, file_size); + ret = copyfd_io_sendfile(infd, peerfd, outfd, file_size, + &in_closed_after_out, winfo); + break; + + default: + fprintf(stderr, "Invalid mode %d\n", cfg_mode); + + die_usage(); + return 1; } - fprintf(stderr, "Invalid mode %d\n", cfg_mode); + if (ret) + return ret; - die_usage(); - return 1; + if (close_peerfd) + close(peerfd); + + if (cfg_time) { + unsigned int delta_ms; + + if (clock_gettime(CLOCK_MONOTONIC, &end) < 0) + xerror("can not fetch end time %d", errno); + delta_ms = (end.tv_sec - start.tv_sec) * 1000 + (end.tv_nsec - start.tv_nsec) / 1000000; + if (delta_ms > cfg_time) { + xerror("transfer slower than expected! runtime %d ms, expected %d ms", + delta_ms, cfg_time); + } + + /* show the runtime only if this end shutdown(wr) before receiving the EOF, + * (that is, if this end got the longer runtime) + */ + if (in_closed_after_out) + fprintf(stderr, "%d", delta_ms); + } + + return 0; } static void check_sockaddr(int pf, struct sockaddr_storage *ss, @@ -693,17 +1066,20 @@ static void maybe_close(int fd) { unsigned int r = rand(); - if (!(cfg_join || cfg_remove) && (r & 1)) + if (!(cfg_join || cfg_remove || cfg_repeat > 1) && (r & 1)) close(fd); } int main_loop_s(int listensock) { struct sockaddr_storage ss; + struct wstate winfo; struct pollfd polls; socklen_t salen; int remotesock; + int fd = 0; +again: polls.fd = listensock; polls.events = POLLIN; @@ -724,47 +1100,206 @@ int main_loop_s(int listensock) check_sockaddr(pf, &ss, salen); check_getpeername(remotesock, &ss, salen); - return copyfd_io(0, remotesock, 1); + if (cfg_input) { + fd = open(cfg_input, O_RDONLY); + if (fd < 0) + xerror("can't open %s: %d", cfg_input, errno); + } + + SOCK_TEST_TCPULP(remotesock, 0); + + memset(&winfo, 0, sizeof(winfo)); + copyfd_io(fd, remotesock, 1, true, &winfo); + } else { + perror("accept"); + return 1; } - perror("accept"); + if (--cfg_repeat > 0) { + if (cfg_input) + close(fd); + goto again; + } - return 1; + return 0; } static void init_rng(void) { - int fd = open("/dev/urandom", O_RDONLY); unsigned int foo; - if (fd > 0) { - int ret = read(fd, &foo, sizeof(foo)); - - if (ret < 0) - srand(fd + foo); - close(fd); + if (getrandom(&foo, sizeof(foo), 0) == -1) { + perror("getrandom"); + exit(1); } srand(foo); } +static void xsetsockopt(int fd, int level, int optname, const void *optval, socklen_t optlen) +{ + int err; + + err = setsockopt(fd, level, optname, optval, optlen); + if (err) { + perror("setsockopt"); + exit(1); + } +} + +static void apply_cmsg_types(int fd, const struct cfg_cmsg_types *cmsg) +{ + static const unsigned int on = 1; + + if (cmsg->timestampns) + xsetsockopt(fd, SOL_SOCKET, SO_TIMESTAMPNS_NEW, &on, sizeof(on)); + if (cmsg->tcp_inq) + xsetsockopt(fd, IPPROTO_TCP, TCP_INQ, &on, sizeof(on)); +} + +static void parse_cmsg_types(const char *type) +{ + char *next = strchr(type, ','); + unsigned int len = 0; + + cfg_cmsg_types.cmsg_enabled = 1; + + if (next) { + parse_cmsg_types(next + 1); + len = next - type; + } else { + len = strlen(type); + } + + if (strncmp(type, "TIMESTAMPNS", len) == 0) { + cfg_cmsg_types.timestampns = 1; + return; + } + + if (strncmp(type, "TCPINQ", len) == 0) { + cfg_cmsg_types.tcp_inq = 1; + return; + } + + fprintf(stderr, "Unrecognized cmsg option %s\n", type); + exit(1); +} + +static void parse_setsock_options(const char *name) +{ + char *next = strchr(name, ','); + unsigned int len = 0; + + if (next) { + parse_setsock_options(next + 1); + len = next - name; + } else { + len = strlen(name); + } + + if (strncmp(name, "TRANSPARENT", len) == 0) { + cfg_sockopt_types.transparent = 1; + return; + } + + if (strncmp(name, "MPTFO", len) == 0) { + cfg_sockopt_types.mptfo = 1; + return; + } + + fprintf(stderr, "Unrecognized setsockopt option %s\n", name); + exit(1); +} + +void xdisconnect(int fd, int addrlen) +{ + struct sockaddr_storage empty; + int msec_sleep = 10; + int queued = 1; + int i; + + shutdown(fd, SHUT_WR); + + /* while until the pending data is completely flushed, the later + * disconnect will bypass/ignore/drop any pending data. + */ + for (i = 0; ; i += msec_sleep) { + if (ioctl(fd, SIOCOUTQ, &queued) < 0) + xerror("can't query out socket queue: %d", errno); + + if (!queued) + break; + + if (i > poll_timeout) + xerror("timeout while waiting for spool to complete"); + usleep(msec_sleep * 1000); + } + + memset(&empty, 0, sizeof(empty)); + empty.ss_family = AF_UNSPEC; + if (connect(fd, (struct sockaddr *)&empty, addrlen) < 0) + xerror("can't disconnect: %d", errno); +} + int main_loop(void) { - int fd; + int fd = 0, ret, fd_in = 0; + struct addrinfo *peer; + struct wstate winfo; - /* listener is ready. */ - fd = sock_connect_mptcp(cfg_host, cfg_port, cfg_sock_proto); + if (cfg_input && cfg_sockopt_types.mptfo) { + fd_in = open(cfg_input, O_RDONLY); + if (fd < 0) + xerror("can't open %s:%d", cfg_input, errno); + } + + memset(&winfo, 0, sizeof(winfo)); + fd = sock_connect_mptcp(cfg_host, cfg_port, cfg_sock_proto, &peer, fd_in, &winfo); if (fd < 0) return 2; +again: check_getpeername_connect(fd); + SOCK_TEST_TCPULP(fd, cfg_sock_proto); + if (cfg_rcvbuf) set_rcvbuf(fd, cfg_rcvbuf); if (cfg_sndbuf) set_sndbuf(fd, cfg_sndbuf); + if (cfg_cmsg_types.cmsg_enabled) + apply_cmsg_types(fd, &cfg_cmsg_types); - return copyfd_io(0, fd, 1); + if (cfg_input && !cfg_sockopt_types.mptfo) { + fd_in = open(cfg_input, O_RDONLY); + if (fd < 0) + xerror("can't open %s:%d", cfg_input, errno); + } + + ret = copyfd_io(fd_in, fd, 1, 0, &winfo); + if (ret) + return ret; + + if (cfg_truncate > 0) { + xdisconnect(fd, peer->ai_addrlen); + } else if (--cfg_repeat > 0) { + xdisconnect(fd, peer->ai_addrlen); + + /* the socket could be unblocking at this point, we need the + * connect to be blocking + */ + set_nonblock(fd, false); + if (connect(fd, peer->ai_addr, peer->ai_addrlen)) + xerror("can't reconnect: %d", errno); + if (cfg_input) + close(fd_in); + memset(&winfo, 0, sizeof(winfo)); + goto again; + } else { + close(fd); + } + + return 0; } int parse_proto(const char *proto) @@ -802,6 +1337,26 @@ int parse_mode(const char *mode) return 0; } +int parse_peek(const char *mode) +{ + if (!strcasecmp(mode, "saveWithPeek")) + return CFG_WITH_PEEK; + if (!strcasecmp(mode, "saveAfterPeek")) + return CFG_AFTER_PEEK; + + fprintf(stderr, "Unknown: %s\n", mode); + fprintf(stderr, "Supported MSG_PEEK mode are:\n"); + fprintf(stderr, + "\t\t\"saveWithPeek\" - recv data with flags 'MSG_PEEK' and save the peek data into file\n"); + fprintf(stderr, + "\t\t\"saveAfterPeek\" - read and save data into file after recv with flags 'MSG_PEEK'\n"); + + die_usage(); + + /* silence compiler warning */ + return 0; +} + static int parse_int(const char *size) { unsigned long s; @@ -829,17 +1384,36 @@ static void parse_opts(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "6jrlp:s:hut:m:S:R:w:")) != -1) { + while ((c = getopt(argc, argv, "6c:f:hi:I:jlm:M:o:p:P:r:R:s:S:t:T:w:")) != -1) { switch (c) { + case 'f': + cfg_truncate = atoi(optarg); + + /* when receiving a fastclose, ignore PIPE signals and + * all the I/O errors later in the code + */ + if (cfg_truncate < 0) { + cfg_rcv_trunc = true; + signal(SIGPIPE, handle_signal); + } + break; case 'j': cfg_join = true; cfg_mode = CFG_MODE_POLL; - cfg_wait = 400000; break; case 'r': cfg_remove = true; cfg_mode = CFG_MODE_POLL; cfg_wait = 400000; + cfg_do_w = atoi(optarg); + if (cfg_do_w <= 0) + cfg_do_w = 50; + break; + case 'i': + cfg_input = optarg; + break; + case 'I': + cfg_repeat = atoi(optarg); break; case 'l': listen_mode = true; @@ -853,9 +1427,6 @@ static void parse_opts(int argc, char **argv) case 'h': die_usage(); break; - case 'u': - tcpulp_audit = true; - break; case '6': pf = AF_INET6; break; @@ -864,6 +1435,9 @@ static void parse_opts(int argc, char **argv) if (poll_timeout <= 0) poll_timeout = -1; break; + case 'T': + cfg_time = atoi(optarg); + break; case 'm': cfg_mode = parse_mode(optarg); break; @@ -876,6 +1450,18 @@ static void parse_opts(int argc, char **argv) case 'w': cfg_wait = atoi(optarg)*1000000; break; + case 'M': + cfg_mark = strtol(optarg, NULL, 0); + break; + case 'P': + cfg_peek = parse_peek(optarg); + break; + case 'c': + parse_cmsg_types(optarg); + break; + case 'o': + parse_setsock_options(optarg); + break; } } @@ -894,9 +1480,6 @@ int main(int argc, char *argv[]) signal(SIGUSR1, handle_signal); parse_opts(argc, argv); - if (tcpulp_audit) - return sock_test_tcpulp(cfg_host, cfg_port) ? 0 : 1; - if (listen_mode) { int fd = sock_listen_mptcp(cfg_host, cfg_port); @@ -907,6 +1490,10 @@ int main(int argc, char *argv[]) set_rcvbuf(fd, cfg_rcvbuf); if (cfg_sndbuf) set_sndbuf(fd, cfg_sndbuf); + if (cfg_mark) + set_mark(fd, cfg_mark); + if (cfg_cmsg_types.cmsg_enabled) + apply_cmsg_types(fd, &cfg_cmsg_types); return main_loop_s(fd); } diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index 2cfd87d94db8..4c4248554826 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -1,17 +1,26 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +# Double quotes to prevent globbing and word splitting is recommended in new +# code but we accept it, especially because there were too many before having +# address all other issues detected by shellcheck. +#shellcheck disable=SC2086 + +. "$(dirname "${0}")/mptcp_lib.sh" + time_start=$(date +%s) -optstring="S:R:d:e:l:r:h4cm:f:t" +optstring="S:R:d:e:l:r:h4cm:f:tC" ret=0 +final_ret=0 sin="" sout="" +cin_disconnect="" cin="" cout="" -ksft_skip=4 capture=false -timeout=30 +timeout_poll=30 +timeout_test=$((timeout_poll * 2 + 1)) ipv6=true ethtool_random_on=true tc_delay="$((RANDOM%50))" @@ -21,7 +30,10 @@ sndbuf=0 rcvbuf=0 options_log=true do_tcp=0 +checksum=false filesize=0 +connect_per_transfer=1 +port=$((10000 - 1)) if [ $tc_loss -eq 100 ];then tc_loss=1% @@ -46,20 +58,21 @@ usage() { echo -e "\t-R: set rcvbuf value (default: use kernel default)" echo -e "\t-m: test mode (poll, sendfile; default: poll)" echo -e "\t-t: also run tests with TCP (use twice to non-fallback tcp)" + echo -e "\t-C: enable the MPTCP data checksum" } while getopts "$optstring" option;do case "$option" in "h") usage $0 - exit 0 + exit ${KSFT_PASS} ;; "d") if [ $OPTARG -ge 0 ];then tc_delay="$OPTARG" else echo "-d requires numeric argument, got \"$OPTARG\"" 1>&2 - exit 1 + exit ${KSFT_FAIL} fi ;; "e") @@ -83,7 +96,7 @@ while getopts "$optstring" option;do sndbuf="$OPTARG" else echo "-S requires numeric argument, got \"$OPTARG\"" 1>&2 - exit 1 + exit ${KSFT_FAIL} fi ;; "R") @@ -91,7 +104,7 @@ while getopts "$optstring" option;do rcvbuf="$OPTARG" else echo "-R requires numeric argument, got \"$OPTARG\"" 1>&2 - exit 1 + exit ${KSFT_FAIL} fi ;; "m") @@ -103,51 +116,49 @@ while getopts "$optstring" option;do "t") do_tcp=$((do_tcp+1)) ;; + "C") + checksum=true + ;; "?") usage $0 - exit 1 + exit ${KSFT_FAIL} ;; esac done -sec=$(date +%s) -rndh=$(printf %x $sec)-$(mktemp -u XXXXXX) -ns1="ns1-$rndh" -ns2="ns2-$rndh" -ns3="ns3-$rndh" -ns4="ns4-$rndh" +ns1="" +ns2="" +ns3="" +ns4="" -TEST_COUNT=0 +TEST_GROUP="" +# This function is used in the cleanup trap +#shellcheck disable=SC2317 cleanup() { + rm -f "$cin_disconnect" "$cout_disconnect" rm -f "$cin" "$cout" rm -f "$sin" "$sout" rm -f "$capout" - local netns - for netns in "$ns1" "$ns2" "$ns3" "$ns4";do - ip netns del $netns - done + mptcp_lib_ns_exit "${ns1}" "${ns2}" "${ns3}" "${ns4}" } -ip -Version > /dev/null 2>&1 -if [ $? -ne 0 ];then - echo "SKIP: Could not run test without ip tool" - exit $ksft_skip -fi +mptcp_lib_check_mptcp +mptcp_lib_check_kallsyms +mptcp_lib_check_tools ip sin=$(mktemp) sout=$(mktemp) cin=$(mktemp) cout=$(mktemp) capout=$(mktemp) +cin_disconnect="$cin".disconnect +cout_disconnect="$cout".disconnect trap cleanup EXIT -for i in "$ns1" "$ns2" "$ns3" "$ns4";do - ip netns add $i || exit $ksft_skip - ip -net $i link set lo up -done +mptcp_lib_ns_init ns1 ns2 ns3 ns4 # "$ns1" ns2 ns3 ns4 # ns1eth2 ns2eth1 ns2eth3 ns3eth2 ns3eth4 ns4eth3 @@ -195,16 +206,20 @@ ip -net "$ns4" link set ns4eth3 up ip -net "$ns4" route add default via 10.0.3.2 ip -net "$ns4" route add default via dead:beef:3::2 -# use TCP syn cookies, even if no flooding was detected. -ip netns exec "$ns2" sysctl -q net.ipv4.tcp_syncookies=2 +if $checksum; then + for i in "$ns1" "$ns2" "$ns3" "$ns4";do + ip netns exec $i sysctl -q net.mptcp.checksum_enabled=1 + done +fi set_ethtool_flags() { local ns="$1" local dev="$2" local flags="$3" - ip netns exec $ns ethtool -K $dev $flags 2>/dev/null - [ $? -eq 0 ] && echo "INFO: set $ns dev $dev: ethtool -K $flags" + if ip netns exec $ns ethtool -K $dev $flags 2>/dev/null; then + mptcp_lib_pr_info "set $ns dev $dev: ethtool -K $flags" + fi } set_random_ethtool_flags() { @@ -232,100 +247,62 @@ else set_ethtool_flags "$ns4" ns4eth3 "$ethtool_args" fi -print_file_err() -{ - ls -l "$1" 1>&2 - echo "Trailing bytes are: " - tail -c 27 "$1" -} - -check_transfer() -{ - local in=$1 - local out=$2 - local what=$3 - - cmp "$in" "$out" > /dev/null 2>&1 - if [ $? -ne 0 ] ;then - echo "[ FAIL ] $what does not match (in, out):" - print_file_err "$in" - print_file_err "$out" - - return 1 - fi - - return 0 +print_larger_title() { + # here we don't have the time, a bit longer for the alignment + MPTCP_LIB_TEST_FORMAT="%02u %-69s" \ + mptcp_lib_print_title "${@}" } check_mptcp_disabled() { local disabled_ns - disabled_ns="ns_disabled-$sech-$(mktemp -u XXXXXX)" - ip netns add ${disabled_ns} || exit $ksft_skip + mptcp_lib_ns_init disabled_ns + print_larger_title "New MPTCP socket can be blocked via sysctl" # net.mptcp.enabled should be enabled by default if [ "$(ip netns exec ${disabled_ns} sysctl net.mptcp.enabled | awk '{ print $3 }')" -ne 1 ]; then - echo -e "net.mptcp.enabled sysctl is not 1 by default\t\t[ FAIL ]" - ret=1 + mptcp_lib_pr_fail "net.mptcp.enabled sysctl is not 1 by default" + mptcp_lib_result_fail "net.mptcp.enabled sysctl is not 1 by default" + ret=${KSFT_FAIL} return 1 fi ip netns exec ${disabled_ns} sysctl -q net.mptcp.enabled=0 local err=0 - LANG=C ip netns exec ${disabled_ns} ./mptcp_connect -t $timeout -p 10000 -s MPTCP 127.0.0.1 < "$cin" 2>&1 | \ + LC_ALL=C ip netns exec ${disabled_ns} ./mptcp_connect -p 10000 -s MPTCP 127.0.0.1 < "$cin" 2>&1 | \ grep -q "^socket: Protocol not available$" && err=1 - ip netns delete ${disabled_ns} + mptcp_lib_ns_exit "${disabled_ns}" if [ ${err} -eq 0 ]; then - echo -e "New MPTCP socket cannot be blocked via sysctl\t\t[ FAIL ]" - ret=1 + mptcp_lib_pr_fail "New MPTCP socket cannot be blocked via sysctl" + mptcp_lib_result_fail "New MPTCP socket cannot be blocked via sysctl" + ret=${KSFT_FAIL} return 1 fi - echo -e "New MPTCP socket can be blocked via sysctl\t\t[ OK ]" + mptcp_lib_pr_ok + mptcp_lib_result_pass "New MPTCP socket can be blocked via sysctl" return 0 } -check_mptcp_ulp_setsockopt() -{ - local t retval - t="ns_ulp-$sech-$(mktemp -u XXXXXX)" - - ip netns add ${t} || exit $ksft_skip - if ! ip netns exec ${t} ./mptcp_connect -u -p 10000 -s TCP 127.0.0.1 2>&1; then - printf "setsockopt(..., TCP_ULP, \"mptcp\", ...) allowed\t[ FAIL ]\n" - retval=1 - ret=$retval - else - printf "setsockopt(..., TCP_ULP, \"mptcp\", ...) blocked\t[ OK ]\n" - retval=0 - fi - ip netns del ${t} - return $retval -} - -# $1: IP address -is_v6() -{ - [ -z "${1##*:*}" ] -} - do_ping() { local listener_ns="$1" local connector_ns="$2" local connect_addr="$3" local ping_args="-q -c 1" + local rc=0 - if is_v6 "${connect_addr}"; then + if mptcp_lib_is_v6 "${connect_addr}"; then $ipv6 || return 0 ping_args="${ping_args} -6" fi - ip netns exec ${connector_ns} ping ${ping_args} $connect_addr >/dev/null - if [ $? -ne 0 ] ; then - echo "$listener_ns -> $connect_addr connectivity [ FAIL ]" 1>&2 - ret=1 + ip netns exec ${connector_ns} ping ${ping_args} $connect_addr >/dev/null || rc=1 + + if [ $rc -ne 0 ] ; then + mptcp_lib_pr_fail "$listener_ns -> $connect_addr connectivity" + ret=${KSFT_FAIL} return 1 fi @@ -333,23 +310,6 @@ do_ping() return 0 } -# $1: ns, $2: port -wait_local_port_listen() -{ - local listener_ns="${1}" - local port="${2}" - - local port_hex i - - port_hex="$(printf "%04X" "${port}")" - for i in $(seq 10); do - ip netns exec "${listener_ns}" cat /proc/net/tcp* | \ - awk "BEGIN {rc=1} {if (\$2 ~ /:${port_hex}\$/ && \$4 ~ /0A/) {rc=0; exit}} END {exit rc}" && - break - sleep 0.1 - done -} - do_transfer() { local listener_ns="$1" @@ -358,28 +318,26 @@ do_transfer() local srv_proto="$4" local connect_addr="$5" local local_addr="$6" - local extra_args="" + local extra_args="$7" - local port - port=$((10000+$TEST_COUNT)) - TEST_COUNT=$((TEST_COUNT+1)) + port=$((port + 1)) if [ "$rcvbuf" -gt 0 ]; then - extra_args="$extra_args -R $rcvbuf" + extra_args+=" -R $rcvbuf" fi if [ "$sndbuf" -gt 0 ]; then - extra_args="$extra_args -S $sndbuf" + extra_args+=" -S $sndbuf" fi if [ -n "$testmode" ]; then - extra_args="$extra_args -m $testmode" + extra_args+=" -m $testmode" fi if [ -n "$extra_args" ] && $options_log; then - options_log=false - echo "INFO: extra options: $extra_args" + mptcp_lib_pr_info "extra options: $extra_args" fi + options_log=false :> "$cout" :> "$sout" @@ -387,10 +345,13 @@ do_transfer() local addr_port addr_port=$(printf "%s:%d" ${connect_addr} ${port}) - printf "%.3s %-5s -> %.3s (%-20s) %-5s\t" ${connector_ns} ${cl_proto} ${listener_ns} ${addr_port} ${srv_proto} + local result_msg + result_msg="$(printf "%.3s %-5s -> %.3s (%-20s) %-5s" ${connector_ns} ${cl_proto} ${listener_ns} ${addr_port} ${srv_proto})" + mptcp_lib_print_title "${result_msg}" if $capture; then local capuser + local rndh="${connector_ns:4}" if [ -z $SUDO_USER ] ; then capuser="" else @@ -409,19 +370,40 @@ do_transfer() sleep 1 fi - local stat_synrx_last_l=$(ip netns exec ${listener_ns} nstat -z -a MPTcpExtMPCapableSYNRX | while read a count c rest ;do echo $count;done) - local stat_ackrx_last_l=$(ip netns exec ${listener_ns} nstat -z -a MPTcpExtMPCapableACKRX | while read a count c rest ;do echo $count;done) - local stat_cookietx_last=$(ip netns exec ${listener_ns} nstat -z -a TcpExtSyncookiesSent | while read a count c rest ;do echo $count;done) - local stat_cookierx_last=$(ip netns exec ${listener_ns} nstat -z -a TcpExtSyncookiesRecv | while read a count c rest ;do echo $count;done) + NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \ + nstat -n + if [ ${listener_ns} != ${connector_ns} ]; then + NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \ + nstat -n + fi - ip netns exec ${listener_ns} ./mptcp_connect -t $timeout -l -p $port -s ${srv_proto} $extra_args $local_addr < "$sin" > "$sout" & + local stat_synrx_last_l + local stat_ackrx_last_l + local stat_cookietx_last + local stat_cookierx_last + local stat_csum_err_s + local stat_csum_err_c + stat_synrx_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX") + stat_ackrx_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableACKRX") + stat_cookietx_last=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesSent") + stat_cookierx_last=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesRecv") + stat_csum_err_s=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtDataCsumErr") + stat_csum_err_c=$(mptcp_lib_get_counter "${connector_ns}" "MPTcpExtDataCsumErr") + + timeout ${timeout_test} \ + ip netns exec ${listener_ns} \ + ./mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \ + $extra_args $local_addr < "$sin" > "$sout" & local spid=$! - wait_local_port_listen "${listener_ns}" "${port}" + mptcp_lib_wait_local_port_listen "${listener_ns}" "${port}" local start start=$(date +%s%3N) - ip netns exec ${connector_ns} ./mptcp_connect -t $timeout -p $port -s ${cl_proto} $extra_args $connect_addr < "$cin" > "$cout" & + timeout ${timeout_test} \ + ip netns exec ${connector_ns} \ + ./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \ + $extra_args $connect_addr < "$cin" > "$cout" & local cpid=$! wait $cpid @@ -438,30 +420,48 @@ do_transfer() kill ${cappid_connector} fi + NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \ + nstat | grep Tcp > /tmp/${listener_ns}.out + if [ ${listener_ns} != ${connector_ns} ]; then + NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \ + nstat | grep Tcp > /tmp/${connector_ns}.out + fi + local duration duration=$((stop-start)) - duration=$(printf "(duration %05sms)" $duration) + result_msg+=" # time=${duration}ms" + printf "(duration %05sms) " "${duration}" if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then - echo "$duration [ FAIL ] client exit code $retc, server $rets" 1>&2 + mptcp_lib_pr_fail "client exit code $retc, server $rets" echo -e "\nnetns ${listener_ns} socket stat for ${port}:" 1>&2 - ip netns exec ${listener_ns} ss -nita 1>&2 -o "sport = :$port" + ip netns exec ${listener_ns} ss -Menita 1>&2 -o "sport = :$port" + cat /tmp/${listener_ns}.out echo -e "\nnetns ${connector_ns} socket stat for ${port}:" 1>&2 - ip netns exec ${connector_ns} ss -nita 1>&2 -o "dport = :$port" + ip netns exec ${connector_ns} ss -Menita 1>&2 -o "dport = :$port" + [ ${listener_ns} != ${connector_ns} ] && cat /tmp/${connector_ns}.out + echo cat "$capout" + mptcp_lib_result_fail "${TEST_GROUP}: ${result_msg}" return 1 fi - check_transfer $sin $cout "file received by client" + mptcp_lib_check_transfer $sin $cout "file received by client" retc=$? - check_transfer $cin $sout "file received by server" + mptcp_lib_check_transfer $cin $sout "file received by server" rets=$? - local stat_synrx_now_l=$(ip netns exec ${listener_ns} nstat -z -a MPTcpExtMPCapableSYNRX | while read a count c rest ;do echo $count;done) - local stat_ackrx_now_l=$(ip netns exec ${listener_ns} nstat -z -a MPTcpExtMPCapableACKRX | while read a count c rest ;do echo $count;done) - - local stat_cookietx_now=$(ip netns exec ${listener_ns} nstat -z -a TcpExtSyncookiesSent | while read a count c rest ;do echo $count;done) - local stat_cookierx_now=$(ip netns exec ${listener_ns} nstat -z -a TcpExtSyncookiesRecv | while read a count c rest ;do echo $count;done) + local extra="" + local stat_synrx_now_l + local stat_ackrx_now_l + local stat_cookietx_now + local stat_cookierx_now + local stat_ooo_now + stat_synrx_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX") + stat_ackrx_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableACKRX") + stat_cookietx_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesSent") + stat_cookierx_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesRecv") + stat_ooo_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtTCPOFOQueue") expect_synrx=$((stat_synrx_last_l)) expect_ackrx=$((stat_ackrx_last_l)) @@ -470,40 +470,81 @@ do_transfer() cookies=${cookies##*=} if [ ${cl_proto} = "MPTCP" ] && [ ${srv_proto} = "MPTCP" ]; then - expect_synrx=$((stat_synrx_last_l+1)) - expect_ackrx=$((stat_ackrx_last_l+1)) + expect_synrx=$((stat_synrx_last_l+connect_per_transfer)) + expect_ackrx=$((stat_ackrx_last_l+connect_per_transfer)) + fi + + if [ ${stat_synrx_now_l} -lt ${expect_synrx} ]; then + mptcp_lib_pr_fail "lower MPC SYN rx (${stat_synrx_now_l})" \ + "than expected (${expect_synrx})" + retc=1 + fi + if [ ${stat_ackrx_now_l} -lt ${expect_ackrx} ] && [ ${stat_ooo_now} -eq 0 ]; then + if [ ${stat_ooo_now} -eq 0 ]; then + mptcp_lib_pr_fail "lower MPC ACK rx (${stat_ackrx_now_l})" \ + "than expected (${expect_ackrx})" + rets=1 + else + extra+=" [ Note ] fallback due to TCP OoO" + fi + fi + + if $checksum; then + local csum_err_s + local csum_err_c + csum_err_s=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtDataCsumErr") + csum_err_c=$(mptcp_lib_get_counter "${connector_ns}" "MPTcpExtDataCsumErr") + + local csum_err_s_nr=$((csum_err_s - stat_csum_err_s)) + if [ $csum_err_s_nr -gt 0 ]; then + mptcp_lib_pr_fail "server got ${csum_err_s_nr} data checksum error[s]" + rets=1 + fi + + local csum_err_c_nr=$((csum_err_c - stat_csum_err_c)) + if [ $csum_err_c_nr -gt 0 ]; then + mptcp_lib_pr_fail "client got ${csum_err_c_nr} data checksum error[s]" + retc=1 + fi fi + if [ $cookies -eq 2 ];then if [ $stat_cookietx_last -ge $stat_cookietx_now ] ;then - echo "${listener_ns} CookieSent: ${cl_proto} -> ${srv_proto}: did not advance" + extra+=" WARN: CookieSent: did not advance" fi if [ $stat_cookierx_last -ge $stat_cookierx_now ] ;then - echo "${listener_ns} CookieRecv: ${cl_proto} -> ${srv_proto}: did not advance" + extra+=" WARN: CookieRecv: did not advance" fi else if [ $stat_cookietx_last -ne $stat_cookietx_now ] ;then - echo "${listener_ns} CookieSent: ${cl_proto} -> ${srv_proto}: changed" + extra+=" WARN: CookieSent: changed" fi if [ $stat_cookierx_last -ne $stat_cookierx_now ] ;then - echo "${listener_ns} CookieRecv: ${cl_proto} -> ${srv_proto}: changed" + extra+=" WARN: CookieRecv: changed" fi fi - if [ $expect_synrx -ne $stat_synrx_now_l ] ;then - echo "${listener_ns} SYNRX: ${cl_proto} -> ${srv_proto}: expect ${expect_synrx}, got ${stat_synrx_now_l}" + if [ ${stat_synrx_now_l} -gt ${expect_synrx} ]; then + extra+=" WARN: SYNRX: expect ${expect_synrx}," + extra+=" got ${stat_synrx_now_l} (probably retransmissions)" fi - if [ $expect_ackrx -ne $stat_ackrx_now_l ] ;then - echo "${listener_ns} ACKRX: ${cl_proto} -> ${srv_proto}: expect ${expect_synrx}, got ${stat_synrx_now_l}" + if [ ${stat_ackrx_now_l} -gt ${expect_ackrx} ]; then + extra+=" WARN: ACKRX: expect ${expect_ackrx}," + extra+=" got ${stat_ackrx_now_l} (probably retransmissions)" fi - if [ $retc -eq 0 ] && [ $rets -eq 0 ];then - echo "$duration [ OK ]" - cat "$capout" - return 0 + if [ $retc -eq 0 ] && [ $rets -eq 0 ]; then + mptcp_lib_pr_ok "${extra:1}" + mptcp_lib_result_pass "${TEST_GROUP}: ${result_msg}" + else + if [ -n "${extra}" ]; then + mptcp_lib_print_warn "${extra:1}" + fi + mptcp_lib_result_fail "${TEST_GROUP}: ${result_msg}" fi cat "$capout" - return 1 + [ $retc -eq 0 ] && [ $rets -eq 0 ] } make_file() @@ -524,9 +565,8 @@ make_file() ksize=$((SIZE / 1024)) rem=$((SIZE - (ksize * 1024))) - dd if=/dev/urandom of="$name" bs=1024 count=$ksize 2> /dev/null - dd if=/dev/urandom conv=notrunc of="$name" bs=1 count=$rem 2> /dev/null - echo -e "\nMPTCP_TEST_FILE_END_MARKER" >> "$name" + mptcp_lib_make_file $name 1024 $ksize + dd if=/dev/urandom conv=notrunc of="$name" oflag=append bs=1 count=$rem 2> /dev/null echo "Created $name (size $(du -b "$name")) containing data sent by $who" } @@ -537,6 +577,7 @@ run_tests_lo() local connector_ns="$2" local connect_addr="$3" local loopback="$4" + local extra_args="$5" local lret=0 # skip if test programs are running inside same netns for subsequent runs. @@ -545,18 +586,19 @@ run_tests_lo() fi # skip if we don't want v6 - if ! $ipv6 && is_v6 "${connect_addr}"; then + if ! $ipv6 && mptcp_lib_is_v6 "${connect_addr}"; then return 0 fi local local_addr - if is_v6 "${connect_addr}"; then + if mptcp_lib_is_v6 "${connect_addr}"; then local_addr="::" else local_addr="0.0.0.0" fi - do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} ${local_addr} + do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP \ + ${connect_addr} ${local_addr} "${extra_args}" lret=$? if [ $lret -ne 0 ]; then ret=$lret @@ -570,14 +612,16 @@ run_tests_lo() fi fi - do_transfer ${listener_ns} ${connector_ns} MPTCP TCP ${connect_addr} ${local_addr} + do_transfer ${listener_ns} ${connector_ns} MPTCP TCP \ + ${connect_addr} ${local_addr} "${extra_args}" lret=$? if [ $lret -ne 0 ]; then ret=$lret return 1 fi - do_transfer ${listener_ns} ${connector_ns} TCP MPTCP ${connect_addr} ${local_addr} + do_transfer ${listener_ns} ${connector_ns} TCP MPTCP \ + ${connect_addr} ${local_addr} "${extra_args}" lret=$? if [ $lret -ne 0 ]; then ret=$lret @@ -585,7 +629,8 @@ run_tests_lo() fi if [ $do_tcp -gt 1 ] ;then - do_transfer ${listener_ns} ${connector_ns} TCP TCP ${connect_addr} ${local_addr} + do_transfer ${listener_ns} ${connector_ns} TCP TCP \ + ${connect_addr} ${local_addr} "${extra_args}" lret=$? if [ $lret -ne 0 ]; then ret=$lret @@ -601,14 +646,204 @@ run_tests() run_tests_lo $1 $2 $3 0 } +run_test_transparent() +{ + local connect_addr="$1" + local msg="$2" + + local connector_ns="$ns1" + local listener_ns="$ns2" + local lret=0 + local r6flag="" + + TEST_GROUP="${msg}" + + # skip if we don't want v6 + if ! $ipv6 && mptcp_lib_is_v6 "${connect_addr}"; then + return 0 + fi + + # IP(V6)_TRANSPARENT has been added after TOS support which came with + # the required infrastructure in MPTCP sockopt code. To support TOS, the + # following function has been exported (T). Not great but better than + # checking for a specific kernel version. + if ! mptcp_lib_kallsyms_has "T __ip_sock_set_tos$"; then + mptcp_lib_pr_skip "${msg} not supported by the kernel" + mptcp_lib_result_skip "${TEST_GROUP}" + return + fi + + if ! ip netns exec "$listener_ns" nft -f /dev/stdin <<"EOF" +flush ruleset +table inet mangle { + chain divert { + type filter hook prerouting priority -150; + + meta l4proto tcp socket transparent 1 meta mark set 1 accept + tcp dport 20000 tproxy to :20000 meta mark set 1 accept + } +} +EOF + then + mptcp_lib_pr_skip "$msg, could not load nft ruleset" + mptcp_lib_fail_if_expected_feature "nft rules" + mptcp_lib_result_skip "${TEST_GROUP}" + return + fi + + local local_addr + if mptcp_lib_is_v6 "${connect_addr}"; then + local_addr="::" + r6flag="-6" + else + local_addr="0.0.0.0" + fi + + if ! ip -net "$listener_ns" $r6flag rule add fwmark 1 lookup 100; then + ip netns exec "$listener_ns" nft flush ruleset + mptcp_lib_pr_skip "$msg, ip $r6flag rule failed" + mptcp_lib_fail_if_expected_feature "ip rule" + mptcp_lib_result_skip "${TEST_GROUP}" + return + fi + + if ! ip -net "$listener_ns" route add local $local_addr/0 dev lo table 100; then + ip netns exec "$listener_ns" nft flush ruleset + ip -net "$listener_ns" $r6flag rule del fwmark 1 lookup 100 + mptcp_lib_pr_skip "$msg, ip route add local $local_addr failed" + mptcp_lib_fail_if_expected_feature "ip route" + mptcp_lib_result_skip "${TEST_GROUP}" + return + fi + + mptcp_lib_pr_info "test $msg" + + port=$((20000 - 1)) + local extra_args="-o TRANSPARENT" + do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP \ + ${connect_addr} ${local_addr} "${extra_args}" + lret=$? + + ip netns exec "$listener_ns" nft flush ruleset + ip -net "$listener_ns" $r6flag rule del fwmark 1 lookup 100 + ip -net "$listener_ns" route del local $local_addr/0 dev lo table 100 + + if [ $lret -ne 0 ]; then + mptcp_lib_pr_fail "$msg, mptcp connection error" + ret=$lret + return 1 + fi + + mptcp_lib_pr_info "$msg pass" + return 0 +} + +run_tests_peekmode() +{ + local peekmode="$1" + + TEST_GROUP="peek mode: ${peekmode}" + mptcp_lib_pr_info "with peek mode: ${peekmode}" + run_tests_lo "$ns1" "$ns1" 10.0.1.1 1 "-P ${peekmode}" + run_tests_lo "$ns1" "$ns1" dead:beef:1::1 1 "-P ${peekmode}" +} + +run_tests_mptfo() +{ + TEST_GROUP="MPTFO" + + if ! mptcp_lib_kallsyms_has "mptcp_fastopen_"; then + mptcp_lib_pr_skip "TFO not supported by the kernel" + mptcp_lib_result_skip "${TEST_GROUP}" + return + fi + + mptcp_lib_pr_info "with MPTFO start" + ip netns exec "$ns1" sysctl -q net.ipv4.tcp_fastopen=2 + ip netns exec "$ns2" sysctl -q net.ipv4.tcp_fastopen=1 + + run_tests_lo "$ns1" "$ns2" 10.0.1.1 0 "-o MPTFO" + run_tests_lo "$ns1" "$ns2" 10.0.1.1 0 "-o MPTFO" + + run_tests_lo "$ns1" "$ns2" dead:beef:1::1 0 "-o MPTFO" + run_tests_lo "$ns1" "$ns2" dead:beef:1::1 0 "-o MPTFO" + + ip netns exec "$ns1" sysctl -q net.ipv4.tcp_fastopen=0 + ip netns exec "$ns2" sysctl -q net.ipv4.tcp_fastopen=0 + mptcp_lib_pr_info "with MPTFO end" +} + +run_tests_disconnect() +{ + local old_cin=$cin + local old_sin=$sin + + TEST_GROUP="full disconnect" + + if ! mptcp_lib_kallsyms_has "mptcp_pm_data_reset$"; then + mptcp_lib_pr_skip "Full disconnect not supported" + mptcp_lib_result_skip "${TEST_GROUP}" + return + fi + + cat $cin $cin $cin > "$cin".disconnect + + # force do_transfer to cope with the multiple transmissions + sin="$cin.disconnect" + cin="$cin.disconnect" + cin_disconnect="$old_cin" + connect_per_transfer=3 + + mptcp_lib_pr_info "disconnect" + run_tests_lo "$ns1" "$ns1" 10.0.1.1 1 "-I 3 -i $old_cin" + run_tests_lo "$ns1" "$ns1" dead:beef:1::1 1 "-I 3 -i $old_cin" + + # restore previous status + sin=$old_sin + cin=$old_cin + cin_disconnect="$cin".disconnect + connect_per_transfer=1 +} + +display_time() +{ + time_end=$(date +%s) + time_run=$((time_end-time_start)) + + echo "Time: ${time_run} seconds" +} + +log_if_error() +{ + local msg="$1" + + if [ ${ret} -ne 0 ]; then + mptcp_lib_pr_fail "${msg}" + + final_ret=${ret} + ret=${KSFT_PASS} + + return ${final_ret} + fi +} + +stop_if_error() +{ + if ! log_if_error "${@}"; then + display_time + mptcp_lib_result_print_all_tap + exit ${final_ret} + fi +} + make_file "$cin" "client" make_file "$sin" "server" check_mptcp_disabled -check_mptcp_ulp_setsockopt +stop_if_error "The kernel configuration is not valid for MPTCP" -echo "INFO: validating network environment with pings" +print_larger_title "Validating network environment with pings" for sender in "$ns1" "$ns2" "$ns3" "$ns4";do do_ping "$ns1" $sender 10.0.1.1 do_ping "$ns1" $sender dead:beef:1::1 @@ -627,11 +862,16 @@ for sender in "$ns1" "$ns2" "$ns3" "$ns4";do do_ping "$ns4" $sender dead:beef:3::1 done +mptcp_lib_result_code "${ret}" "ping tests" + +stop_if_error "Could not even run ping tests" +mptcp_lib_pr_ok + [ -n "$tc_loss" ] && tc -net "$ns2" qdisc add dev ns2eth3 root netem loss random $tc_loss delay ${tc_delay}ms -echo -n "INFO: Using loss of $tc_loss " -test "$tc_delay" -gt 0 && echo -n "delay $tc_delay ms " +tc_info="loss of $tc_loss " +test "$tc_delay" -gt 0 && tc_info+="delay $tc_delay ms " -reorder_delay=$(($tc_delay / 4)) +reorder_delay=$((tc_delay / 4)) if [ -z "${tc_reorder}" ]; then reorder1=$((RANDOM%10)) @@ -640,32 +880,41 @@ if [ -z "${tc_reorder}" ]; then if [ $reorder_delay -gt 0 ] && [ $reorder1 -lt 100 ] && [ $reorder2 -gt 0 ]; then tc_reorder="reorder ${reorder1}% ${reorder2}%" - echo -n "$tc_reorder with delay ${reorder_delay}ms " + tc_info+="$tc_reorder with delay ${reorder_delay}ms " fi elif [ "$tc_reorder" = "0" ];then tc_reorder="" elif [ "$reorder_delay" -gt 0 ];then # reordering requires some delay tc_reorder="reorder $tc_reorder" - echo -n "$tc_reorder with delay ${reorder_delay}ms " + tc_info+="$tc_reorder with delay ${reorder_delay}ms " fi -echo "on ns3eth4" +mptcp_lib_pr_info "Using ${tc_info}on ns3eth4" tc -net "$ns3" qdisc add dev ns3eth4 root netem delay ${reorder_delay}ms $tc_reorder +TEST_GROUP="loopback v4" +run_tests_lo "$ns1" "$ns1" 10.0.1.1 1 +stop_if_error "Could not even run loopback test" + +TEST_GROUP="loopback v6" +run_tests_lo "$ns1" "$ns1" dead:beef:1::1 1 +stop_if_error "Could not even run loopback v6 test" + +TEST_GROUP="multihosts" for sender in $ns1 $ns2 $ns3 $ns4;do - run_tests_lo "$ns1" "$sender" 10.0.1.1 1 - if [ $ret -ne 0 ] ;then - echo "FAIL: Could not even run loopback test" 1>&2 - exit $ret - fi - run_tests_lo "$ns1" $sender dead:beef:1::1 1 - if [ $ret -ne 0 ] ;then - echo "FAIL: Could not even run loopback v6 test" 2>&1 - exit $ret + # ns1<->ns2 is not subject to reordering/tc delays. Use it to test + # mptcp syncookie support. + if [ $sender = $ns1 ]; then + ip netns exec "$ns2" sysctl -q net.ipv4.tcp_syncookies=2 + else + ip netns exec "$ns2" sysctl -q net.ipv4.tcp_syncookies=1 fi + run_tests "$ns1" $sender 10.0.1.1 + run_tests "$ns1" $sender dead:beef:1::1 + run_tests "$ns2" $sender 10.0.1.2 run_tests "$ns2" $sender dead:beef:1::2 run_tests "$ns2" $sender 10.0.2.1 @@ -678,11 +927,26 @@ for sender in $ns1 $ns2 $ns3 $ns4;do run_tests "$ns4" $sender 10.0.3.1 run_tests "$ns4" $sender dead:beef:3::1 + + log_if_error "Tests with $sender as a sender have failed" done -time_end=$(date +%s) -time_run=$((time_end-time_start)) +run_tests_peekmode "saveWithPeek" +run_tests_peekmode "saveAfterPeek" +log_if_error "Tests with peek mode have failed" + +# MPTFO (MultiPath TCP Fatopen tests) +run_tests_mptfo +log_if_error "Tests with MPTFO have failed" + +# connect to ns4 ip address, ns2 should intercept/proxy +run_test_transparent 10.0.3.1 "tproxy ipv4" +run_test_transparent dead:beef:3::1 "tproxy ipv6" +log_if_error "Tests with tproxy have failed" -echo "Time: ${time_run} seconds" +run_tests_disconnect +log_if_error "Tests of the full disconnection have failed" -exit $ret +display_time +mptcp_lib_result_print_all_tap +exit ${final_ret} diff --git a/tools/testing/selftests/net/mptcp/mptcp_inq.c b/tools/testing/selftests/net/mptcp/mptcp_inq.c new file mode 100644 index 000000000000..218aac467321 --- /dev/null +++ b/tools/testing/selftests/net/mptcp/mptcp_inq.c @@ -0,0 +1,599 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE + +#include <assert.h> +#include <errno.h> +#include <fcntl.h> +#include <limits.h> +#include <string.h> +#include <stdarg.h> +#include <stdbool.h> +#include <stdint.h> +#include <inttypes.h> +#include <stdio.h> +#include <stdlib.h> +#include <strings.h> +#include <unistd.h> +#include <time.h> + +#include <sys/ioctl.h> +#include <sys/random.h> +#include <sys/socket.h> +#include <sys/types.h> +#include <sys/wait.h> + +#include <netdb.h> +#include <netinet/in.h> + +#include <linux/tcp.h> +#include <linux/sockios.h> + +#ifndef IPPROTO_MPTCP +#define IPPROTO_MPTCP 262 +#endif +#ifndef SOL_MPTCP +#define SOL_MPTCP 284 +#endif + +static int pf = AF_INET; +static int proto_tx = IPPROTO_MPTCP; +static int proto_rx = IPPROTO_MPTCP; + +static void die_perror(const char *msg) +{ + perror(msg); + exit(1); +} + +static void die_usage(int r) +{ + fprintf(stderr, "Usage: mptcp_inq [-6] [ -t tcp|mptcp ] [ -r tcp|mptcp]\n"); + exit(r); +} + +static void xerror(const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + fputc('\n', stderr); + exit(1); +} + +static const char *getxinfo_strerr(int err) +{ + if (err == EAI_SYSTEM) + return strerror(errno); + + return gai_strerror(err); +} + +static void xgetaddrinfo(const char *node, const char *service, + const struct addrinfo *hints, + struct addrinfo **res) +{ + int err = getaddrinfo(node, service, hints, res); + + if (err) { + const char *errstr = getxinfo_strerr(err); + + fprintf(stderr, "Fatal: getaddrinfo(%s:%s): %s\n", + node ? node : "", service ? service : "", errstr); + exit(1); + } +} + +static int sock_listen_mptcp(const char * const listenaddr, + const char * const port) +{ + int sock = -1; + struct addrinfo hints = { + .ai_protocol = IPPROTO_TCP, + .ai_socktype = SOCK_STREAM, + .ai_flags = AI_PASSIVE | AI_NUMERICHOST + }; + + hints.ai_family = pf; + + struct addrinfo *a, *addr; + int one = 1; + + xgetaddrinfo(listenaddr, port, &hints, &addr); + hints.ai_family = pf; + + for (a = addr; a; a = a->ai_next) { + sock = socket(a->ai_family, a->ai_socktype, proto_rx); + if (sock < 0) + continue; + + if (-1 == setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &one, + sizeof(one))) + perror("setsockopt"); + + if (bind(sock, a->ai_addr, a->ai_addrlen) == 0) + break; /* success */ + + perror("bind"); + close(sock); + sock = -1; + } + + freeaddrinfo(addr); + + if (sock < 0) + xerror("could not create listen socket"); + + if (listen(sock, 20)) + die_perror("listen"); + + return sock; +} + +static int sock_connect_mptcp(const char * const remoteaddr, + const char * const port, int proto) +{ + struct addrinfo hints = { + .ai_protocol = IPPROTO_TCP, + .ai_socktype = SOCK_STREAM, + }; + struct addrinfo *a, *addr; + int sock = -1; + + hints.ai_family = pf; + + xgetaddrinfo(remoteaddr, port, &hints, &addr); + for (a = addr; a; a = a->ai_next) { + sock = socket(a->ai_family, a->ai_socktype, proto); + if (sock < 0) + continue; + + if (connect(sock, a->ai_addr, a->ai_addrlen) == 0) + break; /* success */ + + die_perror("connect"); + } + + if (sock < 0) + xerror("could not create connect socket"); + + freeaddrinfo(addr); + return sock; +} + +static int protostr_to_num(const char *s) +{ + if (strcasecmp(s, "tcp") == 0) + return IPPROTO_TCP; + if (strcasecmp(s, "mptcp") == 0) + return IPPROTO_MPTCP; + + die_usage(1); + return 0; +} + +static void parse_opts(int argc, char **argv) +{ + int c; + + while ((c = getopt(argc, argv, "h6t:r:")) != -1) { + switch (c) { + case 'h': + die_usage(0); + break; + case '6': + pf = AF_INET6; + break; + case 't': + proto_tx = protostr_to_num(optarg); + break; + case 'r': + proto_rx = protostr_to_num(optarg); + break; + default: + die_usage(1); + break; + } + } +} + +/* wait up to timeout milliseconds */ +static void wait_for_ack(int fd, int timeout, size_t total) +{ + int i; + + for (i = 0; i < timeout; i++) { + int nsd, ret, queued = -1; + struct timespec req; + + ret = ioctl(fd, TIOCOUTQ, &queued); + if (ret < 0) + die_perror("TIOCOUTQ"); + + ret = ioctl(fd, SIOCOUTQNSD, &nsd); + if (ret < 0) + die_perror("SIOCOUTQNSD"); + + if ((size_t)queued > total) + xerror("TIOCOUTQ %u, but only %zu expected\n", queued, total); + assert(nsd <= queued); + + if (queued == 0) + return; + + /* wait for peer to ack rx of all data */ + req.tv_sec = 0; + req.tv_nsec = 1 * 1000 * 1000ul; /* 1ms */ + nanosleep(&req, NULL); + } + + xerror("still tx data queued after %u ms\n", timeout); +} + +static void connect_one_server(int fd, int unixfd) +{ + size_t len, i, total, sent; + char buf[4096], buf2[4096]; + ssize_t ret; + + len = rand() % (sizeof(buf) - 1); + + if (len < 128) + len = 128; + + for (i = 0; i < len ; i++) { + buf[i] = rand() % 26; + buf[i] += 'A'; + } + + buf[i] = '\n'; + + /* un-block server */ + ret = read(unixfd, buf2, 4); + assert(ret == 4); + + assert(strncmp(buf2, "xmit", 4) == 0); + + ret = write(unixfd, &len, sizeof(len)); + assert(ret == (ssize_t)sizeof(len)); + + ret = write(fd, buf, len); + if (ret < 0) + die_perror("write"); + + if (ret != (ssize_t)len) + xerror("short write"); + + ret = read(unixfd, buf2, 4); + assert(strncmp(buf2, "huge", 4) == 0); + + total = rand() % (16 * 1024 * 1024); + total += (1 * 1024 * 1024); + sent = total; + + ret = write(unixfd, &total, sizeof(total)); + assert(ret == (ssize_t)sizeof(total)); + + wait_for_ack(fd, 5000, len); + + while (total > 0) { + if (total > sizeof(buf)) + len = sizeof(buf); + else + len = total; + + ret = write(fd, buf, len); + if (ret < 0) + die_perror("write"); + total -= ret; + + /* we don't have to care about buf content, only + * number of total bytes sent + */ + } + + ret = read(unixfd, buf2, 4); + assert(ret == 4); + assert(strncmp(buf2, "shut", 4) == 0); + + wait_for_ack(fd, 5000, sent); + + ret = write(fd, buf, 1); + assert(ret == 1); + close(fd); + ret = write(unixfd, "closed", 6); + assert(ret == 6); + + close(unixfd); +} + +static void get_tcp_inq(struct msghdr *msgh, unsigned int *inqv) +{ + struct cmsghdr *cmsg; + + for (cmsg = CMSG_FIRSTHDR(msgh); cmsg ; cmsg = CMSG_NXTHDR(msgh, cmsg)) { + if (cmsg->cmsg_level == IPPROTO_TCP && cmsg->cmsg_type == TCP_CM_INQ) { + memcpy(inqv, CMSG_DATA(cmsg), sizeof(*inqv)); + return; + } + } + + xerror("could not find TCP_CM_INQ cmsg type"); +} + +static void process_one_client(int fd, int unixfd) +{ + unsigned int tcp_inq; + size_t expect_len; + char msg_buf[4096]; + char buf[4096]; + char tmp[16]; + struct iovec iov = { + .iov_base = buf, + .iov_len = 1, + }; + struct msghdr msg = { + .msg_iov = &iov, + .msg_iovlen = 1, + .msg_control = msg_buf, + .msg_controllen = sizeof(msg_buf), + }; + ssize_t ret, tot; + + ret = write(unixfd, "xmit", 4); + assert(ret == 4); + + ret = read(unixfd, &expect_len, sizeof(expect_len)); + assert(ret == (ssize_t)sizeof(expect_len)); + + if (expect_len > sizeof(buf)) + xerror("expect len %zu exceeds buffer size", expect_len); + + for (;;) { + struct timespec req; + unsigned int queued; + + ret = ioctl(fd, FIONREAD, &queued); + if (ret < 0) + die_perror("FIONREAD"); + if (queued > expect_len) + xerror("FIONREAD returned %u, but only %zu expected\n", + queued, expect_len); + if (queued == expect_len) + break; + + req.tv_sec = 0; + req.tv_nsec = 1000 * 1000ul; + nanosleep(&req, NULL); + } + + /* read one byte, expect cmsg to return expected - 1 */ + ret = recvmsg(fd, &msg, 0); + if (ret < 0) + die_perror("recvmsg"); + + if (msg.msg_controllen == 0) + xerror("msg_controllen is 0"); + + get_tcp_inq(&msg, &tcp_inq); + + assert((size_t)tcp_inq == (expect_len - 1)); + + iov.iov_len = sizeof(buf); + ret = recvmsg(fd, &msg, 0); + if (ret < 0) + die_perror("recvmsg"); + + /* should have gotten exact remainder of all pending data */ + assert(ret == (ssize_t)tcp_inq); + + /* should be 0, all drained */ + get_tcp_inq(&msg, &tcp_inq); + assert(tcp_inq == 0); + + /* request a large swath of data. */ + ret = write(unixfd, "huge", 4); + assert(ret == 4); + + ret = read(unixfd, &expect_len, sizeof(expect_len)); + assert(ret == (ssize_t)sizeof(expect_len)); + + /* peer should send us a few mb of data */ + if (expect_len <= sizeof(buf)) + xerror("expect len %zu too small\n", expect_len); + + tot = 0; + do { + iov.iov_len = sizeof(buf); + ret = recvmsg(fd, &msg, 0); + if (ret < 0) + die_perror("recvmsg"); + + tot += ret; + + get_tcp_inq(&msg, &tcp_inq); + + if (tcp_inq > expect_len - tot) + xerror("inq %d, remaining %d total_len %d\n", + tcp_inq, expect_len - tot, (int)expect_len); + + assert(tcp_inq <= expect_len - tot); + } while ((size_t)tot < expect_len); + + ret = write(unixfd, "shut", 4); + assert(ret == 4); + + /* wait for hangup. Should have received one more byte of data. */ + ret = read(unixfd, tmp, sizeof(tmp)); + assert(ret == 6); + assert(strncmp(tmp, "closed", 6) == 0); + + sleep(1); + + iov.iov_len = 1; + ret = recvmsg(fd, &msg, 0); + if (ret < 0) + die_perror("recvmsg"); + assert(ret == 1); + + get_tcp_inq(&msg, &tcp_inq); + + /* tcp_inq should be 1 due to received fin. */ + assert(tcp_inq == 1); + + iov.iov_len = 1; + ret = recvmsg(fd, &msg, 0); + if (ret < 0) + die_perror("recvmsg"); + + /* expect EOF */ + assert(ret == 0); + get_tcp_inq(&msg, &tcp_inq); + assert(tcp_inq == 1); + + close(fd); +} + +static int xaccept(int s) +{ + int fd = accept(s, NULL, 0); + + if (fd < 0) + die_perror("accept"); + + return fd; +} + +static int server(int unixfd) +{ + int fd = -1, r, on = 1; + + switch (pf) { + case AF_INET: + fd = sock_listen_mptcp("127.0.0.1", "15432"); + break; + case AF_INET6: + fd = sock_listen_mptcp("::1", "15432"); + break; + default: + xerror("Unknown pf %d\n", pf); + break; + } + + r = write(unixfd, "conn", 4); + assert(r == 4); + + alarm(15); + r = xaccept(fd); + + if (-1 == setsockopt(r, IPPROTO_TCP, TCP_INQ, &on, sizeof(on))) + die_perror("setsockopt"); + + process_one_client(r, unixfd); + + return 0; +} + +static int client(int unixfd) +{ + int fd = -1; + + alarm(15); + + switch (pf) { + case AF_INET: + fd = sock_connect_mptcp("127.0.0.1", "15432", proto_tx); + break; + case AF_INET6: + fd = sock_connect_mptcp("::1", "15432", proto_tx); + break; + default: + xerror("Unknown pf %d\n", pf); + } + + connect_one_server(fd, unixfd); + + return 0; +} + +static void init_rng(void) +{ + unsigned int foo; + + if (getrandom(&foo, sizeof(foo), 0) == -1) { + perror("getrandom"); + exit(1); + } + + srand(foo); +} + +static pid_t xfork(void) +{ + pid_t p = fork(); + + if (p < 0) + die_perror("fork"); + else if (p == 0) + init_rng(); + + return p; +} + +static int rcheck(int wstatus, const char *what) +{ + if (WIFEXITED(wstatus)) { + if (WEXITSTATUS(wstatus) == 0) + return 0; + fprintf(stderr, "%s exited, status=%d\n", what, WEXITSTATUS(wstatus)); + return WEXITSTATUS(wstatus); + } else if (WIFSIGNALED(wstatus)) { + xerror("%s killed by signal %d\n", what, WTERMSIG(wstatus)); + } else if (WIFSTOPPED(wstatus)) { + xerror("%s stopped by signal %d\n", what, WSTOPSIG(wstatus)); + } + + return 111; +} + +int main(int argc, char *argv[]) +{ + int e1, e2, wstatus; + pid_t s, c, ret; + int unixfds[2]; + + parse_opts(argc, argv); + + e1 = socketpair(AF_UNIX, SOCK_DGRAM, 0, unixfds); + if (e1 < 0) + die_perror("pipe"); + + s = xfork(); + if (s == 0) + return server(unixfds[1]); + + close(unixfds[1]); + + /* wait until server bound a socket */ + e1 = read(unixfds[0], &e1, 4); + assert(e1 == 4); + + c = xfork(); + if (c == 0) + return client(unixfds[0]); + + close(unixfds[0]); + + ret = waitpid(s, &wstatus, 0); + if (ret == -1) + die_perror("waitpid"); + e1 = rcheck(wstatus, "server"); + ret = waitpid(c, &wstatus, 0); + if (ret == -1) + die_perror("waitpid"); + e2 = rcheck(wstatus, "client"); + + return e1 ? e1 : e2; +} diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 08f53d86dedc..5e9211e89825 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -1,42 +1,111 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +# Double quotes to prevent globbing and word splitting is recommended in new +# code but we accept it, especially because there were too many before having +# address all other issues detected by shellcheck. +#shellcheck disable=SC2086 + +# ShellCheck incorrectly believes that most of the code here is unreachable +# because it's invoked by variable name, see how the "tests" array is used +#shellcheck disable=SC2317 + +. "$(dirname "${0}")/mptcp_lib.sh" + ret=0 sin="" +sinfail="" sout="" cin="" +cinfail="" +cinsent="" +tmpfile="" cout="" -ksft_skip=4 -timeout=30 -mptcp_connect="" -capture=0 - -TEST_COUNT=0 - -init() +err="" +capout="" +ns1="" +ns2="" +iptables="iptables" +ip6tables="ip6tables" +timeout_poll=30 +timeout_test=$((timeout_poll * 2 + 1)) +capture=false +checksum=false +ip_mptcp=0 +check_invert=0 +validate_checksum=false +init=0 +evts_ns1="" +evts_ns2="" +evts_ns1_pid=0 +evts_ns2_pid=0 +last_test_failed=0 +last_test_skipped=0 +last_test_ignored=1 + +declare -A all_tests +declare -a only_tests_ids +declare -a only_tests_names +declare -A failed_tests +MPTCP_LIB_TEST_FORMAT="%03u %s\n" +TEST_NAME="" +nr_blank=6 + +# These var are used only in some tests, make sure they are not already set +unset FAILING_LINKS +unset test_linkfail +unset addr_nr_ns1 +unset addr_nr_ns2 +unset cestab_ns1 +unset cestab_ns2 +unset sflags +unset fastclose +unset fullmesh +unset speed + +# generated using "nfbpf_compile '(ip && (ip[54] & 0xf0) == 0x30) || +# (ip6 && (ip6[74] & 0xf0) == 0x30)'" +CBPF_MPTCP_SUBOPTION_ADD_ADDR="14, + 48 0 0 0, + 84 0 0 240, + 21 0 3 64, + 48 0 0 54, + 84 0 0 240, + 21 6 7 48, + 48 0 0 0, + 84 0 0 240, + 21 0 4 96, + 48 0 0 74, + 84 0 0 240, + 21 0 1 48, + 6 0 0 65535, + 6 0 0 0" + +init_partial() { capout=$(mktemp) - rndh=$(printf %x $sec)-$(mktemp -u XXXXXX) - - ns1="ns1-$rndh" - ns2="ns2-$rndh" + mptcp_lib_ns_init ns1 ns2 - for netns in "$ns1" "$ns2";do - ip netns add $netns || exit $ksft_skip - ip -net $netns link set lo up - ip netns exec $netns sysctl -q net.mptcp.enabled=1 - ip netns exec $netns sysctl -q net.ipv4.conf.all.rp_filter=0 - ip netns exec $netns sysctl -q net.ipv4.conf.default.rp_filter=0 + local netns + for netns in "$ns1" "$ns2"; do + ip netns exec $netns sysctl -q net.mptcp.pm_type=0 2>/dev/null || true + if $checksum; then + ip netns exec $netns sysctl -q net.mptcp.checksum_enabled=1 + fi done - # ns1 ns2 + check_invert=0 + validate_checksum=$checksum + + # ns1 ns2 # ns1eth1 ns2eth1 # ns1eth2 ns2eth2 # ns1eth3 ns2eth3 # ns1eth4 ns2eth4 - for i in `seq 1 4`; do + local i + for i in $(seq 1 4); do ip link add ns1eth$i netns "$ns1" type veth peer name ns2eth$i netns "$ns2" ip -net "$ns1" addr add 10.0.$i.1/24 dev ns1eth$i ip -net "$ns1" addr add dead:beef:$i::1/64 dev ns1eth$i nodad @@ -48,6 +117,16 @@ init() # let $ns2 reach any $ns1 address from any interface ip -net "$ns2" route add default via 10.0.$i.1 dev ns2eth$i metric 10$i + ip -net "$ns2" route add default via dead:beef:$i::1 dev ns2eth$i metric 10$i + done +} + +init_shapers() +{ + local i + for i in $(seq 1 4); do + tc -n $ns1 qdisc add dev ns1eth$i root netem rate 20mbit delay 1 + tc -n $ns2 qdisc add dev ns2eth$i root netem rate 20mbit delay 1 done } @@ -55,172 +134,986 @@ cleanup_partial() { rm -f "$capout" - for netns in "$ns1" "$ns2"; do - ip netns del $netns - done + mptcp_lib_ns_exit "${ns1}" "${ns2}" +} + +init() { + init=1 + + mptcp_lib_check_mptcp + mptcp_lib_check_kallsyms + mptcp_lib_check_tools ip ss "${iptables}" "${ip6tables}" + + sin=$(mktemp) + sout=$(mktemp) + cin=$(mktemp) + cinsent=$(mktemp) + cout=$(mktemp) + err=$(mktemp) + evts_ns1=$(mktemp) + evts_ns2=$(mktemp) + + trap cleanup EXIT + + make_file "$cin" "client" 1 >/dev/null + make_file "$sin" "server" 1 >/dev/null } cleanup() { - rm -f "$cin" "$cout" - rm -f "$sin" "$sout" + rm -f "$cin" "$cout" "$sinfail" + rm -f "$sin" "$sout" "$cinsent" "$cinfail" + rm -f "$tmpfile" + rm -rf $evts_ns1 $evts_ns2 + rm -f "$err" cleanup_partial } +print_check() +{ + printf "%-${nr_blank}s%-36s" " " "${*}" +} + +print_info() +{ + # It can be empty, no need to print anything then + [ -z "${1}" ] && return + + mptcp_lib_print_info " Info: ${*}" +} + +print_ok() +{ + mptcp_lib_pr_ok "${@}" +} + +print_fail() +{ + mptcp_lib_pr_fail "${@}" +} + +print_skip() +{ + mptcp_lib_pr_skip "${@}" +} + +# [ $1: fail msg ] +mark_as_skipped() +{ + local msg="${1:-"Feature not supported"}" + + mptcp_lib_fail_if_expected_feature "${msg}" + + print_check "${msg}" + print_skip + + last_test_skipped=1 +} + +# $@: condition +continue_if() +{ + if ! "${@}"; then + mark_as_skipped + return 1 + fi +} + +skip_test() +{ + if [ "${#only_tests_ids[@]}" -eq 0 ] && [ "${#only_tests_names[@]}" -eq 0 ]; then + return 1 + fi + + local i + for i in "${only_tests_ids[@]}"; do + if [ "$((MPTCP_LIB_TEST_COUNTER+1))" -eq "${i}" ]; then + return 1 + fi + done + for i in "${only_tests_names[@]}"; do + if [ "${TEST_NAME}" = "${i}" ]; then + return 1 + fi + done + + return 0 +} + +append_prev_results() +{ + if [ ${last_test_failed} -eq 1 ]; then + mptcp_lib_result_fail "${TEST_NAME}" + elif [ ${last_test_skipped} -eq 1 ]; then + mptcp_lib_result_skip "${TEST_NAME}" + elif [ ${last_test_ignored} -ne 1 ]; then + mptcp_lib_result_pass "${TEST_NAME}" + fi + + last_test_failed=0 + last_test_skipped=0 + last_test_ignored=0 +} + +# $1: test name reset() { - cleanup_partial - init + append_prev_results + + TEST_NAME="${1}" + + if skip_test; then + MPTCP_LIB_TEST_COUNTER=$((MPTCP_LIB_TEST_COUNTER+1)) + last_test_ignored=1 + return 1 + fi + + mptcp_lib_print_title "${TEST_NAME}" + + if [ "${init}" != "1" ]; then + init + else + cleanup_partial + fi + + init_partial + + return 0 } +# $1: test name ; $2: counter to check +reset_check_counter() +{ + reset "${1}" || return 1 + + local counter="${2}" + + if ! nstat -asz "${counter}" | grep -wq "${counter}"; then + mark_as_skipped "counter '${counter}' is not available" + return 1 + fi +} + +# $1: test name reset_with_cookies() { - reset + reset "${1}" || return 1 - for netns in "$ns1" "$ns2";do + local netns + for netns in "$ns1" "$ns2"; do ip netns exec $netns sysctl -q net.ipv4.tcp_syncookies=2 done } -for arg in "$@"; do - if [ "$arg" = "-c" ]; then - capture=1 +# $1: test name +reset_with_add_addr_timeout() +{ + local ip="${2:-4}" + local tables + + reset "${1}" || return 1 + + tables="${iptables}" + if [ $ip -eq 6 ]; then + tables="${ip6tables}" fi -done -ip -Version > /dev/null 2>&1 -if [ $? -ne 0 ];then - echo "SKIP: Could not run test without ip tool" - exit $ksft_skip -fi + ip netns exec $ns1 sysctl -q net.mptcp.add_addr_timeout=1 + if ! ip netns exec $ns2 $tables -A OUTPUT -p tcp \ + -m tcp --tcp-option 30 \ + -m bpf --bytecode \ + "$CBPF_MPTCP_SUBOPTION_ADD_ADDR" \ + -j DROP; then + mark_as_skipped "unable to set the 'add addr' rule" + return 1 + fi +} -check_transfer() +# $1: test name +reset_with_checksum() +{ + local ns1_enable=$1 + local ns2_enable=$2 + + reset "checksum test ${1} ${2}" || return 1 + + ip netns exec $ns1 sysctl -q net.mptcp.checksum_enabled=$ns1_enable + ip netns exec $ns2 sysctl -q net.mptcp.checksum_enabled=$ns2_enable + + validate_checksum=true +} + +reset_with_allow_join_id0() +{ + local ns1_enable=$2 + local ns2_enable=$3 + + reset "${1}" || return 1 + + ip netns exec $ns1 sysctl -q net.mptcp.allow_join_initial_addr_port=$ns1_enable + ip netns exec $ns2 sysctl -q net.mptcp.allow_join_initial_addr_port=$ns2_enable +} + +# Modify TCP payload without corrupting the TCP packet +# +# This rule inverts a 8-bit word at byte offset 148 for the 2nd TCP ACK packets +# carrying enough data. +# Once it is done, the TCP Checksum field is updated so the packet is still +# considered as valid at the TCP level. +# Because the MPTCP checksum, covering the TCP options and data, has not been +# updated, the modification will be detected and an MP_FAIL will be emitted: +# what we want to validate here without corrupting "random" MPTCP options. +# +# To avoid having tc producing this pr_info() message for each TCP ACK packets +# not carrying enough data: +# +# tc action pedit offset 162 out of bounds +# +# Netfilter is used to mark packets with enough data. +setup_fail_rules() +{ + check_invert=1 + validate_checksum=true + local i="$1" + local ip="${2:-4}" + local tables + + tables="${iptables}" + if [ $ip -eq 6 ]; then + tables="${ip6tables}" + fi + + ip netns exec $ns2 $tables \ + -t mangle \ + -A OUTPUT \ + -o ns2eth$i \ + -p tcp \ + -m length --length 150:9999 \ + -m statistic --mode nth --packet 1 --every 99999 \ + -j MARK --set-mark 42 || return ${KSFT_SKIP} + + tc -n $ns2 qdisc add dev ns2eth$i clsact || return ${KSFT_SKIP} + tc -n $ns2 filter add dev ns2eth$i egress \ + protocol ip prio 1000 \ + handle 42 fw \ + action pedit munge offset 148 u8 invert \ + pipe csum tcp \ + index 100 || return ${KSFT_SKIP} +} + +reset_with_fail() +{ + reset_check_counter "${1}" "MPTcpExtInfiniteMapTx" || return 1 + shift + + ip netns exec $ns1 sysctl -q net.mptcp.checksum_enabled=1 + ip netns exec $ns2 sysctl -q net.mptcp.checksum_enabled=1 + + local rc=0 + setup_fail_rules "${@}" || rc=$? + + if [ ${rc} -eq ${KSFT_SKIP} ]; then + mark_as_skipped "unable to set the 'fail' rules" + return 1 + fi +} + +reset_with_events() { - in=$1 - out=$2 - what=$3 + reset "${1}" || return 1 - cmp "$in" "$out" > /dev/null 2>&1 - if [ $? -ne 0 ] ;then - echo "[ FAIL ] $what does not match (in, out):" - print_file_err "$in" - print_file_err "$out" + mptcp_lib_events "${ns1}" "${evts_ns1}" evts_ns1_pid + mptcp_lib_events "${ns2}" "${evts_ns2}" evts_ns2_pid +} +reset_with_tcp_filter() +{ + reset "${1}" || return 1 + shift + + local ns="${!1}" + local src="${2}" + local target="${3}" + + if ! ip netns exec "${ns}" ${iptables} \ + -A INPUT \ + -s "${src}" \ + -p tcp \ + -j "${target}"; then + mark_as_skipped "unable to set the filter rules" return 1 fi +} + +# $1: err msg +fail_test() +{ + ret=${KSFT_FAIL} + + if [ ${#} -gt 0 ]; then + print_fail "${@}" + fi + + # just in case a test is marked twice as failed + if [ ${last_test_failed} -eq 0 ]; then + failed_tests[${MPTCP_LIB_TEST_COUNTER}]="${TEST_NAME}" + dump_stats + last_test_failed=1 + fi +} + +get_failed_tests_ids() +{ + # sorted + local i + for i in "${!failed_tests[@]}"; do + echo "${i}" + done | sort -n +} + +check_transfer() +{ + local in=$1 + local out=$2 + local what=$3 + local bytes=$4 + local i a b + + local line + if [ -n "$bytes" ]; then + local out_size + # when truncating we must check the size explicitly + out_size=$(wc -c $out | awk '{print $1}') + if [ $out_size -ne $bytes ]; then + fail_test "$what output file has wrong size ($out_size, $bytes)" + return 1 + fi + + # note: BusyBox's "cmp" command doesn't support --bytes + tmpfile=$(mktemp) + head --bytes="$bytes" "$in" > "$tmpfile" + mv "$tmpfile" "$in" + head --bytes="$bytes" "$out" > "$tmpfile" + mv "$tmpfile" "$out" + tmpfile="" + fi + cmp -l "$in" "$out" | while read -r i a b; do + local sum=$((0${a} + 0${b})) + if [ $check_invert -eq 0 ] || [ $sum -ne $((0xff)) ]; then + fail_test "$what does not match (in, out):" + mptcp_lib_print_file_err "$in" + mptcp_lib_print_file_err "$out" + + return 1 + else + print_info "$what has inverted byte at ${i}" + fi + done return 0 } do_ping() { - listener_ns="$1" - connector_ns="$2" - connect_addr="$3" + local listener_ns="$1" + local connector_ns="$2" + local connect_addr="$3" - ip netns exec ${connector_ns} ping -q -c 1 $connect_addr >/dev/null - if [ $? -ne 0 ] ; then - echo "$listener_ns -> $connect_addr connectivity [ FAIL ]" 1>&2 - ret=1 + if ! ip netns exec ${connector_ns} ping -q -c 1 $connect_addr >/dev/null; then + fail_test "$listener_ns -> $connect_addr connectivity" fi } -do_transfer() +link_failure() { - listener_ns="$1" - connector_ns="$2" - cl_proto="$3" - srv_proto="$4" - connect_addr="$5" - rm_nr_ns1="$6" - rm_nr_ns2="$7" + local ns="$1" - port=$((10000+$TEST_COUNT)) - TEST_COUNT=$((TEST_COUNT+1)) + if [ -z "$FAILING_LINKS" ]; then + l=$((RANDOM%4)) + FAILING_LINKS=$((l+1)) + fi + + local l + for l in $FAILING_LINKS; do + local veth="ns1eth$l" + ip -net "$ns" link set "$veth" down + done +} + +rm_addr_count() +{ + mptcp_lib_get_counter "${1}" "MPTcpExtRmAddr" +} + +# $1: ns, $2: old rm_addr counter in $ns +wait_rm_addr() +{ + local ns="${1}" + local old_cnt="${2}" + local cnt + + local i + for i in $(seq 10); do + cnt=$(rm_addr_count ${ns}) + [ "$cnt" = "${old_cnt}" ] || break + sleep 0.1 + done +} + +rm_sf_count() +{ + mptcp_lib_get_counter "${1}" "MPTcpExtRmSubflow" +} + +# $1: ns, $2: old rm_sf counter in $ns +wait_rm_sf() +{ + local ns="${1}" + local old_cnt="${2}" + local cnt + + local i + for i in $(seq 10); do + cnt=$(rm_sf_count ${ns}) + [ "$cnt" = "${old_cnt}" ] || break + sleep 0.1 + done +} + +wait_mpj() +{ + local ns="${1}" + local cnt old_cnt + + old_cnt=$(mptcp_lib_get_counter ${ns} "MPTcpExtMPJoinAckRx") + + local i + for i in $(seq 10); do + cnt=$(mptcp_lib_get_counter ${ns} "MPTcpExtMPJoinAckRx") + [ "$cnt" = "${old_cnt}" ] || break + sleep 0.1 + done +} + +kill_events_pids() +{ + mptcp_lib_kill_wait $evts_ns1_pid + evts_ns1_pid=0 + mptcp_lib_kill_wait $evts_ns2_pid + evts_ns2_pid=0 +} + +pm_nl_set_limits() +{ + local ns=$1 + local addrs=$2 + local subflows=$3 + + if [ $ip_mptcp -eq 1 ]; then + ip -n $ns mptcp limits set add_addr_accepted $addrs subflows $subflows + else + ip netns exec $ns ./pm_nl_ctl limits $addrs $subflows + fi +} + +pm_nl_add_endpoint() +{ + local ns=$1 + local addr=$2 + local flags _flags + local port _port + local dev _dev + local id _id + local nr=2 + + local p + for p in "${@}" + do + if [ $p = "flags" ]; then + eval _flags=\$"$nr" + [ -n "$_flags" ]; flags="flags $_flags" + fi + if [ $p = "dev" ]; then + eval _dev=\$"$nr" + [ -n "$_dev" ]; dev="dev $_dev" + fi + if [ $p = "id" ]; then + eval _id=\$"$nr" + [ -n "$_id" ]; id="id $_id" + fi + if [ $p = "port" ]; then + eval _port=\$"$nr" + [ -n "$_port" ]; port="port $_port" + fi + + nr=$((nr + 1)) + done + + if [ $ip_mptcp -eq 1 ]; then + ip -n $ns mptcp endpoint add $addr ${_flags//","/" "} $dev $id $port + else + ip netns exec $ns ./pm_nl_ctl add $addr $flags $dev $id $port + fi +} + +pm_nl_del_endpoint() +{ + local ns=$1 + local id=$2 + local addr=$3 + + if [ $ip_mptcp -eq 1 ]; then + [ $id -ne 0 ] && addr='' + ip -n $ns mptcp endpoint delete id $id $addr + else + ip netns exec $ns ./pm_nl_ctl del $id $addr + fi +} + +pm_nl_flush_endpoint() +{ + local ns=$1 + + if [ $ip_mptcp -eq 1 ]; then + ip -n $ns mptcp endpoint flush + else + ip netns exec $ns ./pm_nl_ctl flush + fi +} + +pm_nl_show_endpoints() +{ + local ns=$1 + + if [ $ip_mptcp -eq 1 ]; then + ip -n $ns mptcp endpoint show + else + ip netns exec $ns ./pm_nl_ctl dump + fi +} + +pm_nl_change_endpoint() +{ + local ns=$1 + local id=$2 + local flags=$3 + + if [ $ip_mptcp -eq 1 ]; then + ip -n $ns mptcp endpoint change id $id ${flags//","/" "} + else + ip netns exec $ns ./pm_nl_ctl set id $id flags $flags + fi +} + +pm_nl_check_endpoint() +{ + local line expected_line + local msg="$1" + local ns=$2 + local addr=$3 + local _flags="" + local flags + local _port + local port + local dev + local _id + local id + + print_check "${msg}" + + shift 3 + while [ -n "$1" ]; do + if [ $1 = "flags" ]; then + _flags=$2 + [ -n "$_flags" ]; flags="flags $_flags" + shift + elif [ $1 = "dev" ]; then + [ -n "$2" ]; dev="dev $1" + shift + elif [ $1 = "id" ]; then + _id=$2 + [ -n "$_id" ]; id="id $_id" + shift + elif [ $1 = "port" ]; then + _port=$2 + [ -n "$_port" ]; port=" port $_port" + shift + fi + + shift + done + + if [ -z "$id" ]; then + test_fail "bad test - missing endpoint id" + return + fi + + if [ $ip_mptcp -eq 1 ]; then + # get line and trim trailing whitespace + line=$(ip -n $ns mptcp endpoint show $id) + line="${line% }" + # the dump order is: address id flags port dev + [ -n "$addr" ] && expected_line="$addr" + expected_line+=" $id" + [ -n "$_flags" ] && expected_line+=" ${_flags//","/" "}" + [ -n "$dev" ] && expected_line+=" $dev" + [ -n "$port" ] && expected_line+=" $port" + else + line=$(ip netns exec $ns ./pm_nl_ctl get $_id) + # the dump order is: id flags dev address port + expected_line="$id" + [ -n "$flags" ] && expected_line+=" $flags" + [ -n "$dev" ] && expected_line+=" $dev" + [ -n "$addr" ] && expected_line+=" $addr" + [ -n "$_port" ] && expected_line+=" $_port" + fi + if [ "$line" = "$expected_line" ]; then + print_ok + else + fail_test "expected '$expected_line' found '$line'" + fi +} + +pm_nl_set_endpoint() +{ + local listener_ns="$1" + local connector_ns="$2" + local connect_addr="$3" + + local addr_nr_ns1=${addr_nr_ns1:-0} + local addr_nr_ns2=${addr_nr_ns2:-0} + local sflags=${sflags:-""} + local fullmesh=${fullmesh:-""} + + local flags="subflow" + if [ -n "${fullmesh}" ]; then + flags="${flags},fullmesh" + addr_nr_ns2=${fullmesh} + fi + + # let the mptcp subflow be established in background before + # do endpoint manipulation + if [ $addr_nr_ns1 != "0" ] || [ $addr_nr_ns2 != "0" ]; then + sleep 1 + fi + + if [ $addr_nr_ns1 -gt 0 ]; then + local counter=2 + local add_nr_ns1=${addr_nr_ns1} + local id=10 + while [ $add_nr_ns1 -gt 0 ]; do + local addr + if mptcp_lib_is_v6 "${connect_addr}"; then + addr="dead:beef:$counter::1" + else + addr="10.0.$counter.1" + fi + pm_nl_add_endpoint $ns1 $addr flags signal + counter=$((counter + 1)) + add_nr_ns1=$((add_nr_ns1 - 1)) + id=$((id + 1)) + done + elif [ $addr_nr_ns1 -lt 0 ]; then + local rm_nr_ns1=$((-addr_nr_ns1)) + if [ $rm_nr_ns1 -lt 8 ]; then + local counter=0 + local line + pm_nl_show_endpoints ${listener_ns} | while read -r line; do + # shellcheck disable=SC2206 # we do want to split per word + local arr=($line) + local nr=0 + + local i + for i in "${arr[@]}"; do + if [ $i = "id" ]; then + if [ $counter -eq $rm_nr_ns1 ]; then + break + fi + id=${arr[$nr+1]} + rm_addr=$(rm_addr_count ${connector_ns}) + pm_nl_del_endpoint ${listener_ns} $id + wait_rm_addr ${connector_ns} ${rm_addr} + counter=$((counter + 1)) + fi + nr=$((nr + 1)) + done + done + elif [ $rm_nr_ns1 -eq 8 ]; then + pm_nl_flush_endpoint ${listener_ns} + elif [ $rm_nr_ns1 -eq 9 ]; then + pm_nl_del_endpoint ${listener_ns} 0 ${connect_addr} + fi + fi + + # if newly added endpoints must be deleted, give the background msk + # some time to created them + [ $addr_nr_ns1 -gt 0 ] && [ $addr_nr_ns2 -lt 0 ] && sleep 1 + + if [ $addr_nr_ns2 -gt 0 ]; then + local add_nr_ns2=${addr_nr_ns2} + local counter=3 + local id=20 + while [ $add_nr_ns2 -gt 0 ]; do + local addr + if mptcp_lib_is_v6 "${connect_addr}"; then + addr="dead:beef:$counter::2" + else + addr="10.0.$counter.2" + fi + pm_nl_add_endpoint $ns2 $addr flags $flags + counter=$((counter + 1)) + add_nr_ns2=$((add_nr_ns2 - 1)) + id=$((id + 1)) + done + elif [ $addr_nr_ns2 -lt 0 ]; then + local rm_nr_ns2=$((-addr_nr_ns2)) + if [ $rm_nr_ns2 -lt 8 ]; then + local counter=0 + local line + pm_nl_show_endpoints ${connector_ns} | while read -r line; do + # shellcheck disable=SC2206 # we do want to split per word + local arr=($line) + local nr=0 + + local i + for i in "${arr[@]}"; do + if [ $i = "id" ]; then + if [ $counter -eq $rm_nr_ns2 ]; then + break + fi + local id rm_addr + # rm_addr are serialized, allow the previous one to + # complete + id=${arr[$nr+1]} + rm_addr=$(rm_addr_count ${listener_ns}) + pm_nl_del_endpoint ${connector_ns} $id + wait_rm_addr ${listener_ns} ${rm_addr} + counter=$((counter + 1)) + fi + nr=$((nr + 1)) + done + done + elif [ $rm_nr_ns2 -eq 8 ]; then + pm_nl_flush_endpoint ${connector_ns} + elif [ $rm_nr_ns2 -eq 9 ]; then + local addr + if mptcp_lib_is_v6 "${connect_addr}"; then + addr="dead:beef:1::2" + else + addr="10.0.1.2" + fi + pm_nl_del_endpoint ${connector_ns} 0 $addr + fi + fi + + if [ -n "${sflags}" ]; then + sleep 1 + + local netns + for netns in "$ns1" "$ns2"; do + local line + pm_nl_show_endpoints $netns | while read -r line; do + # shellcheck disable=SC2206 # we do want to split per word + local arr=($line) + local nr=0 + local id + + local i + for i in "${arr[@]}"; do + if [ $i = "id" ]; then + id=${arr[$nr+1]} + fi + nr=$((nr + 1)) + done + pm_nl_change_endpoint $netns $id $sflags + done + done + fi +} + +chk_cestab_nr() +{ + local ns=$1 + local cestab=$2 + local count + + print_check "cestab $cestab" + count=$(mptcp_lib_get_counter ${ns} "MPTcpExtMPCurrEstab") + if [ -z "$count" ]; then + print_skip + elif [ "$count" != "$cestab" ]; then + fail_test "got $count current establish[s] expected $cestab" + else + print_ok + fi +} + +# $1 namespace 1, $2 namespace 2 +check_cestab() +{ + if [ -n "${cestab_ns1}" ]; then + chk_cestab_nr ${1} ${cestab_ns1} + fi + if [ -n "${cestab_ns2}" ]; then + chk_cestab_nr ${2} ${cestab_ns2} + fi +} + +do_transfer() +{ + local listener_ns="$1" + local connector_ns="$2" + local cl_proto="$3" + local srv_proto="$4" + local connect_addr="$5" + + local port=$((10000 + MPTCP_LIB_TEST_COUNTER - 1)) + local cappid + local FAILING_LINKS=${FAILING_LINKS:-""} + local fastclose=${fastclose:-""} + local speed=${speed:-"fast"} :> "$cout" :> "$sout" :> "$capout" - if [ $capture -eq 1 ]; then + if $capture; then + local capuser if [ -z $SUDO_USER ] ; then capuser="" else capuser="-Z $SUDO_USER" fi - capfile=$(printf "mp_join-%02u-%s.pcap" "$TEST_COUNT" "${listener_ns}") + capfile=$(printf "mp_join-%02u-%s.pcap" "$MPTCP_LIB_TEST_COUNTER" "${listener_ns}") - echo "Capturing traffic for test $TEST_COUNT into $capfile" + echo "Capturing traffic for test $MPTCP_LIB_TEST_COUNTER into $capfile" ip netns exec ${listener_ns} tcpdump -i any -s 65535 -B 32768 $capuser -w $capfile > "$capout" 2>&1 & cappid=$! sleep 1 fi - if [[ $rm_nr_ns1 -eq 0 && $rm_nr_ns2 -eq 0 ]]; then - mptcp_connect="./mptcp_connect -j" - else - mptcp_connect="./mptcp_connect -r" + NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \ + nstat -n + NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \ + nstat -n + + local extra_args + if [ $speed = "fast" ]; then + extra_args="-j" + elif [ $speed = "slow" ]; then + extra_args="-r 50" + elif [ $speed -gt 0 ]; then + extra_args="-r ${speed}" fi - ip netns exec ${listener_ns} $mptcp_connect -t $timeout -l -p $port -s ${srv_proto} 0.0.0.0 < "$sin" > "$sout" & - spid=$! - - sleep 1 - - ip netns exec ${connector_ns} $mptcp_connect -t $timeout -p $port -s ${cl_proto} $connect_addr < "$cin" > "$cout" & - cpid=$! + local extra_cl_args="" + local extra_srv_args="" + local trunc_size="" + if [ -n "${fastclose}" ]; then + if [ ${test_linkfail} -le 1 ]; then + fail_test "fastclose tests need test_linkfail argument" + return 1 + fi - if [ $rm_nr_ns1 -gt 0 ]; then - counter=1 - sleep 1 + # disconnect + trunc_size=${test_linkfail} + local side=${fastclose} - while [ $counter -le $rm_nr_ns1 ] - do - ip netns exec ${listener_ns} ./pm_nl_ctl del $counter - sleep 1 - let counter+=1 - done + if [ ${side} = "client" ]; then + extra_cl_args="-f ${test_linkfail}" + extra_srv_args="-f -1" + elif [ ${side} = "server" ]; then + extra_srv_args="-f ${test_linkfail}" + extra_cl_args="-f -1" + else + fail_test "wrong/unknown fastclose spec ${side}" + return 1 + fi fi - if [ $rm_nr_ns2 -gt 0 ]; then - counter=1 - sleep 1 - - while [ $counter -le $rm_nr_ns2 ] - do - ip netns exec ${connector_ns} ./pm_nl_ctl del $counter - sleep 1 - let counter+=1 - done + extra_srv_args="$extra_args $extra_srv_args" + if [ "$test_linkfail" -gt 1 ];then + timeout ${timeout_test} \ + ip netns exec ${listener_ns} \ + ./mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \ + $extra_srv_args "::" < "$sinfail" > "$sout" & + else + timeout ${timeout_test} \ + ip netns exec ${listener_ns} \ + ./mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \ + $extra_srv_args "::" < "$sin" > "$sout" & + fi + local spid=$! + + mptcp_lib_wait_local_port_listen "${listener_ns}" "${port}" + + extra_cl_args="$extra_args $extra_cl_args" + if [ "$test_linkfail" -eq 0 ];then + timeout ${timeout_test} \ + ip netns exec ${connector_ns} \ + ./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \ + $extra_cl_args $connect_addr < "$cin" > "$cout" & + elif [ "$test_linkfail" -eq 1 ] || [ "$test_linkfail" -eq 2 ];then + ( cat "$cinfail" ; sleep 2; link_failure $listener_ns ; cat "$cinfail" ) | \ + tee "$cinsent" | \ + timeout ${timeout_test} \ + ip netns exec ${connector_ns} \ + ./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \ + $extra_cl_args $connect_addr > "$cout" & + else + tee "$cinsent" < "$cinfail" | \ + timeout ${timeout_test} \ + ip netns exec ${connector_ns} \ + ./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \ + $extra_cl_args $connect_addr > "$cout" & fi + local cpid=$! + + pm_nl_set_endpoint $listener_ns $connector_ns $connect_addr + check_cestab $listener_ns $connector_ns wait $cpid - retc=$? + local retc=$? wait $spid - rets=$? + local rets=$? - if [ $capture -eq 1 ]; then + if $capture; then sleep 1 kill $cappid fi + NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \ + nstat | grep Tcp > /tmp/${listener_ns}.out + NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \ + nstat | grep Tcp > /tmp/${connector_ns}.out + if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then - echo " client exit code $retc, server $rets" 1>&2 + fail_test "client exit code $retc, server $rets" echo -e "\nnetns ${listener_ns} socket stat for ${port}:" 1>&2 - ip netns exec ${listener_ns} ss -nita 1>&2 -o "sport = :$port" + ip netns exec ${listener_ns} ss -Menita 1>&2 -o "sport = :$port" + cat /tmp/${listener_ns}.out echo -e "\nnetns ${connector_ns} socket stat for ${port}:" 1>&2 - ip netns exec ${connector_ns} ss -nita 1>&2 -o "dport = :$port" + ip netns exec ${connector_ns} ss -Menita 1>&2 -o "dport = :$port" + cat /tmp/${connector_ns}.out cat "$capout" return 1 fi - check_transfer $sin $cout "file received by client" + if [ "$test_linkfail" -gt 1 ];then + check_transfer $sinfail $cout "file received by client" $trunc_size + else + check_transfer $sin $cout "file received by client" $trunc_size + fi retc=$? - check_transfer $cin $sout "file received by server" + if [ "$test_linkfail" -eq 0 ];then + check_transfer $cin $sout "file received by server" $trunc_size + else + check_transfer $cinsent $sout "file received by server" $trunc_size + fi rets=$? if [ $retc -eq 0 ] && [ $rets -eq 0 ];then @@ -234,95 +1127,391 @@ do_transfer() make_file() { - name=$1 - who=$2 - - SIZE=1 + local name=$1 + local who=$2 + local size=$3 - dd if=/dev/urandom of="$name" bs=1024 count=$SIZE 2> /dev/null - echo -e "\nMPTCP_TEST_FILE_END_MARKER" >> "$name" + mptcp_lib_make_file $name 1024 $size - echo "Created $name (size $SIZE KB) containing data sent by $who" + print_info "Test file (size $size KB) for $who" } run_tests() { - listener_ns="$1" - connector_ns="$2" - connect_addr="$3" - lret=0 + local listener_ns="$1" + local connector_ns="$2" + local connect_addr="$3" - do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} 0 0 - lret=$? - if [ $lret -ne 0 ]; then - ret=$lret - return + local size + local test_linkfail=${test_linkfail:-0} + + # The values above 2 are reused to make test files + # with the given sizes (KB) + if [ "$test_linkfail" -gt 2 ]; then + size=$test_linkfail + + if [ -z "$cinfail" ]; then + cinfail=$(mktemp) + fi + make_file "$cinfail" "client" $size + # create the input file for the failure test when + # the first failure test run + elif [ "$test_linkfail" -ne 0 ] && [ -z "$cinfail" ]; then + # the client file must be considerably larger + # of the maximum expected cwin value, or the + # link utilization will be not predicable + size=$((RANDOM%2)) + size=$((size+1)) + size=$((size*8192)) + size=$((size + ( RANDOM % 8192) )) + + cinfail=$(mktemp) + make_file "$cinfail" "client" $size fi + + if [ "$test_linkfail" -gt 2 ]; then + size=$test_linkfail + + if [ -z "$sinfail" ]; then + sinfail=$(mktemp) + fi + make_file "$sinfail" "server" $size + elif [ "$test_linkfail" -eq 2 ] && [ -z "$sinfail" ]; then + size=$((RANDOM%16)) + size=$((size+1)) + size=$((size*2048)) + + sinfail=$(mktemp) + make_file "$sinfail" "server" $size + fi + + do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} } -run_remove_tests() +dump_stats() { - listener_ns="$1" - connector_ns="$2" - connect_addr="$3" - rm_nr_ns1="$4" - rm_nr_ns2="$5" - lret=0 + echo Server ns stats + ip netns exec $ns1 nstat -as | grep Tcp + echo Client ns stats + ip netns exec $ns2 nstat -as | grep Tcp +} - do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} ${rm_nr_ns1} ${rm_nr_ns2} - lret=$? - if [ $lret -ne 0 ]; then - ret=$lret - return +chk_csum_nr() +{ + local csum_ns1=${1:-0} + local csum_ns2=${2:-0} + local count + local extra_msg="" + local allow_multi_errors_ns1=0 + local allow_multi_errors_ns2=0 + + if [[ "${csum_ns1}" = "+"* ]]; then + allow_multi_errors_ns1=1 + csum_ns1=${csum_ns1:1} + fi + if [[ "${csum_ns2}" = "+"* ]]; then + allow_multi_errors_ns2=1 + csum_ns2=${csum_ns2:1} + fi + + print_check "sum" + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtDataCsumErr") + if [ "$count" != "$csum_ns1" ]; then + extra_msg+=" ns1=$count" + fi + if [ -z "$count" ]; then + print_skip + elif { [ "$count" != $csum_ns1 ] && [ $allow_multi_errors_ns1 -eq 0 ]; } || + { [ "$count" -lt $csum_ns1 ] && [ $allow_multi_errors_ns1 -eq 1 ]; }; then + fail_test "got $count data checksum error[s] expected $csum_ns1" + else + print_ok + fi + print_check "csum" + count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtDataCsumErr") + if [ "$count" != "$csum_ns2" ]; then + extra_msg+=" ns2=$count" + fi + if [ -z "$count" ]; then + print_skip + elif { [ "$count" != $csum_ns2 ] && [ $allow_multi_errors_ns2 -eq 0 ]; } || + { [ "$count" -lt $csum_ns2 ] && [ $allow_multi_errors_ns2 -eq 1 ]; }; then + fail_test "got $count data checksum error[s] expected $csum_ns2" + else + print_ok + fi + + print_info "$extra_msg" +} + +chk_fail_nr() +{ + local fail_tx=$1 + local fail_rx=$2 + local ns_invert=${3:-""} + local count + local ns_tx=$ns1 + local ns_rx=$ns2 + local extra_msg="" + local allow_tx_lost=0 + local allow_rx_lost=0 + + if [[ $ns_invert = "invert" ]]; then + ns_tx=$ns2 + ns_rx=$ns1 + extra_msg="invert" + fi + + if [[ "${fail_tx}" = "-"* ]]; then + allow_tx_lost=1 + fail_tx=${fail_tx:1} + fi + if [[ "${fail_rx}" = "-"* ]]; then + allow_rx_lost=1 + fail_rx=${fail_rx:1} + fi + + print_check "ftx" + count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMPFailTx") + if [ "$count" != "$fail_tx" ]; then + extra_msg+=",tx=$count" + fi + if [ -z "$count" ]; then + print_skip + elif { [ "$count" != "$fail_tx" ] && [ $allow_tx_lost -eq 0 ]; } || + { [ "$count" -gt "$fail_tx" ] && [ $allow_tx_lost -eq 1 ]; }; then + fail_test "got $count MP_FAIL[s] TX expected $fail_tx" + else + print_ok + fi + + print_check "failrx" + count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtMPFailRx") + if [ "$count" != "$fail_rx" ]; then + extra_msg+=",rx=$count" + fi + if [ -z "$count" ]; then + print_skip + elif { [ "$count" != "$fail_rx" ] && [ $allow_rx_lost -eq 0 ]; } || + { [ "$count" -gt "$fail_rx" ] && [ $allow_rx_lost -eq 1 ]; }; then + fail_test "got $count MP_FAIL[s] RX expected $fail_rx" + else + print_ok + fi + + print_info "$extra_msg" +} + +chk_fclose_nr() +{ + local fclose_tx=$1 + local fclose_rx=$2 + local ns_invert=$3 + local count + local ns_tx=$ns2 + local ns_rx=$ns1 + local extra_msg="" + + if [[ $ns_invert = "invert" ]]; then + ns_tx=$ns1 + ns_rx=$ns2 + extra_msg="invert" + fi + + print_check "ctx" + count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMPFastcloseTx") + if [ -z "$count" ]; then + print_skip + elif [ "$count" != "$fclose_tx" ]; then + extra_msg+=",tx=$count" + fail_test "got $count MP_FASTCLOSE[s] TX expected $fclose_tx" + else + print_ok + fi + + print_check "fclzrx" + count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtMPFastcloseRx") + if [ -z "$count" ]; then + print_skip + elif [ "$count" != "$fclose_rx" ]; then + extra_msg+=",rx=$count" + fail_test "got $count MP_FASTCLOSE[s] RX expected $fclose_rx" + else + print_ok + fi + + print_info "$extra_msg" +} + +chk_rst_nr() +{ + local rst_tx=$1 + local rst_rx=$2 + local ns_invert=${3:-""} + local count + local ns_tx=$ns1 + local ns_rx=$ns2 + local extra_msg="" + + if [[ $ns_invert = "invert" ]]; then + ns_tx=$ns2 + ns_rx=$ns1 + extra_msg="invert" + fi + + print_check "rtx" + count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMPRstTx") + if [ -z "$count" ]; then + print_skip + # accept more rst than expected except if we don't expect any + elif { [ $rst_tx -ne 0 ] && [ $count -lt $rst_tx ]; } || + { [ $rst_tx -eq 0 ] && [ $count -ne 0 ]; }; then + fail_test "got $count MP_RST[s] TX expected $rst_tx" + else + print_ok + fi + + print_check "rstrx" + count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtMPRstRx") + if [ -z "$count" ]; then + print_skip + # accept more rst than expected except if we don't expect any + elif { [ $rst_rx -ne 0 ] && [ $count -lt $rst_rx ]; } || + { [ $rst_rx -eq 0 ] && [ $count -ne 0 ]; }; then + fail_test "got $count MP_RST[s] RX expected $rst_rx" + else + print_ok + fi + + print_info "$extra_msg" +} + +chk_infi_nr() +{ + local infi_tx=$1 + local infi_rx=$2 + local count + + print_check "itx" + count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtInfiniteMapTx") + if [ -z "$count" ]; then + print_skip + elif [ "$count" != "$infi_tx" ]; then + fail_test "got $count infinite map[s] TX expected $infi_tx" + else + print_ok + fi + + print_check "infirx" + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtInfiniteMapRx") + if [ -z "$count" ]; then + print_skip + elif [ "$count" != "$infi_rx" ]; then + fail_test "got $count infinite map[s] RX expected $infi_rx" + else + print_ok fi } chk_join_nr() { - local msg="$1" - local syn_nr=$2 - local syn_ack_nr=$3 - local ack_nr=$4 + local syn_nr=$1 + local syn_ack_nr=$2 + local ack_nr=$3 + local csum_ns1=${4:-0} + local csum_ns2=${5:-0} + local fail_nr=${6:-0} + local rst_nr=${7:-0} + local infi_nr=${8:-0} + local corrupted_pkts=${9:-0} local count - local dump_stats + local with_cookie - printf "%02u %-36s %s" "$TEST_COUNT" "$msg" "syn" - count=`ip netns exec $ns1 nstat -as | grep MPTcpExtMPJoinSynRx | awk '{print $2}'` - [ -z "$count" ] && count=0 - if [ "$count" != "$syn_nr" ]; then - echo "[fail] got $count JOIN[s] syn expected $syn_nr" - ret=1 - dump_stats=1 + if [ "${corrupted_pkts}" -gt 0 ]; then + print_info "${corrupted_pkts} corrupted pkts" + fi + + print_check "syn" + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinSynRx") + if [ -z "$count" ]; then + print_skip + elif [ "$count" != "$syn_nr" ]; then + fail_test "got $count JOIN[s] syn expected $syn_nr" else - echo -n "[ ok ]" + print_ok fi - echo -n " - synack" - count=`ip netns exec $ns2 nstat -as | grep MPTcpExtMPJoinSynAckRx | awk '{print $2}'` - [ -z "$count" ] && count=0 - if [ "$count" != "$syn_ack_nr" ]; then - echo "[fail] got $count JOIN[s] synack expected $syn_ack_nr" - ret=1 - dump_stats=1 + print_check "synack" + with_cookie=$(ip netns exec $ns2 sysctl -n net.ipv4.tcp_syncookies) + count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinSynAckRx") + if [ -z "$count" ]; then + print_skip + elif [ "$count" != "$syn_ack_nr" ]; then + # simult connections exceeding the limit with cookie enabled could go up to + # synack validation as the conn limit can be enforced reliably only after + # the subflow creation + if [ "$with_cookie" = 2 ] && [ "$count" -gt "$syn_ack_nr" ] && [ "$count" -le "$syn_nr" ]; then + print_ok + else + fail_test "got $count JOIN[s] synack expected $syn_ack_nr" + fi else - echo -n "[ ok ]" + print_ok fi - echo -n " - ack" - count=`ip netns exec $ns1 nstat -as | grep MPTcpExtMPJoinAckRx | awk '{print $2}'` - [ -z "$count" ] && count=0 - if [ "$count" != "$ack_nr" ]; then - echo "[fail] got $count JOIN[s] ack expected $ack_nr" - ret=1 + print_check "ack" + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinAckRx") + if [ -z "$count" ]; then + print_skip + elif [ "$count" != "$ack_nr" ]; then + fail_test "got $count JOIN[s] ack expected $ack_nr" + else + print_ok + fi + if $validate_checksum; then + chk_csum_nr $csum_ns1 $csum_ns2 + chk_fail_nr $fail_nr $fail_nr + chk_rst_nr $rst_nr $rst_nr + chk_infi_nr $infi_nr $infi_nr + fi +} + +# a negative value for 'stale_max' means no upper bound: +# for bidirectional transfer, if one peer sleep for a while +# - as these tests do - we can have a quite high number of +# stale/recover conversions, proportional to +# sleep duration/ MPTCP-level RTX interval. +chk_stale_nr() +{ + local ns=$1 + local stale_min=$2 + local stale_max=$3 + local stale_delta=$4 + local dump_stats + local stale_nr + local recover_nr + + print_check "stale" + + stale_nr=$(mptcp_lib_get_counter ${ns} "MPTcpExtSubflowStale") + recover_nr=$(mptcp_lib_get_counter ${ns} "MPTcpExtSubflowRecover") + if [ -z "$stale_nr" ] || [ -z "$recover_nr" ]; then + print_skip + elif [ $stale_nr -lt $stale_min ] || + { [ $stale_max -gt 0 ] && [ $stale_nr -gt $stale_max ]; } || + [ $((stale_nr - recover_nr)) -ne $stale_delta ]; then + fail_test "got $stale_nr stale[s] $recover_nr recover[s], " \ + " expected stale in range [$stale_min..$stale_max]," \ + " stale-recover delta $stale_delta" dump_stats=1 else - echo "[ ok ]" + print_ok fi + if [ "${dump_stats}" = 1 ]; then - echo Server ns stats - ip netns exec $ns1 nstat -as | grep MPTcp - echo Client ns stats - ip netns exec $ns2 nstat -as | grep MPTcp + echo $ns stats + ip netns exec $ns ip -s link show + ip netns exec $ns nstat -as | grep MPTcp fi } @@ -330,36 +1519,136 @@ chk_add_nr() { local add_nr=$1 local echo_nr=$2 + local port_nr=${3:-0} + local syn_nr=${4:-$port_nr} + local syn_ack_nr=${5:-$port_nr} + local ack_nr=${6:-$port_nr} + local mis_syn_nr=${7:-0} + local mis_ack_nr=${8:-0} local count - local dump_stats + local timeout + + timeout=$(ip netns exec $ns1 sysctl -n net.mptcp.add_addr_timeout) + + print_check "add" + count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtAddAddr") + if [ -z "$count" ]; then + print_skip + # if the test configured a short timeout tolerate greater then expected + # add addrs options, due to retransmissions + elif [ "$count" != "$add_nr" ] && { [ "$timeout" -gt 1 ] || [ "$count" -lt "$add_nr" ]; }; then + fail_test "got $count ADD_ADDR[s] expected $add_nr" + else + print_ok + fi - printf "%-39s %s" " " "add" - count=`ip netns exec $ns2 nstat -as | grep MPTcpExtAddAddr | awk '{print $2}'` - [ -z "$count" ] && count=0 - if [ "$count" != "$add_nr" ]; then - echo "[fail] got $count ADD_ADDR[s] expected $add_nr" - ret=1 - dump_stats=1 + print_check "echo" + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtEchoAdd") + if [ -z "$count" ]; then + print_skip + elif [ "$count" != "$echo_nr" ]; then + fail_test "got $count ADD_ADDR echo[s] expected $echo_nr" else - echo -n "[ ok ]" + print_ok fi - echo -n " - echo " - count=`ip netns exec $ns1 nstat -as | grep MPTcpExtEchoAdd | awk '{print $2}'` - [ -z "$count" ] && count=0 - if [ "$count" != "$echo_nr" ]; then - echo "[fail] got $count ADD_ADDR echo[s] expected $echo_nr" - ret=1 - dump_stats=1 + if [ $port_nr -gt 0 ]; then + print_check "pt" + count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtPortAdd") + if [ -z "$count" ]; then + print_skip + elif [ "$count" != "$port_nr" ]; then + fail_test "got $count ADD_ADDR[s] with a port-number expected $port_nr" + else + print_ok + fi + + print_check "syn" + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinPortSynRx") + if [ -z "$count" ]; then + print_skip + elif [ "$count" != "$syn_nr" ]; then + fail_test "got $count JOIN[s] syn with a different \ + port-number expected $syn_nr" + else + print_ok + fi + + print_check "synack" + count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinPortSynAckRx") + if [ -z "$count" ]; then + print_skip + elif [ "$count" != "$syn_ack_nr" ]; then + fail_test "got $count JOIN[s] synack with a different \ + port-number expected $syn_ack_nr" + else + print_ok + fi + + print_check "ack" + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinPortAckRx") + if [ -z "$count" ]; then + print_skip + elif [ "$count" != "$ack_nr" ]; then + fail_test "got $count JOIN[s] ack with a different \ + port-number expected $ack_nr" + else + print_ok + fi + + print_check "syn" + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMismatchPortSynRx") + if [ -z "$count" ]; then + print_skip + elif [ "$count" != "$mis_syn_nr" ]; then + fail_test "got $count JOIN[s] syn with a mismatched \ + port-number expected $mis_syn_nr" + else + print_ok + fi + + print_check "ack" + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMismatchPortAckRx") + if [ -z "$count" ]; then + print_skip + elif [ "$count" != "$mis_ack_nr" ]; then + fail_test "got $count JOIN[s] ack with a mismatched \ + port-number expected $mis_ack_nr" + else + print_ok + fi + fi +} + +chk_add_tx_nr() +{ + local add_tx_nr=$1 + local echo_tx_nr=$2 + local timeout + local count + + timeout=$(ip netns exec $ns1 sysctl -n net.mptcp.add_addr_timeout) + + print_check "add TX" + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtAddAddrTx") + if [ -z "$count" ]; then + print_skip + # if the test configured a short timeout tolerate greater then expected + # add addrs options, due to retransmissions + elif [ "$count" != "$add_tx_nr" ] && { [ "$timeout" -gt 1 ] || [ "$count" -lt "$add_tx_nr" ]; }; then + fail_test "got $count ADD_ADDR[s] TX, expected $add_tx_nr" else - echo "[ ok ]" + print_ok fi - if [ "${dump_stats}" = 1 ]; then - echo Server ns stats - ip netns exec $ns1 nstat -as | grep MPTcp - echo Client ns stats - ip netns exec $ns2 nstat -as | grep MPTcp + print_check "echo TX" + count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtEchoAddTx") + if [ -z "$count" ]; then + print_skip + elif [ "$count" != "$echo_tx_nr" ]; then + fail_test "got $count ADD_ADDR echo[s] TX, expected $echo_tx_nr" + else + print_ok fi } @@ -367,236 +1656,2089 @@ chk_rm_nr() { local rm_addr_nr=$1 local rm_subflow_nr=$2 + local invert + local simult + local count + local addr_ns=$ns1 + local subflow_ns=$ns2 + local extra_msg="" + + shift 2 + while [ -n "$1" ]; do + [ "$1" = "invert" ] && invert=true + [ "$1" = "simult" ] && simult=true + shift + done + + if [ -z $invert ]; then + addr_ns=$ns1 + subflow_ns=$ns2 + elif [ $invert = "true" ]; then + addr_ns=$ns2 + subflow_ns=$ns1 + extra_msg="invert" + fi + + print_check "rm" + count=$(mptcp_lib_get_counter ${addr_ns} "MPTcpExtRmAddr") + if [ -z "$count" ]; then + print_skip + elif [ "$count" != "$rm_addr_nr" ]; then + fail_test "got $count RM_ADDR[s] expected $rm_addr_nr" + else + print_ok + fi + + print_check "rmsf" + count=$(mptcp_lib_get_counter ${subflow_ns} "MPTcpExtRmSubflow") + if [ -z "$count" ]; then + print_skip + elif [ -n "$simult" ]; then + local cnt suffix + + cnt=$(mptcp_lib_get_counter ${addr_ns} "MPTcpExtRmSubflow") + + # in case of simult flush, the subflow removal count on each side is + # unreliable + count=$((count + cnt)) + if [ "$count" != "$rm_subflow_nr" ]; then + suffix="$count in [$rm_subflow_nr:$((rm_subflow_nr*2))]" + extra_msg+=" simult" + fi + if [ $count -ge "$rm_subflow_nr" ] && \ + [ "$count" -le "$((rm_subflow_nr *2 ))" ]; then + print_ok "$suffix" + else + fail_test "got $count RM_SUBFLOW[s] expected in range [$rm_subflow_nr:$((rm_subflow_nr*2))]" + fi + elif [ "$count" != "$rm_subflow_nr" ]; then + fail_test "got $count RM_SUBFLOW[s] expected $rm_subflow_nr" + else + print_ok + fi + + print_info "$extra_msg" +} + +chk_rm_tx_nr() +{ + local rm_addr_tx_nr=$1 + + print_check "rm TX" + count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtRmAddrTx") + if [ -z "$count" ]; then + print_skip + elif [ "$count" != "$rm_addr_tx_nr" ]; then + fail_test "got $count RM_ADDR[s] expected $rm_addr_tx_nr" + else + print_ok + fi +} + +chk_prio_nr() +{ + local mp_prio_nr_tx=$1 + local mp_prio_nr_rx=$2 local count + + print_check "ptx" + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPPrioTx") + if [ -z "$count" ]; then + print_skip + elif [ "$count" != "$mp_prio_nr_tx" ]; then + fail_test "got $count MP_PRIO[s] TX expected $mp_prio_nr_tx" + else + print_ok + fi + + print_check "prx" + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPPrioRx") + if [ -z "$count" ]; then + print_skip + elif [ "$count" != "$mp_prio_nr_rx" ]; then + fail_test "got $count MP_PRIO[s] RX expected $mp_prio_nr_rx" + else + print_ok + fi +} + +chk_subflow_nr() +{ + local msg="$1" + local subflow_nr=$2 + local cnt1 + local cnt2 local dump_stats - printf "%-39s %s" " " "rm " - count=`ip netns exec $ns1 nstat -as | grep MPTcpExtRmAddr | awk '{print $2}'` - [ -z "$count" ] && count=0 - if [ "$count" != "$rm_addr_nr" ]; then - echo "[fail] got $count RM_ADDR[s] expected $rm_addr_nr" - ret=1 + print_check "${msg}" + + cnt1=$(ss -N $ns1 -tOni | grep -c token) + cnt2=$(ss -N $ns2 -tOni | grep -c token) + if [ "$cnt1" != "$subflow_nr" ] || [ "$cnt2" != "$subflow_nr" ]; then + fail_test "got $cnt1:$cnt2 subflows expected $subflow_nr" dump_stats=1 else - echo -n "[ ok ]" + print_ok fi - echo -n " - sf " - count=`ip netns exec $ns2 nstat -as | grep MPTcpExtRmSubflow | awk '{print $2}'` - [ -z "$count" ] && count=0 - if [ "$count" != "$rm_subflow_nr" ]; then - echo "[fail] got $count RM_SUBFLOW[s] expected $rm_subflow_nr" - ret=1 + if [ "${dump_stats}" = 1 ]; then + ss -N $ns1 -tOni + ss -N $ns1 -tOni | grep token + ip -n $ns1 mptcp endpoint + fi +} + +chk_mptcp_info() +{ + local info1=$1 + local exp1=$2 + local info2=$3 + local exp2=$4 + local cnt1 + local cnt2 + local dump_stats + + print_check "mptcp_info ${info1:0:15}=$exp1:$exp2" + + cnt1=$(ss -N $ns1 -inmHM | mptcp_lib_get_info_value "$info1" "$info1") + cnt2=$(ss -N $ns2 -inmHM | mptcp_lib_get_info_value "$info2" "$info2") + # 'ss' only display active connections and counters that are not 0. + [ -z "$cnt1" ] && cnt1=0 + [ -z "$cnt2" ] && cnt2=0 + + if [ "$cnt1" != "$exp1" ] || [ "$cnt2" != "$exp2" ]; then + fail_test "got $cnt1:$cnt2 $info1:$info2 expected $exp1:$exp2" dump_stats=1 else - echo "[ ok ]" + print_ok fi - if [ "${dump_stats}" = 1 ]; then - echo Server ns stats - ip netns exec $ns1 nstat -as | grep MPTcp - echo Client ns stats - ip netns exec $ns2 nstat -as | grep MPTcp - fi -} - -sin=$(mktemp) -sout=$(mktemp) -cin=$(mktemp) -cout=$(mktemp) -init -make_file "$cin" "client" -make_file "$sin" "server" -trap cleanup EXIT - -run_tests $ns1 $ns2 10.0.1.1 -chk_join_nr "no JOIN" "0" "0" "0" - -# subflow limted by client -reset -ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow -run_tests $ns1 $ns2 10.0.1.1 -chk_join_nr "single subflow, limited by client" 0 0 0 - -# subflow limted by server -reset -ip netns exec $ns2 ./pm_nl_ctl limits 0 1 -ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow -run_tests $ns1 $ns2 10.0.1.1 -chk_join_nr "single subflow, limited by server" 1 1 0 - -# subflow -reset -ip netns exec $ns1 ./pm_nl_ctl limits 0 1 -ip netns exec $ns2 ./pm_nl_ctl limits 0 1 -ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow -run_tests $ns1 $ns2 10.0.1.1 -chk_join_nr "single subflow" 1 1 1 - -# multiple subflows -reset -ip netns exec $ns1 ./pm_nl_ctl limits 0 2 -ip netns exec $ns2 ./pm_nl_ctl limits 0 2 -ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow -ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow -run_tests $ns1 $ns2 10.0.1.1 -chk_join_nr "multiple subflows" 2 2 2 - -# multiple subflows limited by serverf -reset -ip netns exec $ns1 ./pm_nl_ctl limits 0 1 -ip netns exec $ns2 ./pm_nl_ctl limits 0 2 -ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow -ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow -run_tests $ns1 $ns2 10.0.1.1 -chk_join_nr "multiple subflows, limited by server" 2 2 1 - -# add_address, unused -reset -ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal -run_tests $ns1 $ns2 10.0.1.1 -chk_join_nr "unused signal address" 0 0 0 -chk_add_nr 1 1 - -# accept and use add_addr -reset -ip netns exec $ns1 ./pm_nl_ctl limits 0 1 -ip netns exec $ns2 ./pm_nl_ctl limits 1 1 -ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal -run_tests $ns1 $ns2 10.0.1.1 -chk_join_nr "signal address" 1 1 1 -chk_add_nr 1 1 - -# accept and use add_addr with an additional subflow -# note: signal address in server ns and local addresses in client ns must -# belong to different subnets or one of the listed local address could be -# used for 'add_addr' subflow -reset -ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal -ip netns exec $ns1 ./pm_nl_ctl limits 0 2 -ip netns exec $ns2 ./pm_nl_ctl limits 1 2 -ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow -run_tests $ns1 $ns2 10.0.1.1 -chk_join_nr "subflow and signal" 2 2 2 -chk_add_nr 1 1 - -# accept and use add_addr with additional subflows -reset -ip netns exec $ns1 ./pm_nl_ctl limits 0 3 -ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal -ip netns exec $ns2 ./pm_nl_ctl limits 1 3 -ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow -ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow -run_tests $ns1 $ns2 10.0.1.1 -chk_join_nr "multiple subflows and signal" 3 3 3 -chk_add_nr 1 1 - -# single subflow, remove -reset -ip netns exec $ns1 ./pm_nl_ctl limits 0 1 -ip netns exec $ns2 ./pm_nl_ctl limits 0 1 -ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow -run_remove_tests $ns1 $ns2 10.0.1.1 0 1 -chk_join_nr "remove single subflow" 1 1 1 -chk_rm_nr 1 1 - -# multiple subflows, remove -reset -ip netns exec $ns1 ./pm_nl_ctl limits 0 2 -ip netns exec $ns2 ./pm_nl_ctl limits 0 2 -ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow -ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow -run_remove_tests $ns1 $ns2 10.0.1.1 0 2 -chk_join_nr "remove multiple subflows" 2 2 2 -chk_rm_nr 2 2 - -# single address, remove -reset -ip netns exec $ns1 ./pm_nl_ctl limits 0 1 -ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal -ip netns exec $ns2 ./pm_nl_ctl limits 1 1 -run_remove_tests $ns1 $ns2 10.0.1.1 1 0 -chk_join_nr "remove single address" 1 1 1 -chk_add_nr 1 1 -chk_rm_nr 0 0 - -# subflow and signal, remove -reset -ip netns exec $ns1 ./pm_nl_ctl limits 0 2 -ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal -ip netns exec $ns2 ./pm_nl_ctl limits 1 2 -ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow -run_remove_tests $ns1 $ns2 10.0.1.1 1 1 -chk_join_nr "remove subflow and signal" 2 2 2 -chk_add_nr 1 1 -chk_rm_nr 1 1 - -# subflows and signal, remove -reset -ip netns exec $ns1 ./pm_nl_ctl limits 0 3 -ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal -ip netns exec $ns2 ./pm_nl_ctl limits 1 3 -ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow -ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow -run_remove_tests $ns1 $ns2 10.0.1.1 1 2 -chk_join_nr "remove subflows and signal" 3 3 3 -chk_add_nr 1 1 -chk_rm_nr 2 2 - -# single subflow, syncookies -reset_with_cookies -ip netns exec $ns1 ./pm_nl_ctl limits 0 1 -ip netns exec $ns2 ./pm_nl_ctl limits 0 1 -ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow -run_tests $ns1 $ns2 10.0.1.1 -chk_join_nr "single subflow with syn cookies" 1 1 1 - -# multiple subflows with syn cookies -reset_with_cookies -ip netns exec $ns1 ./pm_nl_ctl limits 0 2 -ip netns exec $ns2 ./pm_nl_ctl limits 0 2 -ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow -ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow -run_tests $ns1 $ns2 10.0.1.1 -chk_join_nr "multiple subflows with syn cookies" 2 2 2 - -# multiple subflows limited by server -reset_with_cookies -ip netns exec $ns1 ./pm_nl_ctl limits 0 1 -ip netns exec $ns2 ./pm_nl_ctl limits 0 2 -ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow -ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow -run_tests $ns1 $ns2 10.0.1.1 -chk_join_nr "subflows limited by server w cookies" 2 2 1 - -# test signal address with cookies -reset_with_cookies -ip netns exec $ns1 ./pm_nl_ctl limits 0 1 -ip netns exec $ns2 ./pm_nl_ctl limits 1 1 -ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal -run_tests $ns1 $ns2 10.0.1.1 -chk_join_nr "signal address with syn cookies" 1 1 1 -chk_add_nr 1 1 - -# test cookie with subflow and signal -reset_with_cookies -ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal -ip netns exec $ns1 ./pm_nl_ctl limits 0 2 -ip netns exec $ns2 ./pm_nl_ctl limits 1 2 -ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow -run_tests $ns1 $ns2 10.0.1.1 -chk_join_nr "subflow and signal w cookies" 2 2 2 -chk_add_nr 1 1 - -# accept and use add_addr with additional subflows -reset_with_cookies -ip netns exec $ns1 ./pm_nl_ctl limits 0 3 -ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal -ip netns exec $ns2 ./pm_nl_ctl limits 1 3 -ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow -ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow -run_tests $ns1 $ns2 10.0.1.1 -chk_join_nr "subflows and signal w. cookies" 3 3 3 -chk_add_nr 1 1 + if [ "$dump_stats" = 1 ]; then + ss -N $ns1 -inmHM + ss -N $ns2 -inmHM + fi +} + +# $1: subflows in ns1 ; $2: subflows in ns2 +# number of all subflows, including the initial subflow. +chk_subflows_total() +{ + local cnt1 + local cnt2 + local info="subflows_total" + local dump_stats + + # if subflows_total counter is supported, use it: + if [ -n "$(ss -N $ns1 -inmHM | mptcp_lib_get_info_value $info $info)" ]; then + chk_mptcp_info $info $1 $info $2 + return + fi + + print_check "$info $1:$2" + + # if not, count the TCP connections that are in fact MPTCP subflows + cnt1=$(ss -N $ns1 -ti state established state syn-sent state syn-recv | + grep -c tcp-ulp-mptcp) + cnt2=$(ss -N $ns2 -ti state established state syn-sent state syn-recv | + grep -c tcp-ulp-mptcp) + + if [ "$1" != "$cnt1" ] || [ "$2" != "$cnt2" ]; then + fail_test "got subflows $cnt1:$cnt2 expected $1:$2" + dump_stats=1 + else + print_ok + fi + + if [ "$dump_stats" = 1 ]; then + ss -N $ns1 -ti + ss -N $ns2 -ti + fi +} + +chk_link_usage() +{ + local ns=$1 + local link=$2 + local out=$3 + local expected_rate=$4 + + local tx_link tx_total + tx_link=$(ip netns exec $ns cat /sys/class/net/$link/statistics/tx_bytes) + tx_total=$(stat --format=%s $out) + local tx_rate=$((tx_link * 100 / tx_total)) + local tolerance=5 + + print_check "link usage" + if [ $tx_rate -lt $((expected_rate - tolerance)) ] || \ + [ $tx_rate -gt $((expected_rate + tolerance)) ]; then + fail_test "got $tx_rate% usage, expected $expected_rate%" + else + print_ok + fi +} + +wait_attempt_fail() +{ + local timeout_ms=$((timeout_poll * 1000)) + local time=0 + local ns=$1 + + while [ $time -lt $timeout_ms ]; do + local cnt + + cnt=$(mptcp_lib_get_counter ${ns} "TcpAttemptFails") + + [ "$cnt" = 1 ] && return 1 + time=$((time + 100)) + sleep 0.1 + done + return 1 +} + +set_userspace_pm() +{ + local ns=$1 + + ip netns exec $ns sysctl -q net.mptcp.pm_type=1 +} + +subflows_tests() +{ + if reset "no JOIN"; then + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 0 0 0 + fi + + # subflow limited by client + if reset "single subflow, limited by client"; then + pm_nl_set_limits $ns1 0 0 + pm_nl_set_limits $ns2 0 0 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 0 0 0 + fi + + # subflow limited by server + if reset "single subflow, limited by server"; then + pm_nl_set_limits $ns1 0 0 + pm_nl_set_limits $ns2 0 1 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 0 + fi + + # subflow + if reset "single subflow"; then + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 1 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 1 + fi + + # multiple subflows + if reset "multiple subflows"; then + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 0 2 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 2 2 2 + fi + + # multiple subflows limited by server + if reset "multiple subflows, limited by server"; then + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 2 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 2 2 1 + fi + + # single subflow, dev + if reset "single subflow, dev"; then + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 1 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow dev ns2eth3 + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 1 + fi +} + +subflows_error_tests() +{ + # If a single subflow is configured, and matches the MPC src + # address, no additional subflow should be created + if reset "no MPC reuse with single endpoint"; then + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 1 + pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow + speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 0 0 0 + fi + + # multiple subflows, with subflow creation error + if reset_with_tcp_filter "multi subflows, with failing subflow" ns1 10.0.3.2 REJECT && + continue_if mptcp_lib_kallsyms_has "mptcp_pm_subflow_check_next$"; then + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 0 2 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow + speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 1 + fi + + # multiple subflows, with subflow timeout on MPJ + if reset_with_tcp_filter "multi subflows, with subflow timeout" ns1 10.0.3.2 DROP && + continue_if mptcp_lib_kallsyms_has "mptcp_pm_subflow_check_next$"; then + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 0 2 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow + speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 1 + fi + + # multiple subflows, check that the endpoint corresponding to + # closed subflow (due to reset) is not reused if additional + # subflows are added later + if reset_with_tcp_filter "multi subflows, fair usage on close" ns1 10.0.3.2 REJECT && + continue_if mptcp_lib_kallsyms_has "mptcp_pm_subflow_check_next$"; then + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 1 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 & + + # mpj subflow will be in TW after the reset + wait_attempt_fail $ns2 + pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow + wait + + # additional subflow could be created only if the PM select + # the later endpoint, skipping the already used one + chk_join_nr 1 1 1 + fi +} + +signal_address_tests() +{ + # add_address, unused + if reset "unused signal address"; then + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 0 0 0 + chk_add_tx_nr 1 1 + chk_add_nr 1 1 + fi + + # accept and use add_addr + if reset "signal address"; then + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 1 1 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 1 + chk_add_nr 1 1 + fi + + # accept and use add_addr with an additional subflow + # note: signal address in server ns and local addresses in client ns must + # belong to different subnets or one of the listed local address could be + # used for 'add_addr' subflow + if reset "subflow and signal"; then + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 1 2 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 2 2 2 + chk_add_nr 1 1 + fi + + # accept and use add_addr with additional subflows + if reset "multiple subflows and signal"; then + pm_nl_set_limits $ns1 0 3 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_set_limits $ns2 1 3 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 3 3 3 + chk_add_nr 1 1 + fi + + # signal addresses + if reset "signal addresses"; then + pm_nl_set_limits $ns1 3 3 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_add_endpoint $ns1 10.0.3.1 flags signal + pm_nl_add_endpoint $ns1 10.0.4.1 flags signal + pm_nl_set_limits $ns2 3 3 + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 3 3 3 + chk_add_nr 3 3 + fi + + # signal invalid addresses + if reset "signal invalid addresses"; then + pm_nl_set_limits $ns1 3 3 + pm_nl_add_endpoint $ns1 10.0.12.1 flags signal + pm_nl_add_endpoint $ns1 10.0.3.1 flags signal + pm_nl_add_endpoint $ns1 10.0.14.1 flags signal + pm_nl_set_limits $ns2 3 3 + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 1 + chk_add_nr 3 3 + fi + + # signal addresses race test + if reset "signal addresses race test"; then + pm_nl_set_limits $ns1 4 4 + pm_nl_set_limits $ns2 4 4 + pm_nl_add_endpoint $ns1 10.0.1.1 flags signal + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_add_endpoint $ns1 10.0.3.1 flags signal + pm_nl_add_endpoint $ns1 10.0.4.1 flags signal + pm_nl_add_endpoint $ns2 10.0.1.2 flags signal + pm_nl_add_endpoint $ns2 10.0.2.2 flags signal + pm_nl_add_endpoint $ns2 10.0.3.2 flags signal + pm_nl_add_endpoint $ns2 10.0.4.2 flags signal + + # the peer could possibly miss some addr notification, allow retransmission + ip netns exec $ns1 sysctl -q net.mptcp.add_addr_timeout=1 + speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 + + # It is not directly linked to the commit introducing this + # symbol but for the parent one which is linked anyway. + if ! mptcp_lib_kallsyms_has "mptcp_pm_subflow_check_next$"; then + chk_join_nr 3 3 2 + chk_add_nr 4 4 + else + chk_join_nr 3 3 3 + # the server will not signal the address terminating + # the MPC subflow + chk_add_nr 3 3 + fi + fi +} + +link_failure_tests() +{ + # accept and use add_addr with additional subflows and link loss + if reset "multiple flows, signal, link failure"; then + # without any b/w limit each veth could spool the packets and get + # them acked at xmit time, so that the corresponding subflow will + # have almost always no outstanding pkts, the scheduler will pick + # always the first subflow and we will have hard time testing + # active backup and link switch-over. + # Let's set some arbitrary (low) virtual link limits. + init_shapers + pm_nl_set_limits $ns1 0 3 + pm_nl_add_endpoint $ns1 10.0.2.1 dev ns1eth2 flags signal + pm_nl_set_limits $ns2 1 3 + pm_nl_add_endpoint $ns2 10.0.3.2 dev ns2eth3 flags subflow + pm_nl_add_endpoint $ns2 10.0.4.2 dev ns2eth4 flags subflow + test_linkfail=1 \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 3 3 3 + chk_add_nr 1 1 + chk_stale_nr $ns2 1 5 1 + fi + + # accept and use add_addr with additional subflows and link loss + # for bidirectional transfer + if reset "multi flows, signal, bidi, link fail"; then + init_shapers + pm_nl_set_limits $ns1 0 3 + pm_nl_add_endpoint $ns1 10.0.2.1 dev ns1eth2 flags signal + pm_nl_set_limits $ns2 1 3 + pm_nl_add_endpoint $ns2 10.0.3.2 dev ns2eth3 flags subflow + pm_nl_add_endpoint $ns2 10.0.4.2 dev ns2eth4 flags subflow + test_linkfail=2 \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 3 3 3 + chk_add_nr 1 1 + chk_stale_nr $ns2 1 -1 1 + fi + + # 2 subflows plus 1 backup subflow with a lossy link, backup + # will never be used + if reset "backup subflow unused, link failure"; then + init_shapers + pm_nl_set_limits $ns1 0 2 + pm_nl_add_endpoint $ns1 10.0.2.1 dev ns1eth2 flags signal + pm_nl_set_limits $ns2 1 2 + pm_nl_add_endpoint $ns2 10.0.3.2 dev ns2eth3 flags subflow,backup + FAILING_LINKS="1" test_linkfail=1 \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 2 2 2 + chk_add_nr 1 1 + chk_link_usage $ns2 ns2eth3 $cinsent 0 + fi + + # 2 lossy links after half transfer, backup will get half of + # the traffic + if reset "backup flow used, multi links fail"; then + init_shapers + pm_nl_set_limits $ns1 0 2 + pm_nl_add_endpoint $ns1 10.0.2.1 dev ns1eth2 flags signal + pm_nl_set_limits $ns2 1 2 + pm_nl_add_endpoint $ns2 10.0.3.2 dev ns2eth3 flags subflow,backup + FAILING_LINKS="1 2" test_linkfail=1 \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 2 2 2 + chk_add_nr 1 1 + chk_stale_nr $ns2 2 4 2 + chk_link_usage $ns2 ns2eth3 $cinsent 50 + fi + + # use a backup subflow with the first subflow on a lossy link + # for bidirectional transfer + if reset "backup flow used, bidi, link failure"; then + init_shapers + pm_nl_set_limits $ns1 0 2 + pm_nl_add_endpoint $ns1 10.0.2.1 dev ns1eth2 flags signal + pm_nl_set_limits $ns2 1 3 + pm_nl_add_endpoint $ns2 10.0.3.2 dev ns2eth3 flags subflow,backup + FAILING_LINKS="1 2" test_linkfail=2 \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 2 2 2 + chk_add_nr 1 1 + chk_stale_nr $ns2 1 -1 2 + chk_link_usage $ns2 ns2eth3 $cinsent 50 + fi +} + +add_addr_timeout_tests() +{ + # add_addr timeout + if reset_with_add_addr_timeout "signal address, ADD_ADDR timeout"; then + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 1 1 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 1 + chk_add_tx_nr 4 4 + chk_add_nr 4 0 + fi + + # add_addr timeout IPv6 + if reset_with_add_addr_timeout "signal address, ADD_ADDR6 timeout" 6; then + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 1 1 + pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal + speed=slow \ + run_tests $ns1 $ns2 dead:beef:1::1 + chk_join_nr 1 1 1 + chk_add_nr 4 0 + fi + + # signal addresses timeout + if reset_with_add_addr_timeout "signal addresses, ADD_ADDR timeout"; then + pm_nl_set_limits $ns1 2 2 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_add_endpoint $ns1 10.0.3.1 flags signal + pm_nl_set_limits $ns2 2 2 + speed=10 \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 2 2 2 + chk_add_nr 8 0 + fi + + # signal invalid addresses timeout + if reset_with_add_addr_timeout "invalid address, ADD_ADDR timeout"; then + pm_nl_set_limits $ns1 2 2 + pm_nl_add_endpoint $ns1 10.0.12.1 flags signal + pm_nl_add_endpoint $ns1 10.0.3.1 flags signal + pm_nl_set_limits $ns2 2 2 + speed=10 \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 1 + chk_add_nr 8 0 + fi +} + +remove_tests() +{ + # single subflow, remove + if reset "remove single subflow"; then + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 1 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + addr_nr_ns2=-1 speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 1 + chk_rm_tx_nr 1 + chk_rm_nr 1 1 + chk_rst_nr 0 0 + fi + + # multiple subflows, remove + if reset "remove multiple subflows"; then + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 0 2 + pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + addr_nr_ns2=-2 speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 2 2 2 + chk_rm_nr 2 2 + chk_rst_nr 0 0 + fi + + # single address, remove + if reset "remove single address"; then + pm_nl_set_limits $ns1 0 1 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_set_limits $ns2 1 1 + addr_nr_ns1=-1 speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 1 + chk_add_nr 1 1 + chk_rm_nr 1 1 invert + chk_rst_nr 0 0 + fi + + # subflow and signal, remove + if reset "remove subflow and signal"; then + pm_nl_set_limits $ns1 0 2 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_set_limits $ns2 1 2 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + addr_nr_ns1=-1 addr_nr_ns2=-1 speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 2 2 2 + chk_add_nr 1 1 + chk_rm_nr 1 1 + chk_rst_nr 0 0 + fi + + # subflows and signal, remove + if reset "remove subflows and signal"; then + pm_nl_set_limits $ns1 0 3 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_set_limits $ns2 1 3 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow + addr_nr_ns1=-1 addr_nr_ns2=-2 speed=10 \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 3 3 3 + chk_add_nr 1 1 + chk_rm_nr 2 2 + chk_rst_nr 0 0 + fi + + # addresses remove + if reset "remove addresses"; then + pm_nl_set_limits $ns1 3 3 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal id 250 + pm_nl_add_endpoint $ns1 10.0.3.1 flags signal + pm_nl_add_endpoint $ns1 10.0.4.1 flags signal + pm_nl_set_limits $ns2 3 3 + addr_nr_ns1=-3 speed=10 \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 3 3 3 + chk_add_nr 3 3 + chk_rm_nr 3 3 invert + chk_rst_nr 0 0 + fi + + # invalid addresses remove + if reset "remove invalid addresses"; then + pm_nl_set_limits $ns1 3 3 + pm_nl_add_endpoint $ns1 10.0.12.1 flags signal + pm_nl_add_endpoint $ns1 10.0.3.1 flags signal + pm_nl_add_endpoint $ns1 10.0.14.1 flags signal + pm_nl_set_limits $ns2 3 3 + addr_nr_ns1=-3 speed=10 \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 1 + chk_add_nr 3 3 + chk_rm_nr 3 1 invert + chk_rst_nr 0 0 + fi + + # subflows and signal, flush + if reset "flush subflows and signal"; then + pm_nl_set_limits $ns1 0 3 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_set_limits $ns2 1 3 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow + addr_nr_ns1=-8 addr_nr_ns2=-8 speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 3 3 3 + chk_add_nr 1 1 + chk_rm_nr 1 3 invert simult + chk_rst_nr 0 0 + fi + + # subflows flush + if reset "flush subflows"; then + pm_nl_set_limits $ns1 3 3 + pm_nl_set_limits $ns2 3 3 + pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow id 150 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow + addr_nr_ns1=-8 addr_nr_ns2=-8 speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 3 3 3 + + if mptcp_lib_kversion_ge 5.18; then + chk_rm_tx_nr 0 + chk_rm_nr 0 3 simult + else + chk_rm_nr 3 3 + fi + chk_rst_nr 0 0 + fi + + # addresses flush + if reset "flush addresses"; then + pm_nl_set_limits $ns1 3 3 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal id 250 + pm_nl_add_endpoint $ns1 10.0.3.1 flags signal + pm_nl_add_endpoint $ns1 10.0.4.1 flags signal + pm_nl_set_limits $ns2 3 3 + addr_nr_ns1=-8 addr_nr_ns2=-8 speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 3 3 3 + chk_add_nr 3 3 + chk_rm_nr 3 3 invert simult + chk_rst_nr 0 0 + fi + + # invalid addresses flush + if reset "flush invalid addresses"; then + pm_nl_set_limits $ns1 3 3 + pm_nl_add_endpoint $ns1 10.0.12.1 flags signal + pm_nl_add_endpoint $ns1 10.0.3.1 flags signal + pm_nl_add_endpoint $ns1 10.0.14.1 flags signal + pm_nl_set_limits $ns2 3 3 + addr_nr_ns1=-8 speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 1 + chk_add_nr 3 3 + chk_rm_nr 3 1 invert + chk_rst_nr 0 0 + fi + + # remove id 0 subflow + if reset "remove id 0 subflow"; then + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 1 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + addr_nr_ns2=-9 speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 1 + chk_rm_nr 1 1 + chk_rst_nr 0 0 + fi + + # remove id 0 address + if reset "remove id 0 address"; then + pm_nl_set_limits $ns1 0 1 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_set_limits $ns2 1 1 + addr_nr_ns1=-9 speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 1 + chk_add_nr 1 1 + chk_rm_nr 1 1 invert + chk_rst_nr 0 0 invert + fi +} + +add_tests() +{ + # add single subflow + if reset "add single subflow"; then + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 1 + addr_nr_ns2=1 speed=slow cestab_ns2=1 \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 1 + chk_cestab_nr $ns2 0 + fi + + # add signal address + if reset "add signal address"; then + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 1 1 + addr_nr_ns1=1 speed=slow cestab_ns1=1 \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 1 + chk_add_nr 1 1 + chk_cestab_nr $ns1 0 + fi + + # add multiple subflows + if reset "add multiple subflows"; then + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 0 2 + addr_nr_ns2=2 speed=slow cestab_ns2=1 \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 2 2 2 + chk_cestab_nr $ns2 0 + fi + + # add multiple subflows IPv6 + if reset "add multiple subflows IPv6"; then + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 0 2 + addr_nr_ns2=2 speed=slow cestab_ns2=1 \ + run_tests $ns1 $ns2 dead:beef:1::1 + chk_join_nr 2 2 2 + chk_cestab_nr $ns2 0 + fi + + # add multiple addresses IPv6 + if reset "add multiple addresses IPv6"; then + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 2 2 + addr_nr_ns1=2 speed=slow cestab_ns1=1 \ + run_tests $ns1 $ns2 dead:beef:1::1 + chk_join_nr 2 2 2 + chk_add_nr 2 2 + chk_cestab_nr $ns1 0 + fi +} + +ipv6_tests() +{ + # subflow IPv6 + if reset "single subflow IPv6"; then + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 1 + pm_nl_add_endpoint $ns2 dead:beef:3::2 dev ns2eth3 flags subflow + speed=slow \ + run_tests $ns1 $ns2 dead:beef:1::1 + chk_join_nr 1 1 1 + fi + + # add_address, unused IPv6 + if reset "unused signal address IPv6"; then + pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal + speed=slow \ + run_tests $ns1 $ns2 dead:beef:1::1 + chk_join_nr 0 0 0 + chk_add_nr 1 1 + fi + + # signal address IPv6 + if reset "single address IPv6"; then + pm_nl_set_limits $ns1 0 1 + pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal + pm_nl_set_limits $ns2 1 1 + speed=slow \ + run_tests $ns1 $ns2 dead:beef:1::1 + chk_join_nr 1 1 1 + chk_add_nr 1 1 + fi + + # single address IPv6, remove + if reset "remove single address IPv6"; then + pm_nl_set_limits $ns1 0 1 + pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal + pm_nl_set_limits $ns2 1 1 + addr_nr_ns1=-1 speed=slow \ + run_tests $ns1 $ns2 dead:beef:1::1 + chk_join_nr 1 1 1 + chk_add_nr 1 1 + chk_rm_nr 1 1 invert + fi + + # subflow and signal IPv6, remove + if reset "remove subflow and signal IPv6"; then + pm_nl_set_limits $ns1 0 2 + pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal + pm_nl_set_limits $ns2 1 2 + pm_nl_add_endpoint $ns2 dead:beef:3::2 dev ns2eth3 flags subflow + addr_nr_ns1=-1 addr_nr_ns2=-1 speed=slow \ + run_tests $ns1 $ns2 dead:beef:1::1 + chk_join_nr 2 2 2 + chk_add_nr 1 1 + chk_rm_nr 1 1 + fi +} + +v4mapped_tests() +{ + # subflow IPv4-mapped to IPv4-mapped + if reset "single subflow IPv4-mapped"; then + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 1 + pm_nl_add_endpoint $ns2 "::ffff:10.0.3.2" flags subflow + run_tests $ns1 $ns2 "::ffff:10.0.1.1" + chk_join_nr 1 1 1 + fi + + # signal address IPv4-mapped with IPv4-mapped sk + if reset "signal address IPv4-mapped"; then + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 1 1 + pm_nl_add_endpoint $ns1 "::ffff:10.0.2.1" flags signal + run_tests $ns1 $ns2 "::ffff:10.0.1.1" + chk_join_nr 1 1 1 + chk_add_nr 1 1 + fi + + # subflow v4-map-v6 + if reset "single subflow v4-map-v6"; then + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 1 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + run_tests $ns1 $ns2 "::ffff:10.0.1.1" + chk_join_nr 1 1 1 + fi + + # signal address v4-map-v6 + if reset "signal address v4-map-v6"; then + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 1 1 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + run_tests $ns1 $ns2 "::ffff:10.0.1.1" + chk_join_nr 1 1 1 + chk_add_nr 1 1 + fi + + # subflow v6-map-v4 + if reset "single subflow v6-map-v4"; then + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 1 + pm_nl_add_endpoint $ns2 "::ffff:10.0.3.2" flags subflow + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 1 + fi + + # signal address v6-map-v4 + if reset "signal address v6-map-v4"; then + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 1 1 + pm_nl_add_endpoint $ns1 "::ffff:10.0.2.1" flags signal + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 1 + chk_add_nr 1 1 + fi + + # no subflow IPv6 to v4 address + if reset "no JOIN with diff families v4-v6"; then + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 1 + pm_nl_add_endpoint $ns2 dead:beef:2::2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 0 0 0 + fi + + # no subflow IPv6 to v4 address even if v6 has a valid v4 at the end + if reset "no JOIN with diff families v4-v6-2"; then + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 1 + pm_nl_add_endpoint $ns2 dead:beef:2::10.0.3.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 0 0 0 + fi + + # no subflow IPv4 to v6 address, no need to slow down too then + if reset "no JOIN with diff families v6-v4"; then + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 1 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + run_tests $ns1 $ns2 dead:beef:1::1 + chk_join_nr 0 0 0 + fi +} + +mixed_tests() +{ + if reset "IPv4 sockets do not use IPv6 addresses" && + continue_if mptcp_lib_kversion_ge 6.3; then + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 1 1 + pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal + speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 0 0 0 + fi + + # Need an IPv6 mptcp socket to allow subflows of both families + if reset "simult IPv4 and IPv6 subflows" && + continue_if mptcp_lib_kversion_ge 6.3; then + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 1 1 + pm_nl_add_endpoint $ns1 10.0.1.1 flags signal + speed=slow \ + run_tests $ns1 $ns2 dead:beef:2::1 + chk_join_nr 1 1 1 + fi + + # cross families subflows will not be created even in fullmesh mode + if reset "simult IPv4 and IPv6 subflows, fullmesh 1x1" && + continue_if mptcp_lib_kversion_ge 6.3; then + pm_nl_set_limits $ns1 0 4 + pm_nl_set_limits $ns2 1 4 + pm_nl_add_endpoint $ns2 dead:beef:2::2 flags subflow,fullmesh + pm_nl_add_endpoint $ns1 10.0.1.1 flags signal + speed=slow \ + run_tests $ns1 $ns2 dead:beef:2::1 + chk_join_nr 1 1 1 + fi + + # fullmesh still tries to create all the possibly subflows with + # matching family + if reset "simult IPv4 and IPv6 subflows, fullmesh 2x2" && + continue_if mptcp_lib_kversion_ge 6.3; then + pm_nl_set_limits $ns1 0 4 + pm_nl_set_limits $ns2 2 4 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal + fullmesh=1 speed=slow \ + run_tests $ns1 $ns2 dead:beef:1::1 + chk_join_nr 4 4 4 + fi +} + +backup_tests() +{ + # single subflow, backup + if reset "single subflow, backup" && + continue_if mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 1 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,backup + sflags=nobackup speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 1 + chk_prio_nr 0 1 + fi + + # single address, backup + if reset "single address, backup" && + continue_if mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then + pm_nl_set_limits $ns1 0 1 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_set_limits $ns2 1 1 + sflags=backup speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 1 + chk_add_nr 1 1 + chk_prio_nr 1 1 + fi + + # single address with port, backup + if reset "single address with port, backup" && + continue_if mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then + pm_nl_set_limits $ns1 0 1 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100 + pm_nl_set_limits $ns2 1 1 + sflags=backup speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 1 + chk_add_nr 1 1 + chk_prio_nr 1 1 + fi + + if reset "mpc backup" && + continue_if mptcp_lib_kallsyms_doesnt_have "T mptcp_subflow_send_ack$"; then + pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow,backup + speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 0 0 0 + chk_prio_nr 0 1 + fi + + if reset "mpc backup both sides" && + continue_if mptcp_lib_kallsyms_doesnt_have "T mptcp_subflow_send_ack$"; then + pm_nl_add_endpoint $ns1 10.0.1.1 flags subflow,backup + pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow,backup + speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 0 0 0 + chk_prio_nr 1 1 + fi + + if reset "mpc switch to backup" && + continue_if mptcp_lib_kallsyms_doesnt_have "T mptcp_subflow_send_ack$"; then + pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow + sflags=backup speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 0 0 0 + chk_prio_nr 0 1 + fi + + if reset "mpc switch to backup both sides" && + continue_if mptcp_lib_kallsyms_doesnt_have "T mptcp_subflow_send_ack$"; then + pm_nl_add_endpoint $ns1 10.0.1.1 flags subflow + pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow + sflags=backup speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 0 0 0 + chk_prio_nr 1 1 + fi +} + +verify_listener_events() +{ + local e_type=$2 + local e_saddr=$4 + local e_sport=$5 + local name + + if [ $e_type = $MPTCP_LIB_EVENT_LISTENER_CREATED ]; then + name="LISTENER_CREATED" + elif [ $e_type = $MPTCP_LIB_EVENT_LISTENER_CLOSED ]; then + name="LISTENER_CLOSED " + else + name="$e_type" + fi + + print_check "$name $e_saddr:$e_sport" + + if ! mptcp_lib_kallsyms_has "mptcp_event_pm_listener$"; then + print_skip "event not supported" + return + fi + + if mptcp_lib_verify_listener_events "${@}"; then + print_ok + return 0 + fi + fail_test +} + +add_addr_ports_tests() +{ + # signal address with port + if reset "signal address with port"; then + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 1 1 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100 + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 1 + chk_add_nr 1 1 1 + fi + + # subflow and signal with port + if reset "subflow and signal with port"; then + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100 + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 1 2 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 2 2 2 + chk_add_nr 1 1 1 + fi + + # single address with port, remove + # pm listener events + if reset_with_events "remove single address with port"; then + pm_nl_set_limits $ns1 0 1 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100 + pm_nl_set_limits $ns2 1 1 + addr_nr_ns1=-1 speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 1 + chk_add_nr 1 1 1 + chk_rm_nr 1 1 invert + + verify_listener_events $evts_ns1 $MPTCP_LIB_EVENT_LISTENER_CREATED \ + $MPTCP_LIB_AF_INET 10.0.2.1 10100 + verify_listener_events $evts_ns1 $MPTCP_LIB_EVENT_LISTENER_CLOSED \ + $MPTCP_LIB_AF_INET 10.0.2.1 10100 + kill_events_pids + fi + + # subflow and signal with port, remove + if reset "remove subflow and signal with port"; then + pm_nl_set_limits $ns1 0 2 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100 + pm_nl_set_limits $ns2 1 2 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + addr_nr_ns1=-1 addr_nr_ns2=-1 speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 2 2 2 + chk_add_nr 1 1 1 + chk_rm_nr 1 1 + fi + + # subflows and signal with port, flush + if reset "flush subflows and signal with port"; then + pm_nl_set_limits $ns1 0 3 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100 + pm_nl_set_limits $ns2 1 3 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow + addr_nr_ns1=-8 addr_nr_ns2=-2 speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 3 3 3 + chk_add_nr 1 1 + chk_rm_nr 1 3 invert simult + fi + + # multiple addresses with port + if reset "multiple addresses with port"; then + pm_nl_set_limits $ns1 2 2 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100 + pm_nl_add_endpoint $ns1 10.0.3.1 flags signal port 10100 + pm_nl_set_limits $ns2 2 2 + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 2 2 2 + chk_add_nr 2 2 2 + fi + + # multiple addresses with ports + if reset "multiple addresses with ports"; then + pm_nl_set_limits $ns1 2 2 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100 + pm_nl_add_endpoint $ns1 10.0.3.1 flags signal port 10101 + pm_nl_set_limits $ns2 2 2 + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 2 2 2 + chk_add_nr 2 2 2 + fi +} + +syncookies_tests() +{ + # single subflow, syncookies + if reset_with_cookies "single subflow with syn cookies"; then + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 1 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 1 + fi + + # multiple subflows with syn cookies + if reset_with_cookies "multiple subflows with syn cookies"; then + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 0 2 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 2 2 2 + fi + + # multiple subflows limited by server + if reset_with_cookies "subflows limited by server w cookies"; then + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 2 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 2 1 1 + fi + + # test signal address with cookies + if reset_with_cookies "signal address with syn cookies"; then + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 1 1 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 1 + chk_add_nr 1 1 + fi + + # test cookie with subflow and signal + if reset_with_cookies "subflow and signal w cookies"; then + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 1 2 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 2 2 2 + chk_add_nr 1 1 + fi + + # accept and use add_addr with additional subflows + if reset_with_cookies "subflows and signal w. cookies"; then + pm_nl_set_limits $ns1 0 3 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_set_limits $ns2 1 3 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 3 3 3 + chk_add_nr 1 1 + fi +} + +checksum_tests() +{ + # checksum test 0 0 + if reset_with_checksum 0 0; then + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 1 + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 0 0 0 + fi + + # checksum test 1 1 + if reset_with_checksum 1 1; then + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 1 + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 0 0 0 + fi + + # checksum test 0 1 + if reset_with_checksum 0 1; then + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 1 + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 0 0 0 + fi + + # checksum test 1 0 + if reset_with_checksum 1 0; then + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 1 + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 0 0 0 + fi +} + +deny_join_id0_tests() +{ + # subflow allow join id0 ns1 + if reset_with_allow_join_id0 "single subflow allow join id0 ns1" 1 0; then + pm_nl_set_limits $ns1 1 1 + pm_nl_set_limits $ns2 1 1 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 1 + fi + + # subflow allow join id0 ns2 + if reset_with_allow_join_id0 "single subflow allow join id0 ns2" 0 1; then + pm_nl_set_limits $ns1 1 1 + pm_nl_set_limits $ns2 1 1 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 0 0 0 + fi + + # signal address allow join id0 ns1 + # ADD_ADDRs are not affected by allow_join_id0 value. + if reset_with_allow_join_id0 "signal address allow join id0 ns1" 1 0; then + pm_nl_set_limits $ns1 1 1 + pm_nl_set_limits $ns2 1 1 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 1 + chk_add_nr 1 1 + fi + + # signal address allow join id0 ns2 + # ADD_ADDRs are not affected by allow_join_id0 value. + if reset_with_allow_join_id0 "signal address allow join id0 ns2" 0 1; then + pm_nl_set_limits $ns1 1 1 + pm_nl_set_limits $ns2 1 1 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 1 + chk_add_nr 1 1 + fi + + # subflow and address allow join id0 ns1 + if reset_with_allow_join_id0 "subflow and address allow join id0 1" 1 0; then + pm_nl_set_limits $ns1 2 2 + pm_nl_set_limits $ns2 2 2 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 2 2 2 + fi + + # subflow and address allow join id0 ns2 + if reset_with_allow_join_id0 "subflow and address allow join id0 2" 0 1; then + pm_nl_set_limits $ns1 2 2 + pm_nl_set_limits $ns2 2 2 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 1 + fi +} + +fullmesh_tests() +{ + # fullmesh 1 + # 2 fullmesh addrs in ns2, added before the connection, + # 1 non-fullmesh addr in ns1, added during the connection. + if reset "fullmesh test 2x1"; then + pm_nl_set_limits $ns1 0 4 + pm_nl_set_limits $ns2 1 4 + pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow,fullmesh + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,fullmesh + addr_nr_ns1=1 speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 4 4 4 + chk_add_nr 1 1 + fi + + # fullmesh 2 + # 1 non-fullmesh addr in ns1, added before the connection, + # 1 fullmesh addr in ns2, added during the connection. + if reset "fullmesh test 1x1"; then + pm_nl_set_limits $ns1 1 3 + pm_nl_set_limits $ns2 1 3 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + fullmesh=1 speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 3 3 3 + chk_add_nr 1 1 + fi + + # fullmesh 3 + # 1 non-fullmesh addr in ns1, added before the connection, + # 2 fullmesh addrs in ns2, added during the connection. + if reset "fullmesh test 1x2"; then + pm_nl_set_limits $ns1 2 5 + pm_nl_set_limits $ns2 1 5 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + fullmesh=2 speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 5 5 5 + chk_add_nr 1 1 + fi + + # fullmesh 4 + # 1 non-fullmesh addr in ns1, added before the connection, + # 2 fullmesh addrs in ns2, added during the connection, + # limit max_subflows to 4. + if reset "fullmesh test 1x2, limited"; then + pm_nl_set_limits $ns1 2 4 + pm_nl_set_limits $ns2 1 4 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + fullmesh=2 speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 4 4 4 + chk_add_nr 1 1 + fi + + # set fullmesh flag + if reset "set fullmesh flag test" && + continue_if mptcp_lib_kversion_ge 5.18; then + pm_nl_set_limits $ns1 4 4 + pm_nl_add_endpoint $ns1 10.0.2.1 flags subflow + pm_nl_set_limits $ns2 4 4 + addr_nr_ns2=1 sflags=fullmesh speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 2 2 2 + chk_rm_nr 0 1 + fi + + # set nofullmesh flag + if reset "set nofullmesh flag test" && + continue_if mptcp_lib_kversion_ge 5.18; then + pm_nl_set_limits $ns1 4 4 + pm_nl_add_endpoint $ns1 10.0.2.1 flags subflow,fullmesh + pm_nl_set_limits $ns2 4 4 + fullmesh=1 sflags=nofullmesh speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 2 2 2 + chk_rm_nr 0 1 + fi + + # set backup,fullmesh flags + if reset "set backup,fullmesh flags test" && + continue_if mptcp_lib_kversion_ge 5.18; then + pm_nl_set_limits $ns1 4 4 + pm_nl_add_endpoint $ns1 10.0.2.1 flags subflow + pm_nl_set_limits $ns2 4 4 + addr_nr_ns2=1 sflags=backup,fullmesh speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 2 2 2 + chk_prio_nr 0 1 + chk_rm_nr 0 1 + fi + + # set nobackup,nofullmesh flags + if reset "set nobackup,nofullmesh flags test" && + continue_if mptcp_lib_kversion_ge 5.18; then + pm_nl_set_limits $ns1 4 4 + pm_nl_set_limits $ns2 4 4 + pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow,backup,fullmesh + sflags=nobackup,nofullmesh speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 2 2 2 + chk_prio_nr 0 1 + chk_rm_nr 0 1 + fi +} + +fastclose_tests() +{ + if reset_check_counter "fastclose test" "MPTcpExtMPFastcloseTx"; then + test_linkfail=1024 fastclose=client \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 0 0 0 + chk_fclose_nr 1 1 + chk_rst_nr 1 1 invert + fi + + if reset_check_counter "fastclose server test" "MPTcpExtMPFastcloseRx"; then + test_linkfail=1024 fastclose=server \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 0 0 0 0 0 0 1 + chk_fclose_nr 1 1 invert + chk_rst_nr 1 1 + fi +} + +pedit_action_pkts() +{ + tc -n $ns2 -j -s action show action pedit index 100 | \ + mptcp_lib_get_info_value \"packets\" packets +} + +fail_tests() +{ + # single subflow + if reset_with_fail "Infinite map" 1; then + test_linkfail=128 \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 0 0 0 +1 +0 1 0 1 "$(pedit_action_pkts)" + chk_fail_nr 1 -1 invert + fi + + # multiple subflows + if reset_with_fail "MP_FAIL MP_RST" 2; then + tc -n $ns2 qdisc add dev ns2eth1 root netem rate 1mbit delay 5 + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 1 + pm_nl_add_endpoint $ns2 10.0.2.2 dev ns2eth2 flags subflow + test_linkfail=1024 \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 1 1 0 1 1 0 "$(pedit_action_pkts)" + fi +} + +# $1: ns ; $2: addr ; $3: id +userspace_pm_add_addr() +{ + local evts=$evts_ns1 + local tk + + [ "$1" == "$ns2" ] && evts=$evts_ns2 + tk=$(mptcp_lib_evts_get_info token "$evts") + + ip netns exec $1 ./pm_nl_ctl ann $2 token $tk id $3 + sleep 1 +} + +# $1: ns ; $2: id +userspace_pm_rm_addr() +{ + local evts=$evts_ns1 + local tk + local cnt + + [ "$1" == "$ns2" ] && evts=$evts_ns2 + tk=$(mptcp_lib_evts_get_info token "$evts") + + cnt=$(rm_addr_count ${1}) + ip netns exec $1 ./pm_nl_ctl rem token $tk id $2 + wait_rm_addr $1 "${cnt}" +} + +# $1: ns ; $2: addr ; $3: id +userspace_pm_add_sf() +{ + local evts=$evts_ns1 + local tk da dp + + [ "$1" == "$ns2" ] && evts=$evts_ns2 + tk=$(mptcp_lib_evts_get_info token "$evts") + da=$(mptcp_lib_evts_get_info daddr4 "$evts") + dp=$(mptcp_lib_evts_get_info dport "$evts") + + ip netns exec $1 ./pm_nl_ctl csf lip $2 lid $3 \ + rip $da rport $dp token $tk + sleep 1 +} + +# $1: ns ; $2: addr $3: event type +userspace_pm_rm_sf() +{ + local evts=$evts_ns1 + local t=${3:-1} + local ip + local tk da dp sp + local cnt + + [ "$1" == "$ns2" ] && evts=$evts_ns2 + [ -n "$(mptcp_lib_evts_get_info "saddr4" "$evts" $t)" ] && ip=4 + [ -n "$(mptcp_lib_evts_get_info "saddr6" "$evts" $t)" ] && ip=6 + tk=$(mptcp_lib_evts_get_info token "$evts") + da=$(mptcp_lib_evts_get_info "daddr$ip" "$evts" $t $2) + dp=$(mptcp_lib_evts_get_info dport "$evts" $t $2) + sp=$(mptcp_lib_evts_get_info sport "$evts" $t $2) + + cnt=$(rm_sf_count ${1}) + ip netns exec $1 ./pm_nl_ctl dsf lip $2 lport $sp \ + rip $da rport $dp token $tk + wait_rm_sf $1 "${cnt}" +} + +check_output() +{ + local cmd="$1" + local expected="$2" + local msg="$3" + local rc=0 + + mptcp_lib_check_output "${err}" "${cmd}" "${expected}" || rc=${?} + if [ ${rc} -eq 2 ]; then + fail_test "fail to check output # error ${rc}" + elif [ ${rc} -eq 0 ]; then + print_ok + elif [ ${rc} -eq 1 ]; then + fail_test "fail to check output # different output" + fi +} + +# $1: ns +userspace_pm_dump() +{ + local evts=$evts_ns1 + local tk + + [ "$1" == "$ns2" ] && evts=$evts_ns2 + tk=$(mptcp_lib_evts_get_info token "$evts") + + ip netns exec $1 ./pm_nl_ctl dump token $tk +} + +# $1: ns ; $2: id +userspace_pm_get_addr() +{ + local evts=$evts_ns1 + local tk + + [ "$1" == "$ns2" ] && evts=$evts_ns2 + tk=$(mptcp_lib_evts_get_info token "$evts") + + ip netns exec $1 ./pm_nl_ctl get $2 token $tk +} + +userspace_pm_chk_dump_addr() +{ + local ns="${1}" + local exp="${2}" + local check="${3}" + + print_check "dump addrs ${check}" + + if mptcp_lib_kallsyms_has "mptcp_userspace_pm_dump_addr$"; then + check_output "userspace_pm_dump ${ns}" "${exp}" + else + print_skip + fi +} + +userspace_pm_chk_get_addr() +{ + local ns="${1}" + local id="${2}" + local exp="${3}" + + print_check "get id ${id} addr" + + if mptcp_lib_kallsyms_has "mptcp_userspace_pm_get_addr$"; then + check_output "userspace_pm_get_addr ${ns} ${id}" "${exp}" + else + print_skip + fi +} + +userspace_tests() +{ + # userspace pm type prevents add_addr + if reset "userspace pm type prevents add_addr" && + continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then + set_userspace_pm $ns1 + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 0 2 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 0 0 0 + chk_add_nr 0 0 + fi + + # userspace pm type does not echo add_addr without daemon + if reset "userspace pm no echo w/o daemon" && + continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then + set_userspace_pm $ns2 + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 0 2 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 0 0 0 + chk_add_nr 1 0 + fi + + # userspace pm type rejects join + if reset "userspace pm type rejects join" && + continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then + set_userspace_pm $ns1 + pm_nl_set_limits $ns1 1 1 + pm_nl_set_limits $ns2 1 1 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 0 + fi + + # userspace pm type does not send join + if reset "userspace pm type does not send join" && + continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then + set_userspace_pm $ns2 + pm_nl_set_limits $ns1 1 1 + pm_nl_set_limits $ns2 1 1 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 0 0 0 + fi + + # userspace pm type prevents mp_prio + if reset "userspace pm type prevents mp_prio" && + continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then + set_userspace_pm $ns1 + pm_nl_set_limits $ns1 1 1 + pm_nl_set_limits $ns2 1 1 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + sflags=backup speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 0 + chk_prio_nr 0 0 + fi + + # userspace pm type prevents rm_addr + if reset "userspace pm type prevents rm_addr" && + continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then + set_userspace_pm $ns1 + set_userspace_pm $ns2 + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 1 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + addr_nr_ns2=-1 speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 0 0 0 + chk_rm_nr 0 0 + fi + + # userspace pm add & remove address + if reset_with_events "userspace pm add & remove address" && + continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then + set_userspace_pm $ns1 + pm_nl_set_limits $ns2 2 2 + speed=5 \ + run_tests $ns1 $ns2 10.0.1.1 & + local tests_pid=$! + wait_mpj $ns1 + userspace_pm_add_addr $ns1 10.0.2.1 10 + userspace_pm_add_addr $ns1 10.0.3.1 20 + chk_join_nr 2 2 2 + chk_add_nr 2 2 + chk_mptcp_info subflows 2 subflows 2 + chk_subflows_total 3 3 + chk_mptcp_info add_addr_signal 2 add_addr_accepted 2 + userspace_pm_chk_dump_addr "${ns1}" \ + $'id 10 flags signal 10.0.2.1\nid 20 flags signal 10.0.3.1' \ + "signal" + userspace_pm_chk_get_addr "${ns1}" "10" "id 10 flags signal 10.0.2.1" + userspace_pm_chk_get_addr "${ns1}" "20" "id 20 flags signal 10.0.3.1" + userspace_pm_rm_addr $ns1 10 + userspace_pm_rm_sf $ns1 "::ffff:10.0.2.1" $MPTCP_LIB_EVENT_SUB_ESTABLISHED + userspace_pm_chk_dump_addr "${ns1}" \ + "id 20 flags signal 10.0.3.1" "after rm_addr 10" + userspace_pm_rm_addr $ns1 20 + userspace_pm_rm_sf $ns1 10.0.3.1 $MPTCP_LIB_EVENT_SUB_ESTABLISHED + userspace_pm_chk_dump_addr "${ns1}" "" "after rm_addr 20" + chk_rm_nr 2 2 invert + chk_mptcp_info subflows 0 subflows 0 + chk_subflows_total 1 1 + kill_events_pids + mptcp_lib_kill_wait $tests_pid + fi + + # userspace pm create destroy subflow + if reset_with_events "userspace pm create destroy subflow" && + continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then + set_userspace_pm $ns2 + pm_nl_set_limits $ns1 0 1 + speed=5 \ + run_tests $ns1 $ns2 10.0.1.1 & + local tests_pid=$! + wait_mpj $ns2 + userspace_pm_add_sf $ns2 10.0.3.2 20 + chk_join_nr 1 1 1 + chk_mptcp_info subflows 1 subflows 1 + chk_subflows_total 2 2 + userspace_pm_chk_dump_addr "${ns2}" \ + "id 20 flags subflow 10.0.3.2" \ + "subflow" + userspace_pm_chk_get_addr "${ns2}" "20" "id 20 flags subflow 10.0.3.2" + userspace_pm_rm_addr $ns2 20 + userspace_pm_rm_sf $ns2 10.0.3.2 $MPTCP_LIB_EVENT_SUB_ESTABLISHED + userspace_pm_chk_dump_addr "${ns2}" \ + "" \ + "after rm_addr 20" + chk_rm_nr 1 1 + chk_mptcp_info subflows 0 subflows 0 + chk_subflows_total 1 1 + kill_events_pids + mptcp_lib_kill_wait $tests_pid + fi + + # userspace pm create id 0 subflow + if reset_with_events "userspace pm create id 0 subflow" && + continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then + set_userspace_pm $ns2 + pm_nl_set_limits $ns1 0 1 + speed=5 \ + run_tests $ns1 $ns2 10.0.1.1 & + local tests_pid=$! + wait_mpj $ns2 + chk_mptcp_info subflows 0 subflows 0 + chk_subflows_total 1 1 + userspace_pm_add_sf $ns2 10.0.3.2 0 + userspace_pm_chk_dump_addr "${ns2}" \ + "id 0 flags subflow 10.0.3.2" "id 0 subflow" + chk_join_nr 1 1 1 + chk_mptcp_info subflows 1 subflows 1 + chk_subflows_total 2 2 + kill_events_pids + mptcp_lib_kill_wait $tests_pid + fi + + # userspace pm remove initial subflow + if reset_with_events "userspace pm remove initial subflow" && + continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then + set_userspace_pm $ns2 + pm_nl_set_limits $ns1 0 1 + speed=5 \ + run_tests $ns1 $ns2 10.0.1.1 & + local tests_pid=$! + wait_mpj $ns2 + userspace_pm_add_sf $ns2 10.0.3.2 20 + chk_join_nr 1 1 1 + chk_mptcp_info subflows 1 subflows 1 + chk_subflows_total 2 2 + userspace_pm_rm_sf $ns2 10.0.1.2 + # we don't look at the counter linked to the RM_ADDR but + # to the one linked to the subflows that have been removed + chk_rm_nr 0 1 + chk_rst_nr 0 0 invert + chk_mptcp_info subflows 1 subflows 1 + chk_subflows_total 1 1 + kill_events_pids + mptcp_lib_kill_wait $tests_pid + fi + + # userspace pm send RM_ADDR for ID 0 + if reset_with_events "userspace pm send RM_ADDR for ID 0" && + continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then + set_userspace_pm $ns1 + pm_nl_set_limits $ns2 1 1 + speed=5 \ + run_tests $ns1 $ns2 10.0.1.1 & + local tests_pid=$! + wait_mpj $ns1 + userspace_pm_add_addr $ns1 10.0.2.1 10 + chk_join_nr 1 1 1 + chk_add_nr 1 1 + chk_mptcp_info subflows 1 subflows 1 + chk_subflows_total 2 2 + chk_mptcp_info add_addr_signal 1 add_addr_accepted 1 + userspace_pm_rm_addr $ns1 0 + # we don't look at the counter linked to the subflows that + # have been removed but to the one linked to the RM_ADDR + chk_rm_nr 1 0 invert + chk_rst_nr 0 0 invert + chk_mptcp_info subflows 1 subflows 1 + chk_subflows_total 1 1 + kill_events_pids + mptcp_lib_kill_wait $tests_pid + fi +} + +endpoint_tests() +{ + # subflow_rebuild_header is needed to support the implicit flag + # userspace pm type prevents add_addr + if reset "implicit EP" && + mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then + pm_nl_set_limits $ns1 2 2 + pm_nl_set_limits $ns2 2 2 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 & + local tests_pid=$! + + wait_mpj $ns1 + pm_nl_check_endpoint "creation" \ + $ns2 10.0.2.2 id 1 flags implicit + chk_mptcp_info subflows 1 subflows 1 + chk_mptcp_info add_addr_signal 1 add_addr_accepted 1 + + pm_nl_add_endpoint $ns2 10.0.2.2 id 33 2>/dev/null + pm_nl_check_endpoint "ID change is prevented" \ + $ns2 10.0.2.2 id 1 flags implicit + + pm_nl_add_endpoint $ns2 10.0.2.2 flags signal + pm_nl_check_endpoint "modif is allowed" \ + $ns2 10.0.2.2 id 1 flags signal + mptcp_lib_kill_wait $tests_pid + fi + + if reset "delete and re-add" && + mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then + pm_nl_set_limits $ns1 1 1 + pm_nl_set_limits $ns2 1 1 + pm_nl_add_endpoint $ns2 10.0.2.2 id 2 dev ns2eth2 flags subflow + test_linkfail=4 speed=20 \ + run_tests $ns1 $ns2 10.0.1.1 & + local tests_pid=$! + + wait_mpj $ns2 + chk_subflow_nr "before delete" 2 + chk_mptcp_info subflows 1 subflows 1 + + pm_nl_del_endpoint $ns2 2 10.0.2.2 + sleep 0.5 + chk_subflow_nr "after delete" 1 + chk_mptcp_info subflows 0 subflows 0 + + pm_nl_add_endpoint $ns2 10.0.2.2 dev ns2eth2 flags subflow + wait_mpj $ns2 + chk_subflow_nr "after re-add" 2 + chk_mptcp_info subflows 1 subflows 1 + mptcp_lib_kill_wait $tests_pid + fi +} + +# [$1: error message] +usage() +{ + if [ -n "${1}" ]; then + echo "${1}" + ret=${KSFT_FAIL} + fi + + echo "mptcp_join usage:" + + local key + for key in "${!all_tests[@]}"; do + echo " -${key} ${all_tests[${key}]}" + done + + echo " -c capture pcap files" + echo " -C enable data checksum" + echo " -i use ip mptcp" + echo " -h help" + + echo "[test ids|names]" + + exit ${ret} +} + + +# Use a "simple" array to force an specific order we cannot have with an associative one +all_tests_sorted=( + f@subflows_tests + e@subflows_error_tests + s@signal_address_tests + l@link_failure_tests + t@add_addr_timeout_tests + r@remove_tests + a@add_tests + 6@ipv6_tests + 4@v4mapped_tests + M@mixed_tests + b@backup_tests + p@add_addr_ports_tests + k@syncookies_tests + S@checksum_tests + d@deny_join_id0_tests + m@fullmesh_tests + z@fastclose_tests + F@fail_tests + u@userspace_tests + I@endpoint_tests +) + +all_tests_args="" +all_tests_names=() +for subtests in "${all_tests_sorted[@]}"; do + key="${subtests%@*}" + value="${subtests#*@}" + + all_tests_args+="${key}" + all_tests_names+=("${value}") + all_tests[${key}]="${value}" +done + +tests=() +while getopts "${all_tests_args}cCih" opt; do + case $opt in + ["${all_tests_args}"]) + tests+=("${all_tests[${opt}]}") + ;; + c) + capture=true + ;; + C) + checksum=true + ;; + i) + ip_mptcp=1 + ;; + h) + usage + ;; + *) + usage "Unknown option: -${opt}" + ;; + esac +done + +shift $((OPTIND - 1)) + +for arg in "${@}"; do + if [[ "${arg}" =~ ^[0-9]+$ ]]; then + only_tests_ids+=("${arg}") + else + only_tests_names+=("${arg}") + fi +done + +if [ ${#tests[@]} -eq 0 ]; then + tests=("${all_tests_names[@]}") +fi + +for subtests in "${tests[@]}"; do + "${subtests}" +done + +if [ ${ret} -ne 0 ]; then + echo + echo "${#failed_tests[@]} failure(s) has(ve) been detected:" + for i in $(get_failed_tests_ids); do + echo -e "\t- ${i}: ${failed_tests[${i}]}" + done + echo +fi + +append_prev_results +mptcp_lib_result_print_all_tap exit $ret diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh new file mode 100644 index 000000000000..d529b4b37af8 --- /dev/null +++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh @@ -0,0 +1,507 @@ +#! /bin/bash +# SPDX-License-Identifier: GPL-2.0 + +readonly KSFT_PASS=0 +readonly KSFT_FAIL=1 +readonly KSFT_SKIP=4 + +# shellcheck disable=SC2155 # declare and assign separately +readonly KSFT_TEST="${MPTCP_LIB_KSFT_TEST:-$(basename "${0}" .sh)}" + +# These variables are used in some selftests, read-only +declare -rx MPTCP_LIB_EVENT_ANNOUNCED=6 # MPTCP_EVENT_ANNOUNCED +declare -rx MPTCP_LIB_EVENT_REMOVED=7 # MPTCP_EVENT_REMOVED +declare -rx MPTCP_LIB_EVENT_SUB_ESTABLISHED=10 # MPTCP_EVENT_SUB_ESTABLISHED +declare -rx MPTCP_LIB_EVENT_SUB_CLOSED=11 # MPTCP_EVENT_SUB_CLOSED +declare -rx MPTCP_LIB_EVENT_LISTENER_CREATED=15 # MPTCP_EVENT_LISTENER_CREATED +declare -rx MPTCP_LIB_EVENT_LISTENER_CLOSED=16 # MPTCP_EVENT_LISTENER_CLOSED + +declare -rx MPTCP_LIB_AF_INET=2 +declare -rx MPTCP_LIB_AF_INET6=10 + +MPTCP_LIB_SUBTESTS=() +MPTCP_LIB_SUBTESTS_DUPLICATED=0 +MPTCP_LIB_TEST_COUNTER=0 +MPTCP_LIB_TEST_FORMAT="%02u %-50s" + +# only if supported (or forced) and not disabled, see no-color.org +if { [ -t 1 ] || [ "${SELFTESTS_MPTCP_LIB_COLOR_FORCE:-}" = "1" ]; } && + [ "${NO_COLOR:-}" != "1" ]; then + readonly MPTCP_LIB_COLOR_RED="\E[1;31m" + readonly MPTCP_LIB_COLOR_GREEN="\E[1;32m" + readonly MPTCP_LIB_COLOR_YELLOW="\E[1;33m" + readonly MPTCP_LIB_COLOR_BLUE="\E[1;34m" + readonly MPTCP_LIB_COLOR_RESET="\E[0m" +else + readonly MPTCP_LIB_COLOR_RED= + readonly MPTCP_LIB_COLOR_GREEN= + readonly MPTCP_LIB_COLOR_YELLOW= + readonly MPTCP_LIB_COLOR_BLUE= + readonly MPTCP_LIB_COLOR_RESET= +fi + +# $1: color, $2: text +mptcp_lib_print_color() { + echo -e "${MPTCP_LIB_START_PRINT:-}${*}${MPTCP_LIB_COLOR_RESET}" +} + +mptcp_lib_print_ok() { + mptcp_lib_print_color "${MPTCP_LIB_COLOR_GREEN}${*}" +} + +mptcp_lib_print_warn() { + mptcp_lib_print_color "${MPTCP_LIB_COLOR_YELLOW}${*}" +} + +mptcp_lib_print_info() { + mptcp_lib_print_color "${MPTCP_LIB_COLOR_BLUE}${*}" +} + +mptcp_lib_print_err() { + mptcp_lib_print_color "${MPTCP_LIB_COLOR_RED}${*}" +} + +# shellcheck disable=SC2120 # parameters are optional +mptcp_lib_pr_ok() { + mptcp_lib_print_ok "[ OK ]${1:+ ${*}}" +} + +mptcp_lib_pr_skip() { + mptcp_lib_print_warn "[SKIP]${1:+ ${*}}" +} + +mptcp_lib_pr_fail() { + mptcp_lib_print_err "[FAIL]${1:+ ${*}}" +} + +mptcp_lib_pr_info() { + mptcp_lib_print_info "INFO: ${*}" +} + +# SELFTESTS_MPTCP_LIB_EXPECT_ALL_FEATURES env var can be set when validating all +# features using the last version of the kernel and the selftests to make sure +# a test is not being skipped by mistake. +mptcp_lib_expect_all_features() { + [ "${SELFTESTS_MPTCP_LIB_EXPECT_ALL_FEATURES:-}" = "1" ] +} + +# $1: msg +mptcp_lib_fail_if_expected_feature() { + if mptcp_lib_expect_all_features; then + echo "ERROR: missing feature: ${*}" + exit ${KSFT_FAIL} + fi + + return 1 +} + +# $1: file +mptcp_lib_has_file() { + local f="${1}" + + if [ -f "${f}" ]; then + return 0 + fi + + mptcp_lib_fail_if_expected_feature "${f} file not found" +} + +mptcp_lib_check_mptcp() { + if ! mptcp_lib_has_file "/proc/sys/net/mptcp/enabled"; then + mptcp_lib_pr_skip "MPTCP support is not available" + exit ${KSFT_SKIP} + fi +} + +mptcp_lib_check_kallsyms() { + if ! mptcp_lib_has_file "/proc/kallsyms"; then + mptcp_lib_pr_skip "CONFIG_KALLSYMS is missing" + exit ${KSFT_SKIP} + fi +} + +# Internal: use mptcp_lib_kallsyms_has() instead +__mptcp_lib_kallsyms_has() { + local sym="${1}" + + mptcp_lib_check_kallsyms + + grep -q " ${sym}" /proc/kallsyms +} + +# $1: part of a symbol to look at, add '$' at the end for full name +mptcp_lib_kallsyms_has() { + local sym="${1}" + + if __mptcp_lib_kallsyms_has "${sym}"; then + return 0 + fi + + mptcp_lib_fail_if_expected_feature "${sym} symbol not found" +} + +# $1: part of a symbol to look at, add '$' at the end for full name +mptcp_lib_kallsyms_doesnt_have() { + local sym="${1}" + + if ! __mptcp_lib_kallsyms_has "${sym}"; then + return 0 + fi + + mptcp_lib_fail_if_expected_feature "${sym} symbol has been found" +} + +# !!!AVOID USING THIS!!! +# Features might not land in the expected version and features can be backported +# +# $1: kernel version, e.g. 6.3 +mptcp_lib_kversion_ge() { + local exp_maj="${1%.*}" + local exp_min="${1#*.}" + local v maj min + + # If the kernel has backported features, set this env var to 1: + if [ "${SELFTESTS_MPTCP_LIB_NO_KVERSION_CHECK:-}" = "1" ]; then + return 0 + fi + + v=$(uname -r | cut -d'.' -f1,2) + maj=${v%.*} + min=${v#*.} + + if [ "${maj}" -gt "${exp_maj}" ] || + { [ "${maj}" -eq "${exp_maj}" ] && [ "${min}" -ge "${exp_min}" ]; }; then + return 0 + fi + + mptcp_lib_fail_if_expected_feature "kernel version ${1} lower than ${v}" +} + +__mptcp_lib_result_check_duplicated() { + local subtest + + for subtest in "${MPTCP_LIB_SUBTESTS[@]}"; do + if [[ "${subtest}" == *" - ${KSFT_TEST}: ${*%% #*}" ]]; then + MPTCP_LIB_SUBTESTS_DUPLICATED=1 + mptcp_lib_print_err "Duplicated entry: ${*}" + break + fi + done +} + +__mptcp_lib_result_add() { + local result="${1}" + shift + + local id=$((${#MPTCP_LIB_SUBTESTS[@]} + 1)) + + __mptcp_lib_result_check_duplicated "${*}" + + MPTCP_LIB_SUBTESTS+=("${result} ${id} - ${KSFT_TEST}: ${*}") +} + +# $1: test name +mptcp_lib_result_pass() { + __mptcp_lib_result_add "ok" "${1}" +} + +# $1: test name +mptcp_lib_result_fail() { + __mptcp_lib_result_add "not ok" "${1}" +} + +# $1: test name +mptcp_lib_result_skip() { + __mptcp_lib_result_add "ok" "${1} # SKIP" +} + +# $1: result code ; $2: test name +mptcp_lib_result_code() { + local ret="${1}" + local name="${2}" + + case "${ret}" in + "${KSFT_PASS}") + mptcp_lib_result_pass "${name}" + ;; + "${KSFT_FAIL}") + mptcp_lib_result_fail "${name}" + ;; + "${KSFT_SKIP}") + mptcp_lib_result_skip "${name}" + ;; + *) + echo "ERROR: wrong result code: ${ret}" + exit ${KSFT_FAIL} + ;; + esac +} + +mptcp_lib_result_print_all_tap() { + local subtest + + if [ ${#MPTCP_LIB_SUBTESTS[@]} -eq 0 ] || + [ "${SELFTESTS_MPTCP_LIB_NO_TAP:-}" = "1" ]; then + return + fi + + printf "\nTAP version 13\n" + printf "1..%d\n" "${#MPTCP_LIB_SUBTESTS[@]}" + + for subtest in "${MPTCP_LIB_SUBTESTS[@]}"; do + printf "%s\n" "${subtest}" + done + + if [ "${MPTCP_LIB_SUBTESTS_DUPLICATED}" = 1 ] && + mptcp_lib_expect_all_features; then + mptcp_lib_print_err "Duplicated test entries" + exit ${KSFT_FAIL} + fi +} + +# get the value of keyword $1 in the line marked by keyword $2 +mptcp_lib_get_info_value() { + grep "${2}" | sed -n 's/.*\('"${1}"':\)\([0-9a-f:.]*\).*$/\2/p;q' +} + +# $1: info name ; $2: evts_ns ; [$3: event type; [$4: addr]] +mptcp_lib_evts_get_info() { + grep "${4:-}" "${2}" | mptcp_lib_get_info_value "${1}" "^type:${3:-1}," +} + +# $1: PID +mptcp_lib_kill_wait() { + [ "${1}" -eq 0 ] && return 0 + + kill -SIGUSR1 "${1}" > /dev/null 2>&1 + kill "${1}" > /dev/null 2>&1 + wait "${1}" 2>/dev/null +} + +# $1: IP address +mptcp_lib_is_v6() { + [ -z "${1##*:*}" ] +} + +# $1: ns, $2: MIB counter +mptcp_lib_get_counter() { + local ns="${1}" + local counter="${2}" + local count + + count=$(ip netns exec "${ns}" nstat -asz "${counter}" | + awk 'NR==1 {next} {print $2}') + if [ -z "${count}" ]; then + mptcp_lib_fail_if_expected_feature "${counter} counter" + return 1 + fi + + echo "${count}" +} + +mptcp_lib_make_file() { + local name="${1}" + local bs="${2}" + local size="${3}" + + dd if=/dev/urandom of="${name}" bs="${bs}" count="${size}" 2> /dev/null + echo -e "\nMPTCP_TEST_FILE_END_MARKER" >> "${name}" +} + +# $1: file +mptcp_lib_print_file_err() { + ls -l "${1}" 1>&2 + echo "Trailing bytes are: " + tail -c 27 "${1}" +} + +# $1: input file ; $2: output file ; $3: what kind of file +mptcp_lib_check_transfer() { + local in="${1}" + local out="${2}" + local what="${3}" + + if ! cmp "$in" "$out" > /dev/null 2>&1; then + mptcp_lib_pr_fail "$what does not match (in, out):" + mptcp_lib_print_file_err "$in" + mptcp_lib_print_file_err "$out" + + return 1 + fi + + return 0 +} + +# $1: ns, $2: port +mptcp_lib_wait_local_port_listen() { + local listener_ns="${1}" + local port="${2}" + + local port_hex + port_hex="$(printf "%04X" "${port}")" + + local _ + for _ in $(seq 10); do + ip netns exec "${listener_ns}" cat /proc/net/tcp* | \ + awk "BEGIN {rc=1} {if (\$2 ~ /:${port_hex}\$/ && \$4 ~ /0A/) \ + {rc=0; exit}} END {exit rc}" && + break + sleep 0.1 + done +} + +mptcp_lib_check_output() { + local err="${1}" + local cmd="${2}" + local expected="${3}" + local cmd_ret=0 + local out + + if ! out=$(${cmd} 2>"${err}"); then + cmd_ret=${?} + fi + + if [ ${cmd_ret} -ne 0 ]; then + mptcp_lib_pr_fail "command execution '${cmd}' stderr" + cat "${err}" + return 2 + elif [ "${out}" = "${expected}" ]; then + return 0 + else + mptcp_lib_pr_fail "expected '${expected}' got '${out}'" + return 1 + fi +} + +mptcp_lib_check_tools() { + local tool + + for tool in "${@}"; do + case "${tool}" in + "ip") + if ! ip -Version &> /dev/null; then + mptcp_lib_pr_skip "Could not run test without ip tool" + exit ${KSFT_SKIP} + fi + ;; + "ss") + if ! ss -h | grep -q MPTCP; then + mptcp_lib_pr_skip "ss tool does not support MPTCP" + exit ${KSFT_SKIP} + fi + ;; + "iptables"* | "ip6tables"*) + if ! "${tool}" -V &> /dev/null; then + mptcp_lib_pr_skip "Could not run all tests without ${tool}" + exit ${KSFT_SKIP} + fi + ;; + *) + mptcp_lib_pr_fail "Internal error: unsupported tool: ${tool}" + exit ${KSFT_FAIL} + ;; + esac + done +} + +mptcp_lib_ns_init() { + local sec rndh + + sec=$(date +%s) + rndh=$(printf %x "${sec}")-$(mktemp -u XXXXXX) + + local netns + for netns in "${@}"; do + eval "${netns}=${netns}-${rndh}" + + ip netns add "${!netns}" || exit ${KSFT_SKIP} + ip -net "${!netns}" link set lo up + ip netns exec "${!netns}" sysctl -q net.mptcp.enabled=1 + ip netns exec "${!netns}" sysctl -q net.ipv4.conf.all.rp_filter=0 + ip netns exec "${!netns}" sysctl -q net.ipv4.conf.default.rp_filter=0 + done +} + +mptcp_lib_ns_exit() { + local netns + for netns in "${@}"; do + ip netns del "${netns}" + rm -f /tmp/"${netns}".{nstat,out} + done +} + +mptcp_lib_events() { + local ns="${1}" + local evts="${2}" + declare -n pid="${3}" + + :>"${evts}" + + mptcp_lib_kill_wait "${pid:-0}" + ip netns exec "${ns}" ./pm_nl_ctl events >> "${evts}" 2>&1 & + pid=$! +} + +mptcp_lib_print_title() { + : "${MPTCP_LIB_TEST_COUNTER:?}" + : "${MPTCP_LIB_TEST_FORMAT:?}" + + # shellcheck disable=SC2059 # the format is in a variable + printf "${MPTCP_LIB_TEST_FORMAT}" "$((++MPTCP_LIB_TEST_COUNTER))" "${*}" +} + +# $1: var name ; $2: prev ret +mptcp_lib_check_expected_one() { + local var="${1}" + local exp="e_${var}" + local prev_ret="${2}" + + if [ "${!var}" = "${!exp}" ]; then + return 0 + fi + + if [ "${prev_ret}" = "0" ]; then + mptcp_lib_pr_fail + fi + + mptcp_lib_print_err "Expected value for '${var}': '${!exp}', got '${!var}'." + return 1 +} + +# $@: all var names to check +mptcp_lib_check_expected() { + local rc=0 + local var + + for var in "${@}"; do + mptcp_lib_check_expected_one "${var}" "${rc}" || rc=1 + done + + return "${rc}" +} + +# shellcheck disable=SC2034 # Some variables are used below but indirectly +mptcp_lib_verify_listener_events() { + local evt=${1} + local e_type=${2} + local e_family=${3} + local e_saddr=${4} + local e_sport=${5} + local type + local family + local saddr + local sport + local rc=0 + + type=$(mptcp_lib_evts_get_info type "${evt}" "${e_type}") + family=$(mptcp_lib_evts_get_info family "${evt}" "${e_type}") + if [ "${family}" ] && [ "${family}" = "${AF_INET6}" ]; then + saddr=$(mptcp_lib_evts_get_info saddr6 "${evt}" "${e_type}") + else + saddr=$(mptcp_lib_evts_get_info saddr4 "${evt}" "${e_type}") + fi + sport=$(mptcp_lib_evts_get_info sport "${evt}" "${e_type}") + + mptcp_lib_check_expected "type" "family" "saddr" "sport" || rc="${?}" + return "${rc}" +} diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.c b/tools/testing/selftests/net/mptcp/mptcp_sockopt.c new file mode 100644 index 000000000000..926b0be87c99 --- /dev/null +++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.c @@ -0,0 +1,866 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE + +#include <assert.h> +#include <errno.h> +#include <fcntl.h> +#include <limits.h> +#include <string.h> +#include <stdarg.h> +#include <stdbool.h> +#include <stdint.h> +#include <inttypes.h> +#include <stdio.h> +#include <stdlib.h> +#include <strings.h> +#include <time.h> +#include <unistd.h> + +#include <sys/socket.h> +#include <sys/types.h> +#include <sys/wait.h> + +#include <netdb.h> +#include <netinet/in.h> + +#include <linux/tcp.h> + +static int pf = AF_INET; + +#ifndef IPPROTO_MPTCP +#define IPPROTO_MPTCP 262 +#endif +#ifndef SOL_MPTCP +#define SOL_MPTCP 284 +#endif + +#ifndef MPTCP_INFO +struct mptcp_info { + __u8 mptcpi_subflows; + __u8 mptcpi_add_addr_signal; + __u8 mptcpi_add_addr_accepted; + __u8 mptcpi_subflows_max; + __u8 mptcpi_add_addr_signal_max; + __u8 mptcpi_add_addr_accepted_max; + __u32 mptcpi_flags; + __u32 mptcpi_token; + __u64 mptcpi_write_seq; + __u64 mptcpi_snd_una; + __u64 mptcpi_rcv_nxt; + __u8 mptcpi_local_addr_used; + __u8 mptcpi_local_addr_max; + __u8 mptcpi_csum_enabled; + __u32 mptcpi_retransmits; + __u64 mptcpi_bytes_retrans; + __u64 mptcpi_bytes_sent; + __u64 mptcpi_bytes_received; + __u64 mptcpi_bytes_acked; +}; + +struct mptcp_subflow_data { + __u32 size_subflow_data; /* size of this structure in userspace */ + __u32 num_subflows; /* must be 0, set by kernel */ + __u32 size_kernel; /* must be 0, set by kernel */ + __u32 size_user; /* size of one element in data[] */ +} __attribute__((aligned(8))); + +struct mptcp_subflow_addrs { + union { + __kernel_sa_family_t sa_family; + struct sockaddr sa_local; + struct sockaddr_in sin_local; + struct sockaddr_in6 sin6_local; + struct __kernel_sockaddr_storage ss_local; + }; + union { + struct sockaddr sa_remote; + struct sockaddr_in sin_remote; + struct sockaddr_in6 sin6_remote; + struct __kernel_sockaddr_storage ss_remote; + }; +}; + +#define MPTCP_INFO 1 +#define MPTCP_TCPINFO 2 +#define MPTCP_SUBFLOW_ADDRS 3 +#endif + +#ifndef MPTCP_FULL_INFO +struct mptcp_subflow_info { + __u32 id; + struct mptcp_subflow_addrs addrs; +}; + +struct mptcp_full_info { + __u32 size_tcpinfo_kernel; /* must be 0, set by kernel */ + __u32 size_tcpinfo_user; + __u32 size_sfinfo_kernel; /* must be 0, set by kernel */ + __u32 size_sfinfo_user; + __u32 num_subflows; /* must be 0, set by kernel (real subflow count) */ + __u32 size_arrays_user; /* max subflows that userspace is interested in; + * the buffers at subflow_info/tcp_info + * are respectively at least: + * size_arrays * size_sfinfo_user + * size_arrays * size_tcpinfo_user + * bytes wide + */ + __aligned_u64 subflow_info; + __aligned_u64 tcp_info; + struct mptcp_info mptcp_info; +}; + +#define MPTCP_FULL_INFO 4 +#endif + +struct so_state { + struct mptcp_info mi; + struct mptcp_info last_sample; + struct tcp_info tcp_info; + struct mptcp_subflow_addrs addrs; + uint64_t mptcpi_rcv_delta; + uint64_t tcpi_rcv_delta; + bool pkt_stats_avail; +}; + +#ifndef MIN +#define MIN(a, b) ((a) < (b) ? (a) : (b)) +#endif + +static void die_perror(const char *msg) +{ + perror(msg); + exit(1); +} + +static void die_usage(int r) +{ + fprintf(stderr, "Usage: mptcp_sockopt [-6]\n"); + exit(r); +} + +static void xerror(const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + fputc('\n', stderr); + exit(1); +} + +static const char *getxinfo_strerr(int err) +{ + if (err == EAI_SYSTEM) + return strerror(errno); + + return gai_strerror(err); +} + +static void xgetaddrinfo(const char *node, const char *service, + const struct addrinfo *hints, + struct addrinfo **res) +{ + int err = getaddrinfo(node, service, hints, res); + + if (err) { + const char *errstr = getxinfo_strerr(err); + + fprintf(stderr, "Fatal: getaddrinfo(%s:%s): %s\n", + node ? node : "", service ? service : "", errstr); + exit(1); + } +} + +static int sock_listen_mptcp(const char * const listenaddr, + const char * const port) +{ + int sock = -1; + struct addrinfo hints = { + .ai_protocol = IPPROTO_TCP, + .ai_socktype = SOCK_STREAM, + .ai_flags = AI_PASSIVE | AI_NUMERICHOST + }; + + hints.ai_family = pf; + + struct addrinfo *a, *addr; + int one = 1; + + xgetaddrinfo(listenaddr, port, &hints, &addr); + hints.ai_family = pf; + + for (a = addr; a; a = a->ai_next) { + sock = socket(a->ai_family, a->ai_socktype, IPPROTO_MPTCP); + if (sock < 0) + continue; + + if (-1 == setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &one, + sizeof(one))) + perror("setsockopt"); + + if (bind(sock, a->ai_addr, a->ai_addrlen) == 0) + break; /* success */ + + perror("bind"); + close(sock); + sock = -1; + } + + freeaddrinfo(addr); + + if (sock < 0) + xerror("could not create listen socket"); + + if (listen(sock, 20)) + die_perror("listen"); + + return sock; +} + +static int sock_connect_mptcp(const char * const remoteaddr, + const char * const port, int proto) +{ + struct addrinfo hints = { + .ai_protocol = IPPROTO_TCP, + .ai_socktype = SOCK_STREAM, + }; + struct addrinfo *a, *addr; + int sock = -1; + + hints.ai_family = pf; + + xgetaddrinfo(remoteaddr, port, &hints, &addr); + for (a = addr; a; a = a->ai_next) { + sock = socket(a->ai_family, a->ai_socktype, proto); + if (sock < 0) + continue; + + if (connect(sock, a->ai_addr, a->ai_addrlen) == 0) + break; /* success */ + + die_perror("connect"); + } + + if (sock < 0) + xerror("could not create connect socket"); + + freeaddrinfo(addr); + return sock; +} + +static void parse_opts(int argc, char **argv) +{ + int c; + + while ((c = getopt(argc, argv, "h6")) != -1) { + switch (c) { + case 'h': + die_usage(0); + break; + case '6': + pf = AF_INET6; + break; + default: + die_usage(1); + break; + } + } +} + +static void do_getsockopt_bogus_sf_data(int fd, int optname) +{ + struct mptcp_subflow_data good_data; + struct bogus_data { + struct mptcp_subflow_data d; + char buf[2]; + } bd; + socklen_t olen, _olen; + int ret; + + memset(&bd, 0, sizeof(bd)); + memset(&good_data, 0, sizeof(good_data)); + + olen = sizeof(good_data); + good_data.size_subflow_data = olen; + + ret = getsockopt(fd, SOL_MPTCP, optname, &bd, &olen); + assert(ret < 0); /* 0 size_subflow_data */ + assert(olen == sizeof(good_data)); + + bd.d = good_data; + + ret = getsockopt(fd, SOL_MPTCP, optname, &bd, &olen); + assert(ret == 0); + assert(olen == sizeof(good_data)); + assert(bd.d.num_subflows == 1); + assert(bd.d.size_kernel > 0); + assert(bd.d.size_user == 0); + + bd.d = good_data; + _olen = rand() % olen; + olen = _olen; + ret = getsockopt(fd, SOL_MPTCP, optname, &bd, &olen); + assert(ret < 0); /* bogus olen */ + assert(olen == _olen); /* must be unchanged */ + + bd.d = good_data; + olen = sizeof(good_data); + bd.d.size_kernel = 1; + ret = getsockopt(fd, SOL_MPTCP, optname, &bd, &olen); + assert(ret < 0); /* size_kernel not 0 */ + + bd.d = good_data; + olen = sizeof(good_data); + bd.d.num_subflows = 1; + ret = getsockopt(fd, SOL_MPTCP, optname, &bd, &olen); + assert(ret < 0); /* num_subflows not 0 */ + + /* forward compat check: larger struct mptcp_subflow_data on 'old' kernel */ + bd.d = good_data; + olen = sizeof(bd); + bd.d.size_subflow_data = sizeof(bd); + + ret = getsockopt(fd, SOL_MPTCP, optname, &bd, &olen); + assert(ret == 0); + + /* olen must be truncated to real data size filled by kernel: */ + assert(olen == sizeof(good_data)); + + assert(bd.d.size_subflow_data == sizeof(bd)); + + bd.d = good_data; + bd.d.size_subflow_data += 1; + bd.d.size_user = 1; + olen = bd.d.size_subflow_data + 1; + _olen = olen; + + ret = getsockopt(fd, SOL_MPTCP, optname, &bd, &_olen); + assert(ret == 0); + + /* no truncation, kernel should have filled 1 byte of optname payload in buf[1]: */ + assert(olen == _olen); + + assert(bd.d.size_subflow_data == sizeof(good_data) + 1); + assert(bd.buf[0] == 0); +} + +static void do_getsockopt_mptcp_info(struct so_state *s, int fd, size_t w) +{ + struct mptcp_info i; + socklen_t olen; + int ret; + + olen = sizeof(i); + ret = getsockopt(fd, SOL_MPTCP, MPTCP_INFO, &i, &olen); + + if (ret < 0) + die_perror("getsockopt MPTCP_INFO"); + + s->pkt_stats_avail = olen >= sizeof(i); + + s->last_sample = i; + if (s->mi.mptcpi_write_seq == 0) + s->mi = i; + + assert(s->mi.mptcpi_write_seq + w == i.mptcpi_write_seq); + + s->mptcpi_rcv_delta = i.mptcpi_rcv_nxt - s->mi.mptcpi_rcv_nxt; +} + +static void do_getsockopt_tcp_info(struct so_state *s, int fd, size_t r, size_t w) +{ + struct my_tcp_info { + struct mptcp_subflow_data d; + struct tcp_info ti[2]; + } ti; + int ret, tries = 5; + socklen_t olen; + + do { + memset(&ti, 0, sizeof(ti)); + + ti.d.size_subflow_data = sizeof(struct mptcp_subflow_data); + ti.d.size_user = sizeof(struct tcp_info); + olen = sizeof(ti); + + ret = getsockopt(fd, SOL_MPTCP, MPTCP_TCPINFO, &ti, &olen); + if (ret < 0) + xerror("getsockopt MPTCP_TCPINFO (tries %d, %m)"); + + assert(olen <= sizeof(ti)); + assert(ti.d.size_kernel > 0); + assert(ti.d.size_user == + MIN(ti.d.size_kernel, sizeof(struct tcp_info))); + assert(ti.d.num_subflows == 1); + + assert(olen > (socklen_t)sizeof(struct mptcp_subflow_data)); + olen -= sizeof(struct mptcp_subflow_data); + assert(olen == ti.d.size_user); + + s->tcp_info = ti.ti[0]; + + if (ti.ti[0].tcpi_bytes_sent == w && + ti.ti[0].tcpi_bytes_received == r) + goto done; + + if (r == 0 && ti.ti[0].tcpi_bytes_sent == w && + ti.ti[0].tcpi_bytes_received) { + s->tcpi_rcv_delta = ti.ti[0].tcpi_bytes_received; + goto done; + } + + /* wait and repeat, might be that tx is still ongoing */ + sleep(1); + } while (tries-- > 0); + + xerror("tcpi_bytes_sent %" PRIu64 ", want %zu. tcpi_bytes_received %" PRIu64 ", want %zu", + ti.ti[0].tcpi_bytes_sent, w, ti.ti[0].tcpi_bytes_received, r); + +done: + do_getsockopt_bogus_sf_data(fd, MPTCP_TCPINFO); +} + +static void do_getsockopt_subflow_addrs(struct so_state *s, int fd) +{ + struct sockaddr_storage remote, local; + socklen_t olen, rlen, llen; + int ret; + struct my_addrs { + struct mptcp_subflow_data d; + struct mptcp_subflow_addrs addr[2]; + } addrs; + + memset(&addrs, 0, sizeof(addrs)); + memset(&local, 0, sizeof(local)); + memset(&remote, 0, sizeof(remote)); + + addrs.d.size_subflow_data = sizeof(struct mptcp_subflow_data); + addrs.d.size_user = sizeof(struct mptcp_subflow_addrs); + olen = sizeof(addrs); + + ret = getsockopt(fd, SOL_MPTCP, MPTCP_SUBFLOW_ADDRS, &addrs, &olen); + if (ret < 0) + die_perror("getsockopt MPTCP_SUBFLOW_ADDRS"); + + assert(olen <= sizeof(addrs)); + assert(addrs.d.size_kernel > 0); + assert(addrs.d.size_user == + MIN(addrs.d.size_kernel, sizeof(struct mptcp_subflow_addrs))); + assert(addrs.d.num_subflows == 1); + + assert(olen > (socklen_t)sizeof(struct mptcp_subflow_data)); + olen -= sizeof(struct mptcp_subflow_data); + assert(olen == addrs.d.size_user); + + llen = sizeof(local); + ret = getsockname(fd, (struct sockaddr *)&local, &llen); + if (ret < 0) + die_perror("getsockname"); + rlen = sizeof(remote); + ret = getpeername(fd, (struct sockaddr *)&remote, &rlen); + if (ret < 0) + die_perror("getpeername"); + + assert(rlen > 0); + assert(rlen == llen); + + assert(remote.ss_family == local.ss_family); + + assert(memcmp(&local, &addrs.addr[0].ss_local, sizeof(local)) == 0); + assert(memcmp(&remote, &addrs.addr[0].ss_remote, sizeof(remote)) == 0); + s->addrs = addrs.addr[0]; + + memset(&addrs, 0, sizeof(addrs)); + + addrs.d.size_subflow_data = sizeof(struct mptcp_subflow_data); + addrs.d.size_user = sizeof(sa_family_t); + olen = sizeof(addrs.d) + sizeof(sa_family_t); + + ret = getsockopt(fd, SOL_MPTCP, MPTCP_SUBFLOW_ADDRS, &addrs, &olen); + assert(ret == 0); + assert(olen == sizeof(addrs.d) + sizeof(sa_family_t)); + + assert(addrs.addr[0].sa_family == pf); + assert(addrs.addr[0].sa_family == local.ss_family); + + assert(memcmp(&local, &addrs.addr[0].ss_local, sizeof(local)) != 0); + assert(memcmp(&remote, &addrs.addr[0].ss_remote, sizeof(remote)) != 0); + + do_getsockopt_bogus_sf_data(fd, MPTCP_SUBFLOW_ADDRS); +} + +static void do_getsockopt_mptcp_full_info(struct so_state *s, int fd) +{ + size_t data_size = sizeof(struct mptcp_full_info); + struct mptcp_subflow_info sfinfo[2]; + struct tcp_info tcp_info[2]; + struct mptcp_full_info mfi; + socklen_t olen; + int ret; + + memset(&mfi, 0, data_size); + memset(tcp_info, 0, sizeof(tcp_info)); + memset(sfinfo, 0, sizeof(sfinfo)); + + mfi.size_tcpinfo_user = sizeof(struct tcp_info); + mfi.size_sfinfo_user = sizeof(struct mptcp_subflow_info); + mfi.size_arrays_user = 2; + mfi.subflow_info = (unsigned long)&sfinfo[0]; + mfi.tcp_info = (unsigned long)&tcp_info[0]; + olen = data_size; + + ret = getsockopt(fd, SOL_MPTCP, MPTCP_FULL_INFO, &mfi, &olen); + if (ret < 0) { + if (errno == EOPNOTSUPP) { + perror("MPTCP_FULL_INFO test skipped"); + return; + } + xerror("getsockopt MPTCP_FULL_INFO"); + } + + assert(olen <= data_size); + assert(mfi.size_tcpinfo_kernel > 0); + assert(mfi.size_tcpinfo_user == + MIN(mfi.size_tcpinfo_kernel, sizeof(struct tcp_info))); + assert(mfi.size_sfinfo_kernel > 0); + assert(mfi.size_sfinfo_user == + MIN(mfi.size_sfinfo_kernel, sizeof(struct mptcp_subflow_info))); + assert(mfi.num_subflows == 1); + + /* Tolerate future extension to mptcp_info struct and running newer + * test on top of older kernel. + * Anyway any kernel supporting MPTCP_FULL_INFO must at least include + * the following in mptcp_info. + */ + assert(olen > (socklen_t)__builtin_offsetof(struct mptcp_full_info, tcp_info)); + assert(mfi.mptcp_info.mptcpi_subflows == 0); + assert(mfi.mptcp_info.mptcpi_bytes_sent == s->last_sample.mptcpi_bytes_sent); + assert(mfi.mptcp_info.mptcpi_bytes_received == s->last_sample.mptcpi_bytes_received); + + assert(sfinfo[0].id == 1); + assert(tcp_info[0].tcpi_bytes_sent == s->tcp_info.tcpi_bytes_sent); + assert(tcp_info[0].tcpi_bytes_received == s->tcp_info.tcpi_bytes_received); + assert(!memcmp(&sfinfo->addrs, &s->addrs, sizeof(struct mptcp_subflow_addrs))); +} + +static void do_getsockopts(struct so_state *s, int fd, size_t r, size_t w) +{ + do_getsockopt_mptcp_info(s, fd, w); + + do_getsockopt_tcp_info(s, fd, r, w); + + do_getsockopt_subflow_addrs(s, fd); + + if (r) + do_getsockopt_mptcp_full_info(s, fd); +} + +static void connect_one_server(int fd, int pipefd) +{ + char buf[4096], buf2[4096]; + size_t len, i, total; + struct so_state s; + bool eof = false; + ssize_t ret; + + memset(&s, 0, sizeof(s)); + + len = rand() % (sizeof(buf) - 1); + + if (len < 128) + len = 128; + + for (i = 0; i < len ; i++) { + buf[i] = rand() % 26; + buf[i] += 'A'; + } + + buf[i] = '\n'; + + do_getsockopts(&s, fd, 0, 0); + + /* un-block server */ + ret = read(pipefd, buf2, 4); + assert(ret == 4); + close(pipefd); + + assert(strncmp(buf2, "xmit", 4) == 0); + + ret = write(fd, buf, len); + if (ret < 0) + die_perror("write"); + + if (ret != (ssize_t)len) + xerror("short write"); + + total = 0; + do { + ret = read(fd, buf2 + total, sizeof(buf2) - total); + if (ret < 0) + die_perror("read"); + if (ret == 0) { + eof = true; + break; + } + + total += ret; + } while (total < len); + + if (total != len) + xerror("total %lu, len %lu eof %d\n", total, len, eof); + + if (memcmp(buf, buf2, len)) + xerror("data corruption"); + + if (s.tcpi_rcv_delta) + assert(s.tcpi_rcv_delta <= total); + + do_getsockopts(&s, fd, ret, ret); + + if (eof) + total += 1; /* sequence advances due to FIN */ + + assert(s.mptcpi_rcv_delta == (uint64_t)total); + close(fd); +} + +static void process_one_client(int fd, int pipefd) +{ + ssize_t ret, ret2, ret3; + struct so_state s; + char buf[4096]; + + memset(&s, 0, sizeof(s)); + do_getsockopts(&s, fd, 0, 0); + + ret = write(pipefd, "xmit", 4); + assert(ret == 4); + + ret = read(fd, buf, sizeof(buf)); + if (ret < 0) + die_perror("read"); + + assert(s.mptcpi_rcv_delta <= (uint64_t)ret); + + if (s.tcpi_rcv_delta) + assert(s.tcpi_rcv_delta == (uint64_t)ret); + + ret2 = write(fd, buf, ret); + if (ret2 < 0) + die_perror("write"); + + /* wait for hangup */ + ret3 = read(fd, buf, 1); + if (ret3 != 0) + xerror("expected EOF, got %lu", ret3); + + do_getsockopts(&s, fd, ret, ret2); + if (s.mptcpi_rcv_delta != (uint64_t)ret + 1) + xerror("mptcpi_rcv_delta %" PRIu64 ", expect %" PRIu64, s.mptcpi_rcv_delta, ret + 1, s.mptcpi_rcv_delta - ret); + + /* be nice when running on top of older kernel */ + if (s.pkt_stats_avail) { + if (s.last_sample.mptcpi_bytes_sent != ret2) + xerror("mptcpi_bytes_sent %" PRIu64 ", expect %" PRIu64, + s.last_sample.mptcpi_bytes_sent, ret2, + s.last_sample.mptcpi_bytes_sent - ret2); + if (s.last_sample.mptcpi_bytes_received != ret) + xerror("mptcpi_bytes_received %" PRIu64 ", expect %" PRIu64, + s.last_sample.mptcpi_bytes_received, ret, + s.last_sample.mptcpi_bytes_received - ret); + if (s.last_sample.mptcpi_bytes_acked != ret) + xerror("mptcpi_bytes_acked %" PRIu64 ", expect %" PRIu64, + s.last_sample.mptcpi_bytes_acked, ret2, + s.last_sample.mptcpi_bytes_acked - ret2); + } + + close(fd); +} + +static int xaccept(int s) +{ + int fd = accept(s, NULL, 0); + + if (fd < 0) + die_perror("accept"); + + return fd; +} + +static int server(int pipefd) +{ + int fd = -1, r; + + switch (pf) { + case AF_INET: + fd = sock_listen_mptcp("127.0.0.1", "15432"); + break; + case AF_INET6: + fd = sock_listen_mptcp("::1", "15432"); + break; + default: + xerror("Unknown pf %d\n", pf); + break; + } + + r = write(pipefd, "conn", 4); + assert(r == 4); + + alarm(15); + r = xaccept(fd); + + process_one_client(r, pipefd); + + return 0; +} + +static void test_ip_tos_sockopt(int fd) +{ + uint8_t tos_in, tos_out; + socklen_t s; + int r; + + tos_in = rand() & 0xfc; + r = setsockopt(fd, SOL_IP, IP_TOS, &tos_in, sizeof(tos_out)); + if (r != 0) + die_perror("setsockopt IP_TOS"); + + tos_out = 0; + s = sizeof(tos_out); + r = getsockopt(fd, SOL_IP, IP_TOS, &tos_out, &s); + if (r != 0) + die_perror("getsockopt IP_TOS"); + + if (tos_in != tos_out) + xerror("tos %x != %x socklen_t %d\n", tos_in, tos_out, s); + + if (s != 1) + xerror("tos should be 1 byte"); + + s = 0; + r = getsockopt(fd, SOL_IP, IP_TOS, &tos_out, &s); + if (r != 0) + die_perror("getsockopt IP_TOS 0"); + if (s != 0) + xerror("expect socklen_t == 0"); + + s = -1; + r = getsockopt(fd, SOL_IP, IP_TOS, &tos_out, &s); + if (r != -1 && errno != EINVAL) + die_perror("getsockopt IP_TOS did not indicate -EINVAL"); + if (s != -1) + xerror("expect socklen_t == -1"); +} + +static int client(int pipefd) +{ + int fd = -1; + + alarm(15); + + switch (pf) { + case AF_INET: + fd = sock_connect_mptcp("127.0.0.1", "15432", IPPROTO_MPTCP); + break; + case AF_INET6: + fd = sock_connect_mptcp("::1", "15432", IPPROTO_MPTCP); + break; + default: + xerror("Unknown pf %d\n", pf); + } + + test_ip_tos_sockopt(fd); + + connect_one_server(fd, pipefd); + + return 0; +} + +static pid_t xfork(void) +{ + pid_t p = fork(); + + if (p < 0) + die_perror("fork"); + + return p; +} + +static int rcheck(int wstatus, const char *what) +{ + if (WIFEXITED(wstatus)) { + if (WEXITSTATUS(wstatus) == 0) + return 0; + fprintf(stderr, "%s exited, status=%d\n", what, WEXITSTATUS(wstatus)); + return WEXITSTATUS(wstatus); + } else if (WIFSIGNALED(wstatus)) { + xerror("%s killed by signal %d\n", what, WTERMSIG(wstatus)); + } else if (WIFSTOPPED(wstatus)) { + xerror("%s stopped by signal %d\n", what, WSTOPSIG(wstatus)); + } + + return 111; +} + +static void init_rng(void) +{ + int fd = open("/dev/urandom", O_RDONLY); + + if (fd >= 0) { + unsigned int foo; + ssize_t ret; + + /* can't fail */ + ret = read(fd, &foo, sizeof(foo)); + assert(ret == sizeof(foo)); + + close(fd); + srand(foo); + } else { + srand(time(NULL)); + } +} + +int main(int argc, char *argv[]) +{ + int e1, e2, wstatus; + pid_t s, c, ret; + int pipefds[2]; + + parse_opts(argc, argv); + + init_rng(); + + e1 = pipe(pipefds); + if (e1 < 0) + die_perror("pipe"); + + s = xfork(); + if (s == 0) + return server(pipefds[1]); + + close(pipefds[1]); + + /* wait until server bound a socket */ + e1 = read(pipefds[0], &e1, 4); + assert(e1 == 4); + + c = xfork(); + if (c == 0) + return client(pipefds[0]); + + close(pipefds[0]); + + ret = waitpid(s, &wstatus, 0); + if (ret == -1) + die_perror("waitpid"); + e1 = rcheck(wstatus, "server"); + ret = waitpid(c, &wstatus, 0); + if (ret == -1) + die_perror("waitpid"); + e2 = rcheck(wstatus, "client"); + + return e1 ? e1 : e2; +} diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh new file mode 100755 index 000000000000..e2d70c18786e --- /dev/null +++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh @@ -0,0 +1,338 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Double quotes to prevent globbing and word splitting is recommended in new +# code but we accept it, especially because there were too many before having +# address all other issues detected by shellcheck. +#shellcheck disable=SC2086 + +. "$(dirname "${0}")/mptcp_lib.sh" + +ret=0 +sin="" +sout="" +cin="" +cout="" +timeout_poll=30 +timeout_test=$((timeout_poll * 2 + 1)) +iptables="iptables" +ip6tables="ip6tables" + +ns1="" +ns2="" +ns_sbox="" + +add_mark_rules() +{ + local ns=$1 + local m=$2 + + local t + for t in ${iptables} ${ip6tables}; do + # just to debug: check we have multiple subflows connection requests + ip netns exec $ns $t -A OUTPUT -p tcp --syn -m mark --mark $m -j ACCEPT + + # RST packets might be handled by a internal dummy socket + ip netns exec $ns $t -A OUTPUT -p tcp --tcp-flags RST RST -m mark --mark 0 -j ACCEPT + + ip netns exec $ns $t -A OUTPUT -p tcp -m mark --mark $m -j ACCEPT + ip netns exec $ns $t -A OUTPUT -p tcp -m mark --mark 0 -j DROP + done +} + +init() +{ + mptcp_lib_ns_init ns1 ns2 ns_sbox + + local i + for i in $(seq 1 4); do + ip link add ns1eth$i netns "$ns1" type veth peer name ns2eth$i netns "$ns2" + ip -net "$ns1" addr add 10.0.$i.1/24 dev ns1eth$i + ip -net "$ns1" addr add dead:beef:$i::1/64 dev ns1eth$i nodad + ip -net "$ns1" link set ns1eth$i up + + ip -net "$ns2" addr add 10.0.$i.2/24 dev ns2eth$i + ip -net "$ns2" addr add dead:beef:$i::2/64 dev ns2eth$i nodad + ip -net "$ns2" link set ns2eth$i up + + # let $ns2 reach any $ns1 address from any interface + ip -net "$ns2" route add default via 10.0.$i.1 dev ns2eth$i metric 10$i + + ip netns exec $ns1 ./pm_nl_ctl add 10.0.$i.1 flags signal + ip netns exec $ns1 ./pm_nl_ctl add dead:beef:$i::1 flags signal + + ip netns exec $ns2 ./pm_nl_ctl add 10.0.$i.2 flags signal + ip netns exec $ns2 ./pm_nl_ctl add dead:beef:$i::2 flags signal + done + + ip netns exec $ns1 ./pm_nl_ctl limits 8 8 + ip netns exec $ns2 ./pm_nl_ctl limits 8 8 + + add_mark_rules $ns1 1 + add_mark_rules $ns2 2 +} + +# This function is used in the cleanup trap +#shellcheck disable=SC2317 +cleanup() +{ + mptcp_lib_ns_exit "${ns1}" "${ns2}" "${ns_sbox}" + rm -f "$cin" "$cout" + rm -f "$sin" "$sout" +} + +mptcp_lib_check_mptcp +mptcp_lib_check_kallsyms +mptcp_lib_check_tools ip "${iptables}" "${ip6tables}" + +check_mark() +{ + local ns=$1 + local af=$2 + + local tables=${iptables} + + if [ $af -eq 6 ];then + tables=${ip6tables} + fi + + local counters values + counters=$(ip netns exec $ns $tables -v -L OUTPUT | grep DROP) + values=${counters%DROP*} + + local v + for v in $values; do + if [ $v -ne 0 ]; then + mptcp_lib_pr_fail "got $tables $values in ns $ns," \ + "not 0 - not all expected packets marked" + ret=${KSFT_FAIL} + return 1 + fi + done + + return 0 +} + +print_title() +{ + mptcp_lib_print_title "${@}" +} + +do_transfer() +{ + local listener_ns="$1" + local connector_ns="$2" + local cl_proto="$3" + local srv_proto="$4" + local connect_addr="$5" + + local port=12001 + + :> "$cout" + :> "$sout" + + local mptcp_connect="./mptcp_connect -r 20" + + local local_addr ip + if mptcp_lib_is_v6 "${connect_addr}"; then + local_addr="::" + ip=ipv6 + else + local_addr="0.0.0.0" + ip=ipv4 + fi + + cmsg="TIMESTAMPNS" + if mptcp_lib_kallsyms_has "mptcp_ioctl$"; then + cmsg+=",TCPINQ" + fi + + timeout ${timeout_test} \ + ip netns exec ${listener_ns} \ + $mptcp_connect -t ${timeout_poll} -l -M 1 -p $port -s ${srv_proto} -c "${cmsg}" \ + ${local_addr} < "$sin" > "$sout" & + local spid=$! + + sleep 1 + + timeout ${timeout_test} \ + ip netns exec ${connector_ns} \ + $mptcp_connect -t ${timeout_poll} -M 2 -p $port -s ${cl_proto} -c "${cmsg}" \ + $connect_addr < "$cin" > "$cout" & + + local cpid=$! + + wait $cpid + local retc=$? + wait $spid + local rets=$? + + print_title "Transfer ${ip:2}" + if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then + mptcp_lib_pr_fail "client exit code $retc, server $rets" + echo -e "\nnetns ${listener_ns} socket stat for ${port}:" 1>&2 + ip netns exec ${listener_ns} ss -Menita 1>&2 -o "sport = :$port" + + echo -e "\nnetns ${connector_ns} socket stat for ${port}:" 1>&2 + ip netns exec ${connector_ns} ss -Menita 1>&2 -o "dport = :$port" + + mptcp_lib_result_fail "transfer ${ip}" + + ret=${KSFT_FAIL} + return 1 + fi + if ! mptcp_lib_check_transfer $cin $sout "file received by server"; then + rets=1 + else + mptcp_lib_pr_ok + fi + mptcp_lib_result_code "${rets}" "transfer ${ip}" + + print_title "Mark ${ip:2}" + if [ $local_addr = "::" ];then + check_mark $listener_ns 6 || retc=1 + check_mark $connector_ns 6 || retc=1 + else + check_mark $listener_ns 4 || retc=1 + check_mark $connector_ns 4 || retc=1 + fi + + mptcp_lib_result_code "${retc}" "mark ${ip}" + + if [ $retc -eq 0 ] && [ $rets -eq 0 ];then + mptcp_lib_pr_ok + return 0 + fi + mptcp_lib_pr_fail + + return 1 +} + +make_file() +{ + local name=$1 + local who=$2 + local size=$3 + + mptcp_lib_make_file $name 1024 $size + + echo "Created $name (size $size KB) containing data sent by $who" +} + +do_mptcp_sockopt_tests() +{ + local lret=0 + + if ! mptcp_lib_kallsyms_has "mptcp_diag_fill_info$"; then + mptcp_lib_pr_skip "MPTCP sockopt not supported" + mptcp_lib_result_skip "sockopt" + return + fi + + ip netns exec "$ns_sbox" ./mptcp_sockopt + lret=$? + + print_title "SOL_MPTCP sockopt v4" + if [ $lret -ne 0 ]; then + mptcp_lib_pr_fail + mptcp_lib_result_fail "sockopt v4" + ret=$lret + return + fi + mptcp_lib_pr_ok + mptcp_lib_result_pass "sockopt v4" + + ip netns exec "$ns_sbox" ./mptcp_sockopt -6 + lret=$? + + print_title "SOL_MPTCP sockopt v6" + if [ $lret -ne 0 ]; then + mptcp_lib_pr_fail + mptcp_lib_result_fail "sockopt v6" + ret=$lret + return + fi + mptcp_lib_pr_ok + mptcp_lib_result_pass "sockopt v6" +} + +run_tests() +{ + local listener_ns="$1" + local connector_ns="$2" + local connect_addr="$3" + local lret=0 + + do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} + + lret=$? + + if [ $lret -ne 0 ]; then + ret=$lret + return + fi +} + +do_tcpinq_test() +{ + print_title "TCP_INQ cmsg/ioctl $*" + ip netns exec "$ns_sbox" ./mptcp_inq "$@" + local lret=$? + if [ $lret -ne 0 ];then + ret=$lret + mptcp_lib_pr_fail + mptcp_lib_result_fail "TCP_INQ: $*" + return $lret + fi + + mptcp_lib_pr_ok + mptcp_lib_result_pass "TCP_INQ: $*" + return $lret +} + +do_tcpinq_tests() +{ + local lret=0 + + if ! mptcp_lib_kallsyms_has "mptcp_ioctl$"; then + mptcp_lib_pr_skip "TCP_INQ not supported" + mptcp_lib_result_skip "TCP_INQ" + return + fi + + local args + for args in "-t tcp" "-r tcp"; do + do_tcpinq_test $args + lret=$? + if [ $lret -ne 0 ] ; then + return $lret + fi + do_tcpinq_test -6 $args + lret=$? + if [ $lret -ne 0 ] ; then + return $lret + fi + done + + do_tcpinq_test -r tcp -t tcp + + return $? +} + +sin=$(mktemp) +sout=$(mktemp) +cin=$(mktemp) +cout=$(mktemp) +init +make_file "$cin" "client" 1 +make_file "$sin" "server" 1 +trap cleanup EXIT + +run_tests $ns1 $ns2 10.0.1.1 +run_tests $ns1 $ns2 dead:beef:1::1 + +do_mptcp_sockopt_tests +do_tcpinq_tests + +mptcp_lib_result_print_all_tap +exit $ret diff --git a/tools/testing/selftests/net/mptcp/pm_netlink.sh b/tools/testing/selftests/net/mptcp/pm_netlink.sh index 15f4f46ca3a9..6ab8c5d36340 100755 --- a/tools/testing/selftests/net/mptcp/pm_netlink.sh +++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh @@ -1,76 +1,79 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -ksft_skip=4 +# Double quotes to prevent globbing and word splitting is recommended in new +# code but we accept it, especially because there were too many before having +# address all other issues detected by shellcheck. +#shellcheck disable=SC2086 + +. "$(dirname "${0}")/mptcp_lib.sh" + ret=0 usage() { echo "Usage: $0 [ -h ]" } - +optstring=h while getopts "$optstring" option;do case "$option" in "h") usage $0 - exit 0 + exit ${KSFT_PASS} ;; "?") usage $0 - exit 1 + exit ${KSFT_FAIL} ;; esac done -sec=$(date +%s) -rndh=$(printf %x $sec)-$(mktemp -u XXXXXX) -ns1="ns1-$rndh" +ns1="" err=$(mktemp) -ret=0 +# This function is used in the cleanup trap +#shellcheck disable=SC2317 cleanup() { rm -f $err - ip netns del $ns1 + mptcp_lib_ns_exit "${ns1}" } -ip -Version > /dev/null 2>&1 -if [ $? -ne 0 ];then - echo "SKIP: Could not run test without ip tool" - exit $ksft_skip -fi +mptcp_lib_check_mptcp +mptcp_lib_check_tools ip trap cleanup EXIT -ip netns add $ns1 || exit $ksft_skip -ip -net $ns1 link set lo up -ip netns exec $ns1 sysctl -q net.mptcp.enabled=1 +mptcp_lib_ns_init ns1 check() { local cmd="$1" local expected="$2" local msg="$3" - local out=`$cmd 2>$err` - local cmd_ret=$? - - printf "%-50s %s" "$msg" - if [ $cmd_ret -ne 0 ]; then - echo "[FAIL] command execution '$cmd' stderr " - cat $err - ret=1 - elif [ "$out" = "$expected" ]; then - echo "[ OK ]" - else - echo -n "[FAIL] " - echo "expected '$expected' got '$out'" - ret=1 + local rc=0 + + mptcp_lib_print_title "$msg" + mptcp_lib_check_output "${err}" "${cmd}" "${expected}" || rc=${?} + if [ ${rc} -eq 2 ]; then + mptcp_lib_result_fail "${msg} # error ${rc}" + ret=${KSFT_FAIL} + elif [ ${rc} -eq 0 ]; then + mptcp_lib_print_ok "[ OK ]" + mptcp_lib_result_pass "${msg}" + elif [ ${rc} -eq 1 ]; then + mptcp_lib_result_fail "${msg} # different output" + ret=${KSFT_FAIL} fi } check "ip netns exec $ns1 ./pm_nl_ctl dump" "" "defaults addr list" -check "ip netns exec $ns1 ./pm_nl_ctl limits" "accept 0 -subflows 0" "defaults limits" + +default_limits="$(ip netns exec $ns1 ./pm_nl_ctl limits)" +if mptcp_lib_expect_all_features; then + check "ip netns exec $ns1 ./pm_nl_ctl limits" "accept 0 +subflows 2" "defaults limits" +fi ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.1 ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.2 flags subflow dev lo @@ -88,21 +91,22 @@ check "ip netns exec $ns1 ./pm_nl_ctl dump" \ "id 1 flags 10.0.1.1 id 3 flags signal,backup 10.0.1.3" "dump addrs after del" -ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.3 +ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.3 2>/dev/null check "ip netns exec $ns1 ./pm_nl_ctl get 4" "" "duplicate addr" -ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.4 id 10 flags signal +ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.4 flags signal check "ip netns exec $ns1 ./pm_nl_ctl get 4" "id 4 flags signal 10.0.1.4" "id addr increment" -for i in `seq 5 9`; do +for i in $(seq 5 9); do ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.$i flags signal >/dev/null 2>&1 done check "ip netns exec $ns1 ./pm_nl_ctl get 9" "id 9 flags signal 10.0.1.9" "hard addr limit" check "ip netns exec $ns1 ./pm_nl_ctl get 10" "" "above hard addr limit" -for i in `seq 9 256`; do +ip netns exec $ns1 ./pm_nl_ctl del 9 +for i in $(seq 10 255); do + ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.9 id $i ip netns exec $ns1 ./pm_nl_ctl del $i - ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.9 done check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags 10.0.1.1 id 3 flags signal,backup 10.0.1.3 @@ -115,16 +119,81 @@ id 8 flags signal 10.0.1.8" "id limit" ip netns exec $ns1 ./pm_nl_ctl flush check "ip netns exec $ns1 ./pm_nl_ctl dump" "" "flush addrs" -ip netns exec $ns1 ./pm_nl_ctl limits 9 1 -check "ip netns exec $ns1 ./pm_nl_ctl limits" "accept 0 -subflows 0" "rcv addrs above hard limit" +ip netns exec $ns1 ./pm_nl_ctl limits 9 1 2>/dev/null +check "ip netns exec $ns1 ./pm_nl_ctl limits" "$default_limits" "rcv addrs above hard limit" -ip netns exec $ns1 ./pm_nl_ctl limits 1 9 -check "ip netns exec $ns1 ./pm_nl_ctl limits" "accept 0 -subflows 0" "subflows above hard limit" +ip netns exec $ns1 ./pm_nl_ctl limits 1 9 2>/dev/null +check "ip netns exec $ns1 ./pm_nl_ctl limits" "$default_limits" "subflows above hard limit" ip netns exec $ns1 ./pm_nl_ctl limits 8 8 check "ip netns exec $ns1 ./pm_nl_ctl limits" "accept 8 subflows 8" "set limits" +ip netns exec $ns1 ./pm_nl_ctl flush +ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.1 +ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.2 +ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.3 id 100 +ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.4 +ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.5 id 254 +ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.6 +ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.7 +ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.8 +check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags 10.0.1.1 +id 2 flags 10.0.1.2 +id 3 flags 10.0.1.7 +id 4 flags 10.0.1.8 +id 100 flags 10.0.1.3 +id 101 flags 10.0.1.4 +id 254 flags 10.0.1.5 +id 255 flags 10.0.1.6" "set ids" + +ip netns exec $ns1 ./pm_nl_ctl flush +ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.1 +ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.2 id 254 +ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.3 +ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.4 +ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.5 id 253 +ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.6 +ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.7 +ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.8 +check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags 10.0.0.1 +id 2 flags 10.0.0.4 +id 3 flags 10.0.0.6 +id 4 flags 10.0.0.7 +id 5 flags 10.0.0.8 +id 253 flags 10.0.0.5 +id 254 flags 10.0.0.2 +id 255 flags 10.0.0.3" "wrap-around ids" + +ip netns exec $ns1 ./pm_nl_ctl flush +ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.1 flags subflow +ip netns exec $ns1 ./pm_nl_ctl set 10.0.1.1 flags backup +check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \ +subflow,backup 10.0.1.1" "set flags (backup)" +ip netns exec $ns1 ./pm_nl_ctl set 10.0.1.1 flags nobackup +check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \ +subflow 10.0.1.1" " (nobackup)" + +# fullmesh support has been added later +ip netns exec $ns1 ./pm_nl_ctl set id 1 flags fullmesh 2>/dev/null +if ip netns exec $ns1 ./pm_nl_ctl dump | grep -q "fullmesh" || + mptcp_lib_expect_all_features; then + check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \ +subflow,fullmesh 10.0.1.1" " (fullmesh)" + ip netns exec $ns1 ./pm_nl_ctl set id 1 flags nofullmesh + check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \ +subflow 10.0.1.1" " (nofullmesh)" + ip netns exec $ns1 ./pm_nl_ctl set id 1 flags backup,fullmesh + check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \ +subflow,backup,fullmesh 10.0.1.1" " (backup,fullmesh)" +else + for st in fullmesh nofullmesh backup,fullmesh; do + st=" (${st})" + mptcp_lib_print_title "${st}" + mptcp_lib_pr_skip + mptcp_lib_result_skip "${st}" + done +fi + +mptcp_lib_result_print_all_tap exit $ret diff --git a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c index b24a2f17d415..7426a2cbd4a0 100644 --- a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c +++ b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c @@ -6,6 +6,7 @@ #include <stdlib.h> #include <string.h> #include <unistd.h> +#include <limits.h> #include <sys/socket.h> #include <sys/types.h> @@ -21,16 +22,29 @@ #ifndef MPTCP_PM_NAME #define MPTCP_PM_NAME "mptcp_pm" #endif +#ifndef MPTCP_PM_EVENTS +#define MPTCP_PM_EVENTS "mptcp_pm_events" +#endif +#ifndef IPPROTO_MPTCP +#define IPPROTO_MPTCP 262 +#endif static void syntax(char *argv[]) { - fprintf(stderr, "%s add|get|del|flush|dump|accept [<args>]\n", argv[0]); - fprintf(stderr, "\tadd [flags signal|subflow|backup] [id <nr>] [dev <name>] <ip>\n"); - fprintf(stderr, "\tdel <id>\n"); + fprintf(stderr, "%s add|ann|rem|csf|dsf|get|set|del|flush|dump|events|listen|accept [<args>]\n", argv[0]); + fprintf(stderr, "\tadd [flags signal|subflow|backup|fullmesh] [id <nr>] [dev <name>] <ip>\n"); + fprintf(stderr, "\tann <local-ip> id <local-id> token <token> [port <local-port>] [dev <name>]\n"); + fprintf(stderr, "\trem id <local-id> token <token>\n"); + fprintf(stderr, "\tcsf lip <local-ip> lid <local-id> rip <remote-ip> rport <remote-port> token <token>\n"); + fprintf(stderr, "\tdsf lip <local-ip> lport <local-port> rip <remote-ip> rport <remote-port> token <token>\n"); + fprintf(stderr, "\tdel <id> [<ip>]\n"); fprintf(stderr, "\tget <id>\n"); + fprintf(stderr, "\tset [<ip>] [id <nr>] flags [no]backup|[no]fullmesh [port <nr>] [token <token>] [rip <ip>] [rport <port>]\n"); fprintf(stderr, "\tflush\n"); fprintf(stderr, "\tdump\n"); fprintf(stderr, "\tlimits [<rcv addr max> <subflow max>]\n"); + fprintf(stderr, "\tevents\n"); + fprintf(stderr, "\tlisten <local-ip> <local-port>\n"); exit(0); } @@ -52,20 +66,25 @@ static int init_genl_req(char *data, int family, int cmd, int version) return off; } -static void nl_error(struct nlmsghdr *nh) +static int nl_error(struct nlmsghdr *nh) { struct nlmsgerr *err = (struct nlmsgerr *)NLMSG_DATA(nh); int len = nh->nlmsg_len - sizeof(*nh); uint32_t off; - if (len < sizeof(struct nlmsgerr)) + if (len < sizeof(struct nlmsgerr)) { error(1, 0, "netlink error message truncated %d min %ld", len, sizeof(struct nlmsgerr)); + return -1; + } - if (!err->error) { + if (err->error) { /* check messages from kernel */ struct rtattr *attrs = (struct rtattr *)NLMSG_DATA(nh); + fprintf(stderr, "netlink error %d (%s)\n", + err->error, strerror(-err->error)); + while (RTA_OK(attrs, len)) { if (attrs->rta_type == NLMSGERR_ATTR_MSG) fprintf(stderr, "netlink ext ack msg: %s\n", @@ -77,12 +96,115 @@ static void nl_error(struct nlmsghdr *nh) } attrs = RTA_NEXT(attrs, len); } - } else { - fprintf(stderr, "netlink error %d", err->error); + return -1; } + + return 0; } -/* do a netlink command and, if max > 0, fetch the reply */ +static int capture_events(int fd, int event_group) +{ + u_int8_t buffer[NLMSG_ALIGN(sizeof(struct nlmsghdr)) + + NLMSG_ALIGN(sizeof(struct genlmsghdr)) + 1024]; + struct genlmsghdr *ghdr; + struct rtattr *attrs; + struct nlmsghdr *nh; + int ret = 0; + int res_len; + int msg_len; + fd_set rfds; + + if (setsockopt(fd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP, + &event_group, sizeof(event_group)) < 0) + error(1, errno, "could not join the " MPTCP_PM_EVENTS " mcast group"); + + do { + FD_ZERO(&rfds); + FD_SET(fd, &rfds); + res_len = NLMSG_ALIGN(sizeof(struct nlmsghdr)) + + NLMSG_ALIGN(sizeof(struct genlmsghdr)) + 1024; + + ret = select(FD_SETSIZE, &rfds, NULL, NULL, NULL); + + if (ret < 0) + error(1, ret, "error in select() on NL socket"); + + res_len = recv(fd, buffer, res_len, 0); + if (res_len < 0) + error(1, res_len, "error on recv() from NL socket"); + + nh = (struct nlmsghdr *)buffer; + + for (; NLMSG_OK(nh, res_len); nh = NLMSG_NEXT(nh, res_len)) { + if (nh->nlmsg_type == NLMSG_ERROR) + error(1, NLMSG_ERROR, "received invalid NL message"); + + ghdr = (struct genlmsghdr *)NLMSG_DATA(nh); + + if (ghdr->cmd == 0) + continue; + + fprintf(stderr, "type:%d", ghdr->cmd); + + msg_len = nh->nlmsg_len - NLMSG_LENGTH(GENL_HDRLEN); + + attrs = (struct rtattr *) ((char *) ghdr + GENL_HDRLEN); + while (RTA_OK(attrs, msg_len)) { + if (attrs->rta_type == MPTCP_ATTR_TOKEN) + fprintf(stderr, ",token:%u", *(__u32 *)RTA_DATA(attrs)); + else if (attrs->rta_type == MPTCP_ATTR_FAMILY) + fprintf(stderr, ",family:%u", *(__u16 *)RTA_DATA(attrs)); + else if (attrs->rta_type == MPTCP_ATTR_LOC_ID) + fprintf(stderr, ",loc_id:%u", *(__u8 *)RTA_DATA(attrs)); + else if (attrs->rta_type == MPTCP_ATTR_REM_ID) + fprintf(stderr, ",rem_id:%u", *(__u8 *)RTA_DATA(attrs)); + else if (attrs->rta_type == MPTCP_ATTR_SADDR4) { + u_int32_t saddr4 = ntohl(*(__u32 *)RTA_DATA(attrs)); + + fprintf(stderr, ",saddr4:%u.%u.%u.%u", saddr4 >> 24, + (saddr4 >> 16) & 0xFF, (saddr4 >> 8) & 0xFF, + (saddr4 & 0xFF)); + } else if (attrs->rta_type == MPTCP_ATTR_SADDR6) { + char buf[INET6_ADDRSTRLEN]; + + if (inet_ntop(AF_INET6, RTA_DATA(attrs), buf, + sizeof(buf)) != NULL) + fprintf(stderr, ",saddr6:%s", buf); + } else if (attrs->rta_type == MPTCP_ATTR_DADDR4) { + u_int32_t daddr4 = ntohl(*(__u32 *)RTA_DATA(attrs)); + + fprintf(stderr, ",daddr4:%u.%u.%u.%u", daddr4 >> 24, + (daddr4 >> 16) & 0xFF, (daddr4 >> 8) & 0xFF, + (daddr4 & 0xFF)); + } else if (attrs->rta_type == MPTCP_ATTR_DADDR6) { + char buf[INET6_ADDRSTRLEN]; + + if (inet_ntop(AF_INET6, RTA_DATA(attrs), buf, + sizeof(buf)) != NULL) + fprintf(stderr, ",daddr6:%s", buf); + } else if (attrs->rta_type == MPTCP_ATTR_SPORT) + fprintf(stderr, ",sport:%u", + ntohs(*(__u16 *)RTA_DATA(attrs))); + else if (attrs->rta_type == MPTCP_ATTR_DPORT) + fprintf(stderr, ",dport:%u", + ntohs(*(__u16 *)RTA_DATA(attrs))); + else if (attrs->rta_type == MPTCP_ATTR_BACKUP) + fprintf(stderr, ",backup:%u", *(__u8 *)RTA_DATA(attrs)); + else if (attrs->rta_type == MPTCP_ATTR_ERROR) + fprintf(stderr, ",error:%u", *(__u8 *)RTA_DATA(attrs)); + else if (attrs->rta_type == MPTCP_ATTR_SERVER_SIDE) + fprintf(stderr, ",server_side:%u", *(__u8 *)RTA_DATA(attrs)); + + attrs = RTA_NEXT(attrs, msg_len); + } + } + fprintf(stderr, "\n"); + } while (1); + + return 0; +} + +/* do a netlink command and, if max > 0, fetch the reply ; nh's size >1024B */ static int do_nl_req(int fd, struct nlmsghdr *nh, int len, int max) { struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK }; @@ -91,12 +213,16 @@ static int do_nl_req(int fd, struct nlmsghdr *nh, int len, int max) int rem, ret; int err = 0; + /* If no expected answer, ask for an ACK to look for errors if any */ + if (max == 0) { + nh->nlmsg_flags |= NLM_F_ACK; + max = 1024; + } + nh->nlmsg_len = len; ret = sendto(fd, data, len, 0, (void *)&nladdr, sizeof(nladdr)); if (ret != len) error(1, errno, "send netlink: %uB != %uB\n", ret, len); - if (max == 0) - return 0; addr_len = sizeof(nladdr); rem = ret = recvfrom(fd, data, max, 0, (void *)&nladdr, &addr_len); @@ -105,21 +231,29 @@ static int do_nl_req(int fd, struct nlmsghdr *nh, int len, int max) /* Beware: the NLMSG_NEXT macro updates the 'rem' argument */ for (; NLMSG_OK(nh, rem); nh = NLMSG_NEXT(nh, rem)) { - if (nh->nlmsg_type == NLMSG_ERROR) { - nl_error(nh); + if (nh->nlmsg_type == NLMSG_DONE) + break; + + if (nh->nlmsg_type == NLMSG_ERROR && nl_error(nh)) err = 1; - } } if (err) error(1, 0, "bailing out due to netlink error[s]"); return ret; } -static int genl_parse_getfamily(struct nlmsghdr *nlh) +static int genl_parse_getfamily(struct nlmsghdr *nlh, int *pm_family, + int *events_mcast_grp) { struct genlmsghdr *ghdr = NLMSG_DATA(nlh); int len = nlh->nlmsg_len; struct rtattr *attrs; + struct rtattr *grps; + struct rtattr *grp; + int got_events_grp; + int got_family; + int grps_len; + int grp_len; if (nlh->nlmsg_type != GENL_ID_CTRL) error(1, errno, "Not a controller message, len=%d type=0x%x\n", @@ -134,9 +268,42 @@ static int genl_parse_getfamily(struct nlmsghdr *nlh) error(1, errno, "Unknown controller command %d\n", ghdr->cmd); attrs = (struct rtattr *) ((char *) ghdr + GENL_HDRLEN); + got_family = 0; + got_events_grp = 0; + while (RTA_OK(attrs, len)) { - if (attrs->rta_type == CTRL_ATTR_FAMILY_ID) - return *(__u16 *)RTA_DATA(attrs); + if (attrs->rta_type == CTRL_ATTR_FAMILY_ID) { + *pm_family = *(__u16 *)RTA_DATA(attrs); + got_family = 1; + } else if (attrs->rta_type == CTRL_ATTR_MCAST_GROUPS) { + grps = RTA_DATA(attrs); + grps_len = RTA_PAYLOAD(attrs); + + while (RTA_OK(grps, grps_len)) { + grp = RTA_DATA(grps); + grp_len = RTA_PAYLOAD(grps); + got_events_grp = 0; + + while (RTA_OK(grp, grp_len)) { + if (grp->rta_type == CTRL_ATTR_MCAST_GRP_ID) + *events_mcast_grp = *(__u32 *)RTA_DATA(grp); + else if (grp->rta_type == CTRL_ATTR_MCAST_GRP_NAME && + !strcmp(RTA_DATA(grp), MPTCP_PM_EVENTS)) + got_events_grp = 1; + + grp = RTA_NEXT(grp, grp_len); + } + + if (got_events_grp) + break; + + grps = RTA_NEXT(grps, grps_len); + } + } + + if (got_family && got_events_grp) + return 0; + attrs = RTA_NEXT(attrs, len); } @@ -144,7 +311,7 @@ static int genl_parse_getfamily(struct nlmsghdr *nlh) return -1; } -static int resolve_mptcp_pm_netlink(int fd) +static int resolve_mptcp_pm_netlink(int fd, int *pm_family, int *events_mcast_grp) { char data[NLMSG_ALIGN(sizeof(struct nlmsghdr)) + NLMSG_ALIGN(sizeof(struct genlmsghdr)) + @@ -166,7 +333,429 @@ static int resolve_mptcp_pm_netlink(int fd) off += NLMSG_ALIGN(rta->rta_len); do_nl_req(fd, nh, off, sizeof(data)); - return genl_parse_getfamily((void *)data); + return genl_parse_getfamily((void *)data, pm_family, events_mcast_grp); +} + +int dsf(int fd, int pm_family, int argc, char *argv[]) +{ + char data[NLMSG_ALIGN(sizeof(struct nlmsghdr)) + + NLMSG_ALIGN(sizeof(struct genlmsghdr)) + + 1024]; + struct rtattr *rta, *addr; + u_int16_t family, port; + struct nlmsghdr *nh; + u_int32_t token; + int addr_start; + int off = 0; + int arg; + + const char *params[5]; + + memset(params, 0, 5 * sizeof(const char *)); + + memset(data, 0, sizeof(data)); + nh = (void *)data; + off = init_genl_req(data, pm_family, MPTCP_PM_CMD_SUBFLOW_DESTROY, + MPTCP_PM_VER); + + if (argc < 12) + syntax(argv); + + /* Params recorded in this order: + * <local-ip>, <local-port>, <remote-ip>, <remote-port>, <token> + */ + for (arg = 2; arg < argc; arg++) { + if (!strcmp(argv[arg], "lip")) { + if (++arg >= argc) + error(1, 0, " missing local IP"); + + params[0] = argv[arg]; + } else if (!strcmp(argv[arg], "lport")) { + if (++arg >= argc) + error(1, 0, " missing local port"); + + params[1] = argv[arg]; + } else if (!strcmp(argv[arg], "rip")) { + if (++arg >= argc) + error(1, 0, " missing remote IP"); + + params[2] = argv[arg]; + } else if (!strcmp(argv[arg], "rport")) { + if (++arg >= argc) + error(1, 0, " missing remote port"); + + params[3] = argv[arg]; + } else if (!strcmp(argv[arg], "token")) { + if (++arg >= argc) + error(1, 0, " missing token"); + + params[4] = argv[arg]; + } else + error(1, 0, "unknown keyword %s", argv[arg]); + } + + for (arg = 0; arg < 4; arg = arg + 2) { + /* addr header */ + addr_start = off; + addr = (void *)(data + off); + addr->rta_type = NLA_F_NESTED | + ((arg == 0) ? MPTCP_PM_ATTR_ADDR : MPTCP_PM_ATTR_ADDR_REMOTE); + addr->rta_len = RTA_LENGTH(0); + off += NLMSG_ALIGN(addr->rta_len); + + /* addr data */ + rta = (void *)(data + off); + if (inet_pton(AF_INET, params[arg], RTA_DATA(rta))) { + family = AF_INET; + rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR4; + rta->rta_len = RTA_LENGTH(4); + } else if (inet_pton(AF_INET6, params[arg], RTA_DATA(rta))) { + family = AF_INET6; + rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR6; + rta->rta_len = RTA_LENGTH(16); + } else + error(1, errno, "can't parse ip %s", params[arg]); + off += NLMSG_ALIGN(rta->rta_len); + + /* family */ + rta = (void *)(data + off); + rta->rta_type = MPTCP_PM_ADDR_ATTR_FAMILY; + rta->rta_len = RTA_LENGTH(2); + memcpy(RTA_DATA(rta), &family, 2); + off += NLMSG_ALIGN(rta->rta_len); + + /* port */ + port = atoi(params[arg + 1]); + rta = (void *)(data + off); + rta->rta_type = MPTCP_PM_ADDR_ATTR_PORT; + rta->rta_len = RTA_LENGTH(2); + memcpy(RTA_DATA(rta), &port, 2); + off += NLMSG_ALIGN(rta->rta_len); + + addr->rta_len = off - addr_start; + } + + /* token */ + token = strtoul(params[4], NULL, 10); + rta = (void *)(data + off); + rta->rta_type = MPTCP_PM_ATTR_TOKEN; + rta->rta_len = RTA_LENGTH(4); + memcpy(RTA_DATA(rta), &token, 4); + off += NLMSG_ALIGN(rta->rta_len); + + do_nl_req(fd, nh, off, 0); + + return 0; +} + +int csf(int fd, int pm_family, int argc, char *argv[]) +{ + char data[NLMSG_ALIGN(sizeof(struct nlmsghdr)) + + NLMSG_ALIGN(sizeof(struct genlmsghdr)) + + 1024]; + u_int32_t flags = MPTCP_PM_ADDR_FLAG_SUBFLOW; + const char *params[5]; + struct nlmsghdr *nh; + struct rtattr *addr; + struct rtattr *rta; + u_int16_t family; + u_int32_t token; + u_int16_t port; + int addr_start; + u_int8_t id; + int off = 0; + int arg; + + memset(params, 0, 5 * sizeof(const char *)); + + memset(data, 0, sizeof(data)); + nh = (void *)data; + off = init_genl_req(data, pm_family, MPTCP_PM_CMD_SUBFLOW_CREATE, + MPTCP_PM_VER); + + if (argc < 12) + syntax(argv); + + /* Params recorded in this order: + * <local-ip>, <local-id>, <remote-ip>, <remote-port>, <token> + */ + for (arg = 2; arg < argc; arg++) { + if (!strcmp(argv[arg], "lip")) { + if (++arg >= argc) + error(1, 0, " missing local IP"); + + params[0] = argv[arg]; + } else if (!strcmp(argv[arg], "lid")) { + if (++arg >= argc) + error(1, 0, " missing local id"); + + params[1] = argv[arg]; + } else if (!strcmp(argv[arg], "rip")) { + if (++arg >= argc) + error(1, 0, " missing remote ip"); + + params[2] = argv[arg]; + } else if (!strcmp(argv[arg], "rport")) { + if (++arg >= argc) + error(1, 0, " missing remote port"); + + params[3] = argv[arg]; + } else if (!strcmp(argv[arg], "token")) { + if (++arg >= argc) + error(1, 0, " missing token"); + + params[4] = argv[arg]; + } else + error(1, 0, "unknown param %s", argv[arg]); + } + + for (arg = 0; arg < 4; arg = arg + 2) { + /* addr header */ + addr_start = off; + addr = (void *)(data + off); + addr->rta_type = NLA_F_NESTED | + ((arg == 0) ? MPTCP_PM_ATTR_ADDR : MPTCP_PM_ATTR_ADDR_REMOTE); + addr->rta_len = RTA_LENGTH(0); + off += NLMSG_ALIGN(addr->rta_len); + + /* addr data */ + rta = (void *)(data + off); + if (inet_pton(AF_INET, params[arg], RTA_DATA(rta))) { + family = AF_INET; + rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR4; + rta->rta_len = RTA_LENGTH(4); + } else if (inet_pton(AF_INET6, params[arg], RTA_DATA(rta))) { + family = AF_INET6; + rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR6; + rta->rta_len = RTA_LENGTH(16); + } else + error(1, errno, "can't parse ip %s", params[arg]); + off += NLMSG_ALIGN(rta->rta_len); + + /* family */ + rta = (void *)(data + off); + rta->rta_type = MPTCP_PM_ADDR_ATTR_FAMILY; + rta->rta_len = RTA_LENGTH(2); + memcpy(RTA_DATA(rta), &family, 2); + off += NLMSG_ALIGN(rta->rta_len); + + if (arg == 2) { + /* port */ + port = atoi(params[arg + 1]); + rta = (void *)(data + off); + rta->rta_type = MPTCP_PM_ADDR_ATTR_PORT; + rta->rta_len = RTA_LENGTH(2); + memcpy(RTA_DATA(rta), &port, 2); + off += NLMSG_ALIGN(rta->rta_len); + } + + if (arg == 0) { + /* id */ + id = atoi(params[arg + 1]); + rta = (void *)(data + off); + rta->rta_type = MPTCP_PM_ADDR_ATTR_ID; + rta->rta_len = RTA_LENGTH(1); + memcpy(RTA_DATA(rta), &id, 1); + off += NLMSG_ALIGN(rta->rta_len); + } + + /* addr flags */ + rta = (void *)(data + off); + rta->rta_type = MPTCP_PM_ADDR_ATTR_FLAGS; + rta->rta_len = RTA_LENGTH(4); + memcpy(RTA_DATA(rta), &flags, 4); + off += NLMSG_ALIGN(rta->rta_len); + + addr->rta_len = off - addr_start; + } + + /* token */ + token = strtoul(params[4], NULL, 10); + rta = (void *)(data + off); + rta->rta_type = MPTCP_PM_ATTR_TOKEN; + rta->rta_len = RTA_LENGTH(4); + memcpy(RTA_DATA(rta), &token, 4); + off += NLMSG_ALIGN(rta->rta_len); + + do_nl_req(fd, nh, off, 0); + + return 0; +} + +int remove_addr(int fd, int pm_family, int argc, char *argv[]) +{ + char data[NLMSG_ALIGN(sizeof(struct nlmsghdr)) + + NLMSG_ALIGN(sizeof(struct genlmsghdr)) + + 1024]; + struct nlmsghdr *nh; + struct rtattr *rta; + u_int32_t token; + u_int8_t id; + int off = 0; + int arg; + + memset(data, 0, sizeof(data)); + nh = (void *)data; + off = init_genl_req(data, pm_family, MPTCP_PM_CMD_REMOVE, + MPTCP_PM_VER); + + if (argc < 6) + syntax(argv); + + for (arg = 2; arg < argc; arg++) { + if (!strcmp(argv[arg], "id")) { + if (++arg >= argc) + error(1, 0, " missing id value"); + + id = atoi(argv[arg]); + rta = (void *)(data + off); + rta->rta_type = MPTCP_PM_ATTR_LOC_ID; + rta->rta_len = RTA_LENGTH(1); + memcpy(RTA_DATA(rta), &id, 1); + off += NLMSG_ALIGN(rta->rta_len); + } else if (!strcmp(argv[arg], "token")) { + if (++arg >= argc) + error(1, 0, " missing token value"); + + token = strtoul(argv[arg], NULL, 10); + rta = (void *)(data + off); + rta->rta_type = MPTCP_PM_ATTR_TOKEN; + rta->rta_len = RTA_LENGTH(4); + memcpy(RTA_DATA(rta), &token, 4); + off += NLMSG_ALIGN(rta->rta_len); + } else + error(1, 0, "unknown keyword %s", argv[arg]); + } + + do_nl_req(fd, nh, off, 0); + return 0; +} + +int announce_addr(int fd, int pm_family, int argc, char *argv[]) +{ + char data[NLMSG_ALIGN(sizeof(struct nlmsghdr)) + + NLMSG_ALIGN(sizeof(struct genlmsghdr)) + + 1024]; + u_int32_t flags = MPTCP_PM_ADDR_FLAG_SIGNAL; + u_int32_t token = UINT_MAX; + struct rtattr *rta, *addr; + u_int32_t id = UINT_MAX; + struct nlmsghdr *nh; + u_int16_t family; + int addr_start; + int off = 0; + int arg; + + memset(data, 0, sizeof(data)); + nh = (void *)data; + off = init_genl_req(data, pm_family, MPTCP_PM_CMD_ANNOUNCE, + MPTCP_PM_VER); + + if (argc < 7) + syntax(argv); + + /* local-ip header */ + addr_start = off; + addr = (void *)(data + off); + addr->rta_type = NLA_F_NESTED | MPTCP_PM_ATTR_ADDR; + addr->rta_len = RTA_LENGTH(0); + off += NLMSG_ALIGN(addr->rta_len); + + /* local-ip data */ + /* record addr type */ + rta = (void *)(data + off); + if (inet_pton(AF_INET, argv[2], RTA_DATA(rta))) { + family = AF_INET; + rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR4; + rta->rta_len = RTA_LENGTH(4); + } else if (inet_pton(AF_INET6, argv[2], RTA_DATA(rta))) { + family = AF_INET6; + rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR6; + rta->rta_len = RTA_LENGTH(16); + } else + error(1, errno, "can't parse ip %s", argv[2]); + off += NLMSG_ALIGN(rta->rta_len); + + /* addr family */ + rta = (void *)(data + off); + rta->rta_type = MPTCP_PM_ADDR_ATTR_FAMILY; + rta->rta_len = RTA_LENGTH(2); + memcpy(RTA_DATA(rta), &family, 2); + off += NLMSG_ALIGN(rta->rta_len); + + for (arg = 3; arg < argc; arg++) { + if (!strcmp(argv[arg], "id")) { + /* local-id */ + if (++arg >= argc) + error(1, 0, " missing id value"); + + id = atoi(argv[arg]); + rta = (void *)(data + off); + rta->rta_type = MPTCP_PM_ADDR_ATTR_ID; + rta->rta_len = RTA_LENGTH(1); + memcpy(RTA_DATA(rta), &id, 1); + off += NLMSG_ALIGN(rta->rta_len); + } else if (!strcmp(argv[arg], "dev")) { + /* for the if_index */ + int32_t ifindex; + + if (++arg >= argc) + error(1, 0, " missing dev name"); + + ifindex = if_nametoindex(argv[arg]); + if (!ifindex) + error(1, errno, "unknown device %s", argv[arg]); + + rta = (void *)(data + off); + rta->rta_type = MPTCP_PM_ADDR_ATTR_IF_IDX; + rta->rta_len = RTA_LENGTH(4); + memcpy(RTA_DATA(rta), &ifindex, 4); + off += NLMSG_ALIGN(rta->rta_len); + } else if (!strcmp(argv[arg], "port")) { + /* local-port (optional) */ + u_int16_t port; + + if (++arg >= argc) + error(1, 0, " missing port value"); + + port = atoi(argv[arg]); + rta = (void *)(data + off); + rta->rta_type = MPTCP_PM_ADDR_ATTR_PORT; + rta->rta_len = RTA_LENGTH(2); + memcpy(RTA_DATA(rta), &port, 2); + off += NLMSG_ALIGN(rta->rta_len); + } else if (!strcmp(argv[arg], "token")) { + /* MPTCP connection token */ + if (++arg >= argc) + error(1, 0, " missing token value"); + + token = strtoul(argv[arg], NULL, 10); + } else + error(1, 0, "unknown keyword %s", argv[arg]); + } + + /* addr flags */ + rta = (void *)(data + off); + rta->rta_type = MPTCP_PM_ADDR_ATTR_FLAGS; + rta->rta_len = RTA_LENGTH(4); + memcpy(RTA_DATA(rta), &flags, 4); + off += NLMSG_ALIGN(rta->rta_len); + + addr->rta_len = off - addr_start; + + if (id == UINT_MAX || token == UINT_MAX) + error(1, 0, " missing mandatory inputs"); + + /* token */ + rta = (void *)(data + off); + rta->rta_type = MPTCP_PM_ATTR_TOKEN; + rta->rta_len = RTA_LENGTH(4); + memcpy(RTA_DATA(rta), &token, 4); + off += NLMSG_ALIGN(rta->rta_len); + + do_nl_req(fd, nh, off, 0); + + return 0; } int add_addr(int fd, int pm_family, int argc, char *argv[]) @@ -176,8 +765,8 @@ int add_addr(int fd, int pm_family, int argc, char *argv[]) 1024]; struct rtattr *rta, *nest; struct nlmsghdr *nh; + u_int32_t flags = 0; u_int16_t family; - u_int32_t flags; int nest_start; u_int8_t id; int off = 0; @@ -223,7 +812,6 @@ int add_addr(int fd, int pm_family, int argc, char *argv[]) char *tok, *str; /* flags */ - flags = 0; if (++arg >= argc) error(1, 0, " missing flags value"); @@ -236,11 +824,18 @@ int add_addr(int fd, int pm_family, int argc, char *argv[]) flags |= MPTCP_PM_ADDR_FLAG_SIGNAL; else if (!strcmp(tok, "backup")) flags |= MPTCP_PM_ADDR_FLAG_BACKUP; + else if (!strcmp(tok, "fullmesh")) + flags |= MPTCP_PM_ADDR_FLAG_FULLMESH; else error(1, errno, "unknown flag %s", argv[arg]); } + if (flags & MPTCP_PM_ADDR_FLAG_SIGNAL && + flags & MPTCP_PM_ADDR_FLAG_FULLMESH) { + error(1, errno, "error flag fullmesh"); + } + rta = (void *)(data + off); rta->rta_type = MPTCP_PM_ADDR_ATTR_FLAGS; rta->rta_len = RTA_LENGTH(4); @@ -271,6 +866,20 @@ int add_addr(int fd, int pm_family, int argc, char *argv[]) rta->rta_len = RTA_LENGTH(4); memcpy(RTA_DATA(rta), &ifindex, 4); off += NLMSG_ALIGN(rta->rta_len); + } else if (!strcmp(argv[arg], "port")) { + u_int16_t port; + + if (++arg >= argc) + error(1, 0, " missing port value"); + if (!(flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) + error(1, 0, " flags must be signal when using port"); + + port = atoi(argv[arg]); + rta = (void *)(data + off); + rta->rta_type = MPTCP_PM_ADDR_ATTR_PORT; + rta->rta_len = RTA_LENGTH(2); + memcpy(RTA_DATA(rta), &port, 2); + off += NLMSG_ALIGN(rta->rta_len); } else error(1, 0, "unknown keyword %s", argv[arg]); } @@ -287,6 +896,7 @@ int del_addr(int fd, int pm_family, int argc, char *argv[]) 1024]; struct rtattr *rta, *nest; struct nlmsghdr *nh; + u_int16_t family; int nest_start; u_int8_t id; int off = 0; @@ -296,11 +906,14 @@ int del_addr(int fd, int pm_family, int argc, char *argv[]) off = init_genl_req(data, pm_family, MPTCP_PM_CMD_DEL_ADDR, MPTCP_PM_VER); - /* the only argument is the address id */ - if (argc != 3) + /* the only argument is the address id (nonzero) */ + if (argc != 3 && argc != 4) syntax(argv); id = atoi(argv[2]); + /* zero id with the IP address */ + if (!id && argc != 4) + syntax(argv); nest_start = off; nest = (void *)(data + off); @@ -314,6 +927,30 @@ int del_addr(int fd, int pm_family, int argc, char *argv[]) rta->rta_len = RTA_LENGTH(1); memcpy(RTA_DATA(rta), &id, 1); off += NLMSG_ALIGN(rta->rta_len); + + if (!id) { + /* addr data */ + rta = (void *)(data + off); + if (inet_pton(AF_INET, argv[3], RTA_DATA(rta))) { + family = AF_INET; + rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR4; + rta->rta_len = RTA_LENGTH(4); + } else if (inet_pton(AF_INET6, argv[3], RTA_DATA(rta))) { + family = AF_INET6; + rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR6; + rta->rta_len = RTA_LENGTH(16); + } else { + error(1, errno, "can't parse ip %s", argv[3]); + } + off += NLMSG_ALIGN(rta->rta_len); + + /* family */ + rta = (void *)(data + off); + rta->rta_type = MPTCP_PM_ADDR_ATTR_FAMILY; + rta->rta_len = RTA_LENGTH(2); + memcpy(RTA_DATA(rta), &family, 2); + off += NLMSG_ALIGN(rta->rta_len); + } nest->rta_len = off - nest_start; do_nl_req(fd, nh, off, 0); @@ -323,6 +960,7 @@ int del_addr(int fd, int pm_family, int argc, char *argv[]) static void print_addr(struct rtattr *attrs, int len) { uint16_t family = 0; + uint16_t port = 0; char str[1024]; uint32_t flags; uint8_t id; @@ -330,12 +968,16 @@ static void print_addr(struct rtattr *attrs, int len) while (RTA_OK(attrs, len)) { if (attrs->rta_type == MPTCP_PM_ADDR_ATTR_FAMILY) memcpy(&family, RTA_DATA(attrs), 2); + if (attrs->rta_type == MPTCP_PM_ADDR_ATTR_PORT) + memcpy(&port, RTA_DATA(attrs), 2); if (attrs->rta_type == MPTCP_PM_ADDR_ATTR_ADDR4) { if (family != AF_INET) error(1, errno, "wrong IP (v4) for family %d", family); inet_ntop(AF_INET, RTA_DATA(attrs), str, sizeof(str)); printf("%s", str); + if (port) + printf(" %d", port); } if (attrs->rta_type == MPTCP_PM_ADDR_ATTR_ADDR6) { if (family != AF_INET6) @@ -343,6 +985,8 @@ static void print_addr(struct rtattr *attrs, int len) family); inet_ntop(AF_INET6, RTA_DATA(attrs), str, sizeof(str)); printf("%s", str); + if (port) + printf(" %d", port); } if (attrs->rta_type == MPTCP_PM_ADDR_ATTR_ID) { memcpy(&id, RTA_DATA(attrs), 1); @@ -373,6 +1017,20 @@ static void print_addr(struct rtattr *attrs, int len) printf(","); } + if (flags & MPTCP_PM_ADDR_FLAG_FULLMESH) { + printf("fullmesh"); + flags &= ~MPTCP_PM_ADDR_FLAG_FULLMESH; + if (flags) + printf(","); + } + + if (flags & MPTCP_PM_ADDR_FLAG_IMPLICIT) { + printf("implicit"); + flags &= ~MPTCP_PM_ADDR_FLAG_IMPLICIT; + if (flags) + printf(","); + } + /* bump unknown flags, if any */ if (flags) printf("0x%x", flags); @@ -429,6 +1087,7 @@ int get_addr(int fd, int pm_family, int argc, char *argv[]) 1024]; struct rtattr *rta, *nest; struct nlmsghdr *nh; + u_int32_t token = 0; int nest_start; u_int8_t id; int off = 0; @@ -439,10 +1098,12 @@ int get_addr(int fd, int pm_family, int argc, char *argv[]) MPTCP_PM_VER); /* the only argument is the address id */ - if (argc != 3) + if (argc != 3 && argc != 5) syntax(argv); id = atoi(argv[2]); + if (argc == 5 && !strcmp(argv[3], "token")) + token = strtoul(argv[4], NULL, 10); nest_start = off; nest = (void *)(data + off); @@ -458,6 +1119,15 @@ int get_addr(int fd, int pm_family, int argc, char *argv[]) off += NLMSG_ALIGN(rta->rta_len); nest->rta_len = off - nest_start; + /* token */ + if (token) { + rta = (void *)(data + off); + rta->rta_type = MPTCP_PM_ATTR_TOKEN; + rta->rta_len = RTA_LENGTH(4); + memcpy(RTA_DATA(rta), &token, 4); + off += NLMSG_ALIGN(rta->rta_len); + } + print_addrs(nh, pm_family, do_nl_req(fd, nh, off, sizeof(data))); return 0; } @@ -469,8 +1139,16 @@ int dump_addrs(int fd, int pm_family, int argc, char *argv[]) 1024]; pid_t pid = getpid(); struct nlmsghdr *nh; + u_int32_t token = 0; + struct rtattr *rta; int off = 0; + if (argc != 2 && argc != 4) + syntax(argv); + + if (argc == 4 && !strcmp(argv[2], "token")) + token = strtoul(argv[3], NULL, 10); + memset(data, 0, sizeof(data)); nh = (void *)data; off = init_genl_req(data, pm_family, MPTCP_PM_CMD_GET_ADDR, @@ -480,6 +1158,15 @@ int dump_addrs(int fd, int pm_family, int argc, char *argv[]) nh->nlmsg_pid = pid; nh->nlmsg_len = off; + /* token */ + if (token) { + rta = (void *)(data + off); + rta->rta_type = MPTCP_PM_ATTR_TOKEN; + rta->rta_len = RTA_LENGTH(4); + memcpy(RTA_DATA(rta), &token, 4); + off += NLMSG_ALIGN(rta->rta_len); + } + print_addrs(nh, pm_family, do_nl_req(fd, nh, off, sizeof(data))); return 0; } @@ -584,9 +1271,245 @@ int get_set_limits(int fd, int pm_family, int argc, char *argv[]) return 0; } +int add_listener(int argc, char *argv[]) +{ + struct sockaddr_storage addr; + struct sockaddr_in6 *a6; + struct sockaddr_in *a4; + u_int16_t family; + int enable = 1; + int sock; + int err; + + if (argc < 4) + syntax(argv); + + memset(&addr, 0, sizeof(struct sockaddr_storage)); + a4 = (struct sockaddr_in *)&addr; + a6 = (struct sockaddr_in6 *)&addr; + + if (inet_pton(AF_INET, argv[2], &a4->sin_addr)) { + family = AF_INET; + a4->sin_family = family; + a4->sin_port = htons(atoi(argv[3])); + } else if (inet_pton(AF_INET6, argv[2], &a6->sin6_addr)) { + family = AF_INET6; + a6->sin6_family = family; + a6->sin6_port = htons(atoi(argv[3])); + } else + error(1, errno, "can't parse ip %s", argv[2]); + + sock = socket(family, SOCK_STREAM, IPPROTO_MPTCP); + if (sock < 0) + error(1, errno, "can't create listener sock\n"); + + if (setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &enable, sizeof(enable))) { + close(sock); + error(1, errno, "can't set SO_REUSEADDR on listener sock\n"); + } + + err = bind(sock, (struct sockaddr *)&addr, + ((family == AF_INET) ? sizeof(struct sockaddr_in) : + sizeof(struct sockaddr_in6))); + + if (err == 0 && listen(sock, 30) == 0) + pause(); + + close(sock); + return 0; +} + +int set_flags(int fd, int pm_family, int argc, char *argv[]) +{ + char data[NLMSG_ALIGN(sizeof(struct nlmsghdr)) + + NLMSG_ALIGN(sizeof(struct genlmsghdr)) + + 1024]; + struct rtattr *rta, *nest; + struct nlmsghdr *nh; + u_int32_t flags = 0; + u_int32_t token = 0; + u_int16_t rport = 0; + u_int16_t family; + void *rip = NULL; + int nest_start; + int use_id = 0; + u_int8_t id; + int off = 0; + int arg = 2; + + memset(data, 0, sizeof(data)); + nh = (void *)data; + off = init_genl_req(data, pm_family, MPTCP_PM_CMD_SET_FLAGS, + MPTCP_PM_VER); + + if (argc < 3) + syntax(argv); + + nest_start = off; + nest = (void *)(data + off); + nest->rta_type = NLA_F_NESTED | MPTCP_PM_ATTR_ADDR; + nest->rta_len = RTA_LENGTH(0); + off += NLMSG_ALIGN(nest->rta_len); + + if (!strcmp(argv[arg], "id")) { + if (++arg >= argc) + error(1, 0, " missing id value"); + + use_id = 1; + id = atoi(argv[arg]); + rta = (void *)(data + off); + rta->rta_type = MPTCP_PM_ADDR_ATTR_ID; + rta->rta_len = RTA_LENGTH(1); + memcpy(RTA_DATA(rta), &id, 1); + off += NLMSG_ALIGN(rta->rta_len); + } else { + /* addr data */ + rta = (void *)(data + off); + if (inet_pton(AF_INET, argv[arg], RTA_DATA(rta))) { + family = AF_INET; + rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR4; + rta->rta_len = RTA_LENGTH(4); + } else if (inet_pton(AF_INET6, argv[arg], RTA_DATA(rta))) { + family = AF_INET6; + rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR6; + rta->rta_len = RTA_LENGTH(16); + } else { + error(1, errno, "can't parse ip %s", argv[arg]); + } + off += NLMSG_ALIGN(rta->rta_len); + + /* family */ + rta = (void *)(data + off); + rta->rta_type = MPTCP_PM_ADDR_ATTR_FAMILY; + rta->rta_len = RTA_LENGTH(2); + memcpy(RTA_DATA(rta), &family, 2); + off += NLMSG_ALIGN(rta->rta_len); + } + + if (++arg >= argc) + error(1, 0, " missing flags keyword"); + + for (; arg < argc; arg++) { + if (!strcmp(argv[arg], "token")) { + if (++arg >= argc) + error(1, 0, " missing token value"); + + /* token */ + token = strtoul(argv[arg], NULL, 10); + } else if (!strcmp(argv[arg], "flags")) { + char *tok, *str; + + /* flags */ + if (++arg >= argc) + error(1, 0, " missing flags value"); + + for (str = argv[arg]; (tok = strtok(str, ",")); + str = NULL) { + if (!strcmp(tok, "backup")) + flags |= MPTCP_PM_ADDR_FLAG_BACKUP; + else if (!strcmp(tok, "fullmesh")) + flags |= MPTCP_PM_ADDR_FLAG_FULLMESH; + else if (strcmp(tok, "nobackup") && + strcmp(tok, "nofullmesh")) + error(1, errno, + "unknown flag %s", argv[arg]); + } + + rta = (void *)(data + off); + rta->rta_type = MPTCP_PM_ADDR_ATTR_FLAGS; + rta->rta_len = RTA_LENGTH(4); + memcpy(RTA_DATA(rta), &flags, 4); + off += NLMSG_ALIGN(rta->rta_len); + } else if (!strcmp(argv[arg], "port")) { + u_int16_t port; + + if (use_id) + error(1, 0, " port can't be used with id"); + + if (++arg >= argc) + error(1, 0, " missing port value"); + + port = atoi(argv[arg]); + rta = (void *)(data + off); + rta->rta_type = MPTCP_PM_ADDR_ATTR_PORT; + rta->rta_len = RTA_LENGTH(2); + memcpy(RTA_DATA(rta), &port, 2); + off += NLMSG_ALIGN(rta->rta_len); + } else if (!strcmp(argv[arg], "rport")) { + if (++arg >= argc) + error(1, 0, " missing remote port"); + + rport = atoi(argv[arg]); + } else if (!strcmp(argv[arg], "rip")) { + if (++arg >= argc) + error(1, 0, " missing remote ip"); + + rip = argv[arg]; + } else { + error(1, 0, "unknown keyword %s", argv[arg]); + } + } + nest->rta_len = off - nest_start; + + /* token */ + if (token) { + rta = (void *)(data + off); + rta->rta_type = MPTCP_PM_ATTR_TOKEN; + rta->rta_len = RTA_LENGTH(4); + memcpy(RTA_DATA(rta), &token, 4); + off += NLMSG_ALIGN(rta->rta_len); + } + + /* remote addr/port */ + if (rip) { + nest_start = off; + nest = (void *)(data + off); + nest->rta_type = NLA_F_NESTED | MPTCP_PM_ATTR_ADDR_REMOTE; + nest->rta_len = RTA_LENGTH(0); + off += NLMSG_ALIGN(nest->rta_len); + + /* addr data */ + rta = (void *)(data + off); + if (inet_pton(AF_INET, rip, RTA_DATA(rta))) { + family = AF_INET; + rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR4; + rta->rta_len = RTA_LENGTH(4); + } else if (inet_pton(AF_INET6, rip, RTA_DATA(rta))) { + family = AF_INET6; + rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR6; + rta->rta_len = RTA_LENGTH(16); + } else { + error(1, errno, "can't parse ip %s", (char *)rip); + } + off += NLMSG_ALIGN(rta->rta_len); + + /* family */ + rta = (void *)(data + off); + rta->rta_type = MPTCP_PM_ADDR_ATTR_FAMILY; + rta->rta_len = RTA_LENGTH(2); + memcpy(RTA_DATA(rta), &family, 2); + off += NLMSG_ALIGN(rta->rta_len); + + if (rport) { + rta = (void *)(data + off); + rta->rta_type = MPTCP_PM_ADDR_ATTR_PORT; + rta->rta_len = RTA_LENGTH(2); + memcpy(RTA_DATA(rta), &rport, 2); + off += NLMSG_ALIGN(rta->rta_len); + } + + nest->rta_len = off - nest_start; + } + + do_nl_req(fd, nh, off, 0); + return 0; +} + int main(int argc, char *argv[]) { - int fd, pm_family; + int events_mcast_grp; + int pm_family; + int fd; if (argc < 2) syntax(argv); @@ -595,10 +1518,18 @@ int main(int argc, char *argv[]) if (fd == -1) error(1, errno, "socket netlink"); - pm_family = resolve_mptcp_pm_netlink(fd); + resolve_mptcp_pm_netlink(fd, &pm_family, &events_mcast_grp); if (!strcmp(argv[1], "add")) return add_addr(fd, pm_family, argc, argv); + else if (!strcmp(argv[1], "ann")) + return announce_addr(fd, pm_family, argc, argv); + else if (!strcmp(argv[1], "rem")) + return remove_addr(fd, pm_family, argc, argv); + else if (!strcmp(argv[1], "csf")) + return csf(fd, pm_family, argc, argv); + else if (!strcmp(argv[1], "dsf")) + return dsf(fd, pm_family, argc, argv); else if (!strcmp(argv[1], "del")) return del_addr(fd, pm_family, argc, argv); else if (!strcmp(argv[1], "flush")) @@ -609,6 +1540,12 @@ int main(int argc, char *argv[]) return dump_addrs(fd, pm_family, argc, argv); else if (!strcmp(argv[1], "limits")) return get_set_limits(fd, pm_family, argc, argv); + else if (!strcmp(argv[1], "set")) + return set_flags(fd, pm_family, argc, argv); + else if (!strcmp(argv[1], "events")) + return capture_events(fd, events_mcast_grp); + else if (!strcmp(argv[1], "listen")) + return add_listener(argc, argv); fprintf(stderr, "unknown sub-command: %s", argv[1]); syntax(argv); diff --git a/tools/testing/selftests/net/mptcp/settings b/tools/testing/selftests/net/mptcp/settings index 026384c189c9..abc5648b59ab 100644 --- a/tools/testing/selftests/net/mptcp/settings +++ b/tools/testing/selftests/net/mptcp/settings @@ -1 +1 @@ -timeout=450 +timeout=1800 diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh index 2f649b431456..1b2366220388 100755 --- a/tools/testing/selftests/net/mptcp/simult_flows.sh +++ b/tools/testing/selftests/net/mptcp/simult_flows.sh @@ -1,16 +1,30 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -rndh=$(printf %x $sec)-$(mktemp -u XXXXXX) -ns1="ns1-$rndh" -ns2="ns2-$rndh" -ns3="ns3-$rndh" +# Double quotes to prevent globbing and word splitting is recommended in new +# code but we accept it, especially because there were too many before having +# address all other issues detected by shellcheck. +#shellcheck disable=SC2086 + +. "$(dirname "${0}")/mptcp_lib.sh" + +ns1="" +ns2="" +ns3="" capture=false -ksft_skip=4 -timeout=30 -test_cnt=1 +timeout_poll=30 +timeout_test=$((timeout_poll * 2 + 1)) +# a bit more space: because we have more to display +MPTCP_LIB_TEST_FORMAT="%02u %-60s" ret=0 bail=0 +slack=50 +large="" +small="" +sout="" +cout="" +capout="" +size=0 usage() { echo "Usage: $0 [ -b ] [ -c ] [ -d ]" @@ -19,23 +33,19 @@ usage() { echo -e "\t-d: debug this script" } +# This function is used in the cleanup trap +#shellcheck disable=SC2317 cleanup() { - rm -f "$cin" "$cout" - rm -f "$sin" "$sout" + rm -f "$cout" "$sout" + rm -f "$large" "$small" rm -f "$capout" - local netns - for netns in "$ns1" "$ns2" "$ns3";do - ip netns del $netns - done + mptcp_lib_ns_exit "${ns1}" "${ns2}" "${ns3}" } -ip -Version > /dev/null 2>&1 -if [ $? -ne 0 ];then - echo "SKIP: Could not run test without ip tool" - exit $ksft_skip -fi +mptcp_lib_check_mptcp +mptcp_lib_check_tools ip # "$ns1" ns2 ns3 # ns1eth1 ns2eth1 ns2eth3 ns3eth1 @@ -50,16 +60,14 @@ setup() sout=$(mktemp) cout=$(mktemp) capout=$(mktemp) - size=$((2048 * 4096)) + size=$((2 * 2048 * 4096)) + dd if=/dev/zero of=$small bs=4096 count=20 >/dev/null 2>&1 dd if=/dev/zero of=$large bs=4096 count=$((size / 4096)) >/dev/null 2>&1 trap cleanup EXIT - for i in "$ns1" "$ns2" "$ns3";do - ip netns add $i || exit $ksft_skip - ip -net $i link set lo up - done + mptcp_lib_ns_init ns1 ns2 ns3 ip link add ns1eth1 netns "$ns1" type veth peer name ns2eth1 netns "$ns2" ip link add ns1eth2 netns "$ns1" type veth peer name ns2eth2 netns "$ns2" @@ -79,7 +87,6 @@ setup() ip netns exec "$ns1" ./pm_nl_ctl limits 1 1 ip netns exec "$ns1" ./pm_nl_ctl add 10.0.2.1 dev ns1eth2 flags subflow - ip netns exec "$ns1" sysctl -q net.ipv4.conf.all.rp_filter=0 ip -net "$ns2" addr add 10.0.1.2/24 dev ns2eth1 ip -net "$ns2" addr add dead:beef:1::2/64 dev ns2eth1 nodad @@ -102,23 +109,16 @@ setup() ip -net "$ns3" route add default via dead:beef:3::2 ip netns exec "$ns3" ./pm_nl_ctl limits 1 1 -} - -# $1: ns, $2: port -wait_local_port_listen() -{ - local listener_ns="${1}" - local port="${2}" - - local port_hex i - port_hex="$(printf "%04X" "${port}")" - for i in $(seq 10); do - ip netns exec "${listener_ns}" cat /proc/net/tcp* | \ - awk "BEGIN {rc=1} {if (\$2 ~ /:${port_hex}\$/ && \$4 ~ /0A/) {rc=0; exit}} END {exit rc}" && - break - sleep 0.1 - done + # debug build can slow down measurably the test program + # we use quite tight time limit on the run-time, to ensure + # maximum B/W usage. + # Use kmemleak/lockdep/kasan/prove_locking presence as a rough + # estimate for this being a debug kernel and increase the + # maximum run-time accordingly. Observed run times for CI builds + # running selftests, including kbuild, were used to determine the + # amount of time to add. + grep -q ' kmemleak_init$\| lockdep_init$\| kasan_init$\| prove_locking$' /proc/kallsyms && slack=$((slack+550)) } do_transfer() @@ -127,18 +127,15 @@ do_transfer() local sin=$2 local max_time=$3 local port - port=$((10000+$test_cnt)) - test_cnt=$((test_cnt+1)) + port=$((10000+MPTCP_LIB_TEST_COUNTER)) :> "$cout" :> "$sout" :> "$capout" - local addr_port - addr_port=$(printf "%s:%d" ${connect_addr} ${port}) - if $capture; then local capuser + local rndh="${ns1:4}" if [ -z $SUDO_USER ] ; then capuser="" else @@ -157,14 +154,18 @@ do_transfer() sleep 1 fi - ip netns exec ${ns3} ./mptcp_connect -jt $timeout -l -p $port 0.0.0.0 < "$sin" > "$sout" & + timeout ${timeout_test} \ + ip netns exec ${ns3} \ + ./mptcp_connect -jt ${timeout_poll} -l -p $port -T $max_time \ + 0.0.0.0 < "$sin" > "$sout" & local spid=$! - wait_local_port_listen "${ns3}" "${port}" + mptcp_lib_wait_local_port_listen "${ns3}" "${port}" - local start - start=$(date +%s%3N) - ip netns exec ${ns1} ./mptcp_connect -jt $timeout -p $port 10.0.3.3 < "$cin" > "$cout" & + timeout ${timeout_test} \ + ip netns exec ${ns1} \ + ./mptcp_connect -jt ${timeout_poll} -p $port -T $max_time \ + 10.0.3.3 < "$cin" > "$cout" & local cpid=$! wait $cpid @@ -172,33 +173,26 @@ do_transfer() wait $spid local rets=$? - local stop - stop=$(date +%s%3N) - if $capture; then sleep 1 kill ${cappid_listener} kill ${cappid_connector} fi - local duration - duration=$((stop-start)) - cmp $sin $cout > /dev/null 2>&1 local cmps=$? cmp $cin $sout > /dev/null 2>&1 local cmpc=$? - printf "%16s" "$duration max $max_time " + printf "%-16s" " max $max_time " if [ $retc -eq 0 ] && [ $rets -eq 0 ] && \ - [ $cmpc -eq 0 ] && [ $cmps -eq 0 ] && \ - [ $duration -lt $max_time ]; then - echo "[ OK ]" + [ $cmpc -eq 0 ] && [ $cmps -eq 0 ]; then + mptcp_lib_pr_ok cat "$capout" return 0 fi - echo " [ fail ]" + mptcp_lib_pr_fail echo "client exit code $retc, server $rets" 1>&2 echo -e "\nnetns ${ns3} socket stat for $port:" 1>&2 ip netns exec ${ns3} ss -nita 1>&2 -o "sport = :$port" @@ -236,24 +230,29 @@ run_test() tc -n $ns2 qdisc add dev ns2eth1 root netem rate ${rate1}mbit $delay1 tc -n $ns2 qdisc add dev ns2eth2 root netem rate ${rate2}mbit $delay2 - # time is measure in ms - local time=$((size * 8 * 1000 / (( $rate1 + $rate2) * 1024 *1024) )) + # time is measured in ms, account for transfer size, aggregated link speed + # and header overhead (10%) + # ms byte -> bit 10% mbit -> kbit -> bit 10% + local time=$((1000 * size * 8 * 10 / ((rate1 + rate2) * 1000 * 1000 * 9) )) # mptcp_connect will do some sleeps to allow the mp_join handshake - # completion - time=$((time + 1350)) + # completion (see mptcp_connect): 200ms on each side, add some slack + time=$((time + 400 + slack)) - printf "%-50s" "$msg" - do_transfer $small $large $((time * 11 / 10)) + mptcp_lib_print_title "$msg" + do_transfer $small $large $time lret=$? + mptcp_lib_result_code "${lret}" "${msg}" if [ $lret -ne 0 ]; then ret=$lret [ $bail -eq 0 ] || exit $ret fi - printf "%-50s" "$msg - reverse direction" - do_transfer $large $small $((time * 11 / 10)) + msg+=" - reverse direction" + mptcp_lib_print_title "${msg}" + do_transfer $large $small $time lret=$? + mptcp_lib_result_code "${lret}" "${msg}" if [ $lret -ne 0 ]; then ret=$lret [ $bail -eq 0 ] || exit $ret @@ -264,7 +263,7 @@ while getopts "bcdh" option;do case "$option" in "h") usage $0 - exit 0 + exit ${KSFT_PASS} ;; "b") bail=1 @@ -277,17 +276,19 @@ while getopts "bcdh" option;do ;; "?") usage $0 - exit 1 + exit ${KSFT_FAIL} ;; esac done setup run_test 10 10 0 0 "balanced bwidth" -run_test 10 10 1 50 "balanced bwidth with unbalanced delay" +run_test 10 10 1 25 "balanced bwidth with unbalanced delay" # we still need some additional infrastructure to pass the following test-cases -# run_test 30 10 0 0 "unbalanced bwidth" -# run_test 30 10 1 50 "unbalanced bwidth with unbalanced delay" -# run_test 30 10 50 1 "unbalanced bwidth with opposed, unbalanced delay" +run_test 10 3 0 0 "unbalanced bwidth" +run_test 10 3 1 25 "unbalanced bwidth with unbalanced delay" +run_test 10 3 25 1 "unbalanced bwidth with opposed, unbalanced delay" + +mptcp_lib_result_print_all_tap exit $ret diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh new file mode 100755 index 000000000000..9e2981f2d7f5 --- /dev/null +++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh @@ -0,0 +1,897 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Double quotes to prevent globbing and word splitting is recommended in new +# code but we accept it. +#shellcheck disable=SC2086 + +# Some variables are used below but indirectly, see verify_*_event() +#shellcheck disable=SC2034 + +. "$(dirname "${0}")/mptcp_lib.sh" + +mptcp_lib_check_mptcp +mptcp_lib_check_kallsyms + +if ! mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then + echo "userspace pm tests are not supported by the kernel: SKIP" + exit ${KSFT_SKIP} +fi +mptcp_lib_check_tools ip + +ANNOUNCED=${MPTCP_LIB_EVENT_ANNOUNCED} +REMOVED=${MPTCP_LIB_EVENT_REMOVED} +SUB_ESTABLISHED=${MPTCP_LIB_EVENT_SUB_ESTABLISHED} +SUB_CLOSED=${MPTCP_LIB_EVENT_SUB_CLOSED} +LISTENER_CREATED=${MPTCP_LIB_EVENT_LISTENER_CREATED} +LISTENER_CLOSED=${MPTCP_LIB_EVENT_LISTENER_CLOSED} + +AF_INET=${MPTCP_LIB_AF_INET} +AF_INET6=${MPTCP_LIB_AF_INET6} + +file="" +server_evts="" +client_evts="" +server_evts_pid=0 +client_evts_pid=0 +client4_pid=0 +server4_pid=0 +client6_pid=0 +server6_pid=0 +client4_token="" +server4_token="" +client6_token="" +server6_token="" +client4_port=0; +client6_port=0; +app4_port=50002 +new4_port=50003 +app6_port=50004 +client_addr_id=${RANDOM:0:2} +server_addr_id=${RANDOM:0:2} + +ns1="" +ns2="" +ret=0 +test_name="" +# a bit more space: because we have more to display +MPTCP_LIB_TEST_FORMAT="%02u %-68s" + +print_title() +{ + mptcp_lib_pr_info "${1}" +} + +# $1: test name +print_test() +{ + test_name="${1}" + + mptcp_lib_print_title "${test_name}" +} + +test_pass() +{ + mptcp_lib_pr_ok + mptcp_lib_result_pass "${test_name}" +} + +test_skip() +{ + mptcp_lib_pr_skip + mptcp_lib_result_skip "${test_name}" +} + +# $1: msg +test_fail() +{ + if [ ${#} -gt 0 ] + then + mptcp_lib_pr_fail "${@}" + fi + ret=${KSFT_FAIL} + mptcp_lib_result_fail "${test_name}" +} + +# This function is used in the cleanup trap +#shellcheck disable=SC2317 +cleanup() +{ + print_title "Cleanup" + + # Terminate the MPTCP connection and related processes + local pid + for pid in $client4_pid $server4_pid $client6_pid $server6_pid\ + $server_evts_pid $client_evts_pid + do + mptcp_lib_kill_wait $pid + done + + mptcp_lib_ns_exit "${ns1}" "${ns2}" + + rm -rf $file $client_evts $server_evts + + mptcp_lib_pr_info "Done" +} + +trap cleanup EXIT + +# Create and configure network namespaces for testing +mptcp_lib_ns_init ns1 ns2 +for i in "$ns1" "$ns2" ;do + ip netns exec "$i" sysctl -q net.mptcp.pm_type=1 +done + +# "$ns1" ns2 +# ns1eth2 ns2eth1 + +ip link add ns1eth2 netns "$ns1" type veth peer name ns2eth1 netns "$ns2" + +# Add IPv4/v6 addresses to the namespaces +ip -net "$ns1" addr add 10.0.1.1/24 dev ns1eth2 +ip -net "$ns1" addr add 10.0.2.1/24 dev ns1eth2 +ip -net "$ns1" addr add dead:beef:1::1/64 dev ns1eth2 nodad +ip -net "$ns1" addr add dead:beef:2::1/64 dev ns1eth2 nodad +ip -net "$ns1" link set ns1eth2 up + +ip -net "$ns2" addr add 10.0.1.2/24 dev ns2eth1 +ip -net "$ns2" addr add 10.0.2.2/24 dev ns2eth1 +ip -net "$ns2" addr add dead:beef:1::2/64 dev ns2eth1 nodad +ip -net "$ns2" addr add dead:beef:2::2/64 dev ns2eth1 nodad +ip -net "$ns2" link set ns2eth1 up + +file=$(mktemp) +mptcp_lib_make_file "$file" 2 1 + +# Capture netlink events over the two network namespaces running +# the MPTCP client and server +client_evts=$(mktemp) +mptcp_lib_events "${ns2}" "${client_evts}" client_evts_pid +server_evts=$(mktemp) +mptcp_lib_events "${ns1}" "${server_evts}" server_evts_pid +sleep 0.5 + +print_title "Init" +print_test "Created network namespaces ns1, ns2" +test_pass + +make_connection() +{ + local is_v6=$1 + local app_port=$app4_port + local connect_addr="10.0.1.1" + local listen_addr="0.0.0.0" + if [ "$is_v6" = "v6" ] + then + connect_addr="dead:beef:1::1" + listen_addr="::" + app_port=$app6_port + else + is_v6="v4" + fi + + :>"$client_evts" + :>"$server_evts" + + # Run the server + ip netns exec "$ns1" \ + ./mptcp_connect -s MPTCP -w 300 -p $app_port -l $listen_addr > /dev/null 2>&1 & + local server_pid=$! + sleep 0.5 + + # Run the client, transfer $file and stay connected to the server + # to conduct tests + ip netns exec "$ns2" \ + ./mptcp_connect -s MPTCP -w 300 -m sendfile -p $app_port $connect_addr\ + 2>&1 > /dev/null < "$file" & + local client_pid=$! + sleep 1 + + # Capture client/server attributes from MPTCP connection netlink events + + local client_token + local client_port + local client_serverside + local server_token + local server_serverside + + client_token=$(mptcp_lib_evts_get_info token "$client_evts") + client_port=$(mptcp_lib_evts_get_info sport "$client_evts") + client_serverside=$(mptcp_lib_evts_get_info server_side "$client_evts") + server_token=$(mptcp_lib_evts_get_info token "$server_evts") + server_serverside=$(mptcp_lib_evts_get_info server_side "$server_evts") + + print_test "Established IP${is_v6} MPTCP Connection ns2 => ns1" + if [ "$client_token" != "" ] && [ "$server_token" != "" ] && [ "$client_serverside" = 0 ] && + [ "$server_serverside" = 1 ] + then + test_pass + else + test_fail "Expected tokens (c:${client_token} - s:${server_token}) and server (c:${client_serverside} - s:${server_serverside})" + mptcp_lib_result_print_all_tap + exit ${KSFT_FAIL} + fi + + if [ "$is_v6" = "v6" ] + then + client6_token=$client_token + server6_token=$server_token + client6_port=$client_port + client6_pid=$client_pid + server6_pid=$server_pid + else + client4_token=$client_token + server4_token=$server_token + client4_port=$client_port + client4_pid=$client_pid + server4_pid=$server_pid + fi +} + +# $@: all var names to check +check_expected() +{ + if mptcp_lib_check_expected "${@}" + then + test_pass + return 0 + fi + + test_fail + return 1 +} + +verify_announce_event() +{ + local evt=$1 + local e_type=$2 + local e_token=$3 + local e_addr=$4 + local e_id=$5 + local e_dport=$6 + local e_af=$7 + local type + local token + local addr + local dport + local id + + type=$(mptcp_lib_evts_get_info type "$evt" $e_type) + token=$(mptcp_lib_evts_get_info token "$evt" $e_type) + if [ "$e_af" = "v6" ] + then + addr=$(mptcp_lib_evts_get_info daddr6 "$evt" $e_type) + else + addr=$(mptcp_lib_evts_get_info daddr4 "$evt" $e_type) + fi + dport=$(mptcp_lib_evts_get_info dport "$evt" $e_type) + id=$(mptcp_lib_evts_get_info rem_id "$evt" $e_type) + + check_expected "type" "token" "addr" "dport" "id" +} + +test_announce() +{ + print_title "Announce tests" + + # Capture events on the network namespace running the server + :>"$server_evts" + + # ADD_ADDR using an invalid token should result in no action + local invalid_token=$(( client4_token - 1)) + ip netns exec "$ns2" ./pm_nl_ctl ann 10.0.2.2 token $invalid_token id\ + $client_addr_id dev ns2eth1 > /dev/null 2>&1 + + local type + type=$(mptcp_lib_evts_get_info type "$server_evts") + print_test "ADD_ADDR 10.0.2.2 (ns2) => ns1, invalid token" + if [ "$type" = "" ] + then + test_pass + else + test_fail "type defined: ${type}" + fi + + # ADD_ADDR from the client to server machine reusing the subflow port + :>"$server_evts" + ip netns exec "$ns2"\ + ./pm_nl_ctl ann 10.0.2.2 token "$client4_token" id $client_addr_id dev\ + ns2eth1 + print_test "ADD_ADDR id:${client_addr_id} 10.0.2.2 (ns2) => ns1, reuse port" + sleep 0.5 + verify_announce_event $server_evts $ANNOUNCED $server4_token "10.0.2.2" $client_addr_id \ + "$client4_port" + + # ADD_ADDR6 from the client to server machine reusing the subflow port + :>"$server_evts" + ip netns exec "$ns2" ./pm_nl_ctl ann\ + dead:beef:2::2 token "$client6_token" id $client_addr_id dev ns2eth1 + print_test "ADD_ADDR6 id:${client_addr_id} dead:beef:2::2 (ns2) => ns1, reuse port" + sleep 0.5 + verify_announce_event "$server_evts" "$ANNOUNCED" "$server6_token" "dead:beef:2::2"\ + "$client_addr_id" "$client6_port" "v6" + + # ADD_ADDR from the client to server machine using a new port + :>"$server_evts" + client_addr_id=$((client_addr_id+1)) + ip netns exec "$ns2" ./pm_nl_ctl ann 10.0.2.2 token "$client4_token" id\ + $client_addr_id dev ns2eth1 port $new4_port + print_test "ADD_ADDR id:${client_addr_id} 10.0.2.2 (ns2) => ns1, new port" + sleep 0.5 + verify_announce_event "$server_evts" "$ANNOUNCED" "$server4_token" "10.0.2.2"\ + "$client_addr_id" "$new4_port" + + # Capture events on the network namespace running the client + :>"$client_evts" + + # ADD_ADDR from the server to client machine reusing the subflow port + ip netns exec "$ns1" ./pm_nl_ctl ann 10.0.2.1 token "$server4_token" id\ + $server_addr_id dev ns1eth2 + print_test "ADD_ADDR id:${server_addr_id} 10.0.2.1 (ns1) => ns2, reuse port" + sleep 0.5 + verify_announce_event "$client_evts" "$ANNOUNCED" "$client4_token" "10.0.2.1"\ + "$server_addr_id" "$app4_port" + + # ADD_ADDR6 from the server to client machine reusing the subflow port + :>"$client_evts" + ip netns exec "$ns1" ./pm_nl_ctl ann dead:beef:2::1 token "$server6_token" id\ + $server_addr_id dev ns1eth2 + print_test "ADD_ADDR6 id:${server_addr_id} dead:beef:2::1 (ns1) => ns2, reuse port" + sleep 0.5 + verify_announce_event "$client_evts" "$ANNOUNCED" "$client6_token" "dead:beef:2::1"\ + "$server_addr_id" "$app6_port" "v6" + + # ADD_ADDR from the server to client machine using a new port + :>"$client_evts" + server_addr_id=$((server_addr_id+1)) + ip netns exec "$ns1" ./pm_nl_ctl ann 10.0.2.1 token "$server4_token" id\ + $server_addr_id dev ns1eth2 port $new4_port + print_test "ADD_ADDR id:${server_addr_id} 10.0.2.1 (ns1) => ns2, new port" + sleep 0.5 + verify_announce_event "$client_evts" "$ANNOUNCED" "$client4_token" "10.0.2.1"\ + "$server_addr_id" "$new4_port" +} + +verify_remove_event() +{ + local evt=$1 + local e_type=$2 + local e_token=$3 + local e_id=$4 + local type + local token + local id + + type=$(mptcp_lib_evts_get_info type "$evt" $e_type) + token=$(mptcp_lib_evts_get_info token "$evt" $e_type) + id=$(mptcp_lib_evts_get_info rem_id "$evt" $e_type) + + check_expected "type" "token" "id" +} + +test_remove() +{ + print_title "Remove tests" + + # Capture events on the network namespace running the server + :>"$server_evts" + + # RM_ADDR using an invalid token should result in no action + local invalid_token=$(( client4_token - 1 )) + ip netns exec "$ns2" ./pm_nl_ctl rem token $invalid_token id\ + $client_addr_id > /dev/null 2>&1 + print_test "RM_ADDR id:${client_addr_id} ns2 => ns1, invalid token" + local type + type=$(mptcp_lib_evts_get_info type "$server_evts") + if [ "$type" = "" ] + then + test_pass + else + test_fail "unexpected type: ${type}" + fi + + # RM_ADDR using an invalid addr id should result in no action + local invalid_id=$(( client_addr_id + 1 )) + ip netns exec "$ns2" ./pm_nl_ctl rem token "$client4_token" id\ + $invalid_id > /dev/null 2>&1 + print_test "RM_ADDR id:${invalid_id} ns2 => ns1, invalid id" + type=$(mptcp_lib_evts_get_info type "$server_evts") + if [ "$type" = "" ] + then + test_pass + else + test_fail "unexpected type: ${type}" + fi + + # RM_ADDR from the client to server machine + :>"$server_evts" + ip netns exec "$ns2" ./pm_nl_ctl rem token "$client4_token" id\ + $client_addr_id + print_test "RM_ADDR id:${client_addr_id} ns2 => ns1" + sleep 0.5 + verify_remove_event "$server_evts" "$REMOVED" "$server4_token" "$client_addr_id" + + # RM_ADDR from the client to server machine + :>"$server_evts" + client_addr_id=$(( client_addr_id - 1 )) + ip netns exec "$ns2" ./pm_nl_ctl rem token "$client4_token" id\ + $client_addr_id + print_test "RM_ADDR id:${client_addr_id} ns2 => ns1" + sleep 0.5 + verify_remove_event "$server_evts" "$REMOVED" "$server4_token" "$client_addr_id" + + # RM_ADDR6 from the client to server machine + :>"$server_evts" + ip netns exec "$ns2" ./pm_nl_ctl rem token "$client6_token" id\ + $client_addr_id + print_test "RM_ADDR6 id:${client_addr_id} ns2 => ns1" + sleep 0.5 + verify_remove_event "$server_evts" "$REMOVED" "$server6_token" "$client_addr_id" + + # Capture events on the network namespace running the client + :>"$client_evts" + + # RM_ADDR from the server to client machine + ip netns exec "$ns1" ./pm_nl_ctl rem token "$server4_token" id\ + $server_addr_id + print_test "RM_ADDR id:${server_addr_id} ns1 => ns2" + sleep 0.5 + verify_remove_event "$client_evts" "$REMOVED" "$client4_token" "$server_addr_id" + + # RM_ADDR from the server to client machine + :>"$client_evts" + server_addr_id=$(( server_addr_id - 1 )) + ip netns exec "$ns1" ./pm_nl_ctl rem token "$server4_token" id\ + $server_addr_id + print_test "RM_ADDR id:${server_addr_id} ns1 => ns2" + sleep 0.5 + verify_remove_event "$client_evts" "$REMOVED" "$client4_token" "$server_addr_id" + + # RM_ADDR6 from the server to client machine + :>"$client_evts" + ip netns exec "$ns1" ./pm_nl_ctl rem token "$server6_token" id\ + $server_addr_id + print_test "RM_ADDR6 id:${server_addr_id} ns1 => ns2" + sleep 0.5 + verify_remove_event "$client_evts" "$REMOVED" "$client6_token" "$server_addr_id" +} + +verify_subflow_events() +{ + local evt=$1 + local e_type=$2 + local e_token=$3 + local e_family=$4 + local e_saddr=$5 + local e_daddr=$6 + local e_dport=$7 + local e_locid=$8 + local e_remid=$9 + shift 2 + local e_from=$8 + local e_to=$9 + local type + local token + local family + local saddr + local daddr + local dport + local locid + local remid + local info + + info="${e_saddr} (${e_from}) => ${e_daddr}:${e_dport} (${e_to})" + + if [ "$e_type" = "$SUB_ESTABLISHED" ] + then + if [ "$e_family" = "$AF_INET6" ] + then + print_test "CREATE_SUBFLOW6 ${info}" + else + print_test "CREATE_SUBFLOW ${info}" + fi + else + if [ "$e_family" = "$AF_INET6" ] + then + print_test "DESTROY_SUBFLOW6 ${info}" + else + print_test "DESTROY_SUBFLOW ${info}" + fi + fi + + type=$(mptcp_lib_evts_get_info type "$evt" $e_type) + token=$(mptcp_lib_evts_get_info token "$evt" $e_type) + family=$(mptcp_lib_evts_get_info family "$evt" $e_type) + dport=$(mptcp_lib_evts_get_info dport "$evt" $e_type) + locid=$(mptcp_lib_evts_get_info loc_id "$evt" $e_type) + remid=$(mptcp_lib_evts_get_info rem_id "$evt" $e_type) + if [ "$family" = "$AF_INET6" ] + then + saddr=$(mptcp_lib_evts_get_info saddr6 "$evt" $e_type) + daddr=$(mptcp_lib_evts_get_info daddr6 "$evt" $e_type) + else + saddr=$(mptcp_lib_evts_get_info saddr4 "$evt" $e_type) + daddr=$(mptcp_lib_evts_get_info daddr4 "$evt" $e_type) + fi + + check_expected "type" "token" "daddr" "dport" "family" "saddr" "locid" "remid" +} + +test_subflows() +{ + print_title "Subflows v4 or v6 only tests" + + # Capture events on the network namespace running the server + :>"$server_evts" + + # Attempt to add a listener at 10.0.2.2:<subflow-port> + ip netns exec "$ns2" ./pm_nl_ctl listen 10.0.2.2\ + "$client4_port" & + local listener_pid=$! + + # ADD_ADDR from client to server machine reusing the subflow port + ip netns exec "$ns2" ./pm_nl_ctl ann 10.0.2.2 token "$client4_token" id\ + $client_addr_id + sleep 0.5 + + # CREATE_SUBFLOW from server to client machine + :>"$server_evts" + ip netns exec "$ns1" ./pm_nl_ctl csf lip 10.0.2.1 lid 23 rip 10.0.2.2\ + rport "$client4_port" token "$server4_token" + sleep 0.5 + verify_subflow_events $server_evts $SUB_ESTABLISHED $server4_token $AF_INET "10.0.2.1" \ + "10.0.2.2" "$client4_port" "23" "$client_addr_id" "ns1" "ns2" + + # Delete the listener from the client ns, if one was created + mptcp_lib_kill_wait $listener_pid + + local sport + sport=$(mptcp_lib_evts_get_info sport "$server_evts" $SUB_ESTABLISHED) + + # DESTROY_SUBFLOW from server to client machine + :>"$server_evts" + ip netns exec "$ns1" ./pm_nl_ctl dsf lip 10.0.2.1 lport "$sport" rip 10.0.2.2 rport\ + "$client4_port" token "$server4_token" + sleep 0.5 + verify_subflow_events "$server_evts" "$SUB_CLOSED" "$server4_token" "$AF_INET" "10.0.2.1"\ + "10.0.2.2" "$client4_port" "23" "$client_addr_id" "ns1" "ns2" + + # RM_ADDR from client to server machine + ip netns exec "$ns2" ./pm_nl_ctl rem id $client_addr_id token\ + "$client4_token" + sleep 0.5 + + # Attempt to add a listener at dead:beef:2::2:<subflow-port> + ip netns exec "$ns2" ./pm_nl_ctl listen dead:beef:2::2\ + "$client6_port" & + listener_pid=$! + + # ADD_ADDR6 from client to server machine reusing the subflow port + :>"$server_evts" + ip netns exec "$ns2" ./pm_nl_ctl ann dead:beef:2::2 token "$client6_token" id\ + $client_addr_id + sleep 0.5 + + # CREATE_SUBFLOW6 from server to client machine + :>"$server_evts" + ip netns exec "$ns1" ./pm_nl_ctl csf lip dead:beef:2::1 lid 23 rip\ + dead:beef:2::2 rport "$client6_port" token "$server6_token" + sleep 0.5 + verify_subflow_events "$server_evts" "$SUB_ESTABLISHED" "$server6_token" "$AF_INET6"\ + "dead:beef:2::1" "dead:beef:2::2" "$client6_port" "23"\ + "$client_addr_id" "ns1" "ns2" + + # Delete the listener from the client ns, if one was created + mptcp_lib_kill_wait $listener_pid + + sport=$(mptcp_lib_evts_get_info sport "$server_evts" $SUB_ESTABLISHED) + + # DESTROY_SUBFLOW6 from server to client machine + :>"$server_evts" + ip netns exec "$ns1" ./pm_nl_ctl dsf lip dead:beef:2::1 lport "$sport" rip\ + dead:beef:2::2 rport "$client6_port" token "$server6_token" + sleep 0.5 + verify_subflow_events "$server_evts" "$SUB_CLOSED" "$server6_token" "$AF_INET6"\ + "dead:beef:2::1" "dead:beef:2::2" "$client6_port" "23"\ + "$client_addr_id" "ns1" "ns2" + + # RM_ADDR from client to server machine + ip netns exec "$ns2" ./pm_nl_ctl rem id $client_addr_id token\ + "$client6_token" + sleep 0.5 + + # Attempt to add a listener at 10.0.2.2:<new-port> + ip netns exec "$ns2" ./pm_nl_ctl listen 10.0.2.2\ + $new4_port & + listener_pid=$! + + # ADD_ADDR from client to server machine using a new port + :>"$server_evts" + ip netns exec "$ns2" ./pm_nl_ctl ann 10.0.2.2 token "$client4_token" id\ + $client_addr_id port $new4_port + sleep 0.5 + + # CREATE_SUBFLOW from server to client machine + :>"$server_evts" + ip netns exec "$ns1" ./pm_nl_ctl csf lip 10.0.2.1 lid 23 rip 10.0.2.2 rport\ + $new4_port token "$server4_token" + sleep 0.5 + verify_subflow_events "$server_evts" "$SUB_ESTABLISHED" "$server4_token" "$AF_INET"\ + "10.0.2.1" "10.0.2.2" "$new4_port" "23"\ + "$client_addr_id" "ns1" "ns2" + + # Delete the listener from the client ns, if one was created + mptcp_lib_kill_wait $listener_pid + + sport=$(mptcp_lib_evts_get_info sport "$server_evts" $SUB_ESTABLISHED) + + # DESTROY_SUBFLOW from server to client machine + :>"$server_evts" + ip netns exec "$ns1" ./pm_nl_ctl dsf lip 10.0.2.1 lport "$sport" rip 10.0.2.2 rport\ + $new4_port token "$server4_token" + sleep 0.5 + verify_subflow_events "$server_evts" "$SUB_CLOSED" "$server4_token" "$AF_INET" "10.0.2.1"\ + "10.0.2.2" "$new4_port" "23" "$client_addr_id" "ns1" "ns2" + + # RM_ADDR from client to server machine + ip netns exec "$ns2" ./pm_nl_ctl rem id $client_addr_id token\ + "$client4_token" + + # Capture events on the network namespace running the client + :>"$client_evts" + + # Attempt to add a listener at 10.0.2.1:<subflow-port> + ip netns exec "$ns1" ./pm_nl_ctl listen 10.0.2.1\ + $app4_port & + listener_pid=$! + + # ADD_ADDR from server to client machine reusing the subflow port + ip netns exec "$ns1" ./pm_nl_ctl ann 10.0.2.1 token "$server4_token" id\ + $server_addr_id + sleep 0.5 + + # CREATE_SUBFLOW from client to server machine + :>"$client_evts" + ip netns exec "$ns2" ./pm_nl_ctl csf lip 10.0.2.2 lid 23 rip 10.0.2.1 rport\ + $app4_port token "$client4_token" + sleep 0.5 + verify_subflow_events $client_evts $SUB_ESTABLISHED $client4_token $AF_INET "10.0.2.2"\ + "10.0.2.1" "$app4_port" "23" "$server_addr_id" "ns2" "ns1" + + # Delete the listener from the server ns, if one was created + mptcp_lib_kill_wait $listener_pid + + sport=$(mptcp_lib_evts_get_info sport "$client_evts" $SUB_ESTABLISHED) + + # DESTROY_SUBFLOW from client to server machine + :>"$client_evts" + ip netns exec "$ns2" ./pm_nl_ctl dsf lip 10.0.2.2 lport "$sport" rip 10.0.2.1 rport\ + $app4_port token "$client4_token" + sleep 0.5 + verify_subflow_events "$client_evts" "$SUB_CLOSED" "$client4_token" "$AF_INET" "10.0.2.2"\ + "10.0.2.1" "$app4_port" "23" "$server_addr_id" "ns2" "ns1" + + # RM_ADDR from server to client machine + ip netns exec "$ns1" ./pm_nl_ctl rem id $server_addr_id token\ + "$server4_token" + sleep 0.5 + + # Attempt to add a listener at dead:beef:2::1:<subflow-port> + ip netns exec "$ns1" ./pm_nl_ctl listen dead:beef:2::1\ + $app6_port & + listener_pid=$! + + # ADD_ADDR6 from server to client machine reusing the subflow port + :>"$client_evts" + ip netns exec "$ns1" ./pm_nl_ctl ann dead:beef:2::1 token "$server6_token" id\ + $server_addr_id + sleep 0.5 + + # CREATE_SUBFLOW6 from client to server machine + :>"$client_evts" + ip netns exec "$ns2" ./pm_nl_ctl csf lip dead:beef:2::2 lid 23 rip\ + dead:beef:2::1 rport $app6_port token "$client6_token" + sleep 0.5 + verify_subflow_events "$client_evts" "$SUB_ESTABLISHED" "$client6_token"\ + "$AF_INET6" "dead:beef:2::2"\ + "dead:beef:2::1" "$app6_port" "23"\ + "$server_addr_id" "ns2" "ns1" + + # Delete the listener from the server ns, if one was created + mptcp_lib_kill_wait $listener_pid + + sport=$(mptcp_lib_evts_get_info sport "$client_evts" $SUB_ESTABLISHED) + + # DESTROY_SUBFLOW6 from client to server machine + :>"$client_evts" + ip netns exec "$ns2" ./pm_nl_ctl dsf lip dead:beef:2::2 lport "$sport" rip\ + dead:beef:2::1 rport $app6_port token "$client6_token" + sleep 0.5 + verify_subflow_events $client_evts $SUB_CLOSED $client6_token $AF_INET6 "dead:beef:2::2"\ + "dead:beef:2::1" "$app6_port" "23" "$server_addr_id" "ns2" "ns1" + + # RM_ADDR6 from server to client machine + ip netns exec "$ns1" ./pm_nl_ctl rem id $server_addr_id token\ + "$server6_token" + sleep 0.5 + + # Attempt to add a listener at 10.0.2.1:<new-port> + ip netns exec "$ns1" ./pm_nl_ctl listen 10.0.2.1\ + $new4_port & + listener_pid=$! + + # ADD_ADDR from server to client machine using a new port + :>"$client_evts" + ip netns exec "$ns1" ./pm_nl_ctl ann 10.0.2.1 token "$server4_token" id\ + $server_addr_id port $new4_port + sleep 0.5 + + # CREATE_SUBFLOW from client to server machine + :>"$client_evts" + ip netns exec "$ns2" ./pm_nl_ctl csf lip 10.0.2.2 lid 23 rip 10.0.2.1 rport\ + $new4_port token "$client4_token" + sleep 0.5 + verify_subflow_events "$client_evts" "$SUB_ESTABLISHED" "$client4_token" "$AF_INET"\ + "10.0.2.2" "10.0.2.1" "$new4_port" "23" "$server_addr_id" "ns2" "ns1" + + # Delete the listener from the server ns, if one was created + mptcp_lib_kill_wait $listener_pid + + sport=$(mptcp_lib_evts_get_info sport "$client_evts" $SUB_ESTABLISHED) + + # DESTROY_SUBFLOW from client to server machine + :>"$client_evts" + ip netns exec "$ns2" ./pm_nl_ctl dsf lip 10.0.2.2 lport "$sport" rip 10.0.2.1 rport\ + $new4_port token "$client4_token" + sleep 0.5 + verify_subflow_events "$client_evts" "$SUB_CLOSED" "$client4_token" "$AF_INET" "10.0.2.2"\ + "10.0.2.1" "$new4_port" "23" "$server_addr_id" "ns2" "ns1" + + # RM_ADDR from server to client machine + ip netns exec "$ns1" ./pm_nl_ctl rem id $server_addr_id token\ + "$server4_token" +} + +test_subflows_v4_v6_mix() +{ + print_title "Subflows v4 and v6 mix tests" + + # Attempt to add a listener at 10.0.2.1:<subflow-port> + ip netns exec "$ns1" ./pm_nl_ctl listen 10.0.2.1\ + $app6_port & + local listener_pid=$! + + # ADD_ADDR4 from server to client machine reusing the subflow port on + # the established v6 connection + :>"$client_evts" + ip netns exec "$ns1" ./pm_nl_ctl ann 10.0.2.1 token "$server6_token" id\ + $server_addr_id dev ns1eth2 + print_test "ADD_ADDR4 id:${server_addr_id} 10.0.2.1 (ns1) => ns2, reuse port" + sleep 0.5 + verify_announce_event "$client_evts" "$ANNOUNCED" "$client6_token" "10.0.2.1"\ + "$server_addr_id" "$app6_port" + + # CREATE_SUBFLOW from client to server machine + :>"$client_evts" + ip netns exec "$ns2" ./pm_nl_ctl csf lip 10.0.2.2 lid 23 rip 10.0.2.1 rport\ + $app6_port token "$client6_token" + sleep 0.5 + verify_subflow_events "$client_evts" "$SUB_ESTABLISHED" "$client6_token"\ + "$AF_INET" "10.0.2.2" "10.0.2.1" "$app6_port" "23"\ + "$server_addr_id" "ns2" "ns1" + + # Delete the listener from the server ns, if one was created + mptcp_lib_kill_wait $listener_pid + + sport=$(mptcp_lib_evts_get_info sport "$client_evts" $SUB_ESTABLISHED) + + # DESTROY_SUBFLOW from client to server machine + :>"$client_evts" + ip netns exec "$ns2" ./pm_nl_ctl dsf lip 10.0.2.2 lport "$sport" rip 10.0.2.1 rport\ + $app6_port token "$client6_token" + sleep 0.5 + verify_subflow_events "$client_evts" "$SUB_CLOSED" "$client6_token" \ + "$AF_INET" "10.0.2.2" "10.0.2.1" "$app6_port" "23"\ + "$server_addr_id" "ns2" "ns1" + + # RM_ADDR from server to client machine + ip netns exec "$ns1" ./pm_nl_ctl rem id $server_addr_id token\ + "$server6_token" + sleep 0.5 +} + +test_prio() +{ + print_title "Prio tests" + + local count + + # Send MP_PRIO signal from client to server machine + ip netns exec "$ns2" ./pm_nl_ctl set 10.0.1.2 port "$client4_port" flags backup token "$client4_token" rip 10.0.1.1 rport "$app4_port" + sleep 0.5 + + # Check TX + print_test "MP_PRIO TX" + count=$(mptcp_lib_get_counter "$ns2" "MPTcpExtMPPrioTx") + if [ -z "$count" ]; then + test_skip + elif [ $count != 1 ]; then + test_fail "Count != 1: ${count}" + else + test_pass + fi + + # Check RX + print_test "MP_PRIO RX" + count=$(mptcp_lib_get_counter "$ns1" "MPTcpExtMPPrioRx") + if [ -z "$count" ]; then + test_skip + elif [ $count != 1 ]; then + test_fail "Count != 1: ${count}" + else + test_pass + fi +} + +verify_listener_events() +{ + if mptcp_lib_verify_listener_events "${@}"; then + test_pass + else + test_fail + fi +} + +test_listener() +{ + print_title "Listener tests" + + if ! mptcp_lib_kallsyms_has "mptcp_event_pm_listener$"; then + print_test "LISTENER events" + test_skip + return + fi + + # Capture events on the network namespace running the client + :>$client_evts + + # Attempt to add a listener at 10.0.2.2:<subflow-port> + ip netns exec $ns2 ./pm_nl_ctl listen 10.0.2.2\ + $client4_port & + local listener_pid=$! + + sleep 0.5 + print_test "CREATE_LISTENER 10.0.2.2:$client4_port" + verify_listener_events $client_evts $LISTENER_CREATED $AF_INET 10.0.2.2 $client4_port + + # ADD_ADDR from client to server machine reusing the subflow port + ip netns exec $ns2 ./pm_nl_ctl ann 10.0.2.2 token $client4_token id\ + $client_addr_id + sleep 0.5 + + # CREATE_SUBFLOW from server to client machine + ip netns exec $ns1 ./pm_nl_ctl csf lip 10.0.2.1 lid 23 rip 10.0.2.2\ + rport $client4_port token $server4_token + sleep 0.5 + + # Delete the listener from the client ns, if one was created + mptcp_lib_kill_wait $listener_pid + + sleep 0.5 + print_test "CLOSE_LISTENER 10.0.2.2:$client4_port" + verify_listener_events $client_evts $LISTENER_CLOSED $AF_INET 10.0.2.2 $client4_port +} + +print_title "Make connections" +make_connection +make_connection "v6" + +test_announce +test_remove +test_subflows +test_subflows_v4_v6_mix +test_prio +test_listener + +mptcp_lib_result_print_all_tap +exit ${ret} diff --git a/tools/testing/selftests/net/msg_zerocopy.sh b/tools/testing/selftests/net/msg_zerocopy.sh index 825ffec85cea..89c22f5320e0 100755 --- a/tools/testing/selftests/net/msg_zerocopy.sh +++ b/tools/testing/selftests/net/msg_zerocopy.sh @@ -70,23 +70,22 @@ case "${TXMODE}" in esac # Start of state changes: install cleanup handler -save_sysctl_mem="$(sysctl -n ${path_sysctl_mem})" cleanup() { ip netns del "${NS2}" ip netns del "${NS1}" - sysctl -w -q "${path_sysctl_mem}=${save_sysctl_mem}" } trap cleanup EXIT -# Configure system settings -sysctl -w -q "${path_sysctl_mem}=1000000" - # Create virtual ethernet pair between network namespaces ip netns add "${NS1}" ip netns add "${NS2}" +# Configure system settings +ip netns exec "${NS1}" sysctl -w -q "${path_sysctl_mem}=1000000" +ip netns exec "${NS2}" sysctl -w -q "${path_sysctl_mem}=1000000" + ip link add "${DEV}" mtu "${DEV_MTU}" netns "${NS1}" type veth \ peer name "${DEV}" mtu "${DEV_MTU}" netns "${NS2}" diff --git a/tools/testing/selftests/net/nat6to4.c b/tools/testing/selftests/net/nat6to4.c new file mode 100644 index 000000000000..ac54c36b25fc --- /dev/null +++ b/tools/testing/selftests/net/nat6to4.c @@ -0,0 +1,285 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * This code is taken from the Android Open Source Project and the author + * (Maciej Żenczykowski) has gave permission to relicense it under the + * GPLv2. Therefore this program is free software; + * You can redistribute it and/or modify it under the terms of the GNU + * General Public License version 2 as published by the Free Software + * Foundation + + * The original headers, including the original license headers, are + * included below for completeness. + * + * Copyright (C) 2019 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include <linux/bpf.h> +#include <linux/if.h> +#include <linux/if_ether.h> +#include <linux/if_packet.h> +#include <linux/in.h> +#include <linux/in6.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/pkt_cls.h> +#include <linux/swab.h> +#include <stdbool.h> +#include <stdint.h> + + +#include <linux/udp.h> + +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> + +#define IP_DF 0x4000 // Flag: "Don't Fragment" + +SEC("schedcls/ingress6/nat_6") +int sched_cls_ingress6_nat_6_prog(struct __sk_buff *skb) +{ + const int l2_header_size = sizeof(struct ethhdr); + void *data = (void *)(long)skb->data; + const void *data_end = (void *)(long)skb->data_end; + const struct ethhdr * const eth = data; // used iff is_ethernet + const struct ipv6hdr * const ip6 = (void *)(eth + 1); + + // Require ethernet dst mac address to be our unicast address. + if (skb->pkt_type != PACKET_HOST) + return TC_ACT_OK; + + // Must be meta-ethernet IPv6 frame + if (skb->protocol != bpf_htons(ETH_P_IPV6)) + return TC_ACT_OK; + + // Must have (ethernet and) ipv6 header + if (data + l2_header_size + sizeof(*ip6) > data_end) + return TC_ACT_OK; + + // Ethertype - if present - must be IPv6 + if (eth->h_proto != bpf_htons(ETH_P_IPV6)) + return TC_ACT_OK; + + // IP version must be 6 + if (ip6->version != 6) + return TC_ACT_OK; + // Maximum IPv6 payload length that can be translated to IPv4 + if (bpf_ntohs(ip6->payload_len) > 0xFFFF - sizeof(struct iphdr)) + return TC_ACT_OK; + switch (ip6->nexthdr) { + case IPPROTO_TCP: // For TCP & UDP the checksum neutrality of the chosen IPv6 + case IPPROTO_UDP: // address means there is no need to update their checksums. + case IPPROTO_GRE: // We do not need to bother looking at GRE/ESP headers, + case IPPROTO_ESP: // since there is never a checksum to update. + break; + default: // do not know how to handle anything else + return TC_ACT_OK; + } + + struct ethhdr eth2; // used iff is_ethernet + + eth2 = *eth; // Copy over the ethernet header (src/dst mac) + eth2.h_proto = bpf_htons(ETH_P_IP); // But replace the ethertype + + struct iphdr ip = { + .version = 4, // u4 + .ihl = sizeof(struct iphdr) / sizeof(__u32), // u4 + .tos = (ip6->priority << 4) + (ip6->flow_lbl[0] >> 4), // u8 + .tot_len = bpf_htons(bpf_ntohs(ip6->payload_len) + sizeof(struct iphdr)), // u16 + .id = 0, // u16 + .frag_off = bpf_htons(IP_DF), // u16 + .ttl = ip6->hop_limit, // u8 + .protocol = ip6->nexthdr, // u8 + .check = 0, // u16 + .saddr = 0x0201a8c0, // u32 + .daddr = 0x0101a8c0, // u32 + }; + + // Calculate the IPv4 one's complement checksum of the IPv4 header. + __wsum sum4 = 0; + + for (int i = 0; i < sizeof(ip) / sizeof(__u16); ++i) + sum4 += ((__u16 *)&ip)[i]; + + // Note that sum4 is guaranteed to be non-zero by virtue of ip.version == 4 + sum4 = (sum4 & 0xFFFF) + (sum4 >> 16); // collapse u32 into range 1 .. 0x1FFFE + sum4 = (sum4 & 0xFFFF) + (sum4 >> 16); // collapse any potential carry into u16 + ip.check = (__u16)~sum4; // sum4 cannot be zero, so this is never 0xFFFF + + // Calculate the *negative* IPv6 16-bit one's complement checksum of the IPv6 header. + __wsum sum6 = 0; + // We'll end up with a non-zero sum due to ip6->version == 6 (which has '0' bits) + for (int i = 0; i < sizeof(*ip6) / sizeof(__u16); ++i) + sum6 += ~((__u16 *)ip6)[i]; // note the bitwise negation + + // Note that there is no L4 checksum update: we are relying on the checksum neutrality + // of the ipv6 address chosen by netd's ClatdController. + + // Packet mutations begin - point of no return, but if this first modification fails + // the packet is probably still pristine, so let clatd handle it. + if (bpf_skb_change_proto(skb, bpf_htons(ETH_P_IP), 0)) + return TC_ACT_OK; + bpf_csum_update(skb, sum6); + + data = (void *)(long)skb->data; + data_end = (void *)(long)skb->data_end; + if (data + l2_header_size + sizeof(struct iphdr) > data_end) + return TC_ACT_SHOT; + + struct ethhdr *new_eth = data; + + // Copy over the updated ethernet header + *new_eth = eth2; + + // Copy over the new ipv4 header. + *(struct iphdr *)(new_eth + 1) = ip; + return bpf_redirect(skb->ifindex, BPF_F_INGRESS); +} + +SEC("schedcls/egress4/snat4") +int sched_cls_egress4_snat4_prog(struct __sk_buff *skb) +{ + const int l2_header_size = sizeof(struct ethhdr); + void *data = (void *)(long)skb->data; + const void *data_end = (void *)(long)skb->data_end; + const struct ethhdr *const eth = data; // used iff is_ethernet + const struct iphdr *const ip4 = (void *)(eth + 1); + + // Must be meta-ethernet IPv4 frame + if (skb->protocol != bpf_htons(ETH_P_IP)) + return TC_ACT_OK; + + // Must have ipv4 header + if (data + l2_header_size + sizeof(struct ipv6hdr) > data_end) + return TC_ACT_OK; + + // Ethertype - if present - must be IPv4 + if (eth->h_proto != bpf_htons(ETH_P_IP)) + return TC_ACT_OK; + + // IP version must be 4 + if (ip4->version != 4) + return TC_ACT_OK; + + // We cannot handle IP options, just standard 20 byte == 5 dword minimal IPv4 header + if (ip4->ihl != 5) + return TC_ACT_OK; + + // Maximum IPv6 payload length that can be translated to IPv4 + if (bpf_htons(ip4->tot_len) > 0xFFFF - sizeof(struct ipv6hdr)) + return TC_ACT_OK; + + // Calculate the IPv4 one's complement checksum of the IPv4 header. + __wsum sum4 = 0; + + for (int i = 0; i < sizeof(*ip4) / sizeof(__u16); ++i) + sum4 += ((__u16 *)ip4)[i]; + + // Note that sum4 is guaranteed to be non-zero by virtue of ip4->version == 4 + sum4 = (sum4 & 0xFFFF) + (sum4 >> 16); // collapse u32 into range 1 .. 0x1FFFE + sum4 = (sum4 & 0xFFFF) + (sum4 >> 16); // collapse any potential carry into u16 + // for a correct checksum we should get *a* zero, but sum4 must be positive, ie 0xFFFF + if (sum4 != 0xFFFF) + return TC_ACT_OK; + + // Minimum IPv4 total length is the size of the header + if (bpf_ntohs(ip4->tot_len) < sizeof(*ip4)) + return TC_ACT_OK; + + // We are incapable of dealing with IPv4 fragments + if (ip4->frag_off & ~bpf_htons(IP_DF)) + return TC_ACT_OK; + + switch (ip4->protocol) { + case IPPROTO_TCP: // For TCP & UDP the checksum neutrality of the chosen IPv6 + case IPPROTO_GRE: // address means there is no need to update their checksums. + case IPPROTO_ESP: // We do not need to bother looking at GRE/ESP headers, + break; // since there is never a checksum to update. + + case IPPROTO_UDP: // See above comment, but must also have UDP header... + if (data + sizeof(*ip4) + sizeof(struct udphdr) > data_end) + return TC_ACT_OK; + const struct udphdr *uh = (const struct udphdr *)(ip4 + 1); + // If IPv4/UDP checksum is 0 then fallback to clatd so it can calculate the + // checksum. Otherwise the network or more likely the NAT64 gateway might + // drop the packet because in most cases IPv6/UDP packets with a zero checksum + // are invalid. See RFC 6935. TODO: calculate checksum via bpf_csum_diff() + if (!uh->check) + return TC_ACT_OK; + break; + + default: // do not know how to handle anything else + return TC_ACT_OK; + } + struct ethhdr eth2; // used iff is_ethernet + + eth2 = *eth; // Copy over the ethernet header (src/dst mac) + eth2.h_proto = bpf_htons(ETH_P_IPV6); // But replace the ethertype + + struct ipv6hdr ip6 = { + .version = 6, // __u8:4 + .priority = ip4->tos >> 4, // __u8:4 + .flow_lbl = {(ip4->tos & 0xF) << 4, 0, 0}, // __u8[3] + .payload_len = bpf_htons(bpf_ntohs(ip4->tot_len) - 20), // __be16 + .nexthdr = ip4->protocol, // __u8 + .hop_limit = ip4->ttl, // __u8 + }; + ip6.saddr.in6_u.u6_addr32[0] = bpf_htonl(0x20010db8); + ip6.saddr.in6_u.u6_addr32[1] = 0; + ip6.saddr.in6_u.u6_addr32[2] = 0; + ip6.saddr.in6_u.u6_addr32[3] = bpf_htonl(1); + ip6.daddr.in6_u.u6_addr32[0] = bpf_htonl(0x20010db8); + ip6.daddr.in6_u.u6_addr32[1] = 0; + ip6.daddr.in6_u.u6_addr32[2] = 0; + ip6.daddr.in6_u.u6_addr32[3] = bpf_htonl(2); + + // Calculate the IPv6 16-bit one's complement checksum of the IPv6 header. + __wsum sum6 = 0; + // We'll end up with a non-zero sum due to ip6.version == 6 + for (int i = 0; i < sizeof(ip6) / sizeof(__u16); ++i) + sum6 += ((__u16 *)&ip6)[i]; + + // Packet mutations begin - point of no return, but if this first modification fails + // the packet is probably still pristine, so let clatd handle it. + if (bpf_skb_change_proto(skb, bpf_htons(ETH_P_IPV6), 0)) + return TC_ACT_OK; + + // This takes care of updating the skb->csum field for a CHECKSUM_COMPLETE packet. + // In such a case, skb->csum is a 16-bit one's complement sum of the entire payload, + // thus we need to subtract out the ipv4 header's sum, and add in the ipv6 header's sum. + // However, we've already verified the ipv4 checksum is correct and thus 0. + // Thus we only need to add the ipv6 header's sum. + // + // bpf_csum_update() always succeeds if the skb is CHECKSUM_COMPLETE and returns an error + // (-ENOTSUPP) if it isn't. So we just ignore the return code (see above for more details). + bpf_csum_update(skb, sum6); + + // bpf_skb_change_proto() invalidates all pointers - reload them. + data = (void *)(long)skb->data; + data_end = (void *)(long)skb->data_end; + + // I cannot think of any valid way for this error condition to trigger, however I do + // believe the explicit check is required to keep the in kernel ebpf verifier happy. + if (data + l2_header_size + sizeof(ip6) > data_end) + return TC_ACT_SHOT; + + struct ethhdr *new_eth = data; + + // Copy over the updated ethernet header + *new_eth = eth2; + // Copy over the new ipv4 header. + *(struct ipv6hdr *)(new_eth + 1) = ip6; + return TC_ACT_OK; +} + +char _license[] SEC("license") = ("GPL"); diff --git a/tools/testing/selftests/net/ndisc_unsolicited_na_test.sh b/tools/testing/selftests/net/ndisc_unsolicited_na_test.sh new file mode 100755 index 000000000000..5db69dad0cfc --- /dev/null +++ b/tools/testing/selftests/net/ndisc_unsolicited_na_test.sh @@ -0,0 +1,249 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# This test is for the accept_untracked_na feature to +# enable RFC9131 behaviour. The following is the test-matrix. +# drop accept fwding behaviour +# ---- ------ ------ ---------------------------------------------- +# 1 X X Don't update NC +# 0 0 X Don't update NC +# 0 1 0 Don't update NC +# 0 1 1 Add a STALE NC entry + +source lib.sh +ret=0 + +PAUSE_ON_FAIL=no +PAUSE=no + +HOST_INTF="veth-host" +ROUTER_INTF="veth-router" + +ROUTER_ADDR="2000:20::1" +HOST_ADDR="2000:20::2" +SUBNET_WIDTH=64 +ROUTER_ADDR_WITH_MASK="${ROUTER_ADDR}/${SUBNET_WIDTH}" +HOST_ADDR_WITH_MASK="${HOST_ADDR}/${SUBNET_WIDTH}" + +tcpdump_stdout= +tcpdump_stderr= + +log_test() +{ + local rc=$1 + local expected=$2 + local msg="$3" + + if [ ${rc} -eq ${expected} ]; then + printf " TEST: %-60s [ OK ]\n" "${msg}" + nsuccess=$((nsuccess+1)) + else + ret=1 + nfail=$((nfail+1)) + printf " TEST: %-60s [FAIL]\n" "${msg}" + if [ "${PAUSE_ON_FAIL}" = "yes" ]; then + echo + echo "hit enter to continue, 'q' to quit" + read a + [ "$a" = "q" ] && exit 1 + fi + fi + + if [ "${PAUSE}" = "yes" ]; then + echo + echo "hit enter to continue, 'q' to quit" + read a + [ "$a" = "q" ] && exit 1 + fi +} + +setup() +{ + set -e + + local drop_unsolicited_na=$1 + local accept_untracked_na=$2 + local forwarding=$3 + + # Setup two namespaces and a veth tunnel across them. + # On end of the tunnel is a router and the other end is a host. + setup_ns HOST_NS ROUTER_NS + IP_HOST="ip -6 -netns ${HOST_NS}" + IP_HOST_EXEC="ip netns exec ${HOST_NS}" + IP_ROUTER="ip -6 -netns ${ROUTER_NS}" + IP_ROUTER_EXEC="ip netns exec ${ROUTER_NS}" + + ${IP_ROUTER} link add ${ROUTER_INTF} type veth \ + peer name ${HOST_INTF} netns ${HOST_NS} + + # Enable IPv6 on both router and host, and configure static addresses. + # The router here is the DUT + # Setup router configuration as specified by the arguments. + # forwarding=0 case is to check that a non-router + # doesn't add neighbour entries. + ROUTER_CONF=net.ipv6.conf.${ROUTER_INTF} + ${IP_ROUTER_EXEC} sysctl -qw \ + ${ROUTER_CONF}.forwarding=${forwarding} + ${IP_ROUTER_EXEC} sysctl -qw \ + ${ROUTER_CONF}.drop_unsolicited_na=${drop_unsolicited_na} + ${IP_ROUTER_EXEC} sysctl -qw \ + ${ROUTER_CONF}.accept_untracked_na=${accept_untracked_na} + ${IP_ROUTER_EXEC} sysctl -qw ${ROUTER_CONF}.disable_ipv6=0 + ${IP_ROUTER} addr add ${ROUTER_ADDR_WITH_MASK} dev ${ROUTER_INTF} + + # Turn on ndisc_notify on host interface so that + # the host sends unsolicited NAs. + HOST_CONF=net.ipv6.conf.${HOST_INTF} + ${IP_HOST_EXEC} sysctl -qw ${HOST_CONF}.ndisc_notify=1 + ${IP_HOST_EXEC} sysctl -qw ${HOST_CONF}.disable_ipv6=0 + ${IP_HOST} addr add ${HOST_ADDR_WITH_MASK} dev ${HOST_INTF} + + set +e +} + +start_tcpdump() { + set -e + tcpdump_stdout=`mktemp` + tcpdump_stderr=`mktemp` + ${IP_ROUTER_EXEC} timeout 15s \ + tcpdump --immediate-mode -tpni ${ROUTER_INTF} -c 1 \ + "icmp6 && icmp6[0] == 136 && src ${HOST_ADDR}" \ + > ${tcpdump_stdout} 2> /dev/null + set +e +} + +cleanup_tcpdump() +{ + set -e + [[ ! -z ${tcpdump_stdout} ]] && rm -f ${tcpdump_stdout} + [[ ! -z ${tcpdump_stderr} ]] && rm -f ${tcpdump_stderr} + tcpdump_stdout= + tcpdump_stderr= + set +e +} + +cleanup() +{ + cleanup_tcpdump + ip netns del ${HOST_NS} + ip netns del ${ROUTER_NS} +} + +link_up() { + set -e + ${IP_ROUTER} link set dev ${ROUTER_INTF} up + ${IP_HOST} link set dev ${HOST_INTF} up + set +e +} + +verify_ndisc() { + local drop_unsolicited_na=$1 + local accept_untracked_na=$2 + local forwarding=$3 + + neigh_show_output=$(${IP_ROUTER} neigh show \ + to ${HOST_ADDR} dev ${ROUTER_INTF} nud stale) + if [ ${drop_unsolicited_na} -eq 0 ] && \ + [ ${accept_untracked_na} -eq 1 ] && \ + [ ${forwarding} -eq 1 ]; then + # Neighbour entry expected to be present for 011 case + [[ ${neigh_show_output} ]] + else + # Neighbour entry expected to be absent for all other cases + [[ -z ${neigh_show_output} ]] + fi +} + +test_unsolicited_na_common() +{ + # Setup the test bed, but keep links down + setup $1 $2 $3 + + # Bring the link up, wait for the NA, + # and add a delay to ensure neighbour processing is done. + link_up + start_tcpdump + + # Verify the neighbour table + verify_ndisc $1 $2 $3 + +} + +test_unsolicited_na_combination() { + test_unsolicited_na_common $1 $2 $3 + test_msg=("test_unsolicited_na: " + "drop_unsolicited_na=$1 " + "accept_untracked_na=$2 " + "forwarding=$3") + log_test $? 0 "${test_msg[*]}" + cleanup +} + +test_unsolicited_na_combinations() { + # Args: drop_unsolicited_na accept_untracked_na forwarding + + # Expect entry + test_unsolicited_na_combination 0 1 1 + + # Expect no entry + test_unsolicited_na_combination 0 0 0 + test_unsolicited_na_combination 0 0 1 + test_unsolicited_na_combination 0 1 0 + test_unsolicited_na_combination 1 0 0 + test_unsolicited_na_combination 1 0 1 + test_unsolicited_na_combination 1 1 0 + test_unsolicited_na_combination 1 1 1 +} + +############################################################################### +# usage + +usage() +{ + cat <<EOF +usage: ${0##*/} OPTS + -p Pause on fail + -P Pause after each test before cleanup +EOF +} + +############################################################################### +# main + +while getopts :pPh o +do + case $o in + p) PAUSE_ON_FAIL=yes;; + P) PAUSE=yes;; + h) usage; exit 0;; + *) usage; exit 1;; + esac +done + +# make sure we don't pause twice +[ "${PAUSE}" = "yes" ] && PAUSE_ON_FAIL=no + +if [ "$(id -u)" -ne 0 ];then + echo "SKIP: Need root privileges" + exit $ksft_skip; +fi + +if [ ! -x "$(command -v ip)" ]; then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +if [ ! -x "$(command -v tcpdump)" ]; then + echo "SKIP: Could not run test without tcpdump tool" + exit $ksft_skip +fi + +# start clean +cleanup &> /dev/null + +test_unsolicited_na_combinations + +printf "\nTests passed: %3d\n" ${nsuccess} +printf "Tests failed: %3d\n" ${nfail} + +exit $ret diff --git a/tools/testing/selftests/net/net_helper.sh b/tools/testing/selftests/net/net_helper.sh new file mode 100644 index 000000000000..6596fe03c77f --- /dev/null +++ b/tools/testing/selftests/net/net_helper.sh @@ -0,0 +1,25 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Helper functions + +wait_local_port_listen() +{ + local listener_ns="${1}" + local port="${2}" + local protocol="${3}" + local pattern + local i + + pattern=":$(printf "%04X" "${port}") " + + # for tcp protocol additionally check the socket state + [ ${protocol} = "tcp" ] && pattern="${pattern}0A" + for i in $(seq 10); do + if ip netns exec "${listener_ns}" awk '{print $2" "$4}' \ + /proc/net/"${protocol}"* | grep -q "${pattern}"; then + break + fi + sleep 0.1 + done +} diff --git a/tools/testing/selftests/net/netns-name.sh b/tools/testing/selftests/net/netns-name.sh new file mode 100755 index 000000000000..6974474c26f3 --- /dev/null +++ b/tools/testing/selftests/net/netns-name.sh @@ -0,0 +1,87 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source lib.sh +set -o pipefail + +DEV=dummy-dev0 +DEV2=dummy-dev1 +ALT_NAME=some-alt-name + +RET_CODE=0 + +cleanup() { + cleanup_ns $NS $test_ns +} + +trap cleanup EXIT + +fail() { + echo "ERROR: ${1:-unexpected return code} (ret: $_)" >&2 + RET_CODE=1 +} + +setup_ns NS test_ns + +# +# Test basic move without a rename +# +ip -netns $NS link add name $DEV type dummy || fail +ip -netns $NS link set dev $DEV netns $test_ns || + fail "Can't perform a netns move" +ip -netns $test_ns link show dev $DEV >> /dev/null || fail "Device not found after move" +ip -netns $test_ns link del $DEV || fail + +# +# Test move with a conflict +# +ip -netns $test_ns link add name $DEV type dummy +ip -netns $NS link add name $DEV type dummy || fail +ip -netns $NS link set dev $DEV netns $test_ns 2> /dev/null && + fail "Performed a netns move with a name conflict" +ip -netns $test_ns link show dev $DEV >> /dev/null || fail "Device not found after move" +ip -netns $NS link del $DEV || fail +ip -netns $test_ns link del $DEV || fail + +# +# Test move with a conflict and rename +# +ip -netns $test_ns link add name $DEV type dummy +ip -netns $NS link add name $DEV type dummy || fail +ip -netns $NS link set dev $DEV netns $test_ns name $DEV2 || + fail "Can't perform a netns move with rename" +ip -netns $test_ns link del $DEV2 || fail +ip -netns $test_ns link del $DEV || fail + +# +# Test dup alt-name with netns move +# +ip -netns $test_ns link add name $DEV type dummy || fail +ip -netns $test_ns link property add dev $DEV altname $ALT_NAME || fail +ip -netns $NS link add name $DEV2 type dummy || fail +ip -netns $NS link property add dev $DEV2 altname $ALT_NAME || fail + +ip -netns $NS link set dev $DEV2 netns $test_ns 2> /dev/null && + fail "Moved with alt-name dup" + +ip -netns $test_ns link del $DEV || fail +ip -netns $NS link del $DEV2 || fail + +# +# Test creating alt-name in one net-ns and using in another +# +ip -netns $NS link add name $DEV type dummy || fail +ip -netns $NS link property add dev $DEV altname $ALT_NAME || fail +ip -netns $NS link set dev $DEV netns $test_ns || fail +ip -netns $test_ns link show dev $ALT_NAME >> /dev/null || fail "Can't find alt-name after move" +ip -netns $NS link show dev $ALT_NAME 2> /dev/null && + fail "Can still find alt-name after move" +ip -netns $test_ns link del $DEV || fail + +echo -ne "$(basename $0) \t\t\t\t" +if [ $RET_CODE -eq 0 ]; then + echo "[ OK ]" +else + echo "[ FAIL ]" +fi +exit $RET_CODE diff --git a/tools/testing/selftests/net/nettest.c b/tools/testing/selftests/net/nettest.c index f75c53ce0a2d..cd8a58097448 100644 --- a/tools/testing/selftests/net/nettest.c +++ b/tools/testing/selftests/net/nettest.c @@ -9,14 +9,18 @@ #include <sys/types.h> #include <sys/ioctl.h> #include <sys/socket.h> +#include <sys/wait.h> #include <linux/tcp.h> +#include <linux/udp.h> #include <arpa/inet.h> #include <net/if.h> #include <netinet/in.h> +#include <netinet/ip.h> #include <netdb.h> #include <fcntl.h> #include <libgen.h> #include <limits.h> +#include <sched.h> #include <stdarg.h> #include <stdio.h> #include <stdlib.h> @@ -24,6 +28,11 @@ #include <unistd.h> #include <time.h> #include <errno.h> +#include <getopt.h> + +#include <linux/xfrm.h> +#include <linux/ipsec.h> +#include <linux/pfkeyv2.h> #ifndef IPV6_UNICAST_IF #define IPV6_UNICAST_IF 76 @@ -34,6 +43,8 @@ #define DEFAULT_PORT 12345 +#define NS_PREFIX "/run/netns/" + #ifndef MAX #define MAX(a, b) ((a) > (b) ? (a) : (b)) #endif @@ -43,12 +54,15 @@ struct sock_args { /* local address */ + const char *local_addr_str; + const char *client_local_addr_str; union { struct in_addr in; struct in6_addr in6; } local_addr; /* remote address */ + const char *remote_addr_str; union { struct in_addr in; struct in6_addr in6; @@ -62,7 +76,9 @@ struct sock_args { has_grp:1, has_expected_laddr:1, has_expected_raddr:1, - bind_test_only:1; + bind_test_only:1, + client_dontroute:1, + server_dontroute:1; unsigned short port; @@ -71,32 +87,52 @@ struct sock_args { int version; /* AF_INET/AF_INET6 */ int use_setsockopt; + int use_freebind; int use_cmsg; + uint8_t dsfield; const char *dev; + const char *server_dev; int ifindex; + const char *clientns; + const char *serverns; + const char *password; + const char *client_pw; /* prefix for MD5 password */ + const char *md5_prefix_str; union { struct sockaddr_in v4; struct sockaddr_in6 v6; } md5_prefix; unsigned int prefix_len; + /* 0: default, -1: force off, +1: force on */ + int bind_key_ifindex; /* expected addresses and device index for connection */ + const char *expected_dev; + const char *expected_server_dev; int expected_ifindex; /* local address */ + const char *expected_laddr_str; union { struct in_addr in; struct in6_addr in6; } expected_laddr; /* remote address */ + const char *expected_raddr_str; union { struct in_addr in; struct in6_addr in6; } expected_raddr; + + /* ESP in UDP encap test */ + int use_xfrm; + + /* use send() and connect() instead of sendto */ + int datagram_connect; }; static int server_mode; @@ -186,7 +222,7 @@ static void log_address(const char *desc, struct sockaddr *sa) if (sa->sa_family == AF_INET) { struct sockaddr_in *s = (struct sockaddr_in *) sa; - log_msg("%s %s:%d", + log_msg("%s %s:%d\n", desc, inet_ntop(AF_INET, &s->sin_addr, addrstr, sizeof(addrstr)), @@ -195,18 +231,37 @@ static void log_address(const char *desc, struct sockaddr *sa) } else if (sa->sa_family == AF_INET6) { struct sockaddr_in6 *s6 = (struct sockaddr_in6 *) sa; - log_msg("%s [%s]:%d", + log_msg("%s [%s]:%d\n", desc, inet_ntop(AF_INET6, &s6->sin6_addr, addrstr, sizeof(addrstr)), ntohs(s6->sin6_port)); } - printf("\n"); - fflush(stdout); } +static int switch_ns(const char *ns) +{ + char path[PATH_MAX]; + int fd, ret; + + if (geteuid()) + log_error("warning: likely need root to set netns %s!\n", ns); + + snprintf(path, sizeof(path), "%s%s", NS_PREFIX, ns); + fd = open(path, 0); + if (fd < 0) { + log_err_errno("Failed to open netns path; can not switch netns"); + return 1; + } + + ret = setns(fd, CLONE_NEWNET); + close(fd); + + return ret; +} + static int tcp_md5sig(int sd, void *addr, socklen_t alen, struct sock_args *args) { int keylen = strlen(args->password); @@ -226,11 +281,14 @@ static int tcp_md5sig(int sd, void *addr, socklen_t alen, struct sock_args *args } memcpy(&md5sig.tcpm_addr, addr, alen); - if (args->ifindex) { + if ((args->ifindex && args->bind_key_ifindex >= 0) || args->bind_key_ifindex >= 1) { opt = TCP_MD5SIG_EXT; md5sig.tcpm_flags |= TCP_MD5SIG_FLAG_IFINDEX; md5sig.tcpm_ifindex = args->ifindex; + log_msg("TCP_MD5SIG_FLAG_IFINDEX set tcpm_ifindex=%d\n", md5sig.tcpm_ifindex); + } else { + log_msg("TCP_MD5SIG_FLAG_IFINDEX off\n", md5sig.tcpm_ifindex); } rc = setsockopt(sd, IPPROTO_TCP, opt, &md5sig, sizeof(md5sig)); @@ -259,13 +317,13 @@ static int tcp_md5_remote(int sd, struct sock_args *args) switch (args->version) { case AF_INET: sin.sin_port = htons(args->port); - sin.sin_addr = args->remote_addr.in; + sin.sin_addr = args->md5_prefix.v4.sin_addr; addr = &sin; alen = sizeof(sin); break; case AF_INET6: sin6.sin6_port = htons(args->port); - sin6.sin6_addr = args->remote_addr.in6; + sin6.sin6_addr = args->md5_prefix.v6.sin6_addr; addr = &sin6; alen = sizeof(sin6); break; @@ -463,6 +521,29 @@ static int set_membership(int sd, uint32_t grp, uint32_t addr, int ifindex) return 0; } +static int set_freebind(int sd, int version) +{ + unsigned int one = 1; + int rc = 0; + + switch (version) { + case AF_INET: + if (setsockopt(sd, SOL_IP, IP_FREEBIND, &one, sizeof(one))) { + log_err_errno("setsockopt(IP_FREEBIND)"); + rc = -1; + } + break; + case AF_INET6: + if (setsockopt(sd, SOL_IPV6, IPV6_FREEBIND, &one, sizeof(one))) { + log_err_errno("setsockopt(IPV6_FREEBIND"); + rc = -1; + } + break; + } + + return rc; +} + static int set_broadcast(int sd) { unsigned int one = 1; @@ -502,6 +583,48 @@ static int set_reuseaddr(int sd) return rc; } +static int set_dsfield(int sd, int version, int dsfield) +{ + if (!dsfield) + return 0; + + switch (version) { + case AF_INET: + if (setsockopt(sd, SOL_IP, IP_TOS, &dsfield, + sizeof(dsfield)) < 0) { + log_err_errno("setsockopt(IP_TOS)"); + return -1; + } + break; + + case AF_INET6: + if (setsockopt(sd, SOL_IPV6, IPV6_TCLASS, &dsfield, + sizeof(dsfield)) < 0) { + log_err_errno("setsockopt(IPV6_TCLASS)"); + return -1; + } + break; + + default: + log_error("Invalid address family\n"); + return -1; + } + + return 0; +} + +static int set_dontroute(int sd) +{ + unsigned int one = 1; + + if (setsockopt(sd, SOL_SOCKET, SO_DONTROUTE, &one, sizeof(one)) < 0) { + log_err_errno("setsockopt(SO_DONTROUTE)"); + return -1; + } + + return 0; +} + static int str_to_uint(const char *str, int min, int max, unsigned int *value) { int number; @@ -522,6 +645,33 @@ static int str_to_uint(const char *str, int min, int max, unsigned int *value) return -1; } +static int resolve_devices(struct sock_args *args) +{ + if (args->dev) { + args->ifindex = get_ifidx(args->dev); + if (args->ifindex < 0) { + log_error("Invalid device name\n"); + return 1; + } + } + + if (args->expected_dev) { + unsigned int tmp; + + if (str_to_uint(args->expected_dev, 0, INT_MAX, &tmp) == 0) { + args->expected_ifindex = (int)tmp; + } else { + args->expected_ifindex = get_ifidx(args->expected_dev); + if (args->expected_ifindex < 0) { + fprintf(stderr, "Invalid expected device\n"); + return 1; + } + } + } + + return 0; +} + static int expected_addr_match(struct sockaddr *sa, void *expected, const char *desc) { @@ -533,7 +683,7 @@ static int expected_addr_match(struct sockaddr *sa, void *expected, struct in_addr *exp_in = (struct in_addr *) expected; if (s->sin_addr.s_addr != exp_in->s_addr) { - log_error("%s address does not match expected %s", + log_error("%s address does not match expected %s\n", desc, inet_ntop(AF_INET, exp_in, addrstr, sizeof(addrstr))); @@ -544,14 +694,14 @@ static int expected_addr_match(struct sockaddr *sa, void *expected, struct in6_addr *exp_in = (struct in6_addr *) expected; if (memcmp(&s6->sin6_addr, exp_in, sizeof(*exp_in))) { - log_error("%s address does not match expected %s", + log_error("%s address does not match expected %s\n", desc, inet_ntop(AF_INET6, exp_in, addrstr, sizeof(addrstr))); rc = 1; } } else { - log_error("%s address does not match expected - unknown family", + log_error("%s address does not match expected - unknown family\n", desc); rc = 1; } @@ -599,6 +749,160 @@ static int show_sockstat(int sd, struct sock_args *args) return rc; } +enum addr_type { + ADDR_TYPE_LOCAL, + ADDR_TYPE_REMOTE, + ADDR_TYPE_MCAST, + ADDR_TYPE_EXPECTED_LOCAL, + ADDR_TYPE_EXPECTED_REMOTE, + ADDR_TYPE_MD5_PREFIX, +}; + +static int convert_addr(struct sock_args *args, const char *_str, + enum addr_type atype) +{ + int pfx_len_max = args->version == AF_INET6 ? 128 : 32; + int family = args->version; + char *str, *dev, *sep; + struct in6_addr *in6; + struct in_addr *in; + const char *desc; + void *addr; + int rc = 0; + + str = strdup(_str); + if (!str) + return -ENOMEM; + + switch (atype) { + case ADDR_TYPE_LOCAL: + desc = "local"; + addr = &args->local_addr; + break; + case ADDR_TYPE_REMOTE: + desc = "remote"; + addr = &args->remote_addr; + break; + case ADDR_TYPE_MCAST: + desc = "mcast grp"; + addr = &args->grp; + break; + case ADDR_TYPE_EXPECTED_LOCAL: + desc = "expected local"; + addr = &args->expected_laddr; + break; + case ADDR_TYPE_EXPECTED_REMOTE: + desc = "expected remote"; + addr = &args->expected_raddr; + break; + case ADDR_TYPE_MD5_PREFIX: + desc = "md5 prefix"; + if (family == AF_INET) { + args->md5_prefix.v4.sin_family = AF_INET; + addr = &args->md5_prefix.v4.sin_addr; + } else if (family == AF_INET6) { + args->md5_prefix.v6.sin6_family = AF_INET6; + addr = &args->md5_prefix.v6.sin6_addr; + } else + return 1; + + sep = strchr(str, '/'); + if (sep) { + *sep = '\0'; + sep++; + if (str_to_uint(sep, 1, pfx_len_max, + &args->prefix_len) != 0) { + fprintf(stderr, "Invalid port\n"); + return 1; + } + } else { + args->prefix_len = 0; + } + break; + default: + log_error("unknown address type\n"); + exit(1); + } + + switch (family) { + case AF_INET: + in = (struct in_addr *) addr; + if (str) { + if (inet_pton(AF_INET, str, in) == 0) { + log_error("Invalid %s IP address\n", desc); + rc = -1; + goto out; + } + } else { + in->s_addr = htonl(INADDR_ANY); + } + break; + + case AF_INET6: + dev = strchr(str, '%'); + if (dev) { + *dev = '\0'; + dev++; + } + + in6 = (struct in6_addr *) addr; + if (str) { + if (inet_pton(AF_INET6, str, in6) == 0) { + log_error("Invalid %s IPv6 address\n", desc); + rc = -1; + goto out; + } + } else { + *in6 = in6addr_any; + } + if (dev) { + args->scope_id = get_ifidx(dev); + if (args->scope_id < 0) { + log_error("Invalid scope on %s IPv6 address\n", + desc); + rc = -1; + goto out; + } + } + break; + + default: + log_error("Invalid address family\n"); + } + +out: + free(str); + return rc; +} + +static int validate_addresses(struct sock_args *args) +{ + if (args->local_addr_str && + convert_addr(args, args->local_addr_str, ADDR_TYPE_LOCAL) < 0) + return 1; + + if (args->remote_addr_str && + convert_addr(args, args->remote_addr_str, ADDR_TYPE_REMOTE) < 0) + return 1; + + if (args->md5_prefix_str && + convert_addr(args, args->md5_prefix_str, + ADDR_TYPE_MD5_PREFIX) < 0) + return 1; + + if (args->expected_laddr_str && + convert_addr(args, args->expected_laddr_str, + ADDR_TYPE_EXPECTED_LOCAL)) + return 1; + + if (args->expected_raddr_str && + convert_addr(args, args->expected_raddr_str, + ADDR_TYPE_EXPECTED_REMOTE)) + return 1; + + return 0; +} + static int get_index_from_cmsg(struct msghdr *m) { struct cmsghdr *cm; @@ -723,6 +1027,11 @@ static int send_msg(int sd, void *addr, socklen_t alen, struct sock_args *args) log_err_errno("write failed sending msg to peer"); return 1; } + } else if (args->datagram_connect) { + if (send(sd, msg, msglen, 0) < 0) { + log_err_errno("send failed sending msg to peer"); + return 1; + } } else if (args->ifindex && args->use_cmsg) { if (send_msg_cmsg(sd, addr, alen, args->ifindex, args->version)) return 1; @@ -1053,6 +1362,17 @@ static int msock_init(struct sock_args *args, int server) (char *)&one, sizeof(one)) < 0) log_err_errno("Setting SO_BROADCAST error"); + if (set_dsfield(sd, AF_INET, args->dsfield) != 0) + goto out_err; + + if (server) { + if (args->server_dontroute && set_dontroute(sd) != 0) + goto out_err; + } else { + if (args->client_dontroute && set_dontroute(sd) != 0) + goto out_err; + } + if (args->dev && bind_to_device(sd, args->dev) != 0) goto out_err; else if (args->use_setsockopt && @@ -1129,6 +1449,41 @@ static int bind_socket(int sd, struct sock_args *args) return 0; } +static int config_xfrm_policy(int sd, struct sock_args *args) +{ + struct xfrm_userpolicy_info policy = {}; + int type = UDP_ENCAP_ESPINUDP; + int xfrm_af = IP_XFRM_POLICY; + int level = SOL_IP; + + if (args->type != SOCK_DGRAM) { + log_error("Invalid socket type. Only DGRAM could be used for XFRM\n"); + return 1; + } + + policy.action = XFRM_POLICY_ALLOW; + policy.sel.family = args->version; + if (args->version == AF_INET6) { + xfrm_af = IPV6_XFRM_POLICY; + level = SOL_IPV6; + } + + policy.dir = XFRM_POLICY_OUT; + if (setsockopt(sd, level, xfrm_af, &policy, sizeof(policy)) < 0) + return 1; + + policy.dir = XFRM_POLICY_IN; + if (setsockopt(sd, level, xfrm_af, &policy, sizeof(policy)) < 0) + return 1; + + if (setsockopt(sd, IPPROTO_UDP, UDP_ENCAP, &type, sizeof(type)) < 0) { + log_err_errno("Failed to set xfrm encap"); + return 1; + } + + return 0; +} + static int lsock_init(struct sock_args *args) { long flags; @@ -1146,12 +1501,21 @@ static int lsock_init(struct sock_args *args) if (set_reuseport(sd) != 0) goto err; + if (set_dsfield(sd, args->version, args->dsfield) != 0) + goto err; + + if (args->server_dontroute && set_dontroute(sd) != 0) + goto err; + if (args->dev && bind_to_device(sd, args->dev) != 0) goto err; else if (args->use_setsockopt && set_unicast_if(sd, args->ifindex, args->version)) goto err; + if (args->use_freebind && set_freebind(sd, args->version)) + goto err; + if (bind_socket(sd, args)) goto err; @@ -1172,6 +1536,11 @@ static int lsock_init(struct sock_args *args) if (fcntl(sd, F_SETFD, FD_CLOEXEC) < 0) log_err_errno("Failed to set close-on-exec flag"); + if (args->use_xfrm && config_xfrm_policy(sd, args)) { + log_err_errno("Failed to set xfrm policy"); + goto err; + } + out: return sd; @@ -1180,8 +1549,19 @@ err: return -1; } -static int do_server(struct sock_args *args) +static void ipc_write(int fd, int message) +{ + /* Not in both_mode, so there's no process to signal */ + if (fd < 0) + return; + + if (write(fd, &message, sizeof(message)) < 0) + log_err_errno("Failed to send client status"); +} + +static int do_server(struct sock_args *args, int ipc_fd) { + /* ipc_fd = -1 if no parent process to signal */ struct timeval timeout = { .tv_sec = prog_timeout }, *ptval = NULL; unsigned char addr[sizeof(struct sockaddr_in6)] = {}; socklen_t alen = sizeof(addr); @@ -1190,6 +1570,20 @@ static int do_server(struct sock_args *args) fd_set rfds; int rc; + if (args->serverns) { + if (switch_ns(args->serverns)) { + log_error("Could not set server netns to %s\n", + args->serverns); + goto err_exit; + } + log_msg("Switched server netns\n"); + } + + args->dev = args->server_dev; + args->expected_dev = args->expected_server_dev; + if (resolve_devices(args) || validate_addresses(args)) + goto err_exit; + if (prog_timeout) ptval = &timeout; @@ -1199,14 +1593,16 @@ static int do_server(struct sock_args *args) lsd = lsock_init(args); if (lsd < 0) - return 1; + goto err_exit; if (args->bind_test_only) { close(lsd); + ipc_write(ipc_fd, 1); return 0; } if (args->type != SOCK_STREAM) { + ipc_write(ipc_fd, 1); rc = msg_loop(0, lsd, (void *) addr, alen, args); close(lsd); return rc; @@ -1214,11 +1610,11 @@ static int do_server(struct sock_args *args) if (args->password && tcp_md5_remote(lsd, args)) { close(lsd); - return 1; + goto err_exit; } + ipc_write(ipc_fd, 1); while (1) { - log_msg("\n"); log_msg("waiting for client connection.\n"); FD_ZERO(&rfds); FD_SET(lsd, &rfds); @@ -1264,6 +1660,9 @@ static int do_server(struct sock_args *args) close(lsd); return rc; +err_exit: + ipc_write(ipc_fd, 0); + return 1; } static int wait_for_connect(int sd) @@ -1321,6 +1720,12 @@ static int connectsock(void *addr, socklen_t alen, struct sock_args *args) if (set_reuseport(sd) != 0) goto err; + if (set_dsfield(sd, args->version, args->dsfield) != 0) + goto err; + + if (args->client_dontroute && set_dontroute(sd) != 0) + goto err; + if (args->dev && bind_to_device(sd, args->dev) != 0) goto err; else if (args->use_setsockopt && @@ -1330,7 +1735,7 @@ static int connectsock(void *addr, socklen_t alen, struct sock_args *args) if (args->has_local_ip && bind_socket(sd, args)) goto err; - if (args->type != SOCK_STREAM) + if (args->type != SOCK_STREAM && !args->datagram_connect) goto out; if (args->password && tcp_md5sig(sd, addr, alen, args)) @@ -1375,6 +1780,26 @@ static int do_client(struct sock_args *args) return 1; } + if (args->clientns) { + if (switch_ns(args->clientns)) { + log_error("Could not set client netns to %s\n", + args->clientns); + return 1; + } + log_msg("Switched client netns\n"); + } + + args->local_addr_str = args->client_local_addr_str; + if (resolve_devices(args) || validate_addresses(args)) + return 1; + + if ((args->use_setsockopt || args->use_cmsg) && !args->ifindex) { + fprintf(stderr, "Device binding not specified\n"); + return 1; + } + if (args->use_setsockopt || args->use_cmsg) + args->dev = NULL; + switch (args->version) { case AF_INET: sin.sin_port = htons(args->port); @@ -1394,6 +1819,8 @@ static int do_client(struct sock_args *args) break; } + args->password = args->client_pw; + if (args->has_grp) sd = msock_client(args); else @@ -1419,132 +1846,6 @@ out: return rc; } -enum addr_type { - ADDR_TYPE_LOCAL, - ADDR_TYPE_REMOTE, - ADDR_TYPE_MCAST, - ADDR_TYPE_EXPECTED_LOCAL, - ADDR_TYPE_EXPECTED_REMOTE, - ADDR_TYPE_MD5_PREFIX, -}; - -static int convert_addr(struct sock_args *args, const char *_str, - enum addr_type atype) -{ - int pfx_len_max = args->version == AF_INET6 ? 128 : 32; - int family = args->version; - char *str, *dev, *sep; - struct in6_addr *in6; - struct in_addr *in; - const char *desc; - void *addr; - int rc = 0; - - str = strdup(_str); - if (!str) - return -ENOMEM; - - switch (atype) { - case ADDR_TYPE_LOCAL: - desc = "local"; - addr = &args->local_addr; - break; - case ADDR_TYPE_REMOTE: - desc = "remote"; - addr = &args->remote_addr; - break; - case ADDR_TYPE_MCAST: - desc = "mcast grp"; - addr = &args->grp; - break; - case ADDR_TYPE_EXPECTED_LOCAL: - desc = "expected local"; - addr = &args->expected_laddr; - break; - case ADDR_TYPE_EXPECTED_REMOTE: - desc = "expected remote"; - addr = &args->expected_raddr; - break; - case ADDR_TYPE_MD5_PREFIX: - desc = "md5 prefix"; - if (family == AF_INET) { - args->md5_prefix.v4.sin_family = AF_INET; - addr = &args->md5_prefix.v4.sin_addr; - } else if (family == AF_INET6) { - args->md5_prefix.v6.sin6_family = AF_INET6; - addr = &args->md5_prefix.v6.sin6_addr; - } else - return 1; - - sep = strchr(str, '/'); - if (sep) { - *sep = '\0'; - sep++; - if (str_to_uint(sep, 1, pfx_len_max, - &args->prefix_len) != 0) { - fprintf(stderr, "Invalid port\n"); - return 1; - } - } else { - args->prefix_len = pfx_len_max; - } - break; - default: - log_error("unknown address type"); - exit(1); - } - - switch (family) { - case AF_INET: - in = (struct in_addr *) addr; - if (str) { - if (inet_pton(AF_INET, str, in) == 0) { - log_error("Invalid %s IP address\n", desc); - rc = -1; - goto out; - } - } else { - in->s_addr = htonl(INADDR_ANY); - } - break; - - case AF_INET6: - dev = strchr(str, '%'); - if (dev) { - *dev = '\0'; - dev++; - } - - in6 = (struct in6_addr *) addr; - if (str) { - if (inet_pton(AF_INET6, str, in6) == 0) { - log_error("Invalid %s IPv6 address\n", desc); - rc = -1; - goto out; - } - } else { - *in6 = in6addr_any; - } - if (dev) { - args->scope_id = get_ifidx(dev); - if (args->scope_id < 0) { - log_error("Invalid scope on %s IPv6 address\n", - desc); - rc = -1; - goto out; - } - } - break; - - default: - log_error("Invalid address family\n"); - } - -out: - free(str); - return rc; -} - static char *random_msg(int len) { int i, n = 0, olen = len + 1; @@ -1563,12 +1864,86 @@ static char *random_msg(int len) n += i; len -= i; } - i = snprintf(m + n, olen - n, "%.*s", len, - "abcdefghijklmnopqrstuvwxyz"); + + snprintf(m + n, olen - n, "%.*s", len, + "abcdefghijklmnopqrstuvwxyz"); return m; } -#define GETOPT_STR "sr:l:p:t:g:P:DRn:M:m:d:SCi6L:0:1:2:Fbq" +static int ipc_child(int fd, struct sock_args *args) +{ + char *outbuf, *errbuf; + int rc = 1; + + outbuf = malloc(4096); + errbuf = malloc(4096); + if (!outbuf || !errbuf) { + fprintf(stderr, "server: Failed to allocate buffers for stdout and stderr\n"); + goto out; + } + + setbuffer(stdout, outbuf, 4096); + setbuffer(stderr, errbuf, 4096); + + server_mode = 1; /* to tell log_msg in case we are in both_mode */ + + /* when running in both mode, address validation applies + * solely to client side + */ + args->has_expected_laddr = 0; + args->has_expected_raddr = 0; + + rc = do_server(args, fd); + +out: + free(outbuf); + free(errbuf); + + return rc; +} + +static int ipc_parent(int cpid, int fd, struct sock_args *args) +{ + int client_status; + int status; + int buf; + + /* do the client-side function here in the parent process, + * waiting to be told when to continue + */ + if (read(fd, &buf, sizeof(buf)) <= 0) { + log_err_errno("Failed to read IPC status from status"); + return 1; + } + if (!buf) { + log_error("Server failed; can not continue\n"); + return 1; + } + log_msg("Server is ready\n"); + + client_status = do_client(args); + log_msg("parent is done!\n"); + + if (kill(cpid, 0) == 0) + kill(cpid, SIGKILL); + + wait(&status); + return client_status; +} + +#define GETOPT_STR "sr:l:c:Q:p:t:g:P:DRn:M:X:m:d:I:BN:O:SUCi6xL:0:1:2:3:Fbqf" +#define OPT_FORCE_BIND_KEY_IFINDEX 1001 +#define OPT_NO_BIND_KEY_IFINDEX 1002 +#define OPT_CLIENT_DONTROUTE 1003 +#define OPT_SERVER_DONTROUTE 1004 + +static struct option long_opts[] = { + {"force-bind-key-ifindex", 0, 0, OPT_FORCE_BIND_KEY_IFINDEX}, + {"no-bind-key-ifindex", 0, 0, OPT_NO_BIND_KEY_IFINDEX}, + {"client-dontroute", 0, 0, OPT_CLIENT_DONTROUTE}, + {"server-dontroute", 0, 0, OPT_SERVER_DONTROUTE}, + {0, 0, 0, 0} +}; static void print_usage(char *prog) { @@ -1582,28 +1957,50 @@ static void print_usage(char *prog) " -t timeout seconds (default: none)\n" "\n" "Optional:\n" + " -B do both client and server via fork and IPC\n" + " -N ns set client to network namespace ns (requires root)\n" + " -O ns set server to network namespace ns (requires root)\n" " -F Restart server loop\n" " -6 IPv6 (default is IPv4)\n" " -P proto protocol for socket: icmp, ospf (default: none)\n" " -D|R datagram (D) / raw (R) socket (default stream)\n" - " -l addr local address to bind to\n" + " -l addr local address to bind to in server mode\n" + " -c addr local address to bind to in client mode\n" + " -Q dsfield DS Field value of the socket (the IP_TOS or\n" + " IPV6_TCLASS socket option)\n" + " -x configure XFRM policy on socket\n" "\n" " -d dev bind socket to given device name\n" + " -I dev bind socket to given device name - server mode\n" " -S use setsockopt (IP_UNICAST_IF or IP_MULTICAST_IF)\n" " to set device binding\n" + " -U Use connect() and send() for datagram sockets\n" + " -f bind socket with the IP[V6]_FREEBIND option\n" " -C use cmsg and IP_PKTINFO to specify device binding\n" "\n" " -L len send random message of given length\n" " -n num number of times to send message\n" "\n" " -M password use MD5 sum protection\n" + " -X password MD5 password for client mode\n" " -m prefix/len prefix and length to use for MD5 key\n" + " --no-bind-key-ifindex: Force TCP_MD5SIG_FLAG_IFINDEX off\n" + " --force-bind-key-ifindex: Force TCP_MD5SIG_FLAG_IFINDEX on\n" + " (default: only if -I is passed)\n" + " --client-dontroute: don't use gateways for client socket: send\n" + " packets only if destination is on link (see\n" + " SO_DONTROUTE in socket(7))\n" + " --server-dontroute: don't use gateways for server socket: send\n" + " packets only if destination is on link (see\n" + " SO_DONTROUTE in socket(7))\n" + "\n" " -g grp multicast group (e.g., 239.1.1.1)\n" " -i interactive mode (default is echo and terminate)\n" "\n" " -0 addr Expected local address\n" " -1 addr Expected remote address\n" " -2 dev Expected device name (or index) to receive packet\n" + " -3 dev Expected device name (or index) to receive packets - server mode\n" "\n" " -b Bind test only.\n" " -q Be quiet. Run test without printing anything.\n" @@ -1618,8 +2015,11 @@ int main(int argc, char *argv[]) .port = DEFAULT_PORT, }; struct protoent *pe; + int both_mode = 0; unsigned int tmp; int forever = 0; + int fd[2]; + int cpid; /* process inputs */ extern char *optarg; @@ -1629,8 +2029,11 @@ int main(int argc, char *argv[]) * process input args */ - while ((rc = getopt(argc, argv, GETOPT_STR)) != -1) { + while ((rc = getopt_long(argc, argv, GETOPT_STR, long_opts, NULL)) != -1) { switch (rc) { + case 'B': + both_mode = 1; + break; case 's': server_mode = 1; break; @@ -1639,13 +2042,22 @@ int main(int argc, char *argv[]) break; case 'l': args.has_local_ip = 1; - if (convert_addr(&args, optarg, ADDR_TYPE_LOCAL) < 0) - return 1; + args.local_addr_str = optarg; break; case 'r': args.has_remote_ip = 1; - if (convert_addr(&args, optarg, ADDR_TYPE_REMOTE) < 0) + args.remote_addr_str = optarg; + break; + case 'c': + args.has_local_ip = 1; + args.client_local_addr_str = optarg; + break; + case 'Q': + if (str_to_uint(optarg, 0, 255, &tmp) != 0) { + fprintf(stderr, "Invalid DS Field\n"); return 1; + } + args.dsfield = tmp; break; case 'p': if (str_to_uint(optarg, 1, 65535, &tmp) != 0) { @@ -1685,29 +2097,50 @@ int main(int argc, char *argv[]) case 'n': iter = atoi(optarg); break; + case 'N': + args.clientns = optarg; + break; + case 'O': + args.serverns = optarg; + break; case 'L': msg = random_msg(atoi(optarg)); break; case 'M': args.password = optarg; break; + case OPT_FORCE_BIND_KEY_IFINDEX: + args.bind_key_ifindex = 1; + break; + case OPT_NO_BIND_KEY_IFINDEX: + args.bind_key_ifindex = -1; + break; + case OPT_CLIENT_DONTROUTE: + args.client_dontroute = 1; + break; + case OPT_SERVER_DONTROUTE: + args.server_dontroute = 1; + break; + case 'X': + args.client_pw = optarg; + break; case 'm': - if (convert_addr(&args, optarg, ADDR_TYPE_MD5_PREFIX) < 0) - return 1; + args.md5_prefix_str = optarg; break; case 'S': args.use_setsockopt = 1; break; + case 'f': + args.use_freebind = 1; + break; case 'C': args.use_cmsg = 1; break; case 'd': args.dev = optarg; - args.ifindex = get_ifidx(optarg); - if (args.ifindex < 0) { - fprintf(stderr, "Invalid device name\n"); - return 1; - } + break; + case 'I': + args.server_dev = optarg; break; case 'i': interactive = 1; @@ -1726,32 +2159,27 @@ int main(int argc, char *argv[]) break; case '0': args.has_expected_laddr = 1; - if (convert_addr(&args, optarg, - ADDR_TYPE_EXPECTED_LOCAL)) - return 1; + args.expected_laddr_str = optarg; break; case '1': args.has_expected_raddr = 1; - if (convert_addr(&args, optarg, - ADDR_TYPE_EXPECTED_REMOTE)) - return 1; - + args.expected_raddr_str = optarg; break; case '2': - if (str_to_uint(optarg, 0, INT_MAX, &tmp) == 0) { - args.expected_ifindex = (int)tmp; - } else { - args.expected_ifindex = get_ifidx(optarg); - if (args.expected_ifindex < 0) { - fprintf(stderr, - "Invalid expected device\n"); - return 1; - } - } + args.expected_dev = optarg; + break; + case '3': + args.expected_server_dev = optarg; break; case 'q': quiet = 1; break; + case 'x': + args.use_xfrm = 1; + break; + case 'U': + args.datagram_connect = 1; + break; default: print_usage(argv[0]); return 1; @@ -1759,23 +2187,17 @@ int main(int argc, char *argv[]) } if (args.password && - ((!args.has_remote_ip && !args.prefix_len) || args.type != SOCK_STREAM)) { + ((!args.has_remote_ip && !args.md5_prefix_str) || + args.type != SOCK_STREAM)) { log_error("MD5 passwords apply to TCP only and require a remote ip for the password\n"); return 1; } - if (args.prefix_len && !args.password) { + if (args.md5_prefix_str && !args.password) { log_error("Prefix range for MD5 protection specified without a password\n"); return 1; } - if ((args.use_setsockopt || args.use_cmsg) && !args.ifindex) { - fprintf(stderr, "Device binding not specified\n"); - return 1; - } - if (args.use_setsockopt || args.use_cmsg) - args.dev = NULL; - if (iter == 0) { fprintf(stderr, "Invalid number of messages to send\n"); return 1; @@ -1792,7 +2214,7 @@ int main(int argc, char *argv[]) return 1; } - if (!server_mode && !args.has_grp && + if ((both_mode || !server_mode) && !args.has_grp && !args.has_remote_ip && !args.has_local_ip) { fprintf(stderr, "Local (server mode) or remote IP (client IP) required\n"); @@ -1804,9 +2226,26 @@ int main(int argc, char *argv[]) msg = NULL; } + if (both_mode) { + if (pipe(fd) < 0) { + perror("pipe"); + exit(1); + } + + cpid = fork(); + if (cpid < 0) { + perror("fork"); + exit(1); + } + if (cpid) + return ipc_parent(cpid, fd[0], &args); + + return ipc_child(fd[1], &args); + } + if (server_mode) { do { - rc = do_server(&args); + rc = do_server(&args, -1); } while (forever); return rc; diff --git a/tools/testing/selftests/net/openvswitch/Makefile b/tools/testing/selftests/net/openvswitch/Makefile new file mode 100644 index 000000000000..2f1508abc826 --- /dev/null +++ b/tools/testing/selftests/net/openvswitch/Makefile @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: GPL-2.0 + +top_srcdir = ../../../../.. + +CFLAGS = -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include $(KHDR_INCLUDES) + +TEST_PROGS := openvswitch.sh + +TEST_FILES := ovs-dpctl.py + +EXTRA_CLEAN := test_netlink_checks + +include ../../lib.mk diff --git a/tools/testing/selftests/net/openvswitch/openvswitch.sh b/tools/testing/selftests/net/openvswitch/openvswitch.sh new file mode 100755 index 000000000000..5cae53543849 --- /dev/null +++ b/tools/testing/selftests/net/openvswitch/openvswitch.sh @@ -0,0 +1,722 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# +# OVS kernel module self tests + +trap ovs_exit_sig EXIT TERM INT ERR + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +PAUSE_ON_FAIL=no +VERBOSE=0 +TRACING=0 + +tests=" + arp_ping eth-arp: Basic arp ping between two NS + ct_connect_v4 ip4-ct-xon: Basic ipv4 tcp connection using ct + connect_v4 ip4-xon: Basic ipv4 ping between two NS + nat_connect_v4 ip4-nat-xon: Basic ipv4 tcp connection via NAT + nat_related_v4 ip4-nat-related: ICMP related matches work with SNAT + netlink_checks ovsnl: validate netlink attrs and settings + upcall_interfaces ovs: test the upcall interfaces + drop_reason drop: test drop reasons are emitted" + +info() { + [ $VERBOSE = 0 ] || echo $* +} + +ovs_base=`pwd` +sbxs= +sbx_add () { + info "adding sandbox '$1'" + + sbxs="$sbxs $1" + + NO_BIN=0 + + # Create sandbox. + local d="$ovs_base"/$1 + if [ -e $d ]; then + info "removing $d" + rm -rf "$d" + fi + mkdir "$d" || return 1 + ovs_setenv $1 +} + +ovs_exit_sig() { + [ -e ${ovs_dir}/cleanup ] && . "$ovs_dir/cleanup" +} + +on_exit() { + echo "$1" > ${ovs_dir}/cleanup.tmp + cat ${ovs_dir}/cleanup >> ${ovs_dir}/cleanup.tmp + mv ${ovs_dir}/cleanup.tmp ${ovs_dir}/cleanup +} + +ovs_setenv() { + sandbox=$1 + + ovs_dir=$ovs_base${1:+/$1}; export ovs_dir + + test -e ${ovs_dir}/cleanup || : > ${ovs_dir}/cleanup +} + +ovs_sbx() { + if test "X$2" != X; then + (ovs_setenv $1; shift; "$@" >> ${ovs_dir}/debug.log) + else + ovs_setenv $1 + fi +} + +ovs_add_dp () { + info "Adding DP/Bridge IF: sbx:$1 dp:$2 {$3, $4, $5}" + sbxname="$1" + shift + ovs_sbx "$sbxname" python3 $ovs_base/ovs-dpctl.py add-dp $* + on_exit "ovs_sbx $sbxname python3 $ovs_base/ovs-dpctl.py del-dp $1;" +} + +ovs_add_if () { + info "Adding IF to DP: br:$2 if:$3" + if [ "$4" != "-u" ]; then + ovs_sbx "$1" python3 $ovs_base/ovs-dpctl.py add-if "$2" "$3" \ + || return 1 + else + python3 $ovs_base/ovs-dpctl.py add-if \ + -u "$2" "$3" >$ovs_dir/$3.out 2>$ovs_dir/$3.err & + pid=$! + on_exit "ovs_sbx $1 kill -TERM $pid 2>/dev/null" + fi +} + +ovs_del_if () { + info "Deleting IF from DP: br:$2 if:$3" + ovs_sbx "$1" python3 $ovs_base/ovs-dpctl.py del-if "$2" "$3" || return 1 +} + +ovs_netns_spawn_daemon() { + sbx=$1 + shift + netns=$1 + shift + info "spawning cmd: $*" + ip netns exec $netns $* >> $ovs_dir/stdout 2>> $ovs_dir/stderr & + pid=$! + ovs_sbx "$sbx" on_exit "kill -TERM $pid 2>/dev/null" +} + +ovs_add_netns_and_veths () { + info "Adding netns attached: sbx:$1 dp:$2 {$3, $4, $5}" + ovs_sbx "$1" ip netns add "$3" || return 1 + on_exit "ovs_sbx $1 ip netns del $3" + ovs_sbx "$1" ip link add "$4" type veth peer name "$5" || return 1 + on_exit "ovs_sbx $1 ip link del $4 >/dev/null 2>&1" + ovs_sbx "$1" ip link set "$4" up || return 1 + ovs_sbx "$1" ip link set "$5" netns "$3" || return 1 + ovs_sbx "$1" ip netns exec "$3" ip link set "$5" up || return 1 + + if [ "$6" != "" ]; then + ovs_sbx "$1" ip netns exec "$3" ip addr add "$6" dev "$5" \ + || return 1 + fi + + if [ "$7" != "-u" ]; then + ovs_add_if "$1" "$2" "$4" || return 1 + else + ovs_add_if "$1" "$2" "$4" -u || return 1 + fi + + [ $TRACING -eq 1 ] && ovs_netns_spawn_daemon "$1" "$ns" \ + tcpdump -i any -s 65535 + + return 0 +} + +ovs_add_flow () { + info "Adding flow to DP: sbx:$1 br:$2 flow:$3 act:$4" + ovs_sbx "$1" python3 $ovs_base/ovs-dpctl.py add-flow "$2" "$3" "$4" + if [ $? -ne 0 ]; then + echo "Flow [ $3 : $4 ] failed" >> ${ovs_dir}/debug.log + return 1 + fi + return 0 +} + +ovs_del_flows () { + info "Deleting all flows from DP: sbx:$1 br:$2" + ovs_sbx "$1" python3 $ovs_base/ovs-dpctl.py del-flows "$2" + return 0 +} + +ovs_drop_record_and_run () { + local sbx=$1 + shift + + perf record -a -q -e skb:kfree_skb -o ${ovs_dir}/perf.data $* \ + >> ${ovs_dir}/stdout 2>> ${ovs_dir}/stderr + return $? +} + +ovs_drop_reason_count() +{ + local reason=$1 + + local perf_output=`perf script -i ${ovs_dir}/perf.data -F trace:event,trace` + local pattern="skb:kfree_skb:.*reason: $reason" + + return `echo "$perf_output" | grep "$pattern" | wc -l` +} + +usage() { + echo + echo "$0 [OPTIONS] [TEST]..." + echo "If no TEST argument is given, all tests will be run." + echo + echo "Options" + echo " -t: capture traffic via tcpdump" + echo " -v: verbose" + echo " -p: pause on failure" + echo + echo "Available tests${tests}" + exit 1 +} + +# drop_reason test +# - drop packets and verify the right drop reason is reported +test_drop_reason() { + which perf >/dev/null 2>&1 || return $ksft_skip + + sbx_add "test_drop_reason" || return $? + + ovs_add_dp "test_drop_reason" dropreason || return 1 + + info "create namespaces" + for ns in client server; do + ovs_add_netns_and_veths "test_drop_reason" "dropreason" "$ns" \ + "${ns:0:1}0" "${ns:0:1}1" || return 1 + done + + # Setup client namespace + ip netns exec client ip addr add 172.31.110.10/24 dev c1 + ip netns exec client ip link set c1 up + + # Setup server namespace + ip netns exec server ip addr add 172.31.110.20/24 dev s1 + ip netns exec server ip link set s1 up + + # Check if drop reasons can be sent + ovs_add_flow "test_drop_reason" dropreason \ + 'in_port(1),eth(),eth_type(0x0806),arp()' 'drop(10)' 2>/dev/null + if [ $? == 1 ]; then + info "no support for drop reasons - skipping" + ovs_exit_sig + return $ksft_skip + fi + + ovs_del_flows "test_drop_reason" dropreason + + # Allow ARP + ovs_add_flow "test_drop_reason" dropreason \ + 'in_port(1),eth(),eth_type(0x0806),arp()' '2' || return 1 + ovs_add_flow "test_drop_reason" dropreason \ + 'in_port(2),eth(),eth_type(0x0806),arp()' '1' || return 1 + + # Allow client ICMP traffic but drop return path + ovs_add_flow "test_drop_reason" dropreason \ + "in_port(1),eth(),eth_type(0x0800),ipv4(src=172.31.110.10,proto=1),icmp()" '2' + ovs_add_flow "test_drop_reason" dropreason \ + "in_port(2),eth(),eth_type(0x0800),ipv4(src=172.31.110.20,proto=1),icmp()" 'drop' + + ovs_drop_record_and_run "test_drop_reason" ip netns exec client ping -c 2 172.31.110.20 + ovs_drop_reason_count 0x30001 # OVS_DROP_FLOW_ACTION + if [[ "$?" -ne "2" ]]; then + info "Did not detect expected drops: $?" + return 1 + fi + + # Drop UDP 6000 traffic with an explicit action and an error code. + ovs_add_flow "test_drop_reason" dropreason \ + "in_port(1),eth(),eth_type(0x0800),ipv4(src=172.31.110.10,proto=17),udp(dst=6000)" \ + 'drop(42)' + # Drop UDP 7000 traffic with an explicit action with no error code. + ovs_add_flow "test_drop_reason" dropreason \ + "in_port(1),eth(),eth_type(0x0800),ipv4(src=172.31.110.10,proto=17),udp(dst=7000)" \ + 'drop(0)' + + ovs_drop_record_and_run \ + "test_drop_reason" ip netns exec client nc -i 1 -zuv 172.31.110.20 6000 + ovs_drop_reason_count 0x30004 # OVS_DROP_EXPLICIT_ACTION_ERROR + if [[ "$?" -ne "1" ]]; then + info "Did not detect expected explicit error drops: $?" + return 1 + fi + + ovs_drop_record_and_run \ + "test_drop_reason" ip netns exec client nc -i 1 -zuv 172.31.110.20 7000 + ovs_drop_reason_count 0x30003 # OVS_DROP_EXPLICIT_ACTION + if [[ "$?" -ne "1" ]]; then + info "Did not detect expected explicit drops: $?" + return 1 + fi + + return 0 +} + +# arp_ping test +# - client has 1500 byte MTU +# - server has 1500 byte MTU +# - send ARP ping between two ns +test_arp_ping () { + + which arping >/dev/null 2>&1 || return $ksft_skip + + sbx_add "test_arp_ping" || return $? + + ovs_add_dp "test_arp_ping" arpping || return 1 + + info "create namespaces" + for ns in client server; do + ovs_add_netns_and_veths "test_arp_ping" "arpping" "$ns" \ + "${ns:0:1}0" "${ns:0:1}1" || return 1 + done + + # Setup client namespace + ip netns exec client ip addr add 172.31.110.10/24 dev c1 + ip netns exec client ip link set c1 up + HW_CLIENT=`ip netns exec client ip link show dev c1 | grep -E 'link/ether [0-9a-f:]+' | awk '{print $2;}'` + info "Client hwaddr: $HW_CLIENT" + + # Setup server namespace + ip netns exec server ip addr add 172.31.110.20/24 dev s1 + ip netns exec server ip link set s1 up + HW_SERVER=`ip netns exec server ip link show dev s1 | grep -E 'link/ether [0-9a-f:]+' | awk '{print $2;}'` + info "Server hwaddr: $HW_SERVER" + + ovs_add_flow "test_arp_ping" arpping \ + "in_port(1),eth(),eth_type(0x0806),arp(sip=172.31.110.10,tip=172.31.110.20,sha=$HW_CLIENT,tha=ff:ff:ff:ff:ff:ff)" '2' || return 1 + ovs_add_flow "test_arp_ping" arpping \ + "in_port(2),eth(),eth_type(0x0806),arp()" '1' || return 1 + + ovs_sbx "test_arp_ping" ip netns exec client arping -I c1 172.31.110.20 -c 1 || return 1 + + return 0 +} + +# ct_connect_v4 test +# - client has 1500 byte MTU +# - server has 1500 byte MTU +# - use ICMP to ping in each direction +# - only allow CT state stuff to pass through new in c -> s +test_ct_connect_v4 () { + + which nc >/dev/null 2>/dev/null || return $ksft_skip + + sbx_add "test_ct_connect_v4" || return $? + + ovs_add_dp "test_ct_connect_v4" ct4 || return 1 + info "create namespaces" + for ns in client server; do + ovs_add_netns_and_veths "test_ct_connect_v4" "ct4" "$ns" \ + "${ns:0:1}0" "${ns:0:1}1" || return 1 + done + + ip netns exec client ip addr add 172.31.110.10/24 dev c1 + ip netns exec client ip link set c1 up + ip netns exec server ip addr add 172.31.110.20/24 dev s1 + ip netns exec server ip link set s1 up + + # Add forwarding for ARP and ip packets - completely wildcarded + ovs_add_flow "test_ct_connect_v4" ct4 \ + 'in_port(1),eth(),eth_type(0x0806),arp()' '2' || return 1 + ovs_add_flow "test_ct_connect_v4" ct4 \ + 'in_port(2),eth(),eth_type(0x0806),arp()' '1' || return 1 + ovs_add_flow "test_ct_connect_v4" ct4 \ + 'ct_state(-trk),eth(),eth_type(0x0800),ipv4()' \ + 'ct(commit),recirc(0x1)' || return 1 + ovs_add_flow "test_ct_connect_v4" ct4 \ + 'recirc_id(0x1),ct_state(+trk+new),in_port(1),eth(),eth_type(0x0800),ipv4(src=172.31.110.10)' \ + '2' || return 1 + ovs_add_flow "test_ct_connect_v4" ct4 \ + 'recirc_id(0x1),ct_state(+trk+est),in_port(1),eth(),eth_type(0x0800),ipv4(src=172.31.110.10)' \ + '2' || return 1 + ovs_add_flow "test_ct_connect_v4" ct4 \ + 'recirc_id(0x1),ct_state(+trk+est),in_port(2),eth(),eth_type(0x0800),ipv4(dst=172.31.110.10)' \ + '1' || return 1 + ovs_add_flow "test_ct_connect_v4" ct4 \ + 'recirc_id(0x1),ct_state(+trk+inv),eth(),eth_type(0x0800),ipv4()' 'drop' || \ + return 1 + + # do a ping + ovs_sbx "test_ct_connect_v4" ip netns exec client ping 172.31.110.20 -c 3 || return 1 + + # create an echo server in 'server' + echo "server" | \ + ovs_netns_spawn_daemon "test_ct_connect_v4" "server" \ + nc -lvnp 4443 + ovs_sbx "test_ct_connect_v4" ip netns exec client nc -i 1 -zv 172.31.110.20 4443 || return 1 + + # Now test in the other direction (should fail) + echo "client" | \ + ovs_netns_spawn_daemon "test_ct_connect_v4" "client" \ + nc -lvnp 4443 + ovs_sbx "test_ct_connect_v4" ip netns exec client nc -i 1 -zv 172.31.110.10 4443 + if [ $? == 0 ]; then + info "ct connect to client was successful" + return 1 + fi + + info "done..." + return 0 +} + +# connect_v4 test +# - client has 1500 byte MTU +# - server has 1500 byte MTU +# - use ICMP to ping in each direction +test_connect_v4 () { + + sbx_add "test_connect_v4" || return $? + + ovs_add_dp "test_connect_v4" cv4 || return 1 + + info "create namespaces" + for ns in client server; do + ovs_add_netns_and_veths "test_connect_v4" "cv4" "$ns" \ + "${ns:0:1}0" "${ns:0:1}1" || return 1 + done + + + ip netns exec client ip addr add 172.31.110.10/24 dev c1 + ip netns exec client ip link set c1 up + ip netns exec server ip addr add 172.31.110.20/24 dev s1 + ip netns exec server ip link set s1 up + + # Add forwarding for ARP and ip packets - completely wildcarded + ovs_add_flow "test_connect_v4" cv4 \ + 'in_port(1),eth(),eth_type(0x0806),arp()' '2' || return 1 + ovs_add_flow "test_connect_v4" cv4 \ + 'in_port(2),eth(),eth_type(0x0806),arp()' '1' || return 1 + ovs_add_flow "test_connect_v4" cv4 \ + 'in_port(1),eth(),eth_type(0x0800),ipv4(src=172.31.110.10)' '2' || return 1 + ovs_add_flow "test_connect_v4" cv4 \ + 'in_port(2),eth(),eth_type(0x0800),ipv4(src=172.31.110.20)' '1' || return 1 + + # do a ping + ovs_sbx "test_connect_v4" ip netns exec client ping 172.31.110.20 -c 3 || return 1 + + info "done..." + return 0 +} + +# nat_connect_v4 test +# - client has 1500 byte MTU +# - server has 1500 byte MTU +# - use ICMP to ping in each direction +# - only allow CT state stuff to pass through new in c -> s +test_nat_connect_v4 () { + which nc >/dev/null 2>/dev/null || return $ksft_skip + + sbx_add "test_nat_connect_v4" || return $? + + ovs_add_dp "test_nat_connect_v4" nat4 || return 1 + info "create namespaces" + for ns in client server; do + ovs_add_netns_and_veths "test_nat_connect_v4" "nat4" "$ns" \ + "${ns:0:1}0" "${ns:0:1}1" || return 1 + done + + ip netns exec client ip addr add 172.31.110.10/24 dev c1 + ip netns exec client ip link set c1 up + ip netns exec server ip addr add 172.31.110.20/24 dev s1 + ip netns exec server ip link set s1 up + + ip netns exec client ip route add default via 172.31.110.20 + + ovs_add_flow "test_nat_connect_v4" nat4 \ + 'in_port(1),eth(),eth_type(0x0806),arp()' '2' || return 1 + ovs_add_flow "test_nat_connect_v4" nat4 \ + 'in_port(2),eth(),eth_type(0x0806),arp()' '1' || return 1 + ovs_add_flow "test_nat_connect_v4" nat4 \ + "ct_state(-trk),in_port(1),eth(),eth_type(0x0800),ipv4(dst=192.168.0.20)" \ + "ct(commit,nat(dst=172.31.110.20)),recirc(0x1)" + ovs_add_flow "test_nat_connect_v4" nat4 \ + "ct_state(-trk),in_port(2),eth(),eth_type(0x0800),ipv4()" \ + "ct(commit,nat),recirc(0x2)" + + ovs_add_flow "test_nat_connect_v4" nat4 \ + "recirc_id(0x1),ct_state(+trk-inv),in_port(1),eth(),eth_type(0x0800),ipv4()" "2" + ovs_add_flow "test_nat_connect_v4" nat4 \ + "recirc_id(0x2),ct_state(+trk-inv),in_port(2),eth(),eth_type(0x0800),ipv4()" "1" + + # do a ping + ovs_sbx "test_nat_connect_v4" ip netns exec client ping 192.168.0.20 -c 3 || return 1 + + # create an echo server in 'server' + echo "server" | \ + ovs_netns_spawn_daemon "test_nat_connect_v4" "server" \ + nc -lvnp 4443 + ovs_sbx "test_nat_connect_v4" ip netns exec client nc -i 1 -zv 192.168.0.20 4443 || return 1 + + # Now test in the other direction (should fail) + echo "client" | \ + ovs_netns_spawn_daemon "test_nat_connect_v4" "client" \ + nc -lvnp 4443 + ovs_sbx "test_nat_connect_v4" ip netns exec client nc -i 1 -zv 172.31.110.10 4443 + if [ $? == 0 ]; then + info "connect to client was successful" + return 1 + fi + + info "done..." + return 0 +} + +# nat_related_v4 test +# - client->server ip packets go via SNAT +# - client solicits ICMP destination unreachable packet from server +# - undo NAT for ICMP reply and test dst ip has been updated +test_nat_related_v4 () { + which nc >/dev/null 2>/dev/null || return $ksft_skip + + sbx_add "test_nat_related_v4" || return $? + + ovs_add_dp "test_nat_related_v4" natrelated4 || return 1 + info "create namespaces" + for ns in client server; do + ovs_add_netns_and_veths "test_nat_related_v4" "natrelated4" "$ns" \ + "${ns:0:1}0" "${ns:0:1}1" || return 1 + done + + ip netns exec client ip addr add 172.31.110.10/24 dev c1 + ip netns exec client ip link set c1 up + ip netns exec server ip addr add 172.31.110.20/24 dev s1 + ip netns exec server ip link set s1 up + + ip netns exec server ip route add 192.168.0.20/32 via 172.31.110.10 + + # Allow ARP + ovs_add_flow "test_nat_related_v4" natrelated4 \ + "in_port(1),eth(),eth_type(0x0806),arp()" "2" || return 1 + ovs_add_flow "test_nat_related_v4" natrelated4 \ + "in_port(2),eth(),eth_type(0x0806),arp()" "1" || return 1 + + # Allow IP traffic from client->server, rewrite source IP with SNAT to 192.168.0.20 + ovs_add_flow "test_nat_related_v4" natrelated4 \ + "ct_state(-trk),in_port(1),eth(),eth_type(0x0800),ipv4(dst=172.31.110.20)" \ + "ct(commit,nat(src=192.168.0.20)),recirc(0x1)" || return 1 + ovs_add_flow "test_nat_related_v4" natrelated4 \ + "recirc_id(0x1),ct_state(+trk-inv),in_port(1),eth(),eth_type(0x0800),ipv4()" \ + "2" || return 1 + + # Allow related ICMP responses back from server and undo NAT to restore original IP + # Drop any ICMP related packets where dst ip hasn't been restored back to original IP + ovs_add_flow "test_nat_related_v4" natrelated4 \ + "ct_state(-trk),in_port(2),eth(),eth_type(0x0800),ipv4()" \ + "ct(commit,nat),recirc(0x2)" || return 1 + ovs_add_flow "test_nat_related_v4" natrelated4 \ + "recirc_id(0x2),ct_state(+rel+trk),in_port(2),eth(),eth_type(0x0800),ipv4(src=172.31.110.20,dst=172.31.110.10,proto=1),icmp()" \ + "1" || return 1 + ovs_add_flow "test_nat_related_v4" natrelated4 \ + "recirc_id(0x2),ct_state(+rel+trk),in_port(2),eth(),eth_type(0x0800),ipv4(dst=192.168.0.20,proto=1),icmp()" \ + "drop" || return 1 + + # Solicit destination unreachable response from server + ovs_sbx "test_nat_related_v4" ip netns exec client \ + bash -c "echo a | nc -u -w 1 172.31.110.20 10000" + + # Check to make sure no packets matched the drop rule with incorrect dst ip + python3 "$ovs_base/ovs-dpctl.py" dump-flows natrelated4 \ + | grep "drop" | grep "packets:0" >/dev/null || return 1 + + info "done..." + return 0 +} + +# netlink_validation +# - Create a dp +# - check no warning with "old version" simulation +test_netlink_checks () { + sbx_add "test_netlink_checks" || return 1 + + info "setting up new DP" + ovs_add_dp "test_netlink_checks" nv0 || return 1 + # now try again + PRE_TEST=$(dmesg | grep -E "RIP: [0-9a-fA-Fx]+:ovs_dp_cmd_new\+") + ovs_add_dp "test_netlink_checks" nv0 -V 0 || return 1 + POST_TEST=$(dmesg | grep -E "RIP: [0-9a-fA-Fx]+:ovs_dp_cmd_new\+") + if [ "$PRE_TEST" != "$POST_TEST" ]; then + info "failed - gen warning" + return 1 + fi + + ovs_add_netns_and_veths "test_netlink_checks" nv0 left left0 l0 || \ + return 1 + ovs_add_netns_and_veths "test_netlink_checks" nv0 right right0 r0 || \ + return 1 + [ $(python3 $ovs_base/ovs-dpctl.py show nv0 | grep port | \ + wc -l) == 3 ] || \ + return 1 + ovs_del_if "test_netlink_checks" nv0 right0 || return 1 + [ $(python3 $ovs_base/ovs-dpctl.py show nv0 | grep port | \ + wc -l) == 2 ] || \ + return 1 + + info "Checking clone depth" + ERR_MSG="Flow actions may not be safe on all matching packets" + PRE_TEST=$(dmesg | grep -c "${ERR_MSG}") + ovs_add_flow "test_netlink_checks" nv0 \ + 'in_port(1),eth(),eth_type(0x800),ipv4()' \ + 'clone(clone(clone(clone(clone(clone(clone(clone(clone(clone(clone(clone(clone(clone(clone(clone(clone(drop)))))))))))))))))' \ + >/dev/null 2>&1 && return 1 + POST_TEST=$(dmesg | grep -c "${ERR_MSG}") + + if [ "$PRE_TEST" == "$POST_TEST" ]; then + info "failed - clone depth too large" + return 1 + fi + + PRE_TEST=$(dmesg | grep -c "${ERR_MSG}") + ovs_add_flow "test_netlink_checks" nv0 \ + 'in_port(1),eth(),eth_type(0x0806),arp()' 'drop(0),2' \ + &> /dev/null && return 1 + POST_TEST=$(dmesg | grep -c "${ERR_MSG}") + if [ "$PRE_TEST" == "$POST_TEST" ]; then + info "failed - error not generated" + return 1 + fi + return 0 +} + +test_upcall_interfaces() { + sbx_add "test_upcall_interfaces" || return 1 + + info "setting up new DP" + ovs_add_dp "test_upcall_interfaces" ui0 -V 2:1 || return 1 + + ovs_add_netns_and_veths "test_upcall_interfaces" ui0 upc left0 l0 \ + 172.31.110.1/24 -u || return 1 + + sleep 1 + info "sending arping" + ip netns exec upc arping -I l0 172.31.110.20 -c 1 \ + >$ovs_dir/arping.stdout 2>$ovs_dir/arping.stderr + + grep -E "MISS upcall\[0/yes\]: .*arp\(sip=172.31.110.1,tip=172.31.110.20,op=1,sha=" $ovs_dir/left0.out >/dev/null 2>&1 || return 1 + return 0 +} + +run_test() { + ( + tname="$1" + tdesc="$2" + + if ! lsmod | grep openvswitch >/dev/null 2>&1; then + stdbuf -o0 printf "TEST: %-60s [NOMOD]\n" "${tdesc}" + return $ksft_skip + fi + + if python3 ovs-dpctl.py -h 2>&1 | \ + grep -E "Need to (install|upgrade) the python" >/dev/null 2>&1; then + stdbuf -o0 printf "TEST: %-60s [PYLIB]\n" "${tdesc}" + return $ksft_skip + fi + printf "TEST: %-60s [START]\n" "${tname}" + + unset IFS + + eval test_${tname} + ret=$? + + if [ $ret -eq 0 ]; then + printf "TEST: %-60s [ OK ]\n" "${tdesc}" + ovs_exit_sig + rm -rf "$ovs_dir" + elif [ $ret -eq 1 ]; then + printf "TEST: %-60s [FAIL]\n" "${tdesc}" + if [ "${PAUSE_ON_FAIL}" = "yes" ]; then + echo + echo "Pausing. Logs in $ovs_dir/. Hit enter to continue" + read a + fi + ovs_exit_sig + [ "${PAUSE_ON_FAIL}" = "yes" ] || rm -rf "$ovs_dir" + exit 1 + elif [ $ret -eq $ksft_skip ]; then + printf "TEST: %-60s [SKIP]\n" "${tdesc}" + elif [ $ret -eq 2 ]; then + rm -rf test_${tname} + run_test "$1" "$2" + fi + + return $ret + ) + ret=$? + case $ret in + 0) + [ $all_skipped = true ] && [ $exitcode=$ksft_skip ] && exitcode=0 + all_skipped=false + ;; + $ksft_skip) + [ $all_skipped = true ] && exitcode=$ksft_skip + ;; + *) + all_skipped=false + exitcode=1 + ;; + esac + + return $ret +} + + +exitcode=0 +desc=0 +all_skipped=true + +while getopts :pvt o +do + case $o in + p) PAUSE_ON_FAIL=yes;; + v) VERBOSE=1;; + t) if which tcpdump > /dev/null 2>&1; then + TRACING=1 + else + echo "=== tcpdump not available, tracing disabled" + fi + ;; + *) usage;; + esac +done +shift $(($OPTIND-1)) + +IFS=" +" + +for arg do + # Check first that all requested tests are available before running any + command -v > /dev/null "test_${arg}" || { echo "=== Test ${arg} not found"; usage; } +done + +name="" +desc="" +for t in ${tests}; do + [ "${name}" = "" ] && name="${t}" && continue + [ "${desc}" = "" ] && desc="${t}" + + run_this=1 + for arg do + [ "${arg}" != "${arg#--*}" ] && continue + [ "${arg}" = "${name}" ] && run_this=1 && break + run_this=0 + done + if [ $run_this -eq 1 ]; then + run_test "${name}" "${desc}" + fi + name="" + desc="" +done + +exit ${exitcode} diff --git a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py new file mode 100644 index 000000000000..5e0e539a323d --- /dev/null +++ b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py @@ -0,0 +1,2236 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +# Controls the openvswitch module. Part of the kselftest suite, but +# can be used for some diagnostic purpose as well. + +import argparse +import errno +import ipaddress +import logging +import multiprocessing +import re +import struct +import sys +import time +import types +import uuid + +try: + from pyroute2 import NDB + + from pyroute2.netlink import NLA_F_NESTED + from pyroute2.netlink import NLM_F_ACK + from pyroute2.netlink import NLM_F_DUMP + from pyroute2.netlink import NLM_F_REQUEST + from pyroute2.netlink import genlmsg + from pyroute2.netlink import nla + from pyroute2.netlink import nlmsg_atoms + from pyroute2.netlink.exceptions import NetlinkError + from pyroute2.netlink.generic import GenericNetlinkSocket + import pyroute2 + +except ModuleNotFoundError: + print("Need to install the python pyroute2 package >= 0.6.") + sys.exit(0) + + +OVS_DATAPATH_FAMILY = "ovs_datapath" +OVS_VPORT_FAMILY = "ovs_vport" +OVS_FLOW_FAMILY = "ovs_flow" +OVS_PACKET_FAMILY = "ovs_packet" +OVS_METER_FAMILY = "ovs_meter" +OVS_CT_LIMIT_FAMILY = "ovs_ct_limit" + +OVS_DATAPATH_VERSION = 2 +OVS_DP_CMD_NEW = 1 +OVS_DP_CMD_DEL = 2 +OVS_DP_CMD_GET = 3 +OVS_DP_CMD_SET = 4 + +OVS_VPORT_CMD_NEW = 1 +OVS_VPORT_CMD_DEL = 2 +OVS_VPORT_CMD_GET = 3 +OVS_VPORT_CMD_SET = 4 + +OVS_FLOW_CMD_NEW = 1 +OVS_FLOW_CMD_DEL = 2 +OVS_FLOW_CMD_GET = 3 +OVS_FLOW_CMD_SET = 4 + + +def macstr(mac): + outstr = ":".join(["%02X" % i for i in mac]) + return outstr + + +def strcspn(str1, str2): + tot = 0 + for char in str1: + if str2.find(char) != -1: + return tot + tot += 1 + return tot + + +def strspn(str1, str2): + tot = 0 + for char in str1: + if str2.find(char) == -1: + return tot + tot += 1 + return tot + + +def intparse(statestr, defmask="0xffffffff"): + totalparse = strspn(statestr, "0123456789abcdefABCDEFx/") + # scan until "/" + count = strspn(statestr, "x0123456789abcdefABCDEF") + + firstnum = statestr[:count] + if firstnum[-1] == "/": + firstnum = firstnum[:-1] + k = int(firstnum, 0) + + m = None + if defmask is not None: + secondnum = defmask + if statestr[count] == "/": + secondnum = statestr[count + 1 :] # this is wrong... + m = int(secondnum, 0) + + return statestr[totalparse + 1 :], k, m + + +def parse_flags(flag_str, flag_vals): + bitResult = 0 + maskResult = 0 + + if len(flag_str) == 0: + return flag_str, bitResult, maskResult + + if flag_str[0].isdigit(): + idx = 0 + while flag_str[idx].isdigit() or flag_str[idx] == "x": + idx += 1 + digits = flag_str[:idx] + flag_str = flag_str[idx:] + + bitResult = int(digits, 0) + maskResult = int(digits, 0) + + while len(flag_str) > 0 and (flag_str[0] == "+" or flag_str[0] == "-"): + if flag_str[0] == "+": + setFlag = True + elif flag_str[0] == "-": + setFlag = False + + flag_str = flag_str[1:] + + flag_len = 0 + while ( + flag_str[flag_len] != "+" + and flag_str[flag_len] != "-" + and flag_str[flag_len] != "," + and flag_str[flag_len] != ")" + ): + flag_len += 1 + + flag = flag_str[0:flag_len] + + if flag in flag_vals: + if maskResult & flag_vals[flag]: + raise KeyError( + "Flag %s set once, cannot be set in multiples" % flag + ) + + if setFlag: + bitResult |= flag_vals[flag] + + maskResult |= flag_vals[flag] + else: + raise KeyError("Missing flag value: %s" % flag) + + flag_str = flag_str[flag_len:] + + return flag_str, bitResult, maskResult + + +def parse_ct_state(statestr): + ct_flags = { + "new": 1 << 0, + "est": 1 << 1, + "rel": 1 << 2, + "rpl": 1 << 3, + "inv": 1 << 4, + "trk": 1 << 5, + "snat": 1 << 6, + "dnat": 1 << 7, + } + + return parse_flags(statestr, ct_flags) + + +def convert_mac(data): + def to_bytes(mac): + mac_split = mac.split(":") + ret = bytearray([int(i, 16) for i in mac_split]) + return bytes(ret) + + mac_str, _, mask_str = data.partition('/') + + if not mac_str: + mac_str = mask_str = "00:00:00:00:00:00" + elif not mask_str: + mask_str = "FF:FF:FF:FF:FF:FF" + + return to_bytes(mac_str), to_bytes(mask_str) + +def convert_ipv4(data): + ip, _, mask = data.partition('/') + + if not ip: + ip = mask = 0 + elif not mask: + mask = 0xFFFFFFFF + elif mask.isdigit(): + mask = (0xFFFFFFFF << (32 - int(mask))) & 0xFFFFFFFF + + return int(ipaddress.IPv4Address(ip)), int(ipaddress.IPv4Address(mask)) + +def convert_int(size): + def convert_int_sized(data): + value, _, mask = data.partition('/') + + if not value: + return 0, 0 + elif not mask: + return int(value, 0), pow(2, size) - 1 + else: + return int(value, 0), int(mask, 0) + + return convert_int_sized + +def parse_starts_block(block_str, scanstr, returnskipped, scanregex=False): + if scanregex: + m = re.search(scanstr, block_str) + if m is None: + if returnskipped: + return block_str + return False + if returnskipped: + block_str = block_str[len(m.group(0)) :] + return block_str + return True + + if block_str.startswith(scanstr): + if returnskipped: + block_str = block_str[len(scanstr) :] + else: + return True + + if returnskipped: + return block_str + + return False + + +def parse_extract_field( + block_str, fieldstr, scanfmt, convert, masked=False, defval=None +): + if fieldstr and not block_str.startswith(fieldstr): + return block_str, defval + + if fieldstr: + str_skiplen = len(fieldstr) + str_skipped = block_str[str_skiplen:] + if str_skiplen == 0: + return str_skipped, defval + else: + str_skiplen = 0 + str_skipped = block_str + + m = re.search(scanfmt, str_skipped) + if m is None: + raise ValueError("Bad fmt string") + + data = m.group(0) + if convert: + data = convert(m.group(0)) + + str_skipped = str_skipped[len(m.group(0)) :] + if masked: + if str_skipped[0] == "/": + raise ValueError("Masking support TBD...") + + str_skipped = str_skipped[strspn(str_skipped, ", ") :] + return str_skipped, data + + +class ovs_dp_msg(genlmsg): + # include the OVS version + # We need a custom header rather than just being able to rely on + # genlmsg because fields ends up not expressing everything correctly + # if we use the canonical example of setting fields = (('customfield',),) + fields = genlmsg.fields + (("dpifindex", "I"),) + + +class ovsactions(nla): + nla_flags = NLA_F_NESTED + + nla_map = ( + ("OVS_ACTION_ATTR_UNSPEC", "none"), + ("OVS_ACTION_ATTR_OUTPUT", "uint32"), + ("OVS_ACTION_ATTR_USERSPACE", "userspace"), + ("OVS_ACTION_ATTR_SET", "none"), + ("OVS_ACTION_ATTR_PUSH_VLAN", "none"), + ("OVS_ACTION_ATTR_POP_VLAN", "flag"), + ("OVS_ACTION_ATTR_SAMPLE", "none"), + ("OVS_ACTION_ATTR_RECIRC", "uint32"), + ("OVS_ACTION_ATTR_HASH", "none"), + ("OVS_ACTION_ATTR_PUSH_MPLS", "none"), + ("OVS_ACTION_ATTR_POP_MPLS", "flag"), + ("OVS_ACTION_ATTR_SET_MASKED", "none"), + ("OVS_ACTION_ATTR_CT", "ctact"), + ("OVS_ACTION_ATTR_TRUNC", "uint32"), + ("OVS_ACTION_ATTR_PUSH_ETH", "none"), + ("OVS_ACTION_ATTR_POP_ETH", "flag"), + ("OVS_ACTION_ATTR_CT_CLEAR", "flag"), + ("OVS_ACTION_ATTR_PUSH_NSH", "none"), + ("OVS_ACTION_ATTR_POP_NSH", "flag"), + ("OVS_ACTION_ATTR_METER", "none"), + ("OVS_ACTION_ATTR_CLONE", "recursive"), + ("OVS_ACTION_ATTR_CHECK_PKT_LEN", "none"), + ("OVS_ACTION_ATTR_ADD_MPLS", "none"), + ("OVS_ACTION_ATTR_DEC_TTL", "none"), + ("OVS_ACTION_ATTR_DROP", "uint32"), + ) + + class ctact(nla): + nla_flags = NLA_F_NESTED + + nla_map = ( + ("OVS_CT_ATTR_NONE", "none"), + ("OVS_CT_ATTR_COMMIT", "flag"), + ("OVS_CT_ATTR_ZONE", "uint16"), + ("OVS_CT_ATTR_MARK", "none"), + ("OVS_CT_ATTR_LABELS", "none"), + ("OVS_CT_ATTR_HELPER", "asciiz"), + ("OVS_CT_ATTR_NAT", "natattr"), + ("OVS_CT_ATTR_FORCE_COMMIT", "flag"), + ("OVS_CT_ATTR_EVENTMASK", "uint32"), + ("OVS_CT_ATTR_TIMEOUT", "asciiz"), + ) + + class natattr(nla): + nla_flags = NLA_F_NESTED + + nla_map = ( + ("OVS_NAT_ATTR_NONE", "none"), + ("OVS_NAT_ATTR_SRC", "flag"), + ("OVS_NAT_ATTR_DST", "flag"), + ("OVS_NAT_ATTR_IP_MIN", "ipaddr"), + ("OVS_NAT_ATTR_IP_MAX", "ipaddr"), + ("OVS_NAT_ATTR_PROTO_MIN", "uint16"), + ("OVS_NAT_ATTR_PROTO_MAX", "uint16"), + ("OVS_NAT_ATTR_PERSISTENT", "flag"), + ("OVS_NAT_ATTR_PROTO_HASH", "flag"), + ("OVS_NAT_ATTR_PROTO_RANDOM", "flag"), + ) + + def dpstr(self, more=False): + print_str = "nat(" + + if self.get_attr("OVS_NAT_ATTR_SRC"): + print_str += "src" + elif self.get_attr("OVS_NAT_ATTR_DST"): + print_str += "dst" + else: + print_str += "XXX-unknown-nat" + + if self.get_attr("OVS_NAT_ATTR_IP_MIN") or self.get_attr( + "OVS_NAT_ATTR_IP_MAX" + ): + if self.get_attr("OVS_NAT_ATTR_IP_MIN"): + print_str += "=%s," % str( + self.get_attr("OVS_NAT_ATTR_IP_MIN") + ) + + if self.get_attr("OVS_NAT_ATTR_IP_MAX"): + print_str += "-%s," % str( + self.get_attr("OVS_NAT_ATTR_IP_MAX") + ) + else: + print_str += "," + + if self.get_attr("OVS_NAT_ATTR_PROTO_MIN"): + print_str += "proto_min=%d," % self.get_attr( + "OVS_NAT_ATTR_PROTO_MIN" + ) + + if self.get_attr("OVS_NAT_ATTR_PROTO_MAX"): + print_str += "proto_max=%d," % self.get_attr( + "OVS_NAT_ATTR_PROTO_MAX" + ) + + if self.get_attr("OVS_NAT_ATTR_PERSISTENT"): + print_str += "persistent," + if self.get_attr("OVS_NAT_ATTR_HASH"): + print_str += "hash," + if self.get_attr("OVS_NAT_ATTR_RANDOM"): + print_str += "random" + print_str += ")" + return print_str + + def dpstr(self, more=False): + print_str = "ct(" + + if self.get_attr("OVS_CT_ATTR_COMMIT") is not None: + print_str += "commit," + if self.get_attr("OVS_CT_ATTR_ZONE") is not None: + print_str += "zone=%d," % self.get_attr("OVS_CT_ATTR_ZONE") + if self.get_attr("OVS_CT_ATTR_HELPER") is not None: + print_str += "helper=%s," % self.get_attr("OVS_CT_ATTR_HELPER") + if self.get_attr("OVS_CT_ATTR_NAT") is not None: + print_str += self.get_attr("OVS_CT_ATTR_NAT").dpstr(more) + print_str += "," + if self.get_attr("OVS_CT_ATTR_FORCE_COMMIT") is not None: + print_str += "force," + if self.get_attr("OVS_CT_ATTR_EVENTMASK") is not None: + print_str += "emask=0x%X," % self.get_attr( + "OVS_CT_ATTR_EVENTMASK" + ) + if self.get_attr("OVS_CT_ATTR_TIMEOUT") is not None: + print_str += "timeout=%s" % self.get_attr( + "OVS_CT_ATTR_TIMEOUT" + ) + print_str += ")" + return print_str + + class userspace(nla): + nla_flags = NLA_F_NESTED + + nla_map = ( + ("OVS_USERSPACE_ATTR_UNUSED", "none"), + ("OVS_USERSPACE_ATTR_PID", "uint32"), + ("OVS_USERSPACE_ATTR_USERDATA", "array(uint8)"), + ("OVS_USERSPACE_ATTR_EGRESS_TUN_PORT", "uint32"), + ) + + def dpstr(self, more=False): + print_str = "userspace(" + if self.get_attr("OVS_USERSPACE_ATTR_PID") is not None: + print_str += "pid=%d," % self.get_attr( + "OVS_USERSPACE_ATTR_PID" + ) + if self.get_attr("OVS_USERSPACE_ATTR_USERDATA") is not None: + print_str += "userdata=" + for f in self.get_attr("OVS_USERSPACE_ATTR_USERDATA"): + print_str += "%x." % f + if self.get_attr("OVS_USERSPACE_ATTR_TUN_PORT") is not None: + print_str += "egress_tun_port=%d" % self.get_attr( + "OVS_USERSPACE_ATTR_TUN_PORT" + ) + print_str += ")" + return print_str + + def dpstr(self, more=False): + print_str = "" + + for field in self.nla_map: + if field[1] == "none" or self.get_attr(field[0]) is None: + continue + if print_str != "": + print_str += "," + + if field[1] == "uint32": + if field[0] == "OVS_ACTION_ATTR_OUTPUT": + print_str += "%d" % int(self.get_attr(field[0])) + elif field[0] == "OVS_ACTION_ATTR_RECIRC": + print_str += "recirc(0x%x)" % int(self.get_attr(field[0])) + elif field[0] == "OVS_ACTION_ATTR_TRUNC": + print_str += "trunc(%d)" % int(self.get_attr(field[0])) + elif field[0] == "OVS_ACTION_ATTR_DROP": + print_str += "drop(%d)" % int(self.get_attr(field[0])) + elif field[1] == "flag": + if field[0] == "OVS_ACTION_ATTR_CT_CLEAR": + print_str += "ct_clear" + elif field[0] == "OVS_ACTION_ATTR_POP_VLAN": + print_str += "pop_vlan" + elif field[0] == "OVS_ACTION_ATTR_POP_ETH": + print_str += "pop_eth" + elif field[0] == "OVS_ACTION_ATTR_POP_NSH": + print_str += "pop_nsh" + elif field[0] == "OVS_ACTION_ATTR_POP_MPLS": + print_str += "pop_mpls" + else: + datum = self.get_attr(field[0]) + if field[0] == "OVS_ACTION_ATTR_CLONE": + print_str += "clone(" + print_str += datum.dpstr(more) + print_str += ")" + else: + print_str += datum.dpstr(more) + + return print_str + + def parse(self, actstr): + totallen = len(actstr) + while len(actstr) != 0: + parsed = False + parencount = 0 + if actstr.startswith("drop"): + # If no reason is provided, the implicit drop is used (i.e no + # action). If some reason is given, an explicit action is used. + reason = None + if actstr.startswith("drop("): + parencount += 1 + + actstr, reason = parse_extract_field( + actstr, + "drop(", + "([0-9]+)", + lambda x: int(x, 0), + False, + None, + ) + + if reason is not None: + self["attrs"].append(["OVS_ACTION_ATTR_DROP", reason]) + parsed = True + else: + actstr = actstr[len("drop"): ] + return (totallen - len(actstr)) + + elif parse_starts_block(actstr, "^(\d+)", False, True): + actstr, output = parse_extract_field( + actstr, None, "(\d+)", lambda x: int(x), False, "0" + ) + self["attrs"].append(["OVS_ACTION_ATTR_OUTPUT", output]) + parsed = True + elif parse_starts_block(actstr, "recirc(", False): + actstr, recircid = parse_extract_field( + actstr, + "recirc(", + "([0-9a-fA-Fx]+)", + lambda x: int(x, 0), + False, + 0, + ) + parencount += 1 + self["attrs"].append(["OVS_ACTION_ATTR_RECIRC", recircid]) + parsed = True + + parse_flat_map = ( + ("ct_clear", "OVS_ACTION_ATTR_CT_CLEAR"), + ("pop_vlan", "OVS_ACTION_ATTR_POP_VLAN"), + ("pop_eth", "OVS_ACTION_ATTR_POP_ETH"), + ("pop_nsh", "OVS_ACTION_ATTR_POP_NSH"), + ) + + for flat_act in parse_flat_map: + if parse_starts_block(actstr, flat_act[0], False): + actstr = actstr[len(flat_act[0]):] + self["attrs"].append([flat_act[1]]) + actstr = actstr[strspn(actstr, ", ") :] + parsed = True + + if parse_starts_block(actstr, "clone(", False): + parencount += 1 + subacts = ovsactions() + actstr = actstr[len("clone("):] + parsedLen = subacts.parse(actstr) + lst = [] + self["attrs"].append(("OVS_ACTION_ATTR_CLONE", subacts)) + actstr = actstr[parsedLen:] + parsed = True + elif parse_starts_block(actstr, "ct(", False): + parencount += 1 + actstr = actstr[len("ct(") :] + ctact = ovsactions.ctact() + + for scan in ( + ("commit", "OVS_CT_ATTR_COMMIT", None), + ("force_commit", "OVS_CT_ATTR_FORCE_COMMIT", None), + ("zone", "OVS_CT_ATTR_ZONE", int), + ("mark", "OVS_CT_ATTR_MARK", int), + ("helper", "OVS_CT_ATTR_HELPER", lambda x, y: str(x)), + ("timeout", "OVS_CT_ATTR_TIMEOUT", lambda x, y: str(x)), + ): + if actstr.startswith(scan[0]): + actstr = actstr[len(scan[0]) :] + if scan[2] is not None: + if actstr[0] != "=": + raise ValueError("Invalid ct attr") + actstr = actstr[1:] + pos = strcspn(actstr, ",)") + datum = scan[2](actstr[:pos], 0) + ctact["attrs"].append([scan[1], datum]) + actstr = actstr[pos:] + else: + ctact["attrs"].append([scan[1], None]) + actstr = actstr[strspn(actstr, ", ") :] + # it seems strange to put this here, but nat() is a complex + # sub-action and this lets it sit anywhere in the ct() action + if actstr.startswith("nat"): + actstr = actstr[3:] + natact = ovsactions.ctact.natattr() + + if actstr.startswith("("): + parencount += 1 + t = None + actstr = actstr[1:] + if actstr.startswith("src"): + t = "OVS_NAT_ATTR_SRC" + actstr = actstr[3:] + elif actstr.startswith("dst"): + t = "OVS_NAT_ATTR_DST" + actstr = actstr[3:] + + actstr, ip_block_min = parse_extract_field( + actstr, "=", "([0-9a-fA-F\.]+)", str, False + ) + actstr, ip_block_max = parse_extract_field( + actstr, "-", "([0-9a-fA-F\.]+)", str, False + ) + + actstr, proto_min = parse_extract_field( + actstr, ":", "(\d+)", int, False + ) + actstr, proto_max = parse_extract_field( + actstr, "-", "(\d+)", int, False + ) + + if t is not None: + natact["attrs"].append([t, None]) + + if ip_block_min is not None: + natact["attrs"].append( + ["OVS_NAT_ATTR_IP_MIN", ip_block_min] + ) + if ip_block_max is not None: + natact["attrs"].append( + ["OVS_NAT_ATTR_IP_MAX", ip_block_max] + ) + if proto_min is not None: + natact["attrs"].append( + ["OVS_NAT_ATTR_PROTO_MIN", proto_min] + ) + if proto_max is not None: + natact["attrs"].append( + ["OVS_NAT_ATTR_PROTO_MAX", proto_max] + ) + + for natscan in ( + ("persistent", "OVS_NAT_ATTR_PERSISTENT"), + ("hash", "OVS_NAT_ATTR_PROTO_HASH"), + ("random", "OVS_NAT_ATTR_PROTO_RANDOM"), + ): + if actstr.startswith(natscan[0]): + actstr = actstr[len(natscan[0]) :] + natact["attrs"].append([natscan[1], None]) + actstr = actstr[strspn(actstr, ", ") :] + + ctact["attrs"].append(["OVS_CT_ATTR_NAT", natact]) + actstr = actstr[strspn(actstr, ", ") :] + + self["attrs"].append(["OVS_ACTION_ATTR_CT", ctact]) + parsed = True + + actstr = actstr[strspn(actstr, ", ") :] + while parencount > 0: + parencount -= 1 + actstr = actstr[strspn(actstr, " "):] + if len(actstr) and actstr[0] != ")": + raise ValueError("Action str: '%s' unbalanced" % actstr) + actstr = actstr[1:] + + if len(actstr) and actstr[0] == ")": + return (totallen - len(actstr)) + + actstr = actstr[strspn(actstr, ", ") :] + + if not parsed: + raise ValueError("Action str: '%s' not supported" % actstr) + + return (totallen - len(actstr)) + + +class ovskey(nla): + nla_flags = NLA_F_NESTED + nla_map = ( + ("OVS_KEY_ATTR_UNSPEC", "none"), + ("OVS_KEY_ATTR_ENCAP", "none"), + ("OVS_KEY_ATTR_PRIORITY", "uint32"), + ("OVS_KEY_ATTR_IN_PORT", "uint32"), + ("OVS_KEY_ATTR_ETHERNET", "ethaddr"), + ("OVS_KEY_ATTR_VLAN", "uint16"), + ("OVS_KEY_ATTR_ETHERTYPE", "be16"), + ("OVS_KEY_ATTR_IPV4", "ovs_key_ipv4"), + ("OVS_KEY_ATTR_IPV6", "ovs_key_ipv6"), + ("OVS_KEY_ATTR_TCP", "ovs_key_tcp"), + ("OVS_KEY_ATTR_UDP", "ovs_key_udp"), + ("OVS_KEY_ATTR_ICMP", "ovs_key_icmp"), + ("OVS_KEY_ATTR_ICMPV6", "ovs_key_icmpv6"), + ("OVS_KEY_ATTR_ARP", "ovs_key_arp"), + ("OVS_KEY_ATTR_ND", "ovs_key_nd"), + ("OVS_KEY_ATTR_SKB_MARK", "uint32"), + ("OVS_KEY_ATTR_TUNNEL", "none"), + ("OVS_KEY_ATTR_SCTP", "ovs_key_sctp"), + ("OVS_KEY_ATTR_TCP_FLAGS", "be16"), + ("OVS_KEY_ATTR_DP_HASH", "uint32"), + ("OVS_KEY_ATTR_RECIRC_ID", "uint32"), + ("OVS_KEY_ATTR_MPLS", "array(ovs_key_mpls)"), + ("OVS_KEY_ATTR_CT_STATE", "uint32"), + ("OVS_KEY_ATTR_CT_ZONE", "uint16"), + ("OVS_KEY_ATTR_CT_MARK", "uint32"), + ("OVS_KEY_ATTR_CT_LABELS", "none"), + ("OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4", "ovs_key_ct_tuple_ipv4"), + ("OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6", "ovs_key_ct_tuple_ipv6"), + ("OVS_KEY_ATTR_NSH", "none"), + ("OVS_KEY_ATTR_PACKET_TYPE", "none"), + ("OVS_KEY_ATTR_ND_EXTENSIONS", "none"), + ("OVS_KEY_ATTR_TUNNEL_INFO", "none"), + ("OVS_KEY_ATTR_IPV6_EXTENSIONS", "none"), + ) + + class ovs_key_proto(nla): + fields = ( + ("src", "!H"), + ("dst", "!H"), + ) + + fields_map = ( + ("src", "src", "%d", lambda x: int(x) if x else 0, + convert_int(16)), + ("dst", "dst", "%d", lambda x: int(x) if x else 0, + convert_int(16)), + ) + + def __init__( + self, + protostr, + data=None, + offset=None, + parent=None, + length=None, + init=None, + ): + self.proto_str = protostr + nla.__init__( + self, + data=data, + offset=offset, + parent=parent, + length=length, + init=init, + ) + + def parse(self, flowstr, typeInst): + if not flowstr.startswith(self.proto_str): + return None, None + + k = typeInst() + m = typeInst() + + flowstr = flowstr[len(self.proto_str) :] + if flowstr.startswith("("): + flowstr = flowstr[1:] + + keybits = b"" + maskbits = b"" + for f in self.fields_map: + if flowstr.startswith(f[1]): + # the following assumes that the field looks + # something like 'field.' where '.' is a + # character that we don't exactly care about. + flowstr = flowstr[len(f[1]) + 1 :] + splitchar = 0 + for c in flowstr: + if c == "," or c == ")": + break + splitchar += 1 + data = flowstr[:splitchar] + flowstr = flowstr[splitchar:] + else: + data = "" + + if len(f) > 4: + k[f[0]], m[f[0]] = f[4](data) + else: + k[f[0]] = f[3](data) + m[f[0]] = f[3](data) + + flowstr = flowstr[strspn(flowstr, ", ") :] + if len(flowstr) == 0: + return flowstr, k, m + + flowstr = flowstr[strspn(flowstr, "), ") :] + + return flowstr, k, m + + def dpstr(self, masked=None, more=False): + outstr = self.proto_str + "(" + first = False + for f in self.fields_map: + if first: + outstr += "," + if masked is None: + outstr += "%s=" % f[0] + if isinstance(f[2], str): + outstr += f[2] % self[f[1]] + else: + outstr += f[2](self[f[1]]) + first = True + elif more or f[3](masked[f[1]]) != 0: + outstr += "%s=" % f[0] + if isinstance(f[2], str): + outstr += f[2] % self[f[1]] + else: + outstr += f[2](self[f[1]]) + outstr += "/" + if isinstance(f[2], str): + outstr += f[2] % masked[f[1]] + else: + outstr += f[2](masked[f[1]]) + first = True + outstr += ")" + return outstr + + class ethaddr(ovs_key_proto): + fields = ( + ("src", "!6s"), + ("dst", "!6s"), + ) + + fields_map = ( + ( + "src", + "src", + macstr, + lambda x: int.from_bytes(x, "big"), + convert_mac, + ), + ( + "dst", + "dst", + macstr, + lambda x: int.from_bytes(x, "big"), + convert_mac, + ), + ) + + def __init__( + self, + data=None, + offset=None, + parent=None, + length=None, + init=None, + ): + ovskey.ovs_key_proto.__init__( + self, + "eth", + data=data, + offset=offset, + parent=parent, + length=length, + init=init, + ) + + class ovs_key_ipv4(ovs_key_proto): + fields = ( + ("src", "!I"), + ("dst", "!I"), + ("proto", "B"), + ("tos", "B"), + ("ttl", "B"), + ("frag", "B"), + ) + + fields_map = ( + ( + "src", + "src", + lambda x: str(ipaddress.IPv4Address(x)), + int, + convert_ipv4, + ), + ( + "dst", + "dst", + lambda x: str(ipaddress.IPv4Address(x)), + int, + convert_ipv4, + ), + ("proto", "proto", "%d", lambda x: int(x) if x else 0, + convert_int(8)), + ("tos", "tos", "%d", lambda x: int(x) if x else 0, + convert_int(8)), + ("ttl", "ttl", "%d", lambda x: int(x) if x else 0, + convert_int(8)), + ("frag", "frag", "%d", lambda x: int(x) if x else 0, + convert_int(8)), + ) + + def __init__( + self, + data=None, + offset=None, + parent=None, + length=None, + init=None, + ): + ovskey.ovs_key_proto.__init__( + self, + "ipv4", + data=data, + offset=offset, + parent=parent, + length=length, + init=init, + ) + + class ovs_key_ipv6(ovs_key_proto): + fields = ( + ("src", "!16s"), + ("dst", "!16s"), + ("label", "!I"), + ("proto", "B"), + ("tclass", "B"), + ("hlimit", "B"), + ("frag", "B"), + ) + + fields_map = ( + ( + "src", + "src", + lambda x: str(ipaddress.IPv6Address(x)), + lambda x: int.from_bytes(x, "big"), + lambda x: ipaddress.IPv6Address(x), + ), + ( + "dst", + "dst", + lambda x: str(ipaddress.IPv6Address(x)), + lambda x: int.from_bytes(x, "big"), + lambda x: ipaddress.IPv6Address(x), + ), + ("label", "label", "%d", int), + ("proto", "proto", "%d", int), + ("tclass", "tclass", "%d", int), + ("hlimit", "hlimit", "%d", int), + ("frag", "frag", "%d", int), + ) + + def __init__( + self, + data=None, + offset=None, + parent=None, + length=None, + init=None, + ): + ovskey.ovs_key_proto.__init__( + self, + "ipv6", + data=data, + offset=offset, + parent=parent, + length=length, + init=init, + ) + + class ovs_key_tcp(ovs_key_proto): + def __init__( + self, + data=None, + offset=None, + parent=None, + length=None, + init=None, + ): + ovskey.ovs_key_proto.__init__( + self, + "tcp", + data=data, + offset=offset, + parent=parent, + length=length, + init=init, + ) + + class ovs_key_udp(ovs_key_proto): + def __init__( + self, + data=None, + offset=None, + parent=None, + length=None, + init=None, + ): + ovskey.ovs_key_proto.__init__( + self, + "udp", + data=data, + offset=offset, + parent=parent, + length=length, + init=init, + ) + + class ovs_key_sctp(ovs_key_proto): + def __init__( + self, + data=None, + offset=None, + parent=None, + length=None, + init=None, + ): + ovskey.ovs_key_proto.__init__( + self, + "sctp", + data=data, + offset=offset, + parent=parent, + length=length, + init=init, + ) + + class ovs_key_icmp(ovs_key_proto): + fields = ( + ("type", "B"), + ("code", "B"), + ) + + fields_map = ( + ("type", "type", "%d", lambda x: int(x) if x else 0), + ("code", "code", "%d", lambda x: int(x) if x else 0), + ) + + def __init__( + self, + data=None, + offset=None, + parent=None, + length=None, + init=None, + ): + ovskey.ovs_key_proto.__init__( + self, + "icmp", + data=data, + offset=offset, + parent=parent, + length=length, + init=init, + ) + + class ovs_key_icmpv6(ovs_key_icmp): + def __init__( + self, + data=None, + offset=None, + parent=None, + length=None, + init=None, + ): + ovskey.ovs_key_proto.__init__( + self, + "icmpv6", + data=data, + offset=offset, + parent=parent, + length=length, + init=init, + ) + + class ovs_key_arp(ovs_key_proto): + fields = ( + ("sip", "!I"), + ("tip", "!I"), + ("op", "!H"), + ("sha", "!6s"), + ("tha", "!6s"), + ("pad", "xx"), + ) + + fields_map = ( + ( + "sip", + "sip", + lambda x: str(ipaddress.IPv4Address(x)), + int, + convert_ipv4, + ), + ( + "tip", + "tip", + lambda x: str(ipaddress.IPv4Address(x)), + int, + convert_ipv4, + ), + ("op", "op", "%d", lambda x: int(x) if x else 0), + ( + "sha", + "sha", + macstr, + lambda x: int.from_bytes(x, "big"), + convert_mac, + ), + ( + "tha", + "tha", + macstr, + lambda x: int.from_bytes(x, "big"), + convert_mac, + ), + ) + + def __init__( + self, + data=None, + offset=None, + parent=None, + length=None, + init=None, + ): + ovskey.ovs_key_proto.__init__( + self, + "arp", + data=data, + offset=offset, + parent=parent, + length=length, + init=init, + ) + + class ovs_key_nd(ovs_key_proto): + fields = ( + ("target", "!16s"), + ("sll", "!6s"), + ("tll", "!6s"), + ) + + fields_map = ( + ( + "target", + "target", + lambda x: str(ipaddress.IPv6Address(x)), + lambda x: int.from_bytes(x, "big"), + ), + ("sll", "sll", macstr, lambda x: int.from_bytes(x, "big")), + ("tll", "tll", macstr, lambda x: int.from_bytes(x, "big")), + ) + + def __init__( + self, + data=None, + offset=None, + parent=None, + length=None, + init=None, + ): + ovskey.ovs_key_proto.__init__( + self, + "nd", + data=data, + offset=offset, + parent=parent, + length=length, + init=init, + ) + + class ovs_key_ct_tuple_ipv4(ovs_key_proto): + fields = ( + ("src", "!I"), + ("dst", "!I"), + ("tp_src", "!H"), + ("tp_dst", "!H"), + ("proto", "B"), + ) + + fields_map = ( + ( + "src", + "src", + lambda x: str(ipaddress.IPv4Address(x)), + int, + convert_ipv4, + ), + ( + "dst", + "dst", + lambda x: str(ipaddress.IPv4Address(x)), + int, + convert_ipv4, + ), + ("tp_src", "tp_src", "%d", int), + ("tp_dst", "tp_dst", "%d", int), + ("proto", "proto", "%d", int), + ) + + def __init__( + self, + data=None, + offset=None, + parent=None, + length=None, + init=None, + ): + ovskey.ovs_key_proto.__init__( + self, + "ct_tuple4", + data=data, + offset=offset, + parent=parent, + length=length, + init=init, + ) + + class ovs_key_ct_tuple_ipv6(nla): + fields = ( + ("src", "!16s"), + ("dst", "!16s"), + ("tp_src", "!H"), + ("tp_dst", "!H"), + ("proto", "B"), + ) + + fields_map = ( + ( + "src", + "src", + lambda x: str(ipaddress.IPv6Address(x)), + lambda x: int.from_bytes(x, "big", convertmac), + ), + ( + "dst", + "dst", + lambda x: str(ipaddress.IPv6Address(x)), + lambda x: int.from_bytes(x, "big"), + ), + ("tp_src", "tp_src", "%d", int), + ("tp_dst", "tp_dst", "%d", int), + ("proto", "proto", "%d", int), + ) + + def __init__( + self, + data=None, + offset=None, + parent=None, + length=None, + init=None, + ): + ovskey.ovs_key_proto.__init__( + self, + "ct_tuple6", + data=data, + offset=offset, + parent=parent, + length=length, + init=init, + ) + + class ovs_key_mpls(nla): + fields = (("lse", ">I"),) + + def parse(self, flowstr, mask=None): + for field in ( + ("OVS_KEY_ATTR_PRIORITY", "skb_priority", intparse), + ("OVS_KEY_ATTR_SKB_MARK", "skb_mark", intparse), + ("OVS_KEY_ATTR_RECIRC_ID", "recirc_id", intparse), + ("OVS_KEY_ATTR_DP_HASH", "dp_hash", intparse), + ("OVS_KEY_ATTR_CT_STATE", "ct_state", parse_ct_state), + ("OVS_KEY_ATTR_CT_ZONE", "ct_zone", intparse), + ("OVS_KEY_ATTR_CT_MARK", "ct_mark", intparse), + ("OVS_KEY_ATTR_IN_PORT", "in_port", intparse), + ( + "OVS_KEY_ATTR_ETHERNET", + "eth", + ovskey.ethaddr, + ), + ( + "OVS_KEY_ATTR_ETHERTYPE", + "eth_type", + lambda x: intparse(x, "0xffff"), + ), + ( + "OVS_KEY_ATTR_IPV4", + "ipv4", + ovskey.ovs_key_ipv4, + ), + ( + "OVS_KEY_ATTR_IPV6", + "ipv6", + ovskey.ovs_key_ipv6, + ), + ( + "OVS_KEY_ATTR_ARP", + "arp", + ovskey.ovs_key_arp, + ), + ( + "OVS_KEY_ATTR_TCP", + "tcp", + ovskey.ovs_key_tcp, + ), + ( + "OVS_KEY_ATTR_UDP", + "udp", + ovskey.ovs_key_udp, + ), + ( + "OVS_KEY_ATTR_ICMP", + "icmp", + ovskey.ovs_key_icmp, + ), + ( + "OVS_KEY_ATTR_TCP_FLAGS", + "tcp_flags", + lambda x: parse_flags(x, None), + ), + ): + fld = field[1] + "(" + if not flowstr.startswith(fld): + continue + + if not isinstance(field[2], types.FunctionType): + nk = field[2]() + flowstr, k, m = nk.parse(flowstr, field[2]) + else: + flowstr = flowstr[len(fld) :] + flowstr, k, m = field[2](flowstr) + + if m and mask is not None: + mask["attrs"].append([field[0], m]) + self["attrs"].append([field[0], k]) + + flowstr = flowstr[strspn(flowstr, "),") :] + + return flowstr + + def dpstr(self, mask=None, more=False): + print_str = "" + + for field in ( + ( + "OVS_KEY_ATTR_PRIORITY", + "skb_priority", + "%d", + lambda x: False, + True, + ), + ( + "OVS_KEY_ATTR_SKB_MARK", + "skb_mark", + "%d", + lambda x: False, + True, + ), + ( + "OVS_KEY_ATTR_RECIRC_ID", + "recirc_id", + "0x%08X", + lambda x: False, + True, + ), + ( + "OVS_KEY_ATTR_DP_HASH", + "dp_hash", + "0x%08X", + lambda x: False, + True, + ), + ( + "OVS_KEY_ATTR_CT_STATE", + "ct_state", + "0x%04x", + lambda x: False, + True, + ), + ( + "OVS_KEY_ATTR_CT_ZONE", + "ct_zone", + "0x%04x", + lambda x: False, + True, + ), + ( + "OVS_KEY_ATTR_CT_MARK", + "ct_mark", + "0x%08x", + lambda x: False, + True, + ), + ( + "OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4", + None, + None, + False, + False, + ), + ( + "OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6", + None, + None, + False, + False, + ), + ( + "OVS_KEY_ATTR_IN_PORT", + "in_port", + "%d", + lambda x: True, + True, + ), + ("OVS_KEY_ATTR_ETHERNET", None, None, False, False), + ( + "OVS_KEY_ATTR_ETHERTYPE", + "eth_type", + "0x%04x", + lambda x: int(x) == 0xFFFF, + True, + ), + ("OVS_KEY_ATTR_IPV4", None, None, False, False), + ("OVS_KEY_ATTR_IPV6", None, None, False, False), + ("OVS_KEY_ATTR_ARP", None, None, False, False), + ("OVS_KEY_ATTR_TCP", None, None, False, False), + ( + "OVS_KEY_ATTR_TCP_FLAGS", + "tcp_flags", + "0x%04x", + lambda x: False, + True, + ), + ("OVS_KEY_ATTR_UDP", None, None, False, False), + ("OVS_KEY_ATTR_SCTP", None, None, False, False), + ("OVS_KEY_ATTR_ICMP", None, None, False, False), + ("OVS_KEY_ATTR_ICMPV6", None, None, False, False), + ("OVS_KEY_ATTR_ND", None, None, False, False), + ): + v = self.get_attr(field[0]) + if v is not None: + m = None if mask is None else mask.get_attr(field[0]) + if field[4] is False: + print_str += v.dpstr(m, more) + print_str += "," + else: + if m is None or field[3](m): + print_str += field[1] + "(" + print_str += field[2] % v + print_str += ")," + elif more or m != 0: + print_str += field[1] + "(" + print_str += (field[2] % v) + "/" + (field[2] % m) + print_str += ")," + + return print_str + + +class OvsPacket(GenericNetlinkSocket): + OVS_PACKET_CMD_MISS = 1 # Flow table miss + OVS_PACKET_CMD_ACTION = 2 # USERSPACE action + OVS_PACKET_CMD_EXECUTE = 3 # Apply actions to packet + + class ovs_packet_msg(ovs_dp_msg): + nla_map = ( + ("OVS_PACKET_ATTR_UNSPEC", "none"), + ("OVS_PACKET_ATTR_PACKET", "array(uint8)"), + ("OVS_PACKET_ATTR_KEY", "ovskey"), + ("OVS_PACKET_ATTR_ACTIONS", "ovsactions"), + ("OVS_PACKET_ATTR_USERDATA", "none"), + ("OVS_PACKET_ATTR_EGRESS_TUN_KEY", "none"), + ("OVS_PACKET_ATTR_UNUSED1", "none"), + ("OVS_PACKET_ATTR_UNUSED2", "none"), + ("OVS_PACKET_ATTR_PROBE", "none"), + ("OVS_PACKET_ATTR_MRU", "uint16"), + ("OVS_PACKET_ATTR_LEN", "uint32"), + ("OVS_PACKET_ATTR_HASH", "uint64"), + ) + + def __init__(self): + GenericNetlinkSocket.__init__(self) + self.bind(OVS_PACKET_FAMILY, OvsPacket.ovs_packet_msg) + + def upcall_handler(self, up=None): + print("listening on upcall packet handler:", self.epid) + while True: + try: + msgs = self.get() + for msg in msgs: + if not up: + continue + if msg["cmd"] == OvsPacket.OVS_PACKET_CMD_MISS: + up.miss(msg) + elif msg["cmd"] == OvsPacket.OVS_PACKET_CMD_ACTION: + up.action(msg) + elif msg["cmd"] == OvsPacket.OVS_PACKET_CMD_EXECUTE: + up.execute(msg) + else: + print("Unkonwn cmd: %d" % msg["cmd"]) + except NetlinkError as ne: + raise ne + + +class OvsDatapath(GenericNetlinkSocket): + OVS_DP_F_VPORT_PIDS = 1 << 1 + OVS_DP_F_DISPATCH_UPCALL_PER_CPU = 1 << 3 + + class dp_cmd_msg(ovs_dp_msg): + """ + Message class that will be used to communicate with the kernel module. + """ + + nla_map = ( + ("OVS_DP_ATTR_UNSPEC", "none"), + ("OVS_DP_ATTR_NAME", "asciiz"), + ("OVS_DP_ATTR_UPCALL_PID", "array(uint32)"), + ("OVS_DP_ATTR_STATS", "dpstats"), + ("OVS_DP_ATTR_MEGAFLOW_STATS", "megaflowstats"), + ("OVS_DP_ATTR_USER_FEATURES", "uint32"), + ("OVS_DP_ATTR_PAD", "none"), + ("OVS_DP_ATTR_MASKS_CACHE_SIZE", "uint32"), + ("OVS_DP_ATTR_PER_CPU_PIDS", "array(uint32)"), + ) + + class dpstats(nla): + fields = ( + ("hit", "=Q"), + ("missed", "=Q"), + ("lost", "=Q"), + ("flows", "=Q"), + ) + + class megaflowstats(nla): + fields = ( + ("mask_hit", "=Q"), + ("masks", "=I"), + ("padding", "=I"), + ("cache_hits", "=Q"), + ("pad1", "=Q"), + ) + + def __init__(self): + GenericNetlinkSocket.__init__(self) + self.bind(OVS_DATAPATH_FAMILY, OvsDatapath.dp_cmd_msg) + + def info(self, dpname, ifindex=0): + msg = OvsDatapath.dp_cmd_msg() + msg["cmd"] = OVS_DP_CMD_GET + msg["version"] = OVS_DATAPATH_VERSION + msg["reserved"] = 0 + msg["dpifindex"] = ifindex + msg["attrs"].append(["OVS_DP_ATTR_NAME", dpname]) + + try: + reply = self.nlm_request( + msg, msg_type=self.prid, msg_flags=NLM_F_REQUEST + ) + reply = reply[0] + except NetlinkError as ne: + if ne.code == errno.ENODEV: + reply = None + else: + raise ne + + return reply + + def create( + self, dpname, shouldUpcall=False, versionStr=None, p=OvsPacket() + ): + msg = OvsDatapath.dp_cmd_msg() + msg["cmd"] = OVS_DP_CMD_NEW + if versionStr is None: + msg["version"] = OVS_DATAPATH_VERSION + else: + msg["version"] = int(versionStr.split(":")[0], 0) + msg["reserved"] = 0 + msg["dpifindex"] = 0 + msg["attrs"].append(["OVS_DP_ATTR_NAME", dpname]) + + dpfeatures = 0 + if versionStr is not None and versionStr.find(":") != -1: + dpfeatures = int(versionStr.split(":")[1], 0) + else: + if versionStr is None or versionStr.find(":") == -1: + dpfeatures |= OvsDatapath.OVS_DP_F_DISPATCH_UPCALL_PER_CPU + dpfeatures &= ~OvsDatapath.OVS_DP_F_VPORT_PIDS + + nproc = multiprocessing.cpu_count() + procarray = [] + for i in range(1, nproc): + procarray += [int(p.epid)] + msg["attrs"].append(["OVS_DP_ATTR_UPCALL_PID", procarray]) + msg["attrs"].append(["OVS_DP_ATTR_USER_FEATURES", dpfeatures]) + if not shouldUpcall: + msg["attrs"].append(["OVS_DP_ATTR_UPCALL_PID", [0]]) + + try: + reply = self.nlm_request( + msg, msg_type=self.prid, msg_flags=NLM_F_REQUEST | NLM_F_ACK + ) + reply = reply[0] + except NetlinkError as ne: + if ne.code == errno.EEXIST: + reply = None + else: + raise ne + + return reply + + def destroy(self, dpname): + msg = OvsDatapath.dp_cmd_msg() + msg["cmd"] = OVS_DP_CMD_DEL + msg["version"] = OVS_DATAPATH_VERSION + msg["reserved"] = 0 + msg["dpifindex"] = 0 + msg["attrs"].append(["OVS_DP_ATTR_NAME", dpname]) + + try: + reply = self.nlm_request( + msg, msg_type=self.prid, msg_flags=NLM_F_REQUEST | NLM_F_ACK + ) + reply = reply[0] + except NetlinkError as ne: + if ne.code == errno.ENODEV: + reply = None + else: + raise ne + + return reply + + +class OvsVport(GenericNetlinkSocket): + OVS_VPORT_TYPE_NETDEV = 1 + OVS_VPORT_TYPE_INTERNAL = 2 + OVS_VPORT_TYPE_GRE = 3 + OVS_VPORT_TYPE_VXLAN = 4 + OVS_VPORT_TYPE_GENEVE = 5 + + class ovs_vport_msg(ovs_dp_msg): + nla_map = ( + ("OVS_VPORT_ATTR_UNSPEC", "none"), + ("OVS_VPORT_ATTR_PORT_NO", "uint32"), + ("OVS_VPORT_ATTR_TYPE", "uint32"), + ("OVS_VPORT_ATTR_NAME", "asciiz"), + ("OVS_VPORT_ATTR_OPTIONS", "none"), + ("OVS_VPORT_ATTR_UPCALL_PID", "array(uint32)"), + ("OVS_VPORT_ATTR_STATS", "vportstats"), + ("OVS_VPORT_ATTR_PAD", "none"), + ("OVS_VPORT_ATTR_IFINDEX", "uint32"), + ("OVS_VPORT_ATTR_NETNSID", "uint32"), + ) + + class vportstats(nla): + fields = ( + ("rx_packets", "=Q"), + ("tx_packets", "=Q"), + ("rx_bytes", "=Q"), + ("tx_bytes", "=Q"), + ("rx_errors", "=Q"), + ("tx_errors", "=Q"), + ("rx_dropped", "=Q"), + ("tx_dropped", "=Q"), + ) + + def type_to_str(vport_type): + if vport_type == OvsVport.OVS_VPORT_TYPE_NETDEV: + return "netdev" + elif vport_type == OvsVport.OVS_VPORT_TYPE_INTERNAL: + return "internal" + elif vport_type == OvsVport.OVS_VPORT_TYPE_GRE: + return "gre" + elif vport_type == OvsVport.OVS_VPORT_TYPE_VXLAN: + return "vxlan" + elif vport_type == OvsVport.OVS_VPORT_TYPE_GENEVE: + return "geneve" + raise ValueError("Unknown vport type:%d" % vport_type) + + def str_to_type(vport_type): + if vport_type == "netdev": + return OvsVport.OVS_VPORT_TYPE_NETDEV + elif vport_type == "internal": + return OvsVport.OVS_VPORT_TYPE_INTERNAL + elif vport_type == "gre": + return OvsVport.OVS_VPORT_TYPE_INTERNAL + elif vport_type == "vxlan": + return OvsVport.OVS_VPORT_TYPE_VXLAN + elif vport_type == "geneve": + return OvsVport.OVS_VPORT_TYPE_GENEVE + raise ValueError("Unknown vport type: '%s'" % vport_type) + + def __init__(self, packet=OvsPacket()): + GenericNetlinkSocket.__init__(self) + self.bind(OVS_VPORT_FAMILY, OvsVport.ovs_vport_msg) + self.upcall_packet = packet + + def info(self, vport_name, dpifindex=0, portno=None): + msg = OvsVport.ovs_vport_msg() + + msg["cmd"] = OVS_VPORT_CMD_GET + msg["version"] = OVS_DATAPATH_VERSION + msg["reserved"] = 0 + msg["dpifindex"] = dpifindex + + if portno is None: + msg["attrs"].append(["OVS_VPORT_ATTR_NAME", vport_name]) + else: + msg["attrs"].append(["OVS_VPORT_ATTR_PORT_NO", portno]) + + try: + reply = self.nlm_request( + msg, msg_type=self.prid, msg_flags=NLM_F_REQUEST + ) + reply = reply[0] + except NetlinkError as ne: + if ne.code == errno.ENODEV: + reply = None + else: + raise ne + return reply + + def attach(self, dpindex, vport_ifname, ptype): + msg = OvsVport.ovs_vport_msg() + + msg["cmd"] = OVS_VPORT_CMD_NEW + msg["version"] = OVS_DATAPATH_VERSION + msg["reserved"] = 0 + msg["dpifindex"] = dpindex + port_type = OvsVport.str_to_type(ptype) + + msg["attrs"].append(["OVS_VPORT_ATTR_TYPE", port_type]) + msg["attrs"].append(["OVS_VPORT_ATTR_NAME", vport_ifname]) + msg["attrs"].append( + ["OVS_VPORT_ATTR_UPCALL_PID", [self.upcall_packet.epid]] + ) + + try: + reply = self.nlm_request( + msg, msg_type=self.prid, msg_flags=NLM_F_REQUEST | NLM_F_ACK + ) + reply = reply[0] + except NetlinkError as ne: + if ne.code == errno.EEXIST: + reply = None + else: + raise ne + return reply + + def reset_upcall(self, dpindex, vport_ifname, p=None): + msg = OvsVport.ovs_vport_msg() + + msg["cmd"] = OVS_VPORT_CMD_SET + msg["version"] = OVS_DATAPATH_VERSION + msg["reserved"] = 0 + msg["dpifindex"] = dpindex + msg["attrs"].append(["OVS_VPORT_ATTR_NAME", vport_ifname]) + + if p == None: + p = self.upcall_packet + else: + self.upcall_packet = p + + msg["attrs"].append(["OVS_VPORT_ATTR_UPCALL_PID", [p.epid]]) + + try: + reply = self.nlm_request( + msg, msg_type=self.prid, msg_flags=NLM_F_REQUEST | NLM_F_ACK + ) + reply = reply[0] + except NetlinkError as ne: + raise ne + return reply + + def detach(self, dpindex, vport_ifname): + msg = OvsVport.ovs_vport_msg() + + msg["cmd"] = OVS_VPORT_CMD_DEL + msg["version"] = OVS_DATAPATH_VERSION + msg["reserved"] = 0 + msg["dpifindex"] = dpindex + msg["attrs"].append(["OVS_VPORT_ATTR_NAME", vport_ifname]) + + try: + reply = self.nlm_request( + msg, msg_type=self.prid, msg_flags=NLM_F_REQUEST | NLM_F_ACK + ) + reply = reply[0] + except NetlinkError as ne: + if ne.code == errno.ENODEV: + reply = None + else: + raise ne + return reply + + def upcall_handler(self, handler=None): + self.upcall_packet.upcall_handler(handler) + + +class OvsFlow(GenericNetlinkSocket): + class ovs_flow_msg(ovs_dp_msg): + nla_map = ( + ("OVS_FLOW_ATTR_UNSPEC", "none"), + ("OVS_FLOW_ATTR_KEY", "ovskey"), + ("OVS_FLOW_ATTR_ACTIONS", "ovsactions"), + ("OVS_FLOW_ATTR_STATS", "flowstats"), + ("OVS_FLOW_ATTR_TCP_FLAGS", "uint8"), + ("OVS_FLOW_ATTR_USED", "uint64"), + ("OVS_FLOW_ATTR_CLEAR", "none"), + ("OVS_FLOW_ATTR_MASK", "ovskey"), + ("OVS_FLOW_ATTR_PROBE", "none"), + ("OVS_FLOW_ATTR_UFID", "array(uint32)"), + ("OVS_FLOW_ATTR_UFID_FLAGS", "uint32"), + ) + + class flowstats(nla): + fields = ( + ("packets", "=Q"), + ("bytes", "=Q"), + ) + + def dpstr(self, more=False): + ufid = self.get_attr("OVS_FLOW_ATTR_UFID") + ufid_str = "" + if ufid is not None: + ufid_str = ( + "ufid:{:08x}-{:04x}-{:04x}-{:04x}-{:04x}{:08x}".format( + ufid[0], + ufid[1] >> 16, + ufid[1] & 0xFFFF, + ufid[2] >> 16, + ufid[2] & 0, + ufid[3], + ) + ) + + key_field = self.get_attr("OVS_FLOW_ATTR_KEY") + keymsg = None + if key_field is not None: + keymsg = key_field + + mask_field = self.get_attr("OVS_FLOW_ATTR_MASK") + maskmsg = None + if mask_field is not None: + maskmsg = mask_field + + acts_field = self.get_attr("OVS_FLOW_ATTR_ACTIONS") + actsmsg = None + if acts_field is not None: + actsmsg = acts_field + + print_str = "" + + if more: + print_str += ufid_str + "," + + if keymsg is not None: + print_str += keymsg.dpstr(maskmsg, more) + + stats = self.get_attr("OVS_FLOW_ATTR_STATS") + if stats is None: + print_str += " packets:0, bytes:0," + else: + print_str += " packets:%d, bytes:%d," % ( + stats["packets"], + stats["bytes"], + ) + + used = self.get_attr("OVS_FLOW_ATTR_USED") + print_str += " used:" + if used is None: + print_str += "never," + else: + used_time = int(used) + cur_time_sec = time.clock_gettime(time.CLOCK_MONOTONIC) + used_time = (cur_time_sec * 1000) - used_time + print_str += "{}s,".format(used_time / 1000) + + print_str += " actions:" + if ( + actsmsg is None + or "attrs" not in actsmsg + or len(actsmsg["attrs"]) == 0 + ): + print_str += "drop" + else: + print_str += actsmsg.dpstr(more) + + return print_str + + def parse(self, flowstr, actstr, dpidx=0): + OVS_UFID_F_OMIT_KEY = 1 << 0 + OVS_UFID_F_OMIT_MASK = 1 << 1 + OVS_UFID_F_OMIT_ACTIONS = 1 << 2 + + self["cmd"] = 0 + self["version"] = 0 + self["reserved"] = 0 + self["dpifindex"] = 0 + + if flowstr.startswith("ufid:"): + count = 5 + while flowstr[count] != ",": + count += 1 + ufidstr = flowstr[5:count] + flowstr = flowstr[count + 1 :] + else: + ufidstr = str(uuid.uuid4()) + uuidRawObj = uuid.UUID(ufidstr).fields + + self["attrs"].append( + [ + "OVS_FLOW_ATTR_UFID", + [ + uuidRawObj[0], + uuidRawObj[1] << 16 | uuidRawObj[2], + uuidRawObj[3] << 24 + | uuidRawObj[4] << 16 + | uuidRawObj[5] & (0xFF << 32) >> 32, + uuidRawObj[5] & (0xFFFFFFFF), + ], + ] + ) + self["attrs"].append( + [ + "OVS_FLOW_ATTR_UFID_FLAGS", + int( + OVS_UFID_F_OMIT_KEY + | OVS_UFID_F_OMIT_MASK + | OVS_UFID_F_OMIT_ACTIONS + ), + ] + ) + + k = ovskey() + m = ovskey() + k.parse(flowstr, m) + self["attrs"].append(["OVS_FLOW_ATTR_KEY", k]) + self["attrs"].append(["OVS_FLOW_ATTR_MASK", m]) + + a = ovsactions() + a.parse(actstr) + self["attrs"].append(["OVS_FLOW_ATTR_ACTIONS", a]) + + def __init__(self): + GenericNetlinkSocket.__init__(self) + + self.bind(OVS_FLOW_FAMILY, OvsFlow.ovs_flow_msg) + + def add_flow(self, dpifindex, flowmsg): + """ + Send a new flow message to the kernel. + + dpifindex should be a valid datapath obtained by calling + into the OvsDatapath lookup + + flowmsg is a flow object obtained by calling a dpparse + """ + + flowmsg["cmd"] = OVS_FLOW_CMD_NEW + flowmsg["version"] = OVS_DATAPATH_VERSION + flowmsg["reserved"] = 0 + flowmsg["dpifindex"] = dpifindex + + try: + reply = self.nlm_request( + flowmsg, + msg_type=self.prid, + msg_flags=NLM_F_REQUEST | NLM_F_ACK, + ) + reply = reply[0] + except NetlinkError as ne: + print(flowmsg) + raise ne + return reply + + def del_flows(self, dpifindex): + """ + Send a del message to the kernel that will drop all flows. + + dpifindex should be a valid datapath obtained by calling + into the OvsDatapath lookup + """ + + flowmsg = OvsFlow.ovs_flow_msg() + flowmsg["cmd"] = OVS_FLOW_CMD_DEL + flowmsg["version"] = OVS_DATAPATH_VERSION + flowmsg["reserved"] = 0 + flowmsg["dpifindex"] = dpifindex + + try: + reply = self.nlm_request( + flowmsg, + msg_type=self.prid, + msg_flags=NLM_F_REQUEST | NLM_F_ACK, + ) + reply = reply[0] + except NetlinkError as ne: + print(flowmsg) + raise ne + return reply + + def dump(self, dpifindex, flowspec=None): + """ + Returns a list of messages containing flows. + + dpifindex should be a valid datapath obtained by calling + into the OvsDatapath lookup + + flowpsec is a string which represents a flow in the dpctl + format. + """ + msg = OvsFlow.ovs_flow_msg() + + msg["cmd"] = OVS_FLOW_CMD_GET + msg["version"] = OVS_DATAPATH_VERSION + msg["reserved"] = 0 + msg["dpifindex"] = dpifindex + + msg_flags = NLM_F_REQUEST | NLM_F_ACK + if flowspec is None: + msg_flags |= NLM_F_DUMP + rep = None + + try: + rep = self.nlm_request( + msg, + msg_type=self.prid, + msg_flags=msg_flags, + ) + except NetlinkError as ne: + raise ne + return rep + + def miss(self, packetmsg): + seq = packetmsg["header"]["sequence_number"] + keystr = "(none)" + key_field = packetmsg.get_attr("OVS_PACKET_ATTR_KEY") + if key_field is not None: + keystr = key_field.dpstr(None, True) + + pktdata = packetmsg.get_attr("OVS_PACKET_ATTR_PACKET") + pktpres = "yes" if pktdata is not None else "no" + + print("MISS upcall[%d/%s]: %s" % (seq, pktpres, keystr), flush=True) + + def execute(self, packetmsg): + print("userspace execute command") + + def action(self, packetmsg): + print("userspace action command") + + +def print_ovsdp_full(dp_lookup_rep, ifindex, ndb=NDB(), vpl=OvsVport()): + dp_name = dp_lookup_rep.get_attr("OVS_DP_ATTR_NAME") + base_stats = dp_lookup_rep.get_attr("OVS_DP_ATTR_STATS") + megaflow_stats = dp_lookup_rep.get_attr("OVS_DP_ATTR_MEGAFLOW_STATS") + user_features = dp_lookup_rep.get_attr("OVS_DP_ATTR_USER_FEATURES") + masks_cache_size = dp_lookup_rep.get_attr("OVS_DP_ATTR_MASKS_CACHE_SIZE") + + print("%s:" % dp_name) + print( + " lookups: hit:%d missed:%d lost:%d" + % (base_stats["hit"], base_stats["missed"], base_stats["lost"]) + ) + print(" flows:%d" % base_stats["flows"]) + pkts = base_stats["hit"] + base_stats["missed"] + avg = (megaflow_stats["mask_hit"] / pkts) if pkts != 0 else 0.0 + print( + " masks: hit:%d total:%d hit/pkt:%f" + % (megaflow_stats["mask_hit"], megaflow_stats["masks"], avg) + ) + print(" caches:") + print(" masks-cache: size:%d" % masks_cache_size) + + if user_features is not None: + print(" features: 0x%X" % user_features) + + # port print out + for iface in ndb.interfaces: + rep = vpl.info(iface.ifname, ifindex) + if rep is not None: + print( + " port %d: %s (%s)" + % ( + rep.get_attr("OVS_VPORT_ATTR_PORT_NO"), + rep.get_attr("OVS_VPORT_ATTR_NAME"), + OvsVport.type_to_str(rep.get_attr("OVS_VPORT_ATTR_TYPE")), + ) + ) + + +def main(argv): + nlmsg_atoms.ovskey = ovskey + nlmsg_atoms.ovsactions = ovsactions + + # version check for pyroute2 + prverscheck = pyroute2.__version__.split(".") + if int(prverscheck[0]) == 0 and int(prverscheck[1]) < 6: + print("Need to upgrade the python pyroute2 package to >= 0.6.") + sys.exit(0) + + parser = argparse.ArgumentParser() + parser.add_argument( + "-v", + "--verbose", + action="count", + help="Increment 'verbose' output counter.", + default=0, + ) + subparsers = parser.add_subparsers() + + showdpcmd = subparsers.add_parser("show") + showdpcmd.add_argument( + "showdp", metavar="N", type=str, nargs="?", help="Datapath Name" + ) + + adddpcmd = subparsers.add_parser("add-dp") + adddpcmd.add_argument("adddp", help="Datapath Name") + adddpcmd.add_argument( + "-u", + "--upcall", + action="store_true", + help="Leave open a reader for upcalls", + ) + adddpcmd.add_argument( + "-V", + "--versioning", + required=False, + help="Specify a custom version / feature string", + ) + + deldpcmd = subparsers.add_parser("del-dp") + deldpcmd.add_argument("deldp", help="Datapath Name") + + addifcmd = subparsers.add_parser("add-if") + addifcmd.add_argument("dpname", help="Datapath Name") + addifcmd.add_argument("addif", help="Interface name for adding") + addifcmd.add_argument( + "-u", + "--upcall", + action="store_true", + help="Leave open a reader for upcalls", + ) + addifcmd.add_argument( + "-t", + "--ptype", + type=str, + default="netdev", + choices=["netdev", "internal"], + help="Interface type (default netdev)", + ) + delifcmd = subparsers.add_parser("del-if") + delifcmd.add_argument("dpname", help="Datapath Name") + delifcmd.add_argument("delif", help="Interface name for adding") + + dumpflcmd = subparsers.add_parser("dump-flows") + dumpflcmd.add_argument("dumpdp", help="Datapath Name") + + addflcmd = subparsers.add_parser("add-flow") + addflcmd.add_argument("flbr", help="Datapath name") + addflcmd.add_argument("flow", help="Flow specification") + addflcmd.add_argument("acts", help="Flow actions") + + delfscmd = subparsers.add_parser("del-flows") + delfscmd.add_argument("flsbr", help="Datapath name") + + args = parser.parse_args() + + if args.verbose > 0: + if args.verbose > 1: + logging.basicConfig(level=logging.DEBUG) + + ovspk = OvsPacket() + ovsdp = OvsDatapath() + ovsvp = OvsVport(ovspk) + ovsflow = OvsFlow() + ndb = NDB() + + sys.setrecursionlimit(100000) + + if hasattr(args, "showdp"): + found = False + for iface in ndb.interfaces: + rep = None + if args.showdp is None: + rep = ovsdp.info(iface.ifname, 0) + elif args.showdp == iface.ifname: + rep = ovsdp.info(iface.ifname, 0) + + if rep is not None: + found = True + print_ovsdp_full(rep, iface.index, ndb, ovsvp) + + if not found: + msg = "No DP found" + if args.showdp is not None: + msg += ":'%s'" % args.showdp + print(msg) + elif hasattr(args, "adddp"): + rep = ovsdp.create(args.adddp, args.upcall, args.versioning, ovspk) + if rep is None: + print("DP '%s' already exists" % args.adddp) + else: + print("DP '%s' added" % args.adddp) + if args.upcall: + ovspk.upcall_handler(ovsflow) + elif hasattr(args, "deldp"): + ovsdp.destroy(args.deldp) + elif hasattr(args, "addif"): + rep = ovsdp.info(args.dpname, 0) + if rep is None: + print("DP '%s' not found." % args.dpname) + return 1 + dpindex = rep["dpifindex"] + rep = ovsvp.attach(rep["dpifindex"], args.addif, args.ptype) + msg = "vport '%s'" % args.addif + if rep and rep["header"]["error"] is None: + msg += " added." + else: + msg += " failed to add." + if args.upcall: + if rep is None: + rep = ovsvp.reset_upcall(dpindex, args.addif, ovspk) + ovsvp.upcall_handler(ovsflow) + elif hasattr(args, "delif"): + rep = ovsdp.info(args.dpname, 0) + if rep is None: + print("DP '%s' not found." % args.dpname) + return 1 + rep = ovsvp.detach(rep["dpifindex"], args.delif) + msg = "vport '%s'" % args.delif + if rep and rep["header"]["error"] is None: + msg += " removed." + else: + msg += " failed to remove." + elif hasattr(args, "dumpdp"): + rep = ovsdp.info(args.dumpdp, 0) + if rep is None: + print("DP '%s' not found." % args.dumpdp) + return 1 + rep = ovsflow.dump(rep["dpifindex"]) + for flow in rep: + print(flow.dpstr(True if args.verbose > 0 else False)) + elif hasattr(args, "flbr"): + rep = ovsdp.info(args.flbr, 0) + if rep is None: + print("DP '%s' not found." % args.flbr) + return 1 + flow = OvsFlow.ovs_flow_msg() + flow.parse(args.flow, args.acts, rep["dpifindex"]) + ovsflow.add_flow(rep["dpifindex"], flow) + elif hasattr(args, "flsbr"): + rep = ovsdp.info(args.flsbr, 0) + if rep is None: + print("DP '%s' not found." % args.flsbr) + ovsflow.del_flows(rep["dpifindex"]) + + return 0 + + +if __name__ == "__main__": + sys.exit(main(sys.argv)) diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh index 6bbf69a28e12..cfc84958025a 100755 --- a/tools/testing/selftests/net/pmtu.sh +++ b/tools/testing/selftests/net/pmtu.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # SPDX-License-Identifier: GPL-2.0 # # Check that route PMTU values match expectations, and that initial device MTU @@ -26,6 +26,15 @@ # - pmtu_ipv6 # Same as pmtu_ipv4, except for locked PMTU tests, using IPv6 # +# - pmtu_ipv4_dscp_icmp_exception +# Set up the same network topology as pmtu_ipv4, but use non-default +# routing table in A. A fib-rule is used to jump to this routing table +# based on DSCP. Send ICMPv4 packets with the expected DSCP value and +# verify that ECN doesn't interfere with the creation of PMTU exceptions. +# +# - pmtu_ipv4_dscp_udp_exception +# Same as pmtu_ipv4_dscp_icmp_exception, but use UDP instead of ICMP. +# # - pmtu_ipv4_vxlan4_exception # Set up the same network topology as pmtu_ipv4, create a VXLAN tunnel # over IPv4 between A and B, routed via R1. On the link between R1 and B, @@ -118,6 +127,16 @@ # below for IPv6 doesn't apply here, because, on IPv4, administrative MTU # changes alone won't affect PMTU # +# - pmtu_vti4_udp_exception +# Same as pmtu_vti4_exception, but using ESP-in-UDP +# +# - pmtu_vti4_udp_routed_exception +# Set up vti tunnel on top of veth connected through routing namespace and +# add xfrm states and policies with ESP-in-UDP encapsulation. Check that +# route exception is not created if link layer MTU is not exceeded, then +# lower MTU on second part of routed environment and check that exception +# is created with the expected PMTU. +# # - pmtu_vti6_exception # Set up vti6 tunnel on top of veth, with xfrm states and policies, in two # namespaces with matching endpoints. Check that route exception is @@ -125,6 +144,13 @@ # decrease and increase MTU of tunnel, checking that route exception PMTU # changes accordingly # +# - pmtu_vti6_udp_exception +# Same as pmtu_vti6_exception, but using ESP-in-UDP +# +# - pmtu_vti6_udp_routed_exception +# Same as pmtu_vti6_udp_routed_exception but with routing between vti +# endpoints +# # - pmtu_vti4_default_mtu # Set up vti4 tunnel on top of veth, in two namespaces with matching # endpoints. Check that MTU assigned to vti interface is the MTU of the @@ -162,10 +188,18 @@ # - list_flush_ipv6_exception # Using the same topology as in pmtu_ipv6, create exceptions, and check # they are shown when listing exception caches, gone after flushing them +# +# - pmtu_ipv4_route_change +# Use the same topology as in pmtu_ipv4, but issue a route replacement +# command and delete the corresponding device afterward. This tests for +# proper cleanup of the PMTU exceptions by the route replacement path. +# Device unregistration should complete successfully +# +# - pmtu_ipv6_route_change +# Same as above but with IPv6 - -# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 +source lib.sh +source net_helper.sh PAUSE_ON_FAIL=no VERBOSE=0 @@ -178,6 +212,8 @@ which ping6 > /dev/null 2>&1 && ping6=$(which ping6) || ping6=$(which ping) tests=" pmtu_ipv4_exception ipv4: PMTU exceptions 1 pmtu_ipv6_exception ipv6: PMTU exceptions 1 + pmtu_ipv4_dscp_icmp_exception ICMPv4 with DSCP and ECN: PMTU exceptions 1 + pmtu_ipv4_dscp_udp_exception UDPv4 with DSCP and ECN: PMTU exceptions 1 pmtu_ipv4_vxlan4_exception IPv4 over vxlan4: PMTU exceptions 1 pmtu_ipv6_vxlan4_exception IPv6 over vxlan4: PMTU exceptions 1 pmtu_ipv4_vxlan6_exception IPv4 over vxlan6: PMTU exceptions 1 @@ -216,6 +252,10 @@ tests=" pmtu_ipv6_ipv6_exception IPv6 over IPv6: PMTU exceptions 1 pmtu_vti6_exception vti6: PMTU exceptions 0 pmtu_vti4_exception vti4: PMTU exceptions 0 + pmtu_vti6_udp_exception vti6: PMTU exceptions (ESP-in-UDP) 0 + pmtu_vti4_udp_exception vti4: PMTU exceptions (ESP-in-UDP) 0 + pmtu_vti6_udp_routed_exception vti6: PMTU exceptions, routed (ESP-in-UDP) 0 + pmtu_vti4_udp_routed_exception vti4: PMTU exceptions, routed (ESP-in-UDP) 0 pmtu_vti4_default_mtu vti4: default MTU assignment 0 pmtu_vti6_default_mtu vti6: default MTU assignment 0 pmtu_vti4_link_add_mtu vti4: MTU setting on link creation 0 @@ -224,18 +264,9 @@ tests=" cleanup_ipv4_exception ipv4: cleanup of cached exceptions 1 cleanup_ipv6_exception ipv6: cleanup of cached exceptions 1 list_flush_ipv4_exception ipv4: list and flush cached exceptions 1 - list_flush_ipv6_exception ipv6: list and flush cached exceptions 1" - -NS_A="ns-A" -NS_B="ns-B" -NS_C="ns-C" -NS_R1="ns-R1" -NS_R2="ns-R2" -ns_a="ip netns exec ${NS_A}" -ns_b="ip netns exec ${NS_B}" -ns_c="ip netns exec ${NS_C}" -ns_r1="ip netns exec ${NS_R1}" -ns_r2="ip netns exec ${NS_R2}" + list_flush_ipv6_exception ipv6: list and flush cached exceptions 1 + pmtu_ipv4_route_change ipv4: PMTU exception w/route replace 1 + pmtu_ipv6_route_change ipv6: PMTU exception w/route replace 1" # Addressing and routing for tests with routers: four network segments, with # index SEGMENT between 1 and 4, a common prefix (PREFIX4 or PREFIX6) and an @@ -269,7 +300,6 @@ routes=" A ${prefix6}:${b_r2}::1 ${prefix6}:${a_r2}::2 B default ${prefix6}:${b_r1}::2 " - USE_NH="no" # ns family nh id destination gateway nexthops=" @@ -294,6 +324,9 @@ routes_nh=" B 6 default 61 " +policy_mark=0x04 +rt_table=main + veth4_a_addr="192.168.1.1" veth4_b_addr="192.168.1.2" veth4_c_addr="192.168.2.10" @@ -316,6 +349,9 @@ dummy6_mask="64" err_buf= tcpdump_pids= +nettest_pids= +socat_pids= +tmpoutfile= err() { err_buf="${err_buf}${1} @@ -344,6 +380,16 @@ run_cmd() { return $rc } +run_cmd_bg() { + cmd="$*" + + if [ "$VERBOSE" = "1" ]; then + printf " COMMAND: %s &\n" "${cmd}" + fi + + $cmd 2>&1 & +} + # Find the auto-generated name for this namespace nsname() { eval echo \$NS_$1 @@ -355,7 +401,7 @@ setup_fou_or_gue() { encap="${3}" if [ "${outer}" = "4" ]; then - modprobe fou || return 2 + modprobe fou || return $ksft_skip a_addr="${prefix4}.${a_r1}.1" b_addr="${prefix4}.${b_r1}.1" if [ "${inner}" = "4" ]; then @@ -366,7 +412,7 @@ setup_fou_or_gue() { ipproto="41" fi else - modprobe fou6 || return 2 + modprobe fou6 || return $ksft_skip a_addr="${prefix6}:${a_r1}::1" b_addr="${prefix6}:${b_r1}::1" if [ "${inner}" = "4" ]; then @@ -380,8 +426,8 @@ setup_fou_or_gue() { fi fi - run_cmd ${ns_a} ip fou add port 5555 ipproto ${ipproto} || return 2 - run_cmd ${ns_a} ip link add ${encap}_a type ${type} ${mode} local ${a_addr} remote ${b_addr} encap ${encap} encap-sport auto encap-dport 5556 || return 2 + run_cmd ${ns_a} ip fou add port 5555 ipproto ${ipproto} || return $ksft_skip + run_cmd ${ns_a} ip link add ${encap}_a type ${type} ${mode} local ${a_addr} remote ${b_addr} encap ${encap} encap-sport auto encap-dport 5556 || return $ksft_skip run_cmd ${ns_b} ip fou add port 5556 ipproto ${ipproto} run_cmd ${ns_b} ip link add ${encap}_b type ${type} ${mode} local ${b_addr} remote ${a_addr} encap ${encap} encap-sport auto encap-dport 5555 @@ -455,7 +501,7 @@ setup_ipvX_over_ipvY() { fi fi - run_cmd ${ns_a} ip link add ip_a type ${type} local ${a_addr} remote ${b_addr} mode ${mode} || return 2 + run_cmd ${ns_a} ip link add ip_a type ${type} local ${a_addr} remote ${b_addr} mode ${mode} || return $ksft_skip run_cmd ${ns_b} ip link add ip_b type ${type} local ${b_addr} remote ${a_addr} mode ${mode} run_cmd ${ns_a} ip link set ip_a up @@ -487,13 +533,17 @@ setup_ip6ip6() { } setup_namespaces() { + setup_ns NS_A NS_B NS_C NS_R1 NS_R2 for n in ${NS_A} ${NS_B} ${NS_C} ${NS_R1} ${NS_R2}; do - ip netns add ${n} || return 1 - # Disable DAD, so that we don't have to wait to use the # configured IPv6 addresses ip netns exec ${n} sysctl -q net/ipv6/conf/default/accept_dad=0 done + ns_a="ip netns exec ${NS_A}" + ns_b="ip netns exec ${NS_B}" + ns_c="ip netns exec ${NS_C}" + ns_r1="ip netns exec ${NS_R1}" + ns_r2="ip netns exec ${NS_R2}" } setup_veth() { @@ -538,6 +588,14 @@ setup_vti6() { setup_vti 6 ${veth6_a_addr} ${veth6_b_addr} ${tunnel6_a_addr} ${tunnel6_b_addr} ${tunnel6_mask} } +setup_vti4routed() { + setup_vti 4 ${prefix4}.${a_r1}.1 ${prefix4}.${b_r1}.1 ${tunnel4_a_addr} ${tunnel4_b_addr} ${tunnel4_mask} +} + +setup_vti6routed() { + setup_vti 6 ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 ${tunnel6_a_addr} ${tunnel6_b_addr} ${tunnel6_mask} +} + setup_vxlan_or_geneve() { type="${1}" a_addr="${2}" @@ -609,18 +667,38 @@ setup_xfrm() { proto=${1} veth_a_addr="${2}" veth_b_addr="${3}" + encap=${4} - run_cmd ${ns_a} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel || return 1 - run_cmd ${ns_a} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel + run_cmd ${ns_a} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel ${encap} || return 1 + run_cmd ${ns_a} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel ${encap} run_cmd ${ns_a} ip -${proto} xfrm policy add dir out mark 10 tmpl src ${veth_a_addr} dst ${veth_b_addr} proto esp mode tunnel run_cmd ${ns_a} ip -${proto} xfrm policy add dir in mark 10 tmpl src ${veth_b_addr} dst ${veth_a_addr} proto esp mode tunnel - run_cmd ${ns_b} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel - run_cmd ${ns_b} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel + run_cmd ${ns_b} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel ${encap} + run_cmd ${ns_b} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel ${encap} run_cmd ${ns_b} ip -${proto} xfrm policy add dir out mark 10 tmpl src ${veth_b_addr} dst ${veth_a_addr} proto esp mode tunnel run_cmd ${ns_b} ip -${proto} xfrm policy add dir in mark 10 tmpl src ${veth_a_addr} dst ${veth_b_addr} proto esp mode tunnel } +setup_nettest_xfrm() { + if ! which nettest >/dev/null; then + PATH=$PWD:$PATH + if ! which nettest >/dev/null; then + echo "'nettest' command not found; skipping tests" + return 1 + fi + fi + + [ ${1} -eq 6 ] && proto="-6" || proto="" + port=${2} + + run_cmd_bg "${ns_a}" nettest "${proto}" -q -D -s -x -p "${port}" -t 5 + nettest_pids="${nettest_pids} $!" + + run_cmd_bg "${ns_b}" nettest "${proto}" -q -D -s -x -p "${port}" -t 5 + nettest_pids="${nettest_pids} $!" +} + setup_xfrm4() { setup_xfrm 4 ${veth4_a_addr} ${veth4_b_addr} } @@ -629,6 +707,26 @@ setup_xfrm6() { setup_xfrm 6 ${veth6_a_addr} ${veth6_b_addr} } +setup_xfrm4udp() { + setup_xfrm 4 ${veth4_a_addr} ${veth4_b_addr} "encap espinudp 4500 4500 0.0.0.0" && \ + setup_nettest_xfrm 4 4500 +} + +setup_xfrm6udp() { + setup_xfrm 6 ${veth6_a_addr} ${veth6_b_addr} "encap espinudp 4500 4500 0.0.0.0" && \ + setup_nettest_xfrm 6 4500 +} + +setup_xfrm4udprouted() { + setup_xfrm 4 ${prefix4}.${a_r1}.1 ${prefix4}.${b_r1}.1 "encap espinudp 4500 4500 0.0.0.0" && \ + setup_nettest_xfrm 4 4500 +} + +setup_xfrm6udprouted() { + setup_xfrm 6 ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 "encap espinudp 4500 4500 0.0.0.0" && \ + setup_nettest_xfrm 6 4500 +} + setup_routing_old() { for i in ${routes}; do [ "${ns}" = "" ] && ns="${i}" && continue @@ -637,7 +735,7 @@ setup_routing_old() { ns_name="$(nsname ${ns})" - ip -n ${ns_name} route add ${addr} via ${gw} + ip -n "${ns_name}" route add "${addr}" table "${rt_table}" via "${gw}" ns=""; addr=""; gw="" done @@ -667,7 +765,7 @@ setup_routing_new() { ns_name="$(nsname ${ns})" - ip -n ${ns_name} -${fam} route add ${addr} nhid ${nhid} + ip -n "${ns_name}" -"${fam}" route add "${addr}" table "${rt_table}" nhid "${nhid}" ns=""; fam=""; addr=""; nhid="" done @@ -712,12 +810,30 @@ setup_routing() { return 0 } +setup_policy_routing() { + setup_routing + + ip -netns "${NS_A}" -4 rule add dsfield "${policy_mark}" \ + table "${rt_table}" + + # Set the IPv4 Don't Fragment bit with tc, since socat doesn't seem to + # have an option do to it. + tc -netns "${NS_A}" qdisc replace dev veth_A-R1 root prio + tc -netns "${NS_A}" qdisc replace dev veth_A-R2 root prio + tc -netns "${NS_A}" filter add dev veth_A-R1 \ + protocol ipv4 flower ip_proto udp \ + action pedit ex munge ip df set 0x40 pipe csum ip and udp + tc -netns "${NS_A}" filter add dev veth_A-R2 \ + protocol ipv4 flower ip_proto udp \ + action pedit ex munge ip df set 0x40 pipe csum ip and udp +} + setup_bridge() { - run_cmd ${ns_a} ip link add br0 type bridge || return 2 + run_cmd ${ns_a} ip link add br0 type bridge || return $ksft_skip run_cmd ${ns_a} ip link set br0 up run_cmd ${ns_c} ip link add veth_C-A type veth peer name veth_A-C - run_cmd ${ns_c} ip link set veth_A-C netns ns-A + run_cmd ${ns_c} ip link set veth_A-C netns ${NS_A} run_cmd ${ns_a} ip link set veth_A-C up run_cmd ${ns_c} ip link set veth_C-A up @@ -765,7 +881,7 @@ setup_ovs_vxlan6() { } setup_ovs_bridge() { - run_cmd ovs-vsctl add-br ovs_br0 || return 2 + run_cmd ovs-vsctl add-br ovs_br0 || return $ksft_skip run_cmd ip link set ovs_br0 up run_cmd ${ns_c} ip link add veth_C-A type veth peer name veth_A-C @@ -789,7 +905,6 @@ setup_ovs_bridge() { setup() { [ "$(id -u)" -ne 0 ] && echo " need to run as root" && return $ksft_skip - cleanup for arg do eval setup_${arg} || { echo " ${arg} not supported"; return 1; } done @@ -800,7 +915,7 @@ trace() { for arg do [ "${ns_cmd}" = "" ] && ns_cmd="${arg}" && continue - ${ns_cmd} tcpdump -s 0 -i "${arg}" -w "${name}_${arg}.pcap" 2> /dev/null & + ${ns_cmd} tcpdump --immediate-mode -s 0 -i "${arg}" -w "${name}_${arg}.pcap" 2> /dev/null & tcpdump_pids="${tcpdump_pids} $!" ns_cmd= done @@ -813,14 +928,23 @@ cleanup() { done tcpdump_pids= - for n in ${NS_A} ${NS_B} ${NS_C} ${NS_R1} ${NS_R2}; do - ip netns del ${n} 2> /dev/null + for pid in ${nettest_pids}; do + kill ${pid} + done + nettest_pids= + + for pid in ${socat_pids}; do + kill "${pid}" done + socat_pids= + + cleanup_all_ns ip link del veth_A-C 2>/dev/null ip link del veth_A-R1 2>/dev/null ovs-vsctl --if-exists del-port vxlan_a 2>/dev/null ovs-vsctl --if-exists del-br ovs_br0 2>/dev/null + rm -f "$tmpoutfile" } mtu() { @@ -860,15 +984,21 @@ link_get_mtu() { route_get_dst_exception() { ns_cmd="${1}" dst="${2}" + dsfield="${3}" - ${ns_cmd} ip route get "${dst}" + if [ -z "${dsfield}" ]; then + dsfield=0 + fi + + ${ns_cmd} ip route get "${dst}" dsfield "${dsfield}" } route_get_dst_pmtu_from_exception() { ns_cmd="${1}" dst="${2}" + dsfield="${3}" - mtu_parse "$(route_get_dst_exception "${ns_cmd}" ${dst})" + mtu_parse "$(route_get_dst_exception "${ns_cmd}" "${dst}" "${dsfield}")" } check_pmtu_value() { @@ -887,7 +1017,7 @@ check_pmtu_value() { test_pmtu_ipvX() { family=${1} - setup namespaces routing || return 2 + setup namespaces routing || return $ksft_skip trace "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \ "${ns_r1}" veth_R1-B "${ns_b}" veth_B-R1 \ "${ns_a}" veth_A-R2 "${ns_r2}" veth_R2-A \ @@ -978,6 +1108,95 @@ test_pmtu_ipv6_exception() { test_pmtu_ipvX 6 } +test_pmtu_ipv4_dscp_icmp_exception() { + rt_table=100 + + setup namespaces policy_routing || return $ksft_skip + trace "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \ + "${ns_r1}" veth_R1-B "${ns_b}" veth_B-R1 \ + "${ns_a}" veth_A-R2 "${ns_r2}" veth_R2-A \ + "${ns_r2}" veth_R2-B "${ns_b}" veth_B-R2 + + # Set up initial MTU values + mtu "${ns_a}" veth_A-R1 2000 + mtu "${ns_r1}" veth_R1-A 2000 + mtu "${ns_r1}" veth_R1-B 1400 + mtu "${ns_b}" veth_B-R1 1400 + + mtu "${ns_a}" veth_A-R2 2000 + mtu "${ns_r2}" veth_R2-A 2000 + mtu "${ns_r2}" veth_R2-B 1500 + mtu "${ns_b}" veth_B-R2 1500 + + len=$((2000 - 20 - 8)) # Fills MTU of veth_A-R1 + + dst1="${prefix4}.${b_r1}.1" + dst2="${prefix4}.${b_r2}.1" + + # Create route exceptions + dsfield=${policy_mark} # No ECN bit set (Not-ECT) + run_cmd "${ns_a}" ping -q -M want -Q "${dsfield}" -c 1 -w 1 -s "${len}" "${dst1}" + + dsfield=$(printf "%#x" $((policy_mark + 0x02))) # ECN=2 (ECT(0)) + run_cmd "${ns_a}" ping -q -M want -Q "${dsfield}" -c 1 -w 1 -s "${len}" "${dst2}" + + # Check that exceptions have been created with the correct PMTU + pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst1}" "${policy_mark}")" + check_pmtu_value "1400" "${pmtu_1}" "exceeding MTU" || return 1 + + pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst2}" "${policy_mark}")" + check_pmtu_value "1500" "${pmtu_2}" "exceeding MTU" || return 1 +} + +test_pmtu_ipv4_dscp_udp_exception() { + rt_table=100 + + if ! which socat > /dev/null 2>&1; then + echo "'socat' command not found; skipping tests" + return $ksft_skip + fi + + setup namespaces policy_routing || return $ksft_skip + trace "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \ + "${ns_r1}" veth_R1-B "${ns_b}" veth_B-R1 \ + "${ns_a}" veth_A-R2 "${ns_r2}" veth_R2-A \ + "${ns_r2}" veth_R2-B "${ns_b}" veth_B-R2 + + # Set up initial MTU values + mtu "${ns_a}" veth_A-R1 2000 + mtu "${ns_r1}" veth_R1-A 2000 + mtu "${ns_r1}" veth_R1-B 1400 + mtu "${ns_b}" veth_B-R1 1400 + + mtu "${ns_a}" veth_A-R2 2000 + mtu "${ns_r2}" veth_R2-A 2000 + mtu "${ns_r2}" veth_R2-B 1500 + mtu "${ns_b}" veth_B-R2 1500 + + len=$((2000 - 20 - 8)) # Fills MTU of veth_A-R1 + + dst1="${prefix4}.${b_r1}.1" + dst2="${prefix4}.${b_r2}.1" + + # Create route exceptions + run_cmd_bg "${ns_b}" socat UDP-LISTEN:50000 OPEN:/dev/null,wronly=1 + socat_pids="${socat_pids} $!" + + dsfield=${policy_mark} # No ECN bit set (Not-ECT) + run_cmd "${ns_a}" socat OPEN:/dev/zero,rdonly=1,readbytes="${len}" \ + UDP:"${dst1}":50000,tos="${dsfield}" + + dsfield=$(printf "%#x" $((policy_mark + 0x02))) # ECN=2 (ECT(0)) + run_cmd "${ns_a}" socat OPEN:/dev/zero,rdonly=1,readbytes="${len}" \ + UDP:"${dst2}":50000,tos="${dsfield}" + + # Check that exceptions have been created with the correct PMTU + pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst1}" "${policy_mark}")" + check_pmtu_value "1400" "${pmtu_1}" "exceeding MTU" || return 1 + pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst2}" "${policy_mark}")" + check_pmtu_value "1500" "${pmtu_2}" "exceeding MTU" || return 1 +} + test_pmtu_ipvX_over_vxlanY_or_geneveY_exception() { type=${1} family=${2} @@ -985,11 +1204,11 @@ test_pmtu_ipvX_over_vxlanY_or_geneveY_exception() { ll_mtu=4000 if [ ${outer_family} -eq 4 ]; then - setup namespaces routing ${type}4 || return 2 + setup namespaces routing ${type}4 || return $ksft_skip # IPv4 header UDP header VXLAN/GENEVE header Ethernet header exp_mtu=$((${ll_mtu} - 20 - 8 - 8 - 14)) else - setup namespaces routing ${type}6 || return 2 + setup namespaces routing ${type}6 || return $ksft_skip # IPv6 header UDP header VXLAN/GENEVE header Ethernet header exp_mtu=$((${ll_mtu} - 40 - 8 - 8 - 14)) fi @@ -1060,11 +1279,11 @@ test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception() { ll_mtu=4000 if [ ${outer_family} -eq 4 ]; then - setup namespaces routing bridge bridged_${type}4 || return 2 + setup namespaces routing bridge bridged_${type}4 || return $ksft_skip # IPv4 header UDP header VXLAN/GENEVE header Ethernet header exp_mtu=$((${ll_mtu} - 20 - 8 - 8 - 14)) else - setup namespaces routing bridge bridged_${type}6 || return 2 + setup namespaces routing bridge bridged_${type}6 || return $ksft_skip # IPv6 header UDP header VXLAN/GENEVE header Ethernet header exp_mtu=$((${ll_mtu} - 40 - 8 - 8 - 14)) fi @@ -1103,6 +1322,41 @@ test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception() { check_pmtu_value ${exp_mtu} "${pmtu}" "exceeding link layer MTU on bridged ${type} interface" pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst})" check_pmtu_value ${exp_mtu} "${pmtu}" "exceeding link layer MTU on locally bridged ${type} interface" + + tmpoutfile=$(mktemp) + + # Flush Exceptions, retry with TCP + run_cmd ${ns_a} ip route flush cached ${dst} + run_cmd ${ns_b} ip route flush cached ${dst} + run_cmd ${ns_c} ip route flush cached ${dst} + + for target in "${ns_a}" "${ns_c}" ; do + if [ ${family} -eq 4 ]; then + TCPDST=TCP:${dst}:50000 + else + TCPDST="TCP:[${dst}]:50000" + fi + ${ns_b} socat -T 3 -u -6 TCP-LISTEN:50000,reuseaddr STDOUT > $tmpoutfile & + local socat_pid=$! + + wait_local_port_listen ${NS_B} 50000 tcp + + dd if=/dev/zero status=none bs=1M count=1 | ${target} socat -T 3 -u STDIN $TCPDST,connect-timeout=3 + + wait ${socat_pid} + size=$(du -sb $tmpoutfile) + size=${size%%/tmp/*} + + [ $size -ne 1048576 ] && err "File size $size mismatches exepcted value in locally bridged vxlan test" && return 1 + done + + rm -f "$tmpoutfile" + + # Check that exceptions were created + pmtu="$(route_get_dst_pmtu_from_exception "${ns_c}" ${dst})" + check_pmtu_value ${exp_mtu} "${pmtu}" "tcp: exceeding link layer MTU on bridged ${type} interface" + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst})" + check_pmtu_value ${exp_mtu} "${pmtu}" "tcp exceeding link layer MTU on locally bridged ${type} interface" } test_pmtu_ipv4_br_vxlan4_exception() { @@ -1144,11 +1398,11 @@ test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception() { ll_mtu=4000 if [ ${outer_family} -eq 4 ]; then - setup namespaces routing ovs_bridge ovs_${type}4 || return 2 + setup namespaces routing ovs_bridge ovs_${type}4 || return $ksft_skip # IPv4 header UDP header VXLAN/GENEVE header Ethernet header exp_mtu=$((${ll_mtu} - 20 - 8 - 8 - 14)) else - setup namespaces routing ovs_bridge ovs_${type}6 || return 2 + setup namespaces routing ovs_bridge ovs_${type}6 || return $ksft_skip # IPv6 header UDP header VXLAN/GENEVE header Ethernet header exp_mtu=$((${ll_mtu} - 40 - 8 - 8 - 14)) fi @@ -1230,7 +1484,7 @@ test_pmtu_ipvX_over_fouY_or_gueY() { encap=${3} ll_mtu=4000 - setup namespaces routing ${encap}${outer_family}${inner_family} || return 2 + setup namespaces routing ${encap}${outer_family}${inner_family} || return $ksft_skip trace "${ns_a}" ${encap}_a "${ns_b}" ${encap}_b \ "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \ "${ns_b}" veth_B-R1 "${ns_r1}" veth_R1-B @@ -1309,7 +1563,7 @@ test_pmtu_ipvX_over_ipvY_exception() { outer=${2} ll_mtu=4000 - setup namespaces routing ip${inner}ip${outer} || return 2 + setup namespaces routing ip${inner}ip${outer} || return $ksft_skip trace "${ns_a}" ip_a "${ns_b}" ip_b \ "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \ @@ -1363,7 +1617,7 @@ test_pmtu_ipv6_ipv6_exception() { } test_pmtu_vti4_exception() { - setup namespaces veth vti4 xfrm4 || return 2 + setup namespaces veth vti4 xfrm4 || return $ksft_skip trace "${ns_a}" veth_a "${ns_b}" veth_b \ "${ns_a}" vti4_a "${ns_b}" vti4_b @@ -1393,7 +1647,67 @@ test_pmtu_vti4_exception() { } test_pmtu_vti6_exception() { - setup namespaces veth vti6 xfrm6 || return 2 + setup namespaces veth vti6 xfrm6 || return $ksft_skip + trace "${ns_a}" veth_a "${ns_b}" veth_b \ + "${ns_a}" vti6_a "${ns_b}" vti6_b + fail=0 + + # Create route exception by exceeding link layer MTU + mtu "${ns_a}" veth_a 4000 + mtu "${ns_b}" veth_b 4000 + mtu "${ns_a}" vti6_a 5000 + mtu "${ns_b}" vti6_b 5000 + run_cmd ${ns_a} ${ping6} -q -i 0.1 -w 1 -s 60000 ${tunnel6_b_addr} + + # Check that exception was created + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})" + check_pmtu_value any "${pmtu}" "creating tunnel exceeding link layer MTU" || return 1 + + # Decrease tunnel MTU, check for PMTU decrease in route exception + mtu "${ns_a}" vti6_a 3000 + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})" + check_pmtu_value "3000" "${pmtu}" "decreasing tunnel MTU" || fail=1 + + # Increase tunnel MTU, check for PMTU increase in route exception + mtu "${ns_a}" vti6_a 9000 + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})" + check_pmtu_value "9000" "${pmtu}" "increasing tunnel MTU" || fail=1 + + return ${fail} +} + +test_pmtu_vti4_udp_exception() { + setup namespaces veth vti4 xfrm4udp || return $ksft_skip + trace "${ns_a}" veth_a "${ns_b}" veth_b \ + "${ns_a}" vti4_a "${ns_b}" vti4_b + + veth_mtu=1500 + vti_mtu=$((veth_mtu - 20)) + + # UDP SPI SN IV ICV pad length next header + esp_payload_rfc4106=$((vti_mtu - 8 - 4 - 4 - 8 - 16 - 1 - 1)) + ping_payload=$((esp_payload_rfc4106 - 28)) + + mtu "${ns_a}" veth_a ${veth_mtu} + mtu "${ns_b}" veth_b ${veth_mtu} + mtu "${ns_a}" vti4_a ${vti_mtu} + mtu "${ns_b}" vti4_b ${vti_mtu} + + # Send DF packet without exceeding link layer MTU, check that no + # exception is created + run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s ${ping_payload} ${tunnel4_b_addr} + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})" + check_pmtu_value "" "${pmtu}" "sending packet smaller than PMTU (IP payload length ${esp_payload_rfc4106})" || return 1 + + # Now exceed link layer MTU by one byte, check that exception is created + # with the right PMTU value + run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s $((ping_payload + 1)) ${tunnel4_b_addr} + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})" + check_pmtu_value "${esp_payload_rfc4106}" "${pmtu}" "exceeding PMTU (IP payload length $((esp_payload_rfc4106 + 1)))" +} + +test_pmtu_vti6_udp_exception() { + setup namespaces veth vti6 xfrm6udp || return $ksft_skip trace "${ns_a}" veth_a "${ns_b}" veth_b \ "${ns_a}" vti6_a "${ns_b}" vti6_b fail=0 @@ -1422,8 +1736,77 @@ test_pmtu_vti6_exception() { return ${fail} } +test_pmtu_vti4_udp_routed_exception() { + setup namespaces routing vti4routed xfrm4udprouted || return $ksft_skip + trace "${ns_a}" veth_A-R1 "${ns_b}" veth_B-R1 \ + "${ns_a}" vti4_a "${ns_b}" vti4_b + + veth_mtu=1500 + vti_mtu=$((veth_mtu - 20)) + + # UDP SPI SN IV ICV pad length next header + esp_payload_rfc4106=$((vti_mtu - 8 - 4 - 4 - 8 - 16 - 1 - 1)) + ping_payload=$((esp_payload_rfc4106 - 28)) + + mtu "${ns_a}" veth_A-R1 ${veth_mtu} + mtu "${ns_r1}" veth_R1-A ${veth_mtu} + mtu "${ns_b}" veth_B-R1 ${veth_mtu} + mtu "${ns_r1}" veth_R1-B ${veth_mtu} + + mtu "${ns_a}" vti4_a ${vti_mtu} + mtu "${ns_b}" vti4_b ${vti_mtu} + + # Send DF packet without exceeding link layer MTU, check that no + # exception is created + run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s ${ping_payload} ${tunnel4_b_addr} + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})" + check_pmtu_value "" "${pmtu}" "sending packet smaller than PMTU (IP payload length ${esp_payload_rfc4106})" || return 1 + + # Now decrease link layer MTU by 8 bytes on R1, check that exception is created + # with the right PMTU value + mtu "${ns_r1}" veth_R1-B $((veth_mtu - 8)) + run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s $((ping_payload)) ${tunnel4_b_addr} + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})" + check_pmtu_value "$((esp_payload_rfc4106 - 8))" "${pmtu}" "exceeding PMTU (IP payload length $((esp_payload_rfc4106)))" +} + +test_pmtu_vti6_udp_routed_exception() { + setup namespaces routing vti6routed xfrm6udprouted || return $ksft_skip + trace "${ns_a}" veth_A-R1 "${ns_b}" veth_B-R1 \ + "${ns_a}" vti6_a "${ns_b}" vti6_b + + veth_mtu=1500 + vti_mtu=$((veth_mtu - 40)) + + # UDP SPI SN IV ICV pad length next header + esp_payload_rfc4106=$((vti_mtu - 8 - 4 - 4 - 8 - 16 - 1 - 1)) + ping_payload=$((esp_payload_rfc4106 - 48)) + + mtu "${ns_a}" veth_A-R1 ${veth_mtu} + mtu "${ns_r1}" veth_R1-A ${veth_mtu} + mtu "${ns_b}" veth_B-R1 ${veth_mtu} + mtu "${ns_r1}" veth_R1-B ${veth_mtu} + + # mtu "${ns_a}" vti6_a ${vti_mtu} + # mtu "${ns_b}" vti6_b ${vti_mtu} + + run_cmd ${ns_a} ${ping6} -q -M want -i 0.1 -w 1 -s ${ping_payload} ${tunnel6_b_addr} + + # Check that exception was not created + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})" + check_pmtu_value "" "${pmtu}" "sending packet smaller than PMTU (IP payload length ${esp_payload_rfc4106})" || return 1 + + # Now decrease link layer MTU by 8 bytes on R1, check that exception is created + # with the right PMTU value + mtu "${ns_r1}" veth_R1-B $((veth_mtu - 8)) + run_cmd ${ns_a} ${ping6} -q -M want -i 0.1 -w 1 -s $((ping_payload)) ${tunnel6_b_addr} + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})" + check_pmtu_value "$((esp_payload_rfc4106 - 8))" "${pmtu}" "exceeding PMTU (IP payload length $((esp_payload_rfc4106)))" + +} + test_pmtu_vti4_default_mtu() { - setup namespaces veth vti4 || return 2 + setup namespaces veth vti4 || return $ksft_skip # Check that MTU of vti device is MTU of veth minus IPv4 header length veth_mtu="$(link_get_mtu "${ns_a}" veth_a)" @@ -1435,7 +1818,7 @@ test_pmtu_vti4_default_mtu() { } test_pmtu_vti6_default_mtu() { - setup namespaces veth vti6 || return 2 + setup namespaces veth vti6 || return $ksft_skip # Check that MTU of vti device is MTU of veth minus IPv6 header length veth_mtu="$(link_get_mtu "${ns_a}" veth_a)" @@ -1447,10 +1830,10 @@ test_pmtu_vti6_default_mtu() { } test_pmtu_vti4_link_add_mtu() { - setup namespaces || return 2 + setup namespaces || return $ksft_skip run_cmd ${ns_a} ip link add vti4_a type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10 - [ $? -ne 0 ] && err " vti not supported" && return 2 + [ $? -ne 0 ] && err " vti not supported" && return $ksft_skip run_cmd ${ns_a} ip link del vti4_a fail=0 @@ -1485,10 +1868,10 @@ test_pmtu_vti4_link_add_mtu() { } test_pmtu_vti6_link_add_mtu() { - setup namespaces || return 2 + setup namespaces || return $ksft_skip run_cmd ${ns_a} ip link add vti6_a type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10 - [ $? -ne 0 ] && err " vti6 not supported" && return 2 + [ $? -ne 0 ] && err " vti6 not supported" && return $ksft_skip run_cmd ${ns_a} ip link del vti6_a fail=0 @@ -1523,10 +1906,10 @@ test_pmtu_vti6_link_add_mtu() { } test_pmtu_vti6_link_change_mtu() { - setup namespaces || return 2 + setup namespaces || return $ksft_skip run_cmd ${ns_a} ip link add dummy0 mtu 1500 type dummy - [ $? -ne 0 ] && err " dummy not supported" && return 2 + [ $? -ne 0 ] && err " dummy not supported" && return $ksft_skip run_cmd ${ns_a} ip link add dummy1 mtu 3000 type dummy run_cmd ${ns_a} ip link set dummy0 up run_cmd ${ns_a} ip link set dummy1 up @@ -1574,15 +1957,22 @@ check_command() { return 0 } +check_running() { + pid=${1} + cmd=${2} + + [ "$(cat /proc/${pid}/cmdline 2>/dev/null | tr -d '\0')" = "{cmd}" ] +} + test_cleanup_vxlanX_exception() { outer="${1}" encap="vxlan" ll_mtu=4000 - check_command taskset || return 2 + check_command taskset || return $ksft_skip cpu_list=$(grep -m 2 processor /proc/cpuinfo | cut -d ' ' -f 2) - setup namespaces routing ${encap}${outer} || return 2 + setup namespaces routing ${encap}${outer} || return $ksft_skip trace "${ns_a}" ${encap}_a "${ns_b}" ${encap}_b \ "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \ "${ns_b}" veth_B-R1 "${ns_r1}" veth_R1-B @@ -1604,11 +1994,12 @@ test_cleanup_vxlanX_exception() { ${ns_a} ip link del dev veth_A-R1 & iplink_pid=$! - sleep 1 - if [ "$(cat /proc/${iplink_pid}/cmdline 2>/dev/null | tr -d '\0')" = "iplinkdeldevveth_A-R1" ]; then - err " can't delete veth device in a timely manner, PMTU dst likely leaked" - return 1 - fi + for i in $(seq 1 20); do + check_running ${iplink_pid} "iplinkdeldevveth_A-R1" || return 0 + sleep 0.1 + done + err " can't delete veth device in a timely manner, PMTU dst likely leaked" + return 1 } test_cleanup_ipv6_exception() { @@ -1626,6 +2017,10 @@ run_test() { unset IFS + # Since cleanup() relies on variables modified by this subshell, it + # has to run in this context. + trap cleanup EXIT + if [ "$VERBOSE" = "1" ]; then printf "\n##########################################################################\n\n" fi @@ -1644,7 +2039,7 @@ run_test() { fi err_flush exit 1 - elif [ $ret -eq 2 ]; then + elif [ $ret -eq $ksft_skip ]; then printf "TEST: %-60s [SKIP]\n" "${tdesc}" err_flush fi @@ -1652,7 +2047,19 @@ run_test() { return $ret ) ret=$? - [ $ret -ne 0 ] && exitcode=1 + case $ret in + 0) + all_skipped=false + [ $exitcode -eq $ksft_skip ] && exitcode=0 + ;; + $ksft_skip) + [ $all_skipped = true ] && exitcode=$ksft_skip + ;; + *) + all_skipped=false + exitcode=1 + ;; + esac return $ret } @@ -1667,7 +2074,7 @@ run_test_nh() { } test_list_flush_ipv4_exception() { - setup namespaces routing || return 2 + setup namespaces routing || return $ksft_skip trace "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \ "${ns_r1}" veth_R1-B "${ns_b}" veth_B-R1 \ "${ns_a}" veth_A-R2 "${ns_r2}" veth_R2-A \ @@ -1721,7 +2128,7 @@ test_list_flush_ipv4_exception() { } test_list_flush_ipv6_exception() { - setup namespaces routing || return 2 + setup namespaces routing || return $ksft_skip trace "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \ "${ns_r1}" veth_R1-B "${ns_b}" veth_B-R1 \ "${ns_a}" veth_A-R2 "${ns_r2}" veth_R2-A \ @@ -1770,6 +2177,63 @@ test_list_flush_ipv6_exception() { return ${fail} } +test_pmtu_ipvX_route_change() { + family=${1} + + setup namespaces routing || return 2 + trace "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \ + "${ns_r1}" veth_R1-B "${ns_b}" veth_B-R1 \ + "${ns_a}" veth_A-R2 "${ns_r2}" veth_R2-A \ + "${ns_r2}" veth_R2-B "${ns_b}" veth_B-R2 + + if [ ${family} -eq 4 ]; then + ping=ping + dst1="${prefix4}.${b_r1}.1" + dst2="${prefix4}.${b_r2}.1" + gw="${prefix4}.${a_r1}.2" + else + ping=${ping6} + dst1="${prefix6}:${b_r1}::1" + dst2="${prefix6}:${b_r2}::1" + gw="${prefix6}:${a_r1}::2" + fi + + # Set up initial MTU values + mtu "${ns_a}" veth_A-R1 2000 + mtu "${ns_r1}" veth_R1-A 2000 + mtu "${ns_r1}" veth_R1-B 1400 + mtu "${ns_b}" veth_B-R1 1400 + + mtu "${ns_a}" veth_A-R2 2000 + mtu "${ns_r2}" veth_R2-A 2000 + mtu "${ns_r2}" veth_R2-B 1500 + mtu "${ns_b}" veth_B-R2 1500 + + # Create route exceptions + run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1800 ${dst1} + run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1800 ${dst2} + + # Check that exceptions have been created with the correct PMTU + pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst1})" + check_pmtu_value "1400" "${pmtu_1}" "exceeding MTU" || return 1 + pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})" + check_pmtu_value "1500" "${pmtu_2}" "exceeding MTU" || return 1 + + # Replace the route from A to R1 + run_cmd ${ns_a} ip route change default via ${gw} + + # Delete the device in A + run_cmd ${ns_a} ip link del "veth_A-R1" +} + +test_pmtu_ipv4_route_change() { + test_pmtu_ipvX_route_change 4 +} + +test_pmtu_ipv6_route_change() { + test_pmtu_ipvX_route_change 6 +} + usage() { echo echo "$0 [OPTIONS] [TEST]..." @@ -1786,6 +2250,7 @@ usage() { # exitcode=0 desc=0 +all_skipped=true while getopts :ptv o do @@ -1840,7 +2305,7 @@ for t in ${tests}; do if [ $run_this -eq 1 ]; then run_test "${name}" "${desc}" # if test was skipped no need to retry with nexthop objects - [ $? -eq 2 ] && rerun_nh=0 + [ $? -eq $ksft_skip ] && rerun_nh=0 if [ "${rerun_nh}" = "1" ]; then run_test_nh "${name}" "${desc}" diff --git a/tools/testing/selftests/net/psock_fanout.c b/tools/testing/selftests/net/psock_fanout.c index 2c522f7a0aec..1a736f700be4 100644 --- a/tools/testing/selftests/net/psock_fanout.c +++ b/tools/testing/selftests/net/psock_fanout.c @@ -53,15 +53,19 @@ #include <unistd.h> #include "psock_lib.h" +#include "../kselftest.h" #define RING_NUM_FRAMES 20 +static uint32_t cfg_max_num_members; + /* Open a socket in a given fanout mode. * @return -1 if mode is bad, a valid socket otherwise */ static int sock_fanout_open(uint16_t typeflags, uint16_t group_id) { struct sockaddr_ll addr = {0}; - int fd, val; + struct fanout_args args; + int fd, val, err; fd = socket(PF_PACKET, SOCK_RAW, 0); if (fd < 0) { @@ -83,8 +87,18 @@ static int sock_fanout_open(uint16_t typeflags, uint16_t group_id) exit(1); } - val = (((int) typeflags) << 16) | group_id; - if (setsockopt(fd, SOL_PACKET, PACKET_FANOUT, &val, sizeof(val))) { + if (cfg_max_num_members) { + args.id = group_id; + args.type_flags = typeflags; + args.max_num_members = cfg_max_num_members; + err = setsockopt(fd, SOL_PACKET, PACKET_FANOUT, &args, + sizeof(args)); + } else { + val = (((int) typeflags) << 16) | group_id; + err = setsockopt(fd, SOL_PACKET, PACKET_FANOUT, &val, + sizeof(val)); + } + if (err) { if (close(fd)) { perror("close packet"); exit(1); @@ -98,13 +112,13 @@ static int sock_fanout_open(uint16_t typeflags, uint16_t group_id) static void sock_fanout_set_cbpf(int fd) { struct sock_filter bpf_filter[] = { - BPF_STMT(BPF_LD+BPF_B+BPF_ABS, 80), /* ldb [80] */ - BPF_STMT(BPF_RET+BPF_A, 0), /* ret A */ + BPF_STMT(BPF_LD | BPF_B | BPF_ABS, 80), /* ldb [80] */ + BPF_STMT(BPF_RET | BPF_A, 0), /* ret A */ }; struct sock_fprog bpf_prog; bpf_prog.filter = bpf_filter; - bpf_prog.len = sizeof(bpf_filter) / sizeof(struct sock_filter); + bpf_prog.len = ARRAY_SIZE(bpf_filter); if (setsockopt(fd, SOL_PACKET, PACKET_FANOUT_DATA, &bpf_prog, sizeof(bpf_prog))) { @@ -149,7 +163,7 @@ static void sock_fanout_set_ebpf(int fd) memset(&attr, 0, sizeof(attr)); attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER; attr.insns = (unsigned long) prog; - attr.insn_cnt = sizeof(prog) / sizeof(prog[0]); + attr.insn_cnt = ARRAY_SIZE(prog); attr.license = (unsigned long) "GPL"; attr.log_buf = (unsigned long) log_buf, attr.log_size = sizeof(log_buf), @@ -286,6 +300,56 @@ static void test_control_group(void) } } +/* Test illegal max_num_members values */ +static void test_control_group_max_num_members(void) +{ + int fds[3]; + + fprintf(stderr, "test: control multiple sockets, max_num_members\n"); + + /* expected failure on greater than PACKET_FANOUT_MAX */ + cfg_max_num_members = (1 << 16) + 1; + if (sock_fanout_open(PACKET_FANOUT_HASH, 0) != -1) { + fprintf(stderr, "ERROR: max_num_members > PACKET_FANOUT_MAX\n"); + exit(1); + } + + cfg_max_num_members = 256; + fds[0] = sock_fanout_open(PACKET_FANOUT_HASH, 0); + if (fds[0] == -1) { + fprintf(stderr, "ERROR: failed open\n"); + exit(1); + } + + /* expected failure on joining group with different max_num_members */ + cfg_max_num_members = 257; + if (sock_fanout_open(PACKET_FANOUT_HASH, 0) != -1) { + fprintf(stderr, "ERROR: set different max_num_members\n"); + exit(1); + } + + /* success on joining group with same max_num_members */ + cfg_max_num_members = 256; + fds[1] = sock_fanout_open(PACKET_FANOUT_HASH, 0); + if (fds[1] == -1) { + fprintf(stderr, "ERROR: failed to join group\n"); + exit(1); + } + + /* success on joining group with max_num_members unspecified */ + cfg_max_num_members = 0; + fds[2] = sock_fanout_open(PACKET_FANOUT_HASH, 0); + if (fds[2] == -1) { + fprintf(stderr, "ERROR: failed to join group\n"); + exit(1); + } + + if (close(fds[2]) || close(fds[1]) || close(fds[0])) { + fprintf(stderr, "ERROR: closing sockets\n"); + exit(1); + } +} + /* Test creating a unique fanout group ids */ static void test_unique_fanout_group_ids(void) { @@ -426,8 +490,11 @@ int main(int argc, char **argv) test_control_single(); test_control_group(); + test_control_group_max_num_members(); test_unique_fanout_group_ids(); + /* PACKET_FANOUT_MAX */ + cfg_max_num_members = 1 << 16; /* find a set of ports that do not collide onto the same socket */ ret = test_datapath(PACKET_FANOUT_HASH, port_off, expect_hash[0], expect_hash[1]); diff --git a/tools/testing/selftests/net/psock_lib.h b/tools/testing/selftests/net/psock_lib.h index faa884385c45..6e4fef560873 100644 --- a/tools/testing/selftests/net/psock_lib.h +++ b/tools/testing/selftests/net/psock_lib.h @@ -14,6 +14,8 @@ #include <arpa/inet.h> #include <unistd.h> +#include "kselftest.h" + #define DATA_LEN 100 #define DATA_CHAR 'a' #define DATA_CHAR_1 'b' @@ -63,7 +65,7 @@ static __maybe_unused void pair_udp_setfilter(int fd) struct sock_fprog bpf_prog; bpf_prog.filter = bpf_filter; - bpf_prog.len = sizeof(bpf_filter) / sizeof(struct sock_filter); + bpf_prog.len = ARRAY_SIZE(bpf_filter); if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &bpf_prog, sizeof(bpf_prog))) { diff --git a/tools/testing/selftests/net/psock_snd.c b/tools/testing/selftests/net/psock_snd.c index 7d15e10a9fb6..edf1e6f80d41 100644 --- a/tools/testing/selftests/net/psock_snd.c +++ b/tools/testing/selftests/net/psock_snd.c @@ -389,6 +389,8 @@ int main(int argc, char **argv) error(1, errno, "ip link set mtu"); if (system("ip addr add dev lo 172.17.0.1/24")) error(1, errno, "ip addr add"); + if (system("sysctl -w net.ipv4.conf.lo.accept_local=1")) + error(1, errno, "sysctl lo.accept_local"); run_test(); diff --git a/tools/testing/selftests/net/psock_snd.sh b/tools/testing/selftests/net/psock_snd.sh index 170be65e0816..1cbfeb5052ec 100755 --- a/tools/testing/selftests/net/psock_snd.sh +++ b/tools/testing/selftests/net/psock_snd.sh @@ -86,9 +86,6 @@ echo "raw truncate hlen - 1 (expected to fail: EINVAL)" echo "raw gso min size" ./in_netns.sh ./psock_snd -v -c -g -l "${mss_exceeds}" -echo "raw gso min size - 1 (expected to fail)" -(! ./in_netns.sh ./psock_snd -v -c -g -l "${mss}") - echo "raw gso max size" ./in_netns.sh ./psock_snd -v -c -g -l "${max_mss}" diff --git a/tools/testing/selftests/net/reuseaddr_ports_exhausted.c b/tools/testing/selftests/net/reuseaddr_ports_exhausted.c index 7b01b7c2ec10..066efd30e294 100644 --- a/tools/testing/selftests/net/reuseaddr_ports_exhausted.c +++ b/tools/testing/selftests/net/reuseaddr_ports_exhausted.c @@ -30,25 +30,25 @@ struct reuse_opts { }; struct reuse_opts unreusable_opts[12] = { - {0, 0, 0, 0}, - {0, 0, 0, 1}, - {0, 0, 1, 0}, - {0, 0, 1, 1}, - {0, 1, 0, 0}, - {0, 1, 0, 1}, - {0, 1, 1, 0}, - {0, 1, 1, 1}, - {1, 0, 0, 0}, - {1, 0, 0, 1}, - {1, 0, 1, 0}, - {1, 0, 1, 1}, + {{0, 0}, {0, 0}}, + {{0, 0}, {0, 1}}, + {{0, 0}, {1, 0}}, + {{0, 0}, {1, 1}}, + {{0, 1}, {0, 0}}, + {{0, 1}, {0, 1}}, + {{0, 1}, {1, 0}}, + {{0, 1}, {1, 1}}, + {{1, 0}, {0, 0}}, + {{1, 0}, {0, 1}}, + {{1, 0}, {1, 0}}, + {{1, 0}, {1, 1}}, }; struct reuse_opts reusable_opts[4] = { - {1, 1, 0, 0}, - {1, 1, 0, 1}, - {1, 1, 1, 0}, - {1, 1, 1, 1}, + {{1, 1}, {0, 0}}, + {{1, 1}, {0, 1}}, + {{1, 1}, {1, 0}}, + {{1, 1}, {1, 1}}, }; int bind_port(struct __test_metadata *_metadata, int reuseaddr, int reuseport) diff --git a/tools/testing/selftests/net/reuseport_bpf.c b/tools/testing/selftests/net/reuseport_bpf.c index b5277106df1f..65aea27d761c 100644 --- a/tools/testing/selftests/net/reuseport_bpf.c +++ b/tools/testing/selftests/net/reuseport_bpf.c @@ -24,9 +24,7 @@ #include <sys/resource.h> #include <unistd.h> -#ifndef ARRAY_SIZE -#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) -#endif +#include "../kselftest.h" struct test_params { int recv_family; @@ -330,7 +328,7 @@ static void test_extra_filter(const struct test_params p) if (bind(fd1, addr, sockaddr_size())) error(1, errno, "failed to bind recv socket 1"); - if (!bind(fd2, addr, sockaddr_size()) && errno != EADDRINUSE) + if (!bind(fd2, addr, sockaddr_size()) || errno != EADDRINUSE) error(1, errno, "bind socket 2 should fail with EADDRINUSE"); free(addr); diff --git a/tools/testing/selftests/net/reuseport_bpf_numa.c b/tools/testing/selftests/net/reuseport_bpf_numa.c index c9f478b40996..c9ba36aa688e 100644 --- a/tools/testing/selftests/net/reuseport_bpf_numa.c +++ b/tools/testing/selftests/net/reuseport_bpf_numa.c @@ -86,7 +86,7 @@ static void attach_bpf(int fd) memset(&attr, 0, sizeof(attr)); attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER; - attr.insn_cnt = sizeof(prog) / sizeof(prog[0]); + attr.insn_cnt = ARRAY_SIZE(prog); attr.insns = (unsigned long) &prog; attr.license = (unsigned long) &bpf_license; attr.log_buf = (unsigned long) &bpf_log_buf; @@ -211,12 +211,16 @@ static void test(int *rcv_fd, int len, int family, int proto) /* Forward iterate */ for (node = 0; node < len; ++node) { + if (!numa_bitmask_isbitset(numa_nodes_ptr, node)) + continue; send_from_node(node, family, proto); receive_on_node(rcv_fd, len, epfd, node, proto); } /* Reverse iterate */ for (node = len - 1; node >= 0; --node) { + if (!numa_bitmask_isbitset(numa_nodes_ptr, node)) + continue; send_from_node(node, family, proto); receive_on_node(rcv_fd, len, epfd, node, proto); } diff --git a/tools/testing/selftests/net/route_localnet.sh b/tools/testing/selftests/net/route_localnet.sh index 116bfeab72fa..e08701c750e3 100755 --- a/tools/testing/selftests/net/route_localnet.sh +++ b/tools/testing/selftests/net/route_localnet.sh @@ -18,8 +18,10 @@ setup() { ip route del 127.0.0.0/8 dev lo table local ip netns exec "${PEER_NS}" ip route del 127.0.0.0/8 dev lo table local - ifconfig veth0 127.25.3.4/24 up - ip netns exec "${PEER_NS}" ifconfig veth1 127.25.3.14/24 up + ip address add 127.25.3.4/24 dev veth0 + ip link set dev veth0 up + ip netns exec "${PEER_NS}" ip address add 127.25.3.14/24 dev veth1 + ip netns exec "${PEER_NS}" ip link set dev veth1 up ip route flush cache ip netns exec "${PEER_NS}" ip route flush cache diff --git a/tools/testing/selftests/net/rps_default_mask.sh b/tools/testing/selftests/net/rps_default_mask.sh new file mode 100755 index 000000000000..4287a8529890 --- /dev/null +++ b/tools/testing/selftests/net/rps_default_mask.sh @@ -0,0 +1,79 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +readonly ksft_skip=4 +readonly cpus=$(nproc) +ret=0 + +[ $cpus -gt 2 ] || exit $ksft_skip + +readonly INITIAL_RPS_DEFAULT_MASK=$(cat /proc/sys/net/core/rps_default_mask) +readonly TAG="$(mktemp -u XXXXXX)" +readonly VETH="veth${TAG}" +readonly NETNS="ns-${TAG}" + +setup() { + ip netns add "${NETNS}" + ip -netns "${NETNS}" link set lo up +} + +cleanup() { + echo $INITIAL_RPS_DEFAULT_MASK > /proc/sys/net/core/rps_default_mask + ip netns del $NETNS +} + +chk_rps() { + local rps_mask expected_rps_mask=$4 + local dev_name=$3 + local netns=$2 + local cmd="cat" + local msg=$1 + + [ -n "$netns" ] && cmd="ip netns exec $netns $cmd" + + rps_mask=$($cmd /sys/class/net/$dev_name/queues/rx-0/rps_cpus) + printf "%-60s" "$msg" + + # In case there is more than 32 CPUs we need to remove commas from masks + rps_mask=${rps_mask//,} + expected_rps_mask=${expected_rps_mask//,} + if [ $rps_mask -eq $expected_rps_mask ]; then + echo "[ ok ]" + else + echo "[fail] expected $expected_rps_mask found $rps_mask" + ret=1 + fi +} + +trap cleanup EXIT + +echo 0 > /proc/sys/net/core/rps_default_mask +setup +chk_rps "empty rps_default_mask" $NETNS lo 0 +cleanup + +echo 1 > /proc/sys/net/core/rps_default_mask +setup +chk_rps "changing rps_default_mask dont affect existing devices" "" lo $INITIAL_RPS_DEFAULT_MASK + +echo 3 > /proc/sys/net/core/rps_default_mask +chk_rps "changing rps_default_mask dont affect existing netns" $NETNS lo 0 + +ip link add name $VETH type veth peer netns $NETNS name $VETH +ip link set dev $VETH up +ip -n $NETNS link set dev $VETH up +chk_rps "changing rps_default_mask affect newly created devices" "" $VETH 3 +chk_rps "changing rps_default_mask don't affect newly child netns[II]" $NETNS $VETH 0 +ip link del dev $VETH +ip netns del $NETNS + +setup +chk_rps "rps_default_mask is 0 by default in child netns" "$NETNS" lo 0 + +ip netns exec $NETNS sysctl -qw net.core.rps_default_mask=1 +ip link add name $VETH type veth peer netns $NETNS name $VETH +chk_rps "changing rps_default_mask in child ns don't affect the main one" "" lo $INITIAL_RPS_DEFAULT_MASK +chk_rps "changing rps_default_mask in child ns affects new childns devices" $NETNS $VETH 1 +chk_rps "changing rps_default_mask in child ns don't affect existing devices" $NETNS lo 0 + +exit $ret diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh index c9ce3dfa42ee..bdf6f10d0558 100755 --- a/tools/testing/selftests/net/rtnetlink.sh +++ b/tools/testing/selftests/net/rtnetlink.sh @@ -4,10 +4,39 @@ # # set -e +ALL_TESTS=" + kci_test_polrouting + kci_test_route_get + kci_test_addrlft + kci_test_promote_secondaries + kci_test_tc + kci_test_gre + kci_test_gretap + kci_test_ip6gretap + kci_test_erspan + kci_test_ip6erspan + kci_test_bridge + kci_test_addrlabel + kci_test_ifalias + kci_test_vrf + kci_test_encap + kci_test_macsec + kci_test_macsec_offload + kci_test_ipsec + kci_test_ipsec_offload + kci_test_fdb_get + kci_test_neigh_get + kci_test_bridge_parent_id + kci_test_address_proto + kci_test_enslave_bonding +" + devdummy="test-dummy0" +VERBOSE=0 +PAUSE=no +PAUSE_ON_FAIL=no -# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 +source lib.sh # set global exit status, but never reset nonzero one. check_err() @@ -25,35 +54,102 @@ check_fail() fi } +run_cmd_common() +{ + local cmd="$*" + local out + if [ "$VERBOSE" = "1" ]; then + echo "COMMAND: ${cmd}" + fi + out=$($cmd 2>&1) + rc=$? + if [ "$VERBOSE" = "1" -a -n "$out" ]; then + echo " $out" + fi + return $rc +} + +run_cmd() { + run_cmd_common "$@" + rc=$? + check_err $rc + return $rc +} +run_cmd_fail() +{ + run_cmd_common "$@" + rc=$? + check_fail $rc + return $rc +} + +run_cmd_grep_common() +{ + local find="$1"; shift + local cmd="$*" + local out + if [ "$VERBOSE" = "1" ]; then + echo "COMMAND: ${cmd} 2>&1 | grep -q '${find}'" + fi + out=$($cmd 2>&1 | grep -q "${find}" 2>&1) + return $? +} + +run_cmd_grep() { + run_cmd_grep_common "$@" + rc=$? + check_err $rc + return $rc +} + +run_cmd_grep_fail() +{ + run_cmd_grep_common "$@" + rc=$? + check_fail $rc + return $rc +} + +end_test() +{ + echo "$*" + [ "${VERBOSE}" = "1" ] && echo + + if [[ $ret -ne 0 ]] && [[ "${PAUSE_ON_FAIL}" = "yes" ]]; then + echo "Hit enter to continue" + read a + fi; + + if [ "${PAUSE}" = "yes" ]; then + echo "Hit enter to continue" + read a + fi + +} + + kci_add_dummy() { - ip link add name "$devdummy" type dummy - check_err $? - ip link set "$devdummy" up - check_err $? + run_cmd ip link add name "$devdummy" type dummy + run_cmd ip link set "$devdummy" up } kci_del_dummy() { - ip link del dev "$devdummy" - check_err $? + run_cmd ip link del dev "$devdummy" } kci_test_netconf() { dev="$1" r=$ret - - ip netconf show dev "$dev" > /dev/null - check_err $? - + run_cmd ip netconf show dev "$dev" for f in 4 6; do - ip -$f netconf show dev "$dev" > /dev/null - check_err $? + run_cmd ip -$f netconf show dev "$dev" done if [ $ret -ne 0 ] ;then - echo "FAIL: ip netconf show $dev" + end_test "FAIL: ip netconf show $dev" test $r -eq 0 && ret=0 return 1 fi @@ -66,43 +162,27 @@ kci_test_bridge() vlandev="testbr-vlan1" local ret=0 - ip link add name "$devbr" type bridge - check_err $? - - ip link set dev "$devdummy" master "$devbr" - check_err $? - - ip link set "$devbr" up - check_err $? - - ip link add link "$devbr" name "$vlandev" type vlan id 1 - check_err $? - ip addr add dev "$vlandev" 10.200.7.23/30 - check_err $? - ip -6 addr add dev "$vlandev" dead:42::1234/64 - check_err $? - ip -d link > /dev/null - check_err $? - ip r s t all > /dev/null - check_err $? + run_cmd ip link add name "$devbr" type bridge + run_cmd ip link set dev "$devdummy" master "$devbr" + run_cmd ip link set "$devbr" up + run_cmd ip link add link "$devbr" name "$vlandev" type vlan id 1 + run_cmd ip addr add dev "$vlandev" 10.200.7.23/30 + run_cmd ip -6 addr add dev "$vlandev" dead:42::1234/64 + run_cmd ip -d link + run_cmd ip r s t all for name in "$devbr" "$vlandev" "$devdummy" ; do kci_test_netconf "$name" done - - ip -6 addr del dev "$vlandev" dead:42::1234/64 - check_err $? - - ip link del dev "$vlandev" - check_err $? - ip link del dev "$devbr" - check_err $? + run_cmd ip -6 addr del dev "$vlandev" dead:42::1234/64 + run_cmd ip link del dev "$vlandev" + run_cmd ip link del dev "$devbr" if [ $ret -ne 0 ];then - echo "FAIL: bridge setup" + end_test "FAIL: bridge setup" return 1 fi - echo "PASS: bridge setup" + end_test "PASS: bridge setup" } @@ -113,34 +193,23 @@ kci_test_gre() loc=10.0.0.1 local ret=0 - ip tunnel add $gredev mode gre remote $rem local $loc ttl 1 - check_err $? - ip link set $gredev up - check_err $? - ip addr add 10.23.7.10 dev $gredev - check_err $? - ip route add 10.23.8.0/30 dev $gredev - check_err $? - ip addr add dev "$devdummy" 10.23.7.11/24 - check_err $? - ip link > /dev/null - check_err $? - ip addr > /dev/null - check_err $? + run_cmd ip tunnel add $gredev mode gre remote $rem local $loc ttl 1 + run_cmd ip link set $gredev up + run_cmd ip addr add 10.23.7.10 dev $gredev + run_cmd ip route add 10.23.8.0/30 dev $gredev + run_cmd ip addr add dev "$devdummy" 10.23.7.11/24 + run_cmd ip link + run_cmd ip addr kci_test_netconf "$gredev" - - ip addr del dev "$devdummy" 10.23.7.11/24 - check_err $? - - ip link del $gredev - check_err $? + run_cmd ip addr del dev "$devdummy" 10.23.7.11/24 + run_cmd ip link del $gredev if [ $ret -ne 0 ];then - echo "FAIL: gre tunnel endpoint" + end_test "FAIL: gre tunnel endpoint" return 1 fi - echo "PASS: gre tunnel endpoint" + end_test "PASS: gre tunnel endpoint" } # tc uses rtnetlink too, for full tc testing @@ -150,56 +219,40 @@ kci_test_tc() dev=lo local ret=0 - tc qdisc add dev "$dev" root handle 1: htb - check_err $? - tc class add dev "$dev" parent 1: classid 1:10 htb rate 1mbit - check_err $? - tc filter add dev "$dev" parent 1:0 prio 5 handle ffe: protocol ip u32 divisor 256 - check_err $? - tc filter add dev "$dev" parent 1:0 prio 5 handle ffd: protocol ip u32 divisor 256 - check_err $? - tc filter add dev "$dev" parent 1:0 prio 5 handle ffc: protocol ip u32 divisor 256 - check_err $? - tc filter add dev "$dev" protocol ip parent 1: prio 5 handle ffe:2:3 u32 ht ffe:2: match ip src 10.0.0.3 flowid 1:10 - check_err $? - tc filter add dev "$dev" protocol ip parent 1: prio 5 handle ffe:2:2 u32 ht ffe:2: match ip src 10.0.0.2 flowid 1:10 - check_err $? - tc filter show dev "$dev" parent 1:0 > /dev/null - check_err $? - tc filter del dev "$dev" protocol ip parent 1: prio 5 handle ffe:2:3 u32 - check_err $? - tc filter show dev "$dev" parent 1:0 > /dev/null - check_err $? - tc qdisc del dev "$dev" root handle 1: htb - check_err $? + run_cmd tc qdisc add dev "$dev" root handle 1: htb + run_cmd tc class add dev "$dev" parent 1: classid 1:10 htb rate 1mbit + run_cmd tc filter add dev "$dev" parent 1:0 prio 5 handle ffe: protocol ip u32 divisor 256 + run_cmd tc filter add dev "$dev" parent 1:0 prio 5 handle ffd: protocol ip u32 divisor 256 + run_cmd tc filter add dev "$dev" parent 1:0 prio 5 handle ffc: protocol ip u32 divisor 256 + run_cmd tc filter add dev "$dev" protocol ip parent 1: prio 5 handle ffe:2:3 u32 ht ffe:2: match ip src 10.0.0.3 flowid 1:10 + run_cmd tc filter add dev "$dev" protocol ip parent 1: prio 5 handle ffe:2:2 u32 ht ffe:2: match ip src 10.0.0.2 flowid 1:10 + run_cmd tc filter show dev "$dev" parent 1:0 + run_cmd tc filter del dev "$dev" protocol ip parent 1: prio 5 handle ffe:2:3 u32 + run_cmd tc filter show dev "$dev" parent 1:0 + run_cmd tc qdisc del dev "$dev" root handle 1: htb if [ $ret -ne 0 ];then - echo "FAIL: tc htb hierarchy" + end_test "FAIL: tc htb hierarchy" return 1 fi - echo "PASS: tc htb hierarchy" + end_test "PASS: tc htb hierarchy" } kci_test_polrouting() { local ret=0 - ip rule add fwmark 1 lookup 100 - check_err $? - ip route add local 0.0.0.0/0 dev lo table 100 - check_err $? - ip r s t all > /dev/null - check_err $? - ip rule del fwmark 1 lookup 100 - check_err $? - ip route del local 0.0.0.0/0 dev lo table 100 - check_err $? + run_cmd ip rule add fwmark 1 lookup 100 + run_cmd ip route add local 0.0.0.0/0 dev lo table 100 + run_cmd ip r s t all + run_cmd ip rule del fwmark 1 lookup 100 + run_cmd ip route del local 0.0.0.0/0 dev lo table 100 if [ $ret -ne 0 ];then - echo "FAIL: policy route test" + end_test "FAIL: policy route test" return 1 fi - echo "PASS: policy routing" + end_test "PASS: policy routing" } kci_test_route_get() @@ -207,65 +260,51 @@ kci_test_route_get() local hash_policy=$(sysctl -n net.ipv4.fib_multipath_hash_policy) local ret=0 - - ip route get 127.0.0.1 > /dev/null - check_err $? - ip route get 127.0.0.1 dev "$devdummy" > /dev/null - check_err $? - ip route get ::1 > /dev/null - check_err $? - ip route get fe80::1 dev "$devdummy" > /dev/null - check_err $? - ip route get 127.0.0.1 from 127.0.0.1 oif lo tos 0x1 mark 0x1 > /dev/null - check_err $? - ip route get ::1 from ::1 iif lo oif lo tos 0x1 mark 0x1 > /dev/null - check_err $? - ip addr add dev "$devdummy" 10.23.7.11/24 - check_err $? - ip route get 10.23.7.11 from 10.23.7.12 iif "$devdummy" > /dev/null - check_err $? - ip route add 10.23.8.0/24 \ + run_cmd ip route get 127.0.0.1 + run_cmd ip route get 127.0.0.1 dev "$devdummy" + run_cmd ip route get ::1 + run_cmd ip route get fe80::1 dev "$devdummy" + run_cmd ip route get 127.0.0.1 from 127.0.0.1 oif lo tos 0x10 mark 0x1 + run_cmd ip route get ::1 from ::1 iif lo oif lo tos 0x10 mark 0x1 + run_cmd ip addr add dev "$devdummy" 10.23.7.11/24 + run_cmd ip route get 10.23.7.11 from 10.23.7.12 iif "$devdummy" + run_cmd ip route add 10.23.8.0/24 \ nexthop via 10.23.7.13 dev "$devdummy" \ nexthop via 10.23.7.14 dev "$devdummy" - check_err $? + sysctl -wq net.ipv4.fib_multipath_hash_policy=0 - ip route get 10.23.8.11 > /dev/null - check_err $? + run_cmd ip route get 10.23.8.11 sysctl -wq net.ipv4.fib_multipath_hash_policy=1 - ip route get 10.23.8.11 > /dev/null - check_err $? + run_cmd ip route get 10.23.8.11 sysctl -wq net.ipv4.fib_multipath_hash_policy="$hash_policy" - ip route del 10.23.8.0/24 - check_err $? - ip addr del dev "$devdummy" 10.23.7.11/24 - check_err $? + run_cmd ip route del 10.23.8.0/24 + run_cmd ip addr del dev "$devdummy" 10.23.7.11/24 + if [ $ret -ne 0 ];then - echo "FAIL: route get" + end_test "FAIL: route get" return 1 fi - echo "PASS: route get" + end_test "PASS: route get" } kci_test_addrlft() { for i in $(seq 10 100) ;do lft=$(((RANDOM%3) + 1)) - ip addr add 10.23.11.$i/32 dev "$devdummy" preferred_lft $lft valid_lft $((lft+1)) - check_err $? + run_cmd ip addr add 10.23.11.$i/32 dev "$devdummy" preferred_lft $lft valid_lft $((lft+1)) done sleep 5 - - ip addr show dev "$devdummy" | grep "10.23.11." + run_cmd_grep_fail "10.23.11." ip addr show dev "$devdummy" if [ $? -eq 0 ]; then - echo "FAIL: preferred_lft addresses remaining" check_err 1 + end_test "FAIL: preferred_lft addresses remaining" return fi - echo "PASS: preferred_lft addresses have expired" + end_test "PASS: preferred_lft addresses have expired" } kci_test_promote_secondaries() @@ -284,27 +323,17 @@ kci_test_promote_secondaries() [ $promote -eq 0 ] && sysctl -q net.ipv4.conf.$devdummy.promote_secondaries=0 - echo "PASS: promote_secondaries complete" + end_test "PASS: promote_secondaries complete" } kci_test_addrlabel() { local ret=0 - - ip addrlabel add prefix dead::/64 dev lo label 1 - check_err $? - - ip addrlabel list |grep -q "prefix dead::/64 dev lo label 1" - check_err $? - - ip addrlabel del prefix dead::/64 dev lo label 1 2> /dev/null - check_err $? - - ip addrlabel add prefix dead::/64 label 1 2> /dev/null - check_err $? - - ip addrlabel del prefix dead::/64 label 1 2> /dev/null - check_err $? + run_cmd ip addrlabel add prefix dead::/64 dev lo label 1 + run_cmd_grep "prefix dead::/64 dev lo label 1" ip addrlabel list + run_cmd ip addrlabel del prefix dead::/64 dev lo label 1 + run_cmd ip addrlabel add prefix dead::/64 label 1 + run_cmd ip addrlabel del prefix dead::/64 label 1 # concurrent add/delete for i in $(seq 1 1000); do @@ -320,11 +349,11 @@ kci_test_addrlabel() ip addrlabel del prefix 1c3::/64 label 12345 2>/dev/null if [ $ret -ne 0 ];then - echo "FAIL: ipv6 addrlabel" + end_test "FAIL: ipv6 addrlabel" return 1 fi - echo "PASS: ipv6 addrlabel" + end_test "PASS: ipv6 addrlabel" } kci_test_ifalias() @@ -332,35 +361,28 @@ kci_test_ifalias() local ret=0 namewant=$(uuidgen) syspathname="/sys/class/net/$devdummy/ifalias" - - ip link set dev "$devdummy" alias "$namewant" - check_err $? + run_cmd ip link set dev "$devdummy" alias "$namewant" if [ $ret -ne 0 ]; then - echo "FAIL: cannot set interface alias of $devdummy to $namewant" + end_test "FAIL: cannot set interface alias of $devdummy to $namewant" return 1 fi - - ip link show "$devdummy" | grep -q "alias $namewant" - check_err $? + run_cmd_grep "alias $namewant" ip link show "$devdummy" if [ -r "$syspathname" ] ; then read namehave < "$syspathname" if [ "$namewant" != "$namehave" ]; then - echo "FAIL: did set ifalias $namewant but got $namehave" + end_test "FAIL: did set ifalias $namewant but got $namehave" return 1 fi namewant=$(uuidgen) echo "$namewant" > "$syspathname" - ip link show "$devdummy" | grep -q "alias $namewant" - check_err $? + run_cmd_grep "alias $namewant" ip link show "$devdummy" # sysfs interface allows to delete alias again echo "" > "$syspathname" - - ip link show "$devdummy" | grep -q "alias $namewant" - check_fail $? + run_cmd_grep_fail "alias $namewant" ip link show "$devdummy" for i in $(seq 1 100); do uuidgen > "$syspathname" & @@ -369,57 +391,48 @@ kci_test_ifalias() wait # re-add the alias -- kernel should free mem when dummy dev is removed - ip link set dev "$devdummy" alias "$namewant" - check_err $? + run_cmd ip link set dev "$devdummy" alias "$namewant" + fi if [ $ret -ne 0 ]; then - echo "FAIL: set interface alias $devdummy to $namewant" + end_test "FAIL: set interface alias $devdummy to $namewant" return 1 fi - echo "PASS: set ifalias $namewant for $devdummy" + end_test "PASS: set ifalias $namewant for $devdummy" } kci_test_vrf() { vrfname="test-vrf" local ret=0 - - ip link show type vrf 2>/dev/null + run_cmd ip link show type vrf if [ $? -ne 0 ]; then - echo "SKIP: vrf: iproute2 too old" + end_test "SKIP: vrf: iproute2 too old" return $ksft_skip fi - - ip link add "$vrfname" type vrf table 10 - check_err $? + run_cmd ip link add "$vrfname" type vrf table 10 if [ $ret -ne 0 ];then - echo "FAIL: can't add vrf interface, skipping test" + end_test "FAIL: can't add vrf interface, skipping test" return 0 fi - - ip -br link show type vrf | grep -q "$vrfname" - check_err $? + run_cmd_grep "$vrfname" ip -br link show type vrf if [ $ret -ne 0 ];then - echo "FAIL: created vrf device not found" + end_test "FAIL: created vrf device not found" return 1 fi - ip link set dev "$vrfname" up - check_err $? - - ip link set dev "$devdummy" master "$vrfname" - check_err $? - ip link del dev "$vrfname" - check_err $? + run_cmd ip link set dev "$vrfname" up + run_cmd ip link set dev "$devdummy" master "$vrfname" + run_cmd ip link del dev "$vrfname" if [ $ret -ne 0 ];then - echo "FAIL: vrf" + end_test "FAIL: vrf" return 1 fi - echo "PASS: vrf" + end_test "PASS: vrf" } kci_test_encap_vxlan() @@ -427,151 +440,92 @@ kci_test_encap_vxlan() local ret=0 vxlan="test-vxlan0" vlan="test-vlan0" - testns="$1" - - ip -netns "$testns" link add "$vxlan" type vxlan id 42 group 239.1.1.1 \ - dev "$devdummy" dstport 4789 2>/dev/null + run_cmd ip -netns "$testns" link add "$vxlan" type vxlan id 42 group 239.1.1.1 \ + dev "$devdummy" dstport 4789 if [ $? -ne 0 ]; then - echo "FAIL: can't add vxlan interface, skipping test" + end_test "FAIL: can't add vxlan interface, skipping test" return 0 fi - check_err $? - - ip -netns "$testns" addr add 10.2.11.49/24 dev "$vxlan" - check_err $? - ip -netns "$testns" link set up dev "$vxlan" - check_err $? - - ip -netns "$testns" link add link "$vxlan" name "$vlan" type vlan id 1 - check_err $? + run_cmd ip -netns "$testns" addr add 10.2.11.49/24 dev "$vxlan" + run_cmd ip -netns "$testns" link set up dev "$vxlan" + run_cmd ip -netns "$testns" link add link "$vxlan" name "$vlan" type vlan id 1 # changelink testcases - ip -netns "$testns" link set dev "$vxlan" type vxlan vni 43 2>/dev/null - check_fail $? - - ip -netns "$testns" link set dev "$vxlan" type vxlan group ffe5::5 dev "$devdummy" 2>/dev/null - check_fail $? - - ip -netns "$testns" link set dev "$vxlan" type vxlan ttl inherit 2>/dev/null - check_fail $? - - ip -netns "$testns" link set dev "$vxlan" type vxlan ttl 64 - check_err $? - - ip -netns "$testns" link set dev "$vxlan" type vxlan nolearning - check_err $? - - ip -netns "$testns" link set dev "$vxlan" type vxlan proxy 2>/dev/null - check_fail $? - - ip -netns "$testns" link set dev "$vxlan" type vxlan norsc 2>/dev/null - check_fail $? - - ip -netns "$testns" link set dev "$vxlan" type vxlan l2miss 2>/dev/null - check_fail $? - - ip -netns "$testns" link set dev "$vxlan" type vxlan l3miss 2>/dev/null - check_fail $? - - ip -netns "$testns" link set dev "$vxlan" type vxlan external 2>/dev/null - check_fail $? - - ip -netns "$testns" link set dev "$vxlan" type vxlan udpcsum 2>/dev/null - check_fail $? - - ip -netns "$testns" link set dev "$vxlan" type vxlan udp6zerocsumtx 2>/dev/null - check_fail $? - - ip -netns "$testns" link set dev "$vxlan" type vxlan udp6zerocsumrx 2>/dev/null - check_fail $? - - ip -netns "$testns" link set dev "$vxlan" type vxlan remcsumtx 2>/dev/null - check_fail $? - - ip -netns "$testns" link set dev "$vxlan" type vxlan remcsumrx 2>/dev/null - check_fail $? - - ip -netns "$testns" link set dev "$vxlan" type vxlan gbp 2>/dev/null - check_fail $? - - ip -netns "$testns" link set dev "$vxlan" type vxlan gpe 2>/dev/null - check_fail $? - - ip -netns "$testns" link del "$vxlan" - check_err $? + run_cmd_fail ip -netns "$testns" link set dev "$vxlan" type vxlan vni 43 + run_cmd_fail ip -netns "$testns" link set dev "$vxlan" type vxlan group ffe5::5 dev "$devdummy" + run_cmd_fail ip -netns "$testns" link set dev "$vxlan" type vxlan ttl inherit + + run_cmd ip -netns "$testns" link set dev "$vxlan" type vxlan ttl 64 + run_cmd ip -netns "$testns" link set dev "$vxlan" type vxlan nolearning + + run_cmd_fail ip -netns "$testns" link set dev "$vxlan" type vxlan proxy + run_cmd_fail ip -netns "$testns" link set dev "$vxlan" type vxlan norsc + run_cmd_fail ip -netns "$testns" link set dev "$vxlan" type vxlan l2miss + run_cmd_fail ip -netns "$testns" link set dev "$vxlan" type vxlan l3miss + run_cmd_fail ip -netns "$testns" link set dev "$vxlan" type vxlan external + run_cmd_fail ip -netns "$testns" link set dev "$vxlan" type vxlan udpcsum + run_cmd_fail ip -netns "$testns" link set dev "$vxlan" type vxlan udp6zerocsumtx + run_cmd_fail ip -netns "$testns" link set dev "$vxlan" type vxlan udp6zerocsumrx + run_cmd_fail ip -netns "$testns" link set dev "$vxlan" type vxlan remcsumtx + run_cmd_fail ip -netns "$testns" link set dev "$vxlan" type vxlan remcsumrx + run_cmd_fail ip -netns "$testns" link set dev "$vxlan" type vxlan gbp + run_cmd_fail ip -netns "$testns" link set dev "$vxlan" type vxlan gpe + run_cmd ip -netns "$testns" link del "$vxlan" if [ $ret -ne 0 ]; then - echo "FAIL: vxlan" + end_test "FAIL: vxlan" return 1 fi - echo "PASS: vxlan" + end_test "PASS: vxlan" } kci_test_encap_fou() { local ret=0 name="test-fou" - testns="$1" - - ip fou help 2>&1 |grep -q 'Usage: ip fou' + run_cmd_grep 'Usage: ip fou' ip fou help if [ $? -ne 0 ];then - echo "SKIP: fou: iproute2 too old" + end_test "SKIP: fou: iproute2 too old" return $ksft_skip fi if ! /sbin/modprobe -q -n fou; then - echo "SKIP: module fou is not found" + end_test "SKIP: module fou is not found" return $ksft_skip fi /sbin/modprobe -q fou - ip -netns "$testns" fou add port 7777 ipproto 47 2>/dev/null + + run_cmd ip -netns "$testns" fou add port 7777 ipproto 47 if [ $? -ne 0 ];then - echo "FAIL: can't add fou port 7777, skipping test" + end_test "FAIL: can't add fou port 7777, skipping test" return 1 fi - - ip -netns "$testns" fou add port 8888 ipproto 4 - check_err $? - - ip -netns "$testns" fou del port 9999 2>/dev/null - check_fail $? - - ip -netns "$testns" fou del port 7777 - check_err $? - + run_cmd ip -netns "$testns" fou add port 8888 ipproto 4 + run_cmd_fail ip -netns "$testns" fou del port 9999 + run_cmd ip -netns "$testns" fou del port 7777 if [ $ret -ne 0 ]; then - echo "FAIL: fou" + end_test "FAIL: fou"s return 1 fi - echo "PASS: fou" + end_test "PASS: fou" } # test various encap methods, use netns to avoid unwanted interference kci_test_encap() { - testns="testns" local ret=0 - - ip netns add "$testns" + setup_ns testns if [ $? -ne 0 ]; then - echo "SKIP encap tests: cannot add net namespace $testns" + end_test "SKIP encap tests: cannot add net namespace $testns" return $ksft_skip fi - - ip -netns "$testns" link set lo up - check_err $? - - ip -netns "$testns" link add name "$devdummy" type dummy - check_err $? - ip -netns "$testns" link set "$devdummy" up - check_err $? - - kci_test_encap_vxlan "$testns" - check_err $? - kci_test_encap_fou "$testns" - check_err $? + run_cmd ip -netns "$testns" link set lo up + run_cmd ip -netns "$testns" link add name "$devdummy" type dummy + run_cmd ip -netns "$testns" link set "$devdummy" up + run_cmd kci_test_encap_vxlan + run_cmd kci_test_encap_fou ip netns del "$testns" return $ret @@ -581,41 +535,95 @@ kci_test_macsec() { msname="test_macsec0" local ret=0 - - ip macsec help 2>&1 | grep -q "^Usage: ip macsec" + run_cmd_grep "^Usage: ip macsec" ip macsec help if [ $? -ne 0 ]; then - echo "SKIP: macsec: iproute2 too old" + end_test "SKIP: macsec: iproute2 too old" return $ksft_skip fi + run_cmd ip link add link "$devdummy" "$msname" type macsec port 42 encrypt on + if [ $ret -ne 0 ];then + end_test "FAIL: can't add macsec interface, skipping test" + return 1 + fi + run_cmd ip macsec add "$msname" tx sa 0 pn 1024 on key 01 12345678901234567890123456789012 + run_cmd ip macsec add "$msname" rx port 1234 address "1c:ed:de:ad:be:ef" + run_cmd ip macsec add "$msname" rx port 1234 address "1c:ed:de:ad:be:ef" sa 0 pn 1 on key 00 0123456789abcdef0123456789abcdef + run_cmd ip macsec show + run_cmd ip link del dev "$msname" - ip link add link "$devdummy" "$msname" type macsec port 42 encrypt on - check_err $? if [ $ret -ne 0 ];then - echo "FAIL: can't add macsec interface, skipping test" + end_test "FAIL: macsec" return 1 fi - ip macsec add "$msname" tx sa 0 pn 1024 on key 01 12345678901234567890123456789012 - check_err $? + end_test "PASS: macsec" +} - ip macsec add "$msname" rx port 1234 address "1c:ed:de:ad:be:ef" - check_err $? +kci_test_macsec_offload() +{ + sysfsd=/sys/kernel/debug/netdevsim/netdevsim0/ports/0/ + sysfsnet=/sys/bus/netdevsim/devices/netdevsim0/net/ + probed=false + local ret=0 + run_cmd_grep "^Usage: ip macsec" ip macsec help + if [ $? -ne 0 ]; then + end_test "SKIP: macsec: iproute2 too old" + return $ksft_skip + fi - ip macsec add "$msname" rx port 1234 address "1c:ed:de:ad:be:ef" sa 0 pn 1 on key 00 0123456789abcdef0123456789abcdef - check_err $? + if ! mount | grep -q debugfs; then + mount -t debugfs none /sys/kernel/debug/ &> /dev/null + fi - ip macsec show > /dev/null - check_err $? + # setup netdevsim since dummydev doesn't have offload support + if [ ! -w /sys/bus/netdevsim/new_device ] ; then + run_cmd modprobe -q netdevsim - ip link del dev "$msname" - check_err $? + if [ $ret -ne 0 ]; then + end_test "SKIP: macsec_offload can't load netdevsim" + return $ksft_skip + fi + probed=true + fi - if [ $ret -ne 0 ];then - echo "FAIL: macsec" + echo "0" > /sys/bus/netdevsim/new_device + while [ ! -d $sysfsnet ] ; do :; done + udevadm settle + dev=`ls $sysfsnet` + + ip link set $dev up + if [ ! -d $sysfsd ] ; then + end_test "FAIL: macsec_offload can't create device $dev" + return 1 + fi + run_cmd_grep 'macsec-hw-offload: on' ethtool -k $dev + if [ $? -eq 1 ] ; then + end_test "FAIL: macsec_offload netdevsim doesn't support MACsec offload" return 1 fi + run_cmd ip link add link $dev kci_macsec1 type macsec port 4 offload mac + run_cmd ip link add link $dev kci_macsec2 type macsec address "aa:bb:cc:dd:ee:ff" port 5 offload mac + run_cmd ip link add link $dev kci_macsec3 type macsec sci abbacdde01020304 offload mac + run_cmd_fail ip link add link $dev kci_macsec4 type macsec port 8 offload mac + + msname=kci_macsec1 + run_cmd ip macsec add "$msname" tx sa 0 pn 1024 on key 01 12345678901234567890123456789012 + run_cmd ip macsec add "$msname" rx port 1234 address "1c:ed:de:ad:be:ef" + run_cmd ip macsec add "$msname" rx port 1234 address "1c:ed:de:ad:be:ef" sa 0 pn 1 on \ + key 00 0123456789abcdef0123456789abcdef + run_cmd_fail ip macsec add "$msname" rx port 1235 address "1c:ed:de:ad:be:ef" + # clean up any leftovers + for msdev in kci_macsec{1,2,3,4} ; do + ip link del $msdev 2> /dev/null + done + echo 0 > /sys/bus/netdevsim/del_device + $probed && rmmod netdevsim - echo "PASS: macsec" + if [ $ret -ne 0 ]; then + end_test "FAIL: macsec_offload" + return 1 + fi + end_test "PASS: macsec_offload" } #------------------------------------------------------------------- @@ -647,8 +655,7 @@ kci_test_ipsec() ip addr add $srcip dev $devdummy # flush to be sure there's nothing configured - ip x s flush ; ip x p flush - check_err $? + run_cmd ip x s flush ; ip x p flush # start the monitor in the background tmpfile=`mktemp /var/run/ipsectestXXX` @@ -656,72 +663,57 @@ kci_test_ipsec() sleep 0.2 ipsecid="proto esp src $srcip dst $dstip spi 0x07" - ip x s add $ipsecid \ + run_cmd ip x s add $ipsecid \ mode transport reqid 0x07 replay-window 32 \ $algo sel src $srcip/24 dst $dstip/24 - check_err $? - lines=`ip x s list | grep $srcip | grep $dstip | wc -l` - test $lines -eq 2 - check_err $? - ip x s count | grep -q "SAD count 1" - check_err $? + lines=`ip x s list | grep $srcip | grep $dstip | wc -l` + run_cmd test $lines -eq 2 + run_cmd_grep "SAD count 1" ip x s count lines=`ip x s get $ipsecid | grep $srcip | grep $dstip | wc -l` - test $lines -eq 2 - check_err $? - - ip x s delete $ipsecid - check_err $? + run_cmd test $lines -eq 2 + run_cmd ip x s delete $ipsecid lines=`ip x s list | wc -l` - test $lines -eq 0 - check_err $? + run_cmd test $lines -eq 0 ipsecsel="dir out src $srcip/24 dst $dstip/24" - ip x p add $ipsecsel \ + run_cmd ip x p add $ipsecsel \ tmpl proto esp src $srcip dst $dstip \ spi 0x07 mode transport reqid 0x07 - check_err $? + lines=`ip x p list | grep $srcip | grep $dstip | wc -l` - test $lines -eq 2 - check_err $? + run_cmd test $lines -eq 2 - ip x p count | grep -q "SPD IN 0 OUT 1 FWD 0" - check_err $? + run_cmd_grep "SPD IN 0 OUT 1 FWD 0" ip x p count lines=`ip x p get $ipsecsel | grep $srcip | grep $dstip | wc -l` - test $lines -eq 2 - check_err $? + run_cmd test $lines -eq 2 - ip x p delete $ipsecsel - check_err $? + run_cmd ip x p delete $ipsecsel lines=`ip x p list | wc -l` - test $lines -eq 0 - check_err $? + run_cmd test $lines -eq 0 # check the monitor results kill $mpid lines=`wc -l $tmpfile | cut "-d " -f1` - test $lines -eq 20 - check_err $? + run_cmd test $lines -eq 20 rm -rf $tmpfile # clean up any leftovers - ip x s flush - check_err $? - ip x p flush - check_err $? + run_cmd ip x s flush + run_cmd ip x p flush ip addr del $srcip/32 dev $devdummy if [ $ret -ne 0 ]; then - echo "FAIL: ipsec" + end_test "FAIL: ipsec" return 1 fi - echo "PASS: ipsec" + end_test "PASS: ipsec" } #------------------------------------------------------------------- @@ -747,12 +739,15 @@ kci_test_ipsec_offload() sysfsnet=/sys/bus/netdevsim/devices/netdevsim0/net/ probed=false + if ! mount | grep -q debugfs; then + mount -t debugfs none /sys/kernel/debug/ &> /dev/null + fi + # setup netdevsim since dummydev doesn't have offload support if [ ! -w /sys/bus/netdevsim/new_device ] ; then - modprobe -q netdevsim - check_err $? + run_cmd modprobe -q netdevsim if [ $ret -ne 0 ]; then - echo "SKIP: ipsec_offload can't load netdevsim" + end_test "SKIP: ipsec_offload can't load netdevsim" return $ksft_skip fi probed=true @@ -766,11 +761,11 @@ kci_test_ipsec_offload() ip addr add $srcip dev $dev ip link set $dev up if [ ! -d $sysfsd ] ; then - echo "FAIL: ipsec_offload can't create device $dev" + end_test "FAIL: ipsec_offload can't create device $dev" return 1 fi if [ ! -f $sysfsf ] ; then - echo "FAIL: ipsec_offload netdevsim doesn't support IPsec offload" + end_test "FAIL: ipsec_offload netdevsim doesn't support IPsec offload" return 1 fi @@ -778,40 +773,41 @@ kci_test_ipsec_offload() ip x s flush ; ip x p flush # create offloaded SAs, both in and out - ip x p add dir out src $srcip/24 dst $dstip/24 \ + run_cmd ip x p add dir out src $srcip/24 dst $dstip/24 \ tmpl proto esp src $srcip dst $dstip spi 9 \ mode transport reqid 42 - check_err $? - ip x p add dir out src $dstip/24 dst $srcip/24 \ + + run_cmd ip x p add dir in src $dstip/24 dst $srcip/24 \ tmpl proto esp src $dstip dst $srcip spi 9 \ mode transport reqid 42 - check_err $? - ip x s add proto esp src $srcip dst $dstip spi 9 \ + run_cmd ip x s add proto esp src $srcip dst $dstip spi 9 \ mode transport reqid 42 $algo sel src $srcip/24 dst $dstip/24 \ offload dev $dev dir out - check_err $? - ip x s add proto esp src $dstip dst $srcip spi 9 \ + + run_cmd ip x s add proto esp src $dstip dst $srcip spi 9 \ mode transport reqid 42 $algo sel src $dstip/24 dst $srcip/24 \ offload dev $dev dir in - check_err $? + if [ $ret -ne 0 ]; then - echo "FAIL: ipsec_offload can't create SA" + end_test "FAIL: ipsec_offload can't create SA" return 1 fi # does offload show up in ip output lines=`ip x s list | grep -c "crypto offload parameters: dev $dev dir"` if [ $lines -ne 2 ] ; then - echo "FAIL: ipsec_offload SA offload missing from list output" check_err 1 + end_test "FAIL: ipsec_offload SA offload missing from list output" fi + # we didn't create a peer, make sure we can Tx + ip neigh add $dstip dev $dev lladdr 00:11:22:33:44:55 # use ping to exercise the Tx path ping -I $dev -c 3 -W 1 -i 0 $dstip >/dev/null # does driver have correct offload info - diff $sysfsf - << EOF + run_cmd diff $sysfsf - << EOF SA count=2 tx=3 sa[0] tx ipaddr=0x00000000 00000000 00000000 00000000 sa[0] spi=0x00000009 proto=0x32 salt=0x61626364 crypt=1 @@ -821,7 +817,7 @@ sa[1] spi=0x00000009 proto=0x32 salt=0x61626364 crypt=1 sa[1] key=0x34333231 38373635 32313039 36353433 EOF if [ $? -ne 0 ] ; then - echo "FAIL: ipsec_offload incorrect driver data" + end_test "FAIL: ipsec_offload incorrect driver data" check_err 1 fi @@ -830,255 +826,205 @@ EOF ip x p flush lines=`grep -c "SA count=0" $sysfsf` if [ $lines -ne 1 ] ; then - echo "FAIL: ipsec_offload SA not removed from driver" check_err 1 + end_test "FAIL: ipsec_offload SA not removed from driver" fi # clean up any leftovers + echo 0 > /sys/bus/netdevsim/del_device $probed && rmmod netdevsim if [ $ret -ne 0 ]; then - echo "FAIL: ipsec_offload" + end_test "FAIL: ipsec_offload" return 1 fi - echo "PASS: ipsec_offload" + end_test "PASS: ipsec_offload" } kci_test_gretap() { - testns="testns" DEV_NS=gretap00 local ret=0 - ip netns add "$testns" + setup_ns testns if [ $? -ne 0 ]; then - echo "SKIP gretap tests: cannot add net namespace $testns" + end_test "SKIP gretap tests: cannot add net namespace $testns" return $ksft_skip fi - ip link help gretap 2>&1 | grep -q "^Usage:" + run_cmd_grep "^Usage:" ip link help gretap if [ $? -ne 0 ];then - echo "SKIP: gretap: iproute2 too old" + end_test "SKIP: gretap: iproute2 too old" ip netns del "$testns" return $ksft_skip fi # test native tunnel - ip -netns "$testns" link add dev "$DEV_NS" type gretap seq \ + run_cmd ip -netns "$testns" link add dev "$DEV_NS" type gretap seq \ key 102 local 172.16.1.100 remote 172.16.1.200 - check_err $? - - ip -netns "$testns" addr add dev "$DEV_NS" 10.1.1.100/24 - check_err $? - ip -netns "$testns" link set dev $DEV_NS up - check_err $? - ip -netns "$testns" link del "$DEV_NS" - check_err $? + run_cmd ip -netns "$testns" addr add dev "$DEV_NS" 10.1.1.100/24 + run_cmd ip -netns "$testns" link set dev $DEV_NS up + run_cmd ip -netns "$testns" link del "$DEV_NS" # test external mode - ip -netns "$testns" link add dev "$DEV_NS" type gretap external - check_err $? - - ip -netns "$testns" link del "$DEV_NS" - check_err $? + run_cmd ip -netns "$testns" link add dev "$DEV_NS" type gretap external + run_cmd ip -netns "$testns" link del "$DEV_NS" if [ $ret -ne 0 ]; then - echo "FAIL: gretap" + end_test "FAIL: gretap" ip netns del "$testns" return 1 fi - echo "PASS: gretap" + end_test "PASS: gretap" ip netns del "$testns" } kci_test_ip6gretap() { - testns="testns" DEV_NS=ip6gretap00 local ret=0 - ip netns add "$testns" + setup_ns testns if [ $? -ne 0 ]; then - echo "SKIP ip6gretap tests: cannot add net namespace $testns" + end_test "SKIP ip6gretap tests: cannot add net namespace $testns" return $ksft_skip fi - ip link help ip6gretap 2>&1 | grep -q "^Usage:" + run_cmd_grep "^Usage:" ip link help ip6gretap if [ $? -ne 0 ];then - echo "SKIP: ip6gretap: iproute2 too old" + end_test "SKIP: ip6gretap: iproute2 too old" ip netns del "$testns" return $ksft_skip fi # test native tunnel - ip -netns "$testns" link add dev "$DEV_NS" type ip6gretap seq \ + run_cmd ip -netns "$testns" link add dev "$DEV_NS" type ip6gretap seq \ key 102 local fc00:100::1 remote fc00:100::2 - check_err $? - - ip -netns "$testns" addr add dev "$DEV_NS" fc00:200::1/96 - check_err $? - ip -netns "$testns" link set dev $DEV_NS up - check_err $? - ip -netns "$testns" link del "$DEV_NS" - check_err $? + run_cmd ip -netns "$testns" addr add dev "$DEV_NS" fc00:200::1/96 + run_cmd ip -netns "$testns" link set dev $DEV_NS up + run_cmd ip -netns "$testns" link del "$DEV_NS" # test external mode - ip -netns "$testns" link add dev "$DEV_NS" type ip6gretap external - check_err $? - - ip -netns "$testns" link del "$DEV_NS" - check_err $? + run_cmd ip -netns "$testns" link add dev "$DEV_NS" type ip6gretap external + run_cmd ip -netns "$testns" link del "$DEV_NS" if [ $ret -ne 0 ]; then - echo "FAIL: ip6gretap" + end_test "FAIL: ip6gretap" ip netns del "$testns" return 1 fi - echo "PASS: ip6gretap" + end_test "PASS: ip6gretap" ip netns del "$testns" } kci_test_erspan() { - testns="testns" DEV_NS=erspan00 local ret=0 - - ip link help erspan 2>&1 | grep -q "^Usage:" + run_cmd_grep "^Usage:" ip link help erspan if [ $? -ne 0 ];then - echo "SKIP: erspan: iproute2 too old" + end_test "SKIP: erspan: iproute2 too old" return $ksft_skip fi - - ip netns add "$testns" + setup_ns testns if [ $? -ne 0 ]; then - echo "SKIP erspan tests: cannot add net namespace $testns" + end_test "SKIP erspan tests: cannot add net namespace $testns" return $ksft_skip fi # test native tunnel erspan v1 - ip -netns "$testns" link add dev "$DEV_NS" type erspan seq \ + run_cmd ip -netns "$testns" link add dev "$DEV_NS" type erspan seq \ key 102 local 172.16.1.100 remote 172.16.1.200 \ erspan_ver 1 erspan 488 - check_err $? - ip -netns "$testns" addr add dev "$DEV_NS" 10.1.1.100/24 - check_err $? - ip -netns "$testns" link set dev $DEV_NS up - check_err $? - - ip -netns "$testns" link del "$DEV_NS" - check_err $? + run_cmd ip -netns "$testns" addr add dev "$DEV_NS" 10.1.1.100/24 + run_cmd ip -netns "$testns" link set dev $DEV_NS up + run_cmd ip -netns "$testns" link del "$DEV_NS" # test native tunnel erspan v2 - ip -netns "$testns" link add dev "$DEV_NS" type erspan seq \ + run_cmd ip -netns "$testns" link add dev "$DEV_NS" type erspan seq \ key 102 local 172.16.1.100 remote 172.16.1.200 \ erspan_ver 2 erspan_dir ingress erspan_hwid 7 - check_err $? - ip -netns "$testns" addr add dev "$DEV_NS" 10.1.1.100/24 - check_err $? - - ip -netns "$testns" link set dev $DEV_NS up - check_err $? - ip -netns "$testns" link del "$DEV_NS" - check_err $? + run_cmd ip -netns "$testns" addr add dev "$DEV_NS" 10.1.1.100/24 + run_cmd ip -netns "$testns" link set dev $DEV_NS up + run_cmd ip -netns "$testns" link del "$DEV_NS" # test external mode - ip -netns "$testns" link add dev "$DEV_NS" type erspan external - check_err $? - - ip -netns "$testns" link del "$DEV_NS" - check_err $? + run_cmd ip -netns "$testns" link add dev "$DEV_NS" type erspan external + run_cmd ip -netns "$testns" link del "$DEV_NS" if [ $ret -ne 0 ]; then - echo "FAIL: erspan" + end_test "FAIL: erspan" ip netns del "$testns" return 1 fi - echo "PASS: erspan" + end_test "PASS: erspan" ip netns del "$testns" } kci_test_ip6erspan() { - testns="testns" DEV_NS=ip6erspan00 local ret=0 - - ip link help ip6erspan 2>&1 | grep -q "^Usage:" + run_cmd_grep "^Usage:" ip link help ip6erspan if [ $? -ne 0 ];then - echo "SKIP: ip6erspan: iproute2 too old" + end_test "SKIP: ip6erspan: iproute2 too old" return $ksft_skip fi - - ip netns add "$testns" + setup_ns testns if [ $? -ne 0 ]; then - echo "SKIP ip6erspan tests: cannot add net namespace $testns" + end_test "SKIP ip6erspan tests: cannot add net namespace $testns" return $ksft_skip fi # test native tunnel ip6erspan v1 - ip -netns "$testns" link add dev "$DEV_NS" type ip6erspan seq \ + run_cmd ip -netns "$testns" link add dev "$DEV_NS" type ip6erspan seq \ key 102 local fc00:100::1 remote fc00:100::2 \ erspan_ver 1 erspan 488 - check_err $? - ip -netns "$testns" addr add dev "$DEV_NS" 10.1.1.100/24 - check_err $? - ip -netns "$testns" link set dev $DEV_NS up - check_err $? - - ip -netns "$testns" link del "$DEV_NS" - check_err $? + run_cmd ip -netns "$testns" addr add dev "$DEV_NS" 10.1.1.100/24 + run_cmd ip -netns "$testns" link set dev $DEV_NS up + run_cmd ip -netns "$testns" link del "$DEV_NS" # test native tunnel ip6erspan v2 - ip -netns "$testns" link add dev "$DEV_NS" type ip6erspan seq \ + run_cmd ip -netns "$testns" link add dev "$DEV_NS" type ip6erspan seq \ key 102 local fc00:100::1 remote fc00:100::2 \ erspan_ver 2 erspan_dir ingress erspan_hwid 7 - check_err $? - - ip -netns "$testns" addr add dev "$DEV_NS" 10.1.1.100/24 - check_err $? - ip -netns "$testns" link set dev $DEV_NS up - check_err $? - ip -netns "$testns" link del "$DEV_NS" - check_err $? + run_cmd ip -netns "$testns" addr add dev "$DEV_NS" 10.1.1.100/24 + run_cmd ip -netns "$testns" link set dev $DEV_NS up + run_cmd ip -netns "$testns" link del "$DEV_NS" # test external mode - ip -netns "$testns" link add dev "$DEV_NS" \ + run_cmd ip -netns "$testns" link add dev "$DEV_NS" \ type ip6erspan external - check_err $? - ip -netns "$testns" link del "$DEV_NS" - check_err $? + run_cmd ip -netns "$testns" link del "$DEV_NS" if [ $ret -ne 0 ]; then - echo "FAIL: ip6erspan" + end_test "FAIL: ip6erspan" ip netns del "$testns" return 1 fi - echo "PASS: ip6erspan" + end_test "PASS: ip6erspan" ip netns del "$testns" } kci_test_fdb_get() { - IP="ip -netns testns" - BRIDGE="bridge -netns testns" brdev="test-br0" vxlandev="vxlan10" test_mac=de:ad:be:ef:13:37 @@ -1086,45 +1032,37 @@ kci_test_fdb_get() dstip="10.0.2.3" local ret=0 - bridge fdb help 2>&1 |grep -q 'bridge fdb get' + run_cmd_grep 'bridge fdb get' bridge fdb help if [ $? -ne 0 ];then - echo "SKIP: fdb get tests: iproute2 too old" + end_test "SKIP: fdb get tests: iproute2 too old" return $ksft_skip fi - ip netns add testns + setup_ns testns if [ $? -ne 0 ]; then - echo "SKIP fdb get tests: cannot add net namespace $testns" + end_test "SKIP fdb get tests: cannot add net namespace $testns" return $ksft_skip fi - - $IP link add "$vxlandev" type vxlan id 10 local $localip \ - dstport 4789 2>/dev/null - check_err $? - $IP link add name "$brdev" type bridge &>/dev/null - check_err $? - $IP link set dev "$vxlandev" master "$brdev" &>/dev/null - check_err $? - $BRIDGE fdb add $test_mac dev "$vxlandev" master &>/dev/null - check_err $? - $BRIDGE fdb add $test_mac dev "$vxlandev" dst $dstip self &>/dev/null - check_err $? - - $BRIDGE fdb get $test_mac brport "$vxlandev" 2>/dev/null | grep -q "dev $vxlandev master $brdev" - check_err $? - $BRIDGE fdb get $test_mac br "$brdev" 2>/dev/null | grep -q "dev $vxlandev master $brdev" - check_err $? - $BRIDGE fdb get $test_mac dev "$vxlandev" self 2>/dev/null | grep -q "dev $vxlandev dst $dstip" - check_err $? - - ip netns del testns &>/dev/null + IP="ip -netns $testns" + BRIDGE="bridge -netns $testns" + run_cmd $IP link add "$vxlandev" type vxlan id 10 local $localip \ + dstport 4789 + run_cmd $IP link add name "$brdev" type bridge + run_cmd $IP link set dev "$vxlandev" master "$brdev" + run_cmd $BRIDGE fdb add $test_mac dev "$vxlandev" master + run_cmd $BRIDGE fdb add $test_mac dev "$vxlandev" dst $dstip self + run_cmd_grep "dev $vxlandev master $brdev" $BRIDGE fdb get $test_mac brport "$vxlandev" + run_cmd_grep "dev $vxlandev master $brdev" $BRIDGE fdb get $test_mac br "$brdev" + run_cmd_grep "dev $vxlandev dst $dstip" $BRIDGE fdb get $test_mac dev "$vxlandev" self + + ip netns del $testns &>/dev/null if [ $ret -ne 0 ]; then - echo "FAIL: bridge fdb get" + end_test "FAIL: bridge fdb get" return 1 fi - echo "PASS: bridge fdb get" + end_test "PASS: bridge fdb get" } kci_test_neigh_get() @@ -1134,50 +1072,38 @@ kci_test_neigh_get() dstip6=dead::2 local ret=0 - ip neigh help 2>&1 |grep -q 'ip neigh get' + run_cmd_grep 'ip neigh get' ip neigh help if [ $? -ne 0 ];then - echo "SKIP: fdb get tests: iproute2 too old" + end_test "SKIP: fdb get tests: iproute2 too old" return $ksft_skip fi # ipv4 - ip neigh add $dstip lladdr $dstmac dev "$devdummy" > /dev/null - check_err $? - ip neigh get $dstip dev "$devdummy" 2> /dev/null | grep -q "$dstmac" - check_err $? - ip neigh del $dstip lladdr $dstmac dev "$devdummy" > /dev/null - check_err $? + run_cmd ip neigh add $dstip lladdr $dstmac dev "$devdummy" + run_cmd_grep "$dstmac" ip neigh get $dstip dev "$devdummy" + run_cmd ip neigh del $dstip lladdr $dstmac dev "$devdummy" # ipv4 proxy - ip neigh add proxy $dstip dev "$devdummy" > /dev/null - check_err $? - ip neigh get proxy $dstip dev "$devdummy" 2>/dev/null | grep -q "$dstip" - check_err $? - ip neigh del proxy $dstip dev "$devdummy" > /dev/null - check_err $? + run_cmd ip neigh add proxy $dstip dev "$devdummy" + run_cmd_grep "$dstip" ip neigh get proxy $dstip dev "$devdummy" + run_cmd ip neigh del proxy $dstip dev "$devdummy" # ipv6 - ip neigh add $dstip6 lladdr $dstmac dev "$devdummy" > /dev/null - check_err $? - ip neigh get $dstip6 dev "$devdummy" 2> /dev/null | grep -q "$dstmac" - check_err $? - ip neigh del $dstip6 lladdr $dstmac dev "$devdummy" > /dev/null - check_err $? + run_cmd ip neigh add $dstip6 lladdr $dstmac dev "$devdummy" + run_cmd_grep "$dstmac" ip neigh get $dstip6 dev "$devdummy" + run_cmd ip neigh del $dstip6 lladdr $dstmac dev "$devdummy" # ipv6 proxy - ip neigh add proxy $dstip6 dev "$devdummy" > /dev/null - check_err $? - ip neigh get proxy $dstip6 dev "$devdummy" 2>/dev/null | grep -q "$dstip6" - check_err $? - ip neigh del proxy $dstip6 dev "$devdummy" > /dev/null - check_err $? + run_cmd ip neigh add proxy $dstip6 dev "$devdummy" + run_cmd_grep "$dstip6" ip neigh get proxy $dstip6 dev "$devdummy" + run_cmd ip neigh del proxy $dstip6 dev "$devdummy" if [ $ret -ne 0 ];then - echo "FAIL: neigh get" + end_test "FAIL: neigh get" return 1 fi - echo "PASS: neigh get" + end_test "PASS: neigh get" } kci_test_bridge_parent_id() @@ -1187,10 +1113,9 @@ kci_test_bridge_parent_id() probed=false if [ ! -w /sys/bus/netdevsim/new_device ] ; then - modprobe -q netdevsim - check_err $? + run_cmd modprobe -q netdevsim if [ $ret -ne 0 ]; then - echo "SKIP: bridge_parent_id can't load netdevsim" + end_test "SKIP: bridge_parent_id can't load netdevsim" return $ksft_skip fi probed=true @@ -1203,13 +1128,11 @@ kci_test_bridge_parent_id() udevadm settle dev10=`ls ${sysfsnet}10/net/` dev20=`ls ${sysfsnet}20/net/` - - ip link add name test-bond0 type bond mode 802.3ad - ip link set dev $dev10 master test-bond0 - ip link set dev $dev20 master test-bond0 - ip link add name test-br0 type bridge - ip link set dev test-bond0 master test-br0 - check_err $? + run_cmd ip link add name test-bond0 type bond mode 802.3ad + run_cmd ip link set dev $dev10 master test-bond0 + run_cmd ip link set dev $dev20 master test-bond0 + run_cmd ip link add name test-br0 type bridge + run_cmd ip link set dev test-bond0 master test-br0 # clean up any leftovers ip link del dev test-br0 @@ -1219,82 +1142,191 @@ kci_test_bridge_parent_id() $probed && rmmod netdevsim if [ $ret -ne 0 ]; then - echo "FAIL: bridge_parent_id" + end_test "FAIL: bridge_parent_id" return 1 fi - echo "PASS: bridge_parent_id" + end_test "PASS: bridge_parent_id" } -kci_test_rtnl() +address_get_proto() { + local addr=$1; shift + + ip -N -j address show dev "$devdummy" | + jq -e -r --arg addr "${addr%/*}" \ + '.[].addr_info[] | select(.local == $addr) | .protocol' +} + +address_count() +{ + ip -N -j address show dev "$devdummy" "$@" | + jq -e -r '[.[].addr_info[] | .local | select(. != null)] | length' +} + +do_test_address_proto() +{ + local what=$1; shift + local addr=$1; shift + local addr2=${addr%/*}2/${addr#*/} + local addr3=${addr%/*}3/${addr#*/} + local proto + local count local ret=0 - kci_add_dummy - if [ $ret -ne 0 ];then - echo "FAIL: cannot add dummy interface" - return 1 - fi + local err - kci_test_polrouting - check_err $? - kci_test_route_get + ip address add dev "$devdummy" "$addr3" check_err $? - kci_test_addrlft + proto=$(address_get_proto "$addr3") + [[ "$proto" == null ]] check_err $? - kci_test_promote_secondaries - check_err $? - kci_test_tc - check_err $? - kci_test_gre + + ip address add dev "$devdummy" "$addr2" proto 0x99 check_err $? - kci_test_gretap + proto=$(address_get_proto "$addr2") + [[ "$proto" == 0x99 ]] check_err $? - kci_test_ip6gretap + + ip address add dev "$devdummy" "$addr" proto 0xab check_err $? - kci_test_erspan + proto=$(address_get_proto "$addr") + [[ "$proto" == 0xab ]] check_err $? - kci_test_ip6erspan + + ip address replace dev "$devdummy" "$addr" proto 0x11 + proto=$(address_get_proto "$addr") check_err $? - kci_test_bridge + [[ "$proto" == 0x11 ]] check_err $? - kci_test_addrlabel + + count=$(address_count) check_err $? - kci_test_ifalias + (( count >= 3 )) # $addr, $addr2 and $addr3 plus any kernel addresses check_err $? - kci_test_vrf + + count=$(address_count proto 0) check_err $? - kci_test_encap + (( count == 1 )) # just $addr3 check_err $? - kci_test_macsec + + count=$(address_count proto 0x11) check_err $? - kci_test_ipsec + (( count == 2 )) # $addr and $addr3 check_err $? - kci_test_ipsec_offload + + count=$(address_count proto 0xab) check_err $? - kci_test_fdb_get + (( count == 1 )) # just $addr3 check_err $? - kci_test_neigh_get + + ip address del dev "$devdummy" "$addr" + ip address del dev "$devdummy" "$addr2" + ip address del dev "$devdummy" "$addr3" + + if [ $ret -ne 0 ]; then + end_test "FAIL: address proto $what" + return 1 + fi + end_test "PASS: address proto $what" +} + +kci_test_address_proto() +{ + local ret=0 + + do_test_address_proto IPv4 192.0.2.1/28 check_err $? - kci_test_bridge_parent_id + + do_test_address_proto IPv6 2001:db8:1::1/64 check_err $? + return $ret +} + +kci_test_enslave_bonding() +{ + local bond="bond123" + local ret=0 + + setup_ns testns + if [ $? -ne 0 ]; then + end_test "SKIP bonding tests: cannot add net namespace $testns" + return $ksft_skip + fi + + run_cmd ip -netns $testns link add dev $bond type bond mode balance-rr + run_cmd ip -netns $testns link add dev $devdummy type dummy + run_cmd ip -netns $testns link set dev $devdummy up + run_cmd ip -netns $testns link set dev $devdummy master $bond down + if [ $ret -ne 0 ]; then + end_test "FAIL: initially up interface added to a bond and set down" + ip netns del "$testns" + return 1 + fi + + end_test "PASS: enslave interface in a bond" + ip netns del "$testns" +} + +kci_test_rtnl() +{ + local current_test + local ret=0 + + kci_add_dummy + if [ $ret -ne 0 ];then + end_test "FAIL: cannot add dummy interface" + return 1 + fi + + for current_test in ${TESTS:-$ALL_TESTS}; do + $current_test + check_err $? + done + kci_del_dummy return $ret } +usage() +{ + cat <<EOF +usage: ${0##*/} OPTS + + -t <test> Test(s) to run (default: all) + (options: $(echo $ALL_TESTS)) + -v Verbose mode (show commands and output) + -P Pause after every test + -p Pause after every failing test before cleanup (for debugging) +EOF +} + #check for needed privileges if [ "$(id -u)" -ne 0 ];then - echo "SKIP: Need root privileges" + end_test "SKIP: Need root privileges" exit $ksft_skip fi for x in ip tc;do $x -Version 2>/dev/null >/dev/null if [ $? -ne 0 ];then - echo "SKIP: Could not run test without the $x tool" + end_test "SKIP: Could not run test without the $x tool" exit $ksft_skip fi done +while getopts t:hvpP o; do + case $o in + t) TESTS=$OPTARG;; + v) VERBOSE=1;; + p) PAUSE_ON_FAIL=yes;; + P) PAUSE=yes;; + h) usage; exit 0;; + *) usage; exit 1;; + esac +done + +[ $PAUSE = "yes" ] && PAUSE_ON_FAIL="no" + kci_test_rtnl exit $? diff --git a/tools/testing/selftests/net/run_afpackettests b/tools/testing/selftests/net/run_afpackettests index 8b42e8b04e0f..a59cb6a3c4f5 100755 --- a/tools/testing/selftests/net/run_afpackettests +++ b/tools/testing/selftests/net/run_afpackettests @@ -1,9 +1,12 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + if [ $(id -u) != 0 ]; then echo $msg must be run as root >&2 - exit 0 + exit $ksft_skip fi ret=0 diff --git a/tools/testing/selftests/net/rxtimestamp.c b/tools/testing/selftests/net/rxtimestamp.c index e4613ce4ed69..9eb42570294d 100644 --- a/tools/testing/selftests/net/rxtimestamp.c +++ b/tools/testing/selftests/net/rxtimestamp.c @@ -18,7 +18,7 @@ #include <linux/net_tstamp.h> #include <linux/errqueue.h> -#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) +#include "../kselftest.h" struct options { int so_timestamp; diff --git a/tools/testing/selftests/net/sctp_hello.c b/tools/testing/selftests/net/sctp_hello.c new file mode 100644 index 000000000000..f02f1f95d227 --- /dev/null +++ b/tools/testing/selftests/net/sctp_hello.c @@ -0,0 +1,137 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <sys/types.h> +#include <netinet/in.h> +#include <arpa/inet.h> + +static void set_addr(struct sockaddr_storage *ss, char *ip, char *port, int *len) +{ + if (ss->ss_family == AF_INET) { + struct sockaddr_in *a = (struct sockaddr_in *)ss; + + a->sin_addr.s_addr = inet_addr(ip); + a->sin_port = htons(atoi(port)); + *len = sizeof(*a); + } else { + struct sockaddr_in6 *a = (struct sockaddr_in6 *)ss; + + a->sin6_family = AF_INET6; + inet_pton(AF_INET6, ip, &a->sin6_addr); + a->sin6_port = htons(atoi(port)); + *len = sizeof(*a); + } +} + +static int do_client(int argc, char *argv[]) +{ + struct sockaddr_storage ss; + char buf[] = "hello"; + int csk, ret, len; + + if (argc < 5) { + printf("%s client -4|6 IP PORT [IP PORT]\n", argv[0]); + return -1; + } + + bzero((void *)&ss, sizeof(ss)); + ss.ss_family = !strcmp(argv[2], "-4") ? AF_INET : AF_INET6; + csk = socket(ss.ss_family, SOCK_STREAM, IPPROTO_SCTP); + if (csk < 0) { + printf("failed to create socket\n"); + return -1; + } + + if (argc >= 7) { + set_addr(&ss, argv[5], argv[6], &len); + ret = bind(csk, (struct sockaddr *)&ss, len); + if (ret < 0) { + printf("failed to bind to address\n"); + return -1; + } + } + + set_addr(&ss, argv[3], argv[4], &len); + ret = connect(csk, (struct sockaddr *)&ss, len); + if (ret < 0) { + printf("failed to connect to peer\n"); + return -1; + } + + ret = send(csk, buf, strlen(buf) + 1, 0); + if (ret < 0) { + printf("failed to send msg %d\n", ret); + return -1; + } + close(csk); + + return 0; +} + +int main(int argc, char *argv[]) +{ + struct sockaddr_storage ss; + int lsk, csk, ret, len; + char buf[20]; + + if (argc < 2 || (strcmp(argv[1], "server") && strcmp(argv[1], "client"))) { + printf("%s server|client ...\n", argv[0]); + return -1; + } + + if (!strcmp(argv[1], "client")) + return do_client(argc, argv); + + if (argc < 5) { + printf("%s server -4|6 IP PORT [IFACE]\n", argv[0]); + return -1; + } + + ss.ss_family = !strcmp(argv[2], "-4") ? AF_INET : AF_INET6; + lsk = socket(ss.ss_family, SOCK_STREAM, IPPROTO_SCTP); + if (lsk < 0) { + printf("failed to create lsk\n"); + return -1; + } + + if (argc >= 6) { + ret = setsockopt(lsk, SOL_SOCKET, SO_BINDTODEVICE, + argv[5], strlen(argv[5]) + 1); + if (ret < 0) { + printf("failed to bind to device\n"); + return -1; + } + } + + set_addr(&ss, argv[3], argv[4], &len); + ret = bind(lsk, (struct sockaddr *)&ss, len); + if (ret < 0) { + printf("failed to bind to address\n"); + return -1; + } + + ret = listen(lsk, 5); + if (ret < 0) { + printf("failed to listen on port\n"); + return -1; + } + + csk = accept(lsk, (struct sockaddr *)NULL, (socklen_t *)NULL); + if (csk < 0) { + printf("failed to accept new client\n"); + return -1; + } + + ret = recv(csk, buf, sizeof(buf), 0); + if (ret <= 0) { + printf("failed to recv msg %d\n", ret); + return -1; + } + close(csk); + close(lsk); + + return 0; +} diff --git a/tools/testing/selftests/net/sctp_vrf.sh b/tools/testing/selftests/net/sctp_vrf.sh new file mode 100755 index 000000000000..c854034b6aa1 --- /dev/null +++ b/tools/testing/selftests/net/sctp_vrf.sh @@ -0,0 +1,172 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Testing For SCTP VRF. +# TOPO: CLIENT_NS1 (veth1) <---> (veth1) -> vrf_s1 +# SERVER_NS +# CLIENT_NS2 (veth1) <---> (veth2) -> vrf_s2 + +source lib.sh +CLIENT_IP4="10.0.0.1" +CLIENT_IP6="2000::1" +CLIENT_PORT=1234 + +SERVER_IP4="10.0.0.2" +SERVER_IP6="2000::2" +SERVER_PORT=1234 + +setup() { + modprobe sctp + modprobe sctp_diag + setup_ns CLIENT_NS1 CLIENT_NS2 SERVER_NS + + ip net exec $CLIENT_NS1 sysctl -w net.ipv6.conf.default.accept_dad=0 2>&1 >/dev/null + ip net exec $CLIENT_NS2 sysctl -w net.ipv6.conf.default.accept_dad=0 2>&1 >/dev/null + ip net exec $SERVER_NS sysctl -w net.ipv6.conf.default.accept_dad=0 2>&1 >/dev/null + + ip -n $SERVER_NS link add veth1 type veth peer name veth1 netns $CLIENT_NS1 + ip -n $SERVER_NS link add veth2 type veth peer name veth1 netns $CLIENT_NS2 + + ip -n $CLIENT_NS1 link set veth1 up + ip -n $CLIENT_NS1 addr add $CLIENT_IP4/24 dev veth1 + ip -n $CLIENT_NS1 addr add $CLIENT_IP6/24 dev veth1 + + ip -n $CLIENT_NS2 link set veth1 up + ip -n $CLIENT_NS2 addr add $CLIENT_IP4/24 dev veth1 + ip -n $CLIENT_NS2 addr add $CLIENT_IP6/24 dev veth1 + + ip -n $SERVER_NS link add dummy1 type dummy + ip -n $SERVER_NS link set dummy1 up + ip -n $SERVER_NS link add vrf-1 type vrf table 10 + ip -n $SERVER_NS link add vrf-2 type vrf table 20 + ip -n $SERVER_NS link set vrf-1 up + ip -n $SERVER_NS link set vrf-2 up + ip -n $SERVER_NS link set veth1 master vrf-1 + ip -n $SERVER_NS link set veth2 master vrf-2 + + ip -n $SERVER_NS addr add $SERVER_IP4/24 dev dummy1 + ip -n $SERVER_NS addr add $SERVER_IP4/24 dev veth1 + ip -n $SERVER_NS addr add $SERVER_IP4/24 dev veth2 + ip -n $SERVER_NS addr add $SERVER_IP6/24 dev dummy1 + ip -n $SERVER_NS addr add $SERVER_IP6/24 dev veth1 + ip -n $SERVER_NS addr add $SERVER_IP6/24 dev veth2 + + ip -n $SERVER_NS link set veth1 up + ip -n $SERVER_NS link set veth2 up + ip -n $SERVER_NS route add table 10 $CLIENT_IP4 dev veth1 src $SERVER_IP4 + ip -n $SERVER_NS route add table 20 $CLIENT_IP4 dev veth2 src $SERVER_IP4 + ip -n $SERVER_NS route add $CLIENT_IP4 dev veth1 src $SERVER_IP4 + ip -n $SERVER_NS route add table 10 $CLIENT_IP6 dev veth1 src $SERVER_IP6 + ip -n $SERVER_NS route add table 20 $CLIENT_IP6 dev veth2 src $SERVER_IP6 + ip -n $SERVER_NS route add $CLIENT_IP6 dev veth1 src $SERVER_IP6 +} + +cleanup() { + ip netns exec $SERVER_NS pkill sctp_hello 2>&1 >/dev/null + cleanup_ns $CLIENT_NS1 $CLIENT_NS2 $SERVER_NS +} + +wait_server() { + local IFACE=$1 + local CNT=0 + + until ip netns exec $SERVER_NS ss -lS src $SERVER_IP:$SERVER_PORT | \ + grep LISTEN | grep "$IFACE" 2>&1 >/dev/null; do + [ $((CNT++)) = "20" ] && { RET=3; return $RET; } + sleep 0.1 + done +} + +do_test() { + local CLIENT_NS=$1 + local IFACE=$2 + + ip netns exec $SERVER_NS pkill sctp_hello 2>&1 >/dev/null + ip netns exec $SERVER_NS ./sctp_hello server $AF $SERVER_IP \ + $SERVER_PORT $IFACE 2>&1 >/dev/null & + disown + wait_server $IFACE || return $RET + timeout 3 ip netns exec $CLIENT_NS ./sctp_hello client $AF \ + $SERVER_IP $SERVER_PORT $CLIENT_IP $CLIENT_PORT 2>&1 >/dev/null + RET=$? + return $RET +} + +do_testx() { + local IFACE1=$1 + local IFACE2=$2 + + ip netns exec $SERVER_NS pkill sctp_hello 2>&1 >/dev/null + ip netns exec $SERVER_NS ./sctp_hello server $AF $SERVER_IP \ + $SERVER_PORT $IFACE1 2>&1 >/dev/null & + disown + wait_server $IFACE1 || return $RET + ip netns exec $SERVER_NS ./sctp_hello server $AF $SERVER_IP \ + $SERVER_PORT $IFACE2 2>&1 >/dev/null & + disown + wait_server $IFACE2 || return $RET + timeout 3 ip netns exec $CLIENT_NS1 ./sctp_hello client $AF \ + $SERVER_IP $SERVER_PORT $CLIENT_IP $CLIENT_PORT 2>&1 >/dev/null && \ + timeout 3 ip netns exec $CLIENT_NS2 ./sctp_hello client $AF \ + $SERVER_IP $SERVER_PORT $CLIENT_IP $CLIENT_PORT 2>&1 >/dev/null + RET=$? + return $RET +} + +testup() { + ip netns exec $SERVER_NS sysctl -w net.sctp.l3mdev_accept=1 2>&1 >/dev/null + echo -n "TEST 01: nobind, connect from client 1, l3mdev_accept=1, Y " + do_test $CLIENT_NS1 || { echo "[FAIL]"; return $RET; } + echo "[PASS]" + + echo -n "TEST 02: nobind, connect from client 2, l3mdev_accept=1, N " + do_test $CLIENT_NS2 && { echo "[FAIL]"; return $RET; } + echo "[PASS]" + + ip netns exec $SERVER_NS sysctl -w net.sctp.l3mdev_accept=0 2>&1 >/dev/null + echo -n "TEST 03: nobind, connect from client 1, l3mdev_accept=0, N " + do_test $CLIENT_NS1 && { echo "[FAIL]"; return $RET; } + echo "[PASS]" + + echo -n "TEST 04: nobind, connect from client 2, l3mdev_accept=0, N " + do_test $CLIENT_NS2 && { echo "[FAIL]"; return $RET; } + echo "[PASS]" + + echo -n "TEST 05: bind veth2 in server, connect from client 1, N " + do_test $CLIENT_NS1 veth2 && { echo "[FAIL]"; return $RET; } + echo "[PASS]" + + echo -n "TEST 06: bind veth1 in server, connect from client 1, Y " + do_test $CLIENT_NS1 veth1 || { echo "[FAIL]"; return $RET; } + echo "[PASS]" + + echo -n "TEST 07: bind vrf-1 in server, connect from client 1, Y " + do_test $CLIENT_NS1 vrf-1 || { echo "[FAIL]"; return $RET; } + echo "[PASS]" + + echo -n "TEST 08: bind vrf-2 in server, connect from client 1, N " + do_test $CLIENT_NS1 vrf-2 && { echo "[FAIL]"; return $RET; } + echo "[PASS]" + + echo -n "TEST 09: bind vrf-2 in server, connect from client 2, Y " + do_test $CLIENT_NS2 vrf-2 || { echo "[FAIL]"; return $RET; } + echo "[PASS]" + + echo -n "TEST 10: bind vrf-1 in server, connect from client 2, N " + do_test $CLIENT_NS2 vrf-1 && { echo "[FAIL]"; return $RET; } + echo "[PASS]" + + echo -n "TEST 11: bind vrf-1 & 2 in server, connect from client 1 & 2, Y " + do_testx vrf-1 vrf-2 || { echo "[FAIL]"; return $RET; } + echo "[PASS]" + + echo -n "TEST 12: bind vrf-2 & 1 in server, connect from client 1 & 2, N " + do_testx vrf-2 vrf-1 || { echo "[FAIL]"; return $RET; } + echo "[PASS]" +} + +trap cleanup EXIT +setup && echo "Testing For SCTP VRF:" && \ +CLIENT_IP=$CLIENT_IP4 SERVER_IP=$SERVER_IP4 AF="-4" testup && echo "***v4 Tests Done***" && +CLIENT_IP=$CLIENT_IP6 SERVER_IP=$SERVER_IP6 AF="-6" testup && echo "***v6 Tests Done***" +exit $? diff --git a/tools/testing/selftests/net/settings b/tools/testing/selftests/net/settings new file mode 100644 index 000000000000..ed8418e8217a --- /dev/null +++ b/tools/testing/selftests/net/settings @@ -0,0 +1 @@ +timeout=3600 diff --git a/tools/testing/selftests/net/setup_loopback.sh b/tools/testing/selftests/net/setup_loopback.sh new file mode 100644 index 000000000000..2070b57849de --- /dev/null +++ b/tools/testing/selftests/net/setup_loopback.sh @@ -0,0 +1,120 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +readonly FLUSH_PATH="/sys/class/net/${dev}/gro_flush_timeout" +readonly IRQ_PATH="/sys/class/net/${dev}/napi_defer_hard_irqs" +readonly FLUSH_TIMEOUT="$(< ${FLUSH_PATH})" +readonly HARD_IRQS="$(< ${IRQ_PATH})" +readonly server_ns=$(mktemp -u server-XXXXXXXX) +readonly client_ns=$(mktemp -u client-XXXXXXXX) + +netdev_check_for_carrier() { + local -r dev="$1" + + for i in {1..5}; do + carrier="$(cat /sys/class/net/${dev}/carrier)" + if [[ "${carrier}" -ne 1 ]] ; then + echo "carrier not ready yet..." >&2 + sleep 1 + else + echo "carrier ready" >&2 + break + fi + done + echo "${carrier}" +} + +# Assumes that there is no existing ipvlan device on the physical device +setup_loopback_environment() { + local dev="$1" + + # Fail hard if cannot turn on loopback mode for current NIC + ethtool -K "${dev}" loopback on || exit 1 + sleep 1 + + # Check for the carrier + carrier=$(netdev_check_for_carrier ${dev}) + if [[ "${carrier}" -ne 1 ]] ; then + echo "setup_loopback_environment failed" + exit 1 + fi +} + +setup_macvlan_ns(){ + local -r link_dev="$1" + local -r ns_name="$2" + local -r ns_dev="$3" + local -r ns_mac="$4" + local -r addr="$5" + + ip link add link "${link_dev}" dev "${ns_dev}" \ + address "${ns_mac}" type macvlan + exit_code=$? + if [[ "${exit_code}" -ne 0 ]]; then + echo "setup_macvlan_ns failed" + exit $exit_code + fi + + [[ -e /var/run/netns/"${ns_name}" ]] || ip netns add "${ns_name}" + ip link set dev "${ns_dev}" netns "${ns_name}" + ip -netns "${ns_name}" link set dev "${ns_dev}" up + if [[ -n "${addr}" ]]; then + ip -netns "${ns_name}" addr add dev "${ns_dev}" "${addr}" + fi + + sleep 1 +} + +cleanup_macvlan_ns(){ + while (( $# >= 2 )); do + ns_name="$1" + ns_dev="$2" + ip -netns "${ns_name}" link del dev "${ns_dev}" + ip netns del "${ns_name}" + shift 2 + done +} + +cleanup_loopback(){ + local -r dev="$1" + + ethtool -K "${dev}" loopback off + sleep 1 + + # Check for the carrier + carrier=$(netdev_check_for_carrier ${dev}) + if [[ "${carrier}" -ne 1 ]] ; then + echo "setup_loopback_environment failed" + exit 1 + fi +} + +setup_interrupt() { + # Use timer on host to trigger the network stack + # Also disable device interrupt to not depend on NIC interrupt + # Reduce test flakiness caused by unexpected interrupts + echo 100000 >"${FLUSH_PATH}" + echo 50 >"${IRQ_PATH}" +} + +setup_ns() { + # Set up server_ns namespace and client_ns namespace + setup_macvlan_ns "${dev}" ${server_ns} server "${SERVER_MAC}" + setup_macvlan_ns "${dev}" ${client_ns} client "${CLIENT_MAC}" +} + +cleanup_ns() { + cleanup_macvlan_ns ${server_ns} server ${client_ns} client +} + +setup() { + setup_loopback_environment "${dev}" + setup_interrupt +} + +cleanup() { + cleanup_loopback "${dev}" + + echo "${FLUSH_TIMEOUT}" >"${FLUSH_PATH}" + echo "${HARD_IRQS}" >"${IRQ_PATH}" +} diff --git a/tools/testing/selftests/net/setup_veth.sh b/tools/testing/selftests/net/setup_veth.sh new file mode 100644 index 000000000000..1f78a87f6f37 --- /dev/null +++ b/tools/testing/selftests/net/setup_veth.sh @@ -0,0 +1,44 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +readonly server_ns=$(mktemp -u server-XXXXXXXX) +readonly client_ns=$(mktemp -u client-XXXXXXXX) + +setup_veth_ns() { + local -r link_dev="$1" + local -r ns_name="$2" + local -r ns_dev="$3" + local -r ns_mac="$4" + + [[ -e /var/run/netns/"${ns_name}" ]] || ip netns add "${ns_name}" + echo 1000000 > "/sys/class/net/${ns_dev}/gro_flush_timeout" + ip link set dev "${ns_dev}" netns "${ns_name}" mtu 65535 + ip -netns "${ns_name}" link set dev "${ns_dev}" up + + ip netns exec "${ns_name}" ethtool -K "${ns_dev}" gro on tso off +} + +setup_ns() { + # Set up server_ns namespace and client_ns namespace + ip link add name server type veth peer name client + + setup_veth_ns "${dev}" ${server_ns} server "${SERVER_MAC}" + setup_veth_ns "${dev}" ${client_ns} client "${CLIENT_MAC}" +} + +cleanup_ns() { + local ns_name + + for ns_name in ${client_ns} ${server_ns}; do + [[ -e /var/run/netns/"${ns_name}" ]] && ip netns del "${ns_name}" + done +} + +setup() { + # no global init setup step needed + : +} + +cleanup() { + cleanup_ns +} diff --git a/tools/testing/selftests/net/sk_bind_sendto_listen.c b/tools/testing/selftests/net/sk_bind_sendto_listen.c new file mode 100644 index 000000000000..b420d830f72c --- /dev/null +++ b/tools/testing/selftests/net/sk_bind_sendto_listen.c @@ -0,0 +1,80 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <arpa/inet.h> +#include <error.h> +#include <errno.h> +#include <unistd.h> + +int main(void) +{ + int fd1, fd2, one = 1; + struct sockaddr_in6 bind_addr = { + .sin6_family = AF_INET6, + .sin6_port = htons(20000), + .sin6_flowinfo = htonl(0), + .sin6_addr = {}, + .sin6_scope_id = 0, + }; + + inet_pton(AF_INET6, "::", &bind_addr.sin6_addr); + + fd1 = socket(AF_INET6, SOCK_STREAM, IPPROTO_IP); + if (fd1 < 0) { + error(1, errno, "socket fd1"); + return -1; + } + + if (setsockopt(fd1, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one))) { + error(1, errno, "setsockopt(SO_REUSEADDR) fd1"); + goto out_err1; + } + + if (bind(fd1, (struct sockaddr *)&bind_addr, sizeof(bind_addr))) { + error(1, errno, "bind fd1"); + goto out_err1; + } + + if (sendto(fd1, NULL, 0, MSG_FASTOPEN, (struct sockaddr *)&bind_addr, + sizeof(bind_addr))) { + error(1, errno, "sendto fd1"); + goto out_err1; + } + + fd2 = socket(AF_INET6, SOCK_STREAM, IPPROTO_IP); + if (fd2 < 0) { + error(1, errno, "socket fd2"); + goto out_err1; + } + + if (setsockopt(fd2, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one))) { + error(1, errno, "setsockopt(SO_REUSEADDR) fd2"); + goto out_err2; + } + + if (bind(fd2, (struct sockaddr *)&bind_addr, sizeof(bind_addr))) { + error(1, errno, "bind fd2"); + goto out_err2; + } + + if (sendto(fd2, NULL, 0, MSG_FASTOPEN, (struct sockaddr *)&bind_addr, + sizeof(bind_addr)) != -1) { + error(1, errno, "sendto fd2"); + goto out_err2; + } + + if (listen(fd2, 0)) { + error(1, errno, "listen"); + goto out_err2; + } + + close(fd2); + close(fd1); + return 0; + +out_err2: + close(fd2); + +out_err1: + close(fd1); + return -1; +} diff --git a/tools/testing/selftests/net/sk_connect_zero_addr.c b/tools/testing/selftests/net/sk_connect_zero_addr.c new file mode 100644 index 000000000000..4be418aefd9f --- /dev/null +++ b/tools/testing/selftests/net/sk_connect_zero_addr.c @@ -0,0 +1,62 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <arpa/inet.h> +#include <error.h> +#include <errno.h> +#include <unistd.h> + +int main(void) +{ + int fd1, fd2, one = 1; + struct sockaddr_in6 bind_addr = { + .sin6_family = AF_INET6, + .sin6_port = htons(20000), + .sin6_flowinfo = htonl(0), + .sin6_addr = {}, + .sin6_scope_id = 0, + }; + + inet_pton(AF_INET6, "::", &bind_addr.sin6_addr); + + fd1 = socket(AF_INET6, SOCK_STREAM, IPPROTO_IP); + if (fd1 < 0) { + error(1, errno, "socket fd1"); + return -1; + } + + if (setsockopt(fd1, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one))) { + error(1, errno, "setsockopt(SO_REUSEADDR) fd1"); + goto out_err1; + } + + if (bind(fd1, (struct sockaddr *)&bind_addr, sizeof(bind_addr))) { + error(1, errno, "bind fd1"); + goto out_err1; + } + + if (listen(fd1, 0)) { + error(1, errno, "listen"); + goto out_err1; + } + + fd2 = socket(AF_INET6, SOCK_STREAM, IPPROTO_IP); + if (fd2 < 0) { + error(1, errno, "socket fd2"); + goto out_err1; + } + + if (connect(fd2, (struct sockaddr *)&bind_addr, sizeof(bind_addr))) { + error(1, errno, "bind fd2"); + goto out_err2; + } + + close(fd2); + close(fd1); + return 0; + +out_err2: + close(fd2); +out_err1: + close(fd1); + return -1; +} diff --git a/tools/testing/selftests/net/so_incoming_cpu.c b/tools/testing/selftests/net/so_incoming_cpu.c new file mode 100644 index 000000000000..e9fa14e10732 --- /dev/null +++ b/tools/testing/selftests/net/so_incoming_cpu.c @@ -0,0 +1,274 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright Amazon.com Inc. or its affiliates. */ +#define _GNU_SOURCE +#include <sched.h> + +#include <fcntl.h> + +#include <netinet/in.h> +#include <sys/socket.h> +#include <sys/sysinfo.h> + +#include "../kselftest_harness.h" + +FIXTURE(so_incoming_cpu) +{ + int *servers; + union { + struct sockaddr addr; + struct sockaddr_in in_addr; + }; + socklen_t addrlen; +}; + +enum when_to_set { + BEFORE_REUSEPORT, + BEFORE_LISTEN, + AFTER_LISTEN, + AFTER_ALL_LISTEN, +}; + +FIXTURE_VARIANT(so_incoming_cpu) +{ + int when_to_set; +}; + +FIXTURE_VARIANT_ADD(so_incoming_cpu, before_reuseport) +{ + .when_to_set = BEFORE_REUSEPORT, +}; + +FIXTURE_VARIANT_ADD(so_incoming_cpu, before_listen) +{ + .when_to_set = BEFORE_LISTEN, +}; + +FIXTURE_VARIANT_ADD(so_incoming_cpu, after_listen) +{ + .when_to_set = AFTER_LISTEN, +}; + +FIXTURE_VARIANT_ADD(so_incoming_cpu, after_all_listen) +{ + .when_to_set = AFTER_ALL_LISTEN, +}; + +static void write_sysctl(struct __test_metadata *_metadata, + char *filename, char *string) +{ + int fd, len, ret; + + fd = open(filename, O_WRONLY); + ASSERT_NE(fd, -1); + + len = strlen(string); + ret = write(fd, string, len); + ASSERT_EQ(ret, len); +} + +static void setup_netns(struct __test_metadata *_metadata) +{ + ASSERT_EQ(unshare(CLONE_NEWNET), 0); + ASSERT_EQ(system("ip link set lo up"), 0); + + write_sysctl(_metadata, "/proc/sys/net/ipv4/ip_local_port_range", "10000 60001"); + write_sysctl(_metadata, "/proc/sys/net/ipv4/tcp_tw_reuse", "0"); +} + +#define NR_PORT (60001 - 10000 - 1) +#define NR_CLIENT_PER_SERVER_DEFAULT 32 +static int nr_client_per_server, nr_server, nr_client; + +FIXTURE_SETUP(so_incoming_cpu) +{ + setup_netns(_metadata); + + nr_server = get_nprocs(); + ASSERT_LE(2, nr_server); + + if (NR_CLIENT_PER_SERVER_DEFAULT * nr_server < NR_PORT) + nr_client_per_server = NR_CLIENT_PER_SERVER_DEFAULT; + else + nr_client_per_server = NR_PORT / nr_server; + + nr_client = nr_client_per_server * nr_server; + + self->servers = malloc(sizeof(int) * nr_server); + ASSERT_NE(self->servers, NULL); + + self->in_addr.sin_family = AF_INET; + self->in_addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK); + self->in_addr.sin_port = htons(0); + self->addrlen = sizeof(struct sockaddr_in); +} + +FIXTURE_TEARDOWN(so_incoming_cpu) +{ + int i; + + for (i = 0; i < nr_server; i++) + close(self->servers[i]); + + free(self->servers); +} + +void set_so_incoming_cpu(struct __test_metadata *_metadata, int fd, int cpu) +{ + int ret; + + ret = setsockopt(fd, SOL_SOCKET, SO_INCOMING_CPU, &cpu, sizeof(int)); + ASSERT_EQ(ret, 0); +} + +int create_server(struct __test_metadata *_metadata, + FIXTURE_DATA(so_incoming_cpu) *self, + const FIXTURE_VARIANT(so_incoming_cpu) *variant, + int cpu) +{ + int fd, ret; + + fd = socket(AF_INET, SOCK_STREAM | SOCK_NONBLOCK, 0); + ASSERT_NE(fd, -1); + + if (variant->when_to_set == BEFORE_REUSEPORT) + set_so_incoming_cpu(_metadata, fd, cpu); + + ret = setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &(int){1}, sizeof(int)); + ASSERT_EQ(ret, 0); + + ret = bind(fd, &self->addr, self->addrlen); + ASSERT_EQ(ret, 0); + + if (variant->when_to_set == BEFORE_LISTEN) + set_so_incoming_cpu(_metadata, fd, cpu); + + /* We don't use nr_client_per_server here not to block + * this test at connect() if SO_INCOMING_CPU is broken. + */ + ret = listen(fd, nr_client); + ASSERT_EQ(ret, 0); + + if (variant->when_to_set == AFTER_LISTEN) + set_so_incoming_cpu(_metadata, fd, cpu); + + return fd; +} + +void create_servers(struct __test_metadata *_metadata, + FIXTURE_DATA(so_incoming_cpu) *self, + const FIXTURE_VARIANT(so_incoming_cpu) *variant) +{ + int i, ret; + + for (i = 0; i < nr_server; i++) { + self->servers[i] = create_server(_metadata, self, variant, i); + + if (i == 0) { + ret = getsockname(self->servers[i], &self->addr, &self->addrlen); + ASSERT_EQ(ret, 0); + } + } + + if (variant->when_to_set == AFTER_ALL_LISTEN) { + for (i = 0; i < nr_server; i++) + set_so_incoming_cpu(_metadata, self->servers[i], i); + } +} + +void create_clients(struct __test_metadata *_metadata, + FIXTURE_DATA(so_incoming_cpu) *self) +{ + cpu_set_t cpu_set; + int i, j, fd, ret; + + for (i = 0; i < nr_server; i++) { + CPU_ZERO(&cpu_set); + + CPU_SET(i, &cpu_set); + ASSERT_EQ(CPU_COUNT(&cpu_set), 1); + ASSERT_NE(CPU_ISSET(i, &cpu_set), 0); + + /* Make sure SYN will be processed on the i-th CPU + * and finally distributed to the i-th listener. + */ + ret = sched_setaffinity(0, sizeof(cpu_set), &cpu_set); + ASSERT_EQ(ret, 0); + + for (j = 0; j < nr_client_per_server; j++) { + fd = socket(AF_INET, SOCK_STREAM, 0); + ASSERT_NE(fd, -1); + + ret = connect(fd, &self->addr, self->addrlen); + ASSERT_EQ(ret, 0); + + close(fd); + } + } +} + +void verify_incoming_cpu(struct __test_metadata *_metadata, + FIXTURE_DATA(so_incoming_cpu) *self) +{ + int i, j, fd, cpu, ret, total = 0; + socklen_t len = sizeof(int); + + for (i = 0; i < nr_server; i++) { + for (j = 0; j < nr_client_per_server; j++) { + /* If we see -EAGAIN here, SO_INCOMING_CPU is broken */ + fd = accept(self->servers[i], &self->addr, &self->addrlen); + ASSERT_NE(fd, -1); + + ret = getsockopt(fd, SOL_SOCKET, SO_INCOMING_CPU, &cpu, &len); + ASSERT_EQ(ret, 0); + ASSERT_EQ(cpu, i); + + close(fd); + total++; + } + } + + ASSERT_EQ(total, nr_client); + TH_LOG("SO_INCOMING_CPU is very likely to be " + "working correctly with %d sockets.", total); +} + +TEST_F(so_incoming_cpu, test1) +{ + create_servers(_metadata, self, variant); + create_clients(_metadata, self); + verify_incoming_cpu(_metadata, self); +} + +TEST_F(so_incoming_cpu, test2) +{ + int server; + + create_servers(_metadata, self, variant); + + /* No CPU specified */ + server = create_server(_metadata, self, variant, -1); + close(server); + + create_clients(_metadata, self); + verify_incoming_cpu(_metadata, self); +} + +TEST_F(so_incoming_cpu, test3) +{ + int server, client; + + create_servers(_metadata, self, variant); + + /* No CPU specified */ + server = create_server(_metadata, self, variant, -1); + + create_clients(_metadata, self); + + /* Never receive any requests */ + client = accept(server, &self->addr, &self->addrlen); + ASSERT_EQ(client, -1); + + verify_incoming_cpu(_metadata, self); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/net/so_netns_cookie.c b/tools/testing/selftests/net/so_netns_cookie.c new file mode 100644 index 000000000000..b39e87e967cd --- /dev/null +++ b/tools/testing/selftests/net/so_netns_cookie.c @@ -0,0 +1,61 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#include <sched.h> +#include <unistd.h> +#include <stdio.h> +#include <errno.h> +#include <string.h> +#include <stdlib.h> +#include <stdint.h> +#include <sys/types.h> +#include <sys/socket.h> + +#ifndef SO_NETNS_COOKIE +#define SO_NETNS_COOKIE 71 +#endif + +#define pr_err(fmt, ...) \ + ({ \ + fprintf(stderr, "%s:%d:" fmt ": %m\n", \ + __func__, __LINE__, ##__VA_ARGS__); \ + 1; \ + }) + +int main(int argc, char *argvp[]) +{ + uint64_t cookie1, cookie2; + socklen_t vallen; + int sock1, sock2; + + sock1 = socket(AF_INET, SOCK_STREAM, 0); + if (sock1 < 0) + return pr_err("Unable to create TCP socket"); + + vallen = sizeof(cookie1); + if (getsockopt(sock1, SOL_SOCKET, SO_NETNS_COOKIE, &cookie1, &vallen) != 0) + return pr_err("getsockopt(SOL_SOCKET, SO_NETNS_COOKIE)"); + + if (!cookie1) + return pr_err("SO_NETNS_COOKIE returned zero cookie"); + + if (unshare(CLONE_NEWNET)) + return pr_err("unshare"); + + sock2 = socket(AF_INET, SOCK_STREAM, 0); + if (sock2 < 0) + return pr_err("Unable to create TCP socket"); + + vallen = sizeof(cookie2); + if (getsockopt(sock2, SOL_SOCKET, SO_NETNS_COOKIE, &cookie2, &vallen) != 0) + return pr_err("getsockopt(SOL_SOCKET, SO_NETNS_COOKIE)"); + + if (!cookie2) + return pr_err("SO_NETNS_COOKIE returned zero cookie"); + + if (cookie1 == cookie2) + return pr_err("SO_NETNS_COOKIE returned identical cookies for distinct ns"); + + close(sock1); + close(sock2); + return 0; +} diff --git a/tools/testing/selftests/net/so_txtime.c b/tools/testing/selftests/net/so_txtime.c index 3155fbbf644b..8457b7ccbc09 100644 --- a/tools/testing/selftests/net/so_txtime.c +++ b/tools/testing/selftests/net/so_txtime.c @@ -2,9 +2,12 @@ /* * Test the SO_TXTIME API * - * Takes two streams of { payload, delivery time }[], one input and one output. - * Sends the input stream and verifies arrival matches the output stream. - * The two streams can differ due to out-of-order delivery and drops. + * Takes a stream of { payload, delivery time }[], to be sent across two + * processes. Start this program on two separate network namespaces or + * connected hosts, one instance in transmit mode and the other in receive + * mode using the '-r' option. Receiver will compare arrival timestamps to + * the expected stream. Sender will read transmit timestamps from the error + * queue. The streams can differ due to out-of-order delivery and drops. */ #define _GNU_SOURCE @@ -28,14 +31,17 @@ #include <sys/types.h> #include <time.h> #include <unistd.h> +#include <poll.h> static int cfg_clockid = CLOCK_TAI; -static bool cfg_do_ipv4; -static bool cfg_do_ipv6; static uint16_t cfg_port = 8000; static int cfg_variance_us = 4000; +static uint64_t cfg_start_time_ns; +static int cfg_mark; +static bool cfg_rx; static uint64_t glob_tstart; +static uint64_t tdeliver_max; /* encode one timed transmission (of a 1B payload) */ struct timed_send { @@ -44,18 +50,21 @@ struct timed_send { }; #define MAX_NUM_PKT 8 -static struct timed_send cfg_in[MAX_NUM_PKT]; -static struct timed_send cfg_out[MAX_NUM_PKT]; +static struct timed_send cfg_buf[MAX_NUM_PKT]; static int cfg_num_pkt; static int cfg_errq_level; static int cfg_errq_type; -static uint64_t gettime_ns(void) +static struct sockaddr_storage cfg_dst_addr; +static struct sockaddr_storage cfg_src_addr; +static socklen_t cfg_alen; + +static uint64_t gettime_ns(clockid_t clock) { struct timespec ts; - if (clock_gettime(cfg_clockid, &ts)) + if (clock_gettime(clock, &ts)) error(1, errno, "gettime"); return ts.tv_sec * (1000ULL * 1000 * 1000) + ts.tv_nsec; @@ -75,6 +84,8 @@ static void do_send_one(int fdt, struct timed_send *ts) msg.msg_iov = &iov; msg.msg_iovlen = 1; + msg.msg_name = (struct sockaddr *)&cfg_dst_addr; + msg.msg_namelen = cfg_alen; if (ts->delay_us >= 0) { memset(control, 0, sizeof(control)); @@ -82,6 +93,8 @@ static void do_send_one(int fdt, struct timed_send *ts) msg.msg_controllen = sizeof(control); tdeliver = glob_tstart + ts->delay_us * 1000; + tdeliver_max = tdeliver_max > tdeliver ? + tdeliver_max : tdeliver; cm = CMSG_FIRSTHDR(&msg); cm->cmsg_level = SOL_SOCKET; @@ -98,7 +111,7 @@ static void do_send_one(int fdt, struct timed_send *ts) } -static bool do_recv_one(int fdr, struct timed_send *ts) +static void do_recv_one(int fdr, struct timed_send *ts) { int64_t tstop, texpect; char rbuf[2]; @@ -106,13 +119,13 @@ static bool do_recv_one(int fdr, struct timed_send *ts) ret = recv(fdr, rbuf, sizeof(rbuf), 0); if (ret == -1 && errno == EAGAIN) - return true; + error(1, EAGAIN, "recv: timeout"); if (ret == -1) error(1, errno, "read"); if (ret != 1) error(1, 0, "read: %dB", ret); - tstop = (gettime_ns() - glob_tstart) / 1000; + tstop = (gettime_ns(cfg_clockid) - glob_tstart) / 1000; texpect = ts->delay_us >= 0 ? ts->delay_us : 0; fprintf(stderr, "payload:%c delay:%lld expected:%lld (us)\n", @@ -121,10 +134,11 @@ static bool do_recv_one(int fdr, struct timed_send *ts) if (rbuf[0] != ts->data) error(1, 0, "payload mismatch. expected %c", ts->data); - if (llabs(tstop - texpect) > cfg_variance_us) - error(1, 0, "exceeds variance (%d us)", cfg_variance_us); - - return false; + if (llabs(tstop - texpect) > cfg_variance_us) { + fprintf(stderr, "exceeds variance (%d us)\n", cfg_variance_us); + if (!getenv("KSFT_MACHINE_SLOW")) + exit(1); + } } static void do_recv_verify_empty(int fdr) @@ -137,18 +151,18 @@ static void do_recv_verify_empty(int fdr) error(1, 0, "recv: not empty as expected (%d, %d)", ret, errno); } -static void do_recv_errqueue_timeout(int fdt) +static int do_recv_errqueue_timeout(int fdt) { char control[CMSG_SPACE(sizeof(struct sock_extended_err)) + CMSG_SPACE(sizeof(struct sockaddr_in6))] = {0}; char data[sizeof(struct ethhdr) + sizeof(struct ipv6hdr) + sizeof(struct udphdr) + 1]; struct sock_extended_err *err; + int ret, num_tstamp = 0; struct msghdr msg = {0}; struct iovec iov = {0}; struct cmsghdr *cm; int64_t tstamp = 0; - int ret; iov.iov_base = data; iov.iov_len = sizeof(data); @@ -196,7 +210,7 @@ static void do_recv_errqueue_timeout(int fdt) default: error(1, 0, "errqueue: errno %u code %u\n", err->ee_errno, err->ee_code); - }; + } tstamp = ((int64_t) err->ee_data) << 32 | err->ee_info; tstamp -= (int64_t) glob_tstart; @@ -206,9 +220,47 @@ static void do_recv_errqueue_timeout(int fdt) msg.msg_flags = 0; msg.msg_controllen = sizeof(control); + num_tstamp++; } - error(1, 0, "recv: timeout"); + return num_tstamp; +} + +static void recv_errqueue_msgs(int fdt) +{ + struct pollfd pfd = { .fd = fdt, .events = POLLERR }; + const int timeout_ms = 10; + int ret, num_tstamp = 0; + + do { + ret = poll(&pfd, 1, timeout_ms); + if (ret == -1) + error(1, errno, "poll"); + + if (ret && (pfd.revents & POLLERR)) + num_tstamp += do_recv_errqueue_timeout(fdt); + + if (num_tstamp == cfg_num_pkt) + break; + + } while (gettime_ns(cfg_clockid) < tdeliver_max); +} + +static void start_time_wait(void) +{ + uint64_t now; + int err; + + if (!cfg_start_time_ns) + return; + + now = gettime_ns(CLOCK_REALTIME); + if (cfg_start_time_ns < now) + return; + + err = usleep((cfg_start_time_ns - now) / 1000); + if (err) + error(1, errno, "usleep"); } static void setsockopt_txtime(int fd) @@ -245,6 +297,10 @@ static int setup_tx(struct sockaddr *addr, socklen_t alen) setsockopt_txtime(fd); + if (cfg_mark && + setsockopt(fd, SOL_SOCKET, SO_MARK, &cfg_mark, sizeof(cfg_mark))) + error(1, errno, "setsockopt mark"); + return fd; } @@ -266,31 +322,70 @@ static int setup_rx(struct sockaddr *addr, socklen_t alen) return fd; } -static void do_test(struct sockaddr *addr, socklen_t alen) +static void do_test_tx(struct sockaddr *addr, socklen_t alen) { - int fdt, fdr, i; + int fdt, i; fprintf(stderr, "\nSO_TXTIME ipv%c clock %s\n", addr->sa_family == PF_INET ? '4' : '6', cfg_clockid == CLOCK_TAI ? "tai" : "monotonic"); fdt = setup_tx(addr, alen); - fdr = setup_rx(addr, alen); - glob_tstart = gettime_ns(); + start_time_wait(); + glob_tstart = gettime_ns(cfg_clockid); for (i = 0; i < cfg_num_pkt; i++) - do_send_one(fdt, &cfg_in[i]); + do_send_one(fdt, &cfg_buf[i]); + + recv_errqueue_msgs(fdt); + + if (close(fdt)) + error(1, errno, "close t"); +} + +static void do_test_rx(struct sockaddr *addr, socklen_t alen) +{ + int fdr, i; + + fdr = setup_rx(addr, alen); + + start_time_wait(); + glob_tstart = gettime_ns(cfg_clockid); + for (i = 0; i < cfg_num_pkt; i++) - if (do_recv_one(fdr, &cfg_out[i])) - do_recv_errqueue_timeout(fdt); + do_recv_one(fdr, &cfg_buf[i]); do_recv_verify_empty(fdr); if (close(fdr)) error(1, errno, "close r"); - if (close(fdt)) - error(1, errno, "close t"); +} + +static void setup_sockaddr(int domain, const char *str_addr, + struct sockaddr_storage *sockaddr) +{ + struct sockaddr_in6 *addr6 = (void *) sockaddr; + struct sockaddr_in *addr4 = (void *) sockaddr; + + switch (domain) { + case PF_INET: + memset(addr4, 0, sizeof(*addr4)); + addr4->sin_family = AF_INET; + addr4->sin_port = htons(cfg_port); + if (str_addr && + inet_pton(AF_INET, str_addr, &(addr4->sin_addr)) != 1) + error(1, 0, "ipv4 parse error: %s", str_addr); + break; + case PF_INET6: + memset(addr6, 0, sizeof(*addr6)); + addr6->sin6_family = AF_INET6; + addr6->sin6_port = htons(cfg_port); + if (str_addr && + inet_pton(AF_INET6, str_addr, &(addr6->sin6_addr)) != 1) + error(1, 0, "ipv6 parse error: %s", str_addr); + break; + } } static int parse_io(const char *optarg, struct timed_send *array) @@ -323,17 +418,46 @@ static int parse_io(const char *optarg, struct timed_send *array) return aoff / 2; } +static void usage(const char *progname) +{ + fprintf(stderr, "\nUsage: %s [options] <payload>\n" + "Options:\n" + " -4 only IPv4\n" + " -6 only IPv6\n" + " -c <clock> monotonic or tai (default)\n" + " -D <addr> destination IP address (server)\n" + " -S <addr> source IP address (client)\n" + " -r run rx mode\n" + " -t <nsec> start time (UTC nanoseconds)\n" + " -m <mark> socket mark\n" + "\n", + progname); + exit(1); +} + static void parse_opts(int argc, char **argv) { - int c, ilen, olen; + char *daddr = NULL, *saddr = NULL; + int domain = PF_UNSPEC; + int c; - while ((c = getopt(argc, argv, "46c:")) != -1) { + while ((c = getopt(argc, argv, "46c:S:D:rt:m:")) != -1) { switch (c) { case '4': - cfg_do_ipv4 = true; + if (domain != PF_UNSPEC) + error(1, 0, "Pass one of -4 or -6"); + domain = PF_INET; + cfg_alen = sizeof(struct sockaddr_in); + cfg_errq_level = SOL_IP; + cfg_errq_type = IP_RECVERR; break; case '6': - cfg_do_ipv6 = true; + if (domain != PF_UNSPEC) + error(1, 0, "Pass one of -4 or -6"); + domain = PF_INET6; + cfg_alen = sizeof(struct sockaddr_in6); + cfg_errq_level = SOL_IPV6; + cfg_errq_type = IPV6_RECVERR; break; case 'c': if (!strcmp(optarg, "tai")) @@ -344,50 +468,50 @@ static void parse_opts(int argc, char **argv) else error(1, 0, "unknown clock id %s", optarg); break; + case 'S': + saddr = optarg; + break; + case 'D': + daddr = optarg; + break; + case 'r': + cfg_rx = true; + break; + case 't': + cfg_start_time_ns = strtoll(optarg, NULL, 0); + break; + case 'm': + cfg_mark = strtol(optarg, NULL, 0); + break; default: - error(1, 0, "parse error at %d", optind); + usage(argv[0]); } } - if (argc - optind != 2) - error(1, 0, "Usage: %s [-46] -c <clock> <in> <out>", argv[0]); + if (argc - optind != 1) + usage(argv[0]); - ilen = parse_io(argv[optind], cfg_in); - olen = parse_io(argv[optind + 1], cfg_out); - if (ilen != olen) - error(1, 0, "i/o streams len mismatch (%d, %d)\n", ilen, olen); - cfg_num_pkt = ilen; + if (domain == PF_UNSPEC) + error(1, 0, "Pass one of -4 or -6"); + if (!daddr) + error(1, 0, "-D <server addr> required\n"); + if (!cfg_rx && !saddr) + error(1, 0, "-S <client addr> required\n"); + + setup_sockaddr(domain, daddr, &cfg_dst_addr); + setup_sockaddr(domain, saddr, &cfg_src_addr); + + cfg_num_pkt = parse_io(argv[optind], cfg_buf); } int main(int argc, char **argv) { parse_opts(argc, argv); - if (cfg_do_ipv6) { - struct sockaddr_in6 addr6 = {0}; - - addr6.sin6_family = AF_INET6; - addr6.sin6_port = htons(cfg_port); - addr6.sin6_addr = in6addr_loopback; - - cfg_errq_level = SOL_IPV6; - cfg_errq_type = IPV6_RECVERR; - - do_test((void *)&addr6, sizeof(addr6)); - } - - if (cfg_do_ipv4) { - struct sockaddr_in addr4 = {0}; - - addr4.sin_family = AF_INET; - addr4.sin_port = htons(cfg_port); - addr4.sin_addr.s_addr = htonl(INADDR_LOOPBACK); - - cfg_errq_level = SOL_IP; - cfg_errq_type = IP_RECVERR; - - do_test((void *)&addr4, sizeof(addr4)); - } + if (cfg_rx) + do_test_rx((void *)&cfg_dst_addr, cfg_alen); + else + do_test_tx((void *)&cfg_src_addr, cfg_alen); return 0; } diff --git a/tools/testing/selftests/net/so_txtime.sh b/tools/testing/selftests/net/so_txtime.sh index 3f7800eaecb1..5e861ad32a42 100755 --- a/tools/testing/selftests/net/so_txtime.sh +++ b/tools/testing/selftests/net/so_txtime.sh @@ -3,34 +3,108 @@ # # Regression tests for the SO_TXTIME interface -# Run in network namespace -if [[ $# -eq 0 ]]; then - if ! ./in_netns.sh $0 __subprocess; then - # test is time sensitive, can be flaky - echo "test failed: retry once" - ./in_netns.sh $0 __subprocess +set -e + +readonly ksft_skip=4 +readonly DEV="veth0" +readonly BIN="./so_txtime" + +readonly RAND="$(mktemp -u XXXXXX)" +readonly NSPREFIX="ns-${RAND}" +readonly NS1="${NSPREFIX}1" +readonly NS2="${NSPREFIX}2" + +readonly SADDR4='192.168.1.1' +readonly DADDR4='192.168.1.2' +readonly SADDR6='fd::1' +readonly DADDR6='fd::2' + +cleanup() { + ip netns del "${NS2}" + ip netns del "${NS1}" +} + +trap cleanup EXIT + +# Create virtual ethernet pair between network namespaces +ip netns add "${NS1}" +ip netns add "${NS2}" + +ip link add "${DEV}" netns "${NS1}" type veth \ + peer name "${DEV}" netns "${NS2}" + +# Bring the devices up +ip -netns "${NS1}" link set "${DEV}" up +ip -netns "${NS2}" link set "${DEV}" up + +# Set fixed MAC addresses on the devices +ip -netns "${NS1}" link set dev "${DEV}" address 02:02:02:02:02:02 +ip -netns "${NS2}" link set dev "${DEV}" address 06:06:06:06:06:06 + +# Add fixed IP addresses to the devices +ip -netns "${NS1}" addr add 192.168.1.1/24 dev "${DEV}" +ip -netns "${NS2}" addr add 192.168.1.2/24 dev "${DEV}" +ip -netns "${NS1}" addr add fd::1/64 dev "${DEV}" nodad +ip -netns "${NS2}" addr add fd::2/64 dev "${DEV}" nodad + +run_test() { + local readonly IP="$1" + local readonly CLOCK="$2" + local readonly TXARGS="$3" + local readonly RXARGS="$4" + + if [[ "${IP}" == "4" ]]; then + local readonly SADDR="${SADDR4}" + local readonly DADDR="${DADDR4}" + elif [[ "${IP}" == "6" ]]; then + local readonly SADDR="${SADDR6}" + local readonly DADDR="${DADDR6}" + else + echo "Invalid IP version ${IP}" + exit 1 fi - exit $? -fi + local readonly START="$(date +%s%N --date="+ 0.1 seconds")" -set -e + ip netns exec "${NS2}" "${BIN}" -"${IP}" -c "${CLOCK}" -t "${START}" -S "${SADDR}" -D "${DADDR}" "${RXARGS}" -r & + ip netns exec "${NS1}" "${BIN}" -"${IP}" -c "${CLOCK}" -t "${START}" -S "${SADDR}" -D "${DADDR}" "${TXARGS}" + wait "$!" +} -tc qdisc add dev lo root fq -./so_txtime -4 -6 -c mono a,-1 a,-1 -./so_txtime -4 -6 -c mono a,0 a,0 -./so_txtime -4 -6 -c mono a,10 a,10 -./so_txtime -4 -6 -c mono a,10,b,20 a,10,b,20 -./so_txtime -4 -6 -c mono a,20,b,10 b,20,a,20 - -if tc qdisc replace dev lo root etf clockid CLOCK_TAI delta 400000; then - ! ./so_txtime -4 -6 -c tai a,-1 a,-1 - ! ./so_txtime -4 -6 -c tai a,0 a,0 - ./so_txtime -4 -6 -c tai a,10 a,10 - ./so_txtime -4 -6 -c tai a,10,b,20 a,10,b,20 - ./so_txtime -4 -6 -c tai a,20,b,10 b,10,a,20 +do_test() { + run_test $@ + [ $? -ne 0 ] && ret=1 +} + +do_fail_test() { + run_test $@ + [ $? -eq 0 ] && ret=1 +} + +ip netns exec "${NS1}" tc qdisc add dev "${DEV}" root fq +set +e +ret=0 +do_test 4 mono a,-1 a,-1 +do_test 6 mono a,0 a,0 +do_test 6 mono a,10 a,10 +do_test 4 mono a,10,b,20 a,10,b,20 +do_test 6 mono a,20,b,10 b,20,a,20 + +if ip netns exec "${NS1}" tc qdisc replace dev "${DEV}" root etf clockid CLOCK_TAI delta 400000; then + do_fail_test 4 tai a,-1 a,-1 + do_fail_test 6 tai a,0 a,0 + do_test 6 tai a,10 a,10 + do_test 4 tai a,10,b,20 a,10,b,20 + do_test 6 tai a,20,b,10 b,10,a,20 else echo "tc ($(tc -V)) does not support qdisc etf. skipping" + [ $ret -eq 0 ] && ret=$ksft_skip fi -echo OK. All tests passed +if [ $ret -eq 0 ]; then + echo OK. All tests passed +elif [[ $ret -ne $ksft_skip && -n "$KSFT_MACHINE_SLOW" ]]; then + echo "Ignoring errors due to slow environment" 1>&2 + ret=0 +fi +exit $ret diff --git a/tools/testing/selftests/net/socket.c b/tools/testing/selftests/net/socket.c index afca1ead677f..db1aeb8c5d1e 100644 --- a/tools/testing/selftests/net/socket.c +++ b/tools/testing/selftests/net/socket.c @@ -7,6 +7,8 @@ #include <sys/socket.h> #include <netinet/in.h> +#include "../kselftest.h" + struct socket_testcase { int domain; int type; @@ -31,7 +33,6 @@ static struct socket_testcase tests[] = { { AF_INET, SOCK_STREAM, IPPROTO_UDP, -EPROTONOSUPPORT, 1 }, }; -#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) #define ERR_STRING_SZ 64 static int run_tests(void) diff --git a/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh new file mode 100755 index 000000000000..02d617040793 --- /dev/null +++ b/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh @@ -0,0 +1,573 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# author: Andrea Mayer <andrea.mayer@uniroma2.it> +# author: Paolo Lungaroni <paolo.lungaroni@uniroma2.it> + +# This test is designed for evaluating the new SRv6 End.DT46 Behavior used for +# implementing IPv4/IPv6 L3 VPN use cases. +# +# The current SRv6 code in the Linux kernel only implements SRv6 End.DT4 and +# End.DT6 Behaviors which can be used respectively to support IPv4-in-IPv6 and +# IPv6-in-IPv6 VPNs. With End.DT4 and End.DT6 it is not possible to create a +# single SRv6 VPN tunnel to carry both IPv4 and IPv6 traffic. +# The SRv6 End.DT46 Behavior implementation is meant to support the +# decapsulation of IPv4 and IPv6 traffic coming from a single SRv6 tunnel. +# Therefore, the SRv6 End.DT46 Behavior in the Linux kernel greatly simplifies +# the setup and operations of SRv6 VPNs. +# +# Hereafter a network diagram is shown, where two different tenants (named 100 +# and 200) offer IPv4/IPv6 L3 VPN services allowing hosts to communicate with +# each other across an IPv6 network. +# +# Only hosts belonging to the same tenant (and to the same VPN) can communicate +# with each other. Instead, the communication among hosts of different tenants +# is forbidden. +# In other words, hosts hs-t100-1 and hs-t100-2 are connected through the +# IPv4/IPv6 L3 VPN of tenant 100 while hs-t200-3 and hs-t200-4 are connected +# using the IPv4/IPv6 L3 VPN of tenant 200. Cross connection between tenant 100 +# and tenant 200 is forbidden and thus, for example, hs-t100-1 cannot reach +# hs-t200-3 and vice versa. +# +# Routers rt-1 and rt-2 implement IPv4/IPv6 L3 VPN services leveraging the SRv6 +# architecture. The key components for such VPNs are: a) SRv6 Encap behavior, +# b) SRv6 End.DT46 Behavior and c) VRF. +# +# To explain how an IPv4/IPv6 L3 VPN based on SRv6 works, let us briefly +# consider an example where, within the same domain of tenant 100, the host +# hs-t100-1 pings the host hs-t100-2. +# +# First of all, L2 reachability of the host hs-t100-2 is taken into account by +# the router rt-1 which acts as a arp/ndp proxy. +# +# When the host hs-t100-1 sends an IPv6 or IPv4 packet destined to hs-t100-2, +# the router rt-1 receives the packet on the internal veth-t100 interface. Such +# interface is enslaved to the VRF vrf-100 whose associated table contains the +# SRv6 Encap route for encapsulating any IPv6 or IPv4 packet in a IPv6 plus the +# Segment Routing Header (SRH) packet. This packet is sent through the (IPv6) +# core network up to the router rt-2 that receives it on veth0 interface. +# +# The rt-2 router uses the 'localsid' routing table to process incoming +# IPv6+SRH packets which belong to the VPN of the tenant 100. For each of these +# packets, the SRv6 End.DT46 Behavior removes the outer IPv6+SRH headers and +# performs the lookup on the vrf-100 table using the destination address of +# the decapsulated IPv6 or IPv4 packet. Afterwards, the packet is sent to the +# host hs-t100-2 through the veth-t100 interface. +# +# The ping response follows the same processing but this time the roles of rt-1 +# and rt-2 are swapped. +# +# Of course, the IPv4/IPv6 L3 VPN for tenant 200 works exactly as the IPv4/IPv6 +# L3 VPN for tenant 100. In this case, only hosts hs-t200-3 and hs-t200-4 are +# able to connect with each other. +# +# +# +-------------------+ +-------------------+ +# | | | | +# | hs-t100-1 netns | | hs-t100-2 netns | +# | | | | +# | +-------------+ | | +-------------+ | +# | | veth0 | | | | veth0 | | +# | | cafe::1/64 | | | | cafe::2/64 | | +# | | 10.0.0.1/24 | | | | 10.0.0.2/24 | | +# | +-------------+ | | +-------------+ | +# | . | | . | +# +-------------------+ +-------------------+ +# . . +# . . +# . . +# +-----------------------------------+ +-----------------------------------+ +# | . | | . | +# | +---------------+ | | +---------------- | +# | | veth-t100 | | | | veth-t100 | | +# | | cafe::254/64 | | | | cafe::254/64 | | +# | | 10.0.0.254/24 | +----------+ | | +----------+ | 10.0.0.254/24 | | +# | +-------+-------+ | localsid | | | | localsid | +-------+-------- | +# | | | table | | | | table | | | +# | +----+----+ +----------+ | | +----------+ +----+----+ | +# | | vrf-100 | | | | vrf-100 | | +# | +---------+ +------------+ | | +------------+ +---------+ | +# | | veth0 | | | | veth0 | | +# | | fd00::1/64 |.|...|.| fd00::2/64 | | +# | +---------+ +------------+ | | +------------+ +---------+ | +# | | vrf-200 | | | | vrf-200 | | +# | +----+----+ | | +----+----+ | +# | | | | | | +# | +-------+-------+ | | +-------+-------- | +# | | veth-t200 | | | | veth-t200 | | +# | | cafe::254/64 | | | | cafe::254/64 | | +# | | 10.0.0.254/24 | | | | 10.0.0.254/24 | | +# | +---------------+ rt-1 netns | | rt-2 netns +---------------- | +# | . | | . | +# +-----------------------------------+ +-----------------------------------+ +# . . +# . . +# . . +# . . +# +-------------------+ +-------------------+ +# | . | | . | +# | +-------------+ | | +-------------+ | +# | | veth0 | | | | veth0 | | +# | | cafe::3/64 | | | | cafe::4/64 | | +# | | 10.0.0.3/24 | | | | 10.0.0.4/24 | | +# | +-------------+ | | +-------------+ | +# | | | | +# | hs-t200-3 netns | | hs-t200-4 netns | +# | | | | +# +-------------------+ +-------------------+ +# +# +# ~~~~~~~~~~~~~~~~~~~~~~~~~ +# | Network configuration | +# ~~~~~~~~~~~~~~~~~~~~~~~~~ +# +# rt-1: localsid table (table 90) +# +--------------------------------------------------+ +# |SID |Action | +# +--------------------------------------------------+ +# |fc00:21:100::6046|apply SRv6 End.DT46 vrftable 100| +# +--------------------------------------------------+ +# |fc00:21:200::6046|apply SRv6 End.DT46 vrftable 200| +# +--------------------------------------------------+ +# +# rt-1: VRF tenant 100 (table 100) +# +---------------------------------------------------+ +# |host |Action | +# +---------------------------------------------------+ +# |cafe::2 |apply seg6 encap segs fc00:12:100::6046| +# +---------------------------------------------------+ +# |cafe::/64 |forward to dev veth-t100 | +# +---------------------------------------------------+ +# |10.0.0.2 |apply seg6 encap segs fc00:12:100::6046| +# +---------------------------------------------------+ +# |10.0.0.0/24|forward to dev veth-t100 | +# +---------------------------------------------------+ +# +# rt-1: VRF tenant 200 (table 200) +# +---------------------------------------------------+ +# |host |Action | +# +---------------------------------------------------+ +# |cafe::4 |apply seg6 encap segs fc00:12:200::6046| +# +---------------------------------------------------+ +# |cafe::/64 |forward to dev veth-t200 | +# +---------------------------------------------------+ +# |10.0.0.4 |apply seg6 encap segs fc00:12:200::6046| +# +---------------------------------------------------+ +# |10.0.0.0/24|forward to dev veth-t200 | +# +---------------------------------------------------+ +# +# +# rt-2: localsid table (table 90) +# +--------------------------------------------------+ +# |SID |Action | +# +--------------------------------------------------+ +# |fc00:12:100::6046|apply SRv6 End.DT46 vrftable 100| +# +--------------------------------------------------+ +# |fc00:12:200::6046|apply SRv6 End.DT46 vrftable 200| +# +--------------------------------------------------+ +# +# rt-2: VRF tenant 100 (table 100) +# +---------------------------------------------------+ +# |host |Action | +# +---------------------------------------------------+ +# |cafe::1 |apply seg6 encap segs fc00:21:100::6046| +# +---------------------------------------------------+ +# |cafe::/64 |forward to dev veth-t100 | +# +---------------------------------------------------+ +# |10.0.0.1 |apply seg6 encap segs fc00:21:100::6046| +# +---------------------------------------------------+ +# |10.0.0.0/24|forward to dev veth-t100 | +# +---------------------------------------------------+ +# +# rt-2: VRF tenant 200 (table 200) +# +---------------------------------------------------+ +# |host |Action | +# +---------------------------------------------------+ +# |cafe::3 |apply seg6 encap segs fc00:21:200::6046| +# +---------------------------------------------------+ +# |cafe::/64 |forward to dev veth-t200 | +# +---------------------------------------------------+ +# |10.0.0.3 |apply seg6 encap segs fc00:21:200::6046| +# +---------------------------------------------------+ +# |10.0.0.0/24|forward to dev veth-t200 | +# +---------------------------------------------------+ +# + +source lib.sh + +readonly LOCALSID_TABLE_ID=90 +readonly IPv6_RT_NETWORK=fd00 +readonly IPv6_HS_NETWORK=cafe +readonly IPv4_HS_NETWORK=10.0.0 +readonly VPN_LOCATOR_SERVICE=fc00 +PING_TIMEOUT_SEC=4 + +ret=0 + +PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no} + +log_test() +{ + local rc=$1 + local expected=$2 + local msg="$3" + + if [ ${rc} -eq ${expected} ]; then + nsuccess=$((nsuccess+1)) + printf "\n TEST: %-60s [ OK ]\n" "${msg}" + else + ret=1 + nfail=$((nfail+1)) + printf "\n TEST: %-60s [FAIL]\n" "${msg}" + if [ "${PAUSE_ON_FAIL}" = "yes" ]; then + echo + echo "hit enter to continue, 'q' to quit" + read a + [ "$a" = "q" ] && exit 1 + fi + fi +} + +print_log_test_results() +{ + if [ "$TESTS" != "none" ]; then + printf "\nTests passed: %3d\n" ${nsuccess} + printf "Tests failed: %3d\n" ${nfail} + fi +} + +log_section() +{ + echo + echo "################################################################################" + echo "TEST SECTION: $*" + echo "################################################################################" +} + +cleanup() +{ + ip link del veth-rt-1 2>/dev/null || true + ip link del veth-rt-2 2>/dev/null || true + + cleanup_all_ns +} + +# Setup the basic networking for the routers +setup_rt_networking() +{ + local id=$1 + eval local nsname=\${rt_${id}} + + ip link set veth-rt-${id} netns ${nsname} + ip -netns ${nsname} link set veth-rt-${id} name veth0 + + ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.accept_dad=0 + ip netns exec ${nsname} sysctl -wq net.ipv6.conf.default.accept_dad=0 + + ip -netns ${nsname} addr add ${IPv6_RT_NETWORK}::${id}/64 dev veth0 nodad + ip -netns ${nsname} link set veth0 up + ip -netns ${nsname} link set lo up + + ip netns exec ${nsname} sysctl -wq net.ipv4.ip_forward=1 + ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.forwarding=1 +} + +setup_hs() +{ + local hid=$1 + local rid=$2 + local tid=$3 + eval local hsname=\${hs_t${tid}_${hid}} + eval local rtname=\${rt_${rid}} + local rtveth=veth-t${tid} + + # set the networking for the host + ip netns exec ${hsname} sysctl -wq net.ipv6.conf.all.accept_dad=0 + ip netns exec ${hsname} sysctl -wq net.ipv6.conf.default.accept_dad=0 + + # disable the rp_filter otherwise the kernel gets confused about how + # to route decap ipv4 packets. + ip netns exec ${rtname} sysctl -wq net.ipv4.conf.all.rp_filter=0 + ip netns exec ${rtname} sysctl -wq net.ipv4.conf.default.rp_filter=0 + + ip -netns ${hsname} link add veth0 type veth peer name ${rtveth} + ip -netns ${hsname} link set ${rtveth} netns ${rtname} + ip -netns ${hsname} addr add ${IPv6_HS_NETWORK}::${hid}/64 dev veth0 nodad + ip -netns ${hsname} addr add ${IPv4_HS_NETWORK}.${hid}/24 dev veth0 + ip -netns ${hsname} link set veth0 up + ip -netns ${hsname} link set lo up + + # configure the VRF for the tenant X on the router which is directly + # connected to the source host. + ip -netns ${rtname} link add vrf-${tid} type vrf table ${tid} + ip -netns ${rtname} link set vrf-${tid} up + + ip netns exec ${rtname} sysctl -wq net.ipv6.conf.all.accept_dad=0 + ip netns exec ${rtname} sysctl -wq net.ipv6.conf.default.accept_dad=0 + + # enslave the veth-tX interface to the vrf-X in the access router + ip -netns ${rtname} link set ${rtveth} master vrf-${tid} + ip -netns ${rtname} addr add ${IPv6_HS_NETWORK}::254/64 dev ${rtveth} nodad + ip -netns ${rtname} addr add ${IPv4_HS_NETWORK}.254/24 dev ${rtveth} + ip -netns ${rtname} link set ${rtveth} up + + ip netns exec ${rtname} sysctl -wq net.ipv6.conf.${rtveth}.proxy_ndp=1 + ip netns exec ${rtname} sysctl -wq net.ipv4.conf.${rtveth}.proxy_arp=1 + + ip netns exec ${rtname} sh -c "echo 1 > /proc/sys/net/vrf/strict_mode" +} + +setup_vpn_config() +{ + local hssrc=$1 + local rtsrc=$2 + local hsdst=$3 + local rtdst=$4 + local tid=$5 + + eval local rtsrc_name=\${rt_${rtsrc}} + eval local rtdst_name=\${rt_${rtdst}} + local rtveth=veth-t${tid} + local vpn_sid=${VPN_LOCATOR_SERVICE}:${hssrc}${hsdst}:${tid}::6046 + + ip -netns ${rtsrc_name} -6 neigh add proxy ${IPv6_HS_NETWORK}::${hsdst} dev ${rtveth} + + # set the encap route for encapsulating packets which arrive from the + # host hssrc and destined to the access router rtsrc. + ip -netns ${rtsrc_name} -6 route add ${IPv6_HS_NETWORK}::${hsdst}/128 vrf vrf-${tid} \ + encap seg6 mode encap segs ${vpn_sid} dev veth0 + ip -netns ${rtsrc_name} -4 route add ${IPv4_HS_NETWORK}.${hsdst}/32 vrf vrf-${tid} \ + encap seg6 mode encap segs ${vpn_sid} dev veth0 + ip -netns ${rtsrc_name} -6 route add ${vpn_sid}/128 vrf vrf-${tid} \ + via fd00::${rtdst} dev veth0 + + # set the decap route for decapsulating packets which arrive from + # the rtdst router and destined to the hsdst host. + ip -netns ${rtdst_name} -6 route add ${vpn_sid}/128 table ${LOCALSID_TABLE_ID} \ + encap seg6local action End.DT46 vrftable ${tid} dev vrf-${tid} + + # all sids for VPNs start with a common locator which is fc00::/16. + # Routes for handling the SRv6 End.DT46 behavior instances are grouped + # together in the 'localsid' table. + # + # NOTE: added only once + if [ -z "$(ip -netns ${rtdst_name} -6 rule show | \ + grep "to ${VPN_LOCATOR_SERVICE}::/16 lookup ${LOCALSID_TABLE_ID}")" ]; then + ip -netns ${rtdst_name} -6 rule add \ + to ${VPN_LOCATOR_SERVICE}::/16 \ + lookup ${LOCALSID_TABLE_ID} prio 999 + fi + + # set default routes to unreachable for both ipv4 and ipv6 + ip -netns ${rtsrc_name} -6 route add unreachable default metric 4278198272 \ + vrf vrf-${tid} + + ip -netns ${rtsrc_name} -4 route add unreachable default metric 4278198272 \ + vrf vrf-${tid} +} + +setup() +{ + ip link add veth-rt-1 type veth peer name veth-rt-2 + # setup the networking for router rt-1 and router rt-2 + setup_ns rt_1 rt_2 + setup_rt_networking 1 + setup_rt_networking 2 + + # setup two hosts for the tenant 100. + # - host hs-1 is directly connected to the router rt-1; + # - host hs-2 is directly connected to the router rt-2. + setup_ns hs_t100_1 hs_t100_2 + setup_hs 1 1 100 #args: host router tenant + setup_hs 2 2 100 + + # setup two hosts for the tenant 200 + # - host hs-3 is directly connected to the router rt-1; + # - host hs-4 is directly connected to the router rt-2. + setup_ns hs_t200_3 hs_t200_4 + setup_hs 3 1 200 + setup_hs 4 2 200 + + # setup the IPv4/IPv6 L3 VPN which connects the host hs-t100-1 and host + # hs-t100-2 within the same tenant 100. + setup_vpn_config 1 1 2 2 100 #args: src_host src_router dst_host dst_router tenant + setup_vpn_config 2 2 1 1 100 + + # setup the IPv4/IPv6 L3 VPN which connects the host hs-t200-3 and host + # hs-t200-4 within the same tenant 200. + setup_vpn_config 3 1 4 2 200 + setup_vpn_config 4 2 3 1 200 +} + +check_rt_connectivity() +{ + local rtsrc=$1 + local rtdst=$2 + eval local nsname=\${rt_${rtsrc}} + + ip netns exec ${nsname} ping -c 1 -W 1 ${IPv6_RT_NETWORK}::${rtdst} \ + >/dev/null 2>&1 +} + +check_and_log_rt_connectivity() +{ + local rtsrc=$1 + local rtdst=$2 + + check_rt_connectivity ${rtsrc} ${rtdst} + log_test $? 0 "Routers connectivity: rt-${rtsrc} -> rt-${rtdst}" +} + +check_hs_ipv6_connectivity() +{ + local hssrc=$1 + local hsdst=$2 + local tid=$3 + eval local nsname=\${hs_t${tid}_${hssrc}} + + ip netns exec ${nsname} ping -c 1 -W ${PING_TIMEOUT_SEC} \ + ${IPv6_HS_NETWORK}::${hsdst} >/dev/null 2>&1 +} + +check_hs_ipv4_connectivity() +{ + local hssrc=$1 + local hsdst=$2 + local tid=$3 + eval local nsname=\${hs_t${tid}_${hssrc}} + + ip netns exec ${nsname} ping -c 1 -W ${PING_TIMEOUT_SEC} \ + ${IPv4_HS_NETWORK}.${hsdst} >/dev/null 2>&1 +} + +check_and_log_hs_connectivity() +{ + local hssrc=$1 + local hsdst=$2 + local tid=$3 + + check_hs_ipv6_connectivity ${hssrc} ${hsdst} ${tid} + log_test $? 0 "IPv6 Hosts connectivity: hs-t${tid}-${hssrc} -> hs-t${tid}-${hsdst} (tenant ${tid})" + + check_hs_ipv4_connectivity ${hssrc} ${hsdst} ${tid} + log_test $? 0 "IPv4 Hosts connectivity: hs-t${tid}-${hssrc} -> hs-t${tid}-${hsdst} (tenant ${tid})" + +} + +check_and_log_hs_isolation() +{ + local hssrc=$1 + local tidsrc=$2 + local hsdst=$3 + local tiddst=$4 + + check_hs_ipv6_connectivity ${hssrc} ${hsdst} ${tidsrc} + # NOTE: ping should fail + log_test $? 1 "IPv6 Hosts isolation: hs-t${tidsrc}-${hssrc} -X-> hs-t${tiddst}-${hsdst}" + + check_hs_ipv4_connectivity ${hssrc} ${hsdst} ${tidsrc} + # NOTE: ping should fail + log_test $? 1 "IPv4 Hosts isolation: hs-t${tidsrc}-${hssrc} -X-> hs-t${tiddst}-${hsdst}" + +} + + +check_and_log_hs2gw_connectivity() +{ + local hssrc=$1 + local tid=$2 + + check_hs_ipv6_connectivity ${hssrc} 254 ${tid} + log_test $? 0 "IPv6 Hosts connectivity: hs-t${tid}-${hssrc} -> gw (tenant ${tid})" + + check_hs_ipv4_connectivity ${hssrc} 254 ${tid} + log_test $? 0 "IPv4 Hosts connectivity: hs-t${tid}-${hssrc} -> gw (tenant ${tid})" + +} + +router_tests() +{ + log_section "IPv6 routers connectivity test" + + check_and_log_rt_connectivity 1 2 + check_and_log_rt_connectivity 2 1 +} + +host2gateway_tests() +{ + log_section "IPv4/IPv6 connectivity test among hosts and gateway" + + check_and_log_hs2gw_connectivity 1 100 + check_and_log_hs2gw_connectivity 2 100 + + check_and_log_hs2gw_connectivity 3 200 + check_and_log_hs2gw_connectivity 4 200 +} + +host_vpn_tests() +{ + log_section "SRv6 VPN connectivity test among hosts in the same tenant" + + check_and_log_hs_connectivity 1 2 100 + check_and_log_hs_connectivity 2 1 100 + + check_and_log_hs_connectivity 3 4 200 + check_and_log_hs_connectivity 4 3 200 +} + +host_vpn_isolation_tests() +{ + local i + local j + local k + local tmp + local l1="1 2" + local l2="3 4" + local t1=100 + local t2=200 + + log_section "SRv6 VPN isolation test among hosts in different tentants" + + for k in 0 1; do + for i in ${l1}; do + for j in ${l2}; do + check_and_log_hs_isolation ${i} ${t1} ${j} ${t2} + done + done + + # let us test the reverse path + tmp="${l1}"; l1="${l2}"; l2="${tmp}" + tmp=${t1}; t1=${t2}; t2=${tmp} + done +} + +if [ "$(id -u)" -ne 0 ];then + echo "SKIP: Need root privileges" + exit $ksft_skip +fi + +if [ ! -x "$(command -v ip)" ]; then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +modprobe vrf &>/dev/null +if [ ! -e /proc/sys/net/vrf/strict_mode ]; then + echo "SKIP: vrf sysctl does not exist" + exit $ksft_skip +fi + +cleanup &>/dev/null + +setup + +router_tests +host2gateway_tests +host_vpn_tests +host_vpn_isolation_tests + +print_log_test_results + +cleanup &>/dev/null + +exit ${ret} diff --git a/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh new file mode 100755 index 000000000000..79fb81e63c59 --- /dev/null +++ b/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh @@ -0,0 +1,496 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# author: Andrea Mayer <andrea.mayer@uniroma2.it> + +# This test is designed for evaluating the new SRv6 End.DT4 behavior used for +# implementing IPv4 L3 VPN use cases. +# +# Hereafter a network diagram is shown, where two different tenants (named 100 +# and 200) offer IPv4 L3 VPN services allowing hosts to communicate with each +# other across an IPv6 network. +# +# Only hosts belonging to the same tenant (and to the same VPN) can communicate +# with each other. Instead, the communication among hosts of different tenants +# is forbidden. +# In other words, hosts hs-t100-1 and hs-t100-2 are connected through the IPv4 +# L3 VPN of tenant 100 while hs-t200-3 and hs-t200-4 are connected using the +# IPv4 L3 VPN of tenant 200. Cross connection between tenant 100 and tenant 200 +# is forbidden and thus, for example, hs-t100-1 cannot reach hs-t200-3 and vice +# versa. +# +# Routers rt-1 and rt-2 implement IPv4 L3 VPN services leveraging the SRv6 +# architecture. The key components for such VPNs are: a) SRv6 Encap behavior, +# b) SRv6 End.DT4 behavior and c) VRF. +# +# To explain how an IPv4 L3 VPN based on SRv6 works, let us briefly consider an +# example where, within the same domain of tenant 100, the host hs-t100-1 pings +# the host hs-t100-2. +# +# First of all, L2 reachability of the host hs-t100-2 is taken into account by +# the router rt-1 which acts as an arp proxy. +# +# When the host hs-t100-1 sends an IPv4 packet destined to hs-t100-2, the +# router rt-1 receives the packet on the internal veth-t100 interface. Such +# interface is enslaved to the VRF vrf-100 whose associated table contains the +# SRv6 Encap route for encapsulating any IPv4 packet in a IPv6 plus the Segment +# Routing Header (SRH) packet. This packet is sent through the (IPv6) core +# network up to the router rt-2 that receives it on veth0 interface. +# +# The rt-2 router uses the 'localsid' routing table to process incoming +# IPv6+SRH packets which belong to the VPN of the tenant 100. For each of these +# packets, the SRv6 End.DT4 behavior removes the outer IPv6+SRH headers and +# performs the lookup on the vrf-100 table using the destination address of +# the decapsulated IPv4 packet. Afterwards, the packet is sent to the host +# hs-t100-2 through the veth-t100 interface. +# +# The ping response follows the same processing but this time the role of rt-1 +# and rt-2 are swapped. +# +# Of course, the IPv4 L3 VPN for tenant 200 works exactly as the IPv4 L3 VPN +# for tenant 100. In this case, only hosts hs-t200-3 and hs-t200-4 are able to +# connect with each other. +# +# +# +-------------------+ +-------------------+ +# | | | | +# | hs-t100-1 netns | | hs-t100-2 netns | +# | | | | +# | +-------------+ | | +-------------+ | +# | | veth0 | | | | veth0 | | +# | | 10.0.0.1/24 | | | | 10.0.0.2/24 | | +# | +-------------+ | | +-------------+ | +# | . | | . | +# +-------------------+ +-------------------+ +# . . +# . . +# . . +# +-----------------------------------+ +-----------------------------------+ +# | . | | . | +# | +---------------+ | | +---------------- | +# | | veth-t100 | | | | veth-t100 | | +# | | 10.0.0.254/24 | +----------+ | | +----------+ | 10.0.0.254/24 | | +# | +-------+-------+ | localsid | | | | localsid | +-------+-------- | +# | | | table | | | | table | | | +# | +----+----+ +----------+ | | +----------+ +----+----+ | +# | | vrf-100 | | | | vrf-100 | | +# | +---------+ +------------+ | | +------------+ +---------+ | +# | | veth0 | | | | veth0 | | +# | | fd00::1/64 |.|...|.| fd00::2/64 | | +# | +---------+ +------------+ | | +------------+ +---------+ | +# | | vrf-200 | | | | vrf-200 | | +# | +----+----+ | | +----+----+ | +# | | | | | | +# | +-------+-------+ | | +-------+-------- | +# | | veth-t200 | | | | veth-t200 | | +# | | 10.0.0.254/24 | | | | 10.0.0.254/24 | | +# | +---------------+ rt-1 netns | | rt-2 netns +---------------- | +# | . | | . | +# +-----------------------------------+ +-----------------------------------+ +# . . +# . . +# . . +# . . +# +-------------------+ +-------------------+ +# | . | | . | +# | +-------------+ | | +-------------+ | +# | | veth0 | | | | veth0 | | +# | | 10.0.0.3/24 | | | | 10.0.0.4/24 | | +# | +-------------+ | | +-------------+ | +# | | | | +# | hs-t200-3 netns | | hs-t200-4 netns | +# | | | | +# +-------------------+ +-------------------+ +# +# +# ~~~~~~~~~~~~~~~~~~~~~~~~~ +# | Network configuration | +# ~~~~~~~~~~~~~~~~~~~~~~~~~ +# +# rt-1: localsid table (table 90) +# +-------------------------------------------------+ +# |SID |Action | +# +-------------------------------------------------+ +# |fc00:21:100::6004|apply SRv6 End.DT4 vrftable 100| +# +-------------------------------------------------+ +# |fc00:21:200::6004|apply SRv6 End.DT4 vrftable 200| +# +-------------------------------------------------+ +# +# rt-1: VRF tenant 100 (table 100) +# +---------------------------------------------------+ +# |host |Action | +# +---------------------------------------------------+ +# |10.0.0.2 |apply seg6 encap segs fc00:12:100::6004| +# +---------------------------------------------------+ +# |10.0.0.0/24|forward to dev veth_t100 | +# +---------------------------------------------------+ +# +# rt-1: VRF tenant 200 (table 200) +# +---------------------------------------------------+ +# |host |Action | +# +---------------------------------------------------+ +# |10.0.0.4 |apply seg6 encap segs fc00:12:200::6004| +# +---------------------------------------------------+ +# |10.0.0.0/24|forward to dev veth_t200 | +# +---------------------------------------------------+ +# +# +# rt-2: localsid table (table 90) +# +-------------------------------------------------+ +# |SID |Action | +# +-------------------------------------------------+ +# |fc00:12:100::6004|apply SRv6 End.DT4 vrftable 100| +# +-------------------------------------------------+ +# |fc00:12:200::6004|apply SRv6 End.DT4 vrftable 200| +# +-------------------------------------------------+ +# +# rt-2: VRF tenant 100 (table 100) +# +---------------------------------------------------+ +# |host |Action | +# +---------------------------------------------------+ +# |10.0.0.1 |apply seg6 encap segs fc00:21:100::6004| +# +---------------------------------------------------+ +# |10.0.0.0/24|forward to dev veth_t100 | +# +---------------------------------------------------+ +# +# rt-2: VRF tenant 200 (table 200) +# +---------------------------------------------------+ +# |host |Action | +# +---------------------------------------------------+ +# |10.0.0.3 |apply seg6 encap segs fc00:21:200::6004| +# +---------------------------------------------------+ +# |10.0.0.0/24|forward to dev veth_t200 | +# +---------------------------------------------------+ +# + +source lib.sh + +readonly LOCALSID_TABLE_ID=90 +readonly IPv6_RT_NETWORK=fd00 +readonly IPv4_HS_NETWORK=10.0.0 +readonly VPN_LOCATOR_SERVICE=fc00 +PING_TIMEOUT_SEC=4 + +ret=0 + +PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no} + +log_test() +{ + local rc=$1 + local expected=$2 + local msg="$3" + + if [ ${rc} -eq ${expected} ]; then + nsuccess=$((nsuccess+1)) + printf "\n TEST: %-60s [ OK ]\n" "${msg}" + else + ret=1 + nfail=$((nfail+1)) + printf "\n TEST: %-60s [FAIL]\n" "${msg}" + if [ "${PAUSE_ON_FAIL}" = "yes" ]; then + echo + echo "hit enter to continue, 'q' to quit" + read a + [ "$a" = "q" ] && exit 1 + fi + fi +} + +print_log_test_results() +{ + if [ "$TESTS" != "none" ]; then + printf "\nTests passed: %3d\n" ${nsuccess} + printf "Tests failed: %3d\n" ${nfail} + fi +} + +log_section() +{ + echo + echo "################################################################################" + echo "TEST SECTION: $*" + echo "################################################################################" +} + +cleanup() +{ + ip link del veth-rt-1 2>/dev/null || true + ip link del veth-rt-2 2>/dev/null || true + + cleanup_all_ns +} + +# Setup the basic networking for the routers +setup_rt_networking() +{ + local id=$1 + eval local nsname=\${rt_${id}} + + ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.accept_dad=0 + ip netns exec ${nsname} sysctl -wq net.ipv6.conf.default.accept_dad=0 + + ip link set veth-rt-${id} netns ${nsname} + ip -netns ${nsname} link set veth-rt-${id} name veth0 + + ip -netns ${nsname} addr add ${IPv6_RT_NETWORK}::${id}/64 dev veth0 nodad + ip -netns ${nsname} link set veth0 up + ip -netns ${nsname} link set lo up + + ip netns exec ${nsname} sysctl -wq net.ipv4.ip_forward=1 + ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.forwarding=1 +} + +setup_hs() +{ + local hid=$1 + local rid=$2 + local tid=$3 + eval local hsname=\${hs_t${tid}_${hid}} + eval local rtname=\${rt_${rid}} + local rtveth=veth-t${tid} + + # disable the rp_filter otherwise the kernel gets confused about how + # to route decap ipv4 packets. + ip netns exec ${rtname} sysctl -wq net.ipv4.conf.all.rp_filter=0 + ip netns exec ${rtname} sysctl -wq net.ipv4.conf.default.rp_filter=0 + + ip -netns ${hsname} link add veth0 type veth peer name ${rtveth} + ip -netns ${hsname} link set ${rtveth} netns ${rtname} + ip -netns ${hsname} addr add ${IPv4_HS_NETWORK}.${hid}/24 dev veth0 + ip -netns ${hsname} link set veth0 up + ip -netns ${hsname} link set lo up + + # configure the VRF for the tenant X on the router which is directly + # connected to the source host. + ip -netns ${rtname} link add vrf-${tid} type vrf table ${tid} + ip -netns ${rtname} link set vrf-${tid} up + + # enslave the veth-tX interface to the vrf-X in the access router + ip -netns ${rtname} link set ${rtveth} master vrf-${tid} + ip -netns ${rtname} addr add ${IPv4_HS_NETWORK}.254/24 dev ${rtveth} + ip -netns ${rtname} link set ${rtveth} up + + ip netns exec ${rtname} sysctl -wq net.ipv4.conf.${rtveth}.proxy_arp=1 + + ip netns exec ${rtname} sh -c "echo 1 > /proc/sys/net/vrf/strict_mode" +} + +setup_vpn_config() +{ + local hssrc=$1 + local rtsrc=$2 + local hsdst=$3 + local rtdst=$4 + local tid=$5 + + eval local rtsrc_name=\${rt_${rtsrc}} + eval local rtdst_name=\${rt_${rtdst}} + local vpn_sid=${VPN_LOCATOR_SERVICE}:${hssrc}${hsdst}:${tid}::6004 + + # set the encap route for encapsulating packets which arrive from the + # host hssrc and destined to the access router rtsrc. + ip -netns ${rtsrc_name} -4 route add ${IPv4_HS_NETWORK}.${hsdst}/32 vrf vrf-${tid} \ + encap seg6 mode encap segs ${vpn_sid} dev veth0 + ip -netns ${rtsrc_name} -6 route add ${vpn_sid}/128 vrf vrf-${tid} \ + via fd00::${rtdst} dev veth0 + + # set the decap route for decapsulating packets which arrive from + # the rtdst router and destined to the hsdst host. + ip -netns ${rtdst_name} -6 route add ${vpn_sid}/128 table ${LOCALSID_TABLE_ID} \ + encap seg6local action End.DT4 vrftable ${tid} dev vrf-${tid} + + # all sids for VPNs start with a common locator which is fc00::/16. + # Routes for handling the SRv6 End.DT4 behavior instances are grouped + # together in the 'localsid' table. + # + # NOTE: added only once + if [ -z "$(ip -netns ${rtdst_name} -6 rule show | \ + grep "to ${VPN_LOCATOR_SERVICE}::/16 lookup ${LOCALSID_TABLE_ID}")" ]; then + ip -netns ${rtdst_name} -6 rule add \ + to ${VPN_LOCATOR_SERVICE}::/16 \ + lookup ${LOCALSID_TABLE_ID} prio 999 + fi +} + +setup() +{ + ip link add veth-rt-1 type veth peer name veth-rt-2 + # setup the networking for router rt-1 and router rt-2 + setup_ns rt_1 rt_2 + setup_rt_networking 1 + setup_rt_networking 2 + + # setup two hosts for the tenant 100. + # - host hs-1 is directly connected to the router rt-1; + # - host hs-2 is directly connected to the router rt-2. + setup_ns hs_t100_1 hs_t100_2 + setup_hs 1 1 100 #args: host router tenant + setup_hs 2 2 100 + + # setup two hosts for the tenant 200 + # - host hs-3 is directly connected to the router rt-1; + # - host hs-4 is directly connected to the router rt-2. + setup_ns hs_t200_3 hs_t200_4 + setup_hs 3 1 200 + setup_hs 4 2 200 + + # setup the IPv4 L3 VPN which connects the host hs-t100-1 and host + # hs-t100-2 within the same tenant 100. + setup_vpn_config 1 1 2 2 100 #args: src_host src_router dst_host dst_router tenant + setup_vpn_config 2 2 1 1 100 + + # setup the IPv4 L3 VPN which connects the host hs-t200-3 and host + # hs-t200-4 within the same tenant 200. + setup_vpn_config 3 1 4 2 200 + setup_vpn_config 4 2 3 1 200 +} + +check_rt_connectivity() +{ + local rtsrc=$1 + local rtdst=$2 + eval local nsname=\${rt_${rtsrc}} + + ip netns exec ${nsname} ping -c 1 -W 1 ${IPv6_RT_NETWORK}::${rtdst} \ + >/dev/null 2>&1 +} + +check_and_log_rt_connectivity() +{ + local rtsrc=$1 + local rtdst=$2 + + check_rt_connectivity ${rtsrc} ${rtdst} + log_test $? 0 "Routers connectivity: rt-${rtsrc} -> rt-${rtdst}" +} + +check_hs_connectivity() +{ + local hssrc=$1 + local hsdst=$2 + local tid=$3 + eval local nsname=\${hs_t${tid}_${hssrc}} + + ip netns exec ${nsname} ping -c 1 -W ${PING_TIMEOUT_SEC} \ + ${IPv4_HS_NETWORK}.${hsdst} >/dev/null 2>&1 +} + +check_and_log_hs_connectivity() +{ + local hssrc=$1 + local hsdst=$2 + local tid=$3 + + check_hs_connectivity ${hssrc} ${hsdst} ${tid} + log_test $? 0 "Hosts connectivity: hs-t${tid}-${hssrc} -> hs-t${tid}-${hsdst} (tenant ${tid})" +} + +check_and_log_hs_isolation() +{ + local hssrc=$1 + local tidsrc=$2 + local hsdst=$3 + local tiddst=$4 + + check_hs_connectivity ${hssrc} ${hsdst} ${tidsrc} + # NOTE: ping should fail + log_test $? 1 "Hosts isolation: hs-t${tidsrc}-${hssrc} -X-> hs-t${tiddst}-${hsdst}" +} + + +check_and_log_hs2gw_connectivity() +{ + local hssrc=$1 + local tid=$2 + + check_hs_connectivity ${hssrc} 254 ${tid} + log_test $? 0 "Hosts connectivity: hs-t${tid}-${hssrc} -> gw (tenant ${tid})" +} + +router_tests() +{ + log_section "IPv6 routers connectivity test" + + check_and_log_rt_connectivity 1 2 + check_and_log_rt_connectivity 2 1 +} + +host2gateway_tests() +{ + log_section "IPv4 connectivity test among hosts and gateway" + + check_and_log_hs2gw_connectivity 1 100 + check_and_log_hs2gw_connectivity 2 100 + + check_and_log_hs2gw_connectivity 3 200 + check_and_log_hs2gw_connectivity 4 200 +} + +host_vpn_tests() +{ + log_section "SRv6 VPN connectivity test among hosts in the same tenant" + + check_and_log_hs_connectivity 1 2 100 + check_and_log_hs_connectivity 2 1 100 + + check_and_log_hs_connectivity 3 4 200 + check_and_log_hs_connectivity 4 3 200 +} + +host_vpn_isolation_tests() +{ + local i + local j + local k + local tmp + local l1="1 2" + local l2="3 4" + local t1=100 + local t2=200 + + log_section "SRv6 VPN isolation test among hosts in different tentants" + + for k in 0 1; do + for i in ${l1}; do + for j in ${l2}; do + check_and_log_hs_isolation ${i} ${t1} ${j} ${t2} + done + done + + # let us test the reverse path + tmp="${l1}"; l1="${l2}"; l2="${tmp}" + tmp=${t1}; t1=${t2}; t2=${tmp} + done +} + +if [ "$(id -u)" -ne 0 ];then + echo "SKIP: Need root privileges" + exit $ksft_skip +fi + +if [ ! -x "$(command -v ip)" ]; then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +modprobe vrf &>/dev/null +if [ ! -e /proc/sys/net/vrf/strict_mode ]; then + echo "SKIP: vrf sysctl does not exist" + exit $ksft_skip +fi + +cleanup &>/dev/null + +setup + +router_tests +host2gateway_tests +host_vpn_tests +host_vpn_isolation_tests + +print_log_test_results + +cleanup &>/dev/null + +exit ${ret} diff --git a/tools/testing/selftests/net/srv6_end_dt6_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_dt6_l3vpn_test.sh new file mode 100755 index 000000000000..e408406d8489 --- /dev/null +++ b/tools/testing/selftests/net/srv6_end_dt6_l3vpn_test.sh @@ -0,0 +1,501 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# author: Andrea Mayer <andrea.mayer@uniroma2.it> +# author: Paolo Lungaroni <paolo.lungaroni@cnit.it> + +# This test is designed for evaluating the new SRv6 End.DT6 behavior used for +# implementing IPv6 L3 VPN use cases. +# +# Hereafter a network diagram is shown, where two different tenants (named 100 +# and 200) offer IPv6 L3 VPN services allowing hosts to communicate with each +# other across an IPv6 network. +# +# Only hosts belonging to the same tenant (and to the same VPN) can communicate +# with each other. Instead, the communication among hosts of different tenants +# is forbidden. +# In other words, hosts hs-t100-1 and hs-t100-2 are connected through the IPv6 +# L3 VPN of tenant 100 while hs-t200-3 and hs-t200-4 are connected using the +# IPv6 L3 VPN of tenant 200. Cross connection between tenant 100 and tenant 200 +# is forbidden and thus, for example, hs-t100-1 cannot reach hs-t200-3 and vice +# versa. +# +# Routers rt-1 and rt-2 implement IPv6 L3 VPN services leveraging the SRv6 +# architecture. The key components for such VPNs are: a) SRv6 Encap behavior, +# b) SRv6 End.DT6 behavior and c) VRF. +# +# To explain how an IPv6 L3 VPN based on SRv6 works, let us briefly consider an +# example where, within the same domain of tenant 100, the host hs-t100-1 pings +# the host hs-t100-2. +# +# First of all, L2 reachability of the host hs-t100-2 is taken into account by +# the router rt-1 which acts as a ndp proxy. +# +# When the host hs-t100-1 sends an IPv6 packet destined to hs-t100-2, the +# router rt-1 receives the packet on the internal veth-t100 interface. Such +# interface is enslaved to the VRF vrf-100 whose associated table contains the +# SRv6 Encap route for encapsulating any IPv6 packet in a IPv6 plus the Segment +# Routing Header (SRH) packet. This packet is sent through the (IPv6) core +# network up to the router rt-2 that receives it on veth0 interface. +# +# The rt-2 router uses the 'localsid' routing table to process incoming +# IPv6+SRH packets which belong to the VPN of the tenant 100. For each of these +# packets, the SRv6 End.DT6 behavior removes the outer IPv6+SRH headers and +# performs the lookup on the vrf-100 table using the destination address of +# the decapsulated IPv6 packet. Afterwards, the packet is sent to the host +# hs-t100-2 through the veth-t100 interface. +# +# The ping response follows the same processing but this time the role of rt-1 +# and rt-2 are swapped. +# +# Of course, the IPv6 L3 VPN for tenant 200 works exactly as the IPv6 L3 VPN +# for tenant 100. In this case, only hosts hs-t200-3 and hs-t200-4 are able to +# connect with each other. +# +# +# +-------------------+ +-------------------+ +# | | | | +# | hs-t100-1 netns | | hs-t100-2 netns | +# | | | | +# | +-------------+ | | +-------------+ | +# | | veth0 | | | | veth0 | | +# | | cafe::1/64 | | | | cafe::2/64 | | +# | +-------------+ | | +-------------+ | +# | . | | . | +# +-------------------+ +-------------------+ +# . . +# . . +# . . +# +-----------------------------------+ +-----------------------------------+ +# | . | | . | +# | +---------------+ | | +---------------- | +# | | veth-t100 | | | | veth-t100 | | +# | | cafe::254/64 | +----------+ | | +----------+ | cafe::254/64 | | +# | +-------+-------+ | localsid | | | | localsid | +-------+-------- | +# | | | table | | | | table | | | +# | +----+----+ +----------+ | | +----------+ +----+----+ | +# | | vrf-100 | | | | vrf-100 | | +# | +---------+ +------------+ | | +------------+ +---------+ | +# | | veth0 | | | | veth0 | | +# | | fd00::1/64 |.|...|.| fd00::2/64 | | +# | +---------+ +------------+ | | +------------+ +---------+ | +# | | vrf-200 | | | | vrf-200 | | +# | +----+----+ | | +----+----+ | +# | | | | | | +# | +-------+-------+ | | +-------+-------- | +# | | veth-t200 | | | | veth-t200 | | +# | | cafe::254/64 | | | | cafe::254/64 | | +# | +---------------+ rt-1 netns | | rt-2 netns +---------------- | +# | . | | . | +# +-----------------------------------+ +-----------------------------------+ +# . . +# . . +# . . +# . . +# +-------------------+ +-------------------+ +# | . | | . | +# | +-------------+ | | +-------------+ | +# | | veth0 | | | | veth0 | | +# | | cafe::3/64 | | | | cafe::4/64 | | +# | +-------------+ | | +-------------+ | +# | | | | +# | hs-t200-3 netns | | hs-t200-4 netns | +# | | | | +# +-------------------+ +-------------------+ +# +# +# ~~~~~~~~~~~~~~~~~~~~~~~~~ +# | Network configuration | +# ~~~~~~~~~~~~~~~~~~~~~~~~~ +# +# rt-1: localsid table (table 90) +# +-------------------------------------------------+ +# |SID |Action | +# +-------------------------------------------------+ +# |fc00:21:100::6006|apply SRv6 End.DT6 vrftable 100| +# +-------------------------------------------------+ +# |fc00:21:200::6006|apply SRv6 End.DT6 vrftable 200| +# +-------------------------------------------------+ +# +# rt-1: VRF tenant 100 (table 100) +# +---------------------------------------------------+ +# |host |Action | +# +---------------------------------------------------+ +# |cafe::2 |apply seg6 encap segs fc00:12:100::6006| +# +---------------------------------------------------+ +# |cafe::/64 |forward to dev veth_t100 | +# +---------------------------------------------------+ +# +# rt-1: VRF tenant 200 (table 200) +# +---------------------------------------------------+ +# |host |Action | +# +---------------------------------------------------+ +# |cafe::4 |apply seg6 encap segs fc00:12:200::6006| +# +---------------------------------------------------+ +# |cafe::/64 |forward to dev veth_t200 | +# +---------------------------------------------------+ +# +# +# rt-2: localsid table (table 90) +# +-------------------------------------------------+ +# |SID |Action | +# +-------------------------------------------------+ +# |fc00:12:100::6006|apply SRv6 End.DT6 vrftable 100| +# +-------------------------------------------------+ +# |fc00:12:200::6006|apply SRv6 End.DT6 vrftable 200| +# +-------------------------------------------------+ +# +# rt-2: VRF tenant 100 (table 100) +# +---------------------------------------------------+ +# |host |Action | +# +---------------------------------------------------+ +# |cafe::1 |apply seg6 encap segs fc00:21:100::6006| +# +---------------------------------------------------+ +# |cafe::/64 |forward to dev veth_t100 | +# +---------------------------------------------------+ +# +# rt-2: VRF tenant 200 (table 200) +# +---------------------------------------------------+ +# |host |Action | +# +---------------------------------------------------+ +# |cafe::3 |apply seg6 encap segs fc00:21:200::6006| +# +---------------------------------------------------+ +# |cafe::/64 |forward to dev veth_t200 | +# +---------------------------------------------------+ +# + +source lib.sh + +readonly LOCALSID_TABLE_ID=90 +readonly IPv6_RT_NETWORK=fd00 +readonly IPv6_HS_NETWORK=cafe +readonly VPN_LOCATOR_SERVICE=fc00 +PING_TIMEOUT_SEC=4 + +ret=0 + +PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no} + +log_test() +{ + local rc=$1 + local expected=$2 + local msg="$3" + + if [ ${rc} -eq ${expected} ]; then + nsuccess=$((nsuccess+1)) + printf "\n TEST: %-60s [ OK ]\n" "${msg}" + else + ret=1 + nfail=$((nfail+1)) + printf "\n TEST: %-60s [FAIL]\n" "${msg}" + if [ "${PAUSE_ON_FAIL}" = "yes" ]; then + echo + echo "hit enter to continue, 'q' to quit" + read a + [ "$a" = "q" ] && exit 1 + fi + fi +} + +print_log_test_results() +{ + if [ "$TESTS" != "none" ]; then + printf "\nTests passed: %3d\n" ${nsuccess} + printf "Tests failed: %3d\n" ${nfail} + fi +} + +log_section() +{ + echo + echo "################################################################################" + echo "TEST SECTION: $*" + echo "################################################################################" +} + +cleanup() +{ + ip link del veth-rt-1 2>/dev/null || true + ip link del veth-rt-2 2>/dev/null || true + + cleanup_all_ns +} + +# Setup the basic networking for the routers +setup_rt_networking() +{ + local id=$1 + eval local nsname=\${rt_${id}} + + ip link set veth-rt-${id} netns ${nsname} + ip -netns ${nsname} link set veth-rt-${id} name veth0 + + ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.accept_dad=0 + ip netns exec ${nsname} sysctl -wq net.ipv6.conf.default.accept_dad=0 + + ip -netns ${nsname} addr add ${IPv6_RT_NETWORK}::${id}/64 dev veth0 nodad + ip -netns ${nsname} link set veth0 up + ip -netns ${nsname} link set lo up + + ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.forwarding=1 +} + +setup_hs() +{ + local hid=$1 + local rid=$2 + local tid=$3 + eval local hsname=\${hs_t${tid}_${hid}} + eval local rtname=\${rt_${rid}} + local rtveth=veth-t${tid} + + # set the networking for the host + ip netns exec ${hsname} sysctl -wq net.ipv6.conf.all.accept_dad=0 + ip netns exec ${hsname} sysctl -wq net.ipv6.conf.default.accept_dad=0 + + ip -netns ${hsname} link add veth0 type veth peer name ${rtveth} + ip -netns ${hsname} link set ${rtveth} netns ${rtname} + ip -netns ${hsname} addr add ${IPv6_HS_NETWORK}::${hid}/64 dev veth0 nodad + ip -netns ${hsname} link set veth0 up + ip -netns ${hsname} link set lo up + + # configure the VRF for the tenant X on the router which is directly + # connected to the source host. + ip -netns ${rtname} link add vrf-${tid} type vrf table ${tid} + ip -netns ${rtname} link set vrf-${tid} up + + ip netns exec ${rtname} sysctl -wq net.ipv6.conf.all.accept_dad=0 + ip netns exec ${rtname} sysctl -wq net.ipv6.conf.default.accept_dad=0 + + # enslave the veth-tX interface to the vrf-X in the access router + ip -netns ${rtname} link set ${rtveth} master vrf-${tid} + ip -netns ${rtname} addr add ${IPv6_HS_NETWORK}::254/64 dev ${rtveth} nodad + ip -netns ${rtname} link set ${rtveth} up + + ip netns exec ${rtname} sysctl -wq net.ipv6.conf.${rtveth}.proxy_ndp=1 + + ip netns exec ${rtname} sh -c "echo 1 > /proc/sys/net/vrf/strict_mode" +} + +setup_vpn_config() +{ + local hssrc=$1 + local rtsrc=$2 + local hsdst=$3 + local rtdst=$4 + local tid=$5 + + eval local rtsrc_name=\${rt_${rtsrc}} + eval local rtdst_name=\${rt_${rtdst}} + local rtveth=veth-t${tid} + local vpn_sid=${VPN_LOCATOR_SERVICE}:${hssrc}${hsdst}:${tid}::6006 + + ip -netns ${rtsrc_name} -6 neigh add proxy ${IPv6_HS_NETWORK}::${hsdst} dev ${rtveth} + + # set the encap route for encapsulating packets which arrive from the + # host hssrc and destined to the access router rtsrc. + ip -netns ${rtsrc_name} -6 route add ${IPv6_HS_NETWORK}::${hsdst}/128 vrf vrf-${tid} \ + encap seg6 mode encap segs ${vpn_sid} dev veth0 + ip -netns ${rtsrc_name} -6 route add ${vpn_sid}/128 vrf vrf-${tid} \ + via fd00::${rtdst} dev veth0 + + # set the decap route for decapsulating packets which arrive from + # the rtdst router and destined to the hsdst host. + ip -netns ${rtdst_name} -6 route add ${vpn_sid}/128 table ${LOCALSID_TABLE_ID} \ + encap seg6local action End.DT6 vrftable ${tid} dev vrf-${tid} + + # all sids for VPNs start with a common locator which is fc00::/16. + # Routes for handling the SRv6 End.DT6 behavior instances are grouped + # together in the 'localsid' table. + # + # NOTE: added only once + if [ -z "$(ip -netns ${rtdst_name} -6 rule show | \ + grep "to ${VPN_LOCATOR_SERVICE}::/16 lookup ${LOCALSID_TABLE_ID}")" ]; then + ip -netns ${rtdst_name} -6 rule add \ + to ${VPN_LOCATOR_SERVICE}::/16 \ + lookup ${LOCALSID_TABLE_ID} prio 999 + fi +} + +setup() +{ + ip link add veth-rt-1 type veth peer name veth-rt-2 + # setup the networking for router rt-1 and router rt-2 + setup_ns rt_1 rt_2 + setup_rt_networking 1 + setup_rt_networking 2 + + # setup two hosts for the tenant 100. + # - host hs-1 is directly connected to the router rt-1; + # - host hs-2 is directly connected to the router rt-2. + setup_ns hs_t100_1 hs_t100_2 + setup_hs 1 1 100 #args: host router tenant + setup_hs 2 2 100 + + # setup two hosts for the tenant 200 + # - host hs-3 is directly connected to the router rt-1; + # - host hs-4 is directly connected to the router rt-2. + setup_ns hs_t200_3 hs_t200_4 + setup_hs 3 1 200 + setup_hs 4 2 200 + + # setup the IPv6 L3 VPN which connects the host hs-t100-1 and host + # hs-t100-2 within the same tenant 100. + setup_vpn_config 1 1 2 2 100 #args: src_host src_router dst_host dst_router tenant + setup_vpn_config 2 2 1 1 100 + + # setup the IPv6 L3 VPN which connects the host hs-t200-3 and host + # hs-t200-4 within the same tenant 200. + setup_vpn_config 3 1 4 2 200 + setup_vpn_config 4 2 3 1 200 +} + +check_rt_connectivity() +{ + local rtsrc=$1 + local rtdst=$2 + eval local nsname=\${rt_${rtsrc}} + + ip netns exec ${nsname} ping -c 1 -W 1 ${IPv6_RT_NETWORK}::${rtdst} \ + >/dev/null 2>&1 +} + +check_and_log_rt_connectivity() +{ + local rtsrc=$1 + local rtdst=$2 + + check_rt_connectivity ${rtsrc} ${rtdst} + log_test $? 0 "Routers connectivity: rt-${rtsrc} -> rt-${rtdst}" +} + +check_hs_connectivity() +{ + local hssrc=$1 + local hsdst=$2 + local tid=$3 + eval local nsname=\${hs_t${tid}_${hssrc}} + + ip netns exec ${nsname} ping -c 1 -W ${PING_TIMEOUT_SEC} \ + ${IPv6_HS_NETWORK}::${hsdst} >/dev/null 2>&1 +} + +check_and_log_hs_connectivity() +{ + local hssrc=$1 + local hsdst=$2 + local tid=$3 + + check_hs_connectivity ${hssrc} ${hsdst} ${tid} + log_test $? 0 "Hosts connectivity: hs-t${tid}-${hssrc} -> hs-t${tid}-${hsdst} (tenant ${tid})" +} + +check_and_log_hs_isolation() +{ + local hssrc=$1 + local tidsrc=$2 + local hsdst=$3 + local tiddst=$4 + + check_hs_connectivity ${hssrc} ${hsdst} ${tidsrc} + # NOTE: ping should fail + log_test $? 1 "Hosts isolation: hs-t${tidsrc}-${hssrc} -X-> hs-t${tiddst}-${hsdst}" +} + + +check_and_log_hs2gw_connectivity() +{ + local hssrc=$1 + local tid=$2 + + check_hs_connectivity ${hssrc} 254 ${tid} + log_test $? 0 "Hosts connectivity: hs-t${tid}-${hssrc} -> gw (tenant ${tid})" +} + +router_tests() +{ + log_section "IPv6 routers connectivity test" + + check_and_log_rt_connectivity 1 2 + check_and_log_rt_connectivity 2 1 +} + +host2gateway_tests() +{ + log_section "IPv6 connectivity test among hosts and gateway" + + check_and_log_hs2gw_connectivity 1 100 + check_and_log_hs2gw_connectivity 2 100 + + check_and_log_hs2gw_connectivity 3 200 + check_and_log_hs2gw_connectivity 4 200 +} + +host_vpn_tests() +{ + log_section "SRv6 VPN connectivity test among hosts in the same tenant" + + check_and_log_hs_connectivity 1 2 100 + check_and_log_hs_connectivity 2 1 100 + + check_and_log_hs_connectivity 3 4 200 + check_and_log_hs_connectivity 4 3 200 +} + +host_vpn_isolation_tests() +{ + local i + local j + local k + local tmp + local l1="1 2" + local l2="3 4" + local t1=100 + local t2=200 + + log_section "SRv6 VPN isolation test among hosts in different tentants" + + for k in 0 1; do + for i in ${l1}; do + for j in ${l2}; do + check_and_log_hs_isolation ${i} ${t1} ${j} ${t2} + done + done + + # let us test the reverse path + tmp="${l1}"; l1="${l2}"; l2="${tmp}" + tmp=${t1}; t1=${t2}; t2=${tmp} + done +} + +if [ "$(id -u)" -ne 0 ];then + echo "SKIP: Need root privileges" + exit $ksft_skip +fi + +if [ ! -x "$(command -v ip)" ]; then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +modprobe vrf &>/dev/null +if [ ! -e /proc/sys/net/vrf/strict_mode ]; then + echo "SKIP: vrf sysctl does not exist" + exit $ksft_skip +fi + +cleanup &>/dev/null + +setup + +router_tests +host2gateway_tests +host_vpn_tests +host_vpn_isolation_tests + +print_log_test_results + +cleanup &>/dev/null + +exit ${ret} diff --git a/tools/testing/selftests/net/srv6_end_flavors_test.sh b/tools/testing/selftests/net/srv6_end_flavors_test.sh new file mode 100755 index 000000000000..50563443a4ad --- /dev/null +++ b/tools/testing/selftests/net/srv6_end_flavors_test.sh @@ -0,0 +1,869 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# author: Andrea Mayer <andrea.mayer@uniroma2.it> +# author: Paolo Lungaroni <paolo.lungaroni@uniroma2.it> +# +# This script is designed to test the support for "flavors" in the SRv6 End +# behavior. +# +# Flavors defined in RFC8986 [1] represent additional operations that can modify +# or extend the existing SRv6 End, End.X and End.T behaviors. For the sake of +# convenience, we report the list of flavors described in [1] hereafter: +# - Penultimate Segment Pop (PSP); +# - Ultimate Segment Pop (USP); +# - Ultimate Segment Decapsulation (USD). +# +# The End, End.X, and End.T behaviors can support these flavors either +# individually or in combinations. +# Currently in this selftest we consider only the PSP flavor for the SRv6 End +# behavior. However, it is possible to extend the script as soon as other +# flavors will be supported in the kernel. +# +# The purpose of the PSP flavor consists in instructing the penultimate node +# listed in the SRv6 policy to remove (i.e. pop) the outermost SRH from the IPv6 +# header. +# A PSP enabled SRv6 End behavior instance processes the SRH by: +# - decrementing the Segment Left (SL) value from 1 to 0; +# - copying the last SID from the SID List into the IPv6 Destination Address +# (DA); +# - removing the SRH from the extension headers following the IPv6 header. +# +# Once the SRH is removed, the IPv6 packet is forwarded to the destination using +# the IPv6 DA updated during the PSP operation (i.e. the IPv6 DA corresponding +# to the last SID carried by the removed SRH). +# +# Although the PSP flavor can be set for any SRv6 End behavior instance on any +# SR node, it will be active only on such behaviors bound to a penultimate SID +# for a given SRv6 policy. +# SL=2 SL=1 SL=0 +# | | | +# For example, given the SRv6 policy (SID List := <X, Y, Z>): +# - a PSP enabled SRv6 End behavior bound to SID Y will apply the PSP operation +# as Segment Left (SL) is 1, corresponding to the Penultimate Segment of the +# SID List; +# - a PSP enabled SRv6 End behavior bound to SID X will *NOT* apply the PSP +# operation as the Segment Left is 2. This behavior instance will apply the +# "standard" End packet processing, ignoring the configured PSP flavor at +# all. +# +# [1] RFC8986: https://datatracker.ietf.org/doc/html/rfc8986 +# +# Network topology +# ================ +# +# The network topology used in this selftest is depicted hereafter, composed by +# two hosts (hs-1, hs-2) and four routers (rt-1, rt-2, rt-3, rt-4). +# Hosts hs-1 and hs-2 are connected to routers rt-1 and rt-2, respectively, +# allowing them to communicate with each other. +# Traffic exchanged between hs-1 and hs-2 can follow different network paths. +# The network operator, through specific SRv6 Policies can steer traffic to one +# path rather than another. In this selftest this is implemented as follows: +# +# i) The SRv6 H.Insert behavior applies SRv6 Policies on traffic received by +# connected hosts. It pushes the Segment Routing Header (SRH) after the +# IPv6 header. The SRH contains the SID List (i.e. SRv6 Policy) needed for +# steering traffic across the segments/waypoints specified in that list; +# +# ii) The SRv6 End behavior advances the active SID in the SID List carried by +# the SRH; +# +# iii) The PSP enabled SRv6 End behavior is used to remove the SRH when such +# behavior is configured on a node bound to the Penultimate Segment carried +# by the SID List. +# +# cafe::1 cafe::2 +# +--------+ +--------+ +# | | | | +# | hs-1 | | hs-2 | +# | | | | +# +---+----+ +--- +---+ +# cafe::/64 | | cafe::/64 +# | | +# +---+----+ +----+---+ +# | | fcf0:0:1:2::/64 | | +# | rt-1 +-------------------+ rt-2 | +# | | | | +# +---+----+ +----+---+ +# | . . | +# | fcf0:0:1:3::/64 . | +# | . . | +# | . . | +# fcf0:0:1:4::/64 | . | fcf0:0:2:3::/64 +# | . . | +# | . . | +# | fcf0:0:2:4::/64 . | +# | . . | +# +---+----+ +----+---+ +# | | | | +# | rt-4 +-------------------+ rt-3 | +# | | fcf0:0:3:4::/64 | | +# +---+----+ +----+---+ +# +# Every fcf0:0:x:y::/64 network interconnects the SRv6 routers rt-x with rt-y in +# the IPv6 operator network. +# +# +# Local SID table +# =============== +# +# Each SRv6 router is configured with a Local SID table in which SIDs are +# stored. Considering the given SRv6 router rt-x, at least two SIDs are +# configured in the Local SID table: +# +# Local SID table for SRv6 router rt-x +# +---------------------------------------------------------------------+ +# |fcff:x::e is associated with the SRv6 End behavior | +# |fcff:x::ef1 is associated with the SRv6 End behavior with PSP flavor | +# +---------------------------------------------------------------------+ +# +# The fcff::/16 prefix is reserved by the operator for the SIDs. Reachability of +# SIDs is ensured by proper configuration of the IPv6 operator's network and +# SRv6 routers. +# +# +# SRv6 Policies +# ============= +# +# An SRv6 ingress router applies different SRv6 Policies to the traffic received +# from connected hosts on the basis of the destination addresses. +# In case of SRv6 H.Insert behavior, the SRv6 Policy enforcement consists of +# pushing the SRH (carrying a given SID List) after the existing IPv6 header. +# Note that in the inserting mode, there is no encapsulation at all. +# +# Before applying an SRv6 Policy using the SRv6 H.Insert behavior +# +------+---------+ +# | IPv6 | Payload | +# +------+---------+ +# +# After applying an SRv6 Policy using the SRv6 H.Insert behavior +# +------+-----+---------+ +# | IPv6 | SRH | Payload | +# +------+-----+---------+ +# +# Traffic from hs-1 to hs-2 +# ------------------------- +# +# Packets generated from hs-1 and directed towards hs-2 are +# handled by rt-1 which applies the following SRv6 Policy: +# +# i.a) IPv6 traffic, SID List=fcff:3::e,fcff:4::ef1,fcff:2::ef1,cafe::2 +# +# Router rt-1 is configured to enforce the Policy (i.a) through the SRv6 +# H.Insert behavior which pushes the SRH after the existing IPv6 header. This +# Policy steers the traffic from hs-1 across rt-3, rt-4, rt-2 and finally to the +# destination hs-2. +# +# As the packet reaches the router rt-3, the SRv6 End behavior bound to SID +# fcff:3::e is triggered. The behavior updates the Segment Left (from SL=3 to +# SL=2) in the SRH, the IPv6 DA with fcff:4::ef1 and forwards the packet to the +# next router on the path, i.e. rt-4. +# +# When router rt-4 receives the packet, the PSP enabled SRv6 End behavior bound +# to SID fcff:4::ef1 is executed. Since the SL=2, the PSP operation is *NOT* +# kicked in and the behavior applies the default End processing: the Segment +# Left is decreased (from SL=2 to SL=1), the IPv6 DA is updated with the SID +# fcff:2::ef1 and the packet is forwarded to router rt-2. +# +# The PSP enabled SRv6 End behavior on rt-2 is associated with SID fcff:2::ef1 +# and is executed as the packet is received. Because SL=1, the behavior applies +# the PSP processing on the packet as follows: i) SL is decreased, i.e. from +# SL=1 to SL=0; ii) last SID (cafe::2) is copied into the IPv6 DA; iii) the +# outermost SRH is removed from the extension headers following the IPv6 header. +# Once the PSP processing is completed, the packet is forwarded to the host hs-2 +# (destination). +# +# Traffic from hs-2 to hs-1 +# ------------------------- +# +# Packets generated from hs-2 and directed to hs-1 are handled by rt-2 which +# applies the following SRv6 Policy: +# +# i.b) IPv6 traffic, SID List=fcff:1::ef1,cafe::1 +# +# Router rt-2 is configured to enforce the Policy (i.b) through the SRv6 +# H.Insert behavior which pushes the SRH after the existing IPv6 header. This +# Policy steers the traffic from hs-2 across rt-1 and finally to the +# destination hs-1 +# +# +# When the router rt-1 receives the packet, the PSP enabled SRv6 End behavior +# associated with the SID fcff:1::ef1 is triggered. Since the SL=1, +# the PSP operation takes place: i) the SL is decremented; ii) the IPv6 DA is +# set with the last SID; iii) the SRH is removed from the extension headers +# after the IPv6 header. At this point, the packet with IPv6 DA=cafe::1 is sent +# to the destination, i.e. hs-1. + +# Kselftest framework requirement - SKIP code is 4. +readonly ksft_skip=4 + +readonly RDMSUFF="$(mktemp -u XXXXXXXX)" +readonly DUMMY_DEVNAME="dum0" +readonly RT2HS_DEVNAME="veth1" +readonly LOCALSID_TABLE_ID=90 +readonly IPv6_RT_NETWORK=fcf0:0 +readonly IPv6_HS_NETWORK=cafe +readonly IPv6_TESTS_ADDR=2001:db8::1 +readonly LOCATOR_SERVICE=fcff +readonly END_FUNC=000e +readonly END_PSP_FUNC=0ef1 + +PING_TIMEOUT_SEC=4 +PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no} + +# IDs of routers and hosts are initialized during the setup of the testing +# network +ROUTERS='' +HOSTS='' + +SETUP_ERR=1 + +ret=${ksft_skip} +nsuccess=0 +nfail=0 + +log_test() +{ + local rc="$1" + local expected="$2" + local msg="$3" + + if [ "${rc}" -eq "${expected}" ]; then + nsuccess=$((nsuccess+1)) + printf "\n TEST: %-60s [ OK ]\n" "${msg}" + else + ret=1 + nfail=$((nfail+1)) + printf "\n TEST: %-60s [FAIL]\n" "${msg}" + if [ "${PAUSE_ON_FAIL}" = "yes" ]; then + echo + echo "hit enter to continue, 'q' to quit" + read a + [ "$a" = "q" ] && exit 1 + fi + fi +} + +print_log_test_results() +{ + printf "\nTests passed: %3d\n" "${nsuccess}" + printf "Tests failed: %3d\n" "${nfail}" + + # when a test fails, the value of 'ret' is set to 1 (error code). + # Conversely, when all tests are passed successfully, the 'ret' value + # is set to 0 (success code). + if [ "${ret}" -ne 1 ]; then + ret=0 + fi +} + +log_section() +{ + echo + echo "################################################################################" + echo "TEST SECTION: $*" + echo "################################################################################" +} + +test_command_or_ksft_skip() +{ + local cmd="$1" + + if [ ! -x "$(command -v "${cmd}")" ]; then + echo "SKIP: Could not run test without \"${cmd}\" tool"; + exit "${ksft_skip}" + fi +} + +get_nodename() +{ + local name="$1" + + echo "${name}-${RDMSUFF}" +} + +get_rtname() +{ + local rtid="$1" + + get_nodename "rt-${rtid}" +} + +get_hsname() +{ + local hsid="$1" + + get_nodename "hs-${hsid}" +} + +__create_namespace() +{ + local name="$1" + + ip netns add "${name}" +} + +create_router() +{ + local rtid="$1" + local nsname + + nsname="$(get_rtname "${rtid}")" + + __create_namespace "${nsname}" +} + +create_host() +{ + local hsid="$1" + local nsname + + nsname="$(get_hsname "${hsid}")" + + __create_namespace "${nsname}" +} + +cleanup() +{ + local nsname + local i + + # destroy routers + for i in ${ROUTERS}; do + nsname="$(get_rtname "${i}")" + + ip netns del "${nsname}" &>/dev/null || true + done + + # destroy hosts + for i in ${HOSTS}; do + nsname="$(get_hsname "${i}")" + + ip netns del "${nsname}" &>/dev/null || true + done + + # check whether the setup phase was completed successfully or not. In + # case of an error during the setup phase of the testing environment, + # the selftest is considered as "skipped". + if [ "${SETUP_ERR}" -ne 0 ]; then + echo "SKIP: Setting up the testing environment failed" + exit "${ksft_skip}" + fi + + exit "${ret}" +} + +add_link_rt_pairs() +{ + local rt="$1" + local rt_neighs="$2" + local neigh + local nsname + local neigh_nsname + + nsname="$(get_rtname "${rt}")" + + for neigh in ${rt_neighs}; do + neigh_nsname="$(get_rtname "${neigh}")" + + ip link add "veth-rt-${rt}-${neigh}" netns "${nsname}" \ + type veth peer name "veth-rt-${neigh}-${rt}" \ + netns "${neigh_nsname}" + done +} + +get_network_prefix() +{ + local rt="$1" + local neigh="$2" + local p="${rt}" + local q="${neigh}" + + if [ "${p}" -gt "${q}" ]; then + p="${q}"; q="${rt}" + fi + + echo "${IPv6_RT_NETWORK}:${p}:${q}" +} + +# Given the description of a router <id:op> as an input, the function returns +# the <id> token which represents the ID of the router. +# i.e. input: "12:psp" +# output: "12" +__get_srv6_rtcfg_id() +{ + local element="$1" + + echo "${element}" | cut -d':' -f1 +} + +# Given the description of a router <id:op> as an input, the function returns +# the <op> token which represents the operation (e.g. End behavior with or +# withouth flavors) configured for the node. + +# Note that when the operation represents an End behavior with a list of +# flavors, the output is the ordered version of that list. +# i.e. input: "5:usp,psp,usd" +# output: "psp,usd,usp" +__get_srv6_rtcfg_op() +{ + local element="$1" + + # return the lexicographically ordered flavors + echo "${element}" | cut -d':' -f2 | sed 's/,/\n/g' | sort | \ + xargs | sed 's/ /,/g' +} + +# Setup the basic networking for the routers +setup_rt_networking() +{ + local rt="$1" + local rt_neighs="$2" + local nsname + local net_prefix + local devname + local neigh + + nsname="$(get_rtname "${rt}")" + + for neigh in ${rt_neighs}; do + devname="veth-rt-${rt}-${neigh}" + + net_prefix="$(get_network_prefix "${rt}" "${neigh}")" + + ip -netns "${nsname}" addr \ + add "${net_prefix}::${rt}/64" dev "${devname}" nodad + + ip -netns "${nsname}" link set "${devname}" up + done + + ip -netns "${nsname}" link set lo up + + ip -netns "${nsname}" link add ${DUMMY_DEVNAME} type dummy + ip -netns "${nsname}" link set ${DUMMY_DEVNAME} up + + ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0 + ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0 + ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.forwarding=1 +} + +# Setup local SIDs for an SRv6 router +setup_rt_local_sids() +{ + local rt="$1" + local rt_neighs="$2" + local net_prefix + local devname + local nsname + local neigh + + nsname="$(get_rtname "${rt}")" + + for neigh in ${rt_neighs}; do + devname="veth-rt-${rt}-${neigh}" + + net_prefix="$(get_network_prefix "${rt}" "${neigh}")" + + # set underlay network routes for SIDs reachability + ip -netns "${nsname}" -6 route \ + add "${LOCATOR_SERVICE}:${neigh}::/32" \ + table "${LOCALSID_TABLE_ID}" \ + via "${net_prefix}::${neigh}" dev "${devname}" + done + + # Local End behavior (note that "dev" is a dummy interface chosen for + # the sake of simplicity). + ip -netns "${nsname}" -6 route \ + add "${LOCATOR_SERVICE}:${rt}::${END_FUNC}" \ + table "${LOCALSID_TABLE_ID}" \ + encap seg6local action End dev "${DUMMY_DEVNAME}" + + + # all SIDs start with a common locator. Routes and SRv6 Endpoint + # behavior instaces are grouped together in the 'localsid' table. + ip -netns "${nsname}" -6 rule \ + add to "${LOCATOR_SERVICE}::/16" \ + lookup "${LOCALSID_TABLE_ID}" prio 999 + + # set default routes to unreachable + ip -netns "${nsname}" -6 route \ + add unreachable default metric 4278198272 \ + dev "${DUMMY_DEVNAME}" +} + +# This helper function builds and installs the SID List (i.e. SRv6 Policy) +# to be applied on incoming packets at the ingress node. Moreover, it +# configures the SRv6 nodes specified in the SID List to process the traffic +# according to the operations required by the Policy itself. +# args: +# $1 - destination host (i.e. cafe::x host) +# $2 - SRv6 router configured for enforcing the SRv6 Policy +# $3 - compact way to represent a list of SRv6 routers with their operations +# (i.e. behaviors) that each of them needs to perform. Every <nodeid:op> +# element constructs a SID that is associated with the behavior <op> on +# the <nodeid> node. The list of such elements forms an SRv6 Policy. +__setup_rt_policy() +{ + local dst="$1" + local encap_rt="$2" + local policy_rts="$3" + local behavior_cfg + local in_nsname + local rt_nsname + local policy='' + local function + local fullsid + local op_type + local node + local n + + in_nsname="$(get_rtname "${encap_rt}")" + + for n in ${policy_rts}; do + node="$(__get_srv6_rtcfg_id "${n}")" + op_type="$(__get_srv6_rtcfg_op "${n}")" + rt_nsname="$(get_rtname "${node}")" + + case "${op_type}" in + "noflv") + policy="${policy}${LOCATOR_SERVICE}:${node}::${END_FUNC}," + function="${END_FUNC}" + behavior_cfg="End" + ;; + + "psp") + policy="${policy}${LOCATOR_SERVICE}:${node}::${END_PSP_FUNC}," + function="${END_PSP_FUNC}" + behavior_cfg="End flavors psp" + ;; + + *) + break + ;; + esac + + fullsid="${LOCATOR_SERVICE}:${node}::${function}" + + # add SRv6 Endpoint behavior to the selected router + if ! ip -netns "${rt_nsname}" -6 route get "${fullsid}" \ + &>/dev/null; then + ip -netns "${rt_nsname}" -6 route \ + add "${fullsid}" \ + table "${LOCALSID_TABLE_ID}" \ + encap seg6local action ${behavior_cfg} \ + dev "${DUMMY_DEVNAME}" + fi + done + + # we need to remove the trailing comma to avoid inserting an empty + # address (::0) in the SID List. + policy="${policy%,}" + + # add SRv6 policy to incoming traffic sent by connected hosts + ip -netns "${in_nsname}" -6 route \ + add "${IPv6_HS_NETWORK}::${dst}" \ + encap seg6 mode inline segs "${policy}" \ + dev "${DUMMY_DEVNAME}" + + ip -netns "${in_nsname}" -6 neigh \ + add proxy "${IPv6_HS_NETWORK}::${dst}" \ + dev "${RT2HS_DEVNAME}" +} + +# see __setup_rt_policy +setup_rt_policy_ipv6() +{ + __setup_rt_policy "$1" "$2" "$3" +} + +setup_hs() +{ + local hs="$1" + local rt="$2" + local hsname + local rtname + + hsname="$(get_hsname "${hs}")" + rtname="$(get_rtname "${rt}")" + + ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0 + ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0 + + ip -netns "${hsname}" link add veth0 type veth \ + peer name "${RT2HS_DEVNAME}" netns "${rtname}" + + ip -netns "${hsname}" addr \ + add "${IPv6_HS_NETWORK}::${hs}/64" dev veth0 nodad + + ip -netns "${hsname}" link set veth0 up + ip -netns "${hsname}" link set lo up + + ip -netns "${rtname}" addr \ + add "${IPv6_HS_NETWORK}::254/64" dev "${RT2HS_DEVNAME}" nodad + + ip -netns "${rtname}" link set "${RT2HS_DEVNAME}" up + + ip netns exec "${rtname}" \ + sysctl -wq net.ipv6.conf."${RT2HS_DEVNAME}".proxy_ndp=1 +} + +setup() +{ + local i + + # create routers + ROUTERS="1 2 3 4"; readonly ROUTERS + for i in ${ROUTERS}; do + create_router "${i}" + done + + # create hosts + HOSTS="1 2"; readonly HOSTS + for i in ${HOSTS}; do + create_host "${i}" + done + + # set up the links for connecting routers + add_link_rt_pairs 1 "2 3 4" + add_link_rt_pairs 2 "3 4" + add_link_rt_pairs 3 "4" + + # set up the basic connectivity of routers and routes required for + # reachability of SIDs. + setup_rt_networking 1 "2 3 4" + setup_rt_networking 2 "1 3 4" + setup_rt_networking 3 "1 2 4" + setup_rt_networking 4 "1 2 3" + + # set up the hosts connected to routers + setup_hs 1 1 + setup_hs 2 2 + + # set up default SRv6 Endpoints (i.e. SRv6 End behavior) + setup_rt_local_sids 1 "2 3 4" + setup_rt_local_sids 2 "1 3 4" + setup_rt_local_sids 3 "1 2 4" + setup_rt_local_sids 4 "1 2 3" + + # set up SRv6 policies + # create a connection between hosts hs-1 and hs-2. + # The path between hs-1 and hs-2 traverses SRv6 aware routers. + # For each direction two path are chosen: + # + # Direction hs-1 -> hs-2 (PSP flavor) + # - rt-1 (SRv6 H.Insert policy) + # - rt-3 (SRv6 End behavior) + # - rt-4 (SRv6 End flavor PSP with SL>1, acting as End behavior) + # - rt-2 (SRv6 End flavor PSP with SL=1) + # + # Direction hs-2 -> hs-1 (PSP flavor) + # - rt-2 (SRv6 H.Insert policy) + # - rt-1 (SRv6 End flavor PSP with SL=1) + setup_rt_policy_ipv6 2 1 "3:noflv 4:psp 2:psp" + setup_rt_policy_ipv6 1 2 "1:psp" + + # testing environment was set up successfully + SETUP_ERR=0 +} + +check_rt_connectivity() +{ + local rtsrc="$1" + local rtdst="$2" + local prefix + local rtsrc_nsname + + rtsrc_nsname="$(get_rtname "${rtsrc}")" + + prefix="$(get_network_prefix "${rtsrc}" "${rtdst}")" + + ip netns exec "${rtsrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \ + "${prefix}::${rtdst}" >/dev/null 2>&1 +} + +check_and_log_rt_connectivity() +{ + local rtsrc="$1" + local rtdst="$2" + + check_rt_connectivity "${rtsrc}" "${rtdst}" + log_test $? 0 "Routers connectivity: rt-${rtsrc} -> rt-${rtdst}" +} + +check_hs_ipv6_connectivity() +{ + local hssrc="$1" + local hsdst="$2" + local hssrc_nsname + + hssrc_nsname="$(get_hsname "${hssrc}")" + + ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \ + "${IPv6_HS_NETWORK}::${hsdst}" >/dev/null 2>&1 +} + +check_and_log_hs2gw_connectivity() +{ + local hssrc="$1" + + check_hs_ipv6_connectivity "${hssrc}" 254 + log_test $? 0 "IPv6 Hosts connectivity: hs-${hssrc} -> gw" +} + +check_and_log_hs_ipv6_connectivity() +{ + local hssrc="$1" + local hsdst="$2" + + check_hs_ipv6_connectivity "${hssrc}" "${hsdst}" + log_test $? 0 "IPv6 Hosts connectivity: hs-${hssrc} -> hs-${hsdst}" +} + +check_and_log_hs_connectivity() +{ + local hssrc="$1" + local hsdst="$2" + + check_and_log_hs_ipv6_connectivity "${hssrc}" "${hsdst}" +} + +router_tests() +{ + local i + local j + + log_section "IPv6 routers connectivity test" + + for i in ${ROUTERS}; do + for j in ${ROUTERS}; do + if [ "${i}" -eq "${j}" ]; then + continue + fi + + check_and_log_rt_connectivity "${i}" "${j}" + done + done +} + +host2gateway_tests() +{ + local hs + + log_section "IPv6 connectivity test among hosts and gateways" + + for hs in ${HOSTS}; do + check_and_log_hs2gw_connectivity "${hs}" + done +} + +host_srv6_end_flv_psp_tests() +{ + log_section "SRv6 connectivity test hosts (h1 <-> h2, PSP flavor)" + + check_and_log_hs_connectivity 1 2 + check_and_log_hs_connectivity 2 1 +} + +test_iproute2_supp_or_ksft_skip() +{ + local flavor="$1" + + if ! ip route help 2>&1 | grep -qo "${flavor}"; then + echo "SKIP: Missing SRv6 ${flavor} flavor support in iproute2" + exit "${ksft_skip}" + fi +} + +test_kernel_supp_or_ksft_skip() +{ + local flavor="$1" + local test_netns + + test_netns="kflv-$(mktemp -u XXXXXXXX)" + + if ! ip netns add "${test_netns}"; then + echo "SKIP: Cannot set up netns to test kernel support for flavors" + exit "${ksft_skip}" + fi + + if ! ip -netns "${test_netns}" link \ + add "${DUMMY_DEVNAME}" type dummy; then + echo "SKIP: Cannot set up dummy dev to test kernel support for flavors" + + ip netns del "${test_netns}" + exit "${ksft_skip}" + fi + + if ! ip -netns "${test_netns}" link \ + set "${DUMMY_DEVNAME}" up; then + echo "SKIP: Cannot activate dummy dev to test kernel support for flavors" + + ip netns del "${test_netns}" + exit "${ksft_skip}" + fi + + if ! ip -netns "${test_netns}" -6 route \ + add "${IPv6_TESTS_ADDR}" encap seg6local \ + action End flavors "${flavor}" dev "${DUMMY_DEVNAME}"; then + echo "SKIP: ${flavor} flavor not supported in kernel" + + ip netns del "${test_netns}" + exit "${ksft_skip}" + fi + + ip netns del "${test_netns}" +} + +test_dummy_dev_or_ksft_skip() +{ + local test_netns + + test_netns="dummy-$(mktemp -u XXXXXXXX)" + + if ! ip netns add "${test_netns}"; then + echo "SKIP: Cannot set up netns for testing dummy dev support" + exit "${ksft_skip}" + fi + + modprobe dummy &>/dev/null || true + if ! ip -netns "${test_netns}" link \ + add "${DUMMY_DEVNAME}" type dummy; then + echo "SKIP: dummy dev not supported" + + ip netns del "${test_netns}" + exit "${ksft_skip}" + fi + + ip netns del "${test_netns}" +} + +if [ "$(id -u)" -ne 0 ]; then + echo "SKIP: Need root privileges" + exit "${ksft_skip}" +fi + +# required programs to carry out this selftest +test_command_or_ksft_skip ip +test_command_or_ksft_skip ping +test_command_or_ksft_skip sysctl +test_command_or_ksft_skip grep +test_command_or_ksft_skip cut +test_command_or_ksft_skip sed +test_command_or_ksft_skip sort +test_command_or_ksft_skip xargs + +test_dummy_dev_or_ksft_skip +test_iproute2_supp_or_ksft_skip psp +test_kernel_supp_or_ksft_skip psp + +set -e +trap cleanup EXIT + +setup +set +e + +router_tests +host2gateway_tests +host_srv6_end_flv_psp_tests + +print_log_test_results diff --git a/tools/testing/selftests/net/srv6_end_next_csid_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_next_csid_l3vpn_test.sh new file mode 100755 index 000000000000..87e414cc417c --- /dev/null +++ b/tools/testing/selftests/net/srv6_end_next_csid_l3vpn_test.sh @@ -0,0 +1,1145 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# author: Andrea Mayer <andrea.mayer@uniroma2.it> +# +# This script is designed for testing the support of NEXT-C-SID flavor for SRv6 +# End behavior. +# A basic knowledge of SRv6 architecture [1] and of the compressed SID approach +# [2] is assumed for the reader. +# +# The network topology used in the selftest is depicted hereafter, composed by +# two hosts and four routers. Hosts hs-1 and hs-2 are connected through an +# IPv4/IPv6 L3 VPN service, offered by routers rt-1, rt-2, rt-3 and rt-4 using +# the NEXT-C-SID flavor. The key components for such VPNs are: +# +# i) The SRv6 H.Encaps/H.Encaps.Red behaviors [1] apply SRv6 Policies on +# traffic received by connected hosts, initiating the VPN tunnel; +# +# ii) The SRv6 End behavior [1] advances the active SID in the SID List +# carried by the SRH; +# +# iii) The NEXT-C-SID mechanism [2] offers the possibility of encoding several +# SRv6 segments within a single 128-bit SID address, referred to as a +# Compressed SID (C-SID) container. In this way, the length of the SID +# List can be drastically reduced. +# The NEXT-C-SID is provided as a "flavor" of the SRv6 End behavior +# which advances the current C-SID (i.e. the Locator-Node Function defined +# in [2]) with the next one carried in the Argument, if available. +# When no more C-SIDs are available in the Argument, the SRv6 End behavior +# will apply the End function selecting the next SID in the SID List. +# +# iv) The SRv6 End.DT46 behavior [1] is used for removing the SRv6 Policy and, +# thus, it terminates the VPN tunnel. Such a behavior is capable of +# handling, at the same time, both tunneled IPv4 and IPv6 traffic. +# +# [1] https://datatracker.ietf.org/doc/html/rfc8986 +# [2] https://datatracker.ietf.org/doc/html/draft-ietf-spring-srv6-srh-compression +# +# +# cafe::1 cafe::2 +# 10.0.0.1 10.0.0.2 +# +--------+ +--------+ +# | | | | +# | hs-1 | | hs-2 | +# | | | | +# +---+----+ +----+---+ +# cafe::/64 | | cafe::/64 +# 10.0.0.0/24 | | 10.0.0.0/24 +# +---+----+ +----+---+ +# | | fcf0:0:1:2::/64 | | +# | rt-1 +-------------------+ rt-2 | +# | | | | +# +---+----+ +----+---+ +# | . . | +# | fcf0:0:1:3::/64 . | +# | . . | +# | . . | +# fcf0:0:1:4::/64 | . | fcf0:0:2:3::/64 +# | . . | +# | . . | +# | fcf0:0:2:4::/64 . | +# | . . | +# +---+----+ +----+---+ +# | | | | +# | rt-4 +-------------------+ rt-3 | +# | | fcf0:0:3:4::/64 | | +# +---+----+ +----+---+ +# +# Every fcf0:0:x:y::/64 network interconnects the SRv6 routers rt-x with rt-y in +# the selftest network. +# +# Local SID/C-SID table +# ===================== +# +# Each SRv6 router is configured with a Local SID/C-SID table in which +# SIDs/C-SIDs are stored. Considering an SRv6 router rt-x, SIDs/C-SIDs are +# configured in the Local SID/C-SIDs table as follows: +# +# Local SID/C-SID table for SRv6 router rt-x +# +-----------------------------------------------------------+ +# |fcff:x::d46 is associated with the non-compressed SRv6 | +# | End.DT46 behavior | +# +-----------------------------------------------------------+ +# |fcbb:0:0x00::/48 is associated with the NEXT-C-SID flavor | +# | of SRv6 End behavior | +# +-----------------------------------------------------------+ +# |fcbb:0:0x00:d46::/64 is associated with the SRv6 End.DT46 | +# | behavior when NEXT-C-SID compression is turned on | +# +-----------------------------------------------------------+ +# +# The fcff::/16 prefix is reserved for implementing SRv6 services with regular +# (non compressed) SIDs. Reachability of SIDs is ensured by proper configuration +# of the IPv6 routing tables in the routers. +# Similarly, the fcbb:0::/32 prefix is reserved for implementing SRv6 VPN +# services leveraging the NEXT-C-SID compression mechanism. Indeed, the +# fcbb:0::/32 is used for encoding the Locator-Block while the Locator-Node +# Function is encoded with 16 bits. +# +# Incoming traffic classification and application of SRv6 Policies +# ================================================================ +# +# An SRv6 ingress router applies different SRv6 Policies to the traffic received +# from a connected host, considering the IPv4 or IPv6 destination address. +# SRv6 policy enforcement consists of encapsulating the received traffic into a +# new IPv6 packet with a given SID List contained in the SRH. +# When the SID List contains only one SID, the SRH could be omitted completely +# and that SID is stored directly in the IPv6 Destination Address (DA) (this is +# called "reduced" encapsulation). +# +# Test cases for NEXT-C-SID +# ========================= +# +# We consider two test cases for NEXT-C-SID: i) single SID and ii) double SID. +# +# In the single SID test case we have a number of segments that are all +# contained in a single Compressed SID (C-SID) container. Therefore the +# resulting SID List has only one SID. Using the reduced encapsulation format +# this will result in a packet with no SRH. +# +# In the double SID test case we have one segment carried in a Compressed SID +# (C-SID) container, followed by a regular (non compressed) SID. The resulting +# SID List has two segments and it is possible to test the advance to the next +# SID when all the C-SIDs in a C-SID container have been processed. Using the +# reduced encapsulation format this will result in a packet with an SRH +# containing 1 segment. +# +# For the single SID test case, we use the IPv4 addresses of hs-1 and hs-2, for +# the double SID test case, we use their IPv6 addresses. This is only done to +# simplify the test setup and avoid adding other hosts or multiple addresses on +# the same interface of a host. +# +# Traffic from hs-1 to hs-2 +# ------------------------- +# +# Packets generated from hs-1 and directed towards hs-2 are handled by rt-1 +# which applies the SRv6 Policies as follows: +# +# i) IPv6 DA=cafe::2, H.Encaps.Red with SID List=fcbb:0:0400:0300:0200:d46:: +# ii) IPv4 DA=10.0.0.2, H.Encaps.Red with SID List=fcbb:0:0300::,fcff:2::d46 +# +# ### i) single SID +# +# The router rt-1 is configured to enforce the given Policy through the SRv6 +# H.Encaps.Red behavior which avoids the presence of the SRH at all, since it +# pushes the single SID directly in the IPv6 DA. Such a SID encodes a whole +# C-SID container carrying several C-SIDs (e.g. 0400, 0300, etc). +# +# As the packet reaches the router rt-4, the enabled NEXT-C-SID SRv6 End +# behavior (associated with fcbb:0:0400::/48) is triggered. This behavior +# analyzes the IPv6 DA and checks whether the Argument of the C-SID container +# is zero or not. In this case, the Argument is *NOT* zero and the IPv6 DA is +# updated as follows: +# +# +---------------------------------------------------------------+ +# | Before applying the rt-4 enabled NEXT-C-SID SRv6 End behavior | +# +---------------------------------------------------------------+ +# | +---------- Argument | +# | vvvvvvvvvvvvvvvv | +# | IPv6 DA fcbb:0:0400:0300:0200:d46:: | +# | ^^^^ <-- shifting | +# | | | +# | Locator-Node Function | +# +---------------------------------------------------------------+ +# | After applying the rt-4 enabled NEXT-C-SID SRv6 End behavior | +# +---------------------------------------------------------------+ +# | +---------- Argument | +# | vvvvvvvvvvvv | +# | IPv6 DA fcbb:0:0300:0200:d46:: | +# | ^^^^ | +# | | | +# | Locator-Node Function | +# +---------------------------------------------------------------+ +# +# After having applied the enabled NEXT-C-SID SRv6 End behavior, the packet is +# sent to the next node, i.e. rt-3. +# +# The enabled NEXT-C-SID SRv6 End behavior on rt-3 is executed as the packet is +# received. This behavior processes the packet and updates the IPv6 DA with +# fcbb:0:0200:d46::, since the Argument is *NOT* zero. Then, the packet is sent +# to the router rt-2. +# +# The router rt-2 is configured for decapsulating the inner IPv6 packet and, +# for this reason, it applies the SRv6 End.DT46 behavior on the received +# packet. It is worth noting that the SRv6 End.DT46 behavior does not require +# the presence of the SRH: it is fully capable to operate properly on +# IPv4/IPv6-in-IPv6 encapsulations. +# At the end of the decap operation, the packet is sent to the +# host hs-2. +# +# ### ii) double SID +# +# The router rt-1 is configured to enforce the given Policy through the SRv6 +# H.Encaps.Red. As a result, the first SID fcbb:0:0300:: is stored into the +# IPv6 DA, while the SRH pushed into the packet is made of only one SID, i.e. +# fcff:2::d46. Hence, the packet sent by hs-1 to hs-2 is encapsulated in an +# outer IPv6 header plus the SRH. +# +# As the packet reaches the node rt-3, the router applies the enabled NEXT-C-SID +# SRv6 End behavior. +# +# +---------------------------------------------------------------+ +# | Before applying the rt-3 enabled NEXT-C-SID SRv6 End behavior | +# +---------------------------------------------------------------+ +# | +---------- Argument | +# | vvvv (Argument is all filled with zeros) | +# | IPv6 DA fcbb:0:0300:: | +# | ^^^^ | +# | | | +# | Locator-Node Function | +# +---------------------------------------------------------------+ +# | After applying the rt-3 enabled NEXT-C-SID SRv6 End behavior | +# +---------------------------------------------------------------+ +# | | +# | IPv6 DA fcff:2::d46 | +# | ^^^^^^^^^^^ | +# | | | +# | SID copied from the SID List contained in the SRH | +# +---------------------------------------------------------------+ +# +# Since the Argument of the C-SID container is zero, the behavior can not +# update the Locator-Node function with the next C-SID carried in the Argument +# itself. Thus, the enabled NEXT-C-SID SRv6 End behavior operates as the +# traditional End behavior: it updates the IPv6 DA by copying the next +# available SID in the SID List carried by the SRH. After that, the packet is +# sent to the node rt-2. +# +# Once the packet is received by rt-2, the router decapsulates the inner IPv6 +# packet using the SRv6 End.DT46 behavior (associated with the SID fcff:2::d46) +# and sends it to the host hs-2. +# +# Traffic from hs-2 to hs-1 +# ------------------------- +# +# Packets generated from hs-2 and directed towards hs-1 are handled by rt-2 +# which applies the SRv6 Policies as follows: +# +# i) IPv6 DA=cafe::1, SID List=fcbb:0:0300:0400:0100:d46:: +# ii) IPv4 DA=10.0.0.1, SID List=fcbb:0:0300::,fcff:1::d46 +# +# For simplicity, such SRv6 Policies were chosen so that, in both use cases (i) +# and (ii), the network paths crossed by traffic from hs-2 to hs-1 are the same +# as those taken by traffic from hs-1 to hs-2. +# In this way, traffic from hs-2 to hs-1 is processed similarly to traffic from +# hs-1 to hs-2. So, the traffic processing scheme turns out to be the same as +# that adopted in the use cases already examined (of course, it is necessary to +# consider the different SIDs/C-SIDs). + +# Kselftest framework requirement - SKIP code is 4. +readonly ksft_skip=4 + +readonly RDMSUFF="$(mktemp -u XXXXXXXX)" +readonly DUMMY_DEVNAME="dum0" +readonly VRF_TID=100 +readonly VRF_DEVNAME="vrf-${VRF_TID}" +readonly RT2HS_DEVNAME="veth-t${VRF_TID}" +readonly LOCALSID_TABLE_ID=90 +readonly IPv6_RT_NETWORK=fcf0:0 +readonly IPv6_HS_NETWORK=cafe +readonly IPv4_HS_NETWORK=10.0.0 +readonly VPN_LOCATOR_SERVICE=fcff +readonly DT46_FUNC=0d46 +readonly HEADEND_ENCAP="encap.red" + +# do not add ':' as separator +readonly LCBLOCK_ADDR=fcbb0000 +readonly LCBLOCK_BLEN=32 +# do not add ':' as separator +readonly LCNODEFUNC_FMT="0%d00" +readonly LCNODEFUNC_BLEN=16 + +readonly LCBLOCK_NODEFUNC_BLEN=$((LCBLOCK_BLEN + LCNODEFUNC_BLEN)) + +readonly CSID_CNTR_PREFIX="dead:beaf::/32" +# ID of the router used for testing the C-SID container cfgs +readonly CSID_CNTR_RT_ID_TEST=1 +# Routing table used for testing the C-SID container cfgs +readonly CSID_CNTR_RT_TABLE=91 + +# C-SID container configurations to be tested +# +# An entry of the array is defined as "a,b,c" where: +# - 'a' and 'b' elements represent respectively the Locator-Block length +# (lblen) in bits and the Locator-Node Function length (nflen) in bits. +# 'a' and 'b' can be set to default values using the placeholder "d" which +# indicates the default kernel values (32 for lblen and 16 for nflen); +# otherwise, any numeric value is accepted; +# - 'c' indicates whether the C-SID configuration provided by the values 'a' +# and 'b' should be considered valid ("y") or invalid ("n"). +declare -ra CSID_CONTAINER_CFGS=( + "d,d,y" + "d,16,y" + "16,d,y" + "16,32,y" + "32,16,y" + "48,8,y" + "8,48,y" + "d,0,n" + "0,d,n" + "32,0,n" + "0,32,n" + "17,d,n" + "d,17,n" + "120,16,n" + "16,120,n" + "0,128,n" + "128,0,n" + "130,0,n" + "0,130,n" + "0,0,n" +) + +PING_TIMEOUT_SEC=4 +PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no} + +# IDs of routers and hosts are initialized during the setup of the testing +# network +ROUTERS='' +HOSTS='' + +SETUP_ERR=1 + +ret=${ksft_skip} +nsuccess=0 +nfail=0 + +log_test() +{ + local rc="$1" + local expected="$2" + local msg="$3" + + if [ "${rc}" -eq "${expected}" ]; then + nsuccess=$((nsuccess+1)) + printf "\n TEST: %-60s [ OK ]\n" "${msg}" + else + ret=1 + nfail=$((nfail+1)) + printf "\n TEST: %-60s [FAIL]\n" "${msg}" + if [ "${PAUSE_ON_FAIL}" = "yes" ]; then + echo + echo "hit enter to continue, 'q' to quit" + read a + [ "$a" = "q" ] && exit 1 + fi + fi +} + +print_log_test_results() +{ + printf "\nTests passed: %3d\n" "${nsuccess}" + printf "Tests failed: %3d\n" "${nfail}" + + # when a test fails, the value of 'ret' is set to 1 (error code). + # Conversely, when all tests are passed successfully, the 'ret' value + # is set to 0 (success code). + if [ "${ret}" -ne 1 ]; then + ret=0 + fi +} + +log_section() +{ + echo + echo "################################################################################" + echo "TEST SECTION: $*" + echo "################################################################################" +} + +test_command_or_ksft_skip() +{ + local cmd="$1" + + if [ ! -x "$(command -v "${cmd}")" ]; then + echo "SKIP: Could not run test without \"${cmd}\" tool"; + exit "${ksft_skip}" + fi +} + +get_nodename() +{ + local name="$1" + + echo "${name}-${RDMSUFF}" +} + +get_rtname() +{ + local rtid="$1" + + get_nodename "rt-${rtid}" +} + +get_hsname() +{ + local hsid="$1" + + get_nodename "hs-${hsid}" +} + +__create_namespace() +{ + local name="$1" + + ip netns add "${name}" +} + +create_router() +{ + local rtid="$1" + local nsname + + nsname="$(get_rtname "${rtid}")" + + __create_namespace "${nsname}" +} + +create_host() +{ + local hsid="$1" + local nsname + + nsname="$(get_hsname "${hsid}")" + + __create_namespace "${nsname}" +} + +cleanup() +{ + local nsname + local i + + # destroy routers + for i in ${ROUTERS}; do + nsname="$(get_rtname "${i}")" + + ip netns del "${nsname}" &>/dev/null || true + done + + # destroy hosts + for i in ${HOSTS}; do + nsname="$(get_hsname "${i}")" + + ip netns del "${nsname}" &>/dev/null || true + done + + # check whether the setup phase was completed successfully or not. In + # case of an error during the setup phase of the testing environment, + # the selftest is considered as "skipped". + if [ "${SETUP_ERR}" -ne 0 ]; then + echo "SKIP: Setting up the testing environment failed" + exit "${ksft_skip}" + fi + + exit "${ret}" +} + +add_link_rt_pairs() +{ + local rt="$1" + local rt_neighs="$2" + local neigh + local nsname + local neigh_nsname + + nsname="$(get_rtname "${rt}")" + + for neigh in ${rt_neighs}; do + neigh_nsname="$(get_rtname "${neigh}")" + + ip link add "veth-rt-${rt}-${neigh}" netns "${nsname}" \ + type veth peer name "veth-rt-${neigh}-${rt}" \ + netns "${neigh_nsname}" + done +} + +get_network_prefix() +{ + local rt="$1" + local neigh="$2" + local p="${rt}" + local q="${neigh}" + + if [ "${p}" -gt "${q}" ]; then + p="${q}"; q="${rt}" + fi + + echo "${IPv6_RT_NETWORK}:${p}:${q}" +} + +# Setup the basic networking for the routers +setup_rt_networking() +{ + local rt="$1" + local rt_neighs="$2" + local nsname + local net_prefix + local devname + local neigh + + nsname="$(get_rtname "${rt}")" + + for neigh in ${rt_neighs}; do + devname="veth-rt-${rt}-${neigh}" + + net_prefix="$(get_network_prefix "${rt}" "${neigh}")" + + ip -netns "${nsname}" addr \ + add "${net_prefix}::${rt}/64" dev "${devname}" nodad + + ip -netns "${nsname}" link set "${devname}" up + done + + ip -netns "${nsname}" link add "${DUMMY_DEVNAME}" type dummy + + ip -netns "${nsname}" link set "${DUMMY_DEVNAME}" up + ip -netns "${nsname}" link set lo up + + ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0 + ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0 + ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.forwarding=1 + + ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.all.rp_filter=0 + ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.default.rp_filter=0 + ip netns exec "${nsname}" sysctl -wq net.ipv4.ip_forward=1 +} + +# build an ipv6 prefix/address based on the input string +# Note that the input string does not contain ':' and '::' which are considered +# to be implicit. +# e.g.: +# - input: fbcc00000400300 +# - output: fbcc:0000:0400:0300:0000:0000:0000:0000 +# ^^^^^^^^^^^^^^^^^^^ +# fill the address with 0s +build_ipv6_addr() +{ + local addr="$1" + local out="" + local strlen="${#addr}" + local padn + local i + + # add ":" every 4 digits (16 bits) + for (( i = 0; i < strlen; i++ )); do + if (( i > 0 && i < 32 && (i % 4) == 0 )); then + out="${out}:" + fi + + out="${out}${addr:$i:1}" + done + + # fill the remaining bits of the address with 0s + padn=$((32 - strlen)) + for (( i = padn; i > 0; i-- )); do + if (( i > 0 && i < 32 && (i % 4) == 0 )); then + out="${out}:" + fi + + out="${out}0" + done + + printf "${out}" +} + +build_csid() +{ + local nodeid="$1" + + printf "${LCNODEFUNC_FMT}" "${nodeid}" +} + +build_lcnode_func_prefix() +{ + local nodeid="$1" + local lcnodefunc + local prefix + local out + + lcnodefunc="$(build_csid "${nodeid}")" + prefix="$(build_ipv6_addr "${LCBLOCK_ADDR}${lcnodefunc}")" + + out="${prefix}/${LCBLOCK_NODEFUNC_BLEN}" + + echo "${out}" +} + +# Setup local SIDs for an SRv6 router +setup_rt_local_sids() +{ + local rt="$1" + local rt_neighs="$2" + local net_prefix + local devname + local nsname + local neigh + local lcnode_func_prefix + local lcblock_prefix + + nsname="$(get_rtname "${rt}")" + + for neigh in ${rt_neighs}; do + devname="veth-rt-${rt}-${neigh}" + + net_prefix="$(get_network_prefix "${rt}" "${neigh}")" + + # set underlay network routes for SIDs reachability + ip -netns "${nsname}" -6 route \ + add "${VPN_LOCATOR_SERVICE}:${neigh}::/32" \ + table "${LOCALSID_TABLE_ID}" \ + via "${net_prefix}::${neigh}" dev "${devname}" + + # set the underlay network for C-SIDs reachability + lcnode_func_prefix="$(build_lcnode_func_prefix "${neigh}")" + + ip -netns "${nsname}" -6 route \ + add "${lcnode_func_prefix}" \ + table "${LOCALSID_TABLE_ID}" \ + via "${net_prefix}::${neigh}" dev "${devname}" + done + + lcnode_func_prefix="$(build_lcnode_func_prefix "${rt}")" + + # enabled NEXT-C-SID SRv6 End behavior (note that "dev" is the dummy + # dum0 device chosen for the sake of simplicity). + ip -netns "${nsname}" -6 route \ + add "${lcnode_func_prefix}" \ + table "${LOCALSID_TABLE_ID}" \ + encap seg6local action End flavors next-csid \ + lblen "${LCBLOCK_BLEN}" nflen "${LCNODEFUNC_BLEN}" \ + dev "${DUMMY_DEVNAME}" + + # all SIDs for VPNs start with a common locator. Routes and SRv6 + # Endpoint behavior instaces are grouped together in the 'localsid' + # table. + ip -netns "${nsname}" -6 rule \ + add to "${VPN_LOCATOR_SERVICE}::/16" \ + lookup "${LOCALSID_TABLE_ID}" prio 999 + + # common locator block for NEXT-C-SIDS compression mechanism. + lcblock_prefix="$(build_ipv6_addr "${LCBLOCK_ADDR}")" + ip -netns "${nsname}" -6 rule \ + add to "${lcblock_prefix}/${LCBLOCK_BLEN}" \ + lookup "${LOCALSID_TABLE_ID}" prio 999 +} + +# build and install the SRv6 policy into the ingress SRv6 router as well as the +# decap SID in the egress one. +# args: +# $1 - src host (evaluate automatically the ingress router) +# $2 - dst host (evaluate automatically the egress router) +# $3 - SRv6 routers configured for steering traffic (End behaviors) +# $4 - single SID or double SID +# $5 - traffic type (IPv6 or IPv4) +__setup_l3vpn() +{ + local src="$1" + local dst="$2" + local end_rts="$3" + local mode="$4" + local traffic="$5" + local nsname + local policy + local container + local decapsid + local lcnfunc + local dt + local n + local rtsrc_nsname + local rtdst_nsname + + rtsrc_nsname="$(get_rtname "${src}")" + rtdst_nsname="$(get_rtname "${dst}")" + + container="${LCBLOCK_ADDR}" + + # build first SID (C-SID container) + for n in ${end_rts}; do + lcnfunc="$(build_csid "${n}")" + + container="${container}${lcnfunc}" + done + + if [ "${mode}" -eq 1 ]; then + # single SID policy + dt="$(build_csid "${dst}")${DT46_FUNC}" + container="${container}${dt}" + # build the full ipv6 address for the container + policy="$(build_ipv6_addr "${container}")" + + # build the decap SID used in the decap node + container="${LCBLOCK_ADDR}${dt}" + decapsid="$(build_ipv6_addr "${container}")" + else + # double SID policy + decapsid="${VPN_LOCATOR_SERVICE}:${dst}::${DT46_FUNC}" + + policy="$(build_ipv6_addr "${container}"),${decapsid}" + fi + + # apply encap policy + if [ "${traffic}" -eq 6 ]; then + ip -netns "${rtsrc_nsname}" -6 route \ + add "${IPv6_HS_NETWORK}::${dst}" vrf "${VRF_DEVNAME}" \ + encap seg6 mode "${HEADEND_ENCAP}" segs "${policy}" \ + dev "${VRF_DEVNAME}" + + ip -netns "${rtsrc_nsname}" -6 neigh \ + add proxy "${IPv6_HS_NETWORK}::${dst}" \ + dev "${RT2HS_DEVNAME}" + else + # "dev" must be different from the one where the packet is + # received, otherwise the proxy arp does not work. + ip -netns "${rtsrc_nsname}" -4 route \ + add "${IPv4_HS_NETWORK}.${dst}" vrf "${VRF_DEVNAME}" \ + encap seg6 mode "${HEADEND_ENCAP}" segs "${policy}" \ + dev "${VRF_DEVNAME}" + fi + + # apply decap + # Local End.DT46 behavior (decap) + ip -netns "${rtdst_nsname}" -6 route \ + add "${decapsid}" \ + table "${LOCALSID_TABLE_ID}" \ + encap seg6local action End.DT46 vrftable "${VRF_TID}" \ + dev "${VRF_DEVNAME}" +} + +# see __setup_l3vpn() +setup_ipv4_vpn_2sids() +{ + __setup_l3vpn "$1" "$2" "$3" 2 4 +} + +# see __setup_l3vpn() +setup_ipv6_vpn_1sid() +{ + __setup_l3vpn "$1" "$2" "$3" 1 6 +} + +setup_hs() +{ + local hs="$1" + local rt="$2" + local hsname + local rtname + + hsname="$(get_hsname "${hs}")" + rtname="$(get_rtname "${rt}")" + + ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0 + ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0 + + ip -netns "${hsname}" link add veth0 type veth \ + peer name "${RT2HS_DEVNAME}" netns "${rtname}" + + ip -netns "${hsname}" addr \ + add "${IPv6_HS_NETWORK}::${hs}/64" dev veth0 nodad + ip -netns "${hsname}" addr add "${IPv4_HS_NETWORK}.${hs}/24" dev veth0 + + ip -netns "${hsname}" link set veth0 up + ip -netns "${hsname}" link set lo up + + # configure the VRF on the router which is directly connected to the + # source host. + ip -netns "${rtname}" link \ + add "${VRF_DEVNAME}" type vrf table "${VRF_TID}" + ip -netns "${rtname}" link set "${VRF_DEVNAME}" up + + # enslave the veth interface connecting the router with the host to the + # VRF in the access router + ip -netns "${rtname}" link \ + set "${RT2HS_DEVNAME}" master "${VRF_DEVNAME}" + + # set default routes to unreachable for both ipv6 and ipv4 + ip -netns "${rtname}" -6 route \ + add unreachable default metric 4278198272 \ + vrf "${VRF_DEVNAME}" + ip -netns "${rtname}" -4 route \ + add unreachable default metric 4278198272 \ + vrf "${VRF_DEVNAME}" + + ip -netns "${rtname}" addr \ + add "${IPv6_HS_NETWORK}::254/64" dev "${RT2HS_DEVNAME}" nodad + ip -netns "${rtname}" addr \ + add "${IPv4_HS_NETWORK}.254/24" dev "${RT2HS_DEVNAME}" + + ip -netns "${rtname}" link set "${RT2HS_DEVNAME}" up + + ip netns exec "${rtname}" \ + sysctl -wq net.ipv6.conf."${RT2HS_DEVNAME}".proxy_ndp=1 + ip netns exec "${rtname}" \ + sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".proxy_arp=1 + + # disable the rp_filter otherwise the kernel gets confused about how + # to route decap ipv4 packets. + ip netns exec "${rtname}" \ + sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".rp_filter=0 + + ip netns exec "${rtname}" sh -c "echo 1 > /proc/sys/net/vrf/strict_mode" +} + +setup() +{ + local i + + # create routers + ROUTERS="1 2 3 4"; readonly ROUTERS + for i in ${ROUTERS}; do + create_router "${i}" + done + + # create hosts + HOSTS="1 2"; readonly HOSTS + for i in ${HOSTS}; do + create_host "${i}" + done + + # set up the links for connecting routers + add_link_rt_pairs 1 "2 3 4" + add_link_rt_pairs 2 "3 4" + add_link_rt_pairs 3 "4" + + # set up the basic connectivity of routers and routes required for + # reachability of SIDs. + setup_rt_networking 1 "2 3 4" + setup_rt_networking 2 "1 3 4" + setup_rt_networking 3 "1 2 4" + setup_rt_networking 4 "1 2 3" + + # set up the hosts connected to routers + setup_hs 1 1 + setup_hs 2 2 + + # set up default SRv6 Endpoints (i.e. SRv6 End and SRv6 End.DT46) + setup_rt_local_sids 1 "2 3 4" + setup_rt_local_sids 2 "1 3 4" + setup_rt_local_sids 3 "1 2 4" + setup_rt_local_sids 4 "1 2 3" + + # set up SRv6 Policies + + # create an IPv6 VPN between hosts hs-1 and hs-2. + # + # Direction hs-1 -> hs-2 + # - rt-1 encap (H.Encaps.Red) + # - rt-4 SRv6 End behavior (NEXT-C-SID flavor) + # - rt-3 SRv6 End behavior (NEXT-C-SID flavor) + # - rt-2 SRv6 End.DT46 behavior + setup_ipv6_vpn_1sid 1 2 "4 3" + + # Direction hs2 -> hs-1 + # - rt-2 encap (H.Encaps.Red) + # - rt-3 SRv6 End behavior (NEXT-C-SID flavor) + # - rt-4 SRv6 End behavior (NEXT-C-SID flavor) + # - rt-1 SRv6 End.DT46 behavior + setup_ipv6_vpn_1sid 2 1 "3 4" + + # create an IPv4 VPN between hosts hs-1 and hs-2 + # + # Direction hs-1 -> hs-2 + # - rt-1 encap (H.Encaps.Red) + # - rt-3 SRv6 End behavior (NEXT-C-SID flavor) + # - rt-2 SRv6 End.DT46 behavior + setup_ipv4_vpn_2sids 1 2 "3" + + # Direction hs-2 -> hs-1 + # - rt-2 encap (H.Encaps.Red) + # - rt-3 SRv6 End behavior (NEXT-C-SID flavor) + # - rt-1 SRv6 End.DT46 behavior + setup_ipv4_vpn_2sids 2 1 "3" + + # testing environment was set up successfully + SETUP_ERR=0 +} + +check_rt_connectivity() +{ + local rtsrc="$1" + local rtdst="$2" + local prefix + local rtsrc_nsname + + rtsrc_nsname="$(get_rtname "${rtsrc}")" + + prefix="$(get_network_prefix "${rtsrc}" "${rtdst}")" + + ip netns exec "${rtsrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \ + "${prefix}::${rtdst}" >/dev/null 2>&1 +} + +check_and_log_rt_connectivity() +{ + local rtsrc="$1" + local rtdst="$2" + + check_rt_connectivity "${rtsrc}" "${rtdst}" + log_test $? 0 "Routers connectivity: rt-${rtsrc} -> rt-${rtdst}" +} + +check_hs_ipv6_connectivity() +{ + local hssrc="$1" + local hsdst="$2" + local hssrc_nsname + + hssrc_nsname="$(get_hsname "${hssrc}")" + + ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \ + "${IPv6_HS_NETWORK}::${hsdst}" >/dev/null 2>&1 +} + +check_hs_ipv4_connectivity() +{ + local hssrc="$1" + local hsdst="$2" + local hssrc_nsname + + hssrc_nsname="$(get_hsname "${hssrc}")" + + ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \ + "${IPv4_HS_NETWORK}.${hsdst}" >/dev/null 2>&1 +} + +check_and_log_hs2gw_connectivity() +{ + local hssrc="$1" + + check_hs_ipv6_connectivity "${hssrc}" 254 + log_test $? 0 "IPv6 Hosts connectivity: hs-${hssrc} -> gw" + + check_hs_ipv4_connectivity "${hssrc}" 254 + log_test $? 0 "IPv4 Hosts connectivity: hs-${hssrc} -> gw" +} + +check_and_log_hs_ipv6_connectivity() +{ + local hssrc="$1" + local hsdst="$2" + + check_hs_ipv6_connectivity "${hssrc}" "${hsdst}" + log_test $? 0 "IPv6 Hosts connectivity: hs-${hssrc} -> hs-${hsdst}" +} + +check_and_log_hs_ipv4_connectivity() +{ + local hssrc="$1" + local hsdst="$2" + + check_hs_ipv4_connectivity "${hssrc}" "${hsdst}" + log_test $? 0 "IPv4 Hosts connectivity: hs-${hssrc} -> hs-${hsdst}" +} + +router_tests() +{ + local i + local j + + log_section "IPv6 routers connectivity test" + + for i in ${ROUTERS}; do + for j in ${ROUTERS}; do + if [ "${i}" -eq "${j}" ]; then + continue + fi + + check_and_log_rt_connectivity "${i}" "${j}" + done + done +} + +host2gateway_tests() +{ + local hs + + log_section "IPv4/IPv6 connectivity test among hosts and gateways" + + for hs in ${HOSTS}; do + check_and_log_hs2gw_connectivity "${hs}" + done +} + +host_vpn_tests() +{ + log_section "SRv6 VPN connectivity test hosts (h1 <-> h2, IPv6)" + + check_and_log_hs_ipv6_connectivity 1 2 + check_and_log_hs_ipv6_connectivity 2 1 + + log_section "SRv6 VPN connectivity test hosts (h1 <-> h2, IPv4)" + + check_and_log_hs_ipv4_connectivity 1 2 + check_and_log_hs_ipv4_connectivity 2 1 +} + +__nextcsid_end_behavior_test() +{ + local nsname="$1" + local cmd="$2" + local blen="$3" + local flen="$4" + local layout="" + + if [ "${blen}" != "d" ]; then + layout="${layout} lblen ${blen}" + fi + + if [ "${flen}" != "d" ]; then + layout="${layout} nflen ${flen}" + fi + + ip -netns "${nsname}" -6 route \ + "${cmd}" "${CSID_CNTR_PREFIX}" \ + table "${CSID_CNTR_RT_TABLE}" \ + encap seg6local action End flavors next-csid ${layout} \ + dev "${DUMMY_DEVNAME}" &>/dev/null + + return "$?" +} + +rt_x_nextcsid_end_behavior_test() +{ + local rt="$1" + local blen="$2" + local flen="$3" + local nsname + local ret + + nsname="$(get_rtname "${rt}")" + + __nextcsid_end_behavior_test "${nsname}" "add" "${blen}" "${flen}" + ret="$?" + __nextcsid_end_behavior_test "${nsname}" "del" "${blen}" "${flen}" + + return "${ret}" +} + +__parse_csid_container_cfg() +{ + local cfg="$1" + local index="$2" + local out + + echo "${cfg}" | cut -d',' -f"${index}" +} + +csid_container_cfg_tests() +{ + local valid + local blen + local flen + local cfg + local ret + + log_section "C-SID Container config tests (legend: d='kernel default')" + + for cfg in "${CSID_CONTAINER_CFGS[@]}"; do + blen="$(__parse_csid_container_cfg "${cfg}" 1)" + flen="$(__parse_csid_container_cfg "${cfg}" 2)" + valid="$(__parse_csid_container_cfg "${cfg}" 3)" + + rt_x_nextcsid_end_behavior_test \ + "${CSID_CNTR_RT_ID_TEST}" \ + "${blen}" \ + "${flen}" + ret="$?" + + if [ "${valid}" == "y" ]; then + log_test "${ret}" 0 \ + "Accept valid C-SID container cfg (lblen=${blen}, nflen=${flen})" + else + log_test "${ret}" 2 \ + "Reject invalid C-SID container cfg (lblen=${blen}, nflen=${flen})" + fi + done +} + +test_iproute2_supp_or_ksft_skip() +{ + if ! ip route help 2>&1 | grep -qo "next-csid"; then + echo "SKIP: Missing SRv6 NEXT-C-SID flavor support in iproute2" + exit "${ksft_skip}" + fi +} + +test_dummy_dev_or_ksft_skip() +{ + local test_netns + + test_netns="dummy-$(mktemp -u XXXXXXXX)" + + if ! ip netns add "${test_netns}"; then + echo "SKIP: Cannot set up netns for testing dummy dev support" + exit "${ksft_skip}" + fi + + modprobe dummy &>/dev/null || true + if ! ip -netns "${test_netns}" link \ + add "${DUMMY_DEVNAME}" type dummy; then + echo "SKIP: dummy dev not supported" + + ip netns del "${test_netns}" + exit "${ksft_skip}" + fi + + ip netns del "${test_netns}" +} + +test_vrf_or_ksft_skip() +{ + modprobe vrf &>/dev/null || true + if [ ! -e /proc/sys/net/vrf/strict_mode ]; then + echo "SKIP: vrf sysctl does not exist" + exit "${ksft_skip}" + fi +} + +if [ "$(id -u)" -ne 0 ]; then + echo "SKIP: Need root privileges" + exit "${ksft_skip}" +fi + +# required programs to carry out this selftest +test_command_or_ksft_skip ip +test_command_or_ksft_skip ping +test_command_or_ksft_skip sysctl +test_command_or_ksft_skip grep +test_command_or_ksft_skip cut + +test_iproute2_supp_or_ksft_skip +test_dummy_dev_or_ksft_skip +test_vrf_or_ksft_skip + +set -e +trap cleanup EXIT + +setup +set +e + +csid_container_cfg_tests + +router_tests +host2gateway_tests +host_vpn_tests + +print_log_test_results diff --git a/tools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh new file mode 100755 index 000000000000..c79cb8ede17f --- /dev/null +++ b/tools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh @@ -0,0 +1,1213 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# author: Andrea Mayer <andrea.mayer@uniroma2.it> +# author: Paolo Lungaroni <paolo.lungaroni@uniroma2.it> +# +# This script is designed for testing the support of NEXT-C-SID flavor for SRv6 +# End.X behavior. +# A basic knowledge of SRv6 architecture [1] and of the compressed SID approach +# [2] is assumed for the reader. +# +# The network topology used in the selftest is depicted hereafter, composed of +# two hosts and four routers. Hosts hs-1 and hs-2 are connected through an +# IPv4/IPv6 L3 VPN service, offered by routers rt-1, rt-2, rt-3 and rt-4 using +# the NEXT-C-SID flavor. The key components for such VPNs are: +# +# i) The SRv6 H.Encaps/H.Encaps.Red behaviors [1] apply SRv6 Policies on +# traffic received by connected hosts, initiating the VPN tunnel; +# +# ii) The SRv6 End.X behavior [1] (Endpoint with L3 cross connect) is a +# variant of SRv6 End behavior. It advances the active SID in the SID +# List carried by the SRH and forwards the packet to an L3 adjacency; +# +# iii) The NEXT-C-SID mechanism [2] offers the possibility of encoding several +# SRv6 segments within a single 128-bit SID address, referred to as a +# Compressed SID (C-SID) container. In this way, the length of the SID +# List can be drastically reduced. +# The NEXT-C-SID is provided as a "flavor" of the SRv6 End.X behavior +# which advances the current C-SID (i.e. the Locator-Node Function defined +# in [2]) with the next one carried in the Argument, if available. +# When no more C-SIDs are available in the Argument, the SRv6 End.X +# behavior will apply the End.X function selecting the next SID in the SID +# List; +# +# iv) The SRv6 End.DT46 behavior [1] is used for removing the SRv6 Policy and, +# thus, it terminates the VPN tunnel. Such a behavior is capable of +# handling, at the same time, both tunneled IPv4 and IPv6 traffic. +# +# [1] https://datatracker.ietf.org/doc/html/rfc8986 +# [2] https://datatracker.ietf.org/doc/html/draft-ietf-spring-srv6-srh-compression +# +# +# cafe::1 cafe::2 +# 10.0.0.1 10.0.0.2 +# +--------+ +--------+ +# | | | | +# | hs-1 | | hs-2 | +# | | | | +# +---+----+ +----+---+ +# cafe::/64 | | cafe::/64 +# 10.0.0.0/24 | | 10.0.0.0/24 +# +---+----+ +----+---+ +# | | fcf0:0:1:2::/64 | | +# | rt-1 +-------------------+ rt-2 | +# | | | | +# +---+----+ +----+---+ +# | . . | +# | fcf0:0:1:3::/64 . | +# | . . | +# | . . | +# fcf0:0:1:4::/64 | . | fcf0:0:2:3::/64 +# | . . | +# | . . | +# | fcf0:0:2:4::/64 . | +# | . . | +# +---+----+ +----+---+ +# | | | | +# | rt-4 +-------------------+ rt-3 | +# | | fcf0:0:3:4::/64 | | +# +---+----+ +----+---+ +# +# Every fcf0:0:x:y::/64 network interconnects the SRv6 routers rt-x with rt-y in +# the selftest network. +# +# Local SID/C-SID table +# ===================== +# +# Each SRv6 router is configured with a Local SID/C-SID table in which +# SIDs/C-SIDs are stored. Considering an SRv6 router rt-x, SIDs/C-SIDs are +# configured in the Local SID/C-SIDs table as follows: +# +# Local SID/C-SID table for SRv6 router rt-x +# +-----------------------------------------------------------+ +# |fcff:x::d46 is associated with the non-compressed SRv6 | +# | End.DT46 behavior | +# +-----------------------------------------------------------+ +# |fcbb:0:0x00::/48 is associated with the NEXT-C-SID flavor | +# | of SRv6 End.X behavior | +# +-----------------------------------------------------------+ +# |fcbb:0:0x00:d46::/64 is associated with the SRv6 End.DT46 | +# | behavior when NEXT-C-SID compression is turned on | +# +-----------------------------------------------------------+ +# +# The fcff::/16 prefix is reserved for implementing SRv6 services with regular +# (non compressed) SIDs. Reachability of SIDs is ensured by proper configuration +# of the IPv6 routing tables in the routers. +# Similarly, the fcbb:0::/32 prefix is reserved for implementing SRv6 VPN +# services leveraging the NEXT-C-SID compression mechanism. Indeed, the +# fcbb:0::/32 is used for encoding the Locator-Block while the Locator-Node +# Function is encoded with 16 bits. +# +# Incoming traffic classification and application of SRv6 Policies +# ================================================================ +# +# An SRv6 ingress router applies different SRv6 Policies to the traffic received +# from a connected host, considering the IPv4 or IPv6 destination address. +# SRv6 policy enforcement consists of encapsulating the received traffic into a +# new IPv6 packet with a given SID List contained in the SRH. +# When the SID List contains only one SID, the SRH could be omitted completely +# and that SID is stored directly in the IPv6 Destination Address (DA) (this is +# called "reduced" encapsulation). +# +# Test cases for NEXT-C-SID +# ========================= +# +# We consider two test cases for NEXT-C-SID: i) single SID and ii) double SID. +# +# In the single SID test case we have a number of segments that are all +# contained in a single Compressed SID (C-SID) container. Therefore the +# resulting SID List has only one SID. Using the reduced encapsulation format +# this will result in a packet with no SRH. +# +# In the double SID test case we have one segment carried in a Compressed SID +# (C-SID) container, followed by a regular (non compressed) SID. The resulting +# SID List has two segments and it is possible to test the advance to the next +# SID when all the C-SIDs in a C-SID container have been processed. Using the +# reduced encapsulation format this will result in a packet with an SRH +# containing 1 segment. +# +# For the single SID test case, we use the IPv6 addresses of hs-1 and hs-2, for +# the double SID test case, we use their IPv4 addresses. This is only done to +# simplify the test setup and avoid adding other hosts or multiple addresses on +# the same interface of a host. +# +# Traffic from hs-1 to hs-2 +# ------------------------- +# +# Packets generated from hs-1 and directed towards hs-2 are handled by rt-1 +# which applies the SRv6 Policies as follows: +# +# i) IPv6 DA=cafe::2, H.Encaps.Red with SID List=fcbb:0:0300:0200:d46:: +# ii) IPv4 DA=10.0.0.2, H.Encaps.Red with SID List=fcbb:0:0300::,fcff:2::d46 +# +# ### i) single SID +# +# The router rt-1 is configured to enforce the given Policy through the SRv6 +# H.Encaps.Red behavior which avoids the presence of the SRH at all, since it +# pushes the single SID directly in the IPv6 DA. Such a SID encodes a whole +# C-SID container carrying several C-SIDs (e.g. 0300, 0200, etc). +# +# As the packet reaches the router rt-3, the enabled NEXT-C-SID SRv6 End.X +# behavior (associated with fcbb:0:0300::/48) is triggered. This behavior +# analyzes the IPv6 DA and checks whether the Argument of the C-SID container +# is zero or not. In this case, the Argument is *NOT* zero and the IPv6 DA is +# updated as follows: +# +# +-----------------------------------------------------------------+ +# | Before applying the rt-3 enabled NEXT-C-SID SRv6 End.X behavior | +# +-----------------------------------------------------------------+ +# | +---------- Argument | +# | vvvvvvvvvv | +# | IPv6 DA fcbb:0:0300:0200:d46:: | +# | ^^^^ <-- shifting | +# | | | +# | Locator-Node Function | +# +-----------------------------------------------------------------+ +# | After applying the rt-3 enabled NEXT-C-SID SRv6 End.X behavior | +# +-----------------------------------------------------------------+ +# | +---------- Argument | +# | vvvvvv | +# | IPv6 DA fcbb:0:0200:d46:: | +# | ^^^^ | +# | | | +# | Locator-Node Function | +# +-----------------------------------------------------------------+ +# +# After having applied the enabled NEXT-C-SID SRv6 End.X behavior, the packet +# is sent to rt-4 node using the L3 adjacency address fcf0:0:3:4::4. +# +# The node rt-4 performs a plain IPv6 forward to the rt-2 router according to +# its Local SID table and using the IPv6 DA fcbb:0:0200:d46:: . +# +# The router rt-2 is configured for decapsulating the inner IPv6 packet and, +# for this reason, it applies the SRv6 End.DT46 behavior on the received +# packet. It is worth noting that the SRv6 End.DT46 behavior does not require +# the presence of the SRH: it is fully capable to operate properly on +# IPv4/IPv6-in-IPv6 encapsulations. +# At the end of the decap operation, the packet is sent to the host hs-2. +# +# ### ii) double SID +# +# The router rt-1 is configured to enforce the given Policy through the SRv6 +# H.Encaps.Red. As a result, the first SID fcbb:0:0300:: is stored into the +# IPv6 DA, while the SRH pushed into the packet is made of only one SID, i.e. +# fcff:2::d46. Hence, the packet sent by hs-1 to hs-2 is encapsulated in an +# outer IPv6 header plus the SRH. +# +# As the packet reaches the node rt-3, the router applies the enabled NEXT-C-SID +# SRv6 End.X behavior. +# +# +-----------------------------------------------------------------+ +# | Before applying the rt-3 enabled NEXT-C-SID SRv6 End.X behavior | +# +-----------------------------------------------------------------+ +# | +---------- Argument | +# | vvvv (Argument is all filled with zeros) | +# | IPv6 DA fcbb:0:0300:: | +# | ^^^^ | +# | | | +# | Locator-Node Function | +# +-----------------------------------------------------------------+ +# | After applying the rt-3 enabled NEXT-C-SID SRv6 End.X behavior | +# +-----------------------------------------------------------------+ +# | | +# | IPv6 DA fcff:2::d46 | +# | ^^^^^^^^^^^ | +# | | | +# | SID copied from the SID List contained in the SRH | +# +-----------------------------------------------------------------+ +# +# Since the Argument of the C-SID container is zero, the behavior can not +# update the Locator-Node function with the next C-SID carried in the Argument +# itself. Thus, the enabled NEXT-C-SID SRv6 End.X behavior operates as the +# traditional End.X behavior: it updates the IPv6 DA by copying the next +# available SID in the SID List carried by the SRH. Next, the packet is +# forwarded to the rt-4 node using the L3 adjacency fcf0:3:4::4 previously +# configured for this behavior. +# +# The node rt-4 performs a plain IPv6 forward to the rt-2 router according to +# its Local SID table and using the IPv6 DA fcff:2::d46. +# +# Once the packet is received by rt-2, the router decapsulates the inner IPv4 +# packet using the SRv6 End.DT46 behavior (associated with the SID fcff:2::d46) +# and sends it to the host hs-2. +# +# Traffic from hs-2 to hs-1 +# ------------------------- +# +# Packets generated from hs-2 and directed towards hs-1 are handled by rt-2 +# which applies the SRv6 Policies as follows: +# +# i) IPv6 DA=cafe::1, SID List=fcbb:0:0400:0100:d46:: +# ii) IPv4 DA=10.0.0.1, SID List=fcbb:0:0300::,fcff:1::d46 +# +# ### i) single SID +# +# The node hs-2 sends an IPv6 packet directed to node hs-1. The router rt-2 is +# directly connected to hs-2 and receives the packet. Rt-2 applies the +# H.Encap.Red behavior with policy i) described above. Since there is only one +# SID, the SRH header is omitted and the policy is inserted directly into the DA +# of IPv6 packet. +# +# The packet reaches the router rt-4 and the enabled NEXT-C-SID SRv6 End.X +# behavior (associated with fcbb:0:0400::/48) is triggered. This behavior +# analyzes the IPv6 DA and checks whether the Argument of the C-SID container +# is zero or not. The Argument is *NOT* zero and the C-SID in the IPv6 DA is +# advanced. At this point, the current IPv6 DA is fcbb:0:0100:d46:: . +# The enabled NEXT-C-SID SRv6 End.X behavior is configured with the L3 adjacency +# fcf0:0:1:4::1, used to route traffic to the rt-1 node. +# +# The router rt-1 is configured for decapsulating the inner packet. It applies +# the SRv6 End.DT46 behavior on the received packet. Decapsulation does not +# require the presence of the SRH. At the end of the decap operation, the packet +# is sent to the host hs-1. +# +# ### ii) double SID +# +# The router rt-2 is configured to enforce the given Policy through the SRv6 +# H.Encaps.Red. As a result, the first SID fcbb:0:0300:: is stored into the +# IPv6 DA, while the SRH pushed into the packet is made of only one SID, i.e. +# fcff:1::d46. Hence, the packet sent by hs-2 to hs-1 is encapsulated in an +# outer IPv6 header plus the SRH. +# +# As the packet reaches the node rt-3, the enabled NEXT-C-SID SRv6 End.X +# behavior bound to the SID fcbb:0:0300::/48 is triggered. +# Since the Argument of the C-SID container is zero, the behavior can not +# update the Locator-Node function with the next C-SID carried in the Argument +# itself. Thus, the enabled NEXT-C-SID SRv6 End-X behavior operates as the +# traditional End.X behavior: it updates the IPv6 DA by copying the next +# available SID in the SID List carried by the SRH. After that, the packet is +# forwarded to the rt-4 node using the L3 adjacency (fcf0:3:4::4) previously +# configured for this behavior. +# +# The node rt-4 performs a plain IPv6 forward to the rt-1 router according to +# its Local SID table, considering the IPv6 DA fcff:1::d46. +# +# Once the packet is received by rt-1, the router decapsulates the inner IPv4 +# packet using the SRv6 End.DT46 behavior (associated with the SID fcff:1::d46) +# and sends it to the host hs-1. + +# Kselftest framework requirement - SKIP code is 4. +readonly ksft_skip=4 + +readonly RDMSUFF="$(mktemp -u XXXXXXXX)" +readonly DUMMY_DEVNAME="dum0" +readonly VRF_TID=100 +readonly VRF_DEVNAME="vrf-${VRF_TID}" +readonly RT2HS_DEVNAME="veth-t${VRF_TID}" +readonly LOCALSID_TABLE_ID=90 +readonly IPv6_RT_NETWORK=fcf0:0 +readonly IPv6_HS_NETWORK=cafe +readonly IPv4_HS_NETWORK=10.0.0 +readonly VPN_LOCATOR_SERVICE=fcff +readonly DT46_FUNC=0d46 +readonly HEADEND_ENCAP="encap.red" + +# do not add ':' as separator +readonly LCBLOCK_ADDR=fcbb0000 +readonly LCBLOCK_BLEN=32 +# do not add ':' as separator +readonly LCNODEFUNC_FMT="0%d00" +readonly LCNODEFUNC_BLEN=16 + +readonly LCBLOCK_NODEFUNC_BLEN=$((LCBLOCK_BLEN + LCNODEFUNC_BLEN)) + +readonly CSID_CNTR_PREFIX="dead:beaf::/32" +# ID of the router used for testing the C-SID container cfgs +readonly CSID_CNTR_RT_ID_TEST=1 +# Routing table used for testing the C-SID container cfgs +readonly CSID_CNTR_RT_TABLE=91 + +# C-SID container configurations to be tested +# +# An entry of the array is defined as "a,b,c" where: +# - 'a' and 'b' elements represent respectively the Locator-Block length +# (lblen) in bits and the Locator-Node Function length (nflen) in bits. +# 'a' and 'b' can be set to default values using the placeholder "d" which +# indicates the default kernel values (32 for lblen and 16 for nflen); +# otherwise, any numeric value is accepted; +# - 'c' indicates whether the C-SID configuration provided by the values 'a' +# and 'b' should be considered valid ("y") or invalid ("n"). +declare -ra CSID_CONTAINER_CFGS=( + "d,d,y" + "d,16,y" + "16,d,y" + "16,32,y" + "32,16,y" + "48,8,y" + "8,48,y" + "d,0,n" + "0,d,n" + "32,0,n" + "0,32,n" + "17,d,n" + "d,17,n" + "120,16,n" + "16,120,n" + "0,128,n" + "128,0,n" + "130,0,n" + "0,130,n" + "0,0,n" +) + +PING_TIMEOUT_SEC=4 +PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no} + +# IDs of routers and hosts are initialized during the setup of the testing +# network +ROUTERS='' +HOSTS='' + +SETUP_ERR=1 + +ret=${ksft_skip} +nsuccess=0 +nfail=0 + +log_test() +{ + local rc="$1" + local expected="$2" + local msg="$3" + + if [ "${rc}" -eq "${expected}" ]; then + nsuccess=$((nsuccess+1)) + printf "\n TEST: %-60s [ OK ]\n" "${msg}" + else + ret=1 + nfail=$((nfail+1)) + printf "\n TEST: %-60s [FAIL]\n" "${msg}" + if [ "${PAUSE_ON_FAIL}" = "yes" ]; then + echo + echo "hit enter to continue, 'q' to quit" + read a + [ "$a" = "q" ] && exit 1 + fi + fi +} + +print_log_test_results() +{ + printf "\nTests passed: %3d\n" "${nsuccess}" + printf "Tests failed: %3d\n" "${nfail}" + + # when a test fails, the value of 'ret' is set to 1 (error code). + # Conversely, when all tests are passed successfully, the 'ret' value + # is set to 0 (success code). + if [ "${ret}" -ne 1 ]; then + ret=0 + fi +} + +log_section() +{ + echo + echo "################################################################################" + echo "TEST SECTION: $*" + echo "################################################################################" +} + +test_command_or_ksft_skip() +{ + local cmd="$1" + + if [ ! -x "$(command -v "${cmd}")" ]; then + echo "SKIP: Could not run test without \"${cmd}\" tool"; + exit "${ksft_skip}" + fi +} + +get_nodename() +{ + local name="$1" + + echo "${name}-${RDMSUFF}" +} + +get_rtname() +{ + local rtid="$1" + + get_nodename "rt-${rtid}" +} + +get_hsname() +{ + local hsid="$1" + + get_nodename "hs-${hsid}" +} + +__create_namespace() +{ + local name="$1" + + ip netns add "${name}" +} + +create_router() +{ + local rtid="$1" + local nsname + + nsname="$(get_rtname "${rtid}")" + + __create_namespace "${nsname}" + + ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0 + ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0 + ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.forwarding=1 + + ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.all.rp_filter=0 + ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.default.rp_filter=0 + ip netns exec "${nsname}" sysctl -wq net.ipv4.ip_forward=1 +} + +create_host() +{ + local hsid="$1" + local nsname + + nsname="$(get_hsname "${hsid}")" + + __create_namespace "${nsname}" +} + +cleanup() +{ + local nsname + local i + + # destroy routers + for i in ${ROUTERS}; do + nsname="$(get_rtname "${i}")" + + ip netns del "${nsname}" &>/dev/null || true + done + + # destroy hosts + for i in ${HOSTS}; do + nsname="$(get_hsname "${i}")" + + ip netns del "${nsname}" &>/dev/null || true + done + + # check whether the setup phase was completed successfully or not. In + # case of an error during the setup phase of the testing environment, + # the selftest is considered as "skipped". + if [ "${SETUP_ERR}" -ne 0 ]; then + echo "SKIP: Setting up the testing environment failed" + exit "${ksft_skip}" + fi + + exit "${ret}" +} + +add_link_rt_pairs() +{ + local rt="$1" + local rt_neighs="$2" + local neigh + local nsname + local neigh_nsname + + nsname="$(get_rtname "${rt}")" + + for neigh in ${rt_neighs}; do + neigh_nsname="$(get_rtname "${neigh}")" + + ip link add "veth-rt-${rt}-${neigh}" netns "${nsname}" \ + type veth peer name "veth-rt-${neigh}-${rt}" \ + netns "${neigh_nsname}" + done +} + +get_network_prefix() +{ + local rt="$1" + local neigh="$2" + local p="${rt}" + local q="${neigh}" + + if [ "${p}" -gt "${q}" ]; then + p="${q}"; q="${rt}" + fi + + echo "${IPv6_RT_NETWORK}:${p}:${q}" +} + +# Setup the basic networking for the routers +setup_rt_networking() +{ + local rt="$1" + local rt_neighs="$2" + local nsname + local net_prefix + local devname + local neigh + + nsname="$(get_rtname "${rt}")" + + for neigh in ${rt_neighs}; do + devname="veth-rt-${rt}-${neigh}" + + net_prefix="$(get_network_prefix "${rt}" "${neigh}")" + + ip -netns "${nsname}" addr \ + add "${net_prefix}::${rt}/64" dev "${devname}" nodad + + ip -netns "${nsname}" link set "${devname}" up + done + + ip -netns "${nsname}" link add "${DUMMY_DEVNAME}" type dummy + + ip -netns "${nsname}" link set "${DUMMY_DEVNAME}" up + ip -netns "${nsname}" link set lo up +} + +# build an ipv6 prefix/address based on the input string +# Note that the input string does not contain ':' and '::' which are considered +# to be implicit. +# e.g.: +# - input: fbcc00000400300 +# - output: fbcc:0000:0400:0300:0000:0000:0000:0000 +# ^^^^^^^^^^^^^^^^^^^ +# fill the address with 0s +build_ipv6_addr() +{ + local addr="$1" + local out="" + local strlen="${#addr}" + local padn + local i + + # add ":" every 4 digits (16 bits) + for (( i = 0; i < strlen; i++ )); do + if (( i > 0 && i < 32 && (i % 4) == 0 )); then + out="${out}:" + fi + + out="${out}${addr:$i:1}" + done + + # fill the remaining bits of the address with 0s + padn=$((32 - strlen)) + for (( i = padn; i > 0; i-- )); do + if (( i > 0 && i < 32 && (i % 4) == 0 )); then + out="${out}:" + fi + + out="${out}0" + done + + printf "${out}" +} + +build_csid() +{ + local nodeid="$1" + + printf "${LCNODEFUNC_FMT}" "${nodeid}" +} + +build_lcnode_func_prefix() +{ + local nodeid="$1" + local lcnodefunc + local prefix + local out + + lcnodefunc="$(build_csid "${nodeid}")" + prefix="$(build_ipv6_addr "${LCBLOCK_ADDR}${lcnodefunc}")" + + out="${prefix}/${LCBLOCK_NODEFUNC_BLEN}" + + echo "${out}" +} + +set_end_x_nextcsid() +{ + local rt="$1" + local adj="$2" + + nsname="$(get_rtname "${rt}")" + net_prefix="$(get_network_prefix "${rt}" "${adj}")" + lcnode_func_prefix="$(build_lcnode_func_prefix "${rt}")" + + # enabled NEXT-C-SID SRv6 End.X behavior (note that "dev" is the dummy + # dum0 device chosen for the sake of simplicity). + ip -netns "${nsname}" -6 route \ + replace "${lcnode_func_prefix}" \ + table "${LOCALSID_TABLE_ID}" \ + encap seg6local action End.X nh6 "${net_prefix}::${adj}" \ + flavors next-csid lblen "${LCBLOCK_BLEN}" \ + nflen "${LCNODEFUNC_BLEN}" dev "${DUMMY_DEVNAME}" +} + +set_underlay_sids_reachability() +{ + local rt="$1" + local rt_neighs="$2" + + nsname="$(get_rtname "${rt}")" + + for neigh in ${rt_neighs}; do + devname="veth-rt-${rt}-${neigh}" + + net_prefix="$(get_network_prefix "${rt}" "${neigh}")" + + # set underlay network routes for SIDs reachability + ip -netns "${nsname}" -6 route \ + replace "${VPN_LOCATOR_SERVICE}:${neigh}::/32" \ + table "${LOCALSID_TABLE_ID}" \ + via "${net_prefix}::${neigh}" dev "${devname}" + + # set the underlay network for C-SIDs reachability + lcnode_func_prefix="$(build_lcnode_func_prefix "${neigh}")" + + ip -netns "${nsname}" -6 route \ + replace "${lcnode_func_prefix}" \ + table "${LOCALSID_TABLE_ID}" \ + via "${net_prefix}::${neigh}" dev "${devname}" + done +} + +# Setup local SIDs for an SRv6 router +setup_rt_local_sids() +{ + local rt="$1" + local rt_neighs="$2" + local net_prefix + local devname + local nsname + local neigh + local lcnode_func_prefix + local lcblock_prefix + + nsname="$(get_rtname "${rt}")" + + set_underlay_sids_reachability "${rt}" "${rt_neighs}" + + # all SIDs for VPNs start with a common locator. Routes and SRv6 + # Endpoint behavior instaces are grouped together in the 'localsid' + # table. + ip -netns "${nsname}" -6 rule \ + add to "${VPN_LOCATOR_SERVICE}::/16" \ + lookup "${LOCALSID_TABLE_ID}" prio 999 + + # common locator block for NEXT-C-SIDS compression mechanism. + lcblock_prefix="$(build_ipv6_addr "${LCBLOCK_ADDR}")" + ip -netns "${nsname}" -6 rule \ + add to "${lcblock_prefix}/${LCBLOCK_BLEN}" \ + lookup "${LOCALSID_TABLE_ID}" prio 999 +} + +# build and install the SRv6 policy into the ingress SRv6 router as well as the +# decap SID in the egress one. +# args: +# $1 - src host (evaluate automatically the ingress router) +# $2 - dst host (evaluate automatically the egress router) +# $3 - SRv6 routers configured for steering traffic (End.X behaviors) +# $4 - single SID or double SID +# $5 - traffic type (IPv6 or IPv4) +__setup_l3vpn() +{ + local src="$1" + local dst="$2" + local end_rts="$3" + local mode="$4" + local traffic="$5" + local nsname + local policy + local container + local decapsid + local lcnfunc + local dt + local n + local rtsrc_nsname + local rtdst_nsname + + rtsrc_nsname="$(get_rtname "${src}")" + rtdst_nsname="$(get_rtname "${dst}")" + + container="${LCBLOCK_ADDR}" + + # build first SID (C-SID container) + for n in ${end_rts}; do + lcnfunc="$(build_csid "${n}")" + + container="${container}${lcnfunc}" + done + + if [ "${mode}" -eq 1 ]; then + # single SID policy + dt="$(build_csid "${dst}")${DT46_FUNC}" + container="${container}${dt}" + # build the full ipv6 address for the container + policy="$(build_ipv6_addr "${container}")" + + # build the decap SID used in the decap node + container="${LCBLOCK_ADDR}${dt}" + decapsid="$(build_ipv6_addr "${container}")" + else + # double SID policy + decapsid="${VPN_LOCATOR_SERVICE}:${dst}::${DT46_FUNC}" + + policy="$(build_ipv6_addr "${container}"),${decapsid}" + fi + + # apply encap policy + if [ "${traffic}" -eq 6 ]; then + ip -netns "${rtsrc_nsname}" -6 route \ + add "${IPv6_HS_NETWORK}::${dst}" vrf "${VRF_DEVNAME}" \ + encap seg6 mode "${HEADEND_ENCAP}" segs "${policy}" \ + dev "${VRF_DEVNAME}" + + ip -netns "${rtsrc_nsname}" -6 neigh \ + add proxy "${IPv6_HS_NETWORK}::${dst}" \ + dev "${RT2HS_DEVNAME}" + else + # "dev" must be different from the one where the packet is + # received, otherwise the proxy arp does not work. + ip -netns "${rtsrc_nsname}" -4 route \ + add "${IPv4_HS_NETWORK}.${dst}" vrf "${VRF_DEVNAME}" \ + encap seg6 mode "${HEADEND_ENCAP}" segs "${policy}" \ + dev "${VRF_DEVNAME}" + fi + + # apply decap + # Local End.DT46 behavior (decap) + ip -netns "${rtdst_nsname}" -6 route \ + add "${decapsid}" \ + table "${LOCALSID_TABLE_ID}" \ + encap seg6local action End.DT46 vrftable "${VRF_TID}" \ + dev "${VRF_DEVNAME}" +} + +# see __setup_l3vpn() +setup_ipv4_vpn_2sids() +{ + __setup_l3vpn "$1" "$2" "$3" 2 4 +} + +# see __setup_l3vpn() +setup_ipv6_vpn_1sid() +{ + __setup_l3vpn "$1" "$2" "$3" 1 6 +} + +setup_hs() +{ + local hs="$1" + local rt="$2" + local hsname + local rtname + + hsname="$(get_hsname "${hs}")" + rtname="$(get_rtname "${rt}")" + + ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0 + ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0 + + ip -netns "${hsname}" link add veth0 type veth \ + peer name "${RT2HS_DEVNAME}" netns "${rtname}" + + ip -netns "${hsname}" addr \ + add "${IPv6_HS_NETWORK}::${hs}/64" dev veth0 nodad + ip -netns "${hsname}" addr add "${IPv4_HS_NETWORK}.${hs}/24" dev veth0 + + ip -netns "${hsname}" link set veth0 up + ip -netns "${hsname}" link set lo up + + # configure the VRF on the router which is directly connected to the + # source host. + ip -netns "${rtname}" link \ + add "${VRF_DEVNAME}" type vrf table "${VRF_TID}" + ip -netns "${rtname}" link set "${VRF_DEVNAME}" up + + # enslave the veth interface connecting the router with the host to the + # VRF in the access router + ip -netns "${rtname}" link \ + set "${RT2HS_DEVNAME}" master "${VRF_DEVNAME}" + + # set default routes to unreachable for both ipv6 and ipv4 + ip -netns "${rtname}" -6 route \ + add unreachable default metric 4278198272 \ + vrf "${VRF_DEVNAME}" + ip -netns "${rtname}" -4 route \ + add unreachable default metric 4278198272 \ + vrf "${VRF_DEVNAME}" + + ip -netns "${rtname}" addr \ + add "${IPv6_HS_NETWORK}::254/64" dev "${RT2HS_DEVNAME}" nodad + ip -netns "${rtname}" addr \ + add "${IPv4_HS_NETWORK}.254/24" dev "${RT2HS_DEVNAME}" + + ip -netns "${rtname}" link set "${RT2HS_DEVNAME}" up + + ip netns exec "${rtname}" \ + sysctl -wq net.ipv6.conf."${RT2HS_DEVNAME}".proxy_ndp=1 + ip netns exec "${rtname}" \ + sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".proxy_arp=1 + + # disable the rp_filter otherwise the kernel gets confused about how + # to route decap ipv4 packets. + ip netns exec "${rtname}" \ + sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".rp_filter=0 + + ip netns exec "${rtname}" sh -c "echo 1 > /proc/sys/net/vrf/strict_mode" +} + +setup() +{ + local i + + # create routers + ROUTERS="1 2 3 4"; readonly ROUTERS + for i in ${ROUTERS}; do + create_router "${i}" + done + + # create hosts + HOSTS="1 2"; readonly HOSTS + for i in ${HOSTS}; do + create_host "${i}" + done + + # set up the links for connecting routers + add_link_rt_pairs 1 "2 3 4" + add_link_rt_pairs 2 "3 4" + add_link_rt_pairs 3 "4" + + # set up the basic connectivity of routers and routes required for + # reachability of SIDs. + setup_rt_networking 1 "2 3 4" + setup_rt_networking 2 "1 3 4" + setup_rt_networking 3 "1 2 4" + setup_rt_networking 4 "1 2 3" + + # set up the hosts connected to routers + setup_hs 1 1 + setup_hs 2 2 + + # set up default SRv6 Endpoints (i.e. SRv6 End and SRv6 End.DT46) + setup_rt_local_sids 1 "2 3 4" + setup_rt_local_sids 2 "1 3 4" + setup_rt_local_sids 3 "1 2 4" + setup_rt_local_sids 4 "1 2 3" + + # set up SRv6 Policies + + # create an IPv6 VPN between hosts hs-1 and hs-2. + # + # Direction hs-1 -> hs-2 + # - rt-1 encap (H.Encaps.Red) + # - rt-3 SRv6 End.X behavior adj rt-4 (NEXT-C-SID flavor) + # - rt-4 Plain IPv6 Forwarding to rt-2 + # - rt-2 SRv6 End.DT46 behavior + setup_ipv6_vpn_1sid 1 2 "3" + + # Direction hs2 -> hs-1 + # - rt-2 encap (H.Encaps.Red) + # - rt-4 SRv6 End.X behavior adj rt-1 (NEXT-C-SID flavor) + # - rt-1 SRv6 End.DT46 behavior + setup_ipv6_vpn_1sid 2 1 "4" + + # create an IPv4 VPN between hosts hs-1 and hs-2 + # + # Direction hs-1 -> hs-2 + # - rt-1 encap (H.Encaps.Red) + # - rt-3 SRv6 End.X behavior adj rt-4 (NEXT-C-SID flavor) + # - rt-4 Plain IPv6 Forwarding to rt-2 + # - rt-2 SRv6 End.DT46 behavior + setup_ipv4_vpn_2sids 1 2 "3" + + # Direction hs-2 -> hs-1 + # - rt-2 encap (H.Encaps.Red) + # - rt-3 SRv6 End.X behavior adj rt-4 (NEXT-C-SID flavor) + # - rt-4 Plain IPv6 Forwarding to rt-1 + # - rt-1 SRv6 End.DT46 behavior + setup_ipv4_vpn_2sids 2 1 "3" + + # Setup the adjacencies in the SRv6 aware routers + # - rt-3 SRv6 End.X adjacency with rt-4 + # - rt-4 SRv6 End.X adjacency with rt-1 + set_end_x_nextcsid 3 4 + set_end_x_nextcsid 4 1 + + # testing environment was set up successfully + SETUP_ERR=0 +} + +check_rt_connectivity() +{ + local rtsrc="$1" + local rtdst="$2" + local prefix + local rtsrc_nsname + + rtsrc_nsname="$(get_rtname "${rtsrc}")" + + prefix="$(get_network_prefix "${rtsrc}" "${rtdst}")" + + ip netns exec "${rtsrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \ + "${prefix}::${rtdst}" >/dev/null 2>&1 +} + +check_and_log_rt_connectivity() +{ + local rtsrc="$1" + local rtdst="$2" + + check_rt_connectivity "${rtsrc}" "${rtdst}" + log_test $? 0 "Routers connectivity: rt-${rtsrc} -> rt-${rtdst}" +} + +check_hs_ipv6_connectivity() +{ + local hssrc="$1" + local hsdst="$2" + local hssrc_nsname + + hssrc_nsname="$(get_hsname "${hssrc}")" + + ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \ + "${IPv6_HS_NETWORK}::${hsdst}" >/dev/null 2>&1 +} + +check_hs_ipv4_connectivity() +{ + local hssrc="$1" + local hsdst="$2" + local hssrc_nsname + + hssrc_nsname="$(get_hsname "${hssrc}")" + + ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \ + "${IPv4_HS_NETWORK}.${hsdst}" >/dev/null 2>&1 +} + +check_and_log_hs2gw_connectivity() +{ + local hssrc="$1" + + check_hs_ipv6_connectivity "${hssrc}" 254 + log_test $? 0 "IPv6 Hosts connectivity: hs-${hssrc} -> gw" + + check_hs_ipv4_connectivity "${hssrc}" 254 + log_test $? 0 "IPv4 Hosts connectivity: hs-${hssrc} -> gw" +} + +check_and_log_hs_ipv6_connectivity() +{ + local hssrc="$1" + local hsdst="$2" + + check_hs_ipv6_connectivity "${hssrc}" "${hsdst}" + log_test $? 0 "IPv6 Hosts connectivity: hs-${hssrc} -> hs-${hsdst}" +} + +check_and_log_hs_ipv4_connectivity() +{ + local hssrc="$1" + local hsdst="$2" + + check_hs_ipv4_connectivity "${hssrc}" "${hsdst}" + log_test $? 0 "IPv4 Hosts connectivity: hs-${hssrc} -> hs-${hsdst}" +} + +router_tests() +{ + local i + local j + + log_section "IPv6 routers connectivity test" + + for i in ${ROUTERS}; do + for j in ${ROUTERS}; do + if [ "${i}" -eq "${j}" ]; then + continue + fi + + check_and_log_rt_connectivity "${i}" "${j}" + done + done +} + +host2gateway_tests() +{ + local hs + + log_section "IPv4/IPv6 connectivity test among hosts and gateways" + + for hs in ${HOSTS}; do + check_and_log_hs2gw_connectivity "${hs}" + done +} + +host_vpn_tests() +{ + log_section "SRv6 VPN connectivity test hosts (h1 <-> h2, IPv6)" + + check_and_log_hs_ipv6_connectivity 1 2 + check_and_log_hs_ipv6_connectivity 2 1 + + log_section "SRv6 VPN connectivity test hosts (h1 <-> h2, IPv4)" + + check_and_log_hs_ipv4_connectivity 1 2 + check_and_log_hs_ipv4_connectivity 2 1 +} + +__nextcsid_end_x_behavior_test() +{ + local nsname="$1" + local cmd="$2" + local blen="$3" + local flen="$4" + local layout="" + + if [ "${blen}" != "d" ]; then + layout="${layout} lblen ${blen}" + fi + + if [ "${flen}" != "d" ]; then + layout="${layout} nflen ${flen}" + fi + + ip -netns "${nsname}" -6 route \ + "${cmd}" "${CSID_CNTR_PREFIX}" \ + table "${CSID_CNTR_RT_TABLE}" \ + encap seg6local action End.X nh6 :: \ + flavors next-csid ${layout} \ + dev "${DUMMY_DEVNAME}" &>/dev/null + + return "$?" +} + +rt_x_nextcsid_end_x_behavior_test() +{ + local rt="$1" + local blen="$2" + local flen="$3" + local nsname + local ret + + nsname="$(get_rtname "${rt}")" + + __nextcsid_end_x_behavior_test "${nsname}" "add" "${blen}" "${flen}" + ret="$?" + __nextcsid_end_x_behavior_test "${nsname}" "del" "${blen}" "${flen}" + + return "${ret}" +} + +__parse_csid_container_cfg() +{ + local cfg="$1" + local index="$2" + local out + + echo "${cfg}" | cut -d',' -f"${index}" +} + +csid_container_cfg_tests() +{ + local valid + local blen + local flen + local cfg + local ret + + log_section "C-SID Container config tests (legend: d='kernel default')" + + for cfg in "${CSID_CONTAINER_CFGS[@]}"; do + blen="$(__parse_csid_container_cfg "${cfg}" 1)" + flen="$(__parse_csid_container_cfg "${cfg}" 2)" + valid="$(__parse_csid_container_cfg "${cfg}" 3)" + + rt_x_nextcsid_end_x_behavior_test \ + "${CSID_CNTR_RT_ID_TEST}" \ + "${blen}" \ + "${flen}" + ret="$?" + + if [ "${valid}" == "y" ]; then + log_test "${ret}" 0 \ + "Accept valid C-SID container cfg (lblen=${blen}, nflen=${flen})" + else + log_test "${ret}" 2 \ + "Reject invalid C-SID container cfg (lblen=${blen}, nflen=${flen})" + fi + done +} + +test_iproute2_supp_or_ksft_skip() +{ + if ! ip route help 2>&1 | grep -qo "next-csid"; then + echo "SKIP: Missing SRv6 NEXT-C-SID flavor support in iproute2" + exit "${ksft_skip}" + fi +} + +test_dummy_dev_or_ksft_skip() +{ + local test_netns + + test_netns="dummy-$(mktemp -u XXXXXXXX)" + + if ! ip netns add "${test_netns}"; then + echo "SKIP: Cannot set up netns for testing dummy dev support" + exit "${ksft_skip}" + fi + + modprobe dummy &>/dev/null || true + if ! ip -netns "${test_netns}" link \ + add "${DUMMY_DEVNAME}" type dummy; then + echo "SKIP: dummy dev not supported" + + ip netns del "${test_netns}" + exit "${ksft_skip}" + fi + + ip netns del "${test_netns}" +} + +test_vrf_or_ksft_skip() +{ + modprobe vrf &>/dev/null || true + if [ ! -e /proc/sys/net/vrf/strict_mode ]; then + echo "SKIP: vrf sysctl does not exist" + exit "${ksft_skip}" + fi +} + +if [ "$(id -u)" -ne 0 ]; then + echo "SKIP: Need root privileges" + exit "${ksft_skip}" +fi + +# required programs to carry out this selftest +test_command_or_ksft_skip ip +test_command_or_ksft_skip ping +test_command_or_ksft_skip sysctl +test_command_or_ksft_skip grep +test_command_or_ksft_skip cut + +test_iproute2_supp_or_ksft_skip +test_dummy_dev_or_ksft_skip +test_vrf_or_ksft_skip + +set -e +trap cleanup EXIT + +setup +set +e + +csid_container_cfg_tests + +router_tests +host2gateway_tests +host_vpn_tests + +print_log_test_results diff --git a/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh b/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh new file mode 100755 index 000000000000..28a775654b92 --- /dev/null +++ b/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh @@ -0,0 +1,879 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# author: Andrea Mayer <andrea.mayer@uniroma2.it> +# +# This script is designed for testing the SRv6 H.Encaps.Red behavior. +# +# Below is depicted the IPv6 network of an operator which offers advanced +# IPv4/IPv6 VPN services to hosts, enabling them to communicate with each +# other. +# In this example, hosts hs-1 and hs-2 are connected through an IPv4/IPv6 VPN +# service, while hs-3 and hs-4 are connected using an IPv6 only VPN. +# +# Routers rt-1,rt-2,rt-3 and rt-4 implement IPv4/IPv6 L3 VPN services +# leveraging the SRv6 architecture. The key components for such VPNs are: +# +# i) The SRv6 H.Encaps.Red behavior applies SRv6 Policies on traffic received +# by connected hosts, initiating the VPN tunnel. Such a behavior is an +# optimization of the SRv6 H.Encap aiming to reduce the length of the SID +# List carried in the pushed SRH. Specifically, the H.Encaps.Red removes +# the first SID contained in the SID List (i.e. SRv6 Policy) by storing it +# into the IPv6 Destination Address. When a SRv6 Policy is made of only one +# SID, the SRv6 H.Encaps.Red behavior omits the SRH at all and pushes that +# SID directly into the IPv6 DA; +# +# ii) The SRv6 End behavior advances the active SID in the SID List carried by +# the SRH; +# +# iii) The SRv6 End.DT46 behavior is used for removing the SRv6 Policy and, +# thus, it terminates the VPN tunnel. Such a behavior is capable of +# handling, at the same time, both tunneled IPv4 and IPv6 traffic. +# +# +# cafe::1 cafe::2 +# 10.0.0.1 10.0.0.2 +# +--------+ +--------+ +# | | | | +# | hs-1 | | hs-2 | +# | | | | +# +---+----+ +--- +---+ +# cafe::/64 | | cafe::/64 +# 10.0.0.0/24 | | 10.0.0.0/24 +# +---+----+ +----+---+ +# | | fcf0:0:1:2::/64 | | +# | rt-1 +-------------------+ rt-2 | +# | | | | +# +---+----+ +----+---+ +# | . . | +# | fcf0:0:1:3::/64 . | +# | . . | +# | . . | +# fcf0:0:1:4::/64 | . | fcf0:0:2:3::/64 +# | . . | +# | . . | +# | fcf0:0:2:4::/64 . | +# | . . | +# +---+----+ +----+---+ +# | | | | +# | rt-4 +-------------------+ rt-3 | +# | | fcf0:0:3:4::/64 | | +# +---+----+ +----+---+ +# cafe::/64 | | cafe::/64 +# 10.0.0.0/24 | | 10.0.0.0/24 +# +---+----+ +--- +---+ +# | | | | +# | hs-4 | | hs-3 | +# | | | | +# +--------+ +--------+ +# cafe::4 cafe::3 +# 10.0.0.4 10.0.0.3 +# +# +# Every fcf0:0:x:y::/64 network interconnects the SRv6 routers rt-x with rt-y +# in the IPv6 operator network. +# +# Local SID table +# =============== +# +# Each SRv6 router is configured with a Local SID table in which SIDs are +# stored. Considering the given SRv6 router rt-x, at least two SIDs are +# configured in the Local SID table: +# +# Local SID table for SRv6 router rt-x +# +----------------------------------------------------------+ +# |fcff:x::e is associated with the SRv6 End behavior | +# |fcff:x::d46 is associated with the SRv6 End.DT46 behavior | +# +----------------------------------------------------------+ +# +# The fcff::/16 prefix is reserved by the operator for implementing SRv6 VPN +# services. Reachability of SIDs is ensured by proper configuration of the IPv6 +# operator's network and SRv6 routers. +# +# # SRv6 Policies +# =============== +# +# An SRv6 ingress router applies SRv6 policies to the traffic received from a +# connected host. SRv6 policy enforcement consists of encapsulating the +# received traffic into a new IPv6 packet with a given SID List contained in +# the SRH. +# +# IPv4/IPv6 VPN between hs-1 and hs-2 +# ----------------------------------- +# +# Hosts hs-1 and hs-2 are connected using dedicated IPv4/IPv6 VPNs. +# Specifically, packets generated from hs-1 and directed towards hs-2 are +# handled by rt-1 which applies the following SRv6 Policies: +# +# i.a) IPv6 traffic, SID List=fcff:3::e,fcff:4::e,fcff:2::d46 +# ii.a) IPv4 traffic, SID List=fcff:2::d46 +# +# Policy (i.a) steers tunneled IPv6 traffic through SRv6 routers +# rt-3,rt-4,rt-2. Instead, Policy (ii.a) steers tunneled IPv4 traffic through +# rt-2. +# The H.Encaps.Red reduces the SID List (i.a) carried in SRH by removing the +# first SID (fcff:3::e) and pushing it into the IPv6 DA. In case of IPv4 +# traffic, the H.Encaps.Red omits the presence of SRH at all, since the SID +# List (ii.a) consists of only one SID that can be stored directly in the IPv6 +# DA. +# +# On the reverse path (i.e. from hs-2 to hs-1), rt-2 applies the following +# policies: +# +# i.b) IPv6 traffic, SID List=fcff:1::d46 +# ii.b) IPv4 traffic, SID List=fcff:4::e,fcff:3::e,fcff:1::d46 +# +# Policy (i.b) steers tunneled IPv6 traffic through the SRv6 router rt-1. +# Conversely, Policy (ii.b) steers tunneled IPv4 traffic through SRv6 routers +# rt-4,rt-3,rt-1. +# The H.Encaps.Red omits the SRH at all in case of (i.b) by pushing the single +# SID (fcff::1::d46) inside the IPv6 DA. +# The H.Encaps.Red reduces the SID List (ii.b) in the SRH by removing the first +# SID (fcff:4::e) and pushing it into the IPv6 DA. +# +# In summary: +# hs-1->hs-2 |IPv6 DA=fcff:3::e|SRH SIDs=fcff:4::e,fcff:2::d46|IPv6|...| (i.a) +# hs-1->hs-2 |IPv6 DA=fcff:2::d46|IPv4|...| (ii.a) +# +# hs-2->hs-1 |IPv6 DA=fcff:1::d46|IPv6|...| (i.b) +# hs-2->hs-1 |IPv6 DA=fcff:4::e|SRH SIDs=fcff:3::e,fcff:1::d46|IPv4|...| (ii.b) +# +# +# IPv6 VPN between hs-3 and hs-4 +# ------------------------------ +# +# Hosts hs-3 and hs-4 are connected using a dedicated IPv6 only VPN. +# Specifically, packets generated from hs-3 and directed towards hs-4 are +# handled by rt-3 which applies the following SRv6 Policy: +# +# i.c) IPv6 traffic, SID List=fcff:2::e,fcff:4::d46 +# +# Policy (i.c) steers tunneled IPv6 traffic through SRv6 routers rt-2,rt-4. +# The H.Encaps.Red reduces the SID List (i.c) carried in SRH by pushing the +# first SID (fcff:2::e) in the IPv6 DA. +# +# On the reverse path (i.e. from hs-4 to hs-3) the router rt-4 applies the +# following SRv6 Policy: +# +# i.d) IPv6 traffic, SID List=fcff:1::e,fcff:3::d46. +# +# Policy (i.d) steers tunneled IPv6 traffic through SRv6 routers rt-1,rt-3. +# The H.Encaps.Red reduces the SID List (i.d) carried in SRH by pushing the +# first SID (fcff:1::e) in the IPv6 DA. +# +# In summary: +# hs-3->hs-4 |IPv6 DA=fcff:2::e|SRH SIDs=fcff:4::d46|IPv6|...| (i.c) +# hs-4->hs-3 |IPv6 DA=fcff:1::e|SRH SIDs=fcff:3::d46|IPv6|...| (i.d) +# + +# Kselftest framework requirement - SKIP code is 4. +readonly ksft_skip=4 + +readonly RDMSUFF="$(mktemp -u XXXXXXXX)" +readonly VRF_TID=100 +readonly VRF_DEVNAME="vrf-${VRF_TID}" +readonly RT2HS_DEVNAME="veth-t${VRF_TID}" +readonly LOCALSID_TABLE_ID=90 +readonly IPv6_RT_NETWORK=fcf0:0 +readonly IPv6_HS_NETWORK=cafe +readonly IPv4_HS_NETWORK=10.0.0 +readonly VPN_LOCATOR_SERVICE=fcff +readonly END_FUNC=000e +readonly DT46_FUNC=0d46 + +PING_TIMEOUT_SEC=4 +PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no} + +# IDs of routers and hosts are initialized during the setup of the testing +# network +ROUTERS='' +HOSTS='' + +SETUP_ERR=1 + +ret=${ksft_skip} +nsuccess=0 +nfail=0 + +log_test() +{ + local rc="$1" + local expected="$2" + local msg="$3" + + if [ "${rc}" -eq "${expected}" ]; then + nsuccess=$((nsuccess+1)) + printf "\n TEST: %-60s [ OK ]\n" "${msg}" + else + ret=1 + nfail=$((nfail+1)) + printf "\n TEST: %-60s [FAIL]\n" "${msg}" + if [ "${PAUSE_ON_FAIL}" = "yes" ]; then + echo + echo "hit enter to continue, 'q' to quit" + read a + [ "$a" = "q" ] && exit 1 + fi + fi +} + +print_log_test_results() +{ + printf "\nTests passed: %3d\n" "${nsuccess}" + printf "Tests failed: %3d\n" "${nfail}" + + # when a test fails, the value of 'ret' is set to 1 (error code). + # Conversely, when all tests are passed successfully, the 'ret' value + # is set to 0 (success code). + if [ "${ret}" -ne 1 ]; then + ret=0 + fi +} + +log_section() +{ + echo + echo "################################################################################" + echo "TEST SECTION: $*" + echo "################################################################################" +} + +test_command_or_ksft_skip() +{ + local cmd="$1" + + if [ ! -x "$(command -v "${cmd}")" ]; then + echo "SKIP: Could not run test without \"${cmd}\" tool"; + exit "${ksft_skip}" + fi +} + +get_nodename() +{ + local name="$1" + + echo "${name}-${RDMSUFF}" +} + +get_rtname() +{ + local rtid="$1" + + get_nodename "rt-${rtid}" +} + +get_hsname() +{ + local hsid="$1" + + get_nodename "hs-${hsid}" +} + +__create_namespace() +{ + local name="$1" + + ip netns add "${name}" +} + +create_router() +{ + local rtid="$1" + local nsname + + nsname="$(get_rtname "${rtid}")" + + __create_namespace "${nsname}" +} + +create_host() +{ + local hsid="$1" + local nsname + + nsname="$(get_hsname "${hsid}")" + + __create_namespace "${nsname}" +} + +cleanup() +{ + local nsname + local i + + # destroy routers + for i in ${ROUTERS}; do + nsname="$(get_rtname "${i}")" + + ip netns del "${nsname}" &>/dev/null || true + done + + # destroy hosts + for i in ${HOSTS}; do + nsname="$(get_hsname "${i}")" + + ip netns del "${nsname}" &>/dev/null || true + done + + # check whether the setup phase was completed successfully or not. In + # case of an error during the setup phase of the testing environment, + # the selftest is considered as "skipped". + if [ "${SETUP_ERR}" -ne 0 ]; then + echo "SKIP: Setting up the testing environment failed" + exit "${ksft_skip}" + fi + + exit "${ret}" +} + +add_link_rt_pairs() +{ + local rt="$1" + local rt_neighs="$2" + local neigh + local nsname + local neigh_nsname + + nsname="$(get_rtname "${rt}")" + + for neigh in ${rt_neighs}; do + neigh_nsname="$(get_rtname "${neigh}")" + + ip link add "veth-rt-${rt}-${neigh}" netns "${nsname}" \ + type veth peer name "veth-rt-${neigh}-${rt}" \ + netns "${neigh_nsname}" + done +} + +get_network_prefix() +{ + local rt="$1" + local neigh="$2" + local p="${rt}" + local q="${neigh}" + + if [ "${p}" -gt "${q}" ]; then + p="${q}"; q="${rt}" + fi + + echo "${IPv6_RT_NETWORK}:${p}:${q}" +} + +# Setup the basic networking for the routers +setup_rt_networking() +{ + local rt="$1" + local rt_neighs="$2" + local nsname + local net_prefix + local devname + local neigh + + nsname="$(get_rtname "${rt}")" + + for neigh in ${rt_neighs}; do + devname="veth-rt-${rt}-${neigh}" + + net_prefix="$(get_network_prefix "${rt}" "${neigh}")" + + ip -netns "${nsname}" addr \ + add "${net_prefix}::${rt}/64" dev "${devname}" nodad + + ip -netns "${nsname}" link set "${devname}" up + done + + ip -netns "${nsname}" link set lo up + + ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0 + ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0 + ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.forwarding=1 + + ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.all.rp_filter=0 + ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.default.rp_filter=0 + ip netns exec "${nsname}" sysctl -wq net.ipv4.ip_forward=1 +} + +# Setup local SIDs for an SRv6 router +setup_rt_local_sids() +{ + local rt="$1" + local rt_neighs="$2" + local net_prefix + local devname + local nsname + local neigh + + nsname="$(get_rtname "${rt}")" + + for neigh in ${rt_neighs}; do + devname="veth-rt-${rt}-${neigh}" + + net_prefix="$(get_network_prefix "${rt}" "${neigh}")" + + # set underlay network routes for SIDs reachability + ip -netns "${nsname}" -6 route \ + add "${VPN_LOCATOR_SERVICE}:${neigh}::/32" \ + table "${LOCALSID_TABLE_ID}" \ + via "${net_prefix}::${neigh}" dev "${devname}" + done + + # Local End behavior (note that "dev" is dummy and the VRF is chosen + # for the sake of simplicity). + ip -netns "${nsname}" -6 route \ + add "${VPN_LOCATOR_SERVICE}:${rt}::${END_FUNC}" \ + table "${LOCALSID_TABLE_ID}" \ + encap seg6local action End dev "${VRF_DEVNAME}" + + # Local End.DT46 behavior + ip -netns "${nsname}" -6 route \ + add "${VPN_LOCATOR_SERVICE}:${rt}::${DT46_FUNC}" \ + table "${LOCALSID_TABLE_ID}" \ + encap seg6local action End.DT46 vrftable "${VRF_TID}" \ + dev "${VRF_DEVNAME}" + + # all SIDs for VPNs start with a common locator. Routes and SRv6 + # Endpoint behavior instaces are grouped together in the 'localsid' + # table. + ip -netns "${nsname}" -6 rule \ + add to "${VPN_LOCATOR_SERVICE}::/16" \ + lookup "${LOCALSID_TABLE_ID}" prio 999 + + # set default routes to unreachable for both ipv4 and ipv6 + ip -netns "${nsname}" -6 route \ + add unreachable default metric 4278198272 \ + vrf "${VRF_DEVNAME}" + + ip -netns "${nsname}" -4 route \ + add unreachable default metric 4278198272 \ + vrf "${VRF_DEVNAME}" +} + +# build and install the SRv6 policy into the ingress SRv6 router. +# args: +# $1 - destination host (i.e. cafe::x host) +# $2 - SRv6 router configured for enforcing the SRv6 Policy +# $3 - SRv6 routers configured for steering traffic (End behaviors) +# $4 - SRv6 router configured for removing the SRv6 Policy (router connected +# to the destination host) +# $5 - encap mode (full or red) +# $6 - traffic type (IPv6 or IPv4) +__setup_rt_policy() +{ + local dst="$1" + local encap_rt="$2" + local end_rts="$3" + local dec_rt="$4" + local mode="$5" + local traffic="$6" + local nsname + local policy='' + local n + + nsname="$(get_rtname "${encap_rt}")" + + for n in ${end_rts}; do + policy="${policy}${VPN_LOCATOR_SERVICE}:${n}::${END_FUNC}," + done + + policy="${policy}${VPN_LOCATOR_SERVICE}:${dec_rt}::${DT46_FUNC}" + + # add SRv6 policy to incoming traffic sent by connected hosts + if [ "${traffic}" -eq 6 ]; then + ip -netns "${nsname}" -6 route \ + add "${IPv6_HS_NETWORK}::${dst}" vrf "${VRF_DEVNAME}" \ + encap seg6 mode "${mode}" segs "${policy}" \ + dev "${VRF_DEVNAME}" + + ip -netns "${nsname}" -6 neigh \ + add proxy "${IPv6_HS_NETWORK}::${dst}" \ + dev "${RT2HS_DEVNAME}" + else + # "dev" must be different from the one where the packet is + # received, otherwise the proxy arp does not work. + ip -netns "${nsname}" -4 route \ + add "${IPv4_HS_NETWORK}.${dst}" vrf "${VRF_DEVNAME}" \ + encap seg6 mode "${mode}" segs "${policy}" \ + dev "${VRF_DEVNAME}" + fi +} + +# see __setup_rt_policy +setup_rt_policy_ipv6() +{ + __setup_rt_policy "$1" "$2" "$3" "$4" "$5" 6 +} + +#see __setup_rt_policy +setup_rt_policy_ipv4() +{ + __setup_rt_policy "$1" "$2" "$3" "$4" "$5" 4 +} + +setup_hs() +{ + local hs="$1" + local rt="$2" + local hsname + local rtname + + hsname="$(get_hsname "${hs}")" + rtname="$(get_rtname "${rt}")" + + ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0 + ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0 + + ip -netns "${hsname}" link add veth0 type veth \ + peer name "${RT2HS_DEVNAME}" netns "${rtname}" + + ip -netns "${hsname}" addr \ + add "${IPv6_HS_NETWORK}::${hs}/64" dev veth0 nodad + ip -netns "${hsname}" addr add "${IPv4_HS_NETWORK}.${hs}/24" dev veth0 + + ip -netns "${hsname}" link set veth0 up + ip -netns "${hsname}" link set lo up + + # configure the VRF on the router which is directly connected to the + # source host. + ip -netns "${rtname}" link \ + add "${VRF_DEVNAME}" type vrf table "${VRF_TID}" + ip -netns "${rtname}" link set "${VRF_DEVNAME}" up + + # enslave the veth interface connecting the router with the host to the + # VRF in the access router + ip -netns "${rtname}" link \ + set "${RT2HS_DEVNAME}" master "${VRF_DEVNAME}" + + ip -netns "${rtname}" addr \ + add "${IPv6_HS_NETWORK}::254/64" dev "${RT2HS_DEVNAME}" nodad + ip -netns "${rtname}" addr \ + add "${IPv4_HS_NETWORK}.254/24" dev "${RT2HS_DEVNAME}" + + ip -netns "${rtname}" link set "${RT2HS_DEVNAME}" up + + ip netns exec "${rtname}" \ + sysctl -wq net.ipv6.conf."${RT2HS_DEVNAME}".proxy_ndp=1 + ip netns exec "${rtname}" \ + sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".proxy_arp=1 + + # disable the rp_filter otherwise the kernel gets confused about how + # to route decap ipv4 packets. + ip netns exec "${rtname}" \ + sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".rp_filter=0 + + ip netns exec "${rtname}" sh -c "echo 1 > /proc/sys/net/vrf/strict_mode" +} + +setup() +{ + local i + + # create routers + ROUTERS="1 2 3 4"; readonly ROUTERS + for i in ${ROUTERS}; do + create_router "${i}" + done + + # create hosts + HOSTS="1 2 3 4"; readonly HOSTS + for i in ${HOSTS}; do + create_host "${i}" + done + + # set up the links for connecting routers + add_link_rt_pairs 1 "2 3 4" + add_link_rt_pairs 2 "3 4" + add_link_rt_pairs 3 "4" + + # set up the basic connectivity of routers and routes required for + # reachability of SIDs. + setup_rt_networking 1 "2 3 4" + setup_rt_networking 2 "1 3 4" + setup_rt_networking 3 "1 2 4" + setup_rt_networking 4 "1 2 3" + + # set up the hosts connected to routers + setup_hs 1 1 + setup_hs 2 2 + setup_hs 3 3 + setup_hs 4 4 + + # set up default SRv6 Endpoints (i.e. SRv6 End and SRv6 End.DT46) + setup_rt_local_sids 1 "2 3 4" + setup_rt_local_sids 2 "1 3 4" + setup_rt_local_sids 3 "1 2 4" + setup_rt_local_sids 4 "1 2 3" + + # set up SRv6 policies + + # create an IPv6 VPN between hosts hs-1 and hs-2. + # the network path between hs-1 and hs-2 traverses several routers + # depending on the direction of traffic. + # + # Direction hs-1 -> hs-2 (H.Encaps.Red) + # - rt-3,rt-4 (SRv6 End behaviors) + # - rt-2 (SRv6 End.DT46 behavior) + # + # Direction hs-2 -> hs-1 (H.Encaps.Red) + # - rt-1 (SRv6 End.DT46 behavior) + setup_rt_policy_ipv6 2 1 "3 4" 2 encap.red + setup_rt_policy_ipv6 1 2 "" 1 encap.red + + # create an IPv4 VPN between hosts hs-1 and hs-2 + # the network path between hs-1 and hs-2 traverses several routers + # depending on the direction of traffic. + # + # Direction hs-1 -> hs-2 (H.Encaps.Red) + # - rt-2 (SRv6 End.DT46 behavior) + # + # Direction hs-2 -> hs-1 (H.Encaps.Red) + # - rt-4,rt-3 (SRv6 End behaviors) + # - rt-1 (SRv6 End.DT46 behavior) + setup_rt_policy_ipv4 2 1 "" 2 encap.red + setup_rt_policy_ipv4 1 2 "4 3" 1 encap.red + + # create an IPv6 VPN between hosts hs-3 and hs-4 + # the network path between hs-3 and hs-4 traverses several routers + # depending on the direction of traffic. + # + # Direction hs-3 -> hs-4 (H.Encaps.Red) + # - rt-2 (SRv6 End Behavior) + # - rt-4 (SRv6 End.DT46 behavior) + # + # Direction hs-4 -> hs-3 (H.Encaps.Red) + # - rt-1 (SRv6 End behavior) + # - rt-3 (SRv6 End.DT46 behavior) + setup_rt_policy_ipv6 4 3 "2" 4 encap.red + setup_rt_policy_ipv6 3 4 "1" 3 encap.red + + # testing environment was set up successfully + SETUP_ERR=0 +} + +check_rt_connectivity() +{ + local rtsrc="$1" + local rtdst="$2" + local prefix + local rtsrc_nsname + + rtsrc_nsname="$(get_rtname "${rtsrc}")" + + prefix="$(get_network_prefix "${rtsrc}" "${rtdst}")" + + ip netns exec "${rtsrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \ + "${prefix}::${rtdst}" >/dev/null 2>&1 +} + +check_and_log_rt_connectivity() +{ + local rtsrc="$1" + local rtdst="$2" + + check_rt_connectivity "${rtsrc}" "${rtdst}" + log_test $? 0 "Routers connectivity: rt-${rtsrc} -> rt-${rtdst}" +} + +check_hs_ipv6_connectivity() +{ + local hssrc="$1" + local hsdst="$2" + local hssrc_nsname + + hssrc_nsname="$(get_hsname "${hssrc}")" + + ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \ + "${IPv6_HS_NETWORK}::${hsdst}" >/dev/null 2>&1 +} + +check_hs_ipv4_connectivity() +{ + local hssrc="$1" + local hsdst="$2" + local hssrc_nsname + + hssrc_nsname="$(get_hsname "${hssrc}")" + + ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \ + "${IPv4_HS_NETWORK}.${hsdst}" >/dev/null 2>&1 +} + +check_and_log_hs2gw_connectivity() +{ + local hssrc="$1" + + check_hs_ipv6_connectivity "${hssrc}" 254 + log_test $? 0 "IPv6 Hosts connectivity: hs-${hssrc} -> gw" + + check_hs_ipv4_connectivity "${hssrc}" 254 + log_test $? 0 "IPv4 Hosts connectivity: hs-${hssrc} -> gw" +} + +check_and_log_hs_ipv6_connectivity() +{ + local hssrc="$1" + local hsdst="$2" + + check_hs_ipv6_connectivity "${hssrc}" "${hsdst}" + log_test $? 0 "IPv6 Hosts connectivity: hs-${hssrc} -> hs-${hsdst}" +} + +check_and_log_hs_ipv4_connectivity() +{ + local hssrc="$1" + local hsdst="$2" + + check_hs_ipv4_connectivity "${hssrc}" "${hsdst}" + log_test $? 0 "IPv4 Hosts connectivity: hs-${hssrc} -> hs-${hsdst}" +} + +check_and_log_hs_connectivity() +{ + local hssrc="$1" + local hsdst="$2" + + check_and_log_hs_ipv4_connectivity "${hssrc}" "${hsdst}" + check_and_log_hs_ipv6_connectivity "${hssrc}" "${hsdst}" +} + +check_and_log_hs_ipv6_isolation() +{ + local hssrc="$1" + local hsdst="$2" + + # in this case, the connectivity test must fail + check_hs_ipv6_connectivity "${hssrc}" "${hsdst}" + log_test $? 1 "IPv6 Hosts isolation: hs-${hssrc} -X-> hs-${hsdst}" +} + +check_and_log_hs_ipv4_isolation() +{ + local hssrc="$1" + local hsdst="$2" + + # in this case, the connectivity test must fail + check_hs_ipv4_connectivity "${hssrc}" "${hsdst}" + log_test $? 1 "IPv4 Hosts isolation: hs-${hssrc} -X-> hs-${hsdst}" +} + +check_and_log_hs_isolation() +{ + local hssrc="$1" + local hsdst="$2" + + check_and_log_hs_ipv6_isolation "${hssrc}" "${hsdst}" + check_and_log_hs_ipv4_isolation "${hssrc}" "${hsdst}" +} + +router_tests() +{ + local i + local j + + log_section "IPv6 routers connectivity test" + + for i in ${ROUTERS}; do + for j in ${ROUTERS}; do + if [ "${i}" -eq "${j}" ]; then + continue + fi + + check_and_log_rt_connectivity "${i}" "${j}" + done + done +} + +host2gateway_tests() +{ + local hs + + log_section "IPv4/IPv6 connectivity test among hosts and gateways" + + for hs in ${HOSTS}; do + check_and_log_hs2gw_connectivity "${hs}" + done +} + +host_vpn_tests() +{ + log_section "SRv6 VPN connectivity test hosts (h1 <-> h2, IPv4/IPv6)" + + check_and_log_hs_connectivity 1 2 + check_and_log_hs_connectivity 2 1 + + log_section "SRv6 VPN connectivity test hosts (h3 <-> h4, IPv6 only)" + + check_and_log_hs_ipv6_connectivity 3 4 + check_and_log_hs_ipv6_connectivity 4 3 +} + +host_vpn_isolation_tests() +{ + local l1="1 2" + local l2="3 4" + local tmp + local i + local j + local k + + log_section "SRv6 VPN isolation test among hosts" + + for k in 0 1; do + for i in ${l1}; do + for j in ${l2}; do + check_and_log_hs_isolation "${i}" "${j}" + done + done + + # let us test the reverse path + tmp="${l1}"; l1="${l2}"; l2="${tmp}" + done + + log_section "SRv6 VPN isolation test among hosts (h2 <-> h4, IPv4 only)" + + check_and_log_hs_ipv4_isolation 2 4 + check_and_log_hs_ipv4_isolation 4 2 +} + +test_iproute2_supp_or_ksft_skip() +{ + if ! ip route help 2>&1 | grep -qo "encap.red"; then + echo "SKIP: Missing SRv6 encap.red support in iproute2" + exit "${ksft_skip}" + fi +} + +test_vrf_or_ksft_skip() +{ + modprobe vrf &>/dev/null || true + if [ ! -e /proc/sys/net/vrf/strict_mode ]; then + echo "SKIP: vrf sysctl does not exist" + exit "${ksft_skip}" + fi +} + +if [ "$(id -u)" -ne 0 ]; then + echo "SKIP: Need root privileges" + exit "${ksft_skip}" +fi + +# required programs to carry out this selftest +test_command_or_ksft_skip ip +test_command_or_ksft_skip ping +test_command_or_ksft_skip sysctl +test_command_or_ksft_skip grep + +test_iproute2_supp_or_ksft_skip +test_vrf_or_ksft_skip + +set -e +trap cleanup EXIT + +setup +set +e + +router_tests +host2gateway_tests +host_vpn_tests +host_vpn_isolation_tests + +print_log_test_results diff --git a/tools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh b/tools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh new file mode 100755 index 000000000000..cb4177d41b21 --- /dev/null +++ b/tools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh @@ -0,0 +1,821 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# author: Andrea Mayer <andrea.mayer@uniroma2.it> +# +# This script is designed for testing the SRv6 H.L2Encaps.Red behavior. +# +# Below is depicted the IPv6 network of an operator which offers L2 VPN +# services to hosts, enabling them to communicate with each other. +# In this example, hosts hs-1 and hs-2 are connected through an L2 VPN service. +# Currently, the SRv6 subsystem in Linux allows hosts hs-1 and hs-2 to exchange +# full L2 frames as long as they carry IPv4/IPv6. +# +# Routers rt-1,rt-2,rt-3 and rt-4 implement L2 VPN services +# leveraging the SRv6 architecture. The key components for such VPNs are: +# +# i) The SRv6 H.L2Encaps.Red behavior applies SRv6 Policies on traffic +# received by connected hosts, initiating the VPN tunnel. Such a behavior +# is an optimization of the SRv6 H.L2Encap aiming to reduce the +# length of the SID List carried in the pushed SRH. Specifically, the +# H.L2Encaps.Red removes the first SID contained in the SID List (i.e. SRv6 +# Policy) by storing it into the IPv6 Destination Address. When a SRv6 +# Policy is made of only one SID, the SRv6 H.L2Encaps.Red behavior omits +# the SRH at all and pushes that SID directly into the IPv6 DA; +# +# ii) The SRv6 End behavior advances the active SID in the SID List +# carried by the SRH; +# +# iii) The SRv6 End.DX2 behavior is used for removing the SRv6 Policy +# and, thus, it terminates the VPN tunnel. The decapsulated L2 frame is +# sent over the interface connected with the destination host. +# +# cafe::1 cafe::2 +# 10.0.0.1 10.0.0.2 +# +--------+ +--------+ +# | | | | +# | hs-1 | | hs-2 | +# | | | | +# +---+----+ +--- +---+ +# cafe::/64 | | cafe::/64 +# 10.0.0.0/24 | | 10.0.0.0/24 +# +---+----+ +----+---+ +# | | fcf0:0:1:2::/64 | | +# | rt-1 +-------------------+ rt-2 | +# | | | | +# +---+----+ +----+---+ +# | . . | +# | fcf0:0:1:3::/64 . | +# | . . | +# | . . | +# fcf0:0:1:4::/64 | . | fcf0:0:2:3::/64 +# | . . | +# | . . | +# | fcf0:0:2:4::/64 . | +# | . . | +# +---+----+ +----+---+ +# | | | | +# | rt-4 +-------------------+ rt-3 | +# | | fcf0:0:3:4::/64 | | +# +---+----+ +----+---+ +# +# +# Every fcf0:0:x:y::/64 network interconnects the SRv6 routers rt-x with rt-y +# in the IPv6 operator network. +# +# Local SID table +# =============== +# +# Each SRv6 router is configured with a Local SID table in which SIDs are +# stored. Considering the given SRv6 router rt-x, at least two SIDs are +# configured in the Local SID table: +# +# Local SID table for SRv6 router rt-x +# +----------------------------------------------------------+ +# |fcff:x::e is associated with the SRv6 End behavior | +# |fcff:x::d2 is associated with the SRv6 End.DX2 behavior | +# +----------------------------------------------------------+ +# +# The fcff::/16 prefix is reserved by the operator for implementing SRv6 VPN +# services. Reachability of SIDs is ensured by proper configuration of the IPv6 +# operator's network and SRv6 routers. +# +# SRv6 Policies +# ============= +# +# An SRv6 ingress router applies SRv6 policies to the traffic received from a +# connected host. SRv6 policy enforcement consists of encapsulating the +# received traffic into a new IPv6 packet with a given SID List contained in +# the SRH. +# +# L2 VPN between hs-1 and hs-2 +# ---------------------------- +# +# Hosts hs-1 and hs-2 are connected using a dedicated L2 VPN. +# Specifically, packets generated from hs-1 and directed towards hs-2 are +# handled by rt-1 which applies the following SRv6 Policies: +# +# i.a) L2 traffic, SID List=fcff:2::d2 +# +# Policy (i.a) steers tunneled L2 traffic through SRv6 router rt-2. +# The H.L2Encaps.Red omits the presence of SRH at all, since the SID List +# consists of only one SID (fcff:2::d2) that can be stored directly in the IPv6 +# DA. +# +# On the reverse path (i.e. from hs-2 to hs-1), rt-2 applies the following +# policies: +# +# i.b) L2 traffic, SID List=fcff:4::e,fcff:3::e,fcff:1::d2 +# +# Policy (i.b) steers tunneled L2 traffic through the SRv6 routers +# rt-4,rt-3,rt2. The H.L2Encaps.Red reduces the SID List in the SRH by removing +# the first SID (fcff:4::e) and pushing it into the IPv6 DA. +# +# In summary: +# hs-1->hs-2 |IPv6 DA=fcff:2::d2|eth|...| (i.a) +# hs-2->hs-1 |IPv6 DA=fcff:4::e|SRH SIDs=fcff:3::e,fcff:1::d2|eth|...| (i.b) +# + +# Kselftest framework requirement - SKIP code is 4. +readonly ksft_skip=4 + +readonly RDMSUFF="$(mktemp -u XXXXXXXX)" +readonly DUMMY_DEVNAME="dum0" +readonly RT2HS_DEVNAME="veth-hs" +readonly HS_VETH_NAME="veth0" +readonly LOCALSID_TABLE_ID=90 +readonly IPv6_RT_NETWORK=fcf0:0 +readonly IPv6_HS_NETWORK=cafe +readonly IPv4_HS_NETWORK=10.0.0 +readonly VPN_LOCATOR_SERVICE=fcff +readonly MAC_PREFIX=00:00:00:c0:01 +readonly END_FUNC=000e +readonly DX2_FUNC=00d2 + +PING_TIMEOUT_SEC=4 +PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no} + +# IDs of routers and hosts are initialized during the setup of the testing +# network +ROUTERS='' +HOSTS='' + +SETUP_ERR=1 + +ret=${ksft_skip} +nsuccess=0 +nfail=0 + +log_test() +{ + local rc="$1" + local expected="$2" + local msg="$3" + + if [ "${rc}" -eq "${expected}" ]; then + nsuccess=$((nsuccess+1)) + printf "\n TEST: %-60s [ OK ]\n" "${msg}" + else + ret=1 + nfail=$((nfail+1)) + printf "\n TEST: %-60s [FAIL]\n" "${msg}" + if [ "${PAUSE_ON_FAIL}" = "yes" ]; then + echo + echo "hit enter to continue, 'q' to quit" + read a + [ "$a" = "q" ] && exit 1 + fi + fi +} + +print_log_test_results() +{ + printf "\nTests passed: %3d\n" "${nsuccess}" + printf "Tests failed: %3d\n" "${nfail}" + + # when a test fails, the value of 'ret' is set to 1 (error code). + # Conversely, when all tests are passed successfully, the 'ret' value + # is set to 0 (success code). + if [ "${ret}" -ne 1 ]; then + ret=0 + fi +} + +log_section() +{ + echo + echo "################################################################################" + echo "TEST SECTION: $*" + echo "################################################################################" +} + +test_command_or_ksft_skip() +{ + local cmd="$1" + + if [ ! -x "$(command -v "${cmd}")" ]; then + echo "SKIP: Could not run test without \"${cmd}\" tool"; + exit "${ksft_skip}" + fi +} + +get_nodename() +{ + local name="$1" + + echo "${name}-${RDMSUFF}" +} + +get_rtname() +{ + local rtid="$1" + + get_nodename "rt-${rtid}" +} + +get_hsname() +{ + local hsid="$1" + + get_nodename "hs-${hsid}" +} + +__create_namespace() +{ + local name="$1" + + ip netns add "${name}" +} + +create_router() +{ + local rtid="$1" + local nsname + + nsname="$(get_rtname "${rtid}")" + + __create_namespace "${nsname}" +} + +create_host() +{ + local hsid="$1" + local nsname + + nsname="$(get_hsname "${hsid}")" + + __create_namespace "${nsname}" +} + +cleanup() +{ + local nsname + local i + + # destroy routers + for i in ${ROUTERS}; do + nsname="$(get_rtname "${i}")" + + ip netns del "${nsname}" &>/dev/null || true + done + + # destroy hosts + for i in ${HOSTS}; do + nsname="$(get_hsname "${i}")" + + ip netns del "${nsname}" &>/dev/null || true + done + + # check whether the setup phase was completed successfully or not. In + # case of an error during the setup phase of the testing environment, + # the selftest is considered as "skipped". + if [ "${SETUP_ERR}" -ne 0 ]; then + echo "SKIP: Setting up the testing environment failed" + exit "${ksft_skip}" + fi + + exit "${ret}" +} + +add_link_rt_pairs() +{ + local rt="$1" + local rt_neighs="$2" + local neigh + local nsname + local neigh_nsname + + nsname="$(get_rtname "${rt}")" + + for neigh in ${rt_neighs}; do + neigh_nsname="$(get_rtname "${neigh}")" + + ip link add "veth-rt-${rt}-${neigh}" netns "${nsname}" \ + type veth peer name "veth-rt-${neigh}-${rt}" \ + netns "${neigh_nsname}" + done +} + +get_network_prefix() +{ + local rt="$1" + local neigh="$2" + local p="${rt}" + local q="${neigh}" + + if [ "${p}" -gt "${q}" ]; then + p="${q}"; q="${rt}" + fi + + echo "${IPv6_RT_NETWORK}:${p}:${q}" +} + +# Setup the basic networking for the routers +setup_rt_networking() +{ + local rt="$1" + local rt_neighs="$2" + local nsname + local net_prefix + local devname + local neigh + + nsname="$(get_rtname "${rt}")" + + for neigh in ${rt_neighs}; do + devname="veth-rt-${rt}-${neigh}" + + net_prefix="$(get_network_prefix "${rt}" "${neigh}")" + + ip -netns "${nsname}" addr \ + add "${net_prefix}::${rt}/64" dev "${devname}" nodad + + ip -netns "${nsname}" link set "${devname}" up + done + + ip -netns "${nsname}" link add "${DUMMY_DEVNAME}" type dummy + + ip -netns "${nsname}" link set "${DUMMY_DEVNAME}" up + ip -netns "${nsname}" link set lo up + + ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0 + ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0 + ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.forwarding=1 + + ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.all.rp_filter=0 + ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.default.rp_filter=0 + ip netns exec "${nsname}" sysctl -wq net.ipv4.ip_forward=1 +} + +# Setup local SIDs for an SRv6 router +setup_rt_local_sids() +{ + local rt="$1" + local rt_neighs="$2" + local net_prefix + local devname + local nsname + local neigh + + nsname="$(get_rtname "${rt}")" + + for neigh in ${rt_neighs}; do + devname="veth-rt-${rt}-${neigh}" + + net_prefix="$(get_network_prefix "${rt}" "${neigh}")" + + # set underlay network routes for SIDs reachability + ip -netns "${nsname}" -6 route \ + add "${VPN_LOCATOR_SERVICE}:${neigh}::/32" \ + table "${LOCALSID_TABLE_ID}" \ + via "${net_prefix}::${neigh}" dev "${devname}" + done + + # Local End behavior (note that dev "${DUMMY_DEVNAME}" is a dummy + # interface) + ip -netns "${nsname}" -6 route \ + add "${VPN_LOCATOR_SERVICE}:${rt}::${END_FUNC}" \ + table "${LOCALSID_TABLE_ID}" \ + encap seg6local action End dev "${DUMMY_DEVNAME}" + + # all SIDs for VPNs start with a common locator. Routes and SRv6 + # Endpoint behaviors instaces are grouped together in the 'localsid' + # table. + ip -netns "${nsname}" -6 rule add \ + to "${VPN_LOCATOR_SERVICE}::/16" \ + lookup "${LOCALSID_TABLE_ID}" prio 999 +} + +# build and install the SRv6 policy into the ingress SRv6 router. +# args: +# $1 - destination host (i.e. cafe::x host) +# $2 - SRv6 router configured for enforcing the SRv6 Policy +# $3 - SRv6 routers configured for steering traffic (End behaviors) +# $4 - SRv6 router configured for removing the SRv6 Policy (router connected +# to the destination host) +# $5 - encap mode (full or red) +# $6 - traffic type (IPv6 or IPv4) +__setup_rt_policy() +{ + local dst="$1" + local encap_rt="$2" + local end_rts="$3" + local dec_rt="$4" + local mode="$5" + local traffic="$6" + local nsname + local policy='' + local n + + nsname="$(get_rtname "${encap_rt}")" + + for n in ${end_rts}; do + policy="${policy}${VPN_LOCATOR_SERVICE}:${n}::${END_FUNC}," + done + + policy="${policy}${VPN_LOCATOR_SERVICE}:${dec_rt}::${DX2_FUNC}" + + # add SRv6 policy to incoming traffic sent by connected hosts + if [ "${traffic}" -eq 6 ]; then + ip -netns "${nsname}" -6 route \ + add "${IPv6_HS_NETWORK}::${dst}" \ + encap seg6 mode "${mode}" segs "${policy}" \ + dev dum0 + else + ip -netns "${nsname}" -4 route \ + add "${IPv4_HS_NETWORK}.${dst}" \ + encap seg6 mode "${mode}" segs "${policy}" \ + dev dum0 + fi +} + +# see __setup_rt_policy +setup_rt_policy_ipv6() +{ + __setup_rt_policy "$1" "$2" "$3" "$4" "$5" 6 +} + +#see __setup_rt_policy +setup_rt_policy_ipv4() +{ + __setup_rt_policy "$1" "$2" "$3" "$4" "$5" 4 +} + +setup_decap() +{ + local rt="$1" + local nsname + + nsname="$(get_rtname "${rt}")" + + # Local End.DX2 behavior + ip -netns "${nsname}" -6 route \ + add "${VPN_LOCATOR_SERVICE}:${rt}::${DX2_FUNC}" \ + table "${LOCALSID_TABLE_ID}" \ + encap seg6local action End.DX2 oif "${RT2HS_DEVNAME}" \ + dev "${RT2HS_DEVNAME}" +} + +setup_hs() +{ + local hs="$1" + local rt="$2" + local hsname + local rtname + + hsname="$(get_hsname "${hs}")" + rtname="$(get_rtname "${rt}")" + + ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0 + ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0 + + ip -netns "${hsname}" link add "${HS_VETH_NAME}" type veth \ + peer name "${RT2HS_DEVNAME}" netns "${rtname}" + + ip -netns "${hsname}" addr add "${IPv6_HS_NETWORK}::${hs}/64" \ + dev "${HS_VETH_NAME}" nodad + ip -netns "${hsname}" addr add "${IPv4_HS_NETWORK}.${hs}/24" \ + dev "${HS_VETH_NAME}" + + ip -netns "${hsname}" link set "${HS_VETH_NAME}" up + ip -netns "${hsname}" link set lo up + + ip -netns "${rtname}" addr add "${IPv6_HS_NETWORK}::254/64" \ + dev "${RT2HS_DEVNAME}" nodad + ip -netns "${rtname}" addr \ + add "${IPv4_HS_NETWORK}.254/24" dev "${RT2HS_DEVNAME}" + + ip -netns "${rtname}" link set "${RT2HS_DEVNAME}" up + + # disable the rp_filter otherwise the kernel gets confused about how + # to route decap ipv4 packets. + ip netns exec "${rtname}" \ + sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".rp_filter=0 +} + +# set an auto-generated mac address +# args: +# $1 - name of the node (e.g.: hs-1, rt-3, etc) +# $2 - id of the node (e.g.: 1 for hs-1, 3 for rt-3, etc) +# $3 - host part of the IPv6 network address +# $4 - name of the network interface to which the generated mac address must +# be set. +set_mac_address() +{ + local nodename="$1" + local nodeid="$2" + local host="$3" + local ifname="$4" + local nsname + + nsname=$(get_nodename "${nodename}") + + ip -netns "${nsname}" link set dev "${ifname}" down + + ip -netns "${nsname}" link set address "${MAC_PREFIX}:${nodeid}" \ + dev "${ifname}" + + # the IPv6 address must be set once again after the MAC address has + # been changed. + ip -netns "${nsname}" addr add "${IPv6_HS_NETWORK}::${host}/64" \ + dev "${ifname}" nodad + + ip -netns "${nsname}" link set dev "${ifname}" up +} + +set_host_l2peer() +{ + local hssrc="$1" + local hsdst="$2" + local ipprefix="$3" + local proto="$4" + local hssrc_name + local ipaddr + + hssrc_name="$(get_hsname "${hssrc}")" + + if [ "${proto}" -eq 6 ]; then + ipaddr="${ipprefix}::${hsdst}" + else + ipaddr="${ipprefix}.${hsdst}" + fi + + ip -netns "${hssrc_name}" route add "${ipaddr}" dev "${HS_VETH_NAME}" + + ip -netns "${hssrc_name}" neigh \ + add "${ipaddr}" lladdr "${MAC_PREFIX}:${hsdst}" \ + dev "${HS_VETH_NAME}" +} + +# setup an SRv6 L2 VPN between host hs-x and hs-y (currently, the SRv6 +# subsystem only supports L2 frames whose layer-3 is IPv4/IPv6). +# args: +# $1 - source host +# $2 - SRv6 routers configured for steering tunneled traffic +# $3 - destination host +setup_l2vpn() +{ + local hssrc="$1" + local end_rts="$2" + local hsdst="$3" + local rtsrc="${hssrc}" + local rtdst="${hsdst}" + + # set fixed mac for source node and the neigh MAC address + set_mac_address "hs-${hssrc}" "${hssrc}" "${hssrc}" "${HS_VETH_NAME}" + set_host_l2peer "${hssrc}" "${hsdst}" "${IPv6_HS_NETWORK}" 6 + set_host_l2peer "${hssrc}" "${hsdst}" "${IPv4_HS_NETWORK}" 4 + + # we have to set the mac address of the veth-host (on ingress router) + # to the mac address of the remote peer (L2 VPN destination host). + # Otherwise, traffic coming from the source host is dropped at the + # ingress router. + set_mac_address "rt-${rtsrc}" "${hsdst}" 254 "${RT2HS_DEVNAME}" + + # set the SRv6 Policies at the ingress router + setup_rt_policy_ipv6 "${hsdst}" "${rtsrc}" "${end_rts}" "${rtdst}" \ + l2encap.red 6 + setup_rt_policy_ipv4 "${hsdst}" "${rtsrc}" "${end_rts}" "${rtdst}" \ + l2encap.red 4 + + # set the decap behavior + setup_decap "${rtsrc}" +} + +setup() +{ + local i + + # create routers + ROUTERS="1 2 3 4"; readonly ROUTERS + for i in ${ROUTERS}; do + create_router "${i}" + done + + # create hosts + HOSTS="1 2"; readonly HOSTS + for i in ${HOSTS}; do + create_host "${i}" + done + + # set up the links for connecting routers + add_link_rt_pairs 1 "2 3 4" + add_link_rt_pairs 2 "3 4" + add_link_rt_pairs 3 "4" + + # set up the basic connectivity of routers and routes required for + # reachability of SIDs. + setup_rt_networking 1 "2 3 4" + setup_rt_networking 2 "1 3 4" + setup_rt_networking 3 "1 2 4" + setup_rt_networking 4 "1 2 3" + + # set up the hosts connected to routers + setup_hs 1 1 + setup_hs 2 2 + + # set up default SRv6 Endpoints (i.e. SRv6 End and SRv6 End.DX2) + setup_rt_local_sids 1 "2 3 4" + setup_rt_local_sids 2 "1 3 4" + setup_rt_local_sids 3 "1 2 4" + setup_rt_local_sids 4 "1 2 3" + + # create a L2 VPN between hs-1 and hs-2. + # NB: currently, H.L2Encap* enables tunneling of L2 frames whose + # layer-3 is IPv4/IPv6. + # + # the network path between hs-1 and hs-2 traverses several routers + # depending on the direction of traffic. + # + # Direction hs-1 -> hs-2 (H.L2Encaps.Red) + # - rt-2 (SRv6 End.DX2 behavior) + # + # Direction hs-2 -> hs-1 (H.L2Encaps.Red) + # - rt-4,rt-3 (SRv6 End behaviors) + # - rt-1 (SRv6 End.DX2 behavior) + setup_l2vpn 1 "" 2 + setup_l2vpn 2 "4 3" 1 + + # testing environment was set up successfully + SETUP_ERR=0 +} + +check_rt_connectivity() +{ + local rtsrc="$1" + local rtdst="$2" + local prefix + local rtsrc_nsname + + rtsrc_nsname="$(get_rtname "${rtsrc}")" + + prefix="$(get_network_prefix "${rtsrc}" "${rtdst}")" + + ip netns exec "${rtsrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \ + "${prefix}::${rtdst}" >/dev/null 2>&1 +} + +check_and_log_rt_connectivity() +{ + local rtsrc="$1" + local rtdst="$2" + + check_rt_connectivity "${rtsrc}" "${rtdst}" + log_test $? 0 "Routers connectivity: rt-${rtsrc} -> rt-${rtdst}" +} + +check_hs_ipv6_connectivity() +{ + local hssrc="$1" + local hsdst="$2" + local hssrc_nsname + + hssrc_nsname="$(get_hsname "${hssrc}")" + + ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \ + "${IPv6_HS_NETWORK}::${hsdst}" >/dev/null 2>&1 +} + +check_hs_ipv4_connectivity() +{ + local hssrc="$1" + local hsdst="$2" + local hssrc_nsname + + hssrc_nsname="$(get_hsname "${hssrc}")" + + ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \ + "${IPv4_HS_NETWORK}.${hsdst}" >/dev/null 2>&1 +} + +check_and_log_hs2gw_connectivity() +{ + local hssrc="$1" + + check_hs_ipv6_connectivity "${hssrc}" 254 + log_test $? 0 "IPv6 Hosts connectivity: hs-${hssrc} -> gw" + + check_hs_ipv4_connectivity "${hssrc}" 254 + log_test $? 0 "IPv4 Hosts connectivity: hs-${hssrc} -> gw" +} + +check_and_log_hs_ipv6_connectivity() +{ + local hssrc="$1" + local hsdst="$2" + + check_hs_ipv6_connectivity "${hssrc}" "${hsdst}" + log_test $? 0 "IPv6 Hosts connectivity: hs-${hssrc} -> hs-${hsdst}" +} + +check_and_log_hs_ipv4_connectivity() +{ + local hssrc="$1" + local hsdst="$2" + + check_hs_ipv4_connectivity "${hssrc}" "${hsdst}" + log_test $? 0 "IPv4 Hosts connectivity: hs-${hssrc} -> hs-${hsdst}" +} + +check_and_log_hs_connectivity() +{ + local hssrc="$1" + local hsdst="$2" + + check_and_log_hs_ipv4_connectivity "${hssrc}" "${hsdst}" + check_and_log_hs_ipv6_connectivity "${hssrc}" "${hsdst}" +} + +router_tests() +{ + local i + local j + + log_section "IPv6 routers connectivity test" + + for i in ${ROUTERS}; do + for j in ${ROUTERS}; do + if [ "${i}" -eq "${j}" ]; then + continue + fi + + check_and_log_rt_connectivity "${i}" "${j}" + done + done +} + +host2gateway_tests() +{ + local hs + + log_section "IPv4/IPv6 connectivity test among hosts and gateways" + + for hs in ${HOSTS}; do + check_and_log_hs2gw_connectivity "${hs}" + done +} + +host_vpn_tests() +{ + log_section "SRv6 L2 VPN connectivity test hosts (h1 <-> h2)" + + check_and_log_hs_connectivity 1 2 + check_and_log_hs_connectivity 2 1 +} + +test_dummy_dev_or_ksft_skip() +{ + local test_netns + + test_netns="dummy-$(mktemp -u XXXXXXXX)" + + if ! ip netns add "${test_netns}"; then + echo "SKIP: Cannot set up netns for testing dummy dev support" + exit "${ksft_skip}" + fi + + modprobe dummy &>/dev/null || true + if ! ip -netns "${test_netns}" link \ + add "${DUMMY_DEVNAME}" type dummy; then + echo "SKIP: dummy dev not supported" + + ip netns del "${test_netns}" + exit "${ksft_skip}" + fi + + ip netns del "${test_netns}" +} + +test_iproute2_supp_or_ksft_skip() +{ + if ! ip route help 2>&1 | grep -qo "l2encap.red"; then + echo "SKIP: Missing SRv6 l2encap.red support in iproute2" + exit "${ksft_skip}" + fi +} + +if [ "$(id -u)" -ne 0 ]; then + echo "SKIP: Need root privileges" + exit "${ksft_skip}" +fi + +# required programs to carry out this selftest +test_command_or_ksft_skip ip +test_command_or_ksft_skip ping +test_command_or_ksft_skip sysctl +test_command_or_ksft_skip grep + +test_iproute2_supp_or_ksft_skip +test_dummy_dev_or_ksft_skip + +set -e +trap cleanup EXIT + +setup +set +e + +router_tests +host2gateway_tests +host_vpn_tests + +print_log_test_results diff --git a/tools/testing/selftests/net/stress_reuseport_listen.c b/tools/testing/selftests/net/stress_reuseport_listen.c new file mode 100644 index 000000000000..ef800bb35a8e --- /dev/null +++ b/tools/testing/selftests/net/stress_reuseport_listen.c @@ -0,0 +1,105 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ + +/* Test listening on the same port 443 with multiple VIPS. + * Each VIP:443 will have multiple sk listening on by using + * SO_REUSEPORT. + */ + +#include <unistd.h> +#include <stdio.h> +#include <stdlib.h> +#include <error.h> +#include <errno.h> +#include <time.h> +#include <arpa/inet.h> + +#define IP6_LADDR_START "2401:dead::1" +#define IP6_LPORT 443 +#define NSEC_PER_SEC 1000000000L +#define NSEC_PER_USEC 1000L + +static unsigned int nr_socks_per_vip; +static unsigned int nr_vips; + +static int *bind_reuseport_sock6(void) +{ + int *lfds, *cur_fd, err, optvalue = 1; + struct sockaddr_in6 sa6 = {}; + unsigned int i, j; + + sa6.sin6_family = AF_INET6; + sa6.sin6_port = htons(IP6_LPORT); + err = inet_pton(AF_INET6, IP6_LADDR_START, &sa6.sin6_addr); + if (err != 1) + error(1, err, "inet_pton(%s)", IP6_LADDR_START); + + lfds = malloc(nr_vips * nr_socks_per_vip * sizeof(lfds[0])); + if (!lfds) + error(1, errno, "cannot alloc array of lfds"); + + cur_fd = lfds; + for (i = 0; i < nr_vips; i++) { + for (j = 0; j < nr_socks_per_vip; j++) { + *cur_fd = socket(AF_INET6, SOCK_STREAM, 0); + if (*cur_fd == -1) + error(1, errno, + "lfds[%u,%u] = socket(AF_INET6)", i, j); + + err = setsockopt(*cur_fd, SOL_SOCKET, SO_REUSEPORT, + &optvalue, sizeof(optvalue)); + if (err) + error(1, errno, + "setsockopt(lfds[%u,%u], SO_REUSEPORT)", + i, j); + + err = bind(*cur_fd, (struct sockaddr *)&sa6, + sizeof(sa6)); + if (err) + error(1, errno, "bind(lfds[%u,%u])", i, j); + cur_fd++; + } + sa6.sin6_addr.s6_addr32[3]++; + } + + return lfds; +} + +int main(int argc, const char *argv[]) +{ + struct timespec start_ts, end_ts; + unsigned long start_ns, end_ns; + unsigned int nr_lsocks; + int *lfds, i, err; + + if (argc != 3 || atoi(argv[1]) <= 0 || atoi(argv[2]) <= 0) + error(1, 0, "Usage: %s <nr_vips> <nr_socks_per_vip>\n", + argv[0]); + + nr_vips = atoi(argv[1]); + nr_socks_per_vip = atoi(argv[2]); + nr_lsocks = nr_vips * nr_socks_per_vip; + lfds = bind_reuseport_sock6(); + + clock_gettime(CLOCK_MONOTONIC, &start_ts); + for (i = 0; i < nr_lsocks; i++) { + err = listen(lfds[i], 0); + if (err) + error(1, errno, "listen(lfds[%d])", i); + } + clock_gettime(CLOCK_MONOTONIC, &end_ts); + + start_ns = start_ts.tv_sec * NSEC_PER_SEC + start_ts.tv_nsec; + end_ns = end_ts.tv_sec * NSEC_PER_SEC + end_ts.tv_nsec; + + printf("listen %d socks took %lu.%lu\n", nr_lsocks, + (end_ns - start_ns) / NSEC_PER_SEC, + (end_ns - start_ns) / NSEC_PER_USEC); + + for (i = 0; i < nr_lsocks; i++) + close(lfds[i]); + + free(lfds); + return 0; +} diff --git a/tools/testing/selftests/net/stress_reuseport_listen.sh b/tools/testing/selftests/net/stress_reuseport_listen.sh new file mode 100755 index 000000000000..94d5d1a1c90f --- /dev/null +++ b/tools/testing/selftests/net/stress_reuseport_listen.sh @@ -0,0 +1,25 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# Copyright (c) 2022 Meta Platforms, Inc. and affiliates. + +source lib.sh +NR_FILES=24100 +SAVED_NR_FILES=$(ulimit -n) + +setup() { + setup_ns NS + ip netns exec $NS sysctl -q -w net.ipv6.ip_nonlocal_bind=1 + ulimit -n $NR_FILES +} + +cleanup() { + cleanup_ns $NS + ulimit -n $SAVED_NR_FILES +} + +trap cleanup EXIT +setup +# 300 different vips listen on port 443 +# Each vip:443 sockaddr has 80 LISTEN sock by using SO_REUSEPORT +# Total 24000 listening socks +ip netns exec $NS ./stress_reuseport_listen 300 80 diff --git a/tools/testing/selftests/net/tap.c b/tools/testing/selftests/net/tap.c new file mode 100644 index 000000000000..247c3b3ac1c9 --- /dev/null +++ b/tools/testing/selftests/net/tap.c @@ -0,0 +1,434 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE + +#include <errno.h> +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <net/if.h> +#include <linux/if_tun.h> +#include <linux/netlink.h> +#include <linux/rtnetlink.h> +#include <sys/ioctl.h> +#include <sys/socket.h> +#include <linux/virtio_net.h> +#include <netinet/ip.h> +#include <netinet/udp.h> +#include "../kselftest_harness.h" + +static const char param_dev_tap_name[] = "xmacvtap0"; +static const char param_dev_dummy_name[] = "xdummy0"; +static unsigned char param_hwaddr_src[] = { 0x00, 0xfe, 0x98, 0x14, 0x22, 0x42 }; +static unsigned char param_hwaddr_dest[] = { + 0x00, 0xfe, 0x98, 0x94, 0xd2, 0x43 +}; + +#define MAX_RTNL_PAYLOAD (2048) +#define PKT_DATA 0xCB +#define TEST_PACKET_SZ (sizeof(struct virtio_net_hdr) + ETH_HLEN + ETH_MAX_MTU) + +static struct rtattr *rtattr_add(struct nlmsghdr *nh, unsigned short type, + unsigned short len) +{ + struct rtattr *rta = + (struct rtattr *)((uint8_t *)nh + RTA_ALIGN(nh->nlmsg_len)); + rta->rta_type = type; + rta->rta_len = RTA_LENGTH(len); + nh->nlmsg_len = RTA_ALIGN(nh->nlmsg_len) + RTA_ALIGN(rta->rta_len); + return rta; +} + +static struct rtattr *rtattr_begin(struct nlmsghdr *nh, unsigned short type) +{ + return rtattr_add(nh, type, 0); +} + +static void rtattr_end(struct nlmsghdr *nh, struct rtattr *attr) +{ + uint8_t *end = (uint8_t *)nh + nh->nlmsg_len; + + attr->rta_len = end - (uint8_t *)attr; +} + +static struct rtattr *rtattr_add_str(struct nlmsghdr *nh, unsigned short type, + const char *s) +{ + struct rtattr *rta = rtattr_add(nh, type, strlen(s)); + + memcpy(RTA_DATA(rta), s, strlen(s)); + return rta; +} + +static struct rtattr *rtattr_add_strsz(struct nlmsghdr *nh, unsigned short type, + const char *s) +{ + struct rtattr *rta = rtattr_add(nh, type, strlen(s) + 1); + + strcpy(RTA_DATA(rta), s); + return rta; +} + +static struct rtattr *rtattr_add_any(struct nlmsghdr *nh, unsigned short type, + const void *arr, size_t len) +{ + struct rtattr *rta = rtattr_add(nh, type, len); + + memcpy(RTA_DATA(rta), arr, len); + return rta; +} + +static int dev_create(const char *dev, const char *link_type, + int (*fill_rtattr)(struct nlmsghdr *nh), + int (*fill_info_data)(struct nlmsghdr *nh)) +{ + struct { + struct nlmsghdr nh; + struct ifinfomsg info; + unsigned char data[MAX_RTNL_PAYLOAD]; + } req; + struct rtattr *link_info, *info_data; + int ret, rtnl; + + rtnl = socket(AF_NETLINK, SOCK_DGRAM, NETLINK_ROUTE); + if (rtnl < 0) { + fprintf(stderr, "%s: socket %s\n", __func__, strerror(errno)); + return 1; + } + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.info)); + req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE; + req.nh.nlmsg_type = RTM_NEWLINK; + + req.info.ifi_family = AF_UNSPEC; + req.info.ifi_type = 1; + req.info.ifi_index = 0; + req.info.ifi_flags = IFF_BROADCAST | IFF_UP; + req.info.ifi_change = 0xffffffff; + + rtattr_add_str(&req.nh, IFLA_IFNAME, dev); + + if (fill_rtattr) { + ret = fill_rtattr(&req.nh); + if (ret) + return ret; + } + + link_info = rtattr_begin(&req.nh, IFLA_LINKINFO); + + rtattr_add_strsz(&req.nh, IFLA_INFO_KIND, link_type); + + if (fill_info_data) { + info_data = rtattr_begin(&req.nh, IFLA_INFO_DATA); + ret = fill_info_data(&req.nh); + if (ret) + return ret; + rtattr_end(&req.nh, info_data); + } + + rtattr_end(&req.nh, link_info); + + ret = send(rtnl, &req, req.nh.nlmsg_len, 0); + if (ret < 0) + fprintf(stderr, "%s: send %s\n", __func__, strerror(errno)); + ret = (unsigned int)ret != req.nh.nlmsg_len; + + close(rtnl); + return ret; +} + +static int dev_delete(const char *dev) +{ + struct { + struct nlmsghdr nh; + struct ifinfomsg info; + unsigned char data[MAX_RTNL_PAYLOAD]; + } req; + int ret, rtnl; + + rtnl = socket(AF_NETLINK, SOCK_DGRAM, NETLINK_ROUTE); + if (rtnl < 0) { + fprintf(stderr, "%s: socket %s\n", __func__, strerror(errno)); + return 1; + } + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.info)); + req.nh.nlmsg_flags = NLM_F_REQUEST; + req.nh.nlmsg_type = RTM_DELLINK; + + req.info.ifi_family = AF_UNSPEC; + + rtattr_add_str(&req.nh, IFLA_IFNAME, dev); + + ret = send(rtnl, &req, req.nh.nlmsg_len, 0); + if (ret < 0) + fprintf(stderr, "%s: send %s\n", __func__, strerror(errno)); + + ret = (unsigned int)ret != req.nh.nlmsg_len; + + close(rtnl); + return ret; +} + +static int macvtap_fill_rtattr(struct nlmsghdr *nh) +{ + int ifindex; + + ifindex = if_nametoindex(param_dev_dummy_name); + if (ifindex == 0) { + fprintf(stderr, "%s: ifindex %s\n", __func__, strerror(errno)); + return -errno; + } + + rtattr_add_any(nh, IFLA_LINK, &ifindex, sizeof(ifindex)); + rtattr_add_any(nh, IFLA_ADDRESS, param_hwaddr_src, ETH_ALEN); + + return 0; +} + +static int opentap(const char *devname) +{ + int ifindex; + char buf[256]; + int fd; + struct ifreq ifr; + + ifindex = if_nametoindex(devname); + if (ifindex == 0) { + fprintf(stderr, "%s: ifindex %s\n", __func__, strerror(errno)); + return -errno; + } + + sprintf(buf, "/dev/tap%d", ifindex); + fd = open(buf, O_RDWR | O_NONBLOCK); + if (fd < 0) { + fprintf(stderr, "%s: open %s\n", __func__, strerror(errno)); + return -errno; + } + + memset(&ifr, 0, sizeof(ifr)); + strcpy(ifr.ifr_name, devname); + ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR | IFF_MULTI_QUEUE; + if (ioctl(fd, TUNSETIFF, &ifr, sizeof(ifr)) < 0) + return -errno; + return fd; +} + +size_t build_eth(uint8_t *buf, uint16_t proto) +{ + struct ethhdr *eth = (struct ethhdr *)buf; + + eth->h_proto = htons(proto); + memcpy(eth->h_source, param_hwaddr_src, ETH_ALEN); + memcpy(eth->h_dest, param_hwaddr_dest, ETH_ALEN); + + return ETH_HLEN; +} + +static uint32_t add_csum(const uint8_t *buf, int len) +{ + uint32_t sum = 0; + uint16_t *sbuf = (uint16_t *)buf; + + while (len > 1) { + sum += *sbuf++; + len -= 2; + } + + if (len) + sum += *(uint8_t *)sbuf; + + return sum; +} + +static uint16_t finish_ip_csum(uint32_t sum) +{ + uint16_t lo = sum & 0xffff; + uint16_t hi = sum >> 16; + + return ~(lo + hi); + +} + +static uint16_t build_ip_csum(const uint8_t *buf, int len, + uint32_t sum) +{ + sum += add_csum(buf, len); + return finish_ip_csum(sum); +} + +static int build_ipv4_header(uint8_t *buf, int payload_len) +{ + struct iphdr *iph = (struct iphdr *)buf; + + iph->ihl = 5; + iph->version = 4; + iph->ttl = 8; + iph->tot_len = + htons(sizeof(*iph) + sizeof(struct udphdr) + payload_len); + iph->id = htons(1337); + iph->protocol = IPPROTO_UDP; + iph->saddr = htonl((172 << 24) | (17 << 16) | 2); + iph->daddr = htonl((172 << 24) | (17 << 16) | 1); + iph->check = build_ip_csum(buf, iph->ihl << 2, 0); + + return iph->ihl << 2; +} + +static int build_udp_packet(uint8_t *buf, int payload_len, bool csum_off) +{ + const int ip4alen = sizeof(uint32_t); + struct udphdr *udph = (struct udphdr *)buf; + int len = sizeof(*udph) + payload_len; + uint32_t sum = 0; + + udph->source = htons(22); + udph->dest = htons(58822); + udph->len = htons(len); + + memset(buf + sizeof(struct udphdr), PKT_DATA, payload_len); + + sum = add_csum(buf - 2 * ip4alen, 2 * ip4alen); + sum += htons(IPPROTO_UDP) + udph->len; + + if (!csum_off) + sum += add_csum(buf, len); + + udph->check = finish_ip_csum(sum); + + return sizeof(*udph) + payload_len; +} + +size_t build_test_packet_valid_udp_gso(uint8_t *buf, size_t payload_len) +{ + uint8_t *cur = buf; + struct virtio_net_hdr *vh = (struct virtio_net_hdr *)buf; + + vh->hdr_len = ETH_HLEN + sizeof(struct iphdr) + sizeof(struct udphdr); + vh->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; + vh->csum_start = ETH_HLEN + sizeof(struct iphdr); + vh->csum_offset = __builtin_offsetof(struct udphdr, check); + vh->gso_type = VIRTIO_NET_HDR_GSO_UDP; + vh->gso_size = ETH_DATA_LEN - sizeof(struct iphdr); + cur += sizeof(*vh); + + cur += build_eth(cur, ETH_P_IP); + cur += build_ipv4_header(cur, payload_len); + cur += build_udp_packet(cur, payload_len, true); + + return cur - buf; +} + +size_t build_test_packet_valid_udp_csum(uint8_t *buf, size_t payload_len) +{ + uint8_t *cur = buf; + struct virtio_net_hdr *vh = (struct virtio_net_hdr *)buf; + + vh->flags = VIRTIO_NET_HDR_F_DATA_VALID; + vh->gso_type = VIRTIO_NET_HDR_GSO_NONE; + cur += sizeof(*vh); + + cur += build_eth(cur, ETH_P_IP); + cur += build_ipv4_header(cur, payload_len); + cur += build_udp_packet(cur, payload_len, false); + + return cur - buf; +} + +size_t build_test_packet_crash_tap_invalid_eth_proto(uint8_t *buf, + size_t payload_len) +{ + uint8_t *cur = buf; + struct virtio_net_hdr *vh = (struct virtio_net_hdr *)buf; + + vh->hdr_len = ETH_HLEN + sizeof(struct iphdr) + sizeof(struct udphdr); + vh->flags = 0; + vh->gso_type = VIRTIO_NET_HDR_GSO_UDP; + vh->gso_size = ETH_DATA_LEN - sizeof(struct iphdr); + cur += sizeof(*vh); + + cur += build_eth(cur, 0); + cur += sizeof(struct iphdr) + sizeof(struct udphdr); + cur += build_ipv4_header(cur, payload_len); + cur += build_udp_packet(cur, payload_len, true); + cur += payload_len; + + return cur - buf; +} + +FIXTURE(tap) +{ + int fd; +}; + +FIXTURE_SETUP(tap) +{ + int ret; + + ret = dev_create(param_dev_dummy_name, "dummy", NULL, NULL); + EXPECT_EQ(ret, 0); + + ret = dev_create(param_dev_tap_name, "macvtap", macvtap_fill_rtattr, + NULL); + EXPECT_EQ(ret, 0); + + self->fd = opentap(param_dev_tap_name); + ASSERT_GE(self->fd, 0); +} + +FIXTURE_TEARDOWN(tap) +{ + int ret; + + if (self->fd != -1) + close(self->fd); + + ret = dev_delete(param_dev_tap_name); + EXPECT_EQ(ret, 0); + + ret = dev_delete(param_dev_dummy_name); + EXPECT_EQ(ret, 0); +} + +TEST_F(tap, test_packet_valid_udp_gso) +{ + uint8_t pkt[TEST_PACKET_SZ]; + size_t off; + int ret; + + memset(pkt, 0, sizeof(pkt)); + off = build_test_packet_valid_udp_gso(pkt, 1021); + ret = write(self->fd, pkt, off); + ASSERT_EQ(ret, off); +} + +TEST_F(tap, test_packet_valid_udp_csum) +{ + uint8_t pkt[TEST_PACKET_SZ]; + size_t off; + int ret; + + memset(pkt, 0, sizeof(pkt)); + off = build_test_packet_valid_udp_csum(pkt, 1024); + ret = write(self->fd, pkt, off); + ASSERT_EQ(ret, off); +} + +TEST_F(tap, test_packet_crash_tap_invalid_eth_proto) +{ + uint8_t pkt[TEST_PACKET_SZ]; + size_t off; + int ret; + + memset(pkt, 0, sizeof(pkt)); + off = build_test_packet_crash_tap_invalid_eth_proto(pkt, 1024); + ret = write(self->fd, pkt, off); + ASSERT_EQ(ret, -1); + ASSERT_EQ(errno, EINVAL); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/net/tcp_ao/.gitignore b/tools/testing/selftests/net/tcp_ao/.gitignore new file mode 100644 index 000000000000..e8bb81b715b7 --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/.gitignore @@ -0,0 +1,2 @@ +*_ipv4 +*_ipv6 diff --git a/tools/testing/selftests/net/tcp_ao/Makefile b/tools/testing/selftests/net/tcp_ao/Makefile new file mode 100644 index 000000000000..522d991e310e --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/Makefile @@ -0,0 +1,56 @@ +# SPDX-License-Identifier: GPL-2.0 +TEST_BOTH_AF := bench-lookups +TEST_BOTH_AF += connect +TEST_BOTH_AF += connect-deny +TEST_BOTH_AF += icmps-accept icmps-discard +TEST_BOTH_AF += key-management +TEST_BOTH_AF += restore +TEST_BOTH_AF += rst +TEST_BOTH_AF += self-connect +TEST_BOTH_AF += seq-ext +TEST_BOTH_AF += setsockopt-closed +TEST_BOTH_AF += unsigned-md5 + +TEST_IPV4_PROGS := $(TEST_BOTH_AF:%=%_ipv4) +TEST_IPV6_PROGS := $(TEST_BOTH_AF:%=%_ipv6) + +TEST_GEN_PROGS := $(TEST_IPV4_PROGS) $(TEST_IPV6_PROGS) + +top_srcdir := ../../../../.. +include ../../lib.mk + +HOSTAR ?= ar + +LIBDIR := $(OUTPUT)/lib +LIB := $(LIBDIR)/libaotst.a +LDLIBS += $(LIB) -pthread +LIBDEPS := lib/aolib.h Makefile + +CFLAGS := -Wall -O2 -g -D_GNU_SOURCE -fno-strict-aliasing +CFLAGS += $(KHDR_INCLUDES) +CFLAGS += -iquote ./lib/ -I ../../../../include/ + +# Library +LIBSRC := kconfig.c netlink.c proc.c repair.c setup.c sock.c utils.c +LIBOBJ := $(LIBSRC:%.c=$(LIBDIR)/%.o) +EXTRA_CLEAN += $(LIBOBJ) $(LIB) + +$(LIB): $(LIBOBJ) + $(HOSTAR) rcs $@ $^ + +$(LIBDIR)/%.o: ./lib/%.c $(LIBDEPS) + mkdir -p $(LIBDIR) + $(CC) $< $(CFLAGS) $(CPPFLAGS) -o $@ -c + +$(TEST_GEN_PROGS): $(LIB) + +$(OUTPUT)/%_ipv4: %.c + $(LINK.c) $^ $(LDLIBS) -o $@ + +$(OUTPUT)/%_ipv6: %.c + $(LINK.c) -DIPV6_TEST $^ $(LDLIBS) -o $@ + +$(OUTPUT)/icmps-accept_ipv4: CFLAGS+= -DTEST_ICMPS_ACCEPT +$(OUTPUT)/icmps-accept_ipv6: CFLAGS+= -DTEST_ICMPS_ACCEPT +$(OUTPUT)/bench-lookups_ipv4: LDLIBS+= -lm +$(OUTPUT)/bench-lookups_ipv6: LDLIBS+= -lm diff --git a/tools/testing/selftests/net/tcp_ao/bench-lookups.c b/tools/testing/selftests/net/tcp_ao/bench-lookups.c new file mode 100644 index 000000000000..a1e6e007c291 --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/bench-lookups.c @@ -0,0 +1,360 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Author: Dmitry Safonov <dima@arista.com> */ +#include <arpa/inet.h> +#include <inttypes.h> +#include <math.h> +#include <stdlib.h> +#include <stdio.h> +#include <time.h> + +#include "../../../../include/linux/bits.h" +#include "../../../../include/linux/kernel.h" +#include "aolib.h" + +#define BENCH_NR_ITERS 100 /* number of times to run gathering statistics */ + +static void gen_test_ips(union tcp_addr *ips, size_t ips_nr, bool use_rand) +{ + union tcp_addr net = {}; + size_t i, j; + + if (inet_pton(TEST_FAMILY, TEST_NETWORK, &net) != 1) + test_error("Can't convert ip address %s", TEST_NETWORK); + + if (!use_rand) { + for (i = 0; i < ips_nr; i++) + ips[i] = gen_tcp_addr(net, 2 * i + 1); + return; + } + for (i = 0; i < ips_nr; i++) { + size_t r = (size_t)random() | 0x1; + + ips[i] = gen_tcp_addr(net, r); + + for (j = i - 1; j > 0 && i > 0; j--) { + if (!memcmp(&ips[i], &ips[j], sizeof(union tcp_addr))) { + i--; /* collision */ + break; + } + } + } +} + +static void test_add_routes(union tcp_addr *ips, size_t ips_nr) +{ + size_t i; + + for (i = 0; i < ips_nr; i++) { + union tcp_addr *p = (union tcp_addr *)&ips[i]; + int err; + + err = ip_route_add(veth_name, TEST_FAMILY, this_ip_addr, *p); + if (err && err != -EEXIST) + test_error("Failed to add route"); + } +} + +static void server_apply_keys(int lsk, union tcp_addr *ips, size_t ips_nr) +{ + size_t i; + + for (i = 0; i < ips_nr; i++) { + union tcp_addr *p = (union tcp_addr *)&ips[i]; + + if (test_add_key(lsk, DEFAULT_TEST_PASSWORD, *p, -1, 100, 100)) + test_error("setsockopt(TCP_AO)"); + } +} + +static const size_t nr_keys[] = { 512, 1024, 2048, 4096, 8192 }; +static union tcp_addr *test_ips; + +struct bench_stats { + uint64_t min; + uint64_t max; + uint64_t nr; + double mean; + double s2; +}; + +static struct bench_tests { + struct bench_stats delete_last_key; + struct bench_stats add_key; + struct bench_stats delete_rand_key; + struct bench_stats connect_last_key; + struct bench_stats connect_rand_key; + struct bench_stats delete_async; +} bench_results[ARRAY_SIZE(nr_keys)]; + +#define NSEC_PER_SEC 1000000000ULL + +static void measure_call(struct bench_stats *st, + void (*f)(int, void *), int sk, void *arg) +{ + struct timespec start = {}, end = {}; + double delta; + uint64_t nsec; + + if (clock_gettime(CLOCK_MONOTONIC, &start)) + test_error("clock_gettime()"); + + f(sk, arg); + + if (clock_gettime(CLOCK_MONOTONIC, &end)) + test_error("clock_gettime()"); + + nsec = (end.tv_sec - start.tv_sec) * NSEC_PER_SEC; + if (end.tv_nsec >= start.tv_nsec) + nsec += end.tv_nsec - start.tv_nsec; + else + nsec -= start.tv_nsec - end.tv_nsec; + + if (st->nr == 0) { + st->min = st->max = nsec; + } else { + if (st->min > nsec) + st->min = nsec; + if (st->max < nsec) + st->max = nsec; + } + + /* Welford-Knuth algorithm */ + st->nr++; + delta = (double)nsec - st->mean; + st->mean += delta / st->nr; + st->s2 += delta * ((double)nsec - st->mean); +} + +static void delete_mkt(int sk, void *arg) +{ + struct tcp_ao_del *ao = arg; + + if (setsockopt(sk, IPPROTO_TCP, TCP_AO_DEL_KEY, ao, sizeof(*ao))) + test_error("setsockopt(TCP_AO_DEL_KEY)"); +} + +static void add_back_mkt(int sk, void *arg) +{ + union tcp_addr *p = arg; + + if (test_add_key(sk, DEFAULT_TEST_PASSWORD, *p, -1, 100, 100)) + test_error("setsockopt(TCP_AO)"); +} + +static void bench_delete(int lsk, struct bench_stats *add, + struct bench_stats *del, + union tcp_addr *ips, size_t ips_nr, + bool rand_order, bool async) +{ + struct tcp_ao_del ao_del = {}; + union tcp_addr *p; + size_t i; + + ao_del.sndid = 100; + ao_del.rcvid = 100; + ao_del.del_async = !!async; + ao_del.prefix = DEFAULT_TEST_PREFIX; + + /* Remove the first added */ + p = (union tcp_addr *)&ips[0]; + tcp_addr_to_sockaddr_in(&ao_del.addr, p, 0); + + for (i = 0; i < BENCH_NR_ITERS; i++) { + measure_call(del, delete_mkt, lsk, (void *)&ao_del); + + /* Restore it back */ + measure_call(add, add_back_mkt, lsk, (void *)p); + + /* + * Slowest for FILO-linked-list: + * on (i) iteration removing ips[i] element. When it gets + * added to the list back - it becomes first to fetch, so + * on (i + 1) iteration go to ips[i + 1] element. + */ + if (rand_order) + p = (union tcp_addr *)&ips[rand() % ips_nr]; + else + p = (union tcp_addr *)&ips[i % ips_nr]; + tcp_addr_to_sockaddr_in(&ao_del.addr, p, 0); + } +} + +static void bench_connect_srv(int lsk, union tcp_addr *ips, size_t ips_nr) +{ + size_t i; + + for (i = 0; i < BENCH_NR_ITERS; i++) { + int sk; + + synchronize_threads(); + + if (test_wait_fd(lsk, TEST_TIMEOUT_SEC, 0)) + test_error("test_wait_fd()"); + + sk = accept(lsk, NULL, NULL); + if (sk < 0) + test_error("accept()"); + + close(sk); + } +} + +static void test_print_stats(const char *desc, size_t nr, struct bench_stats *bs) +{ + test_ok("%-20s\t%zu keys: min=%" PRIu64 "ms max=%" PRIu64 "ms mean=%gms stddev=%g", + desc, nr, bs->min / 1000000, bs->max / 1000000, + bs->mean / 1000000, sqrt((bs->mean / 1000000) / bs->nr)); +} + +static void *server_fn(void *arg) +{ + size_t i; + + for (i = 0; i < ARRAY_SIZE(nr_keys); i++) { + struct bench_tests *bt = &bench_results[i]; + int lsk; + + test_ips = malloc(nr_keys[i] * sizeof(union tcp_addr)); + if (!test_ips) + test_error("malloc()"); + + lsk = test_listen_socket(this_ip_addr, test_server_port + i, 1); + + gen_test_ips(test_ips, nr_keys[i], false); + test_add_routes(test_ips, nr_keys[i]); + test_set_optmem(KERNEL_TCP_AO_KEY_SZ_ROUND_UP * nr_keys[i]); + server_apply_keys(lsk, test_ips, nr_keys[i]); + + synchronize_threads(); + bench_connect_srv(lsk, test_ips, nr_keys[i]); + bench_connect_srv(lsk, test_ips, nr_keys[i]); + + /* The worst case for FILO-list */ + bench_delete(lsk, &bt->add_key, &bt->delete_last_key, + test_ips, nr_keys[i], false, false); + test_print_stats("Add a new key", + nr_keys[i], &bt->add_key); + test_print_stats("Delete: worst case", + nr_keys[i], &bt->delete_last_key); + + bench_delete(lsk, &bt->add_key, &bt->delete_rand_key, + test_ips, nr_keys[i], true, false); + test_print_stats("Delete: random-search", + nr_keys[i], &bt->delete_rand_key); + + bench_delete(lsk, &bt->add_key, &bt->delete_async, + test_ips, nr_keys[i], false, true); + test_print_stats("Delete: async", nr_keys[i], &bt->delete_async); + + free(test_ips); + close(lsk); + } + + return NULL; +} + +static void connect_client(int sk, void *arg) +{ + size_t *p = arg; + + if (test_connect_socket(sk, this_ip_dest, test_server_port + *p) <= 0) + test_error("failed to connect()"); +} + +static void client_addr_setup(int sk, union tcp_addr taddr) +{ +#ifdef IPV6_TEST + struct sockaddr_in6 addr = { + .sin6_family = AF_INET6, + .sin6_port = 0, + .sin6_addr = taddr.a6, + }; +#else + struct sockaddr_in addr = { + .sin_family = AF_INET, + .sin_port = 0, + .sin_addr = taddr.a4, + }; +#endif + int ret; + + ret = ip_addr_add(veth_name, TEST_FAMILY, taddr, TEST_PREFIX); + if (ret && ret != -EEXIST) + test_error("Failed to add ip address"); + ret = ip_route_add(veth_name, TEST_FAMILY, taddr, this_ip_dest); + if (ret && ret != -EEXIST) + test_error("Failed to add route"); + + if (bind(sk, &addr, sizeof(addr))) + test_error("bind()"); +} + +static void bench_connect_client(size_t port_off, struct bench_tests *bt, + union tcp_addr *ips, size_t ips_nr, bool rand_order) +{ + struct bench_stats *con; + union tcp_addr *p; + size_t i; + + if (rand_order) + con = &bt->connect_rand_key; + else + con = &bt->connect_last_key; + + p = (union tcp_addr *)&ips[0]; + + for (i = 0; i < BENCH_NR_ITERS; i++) { + int sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP); + + if (sk < 0) + test_error("socket()"); + + client_addr_setup(sk, *p); + if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest, + -1, 100, 100)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); + + synchronize_threads(); + + measure_call(con, connect_client, sk, (void *)&port_off); + + close(sk); + + /* + * Slowest for FILO-linked-list: + * on (i) iteration removing ips[i] element. When it gets + * added to the list back - it becomes first to fetch, so + * on (i + 1) iteration go to ips[i + 1] element. + */ + if (rand_order) + p = (union tcp_addr *)&ips[rand() % ips_nr]; + else + p = (union tcp_addr *)&ips[i % ips_nr]; + } +} + +static void *client_fn(void *arg) +{ + size_t i; + + for (i = 0; i < ARRAY_SIZE(nr_keys); i++) { + struct bench_tests *bt = &bench_results[i]; + + synchronize_threads(); + bench_connect_client(i, bt, test_ips, nr_keys[i], false); + test_print_stats("Connect: worst case", + nr_keys[i], &bt->connect_last_key); + + bench_connect_client(i, bt, test_ips, nr_keys[i], false); + test_print_stats("Connect: random-search", + nr_keys[i], &bt->connect_last_key); + } + synchronize_threads(); + return NULL; +} + +int main(int argc, char *argv[]) +{ + test_init(30, server_fn, client_fn); + return 0; +} diff --git a/tools/testing/selftests/net/tcp_ao/config b/tools/testing/selftests/net/tcp_ao/config new file mode 100644 index 000000000000..d3277a9de987 --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/config @@ -0,0 +1,10 @@ +CONFIG_CRYPTO_HMAC=y +CONFIG_CRYPTO_RMD160=y +CONFIG_CRYPTO_SHA1=y +CONFIG_IPV6_MULTIPLE_TABLES=y +CONFIG_IPV6=y +CONFIG_NET_L3_MASTER_DEV=y +CONFIG_NET_VRF=y +CONFIG_TCP_AO=y +CONFIG_TCP_MD5SIG=y +CONFIG_VETH=m diff --git a/tools/testing/selftests/net/tcp_ao/connect-deny.c b/tools/testing/selftests/net/tcp_ao/connect-deny.c new file mode 100644 index 000000000000..185a2f6e5ff3 --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/connect-deny.c @@ -0,0 +1,264 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Author: Dmitry Safonov <dima@arista.com> */ +#include <inttypes.h> +#include "aolib.h" + +#define fault(type) (inj == FAULT_ ## type) + +static inline int test_add_key_maclen(int sk, const char *key, uint8_t maclen, + union tcp_addr in_addr, uint8_t prefix, + uint8_t sndid, uint8_t rcvid) +{ + struct tcp_ao_add tmp = {}; + int err; + + if (prefix > DEFAULT_TEST_PREFIX) + prefix = DEFAULT_TEST_PREFIX; + + err = test_prepare_key(&tmp, DEFAULT_TEST_ALGO, in_addr, false, false, + prefix, 0, sndid, rcvid, maclen, + 0, strlen(key), key); + if (err) + return err; + + err = setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &tmp, sizeof(tmp)); + if (err < 0) + return -errno; + + return test_verify_socket_key(sk, &tmp); +} + +static void try_accept(const char *tst_name, unsigned int port, const char *pwd, + union tcp_addr addr, uint8_t prefix, + uint8_t sndid, uint8_t rcvid, uint8_t maclen, + const char *cnt_name, test_cnt cnt_expected, + fault_t inj) +{ + struct tcp_ao_counters ao_cnt1, ao_cnt2; + uint64_t before_cnt = 0, after_cnt = 0; /* silence GCC */ + int lsk, err, sk = 0; + time_t timeout; + + lsk = test_listen_socket(this_ip_addr, port, 1); + + if (pwd && test_add_key_maclen(lsk, pwd, maclen, addr, prefix, sndid, rcvid)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); + + if (cnt_name) + before_cnt = netstat_get_one(cnt_name, NULL); + if (pwd && test_get_tcp_ao_counters(lsk, &ao_cnt1)) + test_error("test_get_tcp_ao_counters()"); + + synchronize_threads(); /* preparations done */ + + timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC; + err = test_wait_fd(lsk, timeout, 0); + if (err == -ETIMEDOUT) { + if (!fault(TIMEOUT)) + test_fail("timed out for accept()"); + } else if (err < 0) { + test_error("test_wait_fd()"); + } else { + if (fault(TIMEOUT)) + test_fail("ready to accept"); + + sk = accept(lsk, NULL, NULL); + if (sk < 0) { + test_error("accept()"); + } else { + if (fault(TIMEOUT)) + test_fail("%s: accepted", tst_name); + } + } + + if (pwd && test_get_tcp_ao_counters(lsk, &ao_cnt2)) + test_error("test_get_tcp_ao_counters()"); + + close(lsk); + if (pwd) + test_tcp_ao_counters_cmp(tst_name, &ao_cnt1, &ao_cnt2, cnt_expected); + + if (!cnt_name) + goto out; + + after_cnt = netstat_get_one(cnt_name, NULL); + + if (after_cnt <= before_cnt) { + test_fail("%s: %s counter did not increase: %zu <= %zu", + tst_name, cnt_name, after_cnt, before_cnt); + } else { + test_ok("%s: counter %s increased %zu => %zu", + tst_name, cnt_name, before_cnt, after_cnt); + } + +out: + synchronize_threads(); /* close() */ + if (sk > 0) + close(sk); +} + +static void *server_fn(void *arg) +{ + union tcp_addr wrong_addr, network_addr; + unsigned int port = test_server_port; + + if (inet_pton(TEST_FAMILY, TEST_WRONG_IP, &wrong_addr) != 1) + test_error("Can't convert ip address %s", TEST_WRONG_IP); + + try_accept("Non-AO server + AO client", port++, NULL, + this_ip_dest, -1, 100, 100, 0, + "TCPAOKeyNotFound", 0, FAULT_TIMEOUT); + + try_accept("AO server + Non-AO client", port++, DEFAULT_TEST_PASSWORD, + this_ip_dest, -1, 100, 100, 0, + "TCPAORequired", TEST_CNT_AO_REQUIRED, FAULT_TIMEOUT); + + try_accept("Wrong password", port++, "something that is not DEFAULT_TEST_PASSWORD", + this_ip_dest, -1, 100, 100, 0, + "TCPAOBad", TEST_CNT_BAD, FAULT_TIMEOUT); + + try_accept("Wrong rcv id", port++, DEFAULT_TEST_PASSWORD, + this_ip_dest, -1, 100, 101, 0, + "TCPAOKeyNotFound", TEST_CNT_AO_KEY_NOT_FOUND, FAULT_TIMEOUT); + + try_accept("Wrong snd id", port++, DEFAULT_TEST_PASSWORD, + this_ip_dest, -1, 101, 100, 0, + "TCPAOGood", TEST_CNT_GOOD, FAULT_TIMEOUT); + + try_accept("Different maclen", port++, DEFAULT_TEST_PASSWORD, + this_ip_dest, -1, 100, 100, 8, + "TCPAOBad", TEST_CNT_BAD, FAULT_TIMEOUT); + + try_accept("Server: Wrong addr", port++, DEFAULT_TEST_PASSWORD, + wrong_addr, -1, 100, 100, 0, + "TCPAOKeyNotFound", TEST_CNT_AO_KEY_NOT_FOUND, FAULT_TIMEOUT); + + try_accept("Client: Wrong addr", port++, NULL, + this_ip_dest, -1, 100, 100, 0, NULL, 0, FAULT_TIMEOUT); + + try_accept("rcv id != snd id", port++, DEFAULT_TEST_PASSWORD, + this_ip_dest, -1, 200, 100, 0, + "TCPAOGood", TEST_CNT_GOOD, 0); + + if (inet_pton(TEST_FAMILY, TEST_NETWORK, &network_addr) != 1) + test_error("Can't convert ip address %s", TEST_NETWORK); + + try_accept("Server: prefix match", port++, DEFAULT_TEST_PASSWORD, + network_addr, 16, 100, 100, 0, + "TCPAOGood", TEST_CNT_GOOD, 0); + + try_accept("Client: prefix match", port++, DEFAULT_TEST_PASSWORD, + this_ip_dest, -1, 100, 100, 0, + "TCPAOGood", TEST_CNT_GOOD, 0); + + /* client exits */ + synchronize_threads(); + return NULL; +} + +static void try_connect(const char *tst_name, unsigned int port, + const char *pwd, union tcp_addr addr, uint8_t prefix, + uint8_t sndid, uint8_t rcvid, + test_cnt cnt_expected, fault_t inj) +{ + struct tcp_ao_counters ao_cnt1, ao_cnt2; + time_t timeout; + int sk, ret; + + sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP); + if (sk < 0) + test_error("socket()"); + + if (pwd && test_add_key(sk, pwd, addr, prefix, sndid, rcvid)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); + + if (pwd && test_get_tcp_ao_counters(sk, &ao_cnt1)) + test_error("test_get_tcp_ao_counters()"); + + synchronize_threads(); /* preparations done */ + + timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC; + ret = _test_connect_socket(sk, this_ip_dest, port, timeout); + + if (ret < 0) { + if (fault(KEYREJECT) && ret == -EKEYREJECTED) { + test_ok("%s: connect() was prevented", tst_name); + } else if (ret == -ETIMEDOUT && fault(TIMEOUT)) { + test_ok("%s", tst_name); + } else if (ret == -ECONNREFUSED && + (fault(TIMEOUT) || fault(KEYREJECT))) { + test_ok("%s: refused to connect", tst_name); + } else { + test_error("%s: connect() returned %d", tst_name, ret); + } + goto out; + } + + if (fault(TIMEOUT) || fault(KEYREJECT)) + test_fail("%s: connected", tst_name); + else + test_ok("%s: connected", tst_name); + if (pwd && ret > 0) { + if (test_get_tcp_ao_counters(sk, &ao_cnt2)) + test_error("test_get_tcp_ao_counters()"); + test_tcp_ao_counters_cmp(tst_name, &ao_cnt1, &ao_cnt2, cnt_expected); + } +out: + synchronize_threads(); /* close() */ + + if (ret > 0) + close(sk); +} + +static void *client_fn(void *arg) +{ + union tcp_addr wrong_addr, network_addr; + unsigned int port = test_server_port; + + if (inet_pton(TEST_FAMILY, TEST_WRONG_IP, &wrong_addr) != 1) + test_error("Can't convert ip address %s", TEST_WRONG_IP); + + try_connect("Non-AO server + AO client", port++, DEFAULT_TEST_PASSWORD, + this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT); + + try_connect("AO server + Non-AO client", port++, NULL, + this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT); + + try_connect("Wrong password", port++, DEFAULT_TEST_PASSWORD, + this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT); + + try_connect("Wrong rcv id", port++, DEFAULT_TEST_PASSWORD, + this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT); + + try_connect("Wrong snd id", port++, DEFAULT_TEST_PASSWORD, + this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT); + + try_connect("Different maclen", port++, DEFAULT_TEST_PASSWORD, + this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT); + + try_connect("Server: Wrong addr", port++, DEFAULT_TEST_PASSWORD, + this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT); + + try_connect("Client: Wrong addr", port++, DEFAULT_TEST_PASSWORD, + wrong_addr, -1, 100, 100, 0, FAULT_KEYREJECT); + + try_connect("rcv id != snd id", port++, DEFAULT_TEST_PASSWORD, + this_ip_dest, -1, 100, 200, TEST_CNT_GOOD, 0); + + if (inet_pton(TEST_FAMILY, TEST_NETWORK, &network_addr) != 1) + test_error("Can't convert ip address %s", TEST_NETWORK); + + try_connect("Server: prefix match", port++, DEFAULT_TEST_PASSWORD, + this_ip_dest, -1, 100, 100, TEST_CNT_GOOD, 0); + + try_connect("Client: prefix match", port++, DEFAULT_TEST_PASSWORD, + network_addr, 16, 100, 100, TEST_CNT_GOOD, 0); + + return NULL; +} + +int main(int argc, char *argv[]) +{ + test_init(21, server_fn, client_fn); + return 0; +} diff --git a/tools/testing/selftests/net/tcp_ao/connect.c b/tools/testing/selftests/net/tcp_ao/connect.c new file mode 100644 index 000000000000..81653b47f303 --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/connect.c @@ -0,0 +1,90 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Author: Dmitry Safonov <dima@arista.com> */ +#include <inttypes.h> +#include "aolib.h" + +static void *server_fn(void *arg) +{ + int sk, lsk; + ssize_t bytes; + + lsk = test_listen_socket(this_ip_addr, test_server_port, 1); + + if (test_add_key(lsk, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); + synchronize_threads(); + + if (test_wait_fd(lsk, TEST_TIMEOUT_SEC, 0)) + test_error("test_wait_fd()"); + + sk = accept(lsk, NULL, NULL); + if (sk < 0) + test_error("accept()"); + + synchronize_threads(); + + bytes = test_server_run(sk, 0, 0); + + test_fail("server served: %zd", bytes); + return NULL; +} + +static void *client_fn(void *arg) +{ + int sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP); + uint64_t before_aogood, after_aogood; + const size_t nr_packets = 20; + struct netstat *ns_before, *ns_after; + struct tcp_ao_counters ao1, ao2; + + if (sk < 0) + test_error("socket()"); + + if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); + + synchronize_threads(); + if (test_connect_socket(sk, this_ip_dest, test_server_port) <= 0) + test_error("failed to connect()"); + synchronize_threads(); + + ns_before = netstat_read(); + before_aogood = netstat_get(ns_before, "TCPAOGood", NULL); + if (test_get_tcp_ao_counters(sk, &ao1)) + test_error("test_get_tcp_ao_counters()"); + + if (test_client_verify(sk, 100, nr_packets, TEST_TIMEOUT_SEC)) { + test_fail("verify failed"); + return NULL; + } + + ns_after = netstat_read(); + after_aogood = netstat_get(ns_after, "TCPAOGood", NULL); + if (test_get_tcp_ao_counters(sk, &ao2)) + test_error("test_get_tcp_ao_counters()"); + netstat_print_diff(ns_before, ns_after); + netstat_free(ns_before); + netstat_free(ns_after); + + if (nr_packets > (after_aogood - before_aogood)) { + test_fail("TCPAOGood counter mismatch: %zu > (%zu - %zu)", + nr_packets, after_aogood, before_aogood); + return NULL; + } + if (test_tcp_ao_counters_cmp("connect", &ao1, &ao2, TEST_CNT_GOOD)) + return NULL; + + test_ok("connect TCPAOGood %" PRIu64 "/%" PRIu64 "/%" PRIu64 " => %" PRIu64 "/%" PRIu64 "/%" PRIu64 ", sent %" PRIu64, + before_aogood, ao1.ao_info_pkt_good, + ao1.key_cnts[0].pkt_good, + after_aogood, ao2.ao_info_pkt_good, + ao2.key_cnts[0].pkt_good, + nr_packets); + return NULL; +} + +int main(int argc, char *argv[]) +{ + test_init(1, server_fn, client_fn); + return 0; +} diff --git a/tools/testing/selftests/net/tcp_ao/icmps-accept.c b/tools/testing/selftests/net/tcp_ao/icmps-accept.c new file mode 120000 index 000000000000..0a5bb85eb260 --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/icmps-accept.c @@ -0,0 +1 @@ +icmps-discard.c
\ No newline at end of file diff --git a/tools/testing/selftests/net/tcp_ao/icmps-discard.c b/tools/testing/selftests/net/tcp_ao/icmps-discard.c new file mode 100644 index 000000000000..d69bcba3c929 --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/icmps-discard.c @@ -0,0 +1,449 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Selftest that verifies that incomping ICMPs are ignored, + * the TCP connection stays alive, no hard or soft errors get reported + * to the usespace and the counter for ignored ICMPs is updated. + * + * RFC5925, 7.8: + * >> A TCP-AO implementation MUST default to ignore incoming ICMPv4 + * messages of Type 3 (destination unreachable), Codes 2-4 (protocol + * unreachable, port unreachable, and fragmentation needed -- ’hard + * errors’), and ICMPv6 Type 1 (destination unreachable), Code 1 + * (administratively prohibited) and Code 4 (port unreachable) intended + * for connections in synchronized states (ESTABLISHED, FIN-WAIT-1, FIN- + * WAIT-2, CLOSE-WAIT, CLOSING, LAST-ACK, TIME-WAIT) that match MKTs. + * + * Author: Dmitry Safonov <dima@arista.com> + */ +#include <inttypes.h> +#include <linux/icmp.h> +#include <linux/icmpv6.h> +#include <linux/ipv6.h> +#include <netinet/in.h> +#include <netinet/ip.h> +#include <sys/socket.h> +#include "aolib.h" +#include "../../../../include/linux/compiler.h" + +const size_t packets_nr = 20; +const size_t packet_size = 100; +const char *tcpao_icmps = "TCPAODroppedIcmps"; + +#ifdef IPV6_TEST +const char *dst_unreach = "Icmp6InDestUnreachs"; +const int sk_ip_level = SOL_IPV6; +const int sk_recverr = IPV6_RECVERR; +#else +const char *dst_unreach = "InDestUnreachs"; +const int sk_ip_level = SOL_IP; +const int sk_recverr = IP_RECVERR; +#endif + +/* Server is expected to fail with hard error if ::accept_icmp is set */ +#ifdef TEST_ICMPS_ACCEPT +# define test_icmps_fail test_ok +# define test_icmps_ok test_fail +#else +# define test_icmps_fail test_fail +# define test_icmps_ok test_ok +#endif + +static void serve_interfered(int sk) +{ + ssize_t test_quota = packet_size * packets_nr * 10; + uint64_t dest_unreach_a, dest_unreach_b; + uint64_t icmp_ignored_a, icmp_ignored_b; + struct tcp_ao_counters ao_cnt1, ao_cnt2; + bool counter_not_found; + struct netstat *ns_after, *ns_before; + ssize_t bytes; + + ns_before = netstat_read(); + dest_unreach_a = netstat_get(ns_before, dst_unreach, NULL); + icmp_ignored_a = netstat_get(ns_before, tcpao_icmps, NULL); + if (test_get_tcp_ao_counters(sk, &ao_cnt1)) + test_error("test_get_tcp_ao_counters()"); + bytes = test_server_run(sk, test_quota, 0); + ns_after = netstat_read(); + netstat_print_diff(ns_before, ns_after); + dest_unreach_b = netstat_get(ns_after, dst_unreach, NULL); + icmp_ignored_b = netstat_get(ns_after, tcpao_icmps, + &counter_not_found); + if (test_get_tcp_ao_counters(sk, &ao_cnt2)) + test_error("test_get_tcp_ao_counters()"); + + netstat_free(ns_before); + netstat_free(ns_after); + + if (dest_unreach_a >= dest_unreach_b) { + test_fail("%s counter didn't change: %" PRIu64 " >= %" PRIu64, + dst_unreach, dest_unreach_a, dest_unreach_b); + return; + } + test_ok("%s delivered %" PRIu64, + dst_unreach, dest_unreach_b - dest_unreach_a); + if (bytes < 0) + test_icmps_fail("Server failed with %zd: %s", bytes, strerrordesc_np(-bytes)); + else + test_icmps_ok("Server survived %zd bytes of traffic", test_quota); + if (counter_not_found) { + test_fail("Not found %s counter", tcpao_icmps); + return; + } +#ifdef TEST_ICMPS_ACCEPT + test_tcp_ao_counters_cmp(NULL, &ao_cnt1, &ao_cnt2, TEST_CNT_GOOD); +#else + test_tcp_ao_counters_cmp(NULL, &ao_cnt1, &ao_cnt2, TEST_CNT_GOOD | TEST_CNT_AO_DROPPED_ICMP); +#endif + if (icmp_ignored_a >= icmp_ignored_b) { + test_icmps_fail("%s counter didn't change: %" PRIu64 " >= %" PRIu64, + tcpao_icmps, icmp_ignored_a, icmp_ignored_b); + return; + } + test_icmps_ok("ICMPs ignored %" PRIu64, icmp_ignored_b - icmp_ignored_a); +} + +static void *server_fn(void *arg) +{ + int val, sk, lsk; + bool accept_icmps = false; + + lsk = test_listen_socket(this_ip_addr, test_server_port, 1); + +#ifdef TEST_ICMPS_ACCEPT + accept_icmps = true; +#endif + + if (test_set_ao_flags(lsk, false, accept_icmps)) + test_error("setsockopt(TCP_AO_INFO)"); + + if (test_add_key(lsk, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); + synchronize_threads(); + + if (test_wait_fd(lsk, TEST_TIMEOUT_SEC, 0)) + test_error("test_wait_fd()"); + + sk = accept(lsk, NULL, NULL); + if (sk < 0) + test_error("accept()"); + + /* Fail on hard ip errors, such as dest unreachable (RFC1122) */ + val = 1; + if (setsockopt(sk, sk_ip_level, sk_recverr, &val, sizeof(val))) + test_error("setsockopt()"); + + synchronize_threads(); + + serve_interfered(sk); + return NULL; +} + +static size_t packets_sent; +static size_t icmps_sent; + +static uint32_t checksum4_nofold(void *data, size_t len, uint32_t sum) +{ + uint16_t *words = data; + size_t i; + + for (i = 0; i < len / sizeof(uint16_t); i++) + sum += words[i]; + if (len & 1) + sum += ((char *)data)[len - 1]; + return sum; +} + +static uint16_t checksum4_fold(void *data, size_t len, uint32_t sum) +{ + sum = checksum4_nofold(data, len, sum); + while (sum > 0xFFFF) + sum = (sum & 0xFFFF) + (sum >> 16); + return ~sum; +} + +static void set_ip4hdr(struct iphdr *iph, size_t packet_len, int proto, + struct sockaddr_in *src, struct sockaddr_in *dst) +{ + iph->version = 4; + iph->ihl = 5; + iph->tos = 0; + iph->tot_len = htons(packet_len); + iph->ttl = 2; + iph->protocol = proto; + iph->saddr = src->sin_addr.s_addr; + iph->daddr = dst->sin_addr.s_addr; + iph->check = checksum4_fold((void *)iph, iph->ihl << 1, 0); +} + +static void icmp_interfere4(uint8_t type, uint8_t code, uint32_t rcv_nxt, + struct sockaddr_in *src, struct sockaddr_in *dst) +{ + int sk = socket(AF_INET, SOCK_RAW, IPPROTO_RAW); + struct { + struct iphdr iph; + struct icmphdr icmph; + struct iphdr iphe; + struct { + uint16_t sport; + uint16_t dport; + uint32_t seq; + } tcph; + } packet = {}; + size_t packet_len; + ssize_t bytes; + + if (sk < 0) + test_error("socket(AF_INET, SOCK_RAW, IPPROTO_RAW)"); + + packet_len = sizeof(packet); + set_ip4hdr(&packet.iph, packet_len, IPPROTO_ICMP, src, dst); + + packet.icmph.type = type; + packet.icmph.code = code; + if (code == ICMP_FRAG_NEEDED) { + randomize_buffer(&packet.icmph.un.frag.mtu, + sizeof(packet.icmph.un.frag.mtu)); + } + + packet_len = sizeof(packet.iphe) + sizeof(packet.tcph); + set_ip4hdr(&packet.iphe, packet_len, IPPROTO_TCP, dst, src); + + packet.tcph.sport = dst->sin_port; + packet.tcph.dport = src->sin_port; + packet.tcph.seq = htonl(rcv_nxt); + + packet_len = sizeof(packet) - sizeof(packet.iph); + packet.icmph.checksum = checksum4_fold((void *)&packet.icmph, + packet_len, 0); + + bytes = sendto(sk, &packet, sizeof(packet), 0, + (struct sockaddr *)dst, sizeof(*dst)); + if (bytes != sizeof(packet)) + test_error("send(): %zd", bytes); + icmps_sent++; + + close(sk); +} + +static void set_ip6hdr(struct ipv6hdr *iph, size_t packet_len, int proto, + struct sockaddr_in6 *src, struct sockaddr_in6 *dst) +{ + iph->version = 6; + iph->payload_len = htons(packet_len); + iph->nexthdr = proto; + iph->hop_limit = 2; + iph->saddr = src->sin6_addr; + iph->daddr = dst->sin6_addr; +} + +static inline uint16_t csum_fold(uint32_t csum) +{ + uint32_t sum = csum; + + sum = (sum & 0xffff) + (sum >> 16); + sum = (sum & 0xffff) + (sum >> 16); + return (uint16_t)~sum; +} + +static inline uint32_t csum_add(uint32_t csum, uint32_t addend) +{ + uint32_t res = csum; + + res += addend; + return res + (res < addend); +} + +noinline uint32_t checksum6_nofold(void *data, size_t len, uint32_t sum) +{ + uint16_t *words = data; + size_t i; + + for (i = 0; i < len / sizeof(uint16_t); i++) + sum = csum_add(sum, words[i]); + if (len & 1) + sum = csum_add(sum, ((char *)data)[len - 1]); + return sum; +} + +noinline uint16_t icmp6_checksum(struct sockaddr_in6 *src, + struct sockaddr_in6 *dst, + void *ptr, size_t len, uint8_t proto) +{ + struct { + struct in6_addr saddr; + struct in6_addr daddr; + uint32_t payload_len; + uint8_t zero[3]; + uint8_t nexthdr; + } pseudo_header = {}; + uint32_t sum; + + pseudo_header.saddr = src->sin6_addr; + pseudo_header.daddr = dst->sin6_addr; + pseudo_header.payload_len = htonl(len); + pseudo_header.nexthdr = proto; + + sum = checksum6_nofold(&pseudo_header, sizeof(pseudo_header), 0); + sum = checksum6_nofold(ptr, len, sum); + + return csum_fold(sum); +} + +static void icmp6_interfere(int type, int code, uint32_t rcv_nxt, + struct sockaddr_in6 *src, struct sockaddr_in6 *dst) +{ + int sk = socket(AF_INET6, SOCK_RAW, IPPROTO_RAW); + struct sockaddr_in6 dst_raw = *dst; + struct { + struct ipv6hdr iph; + struct icmp6hdr icmph; + struct ipv6hdr iphe; + struct { + uint16_t sport; + uint16_t dport; + uint32_t seq; + } tcph; + } packet = {}; + size_t packet_len; + ssize_t bytes; + + + if (sk < 0) + test_error("socket(AF_INET6, SOCK_RAW, IPPROTO_RAW)"); + + packet_len = sizeof(packet) - sizeof(packet.iph); + set_ip6hdr(&packet.iph, packet_len, IPPROTO_ICMPV6, src, dst); + + packet.icmph.icmp6_type = type; + packet.icmph.icmp6_code = code; + + packet_len = sizeof(packet.iphe) + sizeof(packet.tcph); + set_ip6hdr(&packet.iphe, packet_len, IPPROTO_TCP, dst, src); + + packet.tcph.sport = dst->sin6_port; + packet.tcph.dport = src->sin6_port; + packet.tcph.seq = htonl(rcv_nxt); + + packet_len = sizeof(packet) - sizeof(packet.iph); + + packet.icmph.icmp6_cksum = icmp6_checksum(src, dst, + (void *)&packet.icmph, packet_len, IPPROTO_ICMPV6); + + dst_raw.sin6_port = htons(IPPROTO_RAW); + bytes = sendto(sk, &packet, sizeof(packet), 0, + (struct sockaddr *)&dst_raw, sizeof(dst_raw)); + if (bytes != sizeof(packet)) + test_error("send(): %zd", bytes); + icmps_sent++; + + close(sk); +} + +static uint32_t get_rcv_nxt(int sk) +{ + int val = TCP_REPAIR_ON; + uint32_t ret; + socklen_t sz = sizeof(ret); + + if (setsockopt(sk, SOL_TCP, TCP_REPAIR, &val, sizeof(val))) + test_error("setsockopt(TCP_REPAIR)"); + val = TCP_RECV_QUEUE; + if (setsockopt(sk, SOL_TCP, TCP_REPAIR_QUEUE, &val, sizeof(val))) + test_error("setsockopt(TCP_REPAIR_QUEUE)"); + if (getsockopt(sk, SOL_TCP, TCP_QUEUE_SEQ, &ret, &sz)) + test_error("getsockopt(TCP_QUEUE_SEQ)"); + val = TCP_REPAIR_OFF_NO_WP; + if (setsockopt(sk, SOL_TCP, TCP_REPAIR, &val, sizeof(val))) + test_error("setsockopt(TCP_REPAIR)"); + return ret; +} + +static void icmp_interfere(const size_t nr, uint32_t rcv_nxt, void *src, void *dst) +{ + struct sockaddr_in *saddr4 = src; + struct sockaddr_in *daddr4 = dst; + struct sockaddr_in6 *saddr6 = src; + struct sockaddr_in6 *daddr6 = dst; + size_t i; + + if (saddr4->sin_family != daddr4->sin_family) + test_error("Different address families"); + + for (i = 0; i < nr; i++) { + if (saddr4->sin_family == AF_INET) { + icmp_interfere4(ICMP_DEST_UNREACH, ICMP_PROT_UNREACH, + rcv_nxt, saddr4, daddr4); + icmp_interfere4(ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, + rcv_nxt, saddr4, daddr4); + icmp_interfere4(ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, + rcv_nxt, saddr4, daddr4); + icmps_sent += 3; + } else if (saddr4->sin_family == AF_INET6) { + icmp6_interfere(ICMPV6_DEST_UNREACH, + ICMPV6_ADM_PROHIBITED, + rcv_nxt, saddr6, daddr6); + icmp6_interfere(ICMPV6_DEST_UNREACH, + ICMPV6_PORT_UNREACH, + rcv_nxt, saddr6, daddr6); + icmps_sent += 2; + } else { + test_error("Not ip address family"); + } + } +} + +static void send_interfered(int sk) +{ + const unsigned int timeout = TEST_TIMEOUT_SEC; + struct sockaddr_in6 src, dst; + socklen_t addr_sz; + + addr_sz = sizeof(src); + if (getsockname(sk, &src, &addr_sz)) + test_error("getsockname()"); + addr_sz = sizeof(dst); + if (getpeername(sk, &dst, &addr_sz)) + test_error("getpeername()"); + + while (1) { + uint32_t rcv_nxt; + + if (test_client_verify(sk, packet_size, packets_nr, timeout)) { + test_fail("client: connection is broken"); + return; + } + packets_sent += packets_nr; + rcv_nxt = get_rcv_nxt(sk); + icmp_interfere(packets_nr, rcv_nxt, (void *)&src, (void *)&dst); + } +} + +static void *client_fn(void *arg) +{ + int sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP); + + if (sk < 0) + test_error("socket()"); + + if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); + + synchronize_threads(); + if (test_connect_socket(sk, this_ip_dest, test_server_port) <= 0) + test_error("failed to connect()"); + synchronize_threads(); + + send_interfered(sk); + + /* Not expecting client to quit */ + test_fail("client disconnected"); + + return NULL; +} + +int main(int argc, char *argv[]) +{ + test_init(3, server_fn, client_fn); + return 0; +} diff --git a/tools/testing/selftests/net/tcp_ao/key-management.c b/tools/testing/selftests/net/tcp_ao/key-management.c new file mode 100644 index 000000000000..24e62120b792 --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/key-management.c @@ -0,0 +1,1186 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Author: Dmitry Safonov <dima@arista.com> */ +#include <inttypes.h> +#include "../../../../include/linux/kernel.h" +#include "aolib.h" + +const size_t nr_packets = 20; +const size_t msg_len = 100; +const size_t quota = nr_packets * msg_len; +union tcp_addr wrong_addr; +#define SECOND_PASSWORD "at all times sincere friends of freedom have been rare" +#define fault(type) (inj == FAULT_ ## type) + +static const int test_vrf_ifindex = 200; +static const uint8_t test_vrf_tabid = 42; +static void setup_vrfs(void) +{ + int err; + + if (!kernel_config_has(KCONFIG_NET_VRF)) + return; + + err = add_vrf("ksft-vrf", test_vrf_tabid, test_vrf_ifindex, -1); + if (err) + test_error("Failed to add a VRF: %d", err); + + err = link_set_up("ksft-vrf"); + if (err) + test_error("Failed to bring up a VRF"); + + err = ip_route_add_vrf(veth_name, TEST_FAMILY, + this_ip_addr, this_ip_dest, test_vrf_tabid); + if (err) + test_error("Failed to add a route to VRF"); +} + + +static int prepare_sk(union tcp_addr *addr, uint8_t sndid, uint8_t rcvid) +{ + int sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP); + + if (sk < 0) + test_error("socket()"); + + if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest, + DEFAULT_TEST_PREFIX, 100, 100)) + test_error("test_add_key()"); + + if (addr && test_add_key(sk, SECOND_PASSWORD, *addr, + DEFAULT_TEST_PREFIX, sndid, rcvid)) + test_error("test_add_key()"); + + return sk; +} + +static int prepare_lsk(union tcp_addr *addr, uint8_t sndid, uint8_t rcvid) +{ + int sk = prepare_sk(addr, sndid, rcvid); + + if (listen(sk, 10)) + test_error("listen()"); + + return sk; +} + +static int test_del_key(int sk, uint8_t sndid, uint8_t rcvid, bool async, + int current_key, int rnext_key) +{ + struct tcp_ao_info_opt ao_info = {}; + struct tcp_ao_getsockopt key = {}; + struct tcp_ao_del del = {}; + sockaddr_af sockaddr; + int err; + + tcp_addr_to_sockaddr_in(&del.addr, &this_ip_dest, 0); + del.prefix = DEFAULT_TEST_PREFIX; + del.sndid = sndid; + del.rcvid = rcvid; + + if (current_key >= 0) { + del.set_current = 1; + del.current_key = (uint8_t)current_key; + } + if (rnext_key >= 0) { + del.set_rnext = 1; + del.rnext = (uint8_t)rnext_key; + } + + err = setsockopt(sk, IPPROTO_TCP, TCP_AO_DEL_KEY, &del, sizeof(del)); + if (err < 0) + return -errno; + + if (async) + return 0; + + tcp_addr_to_sockaddr_in(&sockaddr, &this_ip_dest, 0); + err = test_get_one_ao(sk, &key, &sockaddr, sizeof(sockaddr), + DEFAULT_TEST_PREFIX, sndid, rcvid); + if (!err) + return -EEXIST; + if (err != -E2BIG) + test_error("getsockopt()"); + if (current_key < 0 && rnext_key < 0) + return 0; + if (test_get_ao_info(sk, &ao_info)) + test_error("getsockopt(TCP_AO_INFO) failed"); + if (current_key >= 0 && ao_info.current_key != (uint8_t)current_key) + return -ENOTRECOVERABLE; + if (rnext_key >= 0 && ao_info.rnext != (uint8_t)rnext_key) + return -ENOTRECOVERABLE; + return 0; +} + +static void try_delete_key(char *tst_name, int sk, uint8_t sndid, uint8_t rcvid, + bool async, int current_key, int rnext_key, + fault_t inj) +{ + int err; + + err = test_del_key(sk, sndid, rcvid, async, current_key, rnext_key); + if ((err == -EBUSY && fault(BUSY)) || (err == -EINVAL && fault(CURRNEXT))) { + test_ok("%s: key deletion was prevented", tst_name); + return; + } + if (err && fault(FIXME)) { + test_xfail("%s: failed to delete the key %u:%u %d", + tst_name, sndid, rcvid, err); + return; + } + if (!err) { + if (fault(BUSY) || fault(CURRNEXT)) { + test_fail("%s: the key was deleted %u:%u %d", tst_name, + sndid, rcvid, err); + } else { + test_ok("%s: the key was deleted", tst_name); + } + return; + } + test_fail("%s: can't delete the key %u:%u %d", tst_name, sndid, rcvid, err); +} + +static int test_set_key(int sk, int current_keyid, int rnext_keyid) +{ + struct tcp_ao_info_opt ao_info = {}; + int err; + + if (current_keyid >= 0) { + ao_info.set_current = 1; + ao_info.current_key = (uint8_t)current_keyid; + } + if (rnext_keyid >= 0) { + ao_info.set_rnext = 1; + ao_info.rnext = (uint8_t)rnext_keyid; + } + + err = test_set_ao_info(sk, &ao_info); + if (err) + return err; + if (test_get_ao_info(sk, &ao_info)) + test_error("getsockopt(TCP_AO_INFO) failed"); + if (current_keyid >= 0 && ao_info.current_key != (uint8_t)current_keyid) + return -ENOTRECOVERABLE; + if (rnext_keyid >= 0 && ao_info.rnext != (uint8_t)rnext_keyid) + return -ENOTRECOVERABLE; + return 0; +} + +static int test_add_current_rnext_key(int sk, const char *key, uint8_t keyflags, + union tcp_addr in_addr, uint8_t prefix, + bool set_current, bool set_rnext, + uint8_t sndid, uint8_t rcvid) +{ + struct tcp_ao_add tmp = {}; + int err; + + err = test_prepare_key(&tmp, DEFAULT_TEST_ALGO, in_addr, + set_current, set_rnext, + prefix, 0, sndid, rcvid, 0, keyflags, + strlen(key), key); + if (err) + return err; + + + err = setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &tmp, sizeof(tmp)); + if (err < 0) + return -errno; + + return test_verify_socket_key(sk, &tmp); +} + +static int __try_add_current_rnext_key(int sk, const char *key, uint8_t keyflags, + union tcp_addr in_addr, uint8_t prefix, + bool set_current, bool set_rnext, + uint8_t sndid, uint8_t rcvid) +{ + struct tcp_ao_info_opt ao_info = {}; + int err; + + err = test_add_current_rnext_key(sk, key, keyflags, in_addr, prefix, + set_current, set_rnext, sndid, rcvid); + if (err) + return err; + + if (test_get_ao_info(sk, &ao_info)) + test_error("getsockopt(TCP_AO_INFO) failed"); + if (set_current && ao_info.current_key != sndid) + return -ENOTRECOVERABLE; + if (set_rnext && ao_info.rnext != rcvid) + return -ENOTRECOVERABLE; + return 0; +} + +static void try_add_current_rnext_key(char *tst_name, int sk, const char *key, + uint8_t keyflags, + union tcp_addr in_addr, uint8_t prefix, + bool set_current, bool set_rnext, + uint8_t sndid, uint8_t rcvid, fault_t inj) +{ + int err; + + err = __try_add_current_rnext_key(sk, key, keyflags, in_addr, prefix, + set_current, set_rnext, sndid, rcvid); + if (!err && !fault(CURRNEXT)) { + test_ok("%s", tst_name); + return; + } + if (err == -EINVAL && fault(CURRNEXT)) { + test_ok("%s", tst_name); + return; + } + test_fail("%s", tst_name); +} + +static void check_closed_socket(void) +{ + int sk; + + sk = prepare_sk(&this_ip_dest, 200, 200); + try_delete_key("closed socket, delete a key", sk, 200, 200, 0, -1, -1, 0); + try_delete_key("closed socket, delete all keys", sk, 100, 100, 0, -1, -1, 0); + close(sk); + + sk = prepare_sk(&this_ip_dest, 200, 200); + if (test_set_key(sk, 100, 200)) + test_error("failed to set current/rnext keys"); + try_delete_key("closed socket, delete current key", sk, 100, 100, 0, -1, -1, FAULT_BUSY); + try_delete_key("closed socket, delete rnext key", sk, 200, 200, 0, -1, -1, FAULT_BUSY); + close(sk); + + sk = prepare_sk(&this_ip_dest, 200, 200); + if (test_add_key(sk, "Glory to heros!", this_ip_dest, + DEFAULT_TEST_PREFIX, 10, 11)) + test_error("test_add_key()"); + if (test_add_key(sk, "Glory to Ukraine!", this_ip_dest, + DEFAULT_TEST_PREFIX, 12, 13)) + test_error("test_add_key()"); + try_delete_key("closed socket, delete a key + set current/rnext", sk, 100, 100, 0, 10, 13, 0); + try_delete_key("closed socket, force-delete current key", sk, 10, 11, 0, 200, -1, 0); + try_delete_key("closed socket, force-delete rnext key", sk, 12, 13, 0, -1, 200, 0); + try_delete_key("closed socket, delete current+rnext key", sk, 200, 200, 0, -1, -1, FAULT_BUSY); + close(sk); + + sk = prepare_sk(&this_ip_dest, 200, 200); + if (test_set_key(sk, 100, 200)) + test_error("failed to set current/rnext keys"); + try_add_current_rnext_key("closed socket, add + change current key", + sk, "Laaaa! Lalala-la-la-lalala...", 0, + this_ip_dest, DEFAULT_TEST_PREFIX, + true, false, 10, 20, 0); + try_add_current_rnext_key("closed socket, add + change rnext key", + sk, "Laaaa! Lalala-la-la-lalala...", 0, + this_ip_dest, DEFAULT_TEST_PREFIX, + false, true, 20, 10, 0); + close(sk); +} + +static void assert_no_current_rnext(const char *tst_msg, int sk) +{ + struct tcp_ao_info_opt ao_info = {}; + + if (test_get_ao_info(sk, &ao_info)) + test_error("getsockopt(TCP_AO_INFO) failed"); + + errno = 0; + if (ao_info.set_current || ao_info.set_rnext) { + test_xfail("%s: the socket has current/rnext keys: %d:%d", + tst_msg, + (ao_info.set_current) ? ao_info.current_key : -1, + (ao_info.set_rnext) ? ao_info.rnext : -1); + } else { + test_ok("%s: the socket has no current/rnext keys", tst_msg); + } +} + +static void assert_no_tcp_repair(void) +{ + struct tcp_ao_repair ao_img = {}; + socklen_t len = sizeof(ao_img); + int sk, err; + + sk = prepare_sk(&this_ip_dest, 200, 200); + test_enable_repair(sk); + if (listen(sk, 10)) + test_error("listen()"); + errno = 0; + err = getsockopt(sk, SOL_TCP, TCP_AO_REPAIR, &ao_img, &len); + if (err && errno == EPERM) + test_ok("listen socket, getsockopt(TCP_AO_REPAIR) is restricted"); + else + test_fail("listen socket, getsockopt(TCP_AO_REPAIR) works"); + errno = 0; + err = setsockopt(sk, SOL_TCP, TCP_AO_REPAIR, &ao_img, sizeof(ao_img)); + if (err && errno == EPERM) + test_ok("listen socket, setsockopt(TCP_AO_REPAIR) is restricted"); + else + test_fail("listen socket, setsockopt(TCP_AO_REPAIR) works"); + close(sk); +} + +static void check_listen_socket(void) +{ + int sk, err; + + sk = prepare_lsk(&this_ip_dest, 200, 200); + try_delete_key("listen socket, delete a key", sk, 200, 200, 0, -1, -1, 0); + try_delete_key("listen socket, delete all keys", sk, 100, 100, 0, -1, -1, 0); + close(sk); + + sk = prepare_lsk(&this_ip_dest, 200, 200); + err = test_set_key(sk, 100, -1); + if (err == -EINVAL) + test_ok("listen socket, setting current key not allowed"); + else + test_fail("listen socket, set current key"); + err = test_set_key(sk, -1, 200); + if (err == -EINVAL) + test_ok("listen socket, setting rnext key not allowed"); + else + test_fail("listen socket, set rnext key"); + close(sk); + + sk = prepare_sk(&this_ip_dest, 200, 200); + if (test_set_key(sk, 100, 200)) + test_error("failed to set current/rnext keys"); + if (listen(sk, 10)) + test_error("listen()"); + assert_no_current_rnext("listen() after current/rnext keys set", sk); + try_delete_key("listen socket, delete current key from before listen()", sk, 100, 100, 0, -1, -1, FAULT_FIXME); + try_delete_key("listen socket, delete rnext key from before listen()", sk, 200, 200, 0, -1, -1, FAULT_FIXME); + close(sk); + + assert_no_tcp_repair(); + + sk = prepare_lsk(&this_ip_dest, 200, 200); + if (test_add_key(sk, "Glory to heros!", this_ip_dest, + DEFAULT_TEST_PREFIX, 10, 11)) + test_error("test_add_key()"); + if (test_add_key(sk, "Glory to Ukraine!", this_ip_dest, + DEFAULT_TEST_PREFIX, 12, 13)) + test_error("test_add_key()"); + try_delete_key("listen socket, delete a key + set current/rnext", sk, + 100, 100, 0, 10, 13, FAULT_CURRNEXT); + try_delete_key("listen socket, force-delete current key", sk, + 10, 11, 0, 200, -1, FAULT_CURRNEXT); + try_delete_key("listen socket, force-delete rnext key", sk, + 12, 13, 0, -1, 200, FAULT_CURRNEXT); + try_delete_key("listen socket, delete a key", sk, + 200, 200, 0, -1, -1, 0); + close(sk); + + sk = prepare_lsk(&this_ip_dest, 200, 200); + try_add_current_rnext_key("listen socket, add + change current key", + sk, "Laaaa! Lalala-la-la-lalala...", 0, + this_ip_dest, DEFAULT_TEST_PREFIX, + true, false, 10, 20, FAULT_CURRNEXT); + try_add_current_rnext_key("listen socket, add + change rnext key", + sk, "Laaaa! Lalala-la-la-lalala...", 0, + this_ip_dest, DEFAULT_TEST_PREFIX, + false, true, 20, 10, FAULT_CURRNEXT); + close(sk); +} + +static const char *fips_fpath = "/proc/sys/crypto/fips_enabled"; +static bool is_fips_enabled(void) +{ + static int fips_checked = -1; + FILE *fenabled; + int enabled; + + if (fips_checked >= 0) + return !!fips_checked; + if (access(fips_fpath, R_OK)) { + if (errno != ENOENT) + test_error("Can't open %s", fips_fpath); + fips_checked = 0; + return false; + } + fenabled = fopen(fips_fpath, "r"); + if (!fenabled) + test_error("Can't open %s", fips_fpath); + if (fscanf(fenabled, "%d", &enabled) != 1) + test_error("Can't read from %s", fips_fpath); + fclose(fenabled); + fips_checked = !!enabled; + return !!fips_checked; +} + +struct test_key { + char password[TCP_AO_MAXKEYLEN]; + const char *alg; + unsigned int len; + uint8_t client_keyid; + uint8_t server_keyid; + uint8_t maclen; + uint8_t matches_client : 1, + matches_server : 1, + matches_vrf : 1, + is_current : 1, + is_rnext : 1, + used_on_server_tx : 1, + used_on_client_tx : 1, + skip_counters_checks : 1; +}; + +struct key_collection { + unsigned int nr_keys; + struct test_key *keys; +}; + +static struct key_collection collection; + +#define TEST_MAX_MACLEN 16 +const char *test_algos[] = { + "cmac(aes128)", + "hmac(sha1)", "hmac(sha512)", "hmac(sha384)", "hmac(sha256)", + "hmac(sha224)", "hmac(sha3-512)", + /* only if !CONFIG_FIPS */ +#define TEST_NON_FIPS_ALGOS 2 + "hmac(rmd160)", "hmac(md5)" +}; +const unsigned int test_maclens[] = { 1, 4, 12, 16 }; +#define MACLEN_SHIFT 2 +#define ALGOS_SHIFT 4 + +static unsigned int make_mask(unsigned int shift, unsigned int prev_shift) +{ + unsigned int ret = BIT(shift) - 1; + + return ret << prev_shift; +} + +static void init_key_in_collection(unsigned int index, bool randomized) +{ + struct test_key *key = &collection.keys[index]; + unsigned int algos_nr, algos_index; + + /* Same for randomized and non-randomized test flows */ + key->client_keyid = index; + key->server_keyid = 127 + index; + key->matches_client = 1; + key->matches_server = 1; + key->matches_vrf = 1; + /* not really even random, but good enough for a test */ + key->len = rand() % (TCP_AO_MAXKEYLEN - TEST_TCP_AO_MINKEYLEN); + key->len += TEST_TCP_AO_MINKEYLEN; + randomize_buffer(key->password, key->len); + + if (randomized) { + key->maclen = (rand() % TEST_MAX_MACLEN) + 1; + algos_index = rand(); + } else { + unsigned int shift = MACLEN_SHIFT; + + key->maclen = test_maclens[index & make_mask(shift, 0)]; + algos_index = index & make_mask(ALGOS_SHIFT, shift); + } + algos_nr = ARRAY_SIZE(test_algos); + if (is_fips_enabled()) + algos_nr -= TEST_NON_FIPS_ALGOS; + key->alg = test_algos[algos_index % algos_nr]; +} + +static int init_default_key_collection(unsigned int nr_keys, bool randomized) +{ + size_t key_sz = sizeof(collection.keys[0]); + + if (!nr_keys) { + free(collection.keys); + collection.keys = NULL; + return 0; + } + + /* + * All keys have uniq sndid/rcvid and sndid != rcvid in order to + * check for any bugs/issues for different keyids, visible to both + * peers. Keyid == 254 is unused. + */ + if (nr_keys > 127) + test_error("Test requires too many keys, correct the source"); + + collection.keys = reallocarray(collection.keys, nr_keys, key_sz); + if (!collection.keys) + return -ENOMEM; + + memset(collection.keys, 0, nr_keys * key_sz); + collection.nr_keys = nr_keys; + while (nr_keys--) + init_key_in_collection(nr_keys, randomized); + + return 0; +} + +static void test_key_error(const char *msg, struct test_key *key) +{ + test_error("%s: key: { %s, %u:%u, %u, %u:%u:%u:%u:%u (%u)}", + msg, key->alg, key->client_keyid, key->server_keyid, + key->maclen, key->matches_client, key->matches_server, + key->matches_vrf, key->is_current, key->is_rnext, key->len); +} + +static int test_add_key_cr(int sk, const char *pwd, unsigned int pwd_len, + union tcp_addr addr, uint8_t vrf, + uint8_t sndid, uint8_t rcvid, + uint8_t maclen, const char *alg, + bool set_current, bool set_rnext) +{ + struct tcp_ao_add tmp = {}; + uint8_t keyflags = 0; + int err; + + if (!alg) + alg = DEFAULT_TEST_ALGO; + + if (vrf) + keyflags |= TCP_AO_KEYF_IFINDEX; + err = test_prepare_key(&tmp, alg, addr, set_current, set_rnext, + DEFAULT_TEST_PREFIX, vrf, sndid, rcvid, maclen, + keyflags, pwd_len, pwd); + if (err) + return err; + + err = setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &tmp, sizeof(tmp)); + if (err < 0) + return -errno; + + return test_verify_socket_key(sk, &tmp); +} + +static void verify_current_rnext(const char *tst, int sk, + int current_keyid, int rnext_keyid) +{ + struct tcp_ao_info_opt ao_info = {}; + + if (test_get_ao_info(sk, &ao_info)) + test_error("getsockopt(TCP_AO_INFO) failed"); + + errno = 0; + if (current_keyid >= 0) { + if (!ao_info.set_current) + test_fail("%s: the socket doesn't have current key", tst); + else if (ao_info.current_key != current_keyid) + test_fail("%s: current key is not the expected one %d != %u", + tst, current_keyid, ao_info.current_key); + else + test_ok("%s: current key %u as expected", + tst, ao_info.current_key); + } + if (rnext_keyid >= 0) { + if (!ao_info.set_rnext) + test_fail("%s: the socket doesn't have rnext key", tst); + else if (ao_info.rnext != rnext_keyid) + test_fail("%s: rnext key is not the expected one %d != %u", + tst, rnext_keyid, ao_info.rnext); + else + test_ok("%s: rnext key %u as expected", tst, ao_info.rnext); + } +} + + +static int key_collection_socket(bool server, unsigned int port) +{ + unsigned int i; + int sk; + + if (server) + sk = test_listen_socket(this_ip_addr, port, 1); + else + sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP); + if (sk < 0) + test_error("socket()"); + + for (i = 0; i < collection.nr_keys; i++) { + struct test_key *key = &collection.keys[i]; + union tcp_addr *addr = &wrong_addr; + uint8_t sndid, rcvid, vrf; + bool set_current = false, set_rnext = false; + + if (key->matches_vrf) + vrf = 0; + else + vrf = test_vrf_ifindex; + if (server) { + if (key->matches_client) + addr = &this_ip_dest; + sndid = key->server_keyid; + rcvid = key->client_keyid; + } else { + if (key->matches_server) + addr = &this_ip_dest; + sndid = key->client_keyid; + rcvid = key->server_keyid; + key->used_on_client_tx = set_current = key->is_current; + key->used_on_server_tx = set_rnext = key->is_rnext; + } + + if (test_add_key_cr(sk, key->password, key->len, + *addr, vrf, sndid, rcvid, key->maclen, + key->alg, set_current, set_rnext)) + test_key_error("setsockopt(TCP_AO_ADD_KEY)", key); +#ifdef DEBUG + test_print("%s [%u/%u] key: { %s, %u:%u, %u, %u:%u:%u:%u (%u)}", + server ? "server" : "client", i, collection.nr_keys, + key->alg, rcvid, sndid, key->maclen, + key->matches_client, key->matches_server, + key->is_current, key->is_rnext, key->len); +#endif + } + return sk; +} + +static void verify_counters(const char *tst_name, bool is_listen_sk, bool server, + struct tcp_ao_counters *a, struct tcp_ao_counters *b) +{ + unsigned int i; + + __test_tcp_ao_counters_cmp(tst_name, a, b, TEST_CNT_GOOD); + + for (i = 0; i < collection.nr_keys; i++) { + struct test_key *key = &collection.keys[i]; + uint8_t sndid, rcvid; + bool rx_cnt_expected; + + if (key->skip_counters_checks) + continue; + if (server) { + sndid = key->server_keyid; + rcvid = key->client_keyid; + rx_cnt_expected = key->used_on_client_tx; + } else { + sndid = key->client_keyid; + rcvid = key->server_keyid; + rx_cnt_expected = key->used_on_server_tx; + } + + test_tcp_ao_key_counters_cmp(tst_name, a, b, + rx_cnt_expected ? TEST_CNT_KEY_GOOD : 0, + sndid, rcvid); + } + test_tcp_ao_counters_free(a); + test_tcp_ao_counters_free(b); + test_ok("%s: passed counters checks", tst_name); +} + +static struct tcp_ao_getsockopt *lookup_key(struct tcp_ao_getsockopt *buf, + size_t len, int sndid, int rcvid) +{ + size_t i; + + for (i = 0; i < len; i++) { + if (sndid >= 0 && buf[i].sndid != sndid) + continue; + if (rcvid >= 0 && buf[i].rcvid != rcvid) + continue; + return &buf[i]; + } + return NULL; +} + +static void verify_keys(const char *tst_name, int sk, + bool is_listen_sk, bool server) +{ + socklen_t len = sizeof(struct tcp_ao_getsockopt); + struct tcp_ao_getsockopt *keys; + bool passed_test = true; + unsigned int i; + + keys = calloc(collection.nr_keys, len); + if (!keys) + test_error("calloc()"); + + keys->nkeys = collection.nr_keys; + keys->get_all = 1; + + if (getsockopt(sk, IPPROTO_TCP, TCP_AO_GET_KEYS, keys, &len)) { + free(keys); + test_error("getsockopt(TCP_AO_GET_KEYS)"); + } + + for (i = 0; i < collection.nr_keys; i++) { + struct test_key *key = &collection.keys[i]; + struct tcp_ao_getsockopt *dump_key; + bool is_kdf_aes_128_cmac = false; + bool is_cmac_aes = false; + uint8_t sndid, rcvid; + bool matches = false; + + if (server) { + if (key->matches_client) + matches = true; + sndid = key->server_keyid; + rcvid = key->client_keyid; + } else { + if (key->matches_server) + matches = true; + sndid = key->client_keyid; + rcvid = key->server_keyid; + } + if (!key->matches_vrf) + matches = false; + /* no keys get removed on the original listener socket */ + if (is_listen_sk) + matches = true; + + dump_key = lookup_key(keys, keys->nkeys, sndid, rcvid); + if (matches != !!dump_key) { + test_fail("%s: key %u:%u %s%s on the socket", + tst_name, sndid, rcvid, + key->matches_vrf ? "" : "[vrf] ", + matches ? "disappeared" : "yet present"); + passed_test = false; + goto out; + } + if (!dump_key) + continue; + + if (!strcmp("cmac(aes128)", key->alg)) { + is_kdf_aes_128_cmac = (key->len != 16); + is_cmac_aes = true; + } + + if (is_cmac_aes) { + if (strcmp(dump_key->alg_name, "cmac(aes)")) { + test_fail("%s: key %u:%u cmac(aes) has unexpected alg %s", + tst_name, sndid, rcvid, + dump_key->alg_name); + passed_test = false; + continue; + } + } else if (strcmp(dump_key->alg_name, key->alg)) { + test_fail("%s: key %u:%u has unexpected alg %s != %s", + tst_name, sndid, rcvid, + dump_key->alg_name, key->alg); + passed_test = false; + continue; + } + if (is_kdf_aes_128_cmac) { + if (dump_key->keylen != 16) { + test_fail("%s: key %u:%u cmac(aes128) has unexpected len %u", + tst_name, sndid, rcvid, + dump_key->keylen); + continue; + } + } else if (dump_key->keylen != key->len) { + test_fail("%s: key %u:%u changed password len %u != %u", + tst_name, sndid, rcvid, + dump_key->keylen, key->len); + passed_test = false; + continue; + } + if (!is_kdf_aes_128_cmac && + memcmp(dump_key->key, key->password, key->len)) { + test_fail("%s: key %u:%u has different password", + tst_name, sndid, rcvid); + passed_test = false; + continue; + } + if (dump_key->maclen != key->maclen) { + test_fail("%s: key %u:%u changed maclen %u != %u", + tst_name, sndid, rcvid, + dump_key->maclen, key->maclen); + passed_test = false; + continue; + } + } + + if (passed_test) + test_ok("%s: The socket keys are consistent with the expectations", + tst_name); +out: + free(keys); +} + +static int start_server(const char *tst_name, unsigned int port, size_t quota, + struct tcp_ao_counters *begin, + unsigned int current_index, unsigned int rnext_index) +{ + struct tcp_ao_counters lsk_c1, lsk_c2; + ssize_t bytes; + int sk, lsk; + + synchronize_threads(); /* 1: key collection initialized */ + lsk = key_collection_socket(true, port); + if (test_get_tcp_ao_counters(lsk, &lsk_c1)) + test_error("test_get_tcp_ao_counters()"); + synchronize_threads(); /* 2: MKTs added => connect() */ + if (test_wait_fd(lsk, TEST_TIMEOUT_SEC, 0)) + test_error("test_wait_fd()"); + + sk = accept(lsk, NULL, NULL); + if (sk < 0) + test_error("accept()"); + if (test_get_tcp_ao_counters(sk, begin)) + test_error("test_get_tcp_ao_counters()"); + + synchronize_threads(); /* 3: accepted => send data */ + if (test_get_tcp_ao_counters(lsk, &lsk_c2)) + test_error("test_get_tcp_ao_counters()"); + verify_keys(tst_name, lsk, true, true); + close(lsk); + + bytes = test_server_run(sk, quota, TEST_TIMEOUT_SEC); + if (bytes != quota) + test_fail("%s: server served: %zd", tst_name, bytes); + else + test_ok("%s: server alive", tst_name); + + verify_counters(tst_name, true, true, &lsk_c1, &lsk_c2); + + return sk; +} + +static void end_server(const char *tst_name, int sk, + struct tcp_ao_counters *begin) +{ + struct tcp_ao_counters end; + + if (test_get_tcp_ao_counters(sk, &end)) + test_error("test_get_tcp_ao_counters()"); + verify_keys(tst_name, sk, false, true); + + synchronize_threads(); /* 4: verified => closed */ + close(sk); + + verify_counters(tst_name, false, true, begin, &end); + synchronize_threads(); /* 5: counters */ +} + +static void try_server_run(const char *tst_name, unsigned int port, size_t quota, + unsigned int current_index, unsigned int rnext_index) +{ + struct tcp_ao_counters tmp; + int sk; + + sk = start_server(tst_name, port, quota, &tmp, + current_index, rnext_index); + end_server(tst_name, sk, &tmp); +} + +static void server_rotations(const char *tst_name, unsigned int port, + size_t quota, unsigned int rotations, + unsigned int current_index, unsigned int rnext_index) +{ + struct tcp_ao_counters tmp; + unsigned int i; + int sk; + + sk = start_server(tst_name, port, quota, &tmp, + current_index, rnext_index); + + for (i = current_index + 1; rotations > 0; i++, rotations--) { + ssize_t bytes; + + if (i >= collection.nr_keys) + i = 0; + bytes = test_server_run(sk, quota, TEST_TIMEOUT_SEC); + if (bytes != quota) { + test_fail("%s: server served: %zd", tst_name, bytes); + return; + } + verify_current_rnext(tst_name, sk, + collection.keys[i].server_keyid, -1); + synchronize_threads(); /* verify current/rnext */ + } + end_server(tst_name, sk, &tmp); +} + +static int run_client(const char *tst_name, unsigned int port, + unsigned int nr_keys, int current_index, int rnext_index, + struct tcp_ao_counters *before, + const size_t msg_sz, const size_t msg_nr) +{ + int sk; + + synchronize_threads(); /* 1: key collection initialized */ + sk = key_collection_socket(false, port); + + if (current_index >= 0 || rnext_index >= 0) { + int sndid = -1, rcvid = -1; + + if (current_index >= 0) + sndid = collection.keys[current_index].client_keyid; + if (rnext_index >= 0) + rcvid = collection.keys[rnext_index].server_keyid; + if (test_set_key(sk, sndid, rcvid)) + test_error("failed to set current/rnext keys"); + } + if (before && test_get_tcp_ao_counters(sk, before)) + test_error("test_get_tcp_ao_counters()"); + + synchronize_threads(); /* 2: MKTs added => connect() */ + if (test_connect_socket(sk, this_ip_dest, port++) <= 0) + test_error("failed to connect()"); + if (current_index < 0) + current_index = nr_keys - 1; + if (rnext_index < 0) + rnext_index = nr_keys - 1; + collection.keys[current_index].used_on_client_tx = 1; + collection.keys[rnext_index].used_on_server_tx = 1; + + synchronize_threads(); /* 3: accepted => send data */ + if (test_client_verify(sk, msg_sz, msg_nr, TEST_TIMEOUT_SEC)) { + test_fail("verify failed"); + close(sk); + if (before) + test_tcp_ao_counters_free(before); + return -1; + } + + return sk; +} + +static int start_client(const char *tst_name, unsigned int port, + unsigned int nr_keys, int current_index, int rnext_index, + struct tcp_ao_counters *before, + const size_t msg_sz, const size_t msg_nr) +{ + if (init_default_key_collection(nr_keys, true)) + test_error("Failed to init the key collection"); + + return run_client(tst_name, port, nr_keys, current_index, + rnext_index, before, msg_sz, msg_nr); +} + +static void end_client(const char *tst_name, int sk, unsigned int nr_keys, + int current_index, int rnext_index, + struct tcp_ao_counters *start) +{ + struct tcp_ao_counters end; + + /* Some application may become dependent on this kernel choice */ + if (current_index < 0) + current_index = nr_keys - 1; + if (rnext_index < 0) + rnext_index = nr_keys - 1; + verify_current_rnext(tst_name, sk, + collection.keys[current_index].client_keyid, + collection.keys[rnext_index].server_keyid); + if (start && test_get_tcp_ao_counters(sk, &end)) + test_error("test_get_tcp_ao_counters()"); + verify_keys(tst_name, sk, false, false); + synchronize_threads(); /* 4: verify => closed */ + close(sk); + if (start) + verify_counters(tst_name, false, false, start, &end); + synchronize_threads(); /* 5: counters */ +} + +static void try_unmatched_keys(int sk, int *rnext_index) +{ + struct test_key *key; + unsigned int i = 0; + int err; + + do { + key = &collection.keys[i]; + if (!key->matches_server) + break; + } while (++i < collection.nr_keys); + if (key->matches_server) + test_error("all keys on client match the server"); + + err = test_add_key_cr(sk, key->password, key->len, wrong_addr, + 0, key->client_keyid, key->server_keyid, + key->maclen, key->alg, 0, 0); + if (!err) { + test_fail("Added a key with non-matching ip-address for established sk"); + return; + } + if (err == -EINVAL) + test_ok("Can't add a key with non-matching ip-address for established sk"); + else + test_error("Failed to add a key"); + + err = test_add_key_cr(sk, key->password, key->len, this_ip_dest, + test_vrf_ifindex, + key->client_keyid, key->server_keyid, + key->maclen, key->alg, 0, 0); + if (!err) { + test_fail("Added a key with non-matching VRF for established sk"); + return; + } + if (err == -EINVAL) + test_ok("Can't add a key with non-matching VRF for established sk"); + else + test_error("Failed to add a key"); + + for (i = 0; i < collection.nr_keys; i++) { + key = &collection.keys[i]; + if (!key->matches_client) + break; + } + if (key->matches_client) + test_error("all keys on server match the client"); + if (test_set_key(sk, -1, key->server_keyid)) + test_error("Can't change the current key"); + if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC)) + test_fail("verify failed"); + *rnext_index = i; +} + +static int client_non_matching(const char *tst_name, unsigned int port, + unsigned int nr_keys, + int current_index, int rnext_index, + const size_t msg_sz, const size_t msg_nr) +{ + unsigned int i; + + if (init_default_key_collection(nr_keys, true)) + test_error("Failed to init the key collection"); + + for (i = 0; i < nr_keys; i++) { + /* key (0, 0) matches */ + collection.keys[i].matches_client = !!((i + 3) % 4); + collection.keys[i].matches_server = !!((i + 2) % 4); + if (kernel_config_has(KCONFIG_NET_VRF)) + collection.keys[i].matches_vrf = !!((i + 1) % 4); + } + + return run_client(tst_name, port, nr_keys, current_index, + rnext_index, NULL, msg_sz, msg_nr); +} + +static void check_current_back(const char *tst_name, unsigned int port, + unsigned int nr_keys, + unsigned int current_index, unsigned int rnext_index, + unsigned int rotate_to_index) +{ + struct tcp_ao_counters tmp; + int sk; + + sk = start_client(tst_name, port, nr_keys, current_index, rnext_index, + &tmp, msg_len, nr_packets); + if (sk < 0) + return; + if (test_set_key(sk, collection.keys[rotate_to_index].client_keyid, -1)) + test_error("Can't change the current key"); + if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC)) + test_fail("verify failed"); + /* There is a race here: between setting the current_key with + * setsockopt(TCP_AO_INFO) and starting to send some data - there + * might have been a segment received with the desired + * RNext_key set. In turn that would mean that the first outgoing + * segment will have the desired current_key (flipped back). + * Which is what the user/test wants. As it's racy, skip checking + * the counters, yet check what are the resulting current/rnext + * keys on both sides. + */ + collection.keys[rotate_to_index].skip_counters_checks = 1; + + end_client(tst_name, sk, nr_keys, current_index, rnext_index, &tmp); +} + +static void roll_over_keys(const char *tst_name, unsigned int port, + unsigned int nr_keys, unsigned int rotations, + unsigned int current_index, unsigned int rnext_index) +{ + struct tcp_ao_counters tmp; + unsigned int i; + int sk; + + sk = start_client(tst_name, port, nr_keys, current_index, rnext_index, + &tmp, msg_len, nr_packets); + if (sk < 0) + return; + for (i = rnext_index + 1; rotations > 0; i++, rotations--) { + if (i >= collection.nr_keys) + i = 0; + if (test_set_key(sk, -1, collection.keys[i].server_keyid)) + test_error("Can't change the Rnext key"); + if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC)) { + test_fail("verify failed"); + close(sk); + test_tcp_ao_counters_free(&tmp); + return; + } + verify_current_rnext(tst_name, sk, -1, + collection.keys[i].server_keyid); + collection.keys[i].used_on_server_tx = 1; + synchronize_threads(); /* verify current/rnext */ + } + end_client(tst_name, sk, nr_keys, current_index, rnext_index, &tmp); +} + +static void try_client_run(const char *tst_name, unsigned int port, + unsigned int nr_keys, int current_index, int rnext_index) +{ + struct tcp_ao_counters tmp; + int sk; + + sk = start_client(tst_name, port, nr_keys, current_index, rnext_index, + &tmp, msg_len, nr_packets); + if (sk < 0) + return; + end_client(tst_name, sk, nr_keys, current_index, rnext_index, &tmp); +} + +static void try_client_match(const char *tst_name, unsigned int port, + unsigned int nr_keys, + int current_index, int rnext_index) +{ + int sk; + + sk = client_non_matching(tst_name, port, nr_keys, current_index, + rnext_index, msg_len, nr_packets); + if (sk < 0) + return; + try_unmatched_keys(sk, &rnext_index); + end_client(tst_name, sk, nr_keys, current_index, rnext_index, NULL); +} + +static void *server_fn(void *arg) +{ + unsigned int port = test_server_port; + + setup_vrfs(); + try_server_run("server: Check current/rnext keys unset before connect()", + port++, quota, 19, 19); + try_server_run("server: Check current/rnext keys set before connect()", + port++, quota, 10, 10); + try_server_run("server: Check current != rnext keys set before connect()", + port++, quota, 5, 10); + try_server_run("server: Check current flapping back on peer's RnextKey request", + port++, quota * 2, 5, 10); + server_rotations("server: Rotate over all different keys", port++, + quota, 20, 0, 0); + try_server_run("server: Check accept() => established key matching", + port++, quota * 2, 0, 0); + + synchronize_threads(); /* don't race to exit: client exits */ + return NULL; +} + +static void check_established_socket(void) +{ + unsigned int port = test_server_port; + + setup_vrfs(); + try_client_run("client: Check current/rnext keys unset before connect()", + port++, 20, -1, -1); + try_client_run("client: Check current/rnext keys set before connect()", + port++, 20, 10, 10); + try_client_run("client: Check current != rnext keys set before connect()", + port++, 20, 10, 5); + check_current_back("client: Check current flapping back on peer's RnextKey request", + port++, 20, 10, 5, 2); + roll_over_keys("client: Rotate over all different keys", port++, + 20, 20, 0, 0); + try_client_match("client: Check connect() => established key matching", + port++, 20, 0, 0); +} + +static void *client_fn(void *arg) +{ + if (inet_pton(TEST_FAMILY, TEST_WRONG_IP, &wrong_addr) != 1) + test_error("Can't convert ip address %s", TEST_WRONG_IP); + check_closed_socket(); + check_listen_socket(); + check_established_socket(); + return NULL; +} + +int main(int argc, char *argv[]) +{ + test_init(120, server_fn, client_fn); + return 0; +} diff --git a/tools/testing/selftests/net/tcp_ao/lib/aolib.h b/tools/testing/selftests/net/tcp_ao/lib/aolib.h new file mode 100644 index 000000000000..fbc7f6111815 --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/lib/aolib.h @@ -0,0 +1,605 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * TCP-AO selftest library. Provides helpers to unshare network + * namespaces, create veth, assign ip addresses, set routes, + * manipulate socket options, read network counter and etc. + * Author: Dmitry Safonov <dima@arista.com> + */ +#ifndef _AOLIB_H_ +#define _AOLIB_H_ + +#include <arpa/inet.h> +#include <errno.h> +#include <linux/snmp.h> +#include <linux/tcp.h> +#include <netinet/in.h> +#include <stdarg.h> +#include <stdbool.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <sys/syscall.h> +#include <unistd.h> + +#include "../../../../../include/linux/stringify.h" +#include "../../../../../include/linux/bits.h" + +#ifndef SOL_TCP +/* can't include <netinet/tcp.h> as including <linux/tcp.h> */ +# define SOL_TCP 6 /* TCP level */ +#endif + +/* Working around ksft, see the comment in lib/setup.c */ +extern void __test_msg(const char *buf); +extern void __test_ok(const char *buf); +extern void __test_fail(const char *buf); +extern void __test_xfail(const char *buf); +extern void __test_error(const char *buf); +extern void __test_skip(const char *buf); + +__attribute__((__format__(__printf__, 2, 3))) +static inline void __test_print(void (*fn)(const char *), const char *fmt, ...) +{ +#define TEST_MSG_BUFFER_SIZE 4096 + char buf[TEST_MSG_BUFFER_SIZE]; + va_list arg; + + va_start(arg, fmt); + vsnprintf(buf, sizeof(buf), fmt, arg); + va_end(arg); + fn(buf); +} + +#define test_print(fmt, ...) \ + __test_print(__test_msg, "%ld[%s:%u] " fmt "\n", \ + syscall(SYS_gettid), \ + __FILE__, __LINE__, ##__VA_ARGS__) + +#define test_ok(fmt, ...) \ + __test_print(__test_ok, fmt "\n", ##__VA_ARGS__) +#define test_skip(fmt, ...) \ + __test_print(__test_skip, fmt "\n", ##__VA_ARGS__) +#define test_xfail(fmt, ...) \ + __test_print(__test_xfail, fmt "\n", ##__VA_ARGS__) + +#define test_fail(fmt, ...) \ +do { \ + if (errno) \ + __test_print(__test_fail, fmt ": %m\n", ##__VA_ARGS__); \ + else \ + __test_print(__test_fail, fmt "\n", ##__VA_ARGS__); \ + test_failed(); \ +} while (0) + +#define KSFT_FAIL 1 +#define test_error(fmt, ...) \ +do { \ + if (errno) \ + __test_print(__test_error, "%ld[%s:%u] " fmt ": %m\n", \ + syscall(SYS_gettid), __FILE__, __LINE__, \ + ##__VA_ARGS__); \ + else \ + __test_print(__test_error, "%ld[%s:%u] " fmt "\n", \ + syscall(SYS_gettid), __FILE__, __LINE__, \ + ##__VA_ARGS__); \ + exit(KSFT_FAIL); \ +} while (0) + +enum test_fault { + FAULT_TIMEOUT = 1, + FAULT_KEYREJECT, + FAULT_PREINSTALL_AO, + FAULT_PREINSTALL_MD5, + FAULT_POSTINSTALL, + FAULT_BUSY, + FAULT_CURRNEXT, + FAULT_FIXME, +}; +typedef enum test_fault fault_t; + +enum test_needs_kconfig { + KCONFIG_NET_NS = 0, /* required */ + KCONFIG_VETH, /* required */ + KCONFIG_TCP_AO, /* required */ + KCONFIG_TCP_MD5, /* optional, for TCP-MD5 features */ + KCONFIG_NET_VRF, /* optional, for L3/VRF testing */ + __KCONFIG_LAST__ +}; +extern bool kernel_config_has(enum test_needs_kconfig k); +extern const char *tests_skip_reason[__KCONFIG_LAST__]; +static inline bool should_skip_test(const char *tst_name, + enum test_needs_kconfig k) +{ + if (kernel_config_has(k)) + return false; + test_skip("%s: %s", tst_name, tests_skip_reason[k]); + return true; +} + +union tcp_addr { + struct in_addr a4; + struct in6_addr a6; +}; + +typedef void *(*thread_fn)(void *); +extern void test_failed(void); +extern void __test_init(unsigned int ntests, int family, unsigned int prefix, + union tcp_addr addr1, union tcp_addr addr2, + thread_fn peer1, thread_fn peer2); + +static inline void test_init2(unsigned int ntests, + thread_fn peer1, thread_fn peer2, + int family, unsigned int prefix, + const char *addr1, const char *addr2) +{ + union tcp_addr taddr1, taddr2; + + if (inet_pton(family, addr1, &taddr1) != 1) + test_error("Can't convert ip address %s", addr1); + if (inet_pton(family, addr2, &taddr2) != 1) + test_error("Can't convert ip address %s", addr2); + + __test_init(ntests, family, prefix, taddr1, taddr2, peer1, peer2); +} +extern void test_add_destructor(void (*d)(void)); + +/* To adjust optmem socket limit, approximately estimate a number, + * that is bigger than sizeof(struct tcp_ao_key). + */ +#define KERNEL_TCP_AO_KEY_SZ_ROUND_UP 300 + +extern void test_set_optmem(size_t value); +extern size_t test_get_optmem(void); + +extern const struct sockaddr_in6 addr_any6; +extern const struct sockaddr_in addr_any4; + +#ifdef IPV6_TEST +# define __TEST_CLIENT_IP(n) ("2001:db8:" __stringify(n) "::1") +# define TEST_CLIENT_IP __TEST_CLIENT_IP(1) +# define TEST_WRONG_IP "2001:db8:253::1" +# define TEST_SERVER_IP "2001:db8:254::1" +# define TEST_NETWORK "2001::" +# define TEST_PREFIX 128 +# define TEST_FAMILY AF_INET6 +# define SOCKADDR_ANY addr_any6 +# define sockaddr_af struct sockaddr_in6 +#else +# define __TEST_CLIENT_IP(n) ("10.0." __stringify(n) ".1") +# define TEST_CLIENT_IP __TEST_CLIENT_IP(1) +# define TEST_WRONG_IP "10.0.253.1" +# define TEST_SERVER_IP "10.0.254.1" +# define TEST_NETWORK "10.0.0.0" +# define TEST_PREFIX 32 +# define TEST_FAMILY AF_INET +# define SOCKADDR_ANY addr_any4 +# define sockaddr_af struct sockaddr_in +#endif + +static inline union tcp_addr gen_tcp_addr(union tcp_addr net, size_t n) +{ + union tcp_addr ret = net; + +#ifdef IPV6_TEST + ret.a6.s6_addr32[3] = htonl(n & (BIT(32) - 1)); + ret.a6.s6_addr32[2] = htonl((n >> 32) & (BIT(32) - 1)); +#else + ret.a4.s_addr = htonl(ntohl(net.a4.s_addr) + n); +#endif + + return ret; +} + +static inline void tcp_addr_to_sockaddr_in(void *dest, + const union tcp_addr *src, + unsigned int port) +{ + sockaddr_af *out = dest; + + memset(out, 0, sizeof(*out)); +#ifdef IPV6_TEST + out->sin6_family = AF_INET6; + out->sin6_port = port; + out->sin6_addr = src->a6; +#else + out->sin_family = AF_INET; + out->sin_port = port; + out->sin_addr = src->a4; +#endif +} + +static inline void test_init(unsigned int ntests, + thread_fn peer1, thread_fn peer2) +{ + test_init2(ntests, peer1, peer2, TEST_FAMILY, TEST_PREFIX, + TEST_SERVER_IP, TEST_CLIENT_IP); +} +extern void synchronize_threads(void); +extern void switch_ns(int fd); + +extern __thread union tcp_addr this_ip_addr; +extern __thread union tcp_addr this_ip_dest; +extern int test_family; + +extern void randomize_buffer(void *buf, size_t buflen); +extern int open_netns(void); +extern int unshare_open_netns(void); +extern const char veth_name[]; +extern int add_veth(const char *name, int nsfda, int nsfdb); +extern int add_vrf(const char *name, uint32_t tabid, int ifindex, int nsfd); +extern int ip_addr_add(const char *intf, int family, + union tcp_addr addr, uint8_t prefix); +extern int ip_route_add(const char *intf, int family, + union tcp_addr src, union tcp_addr dst); +extern int ip_route_add_vrf(const char *intf, int family, + union tcp_addr src, union tcp_addr dst, + uint8_t vrf); +extern int link_set_up(const char *intf); + +extern const unsigned int test_server_port; +extern int test_wait_fd(int sk, time_t sec, bool write); +extern int __test_connect_socket(int sk, const char *device, + void *addr, size_t addr_sz, time_t timeout); +extern int __test_listen_socket(int backlog, void *addr, size_t addr_sz); + +static inline int test_listen_socket(const union tcp_addr taddr, + unsigned int port, int backlog) +{ + sockaddr_af addr; + + tcp_addr_to_sockaddr_in(&addr, &taddr, htons(port)); + return __test_listen_socket(backlog, (void *)&addr, sizeof(addr)); +} + +/* + * In order for selftests to work under CONFIG_CRYPTO_FIPS=y, + * the password should be loger than 14 bytes, see hmac_setkey() + */ +#define TEST_TCP_AO_MINKEYLEN 14 +#define DEFAULT_TEST_PASSWORD "In this hour, I do not believe that any darkness will endure." + +#ifndef DEFAULT_TEST_ALGO +#define DEFAULT_TEST_ALGO "cmac(aes128)" +#endif + +#ifdef IPV6_TEST +#define DEFAULT_TEST_PREFIX 128 +#else +#define DEFAULT_TEST_PREFIX 32 +#endif + +/* + * Timeout on syscalls where failure is not expected. + * You may want to rise it if the test machine is very busy. + */ +#ifndef TEST_TIMEOUT_SEC +#define TEST_TIMEOUT_SEC 5 +#endif + +/* + * Timeout on connect() where a failure is expected. + * If set to 0 - kernel will try to retransmit SYN number of times, set in + * /proc/sys/net/ipv4/tcp_syn_retries + * By default set to 1 to make tests pass faster on non-busy machine. + */ +#ifndef TEST_RETRANSMIT_SEC +#define TEST_RETRANSMIT_SEC 1 +#endif + +static inline int _test_connect_socket(int sk, const union tcp_addr taddr, + unsigned int port, time_t timeout) +{ + sockaddr_af addr; + + tcp_addr_to_sockaddr_in(&addr, &taddr, htons(port)); + return __test_connect_socket(sk, veth_name, + (void *)&addr, sizeof(addr), timeout); +} + +static inline int test_connect_socket(int sk, const union tcp_addr taddr, + unsigned int port) +{ + return _test_connect_socket(sk, taddr, port, TEST_TIMEOUT_SEC); +} + +extern int __test_set_md5(int sk, void *addr, size_t addr_sz, + uint8_t prefix, int vrf, const char *password); +static inline int test_set_md5(int sk, const union tcp_addr in_addr, + uint8_t prefix, int vrf, const char *password) +{ + sockaddr_af addr; + + if (prefix > DEFAULT_TEST_PREFIX) + prefix = DEFAULT_TEST_PREFIX; + + tcp_addr_to_sockaddr_in(&addr, &in_addr, 0); + return __test_set_md5(sk, (void *)&addr, sizeof(addr), + prefix, vrf, password); +} + +extern int test_prepare_key_sockaddr(struct tcp_ao_add *ao, const char *alg, + void *addr, size_t addr_sz, bool set_current, bool set_rnext, + uint8_t prefix, uint8_t vrf, + uint8_t sndid, uint8_t rcvid, uint8_t maclen, + uint8_t keyflags, uint8_t keylen, const char *key); + +static inline int test_prepare_key(struct tcp_ao_add *ao, + const char *alg, union tcp_addr taddr, + bool set_current, bool set_rnext, + uint8_t prefix, uint8_t vrf, + uint8_t sndid, uint8_t rcvid, uint8_t maclen, + uint8_t keyflags, uint8_t keylen, const char *key) +{ + sockaddr_af addr; + + tcp_addr_to_sockaddr_in(&addr, &taddr, 0); + return test_prepare_key_sockaddr(ao, alg, (void *)&addr, sizeof(addr), + set_current, set_rnext, prefix, vrf, sndid, rcvid, + maclen, keyflags, keylen, key); +} + +static inline int test_prepare_def_key(struct tcp_ao_add *ao, + const char *key, uint8_t keyflags, + union tcp_addr in_addr, uint8_t prefix, uint8_t vrf, + uint8_t sndid, uint8_t rcvid) +{ + if (prefix > DEFAULT_TEST_PREFIX) + prefix = DEFAULT_TEST_PREFIX; + + return test_prepare_key(ao, DEFAULT_TEST_ALGO, in_addr, false, false, + prefix, vrf, sndid, rcvid, 0, keyflags, + strlen(key), key); +} + +extern int test_get_one_ao(int sk, struct tcp_ao_getsockopt *out, + void *addr, size_t addr_sz, + uint8_t prefix, uint8_t sndid, uint8_t rcvid); +extern int test_get_ao_info(int sk, struct tcp_ao_info_opt *out); +extern int test_set_ao_info(int sk, struct tcp_ao_info_opt *in); +extern int test_cmp_getsockopt_setsockopt(const struct tcp_ao_add *a, + const struct tcp_ao_getsockopt *b); +extern int test_cmp_getsockopt_setsockopt_ao(const struct tcp_ao_info_opt *a, + const struct tcp_ao_info_opt *b); + +static inline int test_verify_socket_key(int sk, struct tcp_ao_add *key) +{ + struct tcp_ao_getsockopt key2 = {}; + int err; + + err = test_get_one_ao(sk, &key2, &key->addr, sizeof(key->addr), + key->prefix, key->sndid, key->rcvid); + if (err) + return err; + + return test_cmp_getsockopt_setsockopt(key, &key2); +} + +static inline int test_add_key_vrf(int sk, + const char *key, uint8_t keyflags, + union tcp_addr in_addr, uint8_t prefix, + uint8_t vrf, uint8_t sndid, uint8_t rcvid) +{ + struct tcp_ao_add tmp = {}; + int err; + + err = test_prepare_def_key(&tmp, key, keyflags, in_addr, prefix, + vrf, sndid, rcvid); + if (err) + return err; + + err = setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &tmp, sizeof(tmp)); + if (err < 0) + return -errno; + + return test_verify_socket_key(sk, &tmp); +} + +static inline int test_add_key(int sk, const char *key, + union tcp_addr in_addr, uint8_t prefix, + uint8_t sndid, uint8_t rcvid) +{ + return test_add_key_vrf(sk, key, 0, in_addr, prefix, 0, sndid, rcvid); +} + +static inline int test_verify_socket_ao(int sk, struct tcp_ao_info_opt *ao) +{ + struct tcp_ao_info_opt ao2 = {}; + int err; + + err = test_get_ao_info(sk, &ao2); + if (err) + return err; + + return test_cmp_getsockopt_setsockopt_ao(ao, &ao2); +} + +static inline int test_set_ao_flags(int sk, bool ao_required, bool accept_icmps) +{ + struct tcp_ao_info_opt ao = {}; + int err; + + err = test_get_ao_info(sk, &ao); + /* Maybe ao_info wasn't allocated yet */ + if (err && err != -ENOENT) + return err; + + ao.ao_required = !!ao_required; + ao.accept_icmps = !!accept_icmps; + err = test_set_ao_info(sk, &ao); + if (err) + return err; + + return test_verify_socket_ao(sk, &ao); +} + +extern ssize_t test_server_run(int sk, ssize_t quota, time_t timeout_sec); +extern ssize_t test_client_loop(int sk, char *buf, size_t buf_sz, + const size_t msg_len, time_t timeout_sec); +extern int test_client_verify(int sk, const size_t msg_len, const size_t nr, + time_t timeout_sec); + +struct tcp_ao_key_counters { + uint8_t sndid; + uint8_t rcvid; + uint64_t pkt_good; + uint64_t pkt_bad; +}; + +struct tcp_ao_counters { + /* per-netns */ + uint64_t netns_ao_good; + uint64_t netns_ao_bad; + uint64_t netns_ao_key_not_found; + uint64_t netns_ao_required; + uint64_t netns_ao_dropped_icmp; + /* per-socket */ + uint64_t ao_info_pkt_good; + uint64_t ao_info_pkt_bad; + uint64_t ao_info_pkt_key_not_found; + uint64_t ao_info_pkt_ao_required; + uint64_t ao_info_pkt_dropped_icmp; + /* per-key */ + size_t nr_keys; + struct tcp_ao_key_counters *key_cnts; +}; +extern int test_get_tcp_ao_counters(int sk, struct tcp_ao_counters *out); + +#define TEST_CNT_KEY_GOOD BIT(0) +#define TEST_CNT_KEY_BAD BIT(1) +#define TEST_CNT_SOCK_GOOD BIT(2) +#define TEST_CNT_SOCK_BAD BIT(3) +#define TEST_CNT_SOCK_KEY_NOT_FOUND BIT(4) +#define TEST_CNT_SOCK_AO_REQUIRED BIT(5) +#define TEST_CNT_SOCK_DROPPED_ICMP BIT(6) +#define TEST_CNT_NS_GOOD BIT(7) +#define TEST_CNT_NS_BAD BIT(8) +#define TEST_CNT_NS_KEY_NOT_FOUND BIT(9) +#define TEST_CNT_NS_AO_REQUIRED BIT(10) +#define TEST_CNT_NS_DROPPED_ICMP BIT(11) +typedef uint16_t test_cnt; + +#define TEST_CNT_AO_GOOD (TEST_CNT_SOCK_GOOD | TEST_CNT_NS_GOOD) +#define TEST_CNT_AO_BAD (TEST_CNT_SOCK_BAD | TEST_CNT_NS_BAD) +#define TEST_CNT_AO_KEY_NOT_FOUND (TEST_CNT_SOCK_KEY_NOT_FOUND | \ + TEST_CNT_NS_KEY_NOT_FOUND) +#define TEST_CNT_AO_REQUIRED (TEST_CNT_SOCK_AO_REQUIRED | \ + TEST_CNT_NS_AO_REQUIRED) +#define TEST_CNT_AO_DROPPED_ICMP (TEST_CNT_SOCK_DROPPED_ICMP | \ + TEST_CNT_NS_DROPPED_ICMP) +#define TEST_CNT_GOOD (TEST_CNT_KEY_GOOD | TEST_CNT_AO_GOOD) +#define TEST_CNT_BAD (TEST_CNT_KEY_BAD | TEST_CNT_AO_BAD) + +extern int __test_tcp_ao_counters_cmp(const char *tst_name, + struct tcp_ao_counters *before, struct tcp_ao_counters *after, + test_cnt expected); +extern int test_tcp_ao_key_counters_cmp(const char *tst_name, + struct tcp_ao_counters *before, struct tcp_ao_counters *after, + test_cnt expected, int sndid, int rcvid); +extern void test_tcp_ao_counters_free(struct tcp_ao_counters *cnts); +/* + * Frees buffers allocated in test_get_tcp_ao_counters(). + * The function doesn't expect new keys or keys removed between calls + * to test_get_tcp_ao_counters(). Check key counters manually if they + * may change. + */ +static inline int test_tcp_ao_counters_cmp(const char *tst_name, + struct tcp_ao_counters *before, + struct tcp_ao_counters *after, + test_cnt expected) +{ + int ret; + + ret = __test_tcp_ao_counters_cmp(tst_name, before, after, expected); + if (ret) + goto out; + ret = test_tcp_ao_key_counters_cmp(tst_name, before, after, + expected, -1, -1); +out: + test_tcp_ao_counters_free(before); + test_tcp_ao_counters_free(after); + return ret; +} + +struct netstat; +extern struct netstat *netstat_read(void); +extern void netstat_free(struct netstat *ns); +extern void netstat_print_diff(struct netstat *nsa, struct netstat *nsb); +extern uint64_t netstat_get(struct netstat *ns, + const char *name, bool *not_found); + +static inline uint64_t netstat_get_one(const char *name, bool *not_found) +{ + struct netstat *ns = netstat_read(); + uint64_t ret; + + ret = netstat_get(ns, name, not_found); + + netstat_free(ns); + return ret; +} + +struct tcp_sock_queue { + uint32_t seq; + void *buf; +}; + +struct tcp_sock_state { + struct tcp_info info; + struct tcp_repair_window trw; + struct tcp_sock_queue out; + int outq_len; /* output queue size (not sent + not acked) */ + int outq_nsd_len; /* output queue size (not sent only) */ + struct tcp_sock_queue in; + int inq_len; + int mss; + int timestamp; +}; + +extern void __test_sock_checkpoint(int sk, struct tcp_sock_state *state, + void *addr, size_t addr_size); +static inline void test_sock_checkpoint(int sk, struct tcp_sock_state *state, + sockaddr_af *saddr) +{ + __test_sock_checkpoint(sk, state, saddr, sizeof(*saddr)); +} +extern void test_ao_checkpoint(int sk, struct tcp_ao_repair *state); +extern void __test_sock_restore(int sk, const char *device, + struct tcp_sock_state *state, + void *saddr, void *daddr, size_t addr_size); +static inline void test_sock_restore(int sk, struct tcp_sock_state *state, + sockaddr_af *saddr, + const union tcp_addr daddr, + unsigned int dport) +{ + sockaddr_af addr; + + tcp_addr_to_sockaddr_in(&addr, &daddr, htons(dport)); + __test_sock_restore(sk, veth_name, state, saddr, &addr, sizeof(addr)); +} +extern void test_ao_restore(int sk, struct tcp_ao_repair *state); +extern void test_sock_state_free(struct tcp_sock_state *state); +extern void test_enable_repair(int sk); +extern void test_disable_repair(int sk); +extern void test_kill_sk(int sk); +static inline int test_add_repaired_key(int sk, + const char *key, uint8_t keyflags, + union tcp_addr in_addr, uint8_t prefix, + uint8_t sndid, uint8_t rcvid) +{ + struct tcp_ao_add tmp = {}; + int err; + + err = test_prepare_def_key(&tmp, key, keyflags, in_addr, prefix, + 0, sndid, rcvid); + if (err) + return err; + + tmp.set_current = 1; + tmp.set_rnext = 1; + if (setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &tmp, sizeof(tmp)) < 0) + return -errno; + + return test_verify_socket_key(sk, &tmp); +} + +#endif /* _AOLIB_H_ */ diff --git a/tools/testing/selftests/net/tcp_ao/lib/kconfig.c b/tools/testing/selftests/net/tcp_ao/lib/kconfig.c new file mode 100644 index 000000000000..f279ffc3843b --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/lib/kconfig.c @@ -0,0 +1,148 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Check what features does the kernel support (where the selftest is running). + * Somewhat inspired by CRIU kerndat/kdat kernel features detector. + */ +#include <pthread.h> +#include "aolib.h" + +struct kconfig_t { + int _errno; /* the returned error if not supported */ + int (*check_kconfig)(int *error); +}; + +static int has_net_ns(int *err) +{ + if (access("/proc/self/ns/net", F_OK) < 0) { + *err = errno; + if (errno == ENOENT) + return 0; + test_print("Unable to access /proc/self/ns/net: %m"); + return -errno; + } + return *err = errno = 0; +} + +static int has_veth(int *err) +{ + int orig_netns, ns_a, ns_b; + + orig_netns = open_netns(); + ns_a = unshare_open_netns(); + ns_b = unshare_open_netns(); + + *err = add_veth("check_veth", ns_a, ns_b); + + switch_ns(orig_netns); + close(orig_netns); + close(ns_a); + close(ns_b); + return 0; +} + +static int has_tcp_ao(int *err) +{ + struct sockaddr_in addr = { + .sin_family = test_family, + }; + struct tcp_ao_add tmp = {}; + const char *password = DEFAULT_TEST_PASSWORD; + int sk, ret = 0; + + sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP); + if (sk < 0) { + test_print("socket(): %m"); + return -errno; + } + + tmp.sndid = 100; + tmp.rcvid = 100; + tmp.keylen = strlen(password); + memcpy(tmp.key, password, strlen(password)); + strcpy(tmp.alg_name, "hmac(sha1)"); + memcpy(&tmp.addr, &addr, sizeof(addr)); + *err = 0; + if (setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &tmp, sizeof(tmp)) < 0) { + *err = errno; + if (errno != ENOPROTOOPT) + ret = -errno; + } + close(sk); + return ret; +} + +static int has_tcp_md5(int *err) +{ + union tcp_addr addr_any = {}; + int sk, ret = 0; + + sk = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); + if (sk < 0) { + test_print("socket(): %m"); + return -errno; + } + + /* + * Under CONFIG_CRYPTO_FIPS=y it fails with ENOMEM, rather with + * anything more descriptive. Oh well. + */ + *err = 0; + if (test_set_md5(sk, addr_any, 0, -1, DEFAULT_TEST_PASSWORD)) { + *err = errno; + if (errno != ENOPROTOOPT && errno == ENOMEM) { + test_print("setsockopt(TCP_MD5SIG_EXT): %m"); + ret = -errno; + } + } + close(sk); + return ret; +} + +static int has_vrfs(int *err) +{ + int orig_netns, ns_test, ret = 0; + + orig_netns = open_netns(); + ns_test = unshare_open_netns(); + + *err = add_vrf("ksft-check", 55, 101, ns_test); + if (*err && *err != -EOPNOTSUPP) { + test_print("Failed to add a VRF: %d", *err); + ret = *err; + } + + switch_ns(orig_netns); + close(orig_netns); + close(ns_test); + return ret; +} + +static pthread_mutex_t kconfig_lock = PTHREAD_MUTEX_INITIALIZER; +static struct kconfig_t kconfig[__KCONFIG_LAST__] = { + { -1, has_net_ns }, + { -1, has_veth }, + { -1, has_tcp_ao }, + { -1, has_tcp_md5 }, + { -1, has_vrfs }, +}; + +const char *tests_skip_reason[__KCONFIG_LAST__] = { + "Tests require network namespaces support (CONFIG_NET_NS)", + "Tests require veth support (CONFIG_VETH)", + "Tests require TCP-AO support (CONFIG_TCP_AO)", + "setsockopt(TCP_MD5SIG_EXT) is not supported (CONFIG_TCP_MD5)", + "VRFs are not supported (CONFIG_NET_VRF)", +}; + +bool kernel_config_has(enum test_needs_kconfig k) +{ + bool ret; + + pthread_mutex_lock(&kconfig_lock); + if (kconfig[k]._errno == -1) { + if (kconfig[k].check_kconfig(&kconfig[k]._errno)) + test_error("Failed to initialize kconfig %u", k); + } + ret = kconfig[k]._errno == 0; + pthread_mutex_unlock(&kconfig_lock); + return ret; +} diff --git a/tools/testing/selftests/net/tcp_ao/lib/netlink.c b/tools/testing/selftests/net/tcp_ao/lib/netlink.c new file mode 100644 index 000000000000..7f108493a29a --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/lib/netlink.c @@ -0,0 +1,413 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Original from tools/testing/selftests/net/ipsec.c */ +#include <linux/netlink.h> +#include <linux/random.h> +#include <linux/rtnetlink.h> +#include <linux/veth.h> +#include <net/if.h> +#include <stdint.h> +#include <string.h> +#include <sys/socket.h> + +#include "aolib.h" + +#define MAX_PAYLOAD 2048 + +static int netlink_sock(int *sock, uint32_t *seq_nr, int proto) +{ + if (*sock > 0) { + seq_nr++; + return 0; + } + + *sock = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, proto); + if (*sock < 0) { + test_print("socket(AF_NETLINK)"); + return -1; + } + + randomize_buffer(seq_nr, sizeof(*seq_nr)); + + return 0; +} + +static int netlink_check_answer(int sock, bool quite) +{ + struct nlmsgerror { + struct nlmsghdr hdr; + int error; + struct nlmsghdr orig_msg; + } answer; + + if (recv(sock, &answer, sizeof(answer), 0) < 0) { + test_print("recv()"); + return -1; + } else if (answer.hdr.nlmsg_type != NLMSG_ERROR) { + test_print("expected NLMSG_ERROR, got %d", + (int)answer.hdr.nlmsg_type); + return -1; + } else if (answer.error) { + if (!quite) { + test_print("NLMSG_ERROR: %d: %s", + answer.error, strerror(-answer.error)); + } + return answer.error; + } + + return 0; +} + +static inline struct rtattr *rtattr_hdr(struct nlmsghdr *nh) +{ + return (struct rtattr *)((char *)(nh) + RTA_ALIGN((nh)->nlmsg_len)); +} + +static int rtattr_pack(struct nlmsghdr *nh, size_t req_sz, + unsigned short rta_type, const void *payload, size_t size) +{ + /* NLMSG_ALIGNTO == RTA_ALIGNTO, nlmsg_len already aligned */ + struct rtattr *attr = rtattr_hdr(nh); + size_t nl_size = RTA_ALIGN(nh->nlmsg_len) + RTA_LENGTH(size); + + if (req_sz < nl_size) { + test_print("req buf is too small: %zu < %zu", req_sz, nl_size); + return -1; + } + nh->nlmsg_len = nl_size; + + attr->rta_len = RTA_LENGTH(size); + attr->rta_type = rta_type; + memcpy(RTA_DATA(attr), payload, size); + + return 0; +} + +static struct rtattr *_rtattr_begin(struct nlmsghdr *nh, size_t req_sz, + unsigned short rta_type, const void *payload, size_t size) +{ + struct rtattr *ret = rtattr_hdr(nh); + + if (rtattr_pack(nh, req_sz, rta_type, payload, size)) + return 0; + + return ret; +} + +static inline struct rtattr *rtattr_begin(struct nlmsghdr *nh, size_t req_sz, + unsigned short rta_type) +{ + return _rtattr_begin(nh, req_sz, rta_type, 0, 0); +} + +static inline void rtattr_end(struct nlmsghdr *nh, struct rtattr *attr) +{ + char *nlmsg_end = (char *)nh + nh->nlmsg_len; + + attr->rta_len = nlmsg_end - (char *)attr; +} + +static int veth_pack_peerb(struct nlmsghdr *nh, size_t req_sz, + const char *peer, int ns) +{ + struct ifinfomsg pi; + struct rtattr *peer_attr; + + memset(&pi, 0, sizeof(pi)); + pi.ifi_family = AF_UNSPEC; + pi.ifi_change = 0xFFFFFFFF; + + peer_attr = _rtattr_begin(nh, req_sz, VETH_INFO_PEER, &pi, sizeof(pi)); + if (!peer_attr) + return -1; + + if (rtattr_pack(nh, req_sz, IFLA_IFNAME, peer, strlen(peer))) + return -1; + + if (rtattr_pack(nh, req_sz, IFLA_NET_NS_FD, &ns, sizeof(ns))) + return -1; + + rtattr_end(nh, peer_attr); + + return 0; +} + +static int __add_veth(int sock, uint32_t seq, const char *name, + int ns_a, int ns_b) +{ + uint16_t flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_EXCL | NLM_F_CREATE; + struct { + struct nlmsghdr nh; + struct ifinfomsg info; + char attrbuf[MAX_PAYLOAD]; + } req; + static const char veth_type[] = "veth"; + struct rtattr *link_info, *info_data; + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.info)); + req.nh.nlmsg_type = RTM_NEWLINK; + req.nh.nlmsg_flags = flags; + req.nh.nlmsg_seq = seq; + req.info.ifi_family = AF_UNSPEC; + req.info.ifi_change = 0xFFFFFFFF; + + if (rtattr_pack(&req.nh, sizeof(req), IFLA_IFNAME, name, strlen(name))) + return -1; + + if (rtattr_pack(&req.nh, sizeof(req), IFLA_NET_NS_FD, &ns_a, sizeof(ns_a))) + return -1; + + link_info = rtattr_begin(&req.nh, sizeof(req), IFLA_LINKINFO); + if (!link_info) + return -1; + + if (rtattr_pack(&req.nh, sizeof(req), IFLA_INFO_KIND, veth_type, sizeof(veth_type))) + return -1; + + info_data = rtattr_begin(&req.nh, sizeof(req), IFLA_INFO_DATA); + if (!info_data) + return -1; + + if (veth_pack_peerb(&req.nh, sizeof(req), name, ns_b)) + return -1; + + rtattr_end(&req.nh, info_data); + rtattr_end(&req.nh, link_info); + + if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) { + test_print("send()"); + return -1; + } + return netlink_check_answer(sock, false); +} + +int add_veth(const char *name, int nsfda, int nsfdb) +{ + int route_sock = -1, ret; + uint32_t route_seq; + + if (netlink_sock(&route_sock, &route_seq, NETLINK_ROUTE)) + test_error("Failed to open netlink route socket\n"); + + ret = __add_veth(route_sock, route_seq++, name, nsfda, nsfdb); + close(route_sock); + return ret; +} + +static int __ip_addr_add(int sock, uint32_t seq, const char *intf, + int family, union tcp_addr addr, uint8_t prefix) +{ + uint16_t flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_EXCL | NLM_F_CREATE; + struct { + struct nlmsghdr nh; + struct ifaddrmsg info; + char attrbuf[MAX_PAYLOAD]; + } req; + size_t addr_len = (family == AF_INET) ? sizeof(struct in_addr) : + sizeof(struct in6_addr); + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.info)); + req.nh.nlmsg_type = RTM_NEWADDR; + req.nh.nlmsg_flags = flags; + req.nh.nlmsg_seq = seq; + req.info.ifa_family = family; + req.info.ifa_prefixlen = prefix; + req.info.ifa_index = if_nametoindex(intf); + req.info.ifa_flags = IFA_F_NODAD; + + if (rtattr_pack(&req.nh, sizeof(req), IFA_LOCAL, &addr, addr_len)) + return -1; + + if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) { + test_print("send()"); + return -1; + } + return netlink_check_answer(sock, true); +} + +int ip_addr_add(const char *intf, int family, + union tcp_addr addr, uint8_t prefix) +{ + int route_sock = -1, ret; + uint32_t route_seq; + + if (netlink_sock(&route_sock, &route_seq, NETLINK_ROUTE)) + test_error("Failed to open netlink route socket\n"); + + ret = __ip_addr_add(route_sock, route_seq++, intf, + family, addr, prefix); + + close(route_sock); + return ret; +} + +static int __ip_route_add(int sock, uint32_t seq, const char *intf, int family, + union tcp_addr src, union tcp_addr dst, uint8_t vrf) +{ + struct { + struct nlmsghdr nh; + struct rtmsg rt; + char attrbuf[MAX_PAYLOAD]; + } req; + unsigned int index = if_nametoindex(intf); + size_t addr_len = (family == AF_INET) ? sizeof(struct in_addr) : + sizeof(struct in6_addr); + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.rt)); + req.nh.nlmsg_type = RTM_NEWROUTE; + req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_CREATE; + req.nh.nlmsg_seq = seq; + req.rt.rtm_family = family; + req.rt.rtm_dst_len = (family == AF_INET) ? 32 : 128; + req.rt.rtm_table = vrf; + req.rt.rtm_protocol = RTPROT_BOOT; + req.rt.rtm_scope = RT_SCOPE_UNIVERSE; + req.rt.rtm_type = RTN_UNICAST; + + if (rtattr_pack(&req.nh, sizeof(req), RTA_DST, &dst, addr_len)) + return -1; + + if (rtattr_pack(&req.nh, sizeof(req), RTA_PREFSRC, &src, addr_len)) + return -1; + + if (rtattr_pack(&req.nh, sizeof(req), RTA_OIF, &index, sizeof(index))) + return -1; + + if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) { + test_print("send()"); + return -1; + } + + return netlink_check_answer(sock, true); +} + +int ip_route_add_vrf(const char *intf, int family, + union tcp_addr src, union tcp_addr dst, uint8_t vrf) +{ + int route_sock = -1, ret; + uint32_t route_seq; + + if (netlink_sock(&route_sock, &route_seq, NETLINK_ROUTE)) + test_error("Failed to open netlink route socket\n"); + + ret = __ip_route_add(route_sock, route_seq++, intf, + family, src, dst, vrf); + + close(route_sock); + return ret; +} + +int ip_route_add(const char *intf, int family, + union tcp_addr src, union tcp_addr dst) +{ + return ip_route_add_vrf(intf, family, src, dst, RT_TABLE_MAIN); +} + +static int __link_set_up(int sock, uint32_t seq, const char *intf) +{ + struct { + struct nlmsghdr nh; + struct ifinfomsg info; + char attrbuf[MAX_PAYLOAD]; + } req; + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.info)); + req.nh.nlmsg_type = RTM_NEWLINK; + req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; + req.nh.nlmsg_seq = seq; + req.info.ifi_family = AF_UNSPEC; + req.info.ifi_change = 0xFFFFFFFF; + req.info.ifi_index = if_nametoindex(intf); + req.info.ifi_flags = IFF_UP; + req.info.ifi_change = IFF_UP; + + if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) { + test_print("send()"); + return -1; + } + return netlink_check_answer(sock, false); +} + +int link_set_up(const char *intf) +{ + int route_sock = -1, ret; + uint32_t route_seq; + + if (netlink_sock(&route_sock, &route_seq, NETLINK_ROUTE)) + test_error("Failed to open netlink route socket\n"); + + ret = __link_set_up(route_sock, route_seq++, intf); + + close(route_sock); + return ret; +} + +static int __add_vrf(int sock, uint32_t seq, const char *name, + uint32_t tabid, int ifindex, int nsfd) +{ + uint16_t flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_EXCL | NLM_F_CREATE; + struct { + struct nlmsghdr nh; + struct ifinfomsg info; + char attrbuf[MAX_PAYLOAD]; + } req; + static const char vrf_type[] = "vrf"; + struct rtattr *link_info, *info_data; + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.info)); + req.nh.nlmsg_type = RTM_NEWLINK; + req.nh.nlmsg_flags = flags; + req.nh.nlmsg_seq = seq; + req.info.ifi_family = AF_UNSPEC; + req.info.ifi_change = 0xFFFFFFFF; + req.info.ifi_index = ifindex; + + if (rtattr_pack(&req.nh, sizeof(req), IFLA_IFNAME, name, strlen(name))) + return -1; + + if (nsfd >= 0) + if (rtattr_pack(&req.nh, sizeof(req), IFLA_NET_NS_FD, + &nsfd, sizeof(nsfd))) + return -1; + + link_info = rtattr_begin(&req.nh, sizeof(req), IFLA_LINKINFO); + if (!link_info) + return -1; + + if (rtattr_pack(&req.nh, sizeof(req), IFLA_INFO_KIND, vrf_type, sizeof(vrf_type))) + return -1; + + info_data = rtattr_begin(&req.nh, sizeof(req), IFLA_INFO_DATA); + if (!info_data) + return -1; + + if (rtattr_pack(&req.nh, sizeof(req), IFLA_VRF_TABLE, + &tabid, sizeof(tabid))) + return -1; + + rtattr_end(&req.nh, info_data); + rtattr_end(&req.nh, link_info); + + if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) { + test_print("send()"); + return -1; + } + return netlink_check_answer(sock, true); +} + +int add_vrf(const char *name, uint32_t tabid, int ifindex, int nsfd) +{ + int route_sock = -1, ret; + uint32_t route_seq; + + if (netlink_sock(&route_sock, &route_seq, NETLINK_ROUTE)) + test_error("Failed to open netlink route socket\n"); + + ret = __add_vrf(route_sock, route_seq++, name, tabid, ifindex, nsfd); + close(route_sock); + return ret; +} diff --git a/tools/testing/selftests/net/tcp_ao/lib/proc.c b/tools/testing/selftests/net/tcp_ao/lib/proc.c new file mode 100644 index 000000000000..2fb6dd8adba6 --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/lib/proc.c @@ -0,0 +1,273 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <inttypes.h> +#include <pthread.h> +#include <stdio.h> +#include "../../../../../include/linux/compiler.h" +#include "../../../../../include/linux/kernel.h" +#include "aolib.h" + +struct netstat_counter { + uint64_t val; + char *name; +}; + +struct netstat { + char *header_name; + struct netstat *next; + size_t counters_nr; + struct netstat_counter *counters; +}; + +static struct netstat *lookup_type(struct netstat *ns, + const char *type, size_t len) +{ + while (ns != NULL) { + size_t cmp = max(len, strlen(ns->header_name)); + + if (!strncmp(ns->header_name, type, cmp)) + return ns; + ns = ns->next; + } + return NULL; +} + +static struct netstat *lookup_get(struct netstat *ns, + const char *type, const size_t len) +{ + struct netstat *ret; + + ret = lookup_type(ns, type, len); + if (ret != NULL) + return ret; + + ret = malloc(sizeof(struct netstat)); + if (!ret) + test_error("malloc()"); + + ret->header_name = strndup(type, len); + if (ret->header_name == NULL) + test_error("strndup()"); + ret->next = ns; + ret->counters_nr = 0; + ret->counters = NULL; + + return ret; +} + +static struct netstat *lookup_get_column(struct netstat *ns, const char *line) +{ + char *column; + + column = strchr(line, ':'); + if (!column) + test_error("can't parse netstat file"); + + return lookup_get(ns, line, column - line); +} + +static void netstat_read_type(FILE *fnetstat, struct netstat **dest, char *line) +{ + struct netstat *type = lookup_get_column(*dest, line); + const char *pos = line; + size_t i, nr_elems = 0; + char tmp; + + while ((pos = strchr(pos, ' '))) { + nr_elems++; + pos++; + } + + *dest = type; + type->counters = reallocarray(type->counters, + type->counters_nr + nr_elems, + sizeof(struct netstat_counter)); + if (!type->counters) + test_error("reallocarray()"); + + pos = strchr(line, ' ') + 1; + + if (fscanf(fnetstat, type->header_name) == EOF) + test_error("fscanf(%s)", type->header_name); + if (fread(&tmp, 1, 1, fnetstat) != 1 || tmp != ':') + test_error("Unexpected netstat format (%c)", tmp); + + for (i = type->counters_nr; i < type->counters_nr + nr_elems; i++) { + struct netstat_counter *nc = &type->counters[i]; + const char *new_pos = strchr(pos, ' '); + const char *fmt = " %" PRIu64; + + if (new_pos == NULL) + new_pos = strchr(pos, '\n'); + + nc->name = strndup(pos, new_pos - pos); + if (nc->name == NULL) + test_error("strndup()"); + + if (unlikely(!strcmp(nc->name, "MaxConn"))) + fmt = " %" PRId64; /* MaxConn is signed, RFC 2012 */ + if (fscanf(fnetstat, fmt, &nc->val) != 1) + test_error("fscanf(%s)", nc->name); + pos = new_pos + 1; + } + type->counters_nr += nr_elems; + + if (fread(&tmp, 1, 1, fnetstat) != 1 || tmp != '\n') + test_error("Unexpected netstat format"); +} + +static const char *snmp6_name = "Snmp6"; +static void snmp6_read(FILE *fnetstat, struct netstat **dest) +{ + struct netstat *type = lookup_get(*dest, snmp6_name, strlen(snmp6_name)); + char *counter_name; + size_t i; + + for (i = type->counters_nr;; i++) { + struct netstat_counter *nc; + uint64_t counter; + + if (fscanf(fnetstat, "%ms", &counter_name) == EOF) + break; + if (fscanf(fnetstat, "%" PRIu64, &counter) == EOF) + test_error("Unexpected snmp6 format"); + type->counters = reallocarray(type->counters, i + 1, + sizeof(struct netstat_counter)); + if (!type->counters) + test_error("reallocarray()"); + nc = &type->counters[i]; + nc->name = counter_name; + nc->val = counter; + } + type->counters_nr = i; + *dest = type; +} + +struct netstat *netstat_read(void) +{ + struct netstat *ret = 0; + size_t line_sz = 0; + char *line = NULL; + FILE *fnetstat; + + /* + * Opening thread-self instead of /proc/net/... as the latter + * points to /proc/self/net/ which instantiates thread-leader's + * net-ns, see: + * commit 155134fef2b6 ("Revert "proc: Point /proc/{mounts,net} at..") + */ + errno = 0; + fnetstat = fopen("/proc/thread-self/net/netstat", "r"); + if (fnetstat == NULL) + test_error("failed to open /proc/net/netstat"); + + while (getline(&line, &line_sz, fnetstat) != -1) + netstat_read_type(fnetstat, &ret, line); + fclose(fnetstat); + + errno = 0; + fnetstat = fopen("/proc/thread-self/net/snmp", "r"); + if (fnetstat == NULL) + test_error("failed to open /proc/net/snmp"); + + while (getline(&line, &line_sz, fnetstat) != -1) + netstat_read_type(fnetstat, &ret, line); + fclose(fnetstat); + + errno = 0; + fnetstat = fopen("/proc/thread-self/net/snmp6", "r"); + if (fnetstat == NULL) + test_error("failed to open /proc/net/snmp6"); + + snmp6_read(fnetstat, &ret); + fclose(fnetstat); + + free(line); + return ret; +} + +void netstat_free(struct netstat *ns) +{ + while (ns != NULL) { + struct netstat *prev = ns; + size_t i; + + free(ns->header_name); + for (i = 0; i < ns->counters_nr; i++) + free(ns->counters[i].name); + free(ns->counters); + ns = ns->next; + free(prev); + } +} + +static inline void +__netstat_print_diff(uint64_t a, struct netstat *nsb, size_t i) +{ + if (unlikely(!strcmp(nsb->header_name, "MaxConn"))) { + test_print("%8s %25s: %" PRId64 " => %" PRId64, + nsb->header_name, nsb->counters[i].name, + a, nsb->counters[i].val); + return; + } + + test_print("%8s %25s: %" PRIu64 " => %" PRIu64, nsb->header_name, + nsb->counters[i].name, a, nsb->counters[i].val); +} + +void netstat_print_diff(struct netstat *nsa, struct netstat *nsb) +{ + size_t i, j; + + while (nsb != NULL) { + if (unlikely(strcmp(nsb->header_name, nsa->header_name))) { + for (i = 0; i < nsb->counters_nr; i++) + __netstat_print_diff(0, nsb, i); + nsb = nsb->next; + continue; + } + + if (nsb->counters_nr < nsa->counters_nr) + test_error("Unexpected: some counters disappeared!"); + + for (j = 0, i = 0; i < nsb->counters_nr; i++) { + if (strcmp(nsb->counters[i].name, nsa->counters[j].name)) { + __netstat_print_diff(0, nsb, i); + continue; + } + + if (nsa->counters[j].val == nsb->counters[i].val) { + j++; + continue; + } + + __netstat_print_diff(nsa->counters[j].val, nsb, i); + j++; + } + if (j != nsa->counters_nr) + test_error("Unexpected: some counters disappeared!"); + + nsb = nsb->next; + nsa = nsa->next; + } +} + +uint64_t netstat_get(struct netstat *ns, const char *name, bool *not_found) +{ + if (not_found) + *not_found = false; + + while (ns != NULL) { + size_t i; + + for (i = 0; i < ns->counters_nr; i++) { + if (!strcmp(name, ns->counters[i].name)) + return ns->counters[i].val; + } + + ns = ns->next; + } + + if (not_found) + *not_found = true; + return 0; +} diff --git a/tools/testing/selftests/net/tcp_ao/lib/repair.c b/tools/testing/selftests/net/tcp_ao/lib/repair.c new file mode 100644 index 000000000000..9893b3ba69f5 --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/lib/repair.c @@ -0,0 +1,254 @@ +// SPDX-License-Identifier: GPL-2.0 +/* This is over-simplified TCP_REPAIR for TCP_ESTABLISHED sockets + * It tests that TCP-AO enabled connection can be restored. + * For the proper socket repair see: + * https://github.com/checkpoint-restore/criu/blob/criu-dev/soccr/soccr.h + */ +#include <fcntl.h> +#include <linux/sockios.h> +#include <sys/ioctl.h> +#include "aolib.h" + +#ifndef TCPOPT_MAXSEG +# define TCPOPT_MAXSEG 2 +#endif +#ifndef TCPOPT_WINDOW +# define TCPOPT_WINDOW 3 +#endif +#ifndef TCPOPT_SACK_PERMITTED +# define TCPOPT_SACK_PERMITTED 4 +#endif +#ifndef TCPOPT_TIMESTAMP +# define TCPOPT_TIMESTAMP 8 +#endif + +enum { + TCP_ESTABLISHED = 1, + TCP_SYN_SENT, + TCP_SYN_RECV, + TCP_FIN_WAIT1, + TCP_FIN_WAIT2, + TCP_TIME_WAIT, + TCP_CLOSE, + TCP_CLOSE_WAIT, + TCP_LAST_ACK, + TCP_LISTEN, + TCP_CLOSING, /* Now a valid state */ + TCP_NEW_SYN_RECV, + + TCP_MAX_STATES /* Leave at the end! */ +}; + +static void test_sock_checkpoint_queue(int sk, int queue, int qlen, + struct tcp_sock_queue *q) +{ + socklen_t len; + int ret; + + if (setsockopt(sk, SOL_TCP, TCP_REPAIR_QUEUE, &queue, sizeof(queue))) + test_error("setsockopt(TCP_REPAIR_QUEUE)"); + + len = sizeof(q->seq); + ret = getsockopt(sk, SOL_TCP, TCP_QUEUE_SEQ, &q->seq, &len); + if (ret || len != sizeof(q->seq)) + test_error("getsockopt(TCP_QUEUE_SEQ): %d", (int)len); + + if (!qlen) { + q->buf = NULL; + return; + } + + q->buf = malloc(qlen); + if (q->buf == NULL) + test_error("malloc()"); + ret = recv(sk, q->buf, qlen, MSG_PEEK | MSG_DONTWAIT); + if (ret != qlen) + test_error("recv(%d): %d", qlen, ret); +} + +void __test_sock_checkpoint(int sk, struct tcp_sock_state *state, + void *addr, size_t addr_size) +{ + socklen_t len = sizeof(state->info); + int ret; + + memset(state, 0, sizeof(*state)); + + ret = getsockopt(sk, SOL_TCP, TCP_INFO, &state->info, &len); + if (ret || len != sizeof(state->info)) + test_error("getsockopt(TCP_INFO): %d", (int)len); + + len = addr_size; + if (getsockname(sk, addr, &len) || len != addr_size) + test_error("getsockname(): %d", (int)len); + + len = sizeof(state->trw); + ret = getsockopt(sk, SOL_TCP, TCP_REPAIR_WINDOW, &state->trw, &len); + if (ret || len != sizeof(state->trw)) + test_error("getsockopt(TCP_REPAIR_WINDOW): %d", (int)len); + + if (ioctl(sk, SIOCOUTQ, &state->outq_len)) + test_error("ioctl(SIOCOUTQ)"); + + if (ioctl(sk, SIOCOUTQNSD, &state->outq_nsd_len)) + test_error("ioctl(SIOCOUTQNSD)"); + test_sock_checkpoint_queue(sk, TCP_SEND_QUEUE, state->outq_len, &state->out); + + if (ioctl(sk, SIOCINQ, &state->inq_len)) + test_error("ioctl(SIOCINQ)"); + test_sock_checkpoint_queue(sk, TCP_RECV_QUEUE, state->inq_len, &state->in); + + if (state->info.tcpi_state == TCP_CLOSE) + state->outq_len = state->outq_nsd_len = 0; + + len = sizeof(state->mss); + ret = getsockopt(sk, SOL_TCP, TCP_MAXSEG, &state->mss, &len); + if (ret || len != sizeof(state->mss)) + test_error("getsockopt(TCP_MAXSEG): %d", (int)len); + + len = sizeof(state->timestamp); + ret = getsockopt(sk, SOL_TCP, TCP_TIMESTAMP, &state->timestamp, &len); + if (ret || len != sizeof(state->timestamp)) + test_error("getsockopt(TCP_TIMESTAMP): %d", (int)len); +} + +void test_ao_checkpoint(int sk, struct tcp_ao_repair *state) +{ + socklen_t len = sizeof(*state); + int ret; + + memset(state, 0, sizeof(*state)); + + ret = getsockopt(sk, SOL_TCP, TCP_AO_REPAIR, state, &len); + if (ret || len != sizeof(*state)) + test_error("getsockopt(TCP_AO_REPAIR): %d", (int)len); +} + +static void test_sock_restore_seq(int sk, int queue, uint32_t seq) +{ + if (setsockopt(sk, SOL_TCP, TCP_REPAIR_QUEUE, &queue, sizeof(queue))) + test_error("setsockopt(TCP_REPAIR_QUEUE)"); + + if (setsockopt(sk, SOL_TCP, TCP_QUEUE_SEQ, &seq, sizeof(seq))) + test_error("setsockopt(TCP_QUEUE_SEQ)"); +} + +static void test_sock_restore_queue(int sk, int queue, void *buf, int len) +{ + int chunk = len; + size_t off = 0; + + if (len == 0) + return; + + if (setsockopt(sk, SOL_TCP, TCP_REPAIR_QUEUE, &queue, sizeof(queue))) + test_error("setsockopt(TCP_REPAIR_QUEUE)"); + + do { + int ret; + + ret = send(sk, buf + off, chunk, 0); + if (ret <= 0) { + if (chunk > 1024) { + chunk >>= 1; + continue; + } + test_error("send()"); + } + off += ret; + len -= ret; + } while (len > 0); +} + +void __test_sock_restore(int sk, const char *device, + struct tcp_sock_state *state, + void *saddr, void *daddr, size_t addr_size) +{ + struct tcp_repair_opt opts[4]; + unsigned int opt_nr = 0; + long flags; + + if (bind(sk, saddr, addr_size)) + test_error("bind()"); + + flags = fcntl(sk, F_GETFL); + if ((flags < 0) || (fcntl(sk, F_SETFL, flags | O_NONBLOCK) < 0)) + test_error("fcntl()"); + + test_sock_restore_seq(sk, TCP_RECV_QUEUE, state->in.seq - state->inq_len); + test_sock_restore_seq(sk, TCP_SEND_QUEUE, state->out.seq - state->outq_len); + + if (device != NULL && setsockopt(sk, SOL_SOCKET, SO_BINDTODEVICE, + device, strlen(device) + 1)) + test_error("setsockopt(SO_BINDTODEVICE, %s)", device); + + if (connect(sk, daddr, addr_size)) + test_error("connect()"); + + if (state->info.tcpi_options & TCPI_OPT_SACK) { + opts[opt_nr].opt_code = TCPOPT_SACK_PERMITTED; + opts[opt_nr].opt_val = 0; + opt_nr++; + } + if (state->info.tcpi_options & TCPI_OPT_WSCALE) { + opts[opt_nr].opt_code = TCPOPT_WINDOW; + opts[opt_nr].opt_val = state->info.tcpi_snd_wscale + + (state->info.tcpi_rcv_wscale << 16); + opt_nr++; + } + if (state->info.tcpi_options & TCPI_OPT_TIMESTAMPS) { + opts[opt_nr].opt_code = TCPOPT_TIMESTAMP; + opts[opt_nr].opt_val = 0; + opt_nr++; + } + opts[opt_nr].opt_code = TCPOPT_MAXSEG; + opts[opt_nr].opt_val = state->mss; + opt_nr++; + + if (setsockopt(sk, SOL_TCP, TCP_REPAIR_OPTIONS, opts, opt_nr * sizeof(opts[0]))) + test_error("setsockopt(TCP_REPAIR_OPTIONS)"); + + if (state->info.tcpi_options & TCPI_OPT_TIMESTAMPS) { + if (setsockopt(sk, SOL_TCP, TCP_TIMESTAMP, + &state->timestamp, opt_nr * sizeof(opts[0]))) + test_error("setsockopt(TCP_TIMESTAMP)"); + } + test_sock_restore_queue(sk, TCP_RECV_QUEUE, state->in.buf, state->inq_len); + test_sock_restore_queue(sk, TCP_SEND_QUEUE, state->out.buf, state->outq_len); + if (setsockopt(sk, SOL_TCP, TCP_REPAIR_WINDOW, &state->trw, sizeof(state->trw))) + test_error("setsockopt(TCP_REPAIR_WINDOW)"); +} + +void test_ao_restore(int sk, struct tcp_ao_repair *state) +{ + if (setsockopt(sk, SOL_TCP, TCP_AO_REPAIR, state, sizeof(*state))) + test_error("setsockopt(TCP_AO_REPAIR)"); +} + +void test_sock_state_free(struct tcp_sock_state *state) +{ + free(state->out.buf); + free(state->in.buf); +} + +void test_enable_repair(int sk) +{ + int val = TCP_REPAIR_ON; + + if (setsockopt(sk, SOL_TCP, TCP_REPAIR, &val, sizeof(val))) + test_error("setsockopt(TCP_REPAIR)"); +} + +void test_disable_repair(int sk) +{ + int val = TCP_REPAIR_OFF_NO_WP; + + if (setsockopt(sk, SOL_TCP, TCP_REPAIR, &val, sizeof(val))) + test_error("setsockopt(TCP_REPAIR)"); +} + +void test_kill_sk(int sk) +{ + test_enable_repair(sk); + close(sk); +} diff --git a/tools/testing/selftests/net/tcp_ao/lib/setup.c b/tools/testing/selftests/net/tcp_ao/lib/setup.c new file mode 100644 index 000000000000..92276f916f2f --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/lib/setup.c @@ -0,0 +1,361 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <fcntl.h> +#include <pthread.h> +#include <sched.h> +#include <signal.h> +#include "aolib.h" + +/* + * Can't be included in the header: it defines static variables which + * will be unique to every object. Let's include it only once here. + */ +#include "../../../kselftest.h" + +/* Prevent overriding of one thread's output by another */ +static pthread_mutex_t ksft_print_lock = PTHREAD_MUTEX_INITIALIZER; + +void __test_msg(const char *buf) +{ + pthread_mutex_lock(&ksft_print_lock); + ksft_print_msg(buf); + pthread_mutex_unlock(&ksft_print_lock); +} +void __test_ok(const char *buf) +{ + pthread_mutex_lock(&ksft_print_lock); + ksft_test_result_pass(buf); + pthread_mutex_unlock(&ksft_print_lock); +} +void __test_fail(const char *buf) +{ + pthread_mutex_lock(&ksft_print_lock); + ksft_test_result_fail(buf); + pthread_mutex_unlock(&ksft_print_lock); +} +void __test_xfail(const char *buf) +{ + pthread_mutex_lock(&ksft_print_lock); + ksft_test_result_xfail(buf); + pthread_mutex_unlock(&ksft_print_lock); +} +void __test_error(const char *buf) +{ + pthread_mutex_lock(&ksft_print_lock); + ksft_test_result_error(buf); + pthread_mutex_unlock(&ksft_print_lock); +} +void __test_skip(const char *buf) +{ + pthread_mutex_lock(&ksft_print_lock); + ksft_test_result_skip(buf); + pthread_mutex_unlock(&ksft_print_lock); +} + +static volatile int failed; +static volatile int skipped; + +void test_failed(void) +{ + failed = 1; +} + +static void test_exit(void) +{ + if (failed) { + ksft_exit_fail(); + } else if (skipped) { + /* ksft_exit_skip() is different from ksft_exit_*() */ + ksft_print_cnts(); + exit(KSFT_SKIP); + } else { + ksft_exit_pass(); + } +} + +struct dlist_t { + void (*destruct)(void); + struct dlist_t *next; +}; +static struct dlist_t *destructors_list; + +void test_add_destructor(void (*d)(void)) +{ + struct dlist_t *p; + + p = malloc(sizeof(struct dlist_t)); + if (p == NULL) + test_error("malloc() failed"); + + p->next = destructors_list; + p->destruct = d; + destructors_list = p; +} + +static void test_destructor(void) __attribute__((destructor)); +static void test_destructor(void) +{ + while (destructors_list) { + struct dlist_t *p = destructors_list->next; + + destructors_list->destruct(); + free(destructors_list); + destructors_list = p; + } + test_exit(); +} + +static void sig_int(int signo) +{ + test_error("Caught SIGINT - exiting"); +} + +int open_netns(void) +{ + const char *netns_path = "/proc/self/ns/net"; + int fd; + + fd = open(netns_path, O_RDONLY); + if (fd < 0) + test_error("open(%s)", netns_path); + return fd; +} + +int unshare_open_netns(void) +{ + if (unshare(CLONE_NEWNET) != 0) + test_error("unshare()"); + + return open_netns(); +} + +void switch_ns(int fd) +{ + if (setns(fd, CLONE_NEWNET)) + test_error("setns()"); +} + +int switch_save_ns(int new_ns) +{ + int ret = open_netns(); + + switch_ns(new_ns); + return ret; +} + +static int nsfd_outside = -1; +static int nsfd_parent = -1; +static int nsfd_child = -1; +const char veth_name[] = "ktst-veth"; + +static void init_namespaces(void) +{ + nsfd_outside = open_netns(); + nsfd_parent = unshare_open_netns(); + nsfd_child = unshare_open_netns(); +} + +static void link_init(const char *veth, int family, uint8_t prefix, + union tcp_addr addr, union tcp_addr dest) +{ + if (link_set_up(veth)) + test_error("Failed to set link up"); + if (ip_addr_add(veth, family, addr, prefix)) + test_error("Failed to add ip address"); + if (ip_route_add(veth, family, addr, dest)) + test_error("Failed to add route"); +} + +static unsigned int nr_threads = 1; + +static pthread_mutex_t sync_lock = PTHREAD_MUTEX_INITIALIZER; +static pthread_cond_t sync_cond = PTHREAD_COND_INITIALIZER; +static volatile unsigned int stage_threads[2]; +static volatile unsigned int stage_nr; + +/* synchronize all threads in the same stage */ +void synchronize_threads(void) +{ + unsigned int q = stage_nr; + + pthread_mutex_lock(&sync_lock); + stage_threads[q]++; + if (stage_threads[q] == nr_threads) { + stage_nr ^= 1; + stage_threads[stage_nr] = 0; + pthread_cond_signal(&sync_cond); + } + while (stage_threads[q] < nr_threads) + pthread_cond_wait(&sync_cond, &sync_lock); + pthread_mutex_unlock(&sync_lock); +} + +__thread union tcp_addr this_ip_addr; +__thread union tcp_addr this_ip_dest; +int test_family; + +struct new_pthread_arg { + thread_fn func; + union tcp_addr my_ip; + union tcp_addr dest_ip; +}; +static void *new_pthread_entry(void *arg) +{ + struct new_pthread_arg *p = arg; + + this_ip_addr = p->my_ip; + this_ip_dest = p->dest_ip; + p->func(NULL); /* shouldn't return */ + exit(KSFT_FAIL); +} + +static void __test_skip_all(const char *msg) +{ + ksft_set_plan(1); + ksft_print_header(); + skipped = 1; + test_skip("%s", msg); + exit(KSFT_SKIP); +} + +void __test_init(unsigned int ntests, int family, unsigned int prefix, + union tcp_addr addr1, union tcp_addr addr2, + thread_fn peer1, thread_fn peer2) +{ + struct sigaction sa = { + .sa_handler = sig_int, + .sa_flags = SA_RESTART, + }; + time_t seed = time(NULL); + + sigemptyset(&sa.sa_mask); + if (sigaction(SIGINT, &sa, NULL)) + test_error("Can't set SIGINT handler"); + + test_family = family; + if (!kernel_config_has(KCONFIG_NET_NS)) + __test_skip_all(tests_skip_reason[KCONFIG_NET_NS]); + if (!kernel_config_has(KCONFIG_VETH)) + __test_skip_all(tests_skip_reason[KCONFIG_VETH]); + if (!kernel_config_has(KCONFIG_TCP_AO)) + __test_skip_all(tests_skip_reason[KCONFIG_TCP_AO]); + + ksft_set_plan(ntests); + test_print("rand seed %u", (unsigned int)seed); + srand(seed); + + + ksft_print_header(); + init_namespaces(); + + if (add_veth(veth_name, nsfd_parent, nsfd_child)) + test_error("Failed to add veth"); + + switch_ns(nsfd_child); + link_init(veth_name, family, prefix, addr2, addr1); + if (peer2) { + struct new_pthread_arg targ; + pthread_t t; + + targ.my_ip = addr2; + targ.dest_ip = addr1; + targ.func = peer2; + nr_threads++; + if (pthread_create(&t, NULL, new_pthread_entry, &targ)) + test_error("Failed to create pthread"); + } + switch_ns(nsfd_parent); + link_init(veth_name, family, prefix, addr1, addr2); + + this_ip_addr = addr1; + this_ip_dest = addr2; + peer1(NULL); + if (failed) + exit(KSFT_FAIL); + else + exit(KSFT_PASS); +} + +/* /proc/sys/net/core/optmem_max artifically limits the amount of memory + * that can be allocated with sock_kmalloc() on each socket in the system. + * It is not virtualized in v6.7, so it has to written outside test + * namespaces. To be nice a test will revert optmem back to the old value. + * Keeping it simple without any file lock, which means the tests that + * need to set/increase optmem value shouldn't run in parallel. + * Also, not re-entrant. + * Since commit f5769faeec36 ("net: Namespace-ify sysctl_optmem_max") + * it is per-namespace, keeping logic for non-virtualized optmem_max + * for v6.7, which supports TCP-AO. + */ +static const char *optmem_file = "/proc/sys/net/core/optmem_max"; +static size_t saved_optmem; +static int optmem_ns = -1; + +static bool is_optmem_namespaced(void) +{ + if (optmem_ns == -1) { + int old_ns = switch_save_ns(nsfd_child); + + optmem_ns = !access(optmem_file, F_OK); + switch_ns(old_ns); + } + return !!optmem_ns; +} + +size_t test_get_optmem(void) +{ + int old_ns = 0; + FILE *foptmem; + size_t ret; + + if (!is_optmem_namespaced()) + old_ns = switch_save_ns(nsfd_outside); + foptmem = fopen(optmem_file, "r"); + if (!foptmem) + test_error("failed to open %s", optmem_file); + + if (fscanf(foptmem, "%zu", &ret) != 1) + test_error("can't read from %s", optmem_file); + fclose(foptmem); + if (!is_optmem_namespaced()) + switch_ns(old_ns); + return ret; +} + +static void __test_set_optmem(size_t new, size_t *old) +{ + int old_ns = 0; + FILE *foptmem; + + if (old != NULL) + *old = test_get_optmem(); + + if (!is_optmem_namespaced()) + old_ns = switch_save_ns(nsfd_outside); + foptmem = fopen(optmem_file, "w"); + if (!foptmem) + test_error("failed to open %s", optmem_file); + + if (fprintf(foptmem, "%zu", new) <= 0) + test_error("can't write %zu to %s", new, optmem_file); + fclose(foptmem); + if (!is_optmem_namespaced()) + switch_ns(old_ns); +} + +static void test_revert_optmem(void) +{ + if (saved_optmem == 0) + return; + + __test_set_optmem(saved_optmem, NULL); +} + +void test_set_optmem(size_t value) +{ + if (saved_optmem == 0) { + __test_set_optmem(value, &saved_optmem); + test_add_destructor(test_revert_optmem); + } else { + __test_set_optmem(value, NULL); + } +} diff --git a/tools/testing/selftests/net/tcp_ao/lib/sock.c b/tools/testing/selftests/net/tcp_ao/lib/sock.c new file mode 100644 index 000000000000..15aeb0963058 --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/lib/sock.c @@ -0,0 +1,596 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <alloca.h> +#include <fcntl.h> +#include <inttypes.h> +#include <string.h> +#include "../../../../../include/linux/kernel.h" +#include "../../../../../include/linux/stringify.h" +#include "aolib.h" + +const unsigned int test_server_port = 7010; +int __test_listen_socket(int backlog, void *addr, size_t addr_sz) +{ + int err, sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP); + long flags; + + if (sk < 0) + test_error("socket()"); + + err = setsockopt(sk, SOL_SOCKET, SO_BINDTODEVICE, veth_name, + strlen(veth_name) + 1); + if (err < 0) + test_error("setsockopt(SO_BINDTODEVICE)"); + + if (bind(sk, (struct sockaddr *)addr, addr_sz) < 0) + test_error("bind()"); + + flags = fcntl(sk, F_GETFL); + if ((flags < 0) || (fcntl(sk, F_SETFL, flags | O_NONBLOCK) < 0)) + test_error("fcntl()"); + + if (listen(sk, backlog)) + test_error("listen()"); + + return sk; +} + +int test_wait_fd(int sk, time_t sec, bool write) +{ + struct timeval tv = { .tv_sec = sec }; + struct timeval *ptv = NULL; + fd_set fds, efds; + int ret; + socklen_t slen = sizeof(ret); + + FD_ZERO(&fds); + FD_SET(sk, &fds); + FD_ZERO(&efds); + FD_SET(sk, &efds); + + if (sec) + ptv = &tv; + + errno = 0; + if (write) + ret = select(sk + 1, NULL, &fds, &efds, ptv); + else + ret = select(sk + 1, &fds, NULL, &efds, ptv); + if (ret < 0) + return -errno; + if (ret == 0) { + errno = ETIMEDOUT; + return -ETIMEDOUT; + } + + if (getsockopt(sk, SOL_SOCKET, SO_ERROR, &ret, &slen)) + return -errno; + if (ret) + return -ret; + return 0; +} + +int __test_connect_socket(int sk, const char *device, + void *addr, size_t addr_sz, time_t timeout) +{ + long flags; + int err; + + if (device != NULL) { + err = setsockopt(sk, SOL_SOCKET, SO_BINDTODEVICE, device, + strlen(device) + 1); + if (err < 0) + test_error("setsockopt(SO_BINDTODEVICE, %s)", device); + } + + if (!timeout) { + err = connect(sk, addr, addr_sz); + if (err) { + err = -errno; + goto out; + } + return 0; + } + + flags = fcntl(sk, F_GETFL); + if ((flags < 0) || (fcntl(sk, F_SETFL, flags | O_NONBLOCK) < 0)) + test_error("fcntl()"); + + if (connect(sk, addr, addr_sz) < 0) { + if (errno != EINPROGRESS) { + err = -errno; + goto out; + } + if (timeout < 0) + return sk; + err = test_wait_fd(sk, timeout, 1); + if (err) + goto out; + } + return sk; + +out: + close(sk); + return err; +} + +int __test_set_md5(int sk, void *addr, size_t addr_sz, uint8_t prefix, + int vrf, const char *password) +{ + size_t pwd_len = strlen(password); + struct tcp_md5sig md5sig = {}; + + md5sig.tcpm_keylen = pwd_len; + memcpy(md5sig.tcpm_key, password, pwd_len); + md5sig.tcpm_flags = TCP_MD5SIG_FLAG_PREFIX; + md5sig.tcpm_prefixlen = prefix; + if (vrf >= 0) { + md5sig.tcpm_flags |= TCP_MD5SIG_FLAG_IFINDEX; + md5sig.tcpm_ifindex = (uint8_t)vrf; + } + memcpy(&md5sig.tcpm_addr, addr, addr_sz); + + errno = 0; + return setsockopt(sk, IPPROTO_TCP, TCP_MD5SIG_EXT, + &md5sig, sizeof(md5sig)); +} + + +int test_prepare_key_sockaddr(struct tcp_ao_add *ao, const char *alg, + void *addr, size_t addr_sz, bool set_current, bool set_rnext, + uint8_t prefix, uint8_t vrf, uint8_t sndid, uint8_t rcvid, + uint8_t maclen, uint8_t keyflags, + uint8_t keylen, const char *key) +{ + memset(ao, 0, sizeof(struct tcp_ao_add)); + + ao->set_current = !!set_current; + ao->set_rnext = !!set_rnext; + ao->prefix = prefix; + ao->sndid = sndid; + ao->rcvid = rcvid; + ao->maclen = maclen; + ao->keyflags = keyflags; + ao->keylen = keylen; + ao->ifindex = vrf; + + memcpy(&ao->addr, addr, addr_sz); + + if (strlen(alg) > 64) + return -ENOBUFS; + strncpy(ao->alg_name, alg, 64); + + memcpy(ao->key, key, + (keylen > TCP_AO_MAXKEYLEN) ? TCP_AO_MAXKEYLEN : keylen); + return 0; +} + +static int test_get_ao_keys_nr(int sk) +{ + struct tcp_ao_getsockopt tmp = {}; + socklen_t tmp_sz = sizeof(tmp); + int ret; + + tmp.nkeys = 1; + tmp.get_all = 1; + + ret = getsockopt(sk, IPPROTO_TCP, TCP_AO_GET_KEYS, &tmp, &tmp_sz); + if (ret) + return -errno; + return (int)tmp.nkeys; +} + +int test_get_one_ao(int sk, struct tcp_ao_getsockopt *out, + void *addr, size_t addr_sz, uint8_t prefix, + uint8_t sndid, uint8_t rcvid) +{ + struct tcp_ao_getsockopt tmp = {}; + socklen_t tmp_sz = sizeof(tmp); + int ret; + + memcpy(&tmp.addr, addr, addr_sz); + tmp.prefix = prefix; + tmp.sndid = sndid; + tmp.rcvid = rcvid; + tmp.nkeys = 1; + + ret = getsockopt(sk, IPPROTO_TCP, TCP_AO_GET_KEYS, &tmp, &tmp_sz); + if (ret) + return ret; + if (tmp.nkeys != 1) + return -E2BIG; + *out = tmp; + return 0; +} + +int test_get_ao_info(int sk, struct tcp_ao_info_opt *out) +{ + socklen_t sz = sizeof(*out); + + out->reserved = 0; + out->reserved2 = 0; + if (getsockopt(sk, IPPROTO_TCP, TCP_AO_INFO, out, &sz)) + return -errno; + if (sz != sizeof(*out)) + return -EMSGSIZE; + return 0; +} + +int test_set_ao_info(int sk, struct tcp_ao_info_opt *in) +{ + socklen_t sz = sizeof(*in); + + in->reserved = 0; + in->reserved2 = 0; + if (setsockopt(sk, IPPROTO_TCP, TCP_AO_INFO, in, sz)) + return -errno; + return 0; +} + +int test_cmp_getsockopt_setsockopt(const struct tcp_ao_add *a, + const struct tcp_ao_getsockopt *b) +{ + bool is_kdf_aes_128_cmac = false; + bool is_cmac_aes = false; + + if (!strcmp("cmac(aes128)", a->alg_name)) { + is_kdf_aes_128_cmac = (a->keylen != 16); + is_cmac_aes = true; + } + +#define __cmp_ao(member) \ +do { \ + if (b->member != a->member) { \ + test_fail("getsockopt(): " __stringify(member) " %u != %u", \ + b->member, a->member); \ + return -1; \ + } \ +} while(0) + __cmp_ao(sndid); + __cmp_ao(rcvid); + __cmp_ao(prefix); + __cmp_ao(keyflags); + __cmp_ao(ifindex); + if (a->maclen) { + __cmp_ao(maclen); + } else if (b->maclen != 12) { + test_fail("getsockopt(): expected default maclen 12, but it's %u", + b->maclen); + return -1; + } + if (!is_kdf_aes_128_cmac) { + __cmp_ao(keylen); + } else if (b->keylen != 16) { + test_fail("getsockopt(): expected keylen 16 for cmac(aes128), but it's %u", + b->keylen); + return -1; + } +#undef __cmp_ao + if (!is_kdf_aes_128_cmac && memcmp(b->key, a->key, a->keylen)) { + test_fail("getsockopt(): returned key is different `%s' != `%s'", + b->key, a->key); + return -1; + } + if (memcmp(&b->addr, &a->addr, sizeof(b->addr))) { + test_fail("getsockopt(): returned address is different"); + return -1; + } + if (!is_cmac_aes && strcmp(b->alg_name, a->alg_name)) { + test_fail("getsockopt(): returned algorithm %s is different than %s", b->alg_name, a->alg_name); + return -1; + } + if (is_cmac_aes && strcmp(b->alg_name, "cmac(aes)")) { + test_fail("getsockopt(): returned algorithm %s is different than cmac(aes)", b->alg_name); + return -1; + } + /* For a established key rotation test don't add a key with + * set_current = 1, as it's likely to change by peer's request; + * rather use setsockopt(TCP_AO_INFO) + */ + if (a->set_current != b->is_current) { + test_fail("getsockopt(): returned key is not Current_key"); + return -1; + } + if (a->set_rnext != b->is_rnext) { + test_fail("getsockopt(): returned key is not RNext_key"); + return -1; + } + + return 0; +} + +int test_cmp_getsockopt_setsockopt_ao(const struct tcp_ao_info_opt *a, + const struct tcp_ao_info_opt *b) +{ + /* No check for ::current_key, as it may change by the peer */ + if (a->ao_required != b->ao_required) { + test_fail("getsockopt(): returned ao doesn't have ao_required"); + return -1; + } + if (a->accept_icmps != b->accept_icmps) { + test_fail("getsockopt(): returned ao doesn't accept ICMPs"); + return -1; + } + if (a->set_rnext && a->rnext != b->rnext) { + test_fail("getsockopt(): RNext KeyID has changed"); + return -1; + } +#define __cmp_cnt(member) \ +do { \ + if (b->member != a->member) { \ + test_fail("getsockopt(): " __stringify(member) " %llu != %llu", \ + b->member, a->member); \ + return -1; \ + } \ +} while(0) + if (a->set_counters) { + __cmp_cnt(pkt_good); + __cmp_cnt(pkt_bad); + __cmp_cnt(pkt_key_not_found); + __cmp_cnt(pkt_ao_required); + __cmp_cnt(pkt_dropped_icmp); + } +#undef __cmp_cnt + return 0; +} + +int test_get_tcp_ao_counters(int sk, struct tcp_ao_counters *out) +{ + struct tcp_ao_getsockopt *key_dump; + socklen_t key_dump_sz = sizeof(*key_dump); + struct tcp_ao_info_opt info = {}; + bool c1, c2, c3, c4, c5; + struct netstat *ns; + int err, nr_keys; + + memset(out, 0, sizeof(*out)); + + /* per-netns */ + ns = netstat_read(); + out->netns_ao_good = netstat_get(ns, "TCPAOGood", &c1); + out->netns_ao_bad = netstat_get(ns, "TCPAOBad", &c2); + out->netns_ao_key_not_found = netstat_get(ns, "TCPAOKeyNotFound", &c3); + out->netns_ao_required = netstat_get(ns, "TCPAORequired", &c4); + out->netns_ao_dropped_icmp = netstat_get(ns, "TCPAODroppedIcmps", &c5); + netstat_free(ns); + if (c1 || c2 || c3 || c4 || c5) + return -EOPNOTSUPP; + + err = test_get_ao_info(sk, &info); + if (err) + return err; + + /* per-socket */ + out->ao_info_pkt_good = info.pkt_good; + out->ao_info_pkt_bad = info.pkt_bad; + out->ao_info_pkt_key_not_found = info.pkt_key_not_found; + out->ao_info_pkt_ao_required = info.pkt_ao_required; + out->ao_info_pkt_dropped_icmp = info.pkt_dropped_icmp; + + /* per-key */ + nr_keys = test_get_ao_keys_nr(sk); + if (nr_keys < 0) + return nr_keys; + if (nr_keys == 0) + test_error("test_get_ao_keys_nr() == 0"); + out->nr_keys = (size_t)nr_keys; + key_dump = calloc(nr_keys, key_dump_sz); + if (!key_dump) + return -errno; + + key_dump[0].nkeys = nr_keys; + key_dump[0].get_all = 1; + key_dump[0].get_all = 1; + err = getsockopt(sk, IPPROTO_TCP, TCP_AO_GET_KEYS, + key_dump, &key_dump_sz); + if (err) { + free(key_dump); + return -errno; + } + + out->key_cnts = calloc(nr_keys, sizeof(out->key_cnts[0])); + if (!out->key_cnts) { + free(key_dump); + return -errno; + } + + while (nr_keys--) { + out->key_cnts[nr_keys].sndid = key_dump[nr_keys].sndid; + out->key_cnts[nr_keys].rcvid = key_dump[nr_keys].rcvid; + out->key_cnts[nr_keys].pkt_good = key_dump[nr_keys].pkt_good; + out->key_cnts[nr_keys].pkt_bad = key_dump[nr_keys].pkt_bad; + } + free(key_dump); + + return 0; +} + +int __test_tcp_ao_counters_cmp(const char *tst_name, + struct tcp_ao_counters *before, + struct tcp_ao_counters *after, + test_cnt expected) +{ +#define __cmp_ao(cnt, expecting_inc) \ +do { \ + if (before->cnt > after->cnt) { \ + test_fail("%s: Decreased counter " __stringify(cnt) " %" PRIu64 " > %" PRIu64, \ + tst_name ?: "", before->cnt, after->cnt); \ + return -1; \ + } \ + if ((before->cnt != after->cnt) != (expecting_inc)) { \ + test_fail("%s: Counter " __stringify(cnt) " was %sexpected to increase %" PRIu64 " => %" PRIu64, \ + tst_name ?: "", (expecting_inc) ? "" : "not ", \ + before->cnt, after->cnt); \ + return -1; \ + } \ +} while(0) + + errno = 0; + /* per-netns */ + __cmp_ao(netns_ao_good, !!(expected & TEST_CNT_NS_GOOD)); + __cmp_ao(netns_ao_bad, !!(expected & TEST_CNT_NS_BAD)); + __cmp_ao(netns_ao_key_not_found, + !!(expected & TEST_CNT_NS_KEY_NOT_FOUND)); + __cmp_ao(netns_ao_required, !!(expected & TEST_CNT_NS_AO_REQUIRED)); + __cmp_ao(netns_ao_dropped_icmp, + !!(expected & TEST_CNT_NS_DROPPED_ICMP)); + /* per-socket */ + __cmp_ao(ao_info_pkt_good, !!(expected & TEST_CNT_SOCK_GOOD)); + __cmp_ao(ao_info_pkt_bad, !!(expected & TEST_CNT_SOCK_BAD)); + __cmp_ao(ao_info_pkt_key_not_found, + !!(expected & TEST_CNT_SOCK_KEY_NOT_FOUND)); + __cmp_ao(ao_info_pkt_ao_required, !!(expected & TEST_CNT_SOCK_AO_REQUIRED)); + __cmp_ao(ao_info_pkt_dropped_icmp, + !!(expected & TEST_CNT_SOCK_DROPPED_ICMP)); + return 0; +#undef __cmp_ao +} + +int test_tcp_ao_key_counters_cmp(const char *tst_name, + struct tcp_ao_counters *before, + struct tcp_ao_counters *after, + test_cnt expected, + int sndid, int rcvid) +{ + size_t i; +#define __cmp_ao(i, cnt, expecting_inc) \ +do { \ + if (before->key_cnts[i].cnt > after->key_cnts[i].cnt) { \ + test_fail("%s: Decreased counter " __stringify(cnt) " %" PRIu64 " > %" PRIu64 " for key %u:%u", \ + tst_name ?: "", before->key_cnts[i].cnt, \ + after->key_cnts[i].cnt, \ + before->key_cnts[i].sndid, \ + before->key_cnts[i].rcvid); \ + return -1; \ + } \ + if ((before->key_cnts[i].cnt != after->key_cnts[i].cnt) != (expecting_inc)) { \ + test_fail("%s: Counter " __stringify(cnt) " was %sexpected to increase %" PRIu64 " => %" PRIu64 " for key %u:%u", \ + tst_name ?: "", (expecting_inc) ? "" : "not ",\ + before->key_cnts[i].cnt, \ + after->key_cnts[i].cnt, \ + before->key_cnts[i].sndid, \ + before->key_cnts[i].rcvid); \ + return -1; \ + } \ +} while(0) + + if (before->nr_keys != after->nr_keys) { + test_fail("%s: Keys changed on the socket %zu != %zu", + tst_name, before->nr_keys, after->nr_keys); + return -1; + } + + /* per-key */ + i = before->nr_keys; + while (i--) { + if (sndid >= 0 && before->key_cnts[i].sndid != sndid) + continue; + if (rcvid >= 0 && before->key_cnts[i].rcvid != rcvid) + continue; + __cmp_ao(i, pkt_good, !!(expected & TEST_CNT_KEY_GOOD)); + __cmp_ao(i, pkt_bad, !!(expected & TEST_CNT_KEY_BAD)); + } + return 0; +#undef __cmp_ao +} + +void test_tcp_ao_counters_free(struct tcp_ao_counters *cnts) +{ + free(cnts->key_cnts); +} + +#define TEST_BUF_SIZE 4096 +ssize_t test_server_run(int sk, ssize_t quota, time_t timeout_sec) +{ + ssize_t total = 0; + + do { + char buf[TEST_BUF_SIZE]; + ssize_t bytes, sent; + int ret; + + ret = test_wait_fd(sk, timeout_sec, 0); + if (ret) + return ret; + + bytes = recv(sk, buf, sizeof(buf), 0); + + if (bytes < 0) + test_error("recv(): %zd", bytes); + if (bytes == 0) + break; + + ret = test_wait_fd(sk, timeout_sec, 1); + if (ret) + return ret; + + sent = send(sk, buf, bytes, 0); + if (sent == 0) + break; + if (sent != bytes) + test_error("send()"); + total += bytes; + } while (!quota || total < quota); + + return total; +} + +ssize_t test_client_loop(int sk, char *buf, size_t buf_sz, + const size_t msg_len, time_t timeout_sec) +{ + char msg[msg_len]; + int nodelay = 1; + size_t i; + + if (setsockopt(sk, IPPROTO_TCP, TCP_NODELAY, &nodelay, sizeof(nodelay))) + test_error("setsockopt(TCP_NODELAY)"); + + for (i = 0; i < buf_sz; i += min(msg_len, buf_sz - i)) { + size_t sent, bytes = min(msg_len, buf_sz - i); + int ret; + + ret = test_wait_fd(sk, timeout_sec, 1); + if (ret) + return ret; + + sent = send(sk, buf + i, bytes, 0); + if (sent == 0) + break; + if (sent != bytes) + test_error("send()"); + + bytes = 0; + do { + ssize_t got; + + ret = test_wait_fd(sk, timeout_sec, 0); + if (ret) + return ret; + + got = recv(sk, msg + bytes, sizeof(msg) - bytes, 0); + if (got <= 0) + return i; + bytes += got; + } while (bytes < sent); + if (bytes > sent) + test_error("recv(): %zd > %zd", bytes, sent); + if (memcmp(buf + i, msg, bytes) != 0) { + test_fail("received message differs"); + return -1; + } + } + return i; +} + +int test_client_verify(int sk, const size_t msg_len, const size_t nr, + time_t timeout_sec) +{ + size_t buf_sz = msg_len * nr; + char *buf = alloca(buf_sz); + ssize_t ret; + + randomize_buffer(buf, buf_sz); + ret = test_client_loop(sk, buf, buf_sz, msg_len, timeout_sec); + if (ret < 0) + return (int)ret; + return ret != buf_sz ? -1 : 0; +} diff --git a/tools/testing/selftests/net/tcp_ao/lib/utils.c b/tools/testing/selftests/net/tcp_ao/lib/utils.c new file mode 100644 index 000000000000..372daca525f5 --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/lib/utils.c @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "aolib.h" +#include <string.h> + +void randomize_buffer(void *buf, size_t buflen) +{ + int *p = (int *)buf; + size_t words = buflen / sizeof(int); + size_t leftover = buflen % sizeof(int); + + if (!buflen) + return; + + while (words--) + *p++ = rand(); + + if (leftover) { + int tmp = rand(); + + memcpy(buf + buflen - leftover, &tmp, leftover); + } +} + +const struct sockaddr_in6 addr_any6 = { + .sin6_family = AF_INET6, +}; + +const struct sockaddr_in addr_any4 = { + .sin_family = AF_INET, +}; diff --git a/tools/testing/selftests/net/tcp_ao/restore.c b/tools/testing/selftests/net/tcp_ao/restore.c new file mode 100644 index 000000000000..8fdc808df325 --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/restore.c @@ -0,0 +1,236 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Author: Dmitry Safonov <dima@arista.com> */ +/* This is over-simplified TCP_REPAIR for TCP_ESTABLISHED sockets + * It tests that TCP-AO enabled connection can be restored. + * For the proper socket repair see: + * https://github.com/checkpoint-restore/criu/blob/criu-dev/soccr/soccr.h + */ +#include <inttypes.h> +#include "aolib.h" + +const size_t nr_packets = 20; +const size_t msg_len = 100; +const size_t quota = nr_packets * msg_len; +#define fault(type) (inj == FAULT_ ## type) + +static void try_server_run(const char *tst_name, unsigned int port, + fault_t inj, test_cnt cnt_expected) +{ + const char *cnt_name = "TCPAOGood"; + struct tcp_ao_counters ao1, ao2; + uint64_t before_cnt, after_cnt; + int sk, lsk; + time_t timeout; + ssize_t bytes; + + if (fault(TIMEOUT)) + cnt_name = "TCPAOBad"; + lsk = test_listen_socket(this_ip_addr, port, 1); + + if (test_add_key(lsk, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); + synchronize_threads(); /* 1: MKT added => connect() */ + + if (test_wait_fd(lsk, TEST_TIMEOUT_SEC, 0)) + test_error("test_wait_fd()"); + + sk = accept(lsk, NULL, NULL); + if (sk < 0) + test_error("accept()"); + + synchronize_threads(); /* 2: accepted => send data */ + close(lsk); + + bytes = test_server_run(sk, quota, TEST_TIMEOUT_SEC); + if (bytes != quota) { + test_fail("%s: server served: %zd", tst_name, bytes); + goto out; + } + + before_cnt = netstat_get_one(cnt_name, NULL); + if (test_get_tcp_ao_counters(sk, &ao1)) + test_error("test_get_tcp_ao_counters()"); + + timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC; + bytes = test_server_run(sk, quota, timeout); + if (fault(TIMEOUT)) { + if (bytes > 0) + test_fail("%s: server served: %zd", tst_name, bytes); + else + test_ok("%s: server couldn't serve", tst_name); + } else { + if (bytes != quota) + test_fail("%s: server served: %zd", tst_name, bytes); + else + test_ok("%s: server alive", tst_name); + } + if (test_get_tcp_ao_counters(sk, &ao2)) + test_error("test_get_tcp_ao_counters()"); + after_cnt = netstat_get_one(cnt_name, NULL); + + test_tcp_ao_counters_cmp(tst_name, &ao1, &ao2, cnt_expected); + + if (after_cnt <= before_cnt) { + test_fail("%s: %s counter did not increase: %zu <= %zu", + tst_name, cnt_name, after_cnt, before_cnt); + } else { + test_ok("%s: counter %s increased %zu => %zu", + tst_name, cnt_name, before_cnt, after_cnt); + } + + /* + * Before close() as that will send FIN and move the peer in TCP_CLOSE + * and that will prevent reading AO counters from the peer's socket. + */ + synchronize_threads(); /* 3: verified => closed */ +out: + close(sk); +} + +static void *server_fn(void *arg) +{ + unsigned int port = test_server_port; + + try_server_run("TCP-AO migrate to another socket", port++, + 0, TEST_CNT_GOOD); + try_server_run("TCP-AO with wrong send ISN", port++, + FAULT_TIMEOUT, TEST_CNT_BAD); + try_server_run("TCP-AO with wrong receive ISN", port++, + FAULT_TIMEOUT, TEST_CNT_BAD); + try_server_run("TCP-AO with wrong send SEQ ext number", port++, + FAULT_TIMEOUT, TEST_CNT_BAD); + try_server_run("TCP-AO with wrong receive SEQ ext number", port++, + FAULT_TIMEOUT, TEST_CNT_NS_BAD | TEST_CNT_GOOD); + + synchronize_threads(); /* don't race to exit: client exits */ + return NULL; +} + +static void test_get_sk_checkpoint(unsigned int server_port, sockaddr_af *saddr, + struct tcp_sock_state *img, + struct tcp_ao_repair *ao_img) +{ + int sk; + + sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP); + if (sk < 0) + test_error("socket()"); + + if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); + + synchronize_threads(); /* 1: MKT added => connect() */ + if (test_connect_socket(sk, this_ip_dest, server_port) <= 0) + test_error("failed to connect()"); + + synchronize_threads(); /* 2: accepted => send data */ + if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC)) + test_fail("pre-migrate verify failed"); + + test_enable_repair(sk); + test_sock_checkpoint(sk, img, saddr); + test_ao_checkpoint(sk, ao_img); + test_kill_sk(sk); +} + +static void test_sk_restore(const char *tst_name, unsigned int server_port, + sockaddr_af *saddr, struct tcp_sock_state *img, + struct tcp_ao_repair *ao_img, + fault_t inj, test_cnt cnt_expected) +{ + const char *cnt_name = "TCPAOGood"; + struct tcp_ao_counters ao1, ao2; + uint64_t before_cnt, after_cnt; + time_t timeout; + int sk; + + if (fault(TIMEOUT)) + cnt_name = "TCPAOBad"; + + before_cnt = netstat_get_one(cnt_name, NULL); + sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP); + if (sk < 0) + test_error("socket()"); + + test_enable_repair(sk); + test_sock_restore(sk, img, saddr, this_ip_dest, server_port); + if (test_add_repaired_key(sk, DEFAULT_TEST_PASSWORD, 0, this_ip_dest, -1, 100, 100)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); + test_ao_restore(sk, ao_img); + + if (test_get_tcp_ao_counters(sk, &ao1)) + test_error("test_get_tcp_ao_counters()"); + + test_disable_repair(sk); + test_sock_state_free(img); + + timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC; + if (test_client_verify(sk, msg_len, nr_packets, timeout)) { + if (fault(TIMEOUT)) + test_ok("%s: post-migrate connection is broken", tst_name); + else + test_fail("%s: post-migrate connection is working", tst_name); + } else { + if (fault(TIMEOUT)) + test_fail("%s: post-migrate connection still working", tst_name); + else + test_ok("%s: post-migrate connection is alive", tst_name); + } + if (test_get_tcp_ao_counters(sk, &ao2)) + test_error("test_get_tcp_ao_counters()"); + after_cnt = netstat_get_one(cnt_name, NULL); + + test_tcp_ao_counters_cmp(tst_name, &ao1, &ao2, cnt_expected); + + if (after_cnt <= before_cnt) { + test_fail("%s: %s counter did not increase: %zu <= %zu", + tst_name, cnt_name, after_cnt, before_cnt); + } else { + test_ok("%s: counter %s increased %zu => %zu", + tst_name, cnt_name, before_cnt, after_cnt); + } + synchronize_threads(); /* 3: verified => closed */ + close(sk); +} + +static void *client_fn(void *arg) +{ + unsigned int port = test_server_port; + struct tcp_sock_state tcp_img; + struct tcp_ao_repair ao_img; + sockaddr_af saddr; + + test_get_sk_checkpoint(port, &saddr, &tcp_img, &ao_img); + test_sk_restore("TCP-AO migrate to another socket", port++, + &saddr, &tcp_img, &ao_img, 0, TEST_CNT_GOOD); + + test_get_sk_checkpoint(port, &saddr, &tcp_img, &ao_img); + ao_img.snt_isn += 1; + test_sk_restore("TCP-AO with wrong send ISN", port++, + &saddr, &tcp_img, &ao_img, FAULT_TIMEOUT, TEST_CNT_BAD); + + test_get_sk_checkpoint(port, &saddr, &tcp_img, &ao_img); + ao_img.rcv_isn += 1; + test_sk_restore("TCP-AO with wrong receive ISN", port++, + &saddr, &tcp_img, &ao_img, FAULT_TIMEOUT, TEST_CNT_BAD); + + test_get_sk_checkpoint(port, &saddr, &tcp_img, &ao_img); + ao_img.snd_sne += 1; + test_sk_restore("TCP-AO with wrong send SEQ ext number", port++, + &saddr, &tcp_img, &ao_img, FAULT_TIMEOUT, + TEST_CNT_NS_BAD | TEST_CNT_GOOD); + + test_get_sk_checkpoint(port, &saddr, &tcp_img, &ao_img); + ao_img.rcv_sne += 1; + test_sk_restore("TCP-AO with wrong receive SEQ ext number", port++, + &saddr, &tcp_img, &ao_img, FAULT_TIMEOUT, + TEST_CNT_NS_GOOD | TEST_CNT_BAD); + + return NULL; +} + +int main(int argc, char *argv[]) +{ + test_init(20, server_fn, client_fn); + return 0; +} diff --git a/tools/testing/selftests/net/tcp_ao/rst.c b/tools/testing/selftests/net/tcp_ao/rst.c new file mode 100644 index 000000000000..7df8b8700e39 --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/rst.c @@ -0,0 +1,457 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * The test checks that both active and passive reset have correct TCP-AO + * signature. An "active" reset (abort) here is procured from closing + * listen() socket with non-accepted connections in the queue: + * inet_csk_listen_stop() => inet_child_forget() => + * => tcp_disconnect() => tcp_send_active_reset() + * + * The passive reset is quite hard to get on established TCP connections. + * It could be procured from non-established states, but the synchronization + * part from userspace in order to reliably get RST seems uneasy. + * So, instead it's procured by corrupting SEQ number on TIMED-WAIT state. + * + * It's important to test both passive and active RST as they go through + * different code-paths: + * - tcp_send_active_reset() makes no-data skb, sends it with tcp_transmit_skb() + * - tcp_v*_send_reset() create their reply skbs and send them with + * ip_send_unicast_reply() + * + * In both cases TCP-AO signatures have to be correct, which is verified by + * (1) checking that the TCP-AO connection was reset and (2) TCP-AO counters. + * + * Author: Dmitry Safonov <dima@arista.com> + */ +#include <inttypes.h> +#include "../../../../include/linux/kernel.h" +#include "aolib.h" + +const size_t quota = 1000; +const size_t packet_sz = 100; +/* + * Backlog == 0 means 1 connection in queue, see: + * commit 64a146513f8f ("[NET]: Revert incorrect accept queue...") + */ +const unsigned int backlog; + +static void netstats_check(struct netstat *before, struct netstat *after, + char *msg) +{ + uint64_t before_cnt, after_cnt; + + before_cnt = netstat_get(before, "TCPAORequired", NULL); + after_cnt = netstat_get(after, "TCPAORequired", NULL); + if (after_cnt > before_cnt) + test_fail("Segments without AO sign (%s): %" PRIu64 " => %" PRIu64, + msg, before_cnt, after_cnt); + else + test_ok("No segments without AO sign (%s)", msg); + + before_cnt = netstat_get(before, "TCPAOGood", NULL); + after_cnt = netstat_get(after, "TCPAOGood", NULL); + if (after_cnt <= before_cnt) + test_fail("Signed AO segments (%s): %" PRIu64 " => %" PRIu64, + msg, before_cnt, after_cnt); + else + test_ok("Signed AO segments (%s): %" PRIu64 " => %" PRIu64, + msg, before_cnt, after_cnt); + + before_cnt = netstat_get(before, "TCPAOBad", NULL); + after_cnt = netstat_get(after, "TCPAOBad", NULL); + if (after_cnt > before_cnt) + test_fail("Segments with bad AO sign (%s): %" PRIu64 " => %" PRIu64, + msg, before_cnt, after_cnt); + else + test_ok("No segments with bad AO sign (%s)", msg); +} + +/* + * Another way to send RST, but not through tcp_v{4,6}_send_reset() + * is tcp_send_active_reset(), that is not in reply to inbound segment, + * but rather active send. It uses tcp_transmit_skb(), so that should + * work, but as it also sends RST - nice that it can be covered as well. + */ +static void close_forced(int sk) +{ + struct linger sl; + + sl.l_onoff = 1; + sl.l_linger = 0; + if (setsockopt(sk, SOL_SOCKET, SO_LINGER, &sl, sizeof(sl))) + test_error("setsockopt(SO_LINGER)"); + close(sk); +} + +static void test_server_active_rst(unsigned int port) +{ + struct tcp_ao_counters cnt1, cnt2; + ssize_t bytes; + int sk, lsk; + + lsk = test_listen_socket(this_ip_addr, port, backlog); + if (test_add_key(lsk, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); + if (test_get_tcp_ao_counters(lsk, &cnt1)) + test_error("test_get_tcp_ao_counters()"); + + synchronize_threads(); /* 1: MKT added */ + if (test_wait_fd(lsk, TEST_TIMEOUT_SEC, 0)) + test_error("test_wait_fd()"); + + sk = accept(lsk, NULL, NULL); + if (sk < 0) + test_error("accept()"); + + synchronize_threads(); /* 2: connection accept()ed, another queued */ + if (test_get_tcp_ao_counters(lsk, &cnt2)) + test_error("test_get_tcp_ao_counters()"); + + synchronize_threads(); /* 3: close listen socket */ + close(lsk); + bytes = test_server_run(sk, quota, 0); + if (bytes != quota) + test_error("servered only %zd bytes", bytes); + else + test_ok("servered %zd bytes", bytes); + + synchronize_threads(); /* 4: finishing up */ + close_forced(sk); + + synchronize_threads(); /* 5: closed active sk */ + + synchronize_threads(); /* 6: counters checks */ + if (test_tcp_ao_counters_cmp("active RST server", &cnt1, &cnt2, TEST_CNT_GOOD)) + test_fail("MKT counters (server) have not only good packets"); + else + test_ok("MKT counters are good on server"); +} + +static void test_server_passive_rst(unsigned int port) +{ + struct tcp_ao_counters ao1, ao2; + int sk, lsk; + ssize_t bytes; + + lsk = test_listen_socket(this_ip_addr, port, 1); + + if (test_add_key(lsk, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); + + synchronize_threads(); /* 1: MKT added => connect() */ + if (test_wait_fd(lsk, TEST_TIMEOUT_SEC, 0)) + test_error("test_wait_fd()"); + + sk = accept(lsk, NULL, NULL); + if (sk < 0) + test_error("accept()"); + + synchronize_threads(); /* 2: accepted => send data */ + close(lsk); + if (test_get_tcp_ao_counters(sk, &ao1)) + test_error("test_get_tcp_ao_counters()"); + + bytes = test_server_run(sk, quota, TEST_TIMEOUT_SEC); + if (bytes != quota) { + if (bytes > 0) + test_fail("server served: %zd", bytes); + else + test_fail("server returned %zd", bytes); + } + + synchronize_threads(); /* 3: checkpoint the client */ + synchronize_threads(); /* 4: close the server, creating twsk */ + if (test_get_tcp_ao_counters(sk, &ao2)) + test_error("test_get_tcp_ao_counters()"); + close(sk); + + synchronize_threads(); /* 5: restore the socket, send more data */ + test_tcp_ao_counters_cmp("passive RST server", &ao1, &ao2, TEST_CNT_GOOD); + + synchronize_threads(); /* 6: server exits */ +} + +static void *server_fn(void *arg) +{ + struct netstat *ns_before, *ns_after; + unsigned int port = test_server_port; + + ns_before = netstat_read(); + + test_server_active_rst(port++); + test_server_passive_rst(port++); + + ns_after = netstat_read(); + netstats_check(ns_before, ns_after, "server"); + netstat_free(ns_after); + netstat_free(ns_before); + synchronize_threads(); /* exit */ + + synchronize_threads(); /* don't race to exit() - client exits */ + return NULL; +} + +static int test_wait_fds(int sk[], size_t nr, bool is_writable[], + ssize_t wait_for, time_t sec) +{ + struct timeval tv = { .tv_sec = sec }; + struct timeval *ptv = NULL; + fd_set left; + size_t i; + int ret; + + FD_ZERO(&left); + for (i = 0; i < nr; i++) { + FD_SET(sk[i], &left); + if (is_writable) + is_writable[i] = false; + } + + if (sec) + ptv = &tv; + + do { + bool is_empty = true; + fd_set fds, efds; + int nfd = 0; + + FD_ZERO(&fds); + FD_ZERO(&efds); + for (i = 0; i < nr; i++) { + if (!FD_ISSET(sk[i], &left)) + continue; + + if (sk[i] > nfd) + nfd = sk[i]; + + FD_SET(sk[i], &fds); + FD_SET(sk[i], &efds); + is_empty = false; + } + if (is_empty) + return -ENOENT; + + errno = 0; + ret = select(nfd + 1, NULL, &fds, &efds, ptv); + if (ret < 0) + return -errno; + if (!ret) + return -ETIMEDOUT; + for (i = 0; i < nr; i++) { + if (FD_ISSET(sk[i], &fds)) { + if (is_writable) + is_writable[i] = true; + FD_CLR(sk[i], &left); + wait_for--; + continue; + } + if (FD_ISSET(sk[i], &efds)) { + FD_CLR(sk[i], &left); + wait_for--; + } + } + } while (wait_for > 0); + + return 0; +} + +static void test_client_active_rst(unsigned int port) +{ + /* one in queue, another accept()ed */ + unsigned int wait_for = backlog + 2; + int i, sk[3], err; + bool is_writable[ARRAY_SIZE(sk)] = {false}; + unsigned int last = ARRAY_SIZE(sk) - 1; + + for (i = 0; i < ARRAY_SIZE(sk); i++) { + sk[i] = socket(test_family, SOCK_STREAM, IPPROTO_TCP); + if (sk[i] < 0) + test_error("socket()"); + if (test_add_key(sk[i], DEFAULT_TEST_PASSWORD, + this_ip_dest, -1, 100, 100)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); + } + + synchronize_threads(); /* 1: MKT added */ + for (i = 0; i < last; i++) { + err = _test_connect_socket(sk[i], this_ip_dest, port, + (i == 0) ? TEST_TIMEOUT_SEC : -1); + + if (err < 0) + test_error("failed to connect()"); + } + + synchronize_threads(); /* 2: connection accept()ed, another queued */ + err = test_wait_fds(sk, last, is_writable, wait_for, TEST_TIMEOUT_SEC); + if (err < 0) + test_error("test_wait_fds(): %d", err); + + synchronize_threads(); /* 3: close listen socket */ + if (test_client_verify(sk[0], packet_sz, quota / packet_sz, TEST_TIMEOUT_SEC)) + test_fail("Failed to send data on connected socket"); + else + test_ok("Verified established tcp connection"); + + synchronize_threads(); /* 4: finishing up */ + err = _test_connect_socket(sk[last], this_ip_dest, port, -1); + if (err < 0) + test_error("failed to connect()"); + + synchronize_threads(); /* 5: closed active sk */ + err = test_wait_fds(sk, ARRAY_SIZE(sk), NULL, + wait_for, TEST_TIMEOUT_SEC); + if (err < 0) + test_error("select(): %d", err); + + for (i = 0; i < ARRAY_SIZE(sk); i++) { + socklen_t slen = sizeof(err); + + if (getsockopt(sk[i], SOL_SOCKET, SO_ERROR, &err, &slen)) + test_error("getsockopt()"); + if (is_writable[i] && err != ECONNRESET) { + test_fail("sk[%d] = %d, err = %d, connection wasn't reset", + i, sk[i], err); + } else { + test_ok("sk[%d] = %d%s", i, sk[i], + is_writable[i] ? ", connection was reset" : ""); + } + } + synchronize_threads(); /* 6: counters checks */ +} + +static void test_client_passive_rst(unsigned int port) +{ + struct tcp_ao_counters ao1, ao2; + struct tcp_ao_repair ao_img; + struct tcp_sock_state img; + sockaddr_af saddr; + int sk, err; + + sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP); + if (sk < 0) + test_error("socket()"); + + if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); + + synchronize_threads(); /* 1: MKT added => connect() */ + if (test_connect_socket(sk, this_ip_dest, port) <= 0) + test_error("failed to connect()"); + + synchronize_threads(); /* 2: accepted => send data */ + if (test_client_verify(sk, packet_sz, quota / packet_sz, TEST_TIMEOUT_SEC)) + test_fail("Failed to send data on connected socket"); + else + test_ok("Verified established tcp connection"); + + synchronize_threads(); /* 3: checkpoint the client */ + test_enable_repair(sk); + test_sock_checkpoint(sk, &img, &saddr); + test_ao_checkpoint(sk, &ao_img); + test_disable_repair(sk); + + synchronize_threads(); /* 4: close the server, creating twsk */ + + /* + * The "corruption" in SEQ has to be small enough to fit into TCP + * window, see tcp_timewait_state_process() for out-of-window + * segments. + */ + img.out.seq += 5; /* 5 is more noticeable in tcpdump than 1 */ + + /* + * FIXME: This is kind-of ugly and dirty, but it works. + * + * At this moment, the server has close'ed(sk). + * The passive RST that is being targeted here is new data after + * half-duplex close, see tcp_timewait_state_process() => TCP_TW_RST + * + * What is needed here is: + * (1) wait for FIN from the server + * (2) make sure that the ACK from the client went out + * (3) make sure that the ACK was received and processed by the server + * + * Otherwise, the data that will be sent from "repaired" socket + * post SEQ corruption may get to the server before it's in + * TCP_FIN_WAIT2. + * + * (1) is easy with select()/poll() + * (2) is possible by polling tcpi_state from TCP_INFO + * (3) is quite complex: as server's socket was already closed, + * probably the way to do it would be tcp-diag. + */ + sleep(TEST_RETRANSMIT_SEC); + + synchronize_threads(); /* 5: restore the socket, send more data */ + test_kill_sk(sk); + + sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP); + if (sk < 0) + test_error("socket()"); + + test_enable_repair(sk); + test_sock_restore(sk, &img, &saddr, this_ip_dest, port); + if (test_add_repaired_key(sk, DEFAULT_TEST_PASSWORD, 0, this_ip_dest, -1, 100, 100)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); + test_ao_restore(sk, &ao_img); + + if (test_get_tcp_ao_counters(sk, &ao1)) + test_error("test_get_tcp_ao_counters()"); + + test_disable_repair(sk); + test_sock_state_free(&img); + + /* + * This is how "passive reset" is acquired in this test from TCP_TW_RST: + * + * IP 10.0.254.1.7011 > 10.0.1.1.59772: Flags [P.], seq 901:1001, ack 1001, win 249, + * options [tcp-ao keyid 100 rnextkeyid 100 mac 0x10217d6c36a22379086ef3b1], length 100 + * IP 10.0.254.1.7011 > 10.0.1.1.59772: Flags [F.], seq 1001, ack 1001, win 249, + * options [tcp-ao keyid 100 rnextkeyid 100 mac 0x104ffc99b98c10a5298cc268], length 0 + * IP 10.0.1.1.59772 > 10.0.254.1.7011: Flags [.], ack 1002, win 251, + * options [tcp-ao keyid 100 rnextkeyid 100 mac 0xe496dd4f7f5a8a66873c6f93,nop,nop,sack 1 {1001:1002}], length 0 + * IP 10.0.1.1.59772 > 10.0.254.1.7011: Flags [P.], seq 1006:1106, ack 1001, win 251, + * options [tcp-ao keyid 100 rnextkeyid 100 mac 0x1b5f3330fb23fbcd0c77d0ca], length 100 + * IP 10.0.254.1.7011 > 10.0.1.1.59772: Flags [R], seq 3215596252, win 0, + * options [tcp-ao keyid 100 rnextkeyid 100 mac 0x0bcfbbf497bce844312304b2], length 0 + */ + err = test_client_verify(sk, packet_sz, quota / packet_sz, 2 * TEST_TIMEOUT_SEC); + /* Make sure that the connection was reset, not timeouted */ + if (err && err == -ECONNRESET) + test_ok("client sock was passively reset post-seq-adjust"); + else if (err) + test_fail("client sock was not reset post-seq-adjust: %d", err); + else + test_fail("client sock is yet connected post-seq-adjust"); + + if (test_get_tcp_ao_counters(sk, &ao2)) + test_error("test_get_tcp_ao_counters()"); + + synchronize_threads(); /* 6: server exits */ + close(sk); + test_tcp_ao_counters_cmp("client passive RST", &ao1, &ao2, TEST_CNT_GOOD); +} + +static void *client_fn(void *arg) +{ + struct netstat *ns_before, *ns_after; + unsigned int port = test_server_port; + + ns_before = netstat_read(); + + test_client_active_rst(port++); + test_client_passive_rst(port++); + + ns_after = netstat_read(); + netstats_check(ns_before, ns_after, "client"); + netstat_free(ns_after); + netstat_free(ns_before); + + synchronize_threads(); /* exit */ + return NULL; +} + +int main(int argc, char *argv[]) +{ + test_init(14, server_fn, client_fn); + return 0; +} diff --git a/tools/testing/selftests/net/tcp_ao/self-connect.c b/tools/testing/selftests/net/tcp_ao/self-connect.c new file mode 100644 index 000000000000..e154d9e198a9 --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/self-connect.c @@ -0,0 +1,197 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Author: Dmitry Safonov <dima@arista.com> */ +#include <inttypes.h> +#include "aolib.h" + +static union tcp_addr local_addr; + +static void __setup_lo_intf(const char *lo_intf, + const char *addr_str, uint8_t prefix) +{ + if (inet_pton(TEST_FAMILY, addr_str, &local_addr) != 1) + test_error("Can't convert local ip address"); + + if (ip_addr_add(lo_intf, TEST_FAMILY, local_addr, prefix)) + test_error("Failed to add %s ip address", lo_intf); + + if (link_set_up(lo_intf)) + test_error("Failed to bring %s up", lo_intf); +} + +static void setup_lo_intf(const char *lo_intf) +{ +#ifdef IPV6_TEST + __setup_lo_intf(lo_intf, "::1", 128); +#else + __setup_lo_intf(lo_intf, "127.0.0.1", 8); +#endif +} + +static void tcp_self_connect(const char *tst, unsigned int port, + bool different_keyids, bool check_restore) +{ + uint64_t before_challenge_ack, after_challenge_ack; + uint64_t before_syn_challenge, after_syn_challenge; + struct tcp_ao_counters before_ao, after_ao; + uint64_t before_aogood, after_aogood; + struct netstat *ns_before, *ns_after; + const size_t nr_packets = 20; + struct tcp_ao_repair ao_img; + struct tcp_sock_state img; + sockaddr_af addr; + int sk; + + tcp_addr_to_sockaddr_in(&addr, &local_addr, htons(port)); + + sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP); + if (sk < 0) + test_error("socket()"); + + if (different_keyids) { + if (test_add_key(sk, DEFAULT_TEST_PASSWORD, local_addr, -1, 5, 7)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); + if (test_add_key(sk, DEFAULT_TEST_PASSWORD, local_addr, -1, 7, 5)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); + } else { + if (test_add_key(sk, DEFAULT_TEST_PASSWORD, local_addr, -1, 100, 100)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); + } + + if (bind(sk, (struct sockaddr *)&addr, sizeof(addr)) < 0) + test_error("bind()"); + + ns_before = netstat_read(); + before_aogood = netstat_get(ns_before, "TCPAOGood", NULL); + before_challenge_ack = netstat_get(ns_before, "TCPChallengeACK", NULL); + before_syn_challenge = netstat_get(ns_before, "TCPSYNChallenge", NULL); + if (test_get_tcp_ao_counters(sk, &before_ao)) + test_error("test_get_tcp_ao_counters()"); + + if (__test_connect_socket(sk, "lo", (struct sockaddr *)&addr, + sizeof(addr), TEST_TIMEOUT_SEC) < 0) { + ns_after = netstat_read(); + netstat_print_diff(ns_before, ns_after); + test_error("failed to connect()"); + } + + if (test_client_verify(sk, 100, nr_packets, TEST_TIMEOUT_SEC)) { + test_fail("%s: tcp connection verify failed", tst); + close(sk); + return; + } + + ns_after = netstat_read(); + after_aogood = netstat_get(ns_after, "TCPAOGood", NULL); + after_challenge_ack = netstat_get(ns_after, "TCPChallengeACK", NULL); + after_syn_challenge = netstat_get(ns_after, "TCPSYNChallenge", NULL); + if (test_get_tcp_ao_counters(sk, &after_ao)) + test_error("test_get_tcp_ao_counters()"); + if (!check_restore) { + /* to debug: netstat_print_diff(ns_before, ns_after); */ + netstat_free(ns_before); + } + netstat_free(ns_after); + + if (after_aogood <= before_aogood) { + test_fail("%s: TCPAOGood counter mismatch: %zu <= %zu", + tst, after_aogood, before_aogood); + close(sk); + return; + } + if (after_challenge_ack <= before_challenge_ack || + after_syn_challenge <= before_syn_challenge) { + /* + * It's also meant to test simultaneous open, so check + * these counters as well. + */ + test_fail("%s: Didn't challenge SYN or ACK: %zu <= %zu OR %zu <= %zu", + tst, after_challenge_ack, before_challenge_ack, + after_syn_challenge, before_syn_challenge); + close(sk); + return; + } + + if (test_tcp_ao_counters_cmp(tst, &before_ao, &after_ao, TEST_CNT_GOOD)) { + close(sk); + return; + } + + if (!check_restore) { + test_ok("%s: connect TCPAOGood %" PRIu64 " => %" PRIu64, + tst, before_aogood, after_aogood); + close(sk); + return; + } + + test_enable_repair(sk); + test_sock_checkpoint(sk, &img, &addr); +#ifdef IPV6_TEST + addr.sin6_port = htons(port + 1); +#else + addr.sin_port = htons(port + 1); +#endif + test_ao_checkpoint(sk, &ao_img); + test_kill_sk(sk); + + sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP); + if (sk < 0) + test_error("socket()"); + + test_enable_repair(sk); + __test_sock_restore(sk, "lo", &img, &addr, &addr, sizeof(addr)); + if (different_keyids) { + if (test_add_repaired_key(sk, DEFAULT_TEST_PASSWORD, 0, + local_addr, -1, 7, 5)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); + if (test_add_repaired_key(sk, DEFAULT_TEST_PASSWORD, 0, + local_addr, -1, 5, 7)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); + } else { + if (test_add_repaired_key(sk, DEFAULT_TEST_PASSWORD, 0, + local_addr, -1, 100, 100)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); + } + test_ao_restore(sk, &ao_img); + test_disable_repair(sk); + test_sock_state_free(&img); + if (test_client_verify(sk, 100, nr_packets, TEST_TIMEOUT_SEC)) { + test_fail("%s: tcp connection verify failed", tst); + close(sk); + return; + } + ns_after = netstat_read(); + after_aogood = netstat_get(ns_after, "TCPAOGood", NULL); + /* to debug: netstat_print_diff(ns_before, ns_after); */ + netstat_free(ns_before); + netstat_free(ns_after); + close(sk); + if (after_aogood <= before_aogood) { + test_fail("%s: TCPAOGood counter mismatch: %zu <= %zu", + tst, after_aogood, before_aogood); + return; + } + test_ok("%s: connect TCPAOGood %" PRIu64 " => %" PRIu64, + tst, before_aogood, after_aogood); +} + +static void *client_fn(void *arg) +{ + unsigned int port = test_server_port; + + setup_lo_intf("lo"); + + tcp_self_connect("self-connect(same keyids)", port++, false, false); + tcp_self_connect("self-connect(different keyids)", port++, true, false); + tcp_self_connect("self-connect(restore)", port, false, true); + port += 2; + tcp_self_connect("self-connect(restore, different keyids)", port, true, true); + port += 2; + + return NULL; +} + +int main(int argc, char *argv[]) +{ + test_init(4, client_fn, NULL); + return 0; +} diff --git a/tools/testing/selftests/net/tcp_ao/seq-ext.c b/tools/testing/selftests/net/tcp_ao/seq-ext.c new file mode 100644 index 000000000000..ad4e77d6823e --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/seq-ext.c @@ -0,0 +1,245 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Check that after SEQ number wrap-around: + * 1. SEQ-extension has upper bytes set + * 2. TCP conneciton is alive and no TCPAOBad segments + * In order to test (2), the test doesn't just adjust seq number for a queue + * on a connected socket, but migrates it to another sk+port number, so + * that there won't be any delayed packets that will fail to verify + * with the new SEQ numbers. + */ +#include <inttypes.h> +#include "aolib.h" + +const unsigned int nr_packets = 1000; +const unsigned int msg_len = 1000; +const unsigned int quota = nr_packets * msg_len; +unsigned int client_new_port; + +/* Move them closer to roll-over */ +static void test_adjust_seqs(struct tcp_sock_state *img, + struct tcp_ao_repair *ao_img, + bool server) +{ + uint32_t new_seq1, new_seq2; + + /* make them roll-over during quota, but on different segments */ + if (server) { + new_seq1 = ((uint32_t)-1) - msg_len; + new_seq2 = ((uint32_t)-1) - (quota - 2 * msg_len); + } else { + new_seq1 = ((uint32_t)-1) - (quota - 2 * msg_len); + new_seq2 = ((uint32_t)-1) - msg_len; + } + + img->in.seq = new_seq1; + img->trw.snd_wl1 = img->in.seq - msg_len; + img->out.seq = new_seq2; + img->trw.rcv_wup = img->in.seq; +} + +static int test_sk_restore(struct tcp_sock_state *img, + struct tcp_ao_repair *ao_img, sockaddr_af *saddr, + const union tcp_addr daddr, unsigned int dport, + struct tcp_ao_counters *cnt) +{ + int sk; + + sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP); + if (sk < 0) + test_error("socket()"); + + test_enable_repair(sk); + test_sock_restore(sk, img, saddr, daddr, dport); + if (test_add_repaired_key(sk, DEFAULT_TEST_PASSWORD, 0, daddr, -1, 100, 100)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); + test_ao_restore(sk, ao_img); + + if (test_get_tcp_ao_counters(sk, cnt)) + test_error("test_get_tcp_ao_counters()"); + + test_disable_repair(sk); + test_sock_state_free(img); + return sk; +} + +static void *server_fn(void *arg) +{ + uint64_t before_good, after_good, after_bad; + struct tcp_ao_counters ao1, ao2; + struct tcp_sock_state img; + struct tcp_ao_repair ao_img; + sockaddr_af saddr; + ssize_t bytes; + int sk, lsk; + + lsk = test_listen_socket(this_ip_addr, test_server_port, 1); + + if (test_add_key(lsk, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); + + synchronize_threads(); /* 1: MKT added => connect() */ + + if (test_wait_fd(lsk, TEST_TIMEOUT_SEC, 0)) + test_error("test_wait_fd()"); + + sk = accept(lsk, NULL, NULL); + if (sk < 0) + test_error("accept()"); + + synchronize_threads(); /* 2: accepted => send data */ + close(lsk); + + bytes = test_server_run(sk, quota, TEST_TIMEOUT_SEC); + if (bytes != quota) { + if (bytes > 0) + test_fail("server served: %zd", bytes); + else + test_fail("server returned: %zd", bytes); + goto out; + } + + before_good = netstat_get_one("TCPAOGood", NULL); + + synchronize_threads(); /* 3: restore the connection on another port */ + + test_enable_repair(sk); + test_sock_checkpoint(sk, &img, &saddr); + test_ao_checkpoint(sk, &ao_img); + test_kill_sk(sk); +#ifdef IPV6_TEST + saddr.sin6_port = htons(ntohs(saddr.sin6_port) + 1); +#else + saddr.sin_port = htons(ntohs(saddr.sin_port) + 1); +#endif + test_adjust_seqs(&img, &ao_img, true); + synchronize_threads(); /* 4: dump finished */ + sk = test_sk_restore(&img, &ao_img, &saddr, this_ip_dest, + client_new_port, &ao1); + + synchronize_threads(); /* 5: verify counters during SEQ-number rollover */ + bytes = test_server_run(sk, quota, TEST_TIMEOUT_SEC); + if (bytes != quota) { + if (bytes > 0) + test_fail("server served: %zd", bytes); + else + test_fail("server returned: %zd", bytes); + } else { + test_ok("server alive"); + } + + if (test_get_tcp_ao_counters(sk, &ao2)) + test_error("test_get_tcp_ao_counters()"); + after_good = netstat_get_one("TCPAOGood", NULL); + + test_tcp_ao_counters_cmp(NULL, &ao1, &ao2, TEST_CNT_GOOD); + + if (after_good <= before_good) { + test_fail("TCPAOGood counter did not increase: %zu <= %zu", + after_good, before_good); + } else { + test_ok("TCPAOGood counter increased %zu => %zu", + before_good, after_good); + } + after_bad = netstat_get_one("TCPAOBad", NULL); + if (after_bad) + test_fail("TCPAOBad counter is non-zero: %zu", after_bad); + else + test_ok("TCPAOBad counter didn't increase"); + test_enable_repair(sk); + test_ao_checkpoint(sk, &ao_img); + if (ao_img.snd_sne && ao_img.rcv_sne) { + test_ok("SEQ extension incremented: %u/%u", + ao_img.snd_sne, ao_img.rcv_sne); + } else { + test_fail("SEQ extension was not incremented: %u/%u", + ao_img.snd_sne, ao_img.rcv_sne); + } + + synchronize_threads(); /* 6: verified => closed */ +out: + close(sk); + return NULL; +} + +static void *client_fn(void *arg) +{ + uint64_t before_good, after_good, after_bad; + struct tcp_ao_counters ao1, ao2; + struct tcp_sock_state img; + struct tcp_ao_repair ao_img; + sockaddr_af saddr; + int sk; + + sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP); + if (sk < 0) + test_error("socket()"); + + if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); + + synchronize_threads(); /* 1: MKT added => connect() */ + if (test_connect_socket(sk, this_ip_dest, test_server_port) <= 0) + test_error("failed to connect()"); + + synchronize_threads(); /* 2: accepted => send data */ + if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC)) { + test_fail("pre-migrate verify failed"); + return NULL; + } + + before_good = netstat_get_one("TCPAOGood", NULL); + + synchronize_threads(); /* 3: restore the connection on another port */ + test_enable_repair(sk); + test_sock_checkpoint(sk, &img, &saddr); + test_ao_checkpoint(sk, &ao_img); + test_kill_sk(sk); +#ifdef IPV6_TEST + client_new_port = ntohs(saddr.sin6_port) + 1; + saddr.sin6_port = htons(ntohs(saddr.sin6_port) + 1); +#else + client_new_port = ntohs(saddr.sin_port) + 1; + saddr.sin_port = htons(ntohs(saddr.sin_port) + 1); +#endif + test_adjust_seqs(&img, &ao_img, false); + synchronize_threads(); /* 4: dump finished */ + sk = test_sk_restore(&img, &ao_img, &saddr, this_ip_dest, + test_server_port + 1, &ao1); + + synchronize_threads(); /* 5: verify counters during SEQ-number rollover */ + if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC)) + test_fail("post-migrate verify failed"); + else + test_ok("post-migrate connection alive"); + + if (test_get_tcp_ao_counters(sk, &ao2)) + test_error("test_get_tcp_ao_counters()"); + after_good = netstat_get_one("TCPAOGood", NULL); + + test_tcp_ao_counters_cmp(NULL, &ao1, &ao2, TEST_CNT_GOOD); + + if (after_good <= before_good) { + test_fail("TCPAOGood counter did not increase: %zu <= %zu", + after_good, before_good); + } else { + test_ok("TCPAOGood counter increased %zu => %zu", + before_good, after_good); + } + after_bad = netstat_get_one("TCPAOBad", NULL); + if (after_bad) + test_fail("TCPAOBad counter is non-zero: %zu", after_bad); + else + test_ok("TCPAOBad counter didn't increase"); + + synchronize_threads(); /* 6: verified => closed */ + close(sk); + + synchronize_threads(); /* don't race to exit: let server exit() */ + return NULL; +} + +int main(int argc, char *argv[]) +{ + test_init(7, server_fn, client_fn); + return 0; +} diff --git a/tools/testing/selftests/net/tcp_ao/setsockopt-closed.c b/tools/testing/selftests/net/tcp_ao/setsockopt-closed.c new file mode 100644 index 000000000000..452de131fa3a --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/setsockopt-closed.c @@ -0,0 +1,835 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Author: Dmitry Safonov <dima@arista.com> */ +#include <inttypes.h> +#include "../../../../include/linux/kernel.h" +#include "aolib.h" + +static union tcp_addr tcp_md5_client; + +static int test_port = 7788; +static void make_listen(int sk) +{ + sockaddr_af addr; + + tcp_addr_to_sockaddr_in(&addr, &this_ip_addr, htons(test_port++)); + if (bind(sk, (struct sockaddr *)&addr, sizeof(addr)) < 0) + test_error("bind()"); + if (listen(sk, 1)) + test_error("listen()"); +} + +static void test_vefify_ao_info(int sk, struct tcp_ao_info_opt *info, + const char *tst) +{ + struct tcp_ao_info_opt tmp; + socklen_t len = sizeof(tmp); + + if (getsockopt(sk, IPPROTO_TCP, TCP_AO_INFO, &tmp, &len)) + test_error("getsockopt(TCP_AO_INFO) failed"); + +#define __cmp_ao(member) \ +do { \ + if (info->member != tmp.member) { \ + test_fail("%s: getsockopt(): " __stringify(member) " %zu != %zu", \ + tst, (size_t)info->member, (size_t)tmp.member); \ + return; \ + } \ +} while(0) + if (info->set_current) + __cmp_ao(current_key); + if (info->set_rnext) + __cmp_ao(rnext); + if (info->set_counters) { + __cmp_ao(pkt_good); + __cmp_ao(pkt_bad); + __cmp_ao(pkt_key_not_found); + __cmp_ao(pkt_ao_required); + __cmp_ao(pkt_dropped_icmp); + } + __cmp_ao(ao_required); + __cmp_ao(accept_icmps); + + test_ok("AO info get: %s", tst); +#undef __cmp_ao +} + +static void __setsockopt_checked(int sk, int optname, bool get, + void *optval, socklen_t *len, + int err, const char *tst, const char *tst2) +{ + int ret; + + if (!tst) + tst = ""; + if (!tst2) + tst2 = ""; + + errno = 0; + if (get) + ret = getsockopt(sk, IPPROTO_TCP, optname, optval, len); + else + ret = setsockopt(sk, IPPROTO_TCP, optname, optval, *len); + if (ret == -1) { + if (errno == err) + test_ok("%s%s", tst ?: "", tst2 ?: ""); + else + test_fail("%s%s: %setsockopt() failed", + tst, tst2, get ? "g" : "s"); + close(sk); + return; + } + + if (err) { + test_fail("%s%s: %setsockopt() was expected to fail with %d", + tst, tst2, get ? "g" : "s", err); + } else { + test_ok("%s%s", tst ?: "", tst2 ?: ""); + if (optname == TCP_AO_ADD_KEY) { + test_verify_socket_key(sk, optval); + } else if (optname == TCP_AO_INFO && !get) { + test_vefify_ao_info(sk, optval, tst2); + } else if (optname == TCP_AO_GET_KEYS) { + if (*len != sizeof(struct tcp_ao_getsockopt)) + test_fail("%s%s: get keys returned wrong tcp_ao_getsockopt size", + tst, tst2); + } + } + close(sk); +} + +static void setsockopt_checked(int sk, int optname, void *optval, + int err, const char *tst) +{ + const char *cmd = NULL; + socklen_t len; + + switch (optname) { + case TCP_AO_ADD_KEY: + cmd = "key add: "; + len = sizeof(struct tcp_ao_add); + break; + case TCP_AO_DEL_KEY: + cmd = "key del: "; + len = sizeof(struct tcp_ao_del); + break; + case TCP_AO_INFO: + cmd = "AO info set: "; + len = sizeof(struct tcp_ao_info_opt); + break; + default: + break; + } + + __setsockopt_checked(sk, optname, false, optval, &len, err, cmd, tst); +} + +static int prepare_defs(int cmd, void *optval) +{ + int sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP); + + if (sk < 0) + test_error("socket()"); + + switch (cmd) { + case TCP_AO_ADD_KEY: { + struct tcp_ao_add *add = optval; + + if (test_prepare_def_key(add, DEFAULT_TEST_PASSWORD, 0, this_ip_dest, + -1, 0, 100, 100)) + test_error("prepare default tcp_ao_add"); + break; + } + case TCP_AO_DEL_KEY: { + struct tcp_ao_del *del = optval; + + if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest, + DEFAULT_TEST_PREFIX, 100, 100)) + test_error("add default key"); + memset(del, 0, sizeof(struct tcp_ao_del)); + del->sndid = 100; + del->rcvid = 100; + del->prefix = DEFAULT_TEST_PREFIX; + tcp_addr_to_sockaddr_in(&del->addr, &this_ip_dest, 0); + break; + } + case TCP_AO_INFO: { + struct tcp_ao_info_opt *info = optval; + + if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest, + DEFAULT_TEST_PREFIX, 100, 100)) + test_error("add default key"); + memset(info, 0, sizeof(struct tcp_ao_info_opt)); + break; + } + case TCP_AO_GET_KEYS: { + struct tcp_ao_getsockopt *get = optval; + + if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest, + DEFAULT_TEST_PREFIX, 100, 100)) + test_error("add default key"); + memset(get, 0, sizeof(struct tcp_ao_getsockopt)); + get->nkeys = 1; + get->get_all = 1; + break; + } + default: + test_error("unknown cmd"); + } + + return sk; +} + +static void test_extend(int cmd, bool get, const char *tst, socklen_t under_size) +{ + struct { + union { + struct tcp_ao_add add; + struct tcp_ao_del del; + struct tcp_ao_getsockopt get; + struct tcp_ao_info_opt info; + }; + char *extend[100]; + } tmp_opt; + socklen_t extended_size = sizeof(tmp_opt); + int sk; + + memset(&tmp_opt, 0, sizeof(tmp_opt)); + sk = prepare_defs(cmd, &tmp_opt); + __setsockopt_checked(sk, cmd, get, &tmp_opt, &under_size, + EINVAL, tst, ": minimum size"); + + memset(&tmp_opt, 0, sizeof(tmp_opt)); + sk = prepare_defs(cmd, &tmp_opt); + __setsockopt_checked(sk, cmd, get, &tmp_opt, &extended_size, + 0, tst, ": extended size"); + + memset(&tmp_opt, 0, sizeof(tmp_opt)); + sk = prepare_defs(cmd, &tmp_opt); + __setsockopt_checked(sk, cmd, get, NULL, &extended_size, + EFAULT, tst, ": null optval"); + + if (get) { + memset(&tmp_opt, 0, sizeof(tmp_opt)); + sk = prepare_defs(cmd, &tmp_opt); + __setsockopt_checked(sk, cmd, get, &tmp_opt, NULL, + EFAULT, tst, ": null optlen"); + } +} + +static void extend_tests(void) +{ + test_extend(TCP_AO_ADD_KEY, false, "AO add", + offsetof(struct tcp_ao_add, key)); + test_extend(TCP_AO_DEL_KEY, false, "AO del", + offsetof(struct tcp_ao_del, keyflags)); + test_extend(TCP_AO_INFO, false, "AO set info", + offsetof(struct tcp_ao_info_opt, pkt_dropped_icmp)); + test_extend(TCP_AO_INFO, true, "AO get info", -1); + test_extend(TCP_AO_GET_KEYS, true, "AO get keys", -1); +} + +static void test_optmem_limit(void) +{ + size_t i, keys_limit, current_optmem = test_get_optmem(); + struct tcp_ao_add ao; + union tcp_addr net = {}; + int sk; + + if (inet_pton(TEST_FAMILY, TEST_NETWORK, &net) != 1) + test_error("Can't convert ip address %s", TEST_NETWORK); + + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + keys_limit = current_optmem / KERNEL_TCP_AO_KEY_SZ_ROUND_UP; + for (i = 0;; i++) { + union tcp_addr key_peer; + int err; + + key_peer = gen_tcp_addr(net, i + 1); + tcp_addr_to_sockaddr_in(&ao.addr, &key_peer, 0); + err = setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, + &ao, sizeof(ao)); + if (!err) { + /* + * TCP_AO_ADD_KEY should be the same order as the real + * sizeof(struct tcp_ao_key) in kernel. + */ + if (i <= keys_limit * 10) + continue; + test_fail("optmem limit test failed: added %zu key", i); + break; + } + if (i < keys_limit) { + test_fail("optmem limit test failed: couldn't add %zu key", i); + break; + } + test_ok("optmem limit was hit on adding %zu key", i); + break; + } + close(sk); +} + +static void test_einval_add_key(void) +{ + struct tcp_ao_add ao; + int sk; + + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + ao.keylen = TCP_AO_MAXKEYLEN + 1; + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, "too big keylen"); + + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + ao.reserved = 1; + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, "using reserved padding"); + + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + ao.reserved2 = 1; + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, "using reserved2 padding"); + + /* tcp_ao_verify_ipv{4,6}() checks */ + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + ao.addr.ss_family = AF_UNIX; + memcpy(&ao.addr, &SOCKADDR_ANY, sizeof(SOCKADDR_ANY)); + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, "wrong address family"); + + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + tcp_addr_to_sockaddr_in(&ao.addr, &this_ip_dest, 1234); + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, "port (unsupported)"); + + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + ao.prefix = 0; + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, "no prefix, addr"); + + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + ao.prefix = 0; + memcpy(&ao.addr, &SOCKADDR_ANY, sizeof(SOCKADDR_ANY)); + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, 0, "no prefix, any addr"); + + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + ao.prefix = 32; + memcpy(&ao.addr, &SOCKADDR_ANY, sizeof(SOCKADDR_ANY)); + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, "prefix, any addr"); + + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + ao.prefix = 129; + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, "too big prefix"); + + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + ao.prefix = 2; + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, "too short prefix"); + + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + ao.keyflags = (uint8_t)(-1); + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, "bad key flags"); + + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + make_listen(sk); + ao.set_current = 1; + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, "add current key on a listen socket"); + + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + make_listen(sk); + ao.set_rnext = 1; + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, "add rnext key on a listen socket"); + + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + make_listen(sk); + ao.set_current = 1; + ao.set_rnext = 1; + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, "add current+rnext key on a listen socket"); + + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + ao.set_current = 1; + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, 0, "add key and set as current"); + + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + ao.set_rnext = 1; + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, 0, "add key and set as rnext"); + + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + ao.set_current = 1; + ao.set_rnext = 1; + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, 0, "add key and set as current+rnext"); + + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + ao.ifindex = 42; + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, + "ifindex without TCP_AO_KEYF_IFNINDEX"); + + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + ao.keyflags |= TCP_AO_KEYF_IFINDEX; + ao.ifindex = 42; + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, "non-existent VRF"); + /* + * tcp_md5_do_lookup{,_any_l3index}() are checked in unsigned-md5 + * see client_vrf_tests(). + */ + + test_optmem_limit(); + + /* tcp_ao_parse_crypto() */ + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + ao.maclen = 100; + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EMSGSIZE, "maclen bigger than TCP hdr"); + + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + strcpy(ao.alg_name, "imaginary hash algo"); + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, ENOENT, "bad algo"); +} + +static void test_einval_del_key(void) +{ + struct tcp_ao_del del; + int sk; + + sk = prepare_defs(TCP_AO_DEL_KEY, &del); + del.reserved = 1; + setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, EINVAL, "using reserved padding"); + + sk = prepare_defs(TCP_AO_DEL_KEY, &del); + del.reserved2 = 1; + setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, EINVAL, "using reserved2 padding"); + + sk = prepare_defs(TCP_AO_DEL_KEY, &del); + make_listen(sk); + if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest, DEFAULT_TEST_PREFIX, 0, 0)) + test_error("add key"); + del.set_current = 1; + setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, EINVAL, "del and set current key on a listen socket"); + + sk = prepare_defs(TCP_AO_DEL_KEY, &del); + make_listen(sk); + if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest, DEFAULT_TEST_PREFIX, 0, 0)) + test_error("add key"); + del.set_rnext = 1; + setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, EINVAL, "del and set rnext key on a listen socket"); + + sk = prepare_defs(TCP_AO_DEL_KEY, &del); + make_listen(sk); + if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest, DEFAULT_TEST_PREFIX, 0, 0)) + test_error("add key"); + del.set_current = 1; + del.set_rnext = 1; + setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, EINVAL, "del and set current+rnext key on a listen socket"); + + sk = prepare_defs(TCP_AO_DEL_KEY, &del); + del.keyflags = (uint8_t)(-1); + setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, EINVAL, "bad key flags"); + + sk = prepare_defs(TCP_AO_DEL_KEY, &del); + del.ifindex = 42; + setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, EINVAL, + "ifindex without TCP_AO_KEYF_IFNINDEX"); + + sk = prepare_defs(TCP_AO_DEL_KEY, &del); + del.keyflags |= TCP_AO_KEYF_IFINDEX; + del.ifindex = 42; + setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, ENOENT, "non-existent VRF"); + + sk = prepare_defs(TCP_AO_DEL_KEY, &del); + del.set_current = 1; + setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, ENOENT, "set non-existing current key"); + + sk = prepare_defs(TCP_AO_DEL_KEY, &del); + del.set_rnext = 1; + setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, ENOENT, "set non-existing rnext key"); + + sk = prepare_defs(TCP_AO_DEL_KEY, &del); + del.set_current = 1; + del.set_rnext = 1; + setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, ENOENT, "set non-existing current+rnext key"); + + sk = prepare_defs(TCP_AO_DEL_KEY, &del); + if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest, DEFAULT_TEST_PREFIX, 0, 0)) + test_error("add key"); + del.set_current = 1; + setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, 0, "set current key"); + + sk = prepare_defs(TCP_AO_DEL_KEY, &del); + if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest, DEFAULT_TEST_PREFIX, 0, 0)) + test_error("add key"); + del.set_rnext = 1; + setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, 0, "set rnext key"); + + sk = prepare_defs(TCP_AO_DEL_KEY, &del); + if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest, DEFAULT_TEST_PREFIX, 0, 0)) + test_error("add key"); + del.set_current = 1; + del.set_rnext = 1; + setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, 0, "set current+rnext key"); + + sk = prepare_defs(TCP_AO_DEL_KEY, &del); + del.set_current = 1; + del.current_key = 100; + setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, ENOENT, "set as current key to be removed"); + + sk = prepare_defs(TCP_AO_DEL_KEY, &del); + del.set_rnext = 1; + del.rnext = 100; + setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, ENOENT, "set as rnext key to be removed"); + + sk = prepare_defs(TCP_AO_DEL_KEY, &del); + del.set_current = 1; + del.current_key = 100; + del.set_rnext = 1; + del.rnext = 100; + setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, ENOENT, "set as current+rnext key to be removed"); + + sk = prepare_defs(TCP_AO_DEL_KEY, &del); + del.del_async = 1; + setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, EINVAL, "async on non-listen"); + + sk = prepare_defs(TCP_AO_DEL_KEY, &del); + del.sndid = 101; + setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, ENOENT, "non-existing sndid"); + + sk = prepare_defs(TCP_AO_DEL_KEY, &del); + del.rcvid = 101; + setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, ENOENT, "non-existing rcvid"); + + sk = prepare_defs(TCP_AO_DEL_KEY, &del); + tcp_addr_to_sockaddr_in(&del.addr, &this_ip_addr, 0); + setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, ENOENT, "incorrect addr"); + + sk = prepare_defs(TCP_AO_DEL_KEY, &del); + setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, 0, "correct key delete"); +} + +static void test_einval_ao_info(void) +{ + struct tcp_ao_info_opt info; + int sk; + + sk = prepare_defs(TCP_AO_INFO, &info); + make_listen(sk); + info.set_current = 1; + setsockopt_checked(sk, TCP_AO_INFO, &info, EINVAL, "set current key on a listen socket"); + + sk = prepare_defs(TCP_AO_INFO, &info); + make_listen(sk); + info.set_rnext = 1; + setsockopt_checked(sk, TCP_AO_INFO, &info, EINVAL, "set rnext key on a listen socket"); + + sk = prepare_defs(TCP_AO_INFO, &info); + make_listen(sk); + info.set_current = 1; + info.set_rnext = 1; + setsockopt_checked(sk, TCP_AO_INFO, &info, EINVAL, "set current+rnext key on a listen socket"); + + sk = prepare_defs(TCP_AO_INFO, &info); + info.reserved = 1; + setsockopt_checked(sk, TCP_AO_INFO, &info, EINVAL, "using reserved padding"); + + sk = prepare_defs(TCP_AO_INFO, &info); + info.reserved2 = 1; + setsockopt_checked(sk, TCP_AO_INFO, &info, EINVAL, "using reserved2 padding"); + + sk = prepare_defs(TCP_AO_INFO, &info); + info.accept_icmps = 1; + setsockopt_checked(sk, TCP_AO_INFO, &info, 0, "accept_icmps"); + + sk = prepare_defs(TCP_AO_INFO, &info); + info.ao_required = 1; + setsockopt_checked(sk, TCP_AO_INFO, &info, 0, "ao required"); + + if (!should_skip_test("ao required with MD5 key", KCONFIG_TCP_MD5)) { + sk = prepare_defs(TCP_AO_INFO, &info); + info.ao_required = 1; + if (test_set_md5(sk, tcp_md5_client, TEST_PREFIX, -1, + "long long secret")) { + test_error("setsockopt(TCP_MD5SIG_EXT)"); + close(sk); + } else { + setsockopt_checked(sk, TCP_AO_INFO, &info, EKEYREJECTED, + "ao required with MD5 key"); + } + } + + sk = prepare_defs(TCP_AO_INFO, &info); + info.set_current = 1; + setsockopt_checked(sk, TCP_AO_INFO, &info, ENOENT, "set non-existing current key"); + + sk = prepare_defs(TCP_AO_INFO, &info); + info.set_rnext = 1; + setsockopt_checked(sk, TCP_AO_INFO, &info, ENOENT, "set non-existing rnext key"); + + sk = prepare_defs(TCP_AO_INFO, &info); + info.set_current = 1; + info.set_rnext = 1; + setsockopt_checked(sk, TCP_AO_INFO, &info, ENOENT, "set non-existing current+rnext key"); + + sk = prepare_defs(TCP_AO_INFO, &info); + info.set_current = 1; + info.current_key = 100; + setsockopt_checked(sk, TCP_AO_INFO, &info, 0, "set current key"); + + sk = prepare_defs(TCP_AO_INFO, &info); + info.set_rnext = 1; + info.rnext = 100; + setsockopt_checked(sk, TCP_AO_INFO, &info, 0, "set rnext key"); + + sk = prepare_defs(TCP_AO_INFO, &info); + info.set_current = 1; + info.set_rnext = 1; + info.current_key = 100; + info.rnext = 100; + setsockopt_checked(sk, TCP_AO_INFO, &info, 0, "set current+rnext key"); + + sk = prepare_defs(TCP_AO_INFO, &info); + info.set_counters = 1; + info.pkt_good = 321; + info.pkt_bad = 888; + info.pkt_key_not_found = 654; + info.pkt_ao_required = 987654; + info.pkt_dropped_icmp = 10000; + setsockopt_checked(sk, TCP_AO_INFO, &info, 0, "set counters"); + + sk = prepare_defs(TCP_AO_INFO, &info); + setsockopt_checked(sk, TCP_AO_INFO, &info, 0, "no-op"); +} + +static void getsockopt_checked(int sk, struct tcp_ao_getsockopt *optval, + int err, const char *tst) +{ + socklen_t len = sizeof(struct tcp_ao_getsockopt); + + __setsockopt_checked(sk, TCP_AO_GET_KEYS, true, optval, &len, err, + "get keys: ", tst); +} + +static void test_einval_get_keys(void) +{ + struct tcp_ao_getsockopt out; + int sk; + + sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP); + if (sk < 0) + test_error("socket()"); + getsockopt_checked(sk, &out, ENOENT, "no ao_info"); + + sk = prepare_defs(TCP_AO_GET_KEYS, &out); + getsockopt_checked(sk, &out, 0, "proper tcp_ao_get_mkts()"); + + sk = prepare_defs(TCP_AO_GET_KEYS, &out); + out.pkt_good = 643; + getsockopt_checked(sk, &out, EINVAL, "set out-only pkt_good counter"); + + sk = prepare_defs(TCP_AO_GET_KEYS, &out); + out.pkt_bad = 94; + getsockopt_checked(sk, &out, EINVAL, "set out-only pkt_bad counter"); + + sk = prepare_defs(TCP_AO_GET_KEYS, &out); + out.keyflags = (uint8_t)(-1); + getsockopt_checked(sk, &out, EINVAL, "bad keyflags"); + + sk = prepare_defs(TCP_AO_GET_KEYS, &out); + out.ifindex = 42; + getsockopt_checked(sk, &out, EINVAL, + "ifindex without TCP_AO_KEYF_IFNINDEX"); + + sk = prepare_defs(TCP_AO_GET_KEYS, &out); + out.reserved = 1; + getsockopt_checked(sk, &out, EINVAL, "using reserved field"); + + sk = prepare_defs(TCP_AO_GET_KEYS, &out); + out.get_all = 0; + out.prefix = 0; + tcp_addr_to_sockaddr_in(&out.addr, &this_ip_dest, 0); + getsockopt_checked(sk, &out, EINVAL, "no prefix, addr"); + + sk = prepare_defs(TCP_AO_GET_KEYS, &out); + out.get_all = 0; + out.prefix = 0; + memcpy(&out.addr, &SOCKADDR_ANY, sizeof(SOCKADDR_ANY)); + getsockopt_checked(sk, &out, 0, "no prefix, any addr"); + + sk = prepare_defs(TCP_AO_GET_KEYS, &out); + out.get_all = 0; + out.prefix = 32; + memcpy(&out.addr, &SOCKADDR_ANY, sizeof(SOCKADDR_ANY)); + getsockopt_checked(sk, &out, EINVAL, "prefix, any addr"); + + sk = prepare_defs(TCP_AO_GET_KEYS, &out); + out.get_all = 0; + out.prefix = 129; + tcp_addr_to_sockaddr_in(&out.addr, &this_ip_dest, 0); + getsockopt_checked(sk, &out, EINVAL, "too big prefix"); + + sk = prepare_defs(TCP_AO_GET_KEYS, &out); + out.get_all = 0; + out.prefix = 2; + tcp_addr_to_sockaddr_in(&out.addr, &this_ip_dest, 0); + getsockopt_checked(sk, &out, EINVAL, "too short prefix"); + + sk = prepare_defs(TCP_AO_GET_KEYS, &out); + out.get_all = 0; + out.prefix = DEFAULT_TEST_PREFIX; + tcp_addr_to_sockaddr_in(&out.addr, &this_ip_dest, 0); + getsockopt_checked(sk, &out, 0, "prefix + addr"); + + sk = prepare_defs(TCP_AO_GET_KEYS, &out); + out.get_all = 1; + out.prefix = DEFAULT_TEST_PREFIX; + getsockopt_checked(sk, &out, EINVAL, "get_all + prefix"); + + sk = prepare_defs(TCP_AO_GET_KEYS, &out); + out.get_all = 1; + tcp_addr_to_sockaddr_in(&out.addr, &this_ip_dest, 0); + getsockopt_checked(sk, &out, EINVAL, "get_all + addr"); + + sk = prepare_defs(TCP_AO_GET_KEYS, &out); + out.get_all = 1; + out.sndid = 1; + getsockopt_checked(sk, &out, EINVAL, "get_all + sndid"); + + sk = prepare_defs(TCP_AO_GET_KEYS, &out); + out.get_all = 1; + out.rcvid = 1; + getsockopt_checked(sk, &out, EINVAL, "get_all + rcvid"); + + sk = prepare_defs(TCP_AO_GET_KEYS, &out); + out.get_all = 0; + out.is_current = 1; + out.prefix = DEFAULT_TEST_PREFIX; + getsockopt_checked(sk, &out, EINVAL, "current + prefix"); + + sk = prepare_defs(TCP_AO_GET_KEYS, &out); + out.get_all = 0; + out.is_current = 1; + tcp_addr_to_sockaddr_in(&out.addr, &this_ip_dest, 0); + getsockopt_checked(sk, &out, EINVAL, "current + addr"); + + sk = prepare_defs(TCP_AO_GET_KEYS, &out); + out.get_all = 0; + out.is_current = 1; + out.sndid = 1; + getsockopt_checked(sk, &out, EINVAL, "current + sndid"); + + sk = prepare_defs(TCP_AO_GET_KEYS, &out); + out.get_all = 0; + out.is_current = 1; + out.rcvid = 1; + getsockopt_checked(sk, &out, EINVAL, "current + rcvid"); + + sk = prepare_defs(TCP_AO_GET_KEYS, &out); + out.get_all = 0; + out.is_rnext = 1; + out.prefix = DEFAULT_TEST_PREFIX; + getsockopt_checked(sk, &out, EINVAL, "rnext + prefix"); + + sk = prepare_defs(TCP_AO_GET_KEYS, &out); + out.get_all = 0; + out.is_rnext = 1; + tcp_addr_to_sockaddr_in(&out.addr, &this_ip_dest, 0); + getsockopt_checked(sk, &out, EINVAL, "rnext + addr"); + + sk = prepare_defs(TCP_AO_GET_KEYS, &out); + out.get_all = 0; + out.is_rnext = 1; + out.sndid = 1; + getsockopt_checked(sk, &out, EINVAL, "rnext + sndid"); + + sk = prepare_defs(TCP_AO_GET_KEYS, &out); + out.get_all = 0; + out.is_rnext = 1; + out.rcvid = 1; + getsockopt_checked(sk, &out, EINVAL, "rnext + rcvid"); + + sk = prepare_defs(TCP_AO_GET_KEYS, &out); + out.get_all = 1; + out.is_current = 1; + getsockopt_checked(sk, &out, EINVAL, "get_all + current"); + + sk = prepare_defs(TCP_AO_GET_KEYS, &out); + out.get_all = 1; + out.is_rnext = 1; + getsockopt_checked(sk, &out, EINVAL, "get_all + rnext"); + + sk = prepare_defs(TCP_AO_GET_KEYS, &out); + out.get_all = 0; + out.is_current = 1; + out.is_rnext = 1; + getsockopt_checked(sk, &out, 0, "current + rnext"); +} + +static void einval_tests(void) +{ + test_einval_add_key(); + test_einval_del_key(); + test_einval_ao_info(); + test_einval_get_keys(); +} + +static void duplicate_tests(void) +{ + union tcp_addr network_dup; + struct tcp_ao_add ao, ao2; + int sk; + + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + if (setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &ao, sizeof(ao))) + test_error("setsockopt()"); + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EEXIST, "duplicate: full copy"); + + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + ao2 = ao; + memcpy(&ao2.addr, &SOCKADDR_ANY, sizeof(SOCKADDR_ANY)); + ao2.prefix = 0; + if (setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &ao2, sizeof(ao))) + test_error("setsockopt()"); + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EEXIST, "duplicate: any addr key on the socket"); + + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + if (setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &ao, sizeof(ao))) + test_error("setsockopt()"); + memcpy(&ao.addr, &SOCKADDR_ANY, sizeof(SOCKADDR_ANY)); + ao.prefix = 0; + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EEXIST, "duplicate: add any addr key"); + + if (inet_pton(TEST_FAMILY, TEST_NETWORK, &network_dup) != 1) + test_error("Can't convert ip address %s", TEST_NETWORK); + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + if (setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &ao, sizeof(ao))) + test_error("setsockopt()"); + if (test_prepare_def_key(&ao, "password", 0, network_dup, + 16, 0, 100, 100)) + test_error("prepare default tcp_ao_add"); + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EEXIST, "duplicate: add any addr for the same subnet"); + + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + if (setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &ao, sizeof(ao))) + test_error("setsockopt()"); + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EEXIST, "duplicate: full copy of a key"); + + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + if (setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &ao, sizeof(ao))) + test_error("setsockopt()"); + ao.rcvid = 101; + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EEXIST, "duplicate: RecvID differs"); + + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + if (setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &ao, sizeof(ao))) + test_error("setsockopt()"); + ao.sndid = 101; + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EEXIST, "duplicate: SendID differs"); +} + +static void *client_fn(void *arg) +{ + if (inet_pton(TEST_FAMILY, __TEST_CLIENT_IP(2), &tcp_md5_client) != 1) + test_error("Can't convert ip address"); + extend_tests(); + einval_tests(); + duplicate_tests(); + /* + * TODO: check getsockopt(TCP_AO_GET_KEYS) with different filters + * returning proper nr & keys; + */ + + return NULL; +} + +int main(int argc, char *argv[]) +{ + test_init(120, client_fn, NULL); + return 0; +} diff --git a/tools/testing/selftests/net/tcp_ao/settings b/tools/testing/selftests/net/tcp_ao/settings new file mode 100644 index 000000000000..6091b45d226b --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/settings @@ -0,0 +1 @@ +timeout=120 diff --git a/tools/testing/selftests/net/tcp_ao/unsigned-md5.c b/tools/testing/selftests/net/tcp_ao/unsigned-md5.c new file mode 100644 index 000000000000..6b59a652159f --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/unsigned-md5.c @@ -0,0 +1,741 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Author: Dmitry Safonov <dima@arista.com> */ +#include <inttypes.h> +#include "aolib.h" + +#define fault(type) (inj == FAULT_ ## type) +static const char *md5_password = "Some evil genius, enemy to mankind, must have been the first contriver."; +static const char *ao_password = DEFAULT_TEST_PASSWORD; + +static union tcp_addr client2; +static union tcp_addr client3; + +static const int test_vrf_ifindex = 200; +static const uint8_t test_vrf_tabid = 42; +static void setup_vrfs(void) +{ + int err; + + if (!kernel_config_has(KCONFIG_NET_VRF)) + return; + + err = add_vrf("ksft-vrf", test_vrf_tabid, test_vrf_ifindex, -1); + if (err) + test_error("Failed to add a VRF: %d", err); + + err = link_set_up("ksft-vrf"); + if (err) + test_error("Failed to bring up a VRF"); + + err = ip_route_add_vrf(veth_name, TEST_FAMILY, + this_ip_addr, this_ip_dest, test_vrf_tabid); + if (err) + test_error("Failed to add a route to VRF: %d", err); +} + +static void try_accept(const char *tst_name, unsigned int port, + union tcp_addr *md5_addr, uint8_t md5_prefix, + union tcp_addr *ao_addr, uint8_t ao_prefix, + bool set_ao_required, + uint8_t sndid, uint8_t rcvid, uint8_t vrf, + const char *cnt_name, test_cnt cnt_expected, + int needs_tcp_md5, fault_t inj) +{ + struct tcp_ao_counters ao_cnt1, ao_cnt2; + uint64_t before_cnt = 0, after_cnt = 0; /* silence GCC */ + int lsk, err, sk = 0; + time_t timeout; + + if (needs_tcp_md5 && should_skip_test(tst_name, KCONFIG_TCP_MD5)) + return; + + lsk = test_listen_socket(this_ip_addr, port, 1); + + if (md5_addr && test_set_md5(lsk, *md5_addr, md5_prefix, -1, md5_password)) + test_error("setsockopt(TCP_MD5SIG_EXT)"); + + if (ao_addr && test_add_key(lsk, ao_password, + *ao_addr, ao_prefix, sndid, rcvid)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); + + if (set_ao_required && test_set_ao_flags(lsk, true, false)) + test_error("setsockopt(TCP_AO_INFO)"); + + if (cnt_name) + before_cnt = netstat_get_one(cnt_name, NULL); + if (ao_addr && test_get_tcp_ao_counters(lsk, &ao_cnt1)) + test_error("test_get_tcp_ao_counters()"); + + synchronize_threads(); /* preparations done */ + + timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC; + err = test_wait_fd(lsk, timeout, 0); + if (err == -ETIMEDOUT) { + if (!fault(TIMEOUT)) + test_fail("timed out for accept()"); + } else if (err < 0) { + test_error("test_wait_fd()"); + } else { + if (fault(TIMEOUT)) + test_fail("ready to accept"); + + sk = accept(lsk, NULL, NULL); + if (sk < 0) { + test_error("accept()"); + } else { + if (fault(TIMEOUT)) + test_fail("%s: accepted", tst_name); + } + } + + if (ao_addr && test_get_tcp_ao_counters(lsk, &ao_cnt2)) + test_error("test_get_tcp_ao_counters()"); + close(lsk); + + if (!cnt_name) { + test_ok("%s: no counter checks", tst_name); + goto out; + } + + after_cnt = netstat_get_one(cnt_name, NULL); + + if (after_cnt <= before_cnt) { + test_fail("%s: %s counter did not increase: %zu <= %zu", + tst_name, cnt_name, after_cnt, before_cnt); + } else { + test_ok("%s: counter %s increased %zu => %zu", + tst_name, cnt_name, before_cnt, after_cnt); + } + if (ao_addr) + test_tcp_ao_counters_cmp(tst_name, &ao_cnt1, &ao_cnt2, cnt_expected); + +out: + synchronize_threads(); /* test_kill_sk() */ + if (sk > 0) + test_kill_sk(sk); +} + +static void server_add_routes(void) +{ + int family = TEST_FAMILY; + + synchronize_threads(); /* client_add_ips() */ + + if (ip_route_add(veth_name, family, this_ip_addr, client2)) + test_error("Failed to add route"); + if (ip_route_add(veth_name, family, this_ip_addr, client3)) + test_error("Failed to add route"); +} + +static void server_add_fail_tests(unsigned int *port) +{ + union tcp_addr addr_any = {}; + + try_accept("TCP-AO established: add TCP-MD5 key", (*port)++, NULL, 0, + &addr_any, 0, 0, 100, 100, 0, "TCPAOGood", TEST_CNT_GOOD, + 1, 0); + try_accept("TCP-MD5 established: add TCP-AO key", (*port)++, &addr_any, + 0, NULL, 0, 0, 0, 0, 0, NULL, 0, 1, 0); + try_accept("non-signed established: add TCP-AO key", (*port)++, NULL, 0, + NULL, 0, 0, 0, 0, 0, "CurrEstab", 0, 0, 0); +} + +static void server_vrf_tests(unsigned int *port) +{ + setup_vrfs(); +} + +static void *server_fn(void *arg) +{ + unsigned int port = test_server_port; + union tcp_addr addr_any = {}; + + server_add_routes(); + + try_accept("AO server (INADDR_ANY): AO client", port++, NULL, 0, + &addr_any, 0, 0, 100, 100, 0, "TCPAOGood", + TEST_CNT_GOOD, 0, 0); + try_accept("AO server (INADDR_ANY): MD5 client", port++, NULL, 0, + &addr_any, 0, 0, 100, 100, 0, "TCPMD5Unexpected", + 0, 1, FAULT_TIMEOUT); + try_accept("AO server (INADDR_ANY): no sign client", port++, NULL, 0, + &addr_any, 0, 0, 100, 100, 0, "TCPAORequired", + TEST_CNT_AO_REQUIRED, 0, FAULT_TIMEOUT); + try_accept("AO server (AO_REQUIRED): AO client", port++, NULL, 0, + &this_ip_dest, TEST_PREFIX, true, + 100, 100, 0, "TCPAOGood", TEST_CNT_GOOD, 0, 0); + try_accept("AO server (AO_REQUIRED): unsigned client", port++, NULL, 0, + &this_ip_dest, TEST_PREFIX, true, + 100, 100, 0, "TCPAORequired", + TEST_CNT_AO_REQUIRED, 0, FAULT_TIMEOUT); + + try_accept("MD5 server (INADDR_ANY): AO client", port++, &addr_any, 0, + NULL, 0, 0, 0, 0, 0, "TCPAOKeyNotFound", + 0, 1, FAULT_TIMEOUT); + try_accept("MD5 server (INADDR_ANY): MD5 client", port++, &addr_any, 0, + NULL, 0, 0, 0, 0, 0, NULL, 0, 1, 0); + try_accept("MD5 server (INADDR_ANY): no sign client", port++, &addr_any, + 0, NULL, 0, 0, 0, 0, 0, "TCPMD5NotFound", + 0, 1, FAULT_TIMEOUT); + + try_accept("no sign server: AO client", port++, NULL, 0, + NULL, 0, 0, 0, 0, 0, "TCPAOKeyNotFound", + TEST_CNT_AO_KEY_NOT_FOUND, 0, FAULT_TIMEOUT); + try_accept("no sign server: MD5 client", port++, NULL, 0, + NULL, 0, 0, 0, 0, 0, "TCPMD5Unexpected", + 0, 1, FAULT_TIMEOUT); + try_accept("no sign server: no sign client", port++, NULL, 0, + NULL, 0, 0, 0, 0, 0, "CurrEstab", 0, 0, 0); + + try_accept("AO+MD5 server: AO client (matching)", port++, + &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0, + 100, 100, 0, "TCPAOGood", TEST_CNT_GOOD, 1, 0); + try_accept("AO+MD5 server: AO client (misconfig, matching MD5)", port++, + &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0, + 100, 100, 0, "TCPAOKeyNotFound", TEST_CNT_AO_KEY_NOT_FOUND, + 1, FAULT_TIMEOUT); + try_accept("AO+MD5 server: AO client (misconfig, non-matching)", port++, + &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0, + 100, 100, 0, "TCPAOKeyNotFound", TEST_CNT_AO_KEY_NOT_FOUND, + 1, FAULT_TIMEOUT); + try_accept("AO+MD5 server: MD5 client (matching)", port++, + &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0, + 100, 100, 0, NULL, 0, 1, 0); + try_accept("AO+MD5 server: MD5 client (misconfig, matching AO)", port++, + &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0, + 100, 100, 0, "TCPMD5Unexpected", 0, 1, FAULT_TIMEOUT); + try_accept("AO+MD5 server: MD5 client (misconfig, non-matching)", port++, + &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0, + 100, 100, 0, "TCPMD5Unexpected", 0, 1, FAULT_TIMEOUT); + try_accept("AO+MD5 server: no sign client (unmatched)", port++, + &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0, + 100, 100, 0, "CurrEstab", 0, 1, 0); + try_accept("AO+MD5 server: no sign client (misconfig, matching AO)", + port++, &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0, + 100, 100, 0, "TCPAORequired", + TEST_CNT_AO_REQUIRED, 1, FAULT_TIMEOUT); + try_accept("AO+MD5 server: no sign client (misconfig, matching MD5)", + port++, &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0, + 100, 100, 0, "TCPMD5NotFound", 0, 1, FAULT_TIMEOUT); + + try_accept("AO+MD5 server: client with both [TCP-MD5] and TCP-AO keys", + port++, &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0, + 100, 100, 0, NULL, 0, 1, FAULT_TIMEOUT); + try_accept("AO+MD5 server: client with both TCP-MD5 and [TCP-AO] keys", + port++, &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0, + 100, 100, 0, NULL, 0, 1, FAULT_TIMEOUT); + + server_add_fail_tests(&port); + + server_vrf_tests(&port); + + /* client exits */ + synchronize_threads(); + return NULL; +} + +static int client_bind(int sk, union tcp_addr bind_addr) +{ +#ifdef IPV6_TEST + struct sockaddr_in6 addr = { + .sin6_family = AF_INET6, + .sin6_port = 0, + .sin6_addr = bind_addr.a6, + }; +#else + struct sockaddr_in addr = { + .sin_family = AF_INET, + .sin_port = 0, + .sin_addr = bind_addr.a4, + }; +#endif + return bind(sk, &addr, sizeof(addr)); +} + +static void try_connect(const char *tst_name, unsigned int port, + union tcp_addr *md5_addr, uint8_t md5_prefix, + union tcp_addr *ao_addr, uint8_t ao_prefix, + uint8_t sndid, uint8_t rcvid, uint8_t vrf, + fault_t inj, int needs_tcp_md5, union tcp_addr *bind_addr) +{ + time_t timeout; + int sk, ret; + + if (needs_tcp_md5 && should_skip_test(tst_name, KCONFIG_TCP_MD5)) + return; + + sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP); + if (sk < 0) + test_error("socket()"); + + if (bind_addr && client_bind(sk, *bind_addr)) + test_error("bind()"); + + if (md5_addr && test_set_md5(sk, *md5_addr, md5_prefix, -1, md5_password)) + test_error("setsockopt(TCP_MD5SIG_EXT)"); + + if (ao_addr && test_add_key(sk, ao_password, *ao_addr, + ao_prefix, sndid, rcvid)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); + + synchronize_threads(); /* preparations done */ + + timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC; + ret = _test_connect_socket(sk, this_ip_dest, port, timeout); + + if (ret < 0) { + if (fault(KEYREJECT) && ret == -EKEYREJECTED) + test_ok("%s: connect() was prevented", tst_name); + else if (ret == -ETIMEDOUT && fault(TIMEOUT)) + test_ok("%s", tst_name); + else if (ret == -ECONNREFUSED && + (fault(TIMEOUT) || fault(KEYREJECT))) + test_ok("%s: refused to connect", tst_name); + else + test_error("%s: connect() returned %d", tst_name, ret); + goto out; + } + + if (fault(TIMEOUT) || fault(KEYREJECT)) + test_fail("%s: connected", tst_name); + else + test_ok("%s: connected", tst_name); + +out: + synchronize_threads(); /* test_kill_sk() */ + /* _test_connect_socket() cleans up on failure */ + if (ret > 0) + test_kill_sk(sk); +} + +#define PREINSTALL_MD5_FIRST BIT(0) +#define PREINSTALL_AO BIT(1) +#define POSTINSTALL_AO BIT(2) +#define PREINSTALL_MD5 BIT(3) +#define POSTINSTALL_MD5 BIT(4) + +static int try_add_key_vrf(int sk, union tcp_addr in_addr, uint8_t prefix, + int vrf, uint8_t sndid, uint8_t rcvid, + bool set_ao_required) +{ + uint8_t keyflags = 0; + + if (vrf >= 0) + keyflags |= TCP_AO_KEYF_IFINDEX; + else + vrf = 0; + if (set_ao_required) { + int err = test_set_ao_flags(sk, true, 0); + + if (err) + return err; + } + return test_add_key_vrf(sk, ao_password, keyflags, in_addr, prefix, + (uint8_t)vrf, sndid, rcvid); +} + +static bool test_continue(const char *tst_name, int err, + fault_t inj, bool added_ao) +{ + bool expected_to_fail; + + expected_to_fail = fault(PREINSTALL_AO) && added_ao; + expected_to_fail |= fault(PREINSTALL_MD5) && !added_ao; + + if (!err) { + if (!expected_to_fail) + return true; + test_fail("%s: setsockopt()s were expected to fail", tst_name); + return false; + } + if (err != -EKEYREJECTED || !expected_to_fail) { + test_error("%s: setsockopt(%s) = %d", tst_name, + added_ao ? "TCP_AO_ADD_KEY" : "TCP_MD5SIG_EXT", err); + return false; + } + test_ok("%s: prefailed as expected: %m", tst_name); + return false; +} + +static int open_add(const char *tst_name, unsigned int port, + unsigned int strategy, + union tcp_addr md5_addr, uint8_t md5_prefix, int md5_vrf, + union tcp_addr ao_addr, uint8_t ao_prefix, + int ao_vrf, bool set_ao_required, + uint8_t sndid, uint8_t rcvid, + fault_t inj) +{ + int sk; + + sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP); + if (sk < 0) + test_error("socket()"); + + if (client_bind(sk, this_ip_addr)) + test_error("bind()"); + + if (strategy & PREINSTALL_MD5_FIRST) { + if (test_set_md5(sk, md5_addr, md5_prefix, md5_vrf, md5_password)) + test_error("setsockopt(TCP_MD5SIG_EXT)"); + } + + if (strategy & PREINSTALL_AO) { + int err = try_add_key_vrf(sk, ao_addr, ao_prefix, ao_vrf, + sndid, rcvid, set_ao_required); + + if (!test_continue(tst_name, err, inj, true)) { + close(sk); + return -1; + } + } + + if (strategy & PREINSTALL_MD5) { + errno = 0; + test_set_md5(sk, md5_addr, md5_prefix, md5_vrf, md5_password); + if (!test_continue(tst_name, -errno, inj, false)) { + close(sk); + return -1; + } + } + + return sk; +} + +static void try_to_preadd(const char *tst_name, unsigned int port, + unsigned int strategy, + union tcp_addr md5_addr, uint8_t md5_prefix, + int md5_vrf, + union tcp_addr ao_addr, uint8_t ao_prefix, + int ao_vrf, bool set_ao_required, + uint8_t sndid, uint8_t rcvid, + int needs_tcp_md5, int needs_vrf, fault_t inj) +{ + int sk; + + if (needs_tcp_md5 && should_skip_test(tst_name, KCONFIG_TCP_MD5)) + return; + if (needs_vrf && should_skip_test(tst_name, KCONFIG_NET_VRF)) + return; + + sk = open_add(tst_name, port, strategy, md5_addr, md5_prefix, md5_vrf, + ao_addr, ao_prefix, ao_vrf, set_ao_required, + sndid, rcvid, inj); + if (sk < 0) + return; + + test_ok("%s", tst_name); + close(sk); +} + +static void try_to_add(const char *tst_name, unsigned int port, + unsigned int strategy, + union tcp_addr md5_addr, uint8_t md5_prefix, + int md5_vrf, + union tcp_addr ao_addr, uint8_t ao_prefix, + int ao_vrf, uint8_t sndid, uint8_t rcvid, + int needs_tcp_md5, fault_t inj) +{ + time_t timeout; + int sk, ret; + + if (needs_tcp_md5 && should_skip_test(tst_name, KCONFIG_TCP_MD5)) + return; + + sk = open_add(tst_name, port, strategy, md5_addr, md5_prefix, md5_vrf, + ao_addr, ao_prefix, ao_vrf, 0, sndid, rcvid, inj); + if (sk < 0) + return; + + synchronize_threads(); /* preparations done */ + + timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC; + ret = _test_connect_socket(sk, this_ip_dest, port, timeout); + + if (ret <= 0) { + test_error("%s: connect() returned %d", tst_name, ret); + goto out; + } + + if (strategy & POSTINSTALL_MD5) { + if (test_set_md5(sk, md5_addr, md5_prefix, md5_vrf, md5_password)) { + if (fault(POSTINSTALL)) { + test_ok("%s: postfailed as expected", tst_name); + goto out; + } else { + test_error("setsockopt(TCP_MD5SIG_EXT)"); + } + } else if (fault(POSTINSTALL)) { + test_fail("%s: post setsockopt() was expected to fail", tst_name); + goto out; + } + } + + if (strategy & POSTINSTALL_AO) { + if (try_add_key_vrf(sk, ao_addr, ao_prefix, ao_vrf, + sndid, rcvid, 0)) { + if (fault(POSTINSTALL)) { + test_ok("%s: postfailed as expected", tst_name); + goto out; + } else { + test_error("setsockopt(TCP_AO_ADD_KEY)"); + } + } else if (fault(POSTINSTALL)) { + test_fail("%s: post setsockopt() was expected to fail", tst_name); + goto out; + } + } + +out: + synchronize_threads(); /* test_kill_sk() */ + /* _test_connect_socket() cleans up on failure */ + if (ret > 0) + test_kill_sk(sk); +} + +static void client_add_ip(union tcp_addr *client, const char *ip) +{ + int err, family = TEST_FAMILY; + + if (inet_pton(family, ip, client) != 1) + test_error("Can't convert ip address %s", ip); + + err = ip_addr_add(veth_name, family, *client, TEST_PREFIX); + if (err) + test_error("Failed to add ip address: %d", err); +} + +static void client_add_ips(void) +{ + client_add_ip(&client2, __TEST_CLIENT_IP(2)); + client_add_ip(&client3, __TEST_CLIENT_IP(3)); + synchronize_threads(); /* server_add_routes() */ +} + +static void client_add_fail_tests(unsigned int *port) +{ + try_to_add("TCP-AO established: add TCP-MD5 key", + (*port)++, POSTINSTALL_MD5 | PREINSTALL_AO, + this_ip_dest, TEST_PREFIX, -1, this_ip_dest, TEST_PREFIX, 0, + 100, 100, 1, FAULT_POSTINSTALL); + try_to_add("TCP-MD5 established: add TCP-AO key", + (*port)++, PREINSTALL_MD5 | POSTINSTALL_AO, + this_ip_dest, TEST_PREFIX, -1, this_ip_dest, TEST_PREFIX, 0, + 100, 100, 1, FAULT_POSTINSTALL); + try_to_add("non-signed established: add TCP-AO key", + (*port)++, POSTINSTALL_AO, + this_ip_dest, TEST_PREFIX, -1, this_ip_dest, TEST_PREFIX, 0, + 100, 100, 0, FAULT_POSTINSTALL); + + try_to_add("TCP-AO key intersects with existing TCP-MD5 key", + (*port)++, PREINSTALL_MD5_FIRST | PREINSTALL_AO, + this_ip_addr, TEST_PREFIX, -1, this_ip_addr, TEST_PREFIX, -1, + 100, 100, 1, FAULT_PREINSTALL_AO); + try_to_add("TCP-MD5 key intersects with existing TCP-AO key", + (*port)++, PREINSTALL_MD5 | PREINSTALL_AO, + this_ip_addr, TEST_PREFIX, -1, this_ip_addr, TEST_PREFIX, -1, + 100, 100, 1, FAULT_PREINSTALL_MD5); + + try_to_preadd("TCP-MD5 key + TCP-AO required", + (*port)++, PREINSTALL_MD5_FIRST | PREINSTALL_AO, + this_ip_addr, TEST_PREFIX, -1, + this_ip_addr, TEST_PREFIX, -1, true, + 100, 100, 1, 0, FAULT_PREINSTALL_AO); + try_to_preadd("TCP-AO required on socket + TCP-MD5 key", + (*port)++, PREINSTALL_MD5 | PREINSTALL_AO, + this_ip_addr, TEST_PREFIX, -1, + this_ip_addr, TEST_PREFIX, -1, true, + 100, 100, 1, 0, FAULT_PREINSTALL_MD5); +} + +static void client_vrf_tests(unsigned int *port) +{ + setup_vrfs(); + + /* The following restrictions for setsockopt()s are expected: + * + * |--------------|-----------------|-------------|-------------| + * | | MD5 key without | MD5 key | MD5 key | + * | | l3index | l3index=0 | l3index=N | + * |--------------|-----------------|-------------|-------------| + * | TCP-AO key | | | | + * | without | reject | reject | reject | + * | l3index | | | | + * |--------------|-----------------|-------------|-------------| + * | TCP-AO key | | | | + * | l3index=0 | reject | reject | allow | + * |--------------|-----------------|-------------|-------------| + * | TCP-AO key | | | | + * | l3index=N | reject | allow | reject | + * |--------------|-----------------|-------------|-------------| + */ + try_to_preadd("VRF: TCP-AO key (no l3index) + TCP-MD5 key (no l3index)", + (*port)++, PREINSTALL_MD5 | PREINSTALL_AO, + this_ip_addr, TEST_PREFIX, -1, + this_ip_addr, TEST_PREFIX, -1, 0, 100, 100, + 1, 1, FAULT_PREINSTALL_MD5); + try_to_preadd("VRF: TCP-MD5 key (no l3index) + TCP-AO key (no l3index)", + (*port)++, PREINSTALL_MD5_FIRST | PREINSTALL_AO, + this_ip_addr, TEST_PREFIX, -1, + this_ip_addr, TEST_PREFIX, -1, 0, 100, 100, + 1, 1, FAULT_PREINSTALL_AO); + try_to_preadd("VRF: TCP-AO key (no l3index) + TCP-MD5 key (l3index=0)", + (*port)++, PREINSTALL_MD5 | PREINSTALL_AO, + this_ip_addr, TEST_PREFIX, 0, + this_ip_addr, TEST_PREFIX, -1, 0, 100, 100, + 1, 1, FAULT_PREINSTALL_MD5); + try_to_preadd("VRF: TCP-MD5 key (l3index=0) + TCP-AO key (no l3index)", + (*port)++, PREINSTALL_MD5_FIRST | PREINSTALL_AO, + this_ip_addr, TEST_PREFIX, 0, + this_ip_addr, TEST_PREFIX, -1, 0, 100, 100, + 1, 1, FAULT_PREINSTALL_AO); + try_to_preadd("VRF: TCP-AO key (no l3index) + TCP-MD5 key (l3index=N)", + (*port)++, PREINSTALL_MD5 | PREINSTALL_AO, + this_ip_addr, TEST_PREFIX, test_vrf_ifindex, + this_ip_addr, TEST_PREFIX, -1, 0, 100, 100, + 1, 1, FAULT_PREINSTALL_MD5); + try_to_preadd("VRF: TCP-MD5 key (l3index=N) + TCP-AO key (no l3index)", + (*port)++, PREINSTALL_MD5_FIRST | PREINSTALL_AO, + this_ip_addr, TEST_PREFIX, test_vrf_ifindex, + this_ip_addr, TEST_PREFIX, -1, 0, 100, 100, + 1, 1, FAULT_PREINSTALL_AO); + + try_to_preadd("VRF: TCP-AO key (l3index=0) + TCP-MD5 key (no l3index)", + (*port)++, PREINSTALL_MD5 | PREINSTALL_AO, + this_ip_addr, TEST_PREFIX, -1, + this_ip_addr, TEST_PREFIX, 0, 0, 100, 100, + 1, 1, FAULT_PREINSTALL_MD5); + try_to_preadd("VRF: TCP-MD5 key (no l3index) + TCP-AO key (l3index=0)", + (*port)++, PREINSTALL_MD5_FIRST | PREINSTALL_AO, + this_ip_addr, TEST_PREFIX, -1, + this_ip_addr, TEST_PREFIX, 0, 0, 100, 100, + 1, 1, FAULT_PREINSTALL_AO); + try_to_preadd("VRF: TCP-AO key (l3index=0) + TCP-MD5 key (l3index=0)", + (*port)++, PREINSTALL_MD5 | PREINSTALL_AO, + this_ip_addr, TEST_PREFIX, 0, + this_ip_addr, TEST_PREFIX, 0, 0, 100, 100, + 1, 1, FAULT_PREINSTALL_MD5); + try_to_preadd("VRF: TCP-MD5 key (l3index=0) + TCP-AO key (l3index=0)", + (*port)++, PREINSTALL_MD5_FIRST | PREINSTALL_AO, + this_ip_addr, TEST_PREFIX, 0, + this_ip_addr, TEST_PREFIX, 0, 0, 100, 100, + 1, 1, FAULT_PREINSTALL_AO); + try_to_preadd("VRF: TCP-AO key (l3index=0) + TCP-MD5 key (l3index=N)", + (*port)++, PREINSTALL_MD5 | PREINSTALL_AO, + this_ip_addr, TEST_PREFIX, test_vrf_ifindex, + this_ip_addr, TEST_PREFIX, 0, 0, 100, 100, + 1, 1, 0); + try_to_preadd("VRF: TCP-MD5 key (l3index=N) + TCP-AO key (l3index=0)", + (*port)++, PREINSTALL_MD5_FIRST | PREINSTALL_AO, + this_ip_addr, TEST_PREFIX, test_vrf_ifindex, + this_ip_addr, TEST_PREFIX, 0, 0, 100, 100, + 1, 1, 0); + + try_to_preadd("VRF: TCP-AO key (l3index=N) + TCP-MD5 key (no l3index)", + (*port)++, PREINSTALL_MD5 | PREINSTALL_AO, + this_ip_addr, TEST_PREFIX, test_vrf_ifindex, + this_ip_addr, TEST_PREFIX, -1, 0, 100, 100, + 1, 1, FAULT_PREINSTALL_MD5); + try_to_preadd("VRF: TCP-MD5 key (no l3index) + TCP-AO key (l3index=N)", + (*port)++, PREINSTALL_MD5_FIRST | PREINSTALL_AO, + this_ip_addr, TEST_PREFIX, -1, + this_ip_addr, TEST_PREFIX, test_vrf_ifindex, 0, 100, 100, + 1, 1, FAULT_PREINSTALL_AO); + try_to_preadd("VRF: TCP-AO key (l3index=N) + TCP-MD5 key (l3index=0)", + (*port)++, PREINSTALL_MD5 | PREINSTALL_AO, + this_ip_addr, TEST_PREFIX, 0, + this_ip_addr, TEST_PREFIX, test_vrf_ifindex, 0, 100, 100, + 1, 1, 0); + try_to_preadd("VRF: TCP-MD5 key (l3index=0) + TCP-AO key (l3index=N)", + (*port)++, PREINSTALL_MD5_FIRST | PREINSTALL_AO, + this_ip_addr, TEST_PREFIX, 0, + this_ip_addr, TEST_PREFIX, test_vrf_ifindex, 0, 100, 100, + 1, 1, 0); + try_to_preadd("VRF: TCP-AO key (l3index=N) + TCP-MD5 key (l3index=N)", + (*port)++, PREINSTALL_MD5 | PREINSTALL_AO, + this_ip_addr, TEST_PREFIX, test_vrf_ifindex, + this_ip_addr, TEST_PREFIX, test_vrf_ifindex, 0, 100, 100, + 1, 1, FAULT_PREINSTALL_MD5); + try_to_preadd("VRF: TCP-MD5 key (l3index=N) + TCP-AO key (l3index=N)", + (*port)++, PREINSTALL_MD5_FIRST | PREINSTALL_AO, + this_ip_addr, TEST_PREFIX, test_vrf_ifindex, + this_ip_addr, TEST_PREFIX, test_vrf_ifindex, 0, 100, 100, + 1, 1, FAULT_PREINSTALL_AO); +} + +static void *client_fn(void *arg) +{ + unsigned int port = test_server_port; + union tcp_addr addr_any = {}; + + client_add_ips(); + + try_connect("AO server (INADDR_ANY): AO client", port++, NULL, 0, + &addr_any, 0, 100, 100, 0, 0, 0, &this_ip_addr); + try_connect("AO server (INADDR_ANY): MD5 client", port++, &addr_any, 0, + NULL, 0, 100, 100, 0, FAULT_TIMEOUT, 1, &this_ip_addr); + try_connect("AO server (INADDR_ANY): unsigned client", port++, NULL, 0, + NULL, 0, 100, 100, 0, FAULT_TIMEOUT, 0, &this_ip_addr); + try_connect("AO server (AO_REQUIRED): AO client", port++, NULL, 0, + &addr_any, 0, 100, 100, 0, 0, 0, &this_ip_addr); + try_connect("AO server (AO_REQUIRED): unsigned client", port++, NULL, 0, + NULL, 0, 100, 100, 0, FAULT_TIMEOUT, 0, &client2); + + try_connect("MD5 server (INADDR_ANY): AO client", port++, NULL, 0, + &addr_any, 0, 100, 100, 0, FAULT_TIMEOUT, 1, &this_ip_addr); + try_connect("MD5 server (INADDR_ANY): MD5 client", port++, &addr_any, 0, + NULL, 0, 100, 100, 0, 0, 1, &this_ip_addr); + try_connect("MD5 server (INADDR_ANY): no sign client", port++, NULL, 0, + NULL, 0, 100, 100, 0, FAULT_TIMEOUT, 1, &this_ip_addr); + + try_connect("no sign server: AO client", port++, NULL, 0, + &addr_any, 0, 100, 100, 0, FAULT_TIMEOUT, 0, &this_ip_addr); + try_connect("no sign server: MD5 client", port++, &addr_any, 0, + NULL, 0, 100, 100, 0, FAULT_TIMEOUT, 1, &this_ip_addr); + try_connect("no sign server: no sign client", port++, NULL, 0, + NULL, 0, 100, 100, 0, 0, 0, &this_ip_addr); + + try_connect("AO+MD5 server: AO client (matching)", port++, NULL, 0, + &addr_any, 0, 100, 100, 0, 0, 1, &client2); + try_connect("AO+MD5 server: AO client (misconfig, matching MD5)", + port++, NULL, 0, &addr_any, 0, 100, 100, 0, + FAULT_TIMEOUT, 1, &this_ip_addr); + try_connect("AO+MD5 server: AO client (misconfig, non-matching)", + port++, NULL, 0, &addr_any, 0, 100, 100, 0, + FAULT_TIMEOUT, 1, &client3); + try_connect("AO+MD5 server: MD5 client (matching)", port++, &addr_any, 0, + NULL, 0, 100, 100, 0, 0, 1, &this_ip_addr); + try_connect("AO+MD5 server: MD5 client (misconfig, matching AO)", + port++, &addr_any, 0, NULL, 0, 100, 100, 0, FAULT_TIMEOUT, + 1, &client2); + try_connect("AO+MD5 server: MD5 client (misconfig, non-matching)", + port++, &addr_any, 0, NULL, 0, 100, 100, 0, FAULT_TIMEOUT, + 1, &client3); + try_connect("AO+MD5 server: no sign client (unmatched)", + port++, NULL, 0, NULL, 0, 100, 100, 0, 0, 1, &client3); + try_connect("AO+MD5 server: no sign client (misconfig, matching AO)", + port++, NULL, 0, NULL, 0, 100, 100, 0, FAULT_TIMEOUT, + 1, &client2); + try_connect("AO+MD5 server: no sign client (misconfig, matching MD5)", + port++, NULL, 0, NULL, 0, 100, 100, 0, FAULT_TIMEOUT, + 1, &this_ip_addr); + + try_connect("AO+MD5 server: client with both [TCP-MD5] and TCP-AO keys", + port++, &this_ip_addr, TEST_PREFIX, + &client2, TEST_PREFIX, 100, 100, 0, FAULT_KEYREJECT, + 1, &this_ip_addr); + try_connect("AO+MD5 server: client with both TCP-MD5 and [TCP-AO] keys", + port++, &this_ip_addr, TEST_PREFIX, + &client2, TEST_PREFIX, 100, 100, 0, FAULT_KEYREJECT, + 1, &client2); + + client_add_fail_tests(&port); + client_vrf_tests(&port); + + return NULL; +} + +int main(int argc, char *argv[]) +{ + test_init(72, server_fn, client_fn); + return 0; +} diff --git a/tools/testing/selftests/net/tcp_fastopen_backup_key.c b/tools/testing/selftests/net/tcp_fastopen_backup_key.c index 9c55ec44fc43..c1cb0c75156a 100644 --- a/tools/testing/selftests/net/tcp_fastopen_backup_key.c +++ b/tools/testing/selftests/net/tcp_fastopen_backup_key.c @@ -26,6 +26,8 @@ #include <fcntl.h> #include <time.h> +#include "../kselftest.h" + #ifndef TCP_FASTOPEN_KEY #define TCP_FASTOPEN_KEY 33 #endif @@ -34,10 +36,6 @@ #define PROC_FASTOPEN_KEY "/proc/sys/net/ipv4/tcp_fastopen_key" #define KEY_LENGTH 16 -#ifndef ARRAY_SIZE -#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) -#endif - static bool do_ipv6; static bool do_sockopt; static bool do_rotate; diff --git a/tools/testing/selftests/net/tcp_mmap.c b/tools/testing/selftests/net/tcp_mmap.c index 00f837c9bc6c..4fcce5150850 100644 --- a/tools/testing/selftests/net/tcp_mmap.c +++ b/tools/testing/selftests/net/tcp_mmap.c @@ -66,11 +66,16 @@ #include <poll.h> #include <linux/tcp.h> #include <assert.h> +#include <openssl/pem.h> #ifndef MSG_ZEROCOPY #define MSG_ZEROCOPY 0x4000000 #endif +#ifndef min +#define min(a, b) ((a) < (b) ? (a) : (b)) +#endif + #define FILE_SZ (1ULL << 35) static int cfg_family = AF_INET6; static socklen_t cfg_alen = sizeof(struct sockaddr_in6); @@ -81,12 +86,14 @@ static int sndbuf; /* Default: autotuning. Can be set with -w <integer> option static int zflg; /* zero copy option. (MSG_ZEROCOPY for sender, mmap() for receiver */ static int xflg; /* hash received data (simple xor) (-h option) */ static int keepflag; /* -k option: receiver shall keep all received file in memory (no munmap() calls) */ +static int integrity; /* -i option: sender and receiver compute sha256 over the data.*/ static size_t chunk_size = 512*1024; static size_t map_align; unsigned long htotal; +unsigned int digest_len; static inline void prefetch(const void *x) { @@ -137,7 +144,8 @@ static void *mmap_large_buffer(size_t need, size_t *allocated) if (buffer == (void *)-1) { sz = need; buffer = mmap(NULL, sz, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + MAP_PRIVATE | MAP_ANONYMOUS | MAP_POPULATE, + -1, 0); if (buffer != (void *)-1) fprintf(stderr, "MAP_HUGETLB attempt failed, look at /sys/kernel/mm/hugepages for optimal performance\n"); } @@ -145,14 +153,29 @@ static void *mmap_large_buffer(size_t need, size_t *allocated) return buffer; } +static uint32_t tcp_info_get_rcv_mss(int fd) +{ + socklen_t sz = sizeof(struct tcp_info); + struct tcp_info info; + + if (getsockopt(fd, IPPROTO_TCP, TCP_INFO, &info, &sz)) { + fprintf(stderr, "Error fetching TCP_INFO\n"); + return 0; + } + + return info.tcpi_rcv_mss; +} + void *child_thread(void *arg) { + unsigned char digest[SHA256_DIGEST_LENGTH]; unsigned long total_mmap = 0, total = 0; struct tcp_zerocopy_receive zc; + unsigned char *buffer = NULL; unsigned long delta_usec; + EVP_MD_CTX *ctx = NULL; int flags = MAP_SHARED; struct timeval t0, t1; - char *buffer = NULL; void *raddr = NULL; void *addr = NULL; double throughput; @@ -179,6 +202,14 @@ void *child_thread(void *arg) addr = ALIGN_PTR_UP(raddr, map_align); } } + if (integrity) { + ctx = EVP_MD_CTX_new(); + if (!ctx) { + perror("cannot enable SHA computing"); + goto error; + } + EVP_DigestInit_ex(ctx, EVP_sha256(), NULL); + } while (1) { struct pollfd pfd = { .fd = fd, .events = POLLIN, }; int sub; @@ -190,7 +221,7 @@ void *child_thread(void *arg) memset(&zc, 0, sizeof(zc)); zc.address = (__u64)((unsigned long)addr); - zc.length = chunk_size; + zc.length = min(chunk_size, FILE_SZ - total); res = getsockopt(fd, IPPROTO_TCP, TCP_ZEROCOPY_RECEIVE, &zc, &zc_len); @@ -199,6 +230,8 @@ void *child_thread(void *arg) if (zc.length) { assert(zc.length <= chunk_size); + if (integrity) + EVP_DigestUpdate(ctx, addr, zc.length); total_mmap += zc.length; if (xflg) hash_zone(addr, zc.length); @@ -210,22 +243,30 @@ void *child_thread(void *arg) } if (zc.recv_skip_hint) { assert(zc.recv_skip_hint <= chunk_size); - lu = read(fd, buffer, zc.recv_skip_hint); + lu = read(fd, buffer, min(zc.recv_skip_hint, + FILE_SZ - total)); if (lu > 0) { + if (integrity) + EVP_DigestUpdate(ctx, buffer, lu); if (xflg) hash_zone(buffer, lu); total += lu; } + if (lu == 0) + goto end; } continue; } sub = 0; while (sub < chunk_size) { - lu = read(fd, buffer + sub, chunk_size - sub); + lu = read(fd, buffer + sub, min(chunk_size - sub, + FILE_SZ - total)); if (lu == 0) goto end; if (lu < 0) break; + if (integrity) + EVP_DigestUpdate(ctx, buffer + sub, lu); if (xflg) hash_zone(buffer + sub, lu); total += lu; @@ -236,6 +277,20 @@ end: gettimeofday(&t1, NULL); delta_usec = (t1.tv_sec - t0.tv_sec) * 1000000 + t1.tv_usec - t0.tv_usec; + if (integrity) { + fcntl(fd, F_SETFL, 0); + EVP_DigestFinal_ex(ctx, digest, &digest_len); + lu = read(fd, buffer, SHA256_DIGEST_LENGTH); + if (lu != SHA256_DIGEST_LENGTH) + perror("Error: Cannot read SHA256\n"); + + if (memcmp(digest, buffer, + SHA256_DIGEST_LENGTH)) + fprintf(stderr, "Error: SHA256 of the data is not right\n"); + else + printf("\nSHA256 is correct\n"); + } + throughput = 0; if (delta_usec) throughput = total * 8.0 / (double)delta_usec / 1000.0; @@ -246,7 +301,7 @@ end: total_usec = 1000000*ru.ru_utime.tv_sec + ru.ru_utime.tv_usec + 1000000*ru.ru_stime.tv_sec + ru.ru_stime.tv_usec; printf("received %lg MB (%lg %% mmap'ed) in %lg s, %lg Gbit\n" - " cpu usage user:%lg sys:%lg, %lg usec per MB, %lu c-switches\n", + " cpu usage user:%lg sys:%lg, %lg usec per MB, %lu c-switches, rcv_mss %u\n", total / (1024.0 * 1024.0), 100.0*total_mmap/total, (double)delta_usec / 1000000.0, @@ -254,7 +309,8 @@ end: (double)ru.ru_utime.tv_sec + (double)ru.ru_utime.tv_usec / 1000000.0, (double)ru.ru_stime.tv_sec + (double)ru.ru_stime.tv_usec / 1000000.0, (double)total_usec/mb, - ru.ru_nvcsw); + ru.ru_nvcsw, + tcp_info_get_rcv_mss(fd)); } error: munmap(buffer, buffer_sz); @@ -367,19 +423,38 @@ static unsigned long default_huge_page_size(void) return hps; } +static void randomize(void *target, size_t count) +{ + static int urandom = -1; + ssize_t got; + + urandom = open("/dev/urandom", O_RDONLY); + if (urandom < 0) { + perror("open /dev/urandom"); + exit(1); + } + got = read(urandom, target, count); + if (got != count) { + perror("read /dev/urandom"); + exit(1); + } +} + int main(int argc, char *argv[]) { + unsigned char digest[SHA256_DIGEST_LENGTH]; struct sockaddr_storage listenaddr, addr; unsigned int max_pacing_rate = 0; + EVP_MD_CTX *ctx = NULL; + unsigned char *buffer; uint64_t total = 0; char *host = NULL; int fd, c, on = 1; size_t buffer_sz; - char *buffer; int sflg = 0; int mss = 0; - while ((c = getopt(argc, argv, "46p:svr:w:H:zxkP:M:C:a:")) != -1) { + while ((c = getopt(argc, argv, "46p:svr:w:H:zxkP:M:C:a:i")) != -1) { switch (c) { case '4': cfg_family = PF_INET; @@ -425,6 +500,9 @@ int main(int argc, char *argv[]) case 'a': map_align = atol(optarg); break; + case 'i': + integrity = 1; + break; default: exit(1); } @@ -467,7 +545,7 @@ int main(int argc, char *argv[]) } buffer = mmap_large_buffer(chunk_size, &buffer_sz); - if (buffer == (char *)-1) { + if (buffer == (unsigned char *)-1) { perror("mmap"); exit(1); } @@ -500,17 +578,34 @@ int main(int argc, char *argv[]) perror("setsockopt SO_ZEROCOPY, (-z option disabled)"); zflg = 0; } + if (integrity) { + randomize(buffer, buffer_sz); + ctx = EVP_MD_CTX_new(); + if (!ctx) { + perror("cannot enable SHA computing"); + exit(1); + } + EVP_DigestInit_ex(ctx, EVP_sha256(), NULL); + } while (total < FILE_SZ) { + size_t offset = total % chunk_size; int64_t wr = FILE_SZ - total; - if (wr > chunk_size) - wr = chunk_size; - /* Note : we just want to fill the pipe with 0 bytes */ - wr = send(fd, buffer, (size_t)wr, zflg ? MSG_ZEROCOPY : 0); + if (wr > chunk_size - offset) + wr = chunk_size - offset; + /* Note : we just want to fill the pipe with random bytes */ + wr = send(fd, buffer + offset, + (size_t)wr, zflg ? MSG_ZEROCOPY : 0); if (wr <= 0) break; + if (integrity) + EVP_DigestUpdate(ctx, buffer + offset, wr); total += wr; } + if (integrity && total == FILE_SZ) { + EVP_DigestFinal_ex(ctx, digest, &digest_len); + send(fd, digest, (size_t)SHA256_DIGEST_LENGTH, 0); + } close(fd); munmap(buffer, buffer_sz); return 0; diff --git a/tools/testing/selftests/net/test_bridge_backup_port.sh b/tools/testing/selftests/net/test_bridge_backup_port.sh new file mode 100755 index 000000000000..1b3f89e2b86e --- /dev/null +++ b/tools/testing/selftests/net/test_bridge_backup_port.sh @@ -0,0 +1,775 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# This test is for checking bridge backup port and backup nexthop ID +# functionality. The topology consists of two bridge (VTEPs) connected using +# VXLAN. The test checks that when the switch port (swp1) is down, traffic is +# redirected to the VXLAN port (vx0). When a backup nexthop ID is configured, +# the test checks that traffic is redirected with the correct nexthop +# information. +# +# +------------------------------------+ +------------------------------------+ +# | + swp1 + vx0 | | + swp1 + vx0 | +# | | | | | | | | +# | | br0 | | | | | | +# | +------------+-----------+ | | +------------+-----------+ | +# | | | | | | +# | | | | | | +# | + | | + | +# | br0 | | br0 | +# | + | | + | +# | | | | | | +# | | | | | | +# | + | | + | +# | br0.10 | | br0.10 | +# | 192.0.2.65/28 | | 192.0.2.66/28 | +# | | | | +# | | | | +# | 192.0.2.33 | | 192.0.2.34 | +# | + lo | | + lo | +# | | | | +# | | | | +# | 192.0.2.49/28 | | 192.0.2.50/28 | +# | veth0 +-------+ veth0 | +# | | | | +# | sw1 | | sw2 | +# +------------------------------------+ +------------------------------------+ + +source lib.sh +ret=0 + +# All tests in this script. Can be overridden with -t option. +TESTS=" + backup_port + backup_nhid + backup_nhid_invalid + backup_nhid_ping + backup_nhid_torture +" +VERBOSE=0 +PAUSE_ON_FAIL=no +PAUSE=no +PING_TIMEOUT=5 + +################################################################################ +# Utilities + +log_test() +{ + local rc=$1 + local expected=$2 + local msg="$3" + + if [ ${rc} -eq ${expected} ]; then + printf "TEST: %-60s [ OK ]\n" "${msg}" + nsuccess=$((nsuccess+1)) + else + ret=1 + nfail=$((nfail+1)) + printf "TEST: %-60s [FAIL]\n" "${msg}" + if [ "$VERBOSE" = "1" ]; then + echo " rc=$rc, expected $expected" + fi + + if [ "${PAUSE_ON_FAIL}" = "yes" ]; then + echo + echo "hit enter to continue, 'q' to quit" + read a + [ "$a" = "q" ] && exit 1 + fi + fi + + if [ "${PAUSE}" = "yes" ]; then + echo + echo "hit enter to continue, 'q' to quit" + read a + [ "$a" = "q" ] && exit 1 + fi + + [ "$VERBOSE" = "1" ] && echo +} + +run_cmd() +{ + local cmd="$1" + local out + local stderr="2>/dev/null" + + if [ "$VERBOSE" = "1" ]; then + printf "COMMAND: $cmd\n" + stderr= + fi + + out=$(eval $cmd $stderr) + rc=$? + if [ "$VERBOSE" = "1" -a -n "$out" ]; then + echo " $out" + fi + + return $rc +} + +tc_check_packets() +{ + local ns=$1; shift + local id=$1; shift + local handle=$1; shift + local count=$1; shift + local pkts + + sleep 0.1 + pkts=$(tc -n $ns -j -s filter show $id \ + | jq ".[] | select(.options.handle == $handle) | \ + .options.actions[0].stats.packets") + [[ $pkts == $count ]] +} + +bridge_link_check() +{ + local ns=$1; shift + local dev=$1; shift + local state=$1; shift + + bridge -n $ns -d -j link show dev $dev | \ + jq -e ".[][\"state\"] == \"$state\"" &> /dev/null +} + +################################################################################ +# Setup + +setup_topo_ns() +{ + local ns=$1; shift + + ip netns exec $ns sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1 + ip netns exec $ns sysctl -qw net.ipv6.conf.default.ignore_routes_with_linkdown=1 + ip netns exec $ns sysctl -qw net.ipv6.conf.all.accept_dad=0 + ip netns exec $ns sysctl -qw net.ipv6.conf.default.accept_dad=0 +} + +setup_topo() +{ + local ns + + setup_ns sw1 sw2 + for ns in $sw1 $sw2; do + setup_topo_ns $ns + done + + ip link add name veth0 type veth peer name veth1 + ip link set dev veth0 netns $sw1 name veth0 + ip link set dev veth1 netns $sw2 name veth0 +} + +setup_sw_common() +{ + local ns=$1; shift + local local_addr=$1; shift + local remote_addr=$1; shift + local veth_addr=$1; shift + local gw_addr=$1; shift + local br_addr=$1; shift + + ip -n $ns address add $local_addr/32 dev lo + + ip -n $ns link set dev veth0 up + ip -n $ns address add $veth_addr/28 dev veth0 + ip -n $ns route add default via $gw_addr + + ip -n $ns link add name br0 up type bridge vlan_filtering 1 \ + vlan_default_pvid 0 mcast_snooping 0 + + ip -n $ns link add link br0 name br0.10 up type vlan id 10 + bridge -n $ns vlan add vid 10 dev br0 self + ip -n $ns address add $br_addr/28 dev br0.10 + + ip -n $ns link add name swp1 up type dummy + ip -n $ns link set dev swp1 master br0 + bridge -n $ns vlan add vid 10 dev swp1 untagged + + ip -n $ns link add name vx0 up master br0 type vxlan \ + local $local_addr dstport 4789 nolearning external + bridge -n $ns link set dev vx0 vlan_tunnel on learning off + + bridge -n $ns vlan add vid 10 dev vx0 + bridge -n $ns vlan add vid 10 dev vx0 tunnel_info id 10010 +} + +setup_sw1() +{ + local ns=$sw1 + local local_addr=192.0.2.33 + local remote_addr=192.0.2.34 + local veth_addr=192.0.2.49 + local gw_addr=192.0.2.50 + local br_addr=192.0.2.65 + + setup_sw_common $ns $local_addr $remote_addr $veth_addr $gw_addr \ + $br_addr +} + +setup_sw2() +{ + local ns=$sw2 + local local_addr=192.0.2.34 + local remote_addr=192.0.2.33 + local veth_addr=192.0.2.50 + local gw_addr=192.0.2.49 + local br_addr=192.0.2.66 + + setup_sw_common $ns $local_addr $remote_addr $veth_addr $gw_addr \ + $br_addr +} + +setup() +{ + set -e + + setup_topo + setup_sw1 + setup_sw2 + + sleep 5 + + set +e +} + +cleanup() +{ + cleanup_ns $sw1 $sw2 +} + +################################################################################ +# Tests + +backup_port() +{ + local dmac=00:11:22:33:44:55 + local smac=00:aa:bb:cc:dd:ee + + echo + echo "Backup port" + echo "-----------" + + run_cmd "tc -n $sw1 qdisc replace dev swp1 clsact" + run_cmd "tc -n $sw1 filter replace dev swp1 egress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac action pass" + + run_cmd "tc -n $sw1 qdisc replace dev vx0 clsact" + run_cmd "tc -n $sw1 filter replace dev vx0 egress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac action pass" + + run_cmd "bridge -n $sw1 fdb replace $dmac dev swp1 master static vlan 10" + + # Initial state - check that packets are forwarded out of swp1 when it + # has a carrier and not forwarded out of any port when it does not have + # a carrier. + run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" + tc_check_packets $sw1 "dev swp1 egress" 101 1 + log_test $? 0 "Forwarding out of swp1" + tc_check_packets $sw1 "dev vx0 egress" 101 0 + log_test $? 0 "No forwarding out of vx0" + + run_cmd "ip -n $sw1 link set dev swp1 carrier off" + busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 disabled + log_test $? 0 "swp1 carrier off" + + run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" + tc_check_packets $sw1 "dev swp1 egress" 101 1 + log_test $? 0 "No forwarding out of swp1" + tc_check_packets $sw1 "dev vx0 egress" 101 0 + log_test $? 0 "No forwarding out of vx0" + + run_cmd "ip -n $sw1 link set dev swp1 carrier on" + busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 forwarding + log_test $? 0 "swp1 carrier on" + + # Configure vx0 as the backup port of swp1 and check that packets are + # forwarded out of swp1 when it has a carrier and out of vx0 when swp1 + # does not have a carrier. + run_cmd "bridge -n $sw1 link set dev swp1 backup_port vx0" + run_cmd "bridge -n $sw1 -d link show dev swp1 | grep \"backup_port vx0\"" + log_test $? 0 "vx0 configured as backup port of swp1" + + run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" + tc_check_packets $sw1 "dev swp1 egress" 101 2 + log_test $? 0 "Forwarding out of swp1" + tc_check_packets $sw1 "dev vx0 egress" 101 0 + log_test $? 0 "No forwarding out of vx0" + + run_cmd "ip -n $sw1 link set dev swp1 carrier off" + busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 disabled + log_test $? 0 "swp1 carrier off" + + run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" + tc_check_packets $sw1 "dev swp1 egress" 101 2 + log_test $? 0 "No forwarding out of swp1" + tc_check_packets $sw1 "dev vx0 egress" 101 1 + log_test $? 0 "Forwarding out of vx0" + + run_cmd "ip -n $sw1 link set dev swp1 carrier on" + busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 forwarding + log_test $? 0 "swp1 carrier on" + + run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" + tc_check_packets $sw1 "dev swp1 egress" 101 3 + log_test $? 0 "Forwarding out of swp1" + tc_check_packets $sw1 "dev vx0 egress" 101 1 + log_test $? 0 "No forwarding out of vx0" + + # Remove vx0 as the backup port of swp1 and check that packets are no + # longer forwarded out of vx0 when swp1 does not have a carrier. + run_cmd "bridge -n $sw1 link set dev swp1 nobackup_port" + run_cmd "bridge -n $sw1 -d link show dev swp1 | grep \"backup_port vx0\"" + log_test $? 1 "vx0 not configured as backup port of swp1" + + run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" + tc_check_packets $sw1 "dev swp1 egress" 101 4 + log_test $? 0 "Forwarding out of swp1" + tc_check_packets $sw1 "dev vx0 egress" 101 1 + log_test $? 0 "No forwarding out of vx0" + + run_cmd "ip -n $sw1 link set dev swp1 carrier off" + busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 disabled + log_test $? 0 "swp1 carrier off" + + run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" + tc_check_packets $sw1 "dev swp1 egress" 101 4 + log_test $? 0 "No forwarding out of swp1" + tc_check_packets $sw1 "dev vx0 egress" 101 1 + log_test $? 0 "No forwarding out of vx0" +} + +backup_nhid() +{ + local dmac=00:11:22:33:44:55 + local smac=00:aa:bb:cc:dd:ee + + echo + echo "Backup nexthop ID" + echo "-----------------" + + run_cmd "tc -n $sw1 qdisc replace dev swp1 clsact" + run_cmd "tc -n $sw1 filter replace dev swp1 egress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac action pass" + + run_cmd "tc -n $sw1 qdisc replace dev vx0 clsact" + run_cmd "tc -n $sw1 filter replace dev vx0 egress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac action pass" + + run_cmd "ip -n $sw1 nexthop replace id 1 via 192.0.2.34 fdb" + run_cmd "ip -n $sw1 nexthop replace id 2 via 192.0.2.34 fdb" + run_cmd "ip -n $sw1 nexthop replace id 10 group 1/2 fdb" + + run_cmd "bridge -n $sw1 fdb replace $dmac dev swp1 master static vlan 10" + run_cmd "bridge -n $sw1 fdb replace $dmac dev vx0 self static dst 192.0.2.36 src_vni 10010" + + run_cmd "ip -n $sw2 address replace 192.0.2.36/32 dev lo" + + # The first filter matches on packets forwarded using the backup + # nexthop ID and the second filter matches on packets forwarded using a + # regular VXLAN FDB entry. + run_cmd "tc -n $sw2 qdisc replace dev vx0 clsact" + run_cmd "tc -n $sw2 filter replace dev vx0 ingress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac enc_key_id 10010 enc_dst_ip 192.0.2.34 action pass" + run_cmd "tc -n $sw2 filter replace dev vx0 ingress pref 1 handle 102 proto ip flower src_mac $smac dst_mac $dmac enc_key_id 10010 enc_dst_ip 192.0.2.36 action pass" + + # Configure vx0 as the backup port of swp1 and check that packets are + # forwarded out of swp1 when it has a carrier and out of vx0 when swp1 + # does not have a carrier. When packets are forwarded out of vx0, check + # that they are forwarded by the VXLAN FDB entry. + run_cmd "bridge -n $sw1 link set dev swp1 backup_port vx0" + run_cmd "bridge -n $sw1 -d link show dev swp1 | grep \"backup_port vx0\"" + log_test $? 0 "vx0 configured as backup port of swp1" + + run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" + tc_check_packets $sw1 "dev swp1 egress" 101 1 + log_test $? 0 "Forwarding out of swp1" + tc_check_packets $sw1 "dev vx0 egress" 101 0 + log_test $? 0 "No forwarding out of vx0" + + run_cmd "ip -n $sw1 link set dev swp1 carrier off" + busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 disabled + log_test $? 0 "swp1 carrier off" + + run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" + tc_check_packets $sw1 "dev swp1 egress" 101 1 + log_test $? 0 "No forwarding out of swp1" + tc_check_packets $sw1 "dev vx0 egress" 101 1 + log_test $? 0 "Forwarding out of vx0" + tc_check_packets $sw2 "dev vx0 ingress" 101 0 + log_test $? 0 "No forwarding using backup nexthop ID" + tc_check_packets $sw2 "dev vx0 ingress" 102 1 + log_test $? 0 "Forwarding using VXLAN FDB entry" + + run_cmd "ip -n $sw1 link set dev swp1 carrier on" + busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 forwarding + log_test $? 0 "swp1 carrier on" + + # Configure nexthop ID 10 as the backup nexthop ID of swp1 and check + # that when packets are forwarded out of vx0, they are forwarded using + # the backup nexthop ID. + run_cmd "bridge -n $sw1 link set dev swp1 backup_nhid 10" + run_cmd "bridge -n $sw1 -d link show dev swp1 | grep \"backup_nhid 10\"" + log_test $? 0 "nexthop ID 10 configured as backup nexthop ID of swp1" + + run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" + tc_check_packets $sw1 "dev swp1 egress" 101 2 + log_test $? 0 "Forwarding out of swp1" + tc_check_packets $sw1 "dev vx0 egress" 101 1 + log_test $? 0 "No forwarding out of vx0" + + run_cmd "ip -n $sw1 link set dev swp1 carrier off" + busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 disabled + log_test $? 0 "swp1 carrier off" + + run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" + tc_check_packets $sw1 "dev swp1 egress" 101 2 + log_test $? 0 "No forwarding out of swp1" + tc_check_packets $sw1 "dev vx0 egress" 101 2 + log_test $? 0 "Forwarding out of vx0" + tc_check_packets $sw2 "dev vx0 ingress" 101 1 + log_test $? 0 "Forwarding using backup nexthop ID" + tc_check_packets $sw2 "dev vx0 ingress" 102 1 + log_test $? 0 "No forwarding using VXLAN FDB entry" + + run_cmd "ip -n $sw1 link set dev swp1 carrier on" + busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 forwarding + log_test $? 0 "swp1 carrier on" + + run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" + tc_check_packets $sw1 "dev swp1 egress" 101 3 + log_test $? 0 "Forwarding out of swp1" + tc_check_packets $sw1 "dev vx0 egress" 101 2 + log_test $? 0 "No forwarding out of vx0" + tc_check_packets $sw2 "dev vx0 ingress" 101 1 + log_test $? 0 "No forwarding using backup nexthop ID" + tc_check_packets $sw2 "dev vx0 ingress" 102 1 + log_test $? 0 "No forwarding using VXLAN FDB entry" + + # Reset the backup nexthop ID to 0 and check that packets are no longer + # forwarded using the backup nexthop ID when swp1 does not have a + # carrier and are instead forwarded by the VXLAN FDB. + run_cmd "bridge -n $sw1 link set dev swp1 backup_nhid 0" + run_cmd "bridge -n $sw1 -d link show dev swp1 | grep \"backup_nhid\"" + log_test $? 1 "No backup nexthop ID configured for swp1" + + run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" + tc_check_packets $sw1 "dev swp1 egress" 101 4 + log_test $? 0 "Forwarding out of swp1" + tc_check_packets $sw1 "dev vx0 egress" 101 2 + log_test $? 0 "No forwarding out of vx0" + tc_check_packets $sw2 "dev vx0 ingress" 101 1 + log_test $? 0 "No forwarding using backup nexthop ID" + tc_check_packets $sw2 "dev vx0 ingress" 102 1 + log_test $? 0 "No forwarding using VXLAN FDB entry" + + run_cmd "ip -n $sw1 link set dev swp1 carrier off" + busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 disabled + log_test $? 0 "swp1 carrier off" + + run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" + tc_check_packets $sw1 "dev swp1 egress" 101 4 + log_test $? 0 "No forwarding out of swp1" + tc_check_packets $sw1 "dev vx0 egress" 101 3 + log_test $? 0 "Forwarding out of vx0" + tc_check_packets $sw2 "dev vx0 ingress" 101 1 + log_test $? 0 "No forwarding using backup nexthop ID" + tc_check_packets $sw2 "dev vx0 ingress" 102 2 + log_test $? 0 "Forwarding using VXLAN FDB entry" +} + +backup_nhid_invalid() +{ + local dmac=00:11:22:33:44:55 + local smac=00:aa:bb:cc:dd:ee + local tx_drop + + echo + echo "Backup nexthop ID - invalid IDs" + echo "-------------------------------" + + # Check that when traffic is redirected with an invalid nexthop ID, it + # is forwarded out of the VXLAN port, but dropped by the VXLAN driver + # and does not crash the host. + + run_cmd "tc -n $sw1 qdisc replace dev swp1 clsact" + run_cmd "tc -n $sw1 filter replace dev swp1 egress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac action pass" + + run_cmd "tc -n $sw1 qdisc replace dev vx0 clsact" + run_cmd "tc -n $sw1 filter replace dev vx0 egress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac action pass" + # Drop all other Tx traffic to avoid changes to Tx drop counter. + run_cmd "tc -n $sw1 filter replace dev vx0 egress pref 2 handle 102 proto all matchall action drop" + + tx_drop=$(ip -n $sw1 -s -j link show dev vx0 | jq '.[]["stats64"]["tx"]["dropped"]') + + run_cmd "ip -n $sw1 nexthop replace id 1 via 192.0.2.34 fdb" + run_cmd "ip -n $sw1 nexthop replace id 2 via 192.0.2.34 fdb" + run_cmd "ip -n $sw1 nexthop replace id 10 group 1/2 fdb" + + run_cmd "bridge -n $sw1 fdb replace $dmac dev swp1 master static vlan 10" + + run_cmd "tc -n $sw2 qdisc replace dev vx0 clsact" + run_cmd "tc -n $sw2 filter replace dev vx0 ingress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac enc_key_id 10010 enc_dst_ip 192.0.2.34 action pass" + + # First, check that redirection works. + run_cmd "bridge -n $sw1 link set dev swp1 backup_port vx0" + run_cmd "bridge -n $sw1 -d link show dev swp1 | grep \"backup_port vx0\"" + log_test $? 0 "vx0 configured as backup port of swp1" + + run_cmd "bridge -n $sw1 link set dev swp1 backup_nhid 10" + run_cmd "bridge -n $sw1 -d link show dev swp1 | grep \"backup_nhid 10\"" + log_test $? 0 "Valid nexthop as backup nexthop" + + run_cmd "ip -n $sw1 link set dev swp1 carrier off" + busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 disabled + log_test $? 0 "swp1 carrier off" + + run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" + tc_check_packets $sw1 "dev swp1 egress" 101 0 + log_test $? 0 "No forwarding out of swp1" + tc_check_packets $sw1 "dev vx0 egress" 101 1 + log_test $? 0 "Forwarding out of vx0" + tc_check_packets $sw2 "dev vx0 ingress" 101 1 + log_test $? 0 "Forwarding using backup nexthop ID" + run_cmd "ip -n $sw1 -s -j link show dev vx0 | jq -e '.[][\"stats64\"][\"tx\"][\"dropped\"] == $tx_drop'" + log_test $? 0 "No Tx drop increase" + + # Use a non-existent nexthop ID. + run_cmd "bridge -n $sw1 link set dev swp1 backup_nhid 20" + run_cmd "bridge -n $sw1 -d link show dev swp1 | grep \"backup_nhid 20\"" + log_test $? 0 "Non-existent nexthop as backup nexthop" + + run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" + tc_check_packets $sw1 "dev swp1 egress" 101 0 + log_test $? 0 "No forwarding out of swp1" + tc_check_packets $sw1 "dev vx0 egress" 101 2 + log_test $? 0 "Forwarding out of vx0" + tc_check_packets $sw2 "dev vx0 ingress" 101 1 + log_test $? 0 "No forwarding using backup nexthop ID" + run_cmd "ip -n $sw1 -s -j link show dev vx0 | jq -e '.[][\"stats64\"][\"tx\"][\"dropped\"] == $((tx_drop + 1))'" + log_test $? 0 "Tx drop increased" + + # Use a blckhole nexthop. + run_cmd "ip -n $sw1 nexthop replace id 30 blackhole" + run_cmd "bridge -n $sw1 link set dev swp1 backup_nhid 30" + run_cmd "bridge -n $sw1 -d link show dev swp1 | grep \"backup_nhid 30\"" + log_test $? 0 "Blackhole nexthop as backup nexthop" + + run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" + tc_check_packets $sw1 "dev swp1 egress" 101 0 + log_test $? 0 "No forwarding out of swp1" + tc_check_packets $sw1 "dev vx0 egress" 101 3 + log_test $? 0 "Forwarding out of vx0" + tc_check_packets $sw2 "dev vx0 ingress" 101 1 + log_test $? 0 "No forwarding using backup nexthop ID" + run_cmd "ip -n $sw1 -s -j link show dev vx0 | jq -e '.[][\"stats64\"][\"tx\"][\"dropped\"] == $((tx_drop + 2))'" + log_test $? 0 "Tx drop increased" + + # Non-group FDB nexthop. + run_cmd "bridge -n $sw1 link set dev swp1 backup_nhid 1" + run_cmd "bridge -n $sw1 -d link show dev swp1 | grep \"backup_nhid 1\"" + log_test $? 0 "Non-group FDB nexthop as backup nexthop" + + run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" + tc_check_packets $sw1 "dev swp1 egress" 101 0 + log_test $? 0 "No forwarding out of swp1" + tc_check_packets $sw1 "dev vx0 egress" 101 4 + log_test $? 0 "Forwarding out of vx0" + tc_check_packets $sw2 "dev vx0 ingress" 101 1 + log_test $? 0 "No forwarding using backup nexthop ID" + run_cmd "ip -n $sw1 -s -j link show dev vx0 | jq -e '.[][\"stats64\"][\"tx\"][\"dropped\"] == $((tx_drop + 3))'" + log_test $? 0 "Tx drop increased" + + # IPv6 address family nexthop. + run_cmd "ip -n $sw1 nexthop replace id 100 via 2001:db8:100::1 fdb" + run_cmd "ip -n $sw1 nexthop replace id 200 via 2001:db8:100::1 fdb" + run_cmd "ip -n $sw1 nexthop replace id 300 group 100/200 fdb" + run_cmd "bridge -n $sw1 link set dev swp1 backup_nhid 300" + run_cmd "bridge -n $sw1 -d link show dev swp1 | grep \"backup_nhid 300\"" + log_test $? 0 "IPv6 address family nexthop as backup nexthop" + + run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" + tc_check_packets $sw1 "dev swp1 egress" 101 0 + log_test $? 0 "No forwarding out of swp1" + tc_check_packets $sw1 "dev vx0 egress" 101 5 + log_test $? 0 "Forwarding out of vx0" + tc_check_packets $sw2 "dev vx0 ingress" 101 1 + log_test $? 0 "No forwarding using backup nexthop ID" + run_cmd "ip -n $sw1 -s -j link show dev vx0 | jq -e '.[][\"stats64\"][\"tx\"][\"dropped\"] == $((tx_drop + 4))'" + log_test $? 0 "Tx drop increased" +} + +backup_nhid_ping() +{ + local sw1_mac + local sw2_mac + + echo + echo "Backup nexthop ID - ping" + echo "------------------------" + + # Test bidirectional traffic when traffic is redirected in both VTEPs. + sw1_mac=$(ip -n $sw1 -j -p link show br0.10 | jq -r '.[]["address"]') + sw2_mac=$(ip -n $sw2 -j -p link show br0.10 | jq -r '.[]["address"]') + + run_cmd "bridge -n $sw1 fdb replace $sw2_mac dev swp1 master static vlan 10" + run_cmd "bridge -n $sw2 fdb replace $sw1_mac dev swp1 master static vlan 10" + + run_cmd "ip -n $sw1 neigh replace 192.0.2.66 lladdr $sw2_mac nud perm dev br0.10" + run_cmd "ip -n $sw2 neigh replace 192.0.2.65 lladdr $sw1_mac nud perm dev br0.10" + + run_cmd "ip -n $sw1 nexthop replace id 1 via 192.0.2.34 fdb" + run_cmd "ip -n $sw2 nexthop replace id 1 via 192.0.2.33 fdb" + run_cmd "ip -n $sw1 nexthop replace id 10 group 1 fdb" + run_cmd "ip -n $sw2 nexthop replace id 10 group 1 fdb" + + run_cmd "bridge -n $sw1 link set dev swp1 backup_port vx0" + run_cmd "bridge -n $sw2 link set dev swp1 backup_port vx0" + run_cmd "bridge -n $sw1 link set dev swp1 backup_nhid 10" + run_cmd "bridge -n $sw2 link set dev swp1 backup_nhid 10" + + run_cmd "ip -n $sw1 link set dev swp1 carrier off" + busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 disabled + run_cmd "ip -n $sw2 link set dev swp1 carrier off" + busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw2 swp1 disabled + + run_cmd "ip netns exec $sw1 ping -i 0.1 -c 10 -w $PING_TIMEOUT 192.0.2.66" + log_test $? 0 "Ping with backup nexthop ID" + + # Reset the backup nexthop ID to 0 and check that ping fails. + run_cmd "bridge -n $sw1 link set dev swp1 backup_nhid 0" + run_cmd "bridge -n $sw2 link set dev swp1 backup_nhid 0" + + run_cmd "ip netns exec $sw1 ping -i 0.1 -c 10 -w $PING_TIMEOUT 192.0.2.66" + log_test $? 1 "Ping after disabling backup nexthop ID" +} + +backup_nhid_add_del_loop() +{ + while true; do + ip -n $sw1 nexthop del id 10 + ip -n $sw1 nexthop replace id 10 group 1/2 fdb + done >/dev/null 2>&1 +} + +backup_nhid_torture() +{ + local dmac=00:11:22:33:44:55 + local smac=00:aa:bb:cc:dd:ee + local pid1 + local pid2 + local pid3 + + echo + echo "Backup nexthop ID - torture test" + echo "--------------------------------" + + # Continuously send traffic through the backup nexthop while adding and + # deleting the group. The test is considered successful if nothing + # crashed. + + run_cmd "ip -n $sw1 nexthop replace id 1 via 192.0.2.34 fdb" + run_cmd "ip -n $sw1 nexthop replace id 2 via 192.0.2.34 fdb" + run_cmd "ip -n $sw1 nexthop replace id 10 group 1/2 fdb" + + run_cmd "bridge -n $sw1 fdb replace $dmac dev swp1 master static vlan 10" + + run_cmd "bridge -n $sw1 link set dev swp1 backup_port vx0" + run_cmd "bridge -n $sw1 link set dev swp1 backup_nhid 10" + run_cmd "ip -n $sw1 link set dev swp1 carrier off" + + backup_nhid_add_del_loop & + pid1=$! + ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 0 & + pid2=$! + + sleep 30 + kill -9 $pid1 $pid2 + wait $pid1 $pid2 2>/dev/null + + log_test 0 0 "Torture test" +} + +################################################################################ +# Usage + +usage() +{ + cat <<EOF +usage: ${0##*/} OPTS + + -t <test> Test(s) to run (default: all) + (options: $TESTS) + -p Pause on fail + -P Pause after each test before cleanup + -v Verbose mode (show commands and output) + -w Timeout for ping +EOF +} + +################################################################################ +# Main + +trap cleanup EXIT + +while getopts ":t:pPvhw:" opt; do + case $opt in + t) TESTS=$OPTARG;; + p) PAUSE_ON_FAIL=yes;; + P) PAUSE=yes;; + v) VERBOSE=$(($VERBOSE + 1));; + w) PING_TIMEOUT=$OPTARG;; + h) usage; exit 0;; + *) usage; exit 1;; + esac +done + +# Make sure we don't pause twice. +[ "${PAUSE}" = "yes" ] && PAUSE_ON_FAIL=no + +if [ "$(id -u)" -ne 0 ];then + echo "SKIP: Need root privileges" + exit $ksft_skip; +fi + +if [ ! -x "$(command -v ip)" ]; then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +if [ ! -x "$(command -v bridge)" ]; then + echo "SKIP: Could not run test without bridge tool" + exit $ksft_skip +fi + +if [ ! -x "$(command -v tc)" ]; then + echo "SKIP: Could not run test without tc tool" + exit $ksft_skip +fi + +if [ ! -x "$(command -v mausezahn)" ]; then + echo "SKIP: Could not run test without mausezahn tool" + exit $ksft_skip +fi + +if [ ! -x "$(command -v jq)" ]; then + echo "SKIP: Could not run test without jq tool" + exit $ksft_skip +fi + +bridge link help 2>&1 | grep -q "backup_nhid" +if [ $? -ne 0 ]; then + echo "SKIP: iproute2 bridge too old, missing backup nexthop ID support" + exit $ksft_skip +fi + +# Start clean. +cleanup + +for t in $TESTS +do + setup; $t; cleanup; +done + +if [ "$TESTS" != "none" ]; then + printf "\nTests passed: %3d\n" ${nsuccess} + printf "Tests failed: %3d\n" ${nfail} +fi + +exit $ret diff --git a/tools/testing/selftests/net/test_bridge_neigh_suppress.sh b/tools/testing/selftests/net/test_bridge_neigh_suppress.sh new file mode 100755 index 000000000000..8533393a4f18 --- /dev/null +++ b/tools/testing/selftests/net/test_bridge_neigh_suppress.sh @@ -0,0 +1,855 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# This test is for checking bridge neighbor suppression functionality. The +# topology consists of two bridges (VTEPs) connected using VXLAN. A single +# host is connected to each bridge over multiple VLANs. The test checks that +# ARP/NS messages from the first host are suppressed on the VXLAN port when +# should. +# +# +-----------------------+ +------------------------+ +# | h1 | | h2 | +# | | | | +# | + eth0.10 | | + eth0.10 | +# | | 192.0.2.1/28 | | | 192.0.2.2/28 | +# | | 2001:db8:1::1/64 | | | 2001:db8:1::2/64 | +# | | | | | | +# | | + eth0.20 | | | + eth0.20 | +# | \ | 192.0.2.17/28 | | \ | 192.0.2.18/28 | +# | \ | 2001:db8:2::1/64 | | \ | 2001:db8:2::2/64 | +# | \| | | \| | +# | + eth0 | | + eth0 | +# +----|------------------+ +----|-------------------+ +# | | +# | | +# +----|-------------------------------+ +----|-------------------------------+ +# | + swp1 + vx0 | | + swp1 + vx0 | +# | | | | | | | | +# | | br0 | | | | | | +# | +------------+-----------+ | | +------------+-----------+ | +# | | | | | | +# | | | | | | +# | +---+---+ | | +---+---+ | +# | | | | | | | | +# | | | | | | | | +# | + + | | + + | +# | br0.10 br0.20 | | br0.10 br0.20 | +# | | | | +# | 192.0.2.33 | | 192.0.2.34 | +# | + lo | | + lo | +# | | | | +# | | | | +# | 192.0.2.49/28 | | 192.0.2.50/28 | +# | veth0 +-------+ veth0 | +# | | | | +# | sw1 | | sw2 | +# +------------------------------------+ +------------------------------------+ + +source lib.sh +ret=0 + +# All tests in this script. Can be overridden with -t option. +TESTS=" + neigh_suppress_arp + neigh_suppress_ns + neigh_vlan_suppress_arp + neigh_vlan_suppress_ns +" +VERBOSE=0 +PAUSE_ON_FAIL=no +PAUSE=no + +################################################################################ +# Utilities + +log_test() +{ + local rc=$1 + local expected=$2 + local msg="$3" + + if [ ${rc} -eq ${expected} ]; then + printf "TEST: %-60s [ OK ]\n" "${msg}" + nsuccess=$((nsuccess+1)) + else + ret=1 + nfail=$((nfail+1)) + printf "TEST: %-60s [FAIL]\n" "${msg}" + if [ "$VERBOSE" = "1" ]; then + echo " rc=$rc, expected $expected" + fi + + if [ "${PAUSE_ON_FAIL}" = "yes" ]; then + echo + echo "hit enter to continue, 'q' to quit" + read a + [ "$a" = "q" ] && exit 1 + fi + fi + + if [ "${PAUSE}" = "yes" ]; then + echo + echo "hit enter to continue, 'q' to quit" + read a + [ "$a" = "q" ] && exit 1 + fi + + [ "$VERBOSE" = "1" ] && echo +} + +run_cmd() +{ + local cmd="$1" + local out + local stderr="2>/dev/null" + + if [ "$VERBOSE" = "1" ]; then + printf "COMMAND: $cmd\n" + stderr= + fi + + out=$(eval $cmd $stderr) + rc=$? + if [ "$VERBOSE" = "1" -a -n "$out" ]; then + echo " $out" + fi + + return $rc +} + +tc_check_packets() +{ + local ns=$1; shift + local id=$1; shift + local handle=$1; shift + local count=$1; shift + local pkts + + sleep 0.1 + pkts=$(tc -n $ns -j -s filter show $id \ + | jq ".[] | select(.options.handle == $handle) | \ + .options.actions[0].stats.packets") + [[ $pkts == $count ]] +} + +################################################################################ +# Setup + +setup_topo_ns() +{ + local ns=$1; shift + + ip netns exec $ns sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1 + ip netns exec $ns sysctl -qw net.ipv6.conf.default.ignore_routes_with_linkdown=1 + ip netns exec $ns sysctl -qw net.ipv6.conf.all.accept_dad=0 + ip netns exec $ns sysctl -qw net.ipv6.conf.default.accept_dad=0 +} + +setup_topo() +{ + local ns + + setup_ns h1 h2 sw1 sw2 + for ns in $h1 $h2 $sw1 $sw2; do + setup_topo_ns $ns + done + + ip link add name veth0 type veth peer name veth1 + ip link set dev veth0 netns $h1 name eth0 + ip link set dev veth1 netns $sw1 name swp1 + + ip link add name veth0 type veth peer name veth1 + ip link set dev veth0 netns $sw1 name veth0 + ip link set dev veth1 netns $sw2 name veth0 + + ip link add name veth0 type veth peer name veth1 + ip link set dev veth0 netns $h2 name eth0 + ip link set dev veth1 netns $sw2 name swp1 +} + +setup_host_common() +{ + local ns=$1; shift + local v4addr1=$1; shift + local v4addr2=$1; shift + local v6addr1=$1; shift + local v6addr2=$1; shift + + ip -n $ns link set dev eth0 up + ip -n $ns link add link eth0 name eth0.10 up type vlan id 10 + ip -n $ns link add link eth0 name eth0.20 up type vlan id 20 + + ip -n $ns address add $v4addr1 dev eth0.10 + ip -n $ns address add $v4addr2 dev eth0.20 + ip -n $ns address add $v6addr1 dev eth0.10 + ip -n $ns address add $v6addr2 dev eth0.20 +} + +setup_h1() +{ + local ns=$h1 + local v4addr1=192.0.2.1/28 + local v4addr2=192.0.2.17/28 + local v6addr1=2001:db8:1::1/64 + local v6addr2=2001:db8:2::1/64 + + setup_host_common $ns $v4addr1 $v4addr2 $v6addr1 $v6addr2 +} + +setup_h2() +{ + local ns=$h2 + local v4addr1=192.0.2.2/28 + local v4addr2=192.0.2.18/28 + local v6addr1=2001:db8:1::2/64 + local v6addr2=2001:db8:2::2/64 + + setup_host_common $ns $v4addr1 $v4addr2 $v6addr1 $v6addr2 +} + +setup_sw_common() +{ + local ns=$1; shift + local local_addr=$1; shift + local remote_addr=$1; shift + local veth_addr=$1; shift + local gw_addr=$1; shift + + ip -n $ns address add $local_addr/32 dev lo + + ip -n $ns link set dev veth0 up + ip -n $ns address add $veth_addr/28 dev veth0 + ip -n $ns route add default via $gw_addr + + ip -n $ns link add name br0 up type bridge vlan_filtering 1 \ + vlan_default_pvid 0 mcast_snooping 0 + + ip -n $ns link add link br0 name br0.10 up type vlan id 10 + bridge -n $ns vlan add vid 10 dev br0 self + + ip -n $ns link add link br0 name br0.20 up type vlan id 20 + bridge -n $ns vlan add vid 20 dev br0 self + + ip -n $ns link set dev swp1 up master br0 + bridge -n $ns vlan add vid 10 dev swp1 + bridge -n $ns vlan add vid 20 dev swp1 + + ip -n $ns link add name vx0 up master br0 type vxlan \ + local $local_addr dstport 4789 nolearning external + bridge -n $ns fdb add 00:00:00:00:00:00 dev vx0 self static \ + dst $remote_addr src_vni 10010 + bridge -n $ns fdb add 00:00:00:00:00:00 dev vx0 self static \ + dst $remote_addr src_vni 10020 + bridge -n $ns link set dev vx0 vlan_tunnel on learning off + + bridge -n $ns vlan add vid 10 dev vx0 + bridge -n $ns vlan add vid 10 dev vx0 tunnel_info id 10010 + + bridge -n $ns vlan add vid 20 dev vx0 + bridge -n $ns vlan add vid 20 dev vx0 tunnel_info id 10020 +} + +setup_sw1() +{ + local ns=$sw1 + local local_addr=192.0.2.33 + local remote_addr=192.0.2.34 + local veth_addr=192.0.2.49 + local gw_addr=192.0.2.50 + + setup_sw_common $ns $local_addr $remote_addr $veth_addr $gw_addr +} + +setup_sw2() +{ + local ns=$sw2 + local local_addr=192.0.2.34 + local remote_addr=192.0.2.33 + local veth_addr=192.0.2.50 + local gw_addr=192.0.2.49 + + setup_sw_common $ns $local_addr $remote_addr $veth_addr $gw_addr +} + +setup() +{ + set -e + + setup_topo + setup_h1 + setup_h2 + setup_sw1 + setup_sw2 + + sleep 5 + + set +e +} + +cleanup() +{ + cleanup_ns $h1 $h2 $sw1 $sw2 +} + +################################################################################ +# Tests + +neigh_suppress_arp_common() +{ + local vid=$1; shift + local sip=$1; shift + local tip=$1; shift + local h2_mac + + echo + echo "Per-port ARP suppression - VLAN $vid" + echo "----------------------------------" + + run_cmd "tc -n $sw1 qdisc replace dev vx0 clsact" + run_cmd "tc -n $sw1 filter replace dev vx0 egress pref 1 handle 101 proto 0x0806 flower indev swp1 arp_tip $tip arp_sip $sip arp_op request action pass" + + # Initial state - check that ARP requests are not suppressed and that + # ARP replies are received. + run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip -I eth0.$vid $tip" + log_test $? 0 "arping" + tc_check_packets $sw1 "dev vx0 egress" 101 1 + log_test $? 0 "ARP suppression" + + # Enable neighbor suppression and check that nothing changes compared + # to the initial state. + run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress on" + run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress on\"" + log_test $? 0 "\"neigh_suppress\" is on" + + run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip -I eth0.$vid $tip" + log_test $? 0 "arping" + tc_check_packets $sw1 "dev vx0 egress" 101 2 + log_test $? 0 "ARP suppression" + + # Install an FDB entry for the remote host and check that nothing + # changes compared to the initial state. + h2_mac=$(ip -n $h2 -j -p link show eth0.$vid | jq -r '.[]["address"]') + run_cmd "bridge -n $sw1 fdb replace $h2_mac dev vx0 master static vlan $vid" + log_test $? 0 "FDB entry installation" + + run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip -I eth0.$vid $tip" + log_test $? 0 "arping" + tc_check_packets $sw1 "dev vx0 egress" 101 3 + log_test $? 0 "ARP suppression" + + # Install a neighbor on the matching SVI interface and check that ARP + # requests are suppressed. + run_cmd "ip -n $sw1 neigh replace $tip lladdr $h2_mac nud permanent dev br0.$vid" + log_test $? 0 "Neighbor entry installation" + + run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip -I eth0.$vid $tip" + log_test $? 0 "arping" + tc_check_packets $sw1 "dev vx0 egress" 101 3 + log_test $? 0 "ARP suppression" + + # Take the second host down and check that ARP requests are suppressed + # and that ARP replies are received. + run_cmd "ip -n $h2 link set dev eth0.$vid down" + log_test $? 0 "H2 down" + + run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip -I eth0.$vid $tip" + log_test $? 0 "arping" + tc_check_packets $sw1 "dev vx0 egress" 101 3 + log_test $? 0 "ARP suppression" + + run_cmd "ip -n $h2 link set dev eth0.$vid up" + log_test $? 0 "H2 up" + + # Disable neighbor suppression and check that ARP requests are no + # longer suppressed. + run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress off" + run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress off\"" + log_test $? 0 "\"neigh_suppress\" is off" + + run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip -I eth0.$vid $tip" + log_test $? 0 "arping" + tc_check_packets $sw1 "dev vx0 egress" 101 4 + log_test $? 0 "ARP suppression" + + # Take the second host down and check that ARP requests are not + # suppressed and that ARP replies are not received. + run_cmd "ip -n $h2 link set dev eth0.$vid down" + log_test $? 0 "H2 down" + + run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip -I eth0.$vid $tip" + log_test $? 1 "arping" + tc_check_packets $sw1 "dev vx0 egress" 101 5 + log_test $? 0 "ARP suppression" +} + +neigh_suppress_arp() +{ + local vid=10 + local sip=192.0.2.1 + local tip=192.0.2.2 + + neigh_suppress_arp_common $vid $sip $tip + + vid=20 + sip=192.0.2.17 + tip=192.0.2.18 + neigh_suppress_arp_common $vid $sip $tip +} + +neigh_suppress_ns_common() +{ + local vid=$1; shift + local saddr=$1; shift + local daddr=$1; shift + local maddr=$1; shift + local h2_mac + + echo + echo "Per-port NS suppression - VLAN $vid" + echo "---------------------------------" + + run_cmd "tc -n $sw1 qdisc replace dev vx0 clsact" + run_cmd "tc -n $sw1 filter replace dev vx0 egress pref 1 handle 101 proto ipv6 flower indev swp1 ip_proto icmpv6 dst_ip $maddr src_ip $saddr type 135 code 0 action pass" + + # Initial state - check that NS messages are not suppressed and that ND + # messages are received. + run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr -w 5000 $daddr eth0.$vid" + log_test $? 0 "ndisc6" + tc_check_packets $sw1 "dev vx0 egress" 101 1 + log_test $? 0 "NS suppression" + + # Enable neighbor suppression and check that nothing changes compared + # to the initial state. + run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress on" + run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress on\"" + log_test $? 0 "\"neigh_suppress\" is on" + + run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr -w 5000 $daddr eth0.$vid" + log_test $? 0 "ndisc6" + tc_check_packets $sw1 "dev vx0 egress" 101 2 + log_test $? 0 "NS suppression" + + # Install an FDB entry for the remote host and check that nothing + # changes compared to the initial state. + h2_mac=$(ip -n $h2 -j -p link show eth0.$vid | jq -r '.[]["address"]') + run_cmd "bridge -n $sw1 fdb replace $h2_mac dev vx0 master static vlan $vid" + log_test $? 0 "FDB entry installation" + + run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr -w 5000 $daddr eth0.$vid" + log_test $? 0 "ndisc6" + tc_check_packets $sw1 "dev vx0 egress" 101 3 + log_test $? 0 "NS suppression" + + # Install a neighbor on the matching SVI interface and check that NS + # messages are suppressed. + run_cmd "ip -n $sw1 neigh replace $daddr lladdr $h2_mac nud permanent dev br0.$vid" + log_test $? 0 "Neighbor entry installation" + + run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr -w 5000 $daddr eth0.$vid" + log_test $? 0 "ndisc6" + tc_check_packets $sw1 "dev vx0 egress" 101 3 + log_test $? 0 "NS suppression" + + # Take the second host down and check that NS messages are suppressed + # and that ND messages are received. + run_cmd "ip -n $h2 link set dev eth0.$vid down" + log_test $? 0 "H2 down" + + run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr -w 5000 $daddr eth0.$vid" + log_test $? 0 "ndisc6" + tc_check_packets $sw1 "dev vx0 egress" 101 3 + log_test $? 0 "NS suppression" + + run_cmd "ip -n $h2 link set dev eth0.$vid up" + log_test $? 0 "H2 up" + + # Disable neighbor suppression and check that NS messages are no longer + # suppressed. + run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress off" + run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress off\"" + log_test $? 0 "\"neigh_suppress\" is off" + + run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr -w 5000 $daddr eth0.$vid" + log_test $? 0 "ndisc6" + tc_check_packets $sw1 "dev vx0 egress" 101 4 + log_test $? 0 "NS suppression" + + # Take the second host down and check that NS messages are not + # suppressed and that ND messages are not received. + run_cmd "ip -n $h2 link set dev eth0.$vid down" + log_test $? 0 "H2 down" + + run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr -w 5000 $daddr eth0.$vid" + log_test $? 2 "ndisc6" + tc_check_packets $sw1 "dev vx0 egress" 101 5 + log_test $? 0 "NS suppression" +} + +neigh_suppress_ns() +{ + local vid=10 + local saddr=2001:db8:1::1 + local daddr=2001:db8:1::2 + local maddr=ff02::1:ff00:2 + + neigh_suppress_ns_common $vid $saddr $daddr $maddr + + vid=20 + saddr=2001:db8:2::1 + daddr=2001:db8:2::2 + maddr=ff02::1:ff00:2 + + neigh_suppress_ns_common $vid $saddr $daddr $maddr +} + +neigh_vlan_suppress_arp() +{ + local vid1=10 + local vid2=20 + local sip1=192.0.2.1 + local sip2=192.0.2.17 + local tip1=192.0.2.2 + local tip2=192.0.2.18 + local h2_mac1 + local h2_mac2 + + echo + echo "Per-{Port, VLAN} ARP suppression" + echo "--------------------------------" + + run_cmd "tc -n $sw1 qdisc replace dev vx0 clsact" + run_cmd "tc -n $sw1 filter replace dev vx0 egress pref 1 handle 101 proto 0x0806 flower indev swp1 arp_tip $tip1 arp_sip $sip1 arp_op request action pass" + run_cmd "tc -n $sw1 filter replace dev vx0 egress pref 1 handle 102 proto 0x0806 flower indev swp1 arp_tip $tip2 arp_sip $sip2 arp_op request action pass" + + h2_mac1=$(ip -n $h2 -j -p link show eth0.$vid1 | jq -r '.[]["address"]') + h2_mac2=$(ip -n $h2 -j -p link show eth0.$vid2 | jq -r '.[]["address"]') + run_cmd "bridge -n $sw1 fdb replace $h2_mac1 dev vx0 master static vlan $vid1" + run_cmd "bridge -n $sw1 fdb replace $h2_mac2 dev vx0 master static vlan $vid2" + run_cmd "ip -n $sw1 neigh replace $tip1 lladdr $h2_mac1 nud permanent dev br0.$vid1" + run_cmd "ip -n $sw1 neigh replace $tip2 lladdr $h2_mac2 nud permanent dev br0.$vid2" + + # Enable per-{Port, VLAN} neighbor suppression and check that ARP + # requests are not suppressed and that ARP replies are received. + run_cmd "bridge -n $sw1 link set dev vx0 neigh_vlan_suppress on" + run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_vlan_suppress on\"" + log_test $? 0 "\"neigh_vlan_suppress\" is on" + + run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip1 -I eth0.$vid1 $tip1" + log_test $? 0 "arping (VLAN $vid1)" + run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip2 -I eth0.$vid2 $tip2" + log_test $? 0 "arping (VLAN $vid2)" + + tc_check_packets $sw1 "dev vx0 egress" 101 1 + log_test $? 0 "ARP suppression (VLAN $vid1)" + tc_check_packets $sw1 "dev vx0 egress" 102 1 + log_test $? 0 "ARP suppression (VLAN $vid2)" + + # Enable neighbor suppression on VLAN 10 and check that only on this + # VLAN ARP requests are suppressed. + run_cmd "bridge -n $sw1 vlan set vid $vid1 dev vx0 neigh_suppress on" + run_cmd "bridge -n $sw1 -d vlan show dev vx0 vid $vid1 | grep \"neigh_suppress on\"" + log_test $? 0 "\"neigh_suppress\" is on (VLAN $vid1)" + run_cmd "bridge -n $sw1 -d vlan show dev vx0 vid $vid2 | grep \"neigh_suppress off\"" + log_test $? 0 "\"neigh_suppress\" is off (VLAN $vid2)" + + run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip1 -I eth0.$vid1 $tip1" + log_test $? 0 "arping (VLAN $vid1)" + run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip2 -I eth0.$vid2 $tip2" + log_test $? 0 "arping (VLAN $vid2)" + + tc_check_packets $sw1 "dev vx0 egress" 101 1 + log_test $? 0 "ARP suppression (VLAN $vid1)" + tc_check_packets $sw1 "dev vx0 egress" 102 2 + log_test $? 0 "ARP suppression (VLAN $vid2)" + + # Enable neighbor suppression on the port and check that it has no + # effect compared to previous state. + run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress on" + run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress on\"" + log_test $? 0 "\"neigh_suppress\" is on" + + run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip1 -I eth0.$vid1 $tip1" + log_test $? 0 "arping (VLAN $vid1)" + run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip2 -I eth0.$vid2 $tip2" + log_test $? 0 "arping (VLAN $vid2)" + + tc_check_packets $sw1 "dev vx0 egress" 101 1 + log_test $? 0 "ARP suppression (VLAN $vid1)" + tc_check_packets $sw1 "dev vx0 egress" 102 3 + log_test $? 0 "ARP suppression (VLAN $vid2)" + + # Disable neighbor suppression on the port and check that it has no + # effect compared to previous state. + run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress off" + run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress off\"" + log_test $? 0 "\"neigh_suppress\" is off" + + run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip1 -I eth0.$vid1 $tip1" + log_test $? 0 "arping (VLAN $vid1)" + run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip2 -I eth0.$vid2 $tip2" + log_test $? 0 "arping (VLAN $vid2)" + + tc_check_packets $sw1 "dev vx0 egress" 101 1 + log_test $? 0 "ARP suppression (VLAN $vid1)" + tc_check_packets $sw1 "dev vx0 egress" 102 4 + log_test $? 0 "ARP suppression (VLAN $vid2)" + + # Disable neighbor suppression on VLAN 10 and check that ARP requests + # are no longer suppressed on this VLAN. + run_cmd "bridge -n $sw1 vlan set vid $vid1 dev vx0 neigh_suppress off" + run_cmd "bridge -n $sw1 -d vlan show dev vx0 vid $vid1 | grep \"neigh_suppress off\"" + log_test $? 0 "\"neigh_suppress\" is off (VLAN $vid1)" + + run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip1 -I eth0.$vid1 $tip1" + log_test $? 0 "arping (VLAN $vid1)" + run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip2 -I eth0.$vid2 $tip2" + log_test $? 0 "arping (VLAN $vid2)" + + tc_check_packets $sw1 "dev vx0 egress" 101 2 + log_test $? 0 "ARP suppression (VLAN $vid1)" + tc_check_packets $sw1 "dev vx0 egress" 102 5 + log_test $? 0 "ARP suppression (VLAN $vid2)" + + # Disable per-{Port, VLAN} neighbor suppression, enable neighbor + # suppression on the port and check that on both VLANs ARP requests are + # suppressed. + run_cmd "bridge -n $sw1 link set dev vx0 neigh_vlan_suppress off" + run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_vlan_suppress off\"" + log_test $? 0 "\"neigh_vlan_suppress\" is off" + + run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress on" + run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress on\"" + log_test $? 0 "\"neigh_suppress\" is on" + + run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip1 -I eth0.$vid1 $tip1" + log_test $? 0 "arping (VLAN $vid1)" + run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip2 -I eth0.$vid2 $tip2" + log_test $? 0 "arping (VLAN $vid2)" + + tc_check_packets $sw1 "dev vx0 egress" 101 2 + log_test $? 0 "ARP suppression (VLAN $vid1)" + tc_check_packets $sw1 "dev vx0 egress" 102 5 + log_test $? 0 "ARP suppression (VLAN $vid2)" +} + +neigh_vlan_suppress_ns() +{ + local vid1=10 + local vid2=20 + local saddr1=2001:db8:1::1 + local saddr2=2001:db8:2::1 + local daddr1=2001:db8:1::2 + local daddr2=2001:db8:2::2 + local maddr=ff02::1:ff00:2 + local h2_mac1 + local h2_mac2 + + echo + echo "Per-{Port, VLAN} NS suppression" + echo "-------------------------------" + + run_cmd "tc -n $sw1 qdisc replace dev vx0 clsact" + run_cmd "tc -n $sw1 filter replace dev vx0 egress pref 1 handle 101 proto ipv6 flower indev swp1 ip_proto icmpv6 dst_ip $maddr src_ip $saddr1 type 135 code 0 action pass" + run_cmd "tc -n $sw1 filter replace dev vx0 egress pref 1 handle 102 proto ipv6 flower indev swp1 ip_proto icmpv6 dst_ip $maddr src_ip $saddr2 type 135 code 0 action pass" + + h2_mac1=$(ip -n $h2 -j -p link show eth0.$vid1 | jq -r '.[]["address"]') + h2_mac2=$(ip -n $h2 -j -p link show eth0.$vid2 | jq -r '.[]["address"]') + run_cmd "bridge -n $sw1 fdb replace $h2_mac1 dev vx0 master static vlan $vid1" + run_cmd "bridge -n $sw1 fdb replace $h2_mac2 dev vx0 master static vlan $vid2" + run_cmd "ip -n $sw1 neigh replace $daddr1 lladdr $h2_mac1 nud permanent dev br0.$vid1" + run_cmd "ip -n $sw1 neigh replace $daddr2 lladdr $h2_mac2 nud permanent dev br0.$vid2" + + # Enable per-{Port, VLAN} neighbor suppression and check that NS + # messages are not suppressed and that ND messages are received. + run_cmd "bridge -n $sw1 link set dev vx0 neigh_vlan_suppress on" + run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_vlan_suppress on\"" + log_test $? 0 "\"neigh_vlan_suppress\" is on" + + run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr1 -w 5000 $daddr1 eth0.$vid1" + log_test $? 0 "ndisc6 (VLAN $vid1)" + run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr2 -w 5000 $daddr2 eth0.$vid2" + log_test $? 0 "ndisc6 (VLAN $vid2)" + + tc_check_packets $sw1 "dev vx0 egress" 101 1 + log_test $? 0 "NS suppression (VLAN $vid1)" + tc_check_packets $sw1 "dev vx0 egress" 102 1 + log_test $? 0 "NS suppression (VLAN $vid2)" + + # Enable neighbor suppression on VLAN 10 and check that only on this + # VLAN NS messages are suppressed. + run_cmd "bridge -n $sw1 vlan set vid $vid1 dev vx0 neigh_suppress on" + run_cmd "bridge -n $sw1 -d vlan show dev vx0 vid $vid1 | grep \"neigh_suppress on\"" + log_test $? 0 "\"neigh_suppress\" is on (VLAN $vid1)" + run_cmd "bridge -n $sw1 -d vlan show dev vx0 vid $vid2 | grep \"neigh_suppress off\"" + log_test $? 0 "\"neigh_suppress\" is off (VLAN $vid2)" + + run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr1 -w 5000 $daddr1 eth0.$vid1" + log_test $? 0 "ndisc6 (VLAN $vid1)" + run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr2 -w 5000 $daddr2 eth0.$vid2" + log_test $? 0 "ndisc6 (VLAN $vid2)" + + tc_check_packets $sw1 "dev vx0 egress" 101 1 + log_test $? 0 "NS suppression (VLAN $vid1)" + tc_check_packets $sw1 "dev vx0 egress" 102 2 + log_test $? 0 "NS suppression (VLAN $vid2)" + + # Enable neighbor suppression on the port and check that it has no + # effect compared to previous state. + run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress on" + run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress on\"" + log_test $? 0 "\"neigh_suppress\" is on" + + run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr1 -w 5000 $daddr1 eth0.$vid1" + log_test $? 0 "ndisc6 (VLAN $vid1)" + run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr2 -w 5000 $daddr2 eth0.$vid2" + log_test $? 0 "ndisc6 (VLAN $vid2)" + + tc_check_packets $sw1 "dev vx0 egress" 101 1 + log_test $? 0 "NS suppression (VLAN $vid1)" + tc_check_packets $sw1 "dev vx0 egress" 102 3 + log_test $? 0 "NS suppression (VLAN $vid2)" + + # Disable neighbor suppression on the port and check that it has no + # effect compared to previous state. + run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress off" + run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress off\"" + log_test $? 0 "\"neigh_suppress\" is off" + + run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr1 -w 5000 $daddr1 eth0.$vid1" + log_test $? 0 "ndisc6 (VLAN $vid1)" + run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr2 -w 5000 $daddr2 eth0.$vid2" + log_test $? 0 "ndisc6 (VLAN $vid2)" + + tc_check_packets $sw1 "dev vx0 egress" 101 1 + log_test $? 0 "NS suppression (VLAN $vid1)" + tc_check_packets $sw1 "dev vx0 egress" 102 4 + log_test $? 0 "NS suppression (VLAN $vid2)" + + # Disable neighbor suppression on VLAN 10 and check that NS messages + # are no longer suppressed on this VLAN. + run_cmd "bridge -n $sw1 vlan set vid $vid1 dev vx0 neigh_suppress off" + run_cmd "bridge -n $sw1 -d vlan show dev vx0 vid $vid1 | grep \"neigh_suppress off\"" + log_test $? 0 "\"neigh_suppress\" is off (VLAN $vid1)" + + run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr1 -w 5000 $daddr1 eth0.$vid1" + log_test $? 0 "ndisc6 (VLAN $vid1)" + run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr2 -w 5000 $daddr2 eth0.$vid2" + log_test $? 0 "ndisc6 (VLAN $vid2)" + + tc_check_packets $sw1 "dev vx0 egress" 101 2 + log_test $? 0 "NS suppression (VLAN $vid1)" + tc_check_packets $sw1 "dev vx0 egress" 102 5 + log_test $? 0 "NS suppression (VLAN $vid2)" + + # Disable per-{Port, VLAN} neighbor suppression, enable neighbor + # suppression on the port and check that on both VLANs NS messages are + # suppressed. + run_cmd "bridge -n $sw1 link set dev vx0 neigh_vlan_suppress off" + run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_vlan_suppress off\"" + log_test $? 0 "\"neigh_vlan_suppress\" is off" + + run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress on" + run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress on\"" + log_test $? 0 "\"neigh_suppress\" is on" + + run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr1 -w 5000 $daddr1 eth0.$vid1" + log_test $? 0 "ndisc6 (VLAN $vid1)" + run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr2 -w 5000 $daddr2 eth0.$vid2" + log_test $? 0 "ndisc6 (VLAN $vid2)" + + tc_check_packets $sw1 "dev vx0 egress" 101 2 + log_test $? 0 "NS suppression (VLAN $vid1)" + tc_check_packets $sw1 "dev vx0 egress" 102 5 + log_test $? 0 "NS suppression (VLAN $vid2)" +} + +################################################################################ +# Usage + +usage() +{ + cat <<EOF +usage: ${0##*/} OPTS + + -t <test> Test(s) to run (default: all) + (options: $TESTS) + -p Pause on fail + -P Pause after each test before cleanup + -v Verbose mode (show commands and output) +EOF +} + +################################################################################ +# Main + +trap cleanup EXIT + +while getopts ":t:pPvh" opt; do + case $opt in + t) TESTS=$OPTARG;; + p) PAUSE_ON_FAIL=yes;; + P) PAUSE=yes;; + v) VERBOSE=$(($VERBOSE + 1));; + h) usage; exit 0;; + *) usage; exit 1;; + esac +done + +# Make sure we don't pause twice. +[ "${PAUSE}" = "yes" ] && PAUSE_ON_FAIL=no + +if [ "$(id -u)" -ne 0 ];then + echo "SKIP: Need root privileges" + exit $ksft_skip; +fi + +if [ ! -x "$(command -v ip)" ]; then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +if [ ! -x "$(command -v bridge)" ]; then + echo "SKIP: Could not run test without bridge tool" + exit $ksft_skip +fi + +if [ ! -x "$(command -v tc)" ]; then + echo "SKIP: Could not run test without tc tool" + exit $ksft_skip +fi + +if [ ! -x "$(command -v arping)" ]; then + echo "SKIP: Could not run test without arping tool" + exit $ksft_skip +fi + +if [ ! -x "$(command -v ndisc6)" ]; then + echo "SKIP: Could not run test without ndisc6 tool" + exit $ksft_skip +fi + +if [ ! -x "$(command -v jq)" ]; then + echo "SKIP: Could not run test without jq tool" + exit $ksft_skip +fi + +bridge link help 2>&1 | grep -q "neigh_vlan_suppress" +if [ $? -ne 0 ]; then + echo "SKIP: iproute2 bridge too old, missing per-VLAN neighbor suppression support" + exit $ksft_skip +fi + +# Start clean. +cleanup + +for t in $TESTS +do + setup; $t; cleanup; +done + +if [ "$TESTS" != "none" ]; then + printf "\nTests passed: %3d\n" ${nsuccess} + printf "Tests failed: %3d\n" ${nfail} +fi + +exit $ret diff --git a/tools/testing/selftests/net/test_ingress_egress_chaining.sh b/tools/testing/selftests/net/test_ingress_egress_chaining.sh new file mode 100644 index 000000000000..08adff6bb3b6 --- /dev/null +++ b/tools/testing/selftests/net/test_ingress_egress_chaining.sh @@ -0,0 +1,79 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# This test runs a simple ingress tc setup between two veth pairs, +# and chains a single egress rule to test ingress chaining to egress. +# +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +if [ "$(id -u)" -ne 0 ];then + echo "SKIP: Need root privileges" + exit $ksft_skip +fi + +needed_mods="act_mirred cls_flower sch_ingress" +for mod in $needed_mods; do + modinfo $mod &>/dev/null || { echo "SKIP: Need act_mirred module"; exit $ksft_skip; } +done + +ns="ns$((RANDOM%899+100))" +veth1="veth1$((RANDOM%899+100))" +veth2="veth2$((RANDOM%899+100))" +peer1="peer1$((RANDOM%899+100))" +peer2="peer2$((RANDOM%899+100))" +ip_peer1=198.51.100.5 +ip_peer2=198.51.100.6 + +function fail() { + echo "FAIL: $@" >> /dev/stderr + exit 1 +} + +function cleanup() { + killall -q -9 udpgso_bench_rx + ip link del $veth1 &> /dev/null + ip link del $veth2 &> /dev/null + ip netns del $ns &> /dev/null +} +trap cleanup EXIT + +function config() { + echo "Setup veth pairs [$veth1, $peer1], and veth pair [$veth2, $peer2]" + ip link add $veth1 type veth peer name $peer1 + ip link add $veth2 type veth peer name $peer2 + ip addr add $ip_peer1/24 dev $peer1 + ip link set $peer1 up + ip netns add $ns + ip link set dev $peer2 netns $ns + ip netns exec $ns ip addr add $ip_peer2/24 dev $peer2 + ip netns exec $ns ip link set $peer2 up + ip link set $veth1 up + ip link set $veth2 up + + echo "Add tc filter ingress->egress forwarding $veth1 <-> $veth2" + tc qdisc add dev $veth2 ingress + tc qdisc add dev $veth1 ingress + tc filter add dev $veth2 ingress prio 1 proto all flower \ + action mirred egress redirect dev $veth1 + tc filter add dev $veth1 ingress prio 1 proto all flower \ + action mirred egress redirect dev $veth2 + + echo "Add tc filter egress->ingress forwarding $peer1 -> $veth1, bypassing the veth pipe" + tc qdisc add dev $peer1 clsact + tc filter add dev $peer1 egress prio 20 proto ip flower \ + action mirred ingress redirect dev $veth1 +} + +function test_run() { + echo "Run tcp traffic" + ./udpgso_bench_rx -t & + sleep 1 + ip netns exec $ns timeout -k 2 10 ./udpgso_bench_tx -t -l 2 -4 -D $ip_peer1 || fail "traffic failed" + echo "Test passed" +} + +config +test_run +trap - EXIT +cleanup diff --git a/tools/testing/selftests/net/test_vxlan_mdb.sh b/tools/testing/selftests/net/test_vxlan_mdb.sh new file mode 100755 index 000000000000..74ff9fb2a6f0 --- /dev/null +++ b/tools/testing/selftests/net/test_vxlan_mdb.sh @@ -0,0 +1,2511 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# This test is for checking VXLAN MDB functionality. The topology consists of +# two sets of namespaces: One for the testing of IPv4 underlay and another for +# IPv6. In both cases, both IPv4 and IPv6 overlay traffic are tested. +# +# Data path functionality is tested by sending traffic from one of the upper +# namespaces and checking using ingress tc filters that the expected traffic +# was received by one of the lower namespaces. +# +# +------------------------------------+ +------------------------------------+ +# | ns1_v4 | | ns1_v6 | +# | | | | +# | br0.10 br0.4000 br0.20 | | br0.10 br0.4000 br0.20 | +# | + + + | | + + + | +# | | | | | | | | | | +# | | | | | | | | | | +# | +---------+---------+ | | +---------+---------+ | +# | | | | | | +# | | | | | | +# | + | | + | +# | br0 | | br0 | +# | + | | + | +# | | | | | | +# | | | | | | +# | + | | + | +# | vx0 | | vx0 | +# | | | | +# | | | | +# | veth0 | | veth0 | +# | + | | + | +# +-----------------|------------------+ +-----------------|------------------+ +# | | +# +-----------------|------------------+ +-----------------|------------------+ +# | + | | + | +# | veth0 | | veth0 | +# | | | | +# | | | | +# | vx0 | | vx0 | +# | + | | + | +# | | | | | | +# | | | | | | +# | + | | + | +# | br0 | | br0 | +# | + | | + | +# | | | | | | +# | | | | | | +# | +---------+---------+ | | +---------+---------+ | +# | | | | | | | | | | +# | | | | | | | | | | +# | + + + | | + + + | +# | br0.10 br0.4000 br0.10 | | br0.10 br0.4000 br0.20 | +# | | | | +# | ns2_v4 | | ns2_v6 | +# +------------------------------------+ +------------------------------------+ + +source lib.sh +ret=0 + +CONTROL_PATH_TESTS=" + basic_star_g_ipv4_ipv4 + basic_star_g_ipv6_ipv4 + basic_star_g_ipv4_ipv6 + basic_star_g_ipv6_ipv6 + basic_sg_ipv4_ipv4 + basic_sg_ipv6_ipv4 + basic_sg_ipv4_ipv6 + basic_sg_ipv6_ipv6 + star_g_ipv4_ipv4 + star_g_ipv6_ipv4 + star_g_ipv4_ipv6 + star_g_ipv6_ipv6 + sg_ipv4_ipv4 + sg_ipv6_ipv4 + sg_ipv4_ipv6 + sg_ipv6_ipv6 + dump_ipv4_ipv4 + dump_ipv6_ipv4 + dump_ipv4_ipv6 + dump_ipv6_ipv6 + flush +" + +DATA_PATH_TESTS=" + encap_params_ipv4_ipv4 + encap_params_ipv6_ipv4 + encap_params_ipv4_ipv6 + encap_params_ipv6_ipv6 + starg_exclude_ir_ipv4_ipv4 + starg_exclude_ir_ipv6_ipv4 + starg_exclude_ir_ipv4_ipv6 + starg_exclude_ir_ipv6_ipv6 + starg_include_ir_ipv4_ipv4 + starg_include_ir_ipv6_ipv4 + starg_include_ir_ipv4_ipv6 + starg_include_ir_ipv6_ipv6 + starg_exclude_p2mp_ipv4_ipv4 + starg_exclude_p2mp_ipv6_ipv4 + starg_exclude_p2mp_ipv4_ipv6 + starg_exclude_p2mp_ipv6_ipv6 + starg_include_p2mp_ipv4_ipv4 + starg_include_p2mp_ipv6_ipv4 + starg_include_p2mp_ipv4_ipv6 + starg_include_p2mp_ipv6_ipv6 + egress_vni_translation_ipv4_ipv4 + egress_vni_translation_ipv6_ipv4 + egress_vni_translation_ipv4_ipv6 + egress_vni_translation_ipv6_ipv6 + all_zeros_mdb_ipv4 + all_zeros_mdb_ipv6 + mdb_fdb_ipv4_ipv4 + mdb_fdb_ipv6_ipv4 + mdb_fdb_ipv4_ipv6 + mdb_fdb_ipv6_ipv6 + mdb_torture_ipv4_ipv4 + mdb_torture_ipv6_ipv4 + mdb_torture_ipv4_ipv6 + mdb_torture_ipv6_ipv6 +" + +# All tests in this script. Can be overridden with -t option. +TESTS=" + $CONTROL_PATH_TESTS + $DATA_PATH_TESTS +" +VERBOSE=0 +PAUSE_ON_FAIL=no +PAUSE=no + +################################################################################ +# Utilities + +log_test() +{ + local rc=$1 + local expected=$2 + local msg="$3" + + if [ ${rc} -eq ${expected} ]; then + printf "TEST: %-60s [ OK ]\n" "${msg}" + nsuccess=$((nsuccess+1)) + else + ret=1 + nfail=$((nfail+1)) + printf "TEST: %-60s [FAIL]\n" "${msg}" + if [ "$VERBOSE" = "1" ]; then + echo " rc=$rc, expected $expected" + fi + + if [ "${PAUSE_ON_FAIL}" = "yes" ]; then + echo + echo "hit enter to continue, 'q' to quit" + read a + [ "$a" = "q" ] && exit 1 + fi + fi + + if [ "${PAUSE}" = "yes" ]; then + echo + echo "hit enter to continue, 'q' to quit" + read a + [ "$a" = "q" ] && exit 1 + fi + + [ "$VERBOSE" = "1" ] && echo +} + +run_cmd() +{ + local cmd="$1" + local out + local stderr="2>/dev/null" + + if [ "$VERBOSE" = "1" ]; then + printf "COMMAND: $cmd\n" + stderr= + fi + + out=$(eval $cmd $stderr) + rc=$? + if [ "$VERBOSE" = "1" -a -n "$out" ]; then + echo " $out" + fi + + return $rc +} + +tc_check_packets() +{ + local ns=$1; shift + local id=$1; shift + local handle=$1; shift + local count=$1; shift + local pkts + + sleep 0.1 + pkts=$(tc -n $ns -j -s filter show $id \ + | jq ".[] | select(.options.handle == $handle) | \ + .options.actions[0].stats.packets") + [[ $pkts == $count ]] +} + +################################################################################ +# Setup + +setup_common_ns() +{ + local ns=$1; shift + local local_addr=$1; shift + + ip netns exec $ns sysctl -qw net.ipv4.ip_forward=1 + ip netns exec $ns sysctl -qw net.ipv4.fib_multipath_use_neigh=1 + ip netns exec $ns sysctl -qw net.ipv4.conf.default.ignore_routes_with_linkdown=1 + ip netns exec $ns sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1 + ip netns exec $ns sysctl -qw net.ipv6.conf.all.forwarding=1 + ip netns exec $ns sysctl -qw net.ipv6.conf.default.forwarding=1 + ip netns exec $ns sysctl -qw net.ipv6.conf.default.ignore_routes_with_linkdown=1 + ip netns exec $ns sysctl -qw net.ipv6.conf.all.accept_dad=0 + ip netns exec $ns sysctl -qw net.ipv6.conf.default.accept_dad=0 + + ip -n $ns link set dev lo up + ip -n $ns address add $local_addr dev lo + + ip -n $ns link set dev veth0 up + + ip -n $ns link add name br0 up type bridge vlan_filtering 1 \ + vlan_default_pvid 0 mcast_snooping 0 + + ip -n $ns link add link br0 name br0.10 up type vlan id 10 + bridge -n $ns vlan add vid 10 dev br0 self + + ip -n $ns link add link br0 name br0.20 up type vlan id 20 + bridge -n $ns vlan add vid 20 dev br0 self + + ip -n $ns link add link br0 name br0.4000 up type vlan id 4000 + bridge -n $ns vlan add vid 4000 dev br0 self + + ip -n $ns link add name vx0 up master br0 type vxlan \ + local $local_addr dstport 4789 external vnifilter + bridge -n $ns link set dev vx0 vlan_tunnel on + + bridge -n $ns vlan add vid 10 dev vx0 + bridge -n $ns vlan add vid 10 dev vx0 tunnel_info id 10010 + bridge -n $ns vni add vni 10010 dev vx0 + + bridge -n $ns vlan add vid 20 dev vx0 + bridge -n $ns vlan add vid 20 dev vx0 tunnel_info id 10020 + bridge -n $ns vni add vni 10020 dev vx0 + + bridge -n $ns vlan add vid 4000 dev vx0 pvid + bridge -n $ns vlan add vid 4000 dev vx0 tunnel_info id 14000 + bridge -n $ns vni add vni 14000 dev vx0 +} + +setup_common() +{ + local ns1=$1; shift + local ns2=$1; shift + local local_addr1=$1; shift + local local_addr2=$1; shift + + ip link add name veth0 type veth peer name veth1 + ip link set dev veth0 netns $ns1 name veth0 + ip link set dev veth1 netns $ns2 name veth0 + + setup_common_ns $ns1 $local_addr1 + setup_common_ns $ns2 $local_addr2 +} + +setup_v4() +{ + setup_ns ns1_v4 ns2_v4 + setup_common $ns1_v4 $ns2_v4 192.0.2.1 192.0.2.2 + + ip -n $ns1_v4 address add 192.0.2.17/28 dev veth0 + ip -n $ns2_v4 address add 192.0.2.18/28 dev veth0 + + ip -n $ns1_v4 route add default via 192.0.2.18 + ip -n $ns2_v4 route add default via 192.0.2.17 +} + +cleanup_v4() +{ + cleanup_ns $ns2_v4 $ns1_v4 +} + +setup_v6() +{ + setup_ns ns1_v6 ns2_v6 + setup_common $ns1_v6 $ns2_v6 2001:db8:1::1 2001:db8:1::2 + + ip -n $ns1_v6 address add 2001:db8:2::1/64 dev veth0 nodad + ip -n $ns2_v6 address add 2001:db8:2::2/64 dev veth0 nodad + + ip -n $ns1_v6 route add default via 2001:db8:2::2 + ip -n $ns2_v6 route add default via 2001:db8:2::1 +} + +cleanup_v6() +{ + cleanup_ns $ns2_v6 $ns1_v6 +} + +setup() +{ + set -e + + setup_v4 + setup_v6 + + sleep 5 + + set +e +} + +cleanup() +{ + cleanup_v6 &> /dev/null + cleanup_v4 &> /dev/null +} + +################################################################################ +# Tests - Control path + +basic_common() +{ + local ns1=$1; shift + local grp_key=$1; shift + local vtep_ip=$1; shift + + # Test basic control path operations common to all MDB entry types. + + # Basic add, replace and delete behavior. + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent dst $vtep_ip src_vni 10010" + log_test $? 0 "MDB entry addition" + run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 $grp_key src_vni 10010" + log_test $? 0 "MDB entry presence after addition" + + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 $grp_key permanent dst $vtep_ip src_vni 10010" + log_test $? 0 "MDB entry replacement" + run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 $grp_key src_vni 10010" + log_test $? 0 "MDB entry presence after replacement" + + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 $grp_key dst $vtep_ip src_vni 10010" + log_test $? 0 "MDB entry deletion" + run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 $grp_key src_vni 10010" + log_test $? 254 "MDB entry presence after deletion" + + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 $grp_key dst $vtep_ip src_vni 10010" + log_test $? 255 "Non-existent MDB entry deletion" + + # Default protocol and replacement. + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent dst $vtep_ip src_vni 10010" + run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 $grp_key src_vni 10010 | grep \"proto static\"" + log_test $? 0 "MDB entry default protocol" + + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 $grp_key permanent proto 123 dst $vtep_ip src_vni 10010" + run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 $grp_key src_vni 10010 | grep \"proto 123\"" + log_test $? 0 "MDB entry protocol replacement" + + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 $grp_key dst $vtep_ip src_vni 10010" + + # Default destination port and replacement. + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent dst $vtep_ip src_vni 10010" + run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 $grp_key src_vni 10010 | grep \" dst_port \"" + log_test $? 1 "MDB entry default destination port" + + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 $grp_key permanent dst $vtep_ip dst_port 1234 src_vni 10010" + run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 $grp_key src_vni 10010 | grep \"dst_port 1234\"" + log_test $? 0 "MDB entry destination port replacement" + + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 $grp_key dst $vtep_ip src_vni 10010" + + # Default destination VNI and replacement. + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent dst $vtep_ip src_vni 10010" + run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 $grp_key src_vni 10010 | grep \" vni \"" + log_test $? 1 "MDB entry default destination VNI" + + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 $grp_key permanent dst $vtep_ip vni 1234 src_vni 10010" + run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 $grp_key src_vni 10010 | grep \"vni 1234\"" + log_test $? 0 "MDB entry destination VNI replacement" + + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 $grp_key dst $vtep_ip src_vni 10010" + + # Default outgoing interface and replacement. + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent dst $vtep_ip src_vni 10010" + run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 $grp_key src_vni 10010 | grep \" via \"" + log_test $? 1 "MDB entry default outgoing interface" + + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 $grp_key permanent dst $vtep_ip src_vni 10010 via veth0" + run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 $grp_key src_vni 10010 | grep \"via veth0\"" + log_test $? 0 "MDB entry outgoing interface replacement" + + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 $grp_key dst $vtep_ip src_vni 10010" + + # Common error cases. + run_cmd "bridge -n $ns1 mdb add dev vx0 port veth0 $grp_key permanent dst $vtep_ip src_vni 10010" + log_test $? 255 "MDB entry with mismatch between device and port" + + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key temp dst $vtep_ip src_vni 10010" + log_test $? 255 "MDB entry with temp state" + + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent vid 10 dst $vtep_ip src_vni 10010" + log_test $? 255 "MDB entry with VLAN" + + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp 01:02:03:04:05:06 permanent dst $vtep_ip src_vni 10010" + log_test $? 255 "MDB entry MAC address" + + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent" + log_test $? 255 "MDB entry without extended parameters" + + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent proto 3 dst $vtep_ip src_vni 10010" + log_test $? 255 "MDB entry with an invalid protocol" + + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent dst $vtep_ip vni $((2 ** 24)) src_vni 10010" + log_test $? 255 "MDB entry with an invalid destination VNI" + + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent dst $vtep_ip src_vni $((2 ** 24))" + log_test $? 255 "MDB entry with an invalid source VNI" + + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent src_vni 10010" + log_test $? 255 "MDB entry without a remote destination IP" + + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent dst $vtep_ip src_vni 10010" + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent dst $vtep_ip src_vni 10010" + log_test $? 255 "Duplicate MDB entries" + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 $grp_key dst $vtep_ip src_vni 10010" +} + +basic_star_g_ipv4_ipv4() +{ + local ns1=$ns1_v4 + local grp_key="grp 239.1.1.1" + local vtep_ip=198.51.100.100 + + echo + echo "Control path: Basic (*, G) operations - IPv4 overlay / IPv4 underlay" + echo "--------------------------------------------------------------------" + + basic_common $ns1 "$grp_key" $vtep_ip +} + +basic_star_g_ipv6_ipv4() +{ + local ns1=$ns1_v4 + local grp_key="grp ff0e::1" + local vtep_ip=198.51.100.100 + + echo + echo "Control path: Basic (*, G) operations - IPv6 overlay / IPv4 underlay" + echo "--------------------------------------------------------------------" + + basic_common $ns1 "$grp_key" $vtep_ip +} + +basic_star_g_ipv4_ipv6() +{ + local ns1=$ns1_v6 + local grp_key="grp 239.1.1.1" + local vtep_ip=2001:db8:1000::1 + + echo + echo "Control path: Basic (*, G) operations - IPv4 overlay / IPv6 underlay" + echo "--------------------------------------------------------------------" + + basic_common $ns1 "$grp_key" $vtep_ip +} + +basic_star_g_ipv6_ipv6() +{ + local ns1=$ns1_v6 + local grp_key="grp ff0e::1" + local vtep_ip=2001:db8:1000::1 + + echo + echo "Control path: Basic (*, G) operations - IPv6 overlay / IPv6 underlay" + echo "--------------------------------------------------------------------" + + basic_common $ns1 "$grp_key" $vtep_ip +} + +basic_sg_ipv4_ipv4() +{ + local ns1=$ns1_v4 + local grp_key="grp 239.1.1.1 src 192.0.2.129" + local vtep_ip=198.51.100.100 + + echo + echo "Control path: Basic (S, G) operations - IPv4 overlay / IPv4 underlay" + echo "--------------------------------------------------------------------" + + basic_common $ns1 "$grp_key" $vtep_ip +} + +basic_sg_ipv6_ipv4() +{ + local ns1=$ns1_v4 + local grp_key="grp ff0e::1 src 2001:db8:100::1" + local vtep_ip=198.51.100.100 + + echo + echo "Control path: Basic (S, G) operations - IPv6 overlay / IPv4 underlay" + echo "---------------------------------------------------------------------" + + basic_common $ns1 "$grp_key" $vtep_ip +} + +basic_sg_ipv4_ipv6() +{ + local ns1=$ns1_v6 + local grp_key="grp 239.1.1.1 src 192.0.2.129" + local vtep_ip=2001:db8:1000::1 + + echo + echo "Control path: Basic (S, G) operations - IPv4 overlay / IPv6 underlay" + echo "--------------------------------------------------------------------" + + basic_common $ns1 "$grp_key" $vtep_ip +} + +basic_sg_ipv6_ipv6() +{ + local ns1=$ns1_v6 + local grp_key="grp ff0e::1 src 2001:db8:100::1" + local vtep_ip=2001:db8:1000::1 + + echo + echo "Control path: Basic (S, G) operations - IPv6 overlay / IPv6 underlay" + echo "--------------------------------------------------------------------" + + basic_common $ns1 "$grp_key" $vtep_ip +} + +star_g_common() +{ + local ns1=$1; shift + local grp=$1; shift + local src1=$1; shift + local src2=$1; shift + local src3=$1; shift + local vtep_ip=$1; shift + local all_zeros_grp=$1; shift + + # Test control path operations specific to (*, G) entries. + + # Basic add, replace and delete behavior. + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip src_vni 10010" + log_test $? 0 "(*, G) MDB entry addition with source list" + run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src_vni 10010" + log_test $? 0 "(*, G) MDB entry presence after addition" + run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src1 src_vni 10010" + log_test $? 0 "(S, G) MDB entry presence after addition" + + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip src_vni 10010" + log_test $? 0 "(*, G) MDB entry replacement with source list" + run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src_vni 10010" + log_test $? 0 "(*, G) MDB entry presence after replacement" + run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src1 src_vni 10010" + log_test $? 0 "(S, G) MDB entry presence after replacement" + + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep_ip src_vni 10010" + log_test $? 0 "(*, G) MDB entry deletion" + run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src_vni 10010" + log_test $? 254 "(*, G) MDB entry presence after deletion" + run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src1 src_vni 10010" + log_test $? 254 "(S, G) MDB entry presence after deletion" + + # Default filter mode and replacement. + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp permanent dst $vtep_ip src_vni 10010" + run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src_vni 10010 | grep exclude" + log_test $? 0 "(*, G) MDB entry default filter mode" + + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode include source_list $src1 dst $vtep_ip src_vni 10010" + run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src_vni 10010 | grep include" + log_test $? 0 "(*, G) MDB entry after replacing filter mode to \"include\"" + run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src1 src_vni 10010" + log_test $? 0 "(S, G) MDB entry after replacing filter mode to \"include\"" + run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src1 src_vni 10010 | grep blocked" + log_test $? 1 "\"blocked\" flag after replacing filter mode to \"include\"" + + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip src_vni 10010" + run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src_vni 10010 | grep exclude" + log_test $? 0 "(*, G) MDB entry after replacing filter mode to \"exclude\"" + run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grep grp $grp src $src1 src_vni 10010" + log_test $? 0 "(S, G) MDB entry after replacing filter mode to \"exclude\"" + run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src1 src_vni 10010 | grep blocked" + log_test $? 0 "\"blocked\" flag after replacing filter mode to \"exclude\"" + + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep_ip src_vni 10010" + + # Default source list and replacement. + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp permanent dst $vtep_ip src_vni 10010" + run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src_vni 10010 | grep source_list" + log_test $? 1 "(*, G) MDB entry default source list" + + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1,$src2,$src3 dst $vtep_ip src_vni 10010" + run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src1 src_vni 10010" + log_test $? 0 "(S, G) MDB entry of 1st source after replacing source list" + run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src2 src_vni 10010" + log_test $? 0 "(S, G) MDB entry of 2nd source after replacing source list" + run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src3 src_vni 10010" + log_test $? 0 "(S, G) MDB entry of 3rd source after replacing source list" + + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1,$src3 dst $vtep_ip src_vni 10010" + run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src1 src_vni 10010" + log_test $? 0 "(S, G) MDB entry of 1st source after removing source" + run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src2 src_vni 10010" + log_test $? 254 "(S, G) MDB entry of 2nd source after removing source" + run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src3 src_vni 10010" + log_test $? 0 "(S, G) MDB entry of 3rd source after removing source" + + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep_ip src_vni 10010" + + # Default protocol and replacement. + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip src_vni 10010" + run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src_vni 10010 | grep \"proto static\"" + log_test $? 0 "(*, G) MDB entry default protocol" + run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src1 src_vni 10010 | grep \"proto static\"" + log_test $? 0 "(S, G) MDB entry default protocol" + + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 proto bgp dst $vtep_ip src_vni 10010" + run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src_vni 10010 | grep \"proto bgp\"" + log_test $? 0 "(*, G) MDB entry protocol after replacement" + run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src1 src_vni 10010 | grep \"proto bgp\"" + log_test $? 0 "(S, G) MDB entry protocol after replacement" + + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep_ip src_vni 10010" + + # Default destination port and replacement. + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip src_vni 10010" + run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src_vni 10010 | grep \" dst_port \"" + log_test $? 1 "(*, G) MDB entry default destination port" + run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src1 src_vni 10010 | grep \" dst_port \"" + log_test $? 1 "(S, G) MDB entry default destination port" + + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip dst_port 1234 src_vni 10010" + run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src_vni 10010 | grep \" dst_port 1234 \"" + log_test $? 0 "(*, G) MDB entry destination port after replacement" + run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src1 src_vni 10010 | grep \" dst_port 1234 \"" + log_test $? 0 "(S, G) MDB entry destination port after replacement" + + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep_ip src_vni 10010" + + # Default destination VNI and replacement. + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip src_vni 10010" + run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src_vni 10010 | grep \" vni \"" + log_test $? 1 "(*, G) MDB entry default destination VNI" + run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src1 src_vni 10010 | grep \" vni \"" + log_test $? 1 "(S, G) MDB entry default destination VNI" + + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip vni 1234 src_vni 10010" + run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src_vni 10010 | grep \" vni 1234 \"" + log_test $? 0 "(*, G) MDB entry destination VNI after replacement" + run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src1 src_vni 10010 | grep \" vni 1234 \"" + log_test $? 0 "(S, G) MDB entry destination VNI after replacement" + + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep_ip src_vni 10010" + + # Default outgoing interface and replacement. + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip src_vni 10010" + run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src_vni 10010 | grep \" via \"" + log_test $? 1 "(*, G) MDB entry default outgoing interface" + run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src1 src_vni 10010 | grep \" via \"" + log_test $? 1 "(S, G) MDB entry default outgoing interface" + + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip src_vni 10010 via veth0" + run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src_vni 10010 | grep \" via veth0 \"" + log_test $? 0 "(*, G) MDB entry outgoing interface after replacement" + run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src1 src_vni 10010 | grep \" via veth0 \"" + log_test $? 0 "(S, G) MDB entry outgoing interface after replacement" + + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep_ip src_vni 10010" + + # Error cases. + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $all_zeros_grp permanent filter_mode exclude dst $vtep_ip src_vni 10010" + log_test $? 255 "All-zeros group with filter mode" + + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $all_zeros_grp permanent source_list $src1 dst $vtep_ip src_vni 10010" + log_test $? 255 "All-zeros group with source list" + + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp permanent filter_mode include dst $vtep_ip src_vni 10010" + log_test $? 255 "(*, G) INCLUDE with an empty source list" + + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $grp dst $vtep_ip src_vni 10010" + log_test $? 255 "Invalid source in source list" + + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp permanent source_list $src1 dst $vtep_ip src_vni 10010" + log_test $? 255 "Source list without filter mode" +} + +star_g_ipv4_ipv4() +{ + local ns1=$ns1_v4 + local grp=239.1.1.1 + local src1=192.0.2.129 + local src2=192.0.2.130 + local src3=192.0.2.131 + local vtep_ip=198.51.100.100 + local all_zeros_grp=0.0.0.0 + + echo + echo "Control path: (*, G) operations - IPv4 overlay / IPv4 underlay" + echo "--------------------------------------------------------------" + + star_g_common $ns1 $grp $src1 $src2 $src3 $vtep_ip $all_zeros_grp +} + +star_g_ipv6_ipv4() +{ + local ns1=$ns1_v4 + local grp=ff0e::1 + local src1=2001:db8:100::1 + local src2=2001:db8:100::2 + local src3=2001:db8:100::3 + local vtep_ip=198.51.100.100 + local all_zeros_grp=:: + + echo + echo "Control path: (*, G) operations - IPv6 overlay / IPv4 underlay" + echo "--------------------------------------------------------------" + + star_g_common $ns1 $grp $src1 $src2 $src3 $vtep_ip $all_zeros_grp +} + +star_g_ipv4_ipv6() +{ + local ns1=$ns1_v6 + local grp=239.1.1.1 + local src1=192.0.2.129 + local src2=192.0.2.130 + local src3=192.0.2.131 + local vtep_ip=2001:db8:1000::1 + local all_zeros_grp=0.0.0.0 + + echo + echo "Control path: (*, G) operations - IPv4 overlay / IPv6 underlay" + echo "--------------------------------------------------------------" + + star_g_common $ns1 $grp $src1 $src2 $src3 $vtep_ip $all_zeros_grp +} + +star_g_ipv6_ipv6() +{ + local ns1=$ns1_v6 + local grp=ff0e::1 + local src1=2001:db8:100::1 + local src2=2001:db8:100::2 + local src3=2001:db8:100::3 + local vtep_ip=2001:db8:1000::1 + local all_zeros_grp=:: + + echo + echo "Control path: (*, G) operations - IPv6 overlay / IPv6 underlay" + echo "--------------------------------------------------------------" + + star_g_common $ns1 $grp $src1 $src2 $src3 $vtep_ip $all_zeros_grp +} + +sg_common() +{ + local ns1=$1; shift + local grp=$1; shift + local src=$1; shift + local vtep_ip=$1; shift + local all_zeros_grp=$1; shift + + # Test control path operations specific to (S, G) entries. + + # Default filter mode. + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp src $src permanent dst $vtep_ip src_vni 10010" + run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src src_vni 10010 | grep include" + log_test $? 0 "(S, G) MDB entry default filter mode" + + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp src $src permanent dst $vtep_ip src_vni 10010" + + # Error cases. + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp src $src permanent filter_mode include dst $vtep_ip src_vni 10010" + log_test $? 255 "(S, G) with filter mode" + + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp src $src permanent source_list $src dst $vtep_ip src_vni 10010" + log_test $? 255 "(S, G) with source list" + + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp src $grp permanent dst $vtep_ip src_vni 10010" + log_test $? 255 "(S, G) with an invalid source list" + + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $all_zeros_grp src $src permanent dst $vtep_ip src_vni 10010" + log_test $? 255 "All-zeros group with source" +} + +sg_ipv4_ipv4() +{ + local ns1=$ns1_v4 + local grp=239.1.1.1 + local src=192.0.2.129 + local vtep_ip=198.51.100.100 + local all_zeros_grp=0.0.0.0 + + echo + echo "Control path: (S, G) operations - IPv4 overlay / IPv4 underlay" + echo "--------------------------------------------------------------" + + sg_common $ns1 $grp $src $vtep_ip $all_zeros_grp +} + +sg_ipv6_ipv4() +{ + local ns1=$ns1_v4 + local grp=ff0e::1 + local src=2001:db8:100::1 + local vtep_ip=198.51.100.100 + local all_zeros_grp=:: + + echo + echo "Control path: (S, G) operations - IPv6 overlay / IPv4 underlay" + echo "--------------------------------------------------------------" + + sg_common $ns1 $grp $src $vtep_ip $all_zeros_grp +} + +sg_ipv4_ipv6() +{ + local ns1=$ns1_v6 + local grp=239.1.1.1 + local src=192.0.2.129 + local vtep_ip=2001:db8:1000::1 + local all_zeros_grp=0.0.0.0 + + echo + echo "Control path: (S, G) operations - IPv4 overlay / IPv6 underlay" + echo "--------------------------------------------------------------" + + sg_common $ns1 $grp $src $vtep_ip $all_zeros_grp +} + +sg_ipv6_ipv6() +{ + local ns1=$ns1_v6 + local grp=ff0e::1 + local src=2001:db8:100::1 + local vtep_ip=2001:db8:1000::1 + local all_zeros_grp=:: + + echo + echo "Control path: (S, G) operations - IPv6 overlay / IPv6 underlay" + echo "--------------------------------------------------------------" + + sg_common $ns1 $grp $src $vtep_ip $all_zeros_grp +} + +ipv4_grps_get() +{ + local max_grps=$1; shift + local i + + for i in $(seq 0 $((max_grps - 1))); do + echo "239.1.1.$i" + done +} + +ipv6_grps_get() +{ + local max_grps=$1; shift + local i + + for i in $(seq 0 $((max_grps - 1))); do + echo "ff0e::$(printf %x $i)" + done +} + +dump_common() +{ + local ns1=$1; shift + local local_addr=$1; shift + local remote_prefix=$1; shift + local fn=$1; shift + local max_vxlan_devs=2 + local max_remotes=64 + local max_grps=256 + local num_entries + local batch_file + local grp + local i j + + # The kernel maintains various markers for the MDB dump. Add a test for + # large scale MDB dump to make sure that all the configured entries are + # dumped and that the markers are used correctly. + + # Create net devices. + for i in $(seq 1 $max_vxlan_devs); do + ip -n $ns1 link add name vx-test${i} up type vxlan \ + local $local_addr dstport 4789 external vnifilter + done + + # Create batch file with MDB entries. + batch_file=$(mktemp) + for i in $(seq 1 $max_vxlan_devs); do + for j in $(seq 1 $max_remotes); do + for grp in $($fn $max_grps); do + echo "mdb add dev vx-test${i} port vx-test${i} grp $grp permanent dst ${remote_prefix}${j}" >> $batch_file + done + done + done + + # Program the batch file and check for expected number of entries. + bridge -n $ns1 -b $batch_file + for i in $(seq 1 $max_vxlan_devs); do + num_entries=$(bridge -n $ns1 mdb show dev vx-test${i} | grep "permanent" | wc -l) + [[ $num_entries -eq $((max_grps * max_remotes)) ]] + log_test $? 0 "Large scale dump - VXLAN device #$i" + done + + rm -rf $batch_file +} + +dump_ipv4_ipv4() +{ + local ns1=$ns1_v4 + local local_addr=192.0.2.1 + local remote_prefix=198.51.100. + local fn=ipv4_grps_get + + echo + echo "Control path: Large scale MDB dump - IPv4 overlay / IPv4 underlay" + echo "-----------------------------------------------------------------" + + dump_common $ns1 $local_addr $remote_prefix $fn +} + +dump_ipv6_ipv4() +{ + local ns1=$ns1_v4 + local local_addr=192.0.2.1 + local remote_prefix=198.51.100. + local fn=ipv6_grps_get + + echo + echo "Control path: Large scale MDB dump - IPv6 overlay / IPv4 underlay" + echo "-----------------------------------------------------------------" + + dump_common $ns1 $local_addr $remote_prefix $fn +} + +dump_ipv4_ipv6() +{ + local ns1=$ns1_v6 + local local_addr=2001:db8:1::1 + local remote_prefix=2001:db8:1000:: + local fn=ipv4_grps_get + + echo + echo "Control path: Large scale MDB dump - IPv4 overlay / IPv6 underlay" + echo "-----------------------------------------------------------------" + + dump_common $ns1 $local_addr $remote_prefix $fn +} + +dump_ipv6_ipv6() +{ + local ns1=$ns1_v6 + local local_addr=2001:db8:1::1 + local remote_prefix=2001:db8:1000:: + local fn=ipv6_grps_get + + echo + echo "Control path: Large scale MDB dump - IPv6 overlay / IPv6 underlay" + echo "-----------------------------------------------------------------" + + dump_common $ns1 $local_addr $remote_prefix $fn +} + +flush() +{ + local num_entries + + echo + echo "Control path: Flush" + echo "-------------------" + + # Add entries with different attributes and check that they are all + # flushed when the flush command is given with no parameters. + + # Different source VNI. + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.1 src_vni 10010" + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.2 permanent dst 198.51.100.1 src_vni 10011" + + # Different routing protocol. + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.3 permanent proto bgp dst 198.51.100.1 src_vni 10010" + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.4 permanent proto zebra dst 198.51.100.1 src_vni 10010" + + # Different destination IP. + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.5 permanent dst 198.51.100.1 src_vni 10010" + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.6 permanent dst 198.51.100.2 src_vni 10010" + + # Different destination port. + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.7 permanent dst 198.51.100.1 dst_port 11111 src_vni 10010" + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.8 permanent dst 198.51.100.1 dst_port 22222 src_vni 10010" + + # Different VNI. + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.9 permanent dst 198.51.100.1 vni 10010 src_vni 10010" + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.10 permanent dst 198.51.100.1 vni 10020 src_vni 10010" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0" + num_entries=$(bridge -n $ns1_v4 mdb show dev vx0 | wc -l) + [[ $num_entries -eq 0 ]] + log_test $? 0 "Flush all" + + # Check that entries are flushed when port is specified as the VXLAN + # device and that an error is returned when port is specified as a + # different net device. + + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.1 src_vni 10010" + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.2 src_vni 10010" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 port vx0" + run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010" + log_test $? 254 "Flush by port - matching" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 port veth0" + log_test $? 255 "Flush by port - non-matching" + + # Check that when flushing by source VNI only entries programmed with + # the specified source VNI are flushed and the rest are not. + + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.1 src_vni 10010" + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.2 src_vni 10010" + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.1 src_vni 10011" + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.2 src_vni 10011" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 src_vni 10010" + + run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010" + log_test $? 254 "Flush by source VNI - matching" + run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10011" + log_test $? 0 "Flush by source VNI - non-matching" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0" + + # Check that all entries are flushed when "permanent" is specified and + # that an error is returned when "nopermanent" is specified. + + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.1 src_vni 10010" + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.2 src_vni 10010" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 permanent" + run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010" + log_test $? 254 "Flush by \"permanent\" state" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 nopermanent" + log_test $? 255 "Flush by \"nopermanent\" state" + + # Check that when flushing by routing protocol only entries programmed + # with the specified routing protocol are flushed and the rest are not. + + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent proto bgp dst 198.51.100.1 src_vni 10010" + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent proto zebra dst 198.51.100.2 src_vni 10010" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 proto bgp" + + run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep \"proto bgp\"" + log_test $? 1 "Flush by routing protocol - matching" + run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep \"proto zebra\"" + log_test $? 0 "Flush by routing protocol - non-matching" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0" + + # Check that when flushing by destination IP only entries programmed + # with the specified destination IP are flushed and the rest are not. + + # IPv4. + + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.1 src_vni 10010" + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.2 src_vni 10010" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 dst 198.51.100.2" + + run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep 198.51.100.2" + log_test $? 1 "Flush by IPv4 destination IP - matching" + run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep 198.51.100.1" + log_test $? 0 "Flush by IPv4 destination IP - non-matching" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0" + + # IPv6. + + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 2001:db8:1000::1 src_vni 10010" + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 2001:db8:1000::2 src_vni 10010" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 dst 2001:db8:1000::2" + + run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep 2001:db8:1000::2" + log_test $? 1 "Flush by IPv6 destination IP - matching" + run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep 2001:db8:1000::1" + log_test $? 0 "Flush by IPv6 destination IP - non-matching" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0" + + # Check that when flushing by UDP destination port only entries + # programmed with the specified port are flushed and the rest are not. + + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst_port 11111 dst 198.51.100.1 src_vni 10010" + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst_port 22222 dst 198.51.100.2 src_vni 10010" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 dst_port 11111" + + run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep \"dst_port 11111\"" + log_test $? 1 "Flush by UDP destination port - matching" + run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep \"dst_port 22222\"" + log_test $? 0 "Flush by UDP destination port - non-matching" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0" + + # When not specifying a UDP destination port for an entry, traffic is + # encapsulated with the device's UDP destination port. Check that when + # flushing by the device's UDP destination port only entries programmed + # with this port are flushed and the rest are not. + + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.1 src_vni 10010" + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst_port 22222 dst 198.51.100.2 src_vni 10010" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 dst_port 4789" + + run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep 198.51.100.1" + log_test $? 1 "Flush by device's UDP destination port - matching" + run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep 198.51.100.2" + log_test $? 0 "Flush by device's UDP destination port - non-matching" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0" + + # Check that when flushing by destination VNI only entries programmed + # with the specified destination VNI are flushed and the rest are not. + + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent vni 20010 dst 198.51.100.1 src_vni 10010" + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent vni 20011 dst 198.51.100.2 src_vni 10010" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 vni 20010" + + run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep \" vni 20010\"" + log_test $? 1 "Flush by destination VNI - matching" + run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep \" vni 20011\"" + log_test $? 0 "Flush by destination VNI - non-matching" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0" + + # When not specifying a destination VNI for an entry, traffic is + # encapsulated with the source VNI. Check that when flushing by a + # destination VNI that is equal to the source VNI only such entries are + # flushed and the rest are not. + + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.1 src_vni 10010" + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent vni 20010 dst 198.51.100.2 src_vni 10010" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 vni 10010" + + run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep 198.51.100.1" + log_test $? 1 "Flush by destination VNI equal to source VNI - matching" + run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep 198.51.100.2" + log_test $? 0 "Flush by destination VNI equal to source VNI - non-matching" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0" + + # Test that an error is returned when trying to flush using VLAN ID. + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 vid 10" + log_test $? 255 "Flush by VLAN ID" +} + +################################################################################ +# Tests - Data path + +encap_params_common() +{ + local ns1=$1; shift + local ns2=$1; shift + local vtep1_ip=$1; shift + local vtep2_ip=$1; shift + local plen=$1; shift + local enc_ethtype=$1; shift + local grp=$1; shift + local src=$1; shift + local mz=$1; shift + + # Test that packets forwarded by the VXLAN MDB are encapsulated with + # the correct parameters. Transmit packets from the first namespace and + # check that they hit the corresponding filters on the ingress of the + # second namespace. + + run_cmd "tc -n $ns2 qdisc replace dev veth0 clsact" + run_cmd "tc -n $ns2 qdisc replace dev vx0 clsact" + run_cmd "ip -n $ns2 address replace $vtep1_ip/$plen dev lo" + run_cmd "ip -n $ns2 address replace $vtep2_ip/$plen dev lo" + + # Check destination IP. + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep1_ip src_vni 10010" + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep2_ip src_vni 10020" + + run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_dst_ip $vtep1_ip action pass" + run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 0 "Destination IP - match" + + run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 0 "Destination IP - no match" + + run_cmd "tc -n $ns2 filter del dev vx0 ingress pref 1 handle 101 flower" + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep2_ip src_vni 10020" + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep1_ip src_vni 10010" + + # Check destination port. + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep1_ip src_vni 10010" + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep1_ip dst_port 1111 src_vni 10020" + + run_cmd "tc -n $ns2 filter replace dev veth0 ingress pref 1 handle 101 proto $enc_ethtype flower ip_proto udp dst_port 4789 action pass" + run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev veth0 ingress" 101 1 + log_test $? 0 "Default destination port - match" + + run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev veth0 ingress" 101 1 + log_test $? 0 "Default destination port - no match" + + run_cmd "tc -n $ns2 filter replace dev veth0 ingress pref 1 handle 101 proto $enc_ethtype flower ip_proto udp dst_port 1111 action pass" + run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev veth0 ingress" 101 1 + log_test $? 0 "Non-default destination port - match" + + run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev veth0 ingress" 101 1 + log_test $? 0 "Non-default destination port - no match" + + run_cmd "tc -n $ns2 filter del dev veth0 ingress pref 1 handle 101 flower" + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep1_ip src_vni 10020" + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep1_ip src_vni 10010" + + # Check default VNI. + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep1_ip src_vni 10010" + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep1_ip src_vni 10020" + + run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_key_id 10010 action pass" + run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 0 "Default destination VNI - match" + + run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 0 "Default destination VNI - no match" + + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep1_ip vni 10020 src_vni 10010" + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep1_ip vni 10010 src_vni 10020" + + run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_key_id 10020 action pass" + run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 0 "Non-default destination VNI - match" + + run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 0 "Non-default destination VNI - no match" + + run_cmd "tc -n $ns2 filter del dev vx0 ingress pref 1 handle 101 flower" + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep1_ip src_vni 10020" + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep1_ip src_vni 10010" +} + +encap_params_ipv4_ipv4() +{ + local ns1=$ns1_v4 + local ns2=$ns2_v4 + local vtep1_ip=198.51.100.100 + local vtep2_ip=198.51.100.200 + local plen=32 + local enc_ethtype="ip" + local grp=239.1.1.1 + local src=192.0.2.129 + + echo + echo "Data path: Encapsulation parameters - IPv4 overlay / IPv4 underlay" + echo "------------------------------------------------------------------" + + encap_params_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $enc_ethtype \ + $grp $src "mausezahn" +} + +encap_params_ipv6_ipv4() +{ + local ns1=$ns1_v4 + local ns2=$ns2_v4 + local vtep1_ip=198.51.100.100 + local vtep2_ip=198.51.100.200 + local plen=32 + local enc_ethtype="ip" + local grp=ff0e::1 + local src=2001:db8:100::1 + + echo + echo "Data path: Encapsulation parameters - IPv6 overlay / IPv4 underlay" + echo "------------------------------------------------------------------" + + encap_params_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $enc_ethtype \ + $grp $src "mausezahn -6" +} + +encap_params_ipv4_ipv6() +{ + local ns1=$ns1_v6 + local ns2=$ns2_v6 + local vtep1_ip=2001:db8:1000::1 + local vtep2_ip=2001:db8:2000::1 + local plen=128 + local enc_ethtype="ipv6" + local grp=239.1.1.1 + local src=192.0.2.129 + + echo + echo "Data path: Encapsulation parameters - IPv4 overlay / IPv6 underlay" + echo "------------------------------------------------------------------" + + encap_params_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $enc_ethtype \ + $grp $src "mausezahn" +} + +encap_params_ipv6_ipv6() +{ + local ns1=$ns1_v6 + local ns2=$ns2_v6 + local vtep1_ip=2001:db8:1000::1 + local vtep2_ip=2001:db8:2000::1 + local plen=128 + local enc_ethtype="ipv6" + local grp=ff0e::1 + local src=2001:db8:100::1 + + echo + echo "Data path: Encapsulation parameters - IPv6 overlay / IPv6 underlay" + echo "------------------------------------------------------------------" + + encap_params_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $enc_ethtype \ + $grp $src "mausezahn -6" +} + +starg_exclude_ir_common() +{ + local ns1=$1; shift + local ns2=$1; shift + local vtep1_ip=$1; shift + local vtep2_ip=$1; shift + local plen=$1; shift + local grp=$1; shift + local valid_src=$1; shift + local invalid_src=$1; shift + local mz=$1; shift + + # Install a (*, G) EXCLUDE MDB entry with one source and two remote + # VTEPs. Make sure that the source in the source list is not forwarded + # and that a source not in the list is forwarded. Remove one of the + # VTEPs from the entry and make sure that packets are only forwarded to + # the remaining VTEP. + + run_cmd "tc -n $ns2 qdisc replace dev vx0 clsact" + run_cmd "ip -n $ns2 address replace $vtep1_ip/$plen dev lo" + run_cmd "ip -n $ns2 address replace $vtep2_ip/$plen dev lo" + + run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_dst_ip $vtep1_ip action pass" + run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 102 proto all flower enc_dst_ip $vtep2_ip action pass" + + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $invalid_src dst $vtep1_ip src_vni 10010" + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $invalid_src dst $vtep2_ip src_vni 10010" + + # Check that invalid source is not forwarded to any VTEP. + run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 0 + log_test $? 0 "Block excluded source - first VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 102 0 + log_test $? 0 "Block excluded source - second VTEP" + + # Check that valid source is forwarded to both VTEPs. + run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 0 "Forward valid source - first VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 102 1 + log_test $? 0 "Forward valid source - second VTEP" + + # Remove second VTEP. + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep2_ip src_vni 10010" + + # Check that invalid source is not forwarded to any VTEP. + run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 0 "Block excluded source after removal - first VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 102 1 + log_test $? 0 "Block excluded source after removal - second VTEP" + + # Check that valid source is forwarded to the remaining VTEP. + run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 2 + log_test $? 0 "Forward valid source after removal - first VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 102 1 + log_test $? 0 "Forward valid source after removal - second VTEP" +} + +starg_exclude_ir_ipv4_ipv4() +{ + local ns1=$ns1_v4 + local ns2=$ns2_v4 + local vtep1_ip=198.51.100.100 + local vtep2_ip=198.51.100.200 + local plen=32 + local grp=239.1.1.1 + local valid_src=192.0.2.129 + local invalid_src=192.0.2.145 + + echo + echo "Data path: (*, G) EXCLUDE - IR - IPv4 overlay / IPv4 underlay" + echo "-------------------------------------------------------------" + + starg_exclude_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \ + $valid_src $invalid_src "mausezahn" +} + +starg_exclude_ir_ipv6_ipv4() +{ + local ns1=$ns1_v4 + local ns2=$ns2_v4 + local vtep1_ip=198.51.100.100 + local vtep2_ip=198.51.100.200 + local plen=32 + local grp=ff0e::1 + local valid_src=2001:db8:100::1 + local invalid_src=2001:db8:200::1 + + echo + echo "Data path: (*, G) EXCLUDE - IR - IPv6 overlay / IPv4 underlay" + echo "-------------------------------------------------------------" + + starg_exclude_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \ + $valid_src $invalid_src "mausezahn -6" +} + +starg_exclude_ir_ipv4_ipv6() +{ + local ns1=$ns1_v6 + local ns2=$ns2_v6 + local vtep1_ip=2001:db8:1000::1 + local vtep2_ip=2001:db8:2000::1 + local plen=128 + local grp=239.1.1.1 + local valid_src=192.0.2.129 + local invalid_src=192.0.2.145 + + echo + echo "Data path: (*, G) EXCLUDE - IR - IPv4 overlay / IPv6 underlay" + echo "-------------------------------------------------------------" + + starg_exclude_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \ + $valid_src $invalid_src "mausezahn" +} + +starg_exclude_ir_ipv6_ipv6() +{ + local ns1=$ns1_v6 + local ns2=$ns2_v6 + local vtep1_ip=2001:db8:1000::1 + local vtep2_ip=2001:db8:2000::1 + local plen=128 + local grp=ff0e::1 + local valid_src=2001:db8:100::1 + local invalid_src=2001:db8:200::1 + + echo + echo "Data path: (*, G) EXCLUDE - IR - IPv6 overlay / IPv6 underlay" + echo "-------------------------------------------------------------" + + starg_exclude_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \ + $valid_src $invalid_src "mausezahn -6" +} + +starg_include_ir_common() +{ + local ns1=$1; shift + local ns2=$1; shift + local vtep1_ip=$1; shift + local vtep2_ip=$1; shift + local plen=$1; shift + local grp=$1; shift + local valid_src=$1; shift + local invalid_src=$1; shift + local mz=$1; shift + + # Install a (*, G) INCLUDE MDB entry with one source and two remote + # VTEPs. Make sure that the source in the source list is forwarded and + # that a source not in the list is not forwarded. Remove one of the + # VTEPs from the entry and make sure that packets are only forwarded to + # the remaining VTEP. + + run_cmd "tc -n $ns2 qdisc replace dev vx0 clsact" + run_cmd "ip -n $ns2 address replace $vtep1_ip/$plen dev lo" + run_cmd "ip -n $ns2 address replace $vtep2_ip/$plen dev lo" + + run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_dst_ip $vtep1_ip action pass" + run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 102 proto all flower enc_dst_ip $vtep2_ip action pass" + + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode include source_list $valid_src dst $vtep1_ip src_vni 10010" + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode include source_list $valid_src dst $vtep2_ip src_vni 10010" + + # Check that invalid source is not forwarded to any VTEP. + run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 0 + log_test $? 0 "Block excluded source - first VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 102 0 + log_test $? 0 "Block excluded source - second VTEP" + + # Check that valid source is forwarded to both VTEPs. + run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 0 "Forward valid source - first VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 102 1 + log_test $? 0 "Forward valid source - second VTEP" + + # Remove second VTEP. + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep2_ip src_vni 10010" + + # Check that invalid source is not forwarded to any VTEP. + run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 0 "Block excluded source after removal - first VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 102 1 + log_test $? 0 "Block excluded source after removal - second VTEP" + + # Check that valid source is forwarded to the remaining VTEP. + run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 2 + log_test $? 0 "Forward valid source after removal - first VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 102 1 + log_test $? 0 "Forward valid source after removal - second VTEP" +} + +starg_include_ir_ipv4_ipv4() +{ + local ns1=$ns1_v4 + local ns2=$ns2_v4 + local vtep1_ip=198.51.100.100 + local vtep2_ip=198.51.100.200 + local plen=32 + local grp=239.1.1.1 + local valid_src=192.0.2.129 + local invalid_src=192.0.2.145 + + echo + echo "Data path: (*, G) INCLUDE - IR - IPv4 overlay / IPv4 underlay" + echo "-------------------------------------------------------------" + + starg_include_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \ + $valid_src $invalid_src "mausezahn" +} + +starg_include_ir_ipv6_ipv4() +{ + local ns1=$ns1_v4 + local ns2=$ns2_v4 + local vtep1_ip=198.51.100.100 + local vtep2_ip=198.51.100.200 + local plen=32 + local grp=ff0e::1 + local valid_src=2001:db8:100::1 + local invalid_src=2001:db8:200::1 + + echo + echo "Data path: (*, G) INCLUDE - IR - IPv6 overlay / IPv4 underlay" + echo "-------------------------------------------------------------" + + starg_include_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \ + $valid_src $invalid_src "mausezahn -6" +} + +starg_include_ir_ipv4_ipv6() +{ + local ns1=$ns1_v6 + local ns2=$ns2_v6 + local vtep1_ip=2001:db8:1000::1 + local vtep2_ip=2001:db8:2000::1 + local plen=128 + local grp=239.1.1.1 + local valid_src=192.0.2.129 + local invalid_src=192.0.2.145 + + echo + echo "Data path: (*, G) INCLUDE - IR - IPv4 overlay / IPv6 underlay" + echo "-------------------------------------------------------------" + + starg_include_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \ + $valid_src $invalid_src "mausezahn" +} + +starg_include_ir_ipv6_ipv6() +{ + local ns1=$ns1_v6 + local ns2=$ns2_v6 + local vtep1_ip=2001:db8:1000::1 + local vtep2_ip=2001:db8:2000::1 + local plen=128 + local grp=ff0e::1 + local valid_src=2001:db8:100::1 + local invalid_src=2001:db8:200::1 + + echo + echo "Data path: (*, G) INCLUDE - IR - IPv6 overlay / IPv6 underlay" + echo "-------------------------------------------------------------" + + starg_include_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \ + $valid_src $invalid_src "mausezahn -6" +} + +starg_exclude_p2mp_common() +{ + local ns1=$1; shift + local ns2=$1; shift + local mcast_grp=$1; shift + local plen=$1; shift + local grp=$1; shift + local valid_src=$1; shift + local invalid_src=$1; shift + local mz=$1; shift + + # Install a (*, G) EXCLUDE MDB entry with one source and one multicast + # group to which packets are sent. Make sure that the source in the + # source list is not forwarded and that a source not in the list is + # forwarded. + + run_cmd "tc -n $ns2 qdisc replace dev vx0 clsact" + run_cmd "ip -n $ns2 address replace $mcast_grp/$plen dev veth0 autojoin" + + run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_dst_ip $mcast_grp action pass" + + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $invalid_src dst $mcast_grp src_vni 10010 via veth0" + + # Check that invalid source is not forwarded. + run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 0 + log_test $? 0 "Block excluded source" + + # Check that valid source is forwarded. + run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 0 "Forward valid source" + + # Remove the VTEP from the multicast group. + run_cmd "ip -n $ns2 address del $mcast_grp/$plen dev veth0" + + # Check that valid source is not received anymore. + run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 0 "Receive of valid source after removal from group" +} + +starg_exclude_p2mp_ipv4_ipv4() +{ + local ns1=$ns1_v4 + local ns2=$ns2_v4 + local mcast_grp=238.1.1.1 + local plen=32 + local grp=239.1.1.1 + local valid_src=192.0.2.129 + local invalid_src=192.0.2.145 + + echo + echo "Data path: (*, G) EXCLUDE - P2MP - IPv4 overlay / IPv4 underlay" + echo "---------------------------------------------------------------" + + starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \ + $valid_src $invalid_src "mausezahn" +} + +starg_exclude_p2mp_ipv6_ipv4() +{ + local ns1=$ns1_v4 + local ns2=$ns2_v4 + local mcast_grp=238.1.1.1 + local plen=32 + local grp=ff0e::1 + local valid_src=2001:db8:100::1 + local invalid_src=2001:db8:200::1 + + echo + echo "Data path: (*, G) EXCLUDE - P2MP - IPv6 overlay / IPv4 underlay" + echo "---------------------------------------------------------------" + + starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \ + $valid_src $invalid_src "mausezahn -6" +} + +starg_exclude_p2mp_ipv4_ipv6() +{ + local ns1=$ns1_v6 + local ns2=$ns2_v6 + local mcast_grp=ff0e::2 + local plen=128 + local grp=239.1.1.1 + local valid_src=192.0.2.129 + local invalid_src=192.0.2.145 + + echo + echo "Data path: (*, G) EXCLUDE - P2MP - IPv4 overlay / IPv6 underlay" + echo "---------------------------------------------------------------" + + starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \ + $valid_src $invalid_src "mausezahn" +} + +starg_exclude_p2mp_ipv6_ipv6() +{ + local ns1=$ns1_v6 + local ns2=$ns2_v6 + local mcast_grp=ff0e::2 + local plen=128 + local grp=ff0e::1 + local valid_src=2001:db8:100::1 + local invalid_src=2001:db8:200::1 + + echo + echo "Data path: (*, G) EXCLUDE - P2MP - IPv6 overlay / IPv6 underlay" + echo "---------------------------------------------------------------" + + starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \ + $valid_src $invalid_src "mausezahn -6" +} + +starg_include_p2mp_common() +{ + local ns1=$1; shift + local ns2=$1; shift + local mcast_grp=$1; shift + local plen=$1; shift + local grp=$1; shift + local valid_src=$1; shift + local invalid_src=$1; shift + local mz=$1; shift + + # Install a (*, G) INCLUDE MDB entry with one source and one multicast + # group to which packets are sent. Make sure that the source in the + # source list is forwarded and that a source not in the list is not + # forwarded. + + run_cmd "tc -n $ns2 qdisc replace dev vx0 clsact" + run_cmd "ip -n $ns2 address replace $mcast_grp/$plen dev veth0 autojoin" + + run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_dst_ip $mcast_grp action pass" + + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode include source_list $valid_src dst $mcast_grp src_vni 10010 via veth0" + + # Check that invalid source is not forwarded. + run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 0 + log_test $? 0 "Block excluded source" + + # Check that valid source is forwarded. + run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 0 "Forward valid source" + + # Remove the VTEP from the multicast group. + run_cmd "ip -n $ns2 address del $mcast_grp/$plen dev veth0" + + # Check that valid source is not received anymore. + run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 0 "Receive of valid source after removal from group" +} + +starg_include_p2mp_ipv4_ipv4() +{ + local ns1=$ns1_v4 + local ns2=$ns2_v4 + local mcast_grp=238.1.1.1 + local plen=32 + local grp=239.1.1.1 + local valid_src=192.0.2.129 + local invalid_src=192.0.2.145 + + echo + echo "Data path: (*, G) INCLUDE - P2MP - IPv4 overlay / IPv4 underlay" + echo "---------------------------------------------------------------" + + starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \ + $valid_src $invalid_src "mausezahn" +} + +starg_include_p2mp_ipv6_ipv4() +{ + local ns1=$ns1_v4 + local ns2=$ns2_v4 + local mcast_grp=238.1.1.1 + local plen=32 + local grp=ff0e::1 + local valid_src=2001:db8:100::1 + local invalid_src=2001:db8:200::1 + + echo + echo "Data path: (*, G) INCLUDE - P2MP - IPv6 overlay / IPv4 underlay" + echo "---------------------------------------------------------------" + + starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \ + $valid_src $invalid_src "mausezahn -6" +} + +starg_include_p2mp_ipv4_ipv6() +{ + local ns1=$ns1_v6 + local ns2=$ns2_v6 + local mcast_grp=ff0e::2 + local plen=128 + local grp=239.1.1.1 + local valid_src=192.0.2.129 + local invalid_src=192.0.2.145 + + echo + echo "Data path: (*, G) INCLUDE - P2MP - IPv4 overlay / IPv6 underlay" + echo "---------------------------------------------------------------" + + starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \ + $valid_src $invalid_src "mausezahn" +} + +starg_include_p2mp_ipv6_ipv6() +{ + local ns1=$ns1_v6 + local ns2=$ns2_v6 + local mcast_grp=ff0e::2 + local plen=128 + local grp=ff0e::1 + local valid_src=2001:db8:100::1 + local invalid_src=2001:db8:200::1 + + echo + echo "Data path: (*, G) INCLUDE - P2MP - IPv6 overlay / IPv6 underlay" + echo "---------------------------------------------------------------" + + starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \ + $valid_src $invalid_src "mausezahn -6" +} + +egress_vni_translation_common() +{ + local ns1=$1; shift + local ns2=$1; shift + local mcast_grp=$1; shift + local plen=$1; shift + local proto=$1; shift + local grp=$1; shift + local src=$1; shift + local mz=$1; shift + + # When P2MP tunnels are used with optimized inter-subnet multicast + # (OISM) [1], the ingress VTEP does not perform VNI translation and + # uses the VNI of the source broadcast domain (BD). If the egress VTEP + # is a member in the source BD, then no VNI translation is needed. + # Otherwise, the egress VTEP needs to translate the VNI to the + # supplementary broadcast domain (SBD) VNI, which is usually the L3VNI. + # + # In this test, remove the VTEP in the second namespace from VLAN 10 + # (VNI 10010) and make sure that a packet sent from this VLAN on the + # first VTEP is received by the SVI corresponding to the L3VNI (14000 / + # VLAN 4000) on the second VTEP. + # + # The second VTEP will be able to decapsulate the packet with VNI 10010 + # because this VNI is configured on its shared VXLAN device. Later, + # when ingressing the bridge, the VNI to VLAN lookup will fail because + # the VTEP is not a member in VLAN 10, which will cause the packet to + # be tagged with VLAN 4000 since it is configured as PVID. + # + # [1] https://datatracker.ietf.org/doc/html/draft-ietf-bess-evpn-irb-mcast + + run_cmd "tc -n $ns2 qdisc replace dev br0.4000 clsact" + run_cmd "ip -n $ns2 address replace $mcast_grp/$plen dev veth0 autojoin" + run_cmd "tc -n $ns2 filter replace dev br0.4000 ingress pref 1 handle 101 proto $proto flower src_ip $src dst_ip $grp action pass" + + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp src $src permanent dst $mcast_grp src_vni 10010 via veth0" + + # Remove the second VTEP from VLAN 10. + run_cmd "bridge -n $ns2 vlan del vid 10 dev vx0" + + # Make sure that packets sent from the first VTEP over VLAN 10 are + # received by the SVI corresponding to the L3VNI (14000 / VLAN 4000) on + # the second VTEP, since it is configured as PVID. + run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev br0.4000 ingress" 101 1 + log_test $? 0 "Egress VNI translation - PVID configured" + + # Remove PVID flag from VLAN 4000 on the second VTEP and make sure + # packets are no longer received by the SVI interface. + run_cmd "bridge -n $ns2 vlan add vid 4000 dev vx0" + run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev br0.4000 ingress" 101 1 + log_test $? 0 "Egress VNI translation - no PVID configured" + + # Reconfigure the PVID and make sure packets are received again. + run_cmd "bridge -n $ns2 vlan add vid 4000 dev vx0 pvid" + run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev br0.4000 ingress" 101 2 + log_test $? 0 "Egress VNI translation - PVID reconfigured" +} + +egress_vni_translation_ipv4_ipv4() +{ + local ns1=$ns1_v4 + local ns2=$ns2_v4 + local mcast_grp=238.1.1.1 + local plen=32 + local proto="ipv4" + local grp=239.1.1.1 + local src=192.0.2.129 + + echo + echo "Data path: Egress VNI translation - IPv4 overlay / IPv4 underlay" + echo "----------------------------------------------------------------" + + egress_vni_translation_common $ns1 $ns2 $mcast_grp $plen $proto $grp \ + $src "mausezahn" +} + +egress_vni_translation_ipv6_ipv4() +{ + local ns1=$ns1_v4 + local ns2=$ns2_v4 + local mcast_grp=238.1.1.1 + local plen=32 + local proto="ipv6" + local grp=ff0e::1 + local src=2001:db8:100::1 + + echo + echo "Data path: Egress VNI translation - IPv6 overlay / IPv4 underlay" + echo "----------------------------------------------------------------" + + egress_vni_translation_common $ns1 $ns2 $mcast_grp $plen $proto $grp \ + $src "mausezahn -6" +} + +egress_vni_translation_ipv4_ipv6() +{ + local ns1=$ns1_v6 + local ns2=$ns2_v6 + local mcast_grp=ff0e::2 + local plen=128 + local proto="ipv4" + local grp=239.1.1.1 + local src=192.0.2.129 + + echo + echo "Data path: Egress VNI translation - IPv4 overlay / IPv6 underlay" + echo "----------------------------------------------------------------" + + egress_vni_translation_common $ns1 $ns2 $mcast_grp $plen $proto $grp \ + $src "mausezahn" +} + +egress_vni_translation_ipv6_ipv6() +{ + local ns1=$ns1_v6 + local ns2=$ns2_v6 + local mcast_grp=ff0e::2 + local plen=128 + local proto="ipv6" + local grp=ff0e::1 + local src=2001:db8:100::1 + + echo + echo "Data path: Egress VNI translation - IPv6 overlay / IPv6 underlay" + echo "----------------------------------------------------------------" + + egress_vni_translation_common $ns1 $ns2 $mcast_grp $plen $proto $grp \ + $src "mausezahn -6" +} + +all_zeros_mdb_common() +{ + local ns1=$1; shift + local ns2=$1; shift + local vtep1_ip=$1; shift + local vtep2_ip=$1; shift + local vtep3_ip=$1; shift + local vtep4_ip=$1; shift + local plen=$1; shift + local ipv4_grp=239.1.1.1 + local ipv4_unreg_grp=239.2.2.2 + local ipv4_ll_grp=224.0.0.100 + local ipv4_src=192.0.2.129 + local ipv6_grp=ff0e::1 + local ipv6_unreg_grp=ff0e::2 + local ipv6_ll_grp=ff02::1 + local ipv6_src=2001:db8:100::1 + + # Install all-zeros (catchall) MDB entries for IPv4 and IPv6 traffic + # and make sure they only forward unregistered IP multicast traffic + # which is not link-local. Also make sure that each entry only forwards + # traffic from the matching address family. + + # Associate two different VTEPs with one all-zeros MDB entry: Two with + # the IPv4 entry (0.0.0.0) and another two with the IPv6 one (::). + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp 0.0.0.0 permanent dst $vtep1_ip src_vni 10010" + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp 0.0.0.0 permanent dst $vtep2_ip src_vni 10010" + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp :: permanent dst $vtep3_ip src_vni 10010" + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp :: permanent dst $vtep4_ip src_vni 10010" + + # Associate one VTEP from each set with a regular MDB entry: One with + # an IPv4 entry and another with an IPv6 one. + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $ipv4_grp permanent dst $vtep1_ip src_vni 10010" + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $ipv6_grp permanent dst $vtep3_ip src_vni 10010" + + # Add filters to match on decapsulated traffic in the second namespace. + run_cmd "tc -n $ns2 qdisc replace dev vx0 clsact" + run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_dst_ip $vtep1_ip action pass" + run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 102 proto all flower enc_dst_ip $vtep2_ip action pass" + run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 103 proto all flower enc_dst_ip $vtep3_ip action pass" + run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 104 proto all flower enc_dst_ip $vtep4_ip action pass" + + # Configure the VTEP addresses in the second namespace to enable + # decapsulation. + run_cmd "ip -n $ns2 address replace $vtep1_ip/$plen dev lo" + run_cmd "ip -n $ns2 address replace $vtep2_ip/$plen dev lo" + run_cmd "ip -n $ns2 address replace $vtep3_ip/$plen dev lo" + run_cmd "ip -n $ns2 address replace $vtep4_ip/$plen dev lo" + + # Send registered IPv4 multicast and make sure it only arrives to the + # first VTEP. + run_cmd "ip netns exec $ns1 mausezahn br0.10 -A $ipv4_src -B $ipv4_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 0 "Registered IPv4 multicast - first VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 102 0 + log_test $? 0 "Registered IPv4 multicast - second VTEP" + + # Send unregistered IPv4 multicast that is not link-local and make sure + # it arrives to the first and second VTEPs. + run_cmd "ip netns exec $ns1 mausezahn br0.10 -A $ipv4_src -B $ipv4_unreg_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 2 + log_test $? 0 "Unregistered IPv4 multicast - first VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 102 1 + log_test $? 0 "Unregistered IPv4 multicast - second VTEP" + + # Send IPv4 link-local multicast traffic and make sure it does not + # arrive to any VTEP. + run_cmd "ip netns exec $ns1 mausezahn br0.10 -A $ipv4_src -B $ipv4_ll_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 2 + log_test $? 0 "Link-local IPv4 multicast - first VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 102 1 + log_test $? 0 "Link-local IPv4 multicast - second VTEP" + + # Send registered IPv4 multicast using a unicast MAC address and make + # sure it does not arrive to any VTEP. + run_cmd "ip netns exec $ns1 mausezahn br0.10 -a own -b 00:11:22:33:44:55 -A $ipv4_src -B $ipv4_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 2 + log_test $? 0 "Registered IPv4 multicast with a unicast MAC - first VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 102 1 + log_test $? 0 "Registered IPv4 multicast with a unicast MAC - second VTEP" + + # Send registered IPv4 multicast using a broadcast MAC address and make + # sure it does not arrive to any VTEP. + run_cmd "ip netns exec $ns1 mausezahn br0.10 -a own -b bcast -A $ipv4_src -B $ipv4_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 2 + log_test $? 0 "Registered IPv4 multicast with a broadcast MAC - first VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 102 1 + log_test $? 0 "Registered IPv4 multicast with a broadcast MAC - second VTEP" + + # Make sure IPv4 traffic did not reach the VTEPs associated with + # IPv6 entries. + tc_check_packets "$ns2" "dev vx0 ingress" 103 0 + log_test $? 0 "IPv4 traffic - third VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 104 0 + log_test $? 0 "IPv4 traffic - fourth VTEP" + + # Reset IPv4 filters before testing IPv6 traffic. + run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_dst_ip $vtep1_ip action pass" + run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 102 proto all flower enc_dst_ip $vtep2_ip action pass" + + # Send registered IPv6 multicast and make sure it only arrives to the + # third VTEP. + run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -A $ipv6_src -B $ipv6_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 103 1 + log_test $? 0 "Registered IPv6 multicast - third VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 104 0 + log_test $? 0 "Registered IPv6 multicast - fourth VTEP" + + # Send unregistered IPv6 multicast that is not link-local and make sure + # it arrives to the third and fourth VTEPs. + run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -A $ipv6_src -B $ipv6_unreg_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 103 2 + log_test $? 0 "Unregistered IPv6 multicast - third VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 104 1 + log_test $? 0 "Unregistered IPv6 multicast - fourth VTEP" + + # Send IPv6 link-local multicast traffic and make sure it does not + # arrive to any VTEP. + run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -A $ipv6_src -B $ipv6_ll_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 103 2 + log_test $? 0 "Link-local IPv6 multicast - third VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 104 1 + log_test $? 0 "Link-local IPv6 multicast - fourth VTEP" + + # Send registered IPv6 multicast using a unicast MAC address and make + # sure it does not arrive to any VTEP. + run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -a own -b 00:11:22:33:44:55 -A $ipv6_src -B $ipv6_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 103 2 + log_test $? 0 "Registered IPv6 multicast with a unicast MAC - third VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 104 1 + log_test $? 0 "Registered IPv6 multicast with a unicast MAC - fourth VTEP" + + # Send registered IPv6 multicast using a broadcast MAC address and make + # sure it does not arrive to any VTEP. + run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -a own -b bcast -A $ipv6_src -B $ipv6_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 103 2 + log_test $? 0 "Registered IPv6 multicast with a broadcast MAC - third VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 104 1 + log_test $? 0 "Registered IPv6 multicast with a broadcast MAC - fourth VTEP" + + # Make sure IPv6 traffic did not reach the VTEPs associated with + # IPv4 entries. + tc_check_packets "$ns2" "dev vx0 ingress" 101 0 + log_test $? 0 "IPv6 traffic - first VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 102 0 + log_test $? 0 "IPv6 traffic - second VTEP" +} + +all_zeros_mdb_ipv4() +{ + local ns1=$ns1_v4 + local ns2=$ns2_v4 + local vtep1_ip=198.51.100.101 + local vtep2_ip=198.51.100.102 + local vtep3_ip=198.51.100.103 + local vtep4_ip=198.51.100.104 + local plen=32 + + echo + echo "Data path: All-zeros MDB entry - IPv4 underlay" + echo "----------------------------------------------" + + all_zeros_mdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $vtep3_ip \ + $vtep4_ip $plen +} + +all_zeros_mdb_ipv6() +{ + local ns1=$ns1_v6 + local ns2=$ns2_v6 + local vtep1_ip=2001:db8:1000::1 + local vtep2_ip=2001:db8:2000::1 + local vtep3_ip=2001:db8:3000::1 + local vtep4_ip=2001:db8:4000::1 + local plen=128 + + echo + echo "Data path: All-zeros MDB entry - IPv6 underlay" + echo "----------------------------------------------" + + all_zeros_mdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $vtep3_ip \ + $vtep4_ip $plen +} + +mdb_fdb_common() +{ + local ns1=$1; shift + local ns2=$1; shift + local vtep1_ip=$1; shift + local vtep2_ip=$1; shift + local plen=$1; shift + local proto=$1; shift + local grp=$1; shift + local src=$1; shift + local mz=$1; shift + + # Install an MDB entry and an FDB entry and make sure that the FDB + # entry only forwards traffic that was not forwarded by the MDB. + + # Associate the MDB entry with one VTEP and the FDB entry with another + # VTEP. + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep1_ip src_vni 10010" + run_cmd "bridge -n $ns1 fdb add 00:00:00:00:00:00 dev vx0 self static dst $vtep2_ip src_vni 10010" + + # Add filters to match on decapsulated traffic in the second namespace. + run_cmd "tc -n $ns2 qdisc replace dev vx0 clsact" + run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto $proto flower ip_proto udp dst_port 54321 enc_dst_ip $vtep1_ip action pass" + run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 102 proto $proto flower ip_proto udp dst_port 54321 enc_dst_ip $vtep2_ip action pass" + + # Configure the VTEP addresses in the second namespace to enable + # decapsulation. + run_cmd "ip -n $ns2 address replace $vtep1_ip/$plen dev lo" + run_cmd "ip -n $ns2 address replace $vtep2_ip/$plen dev lo" + + # Send IP multicast traffic and make sure it is forwarded by the MDB + # and only arrives to the first VTEP. + run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 0 "IP multicast - first VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 102 0 + log_test $? 0 "IP multicast - second VTEP" + + # Send broadcast traffic and make sure it is forwarded by the FDB and + # only arrives to the second VTEP. + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b bcast -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 0 "Broadcast - first VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 102 1 + log_test $? 0 "Broadcast - second VTEP" + + # Remove the MDB entry and make sure that IP multicast is now forwarded + # by the FDB to the second VTEP. + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep1_ip src_vni 10010" + run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 0 "IP multicast after removal - first VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 102 2 + log_test $? 0 "IP multicast after removal - second VTEP" +} + +mdb_fdb_ipv4_ipv4() +{ + local ns1=$ns1_v4 + local ns2=$ns2_v4 + local vtep1_ip=198.51.100.100 + local vtep2_ip=198.51.100.200 + local plen=32 + local proto="ipv4" + local grp=239.1.1.1 + local src=192.0.2.129 + + echo + echo "Data path: MDB with FDB - IPv4 overlay / IPv4 underlay" + echo "------------------------------------------------------" + + mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp $src \ + "mausezahn" +} + +mdb_fdb_ipv6_ipv4() +{ + local ns1=$ns1_v4 + local ns2=$ns2_v4 + local vtep1_ip=198.51.100.100 + local vtep2_ip=198.51.100.200 + local plen=32 + local proto="ipv6" + local grp=ff0e::1 + local src=2001:db8:100::1 + + echo + echo "Data path: MDB with FDB - IPv6 overlay / IPv4 underlay" + echo "------------------------------------------------------" + + mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp $src \ + "mausezahn -6" +} + +mdb_fdb_ipv4_ipv6() +{ + local ns1=$ns1_v6 + local ns2=$ns2_v6 + local vtep1_ip=2001:db8:1000::1 + local vtep2_ip=2001:db8:2000::1 + local plen=128 + local proto="ipv4" + local grp=239.1.1.1 + local src=192.0.2.129 + + echo + echo "Data path: MDB with FDB - IPv4 overlay / IPv6 underlay" + echo "------------------------------------------------------" + + mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp $src \ + "mausezahn" +} + +mdb_fdb_ipv6_ipv6() +{ + local ns1=$ns1_v6 + local ns2=$ns2_v6 + local vtep1_ip=2001:db8:1000::1 + local vtep2_ip=2001:db8:2000::1 + local plen=128 + local proto="ipv6" + local grp=ff0e::1 + local src=2001:db8:100::1 + + echo + echo "Data path: MDB with FDB - IPv6 overlay / IPv6 underlay" + echo "------------------------------------------------------" + + mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp $src \ + "mausezahn -6" +} + +mdb_grp1_loop() +{ + local ns1=$1; shift + local vtep1_ip=$1; shift + local grp1=$1; shift + + while true; do + bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp1 dst $vtep1_ip src_vni 10010 + bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp1 permanent dst $vtep1_ip src_vni 10010 + done >/dev/null 2>&1 +} + +mdb_grp2_loop() +{ + local ns1=$1; shift + local vtep1_ip=$1; shift + local vtep2_ip=$1; shift + local grp2=$1; shift + + while true; do + bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp2 dst $vtep1_ip src_vni 10010 + bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp2 permanent dst $vtep1_ip src_vni 10010 + bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp2 permanent dst $vtep2_ip src_vni 10010 + done >/dev/null 2>&1 +} + +mdb_torture_common() +{ + local ns1=$1; shift + local vtep1_ip=$1; shift + local vtep2_ip=$1; shift + local grp1=$1; shift + local grp2=$1; shift + local src=$1; shift + local mz=$1; shift + local pid1 + local pid2 + local pid3 + local pid4 + + # Continuously send two streams that are forwarded by two different MDB + # entries. The first entry will be added and deleted in a loop. This + # allows us to test that the data path does not use freed MDB entry + # memory. The second entry will have two remotes, one that is added and + # deleted in a loop and another that is replaced in a loop. This allows + # us to test that the data path does not use freed remote entry memory. + # The test is considered successful if nothing crashed. + + # Create the MDB entries that will be continuously deleted / replaced. + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp1 permanent dst $vtep1_ip src_vni 10010" + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp2 permanent dst $vtep1_ip src_vni 10010" + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp2 permanent dst $vtep2_ip src_vni 10010" + + mdb_grp1_loop $ns1 $vtep1_ip $grp1 & + pid1=$! + mdb_grp2_loop $ns1 $vtep1_ip $vtep2_ip $grp2 & + pid2=$! + ip netns exec $ns1 $mz br0.10 -A $src -B $grp1 -t udp sp=12345,dp=54321 -p 100 -c 0 -q & + pid3=$! + ip netns exec $ns1 $mz br0.10 -A $src -B $grp2 -t udp sp=12345,dp=54321 -p 100 -c 0 -q & + pid4=$! + + sleep 30 + kill -9 $pid1 $pid2 $pid3 $pid4 + wait $pid1 $pid2 $pid3 $pid4 2>/dev/null + + log_test 0 0 "Torture test" +} + +mdb_torture_ipv4_ipv4() +{ + local ns1=$ns1_v4 + local vtep1_ip=198.51.100.100 + local vtep2_ip=198.51.100.200 + local grp1=239.1.1.1 + local grp2=239.2.2.2 + local src=192.0.2.129 + + echo + echo "Data path: MDB torture test - IPv4 overlay / IPv4 underlay" + echo "----------------------------------------------------------" + + mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp2 $src \ + "mausezahn" +} + +mdb_torture_ipv6_ipv4() +{ + local ns1=$ns1_v4 + local vtep1_ip=198.51.100.100 + local vtep2_ip=198.51.100.200 + local grp1=ff0e::1 + local grp2=ff0e::2 + local src=2001:db8:100::1 + + echo + echo "Data path: MDB torture test - IPv6 overlay / IPv4 underlay" + echo "----------------------------------------------------------" + + mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp2 $src \ + "mausezahn -6" +} + +mdb_torture_ipv4_ipv6() +{ + local ns1=$ns1_v6 + local vtep1_ip=2001:db8:1000::1 + local vtep2_ip=2001:db8:2000::1 + local grp1=239.1.1.1 + local grp2=239.2.2.2 + local src=192.0.2.129 + + echo + echo "Data path: MDB torture test - IPv4 overlay / IPv6 underlay" + echo "----------------------------------------------------------" + + mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp2 $src \ + "mausezahn" +} + +mdb_torture_ipv6_ipv6() +{ + local ns1=$ns1_v6 + local vtep1_ip=2001:db8:1000::1 + local vtep2_ip=2001:db8:2000::1 + local grp1=ff0e::1 + local grp2=ff0e::2 + local src=2001:db8:100::1 + + echo + echo "Data path: MDB torture test - IPv6 overlay / IPv6 underlay" + echo "----------------------------------------------------------" + + mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp2 $src \ + "mausezahn -6" +} + +################################################################################ +# Usage + +usage() +{ + cat <<EOF +usage: ${0##*/} OPTS + + -t <test> Test(s) to run (default: all) + (options: $TESTS) + -c Control path tests only + -d Data path tests only + -p Pause on fail + -P Pause after each test before cleanup + -v Verbose mode (show commands and output) +EOF +} + +################################################################################ +# Main + +trap cleanup EXIT + +while getopts ":t:cdpPvh" opt; do + case $opt in + t) TESTS=$OPTARG;; + c) TESTS=${CONTROL_PATH_TESTS};; + d) TESTS=${DATA_PATH_TESTS};; + p) PAUSE_ON_FAIL=yes;; + P) PAUSE=yes;; + v) VERBOSE=$(($VERBOSE + 1));; + h) usage; exit 0;; + *) usage; exit 1;; + esac +done + +# Make sure we don't pause twice. +[ "${PAUSE}" = "yes" ] && PAUSE_ON_FAIL=no + +if [ "$(id -u)" -ne 0 ];then + echo "SKIP: Need root privileges" + exit $ksft_skip; +fi + +if [ ! -x "$(command -v ip)" ]; then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +if [ ! -x "$(command -v bridge)" ]; then + echo "SKIP: Could not run test without bridge tool" + exit $ksft_skip +fi + +if [ ! -x "$(command -v mausezahn)" ]; then + echo "SKIP: Could not run test without mausezahn tool" + exit $ksft_skip +fi + +if [ ! -x "$(command -v jq)" ]; then + echo "SKIP: Could not run test without jq tool" + exit $ksft_skip +fi + +bridge mdb help 2>&1 | grep -q "flush" +if [ $? -ne 0 ]; then + echo "SKIP: iproute2 bridge too old, missing VXLAN MDB flush support" + exit $ksft_skip +fi + +# Start clean. +cleanup + +for t in $TESTS +do + setup; $t; cleanup; +done + +if [ "$TESTS" != "none" ]; then + printf "\nTests passed: %3d\n" ${nsuccess} + printf "Tests failed: %3d\n" ${nfail} +fi + +exit $ret diff --git a/tools/testing/selftests/net/test_vxlan_nolocalbypass.sh b/tools/testing/selftests/net/test_vxlan_nolocalbypass.sh new file mode 100755 index 000000000000..b8805983b728 --- /dev/null +++ b/tools/testing/selftests/net/test_vxlan_nolocalbypass.sh @@ -0,0 +1,238 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# This test is for checking the [no]localbypass VXLAN device option. The test +# configures two VXLAN devices in the same network namespace and a tc filter on +# the loopback device that drops encapsulated packets. The test sends packets +# from the first VXLAN device and verifies that by default these packets are +# received by the second VXLAN device. The test then enables the nolocalbypass +# option and verifies that packets are no longer received by the second VXLAN +# device. + +source lib.sh +ret=0 + +TESTS=" + nolocalbypass +" +VERBOSE=0 +PAUSE_ON_FAIL=no +PAUSE=no + +################################################################################ +# Utilities + +log_test() +{ + local rc=$1 + local expected=$2 + local msg="$3" + + if [ ${rc} -eq ${expected} ]; then + printf "TEST: %-60s [ OK ]\n" "${msg}" + nsuccess=$((nsuccess+1)) + else + ret=1 + nfail=$((nfail+1)) + printf "TEST: %-60s [FAIL]\n" "${msg}" + if [ "$VERBOSE" = "1" ]; then + echo " rc=$rc, expected $expected" + fi + + if [ "${PAUSE_ON_FAIL}" = "yes" ]; then + echo + echo "hit enter to continue, 'q' to quit" + read a + [ "$a" = "q" ] && exit 1 + fi + fi + + if [ "${PAUSE}" = "yes" ]; then + echo + echo "hit enter to continue, 'q' to quit" + read a + [ "$a" = "q" ] && exit 1 + fi + + [ "$VERBOSE" = "1" ] && echo +} + +run_cmd() +{ + local cmd="$1" + local out + local stderr="2>/dev/null" + + if [ "$VERBOSE" = "1" ]; then + printf "COMMAND: $cmd\n" + stderr= + fi + + out=$(eval $cmd $stderr) + rc=$? + if [ "$VERBOSE" = "1" -a -n "$out" ]; then + echo " $out" + fi + + return $rc +} + +tc_check_packets() +{ + local ns=$1; shift + local id=$1; shift + local handle=$1; shift + local count=$1; shift + local pkts + + sleep 0.1 + pkts=$(tc -n $ns -j -s filter show $id \ + | jq ".[] | select(.options.handle == $handle) | \ + .options.actions[0].stats.packets") + [[ $pkts == $count ]] +} + +################################################################################ +# Setup + +setup() +{ + setup_ns ns1 + + ip -n $ns1 address add 192.0.2.1/32 dev lo + ip -n $ns1 address add 198.51.100.1/32 dev lo + + ip -n $ns1 link add name vx0 up type vxlan id 100 local 198.51.100.1 \ + dstport 4789 nolearning + ip -n $ns1 link add name vx1 up type vxlan id 100 dstport 4790 +} + +cleanup() +{ + cleanup_ns $ns1 +} + +################################################################################ +# Tests + +nolocalbypass() +{ + local smac=00:01:02:03:04:05 + local dmac=00:0a:0b:0c:0d:0e + + run_cmd "bridge -n $ns1 fdb add $dmac dev vx0 self static dst 192.0.2.1 port 4790" + + run_cmd "tc -n $ns1 qdisc add dev vx1 clsact" + run_cmd "tc -n $ns1 filter add dev vx1 ingress pref 1 handle 101 proto all flower src_mac $smac dst_mac $dmac action pass" + + run_cmd "tc -n $ns1 qdisc add dev lo clsact" + run_cmd "tc -n $ns1 filter add dev lo ingress pref 1 handle 101 proto ip flower ip_proto udp dst_port 4790 action drop" + + run_cmd "ip -n $ns1 -d -j link show dev vx0 | jq -e '.[][\"linkinfo\"][\"info_data\"][\"localbypass\"] == true'" + log_test $? 0 "localbypass enabled" + + run_cmd "ip netns exec $ns1 mausezahn vx0 -a $smac -b $dmac -c 1 -p 100 -q" + + tc_check_packets "$ns1" "dev vx1 ingress" 101 1 + log_test $? 0 "Packet received by local VXLAN device - localbypass" + + run_cmd "ip -n $ns1 link set dev vx0 type vxlan nolocalbypass" + + run_cmd "ip -n $ns1 -d -j link show dev vx0 | jq -e '.[][\"linkinfo\"][\"info_data\"][\"localbypass\"] == false'" + log_test $? 0 "localbypass disabled" + + run_cmd "ip netns exec $ns1 mausezahn vx0 -a $smac -b $dmac -c 1 -p 100 -q" + + tc_check_packets "$ns1" "dev vx1 ingress" 101 1 + log_test $? 0 "Packet not received by local VXLAN device - nolocalbypass" + + run_cmd "ip -n $ns1 link set dev vx0 type vxlan localbypass" + + run_cmd "ip -n $ns1 -d -j link show dev vx0 | jq -e '.[][\"linkinfo\"][\"info_data\"][\"localbypass\"] == true'" + log_test $? 0 "localbypass enabled" + + run_cmd "ip netns exec $ns1 mausezahn vx0 -a $smac -b $dmac -c 1 -p 100 -q" + + tc_check_packets "$ns1" "dev vx1 ingress" 101 2 + log_test $? 0 "Packet received by local VXLAN device - localbypass" +} + +################################################################################ +# Usage + +usage() +{ + cat <<EOF +usage: ${0##*/} OPTS + + -t <test> Test(s) to run (default: all) + (options: $TESTS) + -p Pause on fail + -P Pause after each test before cleanup + -v Verbose mode (show commands and output) +EOF +} + +################################################################################ +# Main + +trap cleanup EXIT + +while getopts ":t:pPvh" opt; do + case $opt in + t) TESTS=$OPTARG ;; + p) PAUSE_ON_FAIL=yes;; + P) PAUSE=yes;; + v) VERBOSE=$(($VERBOSE + 1));; + h) usage; exit 0;; + *) usage; exit 1;; + esac +done + +# Make sure we don't pause twice. +[ "${PAUSE}" = "yes" ] && PAUSE_ON_FAIL=no + +if [ "$(id -u)" -ne 0 ];then + echo "SKIP: Need root privileges" + exit $ksft_skip; +fi + +if [ ! -x "$(command -v ip)" ]; then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +if [ ! -x "$(command -v bridge)" ]; then + echo "SKIP: Could not run test without bridge tool" + exit $ksft_skip +fi + +if [ ! -x "$(command -v mausezahn)" ]; then + echo "SKIP: Could not run test without mausezahn tool" + exit $ksft_skip +fi + +if [ ! -x "$(command -v jq)" ]; then + echo "SKIP: Could not run test without jq tool" + exit $ksft_skip +fi + +ip link help vxlan 2>&1 | grep -q "localbypass" +if [ $? -ne 0 ]; then + echo "SKIP: iproute2 ip too old, missing VXLAN nolocalbypass support" + exit $ksft_skip +fi + +cleanup + +for t in $TESTS +do + setup; $t; cleanup; +done + +if [ "$TESTS" != "none" ]; then + printf "\nTests passed: %3d\n" ${nsuccess} + printf "Tests failed: %3d\n" ${nfail} +fi + +exit $ret diff --git a/tools/testing/selftests/net/test_vxlan_under_vrf.sh b/tools/testing/selftests/net/test_vxlan_under_vrf.sh index 09f9ed92cbe4..ae8fbe3f0779 100755 --- a/tools/testing/selftests/net/test_vxlan_under_vrf.sh +++ b/tools/testing/selftests/net/test_vxlan_under_vrf.sh @@ -43,15 +43,14 @@ # This tests both the connectivity between vm-1 and vm-2, and that the underlay # can be moved in and out of the vrf by unsetting and setting veth0's master. +source lib.sh set -e cleanup() { ip link del veth-hv-1 2>/dev/null || true ip link del veth-tap 2>/dev/null || true - for ns in hv-1 hv-2 vm-1 vm-2; do - ip netns del $ns || true - done + cleanup_ns $hv_1 $hv_2 $vm_1 $vm_2 } # Clean start @@ -60,70 +59,75 @@ cleanup &> /dev/null [[ $1 == "clean" ]] && exit 0 trap cleanup EXIT +setup_ns hv_1 hv_2 vm_1 vm_2 +hv[1]=$hv_1 +hv[2]=$hv_2 +vm[1]=$vm_1 +vm[2]=$vm_2 # Setup "Hypervisors" simulated with netns ip link add veth-hv-1 type veth peer name veth-hv-2 setup-hv-networking() { - hv=$1 + id=$1 - ip netns add hv-$hv - ip link set veth-hv-$hv netns hv-$hv - ip -netns hv-$hv link set veth-hv-$hv name veth0 + ip link set veth-hv-$id netns ${hv[$id]} + ip -netns ${hv[$id]} link set veth-hv-$id name veth0 - ip -netns hv-$hv link add vrf-underlay type vrf table 1 - ip -netns hv-$hv link set vrf-underlay up - ip -netns hv-$hv addr add 172.16.0.$hv/24 dev veth0 - ip -netns hv-$hv link set veth0 up + ip -netns ${hv[$id]} link add vrf-underlay type vrf table 1 + ip -netns ${hv[$id]} link set vrf-underlay up + ip -netns ${hv[$id]} addr add 172.16.0.$id/24 dev veth0 + ip -netns ${hv[$id]} link set veth0 up - ip -netns hv-$hv link add br0 type bridge - ip -netns hv-$hv link set br0 up + ip -netns ${hv[$id]} link add br0 type bridge + ip -netns ${hv[$id]} link set br0 up - ip -netns hv-$hv link add vxlan0 type vxlan id 10 local 172.16.0.$hv dev veth0 dstport 4789 - ip -netns hv-$hv link set vxlan0 master br0 - ip -netns hv-$hv link set vxlan0 up + ip -netns ${hv[$id]} link add vxlan0 type vxlan id 10 local 172.16.0.$id dev veth0 dstport 4789 + ip -netns ${hv[$id]} link set vxlan0 master br0 + ip -netns ${hv[$id]} link set vxlan0 up } setup-hv-networking 1 setup-hv-networking 2 # Check connectivity between HVs by pinging hv-2 from hv-1 echo -n "Checking HV connectivity " -ip netns exec hv-1 ping -c 1 -W 1 172.16.0.2 &> /dev/null || (echo "[FAIL]"; false) +ip netns exec $hv_1 ping -c 1 -W 1 172.16.0.2 &> /dev/null || (echo "[FAIL]"; false) echo "[ OK ]" # Setups a "VM" simulated by a netns an a veth pair setup-vm() { id=$1 - ip netns add vm-$id ip link add veth-tap type veth peer name veth-hv - ip link set veth-tap netns hv-$id - ip -netns hv-$id link set veth-tap master br0 - ip -netns hv-$id link set veth-tap up + ip link set veth-tap netns ${hv[$id]} + ip -netns ${hv[$id]} link set veth-tap master br0 + ip -netns ${hv[$id]} link set veth-tap up + + ip link set veth-hv address 02:1d:8d:dd:0c:6$id - ip link set veth-hv netns vm-$id - ip -netns vm-$id addr add 10.0.0.$id/24 dev veth-hv - ip -netns vm-$id link set veth-hv up + ip link set veth-hv netns ${vm[$id]} + ip -netns ${vm[$id]} addr add 10.0.0.$id/24 dev veth-hv + ip -netns ${vm[$id]} link set veth-hv up } setup-vm 1 setup-vm 2 # Setup VTEP routes to make ARP work -bridge -netns hv-1 fdb add 00:00:00:00:00:00 dev vxlan0 dst 172.16.0.2 self permanent -bridge -netns hv-2 fdb add 00:00:00:00:00:00 dev vxlan0 dst 172.16.0.1 self permanent +bridge -netns $hv_1 fdb add 00:00:00:00:00:00 dev vxlan0 dst 172.16.0.2 self permanent +bridge -netns $hv_2 fdb add 00:00:00:00:00:00 dev vxlan0 dst 172.16.0.1 self permanent echo -n "Check VM connectivity through VXLAN (underlay in the default VRF) " -ip netns exec vm-1 ping -c 1 -W 1 10.0.0.2 &> /dev/null || (echo "[FAIL]"; false) +ip netns exec $vm_1 ping -c 1 -W 1 10.0.0.2 &> /dev/null || (echo "[FAIL]"; false) echo "[ OK ]" # Move the underlay to a non-default VRF -ip -netns hv-1 link set veth0 vrf vrf-underlay -ip -netns hv-1 link set veth0 down -ip -netns hv-1 link set veth0 up -ip -netns hv-2 link set veth0 vrf vrf-underlay -ip -netns hv-2 link set veth0 down -ip -netns hv-2 link set veth0 up +ip -netns $hv_1 link set veth0 vrf vrf-underlay +ip -netns $hv_1 link set vxlan0 down +ip -netns $hv_1 link set vxlan0 up +ip -netns $hv_2 link set veth0 vrf vrf-underlay +ip -netns $hv_2 link set vxlan0 down +ip -netns $hv_2 link set vxlan0 up echo -n "Check VM connectivity through VXLAN (underlay in a VRF) " -ip netns exec vm-1 ping -c 1 -W 1 10.0.0.2 &> /dev/null || (echo "[FAIL]"; false) +ip netns exec $vm_1 ping -c 1 -W 1 10.0.0.2 &> /dev/null || (echo "[FAIL]"; false) echo "[ OK ]" diff --git a/tools/testing/selftests/net/test_vxlan_vnifiltering.sh b/tools/testing/selftests/net/test_vxlan_vnifiltering.sh new file mode 100755 index 000000000000..6127a78ee988 --- /dev/null +++ b/tools/testing/selftests/net/test_vxlan_vnifiltering.sh @@ -0,0 +1,607 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# This test is for checking the VXLAN vni filtering api and +# datapath. +# It simulates two hypervisors running two VMs each using four network +# six namespaces: two for the HVs, four for the VMs. Each VM is +# connected to a separate bridge. The VM's use overlapping vlans and +# hence the separate bridge domain. Each vxlan device is a collect +# metadata device with vni filtering and hence has the ability to +# terminate configured vni's only. + +# +--------------------------------+ +------------------------------------+ +# | vm-11 netns | | vm-21 netns | +# | | | | +# |+------------+ +-------------+ | |+-------------+ +----------------+ | +# ||veth-11.10 | |veth-11.20 | | ||veth-21.10 | | veth-21.20 | | +# ||10.0.10.11/24 |10.0.20.11/24| | ||10.0.10.21/24| | 10.0.20.21/24 | | +# |+------|-----+ +|------------+ | |+-----------|-+ +---|------------+ | +# | | | | | | | | +# | | | | | +------------+ | +# | +------------+ | | | veth-21 | | +# | | veth-11 | | | | | | +# | | | | | +-----|------+ | +# | +-----|------+ | | | | +# | | | | | | +# +------------|-------------------+ +---------------|--------------------+ +# +------------|-----------------------------------------|-------------------+ +# | +-----|------+ +-----|------+ | +# | |vethhv-11 | |vethhv-21 | | +# | +----|-------+ +-----|------+ | +# | +---|---+ +---|--+ | +# | | br1 | | br2 | | +# | +---|---+ +---|--+ | +# | +---|----+ +---|--+ | +# | | vxlan1| |vxlan2| | +# | +--|-----+ +--|---+ | +# | | | | +# | | +---------------------+ | | +# | | |veth0 | | | +# | +---------|172.16.0.1/24 -----------+ | +# | |2002:fee1::1/64 | | +# | hv-1 netns +--------|------------+ | +# +-----------------------------|--------------------------------------------+ +# | +# +-----------------------------|--------------------------------------------+ +# | hv-2 netns +--------|-------------+ | +# | | veth0 | | +# | +------| 172.16.0.2/24 |---+ | +# | | | 2002:fee1::2/64 | | | +# | | | | | | +# | | +----------------------+ | - | +# | | | | +# | +-|-------+ +--------|-+ | +# | | vxlan1 | | vxlan2 | | +# | +----|----+ +---|------+ | +# | +--|--+ +-|---+ | +# | | br1 | | br2 | | +# | +--|--+ +--|--+ | +# | +-----|-------+ +----|-------+ | +# | | vethhv-12 | |vethhv-22 | | +# | +------|------+ +-------|----+ | +# +-----------------|----------------------------|---------------------------+ +# | | +# +-----------------|-----------------+ +--------|---------------------------+ +# | +-------|---+ | | +--|---------+ | +# | | veth-12 | | | |veth-22 | | +# | +-|--------|+ | | +--|--------|+ | +# | | | | | | | | +# |+----------|--+ +---|-----------+ | |+-------|-----+ +|---------------+ | +# ||veth-12.10 | |veth-12.20 | | ||veth-22.10 | |veth-22.20 | | +# ||10.0.10.12/24| |10.0.20.12/24 | | ||10.0.10.22/24| |10.0.20.22/24 | | +# |+-------------+ +---------------+ | |+-------------+ +----------------+ | +# | | | | +# | | | | +# | vm-12 netns | |vm-22 netns | +# +-----------------------------------+ +------------------------------------+ +# +# +# This test tests the new vxlan vnifiltering api +source lib.sh +ret=0 + +# all tests in this script. Can be overridden with -t option +TESTS=" + vxlan_vnifilter_api + vxlan_vnifilter_datapath + vxlan_vnifilter_datapath_pervni + vxlan_vnifilter_datapath_mgroup + vxlan_vnifilter_datapath_mgroup_pervni + vxlan_vnifilter_metadata_and_traditional_mix +" +VERBOSE=0 +PAUSE_ON_FAIL=no +PAUSE=no + +which ping6 > /dev/null 2>&1 && ping6=$(which ping6) || ping6=$(which ping) + +log_test() +{ + local rc=$1 + local expected=$2 + local msg="$3" + + if [ ${rc} -eq ${expected} ]; then + printf " TEST: %-60s [ OK ]\n" "${msg}" + nsuccess=$((nsuccess+1)) + else + ret=1 + nfail=$((nfail+1)) + printf " TEST: %-60s [FAIL]\n" "${msg}" + if [ "${PAUSE_ON_FAIL}" = "yes" ]; then + echo + echo "hit enter to continue, 'q' to quit" + read a + [ "$a" = "q" ] && exit 1 + fi + fi + + if [ "${PAUSE}" = "yes" ]; then + echo + echo "hit enter to continue, 'q' to quit" + read a + [ "$a" = "q" ] && exit 1 + fi +} + +run_cmd() +{ + local cmd="$1" + local out + local stderr="2>/dev/null" + + if [ "$VERBOSE" = "1" ]; then + printf "COMMAND: $cmd\n" + stderr= + fi + + out=$(eval $cmd $stderr) + rc=$? + if [ "$VERBOSE" = "1" -a -n "$out" ]; then + echo " $out" + fi + + return $rc +} + +check_hv_connectivity() { + ip netns exec $hv_1 ping -c 1 -W 1 $1 &>/dev/null + sleep 1 + ip netns exec $hv_1 ping -c 1 -W 1 $2 &>/dev/null + + return $? +} + +check_vm_connectivity() { + run_cmd "ip netns exec $vm_11 ping -c 1 -W 1 10.0.10.12" + log_test $? 0 "VM connectivity over $1 (ipv4 default rdst)" + + run_cmd "ip netns exec $vm_21 ping -c 1 -W 1 10.0.10.22" + log_test $? 0 "VM connectivity over $1 (ipv6 default rdst)" +} + +cleanup() { + ip link del veth-hv-1 2>/dev/null || true + ip link del vethhv-11 vethhv-12 vethhv-21 vethhv-22 2>/dev/null || true + + cleanup_ns $hv_1 $hv_2 $vm_11 $vm_21 $vm_12 $vm_22 $vm_31 $vm_32 +} + +trap cleanup EXIT + +setup-hv-networking() { + id=$1 + local1=$2 + mask1=$3 + local2=$4 + mask2=$5 + + ip link set veth-hv-$id netns ${hv[$id]} + ip -netns ${hv[$id]} link set veth-hv-$id name veth0 + ip -netns ${hv[$id]} addr add $local1/$mask1 dev veth0 + ip -netns ${hv[$id]} addr add $local2/$mask2 dev veth0 + ip -netns ${hv[$id]} link set veth0 up +} + +# Setups a "VM" simulated by a netns an a veth pair +# example: setup-vm <hvid> <vmid> <brid> <VATTRS> <mcast_for_bum> +# VATTRS = comma separated "<vlan>-<v[46]>-<localip>-<remoteip>-<VTYPE>-<vxlandstport>" +# VTYPE = vxlan device type. "default = traditional device, metadata = metadata device +# vnifilter = vnifiltering device, +# vnifilterg = vnifiltering device with per vni group/remote" +# example: +# setup-vm 1 11 1 \ +# 10-v4-172.16.0.1-239.1.1.100-vnifilterg,20-v4-172.16.0.1-239.1.1.100-vnifilterg 1 +# +setup-vm() { + hvid=$1 + vmid=$2 + brid=$3 + vattrs=$4 + mcast=$5 + lastvxlandev="" + + # create bridge + ip -netns ${hv[$hvid]} link add br$brid type bridge vlan_filtering 1 vlan_default_pvid 0 \ + mcast_snooping 0 + ip -netns ${hv[$hvid]} link set br$brid up + + # create vm namespace and interfaces and connect to hypervisor + # namespace + hvvethif="vethhv-$vmid" + vmvethif="veth-$vmid" + ip link add $hvvethif type veth peer name $vmvethif + ip link set $hvvethif netns ${hv[$hvid]} + ip link set $vmvethif netns ${vm[$vmid]} + ip -netns ${hv[$hvid]} link set $hvvethif up + ip -netns ${vm[$vmid]} link set $vmvethif up + ip -netns ${hv[$hvid]} link set $hvvethif master br$brid + + # configure VM vlan/vni filtering on hypervisor + for vmap in $(echo $vattrs | cut -d "," -f1- --output-delimiter=' ') + do + local vid=$(echo $vmap | awk -F'-' '{print ($1)}') + local family=$(echo $vmap | awk -F'-' '{print ($2)}') + local localip=$(echo $vmap | awk -F'-' '{print ($3)}') + local group=$(echo $vmap | awk -F'-' '{print ($4)}') + local vtype=$(echo $vmap | awk -F'-' '{print ($5)}') + local port=$(echo $vmap | awk -F'-' '{print ($6)}') + + ip -netns ${vm[$vmid]} link add name $vmvethif.$vid link $vmvethif type vlan id $vid + ip -netns ${vm[$vmid]} addr add 10.0.$vid.$vmid/24 dev $vmvethif.$vid + ip -netns ${vm[$vmid]} link set $vmvethif.$vid up + + tid=$vid + vxlandev="vxlan$brid" + vxlandevflags="" + + if [[ -n $vtype && $vtype == "metadata" ]]; then + vxlandevflags="$vxlandevflags external" + elif [[ -n $vtype && $vtype == "vnifilter" || $vtype == "vnifilterg" ]]; then + vxlandevflags="$vxlandevflags external vnifilter" + tid=$((vid+brid)) + else + vxlandevflags="$vxlandevflags id $tid" + vxlandev="vxlan$tid" + fi + + if [[ -n $vtype && $vtype != "vnifilterg" ]]; then + if [[ -n "$group" && "$group" != "null" ]]; then + if [ $mcast -eq 1 ]; then + vxlandevflags="$vxlandevflags group $group" + else + vxlandevflags="$vxlandevflags remote $group" + fi + fi + fi + + if [[ -n "$port" && "$port" != "default" ]]; then + vxlandevflags="$vxlandevflags dstport $port" + fi + + # create vxlan device + if [ "$vxlandev" != "$lastvxlandev" ]; then + ip -netns ${hv[$hvid]} link add $vxlandev type vxlan local $localip $vxlandevflags dev veth0 2>/dev/null + ip -netns ${hv[$hvid]} link set $vxlandev master br$brid + ip -netns ${hv[$hvid]} link set $vxlandev up + lastvxlandev=$vxlandev + fi + + # add vlan + bridge -netns ${hv[$hvid]} vlan add vid $vid dev $hvvethif + bridge -netns ${hv[$hvid]} vlan add vid $vid pvid dev $vxlandev + + # Add bridge vni filter for tx + if [[ -n $vtype && $vtype == "metadata" || $vtype == "vnifilter" || $vtype == "vnifilterg" ]]; then + bridge -netns ${hv[$hvid]} link set dev $vxlandev vlan_tunnel on + bridge -netns ${hv[$hvid]} vlan add dev $vxlandev vid $vid tunnel_info id $tid + fi + + if [[ -n $vtype && $vtype == "metadata" ]]; then + bridge -netns ${hv[$hvid]} fdb add 00:00:00:00:00:00 dev $vxlandev \ + src_vni $tid vni $tid dst $group self + elif [[ -n $vtype && $vtype == "vnifilter" ]]; then + # Add per vni rx filter with 'bridge vni' api + bridge -netns ${hv[$hvid]} vni add dev $vxlandev vni $tid + elif [[ -n $vtype && $vtype == "vnifilterg" ]]; then + # Add per vni group config with 'bridge vni' api + if [ -n "$group" ]; then + if [ $mcast -eq 1 ]; then + bridge -netns ${hv[$hvid]} vni add dev $vxlandev vni $tid group $group + else + bridge -netns ${hv[$hvid]} vni add dev $vxlandev vni $tid remote $group + fi + fi + fi + done +} + +setup_vnifilter_api() +{ + ip link add veth-host type veth peer name veth-testns + setup_ns testns + ip link set veth-testns netns $testns +} + +cleanup_vnifilter_api() +{ + ip link del veth-host 2>/dev/null || true + ip netns del $testns 2>/dev/null || true +} + +# tests vxlan filtering api +vxlan_vnifilter_api() +{ + hv1addr1="172.16.0.1" + hv2addr1="172.16.0.2" + hv1addr2="2002:fee1::1" + hv2addr2="2002:fee1::2" + localip="172.16.0.1" + group="239.1.1.101" + + cleanup_vnifilter_api &>/dev/null + setup_vnifilter_api + + # Duplicate vni test + # create non-vnifiltering traditional vni device + run_cmd "ip -netns $testns link add vxlan100 type vxlan id 100 local $localip dev veth-testns dstport 4789" + log_test $? 0 "Create traditional vxlan device" + + # create vni filtering device + run_cmd "ip -netns $testns link add vxlan-ext1 type vxlan vnifilter local $localip dev veth-testns dstport 4789" + log_test $? 1 "Cannot create vnifilter device without external flag" + + run_cmd "ip -netns $testns link add vxlan-ext1 type vxlan external vnifilter local $localip dev veth-testns dstport 4789" + log_test $? 0 "Creating external vxlan device with vnifilter flag" + + run_cmd "bridge -netns $testns vni add dev vxlan-ext1 vni 100" + log_test $? 0 "Cannot set in-use vni id on vnifiltering device" + + run_cmd "bridge -netns $testns vni add dev vxlan-ext1 vni 200" + log_test $? 0 "Set new vni id on vnifiltering device" + + run_cmd "ip -netns $testns link add vxlan-ext2 type vxlan external vnifilter local $localip dev veth-testns dstport 4789" + log_test $? 0 "Create second external vxlan device with vnifilter flag" + + run_cmd "bridge -netns $testns vni add dev vxlan-ext2 vni 200" + log_test $? 255 "Cannot set in-use vni id on vnifiltering device" + + run_cmd "bridge -netns $testns vni add dev vxlan-ext2 vni 300" + log_test $? 0 "Set new vni id on vnifiltering device" + + # check in bridge vni show + run_cmd "bridge -netns $testns vni add dev vxlan-ext2 vni 300" + log_test $? 0 "Update vni id on vnifiltering device" + + run_cmd "bridge -netns $testns vni add dev vxlan-ext2 vni 400" + log_test $? 0 "Add new vni id on vnifiltering device" + + # add multicast group per vni + run_cmd "bridge -netns $testns vni add dev vxlan-ext1 vni 200 group $group" + log_test $? 0 "Set multicast group on existing vni" + + # add multicast group per vni + run_cmd "bridge -netns $testns vni add dev vxlan-ext2 vni 300 group $group" + log_test $? 0 "Set multicast group on existing vni" + + # set vnifilter on an existing external vxlan device + run_cmd "ip -netns $testns link set dev vxlan-ext1 type vxlan external vnifilter" + log_test $? 2 "Cannot set vnifilter flag on a device" + + # change vxlan vnifilter flag + run_cmd "ip -netns $testns link set dev vxlan-ext1 type vxlan external novnifilter" + log_test $? 2 "Cannot unset vnifilter flag on a device" +} + +# Sanity test vnifilter datapath +# vnifilter vnis inherit BUM group from +# vxlan device +vxlan_vnifilter_datapath() +{ + hv1addr1="172.16.0.1" + hv2addr1="172.16.0.2" + hv1addr2="2002:fee1::1" + hv2addr2="2002:fee1::2" + + setup_ns hv_1 hv_2 + hv[1]=$hv_1 + hv[2]=$hv_2 + ip link add veth-hv-1 type veth peer name veth-hv-2 + setup-hv-networking 1 $hv1addr1 24 $hv1addr2 64 $hv2addr1 $hv2addr2 + setup-hv-networking 2 $hv2addr1 24 $hv2addr2 64 $hv1addr1 $hv1addr2 + + check_hv_connectivity hv2addr1 hv2addr2 + + setup_ns vm_11 vm_21 vm_12 vm_22 + vm[11]=$vm_11 + vm[21]=$vm_21 + vm[12]=$vm_12 + vm[22]=$vm_22 + setup-vm 1 11 1 10-v4-$hv1addr1-$hv2addr1-vnifilter,20-v4-$hv1addr1-$hv2addr1-vnifilter 0 + setup-vm 1 21 2 10-v6-$hv1addr2-$hv2addr2-vnifilter,20-v6-$hv1addr2-$hv2addr2-vnifilter 0 + + setup-vm 2 12 1 10-v4-$hv2addr1-$hv1addr1-vnifilter,20-v4-$hv2addr1-$hv1addr1-vnifilter 0 + setup-vm 2 22 2 10-v6-$hv2addr2-$hv1addr2-vnifilter,20-v6-$hv2addr2-$hv1addr2-vnifilter 0 + + check_vm_connectivity "vnifiltering vxlan" +} + +# Sanity test vnifilter datapath +# with vnifilter per vni configured BUM +# group/remote +vxlan_vnifilter_datapath_pervni() +{ + hv1addr1="172.16.0.1" + hv2addr1="172.16.0.2" + hv1addr2="2002:fee1::1" + hv2addr2="2002:fee1::2" + + setup_ns hv_1 hv_2 + hv[1]=$hv_1 + hv[2]=$hv_2 + ip link add veth-hv-1 type veth peer name veth-hv-2 + setup-hv-networking 1 $hv1addr1 24 $hv1addr2 64 + setup-hv-networking 2 $hv2addr1 24 $hv2addr2 64 + + check_hv_connectivity hv2addr1 hv2addr2 + + setup_ns vm_11 vm_21 vm_12 vm_22 + vm[11]=$vm_11 + vm[21]=$vm_21 + vm[12]=$vm_12 + vm[22]=$vm_22 + setup-vm 1 11 1 10-v4-$hv1addr1-$hv2addr1-vnifilterg,20-v4-$hv1addr1-$hv2addr1-vnifilterg 0 + setup-vm 1 21 2 10-v6-$hv1addr2-$hv2addr2-vnifilterg,20-v6-$hv1addr2-$hv2addr2-vnifilterg 0 + + setup-vm 2 12 1 10-v4-$hv2addr1-$hv1addr1-vnifilterg,20-v4-$hv2addr1-$hv1addr1-vnifilterg 0 + setup-vm 2 22 2 10-v6-$hv2addr2-$hv1addr2-vnifilterg,20-v6-$hv2addr2-$hv1addr2-vnifilterg 0 + + check_vm_connectivity "vnifiltering vxlan pervni remote" +} + + +vxlan_vnifilter_datapath_mgroup() +{ + hv1addr1="172.16.0.1" + hv2addr1="172.16.0.2" + hv1addr2="2002:fee1::1" + hv2addr2="2002:fee1::2" + group="239.1.1.100" + group6="ff07::1" + + setup_ns hv_1 hv_2 + hv[1]=$hv_1 + hv[2]=$hv_2 + ip link add veth-hv-1 type veth peer name veth-hv-2 + setup-hv-networking 1 $hv1addr1 24 $hv1addr2 64 + setup-hv-networking 2 $hv2addr1 24 $hv2addr2 64 + + check_hv_connectivity hv2addr1 hv2addr2 + + setup_ns vm_11 vm_21 vm_12 vm_22 + vm[11]=$vm_11 + vm[21]=$vm_21 + vm[12]=$vm_12 + vm[22]=$vm_22 + setup-vm 1 11 1 10-v4-$hv1addr1-$group-vnifilter,20-v4-$hv1addr1-$group-vnifilter 1 + setup-vm 1 21 2 "10-v6-$hv1addr2-$group6-vnifilter,20-v6-$hv1addr2-$group6-vnifilter" 1 + + setup-vm 2 12 1 10-v4-$hv2addr1-$group-vnifilter,20-v4-$hv2addr1-$group-vnifilter 1 + setup-vm 2 22 2 10-v6-$hv2addr2-$group6-vnifilter,20-v6-$hv2addr2-$group6-vnifilter 1 + + check_vm_connectivity "vnifiltering vxlan mgroup" +} + +vxlan_vnifilter_datapath_mgroup_pervni() +{ + hv1addr1="172.16.0.1" + hv2addr1="172.16.0.2" + hv1addr2="2002:fee1::1" + hv2addr2="2002:fee1::2" + group="239.1.1.100" + group6="ff07::1" + + setup_ns hv_1 hv_2 + hv[1]=$hv_1 + hv[2]=$hv_2 + ip link add veth-hv-1 type veth peer name veth-hv-2 + setup-hv-networking 1 $hv1addr1 24 $hv1addr2 64 + setup-hv-networking 2 $hv2addr1 24 $hv2addr2 64 + + check_hv_connectivity hv2addr1 hv2addr2 + + setup_ns vm_11 vm_21 vm_12 vm_22 + vm[11]=$vm_11 + vm[21]=$vm_21 + vm[12]=$vm_12 + vm[22]=$vm_22 + setup-vm 1 11 1 10-v4-$hv1addr1-$group-vnifilterg,20-v4-$hv1addr1-$group-vnifilterg 1 + setup-vm 1 21 2 10-v6-$hv1addr2-$group6-vnifilterg,20-v6-$hv1addr2-$group6-vnifilterg 1 + + setup-vm 2 12 1 10-v4-$hv2addr1-$group-vnifilterg,20-v4-$hv2addr1-$group-vnifilterg 1 + setup-vm 2 22 2 10-v6-$hv2addr2-$group6-vnifilterg,20-v6-$hv2addr2-$group6-vnifilterg 1 + + check_vm_connectivity "vnifiltering vxlan pervni mgroup" +} + +vxlan_vnifilter_metadata_and_traditional_mix() +{ + hv1addr1="172.16.0.1" + hv2addr1="172.16.0.2" + hv1addr2="2002:fee1::1" + hv2addr2="2002:fee1::2" + + setup_ns hv_1 hv_2 + hv[1]=$hv_1 + hv[2]=$hv_2 + ip link add veth-hv-1 type veth peer name veth-hv-2 + setup-hv-networking 1 $hv1addr1 24 $hv1addr2 64 + setup-hv-networking 2 $hv2addr1 24 $hv2addr2 64 + + check_hv_connectivity hv2addr1 hv2addr2 + + setup_ns vm_11 vm_21 vm_31 vm_12 vm_22 vm_32 + vm[11]=$vm_11 + vm[21]=$vm_21 + vm[31]=$vm_31 + vm[12]=$vm_12 + vm[22]=$vm_22 + vm[32]=$vm_32 + setup-vm 1 11 1 10-v4-$hv1addr1-$hv2addr1-vnifilter,20-v4-$hv1addr1-$hv2addr1-vnifilter 0 + setup-vm 1 21 2 10-v6-$hv1addr2-$hv2addr2-vnifilter,20-v6-$hv1addr2-$hv2addr2-vnifilter 0 + setup-vm 1 31 3 30-v4-$hv1addr1-$hv2addr1-default-4790,40-v6-$hv1addr2-$hv2addr2-default-4790,50-v4-$hv1addr1-$hv2addr1-metadata-4791 0 + + + setup-vm 2 12 1 10-v4-$hv2addr1-$hv1addr1-vnifilter,20-v4-$hv2addr1-$hv1addr1-vnifilter 0 + setup-vm 2 22 2 10-v6-$hv2addr2-$hv1addr2-vnifilter,20-v6-$hv2addr2-$hv1addr2-vnifilter 0 + setup-vm 2 32 3 30-v4-$hv2addr1-$hv1addr1-default-4790,40-v6-$hv2addr2-$hv1addr2-default-4790,50-v4-$hv2addr1-$hv1addr1-metadata-4791 0 + + check_vm_connectivity "vnifiltering vxlan pervni remote mix" + + # check VM connectivity over traditional/non-vxlan filtering vxlan devices + run_cmd "ip netns exec $vm_31 ping -c 1 -W 1 10.0.30.32" + log_test $? 0 "VM connectivity over traditional vxlan (ipv4 default rdst)" + + run_cmd "ip netns exec $vm_31 ping -c 1 -W 1 10.0.40.32" + log_test $? 0 "VM connectivity over traditional vxlan (ipv6 default rdst)" + + run_cmd "ip netns exec $vm_31 ping -c 1 -W 1 10.0.50.32" + log_test $? 0 "VM connectivity over metadata nonfiltering vxlan (ipv4 default rdst)" +} + +while getopts :t:pP46hv o +do + case $o in + t) TESTS=$OPTARG;; + p) PAUSE_ON_FAIL=yes;; + P) PAUSE=yes;; + v) VERBOSE=$(($VERBOSE + 1));; + h) usage; exit 0;; + *) usage; exit 1;; + esac +done + +# make sure we don't pause twice +[ "${PAUSE}" = "yes" ] && PAUSE_ON_FAIL=no + +if [ "$(id -u)" -ne 0 ];then + echo "SKIP: Need root privileges" + exit $ksft_skip; +fi + +if [ ! -x "$(command -v ip)" ]; then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +ip link help vxlan 2>&1 | grep -q "vnifilter" +if [ $? -ne 0 ]; then + echo "SKIP: iproute2 too old, missing vxlan dev vnifilter setting" + sync + exit $ksft_skip +fi + +bridge vni help 2>&1 | grep -q "Usage: bridge vni" +if [ $? -ne 0 ]; then + echo "SKIP: iproute2 bridge lacks vxlan vnifiltering support" + exit $ksft_skip +fi + +# start clean +cleanup &> /dev/null + +for t in $TESTS +do + case $t in + none) setup; exit 0;; + *) $t; cleanup;; + esac +done + +if [ "$TESTS" != "none" ]; then + printf "\nTests passed: %3d\n" ${nsuccess} + printf "Tests failed: %3d\n" ${nfail} +fi + +exit $ret diff --git a/tools/testing/selftests/net/timestamping.c b/tools/testing/selftests/net/timestamping.c index f4bb4fef0f39..044bc0e9ed81 100644 --- a/tools/testing/selftests/net/timestamping.c +++ b/tools/testing/selftests/net/timestamping.c @@ -47,7 +47,7 @@ static void usage(const char *error) { if (error) printf("invalid option: %s\n", error); - printf("timestamping interface option*\n\n" + printf("timestamping <interface> [bind_phc_index] [option]*\n\n" "Options:\n" " IP_MULTICAST_LOOP - looping outgoing multicasts\n" " SO_TIMESTAMP - normal software time stamping, ms resolution\n" @@ -58,8 +58,10 @@ static void usage(const char *error) " SOF_TIMESTAMPING_RX_SOFTWARE - software fallback for incoming packets\n" " SOF_TIMESTAMPING_SOFTWARE - request reporting of software time stamps\n" " SOF_TIMESTAMPING_RAW_HARDWARE - request reporting of raw HW time stamps\n" + " SOF_TIMESTAMPING_BIND_PHC - request to bind a PHC of PTP vclock\n" " SIOCGSTAMP - check last socket time stamp\n" - " SIOCGSTAMPNS - more accurate socket time stamp\n"); + " SIOCGSTAMPNS - more accurate socket time stamp\n" + " PTPV2 - use PTPv2 messages\n"); exit(1); } @@ -115,13 +117,28 @@ static const unsigned char sync[] = { 0x00, 0x00, 0x00, 0x00 }; -static void sendpacket(int sock, struct sockaddr *addr, socklen_t addr_len) +static const unsigned char sync_v2[] = { + 0x00, 0x02, 0x00, 0x2C, + 0x00, 0x00, 0x02, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0xFF, + 0xFE, 0x00, 0x00, 0x00, + 0x00, 0x01, 0x00, 0x01, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, +}; + +static void sendpacket(int sock, struct sockaddr *addr, socklen_t addr_len, int ptpv2) { + size_t sync_len = ptpv2 ? sizeof(sync_v2) : sizeof(sync); + const void *sync_p = ptpv2 ? sync_v2 : sync; struct timeval now; int res; - res = sendto(sock, sync, sizeof(sync), 0, - addr, addr_len); + res = sendto(sock, sync_p, sync_len, 0, addr, addr_len); gettimeofday(&now, 0); if (res < 0) printf("%s: %s\n", "send", strerror(errno)); @@ -134,9 +151,11 @@ static void sendpacket(int sock, struct sockaddr *addr, socklen_t addr_len) static void printpacket(struct msghdr *msg, int res, char *data, int sock, int recvmsg_flags, - int siocgstamp, int siocgstampns) + int siocgstamp, int siocgstampns, int ptpv2) { struct sockaddr_in *from_addr = (struct sockaddr_in *)msg->msg_name; + size_t sync_len = ptpv2 ? sizeof(sync_v2) : sizeof(sync); + const void *sync_p = ptpv2 ? sync_v2 : sync; struct cmsghdr *cmsg; struct timeval tv; struct timespec ts; @@ -210,10 +229,9 @@ static void printpacket(struct msghdr *msg, int res, "probably SO_EE_ORIGIN_TIMESTAMPING" #endif ); - if (res < sizeof(sync)) + if (res < sync_len) printf(" => truncated data?!"); - else if (!memcmp(sync, data + res - sizeof(sync), - sizeof(sync))) + else if (!memcmp(sync_p, data + res - sync_len, sync_len)) printf(" => GOT OUR DATA BACK (HURRAY!)"); break; } @@ -257,7 +275,7 @@ static void printpacket(struct msghdr *msg, int res, } static void recvpacket(int sock, int recvmsg_flags, - int siocgstamp, int siocgstampns) + int siocgstamp, int siocgstampns, int ptpv2) { char data[256]; struct msghdr msg; @@ -288,18 +306,18 @@ static void recvpacket(int sock, int recvmsg_flags, } else { printpacket(&msg, res, data, sock, recvmsg_flags, - siocgstamp, siocgstampns); + siocgstamp, siocgstampns, ptpv2); } } int main(int argc, char **argv) { - int so_timestamping_flags = 0; int so_timestamp = 0; int so_timestampns = 0; int siocgstamp = 0; int siocgstampns = 0; int ip_multicast_loop = 0; + int ptpv2 = 0; char *interface; int i; int enabled = 1; @@ -307,6 +325,8 @@ int main(int argc, char **argv) struct ifreq device; struct ifreq hwtstamp; struct hwtstamp_config hwconfig, hwconfig_requested; + struct so_timestamping so_timestamping_get = { 0, 0 }; + struct so_timestamping so_timestamping = { 0, 0 }; struct sockaddr_in addr; struct ip_mreq imr; struct in_addr iaddr; @@ -324,7 +344,12 @@ int main(int argc, char **argv) exit(1); } - for (i = 2; i < argc; i++) { + if (argc >= 3 && sscanf(argv[2], "%d", &so_timestamping.bind_phc) == 1) + val = 3; + else + val = 2; + + for (i = val; i < argc; i++) { if (!strcasecmp(argv[i], "SO_TIMESTAMP")) so_timestamp = 1; else if (!strcasecmp(argv[i], "SO_TIMESTAMPNS")) @@ -335,18 +360,22 @@ int main(int argc, char **argv) siocgstampns = 1; else if (!strcasecmp(argv[i], "IP_MULTICAST_LOOP")) ip_multicast_loop = 1; + else if (!strcasecmp(argv[i], "PTPV2")) + ptpv2 = 1; else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_TX_HARDWARE")) - so_timestamping_flags |= SOF_TIMESTAMPING_TX_HARDWARE; + so_timestamping.flags |= SOF_TIMESTAMPING_TX_HARDWARE; else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_TX_SOFTWARE")) - so_timestamping_flags |= SOF_TIMESTAMPING_TX_SOFTWARE; + so_timestamping.flags |= SOF_TIMESTAMPING_TX_SOFTWARE; else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_RX_HARDWARE")) - so_timestamping_flags |= SOF_TIMESTAMPING_RX_HARDWARE; + so_timestamping.flags |= SOF_TIMESTAMPING_RX_HARDWARE; else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_RX_SOFTWARE")) - so_timestamping_flags |= SOF_TIMESTAMPING_RX_SOFTWARE; + so_timestamping.flags |= SOF_TIMESTAMPING_RX_SOFTWARE; else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_SOFTWARE")) - so_timestamping_flags |= SOF_TIMESTAMPING_SOFTWARE; + so_timestamping.flags |= SOF_TIMESTAMPING_SOFTWARE; else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_RAW_HARDWARE")) - so_timestamping_flags |= SOF_TIMESTAMPING_RAW_HARDWARE; + so_timestamping.flags |= SOF_TIMESTAMPING_RAW_HARDWARE; + else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_BIND_PHC")) + so_timestamping.flags |= SOF_TIMESTAMPING_BIND_PHC; else usage(argv[i]); } @@ -365,10 +394,11 @@ int main(int argc, char **argv) hwtstamp.ifr_data = (void *)&hwconfig; memset(&hwconfig, 0, sizeof(hwconfig)); hwconfig.tx_type = - (so_timestamping_flags & SOF_TIMESTAMPING_TX_HARDWARE) ? + (so_timestamping.flags & SOF_TIMESTAMPING_TX_HARDWARE) ? HWTSTAMP_TX_ON : HWTSTAMP_TX_OFF; hwconfig.rx_filter = - (so_timestamping_flags & SOF_TIMESTAMPING_RX_HARDWARE) ? + (so_timestamping.flags & SOF_TIMESTAMPING_RX_HARDWARE) ? + ptpv2 ? HWTSTAMP_FILTER_PTP_V2_L4_SYNC : HWTSTAMP_FILTER_PTP_V1_L4_SYNC : HWTSTAMP_FILTER_NONE; hwconfig_requested = hwconfig; if (ioctl(sock, SIOCSHWTSTAMP, &hwtstamp) < 0) { @@ -392,6 +422,9 @@ int main(int argc, char **argv) sizeof(struct sockaddr_in)) < 0) bail("bind"); + if (setsockopt(sock, SOL_SOCKET, SO_BINDTODEVICE, interface, if_len)) + bail("bind device"); + /* set multicast group for outgoing packets */ inet_aton("224.0.1.130", &iaddr); /* alternate PTP domain 1 */ addr.sin_addr = iaddr; @@ -423,10 +456,9 @@ int main(int argc, char **argv) &enabled, sizeof(enabled)) < 0) bail("setsockopt SO_TIMESTAMPNS"); - if (so_timestamping_flags && - setsockopt(sock, SOL_SOCKET, SO_TIMESTAMPING, - &so_timestamping_flags, - sizeof(so_timestamping_flags)) < 0) + if (so_timestamping.flags && + setsockopt(sock, SOL_SOCKET, SO_TIMESTAMPING, &so_timestamping, + sizeof(so_timestamping)) < 0) bail("setsockopt SO_TIMESTAMPING"); /* request IP_PKTINFO for debugging purposes */ @@ -447,14 +479,18 @@ int main(int argc, char **argv) else printf("SO_TIMESTAMPNS %d\n", val); - if (getsockopt(sock, SOL_SOCKET, SO_TIMESTAMPING, &val, &len) < 0) { + len = sizeof(so_timestamping_get); + if (getsockopt(sock, SOL_SOCKET, SO_TIMESTAMPING, &so_timestamping_get, + &len) < 0) { printf("%s: %s\n", "getsockopt SO_TIMESTAMPING", strerror(errno)); } else { - printf("SO_TIMESTAMPING %d\n", val); - if (val != so_timestamping_flags) - printf(" not the expected value %d\n", - so_timestamping_flags); + printf("SO_TIMESTAMPING flags %d, bind phc %d\n", + so_timestamping_get.flags, so_timestamping_get.bind_phc); + if (so_timestamping_get.flags != so_timestamping.flags || + so_timestamping_get.bind_phc != so_timestamping.bind_phc) + printf(" not expected, flags %d, bind phc %d\n", + so_timestamping.flags, so_timestamping.bind_phc); } /* send packets forever every five seconds */ @@ -496,16 +532,16 @@ int main(int argc, char **argv) printf("has error\n"); recvpacket(sock, 0, siocgstamp, - siocgstampns); + siocgstampns, ptpv2); recvpacket(sock, MSG_ERRQUEUE, siocgstamp, - siocgstampns); + siocgstampns, ptpv2); } } else { /* write one packet */ sendpacket(sock, (struct sockaddr *)&addr, - sizeof(addr)); + sizeof(addr), ptpv2); next.tv_sec += 5; continue; } diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index b599f1fa99b5..c6eda21cefb6 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -15,6 +15,7 @@ #include <linux/tcp.h> #include <linux/socket.h> +#include <sys/epoll.h> #include <sys/types.h> #include <sys/sendfile.h> #include <sys/socket.h> @@ -25,26 +26,101 @@ #define TLS_PAYLOAD_MAX_LEN 16384 #define SOL_TLS 282 -FIXTURE(tls_basic) -{ - int fd, cfd; - bool notls; +static int fips_enabled; + +struct tls_crypto_info_keys { + union { + struct tls_crypto_info crypto_info; + struct tls12_crypto_info_aes_gcm_128 aes128; + struct tls12_crypto_info_chacha20_poly1305 chacha20; + struct tls12_crypto_info_sm4_gcm sm4gcm; + struct tls12_crypto_info_sm4_ccm sm4ccm; + struct tls12_crypto_info_aes_ccm_128 aesccm128; + struct tls12_crypto_info_aes_gcm_256 aesgcm256; + struct tls12_crypto_info_aria_gcm_128 ariagcm128; + struct tls12_crypto_info_aria_gcm_256 ariagcm256; + }; + size_t len; }; -FIXTURE_SETUP(tls_basic) +static void tls_crypto_info_init(uint16_t tls_version, uint16_t cipher_type, + struct tls_crypto_info_keys *tls12) +{ + memset(tls12, 0, sizeof(*tls12)); + + switch (cipher_type) { + case TLS_CIPHER_CHACHA20_POLY1305: + tls12->len = sizeof(struct tls12_crypto_info_chacha20_poly1305); + tls12->chacha20.info.version = tls_version; + tls12->chacha20.info.cipher_type = cipher_type; + break; + case TLS_CIPHER_AES_GCM_128: + tls12->len = sizeof(struct tls12_crypto_info_aes_gcm_128); + tls12->aes128.info.version = tls_version; + tls12->aes128.info.cipher_type = cipher_type; + break; + case TLS_CIPHER_SM4_GCM: + tls12->len = sizeof(struct tls12_crypto_info_sm4_gcm); + tls12->sm4gcm.info.version = tls_version; + tls12->sm4gcm.info.cipher_type = cipher_type; + break; + case TLS_CIPHER_SM4_CCM: + tls12->len = sizeof(struct tls12_crypto_info_sm4_ccm); + tls12->sm4ccm.info.version = tls_version; + tls12->sm4ccm.info.cipher_type = cipher_type; + break; + case TLS_CIPHER_AES_CCM_128: + tls12->len = sizeof(struct tls12_crypto_info_aes_ccm_128); + tls12->aesccm128.info.version = tls_version; + tls12->aesccm128.info.cipher_type = cipher_type; + break; + case TLS_CIPHER_AES_GCM_256: + tls12->len = sizeof(struct tls12_crypto_info_aes_gcm_256); + tls12->aesgcm256.info.version = tls_version; + tls12->aesgcm256.info.cipher_type = cipher_type; + break; + case TLS_CIPHER_ARIA_GCM_128: + tls12->len = sizeof(struct tls12_crypto_info_aria_gcm_128); + tls12->ariagcm128.info.version = tls_version; + tls12->ariagcm128.info.cipher_type = cipher_type; + break; + case TLS_CIPHER_ARIA_GCM_256: + tls12->len = sizeof(struct tls12_crypto_info_aria_gcm_256); + tls12->ariagcm256.info.version = tls_version; + tls12->ariagcm256.info.cipher_type = cipher_type; + break; + default: + break; + } +} + +static void memrnd(void *s, size_t n) +{ + int *dword = s; + char *byte; + + for (; n >= 4; n -= 4) + *dword++ = rand(); + byte = (void *)dword; + while (n--) + *byte++ = rand(); +} + +static void ulp_sock_pair(struct __test_metadata *_metadata, + int *fd, int *cfd, bool *notls) { struct sockaddr_in addr; socklen_t len; int sfd, ret; - self->notls = false; + *notls = false; len = sizeof(addr); addr.sin_family = AF_INET; addr.sin_addr.s_addr = htonl(INADDR_ANY); addr.sin_port = 0; - self->fd = socket(AF_INET, SOCK_STREAM, 0); + *fd = socket(AF_INET, SOCK_STREAM, 0); sfd = socket(AF_INET, SOCK_STREAM, 0); ret = bind(sfd, &addr, sizeof(addr)); @@ -55,26 +131,96 @@ FIXTURE_SETUP(tls_basic) ret = getsockname(sfd, &addr, &len); ASSERT_EQ(ret, 0); - ret = connect(self->fd, &addr, sizeof(addr)); + ret = connect(*fd, &addr, sizeof(addr)); ASSERT_EQ(ret, 0); - self->cfd = accept(sfd, &addr, &len); - ASSERT_GE(self->cfd, 0); + *cfd = accept(sfd, &addr, &len); + ASSERT_GE(*cfd, 0); close(sfd); - ret = setsockopt(self->fd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls")); + ret = setsockopt(*fd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls")); if (ret != 0) { ASSERT_EQ(errno, ENOENT); - self->notls = true; + *notls = true; printf("Failure setting TCP_ULP, testing without tls\n"); return; } - ret = setsockopt(self->cfd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls")); + ret = setsockopt(*cfd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls")); ASSERT_EQ(ret, 0); } +/* Produce a basic cmsg */ +static int tls_send_cmsg(int fd, unsigned char record_type, + void *data, size_t len, int flags) +{ + char cbuf[CMSG_SPACE(sizeof(char))]; + int cmsg_len = sizeof(char); + struct cmsghdr *cmsg; + struct msghdr msg; + struct iovec vec; + + vec.iov_base = data; + vec.iov_len = len; + memset(&msg, 0, sizeof(struct msghdr)); + msg.msg_iov = &vec; + msg.msg_iovlen = 1; + msg.msg_control = cbuf; + msg.msg_controllen = sizeof(cbuf); + cmsg = CMSG_FIRSTHDR(&msg); + cmsg->cmsg_level = SOL_TLS; + /* test sending non-record types. */ + cmsg->cmsg_type = TLS_SET_RECORD_TYPE; + cmsg->cmsg_len = CMSG_LEN(cmsg_len); + *CMSG_DATA(cmsg) = record_type; + msg.msg_controllen = cmsg->cmsg_len; + + return sendmsg(fd, &msg, flags); +} + +static int tls_recv_cmsg(struct __test_metadata *_metadata, + int fd, unsigned char record_type, + void *data, size_t len, int flags) +{ + char cbuf[CMSG_SPACE(sizeof(char))]; + struct cmsghdr *cmsg; + unsigned char ctype; + struct msghdr msg; + struct iovec vec; + int n; + + vec.iov_base = data; + vec.iov_len = len; + memset(&msg, 0, sizeof(struct msghdr)); + msg.msg_iov = &vec; + msg.msg_iovlen = 1; + msg.msg_control = cbuf; + msg.msg_controllen = sizeof(cbuf); + + n = recvmsg(fd, &msg, flags); + + cmsg = CMSG_FIRSTHDR(&msg); + EXPECT_NE(cmsg, NULL); + EXPECT_EQ(cmsg->cmsg_level, SOL_TLS); + EXPECT_EQ(cmsg->cmsg_type, TLS_GET_RECORD_TYPE); + ctype = *((unsigned char *)CMSG_DATA(cmsg)); + EXPECT_EQ(ctype, record_type); + + return n; +} + +FIXTURE(tls_basic) +{ + int fd, cfd; + bool notls; +}; + +FIXTURE_SETUP(tls_basic) +{ + ulp_sock_pair(_metadata, &self->fd, &self->cfd, &self->notls); +} + FIXTURE_TEARDOWN(tls_basic) { close(self->fd); @@ -95,6 +241,31 @@ TEST_F(tls_basic, base_base) EXPECT_EQ(memcmp(buf, test_str, send_len), 0); }; +TEST_F(tls_basic, bad_cipher) +{ + struct tls_crypto_info_keys tls12; + + tls12.crypto_info.version = 200; + tls12.crypto_info.cipher_type = TLS_CIPHER_AES_GCM_128; + EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, sizeof(struct tls12_crypto_info_aes_gcm_128)), -1); + + tls12.crypto_info.version = TLS_1_2_VERSION; + tls12.crypto_info.cipher_type = 50; + EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, sizeof(struct tls12_crypto_info_aes_gcm_128)), -1); + + tls12.crypto_info.version = TLS_1_2_VERSION; + tls12.crypto_info.cipher_type = 59; + EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, sizeof(struct tls12_crypto_info_aes_gcm_128)), -1); + + tls12.crypto_info.version = TLS_1_2_VERSION; + tls12.crypto_info.cipher_type = 10; + EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, sizeof(struct tls12_crypto_info_aes_gcm_128)), -1); + + tls12.crypto_info.version = TLS_1_2_VERSION; + tls12.crypto_info.cipher_type = 70; + EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, sizeof(struct tls12_crypto_info_aes_gcm_128)), -1); +} + FIXTURE(tls) { int fd, cfd; @@ -103,77 +274,122 @@ FIXTURE(tls) FIXTURE_VARIANT(tls) { - unsigned int tls_version; + uint16_t tls_version; + uint16_t cipher_type; + bool nopad, fips_non_compliant; }; -FIXTURE_VARIANT_ADD(tls, 12) +FIXTURE_VARIANT_ADD(tls, 12_aes_gcm) { .tls_version = TLS_1_2_VERSION, + .cipher_type = TLS_CIPHER_AES_GCM_128, }; -FIXTURE_VARIANT_ADD(tls, 13) +FIXTURE_VARIANT_ADD(tls, 13_aes_gcm) { .tls_version = TLS_1_3_VERSION, + .cipher_type = TLS_CIPHER_AES_GCM_128, }; -FIXTURE_SETUP(tls) +FIXTURE_VARIANT_ADD(tls, 12_chacha) { - struct tls12_crypto_info_aes_gcm_128 tls12; - struct sockaddr_in addr; - socklen_t len; - int sfd, ret; + .tls_version = TLS_1_2_VERSION, + .cipher_type = TLS_CIPHER_CHACHA20_POLY1305, + .fips_non_compliant = true, +}; - self->notls = false; - len = sizeof(addr); +FIXTURE_VARIANT_ADD(tls, 13_chacha) +{ + .tls_version = TLS_1_3_VERSION, + .cipher_type = TLS_CIPHER_CHACHA20_POLY1305, + .fips_non_compliant = true, +}; - memset(&tls12, 0, sizeof(tls12)); - tls12.info.version = variant->tls_version; - tls12.info.cipher_type = TLS_CIPHER_AES_GCM_128; +FIXTURE_VARIANT_ADD(tls, 13_sm4_gcm) +{ + .tls_version = TLS_1_3_VERSION, + .cipher_type = TLS_CIPHER_SM4_GCM, + .fips_non_compliant = true, +}; - addr.sin_family = AF_INET; - addr.sin_addr.s_addr = htonl(INADDR_ANY); - addr.sin_port = 0; +FIXTURE_VARIANT_ADD(tls, 13_sm4_ccm) +{ + .tls_version = TLS_1_3_VERSION, + .cipher_type = TLS_CIPHER_SM4_CCM, + .fips_non_compliant = true, +}; - self->fd = socket(AF_INET, SOCK_STREAM, 0); - sfd = socket(AF_INET, SOCK_STREAM, 0); +FIXTURE_VARIANT_ADD(tls, 12_aes_ccm) +{ + .tls_version = TLS_1_2_VERSION, + .cipher_type = TLS_CIPHER_AES_CCM_128, +}; - ret = bind(sfd, &addr, sizeof(addr)); - ASSERT_EQ(ret, 0); - ret = listen(sfd, 10); - ASSERT_EQ(ret, 0); +FIXTURE_VARIANT_ADD(tls, 13_aes_ccm) +{ + .tls_version = TLS_1_3_VERSION, + .cipher_type = TLS_CIPHER_AES_CCM_128, +}; - ret = getsockname(sfd, &addr, &len); - ASSERT_EQ(ret, 0); +FIXTURE_VARIANT_ADD(tls, 12_aes_gcm_256) +{ + .tls_version = TLS_1_2_VERSION, + .cipher_type = TLS_CIPHER_AES_GCM_256, +}; - ret = connect(self->fd, &addr, sizeof(addr)); - ASSERT_EQ(ret, 0); +FIXTURE_VARIANT_ADD(tls, 13_aes_gcm_256) +{ + .tls_version = TLS_1_3_VERSION, + .cipher_type = TLS_CIPHER_AES_GCM_256, +}; - ret = setsockopt(self->fd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls")); - if (ret != 0) { - self->notls = true; - printf("Failure setting TCP_ULP, testing without tls\n"); - } +FIXTURE_VARIANT_ADD(tls, 13_nopad) +{ + .tls_version = TLS_1_3_VERSION, + .cipher_type = TLS_CIPHER_AES_GCM_128, + .nopad = true, +}; - if (!self->notls) { - ret = setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, - sizeof(tls12)); - ASSERT_EQ(ret, 0); - } +FIXTURE_VARIANT_ADD(tls, 12_aria_gcm) +{ + .tls_version = TLS_1_2_VERSION, + .cipher_type = TLS_CIPHER_ARIA_GCM_128, +}; - self->cfd = accept(sfd, &addr, &len); - ASSERT_GE(self->cfd, 0); +FIXTURE_VARIANT_ADD(tls, 12_aria_gcm_256) +{ + .tls_version = TLS_1_2_VERSION, + .cipher_type = TLS_CIPHER_ARIA_GCM_256, +}; - if (!self->notls) { - ret = setsockopt(self->cfd, IPPROTO_TCP, TCP_ULP, "tls", - sizeof("tls")); - ASSERT_EQ(ret, 0); +FIXTURE_SETUP(tls) +{ + struct tls_crypto_info_keys tls12; + int one = 1; + int ret; - ret = setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, - sizeof(tls12)); + if (fips_enabled && variant->fips_non_compliant) + SKIP(return, "Unsupported cipher in FIPS mode"); + + tls_crypto_info_init(variant->tls_version, variant->cipher_type, + &tls12); + + ulp_sock_pair(_metadata, &self->fd, &self->cfd, &self->notls); + + if (self->notls) + return; + + ret = setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len); + ASSERT_EQ(ret, 0); + + ret = setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len); + ASSERT_EQ(ret, 0); + + if (variant->nopad) { + ret = setsockopt(self->cfd, SOL_TLS, TLS_RX_EXPECT_NO_PAD, + (void *)&one, sizeof(one)); ASSERT_EQ(ret, 0); } - - close(sfd); } FIXTURE_TEARDOWN(tls) @@ -277,6 +493,8 @@ TEST_F(tls, recv_max) char recv_mem[TLS_PAYLOAD_MAX_LEN]; char buf[TLS_PAYLOAD_MAX_LEN]; + memrnd(buf, sizeof(buf)); + EXPECT_GE(send(self->fd, buf, send_len, 0), 0); EXPECT_NE(recv(self->cfd, recv_mem, send_len, 0), -1); EXPECT_EQ(memcmp(buf, recv_mem, send_len), 0); @@ -318,6 +536,17 @@ TEST_F(tls, msg_more_unsent) EXPECT_EQ(recv(self->cfd, buf, send_len, MSG_DONTWAIT), -1); } +TEST_F(tls, msg_eor) +{ + char const *test_str = "test_read"; + int send_len = 10; + char buf[10]; + + EXPECT_EQ(send(self->fd, test_str, send_len, MSG_EOR), send_len); + EXPECT_EQ(recv(self->cfd, buf, send_len, MSG_WAITALL), send_len); + EXPECT_EQ(memcmp(buf, test_str, send_len), 0); +} + TEST_F(tls, sendmsg_single) { struct msghdr msg; @@ -384,11 +613,12 @@ TEST_F(tls, sendmsg_large) msg.msg_iov = &vec; msg.msg_iovlen = 1; - EXPECT_EQ(sendmsg(self->cfd, &msg, 0), send_len); + EXPECT_EQ(sendmsg(self->fd, &msg, 0), send_len); } - while (recvs++ < sends) - EXPECT_NE(recv(self->fd, mem, send_len, 0), -1); + while (recvs++ < sends) { + EXPECT_NE(recv(self->cfd, mem, send_len, 0), -1); + } free(mem); } @@ -416,9 +646,9 @@ TEST_F(tls, sendmsg_multiple) msg.msg_iov = vec; msg.msg_iovlen = iov_len; - EXPECT_EQ(sendmsg(self->cfd, &msg, 0), total_len); + EXPECT_EQ(sendmsg(self->fd, &msg, 0), total_len); buf = malloc(total_len); - EXPECT_NE(recv(self->fd, buf, total_len, 0), -1); + EXPECT_NE(recv(self->cfd, buf, total_len, 0), -1); for (i = 0; i < iov_len; i++) { EXPECT_EQ(memcmp(test_strs[i], buf + len_cmp, strlen(test_strs[i])), @@ -477,6 +707,20 @@ TEST_F(tls, splice_from_pipe) EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0); } +TEST_F(tls, splice_more) +{ + unsigned int f = SPLICE_F_NONBLOCK | SPLICE_F_MORE | SPLICE_F_GIFT; + int send_len = TLS_PAYLOAD_MAX_LEN; + char mem_send[TLS_PAYLOAD_MAX_LEN]; + int i, send_pipe = 1; + int p[2]; + + ASSERT_GE(pipe(p), 0); + EXPECT_GE(write(p[1], mem_send, send_len), 0); + for (i = 0; i < 32; i++) + EXPECT_EQ(splice(p[0], NULL, self->fd, NULL, send_pipe, f), 1); +} + TEST_F(tls, splice_from_pipe2) { int send_len = 16000; @@ -485,12 +729,14 @@ TEST_F(tls, splice_from_pipe2) int p2[2]; int p[2]; + memrnd(mem_send, sizeof(mem_send)); + ASSERT_GE(pipe(p), 0); ASSERT_GE(pipe(p2), 0); - EXPECT_GE(write(p[1], mem_send, 8000), 0); - EXPECT_GE(splice(p[0], NULL, self->fd, NULL, 8000, 0), 0); - EXPECT_GE(write(p2[1], mem_send + 8000, 8000), 0); - EXPECT_GE(splice(p2[0], NULL, self->fd, NULL, 8000, 0), 0); + EXPECT_EQ(write(p[1], mem_send, 8000), 8000); + EXPECT_EQ(splice(p[0], NULL, self->fd, NULL, 8000, 0), 8000); + EXPECT_EQ(write(p2[1], mem_send + 8000, 8000), 8000); + EXPECT_EQ(splice(p2[0], NULL, self->fd, NULL, 8000, 0), 8000); EXPECT_EQ(recv(self->cfd, mem_recv, send_len, MSG_WAITALL), send_len); EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0); } @@ -524,10 +770,107 @@ TEST_F(tls, splice_to_pipe) char mem_recv[TLS_PAYLOAD_MAX_LEN]; int p[2]; + memrnd(mem_send, sizeof(mem_send)); + + ASSERT_GE(pipe(p), 0); + EXPECT_EQ(send(self->fd, mem_send, send_len, 0), send_len); + EXPECT_EQ(splice(self->cfd, NULL, p[1], NULL, send_len, 0), send_len); + EXPECT_EQ(read(p[0], mem_recv, send_len), send_len); + EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0); +} + +TEST_F(tls, splice_cmsg_to_pipe) +{ + char *test_str = "test_read"; + char record_type = 100; + int send_len = 10; + char buf[10]; + int p[2]; + + if (self->notls) + SKIP(return, "no TLS support"); + + ASSERT_GE(pipe(p), 0); + EXPECT_EQ(tls_send_cmsg(self->fd, 100, test_str, send_len, 0), 10); + EXPECT_EQ(splice(self->cfd, NULL, p[1], NULL, send_len, 0), -1); + EXPECT_EQ(errno, EINVAL); + EXPECT_EQ(recv(self->cfd, buf, send_len, 0), -1); + EXPECT_EQ(errno, EIO); + EXPECT_EQ(tls_recv_cmsg(_metadata, self->cfd, record_type, + buf, sizeof(buf), MSG_WAITALL), + send_len); + EXPECT_EQ(memcmp(test_str, buf, send_len), 0); +} + +TEST_F(tls, splice_dec_cmsg_to_pipe) +{ + char *test_str = "test_read"; + char record_type = 100; + int send_len = 10; + char buf[10]; + int p[2]; + + if (self->notls) + SKIP(return, "no TLS support"); + + ASSERT_GE(pipe(p), 0); + EXPECT_EQ(tls_send_cmsg(self->fd, 100, test_str, send_len, 0), 10); + EXPECT_EQ(recv(self->cfd, buf, send_len, 0), -1); + EXPECT_EQ(errno, EIO); + EXPECT_EQ(splice(self->cfd, NULL, p[1], NULL, send_len, 0), -1); + EXPECT_EQ(errno, EINVAL); + EXPECT_EQ(tls_recv_cmsg(_metadata, self->cfd, record_type, + buf, sizeof(buf), MSG_WAITALL), + send_len); + EXPECT_EQ(memcmp(test_str, buf, send_len), 0); +} + +TEST_F(tls, recv_and_splice) +{ + int send_len = TLS_PAYLOAD_MAX_LEN; + char mem_send[TLS_PAYLOAD_MAX_LEN]; + char mem_recv[TLS_PAYLOAD_MAX_LEN]; + int half = send_len / 2; + int p[2]; + + ASSERT_GE(pipe(p), 0); + EXPECT_EQ(send(self->fd, mem_send, send_len, 0), send_len); + /* Recv hald of the record, splice the other half */ + EXPECT_EQ(recv(self->cfd, mem_recv, half, MSG_WAITALL), half); + EXPECT_EQ(splice(self->cfd, NULL, p[1], NULL, half, SPLICE_F_NONBLOCK), + half); + EXPECT_EQ(read(p[0], &mem_recv[half], half), half); + EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0); +} + +TEST_F(tls, peek_and_splice) +{ + int send_len = TLS_PAYLOAD_MAX_LEN; + char mem_send[TLS_PAYLOAD_MAX_LEN]; + char mem_recv[TLS_PAYLOAD_MAX_LEN]; + int chunk = TLS_PAYLOAD_MAX_LEN / 4; + int n, i, p[2]; + + memrnd(mem_send, sizeof(mem_send)); + ASSERT_GE(pipe(p), 0); - EXPECT_GE(send(self->fd, mem_send, send_len, 0), 0); - EXPECT_GE(splice(self->cfd, NULL, p[1], NULL, send_len, 0), 0); - EXPECT_GE(read(p[0], mem_recv, send_len), 0); + for (i = 0; i < 4; i++) + EXPECT_EQ(send(self->fd, &mem_send[chunk * i], chunk, 0), + chunk); + + EXPECT_EQ(recv(self->cfd, mem_recv, chunk * 5 / 2, + MSG_WAITALL | MSG_PEEK), + chunk * 5 / 2); + EXPECT_EQ(memcmp(mem_send, mem_recv, chunk * 5 / 2), 0); + + n = 0; + while (n < send_len) { + i = splice(self->cfd, NULL, p[1], NULL, send_len - n, 0); + EXPECT_GT(i, 0); + n += i; + } + EXPECT_EQ(n, send_len); + EXPECT_EQ(read(p[0], mem_recv, send_len), send_len); EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0); } @@ -557,6 +900,8 @@ TEST_F(tls, recvmsg_single_max) struct iovec vec; struct msghdr hdr; + memrnd(send_mem, sizeof(send_mem)); + EXPECT_EQ(send(self->fd, send_mem, send_len, 0), send_len); vec.iov_base = (char *)recv_mem; vec.iov_len = TLS_PAYLOAD_MAX_LEN; @@ -570,7 +915,6 @@ TEST_F(tls, recvmsg_single_max) TEST_F(tls, recvmsg_multiple) { unsigned int msg_iovlen = 1024; - unsigned int len_compared = 0; struct iovec vec[1024]; char *iov_base[1024]; unsigned int iov_len = 16; @@ -579,6 +923,8 @@ TEST_F(tls, recvmsg_multiple) struct msghdr hdr; int i; + memrnd(buf, sizeof(buf)); + EXPECT_EQ(send(self->fd, buf, send_len, 0), send_len); for (i = 0; i < msg_iovlen; i++) { iov_base[i] = (char *)malloc(iov_len); @@ -589,8 +935,6 @@ TEST_F(tls, recvmsg_multiple) hdr.msg_iovlen = msg_iovlen; hdr.msg_iov = vec; EXPECT_NE(recvmsg(self->cfd, &hdr, 0), -1); - for (i = 0; i < msg_iovlen; i++) - len_compared += iov_len; for (i = 0; i < msg_iovlen; i++) free(iov_base[i]); @@ -603,6 +947,8 @@ TEST_F(tls, single_send_multiple_recv) char send_mem[TLS_PAYLOAD_MAX_LEN * 2]; char recv_mem[TLS_PAYLOAD_MAX_LEN * 2]; + memrnd(send_mem, sizeof(send_mem)); + EXPECT_GE(send(self->fd, send_mem, total_len, 0), 0); memset(recv_mem, 0, total_len); @@ -618,6 +964,8 @@ TEST_F(tls, multiple_send_single_recv) char recv_mem[2 * 10]; char send_mem[10]; + memrnd(send_mem, sizeof(send_mem)); + EXPECT_GE(send(self->fd, send_mem, send_len, 0), 0); EXPECT_GE(send(self->fd, send_mem, send_len, 0), 0); memset(recv_mem, 0, total_len); @@ -634,6 +982,8 @@ TEST_F(tls, single_send_multiple_recv_non_align) char recv_mem[recv_len * 2]; char send_mem[total_len]; + memrnd(send_mem, sizeof(send_mem)); + EXPECT_GE(send(self->fd, send_mem, total_len, 0), 0); memset(recv_mem, 0, total_len); @@ -652,12 +1002,12 @@ TEST_F(tls, recv_partial) memset(recv_mem, 0, sizeof(recv_mem)); EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len); - EXPECT_NE(recv(self->cfd, recv_mem, strlen(test_str_first), - MSG_WAITALL), -1); + EXPECT_EQ(recv(self->cfd, recv_mem, strlen(test_str_first), + MSG_WAITALL), strlen(test_str_first)); EXPECT_EQ(memcmp(test_str_first, recv_mem, strlen(test_str_first)), 0); memset(recv_mem, 0, sizeof(recv_mem)); - EXPECT_NE(recv(self->cfd, recv_mem, strlen(test_str_second), - MSG_WAITALL), -1); + EXPECT_EQ(recv(self->cfd, recv_mem, strlen(test_str_second), + MSG_WAITALL), strlen(test_str_second)); EXPECT_EQ(memcmp(test_str_second, recv_mem, strlen(test_str_second)), 0); } @@ -679,10 +1029,10 @@ TEST_F(tls, recv_peek) char buf[15]; EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len); - EXPECT_NE(recv(self->cfd, buf, send_len, MSG_PEEK), -1); + EXPECT_EQ(recv(self->cfd, buf, send_len, MSG_PEEK), send_len); EXPECT_EQ(memcmp(test_str, buf, send_len), 0); memset(buf, 0, sizeof(buf)); - EXPECT_NE(recv(self->cfd, buf, send_len, 0), -1); + EXPECT_EQ(recv(self->cfd, buf, send_len, 0), send_len); EXPECT_EQ(memcmp(test_str, buf, send_len), 0); } @@ -803,18 +1153,17 @@ TEST_F(tls, bidir) int ret; if (!self->notls) { - struct tls12_crypto_info_aes_gcm_128 tls12; + struct tls_crypto_info_keys tls12; - memset(&tls12, 0, sizeof(tls12)); - tls12.info.version = variant->tls_version; - tls12.info.cipher_type = TLS_CIPHER_AES_GCM_128; + tls_crypto_info_init(variant->tls_version, variant->cipher_type, + &tls12); ret = setsockopt(self->fd, SOL_TLS, TLS_RX, &tls12, - sizeof(tls12)); + tls12.len); ASSERT_EQ(ret, 0); ret = setsockopt(self->cfd, SOL_TLS, TLS_TX, &tls12, - sizeof(tls12)); + tls12.len); ASSERT_EQ(ret, 0); } @@ -1109,63 +1458,78 @@ TEST_F(tls, mutliproc_sendpage_writers) TEST_F(tls, control_msg) { - if (self->notls) - return; - - char cbuf[CMSG_SPACE(sizeof(char))]; - char const *test_str = "test_read"; - int cmsg_len = sizeof(char); + char *test_str = "test_read"; char record_type = 100; - struct cmsghdr *cmsg; - struct msghdr msg; int send_len = 10; - struct iovec vec; char buf[10]; - vec.iov_base = (char *)test_str; - vec.iov_len = 10; - memset(&msg, 0, sizeof(struct msghdr)); - msg.msg_iov = &vec; - msg.msg_iovlen = 1; - msg.msg_control = cbuf; - msg.msg_controllen = sizeof(cbuf); - cmsg = CMSG_FIRSTHDR(&msg); - cmsg->cmsg_level = SOL_TLS; - /* test sending non-record types. */ - cmsg->cmsg_type = TLS_SET_RECORD_TYPE; - cmsg->cmsg_len = CMSG_LEN(cmsg_len); - *CMSG_DATA(cmsg) = record_type; - msg.msg_controllen = cmsg->cmsg_len; + if (self->notls) + SKIP(return, "no TLS support"); - EXPECT_EQ(sendmsg(self->fd, &msg, 0), send_len); + EXPECT_EQ(tls_send_cmsg(self->fd, record_type, test_str, send_len, 0), + send_len); /* Should fail because we didn't provide a control message */ EXPECT_EQ(recv(self->cfd, buf, send_len, 0), -1); - vec.iov_base = buf; - EXPECT_EQ(recvmsg(self->cfd, &msg, MSG_WAITALL | MSG_PEEK), send_len); - - cmsg = CMSG_FIRSTHDR(&msg); - EXPECT_NE(cmsg, NULL); - EXPECT_EQ(cmsg->cmsg_level, SOL_TLS); - EXPECT_EQ(cmsg->cmsg_type, TLS_GET_RECORD_TYPE); - record_type = *((unsigned char *)CMSG_DATA(cmsg)); - EXPECT_EQ(record_type, 100); + EXPECT_EQ(tls_recv_cmsg(_metadata, self->cfd, record_type, + buf, sizeof(buf), MSG_WAITALL | MSG_PEEK), + send_len); EXPECT_EQ(memcmp(buf, test_str, send_len), 0); /* Recv the message again without MSG_PEEK */ - record_type = 0; memset(buf, 0, sizeof(buf)); - EXPECT_EQ(recvmsg(self->cfd, &msg, MSG_WAITALL), send_len); - cmsg = CMSG_FIRSTHDR(&msg); - EXPECT_NE(cmsg, NULL); - EXPECT_EQ(cmsg->cmsg_level, SOL_TLS); - EXPECT_EQ(cmsg->cmsg_type, TLS_GET_RECORD_TYPE); - record_type = *((unsigned char *)CMSG_DATA(cmsg)); - EXPECT_EQ(record_type, 100); + EXPECT_EQ(tls_recv_cmsg(_metadata, self->cfd, record_type, + buf, sizeof(buf), MSG_WAITALL), + send_len); EXPECT_EQ(memcmp(buf, test_str, send_len), 0); } +TEST_F(tls, control_msg_nomerge) +{ + char *rec1 = "1111"; + char *rec2 = "2222"; + int send_len = 5; + char buf[15]; + + if (self->notls) + SKIP(return, "no TLS support"); + + EXPECT_EQ(tls_send_cmsg(self->fd, 100, rec1, send_len, 0), send_len); + EXPECT_EQ(tls_send_cmsg(self->fd, 100, rec2, send_len, 0), send_len); + + EXPECT_EQ(tls_recv_cmsg(_metadata, self->cfd, 100, buf, sizeof(buf), MSG_PEEK), send_len); + EXPECT_EQ(memcmp(buf, rec1, send_len), 0); + + EXPECT_EQ(tls_recv_cmsg(_metadata, self->cfd, 100, buf, sizeof(buf), MSG_PEEK), send_len); + EXPECT_EQ(memcmp(buf, rec1, send_len), 0); + + EXPECT_EQ(tls_recv_cmsg(_metadata, self->cfd, 100, buf, sizeof(buf), 0), send_len); + EXPECT_EQ(memcmp(buf, rec1, send_len), 0); + + EXPECT_EQ(tls_recv_cmsg(_metadata, self->cfd, 100, buf, sizeof(buf), 0), send_len); + EXPECT_EQ(memcmp(buf, rec2, send_len), 0); +} + +TEST_F(tls, data_control_data) +{ + char *rec1 = "1111"; + char *rec2 = "2222"; + char *rec3 = "3333"; + int send_len = 5; + char buf[15]; + + if (self->notls) + SKIP(return, "no TLS support"); + + EXPECT_EQ(send(self->fd, rec1, send_len, 0), send_len); + EXPECT_EQ(tls_send_cmsg(self->fd, 100, rec2, send_len, 0), send_len); + EXPECT_EQ(send(self->fd, rec3, send_len, 0), send_len); + + EXPECT_EQ(recv(self->cfd, buf, sizeof(buf), MSG_PEEK), send_len); + EXPECT_EQ(recv(self->cfd, buf, sizeof(buf), MSG_PEEK), send_len); +} + TEST_F(tls, shutdown) { char const *test_str = "test_read"; @@ -1217,6 +1581,356 @@ TEST_F(tls, shutdown_reuse) EXPECT_EQ(errno, EISCONN); } +TEST_F(tls, getsockopt) +{ + struct tls_crypto_info_keys expect, get; + socklen_t len; + + /* get only the version/cipher */ + len = sizeof(struct tls_crypto_info); + memrnd(&get, sizeof(get)); + EXPECT_EQ(getsockopt(self->fd, SOL_TLS, TLS_TX, &get, &len), 0); + EXPECT_EQ(len, sizeof(struct tls_crypto_info)); + EXPECT_EQ(get.crypto_info.version, variant->tls_version); + EXPECT_EQ(get.crypto_info.cipher_type, variant->cipher_type); + + /* get the full crypto_info */ + tls_crypto_info_init(variant->tls_version, variant->cipher_type, &expect); + len = expect.len; + memrnd(&get, sizeof(get)); + EXPECT_EQ(getsockopt(self->fd, SOL_TLS, TLS_TX, &get, &len), 0); + EXPECT_EQ(len, expect.len); + EXPECT_EQ(get.crypto_info.version, variant->tls_version); + EXPECT_EQ(get.crypto_info.cipher_type, variant->cipher_type); + EXPECT_EQ(memcmp(&get, &expect, expect.len), 0); + + /* short get should fail */ + len = sizeof(struct tls_crypto_info) - 1; + EXPECT_EQ(getsockopt(self->fd, SOL_TLS, TLS_TX, &get, &len), -1); + EXPECT_EQ(errno, EINVAL); + + /* partial get of the cipher data should fail */ + len = expect.len - 1; + EXPECT_EQ(getsockopt(self->fd, SOL_TLS, TLS_TX, &get, &len), -1); + EXPECT_EQ(errno, EINVAL); +} + +FIXTURE(tls_err) +{ + int fd, cfd; + int fd2, cfd2; + bool notls; +}; + +FIXTURE_VARIANT(tls_err) +{ + uint16_t tls_version; +}; + +FIXTURE_VARIANT_ADD(tls_err, 12_aes_gcm) +{ + .tls_version = TLS_1_2_VERSION, +}; + +FIXTURE_VARIANT_ADD(tls_err, 13_aes_gcm) +{ + .tls_version = TLS_1_3_VERSION, +}; + +FIXTURE_SETUP(tls_err) +{ + struct tls_crypto_info_keys tls12; + int ret; + + tls_crypto_info_init(variant->tls_version, TLS_CIPHER_AES_GCM_128, + &tls12); + + ulp_sock_pair(_metadata, &self->fd, &self->cfd, &self->notls); + ulp_sock_pair(_metadata, &self->fd2, &self->cfd2, &self->notls); + if (self->notls) + return; + + ret = setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len); + ASSERT_EQ(ret, 0); + + ret = setsockopt(self->cfd2, SOL_TLS, TLS_RX, &tls12, tls12.len); + ASSERT_EQ(ret, 0); +} + +FIXTURE_TEARDOWN(tls_err) +{ + close(self->fd); + close(self->cfd); + close(self->fd2); + close(self->cfd2); +} + +TEST_F(tls_err, bad_rec) +{ + char buf[64]; + + if (self->notls) + SKIP(return, "no TLS support"); + + memset(buf, 0x55, sizeof(buf)); + EXPECT_EQ(send(self->fd2, buf, sizeof(buf), 0), sizeof(buf)); + EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), -1); + EXPECT_EQ(errno, EMSGSIZE); + EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), MSG_DONTWAIT), -1); + EXPECT_EQ(errno, EAGAIN); +} + +TEST_F(tls_err, bad_auth) +{ + char buf[128]; + int n; + + if (self->notls) + SKIP(return, "no TLS support"); + + memrnd(buf, sizeof(buf) / 2); + EXPECT_EQ(send(self->fd, buf, sizeof(buf) / 2, 0), sizeof(buf) / 2); + n = recv(self->cfd, buf, sizeof(buf), 0); + EXPECT_GT(n, sizeof(buf) / 2); + + buf[n - 1]++; + + EXPECT_EQ(send(self->fd2, buf, n, 0), n); + EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), -1); + EXPECT_EQ(errno, EBADMSG); + EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), -1); + EXPECT_EQ(errno, EBADMSG); +} + +TEST_F(tls_err, bad_in_large_read) +{ + char txt[3][64]; + char cip[3][128]; + char buf[3 * 128]; + int i, n; + + if (self->notls) + SKIP(return, "no TLS support"); + + /* Put 3 records in the sockets */ + for (i = 0; i < 3; i++) { + memrnd(txt[i], sizeof(txt[i])); + EXPECT_EQ(send(self->fd, txt[i], sizeof(txt[i]), 0), + sizeof(txt[i])); + n = recv(self->cfd, cip[i], sizeof(cip[i]), 0); + EXPECT_GT(n, sizeof(txt[i])); + /* Break the third message */ + if (i == 2) + cip[2][n - 1]++; + EXPECT_EQ(send(self->fd2, cip[i], n, 0), n); + } + + /* We should be able to receive the first two messages */ + EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), sizeof(txt[0]) * 2); + EXPECT_EQ(memcmp(buf, txt[0], sizeof(txt[0])), 0); + EXPECT_EQ(memcmp(buf + sizeof(txt[0]), txt[1], sizeof(txt[1])), 0); + /* Third mesasge is bad */ + EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), -1); + EXPECT_EQ(errno, EBADMSG); + EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), -1); + EXPECT_EQ(errno, EBADMSG); +} + +TEST_F(tls_err, bad_cmsg) +{ + char *test_str = "test_read"; + int send_len = 10; + char cip[128]; + char buf[128]; + char txt[64]; + int n; + + if (self->notls) + SKIP(return, "no TLS support"); + + /* Queue up one data record */ + memrnd(txt, sizeof(txt)); + EXPECT_EQ(send(self->fd, txt, sizeof(txt), 0), sizeof(txt)); + n = recv(self->cfd, cip, sizeof(cip), 0); + EXPECT_GT(n, sizeof(txt)); + EXPECT_EQ(send(self->fd2, cip, n, 0), n); + + EXPECT_EQ(tls_send_cmsg(self->fd, 100, test_str, send_len, 0), 10); + n = recv(self->cfd, cip, sizeof(cip), 0); + cip[n - 1]++; /* Break it */ + EXPECT_GT(n, send_len); + EXPECT_EQ(send(self->fd2, cip, n, 0), n); + + EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), sizeof(txt)); + EXPECT_EQ(memcmp(buf, txt, sizeof(txt)), 0); + EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), -1); + EXPECT_EQ(errno, EBADMSG); + EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), -1); + EXPECT_EQ(errno, EBADMSG); +} + +TEST_F(tls_err, timeo) +{ + struct timeval tv = { .tv_usec = 10000, }; + char buf[128]; + int ret; + + if (self->notls) + SKIP(return, "no TLS support"); + + ret = setsockopt(self->cfd2, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)); + ASSERT_EQ(ret, 0); + + ret = fork(); + ASSERT_GE(ret, 0); + + if (ret) { + usleep(1000); /* Give child a head start */ + + EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), -1); + EXPECT_EQ(errno, EAGAIN); + + EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), -1); + EXPECT_EQ(errno, EAGAIN); + + wait(&ret); + } else { + EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), -1); + EXPECT_EQ(errno, EAGAIN); + exit(0); + } +} + +TEST_F(tls_err, poll_partial_rec) +{ + struct pollfd pfd = { }; + ssize_t rec_len; + char rec[256]; + char buf[128]; + + if (self->notls) + SKIP(return, "no TLS support"); + + pfd.fd = self->cfd2; + pfd.events = POLLIN; + EXPECT_EQ(poll(&pfd, 1, 1), 0); + + memrnd(buf, sizeof(buf)); + EXPECT_EQ(send(self->fd, buf, sizeof(buf), 0), sizeof(buf)); + rec_len = recv(self->cfd, rec, sizeof(rec), 0); + EXPECT_GT(rec_len, sizeof(buf)); + + /* Write 100B, not the full record ... */ + EXPECT_EQ(send(self->fd2, rec, 100, 0), 100); + /* ... no full record should mean no POLLIN */ + pfd.fd = self->cfd2; + pfd.events = POLLIN; + EXPECT_EQ(poll(&pfd, 1, 1), 0); + /* Now write the rest, and it should all pop out of the other end. */ + EXPECT_EQ(send(self->fd2, rec + 100, rec_len - 100, 0), rec_len - 100); + pfd.fd = self->cfd2; + pfd.events = POLLIN; + EXPECT_EQ(poll(&pfd, 1, 1), 1); + EXPECT_EQ(recv(self->cfd2, rec, sizeof(rec), 0), sizeof(buf)); + EXPECT_EQ(memcmp(buf, rec, sizeof(buf)), 0); +} + +TEST_F(tls_err, epoll_partial_rec) +{ + struct epoll_event ev, events[10]; + ssize_t rec_len; + char rec[256]; + char buf[128]; + int epollfd; + + if (self->notls) + SKIP(return, "no TLS support"); + + epollfd = epoll_create1(0); + ASSERT_GE(epollfd, 0); + + memset(&ev, 0, sizeof(ev)); + ev.events = EPOLLIN; + ev.data.fd = self->cfd2; + ASSERT_GE(epoll_ctl(epollfd, EPOLL_CTL_ADD, self->cfd2, &ev), 0); + + EXPECT_EQ(epoll_wait(epollfd, events, 10, 0), 0); + + memrnd(buf, sizeof(buf)); + EXPECT_EQ(send(self->fd, buf, sizeof(buf), 0), sizeof(buf)); + rec_len = recv(self->cfd, rec, sizeof(rec), 0); + EXPECT_GT(rec_len, sizeof(buf)); + + /* Write 100B, not the full record ... */ + EXPECT_EQ(send(self->fd2, rec, 100, 0), 100); + /* ... no full record should mean no POLLIN */ + EXPECT_EQ(epoll_wait(epollfd, events, 10, 0), 0); + /* Now write the rest, and it should all pop out of the other end. */ + EXPECT_EQ(send(self->fd2, rec + 100, rec_len - 100, 0), rec_len - 100); + EXPECT_EQ(epoll_wait(epollfd, events, 10, 0), 1); + EXPECT_EQ(recv(self->cfd2, rec, sizeof(rec), 0), sizeof(buf)); + EXPECT_EQ(memcmp(buf, rec, sizeof(buf)), 0); + + close(epollfd); +} + +TEST_F(tls_err, poll_partial_rec_async) +{ + struct pollfd pfd = { }; + ssize_t rec_len; + char rec[256]; + char buf[128]; + char token; + int p[2]; + int ret; + + if (self->notls) + SKIP(return, "no TLS support"); + + ASSERT_GE(pipe(p), 0); + + memrnd(buf, sizeof(buf)); + EXPECT_EQ(send(self->fd, buf, sizeof(buf), 0), sizeof(buf)); + rec_len = recv(self->cfd, rec, sizeof(rec), 0); + EXPECT_GT(rec_len, sizeof(buf)); + + ret = fork(); + ASSERT_GE(ret, 0); + + if (ret) { + int status, pid2; + + close(p[1]); + usleep(1000); /* Give child a head start */ + + EXPECT_EQ(send(self->fd2, rec, 100, 0), 100); + + EXPECT_EQ(read(p[0], &token, 1), 1); /* Barrier #1 */ + + EXPECT_EQ(send(self->fd2, rec + 100, rec_len - 100, 0), + rec_len - 100); + + pid2 = wait(&status); + EXPECT_EQ(pid2, ret); + EXPECT_EQ(status, 0); + } else { + close(p[0]); + + /* Child should sleep in poll(), never get a wake */ + pfd.fd = self->cfd2; + pfd.events = POLLIN; + EXPECT_EQ(poll(&pfd, 1, 20), 0); + + EXPECT_EQ(write(p[1], &token, 1), 1); /* Barrier #1 */ + + pfd.fd = self->cfd2; + pfd.events = POLLIN; + EXPECT_EQ(poll(&pfd, 1, 20), 1); + + exit(!__test_passed(_metadata)); + } +} + TEST(non_established) { struct tls12_crypto_info_aes_gcm_256 tls12; struct sockaddr_in addr; @@ -1271,64 +1985,191 @@ TEST(non_established) { TEST(keysizes) { struct tls12_crypto_info_aes_gcm_256 tls12; - struct sockaddr_in addr; - int sfd, ret, fd, cfd; - socklen_t len; + int ret, fd, cfd; bool notls; - notls = false; - len = sizeof(addr); - memset(&tls12, 0, sizeof(tls12)); tls12.info.version = TLS_1_2_VERSION; tls12.info.cipher_type = TLS_CIPHER_AES_GCM_256; - addr.sin_family = AF_INET; - addr.sin_addr.s_addr = htonl(INADDR_ANY); - addr.sin_port = 0; + ulp_sock_pair(_metadata, &fd, &cfd, ¬ls); - fd = socket(AF_INET, SOCK_STREAM, 0); - sfd = socket(AF_INET, SOCK_STREAM, 0); + if (!notls) { + ret = setsockopt(fd, SOL_TLS, TLS_TX, &tls12, + sizeof(tls12)); + EXPECT_EQ(ret, 0); + + ret = setsockopt(cfd, SOL_TLS, TLS_RX, &tls12, + sizeof(tls12)); + EXPECT_EQ(ret, 0); + } + + close(fd); + close(cfd); +} + +TEST(no_pad) { + struct tls12_crypto_info_aes_gcm_256 tls12; + int ret, fd, cfd, val; + socklen_t len; + bool notls; + + memset(&tls12, 0, sizeof(tls12)); + tls12.info.version = TLS_1_3_VERSION; + tls12.info.cipher_type = TLS_CIPHER_AES_GCM_256; + + ulp_sock_pair(_metadata, &fd, &cfd, ¬ls); + + if (notls) + exit(KSFT_SKIP); + + ret = setsockopt(fd, SOL_TLS, TLS_TX, &tls12, sizeof(tls12)); + EXPECT_EQ(ret, 0); + + ret = setsockopt(cfd, SOL_TLS, TLS_RX, &tls12, sizeof(tls12)); + EXPECT_EQ(ret, 0); + + val = 1; + ret = setsockopt(cfd, SOL_TLS, TLS_RX_EXPECT_NO_PAD, + (void *)&val, sizeof(val)); + EXPECT_EQ(ret, 0); + + len = sizeof(val); + val = 2; + ret = getsockopt(cfd, SOL_TLS, TLS_RX_EXPECT_NO_PAD, + (void *)&val, &len); + EXPECT_EQ(ret, 0); + EXPECT_EQ(val, 1); + EXPECT_EQ(len, 4); + + val = 0; + ret = setsockopt(cfd, SOL_TLS, TLS_RX_EXPECT_NO_PAD, + (void *)&val, sizeof(val)); + EXPECT_EQ(ret, 0); + + len = sizeof(val); + val = 2; + ret = getsockopt(cfd, SOL_TLS, TLS_RX_EXPECT_NO_PAD, + (void *)&val, &len); + EXPECT_EQ(ret, 0); + EXPECT_EQ(val, 0); + EXPECT_EQ(len, 4); + + close(fd); + close(cfd); +} + +TEST(tls_v6ops) { + struct tls_crypto_info_keys tls12; + struct sockaddr_in6 addr, addr2; + int sfd, ret, fd; + socklen_t len, len2; + + tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_GCM_128, &tls12); + + addr.sin6_family = AF_INET6; + addr.sin6_addr = in6addr_any; + addr.sin6_port = 0; + + fd = socket(AF_INET6, SOCK_STREAM, 0); + sfd = socket(AF_INET6, SOCK_STREAM, 0); ret = bind(sfd, &addr, sizeof(addr)); ASSERT_EQ(ret, 0); ret = listen(sfd, 10); ASSERT_EQ(ret, 0); + len = sizeof(addr); ret = getsockname(sfd, &addr, &len); ASSERT_EQ(ret, 0); ret = connect(fd, &addr, sizeof(addr)); ASSERT_EQ(ret, 0); + len = sizeof(addr); + ret = getsockname(fd, &addr, &len); + ASSERT_EQ(ret, 0); + ret = setsockopt(fd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls")); - if (ret != 0) { - notls = true; - printf("Failure setting TCP_ULP, testing without tls\n"); + if (ret) { + ASSERT_EQ(errno, ENOENT); + SKIP(return, "no TLS support"); } + ASSERT_EQ(ret, 0); - if (!notls) { - ret = setsockopt(fd, SOL_TLS, TLS_TX, &tls12, - sizeof(tls12)); - EXPECT_EQ(ret, 0); - } + ret = setsockopt(fd, SOL_TLS, TLS_TX, &tls12, tls12.len); + ASSERT_EQ(ret, 0); - cfd = accept(sfd, &addr, &len); - ASSERT_GE(cfd, 0); + ret = setsockopt(fd, SOL_TLS, TLS_RX, &tls12, tls12.len); + ASSERT_EQ(ret, 0); - if (!notls) { - ret = setsockopt(cfd, IPPROTO_TCP, TCP_ULP, "tls", - sizeof("tls")); - EXPECT_EQ(ret, 0); + len2 = sizeof(addr2); + ret = getsockname(fd, &addr2, &len2); + ASSERT_EQ(ret, 0); - ret = setsockopt(cfd, SOL_TLS, TLS_RX, &tls12, - sizeof(tls12)); - EXPECT_EQ(ret, 0); - } + EXPECT_EQ(len2, len); + EXPECT_EQ(memcmp(&addr, &addr2, len), 0); + close(fd); close(sfd); +} + +TEST(prequeue) { + struct tls_crypto_info_keys tls12; + char buf[20000], buf2[20000]; + struct sockaddr_in addr; + int sfd, cfd, ret, fd; + socklen_t len; + + len = sizeof(addr); + memrnd(buf, sizeof(buf)); + + tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_GCM_256, &tls12); + + addr.sin_family = AF_INET; + addr.sin_addr.s_addr = htonl(INADDR_ANY); + addr.sin_port = 0; + + fd = socket(AF_INET, SOCK_STREAM, 0); + sfd = socket(AF_INET, SOCK_STREAM, 0); + + ASSERT_EQ(bind(sfd, &addr, sizeof(addr)), 0); + ASSERT_EQ(listen(sfd, 10), 0); + ASSERT_EQ(getsockname(sfd, &addr, &len), 0); + ASSERT_EQ(connect(fd, &addr, sizeof(addr)), 0); + ASSERT_GE(cfd = accept(sfd, &addr, &len), 0); + close(sfd); + + ret = setsockopt(fd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls")); + if (ret) { + ASSERT_EQ(errno, ENOENT); + SKIP(return, "no TLS support"); + } + + ASSERT_EQ(setsockopt(fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0); + EXPECT_EQ(send(fd, buf, sizeof(buf), MSG_DONTWAIT), sizeof(buf)); + + ASSERT_EQ(setsockopt(cfd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls")), 0); + ASSERT_EQ(setsockopt(cfd, SOL_TLS, TLS_RX, &tls12, tls12.len), 0); + EXPECT_EQ(recv(cfd, buf2, sizeof(buf2), MSG_WAITALL), sizeof(buf2)); + + EXPECT_EQ(memcmp(buf, buf2, sizeof(buf)), 0); + close(fd); close(cfd); } +static void __attribute__((constructor)) fips_check(void) { + int res; + FILE *f; + + f = fopen("/proc/sys/crypto/fips_enabled", "r"); + if (f) { + res = fscanf(f, "%d", &fips_enabled); + if (res != 1) + ksft_print_msg("ERROR: Couldn't read /proc/sys/crypto/fips_enabled\n"); + fclose(f); + } +} + TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/net/toeplitz.c b/tools/testing/selftests/net/toeplitz.c new file mode 100644 index 000000000000..9ba03164d73a --- /dev/null +++ b/tools/testing/selftests/net/toeplitz.c @@ -0,0 +1,589 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Toeplitz test + * + * 1. Read packets and their rx_hash using PF_PACKET/TPACKET_V3 + * 2. Compute the rx_hash in software based on the packet contents + * 3. Compare the two + * + * Optionally, either '-C $rx_irq_cpu_list' or '-r $rps_bitmap' may be given. + * + * If '-C $rx_irq_cpu_list' is given, also + * + * 4. Identify the cpu on which the packet arrived with PACKET_FANOUT_CPU + * 5. Compute the rxqueue that RSS would select based on this rx_hash + * 6. Using the $rx_irq_cpu_list map, identify the arriving cpu based on rxq irq + * 7. Compare the cpus from 4 and 6 + * + * Else if '-r $rps_bitmap' is given, also + * + * 4. Identify the cpu on which the packet arrived with PACKET_FANOUT_CPU + * 5. Compute the cpu that RPS should select based on rx_hash and $rps_bitmap + * 6. Compare the cpus from 4 and 5 + */ + +#define _GNU_SOURCE + +#include <arpa/inet.h> +#include <errno.h> +#include <error.h> +#include <fcntl.h> +#include <getopt.h> +#include <linux/filter.h> +#include <linux/if_ether.h> +#include <linux/if_packet.h> +#include <net/if.h> +#include <netdb.h> +#include <netinet/ip.h> +#include <netinet/ip6.h> +#include <netinet/tcp.h> +#include <netinet/udp.h> +#include <poll.h> +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/mman.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/sysinfo.h> +#include <sys/time.h> +#include <sys/types.h> +#include <unistd.h> + +#include "../kselftest.h" + +#define TOEPLITZ_KEY_MIN_LEN 40 +#define TOEPLITZ_KEY_MAX_LEN 60 + +#define TOEPLITZ_STR_LEN(K) (((K) * 3) - 1) /* hex encoded: AA:BB:CC:...:ZZ */ +#define TOEPLITZ_STR_MIN_LEN TOEPLITZ_STR_LEN(TOEPLITZ_KEY_MIN_LEN) +#define TOEPLITZ_STR_MAX_LEN TOEPLITZ_STR_LEN(TOEPLITZ_KEY_MAX_LEN) + +#define FOUR_TUPLE_MAX_LEN ((sizeof(struct in6_addr) * 2) + (sizeof(uint16_t) * 2)) + +#define RSS_MAX_CPUS (1 << 16) /* real constraint is PACKET_FANOUT_MAX */ + +#define RPS_MAX_CPUS 16UL /* must be a power of 2 */ + +/* configuration options (cmdline arguments) */ +static uint16_t cfg_dport = 8000; +static int cfg_family = AF_INET6; +static char *cfg_ifname = "eth0"; +static int cfg_num_queues; +static int cfg_num_rps_cpus; +static bool cfg_sink; +static int cfg_type = SOCK_STREAM; +static int cfg_timeout_msec = 1000; +static bool cfg_verbose; + +/* global vars */ +static int num_cpus; +static int ring_block_nr; +static int ring_block_sz; + +/* stats */ +static int frames_received; +static int frames_nohash; +static int frames_error; + +#define log_verbose(args...) do { if (cfg_verbose) fprintf(stderr, args); } while (0) + +/* tpacket ring */ +struct ring_state { + int fd; + char *mmap; + int idx; + int cpu; +}; + +static unsigned int rx_irq_cpus[RSS_MAX_CPUS]; /* map from rxq to cpu */ +static int rps_silo_to_cpu[RPS_MAX_CPUS]; +static unsigned char toeplitz_key[TOEPLITZ_KEY_MAX_LEN]; +static struct ring_state rings[RSS_MAX_CPUS]; + +static inline uint32_t toeplitz(const unsigned char *four_tuple, + const unsigned char *key) +{ + int i, bit, ret = 0; + uint32_t key32; + + key32 = ntohl(*((uint32_t *)key)); + key += 4; + + for (i = 0; i < FOUR_TUPLE_MAX_LEN; i++) { + for (bit = 7; bit >= 0; bit--) { + if (four_tuple[i] & (1 << bit)) + ret ^= key32; + + key32 <<= 1; + key32 |= !!(key[0] & (1 << bit)); + } + key++; + } + + return ret; +} + +/* Compare computed cpu with arrival cpu from packet_fanout_cpu */ +static void verify_rss(uint32_t rx_hash, int cpu) +{ + int queue = rx_hash % cfg_num_queues; + + log_verbose(" rxq %d (cpu %d)", queue, rx_irq_cpus[queue]); + if (rx_irq_cpus[queue] != cpu) { + log_verbose(". error: rss cpu mismatch (%d)", cpu); + frames_error++; + } +} + +static void verify_rps(uint64_t rx_hash, int cpu) +{ + int silo = (rx_hash * cfg_num_rps_cpus) >> 32; + + log_verbose(" silo %d (cpu %d)", silo, rps_silo_to_cpu[silo]); + if (rps_silo_to_cpu[silo] != cpu) { + log_verbose(". error: rps cpu mismatch (%d)", cpu); + frames_error++; + } +} + +static void log_rxhash(int cpu, uint32_t rx_hash, + const char *addrs, int addr_len) +{ + char saddr[INET6_ADDRSTRLEN], daddr[INET6_ADDRSTRLEN]; + uint16_t *ports; + + if (!inet_ntop(cfg_family, addrs, saddr, sizeof(saddr)) || + !inet_ntop(cfg_family, addrs + addr_len, daddr, sizeof(daddr))) + error(1, 0, "address parse error"); + + ports = (void *)addrs + (addr_len * 2); + log_verbose("cpu %d: rx_hash 0x%08x [saddr %s daddr %s sport %02hu dport %02hu]", + cpu, rx_hash, saddr, daddr, + ntohs(ports[0]), ntohs(ports[1])); +} + +/* Compare computed rxhash with rxhash received from tpacket_v3 */ +static void verify_rxhash(const char *pkt, uint32_t rx_hash, int cpu) +{ + unsigned char four_tuple[FOUR_TUPLE_MAX_LEN] = {0}; + uint32_t rx_hash_sw; + const char *addrs; + int addr_len; + + if (cfg_family == AF_INET) { + addr_len = sizeof(struct in_addr); + addrs = pkt + offsetof(struct iphdr, saddr); + } else { + addr_len = sizeof(struct in6_addr); + addrs = pkt + offsetof(struct ip6_hdr, ip6_src); + } + + memcpy(four_tuple, addrs, (addr_len * 2) + (sizeof(uint16_t) * 2)); + rx_hash_sw = toeplitz(four_tuple, toeplitz_key); + + if (cfg_verbose) + log_rxhash(cpu, rx_hash, addrs, addr_len); + + if (rx_hash != rx_hash_sw) { + log_verbose(" != expected 0x%x\n", rx_hash_sw); + frames_error++; + return; + } + + log_verbose(" OK"); + if (cfg_num_queues) + verify_rss(rx_hash, cpu); + else if (cfg_num_rps_cpus) + verify_rps(rx_hash, cpu); + log_verbose("\n"); +} + +static char *recv_frame(const struct ring_state *ring, char *frame) +{ + struct tpacket3_hdr *hdr = (void *)frame; + + if (hdr->hv1.tp_rxhash) + verify_rxhash(frame + hdr->tp_net, hdr->hv1.tp_rxhash, + ring->cpu); + else + frames_nohash++; + + return frame + hdr->tp_next_offset; +} + +/* A single TPACKET_V3 block can hold multiple frames */ +static bool recv_block(struct ring_state *ring) +{ + struct tpacket_block_desc *block; + char *frame; + int i; + + block = (void *)(ring->mmap + ring->idx * ring_block_sz); + if (!(block->hdr.bh1.block_status & TP_STATUS_USER)) + return false; + + frame = (char *)block; + frame += block->hdr.bh1.offset_to_first_pkt; + + for (i = 0; i < block->hdr.bh1.num_pkts; i++) { + frame = recv_frame(ring, frame); + frames_received++; + } + + block->hdr.bh1.block_status = TP_STATUS_KERNEL; + ring->idx = (ring->idx + 1) % ring_block_nr; + + return true; +} + +/* simple test: sleep once unconditionally and then process all rings */ +static void process_rings(void) +{ + int i; + + usleep(1000 * cfg_timeout_msec); + + for (i = 0; i < num_cpus; i++) + do {} while (recv_block(&rings[i])); + + fprintf(stderr, "count: pass=%u nohash=%u fail=%u\n", + frames_received - frames_nohash - frames_error, + frames_nohash, frames_error); +} + +static char *setup_ring(int fd) +{ + struct tpacket_req3 req3 = {0}; + void *ring; + + req3.tp_retire_blk_tov = cfg_timeout_msec / 8; + req3.tp_feature_req_word = TP_FT_REQ_FILL_RXHASH; + + req3.tp_frame_size = 2048; + req3.tp_frame_nr = 1 << 10; + req3.tp_block_nr = 16; + + req3.tp_block_size = req3.tp_frame_size * req3.tp_frame_nr; + req3.tp_block_size /= req3.tp_block_nr; + + if (setsockopt(fd, SOL_PACKET, PACKET_RX_RING, &req3, sizeof(req3))) + error(1, errno, "setsockopt PACKET_RX_RING"); + + ring_block_sz = req3.tp_block_size; + ring_block_nr = req3.tp_block_nr; + + ring = mmap(0, req3.tp_block_size * req3.tp_block_nr, + PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_LOCKED | MAP_POPULATE, fd, 0); + if (ring == MAP_FAILED) + error(1, 0, "mmap failed"); + + return ring; +} + +static void __set_filter(int fd, int off_proto, uint8_t proto, int off_dport) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_LD + BPF_B + BPF_ABS, SKF_AD_OFF + SKF_AD_PKTTYPE), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, PACKET_HOST, 0, 4), + BPF_STMT(BPF_LD + BPF_B + BPF_ABS, off_proto), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, proto, 0, 2), + BPF_STMT(BPF_LD + BPF_H + BPF_ABS, off_dport), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, cfg_dport, 1, 0), + BPF_STMT(BPF_RET + BPF_K, 0), + BPF_STMT(BPF_RET + BPF_K, 0xFFFF), + }; + struct sock_fprog prog = {}; + + prog.filter = filter; + prog.len = ARRAY_SIZE(filter); + if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog))) + error(1, errno, "setsockopt filter"); +} + +/* filter on transport protocol and destination port */ +static void set_filter(int fd) +{ + const int off_dport = offsetof(struct tcphdr, dest); /* same for udp */ + uint8_t proto; + + proto = cfg_type == SOCK_STREAM ? IPPROTO_TCP : IPPROTO_UDP; + if (cfg_family == AF_INET) + __set_filter(fd, offsetof(struct iphdr, protocol), proto, + sizeof(struct iphdr) + off_dport); + else + __set_filter(fd, offsetof(struct ip6_hdr, ip6_nxt), proto, + sizeof(struct ip6_hdr) + off_dport); +} + +/* drop everything: used temporarily during setup */ +static void set_filter_null(int fd) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_RET + BPF_K, 0), + }; + struct sock_fprog prog = {}; + + prog.filter = filter; + prog.len = ARRAY_SIZE(filter); + if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog))) + error(1, errno, "setsockopt filter"); +} + +static int create_ring(char **ring) +{ + struct fanout_args args = { + .id = 1, + .type_flags = PACKET_FANOUT_CPU, + .max_num_members = RSS_MAX_CPUS + }; + struct sockaddr_ll ll = { 0 }; + int fd, val; + + fd = socket(PF_PACKET, SOCK_DGRAM, 0); + if (fd == -1) + error(1, errno, "socket creation failed"); + + val = TPACKET_V3; + if (setsockopt(fd, SOL_PACKET, PACKET_VERSION, &val, sizeof(val))) + error(1, errno, "setsockopt PACKET_VERSION"); + *ring = setup_ring(fd); + + /* block packets until all rings are added to the fanout group: + * else packets can arrive during setup and get misclassified + */ + set_filter_null(fd); + + ll.sll_family = AF_PACKET; + ll.sll_ifindex = if_nametoindex(cfg_ifname); + ll.sll_protocol = cfg_family == AF_INET ? htons(ETH_P_IP) : + htons(ETH_P_IPV6); + if (bind(fd, (void *)&ll, sizeof(ll))) + error(1, errno, "bind"); + + /* must come after bind: verifies all programs in group match */ + if (setsockopt(fd, SOL_PACKET, PACKET_FANOUT, &args, sizeof(args))) { + /* on failure, retry using old API if that is sufficient: + * it has a hard limit of 256 sockets, so only try if + * (a) only testing rxhash, not RSS or (b) <= 256 cpus. + * in this API, the third argument is left implicit. + */ + if (cfg_num_queues || num_cpus > 256 || + setsockopt(fd, SOL_PACKET, PACKET_FANOUT, + &args, sizeof(uint32_t))) + error(1, errno, "setsockopt PACKET_FANOUT cpu"); + } + + return fd; +} + +/* setup inet(6) socket to blackhole the test traffic, if arg '-s' */ +static int setup_sink(void) +{ + int fd, val; + + fd = socket(cfg_family, cfg_type, 0); + if (fd == -1) + error(1, errno, "socket %d.%d", cfg_family, cfg_type); + + val = 1 << 20; + if (setsockopt(fd, SOL_SOCKET, SO_RCVBUFFORCE, &val, sizeof(val))) + error(1, errno, "setsockopt rcvbuf"); + + return fd; +} + +static void setup_rings(void) +{ + int i; + + for (i = 0; i < num_cpus; i++) { + rings[i].cpu = i; + rings[i].fd = create_ring(&rings[i].mmap); + } + + /* accept packets once all rings in the fanout group are up */ + for (i = 0; i < num_cpus; i++) + set_filter(rings[i].fd); +} + +static void cleanup_rings(void) +{ + int i; + + for (i = 0; i < num_cpus; i++) { + if (munmap(rings[i].mmap, ring_block_nr * ring_block_sz)) + error(1, errno, "munmap"); + if (close(rings[i].fd)) + error(1, errno, "close"); + } +} + +static void parse_cpulist(const char *arg) +{ + do { + rx_irq_cpus[cfg_num_queues++] = strtol(arg, NULL, 10); + + arg = strchr(arg, ','); + if (!arg) + break; + arg++; // skip ',' + } while (1); +} + +static void show_cpulist(void) +{ + int i; + + for (i = 0; i < cfg_num_queues; i++) + fprintf(stderr, "rxq %d: cpu %d\n", i, rx_irq_cpus[i]); +} + +static void show_silos(void) +{ + int i; + + for (i = 0; i < cfg_num_rps_cpus; i++) + fprintf(stderr, "silo %d: cpu %d\n", i, rps_silo_to_cpu[i]); +} + +static void parse_toeplitz_key(const char *str, int slen, unsigned char *key) +{ + int i, ret, off; + + if (slen < TOEPLITZ_STR_MIN_LEN || + slen > TOEPLITZ_STR_MAX_LEN + 1) + error(1, 0, "invalid toeplitz key"); + + for (i = 0, off = 0; off < slen; i++, off += 3) { + ret = sscanf(str + off, "%hhx", &key[i]); + if (ret != 1) + error(1, 0, "key parse error at %d off %d len %d", + i, off, slen); + } +} + +static void parse_rps_bitmap(const char *arg) +{ + unsigned long bitmap; + int i; + + bitmap = strtoul(arg, NULL, 0); + + if (bitmap & ~(RPS_MAX_CPUS - 1)) + error(1, 0, "rps bitmap 0x%lx out of bounds 0..%lu", + bitmap, RPS_MAX_CPUS - 1); + + for (i = 0; i < RPS_MAX_CPUS; i++) + if (bitmap & 1UL << i) + rps_silo_to_cpu[cfg_num_rps_cpus++] = i; +} + +static void parse_opts(int argc, char **argv) +{ + static struct option long_options[] = { + {"dport", required_argument, 0, 'd'}, + {"cpus", required_argument, 0, 'C'}, + {"key", required_argument, 0, 'k'}, + {"iface", required_argument, 0, 'i'}, + {"ipv4", no_argument, 0, '4'}, + {"ipv6", no_argument, 0, '6'}, + {"sink", no_argument, 0, 's'}, + {"tcp", no_argument, 0, 't'}, + {"timeout", required_argument, 0, 'T'}, + {"udp", no_argument, 0, 'u'}, + {"verbose", no_argument, 0, 'v'}, + {"rps", required_argument, 0, 'r'}, + {0, 0, 0, 0} + }; + bool have_toeplitz = false; + int index, c; + + while ((c = getopt_long(argc, argv, "46C:d:i:k:r:stT:uv", long_options, &index)) != -1) { + switch (c) { + case '4': + cfg_family = AF_INET; + break; + case '6': + cfg_family = AF_INET6; + break; + case 'C': + parse_cpulist(optarg); + break; + case 'd': + cfg_dport = strtol(optarg, NULL, 0); + break; + case 'i': + cfg_ifname = optarg; + break; + case 'k': + parse_toeplitz_key(optarg, strlen(optarg), + toeplitz_key); + have_toeplitz = true; + break; + case 'r': + parse_rps_bitmap(optarg); + break; + case 's': + cfg_sink = true; + break; + case 't': + cfg_type = SOCK_STREAM; + break; + case 'T': + cfg_timeout_msec = strtol(optarg, NULL, 0); + break; + case 'u': + cfg_type = SOCK_DGRAM; + break; + case 'v': + cfg_verbose = true; + break; + + default: + error(1, 0, "unknown option %c", optopt); + break; + } + } + + if (!have_toeplitz) + error(1, 0, "Must supply rss key ('-k')"); + + num_cpus = get_nprocs(); + if (num_cpus > RSS_MAX_CPUS) + error(1, 0, "increase RSS_MAX_CPUS"); + + if (cfg_num_queues && cfg_num_rps_cpus) + error(1, 0, + "Can't supply both RSS cpus ('-C') and RPS map ('-r')"); + if (cfg_verbose) { + show_cpulist(); + show_silos(); + } +} + +int main(int argc, char **argv) +{ + const int min_tests = 10; + int fd_sink = -1; + + parse_opts(argc, argv); + + if (cfg_sink) + fd_sink = setup_sink(); + + setup_rings(); + process_rings(); + cleanup_rings(); + + if (cfg_sink && close(fd_sink)) + error(1, errno, "close sink"); + + if (frames_received - frames_nohash < min_tests) + error(1, 0, "too few frames for verification"); + + return frames_error; +} diff --git a/tools/testing/selftests/net/toeplitz.sh b/tools/testing/selftests/net/toeplitz.sh new file mode 100755 index 000000000000..8ff172f7bb1b --- /dev/null +++ b/tools/testing/selftests/net/toeplitz.sh @@ -0,0 +1,199 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# extended toeplitz test: test rxhash plus, optionally, either (1) rss mapping +# from rxhash to rx queue ('-rss') or (2) rps mapping from rxhash to cpu +# ('-rps <rps_map>') +# +# irq-pattern-prefix can be derived from /sys/kernel/irq/*/action, +# which is a driver-specific encoding. +# +# invoke as ./toeplitz.sh (-i <iface>) -u|-t -4|-6 \ +# [(-rss -irq_prefix <irq-pattern-prefix>)|(-rps <rps_map>)] + +source setup_loopback.sh +readonly SERVER_IP4="192.168.1.200/24" +readonly SERVER_IP6="fda8::1/64" +readonly SERVER_MAC="aa:00:00:00:00:02" + +readonly CLIENT_IP4="192.168.1.100/24" +readonly CLIENT_IP6="fda8::2/64" +readonly CLIENT_MAC="aa:00:00:00:00:01" + +PORT=8000 +KEY="$(</proc/sys/net/core/netdev_rss_key)" +TEST_RSS=false +RPS_MAP="" +PROTO_FLAG="" +IP_FLAG="" +DEV="eth0" + +# Return the number of rxqs among which RSS is configured to spread packets. +# This is determined by reading the RSS indirection table using ethtool. +get_rss_cfg_num_rxqs() { + echo $(ethtool -x "${DEV}" | + grep -E [[:space:]]+[0-9]+:[[:space:]]+ | + cut -d: -f2- | + awk '{$1=$1};1' | + tr ' ' '\n' | + sort -u | + wc -l) +} + +# Return a list of the receive irq handler cpus. +# The list is ordered by the irqs, so first rxq-0 cpu, then rxq-1 cpu, etc. +# Reads /sys/kernel/irq/ in order, so algorithm depends on +# irq_{rxq-0} < irq_{rxq-1}, etc. +get_rx_irq_cpus() { + CPUS="" + # sort so that irq 2 is read before irq 10 + SORTED_IRQS=$(for i in /sys/kernel/irq/*; do echo $i; done | sort -V) + # Consider only as many queues as RSS actually uses. We assume that + # if RSS_CFG_NUM_RXQS=N, then RSS uses rxqs 0-(N-1). + RSS_CFG_NUM_RXQS=$(get_rss_cfg_num_rxqs) + RXQ_COUNT=0 + + for i in ${SORTED_IRQS} + do + [[ "${RXQ_COUNT}" -lt "${RSS_CFG_NUM_RXQS}" ]] || break + # lookup relevant IRQs by action name + [[ -e "$i/actions" ]] || continue + cat "$i/actions" | grep -q "${IRQ_PATTERN}" || continue + irqname=$(<"$i/actions") + + # does the IRQ get called + irqcount=$(cat "$i/per_cpu_count" | tr -d '0,') + [[ -n "${irqcount}" ]] || continue + + # lookup CPU + irq=$(basename "$i") + cpu=$(cat "/proc/irq/$irq/smp_affinity_list") + + if [[ -z "${CPUS}" ]]; then + CPUS="${cpu}" + else + CPUS="${CPUS},${cpu}" + fi + RXQ_COUNT=$((RXQ_COUNT+1)) + done + + echo "${CPUS}" +} + +get_disable_rfs_cmd() { + echo "echo 0 > /proc/sys/net/core/rps_sock_flow_entries;" +} + +get_set_rps_bitmaps_cmd() { + CMD="" + for i in /sys/class/net/${DEV}/queues/rx-*/rps_cpus + do + CMD="${CMD} echo $1 > ${i};" + done + + echo "${CMD}" +} + +get_disable_rps_cmd() { + echo "$(get_set_rps_bitmaps_cmd 0)" +} + +die() { + echo "$1" + exit 1 +} + +check_nic_rxhash_enabled() { + local -r pattern="receive-hashing:\ on" + + ethtool -k "${DEV}" | grep -q "${pattern}" || die "rxhash must be enabled" +} + +parse_opts() { + local prog=$0 + shift 1 + + while [[ "$1" =~ "-" ]]; do + if [[ "$1" = "-irq_prefix" ]]; then + shift + IRQ_PATTERN="^$1-[0-9]*$" + elif [[ "$1" = "-u" || "$1" = "-t" ]]; then + PROTO_FLAG="$1" + elif [[ "$1" = "-4" ]]; then + IP_FLAG="$1" + SERVER_IP="${SERVER_IP4}" + CLIENT_IP="${CLIENT_IP4}" + elif [[ "$1" = "-6" ]]; then + IP_FLAG="$1" + SERVER_IP="${SERVER_IP6}" + CLIENT_IP="${CLIENT_IP6}" + elif [[ "$1" = "-rss" ]]; then + TEST_RSS=true + elif [[ "$1" = "-rps" ]]; then + shift + RPS_MAP="$1" + elif [[ "$1" = "-i" ]]; then + shift + DEV="$1" + else + die "Usage: ${prog} (-i <iface>) -u|-t -4|-6 \ + [(-rss -irq_prefix <irq-pattern-prefix>)|(-rps <rps_map>)]" + fi + shift + done +} + +setup() { + setup_loopback_environment "${DEV}" + + # Set up server_ns namespace and client_ns namespace + setup_macvlan_ns "${DEV}" $server_ns server \ + "${SERVER_MAC}" "${SERVER_IP}" + setup_macvlan_ns "${DEV}" $client_ns client \ + "${CLIENT_MAC}" "${CLIENT_IP}" +} + +cleanup() { + cleanup_macvlan_ns $server_ns server $client_ns client + cleanup_loopback "${DEV}" +} + +parse_opts $0 $@ + +setup +trap cleanup EXIT + +check_nic_rxhash_enabled + +# Actual test starts here +if [[ "${TEST_RSS}" = true ]]; then + # RPS/RFS must be disabled because they move packets between cpus, + # which breaks the PACKET_FANOUT_CPU identification of RSS decisions. + eval "$(get_disable_rfs_cmd) $(get_disable_rps_cmd)" \ + ip netns exec $server_ns ./toeplitz "${IP_FLAG}" "${PROTO_FLAG}" \ + -d "${PORT}" -i "${DEV}" -k "${KEY}" -T 1000 \ + -C "$(get_rx_irq_cpus)" -s -v & +elif [[ ! -z "${RPS_MAP}" ]]; then + eval "$(get_disable_rfs_cmd) $(get_set_rps_bitmaps_cmd ${RPS_MAP})" \ + ip netns exec $server_ns ./toeplitz "${IP_FLAG}" "${PROTO_FLAG}" \ + -d "${PORT}" -i "${DEV}" -k "${KEY}" -T 1000 \ + -r "0x${RPS_MAP}" -s -v & +else + ip netns exec $server_ns ./toeplitz "${IP_FLAG}" "${PROTO_FLAG}" \ + -d "${PORT}" -i "${DEV}" -k "${KEY}" -T 1000 -s -v & +fi + +server_pid=$! + +ip netns exec $client_ns ./toeplitz_client.sh "${PROTO_FLAG}" \ + "${IP_FLAG}" "${SERVER_IP%%/*}" "${PORT}" & + +client_pid=$! + +wait "${server_pid}" +exit_code=$? +kill -9 "${client_pid}" +if [[ "${exit_code}" -eq 0 ]]; then + echo "Test Succeeded!" +fi +exit "${exit_code}" diff --git a/tools/testing/selftests/net/toeplitz_client.sh b/tools/testing/selftests/net/toeplitz_client.sh new file mode 100755 index 000000000000..2fef34f4aba1 --- /dev/null +++ b/tools/testing/selftests/net/toeplitz_client.sh @@ -0,0 +1,28 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# A simple program for generating traffic for the toeplitz test. +# +# This program sends packets periodically for, conservatively, 20 seconds. The +# intent is for the calling program to kill this program once it is no longer +# needed, rather than waiting for the 20 second expiration. + +send_traffic() { + expiration=$((SECONDS+20)) + while [[ "${SECONDS}" -lt "${expiration}" ]] + do + if [[ "${PROTO}" == "-u" ]]; then + echo "msg $i" | nc "${IPVER}" -u -w 0 "${ADDR}" "${PORT}" + else + echo "msg $i" | nc "${IPVER}" -w 0 "${ADDR}" "${PORT}" + fi + sleep 0.001 + done +} + +PROTO=$1 +IPVER=$2 +ADDR=$3 +PORT=$4 + +send_traffic diff --git a/tools/testing/selftests/net/traceroute.sh b/tools/testing/selftests/net/traceroute.sh index de9ca97abc30..282f14760940 100755 --- a/tools/testing/selftests/net/traceroute.sh +++ b/tools/testing/selftests/net/traceroute.sh @@ -4,6 +4,7 @@ # Run traceroute/traceroute6 tests # +source lib.sh VERBOSE=0 PAUSE_ON_FAIL=no @@ -69,9 +70,6 @@ create_ns() [ -z "${addr}" ] && addr="-" [ -z "${addr6}" ] && addr6="-" - ip netns add ${ns} - - ip netns exec ${ns} ip link set lo up if [ "${addr}" != "-" ]; then ip netns exec ${ns} ip addr add dev lo ${addr} fi @@ -160,12 +158,7 @@ connect_ns() cleanup_traceroute6() { - local ns - - for ns in host-1 host-2 router-1 router-2 - do - ip netns del ${ns} 2>/dev/null - done + cleanup_ns $h1 $h2 $r1 $r2 } setup_traceroute6() @@ -176,33 +169,34 @@ setup_traceroute6() cleanup_traceroute6 set -e - create_ns host-1 - create_ns host-2 - create_ns router-1 - create_ns router-2 + setup_ns h1 h2 r1 r2 + create_ns $h1 + create_ns $h2 + create_ns $r1 + create_ns $r2 # Setup N3 - connect_ns router-2 eth3 - 2000:103::2/64 host-2 eth3 - 2000:103::4/64 - ip netns exec host-2 ip route add default via 2000:103::2 + connect_ns $r2 eth3 - 2000:103::2/64 $h2 eth3 - 2000:103::4/64 + ip netns exec $h2 ip route add default via 2000:103::2 # Setup N2 - connect_ns router-1 eth2 - 2000:102::1/64 router-2 eth2 - 2000:102::2/64 - ip netns exec router-1 ip route add default via 2000:102::2 + connect_ns $r1 eth2 - 2000:102::1/64 $r2 eth2 - 2000:102::2/64 + ip netns exec $r1 ip route add default via 2000:102::2 # Setup N1. host-1 and router-2 connect to a bridge in router-1. - ip netns exec router-1 ip link add name ${brdev} type bridge - ip netns exec router-1 ip link set ${brdev} up - ip netns exec router-1 ip addr add 2000:101::1/64 dev ${brdev} + ip netns exec $r1 ip link add name ${brdev} type bridge + ip netns exec $r1 ip link set ${brdev} up + ip netns exec $r1 ip addr add 2000:101::1/64 dev ${brdev} - connect_ns host-1 eth0 - 2000:101::3/64 router-1 eth0 - - - ip netns exec router-1 ip link set dev eth0 master ${brdev} - ip netns exec host-1 ip route add default via 2000:101::1 + connect_ns $h1 eth0 - 2000:101::3/64 $r1 eth0 - - + ip netns exec $r1 ip link set dev eth0 master ${brdev} + ip netns exec $h1 ip route add default via 2000:101::1 - connect_ns router-2 eth1 - 2000:101::2/64 router-1 eth1 - - - ip netns exec router-1 ip link set dev eth1 master ${brdev} + connect_ns $r2 eth1 - 2000:101::2/64 $r1 eth1 - - + ip netns exec $r1 ip link set dev eth1 master ${brdev} # Prime the network - ip netns exec host-1 ping6 -c5 2000:103::4 >/dev/null 2>&1 + ip netns exec $h1 ping6 -c5 2000:103::4 >/dev/null 2>&1 set +e } @@ -217,7 +211,7 @@ run_traceroute6() setup_traceroute6 # traceroute6 host-2 from host-1 (expects 2000:102::2) - run_cmd host-1 "traceroute6 2000:103::4 | grep -q 2000:102::2" + run_cmd $h1 "traceroute6 2000:103::4 | grep -q 2000:102::2" log_test $? 0 "IPV6 traceroute" cleanup_traceroute6 @@ -240,12 +234,7 @@ run_traceroute6() cleanup_traceroute() { - local ns - - for ns in host-1 host-2 router - do - ip netns del ${ns} 2>/dev/null - done + cleanup_ns $h1 $h2 $router } setup_traceroute() @@ -254,24 +243,25 @@ setup_traceroute() cleanup_traceroute set -e - create_ns host-1 - create_ns host-2 - create_ns router + setup_ns h1 h2 router + create_ns $h1 + create_ns $h2 + create_ns $router - connect_ns host-1 eth0 1.0.1.3/24 - \ - router eth1 1.0.3.1/24 - - ip netns exec host-1 ip route add default via 1.0.1.1 + connect_ns $h1 eth0 1.0.1.3/24 - \ + $router eth1 1.0.3.1/24 - + ip netns exec $h1 ip route add default via 1.0.1.1 - ip netns exec router ip addr add 1.0.1.1/24 dev eth1 - ip netns exec router sysctl -qw \ + ip netns exec $router ip addr add 1.0.1.1/24 dev eth1 + ip netns exec $router sysctl -qw \ net.ipv4.icmp_errors_use_inbound_ifaddr=1 - connect_ns host-2 eth0 1.0.2.4/24 - \ - router eth2 1.0.2.1/24 - - ip netns exec host-2 ip route add default via 1.0.2.1 + connect_ns $h2 eth0 1.0.2.4/24 - \ + $router eth2 1.0.2.1/24 - + ip netns exec $h2 ip route add default via 1.0.2.1 # Prime the network - ip netns exec host-1 ping -c5 1.0.2.4 >/dev/null 2>&1 + ip netns exec $h1 ping -c5 1.0.2.4 >/dev/null 2>&1 set +e } @@ -286,7 +276,7 @@ run_traceroute() setup_traceroute # traceroute host-2 from host-1 (expects 1.0.1.1). Takes a while. - run_cmd host-1 "traceroute 1.0.2.4 | grep -q 1.0.1.1" + run_cmd $h1 "traceroute 1.0.2.4 | grep -q 1.0.1.1" log_test $? 0 "IPV4 traceroute" cleanup_traceroute diff --git a/tools/testing/selftests/net/tun.c b/tools/testing/selftests/net/tun.c new file mode 100644 index 000000000000..fa83918b62d1 --- /dev/null +++ b/tools/testing/selftests/net/tun.c @@ -0,0 +1,162 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE + +#include <errno.h> +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <linux/if.h> +#include <linux/if_tun.h> +#include <linux/netlink.h> +#include <linux/rtnetlink.h> +#include <sys/ioctl.h> +#include <sys/socket.h> + +#include "../kselftest_harness.h" + +static int tun_attach(int fd, char *dev) +{ + struct ifreq ifr; + + memset(&ifr, 0, sizeof(ifr)); + strcpy(ifr.ifr_name, dev); + ifr.ifr_flags = IFF_ATTACH_QUEUE; + + return ioctl(fd, TUNSETQUEUE, (void *) &ifr); +} + +static int tun_detach(int fd, char *dev) +{ + struct ifreq ifr; + + memset(&ifr, 0, sizeof(ifr)); + strcpy(ifr.ifr_name, dev); + ifr.ifr_flags = IFF_DETACH_QUEUE; + + return ioctl(fd, TUNSETQUEUE, (void *) &ifr); +} + +static int tun_alloc(char *dev) +{ + struct ifreq ifr; + int fd, err; + + fd = open("/dev/net/tun", O_RDWR); + if (fd < 0) { + fprintf(stderr, "can't open tun: %s\n", strerror(errno)); + return fd; + } + + memset(&ifr, 0, sizeof(ifr)); + strcpy(ifr.ifr_name, dev); + ifr.ifr_flags = IFF_TAP | IFF_NAPI | IFF_MULTI_QUEUE; + + err = ioctl(fd, TUNSETIFF, (void *) &ifr); + if (err < 0) { + fprintf(stderr, "can't TUNSETIFF: %s\n", strerror(errno)); + close(fd); + return err; + } + strcpy(dev, ifr.ifr_name); + return fd; +} + +static int tun_delete(char *dev) +{ + struct { + struct nlmsghdr nh; + struct ifinfomsg ifm; + unsigned char data[64]; + } req; + struct rtattr *rta; + int ret, rtnl; + + rtnl = socket(AF_NETLINK, SOCK_DGRAM, NETLINK_ROUTE); + if (rtnl < 0) { + fprintf(stderr, "can't open rtnl: %s\n", strerror(errno)); + return 1; + } + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_ALIGN(NLMSG_LENGTH(sizeof(req.ifm))); + req.nh.nlmsg_flags = NLM_F_REQUEST; + req.nh.nlmsg_type = RTM_DELLINK; + + req.ifm.ifi_family = AF_UNSPEC; + + rta = (struct rtattr *)(((char *)&req) + NLMSG_ALIGN(req.nh.nlmsg_len)); + rta->rta_type = IFLA_IFNAME; + rta->rta_len = RTA_LENGTH(IFNAMSIZ); + req.nh.nlmsg_len += rta->rta_len; + memcpy(RTA_DATA(rta), dev, IFNAMSIZ); + + ret = send(rtnl, &req, req.nh.nlmsg_len, 0); + if (ret < 0) + fprintf(stderr, "can't send: %s\n", strerror(errno)); + ret = (unsigned int)ret != req.nh.nlmsg_len; + + close(rtnl); + return ret; +} + +FIXTURE(tun) +{ + char ifname[IFNAMSIZ]; + int fd, fd2; +}; + +FIXTURE_SETUP(tun) +{ + memset(self->ifname, 0, sizeof(self->ifname)); + + self->fd = tun_alloc(self->ifname); + ASSERT_GE(self->fd, 0); + + self->fd2 = tun_alloc(self->ifname); + ASSERT_GE(self->fd2, 0); +} + +FIXTURE_TEARDOWN(tun) +{ + if (self->fd >= 0) + close(self->fd); + if (self->fd2 >= 0) + close(self->fd2); +} + +TEST_F(tun, delete_detach_close) { + EXPECT_EQ(tun_delete(self->ifname), 0); + EXPECT_EQ(tun_detach(self->fd, self->ifname), -1); + EXPECT_EQ(errno, 22); +} + +TEST_F(tun, detach_delete_close) { + EXPECT_EQ(tun_detach(self->fd, self->ifname), 0); + EXPECT_EQ(tun_delete(self->ifname), 0); +} + +TEST_F(tun, detach_close_delete) { + EXPECT_EQ(tun_detach(self->fd, self->ifname), 0); + close(self->fd); + self->fd = -1; + EXPECT_EQ(tun_delete(self->ifname), 0); +} + +TEST_F(tun, reattach_delete_close) { + EXPECT_EQ(tun_detach(self->fd, self->ifname), 0); + EXPECT_EQ(tun_attach(self->fd, self->ifname), 0); + EXPECT_EQ(tun_delete(self->ifname), 0); +} + +TEST_F(tun, reattach_close_delete) { + EXPECT_EQ(tun_detach(self->fd, self->ifname), 0); + EXPECT_EQ(tun_attach(self->fd, self->ifname), 0); + close(self->fd); + self->fd = -1; + EXPECT_EQ(tun_delete(self->ifname), 0); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/net/txtimestamp.c b/tools/testing/selftests/net/txtimestamp.c index 490a8cca708a..ec60a16c9307 100644 --- a/tools/testing/selftests/net/txtimestamp.c +++ b/tools/testing/selftests/net/txtimestamp.c @@ -26,6 +26,7 @@ #include <inttypes.h> #include <linux/errqueue.h> #include <linux/if_ether.h> +#include <linux/if_packet.h> #include <linux/ipv6.h> #include <linux/net_tstamp.h> #include <netdb.h> @@ -34,7 +35,6 @@ #include <netinet/ip.h> #include <netinet/udp.h> #include <netinet/tcp.h> -#include <netpacket/packet.h> #include <poll.h> #include <stdarg.h> #include <stdbool.h> @@ -161,18 +161,19 @@ static void validate_timestamp(struct timespec *cur, int min_delay) max_delay = min_delay + cfg_delay_tolerance_usec; if (cur64 < start64 + min_delay || cur64 > start64 + max_delay) { - fprintf(stderr, "ERROR: %lu us expected between %d and %d\n", + fprintf(stderr, "ERROR: %" PRId64 " us expected between %d and %d\n", cur64 - start64, min_delay, max_delay); - test_failed = true; + if (!getenv("KSFT_MACHINE_SLOW")) + test_failed = true; } } static void __print_ts_delta_formatted(int64_t ts_delta) { if (cfg_print_nsec) - fprintf(stderr, "%lu ns", ts_delta); + fprintf(stderr, "%" PRId64 " ns", ts_delta); else - fprintf(stderr, "%lu us", ts_delta / NSEC_PER_USEC); + fprintf(stderr, "%" PRId64 " us", ts_delta / NSEC_PER_USEC); } static void __print_timestamp(const char *name, struct timespec *cur, @@ -495,12 +496,12 @@ static void do_test(int family, unsigned int report_opt) total_len = cfg_payload_len; if (cfg_use_pf_packet || cfg_proto == SOCK_RAW) { total_len += sizeof(struct udphdr); - if (cfg_use_pf_packet || cfg_ipproto == IPPROTO_RAW) + if (cfg_use_pf_packet || cfg_ipproto == IPPROTO_RAW) { if (family == PF_INET) total_len += sizeof(struct iphdr); else total_len += sizeof(struct ipv6hdr); - + } /* special case, only rawv6_sendmsg: * pass proto in sin6_port if not connected * also see ANK comment in net/ipv4/raw.c diff --git a/tools/testing/selftests/net/txtimestamp.sh b/tools/testing/selftests/net/txtimestamp.sh index 31637769f59f..25baca4b148e 100755 --- a/tools/testing/selftests/net/txtimestamp.sh +++ b/tools/testing/selftests/net/txtimestamp.sh @@ -8,13 +8,13 @@ set -e setup() { # set 1ms delay on lo egress - tc qdisc add dev lo root netem delay 1ms + tc qdisc add dev lo root netem delay 10ms # set 2ms delay on ifb0 egress modprobe ifb ip link add ifb_netem0 type ifb ip link set dev ifb_netem0 up - tc qdisc add dev ifb_netem0 root netem delay 2ms + tc qdisc add dev ifb_netem0 root netem delay 20ms # redirect lo ingress through ifb0 egress tc qdisc add dev lo handle ffff: ingress @@ -24,9 +24,11 @@ setup() { } run_test_v4v6() { - # SND will be delayed 1000us - # ACK will be delayed 6000us: 1 + 2 ms round-trip - local -r args="$@ -v 1000 -V 6000" + # SND will be delayed 10ms + # ACK will be delayed 60ms: 10 + 20 ms round-trip + # allow +/- tolerance of 8ms + # wait for ACK to be queued + local -r args="$@ -v 10000 -V 60000 -t 8000 -S 80000" ./txtimestamp ${args} -4 -L 127.0.0.1 ./txtimestamp ${args} -6 -L ::1 diff --git a/tools/testing/selftests/net/udpgro.sh b/tools/testing/selftests/net/udpgro.sh index ac2a30be9b32..8802604148dd 100755 --- a/tools/testing/selftests/net/udpgro.sh +++ b/tools/testing/selftests/net/udpgro.sh @@ -3,8 +3,20 @@ # # Run a series of udpgro functional tests. +source net_helper.sh + readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)" +BPF_FILE="xdp_dummy.o" + +# set global exit status, but never reset nonzero one. +check_err() +{ + if [ $ret -eq 0 ]; then + ret=$1 + fi +} + cleanup() { local -r jobs="$(jobs -p)" local -r ns="$(ip netns list|grep $PEER_NS)" @@ -26,7 +38,7 @@ cfg_veth() { ip -netns "${PEER_NS}" addr add dev veth1 192.168.1.1/24 ip -netns "${PEER_NS}" addr add dev veth1 2001:db8::1/64 nodad ip -netns "${PEER_NS}" link set dev veth1 up - ip -n "${PEER_NS}" link set veth1 xdp object ../bpf/xdp_dummy.o section xdp_dummy + ip -n "${PEER_NS}" link set veth1 xdp object ${BPF_FILE} section xdp } run_one() { @@ -41,10 +53,11 @@ run_one() { echo "ok" || \ echo "failed" & - # Hack: let bg programs complete the startup - sleep 0.1 + wait_local_port_listen ${PEER_NS} 8000 udp ./udpgso_bench_tx ${tx_args} + ret=$? wait $(jobs -p) + return $ret } run_test() { @@ -85,10 +98,12 @@ run_one_nat() { echo "ok" || \ echo "failed"& - sleep 0.1 + wait_local_port_listen "${PEER_NS}" 8000 udp ./udpgso_bench_tx ${tx_args} + ret=$? kill -INT $pid wait $(jobs -p) + return $ret } run_one_2sock() { @@ -104,13 +119,13 @@ run_one_2sock() { echo "ok" || \ echo "failed" & - # Hack: let bg programs complete the startup - sleep 0.1 + wait_local_port_listen "${PEER_NS}" 12345 udp ./udpgso_bench_tx ${tx_args} -p 12345 - sleep 0.1 - # first UDP GSO socket should be closed at this point + wait_local_port_listen "${PEER_NS}" 8000 udp ./udpgso_bench_tx ${tx_args} + ret=$? wait $(jobs -p) + return $ret } run_nat_test() { @@ -131,40 +146,58 @@ run_all() { local -r core_args="-l 4" local -r ipv4_args="${core_args} -4 -D 192.168.1.1" local -r ipv6_args="${core_args} -6 -D 2001:db8::1" + ret=0 echo "ipv4" run_test "no GRO" "${ipv4_args} -M 10 -s 1400" "-4 -n 10 -l 1400" + check_err $? # explicitly check we are not receiving UDP_SEGMENT cmsg (-S -1) # when GRO does not take place run_test "no GRO chk cmsg" "${ipv4_args} -M 10 -s 1400" "-4 -n 10 -l 1400 -S -1" + check_err $? # the GSO packets are aggregated because: # * veth schedule napi after each xmit # * segmentation happens in BH context, veth napi poll is delayed after # the transmission of the last segment run_test "GRO" "${ipv4_args} -M 1 -s 14720 -S 0 " "-4 -n 1 -l 14720" + check_err $? run_test "GRO chk cmsg" "${ipv4_args} -M 1 -s 14720 -S 0 " "-4 -n 1 -l 14720 -S 1472" + check_err $? run_test "GRO with custom segment size" "${ipv4_args} -M 1 -s 14720 -S 500 " "-4 -n 1 -l 14720" + check_err $? run_test "GRO with custom segment size cmsg" "${ipv4_args} -M 1 -s 14720 -S 500 " "-4 -n 1 -l 14720 -S 500" + check_err $? run_nat_test "bad GRO lookup" "${ipv4_args} -M 1 -s 14720 -S 0" "-n 10 -l 1472" + check_err $? run_2sock_test "multiple GRO socks" "${ipv4_args} -M 1 -s 14720 -S 0 " "-4 -n 1 -l 14720 -S 1472" + check_err $? echo "ipv6" run_test "no GRO" "${ipv6_args} -M 10 -s 1400" "-n 10 -l 1400" + check_err $? run_test "no GRO chk cmsg" "${ipv6_args} -M 10 -s 1400" "-n 10 -l 1400 -S -1" + check_err $? run_test "GRO" "${ipv6_args} -M 1 -s 14520 -S 0" "-n 1 -l 14520" + check_err $? run_test "GRO chk cmsg" "${ipv6_args} -M 1 -s 14520 -S 0" "-n 1 -l 14520 -S 1452" + check_err $? run_test "GRO with custom segment size" "${ipv6_args} -M 1 -s 14520 -S 500" "-n 1 -l 14520" + check_err $? run_test "GRO with custom segment size cmsg" "${ipv6_args} -M 1 -s 14520 -S 500" "-n 1 -l 14520 -S 500" + check_err $? run_nat_test "bad GRO lookup" "${ipv6_args} -M 1 -s 14520 -S 0" "-n 10 -l 1452" + check_err $? run_2sock_test "multiple GRO socks" "${ipv6_args} -M 1 -s 14520 -S 0 " "-n 1 -l 14520 -S 1452" + check_err $? + return $ret } -if [ ! -f ../bpf/xdp_dummy.o ]; then - echo "Missing xdp_dummy helper. Build bpf selftest first" +if [ ! -f ${BPF_FILE} ]; then + echo "Missing ${BPF_FILE}. Run 'make' first" exit -1 fi @@ -180,3 +213,5 @@ elif [[ $1 == "__subprocess_2sock" ]]; then shift run_one_2sock $@ fi + +exit $? diff --git a/tools/testing/selftests/net/udpgro_bench.sh b/tools/testing/selftests/net/udpgro_bench.sh index 820bc50f6b68..7080eae5312b 100755 --- a/tools/testing/selftests/net/udpgro_bench.sh +++ b/tools/testing/selftests/net/udpgro_bench.sh @@ -3,8 +3,12 @@ # # Run a series of udpgro benchmarks +source net_helper.sh + readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)" +BPF_FILE="xdp_dummy.o" + cleanup() { local -r jobs="$(jobs -p)" local -r ns="$(ip netns list|grep $PEER_NS)" @@ -34,12 +38,11 @@ run_one() { ip -netns "${PEER_NS}" addr add dev veth1 2001:db8::1/64 nodad ip -netns "${PEER_NS}" link set dev veth1 up - ip -n "${PEER_NS}" link set veth1 xdp object ../bpf/xdp_dummy.o section xdp_dummy + ip -n "${PEER_NS}" link set veth1 xdp object ${BPF_FILE} section xdp ip netns exec "${PEER_NS}" ./udpgso_bench_rx ${rx_args} -r & ip netns exec "${PEER_NS}" ./udpgso_bench_rx -t ${rx_args} -r & - # Hack: let bg programs complete the startup - sleep 0.1 + wait_local_port_listen "${PEER_NS}" 8000 udp ./udpgso_bench_tx ${tx_args} } @@ -80,8 +83,8 @@ run_all() { run_udp "${ipv6_args}" } -if [ ! -f ../bpf/xdp_dummy.o ]; then - echo "Missing xdp_dummy helper. Build bpf selftest first" +if [ ! -f ${BPF_FILE} ]; then + echo "Missing ${BPF_FILE}. Run 'make' first" exit -1 fi diff --git a/tools/testing/selftests/net/udpgro_frglist.sh b/tools/testing/selftests/net/udpgro_frglist.sh new file mode 100755 index 000000000000..e1ff645bd3d1 --- /dev/null +++ b/tools/testing/selftests/net/udpgro_frglist.sh @@ -0,0 +1,104 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Run a series of udpgro benchmarks + +source net_helper.sh + +readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)" + +BPF_FILE="xdp_dummy.o" + +cleanup() { + local -r jobs="$(jobs -p)" + local -r ns="$(ip netns list|grep $PEER_NS)" + + [ -n "${jobs}" ] && kill -INT ${jobs} 2>/dev/null + [ -n "$ns" ] && ip netns del $ns 2>/dev/null +} +trap cleanup EXIT + +run_one() { + # use 'rx' as separator between sender args and receiver args + local -r all="$@" + local -r tx_args=${all%rx*} + local rx_args=${all#*rx} + + + + ip netns add "${PEER_NS}" + ip -netns "${PEER_NS}" link set lo up + ip link add type veth + ip link set dev veth0 up + ip addr add dev veth0 192.168.1.2/24 + ip addr add dev veth0 2001:db8::2/64 nodad + + ip link set dev veth1 netns "${PEER_NS}" + ip -netns "${PEER_NS}" addr add dev veth1 192.168.1.1/24 + ip -netns "${PEER_NS}" addr add dev veth1 2001:db8::1/64 nodad + ip -netns "${PEER_NS}" link set dev veth1 up + ip netns exec "${PEER_NS}" ethtool -K veth1 rx-gro-list on + + + ip -n "${PEER_NS}" link set veth1 xdp object ${BPF_FILE} section xdp + tc -n "${PEER_NS}" qdisc add dev veth1 clsact + tc -n "${PEER_NS}" filter add dev veth1 ingress prio 4 protocol ipv6 bpf object-file nat6to4.o section schedcls/ingress6/nat_6 direct-action + tc -n "${PEER_NS}" filter add dev veth1 egress prio 4 protocol ip bpf object-file nat6to4.o section schedcls/egress4/snat4 direct-action + echo ${rx_args} + ip netns exec "${PEER_NS}" ./udpgso_bench_rx ${rx_args} -r & + + wait_local_port_listen "${PEER_NS}" 8000 udp + ./udpgso_bench_tx ${tx_args} +} + +run_in_netns() { + local -r args=$@ + echo ${args} + ./in_netns.sh $0 __subprocess ${args} +} + +run_udp() { + local -r args=$@ + + echo "udp gso - over veth touching data" + run_in_netns ${args} -u -S 0 rx -4 -v + + echo "udp gso and gro - over veth touching data" + run_in_netns ${args} -S 0 rx -4 -G +} + +run_tcp() { + local -r args=$@ + + echo "tcp - over veth touching data" + run_in_netns ${args} -t rx -4 -t +} + +run_all() { + local -r core_args="-l 4" + local -r ipv4_args="${core_args} -4 -D 192.168.1.1" + local -r ipv6_args="${core_args} -6 -D 2001:db8::1" + + echo "ipv6" + run_tcp "${ipv6_args}" + run_udp "${ipv6_args}" +} + +if [ ! -f ${BPF_FILE} ]; then + echo "Missing ${BPF_FILE}. Run 'make' first" + exit -1 +fi + +if [ ! -f nat6to4.o ]; then + echo "Missing nat6to4 helper. Run 'make' first" + exit -1 +fi + +if [[ $# -eq 0 ]]; then + run_all +elif [[ $1 == "__subprocess" ]]; then + shift + run_one $@ +else + run_in_netns $@ +fi diff --git a/tools/testing/selftests/net/udpgro_fwd.sh b/tools/testing/selftests/net/udpgro_fwd.sh new file mode 100755 index 000000000000..380cb15e942e --- /dev/null +++ b/tools/testing/selftests/net/udpgro_fwd.sh @@ -0,0 +1,271 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source net_helper.sh + +BPF_FILE="xdp_dummy.o" +readonly BASE="ns-$(mktemp -u XXXXXX)" +readonly SRC=2 +readonly DST=1 +readonly DST_NAT=100 +readonly NS_SRC=$BASE$SRC +readonly NS_DST=$BASE$DST + +# "baremetal" network used for raw UDP traffic +readonly BM_NET_V4=192.168.1. +readonly BM_NET_V6=2001:db8:: + +# "overlay" network used for UDP over UDP tunnel traffic +readonly OL_NET_V4=172.16.1. +readonly OL_NET_V6=2001:db8:1:: +readonly NPROCS=`nproc` + +cleanup() { + local ns + local -r jobs="$(jobs -p)" + [ -n "${jobs}" ] && kill -1 ${jobs} 2>/dev/null + + for ns in $NS_SRC $NS_DST; do + ip netns del $ns 2>/dev/null + done +} + +trap cleanup EXIT + +create_ns() { + local net + local ns + + for ns in $NS_SRC $NS_DST; do + ip netns add $ns + ip -n $ns link set dev lo up + + # disable route solicitations to decrease 'noise' traffic + ip netns exec $ns sysctl -qw net.ipv6.conf.default.router_solicitations=0 + ip netns exec $ns sysctl -qw net.ipv6.conf.all.router_solicitations=0 + done + + ip link add name veth$SRC type veth peer name veth$DST + + for ns in $SRC $DST; do + ip link set dev veth$ns netns $BASE$ns + ip -n $BASE$ns link set dev veth$ns up + ip -n $BASE$ns addr add dev veth$ns $BM_NET_V4$ns/24 + ip -n $BASE$ns addr add dev veth$ns $BM_NET_V6$ns/64 nodad + done + ip -n $NS_DST link set veth$DST xdp object ${BPF_FILE} section xdp 2>/dev/null +} + +create_vxlan_endpoint() { + local -r netns=$1 + local -r bm_dev=$2 + local -r bm_rem_addr=$3 + local -r vxlan_dev=$4 + local -r vxlan_id=$5 + local -r vxlan_port=4789 + + ip -n $netns link set dev $bm_dev up + ip -n $netns link add dev $vxlan_dev type vxlan id $vxlan_id \ + dstport $vxlan_port remote $bm_rem_addr + ip -n $netns link set dev $vxlan_dev up +} + +create_vxlan_pair() { + local ns + + create_ns + + for ns in $SRC $DST; do + # note that 3 - $SRC == $DST and 3 - $DST == $SRC + create_vxlan_endpoint $BASE$ns veth$ns $BM_NET_V4$((3 - $ns)) vxlan$ns 4 + ip -n $BASE$ns addr add dev vxlan$ns $OL_NET_V4$ns/24 + done + for ns in $SRC $DST; do + create_vxlan_endpoint $BASE$ns veth$ns $BM_NET_V6$((3 - $ns)) vxlan6$ns 6 + ip -n $BASE$ns addr add dev vxlan6$ns $OL_NET_V6$ns/24 nodad + done + + # preload neighbur cache, do avoid some noisy traffic + local addr_dst=$(ip -j -n $BASE$DST link show dev vxlan6$DST |jq -r '.[]["address"]') + local addr_src=$(ip -j -n $BASE$SRC link show dev vxlan6$SRC |jq -r '.[]["address"]') + ip -n $BASE$DST neigh add dev vxlan6$DST lladdr $addr_src $OL_NET_V6$SRC + ip -n $BASE$SRC neigh add dev vxlan6$SRC lladdr $addr_dst $OL_NET_V6$DST +} + +is_ipv6() { + if [[ $1 =~ .*:.* ]]; then + return 0 + fi + return 1 +} + +run_test() { + local -r msg=$1 + local -r dst=$2 + local -r pkts=$3 + local -r vxpkts=$4 + local bind=$5 + local rx_args="" + local rx_family="-4" + local family=-4 + local filter=IpInReceives + local ipt=iptables + + printf "%-40s" "$msg" + + if is_ipv6 $dst; then + # rx program does not support '-6' and implies ipv6 usage by default + rx_family="" + family=-6 + filter=Ip6InReceives + ipt=ip6tables + fi + + rx_args="$rx_family" + [ -n "$bind" ] && rx_args="$rx_args -b $bind" + + # send a single GSO packet, segmented in 10 UDP frames. + # Always expect 10 UDP frames on RX side as rx socket does + # not enable GRO + ip netns exec $NS_DST $ipt -A INPUT -p udp --dport 4789 + ip netns exec $NS_DST $ipt -A INPUT -p udp --dport 8000 + ip netns exec $NS_DST ./udpgso_bench_rx -C 2000 -R 100 -n 10 -l 1300 $rx_args & + local spid=$! + wait_local_port_listen "$NS_DST" 8000 udp + ip netns exec $NS_SRC ./udpgso_bench_tx $family -M 1 -s 13000 -S 1300 -D $dst + local retc=$? + wait $spid + local rets=$? + if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then + echo " fail client exit code $retc, server $rets" + ret=1 + return + fi + + local rcv=`ip netns exec $NS_DST $ipt"-save" -c | grep 'dport 8000' | \ + sed -e 's/\[//' -e 's/:.*//'` + if [ $rcv != $pkts ]; then + echo " fail - received $rcv packets, expected $pkts" + ret=1 + return + fi + + local vxrcv=`ip netns exec $NS_DST $ipt"-save" -c | grep 'dport 4789' | \ + sed -e 's/\[//' -e 's/:.*//'` + + # upper net can generate a little noise, allow some tolerance + if [ $vxrcv -lt $vxpkts -o $vxrcv -gt $((vxpkts + 3)) ]; then + echo " fail - received $vxrcv vxlan packets, expected $vxpkts" + ret=1 + return + fi + echo " ok" +} + +run_bench() { + local -r msg=$1 + local -r dst=$2 + local family=-4 + + printf "%-40s" "$msg" + if [ $NPROCS -lt 2 ]; then + echo " skip - needed 2 CPUs found $NPROCS" + return + fi + + is_ipv6 $dst && family=-6 + + # bind the sender and the receiver to different CPUs to try + # get reproducible results + ip netns exec $NS_DST bash -c "echo 2 > /sys/class/net/veth$DST/queues/rx-0/rps_cpus" + ip netns exec $NS_DST taskset 0x2 ./udpgso_bench_rx -C 2000 -R 100 & + local spid=$! + wait_local_port_listen "$NS_DST" 8000 udp + ip netns exec $NS_SRC taskset 0x1 ./udpgso_bench_tx $family -l 3 -S 1300 -D $dst + local retc=$? + wait $spid + local rets=$? + if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then + echo " fail client exit code $retc, server $rets" + ret=1 + return + fi +} + +for family in 4 6; do + BM_NET=$BM_NET_V4 + OL_NET=$OL_NET_V4 + IPT=iptables + SUFFIX=24 + VXDEV=vxlan + PING=ping + + if [ $family = 6 ]; then + BM_NET=$BM_NET_V6 + OL_NET=$OL_NET_V6 + SUFFIX="64 nodad" + VXDEV=vxlan6 + IPT=ip6tables + # Use ping6 on systems where ping doesn't handle IPv6 + ping -w 1 -c 1 ::1 > /dev/null 2>&1 || PING="ping6" + fi + + echo "IPv$family" + + create_ns + run_test "No GRO" $BM_NET$DST 10 0 + cleanup + + create_ns + ip netns exec $NS_DST ethtool -K veth$DST generic-receive-offload on + ip netns exec $NS_DST ethtool -K veth$DST rx-gro-list on + run_test "GRO frag list" $BM_NET$DST 1 0 + cleanup + + # UDP GRO fwd skips aggregation when find an udp socket with the GRO option + # if there is an UDP tunnel in the running system, such lookup happen + # take place. + # use NAT to circumvent GRO FWD check + create_ns + ip -n $NS_DST addr add dev veth$DST $BM_NET$DST_NAT/$SUFFIX + ip netns exec $NS_DST ethtool -K veth$DST generic-receive-offload on + ip netns exec $NS_DST ethtool -K veth$DST rx-udp-gro-forwarding on + ip netns exec $NS_DST $IPT -t nat -I PREROUTING -d $BM_NET$DST_NAT \ + -j DNAT --to-destination $BM_NET$DST + run_test "GRO fwd" $BM_NET$DST_NAT 1 0 $BM_NET$DST + cleanup + + create_ns + run_bench "UDP fwd perf" $BM_NET$DST + ip netns exec $NS_DST ethtool -K veth$DST rx-udp-gro-forwarding on + run_bench "UDP GRO fwd perf" $BM_NET$DST + cleanup + + create_vxlan_pair + ip netns exec $NS_DST ethtool -K veth$DST generic-receive-offload on + ip netns exec $NS_DST ethtool -K veth$DST rx-gro-list on + run_test "GRO frag list over UDP tunnel" $OL_NET$DST 1 1 + cleanup + + # use NAT to circumvent GRO FWD check + create_vxlan_pair + ip -n $NS_DST addr add dev $VXDEV$DST $OL_NET$DST_NAT/$SUFFIX + ip netns exec $NS_DST ethtool -K veth$DST generic-receive-offload on + ip netns exec $NS_DST ethtool -K veth$DST rx-udp-gro-forwarding on + ip netns exec $NS_DST $IPT -t nat -I PREROUTING -d $OL_NET$DST_NAT \ + -j DNAT --to-destination $OL_NET$DST + + # load arp cache before running the test to reduce the amount of + # stray traffic on top of the UDP tunnel + ip netns exec $NS_SRC $PING -q -c 1 $OL_NET$DST_NAT >/dev/null + run_test "GRO fwd over UDP tunnel" $OL_NET$DST_NAT 1 1 $OL_NET$DST + cleanup + + create_vxlan_pair + run_bench "UDP tunnel fwd perf" $OL_NET$DST + ip netns exec $NS_DST ethtool -K veth$DST rx-udp-gro-forwarding on + run_bench "UDP tunnel GRO fwd perf" $OL_NET$DST + cleanup +done + +exit $ret diff --git a/tools/testing/selftests/net/udpgso.c b/tools/testing/selftests/net/udpgso.c index c66da6ffd6d8..1d975bf52af3 100644 --- a/tools/testing/selftests/net/udpgso.c +++ b/tools/testing/selftests/net/udpgso.c @@ -56,7 +56,6 @@ static bool cfg_do_msgmore; static bool cfg_do_setsockopt; static int cfg_specific_test_id = -1; -static const char cfg_ifname[] = "lo"; static unsigned short cfg_port = 9000; static char buf[ETH_MAX_MTU]; @@ -69,8 +68,13 @@ struct testcase { int r_len_last; /* recv(): size of last non-mss dgram, if any */ }; -const struct in6_addr addr6 = IN6ADDR_LOOPBACK_INIT; -const struct in_addr addr4 = { .s_addr = __constant_htonl(INADDR_LOOPBACK + 2) }; +const struct in6_addr addr6 = { + { { 0xfd, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 } }, /* fd00::1 */ +}; + +const struct in_addr addr4 = { + __constant_htonl(0x0a000001), /* 10.0.0.1 */ +}; struct testcase testcases_v4[] = { { @@ -156,13 +160,13 @@ struct testcase testcases_v4[] = { }, { /* send max number of min sized segments */ - .tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V4, + .tlen = UDP_MAX_SEGMENTS, .gso_len = 1, - .r_num_mss = UDP_MAX_SEGMENTS - CONST_HDRLEN_V4, + .r_num_mss = UDP_MAX_SEGMENTS, }, { /* send max number + 1 of min sized segments: fail */ - .tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V4 + 1, + .tlen = UDP_MAX_SEGMENTS + 1, .gso_len = 1, .tfail = true, }, @@ -259,13 +263,13 @@ struct testcase testcases_v6[] = { }, { /* send max number of min sized segments */ - .tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V6, + .tlen = UDP_MAX_SEGMENTS, .gso_len = 1, - .r_num_mss = UDP_MAX_SEGMENTS - CONST_HDRLEN_V6, + .r_num_mss = UDP_MAX_SEGMENTS, }, { /* send max number + 1 of min sized segments: fail */ - .tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V6 + 1, + .tlen = UDP_MAX_SEGMENTS + 1, .gso_len = 1, .tfail = true, }, @@ -274,48 +278,6 @@ struct testcase testcases_v6[] = { } }; -static unsigned int get_device_mtu(int fd, const char *ifname) -{ - struct ifreq ifr; - - memset(&ifr, 0, sizeof(ifr)); - - strcpy(ifr.ifr_name, ifname); - - if (ioctl(fd, SIOCGIFMTU, &ifr)) - error(1, errno, "ioctl get mtu"); - - return ifr.ifr_mtu; -} - -static void __set_device_mtu(int fd, const char *ifname, unsigned int mtu) -{ - struct ifreq ifr; - - memset(&ifr, 0, sizeof(ifr)); - - ifr.ifr_mtu = mtu; - strcpy(ifr.ifr_name, ifname); - - if (ioctl(fd, SIOCSIFMTU, &ifr)) - error(1, errno, "ioctl set mtu"); -} - -static void set_device_mtu(int fd, int mtu) -{ - int val; - - val = get_device_mtu(fd, cfg_ifname); - fprintf(stderr, "device mtu (orig): %u\n", val); - - __set_device_mtu(fd, cfg_ifname, mtu); - val = get_device_mtu(fd, cfg_ifname); - if (val != mtu) - error(1, 0, "unable to set device mtu to %u\n", val); - - fprintf(stderr, "device mtu (test): %u\n", val); -} - static void set_pmtu_discover(int fd, bool is_ipv4) { int level, name, val; @@ -354,81 +316,6 @@ static unsigned int get_path_mtu(int fd, bool is_ipv4) return mtu; } -/* very wordy version of system("ip route add dev lo mtu 1500 127.0.0.3/32") */ -static void set_route_mtu(int mtu, bool is_ipv4) -{ - struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK }; - struct nlmsghdr *nh; - struct rtattr *rta; - struct rtmsg *rt; - char data[NLMSG_ALIGN(sizeof(*nh)) + - NLMSG_ALIGN(sizeof(*rt)) + - NLMSG_ALIGN(RTA_LENGTH(sizeof(addr6))) + - NLMSG_ALIGN(RTA_LENGTH(sizeof(int))) + - NLMSG_ALIGN(RTA_LENGTH(0) + RTA_LENGTH(sizeof(int)))]; - int fd, ret, alen, off = 0; - - alen = is_ipv4 ? sizeof(addr4) : sizeof(addr6); - - fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); - if (fd == -1) - error(1, errno, "socket netlink"); - - memset(data, 0, sizeof(data)); - - nh = (void *)data; - nh->nlmsg_type = RTM_NEWROUTE; - nh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE; - off += NLMSG_ALIGN(sizeof(*nh)); - - rt = (void *)(data + off); - rt->rtm_family = is_ipv4 ? AF_INET : AF_INET6; - rt->rtm_table = RT_TABLE_MAIN; - rt->rtm_dst_len = alen << 3; - rt->rtm_protocol = RTPROT_BOOT; - rt->rtm_scope = RT_SCOPE_UNIVERSE; - rt->rtm_type = RTN_UNICAST; - off += NLMSG_ALIGN(sizeof(*rt)); - - rta = (void *)(data + off); - rta->rta_type = RTA_DST; - rta->rta_len = RTA_LENGTH(alen); - if (is_ipv4) - memcpy(RTA_DATA(rta), &addr4, alen); - else - memcpy(RTA_DATA(rta), &addr6, alen); - off += NLMSG_ALIGN(rta->rta_len); - - rta = (void *)(data + off); - rta->rta_type = RTA_OIF; - rta->rta_len = RTA_LENGTH(sizeof(int)); - *((int *)(RTA_DATA(rta))) = 1; //if_nametoindex("lo"); - off += NLMSG_ALIGN(rta->rta_len); - - /* MTU is a subtype in a metrics type */ - rta = (void *)(data + off); - rta->rta_type = RTA_METRICS; - rta->rta_len = RTA_LENGTH(0) + RTA_LENGTH(sizeof(int)); - off += NLMSG_ALIGN(rta->rta_len); - - /* now fill MTU subtype. Note that it fits within above rta_len */ - rta = (void *)(((char *) rta) + RTA_LENGTH(0)); - rta->rta_type = RTAX_MTU; - rta->rta_len = RTA_LENGTH(sizeof(int)); - *((int *)(RTA_DATA(rta))) = mtu; - - nh->nlmsg_len = off; - - ret = sendto(fd, data, off, 0, (void *)&nladdr, sizeof(nladdr)); - if (ret != off) - error(1, errno, "send netlink: %uB != %uB\n", ret, off); - - if (close(fd)) - error(1, errno, "close netlink"); - - fprintf(stderr, "route mtu (test): %u\n", mtu); -} - static bool __send_one(int fd, struct msghdr *msg, int flags) { int ret; @@ -591,15 +478,10 @@ static void run_test(struct sockaddr *addr, socklen_t alen) /* Do not fragment these datagrams: only succeed if GSO works */ set_pmtu_discover(fdt, addr->sa_family == AF_INET); - if (cfg_do_connectionless) { - set_device_mtu(fdt, CONST_MTU_TEST); + if (cfg_do_connectionless) run_all(fdt, fdr, addr, alen); - } if (cfg_do_connected) { - set_device_mtu(fdt, CONST_MTU_TEST + 100); - set_route_mtu(CONST_MTU_TEST, addr->sa_family == AF_INET); - if (connect(fdt, addr, alen)) error(1, errno, "connect"); diff --git a/tools/testing/selftests/net/udpgso.sh b/tools/testing/selftests/net/udpgso.sh index fec24f584fe9..6c63178086b0 100755 --- a/tools/testing/selftests/net/udpgso.sh +++ b/tools/testing/selftests/net/udpgso.sh @@ -3,27 +3,56 @@ # # Run a series of udpgso regression tests +set -o errexit +set -o nounset + +setup_loopback() { + ip addr add dev lo 10.0.0.1/32 + ip addr add dev lo fd00::1/128 nodad noprefixroute +} + +test_dev_mtu() { + setup_loopback + # Reduce loopback MTU + ip link set dev lo mtu 1500 +} + +test_route_mtu() { + setup_loopback + # Remove default local routes + ip route del local 10.0.0.1/32 table local dev lo + ip route del local fd00::1/128 table local dev lo + # Install local routes with reduced MTU + ip route add local 10.0.0.1/32 table local dev lo mtu 1500 + ip route add local fd00::1/128 table local dev lo mtu 1500 +} + +if [ "$#" -gt 0 ]; then + "$1" + shift 2 # pop "test_*" arg and "--" delimiter + exec "$@" +fi + echo "ipv4 cmsg" -./in_netns.sh ./udpgso -4 -C +./in_netns.sh "$0" test_dev_mtu -- ./udpgso -4 -C echo "ipv4 setsockopt" -./in_netns.sh ./udpgso -4 -C -s +./in_netns.sh "$0" test_dev_mtu -- ./udpgso -4 -C -s echo "ipv6 cmsg" -./in_netns.sh ./udpgso -6 -C +./in_netns.sh "$0" test_dev_mtu -- ./udpgso -6 -C echo "ipv6 setsockopt" -./in_netns.sh ./udpgso -6 -C -s +./in_netns.sh "$0" test_dev_mtu -- ./udpgso -6 -C -s echo "ipv4 connected" -./in_netns.sh ./udpgso -4 -c +./in_netns.sh "$0" test_route_mtu -- ./udpgso -4 -c -# blocked on 2nd loopback address -# echo "ipv6 connected" -# ./in_netns.sh ./udpgso -6 -c +echo "ipv6 connected" +./in_netns.sh "$0" test_route_mtu -- ./udpgso -6 -c echo "ipv4 msg_more" -./in_netns.sh ./udpgso -4 -C -m +./in_netns.sh "$0" test_dev_mtu -- ./udpgso -4 -C -m echo "ipv6 msg_more" -./in_netns.sh ./udpgso -6 -C -m +./in_netns.sh "$0" test_dev_mtu -- ./udpgso -6 -C -m diff --git a/tools/testing/selftests/net/udpgso_bench.sh b/tools/testing/selftests/net/udpgso_bench.sh index 80b5d352702e..640bc43452fa 100755 --- a/tools/testing/selftests/net/udpgso_bench.sh +++ b/tools/testing/selftests/net/udpgso_bench.sh @@ -7,6 +7,7 @@ readonly GREEN='\033[0;92m' readonly YELLOW='\033[0;33m' readonly RED='\033[0;31m' readonly NC='\033[0m' # No Color +readonly TESTPORT=8000 readonly KSFT_PASS=0 readonly KSFT_FAIL=1 @@ -56,11 +57,26 @@ trap wake_children EXIT run_one() { local -r args=$@ + local nr_socks=0 + local i=0 + local -r timeout=10 + + ./udpgso_bench_rx -p "$TESTPORT" & + ./udpgso_bench_rx -p "$TESTPORT" -t & + + # Wait for the above test program to get ready to receive connections. + while [ "$i" -lt "$timeout" ]; do + nr_socks="$(ss -lnHi | grep -c "\*:${TESTPORT}")" + [ "$nr_socks" -eq 2 ] && break + i=$((i + 1)) + sleep 1 + done + if [ "$nr_socks" -ne 2 ]; then + echo "timed out while waiting for udpgso_bench_rx" + exit 1 + fi - ./udpgso_bench_rx & - ./udpgso_bench_rx -t & - - ./udpgso_bench_tx ${args} + ./udpgso_bench_tx -p "$TESTPORT" ${args} } run_in_netns() { @@ -120,7 +136,7 @@ run_all() { run_udp "${ipv4_args}" echo "ipv6" - run_tcp "${ipv4_args}" + run_tcp "${ipv6_args}" run_udp "${ipv6_args}" } diff --git a/tools/testing/selftests/net/udpgso_bench_rx.c b/tools/testing/selftests/net/udpgso_bench_rx.c index db3d4a8b5a4c..1cbadd267c96 100644 --- a/tools/testing/selftests/net/udpgso_bench_rx.c +++ b/tools/testing/selftests/net/udpgso_bench_rx.c @@ -113,6 +113,9 @@ static void do_poll(int fd, int timeout_ms) interrupted = true; break; } + + /* no events and more time to wait, do poll again */ + continue; } if (pfd.revents != POLLIN) error(1, errno, "poll: 0x%x expected 0x%x\n", @@ -211,11 +214,10 @@ static void do_verify_udp(const char *data, int len) static int recv_msg(int fd, char *buf, int len, int *gso_size) { - char control[CMSG_SPACE(sizeof(uint16_t))] = {0}; + char control[CMSG_SPACE(sizeof(int))] = {0}; struct msghdr msg = {0}; struct iovec iov = {0}; struct cmsghdr *cmsg; - uint16_t *gsosizeptr; int ret; iov.iov_base = buf; @@ -234,8 +236,7 @@ static int recv_msg(int fd, char *buf, int len, int *gso_size) cmsg = CMSG_NXTHDR(&msg, cmsg)) { if (cmsg->cmsg_level == SOL_UDP && cmsg->cmsg_type == UDP_GRO) { - gsosizeptr = (uint16_t *) CMSG_DATA(cmsg); - *gso_size = *gsosizeptr; + *gso_size = *(int *)CMSG_DATA(cmsg); break; } } @@ -247,7 +248,7 @@ static int recv_msg(int fd, char *buf, int len, int *gso_size) static void do_flush_udp(int fd) { static char rbuf[ETH_MAX_MTU]; - int ret, len, gso_size, budget = 256; + int ret, len, gso_size = 0, budget = 256; len = cfg_read_all ? sizeof(rbuf) : 0; while (budget--) { @@ -290,19 +291,17 @@ static void usage(const char *filepath) static void parse_opts(int argc, char **argv) { + const char *bind_addr = NULL; int c; - /* bind to any by default */ - setup_sockaddr(PF_INET6, "::", &cfg_bind_addr); while ((c = getopt(argc, argv, "4b:C:Gl:n:p:rR:S:tv")) != -1) { switch (c) { case '4': cfg_family = PF_INET; cfg_alen = sizeof(struct sockaddr_in); - setup_sockaddr(PF_INET, "0.0.0.0", &cfg_bind_addr); break; case 'b': - setup_sockaddr(cfg_family, optarg, &cfg_bind_addr); + bind_addr = optarg; break; case 'C': cfg_connect_timeout_ms = strtoul(optarg, NULL, 0); @@ -335,9 +334,16 @@ static void parse_opts(int argc, char **argv) cfg_verify = true; cfg_read_all = true; break; + default: + exit(1); } } + if (!bind_addr) + bind_addr = cfg_family == PF_INET6 ? "::" : "0.0.0.0"; + + setup_sockaddr(cfg_family, bind_addr, &cfg_bind_addr); + if (optind != argc) usage(argv[0]); @@ -369,7 +375,7 @@ static void do_recv(void) do_flush_udp(fd); tnow = gettimeofday_ms(); - if (tnow > treport) { + if (!cfg_expected_pkt_nr && tnow > treport) { if (packets) fprintf(stderr, "%s rx: %6lu MB/s %8lu calls/s\n", diff --git a/tools/testing/selftests/net/udpgso_bench_tx.c b/tools/testing/selftests/net/udpgso_bench_tx.c index 17512a43885e..477392715a9a 100644 --- a/tools/testing/selftests/net/udpgso_bench_tx.c +++ b/tools/testing/selftests/net/udpgso_bench_tx.c @@ -62,6 +62,7 @@ static int cfg_payload_len = (1472 * 42); static int cfg_port = 8000; static int cfg_runtime_ms = -1; static bool cfg_poll; +static int cfg_poll_loop_timeout_ms = 2000; static bool cfg_segment; static bool cfg_sendmmsg; static bool cfg_tcp; @@ -235,16 +236,17 @@ static void flush_errqueue_recv(int fd) } } -static void flush_errqueue(int fd, const bool do_poll) +static void flush_errqueue(int fd, const bool do_poll, + unsigned long poll_timeout, const bool poll_err) { if (do_poll) { struct pollfd fds = {0}; int ret; fds.fd = fd; - ret = poll(&fds, 1, 500); + ret = poll(&fds, 1, poll_timeout); if (ret == 0) { - if (cfg_verbose) + if ((cfg_verbose) && (poll_err)) fprintf(stderr, "poll timeout\n"); } else if (ret < 0) { error(1, errno, "poll"); @@ -254,6 +256,20 @@ static void flush_errqueue(int fd, const bool do_poll) flush_errqueue_recv(fd); } +static void flush_errqueue_retry(int fd, unsigned long num_sends) +{ + unsigned long tnow, tstop; + bool first_try = true; + + tnow = gettimeofday_ms(); + tstop = tnow + cfg_poll_loop_timeout_ms; + do { + flush_errqueue(fd, true, tstop - tnow, first_try); + first_try = false; + tnow = gettimeofday_ms(); + } while ((stat_zcopies != num_sends) && (tnow < tstop)); +} + static int send_tcp(int fd, char *data) { int ret, done = 0, count = 0; @@ -413,16 +429,18 @@ static int send_udp_segment(int fd, char *data) static void usage(const char *filepath) { - error(1, 0, "Usage: %s [-46acmHPtTuvz] [-C cpu] [-D dst ip] [-l secs] [-M messagenr] [-p port] [-s sendsize] [-S gsosize]", + error(1, 0, "Usage: %s [-46acmHPtTuvz] [-C cpu] [-D dst ip] [-l secs] " + "[-L secs] [-M messagenr] [-p port] [-s sendsize] [-S gsosize]", filepath); } static void parse_opts(int argc, char **argv) { + const char *bind_addr = NULL; int max_len, hdrlen; int c; - while ((c = getopt(argc, argv, "46acC:D:Hl:mM:p:s:PS:tTuvz")) != -1) { + while ((c = getopt(argc, argv, "46acC:D:Hl:L:mM:p:s:PS:tTuvz")) != -1) { switch (c) { case '4': if (cfg_family != PF_UNSPEC) @@ -446,11 +464,14 @@ static void parse_opts(int argc, char **argv) cfg_cpu = strtol(optarg, NULL, 0); break; case 'D': - setup_sockaddr(cfg_family, optarg, &cfg_dst_addr); + bind_addr = optarg; break; case 'l': cfg_runtime_ms = strtoul(optarg, NULL, 10) * 1000; break; + case 'L': + cfg_poll_loop_timeout_ms = strtoul(optarg, NULL, 10) * 1000; + break; case 'm': cfg_sendmmsg = true; break; @@ -489,9 +510,16 @@ static void parse_opts(int argc, char **argv) case 'z': cfg_zerocopy = true; break; + default: + exit(1); } } + if (!bind_addr) + bind_addr = cfg_family == PF_INET6 ? "::" : "0.0.0.0"; + + setup_sockaddr(cfg_family, bind_addr, &cfg_dst_addr); + if (optind != argc) usage(argv[0]); @@ -671,7 +699,7 @@ int main(int argc, char **argv) num_sends += send_udp(fd, buf[i]); num_msgs++; if ((cfg_zerocopy && ((num_msgs & 0xF) == 0)) || cfg_tx_tstamp) - flush_errqueue(fd, cfg_poll); + flush_errqueue(fd, cfg_poll, 500, true); if (cfg_msg_nr && num_msgs >= cfg_msg_nr) break; @@ -690,7 +718,7 @@ int main(int argc, char **argv) } while (!interrupted && (cfg_runtime_ms == -1 || tnow < tstop)); if (cfg_zerocopy || cfg_tx_tstamp) - flush_errqueue(fd, true); + flush_errqueue_retry(fd, num_sends); if (close(fd)) error(1, errno, "close"); diff --git a/tools/testing/selftests/net/unicast_extensions.sh b/tools/testing/selftests/net/unicast_extensions.sh new file mode 100755 index 000000000000..f52aa5f7da52 --- /dev/null +++ b/tools/testing/selftests/net/unicast_extensions.sh @@ -0,0 +1,225 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# By Seth Schoen (c) 2021, for the IPv4 Unicast Extensions Project +# Thanks to David Ahern for help and advice on nettest modifications. +# +# Self-tests for IPv4 address extensions: the kernel's ability to accept +# certain traditionally unused or unallocated IPv4 addresses. For each kind +# of address, we test for interface assignment, ping, TCP, and forwarding. +# Must be run as root (to manipulate network namespaces and virtual +# interfaces). +# +# Things we test for here: +# +# * Currently the kernel accepts addresses in 0/8 and 240/4 as valid. +# +# * Notwithstanding that, 0.0.0.0 and 255.255.255.255 cannot be assigned. +# +# * Currently the kernel DOES NOT accept unicast use of the lowest +# address in an IPv4 subnet (e.g. 192.168.100.0/32 in 192.168.100.0/24). +# This is treated as a second broadcast address, for compatibility +# with 4.2BSD (!). +# +# * Currently the kernel DOES NOT accept unicast use of any of 127/8. +# +# * Currently the kernel DOES NOT accept unicast use of any of 224/4. +# +# These tests provide an easy way to flip the expected result of any +# of these behaviors for testing kernel patches that change them. + +source lib.sh + +# nettest can be run from PATH or from same directory as this selftest +if ! which nettest >/dev/null; then + PATH=$PWD:$PATH + if ! which nettest >/dev/null; then + echo "'nettest' command not found; skipping tests" + exit $ksft_skip + fi +fi + +result=0 + +hide_output(){ exec 3>&1 4>&2 >/dev/null 2>/dev/null; } +show_output(){ exec >&3 2>&4; } + +show_result(){ + if [ $1 -eq 0 ]; then + printf "TEST: %-60s [ OK ]\n" "${2}" + else + printf "TEST: %-60s [FAIL]\n" "${2}" + result=1 + fi +} + +_do_segmenttest(){ + # Perform a simple set of link tests between a pair of + # IP addresses on a shared (virtual) segment, using + # ping and nettest. + # foo --- bar + # Arguments: ip_a ip_b prefix_length test_description + # + # Caller must set up $foo_ns and $bar_ns namespaces + # containing linked veth devices foo and bar, + # respectively. + + ip -n $foo_ns address add $1/$3 dev foo || return 1 + ip -n $foo_ns link set foo up || return 1 + ip -n $bar_ns address add $2/$3 dev bar || return 1 + ip -n $bar_ns link set bar up || return 1 + + ip netns exec $foo_ns timeout 2 ping -c 1 $2 || return 1 + ip netns exec $bar_ns timeout 2 ping -c 1 $1 || return 1 + + nettest -B -N $bar_ns -O $foo_ns -r $1 || return 1 + nettest -B -N $foo_ns -O $bar_ns -r $2 || return 1 + + return 0 +} + +_do_route_test(){ + # Perform a simple set of gateway tests. + # + # [foo] <---> [foo1]-[bar1] <---> [bar] /prefix + # host gateway host + # + # Arguments: foo_ip foo1_ip bar1_ip bar_ip prefix_len test_description + # Displays test result and returns success or failure. + + # Caller must set up $foo_ns, $bar_ns, and $router_ns + # containing linked veth devices foo-foo1, bar1-bar + # (foo in $foo_ns, foo1 and bar1 in $router_ns, and + # bar in $bar_ns). + + ip -n $foo_ns address add $1/$5 dev foo || return 1 + ip -n $foo_ns link set foo up || return 1 + ip -n $foo_ns route add default via $2 || return 1 + ip -n $bar_ns address add $4/$5 dev bar || return 1 + ip -n $bar_ns link set bar up || return 1 + ip -n $bar_ns route add default via $3 || return 1 + ip -n $router_ns address add $2/$5 dev foo1 || return 1 + ip -n $router_ns link set foo1 up || return 1 + ip -n $router_ns address add $3/$5 dev bar1 || return 1 + ip -n $router_ns link set bar1 up || return 1 + + echo 1 | ip netns exec $router_ns tee /proc/sys/net/ipv4/ip_forward + + ip netns exec $foo_ns timeout 2 ping -c 1 $2 || return 1 + ip netns exec $foo_ns timeout 2 ping -c 1 $4 || return 1 + ip netns exec $bar_ns timeout 2 ping -c 1 $3 || return 1 + ip netns exec $bar_ns timeout 2 ping -c 1 $1 || return 1 + + nettest -B -N $bar_ns -O $foo_ns -r $1 || return 1 + nettest -B -N $foo_ns -O $bar_ns -r $4 || return 1 + + return 0 +} + +segmenttest(){ + # Sets up veth link and tries to connect over it. + # Arguments: ip_a ip_b prefix_len test_description + hide_output + setup_ns foo_ns bar_ns + ip link add foo netns $foo_ns type veth peer name bar netns $bar_ns + + test_result=0 + _do_segmenttest "$@" || test_result=1 + + ip netns pids $foo_ns | xargs -r kill -9 + ip netns pids $bar_ns | xargs -r kill -9 + cleanup_ns $foo_ns $bar_ns + show_output + + # inverted tests will expect failure instead of success + [ -n "$expect_failure" ] && test_result=`expr 1 - $test_result` + + show_result $test_result "$4" +} + +route_test(){ + # Sets up a simple gateway and tries to connect through it. + # [foo] <---> [foo1]-[bar1] <---> [bar] /prefix + # Arguments: foo_ip foo1_ip bar1_ip bar_ip prefix_len test_description + # Returns success or failure. + + hide_output + setup_ns foo_ns bar_ns router_ns + ip link add foo netns $foo_ns type veth peer name foo1 netns $router_ns + ip link add bar netns $bar_ns type veth peer name bar1 netns $router_ns + + test_result=0 + _do_route_test "$@" || test_result=1 + + ip netns pids $foo_ns | xargs -r kill -9 + ip netns pids $bar_ns | xargs -r kill -9 + ip netns pids $router_ns | xargs -r kill -9 + cleanup_ns $foo_ns $bar_ns $router_ns + + show_output + + # inverted tests will expect failure instead of success + [ -n "$expect_failure" ] && test_result=`expr 1 - $test_result` + show_result $test_result "$6" +} + +echo "###########################################################################" +echo "Unicast address extensions tests (behavior of reserved IPv4 addresses)" +echo "###########################################################################" +# +# Test support for 240/4 +segmenttest 240.1.2.1 240.1.2.4 24 "assign and ping within 240/4 (1 of 2) (is allowed)" +segmenttest 250.100.2.1 250.100.30.4 16 "assign and ping within 240/4 (2 of 2) (is allowed)" +# +# Test support for 0/8 +segmenttest 0.1.2.17 0.1.2.23 24 "assign and ping within 0/8 (1 of 2) (is allowed)" +segmenttest 0.77.240.17 0.77.2.23 16 "assign and ping within 0/8 (2 of 2) (is allowed)" +# +# Even 255.255/16 is OK! +segmenttest 255.255.3.1 255.255.50.77 16 "assign and ping inside 255.255/16 (is allowed)" +# +# Or 255.255.255/24 +segmenttest 255.255.255.1 255.255.255.254 24 "assign and ping inside 255.255.255/24 (is allowed)" +# +# Routing between different networks +route_test 240.5.6.7 240.5.6.1 255.1.2.1 255.1.2.3 24 "route between 240.5.6/24 and 255.1.2/24 (is allowed)" +route_test 0.200.6.7 0.200.38.1 245.99.101.1 245.99.200.111 16 "route between 0.200/16 and 245.99/16 (is allowed)" +# +# Test support for lowest address ending in .0 +segmenttest 5.10.15.20 5.10.15.0 24 "assign and ping lowest address (/24)" +# +# Test support for lowest address not ending in .0 +segmenttest 192.168.101.192 192.168.101.193 26 "assign and ping lowest address (/26)" +# +# Routing using lowest address as a gateway/endpoint +route_test 192.168.42.1 192.168.42.0 9.8.7.6 9.8.7.0 24 "routing using lowest address" +# +# ============================================== +# ==== TESTS THAT CURRENTLY EXPECT FAILURE ===== +# ============================================== +expect_failure=true +# It should still not be possible to use 0.0.0.0 or 255.255.255.255 +# as a unicast address. Thus, these tests expect failure. +segmenttest 0.0.1.5 0.0.0.0 16 "assigning 0.0.0.0 (is forbidden)" +segmenttest 255.255.255.1 255.255.255.255 16 "assigning 255.255.255.255 (is forbidden)" +# +# Test support for not having all of 127 be loopback +# Currently Linux does not allow this, so this should fail too +segmenttest 127.99.4.5 127.99.4.6 16 "assign and ping inside 127/8 (is forbidden)" +# +# Test support for unicast use of class D +# Currently Linux does not allow this, so this should fail too +segmenttest 225.1.2.3 225.1.2.200 24 "assign and ping class D address (is forbidden)" +# +# Routing using class D as a gateway +route_test 225.1.42.1 225.1.42.2 9.8.7.6 9.8.7.1 24 "routing using class D (is forbidden)" +# +# Routing using 127/8 +# Currently Linux does not allow this, so this should fail too +route_test 127.99.2.3 127.99.2.4 200.1.2.3 200.1.2.4 24 "routing using 127/8 (is forbidden)" +# +unset expect_failure +# ===================================================== +# ==== END OF TESTS THAT CURRENTLY EXPECT FAILURE ===== +# ===================================================== +exit ${result} diff --git a/tools/testing/selftests/net/veth.sh b/tools/testing/selftests/net/veth.sh new file mode 100755 index 000000000000..3a394b43e274 --- /dev/null +++ b/tools/testing/selftests/net/veth.sh @@ -0,0 +1,392 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +BPF_FILE="xdp_dummy.o" +readonly STATS="$(mktemp -p /tmp ns-XXXXXX)" +readonly BASE=`basename $STATS` +readonly SRC=2 +readonly DST=1 +readonly DST_NAT=100 +readonly NS_SRC=$BASE$SRC +readonly NS_DST=$BASE$DST + +# "baremetal" network used for raw UDP traffic +readonly BM_NET_V4=192.168.1. +readonly BM_NET_V6=2001:db8:: + +readonly CPUS=`nproc` +ret=0 + +cleanup() { + local ns + local jobs + readonly jobs="$(jobs -p)" + [ -n "${jobs}" ] && kill -1 ${jobs} 2>/dev/null + rm -f $STATS + + for ns in $NS_SRC $NS_DST; do + ip netns del $ns 2>/dev/null + done +} + +trap cleanup EXIT + +create_ns() { + local ns + + for ns in $NS_SRC $NS_DST; do + ip netns add $ns + ip -n $ns link set dev lo up + done + + ip link add name veth$SRC type veth peer name veth$DST + + for ns in $SRC $DST; do + ip link set dev veth$ns netns $BASE$ns up + ip -n $BASE$ns addr add dev veth$ns $BM_NET_V4$ns/24 + ip -n $BASE$ns addr add dev veth$ns $BM_NET_V6$ns/64 nodad + done + echo "#kernel" > $BASE + chmod go-rw $BASE +} + +__chk_flag() { + local msg="$1" + local target=$2 + local expected=$3 + local flagname=$4 + + local flag=`ip netns exec $BASE$target ethtool -k veth$target |\ + grep $flagname | awk '{print $2}'` + + printf "%-60s" "$msg" + if [ "$flag" = "$expected" ]; then + echo " ok " + else + echo " fail - expected $expected found $flag" + ret=1 + fi +} + +chk_gro_flag() { + __chk_flag "$1" $2 $3 generic-receive-offload +} + +chk_tso_flag() { + __chk_flag "$1" $2 $3 tcp-segmentation-offload +} + +chk_channels() { + local msg="$1" + local target=$2 + local rx=$3 + local tx=$4 + + local dev=veth$target + + local cur_rx=`ip netns exec $BASE$target ethtool -l $dev |\ + grep RX: | tail -n 1 | awk '{print $2}' ` + local cur_tx=`ip netns exec $BASE$target ethtool -l $dev |\ + grep TX: | tail -n 1 | awk '{print $2}'` + local cur_combined=`ip netns exec $BASE$target ethtool -l $dev |\ + grep Combined: | tail -n 1 | awk '{print $2}'` + + printf "%-60s" "$msg" + if [ "$cur_rx" = "$rx" -a "$cur_tx" = "$tx" -a "$cur_combined" = "n/a" ]; then + echo " ok " + else + echo " fail rx:$rx:$cur_rx tx:$tx:$cur_tx combined:n/a:$cur_combined" + fi +} + +chk_gro() { + local msg="$1" + local expected=$2 + + ip netns exec $BASE$SRC ping -qc 1 $BM_NET_V4$DST >/dev/null + NSTAT_HISTORY=$STATS ip netns exec $NS_DST nstat -n + + printf "%-60s" "$msg" + ip netns exec $BASE$DST ./udpgso_bench_rx -C 1000 -R 10 & + local spid=$! + sleep 0.1 + + ip netns exec $NS_SRC ./udpgso_bench_tx -4 -s 13000 -S 1300 -M 1 -D $BM_NET_V4$DST + local retc=$? + wait $spid + local rets=$? + if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then + echo " fail client exit code $retc, server $rets" + ret=1 + return + fi + + local pkts=`NSTAT_HISTORY=$STATS ip netns exec $NS_DST nstat IpInReceives | \ + awk '{print $2}' | tail -n 1` + if [ "$pkts" = "$expected" ]; then + echo " ok " + else + echo " fail - got $pkts packets, expected $expected " + ret=1 + fi +} + +__change_channels() +{ + local cur_cpu + local end=$1 + local cur + local i + + while true; do + printf -v cur '%(%s)T' + [ $cur -le $end ] || break + + for i in `seq 1 $CPUS`; do + ip netns exec $NS_SRC ethtool -L veth$SRC rx $i tx $i + ip netns exec $NS_DST ethtool -L veth$DST rx $i tx $i + done + + for i in `seq 1 $((CPUS - 1))`; do + cur_cpu=$((CPUS - $i)) + ip netns exec $NS_SRC ethtool -L veth$SRC rx $cur_cpu tx $cur_cpu + ip netns exec $NS_DST ethtool -L veth$DST rx $cur_cpu tx $cur_cpu + done + done +} + +__send_data() { + local end=$1 + + while true; do + printf -v cur '%(%s)T' + [ $cur -le $end ] || break + + ip netns exec $NS_SRC ./udpgso_bench_tx -4 -s 1000 -M 300 -D $BM_NET_V4$DST + done +} + +do_stress() { + local end + printf -v end '%(%s)T' + end=$((end + $STRESS)) + + ip netns exec $NS_SRC ethtool -L veth$SRC rx 3 tx 3 + ip netns exec $NS_DST ethtool -L veth$DST rx 3 tx 3 + + ip netns exec $NS_DST ./udpgso_bench_rx & + local rx_pid=$! + + echo "Running stress test for $STRESS seconds..." + __change_channels $end & + local ch_pid=$! + __send_data $end & + local data_pid_1=$! + __send_data $end & + local data_pid_2=$! + __send_data $end & + local data_pid_3=$! + __send_data $end & + local data_pid_4=$! + + wait $ch_pid $data_pid_1 $data_pid_2 $data_pid_3 $data_pid_4 + kill -9 $rx_pid + echo "done" + + # restore previous setting + ip netns exec $NS_SRC ethtool -L veth$SRC rx 2 tx 2 + ip netns exec $NS_DST ethtool -L veth$DST rx 2 tx 1 +} + +usage() { + echo "Usage: $0 [-h] [-s <seconds>]" + echo -e "\t-h: show this help" + echo -e "\t-s: run optional stress tests for the given amount of seconds" +} + +STRESS=0 +while getopts "hs:" option; do + case "$option" in + "h") + usage $0 + exit 0 + ;; + "s") + STRESS=$OPTARG + ;; + esac +done + +if [ ! -f ${BPF_FILE} ]; then + echo "Missing ${BPF_FILE}. Run 'make' first" + exit 1 +fi + +[ $CPUS -lt 2 ] && echo "Only one CPU available, some tests will be skipped" +[ $STRESS -gt 0 -a $CPUS -lt 3 ] && echo " stress test will be skipped, too" + +create_ns +chk_gro_flag "default - gro flag" $SRC off +chk_gro_flag " - peer gro flag" $DST off +chk_tso_flag " - tso flag" $SRC on +chk_tso_flag " - peer tso flag" $DST on +chk_gro " - aggregation" 1 +ip netns exec $NS_SRC ethtool -K veth$SRC tx-udp-segmentation off +chk_gro " - aggregation with TSO off" 10 +cleanup + +create_ns +ip netns exec $NS_DST ethtool -K veth$DST gro on +chk_gro_flag "with gro on - gro flag" $DST on +chk_gro_flag " - peer gro flag" $SRC off +chk_tso_flag " - tso flag" $SRC on +chk_tso_flag " - peer tso flag" $DST on +ip netns exec $NS_SRC ethtool -K veth$SRC tx-udp-segmentation off +ip netns exec $NS_DST ethtool -K veth$DST rx-udp-gro-forwarding on +chk_gro " - aggregation with TSO off" 1 +cleanup + +create_ns +ip -n $NS_DST link set dev veth$DST up +ip -n $NS_DST link set dev veth$DST xdp object ${BPF_FILE} section xdp +chk_gro_flag "gro vs xdp while down - gro flag off" $DST off +ip -n $NS_DST link set dev veth$DST down +chk_gro_flag " - after down" $DST off +ip -n $NS_DST link set dev veth$DST xdp off +chk_gro_flag " - after xdp off" $DST off +ip -n $NS_DST link set dev veth$DST up +chk_gro_flag " - after up" $DST off +ip -n $NS_SRC link set dev veth$SRC xdp object ${BPF_FILE} section xdp +chk_gro_flag " - after peer xdp" $DST off +cleanup + +create_ns +ip -n $NS_DST link set dev veth$DST up +ip -n $NS_DST link set dev veth$DST xdp object ${BPF_FILE} section xdp +ip netns exec $NS_DST ethtool -K veth$DST generic-receive-offload on +chk_gro_flag "gro vs xdp while down - gro flag on" $DST on +ip -n $NS_DST link set dev veth$DST down +chk_gro_flag " - after down" $DST on +ip -n $NS_DST link set dev veth$DST xdp off +chk_gro_flag " - after xdp off" $DST on +ip -n $NS_DST link set dev veth$DST up +chk_gro_flag " - after up" $DST on +ip -n $NS_SRC link set dev veth$SRC xdp object ${BPF_FILE} section xdp +chk_gro_flag " - after peer xdp" $DST on +cleanup + +create_ns +chk_channels "default channels" $DST 1 1 + +ip -n $NS_DST link set dev veth$DST down +ip netns exec $NS_DST ethtool -K veth$DST gro on +chk_gro_flag "with gro enabled on link down - gro flag" $DST on +chk_gro_flag " - peer gro flag" $SRC off +chk_tso_flag " - tso flag" $SRC on +chk_tso_flag " - peer tso flag" $DST on +ip -n $NS_DST link set dev veth$DST up +ip netns exec $NS_SRC ethtool -K veth$SRC tx-udp-segmentation off +ip netns exec $NS_DST ethtool -K veth$DST rx-udp-gro-forwarding on +chk_gro " - aggregation with TSO off" 1 +cleanup + +create_ns + +CUR_TX=1 +CUR_RX=1 +if [ $CPUS -gt 1 ]; then + ip netns exec $NS_DST ethtool -L veth$DST tx 2 + chk_channels "setting tx channels" $DST 1 2 + CUR_TX=2 +fi + +if [ $CPUS -gt 2 ]; then + ip netns exec $NS_DST ethtool -L veth$DST rx 3 tx 3 + chk_channels "setting both rx and tx channels" $DST 3 3 + CUR_RX=3 + CUR_TX=3 +fi + +ip netns exec $NS_DST ethtool -L veth$DST combined 2 2>/dev/null +chk_channels "bad setting: combined channels" $DST $CUR_RX $CUR_TX + +ip netns exec $NS_DST ethtool -L veth$DST tx $((CPUS + 1)) 2>/dev/null +chk_channels "setting invalid channels nr" $DST $CUR_RX $CUR_TX + +if [ $CPUS -gt 1 ]; then + # this also tests queues nr reduction + ip netns exec $NS_DST ethtool -L veth$DST rx 1 tx 2 2>/dev/null + ip netns exec $NS_SRC ethtool -L veth$SRC rx 1 tx 2 2>/dev/null + printf "%-60s" "bad setting: XDP with RX nr less than TX" + ip -n $NS_DST link set dev veth$DST xdp object ${BPF_FILE} \ + section xdp 2>/dev/null &&\ + echo "fail - set operation successful ?!?" || echo " ok " + + # the following tests will run with multiple channels active + ip netns exec $NS_SRC ethtool -L veth$SRC rx 2 + ip netns exec $NS_DST ethtool -L veth$DST rx 2 + ip -n $NS_DST link set dev veth$DST xdp object ${BPF_FILE} \ + section xdp 2>/dev/null + printf "%-60s" "bad setting: reducing RX nr below peer TX with XDP set" + ip netns exec $NS_DST ethtool -L veth$DST rx 1 2>/dev/null &&\ + echo "fail - set operation successful ?!?" || echo " ok " + CUR_RX=2 + CUR_TX=2 +fi + +if [ $CPUS -gt 2 ]; then + printf "%-60s" "bad setting: increasing peer TX nr above RX with XDP set" + ip netns exec $NS_SRC ethtool -L veth$SRC tx 3 2>/dev/null &&\ + echo "fail - set operation successful ?!?" || echo " ok " + chk_channels "setting invalid channels nr" $DST 2 2 +fi + +ip -n $NS_DST link set dev veth$DST xdp object ${BPF_FILE} section xdp 2>/dev/null +chk_gro_flag "with xdp attached - gro flag" $DST off +chk_gro_flag " - peer gro flag" $SRC off +chk_tso_flag " - tso flag" $SRC off +chk_tso_flag " - peer tso flag" $DST on +ip netns exec $NS_DST ethtool -K veth$DST rx-udp-gro-forwarding on +chk_gro " - no aggregation" 10 +ip netns exec $NS_DST ethtool -K veth$DST generic-receive-offload on +chk_gro_flag " - gro flag with GRO on" $DST on +chk_gro " - aggregation" 1 + + +ip -n $NS_DST link set dev veth$DST down +ip -n $NS_SRC link set dev veth$SRC down +chk_gro_flag " - after dev off, flag" $DST on +chk_gro_flag " - peer flag" $SRC off + +ip netns exec $NS_DST ethtool -K veth$DST gro on +ip -n $NS_DST link set dev veth$DST xdp off +chk_gro_flag " - after gro on xdp off, gro flag" $DST on +chk_gro_flag " - peer gro flag" $SRC off +chk_tso_flag " - tso flag" $SRC on +chk_tso_flag " - peer tso flag" $DST on + +if [ $CPUS -gt 1 ]; then + ip netns exec $NS_DST ethtool -L veth$DST tx 1 + chk_channels "decreasing tx channels with device down" $DST 2 1 +fi + +ip -n $NS_DST link set dev veth$DST up +ip -n $NS_SRC link set dev veth$SRC up +chk_gro " - aggregation" 1 + +if [ $CPUS -gt 1 ]; then + [ $STRESS -gt 0 -a $CPUS -gt 2 ] && do_stress + + ip -n $NS_DST link set dev veth$DST down + ip -n $NS_SRC link set dev veth$SRC down + ip netns exec $NS_DST ethtool -L veth$DST tx 2 + chk_channels "increasing tx channels with device down" $DST 2 2 + ip -n $NS_DST link set dev veth$DST up + ip -n $NS_SRC link set dev veth$SRC up +fi + +ip netns exec $NS_DST ethtool -K veth$DST gro off +ip netns exec $NS_SRC ethtool -K veth$SRC tx-udp-segmentation off +chk_gro "aggregation again with default and TSO off" 10 + +exit $ret diff --git a/tools/testing/selftests/net/vlan_hw_filter.sh b/tools/testing/selftests/net/vlan_hw_filter.sh new file mode 100755 index 000000000000..7bc804ffaf7c --- /dev/null +++ b/tools/testing/selftests/net/vlan_hw_filter.sh @@ -0,0 +1,29 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +readonly NETNS="ns-$(mktemp -u XXXXXX)" + +ret=0 + +cleanup() { + ip netns del $NETNS +} + +trap cleanup EXIT + +fail() { + echo "ERROR: ${1:-unexpected return code} (ret: $_)" >&2 + ret=1 +} + +ip netns add ${NETNS} +ip netns exec ${NETNS} ip link add bond0 type bond mode 0 +ip netns exec ${NETNS} ip link add bond_slave_1 type veth peer veth2 +ip netns exec ${NETNS} ip link set bond_slave_1 master bond0 +ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter off +ip netns exec ${NETNS} ip link add link bond_slave_1 name bond_slave_1.0 type vlan id 0 +ip netns exec ${NETNS} ip link add link bond0 name bond0.0 type vlan id 0 +ip netns exec ${NETNS} ip link set bond_slave_1 nomaster +ip netns exec ${NETNS} ip link del veth2 || fail "Please check vlan HW filter function" + +exit $ret diff --git a/tools/testing/selftests/net/vrf-xfrm-tests.sh b/tools/testing/selftests/net/vrf-xfrm-tests.sh index 184da81f554f..b64dd891699d 100755 --- a/tools/testing/selftests/net/vrf-xfrm-tests.sh +++ b/tools/testing/selftests/net/vrf-xfrm-tests.sh @@ -3,9 +3,7 @@ # # Various combinations of VRF with xfrms and qdisc. -# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 - +source lib.sh PAUSE_ON_FAIL=no VERBOSE=0 ret=0 @@ -67,7 +65,7 @@ run_cmd_host1() printf " COMMAND: $cmd\n" fi - out=$(eval ip netns exec host1 $cmd 2>&1) + out=$(eval ip netns exec $host1 $cmd 2>&1) rc=$? if [ "$VERBOSE" = "1" ]; then if [ -n "$out" ]; then @@ -116,9 +114,6 @@ create_ns() [ -z "${addr}" ] && addr="-" [ -z "${addr6}" ] && addr6="-" - ip netns add ${ns} - - ip -netns ${ns} link set lo up if [ "${addr}" != "-" ]; then ip -netns ${ns} addr add dev lo ${addr} fi @@ -177,25 +172,25 @@ connect_ns() cleanup() { - ip netns del host1 - ip netns del host2 + cleanup_ns $host1 $host2 } setup() { - create_ns "host1" - create_ns "host2" + setup_ns host1 host2 + create_ns "$host1" + create_ns "$host2" - connect_ns "host1" eth0 ${HOST1_4}/24 ${HOST1_6}/64 \ - "host2" eth0 ${HOST2_4}/24 ${HOST2_6}/64 + connect_ns "$host1" eth0 ${HOST1_4}/24 ${HOST1_6}/64 \ + "$host2" eth0 ${HOST2_4}/24 ${HOST2_6}/64 - create_vrf "host1" ${VRF} ${TABLE} - ip -netns host1 link set dev eth0 master ${VRF} + create_vrf "$host1" ${VRF} ${TABLE} + ip -netns $host1 link set dev eth0 master ${VRF} } cleanup_xfrm() { - for ns in host1 host2 + for ns in $host1 $host2 do for x in state policy do @@ -218,127 +213,127 @@ setup_xfrm() # # host1 - IPv4 out - ip -netns host1 xfrm policy add \ + ip -netns $host1 xfrm policy add \ src ${h1_4} dst ${h2_4} ${devarg} dir out \ tmpl src ${HOST1_4} dst ${HOST2_4} proto esp mode tunnel # host2 - IPv4 in - ip -netns host2 xfrm policy add \ + ip -netns $host2 xfrm policy add \ src ${h1_4} dst ${h2_4} dir in \ tmpl src ${HOST1_4} dst ${HOST2_4} proto esp mode tunnel # host1 - IPv4 in - ip -netns host1 xfrm policy add \ + ip -netns $host1 xfrm policy add \ src ${h2_4} dst ${h1_4} ${devarg} dir in \ tmpl src ${HOST2_4} dst ${HOST1_4} proto esp mode tunnel # host2 - IPv4 out - ip -netns host2 xfrm policy add \ + ip -netns $host2 xfrm policy add \ src ${h2_4} dst ${h1_4} dir out \ tmpl src ${HOST2_4} dst ${HOST1_4} proto esp mode tunnel # host1 - IPv6 out - ip -6 -netns host1 xfrm policy add \ + ip -6 -netns $host1 xfrm policy add \ src ${h1_6} dst ${h2_6} ${devarg} dir out \ tmpl src ${HOST1_6} dst ${HOST2_6} proto esp mode tunnel # host2 - IPv6 in - ip -6 -netns host2 xfrm policy add \ + ip -6 -netns $host2 xfrm policy add \ src ${h1_6} dst ${h2_6} dir in \ tmpl src ${HOST1_6} dst ${HOST2_6} proto esp mode tunnel # host1 - IPv6 in - ip -6 -netns host1 xfrm policy add \ + ip -6 -netns $host1 xfrm policy add \ src ${h2_6} dst ${h1_6} ${devarg} dir in \ tmpl src ${HOST2_6} dst ${HOST1_6} proto esp mode tunnel # host2 - IPv6 out - ip -6 -netns host2 xfrm policy add \ + ip -6 -netns $host2 xfrm policy add \ src ${h2_6} dst ${h1_6} dir out \ tmpl src ${HOST2_6} dst ${HOST1_6} proto esp mode tunnel # # state # - ip -netns host1 xfrm state add src ${HOST1_4} dst ${HOST2_4} \ + ip -netns $host1 xfrm state add src ${HOST1_4} dst ${HOST2_4} \ proto esp spi ${SPI_1} reqid 0 mode tunnel \ replay-window 4 replay-oseq 0x4 \ - auth-trunc 'hmac(md5)' ${AUTH_1} 96 \ - enc 'cbc(des3_ede)' ${ENC_1} \ + auth-trunc 'hmac(sha1)' ${AUTH_1} 96 \ + enc 'cbc(aes)' ${ENC_1} \ sel src ${h1_4} dst ${h2_4} ${devarg} - ip -netns host2 xfrm state add src ${HOST1_4} dst ${HOST2_4} \ + ip -netns $host2 xfrm state add src ${HOST1_4} dst ${HOST2_4} \ proto esp spi ${SPI_1} reqid 0 mode tunnel \ replay-window 4 replay-oseq 0x4 \ - auth-trunc 'hmac(md5)' ${AUTH_1} 96 \ - enc 'cbc(des3_ede)' ${ENC_1} \ + auth-trunc 'hmac(sha1)' ${AUTH_1} 96 \ + enc 'cbc(aes)' ${ENC_1} \ sel src ${h1_4} dst ${h2_4} - ip -netns host1 xfrm state add src ${HOST2_4} dst ${HOST1_4} \ + ip -netns $host1 xfrm state add src ${HOST2_4} dst ${HOST1_4} \ proto esp spi ${SPI_2} reqid 0 mode tunnel \ replay-window 4 replay-oseq 0x4 \ - auth-trunc 'hmac(md5)' ${AUTH_2} 96 \ - enc 'cbc(des3_ede)' ${ENC_2} \ + auth-trunc 'hmac(sha1)' ${AUTH_2} 96 \ + enc 'cbc(aes)' ${ENC_2} \ sel src ${h2_4} dst ${h1_4} ${devarg} - ip -netns host2 xfrm state add src ${HOST2_4} dst ${HOST1_4} \ + ip -netns $host2 xfrm state add src ${HOST2_4} dst ${HOST1_4} \ proto esp spi ${SPI_2} reqid 0 mode tunnel \ replay-window 4 replay-oseq 0x4 \ - auth-trunc 'hmac(md5)' ${AUTH_2} 96 \ - enc 'cbc(des3_ede)' ${ENC_2} \ + auth-trunc 'hmac(sha1)' ${AUTH_2} 96 \ + enc 'cbc(aes)' ${ENC_2} \ sel src ${h2_4} dst ${h1_4} - ip -6 -netns host1 xfrm state add src ${HOST1_6} dst ${HOST2_6} \ + ip -6 -netns $host1 xfrm state add src ${HOST1_6} dst ${HOST2_6} \ proto esp spi ${SPI_1} reqid 0 mode tunnel \ replay-window 4 replay-oseq 0x4 \ - auth-trunc 'hmac(md5)' ${AUTH_1} 96 \ - enc 'cbc(des3_ede)' ${ENC_1} \ + auth-trunc 'hmac(sha1)' ${AUTH_1} 96 \ + enc 'cbc(aes)' ${ENC_1} \ sel src ${h1_6} dst ${h2_6} ${devarg} - ip -6 -netns host2 xfrm state add src ${HOST1_6} dst ${HOST2_6} \ + ip -6 -netns $host2 xfrm state add src ${HOST1_6} dst ${HOST2_6} \ proto esp spi ${SPI_1} reqid 0 mode tunnel \ replay-window 4 replay-oseq 0x4 \ - auth-trunc 'hmac(md5)' ${AUTH_1} 96 \ - enc 'cbc(des3_ede)' ${ENC_1} \ + auth-trunc 'hmac(sha1)' ${AUTH_1} 96 \ + enc 'cbc(aes)' ${ENC_1} \ sel src ${h1_6} dst ${h2_6} - ip -6 -netns host1 xfrm state add src ${HOST2_6} dst ${HOST1_6} \ + ip -6 -netns $host1 xfrm state add src ${HOST2_6} dst ${HOST1_6} \ proto esp spi ${SPI_2} reqid 0 mode tunnel \ replay-window 4 replay-oseq 0x4 \ - auth-trunc 'hmac(md5)' ${AUTH_2} 96 \ - enc 'cbc(des3_ede)' ${ENC_2} \ + auth-trunc 'hmac(sha1)' ${AUTH_2} 96 \ + enc 'cbc(aes)' ${ENC_2} \ sel src ${h2_6} dst ${h1_6} ${devarg} - ip -6 -netns host2 xfrm state add src ${HOST2_6} dst ${HOST1_6} \ + ip -6 -netns $host2 xfrm state add src ${HOST2_6} dst ${HOST1_6} \ proto esp spi ${SPI_2} reqid 0 mode tunnel \ replay-window 4 replay-oseq 0x4 \ - auth-trunc 'hmac(md5)' ${AUTH_2} 96 \ - enc 'cbc(des3_ede)' ${ENC_2} \ + auth-trunc 'hmac(sha1)' ${AUTH_2} 96 \ + enc 'cbc(aes)' ${ENC_2} \ sel src ${h2_6} dst ${h1_6} } cleanup_xfrm_dev() { - ip -netns host1 li del xfrm0 - ip -netns host2 addr del ${XFRM2_4}/24 dev eth0 - ip -netns host2 addr del ${XFRM2_6}/64 dev eth0 + ip -netns $host1 li del xfrm0 + ip -netns $host2 addr del ${XFRM2_4}/24 dev eth0 + ip -netns $host2 addr del ${XFRM2_6}/64 dev eth0 } setup_xfrm_dev() { local vrfarg="vrf ${VRF}" - ip -netns host1 li add type xfrm dev eth0 if_id ${IF_ID} - ip -netns host1 li set xfrm0 ${vrfarg} up - ip -netns host1 addr add ${XFRM1_4}/24 dev xfrm0 - ip -netns host1 addr add ${XFRM1_6}/64 dev xfrm0 + ip -netns $host1 li add type xfrm dev eth0 if_id ${IF_ID} + ip -netns $host1 li set xfrm0 ${vrfarg} up + ip -netns $host1 addr add ${XFRM1_4}/24 dev xfrm0 + ip -netns $host1 addr add ${XFRM1_6}/64 dev xfrm0 - ip -netns host2 addr add ${XFRM2_4}/24 dev eth0 - ip -netns host2 addr add ${XFRM2_6}/64 dev eth0 + ip -netns $host2 addr add ${XFRM2_4}/24 dev eth0 + ip -netns $host2 addr add ${XFRM2_6}/64 dev eth0 setup_xfrm ${XFRM1_4} ${XFRM2_4} ${XFRM1_6} ${XFRM2_6} "if_id ${IF_ID}" } diff --git a/tools/testing/selftests/net/vrf_route_leaking.sh b/tools/testing/selftests/net/vrf_route_leaking.sh index 23cf924754a5..2da32f4c479b 100755 --- a/tools/testing/selftests/net/vrf_route_leaking.sh +++ b/tools/testing/selftests/net/vrf_route_leaking.sh @@ -58,6 +58,7 @@ # to send an ICMP error back to the source when the ttl of a packet reaches 1 # while it is forwarded between different vrfs. +source lib.sh VERBOSE=0 PAUSE_ON_FAIL=no DEFAULT_TTYPE=sym @@ -171,11 +172,7 @@ run_cmd_grep() cleanup() { - local ns - - for ns in h1 h2 r1 r2; do - ip netns del $ns 2>/dev/null - done + cleanup_ns $h1 $h2 $r1 $r2 } setup_vrf() @@ -212,72 +209,69 @@ setup_sym() # # create nodes as namespaces - # - for ns in h1 h2 r1; do - ip netns add $ns - ip -netns $ns link set lo up - - case "${ns}" in - h[12]) ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=0 - ip netns exec $ns sysctl -q -w net.ipv6.conf.all.keep_addr_on_down=1 - ;; - r1) ip netns exec $ns sysctl -q -w net.ipv4.ip_forward=1 - ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=1 - esac + setup_ns h1 h2 r1 + for ns in $h1 $h2 $r1; do + if echo $ns | grep -q h[12]-; then + ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=0 + ip netns exec $ns sysctl -q -w net.ipv6.conf.all.keep_addr_on_down=1 + else + ip netns exec $ns sysctl -q -w net.ipv4.ip_forward=1 + ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=1 + fi done # # create interconnects # - ip -netns h1 link add eth0 type veth peer name r1h1 - ip -netns h1 link set r1h1 netns r1 name eth0 up + ip -netns $h1 link add eth0 type veth peer name r1h1 + ip -netns $h1 link set r1h1 netns $r1 name eth0 up - ip -netns h2 link add eth0 type veth peer name r1h2 - ip -netns h2 link set r1h2 netns r1 name eth1 up + ip -netns $h2 link add eth0 type veth peer name r1h2 + ip -netns $h2 link set r1h2 netns $r1 name eth1 up # # h1 # - ip -netns h1 addr add dev eth0 ${H1_N1_IP}/24 - ip -netns h1 -6 addr add dev eth0 ${H1_N1_IP6}/64 nodad - ip -netns h1 link set eth0 up + ip -netns $h1 addr add dev eth0 ${H1_N1_IP}/24 + ip -netns $h1 -6 addr add dev eth0 ${H1_N1_IP6}/64 nodad + ip -netns $h1 link set eth0 up # h1 to h2 via r1 - ip -netns h1 route add ${H2_N2} via ${R1_N1_IP} dev eth0 - ip -netns h1 -6 route add ${H2_N2_6} via "${R1_N1_IP6}" dev eth0 + ip -netns $h1 route add ${H2_N2} via ${R1_N1_IP} dev eth0 + ip -netns $h1 -6 route add ${H2_N2_6} via "${R1_N1_IP6}" dev eth0 # # h2 # - ip -netns h2 addr add dev eth0 ${H2_N2_IP}/24 - ip -netns h2 -6 addr add dev eth0 ${H2_N2_IP6}/64 nodad - ip -netns h2 link set eth0 up + ip -netns $h2 addr add dev eth0 ${H2_N2_IP}/24 + ip -netns $h2 -6 addr add dev eth0 ${H2_N2_IP6}/64 nodad + ip -netns $h2 link set eth0 up # h2 to h1 via r1 - ip -netns h2 route add default via ${R1_N2_IP} dev eth0 - ip -netns h2 -6 route add default via ${R1_N2_IP6} dev eth0 + ip -netns $h2 route add default via ${R1_N2_IP} dev eth0 + ip -netns $h2 -6 route add default via ${R1_N2_IP6} dev eth0 # # r1 # - setup_vrf r1 - create_vrf r1 blue 1101 - create_vrf r1 red 1102 - ip -netns r1 link set mtu 1400 dev eth1 - ip -netns r1 link set eth0 vrf blue up - ip -netns r1 link set eth1 vrf red up - ip -netns r1 addr add dev eth0 ${R1_N1_IP}/24 - ip -netns r1 -6 addr add dev eth0 ${R1_N1_IP6}/64 nodad - ip -netns r1 addr add dev eth1 ${R1_N2_IP}/24 - ip -netns r1 -6 addr add dev eth1 ${R1_N2_IP6}/64 nodad + setup_vrf $r1 + create_vrf $r1 blue 1101 + create_vrf $r1 red 1102 + ip -netns $r1 link set mtu 1400 dev eth1 + ip -netns $r1 link set eth0 vrf blue up + ip -netns $r1 link set eth1 vrf red up + ip -netns $r1 addr add dev eth0 ${R1_N1_IP}/24 + ip -netns $r1 -6 addr add dev eth0 ${R1_N1_IP6}/64 nodad + ip -netns $r1 addr add dev eth1 ${R1_N2_IP}/24 + ip -netns $r1 -6 addr add dev eth1 ${R1_N2_IP6}/64 nodad # Route leak from blue to red - ip -netns r1 route add vrf blue ${H2_N2} dev red - ip -netns r1 -6 route add vrf blue ${H2_N2_6} dev red + ip -netns $r1 route add vrf blue ${H2_N2} dev red + ip -netns $r1 -6 route add vrf blue ${H2_N2_6} dev red # Route leak from red to blue - ip -netns r1 route add vrf red ${H1_N1} dev blue - ip -netns r1 -6 route add vrf red ${H1_N1_6} dev blue + ip -netns $r1 route add vrf red ${H1_N1} dev blue + ip -netns $r1 -6 route add vrf red ${H1_N1_6} dev blue # Wait for ip config to settle @@ -293,90 +287,87 @@ setup_asym() # # create nodes as namespaces - # - for ns in h1 h2 r1 r2; do - ip netns add $ns - ip -netns $ns link set lo up - - case "${ns}" in - h[12]) ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=0 - ip netns exec $ns sysctl -q -w net.ipv6.conf.all.keep_addr_on_down=1 - ;; - r[12]) ip netns exec $ns sysctl -q -w net.ipv4.ip_forward=1 - ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=1 - esac + setup_ns h1 h2 r1 r2 + for ns in $h1 $h2 $r1 $r2; do + if echo $ns | grep -q h[12]-; then + ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=0 + ip netns exec $ns sysctl -q -w net.ipv6.conf.all.keep_addr_on_down=1 + else + ip netns exec $ns sysctl -q -w net.ipv4.ip_forward=1 + ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=1 + fi done # # create interconnects # - ip -netns h1 link add eth0 type veth peer name r1h1 - ip -netns h1 link set r1h1 netns r1 name eth0 up + ip -netns $h1 link add eth0 type veth peer name r1h1 + ip -netns $h1 link set r1h1 netns $r1 name eth0 up - ip -netns h1 link add eth1 type veth peer name r2h1 - ip -netns h1 link set r2h1 netns r2 name eth0 up + ip -netns $h1 link add eth1 type veth peer name r2h1 + ip -netns $h1 link set r2h1 netns $r2 name eth0 up - ip -netns h2 link add eth0 type veth peer name r1h2 - ip -netns h2 link set r1h2 netns r1 name eth1 up + ip -netns $h2 link add eth0 type veth peer name r1h2 + ip -netns $h2 link set r1h2 netns $r1 name eth1 up - ip -netns h2 link add eth1 type veth peer name r2h2 - ip -netns h2 link set r2h2 netns r2 name eth1 up + ip -netns $h2 link add eth1 type veth peer name r2h2 + ip -netns $h2 link set r2h2 netns $r2 name eth1 up # # h1 # - ip -netns h1 link add br0 type bridge - ip -netns h1 link set br0 up - ip -netns h1 addr add dev br0 ${H1_N1_IP}/24 - ip -netns h1 -6 addr add dev br0 ${H1_N1_IP6}/64 nodad - ip -netns h1 link set eth0 master br0 up - ip -netns h1 link set eth1 master br0 up + ip -netns $h1 link add br0 type bridge + ip -netns $h1 link set br0 up + ip -netns $h1 addr add dev br0 ${H1_N1_IP}/24 + ip -netns $h1 -6 addr add dev br0 ${H1_N1_IP6}/64 nodad + ip -netns $h1 link set eth0 master br0 up + ip -netns $h1 link set eth1 master br0 up # h1 to h2 via r1 - ip -netns h1 route add ${H2_N2} via ${R1_N1_IP} dev br0 - ip -netns h1 -6 route add ${H2_N2_6} via "${R1_N1_IP6}" dev br0 + ip -netns $h1 route add ${H2_N2} via ${R1_N1_IP} dev br0 + ip -netns $h1 -6 route add ${H2_N2_6} via "${R1_N1_IP6}" dev br0 # # h2 # - ip -netns h2 link add br0 type bridge - ip -netns h2 link set br0 up - ip -netns h2 addr add dev br0 ${H2_N2_IP}/24 - ip -netns h2 -6 addr add dev br0 ${H2_N2_IP6}/64 nodad - ip -netns h2 link set eth0 master br0 up - ip -netns h2 link set eth1 master br0 up + ip -netns $h2 link add br0 type bridge + ip -netns $h2 link set br0 up + ip -netns $h2 addr add dev br0 ${H2_N2_IP}/24 + ip -netns $h2 -6 addr add dev br0 ${H2_N2_IP6}/64 nodad + ip -netns $h2 link set eth0 master br0 up + ip -netns $h2 link set eth1 master br0 up # h2 to h1 via r2 - ip -netns h2 route add default via ${R2_N2_IP} dev br0 - ip -netns h2 -6 route add default via ${R2_N2_IP6} dev br0 + ip -netns $h2 route add default via ${R2_N2_IP} dev br0 + ip -netns $h2 -6 route add default via ${R2_N2_IP6} dev br0 # # r1 # - setup_vrf r1 - create_vrf r1 blue 1101 - create_vrf r1 red 1102 - ip -netns r1 link set mtu 1400 dev eth1 - ip -netns r1 link set eth0 vrf blue up - ip -netns r1 link set eth1 vrf red up - ip -netns r1 addr add dev eth0 ${R1_N1_IP}/24 - ip -netns r1 -6 addr add dev eth0 ${R1_N1_IP6}/64 nodad - ip -netns r1 addr add dev eth1 ${R1_N2_IP}/24 - ip -netns r1 -6 addr add dev eth1 ${R1_N2_IP6}/64 nodad + setup_vrf $r1 + create_vrf $r1 blue 1101 + create_vrf $r1 red 1102 + ip -netns $r1 link set mtu 1400 dev eth1 + ip -netns $r1 link set eth0 vrf blue up + ip -netns $r1 link set eth1 vrf red up + ip -netns $r1 addr add dev eth0 ${R1_N1_IP}/24 + ip -netns $r1 -6 addr add dev eth0 ${R1_N1_IP6}/64 nodad + ip -netns $r1 addr add dev eth1 ${R1_N2_IP}/24 + ip -netns $r1 -6 addr add dev eth1 ${R1_N2_IP6}/64 nodad # Route leak from blue to red - ip -netns r1 route add vrf blue ${H2_N2} dev red - ip -netns r1 -6 route add vrf blue ${H2_N2_6} dev red + ip -netns $r1 route add vrf blue ${H2_N2} dev red + ip -netns $r1 -6 route add vrf blue ${H2_N2_6} dev red # No route leak from red to blue # # r2 # - ip -netns r2 addr add dev eth0 ${R2_N1_IP}/24 - ip -netns r2 -6 addr add dev eth0 ${R2_N1_IP6}/64 nodad - ip -netns r2 addr add dev eth1 ${R2_N2_IP}/24 - ip -netns r2 -6 addr add dev eth1 ${R2_N2_IP6}/64 nodad + ip -netns $r2 addr add dev eth0 ${R2_N1_IP}/24 + ip -netns $r2 -6 addr add dev eth0 ${R2_N1_IP6}/64 nodad + ip -netns $r2 addr add dev eth1 ${R2_N2_IP}/24 + ip -netns $r2 -6 addr add dev eth1 ${R2_N2_IP6}/64 nodad # Wait for ip config to settle sleep 2 @@ -384,14 +375,14 @@ setup_asym() check_connectivity() { - ip netns exec h1 ping -c1 -w1 ${H2_N2_IP} >/dev/null 2>&1 + ip netns exec $h1 ping -c1 -w1 ${H2_N2_IP} >/dev/null 2>&1 log_test $? 0 "Basic IPv4 connectivity" return $? } check_connectivity6() { - ip netns exec h1 "${ping6}" -c1 -w1 ${H2_N2_IP6} >/dev/null 2>&1 + ip netns exec $h1 "${ping6}" -c1 -w1 ${H2_N2_IP6} >/dev/null 2>&1 log_test $? 0 "Basic IPv6 connectivity" return $? } @@ -426,7 +417,7 @@ ipv4_traceroute() check_connectivity || return - run_cmd_grep "${R1_N1_IP}" ip netns exec h1 traceroute ${H2_N2_IP} + run_cmd_grep "${R1_N1_IP}" ip netns exec $h1 traceroute ${H2_N2_IP} log_test $? 0 "Traceroute reports a hop on r1" } @@ -449,7 +440,7 @@ ipv6_traceroute() check_connectivity6 || return - run_cmd_grep "${R1_N1_IP6}" ip netns exec h1 traceroute6 ${H2_N2_IP6} + run_cmd_grep "${R1_N1_IP6}" ip netns exec $h1 traceroute6 ${H2_N2_IP6} log_test $? 0 "Traceroute6 reports a hop on r1" } @@ -470,7 +461,7 @@ ipv4_ping_ttl() check_connectivity || return - run_cmd_grep "Time to live exceeded" ip netns exec h1 ping -t1 -c1 -W2 ${H2_N2_IP} + run_cmd_grep "Time to live exceeded" ip netns exec $h1 ping -t1 -c1 -W2 ${H2_N2_IP} log_test $? 0 "Ping received ICMP ttl exceeded" } @@ -491,7 +482,7 @@ ipv4_ping_frag() check_connectivity || return - run_cmd_grep "Frag needed" ip netns exec h1 ping -s 1450 -Mdo -c1 -W2 ${H2_N2_IP} + run_cmd_grep "Frag needed" ip netns exec $h1 ping -s 1450 -Mdo -c1 -W2 ${H2_N2_IP} log_test $? 0 "Ping received ICMP Frag needed" } @@ -512,7 +503,7 @@ ipv6_ping_ttl() check_connectivity6 || return - run_cmd_grep "Time exceeded: Hop limit" ip netns exec h1 "${ping6}" -t1 -c1 -W2 ${H2_N2_IP6} + run_cmd_grep "Time exceeded: Hop limit" ip netns exec $h1 "${ping6}" -t1 -c1 -W2 ${H2_N2_IP6} log_test $? 0 "Ping received ICMP Hop limit" } @@ -533,7 +524,7 @@ ipv6_ping_frag() check_connectivity6 || return - run_cmd_grep "Packet too big" ip netns exec h1 "${ping6}" -s 1450 -Mdo -c1 -W2 ${H2_N2_IP6} + run_cmd_grep "Packet too big" ip netns exec $h1 "${ping6}" -s 1450 -Mdo -c1 -W2 ${H2_N2_IP6} log_test $? 0 "Ping received ICMP Packet too big" } @@ -565,7 +556,7 @@ EOF command -v ping6 > /dev/null 2>&1 && ping6=$(command -v ping6) || ping6=$(command -v ping) TESTS_IPV4="ipv4_ping_ttl ipv4_traceroute ipv4_ping_frag ipv4_ping_ttl_asym ipv4_traceroute_asym" -TESTS_IPV6="ipv6_ping_ttl ipv6_traceroute ipv6_ping_frag ipv6_ping_ttl_asym ipv6_traceroute_asym" +TESTS_IPV6="ipv6_ping_ttl ipv6_traceroute ipv6_ping_ttl_asym ipv6_traceroute_asym" ret=0 nsuccess=0 diff --git a/tools/testing/selftests/net/vrf_strict_mode_test.sh b/tools/testing/selftests/net/vrf_strict_mode_test.sh index 18b982d611de..01552b542544 100755 --- a/tools/testing/selftests/net/vrf_strict_mode_test.sh +++ b/tools/testing/selftests/net/vrf_strict_mode_test.sh @@ -3,6 +3,7 @@ # This test is designed for testing the new VRF strict_mode functionality. +source lib.sh ret=0 # identifies the "init" network namespace which is often called root network @@ -11,6 +12,8 @@ INIT_NETNS_NAME="init" PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no} +TESTS="init testns mix" + log_test() { local rc=$1 @@ -242,13 +245,12 @@ setup() { modprobe vrf - ip netns add testns - ip netns exec testns ip link set lo up + setup_ns testns } cleanup() { - ip netns del testns 2>/dev/null + ip netns del $testns 2>/dev/null ip link del vrf100 2>/dev/null ip link del vrf101 2>/dev/null @@ -259,6 +261,8 @@ cleanup() vrf_strict_mode_tests_init() { + log_section "VRF strict_mode test on init network namespace" + vrf_strict_mode_check_support init strict_mode_check_default init @@ -289,67 +293,83 @@ vrf_strict_mode_tests_init() vrf_strict_mode_tests_testns() { - vrf_strict_mode_check_support testns + log_section "VRF strict_mode test on testns network namespace" - strict_mode_check_default testns + vrf_strict_mode_check_support $testns - enable_strict_mode_and_check testns + strict_mode_check_default $testns - add_vrf_and_check testns vrf100 100 - config_vrf_and_check testns 10.0.100.1/24 vrf100 + enable_strict_mode_and_check $testns - add_vrf_and_check_fail testns vrf101 100 + add_vrf_and_check $testns vrf100 100 + config_vrf_and_check $testns 10.0.100.1/24 vrf100 - add_vrf_and_check_fail testns vrf102 100 + add_vrf_and_check_fail $testns vrf101 100 - add_vrf_and_check testns vrf200 200 + add_vrf_and_check_fail $testns vrf102 100 - disable_strict_mode_and_check testns + add_vrf_and_check $testns vrf200 200 - add_vrf_and_check testns vrf101 100 + disable_strict_mode_and_check $testns - add_vrf_and_check testns vrf102 100 + add_vrf_and_check $testns vrf101 100 - #the strict_mode is disabled in the testns + add_vrf_and_check $testns vrf102 100 + + #the strict_mode is disabled in the $testns } vrf_strict_mode_tests_mix() { + log_section "VRF strict_mode test mixing init and testns network namespaces" + read_strict_mode_compare_and_check init 1 - read_strict_mode_compare_and_check testns 0 + read_strict_mode_compare_and_check $testns 0 - del_vrf_and_check testns vrf101 + del_vrf_and_check $testns vrf101 - del_vrf_and_check testns vrf102 + del_vrf_and_check $testns vrf102 disable_strict_mode_and_check init - enable_strict_mode_and_check testns + enable_strict_mode_and_check $testns enable_strict_mode_and_check init enable_strict_mode_and_check init - disable_strict_mode_and_check testns - disable_strict_mode_and_check testns + disable_strict_mode_and_check $testns + disable_strict_mode_and_check $testns read_strict_mode_compare_and_check init 1 - read_strict_mode_compare_and_check testns 0 + read_strict_mode_compare_and_check $testns 0 } -vrf_strict_mode_tests() -{ - log_section "VRF strict_mode test on init network namespace" - vrf_strict_mode_tests_init +################################################################################ +# usage - log_section "VRF strict_mode test on testns network namespace" - vrf_strict_mode_tests_testns +usage() +{ + cat <<EOF +usage: ${0##*/} OPTS - log_section "VRF strict_mode test mixing init and testns network namespaces" - vrf_strict_mode_tests_mix + -t <test> Test(s) to run (default: all) + (options: $TESTS) +EOF } +################################################################################ +# main + +while getopts ":t:h" opt; do + case $opt in + t) TESTS=$OPTARG;; + h) usage; exit 0;; + *) usage; exit 1;; + esac +done + vrf_strict_mode_check_support() { local nsname=$1 @@ -371,24 +391,34 @@ vrf_strict_mode_check_support() if [ "$(id -u)" -ne 0 ];then echo "SKIP: Need root privileges" - exit 0 + exit $ksft_skip fi if [ ! -x "$(command -v ip)" ]; then echo "SKIP: Could not run test without ip tool" - exit 0 + exit $ksft_skip fi modprobe vrf &>/dev/null if [ ! -e /proc/sys/net/vrf/strict_mode ]; then echo "SKIP: vrf sysctl does not exist" - exit 0 + exit $ksft_skip fi cleanup &> /dev/null setup -vrf_strict_mode_tests +for t in $TESTS +do + case $t in + vrf_strict_mode_tests_init|init) vrf_strict_mode_tests_init;; + vrf_strict_mode_tests_testns|testns) vrf_strict_mode_tests_testns;; + vrf_strict_mode_tests_mix|mix) vrf_strict_mode_tests_mix;; + + help) echo "Test names: $TESTS"; exit 0;; + + esac +done cleanup print_log_test_results diff --git a/tools/testing/selftests/net/xdp_dummy.c b/tools/testing/selftests/net/xdp_dummy.c new file mode 100644 index 000000000000..d988b2e0cee8 --- /dev/null +++ b/tools/testing/selftests/net/xdp_dummy.c @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define KBUILD_MODNAME "xdp_dummy" +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +SEC("xdp") +int xdp_dummy_prog(struct xdp_md *ctx) +{ + return XDP_PASS; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/net/xfrm_policy.sh b/tools/testing/selftests/net/xfrm_policy.sh index 7a1bf94c5bd3..457789530645 100755 --- a/tools/testing/selftests/net/xfrm_policy.sh +++ b/tools/testing/selftests/net/xfrm_policy.sh @@ -18,8 +18,7 @@ # ns1: ping 10.0.2.254: does NOT pass via ipsec tunnel (exception) # ns2: ping 10.0.1.254: does NOT pass via ipsec tunnel (exception) -# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 +source lib.sh ret=0 policy_checks_ok=1 @@ -202,26 +201,26 @@ check_xfrm() { # 1: iptables -m policy rule count != 0 rval=$1 ip=$2 - lret=0 + local lret=0 - ip netns exec ns1 ping -q -c 1 10.0.2.$ip > /dev/null + ip netns exec ${ns[1]} ping -q -c 1 10.0.2.$ip > /dev/null - check_ipt_policy_count ns3 + check_ipt_policy_count ${ns[3]} if [ $? -ne $rval ] ; then lret=1 fi - check_ipt_policy_count ns4 + check_ipt_policy_count ${ns[4]} if [ $? -ne $rval ] ; then lret=1 fi - ip netns exec ns2 ping -q -c 1 10.0.1.$ip > /dev/null + ip netns exec ${ns[2]} ping -q -c 1 10.0.1.$ip > /dev/null - check_ipt_policy_count ns3 + check_ipt_policy_count ${ns[3]} if [ $? -ne $rval ] ; then lret=1 fi - check_ipt_policy_count ns4 + check_ipt_policy_count ${ns[4]} if [ $? -ne $rval ] ; then lret=1 fi @@ -270,11 +269,11 @@ check_hthresh_repeat() i=0 for i in $(seq 1 10);do - ip -net ns1 xfrm policy update src e000:0001::0000 dst ff01::0014:0000:0001 dir in tmpl src :: dst :: proto esp mode tunnel priority 100 action allow || break - ip -net ns1 xfrm policy set hthresh6 0 28 || break + ip -net ${ns[1]} xfrm policy update src e000:0001::0000 dst ff01::0014:0000:0001 dir in tmpl src :: dst :: proto esp mode tunnel priority 100 action allow || break + ip -net ${ns[1]} xfrm policy set hthresh6 0 28 || break - ip -net ns1 xfrm policy update src e000:0001::0000 dst ff01::01 dir in tmpl src :: dst :: proto esp mode tunnel priority 100 action allow || break - ip -net ns1 xfrm policy set hthresh6 0 28 || break + ip -net ${ns[1]} xfrm policy update src e000:0001::0000 dst ff01::01 dir in tmpl src :: dst :: proto esp mode tunnel priority 100 action allow || break + ip -net ${ns[1]} xfrm policy set hthresh6 0 28 || break done if [ $i -ne 10 ] ;then @@ -287,6 +286,47 @@ check_hthresh_repeat() return 0 } +# insert non-overlapping policies in a random order and check that +# all of them can be fetched using the traffic selectors. +check_random_order() +{ + local ns=$1 + local log=$2 + + for i in $(seq 100); do + ip -net $ns xfrm policy flush + for j in $(seq 0 16 255 | sort -R); do + ip -net $ns xfrm policy add dst $j.0.0.0/24 dir out priority 10 action allow + done + for j in $(seq 0 16 255); do + if ! ip -net $ns xfrm policy get dst $j.0.0.0/24 dir out > /dev/null; then + echo "FAIL: $log" 1>&2 + return 1 + fi + done + done + + for i in $(seq 100); do + ip -net $ns xfrm policy flush + for j in $(seq 0 16 255 | sort -R); do + local addr=$(printf "e000:0000:%02x00::/56" $j) + ip -net $ns xfrm policy add dst $addr dir out priority 10 action allow + done + for j in $(seq 0 16 255); do + local addr=$(printf "e000:0000:%02x00::/56" $j) + if ! ip -net $ns xfrm policy get dst $addr dir out > /dev/null; then + echo "FAIL: $log" 1>&2 + return 1 + fi + done + done + + ip -net $ns xfrm policy flush + + echo "PASS: $log" + return 0 +} + #check for needed privileges if [ "$(id -u)" -ne 0 ];then echo "SKIP: Need root privileges" @@ -306,79 +346,80 @@ if [ $? -ne 0 ];then exit $ksft_skip fi -for i in 1 2 3 4; do - ip netns add ns$i - ip -net ns$i link set lo up -done +setup_ns ns1 ns2 ns3 ns4 +ns[1]=$ns1 +ns[2]=$ns2 +ns[3]=$ns3 +ns[4]=$ns4 DEV=veth0 -ip link add $DEV netns ns1 type veth peer name eth1 netns ns3 -ip link add $DEV netns ns2 type veth peer name eth1 netns ns4 +ip link add $DEV netns ${ns[1]} type veth peer name eth1 netns ${ns[3]} +ip link add $DEV netns ${ns[2]} type veth peer name eth1 netns ${ns[4]} -ip link add $DEV netns ns3 type veth peer name veth0 netns ns4 +ip link add $DEV netns ${ns[3]} type veth peer name veth0 netns ${ns[4]} DEV=veth0 for i in 1 2; do - ip -net ns$i link set $DEV up - ip -net ns$i addr add 10.0.$i.2/24 dev $DEV - ip -net ns$i addr add dead:$i::2/64 dev $DEV - - ip -net ns$i addr add 10.0.$i.253 dev $DEV - ip -net ns$i addr add 10.0.$i.254 dev $DEV - ip -net ns$i addr add dead:$i::fd dev $DEV - ip -net ns$i addr add dead:$i::fe dev $DEV + ip -net ${ns[$i]} link set $DEV up + ip -net ${ns[$i]} addr add 10.0.$i.2/24 dev $DEV + ip -net ${ns[$i]} addr add dead:$i::2/64 dev $DEV + + ip -net ${ns[$i]} addr add 10.0.$i.253 dev $DEV + ip -net ${ns[$i]} addr add 10.0.$i.254 dev $DEV + ip -net ${ns[$i]} addr add dead:$i::fd dev $DEV + ip -net ${ns[$i]} addr add dead:$i::fe dev $DEV done for i in 3 4; do -ip -net ns$i link set eth1 up -ip -net ns$i link set veth0 up + ip -net ${ns[$i]} link set eth1 up + ip -net ${ns[$i]} link set veth0 up done -ip -net ns1 route add default via 10.0.1.1 -ip -net ns2 route add default via 10.0.2.1 +ip -net ${ns[1]} route add default via 10.0.1.1 +ip -net ${ns[2]} route add default via 10.0.2.1 -ip -net ns3 addr add 10.0.1.1/24 dev eth1 -ip -net ns3 addr add 10.0.3.1/24 dev veth0 -ip -net ns3 addr add 2001:1::1/64 dev eth1 -ip -net ns3 addr add 2001:3::1/64 dev veth0 +ip -net ${ns[3]} addr add 10.0.1.1/24 dev eth1 +ip -net ${ns[3]} addr add 10.0.3.1/24 dev veth0 +ip -net ${ns[3]} addr add 2001:1::1/64 dev eth1 +ip -net ${ns[3]} addr add 2001:3::1/64 dev veth0 -ip -net ns3 route add default via 10.0.3.10 +ip -net ${ns[3]} route add default via 10.0.3.10 -ip -net ns4 addr add 10.0.2.1/24 dev eth1 -ip -net ns4 addr add 10.0.3.10/24 dev veth0 -ip -net ns4 addr add 2001:2::1/64 dev eth1 -ip -net ns4 addr add 2001:3::10/64 dev veth0 -ip -net ns4 route add default via 10.0.3.1 +ip -net ${ns[4]} addr add 10.0.2.1/24 dev eth1 +ip -net ${ns[4]} addr add 10.0.3.10/24 dev veth0 +ip -net ${ns[4]} addr add 2001:2::1/64 dev eth1 +ip -net ${ns[4]} addr add 2001:3::10/64 dev veth0 +ip -net ${ns[4]} route add default via 10.0.3.1 for j in 4 6; do for i in 3 4;do - ip netns exec ns$i sysctl net.ipv$j.conf.eth1.forwarding=1 > /dev/null - ip netns exec ns$i sysctl net.ipv$j.conf.veth0.forwarding=1 > /dev/null + ip netns exec ${ns[$i]} sysctl net.ipv$j.conf.eth1.forwarding=1 > /dev/null + ip netns exec ${ns[$i]} sysctl net.ipv$j.conf.veth0.forwarding=1 > /dev/null done done # abuse iptables rule counter to check if ping matches a policy -ip netns exec ns3 iptables -p icmp -A FORWARD -m policy --dir out --pol ipsec -ip netns exec ns4 iptables -p icmp -A FORWARD -m policy --dir out --pol ipsec +ip netns exec ${ns[3]} iptables -p icmp -A FORWARD -m policy --dir out --pol ipsec +ip netns exec ${ns[4]} iptables -p icmp -A FORWARD -m policy --dir out --pol ipsec if [ $? -ne 0 ];then echo "SKIP: Could not insert iptables rule" - for i in 1 2 3 4;do ip netns del ns$i;done + cleanup_ns $ns1 $ns2 $ns3 $ns4 exit $ksft_skip fi # localip remoteip localnet remotenet -do_esp ns3 10.0.3.1 10.0.3.10 10.0.1.0/24 10.0.2.0/24 $SPI1 $SPI2 -do_esp ns3 dead:3::1 dead:3::10 dead:1::/64 dead:2::/64 $SPI1 $SPI2 -do_esp ns4 10.0.3.10 10.0.3.1 10.0.2.0/24 10.0.1.0/24 $SPI2 $SPI1 -do_esp ns4 dead:3::10 dead:3::1 dead:2::/64 dead:1::/64 $SPI2 $SPI1 +do_esp ${ns[3]} 10.0.3.1 10.0.3.10 10.0.1.0/24 10.0.2.0/24 $SPI1 $SPI2 +do_esp ${ns[3]} dead:3::1 dead:3::10 dead:1::/64 dead:2::/64 $SPI1 $SPI2 +do_esp ${ns[4]} 10.0.3.10 10.0.3.1 10.0.2.0/24 10.0.1.0/24 $SPI2 $SPI1 +do_esp ${ns[4]} dead:3::10 dead:3::1 dead:2::/64 dead:1::/64 $SPI2 $SPI1 -do_dummies4 ns3 -do_dummies6 ns4 +do_dummies4 ${ns[3]} +do_dummies6 ${ns[4]} -do_esp_policy_get_check ns3 10.0.1.0/24 10.0.2.0/24 -do_esp_policy_get_check ns4 10.0.2.0/24 10.0.1.0/24 -do_esp_policy_get_check ns3 dead:1::/64 dead:2::/64 -do_esp_policy_get_check ns4 dead:2::/64 dead:1::/64 +do_esp_policy_get_check ${ns[3]} 10.0.1.0/24 10.0.2.0/24 +do_esp_policy_get_check ${ns[4]} 10.0.2.0/24 10.0.1.0/24 +do_esp_policy_get_check ${ns[3]} dead:1::/64 dead:2::/64 +do_esp_policy_get_check ${ns[4]} dead:2::/64 dead:1::/64 # ping to .254 should use ipsec, exception is not installed. check_xfrm 1 254 @@ -391,11 +432,11 @@ fi # installs exceptions # localip remoteip encryptdst plaindst -do_exception ns3 10.0.3.1 10.0.3.10 10.0.2.253 10.0.2.240/28 -do_exception ns4 10.0.3.10 10.0.3.1 10.0.1.253 10.0.1.240/28 +do_exception ${ns[3]} 10.0.3.1 10.0.3.10 10.0.2.253 10.0.2.240/28 +do_exception ${ns[4]} 10.0.3.10 10.0.3.1 10.0.1.253 10.0.1.240/28 -do_exception ns3 dead:3::1 dead:3::10 dead:2::fd dead:2:f0::/96 -do_exception ns4 dead:3::10 dead:3::1 dead:1::fd dead:1:f0::/96 +do_exception ${ns[3]} dead:3::1 dead:3::10 dead:2::fd dead:2:f0::/96 +do_exception ${ns[4]} dead:3::10 dead:3::1 dead:1::fd dead:1:f0::/96 check_exceptions "exceptions" if [ $? -ne 0 ]; then @@ -403,14 +444,14 @@ if [ $? -ne 0 ]; then fi # insert block policies with adjacent/overlapping netmasks -do_overlap ns3 +do_overlap ${ns[3]} check_exceptions "exceptions and block policies" if [ $? -ne 0 ]; then ret=1 fi -for n in ns3 ns4;do +for n in ${ns[3]} ${ns[4]};do ip -net $n xfrm policy set hthresh4 28 24 hthresh6 126 125 sleep $((RANDOM%5)) done @@ -418,19 +459,19 @@ done check_exceptions "exceptions and block policies after hresh changes" # full flush of policy db, check everything gets freed incl. internal meta data -ip -net ns3 xfrm policy flush +ip -net ${ns[3]} xfrm policy flush -do_esp_policy ns3 10.0.3.1 10.0.3.10 10.0.1.0/24 10.0.2.0/24 -do_exception ns3 10.0.3.1 10.0.3.10 10.0.2.253 10.0.2.240/28 +do_esp_policy ${ns[3]} 10.0.3.1 10.0.3.10 10.0.1.0/24 10.0.2.0/24 +do_exception ${ns[3]} 10.0.3.1 10.0.3.10 10.0.2.253 10.0.2.240/28 # move inexact policies to hash table -ip -net ns3 xfrm policy set hthresh4 16 16 +ip -net ${ns[3]} xfrm policy set hthresh4 16 16 sleep $((RANDOM%5)) check_exceptions "exceptions and block policies after hthresh change in ns3" # restore original hthresh settings -- move policies back to tables -for n in ns3 ns4;do +for n in ${ns[3]} ${ns[4]};do ip -net $n xfrm policy set hthresh4 32 32 hthresh6 128 128 sleep $((RANDOM%5)) done @@ -438,6 +479,8 @@ check_exceptions "exceptions and block policies after htresh change to normal" check_hthresh_repeat "policies with repeated htresh change" -for i in 1 2 3 4;do ip netns del ns$i;done +check_random_order ${ns[3]} "policies inserted in random order" + +cleanup_ns $ns1 $ns2 $ns3 $ns4 exit $ret diff --git a/tools/testing/selftests/netfilter/.gitignore b/tools/testing/selftests/netfilter/.gitignore index 8448f74adfec..c2229b3e40d4 100644 --- a/tools/testing/selftests/netfilter/.gitignore +++ b/tools/testing/selftests/netfilter/.gitignore @@ -1,2 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only nf-queue +connect_close +audit_logread +conntrack_dump_flush +sctp_collision diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile index a374e10ef506..936c3085bb83 100644 --- a/tools/testing/selftests/netfilter/Makefile +++ b/tools/testing/selftests/netfilter/Makefile @@ -1,12 +1,21 @@ # SPDX-License-Identifier: GPL-2.0 # Makefile for netfilter selftests -TEST_PROGS := nft_trans_stress.sh nft_nat.sh bridge_brouter.sh \ +TEST_PROGS := nft_trans_stress.sh nft_fib.sh nft_nat.sh bridge_brouter.sh \ conntrack_icmp_related.sh nft_flowtable.sh ipvs.sh \ nft_concat_range.sh nft_conntrack_helper.sh \ - nft_queue.sh nft_meta.sh + nft_queue.sh nft_meta.sh nf_nat_edemux.sh \ + ipip-conntrack-mtu.sh conntrack_tcp_unreplied.sh \ + conntrack_vrf.sh nft_synproxy.sh rpath.sh nft_audit.sh \ + conntrack_sctp_collision.sh xt_string.sh \ + bridge_netfilter.sh -LDLIBS = -lmnl -TEST_GEN_FILES = nf-queue +HOSTPKG_CONFIG := pkg-config + +CFLAGS += $(shell $(HOSTPKG_CONFIG) --cflags libmnl 2>/dev/null) +LDLIBS += $(shell $(HOSTPKG_CONFIG) --libs libmnl 2>/dev/null || echo -lmnl) + +TEST_GEN_FILES = nf-queue connect_close audit_logread sctp_collision \ + conntrack_dump_flush include ../lib.mk diff --git a/tools/testing/selftests/netfilter/audit_logread.c b/tools/testing/selftests/netfilter/audit_logread.c new file mode 100644 index 000000000000..a0a880fc2d9d --- /dev/null +++ b/tools/testing/selftests/netfilter/audit_logread.c @@ -0,0 +1,165 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE +#include <errno.h> +#include <fcntl.h> +#include <poll.h> +#include <signal.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/socket.h> +#include <unistd.h> +#include <linux/audit.h> +#include <linux/netlink.h> + +static int fd; + +#define MAX_AUDIT_MESSAGE_LENGTH 8970 +struct audit_message { + struct nlmsghdr nlh; + union { + struct audit_status s; + char data[MAX_AUDIT_MESSAGE_LENGTH]; + } u; +}; + +int audit_recv(int fd, struct audit_message *rep) +{ + struct sockaddr_nl addr; + socklen_t addrlen = sizeof(addr); + int ret; + + do { + ret = recvfrom(fd, rep, sizeof(*rep), 0, + (struct sockaddr *)&addr, &addrlen); + } while (ret < 0 && errno == EINTR); + + if (ret < 0 || + addrlen != sizeof(addr) || + addr.nl_pid != 0 || + rep->nlh.nlmsg_type == NLMSG_ERROR) /* short-cut for now */ + return -1; + + return ret; +} + +int audit_send(int fd, uint16_t type, uint32_t key, uint32_t val) +{ + static int seq = 0; + struct audit_message msg = { + .nlh = { + .nlmsg_len = NLMSG_SPACE(sizeof(msg.u.s)), + .nlmsg_type = type, + .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK, + .nlmsg_seq = ++seq, + }, + .u.s = { + .mask = key, + .enabled = key == AUDIT_STATUS_ENABLED ? val : 0, + .pid = key == AUDIT_STATUS_PID ? val : 0, + } + }; + struct sockaddr_nl addr = { + .nl_family = AF_NETLINK, + }; + int ret; + + do { + ret = sendto(fd, &msg, msg.nlh.nlmsg_len, 0, + (struct sockaddr *)&addr, sizeof(addr)); + } while (ret < 0 && errno == EINTR); + + if (ret != (int)msg.nlh.nlmsg_len) + return -1; + return 0; +} + +int audit_set(int fd, uint32_t key, uint32_t val) +{ + struct audit_message rep = { 0 }; + int ret; + + ret = audit_send(fd, AUDIT_SET, key, val); + if (ret) + return ret; + + ret = audit_recv(fd, &rep); + if (ret < 0) + return ret; + return 0; +} + +int readlog(int fd) +{ + struct audit_message rep = { 0 }; + int ret = audit_recv(fd, &rep); + const char *sep = ""; + char *k, *v; + + if (ret < 0) + return ret; + + if (rep.nlh.nlmsg_type != AUDIT_NETFILTER_CFG) + return 0; + + /* skip the initial "audit(...): " part */ + strtok(rep.u.data, " "); + + while ((k = strtok(NULL, "="))) { + v = strtok(NULL, " "); + + /* these vary and/or are uninteresting, ignore */ + if (!strcmp(k, "pid") || + !strcmp(k, "comm") || + !strcmp(k, "subj")) + continue; + + /* strip the varying sequence number */ + if (!strcmp(k, "table")) + *strchrnul(v, ':') = '\0'; + + printf("%s%s=%s", sep, k, v); + sep = " "; + } + if (*sep) { + printf("\n"); + fflush(stdout); + } + return 0; +} + +void cleanup(int sig) +{ + audit_set(fd, AUDIT_STATUS_ENABLED, 0); + close(fd); + if (sig) + exit(0); +} + +int main(int argc, char **argv) +{ + struct sigaction act = { + .sa_handler = cleanup, + }; + + fd = socket(PF_NETLINK, SOCK_RAW, NETLINK_AUDIT); + if (fd < 0) { + perror("Can't open netlink socket"); + return -1; + } + + if (sigaction(SIGTERM, &act, NULL) < 0 || + sigaction(SIGINT, &act, NULL) < 0) { + perror("Can't set signal handler"); + close(fd); + return -1; + } + + audit_set(fd, AUDIT_STATUS_ENABLED, 1); + audit_set(fd, AUDIT_STATUS_PID, getpid()); + + while (1) + readlog(fd); +} diff --git a/tools/testing/selftests/netfilter/bridge_netfilter.sh b/tools/testing/selftests/netfilter/bridge_netfilter.sh new file mode 100644 index 000000000000..659b3ab02c8b --- /dev/null +++ b/tools/testing/selftests/netfilter/bridge_netfilter.sh @@ -0,0 +1,188 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test bridge netfilter + conntrack, a combination that doesn't really work, +# with multicast/broadcast packets racing for hash table insertion. + +# eth0 br0 eth0 +# setup is: ns1 <->,ns0 <-> ns3 +# ns2 <-' `'-> ns4 + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 +ret=0 + +sfx=$(mktemp -u "XXXXXXXX") +ns0="ns0-$sfx" +ns1="ns1-$sfx" +ns2="ns2-$sfx" +ns3="ns3-$sfx" +ns4="ns4-$sfx" + +ebtables -V > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without ebtables" + exit $ksft_skip +fi + +ip -Version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +for i in $(seq 0 4); do + eval ip netns add \$ns$i +done + +cleanup() { + for i in $(seq 0 4); do eval ip netns del \$ns$i;done +} + +trap cleanup EXIT + +do_ping() +{ + fromns="$1" + dstip="$2" + + ip netns exec $fromns ping -c 1 -q $dstip > /dev/null + if [ $? -ne 0 ]; then + echo "ERROR: ping from $fromns to $dstip" + ip netns exec ${ns0} nft list ruleset + ret=1 + fi +} + +bcast_ping() +{ + fromns="$1" + dstip="$2" + + for i in $(seq 1 1000); do + ip netns exec $fromns ping -q -f -b -c 1 -q $dstip > /dev/null 2>&1 + if [ $? -ne 0 ]; then + echo "ERROR: ping -b from $fromns to $dstip" + ip netns exec ${ns0} nft list ruleset + fi + done +} + +ip link add veth1 netns ${ns0} type veth peer name eth0 netns ${ns1} +if [ $? -ne 0 ]; then + echo "SKIP: Can't create veth device" + exit $ksft_skip +fi + +ip link add veth2 netns ${ns0} type veth peer name eth0 netns $ns2 +ip link add veth3 netns ${ns0} type veth peer name eth0 netns $ns3 +ip link add veth4 netns ${ns0} type veth peer name eth0 netns $ns4 + +ip -net ${ns0} link set lo up + +for i in $(seq 1 4); do + ip -net ${ns0} link set veth$i up +done + +ip -net ${ns0} link add br0 type bridge stp_state 0 forward_delay 0 nf_call_iptables 1 nf_call_ip6tables 1 nf_call_arptables 1 +if [ $? -ne 0 ]; then + echo "SKIP: Can't create bridge br0" + exit $ksft_skip +fi + +# make veth0,1,2 part of bridge. +for i in $(seq 1 3); do + ip -net ${ns0} link set veth$i master br0 +done + +# add a macvlan on top of the bridge. +MACVLAN_ADDR=ba:f3:13:37:42:23 +ip -net ${ns0} link add link br0 name macvlan0 type macvlan mode private +ip -net ${ns0} link set macvlan0 address ${MACVLAN_ADDR} +ip -net ${ns0} link set macvlan0 up +ip -net ${ns0} addr add 10.23.0.1/24 dev macvlan0 + +# add a macvlan on top of veth4. +MACVLAN_ADDR=ba:f3:13:37:42:24 +ip -net ${ns0} link add link veth4 name macvlan4 type macvlan mode vepa +ip -net ${ns0} link set macvlan4 address ${MACVLAN_ADDR} +ip -net ${ns0} link set macvlan4 up + +# make the macvlan part of the bridge. +# veth4 is not a bridge port, only the macvlan on top of it. +ip -net ${ns0} link set macvlan4 master br0 + +ip -net ${ns0} link set br0 up +ip -net ${ns0} addr add 10.0.0.1/24 dev br0 +ip netns exec ${ns0} sysctl -q net.bridge.bridge-nf-call-iptables=1 +ret=$? +if [ $ret -ne 0 ] ; then + echo "SKIP: bridge netfilter not available" + ret=$ksft_skip +fi + +# for testing, so namespaces will reply to ping -b probes. +ip netns exec ${ns0} sysctl -q net.ipv4.icmp_echo_ignore_broadcasts=0 + +# enable conntrack in ns0 and drop broadcast packets in forward to +# avoid them from getting confirmed in the postrouting hook before +# the cloned skb is passed up the stack. +ip netns exec ${ns0} nft -f - <<EOF +table ip filter { + chain input { + type filter hook input priority 1; policy accept + iifname br0 counter + ct state new accept + } +} + +table bridge filter { + chain forward { + type filter hook forward priority 0; policy accept + meta pkttype broadcast ip protocol icmp counter drop + } +} +EOF + +# place 1, 2 & 3 in same subnet, connected via ns0:br0. +# ns4 is placed in same subnet as well, but its not +# part of the bridge: the corresponding veth4 is not +# part of the bridge, only its macvlan interface. +for i in $(seq 1 4); do + eval ip -net \$ns$i link set lo up + eval ip -net \$ns$i link set eth0 up +done +for i in $(seq 1 2); do + eval ip -net \$ns$i addr add 10.0.0.1$i/24 dev eth0 +done + +ip -net ${ns3} addr add 10.23.0.13/24 dev eth0 +ip -net ${ns4} addr add 10.23.0.14/24 dev eth0 + +# test basic connectivity +do_ping ${ns1} 10.0.0.12 +do_ping ${ns3} 10.23.0.1 +do_ping ${ns4} 10.23.0.1 + +if [ $ret -eq 0 ];then + echo "PASS: netns connectivity: ns1 can reach ns2, ns3 and ns4 can reach ns0" +fi + +bcast_ping ${ns1} 10.0.0.255 + +# This should deliver broadcast to macvlan0, which is on top of ns0:br0. +bcast_ping ${ns3} 10.23.0.255 + +# same, this time via veth4:macvlan4. +bcast_ping ${ns4} 10.23.0.255 + +read t < /proc/sys/kernel/tainted + +if [ $t -eq 0 ];then + echo PASS: kernel not tainted +else + echo ERROR: kernel is tainted + ret=1 +fi + +exit $ret diff --git a/tools/testing/selftests/netfilter/config b/tools/testing/selftests/netfilter/config index 4faf2ce021d9..7c42b1b2c69b 100644 --- a/tools/testing/selftests/netfilter/config +++ b/tools/testing/selftests/netfilter/config @@ -6,3 +6,4 @@ CONFIG_NFT_REDIR=m CONFIG_NFT_MASQ=m CONFIG_NFT_FLOW_OFFLOAD=m CONFIG_NF_CT_NETLINK=m +CONFIG_AUDIT=y diff --git a/tools/testing/selftests/netfilter/connect_close.c b/tools/testing/selftests/netfilter/connect_close.c new file mode 100644 index 000000000000..1c3b0add54c4 --- /dev/null +++ b/tools/testing/selftests/netfilter/connect_close.c @@ -0,0 +1,136 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <stdio.h> +#include <stdlib.h> +#include <fcntl.h> +#include <string.h> +#include <unistd.h> +#include <signal.h> + +#include <arpa/inet.h> +#include <sys/socket.h> + +#define PORT 12345 +#define RUNTIME 10 + +static struct { + unsigned int timeout; + unsigned int port; +} opts = { + .timeout = RUNTIME, + .port = PORT, +}; + +static void handler(int sig) +{ + _exit(sig == SIGALRM ? 0 : 1); +} + +static void set_timeout(void) +{ + struct sigaction action = { + .sa_handler = handler, + }; + + sigaction(SIGALRM, &action, NULL); + + alarm(opts.timeout); +} + +static void do_connect(const struct sockaddr_in *dst) +{ + int s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); + + if (s >= 0) + fcntl(s, F_SETFL, O_NONBLOCK); + + connect(s, (struct sockaddr *)dst, sizeof(*dst)); + close(s); +} + +static void do_accept(const struct sockaddr_in *src) +{ + int c, one = 1, s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); + + if (s < 0) + return; + + setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)); + setsockopt(s, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one)); + + bind(s, (struct sockaddr *)src, sizeof(*src)); + + listen(s, 16); + + c = accept(s, NULL, NULL); + if (c >= 0) + close(c); + + close(s); +} + +static int accept_loop(void) +{ + struct sockaddr_in src = { + .sin_family = AF_INET, + .sin_port = htons(opts.port), + }; + + inet_pton(AF_INET, "127.0.0.1", &src.sin_addr); + + set_timeout(); + + for (;;) + do_accept(&src); + + return 1; +} + +static int connect_loop(void) +{ + struct sockaddr_in dst = { + .sin_family = AF_INET, + .sin_port = htons(opts.port), + }; + + inet_pton(AF_INET, "127.0.0.1", &dst.sin_addr); + + set_timeout(); + + for (;;) + do_connect(&dst); + + return 1; +} + +static void parse_opts(int argc, char **argv) +{ + int c; + + while ((c = getopt(argc, argv, "t:p:")) != -1) { + switch (c) { + case 't': + opts.timeout = atoi(optarg); + break; + case 'p': + opts.port = atoi(optarg); + break; + } + } +} + +int main(int argc, char *argv[]) +{ + pid_t p; + + parse_opts(argc, argv); + + p = fork(); + if (p < 0) + return 111; + + if (p > 0) + return accept_loop(); + + return connect_loop(); +} diff --git a/tools/testing/selftests/netfilter/conntrack_dump_flush.c b/tools/testing/selftests/netfilter/conntrack_dump_flush.c new file mode 100644 index 000000000000..b11ea8ee6719 --- /dev/null +++ b/tools/testing/selftests/netfilter/conntrack_dump_flush.c @@ -0,0 +1,471 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE + +#include <time.h> +#include <libmnl/libmnl.h> +#include <netinet/ip.h> + +#include <linux/netlink.h> +#include <linux/netfilter/nfnetlink.h> +#include <linux/netfilter/nfnetlink_conntrack.h> +#include <linux/netfilter/nf_conntrack_tcp.h> +#include "../kselftest_harness.h" + +#define TEST_ZONE_ID 123 +#define NF_CT_DEFAULT_ZONE_ID 0 + +static int reply_counter; + +static int build_cta_tuple_v4(struct nlmsghdr *nlh, int type, + uint32_t src_ip, uint32_t dst_ip, + uint16_t src_port, uint16_t dst_port) +{ + struct nlattr *nest, *nest_ip, *nest_proto; + + nest = mnl_attr_nest_start(nlh, type); + if (!nest) + return -1; + + nest_ip = mnl_attr_nest_start(nlh, CTA_TUPLE_IP); + if (!nest_ip) + return -1; + mnl_attr_put_u32(nlh, CTA_IP_V4_SRC, src_ip); + mnl_attr_put_u32(nlh, CTA_IP_V4_DST, dst_ip); + mnl_attr_nest_end(nlh, nest_ip); + + nest_proto = mnl_attr_nest_start(nlh, CTA_TUPLE_PROTO); + if (!nest_proto) + return -1; + mnl_attr_put_u8(nlh, CTA_PROTO_NUM, 6); + mnl_attr_put_u16(nlh, CTA_PROTO_SRC_PORT, htons(src_port)); + mnl_attr_put_u16(nlh, CTA_PROTO_DST_PORT, htons(dst_port)); + mnl_attr_nest_end(nlh, nest_proto); + + mnl_attr_nest_end(nlh, nest); +} + +static int build_cta_tuple_v6(struct nlmsghdr *nlh, int type, + struct in6_addr src_ip, struct in6_addr dst_ip, + uint16_t src_port, uint16_t dst_port) +{ + struct nlattr *nest, *nest_ip, *nest_proto; + + nest = mnl_attr_nest_start(nlh, type); + if (!nest) + return -1; + + nest_ip = mnl_attr_nest_start(nlh, CTA_TUPLE_IP); + if (!nest_ip) + return -1; + mnl_attr_put(nlh, CTA_IP_V6_SRC, sizeof(struct in6_addr), &src_ip); + mnl_attr_put(nlh, CTA_IP_V6_DST, sizeof(struct in6_addr), &dst_ip); + mnl_attr_nest_end(nlh, nest_ip); + + nest_proto = mnl_attr_nest_start(nlh, CTA_TUPLE_PROTO); + if (!nest_proto) + return -1; + mnl_attr_put_u8(nlh, CTA_PROTO_NUM, 6); + mnl_attr_put_u16(nlh, CTA_PROTO_SRC_PORT, htons(src_port)); + mnl_attr_put_u16(nlh, CTA_PROTO_DST_PORT, htons(dst_port)); + mnl_attr_nest_end(nlh, nest_proto); + + mnl_attr_nest_end(nlh, nest); +} + +static int build_cta_proto(struct nlmsghdr *nlh) +{ + struct nlattr *nest, *nest_proto; + + nest = mnl_attr_nest_start(nlh, CTA_PROTOINFO); + if (!nest) + return -1; + + nest_proto = mnl_attr_nest_start(nlh, CTA_PROTOINFO_TCP); + if (!nest_proto) + return -1; + mnl_attr_put_u8(nlh, CTA_PROTOINFO_TCP_STATE, TCP_CONNTRACK_ESTABLISHED); + mnl_attr_put_u16(nlh, CTA_PROTOINFO_TCP_FLAGS_ORIGINAL, 0x0a0a); + mnl_attr_put_u16(nlh, CTA_PROTOINFO_TCP_FLAGS_REPLY, 0x0a0a); + mnl_attr_nest_end(nlh, nest_proto); + + mnl_attr_nest_end(nlh, nest); +} + +static int conntrack_data_insert(struct mnl_socket *sock, struct nlmsghdr *nlh, + uint16_t zone) +{ + char buf[MNL_SOCKET_BUFFER_SIZE]; + struct nlmsghdr *rplnlh; + unsigned int portid; + int err, ret; + + portid = mnl_socket_get_portid(sock); + + ret = build_cta_proto(nlh); + if (ret < 0) { + perror("build_cta_proto"); + return -1; + } + mnl_attr_put_u32(nlh, CTA_TIMEOUT, htonl(20000)); + mnl_attr_put_u16(nlh, CTA_ZONE, htons(zone)); + + if (mnl_socket_sendto(sock, nlh, nlh->nlmsg_len) < 0) { + perror("mnl_socket_sendto"); + return -1; + } + + ret = mnl_socket_recvfrom(sock, buf, MNL_SOCKET_BUFFER_SIZE); + if (ret < 0) { + perror("mnl_socket_recvfrom"); + return ret; + } + + ret = mnl_cb_run(buf, ret, nlh->nlmsg_seq, portid, NULL, NULL); + if (ret < 0) { + if (errno == EEXIST) { + /* The entries are probably still there from a previous + * run. So we are good + */ + return 0; + } + perror("mnl_cb_run"); + return ret; + } + + return 0; +} + +static int conntrack_data_generate_v4(struct mnl_socket *sock, uint32_t src_ip, + uint32_t dst_ip, uint16_t zone) +{ + char buf[MNL_SOCKET_BUFFER_SIZE]; + struct nlmsghdr *nlh; + struct nfgenmsg *nfh; + int ret; + + nlh = mnl_nlmsg_put_header(buf); + nlh->nlmsg_type = (NFNL_SUBSYS_CTNETLINK << 8) | IPCTNL_MSG_CT_NEW; + nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | + NLM_F_ACK | NLM_F_EXCL; + nlh->nlmsg_seq = time(NULL); + + nfh = mnl_nlmsg_put_extra_header(nlh, sizeof(struct nfgenmsg)); + nfh->nfgen_family = AF_INET; + nfh->version = NFNETLINK_V0; + nfh->res_id = 0; + + ret = build_cta_tuple_v4(nlh, CTA_TUPLE_ORIG, src_ip, dst_ip, 12345, 443); + if (ret < 0) { + perror("build_cta_tuple_v4"); + return ret; + } + ret = build_cta_tuple_v4(nlh, CTA_TUPLE_REPLY, dst_ip, src_ip, 443, 12345); + if (ret < 0) { + perror("build_cta_tuple_v4"); + return ret; + } + return conntrack_data_insert(sock, nlh, zone); +} + +static int conntrack_data_generate_v6(struct mnl_socket *sock, + struct in6_addr src_ip, + struct in6_addr dst_ip, + uint16_t zone) +{ + char buf[MNL_SOCKET_BUFFER_SIZE]; + struct nlmsghdr *nlh; + struct nfgenmsg *nfh; + int ret; + + nlh = mnl_nlmsg_put_header(buf); + nlh->nlmsg_type = (NFNL_SUBSYS_CTNETLINK << 8) | IPCTNL_MSG_CT_NEW; + nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | + NLM_F_ACK | NLM_F_EXCL; + nlh->nlmsg_seq = time(NULL); + + nfh = mnl_nlmsg_put_extra_header(nlh, sizeof(struct nfgenmsg)); + nfh->nfgen_family = AF_INET6; + nfh->version = NFNETLINK_V0; + nfh->res_id = 0; + + ret = build_cta_tuple_v6(nlh, CTA_TUPLE_ORIG, src_ip, dst_ip, + 12345, 443); + if (ret < 0) { + perror("build_cta_tuple_v6"); + return ret; + } + ret = build_cta_tuple_v6(nlh, CTA_TUPLE_REPLY, dst_ip, src_ip, + 12345, 443); + if (ret < 0) { + perror("build_cta_tuple_v6"); + return ret; + } + return conntrack_data_insert(sock, nlh, zone); +} + +static int count_entries(const struct nlmsghdr *nlh, void *data) +{ + reply_counter++; +} + +static int conntracK_count_zone(struct mnl_socket *sock, uint16_t zone) +{ + char buf[MNL_SOCKET_BUFFER_SIZE]; + struct nlmsghdr *nlh, *rplnlh; + struct nfgenmsg *nfh; + struct nlattr *nest; + unsigned int portid; + int err, ret; + + portid = mnl_socket_get_portid(sock); + + nlh = mnl_nlmsg_put_header(buf); + nlh->nlmsg_type = (NFNL_SUBSYS_CTNETLINK << 8) | IPCTNL_MSG_CT_GET; + nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP; + nlh->nlmsg_seq = time(NULL); + + nfh = mnl_nlmsg_put_extra_header(nlh, sizeof(struct nfgenmsg)); + nfh->nfgen_family = AF_UNSPEC; + nfh->version = NFNETLINK_V0; + nfh->res_id = 0; + + mnl_attr_put_u16(nlh, CTA_ZONE, htons(zone)); + + ret = mnl_socket_sendto(sock, nlh, nlh->nlmsg_len); + if (ret < 0) { + perror("mnl_socket_sendto"); + return ret; + } + + reply_counter = 0; + ret = mnl_socket_recvfrom(sock, buf, MNL_SOCKET_BUFFER_SIZE); + while (ret > 0) { + ret = mnl_cb_run(buf, ret, nlh->nlmsg_seq, portid, + count_entries, NULL); + if (ret <= MNL_CB_STOP) + break; + + ret = mnl_socket_recvfrom(sock, buf, MNL_SOCKET_BUFFER_SIZE); + } + if (ret < 0) { + perror("mnl_socket_recvfrom"); + return ret; + } + + return reply_counter; +} + +static int conntrack_flush_zone(struct mnl_socket *sock, uint16_t zone) +{ + char buf[MNL_SOCKET_BUFFER_SIZE]; + struct nlmsghdr *nlh, *rplnlh; + struct nfgenmsg *nfh; + struct nlattr *nest; + unsigned int portid; + int err, ret; + + portid = mnl_socket_get_portid(sock); + + nlh = mnl_nlmsg_put_header(buf); + nlh->nlmsg_type = (NFNL_SUBSYS_CTNETLINK << 8) | IPCTNL_MSG_CT_DELETE; + nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; + nlh->nlmsg_seq = time(NULL); + + nfh = mnl_nlmsg_put_extra_header(nlh, sizeof(struct nfgenmsg)); + nfh->nfgen_family = AF_UNSPEC; + nfh->version = NFNETLINK_V0; + nfh->res_id = 0; + + mnl_attr_put_u16(nlh, CTA_ZONE, htons(zone)); + + ret = mnl_socket_sendto(sock, nlh, nlh->nlmsg_len); + if (ret < 0) { + perror("mnl_socket_sendto"); + return ret; + } + + ret = mnl_socket_recvfrom(sock, buf, MNL_SOCKET_BUFFER_SIZE); + if (ret < 0) { + perror("mnl_socket_recvfrom"); + return ret; + } + + ret = mnl_cb_run(buf, ret, nlh->nlmsg_seq, portid, NULL, NULL); + if (ret < 0) { + perror("mnl_cb_run"); + return ret; + } + + return 0; +} + +FIXTURE(conntrack_dump_flush) +{ + struct mnl_socket *sock; +}; + +FIXTURE_SETUP(conntrack_dump_flush) +{ + struct in6_addr src, dst; + int ret; + + self->sock = mnl_socket_open(NETLINK_NETFILTER); + if (!self->sock) { + perror("mnl_socket_open"); + exit(EXIT_FAILURE); + } + + if (mnl_socket_bind(self->sock, 0, MNL_SOCKET_AUTOPID) < 0) { + perror("mnl_socket_bind"); + exit(EXIT_FAILURE); + } + + ret = conntracK_count_zone(self->sock, TEST_ZONE_ID); + if (ret < 0 && errno == EPERM) + SKIP(return, "Needs to be run as root"); + else if (ret < 0 && errno == EOPNOTSUPP) + SKIP(return, "Kernel does not seem to support conntrack zones"); + + ret = conntrack_data_generate_v4(self->sock, 0xf0f0f0f0, 0xf1f1f1f1, + TEST_ZONE_ID); + EXPECT_EQ(ret, 0); + ret = conntrack_data_generate_v4(self->sock, 0xf2f2f2f2, 0xf3f3f3f3, + TEST_ZONE_ID + 1); + EXPECT_EQ(ret, 0); + ret = conntrack_data_generate_v4(self->sock, 0xf4f4f4f4, 0xf5f5f5f5, + TEST_ZONE_ID + 2); + EXPECT_EQ(ret, 0); + ret = conntrack_data_generate_v4(self->sock, 0xf6f6f6f6, 0xf7f7f7f7, + NF_CT_DEFAULT_ZONE_ID); + EXPECT_EQ(ret, 0); + + src = (struct in6_addr) {{ + .__u6_addr32 = { + 0xb80d0120, + 0x00000000, + 0x00000000, + 0x01000000 + } + }}; + dst = (struct in6_addr) {{ + .__u6_addr32 = { + 0xb80d0120, + 0x00000000, + 0x00000000, + 0x02000000 + } + }}; + ret = conntrack_data_generate_v6(self->sock, src, dst, + TEST_ZONE_ID); + EXPECT_EQ(ret, 0); + src = (struct in6_addr) {{ + .__u6_addr32 = { + 0xb80d0120, + 0x00000000, + 0x00000000, + 0x03000000 + } + }}; + dst = (struct in6_addr) {{ + .__u6_addr32 = { + 0xb80d0120, + 0x00000000, + 0x00000000, + 0x04000000 + } + }}; + ret = conntrack_data_generate_v6(self->sock, src, dst, + TEST_ZONE_ID + 1); + EXPECT_EQ(ret, 0); + src = (struct in6_addr) {{ + .__u6_addr32 = { + 0xb80d0120, + 0x00000000, + 0x00000000, + 0x05000000 + } + }}; + dst = (struct in6_addr) {{ + .__u6_addr32 = { + 0xb80d0120, + 0x00000000, + 0x00000000, + 0x06000000 + } + }}; + ret = conntrack_data_generate_v6(self->sock, src, dst, + TEST_ZONE_ID + 2); + EXPECT_EQ(ret, 0); + + src = (struct in6_addr) {{ + .__u6_addr32 = { + 0xb80d0120, + 0x00000000, + 0x00000000, + 0x07000000 + } + }}; + dst = (struct in6_addr) {{ + .__u6_addr32 = { + 0xb80d0120, + 0x00000000, + 0x00000000, + 0x08000000 + } + }}; + ret = conntrack_data_generate_v6(self->sock, src, dst, + NF_CT_DEFAULT_ZONE_ID); + EXPECT_EQ(ret, 0); + + ret = conntracK_count_zone(self->sock, TEST_ZONE_ID); + EXPECT_GE(ret, 2); + if (ret > 2) + SKIP(return, "kernel does not support filtering by zone"); +} + +FIXTURE_TEARDOWN(conntrack_dump_flush) +{ +} + +TEST_F(conntrack_dump_flush, test_dump_by_zone) +{ + int ret; + + ret = conntracK_count_zone(self->sock, TEST_ZONE_ID); + EXPECT_EQ(ret, 2); +} + +TEST_F(conntrack_dump_flush, test_flush_by_zone) +{ + int ret; + + ret = conntrack_flush_zone(self->sock, TEST_ZONE_ID); + EXPECT_EQ(ret, 0); + ret = conntracK_count_zone(self->sock, TEST_ZONE_ID); + EXPECT_EQ(ret, 0); + ret = conntracK_count_zone(self->sock, TEST_ZONE_ID + 1); + EXPECT_EQ(ret, 2); + ret = conntracK_count_zone(self->sock, TEST_ZONE_ID + 2); + EXPECT_EQ(ret, 2); + ret = conntracK_count_zone(self->sock, NF_CT_DEFAULT_ZONE_ID); + EXPECT_EQ(ret, 2); +} + +TEST_F(conntrack_dump_flush, test_flush_by_zone_default) +{ + int ret; + + ret = conntrack_flush_zone(self->sock, NF_CT_DEFAULT_ZONE_ID); + EXPECT_EQ(ret, 0); + ret = conntracK_count_zone(self->sock, TEST_ZONE_ID); + EXPECT_EQ(ret, 2); + ret = conntracK_count_zone(self->sock, TEST_ZONE_ID + 1); + EXPECT_EQ(ret, 2); + ret = conntracK_count_zone(self->sock, TEST_ZONE_ID + 2); + EXPECT_EQ(ret, 2); + ret = conntracK_count_zone(self->sock, NF_CT_DEFAULT_ZONE_ID); + EXPECT_EQ(ret, 0); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/netfilter/conntrack_icmp_related.sh b/tools/testing/selftests/netfilter/conntrack_icmp_related.sh index b48e1833bc89..76645aaf2b58 100755 --- a/tools/testing/selftests/netfilter/conntrack_icmp_related.sh +++ b/tools/testing/selftests/netfilter/conntrack_icmp_related.sh @@ -35,6 +35,8 @@ cleanup() { for i in 1 2;do ip netns del nsrouter$i;done } +trap cleanup EXIT + ipv4() { echo -n 192.168.$1.2 } @@ -146,11 +148,17 @@ ip netns exec nsclient1 nft -f - <<EOF table inet filter { counter unknown { } counter related { } + counter redir4 { } + counter redir6 { } chain input { type filter hook input priority 0; policy accept; - meta l4proto { icmp, icmpv6 } ct state established,untracked accept + icmp type "redirect" ct state "related" counter name "redir4" accept + icmpv6 type "nd-redirect" ct state "related" counter name "redir6" accept + + meta l4proto { icmp, icmpv6 } ct state established,untracked accept meta l4proto { icmp, icmpv6 } ct state "related" counter name "related" accept + counter name "unknown" drop } } @@ -279,5 +287,29 @@ else echo "ERROR: icmp error RELATED state test has failed" fi -cleanup +# add 'bad' route, expect icmp REDIRECT to be generated +ip netns exec nsclient1 ip route add 192.168.1.42 via 192.168.1.1 +ip netns exec nsclient1 ip route add dead:1::42 via dead:1::1 + +ip netns exec "nsclient1" ping -q -c 2 192.168.1.42 > /dev/null + +expect="packets 1 bytes 112" +check_counter nsclient1 "redir4" "$expect" +if [ $? -ne 0 ];then + ret=1 +fi + +ip netns exec "nsclient1" ping -c 1 dead:1::42 > /dev/null +expect="packets 1 bytes 192" +check_counter nsclient1 "redir6" "$expect" +if [ $? -ne 0 ];then + ret=1 +fi + +if [ $ret -eq 0 ];then + echo "PASS: icmp redirects had RELATED state" +else + echo "ERROR: icmp redirect RELATED state test has failed" +fi + exit $ret diff --git a/tools/testing/selftests/netfilter/conntrack_sctp_collision.sh b/tools/testing/selftests/netfilter/conntrack_sctp_collision.sh new file mode 100755 index 000000000000..a924e595cfd8 --- /dev/null +++ b/tools/testing/selftests/netfilter/conntrack_sctp_collision.sh @@ -0,0 +1,89 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Testing For SCTP COLLISION SCENARIO as Below: +# +# 14:35:47.655279 IP CLIENT_IP.PORT > SERVER_IP.PORT: sctp (1) [INIT] [init tag: 2017837359] +# 14:35:48.353250 IP SERVER_IP.PORT > CLIENT_IP.PORT: sctp (1) [INIT] [init tag: 1187206187] +# 14:35:48.353275 IP CLIENT_IP.PORT > SERVER_IP.PORT: sctp (1) [INIT ACK] [init tag: 2017837359] +# 14:35:48.353283 IP SERVER_IP.PORT > CLIENT_IP.PORT: sctp (1) [COOKIE ECHO] +# 14:35:48.353977 IP CLIENT_IP.PORT > SERVER_IP.PORT: sctp (1) [COOKIE ACK] +# 14:35:48.855335 IP SERVER_IP.PORT > CLIENT_IP.PORT: sctp (1) [INIT ACK] [init tag: 164579970] +# +# TOPO: SERVER_NS (link0)<--->(link1) ROUTER_NS (link2)<--->(link3) CLIENT_NS + +CLIENT_NS=$(mktemp -u client-XXXXXXXX) +CLIENT_IP="198.51.200.1" +CLIENT_PORT=1234 + +SERVER_NS=$(mktemp -u server-XXXXXXXX) +SERVER_IP="198.51.100.1" +SERVER_PORT=1234 + +ROUTER_NS=$(mktemp -u router-XXXXXXXX) +CLIENT_GW="198.51.200.2" +SERVER_GW="198.51.100.2" + +# setup the topo +setup() { + ip net add $CLIENT_NS + ip net add $SERVER_NS + ip net add $ROUTER_NS + ip -n $SERVER_NS link add link0 type veth peer name link1 netns $ROUTER_NS + ip -n $CLIENT_NS link add link3 type veth peer name link2 netns $ROUTER_NS + + ip -n $SERVER_NS link set link0 up + ip -n $SERVER_NS addr add $SERVER_IP/24 dev link0 + ip -n $SERVER_NS route add $CLIENT_IP dev link0 via $SERVER_GW + + ip -n $ROUTER_NS link set link1 up + ip -n $ROUTER_NS link set link2 up + ip -n $ROUTER_NS addr add $SERVER_GW/24 dev link1 + ip -n $ROUTER_NS addr add $CLIENT_GW/24 dev link2 + ip net exec $ROUTER_NS sysctl -wq net.ipv4.ip_forward=1 + + ip -n $CLIENT_NS link set link3 up + ip -n $CLIENT_NS addr add $CLIENT_IP/24 dev link3 + ip -n $CLIENT_NS route add $SERVER_IP dev link3 via $CLIENT_GW + + # simulate the delay on OVS upcall by setting up a delay for INIT_ACK with + # tc on $SERVER_NS side + tc -n $SERVER_NS qdisc add dev link0 root handle 1: htb + tc -n $SERVER_NS class add dev link0 parent 1: classid 1:1 htb rate 100mbit + tc -n $SERVER_NS filter add dev link0 parent 1: protocol ip u32 match ip protocol 132 \ + 0xff match u8 2 0xff at 32 flowid 1:1 + tc -n $SERVER_NS qdisc add dev link0 parent 1:1 handle 10: netem delay 1200ms + + # simulate the ctstate check on OVS nf_conntrack + ip net exec $ROUTER_NS iptables -A FORWARD -m state --state INVALID,UNTRACKED -j DROP + ip net exec $ROUTER_NS iptables -A INPUT -p sctp -j DROP + + # use a smaller number for assoc's max_retrans to reproduce the issue + modprobe sctp + ip net exec $CLIENT_NS sysctl -wq net.sctp.association_max_retrans=3 +} + +cleanup() { + ip net exec $CLIENT_NS pkill sctp_collision 2>&1 >/dev/null + ip net exec $SERVER_NS pkill sctp_collision 2>&1 >/dev/null + ip net del "$CLIENT_NS" + ip net del "$SERVER_NS" + ip net del "$ROUTER_NS" +} + +do_test() { + ip net exec $SERVER_NS ./sctp_collision server \ + $SERVER_IP $SERVER_PORT $CLIENT_IP $CLIENT_PORT & + ip net exec $CLIENT_NS ./sctp_collision client \ + $CLIENT_IP $CLIENT_PORT $SERVER_IP $SERVER_PORT +} + +# NOTE: one way to work around the issue is set a smaller hb_interval +# ip net exec $CLIENT_NS sysctl -wq net.sctp.hb_interval=3500 + +# run the test case +trap cleanup EXIT +setup && \ +echo "Test for SCTP Collision in nf_conntrack:" && \ +do_test && echo "PASS!" +exit $? diff --git a/tools/testing/selftests/netfilter/conntrack_tcp_unreplied.sh b/tools/testing/selftests/netfilter/conntrack_tcp_unreplied.sh new file mode 100755 index 000000000000..e7d7bf13cff5 --- /dev/null +++ b/tools/testing/selftests/netfilter/conntrack_tcp_unreplied.sh @@ -0,0 +1,167 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Check that UNREPLIED tcp conntrack will eventually timeout. +# + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 +ret=0 + +waittime=20 +sfx=$(mktemp -u "XXXXXXXX") +ns1="ns1-$sfx" +ns2="ns2-$sfx" + +nft --version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without nft tool" + exit $ksft_skip +fi + +ip -Version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +cleanup() { + ip netns pids $ns1 | xargs kill 2>/dev/null + ip netns pids $ns2 | xargs kill 2>/dev/null + + ip netns del $ns1 + ip netns del $ns2 +} + +ipv4() { + echo -n 192.168.$1.2 +} + +check_counter() +{ + ns=$1 + name=$2 + expect=$3 + local lret=0 + + cnt=$(ip netns exec $ns2 nft list counter inet filter "$name" | grep -q "$expect") + if [ $? -ne 0 ]; then + echo "ERROR: counter $name in $ns2 has unexpected value (expected $expect)" 1>&2 + ip netns exec $ns2 nft list counter inet filter "$name" 1>&2 + lret=1 + fi + + return $lret +} + +# Create test namespaces +ip netns add $ns1 || exit 1 + +trap cleanup EXIT + +ip netns add $ns2 || exit 1 + +# Connect the namespace to the host using a veth pair +ip -net $ns1 link add name veth1 type veth peer name veth2 +ip -net $ns1 link set netns $ns2 dev veth2 + +ip -net $ns1 link set up dev lo +ip -net $ns2 link set up dev lo +ip -net $ns1 link set up dev veth1 +ip -net $ns2 link set up dev veth2 + +ip -net $ns2 addr add 10.11.11.2/24 dev veth2 +ip -net $ns2 route add default via 10.11.11.1 + +ip netns exec $ns2 sysctl -q net.ipv4.conf.veth2.forwarding=1 + +# add a rule inside NS so we enable conntrack +ip netns exec $ns1 iptables -A INPUT -m state --state established,related -j ACCEPT + +ip -net $ns1 addr add 10.11.11.1/24 dev veth1 +ip -net $ns1 route add 10.99.99.99 via 10.11.11.2 + +# Check connectivity works +ip netns exec $ns1 ping -q -c 2 10.11.11.2 >/dev/null || exit 1 + +ip netns exec $ns2 nc -l -p 8080 < /dev/null & + +# however, conntrack entries are there + +ip netns exec $ns2 nft -f - <<EOF +table inet filter { + counter connreq { } + counter redir { } + chain input { + type filter hook input priority 0; policy accept; + ct state new tcp flags syn ip daddr 10.99.99.99 tcp dport 80 counter name "connreq" accept + ct state new ct status dnat tcp dport 8080 counter name "redir" accept + } +} +EOF +if [ $? -ne 0 ]; then + echo "ERROR: Could not load nft rules" + exit 1 +fi + +ip netns exec $ns2 sysctl -q net.netfilter.nf_conntrack_tcp_timeout_syn_sent=10 + +echo "INFO: connect $ns1 -> $ns2 to the virtual ip" +ip netns exec $ns1 bash -c 'while true ; do + nc -p 60000 10.99.99.99 80 + sleep 1 + done' & + +sleep 1 + +ip netns exec $ns2 nft -f - <<EOF +table inet nat { + chain prerouting { + type nat hook prerouting priority 0; policy accept; + ip daddr 10.99.99.99 tcp dport 80 redirect to :8080 + } +} +EOF +if [ $? -ne 0 ]; then + echo "ERROR: Could not load nat redirect" + exit 1 +fi + +count=$(ip netns exec $ns2 conntrack -L -p tcp --dport 80 2>/dev/null | wc -l) +if [ $count -eq 0 ]; then + echo "ERROR: $ns2 did not pick up tcp connection from peer" + exit 1 +fi + +echo "INFO: NAT redirect added in ns $ns2, waiting for $waittime seconds for nat to take effect" +for i in $(seq 1 $waittime); do + echo -n "." + + sleep 1 + + count=$(ip netns exec $ns2 conntrack -L -p tcp --reply-port-src 8080 2>/dev/null | wc -l) + if [ $count -gt 0 ]; then + echo + echo "PASS: redirection took effect after $i seconds" + break + fi + + m=$((i%20)) + if [ $m -eq 0 ]; then + echo " waited for $i seconds" + fi +done + +expect="packets 1 bytes 60" +check_counter "$ns2" "redir" "$expect" +if [ $? -ne 0 ]; then + ret=1 +fi + +if [ $ret -eq 0 ];then + echo "PASS: redirection counter has expected values" +else + echo "ERROR: no tcp connection was redirected" +fi + +exit $ret diff --git a/tools/testing/selftests/netfilter/conntrack_vrf.sh b/tools/testing/selftests/netfilter/conntrack_vrf.sh new file mode 100755 index 000000000000..8b5ea9234588 --- /dev/null +++ b/tools/testing/selftests/netfilter/conntrack_vrf.sh @@ -0,0 +1,241 @@ +#!/bin/sh + +# This script demonstrates interaction of conntrack and vrf. +# The vrf driver calls the netfilter hooks again, with oif/iif +# pointing at the VRF device. +# +# For ingress, this means first iteration has iifname of lower/real +# device. In this script, thats veth0. +# Second iteration is iifname set to vrf device, tvrf in this script. +# +# For egress, this is reversed: first iteration has the vrf device, +# second iteration is done with the lower/real/veth0 device. +# +# test_ct_zone_in demonstrates unexpected change of nftables +# behavior # caused by commit 09e856d54bda5f28 "vrf: Reset skb conntrack +# connection on VRF rcv" +# +# It was possible to assign conntrack zone to a packet (or mark it for +# `notracking`) in the prerouting chain before conntrack, based on real iif. +# +# After the change, the zone assignment is lost and the zone is assigned based +# on the VRF master interface (in case such a rule exists). +# assignment is lost. Instead, assignment based on the `iif` matching +# Thus it is impossible to distinguish packets based on the original +# interface. +# +# test_masquerade_vrf and test_masquerade_veth0 demonstrate the problem +# that was supposed to be fixed by the commit mentioned above to make sure +# that any fix to test case 1 won't break masquerade again. + +ksft_skip=4 + +IP0=172.30.30.1 +IP1=172.30.30.2 +PFXL=30 +ret=0 + +sfx=$(mktemp -u "XXXXXXXX") +ns0="ns0-$sfx" +ns1="ns1-$sfx" + +cleanup() +{ + ip netns pids $ns0 | xargs kill 2>/dev/null + ip netns pids $ns1 | xargs kill 2>/dev/null + + ip netns del $ns0 $ns1 +} + +nft --version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without nft tool" + exit $ksft_skip +fi + +ip -Version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +ip netns add "$ns0" +if [ $? -ne 0 ];then + echo "SKIP: Could not create net namespace $ns0" + exit $ksft_skip +fi +ip netns add "$ns1" + +trap cleanup EXIT + +ip netns exec $ns0 sysctl -q -w net.ipv4.conf.default.rp_filter=0 +ip netns exec $ns0 sysctl -q -w net.ipv4.conf.all.rp_filter=0 +ip netns exec $ns0 sysctl -q -w net.ipv4.conf.all.rp_filter=0 + +ip link add veth0 netns "$ns0" type veth peer name veth0 netns "$ns1" > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not add veth device" + exit $ksft_skip +fi + +ip -net $ns0 li add tvrf type vrf table 9876 +if [ $? -ne 0 ];then + echo "SKIP: Could not add vrf device" + exit $ksft_skip +fi + +ip -net $ns0 li set lo up + +ip -net $ns0 li set veth0 master tvrf +ip -net $ns0 li set tvrf up +ip -net $ns0 li set veth0 up +ip -net $ns1 li set veth0 up + +ip -net $ns0 addr add $IP0/$PFXL dev veth0 +ip -net $ns1 addr add $IP1/$PFXL dev veth0 + +ip netns exec $ns1 iperf3 -s > /dev/null 2>&1& +if [ $? -ne 0 ];then + echo "SKIP: Could not start iperf3" + exit $ksft_skip +fi + +# test vrf ingress handling. +# The incoming connection should be placed in conntrack zone 1, +# as decided by the first iteration of the ruleset. +test_ct_zone_in() +{ +ip netns exec $ns0 nft -f - <<EOF +table testct { + chain rawpre { + type filter hook prerouting priority raw; + + iif { veth0, tvrf } counter meta nftrace set 1 + iif veth0 counter ct zone set 1 counter return + iif tvrf counter ct zone set 2 counter return + ip protocol icmp counter + notrack counter + } + + chain rawout { + type filter hook output priority raw; + + oif veth0 counter ct zone set 1 counter return + oif tvrf counter ct zone set 2 counter return + notrack counter + } +} +EOF + ip netns exec $ns1 ping -W 1 -c 1 -I veth0 $IP0 > /dev/null + + # should be in zone 1, not zone 2 + count=$(ip netns exec $ns0 conntrack -L -s $IP1 -d $IP0 -p icmp --zone 1 2>/dev/null | wc -l) + if [ $count -eq 1 ]; then + echo "PASS: entry found in conntrack zone 1" + else + echo "FAIL: entry not found in conntrack zone 1" + count=$(ip netns exec $ns0 conntrack -L -s $IP1 -d $IP0 -p icmp --zone 2 2> /dev/null | wc -l) + if [ $count -eq 1 ]; then + echo "FAIL: entry found in zone 2 instead" + else + echo "FAIL: entry not in zone 1 or 2, dumping table" + ip netns exec $ns0 conntrack -L + ip netns exec $ns0 nft list ruleset + fi + fi +} + +# add masq rule that gets evaluated w. outif set to vrf device. +# This tests the first iteration of the packet through conntrack, +# oifname is the vrf device. +test_masquerade_vrf() +{ + local qdisc=$1 + + if [ "$qdisc" != "default" ]; then + tc -net $ns0 qdisc add dev tvrf root $qdisc + fi + + ip netns exec $ns0 conntrack -F 2>/dev/null + +ip netns exec $ns0 nft -f - <<EOF +flush ruleset +table ip nat { + chain rawout { + type filter hook output priority raw; + + oif tvrf ct state untracked counter + } + chain postrouting2 { + type filter hook postrouting priority mangle; + + oif tvrf ct state untracked counter + } + chain postrouting { + type nat hook postrouting priority 0; + # NB: masquerade should always be combined with 'oif(name) bla', + # lack of this is intentional here, we want to exercise double-snat. + ip saddr 172.30.30.0/30 counter masquerade random + } +} +EOF + ip netns exec $ns0 ip vrf exec tvrf iperf3 -t 1 -c $IP1 >/dev/null + if [ $? -ne 0 ]; then + echo "FAIL: iperf3 connect failure with masquerade + sport rewrite on vrf device" + ret=1 + return + fi + + # must also check that nat table was evaluated on second (lower device) iteration. + ip netns exec $ns0 nft list table ip nat |grep -q 'counter packets 2' && + ip netns exec $ns0 nft list table ip nat |grep -q 'untracked counter packets [1-9]' + if [ $? -eq 0 ]; then + echo "PASS: iperf3 connect with masquerade + sport rewrite on vrf device ($qdisc qdisc)" + else + echo "FAIL: vrf rules have unexpected counter value" + ret=1 + fi + + if [ "$qdisc" != "default" ]; then + tc -net $ns0 qdisc del dev tvrf root + fi +} + +# add masq rule that gets evaluated w. outif set to veth device. +# This tests the 2nd iteration of the packet through conntrack, +# oifname is the lower device (veth0 in this case). +test_masquerade_veth() +{ + ip netns exec $ns0 conntrack -F 2>/dev/null +ip netns exec $ns0 nft -f - <<EOF +flush ruleset +table ip nat { + chain postrouting { + type nat hook postrouting priority 0; + meta oif veth0 ip saddr 172.30.30.0/30 counter masquerade random + } +} +EOF + ip netns exec $ns0 ip vrf exec tvrf iperf3 -t 1 -c $IP1 > /dev/null + if [ $? -ne 0 ]; then + echo "FAIL: iperf3 connect failure with masquerade + sport rewrite on veth device" + ret=1 + return + fi + + # must also check that nat table was evaluated on second (lower device) iteration. + ip netns exec $ns0 nft list table ip nat |grep -q 'counter packets 2' + if [ $? -eq 0 ]; then + echo "PASS: iperf3 connect with masquerade + sport rewrite on veth device" + else + echo "FAIL: vrf masq rule has unexpected counter value" + ret=1 + fi +} + +test_ct_zone_in +test_masquerade_vrf "default" +test_masquerade_vrf "pfifo" +test_masquerade_veth + +exit $ret diff --git a/tools/testing/selftests/netfilter/ipip-conntrack-mtu.sh b/tools/testing/selftests/netfilter/ipip-conntrack-mtu.sh new file mode 100755 index 000000000000..eb9553e4986b --- /dev/null +++ b/tools/testing/selftests/netfilter/ipip-conntrack-mtu.sh @@ -0,0 +1,207 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +# Conntrack needs to reassemble fragments in order to have complete +# packets for rule matching. Reassembly can lead to packet loss. + +# Consider the following setup: +# +--------+ +---------+ +--------+ +# |Router A|-------|Wanrouter|-------|Router B| +# | |.IPIP..| |..IPIP.| | +# +--------+ +---------+ +--------+ +# / mtu 1400 \ +# / \ +#+--------+ +--------+ +#|Client A| |Client B| +#| | | | +#+--------+ +--------+ + +# Router A and Router B use IPIP tunnel interfaces to tunnel traffic +# between Client A and Client B over WAN. Wanrouter has MTU 1400 set +# on its interfaces. + +rnd=$(mktemp -u XXXXXXXX) +rx=$(mktemp) + +r_a="ns-ra-$rnd" +r_b="ns-rb-$rnd" +r_w="ns-rw-$rnd" +c_a="ns-ca-$rnd" +c_b="ns-cb-$rnd" + +checktool (){ + if ! $1 > /dev/null 2>&1; then + echo "SKIP: Could not $2" + exit $ksft_skip + fi +} + +checktool "iptables --version" "run test without iptables" +checktool "ip -Version" "run test without ip tool" +checktool "which socat" "run test without socat" +checktool "ip netns add ${r_a}" "create net namespace" + +for n in ${r_b} ${r_w} ${c_a} ${c_b};do + ip netns add ${n} +done + +cleanup() { + for n in ${r_a} ${r_b} ${r_w} ${c_a} ${c_b};do + ip netns del ${n} + done + rm -f ${rx} +} + +trap cleanup EXIT + +test_path() { + msg="$1" + + ip netns exec ${c_b} socat -t 3 - udp4-listen:5000,reuseaddr > ${rx} < /dev/null & + + sleep 1 + for i in 1 2 3; do + head -c1400 /dev/zero | tr "\000" "a" | \ + ip netns exec ${c_a} socat -t 1 -u STDIN UDP:192.168.20.2:5000 + done + + wait + + bytes=$(wc -c < ${rx}) + + if [ $bytes -eq 1400 ];then + echo "OK: PMTU $msg connection tracking" + else + echo "FAIL: PMTU $msg connection tracking: got $bytes, expected 1400" + exit 1 + fi +} + +# Detailed setup for Router A +# --------------------------- +# Interfaces: +# eth0: 10.2.2.1/24 +# eth1: 192.168.10.1/24 +# ipip0: No IP address, local 10.2.2.1 remote 10.4.4.1 +# Routes: +# 192.168.20.0/24 dev ipip0 (192.168.20.0/24 is subnet of Client B) +# 10.4.4.1 via 10.2.2.254 (Router B via Wanrouter) +# No iptables rules at all. + +ip link add veth0 netns ${r_a} type veth peer name veth0 netns ${r_w} +ip link add veth1 netns ${r_a} type veth peer name veth0 netns ${c_a} + +l_addr="10.2.2.1" +r_addr="10.4.4.1" +ip netns exec ${r_a} ip link add ipip0 type ipip local ${l_addr} remote ${r_addr} mode ipip || exit $ksft_skip + +for dev in lo veth0 veth1 ipip0; do + ip -net ${r_a} link set $dev up +done + +ip -net ${r_a} addr add 10.2.2.1/24 dev veth0 +ip -net ${r_a} addr add 192.168.10.1/24 dev veth1 + +ip -net ${r_a} route add 192.168.20.0/24 dev ipip0 +ip -net ${r_a} route add 10.4.4.0/24 via 10.2.2.254 + +ip netns exec ${r_a} sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null + +# Detailed setup for Router B +# --------------------------- +# Interfaces: +# eth0: 10.4.4.1/24 +# eth1: 192.168.20.1/24 +# ipip0: No IP address, local 10.4.4.1 remote 10.2.2.1 +# Routes: +# 192.168.10.0/24 dev ipip0 (192.168.10.0/24 is subnet of Client A) +# 10.2.2.1 via 10.4.4.254 (Router A via Wanrouter) +# No iptables rules at all. + +ip link add veth0 netns ${r_b} type veth peer name veth1 netns ${r_w} +ip link add veth1 netns ${r_b} type veth peer name veth0 netns ${c_b} + +l_addr="10.4.4.1" +r_addr="10.2.2.1" + +ip netns exec ${r_b} ip link add ipip0 type ipip local ${l_addr} remote ${r_addr} mode ipip || exit $ksft_skip + +for dev in lo veth0 veth1 ipip0; do + ip -net ${r_b} link set $dev up +done + +ip -net ${r_b} addr add 10.4.4.1/24 dev veth0 +ip -net ${r_b} addr add 192.168.20.1/24 dev veth1 + +ip -net ${r_b} route add 192.168.10.0/24 dev ipip0 +ip -net ${r_b} route add 10.2.2.0/24 via 10.4.4.254 +ip netns exec ${r_b} sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null + +# Client A +ip -net ${c_a} addr add 192.168.10.2/24 dev veth0 +ip -net ${c_a} link set dev lo up +ip -net ${c_a} link set dev veth0 up +ip -net ${c_a} route add default via 192.168.10.1 + +# Client A +ip -net ${c_b} addr add 192.168.20.2/24 dev veth0 +ip -net ${c_b} link set dev veth0 up +ip -net ${c_b} link set dev lo up +ip -net ${c_b} route add default via 192.168.20.1 + +# Wan +ip -net ${r_w} addr add 10.2.2.254/24 dev veth0 +ip -net ${r_w} addr add 10.4.4.254/24 dev veth1 + +ip -net ${r_w} link set dev lo up +ip -net ${r_w} link set dev veth0 up mtu 1400 +ip -net ${r_w} link set dev veth1 up mtu 1400 + +ip -net ${r_a} link set dev veth0 mtu 1400 +ip -net ${r_b} link set dev veth0 mtu 1400 + +ip netns exec ${r_w} sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null + +# Path MTU discovery +# ------------------ +# Running tracepath from Client A to Client B shows PMTU discovery is working +# as expected: +# +# clienta:~# tracepath 192.168.20.2 +# 1?: [LOCALHOST] pmtu 1500 +# 1: 192.168.10.1 0.867ms +# 1: 192.168.10.1 0.302ms +# 2: 192.168.10.1 0.312ms pmtu 1480 +# 2: no reply +# 3: 192.168.10.1 0.510ms pmtu 1380 +# 3: 192.168.20.2 2.320ms reached +# Resume: pmtu 1380 hops 3 back 3 + +# ip netns exec ${c_a} traceroute --mtu 192.168.20.2 + +# Router A has learned PMTU (1400) to Router B from Wanrouter. +# Client A has learned PMTU (1400 - IPIP overhead = 1380) to Client B +# from Router A. + +#Send large UDP packet +#--------------------- +#Now we send a 1400 bytes UDP packet from Client A to Client B: + +# clienta:~# head -c1400 /dev/zero | tr "\000" "a" | socat -u STDIN UDP:192.168.20.2:5000 +test_path "without" + +# The IPv4 stack on Client A already knows the PMTU to Client B, so the +# UDP packet is sent as two fragments (1380 + 20). Router A forwards the +# fragments between eth1 and ipip0. The fragments fit into the tunnel and +# reach their destination. + +#When sending the large UDP packet again, Router A now reassembles the +#fragments before routing the packet over ipip0. The resulting IPIP +#packet is too big (1400) for the tunnel PMTU (1380) to Router B, it is +#dropped on Router A before sending. + +ip netns exec ${r_a} iptables -A FORWARD -m conntrack --ctstate NEW +test_path "with" diff --git a/tools/testing/selftests/netfilter/nf_nat_edemux.sh b/tools/testing/selftests/netfilter/nf_nat_edemux.sh new file mode 100755 index 000000000000..a1aa8f4a5828 --- /dev/null +++ b/tools/testing/selftests/netfilter/nf_nat_edemux.sh @@ -0,0 +1,127 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test NAT source port clash resolution +# + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 +ret=0 + +sfx=$(mktemp -u "XXXXXXXX") +ns1="ns1-$sfx" +ns2="ns2-$sfx" +socatpid=0 + +cleanup() +{ + [ $socatpid -gt 0 ] && kill $socatpid + ip netns del $ns1 + ip netns del $ns2 +} + +socat -h > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without socat" + exit $ksft_skip +fi + +iptables --version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without iptables" + exit $ksft_skip +fi + +ip -Version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +ip netns add "$ns1" +if [ $? -ne 0 ];then + echo "SKIP: Could not create net namespace $ns1" + exit $ksft_skip +fi + +trap cleanup EXIT + +ip netns add $ns2 + +# Connect the namespaces using a veth pair +ip link add name veth2 type veth peer name veth1 +ip link set netns $ns1 dev veth1 +ip link set netns $ns2 dev veth2 + +ip netns exec $ns1 ip link set up dev lo +ip netns exec $ns1 ip link set up dev veth1 +ip netns exec $ns1 ip addr add 192.168.1.1/24 dev veth1 + +ip netns exec $ns2 ip link set up dev lo +ip netns exec $ns2 ip link set up dev veth2 +ip netns exec $ns2 ip addr add 192.168.1.2/24 dev veth2 + +# Create a server in one namespace +ip netns exec $ns1 socat -u TCP-LISTEN:5201,fork OPEN:/dev/null,wronly=1 & +socatpid=$! + +# Restrict source port to just one so we don't have to exhaust +# all others. +ip netns exec $ns2 sysctl -q net.ipv4.ip_local_port_range="10000 10000" + +# add a virtual IP using DNAT +ip netns exec $ns2 iptables -t nat -A OUTPUT -d 10.96.0.1/32 -p tcp --dport 443 -j DNAT --to-destination 192.168.1.1:5201 + +# ... and route it to the other namespace +ip netns exec $ns2 ip route add 10.96.0.1 via 192.168.1.1 + +sleep 1 + +# add a persistent connection from the other namespace +ip netns exec $ns2 socat -t 10 - TCP:192.168.1.1:5201 > /dev/null & + +sleep 1 + +# ip daddr:dport will be rewritten to 192.168.1.1 5201 +# NAT must reallocate source port 10000 because +# 192.168.1.2:10000 -> 192.168.1.1:5201 is already in use +echo test | ip netns exec $ns2 socat -t 3 -u STDIN TCP:10.96.0.1:443,connect-timeout=3 >/dev/null +ret=$? + +# Check socat can connect to 10.96.0.1:443 (aka 192.168.1.1:5201). +if [ $ret -eq 0 ]; then + echo "PASS: socat can connect via NAT'd address" +else + echo "FAIL: socat cannot connect via NAT'd address" +fi + +# check sport clashres. +ip netns exec $ns1 iptables -t nat -A PREROUTING -p tcp --dport 5202 -j REDIRECT --to-ports 5201 +ip netns exec $ns1 iptables -t nat -A PREROUTING -p tcp --dport 5203 -j REDIRECT --to-ports 5201 + +sleep 5 | ip netns exec $ns2 socat -t 5 -u STDIN TCP:192.168.1.1:5202,connect-timeout=5 >/dev/null & +cpid1=$! +sleep 1 + +# if connect succeeds, client closes instantly due to EOF on stdin. +# if connect hangs, it will time out after 5s. +echo | ip netns exec $ns2 socat -t 3 -u STDIN TCP:192.168.1.1:5203,connect-timeout=5 >/dev/null & +cpid2=$! + +time_then=$(date +%s) +wait $cpid2 +rv=$? +time_now=$(date +%s) + +# Check how much time has elapsed, expectation is for +# 'cpid2' to connect and then exit (and no connect delay). +delta=$((time_now - time_then)) + +if [ $delta -lt 2 -a $rv -eq 0 ]; then + echo "PASS: could connect to service via redirected ports" +else + echo "FAIL: socat cannot connect to service via redirect ($delta seconds elapsed, returned $rv)" + ret=1 +fi + +exit $ret diff --git a/tools/testing/selftests/netfilter/nft_audit.sh b/tools/testing/selftests/netfilter/nft_audit.sh new file mode 100755 index 000000000000..99ed5bd6e840 --- /dev/null +++ b/tools/testing/selftests/netfilter/nft_audit.sh @@ -0,0 +1,245 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Check that audit logs generated for nft commands are as expected. + +SKIP_RC=4 +RC=0 + +nft --version >/dev/null 2>&1 || { + echo "SKIP: missing nft tool" + exit $SKIP_RC +} + +# Run everything in a separate network namespace +[ "${1}" != "run" ] && { unshare -n "${0}" run; exit $?; } + +# give other scripts a chance to finish - audit_logread sees all activity +sleep 1 + +logfile=$(mktemp) +rulefile=$(mktemp) +echo "logging into $logfile" +./audit_logread >"$logfile" & +logread_pid=$! +trap 'kill $logread_pid; rm -f $logfile $rulefile' EXIT +exec 3<"$logfile" + +do_test() { # (cmd, log) + echo -n "testing for cmd: $1 ... " + cat <&3 >/dev/null + $1 >/dev/null || exit 1 + sleep 0.1 + res=$(diff -a -u <(echo "$2") - <&3) + [ $? -eq 0 ] && { echo "OK"; return; } + echo "FAIL" + grep -v '^\(---\|+++\|@@\)' <<< "$res" + ((RC--)) +} + +nft flush ruleset + +# adding tables, chains and rules + +for table in t1 t2; do + do_test "nft add table $table" \ + "table=$table family=2 entries=1 op=nft_register_table" + + do_test "nft add chain $table c1" \ + "table=$table family=2 entries=1 op=nft_register_chain" + + do_test "nft add chain $table c2; add chain $table c3" \ + "table=$table family=2 entries=2 op=nft_register_chain" + + cmd="add rule $table c1 counter" + + do_test "nft $cmd" \ + "table=$table family=2 entries=1 op=nft_register_rule" + + do_test "nft $cmd; $cmd" \ + "table=$table family=2 entries=2 op=nft_register_rule" + + cmd="" + sep="" + for chain in c2 c3; do + for i in {1..3}; do + cmd+="$sep add rule $table $chain counter" + sep=";" + done + done + do_test "nft $cmd" \ + "table=$table family=2 entries=6 op=nft_register_rule" +done + +for ((i = 0; i < 500; i++)); do + echo "add rule t2 c3 counter accept comment \"rule $i\"" +done >$rulefile +do_test "nft -f $rulefile" \ +'table=t2 family=2 entries=500 op=nft_register_rule' + +# adding sets and elements + +settype='type inet_service; counter' +setelem='{ 22, 80, 443 }' +setblock="{ $settype; elements = $setelem; }" +do_test "nft add set t1 s $setblock" \ +"table=t1 family=2 entries=4 op=nft_register_set" + +do_test "nft add set t1 s2 $setblock; add set t1 s3 { $settype; }" \ +"table=t1 family=2 entries=5 op=nft_register_set" + +do_test "nft add element t1 s3 $setelem" \ +"table=t1 family=2 entries=3 op=nft_register_setelem" + +# adding counters + +do_test 'nft add counter t1 c1' \ +'table=t1 family=2 entries=1 op=nft_register_obj' + +do_test 'nft add counter t2 c1; add counter t2 c2' \ +'table=t2 family=2 entries=2 op=nft_register_obj' + +for ((i = 3; i <= 500; i++)); do + echo "add counter t2 c$i" +done >$rulefile +do_test "nft -f $rulefile" \ +'table=t2 family=2 entries=498 op=nft_register_obj' + +# adding/updating quotas + +do_test 'nft add quota t1 q1 { 10 bytes }' \ +'table=t1 family=2 entries=1 op=nft_register_obj' + +do_test 'nft add quota t2 q1 { 10 bytes }; add quota t2 q2 { 10 bytes }' \ +'table=t2 family=2 entries=2 op=nft_register_obj' + +for ((i = 3; i <= 500; i++)); do + echo "add quota t2 q$i { 10 bytes }" +done >$rulefile +do_test "nft -f $rulefile" \ +'table=t2 family=2 entries=498 op=nft_register_obj' + +# changing the quota value triggers obj update path +do_test 'nft add quota t1 q1 { 20 bytes }' \ +'table=t1 family=2 entries=1 op=nft_register_obj' + +# resetting rules + +do_test 'nft reset rules t1 c2' \ +'table=t1 family=2 entries=3 op=nft_reset_rule' + +do_test 'nft reset rules table t1' \ +'table=t1 family=2 entries=3 op=nft_reset_rule +table=t1 family=2 entries=3 op=nft_reset_rule +table=t1 family=2 entries=3 op=nft_reset_rule' + +do_test 'nft reset rules t2 c3' \ +'table=t2 family=2 entries=189 op=nft_reset_rule +table=t2 family=2 entries=188 op=nft_reset_rule +table=t2 family=2 entries=126 op=nft_reset_rule' + +do_test 'nft reset rules t2' \ +'table=t2 family=2 entries=3 op=nft_reset_rule +table=t2 family=2 entries=3 op=nft_reset_rule +table=t2 family=2 entries=186 op=nft_reset_rule +table=t2 family=2 entries=188 op=nft_reset_rule +table=t2 family=2 entries=129 op=nft_reset_rule' + +do_test 'nft reset rules' \ +'table=t1 family=2 entries=3 op=nft_reset_rule +table=t1 family=2 entries=3 op=nft_reset_rule +table=t1 family=2 entries=3 op=nft_reset_rule +table=t2 family=2 entries=3 op=nft_reset_rule +table=t2 family=2 entries=3 op=nft_reset_rule +table=t2 family=2 entries=180 op=nft_reset_rule +table=t2 family=2 entries=188 op=nft_reset_rule +table=t2 family=2 entries=135 op=nft_reset_rule' + +# resetting sets and elements + +elem=(22 ,80 ,443) +relem="" +for i in {1..3}; do + relem+="${elem[((i - 1))]}" + do_test "nft reset element t1 s { $relem }" \ + "table=t1 family=2 entries=$i op=nft_reset_setelem" +done + +do_test 'nft reset set t1 s' \ +'table=t1 family=2 entries=3 op=nft_reset_setelem' + +# resetting counters + +do_test 'nft reset counter t1 c1' \ +'table=t1 family=2 entries=1 op=nft_reset_obj' + +do_test 'nft reset counters t1' \ +'table=t1 family=2 entries=1 op=nft_reset_obj' + +do_test 'nft reset counters t2' \ +'table=t2 family=2 entries=342 op=nft_reset_obj +table=t2 family=2 entries=158 op=nft_reset_obj' + +do_test 'nft reset counters' \ +'table=t1 family=2 entries=1 op=nft_reset_obj +table=t2 family=2 entries=341 op=nft_reset_obj +table=t2 family=2 entries=159 op=nft_reset_obj' + +# resetting quotas + +do_test 'nft reset quota t1 q1' \ +'table=t1 family=2 entries=1 op=nft_reset_obj' + +do_test 'nft reset quotas t1' \ +'table=t1 family=2 entries=1 op=nft_reset_obj' + +do_test 'nft reset quotas t2' \ +'table=t2 family=2 entries=315 op=nft_reset_obj +table=t2 family=2 entries=185 op=nft_reset_obj' + +do_test 'nft reset quotas' \ +'table=t1 family=2 entries=1 op=nft_reset_obj +table=t2 family=2 entries=314 op=nft_reset_obj +table=t2 family=2 entries=186 op=nft_reset_obj' + +# deleting rules + +readarray -t handles < <(nft -a list chain t1 c1 | \ + sed -n 's/.*counter.* handle \(.*\)$/\1/p') + +do_test "nft delete rule t1 c1 handle ${handles[0]}" \ +'table=t1 family=2 entries=1 op=nft_unregister_rule' + +cmd='delete rule t1 c1 handle' +do_test "nft $cmd ${handles[1]}; $cmd ${handles[2]}" \ +'table=t1 family=2 entries=2 op=nft_unregister_rule' + +do_test 'nft flush chain t1 c2' \ +'table=t1 family=2 entries=3 op=nft_unregister_rule' + +do_test 'nft flush table t2' \ +'table=t2 family=2 entries=509 op=nft_unregister_rule' + +# deleting chains + +do_test 'nft delete chain t2 c2' \ +'table=t2 family=2 entries=1 op=nft_unregister_chain' + +# deleting sets and elements + +do_test 'nft delete element t1 s { 22 }' \ +'table=t1 family=2 entries=1 op=nft_unregister_setelem' + +do_test 'nft delete element t1 s { 80, 443 }' \ +'table=t1 family=2 entries=2 op=nft_unregister_setelem' + +do_test 'nft flush set t1 s2' \ +'table=t1 family=2 entries=3 op=nft_unregister_setelem' + +do_test 'nft delete set t1 s2' \ +'table=t1 family=2 entries=1 op=nft_unregister_set' + +do_test 'nft delete set t1 s3' \ +'table=t1 family=2 entries=1 op=nft_unregister_set' + +exit $RC diff --git a/tools/testing/selftests/netfilter/nft_concat_range.sh b/tools/testing/selftests/netfilter/nft_concat_range.sh index 5a4938d6dcf2..e908009576c7 100755 --- a/tools/testing/selftests/netfilter/nft_concat_range.sh +++ b/tools/testing/selftests/netfilter/nft_concat_range.sh @@ -23,15 +23,15 @@ TESTS="reported_issues correctness concurrency timeout" # Set types, defined by TYPE_ variables below TYPES="net_port port_net net6_port port_proto net6_port_mac net6_port_mac_proto - net_port_net net_mac net_mac_icmp net6_mac_icmp net6_port_net6_port - net_port_mac_proto_net" + net_port_net net_mac mac_net net_mac_icmp net6_mac_icmp + net6_port_net6_port net_port_mac_proto_net" # Reported bugs, also described by TYPE_ variables below -BUGS="flush_remove_add" +BUGS="flush_remove_add reload" # List of possible paths to pktgen script from kernel tree for performance tests PKTGEN_SCRIPT_PATHS=" - ../../../samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh + ../../../../samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh pktgen/pktgen_bench_xmit_mode_netif_receive.sh" # Definition of set types: @@ -91,7 +91,7 @@ src start 1 count 5 src_delta 2000 -tools sendip nc bash +tools sendip socat nc bash proto udp race_repeat 3 @@ -116,7 +116,7 @@ src start 10 count 5 src_delta 2000 -tools sendip nc bash +tools sendip socat nc bash proto udp6 race_repeat 3 @@ -141,7 +141,7 @@ src start 1 count 5 src_delta 2000 -tools sendip nc bash +tools sendip socat nc bash proto udp race_repeat 0 @@ -163,7 +163,7 @@ src mac start 10 count 5 src_delta 2000 -tools sendip nc bash +tools sendip socat nc bash proto udp6 race_repeat 0 @@ -185,7 +185,7 @@ src mac proto start 10 count 5 src_delta 2000 -tools sendip nc bash +tools sendip socat nc bash proto udp6 race_repeat 0 @@ -207,7 +207,7 @@ src addr4 start 1 count 5 src_delta 2000 -tools sendip nc bash +tools sendip socat nc bash proto udp race_repeat 3 @@ -227,7 +227,7 @@ src addr6 port start 10 count 5 src_delta 2000 -tools sendip nc +tools sendip socat nc proto udp6 race_repeat 3 @@ -247,7 +247,7 @@ src mac proto addr4 start 1 count 5 src_delta 2000 -tools sendip nc bash +tools sendip socat nc bash proto udp race_repeat 0 @@ -264,7 +264,7 @@ src mac start 1 count 5 src_delta 2000 -tools sendip nc bash +tools sendip socat nc bash proto udp race_repeat 0 @@ -277,6 +277,23 @@ perf_entries 1000 perf_proto ipv4 " +TYPE_mac_net=" +display mac,net +type_spec ether_addr . ipv4_addr +chain_spec ether saddr . ip saddr +dst +src mac addr4 +start 1 +count 5 +src_delta 2000 +tools sendip socat nc bash +proto udp + +race_repeat 0 + +perf_duration 0 +" + TYPE_net_mac_icmp=" display net,mac - ICMP type_spec ipv4_addr . ether_addr @@ -320,7 +337,7 @@ src addr4 start 1 count 5 src_delta 2000 -tools sendip nc +tools sendip socat nc proto udp race_repeat 3 @@ -337,6 +354,23 @@ TYPE_flush_remove_add=" display Add two elements, flush, re-add " +TYPE_reload=" +display net,mac with reload +type_spec ipv4_addr . ether_addr +chain_spec ip daddr . ether saddr +dst addr4 +src mac +start 1 +count 1 +src_delta 2000 +tools sendip socat nc bash +proto udp + +race_repeat 0 + +perf_duration 0 +" + # Set template for all tests, types and rules are filled in depending on test set_template=' flush ruleset @@ -507,6 +541,24 @@ setup_send_udp() { dst_port= src_addr4= } + elif command -v socat -v >/dev/null; then + send_udp() { + if [ -n "${src_addr4}" ]; then + B ip addr add "${src_addr4}" dev veth_b + __socatbind=",bind=${src_addr4}" + if [ -n "${src_port}" ];then + __socatbind="${__socatbind}:${src_port}" + fi + fi + + ip addr add "${dst_addr4}" dev veth_a 2>/dev/null + [ -z "${dst_port}" ] && dst_port=12345 + + echo "test4" | B socat -t 0.01 STDIN UDP4-DATAGRAM:${dst_addr4}:${dst_port}"${__socatbind}" + + src_addr4= + src_port= + } elif command -v nc >/dev/null; then if nc -u -w0 1.1.1.1 1 2>/dev/null; then # OpenBSD netcat @@ -572,6 +624,29 @@ setup_send_udp6() { dst_port= src_addr6= } + elif command -v socat -v >/dev/null; then + send_udp6() { + ip -6 addr add "${dst_addr6}" dev veth_a nodad \ + 2>/dev/null + + __socatbind6= + + if [ -n "${src_addr6}" ]; then + if [ -n "${src_addr6} != "${src_addr6_added} ]; then + B ip addr add "${src_addr6}" dev veth_b nodad + + src_addr6_added=${src_addr6} + fi + + __socatbind6=",bind=[${src_addr6}]" + + if [ -n "${src_port}" ] ;then + __socatbind6="${__socatbind6}:${src_port}" + fi + fi + + echo "test6" | B socat -t 0.01 STDIN UDP6-DATAGRAM:[${dst_addr6}]:${dst_port}"${__socatbind6}" + } elif command -v nc >/dev/null && nc -u -w0 1.1.1.1 1 2>/dev/null; then # GNU netcat might not work with IPv6, try next tool send_udp6() { @@ -984,7 +1059,8 @@ format() { fi done for f in ${src}; do - __expr="${__expr} . " + [ "${__expr}" != "{ " ] && __expr="${__expr} . " + __start="$(eval format_"${f}" "${srcstart}")" __end="$(eval format_"${f}" "${srcend}")" @@ -1455,6 +1531,59 @@ test_bug_flush_remove_add() { nft flush ruleset } +# - add ranged element, check that packets match it +# - reload the set, check packets still match +test_bug_reload() { + setup veth send_"${proto}" set || return ${KSELFTEST_SKIP} + rstart=${start} + + range_size=1 + for i in $(seq "${start}" $((start + count))); do + end=$((start + range_size)) + + # Avoid negative or zero-sized port ranges + if [ $((end / 65534)) -gt $((start / 65534)) ]; then + start=${end} + end=$((end + 1)) + fi + srcstart=$((start + src_delta)) + srcend=$((end + src_delta)) + + add "$(format)" || return 1 + range_size=$((range_size + 1)) + start=$((end + range_size)) + done + + # check kernel does allocate pcpu sctrach map + # for reload with no elemet add/delete + ( echo flush set inet filter test ; + nft list set inet filter test ) | nft -f - + + start=${rstart} + range_size=1 + + for i in $(seq "${start}" $((start + count))); do + end=$((start + range_size)) + + # Avoid negative or zero-sized port ranges + if [ $((end / 65534)) -gt $((start / 65534)) ]; then + start=${end} + end=$((end + 1)) + fi + srcstart=$((start + src_delta)) + srcend=$((end + src_delta)) + + for j in $(seq ${start} $((range_size / 2 + 1)) ${end}); do + send_match "${j}" $((j + src_delta)) || return 1 + done + + range_size=$((range_size + 1)) + start=$((end + range_size)) + done + + nft flush ruleset +} + test_reported_issues() { eval test_bug_"${subtest}" } @@ -1513,4 +1642,4 @@ for name in ${TESTS}; do done done -[ ${passed} -eq 0 ] && exit ${KSELFTEST_SKIP} +[ ${passed} -eq 0 ] && exit ${KSELFTEST_SKIP} || exit 0 diff --git a/tools/testing/selftests/netfilter/nft_conntrack_helper.sh b/tools/testing/selftests/netfilter/nft_conntrack_helper.sh index edf0a48da6bf..faa7778d7bd1 100755 --- a/tools/testing/selftests/netfilter/nft_conntrack_helper.sh +++ b/tools/testing/selftests/netfilter/nft_conntrack_helper.sh @@ -94,28 +94,50 @@ check_for_helper() local message=$2 local port=$3 - ip netns exec ${netns} conntrack -L -p tcp --dport $port 2> /dev/null |grep -q 'helper=ftp' + if echo $message |grep -q 'ipv6';then + local family="ipv6" + else + local family="ipv4" + fi + + ip netns exec ${netns} conntrack -L -f $family -p tcp --dport $port 2> /dev/null |grep -q 'helper=ftp' if [ $? -ne 0 ] ; then - echo "FAIL: ${netns} did not show attached helper $message" 1>&2 - ret=1 + if [ $autoassign -eq 0 ] ;then + echo "FAIL: ${netns} did not show attached helper $message" 1>&2 + ret=1 + else + echo "PASS: ${netns} did not show attached helper $message" 1>&2 + fi + else + if [ $autoassign -eq 0 ] ;then + echo "PASS: ${netns} connection on port $port has ftp helper attached" 1>&2 + else + echo "FAIL: ${netns} connection on port $port has ftp helper attached" 1>&2 + ret=1 + fi fi - echo "PASS: ${netns} connection on port $port has ftp helper attached" 1>&2 return 0 } test_helper() { local port=$1 - local msg=$2 + local autoassign=$2 + + if [ $autoassign -eq 0 ] ;then + msg="set via ruleset" + else + msg="auto-assign" + fi sleep 3 | ip netns exec ${ns2} nc -w 2 -l -p $port > /dev/null & - sleep 1 sleep 1 | ip netns exec ${ns1} nc -w 2 10.0.1.2 $port > /dev/null & + sleep 1 - check_for_helper "$ns1" "ip $msg" $port - check_for_helper "$ns2" "ip $msg" $port + check_for_helper "$ns1" "ip $msg" $port $autoassign + check_for_helper "$ns2" "ip $msg" $port $autoassign wait @@ -128,8 +150,8 @@ test_helper() sleep 3 | ip netns exec ${ns2} nc -w 2 -6 -l -p $port > /dev/null & - sleep 1 sleep 1 | ip netns exec ${ns1} nc -w 2 -6 dead:1::2 $port > /dev/null & + sleep 1 check_for_helper "$ns1" "ipv6 $msg" $port check_for_helper "$ns2" "ipv6 $msg" $port @@ -167,9 +189,9 @@ if [ $? -ne 0 ];then fi fi -test_helper 2121 "set via ruleset" -ip netns exec ${ns1} sysctl -q 'net.netfilter.nf_conntrack_helper=1' -ip netns exec ${ns2} sysctl -q 'net.netfilter.nf_conntrack_helper=1' -test_helper 21 "auto-assign" +test_helper 2121 0 +ip netns exec ${ns1} sysctl -qe 'net.netfilter.nf_conntrack_helper=1' +ip netns exec ${ns2} sysctl -qe 'net.netfilter.nf_conntrack_helper=1' +test_helper 21 1 exit $ret diff --git a/tools/testing/selftests/netfilter/nft_fib.sh b/tools/testing/selftests/netfilter/nft_fib.sh new file mode 100755 index 000000000000..dff476e45e77 --- /dev/null +++ b/tools/testing/selftests/netfilter/nft_fib.sh @@ -0,0 +1,273 @@ +#!/bin/bash +# +# This tests the fib expression. +# +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 +ret=0 + +sfx=$(mktemp -u "XXXXXXXX") +ns1="ns1-$sfx" +ns2="ns2-$sfx" +nsrouter="nsrouter-$sfx" +timeout=4 + +log_netns=$(sysctl -n net.netfilter.nf_log_all_netns) + +cleanup() +{ + ip netns del ${ns1} + ip netns del ${ns2} + ip netns del ${nsrouter} + + [ $log_netns -eq 0 ] && sysctl -q net.netfilter.nf_log_all_netns=$log_netns +} + +nft --version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without nft tool" + exit $ksft_skip +fi + +ip -Version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +ip netns add ${nsrouter} +if [ $? -ne 0 ];then + echo "SKIP: Could not create net namespace" + exit $ksft_skip +fi + +trap cleanup EXIT + +dmesg | grep -q ' nft_rpfilter: ' +if [ $? -eq 0 ]; then + dmesg -c | grep ' nft_rpfilter: ' + echo "WARN: a previous test run has failed" 1>&2 +fi + +sysctl -q net.netfilter.nf_log_all_netns=1 +ip netns add ${ns1} +ip netns add ${ns2} + +load_ruleset() { + local netns=$1 + +ip netns exec ${netns} nft -f /dev/stdin <<EOF +table inet filter { + chain prerouting { + type filter hook prerouting priority 0; policy accept; + fib saddr . iif oif missing counter log prefix "$netns nft_rpfilter: " drop + } +} +EOF +} + +load_pbr_ruleset() { + local netns=$1 + +ip netns exec ${netns} nft -f /dev/stdin <<EOF +table inet filter { + chain forward { + type filter hook forward priority raw; + fib saddr . iif oif gt 0 accept + log drop + } +} +EOF +} + +load_ruleset_count() { + local netns=$1 + +ip netns exec ${netns} nft -f /dev/stdin <<EOF +table inet filter { + chain prerouting { + type filter hook prerouting priority 0; policy accept; + ip daddr 1.1.1.1 fib saddr . iif oif missing counter drop + ip6 daddr 1c3::c01d fib saddr . iif oif missing counter drop + } +} +EOF +} + +check_drops() { + dmesg | grep -q ' nft_rpfilter: ' + if [ $? -eq 0 ]; then + dmesg | grep ' nft_rpfilter: ' + echo "FAIL: rpfilter did drop packets" + return 1 + fi + + return 0 +} + +check_fib_counter() { + local want=$1 + local ns=$2 + local address=$3 + + line=$(ip netns exec ${ns} nft list table inet filter | grep 'fib saddr . iif' | grep $address | grep "packets $want" ) + ret=$? + + if [ $ret -ne 0 ];then + echo "Netns $ns fib counter doesn't match expected packet count of $want for $address" 1>&2 + ip netns exec ${ns} nft list table inet filter + return 1 + fi + + if [ $want -gt 0 ]; then + echo "PASS: fib expression did drop packets for $address" + fi + + return 0 +} + +load_ruleset ${nsrouter} +load_ruleset ${ns1} +load_ruleset ${ns2} + +ip link add veth0 netns ${nsrouter} type veth peer name eth0 netns ${ns1} > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: No virtual ethernet pair device support in kernel" + exit $ksft_skip +fi +ip link add veth1 netns ${nsrouter} type veth peer name eth0 netns ${ns2} + +ip -net ${nsrouter} link set lo up +ip -net ${nsrouter} link set veth0 up +ip -net ${nsrouter} addr add 10.0.1.1/24 dev veth0 +ip -net ${nsrouter} addr add dead:1::1/64 dev veth0 + +ip -net ${nsrouter} link set veth1 up +ip -net ${nsrouter} addr add 10.0.2.1/24 dev veth1 +ip -net ${nsrouter} addr add dead:2::1/64 dev veth1 + +ip -net ${ns1} link set lo up +ip -net ${ns1} link set eth0 up + +ip -net ${ns2} link set lo up +ip -net ${ns2} link set eth0 up + +ip -net ${ns1} addr add 10.0.1.99/24 dev eth0 +ip -net ${ns1} addr add dead:1::99/64 dev eth0 +ip -net ${ns1} route add default via 10.0.1.1 +ip -net ${ns1} route add default via dead:1::1 + +ip -net ${ns2} addr add 10.0.2.99/24 dev eth0 +ip -net ${ns2} addr add dead:2::99/64 dev eth0 +ip -net ${ns2} route add default via 10.0.2.1 +ip -net ${ns2} route add default via dead:2::1 + +test_ping() { + local daddr4=$1 + local daddr6=$2 + + ip netns exec ${ns1} ping -c 1 -q $daddr4 > /dev/null + ret=$? + if [ $ret -ne 0 ];then + check_drops + echo "FAIL: ${ns1} cannot reach $daddr4, ret $ret" 1>&2 + return 1 + fi + + ip netns exec ${ns1} ping -c 3 -q $daddr6 > /dev/null + ret=$? + if [ $ret -ne 0 ];then + check_drops + echo "FAIL: ${ns1} cannot reach $daddr6, ret $ret" 1>&2 + return 1 + fi + + return 0 +} + +ip netns exec ${nsrouter} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null +ip netns exec ${nsrouter} sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null +ip netns exec ${nsrouter} sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null +ip netns exec ${nsrouter} sysctl net.ipv4.conf.all.rp_filter=0 > /dev/null +ip netns exec ${nsrouter} sysctl net.ipv4.conf.veth0.rp_filter=0 > /dev/null + +sleep 3 + +test_ping 10.0.2.1 dead:2::1 || exit 1 +check_drops || exit 1 + +test_ping 10.0.2.99 dead:2::99 || exit 1 +check_drops || exit 1 + +echo "PASS: fib expression did not cause unwanted packet drops" + +ip netns exec ${nsrouter} nft flush table inet filter + +ip -net ${ns1} route del default +ip -net ${ns1} -6 route del default + +ip -net ${ns1} addr del 10.0.1.99/24 dev eth0 +ip -net ${ns1} addr del dead:1::99/64 dev eth0 + +ip -net ${ns1} addr add 10.0.2.99/24 dev eth0 +ip -net ${ns1} addr add dead:2::99/64 dev eth0 + +ip -net ${ns1} route add default via 10.0.2.1 +ip -net ${ns1} -6 route add default via dead:2::1 + +ip -net ${nsrouter} addr add dead:2::1/64 dev veth0 + +# switch to ruleset that doesn't log, this time +# its expected that this does drop the packets. +load_ruleset_count ${nsrouter} + +# ns1 has a default route, but nsrouter does not. +# must not check return value, ping to 1.1.1.1 will +# fail. +check_fib_counter 0 ${nsrouter} 1.1.1.1 || exit 1 +check_fib_counter 0 ${nsrouter} 1c3::c01d || exit 1 + +ip netns exec ${ns1} ping -c 1 -W 1 -q 1.1.1.1 > /dev/null +check_fib_counter 1 ${nsrouter} 1.1.1.1 || exit 1 + +sleep 2 +ip netns exec ${ns1} ping -c 3 -q 1c3::c01d > /dev/null +check_fib_counter 3 ${nsrouter} 1c3::c01d || exit 1 + +# delete all rules +ip netns exec ${ns1} nft flush ruleset +ip netns exec ${ns2} nft flush ruleset +ip netns exec ${nsrouter} nft flush ruleset + +ip -net ${ns1} addr add 10.0.1.99/24 dev eth0 +ip -net ${ns1} addr add dead:1::99/64 dev eth0 + +ip -net ${ns1} addr del 10.0.2.99/24 dev eth0 +ip -net ${ns1} addr del dead:2::99/64 dev eth0 + +ip -net ${nsrouter} addr del dead:2::1/64 dev veth0 + +# ... pbr ruleset for the router, check iif+oif. +load_pbr_ruleset ${nsrouter} +if [ $? -ne 0 ] ; then + echo "SKIP: Could not load fib forward ruleset" + exit $ksft_skip +fi + +ip -net ${nsrouter} rule add from all table 128 +ip -net ${nsrouter} rule add from all iif veth0 table 129 +ip -net ${nsrouter} route add table 128 to 10.0.1.0/24 dev veth0 +ip -net ${nsrouter} route add table 129 to 10.0.2.0/24 dev veth1 + +# drop main ipv4 table +ip -net ${nsrouter} -4 rule delete table main + +test_ping 10.0.2.99 dead:2::99 +if [ $? -ne 0 ] ; then + ip -net ${nsrouter} nft list ruleset + echo "FAIL: fib mismatch in pbr setup" + exit 1 +fi + +echo "PASS: fib expression forward check with policy based routing" +exit 0 diff --git a/tools/testing/selftests/netfilter/nft_flowtable.sh b/tools/testing/selftests/netfilter/nft_flowtable.sh index 431296c0f91c..a32f490f7539 100755 --- a/tools/testing/selftests/netfilter/nft_flowtable.sh +++ b/tools/testing/selftests/netfilter/nft_flowtable.sh @@ -14,13 +14,17 @@ # nft_flowtable.sh -o8000 -l1500 -r2000 # +sfx=$(mktemp -u "XXXXXXXX") +ns1="ns1-$sfx" +ns2="ns2-$sfx" +nsr1="nsr1-$sfx" +nsr2="nsr2-$sfx" # Kselftest framework requirement - SKIP code is 4. ksft_skip=4 ret=0 -ns1in="" -ns2in="" +nsin="" ns1out="" ns2out="" @@ -36,21 +40,19 @@ checktool (){ checktool "nft --version" "run test without nft tool" checktool "ip -Version" "run test without ip tool" checktool "which nc" "run test without nc (netcat)" -checktool "ip netns add nsr1" "create net namespace" +checktool "ip netns add $nsr1" "create net namespace $nsr1" -ip netns add ns1 -ip netns add ns2 - -ip netns add nsr2 +ip netns add $ns1 +ip netns add $ns2 +ip netns add $nsr2 cleanup() { - for i in 1 2; do - ip netns del ns$i - ip netns del nsr$i - done + ip netns del $ns1 + ip netns del $ns2 + ip netns del $nsr1 + ip netns del $nsr2 - rm -f "$ns1in" "$ns1out" - rm -f "$ns2in" "$ns2out" + rm -f "$nsin" "$ns1out" "$ns2out" [ $log_netns -eq 0 ] && sysctl -q net.netfilter.nf_log_all_netns=$log_netns } @@ -59,22 +61,21 @@ trap cleanup EXIT sysctl -q net.netfilter.nf_log_all_netns=1 -ip link add veth0 netns nsr1 type veth peer name eth0 netns ns1 -ip link add veth1 netns nsr1 type veth peer name veth0 netns nsr2 +ip link add veth0 netns $nsr1 type veth peer name eth0 netns $ns1 +ip link add veth1 netns $nsr1 type veth peer name veth0 netns $nsr2 -ip link add veth1 netns nsr2 type veth peer name eth0 netns ns2 +ip link add veth1 netns $nsr2 type veth peer name eth0 netns $ns2 for dev in lo veth0 veth1; do - for i in 1 2; do - ip -net nsr$i link set $dev up - done + ip -net $nsr1 link set $dev up + ip -net $nsr2 link set $dev up done -ip -net nsr1 addr add 10.0.1.1/24 dev veth0 -ip -net nsr1 addr add dead:1::1/64 dev veth0 +ip -net $nsr1 addr add 10.0.1.1/24 dev veth0 +ip -net $nsr1 addr add dead:1::1/64 dev veth0 -ip -net nsr2 addr add 10.0.2.1/24 dev veth1 -ip -net nsr2 addr add dead:2::1/64 dev veth1 +ip -net $nsr2 addr add 10.0.2.1/24 dev veth1 +ip -net $nsr2 addr add dead:2::1/64 dev veth1 # set different MTUs so we need to push packets coming from ns1 (large MTU) # to ns2 (smaller MTU) to stack either to perform fragmentation (ip_no_pmtu_disc=1), @@ -106,85 +107,76 @@ do esac done -if ! ip -net nsr1 link set veth0 mtu $omtu; then +if ! ip -net $nsr1 link set veth0 mtu $omtu; then exit 1 fi -ip -net ns1 link set eth0 mtu $omtu +ip -net $ns1 link set eth0 mtu $omtu -if ! ip -net nsr2 link set veth1 mtu $rmtu; then +if ! ip -net $nsr2 link set veth1 mtu $rmtu; then exit 1 fi -ip -net ns2 link set eth0 mtu $rmtu +ip -net $ns2 link set eth0 mtu $rmtu # transfer-net between nsr1 and nsr2. # these addresses are not used for connections. -ip -net nsr1 addr add 192.168.10.1/24 dev veth1 -ip -net nsr1 addr add fee1:2::1/64 dev veth1 - -ip -net nsr2 addr add 192.168.10.2/24 dev veth0 -ip -net nsr2 addr add fee1:2::2/64 dev veth0 - -for i in 1 2; do - ip netns exec nsr$i sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null - ip netns exec nsr$i sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null - - ip -net ns$i link set lo up - ip -net ns$i link set eth0 up - ip -net ns$i addr add 10.0.$i.99/24 dev eth0 - ip -net ns$i route add default via 10.0.$i.1 - ip -net ns$i addr add dead:$i::99/64 dev eth0 - ip -net ns$i route add default via dead:$i::1 - if ! ip netns exec ns$i sysctl net.ipv4.tcp_no_metrics_save=1 > /dev/null; then +ip -net $nsr1 addr add 192.168.10.1/24 dev veth1 +ip -net $nsr1 addr add fee1:2::1/64 dev veth1 + +ip -net $nsr2 addr add 192.168.10.2/24 dev veth0 +ip -net $nsr2 addr add fee1:2::2/64 dev veth0 + +for i in 0 1; do + ip netns exec $nsr1 sysctl net.ipv4.conf.veth$i.forwarding=1 > /dev/null + ip netns exec $nsr2 sysctl net.ipv4.conf.veth$i.forwarding=1 > /dev/null +done + +for ns in $ns1 $ns2;do + ip -net $ns link set lo up + ip -net $ns link set eth0 up + + if ! ip netns exec $ns sysctl net.ipv4.tcp_no_metrics_save=1 > /dev/null; then echo "ERROR: Check Originator/Responder values (problem during address addition)" exit 1 fi - # don't set ip DF bit for first two tests - ip netns exec ns$i sysctl net.ipv4.ip_no_pmtu_disc=1 > /dev/null + ip netns exec $ns sysctl net.ipv4.ip_no_pmtu_disc=1 > /dev/null done -ip -net nsr1 route add default via 192.168.10.2 -ip -net nsr2 route add default via 192.168.10.1 +ip -net $ns1 addr add 10.0.1.99/24 dev eth0 +ip -net $ns2 addr add 10.0.2.99/24 dev eth0 +ip -net $ns1 route add default via 10.0.1.1 +ip -net $ns2 route add default via 10.0.2.1 +ip -net $ns1 addr add dead:1::99/64 dev eth0 +ip -net $ns2 addr add dead:2::99/64 dev eth0 +ip -net $ns1 route add default via dead:1::1 +ip -net $ns2 route add default via dead:2::1 + +ip -net $nsr1 route add default via 192.168.10.2 +ip -net $nsr2 route add default via 192.168.10.1 -ip netns exec nsr1 nft -f - <<EOF +ip netns exec $nsr1 nft -f - <<EOF table inet filter { flowtable f1 { hook ingress priority 0 devices = { veth0, veth1 } } + counter routed_orig { } + counter routed_repl { } + chain forward { type filter hook forward priority 0; policy drop; # flow offloaded? Tag ct with mark 1, so we can detect when it fails. - meta oif "veth1" tcp dport 12345 flow offload @f1 counter - - # use packet size to trigger 'should be offloaded by now'. - # otherwise, if 'flow offload' expression never offloads, the - # test will pass. - tcp dport 12345 meta length gt 200 ct mark set 1 counter + meta oif "veth1" tcp dport 12345 ct mark set 1 flow add @f1 counter name routed_orig accept - # this turns off flow offloading internally, so expect packets again - tcp flags fin,rst ct mark set 0 accept - - # this allows large packets from responder, we need this as long - # as PMTUd is off. - # This rule is deleted for the last test, when we expect PMTUd - # to kick in and ensure all packets meet mtu requirements. - meta length gt $lmtu accept comment something-to-grep-for - - # next line blocks connection w.o. working offload. - # we only do this for reverse dir, because we expect packets to - # enter slow path due to MTU mismatch of veth0 and veth1. - tcp sport 12345 ct mark 1 counter log prefix "mark failure " drop + # count packets supposedly offloaded as per direction. + ct mark 1 counter name ct direction map { original : routed_orig, reply : routed_repl } accept ct state established,related accept - # for packets that we can't offload yet, i.e. SYN (any ct that is not confirmed) - meta length lt 200 oif "veth1" tcp dport 12345 counter accept - meta nfproto ipv4 meta l4proto icmp accept meta nfproto ipv6 meta l4proto icmpv6 accept } @@ -196,32 +188,51 @@ if [ $? -ne 0 ]; then exit $ksft_skip fi +ip netns exec $ns2 nft -f - <<EOF +table inet filter { + counter ip4dscp0 { } + counter ip4dscp3 { } + + chain input { + type filter hook input priority 0; policy accept; + meta l4proto tcp goto { + ip dscp cs3 counter name ip4dscp3 accept + ip dscp 0 counter name ip4dscp0 accept + } + } +} +EOF + +if [ $? -ne 0 ]; then + echo "SKIP: Could not load nft ruleset" + exit $ksft_skip +fi + # test basic connectivity -if ! ip netns exec ns1 ping -c 1 -q 10.0.2.99 > /dev/null; then - echo "ERROR: ns1 cannot reach ns2" 1>&2 - bash +if ! ip netns exec $ns1 ping -c 1 -q 10.0.2.99 > /dev/null; then + echo "ERROR: $ns1 cannot reach ns2" 1>&2 exit 1 fi -if ! ip netns exec ns2 ping -c 1 -q 10.0.1.99 > /dev/null; then - echo "ERROR: ns2 cannot reach ns1" 1>&2 +if ! ip netns exec $ns2 ping -c 1 -q 10.0.1.99 > /dev/null; then + echo "ERROR: $ns2 cannot reach $ns1" 1>&2 exit 1 fi if [ $ret -eq 0 ];then - echo "PASS: netns routing/connectivity: ns1 can reach ns2" + echo "PASS: netns routing/connectivity: $ns1 can reach $ns2" fi -ns1in=$(mktemp) +nsin=$(mktemp) ns1out=$(mktemp) -ns2in=$(mktemp) ns2out=$(mktemp) make_file() { name=$1 - SIZE=$((RANDOM % (1024 * 8))) + SIZE=$((RANDOM % (1024 * 128))) + SIZE=$((SIZE + (1024 * 8))) TSIZE=$((SIZE * 1024)) dd if=/dev/urandom of="$name" bs=1024 count=$SIZE 2> /dev/null @@ -232,6 +243,92 @@ make_file() dd if=/dev/urandom conf=notrunc of="$name" bs=1 count=$SIZE 2> /dev/null } +check_counters() +{ + local what=$1 + local ok=1 + + local orig=$(ip netns exec $nsr1 nft reset counter inet filter routed_orig | grep packets) + local repl=$(ip netns exec $nsr1 nft reset counter inet filter routed_repl | grep packets) + + local orig_cnt=${orig#*bytes} + local repl_cnt=${repl#*bytes} + + local fs=$(du -sb $nsin) + local max_orig=${fs%%/*} + local max_repl=$((max_orig/4)) + + if [ $orig_cnt -gt $max_orig ];then + echo "FAIL: $what: original counter $orig_cnt exceeds expected value $max_orig" 1>&2 + ret=1 + ok=0 + fi + + if [ $repl_cnt -gt $max_repl ];then + echo "FAIL: $what: reply counter $repl_cnt exceeds expected value $max_repl" 1>&2 + ret=1 + ok=0 + fi + + if [ $ok -eq 1 ]; then + echo "PASS: $what" + fi +} + +check_dscp() +{ + local what=$1 + local ok=1 + + local counter=$(ip netns exec $ns2 nft reset counter inet filter ip4dscp3 | grep packets) + + local pc4=${counter%*bytes*} + local pc4=${pc4#*packets} + + local counter=$(ip netns exec $ns2 nft reset counter inet filter ip4dscp0 | grep packets) + local pc4z=${counter%*bytes*} + local pc4z=${pc4z#*packets} + + case "$what" in + "dscp_none") + if [ $pc4 -gt 0 ] || [ $pc4z -eq 0 ]; then + echo "FAIL: dscp counters do not match, expected dscp3 == 0, dscp0 > 0, but got $pc4,$pc4z" 1>&2 + ret=1 + ok=0 + fi + ;; + "dscp_fwd") + if [ $pc4 -eq 0 ] || [ $pc4z -eq 0 ]; then + echo "FAIL: dscp counters do not match, expected dscp3 and dscp0 > 0 but got $pc4,$pc4z" 1>&2 + ret=1 + ok=0 + fi + ;; + "dscp_ingress") + if [ $pc4 -eq 0 ] || [ $pc4z -gt 0 ]; then + echo "FAIL: dscp counters do not match, expected dscp3 > 0, dscp0 == 0 but got $pc4,$pc4z" 1>&2 + ret=1 + ok=0 + fi + ;; + "dscp_egress") + if [ $pc4 -eq 0 ] || [ $pc4z -gt 0 ]; then + echo "FAIL: dscp counters do not match, expected dscp3 > 0, dscp0 == 0 but got $pc4,$pc4z" 1>&2 + ret=1 + ok=0 + fi + ;; + *) + echo "FAIL: Unknown DSCP check" 1>&2 + ret=1 + ok=0 + esac + + if [ $ok -eq 1 ] ;then + echo "PASS: $what: dscp packet counters match" + fi +} + check_transfer() { in=$1 @@ -256,30 +353,39 @@ test_tcp_forwarding_ip() local dstport=$4 local lret=0 - ip netns exec $nsb nc -w 5 -l -p 12345 < "$ns2in" > "$ns2out" & + ip netns exec $nsb nc -w 5 -l -p 12345 < "$nsin" > "$ns2out" & lpid=$! sleep 1 - ip netns exec $nsa nc -w 4 "$dstip" "$dstport" < "$ns1in" > "$ns1out" & + ip netns exec $nsa nc -w 4 "$dstip" "$dstport" < "$nsin" > "$ns1out" & cpid=$! - sleep 3 + sleep 1 + + prev="$(ls -l $ns1out $ns2out)" + sleep 1 + + while [[ "$prev" != "$(ls -l $ns1out $ns2out)" ]]; do + sleep 1; + prev="$(ls -l $ns1out $ns2out)" + done - if ps -p $lpid > /dev/null;then + if test -d /proc/"$lpid"/; then kill $lpid fi - if ps -p $cpid > /dev/null;then + if test -d /proc/"$cpid"/; then kill $cpid fi - wait + wait $lpid + wait $cpid - if ! check_transfer "$ns1in" "$ns2out" "ns1 -> ns2"; then + if ! check_transfer "$nsin" "$ns2out" "ns1 -> ns2"; then lret=1 fi - if ! check_transfer "$ns2in" "$ns1out" "ns1 <- ns2"; then + if ! check_transfer "$nsin" "$ns1out" "ns1 <- ns2"; then lret=1 fi @@ -293,44 +399,107 @@ test_tcp_forwarding() return $? } +test_tcp_forwarding_set_dscp() +{ + check_dscp "dscp_none" + +ip netns exec $nsr1 nft -f - <<EOF +table netdev dscpmangle { + chain setdscp0 { + type filter hook ingress device "veth0" priority 0; policy accept + ip dscp set cs3 + } +} +EOF +if [ $? -eq 0 ]; then + test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345 + check_dscp "dscp_ingress" + + ip netns exec $nsr1 nft delete table netdev dscpmangle +else + echo "SKIP: Could not load netdev:ingress for veth0" +fi + +ip netns exec $nsr1 nft -f - <<EOF +table netdev dscpmangle { + chain setdscp0 { + type filter hook egress device "veth1" priority 0; policy accept + ip dscp set cs3 + } +} +EOF +if [ $? -eq 0 ]; then + test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345 + check_dscp "dscp_egress" + + ip netns exec $nsr1 nft flush table netdev dscpmangle +else + echo "SKIP: Could not load netdev:egress for veth1" +fi + + # partial. If flowtable really works, then both dscp-is-0 and dscp-is-cs3 + # counters should have seen packets (before and after ft offload kicks in). + ip netns exec $nsr1 nft -a insert rule inet filter forward ip dscp set cs3 + test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345 + check_dscp "dscp_fwd" +} + test_tcp_forwarding_nat() { local lret + local pmtu test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345 lret=$? + pmtu=$3 + what=$4 + if [ $lret -eq 0 ] ; then + if [ $pmtu -eq 1 ] ;then + check_counters "flow offload for ns1/ns2 with masquerade and pmtu discovery $what" + else + echo "PASS: flow offload for ns1/ns2 with masquerade $what" + fi + test_tcp_forwarding_ip "$1" "$2" 10.6.6.6 1666 lret=$? + if [ $pmtu -eq 1 ] ;then + check_counters "flow offload for ns1/ns2 with dnat and pmtu discovery $what" + elif [ $lret -eq 0 ] ; then + echo "PASS: flow offload for ns1/ns2 with dnat $what" + fi fi return $lret } -make_file "$ns1in" -make_file "$ns2in" +make_file "$nsin" # First test: # No PMTU discovery, nsr1 is expected to fragment packets from ns1 to ns2 as needed. -if test_tcp_forwarding ns1 ns2; then +# Due to MTU mismatch in both directions, all packets (except small packets like pure +# acks) have to be handled by normal forwarding path. Therefore, packet counters +# are not checked. +if test_tcp_forwarding $ns1 $ns2; then echo "PASS: flow offloaded for ns1/ns2" else echo "FAIL: flow offload for ns1/ns2:" 1>&2 - ip netns exec nsr1 nft list ruleset + ip netns exec $nsr1 nft list ruleset ret=1 fi # delete default route, i.e. ns2 won't be able to reach ns1 and # will depend on ns1 being masqueraded in nsr1. # expect ns1 has nsr1 address. -ip -net ns2 route del default via 10.0.2.1 -ip -net ns2 route del default via dead:2::1 -ip -net ns2 route add 192.168.10.1 via 10.0.2.1 +ip -net $ns2 route del default via 10.0.2.1 +ip -net $ns2 route del default via dead:2::1 +ip -net $ns2 route add 192.168.10.1 via 10.0.2.1 # Second test: -# Same, but with NAT enabled. -ip netns exec nsr1 nft -f - <<EOF +# Same, but with NAT enabled. Same as in first test: we expect normal forward path +# to handle most packets. +ip netns exec $nsr1 nft -f - <<EOF table ip nat { chain prerouting { type nat hook prerouting priority 0; policy accept; @@ -344,35 +513,117 @@ table ip nat { } EOF -if test_tcp_forwarding_nat ns1 ns2; then - echo "PASS: flow offloaded for ns1/ns2 with NAT" -else +if ! test_tcp_forwarding_set_dscp $ns1 $ns2 0 ""; then + echo "FAIL: flow offload for ns1/ns2 with dscp update" 1>&2 + exit 0 +fi + +if ! test_tcp_forwarding_nat $ns1 $ns2 0 ""; then echo "FAIL: flow offload for ns1/ns2 with NAT" 1>&2 - ip netns exec nsr1 nft list ruleset + ip netns exec $nsr1 nft list ruleset ret=1 fi # Third test: -# Same as second test, but with PMTU discovery enabled. -handle=$(ip netns exec nsr1 nft -a list table inet filter | grep something-to-grep-for | cut -d \# -f 2) +# Same as second test, but with PMTU discovery enabled. This +# means that we expect the fastpath to handle packets as soon +# as the endpoints adjust the packet size. +ip netns exec $ns1 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null +ip netns exec $ns2 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null + +# reset counters. +# With pmtu in-place we'll also check that nft counters +# are lower than file size and packets were forwarded via flowtable layer. +# For earlier tests (large mtus), packets cannot be handled via flowtable +# (except pure acks and other small packets). +ip netns exec $nsr1 nft reset counters table inet filter >/dev/null + +if ! test_tcp_forwarding_nat $ns1 $ns2 1 ""; then + echo "FAIL: flow offload for ns1/ns2 with NAT and pmtu discovery" 1>&2 + ip netns exec $nsr1 nft list ruleset +fi -if ! ip netns exec nsr1 nft delete rule inet filter forward $handle; then - echo "FAIL: Could not delete large-packet accept rule" - exit 1 +# Another test: +# Add bridge interface br0 to Router1, with NAT enabled. +ip -net $nsr1 link add name br0 type bridge +ip -net $nsr1 addr flush dev veth0 +ip -net $nsr1 link set up dev veth0 +ip -net $nsr1 link set veth0 master br0 +ip -net $nsr1 addr add 10.0.1.1/24 dev br0 +ip -net $nsr1 addr add dead:1::1/64 dev br0 +ip -net $nsr1 link set up dev br0 + +ip netns exec $nsr1 sysctl net.ipv4.conf.br0.forwarding=1 > /dev/null + +# br0 with NAT enabled. +ip netns exec $nsr1 nft -f - <<EOF +flush table ip nat +table ip nat { + chain prerouting { + type nat hook prerouting priority 0; policy accept; + meta iif "br0" ip daddr 10.6.6.6 tcp dport 1666 counter dnat ip to 10.0.2.99:12345 + } + + chain postrouting { + type nat hook postrouting priority 0; policy accept; + meta oifname "veth1" counter masquerade + } +} +EOF + +if ! test_tcp_forwarding_nat $ns1 $ns2 1 "on bridge"; then + echo "FAIL: flow offload for ns1/ns2 with bridge NAT" 1>&2 + ip netns exec $nsr1 nft list ruleset + ret=1 fi -ip netns exec ns1 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null -ip netns exec ns2 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null -if test_tcp_forwarding_nat ns1 ns2; then - echo "PASS: flow offloaded for ns1/ns2 with NAT and pmtu discovery" -else - echo "FAIL: flow offload for ns1/ns2 with NAT and pmtu discovery" 1>&2 - ip netns exec nsr1 nft list ruleset +# Another test: +# Add bridge interface br0 to Router1, with NAT and VLAN. +ip -net $nsr1 link set veth0 nomaster +ip -net $nsr1 link set down dev veth0 +ip -net $nsr1 link add link veth0 name veth0.10 type vlan id 10 +ip -net $nsr1 link set up dev veth0 +ip -net $nsr1 link set up dev veth0.10 +ip -net $nsr1 link set veth0.10 master br0 + +ip -net $ns1 addr flush dev eth0 +ip -net $ns1 link add link eth0 name eth0.10 type vlan id 10 +ip -net $ns1 link set eth0 up +ip -net $ns1 link set eth0.10 up +ip -net $ns1 addr add 10.0.1.99/24 dev eth0.10 +ip -net $ns1 route add default via 10.0.1.1 +ip -net $ns1 addr add dead:1::99/64 dev eth0.10 + +if ! test_tcp_forwarding_nat $ns1 $ns2 1 "bridge and VLAN"; then + echo "FAIL: flow offload for ns1/ns2 with bridge NAT and VLAN" 1>&2 + ip netns exec $nsr1 nft list ruleset + ret=1 fi -KEY_SHA="0x"$(ps -xaf | sha1sum | cut -d " " -f 1) -KEY_AES="0x"$(ps -xaf | md5sum | cut -d " " -f 1) +# restore test topology (remove bridge and VLAN) +ip -net $nsr1 link set veth0 nomaster +ip -net $nsr1 link set veth0 down +ip -net $nsr1 link set veth0.10 down +ip -net $nsr1 link delete veth0.10 type vlan +ip -net $nsr1 link delete br0 type bridge +ip -net $ns1 addr flush dev eth0.10 +ip -net $ns1 link set eth0.10 down +ip -net $ns1 link set eth0 down +ip -net $ns1 link delete eth0.10 type vlan + +# restore address in ns1 and nsr1 +ip -net $ns1 link set eth0 up +ip -net $ns1 addr add 10.0.1.99/24 dev eth0 +ip -net $ns1 route add default via 10.0.1.1 +ip -net $ns1 addr add dead:1::99/64 dev eth0 +ip -net $ns1 route add default via dead:1::1 +ip -net $nsr1 addr add 10.0.1.1/24 dev veth0 +ip -net $nsr1 addr add dead:1::1/64 dev veth0 +ip -net $nsr1 link set up dev veth0 + +KEY_SHA="0x"$(ps -af | sha1sum | cut -d " " -f 1) +KEY_AES="0x"$(ps -af | md5sum | cut -d " " -f 1) SPI1=$RANDOM SPI2=$RANDOM @@ -399,23 +650,23 @@ do_esp() { } -do_esp nsr1 192.168.10.1 192.168.10.2 10.0.1.0/24 10.0.2.0/24 $SPI1 $SPI2 +do_esp $nsr1 192.168.10.1 192.168.10.2 10.0.1.0/24 10.0.2.0/24 $SPI1 $SPI2 -do_esp nsr2 192.168.10.2 192.168.10.1 10.0.2.0/24 10.0.1.0/24 $SPI2 $SPI1 +do_esp $nsr2 192.168.10.2 192.168.10.1 10.0.2.0/24 10.0.1.0/24 $SPI2 $SPI1 -ip netns exec nsr1 nft delete table ip nat +ip netns exec $nsr1 nft delete table ip nat # restore default routes -ip -net ns2 route del 192.168.10.1 via 10.0.2.1 -ip -net ns2 route add default via 10.0.2.1 -ip -net ns2 route add default via dead:2::1 +ip -net $ns2 route del 192.168.10.1 via 10.0.2.1 +ip -net $ns2 route add default via 10.0.2.1 +ip -net $ns2 route add default via dead:2::1 -if test_tcp_forwarding ns1 ns2; then - echo "PASS: ipsec tunnel mode for ns1/ns2" +if test_tcp_forwarding $ns1 $ns2; then + check_counters "ipsec tunnel mode for ns1/ns2" else echo "FAIL: ipsec tunnel mode for ns1/ns2" - ip netns exec nsr1 nft list ruleset 1>&2 - ip netns exec nsr1 cat /proc/net/xfrm_stat 1>&2 + ip netns exec $nsr1 nft list ruleset 1>&2 + ip netns exec $nsr1 cat /proc/net/xfrm_stat 1>&2 fi exit $ret diff --git a/tools/testing/selftests/netfilter/nft_meta.sh b/tools/testing/selftests/netfilter/nft_meta.sh index 087f0e6e71ce..f33154c04d34 100755 --- a/tools/testing/selftests/netfilter/nft_meta.sh +++ b/tools/testing/selftests/netfilter/nft_meta.sh @@ -23,7 +23,7 @@ ip -net "$ns0" addr add 127.0.0.1 dev lo trap cleanup EXIT -currentyear=$(date +%G) +currentyear=$(date +%Y) lastyear=$((currentyear-1)) ip netns exec "$ns0" nft -f /dev/stdin <<EOF table inet filter { diff --git a/tools/testing/selftests/netfilter/nft_nat.sh b/tools/testing/selftests/netfilter/nft_nat.sh index d7e07f4c3d7f..dd40d9f6f259 100755 --- a/tools/testing/selftests/netfilter/nft_nat.sh +++ b/tools/testing/selftests/netfilter/nft_nat.sh @@ -374,6 +374,47 @@ EOF return $lret } +test_local_dnat_portonly() +{ + local family=$1 + local daddr=$2 + local lret=0 + local sr_s + local sr_r + +ip netns exec "$ns0" nft -f /dev/stdin <<EOF +table $family nat { + chain output { + type nat hook output priority 0; policy accept; + meta l4proto tcp dnat to :2000 + + } +} +EOF + if [ $? -ne 0 ]; then + if [ $family = "inet" ];then + echo "SKIP: inet port test" + test_inet_nat=false + return + fi + echo "SKIP: Could not add $family dnat hook" + return + fi + + echo SERVER-$family | ip netns exec "$ns1" timeout 5 socat -u STDIN TCP-LISTEN:2000 & + sc_s=$! + + sleep 1 + + result=$(ip netns exec "$ns0" timeout 1 socat TCP:$daddr:2000 STDOUT) + + if [ "$result" = "SERVER-inet" ];then + echo "PASS: inet port rewrite without l3 address" + else + echo "ERROR: inet port rewrite" + ret=1 + fi +} test_masquerade6() { @@ -741,6 +782,300 @@ EOF return $lret } +# test port shadowing. +# create two listening services, one on router (ns0), one +# on client (ns2), which is masqueraded from ns1 point of view. +# ns2 sends udp packet coming from service port to ns1, on a highport. +# Later, if n1 uses same highport to connect to ns0:service, packet +# might be port-forwarded to ns2 instead. + +# second argument tells if we expect the 'fake-entry' to take effect +# (CLIENT) or not (ROUTER). +test_port_shadow() +{ + local test=$1 + local expect=$2 + local daddrc="10.0.1.99" + local daddrs="10.0.1.1" + local result="" + local logmsg="" + + # make shadow entry, from client (ns2), going to (ns1), port 41404, sport 1405. + echo "fake-entry" | ip netns exec "$ns2" timeout 1 socat -u STDIN UDP:"$daddrc":41404,sourceport=1405 + + echo ROUTER | ip netns exec "$ns0" timeout 5 socat -u STDIN UDP4-LISTEN:1405 & + sc_r=$! + + echo CLIENT | ip netns exec "$ns2" timeout 5 socat -u STDIN UDP4-LISTEN:1405,reuseport & + sc_c=$! + + sleep 0.3 + + # ns1 tries to connect to ns0:1405. With default settings this should connect + # to client, it matches the conntrack entry created above. + + result=$(echo "data" | ip netns exec "$ns1" timeout 1 socat - UDP:"$daddrs":1405,sourceport=41404) + + if [ "$result" = "$expect" ] ;then + echo "PASS: portshadow test $test: got reply from ${expect}${logmsg}" + else + echo "ERROR: portshadow test $test: got reply from \"$result\", not $expect as intended" + ret=1 + fi + + kill $sc_r $sc_c 2>/dev/null + + # flush udp entries for next test round, if any + ip netns exec "$ns0" conntrack -F >/dev/null 2>&1 +} + +# This prevents port shadow of router service via packet filter, +# packets claiming to originate from service port from internal +# network are dropped. +test_port_shadow_filter() +{ + local family=$1 + +ip netns exec "$ns0" nft -f /dev/stdin <<EOF +table $family filter { + chain forward { + type filter hook forward priority 0; policy accept; + meta iif veth1 udp sport 1405 drop + } +} +EOF + test_port_shadow "port-filter" "ROUTER" + + ip netns exec "$ns0" nft delete table $family filter +} + +# This prevents port shadow of router service via notrack. +test_port_shadow_notrack() +{ + local family=$1 + +ip netns exec "$ns0" nft -f /dev/stdin <<EOF +table $family raw { + chain prerouting { + type filter hook prerouting priority -300; policy accept; + meta iif veth0 udp dport 1405 notrack + } + chain output { + type filter hook output priority -300; policy accept; + meta oif veth0 udp sport 1405 notrack + } +} +EOF + test_port_shadow "port-notrack" "ROUTER" + + ip netns exec "$ns0" nft delete table $family raw +} + +# This prevents port shadow of router service via sport remap. +test_port_shadow_pat() +{ + local family=$1 + +ip netns exec "$ns0" nft -f /dev/stdin <<EOF +table $family pat { + chain postrouting { + type nat hook postrouting priority -1; policy accept; + meta iif veth1 udp sport <= 1405 masquerade to : 1406-65535 random + } +} +EOF + test_port_shadow "pat" "ROUTER" + + ip netns exec "$ns0" nft delete table $family pat +} + +test_port_shadowing() +{ + local family="ip" + + conntrack -h >/dev/null 2>&1 + if [ $? -ne 0 ];then + echo "SKIP: Could not run nat port shadowing test without conntrack tool" + return + fi + + socat -h > /dev/null 2>&1 + if [ $? -ne 0 ];then + echo "SKIP: Could not run nat port shadowing test without socat tool" + return + fi + + ip netns exec "$ns0" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null + ip netns exec "$ns0" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null + + ip netns exec "$ns0" nft -f /dev/stdin <<EOF +table $family nat { + chain postrouting { + type nat hook postrouting priority 0; policy accept; + meta oif veth0 masquerade + } +} +EOF + if [ $? -ne 0 ]; then + echo "SKIP: Could not add add $family masquerade hook" + return $ksft_skip + fi + + # test default behaviour. Packet from ns1 to ns0 is redirected to ns2. + test_port_shadow "default" "CLIENT" + + # test packet filter based mitigation: prevent forwarding of + # packets claiming to come from the service port. + test_port_shadow_filter "$family" + + # test conntrack based mitigation: connections going or coming + # from router:service bypass connection tracking. + test_port_shadow_notrack "$family" + + # test nat based mitigation: fowarded packets coming from service port + # are masqueraded with random highport. + test_port_shadow_pat "$family" + + ip netns exec "$ns0" nft delete table $family nat +} + +test_stateless_nat_ip() +{ + local lret=0 + + ip netns exec "$ns0" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null + ip netns exec "$ns0" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null + + ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1 + if [ $? -ne 0 ] ; then + echo "ERROR: cannot ping $ns1 from $ns2 before loading stateless rules" + return 1 + fi + +ip netns exec "$ns0" nft -f /dev/stdin <<EOF +table ip stateless { + map xlate_in { + typeof meta iifname . ip saddr . ip daddr : ip daddr + elements = { + "veth1" . 10.0.2.99 . 10.0.1.99 : 10.0.2.2, + } + } + map xlate_out { + typeof meta iifname . ip saddr . ip daddr : ip daddr + elements = { + "veth0" . 10.0.1.99 . 10.0.2.2 : 10.0.2.99 + } + } + + chain prerouting { + type filter hook prerouting priority -400; policy accept; + ip saddr set meta iifname . ip saddr . ip daddr map @xlate_in + ip daddr set meta iifname . ip saddr . ip daddr map @xlate_out + } +} +EOF + if [ $? -ne 0 ]; then + echo "SKIP: Could not add ip statless rules" + return $ksft_skip + fi + + reset_counters + + ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1 + if [ $? -ne 0 ] ; then + echo "ERROR: cannot ping $ns1 from $ns2 with stateless rules" + lret=1 + fi + + # ns1 should have seen packets from .2.2, due to stateless rewrite. + expect="packets 1 bytes 84" + cnt=$(ip netns exec "$ns1" nft list counter inet filter ns0insl | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter "$ns1" ns0insl "$expect" "test_stateless 1" + lret=1 + fi + + for dir in "in" "out" ; do + cnt=$(ip netns exec "$ns2" nft list counter inet filter ns1${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter "$ns2" ns1$dir "$expect" "test_stateless 2" + lret=1 + fi + done + + # ns1 should not have seen packets from ns2, due to masquerade + expect="packets 0 bytes 0" + for dir in "in" "out" ; do + cnt=$(ip netns exec "$ns1" nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter "$ns1" ns0$dir "$expect" "test_stateless 3" + lret=1 + fi + + cnt=$(ip netns exec "$ns0" nft list counter inet filter ns1${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter "$ns0" ns1$dir "$expect" "test_stateless 4" + lret=1 + fi + done + + reset_counters + + socat -h > /dev/null 2>&1 + if [ $? -ne 0 ];then + echo "SKIP: Could not run stateless nat frag test without socat tool" + if [ $lret -eq 0 ]; then + return $ksft_skip + fi + + ip netns exec "$ns0" nft delete table ip stateless + return $lret + fi + + local tmpfile=$(mktemp) + dd if=/dev/urandom of=$tmpfile bs=4096 count=1 2>/dev/null + + local outfile=$(mktemp) + ip netns exec "$ns1" timeout 3 socat -u UDP4-RECV:4233 OPEN:$outfile < /dev/null & + sc_r=$! + + sleep 1 + # re-do with large ping -> ip fragmentation + ip netns exec "$ns2" timeout 3 socat - UDP4-SENDTO:"10.0.1.99:4233" < "$tmpfile" > /dev/null + if [ $? -ne 0 ] ; then + echo "ERROR: failed to test udp $ns1 to $ns2 with stateless ip nat" 1>&2 + lret=1 + fi + + wait + + cmp "$tmpfile" "$outfile" + if [ $? -ne 0 ]; then + ls -l "$tmpfile" "$outfile" + echo "ERROR: in and output file mismatch when checking udp with stateless nat" 1>&2 + lret=1 + fi + + rm -f "$tmpfile" "$outfile" + + # ns1 should have seen packets from 2.2, due to stateless rewrite. + expect="packets 3 bytes 4164" + cnt=$(ip netns exec "$ns1" nft list counter inet filter ns0insl | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter "$ns1" ns0insl "$expect" "test_stateless 5" + lret=1 + fi + + ip netns exec "$ns0" nft delete table ip stateless + if [ $? -ne 0 ]; then + echo "ERROR: Could not delete table ip stateless" 1>&2 + lret=1 + fi + + test $lret -eq 0 && echo "PASS: IP statless for $ns2" + + return $lret +} # ip netns exec "$ns0" ping -c 1 -q 10.0.$i.99 for i in 0 1 2; do @@ -808,6 +1143,19 @@ table inet filter { EOF done +# special case for stateless nat check, counter needs to +# be done before (input) ip defragmentation +ip netns exec ns1-$sfx nft -f /dev/stdin <<EOF +table inet filter { + counter ns0insl {} + + chain pre { + type filter hook prerouting priority -400; policy accept; + ip saddr 10.0.2.2 counter name "ns0insl" + } +} +EOF + sleep 3 # test basic connectivity for i in 1 2; do @@ -841,6 +1189,10 @@ fi reset_counters test_local_dnat ip test_local_dnat6 ip6 + +reset_counters +test_local_dnat_portonly inet 10.0.1.99 + reset_counters $test_inet_nat && test_local_dnat inet $test_inet_nat && test_local_dnat6 inet @@ -861,6 +1213,9 @@ reset_counters $test_inet_nat && test_redirect inet $test_inet_nat && test_redirect6 inet +test_port_shadowing +test_stateless_nat_ip + if [ $ret -ne 0 ];then echo -n "FAIL: " nft --version diff --git a/tools/testing/selftests/netfilter/nft_nat_zones.sh b/tools/testing/selftests/netfilter/nft_nat_zones.sh new file mode 100755 index 000000000000..b9ab37380f33 --- /dev/null +++ b/tools/testing/selftests/netfilter/nft_nat_zones.sh @@ -0,0 +1,309 @@ +#!/bin/bash +# +# Test connection tracking zone and NAT source port reallocation support. +# + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +# Don't increase too much, 2000 clients should work +# just fine but script can then take several minutes with +# KASAN/debug builds. +maxclients=100 + +have_iperf=1 +ret=0 + +# client1---. +# veth1-. +# | +# NAT Gateway --veth0--> Server +# | | +# veth2-' | +# client2---' | +# .... | +# clientX----vethX---' + +# All clients share identical IP address. +# NAT Gateway uses policy routing and conntrack zones to isolate client +# namespaces. Each client connects to Server, each with colliding tuples: +# clientsaddr:10000 -> serveraddr:dport +# NAT Gateway is supposed to do port reallocation for each of the +# connections. + +sfx=$(mktemp -u "XXXXXXXX") +gw="ns-gw-$sfx" +cl1="ns-cl1-$sfx" +cl2="ns-cl2-$sfx" +srv="ns-srv-$sfx" + +v4gc1=$(sysctl -n net.ipv4.neigh.default.gc_thresh1 2>/dev/null) +v4gc2=$(sysctl -n net.ipv4.neigh.default.gc_thresh2 2>/dev/null) +v4gc3=$(sysctl -n net.ipv4.neigh.default.gc_thresh3 2>/dev/null) +v6gc1=$(sysctl -n net.ipv6.neigh.default.gc_thresh1 2>/dev/null) +v6gc2=$(sysctl -n net.ipv6.neigh.default.gc_thresh2 2>/dev/null) +v6gc3=$(sysctl -n net.ipv6.neigh.default.gc_thresh3 2>/dev/null) + +cleanup() +{ + ip netns del $gw + ip netns del $srv + for i in $(seq 1 $maxclients); do + ip netns del ns-cl$i-$sfx 2>/dev/null + done + + sysctl -q net.ipv4.neigh.default.gc_thresh1=$v4gc1 2>/dev/null + sysctl -q net.ipv4.neigh.default.gc_thresh2=$v4gc2 2>/dev/null + sysctl -q net.ipv4.neigh.default.gc_thresh3=$v4gc3 2>/dev/null + sysctl -q net.ipv6.neigh.default.gc_thresh1=$v6gc1 2>/dev/null + sysctl -q net.ipv6.neigh.default.gc_thresh2=$v6gc2 2>/dev/null + sysctl -q net.ipv6.neigh.default.gc_thresh3=$v6gc3 2>/dev/null +} + +nft --version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without nft tool" + exit $ksft_skip +fi + +ip -Version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +conntrack -V > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without conntrack tool" + exit $ksft_skip +fi + +iperf3 -v >/dev/null 2>&1 +if [ $? -ne 0 ];then + have_iperf=0 +fi + +ip netns add "$gw" +if [ $? -ne 0 ];then + echo "SKIP: Could not create net namespace $gw" + exit $ksft_skip +fi +ip -net "$gw" link set lo up + +trap cleanup EXIT + +ip netns add "$srv" +if [ $? -ne 0 ];then + echo "SKIP: Could not create server netns $srv" + exit $ksft_skip +fi + +ip link add veth0 netns "$gw" type veth peer name eth0 netns "$srv" +ip -net "$gw" link set veth0 up +ip -net "$srv" link set lo up +ip -net "$srv" link set eth0 up + +sysctl -q net.ipv6.neigh.default.gc_thresh1=512 2>/dev/null +sysctl -q net.ipv6.neigh.default.gc_thresh2=1024 2>/dev/null +sysctl -q net.ipv6.neigh.default.gc_thresh3=4096 2>/dev/null +sysctl -q net.ipv4.neigh.default.gc_thresh1=512 2>/dev/null +sysctl -q net.ipv4.neigh.default.gc_thresh2=1024 2>/dev/null +sysctl -q net.ipv4.neigh.default.gc_thresh3=4096 2>/dev/null + +for i in $(seq 1 $maxclients);do + cl="ns-cl$i-$sfx" + + ip netns add "$cl" + if [ $? -ne 0 ];then + echo "SKIP: Could not create client netns $cl" + exit $ksft_skip + fi + ip link add veth$i netns "$gw" type veth peer name eth0 netns "$cl" > /dev/null 2>&1 + if [ $? -ne 0 ];then + echo "SKIP: No virtual ethernet pair device support in kernel" + exit $ksft_skip + fi +done + +for i in $(seq 1 $maxclients);do + cl="ns-cl$i-$sfx" + echo netns exec "$cl" ip link set lo up + echo netns exec "$cl" ip link set eth0 up + echo netns exec "$cl" sysctl -q net.ipv4.tcp_syn_retries=2 + echo netns exec "$gw" ip link set veth$i up + echo netns exec "$gw" sysctl -q net.ipv4.conf.veth$i.arp_ignore=2 + echo netns exec "$gw" sysctl -q net.ipv4.conf.veth$i.rp_filter=0 + + # clients have same IP addresses. + echo netns exec "$cl" ip addr add 10.1.0.3/24 dev eth0 + echo netns exec "$cl" ip addr add dead:1::3/64 dev eth0 + echo netns exec "$cl" ip route add default via 10.1.0.2 dev eth0 + echo netns exec "$cl" ip route add default via dead:1::2 dev eth0 + + # NB: same addresses on client-facing interfaces. + echo netns exec "$gw" ip addr add 10.1.0.2/24 dev veth$i + echo netns exec "$gw" ip addr add dead:1::2/64 dev veth$i + + # gw: policy routing + echo netns exec "$gw" ip route add 10.1.0.0/24 dev veth$i table $((1000+i)) + echo netns exec "$gw" ip route add dead:1::0/64 dev veth$i table $((1000+i)) + echo netns exec "$gw" ip route add 10.3.0.0/24 dev veth0 table $((1000+i)) + echo netns exec "$gw" ip route add dead:3::0/64 dev veth0 table $((1000+i)) + echo netns exec "$gw" ip rule add fwmark $i lookup $((1000+i)) +done | ip -batch /dev/stdin + +ip -net "$gw" addr add 10.3.0.1/24 dev veth0 +ip -net "$gw" addr add dead:3::1/64 dev veth0 + +ip -net "$srv" addr add 10.3.0.99/24 dev eth0 +ip -net "$srv" addr add dead:3::99/64 dev eth0 + +ip netns exec $gw nft -f /dev/stdin<<EOF +table inet raw { + map iiftomark { + type ifname : mark + } + + map iiftozone { + typeof iifname : ct zone + } + + set inicmp { + flags dynamic + type ipv4_addr . ifname . ipv4_addr + } + set inflows { + flags dynamic + type ipv4_addr . inet_service . ifname . ipv4_addr . inet_service + } + + set inflows6 { + flags dynamic + type ipv6_addr . inet_service . ifname . ipv6_addr . inet_service + } + + chain prerouting { + type filter hook prerouting priority -64000; policy accept; + ct original zone set meta iifname map @iiftozone + meta mark set meta iifname map @iiftomark + + tcp flags & (syn|ack) == ack add @inflows { ip saddr . tcp sport . meta iifname . ip daddr . tcp dport counter } + add @inflows6 { ip6 saddr . tcp sport . meta iifname . ip6 daddr . tcp dport counter } + ip protocol icmp add @inicmp { ip saddr . meta iifname . ip daddr counter } + } + + chain nat_postrouting { + type nat hook postrouting priority 0; policy accept; + ct mark set meta mark meta oifname veth0 masquerade + } + + chain mangle_prerouting { + type filter hook prerouting priority -100; policy accept; + ct direction reply meta mark set ct mark + } +} +EOF + +( echo add element inet raw iiftomark \{ + for i in $(seq 1 $((maxclients-1))); do + echo \"veth$i\" : $i, + done + echo \"veth$maxclients\" : $maxclients \} + echo add element inet raw iiftozone \{ + for i in $(seq 1 $((maxclients-1))); do + echo \"veth$i\" : $i, + done + echo \"veth$maxclients\" : $maxclients \} +) | ip netns exec $gw nft -f /dev/stdin + +ip netns exec "$gw" sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null +ip netns exec "$gw" sysctl -q net.ipv6.conf.all.forwarding=1 > /dev/null +ip netns exec "$gw" sysctl -q net.ipv4.conf.all.rp_filter=0 >/dev/null + +# useful for debugging: allows to use 'ping' from clients to gateway. +ip netns exec "$gw" sysctl -q net.ipv4.fwmark_reflect=1 > /dev/null +ip netns exec "$gw" sysctl -q net.ipv6.fwmark_reflect=1 > /dev/null + +for i in $(seq 1 $maxclients); do + cl="ns-cl$i-$sfx" + ip netns exec $cl ping -i 0.5 -q -c 3 10.3.0.99 > /dev/null 2>&1 & + if [ $? -ne 0 ]; then + echo FAIL: Ping failure from $cl 1>&2 + ret=1 + break + fi +done + +wait + +for i in $(seq 1 $maxclients); do + ip netns exec $gw nft get element inet raw inicmp "{ 10.1.0.3 . \"veth$i\" . 10.3.0.99 }" | grep -q "{ 10.1.0.3 . \"veth$i\" . 10.3.0.99 counter packets 3 bytes 252 }" + if [ $? -ne 0 ];then + ret=1 + echo "FAIL: counter icmp mismatch for veth$i" 1>&2 + ip netns exec $gw nft get element inet raw inicmp "{ 10.1.0.3 . \"veth$i\" . 10.3.0.99 }" 1>&2 + break + fi +done + +ip netns exec $gw nft get element inet raw inicmp "{ 10.3.0.99 . \"veth0\" . 10.3.0.1 }" | grep -q "{ 10.3.0.99 . \"veth0\" . 10.3.0.1 counter packets $((3 * $maxclients)) bytes $((252 * $maxclients)) }" +if [ $? -ne 0 ];then + ret=1 + echo "FAIL: counter icmp mismatch for veth0: { 10.3.0.99 . \"veth0\" . 10.3.0.1 counter packets $((3 * $maxclients)) bytes $((252 * $maxclients)) }" + ip netns exec $gw nft get element inet raw inicmp "{ 10.3.99 . \"veth0\" . 10.3.0.1 }" 1>&2 +fi + +if [ $ret -eq 0 ]; then + echo "PASS: ping test from all $maxclients namespaces" +fi + +if [ $have_iperf -eq 0 ];then + echo "SKIP: iperf3 not installed" + if [ $ret -ne 0 ];then + exit $ret + fi + exit $ksft_skip +fi + +ip netns exec $srv iperf3 -s > /dev/null 2>&1 & +iperfpid=$! +sleep 1 + +for i in $(seq 1 $maxclients); do + if [ $ret -ne 0 ]; then + break + fi + cl="ns-cl$i-$sfx" + ip netns exec $cl iperf3 -c 10.3.0.99 --cport 10000 -n 1 > /dev/null + if [ $? -ne 0 ]; then + echo FAIL: Failure to connect for $cl 1>&2 + ip netns exec $gw conntrack -S 1>&2 + ret=1 + fi +done +if [ $ret -eq 0 ];then + echo "PASS: iperf3 connections for all $maxclients net namespaces" +fi + +kill $iperfpid +wait + +for i in $(seq 1 $maxclients); do + ip netns exec $gw nft get element inet raw inflows "{ 10.1.0.3 . 10000 . \"veth$i\" . 10.3.0.99 . 5201 }" > /dev/null + if [ $? -ne 0 ];then + ret=1 + echo "FAIL: can't find expected tcp entry for veth$i" 1>&2 + break + fi +done +if [ $ret -eq 0 ];then + echo "PASS: Found client connection for all $maxclients net namespaces" +fi + +ip netns exec $gw nft get element inet raw inflows "{ 10.3.0.99 . 5201 . \"veth0\" . 10.3.0.1 . 10000 }" > /dev/null +if [ $? -ne 0 ];then + ret=1 + echo "FAIL: cannot find return entry on veth0" 1>&2 +fi + +exit $ret diff --git a/tools/testing/selftests/netfilter/nft_queue.sh b/tools/testing/selftests/netfilter/nft_queue.sh index 3d202b90b33d..e12729753351 100755 --- a/tools/testing/selftests/netfilter/nft_queue.sh +++ b/tools/testing/selftests/netfilter/nft_queue.sh @@ -16,6 +16,10 @@ timeout=4 cleanup() { + ip netns pids ${ns1} | xargs kill 2>/dev/null + ip netns pids ${ns2} | xargs kill 2>/dev/null + ip netns pids ${nsrouter} | xargs kill 2>/dev/null + ip netns del ${ns1} ip netns del ${ns2} ip netns del ${nsrouter} @@ -109,6 +113,7 @@ table inet $name { chain output { type filter hook output priority $prio; policy accept; tcp dport 12345 queue num 3 + tcp sport 23456 queue num 3 jump nfq } chain post { @@ -292,6 +297,23 @@ test_tcp_localhost() wait 2>/dev/null } +test_tcp_localhost_connectclose() +{ + tmpfile=$(mktemp) || exit 1 + + ip netns exec ${nsrouter} ./connect_close -p 23456 -t $timeout & + + ip netns exec ${nsrouter} ./nf-queue -q 3 -t $timeout & + local nfqpid=$! + + sleep 1 + rm -f "$tmpfile" + + wait $rpid + [ $? -eq 0 ] && echo "PASS: tcp via loopback with connect/close" + wait 2>/dev/null +} + test_tcp_localhost_requeue() { ip netns exec ${nsrouter} nft -f /dev/stdin <<EOF @@ -332,6 +354,55 @@ EOF echo "PASS: tcp via loopback and re-queueing" } +test_icmp_vrf() { + ip -net $ns1 link add tvrf type vrf table 9876 + if [ $? -ne 0 ];then + echo "SKIP: Could not add vrf device" + return + fi + + ip -net $ns1 li set eth0 master tvrf + ip -net $ns1 li set tvrf up + + ip -net $ns1 route add 10.0.2.0/24 via 10.0.1.1 dev eth0 table 9876 +ip netns exec ${ns1} nft -f /dev/stdin <<EOF +flush ruleset +table inet filter { + chain output { + type filter hook output priority 0; policy accept; + meta oifname "tvrf" icmp type echo-request counter queue num 1 + meta oifname "eth0" icmp type echo-request counter queue num 1 + } + chain post { + type filter hook postrouting priority 0; policy accept; + meta oifname "tvrf" icmp type echo-request counter queue num 1 + meta oifname "eth0" icmp type echo-request counter queue num 1 + } +} +EOF + ip netns exec ${ns1} ./nf-queue -q 1 -t $timeout & + local nfqpid=$! + + sleep 1 + ip netns exec ${ns1} ip vrf exec tvrf ping -c 1 10.0.2.99 > /dev/null + + for n in output post; do + for d in tvrf eth0; do + ip netns exec ${ns1} nft list chain inet filter $n | grep -q "oifname \"$d\" icmp type echo-request counter packets 1" + if [ $? -ne 0 ] ; then + echo "FAIL: chain $n: icmp packet counter mismatch for device $d" 1>&2 + ip netns exec ${ns1} nft list ruleset + ret=1 + return + fi + done + done + + wait $nfqpid + [ $? -eq 0 ] && echo "PASS: icmp+nfqueue via vrf" + wait 2>/dev/null +} + ip netns exec ${nsrouter} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null ip netns exec ${nsrouter} sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null ip netns exec ${nsrouter} sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null @@ -371,6 +442,8 @@ test_queue 20 test_tcp_forward test_tcp_localhost +test_tcp_localhost_connectclose test_tcp_localhost_requeue +test_icmp_vrf exit $ret diff --git a/tools/testing/selftests/netfilter/nft_synproxy.sh b/tools/testing/selftests/netfilter/nft_synproxy.sh new file mode 100755 index 000000000000..b62933b680d6 --- /dev/null +++ b/tools/testing/selftests/netfilter/nft_synproxy.sh @@ -0,0 +1,117 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 +ret=0 + +rnd=$(mktemp -u XXXXXXXX) +nsr="nsr-$rnd" # synproxy machine +ns1="ns1-$rnd" # iperf client +ns2="ns2-$rnd" # iperf server + +checktool (){ + if ! $1 > /dev/null 2>&1; then + echo "SKIP: Could not $2" + exit $ksft_skip + fi +} + +checktool "nft --version" "run test without nft tool" +checktool "ip -Version" "run test without ip tool" +checktool "iperf3 --version" "run test without iperf3" +checktool "ip netns add $nsr" "create net namespace" + +modprobe -q nf_conntrack + +ip netns add $ns1 +ip netns add $ns2 + +cleanup() { + ip netns pids $ns1 | xargs kill 2>/dev/null + ip netns pids $ns2 | xargs kill 2>/dev/null + ip netns del $ns1 + ip netns del $ns2 + + ip netns del $nsr +} + +trap cleanup EXIT + +ip link add veth0 netns $nsr type veth peer name eth0 netns $ns1 +ip link add veth1 netns $nsr type veth peer name eth0 netns $ns2 + +for dev in lo veth0 veth1; do +ip -net $nsr link set $dev up +done + +ip -net $nsr addr add 10.0.1.1/24 dev veth0 +ip -net $nsr addr add 10.0.2.1/24 dev veth1 + +ip netns exec $nsr sysctl -q net.ipv4.conf.veth0.forwarding=1 +ip netns exec $nsr sysctl -q net.ipv4.conf.veth1.forwarding=1 +ip netns exec $nsr sysctl -q net.netfilter.nf_conntrack_tcp_loose=0 + +for n in $ns1 $ns2; do + ip -net $n link set lo up + ip -net $n link set eth0 up +done +ip -net $ns1 addr add 10.0.1.99/24 dev eth0 +ip -net $ns2 addr add 10.0.2.99/24 dev eth0 +ip -net $ns1 route add default via 10.0.1.1 +ip -net $ns2 route add default via 10.0.2.1 + +# test basic connectivity +if ! ip netns exec $ns1 ping -c 1 -q 10.0.2.99 > /dev/null; then + echo "ERROR: $ns1 cannot reach $ns2" 1>&2 + exit 1 +fi + +if ! ip netns exec $ns2 ping -c 1 -q 10.0.1.99 > /dev/null; then + echo "ERROR: $ns2 cannot reach $ns1" 1>&2 + exit 1 +fi + +ip netns exec $ns2 iperf3 -s > /dev/null 2>&1 & +# ip netns exec $nsr tcpdump -vvv -n -i veth1 tcp | head -n 10 & + +sleep 1 + +ip netns exec $nsr nft -f - <<EOF +table inet filter { + chain prerouting { + type filter hook prerouting priority -300; policy accept; + meta iif veth0 tcp flags syn counter notrack + } + + chain forward { + type filter hook forward priority 0; policy accept; + + ct state new,established counter accept + + meta iif veth0 meta l4proto tcp ct state untracked,invalid synproxy mss 1460 sack-perm timestamp + + ct state invalid counter drop + + # make ns2 unreachable w.o. tcp synproxy + tcp flags syn counter drop + } +} +EOF +if [ $? -ne 0 ]; then + echo "SKIP: Cannot add nft synproxy" + exit $ksft_skip +fi + +ip netns exec $ns1 timeout 5 iperf3 -c 10.0.2.99 -n $((1 * 1024 * 1024)) > /dev/null + +if [ $? -ne 0 ]; then + echo "FAIL: iperf3 returned an error" 1>&2 + ret=$? + ip netns exec $nsr nft list ruleset +else + echo "PASS: synproxy connection successful" +fi + +exit $ret diff --git a/tools/testing/selftests/netfilter/nft_trans_stress.sh b/tools/testing/selftests/netfilter/nft_trans_stress.sh index f1affd12c4b1..2ffba45a78bf 100755 --- a/tools/testing/selftests/netfilter/nft_trans_stress.sh +++ b/tools/testing/selftests/netfilter/nft_trans_stress.sh @@ -9,8 +9,35 @@ # Kselftest framework requirement - SKIP code is 4. ksft_skip=4 -testns=testns1 +testns=testns-$(mktemp -u "XXXXXXXX") +tmp="" + tables="foo bar baz quux" +global_ret=0 +eret=0 +lret=0 + +cleanup() { + ip netns pids "$testns" | xargs kill 2>/dev/null + ip netns del "$testns" + + rm -f "$tmp" +} + +check_result() +{ + local r=$1 + local OK="PASS" + + if [ $r -ne 0 ] ;then + OK="FAIL" + global_ret=$r + fi + + echo "$OK: nft $2 test returned $r" + + eret=0 +} nft --version > /dev/null 2>&1 if [ $? -ne 0 ];then @@ -24,6 +51,7 @@ if [ $? -ne 0 ];then exit $ksft_skip fi +trap cleanup EXIT tmp=$(mktemp) for table in $tables; do @@ -59,20 +87,65 @@ done) sleep 1 +ip netns exec "$testns" nft -f "$tmp" for i in $(seq 1 10) ; do ip netns exec "$testns" nft -f "$tmp" & done for table in $tables;do - randsleep=$((RANDOM%10)) + randsleep=$((RANDOM%2)) sleep $randsleep - ip netns exec "$testns" nft delete table inet $table 2>/dev/null + ip netns exec "$testns" nft delete table inet $table + lret=$? + if [ $lret -ne 0 ]; then + eret=$lret + fi +done + +check_result $eret "add/delete" + +for i in $(seq 1 10) ; do + (echo "flush ruleset"; cat "$tmp") | ip netns exec "$testns" nft -f /dev/stdin + + lret=$? + if [ $lret -ne 0 ]; then + eret=$lret + fi +done + +check_result $eret "reload" + +for i in $(seq 1 10) ; do + (echo "flush ruleset"; cat "$tmp" + echo "insert rule inet foo INPUT meta nftrace set 1" + echo "insert rule inet foo OUTPUT meta nftrace set 1" + ) | ip netns exec "$testns" nft -f /dev/stdin + lret=$? + if [ $lret -ne 0 ]; then + eret=$lret + fi + + (echo "flush ruleset"; cat "$tmp" + ) | ip netns exec "$testns" nft -f /dev/stdin + + lret=$? + if [ $lret -ne 0 ]; then + eret=$lret + fi done -randsleep=$((RANDOM%10)) -sleep $randsleep +check_result $eret "add/delete with nftrace enabled" + +echo "insert rule inet foo INPUT meta nftrace set 1" >> $tmp +echo "insert rule inet foo OUTPUT meta nftrace set 1" >> $tmp -pkill -9 ping +for i in $(seq 1 10) ; do + (echo "flush ruleset"; cat "$tmp") | ip netns exec "$testns" nft -f /dev/stdin + + lret=$? + if [ $lret -ne 0 ]; then + eret=1 + fi +done -wait +check_result $lret "add/delete with nftrace enabled" -rm -f "$tmp" -ip netns del "$testns" +exit $global_ret diff --git a/tools/testing/selftests/netfilter/nft_zones_many.sh b/tools/testing/selftests/netfilter/nft_zones_many.sh new file mode 100755 index 000000000000..5a8db0b48928 --- /dev/null +++ b/tools/testing/selftests/netfilter/nft_zones_many.sh @@ -0,0 +1,163 @@ +#!/bin/bash + +# Test insertion speed for packets with identical addresses/ports +# that are all placed in distinct conntrack zones. + +sfx=$(mktemp -u "XXXXXXXX") +ns="ns-$sfx" + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +zones=2000 +have_ct_tool=0 +ret=0 + +cleanup() +{ + ip netns del $ns +} + +checktool (){ + if ! $1 > /dev/null 2>&1; then + echo "SKIP: Could not $2" + exit $ksft_skip + fi +} + +checktool "nft --version" "run test without nft tool" +checktool "ip -Version" "run test without ip tool" +checktool "socat -V" "run test without socat tool" +checktool "ip netns add $ns" "create net namespace" + +trap cleanup EXIT + +conntrack -V > /dev/null 2>&1 +if [ $? -eq 0 ];then + have_ct_tool=1 +fi + +ip -net "$ns" link set lo up + +test_zones() { + local max_zones=$1 + +ip netns exec $ns sysctl -q net.netfilter.nf_conntrack_udp_timeout=3600 +ip netns exec $ns nft -f /dev/stdin<<EOF +flush ruleset +table inet raw { + map rndzone { + typeof numgen inc mod $max_zones : ct zone + } + + chain output { + type filter hook output priority -64000; policy accept; + udp dport 12345 ct zone set numgen inc mod 65536 map @rndzone + } +} +EOF + ( + echo "add element inet raw rndzone {" + for i in $(seq 1 $max_zones);do + echo -n "$i : $i" + if [ $i -lt $max_zones ]; then + echo "," + else + echo "}" + fi + done + ) | ip netns exec $ns nft -f /dev/stdin + + local i=0 + local j=0 + local outerstart=$(date +%s%3N) + local stop=$outerstart + + while [ $i -lt $max_zones ]; do + local start=$(date +%s%3N) + i=$((i + 1000)) + j=$((j + 1)) + # nft rule in output places each packet in a different zone. + dd if=/dev/zero of=/dev/stdout bs=8k count=1000 2>/dev/null | ip netns exec "$ns" socat STDIN UDP:127.0.0.1:12345,sourceport=12345 + if [ $? -ne 0 ] ;then + ret=1 + break + fi + + stop=$(date +%s%3N) + local duration=$((stop-start)) + echo "PASS: added 1000 entries in $duration ms (now $i total, loop $j)" + done + + if [ $have_ct_tool -eq 1 ]; then + local count=$(ip netns exec "$ns" conntrack -C) + local duration=$((stop-outerstart)) + + if [ $count -eq $max_zones ]; then + echo "PASS: inserted $count entries from packet path in $duration ms total" + else + ip netns exec $ns conntrack -S 1>&2 + echo "FAIL: inserted $count entries from packet path in $duration ms total, expected $max_zones entries" + ret=1 + fi + fi + + if [ $ret -ne 0 ];then + echo "FAIL: insert $max_zones entries from packet path" 1>&2 + fi +} + +test_conntrack_tool() { + local max_zones=$1 + + ip netns exec $ns conntrack -F >/dev/null 2>/dev/null + + local outerstart=$(date +%s%3N) + local start=$(date +%s%3N) + local stop=$start + local i=0 + while [ $i -lt $max_zones ]; do + i=$((i + 1)) + ip netns exec "$ns" conntrack -I -s 1.1.1.1 -d 2.2.2.2 --protonum 6 \ + --timeout 3600 --state ESTABLISHED --sport 12345 --dport 1000 --zone $i >/dev/null 2>&1 + if [ $? -ne 0 ];then + ip netns exec "$ns" conntrack -I -s 1.1.1.1 -d 2.2.2.2 --protonum 6 \ + --timeout 3600 --state ESTABLISHED --sport 12345 --dport 1000 --zone $i > /dev/null + echo "FAIL: conntrack -I returned an error" + ret=1 + break + fi + + if [ $((i%1000)) -eq 0 ];then + stop=$(date +%s%3N) + + local duration=$((stop-start)) + echo "PASS: added 1000 entries in $duration ms (now $i total)" + start=$stop + fi + done + + local count=$(ip netns exec "$ns" conntrack -C) + local duration=$((stop-outerstart)) + + if [ $count -eq $max_zones ]; then + echo "PASS: inserted $count entries via ctnetlink in $duration ms" + else + ip netns exec $ns conntrack -S 1>&2 + echo "FAIL: inserted $count entries via ctnetlink in $duration ms, expected $max_zones entries ($duration ms)" + ret=1 + fi +} + +test_zones $zones + +if [ $have_ct_tool -eq 1 ];then + test_conntrack_tool $zones +else + echo "SKIP: Could not run ctnetlink insertion test without conntrack tool" + if [ $ret -eq 0 ];then + exit $ksft_skip + fi +fi + +exit $ret diff --git a/tools/testing/selftests/netfilter/rpath.sh b/tools/testing/selftests/netfilter/rpath.sh new file mode 100755 index 000000000000..5289c8447a41 --- /dev/null +++ b/tools/testing/selftests/netfilter/rpath.sh @@ -0,0 +1,169 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# return code to signal skipped test +ksft_skip=4 + +# search for legacy iptables (it uses the xtables extensions +if iptables-legacy --version >/dev/null 2>&1; then + iptables='iptables-legacy' +elif iptables --version >/dev/null 2>&1; then + iptables='iptables' +else + iptables='' +fi + +if ip6tables-legacy --version >/dev/null 2>&1; then + ip6tables='ip6tables-legacy' +elif ip6tables --version >/dev/null 2>&1; then + ip6tables='ip6tables' +else + ip6tables='' +fi + +if nft --version >/dev/null 2>&1; then + nft='nft' +else + nft='' +fi + +if [ -z "$iptables$ip6tables$nft" ]; then + echo "SKIP: Test needs iptables, ip6tables or nft" + exit $ksft_skip +fi + +sfx=$(mktemp -u "XXXXXXXX") +ns1="ns1-$sfx" +ns2="ns2-$sfx" +trap "ip netns del $ns1; ip netns del $ns2" EXIT + +# create two netns, disable rp_filter in ns2 and +# keep IPv6 address when moving into VRF +ip netns add "$ns1" +ip netns add "$ns2" +ip netns exec "$ns2" sysctl -q net.ipv4.conf.all.rp_filter=0 +ip netns exec "$ns2" sysctl -q net.ipv4.conf.default.rp_filter=0 +ip netns exec "$ns2" sysctl -q net.ipv6.conf.all.keep_addr_on_down=1 + +# a standard connection between the netns, should not trigger rp filter +ip -net "$ns1" link add v0 type veth peer name v0 netns "$ns2" +ip -net "$ns1" link set v0 up; ip -net "$ns2" link set v0 up +ip -net "$ns1" a a 192.168.23.2/24 dev v0 +ip -net "$ns2" a a 192.168.23.1/24 dev v0 +ip -net "$ns1" a a fec0:23::2/64 dev v0 nodad +ip -net "$ns2" a a fec0:23::1/64 dev v0 nodad + +# rp filter testing: ns1 sends packets via v0 which ns2 would route back via d0 +ip -net "$ns2" link add d0 type dummy +ip -net "$ns2" link set d0 up +ip -net "$ns1" a a 192.168.42.2/24 dev v0 +ip -net "$ns2" a a 192.168.42.1/24 dev d0 +ip -net "$ns1" a a fec0:42::2/64 dev v0 nodad +ip -net "$ns2" a a fec0:42::1/64 dev d0 nodad + +# firewall matches to test +[ -n "$iptables" ] && { + common='-t raw -A PREROUTING -s 192.168.0.0/16' + ip netns exec "$ns2" "$iptables" $common -m rpfilter + ip netns exec "$ns2" "$iptables" $common -m rpfilter --invert +} +[ -n "$ip6tables" ] && { + common='-t raw -A PREROUTING -s fec0::/16' + ip netns exec "$ns2" "$ip6tables" $common -m rpfilter + ip netns exec "$ns2" "$ip6tables" $common -m rpfilter --invert +} +[ -n "$nft" ] && ip netns exec "$ns2" $nft -f - <<EOF +table inet t { + chain c { + type filter hook prerouting priority raw; + ip saddr 192.168.0.0/16 fib saddr . iif oif exists counter + ip6 saddr fec0::/16 fib saddr . iif oif exists counter + } +} +EOF + +die() { + echo "FAIL: $*" + #ip netns exec "$ns2" "$iptables" -t raw -vS + #ip netns exec "$ns2" "$ip6tables" -t raw -vS + #ip netns exec "$ns2" nft list ruleset + exit 1 +} + +# check rule counters, return true if rule did not match +ipt_zero_rule() { # (command) + [ -n "$1" ] || return 0 + ip netns exec "$ns2" "$1" -t raw -vS | grep -q -- "-m rpfilter -c 0 0" +} +ipt_zero_reverse_rule() { # (command) + [ -n "$1" ] || return 0 + ip netns exec "$ns2" "$1" -t raw -vS | \ + grep -q -- "-m rpfilter --invert -c 0 0" +} +nft_zero_rule() { # (family) + [ -n "$nft" ] || return 0 + ip netns exec "$ns2" "$nft" list chain inet t c | \ + grep -q "$1 saddr .* counter packets 0 bytes 0" +} + +netns_ping() { # (netns, args...) + local netns="$1" + shift + ip netns exec "$netns" ping -q -c 1 -W 1 "$@" >/dev/null +} + +clear_counters() { + [ -n "$iptables" ] && ip netns exec "$ns2" "$iptables" -t raw -Z + [ -n "$ip6tables" ] && ip netns exec "$ns2" "$ip6tables" -t raw -Z + if [ -n "$nft" ]; then + ( + echo "delete table inet t"; + ip netns exec "$ns2" $nft -s list table inet t; + ) | ip netns exec "$ns2" $nft -f - + fi +} + +testrun() { + clear_counters + + # test 1: martian traffic should fail rpfilter matches + netns_ping "$ns1" -I v0 192.168.42.1 && \ + die "martian ping 192.168.42.1 succeeded" + netns_ping "$ns1" -I v0 fec0:42::1 && \ + die "martian ping fec0:42::1 succeeded" + + ipt_zero_rule "$iptables" || die "iptables matched martian" + ipt_zero_rule "$ip6tables" || die "ip6tables matched martian" + ipt_zero_reverse_rule "$iptables" && die "iptables not matched martian" + ipt_zero_reverse_rule "$ip6tables" && die "ip6tables not matched martian" + nft_zero_rule ip || die "nft IPv4 matched martian" + nft_zero_rule ip6 || die "nft IPv6 matched martian" + + clear_counters + + # test 2: rpfilter match should pass for regular traffic + netns_ping "$ns1" 192.168.23.1 || \ + die "regular ping 192.168.23.1 failed" + netns_ping "$ns1" fec0:23::1 || \ + die "regular ping fec0:23::1 failed" + + ipt_zero_rule "$iptables" && die "iptables match not effective" + ipt_zero_rule "$ip6tables" && die "ip6tables match not effective" + ipt_zero_reverse_rule "$iptables" || die "iptables match over-effective" + ipt_zero_reverse_rule "$ip6tables" || die "ip6tables match over-effective" + nft_zero_rule ip && die "nft IPv4 match not effective" + nft_zero_rule ip6 && die "nft IPv6 match not effective" + +} + +testrun + +# repeat test with vrf device in $ns2 +ip -net "$ns2" link add vrf0 type vrf table 10 +ip -net "$ns2" link set vrf0 up +ip -net "$ns2" link set v0 master vrf0 + +testrun + +echo "PASS: netfilter reverse path match works as intended" +exit 0 diff --git a/tools/testing/selftests/netfilter/sctp_collision.c b/tools/testing/selftests/netfilter/sctp_collision.c new file mode 100644 index 000000000000..21bb1cfd8a85 --- /dev/null +++ b/tools/testing/selftests/netfilter/sctp_collision.c @@ -0,0 +1,99 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <arpa/inet.h> + +int main(int argc, char *argv[]) +{ + struct sockaddr_in saddr = {}, daddr = {}; + int sd, ret, len = sizeof(daddr); + struct timeval tv = {25, 0}; + char buf[] = "hello"; + + if (argc != 6 || (strcmp(argv[1], "server") && strcmp(argv[1], "client"))) { + printf("%s <server|client> <LOCAL_IP> <LOCAL_PORT> <REMOTE_IP> <REMOTE_PORT>\n", + argv[0]); + return -1; + } + + sd = socket(AF_INET, SOCK_SEQPACKET, IPPROTO_SCTP); + if (sd < 0) { + printf("Failed to create sd\n"); + return -1; + } + + saddr.sin_family = AF_INET; + saddr.sin_addr.s_addr = inet_addr(argv[2]); + saddr.sin_port = htons(atoi(argv[3])); + + ret = bind(sd, (struct sockaddr *)&saddr, sizeof(saddr)); + if (ret < 0) { + printf("Failed to bind to address\n"); + goto out; + } + + ret = listen(sd, 5); + if (ret < 0) { + printf("Failed to listen on port\n"); + goto out; + } + + daddr.sin_family = AF_INET; + daddr.sin_addr.s_addr = inet_addr(argv[4]); + daddr.sin_port = htons(atoi(argv[5])); + + /* make test shorter than 25s */ + ret = setsockopt(sd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)); + if (ret < 0) { + printf("Failed to setsockopt SO_RCVTIMEO\n"); + goto out; + } + + if (!strcmp(argv[1], "server")) { + sleep(1); /* wait a bit for client's INIT */ + ret = connect(sd, (struct sockaddr *)&daddr, len); + if (ret < 0) { + printf("Failed to connect to peer\n"); + goto out; + } + ret = recvfrom(sd, buf, sizeof(buf), 0, (struct sockaddr *)&daddr, &len); + if (ret < 0) { + printf("Failed to recv msg %d\n", ret); + goto out; + } + ret = sendto(sd, buf, strlen(buf) + 1, 0, (struct sockaddr *)&daddr, len); + if (ret < 0) { + printf("Failed to send msg %d\n", ret); + goto out; + } + printf("Server: sent! %d\n", ret); + } + + if (!strcmp(argv[1], "client")) { + usleep(300000); /* wait a bit for server's listening */ + ret = connect(sd, (struct sockaddr *)&daddr, len); + if (ret < 0) { + printf("Failed to connect to peer\n"); + goto out; + } + sleep(1); /* wait a bit for server's delayed INIT_ACK to reproduce the issue */ + ret = sendto(sd, buf, strlen(buf) + 1, 0, (struct sockaddr *)&daddr, len); + if (ret < 0) { + printf("Failed to send msg %d\n", ret); + goto out; + } + ret = recvfrom(sd, buf, sizeof(buf), 0, (struct sockaddr *)&daddr, &len); + if (ret < 0) { + printf("Failed to recv msg %d\n", ret); + goto out; + } + printf("Client: rcvd! %d\n", ret); + } + ret = 0; +out: + close(sd); + return ret; +} diff --git a/tools/testing/selftests/netfilter/settings b/tools/testing/selftests/netfilter/settings new file mode 100644 index 000000000000..6091b45d226b --- /dev/null +++ b/tools/testing/selftests/netfilter/settings @@ -0,0 +1 @@ +timeout=120 diff --git a/tools/testing/selftests/netfilter/xt_string.sh b/tools/testing/selftests/netfilter/xt_string.sh new file mode 100755 index 000000000000..1802653a4728 --- /dev/null +++ b/tools/testing/selftests/netfilter/xt_string.sh @@ -0,0 +1,128 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# return code to signal skipped test +ksft_skip=4 +rc=0 + +if ! iptables --version >/dev/null 2>&1; then + echo "SKIP: Test needs iptables" + exit $ksft_skip +fi +if ! ip -V >/dev/null 2>&1; then + echo "SKIP: Test needs iproute2" + exit $ksft_skip +fi +if ! nc -h >/dev/null 2>&1; then + echo "SKIP: Test needs netcat" + exit $ksft_skip +fi + +pattern="foo bar baz" +patlen=11 +hdrlen=$((20 + 8)) # IPv4 + UDP +ns="ns-$(mktemp -u XXXXXXXX)" +trap 'ip netns del $ns' EXIT +ip netns add "$ns" +ip -net "$ns" link add d0 type dummy +ip -net "$ns" link set d0 up +ip -net "$ns" addr add 10.1.2.1/24 dev d0 + +#ip netns exec "$ns" tcpdump -npXi d0 & +#tcpdump_pid=$! +#trap 'kill $tcpdump_pid; ip netns del $ns' EXIT + +add_rule() { # (alg, from, to) + ip netns exec "$ns" \ + iptables -A OUTPUT -o d0 -m string \ + --string "$pattern" --algo $1 --from $2 --to $3 +} +showrules() { # () + ip netns exec "$ns" iptables -v -S OUTPUT | grep '^-A' +} +zerorules() { + ip netns exec "$ns" iptables -Z OUTPUT +} +countrule() { # (pattern) + showrules | grep -c -- "$*" +} +send() { # (offset) + ( for ((i = 0; i < $1 - $hdrlen; i++)); do + printf " " + done + printf "$pattern" + ) | ip netns exec "$ns" nc -w 1 -u 10.1.2.2 27374 +} + +add_rule bm 1000 1500 +add_rule bm 1400 1600 +add_rule kmp 1000 1500 +add_rule kmp 1400 1600 + +zerorules +send 0 +send $((1000 - $patlen)) +if [ $(countrule -c 0 0) -ne 4 ]; then + echo "FAIL: rules match data before --from" + showrules + ((rc--)) +fi + +zerorules +send 1000 +send $((1400 - $patlen)) +if [ $(countrule -c 2) -ne 2 ]; then + echo "FAIL: only two rules should match at low offset" + showrules + ((rc--)) +fi + +zerorules +send $((1500 - $patlen)) +if [ $(countrule -c 1) -ne 4 ]; then + echo "FAIL: all rules should match at end of packet" + showrules + ((rc--)) +fi + +zerorules +send 1495 +if [ $(countrule -c 1) -ne 1 ]; then + echo "FAIL: only kmp with proper --to should match pattern spanning fragments" + showrules + ((rc--)) +fi + +zerorules +send 1500 +if [ $(countrule -c 1) -ne 2 ]; then + echo "FAIL: two rules should match pattern at start of second fragment" + showrules + ((rc--)) +fi + +zerorules +send $((1600 - $patlen)) +if [ $(countrule -c 1) -ne 2 ]; then + echo "FAIL: two rules should match pattern at end of largest --to" + showrules + ((rc--)) +fi + +zerorules +send $((1600 - $patlen + 1)) +if [ $(countrule -c 1) -ne 0 ]; then + echo "FAIL: no rules should match pattern extending largest --to" + showrules + ((rc--)) +fi + +zerorules +send 1600 +if [ $(countrule -c 1) -ne 0 ]; then + echo "FAIL: no rule should match pattern past largest --to" + showrules + ((rc--)) +fi + +exit $rc |