aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/tools/testing
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2025-07-30 08:58:55 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2025-07-30 08:58:55 -0700
commit8be4d31cb8aaeea27bde4b7ddb26e28a89062ebf (patch)
treefec3039a08284cd87f4ec9c3bea5b5a439f1859f /tools/testing
parentMerge tag 'sysctl-6.17-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/sysctl/sysctl (diff)
parentdpll: zl3073x: Fix build failure (diff)
downloadwireguard-linux-8be4d31cb8aaeea27bde4b7ddb26e28a89062ebf.tar.xz
wireguard-linux-8be4d31cb8aaeea27bde4b7ddb26e28a89062ebf.zip
Merge tag 'net-next-6.17' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next
Pull networking updates from Jakub Kicinski: "Core & protocols: - Wrap datapath globals into net_aligned_data, to avoid false sharing - Preserve MSG_ZEROCOPY in forwarding (e.g. out of a container) - Add SO_INQ and SCM_INQ support to AF_UNIX - Add SIOCINQ support to AF_VSOCK - Add TCP_MAXSEG sockopt to MPTCP - Add IPv6 force_forwarding sysctl to enable forwarding per interface - Make TCP validation of whether packet fully fits in the receive window and the rcv_buf more strict. With increased use of HW aggregation a single "packet" can be multiple 100s of kB - Add MSG_MORE flag to optimize large TCP transmissions via sockmap, improves latency up to 33% for sockmap users - Convert TCP send queue handling from tasklet to BH workque - Improve BPF iteration over TCP sockets to see each socket exactly once - Remove obsolete and unused TCP RFC3517/RFC6675 loss recovery code - Support enabling kernel threads for NAPI processing on per-NAPI instance basis rather than a whole device. Fully stop the kernel NAPI thread when threaded NAPI gets disabled. Previously thread would stick around until ifdown due to tricky synchronization - Allow multicast routing to take effect on locally-generated packets - Add output interface argument for End.X in segment routing - MCTP: add support for gateway routing, improve bind() handling - Don't require rtnl_lock when fetching an IPv6 neighbor over Netlink - Add a new neighbor flag ("extern_valid"), which cedes refresh responsibilities to userspace. This is needed for EVPN multi-homing where a neighbor entry for a multi-homed host needs to be synced across all the VTEPs among which the host is multi-homed - Support NUD_PERMANENT for proxy neighbor entries - Add a new queuing discipline for IETF RFC9332 DualQ Coupled AQM - Add sequence numbers to netconsole messages. Unregister netconsole's console when all net targets are removed. Code refactoring. Add a number of selftests - Align IPSec inbound SA lookup to RFC 4301. Only SPI and protocol should be used for an inbound SA lookup - Support inspecting ref_tracker state via DebugFS - Don't force bonding advertisement frames tx to ~333 ms boundaries. Add broadcast_neighbor option to send ARP/ND on all bonded links - Allow providing upcall pid for the 'execute' command in openvswitch - Remove DCCP support from Netfilter's conntrack - Disallow multiple packet duplications in the queuing layer - Prevent use of deprecated iptables code on PREEMPT_RT Driver API: - Support RSS and hashing configuration over ethtool Netlink - Add dedicated ethtool callbacks for getting and setting hashing fields - Add support for power budget evaluation strategy in PSE / Power-over-Ethernet. Generate Netlink events for overcurrent etc - Support DPLL phase offset monitoring across all device inputs. Support providing clock reference and SYNC over separate DPLL inputs - Support traffic classes in devlink rate API for bandwidth management - Remove rtnl_lock dependency from UDP tunnel port configuration Device drivers: - Add a new Broadcom driver for 800G Ethernet (bnge) - Add a standalone driver for Microchip ZL3073x DPLL - Remove IBM's NETIUCV device driver - Ethernet high-speed NICs: - Broadcom (bnxt): - support zero-copy Tx of DMABUF memory - take page size into account for page pool recycling rings - Intel (100G, ice, idpf): - idpf: XDP and AF_XDP support preparations - idpf: add flow steering - add link_down_events statistic - clean up the TSPLL code - preparations for live VM migration - nVidia/Mellanox: - support zero-copy Rx/Tx interfaces (DMABUF and io_uring) - optimize context memory usage for matchers - expose serial numbers in devlink info - support PCIe congestion metrics - Meta (fbnic): - add 25G, 50G, and 100G link modes to phylink - support dumping FW logs - Marvell/Cavium: - support for CN20K generation of the Octeon chips - Amazon: - add HW clock (without timestamping, just hypervisor time access) - Ethernet virtual: - VirtIO net: - support segmentation of UDP-tunnel-encapsulated packets - Google (gve): - support packet timestamping and clock synchronization - Microsoft vNIC: - add handler for device-originated servicing events - allow dynamic MSI-X vector allocation - support Tx bandwidth clamping - Ethernet NICs consumer, and embedded: - AMD: - amd-xgbe: hardware timestamping and PTP clock support - Broadcom integrated MACs (bcmgenet, bcmasp): - use napi_complete_done() return value to support NAPI polling - add support for re-starting auto-negotiation - Broadcom switches (b53): - support BCM5325 switches - add bcm63xx EPHY power control - Synopsys (stmmac): - lots of code refactoring and cleanups - TI: - icssg-prueth: read firmware-names from device tree - icssg: PRP offload support - Microchip: - lan78xx: convert to PHYLINK for improved PHY and MAC management - ksz: add KSZ8463 switch support - Intel: - support similar queue priority scheme in multi-queue and time-sensitive networking (taprio) - support packet pre-emption in both - RealTek (r8169): - enable EEE at 5Gbps on RTL8126 - Airoha: - add PPPoE offload support - MDIO bus controller for Airoha AN7583 - Ethernet PHYs: - support for the IPQ5018 internal GE PHY - micrel KSZ9477 switch-integrated PHYs: - add MDI/MDI-X control support - add RX error counters - add cable test support - add Signal Quality Indicator (SQI) reporting - dp83tg720: improve reset handling and reduce link recovery time - support bcm54811 (and its MII-Lite interface type) - air_en8811h: support resume/suspend - support PHY counters for QCA807x and QCA808x - support WoL for QCA807x - CAN drivers: - rcar_canfd: support for Transceiver Delay Compensation - kvaser: report FW versions via devlink dev info - WiFi: - extended regulatory info support (6 GHz) - add statistics and beacon monitor for Multi-Link Operation (MLO) - support S1G aggregation, improve S1G support - add Radio Measurement action fields - support per-radio RTS threshold - some work around how FIPS affects wifi, which was wrong (RC4 is used by TKIP, not only WEP) - improvements for unsolicited probe response handling - WiFi drivers: - RealTek (rtw88): - IBSS mode for SDIO devices - RealTek (rtw89): - BT coexistence for MLO/WiFi7 - concurrent station + P2P support - support for USB devices RTL8851BU/RTL8852BU - Intel (iwlwifi): - use embedded PNVM in (to be released) FW images to fix compatibility issues - many cleanups (unused FW APIs, PCIe code, WoWLAN) - some FIPS interoperability - MediaTek (mt76): - firmware recovery improvements - more MLO work - Qualcomm/Atheros (ath12k): - fix scan on multi-radio devices - more EHT/Wi-Fi 7 features - encapsulation/decapsulation offload - Broadcom (brcm80211): - support SDIO 43751 device - Bluetooth: - hci_event: add support for handling LE BIG Sync Lost event - ISO: add socket option to report packet seqnum via CMSG - ISO: support SCM_TIMESTAMPING for ISO TS - Bluetooth drivers: - intel_pcie: support Function Level Reset - nxpuart: add support for 4M baudrate - nxpuart: implement powerup sequence, reset, FW dump, and FW loading" * tag 'net-next-6.17' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (1742 commits) dpll: zl3073x: Fix build failure selftests: bpf: fix legacy netfilter options ipv6: annotate data-races around rt->fib6_nsiblings ipv6: fix possible infinite loop in fib6_info_uses_dev() ipv6: prevent infinite loop in rt6_nlmsg_size() ipv6: add a retry logic in net6_rt_notify() vrf: Drop existing dst reference in vrf_ip6_input_dst net/sched: taprio: align entry index attr validation with mqprio net: fsl_pq_mdio: use dev_err_probe selftests: rtnetlink.sh: remove esp4_offload after test vsock: remove unnecessary null check in vsock_getname() igb: xsk: solve negative overflow of nb_pkts in zerocopy mode stmmac: xsk: fix negative overflow of budget in zerocopy mode dt-bindings: ieee802154: Convert at86rf230.txt yaml format net: dsa: microchip: Disable PTP function of KSZ8463 net: dsa: microchip: Setup fiber ports for KSZ8463 net: dsa: microchip: Write switch MAC address differently for KSZ8463 net: dsa: microchip: Use different registers for KSZ8463 net: dsa: microchip: Add KSZ8463 switch support to KSZ DSA driver dt-bindings: net: dsa: microchip: Add KSZ8463 switch support ...
Diffstat (limited to 'tools/testing')
-rw-r--r--tools/testing/selftests/bpf/config3
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c458
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c91
-rw-r--r--tools/testing/selftests/bpf/progs/sock_iter_batch.c36
-rw-r--r--tools/testing/selftests/bpf/progs/test_sockmap_ktls.c4
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_ctx.c25
-rw-r--r--tools/testing/selftests/bpf/xskxceiver.c56
-rw-r--r--tools/testing/selftests/bpf/xskxceiver.h1
-rw-r--r--tools/testing/selftests/drivers/net/Makefile3
-rw-r--r--tools/testing/selftests/drivers/net/hw/Makefile1
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/devlink_rate_tc_bw.py465
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/devmem.py5
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/iou-zcrx.py98
-rw-r--r--tools/testing/selftests/drivers/net/hw/lib/py/__init__.py17
-rw-r--r--tools/testing/selftests/drivers/net/hw/ncdevmem.c9
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/rss_api.py476
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/rss_input_xfrm.py8
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/tso.py101
-rw-r--r--tools/testing/selftests/drivers/net/lib/py/__init__.py14
-rw-r--r--tools/testing/selftests/drivers/net/lib/py/env.py2
-rw-r--r--tools/testing/selftests/drivers/net/lib/py/load.py2
-rw-r--r--tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh165
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh2
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh2
-rwxr-xr-xtools/testing/selftests/drivers/net/napi_id.py4
-rw-r--r--tools/testing/selftests/drivers/net/napi_id_helper.c35
-rwxr-xr-xtools/testing/selftests/drivers/net/netcons_basic.sh57
-rwxr-xr-xtools/testing/selftests/drivers/net/netcons_cmdline.sh52
-rwxr-xr-xtools/testing/selftests/drivers/net/netcons_sysdata.sh30
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/devlink.sh55
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh23
-rwxr-xr-xtools/testing/selftests/drivers/net/netpoll_basic.py396
-rwxr-xr-xtools/testing/selftests/drivers/net/ping.py2
-rwxr-xr-xtools/testing/selftests/drivers/net/stats.py45
-rwxr-xr-xtools/testing/selftests/drivers/net/xdp.py658
-rw-r--r--tools/testing/selftests/hid/config.common1
-rw-r--r--tools/testing/selftests/net/.gitignore1
-rw-r--r--tools/testing/selftests/net/Makefile5
-rw-r--r--tools/testing/selftests/net/af_unix/Makefile2
-rw-r--r--tools/testing/selftests/net/af_unix/scm_inq.c125
-rw-r--r--tools/testing/selftests/net/bench/Makefile7
-rw-r--r--tools/testing/selftests/net/bench/page_pool/Makefile17
-rw-r--r--tools/testing/selftests/net/bench/page_pool/bench_page_pool_simple.c267
-rw-r--r--tools/testing/selftests/net/bench/page_pool/time_bench.c394
-rw-r--r--tools/testing/selftests/net/bench/page_pool/time_bench.h238
-rwxr-xr-xtools/testing/selftests/net/bench/test_bench_page_pool.sh32
-rwxr-xr-xtools/testing/selftests/net/broadcast_pmtu.sh47
-rw-r--r--tools/testing/selftests/net/config11
-rw-r--r--tools/testing/selftests/net/forwarding/Makefile1
-rw-r--r--tools/testing/selftests/net/forwarding/lib.sh69
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_multicast.sh35
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_flower.sh52
-rwxr-xr-xtools/testing/selftests/net/forwarding/vxlan_bridge_1q_mc_ul.sh771
-rwxr-xr-xtools/testing/selftests/net/ipv6_force_forwarding.sh105
-rw-r--r--tools/testing/selftests/net/lib.sh35
-rw-r--r--tools/testing/selftests/net/lib/py/__init__.py2
-rw-r--r--tools/testing/selftests/net/lib/py/ksft.py7
-rw-r--r--tools/testing/selftests/net/lib/py/utils.py39
-rw-r--r--tools/testing/selftests/net/lib/py/ynl.py5
-rw-r--r--tools/testing/selftests/net/lib/xdp_native.bpf.c621
-rw-r--r--tools/testing/selftests/net/mptcp/config2
-rw-r--r--tools/testing/selftests/net/msg_zerocopy.c24
-rwxr-xr-xtools/testing/selftests/net/msg_zerocopy.sh84
-rwxr-xr-xtools/testing/selftests/net/netdev-l2addr.sh59
-rw-r--r--tools/testing/selftests/net/netfilter/config7
-rwxr-xr-xtools/testing/selftests/net/netfilter/ipvs.sh4
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_interface_stress.sh5
-rw-r--r--tools/testing/selftests/net/nettest.c12
-rwxr-xr-xtools/testing/selftests/net/nl_netdev.py127
-rwxr-xr-xtools/testing/selftests/net/packetdrill/ksft_runner.sh4
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-read.pkt2
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_dsack_mult.pkt45
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_inq_client.pkt3
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_inq_server.pkt3
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_ooo_rcv_mss.pkt27
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_rcv_big_endseq.pkt44
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_rcv_toobig.pkt33
-rwxr-xr-xtools/testing/selftests/net/rtnetlink.sh92
-rwxr-xr-xtools/testing/selftests/net/rtnetlink_notification.sh112
-rwxr-xr-xtools/testing/selftests/net/srv6_end_next_csid_l3vpn_test.sh2
-rwxr-xr-xtools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh50
-rwxr-xr-xtools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh2
-rwxr-xr-xtools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh2
-rw-r--r--tools/testing/selftests/net/tcp_ao/seq-ext.c2
-rwxr-xr-xtools/testing/selftests/net/test_neigh.sh366
-rwxr-xr-xtools/testing/selftests/net/test_vxlan_vnifiltering.sh9
-rwxr-xr-xtools/testing/selftests/net/vrf_route_leaking.sh4
-rw-r--r--tools/testing/selftests/ptp/testptp.c11
-rw-r--r--tools/testing/selftests/tc-testing/config2
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json5
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/dualpi2.json254
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/netem.json81
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfq.json36
-rwxr-xr-xtools/testing/selftests/tc-testing/tdc.sh6
-rw-r--r--tools/testing/selftests/vsock/.gitignore2
-rw-r--r--tools/testing/selftests/vsock/Makefile17
-rw-r--r--tools/testing/selftests/vsock/config111
-rw-r--r--tools/testing/selftests/vsock/settings1
-rwxr-xr-xtools/testing/selftests/vsock/vmtest.sh487
-rw-r--r--tools/testing/selftests/wireguard/qemu/kernel.config4
-rw-r--r--tools/testing/vsock/Makefile1
-rw-r--r--tools/testing/vsock/util.c112
-rw-r--r--tools/testing/vsock/util.h35
-rw-r--r--tools/testing/vsock/vsock_test.c353
104 files changed, 8581 insertions, 353 deletions
diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index f74e1ea0ad3b..e8c6c77b96cb 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -97,6 +97,9 @@ CONFIG_NF_TABLES_NETDEV=y
CONFIG_NF_TABLES_IPV4=y
CONFIG_NF_TABLES_IPV6=y
CONFIG_NETFILTER_INGRESS=y
+CONFIG_IP_NF_IPTABLES_LEGACY=y
+CONFIG_IP6_NF_IPTABLES_LEGACY=y
+CONFIG_NETFILTER_XTABLES_LEGACY=y
CONFIG_NF_FLOW_TABLE=y
CONFIG_NF_FLOW_TABLE_INET=y
CONFIG_NETFILTER_NETLINK=y
diff --git a/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c b/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c
index a4517bee34d5..27781df8f2fb 100644
--- a/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c
+++ b/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c
@@ -1,11 +1,13 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2024 Meta
+#include <poll.h>
#include <test_progs.h>
#include "network_helpers.h"
#include "sock_iter_batch.skel.h"
#define TEST_NS "sock_iter_batch_netns"
+#define TEST_CHILD_NS "sock_iter_batch_child_netns"
static const int init_batch_size = 16;
static const int nr_soreuse = 4;
@@ -118,6 +120,45 @@ done:
return nth_sock_idx;
}
+static void destroy(int fd)
+{
+ struct sock_iter_batch *skel = NULL;
+ __u64 cookie = socket_cookie(fd);
+ struct bpf_link *link = NULL;
+ int iter_fd = -1;
+ int nread;
+ __u64 out;
+
+ skel = sock_iter_batch__open();
+ if (!ASSERT_OK_PTR(skel, "sock_iter_batch__open"))
+ goto done;
+
+ skel->rodata->destroy_cookie = cookie;
+
+ if (!ASSERT_OK(sock_iter_batch__load(skel), "sock_iter_batch__load"))
+ goto done;
+
+ link = bpf_program__attach_iter(skel->progs.iter_tcp_destroy, NULL);
+ if (!ASSERT_OK_PTR(link, "bpf_program__attach_iter"))
+ goto done;
+
+ iter_fd = bpf_iter_create(bpf_link__fd(link));
+ if (!ASSERT_OK_FD(iter_fd, "bpf_iter_create"))
+ goto done;
+
+ /* Delete matching socket. */
+ nread = read(iter_fd, &out, sizeof(out));
+ ASSERT_GE(nread, 0, "nread");
+ if (nread)
+ ASSERT_EQ(out, cookie, "cookie matches");
+done:
+ if (iter_fd >= 0)
+ close(iter_fd);
+ bpf_link__destroy(link);
+ sock_iter_batch__destroy(skel);
+ close(fd);
+}
+
static int get_seen_count(int fd, struct sock_count counts[], int n)
{
__u64 cookie = socket_cookie(fd);
@@ -152,8 +193,71 @@ static void check_n_were_seen_once(int *fds, int fds_len, int n,
ASSERT_EQ(seen_once, n, "seen_once");
}
+static int accept_from_one(struct pollfd *server_poll_fds,
+ int server_poll_fds_len)
+{
+ static const int poll_timeout_ms = 5000; /* 5s */
+ int ret;
+ int i;
+
+ ret = poll(server_poll_fds, server_poll_fds_len, poll_timeout_ms);
+ if (!ASSERT_EQ(ret, 1, "poll"))
+ return -1;
+
+ for (i = 0; i < server_poll_fds_len; i++)
+ if (server_poll_fds[i].revents & POLLIN)
+ return accept(server_poll_fds[i].fd, NULL, NULL);
+
+ return -1;
+}
+
+static int *connect_to_server(int family, int sock_type, const char *addr,
+ __u16 port, int nr_connects, int *server_fds,
+ int server_fds_len)
+{
+ struct pollfd *server_poll_fds = NULL;
+ int *established_socks = NULL;
+ int i;
+
+ server_poll_fds = calloc(server_fds_len, sizeof(*server_poll_fds));
+ if (!ASSERT_OK_PTR(server_poll_fds, "server_poll_fds"))
+ return NULL;
+
+ for (i = 0; i < server_fds_len; i++) {
+ server_poll_fds[i].fd = server_fds[i];
+ server_poll_fds[i].events = POLLIN;
+ }
+
+ i = 0;
+
+ established_socks = malloc(sizeof(*established_socks) * nr_connects*2);
+ if (!ASSERT_OK_PTR(established_socks, "established_socks"))
+ goto error;
+
+ while (nr_connects--) {
+ established_socks[i] = connect_to_addr_str(family, sock_type,
+ addr, port, NULL);
+ if (!ASSERT_OK_FD(established_socks[i], "connect_to_addr_str"))
+ goto error;
+ i++;
+ established_socks[i] = accept_from_one(server_poll_fds,
+ server_fds_len);
+ if (!ASSERT_OK_FD(established_socks[i], "accept_from_one"))
+ goto error;
+ i++;
+ }
+
+ free(server_poll_fds);
+ return established_socks;
+error:
+ free_fds(established_socks, i);
+ free(server_poll_fds);
+ return NULL;
+}
+
static void remove_seen(int family, int sock_type, const char *addr, __u16 port,
- int *socks, int socks_len, struct sock_count *counts,
+ int *socks, int socks_len, int *established_socks,
+ int established_socks_len, struct sock_count *counts,
int counts_len, struct bpf_link *link, int iter_fd)
{
int close_idx;
@@ -182,8 +286,46 @@ static void remove_seen(int family, int sock_type, const char *addr, __u16 port,
counts_len);
}
+static void remove_seen_established(int family, int sock_type, const char *addr,
+ __u16 port, int *listen_socks,
+ int listen_socks_len, int *established_socks,
+ int established_socks_len,
+ struct sock_count *counts, int counts_len,
+ struct bpf_link *link, int iter_fd)
+{
+ int close_idx;
+
+ /* Iterate through all listening sockets. */
+ read_n(iter_fd, listen_socks_len, counts, counts_len);
+
+ /* Make sure we saw all listening sockets exactly once. */
+ check_n_were_seen_once(listen_socks, listen_socks_len, listen_socks_len,
+ counts, counts_len);
+
+ /* Leave one established socket. */
+ read_n(iter_fd, established_socks_len - 1, counts, counts_len);
+
+ /* Close a socket we've already seen to remove it from the bucket. */
+ close_idx = get_nth_socket(established_socks, established_socks_len,
+ link, listen_socks_len + 1);
+ if (!ASSERT_GE(close_idx, 0, "close_idx"))
+ return;
+ destroy(established_socks[close_idx]);
+ established_socks[close_idx] = -1;
+
+ /* Iterate through the rest of the sockets. */
+ read_n(iter_fd, -1, counts, counts_len);
+
+ /* Make sure the last socket wasn't skipped and that there were no
+ * repeats.
+ */
+ check_n_were_seen_once(established_socks, established_socks_len,
+ established_socks_len - 1, counts, counts_len);
+}
+
static void remove_unseen(int family, int sock_type, const char *addr,
__u16 port, int *socks, int socks_len,
+ int *established_socks, int established_socks_len,
struct sock_count *counts, int counts_len,
struct bpf_link *link, int iter_fd)
{
@@ -214,8 +356,54 @@ static void remove_unseen(int family, int sock_type, const char *addr,
counts_len);
}
+static void remove_unseen_established(int family, int sock_type,
+ const char *addr, __u16 port,
+ int *listen_socks, int listen_socks_len,
+ int *established_socks,
+ int established_socks_len,
+ struct sock_count *counts, int counts_len,
+ struct bpf_link *link, int iter_fd)
+{
+ int close_idx;
+
+ /* Iterate through all listening sockets. */
+ read_n(iter_fd, listen_socks_len, counts, counts_len);
+
+ /* Make sure we saw all listening sockets exactly once. */
+ check_n_were_seen_once(listen_socks, listen_socks_len, listen_socks_len,
+ counts, counts_len);
+
+ /* Iterate through the first established socket. */
+ read_n(iter_fd, 1, counts, counts_len);
+
+ /* Make sure we saw one established socks. */
+ check_n_were_seen_once(established_socks, established_socks_len, 1,
+ counts, counts_len);
+
+ /* Close what would be the next socket in the bucket to exercise the
+ * condition where we need to skip past the first cookie we remembered.
+ */
+ close_idx = get_nth_socket(established_socks, established_socks_len,
+ link, listen_socks_len + 1);
+ if (!ASSERT_GE(close_idx, 0, "close_idx"))
+ return;
+
+ destroy(established_socks[close_idx]);
+ established_socks[close_idx] = -1;
+
+ /* Iterate through the rest of the sockets. */
+ read_n(iter_fd, -1, counts, counts_len);
+
+ /* Make sure the remaining sockets were seen exactly once and that we
+ * didn't repeat the socket that was already seen.
+ */
+ check_n_were_seen_once(established_socks, established_socks_len,
+ established_socks_len - 1, counts, counts_len);
+}
+
static void remove_all(int family, int sock_type, const char *addr,
__u16 port, int *socks, int socks_len,
+ int *established_socks, int established_socks_len,
struct sock_count *counts, int counts_len,
struct bpf_link *link, int iter_fd)
{
@@ -242,8 +430,57 @@ static void remove_all(int family, int sock_type, const char *addr,
ASSERT_EQ(read_n(iter_fd, -1, counts, counts_len), 0, "read_n");
}
+static void remove_all_established(int family, int sock_type, const char *addr,
+ __u16 port, int *listen_socks,
+ int listen_socks_len, int *established_socks,
+ int established_socks_len,
+ struct sock_count *counts, int counts_len,
+ struct bpf_link *link, int iter_fd)
+{
+ int *close_idx = NULL;
+ int i;
+
+ /* Iterate through all listening sockets. */
+ read_n(iter_fd, listen_socks_len, counts, counts_len);
+
+ /* Make sure we saw all listening sockets exactly once. */
+ check_n_were_seen_once(listen_socks, listen_socks_len, listen_socks_len,
+ counts, counts_len);
+
+ /* Iterate through the first established socket. */
+ read_n(iter_fd, 1, counts, counts_len);
+
+ /* Make sure we saw one established socks. */
+ check_n_were_seen_once(established_socks, established_socks_len, 1,
+ counts, counts_len);
+
+ /* Close all remaining sockets to exhaust the list of saved cookies and
+ * exit without putting any sockets into the batch on the next read.
+ */
+ close_idx = malloc(sizeof(int) * (established_socks_len - 1));
+ if (!ASSERT_OK_PTR(close_idx, "close_idx malloc"))
+ return;
+ for (i = 0; i < established_socks_len - 1; i++) {
+ close_idx[i] = get_nth_socket(established_socks,
+ established_socks_len, link,
+ listen_socks_len + i);
+ if (!ASSERT_GE(close_idx[i], 0, "close_idx"))
+ return;
+ }
+
+ for (i = 0; i < established_socks_len - 1; i++) {
+ destroy(established_socks[close_idx[i]]);
+ established_socks[close_idx[i]] = -1;
+ }
+
+ /* Make sure there are no more sockets returned */
+ ASSERT_EQ(read_n(iter_fd, -1, counts, counts_len), 0, "read_n");
+ free(close_idx);
+}
+
static void add_some(int family, int sock_type, const char *addr, __u16 port,
- int *socks, int socks_len, struct sock_count *counts,
+ int *socks, int socks_len, int *established_socks,
+ int established_socks_len, struct sock_count *counts,
int counts_len, struct bpf_link *link, int iter_fd)
{
int *new_socks = NULL;
@@ -271,8 +508,52 @@ done:
free_fds(new_socks, socks_len);
}
+static void add_some_established(int family, int sock_type, const char *addr,
+ __u16 port, int *listen_socks,
+ int listen_socks_len, int *established_socks,
+ int established_socks_len,
+ struct sock_count *counts,
+ int counts_len, struct bpf_link *link,
+ int iter_fd)
+{
+ int *new_socks = NULL;
+
+ /* Iterate through all listening sockets. */
+ read_n(iter_fd, listen_socks_len, counts, counts_len);
+
+ /* Make sure we saw all listening sockets exactly once. */
+ check_n_were_seen_once(listen_socks, listen_socks_len, listen_socks_len,
+ counts, counts_len);
+
+ /* Iterate through the first established_socks_len - 1 sockets. */
+ read_n(iter_fd, established_socks_len - 1, counts, counts_len);
+
+ /* Make sure we saw established_socks_len - 1 sockets exactly once. */
+ check_n_were_seen_once(established_socks, established_socks_len,
+ established_socks_len - 1, counts, counts_len);
+
+ /* Double the number of established sockets in the bucket. */
+ new_socks = connect_to_server(family, sock_type, addr, port,
+ established_socks_len / 2, listen_socks,
+ listen_socks_len);
+ if (!ASSERT_OK_PTR(new_socks, "connect_to_server"))
+ goto done;
+
+ /* Iterate through the rest of the sockets. */
+ read_n(iter_fd, -1, counts, counts_len);
+
+ /* Make sure each of the original sockets was seen exactly once. */
+ check_n_were_seen_once(listen_socks, listen_socks_len, listen_socks_len,
+ counts, counts_len);
+ check_n_were_seen_once(established_socks, established_socks_len,
+ established_socks_len, counts, counts_len);
+done:
+ free_fds(new_socks, established_socks_len);
+}
+
static void force_realloc(int family, int sock_type, const char *addr,
__u16 port, int *socks, int socks_len,
+ int *established_socks, int established_socks_len,
struct sock_count *counts, int counts_len,
struct bpf_link *link, int iter_fd)
{
@@ -299,11 +580,32 @@ done:
free_fds(new_socks, socks_len);
}
+static void force_realloc_established(int family, int sock_type,
+ const char *addr, __u16 port,
+ int *listen_socks, int listen_socks_len,
+ int *established_socks,
+ int established_socks_len,
+ struct sock_count *counts, int counts_len,
+ struct bpf_link *link, int iter_fd)
+{
+ /* Iterate through all sockets to trigger a realloc. */
+ read_n(iter_fd, -1, counts, counts_len);
+
+ /* Make sure each socket was seen exactly once. */
+ check_n_were_seen_once(listen_socks, listen_socks_len, listen_socks_len,
+ counts, counts_len);
+ check_n_were_seen_once(established_socks, established_socks_len,
+ established_socks_len, counts, counts_len);
+}
+
struct test_case {
void (*test)(int family, int sock_type, const char *addr, __u16 port,
- int *socks, int socks_len, struct sock_count *counts,
+ int *socks, int socks_len, int *established_socks,
+ int established_socks_len, struct sock_count *counts,
int counts_len, struct bpf_link *link, int iter_fd);
const char *description;
+ int ehash_buckets;
+ int connections;
int init_socks;
int max_socks;
int sock_type;
@@ -358,18 +660,140 @@ static struct test_case resume_tests[] = {
.family = AF_INET6,
.test = force_realloc,
},
+ {
+ .description = "tcp: resume after removing a seen socket (listening)",
+ .init_socks = nr_soreuse,
+ .max_socks = nr_soreuse,
+ .sock_type = SOCK_STREAM,
+ .family = AF_INET6,
+ .test = remove_seen,
+ },
+ {
+ .description = "tcp: resume after removing one unseen socket (listening)",
+ .init_socks = nr_soreuse,
+ .max_socks = nr_soreuse,
+ .sock_type = SOCK_STREAM,
+ .family = AF_INET6,
+ .test = remove_unseen,
+ },
+ {
+ .description = "tcp: resume after removing all unseen sockets (listening)",
+ .init_socks = nr_soreuse,
+ .max_socks = nr_soreuse,
+ .sock_type = SOCK_STREAM,
+ .family = AF_INET6,
+ .test = remove_all,
+ },
+ {
+ .description = "tcp: resume after adding a few sockets (listening)",
+ .init_socks = nr_soreuse,
+ .max_socks = nr_soreuse,
+ .sock_type = SOCK_STREAM,
+ /* Use AF_INET so that new sockets are added to the head of the
+ * bucket's list.
+ */
+ .family = AF_INET,
+ .test = add_some,
+ },
+ {
+ .description = "tcp: force a realloc to occur (listening)",
+ .init_socks = init_batch_size,
+ .max_socks = init_batch_size * 2,
+ .sock_type = SOCK_STREAM,
+ /* Use AF_INET6 so that new sockets are added to the tail of the
+ * bucket's list, needing to be added to the next batch to force
+ * a realloc.
+ */
+ .family = AF_INET6,
+ .test = force_realloc,
+ },
+ {
+ .description = "tcp: resume after removing a seen socket (established)",
+ /* Force all established sockets into one bucket */
+ .ehash_buckets = 1,
+ .connections = nr_soreuse,
+ .init_socks = nr_soreuse,
+ /* Room for connect()ed and accept()ed sockets */
+ .max_socks = nr_soreuse * 3,
+ .sock_type = SOCK_STREAM,
+ .family = AF_INET6,
+ .test = remove_seen_established,
+ },
+ {
+ .description = "tcp: resume after removing one unseen socket (established)",
+ /* Force all established sockets into one bucket */
+ .ehash_buckets = 1,
+ .connections = nr_soreuse,
+ .init_socks = nr_soreuse,
+ /* Room for connect()ed and accept()ed sockets */
+ .max_socks = nr_soreuse * 3,
+ .sock_type = SOCK_STREAM,
+ .family = AF_INET6,
+ .test = remove_unseen_established,
+ },
+ {
+ .description = "tcp: resume after removing all unseen sockets (established)",
+ /* Force all established sockets into one bucket */
+ .ehash_buckets = 1,
+ .connections = nr_soreuse,
+ .init_socks = nr_soreuse,
+ /* Room for connect()ed and accept()ed sockets */
+ .max_socks = nr_soreuse * 3,
+ .sock_type = SOCK_STREAM,
+ .family = AF_INET6,
+ .test = remove_all_established,
+ },
+ {
+ .description = "tcp: resume after adding a few sockets (established)",
+ /* Force all established sockets into one bucket */
+ .ehash_buckets = 1,
+ .connections = nr_soreuse,
+ .init_socks = nr_soreuse,
+ /* Room for connect()ed and accept()ed sockets */
+ .max_socks = nr_soreuse * 3,
+ .sock_type = SOCK_STREAM,
+ .family = AF_INET6,
+ .test = add_some_established,
+ },
+ {
+ .description = "tcp: force a realloc to occur (established)",
+ /* Force all established sockets into one bucket */
+ .ehash_buckets = 1,
+ /* Bucket size will need to double when going from listening to
+ * established sockets.
+ */
+ .connections = init_batch_size,
+ .init_socks = nr_soreuse,
+ /* Room for connect()ed and accept()ed sockets */
+ .max_socks = nr_soreuse + (init_batch_size * 2),
+ .sock_type = SOCK_STREAM,
+ .family = AF_INET6,
+ .test = force_realloc_established,
+ },
};
static void do_resume_test(struct test_case *tc)
{
struct sock_iter_batch *skel = NULL;
+ struct sock_count *counts = NULL;
static const __u16 port = 10001;
+ struct nstoken *nstoken = NULL;
struct bpf_link *link = NULL;
- struct sock_count *counts;
+ int *established_fds = NULL;
int err, iter_fd = -1;
const char *addr;
int *fds = NULL;
- int local_port;
+
+ if (tc->ehash_buckets) {
+ SYS_NOFAIL("ip netns del " TEST_CHILD_NS);
+ SYS(done, "sysctl -wq net.ipv4.tcp_child_ehash_entries=%d",
+ tc->ehash_buckets);
+ SYS(done, "ip netns add %s", TEST_CHILD_NS);
+ SYS(done, "ip -net %s link set dev lo up", TEST_CHILD_NS);
+ nstoken = open_netns(TEST_CHILD_NS);
+ if (!ASSERT_OK_PTR(nstoken, "open_child_netns"))
+ goto done;
+ }
counts = calloc(tc->max_socks, sizeof(*counts));
if (!ASSERT_OK_PTR(counts, "counts"))
@@ -384,11 +808,18 @@ static void do_resume_test(struct test_case *tc)
tc->init_socks);
if (!ASSERT_OK_PTR(fds, "start_reuseport_server"))
goto done;
- local_port = get_socket_local_port(*fds);
- if (!ASSERT_GE(local_port, 0, "get_socket_local_port"))
- goto done;
- skel->rodata->ports[0] = ntohs(local_port);
+ if (tc->connections) {
+ established_fds = connect_to_server(tc->family, tc->sock_type,
+ addr, port,
+ tc->connections, fds,
+ tc->init_socks);
+ if (!ASSERT_OK_PTR(established_fds, "connect_to_server"))
+ goto done;
+ }
+ skel->rodata->ports[0] = 0;
+ skel->rodata->ports[1] = 0;
skel->rodata->sf = tc->family;
+ skel->rodata->ss = 0;
err = sock_iter_batch__load(skel);
if (!ASSERT_OK(err, "sock_iter_batch__load"))
@@ -406,10 +837,15 @@ static void do_resume_test(struct test_case *tc)
goto done;
tc->test(tc->family, tc->sock_type, addr, port, fds, tc->init_socks,
- counts, tc->max_socks, link, iter_fd);
+ established_fds, tc->connections*2, counts, tc->max_socks,
+ link, iter_fd);
done:
+ close_netns(nstoken);
+ SYS_NOFAIL("ip netns del " TEST_CHILD_NS);
+ SYS_NOFAIL("sysctl -w net.ipv4.tcp_child_ehash_entries=0");
free(counts);
free_fds(fds, tc->init_socks);
+ free_fds(established_fds, tc->connections*2);
if (iter_fd >= 0)
close(iter_fd);
bpf_link__destroy(link);
@@ -454,6 +890,8 @@ static void do_test(int sock_type, bool onebyone)
skel->rodata->ports[i] = ntohs(local_port);
}
skel->rodata->sf = AF_INET6;
+ if (sock_type == SOCK_STREAM)
+ skel->rodata->ss = TCP_LISTEN;
err = sock_iter_batch__load(skel);
if (!ASSERT_OK(err, "sock_iter_batch__load"))
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c
index b6c471da5c28..b87e7f39e15a 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c
@@ -314,6 +314,95 @@ out:
test_sockmap_ktls__destroy(skel);
}
+static void test_sockmap_ktls_tx_pop(int family, int sotype)
+{
+ char msg[37] = "0123456789abcdefghijklmnopqrstuvwxyz\0";
+ int c = 0, p = 0, one = 1, sent, recvd;
+ struct test_sockmap_ktls *skel;
+ int prog_fd, map_fd;
+ char rcv[50] = {0};
+ int err;
+ int i, m, r;
+
+ skel = test_sockmap_ktls__open_and_load();
+ if (!ASSERT_TRUE(skel, "open ktls skel"))
+ return;
+
+ err = create_pair(family, sotype, &c, &p);
+ if (!ASSERT_OK(err, "create_pair()"))
+ goto out;
+
+ prog_fd = bpf_program__fd(skel->progs.prog_sk_policy);
+ map_fd = bpf_map__fd(skel->maps.sock_map);
+
+ err = bpf_prog_attach(prog_fd, map_fd, BPF_SK_MSG_VERDICT, 0);
+ if (!ASSERT_OK(err, "bpf_prog_attach sk msg"))
+ goto out;
+
+ err = bpf_map_update_elem(map_fd, &one, &c, BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem(c)"))
+ goto out;
+
+ err = init_ktls_pairs(c, p);
+ if (!ASSERT_OK(err, "init_ktls_pairs(c, p)"))
+ goto out;
+
+ struct {
+ int pop_start;
+ int pop_len;
+ } pop_policy[] = {
+ /* trim the start */
+ {0, 2},
+ {0, 10},
+ {1, 2},
+ {1, 10},
+ /* trim the end */
+ {35, 2},
+ /* New entries should be added before this line */
+ {-1, -1},
+ };
+
+ i = 0;
+ while (pop_policy[i].pop_start >= 0) {
+ skel->bss->pop_start = pop_policy[i].pop_start;
+ skel->bss->pop_end = pop_policy[i].pop_len;
+
+ sent = send(c, msg, sizeof(msg), 0);
+ if (!ASSERT_EQ(sent, sizeof(msg), "send(msg)"))
+ goto out;
+
+ recvd = recv_timeout(p, rcv, sizeof(rcv), MSG_DONTWAIT, 1);
+ if (!ASSERT_EQ(recvd, sizeof(msg) - pop_policy[i].pop_len, "pop len mismatch"))
+ goto out;
+
+ /* verify the data
+ * msg: 0123456789a bcdefghij klmnopqrstuvwxyz
+ * | |
+ * popped data
+ */
+ for (m = 0, r = 0; m < sizeof(msg);) {
+ /* skip checking the data that has been popped */
+ if (m >= pop_policy[i].pop_start &&
+ m <= pop_policy[i].pop_start + pop_policy[i].pop_len - 1) {
+ m++;
+ continue;
+ }
+
+ if (!ASSERT_EQ(msg[m], rcv[r], "data mismatch"))
+ goto out;
+ m++;
+ r++;
+ }
+ i++;
+ }
+out:
+ if (c)
+ close(c);
+ if (p)
+ close(p);
+ test_sockmap_ktls__destroy(skel);
+}
+
static void run_tests(int family, enum bpf_map_type map_type)
{
int map;
@@ -338,6 +427,8 @@ static void run_ktls_test(int family, int sotype)
test_sockmap_ktls_tx_cork(family, sotype, true);
if (test__start_subtest("tls tx egress with no buf"))
test_sockmap_ktls_tx_no_buf(family, sotype, true);
+ if (test__start_subtest("tls tx with pop"))
+ test_sockmap_ktls_tx_pop(family, sotype);
}
void test_sockmap_ktls(void)
diff --git a/tools/testing/selftests/bpf/progs/sock_iter_batch.c b/tools/testing/selftests/bpf/progs/sock_iter_batch.c
index 8f483337e103..77966ded5467 100644
--- a/tools/testing/selftests/bpf/progs/sock_iter_batch.c
+++ b/tools/testing/selftests/bpf/progs/sock_iter_batch.c
@@ -23,6 +23,7 @@ static bool ipv4_addr_loopback(__be32 a)
}
volatile const unsigned int sf;
+volatile const unsigned int ss;
volatile const __u16 ports[2];
unsigned int bucket[2];
@@ -42,16 +43,18 @@ int iter_tcp_soreuse(struct bpf_iter__tcp *ctx)
sock_cookie = bpf_get_socket_cookie(sk);
sk = bpf_core_cast(sk, struct sock);
if (sk->sk_family != sf ||
- sk->sk_state != TCP_LISTEN ||
- sk->sk_family == AF_INET6 ?
+ (ss && sk->sk_state != ss) ||
+ (sk->sk_family == AF_INET6 ?
!ipv6_addr_loopback(&sk->sk_v6_rcv_saddr) :
- !ipv4_addr_loopback(sk->sk_rcv_saddr))
+ !ipv4_addr_loopback(sk->sk_rcv_saddr)))
return 0;
if (sk->sk_num == ports[0])
idx = 0;
else if (sk->sk_num == ports[1])
idx = 1;
+ else if (!ports[0] && !ports[1])
+ idx = 0;
else
return 0;
@@ -67,6 +70,27 @@ int iter_tcp_soreuse(struct bpf_iter__tcp *ctx)
return 0;
}
+volatile const __u64 destroy_cookie;
+
+SEC("iter/tcp")
+int iter_tcp_destroy(struct bpf_iter__tcp *ctx)
+{
+ struct sock_common *sk_common = (struct sock_common *)ctx->sk_common;
+ __u64 sock_cookie;
+
+ if (!sk_common)
+ return 0;
+
+ sock_cookie = bpf_get_socket_cookie(sk_common);
+ if (sock_cookie != destroy_cookie)
+ return 0;
+
+ bpf_sock_destroy(sk_common);
+ bpf_seq_write(ctx->meta->seq, &sock_cookie, sizeof(sock_cookie));
+
+ return 0;
+}
+
#define udp_sk(ptr) container_of(ptr, struct udp_sock, inet.sk)
SEC("iter/udp")
@@ -83,15 +107,17 @@ int iter_udp_soreuse(struct bpf_iter__udp *ctx)
sock_cookie = bpf_get_socket_cookie(sk);
sk = bpf_core_cast(sk, struct sock);
if (sk->sk_family != sf ||
- sk->sk_family == AF_INET6 ?
+ (sk->sk_family == AF_INET6 ?
!ipv6_addr_loopback(&sk->sk_v6_rcv_saddr) :
- !ipv4_addr_loopback(sk->sk_rcv_saddr))
+ !ipv4_addr_loopback(sk->sk_rcv_saddr)))
return 0;
if (sk->sk_num == ports[0])
idx = 0;
else if (sk->sk_num == ports[1])
idx = 1;
+ else if (!ports[0] && !ports[1])
+ idx = 0;
else
return 0;
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_ktls.c b/tools/testing/selftests/bpf/progs/test_sockmap_ktls.c
index 8bdb9987c0c7..83df4919c224 100644
--- a/tools/testing/selftests/bpf/progs/test_sockmap_ktls.c
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_ktls.c
@@ -7,6 +7,8 @@ int cork_byte;
int push_start;
int push_end;
int apply_bytes;
+int pop_start;
+int pop_end;
struct {
__uint(type, BPF_MAP_TYPE_SOCKMAP);
@@ -22,6 +24,8 @@ int prog_sk_policy(struct sk_msg_md *msg)
bpf_msg_cork_bytes(msg, cork_byte);
if (push_start > 0 && push_end > 0)
bpf_msg_push_data(msg, push_start, push_end, 0);
+ if (pop_start >= 0 && pop_end > 0)
+ bpf_msg_pop_data(msg, pop_start, pop_end, 0);
return SK_PASS;
}
diff --git a/tools/testing/selftests/bpf/progs/verifier_ctx.c b/tools/testing/selftests/bpf/progs/verifier_ctx.c
index a83809a1dbbf..0450840c92d9 100644
--- a/tools/testing/selftests/bpf/progs/verifier_ctx.c
+++ b/tools/testing/selftests/bpf/progs/verifier_ctx.c
@@ -218,4 +218,29 @@ __naked void null_check_8_null_bind(void)
: __clobber_all);
}
+#define narrow_load(type, ctx, field) \
+ SEC(type) \
+ __description("narrow load on field " #field " of " #ctx) \
+ __failure __msg("invalid bpf_context access") \
+ __naked void invalid_narrow_load##ctx##field(void) \
+ { \
+ asm volatile (" \
+ r1 = *(u32 *)(r1 + %[off]); \
+ r0 = 0; \
+ exit;" \
+ : \
+ : __imm_const(off, offsetof(struct ctx, field) + 4) \
+ : __clobber_all); \
+ }
+
+narrow_load("cgroup/getsockopt", bpf_sockopt, sk);
+narrow_load("cgroup/getsockopt", bpf_sockopt, optval);
+narrow_load("cgroup/getsockopt", bpf_sockopt, optval_end);
+narrow_load("tc", __sk_buff, sk);
+narrow_load("cgroup/bind4", bpf_sock_addr, sk);
+narrow_load("sockops", bpf_sock_ops, sk);
+narrow_load("sockops", bpf_sock_ops, skb_data);
+narrow_load("sockops", bpf_sock_ops, skb_data_end);
+narrow_load("sockops", bpf_sock_ops, skb_hwtstamp);
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c
index 0ced4026ee44..a29de0713f19 100644
--- a/tools/testing/selftests/bpf/xskxceiver.c
+++ b/tools/testing/selftests/bpf/xskxceiver.c
@@ -109,6 +109,8 @@
#include <network_helpers.h>
+#define MAX_TX_BUDGET_DEFAULT 32
+
static bool opt_verbose;
static bool opt_print_tests;
static enum test_mode opt_mode = TEST_MODE_ALL;
@@ -1091,11 +1093,45 @@ static bool is_pkt_valid(struct pkt *pkt, void *buffer, u64 addr, u32 len)
return true;
}
+static u32 load_value(u32 *counter)
+{
+ return __atomic_load_n(counter, __ATOMIC_ACQUIRE);
+}
+
+static bool kick_tx_with_check(struct xsk_socket_info *xsk, int *ret)
+{
+ u32 max_budget = MAX_TX_BUDGET_DEFAULT;
+ u32 cons, ready_to_send;
+ int delta;
+
+ cons = load_value(xsk->tx.consumer);
+ ready_to_send = load_value(xsk->tx.producer) - cons;
+ *ret = sendto(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, 0);
+
+ delta = load_value(xsk->tx.consumer) - cons;
+ /* By default, xsk should consume exact @max_budget descs at one
+ * send in this case where hitting the max budget limit in while
+ * loop is triggered in __xsk_generic_xmit(). Please make sure that
+ * the number of descs to be sent is larger than @max_budget, or
+ * else the tx.consumer will be updated in xskq_cons_peek_desc()
+ * in time which hides the issue we try to verify.
+ */
+ if (ready_to_send > max_budget && delta != max_budget)
+ return false;
+
+ return true;
+}
+
static int kick_tx(struct xsk_socket_info *xsk)
{
int ret;
- ret = sendto(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, 0);
+ if (xsk->check_consumer) {
+ if (!kick_tx_with_check(xsk, &ret))
+ return TEST_FAILURE;
+ } else {
+ ret = sendto(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, 0);
+ }
if (ret >= 0)
return TEST_PASS;
if (errno == ENOBUFS || errno == EAGAIN || errno == EBUSY || errno == ENETDOWN) {
@@ -2613,6 +2649,23 @@ static int testapp_adjust_tail_grow_mb(struct test_spec *test)
XSK_UMEM__LARGE_FRAME_SIZE * 2);
}
+static int testapp_tx_queue_consumer(struct test_spec *test)
+{
+ int nr_packets;
+
+ if (test->mode == TEST_MODE_ZC) {
+ ksft_test_result_skip("Can not run TX_QUEUE_CONSUMER test for ZC mode\n");
+ return TEST_SKIP;
+ }
+
+ nr_packets = MAX_TX_BUDGET_DEFAULT + 1;
+ pkt_stream_replace(test, nr_packets, MIN_PKT_SIZE);
+ test->ifobj_tx->xsk->batch_size = nr_packets;
+ test->ifobj_tx->xsk->check_consumer = true;
+
+ return testapp_validate_traffic(test);
+}
+
static void run_pkt_test(struct test_spec *test)
{
int ret;
@@ -2723,6 +2776,7 @@ static const struct test_spec tests[] = {
{.name = "XDP_ADJUST_TAIL_SHRINK_MULTI_BUFF", .test_func = testapp_adjust_tail_shrink_mb},
{.name = "XDP_ADJUST_TAIL_GROW", .test_func = testapp_adjust_tail_grow},
{.name = "XDP_ADJUST_TAIL_GROW_MULTI_BUFF", .test_func = testapp_adjust_tail_grow_mb},
+ {.name = "TX_QUEUE_CONSUMER", .test_func = testapp_tx_queue_consumer},
};
static void print_tests(void)
diff --git a/tools/testing/selftests/bpf/xskxceiver.h b/tools/testing/selftests/bpf/xskxceiver.h
index 67fc44b2813b..4df3a5d329ac 100644
--- a/tools/testing/selftests/bpf/xskxceiver.h
+++ b/tools/testing/selftests/bpf/xskxceiver.h
@@ -95,6 +95,7 @@ struct xsk_socket_info {
u32 batch_size;
u8 dst_mac[ETH_ALEN];
u8 src_mac[ETH_ALEN];
+ bool check_consumer;
};
struct pkt {
diff --git a/tools/testing/selftests/drivers/net/Makefile b/tools/testing/selftests/drivers/net/Makefile
index be780bcb73a3..3556f3563e08 100644
--- a/tools/testing/selftests/drivers/net/Makefile
+++ b/tools/testing/selftests/drivers/net/Makefile
@@ -12,14 +12,17 @@ TEST_GEN_FILES := \
TEST_PROGS := \
napi_id.py \
netcons_basic.sh \
+ netcons_cmdline.sh \
netcons_fragmented_msg.sh \
netcons_overflow.sh \
netcons_sysdata.sh \
+ netpoll_basic.py \
ping.py \
queues.py \
stats.py \
shaper.py \
hds.py \
+ xdp.py \
# end of TEST_PROGS
include ../../lib.mk
diff --git a/tools/testing/selftests/drivers/net/hw/Makefile b/tools/testing/selftests/drivers/net/hw/Makefile
index df2c047ffa90..fdc97355588c 100644
--- a/tools/testing/selftests/drivers/net/hw/Makefile
+++ b/tools/testing/selftests/drivers/net/hw/Makefile
@@ -16,6 +16,7 @@ TEST_PROGS = \
irq.py \
loopback.sh \
pp_alloc_fail.py \
+ rss_api.py \
rss_ctx.py \
rss_input_xfrm.py \
tso.py \
diff --git a/tools/testing/selftests/drivers/net/hw/devlink_rate_tc_bw.py b/tools/testing/selftests/drivers/net/hw/devlink_rate_tc_bw.py
new file mode 100755
index 000000000000..ead6784d1910
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/devlink_rate_tc_bw.py
@@ -0,0 +1,465 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+"""
+Devlink Rate TC Bandwidth Test Suite
+===================================
+
+This test suite verifies the functionality of devlink-rate traffic class (TC)
+bandwidth distribution in a virtualized environment. The tests validate that
+bandwidth can be properly allocated between different traffic classes and
+that TC mapping works as expected.
+
+Test Environment:
+----------------
+- Creates 1 VF
+- Establishes a bridge connecting the VF representor and the uplink representor
+- Sets up 2 VLAN interfaces on the VF with different VLAN IDs (101, 102)
+- Configures different traffic classes (TC3 and TC4) for each VLAN
+
+Test Cases:
+----------
+1. test_no_tc_mapping_bandwidth:
+ - Verifies that without TC mapping, bandwidth is NOT distributed according to
+ the configured 80/20 split between TC4 and TC3
+ - This test should fail if bandwidth matches the 80/20 split without TC
+ mapping
+ - Expected: Bandwidth should NOT be distributed as 80/20
+
+2. test_tc_mapping_bandwidth:
+ - Configures TC mapping using mqprio qdisc
+ - Verifies that with TC mapping, bandwidth IS distributed according to the
+ configured 80/20 split between TC3 and TC4
+ - Expected: Bandwidth should be distributed as 80/20
+
+Bandwidth Distribution:
+----------------------
+- TC3 (VLAN 101): Configured for 80% of total bandwidth
+- TC4 (VLAN 102): Configured for 20% of total bandwidth
+- Total bandwidth: 1Gbps
+- Tolerance: +-12%
+
+Hardware-Specific Behavior (mlx5):
+--------------------------
+mlx5 hardware enforces traffic class separation by ensuring that each transmit
+queue (SQ) is associated with a single TC. If a packet is sent on a queue that
+doesn't match the expected TC (based on DSCP or VLAN priority and hypervisor-set
+mapping), the hardware moves the queue to the correct TC scheduler to preserve
+traffic isolation.
+
+This behavior means that even without explicit TC-to-queue mapping, bandwidth
+enforcement may still appear to work—because the hardware dynamically adjusts
+the scheduling context. However, this can lead to performance issues in high
+rates and HOL blocking if traffic from different TCs is mixed on the same queue.
+"""
+
+import json
+import os
+import subprocess
+import threading
+import time
+
+from lib.py import ksft_pr, ksft_run, ksft_exit
+from lib.py import KsftSkipEx, KsftFailEx, KsftXfailEx
+from lib.py import NetDrvEpEnv, DevlinkFamily
+from lib.py import NlError
+from lib.py import cmd, defer, ethtool, ip
+
+
+class BandwidthValidator:
+ """
+ Validates bandwidth totals and per-TC shares against expected values
+ with a tolerance.
+ """
+
+ def __init__(self):
+ self.tolerance_percent = 12
+ self.expected_total_gbps = 1.0
+ self.total_min_expected = self.min_expected(self.expected_total_gbps)
+ self.total_max_expected = self.max_expected(self.expected_total_gbps)
+ self.tc_expected_percent = {
+ 3: 20.0,
+ 4: 80.0,
+ }
+
+ def min_expected(self, value):
+ """Calculates the minimum acceptable value based on tolerance."""
+ return value - (value * self.tolerance_percent / 100)
+
+ def max_expected(self, value):
+ """Calculates the maximum acceptable value based on tolerance."""
+ return value + (value * self.tolerance_percent / 100)
+
+ def bound(self, expected, value):
+ """Returns True if value is within expected tolerance."""
+ return self.min_expected(expected) <= value <= self.max_expected(expected)
+
+ def tc_bandwidth_bound(self, value, tc_ix):
+ """
+ Returns True if the given bandwidth value is within tolerance
+ for the TC's expected bandwidth.
+ """
+ expected = self.tc_expected_percent[tc_ix]
+ return self.bound(expected, value)
+
+
+def setup_vf(cfg, set_tc_mapping=True):
+ """
+ Sets up a VF on the given network interface.
+
+ Enables SR-IOV and switchdev mode, brings the VF interface up,
+ and optionally configures TC mapping using mqprio.
+ """
+ try:
+ cmd(f"devlink dev eswitch set pci/{cfg.pci} mode switchdev")
+ defer(cmd, f"devlink dev eswitch set pci/{cfg.pci} mode legacy")
+ except Exception as exc:
+ raise KsftSkipEx(f"Failed to enable switchdev mode on {cfg.pci}") from exc
+ try:
+ cmd(f"echo 1 > /sys/class/net/{cfg.ifname}/device/sriov_numvfs")
+ defer(cmd, f"echo 0 > /sys/class/net/{cfg.ifname}/device/sriov_numvfs")
+ except Exception as exc:
+ raise KsftSkipEx(f"Failed to enable SR-IOV on {cfg.ifname}") from exc
+
+ time.sleep(2)
+ vf_ifc = (os.listdir(
+ f"/sys/class/net/{cfg.ifname}/device/virtfn0/net") or [None])[0]
+ if vf_ifc:
+ ip(f"link set dev {vf_ifc} up")
+ else:
+ raise KsftSkipEx("VF interface not found")
+ if set_tc_mapping:
+ cmd(f"tc qdisc add dev {vf_ifc} root handle 5 mqprio mode dcb hw 1 num_tc 8")
+
+ return vf_ifc
+
+
+def setup_vlans_on_vf(vf_ifc):
+ """
+ Sets up two VLAN interfaces on the given VF, each mapped to a different TC.
+ """
+ vlan_configs = [
+ {"vlan_id": 101, "tc": 3, "ip": "198.51.100.2"},
+ {"vlan_id": 102, "tc": 4, "ip": "198.51.100.10"},
+ ]
+
+ for config in vlan_configs:
+ vlan_dev = f"{vf_ifc}.{config['vlan_id']}"
+ ip(f"link add link {vf_ifc} name {vlan_dev} type vlan id {config['vlan_id']}")
+ ip(f"addr add {config['ip']}/29 dev {vlan_dev}")
+ ip(f"link set dev {vlan_dev} up")
+ ip(f"link set dev {vlan_dev} type vlan egress-qos-map 0:{config['tc']}")
+ ksft_pr(f"Created VLAN {vlan_dev} on {vf_ifc} with tc {config['tc']} and IP {config['ip']}")
+
+
+def get_vf_info(cfg):
+ """
+ Finds the VF representor interface and devlink port index
+ for the given PCI device used in the test environment.
+ """
+ cfg.vf_representor = None
+ cfg.vf_port_index = None
+ out = subprocess.check_output(["devlink", "-j", "port", "show"], encoding="utf-8")
+ ports = json.loads(out)["port"]
+
+ for port_name, props in ports.items():
+ netdev = props.get("netdev")
+
+ if (port_name.startswith(f"pci/{cfg.pci}/") and
+ props.get("vfnum") == 0):
+ cfg.vf_representor = netdev
+ cfg.vf_port_index = int(port_name.split("/")[-1])
+ break
+
+
+def setup_bridge(cfg):
+ """
+ Creates and configures a Linux bridge, with both the uplink
+ and VF representor interfaces attached to it.
+ """
+ bridge_name = f"br_{os.getpid()}"
+ ip(f"link add name {bridge_name} type bridge")
+ defer(cmd, f"ip link del name {bridge_name} type bridge")
+
+ ip(f"link set dev {cfg.ifname} master {bridge_name}")
+
+ rep_name = cfg.vf_representor
+ if rep_name:
+ ip(f"link set dev {rep_name} master {bridge_name}")
+ ip(f"link set dev {rep_name} up")
+ ksft_pr(f"Set representor {rep_name} up and added to bridge")
+ else:
+ raise KsftSkipEx("Could not find representor for the VF")
+
+ ip(f"link set dev {bridge_name} up")
+
+
+def setup_devlink_rate(cfg):
+ """
+ Configures devlink rate tx_max and traffic class bandwidth for the VF.
+ """
+ port_index = cfg.vf_port_index
+ if port_index is None:
+ raise KsftSkipEx("Could not find VF port index")
+ try:
+ cfg.devnl.rate_set({
+ "bus-name": "pci",
+ "dev-name": cfg.pci,
+ "port-index": port_index,
+ "rate-tx-max": 125000000,
+ "rate-tc-bws": [
+ {"index": 0, "bw": 0},
+ {"index": 1, "bw": 0},
+ {"index": 2, "bw": 0},
+ {"index": 3, "bw": 20},
+ {"index": 4, "bw": 80},
+ {"index": 5, "bw": 0},
+ {"index": 6, "bw": 0},
+ {"index": 7, "bw": 0},
+ ]
+ })
+ except NlError as exc:
+ if exc.error == 95: # EOPNOTSUPP
+ raise KsftSkipEx("devlink rate configuration is not supported on the VF") from exc
+ raise KsftFailEx(f"rate_set failed on VF port {port_index}") from exc
+
+
+def setup_remote_server(cfg):
+ """
+ Sets up VLAN interfaces and starts iperf3 servers on the remote side.
+ """
+ remote_dev = cfg.remote_ifname
+ vlan_ids = [101, 102]
+ remote_ips = ["198.51.100.1", "198.51.100.9"]
+
+ for vlan_id, ip_addr in zip(vlan_ids, remote_ips):
+ vlan_dev = f"{remote_dev}.{vlan_id}"
+ cmd(f"ip link add link {remote_dev} name {vlan_dev} "
+ f"type vlan id {vlan_id}", host=cfg.remote)
+ cmd(f"ip addr add {ip_addr}/29 dev {vlan_dev}", host=cfg.remote)
+ cmd(f"ip link set dev {vlan_dev} up", host=cfg.remote)
+ cmd(f"iperf3 -s -1 -B {ip_addr}",background=True, host=cfg.remote)
+ defer(cmd, f"ip link del {vlan_dev}", host=cfg.remote)
+
+
+def setup_test_environment(cfg, set_tc_mapping=True):
+ """
+ Sets up the complete test environment including VF creation, VLANs,
+ bridge configuration, devlink rate setup, and the remote server.
+ """
+ vf_ifc = setup_vf(cfg, set_tc_mapping)
+ ksft_pr(f"Created VF interface: {vf_ifc}")
+
+ setup_vlans_on_vf(vf_ifc)
+
+ get_vf_info(cfg)
+ setup_bridge(cfg)
+
+ setup_devlink_rate(cfg)
+ setup_remote_server(cfg)
+ time.sleep(2)
+
+
+def run_iperf_client(server_ip, local_ip, barrier, min_expected_gbps=0.1):
+ """
+ Runs a single iperf3 client instance, binding to the given local IP.
+ Waits on a barrier to synchronize with other threads.
+ """
+ try:
+ barrier.wait(timeout=10)
+ except Exception as exc:
+ raise KsftFailEx("iperf3 barrier wait timed") from exc
+
+ iperf_cmd = ["iperf3", "-c", server_ip, "-B", local_ip, "-J"]
+ result = subprocess.run(iperf_cmd, capture_output=True, text=True,
+ check=True)
+
+ try:
+ output = json.loads(result.stdout)
+ bits_per_second = output["end"]["sum_received"]["bits_per_second"]
+ gbps = bits_per_second / 1e9
+ if gbps < min_expected_gbps:
+ ksft_pr(
+ f"iperf3 bandwidth too low: {gbps:.2f} Gbps "
+ f"(expected ≥ {min_expected_gbps} Gbps)"
+ )
+ return None
+ return gbps
+ except json.JSONDecodeError as exc:
+ ksft_pr(f"Failed to parse iperf3 JSON output: {exc}")
+ return None
+
+
+def run_bandwidth_test():
+ """
+ Launches iperf3 client threads for each VLAN/TC pair and collects results.
+ """
+ def _run_iperf_client_thread(server_ip, local_ip, results, barrier, tc_ix):
+ results[tc_ix] = run_iperf_client(server_ip, local_ip, barrier)
+
+ vf_vlan_data = [
+ # (local_ip, remote_ip, TC)
+ ("198.51.100.2", "198.51.100.1", 3),
+ ("198.51.100.10", "198.51.100.9", 4),
+ ]
+
+ results = {}
+ threads = []
+ start_barrier = threading.Barrier(len(vf_vlan_data))
+
+ for local_ip, remote_ip, tc_ix in vf_vlan_data:
+ thread = threading.Thread(
+ target=_run_iperf_client_thread,
+ args=(remote_ip, local_ip, results, start_barrier, tc_ix)
+ )
+ thread.start()
+ threads.append(thread)
+
+ for thread in threads:
+ thread.join()
+
+ for tc_ix, tc_bw in results.items():
+ if tc_bw is None:
+ raise KsftFailEx("iperf3 client failed; cannot evaluate bandwidth")
+
+ return results
+
+def calculate_bandwidth_percentages(results):
+ """
+ Calculates the percentage of total bandwidth received by TC3 and TC4.
+ """
+ if 3 not in results or 4 not in results:
+ raise KsftFailEx(f"Missing expected TC results in {results}")
+
+ tc3_bw = results[3]
+ tc4_bw = results[4]
+ total_bw = tc3_bw + tc4_bw
+ tc3_percentage = (tc3_bw / total_bw) * 100
+ tc4_percentage = (tc4_bw / total_bw) * 100
+
+ return {
+ 'tc3_bw': tc3_bw,
+ 'tc4_bw': tc4_bw,
+ 'tc3_percentage': tc3_percentage,
+ 'tc4_percentage': tc4_percentage,
+ 'total_bw': total_bw
+ }
+
+
+def print_bandwidth_results(bw_data, test_name):
+ """
+ Prints bandwidth measurements and TC usage summary for a given test.
+ """
+ ksft_pr(f"Bandwidth check results {test_name}:")
+ ksft_pr(f"TC 3: {bw_data['tc3_bw']:.2f} Gbits/sec")
+ ksft_pr(f"TC 4: {bw_data['tc4_bw']:.2f} Gbits/sec")
+ ksft_pr(f"Total bandwidth: {bw_data['total_bw']:.2f} Gbits/sec")
+ ksft_pr(f"TC 3 percentage: {bw_data['tc3_percentage']:.1f}%")
+ ksft_pr(f"TC 4 percentage: {bw_data['tc4_percentage']:.1f}%")
+
+
+def verify_total_bandwidth(bw_data, validator):
+ """
+ Ensures the total measured bandwidth falls within the acceptable tolerance.
+ """
+ total = bw_data['total_bw']
+
+ if validator.bound(validator.expected_total_gbps, total):
+ return
+
+ if total < validator.total_min_expected:
+ raise KsftSkipEx(
+ f"Total bandwidth {total:.2f} Gbps < minimum "
+ f"{validator.total_min_expected:.2f} Gbps; "
+ f"parent tx_max ({validator.expected_total_gbps:.1f} G) "
+ f"not reached, cannot validate share"
+ )
+
+ raise KsftFailEx(
+ f"Total bandwidth {total:.2f} Gbps exceeds allowed ceiling "
+ f"{validator.total_max_expected:.2f} Gbps "
+ f"(VF tx_max set to {validator.expected_total_gbps:.1f} G)"
+ )
+
+
+def check_bandwidth_distribution(bw_data, validator):
+ """
+ Checks whether the measured TC3 and TC4 bandwidth percentages
+ fall within their expected tolerance ranges.
+
+ Returns:
+ bool: True if both TC3 and TC4 percentages are within bounds.
+ """
+ tc3_valid = validator.tc_bandwidth_bound(bw_data['tc3_percentage'], 3)
+ tc4_valid = validator.tc_bandwidth_bound(bw_data['tc4_percentage'], 4)
+
+ return tc3_valid and tc4_valid
+
+
+def run_bandwidth_distribution_test(cfg, set_tc_mapping):
+ """
+ Runs parallel iperf3 tests for both TCs and collects results.
+ """
+ setup_test_environment(cfg, set_tc_mapping)
+ bandwidths = run_bandwidth_test()
+ bw_data = calculate_bandwidth_percentages(bandwidths)
+ test_name = "with TC mapping" if set_tc_mapping else "without TC mapping"
+ print_bandwidth_results(bw_data, test_name)
+
+ verify_total_bandwidth(bw_data, cfg.bw_validator)
+
+ return check_bandwidth_distribution(bw_data, cfg.bw_validator)
+
+
+def test_no_tc_mapping_bandwidth(cfg):
+ """
+ Verifies that bandwidth is not split 80/20 without traffic class mapping.
+ """
+ pass_bw_msg = "Bandwidth is NOT distributed as 80/20 without TC mapping"
+ fail_bw_msg = "Bandwidth matched 80/20 split without TC mapping"
+ is_mlx5 = "driver: mlx5" in ethtool(f"-i {cfg.ifname}").stdout
+
+ if run_bandwidth_distribution_test(cfg, set_tc_mapping=False):
+ if is_mlx5:
+ raise KsftXfailEx(fail_bw_msg)
+ raise KsftFailEx(fail_bw_msg)
+ if is_mlx5:
+ raise KsftFailEx("mlx5 behavior changed:" + pass_bw_msg)
+ ksft_pr(pass_bw_msg)
+
+
+def test_tc_mapping_bandwidth(cfg):
+ """
+ Verifies that bandwidth is correctly split 80/20 between TC3 and TC4
+ when traffic class mapping is set.
+ """
+ if run_bandwidth_distribution_test(cfg, set_tc_mapping=True):
+ ksft_pr("Bandwidth is distributed as 80/20 with TC mapping")
+ else:
+ raise KsftFailEx("Bandwidth did not match 80/20 split with TC mapping")
+
+
+def main() -> None:
+ """
+ Main entry point for running the test cases.
+ """
+ with NetDrvEpEnv(__file__, nsim_test=False) as cfg:
+ cfg.devnl = DevlinkFamily()
+
+ cfg.pci = os.path.basename(
+ os.path.realpath(f"/sys/class/net/{cfg.ifname}/device")
+ )
+ if not cfg.pci:
+ raise KsftSkipEx("Could not get PCI address of the interface")
+ cfg.require_cmd("iperf3", local=True, remote=True)
+
+ cfg.bw_validator = BandwidthValidator()
+
+ cases = [test_no_tc_mapping_bandwidth, test_tc_mapping_bandwidth]
+
+ ksft_run(cases=cases, args=(cfg,))
+ ksft_exit()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/drivers/net/hw/devmem.py b/tools/testing/selftests/drivers/net/hw/devmem.py
index 7947650210a0..baa2f24240ba 100755
--- a/tools/testing/selftests/drivers/net/hw/devmem.py
+++ b/tools/testing/selftests/drivers/net/hw/devmem.py
@@ -51,15 +51,14 @@ def check_tx(cfg) -> None:
@ksft_disruptive
def check_tx_chunks(cfg) -> None:
- cfg.require_ipver("6")
require_devmem(cfg)
port = rand_port()
- listen_cmd = f"socat -U - TCP6-LISTEN:{port}"
+ listen_cmd = f"socat -U - TCP{cfg.addr_ipver}-LISTEN:{port}"
with bkg(listen_cmd, exit_wait=True) as socat:
wait_port_listen(port)
- cmd(f"echo -e \"hello\\nworld\"| {cfg.bin_remote} -f {cfg.ifname} -s {cfg.addr_v['6']} -p {port} -z 3", host=cfg.remote, shell=True)
+ cmd(f"echo -e \"hello\\nworld\"| {cfg.bin_remote} -f {cfg.ifname} -s {cfg.addr} -p {port} -z 3", host=cfg.remote, shell=True)
ksft_eq(socat.stdout.strip(), "hello\nworld")
diff --git a/tools/testing/selftests/drivers/net/hw/iou-zcrx.py b/tools/testing/selftests/drivers/net/hw/iou-zcrx.py
index 9c03fd777f3d..712c806508b5 100755
--- a/tools/testing/selftests/drivers/net/hw/iou-zcrx.py
+++ b/tools/testing/selftests/drivers/net/hw/iou-zcrx.py
@@ -3,37 +3,37 @@
import re
from os import path
-from lib.py import ksft_run, ksft_exit
+from lib.py import ksft_run, ksft_exit, KsftSkipEx
from lib.py import NetDrvEpEnv
from lib.py import bkg, cmd, defer, ethtool, rand_port, wait_port_listen
def _get_current_settings(cfg):
- output = ethtool(f"-g {cfg.ifname}", json=True, host=cfg.remote)[0]
+ output = ethtool(f"-g {cfg.ifname}", json=True)[0]
return (output['rx'], output['hds-thresh'])
def _get_combined_channels(cfg):
- output = ethtool(f"-l {cfg.ifname}", host=cfg.remote).stdout
+ output = ethtool(f"-l {cfg.ifname}").stdout
values = re.findall(r'Combined:\s+(\d+)', output)
return int(values[1])
def _create_rss_ctx(cfg, chan):
- output = ethtool(f"-X {cfg.ifname} context new start {chan} equal 1", host=cfg.remote).stdout
+ output = ethtool(f"-X {cfg.ifname} context new start {chan} equal 1").stdout
values = re.search(r'New RSS context is (\d+)', output).group(1)
ctx_id = int(values)
- return (ctx_id, defer(ethtool, f"-X {cfg.ifname} delete context {ctx_id}", host=cfg.remote))
+ return (ctx_id, defer(ethtool, f"-X {cfg.ifname} delete context {ctx_id}"))
def _set_flow_rule(cfg, port, chan):
- output = ethtool(f"-N {cfg.ifname} flow-type tcp6 dst-port {port} action {chan}", host=cfg.remote).stdout
+ output = ethtool(f"-N {cfg.ifname} flow-type tcp6 dst-port {port} action {chan}").stdout
values = re.search(r'ID (\d+)', output).group(1)
return int(values)
def _set_flow_rule_rss(cfg, port, ctx_id):
- output = ethtool(f"-N {cfg.ifname} flow-type tcp6 dst-port {port} context {ctx_id}", host=cfg.remote).stdout
+ output = ethtool(f"-N {cfg.ifname} flow-type tcp6 dst-port {port} context {ctx_id}").stdout
values = re.search(r'ID (\d+)', output).group(1)
return int(values)
@@ -47,26 +47,26 @@ def test_zcrx(cfg) -> None:
(rx_ring, hds_thresh) = _get_current_settings(cfg)
port = rand_port()
- ethtool(f"-G {cfg.ifname} tcp-data-split on", host=cfg.remote)
- defer(ethtool, f"-G {cfg.ifname} tcp-data-split auto", host=cfg.remote)
+ ethtool(f"-G {cfg.ifname} tcp-data-split on")
+ defer(ethtool, f"-G {cfg.ifname} tcp-data-split auto")
- ethtool(f"-G {cfg.ifname} hds-thresh 0", host=cfg.remote)
- defer(ethtool, f"-G {cfg.ifname} hds-thresh {hds_thresh}", host=cfg.remote)
+ ethtool(f"-G {cfg.ifname} hds-thresh 0")
+ defer(ethtool, f"-G {cfg.ifname} hds-thresh {hds_thresh}")
- ethtool(f"-G {cfg.ifname} rx 64", host=cfg.remote)
- defer(ethtool, f"-G {cfg.ifname} rx {rx_ring}", host=cfg.remote)
+ ethtool(f"-G {cfg.ifname} rx 64")
+ defer(ethtool, f"-G {cfg.ifname} rx {rx_ring}")
- ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}", host=cfg.remote)
- defer(ethtool, f"-X {cfg.ifname} default", host=cfg.remote)
+ ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}")
+ defer(ethtool, f"-X {cfg.ifname} default")
flow_rule_id = _set_flow_rule(cfg, port, combined_chans - 1)
- defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}", host=cfg.remote)
+ defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}")
- rx_cmd = f"{cfg.bin_remote} -s -p {port} -i {cfg.ifname} -q {combined_chans - 1}"
- tx_cmd = f"{cfg.bin_local} -c -h {cfg.remote_addr_v['6']} -p {port} -l 12840"
- with bkg(rx_cmd, host=cfg.remote, exit_wait=True):
- wait_port_listen(port, proto="tcp", host=cfg.remote)
- cmd(tx_cmd)
+ rx_cmd = f"{cfg.bin_local} -s -p {port} -i {cfg.ifname} -q {combined_chans - 1}"
+ tx_cmd = f"{cfg.bin_remote} -c -h {cfg.addr_v['6']} -p {port} -l 12840"
+ with bkg(rx_cmd, exit_wait=True):
+ wait_port_listen(port, proto="tcp")
+ cmd(tx_cmd, host=cfg.remote)
def test_zcrx_oneshot(cfg) -> None:
@@ -78,26 +78,26 @@ def test_zcrx_oneshot(cfg) -> None:
(rx_ring, hds_thresh) = _get_current_settings(cfg)
port = rand_port()
- ethtool(f"-G {cfg.ifname} tcp-data-split on", host=cfg.remote)
- defer(ethtool, f"-G {cfg.ifname} tcp-data-split auto", host=cfg.remote)
+ ethtool(f"-G {cfg.ifname} tcp-data-split on")
+ defer(ethtool, f"-G {cfg.ifname} tcp-data-split auto")
- ethtool(f"-G {cfg.ifname} hds-thresh 0", host=cfg.remote)
- defer(ethtool, f"-G {cfg.ifname} hds-thresh {hds_thresh}", host=cfg.remote)
+ ethtool(f"-G {cfg.ifname} hds-thresh 0")
+ defer(ethtool, f"-G {cfg.ifname} hds-thresh {hds_thresh}")
- ethtool(f"-G {cfg.ifname} rx 64", host=cfg.remote)
- defer(ethtool, f"-G {cfg.ifname} rx {rx_ring}", host=cfg.remote)
+ ethtool(f"-G {cfg.ifname} rx 64")
+ defer(ethtool, f"-G {cfg.ifname} rx {rx_ring}")
- ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}", host=cfg.remote)
- defer(ethtool, f"-X {cfg.ifname} default", host=cfg.remote)
+ ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}")
+ defer(ethtool, f"-X {cfg.ifname} default")
flow_rule_id = _set_flow_rule(cfg, port, combined_chans - 1)
- defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}", host=cfg.remote)
+ defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}")
- rx_cmd = f"{cfg.bin_remote} -s -p {port} -i {cfg.ifname} -q {combined_chans - 1} -o 4"
- tx_cmd = f"{cfg.bin_local} -c -h {cfg.remote_addr_v['6']} -p {port} -l 4096 -z 16384"
- with bkg(rx_cmd, host=cfg.remote, exit_wait=True):
- wait_port_listen(port, proto="tcp", host=cfg.remote)
- cmd(tx_cmd)
+ rx_cmd = f"{cfg.bin_local} -s -p {port} -i {cfg.ifname} -q {combined_chans - 1} -o 4"
+ tx_cmd = f"{cfg.bin_remote} -c -h {cfg.addr_v['6']} -p {port} -l 4096 -z 16384"
+ with bkg(rx_cmd, exit_wait=True):
+ wait_port_listen(port, proto="tcp")
+ cmd(tx_cmd, host=cfg.remote)
def test_zcrx_rss(cfg) -> None:
@@ -109,27 +109,27 @@ def test_zcrx_rss(cfg) -> None:
(rx_ring, hds_thresh) = _get_current_settings(cfg)
port = rand_port()
- ethtool(f"-G {cfg.ifname} tcp-data-split on", host=cfg.remote)
- defer(ethtool, f"-G {cfg.ifname} tcp-data-split auto", host=cfg.remote)
+ ethtool(f"-G {cfg.ifname} tcp-data-split on")
+ defer(ethtool, f"-G {cfg.ifname} tcp-data-split auto")
- ethtool(f"-G {cfg.ifname} hds-thresh 0", host=cfg.remote)
- defer(ethtool, f"-G {cfg.ifname} hds-thresh {hds_thresh}", host=cfg.remote)
+ ethtool(f"-G {cfg.ifname} hds-thresh 0")
+ defer(ethtool, f"-G {cfg.ifname} hds-thresh {hds_thresh}")
- ethtool(f"-G {cfg.ifname} rx 64", host=cfg.remote)
- defer(ethtool, f"-G {cfg.ifname} rx {rx_ring}", host=cfg.remote)
+ ethtool(f"-G {cfg.ifname} rx 64")
+ defer(ethtool, f"-G {cfg.ifname} rx {rx_ring}")
- ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}", host=cfg.remote)
- defer(ethtool, f"-X {cfg.ifname} default", host=cfg.remote)
+ ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}")
+ defer(ethtool, f"-X {cfg.ifname} default")
(ctx_id, delete_ctx) = _create_rss_ctx(cfg, combined_chans - 1)
flow_rule_id = _set_flow_rule_rss(cfg, port, ctx_id)
- defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}", host=cfg.remote)
+ defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}")
- rx_cmd = f"{cfg.bin_remote} -s -p {port} -i {cfg.ifname} -q {combined_chans - 1}"
- tx_cmd = f"{cfg.bin_local} -c -h {cfg.remote_addr_v['6']} -p {port} -l 12840"
- with bkg(rx_cmd, host=cfg.remote, exit_wait=True):
- wait_port_listen(port, proto="tcp", host=cfg.remote)
- cmd(tx_cmd)
+ rx_cmd = f"{cfg.bin_local} -s -p {port} -i {cfg.ifname} -q {combined_chans - 1}"
+ tx_cmd = f"{cfg.bin_remote} -c -h {cfg.addr_v['6']} -p {port} -l 12840"
+ with bkg(rx_cmd, exit_wait=True):
+ wait_port_listen(port, proto="tcp")
+ cmd(tx_cmd, host=cfg.remote)
def main() -> None:
diff --git a/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py b/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py
index b582885786f5..1462a339a74b 100644
--- a/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py
+++ b/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py
@@ -7,8 +7,25 @@ KSFT_DIR = (Path(__file__).parent / "../../../../..").resolve()
try:
sys.path.append(KSFT_DIR.as_posix())
+
from net.lib.py import *
from drivers.net.lib.py import *
+
+ # Import one by one to avoid pylint false positives
+ from net.lib.py import EthtoolFamily, NetdevFamily, NetshaperFamily, \
+ NlError, RtnlFamily, DevlinkFamily
+ from net.lib.py import CmdExitFailure
+ from net.lib.py import bkg, cmd, defer, ethtool, fd_read_timeout, ip, \
+ rand_port, tool, wait_port_listen
+ from net.lib.py import fd_read_timeout
+ from net.lib.py import KsftSkipEx, KsftFailEx, KsftXfailEx
+ from net.lib.py import ksft_disruptive, ksft_exit, ksft_pr, ksft_run, \
+ ksft_setup
+ from net.lib.py import ksft_eq, ksft_ge, ksft_in, ksft_is, ksft_lt, \
+ ksft_ne, ksft_not_in, ksft_raises, ksft_true
+ from net.lib.py import NetNSEnter
+ from drivers.net.lib.py import GenerateTraffic
+ from drivers.net.lib.py import NetDrvEnv, NetDrvEpEnv
except ModuleNotFoundError as e:
ksft_pr("Failed importing `net` library from kernel sources")
ksft_pr(str(e))
diff --git a/tools/testing/selftests/drivers/net/hw/ncdevmem.c b/tools/testing/selftests/drivers/net/hw/ncdevmem.c
index 02e4d3d7ded2..72f828021f83 100644
--- a/tools/testing/selftests/drivers/net/hw/ncdevmem.c
+++ b/tools/testing/selftests/drivers/net/hw/ncdevmem.c
@@ -526,12 +526,10 @@ static struct netdev_queue_id *create_queues(void)
struct netdev_queue_id *queues;
size_t i = 0;
- queues = calloc(num_queues, sizeof(*queues));
+ queues = netdev_queue_id_alloc(num_queues);
for (i = 0; i < num_queues; i++) {
- queues[i]._present.type = 1;
- queues[i]._present.id = 1;
- queues[i].type = NETDEV_QUEUE_TYPE_RX;
- queues[i].id = start_queue + i;
+ netdev_queue_id_set_type(&queues[i], NETDEV_QUEUE_TYPE_RX);
+ netdev_queue_id_set_id(&queues[i], start_queue + i);
}
return queues;
@@ -852,7 +850,6 @@ static int do_client(struct memory_buffer *mem)
ssize_t line_size = 0;
struct cmsghdr *cmsg;
char *line = NULL;
- unsigned long mid;
size_t len = 0;
int socket_fd;
__u32 ddmabuf;
diff --git a/tools/testing/selftests/drivers/net/hw/rss_api.py b/tools/testing/selftests/drivers/net/hw/rss_api.py
new file mode 100755
index 000000000000..19847f3d4a00
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/rss_api.py
@@ -0,0 +1,476 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+"""
+API level tests for RSS (mostly Netlink vs IOCTL).
+"""
+
+import errno
+import glob
+import random
+from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_is, ksft_ne, ksft_raises
+from lib.py import KsftSkipEx, KsftFailEx
+from lib.py import defer, ethtool, CmdExitFailure
+from lib.py import EthtoolFamily, NlError
+from lib.py import NetDrvEnv
+
+
+def _require_2qs(cfg):
+ qcnt = len(glob.glob(f"/sys/class/net/{cfg.ifname}/queues/rx-*"))
+ if qcnt < 2:
+ raise KsftSkipEx(f"Local has only {qcnt} queues")
+ return qcnt
+
+
+def _ethtool_create(cfg, act, opts):
+ output = ethtool(f"{act} {cfg.ifname} {opts}").stdout
+ # Output will be something like: "New RSS context is 1" or
+ # "Added rule with ID 7", we want the integer from the end
+ return int(output.split()[-1])
+
+
+def _ethtool_get_cfg(cfg, fl_type, to_nl=False):
+ descr = ethtool(f"-n {cfg.ifname} rx-flow-hash {fl_type}").stdout
+
+ if to_nl:
+ converter = {
+ "IP SA": "ip-src",
+ "IP DA": "ip-dst",
+ "L4 bytes 0 & 1 [TCP/UDP src port]": "l4-b-0-1",
+ "L4 bytes 2 & 3 [TCP/UDP dst port]": "l4-b-2-3",
+ }
+
+ ret = set()
+ else:
+ converter = {
+ "IP SA": "s",
+ "IP DA": "d",
+ "L3 proto": "t",
+ "L4 bytes 0 & 1 [TCP/UDP src port]": "f",
+ "L4 bytes 2 & 3 [TCP/UDP dst port]": "n",
+ }
+
+ ret = ""
+
+ for line in descr.split("\n")[1:-2]:
+ # if this raises we probably need to add more keys to converter above
+ if to_nl:
+ ret.add(converter[line])
+ else:
+ ret += converter[line]
+ return ret
+
+
+def test_rxfh_nl_set_fail(cfg):
+ """
+ Test error path of Netlink SET.
+ """
+ _require_2qs(cfg)
+
+ ethnl = EthtoolFamily()
+ ethnl.ntf_subscribe("monitor")
+
+ with ksft_raises(NlError):
+ ethnl.rss_set({"header": {"dev-name": "lo"},
+ "indir": None})
+
+ with ksft_raises(NlError):
+ ethnl.rss_set({"header": {"dev-index": cfg.ifindex},
+ "indir": [100000]})
+ ntf = next(ethnl.poll_ntf(duration=0.2), None)
+ ksft_is(ntf, None)
+
+
+def test_rxfh_nl_set_indir(cfg):
+ """
+ Test setting indirection table via Netlink.
+ """
+ qcnt = _require_2qs(cfg)
+
+ # Test some SETs with a value
+ reset = defer(cfg.ethnl.rss_set,
+ {"header": {"dev-index": cfg.ifindex}, "indir": None})
+ cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex},
+ "indir": [1]})
+ rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+ ksft_eq(set(rss.get("indir", [-1])), {1})
+
+ cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex},
+ "indir": [0, 1]})
+ rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+ ksft_eq(set(rss.get("indir", [-1])), {0, 1})
+
+ # Make sure we can't set the queue count below max queue used
+ with ksft_raises(CmdExitFailure):
+ ethtool(f"-L {cfg.ifname} combined 0 rx 1")
+ with ksft_raises(CmdExitFailure):
+ ethtool(f"-L {cfg.ifname} combined 1 rx 0")
+
+ # Test reset back to default
+ reset.exec()
+ rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+ ksft_eq(set(rss.get("indir", [-1])), set(range(qcnt)))
+
+
+def test_rxfh_nl_set_indir_ctx(cfg):
+ """
+ Test setting indirection table for a custom context via Netlink.
+ """
+ _require_2qs(cfg)
+
+ # Get setting for ctx 0, we'll make sure they don't get clobbered
+ dflt = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+
+ # Create context
+ ctx_id = _ethtool_create(cfg, "-X", "context new")
+ defer(ethtool, f"-X {cfg.ifname} context {ctx_id} delete")
+
+ cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex},
+ "context": ctx_id, "indir": [1]})
+ rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex},
+ "context": ctx_id})
+ ksft_eq(set(rss.get("indir", [-1])), {1})
+
+ ctx0 = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+ ksft_eq(ctx0, dflt)
+
+ cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex},
+ "context": ctx_id, "indir": [0, 1]})
+ rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex},
+ "context": ctx_id})
+ ksft_eq(set(rss.get("indir", [-1])), {0, 1})
+
+ ctx0 = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+ ksft_eq(ctx0, dflt)
+
+ # Make sure we can't set the queue count below max queue used
+ with ksft_raises(CmdExitFailure):
+ ethtool(f"-L {cfg.ifname} combined 0 rx 1")
+ with ksft_raises(CmdExitFailure):
+ ethtool(f"-L {cfg.ifname} combined 1 rx 0")
+
+
+def test_rxfh_indir_ntf(cfg):
+ """
+ Check that Netlink notifications are generated when RSS indirection
+ table was modified.
+ """
+ _require_2qs(cfg)
+
+ ethnl = EthtoolFamily()
+ ethnl.ntf_subscribe("monitor")
+
+ ethtool(f"--disable-netlink -X {cfg.ifname} weight 0 1")
+ reset = defer(ethtool, f"-X {cfg.ifname} default")
+
+ ntf = next(ethnl.poll_ntf(duration=0.2), None)
+ if ntf is None:
+ raise KsftFailEx("No notification received")
+ ksft_eq(ntf["name"], "rss-ntf")
+ ksft_eq(set(ntf["msg"]["indir"]), {1})
+
+ reset.exec()
+ ntf = next(ethnl.poll_ntf(duration=0.2), None)
+ if ntf is None:
+ raise KsftFailEx("No notification received after reset")
+ ksft_eq(ntf["name"], "rss-ntf")
+ ksft_is(ntf["msg"].get("context"), None)
+ ksft_ne(set(ntf["msg"]["indir"]), {1})
+
+
+def test_rxfh_indir_ctx_ntf(cfg):
+ """
+ Check that Netlink notifications are generated when RSS indirection
+ table was modified on an additional RSS context.
+ """
+ _require_2qs(cfg)
+
+ ctx_id = _ethtool_create(cfg, "-X", "context new")
+ defer(ethtool, f"-X {cfg.ifname} context {ctx_id} delete")
+
+ ethnl = EthtoolFamily()
+ ethnl.ntf_subscribe("monitor")
+
+ ethtool(f"--disable-netlink -X {cfg.ifname} context {ctx_id} weight 0 1")
+
+ ntf = next(ethnl.poll_ntf(duration=0.2), None)
+ if ntf is None:
+ raise KsftFailEx("No notification received")
+ ksft_eq(ntf["name"], "rss-ntf")
+ ksft_eq(ntf["msg"].get("context"), ctx_id)
+ ksft_eq(set(ntf["msg"]["indir"]), {1})
+
+
+def test_rxfh_nl_set_key(cfg):
+ """
+ Test setting hashing key via Netlink.
+ """
+
+ dflt = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+ defer(cfg.ethnl.rss_set,
+ {"header": {"dev-index": cfg.ifindex},
+ "hkey": dflt["hkey"], "indir": None})
+
+ # Empty key should error out
+ with ksft_raises(NlError) as cm:
+ cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex},
+ "hkey": None})
+ ksft_eq(cm.exception.nl_msg.extack['bad-attr'], '.hkey')
+
+ # Set key to random
+ mod = random.randbytes(len(dflt["hkey"]))
+ cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex},
+ "hkey": mod})
+ rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+ ksft_eq(rss.get("hkey", [-1]), mod)
+
+ # Set key to random and indir tbl to something at once
+ mod = random.randbytes(len(dflt["hkey"]))
+ cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex},
+ "indir": [0, 1], "hkey": mod})
+ rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+ ksft_eq(rss.get("hkey", [-1]), mod)
+ ksft_eq(set(rss.get("indir", [-1])), {0, 1})
+
+
+def test_rxfh_fields(cfg):
+ """
+ Test reading Rx Flow Hash over Netlink.
+ """
+
+ flow_types = ["tcp4", "tcp6", "udp4", "udp6"]
+ ethnl = EthtoolFamily()
+
+ cfg_nl = ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+ for fl_type in flow_types:
+ one = _ethtool_get_cfg(cfg, fl_type, to_nl=True)
+ ksft_eq(one, cfg_nl["flow-hash"][fl_type],
+ comment="Config for " + fl_type)
+
+
+def test_rxfh_fields_set(cfg):
+ """ Test configuring Rx Flow Hash over Netlink. """
+
+ flow_types = ["tcp4", "tcp6", "udp4", "udp6"]
+
+ # Collect current settings
+ cfg_old = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+ # symmetric hashing is config-order-sensitive make sure we leave
+ # symmetric mode, or make the flow-hash sym-compatible first
+ changes = [{"flow-hash": cfg_old["flow-hash"],},
+ {"input-xfrm": cfg_old.get("input-xfrm", {}),}]
+ if cfg_old.get("input-xfrm"):
+ changes = list(reversed(changes))
+ for old in changes:
+ defer(cfg.ethnl.rss_set, {"header": {"dev-index": cfg.ifindex},} | old)
+
+ # symmetric hashing prevents some of the configs below
+ if cfg_old.get("input-xfrm"):
+ cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex},
+ "input-xfrm": {}})
+
+ for fl_type in flow_types:
+ cur = _ethtool_get_cfg(cfg, fl_type)
+ if cur == "sdfn":
+ change_nl = {"ip-src", "ip-dst"}
+ change_ic = "sd"
+ else:
+ change_nl = {"l4-b-0-1", "l4-b-2-3", "ip-src", "ip-dst"}
+ change_ic = "sdfn"
+
+ cfg.ethnl.rss_set({
+ "header": {"dev-index": cfg.ifindex},
+ "flow-hash": {fl_type: change_nl}
+ })
+ reset = defer(ethtool, f"--disable-netlink -N {cfg.ifname} "
+ f"rx-flow-hash {fl_type} {cur}")
+
+ cfg_nl = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+ ksft_eq(change_nl, cfg_nl["flow-hash"][fl_type],
+ comment=f"Config for {fl_type} over Netlink")
+ cfg_ic = _ethtool_get_cfg(cfg, fl_type)
+ ksft_eq(change_ic, cfg_ic,
+ comment=f"Config for {fl_type} over IOCTL")
+
+ reset.exec()
+ cfg_nl = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+ ksft_eq(cfg_old["flow-hash"][fl_type], cfg_nl["flow-hash"][fl_type],
+ comment=f"Un-config for {fl_type} over Netlink")
+ cfg_ic = _ethtool_get_cfg(cfg, fl_type)
+ ksft_eq(cur, cfg_ic, comment=f"Un-config for {fl_type} over IOCTL")
+
+ # Try to set multiple at once, the defer was already installed at the start
+ change = {"ip-src"}
+ if change == cfg_old["flow-hash"]["tcp4"]:
+ change = {"ip-dst"}
+ cfg.ethnl.rss_set({
+ "header": {"dev-index": cfg.ifindex},
+ "flow-hash": {x: change for x in flow_types}
+ })
+
+ cfg_nl = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+ for fl_type in flow_types:
+ ksft_eq(change, cfg_nl["flow-hash"][fl_type],
+ comment=f"multi-config for {fl_type} over Netlink")
+
+
+def test_rxfh_fields_set_xfrm(cfg):
+ """ Test changing Rx Flow Hash vs xfrm_input at once. """
+
+ def set_rss(cfg, xfrm, fh):
+ cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex},
+ "input-xfrm": xfrm, "flow-hash": fh})
+
+ # Install the reset handler
+ cfg_old = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+ # symmetric hashing is config-order-sensitive make sure we leave
+ # symmetric mode, or make the flow-hash sym-compatible first
+ changes = [{"flow-hash": cfg_old["flow-hash"],},
+ {"input-xfrm": cfg_old.get("input-xfrm", {}),}]
+ if cfg_old.get("input-xfrm"):
+ changes = list(reversed(changes))
+ for old in changes:
+ defer(cfg.ethnl.rss_set, {"header": {"dev-index": cfg.ifindex},} | old)
+
+ # Make sure we start with input-xfrm off, and tcp4 config non-sym
+ set_rss(cfg, {}, {})
+ set_rss(cfg, {}, {"tcp4": {"ip-src"}})
+
+ # Setting sym and fixing tcp4 config not expected to pass right now
+ with ksft_raises(NlError):
+ set_rss(cfg, {"sym-xor"}, {"tcp4": {"ip-src", "ip-dst"}})
+ # One at a time should work, hopefully
+ set_rss(cfg, 0, {"tcp4": {"ip-src", "ip-dst"}})
+ no_support = False
+ try:
+ set_rss(cfg, {"sym-xor"}, {})
+ except NlError:
+ try:
+ set_rss(cfg, {"sym-or-xor"}, {})
+ except NlError:
+ no_support = True
+ if no_support:
+ raise KsftSkipEx("no input-xfrm supported")
+ # Disabling two at once should not work either without kernel changes
+ with ksft_raises(NlError):
+ set_rss(cfg, {}, {"tcp4": {"ip-src"}})
+
+
+def test_rxfh_fields_ntf(cfg):
+ """ Test Rx Flow Hash notifications. """
+
+ cur = _ethtool_get_cfg(cfg, "tcp4")
+ if cur == "sdfn":
+ change = {"ip-src", "ip-dst"}
+ else:
+ change = {"l4-b-0-1", "l4-b-2-3", "ip-src", "ip-dst"}
+
+ ethnl = EthtoolFamily()
+ ethnl.ntf_subscribe("monitor")
+
+ ethnl.rss_set({
+ "header": {"dev-index": cfg.ifindex},
+ "flow-hash": {"tcp4": change}
+ })
+ reset = defer(ethtool,
+ f"--disable-netlink -N {cfg.ifname} rx-flow-hash tcp4 {cur}")
+
+ ntf = next(ethnl.poll_ntf(duration=0.2), None)
+ if ntf is None:
+ raise KsftFailEx("No notification received after IOCTL change")
+ ksft_eq(ntf["name"], "rss-ntf")
+ ksft_eq(ntf["msg"]["flow-hash"]["tcp4"], change)
+ ksft_eq(next(ethnl.poll_ntf(duration=0.01), None), None)
+
+ reset.exec()
+ ntf = next(ethnl.poll_ntf(duration=0.2), None)
+ if ntf is None:
+ raise KsftFailEx("No notification received after Netlink change")
+ ksft_eq(ntf["name"], "rss-ntf")
+ ksft_ne(ntf["msg"]["flow-hash"]["tcp4"], change)
+ ksft_eq(next(ethnl.poll_ntf(duration=0.01), None), None)
+
+
+def test_rss_ctx_add(cfg):
+ """ Test creating an additional RSS context via Netlink """
+
+ _require_2qs(cfg)
+
+ # Test basic creation
+ ctx = cfg.ethnl.rss_create_act({"header": {"dev-index": cfg.ifindex}})
+ d = defer(ethtool, f"-X {cfg.ifname} context {ctx.get('context')} delete")
+ ksft_ne(ctx.get("context", 0), 0)
+ ksft_ne(set(ctx.get("indir", [0])), {0},
+ comment="Driver should init the indirection table")
+
+ # Try requesting the ID we just got allocated
+ with ksft_raises(NlError) as cm:
+ ctx = cfg.ethnl.rss_create_act({
+ "header": {"dev-index": cfg.ifindex},
+ "context": ctx.get("context"),
+ })
+ ethtool(f"-X {cfg.ifname} context {ctx.get('context')} delete")
+ d.exec()
+ ksft_eq(cm.exception.nl_msg.error, -errno.EBUSY)
+
+ # Test creating with a specified RSS table, and context ID
+ ctx_id = ctx.get("context")
+ ctx = cfg.ethnl.rss_create_act({
+ "header": {"dev-index": cfg.ifindex},
+ "context": ctx_id,
+ "indir": [1],
+ })
+ ethtool(f"-X {cfg.ifname} context {ctx.get('context')} delete")
+ ksft_eq(ctx.get("context"), ctx_id)
+ ksft_eq(set(ctx.get("indir", [0])), {1})
+
+
+def test_rss_ctx_ntf(cfg):
+ """ Test notifications for creating additional RSS contexts """
+
+ ethnl = EthtoolFamily()
+ ethnl.ntf_subscribe("monitor")
+
+ # Create / delete via Netlink
+ ctx = cfg.ethnl.rss_create_act({"header": {"dev-index": cfg.ifindex}})
+ cfg.ethnl.rss_delete_act({
+ "header": {"dev-index": cfg.ifindex},
+ "context": ctx["context"],
+ })
+
+ ntf = next(ethnl.poll_ntf(duration=0.2), None)
+ if ntf is None:
+ raise KsftFailEx("[NL] No notification after context creation")
+ ksft_eq(ntf["name"], "rss-create-ntf")
+ ksft_eq(ctx, ntf["msg"])
+
+ ntf = next(ethnl.poll_ntf(duration=0.2), None)
+ if ntf is None:
+ raise KsftFailEx("[NL] No notification after context deletion")
+ ksft_eq(ntf["name"], "rss-delete-ntf")
+
+ # Create / deleve via IOCTL
+ ctx_id = _ethtool_create(cfg, "--disable-netlink -X", "context new")
+ ethtool(f"--disable-netlink -X {cfg.ifname} context {ctx_id} delete")
+ ntf = next(ethnl.poll_ntf(duration=0.2), None)
+ if ntf is None:
+ raise KsftFailEx("[IOCTL] No notification after context creation")
+ ksft_eq(ntf["name"], "rss-create-ntf")
+
+ ntf = next(ethnl.poll_ntf(duration=0.2), None)
+ if ntf is None:
+ raise KsftFailEx("[IOCTL] No notification after context deletion")
+ ksft_eq(ntf["name"], "rss-delete-ntf")
+
+
+def main() -> None:
+ """ Ksft boiler plate main """
+
+ with NetDrvEnv(__file__, nsim_test=False) as cfg:
+ cfg.ethnl = EthtoolFamily()
+ ksft_run(globs=globals(), case_pfx={"test_"}, args=(cfg, ))
+ ksft_exit()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py b/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py
index 648ff50bc1c3..72880e388478 100755
--- a/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py
+++ b/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py
@@ -32,16 +32,16 @@ def test_rss_input_xfrm(cfg, ipver):
if multiprocessing.cpu_count() < 2:
raise KsftSkipEx("Need at least two CPUs to test symmetric RSS hash")
- cfg.require_cmd("socat", remote=True)
+ cfg.require_cmd("socat", local=False, remote=True)
if not hasattr(socket, "SO_INCOMING_CPU"):
raise KsftSkipEx("socket.SO_INCOMING_CPU was added in Python 3.11")
- input_xfrm = cfg.ethnl.rss_get(
- {'header': {'dev-name': cfg.ifname}}).get('input-xfrm')
+ rss = cfg.ethnl.rss_get({'header': {'dev-name': cfg.ifname}})
+ input_xfrm = set(filter(lambda x: 'sym' in x, rss.get('input-xfrm', {})))
# Check for symmetric xor/or-xor
- if not input_xfrm or (input_xfrm != 1 and input_xfrm != 2):
+ if not input_xfrm:
raise KsftSkipEx("Symmetric RSS hash not requested")
cpus = set()
diff --git a/tools/testing/selftests/drivers/net/hw/tso.py b/tools/testing/selftests/drivers/net/hw/tso.py
index 3370827409aa..c13dd5efa27a 100755
--- a/tools/testing/selftests/drivers/net/hw/tso.py
+++ b/tools/testing/selftests/drivers/net/hw/tso.py
@@ -34,7 +34,7 @@ def tcp_sock_get_retrans(sock):
def run_one_stream(cfg, ipver, remote_v4, remote_v6, should_lso):
- cfg.require_cmd("socat", remote=True)
+ cfg.require_cmd("socat", local=False, remote=True)
port = rand_port()
listen_cmd = f"socat -{ipver} -t 2 -u TCP-LISTEN:{port},reuseport /dev/null,ignoreeof"
@@ -102,7 +102,7 @@ def build_tunnel(cfg, outer_ipver, tun_info):
remote_addr = cfg.remote_addr_v[outer_ipver]
tun_type = tun_info[0]
- tun_arg = tun_info[2]
+ tun_arg = tun_info[1]
ip(f"link add {tun_type}-ksft type {tun_type} {tun_arg} local {local_addr} remote {remote_addr} dev {cfg.ifname}")
defer(ip, f"link del {tun_type}-ksft")
ip(f"link set dev {tun_type}-ksft up")
@@ -119,15 +119,30 @@ def build_tunnel(cfg, outer_ipver, tun_info):
return remote_v4, remote_v6
+def restore_wanted_features(cfg):
+ features_cmd = ""
+ for feature in cfg.hw_features:
+ setting = "on" if feature in cfg.wanted_features else "off"
+ features_cmd += f" {feature} {setting}"
+ try:
+ ethtool(f"-K {cfg.ifname} {features_cmd}")
+ except Exception as e:
+ ksft_pr(f"WARNING: failure restoring wanted features: {e}")
+
+
def test_builder(name, cfg, outer_ipver, feature, tun=None, inner_ipver=None):
"""Construct specific tests from the common template."""
def f(cfg):
cfg.require_ipver(outer_ipver)
+ defer(restore_wanted_features, cfg)
if not cfg.have_stat_super_count and \
not cfg.have_stat_wire_count:
raise KsftSkipEx(f"Device does not support LSO queue stats")
+ if feature not in cfg.hw_features:
+ raise KsftSkipEx(f"Device does not support {feature}")
+
ipver = outer_ipver
if tun:
remote_v4, remote_v6 = build_tunnel(cfg, ipver, tun)
@@ -136,36 +151,21 @@ def test_builder(name, cfg, outer_ipver, feature, tun=None, inner_ipver=None):
remote_v4 = cfg.remote_addr_v["4"]
remote_v6 = cfg.remote_addr_v["6"]
- tun_partial = tun and tun[1]
- # Tunnel which can silently fall back to gso-partial
- has_gso_partial = tun and 'tx-gso-partial' in cfg.features
-
- # For TSO4 via partial we need mangleid
- if ipver == "4" and feature in cfg.partial_features:
- ksft_pr("Testing with mangleid enabled")
- if 'tx-tcp-mangleid-segmentation' not in cfg.features:
- ethtool(f"-K {cfg.ifname} tx-tcp-mangleid-segmentation on")
- defer(ethtool, f"-K {cfg.ifname} tx-tcp-mangleid-segmentation off")
-
# First test without the feature enabled.
ethtool(f"-K {cfg.ifname} {feature} off")
- if has_gso_partial:
- ethtool(f"-K {cfg.ifname} tx-gso-partial off")
run_one_stream(cfg, ipver, remote_v4, remote_v6, should_lso=False)
- # Now test with the feature enabled.
- # For compatible tunnels only - just GSO partial, not specific feature.
- if has_gso_partial:
+ ethtool(f"-K {cfg.ifname} tx-gso-partial off")
+ ethtool(f"-K {cfg.ifname} tx-tcp-mangleid-segmentation off")
+ if feature in cfg.partial_features:
ethtool(f"-K {cfg.ifname} tx-gso-partial on")
- run_one_stream(cfg, ipver, remote_v4, remote_v6,
- should_lso=tun_partial)
+ if ipver == "4":
+ ksft_pr("Testing with mangleid enabled")
+ ethtool(f"-K {cfg.ifname} tx-tcp-mangleid-segmentation on")
# Full feature enabled.
- if feature in cfg.features:
- ethtool(f"-K {cfg.ifname} {feature} on")
- run_one_stream(cfg, ipver, remote_v4, remote_v6, should_lso=True)
- else:
- raise KsftXfailEx(f"Device does not support {feature}")
+ ethtool(f"-K {cfg.ifname} {feature} on")
+ run_one_stream(cfg, ipver, remote_v4, remote_v6, should_lso=True)
f.__name__ = name + ((outer_ipver + "_") if tun else "") + "ipv" + inner_ipver
return f
@@ -176,23 +176,39 @@ def query_nic_features(cfg) -> None:
cfg.have_stat_super_count = False
cfg.have_stat_wire_count = False
- cfg.features = set()
features = cfg.ethnl.features_get({"header": {"dev-index": cfg.ifindex}})
- for f in features["active"]["bits"]["bit"]:
- cfg.features.add(f["name"])
+
+ cfg.wanted_features = set()
+ for f in features["wanted"]["bits"]["bit"]:
+ cfg.wanted_features.add(f["name"])
+
+ cfg.hw_features = set()
+ hw_all_features_cmd = ""
+ for f in features["hw"]["bits"]["bit"]:
+ if f.get("value", False):
+ feature = f["name"]
+ cfg.hw_features.add(feature)
+ hw_all_features_cmd += f" {feature} on"
+ try:
+ ethtool(f"-K {cfg.ifname} {hw_all_features_cmd}")
+ except Exception as e:
+ ksft_pr(f"WARNING: failure enabling all hw features: {e}")
+ ksft_pr("partial gso feature detection may be impacted")
# Check which features are supported via GSO partial
cfg.partial_features = set()
- if 'tx-gso-partial' in cfg.features:
+ if 'tx-gso-partial' in cfg.hw_features:
ethtool(f"-K {cfg.ifname} tx-gso-partial off")
no_partial = set()
features = cfg.ethnl.features_get({"header": {"dev-index": cfg.ifindex}})
for f in features["active"]["bits"]["bit"]:
no_partial.add(f["name"])
- cfg.partial_features = cfg.features - no_partial
+ cfg.partial_features = cfg.hw_features - no_partial
ethtool(f"-K {cfg.ifname} tx-gso-partial on")
+ restore_wanted_features(cfg)
+
stats = cfg.netnl.qstats_get({"ifindex": cfg.ifindex}, dump=True)
if stats:
if 'tx-hw-gso-packets' in stats[0]:
@@ -211,13 +227,14 @@ def main() -> None:
query_nic_features(cfg)
test_info = (
- # name, v4/v6 ethtool_feature tun:(type, partial, args)
- ("", "4", "tx-tcp-segmentation", None),
- ("", "6", "tx-tcp6-segmentation", None),
- ("vxlan", "", "tx-udp_tnl-segmentation", ("vxlan", True, "id 100 dstport 4789 noudpcsum")),
- ("vxlan_csum", "", "tx-udp_tnl-csum-segmentation", ("vxlan", False, "id 100 dstport 4789 udpcsum")),
- ("gre", "4", "tx-gre-segmentation", ("gre", False, "")),
- ("gre", "6", "tx-gre-segmentation", ("ip6gre", False, "")),
+ # name, v4/v6 ethtool_feature tun:(type, args, inner ip versions)
+ ("", "4", "tx-tcp-segmentation", None),
+ ("", "6", "tx-tcp6-segmentation", None),
+ ("vxlan", "4", "tx-udp_tnl-segmentation", ("vxlan", "id 100 dstport 4789 noudpcsum", ("4", "6"))),
+ ("vxlan", "6", "tx-udp_tnl-segmentation", ("vxlan", "id 100 dstport 4789 udp6zerocsumtx udp6zerocsumrx", ("4", "6"))),
+ ("vxlan_csum", "", "tx-udp_tnl-csum-segmentation", ("vxlan", "id 100 dstport 4789 udpcsum", ("4", "6"))),
+ ("gre", "4", "tx-gre-segmentation", ("gre", "", ("4", "6"))),
+ ("gre", "6", "tx-gre-segmentation", ("ip6gre","", ("4", "6"))),
)
cases = []
@@ -227,11 +244,13 @@ def main() -> None:
if info[1] and outer_ipver != info[1]:
continue
- cases.append(test_builder(info[0], cfg, outer_ipver, info[2],
- tun=info[3], inner_ipver="4"))
if info[3]:
- cases.append(test_builder(info[0], cfg, outer_ipver, info[2],
- tun=info[3], inner_ipver="6"))
+ cases += [
+ test_builder(info[0], cfg, outer_ipver, info[2], info[3], inner_ipver)
+ for inner_ipver in info[3][2]
+ ]
+ else:
+ cases.append(test_builder(info[0], cfg, outer_ipver, info[2], None, outer_ipver))
ksft_run(cases=cases, args=(cfg, ))
ksft_exit()
diff --git a/tools/testing/selftests/drivers/net/lib/py/__init__.py b/tools/testing/selftests/drivers/net/lib/py/__init__.py
index 401e70f7f136..8711c67ad658 100644
--- a/tools/testing/selftests/drivers/net/lib/py/__init__.py
+++ b/tools/testing/selftests/drivers/net/lib/py/__init__.py
@@ -7,7 +7,21 @@ KSFT_DIR = (Path(__file__).parent / "../../../..").resolve()
try:
sys.path.append(KSFT_DIR.as_posix())
+
from net.lib.py import *
+
+ # Import one by one to avoid pylint false positives
+ from net.lib.py import EthtoolFamily, NetdevFamily, NetshaperFamily, \
+ NlError, RtnlFamily, DevlinkFamily
+ from net.lib.py import CmdExitFailure
+ from net.lib.py import bkg, cmd, bpftool, bpftrace, defer, ethtool, \
+ fd_read_timeout, ip, rand_port, tool, wait_port_listen
+ from net.lib.py import fd_read_timeout
+ from net.lib.py import KsftSkipEx, KsftFailEx, KsftXfailEx
+ from net.lib.py import ksft_disruptive, ksft_exit, ksft_pr, ksft_run, \
+ ksft_setup
+ from net.lib.py import ksft_eq, ksft_ge, ksft_in, ksft_is, ksft_lt, \
+ ksft_ne, ksft_not_in, ksft_raises, ksft_true
except ModuleNotFoundError as e:
ksft_pr("Failed importing `net` library from kernel sources")
ksft_pr(str(e))
diff --git a/tools/testing/selftests/drivers/net/lib/py/env.py b/tools/testing/selftests/drivers/net/lib/py/env.py
index 3bccddf8cbc5..1b8bd648048f 100644
--- a/tools/testing/selftests/drivers/net/lib/py/env.py
+++ b/tools/testing/selftests/drivers/net/lib/py/env.py
@@ -259,7 +259,7 @@ class NetDrvEpEnv(NetDrvEnvBase):
if not self._require_cmd(comm, "local"):
raise KsftSkipEx("Test requires command: " + comm)
if remote:
- if not self._require_cmd(comm, "remote"):
+ if not self._require_cmd(comm, "remote", host=self.remote):
raise KsftSkipEx("Test requires (remote) command: " + comm)
def wait_hw_stats_settle(self):
diff --git a/tools/testing/selftests/drivers/net/lib/py/load.py b/tools/testing/selftests/drivers/net/lib/py/load.py
index 44151b7b1a24..c4e808407cc4 100644
--- a/tools/testing/selftests/drivers/net/lib/py/load.py
+++ b/tools/testing/selftests/drivers/net/lib/py/load.py
@@ -7,7 +7,7 @@ from lib.py import ksft_pr, cmd, ip, rand_port, wait_port_listen
class GenerateTraffic:
def __init__(self, env, port=None):
- env.require_cmd("iperf3", remote=True)
+ env.require_cmd("iperf3", local=True, remote=True)
self.env = env
diff --git a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh
index 29b01b8e2215..b6071e80ebbb 100644
--- a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh
+++ b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh
@@ -11,9 +11,11 @@ set -euo pipefail
LIBDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")
SRCIF="" # to be populated later
-SRCIP=192.0.2.1
+SRCIP4="192.0.2.1"
+SRCIP6="fc00::1"
DSTIF="" # to be populated later
-DSTIP=192.0.2.2
+DSTIP4="192.0.2.2"
+DSTIP6="fc00::2"
PORT="6666"
MSG="netconsole selftest"
@@ -80,7 +82,23 @@ function configure_ip() {
ip link set "${SRCIF}" up
}
+function select_ipv4_or_ipv6()
+{
+ local VERSION=${1}
+
+ if [[ "$VERSION" == "ipv6" ]]
+ then
+ DSTIP="${DSTIP6}"
+ SRCIP="${SRCIP6}"
+ else
+ DSTIP="${DSTIP4}"
+ SRCIP="${SRCIP4}"
+ fi
+}
+
function set_network() {
+ local IP_VERSION=${1:-"ipv4"}
+
# setup_ns function is coming from lib.sh
setup_ns NAMESPACE
@@ -91,10 +109,13 @@ function set_network() {
# Link both interfaces back to back
link_ifaces
+ select_ipv4_or_ipv6 "${IP_VERSION}"
configure_ip
}
function create_dynamic_target() {
+ local FORMAT=${1:-"extended"}
+
DSTMAC=$(ip netns exec "${NAMESPACE}" \
ip link show "${DSTIF}" | awk '/ether/ {print $2}')
@@ -106,7 +127,33 @@ function create_dynamic_target() {
echo "${DSTMAC}" > "${NETCONS_PATH}"/remote_mac
echo "${SRCIF}" > "${NETCONS_PATH}"/dev_name
+ if [ "${FORMAT}" == "basic" ]
+ then
+ # Basic target does not support release
+ echo 0 > "${NETCONS_PATH}"/release
+ echo 0 > "${NETCONS_PATH}"/extended
+ elif [ "${FORMAT}" == "extended" ]
+ then
+ echo 1 > "${NETCONS_PATH}"/extended
+ fi
+
echo 1 > "${NETCONS_PATH}"/enabled
+
+ # This will make sure that the kernel was able to
+ # load the netconsole driver configuration. The console message
+ # gets more organized/sequential as well.
+ sleep 1
+}
+
+# Generate the command line argument for netconsole following:
+# netconsole=[+][src-port]@[src-ip]/[<dev>],[tgt-port]@<tgt-ip>/[tgt-macaddr]
+function create_cmdline_str() {
+ DSTMAC=$(ip netns exec "${NAMESPACE}" \
+ ip link show "${DSTIF}" | awk '/ether/ {print $2}')
+ SRCPORT="1514"
+ TGTPORT="6666"
+
+ echo "netconsole=\"+${SRCPORT}@${SRCIP}/${SRCIF},${TGTPORT}@${DSTIP}/${DSTMAC}\""
}
# Do not append the release to the header of the message
@@ -116,16 +163,9 @@ function disable_release_append() {
echo 1 > "${NETCONS_PATH}"/enabled
}
-function cleanup() {
+function do_cleanup() {
local NSIM_DEV_SYS_DEL="/sys/bus/netdevsim/del_device"
- # delete netconsole dynamic reconfiguration
- echo 0 > "${NETCONS_PATH}"/enabled
- # Remove all the keys that got created during the selftest
- find "${NETCONS_PATH}/userdata/" -mindepth 1 -type d -delete
- # Remove the configfs entry
- rmdir "${NETCONS_PATH}"
-
# Delete netdevsim devices
echo "$NSIM_DEV_2_ID" > "$NSIM_DEV_SYS_DEL"
echo "$NSIM_DEV_1_ID" > "$NSIM_DEV_SYS_DEL"
@@ -137,6 +177,17 @@ function cleanup() {
echo "${DEFAULT_PRINTK_VALUES}" > /proc/sys/kernel/printk
}
+function cleanup() {
+ # delete netconsole dynamic reconfiguration
+ echo 0 > "${NETCONS_PATH}"/enabled
+ # Remove all the keys that got created during the selftest
+ find "${NETCONS_PATH}/userdata/" -mindepth 1 -type d -delete
+ # Remove the configfs entry
+ rmdir "${NETCONS_PATH}"
+
+ do_cleanup
+}
+
function set_user_data() {
if [[ ! -d "${NETCONS_PATH}""/userdata" ]]
then
@@ -152,18 +203,24 @@ function set_user_data() {
function listen_port_and_save_to() {
local OUTPUT=${1}
+ local IPVERSION=${2:-"ipv4"}
+
+ if [ "${IPVERSION}" == "ipv4" ]
+ then
+ SOCAT_MODE="UDP-LISTEN"
+ else
+ SOCAT_MODE="UDP6-LISTEN"
+ fi
+
# Just wait for 2 seconds
timeout 2 ip netns exec "${NAMESPACE}" \
- socat UDP-LISTEN:"${PORT}",fork "${OUTPUT}"
+ socat "${SOCAT_MODE}":"${PORT}",fork "${OUTPUT}"
}
-function validate_result() {
+# Only validate that the message arrived properly
+function validate_msg() {
local TMPFILENAME="$1"
- # TMPFILENAME will contain something like:
- # 6.11.1-0_fbk0_rc13_509_g30d75cea12f7,13,1822,115075213798,-;netconsole selftest: netcons_gtJHM
- # key=value
-
# Check if the file exists
if [ ! -f "$TMPFILENAME" ]; then
echo "FAIL: File was not generated." >&2
@@ -175,17 +232,32 @@ function validate_result() {
cat "${TMPFILENAME}" >&2
exit "${ksft_fail}"
fi
+}
- if ! grep -q "${USERDATA_KEY}=${USERDATA_VALUE}" "${TMPFILENAME}"; then
- echo "FAIL: ${USERDATA_KEY}=${USERDATA_VALUE} not found in ${TMPFILENAME}" >&2
- cat "${TMPFILENAME}" >&2
- exit "${ksft_fail}"
+# Validate the message and userdata
+function validate_result() {
+ local TMPFILENAME="$1"
+
+ # TMPFILENAME will contain something like:
+ # 6.11.1-0_fbk0_rc13_509_g30d75cea12f7,13,1822,115075213798,-;netconsole selftest: netcons_gtJHM
+ # key=value
+
+ validate_msg "${TMPFILENAME}"
+
+ # userdata is not supported on basic format target,
+ # thus, do not validate it.
+ if [ "${FORMAT}" != "basic" ];
+ then
+ if ! grep -q "${USERDATA_KEY}=${USERDATA_VALUE}" "${TMPFILENAME}"; then
+ echo "FAIL: ${USERDATA_KEY}=${USERDATA_VALUE} not found in ${TMPFILENAME}" >&2
+ cat "${TMPFILENAME}" >&2
+ exit "${ksft_fail}"
+ fi
fi
# Delete the file once it is validated, otherwise keep it
# for debugging purposes
rm "${TMPFILENAME}"
- exit "${ksft_pass}"
}
function check_for_dependencies() {
@@ -209,6 +281,11 @@ function check_for_dependencies() {
exit "${ksft_skip}"
fi
+ if [ ! -f /proc/net/if_inet6 ]; then
+ echo "SKIP: IPv6 not configured. Check if CONFIG_IPV6 is enabled" >&2
+ exit "${ksft_skip}"
+ fi
+
if [ ! -f "${NSIM_DEV_SYS_NEW}" ]; then
echo "SKIP: file ${NSIM_DEV_SYS_NEW} does not exist. Check if CONFIG_NETDEVSIM is enabled" >&2
exit "${ksft_skip}"
@@ -224,8 +301,15 @@ function check_for_dependencies() {
exit "${ksft_skip}"
fi
- if ip addr list | grep -E "inet.*(${SRCIP}|${DSTIP})" 2> /dev/null; then
- echo "SKIP: IPs already in use. Skipping it" >&2
+ REGEXP4="inet.*(${SRCIP4}|${DSTIP4})"
+ REGEXP6="inet.*(${SRCIP6}|${DSTIP6})"
+ if ip addr list | grep -E "${REGEXP4}" 2> /dev/null; then
+ echo "SKIP: IPv4s already in use. Skipping it" >&2
+ exit "${ksft_skip}"
+ fi
+
+ if ip addr list | grep -E "${REGEXP6}" 2> /dev/null; then
+ echo "SKIP: IPv6s already in use. Skipping it" >&2
exit "${ksft_skip}"
fi
}
@@ -239,10 +323,41 @@ function check_for_taskset() {
# This is necessary if running multiple tests in a row
function pkill_socat() {
- PROCESS_NAME="socat UDP-LISTEN:6666,fork ${OUTPUT_FILE}"
+ PROCESS_NAME4="socat UDP-LISTEN:6666,fork ${OUTPUT_FILE}"
+ PROCESS_NAME6="socat UDP6-LISTEN:6666,fork ${OUTPUT_FILE}"
# socat runs under timeout(1), kill it if it is still alive
# do not fail if socat doesn't exist anymore
set +e
- pkill -f "${PROCESS_NAME}"
+ pkill -f "${PROCESS_NAME4}"
+ pkill -f "${PROCESS_NAME6}"
set -e
}
+
+# Check if netconsole was compiled as a module, otherwise exit
+function check_netconsole_module() {
+ if modinfo netconsole | grep filename: | grep -q builtin
+ then
+ echo "SKIP: netconsole should be compiled as a module" >&2
+ exit "${ksft_skip}"
+ fi
+}
+
+# A wrapper to translate protocol version to udp version
+function wait_for_port() {
+ local NAMESPACE=${1}
+ local PORT=${2}
+ IP_VERSION=${3}
+
+ if [ "${IP_VERSION}" == "ipv6" ]
+ then
+ PROTOCOL="udp6"
+ else
+ PROTOCOL="udp"
+ fi
+
+ wait_local_port_listen "${NAMESPACE}" "${PORT}" "${PROTOCOL}"
+ # even after the port is open, let's wait 1 second before writing
+ # otherwise the packet could be missed, and the test will fail. Happens
+ # more frequently on IPv6
+ sleep 1
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh
index 899b6892603f..d7505b933aef 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh
@@ -51,7 +51,7 @@ for current_test in ${TESTS:-$ALL_TESTS}; do
fi
${current_test}_setup_prepare
- setup_wait $num_netifs
+ setup_wait_n $num_netifs
# Update target in case occupancy of a certain resource changed
# following the test setup.
target=$(${current_test}_get_target "$should_fail")
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh
index 482ebb744eba..7b98cdd0580d 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh
@@ -55,7 +55,7 @@ for current_test in ${TESTS:-$ALL_TESTS}; do
continue
fi
${current_test}_setup_prepare
- setup_wait $num_netifs
+ setup_wait_n $num_netifs
# Update target in case occupancy of a certain resource
# changed following the test setup.
target=$(${current_test}_get_target "$should_fail")
diff --git a/tools/testing/selftests/drivers/net/napi_id.py b/tools/testing/selftests/drivers/net/napi_id.py
index 356bac46ba04..d05eddcad539 100755
--- a/tools/testing/selftests/drivers/net/napi_id.py
+++ b/tools/testing/selftests/drivers/net/napi_id.py
@@ -7,10 +7,10 @@ from lib.py import bkg, cmd, rand_port, NetNSEnter
def test_napi_id(cfg) -> None:
port = rand_port()
- listen_cmd = f"{cfg.test_dir}/napi_id_helper {cfg.addr_v['4']} {port}"
+ listen_cmd = f"{cfg.test_dir}/napi_id_helper {cfg.addr} {port}"
with bkg(listen_cmd, ksft_wait=3) as server:
- cmd(f"echo a | socat - TCP:{cfg.addr_v['4']}:{port}", host=cfg.remote, shell=True)
+ cmd(f"echo a | socat - TCP:{cfg.baddr}:{port}", host=cfg.remote, shell=True)
ksft_eq(0, server.ret)
diff --git a/tools/testing/selftests/drivers/net/napi_id_helper.c b/tools/testing/selftests/drivers/net/napi_id_helper.c
index eecd610c2109..7f49ca6c8637 100644
--- a/tools/testing/selftests/drivers/net/napi_id_helper.c
+++ b/tools/testing/selftests/drivers/net/napi_id_helper.c
@@ -7,41 +7,58 @@
#include <unistd.h>
#include <arpa/inet.h>
#include <sys/socket.h>
+#include <netdb.h>
#include "../../net/lib/ksft.h"
int main(int argc, char *argv[])
{
- struct sockaddr_in address;
+ struct sockaddr_storage address;
+ struct addrinfo *result;
+ struct addrinfo hints;
unsigned int napi_id;
- unsigned int port;
+ socklen_t addr_len;
socklen_t optlen;
char buf[1024];
int opt = 1;
+ int family;
int server;
int client;
int ret;
- server = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
+ memset(&hints, 0, sizeof(hints));
+ hints.ai_family = AF_UNSPEC;
+ hints.ai_socktype = SOCK_STREAM;
+ hints.ai_flags = AI_PASSIVE;
+
+ ret = getaddrinfo(argv[1], argv[2], &hints, &result);
+ if (ret != 0) {
+ fprintf(stderr, "getaddrinfo: %s\n", gai_strerror(ret));
+ return 1;
+ }
+
+ family = result->ai_family;
+ addr_len = result->ai_addrlen;
+
+ server = socket(family, SOCK_STREAM, IPPROTO_TCP);
if (server < 0) {
perror("socket creation failed");
+ freeaddrinfo(result);
if (errno == EAFNOSUPPORT)
return -1;
return 1;
}
- port = atoi(argv[2]);
-
if (setsockopt(server, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt))) {
perror("setsockopt");
+ freeaddrinfo(result);
return 1;
}
- address.sin_family = AF_INET;
- inet_pton(AF_INET, argv[1], &address.sin_addr);
- address.sin_port = htons(port);
+ memcpy(&address, result->ai_addr, result->ai_addrlen);
+ freeaddrinfo(result);
- if (bind(server, (struct sockaddr *)&address, sizeof(address)) < 0) {
+ if (bind(server, (struct sockaddr *)&address, addr_len) < 0) {
perror("bind failed");
return 1;
}
diff --git a/tools/testing/selftests/drivers/net/netcons_basic.sh b/tools/testing/selftests/drivers/net/netcons_basic.sh
index fe765da498e8..a3446b569976 100755
--- a/tools/testing/selftests/drivers/net/netcons_basic.sh
+++ b/tools/testing/selftests/drivers/net/netcons_basic.sh
@@ -32,21 +32,42 @@ check_for_dependencies
echo "6 5" > /proc/sys/kernel/printk
# Remove the namespace, interfaces and netconsole target on exit
trap cleanup EXIT
-# Create one namespace and two interfaces
-set_network
-# Create a dynamic target for netconsole
-create_dynamic_target
-# Set userdata "key" with the "value" value
-set_user_data
-# Listed for netconsole port inside the namespace and destination interface
-listen_port_and_save_to "${OUTPUT_FILE}" &
-# Wait for socat to start and listen to the port.
-wait_local_port_listen "${NAMESPACE}" "${PORT}" udp
-# Send the message
-echo "${MSG}: ${TARGET}" > /dev/kmsg
-# Wait until socat saves the file to disk
-busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}"
-
-# Make sure the message was received in the dst part
-# and exit
-validate_result "${OUTPUT_FILE}"
+
+# Run the test twice, with different format modes
+for FORMAT in "basic" "extended"
+do
+ for IP_VERSION in "ipv6" "ipv4"
+ do
+ echo "Running with target mode: ${FORMAT} (${IP_VERSION})"
+ # Create one namespace and two interfaces
+ set_network "${IP_VERSION}"
+ # Create a dynamic target for netconsole
+ create_dynamic_target "${FORMAT}"
+ # Only set userdata for extended format
+ if [ "$FORMAT" == "extended" ]
+ then
+ # Set userdata "key" with the "value" value
+ set_user_data
+ fi
+ # Listed for netconsole port inside the namespace and
+ # destination interface
+ listen_port_and_save_to "${OUTPUT_FILE}" "${IP_VERSION}" &
+ # Wait for socat to start and listen to the port.
+ wait_for_port "${NAMESPACE}" "${PORT}" "${IP_VERSION}"
+ # Send the message
+ echo "${MSG}: ${TARGET}" > /dev/kmsg
+ # Wait until socat saves the file to disk
+ busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}"
+
+ # Make sure the message was received in the dst part
+ # and exit
+ validate_result "${OUTPUT_FILE}" "${FORMAT}"
+ # kill socat in case it is still running
+ pkill_socat
+ cleanup
+ echo "${FORMAT} : ${IP_VERSION} : Test passed" >&2
+ done
+done
+
+trap - EXIT
+exit "${ksft_pass}"
diff --git a/tools/testing/selftests/drivers/net/netcons_cmdline.sh b/tools/testing/selftests/drivers/net/netcons_cmdline.sh
new file mode 100755
index 000000000000..ad2fb8b1c463
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netcons_cmdline.sh
@@ -0,0 +1,52 @@
+#!/usr/bin/env bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This is a selftest to test cmdline arguments on netconsole.
+# It exercises loading of netconsole from cmdline instead of the dynamic
+# reconfiguration. This includes parsing the long netconsole= line and all the
+# flow through init_netconsole().
+#
+# Author: Breno Leitao <leitao@debian.org>
+
+set -euo pipefail
+
+SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")
+
+source "${SCRIPTDIR}"/lib/sh/lib_netcons.sh
+
+check_netconsole_module
+
+modprobe netdevsim 2> /dev/null || true
+rmmod netconsole 2> /dev/null || true
+
+# The content of kmsg will be save to the following file
+OUTPUT_FILE="/tmp/${TARGET}"
+
+# Check for basic system dependency and exit if not found
+# check_for_dependencies
+# Set current loglevel to KERN_INFO(6), and default to KERN_NOTICE(5)
+echo "6 5" > /proc/sys/kernel/printk
+# Remove the namespace and network interfaces
+trap do_cleanup EXIT
+# Create one namespace and two interfaces
+set_network
+# Create the command line for netconsole, with the configuration from the
+# function above
+CMDLINE="$(create_cmdline_str)"
+
+# Load the module, with the cmdline set
+modprobe netconsole "${CMDLINE}"
+
+# Listed for netconsole port inside the namespace and destination interface
+listen_port_and_save_to "${OUTPUT_FILE}" &
+# Wait for socat to start and listen to the port.
+wait_local_port_listen "${NAMESPACE}" "${PORT}" udp
+# Send the message
+echo "${MSG}: ${TARGET}" > /dev/kmsg
+# Wait until socat saves the file to disk
+busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}"
+# Make sure the message was received in the dst part
+# and exit
+validate_msg "${OUTPUT_FILE}"
+
+exit "${ksft_pass}"
diff --git a/tools/testing/selftests/drivers/net/netcons_sysdata.sh b/tools/testing/selftests/drivers/net/netcons_sysdata.sh
index a737e377bf08..baf69031089e 100755
--- a/tools/testing/selftests/drivers/net/netcons_sysdata.sh
+++ b/tools/testing/selftests/drivers/net/netcons_sysdata.sh
@@ -53,6 +53,17 @@ function set_release() {
echo 1 > "${NETCONS_PATH}/userdata/release_enabled"
}
+# Enable the msgid to be appended to sysdata
+function set_msgid() {
+ if [[ ! -f "${NETCONS_PATH}/userdata/msgid_enabled" ]]
+ then
+ echo "Not able to enable msgid sysdata append. Configfs not available in ${NETCONS_PATH}/userdata/msgid_enabled" >&2
+ exit "${ksft_skip}"
+ fi
+
+ echo 1 > "${NETCONS_PATH}/userdata/msgid_enabled"
+}
+
# Disable the sysdata cpu_nr feature
function unset_cpu_nr() {
echo 0 > "${NETCONS_PATH}/userdata/cpu_nr_enabled"
@@ -67,6 +78,10 @@ function unset_release() {
echo 0 > "${NETCONS_PATH}/userdata/release_enabled"
}
+function unset_msgid() {
+ echo 0 > "${NETCONS_PATH}/userdata/msgid_enabled"
+}
+
# Test if MSG contains sysdata
function validate_sysdata() {
# OUTPUT_FILE will contain something like:
@@ -74,6 +89,7 @@ function validate_sysdata() {
# userdatakey=userdatavalue
# cpu=X
# taskname=<taskname>
+ # msgid=<id>
# Echo is what this test uses to create the message. See runtest()
# function
@@ -104,6 +120,12 @@ function validate_sysdata() {
exit "${ksft_fail}"
fi
+ if ! grep -q "msgid=[0-9]\+$" "${OUTPUT_FILE}"; then
+ echo "FAIL: 'msgid=<id>' not found in ${OUTPUT_FILE}" >&2
+ cat "${OUTPUT_FILE}" >&2
+ exit "${ksft_fail}"
+ fi
+
rm "${OUTPUT_FILE}"
pkill_socat
}
@@ -155,6 +177,12 @@ function validate_no_sysdata() {
exit "${ksft_fail}"
fi
+ if grep -q "msgid=" "${OUTPUT_FILE}"; then
+ echo "FAIL: 'msgid= found in ${OUTPUT_FILE}" >&2
+ cat "${OUTPUT_FILE}" >&2
+ exit "${ksft_fail}"
+ fi
+
rm "${OUTPUT_FILE}"
}
@@ -206,6 +234,7 @@ set_cpu_nr
# Enable taskname to be appended to sysdata
set_taskname
set_release
+set_msgid
runtest
# Make sure the message was received in the dst part
# and exit
@@ -235,6 +264,7 @@ MSG="Test #3 from CPU${CPU}"
unset_cpu_nr
unset_taskname
unset_release
+unset_msgid
runtest
# At this time, cpu= shouldn't be present in the msg
validate_no_sysdata
diff --git a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh
index b5ea2526f23c..030762b203d7 100755
--- a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh
@@ -40,6 +40,8 @@ fw_flash_test()
return
fi
+ echo "10"> $DEBUGFS_DIR/fw_update_flash_chunk_time_ms
+
devlink dev flash $DL_HANDLE file $DUMMYFILE
check_err $? "Failed to flash with status updates on"
@@ -608,6 +610,46 @@ rate_attr_parent_check()
check_err $? "Unexpected parent attr value $api_value != $parent"
}
+rate_attr_tc_bw_check()
+{
+ local handle=$1
+ local tc_bw=$2
+ local debug_file=$3
+
+ local tc_bw_str=""
+ for bw in $tc_bw; do
+ local tc=${bw%%:*}
+ local value=${bw##*:}
+ tc_bw_str="$tc_bw_str $tc:$value"
+ done
+ tc_bw_str=${tc_bw_str# }
+
+ rate_attr_set "$handle" tc-bw "$tc_bw_str"
+ check_err $? "Failed to set tc-bw values"
+
+ for bw in $tc_bw; do
+ local tc=${bw%%:*}
+ local value=${bw##*:}
+ local debug_value
+ debug_value=$(cat "$debug_file"/tc"${tc}"_bw)
+ check_err $? "Failed to read tc-bw value from debugfs for tc$tc"
+ [ "$debug_value" == "$value" ]
+ check_err $? "Unexpected tc-bw debug value for tc$tc: $debug_value != $value"
+ done
+
+ for bw in $tc_bw; do
+ local tc=${bw%%:*}
+ local expected_value=${bw##*:}
+ local api_value
+ api_value=$(rate_attr_get "$handle" tc_"$tc")
+ if [ "$api_value" = "null" ]; then
+ api_value=0
+ fi
+ [ "$api_value" == "$expected_value" ]
+ check_err $? "Unexpected tc-bw value for tc$tc: $api_value != $expected_value"
+ done
+}
+
rate_node_add()
{
local handle=$1
@@ -649,6 +691,13 @@ rate_test()
rate=$(($rate+100))
done
+ local tc_bw="0:0 1:40 2:0 3:0 4:0 5:0 6:60 7:0"
+ for r_obj in $leafs
+ do
+ rate_attr_tc_bw_check "$r_obj" "$tc_bw" \
+ "$DEBUGFS_DIR"/ports/"${r_obj##*/}"
+ done
+
local node1_name='group1'
local node1="$DL_HANDLE/$node1_name"
rate_node_add "$node1"
@@ -666,6 +715,12 @@ rate_test()
rate_attr_tx_rate_check $node1 tx_max $node_tx_max \
$DEBUGFS_DIR/rate_nodes/${node1##*/}/tx_max
+
+ local tc_bw="0:20 1:0 2:0 3:0 4:0 5:20 6:60 7:0"
+ rate_attr_tc_bw_check $node1 "$tc_bw" \
+ "$DEBUGFS_DIR"/rate_nodes/"${node1##*/}"
+
+
rate_node_del "$node1"
check_err $? "Failed to delete node $node1"
local num_nodes=`rate_nodes_get $DL_HANDLE | wc -w`
diff --git a/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh b/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh
index 92c2f0376c08..4c859ecdad94 100755
--- a/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh
@@ -266,7 +266,6 @@ for port in 0 1; do
echo $NSIM_ID > /sys/bus/netdevsim/new_device
else
echo 1 > $NSIM_DEV_DFS/udp_ports_open_only
- echo 1 > $NSIM_DEV_DFS/udp_ports_sleep
echo 1 > $NSIM_DEV_SYS/new_port
fi
NSIM_NETDEV=`get_netdev_name old_netdevs`
@@ -350,23 +349,11 @@ old_netdevs=$(ls /sys/class/net)
port=0
echo $NSIM_ID > /sys/bus/netdevsim/new_device
echo 0 > $NSIM_DEV_SYS/del_port
-echo 1000 > $NSIM_DEV_DFS/udp_ports_sleep
echo 0 > $NSIM_DEV_SYS/new_port
NSIM_NETDEV=`get_netdev_name old_netdevs`
msg="create VxLANs"
-exp0=( 0 0 0 0 ) # sleep is longer than out wait
-new_vxlan vxlan0 10000 $NSIM_NETDEV
-
-modprobe -r vxlan
-modprobe -r udp_tunnel
-
-msg="remove tunnels"
-exp0=( 0 0 0 0 )
-check_tables
-
-msg="create VxLANs"
-exp0=( 0 0 0 0 ) # sleep is longer than out wait
+exp0=( `mke 10000 1` 0 0 0 )
new_vxlan vxlan0 10000 $NSIM_NETDEV
exp0=( 0 0 0 0 )
@@ -428,7 +415,6 @@ echo 0 > $NSIM_DEV_SYS/del_port
for port in 0 1; do
if [ $port -ne 0 ]; then
echo 1 > $NSIM_DEV_DFS/udp_ports_open_only
- echo 1 > $NSIM_DEV_DFS/udp_ports_sleep
fi
echo $port > $NSIM_DEV_SYS/new_port
@@ -486,7 +472,6 @@ echo 1 > $NSIM_DEV_DFS/udp_ports_sync_all
for port in 0 1; do
if [ $port -ne 0 ]; then
echo 1 > $NSIM_DEV_DFS/udp_ports_open_only
- echo 1 > $NSIM_DEV_DFS/udp_ports_sleep
fi
echo $port > $NSIM_DEV_SYS/new_port
@@ -543,7 +528,6 @@ echo 0 > $NSIM_DEV_SYS/del_port
for port in 0 1; do
if [ $port -ne 0 ]; then
echo 1 > $NSIM_DEV_DFS/udp_ports_open_only
- echo 1 > $NSIM_DEV_DFS/udp_ports_sleep
fi
echo $port > $NSIM_DEV_SYS/new_port
@@ -573,7 +557,6 @@ echo 1 > $NSIM_DEV_DFS/udp_ports_ipv4_only
for port in 0 1; do
if [ $port -ne 0 ]; then
echo 1 > $NSIM_DEV_DFS/udp_ports_open_only
- echo 1 > $NSIM_DEV_DFS/udp_ports_sleep
fi
echo $port > $NSIM_DEV_SYS/new_port
@@ -634,7 +617,6 @@ echo 0 > $NSIM_DEV_SYS/del_port
for port in 0 1; do
if [ $port -ne 0 ]; then
echo 1 > $NSIM_DEV_DFS/udp_ports_open_only
- echo 1 > $NSIM_DEV_DFS/udp_ports_sleep
fi
echo $port > $NSIM_DEV_SYS/new_port
@@ -690,7 +672,6 @@ echo 0 > $NSIM_DEV_SYS/del_port
for port in 0 1; do
if [ $port -ne 0 ]; then
echo 1 > $NSIM_DEV_DFS/udp_ports_open_only
- echo 1 > $NSIM_DEV_DFS/udp_ports_sleep
fi
echo $port > $NSIM_DEV_SYS/new_port
@@ -750,7 +731,6 @@ echo 0 > $NSIM_DEV_SYS/del_port
for port in 0 1; do
if [ $port -ne 0 ]; then
echo 1 > $NSIM_DEV_DFS/udp_ports_open_only
- echo 1 > $NSIM_DEV_DFS/udp_ports_sleep
fi
echo $port > $NSIM_DEV_SYS/new_port
@@ -809,7 +789,6 @@ echo $NSIM_ID > /sys/bus/netdevsim/new_device
echo 0 > $NSIM_DEV_SYS/del_port
echo 0 > $NSIM_DEV_DFS/udp_ports_open_only
-echo 1 > $NSIM_DEV_DFS/udp_ports_sleep
echo 1 > $NSIM_DEV_DFS/udp_ports_shared
old_netdevs=$(ls /sys/class/net)
diff --git a/tools/testing/selftests/drivers/net/netpoll_basic.py b/tools/testing/selftests/drivers/net/netpoll_basic.py
new file mode 100755
index 000000000000..408bd54d6779
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netpoll_basic.py
@@ -0,0 +1,396 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# Author: Breno Leitao <leitao@debian.org>
+"""
+ This test aims to evaluate the netpoll polling mechanism (as in
+ netpoll_poll_dev()). It presents a complex scenario where the network
+ attempts to send a packet but fails, prompting it to poll the NIC from within
+ the netpoll TX side.
+
+ This has been a crucial path in netpoll that was previously untested. Jakub
+ suggested using a single RX/TX queue, pushing traffic to the NIC, and then
+ sending netpoll messages (via netconsole) to trigger the poll.
+
+ In parallel, bpftrace is used to detect if netpoll_poll_dev() was called. If
+ so, the test passes, otherwise it will be skipped. This test is very dependent on
+ the driver and environment, given we are trying to trigger a tricky scenario.
+"""
+
+import errno
+import logging
+import os
+import random
+import string
+import threading
+import time
+from typing import Optional
+
+from lib.py import (
+ bpftrace,
+ CmdExitFailure,
+ defer,
+ ethtool,
+ GenerateTraffic,
+ ksft_exit,
+ ksft_pr,
+ ksft_run,
+ KsftFailEx,
+ KsftSkipEx,
+ NetDrvEpEnv,
+ KsftXfailEx,
+)
+
+# Configure logging
+logging.basicConfig(
+ level=logging.INFO,
+ format="%(asctime)s - %(levelname)s - %(message)s",
+)
+
+NETCONSOLE_CONFIGFS_PATH: str = "/sys/kernel/config/netconsole"
+NETCONS_REMOTE_PORT: int = 6666
+NETCONS_LOCAL_PORT: int = 1514
+
+# Max number of netcons messages to send. Each iteration will setup
+# netconsole and send MAX_WRITES messages
+ITERATIONS: int = 20
+# Number of writes to /dev/kmsg per iteration
+MAX_WRITES: int = 40
+# MAPS contains the information coming from bpftrace it will have only one
+# key: "hits", which tells the number of times netpoll_poll_dev() was called
+MAPS: dict[str, int] = {}
+# Thread to run bpftrace in parallel
+BPF_THREAD: Optional[threading.Thread] = None
+# Time bpftrace will be running in parallel.
+BPFTRACE_TIMEOUT: int = 10
+
+
+def ethtool_get_ringsize(interface_name: str) -> tuple[int, int]:
+ """
+ Read the ringsize using ethtool. This will be used to restore it after the test
+ """
+ try:
+ ethtool_result = ethtool(f"-g {interface_name}", json=True)[0]
+ rxs = ethtool_result["rx"]
+ txs = ethtool_result["tx"]
+ except (KeyError, IndexError) as exception:
+ raise KsftSkipEx(
+ f"Failed to read RX/TX ringsize: {exception}. Not going to mess with them."
+ ) from exception
+
+ return rxs, txs
+
+
+def ethtool_set_ringsize(interface_name: str, ring_size: tuple[int, int]) -> bool:
+ """Try to the number of RX and TX ringsize."""
+ rxs = ring_size[0]
+ txs = ring_size[1]
+
+ logging.debug("Setting ring size to %d/%d", rxs, txs)
+ try:
+ ethtool(f"-G {interface_name} rx {rxs} tx {txs}")
+ except CmdExitFailure:
+ # This might fail on real device, retry with a higher value,
+ # worst case, keep it as it is.
+ return False
+
+ return True
+
+
+def ethtool_get_queues_cnt(interface_name: str) -> tuple[int, int, int]:
+ """Read the number of RX, TX and combined queues using ethtool"""
+
+ try:
+ ethtool_result = ethtool(f"-l {interface_name}", json=True)[0]
+ rxq = ethtool_result.get("rx", -1)
+ txq = ethtool_result.get("tx", -1)
+ combined = ethtool_result.get("combined", -1)
+
+ except IndexError as exception:
+ raise KsftSkipEx(
+ f"Failed to read queues numbers: {exception}. Not going to mess with them."
+ ) from exception
+
+ return rxq, txq, combined
+
+
+def ethtool_set_queues_cnt(interface_name: str, queues: tuple[int, int, int]) -> None:
+ """Set the number of RX, TX and combined queues using ethtool"""
+ rxq, txq, combined = queues
+
+ cmdline = f"-L {interface_name}"
+
+ if rxq != -1:
+ cmdline += f" rx {rxq}"
+ if txq != -1:
+ cmdline += f" tx {txq}"
+ if combined != -1:
+ cmdline += f" combined {combined}"
+
+ logging.debug("calling: ethtool %s", cmdline)
+
+ try:
+ ethtool(cmdline)
+ except CmdExitFailure as exception:
+ raise KsftSkipEx(
+ f"Failed to configure RX/TX queues: {exception}. Ethtool not available?"
+ ) from exception
+
+
+def netcons_generate_random_target_name() -> str:
+ """Generate a random target name starting with 'netcons'"""
+ random_suffix = "".join(random.choices(string.ascii_lowercase + string.digits, k=8))
+ return f"netcons_{random_suffix}"
+
+
+def netcons_create_target(
+ config_data: dict[str, str],
+ target_name: str,
+) -> None:
+ """Create a netconsole dynamic target against the interfaces"""
+ logging.debug("Using netconsole name: %s", target_name)
+ try:
+ os.makedirs(f"{NETCONSOLE_CONFIGFS_PATH}/{target_name}", exist_ok=True)
+ logging.debug(
+ "Created target directory: %s/%s", NETCONSOLE_CONFIGFS_PATH, target_name
+ )
+ except OSError as exception:
+ if exception.errno != errno.EEXIST:
+ raise KsftFailEx(
+ f"Failed to create netconsole target directory: {exception}"
+ ) from exception
+
+ try:
+ for key, value in config_data.items():
+ path = f"{NETCONSOLE_CONFIGFS_PATH}/{target_name}/{key}"
+ logging.debug("Writing %s to %s", key, path)
+ with open(path, "w", encoding="utf-8") as file:
+ # Always convert to string to write to file
+ file.write(str(value))
+
+ # Read all configuration values for debugging purposes
+ for debug_key in config_data.keys():
+ with open(
+ f"{NETCONSOLE_CONFIGFS_PATH}/{target_name}/{debug_key}",
+ "r",
+ encoding="utf-8",
+ ) as file:
+ content = file.read()
+ logging.debug(
+ "%s/%s/%s : %s",
+ NETCONSOLE_CONFIGFS_PATH,
+ target_name,
+ debug_key,
+ content.strip(),
+ )
+
+ except Exception as exception:
+ raise KsftFailEx(
+ f"Failed to configure netconsole target: {exception}"
+ ) from exception
+
+
+def netcons_configure_target(
+ cfg: NetDrvEpEnv, interface_name: str, target_name: str
+) -> None:
+ """Configure netconsole on the interface with the given target name"""
+ config_data = {
+ "extended": "1",
+ "dev_name": interface_name,
+ "local_port": NETCONS_LOCAL_PORT,
+ "remote_port": NETCONS_REMOTE_PORT,
+ "local_ip": cfg.addr,
+ "remote_ip": cfg.remote_addr,
+ "remote_mac": "00:00:00:00:00:00", # Not important for this test
+ "enabled": "1",
+ }
+
+ netcons_create_target(config_data, target_name)
+ logging.debug(
+ "Created netconsole target: %s on interface %s", target_name, interface_name
+ )
+
+
+def netcons_delete_target(name: str) -> None:
+ """Delete a netconsole dynamic target"""
+ target_path = f"{NETCONSOLE_CONFIGFS_PATH}/{name}"
+ try:
+ if os.path.exists(target_path):
+ os.rmdir(target_path)
+ except OSError as exception:
+ raise KsftFailEx(
+ f"Failed to delete netconsole target: {exception}"
+ ) from exception
+
+
+def netcons_load_module() -> None:
+ """Try to load the netconsole module"""
+ os.system("modprobe netconsole")
+
+
+def bpftrace_call() -> None:
+ """Call bpftrace to find how many times netpoll_poll_dev() is called.
+ Output is saved in the global variable `maps`"""
+
+ # This is going to update the global variable, that will be seen by the
+ # main function
+ global MAPS # pylint: disable=W0603
+
+ # This will be passed to bpftrace as in bpftrace -e "expr"
+ expr = "kprobe:netpoll_poll_dev { @hits = count(); }"
+
+ MAPS = bpftrace(expr, timeout=BPFTRACE_TIMEOUT, json=True)
+ logging.debug("BPFtrace output: %s", MAPS)
+
+
+def bpftrace_start():
+ """Start a thread to call `call_bpf` in a parallel thread"""
+ global BPF_THREAD # pylint: disable=W0603
+
+ BPF_THREAD = threading.Thread(target=bpftrace_call)
+ BPF_THREAD.start()
+ if not BPF_THREAD.is_alive():
+ raise KsftSkipEx("BPFtrace thread is not alive. Skipping test")
+
+
+def bpftrace_stop() -> None:
+ """Stop the bpftrace thread"""
+ if BPF_THREAD:
+ BPF_THREAD.join()
+
+
+def bpftrace_any_hit(join: bool) -> bool:
+ """Check if netpoll_poll_dev() was called by checking the global variable `maps`"""
+ if not BPF_THREAD:
+ raise KsftFailEx("BPFtrace didn't start")
+
+ if BPF_THREAD.is_alive():
+ if join:
+ # Wait for bpftrace to finish
+ BPF_THREAD.join()
+ else:
+ # bpftrace is still running, so, we will not check the result yet
+ return False
+
+ logging.debug("MAPS coming from bpftrace = %s", MAPS)
+ if "hits" not in MAPS.keys():
+ raise KsftFailEx(f"bpftrace failed to run!?: {MAPS}")
+
+ logging.debug("Got a total of %d hits", MAPS["hits"])
+ return MAPS["hits"] > 0
+
+
+def do_netpoll_flush_monitored(cfg: NetDrvEpEnv, ifname: str, target_name: str) -> None:
+ """Print messages to the console, trying to trigger a netpoll poll"""
+ # Start bpftrace in parallel, so, it is watching
+ # netpoll_poll_dev() while we are sending netconsole messages
+ bpftrace_start()
+ defer(bpftrace_stop)
+
+ do_netpoll_flush(cfg, ifname, target_name)
+
+ if bpftrace_any_hit(join=True):
+ ksft_pr("netpoll_poll_dev() was called. Success")
+ return
+
+ raise KsftXfailEx("netpoll_poll_dev() was not called during the test...")
+
+
+def do_netpoll_flush(cfg: NetDrvEpEnv, ifname: str, target_name: str) -> None:
+ """Print messages to the console, trying to trigger a netpoll poll"""
+ netcons_configure_target(cfg, ifname, target_name)
+ retry = 0
+
+ for i in range(int(ITERATIONS)):
+ if not BPF_THREAD.is_alive() or bpftrace_any_hit(join=False):
+ # bpftrace is done, stop sending messages
+ break
+
+ msg = f"netcons test #{i}"
+ with open("/dev/kmsg", "w", encoding="utf-8") as kmsg:
+ for j in range(MAX_WRITES):
+ try:
+ kmsg.write(f"{msg}-{j}\n")
+ except OSError as exception:
+ # in some cases, kmsg can be busy, so, we will retry
+ time.sleep(1)
+ retry += 1
+ if retry < 5:
+ logging.info("Failed to write to kmsg. Retrying")
+ # Just retry a few times
+ continue
+ raise KsftFailEx(
+ f"Failed to write to kmsg: {exception}"
+ ) from exception
+
+ netcons_delete_target(target_name)
+ netcons_configure_target(cfg, ifname, target_name)
+ # If we sleep here, we will have a better chance of triggering
+ # This number is based on a few tests I ran while developing this test
+ time.sleep(0.4)
+
+
+def configure_network(ifname: str) -> None:
+ """Configure ring size and queue numbers"""
+
+ # Set defined queues to 1 to force congestion
+ prev_queues = ethtool_get_queues_cnt(ifname)
+ logging.debug("RX/TX/combined queues: %s", prev_queues)
+ # Only set the queues to 1 if they exists in the device. I.e, they are > 0
+ ethtool_set_queues_cnt(ifname, tuple(1 if x > 0 else x for x in prev_queues))
+ defer(ethtool_set_queues_cnt, ifname, prev_queues)
+
+ # Try to set the ring size to some low value.
+ # Do not fail if the hardware do not accepted desired values
+ prev_ring_size = ethtool_get_ringsize(ifname)
+ for size in [(1, 1), (128, 128), (256, 256)]:
+ if ethtool_set_ringsize(ifname, size):
+ # hardware accepted the desired ringsize
+ logging.debug("Set RX/TX ringsize to: %s from %s", size, prev_ring_size)
+ break
+ defer(ethtool_set_ringsize, ifname, prev_ring_size)
+
+
+def test_netpoll(cfg: NetDrvEpEnv) -> None:
+ """
+ Test netpoll by sending traffic to the interface and then sending
+ netconsole messages to trigger a poll
+ """
+
+ ifname = cfg.ifname
+ configure_network(ifname)
+ target_name = netcons_generate_random_target_name()
+ traffic = None
+
+ try:
+ traffic = GenerateTraffic(cfg)
+ do_netpoll_flush_monitored(cfg, ifname, target_name)
+ finally:
+ if traffic:
+ traffic.stop()
+
+ # Revert RX/TX queues
+ netcons_delete_target(target_name)
+
+
+def test_check_dependencies() -> None:
+ """Check if the dependencies are met"""
+ if not os.path.exists(NETCONSOLE_CONFIGFS_PATH):
+ raise KsftSkipEx(
+ f"Directory {NETCONSOLE_CONFIGFS_PATH} does not exist. CONFIG_NETCONSOLE_DYNAMIC might not be set." # pylint: disable=C0301
+ )
+
+
+def main() -> None:
+ """Main function to run the test"""
+ netcons_load_module()
+ test_check_dependencies()
+ with NetDrvEpEnv(__file__) as cfg:
+ ksft_run(
+ [test_netpoll],
+ args=(cfg,),
+ )
+ ksft_exit()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/drivers/net/ping.py b/tools/testing/selftests/drivers/net/ping.py
index e0f114612c1a..da3623c5e8a9 100755
--- a/tools/testing/selftests/drivers/net/ping.py
+++ b/tools/testing/selftests/drivers/net/ping.py
@@ -30,7 +30,7 @@ def _test_v6(cfg) -> None:
cmd("ping -s 65000 -c 1 -W0.5 " + cfg.addr_v["6"], host=cfg.remote)
def _test_tcp(cfg) -> None:
- cfg.require_cmd("socat", remote=True)
+ cfg.require_cmd("socat", local=False, remote=True)
port = rand_port()
listen_cmd = f"socat -{cfg.addr_ipver} -t 2 -u TCP-LISTEN:{port},reuseport STDOUT"
diff --git a/tools/testing/selftests/drivers/net/stats.py b/tools/testing/selftests/drivers/net/stats.py
index efcc1e10575b..c2bb5d3f1ca1 100755
--- a/tools/testing/selftests/drivers/net/stats.py
+++ b/tools/testing/selftests/drivers/net/stats.py
@@ -1,12 +1,16 @@
#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0
+"""
+Tests related to standard netdevice statistics.
+"""
+
import errno
import subprocess
import time
from lib.py import ksft_run, ksft_exit, ksft_pr
from lib.py import ksft_ge, ksft_eq, ksft_is, ksft_in, ksft_lt, ksft_true, ksft_raises
-from lib.py import KsftSkipEx, KsftXfailEx
+from lib.py import KsftSkipEx, KsftFailEx
from lib.py import ksft_disruptive
from lib.py import EthtoolFamily, NetdevFamily, RtnlFamily, NlError
from lib.py import NetDrvEnv
@@ -18,13 +22,16 @@ rtnl = RtnlFamily()
def check_pause(cfg) -> None:
- global ethnl
+ """
+ Check that drivers which support Pause config also report standard
+ pause stats.
+ """
try:
ethnl.pause_get({"header": {"dev-index": cfg.ifindex}})
except NlError as e:
if e.error == errno.EOPNOTSUPP:
- raise KsftXfailEx("pause not supported by the device")
+ raise KsftSkipEx("pause not supported by the device") from e
raise
data = ethnl.pause_get({"header": {"dev-index": cfg.ifindex,
@@ -33,13 +40,16 @@ def check_pause(cfg) -> None:
def check_fec(cfg) -> None:
- global ethnl
+ """
+ Check that drivers which support FEC config also report standard
+ FEC stats.
+ """
try:
ethnl.fec_get({"header": {"dev-index": cfg.ifindex}})
except NlError as e:
if e.error == errno.EOPNOTSUPP:
- raise KsftXfailEx("FEC not supported by the device")
+ raise KsftSkipEx("FEC not supported by the device") from e
raise
data = ethnl.fec_get({"header": {"dev-index": cfg.ifindex,
@@ -48,15 +58,17 @@ def check_fec(cfg) -> None:
def pkt_byte_sum(cfg) -> None:
- global netfam, rtnl
+ """
+ Check that qstat and interface stats match in value.
+ """
def get_qstat(test):
- global netfam
stats = netfam.qstats_get({}, dump=True)
if stats:
for qs in stats:
if qs["ifindex"]== test.ifindex:
return qs
+ return None
qstat = get_qstat(cfg)
if qstat is None:
@@ -77,15 +89,14 @@ def pkt_byte_sum(cfg) -> None:
for _ in range(10):
rtstat = rtnl.getlink({"ifi-index": cfg.ifindex})['stats64']
if stat_cmp(rtstat, qstat) < 0:
- raise Exception("RTNL stats are lower, fetched later")
+ raise KsftFailEx("RTNL stats are lower, fetched later")
qstat = get_qstat(cfg)
if stat_cmp(rtstat, qstat) > 0:
- raise Exception("Qstats are lower, fetched later")
+ raise KsftFailEx("Qstats are lower, fetched later")
def qstat_by_ifindex(cfg) -> None:
- global netfam
- global rtnl
+ """ Qstats Netlink API tests - querying by ifindex. """
# Construct a map ifindex -> [dump, by-index, dump]
ifindexes = {}
@@ -93,7 +104,7 @@ def qstat_by_ifindex(cfg) -> None:
for entry in stats:
ifindexes[entry['ifindex']] = [entry, None, None]
- for ifindex in ifindexes.keys():
+ for ifindex in ifindexes:
entry = netfam.qstats_get({"ifindex": ifindex}, dump=True)
ksft_eq(len(entry), 1)
ifindexes[entry[0]['ifindex']][1] = entry[0]
@@ -145,7 +156,7 @@ def qstat_by_ifindex(cfg) -> None:
# Try to get stats for lowest unused ifindex but not 0
devs = rtnl.getlink({}, dump=True)
- all_ifindexes = set([dev["ifi-index"] for dev in devs])
+ all_ifindexes = set(dev["ifi-index"] for dev in devs)
lowest = 2
while lowest in all_ifindexes:
lowest += 1
@@ -158,18 +169,20 @@ def qstat_by_ifindex(cfg) -> None:
@ksft_disruptive
def check_down(cfg) -> None:
+ """ Test statistics (interface and qstat) are not impacted by ifdown """
+
try:
qstat = netfam.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0]
except NlError as e:
if e.error == errno.EOPNOTSUPP:
- raise KsftSkipEx("qstats not supported by the device")
+ raise KsftSkipEx("qstats not supported by the device") from e
raise
ip(f"link set dev {cfg.dev['ifname']} down")
defer(ip, f"link set dev {cfg.dev['ifname']} up")
qstat2 = netfam.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0]
- for k, v in qstat.items():
+ for k in qstat:
ksft_ge(qstat2[k], qstat[k], comment=f"{k} went backwards on device down")
# exercise per-queue API to make sure that "device down" state
@@ -263,6 +276,8 @@ def procfs_downup_hammer(cfg) -> None:
def main() -> None:
+ """ Ksft boiler plate main """
+
with NetDrvEnv(__file__, queue_count=100) as cfg:
ksft_run([check_pause, check_fec, pkt_byte_sum, qstat_by_ifindex,
check_down, procfs_hammer, procfs_downup_hammer],
diff --git a/tools/testing/selftests/drivers/net/xdp.py b/tools/testing/selftests/drivers/net/xdp.py
new file mode 100755
index 000000000000..1dd8bf3bf6c9
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/xdp.py
@@ -0,0 +1,658 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+"""
+This file contains tests to verify native XDP support in network drivers.
+The tests utilize the BPF program `xdp_native.bpf.o` from the `selftests.net.lib`
+directory, with each test focusing on a specific aspect of XDP functionality.
+"""
+import random
+import string
+from dataclasses import dataclass
+from enum import Enum
+
+from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_ne, ksft_pr
+from lib.py import KsftFailEx, NetDrvEpEnv, EthtoolFamily, NlError
+from lib.py import bkg, cmd, rand_port, wait_port_listen
+from lib.py import ip, bpftool, defer
+
+
+class TestConfig(Enum):
+ """Enum for XDP configuration options."""
+ MODE = 0 # Configures the BPF program for a specific test
+ PORT = 1 # Port configuration to communicate with the remote host
+ ADJST_OFFSET = 2 # Tail/Head adjustment offset for extension/shrinking
+ ADJST_TAG = 3 # Adjustment tag to annotate the start and end of extension
+
+
+class XDPAction(Enum):
+ """Enum for XDP actions."""
+ PASS = 0 # Pass the packet up to the stack
+ DROP = 1 # Drop the packet
+ TX = 2 # Route the packet to the remote host
+ TAIL_ADJST = 3 # Adjust the tail of the packet
+ HEAD_ADJST = 4 # Adjust the head of the packet
+
+
+class XDPStats(Enum):
+ """Enum for XDP statistics."""
+ RX = 0 # Count of valid packets received for testing
+ PASS = 1 # Count of packets passed up to the stack
+ DROP = 2 # Count of packets dropped
+ TX = 3 # Count of incoming packets routed to the remote host
+ ABORT = 4 # Count of packets that were aborted
+
+
+@dataclass
+class BPFProgInfo:
+ """Data class to store information about a BPF program."""
+ name: str # Name of the BPF program
+ file: str # BPF program object file
+ xdp_sec: str = "xdp" # XDP section name (e.g., "xdp" or "xdp.frags")
+ mtu: int = 1500 # Maximum Transmission Unit, default is 1500
+
+
+def _exchg_udp(cfg, port, test_string):
+ """
+ Exchanges UDP packets between a local and remote host using the socat tool.
+
+ Args:
+ cfg: Configuration object containing network settings.
+ port: Port number to use for the UDP communication.
+ test_string: String that the remote host will send.
+
+ Returns:
+ The string received by the test host.
+ """
+ cfg.require_cmd("socat", remote=True)
+
+ rx_udp_cmd = f"socat -{cfg.addr_ipver} -T 2 -u UDP-RECV:{port},reuseport STDOUT"
+ tx_udp_cmd = f"echo -n {test_string} | socat -t 2 -u STDIN UDP:{cfg.baddr}:{port}"
+
+ with bkg(rx_udp_cmd, exit_wait=True) as nc:
+ wait_port_listen(port, proto="udp")
+ cmd(tx_udp_cmd, host=cfg.remote, shell=True)
+
+ return nc.stdout.strip()
+
+
+def _test_udp(cfg, port, size=256):
+ """
+ Tests UDP packet exchange between a local and remote host.
+
+ Args:
+ cfg: Configuration object containing network settings.
+ port: Port number to use for the UDP communication.
+ size: The length of the test string to be exchanged, default is 256 characters.
+
+ Returns:
+ bool: True if the received string matches the sent string, False otherwise.
+ """
+ test_str = "".join(random.choice(string.ascii_lowercase) for _ in range(size))
+ recvd_str = _exchg_udp(cfg, port, test_str)
+
+ return recvd_str == test_str
+
+
+def _load_xdp_prog(cfg, bpf_info):
+ """
+ Loads an XDP program onto a network interface.
+
+ Args:
+ cfg: Configuration object containing network settings.
+ bpf_info: BPFProgInfo object containing information about the BPF program.
+
+ Returns:
+ dict: A dictionary containing the XDP program ID, name, and associated map IDs.
+ """
+ abs_path = cfg.net_lib_dir / bpf_info.file
+ prog_info = {}
+
+ cmd(f"ip link set dev {cfg.remote_ifname} mtu {bpf_info.mtu}", shell=True, host=cfg.remote)
+ defer(ip, f"link set dev {cfg.remote_ifname} mtu 1500", host=cfg.remote)
+
+ cmd(
+ f"ip link set dev {cfg.ifname} mtu {bpf_info.mtu} xdp obj {abs_path} sec {bpf_info.xdp_sec}",
+ shell=True
+ )
+ defer(ip, f"link set dev {cfg.ifname} mtu 1500 xdp off")
+
+ xdp_info = ip(f"-d link show dev {cfg.ifname}", json=True)[0]
+ prog_info["id"] = xdp_info["xdp"]["prog"]["id"]
+ prog_info["name"] = xdp_info["xdp"]["prog"]["name"]
+ prog_id = prog_info["id"]
+
+ map_ids = bpftool(f"prog show id {prog_id}", json=True)["map_ids"]
+ prog_info["maps"] = {}
+ for map_id in map_ids:
+ name = bpftool(f"map show id {map_id}", json=True)["name"]
+ prog_info["maps"][name] = map_id
+
+ return prog_info
+
+
+def format_hex_bytes(value):
+ """
+ Helper function that converts an integer into a formatted hexadecimal byte string.
+
+ Args:
+ value: An integer representing the number to be converted.
+
+ Returns:
+ A string representing hexadecimal equivalent of value, with bytes separated by spaces.
+ """
+ hex_str = value.to_bytes(4, byteorder='little', signed=True)
+ return ' '.join(f'{byte:02x}' for byte in hex_str)
+
+
+def _set_xdp_map(map_name, key, value):
+ """
+ Updates an XDP map with a given key-value pair using bpftool.
+
+ Args:
+ map_name: The name of the XDP map to update.
+ key: The key to update in the map, formatted as a hexadecimal string.
+ value: The value to associate with the key, formatted as a hexadecimal string.
+ """
+ key_formatted = format_hex_bytes(key)
+ value_formatted = format_hex_bytes(value)
+ bpftool(
+ f"map update name {map_name} key hex {key_formatted} value hex {value_formatted}"
+ )
+
+
+def _get_stats(xdp_map_id):
+ """
+ Retrieves and formats statistics from an XDP map.
+
+ Args:
+ xdp_map_id: The ID of the XDP map from which to retrieve statistics.
+
+ Returns:
+ A dictionary containing formatted packet statistics for various XDP actions.
+ The keys are based on the XDPStats Enum values.
+
+ Raises:
+ KsftFailEx: If the stats retrieval fails.
+ """
+ stats_dump = bpftool(f"map dump id {xdp_map_id}", json=True)
+ if not stats_dump:
+ raise KsftFailEx(f"Failed to get stats for map {xdp_map_id}")
+
+ stats_formatted = {}
+ for key in range(0, 5):
+ val = stats_dump[key]["formatted"]["value"]
+ if stats_dump[key]["formatted"]["key"] == XDPStats.RX.value:
+ stats_formatted[XDPStats.RX.value] = val
+ elif stats_dump[key]["formatted"]["key"] == XDPStats.PASS.value:
+ stats_formatted[XDPStats.PASS.value] = val
+ elif stats_dump[key]["formatted"]["key"] == XDPStats.DROP.value:
+ stats_formatted[XDPStats.DROP.value] = val
+ elif stats_dump[key]["formatted"]["key"] == XDPStats.TX.value:
+ stats_formatted[XDPStats.TX.value] = val
+ elif stats_dump[key]["formatted"]["key"] == XDPStats.ABORT.value:
+ stats_formatted[XDPStats.ABORT.value] = val
+
+ return stats_formatted
+
+
+def _test_pass(cfg, bpf_info, msg_sz):
+ """
+ Tests the XDP_PASS action by exchanging UDP packets.
+
+ Args:
+ cfg: Configuration object containing network settings.
+ bpf_info: BPFProgInfo object containing information about the BPF program.
+ msg_sz: Size of the test message to send.
+ """
+
+ prog_info = _load_xdp_prog(cfg, bpf_info)
+ port = rand_port()
+
+ _set_xdp_map("map_xdp_setup", TestConfig.MODE.value, XDPAction.PASS.value)
+ _set_xdp_map("map_xdp_setup", TestConfig.PORT.value, port)
+
+ ksft_eq(_test_udp(cfg, port, msg_sz), True, "UDP packet exchange failed")
+ stats = _get_stats(prog_info["maps"]["map_xdp_stats"])
+
+ ksft_ne(stats[XDPStats.RX.value], 0, "RX stats should not be zero")
+ ksft_eq(stats[XDPStats.RX.value], stats[XDPStats.PASS.value], "RX and PASS stats mismatch")
+
+
+def test_xdp_native_pass_sb(cfg):
+ """
+ Tests the XDP_PASS action for single buffer case.
+
+ Args:
+ cfg: Configuration object containing network settings.
+ """
+ bpf_info = BPFProgInfo("xdp_prog", "xdp_native.bpf.o", "xdp", 1500)
+
+ _test_pass(cfg, bpf_info, 256)
+
+
+def test_xdp_native_pass_mb(cfg):
+ """
+ Tests the XDP_PASS action for a multi-buff size.
+
+ Args:
+ cfg: Configuration object containing network settings.
+ """
+ bpf_info = BPFProgInfo("xdp_prog_frags", "xdp_native.bpf.o", "xdp.frags", 9000)
+
+ _test_pass(cfg, bpf_info, 8000)
+
+
+def _test_drop(cfg, bpf_info, msg_sz):
+ """
+ Tests the XDP_DROP action by exchanging UDP packets.
+
+ Args:
+ cfg: Configuration object containing network settings.
+ bpf_info: BPFProgInfo object containing information about the BPF program.
+ msg_sz: Size of the test message to send.
+ """
+
+ prog_info = _load_xdp_prog(cfg, bpf_info)
+ port = rand_port()
+
+ _set_xdp_map("map_xdp_setup", TestConfig.MODE.value, XDPAction.DROP.value)
+ _set_xdp_map("map_xdp_setup", TestConfig.PORT.value, port)
+
+ ksft_eq(_test_udp(cfg, port, msg_sz), False, "UDP packet exchange should fail")
+ stats = _get_stats(prog_info["maps"]["map_xdp_stats"])
+
+ ksft_ne(stats[XDPStats.RX.value], 0, "RX stats should be zero")
+ ksft_eq(stats[XDPStats.RX.value], stats[XDPStats.DROP.value], "RX and DROP stats mismatch")
+
+
+def test_xdp_native_drop_sb(cfg):
+ """
+ Tests the XDP_DROP action for a signle-buff case.
+
+ Args:
+ cfg: Configuration object containing network settings.
+ """
+ bpf_info = BPFProgInfo("xdp_prog", "xdp_native.bpf.o", "xdp", 1500)
+
+ _test_drop(cfg, bpf_info, 256)
+
+
+def test_xdp_native_drop_mb(cfg):
+ """
+ Tests the XDP_DROP action for a multi-buff case.
+
+ Args:
+ cfg: Configuration object containing network settings.
+ """
+ bpf_info = BPFProgInfo("xdp_prog_frags", "xdp_native.bpf.o", "xdp.frags", 9000)
+
+ _test_drop(cfg, bpf_info, 8000)
+
+
+def test_xdp_native_tx_mb(cfg):
+ """
+ Tests the XDP_TX action for a multi-buff case.
+
+ Args:
+ cfg: Configuration object containing network settings.
+ """
+ cfg.require_cmd("socat", remote=True)
+
+ bpf_info = BPFProgInfo("xdp_prog_frags", "xdp_native.bpf.o", "xdp.frags", 9000)
+ prog_info = _load_xdp_prog(cfg, bpf_info)
+ port = rand_port()
+
+ _set_xdp_map("map_xdp_setup", TestConfig.MODE.value, XDPAction.TX.value)
+ _set_xdp_map("map_xdp_setup", TestConfig.PORT.value, port)
+
+ test_string = ''.join(random.choice(string.ascii_lowercase) for _ in range(8000))
+ rx_udp = f"socat -{cfg.addr_ipver} -T 2 -u UDP-RECV:{port},reuseport STDOUT"
+ tx_udp = f"echo {test_string} | socat -t 2 -u STDIN UDP:{cfg.baddr}:{port}"
+
+ with bkg(rx_udp, host=cfg.remote, exit_wait=True) as rnc:
+ wait_port_listen(port, proto="udp", host=cfg.remote)
+ cmd(tx_udp, host=cfg.remote, shell=True)
+
+ stats = _get_stats(prog_info['maps']['map_xdp_stats'])
+
+ ksft_eq(rnc.stdout.strip(), test_string, "UDP packet exchange failed")
+ ksft_eq(stats[XDPStats.TX.value], 1, "TX stats mismatch")
+
+
+def _validate_res(res, offset_lst, pkt_sz_lst):
+ """
+ Validates the result of a test.
+
+ Args:
+ res: The result of the test, which should be a dictionary with a "status" key.
+
+ Raises:
+ KsftFailEx: If the test fails to pass any combination of offset and packet size.
+ """
+ if "status" not in res:
+ raise KsftFailEx("Missing 'status' key in result dictionary")
+
+ # Validate that not a single case was successful
+ if res["status"] == "fail":
+ if res["offset"] == offset_lst[0] and res["pkt_sz"] == pkt_sz_lst[0]:
+ raise KsftFailEx(f"{res['reason']}")
+
+ # Get the previous offset and packet size to report the successful run
+ tmp_idx = offset_lst.index(res["offset"])
+ prev_offset = offset_lst[tmp_idx - 1]
+ if tmp_idx == 0:
+ tmp_idx = pkt_sz_lst.index(res["pkt_sz"])
+ prev_pkt_sz = pkt_sz_lst[tmp_idx - 1]
+ else:
+ prev_pkt_sz = res["pkt_sz"]
+
+ # Use these values for error reporting
+ ksft_pr(
+ f"Failed run: pkt_sz {res['pkt_sz']}, offset {res['offset']}. "
+ f"Last successful run: pkt_sz {prev_pkt_sz}, offset {prev_offset}. "
+ f"Reason: {res['reason']}"
+ )
+
+
+def _check_for_failures(recvd_str, stats):
+ """
+ Checks for common failures while adjusting headroom or tailroom.
+
+ Args:
+ recvd_str: The string received from the remote host after sending a test string.
+ stats: A dictionary containing formatted packet statistics for various XDP actions.
+
+ Returns:
+ str: A string describing the failure reason if a failure is detected, otherwise None.
+ """
+
+ # Any adjustment failure result in an abort hence, we track this counter
+ if stats[XDPStats.ABORT.value] != 0:
+ return "Adjustment failed"
+
+ # Since we are using aggregate stats for a single test across all offsets and packet sizes
+ # we can't use RX stats only to track data exchange failure without taking a previous
+ # snapshot. An easier way is to simply check for non-zero length of received string.
+ if len(recvd_str) == 0:
+ return "Data exchange failed"
+
+ # Check for RX and PASS stats mismatch. Ideally, they should be equal for a successful run
+ if stats[XDPStats.RX.value] != stats[XDPStats.PASS.value]:
+ return "RX stats mismatch"
+
+ return None
+
+
+def _test_xdp_native_tail_adjst(cfg, pkt_sz_lst, offset_lst):
+ """
+ Tests the XDP tail adjustment functionality.
+
+ This function loads the appropriate XDP program based on the provided
+ program name and configures the XDP map for tail adjustment. It then
+ validates the tail adjustment by sending and receiving UDP packets
+ with specified packet sizes and offsets.
+
+ Args:
+ cfg: Configuration object containing network settings.
+ prog: Name of the XDP program to load.
+ pkt_sz_lst: List of packet sizes to test.
+ offset_lst: List of offsets to validate support for tail adjustment.
+
+ Returns:
+ dict: A dictionary with test status and failure details if applicable.
+ """
+ port = rand_port()
+ bpf_info = BPFProgInfo("xdp_prog_frags", "xdp_native.bpf.o", "xdp.frags", 9000)
+
+ prog_info = _load_xdp_prog(cfg, bpf_info)
+
+ # Configure the XDP map for tail adjustment
+ _set_xdp_map("map_xdp_setup", TestConfig.MODE.value, XDPAction.TAIL_ADJST.value)
+ _set_xdp_map("map_xdp_setup", TestConfig.PORT.value, port)
+
+ for offset in offset_lst:
+ tag = format(random.randint(65, 90), "02x")
+
+ _set_xdp_map("map_xdp_setup", TestConfig.ADJST_OFFSET.value, offset)
+ if offset > 0:
+ _set_xdp_map("map_xdp_setup", TestConfig.ADJST_TAG.value, int(tag, 16))
+
+ for pkt_sz in pkt_sz_lst:
+ test_str = "".join(random.choice(string.ascii_lowercase) for _ in range(pkt_sz))
+ recvd_str = _exchg_udp(cfg, port, test_str)
+ stats = _get_stats(prog_info["maps"]["map_xdp_stats"])
+
+ failure = _check_for_failures(recvd_str, stats)
+ if failure is not None:
+ return {
+ "status": "fail",
+ "reason": failure,
+ "offset": offset,
+ "pkt_sz": pkt_sz,
+ }
+
+ # Validate data content based on offset direction
+ expected_data = None
+ if offset > 0:
+ expected_data = test_str + (offset * chr(int(tag, 16)))
+ else:
+ expected_data = test_str[0:pkt_sz + offset]
+
+ if recvd_str != expected_data:
+ return {
+ "status": "fail",
+ "reason": "Data mismatch",
+ "offset": offset,
+ "pkt_sz": pkt_sz,
+ }
+
+ return {"status": "pass"}
+
+
+def test_xdp_native_adjst_tail_grow_data(cfg):
+ """
+ Tests the XDP tail adjustment by growing packet data.
+
+ Args:
+ cfg: Configuration object containing network settings.
+ """
+ pkt_sz_lst = [512, 1024, 2048]
+ offset_lst = [1, 16, 32, 64, 128, 256]
+ res = _test_xdp_native_tail_adjst(
+ cfg,
+ pkt_sz_lst,
+ offset_lst,
+ )
+
+ _validate_res(res, offset_lst, pkt_sz_lst)
+
+
+def test_xdp_native_adjst_tail_shrnk_data(cfg):
+ """
+ Tests the XDP tail adjustment by shrinking packet data.
+
+ Args:
+ cfg: Configuration object containing network settings.
+ """
+ pkt_sz_lst = [512, 1024, 2048]
+ offset_lst = [-16, -32, -64, -128, -256]
+ res = _test_xdp_native_tail_adjst(
+ cfg,
+ pkt_sz_lst,
+ offset_lst,
+ )
+
+ _validate_res(res, offset_lst, pkt_sz_lst)
+
+
+def get_hds_thresh(cfg):
+ """
+ Retrieves the header data split (HDS) threshold for a network interface.
+
+ Args:
+ cfg: Configuration object containing network settings.
+
+ Returns:
+ The HDS threshold value. If the threshold is not supported or an error occurs,
+ a default value of 1500 is returned.
+ """
+ netnl = cfg.netnl
+ hds_thresh = 1500
+
+ try:
+ rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}})
+ if 'hds-thresh' not in rings:
+ ksft_pr(f'hds-thresh not supported. Using default: {hds_thresh}')
+ return hds_thresh
+ hds_thresh = rings['hds-thresh']
+ except NlError as e:
+ ksft_pr(f"Failed to get rings: {e}. Using default: {hds_thresh}")
+
+ return hds_thresh
+
+
+def _test_xdp_native_head_adjst(cfg, prog, pkt_sz_lst, offset_lst):
+ """
+ Tests the XDP head adjustment action for a multi-buffer case.
+
+ Args:
+ cfg: Configuration object containing network settings.
+ netnl: Network namespace or link object (not used in this function).
+
+ This function sets up the packet size and offset lists, then performs
+ the head adjustment test by sending and receiving UDP packets.
+ """
+ cfg.require_cmd("socat", remote=True)
+
+ prog_info = _load_xdp_prog(cfg, BPFProgInfo(prog, "xdp_native.bpf.o", "xdp.frags", 9000))
+ port = rand_port()
+
+ _set_xdp_map("map_xdp_setup", TestConfig.MODE.value, XDPAction.HEAD_ADJST.value)
+ _set_xdp_map("map_xdp_setup", TestConfig.PORT.value, port)
+
+ hds_thresh = get_hds_thresh(cfg)
+ for offset in offset_lst:
+ for pkt_sz in pkt_sz_lst:
+ # The "head" buffer must contain at least the Ethernet header
+ # after we eat into it. We send large-enough packets, but if HDS
+ # is enabled head will only contain headers. Don't try to eat
+ # more than 28 bytes (UDPv4 + eth hdr left: (14 + 20 + 8) - 14)
+ l2_cut_off = 28 if cfg.addr_ipver == 4 else 48
+ if pkt_sz > hds_thresh and offset > l2_cut_off:
+ ksft_pr(
+ f"Failed run: pkt_sz ({pkt_sz}) > HDS threshold ({hds_thresh}) and "
+ f"offset {offset} > {l2_cut_off}"
+ )
+ return {"status": "pass"}
+
+ test_str = ''.join(random.choice(string.ascii_lowercase) for _ in range(pkt_sz))
+ tag = format(random.randint(65, 90), '02x')
+
+ _set_xdp_map("map_xdp_setup",
+ TestConfig.ADJST_OFFSET.value,
+ offset)
+ _set_xdp_map("map_xdp_setup", TestConfig.ADJST_TAG.value, int(tag, 16))
+ _set_xdp_map("map_xdp_setup", TestConfig.ADJST_OFFSET.value, offset)
+
+ recvd_str = _exchg_udp(cfg, port, test_str)
+
+ # Check for failures around adjustment and data exchange
+ failure = _check_for_failures(recvd_str, _get_stats(prog_info['maps']['map_xdp_stats']))
+ if failure is not None:
+ return {
+ "status": "fail",
+ "reason": failure,
+ "offset": offset,
+ "pkt_sz": pkt_sz
+ }
+
+ # Validate data content based on offset direction
+ expected_data = None
+ if offset < 0:
+ expected_data = chr(int(tag, 16)) * (0 - offset) + test_str
+ else:
+ expected_data = test_str[offset:]
+
+ if recvd_str != expected_data:
+ return {
+ "status": "fail",
+ "reason": "Data mismatch",
+ "offset": offset,
+ "pkt_sz": pkt_sz
+ }
+
+ return {"status": "pass"}
+
+
+def test_xdp_native_adjst_head_grow_data(cfg):
+ """
+ Tests the XDP headroom growth support.
+
+ Args:
+ cfg: Configuration object containing network settings.
+
+ This function sets up the packet size and offset lists, then calls the
+ _test_xdp_native_head_adjst_mb function to perform the actual test. The
+ test is passed if the headroom is successfully extended for given packet
+ sizes and offsets.
+ """
+ pkt_sz_lst = [512, 1024, 2048]
+
+ # Negative values result in headroom shrinking, resulting in growing of payload
+ offset_lst = [-16, -32, -64, -128, -256]
+ res = _test_xdp_native_head_adjst(cfg, "xdp_prog_frags", pkt_sz_lst, offset_lst)
+
+ _validate_res(res, offset_lst, pkt_sz_lst)
+
+
+def test_xdp_native_adjst_head_shrnk_data(cfg):
+ """
+ Tests the XDP headroom shrinking support.
+
+ Args:
+ cfg: Configuration object containing network settings.
+
+ This function sets up the packet size and offset lists, then calls the
+ _test_xdp_native_head_adjst_mb function to perform the actual test. The
+ test is passed if the headroom is successfully shrunk for given packet
+ sizes and offsets.
+ """
+ pkt_sz_lst = [512, 1024, 2048]
+
+ # Positive values result in headroom growing, resulting in shrinking of payload
+ offset_lst = [16, 32, 64, 128, 256]
+ res = _test_xdp_native_head_adjst(cfg, "xdp_prog_frags", pkt_sz_lst, offset_lst)
+
+ _validate_res(res, offset_lst, pkt_sz_lst)
+
+
+def main():
+ """
+ Main function to execute the XDP tests.
+
+ This function runs a series of tests to validate the XDP support for
+ both the single and multi-buffer. It uses the NetDrvEpEnv context
+ manager to manage the network driver environment and the ksft_run
+ function to execute the tests.
+ """
+ with NetDrvEpEnv(__file__) as cfg:
+ cfg.netnl = EthtoolFamily()
+ ksft_run(
+ [
+ test_xdp_native_pass_sb,
+ test_xdp_native_pass_mb,
+ test_xdp_native_drop_sb,
+ test_xdp_native_drop_mb,
+ test_xdp_native_tx_mb,
+ test_xdp_native_adjst_tail_grow_data,
+ test_xdp_native_adjst_tail_shrnk_data,
+ test_xdp_native_adjst_head_grow_data,
+ test_xdp_native_adjst_head_shrnk_data,
+ ],
+ args=(cfg,))
+ ksft_exit()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/hid/config.common b/tools/testing/selftests/hid/config.common
index b1f40857307d..38c51158adf8 100644
--- a/tools/testing/selftests/hid/config.common
+++ b/tools/testing/selftests/hid/config.common
@@ -135,6 +135,7 @@ CONFIG_NET_EMATCH=y
CONFIG_NETFILTER_NETLINK_LOG=y
CONFIG_NETFILTER_NETLINK_QUEUE=y
CONFIG_NETFILTER_XTABLES=y
+CONFIG_NETFILTER_XTABLES_LEGACY=y
CONFIG_NETFILTER_XT_MATCH_ADDRTYPE=y
CONFIG_NETFILTER_XT_MATCH_BPF=y
CONFIG_NETFILTER_XT_MATCH_COMMENT=y
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index c6dd2a335cf4..47c293c2962f 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -34,6 +34,7 @@ reuseport_bpf_numa
reuseport_dualstack
rxtimestamp
sctp_hello
+scm_inq
scm_pidfd
scm_rights
sk_bind_sendto_listen
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 332f387615d7..b31a71f2b372 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -41,6 +41,7 @@ TEST_PROGS += netns-name.sh
TEST_PROGS += link_netns.py
TEST_PROGS += nl_netdev.py
TEST_PROGS += rtnetlink.py
+TEST_PROGS += rtnetlink_notification.sh
TEST_PROGS += srv6_end_dt46_l3vpn_test.sh
TEST_PROGS += srv6_end_dt4_l3vpn_test.sh
TEST_PROGS += srv6_end_dt6_l3vpn_test.sh
@@ -62,6 +63,7 @@ TEST_PROGS += ip_local_port_range.sh
TEST_PROGS += rps_default_mask.sh
TEST_PROGS += big_tcp.sh
TEST_PROGS += netns-sysctl.sh
+TEST_PROGS += netdev-l2addr.sh
TEST_PROGS_EXTENDED := toeplitz_client.sh toeplitz.sh xfrm_policy_add_speed.sh
TEST_GEN_FILES = socket nettest
TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any
@@ -99,6 +101,7 @@ TEST_PROGS += test_vxlan_mdb.sh
TEST_PROGS += test_bridge_neigh_suppress.sh
TEST_PROGS += test_vxlan_nolocalbypass.sh
TEST_PROGS += test_bridge_backup_port.sh
+TEST_PROGS += test_neigh.sh
TEST_PROGS += fdb_flush.sh fdb_notify.sh
TEST_PROGS += fq_band_pktlimit.sh
TEST_PROGS += vlan_hw_filter.sh
@@ -112,6 +115,8 @@ TEST_PROGS += skf_net_off.sh
TEST_GEN_FILES += skf_net_off
TEST_GEN_FILES += tfo
TEST_PROGS += tfo_passive.sh
+TEST_PROGS += broadcast_pmtu.sh
+TEST_PROGS += ipv6_force_forwarding.sh
# YNL files, must be before "include ..lib.mk"
YNL_GEN_FILES := busy_poller netlink-dumps
diff --git a/tools/testing/selftests/net/af_unix/Makefile b/tools/testing/selftests/net/af_unix/Makefile
index 50584479540b..a4b61c6d0290 100644
--- a/tools/testing/selftests/net/af_unix/Makefile
+++ b/tools/testing/selftests/net/af_unix/Makefile
@@ -1,4 +1,4 @@
CFLAGS += $(KHDR_INCLUDES)
-TEST_GEN_PROGS := diag_uid msg_oob scm_pidfd scm_rights unix_connect
+TEST_GEN_PROGS := diag_uid msg_oob scm_inq scm_pidfd scm_rights unix_connect
include ../../lib.mk
diff --git a/tools/testing/selftests/net/af_unix/scm_inq.c b/tools/testing/selftests/net/af_unix/scm_inq.c
new file mode 100644
index 000000000000..9d22561e7b8f
--- /dev/null
+++ b/tools/testing/selftests/net/af_unix/scm_inq.c
@@ -0,0 +1,125 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright 2025 Google LLC */
+
+#include <linux/sockios.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+
+#include "../../kselftest_harness.h"
+
+#define NR_CHUNKS 100
+#define MSG_LEN 256
+
+struct scm_inq {
+ struct cmsghdr cmsghdr;
+ int inq;
+};
+
+FIXTURE(scm_inq)
+{
+ int fd[2];
+};
+
+FIXTURE_VARIANT(scm_inq)
+{
+ int type;
+};
+
+FIXTURE_VARIANT_ADD(scm_inq, stream)
+{
+ .type = SOCK_STREAM,
+};
+
+FIXTURE_VARIANT_ADD(scm_inq, dgram)
+{
+ .type = SOCK_DGRAM,
+};
+
+FIXTURE_VARIANT_ADD(scm_inq, seqpacket)
+{
+ .type = SOCK_SEQPACKET,
+};
+
+FIXTURE_SETUP(scm_inq)
+{
+ int err;
+
+ err = socketpair(AF_UNIX, variant->type | SOCK_NONBLOCK, 0, self->fd);
+ ASSERT_EQ(0, err);
+}
+
+FIXTURE_TEARDOWN(scm_inq)
+{
+ close(self->fd[0]);
+ close(self->fd[1]);
+}
+
+static void send_chunks(struct __test_metadata *_metadata,
+ FIXTURE_DATA(scm_inq) *self)
+{
+ char buf[MSG_LEN] = {};
+ int i, ret;
+
+ for (i = 0; i < NR_CHUNKS; i++) {
+ ret = send(self->fd[0], buf, sizeof(buf), 0);
+ ASSERT_EQ(sizeof(buf), ret);
+ }
+}
+
+static void recv_chunks(struct __test_metadata *_metadata,
+ FIXTURE_DATA(scm_inq) *self)
+{
+ struct msghdr msg = {};
+ struct iovec iov = {};
+ struct scm_inq cmsg;
+ char buf[MSG_LEN];
+ int i, ret;
+ int inq;
+
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+ msg.msg_control = &cmsg;
+ msg.msg_controllen = CMSG_SPACE(sizeof(cmsg.inq));
+
+ iov.iov_base = buf;
+ iov.iov_len = sizeof(buf);
+
+ for (i = 0; i < NR_CHUNKS; i++) {
+ memset(buf, 0, sizeof(buf));
+ memset(&cmsg, 0, sizeof(cmsg));
+
+ ret = recvmsg(self->fd[1], &msg, 0);
+ ASSERT_EQ(MSG_LEN, ret);
+ ASSERT_NE(NULL, CMSG_FIRSTHDR(&msg));
+ ASSERT_EQ(CMSG_LEN(sizeof(cmsg.inq)), cmsg.cmsghdr.cmsg_len);
+ ASSERT_EQ(SOL_SOCKET, cmsg.cmsghdr.cmsg_level);
+ ASSERT_EQ(SCM_INQ, cmsg.cmsghdr.cmsg_type);
+
+ ret = ioctl(self->fd[1], SIOCINQ, &inq);
+ ASSERT_EQ(0, ret);
+ ASSERT_EQ(cmsg.inq, inq);
+ }
+}
+
+TEST_F(scm_inq, basic)
+{
+ int err, inq;
+
+ err = setsockopt(self->fd[1], SOL_SOCKET, SO_INQ, &(int){1}, sizeof(int));
+ if (variant->type != SOCK_STREAM) {
+ ASSERT_EQ(-ENOPROTOOPT, -errno);
+ return;
+ }
+
+ ASSERT_EQ(0, err);
+
+ err = ioctl(self->fd[1], SIOCINQ, &inq);
+ ASSERT_EQ(0, err);
+ ASSERT_EQ(0, inq);
+
+ send_chunks(_metadata, self);
+ recv_chunks(_metadata, self);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/bench/Makefile b/tools/testing/selftests/net/bench/Makefile
new file mode 100644
index 000000000000..2546c45e42f7
--- /dev/null
+++ b/tools/testing/selftests/net/bench/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0
+
+TEST_GEN_MODS_DIR := page_pool
+
+TEST_PROGS += test_bench_page_pool.sh
+
+include ../../lib.mk
diff --git a/tools/testing/selftests/net/bench/page_pool/Makefile b/tools/testing/selftests/net/bench/page_pool/Makefile
new file mode 100644
index 000000000000..0549a16ba275
--- /dev/null
+++ b/tools/testing/selftests/net/bench/page_pool/Makefile
@@ -0,0 +1,17 @@
+BENCH_PAGE_POOL_SIMPLE_TEST_DIR := $(realpath $(dir $(abspath $(lastword $(MAKEFILE_LIST)))))
+KDIR ?= /lib/modules/$(shell uname -r)/build
+
+ifeq ($(V),1)
+Q =
+else
+Q = @
+endif
+
+obj-m += bench_page_pool.o
+bench_page_pool-y += bench_page_pool_simple.o time_bench.o
+
+all:
+ +$(Q)make -C $(KDIR) M=$(BENCH_PAGE_POOL_SIMPLE_TEST_DIR) modules
+
+clean:
+ +$(Q)make -C $(KDIR) M=$(BENCH_PAGE_POOL_SIMPLE_TEST_DIR) clean
diff --git a/tools/testing/selftests/net/bench/page_pool/bench_page_pool_simple.c b/tools/testing/selftests/net/bench/page_pool/bench_page_pool_simple.c
new file mode 100644
index 000000000000..cb6468adbda4
--- /dev/null
+++ b/tools/testing/selftests/net/bench/page_pool/bench_page_pool_simple.c
@@ -0,0 +1,267 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Benchmark module for page_pool.
+ *
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/interrupt.h>
+#include <linux/limits.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <net/page_pool/helpers.h>
+
+#include "time_bench.h"
+
+static int verbose = 1;
+#define MY_POOL_SIZE 1024
+
+/* Makes tests selectable. Useful for perf-record to analyze a single test.
+ * Hint: Bash shells support writing binary number like: $((2#101010)
+ *
+ * # modprobe bench_page_pool_simple run_flags=$((2#100))
+ */
+static unsigned long run_flags = 0xFFFFFFFF;
+module_param(run_flags, ulong, 0);
+MODULE_PARM_DESC(run_flags, "Limit which bench test that runs");
+
+/* Count the bit number from the enum */
+enum benchmark_bit {
+ bit_run_bench_baseline,
+ bit_run_bench_no_softirq01,
+ bit_run_bench_no_softirq02,
+ bit_run_bench_no_softirq03,
+};
+
+#define bit(b) (1 << (b))
+#define enabled(b) ((run_flags & (bit(b))))
+
+/* notice time_bench is limited to U32_MAX nr loops */
+static unsigned long loops = 10000000;
+module_param(loops, ulong, 0);
+MODULE_PARM_DESC(loops, "Specify loops bench will run");
+
+/* Timing at the nanosec level, we need to know the overhead
+ * introduced by the for loop itself
+ */
+static int time_bench_for_loop(struct time_bench_record *rec, void *data)
+{
+ uint64_t loops_cnt = 0;
+ int i;
+
+ time_bench_start(rec);
+ /** Loop to measure **/
+ for (i = 0; i < rec->loops; i++) {
+ loops_cnt++;
+ barrier(); /* avoid compiler to optimize this loop */
+ }
+ time_bench_stop(rec, loops_cnt);
+ return loops_cnt;
+}
+
+static int time_bench_atomic_inc(struct time_bench_record *rec, void *data)
+{
+ uint64_t loops_cnt = 0;
+ atomic_t cnt;
+ int i;
+
+ atomic_set(&cnt, 0);
+
+ time_bench_start(rec);
+ /** Loop to measure **/
+ for (i = 0; i < rec->loops; i++) {
+ atomic_inc(&cnt);
+ barrier(); /* avoid compiler to optimize this loop */
+ }
+ loops_cnt = atomic_read(&cnt);
+ time_bench_stop(rec, loops_cnt);
+ return loops_cnt;
+}
+
+/* The ptr_ping in page_pool uses a spinlock. We need to know the minimum
+ * overhead of taking+releasing a spinlock, to know the cycles that can be saved
+ * by e.g. amortizing this via bulking.
+ */
+static int time_bench_lock(struct time_bench_record *rec, void *data)
+{
+ uint64_t loops_cnt = 0;
+ spinlock_t lock;
+ int i;
+
+ spin_lock_init(&lock);
+
+ time_bench_start(rec);
+ /** Loop to measure **/
+ for (i = 0; i < rec->loops; i++) {
+ spin_lock(&lock);
+ loops_cnt++;
+ barrier(); /* avoid compiler to optimize this loop */
+ spin_unlock(&lock);
+ }
+ time_bench_stop(rec, loops_cnt);
+ return loops_cnt;
+}
+
+/* Helper for filling some page's into ptr_ring */
+static void pp_fill_ptr_ring(struct page_pool *pp, int elems)
+{
+ /* GFP_ATOMIC needed when under run softirq */
+ gfp_t gfp_mask = GFP_ATOMIC;
+ struct page **array;
+ int i;
+
+ array = kcalloc(elems, sizeof(struct page *), gfp_mask);
+
+ for (i = 0; i < elems; i++)
+ array[i] = page_pool_alloc_pages(pp, gfp_mask);
+ for (i = 0; i < elems; i++)
+ page_pool_put_page(pp, array[i], -1, false);
+
+ kfree(array);
+}
+
+enum test_type { type_fast_path, type_ptr_ring, type_page_allocator };
+
+/* Depends on compile optimizing this function */
+static int time_bench_page_pool(struct time_bench_record *rec, void *data,
+ enum test_type type, const char *func)
+{
+ uint64_t loops_cnt = 0;
+ gfp_t gfp_mask = GFP_ATOMIC; /* GFP_ATOMIC is not really needed */
+ int i, err;
+
+ struct page_pool *pp;
+ struct page *page;
+
+ struct page_pool_params pp_params = {
+ .order = 0,
+ .flags = 0,
+ .pool_size = MY_POOL_SIZE,
+ .nid = NUMA_NO_NODE,
+ .dev = NULL, /* Only use for DMA mapping */
+ .dma_dir = DMA_BIDIRECTIONAL,
+ };
+
+ pp = page_pool_create(&pp_params);
+ if (IS_ERR(pp)) {
+ err = PTR_ERR(pp);
+ pr_warn("%s: Error(%d) creating page_pool\n", func, err);
+ goto out;
+ }
+ pp_fill_ptr_ring(pp, 64);
+
+ if (in_serving_softirq())
+ pr_warn("%s(): in_serving_softirq fast-path\n", func);
+ else
+ pr_warn("%s(): Cannot use page_pool fast-path\n", func);
+
+ time_bench_start(rec);
+ /** Loop to measure **/
+ for (i = 0; i < rec->loops; i++) {
+ /* Common fast-path alloc that depend on in_serving_softirq() */
+ page = page_pool_alloc_pages(pp, gfp_mask);
+ if (!page)
+ break;
+ loops_cnt++;
+ barrier(); /* avoid compiler to optimize this loop */
+
+ /* The benchmarks purpose it to test different return paths.
+ * Compiler should inline optimize other function calls out
+ */
+ if (type == type_fast_path) {
+ /* Fast-path recycling e.g. XDP_DROP use-case */
+ page_pool_recycle_direct(pp, page);
+
+ } else if (type == type_ptr_ring) {
+ /* Normal return path */
+ page_pool_put_page(pp, page, -1, false);
+
+ } else if (type == type_page_allocator) {
+ /* Test if not pages are recycled, but instead
+ * returned back into systems page allocator
+ */
+ get_page(page); /* cause no-recycling */
+ page_pool_put_page(pp, page, -1, false);
+ put_page(page);
+ } else {
+ BUILD_BUG();
+ }
+ }
+ time_bench_stop(rec, loops_cnt);
+out:
+ page_pool_destroy(pp);
+ return loops_cnt;
+}
+
+static int time_bench_page_pool01_fast_path(struct time_bench_record *rec,
+ void *data)
+{
+ return time_bench_page_pool(rec, data, type_fast_path, __func__);
+}
+
+static int time_bench_page_pool02_ptr_ring(struct time_bench_record *rec,
+ void *data)
+{
+ return time_bench_page_pool(rec, data, type_ptr_ring, __func__);
+}
+
+static int time_bench_page_pool03_slow(struct time_bench_record *rec,
+ void *data)
+{
+ return time_bench_page_pool(rec, data, type_page_allocator, __func__);
+}
+
+static int run_benchmark_tests(void)
+{
+ uint32_t nr_loops = loops;
+
+ /* Baseline tests */
+ if (enabled(bit_run_bench_baseline)) {
+ time_bench_loop(nr_loops * 10, 0, "for_loop", NULL,
+ time_bench_for_loop);
+ time_bench_loop(nr_loops * 10, 0, "atomic_inc", NULL,
+ time_bench_atomic_inc);
+ time_bench_loop(nr_loops, 0, "lock", NULL, time_bench_lock);
+ }
+
+ /* This test cannot activate correct code path, due to no-softirq ctx */
+ if (enabled(bit_run_bench_no_softirq01))
+ time_bench_loop(nr_loops, 0, "no-softirq-page_pool01", NULL,
+ time_bench_page_pool01_fast_path);
+ if (enabled(bit_run_bench_no_softirq02))
+ time_bench_loop(nr_loops, 0, "no-softirq-page_pool02", NULL,
+ time_bench_page_pool02_ptr_ring);
+ if (enabled(bit_run_bench_no_softirq03))
+ time_bench_loop(nr_loops, 0, "no-softirq-page_pool03", NULL,
+ time_bench_page_pool03_slow);
+
+ return 0;
+}
+
+static int __init bench_page_pool_simple_module_init(void)
+{
+ if (verbose)
+ pr_info("Loaded\n");
+
+ if (loops > U32_MAX) {
+ pr_err("Module param loops(%lu) exceeded U32_MAX(%u)\n", loops,
+ U32_MAX);
+ return -ECHRNG;
+ }
+
+ run_benchmark_tests();
+
+ return 0;
+}
+module_init(bench_page_pool_simple_module_init);
+
+static void __exit bench_page_pool_simple_module_exit(void)
+{
+ if (verbose)
+ pr_info("Unloaded\n");
+}
+module_exit(bench_page_pool_simple_module_exit);
+
+MODULE_DESCRIPTION("Benchmark of page_pool simple cases");
+MODULE_AUTHOR("Jesper Dangaard Brouer <netoptimizer@brouer.com>");
+MODULE_LICENSE("GPL");
diff --git a/tools/testing/selftests/net/bench/page_pool/time_bench.c b/tools/testing/selftests/net/bench/page_pool/time_bench.c
new file mode 100644
index 000000000000..073bb36ec5f2
--- /dev/null
+++ b/tools/testing/selftests/net/bench/page_pool/time_bench.c
@@ -0,0 +1,394 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Benchmarking code execution time inside the kernel
+ *
+ * Copyright (C) 2014, Red Hat, Inc., Jesper Dangaard Brouer
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/time.h>
+
+#include <linux/perf_event.h> /* perf_event_create_kernel_counter() */
+
+/* For concurrency testing */
+#include <linux/completion.h>
+#include <linux/sched.h>
+#include <linux/workqueue.h>
+#include <linux/kthread.h>
+
+#include "time_bench.h"
+
+static int verbose = 1;
+
+/** TSC (Time-Stamp Counter) based **
+ * See: linux/time_bench.h
+ * tsc_start_clock() and tsc_stop_clock()
+ */
+
+/** Wall-clock based **
+ */
+
+/** PMU (Performance Monitor Unit) based **
+ */
+#define PERF_FORMAT \
+ (PERF_FORMAT_GROUP | PERF_FORMAT_ID | PERF_FORMAT_TOTAL_TIME_ENABLED | \
+ PERF_FORMAT_TOTAL_TIME_RUNNING)
+
+struct raw_perf_event {
+ uint64_t config; /* event */
+ uint64_t config1; /* umask */
+ struct perf_event *save;
+ char *desc;
+};
+
+/* if HT is enable a maximum of 4 events (5 if one is instructions
+ * retired can be specified, if HT is disabled a maximum of 8 (9 if
+ * one is instructions retired) can be specified.
+ *
+ * From Table 19-1. Architectural Performance Events
+ * Architectures Software Developer’s Manual Volume 3: System Programming
+ * Guide
+ */
+struct raw_perf_event perf_events[] = {
+ { 0x3c, 0x00, NULL, "Unhalted CPU Cycles" },
+ { 0xc0, 0x00, NULL, "Instruction Retired" }
+};
+
+#define NUM_EVTS (ARRAY_SIZE(perf_events))
+
+/* WARNING: PMU config is currently broken!
+ */
+bool time_bench_PMU_config(bool enable)
+{
+ int i;
+ struct perf_event_attr perf_conf;
+ struct perf_event *perf_event;
+ int cpu;
+
+ preempt_disable();
+ cpu = smp_processor_id();
+ pr_info("DEBUG: cpu:%d\n", cpu);
+ preempt_enable();
+
+ memset(&perf_conf, 0, sizeof(struct perf_event_attr));
+ perf_conf.type = PERF_TYPE_RAW;
+ perf_conf.size = sizeof(struct perf_event_attr);
+ perf_conf.read_format = PERF_FORMAT;
+ perf_conf.pinned = 1;
+ perf_conf.exclude_user = 1; /* No userspace events */
+ perf_conf.exclude_kernel = 0; /* Only kernel events */
+
+ for (i = 0; i < NUM_EVTS; i++) {
+ perf_conf.disabled = enable;
+ //perf_conf.disabled = (i == 0) ? 1 : 0;
+ perf_conf.config = perf_events[i].config;
+ perf_conf.config1 = perf_events[i].config1;
+ if (verbose)
+ pr_info("%s() enable PMU counter: %s\n",
+ __func__, perf_events[i].desc);
+ perf_event = perf_event_create_kernel_counter(&perf_conf, cpu,
+ NULL /* task */,
+ NULL /* overflow_handler*/,
+ NULL /* context */);
+ if (perf_event) {
+ perf_events[i].save = perf_event;
+ pr_info("%s():DEBUG perf_event success\n", __func__);
+
+ perf_event_enable(perf_event);
+ } else {
+ pr_info("%s():DEBUG perf_event is NULL\n", __func__);
+ }
+ }
+
+ return true;
+}
+
+/** Generic functions **
+ */
+
+/* Calculate stats, store results in record */
+bool time_bench_calc_stats(struct time_bench_record *rec)
+{
+#define NANOSEC_PER_SEC 1000000000 /* 10^9 */
+ uint64_t ns_per_call_tmp_rem = 0;
+ uint32_t ns_per_call_remainder = 0;
+ uint64_t pmc_ipc_tmp_rem = 0;
+ uint32_t pmc_ipc_remainder = 0;
+ uint32_t pmc_ipc_div = 0;
+ uint32_t invoked_cnt_precision = 0;
+ uint32_t invoked_cnt = 0; /* 32-bit due to div_u64_rem() */
+
+ if (rec->flags & TIME_BENCH_LOOP) {
+ if (rec->invoked_cnt < 1000) {
+ pr_err("ERR: need more(>1000) loops(%llu) for timing\n",
+ rec->invoked_cnt);
+ return false;
+ }
+ if (rec->invoked_cnt > ((1ULL << 32) - 1)) {
+ /* div_u64_rem() can only support div with 32bit*/
+ pr_err("ERR: Invoke cnt(%llu) too big overflow 32bit\n",
+ rec->invoked_cnt);
+ return false;
+ }
+ invoked_cnt = (uint32_t)rec->invoked_cnt;
+ }
+
+ /* TSC (Time-Stamp Counter) records */
+ if (rec->flags & TIME_BENCH_TSC) {
+ rec->tsc_interval = rec->tsc_stop - rec->tsc_start;
+ if (rec->tsc_interval == 0) {
+ pr_err("ABORT: timing took ZERO TSC time\n");
+ return false;
+ }
+ /* Calculate stats */
+ if (rec->flags & TIME_BENCH_LOOP)
+ rec->tsc_cycles = rec->tsc_interval / invoked_cnt;
+ else
+ rec->tsc_cycles = rec->tsc_interval;
+ }
+
+ /* Wall-clock time calc */
+ if (rec->flags & TIME_BENCH_WALLCLOCK) {
+ rec->time_start = rec->ts_start.tv_nsec +
+ (NANOSEC_PER_SEC * rec->ts_start.tv_sec);
+ rec->time_stop = rec->ts_stop.tv_nsec +
+ (NANOSEC_PER_SEC * rec->ts_stop.tv_sec);
+ rec->time_interval = rec->time_stop - rec->time_start;
+ if (rec->time_interval == 0) {
+ pr_err("ABORT: timing took ZERO wallclock time\n");
+ return false;
+ }
+ /* Calculate stats */
+ /*** Division in kernel it tricky ***/
+ /* Orig: time_sec = (time_interval / NANOSEC_PER_SEC); */
+ /* remainder only correct because NANOSEC_PER_SEC is 10^9 */
+ rec->time_sec = div_u64_rem(rec->time_interval, NANOSEC_PER_SEC,
+ &rec->time_sec_remainder);
+ //TODO: use existing struct timespec records instead of div?
+
+ if (rec->flags & TIME_BENCH_LOOP) {
+ /*** Division in kernel it tricky ***/
+ /* Orig: ns = ((double)time_interval / invoked_cnt); */
+ /* First get quotient */
+ rec->ns_per_call_quotient =
+ div_u64_rem(rec->time_interval, invoked_cnt,
+ &ns_per_call_remainder);
+ /* Now get decimals .xxx precision (incorrect roundup)*/
+ ns_per_call_tmp_rem = ns_per_call_remainder;
+ invoked_cnt_precision = invoked_cnt / 1000;
+ if (invoked_cnt_precision > 0) {
+ rec->ns_per_call_decimal =
+ div_u64_rem(ns_per_call_tmp_rem,
+ invoked_cnt_precision,
+ &ns_per_call_remainder);
+ }
+ }
+ }
+
+ /* Performance Monitor Unit (PMU) counters */
+ if (rec->flags & TIME_BENCH_PMU) {
+ //FIXME: Overflow handling???
+ rec->pmc_inst = rec->pmc_inst_stop - rec->pmc_inst_start;
+ rec->pmc_clk = rec->pmc_clk_stop - rec->pmc_clk_start;
+
+ /* Calc Instruction Per Cycle (IPC) */
+ /* First get quotient */
+ rec->pmc_ipc_quotient = div_u64_rem(rec->pmc_inst, rec->pmc_clk,
+ &pmc_ipc_remainder);
+ /* Now get decimals .xxx precision (incorrect roundup)*/
+ pmc_ipc_tmp_rem = pmc_ipc_remainder;
+ pmc_ipc_div = rec->pmc_clk / 1000;
+ if (pmc_ipc_div > 0) {
+ rec->pmc_ipc_decimal = div_u64_rem(pmc_ipc_tmp_rem,
+ pmc_ipc_div,
+ &pmc_ipc_remainder);
+ }
+ }
+
+ return true;
+}
+
+/* Generic function for invoking a loop function and calculating
+ * execution time stats. The function being called/timed is assumed
+ * to perform a tight loop, and update the timing record struct.
+ */
+bool time_bench_loop(uint32_t loops, int step, char *txt, void *data,
+ int (*func)(struct time_bench_record *record, void *data))
+{
+ struct time_bench_record rec;
+
+ /* Setup record */
+ memset(&rec, 0, sizeof(rec)); /* zero func might not update all */
+ rec.version_abi = 1;
+ rec.loops = loops;
+ rec.step = step;
+ rec.flags = (TIME_BENCH_LOOP | TIME_BENCH_TSC | TIME_BENCH_WALLCLOCK);
+
+ /*** Loop function being timed ***/
+ if (!func(&rec, data)) {
+ pr_err("ABORT: function being timed failed\n");
+ return false;
+ }
+
+ if (rec.invoked_cnt < loops)
+ pr_warn("WARNING: Invoke count(%llu) smaller than loops(%d)\n",
+ rec.invoked_cnt, loops);
+
+ /* Calculate stats */
+ time_bench_calc_stats(&rec);
+
+ pr_info("Type:%s Per elem: %llu cycles(tsc) %llu.%03llu ns (step:%d) - (measurement period time:%llu.%09u sec time_interval:%llu) - (invoke count:%llu tsc_interval:%llu)\n",
+ txt, rec.tsc_cycles, rec.ns_per_call_quotient,
+ rec.ns_per_call_decimal, rec.step, rec.time_sec,
+ rec.time_sec_remainder, rec.time_interval, rec.invoked_cnt,
+ rec.tsc_interval);
+ if (rec.flags & TIME_BENCH_PMU)
+ pr_info("Type:%s PMU inst/clock%llu/%llu = %llu.%03llu IPC (inst per cycle)\n",
+ txt, rec.pmc_inst, rec.pmc_clk, rec.pmc_ipc_quotient,
+ rec.pmc_ipc_decimal);
+ return true;
+}
+
+/* Function getting invoked by kthread */
+static int invoke_test_on_cpu_func(void *private)
+{
+ struct time_bench_cpu *cpu = private;
+ struct time_bench_sync *sync = cpu->sync;
+ cpumask_t newmask = CPU_MASK_NONE;
+ void *data = cpu->data;
+
+ /* Restrict CPU */
+ cpumask_set_cpu(cpu->rec.cpu, &newmask);
+ set_cpus_allowed_ptr(current, &newmask);
+
+ /* Synchronize start of concurrency test */
+ atomic_inc(&sync->nr_tests_running);
+ wait_for_completion(&sync->start_event);
+
+ /* Start benchmark function */
+ if (!cpu->bench_func(&cpu->rec, data)) {
+ pr_err("ERROR: function being timed failed on CPU:%d(%d)\n",
+ cpu->rec.cpu, smp_processor_id());
+ } else {
+ if (verbose)
+ pr_info("SUCCESS: ran on CPU:%d(%d)\n", cpu->rec.cpu,
+ smp_processor_id());
+ }
+ cpu->did_bench_run = true;
+
+ /* End test */
+ atomic_dec(&sync->nr_tests_running);
+ /* Wait for kthread_stop() telling us to stop */
+ while (!kthread_should_stop()) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ schedule();
+ }
+ __set_current_state(TASK_RUNNING);
+ return 0;
+}
+
+void time_bench_print_stats_cpumask(const char *desc,
+ struct time_bench_cpu *cpu_tasks,
+ const struct cpumask *mask)
+{
+ uint64_t average = 0;
+ int cpu;
+ int step = 0;
+ struct sum {
+ uint64_t tsc_cycles;
+ int records;
+ } sum = { 0 };
+
+ /* Get stats */
+ for_each_cpu(cpu, mask) {
+ struct time_bench_cpu *c = &cpu_tasks[cpu];
+ struct time_bench_record *rec = &c->rec;
+
+ /* Calculate stats */
+ time_bench_calc_stats(rec);
+
+ pr_info("Type:%s CPU(%d) %llu cycles(tsc) %llu.%03llu ns (step:%d) - (measurement period time:%llu.%09u sec time_interval:%llu) - (invoke count:%llu tsc_interval:%llu)\n",
+ desc, cpu, rec->tsc_cycles, rec->ns_per_call_quotient,
+ rec->ns_per_call_decimal, rec->step, rec->time_sec,
+ rec->time_sec_remainder, rec->time_interval,
+ rec->invoked_cnt, rec->tsc_interval);
+
+ /* Collect average */
+ sum.records++;
+ sum.tsc_cycles += rec->tsc_cycles;
+ step = rec->step;
+ }
+
+ if (sum.records) /* avoid div-by-zero */
+ average = sum.tsc_cycles / sum.records;
+ pr_info("Sum Type:%s Average: %llu cycles(tsc) CPUs:%d step:%d\n", desc,
+ average, sum.records, step);
+}
+
+void time_bench_run_concurrent(uint32_t loops, int step, void *data,
+ const struct cpumask *mask, /* Support masking outsome CPUs*/
+ struct time_bench_sync *sync,
+ struct time_bench_cpu *cpu_tasks,
+ int (*func)(struct time_bench_record *record, void *data))
+{
+ int cpu, running = 0;
+
+ if (verbose) // DEBUG
+ pr_warn("%s() Started on CPU:%d\n", __func__,
+ smp_processor_id());
+
+ /* Reset sync conditions */
+ atomic_set(&sync->nr_tests_running, 0);
+ init_completion(&sync->start_event);
+
+ /* Spawn off jobs on all CPUs */
+ for_each_cpu(cpu, mask) {
+ struct time_bench_cpu *c = &cpu_tasks[cpu];
+
+ running++;
+ c->sync = sync; /* Send sync variable along */
+ c->data = data; /* Send opaque along */
+
+ /* Init benchmark record */
+ memset(&c->rec, 0, sizeof(struct time_bench_record));
+ c->rec.version_abi = 1;
+ c->rec.loops = loops;
+ c->rec.step = step;
+ c->rec.flags = (TIME_BENCH_LOOP | TIME_BENCH_TSC |
+ TIME_BENCH_WALLCLOCK);
+ c->rec.cpu = cpu;
+ c->bench_func = func;
+ c->task = kthread_run(invoke_test_on_cpu_func, c,
+ "time_bench%d", cpu);
+ if (IS_ERR(c->task)) {
+ pr_err("%s(): Failed to start test func\n", __func__);
+ return; /* Argh, what about cleanup?! */
+ }
+ }
+
+ /* Wait until all processes are running */
+ while (atomic_read(&sync->nr_tests_running) < running) {
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ schedule_timeout(10);
+ }
+ /* Kick off all CPU concurrently on completion event */
+ complete_all(&sync->start_event);
+
+ /* Wait for CPUs to finish */
+ while (atomic_read(&sync->nr_tests_running)) {
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ schedule_timeout(10);
+ }
+
+ /* Stop the kthreads */
+ for_each_cpu(cpu, mask) {
+ struct time_bench_cpu *c = &cpu_tasks[cpu];
+
+ kthread_stop(c->task);
+ }
+
+ if (verbose) // DEBUG - happens often, finish on another CPU
+ pr_warn("%s() Finished on CPU:%d\n", __func__,
+ smp_processor_id());
+}
diff --git a/tools/testing/selftests/net/bench/page_pool/time_bench.h b/tools/testing/selftests/net/bench/page_pool/time_bench.h
new file mode 100644
index 000000000000..e113fcf341dc
--- /dev/null
+++ b/tools/testing/selftests/net/bench/page_pool/time_bench.h
@@ -0,0 +1,238 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Benchmarking code execution time inside the kernel
+ *
+ * Copyright (C) 2014, Red Hat, Inc., Jesper Dangaard Brouer
+ * for licensing details see kernel-base/COPYING
+ */
+#ifndef _LINUX_TIME_BENCH_H
+#define _LINUX_TIME_BENCH_H
+
+/* Main structure used for recording a benchmark run */
+struct time_bench_record {
+ uint32_t version_abi;
+ uint32_t loops; /* Requested loop invocations */
+ uint32_t step; /* option for e.g. bulk invocations */
+
+ uint32_t flags; /* Measurements types enabled */
+#define TIME_BENCH_LOOP BIT(0)
+#define TIME_BENCH_TSC BIT(1)
+#define TIME_BENCH_WALLCLOCK BIT(2)
+#define TIME_BENCH_PMU BIT(3)
+
+ uint32_t cpu; /* Used when embedded in time_bench_cpu */
+
+ /* Records */
+ uint64_t invoked_cnt; /* Returned actual invocations */
+ uint64_t tsc_start;
+ uint64_t tsc_stop;
+ struct timespec64 ts_start;
+ struct timespec64 ts_stop;
+ /* PMU counters for instruction and cycles
+ * instructions counter including pipelined instructions
+ */
+ uint64_t pmc_inst_start;
+ uint64_t pmc_inst_stop;
+ /* CPU unhalted clock counter */
+ uint64_t pmc_clk_start;
+ uint64_t pmc_clk_stop;
+
+ /* Result records */
+ uint64_t tsc_interval;
+ uint64_t time_start, time_stop, time_interval; /* in nanosec */
+ uint64_t pmc_inst, pmc_clk;
+
+ /* Derived result records */
+ uint64_t tsc_cycles; // +decimal?
+ uint64_t ns_per_call_quotient, ns_per_call_decimal;
+ uint64_t time_sec;
+ uint32_t time_sec_remainder;
+ uint64_t pmc_ipc_quotient, pmc_ipc_decimal; /* inst per cycle */
+};
+
+/* For synchronizing parallel CPUs to run concurrently */
+struct time_bench_sync {
+ atomic_t nr_tests_running;
+ struct completion start_event;
+};
+
+/* Keep track of CPUs executing our bench function.
+ *
+ * Embed a time_bench_record for storing info per cpu
+ */
+struct time_bench_cpu {
+ struct time_bench_record rec;
+ struct time_bench_sync *sync; /* back ptr */
+ struct task_struct *task;
+ /* "data" opaque could have been placed in time_bench_sync,
+ * but to avoid any false sharing, place it per CPU
+ */
+ void *data;
+ /* Support masking outsome CPUs, mark if it ran */
+ bool did_bench_run;
+ /* int cpu; // note CPU stored in time_bench_record */
+ int (*bench_func)(struct time_bench_record *record, void *data);
+};
+
+/*
+ * Below TSC assembler code is not compatible with other archs, and
+ * can also fail on guests if cpu-flags are not correct.
+ *
+ * The way TSC reading is used, many iterations, does not require as
+ * high accuracy as described below (in Intel Doc #324264).
+ *
+ * Considering changing to use get_cycles() (#include <asm/timex.h>).
+ */
+
+/** TSC (Time-Stamp Counter) based **
+ * Recommend reading, to understand details of reading TSC accurately:
+ * Intel Doc #324264, "How to Benchmark Code Execution Times on Intel"
+ *
+ * Consider getting exclusive ownership of CPU by using:
+ * unsigned long flags;
+ * preempt_disable();
+ * raw_local_irq_save(flags);
+ * _your_code_
+ * raw_local_irq_restore(flags);
+ * preempt_enable();
+ *
+ * Clobbered registers: "%rax", "%rbx", "%rcx", "%rdx"
+ * RDTSC only change "%rax" and "%rdx" but
+ * CPUID clears the high 32-bits of all (rax/rbx/rcx/rdx)
+ */
+static __always_inline uint64_t tsc_start_clock(void)
+{
+ /* See: Intel Doc #324264 */
+ unsigned int hi, lo;
+
+ asm volatile("CPUID\n\t"
+ "RDTSC\n\t"
+ "mov %%edx, %0\n\t"
+ "mov %%eax, %1\n\t"
+ : "=r"(hi), "=r"(lo)::"%rax", "%rbx", "%rcx", "%rdx");
+ //FIXME: on 32bit use clobbered %eax + %edx
+ return ((uint64_t)lo) | (((uint64_t)hi) << 32);
+}
+
+static __always_inline uint64_t tsc_stop_clock(void)
+{
+ /* See: Intel Doc #324264 */
+ unsigned int hi, lo;
+
+ asm volatile("RDTSCP\n\t"
+ "mov %%edx, %0\n\t"
+ "mov %%eax, %1\n\t"
+ "CPUID\n\t"
+ : "=r"(hi), "=r"(lo)::"%rax", "%rbx", "%rcx", "%rdx");
+ return ((uint64_t)lo) | (((uint64_t)hi) << 32);
+}
+
+/** Wall-clock based **
+ *
+ * use: getnstimeofday()
+ * getnstimeofday(&rec->ts_start);
+ * getnstimeofday(&rec->ts_stop);
+ *
+ * API changed see: Documentation/core-api/timekeeping.rst
+ * https://www.kernel.org/doc/html/latest/core-api/timekeeping.html#c.getnstimeofday
+ *
+ * We should instead use: ktime_get_real_ts64() is a direct
+ * replacement, but consider using monotonic time (ktime_get_ts64())
+ * and/or a ktime_t based interface (ktime_get()/ktime_get_real()).
+ */
+
+/** PMU (Performance Monitor Unit) based **
+ *
+ * Needed for calculating: Instructions Per Cycle (IPC)
+ * - The IPC number tell how efficient the CPU pipelining were
+ */
+//lookup: perf_event_create_kernel_counter()
+
+bool time_bench_PMU_config(bool enable);
+
+/* Raw reading via rdpmc() using fixed counters
+ *
+ * From: https://github.com/andikleen/simple-pmu
+ */
+enum {
+ FIXED_SELECT = (1U << 30), /* == 0x40000000 */
+ FIXED_INST_RETIRED_ANY = 0,
+ FIXED_CPU_CLK_UNHALTED_CORE = 1,
+ FIXED_CPU_CLK_UNHALTED_REF = 2,
+};
+
+static __always_inline unsigned int long long p_rdpmc(unsigned int in)
+{
+ unsigned int d, a;
+
+ asm volatile("rdpmc" : "=d"(d), "=a"(a) : "c"(in) : "memory");
+ return ((unsigned long long)d << 32) | a;
+}
+
+/* These PMU counter needs to be enabled, but I don't have the
+ * configure code implemented. My current hack is running:
+ * sudo perf stat -e cycles:k -e instructions:k insmod lib/ring_queue_test.ko
+ */
+/* Reading all pipelined instruction */
+static __always_inline unsigned long long pmc_inst(void)
+{
+ return p_rdpmc(FIXED_SELECT | FIXED_INST_RETIRED_ANY);
+}
+
+/* Reading CPU clock cycles */
+static __always_inline unsigned long long pmc_clk(void)
+{
+ return p_rdpmc(FIXED_SELECT | FIXED_CPU_CLK_UNHALTED_CORE);
+}
+
+/* Raw reading via MSR rdmsr() is likely wrong
+ * FIXME: How can I know which raw MSR registers are conf for what?
+ */
+#define MSR_IA32_PCM0 0x400000C1 /* PERFCTR0 */
+#define MSR_IA32_PCM1 0x400000C2 /* PERFCTR1 */
+#define MSR_IA32_PCM2 0x400000C3
+static inline uint64_t msr_inst(unsigned long long *msr_result)
+{
+ return rdmsrq_safe(MSR_IA32_PCM0, msr_result);
+}
+
+/** Generic functions **
+ */
+bool time_bench_loop(uint32_t loops, int step, char *txt, void *data,
+ int (*func)(struct time_bench_record *rec, void *data));
+bool time_bench_calc_stats(struct time_bench_record *rec);
+
+void time_bench_run_concurrent(uint32_t loops, int step, void *data,
+ const struct cpumask *mask, /* Support masking outsome CPUs*/
+ struct time_bench_sync *sync, struct time_bench_cpu *cpu_tasks,
+ int (*func)(struct time_bench_record *record, void *data));
+void time_bench_print_stats_cpumask(const char *desc,
+ struct time_bench_cpu *cpu_tasks,
+ const struct cpumask *mask);
+
+//FIXME: use rec->flags to select measurement, should be MACRO
+static __always_inline void time_bench_start(struct time_bench_record *rec)
+{
+ //getnstimeofday(&rec->ts_start);
+ ktime_get_real_ts64(&rec->ts_start);
+ if (rec->flags & TIME_BENCH_PMU) {
+ rec->pmc_inst_start = pmc_inst();
+ rec->pmc_clk_start = pmc_clk();
+ }
+ rec->tsc_start = tsc_start_clock();
+}
+
+static __always_inline void time_bench_stop(struct time_bench_record *rec,
+ uint64_t invoked_cnt)
+{
+ rec->tsc_stop = tsc_stop_clock();
+ if (rec->flags & TIME_BENCH_PMU) {
+ rec->pmc_inst_stop = pmc_inst();
+ rec->pmc_clk_stop = pmc_clk();
+ }
+ //getnstimeofday(&rec->ts_stop);
+ ktime_get_real_ts64(&rec->ts_stop);
+ rec->invoked_cnt = invoked_cnt;
+}
+
+#endif /* _LINUX_TIME_BENCH_H */
diff --git a/tools/testing/selftests/net/bench/test_bench_page_pool.sh b/tools/testing/selftests/net/bench/test_bench_page_pool.sh
new file mode 100755
index 000000000000..7b8b18cfedce
--- /dev/null
+++ b/tools/testing/selftests/net/bench/test_bench_page_pool.sh
@@ -0,0 +1,32 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+
+set -e
+
+DRIVER="./page_pool/bench_page_pool.ko"
+result=""
+
+function run_test()
+{
+ rmmod "bench_page_pool.ko" || true
+ insmod $DRIVER > /dev/null 2>&1
+ result=$(dmesg | tail -10)
+ echo "$result"
+
+ echo
+ echo "Fast path results:"
+ echo "${result}" | grep -o -E "no-softirq-page_pool01 Per elem: ([0-9]+) cycles\(tsc\) ([0-9]+\.[0-9]+) ns"
+
+ echo
+ echo "ptr_ring results:"
+ echo "${result}" | grep -o -E "no-softirq-page_pool02 Per elem: ([0-9]+) cycles\(tsc\) ([0-9]+\.[0-9]+) ns"
+
+ echo
+ echo "slow path results:"
+ echo "${result}" | grep -o -E "no-softirq-page_pool03 Per elem: ([0-9]+) cycles\(tsc\) ([0-9]+\.[0-9]+) ns"
+}
+
+run_test
+
+exit 0
diff --git a/tools/testing/selftests/net/broadcast_pmtu.sh b/tools/testing/selftests/net/broadcast_pmtu.sh
new file mode 100755
index 000000000000..726eb5d25839
--- /dev/null
+++ b/tools/testing/selftests/net/broadcast_pmtu.sh
@@ -0,0 +1,47 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Ensures broadcast route MTU is respected
+
+CLIENT_NS=$(mktemp -u client-XXXXXXXX)
+CLIENT_IP4="192.168.0.1/24"
+CLIENT_BROADCAST_ADDRESS="192.168.0.255"
+
+SERVER_NS=$(mktemp -u server-XXXXXXXX)
+SERVER_IP4="192.168.0.2/24"
+
+setup() {
+ ip netns add "${CLIENT_NS}"
+ ip netns add "${SERVER_NS}"
+
+ ip -net "${SERVER_NS}" link add link1 type veth peer name link0 netns "${CLIENT_NS}"
+
+ ip -net "${CLIENT_NS}" link set link0 up
+ ip -net "${CLIENT_NS}" link set link0 mtu 9000
+ ip -net "${CLIENT_NS}" addr add "${CLIENT_IP4}" dev link0
+
+ ip -net "${SERVER_NS}" link set link1 up
+ ip -net "${SERVER_NS}" link set link1 mtu 1500
+ ip -net "${SERVER_NS}" addr add "${SERVER_IP4}" dev link1
+
+ read -r -a CLIENT_BROADCAST_ENTRY <<< "$(ip -net "${CLIENT_NS}" route show table local type broadcast)"
+ ip -net "${CLIENT_NS}" route del "${CLIENT_BROADCAST_ENTRY[@]}"
+ ip -net "${CLIENT_NS}" route add "${CLIENT_BROADCAST_ENTRY[@]}" mtu 1500
+
+ ip net exec "${SERVER_NS}" sysctl -wq net.ipv4.icmp_echo_ignore_broadcasts=0
+}
+
+cleanup() {
+ ip -net "${SERVER_NS}" link del link1
+ ip netns del "${CLIENT_NS}"
+ ip netns del "${SERVER_NS}"
+}
+
+trap cleanup EXIT
+
+setup &&
+ echo "Testing for broadcast route MTU" &&
+ ip net exec "${CLIENT_NS}" ping -f -M want -q -c 1 -s 8000 -w 1 -b "${CLIENT_BROADCAST_ADDRESS}" > /dev/null 2>&1
+
+exit $?
+
diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config
index 3cfef5153823..c24417d0047b 100644
--- a/tools/testing/selftests/net/config
+++ b/tools/testing/selftests/net/config
@@ -30,16 +30,25 @@ CONFIG_NET_FOU=y
CONFIG_NET_FOU_IP_TUNNELS=y
CONFIG_NETFILTER=y
CONFIG_NETFILTER_ADVANCED=y
+CONFIG_NETFILTER_XTABLES_LEGACY=y
CONFIG_NF_CONNTRACK=m
CONFIG_IPV6_MROUTE=y
CONFIG_IPV6_SIT=y
CONFIG_NF_NAT=m
CONFIG_IP6_NF_IPTABLES=m
+CONFIG_IP6_NF_IPTABLES_LEGACY=m
CONFIG_IP_NF_IPTABLES=m
+CONFIG_IP_NF_IPTABLES_LEGACY=m
+CONFIG_IP6_NF_MANGLE=m
+CONFIG_IP6_NF_FILTER=m
CONFIG_IP6_NF_NAT=m
CONFIG_IP6_NF_RAW=m
+CONFIG_IP_NF_MANGLE=m
+CONFIG_IP_NF_FILTER=m
CONFIG_IP_NF_NAT=m
CONFIG_IP_NF_RAW=m
+CONFIG_IP_NF_TARGET_REJECT=m
+CONFIG_IP6_NF_TARGET_REJECT=m
CONFIG_IP_NF_TARGET_TTL=m
CONFIG_IPV6_GRE=m
CONFIG_IPV6_SEG6_LWTUNNEL=y
@@ -57,6 +66,8 @@ CONFIG_NF_TABLES_IPV6=y
CONFIG_NF_TABLES_IPV4=y
CONFIG_NFT_NAT=m
CONFIG_NETFILTER_XT_MATCH_LENGTH=m
+CONFIG_NETFILTER_XT_TARGET_HL=m
+CONFIG_NETFILTER_XT_NAT=m
CONFIG_NET_ACT_CSUM=m
CONFIG_NET_ACT_CT=m
CONFIG_NET_ACT_GACT=m
diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile
index 00bde7b6f39e..d7bb2e80e88c 100644
--- a/tools/testing/selftests/net/forwarding/Makefile
+++ b/tools/testing/selftests/net/forwarding/Makefile
@@ -102,6 +102,7 @@ TEST_PROGS = bridge_fdb_learning_limit.sh \
vxlan_bridge_1d_port_8472.sh \
vxlan_bridge_1d.sh \
vxlan_bridge_1q_ipv6.sh \
+ vxlan_bridge_1q_mc_ul.sh \
vxlan_bridge_1q_port_8472_ipv6.sh \
vxlan_bridge_1q_port_8472.sh \
vxlan_bridge_1q.sh \
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index 508f3c700d71..890b3374dacd 100644
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -37,6 +37,7 @@ declare -A NETIFS=(
: "${TEAMD:=teamd}"
: "${MCD:=smcrouted}"
: "${MC_CLI:=smcroutectl}"
+: "${MCD_TABLE_NAME:=selftests}"
# Constants for netdevice bring-up:
# Default time in seconds to wait for an interface to come up before giving up
@@ -141,6 +142,20 @@ check_tc_version()
fi
}
+check_tc_erspan_support()
+{
+ local dev=$1; shift
+
+ tc filter add dev $dev ingress pref 1 handle 1 flower \
+ erspan_opts 1:0:0:0 &> /dev/null
+ if [[ $? -ne 0 ]]; then
+ echo "SKIP: iproute2 too old; tc is missing erspan support"
+ return $ksft_skip
+ fi
+ tc filter del dev $dev ingress pref 1 handle 1 flower \
+ erspan_opts 1:0:0:0 &> /dev/null
+}
+
# Old versions of tc don't understand "mpls_uc"
check_tc_mpls_support()
{
@@ -525,9 +540,9 @@ setup_wait_dev_with_timeout()
return 1
}
-setup_wait()
+setup_wait_n()
{
- local num_netifs=${1:-$NUM_NETIFS}
+ local num_netifs=$1; shift
local i
for ((i = 1; i <= num_netifs; ++i)); do
@@ -538,6 +553,11 @@ setup_wait()
sleep $WAIT_TIME
}
+setup_wait()
+{
+ setup_wait_n "$NUM_NETIFS"
+}
+
wait_for_dev()
{
local dev=$1; shift
@@ -1757,6 +1777,51 @@ mc_send()
msend -g $groups -I $if_name -c 1 > /dev/null 2>&1
}
+adf_mcd_start()
+{
+ local ifs=("$@")
+
+ local table_name="$MCD_TABLE_NAME"
+ local smcroutedir
+ local pid
+ local if
+ local i
+
+ check_command "$MCD" || return 1
+ check_command "$MC_CLI" || return 1
+
+ smcroutedir=$(mktemp -d)
+ defer rm -rf "$smcroutedir"
+
+ for ((i = 1; i <= NUM_NETIFS; ++i)); do
+ echo "phyint ${NETIFS[p$i]} enable" >> \
+ "$smcroutedir/$table_name.conf"
+ done
+
+ for if in "${ifs[@]}"; do
+ if ! ip_link_has_flag "$if" MULTICAST; then
+ ip link set dev "$if" multicast on
+ defer ip link set dev "$if" multicast off
+ fi
+
+ echo "phyint $if enable" >> \
+ "$smcroutedir/$table_name.conf"
+ done
+
+ "$MCD" -N -I "$table_name" -f "$smcroutedir/$table_name.conf" \
+ -P "$smcroutedir/$table_name.pid"
+ busywait "$BUSYWAIT_TIMEOUT" test -e "$smcroutedir/$table_name.pid"
+ pid=$(cat "$smcroutedir/$table_name.pid")
+ defer kill_process "$pid"
+}
+
+mc_cli()
+{
+ local table_name="$MCD_TABLE_NAME"
+
+ "$MC_CLI" -I "$table_name" "$@"
+}
+
start_ip_monitor()
{
local mtype=$1; shift
diff --git a/tools/testing/selftests/net/forwarding/router_multicast.sh b/tools/testing/selftests/net/forwarding/router_multicast.sh
index 5a58b1ec8aef..83e52abdbc2e 100755
--- a/tools/testing/selftests/net/forwarding/router_multicast.sh
+++ b/tools/testing/selftests/net/forwarding/router_multicast.sh
@@ -33,10 +33,6 @@ NUM_NETIFS=6
source lib.sh
source tc_common.sh
-require_command $MCD
-require_command $MC_CLI
-table_name=selftests
-
h1_create()
{
simple_if_init $h1 198.51.100.2/28 2001:db8:1::2/64
@@ -149,25 +145,6 @@ router_destroy()
ip link set dev $rp1 down
}
-start_mcd()
-{
- SMCROUTEDIR="$(mktemp -d)"
-
- for ((i = 1; i <= $NUM_NETIFS; ++i)); do
- echo "phyint ${NETIFS[p$i]} enable" >> \
- $SMCROUTEDIR/$table_name.conf
- done
-
- $MCD -N -I $table_name -f $SMCROUTEDIR/$table_name.conf \
- -P $SMCROUTEDIR/$table_name.pid
-}
-
-kill_mcd()
-{
- pkill $MCD
- rm -rf $SMCROUTEDIR
-}
-
setup_prepare()
{
h1=${NETIFS[p1]}
@@ -179,7 +156,7 @@ setup_prepare()
rp3=${NETIFS[p5]}
h3=${NETIFS[p6]}
- start_mcd
+ adf_mcd_start || exit "$EXIT_STATUS"
vrf_prepare
@@ -206,7 +183,7 @@ cleanup()
vrf_cleanup
- kill_mcd
+ defer_scopes_cleanup
}
create_mcast_sg()
@@ -214,9 +191,9 @@ create_mcast_sg()
local if_name=$1; shift
local s_addr=$1; shift
local mcast=$1; shift
- local dest_ifs=${@}
+ local dest_ifs=("${@}")
- $MC_CLI -I $table_name add $if_name $s_addr $mcast $dest_ifs
+ mc_cli add "$if_name" "$s_addr" "$mcast" "${dest_ifs[@]}"
}
delete_mcast_sg()
@@ -224,9 +201,9 @@ delete_mcast_sg()
local if_name=$1; shift
local s_addr=$1; shift
local mcast=$1; shift
- local dest_ifs=${@}
+ local dest_ifs=("${@}")
- $MC_CLI -I $table_name remove $if_name $s_addr $mcast $dest_ifs
+ mc_cli remove "$if_name" "$s_addr" "$mcast" "${dest_ifs[@]}"
}
mcast_v4()
diff --git a/tools/testing/selftests/net/forwarding/tc_flower.sh b/tools/testing/selftests/net/forwarding/tc_flower.sh
index b1daad19b01e..b58909a93112 100755
--- a/tools/testing/selftests/net/forwarding/tc_flower.sh
+++ b/tools/testing/selftests/net/forwarding/tc_flower.sh
@@ -6,7 +6,7 @@ ALL_TESTS="match_dst_mac_test match_src_mac_test match_dst_ip_test \
match_ip_tos_test match_indev_test match_ip_ttl_test
match_mpls_label_test \
match_mpls_tc_test match_mpls_bos_test match_mpls_ttl_test \
- match_mpls_lse_test"
+ match_mpls_lse_test match_erspan_opts_test"
NUM_NETIFS=2
source tc_common.sh
source lib.sh
@@ -676,6 +676,56 @@ match_mpls_lse_test()
log_test "mpls lse match ($tcflags)"
}
+match_erspan_opts_test()
+{
+ RET=0
+
+ check_tc_erspan_support $h2 || return 0
+
+ # h1 erspan setup
+ tunnel_create erspan1 erspan 192.0.2.1 192.0.2.2 dev $h1 seq key 1001 \
+ tos C ttl 64 erspan_ver 1 erspan 6789 # ERSPAN Type II
+ tunnel_create erspan2 erspan 192.0.2.1 192.0.2.2 dev $h1 seq key 1002 \
+ tos C ttl 64 erspan_ver 2 erspan_dir egress erspan_hwid 63 \
+ # ERSPAN Type III
+ ip link set dev erspan1 master v$h1
+ ip link set dev erspan2 master v$h1
+ # h2 erspan setup
+ ip link add ep-ex type erspan ttl 64 external # To collect tunnel info
+ ip link set ep-ex up
+ ip link set dev ep-ex master v$h2
+ tc qdisc add dev ep-ex clsact
+
+ # ERSPAN Type II [decap direction]
+ tc filter add dev ep-ex ingress protocol ip handle 101 flower \
+ $tcflags enc_src_ip 192.0.2.1 enc_dst_ip 192.0.2.2 \
+ enc_key_id 1001 erspan_opts 1:6789:0:0 \
+ action drop
+ # ERSPAN Type III [decap direction]
+ tc filter add dev ep-ex ingress protocol ip handle 102 flower \
+ $tcflags enc_src_ip 192.0.2.1 enc_dst_ip 192.0.2.2 \
+ enc_key_id 1002 erspan_opts 2:0:1:63 action drop
+
+ ep1mac=$(mac_get erspan1)
+ $MZ erspan1 -c 1 -p 64 -a $ep1mac -b $h2mac -t ip -q
+ tc_check_packets "dev ep-ex ingress" 101 1
+ check_err $? "ERSPAN Type II"
+
+ ep2mac=$(mac_get erspan2)
+ $MZ erspan2 -c 1 -p 64 -a $ep1mac -b $h2mac -t ip -q
+ tc_check_packets "dev ep-ex ingress" 102 1
+ check_err $? "ERSPAN Type III"
+
+ # h2 erspan cleanup
+ tc qdisc del dev ep-ex clsact
+ tunnel_destroy ep-ex
+ # h1 erspan cleanup
+ tunnel_destroy erspan2 # ERSPAN Type III
+ tunnel_destroy erspan1 # ERSPAN Type II
+
+ log_test "erspan_opts match ($tcflags)"
+}
+
setup_prepare()
{
h1=${NETIFS[p1]}
diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_mc_ul.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_mc_ul.sh
new file mode 100755
index 000000000000..462db0b603e7
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_mc_ul.sh
@@ -0,0 +1,771 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# +-----------------------------------------+
+# | + $h1.10 + $h1.20 |
+# | | 192.0.2.1/28 | 2001:db8:1::1/64 |
+# | \________ ________/ |
+# | \ / |
+# | + $h1 H1 (vrf) |
+# +-----------|-----------------------------+
+# |
+# +-----------|----------------------------------------------------------------+
+# | +---------|--------------------------------------+ SWITCH (main vrf) |
+# | | + $swp1 BR1 (802.1q) | |
+# | | vid 10 20 | |
+# | | | |
+# | | + vx10 (vxlan) + vx20 (vxlan) | + lo10 (dummy) |
+# | | local 192.0.2.100 local 2001:db8:4::1 | 192.0.2.100/28 |
+# | | group 233.252.0.1 group ff0e::1:2:3 | 2001:db8:4::1/64 |
+# | | id 1000 id 2000 | |
+# | | vid 10 pvid untagged vid 20 pvid untagged | |
+# | +------------------------------------------------+ |
+# | |
+# | + $swp2 $swp3 + |
+# | | 192.0.2.33/28 192.0.2.65/28 | |
+# | | 2001:db8:2::1/64 2001:db8:3::1/64 | |
+# | | | |
+# +---|--------------------------------------------------------------------|---+
+# | |
+# +---|--------------------------------+ +--------------------------------|---+
+# | | H2 (vrf) | | H3 (vrf) | |
+# | +-|----------------------------+ | | +-----------------------------|-+ |
+# | | + $h2 BR2 (802.1d) | | | | BR3 (802.1d) $h3 + | |
+# | | | | | | | |
+# | | + v1$h2 (veth) | | | | v1$h3 (veth) + | |
+# | +-|----------------------------+ | | +-----------------------------|-+ |
+# | | | | | |
+# +---|--------------------------------+ +--------------------------------|---+
+# | |
+# +---|--------------------------------+ +--------------------------------|---+
+# | + v2$h2 (veth) NS2 (netns) | | NS3 (netns) v2$h3 (veth) + |
+# | 192.0.2.34/28 | | 192.0.2.66/28 |
+# | 2001:db8:2::2/64 | | 2001:db8:3::2/64 |
+# | | | |
+# | +--------------------------------+ | | +--------------------------------+ |
+# | | BR1 (802.1q) | | | | BR1 (802.1q) | |
+# | | + vx10 (vxlan) | | | | + vx10 (vxlan) | |
+# | | local 192.0.2.34 | | | | local 192.0.2.50 | |
+# | | group 233.252.0.1 dev v2$h2 | | | | group 233.252.0.1 dev v2$h3 | |
+# | | id 1000 dstport $VXPORT | | | | id 1000 dstport $VXPORT | |
+# | | vid 10 pvid untagged | | | | vid 10 pvid untagged | |
+# | | | | | | | |
+# | | + vx20 (vxlan) | | | | + vx20 (vxlan) | |
+# | | local 2001:db8:2::2 | | | | local 2001:db8:3::2 | |
+# | | group ff0e::1:2:3 dev v2$h2 | | | | group ff0e::1:2:3 dev v2$h3 | |
+# | | id 2000 dstport $VXPORT | | | | id 2000 dstport $VXPORT | |
+# | | vid 20 pvid untagged | | | | vid 20 pvid untagged | |
+# | | | | | | | |
+# | | + w1 (veth) | | | | + w1 (veth) | |
+# | | | vid 10 20 | | | | | vid 10 20 | |
+# | +--|-----------------------------+ | | +--|-----------------------------+ |
+# | | | | | |
+# | +--|-----------------------------+ | | +--|-----------------------------+ |
+# | | + w2 (veth) VW2 (vrf) | | | | + w2 (veth) VW2 (vrf) | |
+# | | |\ | | | | |\ | |
+# | | | + w2.10 | | | | | + w2.10 | |
+# | | | 192.0.2.3/28 | | | | | 192.0.2.4/28 | |
+# | | | | | | | | | |
+# | | + w2.20 | | | | + w2.20 | |
+# | | 2001:db8:1::3/64 | | | | 2001:db8:1::4/64 | |
+# | +--------------------------------+ | | +--------------------------------+ |
+# +------------------------------------+ +------------------------------------+
+#
+#shellcheck disable=SC2317 # SC doesn't see our uses of functions.
+
+: "${VXPORT:=4789}"
+export VXPORT
+
+: "${GROUP4:=233.252.0.1}"
+export GROUP4
+
+: "${GROUP6:=ff0e::1:2:3}"
+export GROUP6
+
+: "${IPMR:=lo10}"
+
+ALL_TESTS="
+ ipv4_nomcroute
+ ipv4_mcroute
+ ipv4_mcroute_changelink
+ ipv4_mcroute_starg
+ ipv4_mcroute_noroute
+ ipv4_mcroute_fdb
+ ipv4_mcroute_fdb_oif0
+ ipv4_mcroute_fdb_oif0_sep
+
+ ipv6_nomcroute
+ ipv6_mcroute
+ ipv6_mcroute_changelink
+ ipv6_mcroute_starg
+ ipv6_mcroute_noroute
+ ipv6_mcroute_fdb
+ ipv6_mcroute_fdb_oif0
+
+ ipv4_nomcroute_rx
+ ipv4_mcroute_rx
+ ipv4_mcroute_starg_rx
+ ipv4_mcroute_fdb_oif0_sep_rx
+ ipv4_mcroute_fdb_sep_rx
+
+ ipv6_nomcroute_rx
+ ipv6_mcroute_rx
+ ipv6_mcroute_starg_rx
+ ipv6_mcroute_fdb_sep_rx
+"
+
+NUM_NETIFS=6
+source lib.sh
+
+h1_create()
+{
+ simple_if_init "$h1"
+ defer simple_if_fini "$h1"
+
+ ip_link_add "$h1.10" master "v$h1" link "$h1" type vlan id 10
+ ip_link_set_up "$h1.10"
+ ip_addr_add "$h1.10" 192.0.2.1/28
+
+ ip_link_add "$h1.20" master "v$h1" link "$h1" type vlan id 20
+ ip_link_set_up "$h1.20"
+ ip_addr_add "$h1.20" 2001:db8:1::1/64
+}
+
+install_capture()
+{
+ local dev=$1; shift
+
+ tc qdisc add dev "$dev" clsact
+ defer tc qdisc del dev "$dev" clsact
+
+ tc filter add dev "$dev" ingress proto ip pref 104 \
+ flower skip_hw ip_proto udp dst_port "$VXPORT" \
+ action pass
+ defer tc filter del dev "$dev" ingress proto ip pref 104
+
+ tc filter add dev "$dev" ingress proto ipv6 pref 106 \
+ flower skip_hw ip_proto udp dst_port "$VXPORT" \
+ action pass
+ defer tc filter del dev "$dev" ingress proto ipv6 pref 106
+}
+
+h2_create()
+{
+ # $h2
+ ip_link_set_up "$h2"
+
+ # H2
+ vrf_create "v$h2"
+ defer vrf_destroy "v$h2"
+
+ ip_link_set_up "v$h2"
+
+ # br2
+ ip_link_add br2 type bridge vlan_filtering 0 mcast_snooping 0
+ ip_link_set_master br2 "v$h2"
+ ip_link_set_up br2
+
+ # $h2
+ ip_link_set_master "$h2" br2
+ install_capture "$h2"
+
+ # v1$h2
+ ip_link_set_up "v1$h2"
+ ip_link_set_master "v1$h2" br2
+}
+
+h3_create()
+{
+ # $h3
+ ip_link_set_up "$h3"
+
+ # H3
+ vrf_create "v$h3"
+ defer vrf_destroy "v$h3"
+
+ ip_link_set_up "v$h3"
+
+ # br3
+ ip_link_add br3 type bridge vlan_filtering 0 mcast_snooping 0
+ ip_link_set_master br3 "v$h3"
+ ip_link_set_up br3
+
+ # $h3
+ ip_link_set_master "$h3" br3
+ install_capture "$h3"
+
+ # v1$h3
+ ip_link_set_up "v1$h3"
+ ip_link_set_master "v1$h3" br3
+}
+
+switch_create()
+{
+ local swp1_mac
+
+ # br1
+ swp1_mac=$(mac_get "$swp1")
+ ip_link_add br1 type bridge vlan_filtering 1 \
+ vlan_default_pvid 0 mcast_snooping 0
+ ip_link_set_addr br1 "$swp1_mac"
+ ip_link_set_up br1
+
+ # A dummy to force the IPv6 OIF=0 test to install a suitable MC route on
+ # $IPMR to be deterministic. Also used for the IPv6 RX!=TX ping test.
+ ip_link_add "X$IPMR" up type dummy
+
+ # IPMR
+ ip_link_add "$IPMR" up type dummy
+ ip_addr_add "$IPMR" 192.0.2.100/28
+ ip_addr_add "$IPMR" 2001:db8:4::1/64
+
+ # $swp1
+ ip_link_set_up "$swp1"
+ ip_link_set_master "$swp1" br1
+ bridge_vlan_add vid 10 dev "$swp1"
+ bridge_vlan_add vid 20 dev "$swp1"
+
+ # $swp2
+ ip_link_set_up "$swp2"
+ ip_addr_add "$swp2" 192.0.2.33/28
+ ip_addr_add "$swp2" 2001:db8:2::1/64
+
+ # $swp3
+ ip_link_set_up "$swp3"
+ ip_addr_add "$swp3" 192.0.2.65/28
+ ip_addr_add "$swp3" 2001:db8:3::1/64
+}
+
+vx_create()
+{
+ local name=$1; shift
+ local vid=$1; shift
+
+ ip_link_add "$name" up type vxlan dstport "$VXPORT" \
+ nolearning noudpcsum tos inherit ttl 16 \
+ "$@"
+ ip_link_set_master "$name" br1
+ bridge_vlan_add vid "$vid" dev "$name" pvid untagged
+}
+export -f vx_create
+
+vx_wait()
+{
+ # Wait for all the ARP, IGMP etc. noise to settle down so that the
+ # tunnel is clear for measurements.
+ sleep 10
+}
+
+vx10_create()
+{
+ vx_create vx10 10 id 1000 "$@"
+}
+export -f vx10_create
+
+vx20_create()
+{
+ vx_create vx20 20 id 2000 "$@"
+}
+export -f vx20_create
+
+vx10_create_wait()
+{
+ vx10_create "$@"
+ vx_wait
+}
+
+vx20_create_wait()
+{
+ vx20_create "$@"
+ vx_wait
+}
+
+ns_init_common()
+{
+ local ns=$1; shift
+ local if_in=$1; shift
+ local ipv4_in=$1; shift
+ local ipv6_in=$1; shift
+ local ipv4_host=$1; shift
+ local ipv6_host=$1; shift
+
+ # v2$h2 / v2$h3
+ ip_link_set_up "$if_in"
+ ip_addr_add "$if_in" "$ipv4_in"
+ ip_addr_add "$if_in" "$ipv6_in"
+
+ # br1
+ ip_link_add br1 type bridge vlan_filtering 1 \
+ vlan_default_pvid 0 mcast_snooping 0
+ ip_link_set_up br1
+
+ # vx10, vx20
+ vx10_create local "${ipv4_in%/*}" group "$GROUP4" dev "$if_in"
+ vx20_create local "${ipv6_in%/*}" group "$GROUP6" dev "$if_in"
+
+ # w1
+ ip_link_add w1 type veth peer name w2
+ ip_link_set_master w1 br1
+ ip_link_set_up w1
+ bridge_vlan_add vid 10 dev w1
+ bridge_vlan_add vid 20 dev w1
+
+ # w2
+ simple_if_init w2
+ defer simple_if_fini w2
+
+ # w2.10
+ ip_link_add w2.10 master vw2 link w2 type vlan id 10
+ ip_link_set_up w2.10
+ ip_addr_add w2.10 "$ipv4_host"
+
+ # w2.20
+ ip_link_add w2.20 master vw2 link w2 type vlan id 20
+ ip_link_set_up w2.20
+ ip_addr_add w2.20 "$ipv6_host"
+}
+export -f ns_init_common
+
+ns2_create()
+{
+ # NS2
+ ip netns add ns2
+ defer ip netns del ns2
+
+ # v2$h2
+ ip link set dev "v2$h2" netns ns2
+ defer ip -n ns2 link set dev "v2$h2" netns 1
+
+ in_ns ns2 \
+ ns_init_common ns2 "v2$h2" \
+ 192.0.2.34/28 2001:db8:2::2/64 \
+ 192.0.2.3/28 2001:db8:1::3/64
+}
+
+ns3_create()
+{
+ # NS3
+ ip netns add ns3
+ defer ip netns del ns3
+
+ # v2$h3
+ ip link set dev "v2$h3" netns ns3
+ defer ip -n ns3 link set dev "v2$h3" netns 1
+
+ ip -n ns3 link set dev "v2$h3" up
+
+ in_ns ns3 \
+ ns_init_common ns3 "v2$h3" \
+ 192.0.2.66/28 2001:db8:3::2/64 \
+ 192.0.2.4/28 2001:db8:1::4/64
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ swp3=${NETIFS[p5]}
+ h3=${NETIFS[p6]}
+
+ vrf_prepare
+ defer vrf_cleanup
+
+ forwarding_enable
+ defer forwarding_restore
+
+ ip_link_add "v1$h2" type veth peer name "v2$h2"
+ ip_link_add "v1$h3" type veth peer name "v2$h3"
+
+ h1_create
+ h2_create
+ h3_create
+ switch_create
+ ns2_create
+ ns3_create
+}
+
+adf_install_broken_sg()
+{
+ adf_mcd_start "$IPMR" || exit "$EXIT_STATUS"
+
+ mc_cli add "$swp2" 192.0.2.100 "$GROUP4" "$swp1" "$swp3"
+ defer mc_cli remove "$swp2" 192.0.2.100 "$GROUP4" "$swp1" "$swp3"
+
+ mc_cli add "$swp2" 2001:db8:4::1 "$GROUP6" "$swp1" "$swp3"
+ defer mc_cli remove "$swp2" 2001:db8:4::1 "$GROUP6" "$swp1" "$swp3"
+}
+
+adf_install_rx()
+{
+ mc_cli add "$swp2" 0.0.0.0 "$GROUP4" "$IPMR"
+ defer mc_cli remove "$swp2" 0.0.0.0 "$GROUP4" lo10
+
+ mc_cli add "$swp3" 0.0.0.0 "$GROUP4" "$IPMR"
+ defer mc_cli remove "$swp3" 0.0.0.0 "$GROUP4" lo10
+
+ mc_cli add "$swp2" :: "$GROUP6" "$IPMR"
+ defer mc_cli remove "$swp2" :: "$GROUP6" lo10
+
+ mc_cli add "$swp3" :: "$GROUP6" "$IPMR"
+ defer mc_cli remove "$swp3" :: "$GROUP6" lo10
+}
+
+adf_install_sg()
+{
+ adf_mcd_start "$IPMR" || exit "$EXIT_STATUS"
+
+ mc_cli add "$IPMR" 192.0.2.100 "$GROUP4" "$swp2" "$swp3"
+ defer mc_cli remove "$IPMR" 192.0.2.33 "$GROUP4" "$swp2" "$swp3"
+
+ mc_cli add "$IPMR" 2001:db8:4::1 "$GROUP6" "$swp2" "$swp3"
+ defer mc_cli remove "$IPMR" 2001:db8:4::1 "$GROUP6" "$swp2" "$swp3"
+
+ adf_install_rx
+}
+
+adf_install_sg_sep()
+{
+ adf_mcd_start lo || exit "$EXIT_STATUS"
+
+ mc_cli add lo 192.0.2.120 "$GROUP4" "$swp2" "$swp3"
+ defer mc_cli remove lo 192.0.2.120 "$GROUP4" "$swp2" "$swp3"
+
+ mc_cli add lo 2001:db8:5::1 "$GROUP6" "$swp2" "$swp3"
+ defer mc_cli remove lo 2001:db8:5::1 "$GROUP6" "$swp2" "$swp3"
+}
+
+adf_install_sg_sep_rx()
+{
+ local lo=$1; shift
+
+ adf_mcd_start "$IPMR" "$lo" || exit "$EXIT_STATUS"
+
+ mc_cli add "$lo" 192.0.2.120 "$GROUP4" "$swp2" "$swp3"
+ defer mc_cli remove "$lo" 192.0.2.120 "$GROUP4" "$swp2" "$swp3"
+
+ mc_cli add "$lo" 2001:db8:5::1 "$GROUP6" "$swp2" "$swp3"
+ defer mc_cli remove "$lo" 2001:db8:5::1 "$GROUP6" "$swp2" "$swp3"
+
+ adf_install_rx
+}
+
+adf_install_starg()
+{
+ adf_mcd_start "$IPMR" || exit "$EXIT_STATUS"
+
+ mc_cli add "$IPMR" 0.0.0.0 "$GROUP4" "$swp2" "$swp3"
+ defer mc_cli remove "$IPMR" 0.0.0.0 "$GROUP4" "$swp2" "$swp3"
+
+ mc_cli add "$IPMR" :: "$GROUP6" "$swp2" "$swp3"
+ defer mc_cli remove "$IPMR" :: "$GROUP6" "$swp2" "$swp3"
+
+ adf_install_rx
+}
+
+do_packets_v4()
+{
+ local mac
+
+ mac=$(mac_get "$h2")
+ "$MZ" "$h1" -Q 10 -c 10 -d 100msec -p 64 -a own -b "$mac" \
+ -A 192.0.2.1 -B 192.0.2.2 -t udp sp=1234,dp=2345 -q
+}
+
+do_packets_v6()
+{
+ local mac
+
+ mac=$(mac_get "$h2")
+ "$MZ" -6 "$h1" -Q 20 -c 10 -d 100msec -p 64 -a own -b "$mac" \
+ -A 2001:db8:1::1 -B 2001:db8:1::2 -t udp sp=1234,dp=2345 -q
+}
+
+do_test()
+{
+ local ipv=$1; shift
+ local expect_h2=$1; shift
+ local expect_h3=$1; shift
+ local what=$1; shift
+
+ local pref=$((100 + ipv))
+ local t0_h2
+ local t0_h3
+ local t1_h2
+ local t1_h3
+ local d_h2
+ local d_h3
+
+ RET=0
+
+ t0_h2=$(tc_rule_stats_get "$h2" "$pref" ingress)
+ t0_h3=$(tc_rule_stats_get "$h3" "$pref" ingress)
+
+ "do_packets_v$ipv"
+ sleep 1
+
+ t1_h2=$(tc_rule_stats_get "$h2" "$pref" ingress)
+ t1_h3=$(tc_rule_stats_get "$h3" "$pref" ingress)
+
+ d_h2=$((t1_h2 - t0_h2))
+ d_h3=$((t1_h3 - t0_h3))
+
+ ((d_h2 == expect_h2))
+ check_err $? "Expected $expect_h2 packets on H2, got $d_h2"
+
+ ((d_h3 == expect_h3))
+ check_err $? "Expected $expect_h3 packets on H3, got $d_h3"
+
+ log_test "VXLAN MC flood $what"
+}
+
+ipv4_do_test_rx()
+{
+ local h3_should_fail=$1; shift
+ local what=$1; shift
+
+ RET=0
+
+ ping_do "$h1.10" 192.0.2.3
+ check_err $? "H2 should respond"
+
+ ping_do "$h1.10" 192.0.2.4
+ check_err_fail "$h3_should_fail" $? "H3 responds"
+
+ log_test "VXLAN MC flood $what"
+}
+
+ipv6_do_test_rx()
+{
+ local h3_should_fail=$1; shift
+ local what=$1; shift
+
+ RET=0
+
+ ping6_do "$h1.20" 2001:db8:1::3
+ check_err $? "H2 should respond"
+
+ ping6_do "$h1.20" 2001:db8:1::4
+ check_err_fail "$h3_should_fail" $? "H3 responds"
+
+ log_test "VXLAN MC flood $what"
+}
+
+ipv4_nomcroute()
+{
+ # Install a misleading (S,G) rule to attempt to trick the system into
+ # pushing the packets elsewhere.
+ adf_install_broken_sg
+ vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$swp2"
+ do_test 4 10 0 "IPv4 nomcroute"
+}
+
+ipv6_nomcroute()
+{
+ # Like for IPv4, install a misleading (S,G).
+ adf_install_broken_sg
+ vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$swp2"
+ do_test 6 10 0 "IPv6 nomcroute"
+}
+
+ipv4_nomcroute_rx()
+{
+ vx10_create local 192.0.2.100 group "$GROUP4" dev "$swp2"
+ ipv4_do_test_rx 1 "IPv4 nomcroute ping"
+}
+
+ipv6_nomcroute_rx()
+{
+ vx20_create local 2001:db8:4::1 group "$GROUP6" dev "$swp2"
+ ipv6_do_test_rx 1 "IPv6 nomcroute ping"
+}
+
+ipv4_mcroute()
+{
+ adf_install_sg
+ vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute
+ do_test 4 10 10 "IPv4 mcroute"
+}
+
+ipv6_mcroute()
+{
+ adf_install_sg
+ vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute
+ do_test 6 10 10 "IPv6 mcroute"
+}
+
+ipv4_mcroute_rx()
+{
+ adf_install_sg
+ vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute
+ ipv4_do_test_rx 0 "IPv4 mcroute ping"
+}
+
+ipv6_mcroute_rx()
+{
+ adf_install_sg
+ vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute
+ ipv6_do_test_rx 0 "IPv6 mcroute ping"
+}
+
+ipv4_mcroute_changelink()
+{
+ adf_install_sg
+ vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR"
+ ip link set dev vx10 type vxlan mcroute
+ sleep 1
+ do_test 4 10 10 "IPv4 mcroute changelink"
+}
+
+ipv6_mcroute_changelink()
+{
+ adf_install_sg
+ vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute
+ ip link set dev vx20 type vxlan mcroute
+ sleep 1
+ do_test 6 10 10 "IPv6 mcroute changelink"
+}
+
+ipv4_mcroute_starg()
+{
+ adf_install_starg
+ vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute
+ do_test 4 10 10 "IPv4 mcroute (*,G)"
+}
+
+ipv6_mcroute_starg()
+{
+ adf_install_starg
+ vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute
+ do_test 6 10 10 "IPv6 mcroute (*,G)"
+}
+
+ipv4_mcroute_starg_rx()
+{
+ adf_install_starg
+ vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute
+ ipv4_do_test_rx 0 "IPv4 mcroute (*,G) ping"
+}
+
+ipv6_mcroute_starg_rx()
+{
+ adf_install_starg
+ vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute
+ ipv6_do_test_rx 0 "IPv6 mcroute (*,G) ping"
+}
+
+ipv4_mcroute_noroute()
+{
+ vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute
+ do_test 4 0 0 "IPv4 mcroute, no route"
+}
+
+ipv6_mcroute_noroute()
+{
+ vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute
+ do_test 6 0 0 "IPv6 mcroute, no route"
+}
+
+ipv4_mcroute_fdb()
+{
+ adf_install_sg
+ vx10_create_wait local 192.0.2.100 dev "$IPMR" mcroute
+ bridge fdb add dev vx10 \
+ 00:00:00:00:00:00 self static dst "$GROUP4" via "$IPMR"
+ do_test 4 10 10 "IPv4 mcroute FDB"
+}
+
+ipv6_mcroute_fdb()
+{
+ adf_install_sg
+ vx20_create_wait local 2001:db8:4::1 dev "$IPMR" mcroute
+ bridge -6 fdb add dev vx20 \
+ 00:00:00:00:00:00 self static dst "$GROUP6" via "$IPMR"
+ do_test 6 10 10 "IPv6 mcroute FDB"
+}
+
+# Use FDB to configure VXLAN in a way where oif=0 for purposes of FIB lookup.
+ipv4_mcroute_fdb_oif0()
+{
+ adf_install_sg
+ vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute
+ bridge fdb del dev vx10 00:00:00:00:00:00
+ bridge fdb add dev vx10 00:00:00:00:00:00 self static dst "$GROUP4"
+ do_test 4 10 10 "IPv4 mcroute oif=0"
+}
+
+ipv6_mcroute_fdb_oif0()
+{
+ # The IPv6 tunnel lookup does not fall back to selection by source
+ # address. Instead it just does a FIB match, and that would find one of
+ # the several ff00::/8 multicast routes -- each device has one. In order
+ # to reliably force the $IPMR device, add a /128 route for the
+ # destination group address.
+ ip -6 route add table local multicast "$GROUP6/128" dev "$IPMR"
+ defer ip -6 route del table local multicast "$GROUP6/128" dev "$IPMR"
+
+ adf_install_sg
+ vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute
+ bridge -6 fdb del dev vx20 00:00:00:00:00:00
+ bridge -6 fdb add dev vx20 00:00:00:00:00:00 self static dst "$GROUP6"
+ do_test 6 10 10 "IPv6 mcroute oif=0"
+}
+
+# In oif=0 test as above, have FIB lookup resolve to loopback instead of IPMR.
+# This doesn't work with IPv6 -- a MC route on lo would be marked as RTF_REJECT.
+ipv4_mcroute_fdb_oif0_sep()
+{
+ adf_install_sg_sep
+
+ ip_addr_add lo 192.0.2.120/28
+ vx10_create_wait local 192.0.2.120 group "$GROUP4" dev "$IPMR" mcroute
+ bridge fdb del dev vx10 00:00:00:00:00:00
+ bridge fdb add dev vx10 00:00:00:00:00:00 self static dst "$GROUP4"
+ do_test 4 10 10 "IPv4 mcroute TX!=RX oif=0"
+}
+
+ipv4_mcroute_fdb_oif0_sep_rx()
+{
+ adf_install_sg_sep_rx lo
+
+ ip_addr_add lo 192.0.2.120/28
+ vx10_create_wait local 192.0.2.120 group "$GROUP4" dev "$IPMR" mcroute
+ bridge fdb del dev vx10 00:00:00:00:00:00
+ bridge fdb add dev vx10 00:00:00:00:00:00 self static dst "$GROUP4"
+ ipv4_do_test_rx 0 "IPv4 mcroute TX!=RX oif=0 ping"
+}
+
+ipv4_mcroute_fdb_sep_rx()
+{
+ adf_install_sg_sep_rx lo
+
+ ip_addr_add lo 192.0.2.120/28
+ vx10_create_wait local 192.0.2.120 group "$GROUP4" dev "$IPMR" mcroute
+ bridge fdb del dev vx10 00:00:00:00:00:00
+ bridge fdb add \
+ dev vx10 00:00:00:00:00:00 self static dst "$GROUP4" via lo
+ ipv4_do_test_rx 0 "IPv4 mcroute TX!=RX ping"
+}
+
+ipv6_mcroute_fdb_sep_rx()
+{
+ adf_install_sg_sep_rx "X$IPMR"
+
+ ip_addr_add "X$IPMR" 2001:db8:5::1/64
+ vx20_create_wait local 2001:db8:5::1 group "$GROUP6" dev "$IPMR" mcroute
+ bridge -6 fdb del dev vx20 00:00:00:00:00:00
+ bridge -6 fdb add dev vx20 00:00:00:00:00:00 \
+ self static dst "$GROUP6" via "X$IPMR"
+ ipv6_do_test_rx 0 "IPv6 mcroute TX!=RX ping"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit "$EXIT_STATUS"
diff --git a/tools/testing/selftests/net/ipv6_force_forwarding.sh b/tools/testing/selftests/net/ipv6_force_forwarding.sh
new file mode 100755
index 000000000000..bf0243366caa
--- /dev/null
+++ b/tools/testing/selftests/net/ipv6_force_forwarding.sh
@@ -0,0 +1,105 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test IPv6 force_forwarding interface property
+#
+# This test verifies that the force_forwarding property works correctly:
+# - When global forwarding is disabled, packets are not forwarded normally
+# - When force_forwarding is enabled on an interface, packets are forwarded
+# regardless of the global forwarding setting
+
+source lib.sh
+
+cleanup() {
+ cleanup_ns $ns1 $ns2 $ns3
+}
+
+trap cleanup EXIT
+
+setup_test() {
+ # Create three namespaces: sender, router, receiver
+ setup_ns ns1 ns2 ns3
+
+ # Create veth pairs: ns1 <-> ns2 <-> ns3
+ ip link add name veth12 type veth peer name veth21
+ ip link add name veth23 type veth peer name veth32
+
+ # Move interfaces to namespaces
+ ip link set veth12 netns $ns1
+ ip link set veth21 netns $ns2
+ ip link set veth23 netns $ns2
+ ip link set veth32 netns $ns3
+
+ # Configure interfaces
+ ip -n $ns1 addr add 2001:db8:1::1/64 dev veth12 nodad
+ ip -n $ns2 addr add 2001:db8:1::2/64 dev veth21 nodad
+ ip -n $ns2 addr add 2001:db8:2::1/64 dev veth23 nodad
+ ip -n $ns3 addr add 2001:db8:2::2/64 dev veth32 nodad
+
+ # Bring up interfaces
+ ip -n $ns1 link set veth12 up
+ ip -n $ns2 link set veth21 up
+ ip -n $ns2 link set veth23 up
+ ip -n $ns3 link set veth32 up
+
+ # Add routes
+ ip -n $ns1 route add 2001:db8:2::/64 via 2001:db8:1::2
+ ip -n $ns3 route add 2001:db8:1::/64 via 2001:db8:2::1
+
+ # Disable global forwarding
+ ip netns exec $ns2 sysctl -qw net.ipv6.conf.all.forwarding=0
+}
+
+test_force_forwarding() {
+ local ret=0
+
+ echo "TEST: force_forwarding functionality"
+
+ # Check if force_forwarding sysctl exists
+ if ! ip netns exec $ns2 test -f /proc/sys/net/ipv6/conf/veth21/force_forwarding; then
+ echo "SKIP: force_forwarding not available"
+ return $ksft_skip
+ fi
+
+ # Test 1: Without force_forwarding, ping should fail
+ ip netns exec $ns2 sysctl -qw net.ipv6.conf.veth21.force_forwarding=0
+ ip netns exec $ns2 sysctl -qw net.ipv6.conf.veth23.force_forwarding=0
+
+ if ip netns exec $ns1 ping -6 -c 1 -W 2 2001:db8:2::2 &>/dev/null; then
+ echo "FAIL: ping succeeded when forwarding disabled"
+ ret=1
+ else
+ echo "PASS: forwarding disabled correctly"
+ fi
+
+ # Test 2: With force_forwarding enabled, ping should succeed
+ ip netns exec $ns2 sysctl -qw net.ipv6.conf.veth21.force_forwarding=1
+ ip netns exec $ns2 sysctl -qw net.ipv6.conf.veth23.force_forwarding=1
+
+ if ip netns exec $ns1 ping -6 -c 1 -W 2 2001:db8:2::2 &>/dev/null; then
+ echo "PASS: force_forwarding enabled forwarding"
+ else
+ echo "FAIL: ping failed with force_forwarding enabled"
+ ret=1
+ fi
+
+ return $ret
+}
+
+echo "IPv6 force_forwarding test"
+echo "=========================="
+
+setup_test
+test_force_forwarding
+ret=$?
+
+if [ $ret -eq 0 ]; then
+ echo "OK"
+ exit 0
+elif [ $ret -eq $ksft_skip ]; then
+ echo "SKIP"
+ exit $ksft_skip
+else
+ echo "FAIL"
+ exit 1
+fi
diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh
index 86a216e9aca8..c7add0dc4c60 100644
--- a/tools/testing/selftests/net/lib.sh
+++ b/tools/testing/selftests/net/lib.sh
@@ -240,6 +240,29 @@ create_netdevsim() {
echo nsim$id
}
+create_netdevsim_port() {
+ local nsim_id="$1"
+ local ns="$2"
+ local port_id="$3"
+ local perm_addr="$4"
+ local orig_dev
+ local new_dev
+ local nsim_path
+
+ nsim_path="/sys/bus/netdevsim/devices/netdevsim$nsim_id"
+
+ echo "$port_id $perm_addr" | ip netns exec "$ns" tee "$nsim_path"/new_port > /dev/null || return 1
+
+ orig_dev=$(ip netns exec "$ns" find "$nsim_path"/net/ -maxdepth 1 -name 'e*' | tail -n 1)
+ orig_dev=$(basename "$orig_dev")
+ new_dev="nsim${nsim_id}p$port_id"
+
+ ip -netns "$ns" link set dev "$orig_dev" name "$new_dev"
+ ip -netns "$ns" link set dev "$new_dev" up
+
+ echo "$new_dev"
+}
+
# Remove netdevsim with given id.
cleanup_netdevsim() {
local id="$1"
@@ -547,13 +570,19 @@ ip_link_set_addr()
defer ip link set dev "$name" address "$old_addr"
}
-ip_link_is_up()
+ip_link_has_flag()
{
local name=$1; shift
+ local flag=$1; shift
local state=$(ip -j link show "$name" |
- jq -r '(.[].flags[] | select(. == "UP")) // "DOWN"')
- [[ $state == "UP" ]]
+ jq --arg flag "$flag" 'any(.[].flags.[]; . == $flag)')
+ [[ $state == true ]]
+}
+
+ip_link_is_up()
+{
+ ip_link_has_flag "$1" UP
}
ip_link_set_up()
diff --git a/tools/testing/selftests/net/lib/py/__init__.py b/tools/testing/selftests/net/lib/py/__init__.py
index 8697bd27dc30..02be28dcc089 100644
--- a/tools/testing/selftests/net/lib/py/__init__.py
+++ b/tools/testing/selftests/net/lib/py/__init__.py
@@ -6,4 +6,4 @@ from .netns import NetNS, NetNSEnter
from .nsim import *
from .utils import *
from .ynl import NlError, YnlFamily, EthtoolFamily, NetdevFamily, RtnlFamily, RtnlAddrFamily
-from .ynl import NetshaperFamily
+from .ynl import NetshaperFamily, DevlinkFamily
diff --git a/tools/testing/selftests/net/lib/py/ksft.py b/tools/testing/selftests/net/lib/py/ksft.py
index 61287c203b6e..8e35ed12ed9e 100644
--- a/tools/testing/selftests/net/lib/py/ksft.py
+++ b/tools/testing/selftests/net/lib/py/ksft.py
@@ -32,6 +32,7 @@ class KsftTerminate(KeyboardInterrupt):
def ksft_pr(*objs, **kwargs):
+ kwargs["flush"] = True
print("#", *objs, **kwargs)
@@ -139,7 +140,7 @@ def ktap_result(ok, cnt=1, case="", comment=""):
res += "." + str(case.__name__)
if comment:
res += " # " + comment
- print(res)
+ print(res, flush=True)
def ksft_flush_defer():
@@ -227,8 +228,8 @@ def ksft_run(cases=None, globs=None, case_pfx=None, args=()):
totals = {"pass": 0, "fail": 0, "skip": 0, "xfail": 0}
- print("TAP version 13")
- print("1.." + str(len(cases)))
+ print("TAP version 13", flush=True)
+ print("1.." + str(len(cases)), flush=True)
global KSFT_RESULT
cnt = 0
diff --git a/tools/testing/selftests/net/lib/py/utils.py b/tools/testing/selftests/net/lib/py/utils.py
index 34470d65d871..f395c90fb0f1 100644
--- a/tools/testing/selftests/net/lib/py/utils.py
+++ b/tools/testing/selftests/net/lib/py/utils.py
@@ -175,6 +175,10 @@ def tool(name, args, json=None, ns=None, host=None):
return cmd_obj
+def bpftool(args, json=None, ns=None, host=None):
+ return tool('bpftool', args, json=json, ns=ns, host=host)
+
+
def ip(args, json=None, ns=None, host=None):
if ns:
args = f'-netns {ns} ' + args
@@ -185,6 +189,41 @@ def ethtool(args, json=None, ns=None, host=None):
return tool('ethtool', args, json=json, ns=ns, host=host)
+def bpftrace(expr, json=None, ns=None, host=None, timeout=None):
+ """
+ Run bpftrace and return map data (if json=True).
+ The output of bpftrace is inconvenient, so the helper converts
+ to a dict indexed by map name, e.g.:
+ {
+ "@": { ... },
+ "@map2": { ... },
+ }
+ """
+ cmd_arr = ['bpftrace']
+ # Throw in --quiet if json, otherwise the output has two objects
+ if json:
+ cmd_arr += ['-f', 'json', '-q']
+ if timeout:
+ expr += ' interval:s:' + str(timeout) + ' { exit(); }'
+ cmd_arr += ['-e', expr]
+ cmd_obj = cmd(cmd_arr, ns=ns, host=host, shell=False)
+ if json:
+ # bpftrace prints objects as lines
+ ret = {}
+ for l in cmd_obj.stdout.split('\n'):
+ if not l.strip():
+ continue
+ one = _json.loads(l)
+ if one.get('type') != 'map':
+ continue
+ for k, v in one["data"].items():
+ if k.startswith('@'):
+ k = k.lstrip('@')
+ ret[k] = v
+ return ret
+ return cmd_obj
+
+
def rand_port(type=socket.SOCK_STREAM):
"""
Get a random unprivileged port.
diff --git a/tools/testing/selftests/net/lib/py/ynl.py b/tools/testing/selftests/net/lib/py/ynl.py
index 6329ae805abf..2b3a61ea3bfa 100644
--- a/tools/testing/selftests/net/lib/py/ynl.py
+++ b/tools/testing/selftests/net/lib/py/ynl.py
@@ -56,3 +56,8 @@ class NetshaperFamily(YnlFamily):
def __init__(self, recv_size=0):
super().__init__((SPEC_PATH / Path('net_shaper.yaml')).as_posix(),
schema='', recv_size=recv_size)
+
+class DevlinkFamily(YnlFamily):
+ def __init__(self, recv_size=0):
+ super().__init__((SPEC_PATH / Path('devlink.yaml')).as_posix(),
+ schema='', recv_size=recv_size)
diff --git a/tools/testing/selftests/net/lib/xdp_native.bpf.c b/tools/testing/selftests/net/lib/xdp_native.bpf.c
new file mode 100644
index 000000000000..521ba38f2ddd
--- /dev/null
+++ b/tools/testing/selftests/net/lib/xdp_native.bpf.c
@@ -0,0 +1,621 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <stddef.h>
+#include <linux/bpf.h>
+#include <linux/in.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/udp.h>
+#include <bpf/bpf_endian.h>
+#include <bpf/bpf_helpers.h>
+
+#define MAX_ADJST_OFFSET 256
+#define MAX_PAYLOAD_LEN 5000
+#define MAX_HDR_LEN 64
+
+enum {
+ XDP_MODE = 0,
+ XDP_PORT = 1,
+ XDP_ADJST_OFFSET = 2,
+ XDP_ADJST_TAG = 3,
+} xdp_map_setup_keys;
+
+enum {
+ XDP_MODE_PASS = 0,
+ XDP_MODE_DROP = 1,
+ XDP_MODE_TX = 2,
+ XDP_MODE_TAIL_ADJST = 3,
+ XDP_MODE_HEAD_ADJST = 4,
+} xdp_map_modes;
+
+enum {
+ STATS_RX = 0,
+ STATS_PASS = 1,
+ STATS_DROP = 2,
+ STATS_TX = 3,
+ STATS_ABORT = 4,
+} xdp_stats;
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 5);
+ __type(key, __u32);
+ __type(value, __s32);
+} map_xdp_setup SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 5);
+ __type(key, __u32);
+ __type(value, __u64);
+} map_xdp_stats SEC(".maps");
+
+static __u32 min(__u32 a, __u32 b)
+{
+ return a < b ? a : b;
+}
+
+static void record_stats(struct xdp_md *ctx, __u32 stat_type)
+{
+ __u64 *count;
+
+ count = bpf_map_lookup_elem(&map_xdp_stats, &stat_type);
+
+ if (count)
+ __sync_fetch_and_add(count, 1);
+}
+
+static struct udphdr *filter_udphdr(struct xdp_md *ctx, __u16 port)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ struct udphdr *udph = NULL;
+ struct ethhdr *eth = data;
+
+ if (data + sizeof(*eth) > data_end)
+ return NULL;
+
+ if (eth->h_proto == bpf_htons(ETH_P_IP)) {
+ struct iphdr *iph = data + sizeof(*eth);
+
+ if (iph + 1 > (struct iphdr *)data_end ||
+ iph->protocol != IPPROTO_UDP)
+ return NULL;
+
+ udph = (void *)eth + sizeof(*iph) + sizeof(*eth);
+ } else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) {
+ struct ipv6hdr *ipv6h = data + sizeof(*eth);
+
+ if (ipv6h + 1 > (struct ipv6hdr *)data_end ||
+ ipv6h->nexthdr != IPPROTO_UDP)
+ return NULL;
+
+ udph = (void *)eth + sizeof(*ipv6h) + sizeof(*eth);
+ } else {
+ return NULL;
+ }
+
+ if (udph + 1 > (struct udphdr *)data_end)
+ return NULL;
+
+ if (udph->dest != bpf_htons(port))
+ return NULL;
+
+ record_stats(ctx, STATS_RX);
+
+ return udph;
+}
+
+static int xdp_mode_pass(struct xdp_md *ctx, __u16 port)
+{
+ struct udphdr *udph = NULL;
+
+ udph = filter_udphdr(ctx, port);
+ if (!udph)
+ return XDP_PASS;
+
+ record_stats(ctx, STATS_PASS);
+
+ return XDP_PASS;
+}
+
+static int xdp_mode_drop_handler(struct xdp_md *ctx, __u16 port)
+{
+ struct udphdr *udph = NULL;
+
+ udph = filter_udphdr(ctx, port);
+ if (!udph)
+ return XDP_PASS;
+
+ record_stats(ctx, STATS_DROP);
+
+ return XDP_DROP;
+}
+
+static void swap_machdr(void *data)
+{
+ struct ethhdr *eth = data;
+ __u8 tmp_mac[ETH_ALEN];
+
+ __builtin_memcpy(tmp_mac, eth->h_source, ETH_ALEN);
+ __builtin_memcpy(eth->h_source, eth->h_dest, ETH_ALEN);
+ __builtin_memcpy(eth->h_dest, tmp_mac, ETH_ALEN);
+}
+
+static int xdp_mode_tx_handler(struct xdp_md *ctx, __u16 port)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ struct udphdr *udph = NULL;
+ struct ethhdr *eth = data;
+
+ if (data + sizeof(*eth) > data_end)
+ return XDP_PASS;
+
+ if (eth->h_proto == bpf_htons(ETH_P_IP)) {
+ struct iphdr *iph = data + sizeof(*eth);
+ __be32 tmp_ip = iph->saddr;
+
+ if (iph + 1 > (struct iphdr *)data_end ||
+ iph->protocol != IPPROTO_UDP)
+ return XDP_PASS;
+
+ udph = data + sizeof(*iph) + sizeof(*eth);
+
+ if (udph + 1 > (struct udphdr *)data_end)
+ return XDP_PASS;
+ if (udph->dest != bpf_htons(port))
+ return XDP_PASS;
+
+ record_stats(ctx, STATS_RX);
+ swap_machdr((void *)eth);
+
+ iph->saddr = iph->daddr;
+ iph->daddr = tmp_ip;
+
+ record_stats(ctx, STATS_TX);
+
+ return XDP_TX;
+
+ } else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) {
+ struct ipv6hdr *ipv6h = data + sizeof(*eth);
+ struct in6_addr tmp_ipv6;
+
+ if (ipv6h + 1 > (struct ipv6hdr *)data_end ||
+ ipv6h->nexthdr != IPPROTO_UDP)
+ return XDP_PASS;
+
+ udph = data + sizeof(*ipv6h) + sizeof(*eth);
+
+ if (udph + 1 > (struct udphdr *)data_end)
+ return XDP_PASS;
+ if (udph->dest != bpf_htons(port))
+ return XDP_PASS;
+
+ record_stats(ctx, STATS_RX);
+ swap_machdr((void *)eth);
+
+ __builtin_memcpy(&tmp_ipv6, &ipv6h->saddr, sizeof(tmp_ipv6));
+ __builtin_memcpy(&ipv6h->saddr, &ipv6h->daddr,
+ sizeof(tmp_ipv6));
+ __builtin_memcpy(&ipv6h->daddr, &tmp_ipv6, sizeof(tmp_ipv6));
+
+ record_stats(ctx, STATS_TX);
+
+ return XDP_TX;
+ }
+
+ return XDP_PASS;
+}
+
+static void *update_pkt(struct xdp_md *ctx, __s16 offset, __u32 *udp_csum)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ struct udphdr *udph = NULL;
+ struct ethhdr *eth = data;
+ __u32 len, len_new;
+
+ if (data + sizeof(*eth) > data_end)
+ return NULL;
+
+ if (eth->h_proto == bpf_htons(ETH_P_IP)) {
+ struct iphdr *iph = data + sizeof(*eth);
+ __u16 total_len;
+
+ if (iph + 1 > (struct iphdr *)data_end)
+ return NULL;
+
+ iph->tot_len = bpf_htons(bpf_ntohs(iph->tot_len) + offset);
+
+ udph = (void *)eth + sizeof(*iph) + sizeof(*eth);
+ if (!udph || udph + 1 > (struct udphdr *)data_end)
+ return NULL;
+
+ len_new = bpf_htons(bpf_ntohs(udph->len) + offset);
+ } else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) {
+ struct ipv6hdr *ipv6h = data + sizeof(*eth);
+ __u16 payload_len;
+
+ if (ipv6h + 1 > (struct ipv6hdr *)data_end)
+ return NULL;
+
+ udph = (void *)eth + sizeof(*ipv6h) + sizeof(*eth);
+ if (!udph || udph + 1 > (struct udphdr *)data_end)
+ return NULL;
+
+ *udp_csum = ~((__u32)udph->check);
+
+ len = ipv6h->payload_len;
+ len_new = bpf_htons(bpf_ntohs(len) + offset);
+ ipv6h->payload_len = len_new;
+
+ *udp_csum = bpf_csum_diff(&len, sizeof(len), &len_new,
+ sizeof(len_new), *udp_csum);
+
+ len = udph->len;
+ len_new = bpf_htons(bpf_ntohs(udph->len) + offset);
+ *udp_csum = bpf_csum_diff(&len, sizeof(len), &len_new,
+ sizeof(len_new), *udp_csum);
+ } else {
+ return NULL;
+ }
+
+ udph->len = len_new;
+
+ return udph;
+}
+
+static __u16 csum_fold_helper(__u32 csum)
+{
+ return ~((csum & 0xffff) + (csum >> 16)) ? : 0xffff;
+}
+
+static int xdp_adjst_tail_shrnk_data(struct xdp_md *ctx, __u16 offset,
+ __u32 hdr_len)
+{
+ char tmp_buff[MAX_ADJST_OFFSET];
+ __u32 buff_pos, udp_csum = 0;
+ struct udphdr *udph = NULL;
+ __u32 buff_len;
+
+ udph = update_pkt(ctx, 0 - offset, &udp_csum);
+ if (!udph)
+ return -1;
+
+ buff_len = bpf_xdp_get_buff_len(ctx);
+
+ offset = (offset & 0x1ff) >= MAX_ADJST_OFFSET ? MAX_ADJST_OFFSET :
+ offset & 0xff;
+ if (offset == 0)
+ return -1;
+
+ /* Make sure we have enough data to avoid eating the header */
+ if (buff_len - offset < hdr_len)
+ return -1;
+
+ buff_pos = buff_len - offset;
+ if (bpf_xdp_load_bytes(ctx, buff_pos, tmp_buff, offset) < 0)
+ return -1;
+
+ udp_csum = bpf_csum_diff((__be32 *)tmp_buff, offset, 0, 0, udp_csum);
+ udph->check = (__u16)csum_fold_helper(udp_csum);
+
+ if (bpf_xdp_adjust_tail(ctx, 0 - offset) < 0)
+ return -1;
+
+ return 0;
+}
+
+static int xdp_adjst_tail_grow_data(struct xdp_md *ctx, __u16 offset)
+{
+ char tmp_buff[MAX_ADJST_OFFSET];
+ __u32 buff_pos, udp_csum = 0;
+ __u32 buff_len, hdr_len, key;
+ struct udphdr *udph;
+ __s32 *val;
+ __u8 tag;
+
+ /* Proceed to update the packet headers before attempting to adjuste
+ * the tail. Once the tail is adjusted we lose access to the offset
+ * amount of data at the end of the packet which is crucial to update
+ * the checksum.
+ * Since any failure beyond this would abort the packet, we should
+ * not worry about passing a packet up the stack with wrong headers
+ */
+ udph = update_pkt(ctx, offset, &udp_csum);
+ if (!udph)
+ return -1;
+
+ key = XDP_ADJST_TAG;
+ val = bpf_map_lookup_elem(&map_xdp_setup, &key);
+ if (!val)
+ return -1;
+
+ tag = (__u8)(*val);
+
+ for (int i = 0; i < MAX_ADJST_OFFSET; i++)
+ __builtin_memcpy(&tmp_buff[i], &tag, 1);
+
+ offset = (offset & 0x1ff) >= MAX_ADJST_OFFSET ? MAX_ADJST_OFFSET :
+ offset & 0xff;
+ if (offset == 0)
+ return -1;
+
+ udp_csum = bpf_csum_diff(0, 0, (__be32 *)tmp_buff, offset, udp_csum);
+ udph->check = (__u16)csum_fold_helper(udp_csum);
+
+ buff_len = bpf_xdp_get_buff_len(ctx);
+
+ if (bpf_xdp_adjust_tail(ctx, offset) < 0) {
+ bpf_printk("Failed to adjust tail\n");
+ return -1;
+ }
+
+ if (bpf_xdp_store_bytes(ctx, buff_len, tmp_buff, offset) < 0)
+ return -1;
+
+ return 0;
+}
+
+static int xdp_adjst_tail(struct xdp_md *ctx, __u16 port)
+{
+ void *data = (void *)(long)ctx->data;
+ struct udphdr *udph = NULL;
+ __s32 *adjust_offset, *val;
+ __u32 key, hdr_len;
+ void *offset_ptr;
+ __u8 tag;
+ int ret;
+
+ udph = filter_udphdr(ctx, port);
+ if (!udph)
+ return XDP_PASS;
+
+ hdr_len = (void *)udph - data + sizeof(struct udphdr);
+ key = XDP_ADJST_OFFSET;
+ adjust_offset = bpf_map_lookup_elem(&map_xdp_setup, &key);
+ if (!adjust_offset)
+ return XDP_PASS;
+
+ if (*adjust_offset < 0)
+ ret = xdp_adjst_tail_shrnk_data(ctx,
+ (__u16)(0 - *adjust_offset),
+ hdr_len);
+ else
+ ret = xdp_adjst_tail_grow_data(ctx, (__u16)(*adjust_offset));
+ if (ret)
+ goto abort_pkt;
+
+ record_stats(ctx, STATS_PASS);
+ return XDP_PASS;
+
+abort_pkt:
+ record_stats(ctx, STATS_ABORT);
+ return XDP_ABORTED;
+}
+
+static int xdp_adjst_head_shrnk_data(struct xdp_md *ctx, __u64 hdr_len,
+ __u32 offset)
+{
+ char tmp_buff[MAX_ADJST_OFFSET];
+ struct udphdr *udph;
+ void *offset_ptr;
+ __u32 udp_csum = 0;
+
+ /* Update the length information in the IP and UDP headers before
+ * adjusting the headroom. This simplifies accessing the relevant
+ * fields in the IP and UDP headers for fragmented packets. Any
+ * failure beyond this point will result in the packet being aborted,
+ * so we don't need to worry about incorrect length information for
+ * passed packets.
+ */
+ udph = update_pkt(ctx, (__s16)(0 - offset), &udp_csum);
+ if (!udph)
+ return -1;
+
+ offset = (offset & 0x1ff) >= MAX_ADJST_OFFSET ? MAX_ADJST_OFFSET :
+ offset & 0xff;
+ if (offset == 0)
+ return -1;
+
+ if (bpf_xdp_load_bytes(ctx, hdr_len, tmp_buff, offset) < 0)
+ return -1;
+
+ udp_csum = bpf_csum_diff((__be32 *)tmp_buff, offset, 0, 0, udp_csum);
+
+ udph->check = (__u16)csum_fold_helper(udp_csum);
+
+ if (bpf_xdp_load_bytes(ctx, 0, tmp_buff, MAX_ADJST_OFFSET) < 0)
+ return -1;
+
+ if (bpf_xdp_adjust_head(ctx, offset) < 0)
+ return -1;
+
+ if (offset > MAX_ADJST_OFFSET)
+ return -1;
+
+ if (hdr_len > MAX_ADJST_OFFSET || hdr_len == 0)
+ return -1;
+
+ /* Added here to handle clang complain about negative value */
+ hdr_len = hdr_len & 0xff;
+
+ if (hdr_len == 0)
+ return -1;
+
+ if (bpf_xdp_store_bytes(ctx, 0, tmp_buff, hdr_len) < 0)
+ return -1;
+
+ return 0;
+}
+
+static int xdp_adjst_head_grow_data(struct xdp_md *ctx, __u64 hdr_len,
+ __u32 offset)
+{
+ char hdr_buff[MAX_HDR_LEN];
+ char data_buff[MAX_ADJST_OFFSET];
+ void *offset_ptr;
+ __s32 *val;
+ __u32 key;
+ __u8 tag;
+ __u32 udp_csum = 0;
+ struct udphdr *udph;
+
+ udph = update_pkt(ctx, (__s16)(offset), &udp_csum);
+ if (!udph)
+ return -1;
+
+ key = XDP_ADJST_TAG;
+ val = bpf_map_lookup_elem(&map_xdp_setup, &key);
+ if (!val)
+ return -1;
+
+ tag = (__u8)(*val);
+ for (int i = 0; i < MAX_ADJST_OFFSET; i++)
+ __builtin_memcpy(&data_buff[i], &tag, 1);
+
+ offset = (offset & 0x1ff) >= MAX_ADJST_OFFSET ? MAX_ADJST_OFFSET :
+ offset & 0xff;
+ if (offset == 0)
+ return -1;
+
+ udp_csum = bpf_csum_diff(0, 0, (__be32 *)data_buff, offset, udp_csum);
+ udph->check = (__u16)csum_fold_helper(udp_csum);
+
+ if (hdr_len > MAX_ADJST_OFFSET || hdr_len == 0)
+ return -1;
+
+ /* Added here to handle clang complain about negative value */
+ hdr_len = hdr_len & 0xff;
+
+ if (hdr_len == 0)
+ return -1;
+
+ if (bpf_xdp_load_bytes(ctx, 0, hdr_buff, hdr_len) < 0)
+ return -1;
+
+ if (offset > MAX_ADJST_OFFSET)
+ return -1;
+
+ if (bpf_xdp_adjust_head(ctx, 0 - offset) < 0)
+ return -1;
+
+ if (bpf_xdp_store_bytes(ctx, 0, hdr_buff, hdr_len) < 0)
+ return -1;
+
+ if (bpf_xdp_store_bytes(ctx, hdr_len, data_buff, offset) < 0)
+ return -1;
+
+ return 0;
+}
+
+static int xdp_head_adjst(struct xdp_md *ctx, __u16 port)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ struct udphdr *udph_ptr = NULL;
+ __u32 key, size, hdr_len;
+ __s32 *val;
+ int res;
+
+ /* Filter packets based on UDP port */
+ udph_ptr = filter_udphdr(ctx, port);
+ if (!udph_ptr)
+ return XDP_PASS;
+
+ hdr_len = (void *)udph_ptr - data + sizeof(struct udphdr);
+
+ key = XDP_ADJST_OFFSET;
+ val = bpf_map_lookup_elem(&map_xdp_setup, &key);
+ if (!val)
+ return XDP_PASS;
+
+ switch (*val) {
+ case -16:
+ case 16:
+ size = 16;
+ break;
+ case -32:
+ case 32:
+ size = 32;
+ break;
+ case -64:
+ case 64:
+ size = 64;
+ break;
+ case -128:
+ case 128:
+ size = 128;
+ break;
+ case -256:
+ case 256:
+ size = 256;
+ break;
+ default:
+ bpf_printk("Invalid adjustment offset: %d\n", *val);
+ goto abort;
+ }
+
+ if (*val < 0)
+ res = xdp_adjst_head_grow_data(ctx, hdr_len, size);
+ else
+ res = xdp_adjst_head_shrnk_data(ctx, hdr_len, size);
+
+ if (res)
+ goto abort;
+
+ record_stats(ctx, STATS_PASS);
+ return XDP_PASS;
+
+abort:
+ record_stats(ctx, STATS_ABORT);
+ return XDP_ABORTED;
+}
+
+static int xdp_prog_common(struct xdp_md *ctx)
+{
+ __u32 key, *port;
+ __s32 *mode;
+
+ key = XDP_MODE;
+ mode = bpf_map_lookup_elem(&map_xdp_setup, &key);
+ if (!mode)
+ return XDP_PASS;
+
+ key = XDP_PORT;
+ port = bpf_map_lookup_elem(&map_xdp_setup, &key);
+ if (!port)
+ return XDP_PASS;
+
+ switch (*mode) {
+ case XDP_MODE_PASS:
+ return xdp_mode_pass(ctx, (__u16)(*port));
+ case XDP_MODE_DROP:
+ return xdp_mode_drop_handler(ctx, (__u16)(*port));
+ case XDP_MODE_TX:
+ return xdp_mode_tx_handler(ctx, (__u16)(*port));
+ case XDP_MODE_TAIL_ADJST:
+ return xdp_adjst_tail(ctx, (__u16)(*port));
+ case XDP_MODE_HEAD_ADJST:
+ return xdp_head_adjst(ctx, (__u16)(*port));
+ }
+
+ /* Default action is to simple pass */
+ return XDP_PASS;
+}
+
+SEC("xdp")
+int xdp_prog(struct xdp_md *ctx)
+{
+ return xdp_prog_common(ctx);
+}
+
+SEC("xdp.frags")
+int xdp_prog_frags(struct xdp_md *ctx)
+{
+ return xdp_prog_common(ctx);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/net/mptcp/config b/tools/testing/selftests/net/mptcp/config
index 4f80014cae49..968d440c03fe 100644
--- a/tools/testing/selftests/net/mptcp/config
+++ b/tools/testing/selftests/net/mptcp/config
@@ -13,6 +13,7 @@ CONFIG_NETFILTER_NETLINK=m
CONFIG_NF_TABLES=m
CONFIG_NFT_COMPAT=m
CONFIG_NETFILTER_XTABLES=m
+CONFIG_NETFILTER_XTABLES_LEGACY=y
CONFIG_NETFILTER_XT_MATCH_BPF=m
CONFIG_NETFILTER_XT_MATCH_LENGTH=m
CONFIG_NETFILTER_XT_MATCH_STATISTIC=m
@@ -25,6 +26,7 @@ CONFIG_IP_MULTIPLE_TABLES=y
CONFIG_IP_NF_FILTER=m
CONFIG_IP_NF_MANGLE=m
CONFIG_IP_NF_TARGET_REJECT=m
+CONFIG_IP6_NF_TARGET_REJECT=m
CONFIG_IPV6_MULTIPLE_TABLES=y
CONFIG_IP6_NF_FILTER=m
CONFIG_NET_ACT_CSUM=m
diff --git a/tools/testing/selftests/net/msg_zerocopy.c b/tools/testing/selftests/net/msg_zerocopy.c
index 7ea5fb28c93d..1d5d3c4e7e87 100644
--- a/tools/testing/selftests/net/msg_zerocopy.c
+++ b/tools/testing/selftests/net/msg_zerocopy.c
@@ -77,6 +77,7 @@
static int cfg_cork;
static bool cfg_cork_mixed;
static int cfg_cpu = -1; /* default: pin to last cpu */
+static int cfg_expect_zerocopy = -1;
static int cfg_family = PF_UNSPEC;
static int cfg_ifindex = 1;
static int cfg_payload_len;
@@ -92,9 +93,9 @@ static socklen_t cfg_alen;
static struct sockaddr_storage cfg_dst_addr;
static struct sockaddr_storage cfg_src_addr;
+static int exitcode;
static char payload[IP_MAXPACKET];
static long packets, bytes, completions, expected_completions;
-static int zerocopied = -1;
static uint32_t next_completion;
static uint32_t sends_since_notify;
@@ -444,11 +445,13 @@ static bool do_recv_completion(int fd, int domain)
next_completion = hi + 1;
zerocopy = !(serr->ee_code & SO_EE_CODE_ZEROCOPY_COPIED);
- if (zerocopied == -1)
- zerocopied = zerocopy;
- else if (zerocopied != zerocopy) {
- fprintf(stderr, "serr: inconsistent\n");
- zerocopied = zerocopy;
+ if (cfg_expect_zerocopy != -1 &&
+ cfg_expect_zerocopy != zerocopy) {
+ fprintf(stderr, "serr: ee_code: %u != expected %u\n",
+ zerocopy, cfg_expect_zerocopy);
+ exitcode = 1;
+ /* suppress repeated messages */
+ cfg_expect_zerocopy = zerocopy;
}
if (cfg_verbose >= 2)
@@ -571,7 +574,7 @@ static void do_tx(int domain, int type, int protocol)
fprintf(stderr, "tx=%lu (%lu MB) txc=%lu zc=%c\n",
packets, bytes >> 20, completions,
- zerocopied == 1 ? 'y' : 'n');
+ cfg_zerocopy && cfg_expect_zerocopy == 1 ? 'y' : 'n');
}
static int do_setup_rx(int domain, int type, int protocol)
@@ -715,7 +718,7 @@ static void parse_opts(int argc, char **argv)
cfg_payload_len = max_payload_len;
- while ((c = getopt(argc, argv, "46c:C:D:i:l:mp:rs:S:t:vz")) != -1) {
+ while ((c = getopt(argc, argv, "46c:C:D:i:l:mp:rs:S:t:vzZ:")) != -1) {
switch (c) {
case '4':
if (cfg_family != PF_UNSPEC)
@@ -770,6 +773,9 @@ static void parse_opts(int argc, char **argv)
case 'z':
cfg_zerocopy = true;
break;
+ case 'Z':
+ cfg_expect_zerocopy = !!atoi(optarg);
+ break;
}
}
@@ -817,5 +823,5 @@ int main(int argc, char **argv)
else
error(1, 0, "unknown cfg_test %s", cfg_test);
- return 0;
+ return exitcode;
}
diff --git a/tools/testing/selftests/net/msg_zerocopy.sh b/tools/testing/selftests/net/msg_zerocopy.sh
index 89c22f5320e0..28178a38a4e7 100755
--- a/tools/testing/selftests/net/msg_zerocopy.sh
+++ b/tools/testing/selftests/net/msg_zerocopy.sh
@@ -6,6 +6,7 @@
set -e
readonly DEV="veth0"
+readonly DUMMY_DEV="dummy0"
readonly DEV_MTU=65535
readonly BIN="./msg_zerocopy"
@@ -14,21 +15,25 @@ readonly NSPREFIX="ns-${RAND}"
readonly NS1="${NSPREFIX}1"
readonly NS2="${NSPREFIX}2"
-readonly SADDR4='192.168.1.1'
-readonly DADDR4='192.168.1.2'
-readonly SADDR6='fd::1'
-readonly DADDR6='fd::2'
+readonly LPREFIX4='192.168.1'
+readonly RPREFIX4='192.168.2'
+readonly LPREFIX6='fd'
+readonly RPREFIX6='fc'
+
readonly path_sysctl_mem="net.core.optmem_max"
# No arguments: automated test
if [[ "$#" -eq "0" ]]; then
- $0 4 tcp -t 1
- $0 6 tcp -t 1
- $0 4 udp -t 1
- $0 6 udp -t 1
- echo "OK. All tests passed"
- exit 0
+ ret=0
+
+ $0 4 tcp -t 1 || ret=1
+ $0 6 tcp -t 1 || ret=1
+ $0 4 udp -t 1 || ret=1
+ $0 6 udp -t 1 || ret=1
+
+ [[ "$ret" == "0" ]] && echo "OK. All tests passed"
+ exit $ret
fi
# Argument parsing
@@ -45,11 +50,18 @@ readonly EXTRA_ARGS="$@"
# Argument parsing: configure addresses
if [[ "${IP}" == "4" ]]; then
- readonly SADDR="${SADDR4}"
- readonly DADDR="${DADDR4}"
+ readonly SADDR="${LPREFIX4}.1"
+ readonly DADDR="${LPREFIX4}.2"
+ readonly DUMMY_ADDR="${RPREFIX4}.1"
+ readonly DADDR_TXONLY="${RPREFIX4}.2"
+ readonly MASK="24"
elif [[ "${IP}" == "6" ]]; then
- readonly SADDR="${SADDR6}"
- readonly DADDR="${DADDR6}"
+ readonly SADDR="${LPREFIX6}::1"
+ readonly DADDR="${LPREFIX6}::2"
+ readonly DUMMY_ADDR="${RPREFIX6}::1"
+ readonly DADDR_TXONLY="${RPREFIX6}::2"
+ readonly MASK="64"
+ readonly NODAD="nodad"
else
echo "Invalid IP version ${IP}"
exit 1
@@ -89,33 +101,61 @@ ip netns exec "${NS2}" sysctl -w -q "${path_sysctl_mem}=1000000"
ip link add "${DEV}" mtu "${DEV_MTU}" netns "${NS1}" type veth \
peer name "${DEV}" mtu "${DEV_MTU}" netns "${NS2}"
+ip link add "${DUMMY_DEV}" mtu "${DEV_MTU}" netns "${NS2}" type dummy
+
# Bring the devices up
ip -netns "${NS1}" link set "${DEV}" up
ip -netns "${NS2}" link set "${DEV}" up
+ip -netns "${NS2}" link set "${DUMMY_DEV}" up
# Set fixed MAC addresses on the devices
ip -netns "${NS1}" link set dev "${DEV}" address 02:02:02:02:02:02
ip -netns "${NS2}" link set dev "${DEV}" address 06:06:06:06:06:06
# Add fixed IP addresses to the devices
-ip -netns "${NS1}" addr add 192.168.1.1/24 dev "${DEV}"
-ip -netns "${NS2}" addr add 192.168.1.2/24 dev "${DEV}"
-ip -netns "${NS1}" addr add fd::1/64 dev "${DEV}" nodad
-ip -netns "${NS2}" addr add fd::2/64 dev "${DEV}" nodad
+ip -netns "${NS1}" addr add "${SADDR}/${MASK}" dev "${DEV}" ${NODAD}
+ip -netns "${NS2}" addr add "${DADDR}/${MASK}" dev "${DEV}" ${NODAD}
+ip -netns "${NS2}" addr add "${DUMMY_ADDR}/${MASK}" dev "${DUMMY_DEV}" ${NODAD}
+
+ip -netns "${NS1}" route add default via "${DADDR}" dev "${DEV}"
+ip -netns "${NS2}" route add default via "${DADDR_TXONLY}" dev "${DUMMY_DEV}"
+
+ip netns exec "${NS2}" sysctl -wq net.ipv4.ip_forward=1
+ip netns exec "${NS2}" sysctl -wq net.ipv6.conf.all.forwarding=1
# Optionally disable sg or csum offload to test edge cases
# ip netns exec "${NS1}" ethtool -K "${DEV}" sg off
+ret=0
+
do_test() {
local readonly ARGS="$1"
- echo "ipv${IP} ${TXMODE} ${ARGS}"
- ip netns exec "${NS2}" "${BIN}" "-${IP}" -i "${DEV}" -t 2 -C 2 -S "${SADDR}" -D "${DADDR}" ${ARGS} -r "${RXMODE}" &
+ # tx-rx test
+ # packets queued to a local socket are copied,
+ # sender notification has SO_EE_CODE_ZEROCOPY_COPIED.
+
+ echo -e "\nipv${IP} ${TXMODE} ${ARGS} tx-rx\n"
+ ip netns exec "${NS2}" "${BIN}" "-${IP}" -i "${DEV}" -t 2 -C 2 \
+ -S "${SADDR}" -D "${DADDR}" ${ARGS} -r "${RXMODE}" &
sleep 0.2
- ip netns exec "${NS1}" "${BIN}" "-${IP}" -i "${DEV}" -t 1 -C 3 -S "${SADDR}" -D "${DADDR}" ${ARGS} "${TXMODE}"
+ ip netns exec "${NS1}" "${BIN}" "-${IP}" -i "${DEV}" -t 1 -C 3 \
+ -S "${SADDR}" -D "${DADDR}" ${ARGS} "${TXMODE}" -Z 0 || ret=1
wait
+
+ # next test is unconnected tx to dummy0, cannot exercise with tcp
+ [[ "${TXMODE}" == "tcp" ]] && return
+
+ # tx-only test: send out dummy0
+ # packets leaving the host are not copied,
+ # sender notification does not have SO_EE_CODE_ZEROCOPY_COPIED.
+
+ echo -e "\nipv${IP} ${TXMODE} ${ARGS} tx-only\n"
+ ip netns exec "${NS1}" "${BIN}" "-${IP}" -i "${DEV}" -t 1 -C 3 \
+ -S "${SADDR}" -D "${DADDR_TXONLY}" ${ARGS} "${TXMODE}" -Z 1 || ret=1
}
do_test "${EXTRA_ARGS}"
do_test "-z ${EXTRA_ARGS}"
-echo ok
+
+[[ "$ret" == "0" ]] && echo "OK"
diff --git a/tools/testing/selftests/net/netdev-l2addr.sh b/tools/testing/selftests/net/netdev-l2addr.sh
new file mode 100755
index 000000000000..18509da293e5
--- /dev/null
+++ b/tools/testing/selftests/net/netdev-l2addr.sh
@@ -0,0 +1,59 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source lib.sh
+set -o pipefail
+
+NSIM_ADDR=2025
+TEST_ADDR="d0:be:d0:be:d0:00"
+
+RET_CODE=0
+
+cleanup() {
+ cleanup_netdevsim "$NSIM_ADDR"
+ cleanup_ns "$NS"
+}
+
+trap cleanup EXIT
+
+fail() {
+ echo "ERROR: ${1:-unexpected return code} (ret: $_)" >&2
+ RET_CODE=1
+}
+
+get_addr()
+{
+ local type="$1"
+ local dev="$2"
+ local ns="$3"
+
+ ip -j -n "$ns" link show dev "$dev" | jq -er ".[0].$type"
+}
+
+setup_ns NS
+
+nsim=$(create_netdevsim $NSIM_ADDR "$NS")
+
+get_addr address "$nsim" "$NS" >/dev/null || fail "Couldn't get ether addr"
+get_addr broadcast "$nsim" "$NS" >/dev/null || fail "Couldn't get brd addr"
+get_addr permaddr "$nsim" "$NS" >/dev/null && fail "Found perm_addr without setting it"
+
+ip -n "$NS" link set dev "$nsim" address "$TEST_ADDR"
+ip -n "$NS" link set dev "$nsim" brd "$TEST_ADDR"
+
+[[ "$(get_addr address "$nsim" "$NS")" == "$TEST_ADDR" ]] || fail "Couldn't set ether addr"
+[[ "$(get_addr broadcast "$nsim" "$NS")" == "$TEST_ADDR" ]] || fail "Couldn't set brd addr"
+
+if create_netdevsim_port "$NSIM_ADDR" "$NS" 2 "FF:FF:FF:FF:FF:FF" 2>/dev/null; then
+ fail "Created netdevsim with broadcast permaddr"
+fi
+
+nsim_port=$(create_netdevsim_port "$NSIM_ADDR" "$NS" 2 "$TEST_ADDR")
+
+get_addr address "$nsim_port" "$NS" >/dev/null || fail "Couldn't get ether addr"
+get_addr broadcast "$nsim_port" "$NS" >/dev/null || fail "Couldn't get brd addr"
+[[ "$(get_addr permaddr "$nsim_port" "$NS")" == "$TEST_ADDR" ]] || fail "Couldn't get permaddr"
+
+cleanup_netdevsim "$NSIM_ADDR" "$NS"
+
+exit $RET_CODE
diff --git a/tools/testing/selftests/net/netfilter/config b/tools/testing/selftests/net/netfilter/config
index 363646f4fefe..79d5b33966ba 100644
--- a/tools/testing/selftests/net/netfilter/config
+++ b/tools/testing/selftests/net/netfilter/config
@@ -1,6 +1,8 @@
CONFIG_AUDIT=y
CONFIG_BPF_SYSCALL=y
CONFIG_BRIDGE=m
+CONFIG_NETFILTER_XTABLES_LEGACY=y
+CONFIG_BRIDGE_NF_EBTABLES_LEGACY=m
CONFIG_BRIDGE_EBT_BROUTE=m
CONFIG_BRIDGE_EBT_IP=m
CONFIG_BRIDGE_EBT_REDIRECT=m
@@ -14,7 +16,10 @@ CONFIG_INET_ESP=m
CONFIG_IP_NF_MATCH_RPFILTER=m
CONFIG_IP6_NF_MATCH_RPFILTER=m
CONFIG_IP_NF_IPTABLES=m
+CONFIG_IP_NF_IPTABLES_LEGACY=m
CONFIG_IP6_NF_IPTABLES=m
+CONFIG_IP6_NF_IPTABLES_LEGACY=m
+CONFIG_IP_NF_NAT=m
CONFIG_IP_NF_FILTER=m
CONFIG_IP6_NF_FILTER=m
CONFIG_IP_NF_RAW=m
@@ -92,4 +97,4 @@ CONFIG_XFRM_STATISTICS=y
CONFIG_NET_PKTGEN=m
CONFIG_TUN=m
CONFIG_INET_DIAG=m
-CONFIG_SCTP_DIAG=m
+CONFIG_INET_SCTP_DIAG=m
diff --git a/tools/testing/selftests/net/netfilter/ipvs.sh b/tools/testing/selftests/net/netfilter/ipvs.sh
index 6af2ea3ad6b8..9c9d5b38ab71 100755
--- a/tools/testing/selftests/net/netfilter/ipvs.sh
+++ b/tools/testing/selftests/net/netfilter/ipvs.sh
@@ -151,7 +151,7 @@ test_nat() {
test_tun() {
ip netns exec "${ns0}" ip route add "${vip_v4}" via "${gip_v4}" dev br0
- ip netns exec "${ns1}" modprobe -q ipip
+ modprobe -q ipip
ip netns exec "${ns1}" ip link set tunl0 up
ip netns exec "${ns1}" sysctl -qw net.ipv4.ip_forward=0
ip netns exec "${ns1}" sysctl -qw net.ipv4.conf.all.send_redirects=0
@@ -160,10 +160,10 @@ test_tun() {
ip netns exec "${ns1}" ipvsadm -a -i -t "${vip_v4}:${port}" -r ${rip_v4}:${port}
ip netns exec "${ns1}" ip addr add ${vip_v4}/32 dev lo:1
- ip netns exec "${ns2}" modprobe -q ipip
ip netns exec "${ns2}" ip link set tunl0 up
ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.arp_ignore=1
ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.arp_announce=2
+ ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.tunl0.rp_filter=0
ip netns exec "${ns2}" ip addr add "${vip_v4}/32" dev lo:1
test_service
diff --git a/tools/testing/selftests/net/netfilter/nft_interface_stress.sh b/tools/testing/selftests/net/netfilter/nft_interface_stress.sh
index 5ff7be9daeee..c0fffaa6dbd9 100755
--- a/tools/testing/selftests/net/netfilter/nft_interface_stress.sh
+++ b/tools/testing/selftests/net/netfilter/nft_interface_stress.sh
@@ -10,6 +10,8 @@ source lib.sh
checktool "nft --version" "run test without nft tool"
checktool "iperf3 --version" "run test without iperf3 tool"
+read kernel_tainted < /proc/sys/kernel/tainted
+
# how many seconds to torture the kernel?
# default to 80% of max run time but don't exceed 48s
TEST_RUNTIME=$((${kselftest_timeout:-60} * 8 / 10))
@@ -135,7 +137,8 @@ else
wait
fi
-[[ $(</proc/sys/kernel/tainted) -eq 0 ]] || {
+
+[[ $kernel_tainted -eq 0 && $(</proc/sys/kernel/tainted) -ne 0 ]] && {
echo "FAIL: Kernel is tainted!"
exit $ksft_fail
}
diff --git a/tools/testing/selftests/net/nettest.c b/tools/testing/selftests/net/nettest.c
index cd8a58097448..1f5227f3d64d 100644
--- a/tools/testing/selftests/net/nettest.c
+++ b/tools/testing/selftests/net/nettest.c
@@ -385,7 +385,7 @@ static int get_bind_to_device(int sd, char *name, size_t len)
name[0] = '\0';
rc = getsockopt(sd, SOL_SOCKET, SO_BINDTODEVICE, name, &optlen);
if (rc < 0)
- log_err_errno("setsockopt(SO_BINDTODEVICE)");
+ log_err_errno("getsockopt(SO_BINDTODEVICE)");
return rc;
}
@@ -535,7 +535,7 @@ static int set_freebind(int sd, int version)
break;
case AF_INET6:
if (setsockopt(sd, SOL_IPV6, IPV6_FREEBIND, &one, sizeof(one))) {
- log_err_errno("setsockopt(IPV6_FREEBIND");
+ log_err_errno("setsockopt(IPV6_FREEBIND)");
rc = -1;
}
break;
@@ -812,7 +812,7 @@ static int convert_addr(struct sock_args *args, const char *_str,
sep++;
if (str_to_uint(sep, 1, pfx_len_max,
&args->prefix_len) != 0) {
- fprintf(stderr, "Invalid port\n");
+ fprintf(stderr, "Invalid prefix length\n");
return 1;
}
} else {
@@ -1272,7 +1272,7 @@ static int msg_loop(int client, int sd, void *addr, socklen_t alen,
}
}
- nfds = interactive ? MAX(fileno(stdin), sd) + 1 : sd + 1;
+ nfds = interactive ? MAX(fileno(stdin), sd) + 1 : sd + 1;
while (1) {
FD_ZERO(&rfds);
FD_SET(sd, &rfds);
@@ -1492,7 +1492,7 @@ static int lsock_init(struct sock_args *args)
sd = socket(args->version, args->type, args->protocol);
if (sd < 0) {
log_err_errno("Error opening socket");
- return -1;
+ return -1;
}
if (set_reuseaddr(sd) != 0)
@@ -1912,7 +1912,7 @@ static int ipc_parent(int cpid, int fd, struct sock_args *args)
* waiting to be told when to continue
*/
if (read(fd, &buf, sizeof(buf)) <= 0) {
- log_err_errno("Failed to read IPC status from status");
+ log_err_errno("Failed to read IPC status from pipe");
return 1;
}
if (!buf) {
diff --git a/tools/testing/selftests/net/nl_netdev.py b/tools/testing/selftests/net/nl_netdev.py
index beaee5e4e2aa..5c66421ab8aa 100755
--- a/tools/testing/selftests/net/nl_netdev.py
+++ b/tools/testing/selftests/net/nl_netdev.py
@@ -2,8 +2,9 @@
# SPDX-License-Identifier: GPL-2.0
import time
+from os import system
from lib.py import ksft_run, ksft_exit, ksft_pr
-from lib.py import ksft_eq, ksft_ge, ksft_busy_wait
+from lib.py import ksft_eq, ksft_ge, ksft_ne, ksft_busy_wait
from lib.py import NetdevFamily, NetdevSimDev, ip
@@ -34,6 +35,128 @@ def napi_list_check(nf) -> None:
ksft_eq(len(napis), 100,
comment=f"queue count after reset queue {q} mode {i}")
+def napi_set_threaded(nf) -> None:
+ """
+ Test that verifies various cases of napi threaded
+ set and unset at napi and device level.
+ """
+ with NetdevSimDev(queue_count=2) as nsimdev:
+ nsim = nsimdev.nsims[0]
+
+ ip(f"link set dev {nsim.ifname} up")
+
+ napis = nf.napi_get({'ifindex': nsim.ifindex}, dump=True)
+ ksft_eq(len(napis), 2)
+
+ napi0_id = napis[0]['id']
+ napi1_id = napis[1]['id']
+
+ # set napi threaded and verify
+ nf.napi_set({'id': napi0_id, 'threaded': "enabled"})
+ napi0 = nf.napi_get({'id': napi0_id})
+ ksft_eq(napi0['threaded'], "enabled")
+ ksft_ne(napi0.get('pid'), None)
+
+ # check it is not set for napi1
+ napi1 = nf.napi_get({'id': napi1_id})
+ ksft_eq(napi1['threaded'], "disabled")
+ ksft_eq(napi1.get('pid'), None)
+
+ ip(f"link set dev {nsim.ifname} down")
+ ip(f"link set dev {nsim.ifname} up")
+
+ # verify if napi threaded is still set
+ napi0 = nf.napi_get({'id': napi0_id})
+ ksft_eq(napi0['threaded'], "enabled")
+ ksft_ne(napi0.get('pid'), None)
+
+ # check it is still not set for napi1
+ napi1 = nf.napi_get({'id': napi1_id})
+ ksft_eq(napi1['threaded'], "disabled")
+ ksft_eq(napi1.get('pid'), None)
+
+ # unset napi threaded and verify
+ nf.napi_set({'id': napi0_id, 'threaded': "disabled"})
+ napi0 = nf.napi_get({'id': napi0_id})
+ ksft_eq(napi0['threaded'], "disabled")
+ ksft_eq(napi0.get('pid'), None)
+
+ # set threaded at device level
+ system(f"echo 1 > /sys/class/net/{nsim.ifname}/threaded")
+
+ # check napi threaded is set for both napis
+ napi0 = nf.napi_get({'id': napi0_id})
+ ksft_eq(napi0['threaded'], "enabled")
+ ksft_ne(napi0.get('pid'), None)
+ napi1 = nf.napi_get({'id': napi1_id})
+ ksft_eq(napi1['threaded'], "enabled")
+ ksft_ne(napi1.get('pid'), None)
+
+ # unset threaded at device level
+ system(f"echo 0 > /sys/class/net/{nsim.ifname}/threaded")
+
+ # check napi threaded is unset for both napis
+ napi0 = nf.napi_get({'id': napi0_id})
+ ksft_eq(napi0['threaded'], "disabled")
+ ksft_eq(napi0.get('pid'), None)
+ napi1 = nf.napi_get({'id': napi1_id})
+ ksft_eq(napi1['threaded'], "disabled")
+ ksft_eq(napi1.get('pid'), None)
+
+ # set napi threaded for napi0
+ nf.napi_set({'id': napi0_id, 'threaded': 1})
+ napi0 = nf.napi_get({'id': napi0_id})
+ ksft_eq(napi0['threaded'], "enabled")
+ ksft_ne(napi0.get('pid'), None)
+
+ # unset threaded at device level
+ system(f"echo 0 > /sys/class/net/{nsim.ifname}/threaded")
+
+ # check napi threaded is unset for both napis
+ napi0 = nf.napi_get({'id': napi0_id})
+ ksft_eq(napi0['threaded'], "disabled")
+ ksft_eq(napi0.get('pid'), None)
+ napi1 = nf.napi_get({'id': napi1_id})
+ ksft_eq(napi1['threaded'], "disabled")
+ ksft_eq(napi1.get('pid'), None)
+
+def dev_set_threaded(nf) -> None:
+ """
+ Test that verifies various cases of napi threaded
+ set and unset at device level using sysfs.
+ """
+ with NetdevSimDev(queue_count=2) as nsimdev:
+ nsim = nsimdev.nsims[0]
+
+ ip(f"link set dev {nsim.ifname} up")
+
+ napis = nf.napi_get({'ifindex': nsim.ifindex}, dump=True)
+ ksft_eq(len(napis), 2)
+
+ napi0_id = napis[0]['id']
+ napi1_id = napis[1]['id']
+
+ # set threaded
+ system(f"echo 1 > /sys/class/net/{nsim.ifname}/threaded")
+
+ # check napi threaded is set for both napis
+ napi0 = nf.napi_get({'id': napi0_id})
+ ksft_eq(napi0['threaded'], "enabled")
+ ksft_ne(napi0.get('pid'), None)
+ napi1 = nf.napi_get({'id': napi1_id})
+ ksft_eq(napi1['threaded'], "enabled")
+ ksft_ne(napi1.get('pid'), None)
+
+ # unset threaded
+ system(f"echo 0 > /sys/class/net/{nsim.ifname}/threaded")
+
+ # check napi threaded is unset for both napis
+ napi0 = nf.napi_get({'id': napi0_id})
+ ksft_eq(napi0['threaded'], "disabled")
+ ksft_eq(napi0.get('pid'), None)
+ napi1 = nf.napi_get({'id': napi1_id})
+ ksft_eq(napi1['threaded'], "disabled")
+ ksft_eq(napi1.get('pid'), None)
def nsim_rxq_reset_down(nf) -> None:
"""
@@ -122,7 +245,7 @@ def page_pool_check(nf) -> None:
def main() -> None:
nf = NetdevFamily()
ksft_run([empty_check, lo_check, page_pool_check, napi_list_check,
- nsim_rxq_reset_down],
+ dev_set_threaded, napi_set_threaded, nsim_rxq_reset_down],
args=(nf, ))
ksft_exit()
diff --git a/tools/testing/selftests/net/packetdrill/ksft_runner.sh b/tools/testing/selftests/net/packetdrill/ksft_runner.sh
index ef8b25a606d8..c5b01e1bd4c7 100755
--- a/tools/testing/selftests/net/packetdrill/ksft_runner.sh
+++ b/tools/testing/selftests/net/packetdrill/ksft_runner.sh
@@ -39,11 +39,15 @@ if [[ -n "${KSFT_MACHINE_SLOW}" ]]; then
# xfail tests that are known flaky with dbg config, not fixable.
# still run them for coverage (and expect 100% pass without dbg).
declare -ar xfail_list=(
+ "tcp_blocking_blocking-connect.pkt"
+ "tcp_blocking_blocking-read.pkt"
"tcp_eor_no-coalesce-retrans.pkt"
"tcp_fast_recovery_prr-ss.*.pkt"
+ "tcp_sack_sack-route-refresh-ip-tos.pkt"
"tcp_slow_start_slow-start-after-win-update.pkt"
"tcp_timestamping.*.pkt"
"tcp_user_timeout_user-timeout-probe.pkt"
+ "tcp_zerocopy_cl.*.pkt"
"tcp_zerocopy_epoll_.*.pkt"
"tcp_tcp_info_tcp-info-.*-limited.pkt"
)
diff --git a/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-read.pkt b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-read.pkt
index 914eabab367a..657e42ca65b5 100644
--- a/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-read.pkt
+++ b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-read.pkt
@@ -1,6 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
// Test for blocking read.
+
--tolerance_usecs=10000
+--mss=1000
`./defaults.sh`
diff --git a/tools/testing/selftests/net/packetdrill/tcp_dsack_mult.pkt b/tools/testing/selftests/net/packetdrill/tcp_dsack_mult.pkt
new file mode 100644
index 000000000000..c790d0af635e
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_dsack_mult.pkt
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test various DSACK (RFC 2883) behaviors.
+
+--mss=1000
+
+`./defaults.sh`
+
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +.1 < . 1:1(0) ack 1 win 1024
+ +0 accept(3, ..., ...) = 4
+
+// First SACK range.
+ +0 < P. 1001:2001(1000) ack 1 win 1024
+ +0 > . 1:1(0) ack 1 <nop, nop, sack 1001:2001>
+
+// Check SACK coalescing (contiguous sequence).
+ +0 < P. 2001:3001(1000) ack 1 win 1024
+ +0 > . 1:1(0) ack 1 <nop,nop,sack 1001:3001>
+
+// Check we have two SACK ranges for non contiguous sequences.
+ +0 < P. 4001:5001(1000) ack 1 win 1024
+ +0 > . 1:1(0) ack 1 <nop,nop,sack 4001:5001 1001:3001>
+
+// Three ranges.
+ +0 < P. 7001:8001(1000) ack 1 win 1024
+ +0 > . 1:1(0) ack 1 <nop,nop,sack 7001:8001 4001:5001 1001:3001>
+
+// DSACK (1001:3001) + SACK (6001:7001)
+ +0 < P. 1:6001(6000) ack 1 win 1024
+ +0 > . 1:1(0) ack 6001 <nop,nop,sack 1001:3001 7001:8001>
+
+// DSACK (7001:8001)
+ +0 < P. 6001:8001(2000) ack 1 win 1024
+ +0 > . 1:1(0) ack 8001 <nop,nop,sack 7001:8001>
+
+// DSACK for an older segment.
+ +0 < P. 1:1001(1000) ack 1 win 1024
+ +0 > . 1:1(0) ack 8001 <nop,nop,sack 1:1001>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_inq_client.pkt b/tools/testing/selftests/net/packetdrill/tcp_inq_client.pkt
index df49c67645ac..e13f0eee9795 100644
--- a/tools/testing/selftests/net/packetdrill/tcp_inq_client.pkt
+++ b/tools/testing/selftests/net/packetdrill/tcp_inq_client.pkt
@@ -1,5 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
// Test TCP_INQ and TCP_CM_INQ on the client side.
+
+--mss=1000
+
`./defaults.sh
`
diff --git a/tools/testing/selftests/net/packetdrill/tcp_inq_server.pkt b/tools/testing/selftests/net/packetdrill/tcp_inq_server.pkt
index 04a5e2590c62..14dd5f813d50 100644
--- a/tools/testing/selftests/net/packetdrill/tcp_inq_server.pkt
+++ b/tools/testing/selftests/net/packetdrill/tcp_inq_server.pkt
@@ -1,5 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
// Test TCP_INQ and TCP_CM_INQ on the server side.
+
+--mss=1000
+
`./defaults.sh
`
diff --git a/tools/testing/selftests/net/packetdrill/tcp_ooo_rcv_mss.pkt b/tools/testing/selftests/net/packetdrill/tcp_ooo_rcv_mss.pkt
new file mode 100644
index 000000000000..7e6bc5fb0c8d
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_ooo_rcv_mss.pkt
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: GPL-2.0
+
+--mss=1000
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_rmem="4096 131072 $((32*1024*1024))"`
+
+ +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 65535 <mss 1000,nop,nop,sackOK,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 10>
+ +.1 < . 1:1(0) ack 1 win 257
+
+ +0 accept(3, ..., ...) = 4
+
+ +0 < . 2001:11001(9000) ack 1 win 257
+ +0 > . 1:1(0) ack 1 win 81 <nop,nop,sack 2001:11001>
+
+// check that ooo packet properly updates tcpi_rcv_mss
+ +0 %{ assert tcpi_rcv_mss == 1000, tcpi_rcv_mss }%
+
+ +0 < . 11001:21001(10000) ack 1 win 257
+ +0 > . 1:1(0) ack 1 win 81 <nop,nop,sack 2001:21001>
+
diff --git a/tools/testing/selftests/net/packetdrill/tcp_rcv_big_endseq.pkt b/tools/testing/selftests/net/packetdrill/tcp_rcv_big_endseq.pkt
new file mode 100644
index 000000000000..3848b419e68c
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_rcv_big_endseq.pkt
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0
+
+--mss=1000
+
+`./defaults.sh`
+
+ 0 `nstat -n`
+
+// Establish a connection.
+ +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 setsockopt(3, SOL_SOCKET, SO_RCVBUF, [10000], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,wscale 0>
+ +.1 < . 1:1(0) ack 1 win 257
+
+ +0 accept(3, ..., ...) = 4
+
+ +0 < P. 1:4001(4000) ack 1 win 257
+ +0 > . 1:1(0) ack 4001 win 5000
+
+// packet in sequence : SKB_DROP_REASON_TCP_INVALID_END_SEQUENCE / LINUX_MIB_BEYOND_WINDOW
+ +0 < P. 4001:54001(50000) ack 1 win 257
+ +0 > . 1:1(0) ack 4001 win 5000
+
+// ooo packet. : SKB_DROP_REASON_TCP_INVALID_END_SEQUENCE / LINUX_MIB_BEYOND_WINDOW
+ +1 < P. 5001:55001(50000) ack 1 win 257
+ +0 > . 1:1(0) ack 4001 win 5000
+
+// SKB_DROP_REASON_TCP_INVALID_SEQUENCE / LINUX_MIB_BEYOND_WINDOW
+ +0 < P. 70001:80001(10000) ack 1 win 257
+ +0 > . 1:1(0) ack 4001 win 5000
+
+ +0 read(4, ..., 100000) = 4000
+
+// If queue is empty, accept a packet even if its end_seq is above wup + rcv_wnd
+ +0 < P. 4001:54001(50000) ack 1 win 257
+ +0 > . 1:1(0) ack 54001 win 0
+
+// Check LINUX_MIB_BEYOND_WINDOW has been incremented 3 times.
++0 `nstat | grep TcpExtBeyondWindow | grep -q " 3 "`
diff --git a/tools/testing/selftests/net/packetdrill/tcp_rcv_toobig.pkt b/tools/testing/selftests/net/packetdrill/tcp_rcv_toobig.pkt
new file mode 100644
index 000000000000..f575c0ff89da
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_rcv_toobig.pkt
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0
+
+--mss=1000
+
+`./defaults.sh`
+
+ 0 `nstat -n`
+
+// Establish a connection.
+ +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 setsockopt(3, SOL_SOCKET, SO_RCVBUF, [20000], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 win 18980 <mss 1460,nop,wscale 0>
+ +.1 < . 1:1(0) ack 1 win 257
+
+ +0 accept(3, ..., ...) = 4
+
+ +0 < P. 1:20001(20000) ack 1 win 257
+ +.04 > . 1:1(0) ack 20001 win 18000
+
+ +0 setsockopt(4, SOL_SOCKET, SO_RCVBUF, [12000], 4) = 0
+ +0 < P. 20001:80001(60000) ack 1 win 257
+ +0 > . 1:1(0) ack 20001 win 18000
+
+ +0 read(4, ..., 20000) = 20000
+// A too big packet is accepted if the receive queue is empty
+ +0 < P. 20001:80001(60000) ack 1 win 257
+ +0 > . 1:1(0) ack 80001 win 0
+
diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh
index 2e8243a65b50..d6c00efeb664 100755
--- a/tools/testing/selftests/net/rtnetlink.sh
+++ b/tools/testing/selftests/net/rtnetlink.sh
@@ -21,6 +21,7 @@ ALL_TESTS="
kci_test_vrf
kci_test_encap
kci_test_macsec
+ kci_test_macsec_vlan
kci_test_ipsec
kci_test_ipsec_offload
kci_test_fdb_get
@@ -30,6 +31,7 @@ ALL_TESTS="
kci_test_address_proto
kci_test_enslave_bonding
kci_test_mngtmpaddr
+ kci_test_operstate
"
devdummy="test-dummy0"
@@ -291,6 +293,17 @@ kci_test_route_get()
end_test "PASS: route get"
}
+check_addr_not_exist()
+{
+ dev=$1
+ addr=$2
+ if ip addr show dev $dev | grep -q $addr; then
+ return 1
+ else
+ return 0
+ fi
+}
+
kci_test_addrlft()
{
for i in $(seq 10 100) ;do
@@ -298,9 +311,8 @@ kci_test_addrlft()
run_cmd ip addr add 10.23.11.$i/32 dev "$devdummy" preferred_lft $lft valid_lft $((lft+1))
done
- sleep 5
- run_cmd_grep_fail "10.23.11." ip addr show dev "$devdummy"
- if [ $? -eq 0 ]; then
+ slowwait 5 check_addr_not_exist "$devdummy" "10.23.11."
+ if [ $? -eq 1 ]; then
check_err 1
end_test "FAIL: preferred_lft addresses remaining"
return
@@ -561,6 +573,41 @@ kci_test_macsec()
end_test "PASS: macsec"
}
+# Test __dev_set_rx_mode call from dev_uc_add under addr_list_lock spinlock.
+# Make sure __dev_set_promiscuity is not grabbing (sleeping) netdev instance
+# lock.
+# https://lore.kernel.org/netdev/2aff4342b0f5b1539c02ffd8df4c7e58dd9746e7.camel@nvidia.com/
+kci_test_macsec_vlan()
+{
+ msname="test_macsec1"
+ vlanname="test_vlan1"
+ local ret=0
+ run_cmd_grep "^Usage: ip macsec" ip macsec help
+ if [ $? -ne 0 ]; then
+ end_test "SKIP: macsec: iproute2 too old"
+ return $ksft_skip
+ fi
+ run_cmd ip link add link "$devdummy" "$msname" type macsec port 42 encrypt on
+ if [ $ret -ne 0 ];then
+ end_test "FAIL: can't add macsec interface, skipping test"
+ return 1
+ fi
+
+ run_cmd ip link set dev "$msname" up
+ ip link add link "$msname" name "$vlanname" type vlan id 1
+ ip link set dev "$vlanname" address 00:11:22:33:44:88
+ ip link set dev "$vlanname" up
+ run_cmd ip link del dev "$vlanname"
+ run_cmd ip link del dev "$msname"
+
+ if [ $ret -ne 0 ];then
+ end_test "FAIL: macsec_vlan"
+ return 1
+ fi
+
+ end_test "PASS: macsec_vlan"
+}
+
#-------------------------------------------------------------------
# Example commands
# ip x s add proto esp src 14.0.0.52 dst 14.0.0.70 \
@@ -673,6 +720,11 @@ kci_test_ipsec_offload()
sysfsf=$sysfsd/ipsec
sysfsnet=/sys/bus/netdevsim/devices/netdevsim0/net/
probed=false
+ esp4_offload_probed_default=false
+
+ if lsmod | grep -q esp4_offload; then
+ esp4_offload_probed_default=true
+ fi
if ! mount | grep -q debugfs; then
mount -t debugfs none /sys/kernel/debug/ &> /dev/null
@@ -766,6 +818,7 @@ EOF
fi
# clean up any leftovers
+ ! "$esp4_offload_probed_default" && lsmod | grep -q esp4_offload && rmmod esp4_offload
echo 0 > /sys/bus/netdevsim/del_device
$probed && rmmod netdevsim
@@ -1334,6 +1387,39 @@ kci_test_mngtmpaddr()
return $ret
}
+kci_test_operstate()
+{
+ local ret=0
+
+ # Check that it is possible to set operational state during device
+ # creation and that it is preserved when the administrative state of
+ # the device is toggled.
+ run_cmd ip link add name vx0 up state up type vxlan id 10010 dstport 4789
+ run_cmd_grep "state UP" ip link show dev vx0
+ run_cmd ip link set dev vx0 down
+ run_cmd_grep "state DOWN" ip link show dev vx0
+ run_cmd ip link set dev vx0 up
+ run_cmd_grep "state UP" ip link show dev vx0
+
+ run_cmd ip link del dev vx0
+
+ # Check that it is possible to set the operational state of the device
+ # after creation.
+ run_cmd ip link add name vx0 up type vxlan id 10010 dstport 4789
+ run_cmd_grep "state UNKNOWN" ip link show dev vx0
+ run_cmd ip link set dev vx0 state up
+ run_cmd_grep "state UP" ip link show dev vx0
+
+ run_cmd ip link del dev vx0
+
+ if [ "$ret" -ne 0 ]; then
+ end_test "FAIL: operstate"
+ return 1
+ fi
+
+ end_test "PASS: operstate"
+}
+
kci_test_rtnl()
{
local current_test
diff --git a/tools/testing/selftests/net/rtnetlink_notification.sh b/tools/testing/selftests/net/rtnetlink_notification.sh
new file mode 100755
index 000000000000..3f9780232bd6
--- /dev/null
+++ b/tools/testing/selftests/net/rtnetlink_notification.sh
@@ -0,0 +1,112 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# This test is for checking rtnetlink notification callpaths, and get as much
+# coverage as possible.
+#
+# set -e
+
+ALL_TESTS="
+ kci_test_mcast_addr_notification
+ kci_test_anycast_addr_notification
+"
+
+source lib.sh
+test_dev="test-dummy1"
+
+kci_test_mcast_addr_notification()
+{
+ RET=0
+ local tmpfile
+ local monitor_pid
+ local match_result
+
+ tmpfile=$(mktemp)
+ defer rm "$tmpfile"
+
+ ip monitor maddr > $tmpfile &
+ monitor_pid=$!
+ defer kill_process "$monitor_pid"
+
+ sleep 1
+
+ if [ ! -e "/proc/$monitor_pid" ]; then
+ RET=$ksft_skip
+ log_test "mcast addr notification: iproute2 too old"
+ return $RET
+ fi
+
+ ip link add name "$test_dev" type dummy
+ check_err $? "failed to add dummy interface"
+ ip link set "$test_dev" up
+ check_err $? "failed to set dummy interface up"
+ ip link del dev "$test_dev"
+ check_err $? "Failed to delete dummy interface"
+ sleep 1
+
+ # There should be 4 line matches as follows.
+ # 13: test-dummy1    inet6 mcast ff02::1 scope global 
+ # 13: test-dummy1    inet mcast 224.0.0.1 scope global 
+ # Deleted 13: test-dummy1    inet mcast 224.0.0.1 scope global 
+ # Deleted 13: test-dummy1    inet6 mcast ff02::1 scope global 
+ match_result=$(grep -cE "$test_dev.*(224.0.0.1|ff02::1)" "$tmpfile")
+ if [ "$match_result" -ne 4 ]; then
+ RET=$ksft_fail
+ fi
+ log_test "mcast addr notification: Expected 4 matches, got $match_result"
+ return $RET
+}
+
+kci_test_anycast_addr_notification()
+{
+ RET=0
+ local tmpfile
+ local monitor_pid
+ local match_result
+
+ tmpfile=$(mktemp)
+ defer rm "$tmpfile"
+
+ ip monitor acaddress > "$tmpfile" &
+ monitor_pid=$!
+ defer kill_process "$monitor_pid"
+ sleep 1
+
+ if [ ! -e "/proc/$monitor_pid" ]; then
+ RET=$ksft_skip
+ log_test "anycast addr notification: iproute2 too old"
+ return "$RET"
+ fi
+
+ ip link add name "$test_dev" type dummy
+ check_err $? "failed to add dummy interface"
+ ip link set "$test_dev" up
+ check_err $? "failed to set dummy interface up"
+ sysctl -qw net.ipv6.conf."$test_dev".forwarding=1
+ ip link del dev "$test_dev"
+ check_err $? "Failed to delete dummy interface"
+ sleep 1
+
+ # There should be 2 line matches as follows.
+ # 9: dummy2 inet6 any fe80:: scope global
+ # Deleted 9: dummy2 inet6 any fe80:: scope global
+ match_result=$(grep -cE "$test_dev.*(fe80::)" "$tmpfile")
+ if [ "$match_result" -ne 2 ]; then
+ RET=$ksft_fail
+ fi
+ log_test "anycast addr notification: Expected 2 matches, got $match_result"
+ return "$RET"
+}
+
+#check for needed privileges
+if [ "$(id -u)" -ne 0 ];then
+ RET=$ksft_skip
+ log_test "need root privileges"
+ exit $RET
+fi
+
+require_command ip
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/srv6_end_next_csid_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_next_csid_l3vpn_test.sh
index ba730655a7bf..4bc135e5c22c 100755
--- a/tools/testing/selftests/net/srv6_end_next_csid_l3vpn_test.sh
+++ b/tools/testing/selftests/net/srv6_end_next_csid_l3vpn_test.sh
@@ -594,7 +594,7 @@ setup_rt_local_sids()
dev "${DUMMY_DEVNAME}"
# all SIDs for VPNs start with a common locator. Routes and SRv6
- # Endpoint behavior instaces are grouped together in the 'localsid'
+ # Endpoint behavior instances are grouped together in the 'localsid'
# table.
ip -netns "${nsname}" -6 rule \
add to "${VPN_LOCATOR_SERVICE}::/16" \
diff --git a/tools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh
index 4b86040c58c6..34b781a2ae74 100755
--- a/tools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh
+++ b/tools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh
@@ -72,6 +72,9 @@
# Every fcf0:0:x:y::/64 network interconnects the SRv6 routers rt-x with rt-y in
# the selftest network.
#
+# In addition, every router interface connecting rt-x to rt-y is assigned an
+# IPv6 link-local address fe80::x:y/64.
+#
# Local SID/C-SID table
# =====================
#
@@ -521,6 +524,9 @@ setup_rt_networking()
ip -netns "${nsname}" addr \
add "${net_prefix}::${rt}/64" dev "${devname}" nodad
+ ip -netns "${nsname}" addr \
+ add "fe80::${rt}:${neigh}/64" dev "${devname}" nodad
+
ip -netns "${nsname}" link set "${devname}" up
done
@@ -609,6 +615,27 @@ set_end_x_nextcsid()
nflen "${LCNODEFUNC_BLEN}" dev "${DUMMY_DEVNAME}"
}
+set_end_x_ll_nextcsid()
+{
+ local rt="$1"
+ local adj="$2"
+
+ eval nsname=\${$(get_rtname "${rt}")}
+ lcnode_func_prefix="$(build_lcnode_func_prefix "${rt}")"
+ nh6_ll_addr="fe80::${adj}:${rt}"
+ oifname="veth-rt-${rt}-${adj}"
+
+ # enabled NEXT-C-SID SRv6 End.X behavior via an IPv6 link-local nexthop
+ # address (note that "dev" is the dummy dum0 device chosen for the sake
+ # of simplicity).
+ ip -netns "${nsname}" -6 route \
+ replace "${lcnode_func_prefix}" \
+ table "${LOCALSID_TABLE_ID}" \
+ encap seg6local action End.X nh6 "${nh6_ll_addr}" \
+ oif "${oifname}" flavors next-csid lblen "${LCBLOCK_BLEN}" \
+ nflen "${LCNODEFUNC_BLEN}" dev "${DUMMY_DEVNAME}"
+}
+
set_underlay_sids_reachability()
{
local rt="$1"
@@ -654,7 +681,7 @@ setup_rt_local_sids()
set_underlay_sids_reachability "${rt}" "${rt_neighs}"
# all SIDs for VPNs start with a common locator. Routes and SRv6
- # Endpoint behavior instaces are grouped together in the 'localsid'
+ # Endpoint behavior instances are grouped together in the 'localsid'
# table.
ip -netns "${nsname}" -6 rule \
add to "${VPN_LOCATOR_SERVICE}::/16" \
@@ -1016,6 +1043,27 @@ host_vpn_tests()
check_and_log_hs_ipv4_connectivity 1 2
check_and_log_hs_ipv4_connectivity 2 1
+
+ # Setup the adjacencies in the SRv6 aware routers using IPv6 link-local
+ # addresses.
+ # - rt-3 SRv6 End.X adjacency with rt-4
+ # - rt-4 SRv6 End.X adjacency with rt-1
+ set_end_x_ll_nextcsid 3 4
+ set_end_x_ll_nextcsid 4 1
+
+ log_section "SRv6 VPN connectivity test hosts (h1 <-> h2, IPv6), link-local"
+
+ check_and_log_hs_ipv6_connectivity 1 2
+ check_and_log_hs_ipv6_connectivity 2 1
+
+ log_section "SRv6 VPN connectivity test hosts (h1 <-> h2, IPv4), link-local"
+
+ check_and_log_hs_ipv4_connectivity 1 2
+ check_and_log_hs_ipv4_connectivity 2 1
+
+ # Restore the previous adjacencies.
+ set_end_x_nextcsid 3 4
+ set_end_x_nextcsid 4 1
}
__nextcsid_end_x_behavior_test()
diff --git a/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh b/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh
index 3efce1718c5f..6a68c7eff1dc 100755
--- a/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh
+++ b/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh
@@ -395,7 +395,7 @@ setup_rt_local_sids()
dev "${VRF_DEVNAME}"
# all SIDs for VPNs start with a common locator. Routes and SRv6
- # Endpoint behavior instaces are grouped together in the 'localsid'
+ # Endpoint behavior instances are grouped together in the 'localsid'
# table.
ip -netns "${nsname}" -6 rule \
add to "${VPN_LOCATOR_SERVICE}::/16" \
diff --git a/tools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh b/tools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh
index cabc70538ffe..0979b5316fdf 100755
--- a/tools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh
+++ b/tools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh
@@ -343,7 +343,7 @@ setup_rt_local_sids()
encap seg6local action End dev "${DUMMY_DEVNAME}"
# all SIDs for VPNs start with a common locator. Routes and SRv6
- # Endpoint behaviors instaces are grouped together in the 'localsid'
+ # Endpoint behaviors instances are grouped together in the 'localsid'
# table.
ip -netns "${nsname}" -6 rule add \
to "${VPN_LOCATOR_SERVICE}::/16" \
diff --git a/tools/testing/selftests/net/tcp_ao/seq-ext.c b/tools/testing/selftests/net/tcp_ao/seq-ext.c
index f00245263b20..6478da6a71c3 100644
--- a/tools/testing/selftests/net/tcp_ao/seq-ext.c
+++ b/tools/testing/selftests/net/tcp_ao/seq-ext.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/* Check that after SEQ number wrap-around:
* 1. SEQ-extension has upper bytes set
- * 2. TCP conneciton is alive and no TCPAOBad segments
+ * 2. TCP connection is alive and no TCPAOBad segments
* In order to test (2), the test doesn't just adjust seq number for a queue
* on a connected socket, but migrates it to another sk+port number, so
* that there won't be any delayed packets that will fail to verify
diff --git a/tools/testing/selftests/net/test_neigh.sh b/tools/testing/selftests/net/test_neigh.sh
new file mode 100755
index 000000000000..388056472b5b
--- /dev/null
+++ b/tools/testing/selftests/net/test_neigh.sh
@@ -0,0 +1,366 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source lib.sh
+TESTS="
+ extern_valid_ipv4
+ extern_valid_ipv6
+"
+VERBOSE=0
+
+################################################################################
+# Utilities
+
+run_cmd()
+{
+ local cmd="$1"
+ local out
+ local stderr="2>/dev/null"
+
+ if [ "$VERBOSE" = "1" ]; then
+ echo "COMMAND: $cmd"
+ stderr=
+ fi
+
+ out=$(eval "$cmd" "$stderr")
+ rc=$?
+ if [ "$VERBOSE" -eq 1 ] && [ -n "$out" ]; then
+ echo " $out"
+ fi
+
+ return $rc
+}
+
+################################################################################
+# Setup
+
+setup()
+{
+ set -e
+
+ setup_ns ns1 ns2
+
+ ip -n "$ns1" link add veth0 type veth peer name veth1 netns "$ns2"
+ ip -n "$ns1" link set dev veth0 up
+ ip -n "$ns2" link set dev veth1 up
+
+ ip -n "$ns1" address add 192.0.2.1/24 dev veth0
+ ip -n "$ns1" address add 2001:db8:1::1/64 dev veth0 nodad
+ ip -n "$ns2" address add 192.0.2.2/24 dev veth1
+ ip -n "$ns2" address add 2001:db8:1::2/64 dev veth1 nodad
+
+ ip netns exec "$ns1" sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1
+ ip netns exec "$ns2" sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1
+
+ sleep 5
+
+ set +e
+}
+
+exit_cleanup_all()
+{
+ cleanup_all_ns
+ exit "${EXIT_STATUS}"
+}
+
+################################################################################
+# Tests
+
+extern_valid_common()
+{
+ local af_str=$1; shift
+ local ip_addr=$1; shift
+ local tbl_name=$1; shift
+ local subnet=$1; shift
+ local mac
+
+ mac=$(ip -n "$ns2" -j link show dev veth1 | jq -r '.[]["address"]')
+
+ RET=0
+
+ # Check that simple addition works.
+ run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid"
+ run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"extern_valid\""
+ check_err $? "No \"extern_valid\" flag after addition"
+
+ log_test "$af_str \"extern_valid\" flag: Add entry"
+
+ RET=0
+
+ # Check that an entry cannot be added with "extern_valid" flag and an
+ # invalid state.
+ run_cmd "ip -n $ns1 neigh flush dev veth0"
+ run_cmd "ip -n $ns1 neigh add $ip_addr nud none dev veth0 extern_valid"
+ check_fail $? "Managed to add an entry with \"extern_valid\" flag and an invalid state"
+
+ log_test "$af_str \"extern_valid\" flag: Add with an invalid state"
+
+ RET=0
+
+ # Check that entry cannot be added with both "extern_valid" flag and
+ # "use" / "managed" flag.
+ run_cmd "ip -n $ns1 neigh flush dev veth0"
+ run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid use"
+ check_fail $? "Managed to add an entry with \"extern_valid\" flag and \"use\" flag"
+
+ log_test "$af_str \"extern_valid\" flag: Add with \"use\" flag"
+
+ RET=0
+
+ # Check that "extern_valid" flag can be toggled using replace.
+ run_cmd "ip -n $ns1 neigh flush dev veth0"
+ run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0"
+ run_cmd "ip -n $ns1 neigh replace $ip_addr lladdr $mac nud stale dev veth0 extern_valid"
+ run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"extern_valid\""
+ check_err $? "Did not manage to set \"extern_valid\" flag with replace"
+ run_cmd "ip -n $ns1 neigh replace $ip_addr lladdr $mac nud stale dev veth0"
+ run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"extern_valid\""
+ check_fail $? "Did not manage to clear \"extern_valid\" flag with replace"
+
+ log_test "$af_str \"extern_valid\" flag: Replace entry"
+
+ RET=0
+
+ # Check that an existing "extern_valid" entry can be marked as
+ # "managed".
+ run_cmd "ip -n $ns1 neigh flush dev veth0"
+ run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid"
+ run_cmd "ip -n $ns1 neigh replace $ip_addr lladdr $mac nud stale dev veth0 extern_valid managed"
+ check_err $? "Did not manage to add \"managed\" flag to an existing \"extern_valid\" entry"
+
+ log_test "$af_str \"extern_valid\" flag: Replace entry with \"managed\" flag"
+
+ RET=0
+
+ # Check that entry cannot be replaced with "extern_valid" flag and an
+ # invalid state.
+ run_cmd "ip -n $ns1 neigh flush dev veth0"
+ run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid"
+ run_cmd "ip -n $ns1 neigh replace $ip_addr nud none dev veth0 extern_valid"
+ check_fail $? "Managed to replace an entry with \"extern_valid\" flag and an invalid state"
+
+ log_test "$af_str \"extern_valid\" flag: Replace with an invalid state"
+
+ RET=0
+
+ # Check that an "extern_valid" entry is flushed when the interface is
+ # put administratively down.
+ run_cmd "ip -n $ns1 neigh flush dev veth0"
+ run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid"
+ run_cmd "ip -n $ns1 link set dev veth0 down"
+ run_cmd "ip -n $ns1 link set dev veth0 up"
+ run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0"
+ check_fail $? "\"extern_valid\" entry not flushed upon interface down"
+
+ log_test "$af_str \"extern_valid\" flag: Interface down"
+
+ RET=0
+
+ # Check that an "extern_valid" entry is not flushed when the interface
+ # loses its carrier.
+ run_cmd "ip -n $ns1 neigh flush dev veth0"
+ run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid"
+ run_cmd "ip -n $ns2 link set dev veth1 down"
+ run_cmd "ip -n $ns2 link set dev veth1 up"
+ run_cmd "sleep 2"
+ run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0"
+ check_err $? "\"extern_valid\" entry flushed upon carrier down"
+
+ log_test "$af_str \"extern_valid\" flag: Carrier down"
+
+ RET=0
+
+ # Check that when entry transitions to "reachable" state it maintains
+ # the "extern_valid" flag. Wait "delay_probe" seconds for ARP request /
+ # NS to be sent.
+ local delay_probe
+
+ delay_probe=$(ip -n "$ns1" -j ntable show dev veth0 name "$tbl_name" | jq '.[]["delay_probe"]')
+ run_cmd "ip -n $ns1 neigh flush dev veth0"
+ run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid"
+ run_cmd "ip -n $ns1 neigh replace $ip_addr lladdr $mac nud stale dev veth0 extern_valid use"
+ run_cmd "sleep $((delay_probe / 1000 + 2))"
+ run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"REACHABLE\""
+ check_err $? "Entry did not transition to \"reachable\" state"
+ run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"extern_valid\""
+ check_err $? "Entry did not maintain \"extern_valid\" flag after transition to \"reachable\" state"
+
+ log_test "$af_str \"extern_valid\" flag: Transition to \"reachable\" state"
+
+ RET=0
+
+ # Drop all packets, trigger resolution and check that entry goes back
+ # to "stale" state instead of "failed".
+ local mcast_reprobes
+ local retrans_time
+ local ucast_probes
+ local app_probes
+ local probes
+ local delay
+
+ run_cmd "ip -n $ns1 neigh flush dev veth0"
+ run_cmd "tc -n $ns2 qdisc add dev veth1 clsact"
+ run_cmd "tc -n $ns2 filter add dev veth1 ingress proto all matchall action drop"
+ run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid"
+ run_cmd "ip -n $ns1 neigh replace $ip_addr lladdr $mac nud stale dev veth0 extern_valid use"
+ retrans_time=$(ip -n "$ns1" -j ntable show dev veth0 name "$tbl_name" | jq '.[]["retrans"]')
+ ucast_probes=$(ip -n "$ns1" -j ntable show dev veth0 name "$tbl_name" | jq '.[]["ucast_probes"]')
+ app_probes=$(ip -n "$ns1" -j ntable show dev veth0 name "$tbl_name" | jq '.[]["app_probes"]')
+ mcast_reprobes=$(ip -n "$ns1" -j ntable show dev veth0 name "$tbl_name" | jq '.[]["mcast_reprobes"]')
+ delay=$((delay_probe + (ucast_probes + app_probes + mcast_reprobes) * retrans_time))
+ run_cmd "sleep $((delay / 1000 + 2))"
+ run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"STALE\""
+ check_err $? "Entry did not return to \"stale\" state"
+ run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"extern_valid\""
+ check_err $? "Entry did not maintain \"extern_valid\" flag after returning to \"stale\" state"
+ probes=$(ip -n "$ns1" -j -s neigh get "$ip_addr" dev veth0 | jq '.[]["probes"]')
+ if [[ $probes -eq 0 ]]; then
+ check_err 1 "No probes were sent"
+ fi
+
+ log_test "$af_str \"extern_valid\" flag: Transition back to \"stale\" state"
+
+ run_cmd "tc -n $ns2 qdisc del dev veth1 clsact"
+
+ RET=0
+
+ # Forced garbage collection runs whenever the number of entries is
+ # larger than "thresh3" and deletes stale entries that have not been
+ # updated in the last 5 seconds.
+ #
+ # Check that an "extern_valid" entry survives a forced garbage
+ # collection. Add an entry, wait 5 seconds and add more entries than
+ # "thresh3" so that forced garbage collection will run.
+ #
+ # Note that the garbage collection thresholds are global resources and
+ # that changes in the initial namespace affect all the namespaces.
+ local forced_gc_runs_t0
+ local forced_gc_runs_t1
+ local orig_thresh1
+ local orig_thresh2
+ local orig_thresh3
+
+ run_cmd "ip -n $ns1 neigh flush dev veth0"
+ orig_thresh1=$(ip -j ntable show name "$tbl_name" | jq '.[] | select(has("thresh1")) | .["thresh1"]')
+ orig_thresh2=$(ip -j ntable show name "$tbl_name" | jq '.[] | select(has("thresh2")) | .["thresh2"]')
+ orig_thresh3=$(ip -j ntable show name "$tbl_name" | jq '.[] | select(has("thresh3")) | .["thresh3"]')
+ run_cmd "ip ntable change name $tbl_name thresh3 10 thresh2 9 thresh1 8"
+ run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid"
+ run_cmd "ip -n $ns1 neigh add ${subnet}3 lladdr $mac nud stale dev veth0"
+ run_cmd "sleep 5"
+ forced_gc_runs_t0=$(ip -j -s ntable show name "$tbl_name" | jq '.[] | select(has("forced_gc_runs")) | .["forced_gc_runs"]')
+ for i in {1..20}; do
+ run_cmd "ip -n $ns1 neigh add ${subnet}$((i + 4)) nud none dev veth0"
+ done
+ forced_gc_runs_t1=$(ip -j -s ntable show name "$tbl_name" | jq '.[] | select(has("forced_gc_runs")) | .["forced_gc_runs"]')
+ if [[ $forced_gc_runs_t1 -eq $forced_gc_runs_t0 ]]; then
+ check_err 1 "Forced garbage collection did not run"
+ fi
+ run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"extern_valid\""
+ check_err $? "Entry with \"extern_valid\" flag did not survive forced garbage collection"
+ run_cmd "ip -n $ns1 neigh get ${subnet}3 dev veth0"
+ check_fail $? "Entry without \"extern_valid\" flag survived forced garbage collection"
+
+ log_test "$af_str \"extern_valid\" flag: Forced garbage collection"
+
+ run_cmd "ip ntable change name $tbl_name thresh3 $orig_thresh3 thresh2 $orig_thresh2 thresh1 $orig_thresh1"
+
+ RET=0
+
+ # Periodic garbage collection runs every "base_reachable"/2 seconds and
+ # if the number of entries is larger than "thresh1", then it deletes
+ # stale entries that have not been used in the last "gc_stale" seconds.
+ #
+ # Check that an "extern_valid" entry survives a periodic garbage
+ # collection. Add an "extern_valid" entry, add more than "thresh1"
+ # regular entries, wait "base_reachable" (longer than "gc_stale")
+ # seconds and check that the "extern_valid" entry was not deleted.
+ #
+ # Note that the garbage collection thresholds and "base_reachable" are
+ # global resources and that changes in the initial namespace affect all
+ # the namespaces.
+ local periodic_gc_runs_t0
+ local periodic_gc_runs_t1
+ local orig_base_reachable
+ local orig_gc_stale
+
+ run_cmd "ip -n $ns1 neigh flush dev veth0"
+ orig_thresh1=$(ip -j ntable show name "$tbl_name" | jq '.[] | select(has("thresh1")) | .["thresh1"]')
+ orig_base_reachable=$(ip -j ntable show name "$tbl_name" | jq '.[] | select(has("thresh1")) | .["base_reachable"]')
+ run_cmd "ip ntable change name $tbl_name thresh1 10 base_reachable 10000"
+ orig_gc_stale=$(ip -n "$ns1" -j ntable show name "$tbl_name" dev veth0 | jq '.[]["gc_stale"]')
+ run_cmd "ip -n $ns1 ntable change name $tbl_name dev veth0 gc_stale 5000"
+ # Wait orig_base_reachable/2 for the new interval to take effect.
+ run_cmd "sleep $(((orig_base_reachable / 1000) / 2 + 2))"
+ run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid"
+ run_cmd "ip -n $ns1 neigh add ${subnet}3 lladdr $mac nud stale dev veth0"
+ for i in {1..20}; do
+ run_cmd "ip -n $ns1 neigh add ${subnet}$((i + 4)) nud none dev veth0"
+ done
+ periodic_gc_runs_t0=$(ip -j -s ntable show name "$tbl_name" | jq '.[] | select(has("periodic_gc_runs")) | .["periodic_gc_runs"]')
+ run_cmd "sleep 10"
+ periodic_gc_runs_t1=$(ip -j -s ntable show name "$tbl_name" | jq '.[] | select(has("periodic_gc_runs")) | .["periodic_gc_runs"]')
+ [[ $periodic_gc_runs_t1 -ne $periodic_gc_runs_t0 ]]
+ check_err $? "Periodic garbage collection did not run"
+ run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"extern_valid\""
+ check_err $? "Entry with \"extern_valid\" flag did not survive periodic garbage collection"
+ run_cmd "ip -n $ns1 neigh get ${subnet}3 dev veth0"
+ check_fail $? "Entry without \"extern_valid\" flag survived periodic garbage collection"
+
+ log_test "$af_str \"extern_valid\" flag: Periodic garbage collection"
+
+ run_cmd "ip -n $ns1 ntable change name $tbl_name dev veth0 gc_stale $orig_gc_stale"
+ run_cmd "ip ntable change name $tbl_name thresh1 $orig_thresh1 base_reachable $orig_base_reachable"
+}
+
+extern_valid_ipv4()
+{
+ extern_valid_common "IPv4" 192.0.2.2 "arp_cache" 192.0.2.
+}
+
+extern_valid_ipv6()
+{
+ extern_valid_common "IPv6" 2001:db8:1::2 "ndisc_cache" 2001:db8:1::
+}
+
+################################################################################
+# Usage
+
+usage()
+{
+ cat <<EOF
+usage: ${0##*/} OPTS
+
+ -t <test> Test(s) to run (default: all)
+ (options: $TESTS)
+ -p Pause on fail
+ -v Verbose mode (show commands and output)
+EOF
+}
+
+################################################################################
+# Main
+
+while getopts ":t:pvh" opt; do
+ case $opt in
+ t) TESTS=$OPTARG;;
+ p) PAUSE_ON_FAIL=yes;;
+ v) VERBOSE=$((VERBOSE + 1));;
+ h) usage; exit 0;;
+ *) usage; exit 1;;
+ esac
+done
+
+require_command jq
+
+if ! ip neigh help 2>&1 | grep -q "extern_valid"; then
+ echo "SKIP: iproute2 ip too old, missing \"extern_valid\" support"
+ exit "$ksft_skip"
+fi
+
+trap exit_cleanup_all EXIT
+
+for t in $TESTS
+do
+ setup; $t; cleanup_all_ns;
+done
diff --git a/tools/testing/selftests/net/test_vxlan_vnifiltering.sh b/tools/testing/selftests/net/test_vxlan_vnifiltering.sh
index 6127a78ee988..8deacc565afa 100755
--- a/tools/testing/selftests/net/test_vxlan_vnifiltering.sh
+++ b/tools/testing/selftests/net/test_vxlan_vnifiltering.sh
@@ -146,18 +146,17 @@ run_cmd()
}
check_hv_connectivity() {
- ip netns exec $hv_1 ping -c 1 -W 1 $1 &>/dev/null
- sleep 1
- ip netns exec $hv_1 ping -c 1 -W 1 $2 &>/dev/null
+ slowwait 5 ip netns exec $hv_1 ping -c 1 -W 1 $1 &>/dev/null
+ slowwait 5 ip netns exec $hv_1 ping -c 1 -W 1 $2 &>/dev/null
return $?
}
check_vm_connectivity() {
- run_cmd "ip netns exec $vm_11 ping -c 1 -W 1 10.0.10.12"
+ slowwait 5 run_cmd "ip netns exec $vm_11 ping -c 1 -W 1 10.0.10.12"
log_test $? 0 "VM connectivity over $1 (ipv4 default rdst)"
- run_cmd "ip netns exec $vm_21 ping -c 1 -W 1 10.0.10.22"
+ slowwait 5 run_cmd "ip netns exec $vm_21 ping -c 1 -W 1 10.0.10.22"
log_test $? 0 "VM connectivity over $1 (ipv6 default rdst)"
}
diff --git a/tools/testing/selftests/net/vrf_route_leaking.sh b/tools/testing/selftests/net/vrf_route_leaking.sh
index e9c2f71da207..ce34cb2e6e0b 100755
--- a/tools/testing/selftests/net/vrf_route_leaking.sh
+++ b/tools/testing/selftests/net/vrf_route_leaking.sh
@@ -275,7 +275,7 @@ setup_sym()
# Wait for ip config to settle
- sleep 2
+ slowwait 5 ip netns exec $h1 "${ping6}" -c1 -w1 ${H2_N2_IP6} >/dev/null 2>&1
}
setup_asym()
@@ -370,7 +370,7 @@ setup_asym()
ip -netns $r2 -6 addr add dev eth1 ${R2_N2_IP6}/64 nodad
# Wait for ip config to settle
- sleep 2
+ slowwait 5 ip netns exec $h1 "${ping6}" -c1 -w1 ${H2_N2_IP6} >/dev/null 2>&1
}
check_connectivity()
diff --git a/tools/testing/selftests/ptp/testptp.c b/tools/testing/selftests/ptp/testptp.c
index edc08a4433fd..ed1e2886ba3c 100644
--- a/tools/testing/selftests/ptp/testptp.c
+++ b/tools/testing/selftests/ptp/testptp.c
@@ -120,6 +120,7 @@ static void usage(char *progname)
" -c query the ptp clock's capabilities\n"
" -d name device to open\n"
" -e val read 'val' external time stamp events\n"
+ " -E val enable rising (1), falling (2), or both (3) edges\n"
" -f val adjust the ptp clock frequency by 'val' ppb\n"
" -F chan Enable single channel mask and keep device open for debugfs verification.\n"
" -g get the ptp clock time\n"
@@ -178,6 +179,7 @@ int main(int argc, char *argv[])
int adjphase = 0;
int capabilities = 0;
int extts = 0;
+ int edge = 0;
int flagtest = 0;
int gettime = 0;
int index = 0;
@@ -202,7 +204,7 @@ int main(int argc, char *argv[])
progname = strrchr(argv[0], '/');
progname = progname ? 1+progname : argv[0];
- while (EOF != (c = getopt(argc, argv, "cd:e:f:F:ghH:i:k:lL:n:o:p:P:rsSt:T:w:x:Xy:z"))) {
+ while (EOF != (c = getopt(argc, argv, "cd:e:E:f:F:ghH:i:k:lL:n:o:p:P:rsSt:T:w:x:Xy:z"))) {
switch (c) {
case 'c':
capabilities = 1;
@@ -213,6 +215,11 @@ int main(int argc, char *argv[])
case 'e':
extts = atoi(optarg);
break;
+ case 'E':
+ edge = atoi(optarg);
+ edge = (edge & 1 ? PTP_RISING_EDGE : 0) |
+ (edge & 2 ? PTP_FALLING_EDGE : 0);
+ break;
case 'f':
adjfreq = atoi(optarg);
break;
@@ -444,7 +451,7 @@ int main(int argc, char *argv[])
if (!readonly) {
memset(&extts_request, 0, sizeof(extts_request));
extts_request.index = index;
- extts_request.flags = PTP_ENABLE_FEATURE;
+ extts_request.flags = PTP_ENABLE_FEATURE | edge;
if (ioctl(fd, PTP_EXTTS_REQUEST, &extts_request)) {
perror("PTP_EXTTS_REQUEST");
extts = 0;
diff --git a/tools/testing/selftests/tc-testing/config b/tools/testing/selftests/tc-testing/config
index db176fe7d0c3..c20aa16b1d63 100644
--- a/tools/testing/selftests/tc-testing/config
+++ b/tools/testing/selftests/tc-testing/config
@@ -21,6 +21,7 @@ CONFIG_NF_NAT=m
CONFIG_NETFILTER_XT_TARGET_LOG=m
CONFIG_NET_SCHED=y
+CONFIG_IP_SET=m
#
# Queueing/Scheduling
@@ -30,6 +31,7 @@ CONFIG_NET_SCH_CBS=m
CONFIG_NET_SCH_CHOKE=m
CONFIG_NET_SCH_CODEL=m
CONFIG_NET_SCH_DRR=m
+CONFIG_NET_SCH_DUALPI2=m
CONFIG_NET_SCH_ETF=m
CONFIG_NET_SCH_FQ=m
CONFIG_NET_SCH_FQ_CODEL=m
diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json
index c6db7fa94f55..23a61e5b99d0 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json
@@ -504,7 +504,6 @@
"$TC qdisc add dev $DUMMY parent 1:1 handle 2:0 netem duplicate 100%",
"$TC filter add dev $DUMMY parent 1:0 protocol ip prio 1 u32 match ip dst 10.10.10.1/32 flowid 1:1",
"$TC class add dev $DUMMY parent 1:0 classid 1:2 hfsc ls m2 10Mbit",
- "$TC qdisc add dev $DUMMY parent 1:2 handle 3:0 netem duplicate 100%",
"$TC filter add dev $DUMMY parent 1:0 protocol ip prio 2 u32 match ip dst 10.10.10.2/32 flowid 1:2",
"ping -c 1 10.10.10.1 -I$DUMMY > /dev/null || true",
"$TC filter del dev $DUMMY parent 1:0 protocol ip prio 1",
@@ -517,8 +516,8 @@
{
"kind": "hfsc",
"handle": "1:",
- "bytes": 392,
- "packets": 4
+ "bytes": 294,
+ "packets": 3
}
],
"matchCount": "1",
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/dualpi2.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/dualpi2.json
new file mode 100644
index 000000000000..cd1f2ee8f354
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/dualpi2.json
@@ -0,0 +1,254 @@
+[
+ {
+ "id": "a4c7",
+ "name": "Create DualPI2 with default setting",
+ "category": [
+ "qdisc",
+ "dualpi2"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dualpi2",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc dualpi2 1: root refcnt [0-9]+ limit 10000p.* step_thresh 1ms min_qlen_step 0p coupling_factor 2 drop_on_overload drop_dequeue classic_protection 10% l4s_ect split_gso",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "1ea4",
+ "name": "Create DualPI2 with memlimit",
+ "category": [
+ "qdisc",
+ "dualpi2"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dualpi2 memlimit 20000000",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc dualpi2 1: root refcnt [0-9]+ limit 10000p.* memlimit 20000000B",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "2130",
+ "name": "Create DualPI2 with typical_rtt and max_rtt",
+ "category": [
+ "qdisc",
+ "dualpi2"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dualpi2 typical_rtt 20ms max_rtt 200ms",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc dualpi2 1: root refcnt [0-9]+ limit 10000p.* target 20ms tupdate 20ms alpha 0.042969 beta 1.496094",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "90c1",
+ "name": "Create DualPI2 with max_rtt",
+ "category": [
+ "qdisc",
+ "dualpi2"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dualpi2 max_rtt 300ms",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc dualpi2 1: root refcnt [0-9]+ limit 10000p.* target 50ms tupdate 50ms alpha 0.050781 beta 0.996094",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "7b3c",
+ "name": "Create DualPI2 with any_ect option",
+ "category": [
+ "qdisc",
+ "dualpi2"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dualpi2 any_ect",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc dualpi2 1: root refcnt [0-9]+ limit 10000p .* any_ect",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "49a3",
+ "name": "Create DualPI2 with overflow option",
+ "category": [
+ "qdisc",
+ "dualpi2"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dualpi2 overflow",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc dualpi2 1: root refcnt [0-9]+ limit 10000p.* overflow",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "d0a1",
+ "name": "Create DualPI2 with drop_enqueue option",
+ "category": [
+ "qdisc",
+ "dualpi2"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dualpi2 drop_enqueue",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc dualpi2 1: root refcnt [0-9]+ limit 10000p .* drop_enqueue",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "f051",
+ "name": "Create DualPI2 with no_split_gso option",
+ "category": [
+ "qdisc",
+ "dualpi2"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dualpi2 no_split_gso",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc dualpi2 1: root refcnt [0-9]+ limit 10000p .* no_split_gso",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "456b",
+ "name": "Create DualPI2 with packet step_thresh",
+ "category": [
+ "qdisc",
+ "dualpi2"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dualpi2 step_thresh 3p",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc dualpi2 1: root refcnt [0-9]+ limit 10000p .* step_thresh 3p",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "610c",
+ "name": "Create DualPI2 with packet min_qlen_step",
+ "category": [
+ "qdisc",
+ "dualpi2"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dualpi2 min_qlen_step 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc dualpi2 1: root refcnt [0-9]+ limit 10000p .* min_qlen_step 1p",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "b4fa",
+ "name": "Create DualPI2 with packet coupling_factor",
+ "category": [
+ "qdisc",
+ "dualpi2"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dualpi2 coupling_factor 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc dualpi2 1: root refcnt [0-9]+ limit 10000p .* coupling_factor 1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "37f1",
+ "name": "Create DualPI2 with packet classic_protection",
+ "category": [
+ "qdisc",
+ "dualpi2"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dualpi2 classic_protection 0",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc dualpi2 1: root refcnt [0-9]+ limit 10000p .* classic_protection 0%",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ }
+]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/netem.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/netem.json
index 3c4444961488..718d2df2aafa 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/netem.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/netem.json
@@ -336,5 +336,86 @@
"teardown": [
"$TC qdisc del dev $DUMMY handle 1: root"
]
+ },
+ {
+ "id": "d34d",
+ "name": "NETEM test qdisc duplication restriction in qdisc tree in netem_change root",
+ "category": ["qdisc", "netem"],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY root handle 1: netem limit 1",
+ "$TC qdisc add dev $DUMMY parent 1: handle 2: netem limit 1"
+ ],
+ "cmdUnderTest": "$TC qdisc change dev $DUMMY handle 1: netem duplicate 50%",
+ "expExitCode": "2",
+ "verifyCmd": "$TC -s qdisc show dev $DUMMY",
+ "matchPattern": "qdisc netem",
+ "matchCount": "2",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1:0 root"
+ ]
+ },
+ {
+ "id": "b33f",
+ "name": "NETEM test qdisc duplication restriction in qdisc tree in netem_change non-root",
+ "category": ["qdisc", "netem"],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY root handle 1: netem limit 1",
+ "$TC qdisc add dev $DUMMY parent 1: handle 2: netem limit 1"
+ ],
+ "cmdUnderTest": "$TC qdisc change dev $DUMMY handle 2: netem duplicate 50%",
+ "expExitCode": "2",
+ "verifyCmd": "$TC -s qdisc show dev $DUMMY",
+ "matchPattern": "qdisc netem",
+ "matchCount": "2",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1:0 root"
+ ]
+ },
+ {
+ "id": "cafe",
+ "name": "NETEM test qdisc duplication restriction in qdisc tree",
+ "category": ["qdisc", "netem"],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY root handle 1: netem limit 1 duplicate 100%"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY parent 1: handle 2: netem duplicate 100%",
+ "expExitCode": "2",
+ "verifyCmd": "$TC -s qdisc show dev $DUMMY",
+ "matchPattern": "qdisc netem",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1:0 root"
+ ]
+ },
+ {
+ "id": "1337",
+ "name": "NETEM test qdisc duplication restriction in qdisc tree across branches",
+ "category": ["qdisc", "netem"],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY parent root handle 1:0 hfsc",
+ "$TC class add dev $DUMMY parent 1:0 classid 1:1 hfsc rt m2 10Mbit",
+ "$TC qdisc add dev $DUMMY parent 1:1 handle 2:0 netem",
+ "$TC class add dev $DUMMY parent 1:0 classid 1:2 hfsc rt m2 10Mbit"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY parent 1:2 handle 3:0 netem duplicate 100%",
+ "expExitCode": "2",
+ "verifyCmd": "$TC -s qdisc show dev $DUMMY",
+ "matchPattern": "qdisc netem",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1:0 root"
+ ]
}
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfq.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfq.json
index 28c6ce6da7db..531a2f6e4900 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfq.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfq.json
@@ -264,5 +264,41 @@
"matchPattern": "sfq",
"matchCount": "0",
"teardown": []
+ },
+ {
+ "id": "cdc1",
+ "name": "Check that a negative perturb timer is rejected",
+ "category": [
+ "qdisc",
+ "sfq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root sfq perturb -10",
+ "expExitCode": "2",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "sfq",
+ "matchCount": "0",
+ "teardown": []
+ },
+ {
+ "id": "a9f0",
+ "name": "Check that a too big perturb timer is rejected",
+ "category": [
+ "qdisc",
+ "sfq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root sfq perturb 1000000000",
+ "expExitCode": "2",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "sfq",
+ "matchCount": "0",
+ "teardown": []
}
]
diff --git a/tools/testing/selftests/tc-testing/tdc.sh b/tools/testing/selftests/tc-testing/tdc.sh
index 589b18ed758a..dae19687912d 100755
--- a/tools/testing/selftests/tc-testing/tdc.sh
+++ b/tools/testing/selftests/tc-testing/tdc.sh
@@ -4,8 +4,7 @@
# If a module is required and was not compiled
# the test that requires it will fail anyways
try_modprobe() {
- modprobe -q -R "$1"
- if [ $? -ne 0 ]; then
+ if ! modprobe -q -R "$1"; then
echo "Module $1 not found... skipping."
else
modprobe "$1"
@@ -67,4 +66,5 @@ try_modprobe sch_hfsc
try_modprobe sch_hhf
try_modprobe sch_htb
try_modprobe sch_teql
-./tdc.py -J`nproc`
+try_modprobe sch_dualpi2
+./tdc.py -J"$(nproc)"
diff --git a/tools/testing/selftests/vsock/.gitignore b/tools/testing/selftests/vsock/.gitignore
new file mode 100644
index 000000000000..9c5bf379480f
--- /dev/null
+++ b/tools/testing/selftests/vsock/.gitignore
@@ -0,0 +1,2 @@
+vmtest.log
+vsock_test
diff --git a/tools/testing/selftests/vsock/Makefile b/tools/testing/selftests/vsock/Makefile
new file mode 100644
index 000000000000..c407c0afd938
--- /dev/null
+++ b/tools/testing/selftests/vsock/Makefile
@@ -0,0 +1,17 @@
+# SPDX-License-Identifier: GPL-2.0
+
+CURDIR := $(abspath .)
+TOOLSDIR := $(abspath ../../..)
+VSOCK_TEST_DIR := $(TOOLSDIR)/testing/vsock
+VSOCK_TEST_SRCS := $(wildcard $(VSOCK_TEST_DIR)/*.c $(VSOCK_TEST_DIR)/*.h)
+
+$(OUTPUT)/vsock_test: $(VSOCK_TEST_DIR)/vsock_test
+ install -m 755 $< $@
+
+$(VSOCK_TEST_DIR)/vsock_test: $(VSOCK_TEST_SRCS)
+ $(MAKE) -C $(VSOCK_TEST_DIR) vsock_test
+TEST_PROGS += vmtest.sh
+TEST_GEN_FILES := vsock_test
+
+include ../lib.mk
+
diff --git a/tools/testing/selftests/vsock/config b/tools/testing/selftests/vsock/config
new file mode 100644
index 000000000000..5f0a4f17dfc9
--- /dev/null
+++ b/tools/testing/selftests/vsock/config
@@ -0,0 +1,111 @@
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_BPF=y
+CONFIG_BPF_SYSCALL=y
+CONFIG_BPF_JIT=y
+CONFIG_HAVE_EBPF_JIT=y
+CONFIG_BPF_EVENTS=y
+CONFIG_FTRACE_SYSCALLS=y
+CONFIG_FUNCTION_TRACER=y
+CONFIG_HAVE_DYNAMIC_FTRACE=y
+CONFIG_DYNAMIC_FTRACE=y
+CONFIG_HAVE_KPROBES=y
+CONFIG_KPROBES=y
+CONFIG_KPROBE_EVENTS=y
+CONFIG_ARCH_SUPPORTS_UPROBES=y
+CONFIG_UPROBES=y
+CONFIG_UPROBE_EVENTS=y
+CONFIG_DEBUG_FS=y
+CONFIG_FW_CFG_SYSFS=y
+CONFIG_FW_CFG_SYSFS_CMDLINE=y
+CONFIG_DRM=y
+CONFIG_DRM_VIRTIO_GPU=y
+CONFIG_DRM_VIRTIO_GPU_KMS=y
+CONFIG_DRM_BOCHS=y
+CONFIG_VIRTIO_IOMMU=y
+CONFIG_SOUND=y
+CONFIG_SND=y
+CONFIG_SND_SEQUENCER=y
+CONFIG_SND_PCI=y
+CONFIG_SND_INTEL8X0=y
+CONFIG_SND_HDA_CODEC_REALTEK=y
+CONFIG_SECURITYFS=y
+CONFIG_CGROUP_BPF=y
+CONFIG_SQUASHFS=y
+CONFIG_SQUASHFS_XZ=y
+CONFIG_SQUASHFS_ZSTD=y
+CONFIG_FUSE_FS=y
+CONFIG_VIRTIO_FS=y
+CONFIG_SERIO=y
+CONFIG_PCI=y
+CONFIG_INPUT=y
+CONFIG_INPUT_KEYBOARD=y
+CONFIG_KEYBOARD_ATKBD=y
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_X86_VERBOSE_BOOTUP=y
+CONFIG_VGA_CONSOLE=y
+CONFIG_FB=y
+CONFIG_FB_VESA=y
+CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_HCTOSYS=y
+CONFIG_RTC_DRV_CMOS=y
+CONFIG_HYPERVISOR_GUEST=y
+CONFIG_PARAVIRT=y
+CONFIG_KVM_GUEST=y
+CONFIG_KVM=y
+CONFIG_KVM_INTEL=y
+CONFIG_KVM_AMD=y
+CONFIG_VSOCKETS=y
+CONFIG_VSOCKETS_DIAG=y
+CONFIG_VSOCKETS_LOOPBACK=y
+CONFIG_VMWARE_VMCI_VSOCKETS=y
+CONFIG_VIRTIO_VSOCKETS=y
+CONFIG_VIRTIO_VSOCKETS_COMMON=y
+CONFIG_HYPERV_VSOCKETS=y
+CONFIG_VMWARE_VMCI=y
+CONFIG_VHOST_VSOCK=y
+CONFIG_HYPERV=y
+CONFIG_UEVENT_HELPER=n
+CONFIG_VIRTIO=y
+CONFIG_VIRTIO_PCI=y
+CONFIG_VIRTIO_MMIO=y
+CONFIG_VIRTIO_BALLOON=y
+CONFIG_NET=y
+CONFIG_NET_CORE=y
+CONFIG_NETDEVICES=y
+CONFIG_NETWORK_FILESYSTEMS=y
+CONFIG_INET=y
+CONFIG_NET_9P=y
+CONFIG_NET_9P_VIRTIO=y
+CONFIG_9P_FS=y
+CONFIG_VIRTIO_NET=y
+CONFIG_CMDLINE_OVERRIDE=n
+CONFIG_BINFMT_SCRIPT=y
+CONFIG_SHMEM=y
+CONFIG_TMPFS=y
+CONFIG_UNIX=y
+CONFIG_MODULE_SIG_FORCE=n
+CONFIG_DEVTMPFS=y
+CONFIG_TTY=y
+CONFIG_VT=y
+CONFIG_UNIX98_PTYS=y
+CONFIG_EARLY_PRINTK=y
+CONFIG_INOTIFY_USER=y
+CONFIG_BLOCK=y
+CONFIG_SCSI_LOWLEVEL=y
+CONFIG_SCSI=y
+CONFIG_SCSI_VIRTIO=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_VIRTIO_CONSOLE=y
+CONFIG_WATCHDOG=y
+CONFIG_WATCHDOG_CORE=y
+CONFIG_I6300ESB_WDT=y
+CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
+CONFIG_OVERLAY_FS=y
+CONFIG_DAX=y
+CONFIG_DAX_DRIVER=y
+CONFIG_FS_DAX=y
+CONFIG_MEMORY_HOTPLUG=y
+CONFIG_MEMORY_HOTREMOVE=y
+CONFIG_ZONE_DEVICE=y
diff --git a/tools/testing/selftests/vsock/settings b/tools/testing/selftests/vsock/settings
new file mode 100644
index 000000000000..694d70710ff0
--- /dev/null
+++ b/tools/testing/selftests/vsock/settings
@@ -0,0 +1 @@
+timeout=300
diff --git a/tools/testing/selftests/vsock/vmtest.sh b/tools/testing/selftests/vsock/vmtest.sh
new file mode 100755
index 000000000000..edacebfc1632
--- /dev/null
+++ b/tools/testing/selftests/vsock/vmtest.sh
@@ -0,0 +1,487 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (c) 2025 Meta Platforms, Inc. and affiliates
+#
+# Dependencies:
+# * virtme-ng
+# * busybox-static (used by virtme-ng)
+# * qemu (used by virtme-ng)
+
+readonly SCRIPT_DIR="$(cd -P -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd -P)"
+readonly KERNEL_CHECKOUT=$(realpath "${SCRIPT_DIR}"/../../../../)
+
+source "${SCRIPT_DIR}"/../kselftest/ktap_helpers.sh
+
+readonly VSOCK_TEST="${SCRIPT_DIR}"/vsock_test
+readonly TEST_GUEST_PORT=51000
+readonly TEST_HOST_PORT=50000
+readonly TEST_HOST_PORT_LISTENER=50001
+readonly SSH_GUEST_PORT=22
+readonly SSH_HOST_PORT=2222
+readonly VSOCK_CID=1234
+readonly WAIT_PERIOD=3
+readonly WAIT_PERIOD_MAX=60
+readonly WAIT_TOTAL=$(( WAIT_PERIOD * WAIT_PERIOD_MAX ))
+readonly QEMU_PIDFILE=$(mktemp /tmp/qemu_vsock_vmtest_XXXX.pid)
+
+# virtme-ng offers a netdev for ssh when using "--ssh", but we also need a
+# control port forwarded for vsock_test. Because virtme-ng doesn't support
+# adding an additional port to forward to the device created from "--ssh" and
+# virtme-init mistakenly sets identical IPs to the ssh device and additional
+# devices, we instead opt out of using --ssh, add the device manually, and also
+# add the kernel cmdline options that virtme-init uses to setup the interface.
+readonly QEMU_TEST_PORT_FWD="hostfwd=tcp::${TEST_HOST_PORT}-:${TEST_GUEST_PORT}"
+readonly QEMU_SSH_PORT_FWD="hostfwd=tcp::${SSH_HOST_PORT}-:${SSH_GUEST_PORT}"
+readonly QEMU_OPTS="\
+ -netdev user,id=n0,${QEMU_TEST_PORT_FWD},${QEMU_SSH_PORT_FWD} \
+ -device virtio-net-pci,netdev=n0 \
+ -device vhost-vsock-pci,guest-cid=${VSOCK_CID} \
+ --pidfile ${QEMU_PIDFILE} \
+"
+readonly KERNEL_CMDLINE="\
+ virtme.dhcp net.ifnames=0 biosdevname=0 \
+ virtme.ssh virtme_ssh_channel=tcp virtme_ssh_user=$USER \
+"
+readonly LOG=$(mktemp /tmp/vsock_vmtest_XXXX.log)
+readonly TEST_NAMES=(vm_server_host_client vm_client_host_server vm_loopback)
+readonly TEST_DESCS=(
+ "Run vsock_test in server mode on the VM and in client mode on the host."
+ "Run vsock_test in client mode on the VM and in server mode on the host."
+ "Run vsock_test using the loopback transport in the VM."
+)
+
+VERBOSE=0
+
+usage() {
+ local name
+ local desc
+ local i
+
+ echo
+ echo "$0 [OPTIONS] [TEST]..."
+ echo "If no TEST argument is given, all tests will be run."
+ echo
+ echo "Options"
+ echo " -b: build the kernel from the current source tree and use it for guest VMs"
+ echo " -q: set the path to or name of qemu binary"
+ echo " -v: verbose output"
+ echo
+ echo "Available tests"
+
+ for ((i = 0; i < ${#TEST_NAMES[@]}; i++)); do
+ name=${TEST_NAMES[${i}]}
+ desc=${TEST_DESCS[${i}]}
+ printf "\t%-35s%-35s\n" "${name}" "${desc}"
+ done
+ echo
+
+ exit 1
+}
+
+die() {
+ echo "$*" >&2
+ exit "${KSFT_FAIL}"
+}
+
+vm_ssh() {
+ ssh -q -o UserKnownHostsFile=/dev/null -p ${SSH_HOST_PORT} localhost "$@"
+ return $?
+}
+
+cleanup() {
+ if [[ -s "${QEMU_PIDFILE}" ]]; then
+ pkill -SIGTERM -F "${QEMU_PIDFILE}" > /dev/null 2>&1
+ fi
+
+ # If failure occurred during or before qemu start up, then we need
+ # to clean this up ourselves.
+ if [[ -e "${QEMU_PIDFILE}" ]]; then
+ rm "${QEMU_PIDFILE}"
+ fi
+}
+
+check_args() {
+ local found
+
+ for arg in "$@"; do
+ found=0
+ for name in "${TEST_NAMES[@]}"; do
+ if [[ "${name}" = "${arg}" ]]; then
+ found=1
+ break
+ fi
+ done
+
+ if [[ "${found}" -eq 0 ]]; then
+ echo "${arg} is not an available test" >&2
+ usage
+ fi
+ done
+
+ for arg in "$@"; do
+ if ! command -v > /dev/null "test_${arg}"; then
+ echo "Test ${arg} not found" >&2
+ usage
+ fi
+ done
+}
+
+check_deps() {
+ for dep in vng ${QEMU} busybox pkill ssh; do
+ if [[ ! -x $(command -v "${dep}") ]]; then
+ echo -e "skip: dependency ${dep} not found!\n"
+ exit "${KSFT_SKIP}"
+ fi
+ done
+
+ if [[ ! -x $(command -v "${VSOCK_TEST}") ]]; then
+ printf "skip: %s not found!" "${VSOCK_TEST}"
+ printf " Please build the kselftest vsock target.\n"
+ exit "${KSFT_SKIP}"
+ fi
+}
+
+check_vng() {
+ local tested_versions
+ local version
+ local ok
+
+ tested_versions=("1.33" "1.36")
+ version="$(vng --version)"
+
+ ok=0
+ for tv in "${tested_versions[@]}"; do
+ if [[ "${version}" == *"${tv}"* ]]; then
+ ok=1
+ break
+ fi
+ done
+
+ if [[ ! "${ok}" -eq 1 ]]; then
+ printf "warning: vng version '%s' has not been tested and may " "${version}" >&2
+ printf "not function properly.\n\tThe following versions have been tested: " >&2
+ echo "${tested_versions[@]}" >&2
+ fi
+}
+
+handle_build() {
+ if [[ ! "${BUILD}" -eq 1 ]]; then
+ return
+ fi
+
+ if [[ ! -d "${KERNEL_CHECKOUT}" ]]; then
+ echo "-b requires vmtest.sh called from the kernel source tree" >&2
+ exit 1
+ fi
+
+ pushd "${KERNEL_CHECKOUT}" &>/dev/null
+
+ if ! vng --kconfig --config "${SCRIPT_DIR}"/config; then
+ die "failed to generate .config for kernel source tree (${KERNEL_CHECKOUT})"
+ fi
+
+ if ! make -j$(nproc); then
+ die "failed to build kernel from source tree (${KERNEL_CHECKOUT})"
+ fi
+
+ popd &>/dev/null
+}
+
+vm_start() {
+ local logfile=/dev/null
+ local verbose_opt=""
+ local kernel_opt=""
+ local qemu
+
+ qemu=$(command -v "${QEMU}")
+
+ if [[ "${VERBOSE}" -eq 1 ]]; then
+ verbose_opt="--verbose"
+ logfile=/dev/stdout
+ fi
+
+ if [[ "${BUILD}" -eq 1 ]]; then
+ kernel_opt="${KERNEL_CHECKOUT}"
+ fi
+
+ vng \
+ --run \
+ ${kernel_opt} \
+ ${verbose_opt} \
+ --qemu-opts="${QEMU_OPTS}" \
+ --qemu="${qemu}" \
+ --user root \
+ --append "${KERNEL_CMDLINE}" \
+ --rw &> ${logfile} &
+
+ if ! timeout ${WAIT_TOTAL} \
+ bash -c 'while [[ ! -s '"${QEMU_PIDFILE}"' ]]; do sleep 1; done; exit 0'; then
+ die "failed to boot VM"
+ fi
+}
+
+vm_wait_for_ssh() {
+ local i
+
+ i=0
+ while true; do
+ if [[ ${i} -gt ${WAIT_PERIOD_MAX} ]]; then
+ die "Timed out waiting for guest ssh"
+ fi
+ if vm_ssh -- true; then
+ break
+ fi
+ i=$(( i + 1 ))
+ sleep ${WAIT_PERIOD}
+ done
+}
+
+# derived from selftests/net/net_helper.sh
+wait_for_listener()
+{
+ local port=$1
+ local interval=$2
+ local max_intervals=$3
+ local protocol=tcp
+ local pattern
+ local i
+
+ pattern=":$(printf "%04X" "${port}") "
+
+ # for tcp protocol additionally check the socket state
+ [ "${protocol}" = "tcp" ] && pattern="${pattern}0A"
+ for i in $(seq "${max_intervals}"); do
+ if awk '{print $2" "$4}' /proc/net/"${protocol}"* | \
+ grep -q "${pattern}"; then
+ break
+ fi
+ sleep "${interval}"
+ done
+}
+
+vm_wait_for_listener() {
+ local port=$1
+
+ vm_ssh <<EOF
+$(declare -f wait_for_listener)
+wait_for_listener ${port} ${WAIT_PERIOD} ${WAIT_PERIOD_MAX}
+EOF
+}
+
+host_wait_for_listener() {
+ wait_for_listener "${TEST_HOST_PORT_LISTENER}" "${WAIT_PERIOD}" "${WAIT_PERIOD_MAX}"
+}
+
+__log_stdin() {
+ cat | awk '{ printf "%s:\t%s\n","'"${prefix}"'", $0 }'
+}
+
+__log_args() {
+ echo "$*" | awk '{ printf "%s:\t%s\n","'"${prefix}"'", $0 }'
+}
+
+log() {
+ local prefix="$1"
+
+ shift
+ local redirect=
+ if [[ ${VERBOSE} -eq 0 ]]; then
+ redirect=/dev/null
+ else
+ redirect=/dev/stdout
+ fi
+
+ if [[ "$#" -eq 0 ]]; then
+ __log_stdin | tee -a "${LOG}" > ${redirect}
+ else
+ __log_args "$@" | tee -a "${LOG}" > ${redirect}
+ fi
+}
+
+log_setup() {
+ log "setup" "$@"
+}
+
+log_host() {
+ local testname=$1
+
+ shift
+ log "test:${testname}:host" "$@"
+}
+
+log_guest() {
+ local testname=$1
+
+ shift
+ log "test:${testname}:guest" "$@"
+}
+
+test_vm_server_host_client() {
+ local testname="${FUNCNAME[0]#test_}"
+
+ vm_ssh -- "${VSOCK_TEST}" \
+ --mode=server \
+ --control-port="${TEST_GUEST_PORT}" \
+ --peer-cid=2 \
+ 2>&1 | log_guest "${testname}" &
+
+ vm_wait_for_listener "${TEST_GUEST_PORT}"
+
+ ${VSOCK_TEST} \
+ --mode=client \
+ --control-host=127.0.0.1 \
+ --peer-cid="${VSOCK_CID}" \
+ --control-port="${TEST_HOST_PORT}" 2>&1 | log_host "${testname}"
+
+ return $?
+}
+
+test_vm_client_host_server() {
+ local testname="${FUNCNAME[0]#test_}"
+
+ ${VSOCK_TEST} \
+ --mode "server" \
+ --control-port "${TEST_HOST_PORT_LISTENER}" \
+ --peer-cid "${VSOCK_CID}" 2>&1 | log_host "${testname}" &
+
+ host_wait_for_listener
+
+ vm_ssh -- "${VSOCK_TEST}" \
+ --mode=client \
+ --control-host=10.0.2.2 \
+ --peer-cid=2 \
+ --control-port="${TEST_HOST_PORT_LISTENER}" 2>&1 | log_guest "${testname}"
+
+ return $?
+}
+
+test_vm_loopback() {
+ local testname="${FUNCNAME[0]#test_}"
+ local port=60000 # non-forwarded local port
+
+ vm_ssh -- "${VSOCK_TEST}" \
+ --mode=server \
+ --control-port="${port}" \
+ --peer-cid=1 2>&1 | log_guest "${testname}" &
+
+ vm_wait_for_listener "${port}"
+
+ vm_ssh -- "${VSOCK_TEST}" \
+ --mode=client \
+ --control-host="127.0.0.1" \
+ --control-port="${port}" \
+ --peer-cid=1 2>&1 | log_guest "${testname}"
+
+ return $?
+}
+
+run_test() {
+ local host_oops_cnt_before
+ local host_warn_cnt_before
+ local vm_oops_cnt_before
+ local vm_warn_cnt_before
+ local host_oops_cnt_after
+ local host_warn_cnt_after
+ local vm_oops_cnt_after
+ local vm_warn_cnt_after
+ local name
+ local rc
+
+ host_oops_cnt_before=$(dmesg | grep -c -i 'Oops')
+ host_warn_cnt_before=$(dmesg --level=warn | wc -l)
+ vm_oops_cnt_before=$(vm_ssh -- dmesg | grep -c -i 'Oops')
+ vm_warn_cnt_before=$(vm_ssh -- dmesg --level=warn | wc -l)
+
+ name=$(echo "${1}" | awk '{ print $1 }')
+ eval test_"${name}"
+ rc=$?
+
+ host_oops_cnt_after=$(dmesg | grep -i 'Oops' | wc -l)
+ if [[ ${host_oops_cnt_after} -gt ${host_oops_cnt_before} ]]; then
+ echo "FAIL: kernel oops detected on host" | log_host "${name}"
+ rc=$KSFT_FAIL
+ fi
+
+ host_warn_cnt_after=$(dmesg --level=warn | wc -l)
+ if [[ ${host_warn_cnt_after} -gt ${host_warn_cnt_before} ]]; then
+ echo "FAIL: kernel warning detected on host" | log_host "${name}"
+ rc=$KSFT_FAIL
+ fi
+
+ vm_oops_cnt_after=$(vm_ssh -- dmesg | grep -i 'Oops' | wc -l)
+ if [[ ${vm_oops_cnt_after} -gt ${vm_oops_cnt_before} ]]; then
+ echo "FAIL: kernel oops detected on vm" | log_host "${name}"
+ rc=$KSFT_FAIL
+ fi
+
+ vm_warn_cnt_after=$(vm_ssh -- dmesg --level=warn | wc -l)
+ if [[ ${vm_warn_cnt_after} -gt ${vm_warn_cnt_before} ]]; then
+ echo "FAIL: kernel warning detected on vm" | log_host "${name}"
+ rc=$KSFT_FAIL
+ fi
+
+ return "${rc}"
+}
+
+QEMU="qemu-system-$(uname -m)"
+
+while getopts :hvsq:b o
+do
+ case $o in
+ v) VERBOSE=1;;
+ b) BUILD=1;;
+ q) QEMU=$OPTARG;;
+ h|*) usage;;
+ esac
+done
+shift $((OPTIND-1))
+
+trap cleanup EXIT
+
+if [[ ${#} -eq 0 ]]; then
+ ARGS=("${TEST_NAMES[@]}")
+else
+ ARGS=("$@")
+fi
+
+check_args "${ARGS[@]}"
+check_deps
+check_vng
+handle_build
+
+echo "1..${#ARGS[@]}"
+
+log_setup "Booting up VM"
+vm_start
+vm_wait_for_ssh
+log_setup "VM booted up"
+
+cnt_pass=0
+cnt_fail=0
+cnt_skip=0
+cnt_total=0
+for arg in "${ARGS[@]}"; do
+ run_test "${arg}"
+ rc=$?
+ if [[ ${rc} -eq $KSFT_PASS ]]; then
+ cnt_pass=$(( cnt_pass + 1 ))
+ echo "ok ${cnt_total} ${arg}"
+ elif [[ ${rc} -eq $KSFT_SKIP ]]; then
+ cnt_skip=$(( cnt_skip + 1 ))
+ echo "ok ${cnt_total} ${arg} # SKIP"
+ elif [[ ${rc} -eq $KSFT_FAIL ]]; then
+ cnt_fail=$(( cnt_fail + 1 ))
+ echo "not ok ${cnt_total} ${arg} # exit=$rc"
+ fi
+ cnt_total=$(( cnt_total + 1 ))
+done
+
+echo "SUMMARY: PASS=${cnt_pass} SKIP=${cnt_skip} FAIL=${cnt_fail}"
+echo "Log: ${LOG}"
+
+if [ $((cnt_pass + cnt_skip)) -eq ${cnt_total} ]; then
+ exit "$KSFT_PASS"
+else
+ exit "$KSFT_FAIL"
+fi
diff --git a/tools/testing/selftests/wireguard/qemu/kernel.config b/tools/testing/selftests/wireguard/qemu/kernel.config
index f314d3789f17..0a5381717e9f 100644
--- a/tools/testing/selftests/wireguard/qemu/kernel.config
+++ b/tools/testing/selftests/wireguard/qemu/kernel.config
@@ -16,9 +16,13 @@ CONFIG_NETFILTER_ADVANCED=y
CONFIG_NF_CONNTRACK=y
CONFIG_NF_NAT=y
CONFIG_NETFILTER_XTABLES=y
+CONFIG_NETFILTER_XTABLES_LEGACY=y
CONFIG_NETFILTER_XT_NAT=y
CONFIG_NETFILTER_XT_MATCH_LENGTH=y
CONFIG_NETFILTER_XT_MARK=y
+CONFIG_NETFILTER_XT_TARGET_MASQUERADE=m
+CONFIG_IP_NF_TARGET_REJECT=m
+CONFIG_IP6_NF_TARGET_REJECT=m
CONFIG_IP_NF_IPTABLES=y
CONFIG_IP_NF_FILTER=y
CONFIG_IP_NF_MANGLE=y
diff --git a/tools/testing/vsock/Makefile b/tools/testing/vsock/Makefile
index 6e0b4e95e230..88211fd132d2 100644
--- a/tools/testing/vsock/Makefile
+++ b/tools/testing/vsock/Makefile
@@ -5,6 +5,7 @@ vsock_test: vsock_test.o vsock_test_zerocopy.o timeout.o control.o util.o msg_ze
vsock_diag_test: vsock_diag_test.o timeout.o control.o util.o
vsock_perf: vsock_perf.o msg_zerocopy_common.o
+vsock_test: LDLIBS = -lpthread
vsock_uring_test: LDLIBS = -luring
vsock_uring_test: control.o util.o vsock_uring_test.o timeout.o msg_zerocopy_common.o
diff --git a/tools/testing/vsock/util.c b/tools/testing/vsock/util.c
index 0c7e9cbcbc85..7b861a8e997a 100644
--- a/tools/testing/vsock/util.c
+++ b/tools/testing/vsock/util.c
@@ -7,6 +7,7 @@
* Author: Stefan Hajnoczi <stefanha@redhat.com>
*/
+#include <ctype.h>
#include <errno.h>
#include <stdio.h>
#include <stdint.h>
@@ -16,6 +17,7 @@
#include <unistd.h>
#include <assert.h>
#include <sys/epoll.h>
+#include <sys/ioctl.h>
#include <sys/mman.h>
#include <linux/sockios.h>
@@ -23,6 +25,9 @@
#include "control.h"
#include "util.h"
+#define KALLSYMS_PATH "/proc/kallsyms"
+#define KALLSYMS_LINE_LEN 512
+
/* Install signal handlers */
void init_signals(void)
{
@@ -97,39 +102,52 @@ void vsock_wait_remote_close(int fd)
close(epollfd);
}
-/* Wait until transport reports no data left to be sent.
- * Return false if transport does not implement the unsent_bytes() callback.
+/* Wait until ioctl gives an expected int value.
+ * Return false if the op is not supported.
*/
-bool vsock_wait_sent(int fd)
+bool vsock_ioctl_int(int fd, unsigned long op, int expected)
{
- int ret, sock_bytes_unsent;
+ int actual, ret;
+ char name[32];
+
+ snprintf(name, sizeof(name), "ioctl(%lu)", op);
timeout_begin(TIMEOUT);
do {
- ret = ioctl(fd, SIOCOUTQ, &sock_bytes_unsent);
+ ret = ioctl(fd, op, &actual);
if (ret < 0) {
- if (errno == EOPNOTSUPP)
+ if (errno == EOPNOTSUPP || errno == ENOTTY)
break;
- perror("ioctl(SIOCOUTQ)");
+ perror(name);
exit(EXIT_FAILURE);
}
- timeout_check("SIOCOUTQ");
- } while (sock_bytes_unsent != 0);
+ timeout_check(name);
+ } while (actual != expected);
timeout_end();
- return !ret;
+ return ret >= 0;
}
-/* Create socket <type>, bind to <cid, port> and return the file descriptor. */
-int vsock_bind(unsigned int cid, unsigned int port, int type)
+/* Wait until transport reports no data left to be sent.
+ * Return false if transport does not implement the unsent_bytes() callback.
+ */
+bool vsock_wait_sent(int fd)
+{
+ return vsock_ioctl_int(fd, SIOCOUTQ, 0);
+}
+
+/* Create socket <type>, bind to <cid, port>.
+ * Return the file descriptor, or -1 on error.
+ */
+int vsock_bind_try(unsigned int cid, unsigned int port, int type)
{
struct sockaddr_vm sa = {
.svm_family = AF_VSOCK,
.svm_cid = cid,
.svm_port = port,
};
- int fd;
+ int fd, saved_errno;
fd = socket(AF_VSOCK, type, 0);
if (fd < 0) {
@@ -138,6 +156,22 @@ int vsock_bind(unsigned int cid, unsigned int port, int type)
}
if (bind(fd, (struct sockaddr *)&sa, sizeof(sa))) {
+ saved_errno = errno;
+ close(fd);
+ errno = saved_errno;
+ fd = -1;
+ }
+
+ return fd;
+}
+
+/* Create socket <type>, bind to <cid, port> and return the file descriptor. */
+int vsock_bind(unsigned int cid, unsigned int port, int type)
+{
+ int fd;
+
+ fd = vsock_bind_try(cid, port, type);
+ if (fd < 0) {
perror("bind");
exit(EXIT_FAILURE);
}
@@ -836,3 +870,55 @@ void enable_so_linger(int fd, int timeout)
exit(EXIT_FAILURE);
}
}
+
+static int __get_transports(void)
+{
+ char buf[KALLSYMS_LINE_LEN];
+ const char *ksym;
+ int ret = 0;
+ FILE *f;
+
+ f = fopen(KALLSYMS_PATH, "r");
+ if (!f) {
+ perror("Can't open " KALLSYMS_PATH);
+ exit(EXIT_FAILURE);
+ }
+
+ while (fgets(buf, sizeof(buf), f)) {
+ char *match;
+ int i;
+
+ assert(buf[strlen(buf) - 1] == '\n');
+
+ for (i = 0; i < TRANSPORT_NUM; ++i) {
+ if (ret & BIT(i))
+ continue;
+
+ /* Match should be followed by '\t' or '\n'.
+ * See kallsyms.c:s_show().
+ */
+ ksym = transport_ksyms[i];
+ match = strstr(buf, ksym);
+ if (match && isspace(match[strlen(ksym)])) {
+ ret |= BIT(i);
+ break;
+ }
+ }
+ }
+
+ fclose(f);
+ return ret;
+}
+
+/* Return integer with TRANSPORT_* bit set for every (known) registered vsock
+ * transport.
+ */
+int get_transports(void)
+{
+ static int tr = -1;
+
+ if (tr == -1)
+ tr = __get_transports();
+
+ return tr;
+}
diff --git a/tools/testing/vsock/util.h b/tools/testing/vsock/util.h
index 5e2db67072d5..142c02a6834a 100644
--- a/tools/testing/vsock/util.h
+++ b/tools/testing/vsock/util.h
@@ -3,8 +3,40 @@
#define UTIL_H
#include <sys/socket.h>
+#include <linux/bitops.h>
+#include <linux/kernel.h>
#include <linux/vm_sockets.h>
+/* All known vsock transports, see callers of vsock_core_register() */
+#define KNOWN_TRANSPORTS(x) \
+ x(LOOPBACK, "loopback") \
+ x(VIRTIO, "virtio") \
+ x(VHOST, "vhost") \
+ x(VMCI, "vmci") \
+ x(HYPERV, "hvs")
+
+enum transport {
+ TRANSPORT_COUNTER_BASE = __COUNTER__ + 1,
+ #define x(name, symbol) \
+ TRANSPORT_##name = BIT(__COUNTER__ - TRANSPORT_COUNTER_BASE),
+ KNOWN_TRANSPORTS(x)
+ TRANSPORT_NUM = __COUNTER__ - TRANSPORT_COUNTER_BASE,
+ #undef x
+};
+
+static const char * const transport_ksyms[] = {
+ #define x(name, symbol) "d " symbol "_transport",
+ KNOWN_TRANSPORTS(x)
+ #undef x
+};
+
+static_assert(ARRAY_SIZE(transport_ksyms) == TRANSPORT_NUM);
+static_assert(BITS_PER_TYPE(int) >= TRANSPORT_NUM);
+
+#define TRANSPORTS_G2H (TRANSPORT_VIRTIO | TRANSPORT_VMCI | TRANSPORT_HYPERV)
+#define TRANSPORTS_H2G (TRANSPORT_VHOST | TRANSPORT_VMCI)
+#define TRANSPORTS_LOCAL (TRANSPORT_LOOPBACK)
+
/* Tests can either run as the client or the server */
enum test_mode {
TEST_MODE_UNSET,
@@ -44,6 +76,7 @@ int vsock_connect(unsigned int cid, unsigned int port, int type);
int vsock_accept(unsigned int cid, unsigned int port,
struct sockaddr_vm *clientaddrp, int type);
int vsock_stream_connect(unsigned int cid, unsigned int port);
+int vsock_bind_try(unsigned int cid, unsigned int port, int type);
int vsock_bind(unsigned int cid, unsigned int port, int type);
int vsock_bind_connect(unsigned int cid, unsigned int port,
unsigned int bind_port, int type);
@@ -54,6 +87,7 @@ int vsock_stream_listen(unsigned int cid, unsigned int port);
int vsock_seqpacket_accept(unsigned int cid, unsigned int port,
struct sockaddr_vm *clientaddrp);
void vsock_wait_remote_close(int fd);
+bool vsock_ioctl_int(int fd, unsigned long op, int expected);
bool vsock_wait_sent(int fd);
void send_buf(int fd, const void *buf, size_t len, int flags,
ssize_t expected_ret);
@@ -81,4 +115,5 @@ void setsockopt_timeval_check(int fd, int level, int optname,
struct timeval val, char const *errmsg);
void enable_so_zerocopy_check(int fd);
void enable_so_linger(int fd, int timeout);
+int get_transports(void);
#endif /* UTIL_H */
diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c
index f669baaa0dca..d4517386e551 100644
--- a/tools/testing/vsock/vsock_test.c
+++ b/tools/testing/vsock/vsock_test.c
@@ -22,6 +22,9 @@
#include <signal.h>
#include <sys/ioctl.h>
#include <linux/time64.h>
+#include <pthread.h>
+#include <fcntl.h>
+#include <linux/sockios.h>
#include "vsock_test_zerocopy.h"
#include "timeout.h"
@@ -1305,6 +1308,54 @@ static void test_unsent_bytes_client(const struct test_opts *opts, int type)
close(fd);
}
+static void test_unread_bytes_server(const struct test_opts *opts, int type)
+{
+ unsigned char buf[MSG_BUF_IOCTL_LEN];
+ int client_fd;
+
+ client_fd = vsock_accept(VMADDR_CID_ANY, opts->peer_port, NULL, type);
+ if (client_fd < 0) {
+ perror("accept");
+ exit(EXIT_FAILURE);
+ }
+
+ for (int i = 0; i < sizeof(buf); i++)
+ buf[i] = rand() & 0xFF;
+
+ send_buf(client_fd, buf, sizeof(buf), 0, sizeof(buf));
+ control_writeln("SENT");
+
+ close(client_fd);
+}
+
+static void test_unread_bytes_client(const struct test_opts *opts, int type)
+{
+ unsigned char buf[MSG_BUF_IOCTL_LEN];
+ int fd;
+
+ fd = vsock_connect(opts->peer_cid, opts->peer_port, type);
+ if (fd < 0) {
+ perror("connect");
+ exit(EXIT_FAILURE);
+ }
+
+ control_expectln("SENT");
+ /* The data has arrived but has not been read. The expected is
+ * MSG_BUF_IOCTL_LEN.
+ */
+ if (!vsock_ioctl_int(fd, SIOCINQ, MSG_BUF_IOCTL_LEN)) {
+ fprintf(stderr, "Test skipped, SIOCINQ not supported.\n");
+ goto out;
+ }
+
+ recv_buf(fd, buf, sizeof(buf), 0, sizeof(buf));
+ /* All data has been consumed, so the expected is 0. */
+ vsock_ioctl_int(fd, SIOCINQ, 0);
+
+out:
+ close(fd);
+}
+
static void test_stream_unsent_bytes_client(const struct test_opts *opts)
{
test_unsent_bytes_client(opts, SOCK_STREAM);
@@ -1325,6 +1376,26 @@ static void test_seqpacket_unsent_bytes_server(const struct test_opts *opts)
test_unsent_bytes_server(opts, SOCK_SEQPACKET);
}
+static void test_stream_unread_bytes_client(const struct test_opts *opts)
+{
+ test_unread_bytes_client(opts, SOCK_STREAM);
+}
+
+static void test_stream_unread_bytes_server(const struct test_opts *opts)
+{
+ test_unread_bytes_server(opts, SOCK_STREAM);
+}
+
+static void test_seqpacket_unread_bytes_client(const struct test_opts *opts)
+{
+ test_unread_bytes_client(opts, SOCK_SEQPACKET);
+}
+
+static void test_seqpacket_unread_bytes_server(const struct test_opts *opts)
+{
+ test_unread_bytes_server(opts, SOCK_SEQPACKET);
+}
+
#define RCVLOWAT_CREDIT_UPD_BUF_SIZE (1024 * 128)
/* This define is the same as in 'include/linux/virtio_vsock.h':
* it is used to decide when to send credit update message during
@@ -1718,16 +1789,27 @@ static void test_stream_msgzcopy_leak_zcskb_server(const struct test_opts *opts)
#define MAX_PORT_RETRIES 24 /* net/vmw_vsock/af_vsock.c */
-/* Test attempts to trigger a transport release for an unbound socket. This can
- * lead to a reference count mishandling.
- */
-static void test_stream_transport_uaf_client(const struct test_opts *opts)
+static bool test_stream_transport_uaf(int cid)
{
int sockets[MAX_PORT_RETRIES];
struct sockaddr_vm addr;
- int fd, i, alen;
+ socklen_t alen;
+ int fd, i, c;
+ bool ret;
- fd = vsock_bind(VMADDR_CID_ANY, VMADDR_PORT_ANY, SOCK_STREAM);
+ /* Probe for a transport by attempting a local CID bind. Unavailable
+ * transport (or more specifically: an unsupported transport/CID
+ * combination) results in EADDRNOTAVAIL, other errnos are fatal.
+ */
+ fd = vsock_bind_try(cid, VMADDR_PORT_ANY, SOCK_STREAM);
+ if (fd < 0) {
+ if (errno != EADDRNOTAVAIL) {
+ perror("Unexpected bind() errno");
+ exit(EXIT_FAILURE);
+ }
+
+ return false;
+ }
alen = sizeof(addr);
if (getsockname(fd, (struct sockaddr *)&addr, &alen)) {
@@ -1735,38 +1817,83 @@ static void test_stream_transport_uaf_client(const struct test_opts *opts)
exit(EXIT_FAILURE);
}
+ /* Drain the autobind pool; see __vsock_bind_connectible(). */
for (i = 0; i < MAX_PORT_RETRIES; ++i)
- sockets[i] = vsock_bind(VMADDR_CID_ANY, ++addr.svm_port,
- SOCK_STREAM);
+ sockets[i] = vsock_bind(cid, ++addr.svm_port, SOCK_STREAM);
close(fd);
- fd = socket(AF_VSOCK, SOCK_STREAM, 0);
+
+ /* Setting SOCK_NONBLOCK makes connect() return soon after
+ * (re-)assigning the transport. We are not connecting to anything
+ * anyway, so there is no point entering the main loop in
+ * vsock_connect(); waiting for timeout, checking for signals, etc.
+ */
+ fd = socket(AF_VSOCK, SOCK_STREAM | SOCK_NONBLOCK, 0);
if (fd < 0) {
perror("socket");
exit(EXIT_FAILURE);
}
- if (!vsock_connect_fd(fd, addr.svm_cid, addr.svm_port)) {
- perror("Unexpected connect() #1 success");
+ /* Assign transport, while failing to autobind. Autobind pool was
+ * drained, so EADDRNOTAVAIL coming from __vsock_bind_connectible() is
+ * expected.
+ *
+ * One exception is ENODEV which is thrown by vsock_assign_transport(),
+ * i.e. before vsock_auto_bind(), when the only transport loaded is
+ * vhost.
+ */
+ if (!connect(fd, (struct sockaddr *)&addr, alen)) {
+ fprintf(stderr, "Unexpected connect() success\n");
exit(EXIT_FAILURE);
}
-
- /* Vulnerable system may crash now. */
- if (!vsock_connect_fd(fd, VMADDR_CID_HOST, VMADDR_PORT_ANY)) {
- perror("Unexpected connect() #2 success");
+ if (errno == ENODEV && cid == VMADDR_CID_HOST) {
+ ret = false;
+ goto cleanup;
+ }
+ if (errno != EADDRNOTAVAIL) {
+ perror("Unexpected connect() errno");
exit(EXIT_FAILURE);
}
+ /* Reassign transport, triggering old transport release and
+ * (potentially) unbinding of an unbound socket.
+ *
+ * Vulnerable system may crash now.
+ */
+ for (c = VMADDR_CID_HYPERVISOR; c <= VMADDR_CID_HOST + 1; ++c) {
+ if (c != cid) {
+ addr.svm_cid = c;
+ (void)connect(fd, (struct sockaddr *)&addr, alen);
+ }
+ }
+
+ ret = true;
+cleanup:
close(fd);
while (i--)
close(sockets[i]);
- control_writeln("DONE");
+ return ret;
}
-static void test_stream_transport_uaf_server(const struct test_opts *opts)
+/* Test attempts to trigger a transport release for an unbound socket. This can
+ * lead to a reference count mishandling.
+ */
+static void test_stream_transport_uaf_client(const struct test_opts *opts)
{
- control_expectln("DONE");
+ bool tested = false;
+ int cid, tr;
+
+ for (cid = VMADDR_CID_HYPERVISOR; cid <= VMADDR_CID_HOST + 1; ++cid)
+ tested |= test_stream_transport_uaf(cid);
+
+ tr = get_transports();
+ if (!tr)
+ fprintf(stderr, "No transports detected\n");
+ else if (tr == TRANSPORT_VIRTIO)
+ fprintf(stderr, "Setup unsupported: sole virtio transport\n");
+ else if (!tested)
+ fprintf(stderr, "No transports tested\n");
}
static void test_stream_connect_retry_client(const struct test_opts *opts)
@@ -1811,6 +1938,180 @@ static void test_stream_connect_retry_server(const struct test_opts *opts)
close(fd);
}
+#define TRANSPORT_CHANGE_TIMEOUT 2 /* seconds */
+
+static void *test_stream_transport_change_thread(void *vargp)
+{
+ pid_t *pid = (pid_t *)vargp;
+ int ret;
+
+ ret = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL);
+ if (ret) {
+ fprintf(stderr, "pthread_setcanceltype: %d\n", ret);
+ exit(EXIT_FAILURE);
+ }
+
+ while (true) {
+ if (kill(*pid, SIGUSR1) < 0) {
+ perror("kill");
+ exit(EXIT_FAILURE);
+ }
+ }
+ return NULL;
+}
+
+static void test_transport_change_signal_handler(int signal)
+{
+ /* We need a custom handler for SIGUSR1 as the default one terminates the process. */
+}
+
+static void test_stream_transport_change_client(const struct test_opts *opts)
+{
+ __sighandler_t old_handler;
+ pid_t pid = getpid();
+ pthread_t thread_id;
+ time_t tout;
+ int ret, tr;
+
+ tr = get_transports();
+
+ /* Print a warning if there is a G2H transport loaded.
+ * This is on a best effort basis because VMCI can be either G2H and H2G, and there is
+ * no easy way to understand it.
+ * The bug we are testing only appears when G2H transports are not loaded.
+ * This is because `vsock_assign_transport`, when using CID 0, assigns a G2H transport
+ * to vsk->transport. If none is available it is set to NULL, causing the null-ptr-deref.
+ */
+ if (tr & TRANSPORTS_G2H)
+ fprintf(stderr, "G2H Transport detected. This test will not fail.\n");
+
+ old_handler = signal(SIGUSR1, test_transport_change_signal_handler);
+ if (old_handler == SIG_ERR) {
+ perror("signal");
+ exit(EXIT_FAILURE);
+ }
+
+ ret = pthread_create(&thread_id, NULL, test_stream_transport_change_thread, &pid);
+ if (ret) {
+ fprintf(stderr, "pthread_create: %d\n", ret);
+ exit(EXIT_FAILURE);
+ }
+
+ control_expectln("LISTENING");
+
+ tout = current_nsec() + TRANSPORT_CHANGE_TIMEOUT * NSEC_PER_SEC;
+ do {
+ struct sockaddr_vm sa = {
+ .svm_family = AF_VSOCK,
+ .svm_cid = opts->peer_cid,
+ .svm_port = opts->peer_port,
+ };
+ bool send_control = false;
+ int s;
+
+ s = socket(AF_VSOCK, SOCK_STREAM, 0);
+ if (s < 0) {
+ perror("socket");
+ exit(EXIT_FAILURE);
+ }
+
+ ret = connect(s, (struct sockaddr *)&sa, sizeof(sa));
+ /* The connect can fail due to signals coming from the thread,
+ * or because the receiver connection queue is full.
+ * Ignoring also the latter case because there is no way
+ * of synchronizing client's connect and server's accept when
+ * connect(s) are constantly being interrupted by signals.
+ */
+ if (ret == -1 && (errno != EINTR && errno != ECONNRESET)) {
+ perror("connect");
+ exit(EXIT_FAILURE);
+ }
+
+ /* Notify the server if the connect() is successful or the
+ * receiver connection queue is full, so it will do accept()
+ * to drain it.
+ */
+ if (!ret || errno == ECONNRESET)
+ send_control = true;
+
+ /* Set CID to 0 cause a transport change. */
+ sa.svm_cid = 0;
+
+ /* There is a case where this will not fail:
+ * if the previous connect() is interrupted while the
+ * connection request is already sent, this second
+ * connect() will wait for the response.
+ */
+ ret = connect(s, (struct sockaddr *)&sa, sizeof(sa));
+ if (!ret || errno == ECONNRESET)
+ send_control = true;
+
+ close(s);
+
+ if (send_control)
+ control_writeulong(CONTROL_CONTINUE);
+
+ } while (current_nsec() < tout);
+
+ control_writeulong(CONTROL_DONE);
+
+ ret = pthread_cancel(thread_id);
+ if (ret) {
+ fprintf(stderr, "pthread_cancel: %d\n", ret);
+ exit(EXIT_FAILURE);
+ }
+
+ ret = pthread_join(thread_id, NULL);
+ if (ret) {
+ fprintf(stderr, "pthread_join: %d\n", ret);
+ exit(EXIT_FAILURE);
+ }
+
+ if (signal(SIGUSR1, old_handler) == SIG_ERR) {
+ perror("signal");
+ exit(EXIT_FAILURE);
+ }
+}
+
+static void test_stream_transport_change_server(const struct test_opts *opts)
+{
+ int s = vsock_stream_listen(VMADDR_CID_ANY, opts->peer_port);
+
+ /* Set the socket to be nonblocking because connects that have been interrupted
+ * (EINTR) can fill the receiver's accept queue anyway, leading to connect failure.
+ * As of today (6.15) in such situation there is no way to understand, from the
+ * client side, if the connection has been queued in the server or not.
+ */
+ if (fcntl(s, F_SETFL, fcntl(s, F_GETFL, 0) | O_NONBLOCK) < 0) {
+ perror("fcntl");
+ exit(EXIT_FAILURE);
+ }
+ control_writeln("LISTENING");
+
+ while (control_readulong() == CONTROL_CONTINUE) {
+ /* Must accept the connection, otherwise the `listen`
+ * queue will fill up and new connections will fail.
+ * There can be more than one queued connection,
+ * clear them all.
+ */
+ while (true) {
+ int client = accept(s, NULL, NULL);
+
+ if (client < 0) {
+ if (errno == EAGAIN)
+ break;
+
+ perror("accept");
+ exit(EXIT_FAILURE);
+ }
+
+ close(client);
+ }
+ }
+
+ close(s);
+}
+
static void test_stream_linger_client(const struct test_opts *opts)
{
int fd;
@@ -2034,7 +2335,6 @@ static struct test_case test_cases[] = {
{
.name = "SOCK_STREAM transport release use-after-free",
.run_client = test_stream_transport_uaf_client,
- .run_server = test_stream_transport_uaf_server,
},
{
.name = "SOCK_STREAM retry failed connect()",
@@ -2051,6 +2351,21 @@ static struct test_case test_cases[] = {
.run_client = test_stream_nolinger_client,
.run_server = test_stream_nolinger_server,
},
+ {
+ .name = "SOCK_STREAM transport change null-ptr-deref",
+ .run_client = test_stream_transport_change_client,
+ .run_server = test_stream_transport_change_server,
+ },
+ {
+ .name = "SOCK_STREAM ioctl(SIOCINQ) functionality",
+ .run_client = test_stream_unread_bytes_client,
+ .run_server = test_stream_unread_bytes_server,
+ },
+ {
+ .name = "SOCK_SEQPACKET ioctl(SIOCINQ) functionality",
+ .run_client = test_seqpacket_unread_bytes_client,
+ .run_server = test_seqpacket_unread_bytes_server,
+ },
{},
};