aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/tools
diff options
context:
space:
mode:
Diffstat (limited to 'tools')
-rw-r--r--tools/Makefile16
-rw-r--r--tools/include/uapi/asm-generic/socket.h2
-rw-r--r--tools/include/uapi/linux/if_xdp.h6
-rw-r--r--tools/include/uapi/linux/netdev.h1
-rw-r--r--tools/lib/bpf/libbpf.h5
-rw-r--r--tools/lib/bpf/netlink.c20
-rw-r--r--tools/net/ynl/Makefile.deps17
-rw-r--r--tools/net/ynl/generated/Makefile7
-rw-r--r--tools/net/ynl/lib/ynl-priv.h19
-rw-r--r--tools/net/ynl/lib/ynl.c160
-rw-r--r--tools/net/ynl/lib/ynl.h18
-rwxr-xr-xtools/net/ynl/pyynl/cli.py15
-rw-r--r--tools/net/ynl/pyynl/lib/__init__.py5
-rw-r--r--tools/net/ynl/pyynl/lib/ynl.py39
-rwxr-xr-xtools/net/ynl/pyynl/ynl_gen_c.py843
-rwxr-xr-xtools/net/ynl/pyynl/ynl_gen_rst.py2
-rw-r--r--tools/net/ynl/samples/.gitignore6
-rw-r--r--tools/net/ynl/samples/devlink.c7
-rw-r--r--tools/net/ynl/samples/rt-addr.c80
-rw-r--r--tools/net/ynl/samples/rt-link.c184
-rw-r--r--tools/net/ynl/samples/rt-route.c80
-rw-r--r--tools/net/ynl/samples/tc.c80
-rw-r--r--tools/testing/selftests/Makefile2
-rw-r--r--tools/testing/selftests/bpf/config2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_qdisc.c231
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c447
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_qdisc_common.h27
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_qdisc_fail__incompl_ops.c41
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_qdisc_fifo.c126
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_qdisc_fq.c756
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_tracing_net.h1
-rw-r--r--tools/testing/selftests/bpf/progs/setget_sockopt.c11
-rw-r--r--tools/testing/selftests/bpf/progs/sock_iter_batch.c24
-rw-r--r--tools/testing/selftests/bpf/progs/xsk_xdp_progs.c50
-rw-r--r--tools/testing/selftests/bpf/xsk_xdp_common.h1
-rw-r--r--tools/testing/selftests/bpf/xskxceiver.c118
-rw-r--r--tools/testing/selftests/bpf/xskxceiver.h2
-rw-r--r--tools/testing/selftests/drivers/net/.gitignore2
-rw-r--r--tools/testing/selftests/drivers/net/Makefile6
-rw-r--r--tools/testing/selftests/drivers/net/hw/Makefile3
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/devmem.py45
-rw-r--r--tools/testing/selftests/drivers/net/hw/iou-zcrx.c27
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/iou-zcrx.py140
-rw-r--r--tools/testing/selftests/drivers/net/hw/lib/py/__init__.py1
-rw-r--r--tools/testing/selftests/drivers/net/hw/lib/py/linkconfig.py222
-rw-r--r--tools/testing/selftests/drivers/net/hw/ncdevmem.c327
-rw-r--r--tools/testing/selftests/drivers/net/hw/nic_link_layer.py113
-rw-r--r--tools/testing/selftests/drivers/net/hw/nic_performance.py137
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/rss_input_xfrm.py5
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/xsk_reconfig.py60
-rw-r--r--tools/testing/selftests/drivers/net/lib/py/env.py2
-rw-r--r--tools/testing/selftests/drivers/net/lib/py/load.py20
-rw-r--r--tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh1
-rwxr-xr-xtools/testing/selftests/drivers/net/napi_id.py23
-rw-r--r--tools/testing/selftests/drivers/net/napi_id_helper.c83
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/peer.sh2
-rwxr-xr-xtools/testing/selftests/drivers/net/ping.py16
-rwxr-xr-xtools/testing/selftests/drivers/net/queues.py4
-rw-r--r--tools/testing/selftests/drivers/net/team/Makefile2
-rw-r--r--tools/testing/selftests/drivers/net/team/config1
-rwxr-xr-xtools/testing/selftests/drivers/net/team/propagation.sh80
-rw-r--r--tools/testing/selftests/nci/nci_dev.c2
-rw-r--r--tools/testing/selftests/net/Makefile2
-rw-r--r--tools/testing/selftests/net/af_unix/scm_rights.c80
-rwxr-xr-xtools/testing/selftests/net/bareudp.sh49
-rwxr-xr-xtools/testing/selftests/net/busy_poll_test.sh2
-rw-r--r--tools/testing/selftests/net/can/.gitignore2
-rw-r--r--tools/testing/selftests/net/can/Makefile11
-rw-r--r--tools/testing/selftests/net/can/test_raw_filter.c405
-rwxr-xr-xtools/testing/selftests/net/can/test_raw_filter.sh45
-rw-r--r--tools/testing/selftests/net/config1
-rwxr-xr-xtools/testing/selftests/net/fib_rule_tests.sh3
-rwxr-xr-xtools/testing/selftests/net/fib_tests.sh123
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_igmp.sh80
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_mld.sh81
-rw-r--r--tools/testing/selftests/net/forwarding/config1
-rwxr-xr-xtools/testing/selftests/net/icmp_redirect.sh2
-rwxr-xr-xtools/testing/selftests/net/ipv6_route_update_soft_lockup.sh1
-rw-r--r--tools/testing/selftests/net/lib.sh47
-rw-r--r--tools/testing/selftests/net/lib/.gitignore1
-rw-r--r--tools/testing/selftests/net/lib/Makefile1
-rw-r--r--tools/testing/selftests/net/lib/ksft.h56
-rw-r--r--tools/testing/selftests/net/lib/py/ksft.py24
-rw-r--r--tools/testing/selftests/net/lib/py/ynl.py4
-rw-r--r--tools/testing/selftests/net/lib/xdp_helper.c (renamed from tools/testing/selftests/drivers/net/xdp_helper.c)82
-rw-r--r--tools/testing/selftests/net/mptcp/Makefile2
-rwxr-xr-xtools/testing/selftests/net/mptcp/diag.sh32
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_connect.c21
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_diag.c231
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_inq.c16
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_join.sh26
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_lib.sh10
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_sockopt.c16
-rw-r--r--tools/testing/selftests/net/net_helper.sh25
-rw-r--r--tools/testing/selftests/net/netfilter/Makefile2
-rwxr-xr-xtools/testing/selftests/net/netfilter/br_netfilter.sh3
-rwxr-xr-xtools/testing/selftests/net/netfilter/bridge_brouter.sh2
-rw-r--r--tools/testing/selftests/net/netfilter/config1
-rwxr-xr-xtools/testing/selftests/net/netfilter/conntrack_resize.sh427
-rwxr-xr-xtools/testing/selftests/net/netfilter/conntrack_vrf.sh37
-rwxr-xr-xtools/testing/selftests/net/netfilter/ipvs.sh6
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_concat_range.sh165
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_fib.sh635
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_interface_stress.sh154
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_nat_zones.sh2
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_queue.sh38
-rwxr-xr-xtools/testing/selftests/net/netfilter/rpath.sh18
-rw-r--r--tools/testing/selftests/net/ovpn/.gitignore2
-rw-r--r--tools/testing/selftests/net/ovpn/Makefile32
-rw-r--r--tools/testing/selftests/net/ovpn/common.sh108
-rw-r--r--tools/testing/selftests/net/ovpn/config10
-rw-r--r--tools/testing/selftests/net/ovpn/data64.key5
-rw-r--r--tools/testing/selftests/net/ovpn/ovpn-cli.c2383
-rw-r--r--tools/testing/selftests/net/ovpn/tcp_peers.txt5
-rwxr-xr-xtools/testing/selftests/net/ovpn/test-chachapoly.sh9
-rwxr-xr-xtools/testing/selftests/net/ovpn/test-close-socket-tcp.sh9
-rwxr-xr-xtools/testing/selftests/net/ovpn/test-close-socket.sh45
-rwxr-xr-xtools/testing/selftests/net/ovpn/test-float.sh9
-rwxr-xr-xtools/testing/selftests/net/ovpn/test-tcp.sh9
-rwxr-xr-xtools/testing/selftests/net/ovpn/test.sh117
-rw-r--r--tools/testing/selftests/net/ovpn/udp_peers.txt6
-rwxr-xr-xtools/testing/selftests/net/pmtu.sh1
-rw-r--r--tools/testing/selftests/net/reuseport_addr_any.c36
-rwxr-xr-xtools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh5
-rwxr-xr-xtools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh5
-rwxr-xr-xtools/testing/selftests/net/srv6_end_flavors_test.sh4
-rwxr-xr-xtools/testing/selftests/net/srv6_end_next_csid_l3vpn_test.sh77
-rwxr-xr-xtools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh83
-rwxr-xr-xtools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh74
-rwxr-xr-xtools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh83
-rwxr-xr-xtools/testing/selftests/net/test_bridge_neigh_suppress.sh125
-rwxr-xr-xtools/testing/selftests/net/udpgro.sh2
-rwxr-xr-xtools/testing/selftests/net/udpgro_bench.sh2
-rwxr-xr-xtools/testing/selftests/net/udpgro_frglist.sh2
-rwxr-xr-xtools/testing/selftests/net/udpgro_fwd.sh2
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json35
-rwxr-xr-xtools/testing/selftests/tc-testing/tdc.sh4
-rwxr-xr-xtools/testing/selftests/wireguard/netns.sh29
-rw-r--r--tools/testing/selftests/wireguard/qemu/Makefile3
-rw-r--r--tools/testing/selftests/wireguard/qemu/debug.config1
-rw-r--r--tools/testing/vsock/timeout.c18
-rw-r--r--tools/testing/vsock/timeout.h1
-rw-r--r--tools/testing/vsock/util.c38
-rw-r--r--tools/testing/vsock/util.h2
-rw-r--r--tools/testing/vsock/vsock_test.c129
145 files changed, 9804 insertions, 1453 deletions
diff --git a/tools/Makefile b/tools/Makefile
index 5e1254eb66de..c31cbbd12c45 100644
--- a/tools/Makefile
+++ b/tools/Makefile
@@ -41,6 +41,7 @@ help:
@echo ' mm - misc mm tools'
@echo ' wmi - WMI interface examples'
@echo ' x86_energy_perf_policy - Intel energy policy tool'
+ @echo ' ynl - ynl headers, library, and python tool'
@echo ''
@echo 'You can do:'
@echo ' $$ make -C tools/ <tool>_install'
@@ -118,11 +119,14 @@ freefall: FORCE
kvm_stat: FORCE
$(call descend,kvm/$@)
+ynl: FORCE
+ $(call descend,net/ynl)
+
all: acpi counter cpupower gpio hv firewire \
perf selftests bootconfig spi turbostat usb \
virtio mm bpf x86_energy_perf_policy \
tmon freefall iio objtool kvm_stat wmi \
- debugging tracing thermal thermometer thermal-engine
+ debugging tracing thermal thermometer thermal-engine ynl
acpi_install:
$(call descend,power/$(@:_install=),install)
@@ -157,13 +161,16 @@ freefall_install:
kvm_stat_install:
$(call descend,kvm/$(@:_install=),install)
+ynl_install:
+ $(call descend,net/$(@:_install=),install)
+
install: acpi_install counter_install cpupower_install gpio_install \
hv_install firewire_install iio_install \
perf_install selftests_install turbostat_install usb_install \
virtio_install mm_install bpf_install x86_energy_perf_policy_install \
tmon_install freefall_install objtool_install kvm_stat_install \
wmi_install debugging_install intel-speed-select_install \
- tracing_install thermometer_install thermal-engine_install
+ tracing_install thermometer_install thermal-engine_install ynl_install
acpi_clean:
$(call descend,power/acpi,clean)
@@ -214,12 +221,15 @@ freefall_clean:
build_clean:
$(call descend,build,clean)
+ynl_clean:
+ $(call descend,net/$(@:_clean=),clean)
+
clean: acpi_clean counter_clean cpupower_clean hv_clean firewire_clean \
perf_clean selftests_clean turbostat_clean bootconfig_clean spi_clean usb_clean virtio_clean \
mm_clean bpf_clean iio_clean x86_energy_perf_policy_clean tmon_clean \
freefall_clean build_clean libbpf_clean libsubcmd_clean \
gpio_clean objtool_clean leds_clean wmi_clean firmware_clean debugging_clean \
intel-speed-select_clean tracing_clean thermal_clean thermometer_clean thermal-engine_clean \
- sched_ext_clean
+ sched_ext_clean ynl_clean
.PHONY: FORCE
diff --git a/tools/include/uapi/asm-generic/socket.h b/tools/include/uapi/asm-generic/socket.h
index aa5016ff3d91..f333a0ac4ee4 100644
--- a/tools/include/uapi/asm-generic/socket.h
+++ b/tools/include/uapi/asm-generic/socket.h
@@ -145,6 +145,8 @@
#define SO_RCVPRIORITY 82
+#define SO_PASSRIGHTS 83
+
#if !defined(__KERNEL__)
#if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__))
diff --git a/tools/include/uapi/linux/if_xdp.h b/tools/include/uapi/linux/if_xdp.h
index 42869770776e..44f2bb93e7e6 100644
--- a/tools/include/uapi/linux/if_xdp.h
+++ b/tools/include/uapi/linux/if_xdp.h
@@ -7,8 +7,8 @@
* Magnus Karlsson <magnus.karlsson@intel.com>
*/
-#ifndef _LINUX_IF_XDP_H
-#define _LINUX_IF_XDP_H
+#ifndef _UAPI_LINUX_IF_XDP_H
+#define _UAPI_LINUX_IF_XDP_H
#include <linux/types.h>
@@ -180,4 +180,4 @@ struct xdp_desc {
/* TX packet carries valid metadata. */
#define XDP_TX_METADATA (1 << 1)
-#endif /* _LINUX_IF_XDP_H */
+#endif /* _UAPI_LINUX_IF_XDP_H */
diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h
index 7600bf62dbdf..7eb9571786b8 100644
--- a/tools/include/uapi/linux/netdev.h
+++ b/tools/include/uapi/linux/netdev.h
@@ -219,6 +219,7 @@ enum {
NETDEV_CMD_QSTATS_GET,
NETDEV_CMD_BIND_RX,
NETDEV_CMD_NAPI_SET,
+ NETDEV_CMD_BIND_TX,
__NETDEV_CMD_MAX,
NETDEV_CMD_MAX = (__NETDEV_CMD_MAX - 1)
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index e0605403f977..fdcee6a71e0f 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -1283,6 +1283,7 @@ enum bpf_tc_attach_point {
BPF_TC_INGRESS = 1 << 0,
BPF_TC_EGRESS = 1 << 1,
BPF_TC_CUSTOM = 1 << 2,
+ BPF_TC_QDISC = 1 << 3,
};
#define BPF_TC_PARENT(a, b) \
@@ -1297,9 +1298,11 @@ struct bpf_tc_hook {
int ifindex;
enum bpf_tc_attach_point attach_point;
__u32 parent;
+ __u32 handle;
+ const char *qdisc;
size_t :0;
};
-#define bpf_tc_hook__last_field parent
+#define bpf_tc_hook__last_field qdisc
struct bpf_tc_opts {
size_t sz;
diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c
index 68a2def17175..c997e69d507f 100644
--- a/tools/lib/bpf/netlink.c
+++ b/tools/lib/bpf/netlink.c
@@ -529,9 +529,9 @@ int bpf_xdp_query_id(int ifindex, int flags, __u32 *prog_id)
}
-typedef int (*qdisc_config_t)(struct libbpf_nla_req *req);
+typedef int (*qdisc_config_t)(struct libbpf_nla_req *req, const struct bpf_tc_hook *hook);
-static int clsact_config(struct libbpf_nla_req *req)
+static int clsact_config(struct libbpf_nla_req *req, const struct bpf_tc_hook *hook)
{
req->tc.tcm_parent = TC_H_CLSACT;
req->tc.tcm_handle = TC_H_MAKE(TC_H_CLSACT, 0);
@@ -539,6 +539,16 @@ static int clsact_config(struct libbpf_nla_req *req)
return nlattr_add(req, TCA_KIND, "clsact", sizeof("clsact"));
}
+static int qdisc_config(struct libbpf_nla_req *req, const struct bpf_tc_hook *hook)
+{
+ const char *qdisc = OPTS_GET(hook, qdisc, NULL);
+
+ req->tc.tcm_parent = OPTS_GET(hook, parent, TC_H_ROOT);
+ req->tc.tcm_handle = OPTS_GET(hook, handle, 0);
+
+ return nlattr_add(req, TCA_KIND, qdisc, strlen(qdisc) + 1);
+}
+
static int attach_point_to_config(struct bpf_tc_hook *hook,
qdisc_config_t *config)
{
@@ -552,6 +562,9 @@ static int attach_point_to_config(struct bpf_tc_hook *hook,
return 0;
case BPF_TC_CUSTOM:
return -EOPNOTSUPP;
+ case BPF_TC_QDISC:
+ *config = &qdisc_config;
+ return 0;
default:
return -EINVAL;
}
@@ -596,7 +609,7 @@ static int tc_qdisc_modify(struct bpf_tc_hook *hook, int cmd, int flags)
req.tc.tcm_family = AF_UNSPEC;
req.tc.tcm_ifindex = OPTS_GET(hook, ifindex, 0);
- ret = config(&req);
+ ret = config(&req, hook);
if (ret < 0)
return ret;
@@ -639,6 +652,7 @@ int bpf_tc_hook_destroy(struct bpf_tc_hook *hook)
case BPF_TC_INGRESS:
case BPF_TC_EGRESS:
return libbpf_err(__bpf_tc_detach(hook, NULL, true));
+ case BPF_TC_QDISC:
case BPF_TC_INGRESS | BPF_TC_EGRESS:
return libbpf_err(tc_qdisc_delete(hook));
case BPF_TC_CUSTOM:
diff --git a/tools/net/ynl/Makefile.deps b/tools/net/ynl/Makefile.deps
index f3269ce39e5b..90686e241157 100644
--- a/tools/net/ynl/Makefile.deps
+++ b/tools/net/ynl/Makefile.deps
@@ -20,13 +20,30 @@ CFLAGS_ethtool:=$(call get_hdr_inc,_LINUX_ETHTOOL_H,ethtool.h) \
$(call get_hdr_inc,_LINUX_ETHTOOL_NETLINK_H_,ethtool_netlink.h) \
$(call get_hdr_inc,_LINUX_ETHTOOL_NETLINK_GENERATED_H,ethtool_netlink_generated.h)
CFLAGS_handshake:=$(call get_hdr_inc,_LINUX_HANDSHAKE_H,handshake.h)
+CFLAGS_lockd_netlink:=$(call get_hdr_inc,_LINUX_LOCKD_NETLINK_H,lockd_netlink.h)
CFLAGS_mptcp_pm:=$(call get_hdr_inc,_LINUX_MPTCP_PM_H,mptcp_pm.h)
CFLAGS_net_shaper:=$(call get_hdr_inc,_LINUX_NET_SHAPER_H,net_shaper.h)
CFLAGS_netdev:=$(call get_hdr_inc,_LINUX_NETDEV_H,netdev.h)
CFLAGS_nl80211:=$(call get_hdr_inc,__LINUX_NL802121_H,nl80211.h)
CFLAGS_nlctrl:=$(call get_hdr_inc,__LINUX_GENERIC_NETLINK_H,genetlink.h)
CFLAGS_nfsd:=$(call get_hdr_inc,_LINUX_NFSD_NETLINK_H,nfsd_netlink.h)
+CFLAGS_ovpn:=$(call get_hdr_inc,_LINUX_OVPN_H,ovpn.h)
CFLAGS_ovs_datapath:=$(call get_hdr_inc,__LINUX_OPENVSWITCH_H,openvswitch.h)
CFLAGS_ovs_flow:=$(call get_hdr_inc,__LINUX_OPENVSWITCH_H,openvswitch.h)
CFLAGS_ovs_vport:=$(call get_hdr_inc,__LINUX_OPENVSWITCH_H,openvswitch.h)
+CFLAGS_rt-addr:=$(call get_hdr_inc,__LINUX_RTNETLINK_H,rtnetlink.h) \
+ $(call get_hdr_inc,__LINUX_IF_ADDR_H,if_addr.h)
+CFLAGS_rt-link:=$(call get_hdr_inc,__LINUX_RTNETLINK_H,rtnetlink.h) \
+ $(call get_hdr_inc,_LINUX_IF_LINK_H,if_link.h)
+CFLAGS_rt-neigh:=$(call get_hdr_inc,__LINUX_RTNETLINK_H,rtnetlink.h) \
+ $(call get_hdr_inc,__LINUX_NEIGHBOUR_H,neighbour.h)
+CFLAGS_rt-route:=$(call get_hdr_inc,__LINUX_RTNETLINK_H,rtnetlink.h)
+CFLAGS_rt-rule:=$(call get_hdr_inc,__LINUX_FIB_RULES_H,fib_rules.h)
+CFLAGS_tc:= $(call get_hdr_inc,__LINUX_RTNETLINK_H,rtnetlink.h) \
+ $(call get_hdr_inc,__LINUX_PKT_SCHED_H,pkt_sched.h) \
+ $(call get_hdr_inc,__LINUX_PKT_CLS_H,pkt_cls.h) \
+ $(call get_hdr_inc,_TC_CT_H,tc_act/tc_ct.h) \
+ $(call get_hdr_inc,_TC_MIRRED_H,tc_act/tc_mirred.h) \
+ $(call get_hdr_inc,_TC_SKBEDIT_H,tc_act/tc_skbedit.h) \
+ $(call get_hdr_inc,_TC_TUNNEL_KEY_H,tc_act/tc_tunnel_key.h)
CFLAGS_tcp_metrics:=$(call get_hdr_inc,_LINUX_TCP_METRICS_H,tcp_metrics.h)
diff --git a/tools/net/ynl/generated/Makefile b/tools/net/ynl/generated/Makefile
index 21f9e299dc75..86e1e4a959a7 100644
--- a/tools/net/ynl/generated/Makefile
+++ b/tools/net/ynl/generated/Makefile
@@ -22,10 +22,9 @@ TOOL:=../pyynl/ynl_gen_c.py
TOOL_RST:=../pyynl/ynl_gen_rst.py
SPECS_DIR:=../../../../Documentation/netlink/specs
-GENS_PATHS=$(shell grep -nrI --files-without-match \
- 'protocol: netlink' \
- $(SPECS_DIR))
-GENS=$(patsubst $(SPECS_DIR)/%.yaml,%,${GENS_PATHS})
+SPECS_PATHS=$(wildcard $(SPECS_DIR)/*.yaml)
+GENS_UNSUP=conntrack nftables
+GENS=$(filter-out ${GENS_UNSUP},$(patsubst $(SPECS_DIR)/%.yaml,%,${SPECS_PATHS}))
SRCS=$(patsubst %,%-user.c,${GENS})
HDRS=$(patsubst %,%-user.h,${GENS})
OBJS=$(patsubst %,%-user.o,${GENS})
diff --git a/tools/net/ynl/lib/ynl-priv.h b/tools/net/ynl/lib/ynl-priv.h
index 3c09a7bbfba5..824777d7e05e 100644
--- a/tools/net/ynl/lib/ynl-priv.h
+++ b/tools/net/ynl/lib/ynl-priv.h
@@ -25,6 +25,7 @@ enum ynl_policy_type {
YNL_PT_UINT,
YNL_PT_NUL_STR,
YNL_PT_BITFIELD32,
+ YNL_PT_SUBMSG,
};
enum ynl_parse_result {
@@ -42,7 +43,10 @@ typedef int (*ynl_parse_cb_t)(const struct nlmsghdr *nlh,
struct ynl_parse_arg *yarg);
struct ynl_policy_attr {
- enum ynl_policy_type type;
+ enum ynl_policy_type type:8;
+ __u8 is_submsg:1;
+ __u8 is_selector:1;
+ __u16 selector_type;
unsigned int len;
const char *name;
const struct ynl_policy_nest *nest;
@@ -94,12 +98,17 @@ struct ynl_ntf_base_type {
unsigned char data[] __attribute__((aligned(8)));
};
+struct nlmsghdr *ynl_msg_start_req(struct ynl_sock *ys, __u32 id, __u16 flags);
+struct nlmsghdr *ynl_msg_start_dump(struct ynl_sock *ys, __u32 id);
+
struct nlmsghdr *
ynl_gemsg_start_req(struct ynl_sock *ys, __u32 id, __u8 cmd, __u8 version);
struct nlmsghdr *
ynl_gemsg_start_dump(struct ynl_sock *ys, __u32 id, __u8 cmd, __u8 version);
int ynl_attr_validate(struct ynl_parse_arg *yarg, const struct nlattr *attr);
+int ynl_submsg_failed(struct ynl_parse_arg *yarg, const char *field_name,
+ const char *sel_name);
/* YNL specific helpers used by the auto-generated code */
@@ -204,11 +213,15 @@ static inline void *ynl_attr_data_end(const struct nlattr *attr)
NLMSG_HDRLEN + fixed_hdr_sz); attr; \
(attr) = ynl_attr_next(ynl_nlmsg_end_addr(nlh), attr))
-#define ynl_attr_for_each_nested(attr, outer) \
+#define ynl_attr_for_each_nested_off(attr, outer, offset) \
for ((attr) = ynl_attr_first(outer, outer->nla_len, \
- sizeof(struct nlattr)); attr; \
+ sizeof(struct nlattr) + offset); \
+ attr; \
(attr) = ynl_attr_next(ynl_attr_data_end(outer), attr))
+#define ynl_attr_for_each_nested(attr, outer) \
+ ynl_attr_for_each_nested_off(attr, outer, 0)
+
#define ynl_attr_for_each_payload(start, len, attr) \
for ((attr) = ynl_attr_first(start, len, 0); attr; \
(attr) = ynl_attr_next(start + len, attr))
diff --git a/tools/net/ynl/lib/ynl.c b/tools/net/ynl/lib/ynl.c
index c4da34048ef8..2a169c3c0797 100644
--- a/tools/net/ynl/lib/ynl.c
+++ b/tools/net/ynl/lib/ynl.c
@@ -45,8 +45,39 @@
#define perr(_ys, _msg) __yerr(&(_ys)->err, errno, _msg)
/* -- Netlink boiler plate */
+static bool
+ynl_err_walk_is_sel(const struct ynl_policy_nest *policy,
+ const struct nlattr *attr)
+{
+ unsigned int type = ynl_attr_type(attr);
+
+ return policy && type <= policy->max_attr &&
+ policy->table[type].is_selector;
+}
+
+static const struct ynl_policy_nest *
+ynl_err_walk_sel_policy(const struct ynl_policy_attr *policy_attr,
+ const struct nlattr *selector)
+{
+ const struct ynl_policy_nest *policy = policy_attr->nest;
+ const char *sel;
+ unsigned int i;
+
+ if (!policy_attr->is_submsg)
+ return policy;
+
+ sel = ynl_attr_get_str(selector);
+ for (i = 0; i <= policy->max_attr; i++) {
+ if (!strcmp(sel, policy->table[i].name))
+ return policy->table[i].nest;
+ }
+
+ return NULL;
+}
+
static int
-ynl_err_walk_report_one(const struct ynl_policy_nest *policy, unsigned int type,
+ynl_err_walk_report_one(const struct ynl_policy_nest *policy,
+ const struct nlattr *selector, unsigned int type,
char *str, int str_sz, int *n)
{
if (!policy) {
@@ -67,9 +98,34 @@ ynl_err_walk_report_one(const struct ynl_policy_nest *policy, unsigned int type,
return 1;
}
- if (*n < str_sz)
- *n += snprintf(str, str_sz - *n,
- ".%s", policy->table[type].name);
+ if (*n < str_sz) {
+ int sz;
+
+ sz = snprintf(str, str_sz - *n,
+ ".%s", policy->table[type].name);
+ *n += sz;
+ str += sz;
+ }
+
+ if (policy->table[type].is_submsg) {
+ if (!selector) {
+ if (*n < str_sz)
+ *n += snprintf(str, str_sz, "(!selector)");
+ return 1;
+ }
+
+ if (ynl_attr_type(selector) !=
+ policy->table[type].selector_type) {
+ if (*n < str_sz)
+ *n += snprintf(str, str_sz, "(!=selector)");
+ return 1;
+ }
+
+ if (*n < str_sz)
+ *n += snprintf(str, str_sz - *n, "(%s)",
+ ynl_attr_get_str(selector));
+ }
+
return 0;
}
@@ -78,6 +134,8 @@ ynl_err_walk(struct ynl_sock *ys, void *start, void *end, unsigned int off,
const struct ynl_policy_nest *policy, char *str, int str_sz,
const struct ynl_policy_nest **nest_pol)
{
+ const struct ynl_policy_nest *next_pol;
+ const struct nlattr *selector = NULL;
unsigned int astart_off, aend_off;
const struct nlattr *attr;
unsigned int data_len;
@@ -96,6 +154,10 @@ ynl_err_walk(struct ynl_sock *ys, void *start, void *end, unsigned int off,
ynl_attr_for_each_payload(start, data_len, attr) {
astart_off = (char *)attr - (char *)start;
aend_off = (char *)ynl_attr_data_end(attr) - (char *)start;
+
+ if (ynl_err_walk_is_sel(policy, attr))
+ selector = attr;
+
if (aend_off <= off)
continue;
@@ -109,16 +171,20 @@ ynl_err_walk(struct ynl_sock *ys, void *start, void *end, unsigned int off,
type = ynl_attr_type(attr);
- if (ynl_err_walk_report_one(policy, type, str, str_sz, &n))
+ if (ynl_err_walk_report_one(policy, selector, type, str, str_sz, &n))
+ return n;
+
+ next_pol = ynl_err_walk_sel_policy(&policy->table[type], selector);
+ if (!next_pol)
return n;
if (!off) {
if (nest_pol)
- *nest_pol = policy->table[type].nest;
+ *nest_pol = next_pol;
return n;
}
- if (!policy->table[type].nest) {
+ if (!next_pol) {
if (n < str_sz)
n += snprintf(str, str_sz, "!nest");
return n;
@@ -128,7 +194,7 @@ ynl_err_walk(struct ynl_sock *ys, void *start, void *end, unsigned int off,
start = ynl_attr_data(attr);
end = start + ynl_attr_data_len(attr);
- return n + ynl_err_walk(ys, start, end, off, policy->table[type].nest,
+ return n + ynl_err_walk(ys, start, end, off, next_pol,
&str[n], str_sz - n, nest_pol);
}
@@ -191,12 +257,12 @@ ynl_ext_ack_check(struct ynl_sock *ys, const struct nlmsghdr *nlh,
n = snprintf(bad_attr, sizeof(bad_attr), "%sbad attribute: ",
str ? " (" : "");
- start = ynl_nlmsg_data_offset(ys->nlh, ys->family->hdr_len);
+ start = ynl_nlmsg_data_offset(ys->nlh, ys->req_hdr_len);
end = ynl_nlmsg_end_addr(ys->nlh);
off = ys->err.attr_offs;
off -= sizeof(struct nlmsghdr);
- off -= ys->family->hdr_len;
+ off -= ys->req_hdr_len;
n += ynl_err_walk(ys, start, end, off, ys->req_policy,
&bad_attr[n], sizeof(bad_attr) - n, NULL);
@@ -216,14 +282,14 @@ ynl_ext_ack_check(struct ynl_sock *ys, const struct nlmsghdr *nlh,
n = snprintf(miss_attr, sizeof(miss_attr), "%smissing attribute: ",
bad_attr[0] ? ", " : (str ? " (" : ""));
- start = ynl_nlmsg_data_offset(ys->nlh, ys->family->hdr_len);
+ start = ynl_nlmsg_data_offset(ys->nlh, ys->req_hdr_len);
end = ynl_nlmsg_end_addr(ys->nlh);
nest_pol = ys->req_policy;
if (tb[NLMSGERR_ATTR_MISS_NEST]) {
off = ynl_attr_get_u32(tb[NLMSGERR_ATTR_MISS_NEST]);
off -= sizeof(struct nlmsghdr);
- off -= ys->family->hdr_len;
+ off -= ys->req_hdr_len;
n += ynl_err_walk(ys, start, end, off, ys->req_policy,
&miss_attr[n], sizeof(miss_attr) - n,
@@ -231,7 +297,7 @@ ynl_ext_ack_check(struct ynl_sock *ys, const struct nlmsghdr *nlh,
}
n2 = 0;
- ynl_err_walk_report_one(nest_pol, type, &miss_attr[n],
+ ynl_err_walk_report_one(nest_pol, NULL, type, &miss_attr[n],
sizeof(miss_attr) - n, &n2);
n += n2;
@@ -384,6 +450,15 @@ int ynl_attr_validate(struct ynl_parse_arg *yarg, const struct nlattr *attr)
return 0;
}
+int ynl_submsg_failed(struct ynl_parse_arg *yarg, const char *field_name,
+ const char *sel_name)
+{
+ yerr(yarg->ys, YNL_ERROR_SUBMSG_KEY,
+ "Parsing error: Sub-message key not set (msg %s, key %s)",
+ field_name, sel_name);
+ return YNL_PARSE_CB_ERROR;
+}
+
/* Generic code */
static void ynl_err_reset(struct ynl_sock *ys)
@@ -451,14 +526,14 @@ ynl_gemsg_start(struct ynl_sock *ys, __u32 id, __u16 flags,
return nlh;
}
-void ynl_msg_start_req(struct ynl_sock *ys, __u32 id)
+struct nlmsghdr *ynl_msg_start_req(struct ynl_sock *ys, __u32 id, __u16 flags)
{
- ynl_msg_start(ys, id, NLM_F_REQUEST | NLM_F_ACK);
+ return ynl_msg_start(ys, id, NLM_F_REQUEST | NLM_F_ACK | flags);
}
-void ynl_msg_start_dump(struct ynl_sock *ys, __u32 id)
+struct nlmsghdr *ynl_msg_start_dump(struct ynl_sock *ys, __u32 id)
{
- ynl_msg_start(ys, id, NLM_F_REQUEST | NLM_F_ACK | NLM_F_DUMP);
+ return ynl_msg_start(ys, id, NLM_F_REQUEST | NLM_F_ACK | NLM_F_DUMP);
}
struct nlmsghdr *
@@ -663,6 +738,7 @@ ynl_sock_create(const struct ynl_family *yf, struct ynl_error *yse)
struct sockaddr_nl addr;
struct ynl_sock *ys;
socklen_t addrlen;
+ int sock_type;
int one = 1;
ys = malloc(sizeof(*ys) + 2 * YNL_SOCKET_BUFFER_SIZE);
@@ -675,7 +751,9 @@ ynl_sock_create(const struct ynl_family *yf, struct ynl_error *yse)
ys->rx_buf = &ys->raw_buf[YNL_SOCKET_BUFFER_SIZE];
ys->ntf_last_next = &ys->ntf_first;
- ys->socket = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC);
+ sock_type = yf->is_classic ? yf->classic_id : NETLINK_GENERIC;
+
+ ys->socket = socket(AF_NETLINK, SOCK_RAW, sock_type);
if (ys->socket < 0) {
__perr(yse, "failed to create a netlink socket");
goto err_free_sock;
@@ -708,8 +786,9 @@ ynl_sock_create(const struct ynl_family *yf, struct ynl_error *yse)
ys->portid = addr.nl_pid;
ys->seq = random();
-
- if (ynl_sock_read_family(ys, yf->name)) {
+ if (yf->is_classic) {
+ ys->family_id = yf->classic_id;
+ } else if (ynl_sock_read_family(ys, yf->name)) {
if (yse)
memcpy(yse, &ys->err, sizeof(*yse));
goto err_close_sock;
@@ -791,13 +870,21 @@ static int ynl_ntf_parse(struct ynl_sock *ys, const struct nlmsghdr *nlh)
struct ynl_parse_arg yarg = { .ys = ys, };
const struct ynl_ntf_info *info;
struct ynl_ntf_base_type *rsp;
- struct genlmsghdr *gehdr;
+ __u32 cmd;
int ret;
- gehdr = ynl_nlmsg_data(nlh);
- if (gehdr->cmd >= ys->family->ntf_info_size)
+ if (ys->family->is_classic) {
+ cmd = nlh->nlmsg_type;
+ } else {
+ struct genlmsghdr *gehdr;
+
+ gehdr = ynl_nlmsg_data(nlh);
+ cmd = gehdr->cmd;
+ }
+
+ if (cmd >= ys->family->ntf_info_size)
return YNL_PARSE_CB_ERROR;
- info = &ys->family->ntf_info[gehdr->cmd];
+ info = &ys->family->ntf_info[cmd];
if (!info->cb)
return YNL_PARSE_CB_ERROR;
@@ -811,7 +898,7 @@ static int ynl_ntf_parse(struct ynl_sock *ys, const struct nlmsghdr *nlh)
goto err_free;
rsp->family = nlh->nlmsg_type;
- rsp->cmd = gehdr->cmd;
+ rsp->cmd = cmd;
*ys->ntf_last_next = rsp;
ys->ntf_last_next = &rsp->next;
@@ -863,18 +950,23 @@ int ynl_error_parse(struct ynl_parse_arg *yarg, const char *msg)
static int
ynl_check_alien(struct ynl_sock *ys, const struct nlmsghdr *nlh, __u32 rsp_cmd)
{
- struct genlmsghdr *gehdr;
+ if (ys->family->is_classic) {
+ if (nlh->nlmsg_type != rsp_cmd)
+ return ynl_ntf_parse(ys, nlh);
+ } else {
+ struct genlmsghdr *gehdr;
+
+ if (ynl_nlmsg_data_len(nlh) < sizeof(*gehdr)) {
+ yerr(ys, YNL_ERROR_INV_RESP,
+ "Kernel responded with truncated message");
+ return -1;
+ }
- if (ynl_nlmsg_data_len(nlh) < sizeof(*gehdr)) {
- yerr(ys, YNL_ERROR_INV_RESP,
- "Kernel responded with truncated message");
- return -1;
+ gehdr = ynl_nlmsg_data(nlh);
+ if (gehdr->cmd != rsp_cmd)
+ return ynl_ntf_parse(ys, nlh);
}
- gehdr = ynl_nlmsg_data(nlh);
- if (gehdr->cmd != rsp_cmd)
- return ynl_ntf_parse(ys, nlh);
-
return 0;
}
diff --git a/tools/net/ynl/lib/ynl.h b/tools/net/ynl/lib/ynl.h
index 6cd570b283ea..db7c0591a63f 100644
--- a/tools/net/ynl/lib/ynl.h
+++ b/tools/net/ynl/lib/ynl.h
@@ -2,6 +2,7 @@
#ifndef __YNL_C_H
#define __YNL_C_H 1
+#include <stdbool.h>
#include <stddef.h>
#include <linux/genetlink.h>
#include <linux/types.h>
@@ -22,6 +23,7 @@ enum ynl_error_code {
YNL_ERROR_INV_RESP,
YNL_ERROR_INPUT_INVALID,
YNL_ERROR_INPUT_TOO_BIG,
+ YNL_ERROR_SUBMSG_KEY,
};
/**
@@ -48,6 +50,8 @@ struct ynl_family {
/* private: */
const char *name;
size_t hdr_len;
+ bool is_classic;
+ __u16 classic_id;
const struct ynl_ntf_info *ntf_info;
unsigned int ntf_info_size;
};
@@ -77,11 +81,25 @@ struct ynl_sock {
struct nlmsghdr *nlh;
const struct ynl_policy_nest *req_policy;
+ size_t req_hdr_len;
unsigned char *tx_buf;
unsigned char *rx_buf;
unsigned char raw_buf[];
};
+/**
+ * struct ynl_string - parsed individual string
+ * @len: length of the string (excluding terminating character)
+ * @str: value of the string
+ *
+ * Parsed and nul-terminated string. This struct is only used for arrays of
+ * strings. Non-array string members are placed directly in respective types.
+ */
+struct ynl_string {
+ unsigned int len;
+ char str[];
+};
+
struct ynl_sock *
ynl_sock_create(const struct ynl_family *yf, struct ynl_error *e);
void ynl_sock_destroy(struct ynl_sock *ys);
diff --git a/tools/net/ynl/pyynl/cli.py b/tools/net/ynl/pyynl/cli.py
index 794e3c7dcc65..33ccc5c1843b 100755
--- a/tools/net/ynl/pyynl/cli.py
+++ b/tools/net/ynl/pyynl/cli.py
@@ -144,16 +144,17 @@ def main():
ops = [ (item[0], json.loads(item[1]), args.flags or []) for item in args.multi ]
reply = ynl.do_multi(ops)
output(reply)
- except NlError as e:
- print(e)
- exit(1)
- if args.ntf:
- try:
+ if args.ntf:
for msg in ynl.poll_ntf(duration=args.duration):
output(msg)
- except KeyboardInterrupt:
- pass
+ except NlError as e:
+ print(e)
+ exit(1)
+ except KeyboardInterrupt:
+ pass
+ except BrokenPipeError:
+ pass
if __name__ == "__main__":
diff --git a/tools/net/ynl/pyynl/lib/__init__.py b/tools/net/ynl/pyynl/lib/__init__.py
index 9137b83e580a..71518b9842ee 100644
--- a/tools/net/ynl/pyynl/lib/__init__.py
+++ b/tools/net/ynl/pyynl/lib/__init__.py
@@ -1,8 +1,9 @@
# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
from .nlspec import SpecAttr, SpecAttrSet, SpecEnumEntry, SpecEnumSet, \
- SpecFamily, SpecOperation
+ SpecFamily, SpecOperation, SpecSubMessage, SpecSubMessageFormat
from .ynl import YnlFamily, Netlink, NlError
__all__ = ["SpecAttr", "SpecAttrSet", "SpecEnumEntry", "SpecEnumSet",
- "SpecFamily", "SpecOperation", "YnlFamily", "Netlink", "NlError"]
+ "SpecFamily", "SpecOperation", "SpecSubMessage", "SpecSubMessageFormat",
+ "YnlFamily", "Netlink", "NlError"]
diff --git a/tools/net/ynl/pyynl/lib/ynl.py b/tools/net/ynl/pyynl/lib/ynl.py
index dcc2c6b298d6..55b59f6c79b8 100644
--- a/tools/net/ynl/pyynl/lib/ynl.py
+++ b/tools/net/ynl/pyynl/lib/ynl.py
@@ -594,7 +594,7 @@ class YnlFamily(SpecFamily):
scalar_selector = self._get_scalar(attr, value["selector"])
attr_payload = struct.pack("II", scalar_value, scalar_selector)
elif attr['type'] == 'sub-message':
- msg_format = self._resolve_selector(attr, search_attrs)
+ msg_format, _ = self._resolve_selector(attr, search_attrs)
attr_payload = b''
if msg_format.fixed_header:
attr_payload += self._encode_struct(msg_format.fixed_header, value)
@@ -712,10 +712,10 @@ class YnlFamily(SpecFamily):
raise Exception(f"No message format for '{value}' in sub-message spec '{sub_msg}'")
spec = sub_msg_spec.formats[value]
- return spec
+ return spec, value
def _decode_sub_msg(self, attr, attr_spec, search_attrs):
- msg_format = self._resolve_selector(attr_spec, search_attrs)
+ msg_format, _ = self._resolve_selector(attr_spec, search_attrs)
decoded = {}
offset = 0
if msg_format.fixed_header:
@@ -787,7 +787,7 @@ class YnlFamily(SpecFamily):
return rsp
- def _decode_extack_path(self, attrs, attr_set, offset, target):
+ def _decode_extack_path(self, attrs, attr_set, offset, target, search_attrs):
for attr in attrs:
try:
attr_spec = attr_set.attrs_by_val[attr.type]
@@ -801,26 +801,37 @@ class YnlFamily(SpecFamily):
if offset + attr.full_len <= target:
offset += attr.full_len
continue
- if attr_spec['type'] != 'nest':
+
+ pathname = attr_spec.name
+ if attr_spec['type'] == 'nest':
+ sub_attrs = self.attr_sets[attr_spec['nested-attributes']]
+ search_attrs = SpaceAttrs(sub_attrs, search_attrs.lookup(attr_spec['name']))
+ elif attr_spec['type'] == 'sub-message':
+ msg_format, value = self._resolve_selector(attr_spec, search_attrs)
+ if msg_format is None:
+ raise Exception(f"Can't resolve sub-message of {attr_spec['name']} for extack")
+ sub_attrs = self.attr_sets[msg_format.attr_set]
+ pathname += f"({value})"
+ else:
raise Exception(f"Can't dive into {attr.type} ({attr_spec['name']}) for extack")
offset += 4
- subpath = self._decode_extack_path(NlAttrs(attr.raw),
- self.attr_sets[attr_spec['nested-attributes']],
- offset, target)
+ subpath = self._decode_extack_path(NlAttrs(attr.raw), sub_attrs,
+ offset, target, search_attrs)
if subpath is None:
return None
- return '.' + attr_spec.name + subpath
+ return '.' + pathname + subpath
return None
- def _decode_extack(self, request, op, extack):
+ def _decode_extack(self, request, op, extack, vals):
if 'bad-attr-offs' not in extack:
return
msg = self.nlproto.decode(self, NlMsg(request, 0, op.attr_set), op)
offset = self.nlproto.msghdr_size() + self._struct_size(op.fixed_header)
+ search_attrs = SpaceAttrs(op.attr_set, vals)
path = self._decode_extack_path(msg.raw_attrs, op.attr_set, offset,
- extack['bad-attr-offs'])
+ extack['bad-attr-offs'], search_attrs)
if path:
del extack['bad-attr-offs']
extack['bad-attr'] = path
@@ -1012,7 +1023,7 @@ class YnlFamily(SpecFamily):
for (method, vals, flags) in ops:
op = self.ops[method]
msg = self._encode_message(op, vals, flags, req_seq)
- reqs_by_seq[req_seq] = (op, msg, flags)
+ reqs_by_seq[req_seq] = (op, vals, msg, flags)
payload += msg
req_seq += 1
@@ -1027,9 +1038,9 @@ class YnlFamily(SpecFamily):
self._recv_dbg_print(reply, nms)
for nl_msg in nms:
if nl_msg.nl_seq in reqs_by_seq:
- (op, req_msg, req_flags) = reqs_by_seq[nl_msg.nl_seq]
+ (op, vals, req_msg, req_flags) = reqs_by_seq[nl_msg.nl_seq]
if nl_msg.extack:
- self._decode_extack(req_msg, op, nl_msg.extack)
+ self._decode_extack(req_msg, op, nl_msg.extack, vals)
else:
op = None
req_flags = []
diff --git a/tools/net/ynl/pyynl/ynl_gen_c.py b/tools/net/ynl/pyynl/ynl_gen_c.py
index a7f08edbc235..76032e01c2e7 100755
--- a/tools/net/ynl/pyynl/ynl_gen_c.py
+++ b/tools/net/ynl/pyynl/ynl_gen_c.py
@@ -14,6 +14,7 @@ import yaml
sys.path.append(pathlib.Path(__file__).resolve().parent.as_posix())
from lib import SpecFamily, SpecAttrSet, SpecAttr, SpecOperation, SpecEnumSet, SpecEnumEntry
+from lib import SpecSubMessage, SpecSubMessageFormat
def c_upper(name):
@@ -56,11 +57,20 @@ class Type(SpecAttr):
self.request = False
self.reply = False
+ self.is_selector = False
+
if 'len' in attr:
self.len = attr['len']
if 'nested-attributes' in attr:
- self.nested_attrs = attr['nested-attributes']
+ nested = attr['nested-attributes']
+ elif 'sub-message' in attr:
+ nested = attr['sub-message']
+ else:
+ nested = None
+
+ if nested:
+ self.nested_attrs = nested
if self.nested_attrs == family.name:
self.nested_render_name = c_lower(f"{family.ident_name}")
else:
@@ -119,7 +129,9 @@ class Type(SpecAttr):
return c_upper(value)
def resolve(self):
- if 'name-prefix' in self.attr:
+ if 'parent-sub-message' in self.attr:
+ enum_name = self.attr['parent-sub-message'].enum_name
+ elif 'name-prefix' in self.attr:
enum_name = f"{self.attr['name-prefix']}{self.name}"
else:
enum_name = f"{self.attr_set.name_prefix}{self.name}"
@@ -142,19 +154,19 @@ class Type(SpecAttr):
return self.is_recursive() and not ri.op
def presence_type(self):
- return 'bit'
+ return 'present'
def presence_member(self, space, type_filter):
if self.presence_type() != type_filter:
return
- if self.presence_type() == 'bit':
+ if self.presence_type() == 'present':
pfx = '__' if space == 'user' else ''
return f"{pfx}u32 {self.c_name}:1;"
- if self.presence_type() == 'len':
+ if self.presence_type() in {'len', 'count'}:
pfx = '__' if space == 'user' else ''
- return f"{pfx}u32 {self.c_name}_len;"
+ return f"{pfx}u32 {self.c_name};"
def _complex_member_type(self, ri):
return None
@@ -163,7 +175,7 @@ class Type(SpecAttr):
return False
def _free_lines(self, ri, var, ref):
- if self.is_multi_val() or self.presence_type() == 'len':
+ if self.is_multi_val() or self.presence_type() in {'count', 'len'}:
return [f'free({var}->{ref}{self.c_name});']
return []
@@ -175,21 +187,21 @@ class Type(SpecAttr):
def arg_member(self, ri):
member = self._complex_member_type(ri)
if member:
- arg = [member + ' *' + self.c_name]
+ spc = ' ' if member[-1] != '*' else ''
+ arg = [member + spc + '*' + self.c_name]
if self.presence_type() == 'count':
arg += ['unsigned int n_' + self.c_name]
return arg
raise Exception(f"Struct member not implemented for class type {self.type}")
def struct_member(self, ri):
- if self.is_multi_val():
- ri.cw.p(f"unsigned int n_{self.c_name};")
member = self._complex_member_type(ri)
if member:
ptr = '*' if self.is_multi_val() else ''
if self.is_recursive_for_op(ri):
ptr = '*'
- ri.cw.p(f"{member} {ptr}{self.c_name};")
+ spc = ' ' if member[-1] != '*' else ''
+ ri.cw.p(f"{member}{spc}{ptr}{self.c_name};")
return
members = self.arg_member(ri)
for one in members:
@@ -215,10 +227,9 @@ class Type(SpecAttr):
cw.p(f'[{self.enum_name}] = {"{"} .name = "{self.name}", {typol}{"}"},')
def _attr_put_line(self, ri, var, line):
- if self.presence_type() == 'bit':
- ri.cw.p(f"if ({var}->_present.{self.c_name})")
- elif self.presence_type() == 'len':
- ri.cw.p(f"if ({var}->_present.{self.c_name}_len)")
+ presence = self.presence_type()
+ if presence in {'present', 'len'}:
+ ri.cw.p(f"if ({var}->_{presence}.{self.c_name})")
ri.cw.p(f"{line};")
def _attr_put_simple(self, ri, var, put_type):
@@ -248,7 +259,7 @@ class Type(SpecAttr):
if not self.is_multi_val():
ri.cw.p("if (ynl_attr_validate(yarg, attr))")
ri.cw.p("return YNL_PARSE_CB_ERROR;")
- if self.presence_type() == 'bit':
+ if self.presence_type() == 'present':
ri.cw.p(f"{var}->_present.{self.c_name} = 1;")
if init_lines:
@@ -279,7 +290,8 @@ class Type(SpecAttr):
presence = f"{var}->{'.'.join(ref[:i] + [''])}_present.{ref[i]}"
# Every layer below last is a nest, so we know it uses bit presence
# last layer is "self" and may be a complex type
- if i == len(ref) - 1 and self.presence_type() != 'bit':
+ if i == len(ref) - 1 and self.presence_type() != 'present':
+ presence = f"{var}->{'.'.join(ref[:i] + [''])}_{self.presence_type()}.{ref[i]}"
continue
code.append(presence + ' = 1;')
ref_path = '.'.join(ref[:-1])
@@ -355,26 +367,10 @@ class TypeScalar(Type):
if 'byte-order' in attr:
self.byte_order_comment = f" /* {attr['byte-order']} */"
- if 'enum' in self.attr:
- enum = self.family.consts[self.attr['enum']]
- low, high = enum.value_range()
- if 'min' not in self.checks:
- if low != 0 or self.type[0] == 's':
- self.checks['min'] = low
- if 'max' not in self.checks:
- self.checks['max'] = high
-
- if 'min' in self.checks and 'max' in self.checks:
- if self.get_limit('min') > self.get_limit('max'):
- raise Exception(f'Invalid limit for "{self.name}" min: {self.get_limit("min")} max: {self.get_limit("max")}')
- self.checks['range'] = True
-
- low = min(self.get_limit('min', 0), self.get_limit('max', 0))
- high = max(self.get_limit('min', 0), self.get_limit('max', 0))
- if low < 0 and self.type[0] == 'u':
- raise Exception(f'Invalid limit for "{self.name}" negative limit for unsigned type')
- if low < -32768 or high > 32767:
- self.checks['full-range'] = True
+ # Classic families have some funny enums, don't bother
+ # computing checks, since we only need them for kernel policies
+ if not family.is_classic():
+ self._init_checks()
# Added by resolve():
self.is_bitfield = None
@@ -399,6 +395,31 @@ class TypeScalar(Type):
else:
self.type_name = '__' + self.type
+ def _init_checks(self):
+ if 'enum' in self.attr:
+ enum = self.family.consts[self.attr['enum']]
+ low, high = enum.value_range()
+ if low == None and high == None:
+ self.checks['sparse'] = True
+ else:
+ if 'min' not in self.checks:
+ if low != 0 or self.type[0] == 's':
+ self.checks['min'] = low
+ if 'max' not in self.checks:
+ self.checks['max'] = high
+
+ if 'min' in self.checks and 'max' in self.checks:
+ if self.get_limit('min') > self.get_limit('max'):
+ raise Exception(f'Invalid limit for "{self.name}" min: {self.get_limit("min")} max: {self.get_limit("max")}')
+ self.checks['range'] = True
+
+ low = min(self.get_limit('min', 0), self.get_limit('max', 0))
+ high = max(self.get_limit('min', 0), self.get_limit('max', 0))
+ if low < 0 and self.type[0] == 'u':
+ raise Exception(f'Invalid limit for "{self.name}" negative limit for unsigned type')
+ if low < -32768 or high > 32767:
+ self.checks['full-range'] = True
+
def _attr_policy(self, policy):
if 'flags-mask' in self.checks or self.is_bitfield:
if self.is_bitfield:
@@ -417,6 +438,8 @@ class TypeScalar(Type):
return f"NLA_POLICY_MIN({policy}, {self.get_limit_str('min')})"
elif 'max' in self.checks:
return f"NLA_POLICY_MAX({policy}, {self.get_limit_str('max')})"
+ elif 'sparse' in self.checks:
+ return f"NLA_POLICY_VALIDATE_FN({policy}, &{c_lower(self.enum_name)}_validate)"
return super()._attr_policy(policy)
def _attr_typol(self):
@@ -463,7 +486,10 @@ class TypeString(Type):
ri.cw.p(f"char *{self.c_name};")
def _attr_typol(self):
- return f'.type = YNL_PT_NUL_STR, '
+ typol = f'.type = YNL_PT_NUL_STR, '
+ if self.is_selector:
+ typol += '.is_selector = 1, '
+ return typol
def _attr_policy(self, policy):
if 'exact-len' in self.checks:
@@ -488,7 +514,7 @@ class TypeString(Type):
self._attr_put_simple(ri, var, 'str')
def _attr_get(self, ri, var):
- len_mem = var + '->_present.' + self.c_name + '_len'
+ len_mem = var + '->_len.' + self.c_name
return [f"{len_mem} = len;",
f"{var}->{self.c_name} = malloc(len + 1);",
f"memcpy({var}->{self.c_name}, ynl_attr_get_str(attr), len);",
@@ -497,10 +523,10 @@ class TypeString(Type):
['unsigned int len;']
def _setter_lines(self, ri, member, presence):
- return [f"{presence}_len = strlen({self.c_name});",
- f"{member} = malloc({presence}_len + 1);",
- f'memcpy({member}, {self.c_name}, {presence}_len);',
- f'{member}[{presence}_len] = 0;']
+ return [f"{presence} = strlen({self.c_name});",
+ f"{member} = malloc({presence} + 1);",
+ f'memcpy({member}, {self.c_name}, {presence});',
+ f'{member}[{presence}] = 0;']
class TypeBinary(Type):
@@ -539,10 +565,10 @@ class TypeBinary(Type):
def attr_put(self, ri, var):
self._attr_put_line(ri, var, f"ynl_attr_put(nlh, {self.enum_name}, " +
- f"{var}->{self.c_name}, {var}->_present.{self.c_name}_len)")
+ f"{var}->{self.c_name}, {var}->_len.{self.c_name})")
def _attr_get(self, ri, var):
- len_mem = var + '->_present.' + self.c_name + '_len'
+ len_mem = var + '->_len.' + self.c_name
return [f"{len_mem} = len;",
f"{var}->{self.c_name} = malloc(len);",
f"memcpy({var}->{self.c_name}, ynl_attr_data(attr), len);"], \
@@ -550,9 +576,60 @@ class TypeBinary(Type):
['unsigned int len;']
def _setter_lines(self, ri, member, presence):
- return [f"{presence}_len = len;",
- f"{member} = malloc({presence}_len);",
- f'memcpy({member}, {self.c_name}, {presence}_len);']
+ return [f"{presence} = len;",
+ f"{member} = malloc({presence});",
+ f'memcpy({member}, {self.c_name}, {presence});']
+
+
+class TypeBinaryStruct(TypeBinary):
+ def struct_member(self, ri):
+ ri.cw.p(f'struct {c_lower(self.get("struct"))} *{self.c_name};')
+
+ def _attr_get(self, ri, var):
+ struct_sz = 'sizeof(struct ' + c_lower(self.get("struct")) + ')'
+ len_mem = var + '->_' + self.presence_type() + '.' + self.c_name
+ return [f"{len_mem} = len;",
+ f"if (len < {struct_sz})",
+ f"{var}->{self.c_name} = calloc(1, {struct_sz});",
+ "else",
+ f"{var}->{self.c_name} = malloc(len);",
+ f"memcpy({var}->{self.c_name}, ynl_attr_data(attr), len);"], \
+ ['len = ynl_attr_data_len(attr);'], \
+ ['unsigned int len;']
+
+
+class TypeBinaryScalarArray(TypeBinary):
+ def arg_member(self, ri):
+ return [f'__{self.get("sub-type")} *{self.c_name}', 'size_t count']
+
+ def presence_type(self):
+ return 'count'
+
+ def struct_member(self, ri):
+ ri.cw.p(f'__{self.get("sub-type")} *{self.c_name};')
+
+ def attr_put(self, ri, var):
+ presence = self.presence_type()
+ ri.cw.block_start(line=f"if ({var}->_{presence}.{self.c_name})")
+ ri.cw.p(f"i = {var}->_{presence}.{self.c_name} * sizeof(__{self.get('sub-type')});")
+ ri.cw.p(f"ynl_attr_put(nlh, {self.enum_name}, " +
+ f"{var}->{self.c_name}, i);")
+ ri.cw.block_end()
+
+ def _attr_get(self, ri, var):
+ len_mem = var + '->_count.' + self.c_name
+ return [f"{len_mem} = len / sizeof(__{self.get('sub-type')});",
+ f"len = {len_mem} * sizeof(__{self.get('sub-type')});",
+ f"{var}->{self.c_name} = malloc(len);",
+ f"memcpy({var}->{self.c_name}, ynl_attr_data(attr), len);"], \
+ ['len = ynl_attr_data_len(attr);'], \
+ ['unsigned int len;']
+
+ def _setter_lines(self, ri, member, presence):
+ return [f"{presence} = count;",
+ f"count *= sizeof(__{self.get('sub-type')});",
+ f"{member} = malloc(count);",
+ f'memcpy({member}, {self.c_name}, count);']
class TypeBitfield32(Type):
@@ -608,7 +685,11 @@ class TypeNest(Type):
f"{self.enum_name}, {at}{var}->{self.c_name})")
def _attr_get(self, ri, var):
- get_lines = [f"if ({self.nested_render_name}_parse(&parg, attr))",
+ pns = self.family.pure_nested_structs[self.nested_attrs]
+ args = ["&parg", "attr"]
+ for sel in pns.external_selectors():
+ args.append(f'{var}->{sel.name}')
+ get_lines = [f"if ({self.nested_render_name}_parse({', '.join(args)}))",
"return YNL_PARSE_CB_ERROR;"]
init_lines = [f"parg.rsp_policy = &{self.nested_render_name}_nest;",
f"parg.data = &{var}->{self.c_name};"]
@@ -638,22 +719,40 @@ class TypeMultiAttr(Type):
def _complex_member_type(self, ri):
if 'type' not in self.attr or self.attr['type'] == 'nest':
return self.nested_struct_type
+ elif self.attr['type'] == 'binary' and 'struct' in self.attr:
+ return None # use arg_member()
+ elif self.attr['type'] == 'string':
+ return 'struct ynl_string *'
elif self.attr['type'] in scalars:
scalar_pfx = '__' if ri.ku_space == 'user' else ''
return scalar_pfx + self.attr['type']
else:
raise Exception(f"Sub-type {self.attr['type']} not supported yet")
+ def arg_member(self, ri):
+ if self.type == 'binary' and 'struct' in self.attr:
+ return [f'struct {c_lower(self.attr["struct"])} *{self.c_name}',
+ f'unsigned int n_{self.c_name}']
+ return super().arg_member(ri)
+
def free_needs_iter(self):
- return 'type' not in self.attr or self.attr['type'] == 'nest'
+ return self.attr['type'] in {'nest', 'string'}
def _free_lines(self, ri, var, ref):
lines = []
if self.attr['type'] in scalars:
lines += [f"free({var}->{ref}{self.c_name});"]
+ elif self.attr['type'] == 'binary':
+ lines += [f"free({var}->{ref}{self.c_name});"]
+ elif self.attr['type'] == 'string':
+ lines += [
+ f"for (i = 0; i < {var}->{ref}_count.{self.c_name}; i++)",
+ f"free({var}->{ref}{self.c_name}[i]);",
+ f"free({var}->{ref}{self.c_name});",
+ ]
elif 'type' not in self.attr or self.attr['type'] == 'nest':
lines += [
- f"for (i = 0; i < {var}->{ref}n_{self.c_name}; i++)",
+ f"for (i = 0; i < {var}->{ref}_count.{self.c_name}; i++)",
f'{self.nested_render_name}_free(&{var}->{ref}{self.c_name}[i]);',
f"free({var}->{ref}{self.c_name});",
]
@@ -673,18 +772,22 @@ class TypeMultiAttr(Type):
def attr_put(self, ri, var):
if self.attr['type'] in scalars:
put_type = self.type
- ri.cw.p(f"for (i = 0; i < {var}->n_{self.c_name}; i++)")
+ ri.cw.p(f"for (i = 0; i < {var}->_count.{self.c_name}; i++)")
ri.cw.p(f"ynl_attr_put_{put_type}(nlh, {self.enum_name}, {var}->{self.c_name}[i]);")
+ elif self.attr['type'] == 'binary' and 'struct' in self.attr:
+ ri.cw.p(f"for (i = 0; i < {var}->_count.{self.c_name}; i++)")
+ ri.cw.p(f"ynl_attr_put(nlh, {self.enum_name}, &{var}->{self.c_name}[i], sizeof(struct {c_lower(self.attr['struct'])}));")
+ elif self.attr['type'] == 'string':
+ ri.cw.p(f"for (i = 0; i < {var}->_count.{self.c_name}; i++)")
+ ri.cw.p(f"ynl_attr_put_str(nlh, {self.enum_name}, {var}->{self.c_name}[i]->str);")
elif 'type' not in self.attr or self.attr['type'] == 'nest':
- ri.cw.p(f"for (i = 0; i < {var}->n_{self.c_name}; i++)")
+ ri.cw.p(f"for (i = 0; i < {var}->_count.{self.c_name}; i++)")
self._attr_put_line(ri, var, f"{self.nested_render_name}_put(nlh, " +
f"{self.enum_name}, &{var}->{self.c_name}[i])")
else:
raise Exception(f"Put of MultiAttr sub-type {self.attr['type']} not supported yet")
def _setter_lines(self, ri, member, presence):
- # For multi-attr we have a count, not presence, hack up the presence
- presence = presence[:-(len('_present.') + len(self.c_name))] + "n_" + self.c_name
return [f"{member} = {self.c_name};",
f"{presence} = n_{self.c_name};"]
@@ -702,12 +805,22 @@ class TypeArrayNest(Type):
elif self.attr['sub-type'] in scalars:
scalar_pfx = '__' if ri.ku_space == 'user' else ''
return scalar_pfx + self.attr['sub-type']
+ elif self.attr['sub-type'] == 'binary' and 'exact-len' in self.checks:
+ return None # use arg_member()
else:
raise Exception(f"Sub-type {self.attr['sub-type']} not supported yet")
+ def arg_member(self, ri):
+ if self.sub_type == 'binary' and 'exact-len' in self.checks:
+ return [f'unsigned char (*{self.c_name})[{self.checks["exact-len"]}]',
+ f'unsigned int n_{self.c_name}']
+ return super().arg_member(ri)
+
def _attr_typol(self):
if self.attr['sub-type'] in scalars:
return f'.type = YNL_PT_U{c_upper(self.sub_type[1:])}, '
+ elif self.attr['sub-type'] == 'binary' and 'exact-len' in self.checks:
+ return f'.type = YNL_PT_BINARY, .len = {self.checks["exact-len"]}, '
else:
return f'.type = YNL_PT_NEST, .nest = &{self.nested_render_name}_nest, '
@@ -717,10 +830,31 @@ class TypeArrayNest(Type):
'ynl_attr_for_each_nested(attr2, attr) {',
'\tif (ynl_attr_validate(yarg, attr2))',
'\t\treturn YNL_PARSE_CB_ERROR;',
- f'\t{var}->n_{self.c_name}++;',
+ f'\t{var}->_count.{self.c_name}++;',
'}']
return get_lines, None, local_vars
+ def attr_put(self, ri, var):
+ ri.cw.p(f'array = ynl_attr_nest_start(nlh, {self.enum_name});')
+ if self.sub_type in scalars:
+ put_type = self.sub_type
+ ri.cw.block_start(line=f'for (i = 0; i < {var}->_count.{self.c_name}; i++)')
+ ri.cw.p(f"ynl_attr_put_{put_type}(nlh, i, {var}->{self.c_name}[i]);")
+ ri.cw.block_end()
+ elif self.sub_type == 'binary' and 'exact-len' in self.checks:
+ ri.cw.p(f'for (i = 0; i < {var}->_count.{self.c_name}; i++)')
+ ri.cw.p(f"ynl_attr_put(nlh, i, {var}->{self.c_name}[i], {self.checks['exact-len']});")
+ elif self.sub_type == 'nest':
+ ri.cw.p(f'for (i = 0; i < {var}->_count.{self.c_name}; i++)')
+ ri.cw.p(f"{self.nested_render_name}_put(nlh, i, &{var}->{self.c_name}[i]);")
+ else:
+ raise Exception(f"Put for ArrayNest sub-type {self.attr['sub-type']} not supported, yet")
+ ri.cw.p('ynl_attr_nest_end(nlh, array);')
+
+ def _setter_lines(self, ri, member, presence):
+ return [f"{member} = {self.c_name};",
+ f"{presence} = n_{self.c_name};"]
+
class TypeNestTypeValue(Type):
def _complex_member_type(self, ri):
@@ -752,14 +886,71 @@ class TypeNestTypeValue(Type):
return get_lines, init_lines, local_vars
+class TypeSubMessage(TypeNest):
+ def __init__(self, family, attr_set, attr, value):
+ super().__init__(family, attr_set, attr, value)
+
+ self.selector = Selector(attr, attr_set)
+
+ def _attr_typol(self):
+ typol = f'.type = YNL_PT_NEST, .nest = &{self.nested_render_name}_nest, '
+ typol += '.is_submsg = 1, '
+ # Reverse-parsing of the policy (ynl_err_walk() in ynl.c) does not
+ # support external selectors. No family uses sub-messages with external
+ # selector for requests so this is fine for now.
+ if not self.selector.is_external():
+ typol += f'.selector_type = {self.attr_set[self["selector"]].value} '
+ return typol
+
+ def _attr_get(self, ri, var):
+ sel = c_lower(self['selector'])
+ if self.selector.is_external():
+ sel_var = f"_sel_{sel}"
+ else:
+ sel_var = f"{var}->{sel}"
+ get_lines = [f'if (!{sel_var})',
+ f'return ynl_submsg_failed(yarg, "%s", "%s");' %
+ (self.name, self['selector']),
+ f"if ({self.nested_render_name}_parse(&parg, {sel_var}, attr))",
+ "return YNL_PARSE_CB_ERROR;"]
+ init_lines = [f"parg.rsp_policy = &{self.nested_render_name}_nest;",
+ f"parg.data = &{var}->{self.c_name};"]
+ return get_lines, init_lines, None
+
+
+class Selector:
+ def __init__(self, msg_attr, attr_set):
+ self.name = msg_attr["selector"]
+
+ if self.name in attr_set:
+ self.attr = attr_set[self.name]
+ self.attr.is_selector = True
+ self._external = False
+ else:
+ # The selector will need to get passed down thru the structs
+ self.attr = None
+ self._external = True
+
+ def set_attr(self, attr):
+ self.attr = attr
+
+ def is_external(self):
+ return self._external
+
+
class Struct:
- def __init__(self, family, space_name, type_list=None, inherited=None):
+ def __init__(self, family, space_name, type_list=None, fixed_header=None,
+ inherited=None, submsg=None):
self.family = family
self.space_name = space_name
self.attr_set = family.attr_sets[space_name]
# Use list to catch comparisons with empty sets
self._inherited = inherited if inherited is not None else []
self.inherited = []
+ self.fixed_header = None
+ if fixed_header:
+ self.fixed_header = 'struct ' + c_lower(fixed_header)
+ self.submsg = submsg
self.nested = type_list is None
if family.name == c_lower(space_name):
@@ -809,6 +1000,19 @@ class Struct:
raise Exception("Inheriting different members not supported")
self.inherited = [c_lower(x) for x in sorted(self._inherited)]
+ def external_selectors(self):
+ sels = []
+ for name, attr in self.attr_list:
+ if isinstance(attr, TypeSubMessage) and attr.selector.is_external():
+ sels.append(attr.selector)
+ return sels
+
+ def free_needs_iter(self):
+ for _, attr in self.attr_list:
+ if attr.free_needs_iter():
+ return True
+ return False
+
class EnumEntry(SpecEnumEntry):
def __init__(self, enum_set, yaml, prev, value_start):
@@ -862,7 +1066,7 @@ class EnumSet(SpecEnumSet):
high = max([x.value for x in self.entries.values()])
if high - low + 1 != len(self.entries):
- raise Exception("Can't get value range for a noncontiguous enum")
+ return None, None
return low, high
@@ -909,18 +1113,25 @@ class AttrSet(SpecAttrSet):
elif elem['type'] == 'string':
t = TypeString(self.family, self, elem, value)
elif elem['type'] == 'binary':
- t = TypeBinary(self.family, self, elem, value)
+ if 'struct' in elem:
+ t = TypeBinaryStruct(self.family, self, elem, value)
+ elif elem.get('sub-type') in scalars:
+ t = TypeBinaryScalarArray(self.family, self, elem, value)
+ else:
+ t = TypeBinary(self.family, self, elem, value)
elif elem['type'] == 'bitfield32':
t = TypeBitfield32(self.family, self, elem, value)
elif elem['type'] == 'nest':
t = TypeNest(self.family, self, elem, value)
elif elem['type'] == 'indexed-array' and 'sub-type' in elem:
- if elem["sub-type"] in ['nest', 'u32']:
+ if elem["sub-type"] in ['binary', 'nest', 'u32']:
t = TypeArrayNest(self.family, self, elem, value)
else:
raise Exception(f'new_attr: unsupported sub-type {elem["sub-type"]}')
elif elem['type'] == 'nest-type-value':
t = TypeNestTypeValue(self.family, self, elem, value)
+ elif elem['type'] == 'sub-message':
+ t = TypeSubMessage(self.family, self, elem, value)
else:
raise Exception(f"No typed class for type {elem['type']}")
@@ -932,6 +1143,14 @@ class AttrSet(SpecAttrSet):
class Operation(SpecOperation):
def __init__(self, family, yaml, req_value, rsp_value):
+ # Fill in missing operation properties (for fixed hdr-only msgs)
+ for mode in ['do', 'dump', 'event']:
+ for direction in ['request', 'reply']:
+ try:
+ yaml[mode][direction].setdefault('attributes', [])
+ except KeyError:
+ pass
+
super().__init__(family, yaml, req_value, rsp_value)
self.render_name = c_lower(family.ident_name + '_' + self.name)
@@ -957,6 +1176,16 @@ class Operation(SpecOperation):
self.has_ntf = True
+class SubMessage(SpecSubMessage):
+ def __init__(self, family, yaml):
+ super().__init__(family, yaml)
+
+ self.render_name = c_lower(family.ident_name + '-' + yaml['name'])
+
+ def resolve(self):
+ self.resolve_up(super())
+
+
class Family(SpecFamily):
def __init__(self, file_name, exclude_ops):
# Added by resolve:
@@ -993,9 +1222,6 @@ class Family(SpecFamily):
def resolve(self):
self.resolve_up(super())
- if self.yaml.get('protocol', 'genetlink') not in {'genetlink', 'genetlink-c', 'genetlink-legacy'}:
- raise Exception("Codegen only supported for genetlink")
-
self.c_name = c_lower(self.ident_name)
if 'name-prefix' in self.yaml['operations']:
self.op_prefix = c_upper(self.yaml['operations']['name-prefix'])
@@ -1018,7 +1244,7 @@ class Family(SpecFamily):
# dict space-name -> 'request': set(attrs), 'reply': set(attrs)
self.root_sets = dict()
- # dict space-name -> set('request', 'reply')
+ # dict space-name -> Struct
self.pure_nested_structs = dict()
self._mark_notify()
@@ -1027,6 +1253,7 @@ class Family(SpecFamily):
self._load_root_sets()
self._load_nested_sets()
self._load_attr_use()
+ self._load_selector_passing()
self._load_hooks()
self.kernel_policy = self.yaml.get('kernel-policy', 'split')
@@ -1042,6 +1269,12 @@ class Family(SpecFamily):
def new_operation(self, elem, req_value, rsp_value):
return Operation(self, elem, req_value, rsp_value)
+ def new_sub_message(self, elem):
+ return SubMessage(self, elem)
+
+ def is_classic(self):
+ return self.proto == 'netlink-raw'
+
def _mark_notify(self):
for op in self.msgs.values():
if 'notify' in op:
@@ -1091,20 +1324,85 @@ class Family(SpecFamily):
for _, spec in self.attr_sets[name].items():
if 'nested-attributes' in spec:
nested = spec['nested-attributes']
- # If the unknown nest we hit is recursive it's fine, it'll be a pointer
- if self.pure_nested_structs[nested].recursive:
- continue
- if nested not in pns_key_seen:
- # Dicts are sorted, this will make struct last
- struct = self.pure_nested_structs.pop(name)
- self.pure_nested_structs[name] = struct
- finished = False
- break
+ elif 'sub-message' in spec:
+ nested = spec.sub_message
+ else:
+ continue
+
+ # If the unknown nest we hit is recursive it's fine, it'll be a pointer
+ if self.pure_nested_structs[nested].recursive:
+ continue
+ if nested not in pns_key_seen:
+ # Dicts are sorted, this will make struct last
+ struct = self.pure_nested_structs.pop(name)
+ self.pure_nested_structs[name] = struct
+ finished = False
+ break
if finished:
pns_key_seen.add(name)
else:
pns_key_list.append(name)
+ def _load_nested_set_nest(self, spec):
+ inherit = set()
+ nested = spec['nested-attributes']
+ if nested not in self.root_sets:
+ if nested not in self.pure_nested_structs:
+ self.pure_nested_structs[nested] = \
+ Struct(self, nested, inherited=inherit,
+ fixed_header=spec.get('fixed-header'))
+ else:
+ raise Exception(f'Using attr set as root and nested not supported - {nested}')
+
+ if 'type-value' in spec:
+ if nested in self.root_sets:
+ raise Exception("Inheriting members to a space used as root not supported")
+ inherit.update(set(spec['type-value']))
+ elif spec['type'] == 'indexed-array':
+ inherit.add('idx')
+ self.pure_nested_structs[nested].set_inherited(inherit)
+
+ return nested
+
+ def _load_nested_set_submsg(self, spec):
+ # Fake the struct type for the sub-message itself
+ # its not a attr_set but codegen wants attr_sets.
+ submsg = self.sub_msgs[spec["sub-message"]]
+ nested = submsg.name
+
+ attrs = []
+ for name, fmt in submsg.formats.items():
+ attr = {
+ "name": name,
+ "parent-sub-message": spec,
+ }
+ if 'attribute-set' in fmt:
+ attr |= {
+ "type": "nest",
+ "nested-attributes": fmt['attribute-set'],
+ }
+ if 'fixed-header' in fmt:
+ attr |= { "fixed-header": fmt["fixed-header"] }
+ elif 'fixed-header' in fmt:
+ attr |= {
+ "type": "binary",
+ "struct": fmt["fixed-header"],
+ }
+ else:
+ attr["type"] = "flag"
+ attrs.append(attr)
+
+ self.attr_sets[nested] = AttrSet(self, {
+ "name": nested,
+ "name-pfx": self.name + '-' + spec.name + '-',
+ "attributes": attrs
+ })
+
+ if nested not in self.pure_nested_structs:
+ self.pure_nested_structs[nested] = Struct(self, nested, submsg=submsg)
+
+ return nested
+
def _load_nested_sets(self):
attr_set_queue = list(self.root_sets.keys())
attr_set_seen = set(self.root_sets.keys())
@@ -1112,37 +1410,32 @@ class Family(SpecFamily):
while len(attr_set_queue):
a_set = attr_set_queue.pop(0)
for attr, spec in self.attr_sets[a_set].items():
- if 'nested-attributes' not in spec:
+ if 'nested-attributes' in spec:
+ nested = self._load_nested_set_nest(spec)
+ elif 'sub-message' in spec:
+ nested = self._load_nested_set_submsg(spec)
+ else:
continue
- nested = spec['nested-attributes']
if nested not in attr_set_seen:
attr_set_queue.append(nested)
attr_set_seen.add(nested)
- inherit = set()
- if nested not in self.root_sets:
- if nested not in self.pure_nested_structs:
- self.pure_nested_structs[nested] = Struct(self, nested, inherited=inherit)
- else:
- raise Exception(f'Using attr set as root and nested not supported - {nested}')
-
- if 'type-value' in spec:
- if nested in self.root_sets:
- raise Exception("Inheriting members to a space used as root not supported")
- inherit.update(set(spec['type-value']))
- elif spec['type'] == 'indexed-array':
- inherit.add('idx')
- self.pure_nested_structs[nested].set_inherited(inherit)
-
for root_set, rs_members in self.root_sets.items():
for attr, spec in self.attr_sets[root_set].items():
if 'nested-attributes' in spec:
nested = spec['nested-attributes']
+ elif 'sub-message' in spec:
+ nested = spec.sub_message
+ else:
+ nested = None
+
+ if nested:
if attr in rs_members['request']:
self.pure_nested_structs[nested].request = True
if attr in rs_members['reply']:
self.pure_nested_structs[nested].reply = True
+
if spec.is_multi_val():
child = self.pure_nested_structs.get(nested)
child.in_multi_val = True
@@ -1152,20 +1445,26 @@ class Family(SpecFamily):
# Propagate the request / reply / recursive
for attr_set, struct in reversed(self.pure_nested_structs.items()):
for _, spec in self.attr_sets[attr_set].items():
- if 'nested-attributes' in spec:
- child_name = spec['nested-attributes']
- struct.child_nests.add(child_name)
- child = self.pure_nested_structs.get(child_name)
- if child:
- if not child.recursive:
- struct.child_nests.update(child.child_nests)
- child.request |= struct.request
- child.reply |= struct.reply
- if spec.is_multi_val():
- child.in_multi_val = True
if attr_set in struct.child_nests:
struct.recursive = True
+ if 'nested-attributes' in spec:
+ child_name = spec['nested-attributes']
+ elif 'sub-message' in spec:
+ child_name = spec.sub_message
+ else:
+ continue
+
+ struct.child_nests.add(child_name)
+ child = self.pure_nested_structs.get(child_name)
+ if child:
+ if not child.recursive:
+ struct.child_nests.update(child.child_nests)
+ child.request |= struct.request
+ child.reply |= struct.reply
+ if spec.is_multi_val():
+ child.in_multi_val = True
+
self._sort_pure_types()
def _load_attr_use(self):
@@ -1184,6 +1483,30 @@ class Family(SpecFamily):
if attr in rs_members['reply']:
spec.set_reply()
+ def _load_selector_passing(self):
+ def all_structs():
+ for k, v in reversed(self.pure_nested_structs.items()):
+ yield k, v
+ for k, _ in self.root_sets.items():
+ yield k, None # we don't have a struct, but it must be terminal
+
+ for attr_set, struct in all_structs():
+ for _, spec in self.attr_sets[attr_set].items():
+ if 'nested-attributes' in spec:
+ child_name = spec['nested-attributes']
+ elif 'sub-message' in spec:
+ child_name = spec.sub_message
+ else:
+ continue
+
+ child = self.pure_nested_structs.get(child_name)
+ for selector in child.external_selectors():
+ if selector.name in self.attr_sets[attr_set]:
+ sel_attr = self.attr_sets[attr_set][selector.name]
+ selector.set_attr(sel_attr)
+ else:
+ raise Exception("Passing selector thru more than one layer not supported")
+
def _load_global_policy(self):
global_set = set()
attr_set_name = None
@@ -1233,12 +1556,19 @@ class RenderInfo:
self.op_mode = op_mode
self.op = op
- self.fixed_hdr = None
+ fixed_hdr = op.fixed_header if op else None
+ self.fixed_hdr_len = 'ys->family->hdr_len'
if op and op.fixed_header:
- self.fixed_hdr = 'struct ' + c_lower(op.fixed_header)
+ if op.fixed_header != family.fixed_header:
+ if family.is_classic():
+ self.fixed_hdr_len = f"sizeof(struct {c_lower(fixed_hdr)})"
+ else:
+ raise Exception(f"Per-op fixed header not supported, yet")
+
# 'do' and 'dump' response parsing is identical
self.type_consistent = True
+ self.type_oneside = False
if op_mode != 'do' and 'dump' in op:
if 'do' in op:
if ('reply' in op['do']) != ('reply' in op["dump"]):
@@ -1246,7 +1576,8 @@ class RenderInfo:
elif 'reply' in op['do'] and op["do"]["reply"] != op["dump"]["reply"]:
self.type_consistent = False
else:
- self.type_consistent = False
+ self.type_consistent = True
+ self.type_oneside = True
self.attr_set = attr_set
if not self.attr_set:
@@ -1264,15 +1595,26 @@ class RenderInfo:
self.struct = dict()
if op_mode == 'notify':
- op_mode = 'do'
+ op_mode = 'do' if 'do' in op else 'dump'
for op_dir in ['request', 'reply']:
if op:
type_list = []
if op_dir in op[op_mode]:
type_list = op[op_mode][op_dir]['attributes']
- self.struct[op_dir] = Struct(family, self.attr_set, type_list=type_list)
+ self.struct[op_dir] = Struct(family, self.attr_set,
+ fixed_header=fixed_hdr,
+ type_list=type_list)
if op_mode == 'event':
- self.struct['reply'] = Struct(family, self.attr_set, type_list=op['event']['attributes'])
+ self.struct['reply'] = Struct(family, self.attr_set,
+ fixed_header=fixed_hdr,
+ type_list=op['event']['attributes'])
+
+ def type_empty(self, key):
+ return len(self.struct[key].attr_list) == 0 and \
+ self.struct['request'].fixed_header is None
+
+ def needs_nlflags(self, direction):
+ return self.op_mode == 'do' and direction == 'request' and self.family.is_classic()
class CodeWriter:
@@ -1330,6 +1672,7 @@ class CodeWriter:
if self._silent_block:
ind += 1
self._silent_block = line.endswith(')') and CodeWriter._is_cond(line)
+ self._silent_block |= line.strip() == 'else'
if line[0] == '#':
ind = 0
if add_ind:
@@ -1540,7 +1883,9 @@ def op_prefix(ri, direction, deref=False):
suffix += f"{direction_to_suffix[direction]}"
else:
if direction == 'request':
- suffix += '_req_dump'
+ suffix += '_req'
+ if not ri.type_oneside:
+ suffix += '_dump'
else:
if ri.type_consistent:
if deref:
@@ -1584,11 +1929,37 @@ def print_dump_prototype(ri):
print_prototype(ri, "request")
+def put_typol_submsg(cw, struct):
+ cw.block_start(line=f'const struct ynl_policy_attr {struct.render_name}_policy[] =')
+
+ i = 0
+ for name, arg in struct.member_list():
+ nest = ""
+ if arg.type == 'nest':
+ nest = f" .nest = &{arg.nested_render_name}_nest,"
+ cw.p('[%d] = { .type = YNL_PT_SUBMSG, .name = "%s",%s },' %
+ (i, name, nest))
+ i += 1
+
+ cw.block_end(line=';')
+ cw.nl()
+
+ cw.block_start(line=f'const struct ynl_policy_nest {struct.render_name}_nest =')
+ cw.p(f'.max_attr = {i - 1},')
+ cw.p(f'.table = {struct.render_name}_policy,')
+ cw.block_end(line=';')
+ cw.nl()
+
+
def put_typol_fwd(cw, struct):
cw.p(f'extern const struct ynl_policy_nest {struct.render_name}_nest;')
def put_typol(cw, struct):
+ if struct.submsg:
+ put_typol_submsg(cw, struct)
+ return
+
type_max = struct.attr_set.max_name
cw.block_start(line=f'const struct ynl_policy_attr {struct.render_name}_policy[{type_max} + 1] =')
@@ -1674,13 +2045,24 @@ def put_req_nested(ri, struct):
local_vars = []
init_lines = []
- local_vars.append('struct nlattr *nest;')
- init_lines.append("nest = ynl_attr_nest_start(nlh, attr_type);")
-
+ if struct.submsg is None:
+ local_vars.append('struct nlattr *nest;')
+ init_lines.append("nest = ynl_attr_nest_start(nlh, attr_type);")
+ if struct.fixed_header:
+ local_vars.append('void *hdr;')
+ struct_sz = f'sizeof({struct.fixed_header})'
+ init_lines.append(f"hdr = ynl_nlmsg_put_extra_header(nlh, {struct_sz});")
+ init_lines.append(f"memcpy(hdr, &obj->_hdr, {struct_sz});")
+
+ has_anest = False
+ has_count = False
for _, arg in struct.member_list():
- if arg.presence_type() == 'count':
- local_vars.append('unsigned int i;')
- break
+ has_anest |= arg.type == 'indexed-array'
+ has_count |= arg.presence_type() == 'count'
+ if has_anest:
+ local_vars.append('struct nlattr *array;')
+ if has_count:
+ local_vars.append('unsigned int i;')
put_req_nested_prototype(ri, struct, suffix='')
ri.cw.block_start()
@@ -1692,7 +2074,8 @@ def put_req_nested(ri, struct):
for _, arg in struct.member_list():
arg.attr_put(ri, "obj")
- ri.cw.p("ynl_attr_nest_end(nlh, nest);")
+ if struct.submsg is None:
+ ri.cw.p("ynl_attr_nest_end(nlh, nest);")
ri.cw.nl()
ri.cw.p('return 0;')
@@ -1701,19 +2084,27 @@ def put_req_nested(ri, struct):
def _multi_parse(ri, struct, init_lines, local_vars):
+ if struct.fixed_header:
+ local_vars += ['void *hdr;']
if struct.nested:
- iter_line = "ynl_attr_for_each_nested(attr, nested)"
+ if struct.fixed_header:
+ iter_line = f"ynl_attr_for_each_nested_off(attr, nested, sizeof({struct.fixed_header}))"
+ else:
+ iter_line = "ynl_attr_for_each_nested(attr, nested)"
else:
- if ri.fixed_hdr:
- local_vars += ['void *hdr;']
iter_line = "ynl_attr_for_each(attr, nlh, yarg->ys->family->hdr_len)"
+ if ri.op.fixed_header != ri.family.fixed_header:
+ if ri.family.is_classic():
+ iter_line = f"ynl_attr_for_each(attr, nlh, sizeof({struct.fixed_header}))"
+ else:
+ raise Exception(f"Per-op fixed header not supported, yet")
array_nests = set()
multi_attrs = set()
needs_parg = False
for arg, aspec in struct.member_list():
if aspec['type'] == 'indexed-array' and 'sub-type' in aspec:
- if aspec["sub-type"] == 'nest':
+ if aspec["sub-type"] in {'binary', 'nest'}:
local_vars.append(f'const struct nlattr *attr_{aspec.c_name};')
array_nests.add(arg)
elif aspec['sub-type'] in scalars:
@@ -1724,6 +2115,7 @@ def _multi_parse(ri, struct, init_lines, local_vars):
if 'multi-attr' in aspec:
multi_attrs.add(arg)
needs_parg |= 'nested-attributes' in aspec
+ needs_parg |= 'sub-message' in aspec
if array_nests or multi_attrs:
local_vars.append('int i;')
if needs_parg:
@@ -1745,9 +2137,14 @@ def _multi_parse(ri, struct, init_lines, local_vars):
for arg in struct.inherited:
ri.cw.p(f'dst->{arg} = {arg};')
- if ri.fixed_hdr:
- ri.cw.p('hdr = ynl_nlmsg_data_offset(nlh, sizeof(struct genlmsghdr));')
- ri.cw.p(f"memcpy(&dst->_hdr, hdr, sizeof({ri.fixed_hdr}));")
+ if struct.fixed_header:
+ if struct.nested:
+ ri.cw.p('hdr = ynl_attr_data(nested);')
+ elif ri.family.is_classic():
+ ri.cw.p('hdr = ynl_nlmsg_data(nlh);')
+ else:
+ ri.cw.p('hdr = ynl_nlmsg_data_offset(nlh, sizeof(struct genlmsghdr));')
+ ri.cw.p(f"memcpy(&dst->_hdr, hdr, sizeof({struct.fixed_header}));")
for anest in sorted(all_multi):
aspec = struct[anest]
ri.cw.p(f"if (dst->{aspec.c_name})")
@@ -1772,7 +2169,7 @@ def _multi_parse(ri, struct, init_lines, local_vars):
ri.cw.block_start(line=f"if (n_{aspec.c_name})")
ri.cw.p(f"dst->{aspec.c_name} = calloc(n_{aspec.c_name}, sizeof(*dst->{aspec.c_name}));")
- ri.cw.p(f"dst->n_{aspec.c_name} = n_{aspec.c_name};")
+ ri.cw.p(f"dst->_count.{aspec.c_name} = n_{aspec.c_name};")
ri.cw.p('i = 0;')
if 'nested-attributes' in aspec:
ri.cw.p(f"parg.rsp_policy = &{aspec.nested_render_name}_nest;")
@@ -1783,6 +2180,9 @@ def _multi_parse(ri, struct, init_lines, local_vars):
ri.cw.p('return YNL_PARSE_CB_ERROR;')
elif aspec.sub_type in scalars:
ri.cw.p(f"dst->{aspec.c_name}[i] = ynl_attr_get_{aspec.sub_type}(attr);")
+ elif aspec.sub_type == 'binary' and 'exact-len' in aspec.checks:
+ # Length is validated by typol
+ ri.cw.p(f'memcpy(dst->{aspec.c_name}[i], ynl_attr_data(attr), {aspec.checks["exact-len"]});')
else:
raise Exception(f"Nest parsing type not supported in {aspec['name']}")
ri.cw.p('i++;')
@@ -1794,7 +2194,7 @@ def _multi_parse(ri, struct, init_lines, local_vars):
aspec = struct[anest]
ri.cw.block_start(line=f"if (n_{aspec.c_name})")
ri.cw.p(f"dst->{aspec.c_name} = calloc(n_{aspec.c_name}, sizeof(*dst->{aspec.c_name}));")
- ri.cw.p(f"dst->n_{aspec.c_name} = n_{aspec.c_name};")
+ ri.cw.p(f"dst->_count.{aspec.c_name} = n_{aspec.c_name};")
ri.cw.p('i = 0;')
if 'nested-attributes' in aspec:
ri.cw.p(f"parg.rsp_policy = &{aspec.nested_render_name}_nest;")
@@ -1806,8 +2206,22 @@ def _multi_parse(ri, struct, init_lines, local_vars):
ri.cw.p('return YNL_PARSE_CB_ERROR;')
elif aspec.type in scalars:
ri.cw.p(f"dst->{aspec.c_name}[i] = ynl_attr_get_{aspec.type}(attr);")
+ elif aspec.type == 'binary' and 'struct' in aspec:
+ ri.cw.p('size_t len = ynl_attr_data_len(attr);')
+ ri.cw.nl()
+ ri.cw.p(f'if (len > sizeof(dst->{aspec.c_name}[0]))')
+ ri.cw.p(f'len = sizeof(dst->{aspec.c_name}[0]);')
+ ri.cw.p(f"memcpy(&dst->{aspec.c_name}[i], ynl_attr_data(attr), len);")
+ elif aspec.type == 'string':
+ ri.cw.p('unsigned int len;')
+ ri.cw.nl()
+ ri.cw.p('len = strnlen(ynl_attr_get_str(attr), ynl_attr_data_len(attr));')
+ ri.cw.p(f'dst->{aspec.c_name}[i] = malloc(sizeof(struct ynl_string) + len + 1);')
+ ri.cw.p(f"dst->{aspec.c_name}[i]->len = len;")
+ ri.cw.p(f"memcpy(dst->{aspec.c_name}[i]->str, ynl_attr_get_str(attr), len);")
+ ri.cw.p(f"dst->{aspec.c_name}[i]->str[len] = 0;")
else:
- raise Exception('Nest parsing type not supported yet')
+ raise Exception(f'Nest parsing of type {aspec.type} not supported yet')
ri.cw.p('i++;')
ri.cw.block_end()
ri.cw.block_end()
@@ -1822,9 +2236,49 @@ def _multi_parse(ri, struct, init_lines, local_vars):
ri.cw.nl()
+def parse_rsp_submsg(ri, struct):
+ parse_rsp_nested_prototype(ri, struct, suffix='')
+
+ var = 'dst'
+ local_vars = {'const struct nlattr *attr = nested;',
+ f'{struct.ptr_name}{var} = yarg->data;',
+ 'struct ynl_parse_arg parg;'}
+
+ for _, arg in struct.member_list():
+ _, _, l_vars = arg._attr_get(ri, var)
+ local_vars |= set(l_vars) if l_vars else set()
+
+ ri.cw.block_start()
+ ri.cw.write_func_lvar(list(local_vars))
+ ri.cw.p('parg.ys = yarg->ys;')
+ ri.cw.nl()
+
+ first = True
+ for name, arg in struct.member_list():
+ kw = 'if' if first else 'else if'
+ first = False
+
+ ri.cw.block_start(line=f'{kw} (!strcmp(sel, "{name}"))')
+ get_lines, init_lines, _ = arg._attr_get(ri, var)
+ for line in init_lines or []:
+ ri.cw.p(line)
+ for line in get_lines:
+ ri.cw.p(line)
+ if arg.presence_type() == 'present':
+ ri.cw.p(f"{var}->_present.{arg.c_name} = 1;")
+ ri.cw.block_end()
+ ri.cw.p('return 0;')
+ ri.cw.block_end()
+ ri.cw.nl()
+
+
def parse_rsp_nested_prototype(ri, struct, suffix=';'):
func_args = ['struct ynl_parse_arg *yarg',
'const struct nlattr *nested']
+ for sel in struct.external_selectors():
+ func_args.append('const char *_sel_' + sel.name)
+ if struct.submsg:
+ func_args.insert(1, 'const char *sel')
for arg in struct.inherited:
func_args.append('__u32 ' + arg)
@@ -1833,6 +2287,9 @@ def parse_rsp_nested_prototype(ri, struct, suffix=';'):
def parse_rsp_nested(ri, struct):
+ if struct.submsg:
+ return parse_rsp_submsg(ri, struct)
+
parse_rsp_nested_prototype(ri, struct, suffix='')
local_vars = ['const struct nlattr *attr;',
@@ -1885,7 +2342,7 @@ def print_req(ri):
ret_err = 'NULL'
local_vars += [f'{type_name(ri, rdir(direction))} *rsp;']
- if ri.fixed_hdr:
+ if ri.struct["request"].fixed_header:
local_vars += ['size_t hdr_len;',
'void *hdr;']
@@ -1898,14 +2355,18 @@ def print_req(ri):
ri.cw.block_start()
ri.cw.write_func_lvar(local_vars)
- ri.cw.p(f"nlh = ynl_gemsg_start_req(ys, {ri.nl.get_family_id()}, {ri.op.enum_name}, 1);")
+ if ri.family.is_classic():
+ ri.cw.p(f"nlh = ynl_msg_start_req(ys, {ri.op.enum_name}, req->_nlmsg_flags);")
+ else:
+ ri.cw.p(f"nlh = ynl_gemsg_start_req(ys, {ri.nl.get_family_id()}, {ri.op.enum_name}, 1);")
ri.cw.p(f"ys->req_policy = &{ri.struct['request'].render_name}_nest;")
+ ri.cw.p(f"ys->req_hdr_len = {ri.fixed_hdr_len};")
if 'reply' in ri.op[ri.op_mode]:
ri.cw.p(f"yrs.yarg.rsp_policy = &{ri.struct['reply'].render_name}_nest;")
ri.cw.nl()
- if ri.fixed_hdr:
+ if ri.struct['request'].fixed_header:
ri.cw.p("hdr_len = sizeof(req->_hdr);")
ri.cw.p("hdr = ynl_nlmsg_put_extra_header(nlh, hdr_len);")
ri.cw.p("memcpy(hdr, &req->_hdr, hdr_len);")
@@ -1951,7 +2412,7 @@ def print_dump(ri):
'struct nlmsghdr *nlh;',
'int err;']
- if ri.fixed_hdr:
+ if ri.struct['request'].fixed_header:
local_vars += ['size_t hdr_len;',
'void *hdr;']
@@ -1967,9 +2428,12 @@ def print_dump(ri):
else:
ri.cw.p(f'yds.rsp_cmd = {ri.op.rsp_value};')
ri.cw.nl()
- ri.cw.p(f"nlh = ynl_gemsg_start_dump(ys, {ri.nl.get_family_id()}, {ri.op.enum_name}, 1);")
+ if ri.family.is_classic():
+ ri.cw.p(f"nlh = ynl_msg_start_dump(ys, {ri.op.enum_name});")
+ else:
+ ri.cw.p(f"nlh = ynl_gemsg_start_dump(ys, {ri.nl.get_family_id()}, {ri.op.enum_name}, 1);")
- if ri.fixed_hdr:
+ if ri.struct['request'].fixed_header:
ri.cw.p("hdr_len = sizeof(req->_hdr);")
ri.cw.p("hdr = ynl_nlmsg_put_extra_header(nlh, hdr_len);")
ri.cw.p("memcpy(hdr, &req->_hdr, hdr_len);")
@@ -1977,6 +2441,7 @@ def print_dump(ri):
if "request" in ri.op[ri.op_mode]:
ri.cw.p(f"ys->req_policy = &{ri.struct['request'].render_name}_nest;")
+ ri.cw.p(f"ys->req_hdr_len = {ri.fixed_hdr_len};")
ri.cw.nl()
for _, attr in ri.struct["request"].member_list():
attr.attr_put(ri, "req")
@@ -2022,32 +2487,45 @@ def print_free_prototype(ri, direction, suffix=';'):
ri.cw.write_func_prot('void', f"{name}_free", [f"struct {struct_name} *{arg}"], suffix=suffix)
+def print_nlflags_set(ri, direction):
+ name = op_prefix(ri, direction)
+ ri.cw.write_func_prot(f'static inline void', f"{name}_set_nlflags",
+ [f"struct {name} *req", "__u16 nl_flags"])
+ ri.cw.block_start()
+ ri.cw.p('req->_nlmsg_flags = nl_flags;')
+ ri.cw.block_end()
+ ri.cw.nl()
+
+
def _print_type(ri, direction, struct):
suffix = f'_{ri.type_name}{direction_to_suffix[direction]}'
if not direction and ri.type_name_conflict:
suffix += '_'
- if ri.op_mode == 'dump':
+ if ri.op_mode == 'dump' and not ri.type_oneside:
suffix += '_dump'
ri.cw.block_start(line=f"struct {ri.family.c_name}{suffix}")
- if ri.fixed_hdr:
- ri.cw.p(ri.fixed_hdr + ' _hdr;')
+ if ri.needs_nlflags(direction):
+ ri.cw.p('__u16 _nlmsg_flags;')
+ ri.cw.nl()
+ if struct.fixed_header:
+ ri.cw.p(struct.fixed_header + ' _hdr;')
ri.cw.nl()
- meta_started = False
- for _, attr in struct.member_list():
- for type_filter in ['len', 'bit']:
+ for type_filter in ['present', 'len', 'count']:
+ meta_started = False
+ for _, attr in struct.member_list():
line = attr.presence_member(ri.ku_space, type_filter)
if line:
if not meta_started:
ri.cw.block_start(line=f"struct")
meta_started = True
ri.cw.p(line)
- if meta_started:
- ri.cw.block_end(line='_present;')
- ri.cw.nl()
+ if meta_started:
+ ri.cw.block_end(line=f'_{type_filter};')
+ ri.cw.nl()
for arg in struct.inherited:
ri.cw.p(f"__u32 {arg};")
@@ -2071,6 +2549,9 @@ def print_type_helpers(ri, direction, deref=False):
print_free_prototype(ri, direction)
ri.cw.nl()
+ if ri.needs_nlflags(direction):
+ print_nlflags_set(ri, direction)
+
if ri.ku_space == 'user' and direction == 'request':
for _, attr in ri.struct[direction].member_list():
attr.setter(ri, ri.attr_set, direction, deref=deref)
@@ -2078,7 +2559,7 @@ def print_type_helpers(ri, direction, deref=False):
def print_req_type_helpers(ri):
- if len(ri.struct["request"].attr_list) == 0:
+ if ri.type_empty("request"):
return
print_alloc_wrapper(ri, "request")
print_type_helpers(ri, "request")
@@ -2101,7 +2582,7 @@ def print_parse_prototype(ri, direction, terminate=True):
def print_req_type(ri):
- if len(ri.struct["request"].attr_list) == 0:
+ if ri.type_empty("request"):
return
print_type(ri, "request")
@@ -2139,11 +2620,9 @@ def print_wrapped_type(ri):
def _free_type_members_iter(ri, struct):
- for _, attr in struct.member_list():
- if attr.free_needs_iter():
- ri.cw.p('unsigned int i;')
- ri.cw.nl()
- break
+ if struct.free_needs_iter():
+ ri.cw.p('unsigned int i;')
+ ri.cw.nl()
def _free_type_members(ri, var, struct, ref=''):
@@ -2280,6 +2759,46 @@ def print_kernel_policy_ranges(family, cw):
cw.nl()
+def print_kernel_policy_sparse_enum_validates(family, cw):
+ first = True
+ for _, attr_set in family.attr_sets.items():
+ if attr_set.subset_of:
+ continue
+
+ for _, attr in attr_set.items():
+ if not attr.request:
+ continue
+ if not attr.enum_name:
+ continue
+ if 'sparse' not in attr.checks:
+ continue
+
+ if first:
+ cw.p('/* Sparse enums validation callbacks */')
+ first = False
+
+ sign = '' if attr.type[0] == 'u' else '_signed'
+ suffix = 'ULL' if attr.type[0] == 'u' else 'LL'
+ cw.write_func_prot('static int', f'{c_lower(attr.enum_name)}_validate',
+ ['const struct nlattr *attr', 'struct netlink_ext_ack *extack'])
+ cw.block_start()
+ cw.block_start(line=f'switch (nla_get_{attr["type"]}(attr))')
+ enum = family.consts[attr['enum']]
+ first_entry = True
+ for entry in enum.entries.values():
+ if first_entry:
+ first_entry = False
+ else:
+ cw.p('fallthrough;')
+ cw.p(f'case {entry.c_name}:')
+ cw.p('return 0;')
+ cw.block_end()
+ cw.p('NL_SET_ERR_MSG_ATTR(extack, attr, "invalid enum value");')
+ cw.p('return -EINVAL;')
+ cw.block_end()
+ cw.nl()
+
+
def print_kernel_op_table_fwd(family, cw, terminate):
exported = not kernel_can_gen_family_struct(family)
@@ -2739,7 +3258,11 @@ def render_uapi(family, cw):
def _render_user_ntf_entry(ri, op):
- ri.cw.block_start(line=f"[{op.enum_name}] = ")
+ if not ri.family.is_classic():
+ ri.cw.block_start(line=f"[{op.enum_name}] = ")
+ else:
+ crud_op = ri.family.req_by_value[op.rsp_value]
+ ri.cw.block_start(line=f"[{crud_op.enum_name}] = ")
ri.cw.p(f".alloc_sz\t= sizeof({type_name(ri, 'event')}),")
ri.cw.p(f".cb\t\t= {op_prefix(ri, 'reply', deref=True)}_parse,")
ri.cw.p(f".policy\t\t= &{ri.struct['reply'].render_name}_nest,")
@@ -2754,7 +3277,7 @@ def render_user_family(family, cw, prototype):
return
if family.ntfs:
- cw.block_start(line=f"static const struct ynl_ntf_info {family['name']}_ntf_info[] = ")
+ cw.block_start(line=f"static const struct ynl_ntf_info {family.c_name}_ntf_info[] = ")
for ntf_op_name, ntf_op in family.ntfs.items():
if 'notify' in ntf_op:
op = family.ops[ntf_op['notify']]
@@ -2774,13 +3297,19 @@ def render_user_family(family, cw, prototype):
cw.block_start(f'{symbol} = ')
cw.p(f'.name\t\t= "{family.c_name}",')
- if family.fixed_header:
+ if family.is_classic():
+ cw.p(f'.is_classic\t= true,')
+ cw.p(f'.classic_id\t= {family.get("protonum")},')
+ if family.is_classic():
+ if family.fixed_header:
+ cw.p(f'.hdr_len\t= sizeof(struct {c_lower(family.fixed_header)}),')
+ elif family.fixed_header:
cw.p(f'.hdr_len\t= sizeof(struct genlmsghdr) + sizeof(struct {c_lower(family.fixed_header)}),')
else:
cw.p('.hdr_len\t= sizeof(struct genlmsghdr),')
if family.ntfs:
- cw.p(f".ntf_info\t= {family['name']}_ntf_info,")
- cw.p(f".ntf_info_size\t= YNL_ARRAY_SIZE({family['name']}_ntf_info),")
+ cw.p(f".ntf_info\t= {family.c_name}_ntf_info,")
+ cw.p(f".ntf_info_size\t= YNL_ARRAY_SIZE({family.c_name}_ntf_info),")
cw.block_end(line=';')
@@ -2887,7 +3416,7 @@ def main():
cw.p(f'#include "{hdr_file}"')
cw.p('#include "ynl.h"')
headers = []
- for definition in parsed['definitions']:
+ for definition in parsed['definitions'] + parsed['attribute-sets']:
if 'header' in definition:
headers.append(definition['header'])
if args.mode == 'user':
@@ -2941,6 +3470,7 @@ def main():
print_kernel_family_struct_hdr(parsed, cw)
else:
print_kernel_policy_ranges(parsed, cw)
+ print_kernel_policy_sparse_enum_validates(parsed, cw)
for _, struct in sorted(parsed.pure_nested_structs.items()):
if struct.request:
@@ -3009,7 +3539,7 @@ def main():
ri = RenderInfo(cw, parsed, args.mode, op, 'dump')
print_req_type(ri)
print_req_type_helpers(ri)
- if not ri.type_consistent:
+ if not ri.type_consistent or ri.type_oneside:
print_rsp_type(ri)
print_wrapped_type(ri)
print_dump_prototype(ri)
@@ -3047,8 +3577,7 @@ def main():
has_recursive_nests = True
if has_recursive_nests:
cw.nl()
- for name in parsed.pure_nested_structs:
- struct = Struct(parsed, name)
+ for struct in parsed.pure_nested_structs.values():
put_typol(cw, struct)
for name in parsed.root_sets:
struct = Struct(parsed, name)
@@ -3087,7 +3616,7 @@ def main():
if 'dump' in op:
cw.p(f"/* {op.enum_name} - dump */")
ri = RenderInfo(cw, parsed, args.mode, op, "dump")
- if not ri.type_consistent:
+ if not ri.type_consistent or ri.type_oneside:
parse_rsp_msg(ri, deref=True)
print_req_free(ri)
print_dump_type_free(ri)
diff --git a/tools/net/ynl/pyynl/ynl_gen_rst.py b/tools/net/ynl/pyynl/ynl_gen_rst.py
index 6c56d0d726b4..0cb6348e28d3 100755
--- a/tools/net/ynl/pyynl/ynl_gen_rst.py
+++ b/tools/net/ynl/pyynl/ynl_gen_rst.py
@@ -392,7 +392,7 @@ def parse_arguments() -> argparse.Namespace:
def parse_yaml_file(filename: str) -> str:
- """Transform the YAML specified by filename into a rst-formmated string"""
+ """Transform the YAML specified by filename into an RST-formatted string"""
with open(filename, "r", encoding="utf-8") as spec_file:
yaml_data = yaml.safe_load(spec_file)
content = parse_yaml(yaml_data)
diff --git a/tools/net/ynl/samples/.gitignore b/tools/net/ynl/samples/.gitignore
index dda6686257a7..7f5fca7682d7 100644
--- a/tools/net/ynl/samples/.gitignore
+++ b/tools/net/ynl/samples/.gitignore
@@ -2,4 +2,8 @@ ethtool
devlink
netdev
ovs
-page-pool \ No newline at end of file
+page-pool
+rt-addr
+rt-link
+rt-route
+tc
diff --git a/tools/net/ynl/samples/devlink.c b/tools/net/ynl/samples/devlink.c
index d2611d7ebab4..ac9dfb01f280 100644
--- a/tools/net/ynl/samples/devlink.c
+++ b/tools/net/ynl/samples/devlink.c
@@ -22,6 +22,7 @@ int main(int argc, char **argv)
ynl_dump_foreach(devs, d) {
struct devlink_info_get_req *info_req;
struct devlink_info_get_rsp *info_rsp;
+ unsigned i;
printf("%s/%s:\n", d->bus_name, d->dev_name);
@@ -34,11 +35,11 @@ int main(int argc, char **argv)
if (!info_rsp)
goto err_free_devs;
- if (info_rsp->_present.info_driver_name_len)
+ if (info_rsp->_len.info_driver_name)
printf(" driver: %s\n", info_rsp->info_driver_name);
- if (info_rsp->n_info_version_running)
+ if (info_rsp->_count.info_version_running)
printf(" running fw:\n");
- for (unsigned i = 0; i < info_rsp->n_info_version_running; i++)
+ for (i = 0; i < info_rsp->_count.info_version_running; i++)
printf(" %s: %s\n",
info_rsp->info_version_running[i].info_version_name,
info_rsp->info_version_running[i].info_version_value);
diff --git a/tools/net/ynl/samples/rt-addr.c b/tools/net/ynl/samples/rt-addr.c
new file mode 100644
index 000000000000..2edde5c36b18
--- /dev/null
+++ b/tools/net/ynl/samples/rt-addr.c
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <string.h>
+
+#include <ynl.h>
+
+#include <arpa/inet.h>
+#include <net/if.h>
+
+#include "rt-addr-user.h"
+
+static void rt_addr_print(struct rt_addr_getaddr_rsp *a)
+{
+ char ifname[IF_NAMESIZE];
+ char addr_str[64];
+ const char *addr;
+ const char *name;
+
+ name = if_indextoname(a->_hdr.ifa_index, ifname);
+ if (name)
+ printf("%16s: ", name);
+
+ switch (a->_len.address) {
+ case 4:
+ addr = inet_ntop(AF_INET, a->address,
+ addr_str, sizeof(addr_str));
+ break;
+ case 16:
+ addr = inet_ntop(AF_INET6, a->address,
+ addr_str, sizeof(addr_str));
+ break;
+ default:
+ addr = NULL;
+ break;
+ }
+ if (addr)
+ printf("%s", addr);
+ else
+ printf("[%d]", a->_len.address);
+
+ printf("\n");
+}
+
+int main(int argc, char **argv)
+{
+ struct rt_addr_getaddr_list *rsp;
+ struct rt_addr_getaddr_req *req;
+ struct ynl_error yerr;
+ struct ynl_sock *ys;
+
+ ys = ynl_sock_create(&ynl_rt_addr_family, &yerr);
+ if (!ys) {
+ fprintf(stderr, "YNL: %s\n", yerr.msg);
+ return 1;
+ }
+
+ req = rt_addr_getaddr_req_alloc();
+ if (!req)
+ goto err_destroy;
+
+ rsp = rt_addr_getaddr_dump(ys, req);
+ rt_addr_getaddr_req_free(req);
+ if (!rsp)
+ goto err_close;
+
+ if (ynl_dump_empty(rsp))
+ fprintf(stderr, "Error: no addresses reported\n");
+ ynl_dump_foreach(rsp, addr)
+ rt_addr_print(addr);
+ rt_addr_getaddr_list_free(rsp);
+
+ ynl_sock_destroy(ys);
+ return 0;
+
+err_close:
+ fprintf(stderr, "YNL: %s\n", ys->err.msg);
+err_destroy:
+ ynl_sock_destroy(ys);
+ return 2;
+}
diff --git a/tools/net/ynl/samples/rt-link.c b/tools/net/ynl/samples/rt-link.c
new file mode 100644
index 000000000000..acdd4b4a0f74
--- /dev/null
+++ b/tools/net/ynl/samples/rt-link.c
@@ -0,0 +1,184 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <string.h>
+
+#include <ynl.h>
+
+#include <arpa/inet.h>
+#include <net/if.h>
+
+#include "rt-link-user.h"
+
+static void rt_link_print(struct rt_link_getlink_rsp *r)
+{
+ unsigned int i;
+
+ printf("%3d: ", r->_hdr.ifi_index);
+
+ if (r->_len.ifname)
+ printf("%16s: ", r->ifname);
+
+ if (r->_present.mtu)
+ printf("mtu %5d ", r->mtu);
+
+ if (r->linkinfo._len.kind)
+ printf("kind %-8s ", r->linkinfo.kind);
+ else
+ printf(" %8s ", "");
+
+ if (r->prop_list._count.alt_ifname) {
+ printf("altname ");
+ for (i = 0; i < r->prop_list._count.alt_ifname; i++)
+ printf("%s ", r->prop_list.alt_ifname[i]->str);
+ printf(" ");
+ }
+
+ if (r->linkinfo._present.data && r->linkinfo.data._present.netkit) {
+ struct rt_link_linkinfo_netkit_attrs *netkit;
+ const char *name;
+
+ netkit = &r->linkinfo.data.netkit;
+ printf("primary %d ", netkit->primary);
+
+ name = NULL;
+ if (netkit->_present.policy)
+ name = rt_link_netkit_policy_str(netkit->policy);
+ if (name)
+ printf("policy %s ", name);
+ }
+
+ printf("\n");
+}
+
+static int rt_link_create_netkit(struct ynl_sock *ys)
+{
+ struct rt_link_getlink_ntf *ntf_gl;
+ struct rt_link_newlink_req *req;
+ struct ynl_ntf_base_type *ntf;
+ int ret;
+
+ req = rt_link_newlink_req_alloc();
+ if (!req) {
+ fprintf(stderr, "Can't alloc req\n");
+ return -1;
+ }
+
+ /* rtnetlink doesn't provide info about the created object.
+ * It expects us to set the ECHO flag and the dig the info out
+ * of the notifications...
+ */
+ rt_link_newlink_req_set_nlflags(req, NLM_F_CREATE | NLM_F_ECHO);
+
+ rt_link_newlink_req_set_linkinfo_kind(req, "netkit");
+
+ /* Test error messages */
+ rt_link_newlink_req_set_linkinfo_data_netkit_policy(req, 10);
+ ret = rt_link_newlink(ys, req);
+ if (ret) {
+ printf("Testing error message for policy being bad:\n\t%s\n", ys->err.msg);
+ } else {
+ fprintf(stderr, "Warning: unexpected success creating netkit with bad attrs\n");
+ goto created;
+ }
+
+ rt_link_newlink_req_set_linkinfo_data_netkit_policy(req, NETKIT_DROP);
+
+ ret = rt_link_newlink(ys, req);
+created:
+ rt_link_newlink_req_free(req);
+ if (ret) {
+ fprintf(stderr, "YNL: %s\n", ys->err.msg);
+ return -1;
+ }
+
+ if (!ynl_has_ntf(ys)) {
+ fprintf(stderr,
+ "Warning: interface created but received no notification, won't delete the interface\n");
+ return 0;
+ }
+
+ ntf = ynl_ntf_dequeue(ys);
+ if (ntf->cmd != RTM_NEWLINK) {
+ fprintf(stderr,
+ "Warning: unexpected notification type, won't delete the interface\n");
+ return 0;
+ }
+ ntf_gl = (void *)ntf;
+ ret = ntf_gl->obj._hdr.ifi_index;
+ ynl_ntf_free(ntf);
+
+ return ret;
+}
+
+static void rt_link_del(struct ynl_sock *ys, int ifindex)
+{
+ struct rt_link_dellink_req *req;
+
+ req = rt_link_dellink_req_alloc();
+ if (!req) {
+ fprintf(stderr, "Can't alloc req\n");
+ return;
+ }
+
+ req->_hdr.ifi_index = ifindex;
+ if (rt_link_dellink(ys, req))
+ fprintf(stderr, "YNL: %s\n", ys->err.msg);
+ else
+ fprintf(stderr,
+ "Trying to delete a Netkit interface (ifindex %d)\n",
+ ifindex);
+
+ rt_link_dellink_req_free(req);
+}
+
+int main(int argc, char **argv)
+{
+ struct rt_link_getlink_req_dump *req;
+ struct rt_link_getlink_list *rsp;
+ struct ynl_error yerr;
+ struct ynl_sock *ys;
+ int created = 0;
+
+ ys = ynl_sock_create(&ynl_rt_link_family, &yerr);
+ if (!ys) {
+ fprintf(stderr, "YNL: %s\n", yerr.msg);
+ return 1;
+ }
+
+ if (argc > 1) {
+ fprintf(stderr, "Trying to create a Netkit interface\n");
+ created = rt_link_create_netkit(ys);
+ if (created < 0)
+ goto err_destroy;
+ }
+
+ req = rt_link_getlink_req_dump_alloc();
+ if (!req)
+ goto err_del_ifc;
+
+ rsp = rt_link_getlink_dump(ys, req);
+ rt_link_getlink_req_dump_free(req);
+ if (!rsp)
+ goto err_close;
+
+ if (ynl_dump_empty(rsp))
+ fprintf(stderr, "Error: no links reported\n");
+ ynl_dump_foreach(rsp, link)
+ rt_link_print(link);
+ rt_link_getlink_list_free(rsp);
+
+ if (created)
+ rt_link_del(ys, created);
+
+ ynl_sock_destroy(ys);
+ return 0;
+
+err_close:
+ fprintf(stderr, "YNL: %s\n", ys->err.msg);
+err_del_ifc:
+ if (created)
+ rt_link_del(ys, created);
+err_destroy:
+ ynl_sock_destroy(ys);
+ return 2;
+}
diff --git a/tools/net/ynl/samples/rt-route.c b/tools/net/ynl/samples/rt-route.c
new file mode 100644
index 000000000000..7427104a96df
--- /dev/null
+++ b/tools/net/ynl/samples/rt-route.c
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <string.h>
+
+#include <ynl.h>
+
+#include <arpa/inet.h>
+#include <net/if.h>
+
+#include "rt-route-user.h"
+
+static void rt_route_print(struct rt_route_getroute_rsp *r)
+{
+ char ifname[IF_NAMESIZE];
+ char route_str[64];
+ const char *route;
+ const char *name;
+
+ /* Ignore local */
+ if (r->_hdr.rtm_table == RT_TABLE_LOCAL)
+ return;
+
+ if (r->_present.oif) {
+ name = if_indextoname(r->oif, ifname);
+ if (name)
+ printf("oif: %-16s ", name);
+ }
+
+ if (r->_len.dst) {
+ route = inet_ntop(r->_hdr.rtm_family, r->dst,
+ route_str, sizeof(route_str));
+ printf("dst: %s/%d", route, r->_hdr.rtm_dst_len);
+ }
+
+ if (r->_len.gateway) {
+ route = inet_ntop(r->_hdr.rtm_family, r->gateway,
+ route_str, sizeof(route_str));
+ printf("gateway: %s ", route);
+ }
+
+ printf("\n");
+}
+
+int main(int argc, char **argv)
+{
+ struct rt_route_getroute_req_dump *req;
+ struct rt_route_getroute_list *rsp;
+ struct ynl_error yerr;
+ struct ynl_sock *ys;
+
+ ys = ynl_sock_create(&ynl_rt_route_family, &yerr);
+ if (!ys) {
+ fprintf(stderr, "YNL: %s\n", yerr.msg);
+ return 1;
+ }
+
+ req = rt_route_getroute_req_dump_alloc();
+ if (!req)
+ goto err_destroy;
+
+ rsp = rt_route_getroute_dump(ys, req);
+ rt_route_getroute_req_dump_free(req);
+ if (!rsp)
+ goto err_close;
+
+ if (ynl_dump_empty(rsp))
+ fprintf(stderr, "Error: no routeesses reported\n");
+ ynl_dump_foreach(rsp, route)
+ rt_route_print(route);
+ rt_route_getroute_list_free(rsp);
+
+ ynl_sock_destroy(ys);
+ return 0;
+
+err_close:
+ fprintf(stderr, "YNL: %s\n", ys->err.msg);
+err_destroy:
+ ynl_sock_destroy(ys);
+ return 2;
+}
diff --git a/tools/net/ynl/samples/tc.c b/tools/net/ynl/samples/tc.c
new file mode 100644
index 000000000000..0bfff0fdd792
--- /dev/null
+++ b/tools/net/ynl/samples/tc.c
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <string.h>
+
+#include <ynl.h>
+
+#include <net/if.h>
+
+#include "tc-user.h"
+
+static void tc_qdisc_print(struct tc_getqdisc_rsp *q)
+{
+ char ifname[IF_NAMESIZE];
+ const char *name;
+
+ name = if_indextoname(q->_hdr.tcm_ifindex, ifname);
+ if (name)
+ printf("%16s: ", name);
+
+ if (q->_len.kind) {
+ printf("%s ", q->kind);
+
+ if (q->options._present.fq_codel) {
+ struct tc_fq_codel_attrs *fq_codel;
+ struct tc_fq_codel_xstats *stats;
+
+ fq_codel = &q->options.fq_codel;
+ stats = q->stats2.app.fq_codel;
+
+ if (fq_codel->_present.limit)
+ printf("limit: %dp ", fq_codel->limit);
+ if (fq_codel->_present.target)
+ printf("target: %dms ",
+ (fq_codel->target + 500) / 1000);
+ if (q->stats2.app._len.fq_codel)
+ printf("new_flow_cnt: %d ",
+ stats->qdisc_stats.new_flow_count);
+ }
+ }
+
+ printf("\n");
+}
+
+int main(int argc, char **argv)
+{
+ struct tc_getqdisc_req_dump *req;
+ struct tc_getqdisc_list *rsp;
+ struct ynl_error yerr;
+ struct ynl_sock *ys;
+
+ ys = ynl_sock_create(&ynl_tc_family, &yerr);
+ if (!ys) {
+ fprintf(stderr, "YNL: %s\n", yerr.msg);
+ return 1;
+ }
+
+ req = tc_getqdisc_req_dump_alloc();
+ if (!req)
+ goto err_destroy;
+
+ rsp = tc_getqdisc_dump(ys, req);
+ tc_getqdisc_req_dump_free(req);
+ if (!rsp)
+ goto err_close;
+
+ if (ynl_dump_empty(rsp))
+ fprintf(stderr, "Error: no addresses reported\n");
+ ynl_dump_foreach(rsp, qdisc)
+ tc_qdisc_print(qdisc);
+ tc_getqdisc_list_free(rsp);
+
+ ynl_sock_destroy(ys);
+ return 0;
+
+err_close:
+ fprintf(stderr, "YNL: %s\n", ys->err.msg);
+err_destroy:
+ ynl_sock_destroy(ys);
+ return 2;
+}
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index 84e7f4ed4c97..6aa11cd3db42 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -67,11 +67,13 @@ TARGETS += mseal_system_mappings
TARGETS += nci
TARGETS += net
TARGETS += net/af_unix
+TARGETS += net/can
TARGETS += net/forwarding
TARGETS += net/hsr
TARGETS += net/mptcp
TARGETS += net/netfilter
TARGETS += net/openvswitch
+TARGETS += net/ovpn
TARGETS += net/packetdrill
TARGETS += net/rds
TARGETS += net/tcp_ao
diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index c378d5d07e02..3201a962b3dc 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -71,8 +71,10 @@ CONFIG_NET_IPGRE=y
CONFIG_NET_IPGRE_DEMUX=y
CONFIG_NET_IPIP=y
CONFIG_NET_MPLS_GSO=y
+CONFIG_NET_SCH_BPF=y
CONFIG_NET_SCH_FQ=y
CONFIG_NET_SCH_INGRESS=y
+CONFIG_NET_SCH_HTB=y
CONFIG_NET_SCHED=y
CONFIG_NETDEVSIM=y
CONFIG_NETFILTER=y
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_qdisc.c b/tools/testing/selftests/bpf/prog_tests/bpf_qdisc.c
new file mode 100644
index 000000000000..730357cd0c9a
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_qdisc.c
@@ -0,0 +1,231 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/pkt_sched.h>
+#include <linux/rtnetlink.h>
+#include <test_progs.h>
+
+#include "network_helpers.h"
+#include "bpf_qdisc_fifo.skel.h"
+#include "bpf_qdisc_fq.skel.h"
+#include "bpf_qdisc_fail__incompl_ops.skel.h"
+
+#define LO_IFINDEX 1
+
+static const unsigned int total_bytes = 10 * 1024 * 1024;
+
+static void do_test(char *qdisc)
+{
+ DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = LO_IFINDEX,
+ .attach_point = BPF_TC_QDISC,
+ .parent = TC_H_ROOT,
+ .handle = 0x8000000,
+ .qdisc = qdisc);
+ int srv_fd = -1, cli_fd = -1;
+ int err;
+
+ err = bpf_tc_hook_create(&hook);
+ if (!ASSERT_OK(err, "attach qdisc"))
+ return;
+
+ srv_fd = start_server(AF_INET6, SOCK_STREAM, NULL, 0, 0);
+ if (!ASSERT_OK_FD(srv_fd, "start server"))
+ goto done;
+
+ cli_fd = connect_to_fd(srv_fd, 0);
+ if (!ASSERT_OK_FD(cli_fd, "connect to client"))
+ goto done;
+
+ err = send_recv_data(srv_fd, cli_fd, total_bytes);
+ ASSERT_OK(err, "send_recv_data");
+
+done:
+ if (srv_fd != -1)
+ close(srv_fd);
+ if (cli_fd != -1)
+ close(cli_fd);
+
+ bpf_tc_hook_destroy(&hook);
+}
+
+static void test_fifo(void)
+{
+ struct bpf_qdisc_fifo *fifo_skel;
+
+ fifo_skel = bpf_qdisc_fifo__open_and_load();
+ if (!ASSERT_OK_PTR(fifo_skel, "bpf_qdisc_fifo__open_and_load"))
+ return;
+
+ if (!ASSERT_OK(bpf_qdisc_fifo__attach(fifo_skel), "bpf_qdisc_fifo__attach"))
+ goto out;
+
+ do_test("bpf_fifo");
+out:
+ bpf_qdisc_fifo__destroy(fifo_skel);
+}
+
+static void test_fq(void)
+{
+ struct bpf_qdisc_fq *fq_skel;
+
+ fq_skel = bpf_qdisc_fq__open_and_load();
+ if (!ASSERT_OK_PTR(fq_skel, "bpf_qdisc_fq__open_and_load"))
+ return;
+
+ if (!ASSERT_OK(bpf_qdisc_fq__attach(fq_skel), "bpf_qdisc_fq__attach"))
+ goto out;
+
+ do_test("bpf_fq");
+out:
+ bpf_qdisc_fq__destroy(fq_skel);
+}
+
+static void test_qdisc_attach_to_mq(void)
+{
+ DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook,
+ .attach_point = BPF_TC_QDISC,
+ .parent = TC_H_MAKE(1 << 16, 1),
+ .handle = 0x11 << 16,
+ .qdisc = "bpf_fifo");
+ struct bpf_qdisc_fifo *fifo_skel;
+ int err;
+
+ fifo_skel = bpf_qdisc_fifo__open_and_load();
+ if (!ASSERT_OK_PTR(fifo_skel, "bpf_qdisc_fifo__open_and_load"))
+ return;
+
+ if (!ASSERT_OK(bpf_qdisc_fifo__attach(fifo_skel), "bpf_qdisc_fifo__attach"))
+ goto out;
+
+ SYS(out, "ip link add veth0 type veth peer veth1");
+ hook.ifindex = if_nametoindex("veth0");
+ SYS(out, "tc qdisc add dev veth0 root handle 1: mq");
+
+ err = bpf_tc_hook_create(&hook);
+ ASSERT_OK(err, "attach qdisc");
+
+ bpf_tc_hook_destroy(&hook);
+
+ SYS(out, "tc qdisc delete dev veth0 root mq");
+out:
+ bpf_qdisc_fifo__destroy(fifo_skel);
+}
+
+static void test_qdisc_attach_to_non_root(void)
+{
+ DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = LO_IFINDEX,
+ .attach_point = BPF_TC_QDISC,
+ .parent = TC_H_MAKE(1 << 16, 1),
+ .handle = 0x11 << 16,
+ .qdisc = "bpf_fifo");
+ struct bpf_qdisc_fifo *fifo_skel;
+ int err;
+
+ fifo_skel = bpf_qdisc_fifo__open_and_load();
+ if (!ASSERT_OK_PTR(fifo_skel, "bpf_qdisc_fifo__open_and_load"))
+ return;
+
+ if (!ASSERT_OK(bpf_qdisc_fifo__attach(fifo_skel), "bpf_qdisc_fifo__attach"))
+ goto out;
+
+ SYS(out, "tc qdisc add dev lo root handle 1: htb");
+ SYS(out_del_htb, "tc class add dev lo parent 1: classid 1:1 htb rate 75Kbit");
+
+ err = bpf_tc_hook_create(&hook);
+ if (!ASSERT_ERR(err, "attach qdisc"))
+ bpf_tc_hook_destroy(&hook);
+
+out_del_htb:
+ SYS(out, "tc qdisc delete dev lo root htb");
+out:
+ bpf_qdisc_fifo__destroy(fifo_skel);
+}
+
+static void test_incompl_ops(void)
+{
+ struct bpf_qdisc_fail__incompl_ops *skel;
+ struct bpf_link *link;
+
+ skel = bpf_qdisc_fail__incompl_ops__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "bpf_qdisc_fifo__open_and_load"))
+ return;
+
+ link = bpf_map__attach_struct_ops(skel->maps.test);
+ if (!ASSERT_ERR_PTR(link, "bpf_map__attach_struct_ops"))
+ bpf_link__destroy(link);
+
+ bpf_qdisc_fail__incompl_ops__destroy(skel);
+}
+
+static int get_default_qdisc(char *qdisc_name)
+{
+ FILE *f;
+ int num;
+
+ f = fopen("/proc/sys/net/core/default_qdisc", "r");
+ if (!f)
+ return -errno;
+
+ num = fscanf(f, "%s", qdisc_name);
+ fclose(f);
+
+ return num == 1 ? 0 : -EFAULT;
+}
+
+static void test_default_qdisc_attach_to_mq(void)
+{
+ char default_qdisc[IFNAMSIZ] = {};
+ struct bpf_qdisc_fifo *fifo_skel;
+ struct netns_obj *netns = NULL;
+ int err;
+
+ fifo_skel = bpf_qdisc_fifo__open_and_load();
+ if (!ASSERT_OK_PTR(fifo_skel, "bpf_qdisc_fifo__open_and_load"))
+ return;
+
+ if (!ASSERT_OK(bpf_qdisc_fifo__attach(fifo_skel), "bpf_qdisc_fifo__attach"))
+ goto out;
+
+ err = get_default_qdisc(default_qdisc);
+ if (!ASSERT_OK(err, "read sysctl net.core.default_qdisc"))
+ goto out;
+
+ err = write_sysctl("/proc/sys/net/core/default_qdisc", "bpf_fifo");
+ if (!ASSERT_OK(err, "write sysctl net.core.default_qdisc"))
+ goto out;
+
+ netns = netns_new("bpf_qdisc_ns", true);
+ if (!ASSERT_OK_PTR(netns, "netns_new"))
+ goto out;
+
+ SYS(out, "ip link add veth0 type veth peer veth1");
+ SYS(out, "tc qdisc add dev veth0 root handle 1: mq");
+
+ ASSERT_EQ(fifo_skel->bss->init_called, true, "init_called");
+
+ SYS(out, "tc qdisc delete dev veth0 root mq");
+out:
+ netns_free(netns);
+ if (default_qdisc[0])
+ write_sysctl("/proc/sys/net/core/default_qdisc", default_qdisc);
+
+ bpf_qdisc_fifo__destroy(fifo_skel);
+}
+
+void test_ns_bpf_qdisc(void)
+{
+ if (test__start_subtest("fifo"))
+ test_fifo();
+ if (test__start_subtest("fq"))
+ test_fq();
+ if (test__start_subtest("attach to mq"))
+ test_qdisc_attach_to_mq();
+ if (test__start_subtest("attach to non root"))
+ test_qdisc_attach_to_non_root();
+ if (test__start_subtest("incompl_ops"))
+ test_incompl_ops();
+}
+
+void serial_test_bpf_qdisc_default(void)
+{
+ test_default_qdisc_attach_to_mq();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c b/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c
index d56e18b25528..a4517bee34d5 100644
--- a/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c
+++ b/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c
@@ -7,14 +7,433 @@
#define TEST_NS "sock_iter_batch_netns"
+static const int init_batch_size = 16;
static const int nr_soreuse = 4;
+struct iter_out {
+ int idx;
+ __u64 cookie;
+} __packed;
+
+struct sock_count {
+ __u64 cookie;
+ int count;
+};
+
+static int insert(__u64 cookie, struct sock_count counts[], int counts_len)
+{
+ int insert = -1;
+ int i = 0;
+
+ for (; i < counts_len; i++) {
+ if (!counts[i].cookie) {
+ insert = i;
+ } else if (counts[i].cookie == cookie) {
+ insert = i;
+ break;
+ }
+ }
+ if (insert < 0)
+ return insert;
+
+ counts[insert].cookie = cookie;
+ counts[insert].count++;
+
+ return counts[insert].count;
+}
+
+static int read_n(int iter_fd, int n, struct sock_count counts[],
+ int counts_len)
+{
+ struct iter_out out;
+ int nread = 1;
+ int i = 0;
+
+ for (; nread > 0 && (n < 0 || i < n); i++) {
+ nread = read(iter_fd, &out, sizeof(out));
+ if (!nread || !ASSERT_EQ(nread, sizeof(out), "nread"))
+ break;
+ ASSERT_GE(insert(out.cookie, counts, counts_len), 0, "insert");
+ }
+
+ ASSERT_TRUE(n < 0 || i == n, "n < 0 || i == n");
+
+ return i;
+}
+
+static __u64 socket_cookie(int fd)
+{
+ __u64 cookie;
+ socklen_t cookie_len = sizeof(cookie);
+
+ if (!ASSERT_OK(getsockopt(fd, SOL_SOCKET, SO_COOKIE, &cookie,
+ &cookie_len), "getsockopt(SO_COOKIE)"))
+ return 0;
+ return cookie;
+}
+
+static bool was_seen(int fd, struct sock_count counts[], int counts_len)
+{
+ __u64 cookie = socket_cookie(fd);
+ int i = 0;
+
+ for (; cookie && i < counts_len; i++)
+ if (cookie == counts[i].cookie)
+ return true;
+
+ return false;
+}
+
+static int get_seen_socket(int *fds, struct sock_count counts[], int n)
+{
+ int i = 0;
+
+ for (; i < n; i++)
+ if (was_seen(fds[i], counts, n))
+ return i;
+ return -1;
+}
+
+static int get_nth_socket(int *fds, int fds_len, struct bpf_link *link, int n)
+{
+ int i, nread, iter_fd;
+ int nth_sock_idx = -1;
+ struct iter_out out;
+
+ iter_fd = bpf_iter_create(bpf_link__fd(link));
+ if (!ASSERT_OK_FD(iter_fd, "bpf_iter_create"))
+ return -1;
+
+ for (; n >= 0; n--) {
+ nread = read(iter_fd, &out, sizeof(out));
+ if (!nread || !ASSERT_GE(nread, 1, "nread"))
+ goto done;
+ }
+
+ for (i = 0; i < fds_len && nth_sock_idx < 0; i++)
+ if (fds[i] >= 0 && socket_cookie(fds[i]) == out.cookie)
+ nth_sock_idx = i;
+done:
+ close(iter_fd);
+ return nth_sock_idx;
+}
+
+static int get_seen_count(int fd, struct sock_count counts[], int n)
+{
+ __u64 cookie = socket_cookie(fd);
+ int count = 0;
+ int i = 0;
+
+ for (; cookie && !count && i < n; i++)
+ if (cookie == counts[i].cookie)
+ count = counts[i].count;
+
+ return count;
+}
+
+static void check_n_were_seen_once(int *fds, int fds_len, int n,
+ struct sock_count counts[], int counts_len)
+{
+ int seen_once = 0;
+ int seen_cnt;
+ int i = 0;
+
+ for (; i < fds_len; i++) {
+ /* Skip any sockets that were closed or that weren't seen
+ * exactly once.
+ */
+ if (fds[i] < 0)
+ continue;
+ seen_cnt = get_seen_count(fds[i], counts, counts_len);
+ if (seen_cnt && ASSERT_EQ(seen_cnt, 1, "seen_cnt"))
+ seen_once++;
+ }
+
+ ASSERT_EQ(seen_once, n, "seen_once");
+}
+
+static void remove_seen(int family, int sock_type, const char *addr, __u16 port,
+ int *socks, int socks_len, struct sock_count *counts,
+ int counts_len, struct bpf_link *link, int iter_fd)
+{
+ int close_idx;
+
+ /* Iterate through the first socks_len - 1 sockets. */
+ read_n(iter_fd, socks_len - 1, counts, counts_len);
+
+ /* Make sure we saw socks_len - 1 sockets exactly once. */
+ check_n_were_seen_once(socks, socks_len, socks_len - 1, counts,
+ counts_len);
+
+ /* Close a socket we've already seen to remove it from the bucket. */
+ close_idx = get_seen_socket(socks, counts, counts_len);
+ if (!ASSERT_GE(close_idx, 0, "close_idx"))
+ return;
+ close(socks[close_idx]);
+ socks[close_idx] = -1;
+
+ /* Iterate through the rest of the sockets. */
+ read_n(iter_fd, -1, counts, counts_len);
+
+ /* Make sure the last socket wasn't skipped and that there were no
+ * repeats.
+ */
+ check_n_were_seen_once(socks, socks_len, socks_len - 1, counts,
+ counts_len);
+}
+
+static void remove_unseen(int family, int sock_type, const char *addr,
+ __u16 port, int *socks, int socks_len,
+ struct sock_count *counts, int counts_len,
+ struct bpf_link *link, int iter_fd)
+{
+ int close_idx;
+
+ /* Iterate through the first socket. */
+ read_n(iter_fd, 1, counts, counts_len);
+
+ /* Make sure we saw a socket from fds. */
+ check_n_were_seen_once(socks, socks_len, 1, counts, counts_len);
+
+ /* Close what would be the next socket in the bucket to exercise the
+ * condition where we need to skip past the first cookie we remembered.
+ */
+ close_idx = get_nth_socket(socks, socks_len, link, 1);
+ if (!ASSERT_GE(close_idx, 0, "close_idx"))
+ return;
+ close(socks[close_idx]);
+ socks[close_idx] = -1;
+
+ /* Iterate through the rest of the sockets. */
+ read_n(iter_fd, -1, counts, counts_len);
+
+ /* Make sure the remaining sockets were seen exactly once and that we
+ * didn't repeat the socket that was already seen.
+ */
+ check_n_were_seen_once(socks, socks_len, socks_len - 1, counts,
+ counts_len);
+}
+
+static void remove_all(int family, int sock_type, const char *addr,
+ __u16 port, int *socks, int socks_len,
+ struct sock_count *counts, int counts_len,
+ struct bpf_link *link, int iter_fd)
+{
+ int close_idx, i;
+
+ /* Iterate through the first socket. */
+ read_n(iter_fd, 1, counts, counts_len);
+
+ /* Make sure we saw a socket from fds. */
+ check_n_were_seen_once(socks, socks_len, 1, counts, counts_len);
+
+ /* Close all remaining sockets to exhaust the list of saved cookies and
+ * exit without putting any sockets into the batch on the next read.
+ */
+ for (i = 0; i < socks_len - 1; i++) {
+ close_idx = get_nth_socket(socks, socks_len, link, 1);
+ if (!ASSERT_GE(close_idx, 0, "close_idx"))
+ return;
+ close(socks[close_idx]);
+ socks[close_idx] = -1;
+ }
+
+ /* Make sure there are no more sockets returned */
+ ASSERT_EQ(read_n(iter_fd, -1, counts, counts_len), 0, "read_n");
+}
+
+static void add_some(int family, int sock_type, const char *addr, __u16 port,
+ int *socks, int socks_len, struct sock_count *counts,
+ int counts_len, struct bpf_link *link, int iter_fd)
+{
+ int *new_socks = NULL;
+
+ /* Iterate through the first socks_len - 1 sockets. */
+ read_n(iter_fd, socks_len - 1, counts, counts_len);
+
+ /* Make sure we saw socks_len - 1 sockets exactly once. */
+ check_n_were_seen_once(socks, socks_len, socks_len - 1, counts,
+ counts_len);
+
+ /* Double the number of sockets in the bucket. */
+ new_socks = start_reuseport_server(family, sock_type, addr, port, 0,
+ socks_len);
+ if (!ASSERT_OK_PTR(new_socks, "start_reuseport_server"))
+ goto done;
+
+ /* Iterate through the rest of the sockets. */
+ read_n(iter_fd, -1, counts, counts_len);
+
+ /* Make sure each of the original sockets was seen exactly once. */
+ check_n_were_seen_once(socks, socks_len, socks_len, counts,
+ counts_len);
+done:
+ free_fds(new_socks, socks_len);
+}
+
+static void force_realloc(int family, int sock_type, const char *addr,
+ __u16 port, int *socks, int socks_len,
+ struct sock_count *counts, int counts_len,
+ struct bpf_link *link, int iter_fd)
+{
+ int *new_socks = NULL;
+
+ /* Iterate through the first socket just to initialize the batch. */
+ read_n(iter_fd, 1, counts, counts_len);
+
+ /* Double the number of sockets in the bucket to force a realloc on the
+ * next read.
+ */
+ new_socks = start_reuseport_server(family, sock_type, addr, port, 0,
+ socks_len);
+ if (!ASSERT_OK_PTR(new_socks, "start_reuseport_server"))
+ goto done;
+
+ /* Iterate through the rest of the sockets. */
+ read_n(iter_fd, -1, counts, counts_len);
+
+ /* Make sure each socket from the first set was seen exactly once. */
+ check_n_were_seen_once(socks, socks_len, socks_len, counts,
+ counts_len);
+done:
+ free_fds(new_socks, socks_len);
+}
+
+struct test_case {
+ void (*test)(int family, int sock_type, const char *addr, __u16 port,
+ int *socks, int socks_len, struct sock_count *counts,
+ int counts_len, struct bpf_link *link, int iter_fd);
+ const char *description;
+ int init_socks;
+ int max_socks;
+ int sock_type;
+ int family;
+};
+
+static struct test_case resume_tests[] = {
+ {
+ .description = "udp: resume after removing a seen socket",
+ .init_socks = nr_soreuse,
+ .max_socks = nr_soreuse,
+ .sock_type = SOCK_DGRAM,
+ .family = AF_INET6,
+ .test = remove_seen,
+ },
+ {
+ .description = "udp: resume after removing one unseen socket",
+ .init_socks = nr_soreuse,
+ .max_socks = nr_soreuse,
+ .sock_type = SOCK_DGRAM,
+ .family = AF_INET6,
+ .test = remove_unseen,
+ },
+ {
+ .description = "udp: resume after removing all unseen sockets",
+ .init_socks = nr_soreuse,
+ .max_socks = nr_soreuse,
+ .sock_type = SOCK_DGRAM,
+ .family = AF_INET6,
+ .test = remove_all,
+ },
+ {
+ .description = "udp: resume after adding a few sockets",
+ .init_socks = nr_soreuse,
+ .max_socks = nr_soreuse,
+ .sock_type = SOCK_DGRAM,
+ /* Use AF_INET so that new sockets are added to the head of the
+ * bucket's list.
+ */
+ .family = AF_INET,
+ .test = add_some,
+ },
+ {
+ .description = "udp: force a realloc to occur",
+ .init_socks = init_batch_size,
+ .max_socks = init_batch_size * 2,
+ .sock_type = SOCK_DGRAM,
+ /* Use AF_INET6 so that new sockets are added to the tail of the
+ * bucket's list, needing to be added to the next batch to force
+ * a realloc.
+ */
+ .family = AF_INET6,
+ .test = force_realloc,
+ },
+};
+
+static void do_resume_test(struct test_case *tc)
+{
+ struct sock_iter_batch *skel = NULL;
+ static const __u16 port = 10001;
+ struct bpf_link *link = NULL;
+ struct sock_count *counts;
+ int err, iter_fd = -1;
+ const char *addr;
+ int *fds = NULL;
+ int local_port;
+
+ counts = calloc(tc->max_socks, sizeof(*counts));
+ if (!ASSERT_OK_PTR(counts, "counts"))
+ goto done;
+ skel = sock_iter_batch__open();
+ if (!ASSERT_OK_PTR(skel, "sock_iter_batch__open"))
+ goto done;
+
+ /* Prepare a bucket of sockets in the kernel hashtable */
+ addr = tc->family == AF_INET6 ? "::1" : "127.0.0.1";
+ fds = start_reuseport_server(tc->family, tc->sock_type, addr, port, 0,
+ tc->init_socks);
+ if (!ASSERT_OK_PTR(fds, "start_reuseport_server"))
+ goto done;
+ local_port = get_socket_local_port(*fds);
+ if (!ASSERT_GE(local_port, 0, "get_socket_local_port"))
+ goto done;
+ skel->rodata->ports[0] = ntohs(local_port);
+ skel->rodata->sf = tc->family;
+
+ err = sock_iter_batch__load(skel);
+ if (!ASSERT_OK(err, "sock_iter_batch__load"))
+ goto done;
+
+ link = bpf_program__attach_iter(tc->sock_type == SOCK_STREAM ?
+ skel->progs.iter_tcp_soreuse :
+ skel->progs.iter_udp_soreuse,
+ NULL);
+ if (!ASSERT_OK_PTR(link, "bpf_program__attach_iter"))
+ goto done;
+
+ iter_fd = bpf_iter_create(bpf_link__fd(link));
+ if (!ASSERT_OK_FD(iter_fd, "bpf_iter_create"))
+ goto done;
+
+ tc->test(tc->family, tc->sock_type, addr, port, fds, tc->init_socks,
+ counts, tc->max_socks, link, iter_fd);
+done:
+ free(counts);
+ free_fds(fds, tc->init_socks);
+ if (iter_fd >= 0)
+ close(iter_fd);
+ bpf_link__destroy(link);
+ sock_iter_batch__destroy(skel);
+}
+
+static void do_resume_tests(void)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(resume_tests); i++) {
+ if (test__start_subtest(resume_tests[i].description)) {
+ do_resume_test(&resume_tests[i]);
+ }
+ }
+}
+
static void do_test(int sock_type, bool onebyone)
{
int err, i, nread, to_read, total_read, iter_fd = -1;
- int first_idx, second_idx, indices[nr_soreuse];
+ struct iter_out outputs[nr_soreuse];
struct bpf_link *link = NULL;
struct sock_iter_batch *skel;
+ int first_idx, second_idx;
int *fds[2] = {};
skel = sock_iter_batch__open();
@@ -34,6 +453,7 @@ static void do_test(int sock_type, bool onebyone)
goto done;
skel->rodata->ports[i] = ntohs(local_port);
}
+ skel->rodata->sf = AF_INET6;
err = sock_iter_batch__load(skel);
if (!ASSERT_OK(err, "sock_iter_batch__load"))
@@ -55,38 +475,38 @@ static void do_test(int sock_type, bool onebyone)
* from a bucket and leave one socket out from
* that bucket on purpose.
*/
- to_read = (nr_soreuse - 1) * sizeof(*indices);
+ to_read = (nr_soreuse - 1) * sizeof(*outputs);
total_read = 0;
first_idx = -1;
do {
- nread = read(iter_fd, indices, onebyone ? sizeof(*indices) : to_read);
- if (nread <= 0 || nread % sizeof(*indices))
+ nread = read(iter_fd, outputs, onebyone ? sizeof(*outputs) : to_read);
+ if (nread <= 0 || nread % sizeof(*outputs))
break;
total_read += nread;
if (first_idx == -1)
- first_idx = indices[0];
- for (i = 0; i < nread / sizeof(*indices); i++)
- ASSERT_EQ(indices[i], first_idx, "first_idx");
+ first_idx = outputs[0].idx;
+ for (i = 0; i < nread / sizeof(*outputs); i++)
+ ASSERT_EQ(outputs[i].idx, first_idx, "first_idx");
} while (total_read < to_read);
- ASSERT_EQ(nread, onebyone ? sizeof(*indices) : to_read, "nread");
+ ASSERT_EQ(nread, onebyone ? sizeof(*outputs) : to_read, "nread");
ASSERT_EQ(total_read, to_read, "total_read");
free_fds(fds[first_idx], nr_soreuse);
fds[first_idx] = NULL;
/* Read the "whole" second bucket */
- to_read = nr_soreuse * sizeof(*indices);
+ to_read = nr_soreuse * sizeof(*outputs);
total_read = 0;
second_idx = !first_idx;
do {
- nread = read(iter_fd, indices, onebyone ? sizeof(*indices) : to_read);
- if (nread <= 0 || nread % sizeof(*indices))
+ nread = read(iter_fd, outputs, onebyone ? sizeof(*outputs) : to_read);
+ if (nread <= 0 || nread % sizeof(*outputs))
break;
total_read += nread;
- for (i = 0; i < nread / sizeof(*indices); i++)
- ASSERT_EQ(indices[i], second_idx, "second_idx");
+ for (i = 0; i < nread / sizeof(*outputs); i++)
+ ASSERT_EQ(outputs[i].idx, second_idx, "second_idx");
} while (total_read <= to_read);
ASSERT_EQ(nread, 0, "nread");
/* Both so_reuseport ports should be in different buckets, so
@@ -128,6 +548,7 @@ void test_sock_iter_batch(void)
do_test(SOCK_DGRAM, true);
do_test(SOCK_DGRAM, false);
}
+ do_resume_tests();
close_netns(nstoken);
done:
diff --git a/tools/testing/selftests/bpf/progs/bpf_qdisc_common.h b/tools/testing/selftests/bpf/progs/bpf_qdisc_common.h
new file mode 100644
index 000000000000..3754f581b328
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_qdisc_common.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _BPF_QDISC_COMMON_H
+#define _BPF_QDISC_COMMON_H
+
+#define NET_XMIT_SUCCESS 0x00
+#define NET_XMIT_DROP 0x01 /* skb dropped */
+#define NET_XMIT_CN 0x02 /* congestion notification */
+
+#define TC_PRIO_CONTROL 7
+#define TC_PRIO_MAX 15
+
+#define private(name) SEC(".data." #name) __hidden __attribute__((aligned(8)))
+
+struct bpf_sk_buff_ptr;
+
+static struct qdisc_skb_cb *qdisc_skb_cb(const struct sk_buff *skb)
+{
+ return (struct qdisc_skb_cb *)skb->cb;
+}
+
+static inline unsigned int qdisc_pkt_len(const struct sk_buff *skb)
+{
+ return qdisc_skb_cb(skb)->pkt_len;
+}
+
+#endif
diff --git a/tools/testing/selftests/bpf/progs/bpf_qdisc_fail__incompl_ops.c b/tools/testing/selftests/bpf/progs/bpf_qdisc_fail__incompl_ops.c
new file mode 100644
index 000000000000..f188062ed730
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_qdisc_fail__incompl_ops.c
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <vmlinux.h>
+#include "bpf_experimental.h"
+#include "bpf_qdisc_common.h"
+
+char _license[] SEC("license") = "GPL";
+
+SEC("struct_ops")
+int BPF_PROG(bpf_qdisc_test_enqueue, struct sk_buff *skb, struct Qdisc *sch,
+ struct bpf_sk_buff_ptr *to_free)
+{
+ bpf_qdisc_skb_drop(skb, to_free);
+ return NET_XMIT_DROP;
+}
+
+SEC("struct_ops")
+struct sk_buff *BPF_PROG(bpf_qdisc_test_dequeue, struct Qdisc *sch)
+{
+ return NULL;
+}
+
+SEC("struct_ops")
+void BPF_PROG(bpf_qdisc_test_reset, struct Qdisc *sch)
+{
+}
+
+SEC("struct_ops")
+void BPF_PROG(bpf_qdisc_test_destroy, struct Qdisc *sch)
+{
+}
+
+SEC(".struct_ops")
+struct Qdisc_ops test = {
+ .enqueue = (void *)bpf_qdisc_test_enqueue,
+ .dequeue = (void *)bpf_qdisc_test_dequeue,
+ .reset = (void *)bpf_qdisc_test_reset,
+ .destroy = (void *)bpf_qdisc_test_destroy,
+ .id = "bpf_qdisc_test",
+};
+
diff --git a/tools/testing/selftests/bpf/progs/bpf_qdisc_fifo.c b/tools/testing/selftests/bpf/progs/bpf_qdisc_fifo.c
new file mode 100644
index 000000000000..1de2be3e370b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_qdisc_fifo.c
@@ -0,0 +1,126 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <vmlinux.h>
+#include "bpf_experimental.h"
+#include "bpf_qdisc_common.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct skb_node {
+ struct sk_buff __kptr * skb;
+ struct bpf_list_node node;
+};
+
+private(A) struct bpf_spin_lock q_fifo_lock;
+private(A) struct bpf_list_head q_fifo __contains(skb_node, node);
+
+bool init_called;
+
+SEC("struct_ops/bpf_fifo_enqueue")
+int BPF_PROG(bpf_fifo_enqueue, struct sk_buff *skb, struct Qdisc *sch,
+ struct bpf_sk_buff_ptr *to_free)
+{
+ struct skb_node *skbn;
+ u32 pkt_len;
+
+ if (sch->q.qlen == sch->limit)
+ goto drop;
+
+ skbn = bpf_obj_new(typeof(*skbn));
+ if (!skbn)
+ goto drop;
+
+ pkt_len = qdisc_pkt_len(skb);
+
+ sch->q.qlen++;
+ skb = bpf_kptr_xchg(&skbn->skb, skb);
+ if (skb)
+ bpf_qdisc_skb_drop(skb, to_free);
+
+ bpf_spin_lock(&q_fifo_lock);
+ bpf_list_push_back(&q_fifo, &skbn->node);
+ bpf_spin_unlock(&q_fifo_lock);
+
+ sch->qstats.backlog += pkt_len;
+ return NET_XMIT_SUCCESS;
+drop:
+ bpf_qdisc_skb_drop(skb, to_free);
+ return NET_XMIT_DROP;
+}
+
+SEC("struct_ops/bpf_fifo_dequeue")
+struct sk_buff *BPF_PROG(bpf_fifo_dequeue, struct Qdisc *sch)
+{
+ struct bpf_list_node *node;
+ struct sk_buff *skb = NULL;
+ struct skb_node *skbn;
+
+ bpf_spin_lock(&q_fifo_lock);
+ node = bpf_list_pop_front(&q_fifo);
+ bpf_spin_unlock(&q_fifo_lock);
+ if (!node)
+ return NULL;
+
+ skbn = container_of(node, struct skb_node, node);
+ skb = bpf_kptr_xchg(&skbn->skb, skb);
+ bpf_obj_drop(skbn);
+ if (!skb)
+ return NULL;
+
+ sch->qstats.backlog -= qdisc_pkt_len(skb);
+ bpf_qdisc_bstats_update(sch, skb);
+ sch->q.qlen--;
+
+ return skb;
+}
+
+SEC("struct_ops/bpf_fifo_init")
+int BPF_PROG(bpf_fifo_init, struct Qdisc *sch, struct nlattr *opt,
+ struct netlink_ext_ack *extack)
+{
+ sch->limit = 1000;
+ init_called = true;
+ return 0;
+}
+
+SEC("struct_ops/bpf_fifo_reset")
+void BPF_PROG(bpf_fifo_reset, struct Qdisc *sch)
+{
+ struct bpf_list_node *node;
+ struct skb_node *skbn;
+ int i;
+
+ bpf_for(i, 0, sch->q.qlen) {
+ struct sk_buff *skb = NULL;
+
+ bpf_spin_lock(&q_fifo_lock);
+ node = bpf_list_pop_front(&q_fifo);
+ bpf_spin_unlock(&q_fifo_lock);
+
+ if (!node)
+ break;
+
+ skbn = container_of(node, struct skb_node, node);
+ skb = bpf_kptr_xchg(&skbn->skb, skb);
+ if (skb)
+ bpf_kfree_skb(skb);
+ bpf_obj_drop(skbn);
+ }
+ sch->q.qlen = 0;
+}
+
+SEC("struct_ops")
+void BPF_PROG(bpf_fifo_destroy, struct Qdisc *sch)
+{
+}
+
+SEC(".struct_ops")
+struct Qdisc_ops fifo = {
+ .enqueue = (void *)bpf_fifo_enqueue,
+ .dequeue = (void *)bpf_fifo_dequeue,
+ .init = (void *)bpf_fifo_init,
+ .reset = (void *)bpf_fifo_reset,
+ .destroy = (void *)bpf_fifo_destroy,
+ .id = "bpf_fifo",
+};
+
diff --git a/tools/testing/selftests/bpf/progs/bpf_qdisc_fq.c b/tools/testing/selftests/bpf/progs/bpf_qdisc_fq.c
new file mode 100644
index 000000000000..1a3233a275c7
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_qdisc_fq.c
@@ -0,0 +1,756 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/* bpf_fq is intended for testing the bpf qdisc infrastructure and not a direct
+ * copy of sch_fq. bpf_fq implements the scheduling algorithm of sch_fq before
+ * 29f834aa326e ("net_sched: sch_fq: add 3 bands and WRR scheduling") was
+ * introduced. It gives each flow a fair chance to transmit packets in a
+ * round-robin fashion. Note that for flow pacing, bpf_fq currently only
+ * respects skb->tstamp but not skb->sk->sk_pacing_rate. In addition, if there
+ * are multiple bpf_fq instances, they will have a shared view of flows and
+ * configuration since some key data structure such as fq_prio_flows,
+ * fq_nonprio_flows, and fq_bpf_data are global.
+ *
+ * To use bpf_fq alone without running selftests, use the following commands.
+ *
+ * 1. Register bpf_fq to the kernel
+ * bpftool struct_ops register bpf_qdisc_fq.bpf.o /sys/fs/bpf
+ * 2. Add bpf_fq to an interface
+ * tc qdisc add dev <interface name> root handle <handle> bpf_fq
+ * 3. Delete bpf_fq attached to the interface
+ * tc qdisc delete dev <interface name> root
+ * 4. Unregister bpf_fq
+ * bpftool struct_ops unregister name fq
+ *
+ * The qdisc name, bpf_fq, used in tc commands is defined by Qdisc_ops.id.
+ * The struct_ops_map_name, fq, used in the bpftool command is the name of the
+ * Qdisc_ops.
+ *
+ * SEC(".struct_ops")
+ * struct Qdisc_ops fq = {
+ * ...
+ * .id = "bpf_fq",
+ * };
+ */
+
+#include <vmlinux.h>
+#include <errno.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_experimental.h"
+#include "bpf_qdisc_common.h"
+
+char _license[] SEC("license") = "GPL";
+
+#define NSEC_PER_USEC 1000L
+#define NSEC_PER_SEC 1000000000L
+
+#define NUM_QUEUE (1 << 20)
+
+struct fq_bpf_data {
+ u32 quantum;
+ u32 initial_quantum;
+ u32 flow_refill_delay;
+ u32 flow_plimit;
+ u64 horizon;
+ u32 orphan_mask;
+ u32 timer_slack;
+ u64 time_next_delayed_flow;
+ u64 unthrottle_latency_ns;
+ u8 horizon_drop;
+ u32 new_flow_cnt;
+ u32 old_flow_cnt;
+ u64 ktime_cache;
+};
+
+enum {
+ CLS_RET_PRIO = 0,
+ CLS_RET_NONPRIO = 1,
+ CLS_RET_ERR = 2,
+};
+
+struct skb_node {
+ u64 tstamp;
+ struct sk_buff __kptr * skb;
+ struct bpf_rb_node node;
+};
+
+struct fq_flow_node {
+ int credit;
+ u32 qlen;
+ u64 age;
+ u64 time_next_packet;
+ struct bpf_list_node list_node;
+ struct bpf_rb_node rb_node;
+ struct bpf_rb_root queue __contains(skb_node, node);
+ struct bpf_spin_lock lock;
+ struct bpf_refcount refcount;
+};
+
+struct dequeue_nonprio_ctx {
+ bool stop_iter;
+ u64 expire;
+ u64 now;
+};
+
+struct remove_flows_ctx {
+ bool gc_only;
+ u32 reset_cnt;
+ u32 reset_max;
+};
+
+struct unset_throttled_flows_ctx {
+ bool unset_all;
+ u64 now;
+};
+
+struct fq_stashed_flow {
+ struct fq_flow_node __kptr * flow;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, __u64);
+ __type(value, struct fq_stashed_flow);
+ __uint(max_entries, NUM_QUEUE);
+} fq_nonprio_flows SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, __u64);
+ __type(value, struct fq_stashed_flow);
+ __uint(max_entries, 1);
+} fq_prio_flows SEC(".maps");
+
+private(A) struct bpf_spin_lock fq_delayed_lock;
+private(A) struct bpf_rb_root fq_delayed __contains(fq_flow_node, rb_node);
+
+private(B) struct bpf_spin_lock fq_new_flows_lock;
+private(B) struct bpf_list_head fq_new_flows __contains(fq_flow_node, list_node);
+
+private(C) struct bpf_spin_lock fq_old_flows_lock;
+private(C) struct bpf_list_head fq_old_flows __contains(fq_flow_node, list_node);
+
+private(D) struct fq_bpf_data q;
+
+/* Wrapper for bpf_kptr_xchg that expects NULL dst */
+static void bpf_kptr_xchg_back(void *map_val, void *ptr)
+{
+ void *ret;
+
+ ret = bpf_kptr_xchg(map_val, ptr);
+ if (ret)
+ bpf_obj_drop(ret);
+}
+
+static bool skbn_tstamp_less(struct bpf_rb_node *a, const struct bpf_rb_node *b)
+{
+ struct skb_node *skbn_a;
+ struct skb_node *skbn_b;
+
+ skbn_a = container_of(a, struct skb_node, node);
+ skbn_b = container_of(b, struct skb_node, node);
+
+ return skbn_a->tstamp < skbn_b->tstamp;
+}
+
+static bool fn_time_next_packet_less(struct bpf_rb_node *a, const struct bpf_rb_node *b)
+{
+ struct fq_flow_node *flow_a;
+ struct fq_flow_node *flow_b;
+
+ flow_a = container_of(a, struct fq_flow_node, rb_node);
+ flow_b = container_of(b, struct fq_flow_node, rb_node);
+
+ return flow_a->time_next_packet < flow_b->time_next_packet;
+}
+
+static void
+fq_flows_add_head(struct bpf_list_head *head, struct bpf_spin_lock *lock,
+ struct fq_flow_node *flow, u32 *flow_cnt)
+{
+ bpf_spin_lock(lock);
+ bpf_list_push_front(head, &flow->list_node);
+ bpf_spin_unlock(lock);
+ *flow_cnt += 1;
+}
+
+static void
+fq_flows_add_tail(struct bpf_list_head *head, struct bpf_spin_lock *lock,
+ struct fq_flow_node *flow, u32 *flow_cnt)
+{
+ bpf_spin_lock(lock);
+ bpf_list_push_back(head, &flow->list_node);
+ bpf_spin_unlock(lock);
+ *flow_cnt += 1;
+}
+
+static void
+fq_flows_remove_front(struct bpf_list_head *head, struct bpf_spin_lock *lock,
+ struct bpf_list_node **node, u32 *flow_cnt)
+{
+ bpf_spin_lock(lock);
+ *node = bpf_list_pop_front(head);
+ bpf_spin_unlock(lock);
+ *flow_cnt -= 1;
+}
+
+static bool
+fq_flows_is_empty(struct bpf_list_head *head, struct bpf_spin_lock *lock)
+{
+ struct bpf_list_node *node;
+
+ bpf_spin_lock(lock);
+ node = bpf_list_pop_front(head);
+ if (node) {
+ bpf_list_push_front(head, node);
+ bpf_spin_unlock(lock);
+ return false;
+ }
+ bpf_spin_unlock(lock);
+
+ return true;
+}
+
+/* flow->age is used to denote the state of the flow (not-detached, detached, throttled)
+ * as well as the timestamp when the flow is detached.
+ *
+ * 0: not-detached
+ * 1 - (~0ULL-1): detached
+ * ~0ULL: throttled
+ */
+static void fq_flow_set_detached(struct fq_flow_node *flow)
+{
+ flow->age = bpf_jiffies64();
+}
+
+static bool fq_flow_is_detached(struct fq_flow_node *flow)
+{
+ return flow->age != 0 && flow->age != ~0ULL;
+}
+
+static bool sk_listener(struct sock *sk)
+{
+ return (1 << sk->__sk_common.skc_state) & (TCPF_LISTEN | TCPF_NEW_SYN_RECV);
+}
+
+static void fq_gc(void);
+
+static int fq_new_flow(void *flow_map, struct fq_stashed_flow **sflow, u64 hash)
+{
+ struct fq_stashed_flow tmp = {};
+ struct fq_flow_node *flow;
+ int ret;
+
+ flow = bpf_obj_new(typeof(*flow));
+ if (!flow)
+ return -ENOMEM;
+
+ flow->credit = q.initial_quantum,
+ flow->qlen = 0,
+ flow->age = 1,
+ flow->time_next_packet = 0,
+
+ ret = bpf_map_update_elem(flow_map, &hash, &tmp, 0);
+ if (ret == -ENOMEM || ret == -E2BIG) {
+ fq_gc();
+ bpf_map_update_elem(&fq_nonprio_flows, &hash, &tmp, 0);
+ }
+
+ *sflow = bpf_map_lookup_elem(flow_map, &hash);
+ if (!*sflow) {
+ bpf_obj_drop(flow);
+ return -ENOMEM;
+ }
+
+ bpf_kptr_xchg_back(&(*sflow)->flow, flow);
+ return 0;
+}
+
+static int
+fq_classify(struct sk_buff *skb, struct fq_stashed_flow **sflow)
+{
+ struct sock *sk = skb->sk;
+ int ret = CLS_RET_NONPRIO;
+ u64 hash = 0;
+
+ if ((skb->priority & TC_PRIO_MAX) == TC_PRIO_CONTROL) {
+ *sflow = bpf_map_lookup_elem(&fq_prio_flows, &hash);
+ ret = CLS_RET_PRIO;
+ } else {
+ if (!sk || sk_listener(sk)) {
+ hash = bpf_skb_get_hash(skb) & q.orphan_mask;
+ /* Avoid collision with an existing flow hash, which
+ * only uses the lower 32 bits of hash, by setting the
+ * upper half of hash to 1.
+ */
+ hash |= (1ULL << 32);
+ } else if (sk->__sk_common.skc_state == TCP_CLOSE) {
+ hash = bpf_skb_get_hash(skb) & q.orphan_mask;
+ hash |= (1ULL << 32);
+ } else {
+ hash = sk->__sk_common.skc_hash;
+ }
+ *sflow = bpf_map_lookup_elem(&fq_nonprio_flows, &hash);
+ }
+
+ if (!*sflow)
+ ret = fq_new_flow(&fq_nonprio_flows, sflow, hash) < 0 ?
+ CLS_RET_ERR : CLS_RET_NONPRIO;
+
+ return ret;
+}
+
+static bool fq_packet_beyond_horizon(struct sk_buff *skb)
+{
+ return (s64)skb->tstamp > (s64)(q.ktime_cache + q.horizon);
+}
+
+SEC("struct_ops/bpf_fq_enqueue")
+int BPF_PROG(bpf_fq_enqueue, struct sk_buff *skb, struct Qdisc *sch,
+ struct bpf_sk_buff_ptr *to_free)
+{
+ struct fq_flow_node *flow = NULL, *flow_copy;
+ struct fq_stashed_flow *sflow;
+ u64 time_to_send, jiffies;
+ struct skb_node *skbn;
+ int ret;
+
+ if (sch->q.qlen >= sch->limit)
+ goto drop;
+
+ if (!skb->tstamp) {
+ time_to_send = q.ktime_cache = bpf_ktime_get_ns();
+ } else {
+ if (fq_packet_beyond_horizon(skb)) {
+ q.ktime_cache = bpf_ktime_get_ns();
+ if (fq_packet_beyond_horizon(skb)) {
+ if (q.horizon_drop)
+ goto drop;
+
+ skb->tstamp = q.ktime_cache + q.horizon;
+ }
+ }
+ time_to_send = skb->tstamp;
+ }
+
+ ret = fq_classify(skb, &sflow);
+ if (ret == CLS_RET_ERR)
+ goto drop;
+
+ flow = bpf_kptr_xchg(&sflow->flow, flow);
+ if (!flow)
+ goto drop;
+
+ if (ret == CLS_RET_NONPRIO) {
+ if (flow->qlen >= q.flow_plimit) {
+ bpf_kptr_xchg_back(&sflow->flow, flow);
+ goto drop;
+ }
+
+ if (fq_flow_is_detached(flow)) {
+ flow_copy = bpf_refcount_acquire(flow);
+
+ jiffies = bpf_jiffies64();
+ if ((s64)(jiffies - (flow_copy->age + q.flow_refill_delay)) > 0) {
+ if (flow_copy->credit < q.quantum)
+ flow_copy->credit = q.quantum;
+ }
+ flow_copy->age = 0;
+ fq_flows_add_tail(&fq_new_flows, &fq_new_flows_lock, flow_copy,
+ &q.new_flow_cnt);
+ }
+ }
+
+ skbn = bpf_obj_new(typeof(*skbn));
+ if (!skbn) {
+ bpf_kptr_xchg_back(&sflow->flow, flow);
+ goto drop;
+ }
+
+ skbn->tstamp = skb->tstamp = time_to_send;
+
+ sch->qstats.backlog += qdisc_pkt_len(skb);
+
+ skb = bpf_kptr_xchg(&skbn->skb, skb);
+ if (skb)
+ bpf_qdisc_skb_drop(skb, to_free);
+
+ bpf_spin_lock(&flow->lock);
+ bpf_rbtree_add(&flow->queue, &skbn->node, skbn_tstamp_less);
+ bpf_spin_unlock(&flow->lock);
+
+ flow->qlen++;
+ bpf_kptr_xchg_back(&sflow->flow, flow);
+
+ sch->q.qlen++;
+ return NET_XMIT_SUCCESS;
+
+drop:
+ bpf_qdisc_skb_drop(skb, to_free);
+ sch->qstats.drops++;
+ return NET_XMIT_DROP;
+}
+
+static int fq_unset_throttled_flows(u32 index, struct unset_throttled_flows_ctx *ctx)
+{
+ struct bpf_rb_node *node = NULL;
+ struct fq_flow_node *flow;
+
+ bpf_spin_lock(&fq_delayed_lock);
+
+ node = bpf_rbtree_first(&fq_delayed);
+ if (!node) {
+ bpf_spin_unlock(&fq_delayed_lock);
+ return 1;
+ }
+
+ flow = container_of(node, struct fq_flow_node, rb_node);
+ if (!ctx->unset_all && flow->time_next_packet > ctx->now) {
+ q.time_next_delayed_flow = flow->time_next_packet;
+ bpf_spin_unlock(&fq_delayed_lock);
+ return 1;
+ }
+
+ node = bpf_rbtree_remove(&fq_delayed, &flow->rb_node);
+
+ bpf_spin_unlock(&fq_delayed_lock);
+
+ if (!node)
+ return 1;
+
+ flow = container_of(node, struct fq_flow_node, rb_node);
+ flow->age = 0;
+ fq_flows_add_tail(&fq_old_flows, &fq_old_flows_lock, flow, &q.old_flow_cnt);
+
+ return 0;
+}
+
+static void fq_flow_set_throttled(struct fq_flow_node *flow)
+{
+ flow->age = ~0ULL;
+
+ if (q.time_next_delayed_flow > flow->time_next_packet)
+ q.time_next_delayed_flow = flow->time_next_packet;
+
+ bpf_spin_lock(&fq_delayed_lock);
+ bpf_rbtree_add(&fq_delayed, &flow->rb_node, fn_time_next_packet_less);
+ bpf_spin_unlock(&fq_delayed_lock);
+}
+
+static void fq_check_throttled(u64 now)
+{
+ struct unset_throttled_flows_ctx ctx = {
+ .unset_all = false,
+ .now = now,
+ };
+ unsigned long sample;
+
+ if (q.time_next_delayed_flow > now)
+ return;
+
+ sample = (unsigned long)(now - q.time_next_delayed_flow);
+ q.unthrottle_latency_ns -= q.unthrottle_latency_ns >> 3;
+ q.unthrottle_latency_ns += sample >> 3;
+
+ q.time_next_delayed_flow = ~0ULL;
+ bpf_loop(NUM_QUEUE, fq_unset_throttled_flows, &ctx, 0);
+}
+
+static struct sk_buff*
+fq_dequeue_nonprio_flows(u32 index, struct dequeue_nonprio_ctx *ctx)
+{
+ u64 time_next_packet, time_to_send;
+ struct bpf_rb_node *rb_node;
+ struct sk_buff *skb = NULL;
+ struct bpf_list_head *head;
+ struct bpf_list_node *node;
+ struct bpf_spin_lock *lock;
+ struct fq_flow_node *flow;
+ struct skb_node *skbn;
+ bool is_empty;
+ u32 *cnt;
+
+ if (q.new_flow_cnt) {
+ head = &fq_new_flows;
+ lock = &fq_new_flows_lock;
+ cnt = &q.new_flow_cnt;
+ } else if (q.old_flow_cnt) {
+ head = &fq_old_flows;
+ lock = &fq_old_flows_lock;
+ cnt = &q.old_flow_cnt;
+ } else {
+ if (q.time_next_delayed_flow != ~0ULL)
+ ctx->expire = q.time_next_delayed_flow;
+ goto break_loop;
+ }
+
+ fq_flows_remove_front(head, lock, &node, cnt);
+ if (!node)
+ goto break_loop;
+
+ flow = container_of(node, struct fq_flow_node, list_node);
+ if (flow->credit <= 0) {
+ flow->credit += q.quantum;
+ fq_flows_add_tail(&fq_old_flows, &fq_old_flows_lock, flow, &q.old_flow_cnt);
+ return NULL;
+ }
+
+ bpf_spin_lock(&flow->lock);
+ rb_node = bpf_rbtree_first(&flow->queue);
+ if (!rb_node) {
+ bpf_spin_unlock(&flow->lock);
+ is_empty = fq_flows_is_empty(&fq_old_flows, &fq_old_flows_lock);
+ if (head == &fq_new_flows && !is_empty) {
+ fq_flows_add_tail(&fq_old_flows, &fq_old_flows_lock, flow, &q.old_flow_cnt);
+ } else {
+ fq_flow_set_detached(flow);
+ bpf_obj_drop(flow);
+ }
+ return NULL;
+ }
+
+ skbn = container_of(rb_node, struct skb_node, node);
+ time_to_send = skbn->tstamp;
+
+ time_next_packet = (time_to_send > flow->time_next_packet) ?
+ time_to_send : flow->time_next_packet;
+ if (ctx->now < time_next_packet) {
+ bpf_spin_unlock(&flow->lock);
+ flow->time_next_packet = time_next_packet;
+ fq_flow_set_throttled(flow);
+ return NULL;
+ }
+
+ rb_node = bpf_rbtree_remove(&flow->queue, rb_node);
+ bpf_spin_unlock(&flow->lock);
+
+ if (!rb_node)
+ goto add_flow_and_break;
+
+ skbn = container_of(rb_node, struct skb_node, node);
+ skb = bpf_kptr_xchg(&skbn->skb, skb);
+ bpf_obj_drop(skbn);
+
+ if (!skb)
+ goto add_flow_and_break;
+
+ flow->credit -= qdisc_skb_cb(skb)->pkt_len;
+ flow->qlen--;
+
+add_flow_and_break:
+ fq_flows_add_head(head, lock, flow, cnt);
+
+break_loop:
+ ctx->stop_iter = true;
+ return skb;
+}
+
+static struct sk_buff *fq_dequeue_prio(void)
+{
+ struct fq_flow_node *flow = NULL;
+ struct fq_stashed_flow *sflow;
+ struct bpf_rb_node *rb_node;
+ struct sk_buff *skb = NULL;
+ struct skb_node *skbn;
+ u64 hash = 0;
+
+ sflow = bpf_map_lookup_elem(&fq_prio_flows, &hash);
+ if (!sflow)
+ return NULL;
+
+ flow = bpf_kptr_xchg(&sflow->flow, flow);
+ if (!flow)
+ return NULL;
+
+ bpf_spin_lock(&flow->lock);
+ rb_node = bpf_rbtree_first(&flow->queue);
+ if (!rb_node) {
+ bpf_spin_unlock(&flow->lock);
+ goto out;
+ }
+
+ skbn = container_of(rb_node, struct skb_node, node);
+ rb_node = bpf_rbtree_remove(&flow->queue, &skbn->node);
+ bpf_spin_unlock(&flow->lock);
+
+ if (!rb_node)
+ goto out;
+
+ skbn = container_of(rb_node, struct skb_node, node);
+ skb = bpf_kptr_xchg(&skbn->skb, skb);
+ bpf_obj_drop(skbn);
+
+out:
+ bpf_kptr_xchg_back(&sflow->flow, flow);
+
+ return skb;
+}
+
+SEC("struct_ops/bpf_fq_dequeue")
+struct sk_buff *BPF_PROG(bpf_fq_dequeue, struct Qdisc *sch)
+{
+ struct dequeue_nonprio_ctx cb_ctx = {};
+ struct sk_buff *skb = NULL;
+ int i;
+
+ if (!sch->q.qlen)
+ goto out;
+
+ skb = fq_dequeue_prio();
+ if (skb)
+ goto dequeue;
+
+ q.ktime_cache = cb_ctx.now = bpf_ktime_get_ns();
+ fq_check_throttled(q.ktime_cache);
+ bpf_for(i, 0, sch->limit) {
+ skb = fq_dequeue_nonprio_flows(i, &cb_ctx);
+ if (cb_ctx.stop_iter)
+ break;
+ };
+
+ if (skb) {
+dequeue:
+ sch->q.qlen--;
+ sch->qstats.backlog -= qdisc_pkt_len(skb);
+ bpf_qdisc_bstats_update(sch, skb);
+ return skb;
+ }
+
+ if (cb_ctx.expire)
+ bpf_qdisc_watchdog_schedule(sch, cb_ctx.expire, q.timer_slack);
+out:
+ return NULL;
+}
+
+static int fq_remove_flows_in_list(u32 index, void *ctx)
+{
+ struct bpf_list_node *node;
+ struct fq_flow_node *flow;
+
+ bpf_spin_lock(&fq_new_flows_lock);
+ node = bpf_list_pop_front(&fq_new_flows);
+ bpf_spin_unlock(&fq_new_flows_lock);
+ if (!node) {
+ bpf_spin_lock(&fq_old_flows_lock);
+ node = bpf_list_pop_front(&fq_old_flows);
+ bpf_spin_unlock(&fq_old_flows_lock);
+ if (!node)
+ return 1;
+ }
+
+ flow = container_of(node, struct fq_flow_node, list_node);
+ bpf_obj_drop(flow);
+
+ return 0;
+}
+
+extern unsigned CONFIG_HZ __kconfig;
+
+/* limit number of collected flows per round */
+#define FQ_GC_MAX 8
+#define FQ_GC_AGE (3*CONFIG_HZ)
+
+static bool fq_gc_candidate(struct fq_flow_node *flow)
+{
+ u64 jiffies = bpf_jiffies64();
+
+ return fq_flow_is_detached(flow) &&
+ ((s64)(jiffies - (flow->age + FQ_GC_AGE)) > 0);
+}
+
+static int
+fq_remove_flows(struct bpf_map *flow_map, u64 *hash,
+ struct fq_stashed_flow *sflow, struct remove_flows_ctx *ctx)
+{
+ if (sflow->flow &&
+ (!ctx->gc_only || fq_gc_candidate(sflow->flow))) {
+ bpf_map_delete_elem(flow_map, hash);
+ ctx->reset_cnt++;
+ }
+
+ return ctx->reset_cnt < ctx->reset_max ? 0 : 1;
+}
+
+static void fq_gc(void)
+{
+ struct remove_flows_ctx cb_ctx = {
+ .gc_only = true,
+ .reset_cnt = 0,
+ .reset_max = FQ_GC_MAX,
+ };
+
+ bpf_for_each_map_elem(&fq_nonprio_flows, fq_remove_flows, &cb_ctx, 0);
+}
+
+SEC("struct_ops/bpf_fq_reset")
+void BPF_PROG(bpf_fq_reset, struct Qdisc *sch)
+{
+ struct unset_throttled_flows_ctx utf_ctx = {
+ .unset_all = true,
+ };
+ struct remove_flows_ctx rf_ctx = {
+ .gc_only = false,
+ .reset_cnt = 0,
+ .reset_max = NUM_QUEUE,
+ };
+ struct fq_stashed_flow *sflow;
+ u64 hash = 0;
+
+ sch->q.qlen = 0;
+ sch->qstats.backlog = 0;
+
+ bpf_for_each_map_elem(&fq_nonprio_flows, fq_remove_flows, &rf_ctx, 0);
+
+ rf_ctx.reset_cnt = 0;
+ bpf_for_each_map_elem(&fq_prio_flows, fq_remove_flows, &rf_ctx, 0);
+ fq_new_flow(&fq_prio_flows, &sflow, hash);
+
+ bpf_loop(NUM_QUEUE, fq_remove_flows_in_list, NULL, 0);
+ q.new_flow_cnt = 0;
+ q.old_flow_cnt = 0;
+
+ bpf_loop(NUM_QUEUE, fq_unset_throttled_flows, &utf_ctx, 0);
+}
+
+SEC("struct_ops/bpf_fq_init")
+int BPF_PROG(bpf_fq_init, struct Qdisc *sch, struct nlattr *opt,
+ struct netlink_ext_ack *extack)
+{
+ struct net_device *dev = sch->dev_queue->dev;
+ u32 psched_mtu = dev->mtu + dev->hard_header_len;
+ struct fq_stashed_flow *sflow;
+ u64 hash = 0;
+
+ if (fq_new_flow(&fq_prio_flows, &sflow, hash) < 0)
+ return -ENOMEM;
+
+ sch->limit = 10000;
+ q.initial_quantum = 10 * psched_mtu;
+ q.quantum = 2 * psched_mtu;
+ q.flow_refill_delay = 40;
+ q.flow_plimit = 100;
+ q.horizon = 10ULL * NSEC_PER_SEC;
+ q.horizon_drop = 1;
+ q.orphan_mask = 1024 - 1;
+ q.timer_slack = 10 * NSEC_PER_USEC;
+ q.time_next_delayed_flow = ~0ULL;
+ q.unthrottle_latency_ns = 0ULL;
+ q.new_flow_cnt = 0;
+ q.old_flow_cnt = 0;
+
+ return 0;
+}
+
+SEC("struct_ops")
+void BPF_PROG(bpf_fq_destroy, struct Qdisc *sch)
+{
+}
+
+SEC(".struct_ops")
+struct Qdisc_ops fq = {
+ .enqueue = (void *)bpf_fq_enqueue,
+ .dequeue = (void *)bpf_fq_dequeue,
+ .reset = (void *)bpf_fq_reset,
+ .init = (void *)bpf_fq_init,
+ .destroy = (void *)bpf_fq_destroy,
+ .id = "bpf_fq",
+};
diff --git a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
index 659694162739..17db400f0e0d 100644
--- a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
+++ b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
@@ -128,6 +128,7 @@
#define sk_refcnt __sk_common.skc_refcnt
#define sk_state __sk_common.skc_state
#define sk_net __sk_common.skc_net
+#define sk_rcv_saddr __sk_common.skc_rcv_saddr
#define sk_v6_daddr __sk_common.skc_v6_daddr
#define sk_v6_rcv_saddr __sk_common.skc_v6_rcv_saddr
#define sk_flags __sk_common.skc_flags
diff --git a/tools/testing/selftests/bpf/progs/setget_sockopt.c b/tools/testing/selftests/bpf/progs/setget_sockopt.c
index 0107a24b7522..d330b1511979 100644
--- a/tools/testing/selftests/bpf/progs/setget_sockopt.c
+++ b/tools/testing/selftests/bpf/progs/setget_sockopt.c
@@ -83,6 +83,14 @@ struct loop_ctx {
struct sock *sk;
};
+static bool sk_is_tcp(struct sock *sk)
+{
+ return (sk->__sk_common.skc_family == AF_INET ||
+ sk->__sk_common.skc_family == AF_INET6) &&
+ sk->sk_type == SOCK_STREAM &&
+ sk->sk_protocol == IPPROTO_TCP;
+}
+
static int bpf_test_sockopt_flip(void *ctx, struct sock *sk,
const struct sockopt_test *t,
int level)
@@ -91,6 +99,9 @@ static int bpf_test_sockopt_flip(void *ctx, struct sock *sk,
opt = t->opt;
+ if (opt == SO_TXREHASH && !sk_is_tcp(sk))
+ return 0;
+
if (bpf_getsockopt(ctx, level, opt, &old, sizeof(old)))
return 1;
/* kernel initialized txrehash to 255 */
diff --git a/tools/testing/selftests/bpf/progs/sock_iter_batch.c b/tools/testing/selftests/bpf/progs/sock_iter_batch.c
index 96531b0d9d55..8f483337e103 100644
--- a/tools/testing/selftests/bpf/progs/sock_iter_batch.c
+++ b/tools/testing/selftests/bpf/progs/sock_iter_batch.c
@@ -17,6 +17,12 @@ static bool ipv6_addr_loopback(const struct in6_addr *a)
a->s6_addr32[2] | (a->s6_addr32[3] ^ bpf_htonl(1))) == 0;
}
+static bool ipv4_addr_loopback(__be32 a)
+{
+ return a == bpf_ntohl(0x7f000001);
+}
+
+volatile const unsigned int sf;
volatile const __u16 ports[2];
unsigned int bucket[2];
@@ -26,16 +32,20 @@ int iter_tcp_soreuse(struct bpf_iter__tcp *ctx)
struct sock *sk = (struct sock *)ctx->sk_common;
struct inet_hashinfo *hinfo;
unsigned int hash;
+ __u64 sock_cookie;
struct net *net;
int idx;
if (!sk)
return 0;
+ sock_cookie = bpf_get_socket_cookie(sk);
sk = bpf_core_cast(sk, struct sock);
- if (sk->sk_family != AF_INET6 ||
+ if (sk->sk_family != sf ||
sk->sk_state != TCP_LISTEN ||
- !ipv6_addr_loopback(&sk->sk_v6_rcv_saddr))
+ sk->sk_family == AF_INET6 ?
+ !ipv6_addr_loopback(&sk->sk_v6_rcv_saddr) :
+ !ipv4_addr_loopback(sk->sk_rcv_saddr))
return 0;
if (sk->sk_num == ports[0])
@@ -52,6 +62,7 @@ int iter_tcp_soreuse(struct bpf_iter__tcp *ctx)
hinfo = net->ipv4.tcp_death_row.hashinfo;
bucket[idx] = hash & hinfo->lhash2_mask;
bpf_seq_write(ctx->meta->seq, &idx, sizeof(idx));
+ bpf_seq_write(ctx->meta->seq, &sock_cookie, sizeof(sock_cookie));
return 0;
}
@@ -63,14 +74,18 @@ int iter_udp_soreuse(struct bpf_iter__udp *ctx)
{
struct sock *sk = (struct sock *)ctx->udp_sk;
struct udp_table *udptable;
+ __u64 sock_cookie;
int idx;
if (!sk)
return 0;
+ sock_cookie = bpf_get_socket_cookie(sk);
sk = bpf_core_cast(sk, struct sock);
- if (sk->sk_family != AF_INET6 ||
- !ipv6_addr_loopback(&sk->sk_v6_rcv_saddr))
+ if (sk->sk_family != sf ||
+ sk->sk_family == AF_INET6 ?
+ !ipv6_addr_loopback(&sk->sk_v6_rcv_saddr) :
+ !ipv4_addr_loopback(sk->sk_rcv_saddr))
return 0;
if (sk->sk_num == ports[0])
@@ -84,6 +99,7 @@ int iter_udp_soreuse(struct bpf_iter__udp *ctx)
udptable = sk->sk_net.net->ipv4.udp_table;
bucket[idx] = udp_sk(sk)->udp_portaddr_hash & udptable->mask;
bpf_seq_write(ctx->meta->seq, &idx, sizeof(idx));
+ bpf_seq_write(ctx->meta->seq, &sock_cookie, sizeof(sock_cookie));
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/xsk_xdp_progs.c b/tools/testing/selftests/bpf/progs/xsk_xdp_progs.c
index ccde6a4c6319..683306db8594 100644
--- a/tools/testing/selftests/bpf/progs/xsk_xdp_progs.c
+++ b/tools/testing/selftests/bpf/progs/xsk_xdp_progs.c
@@ -4,6 +4,8 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <linux/if_ether.h>
+#include <linux/ip.h>
+#include <linux/errno.h>
#include "xsk_xdp_common.h"
struct {
@@ -14,6 +16,7 @@ struct {
} xsk SEC(".maps");
static unsigned int idx;
+int adjust_value = 0;
int count = 0;
SEC("xdp.frags") int xsk_def_prog(struct xdp_md *xdp)
@@ -70,4 +73,51 @@ SEC("xdp") int xsk_xdp_shared_umem(struct xdp_md *xdp)
return bpf_redirect_map(&xsk, idx, XDP_DROP);
}
+SEC("xdp.frags") int xsk_xdp_adjust_tail(struct xdp_md *xdp)
+{
+ __u32 buff_len, curr_buff_len;
+ int ret;
+
+ buff_len = bpf_xdp_get_buff_len(xdp);
+ if (buff_len == 0)
+ return XDP_DROP;
+
+ ret = bpf_xdp_adjust_tail(xdp, adjust_value);
+ if (ret < 0) {
+ /* Handle unsupported cases */
+ if (ret == -EOPNOTSUPP) {
+ /* Set adjust_value to -EOPNOTSUPP to indicate to userspace that this case
+ * is unsupported
+ */
+ adjust_value = -EOPNOTSUPP;
+ return bpf_redirect_map(&xsk, 0, XDP_DROP);
+ }
+
+ return XDP_DROP;
+ }
+
+ curr_buff_len = bpf_xdp_get_buff_len(xdp);
+ if (curr_buff_len != buff_len + adjust_value)
+ return XDP_DROP;
+
+ if (curr_buff_len > buff_len) {
+ __u32 *pkt_data = (void *)(long)xdp->data;
+ __u32 len, words_to_end, seq_num;
+
+ len = curr_buff_len - PKT_HDR_ALIGN;
+ words_to_end = len / sizeof(*pkt_data) - 1;
+ seq_num = words_to_end;
+
+ /* Convert sequence number to network byte order. Store this in the last 4 bytes of
+ * the packet. Use 'adjust_value' to determine the position at the end of the
+ * packet for storing the sequence number.
+ */
+ seq_num = __constant_htonl(words_to_end);
+ bpf_xdp_store_bytes(xdp, curr_buff_len - sizeof(seq_num), &seq_num,
+ sizeof(seq_num));
+ }
+
+ return bpf_redirect_map(&xsk, 0, XDP_DROP);
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/xsk_xdp_common.h b/tools/testing/selftests/bpf/xsk_xdp_common.h
index 5a6f36f07383..45810ff552da 100644
--- a/tools/testing/selftests/bpf/xsk_xdp_common.h
+++ b/tools/testing/selftests/bpf/xsk_xdp_common.h
@@ -4,6 +4,7 @@
#define XSK_XDP_COMMON_H_
#define MAX_SOCKETS 2
+#define PKT_HDR_ALIGN (sizeof(struct ethhdr) + 2) /* Just to align the data in the packet */
struct xdp_info {
__u64 count;
diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c
index 11f047b8af75..0ced4026ee44 100644
--- a/tools/testing/selftests/bpf/xskxceiver.c
+++ b/tools/testing/selftests/bpf/xskxceiver.c
@@ -524,6 +524,8 @@ static void __test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx,
test->nb_sockets = 1;
test->fail = false;
test->set_ring = false;
+ test->adjust_tail = false;
+ test->adjust_tail_support = false;
test->mtu = MAX_ETH_PKT_SIZE;
test->xdp_prog_rx = ifobj_rx->xdp_progs->progs.xsk_def_prog;
test->xskmap_rx = ifobj_rx->xdp_progs->maps.xsk;
@@ -757,14 +759,15 @@ static struct pkt_stream *pkt_stream_clone(struct pkt_stream *pkt_stream)
return pkt_stream_generate(pkt_stream->nb_pkts, pkt_stream->pkts[0].len);
}
-static void pkt_stream_replace(struct test_spec *test, u32 nb_pkts, u32 pkt_len)
+static void pkt_stream_replace_ifobject(struct ifobject *ifobj, u32 nb_pkts, u32 pkt_len)
{
- struct pkt_stream *pkt_stream;
+ ifobj->xsk->pkt_stream = pkt_stream_generate(nb_pkts, pkt_len);
+}
- pkt_stream = pkt_stream_generate(nb_pkts, pkt_len);
- test->ifobj_tx->xsk->pkt_stream = pkt_stream;
- pkt_stream = pkt_stream_generate(nb_pkts, pkt_len);
- test->ifobj_rx->xsk->pkt_stream = pkt_stream;
+static void pkt_stream_replace(struct test_spec *test, u32 nb_pkts, u32 pkt_len)
+{
+ pkt_stream_replace_ifobject(test->ifobj_tx, nb_pkts, pkt_len);
+ pkt_stream_replace_ifobject(test->ifobj_rx, nb_pkts, pkt_len);
}
static void __pkt_stream_replace_half(struct ifobject *ifobj, u32 pkt_len,
@@ -991,6 +994,31 @@ static bool is_metadata_correct(struct pkt *pkt, void *buffer, u64 addr)
return true;
}
+static bool is_adjust_tail_supported(struct xsk_xdp_progs *skel_rx)
+{
+ struct bpf_map *data_map;
+ int adjust_value = 0;
+ int key = 0;
+ int ret;
+
+ data_map = bpf_object__find_map_by_name(skel_rx->obj, "xsk_xdp_.bss");
+ if (!data_map || !bpf_map__is_internal(data_map)) {
+ ksft_print_msg("Error: could not find bss section of XDP program\n");
+ exit_with_error(errno);
+ }
+
+ ret = bpf_map_lookup_elem(bpf_map__fd(data_map), &key, &adjust_value);
+ if (ret) {
+ ksft_print_msg("Error: bpf_map_lookup_elem failed with error %d\n", ret);
+ exit_with_error(errno);
+ }
+
+ /* Set the 'adjust_value' variable to -EOPNOTSUPP in the XDP program if the adjust_tail
+ * helper is not supported. Skip the adjust_tail test case in this scenario.
+ */
+ return adjust_value != -EOPNOTSUPP;
+}
+
static bool is_frag_valid(struct xsk_umem_info *umem, u64 addr, u32 len, u32 expected_pkt_nb,
u32 bytes_processed)
{
@@ -1767,8 +1795,13 @@ static void *worker_testapp_validate_rx(void *arg)
if (!err && ifobject->validation_func)
err = ifobject->validation_func(ifobject);
- if (err)
- report_failure(test);
+
+ if (err) {
+ if (test->adjust_tail && !is_adjust_tail_supported(ifobject->xdp_progs))
+ test->adjust_tail_support = false;
+ else
+ report_failure(test);
+ }
pthread_exit(NULL);
}
@@ -2515,6 +2548,71 @@ static int testapp_hw_sw_max_ring_size(struct test_spec *test)
return testapp_validate_traffic(test);
}
+static int testapp_xdp_adjust_tail(struct test_spec *test, int adjust_value)
+{
+ struct xsk_xdp_progs *skel_rx = test->ifobj_rx->xdp_progs;
+ struct xsk_xdp_progs *skel_tx = test->ifobj_tx->xdp_progs;
+
+ test_spec_set_xdp_prog(test, skel_rx->progs.xsk_xdp_adjust_tail,
+ skel_tx->progs.xsk_xdp_adjust_tail,
+ skel_rx->maps.xsk, skel_tx->maps.xsk);
+
+ skel_rx->bss->adjust_value = adjust_value;
+
+ return testapp_validate_traffic(test);
+}
+
+static int testapp_adjust_tail(struct test_spec *test, u32 value, u32 pkt_len)
+{
+ int ret;
+
+ test->adjust_tail_support = true;
+ test->adjust_tail = true;
+ test->total_steps = 1;
+
+ pkt_stream_replace_ifobject(test->ifobj_tx, DEFAULT_BATCH_SIZE, pkt_len);
+ pkt_stream_replace_ifobject(test->ifobj_rx, DEFAULT_BATCH_SIZE, pkt_len + value);
+
+ ret = testapp_xdp_adjust_tail(test, value);
+ if (ret)
+ return ret;
+
+ if (!test->adjust_tail_support) {
+ ksft_test_result_skip("%s %sResize pkt with bpf_xdp_adjust_tail() not supported\n",
+ mode_string(test), busy_poll_string(test));
+ return TEST_SKIP;
+ }
+
+ return 0;
+}
+
+static int testapp_adjust_tail_shrink(struct test_spec *test)
+{
+ /* Shrink by 4 bytes for testing purpose */
+ return testapp_adjust_tail(test, -4, MIN_PKT_SIZE * 2);
+}
+
+static int testapp_adjust_tail_shrink_mb(struct test_spec *test)
+{
+ test->mtu = MAX_ETH_JUMBO_SIZE;
+ /* Shrink by the frag size */
+ return testapp_adjust_tail(test, -XSK_UMEM__MAX_FRAME_SIZE, XSK_UMEM__LARGE_FRAME_SIZE * 2);
+}
+
+static int testapp_adjust_tail_grow(struct test_spec *test)
+{
+ /* Grow by 4 bytes for testing purpose */
+ return testapp_adjust_tail(test, 4, MIN_PKT_SIZE * 2);
+}
+
+static int testapp_adjust_tail_grow_mb(struct test_spec *test)
+{
+ test->mtu = MAX_ETH_JUMBO_SIZE;
+ /* Grow by (frag_size - last_frag_Size) - 1 to stay inside the last fragment */
+ return testapp_adjust_tail(test, (XSK_UMEM__MAX_FRAME_SIZE / 2) - 1,
+ XSK_UMEM__LARGE_FRAME_SIZE * 2);
+}
+
static void run_pkt_test(struct test_spec *test)
{
int ret;
@@ -2621,6 +2719,10 @@ static const struct test_spec tests[] = {
{.name = "TOO_MANY_FRAGS", .test_func = testapp_too_many_frags},
{.name = "HW_SW_MIN_RING_SIZE", .test_func = testapp_hw_sw_min_ring_size},
{.name = "HW_SW_MAX_RING_SIZE", .test_func = testapp_hw_sw_max_ring_size},
+ {.name = "XDP_ADJUST_TAIL_SHRINK", .test_func = testapp_adjust_tail_shrink},
+ {.name = "XDP_ADJUST_TAIL_SHRINK_MULTI_BUFF", .test_func = testapp_adjust_tail_shrink_mb},
+ {.name = "XDP_ADJUST_TAIL_GROW", .test_func = testapp_adjust_tail_grow},
+ {.name = "XDP_ADJUST_TAIL_GROW_MULTI_BUFF", .test_func = testapp_adjust_tail_grow_mb},
};
static void print_tests(void)
diff --git a/tools/testing/selftests/bpf/xskxceiver.h b/tools/testing/selftests/bpf/xskxceiver.h
index e46e823f6a1a..67fc44b2813b 100644
--- a/tools/testing/selftests/bpf/xskxceiver.h
+++ b/tools/testing/selftests/bpf/xskxceiver.h
@@ -173,6 +173,8 @@ struct test_spec {
u16 nb_sockets;
bool fail;
bool set_ring;
+ bool adjust_tail;
+ bool adjust_tail_support;
enum test_mode mode;
char name[MAX_TEST_NAME_SIZE];
};
diff --git a/tools/testing/selftests/drivers/net/.gitignore b/tools/testing/selftests/drivers/net/.gitignore
index ec746f374e85..d634d8395d90 100644
--- a/tools/testing/selftests/drivers/net/.gitignore
+++ b/tools/testing/selftests/drivers/net/.gitignore
@@ -1,2 +1,2 @@
# SPDX-License-Identifier: GPL-2.0-only
-xdp_helper
+napi_id_helper
diff --git a/tools/testing/selftests/drivers/net/Makefile b/tools/testing/selftests/drivers/net/Makefile
index 0c95bd944d56..be780bcb73a3 100644
--- a/tools/testing/selftests/drivers/net/Makefile
+++ b/tools/testing/selftests/drivers/net/Makefile
@@ -3,12 +3,14 @@ CFLAGS += $(KHDR_INCLUDES)
TEST_INCLUDES := $(wildcard lib/py/*.py) \
$(wildcard lib/sh/*.sh) \
- ../../net/net_helper.sh \
../../net/lib.sh \
-TEST_GEN_FILES := xdp_helper
+TEST_GEN_FILES := \
+ napi_id_helper \
+# end of TEST_GEN_FILES
TEST_PROGS := \
+ napi_id.py \
netcons_basic.sh \
netcons_fragmented_msg.sh \
netcons_overflow.sh \
diff --git a/tools/testing/selftests/drivers/net/hw/Makefile b/tools/testing/selftests/drivers/net/hw/Makefile
index 07cddb19ba35..df2c047ffa90 100644
--- a/tools/testing/selftests/drivers/net/hw/Makefile
+++ b/tools/testing/selftests/drivers/net/hw/Makefile
@@ -15,12 +15,11 @@ TEST_PROGS = \
iou-zcrx.py \
irq.py \
loopback.sh \
- nic_link_layer.py \
- nic_performance.py \
pp_alloc_fail.py \
rss_ctx.py \
rss_input_xfrm.py \
tso.py \
+ xsk_reconfig.py \
#
TEST_FILES := \
diff --git a/tools/testing/selftests/drivers/net/hw/devmem.py b/tools/testing/selftests/drivers/net/hw/devmem.py
index 3947e9157115..7947650210a0 100755
--- a/tools/testing/selftests/drivers/net/hw/devmem.py
+++ b/tools/testing/selftests/drivers/net/hw/devmem.py
@@ -1,6 +1,7 @@
#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0
+from os import path
from lib.py import ksft_run, ksft_exit
from lib.py import ksft_eq, KsftSkipEx
from lib.py import NetDrvEpEnv
@@ -10,8 +11,7 @@ from lib.py import ksft_disruptive
def require_devmem(cfg):
if not hasattr(cfg, "_devmem_probed"):
- port = rand_port()
- probe_command = f"./ncdevmem -f {cfg.ifname}"
+ probe_command = f"{cfg.bin_local} -f {cfg.ifname}"
cfg._devmem_supported = cmd(probe_command, fail=False, shell=True).ret == 0
cfg._devmem_probed = True
@@ -21,22 +21,55 @@ def require_devmem(cfg):
@ksft_disruptive
def check_rx(cfg) -> None:
- cfg.require_ipver("6")
require_devmem(cfg)
port = rand_port()
- listen_cmd = f"./ncdevmem -l -f {cfg.ifname} -s {cfg.addr_v['6']} -p {port}"
+ socat = f"socat -u - TCP{cfg.addr_ipver}:{cfg.addr}:{port},bind={cfg.remote_addr}:{port}"
+ listen_cmd = f"{cfg.bin_local} -l -f {cfg.ifname} -s {cfg.addr} -p {port} -c {cfg.remote_addr} -v 7"
+
+ with bkg(listen_cmd, exit_wait=True) as ncdevmem:
+ wait_port_listen(port)
+ cmd(f"yes $(echo -e \x01\x02\x03\x04\x05\x06) | \
+ head -c 1K | {socat}", host=cfg.remote, shell=True)
+
+ ksft_eq(ncdevmem.ret, 0)
+
+
+@ksft_disruptive
+def check_tx(cfg) -> None:
+ require_devmem(cfg)
+
+ port = rand_port()
+ listen_cmd = f"socat -U - TCP{cfg.addr_ipver}-LISTEN:{port}"
with bkg(listen_cmd) as socat:
wait_port_listen(port)
- cmd(f"echo -e \"hello\\nworld\"| socat -u - TCP6:[{cfg.addr_v['6']}]:{port}", host=cfg.remote, shell=True)
+ cmd(f"echo -e \"hello\\nworld\"| {cfg.bin_remote} -f {cfg.ifname} -s {cfg.addr} -p {port}", host=cfg.remote, shell=True)
+
+ ksft_eq(socat.stdout.strip(), "hello\nworld")
+
+
+@ksft_disruptive
+def check_tx_chunks(cfg) -> None:
+ cfg.require_ipver("6")
+ require_devmem(cfg)
+
+ port = rand_port()
+ listen_cmd = f"socat -U - TCP6-LISTEN:{port}"
+
+ with bkg(listen_cmd, exit_wait=True) as socat:
+ wait_port_listen(port)
+ cmd(f"echo -e \"hello\\nworld\"| {cfg.bin_remote} -f {cfg.ifname} -s {cfg.addr_v['6']} -p {port} -z 3", host=cfg.remote, shell=True)
ksft_eq(socat.stdout.strip(), "hello\nworld")
def main() -> None:
with NetDrvEpEnv(__file__) as cfg:
- ksft_run([check_rx],
+ cfg.bin_local = path.abspath(path.dirname(__file__) + "/ncdevmem")
+ cfg.bin_remote = cfg.remote.deploy(cfg.bin_local)
+
+ ksft_run([check_rx, check_tx, check_tx_chunks],
args=(cfg, ))
ksft_exit()
diff --git a/tools/testing/selftests/drivers/net/hw/iou-zcrx.c b/tools/testing/selftests/drivers/net/hw/iou-zcrx.c
index c26b4180eddd..62456df947bc 100644
--- a/tools/testing/selftests/drivers/net/hw/iou-zcrx.c
+++ b/tools/testing/selftests/drivers/net/hw/iou-zcrx.c
@@ -37,8 +37,8 @@
#include <liburing.h>
-#define PAGE_SIZE (4096)
-#define AREA_SIZE (8192 * PAGE_SIZE)
+static long page_size;
+#define AREA_SIZE (8192 * page_size)
#define SEND_SIZE (512 * 4096)
#define min(a, b) \
({ \
@@ -66,7 +66,7 @@ static int cfg_oneshot_recvs;
static int cfg_send_size = SEND_SIZE;
static struct sockaddr_in6 cfg_addr;
-static char payload[SEND_SIZE] __attribute__((aligned(PAGE_SIZE)));
+static char *payload;
static void *area_ptr;
static void *ring_ptr;
static size_t ring_size;
@@ -114,8 +114,8 @@ static inline size_t get_refill_ring_size(unsigned int rq_entries)
ring_size = rq_entries * sizeof(struct io_uring_zcrx_rqe);
/* add space for the header (head/tail/etc.) */
- ring_size += PAGE_SIZE;
- return ALIGN_UP(ring_size, 4096);
+ ring_size += page_size;
+ return ALIGN_UP(ring_size, page_size);
}
static void setup_zcrx(struct io_uring *ring)
@@ -219,7 +219,7 @@ static void process_accept(struct io_uring *ring, struct io_uring_cqe *cqe)
connfd = cqe->res;
if (cfg_oneshot)
- add_recvzc_oneshot(ring, connfd, PAGE_SIZE);
+ add_recvzc_oneshot(ring, connfd, page_size);
else
add_recvzc(ring, connfd);
}
@@ -245,7 +245,7 @@ static void process_recvzc(struct io_uring *ring, struct io_uring_cqe *cqe)
if (cfg_oneshot) {
if (cqe->res == 0 && cqe->flags == 0 && cfg_oneshot_recvs) {
- add_recvzc_oneshot(ring, connfd, PAGE_SIZE);
+ add_recvzc_oneshot(ring, connfd, page_size);
cfg_oneshot_recvs--;
}
} else if (!(cqe->flags & IORING_CQE_F_MORE)) {
@@ -260,7 +260,7 @@ static void process_recvzc(struct io_uring *ring, struct io_uring_cqe *cqe)
for (i = 0; i < n; i++) {
if (*(data + i) != payload[(received + i)])
- error(1, 0, "payload mismatch at ", i);
+ error(1, 0, "payload mismatch at %d", i);
}
received += n;
@@ -354,7 +354,7 @@ static void run_client(void)
chunk = min_t(ssize_t, cfg_payload_len, to_send);
res = send(fd, src, chunk, 0);
if (res < 0)
- error(1, 0, "send(): %d", sent);
+ error(1, 0, "send(): %zd", sent);
sent += res;
to_send -= res;
}
@@ -370,7 +370,7 @@ static void usage(const char *filepath)
static void parse_opts(int argc, char **argv)
{
- const int max_payload_len = sizeof(payload) -
+ const int max_payload_len = SEND_SIZE -
sizeof(struct ipv6hdr) -
sizeof(struct tcphdr) -
40 /* max tcp options */;
@@ -443,6 +443,13 @@ int main(int argc, char **argv)
const char *cfg_test = argv[argc - 1];
int i;
+ page_size = sysconf(_SC_PAGESIZE);
+ if (page_size < 0)
+ return 1;
+
+ if (posix_memalign((void **)&payload, page_size, SEND_SIZE))
+ return 1;
+
parse_opts(argc, argv);
for (i = 0; i < SEND_SIZE; i++)
diff --git a/tools/testing/selftests/drivers/net/hw/iou-zcrx.py b/tools/testing/selftests/drivers/net/hw/iou-zcrx.py
index 6a0378e06cab..9c03fd777f3d 100755
--- a/tools/testing/selftests/drivers/net/hw/iou-zcrx.py
+++ b/tools/testing/selftests/drivers/net/hw/iou-zcrx.py
@@ -5,13 +5,12 @@ import re
from os import path
from lib.py import ksft_run, ksft_exit
from lib.py import NetDrvEpEnv
-from lib.py import bkg, cmd, ethtool, wait_port_listen
+from lib.py import bkg, cmd, defer, ethtool, rand_port, wait_port_listen
-def _get_rx_ring_entries(cfg):
- output = ethtool(f"-g {cfg.ifname}", host=cfg.remote).stdout
- values = re.findall(r'RX:\s+(\d+)', output)
- return int(values[1])
+def _get_current_settings(cfg):
+ output = ethtool(f"-g {cfg.ifname}", json=True, host=cfg.remote)[0]
+ return (output['rx'], output['hds-thresh'])
def _get_combined_channels(cfg):
@@ -20,8 +19,21 @@ def _get_combined_channels(cfg):
return int(values[1])
-def _set_flow_rule(cfg, chan):
- output = ethtool(f"-N {cfg.ifname} flow-type tcp6 dst-port 9999 action {chan}", host=cfg.remote).stdout
+def _create_rss_ctx(cfg, chan):
+ output = ethtool(f"-X {cfg.ifname} context new start {chan} equal 1", host=cfg.remote).stdout
+ values = re.search(r'New RSS context is (\d+)', output).group(1)
+ ctx_id = int(values)
+ return (ctx_id, defer(ethtool, f"-X {cfg.ifname} delete context {ctx_id}", host=cfg.remote))
+
+
+def _set_flow_rule(cfg, port, chan):
+ output = ethtool(f"-N {cfg.ifname} flow-type tcp6 dst-port {port} action {chan}", host=cfg.remote).stdout
+ values = re.search(r'ID (\d+)', output).group(1)
+ return int(values)
+
+
+def _set_flow_rule_rss(cfg, port, ctx_id):
+ output = ethtool(f"-N {cfg.ifname} flow-type tcp6 dst-port {port} context {ctx_id}", host=cfg.remote).stdout
values = re.search(r'ID (\d+)', output).group(1)
return int(values)
@@ -32,24 +44,29 @@ def test_zcrx(cfg) -> None:
combined_chans = _get_combined_channels(cfg)
if combined_chans < 2:
raise KsftSkipEx('at least 2 combined channels required')
- rx_ring = _get_rx_ring_entries(cfg)
-
- try:
- ethtool(f"-G {cfg.ifname} tcp-data-split on", host=cfg.remote)
- ethtool(f"-G {cfg.ifname} rx 64", host=cfg.remote)
- ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}", host=cfg.remote)
- flow_rule_id = _set_flow_rule(cfg, combined_chans - 1)
-
- rx_cmd = f"{cfg.bin_remote} -s -p 9999 -i {cfg.ifname} -q {combined_chans - 1}"
- tx_cmd = f"{cfg.bin_local} -c -h {cfg.remote_addr_v['6']} -p 9999 -l 12840"
- with bkg(rx_cmd, host=cfg.remote, exit_wait=True):
- wait_port_listen(9999, proto="tcp", host=cfg.remote)
- cmd(tx_cmd)
- finally:
- ethtool(f"-N {cfg.ifname} delete {flow_rule_id}", host=cfg.remote)
- ethtool(f"-X {cfg.ifname} default", host=cfg.remote)
- ethtool(f"-G {cfg.ifname} rx {rx_ring}", host=cfg.remote)
- ethtool(f"-G {cfg.ifname} tcp-data-split auto", host=cfg.remote)
+ (rx_ring, hds_thresh) = _get_current_settings(cfg)
+ port = rand_port()
+
+ ethtool(f"-G {cfg.ifname} tcp-data-split on", host=cfg.remote)
+ defer(ethtool, f"-G {cfg.ifname} tcp-data-split auto", host=cfg.remote)
+
+ ethtool(f"-G {cfg.ifname} hds-thresh 0", host=cfg.remote)
+ defer(ethtool, f"-G {cfg.ifname} hds-thresh {hds_thresh}", host=cfg.remote)
+
+ ethtool(f"-G {cfg.ifname} rx 64", host=cfg.remote)
+ defer(ethtool, f"-G {cfg.ifname} rx {rx_ring}", host=cfg.remote)
+
+ ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}", host=cfg.remote)
+ defer(ethtool, f"-X {cfg.ifname} default", host=cfg.remote)
+
+ flow_rule_id = _set_flow_rule(cfg, port, combined_chans - 1)
+ defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}", host=cfg.remote)
+
+ rx_cmd = f"{cfg.bin_remote} -s -p {port} -i {cfg.ifname} -q {combined_chans - 1}"
+ tx_cmd = f"{cfg.bin_local} -c -h {cfg.remote_addr_v['6']} -p {port} -l 12840"
+ with bkg(rx_cmd, host=cfg.remote, exit_wait=True):
+ wait_port_listen(port, proto="tcp", host=cfg.remote)
+ cmd(tx_cmd)
def test_zcrx_oneshot(cfg) -> None:
@@ -58,24 +75,61 @@ def test_zcrx_oneshot(cfg) -> None:
combined_chans = _get_combined_channels(cfg)
if combined_chans < 2:
raise KsftSkipEx('at least 2 combined channels required')
- rx_ring = _get_rx_ring_entries(cfg)
-
- try:
- ethtool(f"-G {cfg.ifname} tcp-data-split on", host=cfg.remote)
- ethtool(f"-G {cfg.ifname} rx 64", host=cfg.remote)
- ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}", host=cfg.remote)
- flow_rule_id = _set_flow_rule(cfg, combined_chans - 1)
-
- rx_cmd = f"{cfg.bin_remote} -s -p 9999 -i {cfg.ifname} -q {combined_chans - 1} -o 4"
- tx_cmd = f"{cfg.bin_local} -c -h {cfg.remote_addr_v['6']} -p 9999 -l 4096 -z 16384"
- with bkg(rx_cmd, host=cfg.remote, exit_wait=True):
- wait_port_listen(9999, proto="tcp", host=cfg.remote)
- cmd(tx_cmd)
- finally:
- ethtool(f"-N {cfg.ifname} delete {flow_rule_id}", host=cfg.remote)
- ethtool(f"-X {cfg.ifname} default", host=cfg.remote)
- ethtool(f"-G {cfg.ifname} rx {rx_ring}", host=cfg.remote)
- ethtool(f"-G {cfg.ifname} tcp-data-split auto", host=cfg.remote)
+ (rx_ring, hds_thresh) = _get_current_settings(cfg)
+ port = rand_port()
+
+ ethtool(f"-G {cfg.ifname} tcp-data-split on", host=cfg.remote)
+ defer(ethtool, f"-G {cfg.ifname} tcp-data-split auto", host=cfg.remote)
+
+ ethtool(f"-G {cfg.ifname} hds-thresh 0", host=cfg.remote)
+ defer(ethtool, f"-G {cfg.ifname} hds-thresh {hds_thresh}", host=cfg.remote)
+
+ ethtool(f"-G {cfg.ifname} rx 64", host=cfg.remote)
+ defer(ethtool, f"-G {cfg.ifname} rx {rx_ring}", host=cfg.remote)
+
+ ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}", host=cfg.remote)
+ defer(ethtool, f"-X {cfg.ifname} default", host=cfg.remote)
+
+ flow_rule_id = _set_flow_rule(cfg, port, combined_chans - 1)
+ defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}", host=cfg.remote)
+
+ rx_cmd = f"{cfg.bin_remote} -s -p {port} -i {cfg.ifname} -q {combined_chans - 1} -o 4"
+ tx_cmd = f"{cfg.bin_local} -c -h {cfg.remote_addr_v['6']} -p {port} -l 4096 -z 16384"
+ with bkg(rx_cmd, host=cfg.remote, exit_wait=True):
+ wait_port_listen(port, proto="tcp", host=cfg.remote)
+ cmd(tx_cmd)
+
+
+def test_zcrx_rss(cfg) -> None:
+ cfg.require_ipver('6')
+
+ combined_chans = _get_combined_channels(cfg)
+ if combined_chans < 2:
+ raise KsftSkipEx('at least 2 combined channels required')
+ (rx_ring, hds_thresh) = _get_current_settings(cfg)
+ port = rand_port()
+
+ ethtool(f"-G {cfg.ifname} tcp-data-split on", host=cfg.remote)
+ defer(ethtool, f"-G {cfg.ifname} tcp-data-split auto", host=cfg.remote)
+
+ ethtool(f"-G {cfg.ifname} hds-thresh 0", host=cfg.remote)
+ defer(ethtool, f"-G {cfg.ifname} hds-thresh {hds_thresh}", host=cfg.remote)
+
+ ethtool(f"-G {cfg.ifname} rx 64", host=cfg.remote)
+ defer(ethtool, f"-G {cfg.ifname} rx {rx_ring}", host=cfg.remote)
+
+ ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}", host=cfg.remote)
+ defer(ethtool, f"-X {cfg.ifname} default", host=cfg.remote)
+
+ (ctx_id, delete_ctx) = _create_rss_ctx(cfg, combined_chans - 1)
+ flow_rule_id = _set_flow_rule_rss(cfg, port, ctx_id)
+ defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}", host=cfg.remote)
+
+ rx_cmd = f"{cfg.bin_remote} -s -p {port} -i {cfg.ifname} -q {combined_chans - 1}"
+ tx_cmd = f"{cfg.bin_local} -c -h {cfg.remote_addr_v['6']} -p {port} -l 12840"
+ with bkg(rx_cmd, host=cfg.remote, exit_wait=True):
+ wait_port_listen(port, proto="tcp", host=cfg.remote)
+ cmd(tx_cmd)
def main() -> None:
diff --git a/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py b/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py
index 399789a9676a..b582885786f5 100644
--- a/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py
+++ b/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py
@@ -9,7 +9,6 @@ try:
sys.path.append(KSFT_DIR.as_posix())
from net.lib.py import *
from drivers.net.lib.py import *
- from .linkconfig import LinkConfig
except ModuleNotFoundError as e:
ksft_pr("Failed importing `net` library from kernel sources")
ksft_pr(str(e))
diff --git a/tools/testing/selftests/drivers/net/hw/lib/py/linkconfig.py b/tools/testing/selftests/drivers/net/hw/lib/py/linkconfig.py
deleted file mode 100644
index 79fde603cbbc..000000000000
--- a/tools/testing/selftests/drivers/net/hw/lib/py/linkconfig.py
+++ /dev/null
@@ -1,222 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-
-from lib.py import cmd, ethtool, ip
-from lib.py import ksft_pr, ksft_eq, KsftSkipEx
-from typing import Optional
-import re
-import time
-import json
-
-#The LinkConfig class is implemented to handle the link layer configurations.
-#Required minimum ethtool version is 6.10
-
-class LinkConfig:
- """Class for handling the link layer configurations"""
- def __init__(self, cfg: object) -> None:
- self.cfg = cfg
- self.partner_netif = self.get_partner_netif_name()
-
- """Get the initial link configuration of local interface"""
- self.common_link_modes = self.get_common_link_modes()
-
- def get_partner_netif_name(self) -> Optional[str]:
- partner_netif = None
- try:
- if not self.verify_link_up():
- return None
- """Get partner interface name"""
- partner_json_output = ip("addr show", json=True, host=self.cfg.remote)
- for interface in partner_json_output:
- for addr in interface.get('addr_info', []):
- if addr.get('local') == self.cfg.remote_addr:
- partner_netif = interface['ifname']
- ksft_pr(f"Partner Interface name: {partner_netif}")
- if partner_netif is None:
- ksft_pr("Unable to get the partner interface name")
- except Exception as e:
- print(f"Unexpected error occurred while getting partner interface name: {e}")
- self.partner_netif = partner_netif
- return partner_netif
-
- def verify_link_up(self) -> bool:
- """Verify whether the local interface link is up"""
- with open(f"/sys/class/net/{self.cfg.ifname}/operstate", "r") as fp:
- link_state = fp.read().strip()
-
- if link_state == "down":
- ksft_pr(f"Link state of interface {self.cfg.ifname} is DOWN")
- return False
- else:
- return True
-
- def reset_interface(self, local: bool = True, remote: bool = True) -> bool:
- ksft_pr("Resetting interfaces in local and remote")
- if remote:
- if self.verify_link_up():
- if self.partner_netif is not None:
- ifname = self.partner_netif
- link_up_cmd = f"ip link set up {ifname}"
- link_down_cmd = f"ip link set down {ifname}"
- reset_cmd = f"{link_down_cmd} && sleep 5 && {link_up_cmd}"
- try:
- cmd(reset_cmd, host=self.cfg.remote)
- except Exception as e:
- ksft_pr(f"Unexpected error occurred while resetting remote: {e}")
- else:
- ksft_pr("Partner interface not available")
- if local:
- ifname = self.cfg.ifname
- link_up_cmd = f"ip link set up {ifname}"
- link_down_cmd = f"ip link set down {ifname}"
- reset_cmd = f"{link_down_cmd} && sleep 5 && {link_up_cmd}"
- try:
- cmd(reset_cmd)
- except Exception as e:
- ksft_pr(f"Unexpected error occurred while resetting local: {e}")
- time.sleep(10)
- if self.verify_link_up() and self.get_ethtool_field("link-detected"):
- ksft_pr("Local and remote interfaces reset to original state")
- return True
- else:
- ksft_pr("Error occurred after resetting interfaces. Link is DOWN.")
- return False
-
- def set_speed_and_duplex(self, speed: str, duplex: str, autoneg: bool = True) -> bool:
- """Set the speed and duplex state for the interface"""
- autoneg_state = "on" if autoneg is True else "off"
- process = None
- try:
- process = ethtool(f"--change {self.cfg.ifname} speed {speed} duplex {duplex} autoneg {autoneg_state}")
- except Exception as e:
- ksft_pr(f"Unexpected error occurred while setting speed/duplex: {e}")
- if process is None or process.ret != 0:
- return False
- else:
- ksft_pr(f"Speed: {speed} Mbps, Duplex: {duplex} set for Interface: {self.cfg.ifname}")
- return True
-
- def verify_speed_and_duplex(self, expected_speed: str, expected_duplex: str) -> bool:
- if not self.verify_link_up():
- return False
- """Verifying the speed and duplex state for the interface"""
- with open(f"/sys/class/net/{self.cfg.ifname}/speed", "r") as fp:
- actual_speed = fp.read().strip()
- with open(f"/sys/class/net/{self.cfg.ifname}/duplex", "r") as fp:
- actual_duplex = fp.read().strip()
-
- ksft_eq(actual_speed, expected_speed)
- ksft_eq(actual_duplex, expected_duplex)
- return True
-
- def set_autonegotiation_state(self, state: str, remote: bool = False) -> bool:
- common_link_modes = self.common_link_modes
- speeds, duplex_modes = self.get_speed_duplex_values(self.common_link_modes)
- speed = speeds[0]
- duplex = duplex_modes[0]
- if not speed or not duplex:
- ksft_pr("No speed or duplex modes found")
- return False
-
- speed_duplex_cmd = f"speed {speed} duplex {duplex}" if state == "off" else ""
- if remote:
- if not self.verify_link_up():
- return False
- """Set the autonegotiation state for the partner"""
- command = f"-s {self.partner_netif} {speed_duplex_cmd} autoneg {state}"
- partner_autoneg_change = None
- """Set autonegotiation state for interface in remote pc"""
- try:
- partner_autoneg_change = ethtool(command, host=self.cfg.remote)
- except Exception as e:
- ksft_pr(f"Unexpected error occurred while changing auto-neg in remote: {e}")
- if partner_autoneg_change is None or partner_autoneg_change.ret != 0:
- ksft_pr(f"Not able to set autoneg parameter for interface {self.partner_netif}.")
- return False
- ksft_pr(f"Autoneg set as {state} for {self.partner_netif}")
- else:
- """Set the autonegotiation state for the interface"""
- try:
- process = ethtool(f"-s {self.cfg.ifname} {speed_duplex_cmd} autoneg {state}")
- if process.ret != 0:
- ksft_pr(f"Not able to set autoneg parameter for interface {self.cfg.ifname}")
- return False
- except Exception as e:
- ksft_pr(f"Unexpected error occurred while changing auto-neg in local: {e}")
- return False
- ksft_pr(f"Autoneg set as {state} for {self.cfg.ifname}")
- return True
-
- def check_autoneg_supported(self, remote: bool = False) -> bool:
- if not remote:
- local_autoneg = self.get_ethtool_field("supports-auto-negotiation")
- if local_autoneg is None:
- ksft_pr(f"Unable to fetch auto-negotiation status for interface {self.cfg.ifname}")
- """Return autoneg status of the local interface"""
- return local_autoneg
- else:
- if not self.verify_link_up():
- raise KsftSkipEx("Link is DOWN")
- """Check remote auto-negotiation support status"""
- partner_autoneg = False
- if self.partner_netif is not None:
- partner_autoneg = self.get_ethtool_field("supports-auto-negotiation", remote=True)
- if partner_autoneg is None:
- ksft_pr(f"Unable to fetch auto-negotiation status for interface {self.partner_netif}")
- return partner_autoneg
-
- def get_common_link_modes(self) -> set[str]:
- common_link_modes = []
- """Populate common link modes"""
- link_modes = self.get_ethtool_field("supported-link-modes")
- partner_link_modes = self.get_ethtool_field("link-partner-advertised-link-modes")
- if link_modes is None:
- raise KsftSkipEx(f"Link modes not available for {self.cfg.ifname}")
- if partner_link_modes is None:
- raise KsftSkipEx(f"Partner link modes not available for {self.cfg.ifname}")
- common_link_modes = set(link_modes) and set(partner_link_modes)
- return common_link_modes
-
- def get_speed_duplex_values(self, link_modes: list[str]) -> tuple[list[str], list[str]]:
- speed = []
- duplex = []
- """Check the link modes"""
- for data in link_modes:
- parts = data.split('/')
- speed_value = re.match(r'\d+', parts[0])
- if speed_value:
- speed.append(speed_value.group())
- else:
- ksft_pr(f"No speed value found for interface {self.ifname}")
- return None, None
- duplex.append(parts[1].lower())
- return speed, duplex
-
- def get_ethtool_field(self, field: str, remote: bool = False) -> Optional[str]:
- process = None
- if not remote:
- """Get the ethtool field value for the local interface"""
- try:
- process = ethtool(self.cfg.ifname, json=True)
- except Exception as e:
- ksft_pr("Required minimum ethtool version is 6.10")
- ksft_pr(f"Unexpected error occurred while getting ethtool field in local: {e}")
- return None
- else:
- if not self.verify_link_up():
- return None
- """Get the ethtool field value for the remote interface"""
- self.cfg.require_cmd("ethtool", remote=True)
- if self.partner_netif is None:
- ksft_pr(f"Partner interface name is unavailable.")
- return None
- try:
- process = ethtool(self.partner_netif, json=True, host=self.cfg.remote)
- except Exception as e:
- ksft_pr("Required minimum ethtool version is 6.10")
- ksft_pr(f"Unexpected error occurred while getting ethtool field in remote: {e}")
- return None
- json_data = process[0]
- """Check if the field exist in the json data"""
- if field not in json_data:
- raise KsftSkipEx(f'Field {field} does not exist in the output of interface {json_data["ifname"]}')
- return json_data[field]
diff --git a/tools/testing/selftests/drivers/net/hw/ncdevmem.c b/tools/testing/selftests/drivers/net/hw/ncdevmem.c
index 9d48004ff1a1..02e4d3d7ded2 100644
--- a/tools/testing/selftests/drivers/net/hw/ncdevmem.c
+++ b/tools/testing/selftests/drivers/net/hw/ncdevmem.c
@@ -9,22 +9,31 @@
* ncdevmem -s <server IP> [-c <client IP>] -f eth1 -l -p 5201
*
* On client:
- * echo -n "hello\nworld" | nc -s <server IP> 5201 -p 5201
+ * echo -n "hello\nworld" | \
+ * ncdevmem -s <server IP> [-c <client IP>] -p 5201 -f eth1
*
- * Test data validation:
+ * Note this is compatible with regular netcat. i.e. the sender or receiver can
+ * be replaced with regular netcat to test the RX or TX path in isolation.
+ *
+ * Test data validation (devmem TCP on RX only):
*
* On server:
* ncdevmem -s <server IP> [-c <client IP>] -f eth1 -l -p 5201 -v 7
*
* On client:
* yes $(echo -e \\x01\\x02\\x03\\x04\\x05\\x06) | \
- * tr \\n \\0 | \
- * head -c 5G | \
+ * head -c 1G | \
* nc <server IP> 5201 -p 5201
*
+ * Test data validation (devmem TCP on RX and TX, validation happens on RX):
*
- * Note this is compatible with regular netcat. i.e. the sender or receiver can
- * be replaced with regular netcat to test the RX or TX path in isolation.
+ * On server:
+ * ncdevmem -s <server IP> [-c <client IP>] -l -p 5201 -v 8 -f eth1
+ *
+ * On client:
+ * yes $(echo -e \\x01\\x02\\x03\\x04\\x05\\x06\\x07) | \
+ * head -c 1M | \
+ * ncdevmem -s <server IP> [-c <client IP>] -p 5201 -f eth1
*/
#define _GNU_SOURCE
#define __EXPORTED_HEADERS__
@@ -40,15 +49,18 @@
#include <fcntl.h>
#include <malloc.h>
#include <error.h>
+#include <poll.h>
#include <arpa/inet.h>
#include <sys/socket.h>
#include <sys/mman.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
+#include <sys/time.h>
#include <linux/memfd.h>
#include <linux/dma-buf.h>
+#include <linux/errqueue.h>
#include <linux/udmabuf.h>
#include <linux/types.h>
#include <linux/netlink.h>
@@ -70,6 +82,9 @@
#define MSG_SOCK_DEVMEM 0x2000000
#endif
+#define MAX_IOV 1024
+
+static size_t max_chunk;
static char *server_ip;
static char *client_ip;
static char *port;
@@ -79,6 +94,8 @@ static int num_queues = -1;
static char *ifname;
static unsigned int ifindex;
static unsigned int dmabuf_id;
+static uint32_t tx_dmabuf_id;
+static int waittime_ms = 500;
struct memory_buffer {
int fd;
@@ -92,6 +109,8 @@ struct memory_buffer {
struct memory_provider {
struct memory_buffer *(*alloc)(size_t size);
void (*free)(struct memory_buffer *ctx);
+ void (*memcpy_to_device)(struct memory_buffer *dst, size_t off,
+ void *src, int n);
void (*memcpy_from_device)(void *dst, struct memory_buffer *src,
size_t off, int n);
};
@@ -152,6 +171,20 @@ static void udmabuf_free(struct memory_buffer *ctx)
free(ctx);
}
+static void udmabuf_memcpy_to_device(struct memory_buffer *dst, size_t off,
+ void *src, int n)
+{
+ struct dma_buf_sync sync = {};
+
+ sync.flags = DMA_BUF_SYNC_START | DMA_BUF_SYNC_WRITE;
+ ioctl(dst->fd, DMA_BUF_IOCTL_SYNC, &sync);
+
+ memcpy(dst->buf_mem + off, src, n);
+
+ sync.flags = DMA_BUF_SYNC_END | DMA_BUF_SYNC_WRITE;
+ ioctl(dst->fd, DMA_BUF_IOCTL_SYNC, &sync);
+}
+
static void udmabuf_memcpy_from_device(void *dst, struct memory_buffer *src,
size_t off, int n)
{
@@ -169,6 +202,7 @@ static void udmabuf_memcpy_from_device(void *dst, struct memory_buffer *src,
static struct memory_provider udmabuf_memory_provider = {
.alloc = udmabuf_alloc,
.free = udmabuf_free,
+ .memcpy_to_device = udmabuf_memcpy_to_device,
.memcpy_from_device = udmabuf_memcpy_from_device,
};
@@ -187,14 +221,16 @@ void validate_buffer(void *line, size_t size)
{
static unsigned char seed = 1;
unsigned char *ptr = line;
- int errors = 0;
+ unsigned char expected;
+ static int errors;
size_t i;
for (i = 0; i < size; i++) {
- if (ptr[i] != seed) {
+ expected = seed ? seed : '\n';
+ if (ptr[i] != expected) {
fprintf(stderr,
"Failed validation: expected=%u, actual=%u, index=%lu\n",
- seed, ptr[i], i);
+ expected, ptr[i], i);
errors++;
if (errors > 20)
error(1, 0, "validation failed.");
@@ -337,7 +373,8 @@ static int configure_flow_steering(struct sockaddr_in6 *server_sin)
server_addr = strrchr(server_addr, ':') + 1;
}
- return run_command("sudo ethtool -N %s flow-type %s %s %s dst-ip %s %s %s dst-port %s queue %d >&2",
+ /* Try configure 5-tuple */
+ if (run_command("sudo ethtool -N %s flow-type %s %s %s dst-ip %s %s %s dst-port %s queue %d >&2",
ifname,
type,
client_ip ? "src-ip" : "",
@@ -345,7 +382,17 @@ static int configure_flow_steering(struct sockaddr_in6 *server_sin)
server_addr,
client_ip ? "src-port" : "",
client_ip ? port : "",
- port, start_queue);
+ port, start_queue))
+ /* If that fails, try configure 3-tuple */
+ if (run_command("sudo ethtool -N %s flow-type %s dst-ip %s dst-port %s queue %d >&2",
+ ifname,
+ type,
+ server_addr,
+ port, start_queue))
+ /* If that fails, return error */
+ return -1;
+
+ return 0;
}
static int bind_rx_queue(unsigned int ifindex, unsigned int dmabuf_fd,
@@ -393,6 +440,49 @@ err_close:
return -1;
}
+static int bind_tx_queue(unsigned int ifindex, unsigned int dmabuf_fd,
+ struct ynl_sock **ys)
+{
+ struct netdev_bind_tx_req *req = NULL;
+ struct netdev_bind_tx_rsp *rsp = NULL;
+ struct ynl_error yerr;
+
+ *ys = ynl_sock_create(&ynl_netdev_family, &yerr);
+ if (!*ys) {
+ fprintf(stderr, "YNL: %s\n", yerr.msg);
+ return -1;
+ }
+
+ req = netdev_bind_tx_req_alloc();
+ netdev_bind_tx_req_set_ifindex(req, ifindex);
+ netdev_bind_tx_req_set_fd(req, dmabuf_fd);
+
+ rsp = netdev_bind_tx(*ys, req);
+ if (!rsp) {
+ perror("netdev_bind_tx");
+ goto err_close;
+ }
+
+ if (!rsp->_present.id) {
+ perror("id not present");
+ goto err_close;
+ }
+
+ fprintf(stderr, "got tx dmabuf id=%d\n", rsp->id);
+ tx_dmabuf_id = rsp->id;
+
+ netdev_bind_tx_req_free(req);
+ netdev_bind_tx_rsp_free(rsp);
+
+ return 0;
+
+err_close:
+ fprintf(stderr, "YNL failed: %s\n", (*ys)->err.msg);
+ netdev_bind_tx_req_free(req);
+ ynl_sock_destroy(*ys);
+ return -1;
+}
+
static void enable_reuseaddr(int fd)
{
int opt = 1;
@@ -447,10 +537,9 @@ static struct netdev_queue_id *create_queues(void)
return queues;
}
-int do_server(struct memory_buffer *mem)
+static int do_server(struct memory_buffer *mem)
{
char ctrl_data[sizeof(int) * 20000];
- struct netdev_queue_id *queues;
size_t non_page_aligned_frags = 0;
struct sockaddr_in6 client_addr;
struct sockaddr_in6 server_sin;
@@ -674,13 +763,216 @@ void run_devmem_tests(void)
provider->free(mem);
}
+static uint64_t gettimeofday_ms(void)
+{
+ struct timeval tv;
+
+ gettimeofday(&tv, NULL);
+ return (tv.tv_sec * 1000ULL) + (tv.tv_usec / 1000ULL);
+}
+
+static int do_poll(int fd)
+{
+ struct pollfd pfd;
+ int ret;
+
+ pfd.revents = 0;
+ pfd.fd = fd;
+
+ ret = poll(&pfd, 1, waittime_ms);
+ if (ret == -1)
+ error(1, errno, "poll");
+
+ return ret && (pfd.revents & POLLERR);
+}
+
+static void wait_compl(int fd)
+{
+ int64_t tstop = gettimeofday_ms() + waittime_ms;
+ char control[CMSG_SPACE(100)] = {};
+ struct sock_extended_err *serr;
+ struct msghdr msg = {};
+ struct cmsghdr *cm;
+ __u32 hi, lo;
+ int ret;
+
+ msg.msg_control = control;
+ msg.msg_controllen = sizeof(control);
+
+ while (gettimeofday_ms() < tstop) {
+ if (!do_poll(fd))
+ continue;
+
+ ret = recvmsg(fd, &msg, MSG_ERRQUEUE);
+ if (ret < 0) {
+ if (errno == EAGAIN)
+ continue;
+ error(1, errno, "recvmsg(MSG_ERRQUEUE)");
+ return;
+ }
+ if (msg.msg_flags & MSG_CTRUNC)
+ error(1, 0, "MSG_CTRUNC\n");
+
+ for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm)) {
+ if (cm->cmsg_level != SOL_IP &&
+ cm->cmsg_level != SOL_IPV6)
+ continue;
+ if (cm->cmsg_level == SOL_IP &&
+ cm->cmsg_type != IP_RECVERR)
+ continue;
+ if (cm->cmsg_level == SOL_IPV6 &&
+ cm->cmsg_type != IPV6_RECVERR)
+ continue;
+
+ serr = (void *)CMSG_DATA(cm);
+ if (serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY)
+ error(1, 0, "wrong origin %u", serr->ee_origin);
+ if (serr->ee_errno != 0)
+ error(1, 0, "wrong errno %d", serr->ee_errno);
+
+ hi = serr->ee_data;
+ lo = serr->ee_info;
+
+ fprintf(stderr, "tx complete [%d,%d]\n", lo, hi);
+ return;
+ }
+ }
+
+ error(1, 0, "did not receive tx completion");
+}
+
+static int do_client(struct memory_buffer *mem)
+{
+ char ctrl_data[CMSG_SPACE(sizeof(__u32))];
+ struct sockaddr_in6 server_sin;
+ struct sockaddr_in6 client_sin;
+ struct ynl_sock *ys = NULL;
+ struct iovec iov[MAX_IOV];
+ struct msghdr msg = {};
+ ssize_t line_size = 0;
+ struct cmsghdr *cmsg;
+ char *line = NULL;
+ unsigned long mid;
+ size_t len = 0;
+ int socket_fd;
+ __u32 ddmabuf;
+ int opt = 1;
+ int ret;
+
+ ret = parse_address(server_ip, atoi(port), &server_sin);
+ if (ret < 0)
+ error(1, 0, "parse server address");
+
+ socket_fd = socket(AF_INET6, SOCK_STREAM, 0);
+ if (socket_fd < 0)
+ error(1, socket_fd, "create socket");
+
+ enable_reuseaddr(socket_fd);
+
+ ret = setsockopt(socket_fd, SOL_SOCKET, SO_BINDTODEVICE, ifname,
+ strlen(ifname) + 1);
+ if (ret)
+ error(1, errno, "bindtodevice");
+
+ if (bind_tx_queue(ifindex, mem->fd, &ys))
+ error(1, 0, "Failed to bind\n");
+
+ if (client_ip) {
+ ret = parse_address(client_ip, atoi(port), &client_sin);
+ if (ret < 0)
+ error(1, 0, "parse client address");
+
+ ret = bind(socket_fd, &client_sin, sizeof(client_sin));
+ if (ret)
+ error(1, errno, "bind");
+ }
+
+ ret = setsockopt(socket_fd, SOL_SOCKET, SO_ZEROCOPY, &opt, sizeof(opt));
+ if (ret)
+ error(1, errno, "set sock opt");
+
+ fprintf(stderr, "Connect to %s %d (via %s)\n", server_ip,
+ ntohs(server_sin.sin6_port), ifname);
+
+ ret = connect(socket_fd, &server_sin, sizeof(server_sin));
+ if (ret)
+ error(1, errno, "connect");
+
+ while (1) {
+ free(line);
+ line = NULL;
+ line_size = getline(&line, &len, stdin);
+
+ if (line_size < 0)
+ break;
+
+ if (max_chunk) {
+ msg.msg_iovlen =
+ (line_size + max_chunk - 1) / max_chunk;
+ if (msg.msg_iovlen > MAX_IOV)
+ error(1, 0,
+ "can't partition %zd bytes into maximum of %d chunks",
+ line_size, MAX_IOV);
+
+ for (int i = 0; i < msg.msg_iovlen; i++) {
+ iov[i].iov_base = (void *)(i * max_chunk);
+ iov[i].iov_len = max_chunk;
+ }
+
+ iov[msg.msg_iovlen - 1].iov_len =
+ line_size - (msg.msg_iovlen - 1) * max_chunk;
+ } else {
+ iov[0].iov_base = 0;
+ iov[0].iov_len = line_size;
+ msg.msg_iovlen = 1;
+ }
+
+ msg.msg_iov = iov;
+ provider->memcpy_to_device(mem, 0, line, line_size);
+
+ msg.msg_control = ctrl_data;
+ msg.msg_controllen = sizeof(ctrl_data);
+
+ cmsg = CMSG_FIRSTHDR(&msg);
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_DEVMEM_DMABUF;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(__u32));
+
+ ddmabuf = tx_dmabuf_id;
+
+ *((__u32 *)CMSG_DATA(cmsg)) = ddmabuf;
+
+ ret = sendmsg(socket_fd, &msg, MSG_ZEROCOPY);
+ if (ret < 0)
+ error(1, errno, "Failed sendmsg");
+
+ fprintf(stderr, "sendmsg_ret=%d\n", ret);
+
+ if (ret != line_size)
+ error(1, errno, "Did not send all bytes %d vs %zd", ret,
+ line_size);
+
+ wait_compl(socket_fd);
+ }
+
+ fprintf(stderr, "%s: tx ok\n", TEST_PREFIX);
+
+ free(line);
+ close(socket_fd);
+
+ if (ys)
+ ynl_sock_destroy(ys);
+
+ return 0;
+}
+
int main(int argc, char *argv[])
{
struct memory_buffer *mem;
int is_server = 0, opt;
int ret;
- while ((opt = getopt(argc, argv, "ls:c:p:v:q:t:f:")) != -1) {
+ while ((opt = getopt(argc, argv, "ls:c:p:v:q:t:f:z:")) != -1) {
switch (opt) {
case 'l':
is_server = 1;
@@ -706,6 +998,9 @@ int main(int argc, char *argv[])
case 'f':
ifname = optarg;
break;
+ case 'z':
+ max_chunk = atoi(optarg);
+ break;
case '?':
fprintf(stderr, "unknown option: %c\n", optopt);
break;
@@ -717,6 +1012,8 @@ int main(int argc, char *argv[])
ifindex = if_nametoindex(ifname);
+ fprintf(stderr, "using ifindex=%u\n", ifindex);
+
if (!server_ip && !client_ip) {
if (start_queue < 0 && num_queues < 0) {
num_queues = rxq_num(ifindex);
@@ -767,7 +1064,7 @@ int main(int argc, char *argv[])
error(1, 0, "Missing -p argument\n");
mem = provider->alloc(getpagesize() * NUM_PAGES);
- ret = is_server ? do_server(mem) : 1;
+ ret = is_server ? do_server(mem) : do_client(mem);
provider->free(mem);
return ret;
diff --git a/tools/testing/selftests/drivers/net/hw/nic_link_layer.py b/tools/testing/selftests/drivers/net/hw/nic_link_layer.py
deleted file mode 100644
index efd921180532..000000000000
--- a/tools/testing/selftests/drivers/net/hw/nic_link_layer.py
+++ /dev/null
@@ -1,113 +0,0 @@
-#!/usr/bin/env python3
-# SPDX-License-Identifier: GPL-2.0
-
-#Introduction:
-#This file has basic link layer tests for generic NIC drivers.
-#The test comprises of auto-negotiation, speed and duplex checks.
-#
-#Setup:
-#Connect the DUT PC with NIC card to partner pc back via ethernet medium of your choice(RJ45, T1)
-#
-# DUT PC Partner PC
-#┌───────────────────────┐ ┌──────────────────────────┐
-#│ │ │ │
-#│ │ │ │
-#│ ┌───────────┐ │ │
-#│ │DUT NIC │ Eth │ │
-#│ │Interface ─┼─────────────────────────┼─ any eth Interface │
-#│ └───────────┘ │ │
-#│ │ │ │
-#│ │ │ │
-#└───────────────────────┘ └──────────────────────────┘
-#
-#Configurations:
-#Required minimum ethtool version is 6.10 (supports json)
-#Default values:
-#time_delay = 8 #time taken to wait for transitions to happen, in seconds.
-
-import time
-import argparse
-from lib.py import ksft_run, ksft_exit, ksft_pr, ksft_eq
-from lib.py import KsftFailEx, KsftSkipEx
-from lib.py import NetDrvEpEnv
-from lib.py import LinkConfig
-
-def _pre_test_checks(cfg: object, link_config: LinkConfig) -> None:
- if link_config.partner_netif is None:
- KsftSkipEx("Partner interface is not available")
- if not link_config.check_autoneg_supported() or not link_config.check_autoneg_supported(remote=True):
- KsftSkipEx(f"Auto-negotiation not supported for interface {cfg.ifname} or {link_config.partner_netif}")
- if not link_config.verify_link_up():
- raise KsftSkipEx(f"Link state of interface {cfg.ifname} is DOWN")
-
-def verify_autonegotiation(cfg: object, expected_state: str, link_config: LinkConfig) -> None:
- if not link_config.verify_link_up():
- raise KsftSkipEx(f"Link state of interface {cfg.ifname} is DOWN")
- """Verifying the autonegotiation state in partner"""
- partner_autoneg_output = link_config.get_ethtool_field("auto-negotiation", remote=True)
- if partner_autoneg_output is None:
- KsftSkipEx(f"Auto-negotiation state not available for interface {link_config.partner_netif}")
- partner_autoneg_state = "on" if partner_autoneg_output is True else "off"
-
- ksft_eq(partner_autoneg_state, expected_state)
-
- """Verifying the autonegotiation state of local"""
- autoneg_output = link_config.get_ethtool_field("auto-negotiation")
- if autoneg_output is None:
- KsftSkipEx(f"Auto-negotiation state not available for interface {cfg.ifname}")
- actual_state = "on" if autoneg_output is True else "off"
-
- ksft_eq(actual_state, expected_state)
-
- """Verifying the link establishment"""
- link_available = link_config.get_ethtool_field("link-detected")
- if link_available is None:
- KsftSkipEx(f"Link status not available for interface {cfg.ifname}")
- if link_available != True:
- raise KsftSkipEx("Link not established at interface {cfg.ifname} after changing auto-negotiation")
-
-def test_autonegotiation(cfg: object, link_config: LinkConfig, time_delay: int) -> None:
- _pre_test_checks(cfg, link_config)
- for state in ["off", "on"]:
- if not link_config.set_autonegotiation_state(state, remote=True):
- raise KsftSkipEx(f"Unable to set auto-negotiation state for interface {link_config.partner_netif}")
- if not link_config.set_autonegotiation_state(state):
- raise KsftSkipEx(f"Unable to set auto-negotiation state for interface {cfg.ifname}")
- time.sleep(time_delay)
- verify_autonegotiation(cfg, state, link_config)
-
-def test_network_speed(cfg: object, link_config: LinkConfig, time_delay: int) -> None:
- _pre_test_checks(cfg, link_config)
- common_link_modes = link_config.common_link_modes
- if not common_link_modes:
- KsftSkipEx("No common link modes exist")
- speeds, duplex_modes = link_config.get_speed_duplex_values(common_link_modes)
-
- if speeds and duplex_modes and len(speeds) == len(duplex_modes):
- for idx in range(len(speeds)):
- speed = speeds[idx]
- duplex = duplex_modes[idx]
- if not link_config.set_speed_and_duplex(speed, duplex):
- raise KsftFailEx(f"Unable to set speed and duplex parameters for {cfg.ifname}")
- time.sleep(time_delay)
- if not link_config.verify_speed_and_duplex(speed, duplex):
- raise KsftSkipEx(f"Error occurred while verifying speed and duplex states for interface {cfg.ifname}")
- else:
- if not speeds or not duplex_modes:
- KsftSkipEx(f"No supported speeds or duplex modes found for interface {cfg.ifname}")
- else:
- KsftSkipEx("Mismatch in the number of speeds and duplex modes")
-
-def main() -> None:
- parser = argparse.ArgumentParser(description="Run basic link layer tests for NIC driver")
- parser.add_argument('--time-delay', type=int, default=8, help='Time taken to wait for transitions to happen(in seconds). Default is 8 seconds.')
- args = parser.parse_args()
- time_delay = args.time_delay
- with NetDrvEpEnv(__file__, nsim_test=False) as cfg:
- link_config = LinkConfig(cfg)
- ksft_run(globs=globals(), case_pfx={"test_"}, args=(cfg, link_config, time_delay,))
- link_config.reset_interface()
- ksft_exit()
-
-if __name__ == "__main__":
- main()
diff --git a/tools/testing/selftests/drivers/net/hw/nic_performance.py b/tools/testing/selftests/drivers/net/hw/nic_performance.py
deleted file mode 100644
index 201403b76ea3..000000000000
--- a/tools/testing/selftests/drivers/net/hw/nic_performance.py
+++ /dev/null
@@ -1,137 +0,0 @@
-#!/usr/bin/env python3
-# SPDX-License-Identifier: GPL-2.0
-
-#Introduction:
-#This file has basic performance test for generic NIC drivers.
-#The test comprises of throughput check for TCP and UDP streams.
-#
-#Setup:
-#Connect the DUT PC with NIC card to partner pc back via ethernet medium of your choice(RJ45, T1)
-#
-# DUT PC Partner PC
-#┌───────────────────────┐ ┌──────────────────────────┐
-#│ │ │ │
-#│ │ │ │
-#│ ┌───────────┐ │ │
-#│ │DUT NIC │ Eth │ │
-#│ │Interface ─┼─────────────────────────┼─ any eth Interface │
-#│ └───────────┘ │ │
-#│ │ │ │
-#│ │ │ │
-#└───────────────────────┘ └──────────────────────────┘
-#
-#Configurations:
-#To prevent interruptions, Add ethtool, ip to the sudoers list in remote PC and get the ssh key from remote.
-#Required minimum ethtool version is 6.10
-#Change the below configuration based on your hw needs.
-# """Default values"""
-#time_delay = 8 #time taken to wait for transitions to happen, in seconds.
-#test_duration = 10 #performance test duration for the throughput check, in seconds.
-#send_throughput_threshold = 80 #percentage of send throughput required to pass the check
-#receive_throughput_threshold = 50 #percentage of receive throughput required to pass the check
-
-import time
-import json
-import argparse
-from lib.py import ksft_run, ksft_exit, ksft_pr, ksft_true
-from lib.py import KsftFailEx, KsftSkipEx, GenerateTraffic
-from lib.py import NetDrvEpEnv, bkg, wait_port_listen
-from lib.py import cmd
-from lib.py import LinkConfig
-
-class TestConfig:
- def __init__(self, time_delay: int, test_duration: int, send_throughput_threshold: int, receive_throughput_threshold: int) -> None:
- self.time_delay = time_delay
- self.test_duration = test_duration
- self.send_throughput_threshold = send_throughput_threshold
- self.receive_throughput_threshold = receive_throughput_threshold
-
-def _pre_test_checks(cfg: object, link_config: LinkConfig) -> None:
- if not link_config.verify_link_up():
- KsftSkipEx(f"Link state of interface {cfg.ifname} is DOWN")
- common_link_modes = link_config.common_link_modes
- if common_link_modes is None:
- KsftSkipEx("No common link modes found")
- if link_config.partner_netif == None:
- KsftSkipEx("Partner interface is not available")
- if link_config.check_autoneg_supported():
- KsftSkipEx("Auto-negotiation not supported by local")
- if link_config.check_autoneg_supported(remote=True):
- KsftSkipEx("Auto-negotiation not supported by remote")
- cfg.require_cmd("iperf3", remote=True)
-
-def check_throughput(cfg: object, link_config: LinkConfig, test_config: TestConfig, protocol: str, traffic: GenerateTraffic) -> None:
- common_link_modes = link_config.common_link_modes
- speeds, duplex_modes = link_config.get_speed_duplex_values(common_link_modes)
- """Test duration in seconds"""
- duration = test_config.test_duration
-
- ksft_pr(f"{protocol} test")
- test_type = "-u" if protocol == "UDP" else ""
-
- send_throughput = []
- receive_throughput = []
- for idx in range(0, len(speeds)):
- if link_config.set_speed_and_duplex(speeds[idx], duplex_modes[idx]) == False:
- raise KsftFailEx(f"Not able to set speed and duplex parameters for {cfg.ifname}")
- time.sleep(test_config.time_delay)
- if not link_config.verify_link_up():
- raise KsftSkipEx(f"Link state of interface {cfg.ifname} is DOWN")
-
- send_command=f"{test_type} -b 0 -t {duration} --json"
- receive_command=f"{test_type} -b 0 -t {duration} --reverse --json"
-
- send_result = traffic.run_remote_test(cfg, command=send_command)
- if send_result.ret != 0:
- raise KsftSkipEx("Error occurred during data transmit: {send_result.stdout}")
-
- send_output = send_result.stdout
- send_data = json.loads(send_output)
-
- """Convert throughput to Mbps"""
- send_throughput.append(round(send_data['end']['sum_sent']['bits_per_second'] / 1e6, 2))
- ksft_pr(f"{protocol}: Send throughput: {send_throughput[idx]} Mbps")
-
- receive_result = traffic.run_remote_test(cfg, command=receive_command)
- if receive_result.ret != 0:
- raise KsftSkipEx("Error occurred during data receive: {receive_result.stdout}")
-
- receive_output = receive_result.stdout
- receive_data = json.loads(receive_output)
-
- """Convert throughput to Mbps"""
- receive_throughput.append(round(receive_data['end']['sum_received']['bits_per_second'] / 1e6, 2))
- ksft_pr(f"{protocol}: Receive throughput: {receive_throughput[idx]} Mbps")
-
- """Check whether throughput is not below the threshold (default values set at start)"""
- for idx in range(0, len(speeds)):
- send_threshold = float(speeds[idx]) * float(test_config.send_throughput_threshold / 100)
- receive_threshold = float(speeds[idx]) * float(test_config.receive_throughput_threshold / 100)
- ksft_true(send_throughput[idx] >= send_threshold, f"{protocol}: Send throughput is below threshold for {speeds[idx]} Mbps in {duplex_modes[idx]} duplex")
- ksft_true(receive_throughput[idx] >= receive_threshold, f"{protocol}: Receive throughput is below threshold for {speeds[idx]} Mbps in {duplex_modes[idx]} duplex")
-
-def test_tcp_throughput(cfg: object, link_config: LinkConfig, test_config: TestConfig, traffic: GenerateTraffic) -> None:
- _pre_test_checks(cfg, link_config)
- check_throughput(cfg, link_config, test_config, 'TCP', traffic)
-
-def test_udp_throughput(cfg: object, link_config: LinkConfig, test_config: TestConfig, traffic: GenerateTraffic) -> None:
- _pre_test_checks(cfg, link_config)
- check_throughput(cfg, link_config, test_config, 'UDP', traffic)
-
-def main() -> None:
- parser = argparse.ArgumentParser(description="Run basic performance test for NIC driver")
- parser.add_argument('--time-delay', type=int, default=8, help='Time taken to wait for transitions to happen(in seconds). Default is 8 seconds.')
- parser.add_argument('--test-duration', type=int, default=10, help='Performance test duration for the throughput check, in seconds. Default is 10 seconds.')
- parser.add_argument('--stt', type=int, default=80, help='Send throughput Threshold: Percentage of send throughput upon actual throughput required to pass the throughput check (in percentage). Default is 80.')
- parser.add_argument('--rtt', type=int, default=50, help='Receive throughput Threshold: Percentage of receive throughput upon actual throughput required to pass the throughput check (in percentage). Default is 50.')
- args=parser.parse_args()
- test_config = TestConfig(args.time_delay, args.test_duration, args.stt, args.rtt)
- with NetDrvEpEnv(__file__, nsim_test=False) as cfg:
- traffic = GenerateTraffic(cfg)
- link_config = LinkConfig(cfg)
- ksft_run(globs=globals(), case_pfx={"test_"}, args=(cfg, link_config, test_config, traffic, ))
- link_config.reset_interface()
- ksft_exit()
-
-if __name__ == "__main__":
- main()
diff --git a/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py b/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py
index 53bb08cc29ec..f439c434ba36 100755
--- a/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py
+++ b/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py
@@ -32,6 +32,11 @@ def test_rss_input_xfrm(cfg, ipver):
if multiprocessing.cpu_count() < 2:
raise KsftSkipEx("Need at least two CPUs to test symmetric RSS hash")
+ cfg.require_cmd("socat", remote=True)
+
+ if not hasattr(socket, "SO_INCOMING_CPU"):
+ raise KsftSkipEx("socket.SO_INCOMING_CPU was added in Python 3.11")
+
input_xfrm = cfg.ethnl.rss_get(
{'header': {'dev-name': cfg.ifname}}).get('input_xfrm')
diff --git a/tools/testing/selftests/drivers/net/hw/xsk_reconfig.py b/tools/testing/selftests/drivers/net/hw/xsk_reconfig.py
new file mode 100755
index 000000000000..d19d1d518208
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/xsk_reconfig.py
@@ -0,0 +1,60 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+# This is intended to be run on a virtio-net guest interface.
+# The test binds the XDP socket to the interface without setting
+# the fill ring to trigger delayed refill_work. This helps to
+# make it easier to reproduce the deadlock when XDP program,
+# XDP socket bind/unbind, rx ring resize race with refill_work on
+# the buggy kernel.
+#
+# The Qemu command to setup virtio-net
+# -netdev tap,id=hostnet1,vhost=on,script=no,downscript=no
+# -device virtio-net-pci,netdev=hostnet1,iommu_platform=on,disable-legacy=on
+
+from lib.py import ksft_exit, ksft_run
+from lib.py import KsftSkipEx, KsftFailEx
+from lib.py import NetDrvEnv
+from lib.py import bkg, ip, cmd, ethtool
+import time
+
+def _get_rx_ring_entries(cfg):
+ output = ethtool(f"-g {cfg.ifname}", json=True)
+ return output[0]["rx"]
+
+def setup_xsk(cfg, xdp_queue_id = 0) -> bkg:
+ # Probe for support
+ xdp = cmd(f'{cfg.net_lib_dir / "xdp_helper"} - -', fail=False)
+ if xdp.ret == 255:
+ raise KsftSkipEx('AF_XDP unsupported')
+ elif xdp.ret > 0:
+ raise KsftFailEx('unable to create AF_XDP socket')
+
+ try:
+ return bkg(f'{cfg.net_lib_dir / "xdp_helper"} {cfg.ifindex} ' \
+ '{xdp_queue_id} -z', ksft_wait=3)
+ except:
+ raise KsftSkipEx('Failed to bind XDP socket in zerocopy.\n' \
+ 'Please consider adding iommu_platform=on ' \
+ 'when setting up virtio-net-pci')
+
+def check_xdp_bind(cfg):
+ with setup_xsk(cfg):
+ ip(f"link set dev %s xdp obj %s sec xdp" %
+ (cfg.ifname, cfg.net_lib_dir / "xdp_dummy.bpf.o"))
+ ip(f"link set dev %s xdp off" % cfg.ifname)
+
+def check_rx_resize(cfg):
+ with setup_xsk(cfg):
+ rx_ring = _get_rx_ring_entries(cfg)
+ ethtool(f"-G %s rx %d" % (cfg.ifname, rx_ring // 2))
+ ethtool(f"-G %s rx %d" % (cfg.ifname, rx_ring))
+
+def main():
+ with NetDrvEnv(__file__, nsim_test=False) as cfg:
+ ksft_run([check_xdp_bind, check_rx_resize],
+ args=(cfg, ))
+ ksft_exit()
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/drivers/net/lib/py/env.py b/tools/testing/selftests/drivers/net/lib/py/env.py
index ad5ff645183a..3bccddf8cbc5 100644
--- a/tools/testing/selftests/drivers/net/lib/py/env.py
+++ b/tools/testing/selftests/drivers/net/lib/py/env.py
@@ -12,7 +12,7 @@ from .remote import Remote
class NetDrvEnvBase:
"""
- Base class for a NIC / host envirnoments
+ Base class for a NIC / host environments
Attributes:
test_dir: Path to the source directory of the test
diff --git a/tools/testing/selftests/drivers/net/lib/py/load.py b/tools/testing/selftests/drivers/net/lib/py/load.py
index da5af2c680fa..d9c10613ae67 100644
--- a/tools/testing/selftests/drivers/net/lib/py/load.py
+++ b/tools/testing/selftests/drivers/net/lib/py/load.py
@@ -2,7 +2,7 @@
import time
-from lib.py import ksft_pr, cmd, ip, rand_port, wait_port_listen, bkg
+from lib.py import ksft_pr, cmd, ip, rand_port, wait_port_listen
class GenerateTraffic:
def __init__(self, env, port=None):
@@ -23,24 +23,6 @@ class GenerateTraffic:
self.stop(verbose=True)
raise Exception("iperf3 traffic did not ramp up")
- def run_remote_test(self, env: object, port=None, command=None):
- if port is None:
- port = rand_port()
- try:
- server_cmd = f"iperf3 -s 1 -p {port} --one-off"
- with bkg(server_cmd, host=env.remote):
- #iperf3 opens TCP connection as default in server
- #-u to be specified in client command for UDP
- wait_port_listen(port, host=env.remote)
- except Exception as e:
- raise Exception(f"Unexpected error occurred while running server command: {e}")
- try:
- client_cmd = f"iperf3 -c {env.remote_addr} -p {port} {command}"
- proc = cmd(client_cmd)
- return proc
- except Exception as e:
- raise Exception(f"Unexpected error occurred while running client command: {e}")
-
def _wait_pkts(self, pkt_cnt=None, pps=None):
"""
Wait until we've seen pkt_cnt or until traffic ramps up to pps.
diff --git a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh
index 3c96b022954d..29b01b8e2215 100644
--- a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh
+++ b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh
@@ -33,7 +33,6 @@ NSIM_DEV_SYS_NEW="/sys/bus/netdevsim/new_device"
# Used to create and delete namespaces
source "${LIBDIR}"/../../../../net/lib.sh
-source "${LIBDIR}"/../../../../net/net_helper.sh
# Create netdevsim interfaces
create_ifaces() {
diff --git a/tools/testing/selftests/drivers/net/napi_id.py b/tools/testing/selftests/drivers/net/napi_id.py
new file mode 100755
index 000000000000..356bac46ba04
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/napi_id.py
@@ -0,0 +1,23 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+from lib.py import ksft_run, ksft_exit
+from lib.py import ksft_eq, NetDrvEpEnv
+from lib.py import bkg, cmd, rand_port, NetNSEnter
+
+def test_napi_id(cfg) -> None:
+ port = rand_port()
+ listen_cmd = f"{cfg.test_dir}/napi_id_helper {cfg.addr_v['4']} {port}"
+
+ with bkg(listen_cmd, ksft_wait=3) as server:
+ cmd(f"echo a | socat - TCP:{cfg.addr_v['4']}:{port}", host=cfg.remote, shell=True)
+
+ ksft_eq(0, server.ret)
+
+def main() -> None:
+ with NetDrvEpEnv(__file__) as cfg:
+ ksft_run([test_napi_id], args=(cfg,))
+ ksft_exit()
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/drivers/net/napi_id_helper.c b/tools/testing/selftests/drivers/net/napi_id_helper.c
new file mode 100644
index 000000000000..eecd610c2109
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/napi_id_helper.c
@@ -0,0 +1,83 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <arpa/inet.h>
+#include <sys/socket.h>
+
+#include "../../net/lib/ksft.h"
+
+int main(int argc, char *argv[])
+{
+ struct sockaddr_in address;
+ unsigned int napi_id;
+ unsigned int port;
+ socklen_t optlen;
+ char buf[1024];
+ int opt = 1;
+ int server;
+ int client;
+ int ret;
+
+ server = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
+ if (server < 0) {
+ perror("socket creation failed");
+ if (errno == EAFNOSUPPORT)
+ return -1;
+ return 1;
+ }
+
+ port = atoi(argv[2]);
+
+ if (setsockopt(server, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt))) {
+ perror("setsockopt");
+ return 1;
+ }
+
+ address.sin_family = AF_INET;
+ inet_pton(AF_INET, argv[1], &address.sin_addr);
+ address.sin_port = htons(port);
+
+ if (bind(server, (struct sockaddr *)&address, sizeof(address)) < 0) {
+ perror("bind failed");
+ return 1;
+ }
+
+ if (listen(server, 1) < 0) {
+ perror("listen");
+ return 1;
+ }
+
+ ksft_ready();
+
+ client = accept(server, NULL, 0);
+ if (client < 0) {
+ perror("accept");
+ return 1;
+ }
+
+ optlen = sizeof(napi_id);
+ ret = getsockopt(client, SOL_SOCKET, SO_INCOMING_NAPI_ID, &napi_id,
+ &optlen);
+ if (ret != 0) {
+ perror("getsockopt");
+ return 1;
+ }
+
+ read(client, buf, 1024);
+
+ ksft_wait();
+
+ if (napi_id == 0) {
+ fprintf(stderr, "napi ID is 0\n");
+ return 1;
+ }
+
+ close(client);
+ close(server);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/drivers/net/netdevsim/peer.sh b/tools/testing/selftests/drivers/net/netdevsim/peer.sh
index aed62d9e6c0a..1bb46ec435d4 100755
--- a/tools/testing/selftests/drivers/net/netdevsim/peer.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/peer.sh
@@ -1,7 +1,7 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0-only
-source ../../../net/net_helper.sh
+source ../../../net/lib.sh
NSIM_DEV_1_ID=$((256 + RANDOM % 256))
NSIM_DEV_1_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_DEV_1_ID
diff --git a/tools/testing/selftests/drivers/net/ping.py b/tools/testing/selftests/drivers/net/ping.py
index af8df2313a3b..e0f114612c1a 100755
--- a/tools/testing/selftests/drivers/net/ping.py
+++ b/tools/testing/selftests/drivers/net/ping.py
@@ -50,6 +50,16 @@ def _test_tcp(cfg) -> None:
cmd(f"echo {test_string} | socat -t 2 -u STDIN TCP:{cfg.remote_baddr}:{port}", shell=True)
ksft_eq(nc.stdout.strip(), test_string)
+def _schedule_checksum_reset(cfg, netnl) -> None:
+ features = ethtool(f"-k {cfg.ifname}", json=True)
+ setting = ""
+ for side in ["tx", "rx"]:
+ f = features[0][side + "-checksumming"]
+ if not f["fixed"]:
+ setting += " " + side
+ setting += " " + ("on" if f["requested"] or f["active"] else "off")
+ defer(ethtool, f" -K {cfg.ifname} " + setting)
+
def _set_offload_checksum(cfg, netnl, on) -> None:
try:
ethtool(f" -K {cfg.ifname} rx {on} tx {on} ")
@@ -139,6 +149,7 @@ def set_interface_init(cfg) -> None:
def test_default_v4(cfg, netnl) -> None:
cfg.require_ipver("4")
+ _schedule_checksum_reset(cfg, netnl)
_set_offload_checksum(cfg, netnl, "off")
_test_v4(cfg)
_test_tcp(cfg)
@@ -149,6 +160,7 @@ def test_default_v4(cfg, netnl) -> None:
def test_default_v6(cfg, netnl) -> None:
cfg.require_ipver("6")
+ _schedule_checksum_reset(cfg, netnl)
_set_offload_checksum(cfg, netnl, "off")
_test_v6(cfg)
_test_tcp(cfg)
@@ -157,6 +169,7 @@ def test_default_v6(cfg, netnl) -> None:
_test_tcp(cfg)
def test_xdp_generic_sb(cfg, netnl) -> None:
+ _schedule_checksum_reset(cfg, netnl)
_set_xdp_generic_sb_on(cfg)
_set_offload_checksum(cfg, netnl, "off")
_test_v4(cfg)
@@ -168,6 +181,7 @@ def test_xdp_generic_sb(cfg, netnl) -> None:
_test_tcp(cfg)
def test_xdp_generic_mb(cfg, netnl) -> None:
+ _schedule_checksum_reset(cfg, netnl)
_set_xdp_generic_mb_on(cfg)
_set_offload_checksum(cfg, netnl, "off")
_test_v4(cfg)
@@ -179,6 +193,7 @@ def test_xdp_generic_mb(cfg, netnl) -> None:
_test_tcp(cfg)
def test_xdp_native_sb(cfg, netnl) -> None:
+ _schedule_checksum_reset(cfg, netnl)
_set_xdp_native_sb_on(cfg)
_set_offload_checksum(cfg, netnl, "off")
_test_v4(cfg)
@@ -190,6 +205,7 @@ def test_xdp_native_sb(cfg, netnl) -> None:
_test_tcp(cfg)
def test_xdp_native_mb(cfg, netnl) -> None:
+ _schedule_checksum_reset(cfg, netnl)
_set_xdp_native_mb_on(cfg)
_set_offload_checksum(cfg, netnl, "off")
_test_v4(cfg)
diff --git a/tools/testing/selftests/drivers/net/queues.py b/tools/testing/selftests/drivers/net/queues.py
index 06abd3f233e1..236005290a33 100755
--- a/tools/testing/selftests/drivers/net/queues.py
+++ b/tools/testing/selftests/drivers/net/queues.py
@@ -26,13 +26,13 @@ def nl_get_queues(cfg, nl, qtype='rx'):
def check_xsk(cfg, nl, xdp_queue_id=0) -> None:
# Probe for support
- xdp = cmd(f'{cfg.test_dir / "xdp_helper"} - -', fail=False)
+ xdp = cmd(f'{cfg.net_lib_dir / "xdp_helper"} - -', fail=False)
if xdp.ret == 255:
raise KsftSkipEx('AF_XDP unsupported')
elif xdp.ret > 0:
raise KsftFailEx('unable to create AF_XDP socket')
- with bkg(f'{cfg.test_dir / "xdp_helper"} {cfg.ifindex} {xdp_queue_id}',
+ with bkg(f'{cfg.net_lib_dir / "xdp_helper"} {cfg.ifindex} {xdp_queue_id}',
ksft_wait=3):
rx = tx = False
diff --git a/tools/testing/selftests/drivers/net/team/Makefile b/tools/testing/selftests/drivers/net/team/Makefile
index 2d5a76d99181..eaf6938f100e 100644
--- a/tools/testing/selftests/drivers/net/team/Makefile
+++ b/tools/testing/selftests/drivers/net/team/Makefile
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
# Makefile for net selftests
-TEST_PROGS := dev_addr_lists.sh
+TEST_PROGS := dev_addr_lists.sh propagation.sh
TEST_INCLUDES := \
../bonding/lag_lib.sh \
diff --git a/tools/testing/selftests/drivers/net/team/config b/tools/testing/selftests/drivers/net/team/config
index b5e3a3aad4bf..636b3525b679 100644
--- a/tools/testing/selftests/drivers/net/team/config
+++ b/tools/testing/selftests/drivers/net/team/config
@@ -1,5 +1,6 @@
CONFIG_DUMMY=y
CONFIG_IPV6=y
CONFIG_MACVLAN=y
+CONFIG_NETDEVSIM=m
CONFIG_NET_TEAM=y
CONFIG_NET_TEAM_MODE_LOADBALANCE=y
diff --git a/tools/testing/selftests/drivers/net/team/propagation.sh b/tools/testing/selftests/drivers/net/team/propagation.sh
new file mode 100755
index 000000000000..4bea75b79878
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/team/propagation.sh
@@ -0,0 +1,80 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+set -e
+
+NSIM_LRO_ID=$((256 + RANDOM % 256))
+NSIM_LRO_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_LRO_ID
+
+NSIM_DEV_SYS_NEW=/sys/bus/netdevsim/new_device
+NSIM_DEV_SYS_DEL=/sys/bus/netdevsim/del_device
+
+cleanup()
+{
+ set +e
+ ip link del dummyteam &>/dev/null
+ ip link del team0 &>/dev/null
+ echo $NSIM_LRO_ID > $NSIM_DEV_SYS_DEL
+ modprobe -r netdevsim
+}
+
+# Trigger LRO propagation to the lower.
+# https://lore.kernel.org/netdev/aBvOpkIoxcr9PfDg@mini-arch/
+team_lro()
+{
+ # using netdevsim because it supports NETIF_F_LRO
+ NSIM_LRO_NAME=$(find $NSIM_LRO_SYS/net -maxdepth 1 -type d ! \
+ -path $NSIM_LRO_SYS/net -exec basename {} \;)
+
+ ip link add name team0 type team
+ ip link set $NSIM_LRO_NAME down
+ ip link set dev $NSIM_LRO_NAME master team0
+ ip link set team0 up
+ ethtool -K team0 large-receive-offload off
+
+ ip link del team0
+}
+
+# Trigger promisc propagation to the lower during IFLA_MASTER.
+# https://lore.kernel.org/netdev/20250506032328.3003050-1-sdf@fomichev.me/
+team_promisc()
+{
+ ip link add name dummyteam type dummy
+ ip link add name team0 type team
+ ip link set dummyteam down
+ ip link set team0 promisc on
+ ip link set dev dummyteam master team0
+ ip link set team0 up
+
+ ip link del team0
+ ip link del dummyteam
+}
+
+# Trigger promisc propagation to the lower via netif_change_flags (aka
+# ndo_change_rx_flags).
+# https://lore.kernel.org/netdev/20250514220319.3505158-1-stfomichev@gmail.com/
+team_change_flags()
+{
+ ip link add name dummyteam type dummy
+ ip link add name team0 type team
+ ip link set dummyteam down
+ ip link set dev dummyteam master team0
+ ip link set team0 up
+ ip link set team0 promisc on
+
+ # Make sure we can add more L2 addresses without any issues.
+ ip link add link team0 address 00:00:00:00:00:01 team0.1 type macvlan
+ ip link set team0.1 up
+
+ ip link del team0.1
+ ip link del team0
+ ip link del dummyteam
+}
+
+trap cleanup EXIT
+modprobe netdevsim || :
+echo $NSIM_LRO_ID > $NSIM_DEV_SYS_NEW
+udevadm settle
+team_lro
+team_promisc
+team_change_flags
diff --git a/tools/testing/selftests/nci/nci_dev.c b/tools/testing/selftests/nci/nci_dev.c
index 1562aa7d60b0..6dec59d64083 100644
--- a/tools/testing/selftests/nci/nci_dev.c
+++ b/tools/testing/selftests/nci/nci_dev.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * Copyright (C) 2021 Samsung Electrnoics
+ * Copyright (C) 2021 Samsung Electronics
* Bongsu Jeon <bongsu.jeon@samsung.com>
*
* Test code for nci
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 70a38f485d4d..ea84b88bcb30 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -115,7 +115,7 @@ YNL_GEN_FILES := busy_poller netlink-dumps
TEST_GEN_FILES += $(YNL_GEN_FILES)
TEST_FILES := settings
-TEST_FILES += in_netns.sh lib.sh net_helper.sh setup_loopback.sh setup_veth.sh
+TEST_FILES += in_netns.sh lib.sh setup_loopback.sh setup_veth.sh
TEST_GEN_FILES += $(patsubst %.c,%.o,$(wildcard *.bpf.c))
diff --git a/tools/testing/selftests/net/af_unix/scm_rights.c b/tools/testing/selftests/net/af_unix/scm_rights.c
index d66336256580..8b015f16c03d 100644
--- a/tools/testing/selftests/net/af_unix/scm_rights.c
+++ b/tools/testing/selftests/net/af_unix/scm_rights.c
@@ -23,6 +23,7 @@ FIXTURE_VARIANT(scm_rights)
int type;
int flags;
bool test_listener;
+ bool disabled;
};
FIXTURE_VARIANT_ADD(scm_rights, dgram)
@@ -31,6 +32,16 @@ FIXTURE_VARIANT_ADD(scm_rights, dgram)
.type = SOCK_DGRAM,
.flags = 0,
.test_listener = false,
+ .disabled = false,
+};
+
+FIXTURE_VARIANT_ADD(scm_rights, dgram_disabled)
+{
+ .name = "UNIX ",
+ .type = SOCK_DGRAM,
+ .flags = 0,
+ .test_listener = false,
+ .disabled = true,
};
FIXTURE_VARIANT_ADD(scm_rights, stream)
@@ -39,6 +50,16 @@ FIXTURE_VARIANT_ADD(scm_rights, stream)
.type = SOCK_STREAM,
.flags = 0,
.test_listener = false,
+ .disabled = false,
+};
+
+FIXTURE_VARIANT_ADD(scm_rights, stream_disabled)
+{
+ .name = "UNIX-STREAM ",
+ .type = SOCK_STREAM,
+ .flags = 0,
+ .test_listener = false,
+ .disabled = true,
};
FIXTURE_VARIANT_ADD(scm_rights, stream_oob)
@@ -47,6 +68,16 @@ FIXTURE_VARIANT_ADD(scm_rights, stream_oob)
.type = SOCK_STREAM,
.flags = MSG_OOB,
.test_listener = false,
+ .disabled = false,
+};
+
+FIXTURE_VARIANT_ADD(scm_rights, stream_oob_disabled)
+{
+ .name = "UNIX-STREAM ",
+ .type = SOCK_STREAM,
+ .flags = MSG_OOB,
+ .test_listener = false,
+ .disabled = true,
};
FIXTURE_VARIANT_ADD(scm_rights, stream_listener)
@@ -55,6 +86,16 @@ FIXTURE_VARIANT_ADD(scm_rights, stream_listener)
.type = SOCK_STREAM,
.flags = 0,
.test_listener = true,
+ .disabled = false,
+};
+
+FIXTURE_VARIANT_ADD(scm_rights, stream_listener_disabled)
+{
+ .name = "UNIX-STREAM ",
+ .type = SOCK_STREAM,
+ .flags = 0,
+ .test_listener = true,
+ .disabled = true,
};
FIXTURE_VARIANT_ADD(scm_rights, stream_listener_oob)
@@ -63,6 +104,16 @@ FIXTURE_VARIANT_ADD(scm_rights, stream_listener_oob)
.type = SOCK_STREAM,
.flags = MSG_OOB,
.test_listener = true,
+ .disabled = false,
+};
+
+FIXTURE_VARIANT_ADD(scm_rights, stream_listener_oob_disabled)
+{
+ .name = "UNIX-STREAM ",
+ .type = SOCK_STREAM,
+ .flags = MSG_OOB,
+ .test_listener = true,
+ .disabled = true,
};
static int count_sockets(struct __test_metadata *_metadata,
@@ -105,6 +156,9 @@ FIXTURE_SETUP(scm_rights)
ret = unshare(CLONE_NEWNET);
ASSERT_EQ(0, ret);
+ if (variant->disabled)
+ return;
+
ret = count_sockets(_metadata, variant);
ASSERT_EQ(0, ret);
}
@@ -113,6 +167,9 @@ FIXTURE_TEARDOWN(scm_rights)
{
int ret;
+ if (variant->disabled)
+ return;
+
sleep(1);
ret = count_sockets(_metadata, variant);
@@ -121,6 +178,7 @@ FIXTURE_TEARDOWN(scm_rights)
static void create_listeners(struct __test_metadata *_metadata,
FIXTURE_DATA(scm_rights) *self,
+ const FIXTURE_VARIANT(scm_rights) *variant,
int n)
{
struct sockaddr_un addr = {
@@ -140,6 +198,12 @@ static void create_listeners(struct __test_metadata *_metadata,
ret = listen(self->fd[i], -1);
ASSERT_EQ(0, ret);
+ if (variant->disabled) {
+ ret = setsockopt(self->fd[i], SOL_SOCKET, SO_PASSRIGHTS,
+ &(int){0}, sizeof(int));
+ ASSERT_EQ(0, ret);
+ }
+
addrlen = sizeof(addr);
ret = getsockname(self->fd[i], (struct sockaddr *)&addr, &addrlen);
ASSERT_EQ(0, ret);
@@ -164,6 +228,12 @@ static void create_socketpairs(struct __test_metadata *_metadata,
for (i = 0; i < n * 2; i += 2) {
ret = socketpair(AF_UNIX, variant->type, 0, self->fd + i);
ASSERT_EQ(0, ret);
+
+ if (variant->disabled) {
+ ret = setsockopt(self->fd[i], SOL_SOCKET, SO_PASSRIGHTS,
+ &(int){0}, sizeof(int));
+ ASSERT_EQ(0, ret);
+ }
}
}
@@ -175,7 +245,7 @@ static void __create_sockets(struct __test_metadata *_metadata,
ASSERT_LE(n * 2, sizeof(self->fd) / sizeof(self->fd[0]));
if (variant->test_listener)
- create_listeners(_metadata, self, n);
+ create_listeners(_metadata, self, variant, n);
else
create_socketpairs(_metadata, self, variant, n);
}
@@ -230,7 +300,13 @@ void __send_fd(struct __test_metadata *_metadata,
int ret;
ret = sendmsg(self->fd[receiver * 2 + 1], &msg, variant->flags);
- ASSERT_EQ(MSGLEN, ret);
+
+ if (variant->disabled) {
+ ASSERT_EQ(-1, ret);
+ ASSERT_EQ(-EPERM, -errno);
+ } else {
+ ASSERT_EQ(MSGLEN, ret);
+ }
}
#define create_sockets(n) \
diff --git a/tools/testing/selftests/net/bareudp.sh b/tools/testing/selftests/net/bareudp.sh
index f366cadbc5e8..4046131e7888 100755
--- a/tools/testing/selftests/net/bareudp.sh
+++ b/tools/testing/selftests/net/bareudp.sh
@@ -106,26 +106,16 @@
# | |
# +-----------------------------------------------------------------------+
+. ./lib.sh
+
ERR=4 # Return 4 by default, which is the SKIP code for kselftest
PING6="ping"
PAUSE_ON_FAIL="no"
-readonly NS0=$(mktemp -u ns0-XXXXXXXX)
-readonly NS1=$(mktemp -u ns1-XXXXXXXX)
-readonly NS2=$(mktemp -u ns2-XXXXXXXX)
-readonly NS3=$(mktemp -u ns3-XXXXXXXX)
-
# Exit the script after having removed the network namespaces it created
-#
-# Parameters:
-#
-# * The list of network namespaces to delete before exiting.
-#
exit_cleanup()
{
- for ns in "$@"; do
- ip netns delete "${ns}" 2>/dev/null || true
- done
+ cleanup_all_ns
if [ "${ERR}" -eq 4 ]; then
echo "Error: Setting up the testing environment failed." >&2
@@ -140,17 +130,7 @@ exit_cleanup()
# namespaces created by this script are deleted.
create_namespaces()
{
- ip netns add "${NS0}" || exit_cleanup
- ip netns add "${NS1}" || exit_cleanup "${NS0}"
- ip netns add "${NS2}" || exit_cleanup "${NS0}" "${NS1}"
- ip netns add "${NS3}" || exit_cleanup "${NS0}" "${NS1}" "${NS2}"
-}
-
-# The trap function handler
-#
-exit_cleanup_all()
-{
- exit_cleanup "${NS0}" "${NS1}" "${NS2}" "${NS3}"
+ setup_ns NS0 NS1 NS2 NS3 || exit_cleanup
}
# Configure a network interface using a host route
@@ -188,10 +168,6 @@ iface_config()
#
setup_underlay()
{
- for ns in "${NS0}" "${NS1}" "${NS2}" "${NS3}"; do
- ip -netns "${ns}" link set dev lo up
- done;
-
ip link add name veth01 netns "${NS0}" type veth peer name veth10 netns "${NS1}"
ip link add name veth12 netns "${NS1}" type veth peer name veth21 netns "${NS2}"
ip link add name veth23 netns "${NS2}" type veth peer name veth32 netns "${NS3}"
@@ -234,14 +210,6 @@ setup_overlay_ipv4()
ip netns exec "${NS2}" sysctl -qw net.ipv4.ip_forward=1
ip -netns "${NS1}" route add 192.0.2.100/32 via 192.0.2.10
ip -netns "${NS2}" route add 192.0.2.103/32 via 192.0.2.33
-
- # The intermediate namespaces don't have routes for the reverse path,
- # as it will be handled by tc. So we need to ensure that rp_filter is
- # not going to block the traffic.
- ip netns exec "${NS1}" sysctl -qw net.ipv4.conf.all.rp_filter=0
- ip netns exec "${NS2}" sysctl -qw net.ipv4.conf.all.rp_filter=0
- ip netns exec "${NS1}" sysctl -qw net.ipv4.conf.default.rp_filter=0
- ip netns exec "${NS2}" sysctl -qw net.ipv4.conf.default.rp_filter=0
}
setup_overlay_ipv6()
@@ -521,13 +489,10 @@ done
check_features
-# Create namespaces before setting up the exit trap.
-# Otherwise, exit_cleanup_all() could delete namespaces that were not created
-# by this script.
-create_namespaces
-
set -e
-trap exit_cleanup_all EXIT
+trap exit_cleanup EXIT
+
+create_namespaces
setup_underlay
setup_overlay_ipv4
diff --git a/tools/testing/selftests/net/busy_poll_test.sh b/tools/testing/selftests/net/busy_poll_test.sh
index 7db292ec4884..7d2d40812074 100755
--- a/tools/testing/selftests/net/busy_poll_test.sh
+++ b/tools/testing/selftests/net/busy_poll_test.sh
@@ -1,6 +1,6 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
-source net_helper.sh
+source lib.sh
NSIM_SV_ID=$((256 + RANDOM % 256))
NSIM_SV_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_SV_ID
diff --git a/tools/testing/selftests/net/can/.gitignore b/tools/testing/selftests/net/can/.gitignore
new file mode 100644
index 000000000000..764a53fc837f
--- /dev/null
+++ b/tools/testing/selftests/net/can/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+test_raw_filter
diff --git a/tools/testing/selftests/net/can/Makefile b/tools/testing/selftests/net/can/Makefile
new file mode 100644
index 000000000000..5b82e60a03e7
--- /dev/null
+++ b/tools/testing/selftests/net/can/Makefile
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0
+
+top_srcdir = ../../../../..
+
+CFLAGS += -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include $(KHDR_INCLUDES)
+
+TEST_PROGS := test_raw_filter.sh
+
+TEST_GEN_FILES := test_raw_filter
+
+include ../../lib.mk
diff --git a/tools/testing/selftests/net/can/test_raw_filter.c b/tools/testing/selftests/net/can/test_raw_filter.c
new file mode 100644
index 000000000000..4101c36390fd
--- /dev/null
+++ b/tools/testing/selftests/net/can/test_raw_filter.c
@@ -0,0 +1,405 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause)
+/*
+ * Copyright (c) 2011 Volkswagen Group Electronic Research
+ * All rights reserved.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+#include <net/if.h>
+#include <linux/if.h>
+
+#include <linux/can.h>
+#include <linux/can/raw.h>
+
+#include "../../kselftest_harness.h"
+
+#define ID 0x123
+
+char CANIF[IFNAMSIZ];
+
+static int send_can_frames(int sock, int testcase)
+{
+ struct can_frame frame;
+
+ frame.can_dlc = 1;
+ frame.data[0] = testcase;
+
+ frame.can_id = ID;
+ if (write(sock, &frame, sizeof(frame)) < 0)
+ goto write_err;
+
+ frame.can_id = (ID | CAN_RTR_FLAG);
+ if (write(sock, &frame, sizeof(frame)) < 0)
+ goto write_err;
+
+ frame.can_id = (ID | CAN_EFF_FLAG);
+ if (write(sock, &frame, sizeof(frame)) < 0)
+ goto write_err;
+
+ frame.can_id = (ID | CAN_EFF_FLAG | CAN_RTR_FLAG);
+ if (write(sock, &frame, sizeof(frame)) < 0)
+ goto write_err;
+
+ return 0;
+
+write_err:
+ perror("write");
+ return 1;
+}
+
+FIXTURE(can_filters) {
+ int sock;
+};
+
+FIXTURE_SETUP(can_filters)
+{
+ struct sockaddr_can addr;
+ struct ifreq ifr;
+ int recv_own_msgs = 1;
+ int s, ret;
+
+ s = socket(PF_CAN, SOCK_RAW, CAN_RAW);
+ ASSERT_GE(s, 0)
+ TH_LOG("failed to create CAN_RAW socket: %d", errno);
+
+ strncpy(ifr.ifr_name, CANIF, sizeof(ifr.ifr_name));
+ ret = ioctl(s, SIOCGIFINDEX, &ifr);
+ ASSERT_GE(ret, 0)
+ TH_LOG("failed SIOCGIFINDEX: %d", errno);
+
+ addr.can_family = AF_CAN;
+ addr.can_ifindex = ifr.ifr_ifindex;
+
+ setsockopt(s, SOL_CAN_RAW, CAN_RAW_RECV_OWN_MSGS,
+ &recv_own_msgs, sizeof(recv_own_msgs));
+
+ ret = bind(s, (struct sockaddr *)&addr, sizeof(addr));
+ ASSERT_EQ(ret, 0)
+ TH_LOG("failed bind socket: %d", errno);
+
+ self->sock = s;
+}
+
+FIXTURE_TEARDOWN(can_filters)
+{
+ close(self->sock);
+}
+
+FIXTURE_VARIANT(can_filters) {
+ int testcase;
+ canid_t id;
+ canid_t mask;
+ int exp_num_rx;
+ canid_t exp_flags[];
+};
+
+/* Receive all frames when filtering for the ID in standard frame format */
+FIXTURE_VARIANT_ADD(can_filters, base) {
+ .testcase = 1,
+ .id = ID,
+ .mask = CAN_SFF_MASK,
+ .exp_num_rx = 4,
+ .exp_flags = {
+ 0,
+ CAN_RTR_FLAG,
+ CAN_EFF_FLAG,
+ CAN_EFF_FLAG | CAN_RTR_FLAG,
+ },
+};
+
+/* Ignore EFF flag in filter ID if not covered by filter mask */
+FIXTURE_VARIANT_ADD(can_filters, base_eff) {
+ .testcase = 2,
+ .id = ID | CAN_EFF_FLAG,
+ .mask = CAN_SFF_MASK,
+ .exp_num_rx = 4,
+ .exp_flags = {
+ 0,
+ CAN_RTR_FLAG,
+ CAN_EFF_FLAG,
+ CAN_EFF_FLAG | CAN_RTR_FLAG,
+ },
+};
+
+/* Ignore RTR flag in filter ID if not covered by filter mask */
+FIXTURE_VARIANT_ADD(can_filters, base_rtr) {
+ .testcase = 3,
+ .id = ID | CAN_RTR_FLAG,
+ .mask = CAN_SFF_MASK,
+ .exp_num_rx = 4,
+ .exp_flags = {
+ 0,
+ CAN_RTR_FLAG,
+ CAN_EFF_FLAG,
+ CAN_EFF_FLAG | CAN_RTR_FLAG,
+ },
+};
+
+/* Ignore EFF and RTR flags in filter ID if not covered by filter mask */
+FIXTURE_VARIANT_ADD(can_filters, base_effrtr) {
+ .testcase = 4,
+ .id = ID | CAN_EFF_FLAG | CAN_RTR_FLAG,
+ .mask = CAN_SFF_MASK,
+ .exp_num_rx = 4,
+ .exp_flags = {
+ 0,
+ CAN_RTR_FLAG,
+ CAN_EFF_FLAG,
+ CAN_EFF_FLAG | CAN_RTR_FLAG,
+ },
+};
+
+/* Receive only SFF frames when expecting no EFF flag */
+FIXTURE_VARIANT_ADD(can_filters, filter_eff) {
+ .testcase = 5,
+ .id = ID,
+ .mask = CAN_SFF_MASK | CAN_EFF_FLAG,
+ .exp_num_rx = 2,
+ .exp_flags = {
+ 0,
+ CAN_RTR_FLAG,
+ },
+};
+
+/* Receive only EFF frames when filter id and filter mask include EFF flag */
+FIXTURE_VARIANT_ADD(can_filters, filter_eff_eff) {
+ .testcase = 6,
+ .id = ID | CAN_EFF_FLAG,
+ .mask = CAN_SFF_MASK | CAN_EFF_FLAG,
+ .exp_num_rx = 2,
+ .exp_flags = {
+ CAN_EFF_FLAG,
+ CAN_EFF_FLAG | CAN_RTR_FLAG,
+ },
+};
+
+/* Receive only SFF frames when expecting no EFF flag, ignoring RTR flag */
+FIXTURE_VARIANT_ADD(can_filters, filter_eff_rtr) {
+ .testcase = 7,
+ .id = ID | CAN_RTR_FLAG,
+ .mask = CAN_SFF_MASK | CAN_EFF_FLAG,
+ .exp_num_rx = 2,
+ .exp_flags = {
+ 0,
+ CAN_RTR_FLAG,
+ },
+};
+
+/* Receive only EFF frames when filter id and filter mask include EFF flag,
+ * ignoring RTR flag
+ */
+FIXTURE_VARIANT_ADD(can_filters, filter_eff_effrtr) {
+ .testcase = 8,
+ .id = ID | CAN_EFF_FLAG | CAN_RTR_FLAG,
+ .mask = CAN_SFF_MASK | CAN_EFF_FLAG,
+ .exp_num_rx = 2,
+ .exp_flags = {
+ CAN_EFF_FLAG,
+ CAN_EFF_FLAG | CAN_RTR_FLAG,
+ },
+};
+
+/* Receive no remote frames when filtering for no RTR flag */
+FIXTURE_VARIANT_ADD(can_filters, filter_rtr) {
+ .testcase = 9,
+ .id = ID,
+ .mask = CAN_SFF_MASK | CAN_RTR_FLAG,
+ .exp_num_rx = 2,
+ .exp_flags = {
+ 0,
+ CAN_EFF_FLAG,
+ },
+};
+
+/* Receive no remote frames when filtering for no RTR flag, ignoring EFF flag */
+FIXTURE_VARIANT_ADD(can_filters, filter_rtr_eff) {
+ .testcase = 10,
+ .id = ID | CAN_EFF_FLAG,
+ .mask = CAN_SFF_MASK | CAN_RTR_FLAG,
+ .exp_num_rx = 2,
+ .exp_flags = {
+ 0,
+ CAN_EFF_FLAG,
+ },
+};
+
+/* Receive only remote frames when filter includes RTR flag */
+FIXTURE_VARIANT_ADD(can_filters, filter_rtr_rtr) {
+ .testcase = 11,
+ .id = ID | CAN_RTR_FLAG,
+ .mask = CAN_SFF_MASK | CAN_RTR_FLAG,
+ .exp_num_rx = 2,
+ .exp_flags = {
+ CAN_RTR_FLAG,
+ CAN_EFF_FLAG | CAN_RTR_FLAG,
+ },
+};
+
+/* Receive only remote frames when filter includes RTR flag, ignoring EFF
+ * flag
+ */
+FIXTURE_VARIANT_ADD(can_filters, filter_rtr_effrtr) {
+ .testcase = 12,
+ .id = ID | CAN_EFF_FLAG | CAN_RTR_FLAG,
+ .mask = CAN_SFF_MASK | CAN_RTR_FLAG,
+ .exp_num_rx = 2,
+ .exp_flags = {
+ CAN_RTR_FLAG,
+ CAN_EFF_FLAG | CAN_RTR_FLAG,
+ },
+};
+
+/* Receive only SFF data frame when filtering for no flags */
+FIXTURE_VARIANT_ADD(can_filters, filter_effrtr) {
+ .testcase = 13,
+ .id = ID,
+ .mask = CAN_SFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG,
+ .exp_num_rx = 1,
+ .exp_flags = {
+ 0,
+ },
+};
+
+/* Receive only EFF data frame when filtering for EFF but no RTR flag */
+FIXTURE_VARIANT_ADD(can_filters, filter_effrtr_eff) {
+ .testcase = 14,
+ .id = ID | CAN_EFF_FLAG,
+ .mask = CAN_SFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG,
+ .exp_num_rx = 1,
+ .exp_flags = {
+ CAN_EFF_FLAG,
+ },
+};
+
+/* Receive only SFF remote frame when filtering for RTR but no EFF flag */
+FIXTURE_VARIANT_ADD(can_filters, filter_effrtr_rtr) {
+ .testcase = 15,
+ .id = ID | CAN_RTR_FLAG,
+ .mask = CAN_SFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG,
+ .exp_num_rx = 1,
+ .exp_flags = {
+ CAN_RTR_FLAG,
+ },
+};
+
+/* Receive only EFF remote frame when filtering for EFF and RTR flag */
+FIXTURE_VARIANT_ADD(can_filters, filter_effrtr_effrtr) {
+ .testcase = 16,
+ .id = ID | CAN_EFF_FLAG | CAN_RTR_FLAG,
+ .mask = CAN_SFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG,
+ .exp_num_rx = 1,
+ .exp_flags = {
+ CAN_EFF_FLAG | CAN_RTR_FLAG,
+ },
+};
+
+/* Receive only SFF data frame when filtering for no EFF flag and no RTR flag
+ * but based on EFF mask
+ */
+FIXTURE_VARIANT_ADD(can_filters, eff) {
+ .testcase = 17,
+ .id = ID,
+ .mask = CAN_EFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG,
+ .exp_num_rx = 1,
+ .exp_flags = {
+ 0,
+ },
+};
+
+/* Receive only EFF data frame when filtering for EFF flag and no RTR flag but
+ * based on EFF mask
+ */
+FIXTURE_VARIANT_ADD(can_filters, eff_eff) {
+ .testcase = 18,
+ .id = ID | CAN_EFF_FLAG,
+ .mask = CAN_EFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG,
+ .exp_num_rx = 1,
+ .exp_flags = {
+ CAN_EFF_FLAG,
+ },
+};
+
+/* This test verifies that the raw CAN filters work, by checking if only frames
+ * with the expected set of flags are received. For each test case, the given
+ * filter (id and mask) is added and four CAN frames are sent with every
+ * combination of set/unset EFF/RTR flags.
+ */
+TEST_F(can_filters, test_filter)
+{
+ struct can_filter rfilter;
+ int ret;
+
+ rfilter.can_id = variant->id;
+ rfilter.can_mask = variant->mask;
+ setsockopt(self->sock, SOL_CAN_RAW, CAN_RAW_FILTER,
+ &rfilter, sizeof(rfilter));
+
+ TH_LOG("filters: can_id = 0x%08X can_mask = 0x%08X",
+ rfilter.can_id, rfilter.can_mask);
+
+ ret = send_can_frames(self->sock, variant->testcase);
+ ASSERT_EQ(ret, 0)
+ TH_LOG("failed to send CAN frames");
+
+ for (int i = 0; i <= variant->exp_num_rx; i++) {
+ struct can_frame frame;
+ struct timeval tv = {
+ .tv_sec = 0,
+ .tv_usec = 50000, /* 50ms timeout */
+ };
+ fd_set rdfs;
+
+ FD_ZERO(&rdfs);
+ FD_SET(self->sock, &rdfs);
+
+ ret = select(self->sock + 1, &rdfs, NULL, NULL, &tv);
+ ASSERT_GE(ret, 0)
+ TH_LOG("failed select for frame %d, err: %d)", i, errno);
+
+ ret = FD_ISSET(self->sock, &rdfs);
+ if (i == variant->exp_num_rx) {
+ ASSERT_EQ(ret, 0)
+ TH_LOG("too many frames received");
+ } else {
+ ASSERT_NE(ret, 0)
+ TH_LOG("too few frames received");
+
+ ret = read(self->sock, &frame, sizeof(frame));
+ ASSERT_GE(ret, 0)
+ TH_LOG("failed to read frame %d, err: %d", i, errno);
+
+ TH_LOG("rx: can_id = 0x%08X rx = %d", frame.can_id, i);
+
+ ASSERT_EQ(ID, frame.can_id & CAN_SFF_MASK)
+ TH_LOG("received wrong can_id");
+ ASSERT_EQ(variant->testcase, frame.data[0])
+ TH_LOG("received wrong test case");
+
+ ASSERT_EQ(frame.can_id & ~CAN_ERR_MASK,
+ variant->exp_flags[i])
+ TH_LOG("received unexpected flags");
+ }
+ }
+}
+
+int main(int argc, char **argv)
+{
+ char *ifname = getenv("CANIF");
+
+ if (!ifname) {
+ printf("CANIF environment variable must contain the test interface\n");
+ return KSFT_FAIL;
+ }
+
+ strncpy(CANIF, ifname, sizeof(CANIF) - 1);
+
+ return test_harness_run(argc, argv);
+}
diff --git a/tools/testing/selftests/net/can/test_raw_filter.sh b/tools/testing/selftests/net/can/test_raw_filter.sh
new file mode 100755
index 000000000000..276d6c06ac95
--- /dev/null
+++ b/tools/testing/selftests/net/can/test_raw_filter.sh
@@ -0,0 +1,45 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="
+ test_raw_filter
+"
+
+net_dir=$(dirname $0)/..
+source $net_dir/lib.sh
+
+export CANIF=${CANIF:-"vcan0"}
+BITRATE=${BITRATE:-500000}
+
+setup()
+{
+ if [[ $CANIF == vcan* ]]; then
+ ip link add name $CANIF type vcan || exit $ksft_skip
+ else
+ ip link set dev $CANIF type can bitrate $BITRATE || exit $ksft_skip
+ fi
+ ip link set dev $CANIF up
+ pwd
+}
+
+cleanup()
+{
+ ip link set dev $CANIF down
+ if [[ $CANIF == vcan* ]]; then
+ ip link delete $CANIF
+ fi
+}
+
+test_raw_filter()
+{
+ ./test_raw_filter
+ check_err $?
+ log_test "test_raw_filter"
+}
+
+trap cleanup EXIT
+setup
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config
index 130d532b7e67..3cfef5153823 100644
--- a/tools/testing/selftests/net/config
+++ b/tools/testing/selftests/net/config
@@ -33,7 +33,6 @@ CONFIG_NETFILTER_ADVANCED=y
CONFIG_NF_CONNTRACK=m
CONFIG_IPV6_MROUTE=y
CONFIG_IPV6_SIT=y
-CONFIG_IP_DCCP=m
CONFIG_NF_NAT=m
CONFIG_IP6_NF_IPTABLES=m
CONFIG_IP_NF_IPTABLES=m
diff --git a/tools/testing/selftests/net/fib_rule_tests.sh b/tools/testing/selftests/net/fib_rule_tests.sh
index c7cea556b416..5fbdd2a0b537 100755
--- a/tools/testing/selftests/net/fib_rule_tests.sh
+++ b/tools/testing/selftests/net/fib_rule_tests.sh
@@ -516,10 +516,7 @@ fib_rule4_test()
fib_rule4_test_match_n_redirect "$match" "$match" "$getnomatch" \
"oif redirect to table" "oif no redirect to table"
- # Enable forwarding and disable rp_filter as all the addresses are in
- # the same subnet and egress device == ingress device.
ip netns exec $testns sysctl -qw net.ipv4.ip_forward=1
- ip netns exec $testns sysctl -qw net.ipv4.conf.$DEV.rp_filter=0
match="from $SRC_IP iif $DEV"
getnomatch="from $SRC_IP iif lo"
fib_rule4_test_match_n_redirect "$match" "$match" "$getnomatch" \
diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh
index 3ea6f886a210..a94b73a53f72 100755
--- a/tools/testing/selftests/net/fib_tests.sh
+++ b/tools/testing/selftests/net/fib_tests.sh
@@ -11,7 +11,7 @@ TESTS="unregister down carrier nexthop suppress ipv6_notify ipv4_notify \
ipv6_rt ipv4_rt ipv6_addr_metric ipv4_addr_metric ipv6_route_metrics \
ipv4_route_metrics ipv4_route_v6_gw rp_filter ipv4_del_addr \
ipv6_del_addr ipv4_mangle ipv6_mangle ipv4_bcast_neigh fib6_gc_test \
- ipv4_mpath_list ipv6_mpath_list"
+ ipv4_mpath_list ipv6_mpath_list ipv4_mpath_balance ipv6_mpath_balance"
VERBOSE=0
PAUSE_ON_FAIL=no
@@ -1085,6 +1085,35 @@ route_setup()
set +e
}
+forwarding_cleanup()
+{
+ cleanup_ns $ns3
+
+ route_cleanup
+}
+
+# extend route_setup with an ns3 reachable through ns2 over both devices
+forwarding_setup()
+{
+ forwarding_cleanup
+
+ route_setup
+
+ setup_ns ns3
+
+ ip link add veth5 netns $ns3 type veth peer name veth6 netns $ns2
+ ip -netns $ns3 link set veth5 up
+ ip -netns $ns2 link set veth6 up
+
+ ip -netns $ns3 -4 addr add dev veth5 172.16.105.1/24
+ ip -netns $ns2 -4 addr add dev veth6 172.16.105.2/24
+ ip -netns $ns3 -4 route add 172.16.100.0/22 via 172.16.105.2
+
+ ip -netns $ns3 -6 addr add dev veth5 2001:db8:105::1/64 nodad
+ ip -netns $ns2 -6 addr add dev veth6 2001:db8:105::2/64 nodad
+ ip -netns $ns3 -6 route add 2001:db8:101::/33 via 2001:db8:105::2
+}
+
# assumption is that basic add of a single path route works
# otherwise just adding an address on an interface is broken
ipv6_rt_add()
@@ -2531,9 +2560,6 @@ ipv4_mpath_list_test()
run_cmd "ip -n $ns2 route add 203.0.113.0/24
nexthop via 172.16.201.2 nexthop via 172.16.202.2"
run_cmd "ip netns exec $ns2 sysctl -qw net.ipv4.fib_multipath_hash_policy=1"
- run_cmd "ip netns exec $ns2 sysctl -qw net.ipv4.conf.veth2.rp_filter=0"
- run_cmd "ip netns exec $ns2 sysctl -qw net.ipv4.conf.all.rp_filter=0"
- run_cmd "ip netns exec $ns2 sysctl -qw net.ipv4.conf.default.rp_filter=0"
set +e
local dmac=$(ip -n $ns2 -j link show dev veth2 | jq -r '.[]["address"]')
@@ -2600,6 +2626,93 @@ ipv6_mpath_list_test()
route_cleanup
}
+tc_set_flower_counter__saddr_syn() {
+ tc_set_flower_counter $1 $2 $3 "src_ip $4 ip_proto tcp tcp_flags 0x2"
+}
+
+ip_mpath_balance_dep_check()
+{
+ if [ ! -x "$(command -v socat)" ]; then
+ echo "socat command not found. Skipping test"
+ return 1
+ fi
+
+ if [ ! -x "$(command -v jq)" ]; then
+ echo "jq command not found. Skipping test"
+ return 1
+ fi
+}
+
+ip_mpath_balance() {
+ local -r ipver=$1
+ local -r daddr=$2
+ local -r num_conn=20
+
+ for i in $(seq 1 $num_conn); do
+ ip netns exec $ns3 socat $ipver TCP-LISTEN:8000 STDIO >/dev/null &
+ sleep 0.02
+ echo -n a | ip netns exec $ns1 socat $ipver STDIO TCP:$daddr:8000
+ done
+
+ local -r syn0="$(tc_get_flower_counter $ns1 veth1)"
+ local -r syn1="$(tc_get_flower_counter $ns1 veth3)"
+ local -r syns=$((syn0+syn1))
+
+ [ "$VERBOSE" = "1" ] && echo "multipath: syns seen: ($syn0,$syn1)"
+
+ [[ $syns -ge $num_conn ]] && [[ $syn0 -gt 0 ]] && [[ $syn1 -gt 0 ]]
+}
+
+ipv4_mpath_balance_test()
+{
+ echo
+ echo "IPv4 multipath load balance test"
+
+ ip_mpath_balance_dep_check || return 1
+ forwarding_setup
+
+ $IP route add 172.16.105.1 \
+ nexthop via 172.16.101.2 \
+ nexthop via 172.16.103.2
+
+ ip netns exec $ns1 \
+ sysctl -q -w net.ipv4.fib_multipath_hash_policy=1
+
+ tc_set_flower_counter__saddr_syn $ns1 4 veth1 172.16.101.1
+ tc_set_flower_counter__saddr_syn $ns1 4 veth3 172.16.103.1
+
+ ip_mpath_balance -4 172.16.105.1
+
+ log_test $? 0 "IPv4 multipath loadbalance"
+
+ forwarding_cleanup
+}
+
+ipv6_mpath_balance_test()
+{
+ echo
+ echo "IPv6 multipath load balance test"
+
+ ip_mpath_balance_dep_check || return 1
+ forwarding_setup
+
+ $IP route add 2001:db8:105::1\
+ nexthop via 2001:db8:101::2 \
+ nexthop via 2001:db8:103::2
+
+ ip netns exec $ns1 \
+ sysctl -q -w net.ipv6.fib_multipath_hash_policy=1
+
+ tc_set_flower_counter__saddr_syn $ns1 6 veth1 2001:db8:101::1
+ tc_set_flower_counter__saddr_syn $ns1 6 veth3 2001:db8:103::1
+
+ ip_mpath_balance -6 "[2001:db8:105::1]"
+
+ log_test $? 0 "IPv6 multipath loadbalance"
+
+ forwarding_cleanup
+}
+
################################################################################
# usage
@@ -2683,6 +2796,8 @@ do
fib6_gc_test|ipv6_gc) fib6_gc_test;;
ipv4_mpath_list) ipv4_mpath_list_test;;
ipv6_mpath_list) ipv6_mpath_list_test;;
+ ipv4_mpath_balance) ipv4_mpath_balance_test;;
+ ipv6_mpath_balance) ipv6_mpath_balance_test;;
help) echo "Test names: $TESTS"; exit 0;;
esac
diff --git a/tools/testing/selftests/net/forwarding/bridge_igmp.sh b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
index e6a3e04fd83f..d4e7dd659354 100755
--- a/tools/testing/selftests/net/forwarding/bridge_igmp.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
@@ -1,10 +1,24 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
-ALL_TESTS="v2reportleave_test v3include_test v3inc_allow_test v3inc_is_include_test \
- v3inc_is_exclude_test v3inc_to_exclude_test v3exc_allow_test v3exc_is_include_test \
- v3exc_is_exclude_test v3exc_to_exclude_test v3inc_block_test v3exc_block_test \
- v3exc_timeout_test v3star_ex_auto_add_test"
+ALL_TESTS="
+ v2reportleave_test
+ v3include_test
+ v3inc_allow_test
+ v3inc_is_include_test
+ v3inc_is_exclude_test
+ v3inc_to_exclude_test
+ v3exc_allow_test
+ v3exc_is_include_test
+ v3exc_is_exclude_test
+ v3exc_to_exclude_test
+ v3inc_block_test
+ v3exc_block_test
+ v3exc_timeout_test
+ v3star_ex_auto_add_test
+ v2per_vlan_snooping_port_stp_test
+ v2per_vlan_snooping_vlan_stp_test
+"
NUM_NETIFS=4
CHECK_TC="yes"
TEST_GROUP="239.10.10.10"
@@ -554,6 +568,64 @@ v3star_ex_auto_add_test()
v3cleanup $swp2 $TEST_GROUP
}
+v2per_vlan_snooping_stp_test()
+{
+ local is_port=$1
+
+ local msg="port"
+ [[ $is_port -ne 1 ]] && msg="vlan"
+
+ ip link set br0 up type bridge vlan_filtering 1 \
+ mcast_igmp_version 2 \
+ mcast_snooping 1 \
+ mcast_vlan_snooping 1 \
+ mcast_querier 1 \
+ mcast_stats_enabled 1
+ bridge vlan global set vid 1 dev br0 \
+ mcast_snooping 1 \
+ mcast_querier 1 \
+ mcast_query_interval 100 \
+ mcast_startup_query_count 0
+ [[ $is_port -eq 1 ]] && bridge link set dev $swp1 state 0
+ [[ $is_port -ne 1 ]] && bridge vlan set vid 1 dev $swp1 state 4
+ sleep 5
+ local tx_s=$(ip -j -p stats show dev $swp1 \
+ group xstats_slave subgroup bridge suite mcast \
+ | jq '.[]["multicast"]["igmp_queries"]["tx_v2"]')
+
+ [[ $is_port -eq 1 ]] && bridge link set dev $swp1 state 3
+ [[ $is_port -ne 1 ]] && bridge vlan set vid 1 dev $swp1 state 3
+ sleep 5
+ local tx_e=$(ip -j -p stats show dev $swp1 \
+ group xstats_slave subgroup bridge suite mcast \
+ | jq '.[]["multicast"]["igmp_queries"]["tx_v2"]')
+
+ RET=0
+ local tx=$(expr $tx_e - $tx_s)
+ test $tx -gt 0
+ check_err $? "No IGMP queries after STP state becomes forwarding"
+ log_test "per vlan snooping with $msg stp state change"
+
+ # restore settings
+ bridge vlan global set vid 1 dev br0 \
+ mcast_querier 0 \
+ mcast_query_interval 12500 \
+ mcast_startup_query_count 2
+ ip link set br0 up type bridge vlan_filtering 0 \
+ mcast_vlan_snooping 0 \
+ mcast_stats_enabled 0
+}
+
+v2per_vlan_snooping_port_stp_test()
+{
+ v2per_vlan_snooping_stp_test 1
+}
+
+v2per_vlan_snooping_vlan_stp_test()
+{
+ v2per_vlan_snooping_stp_test 0
+}
+
trap cleanup EXIT
setup_prepare
diff --git a/tools/testing/selftests/net/forwarding/bridge_mld.sh b/tools/testing/selftests/net/forwarding/bridge_mld.sh
index f84ab2e65754..4cacef5a813a 100755
--- a/tools/testing/selftests/net/forwarding/bridge_mld.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_mld.sh
@@ -1,10 +1,23 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
-ALL_TESTS="mldv2include_test mldv2inc_allow_test mldv2inc_is_include_test mldv2inc_is_exclude_test \
- mldv2inc_to_exclude_test mldv2exc_allow_test mldv2exc_is_include_test \
- mldv2exc_is_exclude_test mldv2exc_to_exclude_test mldv2inc_block_test \
- mldv2exc_block_test mldv2exc_timeout_test mldv2star_ex_auto_add_test"
+ALL_TESTS="
+ mldv2include_test
+ mldv2inc_allow_test
+ mldv2inc_is_include_test
+ mldv2inc_is_exclude_test
+ mldv2inc_to_exclude_test
+ mldv2exc_allow_test
+ mldv2exc_is_include_test
+ mldv2exc_is_exclude_test
+ mldv2exc_to_exclude_test
+ mldv2inc_block_test
+ mldv2exc_block_test
+ mldv2exc_timeout_test
+ mldv2star_ex_auto_add_test
+ mldv2per_vlan_snooping_port_stp_test
+ mldv2per_vlan_snooping_vlan_stp_test
+"
NUM_NETIFS=4
CHECK_TC="yes"
TEST_GROUP="ff02::cc"
@@ -554,6 +567,66 @@ mldv2star_ex_auto_add_test()
mldv2cleanup $swp2
}
+mldv2per_vlan_snooping_stp_test()
+{
+ local is_port=$1
+
+ local msg="port"
+ [[ $is_port -ne 1 ]] && msg="vlan"
+
+ ip link set br0 up type bridge vlan_filtering 1 \
+ mcast_mld_version 2 \
+ mcast_snooping 1 \
+ mcast_vlan_snooping 1 \
+ mcast_querier 1 \
+ mcast_stats_enabled 1
+ bridge vlan global set vid 1 dev br0 \
+ mcast_mld_version 2 \
+ mcast_snooping 1 \
+ mcast_querier 1 \
+ mcast_query_interval 100 \
+ mcast_startup_query_count 0
+
+ [[ $is_port -eq 1 ]] && bridge link set dev $swp1 state 0
+ [[ $is_port -ne 1 ]] && bridge vlan set vid 1 dev $swp1 state 4
+ sleep 5
+ local tx_s=$(ip -j -p stats show dev $swp1 \
+ group xstats_slave subgroup bridge suite mcast \
+ | jq '.[]["multicast"]["mld_queries"]["tx_v2"]')
+ [[ $is_port -eq 1 ]] && bridge link set dev $swp1 state 3
+ [[ $is_port -ne 1 ]] && bridge vlan set vid 1 dev $swp1 state 3
+ sleep 5
+ local tx_e=$(ip -j -p stats show dev $swp1 \
+ group xstats_slave subgroup bridge suite mcast \
+ | jq '.[]["multicast"]["mld_queries"]["tx_v2"]')
+
+ RET=0
+ local tx=$(expr $tx_e - $tx_s)
+ test $tx -gt 0
+ check_err $? "No MLD queries after STP state becomes forwarding"
+ log_test "per vlan snooping with $msg stp state change"
+
+ # restore settings
+ bridge vlan global set vid 1 dev br0 \
+ mcast_querier 0 \
+ mcast_query_interval 12500 \
+ mcast_startup_query_count 2 \
+ mcast_mld_version 1
+ ip link set br0 up type bridge vlan_filtering 0 \
+ mcast_vlan_snooping 0 \
+ mcast_stats_enabled 0
+}
+
+mldv2per_vlan_snooping_port_stp_test()
+{
+ mldv2per_vlan_snooping_stp_test 1
+}
+
+mldv2per_vlan_snooping_vlan_stp_test()
+{
+ mldv2per_vlan_snooping_stp_test 0
+}
+
trap cleanup EXIT
setup_prepare
diff --git a/tools/testing/selftests/net/forwarding/config b/tools/testing/selftests/net/forwarding/config
index 8d7a1a004b7c..18fd69d8d937 100644
--- a/tools/testing/selftests/net/forwarding/config
+++ b/tools/testing/selftests/net/forwarding/config
@@ -1,6 +1,7 @@
CONFIG_BRIDGE=m
CONFIG_VLAN_8021Q=m
CONFIG_BRIDGE_VLAN_FILTERING=y
+CONFIG_BRIDGE_IGMP_SNOOPING=y
CONFIG_NET_L3_MASTER_DEV=y
CONFIG_IPV6_MULTIPLE_TABLES=y
CONFIG_NET_VRF=m
diff --git a/tools/testing/selftests/net/icmp_redirect.sh b/tools/testing/selftests/net/icmp_redirect.sh
index d6f0e449c029..b13c89a99ecb 100755
--- a/tools/testing/selftests/net/icmp_redirect.sh
+++ b/tools/testing/selftests/net/icmp_redirect.sh
@@ -178,8 +178,6 @@ setup()
else
ip netns exec $ns sysctl -q -w net.ipv4.ip_forward=1
ip netns exec $ns sysctl -q -w net.ipv4.conf.all.send_redirects=1
- ip netns exec $ns sysctl -q -w net.ipv4.conf.default.rp_filter=0
- ip netns exec $ns sysctl -q -w net.ipv4.conf.all.rp_filter=0
ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=1
ip netns exec $ns sysctl -q -w net.ipv6.route.mtu_expires=10
diff --git a/tools/testing/selftests/net/ipv6_route_update_soft_lockup.sh b/tools/testing/selftests/net/ipv6_route_update_soft_lockup.sh
index a6b2b1f9c641..c6866e42f95c 100755
--- a/tools/testing/selftests/net/ipv6_route_update_soft_lockup.sh
+++ b/tools/testing/selftests/net/ipv6_route_update_soft_lockup.sh
@@ -69,7 +69,6 @@
# which can affect the conditions needed to trigger a soft lockup.
source lib.sh
-source net_helper.sh
TEST_DURATION=300
ROUTING_TABLE_REFRESH_PERIOD=0.01
diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh
index 701905eeff66..006fdadcc4b9 100644
--- a/tools/testing/selftests/net/lib.sh
+++ b/tools/testing/selftests/net/lib.sh
@@ -217,6 +217,8 @@ setup_ns()
return $ksft_skip
fi
ip -n "${!ns_name}" link set lo up
+ ip netns exec "${!ns_name}" sysctl -wq net.ipv4.conf.all.rp_filter=0
+ ip netns exec "${!ns_name}" sysctl -wq net.ipv4.conf.default.rp_filter=0
ns_list+=("${!ns_name}")
done
NS_LIST+=("${ns_list[@]}")
@@ -270,6 +272,30 @@ tc_rule_handle_stats_get()
.options.actions[0].stats$selector"
}
+# attach a qdisc with two children match/no-match and a flower filter to match
+tc_set_flower_counter() {
+ local -r ns=$1
+ local -r ipver=$2
+ local -r dev=$3
+ local -r flower_expr=$4
+
+ tc -n $ns qdisc add dev $dev root handle 1: prio bands 2 \
+ priomap 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+ tc -n $ns qdisc add dev $dev parent 1:1 handle 11: pfifo
+ tc -n $ns qdisc add dev $dev parent 1:2 handle 12: pfifo
+
+ tc -n $ns filter add dev $dev parent 1: protocol ipv$ipver \
+ flower $flower_expr classid 1:2
+}
+
+tc_get_flower_counter() {
+ local -r ns=$1
+ local -r dev=$2
+
+ tc -n $ns -j -s qdisc show dev $dev handle 12: | jq .[0].packets
+}
+
ret_set_ksft_status()
{
local ksft_status=$1; shift
@@ -569,3 +595,24 @@ bridge_vlan_add()
bridge vlan add "$@"
defer bridge vlan del "$@"
}
+
+wait_local_port_listen()
+{
+ local listener_ns="${1}"
+ local port="${2}"
+ local protocol="${3}"
+ local pattern
+ local i
+
+ pattern=":$(printf "%04X" "${port}") "
+
+ # for tcp protocol additionally check the socket state
+ [ ${protocol} = "tcp" ] && pattern="${pattern}0A"
+ for i in $(seq 10); do
+ if ip netns exec "${listener_ns}" awk '{print $2" "$4}' \
+ /proc/net/"${protocol}"* | grep -q "${pattern}"; then
+ break
+ fi
+ sleep 0.1
+ done
+}
diff --git a/tools/testing/selftests/net/lib/.gitignore b/tools/testing/selftests/net/lib/.gitignore
index 1ebc6187f421..bbc97d6bf556 100644
--- a/tools/testing/selftests/net/lib/.gitignore
+++ b/tools/testing/selftests/net/lib/.gitignore
@@ -1,2 +1,3 @@
# SPDX-License-Identifier: GPL-2.0-only
csum
+xdp_helper
diff --git a/tools/testing/selftests/net/lib/Makefile b/tools/testing/selftests/net/lib/Makefile
index c22623b9a2a5..88c4bc461459 100644
--- a/tools/testing/selftests/net/lib/Makefile
+++ b/tools/testing/selftests/net/lib/Makefile
@@ -10,6 +10,7 @@ TEST_FILES += ../../../../net/ynl
TEST_GEN_FILES += csum
TEST_GEN_FILES += $(patsubst %.c,%.o,$(wildcard *.bpf.c))
+TEST_GEN_FILES += xdp_helper
TEST_INCLUDES := $(wildcard py/*.py sh/*.sh)
diff --git a/tools/testing/selftests/net/lib/ksft.h b/tools/testing/selftests/net/lib/ksft.h
new file mode 100644
index 000000000000..17dc34a612c6
--- /dev/null
+++ b/tools/testing/selftests/net/lib/ksft.h
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#if !defined(__NET_KSFT_H__)
+#define __NET_KSFT_H__
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+static inline void ksft_ready(void)
+{
+ const char msg[7] = "ready\n";
+ char *env_str;
+ int fd;
+
+ env_str = getenv("KSFT_READY_FD");
+ if (env_str) {
+ fd = atoi(env_str);
+ if (!fd) {
+ fprintf(stderr, "invalid KSFT_READY_FD = '%s'\n",
+ env_str);
+ return;
+ }
+ } else {
+ fd = STDOUT_FILENO;
+ }
+
+ write(fd, msg, sizeof(msg));
+ if (fd != STDOUT_FILENO)
+ close(fd);
+}
+
+static inline void ksft_wait(void)
+{
+ char *env_str;
+ char byte;
+ int fd;
+
+ env_str = getenv("KSFT_WAIT_FD");
+ if (env_str) {
+ fd = atoi(env_str);
+ if (!fd) {
+ fprintf(stderr, "invalid KSFT_WAIT_FD = '%s'\n",
+ env_str);
+ return;
+ }
+ } else {
+ /* Not running in KSFT env, wait for input from STDIN instead */
+ fd = STDIN_FILENO;
+ }
+
+ read(fd, &byte, sizeof(byte));
+ if (fd != STDIN_FILENO)
+ close(fd);
+}
+
+#endif
diff --git a/tools/testing/selftests/net/lib/py/ksft.py b/tools/testing/selftests/net/lib/py/ksft.py
index 3cfad0fd4570..61287c203b6e 100644
--- a/tools/testing/selftests/net/lib/py/ksft.py
+++ b/tools/testing/selftests/net/lib/py/ksft.py
@@ -3,6 +3,7 @@
import builtins
import functools
import inspect
+import signal
import sys
import time
import traceback
@@ -26,6 +27,10 @@ class KsftXfailEx(Exception):
pass
+class KsftTerminate(KeyboardInterrupt):
+ pass
+
+
def ksft_pr(*objs, **kwargs):
print("#", *objs, **kwargs)
@@ -193,6 +198,17 @@ def ksft_setup(env):
return env
+def _ksft_intr(signum, frame):
+ # ksft runner.sh sends 2 SIGTERMs in a row on a timeout
+ # if we don't ignore the second one it will stop us from handling cleanup
+ global term_cnt
+ term_cnt += 1
+ if term_cnt == 1:
+ raise KsftTerminate()
+ else:
+ ksft_pr(f"Ignoring SIGTERM (cnt: {term_cnt}), already exiting...")
+
+
def ksft_run(cases=None, globs=None, case_pfx=None, args=()):
cases = cases or []
@@ -205,6 +221,10 @@ def ksft_run(cases=None, globs=None, case_pfx=None, args=()):
cases.append(value)
break
+ global term_cnt
+ term_cnt = 0
+ prev_sigterm = signal.signal(signal.SIGTERM, _ksft_intr)
+
totals = {"pass": 0, "fail": 0, "skip": 0, "xfail": 0}
print("TAP version 13")
@@ -233,7 +253,7 @@ def ksft_run(cases=None, globs=None, case_pfx=None, args=()):
for line in tb.strip().split('\n'):
ksft_pr("Exception|", line)
if stop:
- ksft_pr("Stopping tests due to KeyboardInterrupt.")
+ ksft_pr(f"Stopping tests due to {type(e).__name__}.")
KSFT_RESULT = False
cnt_key = 'fail'
@@ -248,6 +268,8 @@ def ksft_run(cases=None, globs=None, case_pfx=None, args=()):
if stop:
break
+ signal.signal(signal.SIGTERM, prev_sigterm)
+
print(
f"# Totals: pass:{totals['pass']} fail:{totals['fail']} xfail:{totals['xfail']} xpass:0 skip:{totals['skip']} error:0"
)
diff --git a/tools/testing/selftests/net/lib/py/ynl.py b/tools/testing/selftests/net/lib/py/ynl.py
index 8986c584cb37..6329ae805abf 100644
--- a/tools/testing/selftests/net/lib/py/ynl.py
+++ b/tools/testing/selftests/net/lib/py/ynl.py
@@ -39,12 +39,12 @@ class EthtoolFamily(YnlFamily):
class RtnlFamily(YnlFamily):
def __init__(self, recv_size=0):
- super().__init__((SPEC_PATH / Path('rt_link.yaml')).as_posix(),
+ super().__init__((SPEC_PATH / Path('rt-link.yaml')).as_posix(),
schema='', recv_size=recv_size)
class RtnlAddrFamily(YnlFamily):
def __init__(self, recv_size=0):
- super().__init__((SPEC_PATH / Path('rt_addr.yaml')).as_posix(),
+ super().__init__((SPEC_PATH / Path('rt-addr.yaml')).as_posix(),
schema='', recv_size=recv_size)
class NetdevFamily(YnlFamily):
diff --git a/tools/testing/selftests/drivers/net/xdp_helper.c b/tools/testing/selftests/net/lib/xdp_helper.c
index aeed25914104..eb025a9f35b1 100644
--- a/tools/testing/selftests/drivers/net/xdp_helper.c
+++ b/tools/testing/selftests/net/lib/xdp_helper.c
@@ -11,55 +11,16 @@
#include <net/if.h>
#include <inttypes.h>
+#include "ksft.h"
+
#define UMEM_SZ (1U << 16)
#define NUM_DESC (UMEM_SZ / 2048)
-/* Move this to a common header when reused! */
-static void ksft_ready(void)
-{
- const char msg[7] = "ready\n";
- char *env_str;
- int fd;
-
- env_str = getenv("KSFT_READY_FD");
- if (env_str) {
- fd = atoi(env_str);
- if (!fd) {
- fprintf(stderr, "invalid KSFT_READY_FD = '%s'\n",
- env_str);
- return;
- }
- } else {
- fd = STDOUT_FILENO;
- }
-
- write(fd, msg, sizeof(msg));
- if (fd != STDOUT_FILENO)
- close(fd);
-}
-static void ksft_wait(void)
+static void print_usage(const char *bin)
{
- char *env_str;
- char byte;
- int fd;
-
- env_str = getenv("KSFT_WAIT_FD");
- if (env_str) {
- fd = atoi(env_str);
- if (!fd) {
- fprintf(stderr, "invalid KSFT_WAIT_FD = '%s'\n",
- env_str);
- return;
- }
- } else {
- /* Not running in KSFT env, wait for input from STDIN instead */
- fd = STDIN_FILENO;
- }
-
- read(fd, &byte, sizeof(byte));
- if (fd != STDIN_FILENO)
- close(fd);
+ fprintf(stderr, "Usage: %s ifindex queue_id [-z]\n\n"
+ "where:\n\t-z: force zerocopy mode", bin);
}
/* this is a simple helper program that creates an XDP socket and does the
@@ -77,12 +38,13 @@ int main(int argc, char **argv)
struct sockaddr_xdp sxdp = { 0 };
int num_desc = NUM_DESC;
void *umem_area;
+ int retry = 0;
int ifindex;
int sock_fd;
int queue;
- if (argc != 3) {
- fprintf(stderr, "Usage: %s ifindex queue_id\n", argv[0]);
+ if (argc != 3 && argc != 4) {
+ print_usage(argv[0]);
return 1;
}
@@ -132,11 +94,29 @@ int main(int argc, char **argv)
sxdp.sxdp_queue_id = queue;
sxdp.sxdp_flags = 0;
- if (bind(sock_fd, (struct sockaddr *)&sxdp, sizeof(sxdp)) != 0) {
- munmap(umem_area, UMEM_SZ);
- perror("bind failed");
- close(sock_fd);
- return 1;
+ if (argc > 3) {
+ if (!strcmp(argv[3], "-z")) {
+ sxdp.sxdp_flags = XDP_ZEROCOPY;
+ } else {
+ print_usage(argv[0]);
+ return 1;
+ }
+ }
+
+ while (1) {
+ if (bind(sock_fd, (struct sockaddr *)&sxdp, sizeof(sxdp)) == 0)
+ break;
+
+ if (errno == EBUSY && retry < 3) {
+ retry++;
+ sleep(1);
+ continue;
+ } else {
+ perror("bind failed");
+ munmap(umem_area, UMEM_SZ);
+ close(sock_fd);
+ return 1;
+ }
}
ksft_ready();
diff --git a/tools/testing/selftests/net/mptcp/Makefile b/tools/testing/selftests/net/mptcp/Makefile
index 340e1a777e16..e47788bfa671 100644
--- a/tools/testing/selftests/net/mptcp/Makefile
+++ b/tools/testing/selftests/net/mptcp/Makefile
@@ -11,7 +11,7 @@ TEST_GEN_FILES = mptcp_connect pm_nl_ctl mptcp_sockopt mptcp_inq mptcp_diag
TEST_FILES := mptcp_lib.sh settings
-TEST_INCLUDES := ../lib.sh $(wildcard ../lib/sh/*.sh) ../net_helper.sh
+TEST_INCLUDES := ../lib.sh $(wildcard ../lib/sh/*.sh)
EXTRA_CLEAN := *.pcap
diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh
index e7a75341f0f3..7a3cb4c09e45 100755
--- a/tools/testing/selftests/net/mptcp/diag.sh
+++ b/tools/testing/selftests/net/mptcp/diag.sh
@@ -225,6 +225,37 @@ chk_dump_one()
fi
}
+chk_dump_subflow()
+{
+ local inet_diag_token
+ local subflow_line
+ local ss_output
+ local ss_token
+ local msg
+
+ ss_output=$(ss -tniN $ns)
+
+ subflow_line=$(echo "$ss_output" | \
+ grep -m1 -Eo '[0-9.]+:[0-9].+ +[0-9.]+:[0-9.]+')
+
+ ss_token=$(echo "$ss_output" | grep -m1 -Eo 'token:[^ ]+')
+
+ inet_diag_token=$(ip netns exec $ns ./mptcp_diag -s "$subflow_line" | \
+ grep -Eo 'token:[^ ]+')
+
+ msg="....chk dump_subflow"
+
+ mptcp_lib_print_title "$msg"
+ if [ -n "$ss_token" ] && [ "$ss_token" = "$inet_diag_token" ]; then
+ mptcp_lib_pr_ok
+ mptcp_lib_result_pass "${msg}"
+ else
+ mptcp_lib_pr_fail "expected $ss_token found $inet_diag_token"
+ mptcp_lib_result_fail "${msg}"
+ ret=${KSFT_FAIL}
+ fi
+}
+
msk_info_get_value()
{
local port="${1}"
@@ -316,6 +347,7 @@ chk_msk_fallback_nr 0 "....chk no fallback"
chk_msk_inuse 2
chk_msk_cestab 2
chk_dump_one
+chk_dump_subflow
flush_pids
chk_msk_inuse 0 "2->0"
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c
index c83a8b47bbdf..ac1349c4b9e5 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c
@@ -180,13 +180,26 @@ static void xgetnameinfo(const struct sockaddr *addr, socklen_t addrlen,
}
static void xgetaddrinfo(const char *node, const char *service,
- const struct addrinfo *hints,
+ struct addrinfo *hints,
struct addrinfo **res)
{
+again:
int err = getaddrinfo(node, service, hints, res);
if (err) {
- const char *errstr = getxinfo_strerr(err);
+ const char *errstr;
+
+ /* glibc starts to support MPTCP since v2.42.
+ * For older versions, use IPPROTO_TCP to resolve,
+ * and use TCP/MPTCP to create socket.
+ * Link: https://sourceware.org/git/?p=glibc.git;a=commit;h=a8e9022e0f82
+ */
+ if (err == EAI_SOCKTYPE) {
+ hints->ai_protocol = IPPROTO_TCP;
+ goto again;
+ }
+
+ errstr = getxinfo_strerr(err);
fprintf(stderr, "Fatal: getaddrinfo(%s:%s): %s\n",
node ? node : "", service ? service : "", errstr);
@@ -292,7 +305,7 @@ static int sock_listen_mptcp(const char * const listenaddr,
{
int sock = -1;
struct addrinfo hints = {
- .ai_protocol = IPPROTO_TCP,
+ .ai_protocol = IPPROTO_MPTCP,
.ai_socktype = SOCK_STREAM,
.ai_flags = AI_PASSIVE | AI_NUMERICHOST
};
@@ -356,7 +369,7 @@ static int sock_connect_mptcp(const char * const remoteaddr,
int infd, struct wstate *winfo)
{
struct addrinfo hints = {
- .ai_protocol = IPPROTO_TCP,
+ .ai_protocol = IPPROTO_MPTCP,
.ai_socktype = SOCK_STREAM,
};
struct addrinfo *a, *addr;
diff --git a/tools/testing/selftests/net/mptcp/mptcp_diag.c b/tools/testing/selftests/net/mptcp/mptcp_diag.c
index 284286c524cf..e084796e804d 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_diag.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_diag.c
@@ -8,6 +8,7 @@
#include <sys/socket.h>
#include <netinet/in.h>
#include <linux/tcp.h>
+#include <arpa/inet.h>
#include <unistd.h>
#include <stdlib.h>
@@ -19,6 +20,15 @@
#define IPPROTO_MPTCP 262
#endif
+#define parse_rtattr_nested(tb, max, rta) \
+ (parse_rtattr_flags((tb), (max), RTA_DATA(rta), RTA_PAYLOAD(rta), \
+ NLA_F_NESTED))
+
+struct params {
+ __u32 target_token;
+ char subflow_addrs[1024];
+};
+
struct mptcp_info {
__u8 mptcpi_subflows;
__u8 mptcpi_add_addr_signal;
@@ -46,6 +56,37 @@ struct mptcp_info {
__u32 mptcpi_last_ack_recv;
};
+enum {
+ MPTCP_SUBFLOW_ATTR_UNSPEC,
+ MPTCP_SUBFLOW_ATTR_TOKEN_REM,
+ MPTCP_SUBFLOW_ATTR_TOKEN_LOC,
+ MPTCP_SUBFLOW_ATTR_RELWRITE_SEQ,
+ MPTCP_SUBFLOW_ATTR_MAP_SEQ,
+ MPTCP_SUBFLOW_ATTR_MAP_SFSEQ,
+ MPTCP_SUBFLOW_ATTR_SSN_OFFSET,
+ MPTCP_SUBFLOW_ATTR_MAP_DATALEN,
+ MPTCP_SUBFLOW_ATTR_FLAGS,
+ MPTCP_SUBFLOW_ATTR_ID_REM,
+ MPTCP_SUBFLOW_ATTR_ID_LOC,
+ MPTCP_SUBFLOW_ATTR_PAD,
+
+ __MPTCP_SUBFLOW_ATTR_MAX
+};
+
+#define MPTCP_SUBFLOW_ATTR_MAX (__MPTCP_SUBFLOW_ATTR_MAX - 1)
+
+#define MPTCP_SUBFLOW_FLAG_MCAP_REM _BITUL(0)
+#define MPTCP_SUBFLOW_FLAG_MCAP_LOC _BITUL(1)
+#define MPTCP_SUBFLOW_FLAG_JOIN_REM _BITUL(2)
+#define MPTCP_SUBFLOW_FLAG_JOIN_LOC _BITUL(3)
+#define MPTCP_SUBFLOW_FLAG_BKUP_REM _BITUL(4)
+#define MPTCP_SUBFLOW_FLAG_BKUP_LOC _BITUL(5)
+#define MPTCP_SUBFLOW_FLAG_FULLY_ESTABLISHED _BITUL(6)
+#define MPTCP_SUBFLOW_FLAG_CONNECTED _BITUL(7)
+#define MPTCP_SUBFLOW_FLAG_MAPVALID _BITUL(8)
+
+#define rta_getattr(type, value) (*(type *)RTA_DATA(value))
+
static void die_perror(const char *msg)
{
perror(msg);
@@ -54,11 +95,13 @@ static void die_perror(const char *msg)
static void die_usage(int r)
{
- fprintf(stderr, "Usage: mptcp_diag -t\n");
+ fprintf(stderr, "Usage:\n"
+ "mptcp_diag -t <token>\n"
+ "mptcp_diag -s \"<saddr>:<sport> <daddr>:<dport>\"\n");
exit(r);
}
-static void send_query(int fd, __u32 token)
+static void send_query(int fd, struct inet_diag_req_v2 *r, __u32 proto)
{
struct sockaddr_nl nladdr = {
.nl_family = AF_NETLINK
@@ -72,31 +115,26 @@ static void send_query(int fd, __u32 token)
.nlmsg_type = SOCK_DIAG_BY_FAMILY,
.nlmsg_flags = NLM_F_REQUEST
},
- .r = {
- .sdiag_family = AF_INET,
- /* Real proto is set via INET_DIAG_REQ_PROTOCOL */
- .sdiag_protocol = IPPROTO_TCP,
- .id.idiag_cookie[0] = token,
- }
+ .r = *r
};
struct rtattr rta_proto;
struct iovec iov[6];
- int iovlen = 1;
- __u32 proto;
-
- req.r.idiag_ext |= (1 << (INET_DIAG_INFO - 1));
- proto = IPPROTO_MPTCP;
- rta_proto.rta_type = INET_DIAG_REQ_PROTOCOL;
- rta_proto.rta_len = RTA_LENGTH(sizeof(proto));
+ int iovlen = 0;
- iov[0] = (struct iovec) {
+ iov[iovlen++] = (struct iovec) {
.iov_base = &req,
.iov_len = sizeof(req)
};
- iov[iovlen] = (struct iovec){ &rta_proto, sizeof(rta_proto)};
- iov[iovlen + 1] = (struct iovec){ &proto, sizeof(proto)};
- req.nlh.nlmsg_len += RTA_LENGTH(sizeof(proto));
- iovlen += 2;
+
+ if (proto == IPPROTO_MPTCP) {
+ rta_proto.rta_type = INET_DIAG_REQ_PROTOCOL;
+ rta_proto.rta_len = RTA_LENGTH(sizeof(proto));
+
+ iov[iovlen++] = (struct iovec){ &rta_proto, sizeof(rta_proto)};
+ iov[iovlen++] = (struct iovec){ &proto, sizeof(proto)};
+ req.nlh.nlmsg_len += RTA_LENGTH(sizeof(proto));
+ }
+
struct msghdr msg = {
.msg_name = &nladdr,
.msg_namelen = sizeof(nladdr),
@@ -160,7 +198,67 @@ static void print_info_msg(struct mptcp_info *info)
printf("bytes_acked: %llu\n", info->mptcpi_bytes_acked);
}
-static void parse_nlmsg(struct nlmsghdr *nlh)
+/*
+ * 'print_subflow_info' is from 'mptcp_subflow_info'
+ * which is a function in 'misc/ss.c' of iproute2.
+ */
+static void print_subflow_info(struct rtattr *tb[])
+{
+ u_int32_t flags = 0;
+
+ printf("It's a mptcp subflow, the subflow info:\n");
+ if (tb[MPTCP_SUBFLOW_ATTR_FLAGS]) {
+ char caps[32 + 1] = { 0 }, *cap = &caps[0];
+
+ flags = rta_getattr(__u32, tb[MPTCP_SUBFLOW_ATTR_FLAGS]);
+
+ if (flags & MPTCP_SUBFLOW_FLAG_MCAP_REM)
+ *cap++ = 'M';
+ if (flags & MPTCP_SUBFLOW_FLAG_MCAP_LOC)
+ *cap++ = 'm';
+ if (flags & MPTCP_SUBFLOW_FLAG_JOIN_REM)
+ *cap++ = 'J';
+ if (flags & MPTCP_SUBFLOW_FLAG_JOIN_LOC)
+ *cap++ = 'j';
+ if (flags & MPTCP_SUBFLOW_FLAG_BKUP_REM)
+ *cap++ = 'B';
+ if (flags & MPTCP_SUBFLOW_FLAG_BKUP_LOC)
+ *cap++ = 'b';
+ if (flags & MPTCP_SUBFLOW_FLAG_FULLY_ESTABLISHED)
+ *cap++ = 'e';
+ if (flags & MPTCP_SUBFLOW_FLAG_CONNECTED)
+ *cap++ = 'c';
+ if (flags & MPTCP_SUBFLOW_FLAG_MAPVALID)
+ *cap++ = 'v';
+
+ if (flags)
+ printf(" flags:%s", caps);
+ }
+ if (tb[MPTCP_SUBFLOW_ATTR_TOKEN_REM] &&
+ tb[MPTCP_SUBFLOW_ATTR_TOKEN_LOC] &&
+ tb[MPTCP_SUBFLOW_ATTR_ID_REM] &&
+ tb[MPTCP_SUBFLOW_ATTR_ID_LOC])
+ printf(" token:%04x(id:%u)/%04x(id:%u)",
+ rta_getattr(__u32, tb[MPTCP_SUBFLOW_ATTR_TOKEN_REM]),
+ rta_getattr(__u8, tb[MPTCP_SUBFLOW_ATTR_ID_REM]),
+ rta_getattr(__u32, tb[MPTCP_SUBFLOW_ATTR_TOKEN_LOC]),
+ rta_getattr(__u8, tb[MPTCP_SUBFLOW_ATTR_ID_LOC]));
+ if (tb[MPTCP_SUBFLOW_ATTR_MAP_SEQ])
+ printf(" seq:%llu",
+ rta_getattr(__u64, tb[MPTCP_SUBFLOW_ATTR_MAP_SEQ]));
+ if (tb[MPTCP_SUBFLOW_ATTR_MAP_SFSEQ])
+ printf(" sfseq:%u",
+ rta_getattr(__u32, tb[MPTCP_SUBFLOW_ATTR_MAP_SFSEQ]));
+ if (tb[MPTCP_SUBFLOW_ATTR_SSN_OFFSET])
+ printf(" ssnoff:%u",
+ rta_getattr(__u32, tb[MPTCP_SUBFLOW_ATTR_SSN_OFFSET]));
+ if (tb[MPTCP_SUBFLOW_ATTR_MAP_DATALEN])
+ printf(" maplen:%u",
+ rta_getattr(__u32, tb[MPTCP_SUBFLOW_ATTR_MAP_DATALEN]));
+ printf("\n");
+}
+
+static void parse_nlmsg(struct nlmsghdr *nlh, __u32 proto)
{
struct inet_diag_msg *r = NLMSG_DATA(nlh);
struct rtattr *tb[INET_DIAG_MAX + 1];
@@ -169,7 +267,7 @@ static void parse_nlmsg(struct nlmsghdr *nlh)
nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*r)),
NLA_F_NESTED);
- if (tb[INET_DIAG_INFO]) {
+ if (proto == IPPROTO_MPTCP && tb[INET_DIAG_INFO]) {
int len = RTA_PAYLOAD(tb[INET_DIAG_INFO]);
struct mptcp_info *info;
@@ -183,11 +281,28 @@ static void parse_nlmsg(struct nlmsghdr *nlh)
}
print_info_msg(info);
}
+ if (proto == IPPROTO_TCP && tb[INET_DIAG_ULP_INFO]) {
+ struct rtattr *ulpinfo[INET_ULP_INFO_MAX + 1] = { 0 };
+
+ parse_rtattr_nested(ulpinfo, INET_ULP_INFO_MAX,
+ tb[INET_DIAG_ULP_INFO]);
+
+ if (ulpinfo[INET_ULP_INFO_MPTCP]) {
+ struct rtattr *sfinfo[MPTCP_SUBFLOW_ATTR_MAX + 1] = { 0 };
+
+ parse_rtattr_nested(sfinfo, MPTCP_SUBFLOW_ATTR_MAX,
+ ulpinfo[INET_ULP_INFO_MPTCP]);
+ print_subflow_info(sfinfo);
+ } else {
+ printf("It's a normal TCP!\n");
+ }
+ }
}
-static void recv_nlmsg(int fd, struct nlmsghdr *nlh)
+static void recv_nlmsg(int fd, __u32 proto)
{
char rcv_buff[8192];
+ struct nlmsghdr *nlh = (struct nlmsghdr *)rcv_buff;
struct sockaddr_nl rcv_nladdr = {
.nl_family = AF_NETLINK
};
@@ -204,7 +319,6 @@ static void recv_nlmsg(int fd, struct nlmsghdr *nlh)
int len;
len = recvmsg(fd, &rcv_msg, 0);
- nlh = (struct nlmsghdr *)rcv_buff;
while (NLMSG_OK(nlh, len)) {
if (nlh->nlmsg_type == NLMSG_DONE) {
@@ -218,40 +332,84 @@ static void recv_nlmsg(int fd, struct nlmsghdr *nlh)
-(err->error), strerror(-(err->error)));
break;
}
- parse_nlmsg(nlh);
+ parse_nlmsg(nlh, proto);
nlh = NLMSG_NEXT(nlh, len);
}
}
static void get_mptcpinfo(__u32 token)
{
- struct nlmsghdr *nlh = NULL;
+ struct inet_diag_req_v2 r = {
+ .sdiag_family = AF_INET,
+ /* Real proto is set via INET_DIAG_REQ_PROTOCOL */
+ .sdiag_protocol = IPPROTO_TCP,
+ .idiag_ext = 1 << (INET_DIAG_INFO - 1),
+ .id.idiag_cookie[0] = token,
+ };
+ __u32 proto = IPPROTO_MPTCP;
int fd;
fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_SOCK_DIAG);
if (fd < 0)
die_perror("Netlink socket");
- send_query(fd, token);
- recv_nlmsg(fd, nlh);
+ send_query(fd, &r, proto);
+ recv_nlmsg(fd, proto);
close(fd);
}
-static void parse_opts(int argc, char **argv, __u32 *target_token)
+static void get_subflow_info(char *subflow_addrs)
+{
+ struct inet_diag_req_v2 r = {
+ .sdiag_family = AF_INET,
+ .sdiag_protocol = IPPROTO_TCP,
+ .idiag_ext = 1 << (INET_DIAG_INFO - 1),
+ .id.idiag_cookie[0] = INET_DIAG_NOCOOKIE,
+ .id.idiag_cookie[1] = INET_DIAG_NOCOOKIE,
+ };
+ char saddr[64], daddr[64];
+ int sport, dport;
+ int ret;
+ int fd;
+
+ ret = sscanf(subflow_addrs, "%[^:]:%d %[^:]:%d", saddr, &sport, daddr, &dport);
+ if (ret != 4)
+ die_perror("IP PORT Pairs has style problems!");
+
+ printf("%s:%d -> %s:%d\n", saddr, sport, daddr, dport);
+
+ fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_SOCK_DIAG);
+ if (fd < 0)
+ die_perror("Netlink socket");
+
+ r.id.idiag_sport = htons(sport);
+ r.id.idiag_dport = htons(dport);
+
+ inet_pton(AF_INET, saddr, &r.id.idiag_src);
+ inet_pton(AF_INET, daddr, &r.id.idiag_dst);
+ send_query(fd, &r, IPPROTO_TCP);
+ recv_nlmsg(fd, IPPROTO_TCP);
+}
+
+static void parse_opts(int argc, char **argv, struct params *p)
{
int c;
if (argc < 2)
die_usage(1);
- while ((c = getopt(argc, argv, "ht:")) != -1) {
+ while ((c = getopt(argc, argv, "ht:s:")) != -1) {
switch (c) {
case 'h':
die_usage(0);
break;
case 't':
- sscanf(optarg, "%x", target_token);
+ sscanf(optarg, "%x", &p->target_token);
+ break;
+ case 's':
+ strncpy(p->subflow_addrs, optarg,
+ sizeof(p->subflow_addrs) - 1);
break;
default:
die_usage(1);
@@ -262,10 +420,15 @@ static void parse_opts(int argc, char **argv, __u32 *target_token)
int main(int argc, char *argv[])
{
- __u32 target_token;
+ struct params p = { 0 };
+
+ parse_opts(argc, argv, &p);
+
+ if (p.target_token)
+ get_mptcpinfo(p.target_token);
- parse_opts(argc, argv, &target_token);
- get_mptcpinfo(target_token);
+ if (p.subflow_addrs[0] != '\0')
+ get_subflow_info(p.subflow_addrs);
return 0;
}
diff --git a/tools/testing/selftests/net/mptcp/mptcp_inq.c b/tools/testing/selftests/net/mptcp/mptcp_inq.c
index 218aac467321..3cf1e2a612ce 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_inq.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_inq.c
@@ -72,13 +72,21 @@ static const char *getxinfo_strerr(int err)
}
static void xgetaddrinfo(const char *node, const char *service,
- const struct addrinfo *hints,
+ struct addrinfo *hints,
struct addrinfo **res)
{
+again:
int err = getaddrinfo(node, service, hints, res);
if (err) {
- const char *errstr = getxinfo_strerr(err);
+ const char *errstr;
+
+ if (err == EAI_SOCKTYPE) {
+ hints->ai_protocol = IPPROTO_TCP;
+ goto again;
+ }
+
+ errstr = getxinfo_strerr(err);
fprintf(stderr, "Fatal: getaddrinfo(%s:%s): %s\n",
node ? node : "", service ? service : "", errstr);
@@ -91,7 +99,7 @@ static int sock_listen_mptcp(const char * const listenaddr,
{
int sock = -1;
struct addrinfo hints = {
- .ai_protocol = IPPROTO_TCP,
+ .ai_protocol = IPPROTO_MPTCP,
.ai_socktype = SOCK_STREAM,
.ai_flags = AI_PASSIVE | AI_NUMERICHOST
};
@@ -136,7 +144,7 @@ static int sock_connect_mptcp(const char * const remoteaddr,
const char * const port, int proto)
{
struct addrinfo hints = {
- .ai_protocol = IPPROTO_TCP,
+ .ai_protocol = IPPROTO_MPTCP,
.ai_socktype = SOCK_STREAM,
};
struct addrinfo *a, *addr;
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index befa66f5a366..b8af65373b3a 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -62,6 +62,7 @@ unset sflags
unset fastclose
unset fullmesh
unset speed
+unset join_syn_rej
unset join_csum_ns1
unset join_csum_ns2
unset join_fail_nr
@@ -1403,6 +1404,7 @@ chk_join_nr()
local syn_nr=$1
local syn_ack_nr=$2
local ack_nr=$3
+ local syn_rej=${join_syn_rej:-0}
local csum_ns1=${join_csum_ns1:-0}
local csum_ns2=${join_csum_ns2:-0}
local fail_nr=${join_fail_nr:-0}
@@ -1468,6 +1470,15 @@ chk_join_nr()
fail_test "got $count JOIN[s] ack HMAC failure expected 0"
fi
+ count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinRejected")
+ if [ -z "$count" ]; then
+ rc=${KSFT_SKIP}
+ elif [ "$count" != "$syn_rej" ]; then
+ rc=${KSFT_FAIL}
+ print_check "syn rejected"
+ fail_test "got $count JOIN[s] syn rejected expected $syn_rej"
+ fi
+
print_results "join Rx" ${rc}
join_syn_tx="${join_syn_tx:-${syn_nr}}" \
@@ -1963,7 +1974,8 @@ subflows_tests()
pm_nl_set_limits $ns2 0 1
pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
run_tests $ns1 $ns2 10.0.1.1
- chk_join_nr 1 1 0
+ join_syn_rej=1 \
+ chk_join_nr 1 1 0
fi
# subflow
@@ -1992,7 +2004,8 @@ subflows_tests()
pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow
run_tests $ns1 $ns2 10.0.1.1
- chk_join_nr 2 2 1
+ join_syn_rej=1 \
+ chk_join_nr 2 2 1
fi
# single subflow, dev
@@ -3061,7 +3074,8 @@ syncookies_tests()
pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow
run_tests $ns1 $ns2 10.0.1.1
- chk_join_nr 2 1 1
+ join_syn_rej=1 \
+ chk_join_nr 2 1 1
fi
# test signal address with cookies
@@ -3545,7 +3559,8 @@ userspace_tests()
pm_nl_set_limits $ns2 1 1
pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
run_tests $ns1 $ns2 10.0.1.1
- chk_join_nr 1 1 0
+ join_syn_rej=1 \
+ chk_join_nr 1 1 0
fi
# userspace pm type does not send join
@@ -3568,7 +3583,8 @@ userspace_tests()
pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
sflags=backup speed=slow \
run_tests $ns1 $ns2 10.0.1.1
- chk_join_nr 1 1 0
+ join_syn_rej=1 \
+ chk_join_nr 1 1 0
chk_prio_nr 0 0 0 0
fi
diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh
index 051e289d7967..09cd24b2ae46 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh
@@ -2,7 +2,6 @@
# SPDX-License-Identifier: GPL-2.0
. "$(dirname "${0}")/../lib.sh"
-. "$(dirname "${0}")/../net_helper.sh"
readonly KSFT_PASS=0
readonly KSFT_FAIL=1
@@ -331,12 +330,15 @@ mptcp_lib_result_print_all_tap() {
# get the value of keyword $1 in the line marked by keyword $2
mptcp_lib_get_info_value() {
- grep "${2}" | sed -n 's/.*\('"${1}"':\)\([0-9a-f:.]*\).*$/\2/p;q'
+ grep "${2}" 2>/dev/null |
+ sed -n 's/.*\('"${1}"':\)\([0-9a-f:.]*\).*$/\2/p;q'
+ # the ';q' at the end limits to the first matched entry.
}
# $1: info name ; $2: evts_ns ; [$3: event type; [$4: addr]]
mptcp_lib_evts_get_info() {
- grep "${4:-}" "${2}" | mptcp_lib_get_info_value "${1}" "^type:${3:-1},"
+ grep "${4:-}" "${2}" 2>/dev/null |
+ mptcp_lib_get_info_value "${1}" "^type:${3:-1},"
}
# $1: PID
@@ -476,8 +478,6 @@ mptcp_lib_ns_init() {
local netns
for netns in "${@}"; do
ip netns exec "${!netns}" sysctl -q net.mptcp.enabled=1
- ip netns exec "${!netns}" sysctl -q net.ipv4.conf.all.rp_filter=0
- ip netns exec "${!netns}" sysctl -q net.ipv4.conf.default.rp_filter=0
done
}
diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.c b/tools/testing/selftests/net/mptcp/mptcp_sockopt.c
index 926b0be87c99..9934a68df237 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.c
@@ -159,13 +159,21 @@ static const char *getxinfo_strerr(int err)
}
static void xgetaddrinfo(const char *node, const char *service,
- const struct addrinfo *hints,
+ struct addrinfo *hints,
struct addrinfo **res)
{
+again:
int err = getaddrinfo(node, service, hints, res);
if (err) {
- const char *errstr = getxinfo_strerr(err);
+ const char *errstr;
+
+ if (err == EAI_SOCKTYPE) {
+ hints->ai_protocol = IPPROTO_TCP;
+ goto again;
+ }
+
+ errstr = getxinfo_strerr(err);
fprintf(stderr, "Fatal: getaddrinfo(%s:%s): %s\n",
node ? node : "", service ? service : "", errstr);
@@ -178,7 +186,7 @@ static int sock_listen_mptcp(const char * const listenaddr,
{
int sock = -1;
struct addrinfo hints = {
- .ai_protocol = IPPROTO_TCP,
+ .ai_protocol = IPPROTO_MPTCP,
.ai_socktype = SOCK_STREAM,
.ai_flags = AI_PASSIVE | AI_NUMERICHOST
};
@@ -223,7 +231,7 @@ static int sock_connect_mptcp(const char * const remoteaddr,
const char * const port, int proto)
{
struct addrinfo hints = {
- .ai_protocol = IPPROTO_TCP,
+ .ai_protocol = IPPROTO_MPTCP,
.ai_socktype = SOCK_STREAM,
};
struct addrinfo *a, *addr;
diff --git a/tools/testing/selftests/net/net_helper.sh b/tools/testing/selftests/net/net_helper.sh
deleted file mode 100644
index 6596fe03c77f..000000000000
--- a/tools/testing/selftests/net/net_helper.sh
+++ /dev/null
@@ -1,25 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-#
-# Helper functions
-
-wait_local_port_listen()
-{
- local listener_ns="${1}"
- local port="${2}"
- local protocol="${3}"
- local pattern
- local i
-
- pattern=":$(printf "%04X" "${port}") "
-
- # for tcp protocol additionally check the socket state
- [ ${protocol} = "tcp" ] && pattern="${pattern}0A"
- for i in $(seq 10); do
- if ip netns exec "${listener_ns}" awk '{print $2" "$4}' \
- /proc/net/"${protocol}"* | grep -q "${pattern}"; then
- break
- fi
- sleep 0.1
- done
-}
diff --git a/tools/testing/selftests/net/netfilter/Makefile b/tools/testing/selftests/net/netfilter/Makefile
index ffe161fac8b5..e9b2f553588d 100644
--- a/tools/testing/selftests/net/netfilter/Makefile
+++ b/tools/testing/selftests/net/netfilter/Makefile
@@ -12,6 +12,7 @@ TEST_PROGS += conntrack_dump_flush.sh
TEST_PROGS += conntrack_icmp_related.sh
TEST_PROGS += conntrack_ipip_mtu.sh
TEST_PROGS += conntrack_tcp_unreplied.sh
+TEST_PROGS += conntrack_resize.sh
TEST_PROGS += conntrack_sctp_collision.sh
TEST_PROGS += conntrack_vrf.sh
TEST_PROGS += conntrack_reverse_clash.sh
@@ -23,6 +24,7 @@ TEST_PROGS += nft_concat_range.sh
TEST_PROGS += nft_conntrack_helper.sh
TEST_PROGS += nft_fib.sh
TEST_PROGS += nft_flowtable.sh
+TEST_PROGS += nft_interface_stress.sh
TEST_PROGS += nft_meta.sh
TEST_PROGS += nft_nat.sh
TEST_PROGS += nft_nat_zones.sh
diff --git a/tools/testing/selftests/net/netfilter/br_netfilter.sh b/tools/testing/selftests/net/netfilter/br_netfilter.sh
index 1559ba275105..011de8763094 100755
--- a/tools/testing/selftests/net/netfilter/br_netfilter.sh
+++ b/tools/testing/selftests/net/netfilter/br_netfilter.sh
@@ -60,9 +60,6 @@ bcast_ping()
done
}
-ip netns exec "$ns0" sysctl -q net.ipv4.conf.all.rp_filter=0
-ip netns exec "$ns0" sysctl -q net.ipv4.conf.default.rp_filter=0
-
if ! ip link add veth1 netns "$ns0" type veth peer name eth0 netns "$ns1"; then
echo "SKIP: Can't create veth device"
exit $ksft_skip
diff --git a/tools/testing/selftests/net/netfilter/bridge_brouter.sh b/tools/testing/selftests/net/netfilter/bridge_brouter.sh
index 2549b6590693..ea76f2bc2f59 100755
--- a/tools/testing/selftests/net/netfilter/bridge_brouter.sh
+++ b/tools/testing/selftests/net/netfilter/bridge_brouter.sh
@@ -22,8 +22,6 @@ trap cleanup EXIT
setup_ns nsbr ns1 ns2
-ip netns exec "$nsbr" sysctl -q net.ipv4.conf.default.rp_filter=0
-ip netns exec "$nsbr" sysctl -q net.ipv4.conf.all.rp_filter=0
if ! ip link add veth0 netns "$nsbr" type veth peer name eth0 netns "$ns1"; then
echo "SKIP: Can't create veth device"
exit $ksft_skip
diff --git a/tools/testing/selftests/net/netfilter/config b/tools/testing/selftests/net/netfilter/config
index 43d8b500d391..363646f4fefe 100644
--- a/tools/testing/selftests/net/netfilter/config
+++ b/tools/testing/selftests/net/netfilter/config
@@ -46,6 +46,7 @@ CONFIG_NETFILTER_XT_MATCH_STATE=m
CONFIG_NETFILTER_XT_MATCH_STRING=m
CONFIG_NETFILTER_XT_TARGET_REDIRECT=m
CONFIG_NF_CONNTRACK=m
+CONFIG_NF_CONNTRACK_PROCFS=y
CONFIG_NF_CONNTRACK_EVENTS=y
CONFIG_NF_CONNTRACK_FTP=m
CONFIG_NF_CONNTRACK_MARK=y
diff --git a/tools/testing/selftests/net/netfilter/conntrack_resize.sh b/tools/testing/selftests/net/netfilter/conntrack_resize.sh
new file mode 100755
index 000000000000..9e033e80219e
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/conntrack_resize.sh
@@ -0,0 +1,427 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source lib.sh
+
+checktool "conntrack --version" "run test without conntrack"
+checktool "nft --version" "run test without nft tool"
+
+init_net_max=0
+ct_buckets=0
+tmpfile=""
+tmpfile_proc=""
+tmpfile_uniq=""
+ret=0
+
+insert_count=2000
+[ "$KSFT_MACHINE_SLOW" = "yes" ] && insert_count=400
+
+modprobe -q nf_conntrack
+if ! sysctl -q net.netfilter.nf_conntrack_max >/dev/null;then
+ echo "SKIP: conntrack sysctls not available"
+ exit $KSFT_SKIP
+fi
+
+init_net_max=$(sysctl -n net.netfilter.nf_conntrack_max) || exit 1
+ct_buckets=$(sysctl -n net.netfilter.nf_conntrack_buckets) || exit 1
+
+cleanup() {
+ cleanup_all_ns
+
+ rm -f "$tmpfile" "$tmpfile_proc" "$tmpfile_uniq"
+
+ # restore original sysctl setting
+ sysctl -q net.netfilter.nf_conntrack_max=$init_net_max
+ sysctl -q net.netfilter.nf_conntrack_buckets=$ct_buckets
+}
+trap cleanup EXIT
+
+check_max_alias()
+{
+ local expected="$1"
+ # old name, expected to alias to the first, i.e. changing one
+ # changes the other as well.
+ local lv=$(sysctl -n net.nf_conntrack_max)
+
+ if [ $expected -ne "$lv" ];then
+ echo "nf_conntrack_max sysctls should have identical values"
+ exit 1
+ fi
+}
+
+insert_ctnetlink() {
+ local ns="$1"
+ local count="$2"
+ local i=0
+ local bulk=16
+
+ while [ $i -lt $count ] ;do
+ ip netns exec "$ns" bash -c "for i in \$(seq 1 $bulk); do \
+ if ! conntrack -I -s \$((\$RANDOM%256)).\$((\$RANDOM%256)).\$((\$RANDOM%256)).\$((\$RANDOM%255+1)) \
+ -d \$((\$RANDOM%256)).\$((\$RANDOM%256)).\$((\$RANDOM%256)).\$((\$RANDOM%255+1)) \
+ --protonum 17 --timeout 3600 --status ASSURED,SEEN_REPLY --sport \$RANDOM --dport 53; then \
+ return;\
+ fi & \
+ done ; wait" 2>/dev/null
+
+ i=$((i+bulk))
+ done
+}
+
+check_ctcount() {
+ local ns="$1"
+ local count="$2"
+ local msg="$3"
+
+ local now=$(ip netns exec "$ns" conntrack -C)
+
+ if [ $now -ne "$count" ] ;then
+ echo "expected $count entries in $ns, not $now: $msg"
+ exit 1
+ fi
+
+ echo "PASS: got $count connections: $msg"
+}
+
+ctresize() {
+ local duration="$1"
+ local now=$(date +%s)
+ local end=$((now + duration))
+
+ while [ $now -lt $end ]; do
+ sysctl -q net.netfilter.nf_conntrack_buckets=$RANDOM
+ now=$(date +%s)
+ done
+}
+
+do_rsleep() {
+ local limit="$1"
+ local r=$RANDOM
+
+ r=$((r%limit))
+ sleep "$r"
+}
+
+ct_flush_once() {
+ local ns="$1"
+
+ ip netns exec "$ns" conntrack -F 2>/dev/null
+}
+
+ctflush() {
+ local ns="$1"
+ local duration="$2"
+ local now=$(date +%s)
+ local end=$((now + duration))
+
+ do_rsleep "$duration"
+
+ while [ $now -lt $end ]; do
+ ct_flush_once "$ns"
+ do_rsleep "$duration"
+ now=$(date +%s)
+ done
+}
+
+ctflood()
+{
+ local ns="$1"
+ local duration="$2"
+ local msg="$3"
+ local now=$(date +%s)
+ local end=$((now + duration))
+ local j=0
+ local k=0
+
+ while [ $now -lt $end ]; do
+ j=$((j%256))
+ k=$((k%256))
+
+ ip netns exec "$ns" bash -c \
+ "j=$j k=$k; for i in \$(seq 1 254); do ping -q -c 1 127.\$k.\$j.\$i & done; wait" >/dev/null 2>&1
+
+ j=$((j+1))
+
+ if [ $j -eq 256 ];then
+ k=$((k+1))
+ fi
+
+ now=$(date +%s)
+ done
+
+ wait
+}
+
+# dump to /dev/null. We don't want dumps to cause infinite loops
+# or use-after-free even when conntrack table is altered while dumps
+# are in progress.
+ct_nulldump()
+{
+ local ns="$1"
+
+ ip netns exec "$ns" conntrack -L > /dev/null 2>&1 &
+
+ # Don't require /proc support in conntrack
+ if [ -r /proc/self/net/nf_conntrack ] ; then
+ ip netns exec "$ns" bash -c "wc -l < /proc/self/net/nf_conntrack" > /dev/null &
+ fi
+
+ wait
+}
+
+check_taint()
+{
+ local tainted_then="$1"
+ local msg="$2"
+
+ local tainted_now=0
+
+ if [ "$tainted_then" -ne 0 ];then
+ return
+ fi
+
+ read tainted_now < /proc/sys/kernel/tainted
+
+ if [ "$tainted_now" -eq 0 ];then
+ echo "PASS: $msg"
+ else
+ echo "TAINT: $msg"
+ dmesg
+ exit 1
+ fi
+}
+
+insert_flood()
+{
+ local n="$1"
+ local r=0
+
+ r=$((RANDOM%$insert_count))
+
+ ctflood "$n" "$timeout" "floodresize" &
+ insert_ctnetlink "$n" "$r" &
+ ctflush "$n" "$timeout" &
+ ct_nulldump "$n" &
+
+ wait
+}
+
+test_floodresize_all()
+{
+ local timeout=20
+ local n=""
+ local tainted_then=""
+
+ read tainted_then < /proc/sys/kernel/tainted
+
+ for n in "$nsclient1" "$nsclient2";do
+ insert_flood "$n" &
+ done
+
+ # resize table constantly while flood/insert/dump/flushs
+ # are happening in parallel.
+ ctresize "$timeout"
+
+ # wait for subshells to complete, everything is limited
+ # by $timeout.
+ wait
+
+ check_taint "$tainted_then" "resize+flood"
+}
+
+check_dump()
+{
+ local ns="$1"
+ local protoname="$2"
+ local c=0
+ local proto=0
+ local proc=0
+ local unique=""
+ local lret=0
+
+ # NOTE: assumes timeouts are large enough to not have
+ # expirations in all following tests.
+ l=$(ip netns exec "$ns" conntrack -L 2>/dev/null | sort | tee "$tmpfile" | wc -l)
+ c=$(ip netns exec "$ns" conntrack -C)
+
+ if [ "$c" -eq 0 ]; then
+ echo "FAIL: conntrack count for $ns is 0"
+ lret=1
+ fi
+
+ if [ "$c" -ne "$l" ]; then
+ echo "FAIL: conntrack count inconsistency for $ns -L: $c != $l"
+ lret=1
+ fi
+
+ # check the dump we retrieved is free of duplicated entries.
+ unique=$(uniq "$tmpfile" | tee "$tmpfile_uniq" | wc -l)
+ if [ "$l" -ne "$unique" ]; then
+ echo "FAIL: listing contained redundant entries for $ns: $l != $unique"
+ diff -u "$tmpfile" "$tmpfile_uniq"
+ lret=1
+ fi
+
+ # we either inserted icmp or only udp, hence, --proto should return same entry count as without filter.
+ proto=$(ip netns exec "$ns" conntrack -L --proto $protoname 2>/dev/null | sort | uniq | tee "$tmpfile_uniq" | wc -l)
+ if [ "$l" -ne "$proto" ]; then
+ echo "FAIL: dump inconsistency for $ns -L --proto $protoname: $l != $proto"
+ diff -u "$tmpfile" "$tmpfile_uniq"
+ lret=1
+ fi
+
+ if [ -r /proc/self/net/nf_conntrack ] ; then
+ proc=$(ip netns exec "$ns" bash -c "sort < /proc/self/net/nf_conntrack | tee \"$tmpfile_proc\" | wc -l")
+
+ if [ "$l" -ne "$proc" ]; then
+ echo "FAIL: proc inconsistency for $ns: $l != $proc"
+ lret=1
+ fi
+
+ proc=$(uniq "$tmpfile_proc" | tee "$tmpfile_uniq" | wc -l)
+ if [ "$l" -ne "$proc" ]; then
+ echo "FAIL: proc inconsistency after uniq filter for $ns: $l != $proc"
+ diff -u "$tmpfile_proc" "$tmpfile_uniq"
+ lret=1
+ fi
+ fi
+
+ if [ $lret -eq 0 ];then
+ echo "PASS: dump in netns $ns had same entry count (-C $c, -L $l, -p $proto, /proc $proc)"
+ else
+ echo "FAIL: dump in netns $ns had different entry count (-C $c, -L $l, -p $proto, /proc $proc)"
+ ret=1
+ fi
+}
+
+test_dump_all()
+{
+ local timeout=3
+ local tainted_then=""
+
+ read tainted_then < /proc/sys/kernel/tainted
+
+ ct_flush_once "$nsclient1"
+ ct_flush_once "$nsclient2"
+
+ ip netns exec "$nsclient1" sysctl -q net.netfilter.nf_conntrack_icmp_timeout=3600
+
+ ctflood "$nsclient1" $timeout "dumpall" &
+ insert_ctnetlink "$nsclient2" $insert_count
+
+ wait
+
+ check_dump "$nsclient1" "icmp"
+ check_dump "$nsclient2" "udp"
+
+ check_taint "$tainted_then" "test parallel conntrack dumps"
+}
+
+check_sysctl_immutable()
+{
+ local ns="$1"
+ local name="$2"
+ local failhard="$3"
+ local o=0
+ local n=0
+
+ o=$(ip netns exec "$ns" sysctl -n "$name" 2>/dev/null)
+ n=$((o+1))
+
+ # return value isn't reliable, need to read it back
+ ip netns exec "$ns" sysctl -q "$name"=$n 2>/dev/null >/dev/null
+
+ n=$(ip netns exec "$ns" sysctl -n "$name" 2>/dev/null)
+
+ [ -z "$n" ] && return 1
+
+ if [ $o -ne $n ]; then
+ if [ $failhard -gt 0 ] ;then
+ echo "FAIL: net.$name should not be changeable from namespace (now $n)"
+ ret=1
+ fi
+ return 0
+ fi
+
+ return 1
+}
+
+test_conntrack_max_limit()
+{
+ sysctl -q net.netfilter.nf_conntrack_max=100
+ insert_ctnetlink "$nsclient1" 101
+
+ # check netns is clamped by init_net, i.e., either netns follows
+ # init_net value, or a higher pernet limit (compared to init_net) is ignored.
+ check_ctcount "$nsclient1" 100 "netns conntrack_max is init_net bound"
+
+ sysctl -q net.netfilter.nf_conntrack_max=$init_net_max
+}
+
+test_conntrack_disable()
+{
+ local timeout=2
+
+ # disable conntrack pickups
+ ip netns exec "$nsclient1" nft flush table ip test_ct
+
+ ct_flush_once "$nsclient1"
+ ct_flush_once "$nsclient2"
+
+ ctflood "$nsclient1" "$timeout" "conntrack disable"
+ ip netns exec "$nsclient2" ping -q -c 1 127.0.0.1 >/dev/null 2>&1
+
+ # Disabled, should not have picked up any connection.
+ check_ctcount "$nsclient1" 0 "conntrack disabled"
+
+ # This one is still active, expect 1 connection.
+ check_ctcount "$nsclient2" 1 "conntrack enabled"
+}
+
+init_net_max=$(sysctl -n net.netfilter.nf_conntrack_max)
+
+check_max_alias $init_net_max
+
+sysctl -q net.netfilter.nf_conntrack_max="262000"
+check_max_alias 262000
+
+setup_ns nsclient1 nsclient2
+
+# check this only works from init_net
+for n in netfilter.nf_conntrack_buckets netfilter.nf_conntrack_expect_max net.nf_conntrack_max;do
+ check_sysctl_immutable "$nsclient1" "net.$n" 1
+done
+
+# won't work on older kernels. If it works, check that the netns obeys the limit
+if check_sysctl_immutable "$nsclient1" net.netfilter.nf_conntrack_max 0;then
+ # subtest: if pernet is changeable, check that reducing it in pernet
+ # limits the pernet entries. Inverse, pernet clamped by a lower init_net
+ # setting, is already checked by "test_conntrack_max_limit" test.
+
+ ip netns exec "$nsclient1" sysctl -q net.netfilter.nf_conntrack_max=1
+ insert_ctnetlink "$nsclient1" 2
+ check_ctcount "$nsclient1" 1 "netns conntrack_max is pernet bound"
+ ip netns exec "$nsclient1" sysctl -q net.netfilter.nf_conntrack_max=$init_net_max
+fi
+
+for n in "$nsclient1" "$nsclient2";do
+# enable conntrack in both namespaces
+ip netns exec "$n" nft -f - <<EOF
+table ip test_ct {
+ chain input {
+ type filter hook input priority 0
+ ct state new counter
+ }
+}
+EOF
+done
+
+tmpfile=$(mktemp)
+tmpfile_proc=$(mktemp)
+tmpfile_uniq=$(mktemp)
+test_conntrack_max_limit
+test_dump_all
+test_floodresize_all
+test_conntrack_disable
+
+exit $ret
diff --git a/tools/testing/selftests/net/netfilter/conntrack_vrf.sh b/tools/testing/selftests/net/netfilter/conntrack_vrf.sh
index e95ecb37c2b1..207b79932d91 100755
--- a/tools/testing/selftests/net/netfilter/conntrack_vrf.sh
+++ b/tools/testing/selftests/net/netfilter/conntrack_vrf.sh
@@ -32,7 +32,6 @@ source lib.sh
IP0=172.30.30.1
IP1=172.30.30.2
-DUMMYNET=10.9.9
PFXL=30
ret=0
@@ -52,11 +51,6 @@ trap cleanup EXIT
setup_ns ns0 ns1
-ip netns exec "$ns0" sysctl -q -w net.ipv4.conf.default.rp_filter=0
-ip netns exec "$ns0" sysctl -q -w net.ipv4.conf.all.rp_filter=0
-ip netns exec "$ns0" sysctl -q -w net.ipv4.conf.all.rp_filter=0
-ip netns exec "$ns0" sysctl -q -w net.ipv4.conf.all.forwarding=1
-
if ! ip link add veth0 netns "$ns0" type veth peer name veth0 netns "$ns1" > /dev/null 2>&1; then
echo "SKIP: Could not add veth device"
exit $ksft_skip
@@ -67,18 +61,13 @@ if ! ip -net "$ns0" li add tvrf type vrf table 9876; then
exit $ksft_skip
fi
-ip -net "$ns0" link add dummy0 type dummy
-
ip -net "$ns0" li set veth0 master tvrf
-ip -net "$ns0" li set dummy0 master tvrf
ip -net "$ns0" li set tvrf up
ip -net "$ns0" li set veth0 up
-ip -net "$ns0" li set dummy0 up
ip -net "$ns1" li set veth0 up
ip -net "$ns0" addr add $IP0/$PFXL dev veth0
ip -net "$ns1" addr add $IP1/$PFXL dev veth0
-ip -net "$ns0" addr add $DUMMYNET.1/$PFXL dev dummy0
listener_ready()
{
@@ -219,35 +208,9 @@ EOF
fi
}
-test_fib()
-{
-ip netns exec "$ns0" nft -f - <<EOF
-flush ruleset
-table ip t {
- counter fibcount { }
-
- chain prerouting {
- type filter hook prerouting priority 0;
- meta iifname veth0 ip daddr $DUMMYNET.2 fib daddr oif dummy0 counter name fibcount notrack
- }
-}
-EOF
- ip -net "$ns1" route add 10.9.9.0/24 via "$IP0" dev veth0
- ip netns exec "$ns1" ping -q -w 1 -c 1 "$DUMMYNET".2 > /dev/null
-
- if ip netns exec "$ns0" nft list counter t fibcount | grep -q "packets 1"; then
- echo "PASS: fib lookup returned exepected output interface"
- else
- echo "FAIL: fib lookup did not return exepected output interface"
- ret=1
- return
- fi
-}
-
test_ct_zone_in
test_masquerade_vrf "default"
test_masquerade_vrf "pfifo"
test_masquerade_veth
-test_fib
exit $ret
diff --git a/tools/testing/selftests/net/netfilter/ipvs.sh b/tools/testing/selftests/net/netfilter/ipvs.sh
index d3edb16cd4b3..6af2ea3ad6b8 100755
--- a/tools/testing/selftests/net/netfilter/ipvs.sh
+++ b/tools/testing/selftests/net/netfilter/ipvs.sh
@@ -129,9 +129,6 @@ test_dr() {
# avoid incorrect arp response
ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.arp_ignore=1
ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.arp_announce=2
- # avoid reverse route lookup
- ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.rp_filter=0
- ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.veth21.rp_filter=0
ip netns exec "${ns2}" ip addr add "${vip_v4}/32" dev lo:1
test_service
@@ -167,9 +164,6 @@ test_tun() {
ip netns exec "${ns2}" ip link set tunl0 up
ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.arp_ignore=1
ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.arp_announce=2
- ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.rp_filter=0
- ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.tunl0.rp_filter=0
- ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.veth21.rp_filter=0
ip netns exec "${ns2}" ip addr add "${vip_v4}/32" dev lo:1
test_service
diff --git a/tools/testing/selftests/net/netfilter/nft_concat_range.sh b/tools/testing/selftests/net/netfilter/nft_concat_range.sh
index 1f5979c1510c..efea93cf23d4 100755
--- a/tools/testing/selftests/net/netfilter/nft_concat_range.sh
+++ b/tools/testing/selftests/net/netfilter/nft_concat_range.sh
@@ -15,10 +15,12 @@ source lib.sh
# Available test groups:
# - reported_issues: check for issues that were reported in the past
# - correctness: check that packets match given entries, and only those
+# - correctness_large: same but with additional non-matching entries
# - concurrency: attempt races between insertion, deletion and lookup
# - timeout: check that packets match entries until they expire
# - performance: estimate matching rate, compare with rbtree and hash baselines
-TESTS="reported_issues correctness concurrency timeout"
+TESTS="reported_issues correctness correctness_large concurrency timeout"
+
[ -n "$NFT_CONCAT_RANGE_TESTS" ] && TESTS="${NFT_CONCAT_RANGE_TESTS}"
# Set types, defined by TYPE_ variables below
@@ -1257,9 +1259,7 @@ send_nomatch() {
# - add ranged element, check that packets match it
# - check that packets outside range don't match it
# - remove some elements, check that packets don't match anymore
-test_correctness() {
- setup veth send_"${proto}" set || return ${ksft_skip}
-
+test_correctness_main() {
range_size=1
for i in $(seq "${start}" $((start + count))); do
end=$((start + range_size))
@@ -1293,6 +1293,163 @@ test_correctness() {
done
}
+test_correctness() {
+ setup veth send_"${proto}" set || return ${ksft_skip}
+
+ test_correctness_main
+}
+
+# Repeat the correctness tests, but add extra non-matching entries.
+# This exercises the more compact '4 bit group' representation that
+# gets picked when the default 8-bit representation exceed
+# NFT_PIPAPO_LT_SIZE_HIGH bytes of memory.
+# See usage of NFT_PIPAPO_LT_SIZE_HIGH in pipapo_lt_bits_adjust().
+#
+# The format() helper is way too slow when generating lots of
+# entries so its not used here.
+test_correctness_large() {
+ setup veth send_"${proto}" set || return ${ksft_skip}
+ # number of dummy (filler) entries to add.
+ local dcount=16385
+
+ (
+ echo -n "add element inet filter test { "
+
+ case "$type_spec" in
+ "ether_addr . ipv4_addr")
+ for i in $(seq 1 $dcount); do
+ [ $i -gt 1 ] && echo ", "
+ format_mac $((1000000 + i))
+ printf ". 172.%i.%i.%i " $((RANDOM%256)) $((RANDOM%256)) $((i%256))
+ done
+ ;;
+ "inet_proto . ipv6_addr")
+ for i in $(seq 1 $dcount); do
+ [ $i -gt 1 ] && echo ", "
+ printf "%i . " $((RANDOM%256))
+ format_addr6 $((1000000 + i))
+ done
+ ;;
+ "inet_service . inet_proto")
+ # smaller key sizes, need more entries to hit the
+ # 4-bit threshold.
+ dcount=65536
+ for i in $(seq 1 $dcount); do
+ local proto=$((RANDOM%256))
+
+ # Test uses UDP to match, as it also fails when matching
+ # an entry that doesn't exist, so skip 'udp' entries
+ # to not trigger a wrong failure.
+ [ $proto -eq 17 ] && proto=18
+ [ $i -gt 1 ] && echo ", "
+ printf "%i . %i " $(((i%65534) + 1)) $((proto))
+ done
+ ;;
+ "inet_service . ipv4_addr")
+ dcount=32768
+ for i in $(seq 1 $dcount); do
+ [ $i -gt 1 ] && echo ", "
+ printf "%i . 172.%i.%i.%i " $(((RANDOM%65534) + 1)) $((RANDOM%256)) $((RANDOM%256)) $((i%256))
+ done
+ ;;
+ "ipv4_addr . ether_addr")
+ for i in $(seq 1 $dcount); do
+ [ $i -gt 1 ] && echo ", "
+ printf "172.%i.%i.%i . " $((RANDOM%256)) $((RANDOM%256)) $((i%256))
+ format_mac $((1000000 + i))
+ done
+ ;;
+ "ipv4_addr . inet_service")
+ dcount=32768
+ for i in $(seq 1 $dcount); do
+ [ $i -gt 1 ] && echo ", "
+ printf "172.%i.%i.%i . %i" $((RANDOM%256)) $((RANDOM%256)) $((i%256)) $(((RANDOM%65534) + 1))
+ done
+ ;;
+ "ipv4_addr . inet_service . ether_addr . inet_proto . ipv4_addr")
+ dcount=65536
+ for i in $(seq 1 $dcount); do
+ [ $i -gt 1 ] && echo ", "
+ printf "172.%i.%i.%i . %i . " $((RANDOM%256)) $((RANDOM%256)) $((i%256)) $(((RANDOM%65534) + 1))
+ format_mac $((1000000 + i))
+ printf ". %i . 192.168.%i.%i" $((RANDOM%256)) $((RANDOM%256)) $((i%256))
+ done
+ ;;
+ "ipv4_addr . inet_service . inet_proto")
+ for i in $(seq 1 $dcount); do
+ [ $i -gt 1 ] && echo ", "
+ printf "172.%i.%i.%i . %i . %i " $((RANDOM%256)) $((RANDOM%256)) $((i%256)) $(((RANDOM%65534) + 1)) $((RANDOM%256))
+ done
+ ;;
+ "ipv4_addr . inet_service . inet_proto . ipv4_addr")
+ for i in $(seq 1 $dcount); do
+ [ $i -gt 1 ] && echo ", "
+ printf "172.%i.%i.%i . %i . %i . 192.168.%i.%i " $((RANDOM%256)) $((RANDOM%256)) $((i%256)) $(((RANDOM%65534) + 1)) $((RANDOM%256)) $((RANDOM%256)) $((RANDOM%256))
+ done
+ ;;
+ "ipv4_addr . inet_service . ipv4_addr")
+ dcount=32768
+ for i in $(seq 1 $dcount); do
+ [ $i -gt 1 ] && echo ", "
+ printf "172.%i.%i.%i . %i . 192.168.%i.%i " $((RANDOM%256)) $((RANDOM%256)) $((i%256)) $(((RANDOM%65534) + 1)) $((RANDOM%256)) $((RANDOM%256))
+ done
+ ;;
+ "ipv6_addr . ether_addr")
+ for i in $(seq 1 $dcount); do
+ [ $i -gt 1 ] && echo ", "
+ format_addr6 $((i + 1000000))
+ echo -n " . "
+ format_mac $((1000000 + i))
+ done
+ ;;
+ "ipv6_addr . inet_service")
+ dcount=32768
+ for i in $(seq 1 $dcount); do
+ [ $i -gt 1 ] && echo ", "
+ format_addr6 $((i + 1000000))
+ echo -n " . $(((RANDOM%65534) + 1))"
+ done
+ ;;
+ "ipv6_addr . inet_service . ether_addr")
+ dcount=32768
+ for i in $(seq 1 $dcount); do
+ [ $i -gt 1 ] && echo ", "
+ format_addr6 $((i + 1000000))
+ echo -n " . $(((RANDOM%65534) + 1)) . "
+ format_mac $((i + 1000000))
+ done
+ ;;
+ "ipv6_addr . inet_service . ether_addr . inet_proto")
+ dcount=65536
+ for i in $(seq 1 $dcount); do
+ [ $i -gt 1 ] && echo ", "
+ format_addr6 $((i + 1000000))
+ echo -n " . $(((RANDOM%65534) + 1)) . "
+ format_mac $((i + 1000000))
+ echo -n " . $((RANDOM%256))"
+ done
+ ;;
+ "ipv6_addr . inet_service . ipv6_addr . inet_service")
+ dcount=32768
+ for i in $(seq 1 $dcount); do
+ [ $i -gt 1 ] && echo ", "
+ format_addr6 $((i + 1000000))
+ echo -n " . $(((RANDOM%65534) + 1)) . "
+ format_addr6 $((i + 2123456))
+ echo -n " . $((RANDOM%256))"
+ done
+ ;;
+ *)
+ "Unhandled $type_spec"
+ return 1
+ esac
+ echo -n "}"
+
+ ) | nft -f - || return 1
+
+ test_correctness_main
+}
+
# Concurrency test template:
# - add all the elements
# - start a thread for each physical thread that:
diff --git a/tools/testing/selftests/net/netfilter/nft_fib.sh b/tools/testing/selftests/net/netfilter/nft_fib.sh
index ce1451c275fd..9929a9ffef65 100755
--- a/tools/testing/selftests/net/netfilter/nft_fib.sh
+++ b/tools/testing/selftests/net/netfilter/nft_fib.sh
@@ -3,6 +3,10 @@
# This tests the fib expression.
#
# Kselftest framework requirement - SKIP code is 4.
+#
+# 10.0.1.99 10.0.1.1 10.0.2.1 10.0.2.99
+# dead:1::99 dead:1::1 dead:2::1 dead:2::99
+# ns1 <-------> [ veth0 ] nsrouter [veth1] <-------> ns2
source lib.sh
@@ -45,6 +49,19 @@ table inet filter {
EOF
}
+load_input_ruleset() {
+ local netns=$1
+
+ip netns exec "$netns" nft -f /dev/stdin <<EOF
+table inet filter {
+ chain input {
+ type filter hook input priority 0; policy accept;
+ fib saddr . iif oif missing counter log prefix "$netns nft_rpfilter: " drop
+ }
+}
+EOF
+}
+
load_pbr_ruleset() {
local netns=$1
@@ -59,6 +76,89 @@ table inet filter {
EOF
}
+load_type_ruleset() {
+ local netns=$1
+
+ for family in ip ip6;do
+ip netns exec "$netns" nft -f /dev/stdin <<EOF
+table $family filter {
+ chain type_match_in {
+ fib daddr type local counter comment "daddr configured on other iface"
+ fib daddr . iif type local counter comment "daddr configured on iif"
+ fib daddr type unicast counter comment "daddr not local"
+ fib daddr . iif type unicast counter comment "daddr not configured on iif"
+ }
+
+ chain type_match_out {
+ fib daddr type unicast counter
+ fib daddr . oif type unicast counter
+ fib daddr type local counter
+ fib daddr . oif type local counter
+ }
+
+ chain prerouting {
+ type filter hook prerouting priority 0;
+ icmp type echo-request counter jump type_match_in
+ icmpv6 type echo-request counter jump type_match_in
+ }
+
+ chain input {
+ type filter hook input priority 0;
+ icmp type echo-request counter jump type_match_in
+ icmpv6 type echo-request counter jump type_match_in
+ }
+
+ chain forward {
+ type filter hook forward priority 0;
+ icmp type echo-request counter jump type_match_in
+ icmpv6 type echo-request counter jump type_match_in
+ }
+
+ chain output {
+ type filter hook output priority 0;
+ icmp type echo-request counter jump type_match_out
+ icmpv6 type echo-request counter jump type_match_out
+ }
+
+ chain postrouting {
+ type filter hook postrouting priority 0;
+ icmp type echo-request counter jump type_match_out
+ icmpv6 type echo-request counter jump type_match_out
+ }
+}
+EOF
+done
+}
+
+reload_type_ruleset() {
+ ip netns exec "$1" nft flush table ip filter
+ ip netns exec "$1" nft flush table ip6 filter
+ load_type_ruleset "$1"
+}
+
+check_fib_type_counter_family() {
+ local family="$1"
+ local want="$2"
+ local ns="$3"
+ local chain="$4"
+ local what="$5"
+ local errmsg="$6"
+
+ if ! ip netns exec "$ns" nft list chain "$family" filter "$chain" | grep "$what" | grep -q "packets $want";then
+ echo "Netns $ns $family fib type counter doesn't match expected packet count of $want for $what $errmsg" 1>&2
+ ip netns exec "$ns" nft list chain "$family" filter "$chain"
+ ret=1
+ return 1
+ fi
+
+ return 0
+}
+
+check_fib_type_counter() {
+ check_fib_type_counter_family "ip" "$@" || return 1
+ check_fib_type_counter_family "ip6" "$@" || return 1
+}
+
load_ruleset_count() {
local netns=$1
@@ -77,6 +177,7 @@ check_drops() {
if dmesg | grep -q ' nft_rpfilter: ';then
dmesg | grep ' nft_rpfilter: '
echo "FAIL: rpfilter did drop packets"
+ ret=1
return 1
fi
@@ -151,19 +252,506 @@ test_ping() {
return 0
}
+test_ping_unreachable() {
+ local daddr4=$1
+ local daddr6=$2
+
+ if ip netns exec "$ns1" ping -c 1 -w 1 -q "$daddr4" > /dev/null; then
+ echo "FAIL: ${ns1} could reach $daddr4" 1>&2
+ return 1
+ fi
+
+ if ip netns exec "$ns1" ping -c 1 -w 1 -q "$daddr6" > /dev/null; then
+ echo "FAIL: ${ns1} could reach $daddr6" 1>&2
+ return 1
+ fi
+
+ return 0
+}
+
+test_fib_type() {
+ local notice="$1"
+ local errmsg="addr-on-if"
+ local lret=0
+
+ if ! load_type_ruleset "$nsrouter";then
+ echo "SKIP: Could not load fib type ruleset"
+ [ $ret -eq 0 ] && ret=$ksft_skip
+ return
+ fi
+
+ # makes router receive packet for addresses configured on incoming
+ # interface.
+ test_ping 10.0.1.1 dead:1::1 || return 1
+
+ # expectation: triggers all 'local' in prerouting/input.
+ check_fib_type_counter 2 "$nsrouter" "type_match_in" "fib daddr type local" "$errmsg" || lret=1
+ check_fib_type_counter 2 "$nsrouter" "type_match_in" "fib daddr . iif type local" "$errmsg" || lret=1
+
+ reload_type_ruleset "$nsrouter"
+ # makes router receive packet for address configured on a different (but local)
+ # interface.
+ test_ping 10.0.2.1 dead:2::1 || return 1
+
+ # expectation: triggers 'unicast' in prerouting/input for daddr . iif and local for 'daddr'.
+ errmsg="addr-on-host"
+ check_fib_type_counter 2 "$nsrouter" "type_match_in" "fib daddr type local" "$errmsg" || lret=1
+ check_fib_type_counter 2 "$nsrouter" "type_match_in" "fib daddr . iif type unicast" "$errmsg" || lret=1
+
+ reload_type_ruleset "$nsrouter"
+ test_ping 10.0.2.99 dead:2::99 || return 1
+ errmsg="addr-on-otherhost"
+ check_fib_type_counter 2 "$nsrouter" "type_match_in" "fib daddr type unicast" "$errmsg" || lret=1
+ check_fib_type_counter 2 "$nsrouter" "type_match_in" "fib daddr . iif type unicast" "$errmsg" || lret=1
+
+ if [ $lret -eq 0 ];then
+ echo "PASS: fib expression address types match ($notice)"
+ else
+ echo "FAIL: fib expression address types match ($notice)"
+ ret=1
+ fi
+}
+
+test_fib_vrf_dev_add_dummy()
+{
+ if ! ip -net "$nsrouter" link add dummy0 type dummy ;then
+ echo "SKIP: VRF tests: dummy device type not supported"
+ return 1
+ fi
+
+ if ! ip -net "$nsrouter" link add tvrf type vrf table 9876;then
+ echo "SKIP: VRF tests: vrf device type not supported"
+ return 1
+ fi
+
+ ip -net "$nsrouter" link set dummy0 master tvrf
+ ip -net "$nsrouter" link set dummy0 up
+ ip -net "$nsrouter" link set tvrf up
+}
+
+load_ruleset_vrf()
+{
+# Due to the many different possible combinations using named counters
+# or one-rule-per-expected-result is complex.
+#
+# Instead, add dynamic sets for the fib modes
+# (fib address type, fib output interface lookup .. ),
+# and then add the obtained fib results to them.
+#
+# The test is successful if the sets contain the expected results
+# and no unexpected extra entries existed.
+ip netns exec "$nsrouter" nft -f - <<EOF
+flush ruleset
+table inet t {
+ set fibif4 {
+ typeof meta iif . ip daddr . fib daddr oif
+ flags dynamic
+ counter
+ }
+
+ set fibif4iif {
+ typeof meta iif . ip daddr . fib daddr . iif oif
+ flags dynamic
+ counter
+ }
+
+ set fibif6 {
+ typeof meta iif . ip6 daddr . fib daddr oif
+ flags dynamic
+ counter
+ }
+
+ set fibif6iif {
+ typeof meta iif . ip6 daddr . fib daddr . iif oif
+ flags dynamic
+ counter
+ }
+
+ set fibtype4 {
+ typeof meta iif . ip daddr . fib daddr type
+ flags dynamic
+ counter
+ }
+
+ set fibtype4iif {
+ typeof meta iif . ip daddr . fib daddr . iif type
+ flags dynamic
+ counter
+ }
+
+ set fibtype6 {
+ typeof meta iif . ip6 daddr . fib daddr type
+ flags dynamic
+ counter
+ }
+
+ set fibtype6iif {
+ typeof meta iif . ip6 daddr . fib daddr . iif type
+ flags dynamic
+ counter
+ }
+
+ chain fib_test {
+ meta nfproto ipv4 jump {
+ add @fibif4 { meta iif . ip daddr . fib daddr oif }
+ add @fibif4iif { meta iif . ip daddr . fib daddr . iif oif }
+ add @fibtype4 { meta iif . ip daddr . fib daddr type }
+ add @fibtype4iif { meta iif . ip daddr . fib daddr . iif type }
+
+ add @fibif4 { meta iif . ip saddr . fib saddr oif }
+ add @fibif4iif { meta iif . ip saddr . fib saddr . iif oif }
+ }
+
+ meta nfproto ipv6 jump {
+ add @fibif6 { meta iif . ip6 daddr . fib daddr oif }
+ add @fibif6iif { meta iif . ip6 daddr . fib daddr . iif oif }
+ add @fibtype6 { meta iif . ip6 daddr . fib daddr type }
+ add @fibtype6iif { meta iif . ip6 daddr . fib daddr . iif type }
+
+ add @fibif6 { meta iif . ip6 saddr . fib saddr oif }
+ add @fibif6iif { meta iif . ip6 saddr . fib saddr . iif oif }
+ }
+ }
+
+ chain prerouting {
+ type filter hook prerouting priority 0;
+ icmp type echo-request counter jump fib_test
+
+ # neighbour discovery to be ignored.
+ icmpv6 type echo-request counter jump fib_test
+ }
+}
+EOF
+
+if [ $? -ne 0 ] ;then
+ echo "SKIP: Could not load ruleset for fib vrf test"
+ [ $ret -eq 0 ] && ret=$ksft_skip
+ return 1
+fi
+}
+
+check_type()
+{
+ local setname="$1"
+ local iifname="$2"
+ local addr="$3"
+ local type="$4"
+ local count="$5"
+
+ [ -z "$count" ] && count=1
+
+ if ! ip netns exec "$nsrouter" nft get element inet t "$setname" { "$iifname" . "$addr" . "$type" } |grep -q "counter packets $count";then
+ echo "FAIL: did not find $iifname . $addr . $type in $setname"
+ ip netns exec "$nsrouter" nft list set inet t "$setname"
+ ret=1
+ return 1
+ fi
+
+ # delete the entry, this allows to check if anything unexpected appeared
+ # at the end of the test run: all dynamic sets should be empty by then.
+ if ! ip netns exec "$nsrouter" nft delete element inet t "$setname" { "$iifname" . "$addr" . "$type" } ; then
+ echo "FAIL: can't delete $iifname . $addr . $type in $setname"
+ ip netns exec "$nsrouter" nft list set inet t "$setname"
+ ret=1
+ return 1
+ fi
+
+ return 0
+}
+
+check_local()
+{
+ check_type $@ "local" 1
+}
+
+check_unicast()
+{
+ check_type $@ "unicast" 1
+}
+
+check_rpf()
+{
+ check_type $@
+}
+
+check_fib_vrf_sets_empty()
+{
+ local setname=""
+ local lret=0
+
+ # A non-empty set means that we have seen unexpected packets OR
+ # that a fib lookup provided unexpected results.
+ for setname in "fibif4" "fibif4iif" "fibif6" "fibif6iif" \
+ "fibtype4" "fibtype4iif" "fibtype6" "fibtype6iif";do
+ if ip netns exec "$nsrouter" nft list set inet t "$setname" | grep -q elements;then
+ echo "FAIL: $setname not empty"
+ ip netns exec "$nsrouter" nft list set inet t "$setname"
+ ret=1
+ lret=1
+ fi
+ done
+
+ return $lret
+}
+
+check_fib_vrf_type()
+{
+ local msg="$1"
+
+ local addr
+ # the incoming interface is always veth0. As its not linked to a VRF,
+ # the 'tvrf' device should NOT show up anywhere.
+ local ifname="veth0"
+ local lret=0
+
+ # local_veth0, local_veth1
+ for addr in "10.0.1.1" "10.0.2.1"; do
+ check_local fibtype4 "$ifname" "$addr" || lret=1
+ check_type fibif4 "$ifname" "$addr" "0" || lret=1
+ done
+ for addr in "dead:1::1" "dead:2::1";do
+ check_local fibtype6 "$ifname" "$addr" || lret=1
+ check_type fibif6 "$ifname" "$addr" "0" || lret=1
+ done
+
+ # when restricted to the incoming interface, 10.0.1.1 should
+ # be 'local', but 10.0.2.1 unicast.
+ check_local fibtype4iif "$ifname" "10.0.1.1" || lret=1
+ check_unicast fibtype4iif "$ifname" "10.0.2.1" || lret=1
+
+ # same for the ipv6 addresses.
+ check_local fibtype6iif "$ifname" "dead:1::1" || lret=1
+ check_unicast fibtype6iif "$ifname" "dead:2::1" || lret=1
+
+ # None of these addresses should find a valid route when restricting
+ # to the incoming interface (we ask for daddr - 10.0.1.1/2.1 are
+ # reachable via 'lo'.
+ for addr in "10.0.1.1" "10.0.2.1" "10.9.9.1" "10.9.9.2";do
+ check_type fibif4iif "$ifname" "$addr" "0" || lret=1
+ done
+
+ # expect default route (veth1), dummy0 is part of VRF but iif isn't.
+ for addr in "10.9.9.1" "10.9.9.2";do
+ check_unicast fibtype4 "$ifname" "$addr" || lret=1
+ check_unicast fibtype4iif "$ifname" "$addr" || lret=1
+ check_type fibif4 "$ifname" "$addr" "veth1" || lret=1
+ done
+ for addr in "dead:9::1" "dead:9::2";do
+ check_unicast fibtype6 "$ifname" "$addr" || lret=1
+ check_unicast fibtype6iif "$ifname" "$addr" || lret=1
+ check_type fibif6 "$ifname" "$addr" "veth1" || lret=1
+ done
+
+ # same for the IPv6 equivalent addresses.
+ for addr in "dead:1::1" "dead:2::1" "dead:9::1" "dead:9::2";do
+ check_type fibif6iif "$ifname" "$addr" "0" || lret=1
+ done
+
+ check_unicast fibtype4 "$ifname" "10.0.2.99" || lret=1
+ check_unicast fibtype4iif "$ifname" "10.0.2.99" || lret=1
+ check_unicast fibtype6 "$ifname" "dead:2::99" || lret=1
+ check_unicast fibtype6iif "$ifname" "dead:2::99" || lret=1
+
+ check_type fibif4 "$ifname" "10.0.2.99" "veth1" || lret=1
+ check_type fibif4iif "$ifname" "10.0.2.99" 0 || lret=1
+ check_type fibif6 "$ifname" "dead:2::99" "veth1" || lret=1
+ check_type fibif6iif "$ifname" "dead:2::99" 0 || lret=1
+
+ check_rpf fibif4 "$ifname" "10.0.1.99" "veth0" 5 || lret=1
+ check_rpf fibif4iif "$ifname" "10.0.1.99" "veth0" 5 || lret=1
+ check_rpf fibif6 "$ifname" "dead:1::99" "veth0" 5 || lret=1
+ check_rpf fibif6iif "$ifname" "dead:1::99" "veth0" 5 || lret=1
+
+ check_fib_vrf_sets_empty || lret=1
+
+ if [ $lret -eq 0 ];then
+ echo "PASS: $msg"
+ else
+ echo "FAIL: $msg"
+ ret=1
+ fi
+}
+
+check_fib_veth_vrf_type()
+{
+ local msg="$1"
+
+ local addr
+ local ifname
+ local setname
+ local lret=0
+
+ # as veth0 is now part of tvrf interface, packets will be seen
+ # twice, once with iif veth0, then with iif tvrf.
+
+ for ifname in "veth0" "tvrf"; do
+ for addr in "10.0.1.1" "10.9.9.1"; do
+ check_local fibtype4 "$ifname" "$addr" || lret=1
+ # addr local, but nft_fib doesn't return routes with RTN_LOCAL.
+ check_type fibif4 "$ifname" "$addr" 0 || lret=1
+ check_type fibif4iif "$ifname" "$addr" 0 || lret=1
+ done
+
+ for addr in "dead:1::1" "dead:9::1"; do
+ check_local fibtype6 "$ifname" "$addr" || lret=1
+ # same, address is local but no route is returned for lo.
+ check_type fibif6 "$ifname" "$addr" 0 || lret=1
+ check_type fibif6iif "$ifname" "$addr" 0 || lret=1
+ done
+
+ for t in fibtype4 fibtype4iif; do
+ check_unicast "$t" "$ifname" 10.9.9.2 || lret=1
+ done
+ for t in fibtype6 fibtype6iif; do
+ check_unicast "$t" "$ifname" dead:9::2 || lret=1
+ done
+
+ check_unicast fibtype4iif "$ifname" "10.9.9.1" || lret=1
+ check_unicast fibtype6iif "$ifname" "dead:9::1" || lret=1
+
+ check_unicast fibtype4 "$ifname" "10.0.2.99" || lret=1
+ check_unicast fibtype4iif "$ifname" "10.0.2.99" || lret=1
+
+ check_unicast fibtype6 "$ifname" "dead:2::99" || lret=1
+ check_unicast fibtype6iif "$ifname" "dead:2::99" || lret=1
+
+ check_type fibif4 "$ifname" "10.0.2.99" "veth1" || lret=1
+ check_type fibif6 "$ifname" "dead:2::99" "veth1" || lret=1
+ check_type fibif4 "$ifname" "10.9.9.2" "dummy0" || lret=1
+ check_type fibif6 "$ifname" "dead:9::2" "dummy0" || lret=1
+
+ # restricted to iif -- MUST NOT provide result, its != $ifname.
+ check_type fibif4iif "$ifname" "10.0.2.99" 0 || lret=1
+ check_type fibif6iif "$ifname" "dead:2::99" 0 || lret=1
+
+ check_rpf fibif4 "$ifname" "10.0.1.99" "veth0" 4 || lret=1
+ check_rpf fibif6 "$ifname" "dead:1::99" "veth0" 4 || lret=1
+ check_rpf fibif4iif "$ifname" "10.0.1.99" "$ifname" 4 || lret=1
+ check_rpf fibif6iif "$ifname" "dead:1::99" "$ifname" 4 || lret=1
+ done
+
+ check_local fibtype4iif "veth0" "10.0.1.1" || lret=1
+ check_local fibtype6iif "veth0" "dead:1::1" || lret=1
+
+ check_unicast fibtype4iif "tvrf" "10.0.1.1" || lret=1
+ check_unicast fibtype6iif "tvrf" "dead:1::1" || lret=1
+
+ # 10.9.9.2 should not provide a result for iif veth, but
+ # should when iif is tvrf.
+ # This is because its reachable via dummy0 which is part of
+ # tvrf. iif veth0 MUST conceal the dummy0 result (i.e. return oif 0).
+ check_type fibif4iif "veth0" "10.9.9.2" 0 || lret=1
+ check_type fibif6iif "veth0" "dead:9::2" 0 || lret=1
+
+ check_type fibif4iif "tvrf" "10.9.9.2" "tvrf" || lret=1
+ check_type fibif6iif "tvrf" "dead:9::2" "tvrf" || lret=1
+
+ check_fib_vrf_sets_empty || lret=1
+
+ if [ $lret -eq 0 ];then
+ echo "PASS: $msg"
+ else
+ echo "FAIL: $msg"
+ ret=1
+ fi
+}
+
+# Extends nsrouter config by adding dummy0+vrf.
+#
+# 10.0.1.99 10.0.1.1 10.0.2.1 10.0.2.99
+# dead:1::99 dead:1::1 dead:2::1 dead:2::99
+# ns1 <-------> [ veth0 ] nsrouter [veth1] <-------> ns2
+# [dummy0]
+# 10.9.9.1
+# dead:9::1
+# [tvrf]
+test_fib_vrf()
+{
+ local cntname=""
+
+ if ! test_fib_vrf_dev_add_dummy; then
+ [ $ret -eq 0 ] && ret=$ksft_skip
+ return
+ fi
+
+ ip -net "$nsrouter" addr add "10.9.9.1"/24 dev dummy0
+ ip -net "$nsrouter" addr add "dead:9::1"/64 dev dummy0 nodad
+
+ ip -net "$nsrouter" route add default via 10.0.2.99
+ ip -net "$nsrouter" route add default via dead:2::99
+
+ load_ruleset_vrf || return
+
+ # no echo reply for these addresses: The dummy interface is part of tvrf,
+ # but veth0 (incoming interface) isn't linked to it.
+ test_ping_unreachable "10.9.9.1" "dead:9::1" &
+ test_ping_unreachable "10.9.9.2" "dead:9::2" &
+
+ # expect replies from these.
+ test_ping "10.0.1.1" "dead:1::1"
+ test_ping "10.0.2.1" "dead:2::1"
+ test_ping "10.0.2.99" "dead:2::99"
+
+ wait
+
+ check_fib_vrf_type "fib expression address types match (iif not in vrf)"
+
+ # second round: this time, make veth0 (rx interface) part of the vrf.
+ # 10.9.9.1 / dead:9::1 become reachable from ns1, while ns2
+ # becomes unreachable.
+ ip -net "$nsrouter" link set veth0 master tvrf
+ ip -net "$nsrouter" addr add dead:1::1/64 dev veth0 nodad
+
+ # this reload should not be needed, but in case
+ # there is some error (missing or unexpected entry) this will prevent them
+ # from leaking into round 2.
+ load_ruleset_vrf || return
+
+ test_ping "10.0.1.1" "dead:1::1"
+ test_ping "10.9.9.1" "dead:9::1"
+
+ # ns2 should no longer be reachable (veth1 not in vrf)
+ test_ping_unreachable "10.0.2.99" "dead:2::99" &
+
+ # vrf via dummy0, but host doesn't exist
+ test_ping_unreachable "10.9.9.2" "dead:9::2" &
+
+ wait
+
+ check_fib_veth_vrf_type "fib expression address types match (iif in vrf)"
+}
+
ip netns exec "$nsrouter" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
-ip netns exec "$nsrouter" sysctl net.ipv4.conf.all.rp_filter=0 > /dev/null
-ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth0.rp_filter=0 > /dev/null
test_ping 10.0.2.1 dead:2::1 || exit 1
-check_drops || exit 1
+check_drops
test_ping 10.0.2.99 dead:2::99 || exit 1
+check_drops
+
+[ $ret -eq 0 ] && echo "PASS: fib expression did not cause unwanted packet drops"
+
+load_input_ruleset "$ns1"
+
+test_ping 127.0.0.1 ::1
+check_drops
+
+test_ping 10.0.1.99 dead:1::99
+check_drops
+
+[ $ret -eq 0 ] && echo "PASS: fib expression did not discard loopback packets"
+
+load_input_ruleset "$ns1"
+
+test_ping 127.0.0.1 ::1 || exit 1
check_drops || exit 1
-echo "PASS: fib expression did not cause unwanted packet drops"
+test_ping 10.0.1.99 dead:1::99 || exit 1
+check_drops || exit 1
+
+echo "PASS: fib expression did not discard loopback packets"
ip netns exec "$nsrouter" nft flush table inet filter
@@ -213,7 +801,7 @@ ip -net "$nsrouter" addr del dead:2::1/64 dev veth0
# ... pbr ruleset for the router, check iif+oif.
if ! load_pbr_ruleset "$nsrouter";then
echo "SKIP: Could not load fib forward ruleset"
- exit $ksft_skip
+ [ "$ret" -eq 0 ] && ret=$ksft_skip
fi
ip -net "$nsrouter" rule add from all table 128
@@ -224,11 +812,36 @@ ip -net "$nsrouter" route add table 129 to 10.0.2.0/24 dev veth1
# drop main ipv4 table
ip -net "$nsrouter" -4 rule delete table main
-if ! test_ping 10.0.2.99 dead:2::99;then
- ip -net "$nsrouter" nft list ruleset
- echo "FAIL: fib mismatch in pbr setup"
- exit 1
+if test_ping 10.0.2.99 dead:2::99;then
+ echo "PASS: fib expression forward check with policy based routing"
+else
+ echo "FAIL: fib expression forward check with policy based routing"
+ ret=1
fi
-echo "PASS: fib expression forward check with policy based routing"
-exit 0
+test_fib_type "policy routing"
+ip netns exec "$nsrouter" nft delete table ip filter
+ip netns exec "$nsrouter" nft delete table ip6 filter
+
+# Un-do policy routing changes
+ip -net "$nsrouter" rule del from all table 128
+ip -net "$nsrouter" rule del from all iif veth0 table 129
+
+ip -net "$nsrouter" route del table 128 to 10.0.1.0/24 dev veth0
+ip -net "$nsrouter" route del table 129 to 10.0.2.0/24 dev veth1
+
+ip -net "$ns1" -4 route del default
+ip -net "$ns1" -6 route del default
+
+ip -net "$ns1" -4 route add default via 10.0.1.1
+ip -net "$ns1" -6 route add default via dead:1::1
+
+ip -net "$nsrouter" -4 rule add from all table main priority 32766
+
+test_fib_type "default table"
+ip netns exec "$nsrouter" nft delete table ip filter
+ip netns exec "$nsrouter" nft delete table ip6 filter
+
+test_fib_vrf
+
+exit $ret
diff --git a/tools/testing/selftests/net/netfilter/nft_interface_stress.sh b/tools/testing/selftests/net/netfilter/nft_interface_stress.sh
new file mode 100755
index 000000000000..5ff7be9daeee
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/nft_interface_stress.sh
@@ -0,0 +1,154 @@
+#!/bin/bash -e
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+# Torture nftables' netdevice notifier callbacks and related code by frequent
+# renaming of interfaces which netdev-family chains and flowtables hook into.
+
+source lib.sh
+
+checktool "nft --version" "run test without nft tool"
+checktool "iperf3 --version" "run test without iperf3 tool"
+
+# how many seconds to torture the kernel?
+# default to 80% of max run time but don't exceed 48s
+TEST_RUNTIME=$((${kselftest_timeout:-60} * 8 / 10))
+[[ $TEST_RUNTIME -gt 48 ]] && TEST_RUNTIME=48
+
+trap "cleanup_all_ns" EXIT
+
+setup_ns nsc nsr nss
+
+ip -net $nsc link add cr0 type veth peer name rc0 netns $nsr
+ip -net $nsc addr add 10.0.0.1/24 dev cr0
+ip -net $nsc link set cr0 up
+ip -net $nsc route add default via 10.0.0.2
+
+ip -net $nss link add sr0 type veth peer name rs0 netns $nsr
+ip -net $nss addr add 10.1.0.1/24 dev sr0
+ip -net $nss link set sr0 up
+ip -net $nss route add default via 10.1.0.2
+
+ip -net $nsr addr add 10.0.0.2/24 dev rc0
+ip -net $nsr link set rc0 up
+ip -net $nsr addr add 10.1.0.2/24 dev rs0
+ip -net $nsr link set rs0 up
+ip netns exec $nsr sysctl -q net.ipv4.ip_forward=1
+ip netns exec $nsr sysctl -q net.ipv4.conf.all.forwarding=1
+
+{
+ echo "table netdev t {"
+ for ((i = 0; i < 10; i++)); do
+ cat <<-EOF
+ chain chain_rc$i {
+ type filter hook ingress device rc$i priority 0
+ counter
+ }
+ chain chain_rs$i {
+ type filter hook ingress device rs$i priority 0
+ counter
+ }
+ EOF
+ done
+ echo "}"
+ echo "table ip t {"
+ for ((i = 0; i < 10; i++)); do
+ cat <<-EOF
+ flowtable ft_${i} {
+ hook ingress priority 0
+ devices = { rc$i, rs$i }
+ }
+ EOF
+ done
+ echo "chain c {"
+ echo "type filter hook forward priority 0"
+ for ((i = 0; i < 10; i++)); do
+ echo -n "iifname rc$i oifname rs$i "
+ echo "ip protocol tcp counter flow add @ft_${i}"
+ done
+ echo "counter"
+ echo "}"
+ echo "}"
+} | ip netns exec $nsr nft -f - || {
+ echo "SKIP: Could not load nft ruleset"
+ exit $ksft_skip
+}
+
+for ((o=0, n=1; ; o=n, n++, n %= 10)); do
+ ip -net $nsr link set rc$o name rc$n
+ ip -net $nsr link set rs$o name rs$n
+done &
+rename_loop_pid=$!
+
+while true; do ip netns exec $nsr nft list ruleset >/dev/null 2>&1; done &
+nft_list_pid=$!
+
+ip netns exec $nsr nft monitor >/dev/null &
+nft_monitor_pid=$!
+
+ip netns exec $nss iperf3 --server --daemon -1
+summary_expr='s,^\[SUM\] .* \([0-9\.]\+\) Kbits/sec .* receiver,\1,p'
+rate=$(ip netns exec $nsc iperf3 \
+ --format k -c 10.1.0.1 --time $TEST_RUNTIME \
+ --length 56 --parallel 10 -i 0 | sed -n "$summary_expr")
+
+kill $nft_list_pid
+kill $nft_monitor_pid
+kill $rename_loop_pid
+wait
+
+wildcard_prep() {
+ ip netns exec $nsr nft -f - <<EOF
+table ip t {
+ flowtable ft_wild {
+ hook ingress priority 0
+ devices = { wild* }
+ }
+}
+EOF
+}
+
+if ! wildcard_prep; then
+ echo "SKIP wildcard tests: not supported by host's nft?"
+else
+ for ((i = 0; i < 100; i++)); do
+ ip -net $nsr link add wild$i type dummy &
+ done
+ wait
+ for ((i = 80; i < 100; i++)); do
+ ip -net $nsr link del wild$i &
+ done
+ for ((i = 0; i < 80; i++)); do
+ ip -net $nsr link del wild$i &
+ done
+ wait
+ for ((i = 0; i < 100; i += 10)); do
+ (
+ for ((j = 0; j < 10; j++)); do
+ ip -net $nsr link add wild$((i + j)) type dummy
+ done
+ for ((j = 0; j < 10; j++)); do
+ ip -net $nsr link del wild$((i + j))
+ done
+ ) &
+ done
+ wait
+fi
+
+[[ $(</proc/sys/kernel/tainted) -eq 0 ]] || {
+ echo "FAIL: Kernel is tainted!"
+ exit $ksft_fail
+}
+
+[[ $rate -gt 0 ]] || {
+ echo "FAIL: Zero throughput in iperf3"
+ exit $ksft_fail
+}
+
+[[ -f /sys/kernel/debug/kmemleak && \
+ -n $(</sys/kernel/debug/kmemleak) ]] && {
+ echo "FAIL: non-empty kmemleak report"
+ exit $ksft_fail
+}
+
+exit $ksft_pass
diff --git a/tools/testing/selftests/net/netfilter/nft_nat_zones.sh b/tools/testing/selftests/net/netfilter/nft_nat_zones.sh
index 3b81d88bdde3..9f200f80253a 100755
--- a/tools/testing/selftests/net/netfilter/nft_nat_zones.sh
+++ b/tools/testing/selftests/net/netfilter/nft_nat_zones.sh
@@ -88,7 +88,6 @@ for i in $(seq 1 "$maxclients");do
echo netns exec "$cl" sysctl -q net.ipv4.tcp_syn_retries=2
echo netns exec "$gw" ip link set "veth$i" up
echo netns exec "$gw" sysctl -q net.ipv4.conf.veth"$i".arp_ignore=2
- echo netns exec "$gw" sysctl -q net.ipv4.conf.veth"$i".rp_filter=0
# clients have same IP addresses.
echo netns exec "$cl" ip addr add 10.1.0.3/24 dev eth0
@@ -178,7 +177,6 @@ fi
ip netns exec "$gw" sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null
ip netns exec "$gw" sysctl -q net.ipv6.conf.all.forwarding=1 > /dev/null
-ip netns exec "$gw" sysctl -q net.ipv4.conf.all.rp_filter=0 >/dev/null
# useful for debugging: allows to use 'ping' from clients to gateway.
ip netns exec "$gw" sysctl -q net.ipv4.fwmark_reflect=1 > /dev/null
diff --git a/tools/testing/selftests/net/netfilter/nft_queue.sh b/tools/testing/selftests/net/netfilter/nft_queue.sh
index 784d1b46912b..6136ceec45e0 100755
--- a/tools/testing/selftests/net/netfilter/nft_queue.sh
+++ b/tools/testing/selftests/net/netfilter/nft_queue.sh
@@ -10,6 +10,8 @@ source lib.sh
ret=0
timeout=5
+SCTP_TEST_TIMEOUT=60
+
cleanup()
{
ip netns pids "$ns1" | xargs kill 2>/dev/null
@@ -40,7 +42,7 @@ TMPFILE3=$(mktemp)
TMPINPUT=$(mktemp)
COUNT=200
-[ "$KSFT_MACHINE_SLOW" = "yes" ] && COUNT=25
+[ "$KSFT_MACHINE_SLOW" = "yes" ] && COUNT=$((COUNT/8))
dd conv=sparse status=none if=/dev/zero bs=1M count=$COUNT of="$TMPINPUT"
if ! ip link add veth0 netns "$nsrouter" type veth peer name eth0 netns "$ns1" > /dev/null 2>&1; then
@@ -275,9 +277,11 @@ test_tcp_forward()
busywait "$BUSYWAIT_TIMEOUT" listener_ready "$ns2"
busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 2
+ local tthen=$(date +%s)
+
ip netns exec "$ns1" socat -u STDIN TCP:10.0.2.99:12345 <"$TMPINPUT" >/dev/null
- wait "$rpid" && echo "PASS: tcp and nfqueue in forward chain"
+ wait_and_check_retval "$rpid" "tcp and nfqueue in forward chain" "$tthen"
kill "$nfqpid"
}
@@ -288,13 +292,14 @@ test_tcp_localhost()
ip netns exec "$nsrouter" ./nf_queue -q 3 &
local nfqpid=$!
+ local tthen=$(date +%s)
busywait "$BUSYWAIT_TIMEOUT" listener_ready "$nsrouter"
busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 3
ip netns exec "$nsrouter" socat -u STDIN TCP:127.0.0.1:12345 <"$TMPINPUT" >/dev/null
- wait "$rpid" && echo "PASS: tcp via loopback"
+ wait_and_check_retval "$rpid" "tcp via loopback" "$tthen"
kill "$nfqpid"
}
@@ -417,6 +422,23 @@ check_output_files()
fi
}
+wait_and_check_retval()
+{
+ local rpid="$1"
+ local msg="$2"
+ local tthen="$3"
+ local tnow=$(date +%s)
+
+ if wait "$rpid";then
+ echo -n "PASS: "
+ else
+ echo -n "FAIL: "
+ ret=1
+ fi
+
+ printf "%s (duration: %ds)\n" "$msg" $((tnow-tthen))
+}
+
test_sctp_forward()
{
ip netns exec "$nsrouter" nft -f /dev/stdin <<EOF
@@ -428,13 +450,14 @@ table inet sctpq {
}
}
EOF
- timeout 60 ip netns exec "$ns2" socat -u SCTP-LISTEN:12345 STDOUT > "$TMPFILE1" &
+ timeout "$SCTP_TEST_TIMEOUT" ip netns exec "$ns2" socat -u SCTP-LISTEN:12345 STDOUT > "$TMPFILE1" &
local rpid=$!
busywait "$BUSYWAIT_TIMEOUT" sctp_listener_ready "$ns2"
ip netns exec "$nsrouter" ./nf_queue -q 10 -G &
local nfqpid=$!
+ local tthen=$(date +%s)
ip netns exec "$ns1" socat -u STDIN SCTP:10.0.2.99:12345 <"$TMPINPUT" >/dev/null
@@ -443,7 +466,7 @@ EOF
exit 1
fi
- wait "$rpid" && echo "PASS: sctp and nfqueue in forward chain"
+ wait_and_check_retval "$rpid" "sctp and nfqueue in forward chain" "$tthen"
kill "$nfqpid"
check_output_files "$TMPINPUT" "$TMPFILE1" "sctp forward"
@@ -462,13 +485,14 @@ EOF
# reduce test file size, software segmentation causes sk wmem increase.
dd conv=sparse status=none if=/dev/zero bs=1M count=$((COUNT/2)) of="$TMPINPUT"
- timeout 60 ip netns exec "$ns2" socat -u SCTP-LISTEN:12345 STDOUT > "$TMPFILE1" &
+ timeout "$SCTP_TEST_TIMEOUT" ip netns exec "$ns2" socat -u SCTP-LISTEN:12345 STDOUT > "$TMPFILE1" &
local rpid=$!
busywait "$BUSYWAIT_TIMEOUT" sctp_listener_ready "$ns2"
ip netns exec "$ns1" ./nf_queue -q 11 &
local nfqpid=$!
+ local tthen=$(date +%s)
ip netns exec "$ns1" socat -u STDIN SCTP:10.0.2.99:12345 <"$TMPINPUT" >/dev/null
@@ -478,7 +502,7 @@ EOF
fi
# must wait before checking completeness of output file.
- wait "$rpid" && echo "PASS: sctp and nfqueue in output chain with GSO"
+ wait_and_check_retval "$rpid" "sctp and nfqueue in output chain with GSO" "$tthen"
kill "$nfqpid"
check_output_files "$TMPINPUT" "$TMPFILE1" "sctp output"
diff --git a/tools/testing/selftests/net/netfilter/rpath.sh b/tools/testing/selftests/net/netfilter/rpath.sh
index 86ec4e68594d..24ad41d526d9 100755
--- a/tools/testing/selftests/net/netfilter/rpath.sh
+++ b/tools/testing/selftests/net/netfilter/rpath.sh
@@ -1,8 +1,7 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
-# return code to signal skipped test
-ksft_skip=4
+source lib.sh
# search for legacy iptables (it uses the xtables extensions
if iptables-legacy --version >/dev/null 2>&1; then
@@ -32,17 +31,10 @@ if [ -z "$iptables$ip6tables$nft" ]; then
exit $ksft_skip
fi
-sfx=$(mktemp -u "XXXXXXXX")
-ns1="ns1-$sfx"
-ns2="ns2-$sfx"
-trap "ip netns del $ns1; ip netns del $ns2" EXIT
-
-# create two netns, disable rp_filter in ns2 and
-# keep IPv6 address when moving into VRF
-ip netns add "$ns1"
-ip netns add "$ns2"
-ip netns exec "$ns2" sysctl -q net.ipv4.conf.all.rp_filter=0
-ip netns exec "$ns2" sysctl -q net.ipv4.conf.default.rp_filter=0
+trap cleanup_all_ns EXIT
+
+# create two netns, keep IPv6 address when moving into VRF
+setup_ns ns1 ns2
ip netns exec "$ns2" sysctl -q net.ipv6.conf.all.keep_addr_on_down=1
# a standard connection between the netns, should not trigger rp filter
diff --git a/tools/testing/selftests/net/ovpn/.gitignore b/tools/testing/selftests/net/ovpn/.gitignore
new file mode 100644
index 000000000000..ee44c081ca7c
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0+
+ovpn-cli
diff --git a/tools/testing/selftests/net/ovpn/Makefile b/tools/testing/selftests/net/ovpn/Makefile
new file mode 100644
index 000000000000..e0926d76b4c8
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/Makefile
@@ -0,0 +1,32 @@
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2020-2025 OpenVPN, Inc.
+#
+CFLAGS = -pedantic -Wextra -Wall -Wl,--no-as-needed -g -O0 -ggdb $(KHDR_INCLUDES)
+VAR_CFLAGS = $(shell pkg-config --cflags libnl-3.0 libnl-genl-3.0 2>/dev/null)
+ifeq ($(VAR_CFLAGS),)
+VAR_CFLAGS = -I/usr/include/libnl3
+endif
+CFLAGS += $(VAR_CFLAGS)
+
+
+LDLIBS = -lmbedtls -lmbedcrypto
+VAR_LDLIBS = $(shell pkg-config --libs libnl-3.0 libnl-genl-3.0 2>/dev/null)
+ifeq ($(VAR_LDLIBS),)
+VAR_LDLIBS = -lnl-genl-3 -lnl-3
+endif
+LDLIBS += $(VAR_LDLIBS)
+
+
+TEST_FILES = common.sh
+
+TEST_PROGS = test.sh \
+ test-large-mtu.sh \
+ test-chachapoly.sh \
+ test-tcp.sh \
+ test-float.sh \
+ test-close-socket.sh \
+ test-close-socket-tcp.sh
+
+TEST_GEN_FILES := ovpn-cli
+
+include ../../lib.mk
diff --git a/tools/testing/selftests/net/ovpn/common.sh b/tools/testing/selftests/net/ovpn/common.sh
new file mode 100644
index 000000000000..88869c675d03
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/common.sh
@@ -0,0 +1,108 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2020-2025 OpenVPN, Inc.
+#
+# Author: Antonio Quartulli <antonio@openvpn.net>
+
+UDP_PEERS_FILE=${UDP_PEERS_FILE:-udp_peers.txt}
+TCP_PEERS_FILE=${TCP_PEERS_FILE:-tcp_peers.txt}
+OVPN_CLI=${OVPN_CLI:-./ovpn-cli}
+ALG=${ALG:-aes}
+PROTO=${PROTO:-UDP}
+FLOAT=${FLOAT:-0}
+
+LAN_IP="11.11.11.11"
+
+create_ns() {
+ ip netns add peer${1}
+}
+
+setup_ns() {
+ MODE="P2P"
+
+ if [ ${1} -eq 0 ]; then
+ MODE="MP"
+ for p in $(seq 1 ${NUM_PEERS}); do
+ ip link add veth${p} netns peer0 type veth peer name veth${p} netns peer${p}
+
+ ip -n peer0 addr add 10.10.${p}.1/24 dev veth${p}
+ ip -n peer0 addr add fd00:0:0:${p}::1/64 dev veth${p}
+ ip -n peer0 link set veth${p} up
+
+ ip -n peer${p} addr add 10.10.${p}.2/24 dev veth${p}
+ ip -n peer${p} addr add fd00:0:0:${p}::2/64 dev veth${p}
+ ip -n peer${p} link set veth${p} up
+ done
+ fi
+
+ ip netns exec peer${1} ${OVPN_CLI} new_iface tun${1} $MODE
+ ip -n peer${1} addr add ${2} dev tun${1}
+ # add a secondary IP to peer 1, to test a LAN behind a client
+ if [ ${1} -eq 1 -a -n "${LAN_IP}" ]; then
+ ip -n peer${1} addr add ${LAN_IP} dev tun${1}
+ ip -n peer0 route add ${LAN_IP} via $(echo ${2} |sed -e s'!/.*!!') dev tun0
+ fi
+ if [ -n "${3}" ]; then
+ ip -n peer${1} link set mtu ${3} dev tun${1}
+ fi
+ ip -n peer${1} link set tun${1} up
+}
+
+add_peer() {
+ if [ "${PROTO}" == "UDP" ]; then
+ if [ ${1} -eq 0 ]; then
+ ip netns exec peer0 ${OVPN_CLI} new_multi_peer tun0 1 ${UDP_PEERS_FILE}
+
+ for p in $(seq 1 ${NUM_PEERS}); do
+ ip netns exec peer0 ${OVPN_CLI} new_key tun0 ${p} 1 0 ${ALG} 0 \
+ data64.key
+ done
+ else
+ RADDR=$(awk "NR == ${1} {print \$2}" ${UDP_PEERS_FILE})
+ RPORT=$(awk "NR == ${1} {print \$3}" ${UDP_PEERS_FILE})
+ LPORT=$(awk "NR == ${1} {print \$5}" ${UDP_PEERS_FILE})
+ ip netns exec peer${1} ${OVPN_CLI} new_peer tun${1} ${1} ${LPORT} \
+ ${RADDR} ${RPORT}
+ ip netns exec peer${1} ${OVPN_CLI} new_key tun${1} ${1} 1 0 ${ALG} 1 \
+ data64.key
+ fi
+ else
+ if [ ${1} -eq 0 ]; then
+ (ip netns exec peer0 ${OVPN_CLI} listen tun0 1 ${TCP_PEERS_FILE} && {
+ for p in $(seq 1 ${NUM_PEERS}); do
+ ip netns exec peer0 ${OVPN_CLI} new_key tun0 ${p} 1 0 \
+ ${ALG} 0 data64.key
+ done
+ }) &
+ sleep 5
+ else
+ ip netns exec peer${1} ${OVPN_CLI} connect tun${1} ${1} 10.10.${1}.1 1 \
+ data64.key
+ fi
+ fi
+}
+
+cleanup() {
+ # some ovpn-cli processes sleep in background so they need manual poking
+ killall $(basename ${OVPN_CLI}) 2>/dev/null || true
+
+ # netns peer0 is deleted without erasing ifaces first
+ for p in $(seq 1 10); do
+ ip -n peer${p} link set tun${p} down 2>/dev/null || true
+ ip netns exec peer${p} ${OVPN_CLI} del_iface tun${p} 2>/dev/null || true
+ done
+ for p in $(seq 1 10); do
+ ip -n peer0 link del veth${p} 2>/dev/null || true
+ done
+ for p in $(seq 0 10); do
+ ip netns del peer${p} 2>/dev/null || true
+ done
+}
+
+if [ "${PROTO}" == "UDP" ]; then
+ NUM_PEERS=${NUM_PEERS:-$(wc -l ${UDP_PEERS_FILE} | awk '{print $1}')}
+else
+ NUM_PEERS=${NUM_PEERS:-$(wc -l ${TCP_PEERS_FILE} | awk '{print $1}')}
+fi
+
+
diff --git a/tools/testing/selftests/net/ovpn/config b/tools/testing/selftests/net/ovpn/config
new file mode 100644
index 000000000000..71946ba9fa17
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/config
@@ -0,0 +1,10 @@
+CONFIG_NET=y
+CONFIG_INET=y
+CONFIG_STREAM_PARSER=y
+CONFIG_NET_UDP_TUNNEL=y
+CONFIG_DST_CACHE=y
+CONFIG_CRYPTO=y
+CONFIG_CRYPTO_AES=y
+CONFIG_CRYPTO_GCM=y
+CONFIG_CRYPTO_CHACHA20POLY1305=y
+CONFIG_OVPN=m
diff --git a/tools/testing/selftests/net/ovpn/data64.key b/tools/testing/selftests/net/ovpn/data64.key
new file mode 100644
index 000000000000..a99e88c4e290
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/data64.key
@@ -0,0 +1,5 @@
+jRqMACN7d7/aFQNT8S7jkrBD8uwrgHbG5OQZP2eu4R1Y7tfpS2bf5RHv06Vi163CGoaIiTX99R3B
+ia9ycAH8Wz1+9PWv51dnBLur9jbShlgZ2QHLtUc4a/gfT7zZwULXuuxdLnvR21DDeMBaTbkgbai9
+uvAa7ne1liIgGFzbv+Bas4HDVrygxIxuAnP5Qgc3648IJkZ0QEXPF+O9f0n5+QIvGCxkAUVx+5K6
+KIs+SoeWXnAopELmoGSjUpFtJbagXK82HfdqpuUxT2Tnuef0/14SzVE/vNleBNu2ZbyrSAaah8tE
+BofkPJUBFY+YQcfZNM5Dgrw3i+Bpmpq/gpdg5w==
diff --git a/tools/testing/selftests/net/ovpn/ovpn-cli.c b/tools/testing/selftests/net/ovpn/ovpn-cli.c
new file mode 100644
index 000000000000..de9c26f98b2e
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/ovpn-cli.c
@@ -0,0 +1,2383 @@
+// SPDX-License-Identifier: GPL-2.0
+/* OpenVPN data channel accelerator
+ *
+ * Copyright (C) 2020-2025 OpenVPN, Inc.
+ *
+ * Author: Antonio Quartulli <antonio@openvpn.net>
+ */
+
+#include <stdio.h>
+#include <inttypes.h>
+#include <stdbool.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+#include <arpa/inet.h>
+#include <net/if.h>
+#include <netinet/in.h>
+#include <time.h>
+
+#include <linux/ovpn.h>
+#include <linux/types.h>
+#include <linux/netlink.h>
+
+#include <netlink/socket.h>
+#include <netlink/netlink.h>
+#include <netlink/genl/genl.h>
+#include <netlink/genl/family.h>
+#include <netlink/genl/ctrl.h>
+
+#include <mbedtls/base64.h>
+#include <mbedtls/error.h>
+
+#include <sys/socket.h>
+
+/* defines to make checkpatch happy */
+#define strscpy strncpy
+#define __always_unused __attribute__((__unused__))
+
+/* libnl < 3.5.0 does not set the NLA_F_NESTED on its own, therefore we
+ * have to explicitly do it to prevent the kernel from failing upon
+ * parsing of the message
+ */
+#define nla_nest_start(_msg, _type) \
+ nla_nest_start(_msg, (_type) | NLA_F_NESTED)
+
+/* libnl < 3.11.0 does not implement nla_get_uint() */
+uint64_t ovpn_nla_get_uint(struct nlattr *attr)
+{
+ if (nla_len(attr) == sizeof(uint32_t))
+ return nla_get_u32(attr);
+ else
+ return nla_get_u64(attr);
+}
+
+typedef int (*ovpn_nl_cb)(struct nl_msg *msg, void *arg);
+
+enum ovpn_key_direction {
+ KEY_DIR_IN = 0,
+ KEY_DIR_OUT,
+};
+
+#define KEY_LEN (256 / 8)
+#define NONCE_LEN 8
+
+#define PEER_ID_UNDEF 0x00FFFFFF
+#define MAX_PEERS 10
+
+struct nl_ctx {
+ struct nl_sock *nl_sock;
+ struct nl_msg *nl_msg;
+ struct nl_cb *nl_cb;
+
+ int ovpn_dco_id;
+};
+
+enum ovpn_cmd {
+ CMD_INVALID,
+ CMD_NEW_IFACE,
+ CMD_DEL_IFACE,
+ CMD_LISTEN,
+ CMD_CONNECT,
+ CMD_NEW_PEER,
+ CMD_NEW_MULTI_PEER,
+ CMD_SET_PEER,
+ CMD_DEL_PEER,
+ CMD_GET_PEER,
+ CMD_NEW_KEY,
+ CMD_DEL_KEY,
+ CMD_GET_KEY,
+ CMD_SWAP_KEYS,
+ CMD_LISTEN_MCAST,
+};
+
+struct ovpn_ctx {
+ enum ovpn_cmd cmd;
+
+ __u8 key_enc[KEY_LEN];
+ __u8 key_dec[KEY_LEN];
+ __u8 nonce[NONCE_LEN];
+
+ enum ovpn_cipher_alg cipher;
+
+ sa_family_t sa_family;
+
+ unsigned long peer_id;
+ unsigned long lport;
+
+ union {
+ struct sockaddr_in in4;
+ struct sockaddr_in6 in6;
+ } remote;
+
+ union {
+ struct sockaddr_in in4;
+ struct sockaddr_in6 in6;
+ } peer_ip;
+
+ bool peer_ip_set;
+
+ unsigned int ifindex;
+ char ifname[IFNAMSIZ];
+ enum ovpn_mode mode;
+ bool mode_set;
+
+ int socket;
+ int cli_sockets[MAX_PEERS];
+
+ __u32 keepalive_interval;
+ __u32 keepalive_timeout;
+
+ enum ovpn_key_direction key_dir;
+ enum ovpn_key_slot key_slot;
+ int key_id;
+
+ const char *peers_file;
+};
+
+static int ovpn_nl_recvmsgs(struct nl_ctx *ctx)
+{
+ int ret;
+
+ ret = nl_recvmsgs(ctx->nl_sock, ctx->nl_cb);
+
+ switch (ret) {
+ case -NLE_INTR:
+ fprintf(stderr,
+ "netlink received interrupt due to signal - ignoring\n");
+ break;
+ case -NLE_NOMEM:
+ fprintf(stderr, "netlink out of memory error\n");
+ break;
+ case -NLE_AGAIN:
+ fprintf(stderr,
+ "netlink reports blocking read - aborting wait\n");
+ break;
+ default:
+ if (ret)
+ fprintf(stderr, "netlink reports error (%d): %s\n",
+ ret, nl_geterror(-ret));
+ break;
+ }
+
+ return ret;
+}
+
+static struct nl_ctx *nl_ctx_alloc_flags(struct ovpn_ctx *ovpn, int cmd,
+ int flags)
+{
+ struct nl_ctx *ctx;
+ int err, ret;
+
+ ctx = calloc(1, sizeof(*ctx));
+ if (!ctx)
+ return NULL;
+
+ ctx->nl_sock = nl_socket_alloc();
+ if (!ctx->nl_sock) {
+ fprintf(stderr, "cannot allocate netlink socket\n");
+ goto err_free;
+ }
+
+ nl_socket_set_buffer_size(ctx->nl_sock, 8192, 8192);
+
+ ret = genl_connect(ctx->nl_sock);
+ if (ret) {
+ fprintf(stderr, "cannot connect to generic netlink: %s\n",
+ nl_geterror(ret));
+ goto err_sock;
+ }
+
+ /* enable Extended ACK for detailed error reporting */
+ err = 1;
+ setsockopt(nl_socket_get_fd(ctx->nl_sock), SOL_NETLINK, NETLINK_EXT_ACK,
+ &err, sizeof(err));
+
+ ctx->ovpn_dco_id = genl_ctrl_resolve(ctx->nl_sock, OVPN_FAMILY_NAME);
+ if (ctx->ovpn_dco_id < 0) {
+ fprintf(stderr, "cannot find ovpn_dco netlink component: %d\n",
+ ctx->ovpn_dco_id);
+ goto err_free;
+ }
+
+ ctx->nl_msg = nlmsg_alloc();
+ if (!ctx->nl_msg) {
+ fprintf(stderr, "cannot allocate netlink message\n");
+ goto err_sock;
+ }
+
+ ctx->nl_cb = nl_cb_alloc(NL_CB_DEFAULT);
+ if (!ctx->nl_cb) {
+ fprintf(stderr, "failed to allocate netlink callback\n");
+ goto err_msg;
+ }
+
+ nl_socket_set_cb(ctx->nl_sock, ctx->nl_cb);
+
+ genlmsg_put(ctx->nl_msg, 0, 0, ctx->ovpn_dco_id, 0, flags, cmd, 0);
+
+ if (ovpn->ifindex > 0)
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_IFINDEX, ovpn->ifindex);
+
+ return ctx;
+nla_put_failure:
+err_msg:
+ nlmsg_free(ctx->nl_msg);
+err_sock:
+ nl_socket_free(ctx->nl_sock);
+err_free:
+ free(ctx);
+ return NULL;
+}
+
+static struct nl_ctx *nl_ctx_alloc(struct ovpn_ctx *ovpn, int cmd)
+{
+ return nl_ctx_alloc_flags(ovpn, cmd, 0);
+}
+
+static void nl_ctx_free(struct nl_ctx *ctx)
+{
+ if (!ctx)
+ return;
+
+ nl_socket_free(ctx->nl_sock);
+ nlmsg_free(ctx->nl_msg);
+ nl_cb_put(ctx->nl_cb);
+ free(ctx);
+}
+
+static int ovpn_nl_cb_error(struct sockaddr_nl (*nla)__always_unused,
+ struct nlmsgerr *err, void *arg)
+{
+ struct nlmsghdr *nlh = (struct nlmsghdr *)err - 1;
+ struct nlattr *tb_msg[NLMSGERR_ATTR_MAX + 1];
+ int len = nlh->nlmsg_len;
+ struct nlattr *attrs;
+ int *ret = arg;
+ int ack_len = sizeof(*nlh) + sizeof(int) + sizeof(*nlh);
+
+ *ret = err->error;
+
+ if (!(nlh->nlmsg_flags & NLM_F_ACK_TLVS))
+ return NL_STOP;
+
+ if (!(nlh->nlmsg_flags & NLM_F_CAPPED))
+ ack_len += err->msg.nlmsg_len - sizeof(*nlh);
+
+ if (len <= ack_len)
+ return NL_STOP;
+
+ attrs = (void *)((uint8_t *)nlh + ack_len);
+ len -= ack_len;
+
+ nla_parse(tb_msg, NLMSGERR_ATTR_MAX, attrs, len, NULL);
+ if (tb_msg[NLMSGERR_ATTR_MSG]) {
+ len = strnlen((char *)nla_data(tb_msg[NLMSGERR_ATTR_MSG]),
+ nla_len(tb_msg[NLMSGERR_ATTR_MSG]));
+ fprintf(stderr, "kernel error: %*s\n", len,
+ (char *)nla_data(tb_msg[NLMSGERR_ATTR_MSG]));
+ }
+
+ if (tb_msg[NLMSGERR_ATTR_MISS_NEST]) {
+ fprintf(stderr, "missing required nesting type %u\n",
+ nla_get_u32(tb_msg[NLMSGERR_ATTR_MISS_NEST]));
+ }
+
+ if (tb_msg[NLMSGERR_ATTR_MISS_TYPE]) {
+ fprintf(stderr, "missing required attribute type %u\n",
+ nla_get_u32(tb_msg[NLMSGERR_ATTR_MISS_TYPE]));
+ }
+
+ return NL_STOP;
+}
+
+static int ovpn_nl_cb_finish(struct nl_msg (*msg)__always_unused,
+ void *arg)
+{
+ int *status = arg;
+
+ *status = 0;
+ return NL_SKIP;
+}
+
+static int ovpn_nl_cb_ack(struct nl_msg (*msg)__always_unused,
+ void *arg)
+{
+ int *status = arg;
+
+ *status = 0;
+ return NL_STOP;
+}
+
+static int ovpn_nl_msg_send(struct nl_ctx *ctx, ovpn_nl_cb cb)
+{
+ int status = 1;
+
+ nl_cb_err(ctx->nl_cb, NL_CB_CUSTOM, ovpn_nl_cb_error, &status);
+ nl_cb_set(ctx->nl_cb, NL_CB_FINISH, NL_CB_CUSTOM, ovpn_nl_cb_finish,
+ &status);
+ nl_cb_set(ctx->nl_cb, NL_CB_ACK, NL_CB_CUSTOM, ovpn_nl_cb_ack, &status);
+
+ if (cb)
+ nl_cb_set(ctx->nl_cb, NL_CB_VALID, NL_CB_CUSTOM, cb, ctx);
+
+ nl_send_auto_complete(ctx->nl_sock, ctx->nl_msg);
+
+ while (status == 1)
+ ovpn_nl_recvmsgs(ctx);
+
+ if (status < 0)
+ fprintf(stderr, "failed to send netlink message: %s (%d)\n",
+ strerror(-status), status);
+
+ return status;
+}
+
+static int ovpn_parse_key(const char *file, struct ovpn_ctx *ctx)
+{
+ int idx_enc, idx_dec, ret = -1;
+ unsigned char *ckey = NULL;
+ __u8 *bkey = NULL;
+ size_t olen = 0;
+ long ckey_len;
+ FILE *fp;
+
+ fp = fopen(file, "r");
+ if (!fp) {
+ fprintf(stderr, "cannot open: %s\n", file);
+ return -1;
+ }
+
+ /* get file size */
+ fseek(fp, 0L, SEEK_END);
+ ckey_len = ftell(fp);
+ rewind(fp);
+
+ /* if the file is longer, let's just read a portion */
+ if (ckey_len > 256)
+ ckey_len = 256;
+
+ ckey = malloc(ckey_len);
+ if (!ckey)
+ goto err;
+
+ ret = fread(ckey, 1, ckey_len, fp);
+ if (ret != ckey_len) {
+ fprintf(stderr,
+ "couldn't read enough data from key file: %dbytes read\n",
+ ret);
+ goto err;
+ }
+
+ olen = 0;
+ ret = mbedtls_base64_decode(NULL, 0, &olen, ckey, ckey_len);
+ if (ret != MBEDTLS_ERR_BASE64_BUFFER_TOO_SMALL) {
+ char buf[256];
+
+ mbedtls_strerror(ret, buf, sizeof(buf));
+ fprintf(stderr, "unexpected base64 error1: %s (%d)\n", buf,
+ ret);
+
+ goto err;
+ }
+
+ bkey = malloc(olen);
+ if (!bkey) {
+ fprintf(stderr, "cannot allocate binary key buffer\n");
+ goto err;
+ }
+
+ ret = mbedtls_base64_decode(bkey, olen, &olen, ckey, ckey_len);
+ if (ret) {
+ char buf[256];
+
+ mbedtls_strerror(ret, buf, sizeof(buf));
+ fprintf(stderr, "unexpected base64 error2: %s (%d)\n", buf,
+ ret);
+
+ goto err;
+ }
+
+ if (olen < 2 * KEY_LEN + NONCE_LEN) {
+ fprintf(stderr,
+ "not enough data in key file, found %zdB but needs %dB\n",
+ olen, 2 * KEY_LEN + NONCE_LEN);
+ goto err;
+ }
+
+ switch (ctx->key_dir) {
+ case KEY_DIR_IN:
+ idx_enc = 0;
+ idx_dec = 1;
+ break;
+ case KEY_DIR_OUT:
+ idx_enc = 1;
+ idx_dec = 0;
+ break;
+ default:
+ goto err;
+ }
+
+ memcpy(ctx->key_enc, bkey + KEY_LEN * idx_enc, KEY_LEN);
+ memcpy(ctx->key_dec, bkey + KEY_LEN * idx_dec, KEY_LEN);
+ memcpy(ctx->nonce, bkey + 2 * KEY_LEN, NONCE_LEN);
+
+ ret = 0;
+
+err:
+ fclose(fp);
+ free(bkey);
+ free(ckey);
+
+ return ret;
+}
+
+static int ovpn_parse_cipher(const char *cipher, struct ovpn_ctx *ctx)
+{
+ if (strcmp(cipher, "aes") == 0)
+ ctx->cipher = OVPN_CIPHER_ALG_AES_GCM;
+ else if (strcmp(cipher, "chachapoly") == 0)
+ ctx->cipher = OVPN_CIPHER_ALG_CHACHA20_POLY1305;
+ else if (strcmp(cipher, "none") == 0)
+ ctx->cipher = OVPN_CIPHER_ALG_NONE;
+ else
+ return -ENOTSUP;
+
+ return 0;
+}
+
+static int ovpn_parse_key_direction(const char *dir, struct ovpn_ctx *ctx)
+{
+ int in_dir;
+
+ in_dir = strtoll(dir, NULL, 10);
+ switch (in_dir) {
+ case KEY_DIR_IN:
+ case KEY_DIR_OUT:
+ ctx->key_dir = in_dir;
+ break;
+ default:
+ fprintf(stderr,
+ "invalid key direction provided. Can be 0 or 1 only\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+static int ovpn_socket(struct ovpn_ctx *ctx, sa_family_t family, int proto)
+{
+ struct sockaddr_storage local_sock = { 0 };
+ struct sockaddr_in6 *in6;
+ struct sockaddr_in *in;
+ int ret, s, sock_type;
+ size_t sock_len;
+
+ if (proto == IPPROTO_UDP)
+ sock_type = SOCK_DGRAM;
+ else if (proto == IPPROTO_TCP)
+ sock_type = SOCK_STREAM;
+ else
+ return -EINVAL;
+
+ s = socket(family, sock_type, 0);
+ if (s < 0) {
+ perror("cannot create socket");
+ return -1;
+ }
+
+ switch (family) {
+ case AF_INET:
+ in = (struct sockaddr_in *)&local_sock;
+ in->sin_family = family;
+ in->sin_port = htons(ctx->lport);
+ in->sin_addr.s_addr = htonl(INADDR_ANY);
+ sock_len = sizeof(*in);
+ break;
+ case AF_INET6:
+ in6 = (struct sockaddr_in6 *)&local_sock;
+ in6->sin6_family = family;
+ in6->sin6_port = htons(ctx->lport);
+ in6->sin6_addr = in6addr_any;
+ sock_len = sizeof(*in6);
+ break;
+ default:
+ return -1;
+ }
+
+ int opt = 1;
+
+ ret = setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt));
+
+ if (ret < 0) {
+ perror("setsockopt for SO_REUSEADDR");
+ return ret;
+ }
+
+ ret = setsockopt(s, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt));
+ if (ret < 0) {
+ perror("setsockopt for SO_REUSEPORT");
+ return ret;
+ }
+
+ if (family == AF_INET6) {
+ opt = 0;
+ if (setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY, &opt,
+ sizeof(opt))) {
+ perror("failed to set IPV6_V6ONLY");
+ return -1;
+ }
+ }
+
+ ret = bind(s, (struct sockaddr *)&local_sock, sock_len);
+ if (ret < 0) {
+ perror("cannot bind socket");
+ goto err_socket;
+ }
+
+ ctx->socket = s;
+ ctx->sa_family = family;
+ return 0;
+
+err_socket:
+ close(s);
+ return -1;
+}
+
+static int ovpn_udp_socket(struct ovpn_ctx *ctx, sa_family_t family)
+{
+ return ovpn_socket(ctx, family, IPPROTO_UDP);
+}
+
+static int ovpn_listen(struct ovpn_ctx *ctx, sa_family_t family)
+{
+ int ret;
+
+ ret = ovpn_socket(ctx, family, IPPROTO_TCP);
+ if (ret < 0)
+ return ret;
+
+ ret = listen(ctx->socket, 10);
+ if (ret < 0) {
+ perror("listen");
+ close(ctx->socket);
+ return -1;
+ }
+
+ return 0;
+}
+
+static int ovpn_accept(struct ovpn_ctx *ctx)
+{
+ socklen_t socklen;
+ int ret;
+
+ socklen = sizeof(ctx->remote);
+ ret = accept(ctx->socket, (struct sockaddr *)&ctx->remote, &socklen);
+ if (ret < 0) {
+ perror("accept");
+ goto err;
+ }
+
+ fprintf(stderr, "Connection received!\n");
+
+ switch (socklen) {
+ case sizeof(struct sockaddr_in):
+ case sizeof(struct sockaddr_in6):
+ break;
+ default:
+ fprintf(stderr, "error: expecting IPv4 or IPv6 connection\n");
+ close(ret);
+ ret = -EINVAL;
+ goto err;
+ }
+
+ return ret;
+err:
+ close(ctx->socket);
+ return ret;
+}
+
+static int ovpn_connect(struct ovpn_ctx *ovpn)
+{
+ socklen_t socklen;
+ int s, ret;
+
+ s = socket(ovpn->remote.in4.sin_family, SOCK_STREAM, 0);
+ if (s < 0) {
+ perror("cannot create socket");
+ return -1;
+ }
+
+ switch (ovpn->remote.in4.sin_family) {
+ case AF_INET:
+ socklen = sizeof(struct sockaddr_in);
+ break;
+ case AF_INET6:
+ socklen = sizeof(struct sockaddr_in6);
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ ret = connect(s, (struct sockaddr *)&ovpn->remote, socklen);
+ if (ret < 0) {
+ perror("connect");
+ goto err;
+ }
+
+ fprintf(stderr, "connected\n");
+
+ ovpn->socket = s;
+
+ return 0;
+err:
+ close(s);
+ return ret;
+}
+
+static int ovpn_new_peer(struct ovpn_ctx *ovpn, bool is_tcp)
+{
+ struct nlattr *attr;
+ struct nl_ctx *ctx;
+ int ret = -1;
+
+ ctx = nl_ctx_alloc(ovpn, OVPN_CMD_PEER_NEW);
+ if (!ctx)
+ return -ENOMEM;
+
+ attr = nla_nest_start(ctx->nl_msg, OVPN_A_PEER);
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_ID, ovpn->peer_id);
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_SOCKET, ovpn->socket);
+
+ if (!is_tcp) {
+ switch (ovpn->remote.in4.sin_family) {
+ case AF_INET:
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_REMOTE_IPV4,
+ ovpn->remote.in4.sin_addr.s_addr);
+ NLA_PUT_U16(ctx->nl_msg, OVPN_A_PEER_REMOTE_PORT,
+ ovpn->remote.in4.sin_port);
+ break;
+ case AF_INET6:
+ NLA_PUT(ctx->nl_msg, OVPN_A_PEER_REMOTE_IPV6,
+ sizeof(ovpn->remote.in6.sin6_addr),
+ &ovpn->remote.in6.sin6_addr);
+ NLA_PUT_U32(ctx->nl_msg,
+ OVPN_A_PEER_REMOTE_IPV6_SCOPE_ID,
+ ovpn->remote.in6.sin6_scope_id);
+ NLA_PUT_U16(ctx->nl_msg, OVPN_A_PEER_REMOTE_PORT,
+ ovpn->remote.in6.sin6_port);
+ break;
+ default:
+ fprintf(stderr,
+ "Invalid family for remote socket address\n");
+ goto nla_put_failure;
+ }
+ }
+
+ if (ovpn->peer_ip_set) {
+ switch (ovpn->peer_ip.in4.sin_family) {
+ case AF_INET:
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_VPN_IPV4,
+ ovpn->peer_ip.in4.sin_addr.s_addr);
+ break;
+ case AF_INET6:
+ NLA_PUT(ctx->nl_msg, OVPN_A_PEER_VPN_IPV6,
+ sizeof(struct in6_addr),
+ &ovpn->peer_ip.in6.sin6_addr);
+ break;
+ default:
+ fprintf(stderr, "Invalid family for peer address\n");
+ goto nla_put_failure;
+ }
+ }
+
+ nla_nest_end(ctx->nl_msg, attr);
+
+ ret = ovpn_nl_msg_send(ctx, NULL);
+nla_put_failure:
+ nl_ctx_free(ctx);
+ return ret;
+}
+
+static int ovpn_set_peer(struct ovpn_ctx *ovpn)
+{
+ struct nlattr *attr;
+ struct nl_ctx *ctx;
+ int ret = -1;
+
+ ctx = nl_ctx_alloc(ovpn, OVPN_CMD_PEER_SET);
+ if (!ctx)
+ return -ENOMEM;
+
+ attr = nla_nest_start(ctx->nl_msg, OVPN_A_PEER);
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_ID, ovpn->peer_id);
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_KEEPALIVE_INTERVAL,
+ ovpn->keepalive_interval);
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_KEEPALIVE_TIMEOUT,
+ ovpn->keepalive_timeout);
+ nla_nest_end(ctx->nl_msg, attr);
+
+ ret = ovpn_nl_msg_send(ctx, NULL);
+nla_put_failure:
+ nl_ctx_free(ctx);
+ return ret;
+}
+
+static int ovpn_del_peer(struct ovpn_ctx *ovpn)
+{
+ struct nlattr *attr;
+ struct nl_ctx *ctx;
+ int ret = -1;
+
+ ctx = nl_ctx_alloc(ovpn, OVPN_CMD_PEER_DEL);
+ if (!ctx)
+ return -ENOMEM;
+
+ attr = nla_nest_start(ctx->nl_msg, OVPN_A_PEER);
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_ID, ovpn->peer_id);
+ nla_nest_end(ctx->nl_msg, attr);
+
+ ret = ovpn_nl_msg_send(ctx, NULL);
+nla_put_failure:
+ nl_ctx_free(ctx);
+ return ret;
+}
+
+static int ovpn_handle_peer(struct nl_msg *msg, void (*arg)__always_unused)
+{
+ struct nlattr *pattrs[OVPN_A_PEER_MAX + 1];
+ struct genlmsghdr *gnlh = nlmsg_data(nlmsg_hdr(msg));
+ struct nlattr *attrs[OVPN_A_MAX + 1];
+ __u16 rport = 0, lport = 0;
+
+ nla_parse(attrs, OVPN_A_MAX, genlmsg_attrdata(gnlh, 0),
+ genlmsg_attrlen(gnlh, 0), NULL);
+
+ if (!attrs[OVPN_A_PEER]) {
+ fprintf(stderr, "no packet content in netlink message\n");
+ return NL_SKIP;
+ }
+
+ nla_parse(pattrs, OVPN_A_PEER_MAX, nla_data(attrs[OVPN_A_PEER]),
+ nla_len(attrs[OVPN_A_PEER]), NULL);
+
+ if (pattrs[OVPN_A_PEER_ID])
+ fprintf(stderr, "* Peer %u\n",
+ nla_get_u32(pattrs[OVPN_A_PEER_ID]));
+
+ if (pattrs[OVPN_A_PEER_SOCKET_NETNSID])
+ fprintf(stderr, "\tsocket NetNS ID: %d\n",
+ nla_get_s32(pattrs[OVPN_A_PEER_SOCKET_NETNSID]));
+
+ if (pattrs[OVPN_A_PEER_VPN_IPV4]) {
+ char buf[INET_ADDRSTRLEN];
+
+ inet_ntop(AF_INET, nla_data(pattrs[OVPN_A_PEER_VPN_IPV4]),
+ buf, sizeof(buf));
+ fprintf(stderr, "\tVPN IPv4: %s\n", buf);
+ }
+
+ if (pattrs[OVPN_A_PEER_VPN_IPV6]) {
+ char buf[INET6_ADDRSTRLEN];
+
+ inet_ntop(AF_INET6, nla_data(pattrs[OVPN_A_PEER_VPN_IPV6]),
+ buf, sizeof(buf));
+ fprintf(stderr, "\tVPN IPv6: %s\n", buf);
+ }
+
+ if (pattrs[OVPN_A_PEER_LOCAL_PORT])
+ lport = ntohs(nla_get_u16(pattrs[OVPN_A_PEER_LOCAL_PORT]));
+
+ if (pattrs[OVPN_A_PEER_REMOTE_PORT])
+ rport = ntohs(nla_get_u16(pattrs[OVPN_A_PEER_REMOTE_PORT]));
+
+ if (pattrs[OVPN_A_PEER_REMOTE_IPV6]) {
+ void *ip = pattrs[OVPN_A_PEER_REMOTE_IPV6];
+ char buf[INET6_ADDRSTRLEN];
+ int scope_id = -1;
+
+ if (pattrs[OVPN_A_PEER_REMOTE_IPV6_SCOPE_ID]) {
+ void *p = pattrs[OVPN_A_PEER_REMOTE_IPV6_SCOPE_ID];
+
+ scope_id = nla_get_u32(p);
+ }
+
+ inet_ntop(AF_INET6, nla_data(ip), buf, sizeof(buf));
+ fprintf(stderr, "\tRemote: %s:%hu (scope-id: %u)\n", buf, rport,
+ scope_id);
+
+ if (pattrs[OVPN_A_PEER_LOCAL_IPV6]) {
+ void *ip = pattrs[OVPN_A_PEER_LOCAL_IPV6];
+
+ inet_ntop(AF_INET6, nla_data(ip), buf, sizeof(buf));
+ fprintf(stderr, "\tLocal: %s:%hu\n", buf, lport);
+ }
+ }
+
+ if (pattrs[OVPN_A_PEER_REMOTE_IPV4]) {
+ void *ip = pattrs[OVPN_A_PEER_REMOTE_IPV4];
+ char buf[INET_ADDRSTRLEN];
+
+ inet_ntop(AF_INET, nla_data(ip), buf, sizeof(buf));
+ fprintf(stderr, "\tRemote: %s:%hu\n", buf, rport);
+
+ if (pattrs[OVPN_A_PEER_LOCAL_IPV4]) {
+ void *p = pattrs[OVPN_A_PEER_LOCAL_IPV4];
+
+ inet_ntop(AF_INET, nla_data(p), buf, sizeof(buf));
+ fprintf(stderr, "\tLocal: %s:%hu\n", buf, lport);
+ }
+ }
+
+ if (pattrs[OVPN_A_PEER_KEEPALIVE_INTERVAL]) {
+ void *p = pattrs[OVPN_A_PEER_KEEPALIVE_INTERVAL];
+
+ fprintf(stderr, "\tKeepalive interval: %u sec\n",
+ nla_get_u32(p));
+ }
+
+ if (pattrs[OVPN_A_PEER_KEEPALIVE_TIMEOUT])
+ fprintf(stderr, "\tKeepalive timeout: %u sec\n",
+ nla_get_u32(pattrs[OVPN_A_PEER_KEEPALIVE_TIMEOUT]));
+
+ if (pattrs[OVPN_A_PEER_VPN_RX_BYTES])
+ fprintf(stderr, "\tVPN RX bytes: %" PRIu64 "\n",
+ ovpn_nla_get_uint(pattrs[OVPN_A_PEER_VPN_RX_BYTES]));
+
+ if (pattrs[OVPN_A_PEER_VPN_TX_BYTES])
+ fprintf(stderr, "\tVPN TX bytes: %" PRIu64 "\n",
+ ovpn_nla_get_uint(pattrs[OVPN_A_PEER_VPN_TX_BYTES]));
+
+ if (pattrs[OVPN_A_PEER_VPN_RX_PACKETS])
+ fprintf(stderr, "\tVPN RX packets: %" PRIu64 "\n",
+ ovpn_nla_get_uint(pattrs[OVPN_A_PEER_VPN_RX_PACKETS]));
+
+ if (pattrs[OVPN_A_PEER_VPN_TX_PACKETS])
+ fprintf(stderr, "\tVPN TX packets: %" PRIu64 "\n",
+ ovpn_nla_get_uint(pattrs[OVPN_A_PEER_VPN_TX_PACKETS]));
+
+ if (pattrs[OVPN_A_PEER_LINK_RX_BYTES])
+ fprintf(stderr, "\tLINK RX bytes: %" PRIu64 "\n",
+ ovpn_nla_get_uint(pattrs[OVPN_A_PEER_LINK_RX_BYTES]));
+
+ if (pattrs[OVPN_A_PEER_LINK_TX_BYTES])
+ fprintf(stderr, "\tLINK TX bytes: %" PRIu64 "\n",
+ ovpn_nla_get_uint(pattrs[OVPN_A_PEER_LINK_TX_BYTES]));
+
+ if (pattrs[OVPN_A_PEER_LINK_RX_PACKETS])
+ fprintf(stderr, "\tLINK RX packets: %" PRIu64 "\n",
+ ovpn_nla_get_uint(pattrs[OVPN_A_PEER_LINK_RX_PACKETS]));
+
+ if (pattrs[OVPN_A_PEER_LINK_TX_PACKETS])
+ fprintf(stderr, "\tLINK TX packets: %" PRIu64 "\n",
+ ovpn_nla_get_uint(pattrs[OVPN_A_PEER_LINK_TX_PACKETS]));
+
+ return NL_SKIP;
+}
+
+static int ovpn_get_peer(struct ovpn_ctx *ovpn)
+{
+ int flags = 0, ret = -1;
+ struct nlattr *attr;
+ struct nl_ctx *ctx;
+
+ if (ovpn->peer_id == PEER_ID_UNDEF)
+ flags = NLM_F_DUMP;
+
+ ctx = nl_ctx_alloc_flags(ovpn, OVPN_CMD_PEER_GET, flags);
+ if (!ctx)
+ return -ENOMEM;
+
+ if (ovpn->peer_id != PEER_ID_UNDEF) {
+ attr = nla_nest_start(ctx->nl_msg, OVPN_A_PEER);
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_ID, ovpn->peer_id);
+ nla_nest_end(ctx->nl_msg, attr);
+ }
+
+ ret = ovpn_nl_msg_send(ctx, ovpn_handle_peer);
+nla_put_failure:
+ nl_ctx_free(ctx);
+ return ret;
+}
+
+static int ovpn_new_key(struct ovpn_ctx *ovpn)
+{
+ struct nlattr *keyconf, *key_dir;
+ struct nl_ctx *ctx;
+ int ret = -1;
+
+ ctx = nl_ctx_alloc(ovpn, OVPN_CMD_KEY_NEW);
+ if (!ctx)
+ return -ENOMEM;
+
+ keyconf = nla_nest_start(ctx->nl_msg, OVPN_A_KEYCONF);
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_PEER_ID, ovpn->peer_id);
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_SLOT, ovpn->key_slot);
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_KEY_ID, ovpn->key_id);
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_CIPHER_ALG, ovpn->cipher);
+
+ key_dir = nla_nest_start(ctx->nl_msg, OVPN_A_KEYCONF_ENCRYPT_DIR);
+ NLA_PUT(ctx->nl_msg, OVPN_A_KEYDIR_CIPHER_KEY, KEY_LEN, ovpn->key_enc);
+ NLA_PUT(ctx->nl_msg, OVPN_A_KEYDIR_NONCE_TAIL, NONCE_LEN, ovpn->nonce);
+ nla_nest_end(ctx->nl_msg, key_dir);
+
+ key_dir = nla_nest_start(ctx->nl_msg, OVPN_A_KEYCONF_DECRYPT_DIR);
+ NLA_PUT(ctx->nl_msg, OVPN_A_KEYDIR_CIPHER_KEY, KEY_LEN, ovpn->key_dec);
+ NLA_PUT(ctx->nl_msg, OVPN_A_KEYDIR_NONCE_TAIL, NONCE_LEN, ovpn->nonce);
+ nla_nest_end(ctx->nl_msg, key_dir);
+
+ nla_nest_end(ctx->nl_msg, keyconf);
+
+ ret = ovpn_nl_msg_send(ctx, NULL);
+nla_put_failure:
+ nl_ctx_free(ctx);
+ return ret;
+}
+
+static int ovpn_del_key(struct ovpn_ctx *ovpn)
+{
+ struct nlattr *keyconf;
+ struct nl_ctx *ctx;
+ int ret = -1;
+
+ ctx = nl_ctx_alloc(ovpn, OVPN_CMD_KEY_DEL);
+ if (!ctx)
+ return -ENOMEM;
+
+ keyconf = nla_nest_start(ctx->nl_msg, OVPN_A_KEYCONF);
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_PEER_ID, ovpn->peer_id);
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_SLOT, ovpn->key_slot);
+ nla_nest_end(ctx->nl_msg, keyconf);
+
+ ret = ovpn_nl_msg_send(ctx, NULL);
+nla_put_failure:
+ nl_ctx_free(ctx);
+ return ret;
+}
+
+static int ovpn_handle_key(struct nl_msg *msg, void (*arg)__always_unused)
+{
+ struct nlattr *kattrs[OVPN_A_KEYCONF_MAX + 1];
+ struct genlmsghdr *gnlh = nlmsg_data(nlmsg_hdr(msg));
+ struct nlattr *attrs[OVPN_A_MAX + 1];
+
+ nla_parse(attrs, OVPN_A_MAX, genlmsg_attrdata(gnlh, 0),
+ genlmsg_attrlen(gnlh, 0), NULL);
+
+ if (!attrs[OVPN_A_KEYCONF]) {
+ fprintf(stderr, "no packet content in netlink message\n");
+ return NL_SKIP;
+ }
+
+ nla_parse(kattrs, OVPN_A_KEYCONF_MAX, nla_data(attrs[OVPN_A_KEYCONF]),
+ nla_len(attrs[OVPN_A_KEYCONF]), NULL);
+
+ if (kattrs[OVPN_A_KEYCONF_PEER_ID])
+ fprintf(stderr, "* Peer %u\n",
+ nla_get_u32(kattrs[OVPN_A_KEYCONF_PEER_ID]));
+ if (kattrs[OVPN_A_KEYCONF_SLOT]) {
+ fprintf(stderr, "\t- Slot: ");
+ switch (nla_get_u32(kattrs[OVPN_A_KEYCONF_SLOT])) {
+ case OVPN_KEY_SLOT_PRIMARY:
+ fprintf(stderr, "primary\n");
+ break;
+ case OVPN_KEY_SLOT_SECONDARY:
+ fprintf(stderr, "secondary\n");
+ break;
+ default:
+ fprintf(stderr, "invalid (%u)\n",
+ nla_get_u32(kattrs[OVPN_A_KEYCONF_SLOT]));
+ break;
+ }
+ }
+ if (kattrs[OVPN_A_KEYCONF_KEY_ID])
+ fprintf(stderr, "\t- Key ID: %u\n",
+ nla_get_u32(kattrs[OVPN_A_KEYCONF_KEY_ID]));
+ if (kattrs[OVPN_A_KEYCONF_CIPHER_ALG]) {
+ fprintf(stderr, "\t- Cipher: ");
+ switch (nla_get_u32(kattrs[OVPN_A_KEYCONF_CIPHER_ALG])) {
+ case OVPN_CIPHER_ALG_NONE:
+ fprintf(stderr, "none\n");
+ break;
+ case OVPN_CIPHER_ALG_AES_GCM:
+ fprintf(stderr, "aes-gcm\n");
+ break;
+ case OVPN_CIPHER_ALG_CHACHA20_POLY1305:
+ fprintf(stderr, "chacha20poly1305\n");
+ break;
+ default:
+ fprintf(stderr, "invalid (%u)\n",
+ nla_get_u32(kattrs[OVPN_A_KEYCONF_CIPHER_ALG]));
+ break;
+ }
+ }
+
+ return NL_SKIP;
+}
+
+static int ovpn_get_key(struct ovpn_ctx *ovpn)
+{
+ struct nlattr *keyconf;
+ struct nl_ctx *ctx;
+ int ret = -1;
+
+ ctx = nl_ctx_alloc(ovpn, OVPN_CMD_KEY_GET);
+ if (!ctx)
+ return -ENOMEM;
+
+ keyconf = nla_nest_start(ctx->nl_msg, OVPN_A_KEYCONF);
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_PEER_ID, ovpn->peer_id);
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_SLOT, ovpn->key_slot);
+ nla_nest_end(ctx->nl_msg, keyconf);
+
+ ret = ovpn_nl_msg_send(ctx, ovpn_handle_key);
+nla_put_failure:
+ nl_ctx_free(ctx);
+ return ret;
+}
+
+static int ovpn_swap_keys(struct ovpn_ctx *ovpn)
+{
+ struct nl_ctx *ctx;
+ struct nlattr *kc;
+ int ret = -1;
+
+ ctx = nl_ctx_alloc(ovpn, OVPN_CMD_KEY_SWAP);
+ if (!ctx)
+ return -ENOMEM;
+
+ kc = nla_nest_start(ctx->nl_msg, OVPN_A_KEYCONF);
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_PEER_ID, ovpn->peer_id);
+ nla_nest_end(ctx->nl_msg, kc);
+
+ ret = ovpn_nl_msg_send(ctx, NULL);
+nla_put_failure:
+ nl_ctx_free(ctx);
+ return ret;
+}
+
+/* Helper function used to easily add attributes to a rtnl message */
+static int ovpn_addattr(struct nlmsghdr *n, int maxlen, int type,
+ const void *data, int alen)
+{
+ int len = RTA_LENGTH(alen);
+ struct rtattr *rta;
+
+ if ((int)(NLMSG_ALIGN(n->nlmsg_len) + RTA_ALIGN(len)) > maxlen) {
+ fprintf(stderr, "%s: rtnl: message exceeded bound of %d\n",
+ __func__, maxlen);
+ return -EMSGSIZE;
+ }
+
+ rta = nlmsg_tail(n);
+ rta->rta_type = type;
+ rta->rta_len = len;
+
+ if (!data)
+ memset(RTA_DATA(rta), 0, alen);
+ else
+ memcpy(RTA_DATA(rta), data, alen);
+
+ n->nlmsg_len = NLMSG_ALIGN(n->nlmsg_len) + RTA_ALIGN(len);
+
+ return 0;
+}
+
+static struct rtattr *ovpn_nest_start(struct nlmsghdr *msg, size_t max_size,
+ int attr)
+{
+ struct rtattr *nest = nlmsg_tail(msg);
+
+ if (ovpn_addattr(msg, max_size, attr, NULL, 0) < 0)
+ return NULL;
+
+ return nest;
+}
+
+static void ovpn_nest_end(struct nlmsghdr *msg, struct rtattr *nest)
+{
+ nest->rta_len = (uint8_t *)nlmsg_tail(msg) - (uint8_t *)nest;
+}
+
+#define RT_SNDBUF_SIZE (1024 * 2)
+#define RT_RCVBUF_SIZE (1024 * 4)
+
+/* Open RTNL socket */
+static int ovpn_rt_socket(void)
+{
+ int sndbuf = RT_SNDBUF_SIZE, rcvbuf = RT_RCVBUF_SIZE, fd;
+
+ fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
+ if (fd < 0) {
+ fprintf(stderr, "%s: cannot open netlink socket\n", __func__);
+ return fd;
+ }
+
+ if (setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &sndbuf,
+ sizeof(sndbuf)) < 0) {
+ fprintf(stderr, "%s: SO_SNDBUF\n", __func__);
+ close(fd);
+ return -1;
+ }
+
+ if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &rcvbuf,
+ sizeof(rcvbuf)) < 0) {
+ fprintf(stderr, "%s: SO_RCVBUF\n", __func__);
+ close(fd);
+ return -1;
+ }
+
+ return fd;
+}
+
+/* Bind socket to Netlink subsystem */
+static int ovpn_rt_bind(int fd, uint32_t groups)
+{
+ struct sockaddr_nl local = { 0 };
+ socklen_t addr_len;
+
+ local.nl_family = AF_NETLINK;
+ local.nl_groups = groups;
+
+ if (bind(fd, (struct sockaddr *)&local, sizeof(local)) < 0) {
+ fprintf(stderr, "%s: cannot bind netlink socket: %d\n",
+ __func__, errno);
+ return -errno;
+ }
+
+ addr_len = sizeof(local);
+ if (getsockname(fd, (struct sockaddr *)&local, &addr_len) < 0) {
+ fprintf(stderr, "%s: cannot getsockname: %d\n", __func__,
+ errno);
+ return -errno;
+ }
+
+ if (addr_len != sizeof(local)) {
+ fprintf(stderr, "%s: wrong address length %d\n", __func__,
+ addr_len);
+ return -EINVAL;
+ }
+
+ if (local.nl_family != AF_NETLINK) {
+ fprintf(stderr, "%s: wrong address family %d\n", __func__,
+ local.nl_family);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+typedef int (*ovpn_parse_reply_cb)(struct nlmsghdr *msg, void *arg);
+
+/* Send Netlink message and run callback on reply (if specified) */
+static int ovpn_rt_send(struct nlmsghdr *payload, pid_t peer,
+ unsigned int groups, ovpn_parse_reply_cb cb,
+ void *arg_cb)
+{
+ int len, rem_len, fd, ret, rcv_len;
+ struct sockaddr_nl nladdr = { 0 };
+ struct nlmsgerr *err;
+ struct nlmsghdr *h;
+ char buf[1024 * 16];
+ struct iovec iov = {
+ .iov_base = payload,
+ .iov_len = payload->nlmsg_len,
+ };
+ struct msghdr nlmsg = {
+ .msg_name = &nladdr,
+ .msg_namelen = sizeof(nladdr),
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ };
+
+ nladdr.nl_family = AF_NETLINK;
+ nladdr.nl_pid = peer;
+ nladdr.nl_groups = groups;
+
+ payload->nlmsg_seq = time(NULL);
+
+ /* no need to send reply */
+ if (!cb)
+ payload->nlmsg_flags |= NLM_F_ACK;
+
+ fd = ovpn_rt_socket();
+ if (fd < 0) {
+ fprintf(stderr, "%s: can't open rtnl socket\n", __func__);
+ return -errno;
+ }
+
+ ret = ovpn_rt_bind(fd, 0);
+ if (ret < 0) {
+ fprintf(stderr, "%s: can't bind rtnl socket\n", __func__);
+ ret = -errno;
+ goto out;
+ }
+
+ ret = sendmsg(fd, &nlmsg, 0);
+ if (ret < 0) {
+ fprintf(stderr, "%s: rtnl: error on sendmsg()\n", __func__);
+ ret = -errno;
+ goto out;
+ }
+
+ /* prepare buffer to store RTNL replies */
+ memset(buf, 0, sizeof(buf));
+ iov.iov_base = buf;
+
+ while (1) {
+ /*
+ * iov_len is modified by recvmsg(), therefore has to be initialized before
+ * using it again
+ */
+ iov.iov_len = sizeof(buf);
+ rcv_len = recvmsg(fd, &nlmsg, 0);
+ if (rcv_len < 0) {
+ if (errno == EINTR || errno == EAGAIN) {
+ fprintf(stderr, "%s: interrupted call\n",
+ __func__);
+ continue;
+ }
+ fprintf(stderr, "%s: rtnl: error on recvmsg()\n",
+ __func__);
+ ret = -errno;
+ goto out;
+ }
+
+ if (rcv_len == 0) {
+ fprintf(stderr,
+ "%s: rtnl: socket reached unexpected EOF\n",
+ __func__);
+ ret = -EIO;
+ goto out;
+ }
+
+ if (nlmsg.msg_namelen != sizeof(nladdr)) {
+ fprintf(stderr,
+ "%s: sender address length: %u (expected %zu)\n",
+ __func__, nlmsg.msg_namelen, sizeof(nladdr));
+ ret = -EIO;
+ goto out;
+ }
+
+ h = (struct nlmsghdr *)buf;
+ while (rcv_len >= (int)sizeof(*h)) {
+ len = h->nlmsg_len;
+ rem_len = len - sizeof(*h);
+
+ if (rem_len < 0 || len > rcv_len) {
+ if (nlmsg.msg_flags & MSG_TRUNC) {
+ fprintf(stderr, "%s: truncated message\n",
+ __func__);
+ ret = -EIO;
+ goto out;
+ }
+ fprintf(stderr, "%s: malformed message: len=%d\n",
+ __func__, len);
+ ret = -EIO;
+ goto out;
+ }
+
+ if (h->nlmsg_type == NLMSG_DONE) {
+ ret = 0;
+ goto out;
+ }
+
+ if (h->nlmsg_type == NLMSG_ERROR) {
+ err = (struct nlmsgerr *)NLMSG_DATA(h);
+ if (rem_len < (int)sizeof(struct nlmsgerr)) {
+ fprintf(stderr, "%s: ERROR truncated\n",
+ __func__);
+ ret = -EIO;
+ goto out;
+ }
+
+ if (err->error) {
+ fprintf(stderr, "%s: (%d) %s\n",
+ __func__, err->error,
+ strerror(-err->error));
+ ret = err->error;
+ goto out;
+ }
+
+ ret = 0;
+ if (cb) {
+ int r = cb(h, arg_cb);
+
+ if (r <= 0)
+ ret = r;
+ }
+ goto out;
+ }
+
+ if (cb) {
+ int r = cb(h, arg_cb);
+
+ if (r <= 0) {
+ ret = r;
+ goto out;
+ }
+ } else {
+ fprintf(stderr, "%s: RTNL: unexpected reply\n",
+ __func__);
+ }
+
+ rcv_len -= NLMSG_ALIGN(len);
+ h = (struct nlmsghdr *)((uint8_t *)h +
+ NLMSG_ALIGN(len));
+ }
+
+ if (nlmsg.msg_flags & MSG_TRUNC) {
+ fprintf(stderr, "%s: message truncated\n", __func__);
+ continue;
+ }
+
+ if (rcv_len) {
+ fprintf(stderr, "%s: rtnl: %d not parsed bytes\n",
+ __func__, rcv_len);
+ ret = -1;
+ goto out;
+ }
+ }
+out:
+ close(fd);
+
+ return ret;
+}
+
+struct ovpn_link_req {
+ struct nlmsghdr n;
+ struct ifinfomsg i;
+ char buf[256];
+};
+
+static int ovpn_new_iface(struct ovpn_ctx *ovpn)
+{
+ struct rtattr *linkinfo, *data;
+ struct ovpn_link_req req = { 0 };
+ int ret = -1;
+
+ fprintf(stdout, "Creating interface %s with mode %u\n", ovpn->ifname,
+ ovpn->mode);
+
+ req.n.nlmsg_len = NLMSG_LENGTH(sizeof(req.i));
+ req.n.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
+ req.n.nlmsg_type = RTM_NEWLINK;
+
+ if (ovpn_addattr(&req.n, sizeof(req), IFLA_IFNAME, ovpn->ifname,
+ strlen(ovpn->ifname) + 1) < 0)
+ goto err;
+
+ linkinfo = ovpn_nest_start(&req.n, sizeof(req), IFLA_LINKINFO);
+ if (!linkinfo)
+ goto err;
+
+ if (ovpn_addattr(&req.n, sizeof(req), IFLA_INFO_KIND, OVPN_FAMILY_NAME,
+ strlen(OVPN_FAMILY_NAME) + 1) < 0)
+ goto err;
+
+ if (ovpn->mode_set) {
+ data = ovpn_nest_start(&req.n, sizeof(req), IFLA_INFO_DATA);
+ if (!data)
+ goto err;
+
+ if (ovpn_addattr(&req.n, sizeof(req), IFLA_OVPN_MODE,
+ &ovpn->mode, sizeof(uint8_t)) < 0)
+ goto err;
+
+ ovpn_nest_end(&req.n, data);
+ }
+
+ ovpn_nest_end(&req.n, linkinfo);
+
+ req.i.ifi_family = AF_PACKET;
+
+ ret = ovpn_rt_send(&req.n, 0, 0, NULL, NULL);
+err:
+ return ret;
+}
+
+static int ovpn_del_iface(struct ovpn_ctx *ovpn)
+{
+ struct ovpn_link_req req = { 0 };
+
+ fprintf(stdout, "Deleting interface %s ifindex %u\n", ovpn->ifname,
+ ovpn->ifindex);
+
+ req.n.nlmsg_len = NLMSG_LENGTH(sizeof(req.i));
+ req.n.nlmsg_flags = NLM_F_REQUEST;
+ req.n.nlmsg_type = RTM_DELLINK;
+
+ req.i.ifi_family = AF_PACKET;
+ req.i.ifi_index = ovpn->ifindex;
+
+ return ovpn_rt_send(&req.n, 0, 0, NULL, NULL);
+}
+
+static int nl_seq_check(struct nl_msg (*msg)__always_unused,
+ void (*arg)__always_unused)
+{
+ return NL_OK;
+}
+
+struct mcast_handler_args {
+ const char *group;
+ int id;
+};
+
+static int mcast_family_handler(struct nl_msg *msg, void *arg)
+{
+ struct mcast_handler_args *grp = arg;
+ struct nlattr *tb[CTRL_ATTR_MAX + 1];
+ struct genlmsghdr *gnlh = nlmsg_data(nlmsg_hdr(msg));
+ struct nlattr *mcgrp;
+ int rem_mcgrp;
+
+ nla_parse(tb, CTRL_ATTR_MAX, genlmsg_attrdata(gnlh, 0),
+ genlmsg_attrlen(gnlh, 0), NULL);
+
+ if (!tb[CTRL_ATTR_MCAST_GROUPS])
+ return NL_SKIP;
+
+ nla_for_each_nested(mcgrp, tb[CTRL_ATTR_MCAST_GROUPS], rem_mcgrp) {
+ struct nlattr *tb_mcgrp[CTRL_ATTR_MCAST_GRP_MAX + 1];
+
+ nla_parse(tb_mcgrp, CTRL_ATTR_MCAST_GRP_MAX,
+ nla_data(mcgrp), nla_len(mcgrp), NULL);
+
+ if (!tb_mcgrp[CTRL_ATTR_MCAST_GRP_NAME] ||
+ !tb_mcgrp[CTRL_ATTR_MCAST_GRP_ID])
+ continue;
+ if (strncmp(nla_data(tb_mcgrp[CTRL_ATTR_MCAST_GRP_NAME]),
+ grp->group, nla_len(tb_mcgrp[CTRL_ATTR_MCAST_GRP_NAME])))
+ continue;
+ grp->id = nla_get_u32(tb_mcgrp[CTRL_ATTR_MCAST_GRP_ID]);
+ break;
+ }
+
+ return NL_SKIP;
+}
+
+static int mcast_error_handler(struct sockaddr_nl (*nla)__always_unused,
+ struct nlmsgerr *err, void *arg)
+{
+ int *ret = arg;
+
+ *ret = err->error;
+ return NL_STOP;
+}
+
+static int mcast_ack_handler(struct nl_msg (*msg)__always_unused, void *arg)
+{
+ int *ret = arg;
+
+ *ret = 0;
+ return NL_STOP;
+}
+
+static int ovpn_handle_msg(struct nl_msg *msg, void *arg)
+{
+ struct genlmsghdr *gnlh = nlmsg_data(nlmsg_hdr(msg));
+ struct nlattr *attrs[OVPN_A_MAX + 1];
+ struct nlmsghdr *nlh = nlmsg_hdr(msg);
+ char ifname[IF_NAMESIZE];
+ int *ret = arg;
+ __u32 ifindex;
+
+ fprintf(stderr, "received message from ovpn-dco\n");
+
+ *ret = -1;
+
+ if (!genlmsg_valid_hdr(nlh, 0)) {
+ fprintf(stderr, "invalid header\n");
+ return NL_STOP;
+ }
+
+ if (nla_parse(attrs, OVPN_A_MAX, genlmsg_attrdata(gnlh, 0),
+ genlmsg_attrlen(gnlh, 0), NULL)) {
+ fprintf(stderr, "received bogus data from ovpn-dco\n");
+ return NL_STOP;
+ }
+
+ if (!attrs[OVPN_A_IFINDEX]) {
+ fprintf(stderr, "no ifindex in this message\n");
+ return NL_STOP;
+ }
+
+ ifindex = nla_get_u32(attrs[OVPN_A_IFINDEX]);
+ if (!if_indextoname(ifindex, ifname)) {
+ fprintf(stderr, "cannot resolve ifname for ifindex: %u\n",
+ ifindex);
+ return NL_STOP;
+ }
+
+ switch (gnlh->cmd) {
+ case OVPN_CMD_PEER_DEL_NTF:
+ fprintf(stdout, "received CMD_PEER_DEL_NTF\n");
+ break;
+ case OVPN_CMD_KEY_SWAP_NTF:
+ fprintf(stdout, "received CMD_KEY_SWAP_NTF\n");
+ break;
+ default:
+ fprintf(stderr, "received unknown command: %d\n", gnlh->cmd);
+ return NL_STOP;
+ }
+
+ *ret = 0;
+ return NL_OK;
+}
+
+static int ovpn_get_mcast_id(struct nl_sock *sock, const char *family,
+ const char *group)
+{
+ struct nl_msg *msg;
+ struct nl_cb *cb;
+ int ret, ctrlid;
+ struct mcast_handler_args grp = {
+ .group = group,
+ .id = -ENOENT,
+ };
+
+ msg = nlmsg_alloc();
+ if (!msg)
+ return -ENOMEM;
+
+ cb = nl_cb_alloc(NL_CB_DEFAULT);
+ if (!cb) {
+ ret = -ENOMEM;
+ goto out_fail_cb;
+ }
+
+ ctrlid = genl_ctrl_resolve(sock, "nlctrl");
+
+ genlmsg_put(msg, 0, 0, ctrlid, 0, 0, CTRL_CMD_GETFAMILY, 0);
+
+ ret = -ENOBUFS;
+ NLA_PUT_STRING(msg, CTRL_ATTR_FAMILY_NAME, family);
+
+ ret = nl_send_auto_complete(sock, msg);
+ if (ret < 0)
+ goto nla_put_failure;
+
+ ret = 1;
+
+ nl_cb_err(cb, NL_CB_CUSTOM, mcast_error_handler, &ret);
+ nl_cb_set(cb, NL_CB_ACK, NL_CB_CUSTOM, mcast_ack_handler, &ret);
+ nl_cb_set(cb, NL_CB_VALID, NL_CB_CUSTOM, mcast_family_handler, &grp);
+
+ while (ret > 0)
+ nl_recvmsgs(sock, cb);
+
+ if (ret == 0)
+ ret = grp.id;
+ nla_put_failure:
+ nl_cb_put(cb);
+ out_fail_cb:
+ nlmsg_free(msg);
+ return ret;
+}
+
+static int ovpn_listen_mcast(void)
+{
+ struct nl_sock *sock;
+ struct nl_cb *cb;
+ int mcid, ret;
+
+ sock = nl_socket_alloc();
+ if (!sock) {
+ fprintf(stderr, "cannot allocate netlink socket\n");
+ goto err_free;
+ }
+
+ nl_socket_set_buffer_size(sock, 8192, 8192);
+
+ ret = genl_connect(sock);
+ if (ret < 0) {
+ fprintf(stderr, "cannot connect to generic netlink: %s\n",
+ nl_geterror(ret));
+ goto err_free;
+ }
+
+ mcid = ovpn_get_mcast_id(sock, OVPN_FAMILY_NAME, OVPN_MCGRP_PEERS);
+ if (mcid < 0) {
+ fprintf(stderr, "cannot get mcast group: %s\n",
+ nl_geterror(mcid));
+ goto err_free;
+ }
+
+ ret = nl_socket_add_membership(sock, mcid);
+ if (ret) {
+ fprintf(stderr, "failed to join mcast group: %d\n", ret);
+ goto err_free;
+ }
+
+ ret = 1;
+ cb = nl_cb_alloc(NL_CB_DEFAULT);
+ nl_cb_set(cb, NL_CB_SEQ_CHECK, NL_CB_CUSTOM, nl_seq_check, NULL);
+ nl_cb_set(cb, NL_CB_VALID, NL_CB_CUSTOM, ovpn_handle_msg, &ret);
+ nl_cb_err(cb, NL_CB_CUSTOM, ovpn_nl_cb_error, &ret);
+
+ while (ret == 1) {
+ int err = nl_recvmsgs(sock, cb);
+
+ if (err < 0) {
+ fprintf(stderr,
+ "cannot receive netlink message: (%d) %s\n",
+ err, nl_geterror(-err));
+ ret = -1;
+ break;
+ }
+ }
+
+ nl_cb_put(cb);
+err_free:
+ nl_socket_free(sock);
+ return ret;
+}
+
+static void usage(const char *cmd)
+{
+ fprintf(stderr,
+ "Usage %s <command> <iface> [arguments..]\n",
+ cmd);
+ fprintf(stderr, "where <command> can be one of the following\n\n");
+
+ fprintf(stderr, "* new_iface <iface> [mode]: create new ovpn interface\n");
+ fprintf(stderr, "\tiface: ovpn interface name\n");
+ fprintf(stderr, "\tmode:\n");
+ fprintf(stderr, "\t\t- P2P for peer-to-peer mode (i.e. client)\n");
+ fprintf(stderr, "\t\t- MP for multi-peer mode (i.e. server)\n");
+
+ fprintf(stderr, "* del_iface <iface>: delete ovpn interface\n");
+ fprintf(stderr, "\tiface: ovpn interface name\n");
+
+ fprintf(stderr,
+ "* listen <iface> <lport> <peers_file> [ipv6]: listen for incoming peer TCP connections\n");
+ fprintf(stderr, "\tiface: ovpn interface name\n");
+ fprintf(stderr, "\tlport: TCP port to listen to\n");
+ fprintf(stderr,
+ "\tpeers_file: file containing one peer per line: Line format:\n");
+ fprintf(stderr, "\t\t<peer_id> <vpnaddr>\n");
+ fprintf(stderr,
+ "\tipv6: whether the socket should listen to the IPv6 wildcard address\n");
+
+ fprintf(stderr,
+ "* connect <iface> <peer_id> <raddr> <rport> [key_file]: start connecting peer of TCP-based VPN session\n");
+ fprintf(stderr, "\tiface: ovpn interface name\n");
+ fprintf(stderr, "\tpeer_id: peer ID of the connecting peer\n");
+ fprintf(stderr, "\traddr: peer IP address to connect to\n");
+ fprintf(stderr, "\trport: peer TCP port to connect to\n");
+ fprintf(stderr,
+ "\tkey_file: file containing the symmetric key for encryption\n");
+
+ fprintf(stderr,
+ "* new_peer <iface> <peer_id> <lport> <raddr> <rport> [vpnaddr]: add new peer\n");
+ fprintf(stderr, "\tiface: ovpn interface name\n");
+ fprintf(stderr, "\tlport: local UDP port to bind to\n");
+ fprintf(stderr,
+ "\tpeer_id: peer ID to be used in data packets to/from this peer\n");
+ fprintf(stderr, "\traddr: peer IP address\n");
+ fprintf(stderr, "\trport: peer UDP port\n");
+ fprintf(stderr, "\tvpnaddr: peer VPN IP\n");
+
+ fprintf(stderr,
+ "* new_multi_peer <iface> <lport> <peers_file>: add multiple peers as listed in the file\n");
+ fprintf(stderr, "\tiface: ovpn interface name\n");
+ fprintf(stderr, "\tlport: local UDP port to bind to\n");
+ fprintf(stderr,
+ "\tpeers_file: text file containing one peer per line. Line format:\n");
+ fprintf(stderr, "\t\t<peer_id> <raddr> <rport> <vpnaddr>\n");
+
+ fprintf(stderr,
+ "* set_peer <iface> <peer_id> <keepalive_interval> <keepalive_timeout>: set peer attributes\n");
+ fprintf(stderr, "\tiface: ovpn interface name\n");
+ fprintf(stderr, "\tpeer_id: peer ID of the peer to modify\n");
+ fprintf(stderr,
+ "\tkeepalive_interval: interval for sending ping messages\n");
+ fprintf(stderr,
+ "\tkeepalive_timeout: time after which a peer is timed out\n");
+
+ fprintf(stderr, "* del_peer <iface> <peer_id>: delete peer\n");
+ fprintf(stderr, "\tiface: ovpn interface name\n");
+ fprintf(stderr, "\tpeer_id: peer ID of the peer to delete\n");
+
+ fprintf(stderr, "* get_peer <iface> [peer_id]: retrieve peer(s) status\n");
+ fprintf(stderr, "\tiface: ovpn interface name\n");
+ fprintf(stderr,
+ "\tpeer_id: peer ID of the peer to query. All peers are returned if omitted\n");
+
+ fprintf(stderr,
+ "* new_key <iface> <peer_id> <slot> <key_id> <cipher> <key_dir> <key_file>: set data channel key\n");
+ fprintf(stderr, "\tiface: ovpn interface name\n");
+ fprintf(stderr,
+ "\tpeer_id: peer ID of the peer to configure the key for\n");
+ fprintf(stderr, "\tslot: either 1 (primary) or 2 (secondary)\n");
+ fprintf(stderr, "\tkey_id: an ID from 0 to 7\n");
+ fprintf(stderr,
+ "\tcipher: cipher to use, supported: aes (AES-GCM), chachapoly (CHACHA20POLY1305)\n");
+ fprintf(stderr,
+ "\tkey_dir: key direction, must 0 on one host and 1 on the other\n");
+ fprintf(stderr, "\tkey_file: file containing the pre-shared key\n");
+
+ fprintf(stderr,
+ "* del_key <iface> <peer_id> [slot]: erase existing data channel key\n");
+ fprintf(stderr, "\tiface: ovpn interface name\n");
+ fprintf(stderr, "\tpeer_id: peer ID of the peer to modify\n");
+ fprintf(stderr, "\tslot: slot to erase. PRIMARY if omitted\n");
+
+ fprintf(stderr,
+ "* get_key <iface> <peer_id> <slot>: retrieve non sensible key data\n");
+ fprintf(stderr, "\tiface: ovpn interface name\n");
+ fprintf(stderr, "\tpeer_id: peer ID of the peer to query\n");
+ fprintf(stderr, "\tslot: either 1 (primary) or 2 (secondary)\n");
+
+ fprintf(stderr,
+ "* swap_keys <iface> <peer_id>: swap content of primary and secondary key slots\n");
+ fprintf(stderr, "\tiface: ovpn interface name\n");
+ fprintf(stderr, "\tpeer_id: peer ID of the peer to modify\n");
+
+ fprintf(stderr,
+ "* listen_mcast: listen to ovpn netlink multicast messages\n");
+}
+
+static int ovpn_parse_remote(struct ovpn_ctx *ovpn, const char *host,
+ const char *service, const char *vpnip)
+{
+ int ret;
+ struct addrinfo *result;
+ struct addrinfo hints = {
+ .ai_family = ovpn->sa_family,
+ .ai_socktype = SOCK_DGRAM,
+ .ai_protocol = IPPROTO_UDP
+ };
+
+ if (host) {
+ ret = getaddrinfo(host, service, &hints, &result);
+ if (ret) {
+ fprintf(stderr, "getaddrinfo on remote error: %s\n",
+ gai_strerror(ret));
+ return -1;
+ }
+
+ if (!(result->ai_family == AF_INET &&
+ result->ai_addrlen == sizeof(struct sockaddr_in)) &&
+ !(result->ai_family == AF_INET6 &&
+ result->ai_addrlen == sizeof(struct sockaddr_in6))) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ memcpy(&ovpn->remote, result->ai_addr, result->ai_addrlen);
+ }
+
+ if (vpnip) {
+ ret = getaddrinfo(vpnip, NULL, &hints, &result);
+ if (ret) {
+ fprintf(stderr, "getaddrinfo on vpnip error: %s\n",
+ gai_strerror(ret));
+ return -1;
+ }
+
+ if (!(result->ai_family == AF_INET &&
+ result->ai_addrlen == sizeof(struct sockaddr_in)) &&
+ !(result->ai_family == AF_INET6 &&
+ result->ai_addrlen == sizeof(struct sockaddr_in6))) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ memcpy(&ovpn->peer_ip, result->ai_addr, result->ai_addrlen);
+ ovpn->sa_family = result->ai_family;
+
+ ovpn->peer_ip_set = true;
+ }
+
+ ret = 0;
+out:
+ freeaddrinfo(result);
+ return ret;
+}
+
+static int ovpn_parse_new_peer(struct ovpn_ctx *ovpn, const char *peer_id,
+ const char *raddr, const char *rport,
+ const char *vpnip)
+{
+ ovpn->peer_id = strtoul(peer_id, NULL, 10);
+ if (errno == ERANGE || ovpn->peer_id > PEER_ID_UNDEF) {
+ fprintf(stderr, "peer ID value out of range\n");
+ return -1;
+ }
+
+ return ovpn_parse_remote(ovpn, raddr, rport, vpnip);
+}
+
+static int ovpn_parse_key_slot(const char *arg, struct ovpn_ctx *ovpn)
+{
+ int slot = strtoul(arg, NULL, 10);
+
+ if (errno == ERANGE || slot < 1 || slot > 2) {
+ fprintf(stderr, "key slot out of range\n");
+ return -1;
+ }
+
+ switch (slot) {
+ case 1:
+ ovpn->key_slot = OVPN_KEY_SLOT_PRIMARY;
+ break;
+ case 2:
+ ovpn->key_slot = OVPN_KEY_SLOT_SECONDARY;
+ break;
+ }
+
+ return 0;
+}
+
+static int ovpn_send_tcp_data(int socket)
+{
+ uint16_t len = htons(1000);
+ uint8_t buf[1002];
+ int ret;
+
+ memcpy(buf, &len, sizeof(len));
+ memset(buf + sizeof(len), 0x86, sizeof(buf) - sizeof(len));
+
+ ret = send(socket, buf, sizeof(buf), MSG_NOSIGNAL);
+
+ fprintf(stdout, "Sent %u bytes over TCP socket\n", ret);
+
+ return ret > 0 ? 0 : ret;
+}
+
+static int ovpn_recv_tcp_data(int socket)
+{
+ uint8_t buf[1002];
+ uint16_t len;
+ int ret;
+
+ ret = recv(socket, buf, sizeof(buf), MSG_NOSIGNAL);
+
+ if (ret < 2) {
+ fprintf(stderr, ">>>> Error while reading TCP data: %d\n", ret);
+ return ret;
+ }
+
+ memcpy(&len, buf, sizeof(len));
+ len = ntohs(len);
+
+ fprintf(stdout, ">>>> Received %u bytes over TCP socket, header: %u\n",
+ ret, len);
+
+ return 0;
+}
+
+static enum ovpn_cmd ovpn_parse_cmd(const char *cmd)
+{
+ if (!strcmp(cmd, "new_iface"))
+ return CMD_NEW_IFACE;
+
+ if (!strcmp(cmd, "del_iface"))
+ return CMD_DEL_IFACE;
+
+ if (!strcmp(cmd, "listen"))
+ return CMD_LISTEN;
+
+ if (!strcmp(cmd, "connect"))
+ return CMD_CONNECT;
+
+ if (!strcmp(cmd, "new_peer"))
+ return CMD_NEW_PEER;
+
+ if (!strcmp(cmd, "new_multi_peer"))
+ return CMD_NEW_MULTI_PEER;
+
+ if (!strcmp(cmd, "set_peer"))
+ return CMD_SET_PEER;
+
+ if (!strcmp(cmd, "del_peer"))
+ return CMD_DEL_PEER;
+
+ if (!strcmp(cmd, "get_peer"))
+ return CMD_GET_PEER;
+
+ if (!strcmp(cmd, "new_key"))
+ return CMD_NEW_KEY;
+
+ if (!strcmp(cmd, "del_key"))
+ return CMD_DEL_KEY;
+
+ if (!strcmp(cmd, "get_key"))
+ return CMD_GET_KEY;
+
+ if (!strcmp(cmd, "swap_keys"))
+ return CMD_SWAP_KEYS;
+
+ if (!strcmp(cmd, "listen_mcast"))
+ return CMD_LISTEN_MCAST;
+
+ return CMD_INVALID;
+}
+
+/* Send process to background and waits for signal.
+ *
+ * This helper is called at the end of commands
+ * creating sockets, so that the latter stay alive
+ * along with the process that created them.
+ *
+ * A signal is expected to be delivered in order to
+ * terminate the waiting processes
+ */
+static void ovpn_waitbg(void)
+{
+ daemon(1, 1);
+ pause();
+}
+
+static int ovpn_run_cmd(struct ovpn_ctx *ovpn)
+{
+ char peer_id[10], vpnip[INET6_ADDRSTRLEN], laddr[128], lport[10];
+ char raddr[128], rport[10];
+ int n, ret;
+ FILE *fp;
+
+ switch (ovpn->cmd) {
+ case CMD_NEW_IFACE:
+ ret = ovpn_new_iface(ovpn);
+ break;
+ case CMD_DEL_IFACE:
+ ret = ovpn_del_iface(ovpn);
+ break;
+ case CMD_LISTEN:
+ ret = ovpn_listen(ovpn, ovpn->sa_family);
+ if (ret < 0) {
+ fprintf(stderr, "cannot listen on TCP socket\n");
+ return ret;
+ }
+
+ fp = fopen(ovpn->peers_file, "r");
+ if (!fp) {
+ fprintf(stderr, "cannot open file: %s\n",
+ ovpn->peers_file);
+ return -1;
+ }
+
+ int num_peers = 0;
+
+ while ((n = fscanf(fp, "%s %s\n", peer_id, vpnip)) == 2) {
+ struct ovpn_ctx peer_ctx = { 0 };
+
+ if (num_peers == MAX_PEERS) {
+ fprintf(stderr, "max peers reached!\n");
+ return -E2BIG;
+ }
+
+ peer_ctx.ifindex = ovpn->ifindex;
+ peer_ctx.sa_family = ovpn->sa_family;
+
+ peer_ctx.socket = ovpn_accept(ovpn);
+ if (peer_ctx.socket < 0) {
+ fprintf(stderr, "cannot accept connection!\n");
+ return -1;
+ }
+
+ /* store peer sockets to test TCP I/O */
+ ovpn->cli_sockets[num_peers] = peer_ctx.socket;
+
+ ret = ovpn_parse_new_peer(&peer_ctx, peer_id, NULL,
+ NULL, vpnip);
+ if (ret < 0) {
+ fprintf(stderr, "error while parsing line\n");
+ return -1;
+ }
+
+ ret = ovpn_new_peer(&peer_ctx, true);
+ if (ret < 0) {
+ fprintf(stderr,
+ "cannot add peer to VPN: %s %s\n",
+ peer_id, vpnip);
+ return ret;
+ }
+ num_peers++;
+ }
+
+ for (int i = 0; i < num_peers; i++) {
+ ret = ovpn_recv_tcp_data(ovpn->cli_sockets[i]);
+ if (ret < 0)
+ break;
+ }
+ ovpn_waitbg();
+ break;
+ case CMD_CONNECT:
+ ret = ovpn_connect(ovpn);
+ if (ret < 0) {
+ fprintf(stderr, "cannot connect TCP socket\n");
+ return ret;
+ }
+
+ ret = ovpn_new_peer(ovpn, true);
+ if (ret < 0) {
+ fprintf(stderr, "cannot add peer to VPN\n");
+ close(ovpn->socket);
+ return ret;
+ }
+
+ if (ovpn->cipher != OVPN_CIPHER_ALG_NONE) {
+ ret = ovpn_new_key(ovpn);
+ if (ret < 0) {
+ fprintf(stderr, "cannot set key\n");
+ return ret;
+ }
+ }
+
+ ret = ovpn_send_tcp_data(ovpn->socket);
+ ovpn_waitbg();
+ break;
+ case CMD_NEW_PEER:
+ ret = ovpn_udp_socket(ovpn, AF_INET6);
+ if (ret < 0)
+ return ret;
+
+ ret = ovpn_new_peer(ovpn, false);
+ ovpn_waitbg();
+ break;
+ case CMD_NEW_MULTI_PEER:
+ ret = ovpn_udp_socket(ovpn, AF_INET6);
+ if (ret < 0)
+ return ret;
+
+ fp = fopen(ovpn->peers_file, "r");
+ if (!fp) {
+ fprintf(stderr, "cannot open file: %s\n",
+ ovpn->peers_file);
+ return -1;
+ }
+
+ while ((n = fscanf(fp, "%s %s %s %s %s %s\n", peer_id, laddr,
+ lport, raddr, rport, vpnip)) == 6) {
+ struct ovpn_ctx peer_ctx = { 0 };
+
+ peer_ctx.ifindex = ovpn->ifindex;
+ peer_ctx.socket = ovpn->socket;
+ peer_ctx.sa_family = AF_UNSPEC;
+
+ ret = ovpn_parse_new_peer(&peer_ctx, peer_id, raddr,
+ rport, vpnip);
+ if (ret < 0) {
+ fprintf(stderr, "error while parsing line\n");
+ return -1;
+ }
+
+ ret = ovpn_new_peer(&peer_ctx, false);
+ if (ret < 0) {
+ fprintf(stderr,
+ "cannot add peer to VPN: %s %s %s %s\n",
+ peer_id, raddr, rport, vpnip);
+ return ret;
+ }
+ }
+ ovpn_waitbg();
+ break;
+ case CMD_SET_PEER:
+ ret = ovpn_set_peer(ovpn);
+ break;
+ case CMD_DEL_PEER:
+ ret = ovpn_del_peer(ovpn);
+ break;
+ case CMD_GET_PEER:
+ if (ovpn->peer_id == PEER_ID_UNDEF)
+ fprintf(stderr, "List of peers connected to: %s\n",
+ ovpn->ifname);
+
+ ret = ovpn_get_peer(ovpn);
+ break;
+ case CMD_NEW_KEY:
+ ret = ovpn_new_key(ovpn);
+ break;
+ case CMD_DEL_KEY:
+ ret = ovpn_del_key(ovpn);
+ break;
+ case CMD_GET_KEY:
+ ret = ovpn_get_key(ovpn);
+ break;
+ case CMD_SWAP_KEYS:
+ ret = ovpn_swap_keys(ovpn);
+ break;
+ case CMD_LISTEN_MCAST:
+ ret = ovpn_listen_mcast();
+ break;
+ case CMD_INVALID:
+ break;
+ }
+
+ return ret;
+}
+
+static int ovpn_parse_cmd_args(struct ovpn_ctx *ovpn, int argc, char *argv[])
+{
+ int ret;
+
+ /* no args required for LISTEN_MCAST */
+ if (ovpn->cmd == CMD_LISTEN_MCAST)
+ return 0;
+
+ /* all commands need an ifname */
+ if (argc < 3)
+ return -EINVAL;
+
+ strscpy(ovpn->ifname, argv[2], IFNAMSIZ - 1);
+ ovpn->ifname[IFNAMSIZ - 1] = '\0';
+
+ /* all commands, except NEW_IFNAME, needs an ifindex */
+ if (ovpn->cmd != CMD_NEW_IFACE) {
+ ovpn->ifindex = if_nametoindex(ovpn->ifname);
+ if (!ovpn->ifindex) {
+ fprintf(stderr, "cannot find interface: %s\n",
+ strerror(errno));
+ return -1;
+ }
+ }
+
+ switch (ovpn->cmd) {
+ case CMD_NEW_IFACE:
+ if (argc < 4)
+ break;
+
+ if (!strcmp(argv[3], "P2P")) {
+ ovpn->mode = OVPN_MODE_P2P;
+ } else if (!strcmp(argv[3], "MP")) {
+ ovpn->mode = OVPN_MODE_MP;
+ } else {
+ fprintf(stderr, "Cannot parse iface mode: %s\n",
+ argv[3]);
+ return -1;
+ }
+ ovpn->mode_set = true;
+ break;
+ case CMD_DEL_IFACE:
+ break;
+ case CMD_LISTEN:
+ if (argc < 5)
+ return -EINVAL;
+
+ ovpn->lport = strtoul(argv[3], NULL, 10);
+ if (errno == ERANGE || ovpn->lport > 65535) {
+ fprintf(stderr, "lport value out of range\n");
+ return -1;
+ }
+
+ ovpn->peers_file = argv[4];
+
+ if (argc > 5 && !strcmp(argv[5], "ipv6"))
+ ovpn->sa_family = AF_INET6;
+ break;
+ case CMD_CONNECT:
+ if (argc < 6)
+ return -EINVAL;
+
+ ovpn->sa_family = AF_INET;
+
+ ret = ovpn_parse_new_peer(ovpn, argv[3], argv[4], argv[5],
+ NULL);
+ if (ret < 0) {
+ fprintf(stderr, "Cannot parse remote peer data\n");
+ return -1;
+ }
+
+ if (argc > 6) {
+ ovpn->key_slot = OVPN_KEY_SLOT_PRIMARY;
+ ovpn->key_id = 0;
+ ovpn->cipher = OVPN_CIPHER_ALG_AES_GCM;
+ ovpn->key_dir = KEY_DIR_OUT;
+
+ ret = ovpn_parse_key(argv[6], ovpn);
+ if (ret)
+ return -1;
+ }
+ break;
+ case CMD_NEW_PEER:
+ if (argc < 7)
+ return -EINVAL;
+
+ ovpn->lport = strtoul(argv[4], NULL, 10);
+ if (errno == ERANGE || ovpn->lport > 65535) {
+ fprintf(stderr, "lport value out of range\n");
+ return -1;
+ }
+
+ const char *vpnip = (argc > 7) ? argv[7] : NULL;
+
+ ret = ovpn_parse_new_peer(ovpn, argv[3], argv[5], argv[6],
+ vpnip);
+ if (ret < 0)
+ return -1;
+ break;
+ case CMD_NEW_MULTI_PEER:
+ if (argc < 5)
+ return -EINVAL;
+
+ ovpn->lport = strtoul(argv[3], NULL, 10);
+ if (errno == ERANGE || ovpn->lport > 65535) {
+ fprintf(stderr, "lport value out of range\n");
+ return -1;
+ }
+
+ ovpn->peers_file = argv[4];
+ break;
+ case CMD_SET_PEER:
+ if (argc < 6)
+ return -EINVAL;
+
+ ovpn->peer_id = strtoul(argv[3], NULL, 10);
+ if (errno == ERANGE || ovpn->peer_id > PEER_ID_UNDEF) {
+ fprintf(stderr, "peer ID value out of range\n");
+ return -1;
+ }
+
+ ovpn->keepalive_interval = strtoul(argv[4], NULL, 10);
+ if (errno == ERANGE) {
+ fprintf(stderr,
+ "keepalive interval value out of range\n");
+ return -1;
+ }
+
+ ovpn->keepalive_timeout = strtoul(argv[5], NULL, 10);
+ if (errno == ERANGE) {
+ fprintf(stderr,
+ "keepalive interval value out of range\n");
+ return -1;
+ }
+ break;
+ case CMD_DEL_PEER:
+ if (argc < 4)
+ return -EINVAL;
+
+ ovpn->peer_id = strtoul(argv[3], NULL, 10);
+ if (errno == ERANGE || ovpn->peer_id > PEER_ID_UNDEF) {
+ fprintf(stderr, "peer ID value out of range\n");
+ return -1;
+ }
+ break;
+ case CMD_GET_PEER:
+ ovpn->peer_id = PEER_ID_UNDEF;
+ if (argc > 3) {
+ ovpn->peer_id = strtoul(argv[3], NULL, 10);
+ if (errno == ERANGE || ovpn->peer_id > PEER_ID_UNDEF) {
+ fprintf(stderr, "peer ID value out of range\n");
+ return -1;
+ }
+ }
+ break;
+ case CMD_NEW_KEY:
+ if (argc < 9)
+ return -EINVAL;
+
+ ovpn->peer_id = strtoul(argv[3], NULL, 10);
+ if (errno == ERANGE) {
+ fprintf(stderr, "peer ID value out of range\n");
+ return -1;
+ }
+
+ ret = ovpn_parse_key_slot(argv[4], ovpn);
+ if (ret)
+ return -1;
+
+ ovpn->key_id = strtoul(argv[5], NULL, 10);
+ if (errno == ERANGE || ovpn->key_id > 2) {
+ fprintf(stderr, "key ID out of range\n");
+ return -1;
+ }
+
+ ret = ovpn_parse_cipher(argv[6], ovpn);
+ if (ret < 0)
+ return -1;
+
+ ret = ovpn_parse_key_direction(argv[7], ovpn);
+ if (ret < 0)
+ return -1;
+
+ ret = ovpn_parse_key(argv[8], ovpn);
+ if (ret)
+ return -1;
+ break;
+ case CMD_DEL_KEY:
+ if (argc < 4)
+ return -EINVAL;
+
+ ovpn->peer_id = strtoul(argv[3], NULL, 10);
+ if (errno == ERANGE) {
+ fprintf(stderr, "peer ID value out of range\n");
+ return -1;
+ }
+
+ ret = ovpn_parse_key_slot(argv[4], ovpn);
+ if (ret)
+ return ret;
+ break;
+ case CMD_GET_KEY:
+ if (argc < 5)
+ return -EINVAL;
+
+ ovpn->peer_id = strtoul(argv[3], NULL, 10);
+ if (errno == ERANGE) {
+ fprintf(stderr, "peer ID value out of range\n");
+ return -1;
+ }
+
+ ret = ovpn_parse_key_slot(argv[4], ovpn);
+ if (ret)
+ return ret;
+ break;
+ case CMD_SWAP_KEYS:
+ if (argc < 4)
+ return -EINVAL;
+
+ ovpn->peer_id = strtoul(argv[3], NULL, 10);
+ if (errno == ERANGE) {
+ fprintf(stderr, "peer ID value out of range\n");
+ return -1;
+ }
+ break;
+ case CMD_LISTEN_MCAST:
+ break;
+ case CMD_INVALID:
+ break;
+ }
+
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ struct ovpn_ctx ovpn;
+ int ret;
+
+ if (argc < 2) {
+ usage(argv[0]);
+ return -1;
+ }
+
+ memset(&ovpn, 0, sizeof(ovpn));
+ ovpn.sa_family = AF_UNSPEC;
+ ovpn.cipher = OVPN_CIPHER_ALG_NONE;
+
+ ovpn.cmd = ovpn_parse_cmd(argv[1]);
+ if (ovpn.cmd == CMD_INVALID) {
+ fprintf(stderr, "Error: unknown command.\n\n");
+ usage(argv[0]);
+ return -1;
+ }
+
+ ret = ovpn_parse_cmd_args(&ovpn, argc, argv);
+ if (ret < 0) {
+ fprintf(stderr, "Error: invalid arguments.\n\n");
+ if (ret == -EINVAL)
+ usage(argv[0]);
+ return ret;
+ }
+
+ ret = ovpn_run_cmd(&ovpn);
+ if (ret)
+ fprintf(stderr, "Cannot execute command: %s (%d)\n",
+ strerror(-ret), ret);
+
+ return ret;
+}
diff --git a/tools/testing/selftests/net/ovpn/tcp_peers.txt b/tools/testing/selftests/net/ovpn/tcp_peers.txt
new file mode 100644
index 000000000000..d753eebe8716
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/tcp_peers.txt
@@ -0,0 +1,5 @@
+1 5.5.5.2
+2 5.5.5.3
+3 5.5.5.4
+4 5.5.5.5
+5 5.5.5.6
diff --git a/tools/testing/selftests/net/ovpn/test-chachapoly.sh b/tools/testing/selftests/net/ovpn/test-chachapoly.sh
new file mode 100755
index 000000000000..32504079a2b8
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/test-chachapoly.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2025 OpenVPN, Inc.
+#
+# Author: Antonio Quartulli <antonio@openvpn.net>
+
+ALG="chachapoly"
+
+source test.sh
diff --git a/tools/testing/selftests/net/ovpn/test-close-socket-tcp.sh b/tools/testing/selftests/net/ovpn/test-close-socket-tcp.sh
new file mode 100755
index 000000000000..093d44772ffd
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/test-close-socket-tcp.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2025 OpenVPN, Inc.
+#
+# Author: Antonio Quartulli <antonio@openvpn.net>
+
+PROTO="TCP"
+
+source test-close-socket.sh
diff --git a/tools/testing/selftests/net/ovpn/test-close-socket.sh b/tools/testing/selftests/net/ovpn/test-close-socket.sh
new file mode 100755
index 000000000000..5e48a8b67928
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/test-close-socket.sh
@@ -0,0 +1,45 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2020-2025 OpenVPN, Inc.
+#
+# Author: Antonio Quartulli <antonio@openvpn.net>
+
+#set -x
+set -e
+
+source ./common.sh
+
+cleanup
+
+modprobe -q ovpn || true
+
+for p in $(seq 0 ${NUM_PEERS}); do
+ create_ns ${p}
+done
+
+for p in $(seq 0 ${NUM_PEERS}); do
+ setup_ns ${p} 5.5.5.$((${p} + 1))/24
+done
+
+for p in $(seq 0 ${NUM_PEERS}); do
+ add_peer ${p}
+done
+
+for p in $(seq 1 ${NUM_PEERS}); do
+ ip netns exec peer0 ${OVPN_CLI} set_peer tun0 ${p} 60 120
+ ip netns exec peer${p} ${OVPN_CLI} set_peer tun${p} ${p} 60 120
+done
+
+sleep 1
+
+for p in $(seq 1 ${NUM_PEERS}); do
+ ip netns exec peer0 ping -qfc 500 -w 3 5.5.5.$((${p} + 1))
+done
+
+ip netns exec peer0 iperf3 -1 -s &
+sleep 1
+ip netns exec peer1 iperf3 -Z -t 3 -c 5.5.5.1
+
+cleanup
+
+modprobe -r ovpn || true
diff --git a/tools/testing/selftests/net/ovpn/test-float.sh b/tools/testing/selftests/net/ovpn/test-float.sh
new file mode 100755
index 000000000000..ba5d725e18b0
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/test-float.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2025 OpenVPN, Inc.
+#
+# Author: Antonio Quartulli <antonio@openvpn.net>
+
+FLOAT="1"
+
+source test.sh
diff --git a/tools/testing/selftests/net/ovpn/test-tcp.sh b/tools/testing/selftests/net/ovpn/test-tcp.sh
new file mode 100755
index 000000000000..ba3f1f315a34
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/test-tcp.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2025 OpenVPN, Inc.
+#
+# Author: Antonio Quartulli <antonio@openvpn.net>
+
+PROTO="TCP"
+
+source test.sh
diff --git a/tools/testing/selftests/net/ovpn/test.sh b/tools/testing/selftests/net/ovpn/test.sh
new file mode 100755
index 000000000000..e8acdc303307
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/test.sh
@@ -0,0 +1,117 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2020-2025 OpenVPN, Inc.
+#
+# Author: Antonio Quartulli <antonio@openvpn.net>
+
+#set -x
+set -e
+
+source ./common.sh
+
+cleanup
+
+modprobe -q ovpn || true
+
+for p in $(seq 0 ${NUM_PEERS}); do
+ create_ns ${p}
+done
+
+for p in $(seq 0 ${NUM_PEERS}); do
+ setup_ns ${p} 5.5.5.$((${p} + 1))/24 ${MTU}
+done
+
+for p in $(seq 0 ${NUM_PEERS}); do
+ add_peer ${p}
+done
+
+for p in $(seq 1 ${NUM_PEERS}); do
+ ip netns exec peer0 ${OVPN_CLI} set_peer tun0 ${p} 60 120
+ ip netns exec peer${p} ${OVPN_CLI} set_peer tun${p} ${p} 60 120
+done
+
+sleep 1
+
+for p in $(seq 1 ${NUM_PEERS}); do
+ ip netns exec peer0 ping -qfc 500 -w 3 5.5.5.$((${p} + 1))
+ ip netns exec peer0 ping -qfc 500 -s 3000 -w 3 5.5.5.$((${p} + 1))
+done
+
+# ping LAN behind client 1
+ip netns exec peer0 ping -qfc 500 -w 3 ${LAN_IP}
+
+if [ "$FLOAT" == "1" ]; then
+ # make clients float..
+ for p in $(seq 1 ${NUM_PEERS}); do
+ ip -n peer${p} addr del 10.10.${p}.2/24 dev veth${p}
+ ip -n peer${p} addr add 10.10.${p}.3/24 dev veth${p}
+ done
+ for p in $(seq 1 ${NUM_PEERS}); do
+ ip netns exec peer${p} ping -qfc 500 -w 3 5.5.5.1
+ done
+fi
+
+ip netns exec peer0 iperf3 -1 -s &
+sleep 1
+ip netns exec peer1 iperf3 -Z -t 3 -c 5.5.5.1
+
+echo "Adding secondary key and then swap:"
+for p in $(seq 1 ${NUM_PEERS}); do
+ ip netns exec peer0 ${OVPN_CLI} new_key tun0 ${p} 2 1 ${ALG} 0 data64.key
+ ip netns exec peer${p} ${OVPN_CLI} new_key tun${p} ${p} 2 1 ${ALG} 1 data64.key
+ ip netns exec peer${p} ${OVPN_CLI} swap_keys tun${p} ${p}
+done
+
+sleep 1
+
+echo "Querying all peers:"
+ip netns exec peer0 ${OVPN_CLI} get_peer tun0
+ip netns exec peer1 ${OVPN_CLI} get_peer tun1
+
+echo "Querying peer 1:"
+ip netns exec peer0 ${OVPN_CLI} get_peer tun0 1
+
+echo "Querying non-existent peer 10:"
+ip netns exec peer0 ${OVPN_CLI} get_peer tun0 10 || true
+
+echo "Deleting peer 1:"
+ip netns exec peer0 ${OVPN_CLI} del_peer tun0 1
+ip netns exec peer1 ${OVPN_CLI} del_peer tun1 1
+
+echo "Querying keys:"
+for p in $(seq 2 ${NUM_PEERS}); do
+ ip netns exec peer${p} ${OVPN_CLI} get_key tun${p} ${p} 1
+ ip netns exec peer${p} ${OVPN_CLI} get_key tun${p} ${p} 2
+done
+
+echo "Deleting peer while sending traffic:"
+(ip netns exec peer2 ping -qf -w 4 5.5.5.1)&
+sleep 2
+ip netns exec peer0 ${OVPN_CLI} del_peer tun0 2
+# following command fails in TCP mode
+# (both ends get conn reset when one peer disconnects)
+ip netns exec peer2 ${OVPN_CLI} del_peer tun2 2 || true
+
+echo "Deleting keys:"
+for p in $(seq 3 ${NUM_PEERS}); do
+ ip netns exec peer${p} ${OVPN_CLI} del_key tun${p} ${p} 1
+ ip netns exec peer${p} ${OVPN_CLI} del_key tun${p} ${p} 2
+done
+
+echo "Setting timeout to 3s MP:"
+for p in $(seq 3 ${NUM_PEERS}); do
+ ip netns exec peer0 ${OVPN_CLI} set_peer tun0 ${p} 3 3 || true
+ ip netns exec peer${p} ${OVPN_CLI} set_peer tun${p} ${p} 0 0
+done
+# wait for peers to timeout
+sleep 5
+
+echo "Setting timeout to 3s P2P:"
+for p in $(seq 3 ${NUM_PEERS}); do
+ ip netns exec peer${p} ${OVPN_CLI} set_peer tun${p} ${p} 3 3
+done
+sleep 5
+
+cleanup
+
+modprobe -r ovpn || true
diff --git a/tools/testing/selftests/net/ovpn/udp_peers.txt b/tools/testing/selftests/net/ovpn/udp_peers.txt
new file mode 100644
index 000000000000..e9773ddf875c
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/udp_peers.txt
@@ -0,0 +1,6 @@
+1 10.10.1.1 1 10.10.1.2 1 5.5.5.2
+2 10.10.2.1 1 10.10.2.2 1 5.5.5.3
+3 10.10.3.1 1 10.10.3.2 1 5.5.5.4
+4 fd00:0:0:4::1 1 fd00:0:0:4::2 1 5.5.5.5
+5 fd00:0:0:5::1 1 fd00:0:0:5::2 1 5.5.5.6
+6 fd00:0:0:6::1 1 fd00:0:0:6::2 1 5.5.5.7
diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
index 66be7699c72c..88e914c4eef9 100755
--- a/tools/testing/selftests/net/pmtu.sh
+++ b/tools/testing/selftests/net/pmtu.sh
@@ -205,7 +205,6 @@
# Check that PMTU exceptions are created for both paths.
source lib.sh
-source net_helper.sh
PAUSE_ON_FAIL=no
VERBOSE=0
diff --git a/tools/testing/selftests/net/reuseport_addr_any.c b/tools/testing/selftests/net/reuseport_addr_any.c
index b8475cb29be7..1c43401a1c80 100644
--- a/tools/testing/selftests/net/reuseport_addr_any.c
+++ b/tools/testing/selftests/net/reuseport_addr_any.c
@@ -9,7 +9,6 @@
#include <arpa/inet.h>
#include <errno.h>
#include <error.h>
-#include <linux/dccp.h>
#include <linux/in.h>
#include <linux/unistd.h>
#include <stdbool.h>
@@ -21,10 +20,6 @@
#include <sys/socket.h>
#include <unistd.h>
-#ifndef SOL_DCCP
-#define SOL_DCCP 269
-#endif
-
static const char *IP4_ADDR = "127.0.0.1";
static const char *IP6_ADDR = "::1";
static const char *IP4_MAPPED6 = "::ffff:127.0.0.1";
@@ -86,15 +81,6 @@ static void build_rcv_fd(int family, int proto, int *rcv_fds, int count,
if (proto == SOCK_STREAM && listen(rcv_fds[i], 10))
error(1, errno, "tcp: failed to listen on receive port");
- else if (proto == SOCK_DCCP) {
- if (setsockopt(rcv_fds[i], SOL_DCCP,
- DCCP_SOCKOPT_SERVICE,
- &(int) {htonl(42)}, sizeof(int)))
- error(1, errno, "failed to setsockopt");
-
- if (listen(rcv_fds[i], 10))
- error(1, errno, "dccp: failed to listen on receive port");
- }
}
}
@@ -148,11 +134,6 @@ static int connect_and_send(int family, int proto)
if (fd < 0)
error(1, errno, "failed to create send socket");
- if (proto == SOCK_DCCP &&
- setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SERVICE,
- &(int){htonl(42)}, sizeof(int)))
- error(1, errno, "failed to setsockopt");
-
if (bind(fd, saddr, sz))
error(1, errno, "failed to bind send socket");
@@ -175,7 +156,7 @@ static int receive_once(int epfd, int proto)
if (i < 0)
error(1, errno, "epoll_wait failed");
- if (proto == SOCK_STREAM || proto == SOCK_DCCP) {
+ if (proto == SOCK_STREAM) {
fd = accept(ev.data.fd, NULL, NULL);
if (fd < 0)
error(1, errno, "failed to accept");
@@ -243,20 +224,6 @@ static void run_one_test(int fam_send, int fam_rcv, int proto,
static void test_proto(int proto, const char *proto_str)
{
- if (proto == SOCK_DCCP) {
- int test_fd;
-
- test_fd = socket(AF_INET, proto, 0);
- if (test_fd < 0) {
- if (errno == ESOCKTNOSUPPORT) {
- fprintf(stderr, "DCCP not supported: skipping DCCP tests\n");
- return;
- } else
- error(1, errno, "failed to create a DCCP socket");
- }
- close(test_fd);
- }
-
fprintf(stderr, "%s IPv4 ... ", proto_str);
run_one_test(AF_INET, AF_INET, proto, IP4_ADDR);
@@ -271,7 +238,6 @@ int main(void)
{
test_proto(SOCK_DGRAM, "UDP");
test_proto(SOCK_STREAM, "TCP");
- test_proto(SOCK_DCCP, "DCCP");
fprintf(stderr, "SUCCESS\n");
return 0;
diff --git a/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh
index 02d617040793..a5e959a080bb 100755
--- a/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh
+++ b/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh
@@ -285,11 +285,6 @@ setup_hs()
ip netns exec ${hsname} sysctl -wq net.ipv6.conf.all.accept_dad=0
ip netns exec ${hsname} sysctl -wq net.ipv6.conf.default.accept_dad=0
- # disable the rp_filter otherwise the kernel gets confused about how
- # to route decap ipv4 packets.
- ip netns exec ${rtname} sysctl -wq net.ipv4.conf.all.rp_filter=0
- ip netns exec ${rtname} sysctl -wq net.ipv4.conf.default.rp_filter=0
-
ip -netns ${hsname} link add veth0 type veth peer name ${rtveth}
ip -netns ${hsname} link set ${rtveth} netns ${rtname}
ip -netns ${hsname} addr add ${IPv6_HS_NETWORK}::${hid}/64 dev veth0 nodad
diff --git a/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh
index 79fb81e63c59..a649dba3cb77 100755
--- a/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh
+++ b/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh
@@ -250,11 +250,6 @@ setup_hs()
eval local rtname=\${rt_${rid}}
local rtveth=veth-t${tid}
- # disable the rp_filter otherwise the kernel gets confused about how
- # to route decap ipv4 packets.
- ip netns exec ${rtname} sysctl -wq net.ipv4.conf.all.rp_filter=0
- ip netns exec ${rtname} sysctl -wq net.ipv4.conf.default.rp_filter=0
-
ip -netns ${hsname} link add veth0 type veth peer name ${rtveth}
ip -netns ${hsname} link set ${rtveth} netns ${rtname}
ip -netns ${hsname} addr add ${IPv4_HS_NETWORK}.${hid}/24 dev veth0
diff --git a/tools/testing/selftests/net/srv6_end_flavors_test.sh b/tools/testing/selftests/net/srv6_end_flavors_test.sh
index 50563443a4ad..318487eda671 100755
--- a/tools/testing/selftests/net/srv6_end_flavors_test.sh
+++ b/tools/testing/selftests/net/srv6_end_flavors_test.sh
@@ -399,7 +399,7 @@ __get_srv6_rtcfg_id()
# Given the description of a router <id:op> as an input, the function returns
# the <op> token which represents the operation (e.g. End behavior with or
-# withouth flavors) configured for the node.
+# without flavors) configured for the node.
# Note that when the operation represents an End behavior with a list of
# flavors, the output is the ordered version of that list.
@@ -480,7 +480,7 @@ setup_rt_local_sids()
# all SIDs start with a common locator. Routes and SRv6 Endpoint
- # behavior instaces are grouped together in the 'localsid' table.
+ # behavior instances are grouped together in the 'localsid' table.
ip -netns "${nsname}" -6 rule \
add to "${LOCATOR_SERVICE}::/16" \
lookup "${LOCALSID_TABLE_ID}" prio 999
diff --git a/tools/testing/selftests/net/srv6_end_next_csid_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_next_csid_l3vpn_test.sh
index 87e414cc417c..ba730655a7bf 100755
--- a/tools/testing/selftests/net/srv6_end_next_csid_l3vpn_test.sh
+++ b/tools/testing/selftests/net/srv6_end_next_csid_l3vpn_test.sh
@@ -245,10 +245,8 @@
# that adopted in the use cases already examined (of course, it is necessary to
# consider the different SIDs/C-SIDs).
-# Kselftest framework requirement - SKIP code is 4.
-readonly ksft_skip=4
+source lib.sh
-readonly RDMSUFF="$(mktemp -u XXXXXXXX)"
readonly DUMMY_DEVNAME="dum0"
readonly VRF_TID=100
readonly VRF_DEVNAME="vrf-${VRF_TID}"
@@ -376,32 +374,18 @@ test_command_or_ksft_skip()
fi
}
-get_nodename()
-{
- local name="$1"
-
- echo "${name}-${RDMSUFF}"
-}
-
get_rtname()
{
local rtid="$1"
- get_nodename "rt-${rtid}"
+ echo "rt_${rtid}"
}
get_hsname()
{
local hsid="$1"
- get_nodename "hs-${hsid}"
-}
-
-__create_namespace()
-{
- local name="$1"
-
- ip netns add "${name}"
+ echo "hs_${hsid}"
}
create_router()
@@ -410,8 +394,7 @@ create_router()
local nsname
nsname="$(get_rtname "${rtid}")"
-
- __create_namespace "${nsname}"
+ setup_ns "${nsname}"
}
create_host()
@@ -420,28 +403,12 @@ create_host()
local nsname
nsname="$(get_hsname "${hsid}")"
-
- __create_namespace "${nsname}"
+ setup_ns "${nsname}"
}
cleanup()
{
- local nsname
- local i
-
- # destroy routers
- for i in ${ROUTERS}; do
- nsname="$(get_rtname "${i}")"
-
- ip netns del "${nsname}" &>/dev/null || true
- done
-
- # destroy hosts
- for i in ${HOSTS}; do
- nsname="$(get_hsname "${i}")"
-
- ip netns del "${nsname}" &>/dev/null || true
- done
+ cleanup_all_ns
# check whether the setup phase was completed successfully or not. In
# case of an error during the setup phase of the testing environment,
@@ -462,10 +429,10 @@ add_link_rt_pairs()
local nsname
local neigh_nsname
- nsname="$(get_rtname "${rt}")"
+ eval nsname=\${$(get_rtname "${rt}")}
for neigh in ${rt_neighs}; do
- neigh_nsname="$(get_rtname "${neigh}")"
+ eval neigh_nsname=\${$(get_rtname "${neigh}")}
ip link add "veth-rt-${rt}-${neigh}" netns "${nsname}" \
type veth peer name "veth-rt-${neigh}-${rt}" \
@@ -497,7 +464,7 @@ setup_rt_networking()
local devname
local neigh
- nsname="$(get_rtname "${rt}")"
+ eval nsname=\${$(get_rtname "${rt}")}
for neigh in ${rt_neighs}; do
devname="veth-rt-${rt}-${neigh}"
@@ -518,9 +485,6 @@ setup_rt_networking()
ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0
ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0
ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.forwarding=1
-
- ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.all.rp_filter=0
- ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.default.rp_filter=0
ip netns exec "${nsname}" sysctl -wq net.ipv4.ip_forward=1
}
@@ -596,7 +560,7 @@ setup_rt_local_sids()
local lcnode_func_prefix
local lcblock_prefix
- nsname="$(get_rtname "${rt}")"
+ eval nsname=\${$(get_rtname "${rt}")}
for neigh in ${rt_neighs}; do
devname="veth-rt-${rt}-${neigh}"
@@ -668,8 +632,8 @@ __setup_l3vpn()
local rtsrc_nsname
local rtdst_nsname
- rtsrc_nsname="$(get_rtname "${src}")"
- rtdst_nsname="$(get_rtname "${dst}")"
+ eval rtsrc_nsname=\${$(get_rtname "${src}")}
+ eval rtdst_nsname=\${$(get_rtname "${dst}")}
container="${LCBLOCK_ADDR}"
@@ -744,8 +708,8 @@ setup_hs()
local hsname
local rtname
- hsname="$(get_hsname "${hs}")"
- rtname="$(get_rtname "${rt}")"
+ eval hsname=\${$(get_hsname "${hs}")}
+ eval rtname=\${$(get_rtname "${rt}")}
ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0
ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0
@@ -791,11 +755,6 @@ setup_hs()
ip netns exec "${rtname}" \
sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".proxy_arp=1
- # disable the rp_filter otherwise the kernel gets confused about how
- # to route decap ipv4 packets.
- ip netns exec "${rtname}" \
- sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".rp_filter=0
-
ip netns exec "${rtname}" sh -c "echo 1 > /proc/sys/net/vrf/strict_mode"
}
@@ -880,7 +839,7 @@ check_rt_connectivity()
local prefix
local rtsrc_nsname
- rtsrc_nsname="$(get_rtname "${rtsrc}")"
+ eval rtsrc_nsname=\${$(get_rtname "${rtsrc}")}
prefix="$(get_network_prefix "${rtsrc}" "${rtdst}")"
@@ -903,7 +862,7 @@ check_hs_ipv6_connectivity()
local hsdst="$2"
local hssrc_nsname
- hssrc_nsname="$(get_hsname "${hssrc}")"
+ eval hssrc_nsname=\${$(get_hsname "${hssrc}")}
ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \
"${IPv6_HS_NETWORK}::${hsdst}" >/dev/null 2>&1
@@ -915,7 +874,7 @@ check_hs_ipv4_connectivity()
local hsdst="$2"
local hssrc_nsname
- hssrc_nsname="$(get_hsname "${hssrc}")"
+ eval hssrc_nsname=\${$(get_hsname "${hssrc}")}
ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \
"${IPv4_HS_NETWORK}.${hsdst}" >/dev/null 2>&1
@@ -1025,7 +984,7 @@ rt_x_nextcsid_end_behavior_test()
local nsname
local ret
- nsname="$(get_rtname "${rt}")"
+ eval nsname=\${$(get_rtname "${rt}")}
__nextcsid_end_behavior_test "${nsname}" "add" "${blen}" "${flen}"
ret="$?"
diff --git a/tools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh
index c79cb8ede17f..4b86040c58c6 100755
--- a/tools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh
+++ b/tools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh
@@ -287,10 +287,8 @@
# packet using the SRv6 End.DT46 behavior (associated with the SID fcff:1::d46)
# and sends it to the host hs-1.
-# Kselftest framework requirement - SKIP code is 4.
-readonly ksft_skip=4
+source lib.sh
-readonly RDMSUFF="$(mktemp -u XXXXXXXX)"
readonly DUMMY_DEVNAME="dum0"
readonly VRF_TID=100
readonly VRF_DEVNAME="vrf-${VRF_TID}"
@@ -418,32 +416,18 @@ test_command_or_ksft_skip()
fi
}
-get_nodename()
-{
- local name="$1"
-
- echo "${name}-${RDMSUFF}"
-}
-
get_rtname()
{
local rtid="$1"
- get_nodename "rt-${rtid}"
+ echo "rt_${rtid}"
}
get_hsname()
{
local hsid="$1"
- get_nodename "hs-${hsid}"
-}
-
-__create_namespace()
-{
- local name="$1"
-
- ip netns add "${name}"
+ echo "hs_${hsid}"
}
create_router()
@@ -452,15 +436,12 @@ create_router()
local nsname
nsname="$(get_rtname "${rtid}")"
+ setup_ns "${nsname}"
- __create_namespace "${nsname}"
-
+ eval nsname=\${$(get_rtname "${rtid}")}
ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0
ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0
ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.forwarding=1
-
- ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.all.rp_filter=0
- ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.default.rp_filter=0
ip netns exec "${nsname}" sysctl -wq net.ipv4.ip_forward=1
}
@@ -470,29 +451,12 @@ create_host()
local nsname
nsname="$(get_hsname "${hsid}")"
-
- __create_namespace "${nsname}"
+ setup_ns "${nsname}"
}
cleanup()
{
- local nsname
- local i
-
- # destroy routers
- for i in ${ROUTERS}; do
- nsname="$(get_rtname "${i}")"
-
- ip netns del "${nsname}" &>/dev/null || true
- done
-
- # destroy hosts
- for i in ${HOSTS}; do
- nsname="$(get_hsname "${i}")"
-
- ip netns del "${nsname}" &>/dev/null || true
- done
-
+ cleanup_all_ns
# check whether the setup phase was completed successfully or not. In
# case of an error during the setup phase of the testing environment,
# the selftest is considered as "skipped".
@@ -512,10 +476,10 @@ add_link_rt_pairs()
local nsname
local neigh_nsname
- nsname="$(get_rtname "${rt}")"
+ eval nsname=\${$(get_rtname "${rt}")}
for neigh in ${rt_neighs}; do
- neigh_nsname="$(get_rtname "${neigh}")"
+ eval neigh_nsname=\${$(get_rtname "${neigh}")}
ip link add "veth-rt-${rt}-${neigh}" netns "${nsname}" \
type veth peer name "veth-rt-${neigh}-${rt}" \
@@ -547,7 +511,7 @@ setup_rt_networking()
local devname
local neigh
- nsname="$(get_rtname "${rt}")"
+ eval nsname=\${$(get_rtname "${rt}")}
for neigh in ${rt_neighs}; do
devname="veth-rt-${rt}-${neigh}"
@@ -631,7 +595,7 @@ set_end_x_nextcsid()
local rt="$1"
local adj="$2"
- nsname="$(get_rtname "${rt}")"
+ eval nsname=\${$(get_rtname "${rt}")}
net_prefix="$(get_network_prefix "${rt}" "${adj}")"
lcnode_func_prefix="$(build_lcnode_func_prefix "${rt}")"
@@ -650,7 +614,7 @@ set_underlay_sids_reachability()
local rt="$1"
local rt_neighs="$2"
- nsname="$(get_rtname "${rt}")"
+ eval nsname=\${$(get_rtname "${rt}")}
for neigh in ${rt_neighs}; do
devname="veth-rt-${rt}-${neigh}"
@@ -685,7 +649,7 @@ setup_rt_local_sids()
local lcnode_func_prefix
local lcblock_prefix
- nsname="$(get_rtname "${rt}")"
+ eval nsname=\${$(get_rtname "${rt}")}
set_underlay_sids_reachability "${rt}" "${rt_neighs}"
@@ -728,8 +692,8 @@ __setup_l3vpn()
local rtsrc_nsname
local rtdst_nsname
- rtsrc_nsname="$(get_rtname "${src}")"
- rtdst_nsname="$(get_rtname "${dst}")"
+ eval rtsrc_nsname=\${$(get_rtname "${src}")}
+ eval rtdst_nsname=\${$(get_rtname "${dst}")}
container="${LCBLOCK_ADDR}"
@@ -804,8 +768,8 @@ setup_hs()
local hsname
local rtname
- hsname="$(get_hsname "${hs}")"
- rtname="$(get_rtname "${rt}")"
+ eval hsname=\${$(get_hsname "${hs}")}
+ eval rtname=\${$(get_rtname "${rt}")}
ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0
ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0
@@ -851,11 +815,6 @@ setup_hs()
ip netns exec "${rtname}" \
sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".proxy_arp=1
- # disable the rp_filter otherwise the kernel gets confused about how
- # to route decap ipv4 packets.
- ip netns exec "${rtname}" \
- sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".rp_filter=0
-
ip netns exec "${rtname}" sh -c "echo 1 > /proc/sys/net/vrf/strict_mode"
}
@@ -947,7 +906,7 @@ check_rt_connectivity()
local prefix
local rtsrc_nsname
- rtsrc_nsname="$(get_rtname "${rtsrc}")"
+ eval rtsrc_nsname=\${$(get_rtname "${rtsrc}")}
prefix="$(get_network_prefix "${rtsrc}" "${rtdst}")"
@@ -970,7 +929,7 @@ check_hs_ipv6_connectivity()
local hsdst="$2"
local hssrc_nsname
- hssrc_nsname="$(get_hsname "${hssrc}")"
+ eval hssrc_nsname=\${$(get_hsname "${hssrc}")}
ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \
"${IPv6_HS_NETWORK}::${hsdst}" >/dev/null 2>&1
@@ -982,7 +941,7 @@ check_hs_ipv4_connectivity()
local hsdst="$2"
local hssrc_nsname
- hssrc_nsname="$(get_hsname "${hssrc}")"
+ eval hssrc_nsname=\${$(get_hsname "${hssrc}")}
ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \
"${IPv4_HS_NETWORK}.${hsdst}" >/dev/null 2>&1
@@ -1093,7 +1052,7 @@ rt_x_nextcsid_end_x_behavior_test()
local nsname
local ret
- nsname="$(get_rtname "${rt}")"
+ eval nsname=\${$(get_rtname "${rt}")}
__nextcsid_end_x_behavior_test "${nsname}" "add" "${blen}" "${flen}"
ret="$?"
diff --git a/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh b/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh
index 28a775654b92..3efce1718c5f 100755
--- a/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh
+++ b/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh
@@ -166,10 +166,8 @@
# hs-4->hs-3 |IPv6 DA=fcff:1::e|SRH SIDs=fcff:3::d46|IPv6|...| (i.d)
#
-# Kselftest framework requirement - SKIP code is 4.
-readonly ksft_skip=4
+source lib.sh
-readonly RDMSUFF="$(mktemp -u XXXXXXXX)"
readonly VRF_TID=100
readonly VRF_DEVNAME="vrf-${VRF_TID}"
readonly RT2HS_DEVNAME="veth-t${VRF_TID}"
@@ -248,32 +246,18 @@ test_command_or_ksft_skip()
fi
}
-get_nodename()
-{
- local name="$1"
-
- echo "${name}-${RDMSUFF}"
-}
-
get_rtname()
{
local rtid="$1"
- get_nodename "rt-${rtid}"
+ echo "rt_${rtid}"
}
get_hsname()
{
local hsid="$1"
- get_nodename "hs-${hsid}"
-}
-
-__create_namespace()
-{
- local name="$1"
-
- ip netns add "${name}"
+ echo "hs_${hsid}"
}
create_router()
@@ -282,8 +266,7 @@ create_router()
local nsname
nsname="$(get_rtname "${rtid}")"
-
- __create_namespace "${nsname}"
+ setup_ns "${nsname}"
}
create_host()
@@ -292,29 +275,12 @@ create_host()
local nsname
nsname="$(get_hsname "${hsid}")"
-
- __create_namespace "${nsname}"
+ setup_ns "${nsname}"
}
cleanup()
{
- local nsname
- local i
-
- # destroy routers
- for i in ${ROUTERS}; do
- nsname="$(get_rtname "${i}")"
-
- ip netns del "${nsname}" &>/dev/null || true
- done
-
- # destroy hosts
- for i in ${HOSTS}; do
- nsname="$(get_hsname "${i}")"
-
- ip netns del "${nsname}" &>/dev/null || true
- done
-
+ cleanup_all_ns
# check whether the setup phase was completed successfully or not. In
# case of an error during the setup phase of the testing environment,
# the selftest is considered as "skipped".
@@ -334,10 +300,10 @@ add_link_rt_pairs()
local nsname
local neigh_nsname
- nsname="$(get_rtname "${rt}")"
+ eval nsname=\${$(get_rtname "${rt}")}
for neigh in ${rt_neighs}; do
- neigh_nsname="$(get_rtname "${neigh}")"
+ eval neigh_nsname=\${$(get_rtname "${neigh}")}
ip link add "veth-rt-${rt}-${neigh}" netns "${nsname}" \
type veth peer name "veth-rt-${neigh}-${rt}" \
@@ -369,7 +335,7 @@ setup_rt_networking()
local devname
local neigh
- nsname="$(get_rtname "${rt}")"
+ eval nsname=\${$(get_rtname "${rt}")}
for neigh in ${rt_neighs}; do
devname="veth-rt-${rt}-${neigh}"
@@ -387,9 +353,6 @@ setup_rt_networking()
ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0
ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0
ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.forwarding=1
-
- ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.all.rp_filter=0
- ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.default.rp_filter=0
ip netns exec "${nsname}" sysctl -wq net.ipv4.ip_forward=1
}
@@ -403,7 +366,7 @@ setup_rt_local_sids()
local nsname
local neigh
- nsname="$(get_rtname "${rt}")"
+ eval nsname=\${$(get_rtname "${rt}")}
for neigh in ${rt_neighs}; do
devname="veth-rt-${rt}-${neigh}"
@@ -469,7 +432,7 @@ __setup_rt_policy()
local policy=''
local n
- nsname="$(get_rtname "${encap_rt}")"
+ eval nsname=\${$(get_rtname "${encap_rt}")}
for n in ${end_rts}; do
policy="${policy}${VPN_LOCATOR_SERVICE}:${n}::${END_FUNC},"
@@ -516,8 +479,8 @@ setup_hs()
local hsname
local rtname
- hsname="$(get_hsname "${hs}")"
- rtname="$(get_rtname "${rt}")"
+ eval hsname=\${$(get_hsname "${hs}")}
+ eval rtname=\${$(get_rtname "${rt}")}
ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0
ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0
@@ -555,11 +518,6 @@ setup_hs()
ip netns exec "${rtname}" \
sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".proxy_arp=1
- # disable the rp_filter otherwise the kernel gets confused about how
- # to route decap ipv4 packets.
- ip netns exec "${rtname}" \
- sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".rp_filter=0
-
ip netns exec "${rtname}" sh -c "echo 1 > /proc/sys/net/vrf/strict_mode"
}
@@ -656,7 +614,7 @@ check_rt_connectivity()
local prefix
local rtsrc_nsname
- rtsrc_nsname="$(get_rtname "${rtsrc}")"
+ eval rtsrc_nsname=\${$(get_rtname "${rtsrc}")}
prefix="$(get_network_prefix "${rtsrc}" "${rtdst}")"
@@ -679,7 +637,7 @@ check_hs_ipv6_connectivity()
local hsdst="$2"
local hssrc_nsname
- hssrc_nsname="$(get_hsname "${hssrc}")"
+ eval hssrc_nsname=\${$(get_hsname "${hssrc}")}
ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \
"${IPv6_HS_NETWORK}::${hsdst}" >/dev/null 2>&1
@@ -691,7 +649,7 @@ check_hs_ipv4_connectivity()
local hsdst="$2"
local hssrc_nsname
- hssrc_nsname="$(get_hsname "${hssrc}")"
+ eval hssrc_nsname=\${$(get_hsname "${hssrc}")}
ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \
"${IPv4_HS_NETWORK}.${hsdst}" >/dev/null 2>&1
diff --git a/tools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh b/tools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh
index cb4177d41b21..cabc70538ffe 100755
--- a/tools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh
+++ b/tools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh
@@ -116,10 +116,8 @@
# hs-2->hs-1 |IPv6 DA=fcff:4::e|SRH SIDs=fcff:3::e,fcff:1::d2|eth|...| (i.b)
#
-# Kselftest framework requirement - SKIP code is 4.
-readonly ksft_skip=4
+source lib.sh
-readonly RDMSUFF="$(mktemp -u XXXXXXXX)"
readonly DUMMY_DEVNAME="dum0"
readonly RT2HS_DEVNAME="veth-hs"
readonly HS_VETH_NAME="veth0"
@@ -199,32 +197,18 @@ test_command_or_ksft_skip()
fi
}
-get_nodename()
-{
- local name="$1"
-
- echo "${name}-${RDMSUFF}"
-}
-
get_rtname()
{
local rtid="$1"
- get_nodename "rt-${rtid}"
+ echo "rt_${rtid}"
}
get_hsname()
{
local hsid="$1"
- get_nodename "hs-${hsid}"
-}
-
-__create_namespace()
-{
- local name="$1"
-
- ip netns add "${name}"
+ echo "hs_${hsid}"
}
create_router()
@@ -233,8 +217,7 @@ create_router()
local nsname
nsname="$(get_rtname "${rtid}")"
-
- __create_namespace "${nsname}"
+ setup_ns "${nsname}"
}
create_host()
@@ -243,28 +226,12 @@ create_host()
local nsname
nsname="$(get_hsname "${hsid}")"
-
- __create_namespace "${nsname}"
+ setup_ns "${nsname}"
}
cleanup()
{
- local nsname
- local i
-
- # destroy routers
- for i in ${ROUTERS}; do
- nsname="$(get_rtname "${i}")"
-
- ip netns del "${nsname}" &>/dev/null || true
- done
-
- # destroy hosts
- for i in ${HOSTS}; do
- nsname="$(get_hsname "${i}")"
-
- ip netns del "${nsname}" &>/dev/null || true
- done
+ cleanup_all_ns
# check whether the setup phase was completed successfully or not. In
# case of an error during the setup phase of the testing environment,
@@ -285,10 +252,10 @@ add_link_rt_pairs()
local nsname
local neigh_nsname
- nsname="$(get_rtname "${rt}")"
+ eval nsname=\${$(get_rtname "${rt}")}
for neigh in ${rt_neighs}; do
- neigh_nsname="$(get_rtname "${neigh}")"
+ eval neigh_nsname=\${$(get_rtname "${neigh}")}
ip link add "veth-rt-${rt}-${neigh}" netns "${nsname}" \
type veth peer name "veth-rt-${neigh}-${rt}" \
@@ -320,7 +287,7 @@ setup_rt_networking()
local devname
local neigh
- nsname="$(get_rtname "${rt}")"
+ eval nsname=\${$(get_rtname "${rt}")}
for neigh in ${rt_neighs}; do
devname="veth-rt-${rt}-${neigh}"
@@ -341,9 +308,6 @@ setup_rt_networking()
ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0
ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0
ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.forwarding=1
-
- ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.all.rp_filter=0
- ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.default.rp_filter=0
ip netns exec "${nsname}" sysctl -wq net.ipv4.ip_forward=1
}
@@ -357,7 +321,7 @@ setup_rt_local_sids()
local nsname
local neigh
- nsname="$(get_rtname "${rt}")"
+ eval nsname=\${$(get_rtname "${rt}")}
for neigh in ${rt_neighs}; do
devname="veth-rt-${rt}-${neigh}"
@@ -407,7 +371,7 @@ __setup_rt_policy()
local policy=''
local n
- nsname="$(get_rtname "${encap_rt}")"
+ eval nsname=\${$(get_rtname "${encap_rt}")}
for n in ${end_rts}; do
policy="${policy}${VPN_LOCATOR_SERVICE}:${n}::${END_FUNC},"
@@ -446,7 +410,7 @@ setup_decap()
local rt="$1"
local nsname
- nsname="$(get_rtname "${rt}")"
+ eval nsname=\${$(get_rtname "${rt}")}
# Local End.DX2 behavior
ip -netns "${nsname}" -6 route \
@@ -463,8 +427,8 @@ setup_hs()
local hsname
local rtname
- hsname="$(get_hsname "${hs}")"
- rtname="$(get_rtname "${rt}")"
+ eval hsname=\${$(get_hsname "${hs}")}
+ eval rtname=\${$(get_rtname "${rt}")}
ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0
ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0
@@ -486,11 +450,6 @@ setup_hs()
add "${IPv4_HS_NETWORK}.254/24" dev "${RT2HS_DEVNAME}"
ip -netns "${rtname}" link set "${RT2HS_DEVNAME}" up
-
- # disable the rp_filter otherwise the kernel gets confused about how
- # to route decap ipv4 packets.
- ip netns exec "${rtname}" \
- sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".rp_filter=0
}
# set an auto-generated mac address
@@ -508,7 +467,7 @@ set_mac_address()
local ifname="$4"
local nsname
- nsname=$(get_nodename "${nodename}")
+ eval nsname=\${${nodename}}
ip -netns "${nsname}" link set dev "${ifname}" down
@@ -532,7 +491,7 @@ set_host_l2peer()
local hssrc_name
local ipaddr
- hssrc_name="$(get_hsname "${hssrc}")"
+ eval hssrc_name=\${$(get_hsname "${hssrc}")}
if [ "${proto}" -eq 6 ]; then
ipaddr="${ipprefix}::${hsdst}"
@@ -562,7 +521,7 @@ setup_l2vpn()
local rtdst="${hsdst}"
# set fixed mac for source node and the neigh MAC address
- set_mac_address "hs-${hssrc}" "${hssrc}" "${hssrc}" "${HS_VETH_NAME}"
+ set_mac_address "hs_${hssrc}" "${hssrc}" "${hssrc}" "${HS_VETH_NAME}"
set_host_l2peer "${hssrc}" "${hsdst}" "${IPv6_HS_NETWORK}" 6
set_host_l2peer "${hssrc}" "${hsdst}" "${IPv4_HS_NETWORK}" 4
@@ -570,7 +529,7 @@ setup_l2vpn()
# to the mac address of the remote peer (L2 VPN destination host).
# Otherwise, traffic coming from the source host is dropped at the
# ingress router.
- set_mac_address "rt-${rtsrc}" "${hsdst}" 254 "${RT2HS_DEVNAME}"
+ set_mac_address "rt_${rtsrc}" "${hsdst}" 254 "${RT2HS_DEVNAME}"
# set the SRv6 Policies at the ingress router
setup_rt_policy_ipv6 "${hsdst}" "${rtsrc}" "${end_rts}" "${rtdst}" \
@@ -647,7 +606,7 @@ check_rt_connectivity()
local prefix
local rtsrc_nsname
- rtsrc_nsname="$(get_rtname "${rtsrc}")"
+ eval rtsrc_nsname=\${$(get_rtname "${rtsrc}")}
prefix="$(get_network_prefix "${rtsrc}" "${rtdst}")"
@@ -670,7 +629,7 @@ check_hs_ipv6_connectivity()
local hsdst="$2"
local hssrc_nsname
- hssrc_nsname="$(get_hsname "${hssrc}")"
+ eval hssrc_nsname=\${$(get_hsname "${hssrc}")}
ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \
"${IPv6_HS_NETWORK}::${hsdst}" >/dev/null 2>&1
@@ -682,7 +641,7 @@ check_hs_ipv4_connectivity()
local hsdst="$2"
local hssrc_nsname
- hssrc_nsname="$(get_hsname "${hssrc}")"
+ eval hssrc_nsname=\${$(get_hsname "${hssrc}")}
ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \
"${IPv4_HS_NETWORK}.${hsdst}" >/dev/null 2>&1
diff --git a/tools/testing/selftests/net/test_bridge_neigh_suppress.sh b/tools/testing/selftests/net/test_bridge_neigh_suppress.sh
index 02b986c9c247..9067197c9055 100755
--- a/tools/testing/selftests/net/test_bridge_neigh_suppress.sh
+++ b/tools/testing/selftests/net/test_bridge_neigh_suppress.sh
@@ -51,7 +51,9 @@ ret=0
# All tests in this script. Can be overridden with -t option.
TESTS="
neigh_suppress_arp
+ neigh_suppress_uc_arp
neigh_suppress_ns
+ neigh_suppress_uc_ns
neigh_vlan_suppress_arp
neigh_vlan_suppress_ns
"
@@ -388,6 +390,52 @@ neigh_suppress_arp()
neigh_suppress_arp_common $vid $sip $tip
}
+neigh_suppress_uc_arp_common()
+{
+ local vid=$1; shift
+ local sip=$1; shift
+ local tip=$1; shift
+ local tmac
+
+ echo
+ echo "Unicast ARP, per-port ARP suppression - VLAN $vid"
+ echo "-----------------------------------------------"
+
+ run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress on"
+ run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress on\""
+ log_test $? 0 "\"neigh_suppress\" is on"
+
+ tmac=$(ip -n $h2 -j -p link show eth0.$vid | jq -r '.[]["address"]')
+ run_cmd "bridge -n $sw1 fdb replace $tmac dev vx0 master static vlan $vid"
+ run_cmd "ip -n $sw1 neigh replace $tip lladdr $tmac nud permanent dev br0.$vid"
+
+ run_cmd "tc -n $h1 qdisc replace dev eth0.$vid clsact"
+ run_cmd "tc -n $h1 filter replace dev eth0.$vid ingress pref 1 handle 101 proto arp flower arp_sip $tip arp_op reply action pass"
+
+ run_cmd "tc -n $h2 qdisc replace dev eth0.$vid clsact"
+ run_cmd "tc -n $h2 filter replace dev eth0.$vid egress pref 1 handle 101 proto arp flower arp_tip $sip arp_op reply action pass"
+
+ run_cmd "ip netns exec $h1 mausezahn eth0.$vid -c 1 -a own -b $tmac -t arp 'request sip=$sip, tip=$tip, tmac=$tmac' -q"
+ tc_check_packets $h1 "dev eth0.$vid ingress" 101 1
+ log_test $? 0 "Unicast ARP, suppression on, h1 filter"
+ tc_check_packets $h2 "dev eth0.$vid egress" 101 1
+ log_test $? 0 "Unicast ARP, suppression on, h2 filter"
+}
+
+neigh_suppress_uc_arp()
+{
+ local vid=10
+ local sip=192.0.2.1
+ local tip=192.0.2.2
+
+ neigh_suppress_uc_arp_common $vid $sip $tip
+
+ vid=20
+ sip=192.0.2.17
+ tip=192.0.2.18
+ neigh_suppress_uc_arp_common $vid $sip $tip
+}
+
neigh_suppress_ns_common()
{
local vid=$1; shift
@@ -494,6 +542,78 @@ neigh_suppress_ns()
neigh_suppress_ns_common $vid $saddr $daddr $maddr
}
+icmpv6_header_get()
+{
+ local csum=$1; shift
+ local tip=$1; shift
+ local type
+ local p
+
+ # Type 135 (Neighbor Solicitation), hex format
+ type="87"
+ p=$(:
+ )"$type:"$( : ICMPv6.type
+ )"00:"$( : ICMPv6.code
+ )"$csum:"$( : ICMPv6.checksum
+ )"00:00:00:00:"$( : Reserved
+ )"$tip:"$( : Target Address
+ )
+ echo $p
+}
+
+neigh_suppress_uc_ns_common()
+{
+ local vid=$1; shift
+ local sip=$1; shift
+ local dip=$1; shift
+ local full_dip=$1; shift
+ local csum=$1; shift
+ local tmac
+
+ echo
+ echo "Unicast NS, per-port NS suppression - VLAN $vid"
+ echo "---------------------------------------------"
+
+ run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress on"
+ run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress on\""
+ log_test $? 0 "\"neigh_suppress\" is on"
+
+ tmac=$(ip -n $h2 -j -p link show eth0.$vid | jq -r '.[]["address"]')
+ run_cmd "bridge -n $sw1 fdb replace $tmac dev vx0 master static vlan $vid"
+ run_cmd "ip -n $sw1 -6 neigh replace $dip lladdr $tmac nud permanent dev br0.$vid"
+
+ run_cmd "tc -n $h1 qdisc replace dev eth0.$vid clsact"
+ run_cmd "tc -n $h1 filter replace dev eth0.$vid ingress pref 1 handle 101 proto ipv6 flower ip_proto icmpv6 src_ip $dip type 136 code 0 action pass"
+
+ run_cmd "tc -n $h2 qdisc replace dev eth0.$vid clsact"
+ run_cmd "tc -n $h2 filter replace dev eth0.$vid egress pref 1 handle 101 proto ipv6 flower ip_proto icmpv6 dst_ip $sip type 136 code 0 action pass"
+
+ run_cmd "ip netns exec $h1 mausezahn -6 eth0.$vid -c 1 -a own -b $tmac -A $sip -B $dip -t ip hop=255,next=58,payload=$(icmpv6_header_get $csum $full_dip) -q"
+ tc_check_packets $h1 "dev eth0.$vid ingress" 101 1
+ log_test $? 0 "Unicast NS, suppression on, h1 filter"
+ tc_check_packets $h2 "dev eth0.$vid egress" 101 1
+ log_test $? 0 "Unicast NS, suppression on, h2 filter"
+}
+
+neigh_suppress_uc_ns()
+{
+ local vid=10
+ local saddr=2001:db8:1::1
+ local daddr=2001:db8:1::2
+ local full_daddr=20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:02
+ local csum="ef:79"
+
+ neigh_suppress_uc_ns_common $vid $saddr $daddr $full_daddr $csum
+
+ vid=20
+ saddr=2001:db8:2::1
+ daddr=2001:db8:2::2
+ full_daddr=20:01:0d:b8:00:02:00:00:00:00:00:00:00:00:00:02
+ csum="ef:76"
+
+ neigh_suppress_uc_ns_common $vid $saddr $daddr $full_daddr $csum
+}
+
neigh_vlan_suppress_arp()
{
local vid1=10
@@ -825,6 +945,11 @@ if [ ! -x "$(command -v jq)" ]; then
exit $ksft_skip
fi
+if [ ! -x "$(command -v mausezahn)" ]; then
+ echo "SKIP: Could not run test without mausezahn tool"
+ exit $ksft_skip
+fi
+
bridge link help 2>&1 | grep -q "neigh_vlan_suppress"
if [ $? -ne 0 ]; then
echo "SKIP: iproute2 bridge too old, missing per-VLAN neighbor suppression support"
diff --git a/tools/testing/selftests/net/udpgro.sh b/tools/testing/selftests/net/udpgro.sh
index d5ffd8c9172e..1dc337c709f8 100755
--- a/tools/testing/selftests/net/udpgro.sh
+++ b/tools/testing/selftests/net/udpgro.sh
@@ -3,7 +3,7 @@
#
# Run a series of udpgro functional tests.
-source net_helper.sh
+source lib.sh
readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)"
diff --git a/tools/testing/selftests/net/udpgro_bench.sh b/tools/testing/selftests/net/udpgro_bench.sh
index 815fad8c53a8..54fa4821bc5e 100755
--- a/tools/testing/selftests/net/udpgro_bench.sh
+++ b/tools/testing/selftests/net/udpgro_bench.sh
@@ -3,7 +3,7 @@
#
# Run a series of udpgro benchmarks
-source net_helper.sh
+source lib.sh
readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)"
diff --git a/tools/testing/selftests/net/udpgro_frglist.sh b/tools/testing/selftests/net/udpgro_frglist.sh
index 5f3d1a110d11..9a2cfec1153e 100755
--- a/tools/testing/selftests/net/udpgro_frglist.sh
+++ b/tools/testing/selftests/net/udpgro_frglist.sh
@@ -3,7 +3,7 @@
#
# Run a series of udpgro benchmarks
-source net_helper.sh
+source lib.sh
readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)"
diff --git a/tools/testing/selftests/net/udpgro_fwd.sh b/tools/testing/selftests/net/udpgro_fwd.sh
index f22f6c66997e..a39fdc4aa2ff 100755
--- a/tools/testing/selftests/net/udpgro_fwd.sh
+++ b/tools/testing/selftests/net/udpgro_fwd.sh
@@ -1,7 +1,7 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
-source net_helper.sh
+source lib.sh
BPF_FILE="lib/xdp_dummy.bpf.o"
readonly BASE="ns-$(mktemp -u XXXXXX)"
diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json
index ddc97ecd8b39..9aa44d8176d9 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json
@@ -600,5 +600,40 @@
"matchPattern": "qdisc hfsc",
"matchCount": "1",
"teardown": ["$TC qdisc del dev $DEV1 root handle 1: drr"]
+ },
+ {
+ "id": "309e",
+ "name": "Test HFSC eltree double add with reentrant enqueue behaviour on netem",
+ "category": [
+ "qdisc",
+ "hfsc"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$IP link set dev $DUMMY up || true",
+ "$IP addr add 10.10.11.10/24 dev $DUMMY || true",
+ "$TC qdisc add dev $DUMMY root handle 1: tbf rate 8bit burst 100b latency 1s",
+ "$TC qdisc add dev $DUMMY parent 1:0 handle 2:0 hfsc",
+ "ping -I $DUMMY -f -c10 -s48 -W0.001 10.10.11.1 || true",
+ "$TC class add dev $DUMMY parent 2:0 classid 2:1 hfsc rt m2 20Kbit",
+ "$TC qdisc add dev $DUMMY parent 2:1 handle 3:0 netem duplicate 100%",
+ "$TC class add dev $DUMMY parent 2:0 classid 2:2 hfsc rt m2 20Kbit",
+ "$TC filter add dev $DUMMY parent 2:0 protocol ip prio 1 u32 match ip dst 10.10.11.2/32 flowid 2:1",
+ "$TC filter add dev $DUMMY parent 2:0 protocol ip prio 2 u32 match ip dst 10.10.11.3/32 flowid 2:2",
+ "ping -c 1 10.10.11.2 -I$DUMMY > /dev/null || true",
+ "$TC filter del dev $DUMMY parent 2:0 protocol ip prio 1",
+ "$TC class del dev $DUMMY classid 2:1",
+ "ping -c 1 10.10.11.3 -I$DUMMY > /dev/null || true"
+ ],
+ "cmdUnderTest": "$TC class change dev $DUMMY parent 2:0 classid 2:2 hfsc sc m2 20Kbit",
+ "expExitCode": "0",
+ "verifyCmd": "$TC -j class ls dev $DUMMY classid 2:1",
+ "matchJSON": [],
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1:0 root",
+ "$IP addr del 10.10.10.10/24 dev $DUMMY || true"
+ ]
}
]
diff --git a/tools/testing/selftests/tc-testing/tdc.sh b/tools/testing/selftests/tc-testing/tdc.sh
index cddff1772e10..589b18ed758a 100755
--- a/tools/testing/selftests/tc-testing/tdc.sh
+++ b/tools/testing/selftests/tc-testing/tdc.sh
@@ -31,6 +31,10 @@ try_modprobe act_skbedit
try_modprobe act_skbmod
try_modprobe act_tunnel_key
try_modprobe act_vlan
+try_modprobe act_ife
+try_modprobe act_meta_mark
+try_modprobe act_meta_skbtcindex
+try_modprobe act_meta_skbprio
try_modprobe cls_basic
try_modprobe cls_bpf
try_modprobe cls_cgroup
diff --git a/tools/testing/selftests/wireguard/netns.sh b/tools/testing/selftests/wireguard/netns.sh
index 55500f901fbc..a8f550aecb35 100755
--- a/tools/testing/selftests/wireguard/netns.sh
+++ b/tools/testing/selftests/wireguard/netns.sh
@@ -611,6 +611,35 @@ n0 wg set wg0 peer "$pub2" allowed-ips "$allowedips"
} < <(n0 wg show wg0 allowed-ips)
ip0 link del wg0
+allowedips=( )
+for i in {1..197}; do
+ allowedips+=( 192.168.0.$i )
+ allowedips+=( abcd::$i )
+done
+saved_ifs="$IFS"
+IFS=,
+allowedips="${allowedips[*]}"
+IFS="$saved_ifs"
+ip0 link add wg0 type wireguard
+n0 wg set wg0 peer "$pub1" allowed-ips "$allowedips"
+n0 wg set wg0 peer "$pub1" allowed-ips -192.168.0.1/32,-192.168.0.20/32,-192.168.0.100/32,-abcd::1/128,-abcd::20/128,-abcd::100/128
+{
+ read -r pub allowedips
+ [[ $pub == "$pub1" ]]
+ i=0
+ for ip in $allowedips; do
+ [[ $ip != "192.168.0.1" ]]
+ [[ $ip != "192.168.0.20" ]]
+ [[ $ip != "192.168.0.100" ]]
+ [[ $ip != "abcd::1" ]]
+ [[ $ip != "abcd::20" ]]
+ [[ $ip != "abcd::100" ]]
+ ((++i))
+ done
+ ((i == 388))
+} < <(n0 wg show wg0 allowed-ips)
+ip0 link del wg0
+
! n0 wg show doesnotexist || false
ip0 link add wg0 type wireguard
diff --git a/tools/testing/selftests/wireguard/qemu/Makefile b/tools/testing/selftests/wireguard/qemu/Makefile
index 35856b11c143..791d21b736a5 100644
--- a/tools/testing/selftests/wireguard/qemu/Makefile
+++ b/tools/testing/selftests/wireguard/qemu/Makefile
@@ -43,7 +43,7 @@ $(eval $(call tar_download,IPROUTE2,iproute2,5.17.0,.tar.gz,https://www.kernel.o
$(eval $(call tar_download,IPTABLES,iptables,1.8.7,.tar.bz2,https://www.netfilter.org/projects/iptables/files/,c109c96bb04998cd44156622d36f8e04b140701ec60531a10668cfdff5e8d8f0))
$(eval $(call tar_download,NMAP,nmap,7.92,.tgz,https://nmap.org/dist/,064183ea642dc4c12b1ab3b5358ce1cef7d2e7e11ffa2849f16d339f5b717117))
$(eval $(call tar_download,IPUTILS,iputils,s20190709,.tar.gz,https://github.com/iputils/iputils/archive/s20190709.tar.gz/#,a15720dd741d7538dd2645f9f516d193636ae4300ff7dbc8bfca757bf166490a))
-$(eval $(call tar_download,WIREGUARD_TOOLS,wireguard-tools,1.0.20210914,.tar.xz,https://git.zx2c4.com/wireguard-tools/snapshot/,97ff31489217bb265b7ae850d3d0f335ab07d2652ba1feec88b734bc96bd05ac))
+$(eval $(call tar_download,WIREGUARD_TOOLS,wireguard-tools,1.0.20250521,.tar.xz,https://git.zx2c4.com/wireguard-tools/snapshot/,b6f2628b85b1b23cc06517ec9c74f82d52c4cdbd020f3dd2f00c972a1782950e))
export CFLAGS := -O3 -pipe
ifeq ($(HOST_ARCH),$(ARCH))
@@ -401,6 +401,7 @@ $(BASH_PATH)/.installed: $(BASH_TAR)
flock -s $<.lock tar -C $(BUILD_PATH) -xf $<
touch $@
+$(BASH_PATH)/bash: export CFLAGS_FOR_BUILD += -std=gnu17
$(BASH_PATH)/bash: | $(BASH_PATH)/.installed $(USERSPACE_DEPS)
cd $(BASH_PATH) && ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --without-bash-malloc --disable-debugger --disable-help-builtin --disable-history --disable-progcomp --disable-readline --disable-mem-scramble
$(MAKE) -C $(BASH_PATH)
diff --git a/tools/testing/selftests/wireguard/qemu/debug.config b/tools/testing/selftests/wireguard/qemu/debug.config
index c305d2f613f0..5d39f43dd667 100644
--- a/tools/testing/selftests/wireguard/qemu/debug.config
+++ b/tools/testing/selftests/wireguard/qemu/debug.config
@@ -22,7 +22,6 @@ CONFIG_HAVE_ARCH_KASAN=y
CONFIG_KASAN=y
CONFIG_KASAN_INLINE=y
CONFIG_UBSAN=y
-CONFIG_UBSAN_SANITIZE_ALL=y
CONFIG_DEBUG_KMEMLEAK=y
CONFIG_DEBUG_STACK_USAGE=y
CONFIG_DEBUG_SHIRQ=y
diff --git a/tools/testing/vsock/timeout.c b/tools/testing/vsock/timeout.c
index 44aee49b6cee..1453d38e08bb 100644
--- a/tools/testing/vsock/timeout.c
+++ b/tools/testing/vsock/timeout.c
@@ -21,6 +21,7 @@
#include <stdbool.h>
#include <unistd.h>
#include <stdio.h>
+#include <time.h>
#include "timeout.h"
static volatile bool timeout;
@@ -28,6 +29,8 @@ static volatile bool timeout;
/* SIGALRM handler function. Do not use sleep(2), alarm(2), or
* setitimer(2) while using this API - they may interfere with each
* other.
+ *
+ * If you need to sleep, please use timeout_sleep() provided by this API.
*/
void sigalrm(int signo)
{
@@ -58,3 +61,18 @@ void timeout_end(void)
alarm(0);
timeout = false;
}
+
+/* Sleep in a timeout section.
+ *
+ * nanosleep(2) can be used with this API since POSIX.1 explicitly
+ * specifies that it does not interact with signals.
+ */
+int timeout_usleep(useconds_t usec)
+{
+ struct timespec ts = {
+ .tv_sec = usec / 1000000,
+ .tv_nsec = (usec % 1000000) * 1000,
+ };
+
+ return nanosleep(&ts, NULL);
+}
diff --git a/tools/testing/vsock/timeout.h b/tools/testing/vsock/timeout.h
index ecb7c840e65a..1c3fcad87a49 100644
--- a/tools/testing/vsock/timeout.h
+++ b/tools/testing/vsock/timeout.h
@@ -11,5 +11,6 @@ void sigalrm(int signo);
void timeout_begin(unsigned int seconds);
void timeout_check(const char *operation);
void timeout_end(void);
+int timeout_usleep(useconds_t usec);
#endif /* TIMEOUT_H */
diff --git a/tools/testing/vsock/util.c b/tools/testing/vsock/util.c
index de25892f865f..0c7e9cbcbc85 100644
--- a/tools/testing/vsock/util.c
+++ b/tools/testing/vsock/util.c
@@ -17,6 +17,7 @@
#include <assert.h>
#include <sys/epoll.h>
#include <sys/mman.h>
+#include <linux/sockios.h>
#include "timeout.h"
#include "control.h"
@@ -96,6 +97,30 @@ void vsock_wait_remote_close(int fd)
close(epollfd);
}
+/* Wait until transport reports no data left to be sent.
+ * Return false if transport does not implement the unsent_bytes() callback.
+ */
+bool vsock_wait_sent(int fd)
+{
+ int ret, sock_bytes_unsent;
+
+ timeout_begin(TIMEOUT);
+ do {
+ ret = ioctl(fd, SIOCOUTQ, &sock_bytes_unsent);
+ if (ret < 0) {
+ if (errno == EOPNOTSUPP)
+ break;
+
+ perror("ioctl(SIOCOUTQ)");
+ exit(EXIT_FAILURE);
+ }
+ timeout_check("SIOCOUTQ");
+ } while (sock_bytes_unsent != 0);
+ timeout_end();
+
+ return !ret;
+}
+
/* Create socket <type>, bind to <cid, port> and return the file descriptor. */
int vsock_bind(unsigned int cid, unsigned int port, int type)
{
@@ -798,3 +823,16 @@ void enable_so_zerocopy_check(int fd)
setsockopt_int_check(fd, SOL_SOCKET, SO_ZEROCOPY, 1,
"setsockopt SO_ZEROCOPY");
}
+
+void enable_so_linger(int fd, int timeout)
+{
+ struct linger optval = {
+ .l_onoff = 1,
+ .l_linger = timeout
+ };
+
+ if (setsockopt(fd, SOL_SOCKET, SO_LINGER, &optval, sizeof(optval))) {
+ perror("setsockopt(SO_LINGER)");
+ exit(EXIT_FAILURE);
+ }
+}
diff --git a/tools/testing/vsock/util.h b/tools/testing/vsock/util.h
index d1f765ce3eee..5e2db67072d5 100644
--- a/tools/testing/vsock/util.h
+++ b/tools/testing/vsock/util.h
@@ -54,6 +54,7 @@ int vsock_stream_listen(unsigned int cid, unsigned int port);
int vsock_seqpacket_accept(unsigned int cid, unsigned int port,
struct sockaddr_vm *clientaddrp);
void vsock_wait_remote_close(int fd);
+bool vsock_wait_sent(int fd);
void send_buf(int fd, const void *buf, size_t len, int flags,
ssize_t expected_ret);
void recv_buf(int fd, void *buf, size_t len, int flags, ssize_t expected_ret);
@@ -79,4 +80,5 @@ void setsockopt_int_check(int fd, int level, int optname, int val,
void setsockopt_timeval_check(int fd, int level, int optname,
struct timeval val, char const *errmsg);
void enable_so_zerocopy_check(int fd);
+void enable_so_linger(int fd, int timeout);
#endif /* UTIL_H */
diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c
index 613551132a96..f669baaa0dca 100644
--- a/tools/testing/vsock/vsock_test.c
+++ b/tools/testing/vsock/vsock_test.c
@@ -21,7 +21,6 @@
#include <poll.h>
#include <signal.h>
#include <sys/ioctl.h>
-#include <linux/sockios.h>
#include <linux/time64.h>
#include "vsock_test_zerocopy.h"
@@ -1058,18 +1057,39 @@ static void sigpipe(int signo)
have_sigpipe = 1;
}
+#define SEND_SLEEP_USEC (10 * 1000)
+
static void test_stream_check_sigpipe(int fd)
{
ssize_t res;
have_sigpipe = 0;
- res = send(fd, "A", 1, 0);
- if (res != -1) {
- fprintf(stderr, "expected send(2) failure, got %zi\n", res);
- exit(EXIT_FAILURE);
+ /* When the other peer calls shutdown(SHUT_RD), there is a chance that
+ * the send() call could occur before the message carrying the close
+ * information arrives over the transport. In such cases, the send()
+ * might still succeed. To avoid this race, let's retry the send() call
+ * a few times, ensuring the test is more reliable.
+ */
+ timeout_begin(TIMEOUT);
+ while(1) {
+ res = send(fd, "A", 1, 0);
+ if (res == -1 && errno != EINTR)
+ break;
+
+ /* Sleep a little before trying again to avoid flooding the
+ * other peer and filling its receive buffer, causing
+ * false-negative.
+ */
+ timeout_usleep(SEND_SLEEP_USEC);
+ timeout_check("send");
}
+ timeout_end();
+ if (errno != EPIPE) {
+ fprintf(stderr, "unexpected send(2) errno %d\n", errno);
+ exit(EXIT_FAILURE);
+ }
if (!have_sigpipe) {
fprintf(stderr, "SIGPIPE expected\n");
exit(EXIT_FAILURE);
@@ -1077,12 +1097,21 @@ static void test_stream_check_sigpipe(int fd)
have_sigpipe = 0;
- res = send(fd, "A", 1, MSG_NOSIGNAL);
- if (res != -1) {
- fprintf(stderr, "expected send(2) failure, got %zi\n", res);
- exit(EXIT_FAILURE);
+ timeout_begin(TIMEOUT);
+ while(1) {
+ res = send(fd, "A", 1, MSG_NOSIGNAL);
+ if (res == -1 && errno != EINTR)
+ break;
+
+ timeout_usleep(SEND_SLEEP_USEC);
+ timeout_check("send");
}
+ timeout_end();
+ if (errno != EPIPE) {
+ fprintf(stderr, "unexpected send(2) errno %d\n", errno);
+ exit(EXIT_FAILURE);
+ }
if (have_sigpipe) {
fprintf(stderr, "SIGPIPE not expected\n");
exit(EXIT_FAILURE);
@@ -1250,7 +1279,7 @@ static void test_unsent_bytes_server(const struct test_opts *opts, int type)
static void test_unsent_bytes_client(const struct test_opts *opts, int type)
{
unsigned char buf[MSG_BUF_IOCTL_LEN];
- int ret, fd, sock_bytes_unsent;
+ int fd;
fd = vsock_connect(opts->peer_cid, opts->peer_port, type);
if (fd < 0) {
@@ -1267,22 +1296,12 @@ static void test_unsent_bytes_client(const struct test_opts *opts, int type)
/* SIOCOUTQ isn't guaranteed to instantly track sent data. Even though
* the "RECEIVED" message means that the other side has received the
* data, there can be a delay in our kernel before updating the "unsent
- * bytes" counter. Repeat SIOCOUTQ until it returns 0.
+ * bytes" counter. vsock_wait_sent() will repeat SIOCOUTQ until it
+ * returns 0.
*/
- timeout_begin(TIMEOUT);
- do {
- ret = ioctl(fd, SIOCOUTQ, &sock_bytes_unsent);
- if (ret < 0) {
- if (errno == EOPNOTSUPP) {
- fprintf(stderr, "Test skipped, SIOCOUTQ not supported.\n");
- break;
- }
- perror("ioctl");
- exit(EXIT_FAILURE);
- }
- timeout_check("SIOCOUTQ");
- } while (sock_bytes_unsent != 0);
- timeout_end();
+ if (!vsock_wait_sent(fd))
+ fprintf(stderr, "Test skipped, SIOCOUTQ not supported.\n");
+
close(fd);
}
@@ -1794,10 +1813,6 @@ static void test_stream_connect_retry_server(const struct test_opts *opts)
static void test_stream_linger_client(const struct test_opts *opts)
{
- struct linger optval = {
- .l_onoff = 1,
- .l_linger = 1
- };
int fd;
fd = vsock_stream_connect(opts->peer_cid, opts->peer_port);
@@ -1806,15 +1821,58 @@ static void test_stream_linger_client(const struct test_opts *opts)
exit(EXIT_FAILURE);
}
- if (setsockopt(fd, SOL_SOCKET, SO_LINGER, &optval, sizeof(optval))) {
- perror("setsockopt(SO_LINGER)");
+ enable_so_linger(fd, 1);
+ close(fd);
+}
+
+static void test_stream_linger_server(const struct test_opts *opts)
+{
+ int fd;
+
+ fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL);
+ if (fd < 0) {
+ perror("accept");
exit(EXIT_FAILURE);
}
+ vsock_wait_remote_close(fd);
close(fd);
}
-static void test_stream_linger_server(const struct test_opts *opts)
+/* Half of the default to not risk timing out the control channel */
+#define LINGER_TIMEOUT (TIMEOUT / 2)
+
+static void test_stream_nolinger_client(const struct test_opts *opts)
+{
+ bool waited;
+ time_t ns;
+ int fd;
+
+ fd = vsock_stream_connect(opts->peer_cid, opts->peer_port);
+ if (fd < 0) {
+ perror("connect");
+ exit(EXIT_FAILURE);
+ }
+
+ enable_so_linger(fd, LINGER_TIMEOUT);
+ send_byte(fd, 1, 0); /* Left unread to expose incorrect behaviour. */
+ waited = vsock_wait_sent(fd);
+
+ ns = current_nsec();
+ close(fd);
+ ns = current_nsec() - ns;
+
+ if (!waited) {
+ fprintf(stderr, "Test skipped, SIOCOUTQ not supported.\n");
+ } else if (DIV_ROUND_UP(ns, NSEC_PER_SEC) >= LINGER_TIMEOUT) {
+ fprintf(stderr, "Unexpected lingering\n");
+ exit(EXIT_FAILURE);
+ }
+
+ control_writeln("DONE");
+}
+
+static void test_stream_nolinger_server(const struct test_opts *opts)
{
int fd;
@@ -1824,7 +1882,7 @@ static void test_stream_linger_server(const struct test_opts *opts)
exit(EXIT_FAILURE);
}
- vsock_wait_remote_close(fd);
+ control_expectln("DONE");
close(fd);
}
@@ -1988,6 +2046,11 @@ static struct test_case test_cases[] = {
.run_client = test_stream_linger_client,
.run_server = test_stream_linger_server,
},
+ {
+ .name = "SOCK_STREAM SO_LINGER close() on unread",
+ .run_client = test_stream_nolinger_client,
+ .run_server = test_stream_nolinger_server,
+ },
{},
};