145 files changed, 9804 insertions, 1453 deletions
diff --git a/tools/Makefile b/tools/Makefile
index 5e1254eb66de..c31cbbd12c45 100644
--- a/tools/Makefile
+++ b/tools/Makefile
@@ -41,6 +41,7 @@ help:
 	@echo '  mm                     - misc mm tools'
 	@echo '  wmi			- WMI interface examples'
 	@echo '  x86_energy_perf_policy - Intel energy policy tool'
+	@echo '  ynl			- ynl headers, library, and python tool'
 	@echo ''
 	@echo 'You can do:'
 	@echo ' $$ make -C tools/ <tool>_install'
@@ -118,11 +119,14 @@ freefall: FORCE
 kvm_stat: FORCE
 	$(call descend,kvm/$@)
 
+ynl: FORCE
+	$(call descend,net/ynl)
+
 all: acpi counter cpupower gpio hv firewire \
 		perf selftests bootconfig spi turbostat usb \
 		virtio mm bpf x86_energy_perf_policy \
 		tmon freefall iio objtool kvm_stat wmi \
-		debugging tracing thermal thermometer thermal-engine
+		debugging tracing thermal thermometer thermal-engine ynl
 
 acpi_install:
 	$(call descend,power/$(@:_install=),install)
@@ -157,13 +161,16 @@ freefall_install:
 kvm_stat_install:
 	$(call descend,kvm/$(@:_install=),install)
 
+ynl_install:
+	$(call descend,net/$(@:_install=),install)
+
 install: acpi_install counter_install cpupower_install gpio_install \
 		hv_install firewire_install iio_install \
 		perf_install selftests_install turbostat_install usb_install \
 		virtio_install mm_install bpf_install x86_energy_perf_policy_install \
 		tmon_install freefall_install objtool_install kvm_stat_install \
 		wmi_install debugging_install intel-speed-select_install \
-		tracing_install thermometer_install thermal-engine_install
+		tracing_install thermometer_install thermal-engine_install ynl_install
 
 acpi_clean:
 	$(call descend,power/acpi,clean)
@@ -214,12 +221,15 @@ freefall_clean:
 build_clean:
 	$(call descend,build,clean)
 
+ynl_clean:
+	$(call descend,net/$(@:_clean=),clean)
+
 clean: acpi_clean counter_clean cpupower_clean hv_clean firewire_clean \
 		perf_clean selftests_clean turbostat_clean bootconfig_clean spi_clean usb_clean virtio_clean \
 		mm_clean bpf_clean iio_clean x86_energy_perf_policy_clean tmon_clean \
 		freefall_clean build_clean libbpf_clean libsubcmd_clean \
 		gpio_clean objtool_clean leds_clean wmi_clean firmware_clean debugging_clean \
 		intel-speed-select_clean tracing_clean thermal_clean thermometer_clean thermal-engine_clean \
-		sched_ext_clean
+		sched_ext_clean ynl_clean
 
 .PHONY: FORCE
diff --git a/tools/include/uapi/asm-generic/socket.h b/tools/include/uapi/asm-generic/socket.h
index aa5016ff3d91..f333a0ac4ee4 100644
--- a/tools/include/uapi/asm-generic/socket.h
+++ b/tools/include/uapi/asm-generic/socket.h
@@ -145,6 +145,8 @@
 
 #define SO_RCVPRIORITY		82
 
+#define SO_PASSRIGHTS		83
+
 #if !defined(__KERNEL__)
 
 #if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__))
diff --git a/tools/include/uapi/linux/if_xdp.h b/tools/include/uapi/linux/if_xdp.h
index 42869770776e..44f2bb93e7e6 100644
--- a/tools/include/uapi/linux/if_xdp.h
+++ b/tools/include/uapi/linux/if_xdp.h
@@ -7,8 +7,8 @@
  *	      Magnus Karlsson <magnus.karlsson@intel.com>
  */
 
-#ifndef _LINUX_IF_XDP_H
-#define _LINUX_IF_XDP_H
+#ifndef _UAPI_LINUX_IF_XDP_H
+#define _UAPI_LINUX_IF_XDP_H
 
 #include <linux/types.h>
 
@@ -180,4 +180,4 @@ struct xdp_desc {
 /* TX packet carries valid metadata. */
 #define XDP_TX_METADATA (1 << 1)
 
-#endif /* _LINUX_IF_XDP_H */
+#endif /* _UAPI_LINUX_IF_XDP_H */
diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h
index 7600bf62dbdf..7eb9571786b8 100644
--- a/tools/include/uapi/linux/netdev.h
+++ b/tools/include/uapi/linux/netdev.h
@@ -219,6 +219,7 @@ enum {
 	NETDEV_CMD_QSTATS_GET,
 	NETDEV_CMD_BIND_RX,
 	NETDEV_CMD_NAPI_SET,
+	NETDEV_CMD_BIND_TX,
 
 	__NETDEV_CMD_MAX,
 	NETDEV_CMD_MAX = (__NETDEV_CMD_MAX - 1)
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index e0605403f977..fdcee6a71e0f 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -1283,6 +1283,7 @@ enum bpf_tc_attach_point {
 	BPF_TC_INGRESS = 1 << 0,
 	BPF_TC_EGRESS  = 1 << 1,
 	BPF_TC_CUSTOM  = 1 << 2,
+	BPF_TC_QDISC   = 1 << 3,
 };
 
 #define BPF_TC_PARENT(a, b) 	\
@@ -1297,9 +1298,11 @@ struct bpf_tc_hook {
 	int ifindex;
 	enum bpf_tc_attach_point attach_point;
 	__u32 parent;
+	__u32 handle;
+	const char *qdisc;
 	size_t :0;
 };
-#define bpf_tc_hook__last_field parent
+#define bpf_tc_hook__last_field qdisc
 
 struct bpf_tc_opts {
 	size_t sz;
diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c
index 68a2def17175..c997e69d507f 100644
--- a/tools/lib/bpf/netlink.c
+++ b/tools/lib/bpf/netlink.c
@@ -529,9 +529,9 @@ int bpf_xdp_query_id(int ifindex, int flags, __u32 *prog_id)
 }
 
 
-typedef int (*qdisc_config_t)(struct libbpf_nla_req *req);
+typedef int (*qdisc_config_t)(struct libbpf_nla_req *req, const struct bpf_tc_hook *hook);
 
-static int clsact_config(struct libbpf_nla_req *req)
+static int clsact_config(struct libbpf_nla_req *req, const struct bpf_tc_hook *hook)
 {
 	req->tc.tcm_parent = TC_H_CLSACT;
 	req->tc.tcm_handle = TC_H_MAKE(TC_H_CLSACT, 0);
@@ -539,6 +539,28 @@ static int clsact_config(struct libbpf_nla_req *req)
 	return nlattr_add(req, TCA_KIND, "clsact", sizeof("clsact"));
 }
 
+/*
+ * Fill in the tc message for a BPF_TC_QDISC hook: parent and handle are read
+ * from @hook (defaulting to TC_H_ROOT and 0) and the qdisc name is added as
+ * the TCA_KIND attribute, including its terminating NUL.
+ *
+ * Returns the result of nlattr_add(), or -EINVAL when @hook carries no qdisc
+ * name, since there is no kind to encode in that case.
+ */
+static int qdisc_config(struct libbpf_nla_req *req, const struct bpf_tc_hook *hook)
+{
+	const char *qdisc = OPTS_GET(hook, qdisc, NULL);
+
+	/* OPTS_GET() falls back to NULL here; strlen() must never see that. */
+	if (!qdisc)
+		return -EINVAL;
+
+	req->tc.tcm_parent = OPTS_GET(hook, parent, TC_H_ROOT);
+	req->tc.tcm_handle = OPTS_GET(hook, handle, 0);
+
+	return nlattr_add(req, TCA_KIND, qdisc, strlen(qdisc) + 1);
+}
+
 static int attach_point_to_config(struct bpf_tc_hook *hook,
 				  qdisc_config_t *config)
 {
@@ -552,6 +562,9 @@ static int attach_point_to_config(struct bpf_tc_hook *hook,
 		return 0;
 	case BPF_TC_CUSTOM:
 		return -EOPNOTSUPP;
+	case BPF_TC_QDISC:
+		*config = &qdisc_config;
+		return 0;
 	default:
 		return -EINVAL;
 	}
@@ -596,7 +609,7 @@ static int tc_qdisc_modify(struct bpf_tc_hook *hook, int cmd, int flags)
 	req.tc.tcm_family  = AF_UNSPEC;
 	req.tc.tcm_ifindex = OPTS_GET(hook, ifindex, 0);
 
-	ret = config(&req);
+	ret = config(&req, hook);
 	if (ret < 0)
 		return ret;
 
@@ -639,6 +652,7 @@ int bpf_tc_hook_destroy(struct bpf_tc_hook *hook)
 	case BPF_TC_INGRESS:
 	case BPF_TC_EGRESS:
 		return libbpf_err(__bpf_tc_detach(hook, NULL, true));
+	case BPF_TC_QDISC:
 	case BPF_TC_INGRESS | BPF_TC_EGRESS:
 		return libbpf_err(tc_qdisc_delete(hook));
 	case BPF_TC_CUSTOM:
diff --git a/tools/net/ynl/Makefile.deps b/tools/net/ynl/Makefile.deps
index f3269ce39e5b..90686e241157 100644
--- a/tools/net/ynl/Makefile.deps
+++ b/tools/net/ynl/Makefile.deps
@@ -20,13 +20,30 @@ CFLAGS_ethtool:=$(call get_hdr_inc,_LINUX_ETHTOOL_H,ethtool.h) \
 	$(call get_hdr_inc,_LINUX_ETHTOOL_NETLINK_H_,ethtool_netlink.h) \
 	$(call get_hdr_inc,_LINUX_ETHTOOL_NETLINK_GENERATED_H,ethtool_netlink_generated.h)
 CFLAGS_handshake:=$(call get_hdr_inc,_LINUX_HANDSHAKE_H,handshake.h)
+CFLAGS_lockd_netlink:=$(call get_hdr_inc,_LINUX_LOCKD_NETLINK_H,lockd_netlink.h)
 CFLAGS_mptcp_pm:=$(call get_hdr_inc,_LINUX_MPTCP_PM_H,mptcp_pm.h)
 CFLAGS_net_shaper:=$(call get_hdr_inc,_LINUX_NET_SHAPER_H,net_shaper.h)
 CFLAGS_netdev:=$(call get_hdr_inc,_LINUX_NETDEV_H,netdev.h)
 CFLAGS_nl80211:=$(call get_hdr_inc,__LINUX_NL802121_H,nl80211.h)
 CFLAGS_nlctrl:=$(call get_hdr_inc,__LINUX_GENERIC_NETLINK_H,genetlink.h)
 CFLAGS_nfsd:=$(call get_hdr_inc,_LINUX_NFSD_NETLINK_H,nfsd_netlink.h)
+CFLAGS_ovpn:=$(call get_hdr_inc,_LINUX_OVPN_H,ovpn.h)
 CFLAGS_ovs_datapath:=$(call get_hdr_inc,__LINUX_OPENVSWITCH_H,openvswitch.h)
 CFLAGS_ovs_flow:=$(call get_hdr_inc,__LINUX_OPENVSWITCH_H,openvswitch.h)
 CFLAGS_ovs_vport:=$(call get_hdr_inc,__LINUX_OPENVSWITCH_H,openvswitch.h)
+CFLAGS_rt-addr:=$(call get_hdr_inc,__LINUX_RTNETLINK_H,rtnetlink.h) \
+	$(call get_hdr_inc,__LINUX_IF_ADDR_H,if_addr.h)
+CFLAGS_rt-link:=$(call get_hdr_inc,__LINUX_RTNETLINK_H,rtnetlink.h) \
+	$(call get_hdr_inc,_LINUX_IF_LINK_H,if_link.h)
+CFLAGS_rt-neigh:=$(call get_hdr_inc,__LINUX_RTNETLINK_H,rtnetlink.h) \
+	$(call get_hdr_inc,__LINUX_NEIGHBOUR_H,neighbour.h)
+CFLAGS_rt-route:=$(call get_hdr_inc,__LINUX_RTNETLINK_H,rtnetlink.h)
+CFLAGS_rt-rule:=$(call get_hdr_inc,__LINUX_FIB_RULES_H,fib_rules.h)
+CFLAGS_tc:= $(call get_hdr_inc,__LINUX_RTNETLINK_H,rtnetlink.h) \
+	$(call get_hdr_inc,__LINUX_PKT_SCHED_H,pkt_sched.h) \
+	$(call get_hdr_inc,__LINUX_PKT_CLS_H,pkt_cls.h) \
+	$(call get_hdr_inc,_TC_CT_H,tc_act/tc_ct.h) \
+	$(call get_hdr_inc,_TC_MIRRED_H,tc_act/tc_mirred.h) \
+	$(call get_hdr_inc,_TC_SKBEDIT_H,tc_act/tc_skbedit.h) \
+	$(call get_hdr_inc,_TC_TUNNEL_KEY_H,tc_act/tc_tunnel_key.h)
 CFLAGS_tcp_metrics:=$(call get_hdr_inc,_LINUX_TCP_METRICS_H,tcp_metrics.h)
diff --git a/tools/net/ynl/generated/Makefile b/tools/net/ynl/generated/Makefile
index 21f9e299dc75..86e1e4a959a7 100644
--- a/tools/net/ynl/generated/Makefile
+++ b/tools/net/ynl/generated/Makefile
@@ -22,10 +22,12 @@ TOOL:=../pyynl/ynl_gen_c.py
 TOOL_RST:=../pyynl/ynl_gen_rst.py
 
 SPECS_DIR:=../../../../Documentation/netlink/specs
-GENS_PATHS=$(shell grep -nrI --files-without-match \
-		'protocol: netlink' \
-		$(SPECS_DIR))
-GENS=$(patsubst $(SPECS_DIR)/%.yaml,%,${GENS_PATHS})
+# Glob the specs once (simple assignment) and sort the result so the list of
+# generated files does not depend on directory order.
+SPECS_PATHS:=$(sort $(wildcard $(SPECS_DIR)/*.yaml))
+# Families listed in GENS_UNSUP are left out of GENS.
+GENS_UNSUP=conntrack nftables
+GENS=$(filter-out ${GENS_UNSUP},$(patsubst $(SPECS_DIR)/%.yaml,%,${SPECS_PATHS}))
 SRCS=$(patsubst %,%-user.c,${GENS})
 HDRS=$(patsubst %,%-user.h,${GENS})
 OBJS=$(patsubst %,%-user.o,${GENS})
diff --git a/tools/net/ynl/lib/ynl-priv.h b/tools/net/ynl/lib/ynl-priv.h
index 3c09a7bbfba5..824777d7e05e 100644
--- a/tools/net/ynl/lib/ynl-priv.h
+++ b/tools/net/ynl/lib/ynl-priv.h
@@ -25,6 +25,7 @@ enum ynl_policy_type {
 	YNL_PT_UINT,
 	YNL_PT_NUL_STR,
 	YNL_PT_BITFIELD32,
+	YNL_PT_SUBMSG,
 };
 
 enum ynl_parse_result {
@@ -42,7 +43,10 @@ typedef int (*ynl_parse_cb_t)(const struct nlmsghdr *nlh,
 			      struct ynl_parse_arg *yarg);
 
 struct ynl_policy_attr {
-	enum ynl_policy_type type;
+	enum ynl_policy_type type:8;
+	__u8 is_submsg:1;
+	__u8 is_selector:1;
+	__u16 selector_type;
 	unsigned int len;
 	const char *name;
 	const struct ynl_policy_nest *nest;
@@ -94,12 +98,17 @@ struct ynl_ntf_base_type {
 	unsigned char data[] __attribute__((aligned(8)));
 };
 
+struct nlmsghdr *ynl_msg_start_req(struct ynl_sock *ys, __u32 id, __u16 flags);
+struct nlmsghdr *ynl_msg_start_dump(struct ynl_sock *ys, __u32 id);
+
 struct nlmsghdr *
 ynl_gemsg_start_req(struct ynl_sock *ys, __u32 id, __u8 cmd, __u8 version);
 struct nlmsghdr *
 ynl_gemsg_start_dump(struct ynl_sock *ys, __u32 id, __u8 cmd, __u8 version);
 
 int ynl_attr_validate(struct ynl_parse_arg *yarg, const struct nlattr *attr);
+int ynl_submsg_failed(struct ynl_parse_arg *yarg, const char *field_name,
+		      const char *sel_name);
 
 /* YNL specific helpers used by the auto-generated code */
 
@@ -204,11 +213,15 @@ static inline void *ynl_attr_data_end(const struct nlattr *attr)
 				     NLMSG_HDRLEN + fixed_hdr_sz); attr; \
 	     (attr) = ynl_attr_next(ynl_nlmsg_end_addr(nlh), attr))
 
-#define ynl_attr_for_each_nested(attr, outer)				\
+#define ynl_attr_for_each_nested_off(attr, outer, offset)		\
 	for ((attr) = ynl_attr_first(outer, outer->nla_len,		\
-				     sizeof(struct nlattr)); attr;	\
+				     sizeof(struct nlattr) + offset);	\
+	     attr;							\
 	     (attr) = ynl_attr_next(ynl_attr_data_end(outer), attr))
 
+#define ynl_attr_for_each_nested(attr, outer)				\
+	ynl_attr_for_each_nested_off(attr, outer, 0)
+
 #define ynl_attr_for_each_payload(start, len, attr)			\
 	for ((attr) = ynl_attr_first(start, len, 0); attr;		\
 	     (attr) = ynl_attr_next(start + len, attr))
diff --git a/tools/net/ynl/lib/ynl.c b/tools/net/ynl/lib/ynl.c
index c4da34048ef8..2a169c3c0797 100644
--- a/tools/net/ynl/lib/ynl.c
+++ b/tools/net/ynl/lib/ynl.c
@@ -45,8 +45,58 @@
 #define perr(_ys, _msg)			__yerr(&(_ys)->err, errno, _msg)
 
 /* -- Netlink boiler plate */
+/*
+ * Report whether @attr is flagged as a selector in @policy. A NULL @policy
+ * or an attribute type above policy->max_attr is never a selector.
+ */
+static bool
+ynl_err_walk_is_sel(const struct ynl_policy_nest *policy,
+		    const struct nlattr *attr)
+{
+	unsigned int type = ynl_attr_type(attr);
+
+	return policy && type <= policy->max_attr &&
+		policy->table[type].is_selector;
+}
+
+/*
+ * Choose the policy to descend into for the attribute described by
+ * @policy_attr. Ordinary attributes use policy_attr->nest directly. For
+ * sub-messages that nest is searched for the entry whose name equals the
+ * string carried by @selector, and the matching entry's nest is returned.
+ *
+ * Returns NULL when there is nothing to descend into.
+ */
+static const struct ynl_policy_nest *
+ynl_err_walk_sel_policy(const struct ynl_policy_attr *policy_attr,
+			const struct nlattr *selector)
+{
+	const struct ynl_policy_nest *policy = policy_attr->nest;
+	const char *sel;
+	unsigned int i;
+
+	if (!policy_attr->is_submsg)
+		return policy;
+
+	/* Without a nest table or a selector there is nothing to match. */
+	if (!policy || !selector)
+		return NULL;
+
+	sel = ynl_attr_get_str(selector);
+	for (i = 0; i <= policy->max_attr; i++) {
+		const char *name = policy->table[i].name;
+
+		/* Skip unnamed slots, strcmp() must not be handed NULL. */
+		if (name && !strcmp(sel, name))
+			return policy->table[i].nest;
+	}
+
+	return NULL;
+}
+
 static int
-ynl_err_walk_report_one(const struct ynl_policy_nest *policy, unsigned int type,
+ynl_err_walk_report_one(const struct ynl_policy_nest *policy,
+			const struct nlattr *selector, unsigned int type,
 			char *str, int str_sz, int *n)
 {
 	if (!policy) {
@@ -67,9 +98,40 @@ ynl_err_walk_report_one(const struct ynl_policy_nest *policy, unsigned int type,
 		return 1;
 	}
 
-	if (*n < str_sz)
-		*n += snprintf(str, str_sz - *n,
-			       ".%s", policy->table[type].name);
+	if (*n < str_sz) {
+		int sz;
+
+		sz = snprintf(str, str_sz - *n,
+			      ".%s", policy->table[type].name);
+		*n += sz;
+		str += sz;
+	}
+
+	/*
+	 * str was advanced past the name written above and *n accounts for
+	 * it, so every write below is limited to the str_sz - *n bytes left.
+	 */
+	if (policy->table[type].is_submsg) {
+		if (!selector) {
+			if (*n < str_sz)
+				*n += snprintf(str, str_sz - *n,
+					       "(!selector)");
+			return 1;
+		}
+
+		if (ynl_attr_type(selector) !=
+		    policy->table[type].selector_type) {
+			if (*n < str_sz)
+				*n += snprintf(str, str_sz - *n,
+					       "(!=selector)");
+			return 1;
+		}
+
+		if (*n < str_sz)
+			*n += snprintf(str, str_sz - *n, "(%s)",
+				       ynl_attr_get_str(selector));
+	}
+
 	return 0;
 }
 
@@ -78,6 +134,8 @@ ynl_err_walk(struct ynl_sock *ys, void *start, void *end, unsigned int off,
 	     const struct ynl_policy_nest *policy, char *str, int str_sz,
 	     const struct ynl_policy_nest **nest_pol)
 {
+	const struct ynl_policy_nest *next_pol;
+	const struct nlattr *selector = NULL;
 	unsigned int astart_off, aend_off;
 	const struct nlattr *attr;
 	unsigned int data_len;
@@ -96,6 +154,10 @@ ynl_err_walk(struct ynl_sock *ys, void *start, void *end, unsigned int off,
 	ynl_attr_for_each_payload(start, data_len, attr) {
 		astart_off = (char *)attr - (char *)start;
 		aend_off = (char *)ynl_attr_data_end(attr) - (char *)start;
+
+		if (ynl_err_walk_is_sel(policy, attr))
+			selector = attr;
+
 		if (aend_off <= off)
 			continue;
 
@@ -109,16 +171,20 @@ ynl_err_walk(struct ynl_sock *ys, void *start, void *end, unsigned int off,
 
 	type = ynl_attr_type(attr);
 
-	if (ynl_err_walk_report_one(policy, type, str, str_sz, &n))
+	if (ynl_err_walk_report_one(policy, selector, type, str, str_sz, &n))
+		return n;
+
+	next_pol = ynl_err_walk_sel_policy(&policy->table[type], selector);
+	if (!next_pol)
 		return n;
 
 	if (!off) {
 		if (nest_pol)
-			*nest_pol = policy->table[type].nest;
+			*nest_pol = next_pol;
 		return n;
 	}
 
-	if (!policy->table[type].nest) {
+	if (!next_pol) {
 		if (n < str_sz)
 			n += snprintf(str, str_sz, "!nest");
 		return n;
@@ -128,7 +194,7 @@ ynl_err_walk(struct ynl_sock *ys, void *start, void *end, unsigned int off,
 	start =  ynl_attr_data(attr);
 	end = start + ynl_attr_data_len(attr);
 
-	return n + ynl_err_walk(ys, start, end, off, policy->table[type].nest,
+	return n + ynl_err_walk(ys, start, end, off, next_pol,
 				&str[n], str_sz - n, nest_pol);
 }
 
@@ -191,12 +257,12 @@ ynl_ext_ack_check(struct ynl_sock *ys, const struct nlmsghdr *nlh,
 		n = snprintf(bad_attr, sizeof(bad_attr), "%sbad attribute: ",
 			     str ? " (" : "");
 
-		start = ynl_nlmsg_data_offset(ys->nlh, ys->family->hdr_len);
+		start = ynl_nlmsg_data_offset(ys->nlh, ys->req_hdr_len);
 		end = ynl_nlmsg_end_addr(ys->nlh);
 
 		off = ys->err.attr_offs;
 		off -= sizeof(struct nlmsghdr);
-		off -= ys->family->hdr_len;
+		off -= ys->req_hdr_len;
 
 		n += ynl_err_walk(ys, start, end, off, ys->req_policy,
 				  &bad_attr[n], sizeof(bad_attr) - n, NULL);
@@ -216,14 +282,14 @@ ynl_ext_ack_check(struct ynl_sock *ys, const struct nlmsghdr *nlh,
 		n = snprintf(miss_attr, sizeof(miss_attr), "%smissing attribute: ",
 			     bad_attr[0] ? ", " : (str ? " (" : ""));
 
-		start = ynl_nlmsg_data_offset(ys->nlh, ys->family->hdr_len);
+		start = ynl_nlmsg_data_offset(ys->nlh, ys->req_hdr_len);
 		end = ynl_nlmsg_end_addr(ys->nlh);
 
 		nest_pol = ys->req_policy;
 		if (tb[NLMSGERR_ATTR_MISS_NEST]) {
 			off = ynl_attr_get_u32(tb[NLMSGERR_ATTR_MISS_NEST]);
 			off -= sizeof(struct nlmsghdr);
-			off -= ys->family->hdr_len;
+			off -= ys->req_hdr_len;
 
 			n += ynl_err_walk(ys, start, end, off, ys->req_policy,
 					  &miss_attr[n], sizeof(miss_attr) - n,
@@ -231,7 +297,7 @@ ynl_ext_ack_check(struct ynl_sock *ys, const struct nlmsghdr *nlh,
 		}
 
 		n2 = 0;
-		ynl_err_walk_report_one(nest_pol, type, &miss_attr[n],
+		ynl_err_walk_report_one(nest_pol, NULL, type, &miss_attr[n],
 					sizeof(miss_attr) - n, &n2);
 		n += n2;
 
@@ -384,6 +450,15 @@ int ynl_attr_validate(struct ynl_parse_arg *yarg, const struct nlattr *attr)
 	return 0;
 }
 
+int ynl_submsg_failed(struct ynl_parse_arg *yarg, const char *field_name,
+		      const char *sel_name)
+{
+	yerr(yarg->ys, YNL_ERROR_SUBMSG_KEY,
+	     "Parsing error: Sub-message key not set (msg %s, key %s)",
+	     field_name, sel_name);
+	return YNL_PARSE_CB_ERROR;
+}
+
 /* Generic code */
 
 static void ynl_err_reset(struct ynl_sock *ys)
@@ -451,14 +526,14 @@ ynl_gemsg_start(struct ynl_sock *ys, __u32 id, __u16 flags,
 	return nlh;
 }
 
-void ynl_msg_start_req(struct ynl_sock *ys, __u32 id)
+struct nlmsghdr *ynl_msg_start_req(struct ynl_sock *ys, __u32 id, __u16 flags)
 {
-	ynl_msg_start(ys, id, NLM_F_REQUEST | NLM_F_ACK);
+	return ynl_msg_start(ys, id, NLM_F_REQUEST | NLM_F_ACK | flags);
 }
 
-void ynl_msg_start_dump(struct ynl_sock *ys, __u32 id)
+struct nlmsghdr *ynl_msg_start_dump(struct ynl_sock *ys, __u32 id)
 {
-	ynl_msg_start(ys, id, NLM_F_REQUEST | NLM_F_ACK | NLM_F_DUMP);
+	return ynl_msg_start(ys, id, NLM_F_REQUEST | NLM_F_ACK | NLM_F_DUMP);
 }
 
 struct nlmsghdr *
@@ -663,6 +738,7 @@ ynl_sock_create(const struct ynl_family *yf, struct ynl_error *yse)
 	struct sockaddr_nl addr;
 	struct ynl_sock *ys;
 	socklen_t addrlen;
+	int sock_type;
 	int one = 1;
 
 	ys = malloc(sizeof(*ys) + 2 * YNL_SOCKET_BUFFER_SIZE);
@@ -675,7 +751,9 @@ ynl_sock_create(const struct ynl_family *yf, struct ynl_error *yse)
 	ys->rx_buf = &ys->raw_buf[YNL_SOCKET_BUFFER_SIZE];
 	ys->ntf_last_next = &ys->ntf_first;
 
-	ys->socket = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC);
+	sock_type = yf->is_classic ? yf->classic_id : NETLINK_GENERIC;
+
+	ys->socket = socket(AF_NETLINK, SOCK_RAW, sock_type);
 	if (ys->socket < 0) {
 		__perr(yse, "failed to create a netlink socket");
 		goto err_free_sock;
@@ -708,8 +786,9 @@ ynl_sock_create(const struct ynl_family *yf, struct ynl_error *yse)
 	ys->portid = addr.nl_pid;
 	ys->seq = random();
 
-
-	if (ynl_sock_read_family(ys, yf->name)) {
+	if (yf->is_classic) {
+		ys->family_id = yf->classic_id;
+	} else if (ynl_sock_read_family(ys, yf->name)) {
 		if (yse)
 			memcpy(yse, &ys->err, sizeof(*yse));
 		goto err_close_sock;
@@ -791,13 +870,21 @@ static int ynl_ntf_parse(struct ynl_sock *ys, const struct nlmsghdr *nlh)
 	struct ynl_parse_arg yarg = { .ys = ys, };
 	const struct ynl_ntf_info *info;
 	struct ynl_ntf_base_type *rsp;
-	struct genlmsghdr *gehdr;
+	__u32 cmd;
 	int ret;
 
-	gehdr = ynl_nlmsg_data(nlh);
-	if (gehdr->cmd >= ys->family->ntf_info_size)
+	if (ys->family->is_classic) {
+		cmd = nlh->nlmsg_type;
+	} else {
+		struct genlmsghdr *gehdr;
+
+		gehdr = ynl_nlmsg_data(nlh);
+		cmd = gehdr->cmd;
+	}
+
+	if (cmd >= ys->family->ntf_info_size)
 		return YNL_PARSE_CB_ERROR;
-	info = &ys->family->ntf_info[gehdr->cmd];
+	info = &ys->family->ntf_info[cmd];
 	if (!info->cb)
 		return YNL_PARSE_CB_ERROR;
 
@@ -811,7 +898,7 @@ static int ynl_ntf_parse(struct ynl_sock *ys, const struct nlmsghdr *nlh)
 		goto err_free;
 
 	rsp->family = nlh->nlmsg_type;
-	rsp->cmd = gehdr->cmd;
+	rsp->cmd = cmd;
 
 	*ys->ntf_last_next = rsp;
 	ys->ntf_last_next = &rsp->next;
@@ -863,18 +950,23 @@ int ynl_error_parse(struct ynl_parse_arg *yarg, const char *msg)
 static int
 ynl_check_alien(struct ynl_sock *ys, const struct nlmsghdr *nlh, __u32 rsp_cmd)
 {
-	struct genlmsghdr *gehdr;
+	if (ys->family->is_classic) {
+		if (nlh->nlmsg_type != rsp_cmd)
+			return ynl_ntf_parse(ys, nlh);
+	} else {
+		struct genlmsghdr *gehdr;
+
+		if (ynl_nlmsg_data_len(nlh) < sizeof(*gehdr)) {
+			yerr(ys, YNL_ERROR_INV_RESP,
+			     "Kernel responded with truncated message");
+			return -1;
+		}
 
-	if (ynl_nlmsg_data_len(nlh) < sizeof(*gehdr)) {
-		yerr(ys, YNL_ERROR_INV_RESP,
-		     "Kernel responded with truncated message");
-		return -1;
+		gehdr = ynl_nlmsg_data(nlh);
+		if (gehdr->cmd != rsp_cmd)
+			return ynl_ntf_parse(ys, nlh);
 	}
 
-	gehdr = ynl_nlmsg_data(nlh);
-	if (gehdr->cmd != rsp_cmd)
-		return ynl_ntf_parse(ys, nlh);
-
 	return 0;
 }
 
diff --git a/tools/net/ynl/lib/ynl.h b/tools/net/ynl/lib/ynl.h
index 6cd570b283ea..db7c0591a63f 100644
--- a/tools/net/ynl/lib/ynl.h
+++ b/tools/net/ynl/lib/ynl.h
@@ -2,6 +2,7 @@
 #ifndef __YNL_C_H
 #define __YNL_C_H 1
 
+#include <stdbool.h>
 #include <stddef.h>
 #include <linux/genetlink.h>
 #include <linux/types.h>
@@ -22,6 +23,7 @@ enum ynl_error_code {
 	YNL_ERROR_INV_RESP,
 	YNL_ERROR_INPUT_INVALID,
 	YNL_ERROR_INPUT_TOO_BIG,
+	YNL_ERROR_SUBMSG_KEY,
 };
 
 /**
@@ -48,6 +50,8 @@ struct ynl_family {
 /* private: */
 	const char *name;
 	size_t hdr_len;
+	bool is_classic;
+	__u16 classic_id;
 	const struct ynl_ntf_info *ntf_info;
 	unsigned int ntf_info_size;
 };
@@ -77,11 +81,25 @@ struct ynl_sock {
 
 	struct nlmsghdr *nlh;
 	const struct ynl_policy_nest *req_policy;
+	size_t req_hdr_len;
 	unsigned char *tx_buf;
 	unsigned char *rx_buf;
 	unsigned char raw_buf[];
 };
 
+/**
+ * struct ynl_string - parsed individual string
+ * @len: length of the string (excluding terminating character)
+ * @str: value of the string
+ *
+ * Parsed and nul-terminated string. This struct is only used for arrays of
+ * strings. Non-array string members are placed directly in respective types.
+ */
+struct ynl_string {
+	unsigned int len;
+	char str[];
+};
+
 struct ynl_sock *
 ynl_sock_create(const struct ynl_family *yf, struct ynl_error *e);
 void ynl_sock_destroy(struct ynl_sock *ys);
diff --git a/tools/net/ynl/pyynl/cli.py b/tools/net/ynl/pyynl/cli.py
index 794e3c7dcc65..33ccc5c1843b 100755
--- a/tools/net/ynl/pyynl/cli.py
+++ b/tools/net/ynl/pyynl/cli.py
@@ -144,16 +144,17 @@ def main():
             ops = [ (item[0], json.loads(item[1]), args.flags or []) for item in args.multi ]
             reply = ynl.do_multi(ops)
             output(reply)
-    except NlError as e:
-        print(e)
-        exit(1)
 
-    if args.ntf:
-        try:
+        if args.ntf:
             for msg in ynl.poll_ntf(duration=args.duration):
                 output(msg)
-        except KeyboardInterrupt:
-            pass
+    except NlError as e:
+        print(e)
+        exit(1)
+    except KeyboardInterrupt:
+        pass
+    except BrokenPipeError:
+        pass
 
 
 if __name__ == "__main__":
diff --git a/tools/net/ynl/pyynl/lib/__init__.py b/tools/net/ynl/pyynl/lib/__init__.py
index 9137b83e580a..71518b9842ee 100644
--- a/tools/net/ynl/pyynl/lib/__init__.py
+++ b/tools/net/ynl/pyynl/lib/__init__.py
@@ -1,8 +1,9 @@
 # SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
 
 from .nlspec import SpecAttr, SpecAttrSet, SpecEnumEntry, SpecEnumSet, \
-    SpecFamily, SpecOperation
+    SpecFamily, SpecOperation, SpecSubMessage, SpecSubMessageFormat
 from .ynl import YnlFamily, Netlink, NlError
 
 __all__ = ["SpecAttr", "SpecAttrSet", "SpecEnumEntry", "SpecEnumSet",
-           "SpecFamily", "SpecOperation", "YnlFamily", "Netlink", "NlError"]
+           "SpecFamily", "SpecOperation", "SpecSubMessage", "SpecSubMessageFormat",
+           "YnlFamily", "Netlink", "NlError"]
diff --git a/tools/net/ynl/pyynl/lib/ynl.py b/tools/net/ynl/pyynl/lib/ynl.py
index dcc2c6b298d6..55b59f6c79b8 100644
--- a/tools/net/ynl/pyynl/lib/ynl.py
+++ b/tools/net/ynl/pyynl/lib/ynl.py
@@ -594,7 +594,7 @@ class YnlFamily(SpecFamily):
             scalar_selector = self._get_scalar(attr, value["selector"])
             attr_payload = struct.pack("II", scalar_value, scalar_selector)
         elif attr['type'] == 'sub-message':
-            msg_format = self._resolve_selector(attr, search_attrs)
+            msg_format, _ = self._resolve_selector(attr, search_attrs)
             attr_payload = b''
             if msg_format.fixed_header:
                 attr_payload += self._encode_struct(msg_format.fixed_header, value)
@@ -712,10 +712,10 @@ class YnlFamily(SpecFamily):
             raise Exception(f"No message format for '{value}' in sub-message spec '{sub_msg}'")
 
         spec = sub_msg_spec.formats[value]
-        return spec
+        return spec, value
 
     def _decode_sub_msg(self, attr, attr_spec, search_attrs):
-        msg_format = self._resolve_selector(attr_spec, search_attrs)
+        msg_format, _ = self._resolve_selector(attr_spec, search_attrs)
         decoded = {}
         offset = 0
         if msg_format.fixed_header:
@@ -787,7 +787,7 @@ class YnlFamily(SpecFamily):
 
         return rsp
 
-    def _decode_extack_path(self, attrs, attr_set, offset, target):
+    def _decode_extack_path(self, attrs, attr_set, offset, target, search_attrs):
         for attr in attrs:
             try:
                 attr_spec = attr_set.attrs_by_val[attr.type]
@@ -801,26 +801,37 @@ class YnlFamily(SpecFamily):
             if offset + attr.full_len <= target:
                 offset += attr.full_len
                 continue
-            if attr_spec['type'] != 'nest':
+
+            pathname = attr_spec.name
+            if attr_spec['type'] == 'nest':
+                sub_attrs = self.attr_sets[attr_spec['nested-attributes']]
+                search_attrs = SpaceAttrs(sub_attrs, search_attrs.lookup(attr_spec['name']))
+            elif attr_spec['type'] == 'sub-message':
+                msg_format, value = self._resolve_selector(attr_spec, search_attrs)
+                if msg_format is None:
+                    raise Exception(f"Can't resolve sub-message of {attr_spec['name']} for extack")
+                sub_attrs = self.attr_sets[msg_format.attr_set]
+                pathname += f"({value})"
+            else:
                 raise Exception(f"Can't dive into {attr.type} ({attr_spec['name']}) for extack")
             offset += 4
-            subpath = self._decode_extack_path(NlAttrs(attr.raw),
-                                               self.attr_sets[attr_spec['nested-attributes']],
-                                               offset, target)
+            subpath = self._decode_extack_path(NlAttrs(attr.raw), sub_attrs,
+                                               offset, target, search_attrs)
             if subpath is None:
                 return None
-            return '.' + attr_spec.name + subpath
+            return '.' + pathname + subpath
 
         return None
 
-    def _decode_extack(self, request, op, extack):
+    def _decode_extack(self, request, op, extack, vals):
         if 'bad-attr-offs' not in extack:
             return
 
         msg = self.nlproto.decode(self, NlMsg(request, 0, op.attr_set), op)
         offset = self.nlproto.msghdr_size() + self._struct_size(op.fixed_header)
+        search_attrs = SpaceAttrs(op.attr_set, vals)
         path = self._decode_extack_path(msg.raw_attrs, op.attr_set, offset,
-                                        extack['bad-attr-offs'])
+                                        extack['bad-attr-offs'], search_attrs)
         if path:
             del extack['bad-attr-offs']
             extack['bad-attr'] = path
@@ -1012,7 +1023,7 @@ class YnlFamily(SpecFamily):
         for (method, vals, flags) in ops:
             op = self.ops[method]
             msg = self._encode_message(op, vals, flags, req_seq)
-            reqs_by_seq[req_seq] = (op, msg, flags)
+            reqs_by_seq[req_seq] = (op, vals, msg, flags)
             payload += msg
             req_seq += 1
 
@@ -1027,9 +1038,9 @@ class YnlFamily(SpecFamily):
             self._recv_dbg_print(reply, nms)
             for nl_msg in nms:
                 if nl_msg.nl_seq in reqs_by_seq:
-                    (op, req_msg, req_flags) = reqs_by_seq[nl_msg.nl_seq]
+                    (op, vals, req_msg, req_flags) = reqs_by_seq[nl_msg.nl_seq]
                     if nl_msg.extack:
-                        self._decode_extack(req_msg, op, nl_msg.extack)
+                        self._decode_extack(req_msg, op, nl_msg.extack, vals)
                 else:
                     op = None
                     req_flags = []
diff --git a/tools/net/ynl/pyynl/ynl_gen_c.py b/tools/net/ynl/pyynl/ynl_gen_c.py
index a7f08edbc235..76032e01c2e7 100755
--- a/tools/net/ynl/pyynl/ynl_gen_c.py
+++ b/tools/net/ynl/pyynl/ynl_gen_c.py
@@ -14,6 +14,7 @@ import yaml
 
 sys.path.append(pathlib.Path(__file__).resolve().parent.as_posix())
 from lib import SpecFamily, SpecAttrSet, SpecAttr, SpecOperation, SpecEnumSet, SpecEnumEntry
+from lib import SpecSubMessage, SpecSubMessageFormat
 
 
 def c_upper(name):
@@ -56,11 +57,20 @@ class Type(SpecAttr):
         self.request = False
         self.reply = False
 
+        self.is_selector = False
+
         if 'len' in attr:
             self.len = attr['len']
 
         if 'nested-attributes' in attr:
-            self.nested_attrs = attr['nested-attributes']
+            nested = attr['nested-attributes']
+        elif 'sub-message' in attr:
+            nested = attr['sub-message']
+        else:
+            nested = None
+
+        if nested:
+            self.nested_attrs = nested
             if self.nested_attrs == family.name:
                 self.nested_render_name = c_lower(f"{family.ident_name}")
             else:
@@ -119,7 +129,9 @@ class Type(SpecAttr):
         return c_upper(value)
 
     def resolve(self):
-        if 'name-prefix' in self.attr:
+        if 'parent-sub-message' in self.attr:
+            enum_name = self.attr['parent-sub-message'].enum_name
+        elif 'name-prefix' in self.attr:
             enum_name = f"{self.attr['name-prefix']}{self.name}"
         else:
             enum_name = f"{self.attr_set.name_prefix}{self.name}"
@@ -142,19 +154,19 @@ class Type(SpecAttr):
         return self.is_recursive() and not ri.op
 
     def presence_type(self):
-        return 'bit'
+        return 'present'
 
     def presence_member(self, space, type_filter):
         if self.presence_type() != type_filter:
             return
 
-        if self.presence_type() == 'bit':
+        if self.presence_type() == 'present':
             pfx = '__' if space == 'user' else ''
             return f"{pfx}u32 {self.c_name}:1;"
 
-        if self.presence_type() == 'len':
+        if self.presence_type() in {'len', 'count'}:
             pfx = '__' if space == 'user' else ''
-            return f"{pfx}u32 {self.c_name}_len;"
+            return f"{pfx}u32 {self.c_name};"
 
     def _complex_member_type(self, ri):
         return None
@@ -163,7 +175,7 @@ class Type(SpecAttr):
         return False
 
     def _free_lines(self, ri, var, ref):
-        if self.is_multi_val() or self.presence_type() == 'len':
+        if self.is_multi_val() or self.presence_type() in {'count', 'len'}:
             return [f'free({var}->{ref}{self.c_name});']
         return []
 
@@ -175,21 +187,21 @@ class Type(SpecAttr):
     def arg_member(self, ri):
         member = self._complex_member_type(ri)
         if member:
-            arg = [member + ' *' + self.c_name]
+            spc = ' ' if member[-1] != '*' else ''
+            arg = [member + spc + '*' + self.c_name]
             if self.presence_type() == 'count':
                 arg += ['unsigned int n_' + self.c_name]
             return arg
         raise Exception(f"Struct member not implemented for class type {self.type}")
 
     def struct_member(self, ri):
-        if self.is_multi_val():
-            ri.cw.p(f"unsigned int n_{self.c_name};")
         member = self._complex_member_type(ri)
         if member:
             ptr = '*' if self.is_multi_val() else ''
             if self.is_recursive_for_op(ri):
                 ptr = '*'
-            ri.cw.p(f"{member} {ptr}{self.c_name};")
+            spc = ' ' if member[-1] != '*' else ''
+            ri.cw.p(f"{member}{spc}{ptr}{self.c_name};")
             return
         members = self.arg_member(ri)
         for one in members:
@@ -215,10 +227,9 @@ class Type(SpecAttr):
         cw.p(f'[{self.enum_name}] = {"{"} .name = "{self.name}", {typol}{"}"},')
 
     def _attr_put_line(self, ri, var, line):
-        if self.presence_type() == 'bit':
-            ri.cw.p(f"if ({var}->_present.{self.c_name})")
-        elif self.presence_type() == 'len':
-            ri.cw.p(f"if ({var}->_present.{self.c_name}_len)")
+        presence = self.presence_type()
+        if presence in {'present', 'len'}:
+            ri.cw.p(f"if ({var}->_{presence}.{self.c_name})")
         ri.cw.p(f"{line};")
 
     def _attr_put_simple(self, ri, var, put_type):
@@ -248,7 +259,7 @@ class Type(SpecAttr):
         if not self.is_multi_val():
             ri.cw.p("if (ynl_attr_validate(yarg, attr))")
             ri.cw.p("return YNL_PARSE_CB_ERROR;")
-            if self.presence_type() == 'bit':
+            if self.presence_type() == 'present':
                 ri.cw.p(f"{var}->_present.{self.c_name} = 1;")
 
         if init_lines:
@@ -279,7 +290,8 @@ class Type(SpecAttr):
             presence = f"{var}->{'.'.join(ref[:i] + [''])}_present.{ref[i]}"
             # Every layer below last is a nest, so we know it uses bit presence
             # last layer is "self" and may be a complex type
-            if i == len(ref) - 1 and self.presence_type() != 'bit':
+            if i == len(ref) - 1 and self.presence_type() != 'present':
+                presence = f"{var}->{'.'.join(ref[:i] + [''])}_{self.presence_type()}.{ref[i]}"
                 continue
             code.append(presence + ' = 1;')
         ref_path = '.'.join(ref[:-1])
@@ -355,26 +367,10 @@ class TypeScalar(Type):
         if 'byte-order' in attr:
             self.byte_order_comment = f" /* {attr['byte-order']} */"
 
-        if 'enum' in self.attr:
-            enum = self.family.consts[self.attr['enum']]
-            low, high = enum.value_range()
-            if 'min' not in self.checks:
-                if low != 0 or self.type[0] == 's':
-                    self.checks['min'] = low
-            if 'max' not in self.checks:
-                self.checks['max'] = high
-
-        if 'min' in self.checks and 'max' in self.checks:
-            if self.get_limit('min') > self.get_limit('max'):
-                raise Exception(f'Invalid limit for "{self.name}" min: {self.get_limit("min")} max: {self.get_limit("max")}')
-            self.checks['range'] = True
-
-        low = min(self.get_limit('min', 0), self.get_limit('max', 0))
-        high = max(self.get_limit('min', 0), self.get_limit('max', 0))
-        if low < 0 and self.type[0] == 'u':
-            raise Exception(f'Invalid limit for "{self.name}" negative limit for unsigned type')
-        if low < -32768 or high > 32767:
-            self.checks['full-range'] = True
+        # Classic families have some funny enums, don't bother
+        # computing checks, since we only need them for kernel policies
+        if not family.is_classic():
+            self._init_checks()
 
         # Added by resolve():
         self.is_bitfield = None
@@ -399,6 +395,31 @@ class TypeScalar(Type):
         else:
             self.type_name = '__' + self.type
 
+    def _init_checks(self):
+        # Fill self.checks from the attribute's enum (if any) and its min/max limits
+        if 'enum' in self.attr:
+            enum = self.family.consts[self.attr['enum']]
+            low, high = enum.value_range()
+            if low is None and high is None:  # noncontiguous enum, no usable range
+                self.checks['sparse'] = True
+            else:
+                if 'min' not in self.checks and (low != 0 or self.type[0] == 's'):
+                    self.checks['min'] = low
+                if 'max' not in self.checks:
+                    self.checks['max'] = high
+
+        if 'min' in self.checks and 'max' in self.checks:
+            if self.get_limit('min') > self.get_limit('max'):
+                raise Exception(f'Invalid limit for "{self.name}" min: {self.get_limit("min")} max: {self.get_limit("max")}')
+            self.checks['range'] = True
+
+        low = min(self.get_limit('min', 0), self.get_limit('max', 0))
+        high = max(self.get_limit('min', 0), self.get_limit('max', 0))
+        if low < 0 and self.type[0] == 'u':
+            raise Exception(f'Invalid limit for "{self.name}" negative limit for unsigned type')
+        if low < -32768 or high > 32767:
+            self.checks['full-range'] = True
+
     def _attr_policy(self, policy):
         if 'flags-mask' in self.checks or self.is_bitfield:
             if self.is_bitfield:
@@ -417,6 +438,8 @@ class TypeScalar(Type):
             return f"NLA_POLICY_MIN({policy}, {self.get_limit_str('min')})"
         elif 'max' in self.checks:
             return f"NLA_POLICY_MAX({policy}, {self.get_limit_str('max')})"
+        elif 'sparse' in self.checks:
+            return f"NLA_POLICY_VALIDATE_FN({policy}, &{c_lower(self.enum_name)}_validate)"
         return super()._attr_policy(policy)
 
     def _attr_typol(self):
@@ -463,7 +486,10 @@ class TypeString(Type):
         ri.cw.p(f"char *{self.c_name};")
 
     def _attr_typol(self):
-        return f'.type = YNL_PT_NUL_STR, '
+        typol = f'.type = YNL_PT_NUL_STR, '
+        if self.is_selector:
+            typol += '.is_selector = 1, '
+        return typol
 
     def _attr_policy(self, policy):
         if 'exact-len' in self.checks:
@@ -488,7 +514,7 @@ class TypeString(Type):
         self._attr_put_simple(ri, var, 'str')
 
     def _attr_get(self, ri, var):
-        len_mem = var + '->_present.' + self.c_name + '_len'
+        len_mem = var + '->_len.' + self.c_name
         return [f"{len_mem} = len;",
                 f"{var}->{self.c_name} = malloc(len + 1);",
                 f"memcpy({var}->{self.c_name}, ynl_attr_get_str(attr), len);",
@@ -497,10 +523,10 @@ class TypeString(Type):
                ['unsigned int len;']
 
     def _setter_lines(self, ri, member, presence):
-        return [f"{presence}_len = strlen({self.c_name});",
-                f"{member} = malloc({presence}_len + 1);",
-                f'memcpy({member}, {self.c_name}, {presence}_len);',
-                f'{member}[{presence}_len] = 0;']
+        return [f"{presence} = strlen({self.c_name});",
+                f"{member} = malloc({presence} + 1);",
+                f'memcpy({member}, {self.c_name}, {presence});',
+                f'{member}[{presence}] = 0;']
 
 
 class TypeBinary(Type):
@@ -539,10 +565,10 @@ class TypeBinary(Type):
 
     def attr_put(self, ri, var):
         self._attr_put_line(ri, var, f"ynl_attr_put(nlh, {self.enum_name}, " +
-                            f"{var}->{self.c_name}, {var}->_present.{self.c_name}_len)")
+                            f"{var}->{self.c_name}, {var}->_len.{self.c_name})")
 
     def _attr_get(self, ri, var):
-        len_mem = var + '->_present.' + self.c_name + '_len'
+        len_mem = var + '->_len.' + self.c_name
         return [f"{len_mem} = len;",
                 f"{var}->{self.c_name} = malloc(len);",
                 f"memcpy({var}->{self.c_name}, ynl_attr_data(attr), len);"], \
@@ -550,9 +576,60 @@ class TypeBinary(Type):
                ['unsigned int len;']
 
     def _setter_lines(self, ri, member, presence):
-        return [f"{presence}_len = len;",
-                f"{member} = malloc({presence}_len);",
-                f'memcpy({member}, {self.c_name}, {presence}_len);']
+        return [f"{presence} = len;",
+                f"{member} = malloc({presence});",
+                f'memcpy({member}, {self.c_name}, {presence});']
+
+
+class TypeBinaryStruct(TypeBinary):
+    # Binary attribute whose payload is rendered as a pointer to a C struct
+    def struct_member(self, ri):
+        ri.cw.p(f'struct {c_lower(self.get("struct"))} *{self.c_name};')
+
+    def _attr_get(self, ri, var):
+        # Short payloads get a zeroed, full-size struct; longer ones keep all bytes
+        size = f'sizeof(struct {c_lower(self.get("struct"))})'
+        dst = f"{var}->{self.c_name}"
+        get_lines = [f"{var}->_{self.presence_type()}.{self.c_name} = len;",
+                     f"if (len < {size})", f"{dst} = calloc(1, {size});",
+                     "else", f"{dst} = malloc(len);",
+                     f"memcpy({dst}, ynl_attr_data(attr), len);"]
+        init_lines = ['len = ynl_attr_data_len(attr);']
+        return get_lines, init_lines, ['unsigned int len;']
+
+
+class TypeBinaryScalarArray(TypeBinary):
+    # Binary attribute holding a packed array of '__<sub-type>' scalars
+    def arg_member(self, ri):
+        return [f'__{self.get("sub-type")} *{self.c_name}', 'size_t count']
+
+    def presence_type(self):
+        return 'count'
+
+    def struct_member(self, ri):
+        ri.cw.p(f'__{self.get("sub-type")} *{self.c_name};')
+
+    def attr_put(self, ri, var):
+        cnt = f"{var}->_{self.presence_type()}.{self.c_name}"
+        ri.cw.block_start(line=f"if ({cnt})")
+        ri.cw.p(f"i = {cnt} * sizeof(__{self.get('sub-type')});")
+        ri.cw.p(f"ynl_attr_put(nlh, {self.enum_name}, {var}->{self.c_name}, i);")
+        ri.cw.block_end()
+
+    def _attr_get(self, ri, var):
+        # Round the payload down to a whole number of elements before copying
+        cnt = f"{var}->_count.{self.c_name}"
+        elem_sz = f"sizeof(__{self.get('sub-type')})"
+        return ([f"{cnt} = len / {elem_sz};", f"len = {cnt} * {elem_sz};",
+                 f"{var}->{self.c_name} = malloc(len);",
+                 f"memcpy({var}->{self.c_name}, ynl_attr_data(attr), len);"],
+                ['len = ynl_attr_data_len(attr);'], ['unsigned int len;'])
+
+    def _setter_lines(self, ri, member, presence):
+        elem_sz = f"sizeof(__{self.get('sub-type')})"
+        return [f"{presence} = count;", f"count *= {elem_sz};",
+                f"{member} = malloc(count);",
+                f'memcpy({member}, {self.c_name}, count);']
 
 
 class TypeBitfield32(Type):
@@ -608,7 +685,11 @@ class TypeNest(Type):
                             f"{self.enum_name}, {at}{var}->{self.c_name})")
 
     def _attr_get(self, ri, var):
-        get_lines = [f"if ({self.nested_render_name}_parse(&parg, attr))",
+        pns = self.family.pure_nested_structs[self.nested_attrs]
+        args = ["&parg", "attr"]
+        for sel in pns.external_selectors():
+            args.append(f'{var}->{sel.name}')
+        get_lines = [f"if ({self.nested_render_name}_parse({', '.join(args)}))",
                      "return YNL_PARSE_CB_ERROR;"]
         init_lines = [f"parg.rsp_policy = &{self.nested_render_name}_nest;",
                       f"parg.data = &{var}->{self.c_name};"]
@@ -638,22 +719,40 @@ class TypeMultiAttr(Type):
     def _complex_member_type(self, ri):
         if 'type' not in self.attr or self.attr['type'] == 'nest':
             return self.nested_struct_type
+        elif self.attr['type'] == 'binary' and 'struct' in self.attr:
+            return None  # use arg_member()
+        elif self.attr['type'] == 'string':
+            return 'struct ynl_string *'
         elif self.attr['type'] in scalars:
             scalar_pfx = '__' if ri.ku_space == 'user' else ''
             return scalar_pfx + self.attr['type']
         else:
             raise Exception(f"Sub-type {self.attr['type']} not supported yet")
 
+    def arg_member(self, ri):
+        if self.type != 'binary' or 'struct' not in self.attr:  # generic case
+            return super().arg_member(ri)
+        return [f'struct {c_lower(self.attr["struct"])} *{self.c_name}',
+                f'unsigned int n_{self.c_name}']
+
     def free_needs_iter(self):
-        return 'type' not in self.attr or self.attr['type'] == 'nest'
+        return self.attr['type'] in {'nest', 'string'}
 
     def _free_lines(self, ri, var, ref):
         lines = []
         if self.attr['type'] in scalars:
             lines += [f"free({var}->{ref}{self.c_name});"]
+        elif self.attr['type'] == 'binary':
+            lines += [f"free({var}->{ref}{self.c_name});"]
+        elif self.attr['type'] == 'string':
+            lines += [
+                f"for (i = 0; i < {var}->{ref}_count.{self.c_name}; i++)",
+                f"free({var}->{ref}{self.c_name}[i]);",
+                f"free({var}->{ref}{self.c_name});",
+            ]
         elif 'type' not in self.attr or self.attr['type'] == 'nest':
             lines += [
-                f"for (i = 0; i < {var}->{ref}n_{self.c_name}; i++)",
+                f"for (i = 0; i < {var}->{ref}_count.{self.c_name}; i++)",
                 f'{self.nested_render_name}_free(&{var}->{ref}{self.c_name}[i]);',
                 f"free({var}->{ref}{self.c_name});",
             ]
@@ -673,18 +772,22 @@ class TypeMultiAttr(Type):
     def attr_put(self, ri, var):
         if self.attr['type'] in scalars:
             put_type = self.type
-            ri.cw.p(f"for (i = 0; i < {var}->n_{self.c_name}; i++)")
+            ri.cw.p(f"for (i = 0; i < {var}->_count.{self.c_name}; i++)")
             ri.cw.p(f"ynl_attr_put_{put_type}(nlh, {self.enum_name}, {var}->{self.c_name}[i]);")
+        elif self.attr['type'] == 'binary' and 'struct' in self.attr:
+            ri.cw.p(f"for (i = 0; i < {var}->_count.{self.c_name}; i++)")
+            ri.cw.p(f"ynl_attr_put(nlh, {self.enum_name}, &{var}->{self.c_name}[i], sizeof(struct {c_lower(self.attr['struct'])}));")
+        elif self.attr['type'] == 'string':
+            ri.cw.p(f"for (i = 0; i < {var}->_count.{self.c_name}; i++)")
+            ri.cw.p(f"ynl_attr_put_str(nlh, {self.enum_name}, {var}->{self.c_name}[i]->str);")
         elif 'type' not in self.attr or self.attr['type'] == 'nest':
-            ri.cw.p(f"for (i = 0; i < {var}->n_{self.c_name}; i++)")
+            ri.cw.p(f"for (i = 0; i < {var}->_count.{self.c_name}; i++)")
             self._attr_put_line(ri, var, f"{self.nested_render_name}_put(nlh, " +
                                 f"{self.enum_name}, &{var}->{self.c_name}[i])")
         else:
             raise Exception(f"Put of MultiAttr sub-type {self.attr['type']} not supported yet")
 
     def _setter_lines(self, ri, member, presence):
-        # For multi-attr we have a count, not presence, hack up the presence
-        presence = presence[:-(len('_present.') + len(self.c_name))] + "n_" + self.c_name
         return [f"{member} = {self.c_name};",
                 f"{presence} = n_{self.c_name};"]
 
@@ -702,12 +805,22 @@ class TypeArrayNest(Type):
         elif self.attr['sub-type'] in scalars:
             scalar_pfx = '__' if ri.ku_space == 'user' else ''
             return scalar_pfx + self.attr['sub-type']
+        elif self.attr['sub-type'] == 'binary' and 'exact-len' in self.checks:
+            return None  # use arg_member()
         else:
             raise Exception(f"Sub-type {self.attr['sub-type']} not supported yet")
 
+    def arg_member(self, ri):
+        if self.sub_type != 'binary' or 'exact-len' not in self.checks:  # generic case
+            return super().arg_member(ri)
+        return [f'unsigned char (*{self.c_name})[{self.checks["exact-len"]}]',
+                f'unsigned int n_{self.c_name}']
+
     def _attr_typol(self):
         if self.attr['sub-type'] in scalars:
             return f'.type = YNL_PT_U{c_upper(self.sub_type[1:])}, '
+        elif self.attr['sub-type'] == 'binary' and 'exact-len' in self.checks:
+            return f'.type = YNL_PT_BINARY, .len = {self.checks["exact-len"]}, '
         else:
             return f'.type = YNL_PT_NEST, .nest = &{self.nested_render_name}_nest, '
 
@@ -717,10 +830,31 @@ class TypeArrayNest(Type):
                      'ynl_attr_for_each_nested(attr2, attr) {',
                      '\tif (ynl_attr_validate(yarg, attr2))',
                      '\t\treturn YNL_PARSE_CB_ERROR;',
-                     f'\t{var}->n_{self.c_name}++;',
+                     f'\t{var}->_count.{self.c_name}++;',
                      '}']
         return get_lines, None, local_vars
 
+    def attr_put(self, ri, var):
+        loop = f'for (i = 0; i < {var}->_count.{self.c_name}; i++)'  # shared C loop header
+        ri.cw.p(f'array = ynl_attr_nest_start(nlh, {self.enum_name});')
+        if self.sub_type in scalars:
+            ri.cw.block_start(line=loop)
+            ri.cw.p(f"ynl_attr_put_{self.sub_type}(nlh, i, {var}->{self.c_name}[i]);")
+            ri.cw.block_end()
+        elif self.sub_type == 'binary' and 'exact-len' in self.checks:
+            ri.cw.p(loop)
+            ri.cw.p(f"ynl_attr_put(nlh, i, {var}->{self.c_name}[i], {self.checks['exact-len']});")
+        elif self.sub_type == 'nest':
+            ri.cw.p(loop)
+            ri.cw.p(f"{self.nested_render_name}_put(nlh, i, &{var}->{self.c_name}[i]);")
+        else:
+            raise Exception(f"Put for ArrayNest sub-type {self.attr['sub-type']} not supported, yet")
+        ri.cw.p('ynl_attr_nest_end(nlh, array);')
+
+    def _setter_lines(self, ri, member, presence):
+        count_arg = f"n_{self.c_name}"  # name of the setter's element-count argument
+        return [f"{member} = {self.c_name};", f"{presence} = {count_arg};"]
+
 
 class TypeNestTypeValue(Type):
     def _complex_member_type(self, ri):
@@ -752,14 +886,71 @@ class TypeNestTypeValue(Type):
         return get_lines, init_lines, local_vars
 
 
+class TypeSubMessage(TypeNest):
+    # Nested attribute whose format is chosen by a separate selector attribute
+    def __init__(self, family, attr_set, attr, value):
+        super().__init__(family, attr_set, attr, value)
+
+        self.selector = Selector(attr, attr_set)
+
+    def _attr_typol(self):
+        typol = f'.type = YNL_PT_NEST, .nest = &{self.nested_render_name}_nest, '
+        typol += '.is_submsg = 1, '
+        # Reverse-parsing of the policy (ynl_err_walk() in ynl.c) does not
+        # support external selectors. No family uses sub-messages with external
+        # selector for requests so this is fine for now.
+        if not self.selector.is_external():
+            typol += f'.selector_type = {self.attr_set[self["selector"]].value}, '
+        return typol
+
+    def _attr_get(self, ri, var):
+        sel = c_lower(self['selector'])
+        # External selector: read the local '_sel_<name>' instead of a struct member
+        sel_var = f"_sel_{sel}" if self.selector.is_external() else f"{var}->{sel}"
+        # Generated code fails the parse if the selector is unset at this point
+        get_lines = [f'if (!{sel_var})',
+                     'return ynl_submsg_failed(yarg, "%s", "%s");' %
+                     (self.name, self['selector']),
+                     f"if ({self.nested_render_name}_parse(&parg, {sel_var}, attr))",
+                     "return YNL_PARSE_CB_ERROR;"]
+        init_lines = [f"parg.rsp_policy = &{self.nested_render_name}_nest;",
+                      f"parg.data = &{var}->{self.c_name};"]
+        return get_lines, init_lines, None
+
+
+class Selector:
+    # Links a sub-message attribute to the attribute that selects its format
+    def __init__(self, msg_attr, attr_set):
+        self.name = msg_attr["selector"]
+        self._external = self.name not in attr_set
+
+        if self._external:
+            # The selector will need to get passed down through the structs
+            self.attr = None
+        else:
+            self.attr = attr_set[self.name]
+            self.attr.is_selector = True
+
+    def set_attr(self, attr):
+        self.attr = attr
+
+    def is_external(self):
+        return self._external
+
+
 class Struct:
-    def __init__(self, family, space_name, type_list=None, inherited=None):
+    def __init__(self, family, space_name, type_list=None, fixed_header=None,
+                 inherited=None, submsg=None):
         self.family = family
         self.space_name = space_name
         self.attr_set = family.attr_sets[space_name]
         # Use list to catch comparisons with empty sets
         self._inherited = inherited if inherited is not None else []
         self.inherited = []
+        self.fixed_header = None
+        if fixed_header:
+            self.fixed_header = 'struct ' + c_lower(fixed_header)
+        self.submsg = submsg
 
         self.nested = type_list is None
         if family.name == c_lower(space_name):
@@ -809,6 +1000,19 @@ class Struct:
             raise Exception("Inheriting different members not supported")
         self.inherited = [c_lower(x) for x in sorted(self._inherited)]
 
+    def external_selectors(self):
+        # Selectors of sub-message members that are not in this struct's attr set
+        return [attr.selector
+                for _, attr in self.attr_list
+                if isinstance(attr, TypeSubMessage)
+                and attr.selector.is_external()]
+
+    def free_needs_iter(self):
+        # True as soon as one member's own free_needs_iter() says so
+        return any(
+            attr.free_needs_iter() for _, attr in self.attr_list
+        )
+
 
 class EnumEntry(SpecEnumEntry):
     def __init__(self, enum_set, yaml, prev, value_start):
@@ -862,7 +1066,7 @@ class EnumSet(SpecEnumSet):
         high = max([x.value for x in self.entries.values()])
 
         if high - low + 1 != len(self.entries):
-            raise Exception("Can't get value range for a noncontiguous enum")
+            return None, None
 
         return low, high
 
@@ -909,18 +1113,25 @@ class AttrSet(SpecAttrSet):
         elif elem['type'] == 'string':
             t = TypeString(self.family, self, elem, value)
         elif elem['type'] == 'binary':
-            t = TypeBinary(self.family, self, elem, value)
+            if 'struct' in elem:
+                t = TypeBinaryStruct(self.family, self, elem, value)
+            elif elem.get('sub-type') in scalars:
+                t = TypeBinaryScalarArray(self.family, self, elem, value)
+            else:
+                t = TypeBinary(self.family, self, elem, value)
         elif elem['type'] == 'bitfield32':
             t = TypeBitfield32(self.family, self, elem, value)
         elif elem['type'] == 'nest':
             t = TypeNest(self.family, self, elem, value)
         elif elem['type'] == 'indexed-array' and 'sub-type' in elem:
-            if elem["sub-type"] in ['nest', 'u32']:
+            if elem["sub-type"] in ['binary', 'nest', 'u32']:
                 t = TypeArrayNest(self.family, self, elem, value)
             else:
                 raise Exception(f'new_attr: unsupported sub-type {elem["sub-type"]}')
         elif elem['type'] == 'nest-type-value':
             t = TypeNestTypeValue(self.family, self, elem, value)
+        elif elem['type'] == 'sub-message':
+            t = TypeSubMessage(self.family, self, elem, value)
         else:
             raise Exception(f"No typed class for type {elem['type']}")
 
@@ -932,6 +1143,14 @@ class AttrSet(SpecAttrSet):
 
 class Operation(SpecOperation):
     def __init__(self, family, yaml, req_value, rsp_value):
+        # Fill in missing operation properties (for fixed hdr-only msgs)
+        for mode in ['do', 'dump', 'event']:
+            for direction in ['request', 'reply']:
+                try:
+                    yaml[mode][direction].setdefault('attributes', [])
+                except KeyError:
+                    pass
+
         super().__init__(family, yaml, req_value, rsp_value)
 
         self.render_name = c_lower(family.ident_name + '_' + self.name)
@@ -957,6 +1176,16 @@ class Operation(SpecOperation):
         self.has_ntf = True
 
 
+class SubMessage(SpecSubMessage):
+    def __init__(self, family, yaml):
+        super().__init__(family, yaml)
+        # family ident name + '-' + sub-message name, passed through c_lower()
+        self.render_name = c_lower(f"{family.ident_name}-{yaml['name']}")
+
+    def resolve(self):
+        self.resolve_up(super())
+
+
 class Family(SpecFamily):
     def __init__(self, file_name, exclude_ops):
         # Added by resolve:
@@ -993,9 +1222,6 @@ class Family(SpecFamily):
     def resolve(self):
         self.resolve_up(super())
 
-        if self.yaml.get('protocol', 'genetlink') not in {'genetlink', 'genetlink-c', 'genetlink-legacy'}:
-            raise Exception("Codegen only supported for genetlink")
-
         self.c_name = c_lower(self.ident_name)
         if 'name-prefix' in self.yaml['operations']:
             self.op_prefix = c_upper(self.yaml['operations']['name-prefix'])
@@ -1018,7 +1244,7 @@ class Family(SpecFamily):
 
         # dict space-name -> 'request': set(attrs), 'reply': set(attrs)
         self.root_sets = dict()
-        # dict space-name -> set('request', 'reply')
+        # dict space-name -> Struct
         self.pure_nested_structs = dict()
 
         self._mark_notify()
@@ -1027,6 +1253,7 @@ class Family(SpecFamily):
         self._load_root_sets()
         self._load_nested_sets()
         self._load_attr_use()
+        self._load_selector_passing()
         self._load_hooks()
 
         self.kernel_policy = self.yaml.get('kernel-policy', 'split')
@@ -1042,6 +1269,12 @@ class Family(SpecFamily):
     def new_operation(self, elem, req_value, rsp_value):
         return Operation(self, elem, req_value, rsp_value)
 
+    def new_sub_message(self, elem):  # wrap a sub-message spec element in the codegen SubMessage class
+        return SubMessage(self, elem)
+
+    def is_classic(self):  # "classic" here means the spec's protocol is netlink-raw
+        return self.proto == 'netlink-raw'
+
     def _mark_notify(self):
         for op in self.msgs.values():
             if 'notify' in op:
@@ -1091,20 +1324,85 @@ class Family(SpecFamily):
             for _, spec in self.attr_sets[name].items():
                 if 'nested-attributes' in spec:
                     nested = spec['nested-attributes']
-                    # If the unknown nest we hit is recursive it's fine, it'll be a pointer
-                    if self.pure_nested_structs[nested].recursive:
-                        continue
-                    if nested not in pns_key_seen:
-                        # Dicts are sorted, this will make struct last
-                        struct = self.pure_nested_structs.pop(name)
-                        self.pure_nested_structs[name] = struct
-                        finished = False
-                        break
+                elif 'sub-message' in spec:
+                    nested = spec.sub_message
+                else:
+                    continue
+
+                # If the unknown nest we hit is recursive it's fine, it'll be a pointer
+                if self.pure_nested_structs[nested].recursive:
+                    continue
+                if nested not in pns_key_seen:
+                    # Dicts are sorted, this will make struct last
+                    struct = self.pure_nested_structs.pop(name)
+                    self.pure_nested_structs[name] = struct
+                    finished = False
+                    break
             if finished:
                 pns_key_seen.add(name)
             else:
                 pns_key_list.append(name)
 
+    def _load_nested_set_nest(self, spec):
+        # Register the Struct for a 'nested-attributes' spec and return the set name
+        nested = spec['nested-attributes']
+        if nested in self.root_sets:
+            raise Exception(f'Using attr set as root and nested not supported - {nested}')
+
+        # The same set object is handed to a newly created Struct and filled in below
+        inherit = set()
+        if nested not in self.pure_nested_structs:
+            self.pure_nested_structs[nested] = \
+                Struct(self, nested, inherited=inherit,
+                       fixed_header=spec.get('fixed-header'))
+
+        if 'type-value' in spec:
+            inherit.update(set(spec['type-value']))
+        elif spec['type'] == 'indexed-array':
+            inherit.add('idx')
+        self.pure_nested_structs[nested].set_inherited(inherit)
+
+        return nested
+
+    def _load_nested_set_submsg(self, spec):
+        # Fake the struct type for the sub-message itself:
+        # it's not an attr_set, but codegen wants attr_sets.
+        submsg = self.sub_msgs[spec["sub-message"]]
+        nested = submsg.name
+
+        attrs = []
+        for name, fmt in submsg.formats.items():  # one synthetic attribute per format
+            attr = {
+                "name": name,
+                "parent-sub-message": spec,
+            }
+            if 'attribute-set' in fmt:  # format carries attributes: model it as a nest
+                attr |= {
+                    "type": "nest",
+                    "nested-attributes": fmt['attribute-set'],
+                }
+                if 'fixed-header' in fmt:
+                    attr |= { "fixed-header": fmt["fixed-header"] }
+            elif 'fixed-header' in fmt:  # header only: model it as a binary struct
+                attr |= {
+                    "type": "binary",
+                    "struct": fmt["fixed-header"],
+                }
+            else:  # neither attributes nor header: model it as a flag
+                attr["type"] = "flag"
+            attrs.append(attr)
+
+        self.attr_sets[nested] = AttrSet(self, {
+            "name": nested,
+            "name-pfx": self.name + '-' + spec.name + '-',
+            "attributes": attrs
+        })
+
+        if nested not in self.pure_nested_structs:  # keep the Struct from an earlier use
+            self.pure_nested_structs[nested] = Struct(self, nested, submsg=submsg)
+
+        return nested
+
     def _load_nested_sets(self):
         attr_set_queue = list(self.root_sets.keys())
         attr_set_seen = set(self.root_sets.keys())
@@ -1112,37 +1410,32 @@ class Family(SpecFamily):
         while len(attr_set_queue):
             a_set = attr_set_queue.pop(0)
             for attr, spec in self.attr_sets[a_set].items():
-                if 'nested-attributes' not in spec:
+                if 'nested-attributes' in spec:
+                    nested = self._load_nested_set_nest(spec)
+                elif 'sub-message' in spec:
+                    nested = self._load_nested_set_submsg(spec)
+                else:
                     continue
 
-                nested = spec['nested-attributes']
                 if nested not in attr_set_seen:
                     attr_set_queue.append(nested)
                     attr_set_seen.add(nested)
 
-                inherit = set()
-                if nested not in self.root_sets:
-                    if nested not in self.pure_nested_structs:
-                        self.pure_nested_structs[nested] = Struct(self, nested, inherited=inherit)
-                else:
-                    raise Exception(f'Using attr set as root and nested not supported - {nested}')
-
-                if 'type-value' in spec:
-                    if nested in self.root_sets:
-                        raise Exception("Inheriting members to a space used as root not supported")
-                    inherit.update(set(spec['type-value']))
-                elif spec['type'] == 'indexed-array':
-                    inherit.add('idx')
-                self.pure_nested_structs[nested].set_inherited(inherit)
-
         for root_set, rs_members in self.root_sets.items():
             for attr, spec in self.attr_sets[root_set].items():
                 if 'nested-attributes' in spec:
                     nested = spec['nested-attributes']
+                elif 'sub-message' in spec:
+                    nested = spec.sub_message
+                else:
+                    nested = None
+
+                if nested:
                     if attr in rs_members['request']:
                         self.pure_nested_structs[nested].request = True
                     if attr in rs_members['reply']:
                         self.pure_nested_structs[nested].reply = True
+
                     if spec.is_multi_val():
                         child = self.pure_nested_structs.get(nested)
                         child.in_multi_val = True
@@ -1152,20 +1445,26 @@ class Family(SpecFamily):
         # Propagate the request / reply / recursive
         for attr_set, struct in reversed(self.pure_nested_structs.items()):
             for _, spec in self.attr_sets[attr_set].items():
-                if 'nested-attributes' in spec:
-                    child_name = spec['nested-attributes']
-                    struct.child_nests.add(child_name)
-                    child = self.pure_nested_structs.get(child_name)
-                    if child:
-                        if not child.recursive:
-                            struct.child_nests.update(child.child_nests)
-                        child.request |= struct.request
-                        child.reply |= struct.reply
-                        if spec.is_multi_val():
-                            child.in_multi_val = True
                 if attr_set in struct.child_nests:
                     struct.recursive = True
 
+                if 'nested-attributes' in spec:
+                    child_name = spec['nested-attributes']
+                elif 'sub-message' in spec:
+                    child_name = spec.sub_message
+                else:
+                    continue
+
+                struct.child_nests.add(child_name)
+                child = self.pure_nested_structs.get(child_name)
+                if child:
+                    if not child.recursive:
+                        struct.child_nests.update(child.child_nests)
+                    child.request |= struct.request
+                    child.reply |= struct.reply
+                    if spec.is_multi_val():
+                        child.in_multi_val = True
+
         self._sort_pure_types()
 
     def _load_attr_use(self):
@@ -1184,6 +1483,30 @@ class Family(SpecFamily):
                 if attr in rs_members['reply']:
                     spec.set_reply()
 
+    def _load_selector_passing(self):
+        """Bind each child's external selectors to attrs of the parent set."""
+        # Pure nested sets are walked in reverse insertion order, root sets last.
+        # Root sets have no struct of their own, but they must be terminal.
+        set_names = [name for name, _ in reversed(self.pure_nested_structs.items())]
+        set_names += list(self.root_sets)
+
+        for set_name in set_names:
+            parent_set = self.attr_sets[set_name]
+            for _, spec in parent_set.items():
+                if 'nested-attributes' in spec:
+                    child_name = spec['nested-attributes']
+                elif 'sub-message' in spec:
+                    child_name = spec.sub_message
+                else:
+                    continue
+
+                child = self.pure_nested_structs.get(child_name)
+                for selector in child.external_selectors():
+                    # The selector attr has to live in the direct parent set
+                    if selector.name not in parent_set:
+                        raise Exception("Passing selector thru more than one layer not supported")
+                    selector.set_attr(parent_set[selector.name])
+
     def _load_global_policy(self):
         global_set = set()
         attr_set_name = None
@@ -1233,12 +1556,19 @@ class RenderInfo:
         self.op_mode = op_mode
         self.op = op
 
-        self.fixed_hdr = None
+        fixed_hdr = op.fixed_header if op else None
+        self.fixed_hdr_len = 'ys->family->hdr_len'
         if op and op.fixed_header:
-            self.fixed_hdr = 'struct ' + c_lower(op.fixed_header)
+            if op.fixed_header != family.fixed_header:
+                if family.is_classic():
+                    self.fixed_hdr_len = f"sizeof(struct {c_lower(fixed_hdr)})"
+                else:
+                    raise Exception(f"Per-op fixed header not supported, yet")
+
 
         # 'do' and 'dump' response parsing is identical
         self.type_consistent = True
+        self.type_oneside = False
         if op_mode != 'do' and 'dump' in op:
             if 'do' in op:
                 if ('reply' in op['do']) != ('reply' in op["dump"]):
@@ -1246,7 +1576,8 @@ class RenderInfo:
                 elif 'reply' in op['do'] and op["do"]["reply"] != op["dump"]["reply"]:
                     self.type_consistent = False
             else:
-                self.type_consistent = False
+                self.type_consistent = True
+                self.type_oneside = True
 
         self.attr_set = attr_set
         if not self.attr_set:
@@ -1264,15 +1595,26 @@ class RenderInfo:
 
         self.struct = dict()
         if op_mode == 'notify':
-            op_mode = 'do'
+            op_mode = 'do' if 'do' in op else 'dump'
         for op_dir in ['request', 'reply']:
             if op:
                 type_list = []
                 if op_dir in op[op_mode]:
                     type_list = op[op_mode][op_dir]['attributes']
-                self.struct[op_dir] = Struct(family, self.attr_set, type_list=type_list)
+                self.struct[op_dir] = Struct(family, self.attr_set,
+                                             fixed_header=fixed_hdr,
+                                             type_list=type_list)
         if op_mode == 'event':
-            self.struct['reply'] = Struct(family, self.attr_set, type_list=op['event']['attributes'])
+            self.struct['reply'] = Struct(family, self.attr_set,
+                                          fixed_header=fixed_hdr,
+                                          type_list=op['event']['attributes'])
+
+    def type_empty(self, key):
+        st = self.struct[key]  # empty means: no attributes and no fixed header
+        return len(st.attr_list) == 0 and st.fixed_header is None
+
+    def needs_nlflags(self, direction):  # classic-netlink 'do' requests only
+        return (self.op_mode, direction) == ('do', 'request') and self.family.is_classic()
 
 
 class CodeWriter:
@@ -1330,6 +1672,7 @@ class CodeWriter:
         if self._silent_block:
             ind += 1
         self._silent_block = line.endswith(')') and CodeWriter._is_cond(line)
+        self._silent_block |= line.strip() == 'else'
         if line[0] == '#':
             ind = 0
         if add_ind:
@@ -1540,7 +1883,9 @@ def op_prefix(ri, direction, deref=False):
         suffix += f"{direction_to_suffix[direction]}"
     else:
         if direction == 'request':
-            suffix += '_req_dump'
+            suffix += '_req'
+            if not ri.type_oneside:
+                suffix += '_dump'
         else:
             if ri.type_consistent:
                 if deref:
@@ -1584,11 +1929,37 @@ def print_dump_prototype(ri):
     print_prototype(ri, "request")
 
 
+def put_typol_submsg(cw, struct):
+    """Emit the policy table and the policy nest for a sub-message struct."""
+    cw.block_start(line=f'const struct ynl_policy_attr {struct.render_name}_policy[] =')
+
+    count = 0
+    for count, (name, arg) in enumerate(struct.member_list(), start=1):
+        # Only nest-typed members point at a nested policy
+        nest = f" .nest = &{arg.nested_render_name}_nest," if arg.type == 'nest' else ""
+        cw.p('[%d] = { .type = YNL_PT_SUBMSG, .name = "%s",%s },' %
+             (count - 1, name, nest))
+
+    cw.block_end(line=';')
+    cw.nl()
+
+    # max_attr is the index of the last table entry (-1 if there were none)
+    cw.block_start(line=f'const struct ynl_policy_nest {struct.render_name}_nest =')
+    cw.p(f'.max_attr = {count - 1},')
+    cw.p(f'.table = {struct.render_name}_policy,')
+    cw.block_end(line=';')
+    cw.nl()
+
+
 def put_typol_fwd(cw, struct):
     cw.p(f'extern const struct ynl_policy_nest {struct.render_name}_nest;')
 
 
 def put_typol(cw, struct):
+    if struct.submsg:
+        put_typol_submsg(cw, struct)
+        return
+
     type_max = struct.attr_set.max_name
     cw.block_start(line=f'const struct ynl_policy_attr {struct.render_name}_policy[{type_max} + 1] =')
 
@@ -1674,13 +2045,24 @@ def put_req_nested(ri, struct):
     local_vars = []
     init_lines = []
 
-    local_vars.append('struct nlattr *nest;')
-    init_lines.append("nest = ynl_attr_nest_start(nlh, attr_type);")
-
+    if struct.submsg is None:
+        local_vars.append('struct nlattr *nest;')
+        init_lines.append("nest = ynl_attr_nest_start(nlh, attr_type);")
+    if struct.fixed_header:
+        local_vars.append('void *hdr;')
+        struct_sz = f'sizeof({struct.fixed_header})'
+        init_lines.append(f"hdr = ynl_nlmsg_put_extra_header(nlh, {struct_sz});")
+        init_lines.append(f"memcpy(hdr, &obj->_hdr, {struct_sz});")
+
+    has_anest = False
+    has_count = False
     for _, arg in struct.member_list():
-        if arg.presence_type() == 'count':
-            local_vars.append('unsigned int i;')
-            break
+        has_anest |= arg.type == 'indexed-array'
+        has_count |= arg.presence_type() == 'count'
+    if has_anest:
+        local_vars.append('struct nlattr *array;')
+    if has_count:
+        local_vars.append('unsigned int i;')
 
     put_req_nested_prototype(ri, struct, suffix='')
     ri.cw.block_start()
@@ -1692,7 +2074,8 @@ def put_req_nested(ri, struct):
     for _, arg in struct.member_list():
         arg.attr_put(ri, "obj")
 
-    ri.cw.p("ynl_attr_nest_end(nlh, nest);")
+    if struct.submsg is None:
+        ri.cw.p("ynl_attr_nest_end(nlh, nest);")
 
     ri.cw.nl()
     ri.cw.p('return 0;')
@@ -1701,19 +2084,27 @@ def put_req_nested(ri, struct):
 
 
 def _multi_parse(ri, struct, init_lines, local_vars):
+    if struct.fixed_header:
+        local_vars += ['void *hdr;']
     if struct.nested:
-        iter_line = "ynl_attr_for_each_nested(attr, nested)"
+        if struct.fixed_header:
+            iter_line = f"ynl_attr_for_each_nested_off(attr, nested, sizeof({struct.fixed_header}))"
+        else:
+            iter_line = "ynl_attr_for_each_nested(attr, nested)"
     else:
-        if ri.fixed_hdr:
-            local_vars += ['void *hdr;']
         iter_line = "ynl_attr_for_each(attr, nlh, yarg->ys->family->hdr_len)"
+        if ri.op.fixed_header != ri.family.fixed_header:
+            if ri.family.is_classic():
+                iter_line = f"ynl_attr_for_each(attr, nlh, sizeof({struct.fixed_header}))"
+            else:
+                raise Exception(f"Per-op fixed header not supported, yet")
 
     array_nests = set()
     multi_attrs = set()
     needs_parg = False
     for arg, aspec in struct.member_list():
         if aspec['type'] == 'indexed-array' and 'sub-type' in aspec:
-            if aspec["sub-type"] == 'nest':
+            if aspec["sub-type"] in {'binary', 'nest'}:
                 local_vars.append(f'const struct nlattr *attr_{aspec.c_name};')
                 array_nests.add(arg)
             elif aspec['sub-type'] in scalars:
@@ -1724,6 +2115,7 @@ def _multi_parse(ri, struct, init_lines, local_vars):
         if 'multi-attr' in aspec:
             multi_attrs.add(arg)
         needs_parg |= 'nested-attributes' in aspec
+        needs_parg |= 'sub-message' in aspec
     if array_nests or multi_attrs:
         local_vars.append('int i;')
     if needs_parg:
@@ -1745,9 +2137,14 @@ def _multi_parse(ri, struct, init_lines, local_vars):
     for arg in struct.inherited:
         ri.cw.p(f'dst->{arg} = {arg};')
 
-    if ri.fixed_hdr:
-        ri.cw.p('hdr = ynl_nlmsg_data_offset(nlh, sizeof(struct genlmsghdr));')
-        ri.cw.p(f"memcpy(&dst->_hdr, hdr, sizeof({ri.fixed_hdr}));")
+    if struct.fixed_header:
+        if struct.nested:
+            ri.cw.p('hdr = ynl_attr_data(nested);')
+        elif ri.family.is_classic():
+            ri.cw.p('hdr = ynl_nlmsg_data(nlh);')
+        else:
+            ri.cw.p('hdr = ynl_nlmsg_data_offset(nlh, sizeof(struct genlmsghdr));')
+        ri.cw.p(f"memcpy(&dst->_hdr, hdr, sizeof({struct.fixed_header}));")
     for anest in sorted(all_multi):
         aspec = struct[anest]
         ri.cw.p(f"if (dst->{aspec.c_name})")
@@ -1772,7 +2169,7 @@ def _multi_parse(ri, struct, init_lines, local_vars):
 
         ri.cw.block_start(line=f"if (n_{aspec.c_name})")
         ri.cw.p(f"dst->{aspec.c_name} = calloc(n_{aspec.c_name}, sizeof(*dst->{aspec.c_name}));")
-        ri.cw.p(f"dst->n_{aspec.c_name} = n_{aspec.c_name};")
+        ri.cw.p(f"dst->_count.{aspec.c_name} = n_{aspec.c_name};")
         ri.cw.p('i = 0;')
         if 'nested-attributes' in aspec:
             ri.cw.p(f"parg.rsp_policy = &{aspec.nested_render_name}_nest;")
@@ -1783,6 +2180,9 @@ def _multi_parse(ri, struct, init_lines, local_vars):
             ri.cw.p('return YNL_PARSE_CB_ERROR;')
         elif aspec.sub_type in scalars:
             ri.cw.p(f"dst->{aspec.c_name}[i] = ynl_attr_get_{aspec.sub_type}(attr);")
+        elif aspec.sub_type == 'binary' and 'exact-len' in aspec.checks:
+            # Length is validated by typol
+            ri.cw.p(f'memcpy(dst->{aspec.c_name}[i], ynl_attr_data(attr), {aspec.checks["exact-len"]});')
         else:
             raise Exception(f"Nest parsing type not supported in {aspec['name']}")
         ri.cw.p('i++;')
@@ -1794,7 +2194,7 @@ def _multi_parse(ri, struct, init_lines, local_vars):
         aspec = struct[anest]
         ri.cw.block_start(line=f"if (n_{aspec.c_name})")
         ri.cw.p(f"dst->{aspec.c_name} = calloc(n_{aspec.c_name}, sizeof(*dst->{aspec.c_name}));")
-        ri.cw.p(f"dst->n_{aspec.c_name} = n_{aspec.c_name};")
+        ri.cw.p(f"dst->_count.{aspec.c_name} = n_{aspec.c_name};")
         ri.cw.p('i = 0;')
         if 'nested-attributes' in aspec:
             ri.cw.p(f"parg.rsp_policy = &{aspec.nested_render_name}_nest;")
@@ -1806,8 +2206,22 @@ def _multi_parse(ri, struct, init_lines, local_vars):
             ri.cw.p('return YNL_PARSE_CB_ERROR;')
         elif aspec.type in scalars:
             ri.cw.p(f"dst->{aspec.c_name}[i] = ynl_attr_get_{aspec.type}(attr);")
+        elif aspec.type == 'binary' and 'struct' in aspec:
+            ri.cw.p('size_t len = ynl_attr_data_len(attr);')
+            ri.cw.nl()
+            ri.cw.p(f'if (len > sizeof(dst->{aspec.c_name}[0]))')
+            ri.cw.p(f'len = sizeof(dst->{aspec.c_name}[0]);')
+            ri.cw.p(f"memcpy(&dst->{aspec.c_name}[i], ynl_attr_data(attr), len);")
+        elif aspec.type == 'string':
+            ri.cw.p('unsigned int len;')
+            ri.cw.nl()
+            ri.cw.p('len = strnlen(ynl_attr_get_str(attr), ynl_attr_data_len(attr));')
+            ri.cw.p(f'dst->{aspec.c_name}[i] = malloc(sizeof(struct ynl_string) + len + 1);')
+            ri.cw.p(f"dst->{aspec.c_name}[i]->len = len;")
+            ri.cw.p(f"memcpy(dst->{aspec.c_name}[i]->str, ynl_attr_get_str(attr), len);")
+            ri.cw.p(f"dst->{aspec.c_name}[i]->str[len] = 0;")
         else:
-            raise Exception('Nest parsing type not supported yet')
+            raise Exception(f'Nest parsing of type {aspec.type} not supported yet')
         ri.cw.p('i++;')
         ri.cw.block_end()
         ri.cw.block_end()
@@ -1822,9 +2236,49 @@ def _multi_parse(ri, struct, init_lines, local_vars):
     ri.cw.nl()
 
 
+def parse_rsp_submsg(ri, struct):
+    """Emit the parser for a sub-message: one strcmp(sel, ...) branch per member."""
+    parse_rsp_nested_prototype(ri, struct, suffix='')
+
+    var = 'dst'
+    local_vars = {'const struct nlattr *attr = nested;',
+                  f'{struct.ptr_name}{var} = yarg->data;',
+                  'struct ynl_parse_arg parg;'}
+
+    # First pass only collects (and de-duplicates) the members' local variables
+    for _, arg in struct.member_list():
+        _, _, l_vars = arg._attr_get(ri, var)
+        local_vars |= set(l_vars) if l_vars else set()
+
+    ri.cw.block_start()
+    # Set iteration order varies between runs; sort so the output is reproducible
+    ri.cw.write_func_lvar(sorted(local_vars))
+    ri.cw.p('parg.ys = yarg->ys;')
+    ri.cw.nl()
+
+    for idx, (name, arg) in enumerate(struct.member_list()):
+        kw = 'else if' if idx else 'if'
+        ri.cw.block_start(line=f'{kw} (!strcmp(sel, "{name}"))')
+        get_lines, init_lines, _ = arg._attr_get(ri, var)
+        for line in init_lines or []:
+            ri.cw.p(line)
+        for line in get_lines:
+            ri.cw.p(line)
+        if arg.presence_type() == 'present':
+            ri.cw.p(f"{var}->_present.{arg.c_name} = 1;")
+        ri.cw.block_end()
+    ri.cw.p('return 0;')
+    ri.cw.block_end()
+    ri.cw.nl()
+
+
 def parse_rsp_nested_prototype(ri, struct, suffix=';'):
     func_args = ['struct ynl_parse_arg *yarg',
                  'const struct nlattr *nested']
+    for sel in struct.external_selectors():
+        func_args.append('const char *_sel_' + sel.name)
+    if struct.submsg:
+        func_args.insert(1, 'const char *sel')
     for arg in struct.inherited:
         func_args.append('__u32 ' + arg)
 
@@ -1833,6 +2287,9 @@ def parse_rsp_nested_prototype(ri, struct, suffix=';'):
 
 
 def parse_rsp_nested(ri, struct):
+    if struct.submsg:
+        return parse_rsp_submsg(ri, struct)
+
     parse_rsp_nested_prototype(ri, struct, suffix='')
 
     local_vars = ['const struct nlattr *attr;',
@@ -1885,7 +2342,7 @@ def print_req(ri):
         ret_err = 'NULL'
         local_vars += [f'{type_name(ri, rdir(direction))} *rsp;']
 
-    if ri.fixed_hdr:
+    if ri.struct["request"].fixed_header:
         local_vars += ['size_t hdr_len;',
                        'void *hdr;']
 
@@ -1898,14 +2355,18 @@ def print_req(ri):
     ri.cw.block_start()
     ri.cw.write_func_lvar(local_vars)
 
-    ri.cw.p(f"nlh = ynl_gemsg_start_req(ys, {ri.nl.get_family_id()}, {ri.op.enum_name}, 1);")
+    if ri.family.is_classic():
+        ri.cw.p(f"nlh = ynl_msg_start_req(ys, {ri.op.enum_name}, req->_nlmsg_flags);")
+    else:
+        ri.cw.p(f"nlh = ynl_gemsg_start_req(ys, {ri.nl.get_family_id()}, {ri.op.enum_name}, 1);")
 
     ri.cw.p(f"ys->req_policy = &{ri.struct['request'].render_name}_nest;")
+    ri.cw.p(f"ys->req_hdr_len = {ri.fixed_hdr_len};")
     if 'reply' in ri.op[ri.op_mode]:
         ri.cw.p(f"yrs.yarg.rsp_policy = &{ri.struct['reply'].render_name}_nest;")
     ri.cw.nl()
 
-    if ri.fixed_hdr:
+    if ri.struct['request'].fixed_header:
         ri.cw.p("hdr_len = sizeof(req->_hdr);")
         ri.cw.p("hdr = ynl_nlmsg_put_extra_header(nlh, hdr_len);")
         ri.cw.p("memcpy(hdr, &req->_hdr, hdr_len);")
@@ -1951,7 +2412,7 @@ def print_dump(ri):
                   'struct nlmsghdr *nlh;',
                   'int err;']
 
-    if ri.fixed_hdr:
+    if ri.struct['request'].fixed_header:
         local_vars += ['size_t hdr_len;',
                        'void *hdr;']
 
@@ -1967,9 +2428,12 @@ def print_dump(ri):
     else:
         ri.cw.p(f'yds.rsp_cmd = {ri.op.rsp_value};')
     ri.cw.nl()
-    ri.cw.p(f"nlh = ynl_gemsg_start_dump(ys, {ri.nl.get_family_id()}, {ri.op.enum_name}, 1);")
+    if ri.family.is_classic():
+        ri.cw.p(f"nlh = ynl_msg_start_dump(ys, {ri.op.enum_name});")
+    else:
+        ri.cw.p(f"nlh = ynl_gemsg_start_dump(ys, {ri.nl.get_family_id()}, {ri.op.enum_name}, 1);")
 
-    if ri.fixed_hdr:
+    if ri.struct['request'].fixed_header:
         ri.cw.p("hdr_len = sizeof(req->_hdr);")
         ri.cw.p("hdr = ynl_nlmsg_put_extra_header(nlh, hdr_len);")
         ri.cw.p("memcpy(hdr, &req->_hdr, hdr_len);")
@@ -1977,6 +2441,7 @@ def print_dump(ri):
 
     if "request" in ri.op[ri.op_mode]:
         ri.cw.p(f"ys->req_policy = &{ri.struct['request'].render_name}_nest;")
+        ri.cw.p(f"ys->req_hdr_len = {ri.fixed_hdr_len};")
         ri.cw.nl()
         for _, attr in ri.struct["request"].member_list():
             attr.attr_put(ri, "req")
@@ -2022,32 +2487,45 @@ def print_free_prototype(ri, direction, suffix=';'):
     ri.cw.write_func_prot('void', f"{name}_free", [f"struct {struct_name} *{arg}"], suffix=suffix)
 
 
+def print_nlflags_set(ri, direction):  # emits the inline <type>_set_nlflags() setter
+    prefix = op_prefix(ri, direction)
+    proto_args = [f"struct {prefix} *req", "__u16 nl_flags"]
+    ri.cw.write_func_prot('static inline void', f"{prefix}_set_nlflags", proto_args)
+    ri.cw.block_start()
+    ri.cw.p('req->_nlmsg_flags = nl_flags;')
+    ri.cw.block_end()
+    ri.cw.nl()
+
+
 def _print_type(ri, direction, struct):
     suffix = f'_{ri.type_name}{direction_to_suffix[direction]}'
     if not direction and ri.type_name_conflict:
         suffix += '_'
 
-    if ri.op_mode == 'dump':
+    if ri.op_mode == 'dump' and not ri.type_oneside:
         suffix += '_dump'
 
     ri.cw.block_start(line=f"struct {ri.family.c_name}{suffix}")
 
-    if ri.fixed_hdr:
-        ri.cw.p(ri.fixed_hdr + ' _hdr;')
+    if ri.needs_nlflags(direction):
+        ri.cw.p('__u16 _nlmsg_flags;')
+        ri.cw.nl()
+    if struct.fixed_header:
+        ri.cw.p(struct.fixed_header + ' _hdr;')
         ri.cw.nl()
 
-    meta_started = False
-    for _, attr in struct.member_list():
-        for type_filter in ['len', 'bit']:
+    for type_filter in ['present', 'len', 'count']:
+        meta_started = False
+        for _, attr in struct.member_list():
             line = attr.presence_member(ri.ku_space, type_filter)
             if line:
                 if not meta_started:
                     ri.cw.block_start(line=f"struct")
                     meta_started = True
                 ri.cw.p(line)
-    if meta_started:
-        ri.cw.block_end(line='_present;')
-        ri.cw.nl()
+        if meta_started:
+            ri.cw.block_end(line=f'_{type_filter};')
+    ri.cw.nl()
 
     for arg in struct.inherited:
         ri.cw.p(f"__u32 {arg};")
@@ -2071,6 +2549,9 @@ def print_type_helpers(ri, direction, deref=False):
     print_free_prototype(ri, direction)
     ri.cw.nl()
 
+    if ri.needs_nlflags(direction):
+        print_nlflags_set(ri, direction)
+
     if ri.ku_space == 'user' and direction == 'request':
         for _, attr in ri.struct[direction].member_list():
             attr.setter(ri, ri.attr_set, direction, deref=deref)
@@ -2078,7 +2559,7 @@ def print_type_helpers(ri, direction, deref=False):
 
 
 def print_req_type_helpers(ri):
-    if len(ri.struct["request"].attr_list) == 0:
+    if ri.type_empty("request"):
         return
     print_alloc_wrapper(ri, "request")
     print_type_helpers(ri, "request")
@@ -2101,7 +2582,7 @@ def print_parse_prototype(ri, direction, terminate=True):
 
 
 def print_req_type(ri):
-    if len(ri.struct["request"].attr_list) == 0:
+    if ri.type_empty("request"):
         return
     print_type(ri, "request")
 
@@ -2139,11 +2620,9 @@ def print_wrapped_type(ri):
 
 
 def _free_type_members_iter(ri, struct):
-    for _, attr in struct.member_list():
-        if attr.free_needs_iter():
-            ri.cw.p('unsigned int i;')
-            ri.cw.nl()
-            break
+    if struct.free_needs_iter():
+        ri.cw.p('unsigned int i;')
+        ri.cw.nl()
 
 
 def _free_type_members(ri, var, struct, ref=''):
@@ -2280,6 +2759,46 @@ def print_kernel_policy_ranges(family, cw):
             cw.nl()
 
 
+def print_kernel_policy_sparse_enum_validates(family, cw):
+    """Emit a <enum>_validate() callback for each sparse-enum request attr.
+
+    Each callback switches on nla_get_<type>(attr): every enum entry is a
+    case label that returns 0; any other value sets an extack message and
+    returns -EINVAL.
+    """
+    first = True
+    for _, attr_set in family.attr_sets.items():
+        if attr_set.subset_of:
+            continue
+
+        for _, attr in attr_set.items():
+            # Only request attrs that have an enum and a 'sparse' check qualify
+            if not attr.request or not attr.enum_name or 'sparse' not in attr.checks:
+                continue
+
+            if first:
+                cw.p('/* Sparse enums validation callbacks */')
+                first = False
+
+            cw.write_func_prot('static int', f'{c_lower(attr.enum_name)}_validate',
+                               ['const struct nlattr *attr', 'struct netlink_ext_ack *extack'])
+            cw.block_start()
+            cw.block_start(line=f'switch (nla_get_{attr["type"]}(attr))')
+            enum = family.consts[attr['enum']]
+            # Stacked case labels are separated by an explicit fallthrough
+            for idx, entry in enumerate(enum.entries.values()):
+                if idx:
+                    cw.p('fallthrough;')
+                cw.p(f'case {entry.c_name}:')
+            cw.p('return 0;')
+            cw.block_end()
+            # Emitted after the switch, so reached only when no case matched
+            cw.p('NL_SET_ERR_MSG_ATTR(extack, attr, "invalid enum value");')
+            cw.p('return -EINVAL;')
+            cw.block_end()
+            cw.nl()
+
+
 def print_kernel_op_table_fwd(family, cw, terminate):
     exported = not kernel_can_gen_family_struct(family)
 
@@ -2739,7 +3258,11 @@ def render_uapi(family, cw):
 
 
 def _render_user_ntf_entry(ri, op):
-    ri.cw.block_start(line=f"[{op.enum_name}] = ")
+    if not ri.family.is_classic():
+        ri.cw.block_start(line=f"[{op.enum_name}] = ")
+    else:
+        crud_op = ri.family.req_by_value[op.rsp_value]
+        ri.cw.block_start(line=f"[{crud_op.enum_name}] = ")
     ri.cw.p(f".alloc_sz\t= sizeof({type_name(ri, 'event')}),")
     ri.cw.p(f".cb\t\t= {op_prefix(ri, 'reply', deref=True)}_parse,")
     ri.cw.p(f".policy\t\t= &{ri.struct['reply'].render_name}_nest,")
@@ -2754,7 +3277,7 @@ def render_user_family(family, cw, prototype):
         return
 
     if family.ntfs:
-        cw.block_start(line=f"static const struct ynl_ntf_info {family['name']}_ntf_info[] = ")
+        cw.block_start(line=f"static const struct ynl_ntf_info {family.c_name}_ntf_info[] = ")
         for ntf_op_name, ntf_op in family.ntfs.items():
             if 'notify' in ntf_op:
                 op = family.ops[ntf_op['notify']]
@@ -2774,13 +3297,19 @@ def render_user_family(family, cw, prototype):
 
     cw.block_start(f'{symbol} = ')
     cw.p(f'.name\t\t= "{family.c_name}",')
-    if family.fixed_header:
+    if family.is_classic():
+        cw.p(f'.is_classic\t= true,')
+        cw.p(f'.classic_id\t= {family.get("protonum")},')
+    if family.is_classic():
+        if family.fixed_header:
+            cw.p(f'.hdr_len\t= sizeof(struct {c_lower(family.fixed_header)}),')
+    elif family.fixed_header:
         cw.p(f'.hdr_len\t= sizeof(struct genlmsghdr) + sizeof(struct {c_lower(family.fixed_header)}),')
     else:
         cw.p('.hdr_len\t= sizeof(struct genlmsghdr),')
     if family.ntfs:
-        cw.p(f".ntf_info\t= {family['name']}_ntf_info,")
-        cw.p(f".ntf_info_size\t= YNL_ARRAY_SIZE({family['name']}_ntf_info),")
+        cw.p(f".ntf_info\t= {family.c_name}_ntf_info,")
+        cw.p(f".ntf_info_size\t= YNL_ARRAY_SIZE({family.c_name}_ntf_info),")
     cw.block_end(line=';')
 
 
@@ -2887,7 +3416,7 @@ def main():
             cw.p(f'#include "{hdr_file}"')
             cw.p('#include "ynl.h"')
         headers = []
-    for definition in parsed['definitions']:
+    for definition in parsed['definitions'] + parsed['attribute-sets']:
         if 'header' in definition:
             headers.append(definition['header'])
     if args.mode == 'user':
@@ -2941,6 +3470,7 @@ def main():
             print_kernel_family_struct_hdr(parsed, cw)
         else:
             print_kernel_policy_ranges(parsed, cw)
+            print_kernel_policy_sparse_enum_validates(parsed, cw)
 
             for _, struct in sorted(parsed.pure_nested_structs.items()):
                 if struct.request:
@@ -3009,7 +3539,7 @@ def main():
                     ri = RenderInfo(cw, parsed, args.mode, op, 'dump')
                     print_req_type(ri)
                     print_req_type_helpers(ri)
-                    if not ri.type_consistent:
+                    if not ri.type_consistent or ri.type_oneside:
                         print_rsp_type(ri)
                     print_wrapped_type(ri)
                     print_dump_prototype(ri)
@@ -3047,8 +3577,7 @@ def main():
                     has_recursive_nests = True
             if has_recursive_nests:
                 cw.nl()
-            for name in parsed.pure_nested_structs:
-                struct = Struct(parsed, name)
+            for struct in parsed.pure_nested_structs.values():
                 put_typol(cw, struct)
             for name in parsed.root_sets:
                 struct = Struct(parsed, name)
@@ -3087,7 +3616,7 @@ def main():
                 if 'dump' in op:
                     cw.p(f"/* {op.enum_name} - dump */")
                     ri = RenderInfo(cw, parsed, args.mode, op, "dump")
-                    if not ri.type_consistent:
+                    if not ri.type_consistent or ri.type_oneside:
                         parse_rsp_msg(ri, deref=True)
                     print_req_free(ri)
                     print_dump_type_free(ri)
diff --git a/tools/net/ynl/pyynl/ynl_gen_rst.py b/tools/net/ynl/pyynl/ynl_gen_rst.py
index 6c56d0d726b4..0cb6348e28d3 100755
--- a/tools/net/ynl/pyynl/ynl_gen_rst.py
+++ b/tools/net/ynl/pyynl/ynl_gen_rst.py
@@ -392,7 +392,7 @@ def parse_arguments() -> argparse.Namespace:
 
 
 def parse_yaml_file(filename: str) -> str:
-    """Transform the YAML specified by filename into a rst-formmated string"""
+    """Transform the YAML specified by filename into an RST-formatted string"""
     with open(filename, "r", encoding="utf-8") as spec_file:
         yaml_data = yaml.safe_load(spec_file)
         content = parse_yaml(yaml_data)
diff --git a/tools/net/ynl/samples/.gitignore b/tools/net/ynl/samples/.gitignore
index dda6686257a7..7f5fca7682d7 100644
--- a/tools/net/ynl/samples/.gitignore
+++ b/tools/net/ynl/samples/.gitignore
@@ -2,4 +2,8 @@ ethtool
 devlink
 netdev
 ovs
-page-pool
-\ No newline at end of file
+page-pool
+rt-addr
+rt-link
+rt-route
+tc
diff --git a/tools/net/ynl/samples/devlink.c b/tools/net/ynl/samples/devlink.c
index d2611d7ebab4..ac9dfb01f280 100644
--- a/tools/net/ynl/samples/devlink.c
+++ b/tools/net/ynl/samples/devlink.c
@@ -22,6 +22,7 @@ int main(int argc, char **argv)
 	ynl_dump_foreach(devs, d) {
 		struct devlink_info_get_req *info_req;
 		struct devlink_info_get_rsp *info_rsp;
+		unsigned i;
 
 		printf("%s/%s:\n", d->bus_name, d->dev_name);
 
@@ -34,11 +35,11 @@ int main(int argc, char **argv)
 		if (!info_rsp)
 			goto err_free_devs;
 
-		if (info_rsp->_present.info_driver_name_len)
+		if (info_rsp->_len.info_driver_name)
 			printf("    driver: %s\n", info_rsp->info_driver_name);
-		if (info_rsp->n_info_version_running)
+		if (info_rsp->_count.info_version_running)
 			printf("    running fw:\n");
-		for (unsigned i = 0; i < info_rsp->n_info_version_running; i++)
+		for (i = 0; i < info_rsp->_count.info_version_running; i++)
 			printf("        %s: %s\n",
 			       info_rsp->info_version_running[i].info_version_name,
 			       info_rsp->info_version_running[i].info_version_value);
diff --git a/tools/net/ynl/samples/rt-addr.c b/tools/net/ynl/samples/rt-addr.c
new file mode 100644
index 000000000000..2edde5c36b18
--- /dev/null
+++ b/tools/net/ynl/samples/rt-addr.c
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <string.h>
+
+#include <ynl.h>
+
+#include <arpa/inet.h>
+#include <net/if.h>
+
+#include "rt-addr-user.h"
+
+/* Print one dumped address: the interface name (if it resolves), then the address */
+static void rt_addr_print(struct rt_addr_getaddr_rsp *a)
+{
+	char ifname[IF_NAMESIZE];
+	char addr_str[64];
+	const char *addr, *name;
+
+	name = if_indextoname(a->_hdr.ifa_index, ifname);
+	if (name)
+		printf("%16s: ", name);
+
+	/* The attribute length selects the family: 4 bytes -> AF_INET, 16 -> AF_INET6 */
+	switch (a->_len.address) {
+	case 4:
+		addr = inet_ntop(AF_INET, a->address,
+				 addr_str, sizeof(addr_str));
+		break;
+	case 16:
+		addr = inet_ntop(AF_INET6, a->address,
+				 addr_str, sizeof(addr_str));
+		break;
+	default:
+		addr = NULL;
+		break;
+	}
+	/* Unhandled length or inet_ntop() failure: show the raw length instead */
+	if (addr)
+		printf("%s\n", addr);
+	else
+		printf("[%u]\n", a->_len.address);
+}
+
+int main(int argc, char **argv)	/* dump addresses via the rt-addr family and print each one */
+{
+	struct rt_addr_getaddr_list *rsp;
+	struct rt_addr_getaddr_req *req;
+	struct ynl_error yerr;
+	struct ynl_sock *ys;
+
+	ys = ynl_sock_create(&ynl_rt_addr_family, &yerr);
+	if (!ys) {
+		fprintf(stderr, "YNL: %s\n", yerr.msg);
+		return 1;	/* no socket was created, so there is nothing to destroy */
+	}
+
+	req = rt_addr_getaddr_req_alloc();
+	if (!req)
+		goto err_destroy;	/* exits with status 2 without printing a message */
+
+	rsp = rt_addr_getaddr_dump(ys, req);
+	rt_addr_getaddr_req_free(req);	/* freed before the result check, so on both outcomes */
+	if (!rsp)
+		goto err_close;
+
+	if (ynl_dump_empty(rsp))	/* an empty dump is reported on stderr but still exits with 0 */
+		fprintf(stderr, "Error: no addresses reported\n");
+	ynl_dump_foreach(rsp, addr)
+		rt_addr_print(addr);
+	rt_addr_getaddr_list_free(rsp);
+
+	ynl_sock_destroy(ys);
+	return 0;
+
+err_close:	/* dump failed: print the socket's error message, then fall through */
+	fprintf(stderr, "YNL: %s\n", ys->err.msg);
+err_destroy:
+	ynl_sock_destroy(ys);
+	return 2;
+}
diff --git a/tools/net/ynl/samples/rt-link.c b/tools/net/ynl/samples/rt-link.c
new file mode 100644
index 000000000000..acdd4b4a0f74
--- /dev/null
+++ b/tools/net/ynl/samples/rt-link.c
@@ -0,0 +1,184 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <string.h>
+
+#include <ynl.h>
+
+#include <arpa/inet.h>
+#include <net/if.h>
+
+#include "rt-link-user.h"
+
+static void rt_link_print(struct rt_link_getlink_rsp *r)
+{
+	unsigned int i;
+
+	printf("%3d: ", r->_hdr.ifi_index);
+
+	if (r->_len.ifname)
+		printf("%16s: ", r->ifname);
+
+	if (r->_present.mtu)
+		printf("mtu %5d  ", r->mtu);
+
+	if (r->linkinfo._len.kind)
+		printf("kind %-8s  ", r->linkinfo.kind);
+	else
+		printf("     %8s  ", "");
+
+	if (r->prop_list._count.alt_ifname) {
+		printf("altname ");
+		for (i = 0; i < r->prop_list._count.alt_ifname; i++)
+			printf("%s ", r->prop_list.alt_ifname[i]->str);
+		printf(" ");
+	}
+
+	if (r->linkinfo._present.data && r->linkinfo.data._present.netkit) {
+		struct rt_link_linkinfo_netkit_attrs *netkit;
+		const char *name;
+
+		netkit = &r->linkinfo.data.netkit;
+		printf("primary %d  ", netkit->primary);
+
+		name = NULL;
+		if (netkit->_present.policy)
+			name = rt_link_netkit_policy_str(netkit->policy);
+		if (name)
+			printf("policy %s  ", name);
+	}
+
+	printf("\n");
+}
+
+static int rt_link_create_netkit(struct ynl_sock *ys)
+{
+	struct rt_link_getlink_ntf *ntf_gl;
+	struct rt_link_newlink_req *req;
+	struct ynl_ntf_base_type *ntf;
+	int ret;
+
+	req = rt_link_newlink_req_alloc();
+	if (!req) {
+		fprintf(stderr, "Can't alloc req\n");
+		return -1;
+	}
+
+	/* rtnetlink doesn't provide info about the created object.
+	 * It expects us to set the ECHO flag and then dig the info out
+	 * of the notifications...
+	 */
+	rt_link_newlink_req_set_nlflags(req, NLM_F_CREATE | NLM_F_ECHO);
+
+	rt_link_newlink_req_set_linkinfo_kind(req, "netkit");
+
+	/* Test error messages */
+	rt_link_newlink_req_set_linkinfo_data_netkit_policy(req, 10);
+	ret = rt_link_newlink(ys, req);
+	if (ret) {
+		printf("Testing error message for policy being bad:\n\t%s\n", ys->err.msg);
+	} else {
+		fprintf(stderr, "Warning: unexpected success creating netkit with bad attrs\n");
+		goto created;
+	}
+
+	rt_link_newlink_req_set_linkinfo_data_netkit_policy(req, NETKIT_DROP);
+
+	ret = rt_link_newlink(ys, req);
+created:
+	rt_link_newlink_req_free(req);
+	if (ret) {
+		fprintf(stderr, "YNL: %s\n", ys->err.msg);
+		return -1;
+	}
+
+	if (!ynl_has_ntf(ys)) {
+		fprintf(stderr,
+			"Warning: interface created but received no notification, won't delete the interface\n");
+		return 0;
+	}
+
+	ntf = ynl_ntf_dequeue(ys);
+	if (ntf->cmd != RTM_NEWLINK) {
+		fprintf(stderr, "Warning: unexpected notification type, won't delete the interface\n");
+		ynl_ntf_free(ntf);	/* dequeued above, so release it here too */
+		return 0;
+	}
+	ntf_gl = (void *)ntf;
+	ret = ntf_gl->obj._hdr.ifi_index;
+	ynl_ntf_free(ntf);
+
+	return ret;
+}
+
+static void rt_link_del(struct ynl_sock *ys, int ifindex)
+{
+	struct rt_link_dellink_req *req;
+
+	req = rt_link_dellink_req_alloc();
+	if (!req) {
+		fprintf(stderr, "Can't alloc req\n");
+		return;
+	}
+
+	req->_hdr.ifi_index = ifindex;
+	if (rt_link_dellink(ys, req))
+		fprintf(stderr, "YNL: %s\n", ys->err.msg);
+	else
+		fprintf(stderr,
+			"Trying to delete a Netkit interface (ifindex %d)\n",
+			ifindex);
+
+	rt_link_dellink_req_free(req);
+}
+
+int main(int argc, char **argv)
+{
+	struct rt_link_getlink_req_dump *req;
+	struct rt_link_getlink_list *rsp;
+	struct ynl_error yerr;
+	struct ynl_sock *ys;
+	int created = 0;
+
+	ys = ynl_sock_create(&ynl_rt_link_family, &yerr);
+	if (!ys) {
+		fprintf(stderr, "YNL: %s\n", yerr.msg);
+		return 1;
+	}
+
+	if (argc > 1) {
+		fprintf(stderr, "Trying to create a Netkit interface\n");
+		created = rt_link_create_netkit(ys);
+		if (created < 0)
+			goto err_destroy;
+	}
+
+	req = rt_link_getlink_req_dump_alloc();
+	if (!req)
+		goto err_del_ifc;
+
+	rsp = rt_link_getlink_dump(ys, req);
+	rt_link_getlink_req_dump_free(req);
+	if (!rsp)
+		goto err_close;
+
+	if (ynl_dump_empty(rsp))
+		fprintf(stderr, "Error: no links reported\n");
+	ynl_dump_foreach(rsp, link)
+		rt_link_print(link);
+	rt_link_getlink_list_free(rsp);
+
+	if (created)
+		rt_link_del(ys, created);
+
+	ynl_sock_destroy(ys);
+	return 0;
+
+err_close:
+	fprintf(stderr, "YNL: %s\n", ys->err.msg);
+err_del_ifc:
+	if (created)
+		rt_link_del(ys, created);
+err_destroy:
+	ynl_sock_destroy(ys);
+	return 2;
+}
diff --git a/tools/net/ynl/samples/rt-route.c b/tools/net/ynl/samples/rt-route.c
new file mode 100644
index 000000000000..7427104a96df
--- /dev/null
+++ b/tools/net/ynl/samples/rt-route.c
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <string.h>
+
+#include <ynl.h>
+
+#include <arpa/inet.h>
+#include <net/if.h>
+
+#include "rt-route-user.h"
+
+static void rt_route_print(struct rt_route_getroute_rsp *r)
+{
+	char ifname[IF_NAMESIZE];
+	char route_str[64];
+	const char *route;
+	const char *name;
+
+	/* Skip entries that belong to the local routing table */
+	if (r->_hdr.rtm_table == RT_TABLE_LOCAL)
+		return;
+
+	if (r->_present.oif) {
+		name = if_indextoname(r->oif, ifname);
+		if (name)
+			printf("oif: %-16s ", name);
+	}
+
+	if (r->_len.dst) {
+		route = inet_ntop(r->_hdr.rtm_family, r->dst,
+				  route_str, sizeof(route_str));
+		printf("dst: %s/%d ", route ? route : "?", r->_hdr.rtm_dst_len);
+	}
+
+	if (r->_len.gateway) {
+		route = inet_ntop(r->_hdr.rtm_family, r->gateway,
+				  route_str, sizeof(route_str));
+		printf("gateway: %s ", route ? route : "?");
+	}
+
+	printf("\n");
+}
+
+int main(int argc, char **argv)
+{
+	struct rt_route_getroute_req_dump *req;
+	struct rt_route_getroute_list *rsp;
+	struct ynl_error yerr;
+	struct ynl_sock *ys;
+
+	ys = ynl_sock_create(&ynl_rt_route_family, &yerr);
+	if (!ys) {
+		fprintf(stderr, "YNL: %s\n", yerr.msg);
+		return 1;
+	}
+
+	req = rt_route_getroute_req_dump_alloc();
+	if (!req)
+		goto err_destroy;
+
+	rsp = rt_route_getroute_dump(ys, req);
+	rt_route_getroute_req_dump_free(req);
+	if (!rsp)
+		goto err_close;
+
+	if (ynl_dump_empty(rsp))
+		fprintf(stderr, "Error: no routes reported\n");
+	ynl_dump_foreach(rsp, route)
+		rt_route_print(route);
+	rt_route_getroute_list_free(rsp);
+
+	ynl_sock_destroy(ys);
+	return 0;
+
+err_close:
+	fprintf(stderr, "YNL: %s\n", ys->err.msg);
+err_destroy:
+	ynl_sock_destroy(ys);
+	return 2;
+}
diff --git a/tools/net/ynl/samples/tc.c b/tools/net/ynl/samples/tc.c
new file mode 100644
index 000000000000..0bfff0fdd792
--- /dev/null
+++ b/tools/net/ynl/samples/tc.c
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <string.h>
+
+#include <ynl.h>
+
+#include <net/if.h>
+
+#include "tc-user.h"
+
+static void tc_qdisc_print(struct tc_getqdisc_rsp *q)
+{
+	char ifname[IF_NAMESIZE];
+	const char *name;
+
+	name = if_indextoname(q->_hdr.tcm_ifindex, ifname);
+	if (name)
+		printf("%16s: ", name);
+
+	if (q->_len.kind) {
+		printf("%s  ", q->kind);
+
+		if (q->options._present.fq_codel) {
+			struct tc_fq_codel_attrs *fq_codel;
+			struct tc_fq_codel_xstats *stats;
+
+			fq_codel = &q->options.fq_codel;
+			stats = q->stats2.app.fq_codel;
+
+			if (fq_codel->_present.limit)
+				printf("limit: %dp ", fq_codel->limit);
+			if (fq_codel->_present.target)
+				printf("target: %dms ",
+				       (fq_codel->target + 500) / 1000);
+			if (q->stats2.app._len.fq_codel)
+				printf("new_flow_cnt: %d ",
+				       stats->qdisc_stats.new_flow_count);
+		}
+	}
+
+	printf("\n");
+}
+
+int main(int argc, char **argv)
+{
+	struct tc_getqdisc_req_dump *req;
+	struct tc_getqdisc_list *rsp;
+	struct ynl_error yerr;
+	struct ynl_sock *ys;
+
+	ys = ynl_sock_create(&ynl_tc_family, &yerr);
+	if (!ys) {
+		fprintf(stderr, "YNL: %s\n", yerr.msg);
+		return 1;
+	}
+
+	req = tc_getqdisc_req_dump_alloc();
+	if (!req)
+		goto err_destroy;
+
+	rsp = tc_getqdisc_dump(ys, req);
+	tc_getqdisc_req_dump_free(req);
+	if (!rsp)
+		goto err_close;
+
+	if (ynl_dump_empty(rsp))
+		fprintf(stderr, "Error: no qdiscs reported\n");
+	ynl_dump_foreach(rsp, qdisc)
+		tc_qdisc_print(qdisc);
+	tc_getqdisc_list_free(rsp);
+
+	ynl_sock_destroy(ys);
+	return 0;
+
+err_close:
+	fprintf(stderr, "YNL: %s\n", ys->err.msg);
+err_destroy:
+	ynl_sock_destroy(ys);
+	return 2;
+}
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index 84e7f4ed4c97..6aa11cd3db42 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -67,11 +67,13 @@ TARGETS += mseal_system_mappings
 TARGETS += nci
 TARGETS += net
 TARGETS += net/af_unix
+TARGETS += net/can
 TARGETS += net/forwarding
 TARGETS += net/hsr
 TARGETS += net/mptcp
 TARGETS += net/netfilter
 TARGETS += net/openvswitch
+TARGETS += net/ovpn
 TARGETS += net/packetdrill
 TARGETS += net/rds
 TARGETS += net/tcp_ao
diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index c378d5d07e02..3201a962b3dc 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -71,8 +71,10 @@ CONFIG_NET_IPGRE=y
 CONFIG_NET_IPGRE_DEMUX=y
 CONFIG_NET_IPIP=y
 CONFIG_NET_MPLS_GSO=y
+CONFIG_NET_SCH_BPF=y
 CONFIG_NET_SCH_FQ=y
 CONFIG_NET_SCH_INGRESS=y
+CONFIG_NET_SCH_HTB=y
 CONFIG_NET_SCHED=y
 CONFIG_NETDEVSIM=y
 CONFIG_NETFILTER=y
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_qdisc.c b/tools/testing/selftests/bpf/prog_tests/bpf_qdisc.c
new file mode 100644
index 000000000000..730357cd0c9a
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_qdisc.c
@@ -0,0 +1,231 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/pkt_sched.h>
+#include <linux/rtnetlink.h>
+#include <test_progs.h>
+
+#include "network_helpers.h"
+#include "bpf_qdisc_fifo.skel.h"
+#include "bpf_qdisc_fq.skel.h"
+#include "bpf_qdisc_fail__incompl_ops.skel.h"
+
+#define LO_IFINDEX 1
+
+static const unsigned int total_bytes = 10 * 1024 * 1024;
+
+static void do_test(char *qdisc)
+{
+	DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = LO_IFINDEX,
+			    .attach_point = BPF_TC_QDISC,
+			    .parent = TC_H_ROOT,
+			    .handle = 0x8000000,
+			    .qdisc = qdisc);
+	int srv_fd = -1, cli_fd = -1;
+	int err;
+
+	err = bpf_tc_hook_create(&hook);
+	if (!ASSERT_OK(err, "attach qdisc"))
+		return;
+
+	srv_fd = start_server(AF_INET6, SOCK_STREAM, NULL, 0, 0);
+	if (!ASSERT_OK_FD(srv_fd, "start server"))
+		goto done;
+
+	cli_fd = connect_to_fd(srv_fd, 0);
+	if (!ASSERT_OK_FD(cli_fd, "connect to client"))
+		goto done;
+
+	err = send_recv_data(srv_fd, cli_fd, total_bytes);
+	ASSERT_OK(err, "send_recv_data");
+
+done:
+	if (srv_fd != -1)
+		close(srv_fd);
+	if (cli_fd != -1)
+		close(cli_fd);
+
+	bpf_tc_hook_destroy(&hook);
+}
+
+static void test_fifo(void)
+{
+	struct bpf_qdisc_fifo *fifo_skel;
+
+	fifo_skel = bpf_qdisc_fifo__open_and_load();
+	if (!ASSERT_OK_PTR(fifo_skel, "bpf_qdisc_fifo__open_and_load"))
+		return;
+
+	if (!ASSERT_OK(bpf_qdisc_fifo__attach(fifo_skel), "bpf_qdisc_fifo__attach"))
+		goto out;
+
+	do_test("bpf_fifo");
+out:
+	bpf_qdisc_fifo__destroy(fifo_skel);
+}
+
+static void test_fq(void)
+{
+	struct bpf_qdisc_fq *fq_skel;
+
+	fq_skel = bpf_qdisc_fq__open_and_load();
+	if (!ASSERT_OK_PTR(fq_skel, "bpf_qdisc_fq__open_and_load"))
+		return;
+
+	if (!ASSERT_OK(bpf_qdisc_fq__attach(fq_skel), "bpf_qdisc_fq__attach"))
+		goto out;
+
+	do_test("bpf_fq");
+out:
+	bpf_qdisc_fq__destroy(fq_skel);
+}
+
+static void test_qdisc_attach_to_mq(void)
+{
+	DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook,
+			    .attach_point = BPF_TC_QDISC,
+			    .parent = TC_H_MAKE(1 << 16, 1),
+			    .handle = 0x11 << 16,
+			    .qdisc = "bpf_fifo");
+	struct bpf_qdisc_fifo *fifo_skel;
+	int err;
+
+	fifo_skel = bpf_qdisc_fifo__open_and_load();
+	if (!ASSERT_OK_PTR(fifo_skel, "bpf_qdisc_fifo__open_and_load"))
+		return;
+
+	if (!ASSERT_OK(bpf_qdisc_fifo__attach(fifo_skel), "bpf_qdisc_fifo__attach"))
+		goto out;
+
+	SYS(out, "ip link add veth0 type veth peer veth1");
+	hook.ifindex = if_nametoindex("veth0");
+	SYS(out, "tc qdisc add dev veth0 root handle 1: mq");
+
+	err = bpf_tc_hook_create(&hook);
+	ASSERT_OK(err, "attach qdisc");
+
+	bpf_tc_hook_destroy(&hook);
+
+	SYS(out, "tc qdisc delete dev veth0 root mq");
+out:
+	bpf_qdisc_fifo__destroy(fifo_skel);
+}
+
+static void test_qdisc_attach_to_non_root(void)
+{
+	DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = LO_IFINDEX,
+			    .attach_point = BPF_TC_QDISC,
+			    .parent = TC_H_MAKE(1 << 16, 1),
+			    .handle = 0x11 << 16,
+			    .qdisc = "bpf_fifo");
+	struct bpf_qdisc_fifo *fifo_skel;
+	int err;
+
+	fifo_skel = bpf_qdisc_fifo__open_and_load();
+	if (!ASSERT_OK_PTR(fifo_skel, "bpf_qdisc_fifo__open_and_load"))
+		return;
+
+	if (!ASSERT_OK(bpf_qdisc_fifo__attach(fifo_skel), "bpf_qdisc_fifo__attach"))
+		goto out;
+
+	SYS(out, "tc qdisc add dev lo root handle 1: htb");
+	SYS(out_del_htb, "tc class add dev lo parent 1: classid 1:1 htb rate 75Kbit");
+
+	err = bpf_tc_hook_create(&hook);
+	if (!ASSERT_ERR(err, "attach qdisc"))
+		bpf_tc_hook_destroy(&hook);
+
+out_del_htb:
+	SYS(out, "tc qdisc delete dev lo root htb");
+out:
+	bpf_qdisc_fifo__destroy(fifo_skel);
+}
+
+static void test_incompl_ops(void)
+{
+	struct bpf_qdisc_fail__incompl_ops *skel;
+	struct bpf_link *link;
+
+	skel = bpf_qdisc_fail__incompl_ops__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "bpf_qdisc_fail__incompl_ops__open_and_load"))
+		return;
+
+	link = bpf_map__attach_struct_ops(skel->maps.test);
+	if (!ASSERT_ERR_PTR(link, "bpf_map__attach_struct_ops"))
+		bpf_link__destroy(link);
+
+	bpf_qdisc_fail__incompl_ops__destroy(skel);
+}
+
+static int get_default_qdisc(char *qdisc_name)
+{
+	FILE *f;
+	int num;
+
+	f = fopen("/proc/sys/net/core/default_qdisc", "r");
+	if (!f)
+		return -errno;
+
+	num = fscanf(f, "%15s", qdisc_name);	/* caller's buffer is IFNAMSIZ (16) bytes */
+	fclose(f);
+
+	return num == 1 ? 0 : -EFAULT;
+}
+
+static void test_default_qdisc_attach_to_mq(void)
+{
+	char default_qdisc[IFNAMSIZ] = {};
+	struct bpf_qdisc_fifo *fifo_skel;
+	struct netns_obj *netns = NULL;
+	int err;
+
+	fifo_skel = bpf_qdisc_fifo__open_and_load();
+	if (!ASSERT_OK_PTR(fifo_skel, "bpf_qdisc_fifo__open_and_load"))
+		return;
+
+	if (!ASSERT_OK(bpf_qdisc_fifo__attach(fifo_skel), "bpf_qdisc_fifo__attach"))
+		goto out;
+
+	err = get_default_qdisc(default_qdisc);
+	if (!ASSERT_OK(err, "read sysctl net.core.default_qdisc"))
+		goto out;
+
+	err = write_sysctl("/proc/sys/net/core/default_qdisc", "bpf_fifo");
+	if (!ASSERT_OK(err, "write sysctl net.core.default_qdisc"))
+		goto out;
+
+	netns = netns_new("bpf_qdisc_ns", true);
+	if (!ASSERT_OK_PTR(netns, "netns_new"))
+		goto out;
+
+	SYS(out, "ip link add veth0 type veth peer veth1");
+	SYS(out, "tc qdisc add dev veth0 root handle 1: mq");
+
+	ASSERT_EQ(fifo_skel->bss->init_called, true, "init_called");
+
+	SYS(out, "tc qdisc delete dev veth0 root mq");
+out:
+	netns_free(netns);
+	if (default_qdisc[0])
+		write_sysctl("/proc/sys/net/core/default_qdisc", default_qdisc);
+
+	bpf_qdisc_fifo__destroy(fifo_skel);
+}
+
+void test_ns_bpf_qdisc(void)
+{
+	if (test__start_subtest("fifo"))
+		test_fifo();
+	if (test__start_subtest("fq"))
+		test_fq();
+	if (test__start_subtest("attach to mq"))
+		test_qdisc_attach_to_mq();
+	if (test__start_subtest("attach to non root"))
+		test_qdisc_attach_to_non_root();
+	if (test__start_subtest("incompl_ops"))
+		test_incompl_ops();
+}
+
+void serial_test_bpf_qdisc_default(void)
+{
+	test_default_qdisc_attach_to_mq();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c b/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c
index d56e18b25528..a4517bee34d5 100644
--- a/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c
+++ b/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c
@@ -7,14 +7,433 @@
 
 #define TEST_NS "sock_iter_batch_netns"
 
+static const int init_batch_size = 16;
 static const int nr_soreuse = 4;
 
+struct iter_out {
+	int idx;
+	__u64 cookie;
+} __packed;
+
+struct sock_count {
+	__u64 cookie;
+	int count;
+};
+
+static int insert(__u64 cookie, struct sock_count counts[], int counts_len)
+{
+	int insert = -1;
+	int i = 0;
+
+	for (; i < counts_len; i++) {
+		if (!counts[i].cookie) {
+			insert = i;
+		} else if (counts[i].cookie == cookie) {
+			insert = i;
+			break;
+		}
+	}
+	if (insert < 0)
+		return insert;
+
+	counts[insert].cookie = cookie;
+	counts[insert].count++;
+
+	return counts[insert].count;
+}
+
+static int read_n(int iter_fd, int n, struct sock_count counts[],
+		  int counts_len)
+{
+	struct iter_out out;
+	int nread = 1;
+	int i = 0;
+
+	for (; nread > 0 && (n < 0 || i < n); i++) {
+		nread = read(iter_fd, &out, sizeof(out));
+		if (!nread || !ASSERT_EQ(nread, sizeof(out), "nread"))
+			break;
+		ASSERT_GE(insert(out.cookie, counts, counts_len), 0, "insert");
+	}
+
+	ASSERT_TRUE(n < 0 || i == n, "n < 0 || i == n");
+
+	return i;
+}
+
+static __u64 socket_cookie(int fd)
+{
+	__u64 cookie;
+	socklen_t cookie_len = sizeof(cookie);
+
+	if (!ASSERT_OK(getsockopt(fd, SOL_SOCKET, SO_COOKIE, &cookie,
+				  &cookie_len), "getsockopt(SO_COOKIE)"))
+		return 0;
+	return cookie;
+}
+
+static bool was_seen(int fd, struct sock_count counts[], int counts_len)
+{
+	__u64 cookie = socket_cookie(fd);
+	int i = 0;
+
+	for (; cookie && i < counts_len; i++)
+		if (cookie == counts[i].cookie)
+			return true;
+
+	return false;
+}
+
+static int get_seen_socket(int *fds, struct sock_count counts[], int n)
+{
+	int i = 0;
+
+	for (; i < n; i++)
+		if (was_seen(fds[i], counts, n))
+			return i;
+	return -1;
+}
+
+static int get_nth_socket(int *fds, int fds_len, struct bpf_link *link, int n)
+{
+	int i, nread, iter_fd;
+	int nth_sock_idx = -1;
+	struct iter_out out;
+
+	iter_fd = bpf_iter_create(bpf_link__fd(link));
+	if (!ASSERT_OK_FD(iter_fd, "bpf_iter_create"))
+		return -1;
+
+	for (; n >= 0; n--) {
+		nread = read(iter_fd, &out, sizeof(out));
+		if (!nread || !ASSERT_GE(nread, 1, "nread"))
+			goto done;
+	}
+
+	for (i = 0; i < fds_len && nth_sock_idx < 0; i++)
+		if (fds[i] >= 0 && socket_cookie(fds[i]) == out.cookie)
+			nth_sock_idx = i;
+done:
+	close(iter_fd);
+	return nth_sock_idx;
+}
+
+static int get_seen_count(int fd, struct sock_count counts[], int n)
+{
+	__u64 cookie = socket_cookie(fd);
+	int count = 0;
+	int i = 0;
+
+	for (; cookie && !count && i < n; i++)
+		if (cookie == counts[i].cookie)
+			count = counts[i].count;
+
+	return count;
+}
+
+static void check_n_were_seen_once(int *fds, int fds_len, int n,
+				   struct sock_count counts[], int counts_len)
+{
+	int seen_once = 0;
+	int seen_cnt;
+	int i = 0;
+
+	for (; i < fds_len; i++) {
+		/* Skip any sockets that were closed or that weren't seen
+		 * exactly once.
+		 */
+		if (fds[i] < 0)
+			continue;
+		seen_cnt = get_seen_count(fds[i], counts, counts_len);
+		if (seen_cnt && ASSERT_EQ(seen_cnt, 1, "seen_cnt"))
+			seen_once++;
+	}
+
+	ASSERT_EQ(seen_once, n, "seen_once");
+}
+
+static void remove_seen(int family, int sock_type, const char *addr, __u16 port,
+			int *socks, int socks_len, struct sock_count *counts,
+			int counts_len, struct bpf_link *link, int iter_fd)
+{
+	int close_idx;
+
+	/* Iterate through the first socks_len - 1 sockets. */
+	read_n(iter_fd, socks_len - 1, counts, counts_len);
+
+	/* Make sure we saw socks_len - 1 sockets exactly once. */
+	check_n_were_seen_once(socks, socks_len, socks_len - 1, counts,
+			       counts_len);
+
+	/* Close a socket we've already seen to remove it from the bucket. */
+	close_idx = get_seen_socket(socks, counts, counts_len);
+	if (!ASSERT_GE(close_idx, 0, "close_idx"))
+		return;
+	close(socks[close_idx]);
+	socks[close_idx] = -1;
+
+	/* Iterate through the rest of the sockets. */
+	read_n(iter_fd, -1, counts, counts_len);
+
+	/* Make sure the last socket wasn't skipped and that there were no
+	 * repeats.
+	 */
+	check_n_were_seen_once(socks, socks_len, socks_len - 1, counts,
+			       counts_len);
+}
+
+static void remove_unseen(int family, int sock_type, const char *addr,
+			  __u16 port, int *socks, int socks_len,
+			  struct sock_count *counts, int counts_len,
+			  struct bpf_link *link, int iter_fd)
+{
+	int close_idx;
+
+	/* Iterate through the first socket. */
+	read_n(iter_fd, 1, counts, counts_len);
+
+	/* Make sure we saw a socket from fds. */
+	check_n_were_seen_once(socks, socks_len, 1, counts, counts_len);
+
+	/* Close what would be the next socket in the bucket to exercise the
+	 * condition where we need to skip past the first cookie we remembered.
+	 */
+	close_idx = get_nth_socket(socks, socks_len, link, 1);
+	if (!ASSERT_GE(close_idx, 0, "close_idx"))
+		return;
+	close(socks[close_idx]);
+	socks[close_idx] = -1;
+
+	/* Iterate through the rest of the sockets. */
+	read_n(iter_fd, -1, counts, counts_len);
+
+	/* Make sure the remaining sockets were seen exactly once and that we
+	 * didn't repeat the socket that was already seen.
+	 */
+	check_n_were_seen_once(socks, socks_len, socks_len - 1, counts,
+			       counts_len);
+}
+
+static void remove_all(int family, int sock_type, const char *addr,
+		       __u16 port, int *socks, int socks_len,
+		       struct sock_count *counts, int counts_len,
+		       struct bpf_link *link, int iter_fd)
+{
+	int close_idx, i;
+
+	/* Iterate through the first socket. */
+	read_n(iter_fd, 1, counts, counts_len);
+
+	/* Make sure we saw a socket from fds. */
+	check_n_were_seen_once(socks, socks_len, 1, counts, counts_len);
+
+	/* Close all remaining sockets to exhaust the list of saved cookies and
+	 * exit without putting any sockets into the batch on the next read.
+	 */
+	for (i = 0; i < socks_len - 1; i++) {
+		close_idx = get_nth_socket(socks, socks_len, link, 1);
+		if (!ASSERT_GE(close_idx, 0, "close_idx"))
+			return;
+		close(socks[close_idx]);
+		socks[close_idx] = -1;
+	}
+
+	/* Make sure there are no more sockets returned */
+	ASSERT_EQ(read_n(iter_fd, -1, counts, counts_len), 0, "read_n");
+}
+
+static void add_some(int family, int sock_type, const char *addr, __u16 port,
+		     int *socks, int socks_len, struct sock_count *counts,
+		     int counts_len, struct bpf_link *link, int iter_fd)
+{
+	int *new_socks = NULL;
+
+	/* Iterate through the first socks_len - 1 sockets. */
+	read_n(iter_fd, socks_len - 1, counts, counts_len);
+
+	/* Make sure we saw socks_len - 1 sockets exactly once. */
+	check_n_were_seen_once(socks, socks_len, socks_len - 1, counts,
+			       counts_len);
+
+	/* Double the number of sockets in the bucket. */
+	new_socks = start_reuseport_server(family, sock_type, addr, port, 0,
+					   socks_len);
+	if (!ASSERT_OK_PTR(new_socks, "start_reuseport_server"))
+		goto done;
+
+	/* Iterate through the rest of the sockets. */
+	read_n(iter_fd, -1, counts, counts_len);
+
+	/* Make sure each of the original sockets was seen exactly once. */
+	check_n_were_seen_once(socks, socks_len, socks_len, counts,
+			       counts_len);
+done:
+	free_fds(new_socks, socks_len);
+}
+
+static void force_realloc(int family, int sock_type, const char *addr,
+			  __u16 port, int *socks, int socks_len,
+			  struct sock_count *counts, int counts_len,
+			  struct bpf_link *link, int iter_fd)
+{
+	int *new_socks = NULL;
+
+	/* Iterate through the first socket just to initialize the batch. */
+	read_n(iter_fd, 1, counts, counts_len);
+
+	/* Double the number of sockets in the bucket to force a realloc on the
+	 * next read.
+	 */
+	new_socks = start_reuseport_server(family, sock_type, addr, port, 0,
+					   socks_len);
+	if (!ASSERT_OK_PTR(new_socks, "start_reuseport_server"))
+		goto done;
+
+	/* Iterate through the rest of the sockets. */
+	read_n(iter_fd, -1, counts, counts_len);
+
+	/* Make sure each socket from the first set was seen exactly once. */
+	check_n_were_seen_once(socks, socks_len, socks_len, counts,
+			       counts_len);
+done:
+	free_fds(new_socks, socks_len);
+}
+
+struct test_case {
+	void (*test)(int family, int sock_type, const char *addr, __u16 port,
+		     int *socks, int socks_len, struct sock_count *counts,
+		     int counts_len, struct bpf_link *link, int iter_fd);
+	const char *description;
+	int init_socks;
+	int max_socks;
+	int sock_type;
+	int family;
+};
+
+static struct test_case resume_tests[] = {
+	{
+		.description = "udp: resume after removing a seen socket",
+		.init_socks = nr_soreuse,
+		.max_socks = nr_soreuse,
+		.sock_type = SOCK_DGRAM,
+		.family = AF_INET6,
+		.test = remove_seen,
+	},
+	{
+		.description = "udp: resume after removing one unseen socket",
+		.init_socks = nr_soreuse,
+		.max_socks = nr_soreuse,
+		.sock_type = SOCK_DGRAM,
+		.family = AF_INET6,
+		.test = remove_unseen,
+	},
+	{
+		.description = "udp: resume after removing all unseen sockets",
+		.init_socks = nr_soreuse,
+		.max_socks = nr_soreuse,
+		.sock_type = SOCK_DGRAM,
+		.family = AF_INET6,
+		.test = remove_all,
+	},
+	{
+		.description = "udp: resume after adding a few sockets",
+		.init_socks = nr_soreuse,
+		.max_socks = nr_soreuse,
+		.sock_type = SOCK_DGRAM,
+		/* Use AF_INET so that new sockets are added to the head of the
+		 * bucket's list.
+		 */
+		.family = AF_INET,
+		.test = add_some,
+	},
+	{
+		.description = "udp: force a realloc to occur",
+		.init_socks = init_batch_size,
+		.max_socks = init_batch_size * 2,
+		.sock_type = SOCK_DGRAM,
+		/* Use AF_INET6 so that new sockets are added to the tail of the
+		 * bucket's list, needing to be added to the next batch to force
+		 * a realloc.
+		 */
+		.family = AF_INET6,
+		.test = force_realloc,
+	},
+};
+
+static void do_resume_test(struct test_case *tc)
+{
+	struct sock_iter_batch *skel = NULL;
+	static const __u16 port = 10001;
+	struct bpf_link *link = NULL;
+	struct sock_count *counts;
+	int err, iter_fd = -1;
+	const char *addr;
+	int *fds = NULL;
+	int local_port;
+
+	counts = calloc(tc->max_socks, sizeof(*counts));
+	if (!ASSERT_OK_PTR(counts, "counts"))
+		goto done;
+	skel = sock_iter_batch__open();
+	if (!ASSERT_OK_PTR(skel, "sock_iter_batch__open"))
+		goto done;
+
+	/* Prepare a bucket of sockets in the kernel hashtable */
+	addr = tc->family == AF_INET6 ? "::1" : "127.0.0.1";
+	fds = start_reuseport_server(tc->family, tc->sock_type, addr, port, 0,
+				     tc->init_socks);
+	if (!ASSERT_OK_PTR(fds, "start_reuseport_server"))
+		goto done;
+	local_port = get_socket_local_port(*fds);
+	if (!ASSERT_GE(local_port, 0, "get_socket_local_port"))
+		goto done;
+	skel->rodata->ports[0] = ntohs(local_port);
+	skel->rodata->sf = tc->family;
+
+	err = sock_iter_batch__load(skel);
+	if (!ASSERT_OK(err, "sock_iter_batch__load"))
+		goto done;
+
+	link = bpf_program__attach_iter(tc->sock_type == SOCK_STREAM ?
+					skel->progs.iter_tcp_soreuse :
+					skel->progs.iter_udp_soreuse,
+					NULL);
+	if (!ASSERT_OK_PTR(link, "bpf_program__attach_iter"))
+		goto done;
+
+	iter_fd = bpf_iter_create(bpf_link__fd(link));
+	if (!ASSERT_OK_FD(iter_fd, "bpf_iter_create"))
+		goto done;
+
+	tc->test(tc->family, tc->sock_type, addr, port, fds, tc->init_socks,
+		 counts, tc->max_socks, link, iter_fd);
+done:
+	free(counts);
+	free_fds(fds, tc->init_socks);
+	if (iter_fd >= 0)
+		close(iter_fd);
+	bpf_link__destroy(link);
+	sock_iter_batch__destroy(skel);
+}
+
+static void do_resume_tests(void)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(resume_tests); i++) {
+		if (test__start_subtest(resume_tests[i].description)) {
+			do_resume_test(&resume_tests[i]);
+		}
+	}
+}
+
 static void do_test(int sock_type, bool onebyone)
 {
 	int err, i, nread, to_read, total_read, iter_fd = -1;
-	int first_idx, second_idx, indices[nr_soreuse];
+	struct iter_out outputs[nr_soreuse];
 	struct bpf_link *link = NULL;
 	struct sock_iter_batch *skel;
+	int first_idx, second_idx;
 	int *fds[2] = {};
 
 	skel = sock_iter_batch__open();
@@ -34,6 +453,7 @@ static void do_test(int sock_type, bool onebyone)
 			goto done;
 		skel->rodata->ports[i] = ntohs(local_port);
 	}
+	skel->rodata->sf = AF_INET6;
 
 	err = sock_iter_batch__load(skel);
 	if (!ASSERT_OK(err, "sock_iter_batch__load"))
@@ -55,38 +475,38 @@ static void do_test(int sock_type, bool onebyone)
 	 * from a bucket and leave one socket out from
 	 * that bucket on purpose.
 	 */
-	to_read = (nr_soreuse - 1) * sizeof(*indices);
+	to_read = (nr_soreuse - 1) * sizeof(*outputs);
 	total_read = 0;
 	first_idx = -1;
 	do {
-		nread = read(iter_fd, indices, onebyone ? sizeof(*indices) : to_read);
-		if (nread <= 0 || nread % sizeof(*indices))
+		nread = read(iter_fd, outputs, onebyone ? sizeof(*outputs) : to_read);
+		if (nread <= 0 || nread % sizeof(*outputs))
 			break;
 		total_read += nread;
 
 		if (first_idx == -1)
-			first_idx = indices[0];
-		for (i = 0; i < nread / sizeof(*indices); i++)
-			ASSERT_EQ(indices[i], first_idx, "first_idx");
+			first_idx = outputs[0].idx;
+		for (i = 0; i < nread / sizeof(*outputs); i++)
+			ASSERT_EQ(outputs[i].idx, first_idx, "first_idx");
 	} while (total_read < to_read);
-	ASSERT_EQ(nread, onebyone ? sizeof(*indices) : to_read, "nread");
+	ASSERT_EQ(nread, onebyone ? sizeof(*outputs) : to_read, "nread");
 	ASSERT_EQ(total_read, to_read, "total_read");
 
 	free_fds(fds[first_idx], nr_soreuse);
 	fds[first_idx] = NULL;
 
 	/* Read the "whole" second bucket */
-	to_read = nr_soreuse * sizeof(*indices);
+	to_read = nr_soreuse * sizeof(*outputs);
 	total_read = 0;
 	second_idx = !first_idx;
 	do {
-		nread = read(iter_fd, indices, onebyone ? sizeof(*indices) : to_read);
-		if (nread <= 0 || nread % sizeof(*indices))
+		nread = read(iter_fd, outputs, onebyone ? sizeof(*outputs) : to_read);
+		if (nread <= 0 || nread % sizeof(*outputs))
 			break;
 		total_read += nread;
 
-		for (i = 0; i < nread / sizeof(*indices); i++)
-			ASSERT_EQ(indices[i], second_idx, "second_idx");
+		for (i = 0; i < nread / sizeof(*outputs); i++)
+			ASSERT_EQ(outputs[i].idx, second_idx, "second_idx");
 	} while (total_read <= to_read);
 	ASSERT_EQ(nread, 0, "nread");
 	/* Both so_reuseport ports should be in different buckets, so
@@ -128,6 +548,7 @@ void test_sock_iter_batch(void)
 		do_test(SOCK_DGRAM, true);
 		do_test(SOCK_DGRAM, false);
 	}
+	do_resume_tests();
 	close_netns(nstoken);
 
 done:
diff --git a/tools/testing/selftests/bpf/progs/bpf_qdisc_common.h b/tools/testing/selftests/bpf/progs/bpf_qdisc_common.h
new file mode 100644
index 000000000000..3754f581b328
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_qdisc_common.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _BPF_QDISC_COMMON_H
+#define _BPF_QDISC_COMMON_H
+
+#define NET_XMIT_SUCCESS        0x00
+#define NET_XMIT_DROP           0x01    /* skb dropped                  */
+#define NET_XMIT_CN             0x02    /* congestion notification      */
+
+#define TC_PRIO_CONTROL  7
+#define TC_PRIO_MAX      15
+
+#define private(name) SEC(".data." #name) __hidden __attribute__((aligned(8)))
+
+struct bpf_sk_buff_ptr;
+
+static inline struct qdisc_skb_cb *qdisc_skb_cb(const struct sk_buff *skb)
+{
+	return (struct qdisc_skb_cb *)skb->cb;
+}
+
+static inline unsigned int qdisc_pkt_len(const struct sk_buff *skb)
+{
+	return qdisc_skb_cb(skb)->pkt_len;
+}
+
+#endif
diff --git a/tools/testing/selftests/bpf/progs/bpf_qdisc_fail__incompl_ops.c b/tools/testing/selftests/bpf/progs/bpf_qdisc_fail__incompl_ops.c
new file mode 100644
index 000000000000..f188062ed730
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_qdisc_fail__incompl_ops.c
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <vmlinux.h>
+#include "bpf_experimental.h"
+#include "bpf_qdisc_common.h"
+
+char _license[] SEC("license") = "GPL";
+
+SEC("struct_ops")
+int BPF_PROG(bpf_qdisc_test_enqueue, struct sk_buff *skb, struct Qdisc *sch,
+	     struct bpf_sk_buff_ptr *to_free)
+{
+	bpf_qdisc_skb_drop(skb, to_free);
+	return NET_XMIT_DROP;
+}
+
+SEC("struct_ops")
+struct sk_buff *BPF_PROG(bpf_qdisc_test_dequeue, struct Qdisc *sch)
+{
+	return NULL;
+}
+
+SEC("struct_ops")
+void BPF_PROG(bpf_qdisc_test_reset, struct Qdisc *sch)
+{
+}
+
+SEC("struct_ops")
+void BPF_PROG(bpf_qdisc_test_destroy, struct Qdisc *sch)
+{
+}
+
+SEC(".struct_ops")
+struct Qdisc_ops test = {
+	.enqueue   = (void *)bpf_qdisc_test_enqueue,
+	.dequeue   = (void *)bpf_qdisc_test_dequeue,
+	.reset     = (void *)bpf_qdisc_test_reset,
+	.destroy   = (void *)bpf_qdisc_test_destroy,
+	.id        = "bpf_qdisc_test",
+};
+
diff --git a/tools/testing/selftests/bpf/progs/bpf_qdisc_fifo.c b/tools/testing/selftests/bpf/progs/bpf_qdisc_fifo.c
new file mode 100644
index 000000000000..1de2be3e370b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_qdisc_fifo.c
@@ -0,0 +1,126 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <vmlinux.h>
+#include "bpf_experimental.h"
+#include "bpf_qdisc_common.h"
+
+char _license[] SEC("license") = "GPL";
+
+/* One queued packet: the skb is held through a kptr and the element is linked
+ * into the q_fifo list through 'node'.
+ */
+struct skb_node {
+	struct sk_buff __kptr * skb;
+	struct bpf_list_node node;
+};
+
+private(A) struct bpf_spin_lock q_fifo_lock;
+private(A) struct bpf_list_head q_fifo __contains(skb_node, node);
+
+bool init_called;
+
+/* Append skb to the tail of q_fifo.
+ *
+ * The packet is dropped (NET_XMIT_DROP) when the queue length equals
+ * sch->limit or when no skb_node can be allocated. On success the skb is
+ * moved into a freshly allocated skb_node, q.qlen and qstats.backlog are
+ * increased, and NET_XMIT_SUCCESS is returned.
+ */
+SEC("struct_ops/bpf_fifo_enqueue")
+int BPF_PROG(bpf_fifo_enqueue, struct sk_buff *skb, struct Qdisc *sch,
+	     struct bpf_sk_buff_ptr *to_free)
+{
+	struct skb_node *skbn;
+	u32 pkt_len;
+
+	if (sch->q.qlen == sch->limit)
+		goto drop;
+
+	skbn = bpf_obj_new(typeof(*skbn));
+	if (!skbn)
+		goto drop;
+
+	/* Read the length before ownership of the skb moves into the node. */
+	pkt_len = qdisc_pkt_len(skb);
+
+	sch->q.qlen++;
+	/* Store the skb in the node; the exchange returns the slot's previous
+	 * content, which is dropped if it is non-NULL.
+	 */
+	skb = bpf_kptr_xchg(&skbn->skb, skb);
+	if (skb)
+		bpf_qdisc_skb_drop(skb, to_free);
+
+	bpf_spin_lock(&q_fifo_lock);
+	bpf_list_push_back(&q_fifo, &skbn->node);
+	bpf_spin_unlock(&q_fifo_lock);
+
+	sch->qstats.backlog += pkt_len;
+	return NET_XMIT_SUCCESS;
+drop:
+	bpf_qdisc_skb_drop(skb, to_free);
+	return NET_XMIT_DROP;
+}
+
+/* Pop the head of q_fifo and return its skb.
+ *
+ * Returns NULL when the list is empty or when the popped node holds no skb;
+ * in the latter case the node is freed but q.qlen and backlog are left
+ * untouched. Otherwise backlog, byte/packet stats and q.qlen are updated.
+ */
+SEC("struct_ops/bpf_fifo_dequeue")
+struct sk_buff *BPF_PROG(bpf_fifo_dequeue, struct Qdisc *sch)
+{
+	struct bpf_list_node *node;
+	struct sk_buff *skb = NULL;
+	struct skb_node *skbn;
+
+	bpf_spin_lock(&q_fifo_lock);
+	node = bpf_list_pop_front(&q_fifo);
+	bpf_spin_unlock(&q_fifo_lock);
+	if (!node)
+		return NULL;
+
+	skbn = container_of(node, struct skb_node, node);
+	/* Take the skb out of the node (leaving NULL behind) before freeing it. */
+	skb = bpf_kptr_xchg(&skbn->skb, skb);
+	bpf_obj_drop(skbn);
+	if (!skb)
+		return NULL;
+
+	sch->qstats.backlog -= qdisc_pkt_len(skb);
+	bpf_qdisc_bstats_update(sch, skb);
+	sch->q.qlen--;
+
+	return skb;
+}
+
+/* Initialise the qdisc: fix the queue limit at 1000 packets and record in
+ * the global init_called that this callback ran. opt and extack are unused.
+ * Always returns 0.
+ */
+SEC("struct_ops/bpf_fifo_init")
+int BPF_PROG(bpf_fifo_init, struct Qdisc *sch, struct nlattr *opt,
+	     struct netlink_ext_ack *extack)
+{
+	sch->limit = 1000;
+	init_called = true;
+	return 0;
+}
+
+/* Drain q_fifo: pop up to q.qlen nodes, free the skb each one holds and the
+ * node itself, then set q.qlen to 0. The loop stops early if the list runs
+ * empty.
+ */
+SEC("struct_ops/bpf_fifo_reset")
+void BPF_PROG(bpf_fifo_reset, struct Qdisc *sch)
+{
+	struct bpf_list_node *node;
+	struct skb_node *skbn;
+	int i;
+
+	bpf_for(i, 0, sch->q.qlen) {
+		struct sk_buff *skb = NULL;
+
+		bpf_spin_lock(&q_fifo_lock);
+		node = bpf_list_pop_front(&q_fifo);
+		bpf_spin_unlock(&q_fifo_lock);
+
+		if (!node)
+			break;
+
+		skbn = container_of(node, struct skb_node, node);
+		/* Detach the skb from the node so both can be released. */
+		skb = bpf_kptr_xchg(&skbn->skb, skb);
+		if (skb)
+			bpf_kfree_skb(skb);
+		bpf_obj_drop(skbn);
+	}
+	sch->q.qlen = 0;
+}
+
+/* Destroy callback: intentionally empty. */
+SEC("struct_ops")
+void BPF_PROG(bpf_fifo_destroy, struct Qdisc *sch)
+{
+}
+
+/* struct_ops map wiring the bpf_fifo_* programs into a Qdisc_ops whose id is
+ * "bpf_fifo".
+ */
+SEC(".struct_ops")
+struct Qdisc_ops fifo = {
+	.enqueue   = (void *)bpf_fifo_enqueue,
+	.dequeue   = (void *)bpf_fifo_dequeue,
+	.init      = (void *)bpf_fifo_init,
+	.reset     = (void *)bpf_fifo_reset,
+	.destroy   = (void *)bpf_fifo_destroy,
+	.id        = "bpf_fifo",
+};
+
diff --git a/tools/testing/selftests/bpf/progs/bpf_qdisc_fq.c b/tools/testing/selftests/bpf/progs/bpf_qdisc_fq.c
new file mode 100644
index 000000000000..1a3233a275c7
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_qdisc_fq.c
@@ -0,0 +1,756 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/* bpf_fq is intended for testing the bpf qdisc infrastructure and not a direct
+ * copy of sch_fq. bpf_fq implements the scheduling algorithm of sch_fq before
+ * 29f834aa326e ("net_sched: sch_fq: add 3 bands and WRR scheduling") was
+ * introduced. It gives each flow a fair chance to transmit packets in a
+ * round-robin fashion. Note that for flow pacing, bpf_fq currently only
+ * respects skb->tstamp but not skb->sk->sk_pacing_rate. In addition, if there
+ * are multiple bpf_fq instances, they will have a shared view of flows and
+ * configuration since some key data structures such as fq_prio_flows,
+ * fq_nonprio_flows, and fq_bpf_data are global.
+ *
+ * To use bpf_fq alone without running selftests, use the following commands.
+ *
+ * 1. Register bpf_fq to the kernel
+ *     bpftool struct_ops register bpf_qdisc_fq.bpf.o /sys/fs/bpf
+ * 2. Add bpf_fq to an interface
+ *     tc qdisc add dev <interface name> root handle <handle> bpf_fq
+ * 3. Delete bpf_fq attached to the interface
+ *     tc qdisc delete dev <interface name> root
+ * 4. Unregister bpf_fq
+ *     bpftool struct_ops unregister name fq
+ *
+ * The qdisc name, bpf_fq, used in tc commands is defined by Qdisc_ops.id.
+ * The struct_ops_map_name, fq, used in the bpftool command is the name of the
+ * Qdisc_ops.
+ *
+ * SEC(".struct_ops")
+ * struct Qdisc_ops fq = {
+ *         ...
+ *         .id        = "bpf_fq",
+ * };
+ */
+
+#include <vmlinux.h>
+#include <errno.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_experimental.h"
+#include "bpf_qdisc_common.h"
+
+char _license[] SEC("license") = "GPL";
+
+#define NSEC_PER_USEC 1000L
+#define NSEC_PER_SEC 1000000000L
+
+#define NUM_QUEUE (1 << 20)
+
+/* Configuration and scheduler-wide state of bpf_fq (the global 'q'). */
+struct fq_bpf_data {
+	u32 quantum;			/* credit added when a flow's credit is used up */
+	u32 initial_quantum;		/* credit given to a newly created flow */
+	u32 flow_refill_delay;		/* jiffies a flow must stay detached before a refill */
+	u32 flow_plimit;		/* max packets queued per non-prio flow */
+	u64 horizon;			/* max distance of skb->tstamp ahead of ktime_cache */
+	u32 orphan_mask;		/* mask applied to the skb hash of orphan packets */
+	u32 timer_slack;		/* passed to bpf_qdisc_watchdog_schedule() */
+	u64 time_next_delayed_flow;	/* earliest throttled flow, ~0ULL if none */
+	u64 unthrottle_latency_ns;	/* moving average of unthrottle lateness */
+	u8 horizon_drop;		/* non-zero: drop beyond-horizon packets, else clamp */
+	u32 new_flow_cnt;		/* flows counted on fq_new_flows */
+	u32 old_flow_cnt;		/* flows counted on fq_old_flows */
+	u64 ktime_cache;		/* last bpf_ktime_get_ns() sample */
+};
+
+/* Results of fq_classify(). */
+enum {
+	CLS_RET_PRIO	= 0,	/* packet goes to the priority flow */
+	CLS_RET_NONPRIO = 1,	/* packet goes to a hashed non-priority flow */
+	CLS_RET_ERR	= 2,	/* no flow could be found or created */
+};
+
+/* One queued packet inside a flow's rbtree, ordered by 'tstamp'. */
+struct skb_node {
+	u64 tstamp;			/* time to send, the rbtree sort key */
+	struct sk_buff __kptr * skb;	/* the packet, owned by this node */
+	struct bpf_rb_node node;	/* link in fq_flow_node.queue */
+};
+
+/* Per-flow state. A flow is reference counted: it is stashed in a flow map
+ * entry and may additionally be linked on fq_new_flows/fq_old_flows or in
+ * the fq_delayed rbtree.
+ */
+struct fq_flow_node {
+	int credit;			/* remaining byte budget, may go negative */
+	u32 qlen;			/* packets currently queued in 'queue' */
+	u64 age;			/* state / detach time, see fq_flow_set_detached() */
+	u64 time_next_packet;		/* earliest time the flow may send again */
+	struct bpf_list_node list_node;	/* link in fq_new_flows or fq_old_flows */
+	struct bpf_rb_node rb_node;	/* link in fq_delayed */
+	struct bpf_rb_root queue __contains(skb_node, node);
+	struct bpf_spin_lock lock;	/* taken around accesses to 'queue' */
+	struct bpf_refcount refcount;
+};
+
+/* State shared between bpf_fq_dequeue() and fq_dequeue_nonprio_flows(). */
+struct dequeue_nonprio_ctx {
+	bool stop_iter;	/* set by the callee when the caller should stop looping */
+	u64 expire;	/* next throttled-flow deadline, used to arm the watchdog */
+	u64 now;	/* timestamp taken at the start of the dequeue */
+};
+
+/* Arguments and progress counter for fq_remove_flows(). */
+struct remove_flows_ctx {
+	bool gc_only;	/* true: only delete entries that pass fq_gc_candidate() */
+	u32 reset_cnt;	/* entries deleted so far */
+	u32 reset_max;	/* stop iterating once reset_cnt reaches this value */
+};
+
+/* Arguments for fq_unset_throttled_flows(). */
+struct unset_throttled_flows_ctx {
+	bool unset_all;	/* true: unthrottle every flow regardless of its deadline */
+	u64 now;	/* flows whose time_next_packet is <= now are unthrottled */
+};
+
+/* Map value type: a kptr slot holding one flow. */
+struct fq_stashed_flow {
+	struct fq_flow_node __kptr * flow;
+};
+
+/* Non-priority flows, keyed by the 64-bit hash computed in fq_classify(). */
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__type(key, __u64);
+	__type(value, struct fq_stashed_flow);
+	__uint(max_entries, NUM_QUEUE);
+} fq_nonprio_flows SEC(".maps");
+
+/* The single priority flow; it is always looked up with key 0. */
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__type(key, __u64);
+	__type(value, struct fq_stashed_flow);
+	__uint(max_entries, 1);
+} fq_prio_flows SEC(".maps");
+
+private(A) struct bpf_spin_lock fq_delayed_lock;
+private(A) struct bpf_rb_root fq_delayed __contains(fq_flow_node, rb_node);
+
+private(B) struct bpf_spin_lock fq_new_flows_lock;
+private(B) struct bpf_list_head fq_new_flows __contains(fq_flow_node, list_node);
+
+private(C) struct bpf_spin_lock fq_old_flows_lock;
+private(C) struct bpf_list_head fq_old_flows __contains(fq_flow_node, list_node);
+
+private(D) struct fq_bpf_data q;
+
+/* Wrapper for bpf_kptr_xchg that expects NULL dst.
+ *
+ * Stores ptr in the kptr slot map_val. If the slot held an object after all,
+ * that object is released with bpf_obj_drop().
+ */
+static void bpf_kptr_xchg_back(void *map_val, void *ptr)
+{
+	void *ret;
+
+	ret = bpf_kptr_xchg(map_val, ptr);
+	if (ret)
+		bpf_obj_drop(ret);
+}
+
+/* rbtree "less" callback: order skb_nodes by ascending tstamp. */
+static bool skbn_tstamp_less(struct bpf_rb_node *a, const struct bpf_rb_node *b)
+{
+	struct skb_node *lhs = container_of(a, struct skb_node, node);
+	struct skb_node *rhs = container_of(b, struct skb_node, node);
+
+	return lhs->tstamp < rhs->tstamp;
+}
+
+/* rbtree "less" callback: order flows by ascending time_next_packet. */
+static bool fn_time_next_packet_less(struct bpf_rb_node *a, const struct bpf_rb_node *b)
+{
+	struct fq_flow_node *lhs = container_of(a, struct fq_flow_node, rb_node);
+	struct fq_flow_node *rhs = container_of(b, struct fq_flow_node, rb_node);
+
+	return lhs->time_next_packet < rhs->time_next_packet;
+}
+
+/* Push flow onto the front of the list 'head' while holding 'lock', then
+ * increment the matching flow counter.
+ */
+static void
+fq_flows_add_head(struct bpf_list_head *head, struct bpf_spin_lock *lock,
+		  struct fq_flow_node *flow, u32 *flow_cnt)
+{
+	bpf_spin_lock(lock);
+	bpf_list_push_front(head, &flow->list_node);
+	bpf_spin_unlock(lock);
+	*flow_cnt += 1;
+}
+
+/* Push flow onto the back of the list 'head' while holding 'lock', then
+ * increment the matching flow counter.
+ */
+static void
+fq_flows_add_tail(struct bpf_list_head *head, struct bpf_spin_lock *lock,
+		  struct fq_flow_node *flow, u32 *flow_cnt)
+{
+	bpf_spin_lock(lock);
+	bpf_list_push_back(head, &flow->list_node);
+	bpf_spin_unlock(lock);
+	*flow_cnt += 1;
+}
+
+/* Pop the first element of the list 'head' into *node (NULL if the list is
+ * empty) while holding 'lock'. The flow counter is decremented in either
+ * case, so callers are expected to call this only when the counter is
+ * non-zero.
+ */
+static void
+fq_flows_remove_front(struct bpf_list_head *head, struct bpf_spin_lock *lock,
+		      struct bpf_list_node **node, u32 *flow_cnt)
+{
+	bpf_spin_lock(lock);
+	*node = bpf_list_pop_front(head);
+	bpf_spin_unlock(lock);
+	*flow_cnt -= 1;
+}
+
+/* Return true if the list 'head' has no elements.
+ *
+ * Implemented by popping the first element and pushing it straight back,
+ * all under 'lock', so the list is left unchanged.
+ */
+static bool
+fq_flows_is_empty(struct bpf_list_head *head, struct bpf_spin_lock *lock)
+{
+	struct bpf_list_node *node;
+
+	bpf_spin_lock(lock);
+	node = bpf_list_pop_front(head);
+	if (node) {
+		bpf_list_push_front(head, node);
+		bpf_spin_unlock(lock);
+		return false;
+	}
+	bpf_spin_unlock(lock);
+
+	return true;
+}
+
+/* flow->age is used to denote the state of the flow (not-detached, detached, throttled)
+ * as well as the timestamp when the flow is detached.
+ *
+ * 0: not-detached
+ * 1 - (~0ULL-1): detached
+ * ~0ULL: throttled
+ */
+/* Mark flow as detached by recording the current jiffies value in age. */
+static void fq_flow_set_detached(struct fq_flow_node *flow)
+{
+	flow->age = bpf_jiffies64();
+}
+
+/* A flow is detached when age is neither 0 (not-detached) nor ~0ULL
+ * (throttled).
+ */
+static bool fq_flow_is_detached(struct fq_flow_node *flow)
+{
+	if (flow->age == 0)
+		return false;
+
+	return flow->age != ~0ULL;
+}
+
+/* Return true if the socket state is TCP_LISTEN or TCP_NEW_SYN_RECV. */
+static bool sk_listener(struct sock *sk)
+{
+	int state_bit = 1 << sk->__sk_common.skc_state;
+
+	return state_bit & (TCPF_LISTEN | TCPF_NEW_SYN_RECV);
+}
+
+static void fq_gc(void);
+
+/* Allocate a flow, reserve an entry for 'hash' in 'flow_map' and stash the
+ * flow in that entry.
+ *
+ * If the map update fails with -ENOMEM or -E2BIG, stale detached flows are
+ * collected with fq_gc() and the update is retried once on the same map.
+ * On success *sflow points at the map value that now owns the flow.
+ *
+ * Returns 0 on success, or -ENOMEM if the flow cannot be allocated or the
+ * map entry cannot be found after the update.
+ */
+static int fq_new_flow(void *flow_map, struct fq_stashed_flow **sflow, u64 hash)
+{
+	struct fq_stashed_flow tmp = {};
+	struct fq_flow_node *flow;
+	int ret;
+
+	flow = bpf_obj_new(typeof(*flow));
+	if (!flow)
+		return -ENOMEM;
+
+	flow->credit = q.initial_quantum;
+	flow->qlen = 0;
+	/* Any value other than 0 and ~0ULL reads as "detached". */
+	flow->age = 1;
+	flow->time_next_packet = 0;
+
+	ret = bpf_map_update_elem(flow_map, &hash, &tmp, 0);
+	if (ret == -ENOMEM || ret == -E2BIG) {
+		fq_gc();
+		/* Retry on the map that was asked for, not always the non-prio one. */
+		bpf_map_update_elem(flow_map, &hash, &tmp, 0);
+	}
+
+	*sflow = bpf_map_lookup_elem(flow_map, &hash);
+	if (!*sflow) {
+		bpf_obj_drop(flow);
+		return -ENOMEM;
+	}
+
+	bpf_kptr_xchg_back(&(*sflow)->flow, flow);
+	return 0;
+}
+
+/* Find (or create) the flow map entry for skb and store it in *sflow.
+ *
+ * Packets whose priority, masked with TC_PRIO_MAX, equals TC_PRIO_CONTROL use
+ * the priority flow (key 0 of fq_prio_flows). All other packets are hashed
+ * into fq_nonprio_flows.
+ *
+ * If the lookup finds nothing, a new flow is created in fq_nonprio_flows and
+ * the result becomes CLS_RET_NONPRIO (or CLS_RET_ERR on failure); this also
+ * applies when the failed lookup was the priority one.
+ *
+ * Returns CLS_RET_PRIO, CLS_RET_NONPRIO or CLS_RET_ERR.
+ */
+static int
+fq_classify(struct sk_buff *skb, struct fq_stashed_flow **sflow)
+{
+	struct sock *sk = skb->sk;
+	int ret = CLS_RET_NONPRIO;
+	u64 hash = 0;
+
+	if ((skb->priority & TC_PRIO_MAX) == TC_PRIO_CONTROL) {
+		*sflow = bpf_map_lookup_elem(&fq_prio_flows, &hash);
+		ret = CLS_RET_PRIO;
+	} else {
+		if (!sk || sk_listener(sk)) {
+			hash = bpf_skb_get_hash(skb) & q.orphan_mask;
+			/* Avoid collision with an existing flow hash, which
+			 * only uses the lower 32 bits of hash, by setting the
+			 * upper half of hash to 1.
+			 */
+			hash |= (1ULL << 32);
+		} else if (sk->__sk_common.skc_state == TCP_CLOSE) {
+			/* Closed sockets are hashed like orphans. */
+			hash = bpf_skb_get_hash(skb) & q.orphan_mask;
+			hash |= (1ULL << 32);
+		} else {
+			hash = sk->__sk_common.skc_hash;
+		}
+		*sflow = bpf_map_lookup_elem(&fq_nonprio_flows, &hash);
+	}
+
+	if (!*sflow)
+		ret = fq_new_flow(&fq_nonprio_flows, sflow, hash) < 0 ?
+		      CLS_RET_ERR : CLS_RET_NONPRIO;
+
+	return ret;
+}
+
+/* Return true if skb->tstamp lies later than q.ktime_cache + q.horizon. */
+static bool fq_packet_beyond_horizon(struct sk_buff *skb)
+{
+	s64 latest = (s64)(q.ktime_cache + q.horizon);
+
+	return (s64)skb->tstamp > latest;
+}
+
+/* Enqueue skb into the flow selected by fq_classify().
+ *
+ * The packet is dropped (NET_XMIT_DROP, qstats.drops++) when the qdisc holds
+ * sch->limit packets, when its tstamp is beyond the horizon and
+ * q.horizon_drop is set, when classification fails, when the map entry holds
+ * no flow, when a non-prio flow already has q.flow_plimit packets, or when
+ * the skb_node allocation fails. Otherwise the skb is stored in a new
+ * skb_node that is inserted, ordered by tstamp, into the flow's rbtree.
+ */
+SEC("struct_ops/bpf_fq_enqueue")
+int BPF_PROG(bpf_fq_enqueue, struct sk_buff *skb, struct Qdisc *sch,
+	     struct bpf_sk_buff_ptr *to_free)
+{
+	struct fq_flow_node *flow = NULL, *flow_copy;
+	struct fq_stashed_flow *sflow;
+	u64 time_to_send, jiffies;
+	struct skb_node *skbn;
+	int ret;
+
+	if (sch->q.qlen >= sch->limit)
+		goto drop;
+
+	if (!skb->tstamp) {
+		/* No timestamp requested: the packet may be sent right away. */
+		time_to_send = q.ktime_cache = bpf_ktime_get_ns();
+	} else {
+		if (fq_packet_beyond_horizon(skb)) {
+			/* Refresh the cached clock before deciding. */
+			q.ktime_cache = bpf_ktime_get_ns();
+			if (fq_packet_beyond_horizon(skb)) {
+				if (q.horizon_drop)
+					goto drop;
+
+				/* Clamp the timestamp to the horizon. */
+				skb->tstamp = q.ktime_cache + q.horizon;
+			}
+		}
+		time_to_send = skb->tstamp;
+	}
+
+	ret = fq_classify(skb, &sflow);
+	if (ret == CLS_RET_ERR)
+		goto drop;
+
+	/* Take the flow out of its map slot; every path below that got a flow
+	 * puts it back with bpf_kptr_xchg_back().
+	 */
+	flow = bpf_kptr_xchg(&sflow->flow, flow);
+	if (!flow)
+		goto drop;
+
+	if (ret == CLS_RET_NONPRIO) {
+		if (flow->qlen >= q.flow_plimit) {
+			bpf_kptr_xchg_back(&sflow->flow, flow);
+			goto drop;
+		}
+
+		if (fq_flow_is_detached(flow)) {
+			/* Second reference: one stays in the map, one goes
+			 * onto the new-flows list.
+			 */
+			flow_copy = bpf_refcount_acquire(flow);
+
+			jiffies = bpf_jiffies64();
+			/* Top the credit up to at least one quantum if the
+			 * flow was detached for longer than the refill delay.
+			 */
+			if ((s64)(jiffies - (flow_copy->age + q.flow_refill_delay)) > 0) {
+				if (flow_copy->credit < q.quantum)
+					flow_copy->credit = q.quantum;
+			}
+			flow_copy->age = 0;
+			fq_flows_add_tail(&fq_new_flows, &fq_new_flows_lock, flow_copy,
+					  &q.new_flow_cnt);
+		}
+	}
+
+	skbn = bpf_obj_new(typeof(*skbn));
+	if (!skbn) {
+		bpf_kptr_xchg_back(&sflow->flow, flow);
+		goto drop;
+	}
+
+	skbn->tstamp = skb->tstamp = time_to_send;
+
+	sch->qstats.backlog += qdisc_pkt_len(skb);
+
+	/* Move the skb into the node; a previous occupant is dropped. */
+	skb = bpf_kptr_xchg(&skbn->skb, skb);
+	if (skb)
+		bpf_qdisc_skb_drop(skb, to_free);
+
+	bpf_spin_lock(&flow->lock);
+	bpf_rbtree_add(&flow->queue, &skbn->node, skbn_tstamp_less);
+	bpf_spin_unlock(&flow->lock);
+
+	flow->qlen++;
+	bpf_kptr_xchg_back(&sflow->flow, flow);
+
+	sch->q.qlen++;
+	return NET_XMIT_SUCCESS;
+
+drop:
+	bpf_qdisc_skb_drop(skb, to_free);
+	sch->qstats.drops++;
+	return NET_XMIT_DROP;
+}
+
+/* bpf_loop() callback: unthrottle the earliest flow in fq_delayed.
+ *
+ * The flow is moved to the tail of fq_old_flows when ctx->unset_all is set
+ * or its time_next_packet is not later than ctx->now. If the earliest flow
+ * is not due yet, its deadline is stored in q.time_next_delayed_flow.
+ *
+ * Returns 0 to continue with the next flow, 1 to stop the loop (tree empty,
+ * earliest flow not due, or removal failed).
+ */
+static int fq_unset_throttled_flows(u32 index, struct unset_throttled_flows_ctx *ctx)
+{
+	struct bpf_rb_node *node = NULL;
+	struct fq_flow_node *flow;
+
+	bpf_spin_lock(&fq_delayed_lock);
+
+	node = bpf_rbtree_first(&fq_delayed);
+	if (!node) {
+		bpf_spin_unlock(&fq_delayed_lock);
+		return 1;
+	}
+
+	flow = container_of(node, struct fq_flow_node, rb_node);
+	if (!ctx->unset_all && flow->time_next_packet > ctx->now) {
+		q.time_next_delayed_flow = flow->time_next_packet;
+		bpf_spin_unlock(&fq_delayed_lock);
+		return 1;
+	}
+
+	node = bpf_rbtree_remove(&fq_delayed, &flow->rb_node);
+
+	bpf_spin_unlock(&fq_delayed_lock);
+
+	if (!node)
+		return 1;
+
+	flow = container_of(node, struct fq_flow_node, rb_node);
+	/* age 0 == not-detached: the flow is schedulable again. */
+	flow->age = 0;
+	fq_flows_add_tail(&fq_old_flows, &fq_old_flows_lock, flow, &q.old_flow_cnt);
+
+	return 0;
+}
+
+/* Mark flow as throttled (age = ~0ULL), lower q.time_next_delayed_flow to
+ * the flow's deadline if that is earlier, and insert the flow into
+ * fq_delayed ordered by time_next_packet.
+ */
+static void fq_flow_set_throttled(struct fq_flow_node *flow)
+{
+	flow->age = ~0ULL;
+
+	if (q.time_next_delayed_flow > flow->time_next_packet)
+		q.time_next_delayed_flow = flow->time_next_packet;
+
+	bpf_spin_lock(&fq_delayed_lock);
+	bpf_rbtree_add(&fq_delayed, &flow->rb_node, fn_time_next_packet_less);
+	bpf_spin_unlock(&fq_delayed_lock);
+}
+
+/* Unthrottle every flow whose deadline has passed at time 'now'.
+ *
+ * Does nothing while the earliest deadline is still in the future.
+ * Otherwise the lateness (now - earliest deadline) is folded into
+ * q.unthrottle_latency_ns with a weight of 1/8, and the due flows are moved
+ * back to fq_old_flows.
+ */
+static void fq_check_throttled(u64 now)
+{
+	struct unset_throttled_flows_ctx ctx = {
+		.unset_all = false,
+		.now = now,
+	};
+	unsigned long sample;
+
+	if (q.time_next_delayed_flow > now)
+		return;
+
+	sample = (unsigned long)(now - q.time_next_delayed_flow);
+	q.unthrottle_latency_ns -= q.unthrottle_latency_ns >> 3;
+	q.unthrottle_latency_ns += sample >> 3;
+
+	/* "No delayed flow" until the callback finds one that is not due. */
+	q.time_next_delayed_flow = ~0ULL;
+	bpf_loop(NUM_QUEUE, fq_unset_throttled_flows, &ctx, 0);
+}
+
+/* One round-robin step over the non-priority flows.
+ *
+ * Takes the first flow from fq_new_flows, or from fq_old_flows if there are
+ * no new flows, and then:
+ *  - credit exhausted: adds one quantum, moves the flow to the tail of
+ *    fq_old_flows and returns NULL so the caller tries the next flow;
+ *  - flow has no packets: moves it to fq_old_flows if it came from the new
+ *    list and old flows exist, otherwise marks it detached and drops this
+ *    reference; returns NULL to continue;
+ *  - head packet not yet due: throttles the flow; returns NULL to continue;
+ *  - otherwise removes the head packet, charges its length to the flow's
+ *    credit, puts the flow back at the head of its list and stops.
+ *
+ * ctx->stop_iter is set whenever the caller must stop iterating. If no flow
+ * is schedulable, ctx->expire receives the next throttled-flow deadline.
+ * Returns the dequeued skb or NULL.
+ */
+static struct sk_buff*
+fq_dequeue_nonprio_flows(u32 index, struct dequeue_nonprio_ctx *ctx)
+{
+	u64 time_next_packet, time_to_send;
+	struct bpf_rb_node *rb_node;
+	struct sk_buff *skb = NULL;
+	struct bpf_list_head *head;
+	struct bpf_list_node *node;
+	struct bpf_spin_lock *lock;
+	struct fq_flow_node *flow;
+	struct skb_node *skbn;
+	bool is_empty;
+	u32 *cnt;
+
+	if (q.new_flow_cnt) {
+		head = &fq_new_flows;
+		lock = &fq_new_flows_lock;
+		cnt = &q.new_flow_cnt;
+	} else if (q.old_flow_cnt) {
+		head = &fq_old_flows;
+		lock = &fq_old_flows_lock;
+		cnt = &q.old_flow_cnt;
+	} else {
+		if (q.time_next_delayed_flow != ~0ULL)
+			ctx->expire = q.time_next_delayed_flow;
+		goto break_loop;
+	}
+
+	fq_flows_remove_front(head, lock, &node, cnt);
+	if (!node)
+		goto break_loop;
+
+	flow = container_of(node, struct fq_flow_node, list_node);
+	if (flow->credit <= 0) {
+		flow->credit += q.quantum;
+		fq_flows_add_tail(&fq_old_flows, &fq_old_flows_lock, flow, &q.old_flow_cnt);
+		return NULL;
+	}
+
+	bpf_spin_lock(&flow->lock);
+	rb_node = bpf_rbtree_first(&flow->queue);
+	if (!rb_node) {
+		bpf_spin_unlock(&flow->lock);
+		is_empty = fq_flows_is_empty(&fq_old_flows, &fq_old_flows_lock);
+		if (head == &fq_new_flows && !is_empty) {
+			fq_flows_add_tail(&fq_old_flows, &fq_old_flows_lock, flow, &q.old_flow_cnt);
+		} else {
+			fq_flow_set_detached(flow);
+			bpf_obj_drop(flow);
+		}
+		return NULL;
+	}
+
+	skbn = container_of(rb_node, struct skb_node, node);
+	time_to_send = skbn->tstamp;
+
+	/* The packet may go out no earlier than both its own timestamp and
+	 * the flow's pacing deadline.
+	 */
+	time_next_packet = (time_to_send > flow->time_next_packet) ?
+		time_to_send : flow->time_next_packet;
+	if (ctx->now < time_next_packet) {
+		bpf_spin_unlock(&flow->lock);
+		flow->time_next_packet = time_next_packet;
+		fq_flow_set_throttled(flow);
+		return NULL;
+	}
+
+	rb_node = bpf_rbtree_remove(&flow->queue, rb_node);
+	bpf_spin_unlock(&flow->lock);
+
+	if (!rb_node)
+		goto add_flow_and_break;
+
+	skbn = container_of(rb_node, struct skb_node, node);
+	/* Take the skb out of the node before freeing the node. */
+	skb = bpf_kptr_xchg(&skbn->skb, skb);
+	bpf_obj_drop(skbn);
+
+	if (!skb)
+		goto add_flow_and_break;
+
+	flow->credit -= qdisc_skb_cb(skb)->pkt_len;
+	flow->qlen--;
+
+add_flow_and_break:
+	fq_flows_add_head(head, lock, flow, cnt);
+
+break_loop:
+	ctx->stop_iter = true;
+	return skb;
+}
+
+/* Dequeue the earliest packet of the priority flow (key 0 of fq_prio_flows).
+ *
+ * Returns the skb, or NULL if the map entry or its flow is missing, the
+ * flow's queue is empty, or the removal fails. The flow is always put back
+ * into its map slot once it has been taken out.
+ */
+static struct sk_buff *fq_dequeue_prio(void)
+{
+	struct fq_flow_node *flow = NULL;
+	struct fq_stashed_flow *sflow;
+	struct bpf_rb_node *rb_node;
+	struct sk_buff *skb = NULL;
+	struct skb_node *skbn;
+	u64 hash = 0;
+
+	sflow = bpf_map_lookup_elem(&fq_prio_flows, &hash);
+	if (!sflow)
+		return NULL;
+
+	/* Borrow the flow from the map slot; returned at 'out'. */
+	flow = bpf_kptr_xchg(&sflow->flow, flow);
+	if (!flow)
+		return NULL;
+
+	bpf_spin_lock(&flow->lock);
+	rb_node = bpf_rbtree_first(&flow->queue);
+	if (!rb_node) {
+		bpf_spin_unlock(&flow->lock);
+		goto out;
+	}
+
+	skbn = container_of(rb_node, struct skb_node, node);
+	rb_node = bpf_rbtree_remove(&flow->queue, &skbn->node);
+	bpf_spin_unlock(&flow->lock);
+
+	if (!rb_node)
+		goto out;
+
+	skbn = container_of(rb_node, struct skb_node, node);
+	skb = bpf_kptr_xchg(&skbn->skb, skb);
+	bpf_obj_drop(skbn);
+
+out:
+	bpf_kptr_xchg_back(&sflow->flow, flow);
+
+	return skb;
+}
+
+/* Dequeue one packet.
+ *
+ * The priority flow is served first. Otherwise due throttled flows are
+ * released and the non-priority flows are visited round-robin, for at most
+ * sch->limit steps, until a packet is found or no flow is schedulable.
+ * On success q.qlen, backlog and byte/packet stats are updated.
+ *
+ * Returns the skb, or NULL if the qdisc is empty or nothing can be sent yet;
+ * in the latter case the watchdog is armed for the next throttled-flow
+ * deadline when one is known.
+ */
+SEC("struct_ops/bpf_fq_dequeue")
+struct sk_buff *BPF_PROG(bpf_fq_dequeue, struct Qdisc *sch)
+{
+	struct dequeue_nonprio_ctx cb_ctx = {};
+	struct sk_buff *skb = NULL;
+	int i;
+
+	if (!sch->q.qlen)
+		goto out;
+
+	skb = fq_dequeue_prio();
+	if (skb)
+		goto dequeue;
+
+	q.ktime_cache = cb_ctx.now = bpf_ktime_get_ns();
+	fq_check_throttled(q.ktime_cache);
+	bpf_for(i, 0, sch->limit) {
+		skb = fq_dequeue_nonprio_flows(i, &cb_ctx);
+		if (cb_ctx.stop_iter)
+			break;
+	}
+
+	if (skb) {
+dequeue:
+		sch->q.qlen--;
+		sch->qstats.backlog -= qdisc_pkt_len(skb);
+		bpf_qdisc_bstats_update(sch, skb);
+		return skb;
+	}
+
+	if (cb_ctx.expire)
+		bpf_qdisc_watchdog_schedule(sch, cb_ctx.expire, q.timer_slack);
+out:
+	return NULL;
+}
+
+/* bpf_loop() callback: pop one flow from fq_new_flows, or from fq_old_flows
+ * once the new list is empty, and drop the list's reference to it.
+ *
+ * Returns 0 to continue, 1 when both lists are empty.
+ */
+static int fq_remove_flows_in_list(u32 index, void *ctx)
+{
+	struct bpf_list_node *node;
+	struct fq_flow_node *flow;
+
+	bpf_spin_lock(&fq_new_flows_lock);
+	node = bpf_list_pop_front(&fq_new_flows);
+	bpf_spin_unlock(&fq_new_flows_lock);
+	if (!node) {
+		bpf_spin_lock(&fq_old_flows_lock);
+		node = bpf_list_pop_front(&fq_old_flows);
+		bpf_spin_unlock(&fq_old_flows_lock);
+		if (!node)
+			return 1;
+	}
+
+	flow = container_of(node, struct fq_flow_node, list_node);
+	bpf_obj_drop(flow);
+
+	return 0;
+}
+
+extern unsigned CONFIG_HZ __kconfig;
+
+/* limit number of collected flows per round */
+#define FQ_GC_MAX 8
+#define FQ_GC_AGE (3*CONFIG_HZ)
+
+/* A flow may be garbage collected once it has been detached for more than
+ * FQ_GC_AGE jiffies.
+ */
+static bool fq_gc_candidate(struct fq_flow_node *flow)
+{
+	u64 jiffies = bpf_jiffies64();
+
+	if (!fq_flow_is_detached(flow))
+		return false;
+
+	return (s64)(jiffies - (flow->age + FQ_GC_AGE)) > 0;
+}
+
+/* bpf_for_each_map_elem() callback: delete the map entry if it holds a flow
+ * and either ctx->gc_only is false or the flow is a GC candidate.
+ *
+ * Returns 0 to continue, 1 once ctx->reset_cnt has reached ctx->reset_max.
+ */
+static int
+fq_remove_flows(struct bpf_map *flow_map, u64 *hash,
+		struct fq_stashed_flow *sflow, struct remove_flows_ctx *ctx)
+{
+	if (sflow->flow &&
+	    (!ctx->gc_only || fq_gc_candidate(sflow->flow))) {
+		bpf_map_delete_elem(flow_map, hash);
+		ctx->reset_cnt++;
+	}
+
+	return ctx->reset_cnt < ctx->reset_max ? 0 : 1;
+}
+
+/* Delete up to FQ_GC_MAX entries of fq_nonprio_flows whose flows have been
+ * detached for longer than FQ_GC_AGE.
+ */
+static void fq_gc(void)
+{
+	struct remove_flows_ctx cb_ctx = {
+		.gc_only = true,
+		.reset_cnt = 0,
+		.reset_max = FQ_GC_MAX,
+	};
+
+	bpf_for_each_map_elem(&fq_nonprio_flows, fq_remove_flows, &cb_ctx, 0);
+}
+
+/* Reset the qdisc to an empty state.
+ *
+ * Clears q.qlen and backlog, deletes every entry of both flow maps,
+ * re-creates the priority flow, and releases every flow still linked on the
+ * scheduling lists.
+ *
+ * Throttled flows are first moved from fq_delayed onto fq_old_flows so that
+ * the list drain below releases them as well; draining before unthrottling
+ * would leave them queued on fq_old_flows after the reset.
+ */
+SEC("struct_ops/bpf_fq_reset")
+void BPF_PROG(bpf_fq_reset, struct Qdisc *sch)
+{
+	struct unset_throttled_flows_ctx utf_ctx = {
+		.unset_all = true,
+	};
+	struct remove_flows_ctx rf_ctx = {
+		.gc_only = false,
+		.reset_cnt = 0,
+		.reset_max = NUM_QUEUE,
+	};
+	struct fq_stashed_flow *sflow;
+	u64 hash = 0;
+
+	sch->q.qlen = 0;
+	sch->qstats.backlog = 0;
+
+	bpf_for_each_map_elem(&fq_nonprio_flows, fq_remove_flows, &rf_ctx, 0);
+
+	rf_ctx.reset_cnt = 0;
+	bpf_for_each_map_elem(&fq_prio_flows, fq_remove_flows, &rf_ctx, 0);
+	/* Nothing can be reported from here if re-creating the flow fails. */
+	fq_new_flow(&fq_prio_flows, &sflow, hash);
+
+	/* Empty fq_delayed into fq_old_flows, then drain both lists. */
+	bpf_loop(NUM_QUEUE, fq_unset_throttled_flows, &utf_ctx, 0);
+	q.time_next_delayed_flow = ~0ULL;
+
+	bpf_loop(NUM_QUEUE, fq_remove_flows_in_list, NULL, 0);
+	q.new_flow_cnt = 0;
+	q.old_flow_cnt = 0;
+}
+
+/* Initialise the qdisc: create the priority flow and load the default
+ * configuration into the global 'q'. opt and extack are not consulted.
+ *
+ * Quantum values are derived from the device MTU plus its hard header
+ * length. Returns 0, or -ENOMEM if the priority flow cannot be created.
+ */
+SEC("struct_ops/bpf_fq_init")
+int BPF_PROG(bpf_fq_init, struct Qdisc *sch, struct nlattr *opt,
+	     struct netlink_ext_ack *extack)
+{
+	struct net_device *dev = sch->dev_queue->dev;
+	u32 psched_mtu = dev->mtu + dev->hard_header_len;
+	struct fq_stashed_flow *sflow;
+	u64 hash = 0;
+
+	if (fq_new_flow(&fq_prio_flows, &sflow, hash) < 0)
+		return -ENOMEM;
+
+	sch->limit = 10000;
+	q.initial_quantum = 10 * psched_mtu;
+	q.quantum = 2 * psched_mtu;
+	/* Compared against bpf_jiffies64() deltas in bpf_fq_enqueue(). */
+	q.flow_refill_delay = 40;
+	q.flow_plimit = 100;
+	q.horizon = 10ULL * NSEC_PER_SEC;
+	q.horizon_drop = 1;
+	q.orphan_mask = 1024 - 1;
+	q.timer_slack = 10 * NSEC_PER_USEC;
+	/* ~0ULL means no flow is throttled. */
+	q.time_next_delayed_flow = ~0ULL;
+	q.unthrottle_latency_ns = 0ULL;
+	q.new_flow_cnt = 0;
+	q.old_flow_cnt = 0;
+
+	return 0;
+}
+
+/* Destroy callback: intentionally empty. */
+SEC("struct_ops")
+void BPF_PROG(bpf_fq_destroy, struct Qdisc *sch)
+{
+}
+
+/* struct_ops map wiring the bpf_fq_* programs into a Qdisc_ops whose id is
+ * "bpf_fq".
+ */
+SEC(".struct_ops")
+struct Qdisc_ops fq = {
+	.enqueue   = (void *)bpf_fq_enqueue,
+	.dequeue   = (void *)bpf_fq_dequeue,
+	.reset     = (void *)bpf_fq_reset,
+	.init      = (void *)bpf_fq_init,
+	.destroy   = (void *)bpf_fq_destroy,
+	.id        = "bpf_fq",
+};
diff --git a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
index 659694162739..17db400f0e0d 100644
--- a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
+++ b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
@@ -128,6 +128,7 @@
 #define sk_refcnt		__sk_common.skc_refcnt
 #define sk_state		__sk_common.skc_state
 #define sk_net			__sk_common.skc_net
+#define sk_rcv_saddr		__sk_common.skc_rcv_saddr
 #define sk_v6_daddr		__sk_common.skc_v6_daddr
 #define sk_v6_rcv_saddr		__sk_common.skc_v6_rcv_saddr
 #define sk_flags		__sk_common.skc_flags
diff --git a/tools/testing/selftests/bpf/progs/setget_sockopt.c b/tools/testing/selftests/bpf/progs/setget_sockopt.c
index 0107a24b7522..d330b1511979 100644
--- a/tools/testing/selftests/bpf/progs/setget_sockopt.c
+++ b/tools/testing/selftests/bpf/progs/setget_sockopt.c
@@ -83,6 +83,14 @@ struct loop_ctx {
 	struct sock *sk;
 };
 
+/* Return true for an IPv4 or IPv6 stream socket whose protocol is TCP. */
+static bool sk_is_tcp(struct sock *sk)
+{
+	if (sk->__sk_common.skc_family != AF_INET &&
+	    sk->__sk_common.skc_family != AF_INET6)
+		return false;
+
+	if (sk->sk_type != SOCK_STREAM)
+		return false;
+
+	return sk->sk_protocol == IPPROTO_TCP;
+}
+
 static int bpf_test_sockopt_flip(void *ctx, struct sock *sk,
 				 const struct sockopt_test *t,
 				 int level)
@@ -91,6 +99,9 @@ static int bpf_test_sockopt_flip(void *ctx, struct sock *sk,
 
 	opt = t->opt;
 
+	if (opt == SO_TXREHASH && !sk_is_tcp(sk))
+		return 0;
+
 	if (bpf_getsockopt(ctx, level, opt, &old, sizeof(old)))
 		return 1;
 	/* kernel initialized txrehash to 255 */
diff --git a/tools/testing/selftests/bpf/progs/sock_iter_batch.c b/tools/testing/selftests/bpf/progs/sock_iter_batch.c
index 96531b0d9d55..8f483337e103 100644
--- a/tools/testing/selftests/bpf/progs/sock_iter_batch.c
+++ b/tools/testing/selftests/bpf/progs/sock_iter_batch.c
@@ -17,6 +17,12 @@ static bool ipv6_addr_loopback(const struct in6_addr *a)
 		a->s6_addr32[2] | (a->s6_addr32[3] ^ bpf_htonl(1))) == 0;
 }
 
+/* Return true if the network-byte-order address a is 127.0.0.1. The host
+ * constant is converted with bpf_htonl(), matching ipv6_addr_loopback().
+ */
+static bool ipv4_addr_loopback(__be32 a)
+{
+	return a == bpf_htonl(0x7f000001);
+}
+
+volatile const unsigned int sf;
 volatile const __u16 ports[2];
 unsigned int bucket[2];
 
@@ -26,16 +32,20 @@ int iter_tcp_soreuse(struct bpf_iter__tcp *ctx)
 	struct sock *sk = (struct sock *)ctx->sk_common;
 	struct inet_hashinfo *hinfo;
 	unsigned int hash;
+	__u64 sock_cookie;
 	struct net *net;
 	int idx;
 
 	if (!sk)
 		return 0;
 
+	sock_cookie = bpf_get_socket_cookie(sk);
 	sk = bpf_core_cast(sk, struct sock);
-	if (sk->sk_family != AF_INET6 ||
+	if (sk->sk_family != sf ||
 	    sk->sk_state != TCP_LISTEN ||
-	    !ipv6_addr_loopback(&sk->sk_v6_rcv_saddr))
+	    (sk->sk_family == AF_INET6 ?
+	     !ipv6_addr_loopback(&sk->sk_v6_rcv_saddr) :
+	     !ipv4_addr_loopback(sk->sk_rcv_saddr)))
 		return 0;
 
 	if (sk->sk_num == ports[0])
@@ -52,6 +62,7 @@ int iter_tcp_soreuse(struct bpf_iter__tcp *ctx)
 	hinfo = net->ipv4.tcp_death_row.hashinfo;
 	bucket[idx] = hash & hinfo->lhash2_mask;
 	bpf_seq_write(ctx->meta->seq, &idx, sizeof(idx));
+	bpf_seq_write(ctx->meta->seq, &sock_cookie, sizeof(sock_cookie));
 
 	return 0;
 }
@@ -63,14 +74,18 @@ int iter_udp_soreuse(struct bpf_iter__udp *ctx)
 {
 	struct sock *sk = (struct sock *)ctx->udp_sk;
 	struct udp_table *udptable;
+	__u64 sock_cookie;
 	int idx;
 
 	if (!sk)
 		return 0;
 
+	sock_cookie = bpf_get_socket_cookie(sk);
 	sk = bpf_core_cast(sk, struct sock);
-	if (sk->sk_family != AF_INET6 ||
-	    !ipv6_addr_loopback(&sk->sk_v6_rcv_saddr))
+	if (sk->sk_family != sf ||
+	    (sk->sk_family == AF_INET6 ?
+	     !ipv6_addr_loopback(&sk->sk_v6_rcv_saddr) :
+	     !ipv4_addr_loopback(sk->sk_rcv_saddr)))
 		return 0;
 
 	if (sk->sk_num == ports[0])
@@ -84,6 +99,7 @@ int iter_udp_soreuse(struct bpf_iter__udp *ctx)
 	udptable = sk->sk_net.net->ipv4.udp_table;
 	bucket[idx] = udp_sk(sk)->udp_portaddr_hash & udptable->mask;
 	bpf_seq_write(ctx->meta->seq, &idx, sizeof(idx));
+	bpf_seq_write(ctx->meta->seq, &sock_cookie, sizeof(sock_cookie));
 
 	return 0;
 }
diff --git a/tools/testing/selftests/bpf/progs/xsk_xdp_progs.c b/tools/testing/selftests/bpf/progs/xsk_xdp_progs.c
index ccde6a4c6319..683306db8594 100644
--- a/tools/testing/selftests/bpf/progs/xsk_xdp_progs.c
+++ b/tools/testing/selftests/bpf/progs/xsk_xdp_progs.c
@@ -4,6 +4,8 @@
 #include <linux/bpf.h>
 #include <bpf/bpf_helpers.h>
 #include <linux/if_ether.h>
+#include <linux/ip.h>
+#include <linux/errno.h>
 #include "xsk_xdp_common.h"
 
 struct {
@@ -14,6 +16,7 @@ struct {
 } xsk SEC(".maps");
 
 static unsigned int idx;
+int adjust_value = 0;
 int count = 0;
 
 SEC("xdp.frags") int xsk_def_prog(struct xdp_md *xdp)
@@ -70,4 +73,51 @@ SEC("xdp") int xsk_xdp_shared_umem(struct xdp_md *xdp)
 	return bpf_redirect_map(&xsk, idx, XDP_DROP);
 }
 
+/* Resize every packet by 'adjust_value' bytes with bpf_xdp_adjust_tail() and
+ * redirect it to index 0 of the xsk map.
+ *
+ * The packet is dropped when it is empty, when the helper fails, or when the
+ * resulting length differs from buff_len + adjust_value. If the helper
+ * returns -EOPNOTSUPP, adjust_value is overwritten with -EOPNOTSUPP so that
+ * user space can detect the missing support, and the unmodified packet is
+ * still redirected. When the packet grew, its last 4 bytes are filled with a
+ * sequence number in network byte order.
+ */
+SEC("xdp.frags") int xsk_xdp_adjust_tail(struct xdp_md *xdp)
+{
+	__u32 buff_len, curr_buff_len;
+	int ret;
+
+	buff_len = bpf_xdp_get_buff_len(xdp);
+	if (buff_len == 0)
+		return XDP_DROP;
+
+	ret = bpf_xdp_adjust_tail(xdp, adjust_value);
+	if (ret < 0) {
+		/* Handle unsupported cases */
+		if (ret == -EOPNOTSUPP) {
+			/* Set adjust_value to -EOPNOTSUPP to indicate to userspace that this case
+			 * is unsupported
+			 */
+			adjust_value = -EOPNOTSUPP;
+			return bpf_redirect_map(&xsk, 0, XDP_DROP);
+		}
+
+		return XDP_DROP;
+	}
+
+	curr_buff_len = bpf_xdp_get_buff_len(xdp);
+	if (curr_buff_len != buff_len + adjust_value)
+		return XDP_DROP;
+
+	if (curr_buff_len > buff_len) {
+		__u32 len, words_to_end, seq_num;
+
+		/* Index of the last 32-bit word that follows the PKT_HDR_ALIGN
+		 * bytes at the start of the packet.
+		 */
+		len = curr_buff_len - PKT_HDR_ALIGN;
+		words_to_end = len / sizeof(seq_num) - 1;
+
+		/* Convert the sequence number to network byte order and store it in
+		 * the last 4 bytes of the grown packet, located from its new length.
+		 */
+		seq_num = __constant_htonl(words_to_end);
+		bpf_xdp_store_bytes(xdp, curr_buff_len - sizeof(seq_num), &seq_num,
+				    sizeof(seq_num));
+	}
+
+	return bpf_redirect_map(&xsk, 0, XDP_DROP);
+}
+
 char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/xsk_xdp_common.h b/tools/testing/selftests/bpf/xsk_xdp_common.h
index 5a6f36f07383..45810ff552da 100644
--- a/tools/testing/selftests/bpf/xsk_xdp_common.h
+++ b/tools/testing/selftests/bpf/xsk_xdp_common.h
@@ -4,6 +4,7 @@
 #define XSK_XDP_COMMON_H_
 
 #define MAX_SOCKETS 2
+#define PKT_HDR_ALIGN (sizeof(struct ethhdr) + 2) /* Just to align the data in the packet */
 
 struct xdp_info {
 	__u64 count;
diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c
index 11f047b8af75..0ced4026ee44 100644
--- a/tools/testing/selftests/bpf/xskxceiver.c
+++ b/tools/testing/selftests/bpf/xskxceiver.c
@@ -524,6 +524,8 @@ static void __test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx,
 	test->nb_sockets = 1;
 	test->fail = false;
 	test->set_ring = false;
+	test->adjust_tail = false;
+	test->adjust_tail_support = false;
 	test->mtu = MAX_ETH_PKT_SIZE;
 	test->xdp_prog_rx = ifobj_rx->xdp_progs->progs.xsk_def_prog;
 	test->xskmap_rx = ifobj_rx->xdp_progs->maps.xsk;
@@ -757,14 +759,15 @@ static struct pkt_stream *pkt_stream_clone(struct pkt_stream *pkt_stream)
 	return pkt_stream_generate(pkt_stream->nb_pkts, pkt_stream->pkts[0].len);
 }
 
-static void pkt_stream_replace(struct test_spec *test, u32 nb_pkts, u32 pkt_len)
+static void pkt_stream_replace_ifobject(struct ifobject *ifobj, u32 nb_pkts, u32 pkt_len)
 {
-	struct pkt_stream *pkt_stream;
+	ifobj->xsk->pkt_stream = pkt_stream_generate(nb_pkts, pkt_len);
+}
 
-	pkt_stream = pkt_stream_generate(nb_pkts, pkt_len);
-	test->ifobj_tx->xsk->pkt_stream = pkt_stream;
-	pkt_stream = pkt_stream_generate(nb_pkts, pkt_len);
-	test->ifobj_rx->xsk->pkt_stream = pkt_stream;
+static void pkt_stream_replace(struct test_spec *test, u32 nb_pkts, u32 pkt_len)
+{
+	pkt_stream_replace_ifobject(test->ifobj_tx, nb_pkts, pkt_len);
+	pkt_stream_replace_ifobject(test->ifobj_rx, nb_pkts, pkt_len);
 }
 
 static void __pkt_stream_replace_half(struct ifobject *ifobj, u32 pkt_len,
@@ -991,6 +994,31 @@ static bool is_metadata_correct(struct pkt *pkt, void *buffer, u64 addr)
 	return true;
 }
 
+/* Report whether bpf_xdp_adjust_tail() was supported for the Rx XDP program.
+ *
+ * The XDP program overwrites its global 'adjust_value' with -EOPNOTSUPP when
+ * the helper is not supported. The variable is read through the skeleton's
+ * bss mapping, the same path used to set it before the test runs. Looking up
+ * the .bss map element instead copies the map's whole value, which holds more
+ * than this one variable, into the destination buffer.
+ *
+ * Returns false if the program flagged the helper as unsupported, in which
+ * case the adjust_tail test case is skipped.
+ */
+static bool is_adjust_tail_supported(struct xsk_xdp_progs *skel_rx)
+{
+	if (!skel_rx->bss) {
+		ksft_print_msg("Error: could not find bss section of XDP program\n");
+		exit_with_error(ENOENT);
+	}
+
+	return skel_rx->bss->adjust_value != -EOPNOTSUPP;
+}
+
 static bool is_frag_valid(struct xsk_umem_info *umem, u64 addr, u32 len, u32 expected_pkt_nb,
 			  u32 bytes_processed)
 {
@@ -1767,8 +1795,13 @@ static void *worker_testapp_validate_rx(void *arg)
 
 	if (!err && ifobject->validation_func)
 		err = ifobject->validation_func(ifobject);
-	if (err)
-		report_failure(test);
+
+	if (err) {
+		if (test->adjust_tail && !is_adjust_tail_supported(ifobject->xdp_progs))
+			test->adjust_tail_support = false;
+		else
+			report_failure(test);
+	}
 
 	pthread_exit(NULL);
 }
@@ -2515,6 +2548,71 @@ static int testapp_hw_sw_max_ring_size(struct test_spec *test)
 	return testapp_validate_traffic(test);
 }
 
+/* Select the xsk_xdp_adjust_tail programs, store adjust_value in RX .bss and run traffic. */
+static int testapp_xdp_adjust_tail(struct test_spec *test, int adjust_value)
+{
+	struct xsk_xdp_progs *rx_progs = test->ifobj_rx->xdp_progs;
+	struct xsk_xdp_progs *tx_progs = test->ifobj_tx->xdp_progs;
+
+	test_spec_set_xdp_prog(test, rx_progs->progs.xsk_xdp_adjust_tail,
+			       tx_progs->progs.xsk_xdp_adjust_tail,
+			       rx_progs->maps.xsk, tx_progs->maps.xsk);
+	/* Only the RX skeleton's copy of adjust_value is written */
+	rx_progs->bss->adjust_value = adjust_value;
+	return testapp_validate_traffic(test);
+}
+
+/* Run the adjust_tail test: TX stream uses pkt_len, RX stream uses pkt_len + value. */
+static int testapp_adjust_tail(struct test_spec *test, u32 value, u32 pkt_len)
+{
+	int err;
+
+	test->total_steps = 1;
+	test->adjust_tail = true;
+	test->adjust_tail_support = true;	/* the RX worker may clear this */
+	/* Negative adjustments arrive as a wrapped u32, so pkt_len + value still shrinks */
+	pkt_stream_replace_ifobject(test->ifobj_tx, DEFAULT_BATCH_SIZE, pkt_len);
+	pkt_stream_replace_ifobject(test->ifobj_rx, DEFAULT_BATCH_SIZE, pkt_len + value);
+
+	err = testapp_xdp_adjust_tail(test, value);
+	if (err)
+		return err;
+
+	if (test->adjust_tail_support)
+		return 0;
+
+	ksft_test_result_skip("%s %sResize pkt with bpf_xdp_adjust_tail() not supported\n",
+			      mode_string(test), busy_poll_string(test));
+	return TEST_SKIP;
+}
+
+static int testapp_adjust_tail_shrink(struct test_spec *test)
+{
+	/* Shrink the packet tail by 4 bytes (negative adjustment); pkt_len is MIN_PKT_SIZE * 2 */
+	return testapp_adjust_tail(test, -4, MIN_PKT_SIZE * 2);
+}
+
+static int testapp_adjust_tail_shrink_mb(struct test_spec *test)
+{
+	test->mtu = MAX_ETH_JUMBO_SIZE;	/* multi-buffer variant: raise the test MTU first */
+	/* Shrink by the frag size (XSK_UMEM__MAX_FRAME_SIZE) */
+	return testapp_adjust_tail(test, -XSK_UMEM__MAX_FRAME_SIZE, XSK_UMEM__LARGE_FRAME_SIZE * 2);
+}
+
+static int testapp_adjust_tail_grow(struct test_spec *test)
+{
+	/* Grow the packet tail by 4 bytes; pkt_len is MIN_PKT_SIZE * 2 */
+	return testapp_adjust_tail(test, 4, MIN_PKT_SIZE * 2);
+}
+
+static int testapp_adjust_tail_grow_mb(struct test_spec *test)
+{
+	test->mtu = MAX_ETH_JUMBO_SIZE;	/* multi-buffer variant: raise the test MTU first */
+	/* Grow by (frag_size - last_frag_size) - 1 to stay inside the last fragment */
+	return testapp_adjust_tail(test, (XSK_UMEM__MAX_FRAME_SIZE / 2) - 1,
+				   XSK_UMEM__LARGE_FRAME_SIZE * 2);
+}
+
 static void run_pkt_test(struct test_spec *test)
 {
 	int ret;
@@ -2621,6 +2719,10 @@ static const struct test_spec tests[] = {
 	{.name = "TOO_MANY_FRAGS", .test_func = testapp_too_many_frags},
 	{.name = "HW_SW_MIN_RING_SIZE", .test_func = testapp_hw_sw_min_ring_size},
 	{.name = "HW_SW_MAX_RING_SIZE", .test_func = testapp_hw_sw_max_ring_size},
+	{.name = "XDP_ADJUST_TAIL_SHRINK", .test_func = testapp_adjust_tail_shrink},
+	{.name = "XDP_ADJUST_TAIL_SHRINK_MULTI_BUFF", .test_func = testapp_adjust_tail_shrink_mb},
+	{.name = "XDP_ADJUST_TAIL_GROW", .test_func = testapp_adjust_tail_grow},
+	{.name = "XDP_ADJUST_TAIL_GROW_MULTI_BUFF", .test_func = testapp_adjust_tail_grow_mb},
 	};
 
 static void print_tests(void)
diff --git a/tools/testing/selftests/bpf/xskxceiver.h b/tools/testing/selftests/bpf/xskxceiver.h
index e46e823f6a1a..67fc44b2813b 100644
--- a/tools/testing/selftests/bpf/xskxceiver.h
+++ b/tools/testing/selftests/bpf/xskxceiver.h
@@ -173,6 +173,8 @@ struct test_spec {
 	u16 nb_sockets;
 	bool fail;
 	bool set_ring;
+	bool adjust_tail;
+	bool adjust_tail_support;
 	enum test_mode mode;
 	char name[MAX_TEST_NAME_SIZE];
 };
diff --git a/tools/testing/selftests/drivers/net/.gitignore b/tools/testing/selftests/drivers/net/.gitignore
index ec746f374e85..d634d8395d90 100644
--- a/tools/testing/selftests/drivers/net/.gitignore
+++ b/tools/testing/selftests/drivers/net/.gitignore
@@ -1,2 +1,2 @@
 # SPDX-License-Identifier: GPL-2.0-only
-xdp_helper
+napi_id_helper
diff --git a/tools/testing/selftests/drivers/net/Makefile b/tools/testing/selftests/drivers/net/Makefile
index 0c95bd944d56..be780bcb73a3 100644
--- a/tools/testing/selftests/drivers/net/Makefile
+++ b/tools/testing/selftests/drivers/net/Makefile
@@ -3,12 +3,14 @@ CFLAGS += $(KHDR_INCLUDES)
 
 TEST_INCLUDES := $(wildcard lib/py/*.py) \
 		 $(wildcard lib/sh/*.sh) \
-		 ../../net/net_helper.sh \
 		 ../../net/lib.sh \
 
-TEST_GEN_FILES := xdp_helper
+TEST_GEN_FILES := \
+	napi_id_helper \
+# end of TEST_GEN_FILES
 
 TEST_PROGS := \
+	napi_id.py \
 	netcons_basic.sh \
 	netcons_fragmented_msg.sh \
 	netcons_overflow.sh \
diff --git a/tools/testing/selftests/drivers/net/hw/Makefile b/tools/testing/selftests/drivers/net/hw/Makefile
index 07cddb19ba35..df2c047ffa90 100644
--- a/tools/testing/selftests/drivers/net/hw/Makefile
+++ b/tools/testing/selftests/drivers/net/hw/Makefile
@@ -15,12 +15,11 @@ TEST_PROGS = \
 	iou-zcrx.py \
 	irq.py \
 	loopback.sh \
-	nic_link_layer.py \
-	nic_performance.py \
 	pp_alloc_fail.py \
 	rss_ctx.py \
 	rss_input_xfrm.py \
 	tso.py \
+	xsk_reconfig.py \
 	#
 
 TEST_FILES := \
diff --git a/tools/testing/selftests/drivers/net/hw/devmem.py b/tools/testing/selftests/drivers/net/hw/devmem.py
index 3947e9157115..7947650210a0 100755
--- a/tools/testing/selftests/drivers/net/hw/devmem.py
+++ b/tools/testing/selftests/drivers/net/hw/devmem.py
@@ -1,6 +1,7 @@
 #!/usr/bin/env python3
 # SPDX-License-Identifier: GPL-2.0
 
+from os import path
 from lib.py import ksft_run, ksft_exit
 from lib.py import ksft_eq, KsftSkipEx
 from lib.py import NetDrvEpEnv
@@ -10,8 +11,7 @@ from lib.py import ksft_disruptive
 
 def require_devmem(cfg):
     if not hasattr(cfg, "_devmem_probed"):
-        port = rand_port()
-        probe_command = f"./ncdevmem -f {cfg.ifname}"
+        probe_command = f"{cfg.bin_local} -f {cfg.ifname}"
         cfg._devmem_supported = cmd(probe_command, fail=False, shell=True).ret == 0
         cfg._devmem_probed = True
 
@@ -21,22 +21,55 @@ def require_devmem(cfg):
 
 @ksft_disruptive
 def check_rx(cfg) -> None:
-    cfg.require_ipver("6")
     require_devmem(cfg)
 
     port = rand_port()
-    listen_cmd = f"./ncdevmem -l -f {cfg.ifname} -s {cfg.addr_v['6']} -p {port}"
+    socat = f"socat -u - TCP{cfg.addr_ipver}:{cfg.addr}:{port},bind={cfg.remote_addr}:{port}"
+    listen_cmd = f"{cfg.bin_local} -l -f {cfg.ifname} -s {cfg.addr} -p {port} -c {cfg.remote_addr} -v 7"
+
+    with bkg(listen_cmd, exit_wait=True) as ncdevmem:
+        wait_port_listen(port)
+        cmd(f"yes $(echo -e \x01\x02\x03\x04\x05\x06) | \
+            head -c 1K | {socat}", host=cfg.remote, shell=True)
+
+    ksft_eq(ncdevmem.ret, 0)
+
+
+@ksft_disruptive
+def check_tx(cfg) -> None:
+    require_devmem(cfg)
+
+    port = rand_port()
+    listen_cmd = f"socat -U - TCP{cfg.addr_ipver}-LISTEN:{port}"
 
     with bkg(listen_cmd) as socat:
         wait_port_listen(port)
-        cmd(f"echo -e \"hello\\nworld\"| socat -u - TCP6:[{cfg.addr_v['6']}]:{port}", host=cfg.remote, shell=True)
+        cmd(f"echo -e \"hello\\nworld\"| {cfg.bin_remote} -f {cfg.ifname} -s {cfg.addr} -p {port}", host=cfg.remote, shell=True)
+
+    ksft_eq(socat.stdout.strip(), "hello\nworld")
+
+
+@ksft_disruptive
+def check_tx_chunks(cfg) -> None:
+    cfg.require_ipver("6")
+    require_devmem(cfg)
+
+    port = rand_port()
+    listen_cmd = f"socat -U - TCP6-LISTEN:{port}"
+
+    with bkg(listen_cmd, exit_wait=True) as socat:
+        wait_port_listen(port)
+        cmd(f"echo -e \"hello\\nworld\"| {cfg.bin_remote} -f {cfg.ifname} -s {cfg.addr_v['6']} -p {port} -z 3", host=cfg.remote, shell=True)
 
     ksft_eq(socat.stdout.strip(), "hello\nworld")
 
 
 def main() -> None:
     with NetDrvEpEnv(__file__) as cfg:
-        ksft_run([check_rx],
+        cfg.bin_local = path.abspath(path.dirname(__file__) + "/ncdevmem")
+        cfg.bin_remote = cfg.remote.deploy(cfg.bin_local)
+
+        ksft_run([check_rx, check_tx, check_tx_chunks],
                  args=(cfg, ))
     ksft_exit()
 
diff --git a/tools/testing/selftests/drivers/net/hw/iou-zcrx.c b/tools/testing/selftests/drivers/net/hw/iou-zcrx.c
index c26b4180eddd..62456df947bc 100644
--- a/tools/testing/selftests/drivers/net/hw/iou-zcrx.c
+++ b/tools/testing/selftests/drivers/net/hw/iou-zcrx.c
@@ -37,8 +37,8 @@
 
 #include <liburing.h>
 
-#define PAGE_SIZE (4096)
-#define AREA_SIZE (8192 * PAGE_SIZE)
+static long page_size;
+#define AREA_SIZE (8192 * page_size)
 #define SEND_SIZE (512 * 4096)
 #define min(a, b) \
 	({ \
@@ -66,7 +66,7 @@ static int cfg_oneshot_recvs;
 static int cfg_send_size = SEND_SIZE;
 static struct sockaddr_in6 cfg_addr;
 
-static char payload[SEND_SIZE] __attribute__((aligned(PAGE_SIZE)));
+static char *payload;
 static void *area_ptr;
 static void *ring_ptr;
 static size_t ring_size;
@@ -114,8 +114,8 @@ static inline size_t get_refill_ring_size(unsigned int rq_entries)
 
 	ring_size = rq_entries * sizeof(struct io_uring_zcrx_rqe);
 	/* add space for the header (head/tail/etc.) */
-	ring_size += PAGE_SIZE;
-	return ALIGN_UP(ring_size, 4096);
+	ring_size += page_size;
+	return ALIGN_UP(ring_size, page_size);
 }
 
 static void setup_zcrx(struct io_uring *ring)
@@ -219,7 +219,7 @@ static void process_accept(struct io_uring *ring, struct io_uring_cqe *cqe)
 
 	connfd = cqe->res;
 	if (cfg_oneshot)
-		add_recvzc_oneshot(ring, connfd, PAGE_SIZE);
+		add_recvzc_oneshot(ring, connfd, page_size);
 	else
 		add_recvzc(ring, connfd);
 }
@@ -245,7 +245,7 @@ static void process_recvzc(struct io_uring *ring, struct io_uring_cqe *cqe)
 
 	if (cfg_oneshot) {
 		if (cqe->res == 0 && cqe->flags == 0 && cfg_oneshot_recvs) {
-			add_recvzc_oneshot(ring, connfd, PAGE_SIZE);
+			add_recvzc_oneshot(ring, connfd, page_size);
 			cfg_oneshot_recvs--;
 		}
 	} else if (!(cqe->flags & IORING_CQE_F_MORE)) {
@@ -260,7 +260,7 @@ static void process_recvzc(struct io_uring *ring, struct io_uring_cqe *cqe)
 
 	for (i = 0; i < n; i++) {
 		if (*(data + i) != payload[(received + i)])
-			error(1, 0, "payload mismatch at ", i);
+			error(1, 0, "payload mismatch at %d", i);
 	}
 	received += n;
 
@@ -354,7 +354,7 @@ static void run_client(void)
 		chunk = min_t(ssize_t, cfg_payload_len, to_send);
 		res = send(fd, src, chunk, 0);
 		if (res < 0)
-			error(1, 0, "send(): %d", sent);
+			error(1, 0, "send(): %zd", sent);
 		sent += res;
 		to_send -= res;
 	}
@@ -370,7 +370,7 @@ static void usage(const char *filepath)
 
 static void parse_opts(int argc, char **argv)
 {
-	const int max_payload_len = sizeof(payload) -
+	const int max_payload_len = SEND_SIZE -
 				    sizeof(struct ipv6hdr) -
 				    sizeof(struct tcphdr) -
 				    40 /* max tcp options */;
@@ -443,6 +443,13 @@ int main(int argc, char **argv)
 	const char *cfg_test = argv[argc - 1];
 	int i;
 
+	page_size = sysconf(_SC_PAGESIZE);
+	if (page_size < 0)
+		return 1;
+
+	if (posix_memalign((void **)&payload, page_size, SEND_SIZE))
+		return 1;
+
 	parse_opts(argc, argv);
 
 	for (i = 0; i < SEND_SIZE; i++)
diff --git a/tools/testing/selftests/drivers/net/hw/iou-zcrx.py b/tools/testing/selftests/drivers/net/hw/iou-zcrx.py
index 6a0378e06cab..9c03fd777f3d 100755
--- a/tools/testing/selftests/drivers/net/hw/iou-zcrx.py
+++ b/tools/testing/selftests/drivers/net/hw/iou-zcrx.py
@@ -5,13 +5,12 @@ import re
 from os import path
-from lib.py import ksft_run, ksft_exit
+from lib.py import ksft_run, ksft_exit, KsftSkipEx
 from lib.py import NetDrvEpEnv
-from lib.py import bkg, cmd, ethtool, wait_port_listen
+from lib.py import bkg, cmd, defer, ethtool, rand_port, wait_port_listen
 
 
-def _get_rx_ring_entries(cfg):
-    output = ethtool(f"-g {cfg.ifname}", host=cfg.remote).stdout
-    values = re.findall(r'RX:\s+(\d+)', output)
-    return int(values[1])
+def _get_current_settings(cfg):
+    rings = ethtool(f"-g {cfg.ifname}", json=True, host=cfg.remote)[0]  # first entry of the JSON reply
+    return rings['rx'], rings['hds-thresh']
 
 
 def _get_combined_channels(cfg):
@@ -20,8 +19,21 @@ def _get_combined_channels(cfg):
     return int(values[1])
 
 
-def _set_flow_rule(cfg, chan):
-    output = ethtool(f"-N {cfg.ifname} flow-type tcp6 dst-port 9999 action {chan}", host=cfg.remote).stdout
+def _create_rss_ctx(cfg, chan):
+    """Create a one-queue RSS context starting at `chan`; return (ctx_id, defer() object deleting it)."""
+    stdout = ethtool(f"-X {cfg.ifname} context new start {chan} equal 1", host=cfg.remote).stdout
+    ctx_id = int(re.search(r'New RSS context is (\d+)', stdout).group(1))
+    return (ctx_id, defer(ethtool, f"-X {cfg.ifname} delete context {ctx_id}", host=cfg.remote))
+
+
+def _set_flow_rule(cfg, port, chan):
+    """Add a tcp6 dst-port `port` flow rule with action `chan` on the remote; return the rule ID."""
+    stdout = ethtool(f"-N {cfg.ifname} flow-type tcp6 dst-port {port} action {chan}", host=cfg.remote).stdout
+    return int(re.search(r'ID (\d+)', stdout).group(1))
+
+
+def _set_flow_rule_rss(cfg, port, ctx_id):  # like _set_flow_rule(), but targets RSS context ctx_id; returns the rule ID
+    output = ethtool(f"-N {cfg.ifname} flow-type tcp6 dst-port {port} context {ctx_id}", host=cfg.remote).stdout
     values = re.search(r'ID (\d+)', output).group(1)
     return int(values)
 
@@ -32,24 +44,29 @@ def test_zcrx(cfg) -> None:
     combined_chans = _get_combined_channels(cfg)
     if combined_chans < 2:
         raise KsftSkipEx('at least 2 combined channels required')
-    rx_ring = _get_rx_ring_entries(cfg)
-
-    try:
-        ethtool(f"-G {cfg.ifname} tcp-data-split on", host=cfg.remote)
-        ethtool(f"-G {cfg.ifname} rx 64", host=cfg.remote)
-        ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}", host=cfg.remote)
-        flow_rule_id = _set_flow_rule(cfg, combined_chans - 1)
-
-        rx_cmd = f"{cfg.bin_remote} -s -p 9999 -i {cfg.ifname} -q {combined_chans - 1}"
-        tx_cmd = f"{cfg.bin_local} -c -h {cfg.remote_addr_v['6']} -p 9999 -l 12840"
-        with bkg(rx_cmd, host=cfg.remote, exit_wait=True):
-            wait_port_listen(9999, proto="tcp", host=cfg.remote)
-            cmd(tx_cmd)
-    finally:
-        ethtool(f"-N {cfg.ifname} delete {flow_rule_id}", host=cfg.remote)
-        ethtool(f"-X {cfg.ifname} default", host=cfg.remote)
-        ethtool(f"-G {cfg.ifname} rx {rx_ring}", host=cfg.remote)
-        ethtool(f"-G {cfg.ifname} tcp-data-split auto", host=cfg.remote)
+    (rx_ring, hds_thresh) = _get_current_settings(cfg)
+    port = rand_port()
+
+    ethtool(f"-G {cfg.ifname} tcp-data-split on", host=cfg.remote)
+    defer(ethtool, f"-G {cfg.ifname} tcp-data-split auto", host=cfg.remote)
+
+    ethtool(f"-G {cfg.ifname} hds-thresh 0", host=cfg.remote)
+    defer(ethtool, f"-G {cfg.ifname} hds-thresh {hds_thresh}", host=cfg.remote)
+
+    ethtool(f"-G {cfg.ifname} rx 64", host=cfg.remote)
+    defer(ethtool, f"-G {cfg.ifname} rx {rx_ring}", host=cfg.remote)
+
+    ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}", host=cfg.remote)
+    defer(ethtool, f"-X {cfg.ifname} default", host=cfg.remote)
+
+    flow_rule_id = _set_flow_rule(cfg, port, combined_chans - 1)
+    defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}", host=cfg.remote)
+
+    rx_cmd = f"{cfg.bin_remote} -s -p {port} -i {cfg.ifname} -q {combined_chans - 1}"
+    tx_cmd = f"{cfg.bin_local} -c -h {cfg.remote_addr_v['6']} -p {port} -l 12840"
+    with bkg(rx_cmd, host=cfg.remote, exit_wait=True):
+        wait_port_listen(port, proto="tcp", host=cfg.remote)
+        cmd(tx_cmd)
 
 
 def test_zcrx_oneshot(cfg) -> None:
@@ -58,24 +75,61 @@ def test_zcrx_oneshot(cfg) -> None:
     combined_chans = _get_combined_channels(cfg)
     if combined_chans < 2:
         raise KsftSkipEx('at least 2 combined channels required')
-    rx_ring = _get_rx_ring_entries(cfg)
-
-    try:
-        ethtool(f"-G {cfg.ifname} tcp-data-split on", host=cfg.remote)
-        ethtool(f"-G {cfg.ifname} rx 64", host=cfg.remote)
-        ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}", host=cfg.remote)
-        flow_rule_id = _set_flow_rule(cfg, combined_chans - 1)
-
-        rx_cmd = f"{cfg.bin_remote} -s -p 9999 -i {cfg.ifname} -q {combined_chans - 1} -o 4"
-        tx_cmd = f"{cfg.bin_local} -c -h {cfg.remote_addr_v['6']} -p 9999 -l 4096 -z 16384"
-        with bkg(rx_cmd, host=cfg.remote, exit_wait=True):
-            wait_port_listen(9999, proto="tcp", host=cfg.remote)
-            cmd(tx_cmd)
-    finally:
-        ethtool(f"-N {cfg.ifname} delete {flow_rule_id}", host=cfg.remote)
-        ethtool(f"-X {cfg.ifname} default", host=cfg.remote)
-        ethtool(f"-G {cfg.ifname} rx {rx_ring}", host=cfg.remote)
-        ethtool(f"-G {cfg.ifname} tcp-data-split auto", host=cfg.remote)
+    (rx_ring, hds_thresh) = _get_current_settings(cfg)
+    port = rand_port()
+
+    ethtool(f"-G {cfg.ifname} tcp-data-split on", host=cfg.remote)
+    defer(ethtool, f"-G {cfg.ifname} tcp-data-split auto", host=cfg.remote)
+
+    ethtool(f"-G {cfg.ifname} hds-thresh 0", host=cfg.remote)
+    defer(ethtool, f"-G {cfg.ifname} hds-thresh {hds_thresh}", host=cfg.remote)
+
+    ethtool(f"-G {cfg.ifname} rx 64", host=cfg.remote)
+    defer(ethtool, f"-G {cfg.ifname} rx {rx_ring}", host=cfg.remote)
+
+    ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}", host=cfg.remote)
+    defer(ethtool, f"-X {cfg.ifname} default", host=cfg.remote)
+
+    flow_rule_id = _set_flow_rule(cfg, port, combined_chans - 1)
+    defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}", host=cfg.remote)
+
+    rx_cmd = f"{cfg.bin_remote} -s -p {port} -i {cfg.ifname} -q {combined_chans - 1} -o 4"
+    tx_cmd = f"{cfg.bin_local} -c -h {cfg.remote_addr_v['6']} -p {port} -l 4096 -z 16384"
+    with bkg(rx_cmd, host=cfg.remote, exit_wait=True):
+        wait_port_listen(port, proto="tcp", host=cfg.remote)
+        cmd(tx_cmd)
+
+
+def test_zcrx_rss(cfg) -> None:
+    """Zero-copy RX test where the flow reaches the target queue through an RSS context."""
+    cfg.require_ipver('6')
+    combined_chans = _get_combined_channels(cfg)
+    if combined_chans < 2:
+        raise KsftSkipEx('at least 2 combined channels required')
+    (rx_ring, hds_thresh) = _get_current_settings(cfg)
+    port = rand_port()
+    # Every remote setting changed below gets its restore command registered via defer().
+    ethtool(f"-G {cfg.ifname} tcp-data-split on", host=cfg.remote)
+    defer(ethtool, f"-G {cfg.ifname} tcp-data-split auto", host=cfg.remote)
+
+    ethtool(f"-G {cfg.ifname} hds-thresh 0", host=cfg.remote)
+    defer(ethtool, f"-G {cfg.ifname} hds-thresh {hds_thresh}", host=cfg.remote)
+
+    ethtool(f"-G {cfg.ifname} rx 64", host=cfg.remote)
+    defer(ethtool, f"-G {cfg.ifname} rx {rx_ring}", host=cfg.remote)
+    # Default RSS spread is limited to combined_chans - 1 queues; the last index is used below.
+    ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}", host=cfg.remote)
+    defer(ethtool, f"-X {cfg.ifname} default", host=cfg.remote)
+    # delete_ctx is unused here: _create_rss_ctx() has already deferred the context deletion.
+    (ctx_id, delete_ctx) = _create_rss_ctx(cfg, combined_chans - 1)
+    flow_rule_id = _set_flow_rule_rss(cfg, port, ctx_id)
+    defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}", host=cfg.remote)
+    # The receiver (-s) runs on the remote host; the sender (-c) starts once the port listens.
+    rx_cmd = f"{cfg.bin_remote} -s -p {port} -i {cfg.ifname} -q {combined_chans - 1}"
+    tx_cmd = f"{cfg.bin_local} -c -h {cfg.remote_addr_v['6']} -p {port} -l 12840"
+    with bkg(rx_cmd, host=cfg.remote, exit_wait=True):
+        wait_port_listen(port, proto="tcp", host=cfg.remote)
+        cmd(tx_cmd)
 
 
 def main() -> None:
diff --git a/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py b/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py
index 399789a9676a..b582885786f5 100644
--- a/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py
+++ b/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py
@@ -9,7 +9,6 @@ try:
     sys.path.append(KSFT_DIR.as_posix())
     from net.lib.py import *
     from drivers.net.lib.py import *
-    from .linkconfig import LinkConfig
 except ModuleNotFoundError as e:
     ksft_pr("Failed importing `net` library from kernel sources")
     ksft_pr(str(e))
diff --git a/tools/testing/selftests/drivers/net/hw/lib/py/linkconfig.py b/tools/testing/selftests/drivers/net/hw/lib/py/linkconfig.py
deleted file mode 100644
index 79fde603cbbc..000000000000
--- a/tools/testing/selftests/drivers/net/hw/lib/py/linkconfig.py
+++ /dev/null
@@ -1,222 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-
-from lib.py import cmd, ethtool, ip
-from lib.py import ksft_pr, ksft_eq, KsftSkipEx
-from typing import Optional
-import re
-import time
-import json
-
-#The LinkConfig class is implemented to handle the link layer configurations.
-#Required minimum ethtool version is 6.10
-
-class LinkConfig:
-    """Class for handling the link layer configurations"""
-    def __init__(self, cfg: object) -> None:
-        self.cfg = cfg
-        self.partner_netif = self.get_partner_netif_name()
-
-        """Get the initial link configuration of local interface"""
-        self.common_link_modes = self.get_common_link_modes()
-
-    def get_partner_netif_name(self) -> Optional[str]:
-        partner_netif = None
-        try:
-            if not self.verify_link_up():
-                return None
-            """Get partner interface name"""
-            partner_json_output = ip("addr show", json=True, host=self.cfg.remote)
-            for interface in partner_json_output:
-                for addr in interface.get('addr_info', []):
-                    if addr.get('local') == self.cfg.remote_addr:
-                        partner_netif = interface['ifname']
-                        ksft_pr(f"Partner Interface name: {partner_netif}")
-            if partner_netif is None:
-                ksft_pr("Unable to get the partner interface name")
-        except Exception as e:
-            print(f"Unexpected error occurred while getting partner interface name: {e}")
-        self.partner_netif = partner_netif
-        return partner_netif
-
-    def verify_link_up(self) -> bool:
-        """Verify whether the local interface link is up"""
-        with open(f"/sys/class/net/{self.cfg.ifname}/operstate", "r") as fp:
-            link_state = fp.read().strip()
-
-        if link_state == "down":
-            ksft_pr(f"Link state of interface {self.cfg.ifname} is DOWN")
-            return False
-        else:
-            return True
-
-    def reset_interface(self, local: bool = True, remote: bool = True) -> bool:
-        ksft_pr("Resetting interfaces in local and remote")
-        if remote:
-            if self.verify_link_up():
-                if self.partner_netif is not None:
-                    ifname = self.partner_netif
-                    link_up_cmd = f"ip link set up {ifname}"
-                    link_down_cmd = f"ip link set down {ifname}"
-                    reset_cmd = f"{link_down_cmd} && sleep 5 && {link_up_cmd}"
-                    try:
-                        cmd(reset_cmd, host=self.cfg.remote)
-                    except Exception as e:
-                        ksft_pr(f"Unexpected error occurred while resetting remote: {e}")
-                else:
-                    ksft_pr("Partner interface not available")
-        if local:
-            ifname = self.cfg.ifname
-            link_up_cmd = f"ip link set up {ifname}"
-            link_down_cmd = f"ip link set down {ifname}"
-            reset_cmd = f"{link_down_cmd} && sleep 5 && {link_up_cmd}"
-            try:
-                cmd(reset_cmd)
-            except Exception as e:
-                ksft_pr(f"Unexpected error occurred while resetting local: {e}")
-        time.sleep(10)
-        if self.verify_link_up() and self.get_ethtool_field("link-detected"):
-            ksft_pr("Local and remote interfaces reset to original state")
-            return True
-        else:
-            ksft_pr("Error occurred after resetting interfaces. Link is DOWN.")
-            return False
-
-    def set_speed_and_duplex(self, speed: str, duplex: str, autoneg: bool = True) -> bool:
-        """Set the speed and duplex state for the interface"""
-        autoneg_state = "on" if autoneg is True else "off"
-        process = None
-        try:
-            process = ethtool(f"--change {self.cfg.ifname} speed {speed} duplex {duplex} autoneg {autoneg_state}")
-        except Exception as e:
-            ksft_pr(f"Unexpected error occurred while setting speed/duplex: {e}")
-        if process is None or process.ret != 0:
-            return False
-        else:
-            ksft_pr(f"Speed: {speed} Mbps, Duplex: {duplex} set for Interface: {self.cfg.ifname}")
-            return True
-
-    def verify_speed_and_duplex(self, expected_speed: str, expected_duplex: str) -> bool:
-        if not self.verify_link_up():
-            return False
-        """Verifying the speed and duplex state for the interface"""
-        with open(f"/sys/class/net/{self.cfg.ifname}/speed", "r") as fp:
-            actual_speed = fp.read().strip()
-        with open(f"/sys/class/net/{self.cfg.ifname}/duplex", "r") as fp:
-            actual_duplex = fp.read().strip()
-
-        ksft_eq(actual_speed, expected_speed)
-        ksft_eq(actual_duplex, expected_duplex)
-        return True
-
-    def set_autonegotiation_state(self, state: str, remote: bool = False) -> bool:
-        common_link_modes = self.common_link_modes
-        speeds, duplex_modes = self.get_speed_duplex_values(self.common_link_modes)
-        speed = speeds[0]
-        duplex = duplex_modes[0]
-        if not speed or not duplex:
-            ksft_pr("No speed or duplex modes found")
-            return False
-
-        speed_duplex_cmd = f"speed {speed} duplex {duplex}" if state == "off" else ""
-        if remote:
-            if not self.verify_link_up():
-                return False
-            """Set the autonegotiation state for the partner"""
-            command = f"-s {self.partner_netif} {speed_duplex_cmd} autoneg {state}"
-            partner_autoneg_change = None
-            """Set autonegotiation state for interface in remote pc"""
-            try:
-                partner_autoneg_change = ethtool(command, host=self.cfg.remote)
-            except Exception as e:
-                ksft_pr(f"Unexpected error occurred while changing auto-neg in remote: {e}")
-            if partner_autoneg_change is None or partner_autoneg_change.ret != 0:
-                ksft_pr(f"Not able to set autoneg parameter for interface {self.partner_netif}.")
-                return False
-            ksft_pr(f"Autoneg set as {state} for {self.partner_netif}")
-        else:
-            """Set the autonegotiation state for the interface"""
-            try:
-                process = ethtool(f"-s {self.cfg.ifname} {speed_duplex_cmd} autoneg {state}")
-                if process.ret != 0:
-                    ksft_pr(f"Not able to set autoneg parameter for interface {self.cfg.ifname}")
-                    return False
-            except Exception as e:
-                ksft_pr(f"Unexpected error occurred while changing auto-neg in local: {e}")
-                return False
-            ksft_pr(f"Autoneg set as {state} for {self.cfg.ifname}")
-        return True
-
-    def check_autoneg_supported(self, remote: bool = False) -> bool:
-        if not remote:
-            local_autoneg = self.get_ethtool_field("supports-auto-negotiation")
-            if local_autoneg is None:
-                ksft_pr(f"Unable to fetch auto-negotiation status for interface {self.cfg.ifname}")
-            """Return autoneg status of the local interface"""
-            return local_autoneg
-        else:
-            if not self.verify_link_up():
-                raise KsftSkipEx("Link is DOWN")
-            """Check remote auto-negotiation support status"""
-            partner_autoneg = False
-            if self.partner_netif is not None:
-                partner_autoneg = self.get_ethtool_field("supports-auto-negotiation", remote=True)
-                if partner_autoneg is None:
-                    ksft_pr(f"Unable to fetch auto-negotiation status for interface {self.partner_netif}")
-            return partner_autoneg
-
-    def get_common_link_modes(self) -> set[str]:
-        common_link_modes = []
-        """Populate common link modes"""
-        link_modes = self.get_ethtool_field("supported-link-modes")
-        partner_link_modes = self.get_ethtool_field("link-partner-advertised-link-modes")
-        if link_modes is None:
-            raise KsftSkipEx(f"Link modes not available for {self.cfg.ifname}")
-        if partner_link_modes is None:
-            raise KsftSkipEx(f"Partner link modes not available for {self.cfg.ifname}")
-        common_link_modes = set(link_modes) and set(partner_link_modes)
-        return common_link_modes
-
-    def get_speed_duplex_values(self, link_modes: list[str]) -> tuple[list[str], list[str]]:
-        speed = []
-        duplex = []
-        """Check the link modes"""
-        for data in link_modes:
-            parts = data.split('/')
-            speed_value = re.match(r'\d+', parts[0])
-            if speed_value:
-                speed.append(speed_value.group())
-            else:
-                ksft_pr(f"No speed value found for interface {self.ifname}")
-                return None, None
-            duplex.append(parts[1].lower())
-        return speed, duplex
-
-    def get_ethtool_field(self, field: str, remote: bool = False) -> Optional[str]:
-        process = None
-        if not remote:
-            """Get the ethtool field value for the local interface"""
-            try:
-                process = ethtool(self.cfg.ifname, json=True)
-            except Exception as e:
-                ksft_pr("Required minimum ethtool version is 6.10")
-                ksft_pr(f"Unexpected error occurred while getting ethtool field in local: {e}")
-                return None
-        else:
-            if not self.verify_link_up():
-                return None
-            """Get the ethtool field value for the remote interface"""
-            self.cfg.require_cmd("ethtool", remote=True)
-            if self.partner_netif is None:
-                ksft_pr(f"Partner interface name is unavailable.")
-                return None
-            try:
-                process = ethtool(self.partner_netif, json=True, host=self.cfg.remote)
-            except Exception as e:
-                ksft_pr("Required minimum ethtool version is 6.10")
-                ksft_pr(f"Unexpected error occurred while getting ethtool field in remote: {e}")
-                return None
-        json_data = process[0]
-        """Check if the field exist in the json data"""
-        if field not in json_data:
-            raise KsftSkipEx(f'Field {field} does not exist in the output of interface {json_data["ifname"]}')
-        return json_data[field]
diff --git a/tools/testing/selftests/drivers/net/hw/ncdevmem.c b/tools/testing/selftests/drivers/net/hw/ncdevmem.c
index 9d48004ff1a1..02e4d3d7ded2 100644
--- a/tools/testing/selftests/drivers/net/hw/ncdevmem.c
+++ b/tools/testing/selftests/drivers/net/hw/ncdevmem.c
@@ -9,22 +9,31 @@
  *     ncdevmem -s <server IP> [-c <client IP>] -f eth1 -l -p 5201
  *
  *     On client:
- *     echo -n "hello\nworld" | nc -s <server IP> 5201 -p 5201
+ *     echo -n "hello\nworld" | \
+ *		ncdevmem -s <server IP> [-c <client IP>] -p 5201 -f eth1
  *
- * Test data validation:
+ * Note this is compatible with regular netcat. i.e. the sender or receiver can
+ * be replaced with regular netcat to test the RX or TX path in isolation.
+ *
+ * Test data validation (devmem TCP on RX only):
  *
  *     On server:
  *     ncdevmem -s <server IP> [-c <client IP>] -f eth1 -l -p 5201 -v 7
  *
  *     On client:
  *     yes $(echo -e \\x01\\x02\\x03\\x04\\x05\\x06) | \
- *             tr \\n \\0 | \
- *             head -c 5G | \
+ *             head -c 1G | \
  *             nc <server IP> 5201 -p 5201
  *
+ * Test data validation (devmem TCP on RX and TX, validation happens on RX):
  *
- * Note this is compatible with regular netcat. i.e. the sender or receiver can
- * be replaced with regular netcat to test the RX or TX path in isolation.
+ *	On server:
+ *	ncdevmem -s <server IP> [-c <client IP>] -l -p 5201 -v 8 -f eth1
+ *
+ *	On client:
+ *	yes $(echo -e \\x01\\x02\\x03\\x04\\x05\\x06\\x07) | \
+ *		head -c 1M | \
+ *		ncdevmem -s <server IP> [-c <client IP>] -p 5201 -f eth1
  */
 #define _GNU_SOURCE
 #define __EXPORTED_HEADERS__
@@ -40,15 +49,18 @@
 #include <fcntl.h>
 #include <malloc.h>
 #include <error.h>
+#include <poll.h>
 
 #include <arpa/inet.h>
 #include <sys/socket.h>
 #include <sys/mman.h>
 #include <sys/ioctl.h>
 #include <sys/syscall.h>
+#include <sys/time.h>
 
 #include <linux/memfd.h>
 #include <linux/dma-buf.h>
+#include <linux/errqueue.h>
 #include <linux/udmabuf.h>
 #include <linux/types.h>
 #include <linux/netlink.h>
@@ -70,6 +82,9 @@
 #define MSG_SOCK_DEVMEM 0x2000000
 #endif
 
+#define MAX_IOV 1024
+
+static size_t max_chunk;
 static char *server_ip;
 static char *client_ip;
 static char *port;
@@ -79,6 +94,8 @@ static int num_queues = -1;
 static char *ifname;
 static unsigned int ifindex;
 static unsigned int dmabuf_id;
+static uint32_t tx_dmabuf_id;
+static int waittime_ms = 500;
 
 struct memory_buffer {
 	int fd;
@@ -92,6 +109,8 @@ struct memory_buffer {
 struct memory_provider {
 	struct memory_buffer *(*alloc)(size_t size);
 	void (*free)(struct memory_buffer *ctx);
+	void (*memcpy_to_device)(struct memory_buffer *dst, size_t off,
+				 void *src, int n);
 	void (*memcpy_from_device)(void *dst, struct memory_buffer *src,
 				   size_t off, int n);
 };
@@ -152,6 +171,20 @@ static void udmabuf_free(struct memory_buffer *ctx)
 	free(ctx);
 }
 
+/* Copy n bytes from src into the udmabuf at byte offset off (CPU write). */
+static void udmabuf_memcpy_to_device(struct memory_buffer *dst, size_t off,
+				     void *src, int n)
+{
+	struct dma_buf_sync sync = {
+		.flags = DMA_BUF_SYNC_START | DMA_BUF_SYNC_WRITE,
+	};
+
+	ioctl(dst->fd, DMA_BUF_IOCTL_SYNC, &sync);
+	memcpy(dst->buf_mem + off, src, n);
+	sync.flags = DMA_BUF_SYNC_END | DMA_BUF_SYNC_WRITE;
+	ioctl(dst->fd, DMA_BUF_IOCTL_SYNC, &sync);
+}
+
 static void udmabuf_memcpy_from_device(void *dst, struct memory_buffer *src,
 				       size_t off, int n)
 {
@@ -169,6 +202,7 @@ static void udmabuf_memcpy_from_device(void *dst, struct memory_buffer *src,
 static struct memory_provider udmabuf_memory_provider = {
 	.alloc = udmabuf_alloc,
 	.free = udmabuf_free,
+	.memcpy_to_device = udmabuf_memcpy_to_device,
 	.memcpy_from_device = udmabuf_memcpy_from_device,
 };
 
@@ -187,14 +221,16 @@ void validate_buffer(void *line, size_t size)
 {
 	static unsigned char seed = 1;
 	unsigned char *ptr = line;
-	int errors = 0;
+	unsigned char expected;
+	static int errors;
 	size_t i;
 
 	for (i = 0; i < size; i++) {
-		if (ptr[i] != seed) {
+		expected = seed ? seed : '\n';
+		if (ptr[i] != expected) {
 			fprintf(stderr,
 				"Failed validation: expected=%u, actual=%u, index=%lu\n",
-				seed, ptr[i], i);
+				expected, ptr[i], i);
 			errors++;
 			if (errors > 20)
 				error(1, 0, "validation failed.");
@@ -337,7 +373,8 @@ static int configure_flow_steering(struct sockaddr_in6 *server_sin)
 		server_addr = strrchr(server_addr, ':') + 1;
 	}
 
-	return run_command("sudo ethtool -N %s flow-type %s %s %s dst-ip %s %s %s dst-port %s queue %d >&2",
+	/* Try configure 5-tuple */
+	if (run_command("sudo ethtool -N %s flow-type %s %s %s dst-ip %s %s %s dst-port %s queue %d >&2",
 			   ifname,
 			   type,
 			   client_ip ? "src-ip" : "",
@@ -345,7 +382,17 @@ static int configure_flow_steering(struct sockaddr_in6 *server_sin)
 			   server_addr,
 			   client_ip ? "src-port" : "",
 			   client_ip ? port : "",
-			   port, start_queue);
+			   port, start_queue))
+		/* If that fails, try configure 3-tuple */
+		if (run_command("sudo ethtool -N %s flow-type %s dst-ip %s dst-port %s queue %d >&2",
+				ifname,
+				type,
+				server_addr,
+				port, start_queue))
+			/* If that fails, return error */
+			return -1;
+
+	return 0;
 }
 
 static int bind_rx_queue(unsigned int ifindex, unsigned int dmabuf_fd,
@@ -393,6 +440,49 @@ err_close:
 	return -1;
 }
 
+/* Bind dmabuf_fd for TX on ifindex; the returned id lands in tx_dmabuf_id. */
+static int bind_tx_queue(unsigned int ifindex, unsigned int dmabuf_fd,
+			 struct ynl_sock **ys)
+{
+	struct netdev_bind_tx_req *req = NULL;
+	struct netdev_bind_tx_rsp *rsp = NULL;
+	struct ynl_error yerr;
+
+	*ys = ynl_sock_create(&ynl_netdev_family, &yerr);
+	if (!*ys) {
+		fprintf(stderr, "YNL: %s\n", yerr.msg);
+		return -1;
+	}
+
+	req = netdev_bind_tx_req_alloc();
+	netdev_bind_tx_req_set_ifindex(req, ifindex);
+	netdev_bind_tx_req_set_fd(req, dmabuf_fd);
+
+	rsp = netdev_bind_tx(*ys, req);
+	if (!rsp) {
+		perror("netdev_bind_tx");
+		goto err_close;
+	}
+
+	if (!rsp->_present.id) {
+		fprintf(stderr, "id not present\n");
+		netdev_bind_tx_rsp_free(rsp);
+		goto err_close;
+	}
+
+	fprintf(stderr, "got tx dmabuf id=%d\n", rsp->id);
+	tx_dmabuf_id = rsp->id;
+	netdev_bind_tx_req_free(req);
+	netdev_bind_tx_rsp_free(rsp);
+	return 0;
+
+err_close:
+	fprintf(stderr, "YNL failed: %s\n", (*ys)->err.msg);
+	netdev_bind_tx_req_free(req);
+	ynl_sock_destroy(*ys);
+	return -1;
+}
+
 static void enable_reuseaddr(int fd)
 {
 	int opt = 1;
@@ -447,10 +537,9 @@ static struct netdev_queue_id *create_queues(void)
 	return queues;
 }
 
-int do_server(struct memory_buffer *mem)
+static int do_server(struct memory_buffer *mem)
 {
 	char ctrl_data[sizeof(int) * 20000];
-	struct netdev_queue_id *queues;
 	size_t non_page_aligned_frags = 0;
 	struct sockaddr_in6 client_addr;
 	struct sockaddr_in6 server_sin;
@@ -674,13 +763,216 @@ void run_devmem_tests(void)
 	provider->free(mem);
 }
 
+static uint64_t gettimeofday_ms(void)
+{
+	struct timeval now;	/* wall-clock time, converted to ms below */
+
+	gettimeofday(&now, NULL);
+	return now.tv_sec * 1000ULL + now.tv_usec / 1000ULL;
+}
+
+/* Wait up to waittime_ms for fd; returns non-zero iff POLLERR was raised. */
+static int do_poll(int fd)
+{
+	/* events = 0: poll() still reports POLLERR, nothing else is requested */
+	struct pollfd pfd = { .fd = fd, .events = 0, .revents = 0 };
+	int ret;
+
+	ret = poll(&pfd, 1, waittime_ms);
+	if (ret == -1)
+		error(1, errno, "poll");
+
+	/* ret == 0 is a timeout: no event was delivered at all */
+	return ret && (pfd.revents & POLLERR);
+}
+
+/* Wait up to waittime_ms for one MSG_ZEROCOPY completion on fd's errqueue. */
+static void wait_compl(int fd)
+{
+	uint64_t tstop = gettimeofday_ms() + waittime_ms;
+	char control[CMSG_SPACE(100)] = {};
+	struct sock_extended_err *serr;
+	struct msghdr msg = {};
+	struct cmsghdr *cm;
+	int ret;
+
+	msg.msg_control = control;
+
+	while (gettimeofday_ms() < tstop) {
+		if (!do_poll(fd))
+			continue;
+
+		/* recvmsg() updates msg_controllen; re-arm it on every pass */
+		msg.msg_controllen = sizeof(control);
+		ret = recvmsg(fd, &msg, MSG_ERRQUEUE);
+		if (ret < 0) {
+			if (errno == EAGAIN)
+				continue;
+			error(1, errno, "recvmsg(MSG_ERRQUEUE)");
+		}
+		if (msg.msg_flags & MSG_CTRUNC)
+			error(1, 0, "MSG_CTRUNC\n");
+
+		/* Only IP_RECVERR / IPV6_RECVERR control messages matter here */
+		for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm)) {
+			if (cm->cmsg_level != SOL_IP &&
+			    cm->cmsg_level != SOL_IPV6)
+				continue;
+			if (cm->cmsg_level == SOL_IP &&
+			    cm->cmsg_type != IP_RECVERR)
+				continue;
+			if (cm->cmsg_level == SOL_IPV6 &&
+			    cm->cmsg_type != IPV6_RECVERR)
+				continue;
+
+			serr = (void *)CMSG_DATA(cm);
+			if (serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY)
+				error(1, 0, "wrong origin %u", serr->ee_origin);
+			if (serr->ee_errno != 0)
+				error(1, 0, "wrong errno %d", serr->ee_errno);
+
+			/* ee_info..ee_data is the completed range of send calls */
+			fprintf(stderr, "tx complete [%u,%u]\n",
+				serr->ee_info, serr->ee_data);
+			return;
+		}
+	}
+
+	error(1, 0, "did not receive tx completion");
+}
+
+/* Send every line read from stdin out of the TX-bound dma-buf (zerocopy). */
+static int do_client(struct memory_buffer *mem)
+{
+	char ctrl_data[CMSG_SPACE(sizeof(__u32))];
+	struct sockaddr_in6 server_sin;
+	struct sockaddr_in6 client_sin;
+	struct ynl_sock *ys = NULL;
+	struct iovec iov[MAX_IOV];
+	struct msghdr msg = {};
+	ssize_t line_size = 0;
+	struct cmsghdr *cmsg;
+	char *line = NULL;
+	size_t len = 0;
+	int socket_fd;
+	int opt = 1;
+	int ret;
+
+	ret = parse_address(server_ip, atoi(port), &server_sin);
+	if (ret < 0)
+		error(1, 0, "parse server address");
+
+	socket_fd = socket(AF_INET6, SOCK_STREAM, 0);
+	if (socket_fd < 0)
+		error(1, errno, "create socket");
+
+	enable_reuseaddr(socket_fd);
+
+	ret = setsockopt(socket_fd, SOL_SOCKET, SO_BINDTODEVICE, ifname,
+			 strlen(ifname) + 1);
+	if (ret)
+		error(1, errno, "bindtodevice");
+
+	if (bind_tx_queue(ifindex, mem->fd, &ys))
+		error(1, 0, "Failed to bind\n");
+
+	if (client_ip) {
+		ret = parse_address(client_ip, atoi(port), &client_sin);
+		if (ret < 0)
+			error(1, 0, "parse client address");
+
+		ret = bind(socket_fd, &client_sin, sizeof(client_sin));
+		if (ret)
+			error(1, errno, "bind");
+	}
+
+	ret = setsockopt(socket_fd, SOL_SOCKET, SO_ZEROCOPY, &opt, sizeof(opt));
+	if (ret)
+		error(1, errno, "set sock opt");
+
+	fprintf(stderr, "Connect to %s %d (via %s)\n", server_ip,
+		ntohs(server_sin.sin6_port), ifname);
+
+	ret = connect(socket_fd, &server_sin, sizeof(server_sin));
+	if (ret)
+		error(1, errno, "connect");
+
+	while (1) {
+		free(line);
+		line = NULL;
+		line_size = getline(&line, &len, stdin);
+
+		if (line_size < 0)
+			break;
+
+		if (max_chunk) {
+			msg.msg_iovlen =
+				(line_size + max_chunk - 1) / max_chunk;
+			if (msg.msg_iovlen > MAX_IOV)
+				error(1, 0,
+				      "can't partition %zd bytes into maximum of %d chunks",
+				      line_size, MAX_IOV);
+
+			/* iov_base is an offset into the dma-buf, not a pointer */
+			for (int i = 0; i < msg.msg_iovlen; i++) {
+				iov[i].iov_base = (void *)(i * max_chunk);
+				iov[i].iov_len = max_chunk;
+			}
+
+			iov[msg.msg_iovlen - 1].iov_len =
+				line_size - (msg.msg_iovlen - 1) * max_chunk;
+		} else {
+			iov[0].iov_base = 0;
+			iov[0].iov_len = line_size;
+			msg.msg_iovlen = 1;
+		}
+
+		msg.msg_iov = iov;
+		/* NOTE(review): line_size is not bounded by the buffer size */
+		provider->memcpy_to_device(mem, 0, line, line_size);
+
+		msg.msg_control = ctrl_data;
+		msg.msg_controllen = sizeof(ctrl_data);
+
+		/* The cmsg names the TX-bound dma-buf the offsets refer to */
+		cmsg = CMSG_FIRSTHDR(&msg);
+		cmsg->cmsg_level = SOL_SOCKET;
+		cmsg->cmsg_type = SCM_DEVMEM_DMABUF;
+		cmsg->cmsg_len = CMSG_LEN(sizeof(__u32));
+		*((__u32 *)CMSG_DATA(cmsg)) = tx_dmabuf_id;
+
+		ret = sendmsg(socket_fd, &msg, MSG_ZEROCOPY);
+		if (ret < 0)
+			error(1, errno, "Failed sendmsg");
+
+		fprintf(stderr, "sendmsg_ret=%d\n", ret);
+
+		/* A short send is not an errno failure, so pass errnum 0 */
+		if (ret != line_size)
+			error(1, 0, "Did not send all bytes %d vs %zd", ret,
+			      line_size);
+
+		wait_compl(socket_fd);
+	}
+
+	fprintf(stderr, "%s: tx ok\n", TEST_PREFIX);
+
+	free(line);
+	close(socket_fd);
+
+	if (ys)
+		ynl_sock_destroy(ys);
+
+	return 0;
+}
+
 int main(int argc, char *argv[])
 {
 	struct memory_buffer *mem;
 	int is_server = 0, opt;
 	int ret;
 
-	while ((opt = getopt(argc, argv, "ls:c:p:v:q:t:f:")) != -1) {
+	while ((opt = getopt(argc, argv, "ls:c:p:v:q:t:f:z:")) != -1) {
 		switch (opt) {
 		case 'l':
 			is_server = 1;
@@ -706,6 +998,9 @@ int main(int argc, char *argv[])
 		case 'f':
 			ifname = optarg;
 			break;
+		case 'z':
+			max_chunk = atoi(optarg);
+			break;
 		case '?':
 			fprintf(stderr, "unknown option: %c\n", optopt);
 			break;
@@ -717,6 +1012,8 @@ int main(int argc, char *argv[])
 
 	ifindex = if_nametoindex(ifname);
 
+	fprintf(stderr, "using ifindex=%u\n", ifindex);
+
 	if (!server_ip && !client_ip) {
 		if (start_queue < 0 && num_queues < 0) {
 			num_queues = rxq_num(ifindex);
@@ -767,7 +1064,7 @@ int main(int argc, char *argv[])
 		error(1, 0, "Missing -p argument\n");
 
 	mem = provider->alloc(getpagesize() * NUM_PAGES);
-	ret = is_server ? do_server(mem) : 1;
+	ret = is_server ? do_server(mem) : do_client(mem);
 	provider->free(mem);
 
 	return ret;
diff --git a/tools/testing/selftests/drivers/net/hw/nic_link_layer.py b/tools/testing/selftests/drivers/net/hw/nic_link_layer.py
deleted file mode 100644
index efd921180532..000000000000
--- a/tools/testing/selftests/drivers/net/hw/nic_link_layer.py
+++ /dev/null
@@ -1,113 +0,0 @@
-#!/usr/bin/env python3
-# SPDX-License-Identifier: GPL-2.0
-
-#Introduction:
-#This file has basic link layer tests for generic NIC drivers.
-#The test comprises of auto-negotiation, speed and duplex checks.
-#
-#Setup:
-#Connect the DUT PC with NIC card to partner pc back via ethernet medium of your choice(RJ45, T1)
-#
-#        DUT PC                                              Partner PC
-#┌───────────────────────┐                         ┌──────────────────────────┐
-#│                       │                         │                          │
-#│                       │                         │                          │
-#│           ┌───────────┐                         │                          │
-#│           │DUT NIC    │         Eth             │                          │
-#│           │Interface ─┼─────────────────────────┼─    any eth Interface    │
-#│           └───────────┘                         │                          │
-#│                       │                         │                          │
-#│                       │                         │                          │
-#└───────────────────────┘                         └──────────────────────────┘
-#
-#Configurations:
-#Required minimum ethtool version is 6.10 (supports json)
-#Default values:
-#time_delay = 8 #time taken to wait for transitions to happen, in seconds.
-
-import time
-import argparse
-from lib.py import ksft_run, ksft_exit, ksft_pr, ksft_eq
-from lib.py import KsftFailEx, KsftSkipEx
-from lib.py import NetDrvEpEnv
-from lib.py import LinkConfig
-
-def _pre_test_checks(cfg: object, link_config: LinkConfig) -> None:
-    if link_config.partner_netif is None:
-        KsftSkipEx("Partner interface is not available")
-    if not link_config.check_autoneg_supported() or not link_config.check_autoneg_supported(remote=True):
-        KsftSkipEx(f"Auto-negotiation not supported for interface {cfg.ifname} or {link_config.partner_netif}")
-    if not link_config.verify_link_up():
-        raise KsftSkipEx(f"Link state of interface {cfg.ifname} is DOWN")
-
-def verify_autonegotiation(cfg: object, expected_state: str, link_config: LinkConfig) -> None:
-    if not link_config.verify_link_up():
-        raise KsftSkipEx(f"Link state of interface {cfg.ifname} is DOWN")
-    """Verifying the autonegotiation state in partner"""
-    partner_autoneg_output = link_config.get_ethtool_field("auto-negotiation", remote=True)
-    if partner_autoneg_output is None:
-        KsftSkipEx(f"Auto-negotiation state not available for interface {link_config.partner_netif}")
-    partner_autoneg_state = "on" if partner_autoneg_output is True else "off"
-
-    ksft_eq(partner_autoneg_state, expected_state)
-
-    """Verifying the autonegotiation state of local"""
-    autoneg_output = link_config.get_ethtool_field("auto-negotiation")
-    if autoneg_output is None:
-        KsftSkipEx(f"Auto-negotiation state not available for interface {cfg.ifname}")
-    actual_state = "on" if autoneg_output is True else "off"
-
-    ksft_eq(actual_state, expected_state)
-
-    """Verifying the link establishment"""
-    link_available = link_config.get_ethtool_field("link-detected")
-    if link_available is None:
-        KsftSkipEx(f"Link status not available for interface {cfg.ifname}")
-    if link_available != True:
-        raise KsftSkipEx("Link not established at interface {cfg.ifname} after changing auto-negotiation")
-
-def test_autonegotiation(cfg: object, link_config: LinkConfig, time_delay: int) -> None:
-    _pre_test_checks(cfg, link_config)
-    for state in ["off", "on"]:
-        if not link_config.set_autonegotiation_state(state, remote=True):
-            raise KsftSkipEx(f"Unable to set auto-negotiation state for interface {link_config.partner_netif}")
-        if not link_config.set_autonegotiation_state(state):
-            raise KsftSkipEx(f"Unable to set auto-negotiation state for interface {cfg.ifname}")
-        time.sleep(time_delay)
-        verify_autonegotiation(cfg, state, link_config)
-
-def test_network_speed(cfg: object, link_config: LinkConfig, time_delay: int) -> None:
-    _pre_test_checks(cfg, link_config)
-    common_link_modes = link_config.common_link_modes
-    if not common_link_modes:
-        KsftSkipEx("No common link modes exist")
-    speeds, duplex_modes = link_config.get_speed_duplex_values(common_link_modes)
-
-    if speeds and duplex_modes and len(speeds) == len(duplex_modes):
-        for idx in range(len(speeds)):
-            speed = speeds[idx]
-            duplex = duplex_modes[idx]
-            if not link_config.set_speed_and_duplex(speed, duplex):
-                raise KsftFailEx(f"Unable to set speed and duplex parameters for {cfg.ifname}")
-            time.sleep(time_delay)
-            if not link_config.verify_speed_and_duplex(speed, duplex):
-                raise KsftSkipEx(f"Error occurred while verifying speed and duplex states for interface {cfg.ifname}")
-    else:
-        if not speeds or not duplex_modes:
-            KsftSkipEx(f"No supported speeds or duplex modes found for interface {cfg.ifname}")
-        else:
-            KsftSkipEx("Mismatch in the number of speeds and duplex modes")
-
-def main() -> None:
-    parser = argparse.ArgumentParser(description="Run basic link layer tests for NIC driver")
-    parser.add_argument('--time-delay', type=int, default=8, help='Time taken to wait for transitions to happen(in seconds). Default is 8 seconds.')
-    args = parser.parse_args()
-    time_delay = args.time_delay
-    with NetDrvEpEnv(__file__, nsim_test=False) as cfg:
-        link_config = LinkConfig(cfg)
-        ksft_run(globs=globals(), case_pfx={"test_"}, args=(cfg, link_config, time_delay,))
-        link_config.reset_interface()
-    ksft_exit()
-
-if __name__ == "__main__":
-    main()
diff --git a/tools/testing/selftests/drivers/net/hw/nic_performance.py b/tools/testing/selftests/drivers/net/hw/nic_performance.py
deleted file mode 100644
index 201403b76ea3..000000000000
--- a/tools/testing/selftests/drivers/net/hw/nic_performance.py
+++ /dev/null
@@ -1,137 +0,0 @@
-#!/usr/bin/env python3
-# SPDX-License-Identifier: GPL-2.0
-
-#Introduction:
-#This file has basic performance test for generic NIC drivers.
-#The test comprises of throughput check for TCP and UDP streams.
-#
-#Setup:
-#Connect the DUT PC with NIC card to partner pc back via ethernet medium of your choice(RJ45, T1)
-#
-#        DUT PC                                              Partner PC
-#┌───────────────────────┐                         ┌──────────────────────────┐
-#│                       │                         │                          │
-#│                       │                         │                          │
-#│           ┌───────────┐                         │                          │
-#│           │DUT NIC    │         Eth             │                          │
-#│           │Interface ─┼─────────────────────────┼─    any eth Interface    │
-#│           └───────────┘                         │                          │
-#│                       │                         │                          │
-#│                       │                         │                          │
-#└───────────────────────┘                         └──────────────────────────┘
-#
-#Configurations:
-#To prevent interruptions, Add ethtool, ip to the sudoers list in remote PC and get the ssh key from remote.
-#Required minimum ethtool version is 6.10
-#Change the below configuration based on your hw needs.
-# """Default values"""
-#time_delay = 8 #time taken to wait for transitions to happen, in seconds.
-#test_duration = 10  #performance test duration for the throughput check, in seconds.
-#send_throughput_threshold = 80 #percentage of send throughput required to pass the check
-#receive_throughput_threshold = 50 #percentage of receive throughput required to pass the check
-
-import time
-import json
-import argparse
-from lib.py import ksft_run, ksft_exit, ksft_pr, ksft_true
-from lib.py import KsftFailEx, KsftSkipEx, GenerateTraffic
-from lib.py import NetDrvEpEnv, bkg, wait_port_listen
-from lib.py import cmd
-from lib.py import LinkConfig
-
-class TestConfig:
-    def __init__(self, time_delay: int, test_duration: int, send_throughput_threshold: int, receive_throughput_threshold: int) -> None:
-        self.time_delay = time_delay
-        self.test_duration = test_duration
-        self.send_throughput_threshold = send_throughput_threshold
-        self.receive_throughput_threshold = receive_throughput_threshold
-
-def _pre_test_checks(cfg: object, link_config: LinkConfig) -> None:
-    if not link_config.verify_link_up():
-        KsftSkipEx(f"Link state of interface {cfg.ifname} is DOWN")
-    common_link_modes = link_config.common_link_modes
-    if common_link_modes is None:
-        KsftSkipEx("No common link modes found")
-    if link_config.partner_netif == None:
-        KsftSkipEx("Partner interface is not available")
-    if link_config.check_autoneg_supported():
-        KsftSkipEx("Auto-negotiation not supported by local")
-    if link_config.check_autoneg_supported(remote=True):
-        KsftSkipEx("Auto-negotiation not supported by remote")
-    cfg.require_cmd("iperf3", remote=True)
-
-def check_throughput(cfg: object, link_config: LinkConfig, test_config: TestConfig, protocol: str, traffic: GenerateTraffic) -> None:
-    common_link_modes = link_config.common_link_modes
-    speeds, duplex_modes = link_config.get_speed_duplex_values(common_link_modes)
-    """Test duration in seconds"""
-    duration = test_config.test_duration
-
-    ksft_pr(f"{protocol} test")
-    test_type = "-u" if protocol == "UDP" else ""
-
-    send_throughput = []
-    receive_throughput = []
-    for idx in range(0, len(speeds)):
-        if link_config.set_speed_and_duplex(speeds[idx], duplex_modes[idx]) == False:
-            raise KsftFailEx(f"Not able to set speed and duplex parameters for {cfg.ifname}")
-        time.sleep(test_config.time_delay)
-        if not link_config.verify_link_up():
-            raise KsftSkipEx(f"Link state of interface {cfg.ifname} is DOWN")
-
-        send_command=f"{test_type} -b 0 -t {duration} --json"
-        receive_command=f"{test_type} -b 0 -t {duration} --reverse --json"
-
-        send_result = traffic.run_remote_test(cfg, command=send_command)
-        if send_result.ret != 0:
-            raise KsftSkipEx("Error occurred during data transmit: {send_result.stdout}")
-
-        send_output = send_result.stdout
-        send_data = json.loads(send_output)
-
-        """Convert throughput to Mbps"""
-        send_throughput.append(round(send_data['end']['sum_sent']['bits_per_second'] / 1e6, 2))
-        ksft_pr(f"{protocol}: Send throughput: {send_throughput[idx]} Mbps")
-
-        receive_result = traffic.run_remote_test(cfg, command=receive_command)
-        if receive_result.ret != 0:
-            raise KsftSkipEx("Error occurred during data receive: {receive_result.stdout}")
-
-        receive_output = receive_result.stdout
-        receive_data = json.loads(receive_output)
-
-        """Convert throughput to Mbps"""
-        receive_throughput.append(round(receive_data['end']['sum_received']['bits_per_second'] / 1e6, 2))
-        ksft_pr(f"{protocol}: Receive throughput: {receive_throughput[idx]} Mbps")
-
-    """Check whether throughput is not below the threshold (default values set at start)"""
-    for idx in range(0, len(speeds)):
-        send_threshold = float(speeds[idx]) * float(test_config.send_throughput_threshold / 100)
-        receive_threshold = float(speeds[idx]) * float(test_config.receive_throughput_threshold / 100)
-        ksft_true(send_throughput[idx] >= send_threshold, f"{protocol}: Send throughput is below threshold for {speeds[idx]} Mbps in {duplex_modes[idx]} duplex")
-        ksft_true(receive_throughput[idx] >= receive_threshold, f"{protocol}: Receive throughput is below threshold for {speeds[idx]} Mbps in {duplex_modes[idx]} duplex")
-
-def test_tcp_throughput(cfg: object, link_config: LinkConfig, test_config: TestConfig, traffic: GenerateTraffic) -> None:
-    _pre_test_checks(cfg, link_config)
-    check_throughput(cfg, link_config, test_config, 'TCP', traffic)
-
-def test_udp_throughput(cfg: object, link_config: LinkConfig, test_config: TestConfig, traffic: GenerateTraffic) -> None:
-    _pre_test_checks(cfg, link_config)
-    check_throughput(cfg, link_config, test_config, 'UDP', traffic)
-
-def main() -> None:
-    parser = argparse.ArgumentParser(description="Run basic performance test for NIC driver")
-    parser.add_argument('--time-delay', type=int, default=8, help='Time taken to wait for transitions to happen(in seconds). Default is 8 seconds.')
-    parser.add_argument('--test-duration', type=int, default=10, help='Performance test duration for the throughput check, in seconds. Default is 10 seconds.')
-    parser.add_argument('--stt', type=int, default=80, help='Send throughput Threshold: Percentage of send throughput upon actual throughput required to pass the throughput check (in percentage). Default is 80.')
-    parser.add_argument('--rtt', type=int, default=50, help='Receive throughput Threshold: Percentage of receive throughput upon actual throughput required to pass the throughput check (in percentage). Default is 50.')
-    args=parser.parse_args()
-    test_config = TestConfig(args.time_delay, args.test_duration, args.stt, args.rtt)
-    with NetDrvEpEnv(__file__, nsim_test=False) as cfg:
-        traffic = GenerateTraffic(cfg)
-        link_config = LinkConfig(cfg)
-        ksft_run(globs=globals(), case_pfx={"test_"}, args=(cfg, link_config, test_config, traffic,  ))
-        link_config.reset_interface()
-    ksft_exit()
-
-if __name__ == "__main__":
-    main()
diff --git a/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py b/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py
index 53bb08cc29ec..f439c434ba36 100755
--- a/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py
+++ b/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py
@@ -32,6 +32,11 @@ def test_rss_input_xfrm(cfg, ipver):
     if multiprocessing.cpu_count() < 2:
         raise KsftSkipEx("Need at least two CPUs to test symmetric RSS hash")
 
+    cfg.require_cmd("socat", remote=True)
+
+    if not hasattr(socket, "SO_INCOMING_CPU"):
+        raise KsftSkipEx("socket.SO_INCOMING_CPU was added in Python 3.11")
+
     input_xfrm = cfg.ethnl.rss_get(
         {'header': {'dev-name': cfg.ifname}}).get('input_xfrm')
 
diff --git a/tools/testing/selftests/drivers/net/hw/xsk_reconfig.py b/tools/testing/selftests/drivers/net/hw/xsk_reconfig.py
new file mode 100755
index 000000000000..d19d1d518208
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/xsk_reconfig.py
@@ -0,0 +1,60 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+# This is intended to be run on a virtio-net guest interface.
+# The test binds the XDP socket to the interface without setting
+# the fill ring, which triggers the delayed refill_work. That makes
+# it easier to reproduce the deadlock that occurs on a buggy kernel
+# when XDP program attach/detach, XDP socket bind/unbind, or rx ring
+# resize races with refill_work.
+#
+# QEMU options to set up virtio-net:
+# -netdev tap,id=hostnet1,vhost=on,script=no,downscript=no
+# -device virtio-net-pci,netdev=hostnet1,iommu_platform=on,disable-legacy=on
+
+from lib.py import ksft_exit, ksft_run
+from lib.py import KsftSkipEx, KsftFailEx
+from lib.py import NetDrvEnv
+from lib.py import bkg, ip, cmd, ethtool
+import time
+
+def _get_rx_ring_entries(cfg):
+    rings = ethtool(f"-g {cfg.ifname}", json=True)[0]  # first JSON record
+    return rings["rx"]
+
+def setup_xsk(cfg, xdp_queue_id=0) -> bkg:
+    """Probe for AF_XDP support, then bind xdp_helper to the queue in zerocopy."""
+    xdp = cmd(f'{cfg.net_lib_dir / "xdp_helper"} - -', fail=False)
+    if xdp.ret == 255:
+        raise KsftSkipEx('AF_XDP unsupported')
+    elif xdp.ret > 0:
+        raise KsftFailEx('unable to create AF_XDP socket')
+
+    try:
+        return bkg(f'{cfg.net_lib_dir / "xdp_helper"} {cfg.ifindex} '
+                   f'{xdp_queue_id} -z', ksft_wait=3)
+    except Exception as exc:
+        raise KsftSkipEx('Failed to bind XDP socket in zerocopy.\n'
+                         'Please consider adding iommu_platform=on '
+                         'when setting up virtio-net-pci') from exc
+
+def check_xdp_bind(cfg):  # attach, then detach, an XDP prog with the XSK bound
+    prog = cfg.net_lib_dir / "xdp_dummy.bpf.o"
+    with setup_xsk(cfg):
+        ip(f"link set dev {cfg.ifname} xdp obj {prog} sec xdp")
+        ip(f"link set dev {cfg.ifname} xdp off")
+
+def check_rx_resize(cfg):  # halve the RX ring, then restore it, with XSK bound
+    with setup_xsk(cfg):
+        entries = _get_rx_ring_entries(cfg)
+        ethtool("-G %s rx %d" % (cfg.ifname, entries // 2))
+        ethtool("-G %s rx %d" % (cfg.ifname, entries))
+
+def main():  # run both XSK reconfiguration cases, then call ksft_exit()
+    cases = [check_xdp_bind, check_rx_resize]
+    with NetDrvEnv(__file__, nsim_test=False) as cfg:
+        ksft_run(cases, args=(cfg, ))
+    ksft_exit()
+
+if __name__ == "__main__":
+    main()
diff --git a/tools/testing/selftests/drivers/net/lib/py/env.py b/tools/testing/selftests/drivers/net/lib/py/env.py
index ad5ff645183a..3bccddf8cbc5 100644
--- a/tools/testing/selftests/drivers/net/lib/py/env.py
+++ b/tools/testing/selftests/drivers/net/lib/py/env.py
@@ -12,7 +12,7 @@ from .remote import Remote
 
 class NetDrvEnvBase:
     """
-    Base class for a NIC / host envirnoments
+    Base class for a NIC / host environments
 
     Attributes:
       test_dir: Path to the source directory of the test
diff --git a/tools/testing/selftests/drivers/net/lib/py/load.py b/tools/testing/selftests/drivers/net/lib/py/load.py
index da5af2c680fa..d9c10613ae67 100644
--- a/tools/testing/selftests/drivers/net/lib/py/load.py
+++ b/tools/testing/selftests/drivers/net/lib/py/load.py
@@ -2,7 +2,7 @@
 
 import time
 
-from lib.py import ksft_pr, cmd, ip, rand_port, wait_port_listen, bkg
+from lib.py import ksft_pr, cmd, ip, rand_port, wait_port_listen
 
 class GenerateTraffic:
     def __init__(self, env, port=None):
@@ -23,24 +23,6 @@ class GenerateTraffic:
             self.stop(verbose=True)
             raise Exception("iperf3 traffic did not ramp up")
 
-    def run_remote_test(self, env: object, port=None, command=None):
-        if port is None:
-            port = rand_port()
-        try:
-            server_cmd = f"iperf3 -s 1 -p {port} --one-off"
-            with bkg(server_cmd, host=env.remote):
-                #iperf3 opens TCP connection as default in server
-                #-u to be specified in client command for UDP
-                wait_port_listen(port, host=env.remote)
-        except Exception as e:
-            raise Exception(f"Unexpected error occurred while running server command: {e}")
-        try:
-            client_cmd = f"iperf3 -c {env.remote_addr} -p {port} {command}"
-            proc = cmd(client_cmd)
-            return proc
-        except Exception as e:
-            raise Exception(f"Unexpected error occurred while running client command: {e}")
-
     def _wait_pkts(self, pkt_cnt=None, pps=None):
         """
         Wait until we've seen pkt_cnt or until traffic ramps up to pps.
diff --git a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh
index 3c96b022954d..29b01b8e2215 100644
--- a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh
+++ b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh
@@ -33,7 +33,6 @@ NSIM_DEV_SYS_NEW="/sys/bus/netdevsim/new_device"
 
 # Used to create and delete namespaces
 source "${LIBDIR}"/../../../../net/lib.sh
-source "${LIBDIR}"/../../../../net/net_helper.sh
 
 # Create netdevsim interfaces
 create_ifaces() {
diff --git a/tools/testing/selftests/drivers/net/napi_id.py b/tools/testing/selftests/drivers/net/napi_id.py
new file mode 100755
index 000000000000..356bac46ba04
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/napi_id.py
@@ -0,0 +1,23 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+from lib.py import ksft_run, ksft_exit
+from lib.py import ksft_eq, NetDrvEpEnv
+from lib.py import bkg, cmd, rand_port, NetNSEnter
+
+def test_napi_id(cfg) -> None:
+    """Connect from the remote with socat; napi_id_helper must exit with 0."""
+    cfg.require_cmd("socat", remote=True)
+    port = rand_port()
+    listen_cmd = f"{cfg.test_dir}/napi_id_helper {cfg.addr_v['4']} {port}"
+    with bkg(listen_cmd, ksft_wait=3) as server:
+        cmd(f"echo a | socat - TCP:{cfg.addr_v['4']}:{port}", host=cfg.remote, shell=True)
+    ksft_eq(0, server.ret)
+
+def main() -> None:  # entry point: run test_napi_id under NetDrvEpEnv
+    with NetDrvEpEnv(__file__) as env:
+        ksft_run([test_napi_id], args=(env,))
+    ksft_exit()
+
+if __name__ == "__main__":
+    main()
diff --git a/tools/testing/selftests/drivers/net/napi_id_helper.c b/tools/testing/selftests/drivers/net/napi_id_helper.c
new file mode 100644
index 000000000000..eecd610c2109
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/napi_id_helper.c
@@ -0,0 +1,83 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <arpa/inet.h>
+#include <sys/socket.h>
+
+#include "../../net/lib/ksft.h"
+
+int main(int argc, char *argv[])
+{
+	struct sockaddr_in address = {};
+	unsigned int napi_id;
+	socklen_t optlen = sizeof(napi_id);
+	int server, client, ret;
+	char buf[1024];
+	int opt = 1;
+
+	/* Usage: napi_id_helper <IPv4 address> <port>; exits 0 iff NAPI ID != 0 */
+	if (argc < 3) {
+		fprintf(stderr, "usage: %s <ipv4-addr> <port>\n", argv[0]);
+		return 1;
+	}
+
+	server = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
+	if (server < 0) {
+		ret = (errno == EAFNOSUPPORT) ? -1 : 1; /* perror() may change errno */
+		perror("socket creation failed");
+		return ret;
+	}
+
+	if (setsockopt(server, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt))) {
+		perror("setsockopt");
+		return 1;
+	}
+
+	address.sin_family = AF_INET;
+	address.sin_port = htons(atoi(argv[2]));
+	if (inet_pton(AF_INET, argv[1], &address.sin_addr) != 1) {
+		fprintf(stderr, "invalid IPv4 address: %s\n", argv[1]);
+		return 1;
+	}
+
+	if (bind(server, (struct sockaddr *)&address, sizeof(address)) < 0) {
+		perror("bind failed");
+		return 1;
+	}
+
+	if (listen(server, 1) < 0) {
+		perror("listen");
+		return 1;
+	}
+
+	ksft_ready();
+
+	client = accept(server, NULL, NULL);
+	if (client < 0) {
+		perror("accept");
+		return 1;
+	}
+
+	if (getsockopt(client, SOL_SOCKET, SO_INCOMING_NAPI_ID, &napi_id, &optlen)) {
+		perror("getsockopt");
+		return 1;
+	}
+
+	if (read(client, buf, sizeof(buf)) < 0)
+		perror("read");
+
+	ksft_wait();
+
+	if (napi_id == 0) {
+		fprintf(stderr, "napi ID is 0\n");
+		return 1;
+	}
+
+	close(client);
+	close(server);
+	return 0;
+}
diff --git a/tools/testing/selftests/drivers/net/netdevsim/peer.sh b/tools/testing/selftests/drivers/net/netdevsim/peer.sh
index aed62d9e6c0a..1bb46ec435d4 100755
--- a/tools/testing/selftests/drivers/net/netdevsim/peer.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/peer.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0-only
 
-source ../../../net/net_helper.sh
+source ../../../net/lib.sh
 
 NSIM_DEV_1_ID=$((256 + RANDOM % 256))
 NSIM_DEV_1_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_DEV_1_ID
diff --git a/tools/testing/selftests/drivers/net/ping.py b/tools/testing/selftests/drivers/net/ping.py
index af8df2313a3b..e0f114612c1a 100755
--- a/tools/testing/selftests/drivers/net/ping.py
+++ b/tools/testing/selftests/drivers/net/ping.py
@@ -50,6 +50,16 @@ def _test_tcp(cfg) -> None:
         cmd(f"echo {test_string} | socat -t 2 -u STDIN TCP:{cfg.remote_baddr}:{port}", shell=True)
     ksft_eq(nc.stdout.strip(), test_string)
 
+def _schedule_checksum_reset(cfg, netnl) -> None:
+    """Defer an ethtool -K call that restores the current tx/rx checksum state."""
+    features = ethtool(f"-k {cfg.ifname}", json=True)[0]
+    csum = {side: features[side + "-checksumming"] for side in ["tx", "rx"]}
+    # Features reported as "fixed" are left out of the restore command.
+    setting = " ".join(f"{side} " + ("on" if f["requested"] or f["active"] else "off")
+                       for side, f in csum.items() if not f["fixed"])
+    if setting:  # nothing changeable: a bare "ethtool -K <dev>" would be rejected
+        defer(ethtool, f" -K {cfg.ifname} {setting}")
+
 def _set_offload_checksum(cfg, netnl, on) -> None:
     try:
         ethtool(f" -K {cfg.ifname} rx {on} tx {on} ")
@@ -139,6 +149,7 @@ def set_interface_init(cfg) -> None:
 def test_default_v4(cfg, netnl) -> None:
     cfg.require_ipver("4")
 
+    _schedule_checksum_reset(cfg, netnl)
     _set_offload_checksum(cfg, netnl, "off")
     _test_v4(cfg)
     _test_tcp(cfg)
@@ -149,6 +160,7 @@ def test_default_v4(cfg, netnl) -> None:
 def test_default_v6(cfg, netnl) -> None:
     cfg.require_ipver("6")
 
+    _schedule_checksum_reset(cfg, netnl)
     _set_offload_checksum(cfg, netnl, "off")
     _test_v6(cfg)
     _test_tcp(cfg)
@@ -157,6 +169,7 @@ def test_default_v6(cfg, netnl) -> None:
     _test_tcp(cfg)
 
 def test_xdp_generic_sb(cfg, netnl) -> None:
+    _schedule_checksum_reset(cfg, netnl)
     _set_xdp_generic_sb_on(cfg)
     _set_offload_checksum(cfg, netnl, "off")
     _test_v4(cfg)
@@ -168,6 +181,7 @@ def test_xdp_generic_sb(cfg, netnl) -> None:
     _test_tcp(cfg)
 
 def test_xdp_generic_mb(cfg, netnl) -> None:
+    _schedule_checksum_reset(cfg, netnl)
     _set_xdp_generic_mb_on(cfg)
     _set_offload_checksum(cfg, netnl, "off")
     _test_v4(cfg)
@@ -179,6 +193,7 @@ def test_xdp_generic_mb(cfg, netnl) -> None:
     _test_tcp(cfg)
 
 def test_xdp_native_sb(cfg, netnl) -> None:
+    _schedule_checksum_reset(cfg, netnl)
     _set_xdp_native_sb_on(cfg)
     _set_offload_checksum(cfg, netnl, "off")
     _test_v4(cfg)
@@ -190,6 +205,7 @@ def test_xdp_native_sb(cfg, netnl) -> None:
     _test_tcp(cfg)
 
 def test_xdp_native_mb(cfg, netnl) -> None:
+    _schedule_checksum_reset(cfg, netnl)
     _set_xdp_native_mb_on(cfg)
     _set_offload_checksum(cfg, netnl, "off")
     _test_v4(cfg)
diff --git a/tools/testing/selftests/drivers/net/queues.py b/tools/testing/selftests/drivers/net/queues.py
index 06abd3f233e1..236005290a33 100755
--- a/tools/testing/selftests/drivers/net/queues.py
+++ b/tools/testing/selftests/drivers/net/queues.py
@@ -26,13 +26,13 @@ def nl_get_queues(cfg, nl, qtype='rx'):
 
 def check_xsk(cfg, nl, xdp_queue_id=0) -> None:
     # Probe for support
-    xdp = cmd(f'{cfg.test_dir / "xdp_helper"} - -', fail=False)
+    xdp = cmd(f'{cfg.net_lib_dir / "xdp_helper"} - -', fail=False)
     if xdp.ret == 255:
         raise KsftSkipEx('AF_XDP unsupported')
     elif xdp.ret > 0:
         raise KsftFailEx('unable to create AF_XDP socket')
 
-    with bkg(f'{cfg.test_dir / "xdp_helper"} {cfg.ifindex} {xdp_queue_id}',
+    with bkg(f'{cfg.net_lib_dir / "xdp_helper"} {cfg.ifindex} {xdp_queue_id}',
              ksft_wait=3):
 
         rx = tx = False
diff --git a/tools/testing/selftests/drivers/net/team/Makefile b/tools/testing/selftests/drivers/net/team/Makefile
index 2d5a76d99181..eaf6938f100e 100644
--- a/tools/testing/selftests/drivers/net/team/Makefile
+++ b/tools/testing/selftests/drivers/net/team/Makefile
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
 # Makefile for net selftests
 
-TEST_PROGS := dev_addr_lists.sh
+TEST_PROGS := dev_addr_lists.sh propagation.sh
 
 TEST_INCLUDES := \
 	../bonding/lag_lib.sh \
diff --git a/tools/testing/selftests/drivers/net/team/config b/tools/testing/selftests/drivers/net/team/config
index b5e3a3aad4bf..636b3525b679 100644
--- a/tools/testing/selftests/drivers/net/team/config
+++ b/tools/testing/selftests/drivers/net/team/config
@@ -1,5 +1,6 @@
 CONFIG_DUMMY=y
 CONFIG_IPV6=y
 CONFIG_MACVLAN=y
+CONFIG_NETDEVSIM=m
 CONFIG_NET_TEAM=y
 CONFIG_NET_TEAM_MODE_LOADBALANCE=y
diff --git a/tools/testing/selftests/drivers/net/team/propagation.sh b/tools/testing/selftests/drivers/net/team/propagation.sh
new file mode 100755
index 000000000000..4bea75b79878
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/team/propagation.sh
@@ -0,0 +1,80 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+set -e
+
+NSIM_LRO_ID=$((256 + RANDOM % 256))
+NSIM_LRO_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_LRO_ID
+
+NSIM_DEV_SYS_NEW=/sys/bus/netdevsim/new_device
+NSIM_DEV_SYS_DEL=/sys/bus/netdevsim/del_device
+
+cleanup()
+{
+	local dev
+	set +e	# teardown is best effort; do not stop at the first failure
+	for dev in dummyteam team0; do ip link del $dev &>/dev/null; done
+	echo $NSIM_LRO_ID > $NSIM_DEV_SYS_DEL
+	modprobe -r netdevsim
+}
+
+# Trigger LRO propagation to the lower.
+# https://lore.kernel.org/netdev/aBvOpkIoxcr9PfDg@mini-arch/
+team_lro()
+{
+	# using netdevsim because it supports NETIF_F_LRO
+	NSIM_LRO_NAME=$(find $NSIM_LRO_SYS/net -maxdepth 1 -type d ! \
+		-path $NSIM_LRO_SYS/net -exec basename {} \;)
+
+	ip link add name team0 type team
+	ip link set $NSIM_LRO_NAME down
+	ip link set dev $NSIM_LRO_NAME master team0
+	ip link set team0 up
+	ethtool -K team0 large-receive-offload off
+
+	ip link del team0
+}
+
+# Trigger promisc propagation to the lower during IFLA_MASTER.
+# https://lore.kernel.org/netdev/20250506032328.3003050-1-sdf@fomichev.me/
+team_promisc()
+{
+	ip link add name dummyteam type dummy
+	ip link add name team0 type team
+	ip link set dummyteam down
+	ip link set team0 promisc on
+	ip link set dev dummyteam master team0
+	ip link set team0 up
+
+	ip link del team0
+	ip link del dummyteam
+}
+
+# Trigger promisc propagation to the lower via netif_change_flags (aka
+# ndo_change_rx_flags).
+# https://lore.kernel.org/netdev/20250514220319.3505158-1-stfomichev@gmail.com/
+team_change_flags()
+{
+	ip link add name dummyteam type dummy
+	ip link add name team0 type team
+	ip link set dummyteam down
+	ip link set dev dummyteam master team0
+	ip link set team0 up
+	ip link set team0 promisc on
+
+	# Make sure we can add more L2 addresses without any issues.
+	ip link add link team0 address 00:00:00:00:00:01 team0.1 type macvlan
+	ip link set team0.1 up
+
+	ip link del team0.1
+	ip link del team0
+	ip link del dummyteam
+}
+
+trap cleanup EXIT
+modprobe netdevsim || :
+echo $NSIM_LRO_ID > $NSIM_DEV_SYS_NEW
+udevadm settle
+team_lro
+team_promisc
+team_change_flags
diff --git a/tools/testing/selftests/nci/nci_dev.c b/tools/testing/selftests/nci/nci_dev.c
index 1562aa7d60b0..6dec59d64083 100644
--- a/tools/testing/selftests/nci/nci_dev.c
+++ b/tools/testing/selftests/nci/nci_dev.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
- * Copyright (C) 2021 Samsung Electrnoics
+ * Copyright (C) 2021 Samsung Electronics
  * Bongsu Jeon <bongsu.jeon@samsung.com>
  *
  * Test code for nci
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 70a38f485d4d..ea84b88bcb30 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -115,7 +115,7 @@ YNL_GEN_FILES := busy_poller netlink-dumps
 TEST_GEN_FILES += $(YNL_GEN_FILES)
 
 TEST_FILES := settings
-TEST_FILES += in_netns.sh lib.sh net_helper.sh setup_loopback.sh setup_veth.sh
+TEST_FILES += in_netns.sh lib.sh setup_loopback.sh setup_veth.sh
 
 TEST_GEN_FILES += $(patsubst %.c,%.o,$(wildcard *.bpf.c))
 
diff --git a/tools/testing/selftests/net/af_unix/scm_rights.c b/tools/testing/selftests/net/af_unix/scm_rights.c
index d66336256580..8b015f16c03d 100644
--- a/tools/testing/selftests/net/af_unix/scm_rights.c
+++ b/tools/testing/selftests/net/af_unix/scm_rights.c
@@ -23,6 +23,7 @@ FIXTURE_VARIANT(scm_rights)
 	int type;
 	int flags;
 	bool test_listener;
+	bool disabled;
 };
 
 FIXTURE_VARIANT_ADD(scm_rights, dgram)
@@ -31,6 +32,16 @@ FIXTURE_VARIANT_ADD(scm_rights, dgram)
 	.type = SOCK_DGRAM,
 	.flags = 0,
 	.test_listener = false,
+	.disabled = false,
+};
+
+FIXTURE_VARIANT_ADD(scm_rights, dgram_disabled)
+{
+	.name = "UNIX ",
+	.type = SOCK_DGRAM,
+	.flags = 0,
+	.test_listener = false,
+	.disabled = true,
 };
 
 FIXTURE_VARIANT_ADD(scm_rights, stream)
@@ -39,6 +50,16 @@ FIXTURE_VARIANT_ADD(scm_rights, stream)
 	.type = SOCK_STREAM,
 	.flags = 0,
 	.test_listener = false,
+	.disabled = false,
+};
+
+FIXTURE_VARIANT_ADD(scm_rights, stream_disabled)
+{
+	.name = "UNIX-STREAM ",
+	.type = SOCK_STREAM,
+	.flags = 0,
+	.test_listener = false,
+	.disabled = true,
 };
 
 FIXTURE_VARIANT_ADD(scm_rights, stream_oob)
@@ -47,6 +68,16 @@ FIXTURE_VARIANT_ADD(scm_rights, stream_oob)
 	.type = SOCK_STREAM,
 	.flags = MSG_OOB,
 	.test_listener = false,
+	.disabled = false,
+};
+
+FIXTURE_VARIANT_ADD(scm_rights, stream_oob_disabled)
+{
+	.name = "UNIX-STREAM ",
+	.type = SOCK_STREAM,
+	.flags = MSG_OOB,
+	.test_listener = false,
+	.disabled = true,
 };
 
 FIXTURE_VARIANT_ADD(scm_rights, stream_listener)
@@ -55,6 +86,16 @@ FIXTURE_VARIANT_ADD(scm_rights, stream_listener)
 	.type = SOCK_STREAM,
 	.flags = 0,
 	.test_listener = true,
+	.disabled = false,
+};
+
+FIXTURE_VARIANT_ADD(scm_rights, stream_listener_disabled)
+{
+	.name = "UNIX-STREAM ",
+	.type = SOCK_STREAM,
+	.flags = 0,
+	.test_listener = true,
+	.disabled = true,
 };
 
 FIXTURE_VARIANT_ADD(scm_rights, stream_listener_oob)
@@ -63,6 +104,16 @@ FIXTURE_VARIANT_ADD(scm_rights, stream_listener_oob)
 	.type = SOCK_STREAM,
 	.flags = MSG_OOB,
 	.test_listener = true,
+	.disabled = false,
+};
+
+FIXTURE_VARIANT_ADD(scm_rights, stream_listener_oob_disabled)
+{
+	.name = "UNIX-STREAM ",
+	.type = SOCK_STREAM,
+	.flags = MSG_OOB,
+	.test_listener = true,
+	.disabled = true,
 };
 
 static int count_sockets(struct __test_metadata *_metadata,
@@ -105,6 +156,9 @@ FIXTURE_SETUP(scm_rights)
 	ret = unshare(CLONE_NEWNET);
 	ASSERT_EQ(0, ret);
 
+	if (variant->disabled)
+		return;
+
 	ret = count_sockets(_metadata, variant);
 	ASSERT_EQ(0, ret);
 }
@@ -113,6 +167,9 @@ FIXTURE_TEARDOWN(scm_rights)
 {
 	int ret;
 
+	if (variant->disabled)
+		return;
+
 	sleep(1);
 
 	ret = count_sockets(_metadata, variant);
@@ -121,6 +178,7 @@ FIXTURE_TEARDOWN(scm_rights)
 
 static void create_listeners(struct __test_metadata *_metadata,
 			     FIXTURE_DATA(scm_rights) *self,
+			     const FIXTURE_VARIANT(scm_rights) *variant,
 			     int n)
 {
 	struct sockaddr_un addr = {
@@ -140,6 +198,12 @@ static void create_listeners(struct __test_metadata *_metadata,
 		ret = listen(self->fd[i], -1);
 		ASSERT_EQ(0, ret);
 
+		if (variant->disabled) {
+			ret = setsockopt(self->fd[i], SOL_SOCKET, SO_PASSRIGHTS,
+					 &(int){0}, sizeof(int));
+			ASSERT_EQ(0, ret);
+		}
+
 		addrlen = sizeof(addr);
 		ret = getsockname(self->fd[i], (struct sockaddr *)&addr, &addrlen);
 		ASSERT_EQ(0, ret);
@@ -164,6 +228,12 @@ static void create_socketpairs(struct __test_metadata *_metadata,
 	for (i = 0; i < n * 2; i += 2) {
 		ret = socketpair(AF_UNIX, variant->type, 0, self->fd + i);
 		ASSERT_EQ(0, ret);
+
+		if (variant->disabled) {
+			ret = setsockopt(self->fd[i], SOL_SOCKET, SO_PASSRIGHTS,
+					 &(int){0}, sizeof(int));
+			ASSERT_EQ(0, ret);
+		}
 	}
 }
 
@@ -175,7 +245,7 @@ static void __create_sockets(struct __test_metadata *_metadata,
 	ASSERT_LE(n * 2, sizeof(self->fd) / sizeof(self->fd[0]));
 
 	if (variant->test_listener)
-		create_listeners(_metadata, self, n);
+		create_listeners(_metadata, self, variant, n);
 	else
 		create_socketpairs(_metadata, self, variant, n);
 }
@@ -230,7 +300,13 @@ void __send_fd(struct __test_metadata *_metadata,
 	int ret;
 
 	ret = sendmsg(self->fd[receiver * 2 + 1], &msg, variant->flags);
-	ASSERT_EQ(MSGLEN, ret);
+
+	if (variant->disabled) {
+		ASSERT_EQ(-1, ret);
+		ASSERT_EQ(-EPERM, -errno);
+	} else {
+		ASSERT_EQ(MSGLEN, ret);
+	}
 }
 
 #define create_sockets(n)					\
diff --git a/tools/testing/selftests/net/bareudp.sh b/tools/testing/selftests/net/bareudp.sh
index f366cadbc5e8..4046131e7888 100755
--- a/tools/testing/selftests/net/bareudp.sh
+++ b/tools/testing/selftests/net/bareudp.sh
@@ -106,26 +106,16 @@
 # |                                                                       |
 # +-----------------------------------------------------------------------+
 
+. ./lib.sh
+
 ERR=4 # Return 4 by default, which is the SKIP code for kselftest
 PING6="ping"
 PAUSE_ON_FAIL="no"
 
-readonly NS0=$(mktemp -u ns0-XXXXXXXX)
-readonly NS1=$(mktemp -u ns1-XXXXXXXX)
-readonly NS2=$(mktemp -u ns2-XXXXXXXX)
-readonly NS3=$(mktemp -u ns3-XXXXXXXX)
-
 # Exit the script after having removed the network namespaces it created
-#
-# Parameters:
-#
-#   * The list of network namespaces to delete before exiting.
-#
 exit_cleanup()
 {
-	for ns in "$@"; do
-		ip netns delete "${ns}" 2>/dev/null || true
-	done
+	cleanup_all_ns
 
 	if [ "${ERR}" -eq 4 ]; then
 		echo "Error: Setting up the testing environment failed." >&2
@@ -140,17 +130,7 @@ exit_cleanup()
 # namespaces created by this script are deleted.
 create_namespaces()
 {
-	ip netns add "${NS0}" || exit_cleanup
-	ip netns add "${NS1}" || exit_cleanup "${NS0}"
-	ip netns add "${NS2}" || exit_cleanup "${NS0}" "${NS1}"
-	ip netns add "${NS3}" || exit_cleanup "${NS0}" "${NS1}" "${NS2}"
-}
-
-# The trap function handler
-#
-exit_cleanup_all()
-{
-	exit_cleanup "${NS0}" "${NS1}" "${NS2}" "${NS3}"
+	setup_ns NS0 NS1 NS2 NS3 || exit_cleanup
 }
 
 # Configure a network interface using a host route
@@ -188,10 +168,6 @@ iface_config()
 #
 setup_underlay()
 {
-	for ns in "${NS0}" "${NS1}" "${NS2}" "${NS3}"; do
-		ip -netns "${ns}" link set dev lo up
-	done;
-
 	ip link add name veth01 netns "${NS0}" type veth peer name veth10 netns "${NS1}"
 	ip link add name veth12 netns "${NS1}" type veth peer name veth21 netns "${NS2}"
 	ip link add name veth23 netns "${NS2}" type veth peer name veth32 netns "${NS3}"
@@ -234,14 +210,6 @@ setup_overlay_ipv4()
 	ip netns exec "${NS2}" sysctl -qw net.ipv4.ip_forward=1
 	ip -netns "${NS1}" route add 192.0.2.100/32 via 192.0.2.10
 	ip -netns "${NS2}" route add 192.0.2.103/32 via 192.0.2.33
-
-	# The intermediate namespaces don't have routes for the reverse path,
-	# as it will be handled by tc. So we need to ensure that rp_filter is
-	# not going to block the traffic.
-	ip netns exec "${NS1}" sysctl -qw net.ipv4.conf.all.rp_filter=0
-	ip netns exec "${NS2}" sysctl -qw net.ipv4.conf.all.rp_filter=0
-	ip netns exec "${NS1}" sysctl -qw net.ipv4.conf.default.rp_filter=0
-	ip netns exec "${NS2}" sysctl -qw net.ipv4.conf.default.rp_filter=0
 }
 
 setup_overlay_ipv6()
@@ -521,13 +489,10 @@ done
 
 check_features
 
-# Create namespaces before setting up the exit trap.
-# Otherwise, exit_cleanup_all() could delete namespaces that were not created
-# by this script.
-create_namespaces
-
 set -e
-trap exit_cleanup_all EXIT
+trap exit_cleanup EXIT
+
+create_namespaces
 
 setup_underlay
 setup_overlay_ipv4
diff --git a/tools/testing/selftests/net/busy_poll_test.sh b/tools/testing/selftests/net/busy_poll_test.sh
index 7db292ec4884..7d2d40812074 100755
--- a/tools/testing/selftests/net/busy_poll_test.sh
+++ b/tools/testing/selftests/net/busy_poll_test.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
-source net_helper.sh
+source lib.sh
 
 NSIM_SV_ID=$((256 + RANDOM % 256))
 NSIM_SV_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_SV_ID
diff --git a/tools/testing/selftests/net/can/.gitignore b/tools/testing/selftests/net/can/.gitignore
new file mode 100644
index 000000000000..764a53fc837f
--- /dev/null
+++ b/tools/testing/selftests/net/can/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+test_raw_filter
diff --git a/tools/testing/selftests/net/can/Makefile b/tools/testing/selftests/net/can/Makefile
new file mode 100644
index 000000000000..5b82e60a03e7
--- /dev/null
+++ b/tools/testing/selftests/net/can/Makefile
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0
+
+top_srcdir = ../../../../..
+
+CFLAGS += -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include $(KHDR_INCLUDES)
+
+TEST_PROGS := test_raw_filter.sh
+
+TEST_GEN_FILES := test_raw_filter
+
+include ../../lib.mk
diff --git a/tools/testing/selftests/net/can/test_raw_filter.c b/tools/testing/selftests/net/can/test_raw_filter.c
new file mode 100644
index 000000000000..4101c36390fd
--- /dev/null
+++ b/tools/testing/selftests/net/can/test_raw_filter.c
@@ -0,0 +1,405 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause)
+/*
+ * Copyright (c) 2011 Volkswagen Group Electronic Research
+ * All rights reserved.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+#include <net/if.h>
+#include <linux/if.h>
+
+#include <linux/can.h>
+#include <linux/can/raw.h>
+
+#include "../../kselftest_harness.h"
+
+#define ID 0x123
+
+char CANIF[IFNAMSIZ];
+
+/* Send four frames carrying ID, one per combination of the EFF and RTR flags.
+ * data[0] holds the test case number, which the receiving side checks.
+ * Returns 0 on success, 1 as soon as a write() fails.
+ */
+static int send_can_frames(int sock, int testcase)
+{
+	static const canid_t flags[] = {
+		0,
+		CAN_RTR_FLAG,
+		CAN_EFF_FLAG,
+		CAN_EFF_FLAG | CAN_RTR_FLAG,
+	};
+	/* Zeroed so unused payload and padding bytes carry no stack garbage. */
+	struct can_frame frame = {
+		.can_dlc = 1,
+		.data[0] = testcase,
+	};
+	size_t i;
+
+	for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
+		frame.can_id = ID | flags[i];
+		if (write(sock, &frame, sizeof(frame)) < 0) {
+			perror("write");
+			return 1;
+		}
+	}
+
+	return 0;
+}
+
+FIXTURE(can_filters) {
+	int sock;
+};
+
+/* Open a CAN_RAW socket bound to CANIF that also receives its own frames. */
+FIXTURE_SETUP(can_filters)
+{
+	struct sockaddr_can addr = { .can_family = AF_CAN };
+	struct ifreq ifr = {};
+	int s, ret;
+
+	s = socket(PF_CAN, SOCK_RAW, CAN_RAW);
+	ASSERT_GE(s, 0)
+		TH_LOG("failed to create CAN_RAW socket: %d", errno);
+
+	strncpy(ifr.ifr_name, CANIF, sizeof(ifr.ifr_name));
+	ret = ioctl(s, SIOCGIFINDEX, &ifr);
+	ASSERT_GE(ret, 0)
+		TH_LOG("failed SIOCGIFINDEX: %d", errno);
+
+	ret = setsockopt(s, SOL_CAN_RAW, CAN_RAW_RECV_OWN_MSGS,
+			 &(int){1}, sizeof(int));
+	ASSERT_EQ(ret, 0)
+		TH_LOG("failed to enable CAN_RAW_RECV_OWN_MSGS: %d", errno);
+
+	addr.can_ifindex = ifr.ifr_ifindex;
+	ret = bind(s, (struct sockaddr *)&addr, sizeof(addr));
+	ASSERT_EQ(ret, 0)
+		TH_LOG("failed bind socket: %d", errno);
+
+	self->sock = s;
+}
+
+FIXTURE_TEARDOWN(can_filters)
+{
+	close(self->sock);
+}
+
+FIXTURE_VARIANT(can_filters) {
+	int testcase;
+	canid_t id;
+	canid_t mask;
+	int exp_num_rx;
+	canid_t exp_flags[];
+};
+
+/* Receive all frames when filtering for the ID in standard frame format */
+FIXTURE_VARIANT_ADD(can_filters, base) {
+	.testcase = 1,
+	.id = ID,
+	.mask = CAN_SFF_MASK,
+	.exp_num_rx = 4,
+	.exp_flags = {
+		0,
+		CAN_RTR_FLAG,
+		CAN_EFF_FLAG,
+		CAN_EFF_FLAG | CAN_RTR_FLAG,
+	},
+};
+
+/* Ignore EFF flag in filter ID if not covered by filter mask */
+FIXTURE_VARIANT_ADD(can_filters, base_eff) {
+	.testcase = 2,
+	.id = ID | CAN_EFF_FLAG,
+	.mask = CAN_SFF_MASK,
+	.exp_num_rx = 4,
+	.exp_flags = {
+		0,
+		CAN_RTR_FLAG,
+		CAN_EFF_FLAG,
+		CAN_EFF_FLAG | CAN_RTR_FLAG,
+	},
+};
+
+/* Ignore RTR flag in filter ID if not covered by filter mask */
+FIXTURE_VARIANT_ADD(can_filters, base_rtr) {
+	.testcase = 3,
+	.id = ID | CAN_RTR_FLAG,
+	.mask = CAN_SFF_MASK,
+	.exp_num_rx = 4,
+	.exp_flags = {
+		0,
+		CAN_RTR_FLAG,
+		CAN_EFF_FLAG,
+		CAN_EFF_FLAG | CAN_RTR_FLAG,
+	},
+};
+
+/* Ignore EFF and RTR flags in filter ID if not covered by filter mask */
+FIXTURE_VARIANT_ADD(can_filters, base_effrtr) {
+	.testcase = 4,
+	.id = ID | CAN_EFF_FLAG | CAN_RTR_FLAG,
+	.mask = CAN_SFF_MASK,
+	.exp_num_rx = 4,
+	.exp_flags = {
+		0,
+		CAN_RTR_FLAG,
+		CAN_EFF_FLAG,
+		CAN_EFF_FLAG | CAN_RTR_FLAG,
+	},
+};
+
+/* Receive only SFF frames when expecting no EFF flag */
+FIXTURE_VARIANT_ADD(can_filters, filter_eff) {
+	.testcase = 5,
+	.id = ID,
+	.mask = CAN_SFF_MASK | CAN_EFF_FLAG,
+	.exp_num_rx = 2,
+	.exp_flags = {
+		0,
+		CAN_RTR_FLAG,
+	},
+};
+
+/* Receive only EFF frames when filter id and filter mask include EFF flag */
+FIXTURE_VARIANT_ADD(can_filters, filter_eff_eff) {
+	.testcase = 6,
+	.id = ID | CAN_EFF_FLAG,
+	.mask = CAN_SFF_MASK | CAN_EFF_FLAG,
+	.exp_num_rx = 2,
+	.exp_flags = {
+		CAN_EFF_FLAG,
+		CAN_EFF_FLAG | CAN_RTR_FLAG,
+	},
+};
+
+/* Receive only SFF frames when expecting no EFF flag, ignoring RTR flag */
+FIXTURE_VARIANT_ADD(can_filters, filter_eff_rtr) {
+	.testcase = 7,
+	.id = ID | CAN_RTR_FLAG,
+	.mask = CAN_SFF_MASK | CAN_EFF_FLAG,
+	.exp_num_rx = 2,
+	.exp_flags = {
+		0,
+		CAN_RTR_FLAG,
+	},
+};
+
+/* Receive only EFF frames when filter id and filter mask include EFF flag,
+ * ignoring RTR flag
+ */
+FIXTURE_VARIANT_ADD(can_filters, filter_eff_effrtr) {
+	.testcase = 8,
+	.id = ID | CAN_EFF_FLAG | CAN_RTR_FLAG,
+	.mask = CAN_SFF_MASK | CAN_EFF_FLAG,
+	.exp_num_rx = 2,
+	.exp_flags = {
+		CAN_EFF_FLAG,
+		CAN_EFF_FLAG | CAN_RTR_FLAG,
+	},
+};
+
+/* Receive no remote frames when filtering for no RTR flag */
+FIXTURE_VARIANT_ADD(can_filters, filter_rtr) {
+	.testcase = 9,
+	.id = ID,
+	.mask = CAN_SFF_MASK | CAN_RTR_FLAG,
+	.exp_num_rx = 2,
+	.exp_flags = {
+		0,
+		CAN_EFF_FLAG,
+	},
+};
+
+/* Receive no remote frames when filtering for no RTR flag, ignoring EFF flag */
+FIXTURE_VARIANT_ADD(can_filters, filter_rtr_eff) {
+	.testcase = 10,
+	.id = ID | CAN_EFF_FLAG,
+	.mask = CAN_SFF_MASK | CAN_RTR_FLAG,
+	.exp_num_rx = 2,
+	.exp_flags = {
+		0,
+		CAN_EFF_FLAG,
+	},
+};
+
+/* Receive only remote frames when filter includes RTR flag */
+FIXTURE_VARIANT_ADD(can_filters, filter_rtr_rtr) {
+	.testcase = 11,
+	.id = ID | CAN_RTR_FLAG,
+	.mask = CAN_SFF_MASK | CAN_RTR_FLAG,
+	.exp_num_rx = 2,
+	.exp_flags = {
+		CAN_RTR_FLAG,
+		CAN_EFF_FLAG | CAN_RTR_FLAG,
+	},
+};
+
+/* Receive only remote frames when filter includes RTR flag, ignoring EFF
+ * flag
+ */
+FIXTURE_VARIANT_ADD(can_filters, filter_rtr_effrtr) {
+	.testcase = 12,
+	.id = ID | CAN_EFF_FLAG | CAN_RTR_FLAG,
+	.mask = CAN_SFF_MASK | CAN_RTR_FLAG,
+	.exp_num_rx = 2,
+	.exp_flags = {
+		CAN_RTR_FLAG,
+		CAN_EFF_FLAG | CAN_RTR_FLAG,
+	},
+};
+
+/* Receive only SFF data frame when filtering for no flags */
+FIXTURE_VARIANT_ADD(can_filters, filter_effrtr) {
+	.testcase = 13,
+	.id = ID,
+	.mask = CAN_SFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG,
+	.exp_num_rx = 1,
+	.exp_flags = {
+		0,
+	},
+};
+
+/* Receive only EFF data frame when filtering for EFF but no RTR flag */
+FIXTURE_VARIANT_ADD(can_filters, filter_effrtr_eff) {
+	.testcase = 14,
+	.id = ID | CAN_EFF_FLAG,
+	.mask = CAN_SFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG,
+	.exp_num_rx = 1,
+	.exp_flags = {
+		CAN_EFF_FLAG,
+	},
+};
+
+/* Receive only SFF remote frame when filtering for RTR but no EFF flag */
+FIXTURE_VARIANT_ADD(can_filters, filter_effrtr_rtr) {
+	.testcase = 15,
+	.id = ID | CAN_RTR_FLAG,
+	.mask = CAN_SFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG,
+	.exp_num_rx = 1,
+	.exp_flags = {
+		CAN_RTR_FLAG,
+	},
+};
+
+/* Receive only EFF remote frame when filtering for EFF and RTR flag */
+FIXTURE_VARIANT_ADD(can_filters, filter_effrtr_effrtr) {
+	.testcase = 16,
+	.id = ID | CAN_EFF_FLAG | CAN_RTR_FLAG,
+	.mask = CAN_SFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG,
+	.exp_num_rx = 1,
+	.exp_flags = {
+		CAN_EFF_FLAG | CAN_RTR_FLAG,
+	},
+};
+
+/* Receive only SFF data frame when filtering for no EFF flag and no RTR flag
+ * but based on EFF mask
+ */
+FIXTURE_VARIANT_ADD(can_filters, eff) {
+	.testcase = 17,
+	.id = ID,
+	.mask = CAN_EFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG,
+	.exp_num_rx = 1,
+	.exp_flags = {
+		0,
+	},
+};
+
+/* Receive only EFF data frame when filtering for EFF flag and no RTR flag but
+ * based on EFF mask
+ */
+FIXTURE_VARIANT_ADD(can_filters, eff_eff) {
+	.testcase = 18,
+	.id = ID | CAN_EFF_FLAG,
+	.mask = CAN_EFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG,
+	.exp_num_rx = 1,
+	.exp_flags = {
+		CAN_EFF_FLAG,
+	},
+};
+
+/* This test verifies that the raw CAN filters work, by checking if only frames
+ * with the expected set of flags are received. For each test case, the given
+ * filter (id and mask) is added and four CAN frames are sent with every
+ * combination of set/unset EFF/RTR flags.
+ */
+TEST_F(can_filters, test_filter)
+{
+	struct can_filter rfilter = { .can_id = variant->id, .can_mask = variant->mask };
+	int ret;
+
+	ret = setsockopt(self->sock, SOL_CAN_RAW, CAN_RAW_FILTER,
+			 &rfilter, sizeof(rfilter));
+	ASSERT_EQ(ret, 0)
+		TH_LOG("failed to set CAN_RAW_FILTER: %d", errno);
+
+	TH_LOG("filters: can_id = 0x%08X can_mask = 0x%08X",
+		rfilter.can_id, rfilter.can_mask);
+
+	ret = send_can_frames(self->sock, variant->testcase);
+	ASSERT_EQ(ret, 0)
+		TH_LOG("failed to send CAN frames");
+
+	for (int i = 0; i <= variant->exp_num_rx; i++) { /* last pass must time out */
+		struct can_frame frame;
+		struct timeval tv = {
+			.tv_sec = 0,
+			.tv_usec = 50000, /* 50ms timeout */
+		};
+		fd_set rdfs;
+
+		FD_ZERO(&rdfs);
+		FD_SET(self->sock, &rdfs);
+
+		ret = select(self->sock + 1, &rdfs, NULL, NULL, &tv);
+		ASSERT_GE(ret, 0)
+			TH_LOG("failed select for frame %d, err: %d", i, errno);
+
+		ret = FD_ISSET(self->sock, &rdfs);
+		if (i == variant->exp_num_rx) {
+			ASSERT_EQ(ret, 0)
+				TH_LOG("too many frames received");
+		} else {
+			ASSERT_NE(ret, 0)
+				TH_LOG("too few frames received");
+
+			ret = read(self->sock, &frame, sizeof(frame));
+			ASSERT_EQ(ret, (int)sizeof(frame))
+				TH_LOG("failed to read frame %d, err: %d", i, errno);
+
+			TH_LOG("rx: can_id = 0x%08X rx = %d", frame.can_id, i);
+
+			ASSERT_EQ(ID, frame.can_id & CAN_SFF_MASK)
+				TH_LOG("received wrong can_id");
+			ASSERT_EQ(variant->testcase, frame.data[0])
+				TH_LOG("received wrong test case");
+
+			ASSERT_EQ(frame.can_id & ~CAN_ERR_MASK,
+				  variant->exp_flags[i])
+				TH_LOG("received unexpected flags");
+		}
+	}
+}
+
+int main(int argc, char **argv)
+{
+	char *ifname = getenv("CANIF");
+
+	/* A name that does not fit would be truncated into another interface. */
+	if (!ifname || strlen(ifname) >= sizeof(CANIF)) {
+		printf("CANIF environment variable must contain the test interface\n");
+		return KSFT_FAIL;
+	}
+	strncpy(CANIF, ifname, sizeof(CANIF) - 1);
+
+	return test_harness_run(argc, argv);
+}
diff --git a/tools/testing/selftests/net/can/test_raw_filter.sh b/tools/testing/selftests/net/can/test_raw_filter.sh
new file mode 100755
index 000000000000..276d6c06ac95
--- /dev/null
+++ b/tools/testing/selftests/net/can/test_raw_filter.sh
@@ -0,0 +1,45 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="
+	test_raw_filter
+"
+
+net_dir=$(dirname $0)/..
+source $net_dir/lib.sh
+
+export CANIF=${CANIF:-"vcan0"}
+BITRATE=${BITRATE:-500000}
+
+setup()
+{
+	if [[ $CANIF == vcan* ]]; then
+		ip link add name $CANIF type vcan || exit $ksft_skip
+	else
+		ip link set dev $CANIF type can bitrate $BITRATE || exit $ksft_skip
+	fi
+	CANIF_READY=yes	# from here on cleanup() has something to undo
+	ip link set dev $CANIF up
+}
+
+cleanup()
+{
+	# setup() bailed out early: the interface is not ours to down or delete.
+	[[ $CANIF_READY == yes ]] || return
+	ip link set dev $CANIF down
+	[[ $CANIF != vcan* ]] || ip link delete $CANIF
+}
+
+test_raw_filter()
+{
+	./test_raw_filter
+	check_err $?
+	log_test "test_raw_filter"
+}
+
+trap cleanup EXIT
+setup
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config
index 130d532b7e67..3cfef5153823 100644
--- a/tools/testing/selftests/net/config
+++ b/tools/testing/selftests/net/config
@@ -33,7 +33,6 @@ CONFIG_NETFILTER_ADVANCED=y
 CONFIG_NF_CONNTRACK=m
 CONFIG_IPV6_MROUTE=y
 CONFIG_IPV6_SIT=y
-CONFIG_IP_DCCP=m
 CONFIG_NF_NAT=m
 CONFIG_IP6_NF_IPTABLES=m
 CONFIG_IP_NF_IPTABLES=m
diff --git a/tools/testing/selftests/net/fib_rule_tests.sh b/tools/testing/selftests/net/fib_rule_tests.sh
index c7cea556b416..5fbdd2a0b537 100755
--- a/tools/testing/selftests/net/fib_rule_tests.sh
+++ b/tools/testing/selftests/net/fib_rule_tests.sh
@@ -516,10 +516,7 @@ fib_rule4_test()
 	fib_rule4_test_match_n_redirect "$match" "$match" "$getnomatch" \
 		"oif redirect to table" "oif no redirect to table"
 
-	# Enable forwarding and disable rp_filter as all the addresses are in
-	# the same subnet and egress device == ingress device.
 	ip netns exec $testns sysctl -qw net.ipv4.ip_forward=1
-	ip netns exec $testns sysctl -qw net.ipv4.conf.$DEV.rp_filter=0
 	match="from $SRC_IP iif $DEV"
 	getnomatch="from $SRC_IP iif lo"
 	fib_rule4_test_match_n_redirect "$match" "$match" "$getnomatch" \
diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh
index 3ea6f886a210..a94b73a53f72 100755
--- a/tools/testing/selftests/net/fib_tests.sh
+++ b/tools/testing/selftests/net/fib_tests.sh
@@ -11,7 +11,7 @@ TESTS="unregister down carrier nexthop suppress ipv6_notify ipv4_notify \
        ipv6_rt ipv4_rt ipv6_addr_metric ipv4_addr_metric ipv6_route_metrics \
        ipv4_route_metrics ipv4_route_v6_gw rp_filter ipv4_del_addr \
        ipv6_del_addr ipv4_mangle ipv6_mangle ipv4_bcast_neigh fib6_gc_test \
-       ipv4_mpath_list ipv6_mpath_list"
+       ipv4_mpath_list ipv6_mpath_list ipv4_mpath_balance ipv6_mpath_balance"
 
 VERBOSE=0
 PAUSE_ON_FAIL=no
@@ -1085,6 +1085,35 @@ route_setup()
 	set +e
 }
 
+forwarding_cleanup()
+{
+	cleanup_ns $ns3
+
+	route_cleanup
+}
+
+# extend route_setup with an ns3 reachable through ns2 over both devices
+forwarding_setup()
+{
+	forwarding_cleanup
+
+	route_setup
+
+	setup_ns ns3
+
+	ip link add veth5 netns $ns3 type veth peer name veth6 netns $ns2
+	ip -netns $ns3 link set veth5 up
+	ip -netns $ns2 link set veth6 up
+
+	ip -netns $ns3 -4 addr add dev veth5 172.16.105.1/24
+	ip -netns $ns2 -4 addr add dev veth6 172.16.105.2/24
+	ip -netns $ns3 -4 route add 172.16.100.0/22 via 172.16.105.2
+
+	ip -netns $ns3 -6 addr add dev veth5 2001:db8:105::1/64 nodad
+	ip -netns $ns2 -6 addr add dev veth6 2001:db8:105::2/64 nodad
+	ip -netns $ns3 -6 route add 2001:db8:101::/33 via 2001:db8:105::2
+}
+
 # assumption is that basic add of a single path route works
 # otherwise just adding an address on an interface is broken
 ipv6_rt_add()
@@ -2531,9 +2560,6 @@ ipv4_mpath_list_test()
 	run_cmd "ip -n $ns2 route add 203.0.113.0/24
 		nexthop via 172.16.201.2 nexthop via 172.16.202.2"
 	run_cmd "ip netns exec $ns2 sysctl -qw net.ipv4.fib_multipath_hash_policy=1"
-	run_cmd "ip netns exec $ns2 sysctl -qw net.ipv4.conf.veth2.rp_filter=0"
-	run_cmd "ip netns exec $ns2 sysctl -qw net.ipv4.conf.all.rp_filter=0"
-	run_cmd "ip netns exec $ns2 sysctl -qw net.ipv4.conf.default.rp_filter=0"
 	set +e
 
 	local dmac=$(ip -n $ns2 -j link show dev veth2 | jq -r '.[]["address"]')
@@ -2600,6 +2626,93 @@ ipv6_mpath_list_test()
 	route_cleanup
 }
 
+tc_set_flower_counter__saddr_syn() {	# usage: <ns> <ipver> <dev> <saddr>; matches TCP from <saddr> with tcp_flags 0x2
+	tc_set_flower_counter $1 $2 $3 "src_ip $4 ip_proto tcp tcp_flags 0x2"
+}
+
+# Return 1, after naming the missing tool, unless both socat and jq are executable.
+ip_mpath_balance_dep_check()
+{
+	local missing=""
+	# socat is probed last so that it is the one reported when both are absent
+	[ -x "$(command -v jq)" ] || missing="jq command not found. Skipping test"
+	[ -x "$(command -v socat)" ] || missing="socat command not found. Skipping test"
+	[ -z "$missing" ] && return 0
+
+	echo "$missing"
+	return 1
+}
+
+ip_mpath_balance() {
+	local -r ipver=$1
+	local -r daddr=$2
+	local -r num_conn=20
+
+	for i in $(seq 1 $num_conn); do
+		ip netns exec $ns3 socat $ipver TCP-LISTEN:8000 STDIO >/dev/null &
+		sleep 0.02
+		echo -n a | ip netns exec $ns1 socat $ipver STDIO TCP:$daddr:8000
+	done
+
+	local -r syn0="$(tc_get_flower_counter $ns1 veth1)"
+	local -r syn1="$(tc_get_flower_counter $ns1 veth3)"
+	local -r syns=$((syn0+syn1))
+
+	[ "$VERBOSE" = "1" ] && echo "multipath: syns seen: ($syn0,$syn1)"
+
+	[[ $syns -ge $num_conn ]] && [[ $syn0 -gt 0 ]] && [[ $syn1 -gt 0 ]]
+}
+
+ipv4_mpath_balance_test()
+{
+	echo
+	echo "IPv4 multipath load balance test"
+	# bail out with status 1 when socat or jq is unavailable
+	ip_mpath_balance_dep_check || return 1
+	forwarding_setup
+	# reach 172.16.105.1 (veth5 in ns3) through two nexthops
+	$IP route add 172.16.105.1 \
+		nexthop via 172.16.101.2 \
+		nexthop via 172.16.103.2
+	# hash policy 1 in ns1 (presumably layer-4 hashing, per ip-sysctl - TODO confirm)
+	ip netns exec $ns1 \
+		sysctl -q -w net.ipv4.fib_multipath_hash_policy=1
+	# per device: count TCP SYNs sent with the given source address
+	tc_set_flower_counter__saddr_syn $ns1 4 veth1 172.16.101.1
+	tc_set_flower_counter__saddr_syn $ns1 4 veth3 172.16.103.1
+	# its exit status is what log_test reports below
+	ip_mpath_balance -4 172.16.105.1
+
+	log_test $? 0 "IPv4 multipath loadbalance"
+
+	forwarding_cleanup
+}
+
+ipv6_mpath_balance_test()
+{
+	echo
+	echo "IPv6 multipath load balance test"
+	# bail out with status 1 when socat or jq is unavailable
+	ip_mpath_balance_dep_check || return 1
+	forwarding_setup
+	# reach 2001:db8:105::1 (veth5 in ns3) through two nexthops
+	$IP route add 2001:db8:105::1\
+		nexthop via 2001:db8:101::2 \
+		nexthop via 2001:db8:103::2
+	# hash policy 1 in ns1 (presumably layer-4 hashing, per ip-sysctl - TODO confirm)
+	ip netns exec $ns1 \
+		sysctl -q -w net.ipv6.fib_multipath_hash_policy=1
+	# per device: count TCP SYNs sent with the given source address
+	tc_set_flower_counter__saddr_syn $ns1 6 veth1 2001:db8:101::1
+	tc_set_flower_counter__saddr_syn $ns1 6 veth3 2001:db8:103::1
+	# socat wants the IPv6 literal in brackets; the exit status feeds log_test
+	ip_mpath_balance -6 "[2001:db8:105::1]"
+
+	log_test $? 0 "IPv6 multipath loadbalance"
+
+	forwarding_cleanup
+}
+
 ################################################################################
 # usage
 
@@ -2683,6 +2796,8 @@ do
 	fib6_gc_test|ipv6_gc)		fib6_gc_test;;
 	ipv4_mpath_list)		ipv4_mpath_list_test;;
 	ipv6_mpath_list)		ipv6_mpath_list_test;;
+	ipv4_mpath_balance)		ipv4_mpath_balance_test;;
+	ipv6_mpath_balance)		ipv6_mpath_balance_test;;
 
 	help) echo "Test names: $TESTS"; exit 0;;
 	esac
diff --git a/tools/testing/selftests/net/forwarding/bridge_igmp.sh b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
index e6a3e04fd83f..d4e7dd659354 100755
--- a/tools/testing/selftests/net/forwarding/bridge_igmp.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
@@ -1,10 +1,24 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
-ALL_TESTS="v2reportleave_test v3include_test v3inc_allow_test v3inc_is_include_test \
-	   v3inc_is_exclude_test v3inc_to_exclude_test v3exc_allow_test v3exc_is_include_test \
-	   v3exc_is_exclude_test v3exc_to_exclude_test v3inc_block_test v3exc_block_test \
-	   v3exc_timeout_test v3star_ex_auto_add_test"
+ALL_TESTS="
+	v2reportleave_test
+	v3include_test
+	v3inc_allow_test
+	v3inc_is_include_test
+	v3inc_is_exclude_test
+	v3inc_to_exclude_test
+	v3exc_allow_test
+	v3exc_is_include_test
+	v3exc_is_exclude_test
+	v3exc_to_exclude_test
+	v3inc_block_test
+	v3exc_block_test
+	v3exc_timeout_test
+	v3star_ex_auto_add_test
+	v2per_vlan_snooping_port_stp_test
+	v2per_vlan_snooping_vlan_stp_test
+"
 NUM_NETIFS=4
 CHECK_TC="yes"
 TEST_GROUP="239.10.10.10"
@@ -554,6 +568,64 @@ v3star_ex_auto_add_test()
 	v3cleanup $swp2 $TEST_GROUP
 }
 
+v2per_vlan_snooping_stp_test()
+{
+	local is_port=$1
+	# $1 == 1: toggle the port's STP state; anything else: vlan 1's state on the port
+	local msg="port"
+	[[ $is_port -ne 1 ]] && msg="vlan"
+	# per-vlan snooping plus a querier on vlan 1; stats on so tx_v2 can be sampled
+	ip link set br0 up type bridge vlan_filtering 1 \
+					mcast_igmp_version 2 \
+					mcast_snooping 1 \
+					mcast_vlan_snooping 1 \
+					mcast_querier 1 \
+					mcast_stats_enabled 1
+	bridge vlan global set vid 1 dev br0 \
+					mcast_snooping 1 \
+					mcast_querier 1 \
+					mcast_query_interval 100 \
+					mcast_startup_query_count 0
+	[[ $is_port -eq 1 ]] && bridge link set dev $swp1 state 0
+	[[ $is_port -ne 1 ]] && bridge vlan set vid 1 dev $swp1 state 4
+	sleep 5
+	local tx_s=$(ip -j -p stats show dev $swp1 \
+			group xstats_slave subgroup bridge suite mcast \
+			| jq '.[]["multicast"]["igmp_queries"]["tx_v2"]')
+	# state 3 is the "forwarding" state the failure message below refers to
+	[[ $is_port -eq 1 ]] && bridge link set dev $swp1 state 3
+	[[ $is_port -ne 1 ]] && bridge vlan set vid 1 dev $swp1 state 3
+	sleep 5
+	local tx_e=$(ip -j -p stats show dev $swp1 \
+			group xstats_slave subgroup bridge suite mcast \
+			| jq '.[]["multicast"]["igmp_queries"]["tx_v2"]')
+	# the IGMPv2 query counter must have grown between the two samples
+	RET=0
+	local tx=$(expr $tx_e - $tx_s)
+	test $tx -gt 0
+	check_err $? "No IGMP queries after STP state becomes forwarding"
+	log_test "per vlan snooping with $msg stp state change"
+
+	# restore settings
+	bridge vlan global set vid 1 dev br0 \
+					mcast_querier 0 \
+					mcast_query_interval 12500 \
+					mcast_startup_query_count 2
+	ip link set br0 up type bridge vlan_filtering 0 \
+					mcast_vlan_snooping 0 \
+					mcast_stats_enabled 0
+}
+
+v2per_vlan_snooping_port_stp_test()
+{
+	v2per_vlan_snooping_stp_test 1	# 1: change the STP state with "bridge link set"
+}
+
+v2per_vlan_snooping_vlan_stp_test()
+{
+	v2per_vlan_snooping_stp_test 0	# not 1: change vlan 1's state with "bridge vlan set"
+}
+
 trap cleanup EXIT
 
 setup_prepare
diff --git a/tools/testing/selftests/net/forwarding/bridge_mld.sh b/tools/testing/selftests/net/forwarding/bridge_mld.sh
index f84ab2e65754..4cacef5a813a 100755
--- a/tools/testing/selftests/net/forwarding/bridge_mld.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_mld.sh
@@ -1,10 +1,23 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
-ALL_TESTS="mldv2include_test mldv2inc_allow_test mldv2inc_is_include_test mldv2inc_is_exclude_test \
-	   mldv2inc_to_exclude_test mldv2exc_allow_test mldv2exc_is_include_test \
-	   mldv2exc_is_exclude_test mldv2exc_to_exclude_test mldv2inc_block_test \
-	   mldv2exc_block_test mldv2exc_timeout_test mldv2star_ex_auto_add_test"
+ALL_TESTS="
+	mldv2include_test
+	mldv2inc_allow_test
+	mldv2inc_is_include_test
+	mldv2inc_is_exclude_test
+	mldv2inc_to_exclude_test
+	mldv2exc_allow_test
+	mldv2exc_is_include_test
+	mldv2exc_is_exclude_test
+	mldv2exc_to_exclude_test
+	mldv2inc_block_test
+	mldv2exc_block_test
+	mldv2exc_timeout_test
+	mldv2star_ex_auto_add_test
+	mldv2per_vlan_snooping_port_stp_test
+	mldv2per_vlan_snooping_vlan_stp_test
+"
 NUM_NETIFS=4
 CHECK_TC="yes"
 TEST_GROUP="ff02::cc"
@@ -554,6 +567,66 @@ mldv2star_ex_auto_add_test()
 	mldv2cleanup $swp2
 }
 
+mldv2per_vlan_snooping_stp_test()
+{
+	local is_port=$1
+	# $1 == 1: toggle the port's STP state; anything else: vlan 1's state on the port
+	local msg="port"
+	[[ $is_port -ne 1 ]] && msg="vlan"
+	# per-vlan snooping plus a querier on vlan 1; stats on so tx_v2 can be sampled
+	ip link set br0 up type bridge vlan_filtering 1 \
+					mcast_mld_version 2 \
+					mcast_snooping 1 \
+					mcast_vlan_snooping 1 \
+					mcast_querier 1 \
+					mcast_stats_enabled 1
+	bridge vlan global set vid 1 dev br0 \
+					mcast_mld_version 2 \
+					mcast_snooping 1 \
+					mcast_querier 1 \
+					mcast_query_interval 100 \
+					mcast_startup_query_count 0
+	# set state 0 on the port (or 4 on vlan 1), wait, take the first tx_v2 sample
+	[[ $is_port -eq 1 ]] && bridge link set dev $swp1 state 0
+	[[ $is_port -ne 1 ]] && bridge vlan set vid 1 dev $swp1 state 4
+	sleep 5
+	local tx_s=$(ip -j -p stats show dev $swp1 \
+			group xstats_slave subgroup bridge suite mcast \
+			| jq '.[]["multicast"]["mld_queries"]["tx_v2"]')
+	[[ $is_port -eq 1 ]] && bridge link set dev $swp1 state 3
+	[[ $is_port -ne 1 ]] && bridge vlan set vid 1 dev $swp1 state 3
+	sleep 5
+	local tx_e=$(ip -j -p stats show dev $swp1 \
+			group xstats_slave subgroup bridge suite mcast \
+			| jq '.[]["multicast"]["mld_queries"]["tx_v2"]')
+	# the MLDv2 query counter must have grown between the two samples
+	RET=0
+	local tx=$(expr $tx_e - $tx_s)
+	test $tx -gt 0
+	check_err $? "No MLD queries after STP state becomes forwarding"
+	log_test "per vlan snooping with $msg stp state change"
+
+	# restore settings
+	bridge vlan global set vid 1 dev br0 \
+					mcast_querier 0 \
+					mcast_query_interval 12500 \
+					mcast_startup_query_count 2 \
+					mcast_mld_version 1
+	ip link set br0 up type bridge vlan_filtering 0 \
+					mcast_vlan_snooping 0 \
+					mcast_stats_enabled 0
+}
+
+mldv2per_vlan_snooping_port_stp_test()
+{
+	mldv2per_vlan_snooping_stp_test 1	# 1: change the STP state with "bridge link set"
+}
+
+mldv2per_vlan_snooping_vlan_stp_test()
+{
+	mldv2per_vlan_snooping_stp_test 0	# not 1: change vlan 1's state with "bridge vlan set"
+}
+
 trap cleanup EXIT
 
 setup_prepare
diff --git a/tools/testing/selftests/net/forwarding/config b/tools/testing/selftests/net/forwarding/config
index 8d7a1a004b7c..18fd69d8d937 100644
--- a/tools/testing/selftests/net/forwarding/config
+++ b/tools/testing/selftests/net/forwarding/config
@@ -1,6 +1,7 @@
 CONFIG_BRIDGE=m
 CONFIG_VLAN_8021Q=m
 CONFIG_BRIDGE_VLAN_FILTERING=y
+CONFIG_BRIDGE_IGMP_SNOOPING=y
 CONFIG_NET_L3_MASTER_DEV=y
 CONFIG_IPV6_MULTIPLE_TABLES=y
 CONFIG_NET_VRF=m
diff --git a/tools/testing/selftests/net/icmp_redirect.sh b/tools/testing/selftests/net/icmp_redirect.sh
index d6f0e449c029..b13c89a99ecb 100755
--- a/tools/testing/selftests/net/icmp_redirect.sh
+++ b/tools/testing/selftests/net/icmp_redirect.sh
@@ -178,8 +178,6 @@ setup()
 		else
 			ip netns exec $ns sysctl -q -w net.ipv4.ip_forward=1
 			ip netns exec $ns sysctl -q -w net.ipv4.conf.all.send_redirects=1
-			ip netns exec $ns sysctl -q -w net.ipv4.conf.default.rp_filter=0
-			ip netns exec $ns sysctl -q -w net.ipv4.conf.all.rp_filter=0
 
 			ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=1
 			ip netns exec $ns sysctl -q -w net.ipv6.route.mtu_expires=10
diff --git a/tools/testing/selftests/net/ipv6_route_update_soft_lockup.sh b/tools/testing/selftests/net/ipv6_route_update_soft_lockup.sh
index a6b2b1f9c641..c6866e42f95c 100755
--- a/tools/testing/selftests/net/ipv6_route_update_soft_lockup.sh
+++ b/tools/testing/selftests/net/ipv6_route_update_soft_lockup.sh
@@ -69,7 +69,6 @@
 # which can affect the conditions needed to trigger a soft lockup.
 
 source lib.sh
-source net_helper.sh
 
 TEST_DURATION=300
 ROUTING_TABLE_REFRESH_PERIOD=0.01
diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh
index 701905eeff66..006fdadcc4b9 100644
--- a/tools/testing/selftests/net/lib.sh
+++ b/tools/testing/selftests/net/lib.sh
@@ -217,6 +217,8 @@ setup_ns()
 			return $ksft_skip
 		fi
 		ip -n "${!ns_name}" link set lo up
+		ip netns exec "${!ns_name}" sysctl -wq net.ipv4.conf.all.rp_filter=0
+		ip netns exec "${!ns_name}" sysctl -wq net.ipv4.conf.default.rp_filter=0
 		ns_list+=("${!ns_name}")
 	done
 	NS_LIST+=("${ns_list[@]}")
@@ -270,6 +272,30 @@ tc_rule_handle_stats_get()
 		  .options.actions[0].stats$selector"
 }
 
+# attach a qdisc with two children match/no-match and a flower filter to match
+tc_set_flower_counter() {
+	local -r ns=$1
+	local -r ipver=$2
+	local -r dev=$3
+	local -r flower_expr=$4
+	# args: namespace, IP version digit (callers pass 4 or 6), device, flower match
+	tc -n $ns qdisc add dev $dev root handle 1: prio bands 2 \
+			priomap 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+	# one pfifo child per band: 11: under 1:1 and 12: under 1:2
+	tc -n $ns qdisc add dev $dev parent 1:1 handle 11: pfifo
+	tc -n $ns qdisc add dev $dev parent 1:2 handle 12: pfifo
+	# $flower_expr is unquoted so it splits into separate tc words; matches go to 1:2
+	tc -n $ns filter add dev $dev parent 1: protocol ipv$ipver \
+			flower $flower_expr classid 1:2
+}
+
+tc_get_flower_counter() {	# usage: <ns> <dev>; prints the packet count of qdisc 12:
+	local -r ns=$1
+	local -r dev=$2
+	# the jq filter is quoted so the shell cannot treat .[0] as a glob pattern
+	tc -n $ns -j -s qdisc show dev $dev handle 12: | jq '.[0].packets'
+}
+
 ret_set_ksft_status()
 {
 	local ksft_status=$1; shift
@@ -569,3 +595,24 @@ bridge_vlan_add()
 	bridge vlan add "$@"
 	defer bridge vlan del "$@"
 }
+
+wait_local_port_listen()
+{
+	local listener_ns="${1}"
+	local port="${2}"
+	local protocol="${3}"
+	local pattern
+	local i
+
+	pattern=":$(printf "%04X" "${port}") "
+
+	# for tcp protocol additionally check the socket state
+	[ ${protocol} = "tcp" ] && pattern="${pattern}0A"
+	for i in $(seq 10); do
+		if ip netns exec "${listener_ns}" awk '{print $2" "$4}' \
+		   /proc/net/"${protocol}"* | grep -q "${pattern}"; then
+			break
+		fi
+		sleep 0.1
+	done
+}
diff --git a/tools/testing/selftests/net/lib/.gitignore b/tools/testing/selftests/net/lib/.gitignore
index 1ebc6187f421..bbc97d6bf556 100644
--- a/tools/testing/selftests/net/lib/.gitignore
+++ b/tools/testing/selftests/net/lib/.gitignore
@@ -1,2 +1,3 @@
 # SPDX-License-Identifier: GPL-2.0-only
 csum
+xdp_helper
diff --git a/tools/testing/selftests/net/lib/Makefile b/tools/testing/selftests/net/lib/Makefile
index c22623b9a2a5..88c4bc461459 100644
--- a/tools/testing/selftests/net/lib/Makefile
+++ b/tools/testing/selftests/net/lib/Makefile
@@ -10,6 +10,7 @@ TEST_FILES += ../../../../net/ynl
 
 TEST_GEN_FILES += csum
 TEST_GEN_FILES += $(patsubst %.c,%.o,$(wildcard *.bpf.c))
+TEST_GEN_FILES += xdp_helper
 
 TEST_INCLUDES := $(wildcard py/*.py sh/*.sh)
 
diff --git a/tools/testing/selftests/net/lib/ksft.h b/tools/testing/selftests/net/lib/ksft.h
new file mode 100644
index 000000000000..17dc34a612c6
--- /dev/null
+++ b/tools/testing/selftests/net/lib/ksft.h
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#if !defined(__NET_KSFT_H__)
+#define __NET_KSFT_H__
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+/* Signal readiness: write "ready\n" to the fd named by $KSFT_READY_FD,
+ * or to stdout when that variable is unset. */
+static inline void ksft_ready(void)
+{
+	/* sizeof(msg) is 7, so the terminating NUL is written as well */
+	const char msg[7] = "ready\n";
+	const char *fd_str = getenv("KSFT_READY_FD");
+	int fd = STDOUT_FILENO;
+
+	if (fd_str) {
+		fd = atoi(fd_str);
+		if (fd == 0) {
+			fprintf(stderr, "invalid KSFT_READY_FD = '%s'\n",
+				fd_str);
+			return;
+		}
+	}
+
+	write(fd, msg, sizeof(msg));
+	if (fd != STDOUT_FILENO)
+		close(fd);
+}
+
+/* Read one byte from the fd named by $KSFT_WAIT_FD and close that fd
+ * afterwards; stdin is read (and left open) when the variable is unset. */
+static inline void ksft_wait(void)
+{
+	const char *fd_str = getenv("KSFT_WAIT_FD");
+	/* Not running in KSFT env, wait for input from STDIN instead */
+	int fd = STDIN_FILENO;
+	char byte;
+
+	if (fd_str) {
+		fd = atoi(fd_str);
+		if (fd == 0) {
+			fprintf(stderr, "invalid KSFT_WAIT_FD = '%s'\n",
+				fd_str);
+			return;
+		}
+	}
+
+	/* the byte itself is never looked at */
+	read(fd, &byte, sizeof(byte));
+	if (fd != STDIN_FILENO)
+		close(fd);
+}
+
+#endif
diff --git a/tools/testing/selftests/net/lib/py/ksft.py b/tools/testing/selftests/net/lib/py/ksft.py
index 3cfad0fd4570..61287c203b6e 100644
--- a/tools/testing/selftests/net/lib/py/ksft.py
+++ b/tools/testing/selftests/net/lib/py/ksft.py
@@ -3,6 +3,7 @@
 import builtins
 import functools
 import inspect
+import signal
 import sys
 import time
 import traceback
@@ -26,6 +27,10 @@ class KsftXfailEx(Exception):
     pass
 
 
+class KsftTerminate(KeyboardInterrupt):
+    pass  # raised by _ksft_intr on the first SIGTERM
+
+
 def ksft_pr(*objs, **kwargs):
     print("#", *objs, **kwargs)
 
@@ -193,6 +198,17 @@ def ksft_setup(env):
     return env
 
 
+def _ksft_intr(signum, frame):
+    # ksft runner.sh sends 2 SIGTERMs in a row on a timeout
+    # if we don't ignore the second one it will stop us from handling cleanup
+    global term_cnt
+    term_cnt += 1
+    if term_cnt == 1:
+        raise KsftTerminate()
+    else:
+        ksft_pr(f"Ignoring SIGTERM (cnt: {term_cnt}), already exiting...")
+
+
 def ksft_run(cases=None, globs=None, case_pfx=None, args=()):
     cases = cases or []
 
@@ -205,6 +221,10 @@ def ksft_run(cases=None, globs=None, case_pfx=None, args=()):
                     cases.append(value)
                     break
 
+    global term_cnt
+    term_cnt = 0
+    prev_sigterm = signal.signal(signal.SIGTERM, _ksft_intr)
+
     totals = {"pass": 0, "fail": 0, "skip": 0, "xfail": 0}
 
     print("TAP version 13")
@@ -233,7 +253,7 @@ def ksft_run(cases=None, globs=None, case_pfx=None, args=()):
             for line in tb.strip().split('\n'):
                 ksft_pr("Exception|", line)
             if stop:
-                ksft_pr("Stopping tests due to KeyboardInterrupt.")
+                ksft_pr(f"Stopping tests due to {type(e).__name__}.")
             KSFT_RESULT = False
             cnt_key = 'fail'
 
@@ -248,6 +268,8 @@ def ksft_run(cases=None, globs=None, case_pfx=None, args=()):
         if stop:
             break
 
+    signal.signal(signal.SIGTERM, prev_sigterm)
+
     print(
         f"# Totals: pass:{totals['pass']} fail:{totals['fail']} xfail:{totals['xfail']} xpass:0 skip:{totals['skip']} error:0"
     )
diff --git a/tools/testing/selftests/net/lib/py/ynl.py b/tools/testing/selftests/net/lib/py/ynl.py
index 8986c584cb37..6329ae805abf 100644
--- a/tools/testing/selftests/net/lib/py/ynl.py
+++ b/tools/testing/selftests/net/lib/py/ynl.py
@@ -39,12 +39,12 @@ class EthtoolFamily(YnlFamily):
 
 class RtnlFamily(YnlFamily):
     def __init__(self, recv_size=0):
-        super().__init__((SPEC_PATH / Path('rt_link.yaml')).as_posix(),
+        super().__init__((SPEC_PATH / Path('rt-link.yaml')).as_posix(),
                          schema='', recv_size=recv_size)
 
 class RtnlAddrFamily(YnlFamily):
     def __init__(self, recv_size=0):
-        super().__init__((SPEC_PATH / Path('rt_addr.yaml')).as_posix(),
+        super().__init__((SPEC_PATH / Path('rt-addr.yaml')).as_posix(),
                          schema='', recv_size=recv_size)
 
 class NetdevFamily(YnlFamily):
diff --git a/tools/testing/selftests/drivers/net/xdp_helper.c b/tools/testing/selftests/net/lib/xdp_helper.c
index aeed25914104..eb025a9f35b1 100644
--- a/tools/testing/selftests/drivers/net/xdp_helper.c
+++ b/tools/testing/selftests/net/lib/xdp_helper.c
@@ -11,55 +11,16 @@
 #include <net/if.h>
 #include <inttypes.h>
 
+#include "ksft.h"
+
 #define UMEM_SZ (1U << 16)
 #define NUM_DESC (UMEM_SZ / 2048)
 
-/* Move this to a common header when reused! */
-static void ksft_ready(void)
-{
-	const char msg[7] = "ready\n";
-	char *env_str;
-	int fd;
-
-	env_str = getenv("KSFT_READY_FD");
-	if (env_str) {
-		fd = atoi(env_str);
-		if (!fd) {
-			fprintf(stderr, "invalid KSFT_READY_FD = '%s'\n",
-				env_str);
-			return;
-		}
-	} else {
-		fd = STDOUT_FILENO;
-	}
-
-	write(fd, msg, sizeof(msg));
-	if (fd != STDOUT_FILENO)
-		close(fd);
-}
 
-static void ksft_wait(void)
+static void print_usage(const char *bin)
 {
-	char *env_str;
-	char byte;
-	int fd;
-
-	env_str = getenv("KSFT_WAIT_FD");
-	if (env_str) {
-		fd = atoi(env_str);
-		if (!fd) {
-			fprintf(stderr, "invalid KSFT_WAIT_FD = '%s'\n",
-				env_str);
-			return;
-		}
-	} else {
-		/* Not running in KSFT env, wait for input from STDIN instead */
-		fd = STDIN_FILENO;
-	}
-
-	read(fd, &byte, sizeof(byte));
-	if (fd != STDIN_FILENO)
-		close(fd);
+	fprintf(stderr, "Usage: %s ifindex queue_id [-z]\n\n"
+		"where:\n\t-z: force zerocopy mode", bin);
 }
 
 /* this is a simple helper program that creates an XDP socket and does the
@@ -77,12 +38,13 @@ int main(int argc, char **argv)
 	struct sockaddr_xdp sxdp = { 0 };
 	int num_desc = NUM_DESC;
 	void *umem_area;
+	int retry = 0;
 	int ifindex;
 	int sock_fd;
 	int queue;
 
-	if (argc != 3) {
-		fprintf(stderr, "Usage: %s ifindex queue_id\n", argv[0]);
+	if (argc != 3 && argc != 4) {
+		print_usage(argv[0]);
 		return 1;
 	}
 
@@ -132,11 +94,29 @@ int main(int argc, char **argv)
 	sxdp.sxdp_queue_id = queue;
 	sxdp.sxdp_flags = 0;
 
-	if (bind(sock_fd, (struct sockaddr *)&sxdp, sizeof(sxdp)) != 0) {
-		munmap(umem_area, UMEM_SZ);
-		perror("bind failed");
-		close(sock_fd);
-		return 1;
+	if (argc > 3) {
+		if (!strcmp(argv[3], "-z")) {
+			sxdp.sxdp_flags = XDP_ZEROCOPY;
+		} else {
+			print_usage(argv[0]);
+			return 1;
+		}
+	}
+
+	while (1) {
+		if (bind(sock_fd, (struct sockaddr *)&sxdp, sizeof(sxdp)) == 0)
+			break;
+
+		if (errno == EBUSY && retry < 3) {
+			retry++;
+			sleep(1);
+			continue;
+		} else {
+			perror("bind failed");
+			munmap(umem_area, UMEM_SZ);
+			close(sock_fd);
+			return 1;
+		}
 	}
 
 	ksft_ready();
diff --git a/tools/testing/selftests/net/mptcp/Makefile b/tools/testing/selftests/net/mptcp/Makefile
index 340e1a777e16..e47788bfa671 100644
--- a/tools/testing/selftests/net/mptcp/Makefile
+++ b/tools/testing/selftests/net/mptcp/Makefile
@@ -11,7 +11,7 @@ TEST_GEN_FILES = mptcp_connect pm_nl_ctl mptcp_sockopt mptcp_inq mptcp_diag
 
 TEST_FILES := mptcp_lib.sh settings
 
-TEST_INCLUDES := ../lib.sh $(wildcard ../lib/sh/*.sh) ../net_helper.sh
+TEST_INCLUDES := ../lib.sh $(wildcard ../lib/sh/*.sh)
 
 EXTRA_CLEAN := *.pcap
 
diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh
index e7a75341f0f3..7a3cb4c09e45 100755
--- a/tools/testing/selftests/net/mptcp/diag.sh
+++ b/tools/testing/selftests/net/mptcp/diag.sh
@@ -225,6 +225,37 @@ chk_dump_one()
 	fi
 }
 
+# Cross-check ss against mptcp_diag: take the first "addr:port addr:port"
+# pair ss reports in $ns, query it with "mptcp_diag -s" and require the
+# token printed there to equal the first token ss printed.
+chk_dump_subflow()
+{
+	local -r title="....chk dump_subflow"
+	local ss_dump addr_pair want got
+
+	ss_dump=$(ss -tniN $ns)
+
+	addr_pair=$(printf '%s\n' "$ss_dump" | \
+		    grep -m1 -Eo '[0-9.]+:[0-9].+ +[0-9.]+:[0-9.]+')
+
+	want=$(printf '%s\n' "$ss_dump" | grep -m1 -Eo 'token:[^ ]+')
+
+	got=$(ip netns exec $ns ./mptcp_diag -s "$addr_pair" | \
+	      grep -Eo 'token:[^ ]+')
+
+	mptcp_lib_print_title "$title"
+	# an empty ss token never counts as a match
+	if [ -z "$want" ] || [ "$want" != "$got" ]; then
+		mptcp_lib_pr_fail "expected $want found $got"
+		mptcp_lib_result_fail "${title}"
+		ret=${KSFT_FAIL}
+		return
+	fi
+
+	mptcp_lib_pr_ok
+	mptcp_lib_result_pass "${title}"
+}
+
 msk_info_get_value()
 {
 	local port="${1}"
@@ -316,6 +347,7 @@ chk_msk_fallback_nr 0 "....chk no fallback"
 chk_msk_inuse 2
 chk_msk_cestab 2
 chk_dump_one
+chk_dump_subflow
 flush_pids
 
 chk_msk_inuse 0 "2->0"
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c
index c83a8b47bbdf..ac1349c4b9e5 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c
@@ -180,13 +180,26 @@ static void xgetnameinfo(const struct sockaddr *addr, socklen_t addrlen,
 }
 
 static void xgetaddrinfo(const char *node, const char *service,
-			 const struct addrinfo *hints,
+			 struct addrinfo *hints,
 			 struct addrinfo **res)
 {
+again:
 	int err = getaddrinfo(node, service, hints, res);
 
 	if (err) {
-		const char *errstr = getxinfo_strerr(err);
+		const char *errstr;
+
+		/* glibc starts to support MPTCP since v2.42.
+		 * For older versions, use IPPROTO_TCP to resolve,
+		 * and use TCP/MPTCP to create socket.
+		 * Link: https://sourceware.org/git/?p=glibc.git;a=commit;h=a8e9022e0f82
+		 */
+		if (err == EAI_SOCKTYPE) {
+			hints->ai_protocol = IPPROTO_TCP;
+			goto again;
+		}
+
+		errstr = getxinfo_strerr(err);
 
 		fprintf(stderr, "Fatal: getaddrinfo(%s:%s): %s\n",
 			node ? node : "", service ? service : "", errstr);
@@ -292,7 +305,7 @@ static int sock_listen_mptcp(const char * const listenaddr,
 {
 	int sock = -1;
 	struct addrinfo hints = {
-		.ai_protocol = IPPROTO_TCP,
+		.ai_protocol = IPPROTO_MPTCP,
 		.ai_socktype = SOCK_STREAM,
 		.ai_flags = AI_PASSIVE | AI_NUMERICHOST
 	};
@@ -356,7 +369,7 @@ static int sock_connect_mptcp(const char * const remoteaddr,
 			      int infd, struct wstate *winfo)
 {
 	struct addrinfo hints = {
-		.ai_protocol = IPPROTO_TCP,
+		.ai_protocol = IPPROTO_MPTCP,
 		.ai_socktype = SOCK_STREAM,
 	};
 	struct addrinfo *a, *addr;
diff --git a/tools/testing/selftests/net/mptcp/mptcp_diag.c b/tools/testing/selftests/net/mptcp/mptcp_diag.c
index 284286c524cf..e084796e804d 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_diag.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_diag.c
@@ -8,6 +8,7 @@
 #include <sys/socket.h>
 #include <netinet/in.h>
 #include <linux/tcp.h>
+#include <arpa/inet.h>
 
 #include <unistd.h>
 #include <stdlib.h>
@@ -19,6 +20,15 @@
 #define IPPROTO_MPTCP 262
 #endif
 
+#define parse_rtattr_nested(tb, max, rta) \
+	(parse_rtattr_flags((tb), (max), RTA_DATA(rta), RTA_PAYLOAD(rta), \
+			    NLA_F_NESTED))	/* parse rta's payload as a nested attribute list into tb[] */
+
+struct params {	/* handed to parse_opts() */
+	__u32 target_token;	/* presumably the "-t <token>" argument - confirm in parse_opts() */
+	char subflow_addrs[1024];	/* presumably the "-s" address-pair string - confirm in parse_opts() */
+};
+
 struct mptcp_info {
 	__u8	mptcpi_subflows;
 	__u8	mptcpi_add_addr_signal;
@@ -46,6 +56,37 @@ struct mptcp_info {
 	__u32	mptcpi_last_ack_recv;
 };
 
+enum {	/* attribute ids inside the nested INET_ULP_INFO_MPTCP attribute */
+	MPTCP_SUBFLOW_ATTR_UNSPEC,
+	MPTCP_SUBFLOW_ATTR_TOKEN_REM,
+	MPTCP_SUBFLOW_ATTR_TOKEN_LOC,
+	MPTCP_SUBFLOW_ATTR_RELWRITE_SEQ,
+	MPTCP_SUBFLOW_ATTR_MAP_SEQ,
+	MPTCP_SUBFLOW_ATTR_MAP_SFSEQ,
+	MPTCP_SUBFLOW_ATTR_SSN_OFFSET,
+	MPTCP_SUBFLOW_ATTR_MAP_DATALEN,
+	MPTCP_SUBFLOW_ATTR_FLAGS,
+	MPTCP_SUBFLOW_ATTR_ID_REM,
+	MPTCP_SUBFLOW_ATTR_ID_LOC,
+	MPTCP_SUBFLOW_ATTR_PAD,
+
+	__MPTCP_SUBFLOW_ATTR_MAX
+};
+/* highest attribute id; sizes the sfinfo[] table in parse_nlmsg() */
+#define MPTCP_SUBFLOW_ATTR_MAX (__MPTCP_SUBFLOW_ATTR_MAX - 1)
+/* bits of the MPTCP_SUBFLOW_ATTR_FLAGS value, decoded in print_subflow_info() */
+#define MPTCP_SUBFLOW_FLAG_MCAP_REM		_BITUL(0)
+#define MPTCP_SUBFLOW_FLAG_MCAP_LOC		_BITUL(1)
+#define MPTCP_SUBFLOW_FLAG_JOIN_REM		_BITUL(2)
+#define MPTCP_SUBFLOW_FLAG_JOIN_LOC		_BITUL(3)
+#define MPTCP_SUBFLOW_FLAG_BKUP_REM		_BITUL(4)
+#define MPTCP_SUBFLOW_FLAG_BKUP_LOC		_BITUL(5)
+#define MPTCP_SUBFLOW_FLAG_FULLY_ESTABLISHED	_BITUL(6)
+#define MPTCP_SUBFLOW_FLAG_CONNECTED		_BITUL(7)
+#define MPTCP_SUBFLOW_FLAG_MAPVALID		_BITUL(8)
+/* read an attribute's payload as a value of the given type */
+#define rta_getattr(type, value)		(*(type *)RTA_DATA(value))
+
 static void die_perror(const char *msg)
 {
 	perror(msg);
@@ -54,11 +95,13 @@ static void die_perror(const char *msg)
 
 static void die_usage(int r)
 {
-	fprintf(stderr, "Usage: mptcp_diag -t\n");
+	fprintf(stderr, "Usage:\n"
+			"mptcp_diag -t <token>\n"
+			"mptcp_diag -s \"<saddr>:<sport> <daddr>:<dport>\"\n");
 	exit(r);
 }
 
-static void send_query(int fd, __u32 token)
+static void send_query(int fd, struct inet_diag_req_v2 *r, __u32 proto)
 {
 	struct sockaddr_nl nladdr = {
 		.nl_family = AF_NETLINK
@@ -72,31 +115,26 @@ static void send_query(int fd, __u32 token)
 			.nlmsg_type = SOCK_DIAG_BY_FAMILY,
 			.nlmsg_flags = NLM_F_REQUEST
 		},
-		.r = {
-			.sdiag_family = AF_INET,
-			/* Real proto is set via INET_DIAG_REQ_PROTOCOL */
-			.sdiag_protocol = IPPROTO_TCP,
-			.id.idiag_cookie[0] = token,
-		}
+		.r = *r
 	};
 	struct rtattr rta_proto;
 	struct iovec iov[6];
-	int iovlen = 1;
-	__u32 proto;
-
-	req.r.idiag_ext |= (1 << (INET_DIAG_INFO - 1));
-	proto = IPPROTO_MPTCP;
-	rta_proto.rta_type = INET_DIAG_REQ_PROTOCOL;
-	rta_proto.rta_len = RTA_LENGTH(sizeof(proto));
+	int iovlen = 0;
 
-	iov[0] = (struct iovec) {
+	iov[iovlen++] = (struct iovec) {
 		.iov_base = &req,
 		.iov_len = sizeof(req)
 	};
-	iov[iovlen] = (struct iovec){ &rta_proto, sizeof(rta_proto)};
-	iov[iovlen + 1] = (struct iovec){ &proto, sizeof(proto)};
-	req.nlh.nlmsg_len += RTA_LENGTH(sizeof(proto));
-	iovlen += 2;
+
+	if (proto == IPPROTO_MPTCP) {
+		rta_proto.rta_type = INET_DIAG_REQ_PROTOCOL;
+		rta_proto.rta_len = RTA_LENGTH(sizeof(proto));
+
+		iov[iovlen++] = (struct iovec){ &rta_proto, sizeof(rta_proto)};
+		iov[iovlen++] = (struct iovec){ &proto, sizeof(proto)};
+		req.nlh.nlmsg_len += RTA_LENGTH(sizeof(proto));
+	}
+
 	struct msghdr msg = {
 		.msg_name = &nladdr,
 		.msg_namelen = sizeof(nladdr),
@@ -160,7 +198,67 @@ static void print_info_msg(struct mptcp_info *info)
 	printf("bytes_acked:      %llu\n", info->mptcpi_bytes_acked);
 }
 
-static void parse_nlmsg(struct nlmsghdr *nlh)
+/*
+ * 'print_subflow_info' is from 'mptcp_subflow_info'
+ * which is a function in 'misc/ss.c' of iproute2.
+ * Prints a heading, then on one line every attribute that is present in tb[].
+ */
+static void print_subflow_info(struct rtattr *tb[])
+{
+	/* flag bit -> letter, listed in output order */
+	static const struct {
+		u_int32_t bit;
+		char letter;
+	} flag_letters[] = {
+		{ MPTCP_SUBFLOW_FLAG_MCAP_REM,		'M' },
+		{ MPTCP_SUBFLOW_FLAG_MCAP_LOC,		'm' },
+		{ MPTCP_SUBFLOW_FLAG_JOIN_REM,		'J' },
+		{ MPTCP_SUBFLOW_FLAG_JOIN_LOC,		'j' },
+		{ MPTCP_SUBFLOW_FLAG_BKUP_REM,		'B' },
+		{ MPTCP_SUBFLOW_FLAG_BKUP_LOC,		'b' },
+		{ MPTCP_SUBFLOW_FLAG_FULLY_ESTABLISHED,	'e' },
+		{ MPTCP_SUBFLOW_FLAG_CONNECTED,		'c' },
+		{ MPTCP_SUBFLOW_FLAG_MAPVALID,		'v' },
+	};
+	u_int32_t flags = 0;
+
+	printf("It's a mptcp subflow, the subflow info:\n");
+	if (tb[MPTCP_SUBFLOW_ATTR_FLAGS]) {
+		char caps[32 + 1] = { 0 };
+		size_t i, n = 0;
+
+		flags = rta_getattr(__u32, tb[MPTCP_SUBFLOW_ATTR_FLAGS]);
+		for (i = 0; i < sizeof(flag_letters) / sizeof(flag_letters[0]); i++)
+			if (flags & flag_letters[i].bit)
+				caps[n++] = flag_letters[i].letter;
+		if (flags)
+			printf(" flags:%s", caps);
+	}
+	if (tb[MPTCP_SUBFLOW_ATTR_TOKEN_REM] &&
+	    tb[MPTCP_SUBFLOW_ATTR_TOKEN_LOC] &&
+	    tb[MPTCP_SUBFLOW_ATTR_ID_REM] &&
+	    tb[MPTCP_SUBFLOW_ATTR_ID_LOC])
+		printf(" token:%04x(id:%u)/%04x(id:%u)",
+		       rta_getattr(__u32, tb[MPTCP_SUBFLOW_ATTR_TOKEN_REM]),
+		       rta_getattr(__u8, tb[MPTCP_SUBFLOW_ATTR_ID_REM]),
+		       rta_getattr(__u32, tb[MPTCP_SUBFLOW_ATTR_TOKEN_LOC]),
+		       rta_getattr(__u8, tb[MPTCP_SUBFLOW_ATTR_ID_LOC]));
+	if (tb[MPTCP_SUBFLOW_ATTR_MAP_SEQ])
+		printf(" seq:%llu",
+		       rta_getattr(__u64, tb[MPTCP_SUBFLOW_ATTR_MAP_SEQ]));
+	if (tb[MPTCP_SUBFLOW_ATTR_MAP_SFSEQ])
+		printf(" sfseq:%u",
+		       rta_getattr(__u32, tb[MPTCP_SUBFLOW_ATTR_MAP_SFSEQ]));
+	if (tb[MPTCP_SUBFLOW_ATTR_SSN_OFFSET])
+		printf(" ssnoff:%u",
+		       rta_getattr(__u32, tb[MPTCP_SUBFLOW_ATTR_SSN_OFFSET]));
+	if (tb[MPTCP_SUBFLOW_ATTR_MAP_DATALEN])
+		printf(" maplen:%u",
+		       rta_getattr(__u32, tb[MPTCP_SUBFLOW_ATTR_MAP_DATALEN]));
+	printf("\n");
+}
+
+static void parse_nlmsg(struct nlmsghdr *nlh, __u32 proto)
 {
 	struct inet_diag_msg *r = NLMSG_DATA(nlh);
 	struct rtattr *tb[INET_DIAG_MAX + 1];
@@ -169,7 +267,7 @@ static void parse_nlmsg(struct nlmsghdr *nlh)
 			   nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*r)),
 			   NLA_F_NESTED);
 
-	if (tb[INET_DIAG_INFO]) {
+	if (proto == IPPROTO_MPTCP && tb[INET_DIAG_INFO]) {
 		int len = RTA_PAYLOAD(tb[INET_DIAG_INFO]);
 		struct mptcp_info *info;
 
@@ -183,11 +281,28 @@ static void parse_nlmsg(struct nlmsghdr *nlh)
 		}
 		print_info_msg(info);
 	}
+	if (proto == IPPROTO_TCP && tb[INET_DIAG_ULP_INFO]) {
+		struct rtattr *ulpinfo[INET_ULP_INFO_MAX + 1] = { 0 };
+
+		parse_rtattr_nested(ulpinfo, INET_ULP_INFO_MAX,
+				    tb[INET_DIAG_ULP_INFO]);
+
+		if (ulpinfo[INET_ULP_INFO_MPTCP]) {
+			struct rtattr *sfinfo[MPTCP_SUBFLOW_ATTR_MAX + 1] = { 0 };
+
+			parse_rtattr_nested(sfinfo, MPTCP_SUBFLOW_ATTR_MAX,
+					    ulpinfo[INET_ULP_INFO_MPTCP]);
+			print_subflow_info(sfinfo);
+		} else {
+			printf("It's a normal TCP!\n");
+		}
+	}
 }
 
-static void recv_nlmsg(int fd, struct nlmsghdr *nlh)
+static void recv_nlmsg(int fd, __u32 proto)
 {
 	char rcv_buff[8192];
+	struct nlmsghdr *nlh = (struct nlmsghdr *)rcv_buff;
 	struct sockaddr_nl rcv_nladdr = {
 		.nl_family = AF_NETLINK
 	};
@@ -204,7 +319,6 @@ static void recv_nlmsg(int fd, struct nlmsghdr *nlh)
 	int len;
 
 	len = recvmsg(fd, &rcv_msg, 0);
-	nlh = (struct nlmsghdr *)rcv_buff;
 
 	while (NLMSG_OK(nlh, len)) {
 		if (nlh->nlmsg_type == NLMSG_DONE) {
@@ -218,40 +332,84 @@ static void recv_nlmsg(int fd, struct nlmsghdr *nlh)
 			       -(err->error), strerror(-(err->error)));
 			break;
 		}
-		parse_nlmsg(nlh);
+		parse_nlmsg(nlh, proto);
 		nlh = NLMSG_NEXT(nlh, len);
 	}
 }
 
 static void get_mptcpinfo(__u32 token)
 {
-	struct nlmsghdr *nlh = NULL;
+	struct inet_diag_req_v2 r = {
+		.sdiag_family           = AF_INET,
+		/* Real proto is set via INET_DIAG_REQ_PROTOCOL */
+		.sdiag_protocol         = IPPROTO_TCP,
+		.idiag_ext              = 1 << (INET_DIAG_INFO - 1),
+		.id.idiag_cookie[0]     = token,
+	};
+	__u32 proto = IPPROTO_MPTCP;
 	int fd;
 
 	fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_SOCK_DIAG);
 	if (fd < 0)
 		die_perror("Netlink socket");
 
-	send_query(fd, token);
-	recv_nlmsg(fd, nlh);
+	send_query(fd, &r, proto);
+	recv_nlmsg(fd, proto);
 
 	close(fd);
 }
 
-static void parse_opts(int argc, char **argv, __u32 *target_token)
+/* Query one IPv4 TCP subflow given as "SADDR:SPORT DADDR:DPORT". */
+static void get_subflow_info(char *subflow_addrs)
+{
+	struct inet_diag_req_v2 r = {
+		.sdiag_family           = AF_INET,
+		.sdiag_protocol         = IPPROTO_TCP,
+		.idiag_ext              = 1 << (INET_DIAG_INFO - 1),
+		.id.idiag_cookie[0]     = INET_DIAG_NOCOOKIE,
+		.id.idiag_cookie[1]     = INET_DIAG_NOCOOKIE,
+	};
+	char saddr[64], daddr[64];
+	int sport, dport, ret, fd;
+
+	/* %63[^:] keeps sscanf() inside the 64-byte address buffers */
+	ret = sscanf(subflow_addrs, "%63[^:]:%d %63[^:]:%d", saddr, &sport, daddr, &dport);
+	if (ret != 4 || inet_pton(AF_INET, saddr, &r.id.idiag_src) != 1 ||
+	    inet_pton(AF_INET, daddr, &r.id.idiag_dst) != 1)
+		die_perror("IP PORT Pairs has style problems!");
+
+	printf("%s:%d -> %s:%d\n", saddr, sport, daddr, dport);
+
+	fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_SOCK_DIAG);
+	if (fd < 0)
+		die_perror("Netlink socket");
+
+	r.id.idiag_sport = htons(sport);
+	r.id.idiag_dport = htons(dport);
+
+	send_query(fd, &r, IPPROTO_TCP);
+	recv_nlmsg(fd, IPPROTO_TCP);
+	close(fd);
+}
+
+static void parse_opts(int argc, char **argv, struct params *p)
 {
 	int c;
 
 	if (argc < 2)
 		die_usage(1);
 
-	while ((c = getopt(argc, argv, "ht:")) != -1) {
+	while ((c = getopt(argc, argv, "ht:s:")) != -1) {
 		switch (c) {
 		case 'h':
 			die_usage(0);
 			break;
 		case 't':
-			sscanf(optarg, "%x", target_token);
+			sscanf(optarg, "%x", &p->target_token);
+			break;
+		case 's':
+			strncpy(p->subflow_addrs, optarg,
+				sizeof(p->subflow_addrs) - 1);
 			break;
 		default:
 			die_usage(1);
@@ -262,10 +420,15 @@ static void parse_opts(int argc, char **argv, __u32 *target_token)
 
 int main(int argc, char *argv[])
 {
-	__u32 target_token;
+	struct params p = { 0 };
+
+	parse_opts(argc, argv, &p);
+
+	if (p.target_token)
+		get_mptcpinfo(p.target_token);
 
-	parse_opts(argc, argv, &target_token);
-	get_mptcpinfo(target_token);
+	if (p.subflow_addrs[0] != '\0')
+		get_subflow_info(p.subflow_addrs);
 
 	return 0;
 }
diff --git a/tools/testing/selftests/net/mptcp/mptcp_inq.c b/tools/testing/selftests/net/mptcp/mptcp_inq.c
index 218aac467321..3cf1e2a612ce 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_inq.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_inq.c
@@ -72,13 +72,21 @@ static const char *getxinfo_strerr(int err)
 }
 
 static void xgetaddrinfo(const char *node, const char *service,
-			 const struct addrinfo *hints,
+			 struct addrinfo *hints,
 			 struct addrinfo **res)
 {
+again: ;	/* pre-C23, a label may not directly precede a declaration */
 	int err = getaddrinfo(node, service, hints, res);
 
 	if (err) {
-		const char *errstr = getxinfo_strerr(err);
+		const char *errstr;
+
+		if (err == EAI_SOCKTYPE) {
+			hints->ai_protocol = IPPROTO_TCP;
+			goto again;
+		}
+
+		errstr = getxinfo_strerr(err);
 
 		fprintf(stderr, "Fatal: getaddrinfo(%s:%s): %s\n",
 			node ? node : "", service ? service : "", errstr);
@@ -91,7 +99,7 @@ static int sock_listen_mptcp(const char * const listenaddr,
 {
 	int sock = -1;
 	struct addrinfo hints = {
-		.ai_protocol = IPPROTO_TCP,
+		.ai_protocol = IPPROTO_MPTCP,
 		.ai_socktype = SOCK_STREAM,
 		.ai_flags = AI_PASSIVE | AI_NUMERICHOST
 	};
@@ -136,7 +144,7 @@ static int sock_connect_mptcp(const char * const remoteaddr,
 			      const char * const port, int proto)
 {
 	struct addrinfo hints = {
-		.ai_protocol = IPPROTO_TCP,
+		.ai_protocol = IPPROTO_MPTCP,
 		.ai_socktype = SOCK_STREAM,
 	};
 	struct addrinfo *a, *addr;
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index befa66f5a366..b8af65373b3a 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -62,6 +62,7 @@ unset sflags
 unset fastclose
 unset fullmesh
 unset speed
+unset join_syn_rej
 unset join_csum_ns1
 unset join_csum_ns2
 unset join_fail_nr
@@ -1403,6 +1404,7 @@ chk_join_nr()
 	local syn_nr=$1
 	local syn_ack_nr=$2
 	local ack_nr=$3
+	local syn_rej=${join_syn_rej:-0}
 	local csum_ns1=${join_csum_ns1:-0}
 	local csum_ns2=${join_csum_ns2:-0}
 	local fail_nr=${join_fail_nr:-0}
@@ -1468,6 +1470,15 @@ chk_join_nr()
 		fail_test "got $count JOIN[s] ack HMAC failure expected 0"
 	fi
 
+	count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinRejected")
+	if [ -z "$count" ]; then
+		rc=${KSFT_SKIP}
+	elif [ "$count" != "$syn_rej" ]; then
+		rc=${KSFT_FAIL}
+		print_check "syn rejected"
+		fail_test "got $count JOIN[s] syn rejected expected $syn_rej"
+	fi
+
 	print_results "join Rx" ${rc}
 
 	join_syn_tx="${join_syn_tx:-${syn_nr}}" \
@@ -1963,7 +1974,8 @@ subflows_tests()
 		pm_nl_set_limits $ns2 0 1
 		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
 		run_tests $ns1 $ns2 10.0.1.1
-		chk_join_nr 1 1 0
+		join_syn_rej=1 \
+			chk_join_nr 1 1 0
 	fi
 
 	# subflow
@@ -1992,7 +2004,8 @@ subflows_tests()
 		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
 		pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow
 		run_tests $ns1 $ns2 10.0.1.1
-		chk_join_nr 2 2 1
+		join_syn_rej=1 \
+			chk_join_nr 2 2 1
 	fi
 
 	# single subflow, dev
@@ -3061,7 +3074,8 @@ syncookies_tests()
 		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
 		pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow
 		run_tests $ns1 $ns2 10.0.1.1
-		chk_join_nr 2 1 1
+		join_syn_rej=1 \
+			chk_join_nr 2 1 1
 	fi
 
 	# test signal address with cookies
@@ -3545,7 +3559,8 @@ userspace_tests()
 		pm_nl_set_limits $ns2 1 1
 		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
 		run_tests $ns1 $ns2 10.0.1.1
-		chk_join_nr 1 1 0
+		join_syn_rej=1 \
+			chk_join_nr 1 1 0
 	fi
 
 	# userspace pm type does not send join
@@ -3568,7 +3583,8 @@ userspace_tests()
 		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
 		sflags=backup speed=slow \
 			run_tests $ns1 $ns2 10.0.1.1
-		chk_join_nr 1 1 0
+		join_syn_rej=1 \
+			chk_join_nr 1 1 0
 		chk_prio_nr 0 0 0 0
 	fi
 
diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh
index 051e289d7967..09cd24b2ae46 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh
@@ -2,7 +2,6 @@
 # SPDX-License-Identifier: GPL-2.0
 
 . "$(dirname "${0}")/../lib.sh"
-. "$(dirname "${0}")/../net_helper.sh"
 
 readonly KSFT_PASS=0
 readonly KSFT_FAIL=1
@@ -331,12 +330,15 @@ mptcp_lib_result_print_all_tap() {
 
 # get the value of keyword $1 in the line marked by keyword $2
 mptcp_lib_get_info_value() {
-	grep "${2}" | sed -n 's/.*\('"${1}"':\)\([0-9a-f:.]*\).*$/\2/p;q'
+	grep "${2}" 2>/dev/null |
+		sed -n 's/.*\('"${1}"':\)\([0-9a-f:.]*\).*$/\2/p;q'
+		# the ';q' at the end limits to the first matched entry.
 }
 
 # $1: info name ; $2: evts_ns ; [$3: event type; [$4: addr]]
 mptcp_lib_evts_get_info() {
-	grep "${4:-}" "${2}" | mptcp_lib_get_info_value "${1}" "^type:${3:-1},"
+	grep "${4:-}" "${2}" 2>/dev/null |
+		mptcp_lib_get_info_value "${1}" "^type:${3:-1},"
 }
 
 # $1: PID
@@ -476,8 +478,6 @@ mptcp_lib_ns_init() {
 	local netns
 	for netns in "${@}"; do
 		ip netns exec "${!netns}" sysctl -q net.mptcp.enabled=1
-		ip netns exec "${!netns}" sysctl -q net.ipv4.conf.all.rp_filter=0
-		ip netns exec "${!netns}" sysctl -q net.ipv4.conf.default.rp_filter=0
 	done
 }
 
diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.c b/tools/testing/selftests/net/mptcp/mptcp_sockopt.c
index 926b0be87c99..9934a68df237 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.c
@@ -159,13 +159,21 @@ static const char *getxinfo_strerr(int err)
 }
 
 static void xgetaddrinfo(const char *node, const char *service,
-			 const struct addrinfo *hints,
+			 struct addrinfo *hints,
 			 struct addrinfo **res)
 {
+again: ;	/* pre-C23, a label may not directly precede a declaration */
 	int err = getaddrinfo(node, service, hints, res);
 
 	if (err) {
-		const char *errstr = getxinfo_strerr(err);
+		const char *errstr;
+
+		if (err == EAI_SOCKTYPE) {
+			hints->ai_protocol = IPPROTO_TCP;
+			goto again;
+		}
+
+		errstr = getxinfo_strerr(err);
 
 		fprintf(stderr, "Fatal: getaddrinfo(%s:%s): %s\n",
 			node ? node : "", service ? service : "", errstr);
@@ -178,7 +186,7 @@ static int sock_listen_mptcp(const char * const listenaddr,
 {
 	int sock = -1;
 	struct addrinfo hints = {
-		.ai_protocol = IPPROTO_TCP,
+		.ai_protocol = IPPROTO_MPTCP,
 		.ai_socktype = SOCK_STREAM,
 		.ai_flags = AI_PASSIVE | AI_NUMERICHOST
 	};
@@ -223,7 +231,7 @@ static int sock_connect_mptcp(const char * const remoteaddr,
 			      const char * const port, int proto)
 {
 	struct addrinfo hints = {
-		.ai_protocol = IPPROTO_TCP,
+		.ai_protocol = IPPROTO_MPTCP,
 		.ai_socktype = SOCK_STREAM,
 	};
 	struct addrinfo *a, *addr;
diff --git a/tools/testing/selftests/net/net_helper.sh b/tools/testing/selftests/net/net_helper.sh
deleted file mode 100644
index 6596fe03c77f..000000000000
--- a/tools/testing/selftests/net/net_helper.sh
+++ /dev/null
@@ -1,25 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-#
-# Helper functions
-
-wait_local_port_listen()
-{
-	local listener_ns="${1}"
-	local port="${2}"
-	local protocol="${3}"
-	local pattern
-	local i
-
-	pattern=":$(printf "%04X" "${port}") "
-
-	# for tcp protocol additionally check the socket state
-	[ ${protocol} = "tcp" ] && pattern="${pattern}0A"
-	for i in $(seq 10); do
-		if ip netns exec "${listener_ns}" awk '{print $2" "$4}' \
-		   /proc/net/"${protocol}"* | grep -q "${pattern}"; then
-			break
-		fi
-		sleep 0.1
-	done
-}
diff --git a/tools/testing/selftests/net/netfilter/Makefile b/tools/testing/selftests/net/netfilter/Makefile
index ffe161fac8b5..e9b2f553588d 100644
--- a/tools/testing/selftests/net/netfilter/Makefile
+++ b/tools/testing/selftests/net/netfilter/Makefile
@@ -12,6 +12,7 @@ TEST_PROGS += conntrack_dump_flush.sh
 TEST_PROGS += conntrack_icmp_related.sh
 TEST_PROGS += conntrack_ipip_mtu.sh
 TEST_PROGS += conntrack_tcp_unreplied.sh
+TEST_PROGS += conntrack_resize.sh
 TEST_PROGS += conntrack_sctp_collision.sh
 TEST_PROGS += conntrack_vrf.sh
 TEST_PROGS += conntrack_reverse_clash.sh
@@ -23,6 +24,7 @@ TEST_PROGS += nft_concat_range.sh
 TEST_PROGS += nft_conntrack_helper.sh
 TEST_PROGS += nft_fib.sh
 TEST_PROGS += nft_flowtable.sh
+TEST_PROGS += nft_interface_stress.sh
 TEST_PROGS += nft_meta.sh
 TEST_PROGS += nft_nat.sh
 TEST_PROGS += nft_nat_zones.sh
diff --git a/tools/testing/selftests/net/netfilter/br_netfilter.sh b/tools/testing/selftests/net/netfilter/br_netfilter.sh
index 1559ba275105..011de8763094 100755
--- a/tools/testing/selftests/net/netfilter/br_netfilter.sh
+++ b/tools/testing/selftests/net/netfilter/br_netfilter.sh
@@ -60,9 +60,6 @@ bcast_ping()
 	done
 }
 
-ip netns exec "$ns0" sysctl -q net.ipv4.conf.all.rp_filter=0
-ip netns exec "$ns0" sysctl -q net.ipv4.conf.default.rp_filter=0
-
 if ! ip link add veth1 netns "$ns0" type veth peer name eth0 netns "$ns1"; then
 	echo "SKIP: Can't create veth device"
 	exit $ksft_skip
diff --git a/tools/testing/selftests/net/netfilter/bridge_brouter.sh b/tools/testing/selftests/net/netfilter/bridge_brouter.sh
index 2549b6590693..ea76f2bc2f59 100755
--- a/tools/testing/selftests/net/netfilter/bridge_brouter.sh
+++ b/tools/testing/selftests/net/netfilter/bridge_brouter.sh
@@ -22,8 +22,6 @@ trap cleanup EXIT
 
 setup_ns nsbr ns1 ns2
 
-ip netns exec "$nsbr" sysctl -q net.ipv4.conf.default.rp_filter=0
-ip netns exec "$nsbr" sysctl -q net.ipv4.conf.all.rp_filter=0
 if ! ip link add veth0 netns "$nsbr" type veth peer name eth0 netns "$ns1"; then
 	echo "SKIP: Can't create veth device"
 	exit $ksft_skip
diff --git a/tools/testing/selftests/net/netfilter/config b/tools/testing/selftests/net/netfilter/config
index 43d8b500d391..363646f4fefe 100644
--- a/tools/testing/selftests/net/netfilter/config
+++ b/tools/testing/selftests/net/netfilter/config
@@ -46,6 +46,7 @@ CONFIG_NETFILTER_XT_MATCH_STATE=m
 CONFIG_NETFILTER_XT_MATCH_STRING=m
 CONFIG_NETFILTER_XT_TARGET_REDIRECT=m
 CONFIG_NF_CONNTRACK=m
+CONFIG_NF_CONNTRACK_PROCFS=y
 CONFIG_NF_CONNTRACK_EVENTS=y
 CONFIG_NF_CONNTRACK_FTP=m
 CONFIG_NF_CONNTRACK_MARK=y
diff --git a/tools/testing/selftests/net/netfilter/conntrack_resize.sh b/tools/testing/selftests/net/netfilter/conntrack_resize.sh
new file mode 100755
index 000000000000..9e033e80219e
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/conntrack_resize.sh
@@ -0,0 +1,427 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source lib.sh
+
+checktool "conntrack --version" "run test without conntrack"
+checktool "nft --version" "run test without nft tool"
+
+init_net_max=0
+ct_buckets=0
+tmpfile=""
+tmpfile_proc=""
+tmpfile_uniq=""
+ret=0
+
+insert_count=2000
+[ "$KSFT_MACHINE_SLOW" = "yes" ] && insert_count=400
+
+modprobe -q nf_conntrack	# result not checked; the sysctl probe below decides
+if ! sysctl -q net.netfilter.nf_conntrack_max >/dev/null;then
+	echo "SKIP: conntrack sysctls not available"
+	exit $ksft_skip
+fi
+
+init_net_max=$(sysctl -n net.netfilter.nf_conntrack_max) || exit 1
+ct_buckets=$(sysctl -n net.netfilter.nf_conntrack_buckets) || exit 1
+
+cleanup() {
+	cleanup_all_ns
+
+	rm -f "$tmpfile" "$tmpfile_proc" "$tmpfile_uniq"
+
+	# restore original sysctl setting
+	sysctl -q net.netfilter.nf_conntrack_max=$init_net_max
+	sysctl -q net.netfilter.nf_conntrack_buckets=$ct_buckets
+}
+trap cleanup EXIT
+
+check_max_alias()
+{
+	local expected="$1"
+	# old name, expected to alias to the first, i.e. changing one
+	# changes the other as well.
+	local lv=$(sysctl -n net.nf_conntrack_max)
+
+	if [ $expected -ne "$lv" ];then
+		echo "nf_conntrack_max sysctls should have identical values"
+		exit 1
+	fi
+}
+
+insert_ctnetlink() {
+	local ns="$1"
+	local count="$2"
+	local i=0
+	local bulk=16
+
+	while [ $i -lt $count ] ;do
+		ip netns exec "$ns" bash -c "for i in \$(seq 1 $bulk); do \
+			if ! conntrack -I -s \$((\$RANDOM%256)).\$((\$RANDOM%256)).\$((\$RANDOM%256)).\$((\$RANDOM%255+1)) \
+					  -d \$((\$RANDOM%256)).\$((\$RANDOM%256)).\$((\$RANDOM%256)).\$((\$RANDOM%255+1)) \
+					  --protonum 17 --timeout 3600 --status ASSURED,SEEN_REPLY --sport \$RANDOM --dport 53; then \
+					  return;\
+			fi & \
+		done ; wait" 2>/dev/null
+
+		i=$((i+bulk))
+	done
+}
+
+check_ctcount() {
+	local ns="$1"
+	local count="$2"
+	local msg="$3"
+
+	local now=$(ip netns exec "$ns" conntrack -C)
+
+	if [ $now -ne "$count" ] ;then
+		echo "expected $count entries in $ns, not $now: $msg"
+		exit 1
+	fi
+
+	echo "PASS: got $count connections: $msg"
+}
+
+ctresize() {
+	# Resize the conntrack hash table to random bucket counts,
+	# back to back, until $1 seconds have elapsed.
+	local runtime="$1"
+	local deadline=$(( $(date +%s) + runtime ))
+
+	until [ "$(date +%s)" -ge "$deadline" ]; do
+		sysctl -q net.netfilter.nf_conntrack_buckets=$RANDOM
+	done
+}
+
+do_rsleep() {
+	# sleep for a random whole number of seconds n, 0 <= n < $1
+	local max_secs="$1"
+	local secs=$((RANDOM % max_secs))
+
+	sleep "$secs"
+}
+
+ct_flush_once() {
+	local ns="$1"
+
+	ip netns exec "$ns" conntrack -F 2>/dev/null
+}
+
+ctflush() {
+	local ns="$1"
+	local duration="$2"
+	local now=$(date +%s)
+	local end=$((now + duration))
+
+	do_rsleep "$duration"
+
+        while [ $now -lt $end ]; do
+		ct_flush_once "$ns"
+		do_rsleep "$duration"
+		now=$(date +%s)
+        done
+}
+
+ctflood()
+{
+	local ns="$1"
+	local duration="$2"
+	local msg="$3"
+	local now=$(date +%s)
+	local end=$((now + duration))
+	local j=0
+	local k=0
+
+        while [ $now -lt $end ]; do
+		j=$((j%256))
+		k=$((k%256))
+
+		ip netns exec "$ns" bash -c \
+			"j=$j k=$k; for i in \$(seq 1 254); do ping -q -c 1 127.\$k.\$j.\$i & done; wait" >/dev/null 2>&1
+
+		j=$((j+1))
+
+		if [ $j -eq 256 ];then
+			k=$((k+1))
+		fi
+
+		now=$(date +%s)
+	done
+
+	wait
+}
+
+# dump to /dev/null.  We don't want dumps to cause infinite loops
+# or use-after-free even when conntrack table is altered while dumps
+# are in progress.
+ct_nulldump()
+{
+	local ns="$1"
+
+	ip netns exec "$ns" conntrack -L > /dev/null 2>&1 &
+
+	# Don't require /proc support in conntrack
+	if [ -r /proc/self/net/nf_conntrack ] ; then
+		ip netns exec "$ns" bash -c "wc -l < /proc/self/net/nf_conntrack" > /dev/null &
+	fi
+
+	wait
+}
+
+check_taint()
+{
+	local tainted_then="$1"
+	local msg="$2"
+
+	local tainted_now=0
+
+	if [ "$tainted_then" -ne 0 ];then
+		return
+	fi
+
+	read tainted_now < /proc/sys/kernel/tainted
+
+	if [ "$tainted_now" -eq 0 ];then
+		echo "PASS: $msg"
+	else
+		echo "TAINT: $msg"
+		dmesg
+		exit 1
+	fi
+}
+
+insert_flood()
+{
+	local n="$1"
+	local r=0
+
+	r=$((RANDOM%$insert_count))
+
+	ctflood "$n" "$timeout" "floodresize" &
+	insert_ctnetlink "$n" "$r" &
+	ctflush "$n" "$timeout" &
+	ct_nulldump "$n" &
+
+	wait
+}
+
+test_floodresize_all()
+{
+	local timeout=20
+	local n=""
+	local tainted_then=""
+
+	read tainted_then < /proc/sys/kernel/tainted
+
+	for n in "$nsclient1" "$nsclient2";do
+		insert_flood "$n" &
+	done
+
+	# resize table constantly while flood/insert/dump/flushes
+	# are happening in parallel.
+	ctresize "$timeout"
+
+	# wait for subshells to complete, everything is limited
+	# by $timeout.
+	wait
+
+	check_taint "$tainted_then" "resize+flood"
+}
+
+check_dump()
+{
+	# Cross-check the entry counts reported by conntrack -C, -L,
+	# -L --proto $2 and /proc for netns $1; sets ret=1 on mismatch.
+	local ns="$1"
+	local protoname="$2"
+	local c=0 l=0 proto=0 proc=0
+	local unique=""
+	local lret=0
+
+	# NOTE: assumes timeouts are large enough to not have
+	# expirations in all following tests.
+	l=$(ip netns exec "$ns" conntrack -L 2>/dev/null | sort | tee "$tmpfile" | wc -l)
+	c=$(ip netns exec "$ns" conntrack -C)
+
+	if [ "$c" -eq 0 ]; then
+		echo "FAIL: conntrack count for $ns is 0"
+		lret=1
+	fi
+
+	if [ "$c" -ne "$l" ]; then
+		echo "FAIL: conntrack count inconsistency for $ns -L: $c != $l"
+		lret=1
+	fi
+
+	# check the dump we retrieved is free of duplicated entries.
+	unique=$(uniq "$tmpfile" | tee "$tmpfile_uniq" | wc -l)
+	if [ "$l" -ne "$unique" ]; then
+		echo "FAIL: listing contained redundant entries for $ns: $l != $unique"
+		diff -u "$tmpfile" "$tmpfile_uniq"
+		lret=1
+	fi
+
+	# we either inserted icmp or only udp, hence, --proto should return same entry count as without filter.
+	proto=$(ip netns exec "$ns" conntrack -L --proto $protoname 2>/dev/null | sort | uniq | tee "$tmpfile_uniq" | wc -l)
+	if [ "$l" -ne "$proto" ]; then
+		echo "FAIL: dump inconsistency for $ns -L --proto $protoname: $l != $proto"
+		diff -u "$tmpfile" "$tmpfile_uniq"
+		lret=1
+	fi
+
+	if [ -r /proc/self/net/nf_conntrack ] ; then
+		proc=$(ip netns exec "$ns" bash -c "sort < /proc/self/net/nf_conntrack | tee \"$tmpfile_proc\" | wc -l")
+
+		if [ "$l" -ne "$proc" ]; then
+			echo "FAIL: proc inconsistency for $ns: $l != $proc"
+			lret=1
+		fi
+
+		proc=$(uniq "$tmpfile_proc" | tee "$tmpfile_uniq" | wc -l)
+		if [ "$l" -ne "$proc" ]; then
+			echo "FAIL: proc inconsistency after uniq filter for $ns: $l != $proc"
+			diff -u "$tmpfile_proc" "$tmpfile_uniq"
+			lret=1
+		fi
+	fi
+
+	if [ $lret -eq 0 ];then
+		echo "PASS: dump in netns $ns had same entry count (-C $c, -L $l, -p $proto, /proc $proc)"
+	else
+		echo "FAIL: dump in netns $ns had different entry count (-C $c, -L $l, -p $proto, /proc $proc)"
+		ret=1
+	fi
+}
+
+test_dump_all()
+{
+	local timeout=3
+	local tainted_then=""
+
+	read tainted_then < /proc/sys/kernel/tainted
+
+	ct_flush_once "$nsclient1"
+	ct_flush_once "$nsclient2"
+
+	ip netns exec "$nsclient1" sysctl -q net.netfilter.nf_conntrack_icmp_timeout=3600
+
+	ctflood "$nsclient1" $timeout "dumpall" &
+	insert_ctnetlink "$nsclient2" $insert_count
+
+	wait
+
+	check_dump "$nsclient1" "icmp"
+	check_dump "$nsclient2" "udp"
+
+	check_taint "$tainted_then" "test parallel conntrack dumps"
+}
+
+check_sysctl_immutable()
+{
+	# Try to bump sysctl $2 by one from inside netns $1.  Returns 0 if
+	# the value changed (writable from the netns), else 1.  A change
+	# sets ret=1 only when $3 is greater than 0.
+	local ns="$1"
+	local name="$2"
+	local failhard="$3"
+	local o=0 n=0
+
+	o=$(ip netns exec "$ns" sysctl -n "$name" 2>/dev/null)
+	[ -z "$o" ] && return 1
+	n=$((o+1))
+	# return value isn't reliable, need to read it back
+	ip netns exec "$ns" sysctl -q "$name"=$n 2>/dev/null >/dev/null
+	n=$(ip netns exec "$ns" sysctl -n "$name" 2>/dev/null)
+	[ -z "$n" ] && return 1
+
+	if [ "$o" -ne "$n" ]; then
+		if [ "$failhard" -gt 0 ] ;then
+			echo "FAIL: $name should not be changeable from namespace (now $n)"
+			ret=1
+		fi
+		return 0
+	fi
+
+	return 1
+}
+
+test_conntrack_max_limit()
+{
+	sysctl -q net.netfilter.nf_conntrack_max=100
+	insert_ctnetlink "$nsclient1" 101
+
+	# check netns is clamped by init_net, i.e., either netns follows
+	# init_net value, or a higher pernet limit (compared to init_net) is ignored.
+	check_ctcount "$nsclient1" 100 "netns conntrack_max is init_net bound"
+
+	sysctl -q net.netfilter.nf_conntrack_max=$init_net_max
+}
+
+test_conntrack_disable()
+{
+	local timeout=2
+
+	# disable conntrack pickups
+	ip netns exec "$nsclient1" nft flush table ip test_ct
+
+	ct_flush_once "$nsclient1"
+	ct_flush_once "$nsclient2"
+
+	ctflood "$nsclient1" "$timeout" "conntrack disable"
+	ip netns exec "$nsclient2" ping -q -c 1 127.0.0.1 >/dev/null 2>&1
+
+	# Disabled, should not have picked up any connection.
+	check_ctcount "$nsclient1" 0 "conntrack disabled"
+
+	# This one is still active, expect 1 connection.
+	check_ctcount "$nsclient2" 1 "conntrack enabled"
+}
+
+init_net_max=$(sysctl -n net.netfilter.nf_conntrack_max)
+
+check_max_alias $init_net_max
+
+sysctl -q net.netfilter.nf_conntrack_max="262000"
+check_max_alias 262000
+
+setup_ns nsclient1 nsclient2
+
+# check this only works from init_net ("net." is prepended in the loop body)
+for n in netfilter.nf_conntrack_buckets netfilter.nf_conntrack_expect_max nf_conntrack_max;do
+	check_sysctl_immutable "$nsclient1" "net.$n" 1
+done
+
+# won't work on older kernels. If it works, check that the netns obeys the limit
+if check_sysctl_immutable "$nsclient1" net.netfilter.nf_conntrack_max 0;then
+	# subtest: if pernet is changeable, check that reducing it in pernet
+	# limits the pernet entries.  Inverse, pernet clamped by a lower init_net
+	# setting, is already checked by "test_conntrack_max_limit" test.
+
+	ip netns exec "$nsclient1" sysctl -q net.netfilter.nf_conntrack_max=1
+	insert_ctnetlink "$nsclient1" 2
+	check_ctcount "$nsclient1" 1 "netns conntrack_max is pernet bound"
+	ip netns exec "$nsclient1" sysctl -q net.netfilter.nf_conntrack_max=$init_net_max
+fi
+
+for n in "$nsclient1" "$nsclient2";do
+# enable conntrack in both namespaces
+ip netns exec "$n" nft -f - <<EOF
+table ip test_ct {
+	chain input {
+		type filter hook input priority 0
+		ct state new counter
+	}
+}
+EOF
+done
+
+tmpfile=$(mktemp)
+tmpfile_proc=$(mktemp)
+tmpfile_uniq=$(mktemp)
+test_conntrack_max_limit
+test_dump_all
+test_floodresize_all
+test_conntrack_disable
+
+exit $ret
diff --git a/tools/testing/selftests/net/netfilter/conntrack_vrf.sh b/tools/testing/selftests/net/netfilter/conntrack_vrf.sh
index e95ecb37c2b1..207b79932d91 100755
--- a/tools/testing/selftests/net/netfilter/conntrack_vrf.sh
+++ b/tools/testing/selftests/net/netfilter/conntrack_vrf.sh
@@ -32,7 +32,6 @@ source lib.sh
 
 IP0=172.30.30.1
 IP1=172.30.30.2
-DUMMYNET=10.9.9
 PFXL=30
 ret=0
 
@@ -52,11 +51,6 @@ trap cleanup EXIT
 
 setup_ns ns0 ns1
 
-ip netns exec "$ns0" sysctl -q -w net.ipv4.conf.default.rp_filter=0
-ip netns exec "$ns0" sysctl -q -w net.ipv4.conf.all.rp_filter=0
-ip netns exec "$ns0" sysctl -q -w net.ipv4.conf.all.rp_filter=0
-ip netns exec "$ns0" sysctl -q -w net.ipv4.conf.all.forwarding=1
-
 if ! ip link add veth0 netns "$ns0" type veth peer name veth0 netns "$ns1" > /dev/null 2>&1; then
 	echo "SKIP: Could not add veth device"
 	exit $ksft_skip
@@ -67,18 +61,13 @@ if ! ip -net "$ns0" li add tvrf type vrf table 9876; then
 	exit $ksft_skip
 fi
 
-ip -net "$ns0" link add dummy0 type dummy
-
 ip -net "$ns0" li set veth0 master tvrf
-ip -net "$ns0" li set dummy0 master tvrf
 ip -net "$ns0" li set tvrf up
 ip -net "$ns0" li set veth0 up
-ip -net "$ns0" li set dummy0 up
 ip -net "$ns1" li set veth0 up
 
 ip -net "$ns0" addr add $IP0/$PFXL dev veth0
 ip -net "$ns1" addr add $IP1/$PFXL dev veth0
-ip -net "$ns0" addr add $DUMMYNET.1/$PFXL dev dummy0
 
 listener_ready()
 {
@@ -219,35 +208,9 @@ EOF
 	fi
 }
 
-test_fib()
-{
-ip netns exec "$ns0" nft -f - <<EOF
-flush ruleset
-table ip t {
-	counter fibcount { }
-
-	chain prerouting {
-		type filter hook prerouting priority 0;
-		meta iifname veth0 ip daddr $DUMMYNET.2 fib daddr oif dummy0 counter name fibcount notrack
-	}
-}
-EOF
-	ip -net "$ns1" route add 10.9.9.0/24 via "$IP0" dev veth0
-	ip netns exec "$ns1" ping -q -w 1 -c 1 "$DUMMYNET".2 > /dev/null
-
-	if ip netns exec "$ns0" nft list counter t fibcount | grep -q "packets 1"; then
-		echo "PASS: fib lookup returned exepected output interface"
-	else
-		echo "FAIL: fib lookup did not return exepected output interface"
-		ret=1
-		return
-	fi
-}
-
 test_ct_zone_in
 test_masquerade_vrf "default"
 test_masquerade_vrf "pfifo"
 test_masquerade_veth
-test_fib
 
 exit $ret
diff --git a/tools/testing/selftests/net/netfilter/ipvs.sh b/tools/testing/selftests/net/netfilter/ipvs.sh
index d3edb16cd4b3..6af2ea3ad6b8 100755
--- a/tools/testing/selftests/net/netfilter/ipvs.sh
+++ b/tools/testing/selftests/net/netfilter/ipvs.sh
@@ -129,9 +129,6 @@ test_dr() {
 	# avoid incorrect arp response
 	ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.arp_ignore=1
 	ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.arp_announce=2
-	# avoid reverse route lookup
-	ip netns exec "${ns2}" sysctl -qw  net.ipv4.conf.all.rp_filter=0
-	ip netns exec "${ns2}" sysctl -qw  net.ipv4.conf.veth21.rp_filter=0
 	ip netns exec "${ns2}" ip addr add "${vip_v4}/32" dev lo:1
 
 	test_service
@@ -167,9 +164,6 @@ test_tun() {
 	ip netns exec "${ns2}" ip link set tunl0 up
 	ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.arp_ignore=1
 	ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.arp_announce=2
-	ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.rp_filter=0
-	ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.tunl0.rp_filter=0
-	ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.veth21.rp_filter=0
 	ip netns exec "${ns2}" ip addr add "${vip_v4}/32" dev lo:1
 
 	test_service
diff --git a/tools/testing/selftests/net/netfilter/nft_concat_range.sh b/tools/testing/selftests/net/netfilter/nft_concat_range.sh
index 1f5979c1510c..efea93cf23d4 100755
--- a/tools/testing/selftests/net/netfilter/nft_concat_range.sh
+++ b/tools/testing/selftests/net/netfilter/nft_concat_range.sh
@@ -15,10 +15,12 @@ source lib.sh
 # Available test groups:
 # - reported_issues: check for issues that were reported in the past
 # - correctness: check that packets match given entries, and only those
+# - correctness_large: same but with additional non-matching entries
 # - concurrency: attempt races between insertion, deletion and lookup
 # - timeout: check that packets match entries until they expire
 # - performance: estimate matching rate, compare with rbtree and hash baselines
-TESTS="reported_issues correctness concurrency timeout"
+TESTS="reported_issues correctness correctness_large concurrency timeout"
+
 [ -n "$NFT_CONCAT_RANGE_TESTS" ] && TESTS="${NFT_CONCAT_RANGE_TESTS}"
 
 # Set types, defined by TYPE_ variables below
@@ -1257,9 +1259,7 @@ send_nomatch() {
 # - add ranged element, check that packets match it
 # - check that packets outside range don't match it
 # - remove some elements, check that packets don't match anymore
-test_correctness() {
-	setup veth send_"${proto}" set || return ${ksft_skip}
-
+test_correctness_main() {
 	range_size=1
 	for i in $(seq "${start}" $((start + count))); do
 		end=$((start + range_size))
@@ -1293,6 +1293,163 @@ test_correctness() {
 	done
 }
 
+test_correctness() {
+	setup veth send_"${proto}" set || return ${ksft_skip}
+
+	test_correctness_main
+}
+
+# Repeat the correctness tests, but add extra non-matching entries.
+# This exercises the more compact '4 bit group' representation that
+# gets picked when the default 8-bit representation exceeds
+# NFT_PIPAPO_LT_SIZE_HIGH bytes of memory.
+# See usage of NFT_PIPAPO_LT_SIZE_HIGH in pipapo_lt_bits_adjust().
+#
+# The format() helper is way too slow when generating lots of
+# entries so it's not used here.
+test_correctness_large() {
+	setup veth send_"${proto}" set || return ${ksft_skip}
+	# number of dummy (filler) entries to add.
+	local dcount=16385
+
+	(
+	echo -n "add element inet filter test { "
+
+	case "$type_spec" in
+	"ether_addr . ipv4_addr")
+		for i in $(seq 1 $dcount); do
+			[ $i -gt 1 ] && echo ", "
+			format_mac $((1000000 + i))
+			printf ". 172.%i.%i.%i " $((RANDOM%256)) $((RANDOM%256)) $((i%256))
+		done
+		;;
+	"inet_proto . ipv6_addr")
+		for i in $(seq 1 $dcount); do
+			[ $i -gt 1 ] && echo ", "
+			printf "%i . " $((RANDOM%256))
+			format_addr6 $((1000000 + i))
+		done
+		;;
+	"inet_service . inet_proto")
+		# smaller key sizes, need more entries to hit the
+		# 4-bit threshold.
+		dcount=65536
+		for i in $(seq 1 $dcount); do
+			local proto=$((RANDOM%256))
+
+			# Test uses UDP to match, as it also fails when matching
+			# an entry that doesn't exist, so skip 'udp' entries
+			# to not trigger a wrong failure.
+			[ $proto -eq 17 ] && proto=18
+			[ $i -gt 1 ] && echo ", "
+			printf "%i . %i " $(((i%65534) + 1)) $((proto))
+		done
+		;;
+	"inet_service . ipv4_addr")
+		dcount=32768
+		for i in $(seq 1 $dcount); do
+			[ $i -gt 1 ] && echo ", "
+			printf "%i . 172.%i.%i.%i " $(((RANDOM%65534) + 1)) $((RANDOM%256)) $((RANDOM%256)) $((i%256))
+		done
+		;;
+	"ipv4_addr . ether_addr")
+		for i in $(seq 1 $dcount); do
+			[ $i -gt 1 ] && echo ", "
+			printf "172.%i.%i.%i . " $((RANDOM%256)) $((RANDOM%256)) $((i%256))
+			format_mac $((1000000 + i))
+		done
+		;;
+	"ipv4_addr . inet_service")
+		dcount=32768
+		for i in $(seq 1 $dcount); do
+			[ $i -gt 1 ] && echo ", "
+			printf "172.%i.%i.%i . %i" $((RANDOM%256)) $((RANDOM%256)) $((i%256)) $(((RANDOM%65534) + 1))
+		done
+		;;
+	"ipv4_addr . inet_service . ether_addr . inet_proto . ipv4_addr")
+		dcount=65536
+		for i in $(seq 1 $dcount); do
+			[ $i -gt 1 ] && echo ", "
+			printf "172.%i.%i.%i . %i . " $((RANDOM%256)) $((RANDOM%256)) $((i%256)) $(((RANDOM%65534) + 1))
+			format_mac $((1000000 + i))
+			printf ". %i . 192.168.%i.%i" $((RANDOM%256)) $((RANDOM%256)) $((i%256))
+		done
+		;;
+	"ipv4_addr . inet_service . inet_proto")
+		for i in $(seq 1 $dcount); do
+			[ $i -gt 1 ] && echo ", "
+			printf "172.%i.%i.%i . %i . %i " $((RANDOM%256)) $((RANDOM%256)) $((i%256)) $(((RANDOM%65534) + 1)) $((RANDOM%256))
+		done
+		;;
+	"ipv4_addr . inet_service . inet_proto . ipv4_addr")
+		for i in $(seq 1 $dcount); do
+			[ $i -gt 1 ] && echo ", "
+			printf "172.%i.%i.%i . %i . %i . 192.168.%i.%i " $((RANDOM%256)) $((RANDOM%256)) $((i%256)) $(((RANDOM%65534) + 1)) $((RANDOM%256)) $((RANDOM%256)) $((RANDOM%256))
+		done
+		;;
+	"ipv4_addr . inet_service . ipv4_addr")
+		dcount=32768
+		for i in $(seq 1 $dcount); do
+			[ $i -gt 1 ] && echo ", "
+			printf "172.%i.%i.%i . %i . 192.168.%i.%i " $((RANDOM%256)) $((RANDOM%256)) $((i%256)) $(((RANDOM%65534) + 1)) $((RANDOM%256)) $((RANDOM%256))
+		done
+		;;
+	"ipv6_addr . ether_addr")
+		for i in $(seq 1 $dcount); do
+			[ $i -gt 1 ] && echo ", "
+			format_addr6 $((i + 1000000))
+			echo -n " . "
+			format_mac $((1000000 + i))
+		done
+		;;
+	"ipv6_addr . inet_service")
+		dcount=32768
+		for i in $(seq 1 $dcount); do
+			[ $i -gt 1 ] && echo ", "
+			format_addr6 $((i + 1000000))
+			echo -n " .  $(((RANDOM%65534) + 1))"
+		done
+		;;
+	"ipv6_addr . inet_service . ether_addr")
+		dcount=32768
+		for i in $(seq 1 $dcount); do
+			[ $i -gt 1 ] && echo ", "
+			format_addr6 $((i + 1000000))
+			echo -n " .  $(((RANDOM%65534) + 1)) . "
+			format_mac $((i + 1000000))
+		done
+		;;
+	"ipv6_addr . inet_service . ether_addr . inet_proto")
+		dcount=65536
+		for i in $(seq 1 $dcount); do
+			[ $i -gt 1 ] && echo ", "
+			format_addr6 $((i + 1000000))
+			echo -n " .  $(((RANDOM%65534) + 1)) . "
+			format_mac $((i + 1000000))
+			echo -n " .  $((RANDOM%256))"
+		done
+		;;
+	"ipv6_addr . inet_service . ipv6_addr . inet_service")
+		dcount=32768
+		for i in $(seq 1 $dcount); do
+			[ $i -gt 1 ] && echo ", "
+			format_addr6 $((i + 1000000))
+			echo -n " .  $(((RANDOM%65534) + 1)) . "
+			format_addr6 $((i + 2123456))
+			echo -n " .  $((RANDOM%256))"
+		done
+		;;
+	*)
+		echo "Unhandled $type_spec" >&2	# stdout is piped into nft
+		return 1
+	esac
+	echo -n "}"
+
+	) | nft -f - || return 1
+
+	test_correctness_main
+}
+
 # Concurrency test template:
 # - add all the elements
 # - start a thread for each physical thread that:
diff --git a/tools/testing/selftests/net/netfilter/nft_fib.sh b/tools/testing/selftests/net/netfilter/nft_fib.sh
index ce1451c275fd..9929a9ffef65 100755
--- a/tools/testing/selftests/net/netfilter/nft_fib.sh
+++ b/tools/testing/selftests/net/netfilter/nft_fib.sh
@@ -3,6 +3,10 @@
 # This tests the fib expression.
 #
 # Kselftest framework requirement - SKIP code is 4.
+#
+#  10.0.1.99     10.0.1.1           10.0.2.1         10.0.2.99
+# dead:1::99    dead:1::1          dead:2::1        dead:2::99
+# ns1 <-------> [ veth0 ] nsrouter [veth1] <-------> ns2
 
 source lib.sh
 
@@ -45,6 +49,19 @@ table inet filter {
 EOF
 }
 
+load_input_ruleset() {	# load an input-hook chain using "fib saddr . iif oif missing" into netns $1
+	local netns=$1	# namespace name; also embedded in the log prefix of the drop rule
+
+ip netns exec "$netns" nft -f /dev/stdin <<EOF
+table inet filter {
+	chain input {
+		type filter hook input priority 0; policy accept;
+	        fib saddr . iif oif missing counter log prefix "$netns nft_rpfilter: " drop
+	}
+}
+EOF
+}
+
 load_pbr_ruleset() {
 	local netns=$1
 
@@ -59,6 +76,89 @@ table inet filter {
 EOF
 }
 
+load_type_ruleset() {	# load the "fib ... type" counter rules (tables ip and ip6 "filter") into netns $1
+	local netns=$1
+
+	for family in ip ip6;do	# returns 1 at the first family that fails to load, 0 if both loaded
+ip netns exec "$netns" nft -f /dev/stdin <<EOF || return 1
+table $family filter {
+	chain type_match_in {
+		fib daddr type local counter comment "daddr configured on other iface"
+		fib daddr . iif type local counter comment "daddr configured on iif"
+		fib daddr type unicast counter comment "daddr not local"
+		fib daddr . iif type unicast counter comment "daddr not configured on iif"
+	}
+
+	chain type_match_out {
+		fib daddr type unicast counter
+		fib daddr . oif type unicast counter
+		fib daddr type local counter
+		fib daddr . oif type local counter
+	}
+
+	chain prerouting {
+		type filter hook prerouting priority 0;
+		icmp type echo-request counter jump type_match_in
+		icmpv6 type echo-request counter jump type_match_in
+	}
+
+	chain input {
+		type filter hook input priority 0;
+		icmp type echo-request counter jump type_match_in
+		icmpv6 type echo-request counter jump type_match_in
+	}
+
+	chain forward {
+		type filter hook forward priority 0;
+		icmp type echo-request counter jump type_match_in
+		icmpv6 type echo-request counter jump type_match_in
+	}
+
+	chain output {
+		type filter hook output priority 0;
+		icmp type echo-request counter jump type_match_out
+		icmpv6 type echo-request counter jump type_match_out
+	}
+
+	chain postrouting {
+		type filter hook postrouting priority 0;
+		icmp type echo-request counter jump type_match_out
+		icmpv6 type echo-request counter jump type_match_out
+	}
+}
+EOF
+done
+}
+
+reload_type_ruleset() {
+	ip netns exec "$1" nft flush table ip filter
+	ip netns exec "$1" nft flush table ip6 filter
+	load_type_ruleset "$1"
+}
+
+check_fib_type_counter_family() {	# check one rule's packet counter; sets ret=1 and returns 1 on mismatch
+	local family="$1"	# nft table family (callers pass ip or ip6)
+	local want="$2"		# expected packet count
+	local ns="$3"		# network namespace holding the ruleset
+	local chain="$4"	# chain of table "filter" to list
+	local what="$5"		# rule text selecting the line to inspect
+	local errmsg="$6"	# scenario tag appended to the failure message
+	# -F: "$what" contains '.', so match it literally.  -w: "packets 2" must
+	# not also accept "packets 20".
+	if ! ip netns exec "$ns" nft list chain "$family" filter "$chain" | grep -F "$what" | grep -qw "packets $want";then
+		echo "Netns $ns $family fib type counter doesn't match expected packet count of $want for $what $errmsg" 1>&2
+		ip netns exec "$ns" nft list chain "$family" filter "$chain"
+		ret=1
+		return 1
+	fi
+
+	return 0
+}
+
+check_fib_type_counter() {	# same counter check for table ip, then (only if that passed) ip6
+	check_fib_type_counter_family "ip" "$@" &&
+		check_fib_type_counter_family "ip6" "$@"
+}
+
 load_ruleset_count() {
 	local netns=$1
 
@@ -77,6 +177,7 @@ check_drops() {
 	if dmesg | grep -q ' nft_rpfilter: ';then
 		dmesg | grep ' nft_rpfilter: '
 		echo "FAIL: rpfilter did drop packets"
+		ret=1
 		return 1
 	fi
 
@@ -151,19 +252,506 @@ test_ping() {
   return 0
 }
 
+test_ping_unreachable() {
+  local daddr4=$1
+  local daddr6=$2
+
+  if ip netns exec "$ns1" ping -c 1 -w 1 -q "$daddr4" > /dev/null; then
+	echo "FAIL: ${ns1} could reach $daddr4" 1>&2
+	return 1
+  fi
+
+  if ip netns exec "$ns1" ping -c 1 -w 1 -q "$daddr6" > /dev/null; then
+	echo "FAIL: ${ns1} could reach $daddr6" 1>&2
+	return 1
+  fi
+
+  return 0
+}
+
+test_fib_type() {	# ping three kinds of destination and check the "fib ... type" counters on $nsrouter
+	local notice="$1"	# label appended to the PASS/FAIL line
+	local errmsg="addr-on-if"
+	local lret=0
+
+	if ! load_type_ruleset "$nsrouter";then
+		echo "SKIP: Could not load fib type ruleset"
+		[ $ret -eq 0 ] && ret=$ksft_skip
+		return
+	fi
+
+	# makes router receive packet for addresses configured on incoming
+	# interface.  Callers ignore our status, so record a ping failure in ret.
+	test_ping 10.0.1.1 dead:1::1 || { ret=1; return 1; }
+
+	# expectation: triggers all 'local' in prerouting/input.
+	check_fib_type_counter 2 "$nsrouter" "type_match_in" "fib daddr type local" "$errmsg" || lret=1
+	check_fib_type_counter 2 "$nsrouter" "type_match_in" "fib daddr . iif type local" "$errmsg" || lret=1
+
+	reload_type_ruleset "$nsrouter"
+	# makes router receive packet for address configured on a different (but local)
+	# interface.
+	test_ping 10.0.2.1 dead:2::1 || { ret=1; return 1; }
+
+	# expectation: triggers 'unicast' in prerouting/input for daddr . iif and local for 'daddr'.
+	errmsg="addr-on-host"
+	check_fib_type_counter 2 "$nsrouter" "type_match_in" "fib daddr type local" "$errmsg" || lret=1
+	check_fib_type_counter 2 "$nsrouter" "type_match_in" "fib daddr . iif type unicast" "$errmsg" || lret=1
+
+	reload_type_ruleset "$nsrouter"
+	test_ping 10.0.2.99 dead:2::99 || { ret=1; return 1; }	# address of another host (ns2)
+	errmsg="addr-on-otherhost"
+	check_fib_type_counter 2 "$nsrouter" "type_match_in" "fib daddr type unicast" "$errmsg" || lret=1
+	check_fib_type_counter 2 "$nsrouter" "type_match_in" "fib daddr . iif type unicast" "$errmsg" || lret=1
+
+	if [ $lret -eq 0 ];then
+		echo "PASS: fib expression address types match ($notice)"
+	else
+		echo "FAIL: fib expression address types match ($notice)"
+		ret=1
+	fi
+}
+
+test_fib_vrf_dev_add_dummy()
+{	# create dummy0 and vrf device tvrf (table 9876) in $nsrouter; return 1 if either type is missing
+	ip -net "$nsrouter" link add dummy0 type dummy || {
+		echo "SKIP: VRF tests: dummy device type not supported"
+		return 1
+	}
+
+	ip -net "$nsrouter" link add tvrf type vrf table 9876 || {
+		echo "SKIP: VRF tests: vrf device type not supported"
+		return 1
+	}
+
+	ip -net "$nsrouter" link set dummy0 master tvrf
+	ip -net "$nsrouter" link set dummy0 up
+	ip -net "$nsrouter" link set tvrf up
+}
+
+load_ruleset_vrf()
+{	# replace the whole ruleset in $nsrouter with the dynamic-set based fib test table
+# Due to the many different possible combinations, using named counters
+# or one-rule-per-expected-result would be complex.
+#
+# Instead, add dynamic sets for the fib modes
+# (fib address type, fib output interface lookup .. ),
+# and then add the obtained fib results to them.
+#
+# The test is successful if the sets contain the expected results
+# and no unexpected extra entries exist.
+ip netns exec "$nsrouter" nft -f - <<EOF
+flush ruleset
+table inet t {
+	set fibif4 {
+		typeof meta iif . ip daddr . fib daddr oif
+		flags dynamic
+		counter
+	}
+
+	set fibif4iif {
+		typeof meta iif . ip daddr . fib daddr . iif oif
+		flags dynamic
+		counter
+	}
+
+	set fibif6 {
+		typeof meta iif . ip6 daddr . fib daddr oif
+		flags dynamic
+		counter
+	}
+
+	set fibif6iif {
+		typeof meta iif . ip6 daddr . fib daddr . iif oif
+		flags dynamic
+		counter
+	}
+
+	set fibtype4 {
+		typeof meta iif . ip daddr . fib daddr type
+		flags dynamic
+		counter
+	}
+
+	set fibtype4iif {
+		typeof meta iif . ip daddr . fib daddr . iif type
+		flags dynamic
+		counter
+	}
+
+	set fibtype6 {
+		typeof meta iif . ip6 daddr . fib daddr type
+		flags dynamic
+		counter
+	}
+
+	set fibtype6iif {
+		typeof meta iif . ip6 daddr . fib daddr . iif type
+		flags dynamic
+		counter
+	}
+
+	chain fib_test {
+		meta nfproto ipv4 jump {
+			add @fibif4 { meta iif . ip daddr . fib daddr oif }
+			add @fibif4iif { meta iif . ip daddr . fib daddr . iif oif }
+			add @fibtype4 { meta iif . ip daddr . fib daddr type }
+			add @fibtype4iif { meta iif . ip daddr . fib daddr . iif type }
+
+			add @fibif4 { meta iif . ip saddr . fib saddr oif }
+			add @fibif4iif { meta iif . ip saddr . fib saddr . iif oif }
+		}
+
+		meta nfproto ipv6 jump {
+			add @fibif6    { meta iif . ip6 daddr . fib daddr oif }
+			add @fibif6iif { meta iif . ip6 daddr . fib daddr . iif oif }
+			add @fibtype6    { meta iif . ip6 daddr . fib daddr type }
+			add @fibtype6iif { meta iif . ip6 daddr . fib daddr . iif type }
+
+			add @fibif6 { meta iif . ip6 saddr . fib saddr oif }
+			add @fibif6iif { meta iif . ip6 saddr . fib saddr . iif oif }
+		}
+	}
+
+	chain prerouting {
+		type filter hook prerouting priority 0;
+		icmp type echo-request counter jump fib_test
+
+		# neighbour discovery to be ignored.
+		icmpv6 type echo-request counter jump fib_test
+	}
+}
+EOF
+
+if [ $? -ne 0 ] ;then	# $? is the status of the nft load above
+	echo "SKIP: Could not load ruleset for fib vrf test"
+	[ $ret -eq 0 ] && ret=$ksft_skip
+	return 1
+fi
+}
+
+check_type()
+{	# expect element "$2 . $3 . $4" with packet count $5 (default 1) in set $1, then delete it
+	local setname="$1"	# dynamic set in table "inet t"
+	local iifname="$2"	# input interface part of the key
+	local addr="$3"		# address part of the key
+	local type="$4"		# fib result part of the key (address type or oif)
+	local count="$5"	# expected packet counter value
+
+	[ -z "$count" ] && count=1
+
+	# -w: the count must match as a whole word, so 1 does not also accept 10..19.
+	if ! ip netns exec "$nsrouter" nft get element inet t "$setname" { "$iifname" . "$addr" . "$type" } |grep -qw "counter packets $count";then
+		echo "FAIL: did not find $iifname . $addr . $type in $setname"
+		ip netns exec "$nsrouter" nft list set inet t "$setname"
+		ret=1
+		return 1
+	fi
+
+	# delete the entry, this allows to check if anything unexpected appeared
+	# at the end of the test run: all dynamic sets should be empty by then.
+	if ! ip netns exec "$nsrouter" nft delete element inet t "$setname" { "$iifname" . "$addr" . "$type" } ; then
+		echo "FAIL: can't delete $iifname . $addr . $type in $setname"
+		ip netns exec "$nsrouter" nft list set inet t "$setname"
+		ret=1
+		return 1
+	fi
+
+	return 0
+}
+
+check_local()
+{	# check_type with result "local" and count 1; "$@" keeps each caller argument a single word
+	check_type "$@" "local" 1
+}
+
+check_unicast()
+{	# check_type with result "unicast" and count 1; "$@" keeps each caller argument a single word
+	check_type "$@" "unicast" 1
+}
+
+check_rpf()
+{	# plain alias of check_type; "$@" forwards the arguments without re-splitting them
+	check_type "$@"
+}
+
+check_fib_vrf_sets_empty()
+{
+	local name
+	local failed=0
+
+	# Whatever is still stored in a set here was not removed by check_type:
+	# either an unexpected packet was seen or a fib lookup gave an unexpected result.
+	for name in fibif4 fibif4iif fibif6 fibif6iif \
+		    fibtype4 fibtype4iif fibtype6 fibtype6iif; do
+		if ip netns exec "$nsrouter" nft list set inet t "$name" | grep -q elements;then
+			echo "FAIL: $name not empty"
+			ip netns exec "$nsrouter" nft list set inet t "$name"
+			failed=1
+			ret=1
+		fi
+	done
+
+	return $failed
+}
+
+check_fib_vrf_type()
+{	# verify (and consume via check_type) every set entry expected while veth0 is NOT in the vrf
+	local msg="$1"	# text printed after PASS:/FAIL:
+
+	local addr
+	# the incoming interface is always veth0.  As it's not linked to a VRF,
+	# the 'tvrf' device should NOT show up anywhere.
+	local ifname="veth0"
+	local lret=0
+
+	# local_veth0, local_veth1
+	for addr in "10.0.1.1" "10.0.2.1"; do
+		check_local fibtype4  "$ifname" "$addr" || lret=1
+		check_type  fibif4    "$ifname" "$addr" "0" || lret=1
+	done
+	for addr in "dead:1::1" "dead:2::1";do
+		check_local fibtype6  "$ifname" "$addr" || lret=1
+		check_type  fibif6    "$ifname" "$addr" "0" || lret=1
+	done
+
+	# when restricted to the incoming interface, 10.0.1.1 should
+	# be 'local', but 10.0.2.1 unicast.
+	check_local fibtype4iif   "$ifname" "10.0.1.1" || lret=1
+	check_unicast fibtype4iif "$ifname" "10.0.2.1" || lret=1
+
+	# same for the ipv6 addresses.
+	check_local fibtype6iif   "$ifname" "dead:1::1" || lret=1
+	check_unicast fibtype6iif "$ifname" "dead:2::1" || lret=1
+
+	# None of these addresses should find a valid route when restricting
+	# to the incoming interface (we ask for daddr - 10.0.1.1/2.1 are
+	# reachable via 'lo').
+	for addr in "10.0.1.1" "10.0.2.1" "10.9.9.1" "10.9.9.2";do
+		check_type fibif4iif "$ifname" "$addr" "0" || lret=1
+	done
+
+	# expect default route (veth1), dummy0 is part of VRF but iif isn't.
+	for addr in "10.9.9.1" "10.9.9.2";do
+		check_unicast fibtype4    "$ifname" "$addr" || lret=1
+		check_unicast fibtype4iif "$ifname" "$addr" || lret=1
+		check_type fibif4 "$ifname" "$addr" "veth1" || lret=1
+	done
+	for addr in "dead:9::1" "dead:9::2";do
+		check_unicast fibtype6    "$ifname" "$addr" || lret=1
+		check_unicast fibtype6iif "$ifname" "$addr" || lret=1
+		check_type fibif6 "$ifname" "$addr" "veth1" || lret=1
+	done
+
+	# same for the IPv6 equivalent addresses.
+	for addr in "dead:1::1" "dead:2::1" "dead:9::1" "dead:9::2";do
+		check_type  fibif6iif "$ifname" "$addr" "0" || lret=1
+	done
+
+	check_unicast fibtype4    "$ifname" "10.0.2.99" || lret=1
+	check_unicast fibtype4iif "$ifname" "10.0.2.99" || lret=1
+	check_unicast fibtype6    "$ifname" "dead:2::99" || lret=1
+	check_unicast fibtype6iif "$ifname" "dead:2::99" || lret=1
+
+	check_type fibif4 "$ifname" "10.0.2.99" "veth1" || lret=1
+	check_type fibif4iif "$ifname" "10.0.2.99" 0 || lret=1
+	check_type fibif6 "$ifname" "dead:2::99" "veth1" || lret=1
+	check_type fibif6iif "$ifname" "dead:2::99" 0 || lret=1
+
+	check_rpf  fibif4    "$ifname" "10.0.1.99" "veth0" 5 || lret=1
+	check_rpf  fibif4iif "$ifname" "10.0.1.99" "veth0" 5 || lret=1
+	check_rpf  fibif6    "$ifname" "dead:1::99" "veth0" 5 || lret=1
+	check_rpf  fibif6iif "$ifname" "dead:1::99" "veth0" 5 || lret=1
+
+	check_fib_vrf_sets_empty || lret=1	# anything left over was not expected above
+
+	if [ $lret -eq 0 ];then
+		echo "PASS: $msg"
+	else
+		echo "FAIL: $msg"
+		ret=1
+	fi
+}
+
+check_fib_veth_vrf_type()
+{	# verify (and consume via check_type) every set entry expected once veth0 is part of the vrf
+	local msg="$1"	# text printed after PASS:/FAIL:
+
+	local addr
+	local ifname
+	local t		# set-name loop variable, kept local so it does not leak into the script
+	local lret=0
+
+	# as veth0 is now part of tvrf interface, packets will be seen
+	# twice, once with iif veth0, then with iif tvrf.
+
+	for ifname in "veth0" "tvrf"; do
+		for addr in "10.0.1.1" "10.9.9.1"; do
+			check_local fibtype4  "$ifname" "$addr" || lret=1
+			# addr local, but nft_fib doesn't return routes with RTN_LOCAL.
+			check_type  fibif4    "$ifname" "$addr" 0 || lret=1
+			check_type  fibif4iif "$ifname" "$addr" 0 || lret=1
+		done
+
+		for addr in "dead:1::1" "dead:9::1"; do
+			check_local fibtype6 "$ifname" "$addr" || lret=1
+			# same, address is local but no route is returned for lo.
+			check_type  fibif6    "$ifname" "$addr" 0 || lret=1
+			check_type  fibif6iif "$ifname" "$addr" 0 || lret=1
+		done
+
+		for t in fibtype4 fibtype4iif; do
+			check_unicast "$t" "$ifname" 10.9.9.2 || lret=1
+		done
+		for t in fibtype6 fibtype6iif; do
+			check_unicast "$t" "$ifname" dead:9::2 || lret=1
+		done
+
+		check_unicast fibtype4iif "$ifname" "10.9.9.1" || lret=1
+		check_unicast fibtype6iif "$ifname" "dead:9::1" || lret=1
+
+		check_unicast fibtype4    "$ifname" "10.0.2.99" || lret=1
+		check_unicast fibtype4iif "$ifname" "10.0.2.99" || lret=1
+
+		check_unicast fibtype6    "$ifname" "dead:2::99" || lret=1
+		check_unicast fibtype6iif "$ifname" "dead:2::99" || lret=1
+
+		check_type fibif4    "$ifname"  "10.0.2.99" "veth1" || lret=1
+		check_type fibif6    "$ifname" "dead:2::99" "veth1" || lret=1
+		check_type fibif4    "$ifname"   "10.9.9.2" "dummy0" || lret=1
+		check_type fibif6    "$ifname"  "dead:9::2" "dummy0" || lret=1
+
+		# restricted to iif -- MUST NOT provide result, it's != $ifname.
+		check_type fibif4iif "$ifname"  "10.0.2.99" 0 || lret=1
+		check_type fibif6iif "$ifname" "dead:2::99" 0 || lret=1
+
+		check_rpf  fibif4 "$ifname" "10.0.1.99" "veth0" 4 || lret=1
+		check_rpf  fibif6 "$ifname" "dead:1::99" "veth0" 4 || lret=1
+		check_rpf  fibif4iif "$ifname" "10.0.1.99" "$ifname" 4 || lret=1
+		check_rpf  fibif6iif "$ifname" "dead:1::99" "$ifname" 4 || lret=1
+	done
+
+	check_local fibtype4iif "veth0" "10.0.1.1" || lret=1
+	check_local fibtype6iif "veth0" "dead:1::1" || lret=1
+
+	check_unicast fibtype4iif "tvrf" "10.0.1.1" || lret=1
+	check_unicast fibtype6iif "tvrf" "dead:1::1" || lret=1
+
+	# 10.9.9.2 should not provide a result for iif veth, but
+	# should when iif is tvrf.
+	# This is because it's reachable via dummy0 which is part of
+	# tvrf.  iif veth0 MUST conceal the dummy0 result (i.e. return oif 0).
+	check_type fibif4iif "veth0" "10.9.9.2" 0 || lret=1
+	check_type fibif6iif "veth0"  "dead:9::2" 0 || lret=1
+
+	check_type fibif4iif "tvrf" "10.9.9.2" "tvrf" || lret=1
+	check_type fibif6iif "tvrf" "dead:9::2" "tvrf" || lret=1
+
+	check_fib_vrf_sets_empty || lret=1
+
+	if [ $lret -eq 0 ];then
+		echo "PASS: $msg"
+	else
+		echo "FAIL: $msg"
+		ret=1
+	fi
+}
+
+# Extends nsrouter config by adding dummy0+vrf.
+#
+#  10.0.1.99     10.0.1.1           10.0.2.1         10.0.2.99
+# dead:1::99    dead:1::1          dead:2::1        dead:2::99
+# ns1 <-------> [ veth0 ] nsrouter [veth1] <-------> ns2
+#                         [dummy0]
+#                         10.9.9.1
+#                        dead:9::1
+#                          [tvrf]
+test_fib_vrf()
+{
+	# Results are reported through the script-level 'ret'; there is no meaningful return status.
+
+	if ! test_fib_vrf_dev_add_dummy; then
+		[ $ret -eq 0 ] && ret=$ksft_skip
+		return
+	fi
+
+	ip -net "$nsrouter" addr add "10.9.9.1"/24 dev dummy0
+	ip -net "$nsrouter" addr add "dead:9::1"/64 dev dummy0 nodad
+
+	ip -net "$nsrouter" route add default via 10.0.2.99
+	ip -net "$nsrouter" route add default via dead:2::99
+
+	load_ruleset_vrf || return
+
+	# no echo reply for these addresses: The dummy interface is part of tvrf,
+	# but veth0 (incoming interface) isn't linked to it.
+	test_ping_unreachable "10.9.9.1" "dead:9::1" &
+	test_ping_unreachable "10.9.9.2" "dead:9::2" &
+
+	# expect replies from these; a missing reply is a test failure.
+	test_ping "10.0.1.1" "dead:1::1" || ret=1
+	test_ping "10.0.2.1" "dead:2::1" || ret=1
+	test_ping "10.0.2.99" "dead:2::99" || ret=1
+
+	wait
+
+	check_fib_vrf_type "fib expression address types match (iif not in vrf)"
+
+	# second round: this time, make veth0 (rx interface) part of the vrf.
+	# 10.9.9.1 / dead:9::1 become reachable from ns1, while ns2
+	# becomes unreachable.
+	ip -net "$nsrouter" link set veth0 master tvrf
+	ip -net "$nsrouter" addr add dead:1::1/64 dev veth0 nodad
+
+	# this reload should not be needed, but in case
+	# there is some error (missing or unexpected entry) this will prevent them
+	# from leaking into round 2.
+	load_ruleset_vrf || return
+
+	test_ping "10.0.1.1" "dead:1::1" || ret=1
+	test_ping "10.9.9.1" "dead:9::1" || ret=1
+
+	# ns2 should no longer be reachable (veth1 not in vrf)
+	test_ping_unreachable "10.0.2.99" "dead:2::99" &
+
+	# vrf via dummy0, but host doesn't exist
+	test_ping_unreachable "10.9.9.2" "dead:9::2" &
+
+	wait
+
+	check_fib_veth_vrf_type "fib expression address types match (iif in vrf)"
+}
+
 ip netns exec "$nsrouter" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
 ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
 ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
-ip netns exec "$nsrouter" sysctl net.ipv4.conf.all.rp_filter=0 > /dev/null
-ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth0.rp_filter=0 > /dev/null
 
 test_ping 10.0.2.1 dead:2::1 || exit 1
-check_drops || exit 1
+check_drops
 
 test_ping 10.0.2.99 dead:2::99 || exit 1
+check_drops
+
+[ $ret -eq 0 ] && echo "PASS: fib expression did not cause unwanted packet drops"
+
+load_input_ruleset "$ns1"
+
+test_ping 127.0.0.1 ::1
+check_drops
+
+test_ping 10.0.1.99 dead:1::99
+check_drops
+
+[ $ret -eq 0 ] && echo "PASS: fib expression did not discard loopback packets"
+
+load_input_ruleset "$ns1"
+
+test_ping 127.0.0.1 ::1 || exit 1
 check_drops || exit 1
 
-echo "PASS: fib expression did not cause unwanted packet drops"
+test_ping 10.0.1.99 dead:1::99 || exit 1
+check_drops || exit 1
+
+echo "PASS: fib expression did not discard loopback packets"
 
 ip netns exec "$nsrouter" nft flush table inet filter
 
@@ -213,7 +801,7 @@ ip -net "$nsrouter" addr del dead:2::1/64 dev veth0
 # ... pbr ruleset for the router, check iif+oif.
 if ! load_pbr_ruleset "$nsrouter";then
 	echo "SKIP: Could not load fib forward ruleset"
-	exit $ksft_skip
+	[ "$ret" -eq 0 ] && ret=$ksft_skip
 fi
 
 ip -net "$nsrouter" rule add from all table 128
@@ -224,11 +812,36 @@ ip -net "$nsrouter" route add table 129 to 10.0.2.0/24 dev veth1
 # drop main ipv4 table
 ip -net "$nsrouter" -4 rule delete table main
 
-if ! test_ping 10.0.2.99 dead:2::99;then
-	ip -net "$nsrouter" nft list ruleset
-	echo "FAIL: fib mismatch in pbr setup"
-	exit 1
+if test_ping 10.0.2.99 dead:2::99;then
+	echo "PASS: fib expression forward check with policy based routing"
+else
+	echo "FAIL: fib expression forward check with policy based routing"
+	ret=1
 fi
 
-echo "PASS: fib expression forward check with policy based routing"
-exit 0
+test_fib_type "policy routing"
+ip netns exec "$nsrouter" nft delete table ip filter
+ip netns exec "$nsrouter" nft delete table ip6 filter
+
+# Un-do policy routing changes
+ip -net "$nsrouter" rule del from all table 128
+ip -net "$nsrouter" rule del from all iif veth0 table 129
+
+ip -net "$nsrouter" route del table 128 to 10.0.1.0/24 dev veth0
+ip -net "$nsrouter" route del table 129 to 10.0.2.0/24 dev veth1
+
+ip -net "$ns1" -4 route del default
+ip -net "$ns1" -6 route del default
+
+ip -net "$ns1" -4 route add default via 10.0.1.1
+ip -net "$ns1" -6 route add default via dead:1::1
+
+ip -net "$nsrouter" -4 rule add from all table main priority 32766
+
+test_fib_type "default table"
+ip netns exec "$nsrouter" nft delete table ip filter
+ip netns exec "$nsrouter" nft delete table ip6 filter
+
+test_fib_vrf
+
+exit $ret
diff --git a/tools/testing/selftests/net/netfilter/nft_interface_stress.sh b/tools/testing/selftests/net/netfilter/nft_interface_stress.sh
new file mode 100755
index 000000000000..5ff7be9daeee
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/nft_interface_stress.sh
@@ -0,0 +1,154 @@
+#!/bin/bash -e
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+# Torture nftables' netdevice notifier callbacks and related code by frequent
+# renaming of interfaces which netdev-family chains and flowtables hook into.
+
+source lib.sh
+
+checktool "nft --version" "run test without nft tool"
+checktool "iperf3 --version" "run test without iperf3 tool"
+
+# how many seconds to torture the kernel?
+# default to 80% of max run time but don't exceed 48s
+TEST_RUNTIME=$((${kselftest_timeout:-60} * 8 / 10))
+[[ $TEST_RUNTIME -gt 48 ]] && TEST_RUNTIME=48
+
+trap "cleanup_all_ns" EXIT
+
+setup_ns nsc nsr nss	# three namespaces; judging by the addressing below: client, router, server
+
+ip -net $nsc link add cr0 type veth peer name rc0 netns $nsr	# veth pair cr0 ($nsc) <-> rc0 ($nsr)
+ip -net $nsc addr add 10.0.0.1/24 dev cr0
+ip -net $nsc link set cr0 up
+ip -net $nsc route add default via 10.0.0.2
+
+ip -net $nss link add sr0 type veth peer name rs0 netns $nsr	# veth pair sr0 ($nss) <-> rs0 ($nsr)
+ip -net $nss addr add 10.1.0.1/24 dev sr0
+ip -net $nss link set sr0 up
+ip -net $nss route add default via 10.1.0.2
+
+ip -net $nsr addr add 10.0.0.2/24 dev rc0
+ip -net $nsr link set rc0 up
+ip -net $nsr addr add 10.1.0.2/24 dev rs0
+ip -net $nsr link set rs0 up
+ip netns exec $nsr sysctl -q net.ipv4.ip_forward=1	# $nsr forwards between the two veth links
+ip netns exec $nsr sysctl -q net.ipv4.conf.all.forwarding=1
+
+{	# generate the ruleset on stdout and pipe it into nft inside $nsr
+	echo "table netdev t {"
+	for ((i = 0; i < 10; i++)); do	# one ingress chain per possible name rc0..rc9 / rs0..rs9
+		cat <<-EOF
+		chain chain_rc$i {
+			type filter hook ingress device rc$i priority 0
+			counter
+		}
+		chain chain_rs$i {
+			type filter hook ingress device rs$i priority 0
+			counter
+		}
+		EOF
+	done
+	echo "}"
+	echo "table ip t {"
+	for ((i = 0; i < 10; i++)); do	# one flowtable per rc$i/rs$i name pair
+		cat <<-EOF
+		flowtable ft_${i} {
+			hook ingress priority 0
+			devices = { rc$i, rs$i }
+		}
+		EOF
+	done
+	echo "chain c {"
+	echo "type filter hook forward priority 0"
+	for ((i = 0; i < 10; i++)); do	# each rule is emitted by two echos: match part, then action part
+		echo -n "iifname rc$i oifname rs$i "
+		echo    "ip protocol tcp counter flow add @ft_${i}"
+	done
+	echo "counter"
+	echo "}"
+	echo "}"
+} | ip netns exec $nsr nft -f - || {
+	echo "SKIP: Could not load nft ruleset"
+	exit $ksft_skip
+}
+
+for ((o=0, n=1; ; o=n, n++, n %= 10)); do	# endless background loop: rename rc$o/rs$o to the next index, wrapping 9 -> 0
+	ip -net $nsr link set rc$o name rc$n
+	ip -net $nsr link set rs$o name rs$n
+done &
+rename_loop_pid=$!
+
+while true; do ip netns exec $nsr nft list ruleset >/dev/null 2>&1; done &	# list the ruleset in a loop while the renames run
+nft_list_pid=$!
+
+ip netns exec $nsr nft monitor >/dev/null &
+nft_monitor_pid=$!
+
+ip netns exec $nss iperf3 --server --daemon -1
+summary_expr='s,^\[SUM\] .* \([0-9\.]\+\) Kbits/sec .* receiver,\1,p'	# keep only the receiver-side [SUM] rate
+rate=$(ip netns exec $nsc iperf3 \
+	--format k -c 10.1.0.1 --time $TEST_RUNTIME \
+	--length 56 --parallel 10 -i 0 | sed -n "$summary_expr")
+
+kill $nft_list_pid
+kill $nft_monitor_pid
+kill $rename_loop_pid
+wait	# reap the three background jobs before continuing
+
+wildcard_prep() {	# add flowtable ft_wild with device pattern "wild*" to table ip t in $nsr
+	ip netns exec $nsr nft -f - <<EOF
+table ip t {
+	flowtable ft_wild {
+		hook ingress priority 0
+		devices = { wild* }
+	}
+}
+EOF
+}
+
+if ! wildcard_prep; then	# a failed load only skips this part; the final checks below still run
+	echo "SKIP wildcard tests: not supported by host's nft?"
+else
+	for ((i = 0; i < 100; i++)); do	# create wild0..wild99 as parallel background jobs
+		ip -net $nsr link add wild$i type dummy &
+	done
+	wait
+	for ((i = 80; i < 100; i++)); do	# delete them again, starting with wild80..wild99
+		ip -net $nsr link del wild$i &
+	done
+	for ((i = 0; i < 80; i++)); do
+		ip -net $nsr link del wild$i &
+	done
+	wait
+	for ((i = 0; i < 100; i += 10)); do	# ten parallel subshells, each adding then deleting its own ten devices
+		(
+		for ((j = 0; j < 10; j++)); do
+			ip -net $nsr link add wild$((i + j)) type dummy
+		done
+		for ((j = 0; j < 10; j++)); do
+			ip -net $nsr link del wild$((i + j))
+		done
+		) &
+	done
+	wait
+fi
+
+[[ $(</proc/sys/kernel/tainted) -eq 0 ]] || {
+	echo "FAIL: Kernel is tainted!"
+	exit $ksft_fail
+}
+
+[[ $rate -gt 0 ]] || {
+	echo "FAIL: Zero throughput in iperf3"
+	exit $ksft_fail
+}
+
+[[ -f /sys/kernel/debug/kmemleak && \
+   -n $(</sys/kernel/debug/kmemleak) ]] && {
+	echo "FAIL: non-empty kmemleak report"
+	exit $ksft_fail
+}
+
+exit $ksft_pass
diff --git a/tools/testing/selftests/net/netfilter/nft_nat_zones.sh b/tools/testing/selftests/net/netfilter/nft_nat_zones.sh
index 3b81d88bdde3..9f200f80253a 100755
--- a/tools/testing/selftests/net/netfilter/nft_nat_zones.sh
+++ b/tools/testing/selftests/net/netfilter/nft_nat_zones.sh
@@ -88,7 +88,6 @@ for i in $(seq 1 "$maxclients");do
   echo netns exec "$cl" sysctl -q net.ipv4.tcp_syn_retries=2
   echo netns exec "$gw" ip link set "veth$i" up
   echo netns exec "$gw" sysctl -q net.ipv4.conf.veth"$i".arp_ignore=2
-  echo netns exec "$gw" sysctl -q net.ipv4.conf.veth"$i".rp_filter=0
 
   # clients have same IP addresses.
   echo netns exec "$cl" ip addr add 10.1.0.3/24 dev eth0
@@ -178,7 +177,6 @@ fi
 
 ip netns exec "$gw" sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null
 ip netns exec "$gw" sysctl -q net.ipv6.conf.all.forwarding=1 > /dev/null
-ip netns exec "$gw" sysctl -q net.ipv4.conf.all.rp_filter=0 >/dev/null
 
 # useful for debugging: allows to use 'ping' from clients to gateway.
 ip netns exec "$gw" sysctl -q net.ipv4.fwmark_reflect=1 > /dev/null
diff --git a/tools/testing/selftests/net/netfilter/nft_queue.sh b/tools/testing/selftests/net/netfilter/nft_queue.sh
index 784d1b46912b..6136ceec45e0 100755
--- a/tools/testing/selftests/net/netfilter/nft_queue.sh
+++ b/tools/testing/selftests/net/netfilter/nft_queue.sh
@@ -10,6 +10,8 @@ source lib.sh
 ret=0
 timeout=5
 
+SCTP_TEST_TIMEOUT=60
+
 cleanup()
 {
 	ip netns pids "$ns1" | xargs kill 2>/dev/null
@@ -40,7 +42,7 @@ TMPFILE3=$(mktemp)
 
 TMPINPUT=$(mktemp)
 COUNT=200
-[ "$KSFT_MACHINE_SLOW" = "yes" ] && COUNT=25
+[ "$KSFT_MACHINE_SLOW" = "yes" ] && COUNT=$((COUNT/8))
 dd conv=sparse status=none if=/dev/zero bs=1M count=$COUNT of="$TMPINPUT"
 
 if ! ip link add veth0 netns "$nsrouter" type veth peer name eth0 netns "$ns1" > /dev/null 2>&1; then
@@ -275,9 +277,11 @@ test_tcp_forward()
 	busywait "$BUSYWAIT_TIMEOUT" listener_ready "$ns2"
 	busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 2
 
+	local tthen=$(date +%s)
+
 	ip netns exec "$ns1" socat -u STDIN TCP:10.0.2.99:12345 <"$TMPINPUT" >/dev/null
 
-	wait "$rpid" && echo "PASS: tcp and nfqueue in forward chain"
+	wait_and_check_retval "$rpid" "tcp and nfqueue in forward chain" "$tthen"
 	kill "$nfqpid"
 }
 
@@ -288,13 +292,14 @@ test_tcp_localhost()
 
 	ip netns exec "$nsrouter" ./nf_queue -q 3 &
 	local nfqpid=$!
+	local tthen=$(date +%s)
 
 	busywait "$BUSYWAIT_TIMEOUT" listener_ready "$nsrouter"
 	busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 3
 
 	ip netns exec "$nsrouter" socat -u STDIN TCP:127.0.0.1:12345 <"$TMPINPUT" >/dev/null
 
-	wait "$rpid" && echo "PASS: tcp via loopback"
+	wait_and_check_retval "$rpid" "tcp via loopback" "$tthen"
 	kill "$nfqpid"
 }
 
@@ -417,6 +422,23 @@ check_output_files()
 	fi
 }
 
+wait_and_check_retval()
+{	# reap background job $1, print PASS/FAIL for "$2" with the seconds elapsed since $3; sets ret=1 on failure
+	local rpid="$1"
+	local msg="$2"
+	local tthen="$3"
+	local tnow
+
+	if wait "$rpid";then
+		echo -n "PASS: "
+	else
+		echo -n "FAIL: "
+		ret=1
+	fi
+	tnow=$(date +%s)	# sampled after wait so the time spent waiting for $rpid is included
+	printf "%s (duration: %ds)\n" "$msg" $((tnow-tthen))
+}
+
 test_sctp_forward()
 {
 	ip netns exec "$nsrouter" nft -f /dev/stdin <<EOF
@@ -428,13 +450,14 @@ table inet sctpq {
         }
 }
 EOF
-	timeout 60 ip netns exec "$ns2" socat -u SCTP-LISTEN:12345 STDOUT > "$TMPFILE1" &
+	timeout "$SCTP_TEST_TIMEOUT" ip netns exec "$ns2" socat -u SCTP-LISTEN:12345 STDOUT > "$TMPFILE1" &
 	local rpid=$!
 
 	busywait "$BUSYWAIT_TIMEOUT" sctp_listener_ready "$ns2"
 
 	ip netns exec "$nsrouter" ./nf_queue -q 10 -G &
 	local nfqpid=$!
+	local tthen=$(date +%s)
 
 	ip netns exec "$ns1" socat -u STDIN SCTP:10.0.2.99:12345 <"$TMPINPUT" >/dev/null
 
@@ -443,7 +466,7 @@ EOF
 		exit 1
 	fi
 
-	wait "$rpid" && echo "PASS: sctp and nfqueue in forward chain"
+	wait_and_check_retval "$rpid" "sctp and nfqueue in forward chain" "$tthen"
 	kill "$nfqpid"
 
 	check_output_files "$TMPINPUT" "$TMPFILE1" "sctp forward"
@@ -462,13 +485,14 @@ EOF
 	# reduce test file size, software segmentation causes sk wmem increase.
 	dd conv=sparse status=none if=/dev/zero bs=1M count=$((COUNT/2)) of="$TMPINPUT"
 
-	timeout 60 ip netns exec "$ns2" socat -u SCTP-LISTEN:12345 STDOUT > "$TMPFILE1" &
+	timeout "$SCTP_TEST_TIMEOUT" ip netns exec "$ns2" socat -u SCTP-LISTEN:12345 STDOUT > "$TMPFILE1" &
 	local rpid=$!
 
 	busywait "$BUSYWAIT_TIMEOUT" sctp_listener_ready "$ns2"
 
 	ip netns exec "$ns1" ./nf_queue -q 11 &
 	local nfqpid=$!
+	local tthen=$(date +%s)
 
 	ip netns exec "$ns1" socat -u STDIN SCTP:10.0.2.99:12345 <"$TMPINPUT" >/dev/null
 
@@ -478,7 +502,7 @@ EOF
 	fi
 
 	# must wait before checking completeness of output file.
-	wait "$rpid" && echo "PASS: sctp and nfqueue in output chain with GSO"
+	wait_and_check_retval "$rpid" "sctp and nfqueue in output chain with GSO" "$tthen"
 	kill "$nfqpid"
 
 	check_output_files "$TMPINPUT" "$TMPFILE1" "sctp output"
diff --git a/tools/testing/selftests/net/netfilter/rpath.sh b/tools/testing/selftests/net/netfilter/rpath.sh
index 86ec4e68594d..24ad41d526d9 100755
--- a/tools/testing/selftests/net/netfilter/rpath.sh
+++ b/tools/testing/selftests/net/netfilter/rpath.sh
@@ -1,8 +1,7 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
-# return code to signal skipped test
-ksft_skip=4
+source lib.sh
 
 # search for legacy iptables (it uses the xtables extensions
 if iptables-legacy --version >/dev/null 2>&1; then
@@ -32,17 +31,10 @@ if [ -z "$iptables$ip6tables$nft" ]; then
 	exit $ksft_skip
 fi
 
-sfx=$(mktemp -u "XXXXXXXX")
-ns1="ns1-$sfx"
-ns2="ns2-$sfx"
-trap "ip netns del $ns1; ip netns del $ns2" EXIT
-
-# create two netns, disable rp_filter in ns2 and
-# keep IPv6 address when moving into VRF
-ip netns add "$ns1"
-ip netns add "$ns2"
-ip netns exec "$ns2" sysctl -q net.ipv4.conf.all.rp_filter=0
-ip netns exec "$ns2" sysctl -q net.ipv4.conf.default.rp_filter=0
+trap cleanup_all_ns EXIT
+
+# create two netns, keep IPv6 address when moving into VRF
+setup_ns ns1 ns2
 ip netns exec "$ns2" sysctl -q net.ipv6.conf.all.keep_addr_on_down=1
 
 # a standard connection between the netns, should not trigger rp filter
diff --git a/tools/testing/selftests/net/ovpn/.gitignore b/tools/testing/selftests/net/ovpn/.gitignore
new file mode 100644
index 000000000000..ee44c081ca7c
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0+
+ovpn-cli
diff --git a/tools/testing/selftests/net/ovpn/Makefile b/tools/testing/selftests/net/ovpn/Makefile
new file mode 100644
index 000000000000..e0926d76b4c8
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/Makefile
@@ -0,0 +1,32 @@
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2020-2025 OpenVPN, Inc.
+#
+CFLAGS = -pedantic -Wextra -Wall -Wl,--no-as-needed -g -O0 -ggdb $(KHDR_INCLUDES)
+VAR_CFLAGS := $(shell pkg-config --cflags libnl-3.0 libnl-genl-3.0 2>/dev/null)
+ifeq ($(VAR_CFLAGS),)
+VAR_CFLAGS = -I/usr/include/libnl3
+endif
+CFLAGS += $(VAR_CFLAGS)
+
+# Both pkg-config probes use := so they run once at parse time, not on every expansion.
+LDLIBS = -lmbedtls -lmbedcrypto
+VAR_LDLIBS := $(shell pkg-config --libs libnl-3.0 libnl-genl-3.0 2>/dev/null)
+ifeq ($(VAR_LDLIBS),)
+VAR_LDLIBS = -lnl-genl-3 -lnl-3
+endif
+LDLIBS += $(VAR_LDLIBS)
+
+
+TEST_FILES = common.sh
+
+TEST_PROGS = test.sh \
+	test-large-mtu.sh \
+	test-chachapoly.sh \
+	test-tcp.sh \
+	test-float.sh \
+	test-close-socket.sh \
+	test-close-socket-tcp.sh
+
+TEST_GEN_FILES := ovpn-cli
+
+include ../../lib.mk
diff --git a/tools/testing/selftests/net/ovpn/common.sh b/tools/testing/selftests/net/ovpn/common.sh
new file mode 100644
index 000000000000..88869c675d03
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/common.sh
@@ -0,0 +1,108 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2020-2025 OpenVPN, Inc.
+#
+#  Author:	Antonio Quartulli <antonio@openvpn.net>
+
+UDP_PEERS_FILE=${UDP_PEERS_FILE:-udp_peers.txt} # peer list used when PROTO is UDP
+TCP_PEERS_FILE=${TCP_PEERS_FILE:-tcp_peers.txt} # peer list used for any other PROTO
+OVPN_CLI=${OVPN_CLI:-./ovpn-cli} # helper binary run inside the peer namespaces
+ALG=${ALG:-aes} # cipher name passed to "ovpn-cli new_key"
+PROTO=${PROTO:-UDP} # "UDP" selects the UDP code paths, anything else the TCP ones
+FLOAT=${FLOAT:-0} # unused in this file; presumably read by the test scripts
+
+LAN_IP="11.11.11.11" # secondary address put on peer1's tun device by setup_ns
+
+# Create the network namespace "peer<N>", N being the peer ID given as $1
+create_ns() {
+	local id=${1}
+
+	ip netns add peer${id}
+}
+
+setup_ns() {
+	MODE="P2P"
+
+	if [ ${1} -eq 0 ]; then
+		MODE="MP"
+		for p in $(seq 1 ${NUM_PEERS}); do
+			ip link add veth${p} netns peer0 type veth peer name veth${p} netns peer${p}
+
+			ip -n peer0 addr add 10.10.${p}.1/24 dev veth${p}
+			ip -n peer0 addr add fd00:0:0:${p}::1/64 dev veth${p}
+			ip -n peer0 link set veth${p} up
+
+			ip -n peer${p} addr add 10.10.${p}.2/24 dev veth${p}
+			ip -n peer${p} addr add fd00:0:0:${p}::2/64 dev veth${p}
+			ip -n peer${p} link set veth${p} up
+		done
+	fi
+
+	ip netns exec peer${1} ${OVPN_CLI} new_iface tun${1} $MODE
+	ip -n peer${1} addr add ${2} dev tun${1}
+	# add a secondary IP to peer 1, to test a LAN behind a client
+	if [ ${1} -eq 1 -a -n "${LAN_IP}" ]; then
+		ip -n peer${1} addr add ${LAN_IP} dev tun${1}
+		ip -n peer0 route add ${LAN_IP} via $(echo ${2} |sed -e s'!/.*!!') dev tun0
+	fi
+	if [ -n "${3}" ]; then
+		ip -n peer${1} link set mtu ${3} dev tun${1}
+	fi
+	ip -n peer${1} link set tun${1} up
+}
+
+add_peer() {
+	if [ "${PROTO}" == "UDP" ]; then
+		if [ ${1} -eq 0 ]; then
+			ip netns exec peer0 ${OVPN_CLI} new_multi_peer tun0 1 ${UDP_PEERS_FILE}
+
+			for p in $(seq 1 ${NUM_PEERS}); do
+				ip netns exec peer0 ${OVPN_CLI} new_key tun0 ${p} 1 0 ${ALG} 0 \
+					data64.key
+			done
+		else
+			RADDR=$(awk "NR == ${1} {print \$2}" ${UDP_PEERS_FILE})
+			RPORT=$(awk "NR == ${1} {print \$3}" ${UDP_PEERS_FILE})
+			LPORT=$(awk "NR == ${1} {print \$5}" ${UDP_PEERS_FILE})
+			ip netns exec peer${1} ${OVPN_CLI} new_peer tun${1} ${1} ${LPORT} \
+				${RADDR} ${RPORT}
+			ip netns exec peer${1} ${OVPN_CLI} new_key tun${1} ${1} 1 0 ${ALG} 1 \
+				data64.key
+		fi
+	else
+		if [ ${1} -eq 0 ]; then
+			(ip netns exec peer0 ${OVPN_CLI} listen tun0 1 ${TCP_PEERS_FILE} && {
+				for p in $(seq 1 ${NUM_PEERS}); do
+					ip netns exec peer0 ${OVPN_CLI} new_key tun0 ${p} 1 0 \
+						${ALG} 0 data64.key
+				done
+			}) &
+			sleep 5
+		else
+			ip netns exec peer${1} ${OVPN_CLI} connect tun${1} ${1} 10.10.${1}.1 1 \
+				data64.key
+		fi
+	fi
+}
+
+# Best-effort teardown of everything the helpers above may have created.
+# Every step ignores failures, so it is safe to call on a partial setup.
+cleanup() {
+	local cli_name
+
+	# ovpn-cli instances sleeping in the background have to be killed by hand
+	cli_name=$(basename ${OVPN_CLI})
+	killall ${cli_name} 2>/dev/null || true
+
+	# tun ifaces of peers 1..10 are removed explicitly; peer0's netns is
+	# deleted without erasing its ifaces first
+	for p in {1..10}; do
+		ip -n peer${p} link set tun${p} down 2>/dev/null || true
+		ip netns exec peer${p} ${OVPN_CLI} del_iface tun${p} 2>/dev/null || true
+	done
+
+	for p in {1..10}; do
+		ip -n peer0 link del veth${p} 2>/dev/null || true
+	done
+
+	for p in {0..10}; do
+		ip netns del peer${p} 2>/dev/null || true
+	done
+}
+
+if [ "${PROTO}" == "UDP" ]; then # unless preset, NUM_PEERS is the peers file line count
+	NUM_PEERS=${NUM_PEERS:-$(wc -l ${UDP_PEERS_FILE} | awk '{print $1}')}
+else # any other PROTO value counts the TCP peers file instead
+	NUM_PEERS=${NUM_PEERS:-$(wc -l ${TCP_PEERS_FILE} | awk '{print $1}')}
+fi
+
+
diff --git a/tools/testing/selftests/net/ovpn/config b/tools/testing/selftests/net/ovpn/config
new file mode 100644
index 000000000000..71946ba9fa17
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/config
@@ -0,0 +1,10 @@
+CONFIG_NET=y
+CONFIG_INET=y
+CONFIG_STREAM_PARSER=y
+CONFIG_NET_UDP_TUNNEL=y
+CONFIG_DST_CACHE=y
+CONFIG_CRYPTO=y
+CONFIG_CRYPTO_AES=y
+CONFIG_CRYPTO_GCM=y
+CONFIG_CRYPTO_CHACHA20POLY1305=y
+CONFIG_OVPN=m
diff --git a/tools/testing/selftests/net/ovpn/data64.key b/tools/testing/selftests/net/ovpn/data64.key
new file mode 100644
index 000000000000..a99e88c4e290
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/data64.key
@@ -0,0 +1,5 @@
+jRqMACN7d7/aFQNT8S7jkrBD8uwrgHbG5OQZP2eu4R1Y7tfpS2bf5RHv06Vi163CGoaIiTX99R3B
+ia9ycAH8Wz1+9PWv51dnBLur9jbShlgZ2QHLtUc4a/gfT7zZwULXuuxdLnvR21DDeMBaTbkgbai9
+uvAa7ne1liIgGFzbv+Bas4HDVrygxIxuAnP5Qgc3648IJkZ0QEXPF+O9f0n5+QIvGCxkAUVx+5K6
+KIs+SoeWXnAopELmoGSjUpFtJbagXK82HfdqpuUxT2Tnuef0/14SzVE/vNleBNu2ZbyrSAaah8tE
+BofkPJUBFY+YQcfZNM5Dgrw3i+Bpmpq/gpdg5w==
diff --git a/tools/testing/selftests/net/ovpn/ovpn-cli.c b/tools/testing/selftests/net/ovpn/ovpn-cli.c
new file mode 100644
index 000000000000..de9c26f98b2e
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/ovpn-cli.c
@@ -0,0 +1,2383 @@
+// SPDX-License-Identifier: GPL-2.0
+/*  OpenVPN data channel accelerator
+ *
+ *  Copyright (C) 2020-2025 OpenVPN, Inc.
+ *
+ *  Author:	Antonio Quartulli <antonio@openvpn.net>
+ */
+
+#include <stdio.h>
+#include <inttypes.h>
+#include <stdbool.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+#include <arpa/inet.h>
+#include <net/if.h>
+#include <netinet/in.h>
+#include <time.h>
+
+#include <linux/ovpn.h>
+#include <linux/types.h>
+#include <linux/netlink.h>
+
+#include <netlink/socket.h>
+#include <netlink/netlink.h>
+#include <netlink/genl/genl.h>
+#include <netlink/genl/family.h>
+#include <netlink/genl/ctrl.h>
+
+#include <mbedtls/base64.h>
+#include <mbedtls/error.h>
+
+#include <sys/socket.h>
+
+/* aliases that keep checkpatch quiet; NOTE: strncpy may leave dst unterminated */
+#define strscpy strncpy
+#define __always_unused __attribute__((__unused__))
+
+/* libnl < 3.5.0 does not set the NLA_F_NESTED on its own, therefore we
+ * have to explicitly do it to prevent the kernel from failing upon
+ * parsing of the message
+ */
+#define nla_nest_start(_msg, _type) \
+	nla_nest_start(_msg, (_type) | NLA_F_NESTED)
+
+/* Stand-in for nla_get_uint(), which libnl < 3.11.0 does not implement:
+ * a 4-byte payload is read as u32, any other payload as u64.
+ */
+uint64_t ovpn_nla_get_uint(struct nlattr *attr)
+{
+	const bool is_u32 = nla_len(attr) == sizeof(uint32_t);
+
+	return is_u32 ? nla_get_u32(attr) : nla_get_u64(attr);
+}
+
+typedef int (*ovpn_nl_cb)(struct nl_msg *msg, void *arg); /* handler for valid replies */
+
+enum ovpn_key_direction { /* selects which half of the key file encrypts */
+	KEY_DIR_IN = 0, /* first key block encrypts, second one decrypts */
+	KEY_DIR_OUT, /* second key block encrypts, first one decrypts */
+};
+
+#define KEY_LEN (256 / 8) /* size in bytes of each cipher key */
+#define NONCE_LEN 8 /* bytes of nonce taken from the key file */
+
+#define PEER_ID_UNDEF 0x00FFFFFF /* no peer selected: ovpn_get_peer() dumps all */
+#define MAX_PEERS 10 /* capacity of ovpn_ctx.cli_sockets */
+
+struct nl_ctx { /* state of a single generic netlink request */
+	struct nl_sock *nl_sock; /* socket connected to generic netlink */
+	struct nl_msg *nl_msg; /* request message being assembled */
+	struct nl_cb *nl_cb; /* callback set installed on nl_sock */
+
+	int ovpn_dco_id; /* family ID resolved from OVPN_FAMILY_NAME */
+};
+
+enum ovpn_cmd { /* ovpn-cli operations; NOTE(review): presumably chosen from argv */
+	CMD_INVALID,
+	CMD_NEW_IFACE,
+	CMD_DEL_IFACE,
+	CMD_LISTEN,
+	CMD_CONNECT,
+	CMD_NEW_PEER,
+	CMD_NEW_MULTI_PEER,
+	CMD_SET_PEER,
+	CMD_DEL_PEER,
+	CMD_GET_PEER,
+	CMD_NEW_KEY,
+	CMD_DEL_KEY,
+	CMD_GET_KEY,
+	CMD_SWAP_KEYS,
+	CMD_LISTEN_MCAST,
+};
+
+struct ovpn_ctx { /* CLI state shared by the helpers in this file */
+	enum ovpn_cmd cmd;
+
+	__u8 key_enc[KEY_LEN]; /* encryption key, filled by ovpn_parse_key() */
+	__u8 key_dec[KEY_LEN]; /* decryption key, filled by ovpn_parse_key() */
+	__u8 nonce[NONCE_LEN]; /* nonce tail sent with both key directions */
+
+	enum ovpn_cipher_alg cipher; /* set by ovpn_parse_cipher() */
+
+	sa_family_t sa_family; /* family of the socket made by ovpn_socket() */
+
+	unsigned long peer_id; /* PEER_ID_UNDEF makes ovpn_get_peer() dump all */
+	unsigned long lport; /* local port bound by ovpn_socket() */
+
+	union {
+		struct sockaddr_in in4;
+		struct sockaddr_in6 in6;
+	} remote; /* transport address of the remote endpoint */
+
+	union {
+		struct sockaddr_in in4;
+		struct sockaddr_in6 in6;
+	} peer_ip; /* VPN address of the peer, sent only if peer_ip_set */
+
+	bool peer_ip_set;
+
+	unsigned int ifindex; /* added to requests as OVPN_A_IFINDEX when > 0 */
+	char ifname[IFNAMSIZ];
+	enum ovpn_mode mode;
+	bool mode_set;
+
+	int socket; /* fd stored by ovpn_socket() / ovpn_connect() */
+	int cli_sockets[MAX_PEERS]; /* NOTE(review): presumably accepted TCP clients */
+
+	__u32 keepalive_interval; /* sent by ovpn_set_peer() */
+	__u32 keepalive_timeout; /* sent by ovpn_set_peer() */
+
+	enum ovpn_key_direction key_dir; /* set by ovpn_parse_key_direction() */
+	enum ovpn_key_slot key_slot;
+	int key_id;
+
+	const char *peers_file;
+};
+
+/* Receive and dispatch pending netlink messages for @ctx, describing any
+ * failure on stderr. The value returned by nl_recvmsgs() is passed through.
+ */
+static int ovpn_nl_recvmsgs(struct nl_ctx *ctx)
+{
+	const int err = nl_recvmsgs(ctx->nl_sock, ctx->nl_cb);
+
+	if (!err)
+		return err;
+
+	if (err == -NLE_INTR)
+		fprintf(stderr,
+			"netlink received interrupt due to signal - ignoring\n");
+	else if (err == -NLE_NOMEM)
+		fprintf(stderr, "netlink out of memory error\n");
+	else if (err == -NLE_AGAIN)
+		fprintf(stderr,
+			"netlink reports blocking read - aborting wait\n");
+	else
+		fprintf(stderr, "netlink reports error (%d): %s\n",
+			err, nl_geterror(-err));
+
+	return err;
+}
+
+/* Allocate a netlink context and prepare a request for the ovpn family.
+ *
+ * @ovpn: CLI context; a non-zero ifindex is added as OVPN_A_IFINDEX
+ * @cmd: generic netlink command stored in the message header
+ * @flags: netlink message flags (e.g. NLM_F_DUMP)
+ *
+ * Returns the new context, to be released with nl_ctx_free(), or NULL on
+ * failure. On failure everything allocated so far is released.
+ */
+static struct nl_ctx *nl_ctx_alloc_flags(struct ovpn_ctx *ovpn, int cmd,
+					 int flags)
+{
+	struct nl_ctx *ctx;
+	int ext_ack = 1, ret;
+
+	ctx = calloc(1, sizeof(*ctx));
+	if (!ctx)
+		return NULL;
+
+	ctx->nl_sock = nl_socket_alloc();
+	if (!ctx->nl_sock) {
+		fprintf(stderr, "cannot allocate netlink socket\n");
+		goto err_free;
+	}
+
+	nl_socket_set_buffer_size(ctx->nl_sock, 8192, 8192);
+
+	ret = genl_connect(ctx->nl_sock);
+	if (ret) {
+		fprintf(stderr, "cannot connect to generic netlink: %s\n",
+			nl_geterror(ret));
+		goto err_sock;
+	}
+
+	/* enable Extended ACK for detailed error reporting (best effort) */
+	setsockopt(nl_socket_get_fd(ctx->nl_sock), SOL_NETLINK, NETLINK_EXT_ACK,
+		   &ext_ack, sizeof(ext_ack));
+
+	ctx->ovpn_dco_id = genl_ctrl_resolve(ctx->nl_sock, OVPN_FAMILY_NAME);
+	if (ctx->ovpn_dco_id < 0) {
+		fprintf(stderr, "cannot find ovpn_dco netlink component: %d\n",
+			ctx->ovpn_dco_id);
+		/* the socket exists at this point and must be released too */
+		goto err_sock;
+	}
+
+	ctx->nl_msg = nlmsg_alloc();
+	if (!ctx->nl_msg) {
+		fprintf(stderr, "cannot allocate netlink message\n");
+		goto err_sock;
+	}
+
+	ctx->nl_cb = nl_cb_alloc(NL_CB_DEFAULT);
+	if (!ctx->nl_cb) {
+		fprintf(stderr, "failed to allocate netlink callback\n");
+		goto err_msg;
+	}
+
+	nl_socket_set_cb(ctx->nl_sock, ctx->nl_cb);
+
+	if (!genlmsg_put(ctx->nl_msg, 0, 0, ctx->ovpn_dco_id, 0, flags, cmd,
+			 0)) {
+		fprintf(stderr, "cannot add netlink message headers\n");
+		goto err_cb;
+	}
+
+	if (ovpn->ifindex > 0)
+		NLA_PUT_U32(ctx->nl_msg, OVPN_A_IFINDEX, ovpn->ifindex);
+
+	return ctx;
+nla_put_failure:
+err_cb:
+	/* drop our reference; the socket releases its own when freed */
+	nl_cb_put(ctx->nl_cb);
+err_msg:
+	nlmsg_free(ctx->nl_msg);
+err_sock:
+	nl_socket_free(ctx->nl_sock);
+err_free:
+	free(ctx);
+	return NULL;
+}
+
+/* Shorthand for nl_ctx_alloc_flags() when no netlink flags are needed */
+static struct nl_ctx *nl_ctx_alloc(struct ovpn_ctx *ovpn, int cmd)
+{
+	const int no_flags = 0;
+
+	return nl_ctx_alloc_flags(ovpn, cmd, no_flags);
+}
+
+/* Release the socket, message and callback set held by @ctx, then @ctx
+ * itself. A NULL @ctx is accepted and ignored.
+ */
+static void nl_ctx_free(struct nl_ctx *ctx)
+{
+	if (ctx) {
+		nl_socket_free(ctx->nl_sock);
+		nlmsg_free(ctx->nl_msg);
+		nl_cb_put(ctx->nl_cb);
+		free(ctx);
+	}
+}
+
+static int ovpn_nl_cb_error(struct sockaddr_nl (*nla)__always_unused,
+			    struct nlmsgerr *err, void *arg)
+{
+	struct nlmsghdr *nlh = (struct nlmsghdr *)err - 1;
+	struct nlattr *tb_msg[NLMSGERR_ATTR_MAX + 1];
+	int len = nlh->nlmsg_len;
+	struct nlattr *attrs;
+	int *ret = arg;
+	int ack_len = sizeof(*nlh) + sizeof(int) + sizeof(*nlh);
+
+	*ret = err->error;
+
+	if (!(nlh->nlmsg_flags & NLM_F_ACK_TLVS))
+		return NL_STOP;
+
+	if (!(nlh->nlmsg_flags & NLM_F_CAPPED))
+		ack_len += err->msg.nlmsg_len - sizeof(*nlh);
+
+	if (len <= ack_len)
+		return NL_STOP;
+
+	attrs = (void *)((uint8_t *)nlh + ack_len);
+	len -= ack_len;
+
+	nla_parse(tb_msg, NLMSGERR_ATTR_MAX, attrs, len, NULL);
+	if (tb_msg[NLMSGERR_ATTR_MSG]) {
+		len = strnlen((char *)nla_data(tb_msg[NLMSGERR_ATTR_MSG]),
+			      nla_len(tb_msg[NLMSGERR_ATTR_MSG]));
+		fprintf(stderr, "kernel error: %*s\n", len,
+			(char *)nla_data(tb_msg[NLMSGERR_ATTR_MSG]));
+	}
+
+	if (tb_msg[NLMSGERR_ATTR_MISS_NEST]) {
+		fprintf(stderr, "missing required nesting type %u\n",
+			nla_get_u32(tb_msg[NLMSGERR_ATTR_MISS_NEST]));
+	}
+
+	if (tb_msg[NLMSGERR_ATTR_MISS_TYPE]) {
+		fprintf(stderr, "missing required attribute type %u\n",
+			nla_get_u32(tb_msg[NLMSGERR_ATTR_MISS_TYPE]));
+	}
+
+	return NL_STOP;
+}
+
+/* NL_CB_FINISH handler: clear the status pointed to by @arg and skip on */
+static int ovpn_nl_cb_finish(struct nl_msg (*msg)__always_unused,
+			     void *arg)
+{
+	*(int *)arg = 0;
+
+	return NL_SKIP;
+}
+
+/* NL_CB_ACK handler: clear the status pointed to by @arg and stop parsing */
+static int ovpn_nl_cb_ack(struct nl_msg (*msg)__always_unused,
+			  void *arg)
+{
+	*(int *)arg = 0;
+
+	return NL_STOP;
+}
+
+/* Send the request held by @ctx and wait until it has been answered.
+ *
+ * @ctx: context prepared by nl_ctx_alloc_flags()
+ * @cb: optional handler invoked (with @ctx as argument) for each valid reply
+ *
+ * Returns 0 on success or a negative errno value: the code reported by the
+ * kernel, or -EIO when the message cannot be sent or no reply can be read.
+ */
+static int ovpn_nl_msg_send(struct nl_ctx *ctx, ovpn_nl_cb cb)
+{
+	int status = 1, ret;
+
+	nl_cb_err(ctx->nl_cb, NL_CB_CUSTOM, ovpn_nl_cb_error, &status);
+	nl_cb_set(ctx->nl_cb, NL_CB_FINISH, NL_CB_CUSTOM, ovpn_nl_cb_finish,
+		  &status);
+	nl_cb_set(ctx->nl_cb, NL_CB_ACK, NL_CB_CUSTOM, ovpn_nl_cb_ack, &status);
+
+	if (cb)
+		nl_cb_set(ctx->nl_cb, NL_CB_VALID, NL_CB_CUSTOM, cb, ctx);
+
+	/* without a request on the wire no reply will ever arrive */
+	ret = nl_send_auto_complete(ctx->nl_sock, ctx->nl_msg);
+	if (ret < 0) {
+		fprintf(stderr, "cannot send netlink message: %s (%d)\n",
+			nl_geterror(ret), ret);
+		return -EIO;
+	}
+
+	/* the callbacks above move status away from 1 once a reply is seen */
+	while (status == 1) {
+		ret = ovpn_nl_recvmsgs(ctx);
+		/* a failed read that ran no callback would repeat forever */
+		if (ret < 0 && ret != -NLE_INTR && status == 1)
+			status = -EIO;
+	}
+
+	if (status < 0)
+		fprintf(stderr, "failed to send netlink message: %s (%d)\n",
+			strerror(-status), status);
+
+	return status;
+}
+
+/* Load the key material for @ctx from a base64 encoded file.
+ *
+ * At most the first 256 bytes of @file are read and decoded. The decoded
+ * data must hold two keys of KEY_LEN bytes followed by NONCE_LEN bytes of
+ * nonce; ctx->key_dir decides which key encrypts and which one decrypts.
+ *
+ * Returns 0 on success and a negative value on any failure, in which case
+ * the keys in @ctx must not be used.
+ */
+static int ovpn_parse_key(const char *file, struct ovpn_ctx *ctx)
+{
+	int idx_enc, idx_dec, rc, ret = -1;
+	unsigned char *ckey = NULL;
+	size_t olen = 0, nread;
+	__u8 *bkey = NULL;
+	long ckey_len = -1;
+	char errbuf[256];
+	FILE *fp;
+
+	fp = fopen(file, "r");
+	if (!fp) {
+		fprintf(stderr, "cannot open: %s\n", file);
+		return -1;
+	}
+
+	/* get file size */
+	if (fseek(fp, 0L, SEEK_END) == 0)
+		ckey_len = ftell(fp);
+	if (ckey_len <= 0) {
+		fprintf(stderr, "key file is empty or has unknown size: %s\n",
+			file);
+		goto err;
+	}
+	rewind(fp);
+
+	/* if the file is longer, let's just read a portion */
+	if (ckey_len > 256)
+		ckey_len = 256;
+
+	ckey = malloc(ckey_len);
+	if (!ckey)
+		goto err;
+
+	nread = fread(ckey, 1, ckey_len, fp);
+	if (nread != (size_t)ckey_len) {
+		fprintf(stderr,
+			"couldn't read enough data from key file: %zubytes read\n",
+			nread);
+		goto err;
+	}
+
+	/* first pass with no buffer only computes the decoded length */
+	rc = mbedtls_base64_decode(NULL, 0, &olen, ckey, ckey_len);
+	if (rc != MBEDTLS_ERR_BASE64_BUFFER_TOO_SMALL) {
+		mbedtls_strerror(rc, errbuf, sizeof(errbuf));
+		fprintf(stderr, "unexpected base64 error1: %s (%d)\n", errbuf,
+			rc);
+		goto err;
+	}
+
+	bkey = malloc(olen);
+	if (!bkey) {
+		fprintf(stderr, "cannot allocate binary key buffer\n");
+		goto err;
+	}
+
+	rc = mbedtls_base64_decode(bkey, olen, &olen, ckey, ckey_len);
+	if (rc) {
+		mbedtls_strerror(rc, errbuf, sizeof(errbuf));
+		fprintf(stderr, "unexpected base64 error2: %s (%d)\n", errbuf,
+			rc);
+		goto err;
+	}
+
+	if (olen < 2 * KEY_LEN + NONCE_LEN) {
+		fprintf(stderr,
+			"not enough data in key file, found %zuB but needs %dB\n",
+			olen, 2 * KEY_LEN + NONCE_LEN);
+		goto err;
+	}
+
+	switch (ctx->key_dir) {
+	case KEY_DIR_IN:
+		idx_enc = 0;
+		idx_dec = 1;
+		break;
+	case KEY_DIR_OUT:
+		idx_enc = 1;
+		idx_dec = 0;
+		break;
+	default:
+		goto err;
+	}
+
+	memcpy(ctx->key_enc, bkey + KEY_LEN * idx_enc, KEY_LEN);
+	memcpy(ctx->key_dec, bkey + KEY_LEN * idx_dec, KEY_LEN);
+	memcpy(ctx->nonce, bkey + 2 * KEY_LEN, NONCE_LEN);
+
+	/* ret stays negative on every path that jumps to err before here */
+	ret = 0;
+
+err:
+	fclose(fp);
+	free(bkey);
+	free(ckey);
+
+	return ret;
+}
+
+/* Translate the cipher name given on the command line ("aes", "chachapoly"
+ * or "none") into ctx->cipher. Returns 0, or -ENOTSUP for any other name.
+ */
+static int ovpn_parse_cipher(const char *cipher, struct ovpn_ctx *ctx)
+{
+	static const struct {
+		const char *name;
+		enum ovpn_cipher_alg alg;
+	} known[] = {
+		{ "aes", OVPN_CIPHER_ALG_AES_GCM },
+		{ "chachapoly", OVPN_CIPHER_ALG_CHACHA20_POLY1305 },
+		{ "none", OVPN_CIPHER_ALG_NONE },
+	};
+	size_t i;
+
+	for (i = 0; i < sizeof(known) / sizeof(known[0]); i++) {
+		if (strcmp(cipher, known[i].name) == 0) {
+			ctx->cipher = known[i].alg;
+			return 0;
+		}
+	}
+
+	return -ENOTSUP;
+}
+
+/* Parse @dir as a decimal number and store it in ctx->key_dir.
+ * Only KEY_DIR_IN (0) and KEY_DIR_OUT (1) are accepted; anything else is
+ * reported on stderr and -1 is returned.
+ */
+static int ovpn_parse_key_direction(const char *dir, struct ovpn_ctx *ctx)
+{
+	int in_dir = strtoll(dir, NULL, 10);
+
+	if (in_dir != KEY_DIR_IN && in_dir != KEY_DIR_OUT) {
+		fprintf(stderr,
+			"invalid key direction provided. Can be 0 or 1 only\n");
+		return -1;
+	}
+
+	ctx->key_dir = in_dir;
+
+	return 0;
+}
+
+/* Create a socket bound to the wildcard address on port ctx->lport.
+ *
+ * @ctx: on success receives the descriptor (ctx->socket) and @family
+ * @family: AF_INET or AF_INET6 (the latter accepts IPv4 traffic as well)
+ * @proto: IPPROTO_UDP for a datagram socket, IPPROTO_TCP for a stream one
+ *
+ * Returns 0 on success, -EINVAL for an unsupported @proto and -1 on any
+ * other failure. No descriptor is left open on failure.
+ */
+static int ovpn_socket(struct ovpn_ctx *ctx, sa_family_t family, int proto)
+{
+	struct sockaddr_storage local_sock = { 0 };
+	struct sockaddr_in6 *in6;
+	struct sockaddr_in *in;
+	int s, sock_type, opt = 1;
+	size_t sock_len;
+
+	if (proto == IPPROTO_UDP)
+		sock_type = SOCK_DGRAM;
+	else if (proto == IPPROTO_TCP)
+		sock_type = SOCK_STREAM;
+	else
+		return -EINVAL;
+
+	s = socket(family, sock_type, 0);
+	if (s < 0) {
+		perror("cannot create socket");
+		return -1;
+	}
+
+	switch (family) {
+	case AF_INET:
+		in = (struct sockaddr_in *)&local_sock;
+		in->sin_family = family;
+		in->sin_port = htons(ctx->lport);
+		in->sin_addr.s_addr = htonl(INADDR_ANY);
+		sock_len = sizeof(*in);
+		break;
+	case AF_INET6:
+		in6 = (struct sockaddr_in6 *)&local_sock;
+		in6->sin6_family = family;
+		in6->sin6_port = htons(ctx->lport);
+		in6->sin6_addr = in6addr_any;
+		sock_len = sizeof(*in6);
+		break;
+	default:
+		goto err_socket;
+	}
+
+	if (setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt)) < 0) {
+		perror("setsockopt for SO_REUSEADDR");
+		goto err_socket;
+	}
+
+	if (setsockopt(s, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt)) < 0) {
+		perror("setsockopt for SO_REUSEPORT");
+		goto err_socket;
+	}
+
+	if (family == AF_INET6) {
+		/* dual-stack: let the IPv6 socket serve IPv4 peers too */
+		opt = 0;
+		if (setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY, &opt,
+			       sizeof(opt))) {
+			perror("failed to set IPV6_V6ONLY");
+			goto err_socket;
+		}
+	}
+
+	if (bind(s, (struct sockaddr *)&local_sock, sock_len) < 0) {
+		perror("cannot bind socket");
+		goto err_socket;
+	}
+
+	ctx->socket = s;
+	ctx->sa_family = family;
+	return 0;
+
+err_socket:
+	/* every failure after socket() must give the descriptor back */
+	close(s);
+	return -1;
+}
+
+/* Create the bound UDP socket for @ctx; see ovpn_socket() for the result */
+static int ovpn_udp_socket(struct ovpn_ctx *ctx, sa_family_t family)
+{
+	const int proto = IPPROTO_UDP;
+
+	return ovpn_socket(ctx, family, proto);
+}
+
+/* Create the bound TCP socket for @ctx and put it in listening state.
+ * Returns 0 on success and a negative value otherwise; if listen() fails
+ * the socket is closed again.
+ */
+static int ovpn_listen(struct ovpn_ctx *ctx, sa_family_t family)
+{
+	int err = ovpn_socket(ctx, family, IPPROTO_TCP);
+
+	if (err < 0)
+		return err;
+
+	if (listen(ctx->socket, 10) < 0) {
+		perror("listen");
+		close(ctx->socket);
+		return -1;
+	}
+
+	return 0;
+}
+
+/* Accept one connection on the listening socket of @ctx, storing the address
+ * of the client in ctx->remote.
+ *
+ * Returns the descriptor of the new connection. On failure a negative value
+ * is returned and the listening socket is closed.
+ */
+static int ovpn_accept(struct ovpn_ctx *ctx)
+{
+	socklen_t socklen = sizeof(ctx->remote);
+	int fd;
+
+	fd = accept(ctx->socket, (struct sockaddr *)&ctx->remote, &socklen);
+	if (fd < 0) {
+		perror("accept");
+		close(ctx->socket);
+		return fd;
+	}
+
+	fprintf(stderr, "Connection received!\n");
+
+	/* the address length tells whether the client is IPv4 or IPv6 */
+	if (socklen != sizeof(struct sockaddr_in) &&
+	    socklen != sizeof(struct sockaddr_in6)) {
+		fprintf(stderr, "error: expecting IPv4 or IPv6 connection\n");
+		close(fd);
+		close(ctx->socket);
+		return -EINVAL;
+	}
+
+	return fd;
+}
+
+/* Open a TCP connection to the address stored in ovpn->remote.
+ *
+ * Returns 0 and stores the descriptor in ovpn->socket on success. On failure
+ * a negative value is returned (-EOPNOTSUPP for an address family other
+ * than AF_INET/AF_INET6) and no descriptor is left open.
+ */
+static int ovpn_connect(struct ovpn_ctx *ovpn)
+{
+	socklen_t socklen;
+	int s, ret;
+
+	s = socket(ovpn->remote.in4.sin_family, SOCK_STREAM, 0);
+	if (s < 0) {
+		perror("cannot create socket");
+		return -1;
+	}
+
+	switch (ovpn->remote.in4.sin_family) {
+	case AF_INET:
+		socklen = sizeof(struct sockaddr_in);
+		break;
+	case AF_INET6:
+		socklen = sizeof(struct sockaddr_in6);
+		break;
+	default:
+		/* the socket was already created and must not be leaked */
+		ret = -EOPNOTSUPP;
+		goto err;
+	}
+
+	ret = connect(s, (struct sockaddr *)&ovpn->remote, socklen);
+	if (ret < 0) {
+		perror("connect");
+		goto err;
+	}
+
+	fprintf(stderr, "connected\n");
+
+	ovpn->socket = s;
+
+	return 0;
+err:
+	close(s);
+	return ret;
+}
+
+/* Ask the kernel to create the peer described by @ovpn (OVPN_CMD_PEER_NEW).
+ *
+ * The request carries the peer ID and the transport socket. For UDP
+ * (@is_tcp false) the remote address and port are added; the VPN address is
+ * added when ovpn->peer_ip_set is true.
+ *
+ * Returns 0 on success and a negative value otherwise.
+ */
+static int ovpn_new_peer(struct ovpn_ctx *ovpn, bool is_tcp)
+{
+	struct nlattr *attr;
+	struct nl_ctx *ctx;
+	int ret = -1;
+
+	ctx = nl_ctx_alloc(ovpn, OVPN_CMD_PEER_NEW);
+	if (!ctx)
+		return -ENOMEM;
+
+	attr = nla_nest_start(ctx->nl_msg, OVPN_A_PEER);
+	/* NULL means the message has no room left for the nest */
+	if (!attr)
+		goto nla_put_failure;
+
+	NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_ID, ovpn->peer_id);
+	NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_SOCKET, ovpn->socket);
+
+	if (!is_tcp) {
+		switch (ovpn->remote.in4.sin_family) {
+		case AF_INET:
+			NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_REMOTE_IPV4,
+				    ovpn->remote.in4.sin_addr.s_addr);
+			NLA_PUT_U16(ctx->nl_msg, OVPN_A_PEER_REMOTE_PORT,
+				    ovpn->remote.in4.sin_port);
+			break;
+		case AF_INET6:
+			NLA_PUT(ctx->nl_msg, OVPN_A_PEER_REMOTE_IPV6,
+				sizeof(ovpn->remote.in6.sin6_addr),
+				&ovpn->remote.in6.sin6_addr);
+			NLA_PUT_U32(ctx->nl_msg,
+				    OVPN_A_PEER_REMOTE_IPV6_SCOPE_ID,
+				    ovpn->remote.in6.sin6_scope_id);
+			NLA_PUT_U16(ctx->nl_msg, OVPN_A_PEER_REMOTE_PORT,
+				    ovpn->remote.in6.sin6_port);
+			break;
+		default:
+			fprintf(stderr,
+				"Invalid family for remote socket address\n");
+			goto nla_put_failure;
+		}
+	}
+
+	if (ovpn->peer_ip_set) {
+		switch (ovpn->peer_ip.in4.sin_family) {
+		case AF_INET:
+			NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_VPN_IPV4,
+				    ovpn->peer_ip.in4.sin_addr.s_addr);
+			break;
+		case AF_INET6:
+			NLA_PUT(ctx->nl_msg, OVPN_A_PEER_VPN_IPV6,
+				sizeof(struct in6_addr),
+				&ovpn->peer_ip.in6.sin6_addr);
+			break;
+		default:
+			fprintf(stderr, "Invalid family for peer address\n");
+			goto nla_put_failure;
+		}
+	}
+
+	nla_nest_end(ctx->nl_msg, attr);
+
+	ret = ovpn_nl_msg_send(ctx, NULL);
+nla_put_failure:
+	nl_ctx_free(ctx);
+	return ret;
+}
+
+/* Update the keepalive interval and timeout of peer ovpn->peer_id
+ * (OVPN_CMD_PEER_SET). Returns 0 on success, a negative value otherwise.
+ */
+static int ovpn_set_peer(struct ovpn_ctx *ovpn)
+{
+	struct nlattr *attr;
+	struct nl_ctx *ctx;
+	int ret = -1;
+
+	ctx = nl_ctx_alloc(ovpn, OVPN_CMD_PEER_SET);
+	if (!ctx)
+		return -ENOMEM;
+
+	attr = nla_nest_start(ctx->nl_msg, OVPN_A_PEER);
+	/* NULL means the message has no room left for the nest */
+	if (!attr)
+		goto nla_put_failure;
+
+	NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_ID, ovpn->peer_id);
+	NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_KEEPALIVE_INTERVAL,
+		    ovpn->keepalive_interval);
+	NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_KEEPALIVE_TIMEOUT,
+		    ovpn->keepalive_timeout);
+	nla_nest_end(ctx->nl_msg, attr);
+
+	ret = ovpn_nl_msg_send(ctx, NULL);
+nla_put_failure:
+	nl_ctx_free(ctx);
+	return ret;
+}
+
+/* Ask the kernel to delete peer ovpn->peer_id (OVPN_CMD_PEER_DEL).
+ * Returns 0 on success, a negative value otherwise.
+ */
+static int ovpn_del_peer(struct ovpn_ctx *ovpn)
+{
+	struct nlattr *attr;
+	struct nl_ctx *ctx;
+	int ret = -1;
+
+	ctx = nl_ctx_alloc(ovpn, OVPN_CMD_PEER_DEL);
+	if (!ctx)
+		return -ENOMEM;
+
+	attr = nla_nest_start(ctx->nl_msg, OVPN_A_PEER);
+	/* NULL means the message has no room left for the nest */
+	if (!attr)
+		goto nla_put_failure;
+
+	NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_ID, ovpn->peer_id);
+	nla_nest_end(ctx->nl_msg, attr);
+
+	ret = ovpn_nl_msg_send(ctx, NULL);
+nla_put_failure:
+	nl_ctx_free(ctx);
+	return ret;
+}
+
+/* Reply handler for OVPN_CMD_PEER_GET: print on stderr every attribute of
+ * the peer found in @msg. Always returns NL_SKIP.
+ */
+static int ovpn_handle_peer(struct nl_msg *msg, void (*arg)__always_unused)
+{
+	struct genlmsghdr *gnlh = nlmsg_data(nlmsg_hdr(msg));
+	struct nlattr *peer[OVPN_A_PEER_MAX + 1];
+	struct nlattr *tb[OVPN_A_MAX + 1];
+	char addr4[INET_ADDRSTRLEN];
+	char addr6[INET6_ADDRSTRLEN];
+	__u16 rport = 0, lport = 0;
+
+	nla_parse(tb, OVPN_A_MAX, genlmsg_attrdata(gnlh, 0),
+		  genlmsg_attrlen(gnlh, 0), NULL);
+
+	if (!tb[OVPN_A_PEER]) {
+		fprintf(stderr, "no packet content in netlink message\n");
+		return NL_SKIP;
+	}
+
+	nla_parse(peer, OVPN_A_PEER_MAX, nla_data(tb[OVPN_A_PEER]),
+		  nla_len(tb[OVPN_A_PEER]), NULL);
+
+	/* ports are only printed together with the matching address below */
+	if (peer[OVPN_A_PEER_LOCAL_PORT])
+		lport = ntohs(nla_get_u16(peer[OVPN_A_PEER_LOCAL_PORT]));
+
+	if (peer[OVPN_A_PEER_REMOTE_PORT])
+		rport = ntohs(nla_get_u16(peer[OVPN_A_PEER_REMOTE_PORT]));
+
+	if (peer[OVPN_A_PEER_ID])
+		fprintf(stderr, "* Peer %u\n",
+			nla_get_u32(peer[OVPN_A_PEER_ID]));
+
+	if (peer[OVPN_A_PEER_SOCKET_NETNSID])
+		fprintf(stderr, "\tsocket NetNS ID: %d\n",
+			nla_get_s32(peer[OVPN_A_PEER_SOCKET_NETNSID]));
+
+	if (peer[OVPN_A_PEER_VPN_IPV4]) {
+		inet_ntop(AF_INET, nla_data(peer[OVPN_A_PEER_VPN_IPV4]),
+			  addr4, sizeof(addr4));
+		fprintf(stderr, "\tVPN IPv4: %s\n", addr4);
+	}
+
+	if (peer[OVPN_A_PEER_VPN_IPV6]) {
+		inet_ntop(AF_INET6, nla_data(peer[OVPN_A_PEER_VPN_IPV6]),
+			  addr6, sizeof(addr6));
+		fprintf(stderr, "\tVPN IPv6: %s\n", addr6);
+	}
+
+	if (peer[OVPN_A_PEER_REMOTE_IPV6]) {
+		struct nlattr *scope = peer[OVPN_A_PEER_REMOTE_IPV6_SCOPE_ID];
+		struct nlattr *local = peer[OVPN_A_PEER_LOCAL_IPV6];
+		int scope_id = -1;
+
+		if (scope)
+			scope_id = nla_get_u32(scope);
+
+		inet_ntop(AF_INET6, nla_data(peer[OVPN_A_PEER_REMOTE_IPV6]),
+			  addr6, sizeof(addr6));
+		fprintf(stderr, "\tRemote: %s:%hu (scope-id: %u)\n", addr6,
+			rport, scope_id);
+
+		if (local) {
+			inet_ntop(AF_INET6, nla_data(local), addr6,
+				  sizeof(addr6));
+			fprintf(stderr, "\tLocal: %s:%hu\n", addr6, lport);
+		}
+	}
+
+	if (peer[OVPN_A_PEER_REMOTE_IPV4]) {
+		struct nlattr *local = peer[OVPN_A_PEER_LOCAL_IPV4];
+
+		inet_ntop(AF_INET, nla_data(peer[OVPN_A_PEER_REMOTE_IPV4]),
+			  addr4, sizeof(addr4));
+		fprintf(stderr, "\tRemote: %s:%hu\n", addr4, rport);
+
+		if (local) {
+			inet_ntop(AF_INET, nla_data(local), addr4,
+				  sizeof(addr4));
+			fprintf(stderr, "\tLocal: %s:%hu\n", addr4, lport);
+		}
+	}
+
+	if (peer[OVPN_A_PEER_KEEPALIVE_INTERVAL])
+		fprintf(stderr, "\tKeepalive interval: %u sec\n",
+			nla_get_u32(peer[OVPN_A_PEER_KEEPALIVE_INTERVAL]));
+
+	if (peer[OVPN_A_PEER_KEEPALIVE_TIMEOUT])
+		fprintf(stderr, "\tKeepalive timeout: %u sec\n",
+			nla_get_u32(peer[OVPN_A_PEER_KEEPALIVE_TIMEOUT]));
+
+	/* traffic counters: VPN side first, then transport link side */
+	if (peer[OVPN_A_PEER_VPN_RX_BYTES])
+		fprintf(stderr, "\tVPN RX bytes: %" PRIu64 "\n",
+			ovpn_nla_get_uint(peer[OVPN_A_PEER_VPN_RX_BYTES]));
+
+	if (peer[OVPN_A_PEER_VPN_TX_BYTES])
+		fprintf(stderr, "\tVPN TX bytes: %" PRIu64 "\n",
+			ovpn_nla_get_uint(peer[OVPN_A_PEER_VPN_TX_BYTES]));
+
+	if (peer[OVPN_A_PEER_VPN_RX_PACKETS])
+		fprintf(stderr, "\tVPN RX packets: %" PRIu64 "\n",
+			ovpn_nla_get_uint(peer[OVPN_A_PEER_VPN_RX_PACKETS]));
+
+	if (peer[OVPN_A_PEER_VPN_TX_PACKETS])
+		fprintf(stderr, "\tVPN TX packets: %" PRIu64 "\n",
+			ovpn_nla_get_uint(peer[OVPN_A_PEER_VPN_TX_PACKETS]));
+
+	if (peer[OVPN_A_PEER_LINK_RX_BYTES])
+		fprintf(stderr, "\tLINK RX bytes: %" PRIu64 "\n",
+			ovpn_nla_get_uint(peer[OVPN_A_PEER_LINK_RX_BYTES]));
+
+	if (peer[OVPN_A_PEER_LINK_TX_BYTES])
+		fprintf(stderr, "\tLINK TX bytes: %" PRIu64 "\n",
+			ovpn_nla_get_uint(peer[OVPN_A_PEER_LINK_TX_BYTES]));
+
+	if (peer[OVPN_A_PEER_LINK_RX_PACKETS])
+		fprintf(stderr, "\tLINK RX packets: %" PRIu64 "\n",
+			ovpn_nla_get_uint(peer[OVPN_A_PEER_LINK_RX_PACKETS]));
+
+	if (peer[OVPN_A_PEER_LINK_TX_PACKETS])
+		fprintf(stderr, "\tLINK TX packets: %" PRIu64 "\n",
+			ovpn_nla_get_uint(peer[OVPN_A_PEER_LINK_TX_PACKETS]));
+
+	return NL_SKIP;
+}
+
+/* Query peer information (OVPN_CMD_PEER_GET) and print the replies via
+ * ovpn_handle_peer(). With ovpn->peer_id set to PEER_ID_UNDEF all peers are
+ * dumped, otherwise only the given one is requested.
+ *
+ * Returns 0 on success, a negative value otherwise.
+ */
+static int ovpn_get_peer(struct ovpn_ctx *ovpn)
+{
+	int flags = 0, ret = -1;
+	struct nlattr *attr;
+	struct nl_ctx *ctx;
+
+	if (ovpn->peer_id == PEER_ID_UNDEF)
+		flags = NLM_F_DUMP;
+
+	ctx = nl_ctx_alloc_flags(ovpn, OVPN_CMD_PEER_GET, flags);
+	if (!ctx)
+		return -ENOMEM;
+
+	if (ovpn->peer_id != PEER_ID_UNDEF) {
+		attr = nla_nest_start(ctx->nl_msg, OVPN_A_PEER);
+		/* NULL means the message has no room left for the nest */
+		if (!attr)
+			goto nla_put_failure;
+
+		NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_ID, ovpn->peer_id);
+		nla_nest_end(ctx->nl_msg, attr);
+	}
+
+	ret = ovpn_nl_msg_send(ctx, ovpn_handle_peer);
+nla_put_failure:
+	nl_ctx_free(ctx);
+	return ret;
+}
+
+/* Install a key for peer ovpn->peer_id (OVPN_CMD_KEY_NEW).
+ *
+ * The request carries slot, key ID and cipher plus one nested block per
+ * direction with the respective key; the same nonce tail is sent for both.
+ *
+ * Returns 0 on success, a negative value otherwise.
+ */
+static int ovpn_new_key(struct ovpn_ctx *ovpn)
+{
+	struct nlattr *keyconf, *key_dir;
+	struct nl_ctx *ctx;
+	int ret = -1;
+
+	ctx = nl_ctx_alloc(ovpn, OVPN_CMD_KEY_NEW);
+	if (!ctx)
+		return -ENOMEM;
+
+	/* a NULL nest means the message has no room left */
+	keyconf = nla_nest_start(ctx->nl_msg, OVPN_A_KEYCONF);
+	if (!keyconf)
+		goto nla_put_failure;
+
+	NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_PEER_ID, ovpn->peer_id);
+	NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_SLOT, ovpn->key_slot);
+	NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_KEY_ID, ovpn->key_id);
+	NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_CIPHER_ALG, ovpn->cipher);
+
+	key_dir = nla_nest_start(ctx->nl_msg, OVPN_A_KEYCONF_ENCRYPT_DIR);
+	if (!key_dir)
+		goto nla_put_failure;
+
+	NLA_PUT(ctx->nl_msg, OVPN_A_KEYDIR_CIPHER_KEY, KEY_LEN, ovpn->key_enc);
+	NLA_PUT(ctx->nl_msg, OVPN_A_KEYDIR_NONCE_TAIL, NONCE_LEN, ovpn->nonce);
+	nla_nest_end(ctx->nl_msg, key_dir);
+
+	key_dir = nla_nest_start(ctx->nl_msg, OVPN_A_KEYCONF_DECRYPT_DIR);
+	if (!key_dir)
+		goto nla_put_failure;
+
+	NLA_PUT(ctx->nl_msg, OVPN_A_KEYDIR_CIPHER_KEY, KEY_LEN, ovpn->key_dec);
+	NLA_PUT(ctx->nl_msg, OVPN_A_KEYDIR_NONCE_TAIL, NONCE_LEN, ovpn->nonce);
+	nla_nest_end(ctx->nl_msg, key_dir);
+
+	nla_nest_end(ctx->nl_msg, keyconf);
+
+	ret = ovpn_nl_msg_send(ctx, NULL);
+nla_put_failure:
+	nl_ctx_free(ctx);
+	return ret;
+}
+
+/* Remove the key in slot ovpn->key_slot of peer ovpn->peer_id
+ * (OVPN_CMD_KEY_DEL). Returns 0 on success, a negative value otherwise.
+ */
+static int ovpn_del_key(struct ovpn_ctx *ovpn)
+{
+	struct nlattr *keyconf;
+	struct nl_ctx *ctx;
+	int ret = -1;
+
+	ctx = nl_ctx_alloc(ovpn, OVPN_CMD_KEY_DEL);
+	if (!ctx)
+		return -ENOMEM;
+
+	keyconf = nla_nest_start(ctx->nl_msg, OVPN_A_KEYCONF);
+	/* NULL means the message has no room left for the nest */
+	if (!keyconf)
+		goto nla_put_failure;
+
+	NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_PEER_ID, ovpn->peer_id);
+	NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_SLOT, ovpn->key_slot);
+	nla_nest_end(ctx->nl_msg, keyconf);
+
+	ret = ovpn_nl_msg_send(ctx, NULL);
+nla_put_failure:
+	nl_ctx_free(ctx);
+	return ret;
+}
+
+/* Reply handler for OVPN_CMD_KEY_GET: print on stderr the peer ID, slot,
+ * key ID and cipher found in @msg. Always returns NL_SKIP.
+ */
+static int ovpn_handle_key(struct nl_msg *msg, void (*arg)__always_unused)
+{
+	struct genlmsghdr *gnlh = nlmsg_data(nlmsg_hdr(msg));
+	struct nlattr *key[OVPN_A_KEYCONF_MAX + 1];
+	struct nlattr *tb[OVPN_A_MAX + 1];
+	__u32 slot, alg;
+
+	nla_parse(tb, OVPN_A_MAX, genlmsg_attrdata(gnlh, 0),
+		  genlmsg_attrlen(gnlh, 0), NULL);
+
+	if (!tb[OVPN_A_KEYCONF]) {
+		fprintf(stderr, "no packet content in netlink message\n");
+		return NL_SKIP;
+	}
+
+	nla_parse(key, OVPN_A_KEYCONF_MAX, nla_data(tb[OVPN_A_KEYCONF]),
+		  nla_len(tb[OVPN_A_KEYCONF]), NULL);
+
+	if (key[OVPN_A_KEYCONF_PEER_ID])
+		fprintf(stderr, "* Peer %u\n",
+			nla_get_u32(key[OVPN_A_KEYCONF_PEER_ID]));
+
+	if (key[OVPN_A_KEYCONF_SLOT]) {
+		slot = nla_get_u32(key[OVPN_A_KEYCONF_SLOT]);
+
+		fprintf(stderr, "\t- Slot: ");
+		if (slot == OVPN_KEY_SLOT_PRIMARY)
+			fprintf(stderr, "primary\n");
+		else if (slot == OVPN_KEY_SLOT_SECONDARY)
+			fprintf(stderr, "secondary\n");
+		else
+			fprintf(stderr, "invalid (%u)\n", slot);
+	}
+
+	if (key[OVPN_A_KEYCONF_KEY_ID])
+		fprintf(stderr, "\t- Key ID: %u\n",
+			nla_get_u32(key[OVPN_A_KEYCONF_KEY_ID]));
+
+	if (key[OVPN_A_KEYCONF_CIPHER_ALG]) {
+		alg = nla_get_u32(key[OVPN_A_KEYCONF_CIPHER_ALG]);
+
+		fprintf(stderr, "\t- Cipher: ");
+		if (alg == OVPN_CIPHER_ALG_NONE)
+			fprintf(stderr, "none\n");
+		else if (alg == OVPN_CIPHER_ALG_AES_GCM)
+			fprintf(stderr, "aes-gcm\n");
+		else if (alg == OVPN_CIPHER_ALG_CHACHA20_POLY1305)
+			fprintf(stderr, "chacha20poly1305\n");
+		else
+			fprintf(stderr, "invalid (%u)\n", alg);
+	}
+
+	return NL_SKIP;
+}
+
+/* Query the key in slot ovpn->key_slot of peer ovpn->peer_id
+ * (OVPN_CMD_KEY_GET) and print the reply via ovpn_handle_key().
+ * Returns 0 on success, a negative value otherwise.
+ */
+static int ovpn_get_key(struct ovpn_ctx *ovpn)
+{
+	struct nlattr *keyconf;
+	struct nl_ctx *ctx;
+	int ret = -1;
+
+	ctx = nl_ctx_alloc(ovpn, OVPN_CMD_KEY_GET);
+	if (!ctx)
+		return -ENOMEM;
+
+	keyconf = nla_nest_start(ctx->nl_msg, OVPN_A_KEYCONF);
+	/* NULL means the message has no room left for the nest */
+	if (!keyconf)
+		goto nla_put_failure;
+
+	NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_PEER_ID, ovpn->peer_id);
+	NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_SLOT, ovpn->key_slot);
+	nla_nest_end(ctx->nl_msg, keyconf);
+
+	ret = ovpn_nl_msg_send(ctx, ovpn_handle_key);
+nla_put_failure:
+	nl_ctx_free(ctx);
+	return ret;
+}
+
+/* Ask the kernel to swap the keys of peer ovpn->peer_id (OVPN_CMD_KEY_SWAP).
+ * Returns 0 on success, a negative value otherwise.
+ */
+static int ovpn_swap_keys(struct ovpn_ctx *ovpn)
+{
+	struct nl_ctx *ctx;
+	struct nlattr *kc;
+	int ret = -1;
+
+	ctx = nl_ctx_alloc(ovpn, OVPN_CMD_KEY_SWAP);
+	if (!ctx)
+		return -ENOMEM;
+
+	kc = nla_nest_start(ctx->nl_msg, OVPN_A_KEYCONF);
+	/* NULL means the message has no room left for the nest */
+	if (!kc)
+		goto nla_put_failure;
+
+	NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_PEER_ID, ovpn->peer_id);
+	nla_nest_end(ctx->nl_msg, kc);
+
+	ret = ovpn_nl_msg_send(ctx, NULL);
+nla_put_failure:
+	nl_ctx_free(ctx);
+	return ret;
+}
+
+/* Append one attribute to the rtnl message @n.
+ *
+ * @maxlen: size of the buffer holding @n
+ * @type: attribute type
+ * @data: payload of @alen bytes; NULL produces a zero-filled payload
+ *
+ * Returns 0 on success or -EMSGSIZE if the attribute does not fit.
+ */
+static int ovpn_addattr(struct nlmsghdr *n, int maxlen, int type,
+			const void *data, int alen)
+{
+	int len = RTA_LENGTH(alen);
+	int new_len = NLMSG_ALIGN(n->nlmsg_len) + RTA_ALIGN(len);
+	struct rtattr *rta;
+
+	if (new_len > maxlen)	{
+		fprintf(stderr, "%s: rtnl: message exceeded bound of %d\n",
+			__func__, maxlen);
+		return -EMSGSIZE;
+	}
+
+	/* the new attribute starts right after the current message end */
+	rta = nlmsg_tail(n);
+	rta->rta_type = type;
+	rta->rta_len = len;
+
+	if (data)
+		memcpy(RTA_DATA(rta), data, alen);
+	else
+		memset(RTA_DATA(rta), 0, alen);
+
+	n->nlmsg_len = new_len;
+
+	return 0;
+}
+
+/* Open a nested attribute of type @attr at the end of @msg.
+ * Returns the nest header, to be handed to ovpn_nest_end(), or NULL if the
+ * attribute does not fit in @max_size.
+ */
+static struct rtattr *ovpn_nest_start(struct nlmsghdr *msg, size_t max_size,
+				      int attr)
+{
+	/* remember where the nest header is about to be written */
+	struct rtattr *nest = nlmsg_tail(msg);
+	int err = ovpn_addattr(msg, max_size, attr, NULL, 0);
+
+	return err < 0 ? NULL : nest;
+}
+
+/* Close @nest: its length becomes everything added to @msg since the nest
+ * header, header included.
+ */
+static void ovpn_nest_end(struct nlmsghdr *msg, struct rtattr *nest)
+{
+	uint8_t *tail = (uint8_t *)nlmsg_tail(msg);
+
+	nest->rta_len = tail - (uint8_t *)nest;
+}
+
+#define RT_SNDBUF_SIZE (1024 * 2)
+#define RT_RCVBUF_SIZE (1024 * 4)
+
+/* Open an RTNL socket with the send/receive buffer sizes defined above.
+ * Returns the descriptor, or a negative value on failure.
+ */
+static int ovpn_rt_socket(void)
+{
+	int snd_size = RT_SNDBUF_SIZE;
+	int rcv_size = RT_RCVBUF_SIZE;
+	int fd;
+
+	fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
+	if (fd < 0) {
+		fprintf(stderr, "%s: cannot open netlink socket\n", __func__);
+		return fd;
+	}
+
+	if (setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &snd_size,
+		       sizeof(snd_size)) < 0) {
+		fprintf(stderr, "%s: SO_SNDBUF\n", __func__);
+		goto err_close;
+	}
+
+	if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &rcv_size,
+		       sizeof(rcv_size)) < 0) {
+		fprintf(stderr, "%s: SO_RCVBUF\n", __func__);
+		goto err_close;
+	}
+
+	return fd;
+
+err_close:
+	close(fd);
+	return -1;
+}
+
+/* Bind the netlink socket @fd to the multicast @groups and verify that the
+ * kernel reports an AF_NETLINK address of the expected size for it.
+ *
+ * Returns 0 on success, a negative errno value otherwise.
+ */
+static int ovpn_rt_bind(int fd, uint32_t groups)
+{
+	struct sockaddr_nl local = { 0 };
+	socklen_t addr_len;
+	int err;
+
+	local.nl_family = AF_NETLINK;
+	local.nl_groups = groups;
+
+	if (bind(fd, (struct sockaddr *)&local, sizeof(local)) < 0) {
+		/* save errno first: fprintf() is allowed to modify it */
+		err = errno;
+		fprintf(stderr, "%s: cannot bind netlink socket: %d\n",
+			__func__, err);
+		return -err;
+	}
+
+	addr_len = sizeof(local);
+	if (getsockname(fd, (struct sockaddr *)&local, &addr_len) < 0) {
+		err = errno;
+		fprintf(stderr, "%s: cannot getsockname: %d\n", __func__,
+			err);
+		return -err;
+	}
+
+	if (addr_len != sizeof(local)) {
+		fprintf(stderr, "%s: wrong address length %d\n", __func__,
+			addr_len);
+		return -EINVAL;
+	}
+
+	if (local.nl_family != AF_NETLINK) {
+		fprintf(stderr, "%s: wrong address family %d\n", __func__,
+			local.nl_family);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+typedef int (*ovpn_parse_reply_cb)(struct nlmsghdr *msg, void *arg);
+
+/* Send an RTNL request and process the replies.
+ *
+ * @payload: fully built netlink message; nlmsg_seq is overwritten here and
+ *	     NLM_F_ACK is added when no callback is supplied
+ * @peer: destination netlink port ID
+ * @groups: destination multicast group mask
+ * @cb: optional callback invoked on reply messages (including the final ACK)
+ * @arg_cb: opaque pointer handed to @cb
+ *
+ * A fresh NETLINK_ROUTE socket is opened for every call and closed before
+ * returning.
+ *
+ * Return: 0 on success, a negative value on failure: a negative errno for
+ * local failures, the error carried by an NLMSG_ERROR reply, or the
+ * non-positive value returned by @cb.
+ */
+static int ovpn_rt_send(struct nlmsghdr *payload, pid_t peer,
+			unsigned int groups, ovpn_parse_reply_cb cb,
+			void *arg_cb)
+{
+	int len, rem_len, fd, ret, rcv_len;
+	struct sockaddr_nl nladdr = { 0 };
+	struct nlmsgerr *err;
+	struct nlmsghdr *h;
+	char buf[1024 * 16];
+	struct iovec iov = {
+		.iov_base = payload,
+		.iov_len = payload->nlmsg_len,
+	};
+	struct msghdr nlmsg = {
+		.msg_name = &nladdr,
+		.msg_namelen = sizeof(nladdr),
+		.msg_iov = &iov,
+		.msg_iovlen = 1,
+	};
+
+	nladdr.nl_family = AF_NETLINK;
+	nladdr.nl_pid = peer;
+	nladdr.nl_groups = groups;
+
+	payload->nlmsg_seq = time(NULL);
+
+	/* without a reply callback, request an explicit ACK so that the
+	 * outcome of the request can still be reported to the caller
+	 */
+	if (!cb)
+		payload->nlmsg_flags |= NLM_F_ACK;
+
+	fd = ovpn_rt_socket();
+	if (fd < 0) {
+		/* errno may have been overwritten inside ovpn_rt_socket():
+		 * make sure a failure is never reported as 0
+		 */
+		ret = errno ? -errno : -EIO;
+		fprintf(stderr, "%s: can't open rtnl socket\n", __func__);
+		return ret;
+	}
+
+	ret = ovpn_rt_bind(fd, 0);
+	if (ret < 0) {
+		/* keep the code returned by ovpn_rt_bind(): not all of its
+		 * failure paths set errno
+		 */
+		fprintf(stderr, "%s: can't bind rtnl socket\n", __func__);
+		goto out;
+	}
+
+	ret = sendmsg(fd, &nlmsg, 0);
+	if (ret < 0) {
+		/* read errno before fprintf() gets a chance to change it */
+		ret = -errno;
+		fprintf(stderr, "%s: rtnl: error on sendmsg()\n", __func__);
+		goto out;
+	}
+
+	/* prepare buffer to store RTNL replies */
+	memset(buf, 0, sizeof(buf));
+	iov.iov_base = buf;
+
+	while (1) {
+		/*
+		 * iov_len is modified by recvmsg(), therefore has to be initialized before
+		 * using it again
+		 */
+		iov.iov_len = sizeof(buf);
+		rcv_len = recvmsg(fd, &nlmsg, 0);
+		if (rcv_len < 0) {
+			if (errno == EINTR || errno == EAGAIN) {
+				fprintf(stderr, "%s: interrupted call\n",
+					__func__);
+				continue;
+			}
+			ret = -errno;
+			fprintf(stderr, "%s: rtnl: error on recvmsg()\n",
+				__func__);
+			goto out;
+		}
+
+		if (rcv_len == 0) {
+			fprintf(stderr,
+				"%s: rtnl: socket reached unexpected EOF\n",
+				__func__);
+			ret = -EIO;
+			goto out;
+		}
+
+		if (nlmsg.msg_namelen != sizeof(nladdr)) {
+			fprintf(stderr,
+				"%s: sender address length: %u (expected %zu)\n",
+				__func__, nlmsg.msg_namelen, sizeof(nladdr));
+			ret = -EIO;
+			goto out;
+		}
+
+		/* walk every netlink message contained in this datagram */
+		h = (struct nlmsghdr *)buf;
+		while (rcv_len >= (int)sizeof(*h)) {
+			len = h->nlmsg_len;
+			rem_len = len - sizeof(*h);
+
+			if (rem_len < 0 || len > rcv_len) {
+				if (nlmsg.msg_flags & MSG_TRUNC) {
+					fprintf(stderr, "%s: truncated message\n",
+						__func__);
+					ret = -EIO;
+					goto out;
+				}
+				fprintf(stderr, "%s: malformed message: len=%d\n",
+					__func__, len);
+				ret = -EIO;
+				goto out;
+			}
+
+			if (h->nlmsg_type == NLMSG_DONE) {
+				ret = 0;
+				goto out;
+			}
+
+			if (h->nlmsg_type == NLMSG_ERROR) {
+				err = (struct nlmsgerr *)NLMSG_DATA(h);
+				if (rem_len < (int)sizeof(struct nlmsgerr)) {
+					fprintf(stderr, "%s: ERROR truncated\n",
+						__func__);
+					ret = -EIO;
+					goto out;
+				}
+
+				if (err->error) {
+					fprintf(stderr, "%s: (%d) %s\n",
+						__func__, err->error,
+						strerror(-err->error));
+					ret = err->error;
+					goto out;
+				}
+
+				/* error == 0: this is the ACK */
+				ret = 0;
+				if (cb)	{
+					int r = cb(h, arg_cb);
+
+					if (r <= 0)
+						ret = r;
+				}
+				goto out;
+			}
+
+			if (cb) {
+				int r = cb(h, arg_cb);
+
+				/* a non-positive value from the callback ends processing */
+				if (r <= 0) {
+					ret = r;
+					goto out;
+				}
+			} else {
+				fprintf(stderr, "%s: RTNL: unexpected reply\n",
+					__func__);
+			}
+
+			rcv_len -= NLMSG_ALIGN(len);
+			h = (struct nlmsghdr *)((uint8_t *)h +
+						NLMSG_ALIGN(len));
+		}
+
+		if (nlmsg.msg_flags & MSG_TRUNC) {
+			fprintf(stderr, "%s: message truncated\n", __func__);
+			continue;
+		}
+
+		if (rcv_len) {
+			fprintf(stderr, "%s: rtnl: %d not parsed bytes\n",
+				__func__, rcv_len);
+			ret = -1;
+			goto out;
+		}
+	}
+out:
+	close(fd);
+
+	return ret;
+}
+
+/* RTNL link request: netlink header, ifinfomsg payload and attribute space */
+struct ovpn_link_req {
+	struct nlmsghdr n;
+	struct ifinfomsg i;
+	/* room for the attributes appended with ovpn_addattr(), which is
+	 * given sizeof(struct ovpn_link_req) as the upper bound
+	 */
+	char buf[256];
+};
+
+/* Create the ovpn interface named ovpn->ifname through an RTM_NEWLINK
+ * request; the mode attribute is only included when ovpn->mode_set is true.
+ *
+ * Return: -1 if the request cannot be assembled, otherwise whatever
+ * ovpn_rt_send() returns.
+ */
+static int ovpn_new_iface(struct ovpn_ctx *ovpn)
+{
+	struct ovpn_link_req req = { 0 };
+	struct rtattr *info_nest, *data_nest;
+
+	fprintf(stdout, "Creating interface %s with mode %u\n", ovpn->ifname,
+		ovpn->mode);
+
+	req.i.ifi_family = AF_PACKET;
+	req.n.nlmsg_type = RTM_NEWLINK;
+	req.n.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
+	req.n.nlmsg_len = NLMSG_LENGTH(sizeof(req.i));
+
+	if (ovpn_addattr(&req.n, sizeof(req), IFLA_IFNAME, ovpn->ifname,
+			 strlen(ovpn->ifname) + 1) < 0)
+		return -1;
+
+	/* IFLA_LINKINFO { IFLA_INFO_KIND [, IFLA_INFO_DATA { IFLA_OVPN_MODE }] } */
+	info_nest = ovpn_nest_start(&req.n, sizeof(req), IFLA_LINKINFO);
+	if (!info_nest)
+		return -1;
+
+	if (ovpn_addattr(&req.n, sizeof(req), IFLA_INFO_KIND, OVPN_FAMILY_NAME,
+			 strlen(OVPN_FAMILY_NAME) + 1) < 0)
+		return -1;
+
+	if (ovpn->mode_set) {
+		data_nest = ovpn_nest_start(&req.n, sizeof(req),
+					    IFLA_INFO_DATA);
+		if (!data_nest)
+			return -1;
+
+		if (ovpn_addattr(&req.n, sizeof(req), IFLA_OVPN_MODE,
+				 &ovpn->mode, sizeof(uint8_t)) < 0)
+			return -1;
+
+		ovpn_nest_end(&req.n, data_nest);
+	}
+
+	ovpn_nest_end(&req.n, info_nest);
+
+	return ovpn_rt_send(&req.n, 0, 0, NULL, NULL);
+}
+
+/* Delete the interface identified by ovpn->ifindex with an RTM_DELLINK
+ * request.
+ *
+ * Return: whatever ovpn_rt_send() returns.
+ */
+static int ovpn_del_iface(struct ovpn_ctx *ovpn)
+{
+	struct ovpn_link_req req = {
+		.n = {
+			.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
+			.nlmsg_type = RTM_DELLINK,
+			.nlmsg_flags = NLM_F_REQUEST,
+		},
+		.i = {
+			.ifi_family = AF_PACKET,
+			.ifi_index = ovpn->ifindex,
+		},
+	};
+
+	fprintf(stdout, "Deleting interface %s ifindex %u\n", ovpn->ifname,
+		ovpn->ifindex);
+
+	return ovpn_rt_send(&req.n, 0, 0, NULL, NULL);
+}
+
+/* Sequence-check callback that accepts every message: both arguments are
+ * ignored and NL_OK is always returned. It is installed as NL_CB_SEQ_CHECK
+ * by ovpn_listen_mcast().
+ */
+static int nl_seq_check(struct nl_msg (*msg)__always_unused,
+			void (*arg)__always_unused)
+{
+	return NL_OK;
+}
+
+/* State shared between ovpn_get_mcast_id() and mcast_family_handler() */
+struct mcast_handler_args {
+	/* name of the multicast group being looked up */
+	const char *group;
+	/* ID of the group once found; preset by ovpn_get_mcast_id() to -ENOENT */
+	int id;
+};
+
+/* Valid-message callback for the CTRL_CMD_GETFAMILY reply: scan the
+ * advertised multicast groups and store in @arg (a struct
+ * mcast_handler_args) the ID of the first group whose name matches.
+ *
+ * Always returns NL_SKIP.
+ */
+static int mcast_family_handler(struct nl_msg *msg, void *arg)
+{
+	struct genlmsghdr *gnlh = nlmsg_data(nlmsg_hdr(msg));
+	struct nlattr *tb[CTRL_ATTR_MAX + 1];
+	struct mcast_handler_args *wanted = arg;
+	struct nlattr *cur;
+	int rem;
+
+	nla_parse(tb, CTRL_ATTR_MAX, genlmsg_attrdata(gnlh, 0),
+		  genlmsg_attrlen(gnlh, 0), NULL);
+
+	if (!tb[CTRL_ATTR_MCAST_GROUPS])
+		return NL_SKIP;
+
+	nla_for_each_nested(cur, tb[CTRL_ATTR_MCAST_GROUPS], rem) {
+		struct nlattr *grp_tb[CTRL_ATTR_MCAST_GRP_MAX + 1];
+		struct nlattr *name, *id;
+
+		nla_parse(grp_tb, CTRL_ATTR_MCAST_GRP_MAX,
+			  nla_data(cur), nla_len(cur), NULL);
+
+		name = grp_tb[CTRL_ATTR_MCAST_GRP_NAME];
+		id = grp_tb[CTRL_ATTR_MCAST_GRP_ID];
+
+		/* entries lacking a name or an ID cannot be matched */
+		if (!name || !id)
+			continue;
+
+		/* comparison is bounded by the length of the name attribute */
+		if (strncmp(nla_data(name), wanted->group, nla_len(name)) != 0)
+			continue;
+
+		wanted->id = nla_get_u32(id);
+		break;
+	}
+
+	return NL_SKIP;
+}
+
+static int mcast_error_handler(struct sockaddr_nl (*nla)__always_unused,
+			       struct nlmsgerr *err, void *arg)
+{
+	int *ret = arg;
+
+	*ret = err->error;
+	return NL_STOP;
+}
+
+/* ACK callback: store 0 in the int pointed to by @arg and stop processing. */
+static int mcast_ack_handler(struct nl_msg (*msg)__always_unused, void *arg)
+{
+	*(int *)arg = 0;
+
+	return NL_STOP;
+}
+
+/* Valid-message callback for multicast notifications.
+ *
+ * The int pointed to by @arg is set to -1 on entry and only becomes 0 once
+ * a known notification referring to a resolvable interface was handled.
+ *
+ * Return: NL_OK for a handled notification, NL_STOP otherwise.
+ */
+static int ovpn_handle_msg(struct nl_msg *msg, void *arg)
+{
+	struct nlmsghdr *nlh = nlmsg_hdr(msg);
+	struct genlmsghdr *gnlh = nlmsg_data(nlh);
+	struct nlattr *attrs[OVPN_A_MAX + 1];
+	char ifname[IF_NAMESIZE];
+	int *status = arg;
+	__u32 ifindex;
+	int parse_err;
+
+	fprintf(stderr, "received message from ovpn-dco\n");
+
+	/* pessimistic default, cleared at the very end */
+	*status = -1;
+
+	if (!genlmsg_valid_hdr(nlh, 0)) {
+		fprintf(stderr, "invalid header\n");
+		return NL_STOP;
+	}
+
+	parse_err = nla_parse(attrs, OVPN_A_MAX, genlmsg_attrdata(gnlh, 0),
+			      genlmsg_attrlen(gnlh, 0), NULL);
+	if (parse_err) {
+		fprintf(stderr, "received bogus data from ovpn-dco\n");
+		return NL_STOP;
+	}
+
+	if (!attrs[OVPN_A_IFINDEX]) {
+		fprintf(stderr, "no ifindex in this message\n");
+		return NL_STOP;
+	}
+
+	/* the name itself is not used: this only verifies the ifindex resolves */
+	ifindex = nla_get_u32(attrs[OVPN_A_IFINDEX]);
+	if (!if_indextoname(ifindex, ifname)) {
+		fprintf(stderr, "cannot resolve ifname for ifindex: %u\n",
+			ifindex);
+		return NL_STOP;
+	}
+
+	if (gnlh->cmd == OVPN_CMD_PEER_DEL_NTF) {
+		fprintf(stdout, "received CMD_PEER_DEL_NTF\n");
+	} else if (gnlh->cmd == OVPN_CMD_KEY_SWAP_NTF) {
+		fprintf(stdout, "received CMD_KEY_SWAP_NTF\n");
+	} else {
+		fprintf(stderr, "received unknown command: %d\n", gnlh->cmd);
+		return NL_STOP;
+	}
+
+	*status = 0;
+	return NL_OK;
+}
+
+/* Resolve the ID of multicast group @group of generic netlink family
+ * @family by sending CTRL_CMD_GETFAMILY to the "nlctrl" family.
+ *
+ * @sock: connected generic netlink socket
+ * @family: name of the generic netlink family to query
+ * @group: name of the multicast group to look up
+ *
+ * Return: the group ID on success, a negative value on failure (-ENOENT if
+ * the reply did not list the group).
+ */
+static int ovpn_get_mcast_id(struct nl_sock *sock, const char *family,
+			     const char *group)
+{
+	struct nl_msg *msg;
+	struct nl_cb *cb;
+	int ret, ctrlid;
+	struct mcast_handler_args grp = {
+		.group = group,
+		.id = -ENOENT,
+	};
+
+	msg = nlmsg_alloc();
+	if (!msg)
+		return -ENOMEM;
+
+	cb = nl_cb_alloc(NL_CB_DEFAULT);
+	if (!cb) {
+		ret = -ENOMEM;
+		goto out_fail_cb;
+	}
+
+	ctrlid = genl_ctrl_resolve(sock, "nlctrl");
+	if (ctrlid < 0) {
+		/* without the controller family ID no request can be built */
+		ret = ctrlid;
+		goto nla_put_failure;
+	}
+
+	/* value reported if assembling the message fails below */
+	ret = -ENOBUFS;
+	if (!genlmsg_put(msg, 0, 0, ctrlid, 0, 0, CTRL_CMD_GETFAMILY, 0))
+		goto nla_put_failure;
+
+	NLA_PUT_STRING(msg, CTRL_ATTR_FAMILY_NAME, family);
+
+	ret = nl_send_auto_complete(sock, msg);
+	if (ret < 0)
+		goto nla_put_failure;
+
+	/* the error/ACK handlers overwrite this to terminate the loop */
+	ret = 1;
+
+	nl_cb_err(cb, NL_CB_CUSTOM, mcast_error_handler, &ret);
+	nl_cb_set(cb, NL_CB_ACK, NL_CB_CUSTOM, mcast_ack_handler, &ret);
+	nl_cb_set(cb, NL_CB_VALID, NL_CB_CUSTOM, mcast_family_handler, &grp);
+
+	while (ret > 0) {
+		int err = nl_recvmsgs(sock, cb);
+
+		/* a receive failure that no handler recorded would otherwise
+		 * leave ret at 1 and make this loop spin forever
+		 */
+		if (err < 0 && ret > 0)
+			ret = err;
+	}
+
+	if (ret == 0)
+		ret = grp.id;
+ nla_put_failure:
+	nl_cb_put(cb);
+ out_fail_cb:
+	nlmsg_free(msg);
+	return ret;
+}
+
+/* Join the OVPN_MCGRP_PEERS multicast group of the ovpn generic netlink
+ * family and process incoming notifications with ovpn_handle_msg().
+ *
+ * Return: 0 once a notification was handled successfully, a negative value
+ * on any failure.
+ */
+static int ovpn_listen_mcast(void)
+{
+	struct nl_sock *sock;
+	struct nl_cb *cb;
+	int mcid, ret;
+
+	sock = nl_socket_alloc();
+	if (!sock) {
+		fprintf(stderr, "cannot allocate netlink socket\n");
+		/* nothing to release yet */
+		return -ENOMEM;
+	}
+
+	nl_socket_set_buffer_size(sock, 8192, 8192);
+
+	ret = genl_connect(sock);
+	if (ret < 0) {
+		fprintf(stderr, "cannot connect to generic netlink: %s\n",
+			nl_geterror(ret));
+		goto err_free;
+	}
+
+	mcid = ovpn_get_mcast_id(sock, OVPN_FAMILY_NAME, OVPN_MCGRP_PEERS);
+	if (mcid < 0) {
+		fprintf(stderr, "cannot get mcast group: %s\n",
+			nl_geterror(mcid));
+		/* ret still holds 0 from genl_connect(): report the failure */
+		ret = mcid;
+		goto err_free;
+	}
+
+	ret = nl_socket_add_membership(sock, mcid);
+	if (ret) {
+		fprintf(stderr, "failed to join mcast group: %d\n", ret);
+		goto err_free;
+	}
+
+	cb = nl_cb_alloc(NL_CB_DEFAULT);
+	if (!cb) {
+		fprintf(stderr, "cannot allocate netlink callbacks\n");
+		ret = -ENOMEM;
+		goto err_free;
+	}
+
+	/* the callbacks overwrite ret, which terminates the loop below */
+	ret = 1;
+	nl_cb_set(cb, NL_CB_SEQ_CHECK, NL_CB_CUSTOM, nl_seq_check, NULL);
+	nl_cb_set(cb, NL_CB_VALID, NL_CB_CUSTOM, ovpn_handle_msg, &ret);
+	nl_cb_err(cb, NL_CB_CUSTOM, ovpn_nl_cb_error, &ret);
+
+	while (ret == 1) {
+		int err = nl_recvmsgs(sock, cb);
+
+		if (err < 0) {
+			fprintf(stderr,
+				"cannot receive netlink message: (%d) %s\n",
+				err, nl_geterror(-err));
+			ret = -1;
+			break;
+		}
+	}
+
+	nl_cb_put(cb);
+err_free:
+	nl_socket_free(sock);
+	return ret;
+}
+
+/* Print the list of supported commands and their arguments to stderr.
+ *
+ * @cmd: program name shown in the synopsis line
+ */
+static void usage(const char *cmd)
+{
+	fprintf(stderr,
+		"Usage: %s <command> <iface> [arguments..]\n",
+		cmd);
+	fprintf(stderr, "where <command> can be one of the following\n\n");
+
+	fprintf(stderr, "* new_iface <iface> [mode]: create new ovpn interface\n");
+	fprintf(stderr, "\tiface: ovpn interface name\n");
+	fprintf(stderr, "\tmode:\n");
+	fprintf(stderr, "\t\t- P2P for peer-to-peer mode (i.e. client)\n");
+	fprintf(stderr, "\t\t- MP for multi-peer mode (i.e. server)\n");
+
+	fprintf(stderr, "* del_iface <iface>: delete ovpn interface\n");
+	fprintf(stderr, "\tiface: ovpn interface name\n");
+
+	fprintf(stderr,
+		"* listen <iface> <lport> <peers_file> [ipv6]: listen for incoming peer TCP connections\n");
+	fprintf(stderr, "\tiface: ovpn interface name\n");
+	fprintf(stderr, "\tlport: TCP port to listen to\n");
+	fprintf(stderr,
+		"\tpeers_file: file containing one peer per line. Line format:\n");
+	fprintf(stderr, "\t\t<peer_id> <vpnaddr>\n");
+	fprintf(stderr,
+		"\tipv6: whether the socket should listen to the IPv6 wildcard address\n");
+
+	fprintf(stderr,
+		"* connect <iface> <peer_id> <raddr> <rport> [key_file]: start connecting peer of TCP-based VPN session\n");
+	fprintf(stderr, "\tiface: ovpn interface name\n");
+	fprintf(stderr, "\tpeer_id: peer ID of the connecting peer\n");
+	fprintf(stderr, "\traddr: peer IP address to connect to\n");
+	fprintf(stderr, "\trport: peer TCP port to connect to\n");
+	fprintf(stderr,
+		"\tkey_file: file containing the symmetric key for encryption\n");
+
+	/* arguments are described in the order in which they are passed */
+	fprintf(stderr,
+		"* new_peer <iface> <peer_id> <lport> <raddr> <rport> [vpnaddr]: add new peer\n");
+	fprintf(stderr, "\tiface: ovpn interface name\n");
+	fprintf(stderr,
+		"\tpeer_id: peer ID to be used in data packets to/from this peer\n");
+	fprintf(stderr, "\tlport: local UDP port to bind to\n");
+	fprintf(stderr, "\traddr: peer IP address\n");
+	fprintf(stderr, "\trport: peer UDP port\n");
+	fprintf(stderr, "\tvpnaddr: peer VPN IP\n");
+
+	/* the line format mirrors the six fields read for new_multi_peer */
+	fprintf(stderr,
+		"* new_multi_peer <iface> <lport> <peers_file>: add multiple peers as listed in the file\n");
+	fprintf(stderr, "\tiface: ovpn interface name\n");
+	fprintf(stderr, "\tlport: local UDP port to bind to\n");
+	fprintf(stderr,
+		"\tpeers_file: text file containing one peer per line. Line format:\n");
+	fprintf(stderr,
+		"\t\t<peer_id> <laddr> <lport> <raddr> <rport> <vpnaddr>\n");
+
+	fprintf(stderr,
+		"* set_peer <iface> <peer_id> <keepalive_interval> <keepalive_timeout>: set peer attributes\n");
+	fprintf(stderr, "\tiface: ovpn interface name\n");
+	fprintf(stderr, "\tpeer_id: peer ID of the peer to modify\n");
+	fprintf(stderr,
+		"\tkeepalive_interval: interval for sending ping messages\n");
+	fprintf(stderr,
+		"\tkeepalive_timeout: time after which a peer is timed out\n");
+
+	fprintf(stderr, "* del_peer <iface> <peer_id>: delete peer\n");
+	fprintf(stderr, "\tiface: ovpn interface name\n");
+	fprintf(stderr, "\tpeer_id: peer ID of the peer to delete\n");
+
+	fprintf(stderr, "* get_peer <iface> [peer_id]: retrieve peer(s) status\n");
+	fprintf(stderr, "\tiface: ovpn interface name\n");
+	fprintf(stderr,
+		"\tpeer_id: peer ID of the peer to query. All peers are returned if omitted\n");
+
+	fprintf(stderr,
+		"* new_key <iface> <peer_id> <slot> <key_id> <cipher> <key_dir> <key_file>: set data channel key\n");
+	fprintf(stderr, "\tiface: ovpn interface name\n");
+	fprintf(stderr,
+		"\tpeer_id: peer ID of the peer to configure the key for\n");
+	fprintf(stderr, "\tslot: either 1 (primary) or 2 (secondary)\n");
+	fprintf(stderr, "\tkey_id: an ID from 0 to 7\n");
+	fprintf(stderr,
+		"\tcipher: cipher to use, supported: aes (AES-GCM), chachapoly (CHACHA20POLY1305)\n");
+	fprintf(stderr,
+		"\tkey_dir: key direction, must be 0 on one host and 1 on the other\n");
+	fprintf(stderr, "\tkey_file: file containing the pre-shared key\n");
+
+	fprintf(stderr,
+		"* del_key <iface> <peer_id> [slot]: erase existing data channel key\n");
+	fprintf(stderr, "\tiface: ovpn interface name\n");
+	fprintf(stderr, "\tpeer_id: peer ID of the peer to modify\n");
+	fprintf(stderr, "\tslot: slot to erase. PRIMARY if omitted\n");
+
+	fprintf(stderr,
+		"* get_key <iface> <peer_id> <slot>: retrieve non-sensitive key data\n");
+	fprintf(stderr, "\tiface: ovpn interface name\n");
+	fprintf(stderr, "\tpeer_id: peer ID of the peer to query\n");
+	fprintf(stderr, "\tslot: either 1 (primary) or 2 (secondary)\n");
+
+	fprintf(stderr,
+		"* swap_keys <iface> <peer_id>: swap content of primary and secondary key slots\n");
+	fprintf(stderr, "\tiface: ovpn interface name\n");
+	fprintf(stderr, "\tpeer_id: peer ID of the peer to modify\n");
+
+	fprintf(stderr,
+		"* listen_mcast: listen to ovpn netlink multicast messages\n");
+}
+
+/* Resolve the remote endpoint and/or the VPN IP of a peer.
+ *
+ * @ovpn: context; sa_family selects the address family to resolve for, and
+ *	  remote, peer_ip, peer_ip_set and sa_family are filled in
+ * @host: remote address to resolve into ovpn->remote (may be NULL)
+ * @service: port/service passed to getaddrinfo() along with @host
+ * @vpnip: VPN address to resolve into ovpn->peer_ip (may be NULL)
+ *
+ * Return: 0 on success (also when both @host and @vpnip are NULL), -1 if
+ * getaddrinfo() fails, -EINVAL if the first result is neither a valid
+ * IPv4 nor a valid IPv6 socket address.
+ */
+static int ovpn_parse_remote(struct ovpn_ctx *ovpn, const char *host,
+			     const char *service, const char *vpnip)
+{
+	/* NULL whenever there is no getaddrinfo() list left to release */
+	struct addrinfo *result = NULL;
+	struct addrinfo hints = {
+		.ai_family = ovpn->sa_family,
+		.ai_socktype = SOCK_DGRAM,
+		.ai_protocol = IPPROTO_UDP
+	};
+	int ret;
+
+	if (host) {
+		ret = getaddrinfo(host, service, &hints, &result);
+		if (ret) {
+			fprintf(stderr, "getaddrinfo on remote error: %s\n",
+				gai_strerror(ret));
+			return -1;
+		}
+
+		if (!(result->ai_family == AF_INET &&
+		      result->ai_addrlen == sizeof(struct sockaddr_in)) &&
+		    !(result->ai_family == AF_INET6 &&
+		      result->ai_addrlen == sizeof(struct sockaddr_in6))) {
+			ret = -EINVAL;
+			goto out;
+		}
+
+		memcpy(&ovpn->remote, result->ai_addr, result->ai_addrlen);
+
+		/* release this list before result is reused for vpnip */
+		freeaddrinfo(result);
+		result = NULL;
+	}
+
+	if (vpnip) {
+		ret = getaddrinfo(vpnip, NULL, &hints, &result);
+		if (ret) {
+			fprintf(stderr, "getaddrinfo on vpnip error: %s\n",
+				gai_strerror(ret));
+			return -1;
+		}
+
+		if (!(result->ai_family == AF_INET &&
+		      result->ai_addrlen == sizeof(struct sockaddr_in)) &&
+		    !(result->ai_family == AF_INET6 &&
+		      result->ai_addrlen == sizeof(struct sockaddr_in6))) {
+			ret = -EINVAL;
+			goto out;
+		}
+
+		memcpy(&ovpn->peer_ip, result->ai_addr, result->ai_addrlen);
+		ovpn->sa_family = result->ai_family;
+
+		ovpn->peer_ip_set = true;
+	}
+
+	ret = 0;
+out:
+	/* result is NULL when nothing was resolved or it was already freed */
+	if (result)
+		freeaddrinfo(result);
+	return ret;
+}
+
+/* Parse the textual description of a peer into @ovpn.
+ *
+ * @ovpn: context receiving the parsed peer ID and addresses
+ * @peer_id: decimal peer ID, at most PEER_ID_UNDEF
+ * @raddr: remote address, forwarded to ovpn_parse_remote()
+ * @rport: remote port, forwarded to ovpn_parse_remote()
+ * @vpnip: VPN IP of the peer, forwarded to ovpn_parse_remote()
+ *
+ * Return: -1 if the peer ID is out of range, otherwise the result of
+ * ovpn_parse_remote().
+ */
+static int ovpn_parse_new_peer(struct ovpn_ctx *ovpn, const char *peer_id,
+			       const char *raddr, const char *rport,
+			       const char *vpnip)
+{
+	/* strtoul() only sets errno on failure: clear it first so that a
+	 * stale ERANGE from an earlier call is not mistaken for an overflow
+	 */
+	errno = 0;
+	ovpn->peer_id = strtoul(peer_id, NULL, 10);
+	if (errno == ERANGE || ovpn->peer_id > PEER_ID_UNDEF) {
+		fprintf(stderr, "peer ID value out of range\n");
+		return -1;
+	}
+
+	return ovpn_parse_remote(ovpn, raddr, rport, vpnip);
+}
+
+/* Parse a key slot number: "1" selects OVPN_KEY_SLOT_PRIMARY, "2" selects
+ * OVPN_KEY_SLOT_SECONDARY.
+ *
+ * @arg: decimal slot number
+ * @ovpn: context whose key_slot is set on success
+ *
+ * Return: 0 on success, -1 if the value is not 1 or 2.
+ */
+static int ovpn_parse_key_slot(const char *arg, struct ovpn_ctx *ovpn)
+{
+	/* kept as unsigned long so that huge values cannot wrap into the
+	 * valid range when narrowed
+	 */
+	unsigned long slot;
+
+	/* clear errno: strtoul() leaves it untouched on success */
+	errno = 0;
+	slot = strtoul(arg, NULL, 10);
+	if (errno == ERANGE || slot < 1 || slot > 2) {
+		fprintf(stderr, "key slot out of range\n");
+		return -1;
+	}
+
+	switch (slot) {
+	case 1:
+		ovpn->key_slot = OVPN_KEY_SLOT_PRIMARY;
+		break;
+	case 2:
+		ovpn->key_slot = OVPN_KEY_SLOT_SECONDARY;
+		break;
+	}
+
+	return 0;
+}
+
+/* Send one test frame over a TCP socket: a 2-byte big-endian length (1000)
+ * followed by 1000 bytes of 0x86.
+ *
+ * Return: 0 on success, the negative result of send() on failure.
+ */
+static int ovpn_send_tcp_data(int socket)
+{
+	uint16_t len = htons(1000);
+	uint8_t buf[1002];
+	int ret;
+
+	memcpy(buf, &len, sizeof(len));
+	memset(buf + sizeof(len), 0x86, sizeof(buf) - sizeof(len));
+
+	ret = send(socket, buf, sizeof(buf), MSG_NOSIGNAL);
+	if (ret < 0) {
+		/* report the failure instead of printing -1 as a byte count */
+		fprintf(stderr, "Error while sending TCP data: %s\n",
+			strerror(errno));
+		return ret;
+	}
+
+	fprintf(stdout, "Sent %d bytes over TCP socket\n", ret);
+
+	return 0;
+}
+
+/* Receive one test frame from a TCP socket and print its size together with
+ * the length announced in its 2-byte big-endian header.
+ *
+ * Return: 0 on success, a negative value if recv() fails or delivers fewer
+ * than the 2 header bytes (-EIO for EOF or a 1-byte read).
+ */
+static int ovpn_recv_tcp_data(int socket)
+{
+	uint8_t buf[1002];
+	uint16_t len;
+	int ret;
+
+	ret = recv(socket, buf, sizeof(buf), MSG_NOSIGNAL);
+
+	if (ret < 2) {
+		fprintf(stderr, ">>>> Error while reading TCP data: %d\n", ret);
+		/* callers test for ret < 0: make sure EOF and short reads
+		 * are seen as failures too
+		 */
+		return ret < 0 ? ret : -EIO;
+	}
+
+	memcpy(&len, buf, sizeof(len));
+	len = ntohs(len);
+
+	fprintf(stdout, ">>>> Received %d bytes over TCP socket, header: %u\n",
+		ret, len);
+
+	return 0;
+}
+
+/* Map a command name given on the command line to its enum ovpn_cmd value.
+ *
+ * Return: the matching command, or CMD_INVALID for an unknown name.
+ */
+static enum ovpn_cmd ovpn_parse_cmd(const char *cmd)
+{
+	static const struct {
+		const char *name;
+		enum ovpn_cmd id;
+	} known[] = {
+		{ "new_iface", CMD_NEW_IFACE },
+		{ "del_iface", CMD_DEL_IFACE },
+		{ "listen", CMD_LISTEN },
+		{ "connect", CMD_CONNECT },
+		{ "new_peer", CMD_NEW_PEER },
+		{ "new_multi_peer", CMD_NEW_MULTI_PEER },
+		{ "set_peer", CMD_SET_PEER },
+		{ "del_peer", CMD_DEL_PEER },
+		{ "get_peer", CMD_GET_PEER },
+		{ "new_key", CMD_NEW_KEY },
+		{ "del_key", CMD_DEL_KEY },
+		{ "get_key", CMD_GET_KEY },
+		{ "swap_keys", CMD_SWAP_KEYS },
+		{ "listen_mcast", CMD_LISTEN_MCAST },
+	};
+	size_t i;
+
+	/* names are compared exactly, first match wins */
+	for (i = 0; i < sizeof(known) / sizeof(known[0]); i++) {
+		if (strcmp(cmd, known[i].name) == 0)
+			return known[i].id;
+	}
+
+	return CMD_INVALID;
+}
+
+/* Send the process to the background and wait for a signal.
+ *
+ * This helper is called at the end of commands
+ * creating sockets, so that the latter stay alive
+ * along with the process that created them.
+ *
+ * A signal is expected to be delivered in order to
+ * terminate the waiting process.
+ *
+ * If daemon() fails, the failure is reported and the process
+ * keeps waiting in the foreground.
+ */
+static void ovpn_waitbg(void)
+{
+	/* daemon(1, 1): keep the working directory and the standard streams */
+	if (daemon(1, 1) < 0)
+		fprintf(stderr, "cannot send process to background: %s\n",
+			strerror(errno));
+	pause();
+}
+
+/* CMD_LISTEN: accept one TCP connection per line of ovpn->peers_file, add
+ * each one as a peer, read a test frame from every accepted socket and then
+ * wait in the background.
+ */
+static int ovpn_run_listen(struct ovpn_ctx *ovpn)
+{
+	char peer_id[10], vpnip[INET6_ADDRSTRLEN];
+	int num_peers = 0, ret, i;
+	FILE *fp;
+
+	ret = ovpn_listen(ovpn, ovpn->sa_family);
+	if (ret < 0) {
+		fprintf(stderr, "cannot listen on TCP socket\n");
+		return ret;
+	}
+
+	fp = fopen(ovpn->peers_file, "r");
+	if (!fp) {
+		fprintf(stderr, "cannot open file: %s\n",
+			ovpn->peers_file);
+		return -1;
+	}
+
+	/* field widths keep fscanf() inside peer_id[10] and
+	 * vpnip[INET6_ADDRSTRLEN] (46 bytes)
+	 */
+	while (fscanf(fp, "%9s %45s\n", peer_id, vpnip) == 2) {
+		struct ovpn_ctx peer_ctx = { 0 };
+
+		if (num_peers == MAX_PEERS) {
+			fprintf(stderr, "max peers reached!\n");
+			ret = -E2BIG;
+			goto err_close;
+		}
+
+		peer_ctx.ifindex = ovpn->ifindex;
+		peer_ctx.sa_family = ovpn->sa_family;
+
+		peer_ctx.socket = ovpn_accept(ovpn);
+		if (peer_ctx.socket < 0) {
+			fprintf(stderr, "cannot accept connection!\n");
+			ret = -1;
+			goto err_close;
+		}
+
+		/* store peer sockets to test TCP I/O */
+		ovpn->cli_sockets[num_peers] = peer_ctx.socket;
+
+		ret = ovpn_parse_new_peer(&peer_ctx, peer_id, NULL,
+					  NULL, vpnip);
+		if (ret < 0) {
+			fprintf(stderr, "error while parsing line\n");
+			ret = -1;
+			goto err_close;
+		}
+
+		ret = ovpn_new_peer(&peer_ctx, true);
+		if (ret < 0) {
+			fprintf(stderr,
+				"cannot add peer to VPN: %s %s\n",
+				peer_id, vpnip);
+			goto err_close;
+		}
+		num_peers++;
+	}
+	fclose(fp);
+
+	for (i = 0; i < num_peers; i++) {
+		ret = ovpn_recv_tcp_data(ovpn->cli_sockets[i]);
+		if (ret < 0)
+			break;
+	}
+	ovpn_waitbg();
+
+	return ret;
+
+err_close:
+	fclose(fp);
+	return ret;
+}
+
+/* CMD_NEW_MULTI_PEER: open the UDP socket, add every peer listed in
+ * ovpn->peers_file on top of it and then wait in the background.
+ */
+static int ovpn_run_new_multi_peer(struct ovpn_ctx *ovpn)
+{
+	char peer_id[10], vpnip[INET6_ADDRSTRLEN], laddr[128], lport[10];
+	char raddr[128], rport[10];
+	FILE *fp;
+	int ret;
+
+	ret = ovpn_udp_socket(ovpn, AF_INET6);
+	if (ret < 0)
+		return ret;
+
+	fp = fopen(ovpn->peers_file, "r");
+	if (!fp) {
+		fprintf(stderr, "cannot open file: %s\n",
+			ovpn->peers_file);
+		return -1;
+	}
+
+	/* every field width is the size of its buffer minus the final NUL */
+	while (fscanf(fp, "%9s %127s %9s %127s %9s %45s\n", peer_id, laddr,
+		      lport, raddr, rport, vpnip) == 6) {
+		struct ovpn_ctx peer_ctx = { 0 };
+
+		peer_ctx.ifindex = ovpn->ifindex;
+		peer_ctx.socket = ovpn->socket;
+		peer_ctx.sa_family = AF_UNSPEC;
+
+		ret = ovpn_parse_new_peer(&peer_ctx, peer_id, raddr,
+					  rport, vpnip);
+		if (ret < 0) {
+			fprintf(stderr, "error while parsing line\n");
+			ret = -1;
+			goto err_close;
+		}
+
+		ret = ovpn_new_peer(&peer_ctx, false);
+		if (ret < 0) {
+			fprintf(stderr,
+				"cannot add peer to VPN: %s %s %s %s\n",
+				peer_id, raddr, rport, vpnip);
+			goto err_close;
+		}
+	}
+	fclose(fp);
+
+	ovpn_waitbg();
+
+	return ret;
+
+err_close:
+	fclose(fp);
+	return ret;
+}
+
+/* Execute the command stored in ovpn->cmd using the arguments previously
+ * parsed into @ovpn.
+ *
+ * Commands that create sockets (listen, connect, new_peer, new_multi_peer)
+ * end by calling ovpn_waitbg().
+ *
+ * Return: 0 on success, a non-zero value on failure (-EINVAL for
+ * CMD_INVALID).
+ */
+static int ovpn_run_cmd(struct ovpn_ctx *ovpn)
+{
+	/* value returned for CMD_INVALID, which executes nothing */
+	int ret = -EINVAL;
+
+	switch (ovpn->cmd) {
+	case CMD_NEW_IFACE:
+		ret = ovpn_new_iface(ovpn);
+		break;
+	case CMD_DEL_IFACE:
+		ret = ovpn_del_iface(ovpn);
+		break;
+	case CMD_LISTEN:
+		ret = ovpn_run_listen(ovpn);
+		break;
+	case CMD_CONNECT:
+		ret = ovpn_connect(ovpn);
+		if (ret < 0) {
+			fprintf(stderr, "cannot connect TCP socket\n");
+			return ret;
+		}
+
+		ret = ovpn_new_peer(ovpn, true);
+		if (ret < 0) {
+			fprintf(stderr, "cannot add peer to VPN\n");
+			close(ovpn->socket);
+			return ret;
+		}
+
+		if (ovpn->cipher != OVPN_CIPHER_ALG_NONE) {
+			ret = ovpn_new_key(ovpn);
+			if (ret < 0) {
+				fprintf(stderr, "cannot set key\n");
+				return ret;
+			}
+		}
+
+		ret = ovpn_send_tcp_data(ovpn->socket);
+		ovpn_waitbg();
+		break;
+	case CMD_NEW_PEER:
+		ret = ovpn_udp_socket(ovpn, AF_INET6);
+		if (ret < 0)
+			return ret;
+
+		ret = ovpn_new_peer(ovpn, false);
+		ovpn_waitbg();
+		break;
+	case CMD_NEW_MULTI_PEER:
+		ret = ovpn_run_new_multi_peer(ovpn);
+		break;
+	case CMD_SET_PEER:
+		ret = ovpn_set_peer(ovpn);
+		break;
+	case CMD_DEL_PEER:
+		ret = ovpn_del_peer(ovpn);
+		break;
+	case CMD_GET_PEER:
+		if (ovpn->peer_id == PEER_ID_UNDEF)
+			fprintf(stderr, "List of peers connected to: %s\n",
+				ovpn->ifname);
+
+		ret = ovpn_get_peer(ovpn);
+		break;
+	case CMD_NEW_KEY:
+		ret = ovpn_new_key(ovpn);
+		break;
+	case CMD_DEL_KEY:
+		ret = ovpn_del_key(ovpn);
+		break;
+	case CMD_GET_KEY:
+		ret = ovpn_get_key(ovpn);
+		break;
+	case CMD_SWAP_KEYS:
+		ret = ovpn_swap_keys(ovpn);
+		break;
+	case CMD_LISTEN_MCAST:
+		ret = ovpn_listen_mcast();
+		break;
+	case CMD_INVALID:
+		break;
+	}
+
+	return ret;
+}
+
+/* strtoul() wrapper that clears errno first, so that callers checking for
+ * ERANGE afterwards never act on a stale value left by an earlier call
+ */
+static unsigned long ovpn_strtoul(const char *str)
+{
+	errno = 0;
+	return strtoul(str, NULL, 10);
+}
+
+/* Parse the command line arguments of the command stored in ovpn->cmd.
+ *
+ * @ovpn: context; cmd must already be set, the remaining fields are filled in
+ * @argc: argument count as received by main()
+ * @argv: argument vector as received by main()
+ *
+ * Return: 0 on success, -EINVAL when mandatory arguments are missing, -1
+ * when an argument cannot be parsed or is out of range.
+ */
+static int ovpn_parse_cmd_args(struct ovpn_ctx *ovpn, int argc, char *argv[])
+{
+	const char *vpnip;
+	int ret;
+
+	/* no args required for LISTEN_MCAST */
+	if (ovpn->cmd == CMD_LISTEN_MCAST)
+		return 0;
+
+	/* all commands need an ifname */
+	if (argc < 3)
+		return -EINVAL;
+
+	strscpy(ovpn->ifname, argv[2], IFNAMSIZ - 1);
+	ovpn->ifname[IFNAMSIZ - 1] = '\0';
+
+	/* all commands, except NEW_IFACE, need an ifindex */
+	if (ovpn->cmd != CMD_NEW_IFACE) {
+		ovpn->ifindex = if_nametoindex(ovpn->ifname);
+		if (!ovpn->ifindex) {
+			fprintf(stderr, "cannot find interface: %s\n",
+				strerror(errno));
+			return -1;
+		}
+	}
+
+	switch (ovpn->cmd) {
+	case CMD_NEW_IFACE:
+		/* the mode is optional */
+		if (argc < 4)
+			break;
+
+		if (!strcmp(argv[3], "P2P")) {
+			ovpn->mode = OVPN_MODE_P2P;
+		} else if (!strcmp(argv[3], "MP")) {
+			ovpn->mode = OVPN_MODE_MP;
+		} else {
+			fprintf(stderr, "Cannot parse iface mode: %s\n",
+				argv[3]);
+			return -1;
+		}
+		ovpn->mode_set = true;
+		break;
+	case CMD_DEL_IFACE:
+		break;
+	case CMD_LISTEN:
+		if (argc < 5)
+			return -EINVAL;
+
+		ovpn->lport = ovpn_strtoul(argv[3]);
+		if (errno == ERANGE || ovpn->lport > 65535) {
+			fprintf(stderr, "lport value out of range\n");
+			return -1;
+		}
+
+		ovpn->peers_file = argv[4];
+
+		if (argc > 5 && !strcmp(argv[5], "ipv6"))
+			ovpn->sa_family = AF_INET6;
+		break;
+	case CMD_CONNECT:
+		if (argc < 6)
+			return -EINVAL;
+
+		ovpn->sa_family = AF_INET;
+
+		ret = ovpn_parse_new_peer(ovpn, argv[3], argv[4], argv[5],
+					  NULL);
+		if (ret < 0) {
+			fprintf(stderr, "Cannot parse remote peer data\n");
+			return -1;
+		}
+
+		/* optional key file: install it with fixed default parameters */
+		if (argc > 6) {
+			ovpn->key_slot = OVPN_KEY_SLOT_PRIMARY;
+			ovpn->key_id = 0;
+			ovpn->cipher = OVPN_CIPHER_ALG_AES_GCM;
+			ovpn->key_dir = KEY_DIR_OUT;
+
+			ret = ovpn_parse_key(argv[6], ovpn);
+			if (ret)
+				return -1;
+		}
+		break;
+	case CMD_NEW_PEER:
+		if (argc < 7)
+			return -EINVAL;
+
+		ovpn->lport = ovpn_strtoul(argv[4]);
+		if (errno == ERANGE || ovpn->lport > 65535) {
+			fprintf(stderr, "lport value out of range\n");
+			return -1;
+		}
+
+		vpnip = (argc > 7) ? argv[7] : NULL;
+
+		ret = ovpn_parse_new_peer(ovpn, argv[3], argv[5], argv[6],
+					  vpnip);
+		if (ret < 0)
+			return -1;
+		break;
+	case CMD_NEW_MULTI_PEER:
+		if (argc < 5)
+			return -EINVAL;
+
+		ovpn->lport = ovpn_strtoul(argv[3]);
+		if (errno == ERANGE || ovpn->lport > 65535) {
+			fprintf(stderr, "lport value out of range\n");
+			return -1;
+		}
+
+		ovpn->peers_file = argv[4];
+		break;
+	case CMD_SET_PEER:
+		if (argc < 6)
+			return -EINVAL;
+
+		ovpn->peer_id = ovpn_strtoul(argv[3]);
+		if (errno == ERANGE || ovpn->peer_id > PEER_ID_UNDEF) {
+			fprintf(stderr, "peer ID value out of range\n");
+			return -1;
+		}
+
+		ovpn->keepalive_interval = ovpn_strtoul(argv[4]);
+		if (errno == ERANGE) {
+			fprintf(stderr,
+				"keepalive interval value out of range\n");
+			return -1;
+		}
+
+		ovpn->keepalive_timeout = ovpn_strtoul(argv[5]);
+		if (errno == ERANGE) {
+			fprintf(stderr,
+				"keepalive timeout value out of range\n");
+			return -1;
+		}
+		break;
+	case CMD_DEL_PEER:
+		if (argc < 4)
+			return -EINVAL;
+
+		ovpn->peer_id = ovpn_strtoul(argv[3]);
+		if (errno == ERANGE || ovpn->peer_id > PEER_ID_UNDEF) {
+			fprintf(stderr, "peer ID value out of range\n");
+			return -1;
+		}
+		break;
+	case CMD_GET_PEER:
+		/* without a peer ID all peers are queried */
+		ovpn->peer_id = PEER_ID_UNDEF;
+		if (argc > 3) {
+			ovpn->peer_id = ovpn_strtoul(argv[3]);
+			if (errno == ERANGE || ovpn->peer_id > PEER_ID_UNDEF) {
+				fprintf(stderr, "peer ID value out of range\n");
+				return -1;
+			}
+		}
+		break;
+	case CMD_NEW_KEY:
+		if (argc < 9)
+			return -EINVAL;
+
+		ovpn->peer_id = ovpn_strtoul(argv[3]);
+		if (errno == ERANGE) {
+			fprintf(stderr, "peer ID value out of range\n");
+			return -1;
+		}
+
+		ret = ovpn_parse_key_slot(argv[4], ovpn);
+		if (ret)
+			return -1;
+
+		/* usage() documents key IDs from 0 to 7 */
+		ovpn->key_id = ovpn_strtoul(argv[5]);
+		if (errno == ERANGE || ovpn->key_id > 7) {
+			fprintf(stderr, "key ID out of range\n");
+			return -1;
+		}
+
+		ret = ovpn_parse_cipher(argv[6], ovpn);
+		if (ret < 0)
+			return -1;
+
+		ret = ovpn_parse_key_direction(argv[7], ovpn);
+		if (ret < 0)
+			return -1;
+
+		ret = ovpn_parse_key(argv[8], ovpn);
+		if (ret)
+			return -1;
+		break;
+	case CMD_DEL_KEY:
+		if (argc < 4)
+			return -EINVAL;
+
+		ovpn->peer_id = ovpn_strtoul(argv[3]);
+		if (errno == ERANGE) {
+			fprintf(stderr, "peer ID value out of range\n");
+			return -1;
+		}
+
+		/* the slot is optional (argv[4] is NULL when argc == 4) and
+		 * defaults to PRIMARY, as documented by usage()
+		 */
+		if (argc > 4) {
+			ret = ovpn_parse_key_slot(argv[4], ovpn);
+			if (ret)
+				return ret;
+		} else {
+			ovpn->key_slot = OVPN_KEY_SLOT_PRIMARY;
+		}
+		break;
+	case CMD_GET_KEY:
+		if (argc < 5)
+			return -EINVAL;
+
+		ovpn->peer_id = ovpn_strtoul(argv[3]);
+		if (errno == ERANGE) {
+			fprintf(stderr, "peer ID value out of range\n");
+			return -1;
+		}
+
+		ret = ovpn_parse_key_slot(argv[4], ovpn);
+		if (ret)
+			return ret;
+		break;
+	case CMD_SWAP_KEYS:
+		if (argc < 4)
+			return -EINVAL;
+
+		ovpn->peer_id = ovpn_strtoul(argv[3]);
+		if (errno == ERANGE) {
+			fprintf(stderr, "peer ID value out of range\n");
+			return -1;
+		}
+		break;
+	case CMD_LISTEN_MCAST:
+		break;
+	case CMD_INVALID:
+		break;
+	}
+
+	return 0;
+}
+
+/* Entry point: identify the command in argv[1], parse its arguments and
+ * execute it. The value returned by the failing step (or 0) becomes the
+ * return value of main().
+ */
+int main(int argc, char *argv[])
+{
+	struct ovpn_ctx ovpn;
+	int err;
+
+	if (argc < 2) {
+		usage(argv[0]);
+		return -1;
+	}
+
+	/* start from a zeroed context with no address family and no cipher */
+	memset(&ovpn, 0, sizeof(ovpn));
+	ovpn.cipher = OVPN_CIPHER_ALG_NONE;
+	ovpn.sa_family = AF_UNSPEC;
+	ovpn.cmd = ovpn_parse_cmd(argv[1]);
+
+	if (ovpn.cmd == CMD_INVALID) {
+		fprintf(stderr, "Error: unknown command.\n\n");
+		usage(argv[0]);
+		return -1;
+	}
+
+	err = ovpn_parse_cmd_args(&ovpn, argc, argv);
+	if (err < 0) {
+		fprintf(stderr, "Error: invalid arguments.\n\n");
+		/* only missing arguments trigger the full usage text */
+		if (err == -EINVAL)
+			usage(argv[0]);
+		return err;
+	}
+
+	err = ovpn_run_cmd(&ovpn);
+	if (err == 0)
+		return 0;
+
+	fprintf(stderr, "Cannot execute command: %s (%d)\n",
+		strerror(-err), err);
+
+	return err;
+}
diff --git a/tools/testing/selftests/net/ovpn/tcp_peers.txt b/tools/testing/selftests/net/ovpn/tcp_peers.txt
new file mode 100644
index 000000000000..d753eebe8716
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/tcp_peers.txt
@@ -0,0 +1,5 @@
+1 5.5.5.2
+2 5.5.5.3
+3 5.5.5.4
+4 5.5.5.5
+5 5.5.5.6
diff --git a/tools/testing/selftests/net/ovpn/test-chachapoly.sh b/tools/testing/selftests/net/ovpn/test-chachapoly.sh
new file mode 100755
index 000000000000..32504079a2b8
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/test-chachapoly.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2025 OpenVPN, Inc.
+#
+#  Author:	Antonio Quartulli <antonio@openvpn.net>
+
+ALG="chachapoly"
+
+source ./test.sh
diff --git a/tools/testing/selftests/net/ovpn/test-close-socket-tcp.sh b/tools/testing/selftests/net/ovpn/test-close-socket-tcp.sh
new file mode 100755
index 000000000000..093d44772ffd
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/test-close-socket-tcp.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2025 OpenVPN, Inc.
+#
+#  Author:	Antonio Quartulli <antonio@openvpn.net>
+
+PROTO="TCP"
+
+source ./test-close-socket.sh
diff --git a/tools/testing/selftests/net/ovpn/test-close-socket.sh b/tools/testing/selftests/net/ovpn/test-close-socket.sh
new file mode 100755
index 000000000000..5e48a8b67928
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/test-close-socket.sh
@@ -0,0 +1,45 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2020-2025 OpenVPN, Inc.
+#
+#  Author:	Antonio Quartulli <antonio@openvpn.net>
+
+# Sets up peers 0..NUM_PEERS, pings every peer from peer0 over the tunnel and
+# runs one iperf3 session from peer1 to peer0 before tearing everything down.
+# NUM_PEERS, OVPN_CLI and the cleanup/create_ns/setup_ns/add_peer helpers are
+# not defined here; presumably they come from common.sh (verify there).
+
+#set -x
+# abort on the first failing command
+set -e
+
+source ./common.sh
+
+# start from a clean state
+cleanup
+
+# load the module if available; a failure here is tolerated
+modprobe -q ovpn || true
+
+for p in $(seq 0 ${NUM_PEERS}); do
+	create_ns ${p}
+done
+
+# peer N gets the VPN address 5.5.5.(N+1)/24
+for p in $(seq 0 ${NUM_PEERS}); do
+	setup_ns ${p} 5.5.5.$((${p} + 1))/24
+done
+
+for p in $(seq 0 ${NUM_PEERS}); do
+	add_peer ${p}
+done
+
+# set_peer arguments: keepalive interval 60, keepalive timeout 120, applied
+# on both ends of every peer0 <-> peerN pair
+for p in $(seq 1 ${NUM_PEERS}); do
+	ip netns exec peer0 ${OVPN_CLI} set_peer tun0 ${p} 60 120
+	ip netns exec peer${p} ${OVPN_CLI} set_peer tun${p} ${p} 60 120
+done
+
+sleep 1
+
+# flood ping the VPN address of every peer from peer0
+for p in $(seq 1 ${NUM_PEERS}); do
+	ip netns exec peer0 ping -qfc 500 -w 3 5.5.5.$((${p} + 1))
+done
+
+# one-shot iperf3 server in peer0, client in peer1; the sleep gives the
+# server time to start
+ip netns exec peer0 iperf3 -1 -s &
+sleep 1
+ip netns exec peer1 iperf3 -Z -t 3 -c 5.5.5.1
+
+cleanup
+
+# unload the module; a failure here is tolerated
+modprobe -r ovpn || true
diff --git a/tools/testing/selftests/net/ovpn/test-float.sh b/tools/testing/selftests/net/ovpn/test-float.sh
new file mode 100755
index 000000000000..ba5d725e18b0
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/test-float.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2025 OpenVPN, Inc.
+#
+#  Author:	Antonio Quartulli <antonio@openvpn.net>
+
+FLOAT="1"
+# test.sh runs its client-float section only when FLOAT is "1"; "./" avoids a $PATH lookup
+source ./test.sh
diff --git a/tools/testing/selftests/net/ovpn/test-tcp.sh b/tools/testing/selftests/net/ovpn/test-tcp.sh
new file mode 100755
index 000000000000..ba3f1f315a34
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/test-tcp.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2025 OpenVPN, Inc.
+#
+#  Author:	Antonio Quartulli <antonio@openvpn.net>
+
+PROTO="TCP"
+# PROTO is not read by test.sh itself, presumably by common.sh (confirm); "./" avoids a $PATH lookup
+source ./test.sh
diff --git a/tools/testing/selftests/net/ovpn/test.sh b/tools/testing/selftests/net/ovpn/test.sh
new file mode 100755
index 000000000000..e8acdc303307
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/test.sh
@@ -0,0 +1,117 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2020-2025 OpenVPN, Inc.
+#
+#  Author:	Antonio Quartulli <antonio@openvpn.net>
+# Main ovpn test flow: peer setup, ping/iperf3 traffic, key swap, peer/key queries and deletion, timeouts.
+#set -x
+set -e
+# helpers (cleanup, create_ns, setup_ns, add_peer) and OVPN_CLI, NUM_PEERS, MTU, LAN_IP presumably come from common.sh
+source ./common.sh
+
+cleanup
+
+modprobe -q ovpn || true	# "|| true": a failing modprobe must not abort the script under set -e
+
+for p in $(seq 0 ${NUM_PEERS}); do
+	create_ns ${p}
+done
+
+for p in $(seq 0 ${NUM_PEERS}); do
+	setup_ns ${p} 5.5.5.$((${p} + 1))/24 ${MTU}
+done
+
+for p in $(seq 0 ${NUM_PEERS}); do
+	add_peer ${p}
+done
+# run "set_peer ... 60 120" on both ends of every peer0 <-> peerN pair
+for p in $(seq 1 ${NUM_PEERS}); do
+	ip netns exec peer0 ${OVPN_CLI} set_peer tun0 ${p} 60 120
+	ip netns exec peer${p} ${OVPN_CLI} set_peer tun${p} ${p} 60 120
+done
+
+sleep 1
+# flood-ping each peer from peer0: default payload first, then a 3000-byte payload (-s 3000)
+for p in $(seq 1 ${NUM_PEERS}); do
+	ip netns exec peer0 ping -qfc 500 -w 3 5.5.5.$((${p} + 1))
+	ip netns exec peer0 ping -qfc 500 -s 3000 -w 3 5.5.5.$((${p} + 1))
+done
+
+# ping the LAN address (LAN_IP) behind client 1 from peer0
+ip netns exec peer0 ping -qfc 500 -w 3 ${LAN_IP}
+
+if [ "$FLOAT" == "1" ]; then
+	# make clients float: move each client's veth address from 10.10.<p>.2 to 10.10.<p>.3
+	for p in $(seq 1 ${NUM_PEERS}); do
+		ip -n peer${p} addr del 10.10.${p}.2/24 dev veth${p}
+		ip -n peer${p} addr add 10.10.${p}.3/24 dev veth${p}
+	done
+	for p in $(seq 1 ${NUM_PEERS}); do
+		ip netns exec peer${p} ping -qfc 500 -w 3 5.5.5.1
+	done
+fi
+# one-off (-1) iperf3 server in the background on peer0, then a 3 s zerocopy (-Z) client run from peer1
+ip netns exec peer0 iperf3 -1 -s &
+sleep 1
+ip netns exec peer1 iperf3 -Z -t 3 -c 5.5.5.1
+
+echo "Adding secondary key and then swap:"
+for p in $(seq 1 ${NUM_PEERS}); do
+	ip netns exec peer0 ${OVPN_CLI} new_key tun0 ${p} 2 1 ${ALG} 0 data64.key
+	ip netns exec peer${p} ${OVPN_CLI} new_key tun${p} ${p} 2 1 ${ALG} 1 data64.key
+	ip netns exec peer${p} ${OVPN_CLI} swap_keys tun${p} ${p}
+done
+
+sleep 1
+
+echo "Querying all peers:"
+ip netns exec peer0 ${OVPN_CLI} get_peer tun0
+ip netns exec peer1 ${OVPN_CLI} get_peer tun1
+
+echo "Querying peer 1:"
+ip netns exec peer0 ${OVPN_CLI} get_peer tun0 1
+
+echo "Querying non-existent peer 10:"
+ip netns exec peer0 ${OVPN_CLI} get_peer tun0 10 || true
+
+echo "Deleting peer 1:"
+ip netns exec peer0 ${OVPN_CLI} del_peer tun0 1
+ip netns exec peer1 ${OVPN_CLI} del_peer tun1 1
+# peer 1 was deleted just above, so the key queries start at peer 2
+echo "Querying keys:"
+for p in $(seq 2 ${NUM_PEERS}); do
+	ip netns exec peer${p} ${OVPN_CLI} get_key tun${p} ${p} 1
+	ip netns exec peer${p} ${OVPN_CLI} get_key tun${p} ${p} 2
+done
+
+echo "Deleting peer while sending traffic:"
+(ip netns exec peer2 ping -qf -w 4 5.5.5.1)&
+sleep 2
+ip netns exec peer0 ${OVPN_CLI} del_peer tun0 2
+# the following command fails in TCP mode (both ends get a connection
+# reset when one peer disconnects), hence the "|| true"
+ip netns exec peer2 ${OVPN_CLI} del_peer tun2 2 || true
+# peers 1 and 2 have been deleted by now, so the remaining steps start at peer 3
+echo "Deleting keys:"
+for p in $(seq 3 ${NUM_PEERS}); do
+	ip netns exec peer${p} ${OVPN_CLI} del_key tun${p} ${p} 1
+	ip netns exec peer${p} ${OVPN_CLI} del_key tun${p} ${p} 2
+done
+
+echo "Setting timeout to 3s MP:"
+for p in $(seq 3 ${NUM_PEERS}); do
+	ip netns exec peer0 ${OVPN_CLI} set_peer tun0 ${p} 3 3 || true
+	ip netns exec peer${p} ${OVPN_CLI} set_peer tun${p} ${p} 0 0
+done
+# wait for the peers to time out
+sleep 5
+
+echo "Setting timeout to 3s P2P:"
+for p in $(seq 3 ${NUM_PEERS}); do
+	ip netns exec peer${p} ${OVPN_CLI} set_peer tun${p} ${p} 3 3
+done
+sleep 5
+
+cleanup
+
+modprobe -r ovpn || true
diff --git a/tools/testing/selftests/net/ovpn/udp_peers.txt b/tools/testing/selftests/net/ovpn/udp_peers.txt
new file mode 100644
index 000000000000..e9773ddf875c
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/udp_peers.txt
@@ -0,0 +1,6 @@
+1 10.10.1.1 1 10.10.1.2 1 5.5.5.2
+2 10.10.2.1 1 10.10.2.2 1 5.5.5.3
+3 10.10.3.1 1 10.10.3.2 1 5.5.5.4
+4 fd00:0:0:4::1 1 fd00:0:0:4::2 1 5.5.5.5
+5 fd00:0:0:5::1 1 fd00:0:0:5::2 1 5.5.5.6
+6 fd00:0:0:6::1 1 fd00:0:0:6::2 1 5.5.5.7
diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
index 66be7699c72c..88e914c4eef9 100755
--- a/tools/testing/selftests/net/pmtu.sh
+++ b/tools/testing/selftests/net/pmtu.sh
@@ -205,7 +205,6 @@
 #	Check that PMTU exceptions are created for both paths.
 
 source lib.sh
-source net_helper.sh
 
 PAUSE_ON_FAIL=no
 VERBOSE=0
diff --git a/tools/testing/selftests/net/reuseport_addr_any.c b/tools/testing/selftests/net/reuseport_addr_any.c
index b8475cb29be7..1c43401a1c80 100644
--- a/tools/testing/selftests/net/reuseport_addr_any.c
+++ b/tools/testing/selftests/net/reuseport_addr_any.c
@@ -9,7 +9,6 @@
 #include <arpa/inet.h>
 #include <errno.h>
 #include <error.h>
-#include <linux/dccp.h>
 #include <linux/in.h>
 #include <linux/unistd.h>
 #include <stdbool.h>
@@ -21,10 +20,6 @@
 #include <sys/socket.h>
 #include <unistd.h>
 
-#ifndef SOL_DCCP
-#define SOL_DCCP 269
-#endif
-
 static const char *IP4_ADDR = "127.0.0.1";
 static const char *IP6_ADDR = "::1";
 static const char *IP4_MAPPED6 = "::ffff:127.0.0.1";
@@ -86,15 +81,6 @@ static void build_rcv_fd(int family, int proto, int *rcv_fds, int count,
 
 		if (proto == SOCK_STREAM && listen(rcv_fds[i], 10))
 			error(1, errno, "tcp: failed to listen on receive port");
-		else if (proto == SOCK_DCCP) {
-			if (setsockopt(rcv_fds[i], SOL_DCCP,
-					DCCP_SOCKOPT_SERVICE,
-					&(int) {htonl(42)}, sizeof(int)))
-				error(1, errno, "failed to setsockopt");
-
-			if (listen(rcv_fds[i], 10))
-				error(1, errno, "dccp: failed to listen on receive port");
-		}
 	}
 }
 
@@ -148,11 +134,6 @@ static int connect_and_send(int family, int proto)
 	if (fd < 0)
 		error(1, errno, "failed to create send socket");
 
-	if (proto == SOCK_DCCP &&
-		setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SERVICE,
-				&(int){htonl(42)}, sizeof(int)))
-		error(1, errno, "failed to setsockopt");
-
 	if (bind(fd, saddr, sz))
 		error(1, errno, "failed to bind send socket");
 
@@ -175,7 +156,7 @@ static int receive_once(int epfd, int proto)
 	if (i < 0)
 		error(1, errno, "epoll_wait failed");
 
-	if (proto == SOCK_STREAM || proto == SOCK_DCCP) {
+	if (proto == SOCK_STREAM) {
 		fd = accept(ev.data.fd, NULL, NULL);
 		if (fd < 0)
 			error(1, errno, "failed to accept");
@@ -243,20 +224,6 @@ static void run_one_test(int fam_send, int fam_rcv, int proto,
 
 static void test_proto(int proto, const char *proto_str)
 {
-	if (proto == SOCK_DCCP) {
-		int test_fd;
-
-		test_fd = socket(AF_INET, proto, 0);
-		if (test_fd < 0) {
-			if (errno == ESOCKTNOSUPPORT) {
-				fprintf(stderr, "DCCP not supported: skipping DCCP tests\n");
-				return;
-			} else
-				error(1, errno, "failed to create a DCCP socket");
-		}
-		close(test_fd);
-	}
-
 	fprintf(stderr, "%s IPv4 ... ", proto_str);
 	run_one_test(AF_INET, AF_INET, proto, IP4_ADDR);
 
@@ -271,7 +238,6 @@ int main(void)
 {
 	test_proto(SOCK_DGRAM, "UDP");
 	test_proto(SOCK_STREAM, "TCP");
-	test_proto(SOCK_DCCP, "DCCP");
 
 	fprintf(stderr, "SUCCESS\n");
 	return 0;
diff --git a/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh
index 02d617040793..a5e959a080bb 100755
--- a/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh
+++ b/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh
@@ -285,11 +285,6 @@ setup_hs()
 	ip netns exec ${hsname} sysctl -wq net.ipv6.conf.all.accept_dad=0
 	ip netns exec ${hsname} sysctl -wq net.ipv6.conf.default.accept_dad=0
 
-	# disable the rp_filter otherwise the kernel gets confused about how
-	# to route decap ipv4 packets.
-	ip netns exec ${rtname} sysctl -wq net.ipv4.conf.all.rp_filter=0
-	ip netns exec ${rtname} sysctl -wq net.ipv4.conf.default.rp_filter=0
-
 	ip -netns ${hsname} link add veth0 type veth peer name ${rtveth}
 	ip -netns ${hsname} link set ${rtveth} netns ${rtname}
 	ip -netns ${hsname} addr add ${IPv6_HS_NETWORK}::${hid}/64 dev veth0 nodad
diff --git a/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh
index 79fb81e63c59..a649dba3cb77 100755
--- a/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh
+++ b/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh
@@ -250,11 +250,6 @@ setup_hs()
 	eval local rtname=\${rt_${rid}}
 	local rtveth=veth-t${tid}
 
-	# disable the rp_filter otherwise the kernel gets confused about how
-	# to route decap ipv4 packets.
-	ip netns exec ${rtname} sysctl -wq net.ipv4.conf.all.rp_filter=0
-	ip netns exec ${rtname} sysctl -wq net.ipv4.conf.default.rp_filter=0
-
 	ip -netns ${hsname} link add veth0 type veth peer name ${rtveth}
 	ip -netns ${hsname} link set ${rtveth} netns ${rtname}
 	ip -netns ${hsname} addr add ${IPv4_HS_NETWORK}.${hid}/24 dev veth0
diff --git a/tools/testing/selftests/net/srv6_end_flavors_test.sh b/tools/testing/selftests/net/srv6_end_flavors_test.sh
index 50563443a4ad..318487eda671 100755
--- a/tools/testing/selftests/net/srv6_end_flavors_test.sh
+++ b/tools/testing/selftests/net/srv6_end_flavors_test.sh
@@ -399,7 +399,7 @@ __get_srv6_rtcfg_id()
 
 # Given the description of a router <id:op> as an input, the function returns
 # the <op> token which represents the operation (e.g. End behavior with or
-# withouth flavors) configured for the node.
+# without flavors) configured for the node.
 
 # Note that when the operation represents an End behavior with a list of
 # flavors, the output is the ordered version of that list.
@@ -480,7 +480,7 @@ setup_rt_local_sids()
 
 
 	# all SIDs start with a common locator. Routes and SRv6 Endpoint
-	# behavior instaces are grouped together in the 'localsid' table.
+	# behavior instances are grouped together in the 'localsid' table.
 	ip -netns "${nsname}" -6 rule \
 		add to "${LOCATOR_SERVICE}::/16" \
 		lookup "${LOCALSID_TABLE_ID}" prio 999
diff --git a/tools/testing/selftests/net/srv6_end_next_csid_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_next_csid_l3vpn_test.sh
index 87e414cc417c..ba730655a7bf 100755
--- a/tools/testing/selftests/net/srv6_end_next_csid_l3vpn_test.sh
+++ b/tools/testing/selftests/net/srv6_end_next_csid_l3vpn_test.sh
@@ -245,10 +245,8 @@
 # that adopted in the use cases already examined (of course, it is necessary to
 # consider the different SIDs/C-SIDs).
 
-# Kselftest framework requirement - SKIP code is 4.
-readonly ksft_skip=4
+source lib.sh
 
-readonly RDMSUFF="$(mktemp -u XXXXXXXX)"
 readonly DUMMY_DEVNAME="dum0"
 readonly VRF_TID=100
 readonly VRF_DEVNAME="vrf-${VRF_TID}"
@@ -376,32 +374,18 @@ test_command_or_ksft_skip()
 	fi
 }
 
-get_nodename()
-{
-	local name="$1"
-
-	echo "${name}-${RDMSUFF}"
-}
-
 get_rtname()
 {
 	local rtid="$1"
 
-	get_nodename "rt-${rtid}"
+	echo "rt_${rtid}"
 }
 
 get_hsname()
 {
 	local hsid="$1"
 
-	get_nodename "hs-${hsid}"
-}
-
-__create_namespace()
-{
-	local name="$1"
-
-	ip netns add "${name}"
+	echo "hs_${hsid}"
 }
 
 create_router()
@@ -410,8 +394,7 @@ create_router()
 	local nsname
 
 	nsname="$(get_rtname "${rtid}")"
-
-	__create_namespace "${nsname}"
+	setup_ns "${nsname}"
 }
 
 create_host()
@@ -420,28 +403,12 @@ create_host()
 	local nsname
 
 	nsname="$(get_hsname "${hsid}")"
-
-	__create_namespace "${nsname}"
+	setup_ns "${nsname}"
 }
 
 cleanup()
 {
-	local nsname
-	local i
-
-	# destroy routers
-	for i in ${ROUTERS}; do
-		nsname="$(get_rtname "${i}")"
-
-		ip netns del "${nsname}" &>/dev/null || true
-	done
-
-	# destroy hosts
-	for i in ${HOSTS}; do
-		nsname="$(get_hsname "${i}")"
-
-		ip netns del "${nsname}" &>/dev/null || true
-	done
+	cleanup_all_ns
 
 	# check whether the setup phase was completed successfully or not. In
 	# case of an error during the setup phase of the testing environment,
@@ -462,10 +429,10 @@ add_link_rt_pairs()
 	local nsname
 	local neigh_nsname
 
-	nsname="$(get_rtname "${rt}")"
+	eval nsname=\${$(get_rtname "${rt}")}
 
 	for neigh in ${rt_neighs}; do
-		neigh_nsname="$(get_rtname "${neigh}")"
+		eval neigh_nsname=\${$(get_rtname "${neigh}")}
 
 		ip link add "veth-rt-${rt}-${neigh}" netns "${nsname}" \
 			type veth peer name "veth-rt-${neigh}-${rt}" \
@@ -497,7 +464,7 @@ setup_rt_networking()
 	local devname
 	local neigh
 
-	nsname="$(get_rtname "${rt}")"
+	eval nsname=\${$(get_rtname "${rt}")}
 
 	for neigh in ${rt_neighs}; do
 		devname="veth-rt-${rt}-${neigh}"
@@ -518,9 +485,6 @@ setup_rt_networking()
 	ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0
 	ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0
 	ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.forwarding=1
-
-	ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.all.rp_filter=0
-	ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.default.rp_filter=0
 	ip netns exec "${nsname}" sysctl -wq net.ipv4.ip_forward=1
 }
 
@@ -596,7 +560,7 @@ setup_rt_local_sids()
 	local lcnode_func_prefix
 	local lcblock_prefix
 
-	nsname="$(get_rtname "${rt}")"
+	eval nsname=\${$(get_rtname "${rt}")}
 
 	for neigh in ${rt_neighs}; do
 		devname="veth-rt-${rt}-${neigh}"
@@ -668,8 +632,8 @@ __setup_l3vpn()
 	local rtsrc_nsname
 	local rtdst_nsname
 
-	rtsrc_nsname="$(get_rtname "${src}")"
-	rtdst_nsname="$(get_rtname "${dst}")"
+	eval rtsrc_nsname=\${$(get_rtname "${src}")}
+	eval rtdst_nsname=\${$(get_rtname "${dst}")}
 
 	container="${LCBLOCK_ADDR}"
 
@@ -744,8 +708,8 @@ setup_hs()
 	local hsname
 	local rtname
 
-	hsname="$(get_hsname "${hs}")"
-	rtname="$(get_rtname "${rt}")"
+	eval hsname=\${$(get_hsname "${hs}")}
+	eval rtname=\${$(get_rtname "${rt}")}
 
 	ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0
 	ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0
@@ -791,11 +755,6 @@ setup_hs()
 	ip netns exec "${rtname}" \
 		sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".proxy_arp=1
 
-	# disable the rp_filter otherwise the kernel gets confused about how
-	# to route decap ipv4 packets.
-	ip netns exec "${rtname}" \
-		sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".rp_filter=0
-
 	ip netns exec "${rtname}" sh -c "echo 1 > /proc/sys/net/vrf/strict_mode"
 }
 
@@ -880,7 +839,7 @@ check_rt_connectivity()
 	local prefix
 	local rtsrc_nsname
 
-	rtsrc_nsname="$(get_rtname "${rtsrc}")"
+	eval rtsrc_nsname=\${$(get_rtname "${rtsrc}")}
 
 	prefix="$(get_network_prefix "${rtsrc}" "${rtdst}")"
 
@@ -903,7 +862,7 @@ check_hs_ipv6_connectivity()
 	local hsdst="$2"
 	local hssrc_nsname
 
-	hssrc_nsname="$(get_hsname "${hssrc}")"
+	eval hssrc_nsname=\${$(get_hsname "${hssrc}")}
 
 	ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \
 		"${IPv6_HS_NETWORK}::${hsdst}" >/dev/null 2>&1
@@ -915,7 +874,7 @@ check_hs_ipv4_connectivity()
 	local hsdst="$2"
 	local hssrc_nsname
 
-	hssrc_nsname="$(get_hsname "${hssrc}")"
+	eval hssrc_nsname=\${$(get_hsname "${hssrc}")}
 
 	ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \
 		"${IPv4_HS_NETWORK}.${hsdst}" >/dev/null 2>&1
@@ -1025,7 +984,7 @@ rt_x_nextcsid_end_behavior_test()
 	local nsname
 	local ret
 
-	nsname="$(get_rtname "${rt}")"
+	eval nsname=\${$(get_rtname "${rt}")}
 
 	__nextcsid_end_behavior_test "${nsname}" "add" "${blen}" "${flen}"
 	ret="$?"
diff --git a/tools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh
index c79cb8ede17f..4b86040c58c6 100755
--- a/tools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh
+++ b/tools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh
@@ -287,10 +287,8 @@
 # packet using the SRv6 End.DT46 behavior (associated with the SID fcff:1::d46)
 # and sends it to the host hs-1.
 
-# Kselftest framework requirement - SKIP code is 4.
-readonly ksft_skip=4
+source lib.sh
 
-readonly RDMSUFF="$(mktemp -u XXXXXXXX)"
 readonly DUMMY_DEVNAME="dum0"
 readonly VRF_TID=100
 readonly VRF_DEVNAME="vrf-${VRF_TID}"
@@ -418,32 +416,18 @@ test_command_or_ksft_skip()
 	fi
 }
 
-get_nodename()
-{
-	local name="$1"
-
-	echo "${name}-${RDMSUFF}"
-}
-
 get_rtname()
 {
 	local rtid="$1"
 
-	get_nodename "rt-${rtid}"
+	echo "rt_${rtid}"
 }
 
 get_hsname()
 {
 	local hsid="$1"
 
-	get_nodename "hs-${hsid}"
-}
-
-__create_namespace()
-{
-	local name="$1"
-
-	ip netns add "${name}"
+	echo "hs_${hsid}"
 }
 
 create_router()
@@ -452,15 +436,12 @@ create_router()
 	local nsname
 
 	nsname="$(get_rtname "${rtid}")"
+	setup_ns "${nsname}"
 
-	__create_namespace "${nsname}"
-
+	eval nsname=\${$(get_rtname "${rtid}")}
 	ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0
 	ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0
 	ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.forwarding=1
-
-	ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.all.rp_filter=0
-	ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.default.rp_filter=0
 	ip netns exec "${nsname}" sysctl -wq net.ipv4.ip_forward=1
 }
 
@@ -470,29 +451,12 @@ create_host()
 	local nsname
 
 	nsname="$(get_hsname "${hsid}")"
-
-	__create_namespace "${nsname}"
+	setup_ns "${nsname}"
 }
 
 cleanup()
 {
-	local nsname
-	local i
-
-	# destroy routers
-	for i in ${ROUTERS}; do
-		nsname="$(get_rtname "${i}")"
-
-		ip netns del "${nsname}" &>/dev/null || true
-	done
-
-	# destroy hosts
-	for i in ${HOSTS}; do
-		nsname="$(get_hsname "${i}")"
-
-		ip netns del "${nsname}" &>/dev/null || true
-	done
-
+	cleanup_all_ns
 	# check whether the setup phase was completed successfully or not. In
 	# case of an error during the setup phase of the testing environment,
 	# the selftest is considered as "skipped".
@@ -512,10 +476,10 @@ add_link_rt_pairs()
 	local nsname
 	local neigh_nsname
 
-	nsname="$(get_rtname "${rt}")"
+	eval nsname=\${$(get_rtname "${rt}")}
 
 	for neigh in ${rt_neighs}; do
-		neigh_nsname="$(get_rtname "${neigh}")"
+		eval neigh_nsname=\${$(get_rtname "${neigh}")}
 
 		ip link add "veth-rt-${rt}-${neigh}" netns "${nsname}" \
 			type veth peer name "veth-rt-${neigh}-${rt}" \
@@ -547,7 +511,7 @@ setup_rt_networking()
 	local devname
 	local neigh
 
-	nsname="$(get_rtname "${rt}")"
+	eval nsname=\${$(get_rtname "${rt}")}
 
 	for neigh in ${rt_neighs}; do
 		devname="veth-rt-${rt}-${neigh}"
@@ -631,7 +595,7 @@ set_end_x_nextcsid()
 	local rt="$1"
 	local adj="$2"
 
-	nsname="$(get_rtname "${rt}")"
+	eval nsname=\${$(get_rtname "${rt}")}
 	net_prefix="$(get_network_prefix "${rt}" "${adj}")"
 	lcnode_func_prefix="$(build_lcnode_func_prefix "${rt}")"
 
@@ -650,7 +614,7 @@ set_underlay_sids_reachability()
 	local rt="$1"
 	local rt_neighs="$2"
 
-	nsname="$(get_rtname "${rt}")"
+	eval nsname=\${$(get_rtname "${rt}")}
 
 	for neigh in ${rt_neighs}; do
 		devname="veth-rt-${rt}-${neigh}"
@@ -685,7 +649,7 @@ setup_rt_local_sids()
 	local lcnode_func_prefix
 	local lcblock_prefix
 
-	nsname="$(get_rtname "${rt}")"
+	eval nsname=\${$(get_rtname "${rt}")}
 
         set_underlay_sids_reachability "${rt}" "${rt_neighs}"
 
@@ -728,8 +692,8 @@ __setup_l3vpn()
 	local rtsrc_nsname
 	local rtdst_nsname
 
-	rtsrc_nsname="$(get_rtname "${src}")"
-	rtdst_nsname="$(get_rtname "${dst}")"
+	eval rtsrc_nsname=\${$(get_rtname "${src}")}
+	eval rtdst_nsname=\${$(get_rtname "${dst}")}
 
 	container="${LCBLOCK_ADDR}"
 
@@ -804,8 +768,8 @@ setup_hs()
 	local hsname
 	local rtname
 
-	hsname="$(get_hsname "${hs}")"
-	rtname="$(get_rtname "${rt}")"
+	eval hsname=\${$(get_hsname "${hs}")}
+	eval rtname=\${$(get_rtname "${rt}")}
 
 	ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0
 	ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0
@@ -851,11 +815,6 @@ setup_hs()
 	ip netns exec "${rtname}" \
 		sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".proxy_arp=1
 
-	# disable the rp_filter otherwise the kernel gets confused about how
-	# to route decap ipv4 packets.
-	ip netns exec "${rtname}" \
-		sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".rp_filter=0
-
 	ip netns exec "${rtname}" sh -c "echo 1 > /proc/sys/net/vrf/strict_mode"
 }
 
@@ -947,7 +906,7 @@ check_rt_connectivity()
 	local prefix
 	local rtsrc_nsname
 
-	rtsrc_nsname="$(get_rtname "${rtsrc}")"
+	eval rtsrc_nsname=\${$(get_rtname "${rtsrc}")}
 
 	prefix="$(get_network_prefix "${rtsrc}" "${rtdst}")"
 
@@ -970,7 +929,7 @@ check_hs_ipv6_connectivity()
 	local hsdst="$2"
 	local hssrc_nsname
 
-	hssrc_nsname="$(get_hsname "${hssrc}")"
+	eval hssrc_nsname=\${$(get_hsname "${hssrc}")}
 
 	ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \
 		"${IPv6_HS_NETWORK}::${hsdst}" >/dev/null 2>&1
@@ -982,7 +941,7 @@ check_hs_ipv4_connectivity()
 	local hsdst="$2"
 	local hssrc_nsname
 
-	hssrc_nsname="$(get_hsname "${hssrc}")"
+	eval hssrc_nsname=\${$(get_hsname "${hssrc}")}
 
 	ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \
 		"${IPv4_HS_NETWORK}.${hsdst}" >/dev/null 2>&1
@@ -1093,7 +1052,7 @@ rt_x_nextcsid_end_x_behavior_test()
 	local nsname
 	local ret
 
-	nsname="$(get_rtname "${rt}")"
+	eval nsname=\${$(get_rtname "${rt}")}
 
 	__nextcsid_end_x_behavior_test "${nsname}" "add" "${blen}" "${flen}"
 	ret="$?"
diff --git a/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh b/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh
index 28a775654b92..3efce1718c5f 100755
--- a/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh
+++ b/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh
@@ -166,10 +166,8 @@
 #  hs-4->hs-3 |IPv6 DA=fcff:1::e|SRH SIDs=fcff:3::d46|IPv6|...| (i.d)
 #
 
-# Kselftest framework requirement - SKIP code is 4.
-readonly ksft_skip=4
+source lib.sh
 
-readonly RDMSUFF="$(mktemp -u XXXXXXXX)"
 readonly VRF_TID=100
 readonly VRF_DEVNAME="vrf-${VRF_TID}"
 readonly RT2HS_DEVNAME="veth-t${VRF_TID}"
@@ -248,32 +246,18 @@ test_command_or_ksft_skip()
 	fi
 }
 
-get_nodename()
-{
-	local name="$1"
-
-	echo "${name}-${RDMSUFF}"
-}
-
 get_rtname()
 {
 	local rtid="$1"
 
-	get_nodename "rt-${rtid}"
+	echo "rt_${rtid}"
 }
 
 get_hsname()
 {
 	local hsid="$1"
 
-	get_nodename "hs-${hsid}"
-}
-
-__create_namespace()
-{
-	local name="$1"
-
-	ip netns add "${name}"
+	echo "hs_${hsid}"
 }
 
 create_router()
@@ -282,8 +266,7 @@ create_router()
 	local nsname
 
 	nsname="$(get_rtname "${rtid}")"
-
-	__create_namespace "${nsname}"
+	setup_ns "${nsname}"
 }
 
 create_host()
@@ -292,29 +275,12 @@ create_host()
 	local nsname
 
 	nsname="$(get_hsname "${hsid}")"
-
-	__create_namespace "${nsname}"
+	setup_ns "${nsname}"
 }
 
 cleanup()
 {
-	local nsname
-	local i
-
-	# destroy routers
-	for i in ${ROUTERS}; do
-		nsname="$(get_rtname "${i}")"
-
-		ip netns del "${nsname}" &>/dev/null || true
-	done
-
-	# destroy hosts
-	for i in ${HOSTS}; do
-		nsname="$(get_hsname "${i}")"
-
-		ip netns del "${nsname}" &>/dev/null || true
-	done
-
+	cleanup_all_ns
 	# check whether the setup phase was completed successfully or not. In
 	# case of an error during the setup phase of the testing environment,
 	# the selftest is considered as "skipped".
@@ -334,10 +300,10 @@ add_link_rt_pairs()
 	local nsname
 	local neigh_nsname
 
-	nsname="$(get_rtname "${rt}")"
+	eval nsname=\${$(get_rtname "${rt}")}
 
 	for neigh in ${rt_neighs}; do
-		neigh_nsname="$(get_rtname "${neigh}")"
+		eval neigh_nsname=\${$(get_rtname "${neigh}")}
 
 		ip link add "veth-rt-${rt}-${neigh}" netns "${nsname}" \
 			type veth peer name "veth-rt-${neigh}-${rt}" \
@@ -369,7 +335,7 @@ setup_rt_networking()
 	local devname
 	local neigh
 
-	nsname="$(get_rtname "${rt}")"
+	eval nsname=\${$(get_rtname "${rt}")}
 
 	for neigh in ${rt_neighs}; do
 		devname="veth-rt-${rt}-${neigh}"
@@ -387,9 +353,6 @@ setup_rt_networking()
 	ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0
 	ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0
 	ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.forwarding=1
-
-	ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.all.rp_filter=0
-	ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.default.rp_filter=0
 	ip netns exec "${nsname}" sysctl -wq net.ipv4.ip_forward=1
 }
 
@@ -403,7 +366,7 @@ setup_rt_local_sids()
 	local nsname
 	local neigh
 
-	nsname="$(get_rtname "${rt}")"
+	eval nsname=\${$(get_rtname "${rt}")}
 
 	for neigh in ${rt_neighs}; do
 		devname="veth-rt-${rt}-${neigh}"
@@ -469,7 +432,7 @@ __setup_rt_policy()
 	local policy=''
 	local n
 
-	nsname="$(get_rtname "${encap_rt}")"
+	eval nsname=\${$(get_rtname "${encap_rt}")}
 
 	for n in ${end_rts}; do
 		policy="${policy}${VPN_LOCATOR_SERVICE}:${n}::${END_FUNC},"
@@ -516,8 +479,8 @@ setup_hs()
 	local hsname
 	local rtname
 
-	hsname="$(get_hsname "${hs}")"
-	rtname="$(get_rtname "${rt}")"
+	eval hsname=\${$(get_hsname "${hs}")}
+	eval rtname=\${$(get_rtname "${rt}")}
 
 	ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0
 	ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0
@@ -555,11 +518,6 @@ setup_hs()
 	ip netns exec "${rtname}" \
 		sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".proxy_arp=1
 
-	# disable the rp_filter otherwise the kernel gets confused about how
-	# to route decap ipv4 packets.
-	ip netns exec "${rtname}" \
-		sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".rp_filter=0
-
 	ip netns exec "${rtname}" sh -c "echo 1 > /proc/sys/net/vrf/strict_mode"
 }
 
@@ -656,7 +614,7 @@ check_rt_connectivity()
 	local prefix
 	local rtsrc_nsname
 
-	rtsrc_nsname="$(get_rtname "${rtsrc}")"
+	eval rtsrc_nsname=\${$(get_rtname "${rtsrc}")}
 
 	prefix="$(get_network_prefix "${rtsrc}" "${rtdst}")"
 
@@ -679,7 +637,7 @@ check_hs_ipv6_connectivity()
 	local hsdst="$2"
 	local hssrc_nsname
 
-	hssrc_nsname="$(get_hsname "${hssrc}")"
+	eval hssrc_nsname=\${$(get_hsname "${hssrc}")}
 
 	ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \
 		"${IPv6_HS_NETWORK}::${hsdst}" >/dev/null 2>&1
@@ -691,7 +649,7 @@ check_hs_ipv4_connectivity()
 	local hsdst="$2"
 	local hssrc_nsname
 
-	hssrc_nsname="$(get_hsname "${hssrc}")"
+	eval hssrc_nsname=\${$(get_hsname "${hssrc}")}
 
 	ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \
 		"${IPv4_HS_NETWORK}.${hsdst}" >/dev/null 2>&1
diff --git a/tools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh b/tools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh
index cb4177d41b21..cabc70538ffe 100755
--- a/tools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh
+++ b/tools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh
@@ -116,10 +116,8 @@
 #  hs-2->hs-1 |IPv6 DA=fcff:4::e|SRH SIDs=fcff:3::e,fcff:1::d2|eth|...| (i.b)
 #
 
-# Kselftest framework requirement - SKIP code is 4.
-readonly ksft_skip=4
+source lib.sh
 
-readonly RDMSUFF="$(mktemp -u XXXXXXXX)"
 readonly DUMMY_DEVNAME="dum0"
 readonly RT2HS_DEVNAME="veth-hs"
 readonly HS_VETH_NAME="veth0"
@@ -199,32 +197,18 @@ test_command_or_ksft_skip()
 	fi
 }
 
-get_nodename()
-{
-	local name="$1"
-
-	echo "${name}-${RDMSUFF}"
-}
-
 get_rtname()
 {
 	local rtid="$1"
 
-	get_nodename "rt-${rtid}"
+	echo "rt_${rtid}"
 }
 
 get_hsname()
 {
 	local hsid="$1"
 
-	get_nodename "hs-${hsid}"
-}
-
-__create_namespace()
-{
-	local name="$1"
-
-	ip netns add "${name}"
+	echo "hs_${hsid}"
 }
 
 create_router()
@@ -233,8 +217,7 @@ create_router()
 	local nsname
 
 	nsname="$(get_rtname "${rtid}")"
-
-	__create_namespace "${nsname}"
+	setup_ns "${nsname}"
 }
 
 create_host()
@@ -243,28 +226,12 @@ create_host()
 	local nsname
 
 	nsname="$(get_hsname "${hsid}")"
-
-	__create_namespace "${nsname}"
+	setup_ns "${nsname}"
 }
 
 cleanup()
 {
-	local nsname
-	local i
-
-	# destroy routers
-	for i in ${ROUTERS}; do
-		nsname="$(get_rtname "${i}")"
-
-		ip netns del "${nsname}" &>/dev/null || true
-	done
-
-	# destroy hosts
-	for i in ${HOSTS}; do
-		nsname="$(get_hsname "${i}")"
-
-		ip netns del "${nsname}" &>/dev/null || true
-	done
+	cleanup_all_ns
 
 	# check whether the setup phase was completed successfully or not. In
 	# case of an error during the setup phase of the testing environment,
@@ -285,10 +252,10 @@ add_link_rt_pairs()
 	local nsname
 	local neigh_nsname
 
-	nsname="$(get_rtname "${rt}")"
+	eval nsname=\${$(get_rtname "${rt}")}
 
 	for neigh in ${rt_neighs}; do
-		neigh_nsname="$(get_rtname "${neigh}")"
+		eval neigh_nsname=\${$(get_rtname "${neigh}")}
 
 		ip link add "veth-rt-${rt}-${neigh}" netns "${nsname}" \
 			type veth peer name "veth-rt-${neigh}-${rt}" \
@@ -320,7 +287,7 @@ setup_rt_networking()
 	local devname
 	local neigh
 
-	nsname="$(get_rtname "${rt}")"
+	eval nsname=\${$(get_rtname "${rt}")}
 
 	for neigh in ${rt_neighs}; do
 		devname="veth-rt-${rt}-${neigh}"
@@ -341,9 +308,6 @@ setup_rt_networking()
 	ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0
 	ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0
 	ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.forwarding=1
-
-	ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.all.rp_filter=0
-	ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.default.rp_filter=0
 	ip netns exec "${nsname}" sysctl -wq net.ipv4.ip_forward=1
 }
 
@@ -357,7 +321,7 @@ setup_rt_local_sids()
 	local nsname
 	local neigh
 
-	nsname="$(get_rtname "${rt}")"
+	eval nsname=\${$(get_rtname "${rt}")}
 
 	for neigh in ${rt_neighs}; do
 		devname="veth-rt-${rt}-${neigh}"
@@ -407,7 +371,7 @@ __setup_rt_policy()
 	local policy=''
 	local n
 
-	nsname="$(get_rtname "${encap_rt}")"
+	eval nsname=\${$(get_rtname "${encap_rt}")}
 
 	for n in ${end_rts}; do
 		policy="${policy}${VPN_LOCATOR_SERVICE}:${n}::${END_FUNC},"
@@ -446,7 +410,7 @@ setup_decap()
 	local rt="$1"
 	local nsname
 
-	nsname="$(get_rtname "${rt}")"
+	eval nsname=\${$(get_rtname "${rt}")}
 
 	# Local End.DX2 behavior
 	ip -netns "${nsname}" -6 route \
@@ -463,8 +427,8 @@ setup_hs()
 	local hsname
 	local rtname
 
-	hsname="$(get_hsname "${hs}")"
-	rtname="$(get_rtname "${rt}")"
+	eval hsname=\${$(get_hsname "${hs}")}
+	eval rtname=\${$(get_rtname "${rt}")}
 
 	ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0
 	ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0
@@ -486,11 +450,6 @@ setup_hs()
 		add "${IPv4_HS_NETWORK}.254/24" dev "${RT2HS_DEVNAME}"
 
 	ip -netns "${rtname}" link set "${RT2HS_DEVNAME}" up
-
-	# disable the rp_filter otherwise the kernel gets confused about how
-	# to route decap ipv4 packets.
-	ip netns exec "${rtname}" \
-		sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".rp_filter=0
 }
 
 # set an auto-generated mac address
@@ -508,7 +467,7 @@ set_mac_address()
 	local ifname="$4"
 	local nsname
 
-	nsname=$(get_nodename "${nodename}")
+	eval nsname=\${${nodename}}
 
 	ip -netns "${nsname}" link set dev "${ifname}" down
 
@@ -532,7 +491,7 @@ set_host_l2peer()
 	local hssrc_name
 	local ipaddr
 
-	hssrc_name="$(get_hsname "${hssrc}")"
+	eval hssrc_name=\${$(get_hsname "${hssrc}")}
 
 	if [ "${proto}" -eq 6 ]; then
 		ipaddr="${ipprefix}::${hsdst}"
@@ -562,7 +521,7 @@ setup_l2vpn()
 	local rtdst="${hsdst}"
 
 	# set fixed mac for source node and the neigh MAC address
-	set_mac_address "hs-${hssrc}" "${hssrc}" "${hssrc}" "${HS_VETH_NAME}"
+	set_mac_address "hs_${hssrc}" "${hssrc}" "${hssrc}" "${HS_VETH_NAME}"
 	set_host_l2peer "${hssrc}" "${hsdst}" "${IPv6_HS_NETWORK}" 6
 	set_host_l2peer "${hssrc}" "${hsdst}" "${IPv4_HS_NETWORK}" 4
 
@@ -570,7 +529,7 @@ setup_l2vpn()
 	# to the mac address of the remote peer (L2 VPN destination host).
 	# Otherwise, traffic coming from the source host is dropped at the
 	# ingress router.
-	set_mac_address "rt-${rtsrc}" "${hsdst}" 254 "${RT2HS_DEVNAME}"
+	set_mac_address "rt_${rtsrc}" "${hsdst}" 254 "${RT2HS_DEVNAME}"
 
 	# set the SRv6 Policies at the ingress router
 	setup_rt_policy_ipv6 "${hsdst}" "${rtsrc}" "${end_rts}" "${rtdst}" \
@@ -647,7 +606,7 @@ check_rt_connectivity()
 	local prefix
 	local rtsrc_nsname
 
-	rtsrc_nsname="$(get_rtname "${rtsrc}")"
+	eval rtsrc_nsname=\${$(get_rtname "${rtsrc}")}
 
 	prefix="$(get_network_prefix "${rtsrc}" "${rtdst}")"
 
@@ -670,7 +629,7 @@ check_hs_ipv6_connectivity()
 	local hsdst="$2"
 	local hssrc_nsname
 
-	hssrc_nsname="$(get_hsname "${hssrc}")"
+	eval hssrc_nsname=\${$(get_hsname "${hssrc}")}
 
 	ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \
 		"${IPv6_HS_NETWORK}::${hsdst}" >/dev/null 2>&1
@@ -682,7 +641,7 @@ check_hs_ipv4_connectivity()
 	local hsdst="$2"
 	local hssrc_nsname
 
-	hssrc_nsname="$(get_hsname "${hssrc}")"
+	eval hssrc_nsname=\${$(get_hsname "${hssrc}")}
 
 	ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \
 		"${IPv4_HS_NETWORK}.${hsdst}" >/dev/null 2>&1
diff --git a/tools/testing/selftests/net/test_bridge_neigh_suppress.sh b/tools/testing/selftests/net/test_bridge_neigh_suppress.sh
index 02b986c9c247..9067197c9055 100755
--- a/tools/testing/selftests/net/test_bridge_neigh_suppress.sh
+++ b/tools/testing/selftests/net/test_bridge_neigh_suppress.sh
@@ -51,7 +51,9 @@ ret=0
 # All tests in this script. Can be overridden with -t option.
 TESTS="
 	neigh_suppress_arp
+	neigh_suppress_uc_arp
 	neigh_suppress_ns
+	neigh_suppress_uc_ns
 	neigh_vlan_suppress_arp
 	neigh_vlan_suppress_ns
 "
@@ -388,6 +390,52 @@ neigh_suppress_arp()
 	neigh_suppress_arp_common $vid $sip $tip
 }
 
+neigh_suppress_uc_arp_common()
+{
+	local vid=$1; shift
+	local sip=$1; shift
+	local tip=$1; shift
+	local tmac
+
+	echo
+	echo "Unicast ARP, per-port ARP suppression - VLAN $vid"
+	echo "-----------------------------------------------"
+
+	run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress on"
+	run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress on\""
+	log_test $? 0 "\"neigh_suppress\" is on"
+
+	tmac=$(ip -n $h2 -j -p link show eth0.$vid | jq -r '.[]["address"]')
+	run_cmd "bridge -n $sw1 fdb replace $tmac dev vx0 master static vlan $vid"
+	run_cmd "ip -n $sw1 neigh replace $tip lladdr $tmac nud permanent dev br0.$vid"
+
+	run_cmd "tc -n $h1 qdisc replace dev eth0.$vid clsact"
+	run_cmd "tc -n $h1 filter replace dev eth0.$vid ingress pref 1 handle 101 proto arp flower arp_sip $tip arp_op reply action pass"
+
+	run_cmd "tc -n $h2 qdisc replace dev eth0.$vid clsact"
+	run_cmd "tc -n $h2 filter replace dev eth0.$vid egress pref 1 handle 101 proto arp flower arp_tip $sip arp_op reply action pass"
+
+	run_cmd "ip netns exec $h1 mausezahn eth0.$vid -c 1 -a own -b $tmac -t arp 'request sip=$sip, tip=$tip, tmac=$tmac' -q"
+	tc_check_packets $h1 "dev eth0.$vid ingress" 101 1
+	log_test $? 0 "Unicast ARP, suppression on, h1 filter"
+	tc_check_packets $h2 "dev eth0.$vid egress" 101 1
+	log_test $? 0 "Unicast ARP, suppression on, h2 filter"
+}
+
+neigh_suppress_uc_arp()
+{
+	local vid=10
+	local sip=192.0.2.1
+	local tip=192.0.2.2
+
+	neigh_suppress_uc_arp_common $vid $sip $tip
+
+	vid=20
+	sip=192.0.2.17
+	tip=192.0.2.18
+	neigh_suppress_uc_arp_common $vid $sip $tip
+}
+
 neigh_suppress_ns_common()
 {
 	local vid=$1; shift
@@ -494,6 +542,78 @@ neigh_suppress_ns()
 	neigh_suppress_ns_common $vid $saddr $daddr $maddr
 }
 
+# Echo an ICMPv6 NS header as ':'-terminated hex bytes; $1: checksum, $2: target
+icmpv6_header_get()
+{
+	local csum=$1; shift
+	local tip=$1; shift
+	local type p
+
+	# Type 135 (Neighbor Solicitation), hex format
+	type="87"
+	p=$(:
+		)"$type:"$(                     : ICMPv6.type
+		)"00:"$(                        : ICMPv6.code
+		)"$csum:"$(                     : ICMPv6.checksum
+		)"00:00:00:00:"$(               : Reserved
+		)"$tip:"$(                      : Target Address
+		)
+	echo "$p"
+}
+
+neigh_suppress_uc_ns_common()
+{
+	local vid=$1; shift
+	local sip=$1; shift
+	local dip=$1; shift
+	local full_dip=$1; shift
+	local csum=$1; shift
+	local tmac
+
+	echo
+	echo "Unicast NS, per-port NS suppression - VLAN $vid"
+	echo "---------------------------------------------"
+
+	run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress on"
+	run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress on\""
+	log_test $? 0 "\"neigh_suppress\" is on"
+
+	tmac=$(ip -n $h2 -j -p link show eth0.$vid | jq -r '.[]["address"]')
+	run_cmd "bridge -n $sw1 fdb replace $tmac dev vx0 master static vlan $vid"
+	run_cmd "ip -n $sw1 -6 neigh replace $dip lladdr $tmac nud permanent dev br0.$vid"
+
+	run_cmd "tc -n $h1 qdisc replace dev eth0.$vid clsact"
+	run_cmd "tc -n $h1 filter replace dev eth0.$vid ingress pref 1 handle 101 proto ipv6 flower ip_proto icmpv6 src_ip $dip type 136 code 0 action pass"
+
+	run_cmd "tc -n $h2 qdisc replace dev eth0.$vid clsact"
+	run_cmd "tc -n $h2 filter replace dev eth0.$vid egress pref 1 handle 101 proto ipv6 flower ip_proto icmpv6 dst_ip $sip type 136 code 0 action pass"
+
+	run_cmd "ip netns exec $h1 mausezahn -6 eth0.$vid -c 1 -a own -b $tmac -A $sip -B $dip -t ip hop=255,next=58,payload=$(icmpv6_header_get $csum $full_dip) -q"
+	tc_check_packets $h1 "dev eth0.$vid ingress" 101 1
+	log_test $? 0 "Unicast NS, suppression on, h1 filter"
+	tc_check_packets $h2 "dev eth0.$vid egress" 101 1
+	log_test $? 0 "Unicast NS, suppression on, h2 filter"
+}
+
+neigh_suppress_uc_ns()
+{
+	local vid=10
+	local saddr=2001:db8:1::1
+	local daddr=2001:db8:1::2
+	local full_daddr=20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:02
+	local csum="ef:79"
+
+	neigh_suppress_uc_ns_common $vid $saddr $daddr $full_daddr $csum
+
+	vid=20
+	saddr=2001:db8:2::1
+	daddr=2001:db8:2::2
+	full_daddr=20:01:0d:b8:00:02:00:00:00:00:00:00:00:00:00:02
+	csum="ef:76"
+
+	neigh_suppress_uc_ns_common $vid $saddr $daddr $full_daddr $csum
+}
+
 neigh_vlan_suppress_arp()
 {
 	local vid1=10
@@ -825,6 +945,11 @@ if [ ! -x "$(command -v jq)" ]; then
 	exit $ksft_skip
 fi
 
+if [ ! -x "$(command -v mausezahn)" ]; then
+	echo "SKIP: Could not run test without mausezahn tool"
+	exit $ksft_skip
+fi
+
 bridge link help 2>&1 | grep -q "neigh_vlan_suppress"
 if [ $? -ne 0 ]; then
    echo "SKIP: iproute2 bridge too old, missing per-VLAN neighbor suppression support"
diff --git a/tools/testing/selftests/net/udpgro.sh b/tools/testing/selftests/net/udpgro.sh
index d5ffd8c9172e..1dc337c709f8 100755
--- a/tools/testing/selftests/net/udpgro.sh
+++ b/tools/testing/selftests/net/udpgro.sh
@@ -3,7 +3,7 @@
 #
 # Run a series of udpgro functional tests.
 
-source net_helper.sh
+source lib.sh
 
 readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)"
 
diff --git a/tools/testing/selftests/net/udpgro_bench.sh b/tools/testing/selftests/net/udpgro_bench.sh
index 815fad8c53a8..54fa4821bc5e 100755
--- a/tools/testing/selftests/net/udpgro_bench.sh
+++ b/tools/testing/selftests/net/udpgro_bench.sh
@@ -3,7 +3,7 @@
 #
 # Run a series of udpgro benchmarks
 
-source net_helper.sh
+source lib.sh
 
 readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)"
 
diff --git a/tools/testing/selftests/net/udpgro_frglist.sh b/tools/testing/selftests/net/udpgro_frglist.sh
index 5f3d1a110d11..9a2cfec1153e 100755
--- a/tools/testing/selftests/net/udpgro_frglist.sh
+++ b/tools/testing/selftests/net/udpgro_frglist.sh
@@ -3,7 +3,7 @@
 #
 # Run a series of udpgro benchmarks
 
-source net_helper.sh
+source lib.sh
 
 readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)"
 
diff --git a/tools/testing/selftests/net/udpgro_fwd.sh b/tools/testing/selftests/net/udpgro_fwd.sh
index f22f6c66997e..a39fdc4aa2ff 100755
--- a/tools/testing/selftests/net/udpgro_fwd.sh
+++ b/tools/testing/selftests/net/udpgro_fwd.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
-source net_helper.sh
+source lib.sh
 
 BPF_FILE="lib/xdp_dummy.bpf.o"
 readonly BASE="ns-$(mktemp -u XXXXXX)"
diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json
index ddc97ecd8b39..9aa44d8176d9 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json
@@ -600,5 +600,40 @@
         "matchPattern": "qdisc hfsc",
         "matchCount": "1",
         "teardown": ["$TC qdisc del dev $DEV1 root handle 1: drr"]
+    },
+    {
+        "id": "309e",
+        "name": "Test HFSC eltree double add with reentrant enqueue behaviour on netem",
+        "category": [
+            "qdisc",
+            "hfsc"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link set dev $DUMMY up || true",
+            "$IP addr add 10.10.11.10/24 dev $DUMMY || true",
+            "$TC qdisc add dev $DUMMY root handle 1: tbf rate 8bit burst 100b latency 1s",
+            "$TC qdisc add dev $DUMMY parent 1:0 handle 2:0 hfsc",
+            "ping -I $DUMMY -f -c10 -s48 -W0.001 10.10.11.1 || true",
+            "$TC class add dev $DUMMY parent 2:0 classid 2:1 hfsc rt m2 20Kbit",
+            "$TC qdisc add dev $DUMMY parent 2:1 handle 3:0 netem duplicate 100%",
+            "$TC class add dev $DUMMY parent 2:0 classid 2:2 hfsc rt m2 20Kbit",
+            "$TC filter add dev $DUMMY parent 2:0 protocol ip prio 1 u32 match ip dst 10.10.11.2/32 flowid 2:1",
+            "$TC filter add dev $DUMMY parent 2:0 protocol ip prio 2 u32 match ip dst 10.10.11.3/32 flowid 2:2",
+            "ping -c 1 10.10.11.2 -I$DUMMY > /dev/null || true",
+            "$TC filter del dev $DUMMY parent 2:0 protocol ip prio 1",
+            "$TC class del dev $DUMMY classid 2:1",
+            "ping -c 1 10.10.11.3 -I$DUMMY > /dev/null || true"
+        ],
+        "cmdUnderTest": "$TC class change dev $DUMMY parent 2:0 classid 2:2 hfsc sc m2 20Kbit",
+        "expExitCode": "0",
+        "verifyCmd": "$TC -j class ls dev $DUMMY classid 2:1",
+        "matchJSON": [],
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1:0 root",
+            "$IP addr del 10.10.11.10/24 dev $DUMMY || true"
+        ]
     }
 ]
diff --git a/tools/testing/selftests/tc-testing/tdc.sh b/tools/testing/selftests/tc-testing/tdc.sh
index cddff1772e10..589b18ed758a 100755
--- a/tools/testing/selftests/tc-testing/tdc.sh
+++ b/tools/testing/selftests/tc-testing/tdc.sh
@@ -31,6 +31,10 @@ try_modprobe act_skbedit
 try_modprobe act_skbmod
 try_modprobe act_tunnel_key
 try_modprobe act_vlan
+try_modprobe act_ife
+try_modprobe act_meta_mark
+try_modprobe act_meta_skbtcindex
+try_modprobe act_meta_skbprio
 try_modprobe cls_basic
 try_modprobe cls_bpf
 try_modprobe cls_cgroup
diff --git a/tools/testing/selftests/wireguard/netns.sh b/tools/testing/selftests/wireguard/netns.sh
index 55500f901fbc..a8f550aecb35 100755
--- a/tools/testing/selftests/wireguard/netns.sh
+++ b/tools/testing/selftests/wireguard/netns.sh
@@ -611,6 +611,35 @@ n0 wg set wg0 peer "$pub2" allowed-ips "$allowedips"
 } < <(n0 wg show wg0 allowed-ips)
 ip0 link del wg0
 
+allowedips=( )
+for i in {1..197}; do
+        allowedips+=( 192.168.0.$i )
+        allowedips+=( abcd::$i )
+done
+saved_ifs="$IFS"
+IFS=,
+allowedips="${allowedips[*]}"
+IFS="$saved_ifs"
+ip0 link add wg0 type wireguard
+n0 wg set wg0 peer "$pub1" allowed-ips "$allowedips"
+n0 wg set wg0 peer "$pub1" allowed-ips -192.168.0.1/32,-192.168.0.20/32,-192.168.0.100/32,-abcd::1/128,-abcd::20/128,-abcd::100/128
+{
+	read -r pub allowedips
+	[[ $pub == "$pub1" ]]
+	i=0
+	for ip in $allowedips; do
+		[[ $ip != "192.168.0.1" ]]
+		[[ $ip != "192.168.0.20" ]]
+		[[ $ip != "192.168.0.100" ]]
+		[[ $ip != "abcd::1" ]]
+		[[ $ip != "abcd::20" ]]
+		[[ $ip != "abcd::100" ]]
+		((++i))
+	done
+	((i == 388))
+} < <(n0 wg show wg0 allowed-ips)
+ip0 link del wg0
+
 ! n0 wg show doesnotexist || false
 
 ip0 link add wg0 type wireguard
diff --git a/tools/testing/selftests/wireguard/qemu/Makefile b/tools/testing/selftests/wireguard/qemu/Makefile
index 35856b11c143..791d21b736a5 100644
--- a/tools/testing/selftests/wireguard/qemu/Makefile
+++ b/tools/testing/selftests/wireguard/qemu/Makefile
@@ -43,7 +43,7 @@ $(eval $(call tar_download,IPROUTE2,iproute2,5.17.0,.tar.gz,https://www.kernel.o
 $(eval $(call tar_download,IPTABLES,iptables,1.8.7,.tar.bz2,https://www.netfilter.org/projects/iptables/files/,c109c96bb04998cd44156622d36f8e04b140701ec60531a10668cfdff5e8d8f0))
 $(eval $(call tar_download,NMAP,nmap,7.92,.tgz,https://nmap.org/dist/,064183ea642dc4c12b1ab3b5358ce1cef7d2e7e11ffa2849f16d339f5b717117))
 $(eval $(call tar_download,IPUTILS,iputils,s20190709,.tar.gz,https://github.com/iputils/iputils/archive/s20190709.tar.gz/#,a15720dd741d7538dd2645f9f516d193636ae4300ff7dbc8bfca757bf166490a))
-$(eval $(call tar_download,WIREGUARD_TOOLS,wireguard-tools,1.0.20210914,.tar.xz,https://git.zx2c4.com/wireguard-tools/snapshot/,97ff31489217bb265b7ae850d3d0f335ab07d2652ba1feec88b734bc96bd05ac))
+$(eval $(call tar_download,WIREGUARD_TOOLS,wireguard-tools,1.0.20250521,.tar.xz,https://git.zx2c4.com/wireguard-tools/snapshot/,b6f2628b85b1b23cc06517ec9c74f82d52c4cdbd020f3dd2f00c972a1782950e))
 
 export CFLAGS := -O3 -pipe
 ifeq ($(HOST_ARCH),$(ARCH))
@@ -401,6 +401,7 @@ $(BASH_PATH)/.installed: $(BASH_TAR)
 	flock -s $<.lock tar -C $(BUILD_PATH) -xf $<
 	touch $@
 
+$(BASH_PATH)/bash: export CFLAGS_FOR_BUILD += -std=gnu17
 $(BASH_PATH)/bash: | $(BASH_PATH)/.installed $(USERSPACE_DEPS)
 	cd $(BASH_PATH) && ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --without-bash-malloc --disable-debugger --disable-help-builtin --disable-history --disable-progcomp --disable-readline --disable-mem-scramble
 	$(MAKE) -C $(BASH_PATH)
diff --git a/tools/testing/selftests/wireguard/qemu/debug.config b/tools/testing/selftests/wireguard/qemu/debug.config
index c305d2f613f0..5d39f43dd667 100644
--- a/tools/testing/selftests/wireguard/qemu/debug.config
+++ b/tools/testing/selftests/wireguard/qemu/debug.config
@@ -22,7 +22,6 @@ CONFIG_HAVE_ARCH_KASAN=y
 CONFIG_KASAN=y
 CONFIG_KASAN_INLINE=y
 CONFIG_UBSAN=y
-CONFIG_UBSAN_SANITIZE_ALL=y
 CONFIG_DEBUG_KMEMLEAK=y
 CONFIG_DEBUG_STACK_USAGE=y
 CONFIG_DEBUG_SHIRQ=y
diff --git a/tools/testing/vsock/timeout.c b/tools/testing/vsock/timeout.c
index 44aee49b6cee..1453d38e08bb 100644
--- a/tools/testing/vsock/timeout.c
+++ b/tools/testing/vsock/timeout.c
@@ -21,6 +21,7 @@
 #include <stdbool.h>
 #include <unistd.h>
 #include <stdio.h>
+#include <time.h>
 #include "timeout.h"
 
 static volatile bool timeout;
@@ -28,6 +29,8 @@ static volatile bool timeout;
 /* SIGALRM handler function.  Do not use sleep(2), alarm(2), or
  * setitimer(2) while using this API - they may interfere with each
  * other.
+ *
+ * If you need to sleep, please use timeout_usleep() provided by this API.
  */
 void sigalrm(int signo)
 {
@@ -58,3 +61,18 @@ void timeout_end(void)
 	alarm(0);
 	timeout = false;
 }
+
+/* Sleep for usec microseconds while a timeout section is active.
+ *
+ * Built on nanosleep(2), which POSIX.1 explicitly specifies does not
+ * interact with signals. The nanosleep(2) return value is passed through.
+ */
+int timeout_usleep(useconds_t usec)
+{
+	struct timespec req;
+
+	req.tv_sec = usec / 1000000;
+	req.tv_nsec = (usec % 1000000) * 1000;
+
+	return nanosleep(&req, NULL);
+}
diff --git a/tools/testing/vsock/timeout.h b/tools/testing/vsock/timeout.h
index ecb7c840e65a..1c3fcad87a49 100644
--- a/tools/testing/vsock/timeout.h
+++ b/tools/testing/vsock/timeout.h
@@ -11,5 +11,6 @@ void sigalrm(int signo);
 void timeout_begin(unsigned int seconds);
 void timeout_check(const char *operation);
 void timeout_end(void);
+int timeout_usleep(useconds_t usec);
 
 #endif /* TIMEOUT_H */
diff --git a/tools/testing/vsock/util.c b/tools/testing/vsock/util.c
index de25892f865f..0c7e9cbcbc85 100644
--- a/tools/testing/vsock/util.c
+++ b/tools/testing/vsock/util.c
@@ -17,6 +17,7 @@
 #include <assert.h>
 #include <sys/epoll.h>
 #include <sys/mman.h>
+#include <linux/sockios.h>
 
 #include "timeout.h"
 #include "control.h"
@@ -96,6 +97,30 @@ void vsock_wait_remote_close(int fd)
 	close(epollfd);
 }
 
+/* Poll SIOCOUTQ on fd until the transport reports no unsent bytes.
+ *
+ * Return false if the ioctl fails with EOPNOTSUPP (transport does not
+ * implement the unsent_bytes() callback); any other failure is fatal.
+ */
+bool vsock_wait_sent(int fd)
+{
+	int rc, unsent;
+
+	timeout_begin(TIMEOUT);
+	while ((rc = ioctl(fd, SIOCOUTQ, &unsent)) >= 0) {
+		timeout_check("SIOCOUTQ");
+		if (unsent == 0)
+			break;
+	}
+	if (rc < 0 && errno != EOPNOTSUPP) {
+		perror("ioctl(SIOCOUTQ)");
+		exit(EXIT_FAILURE);
+	}
+	timeout_end();
+
+	return !rc;
+}
+
 /* Create socket <type>, bind to <cid, port> and return the file descriptor. */
 int vsock_bind(unsigned int cid, unsigned int port, int type)
 {
@@ -798,3 +823,16 @@ void enable_so_zerocopy_check(int fd)
 	setsockopt_int_check(fd, SOL_SOCKET, SO_ZEROCOPY, 1,
 			     "setsockopt SO_ZEROCOPY");
 }
+
+/* Turn on SO_LINGER for fd with l_linger set to timeout; exit on failure. */
+void enable_so_linger(int fd, int timeout)
+{
+	struct linger lg = { .l_onoff = 1, .l_linger = timeout };
+	int err;
+
+	err = setsockopt(fd, SOL_SOCKET, SO_LINGER, &lg, sizeof(lg));
+	if (err) {
+		perror("setsockopt(SO_LINGER)");
+		exit(EXIT_FAILURE);
+	}
+}
diff --git a/tools/testing/vsock/util.h b/tools/testing/vsock/util.h
index d1f765ce3eee..5e2db67072d5 100644
--- a/tools/testing/vsock/util.h
+++ b/tools/testing/vsock/util.h
@@ -54,6 +54,7 @@ int vsock_stream_listen(unsigned int cid, unsigned int port);
 int vsock_seqpacket_accept(unsigned int cid, unsigned int port,
 			   struct sockaddr_vm *clientaddrp);
 void vsock_wait_remote_close(int fd);
+bool vsock_wait_sent(int fd);
 void send_buf(int fd, const void *buf, size_t len, int flags,
 	      ssize_t expected_ret);
 void recv_buf(int fd, void *buf, size_t len, int flags, ssize_t expected_ret);
@@ -79,4 +80,5 @@ void setsockopt_int_check(int fd, int level, int optname, int val,
 void setsockopt_timeval_check(int fd, int level, int optname,
 			      struct timeval val, char const *errmsg);
 void enable_so_zerocopy_check(int fd);
+void enable_so_linger(int fd, int timeout);
 #endif /* UTIL_H */
diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c
index 613551132a96..f669baaa0dca 100644
--- a/tools/testing/vsock/vsock_test.c
+++ b/tools/testing/vsock/vsock_test.c
@@ -21,7 +21,6 @@
 #include <poll.h>
 #include <signal.h>
 #include <sys/ioctl.h>
-#include <linux/sockios.h>
 #include <linux/time64.h>
 
 #include "vsock_test_zerocopy.h"
@@ -1058,18 +1057,39 @@ static void sigpipe(int signo)
 	have_sigpipe = 1;
 }
 
+#define SEND_SLEEP_USEC (10 * 1000)
+
 static void test_stream_check_sigpipe(int fd)
 {
 	ssize_t res;
 
 	have_sigpipe = 0;
 
-	res = send(fd, "A", 1, 0);
-	if (res != -1) {
-		fprintf(stderr, "expected send(2) failure, got %zi\n", res);
-		exit(EXIT_FAILURE);
+	/* When the other peer calls shutdown(SHUT_RD), there is a chance that
+	 * the send() call could occur before the message carrying the close
+	 * information arrives over the transport. In such cases, the send()
+	 * might still succeed. To avoid this race, let's retry the send() call
+	 * a few times, ensuring the test is more reliable.
+	 */
+	timeout_begin(TIMEOUT);
+	while(1) {
+		res = send(fd, "A", 1, 0);
+		if (res == -1 && errno != EINTR)
+			break;
+
+		/* Sleep a little before trying again to avoid flooding the
+		 * other peer and filling its receive buffer, causing
+		 * false-negative.
+		 */
+		timeout_usleep(SEND_SLEEP_USEC);
+		timeout_check("send");
 	}
+	timeout_end();
 
+	if (errno != EPIPE) {
+		fprintf(stderr, "unexpected send(2) errno %d\n", errno);
+		exit(EXIT_FAILURE);
+	}
 	if (!have_sigpipe) {
 		fprintf(stderr, "SIGPIPE expected\n");
 		exit(EXIT_FAILURE);
@@ -1077,12 +1097,21 @@ static void test_stream_check_sigpipe(int fd)
 
 	have_sigpipe = 0;
 
-	res = send(fd, "A", 1, MSG_NOSIGNAL);
-	if (res != -1) {
-		fprintf(stderr, "expected send(2) failure, got %zi\n", res);
-		exit(EXIT_FAILURE);
+	timeout_begin(TIMEOUT);
+	while(1) {
+		res = send(fd, "A", 1, MSG_NOSIGNAL);
+		if (res == -1 && errno != EINTR)
+			break;
+
+		timeout_usleep(SEND_SLEEP_USEC);
+		timeout_check("send");
 	}
+	timeout_end();
 
+	if (errno != EPIPE) {
+		fprintf(stderr, "unexpected send(2) errno %d\n", errno);
+		exit(EXIT_FAILURE);
+	}
 	if (have_sigpipe) {
 		fprintf(stderr, "SIGPIPE not expected\n");
 		exit(EXIT_FAILURE);
@@ -1250,7 +1279,7 @@ static void test_unsent_bytes_server(const struct test_opts *opts, int type)
 static void test_unsent_bytes_client(const struct test_opts *opts, int type)
 {
 	unsigned char buf[MSG_BUF_IOCTL_LEN];
-	int ret, fd, sock_bytes_unsent;
+	int fd;
 
 	fd = vsock_connect(opts->peer_cid, opts->peer_port, type);
 	if (fd < 0) {
@@ -1267,22 +1296,12 @@ static void test_unsent_bytes_client(const struct test_opts *opts, int type)
 	/* SIOCOUTQ isn't guaranteed to instantly track sent data. Even though
 	 * the "RECEIVED" message means that the other side has received the
 	 * data, there can be a delay in our kernel before updating the "unsent
-	 * bytes" counter. Repeat SIOCOUTQ until it returns 0.
+	 * bytes" counter. vsock_wait_sent() will repeat SIOCOUTQ until it
+	 * returns 0.
 	 */
-	timeout_begin(TIMEOUT);
-	do {
-		ret = ioctl(fd, SIOCOUTQ, &sock_bytes_unsent);
-		if (ret < 0) {
-			if (errno == EOPNOTSUPP) {
-				fprintf(stderr, "Test skipped, SIOCOUTQ not supported.\n");
-				break;
-			}
-			perror("ioctl");
-			exit(EXIT_FAILURE);
-		}
-		timeout_check("SIOCOUTQ");
-	} while (sock_bytes_unsent != 0);
-	timeout_end();
+	if (!vsock_wait_sent(fd))
+		fprintf(stderr, "Test skipped, SIOCOUTQ not supported.\n");
+
 	close(fd);
 }
 
@@ -1794,10 +1813,6 @@ static void test_stream_connect_retry_server(const struct test_opts *opts)
 
 static void test_stream_linger_client(const struct test_opts *opts)
 {
-	struct linger optval = {
-		.l_onoff = 1,
-		.l_linger = 1
-	};
 	int fd;
 
 	fd = vsock_stream_connect(opts->peer_cid, opts->peer_port);
@@ -1806,15 +1821,58 @@ static void test_stream_linger_client(const struct test_opts *opts)
 		exit(EXIT_FAILURE);
 	}
 
-	if (setsockopt(fd, SOL_SOCKET, SO_LINGER, &optval, sizeof(optval))) {
-		perror("setsockopt(SO_LINGER)");
+	enable_so_linger(fd, 1);
+	close(fd);
+}
+
+static void test_stream_linger_server(const struct test_opts *opts)
+{
+	int fd;
+
+	fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL);
+	if (fd < 0) {
+		perror("accept");
 		exit(EXIT_FAILURE);
 	}
 
+	vsock_wait_remote_close(fd);
 	close(fd);
 }
 
-static void test_stream_linger_server(const struct test_opts *opts)
+/* Half of the default to not risk timing out the control channel */
+#define LINGER_TIMEOUT	(TIMEOUT / 2)
+
+static void test_stream_nolinger_client(const struct test_opts *opts)
+{
+	bool waited; /* false if vsock_wait_sent() found SIOCOUTQ unsupported */
+	time_t ns; /* close() start time, then its elapsed time */
+	int fd;
+
+	fd = vsock_stream_connect(opts->peer_cid, opts->peer_port);
+	if (fd < 0) {
+		perror("connect");
+		exit(EXIT_FAILURE);
+	}
+
+	enable_so_linger(fd, LINGER_TIMEOUT);
+	send_byte(fd, 1, 0); /* Left unread to expose incorrect behaviour. */
+	waited = vsock_wait_sent(fd);
+
+	ns = current_nsec();
+	close(fd); /* Timed call: lingering here is what the test detects. */
+	ns = current_nsec() - ns;
+
+	if (!waited) {
+		fprintf(stderr, "Test skipped, SIOCOUTQ not supported.\n");
+	} else if (DIV_ROUND_UP(ns, NSEC_PER_SEC) >= LINGER_TIMEOUT) {
+		fprintf(stderr, "Unexpected lingering\n");
+		exit(EXIT_FAILURE);
+	}
+
+	control_writeln("DONE"); /* The server side waits for this line. */
+}
+
+static void test_stream_nolinger_server(const struct test_opts *opts)
 {
 	int fd;
 
@@ -1824,7 +1882,7 @@ static void test_stream_linger_server(const struct test_opts *opts)
 		exit(EXIT_FAILURE);
 	}
 
-	vsock_wait_remote_close(fd);
+	control_expectln("DONE");
 	close(fd);
 }
 
@@ -1988,6 +2046,11 @@ static struct test_case test_cases[] = {
 		.run_client = test_stream_linger_client,
 		.run_server = test_stream_linger_server,
 	},
+	{
+		.name = "SOCK_STREAM SO_LINGER close() on unread",
+		.run_client = test_stream_nolinger_client,
+		.run_server = test_stream_nolinger_server,
+	},
 	{},
 };