aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/tools/testing/selftests/netfilter
diff options
context:
space:
mode:
Diffstat (limited to 'tools/testing/selftests/netfilter')
-rw-r--r--tools/testing/selftests/netfilter/.gitignore6
-rw-r--r--tools/testing/selftests/netfilter/Makefile17
-rw-r--r--tools/testing/selftests/netfilter/audit_logread.c165
-rw-r--r--tools/testing/selftests/netfilter/bridge_netfilter.sh188
-rw-r--r--tools/testing/selftests/netfilter/config1
-rw-r--r--tools/testing/selftests/netfilter/connect_close.c136
-rw-r--r--tools/testing/selftests/netfilter/conntrack_dump_flush.c471
-rwxr-xr-xtools/testing/selftests/netfilter/conntrack_icmp_related.sh36
-rwxr-xr-xtools/testing/selftests/netfilter/conntrack_sctp_collision.sh89
-rwxr-xr-xtools/testing/selftests/netfilter/conntrack_tcp_unreplied.sh167
-rwxr-xr-xtools/testing/selftests/netfilter/conntrack_vrf.sh241
-rwxr-xr-xtools/testing/selftests/netfilter/ipip-conntrack-mtu.sh207
-rw-r--r--tools/testing/selftests/netfilter/nf-queue.c61
-rwxr-xr-xtools/testing/selftests/netfilter/nf_nat_edemux.sh127
-rwxr-xr-xtools/testing/selftests/netfilter/nft_audit.sh245
-rwxr-xr-xtools/testing/selftests/netfilter/nft_concat_range.sh161
-rwxr-xr-xtools/testing/selftests/netfilter/nft_conntrack_helper.sh48
-rwxr-xr-xtools/testing/selftests/netfilter/nft_fib.sh273
-rwxr-xr-xtools/testing/selftests/netfilter/nft_flowtable.sh581
-rwxr-xr-xtools/testing/selftests/netfilter/nft_meta.sh142
-rwxr-xr-xtools/testing/selftests/netfilter/nft_nat.sh355
-rwxr-xr-xtools/testing/selftests/netfilter/nft_nat_zones.sh309
-rwxr-xr-xtools/testing/selftests/netfilter/nft_queue.sh143
-rwxr-xr-xtools/testing/selftests/netfilter/nft_synproxy.sh117
-rwxr-xr-xtools/testing/selftests/netfilter/nft_trans_stress.sh91
-rwxr-xr-xtools/testing/selftests/netfilter/nft_zones_many.sh163
-rwxr-xr-xtools/testing/selftests/netfilter/rpath.sh169
-rw-r--r--tools/testing/selftests/netfilter/sctp_collision.c99
-rw-r--r--tools/testing/selftests/netfilter/settings1
-rwxr-xr-xtools/testing/selftests/netfilter/xt_string.sh128
30 files changed, 4716 insertions, 221 deletions
diff --git a/tools/testing/selftests/netfilter/.gitignore b/tools/testing/selftests/netfilter/.gitignore
new file mode 100644
index 000000000000..c2229b3e40d4
--- /dev/null
+++ b/tools/testing/selftests/netfilter/.gitignore
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0-only
+nf-queue
+connect_close
+audit_logread
+conntrack_dump_flush
+sctp_collision
diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile
index a179f0dca8ce..936c3085bb83 100644
--- a/tools/testing/selftests/netfilter/Makefile
+++ b/tools/testing/selftests/netfilter/Makefile
@@ -1,12 +1,21 @@
# SPDX-License-Identifier: GPL-2.0
# Makefile for netfilter selftests
-TEST_PROGS := nft_trans_stress.sh nft_nat.sh bridge_brouter.sh \
+TEST_PROGS := nft_trans_stress.sh nft_fib.sh nft_nat.sh bridge_brouter.sh \
conntrack_icmp_related.sh nft_flowtable.sh ipvs.sh \
nft_concat_range.sh nft_conntrack_helper.sh \
- nft_queue.sh
+ nft_queue.sh nft_meta.sh nf_nat_edemux.sh \
+ ipip-conntrack-mtu.sh conntrack_tcp_unreplied.sh \
+ conntrack_vrf.sh nft_synproxy.sh rpath.sh nft_audit.sh \
+ conntrack_sctp_collision.sh xt_string.sh \
+ bridge_netfilter.sh
-LDLIBS = -lmnl
-TEST_GEN_FILES = nf-queue
+HOSTPKG_CONFIG := pkg-config
+
+CFLAGS += $(shell $(HOSTPKG_CONFIG) --cflags libmnl 2>/dev/null)
+LDLIBS += $(shell $(HOSTPKG_CONFIG) --libs libmnl 2>/dev/null || echo -lmnl)
+
+TEST_GEN_FILES = nf-queue connect_close audit_logread sctp_collision \
+ conntrack_dump_flush
include ../lib.mk
diff --git a/tools/testing/selftests/netfilter/audit_logread.c b/tools/testing/selftests/netfilter/audit_logread.c
new file mode 100644
index 000000000000..a0a880fc2d9d
--- /dev/null
+++ b/tools/testing/selftests/netfilter/audit_logread.c
@@ -0,0 +1,165 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <signal.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <unistd.h>
+#include <linux/audit.h>
+#include <linux/netlink.h>
+
+static int fd;
+
+#define MAX_AUDIT_MESSAGE_LENGTH 8970
+struct audit_message {
+ struct nlmsghdr nlh;
+ union {
+ struct audit_status s;
+ char data[MAX_AUDIT_MESSAGE_LENGTH];
+ } u;
+};
+
+int audit_recv(int fd, struct audit_message *rep)
+{
+ struct sockaddr_nl addr;
+ socklen_t addrlen = sizeof(addr);
+ int ret;
+
+ do {
+ ret = recvfrom(fd, rep, sizeof(*rep), 0,
+ (struct sockaddr *)&addr, &addrlen);
+ } while (ret < 0 && errno == EINTR);
+
+ if (ret < 0 ||
+ addrlen != sizeof(addr) ||
+ addr.nl_pid != 0 ||
+ rep->nlh.nlmsg_type == NLMSG_ERROR) /* short-cut for now */
+ return -1;
+
+ return ret;
+}
+
+int audit_send(int fd, uint16_t type, uint32_t key, uint32_t val)
+{
+ static int seq = 0;
+ struct audit_message msg = {
+ .nlh = {
+ .nlmsg_len = NLMSG_SPACE(sizeof(msg.u.s)),
+ .nlmsg_type = type,
+ .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK,
+ .nlmsg_seq = ++seq,
+ },
+ .u.s = {
+ .mask = key,
+ .enabled = key == AUDIT_STATUS_ENABLED ? val : 0,
+ .pid = key == AUDIT_STATUS_PID ? val : 0,
+ }
+ };
+ struct sockaddr_nl addr = {
+ .nl_family = AF_NETLINK,
+ };
+ int ret;
+
+ do {
+ ret = sendto(fd, &msg, msg.nlh.nlmsg_len, 0,
+ (struct sockaddr *)&addr, sizeof(addr));
+ } while (ret < 0 && errno == EINTR);
+
+ if (ret != (int)msg.nlh.nlmsg_len)
+ return -1;
+ return 0;
+}
+
+int audit_set(int fd, uint32_t key, uint32_t val)
+{
+ struct audit_message rep = { 0 };
+ int ret;
+
+ ret = audit_send(fd, AUDIT_SET, key, val);
+ if (ret)
+ return ret;
+
+ ret = audit_recv(fd, &rep);
+ if (ret < 0)
+ return ret;
+ return 0;
+}
+
+int readlog(int fd)
+{
+ struct audit_message rep = { 0 };
+ int ret = audit_recv(fd, &rep);
+ const char *sep = "";
+ char *k, *v;
+
+ if (ret < 0)
+ return ret;
+
+ if (rep.nlh.nlmsg_type != AUDIT_NETFILTER_CFG)
+ return 0;
+
+ /* skip the initial "audit(...): " part */
+ strtok(rep.u.data, " ");
+
+ while ((k = strtok(NULL, "="))) {
+ v = strtok(NULL, " ");
+
+ /* these vary and/or are uninteresting, ignore */
+ if (!strcmp(k, "pid") ||
+ !strcmp(k, "comm") ||
+ !strcmp(k, "subj"))
+ continue;
+
+ /* strip the varying sequence number */
+ if (!strcmp(k, "table"))
+ *strchrnul(v, ':') = '\0';
+
+ printf("%s%s=%s", sep, k, v);
+ sep = " ";
+ }
+ if (*sep) {
+ printf("\n");
+ fflush(stdout);
+ }
+ return 0;
+}
+
+void cleanup(int sig)
+{
+ audit_set(fd, AUDIT_STATUS_ENABLED, 0);
+ close(fd);
+ if (sig)
+ exit(0);
+}
+
+int main(int argc, char **argv)
+{
+ struct sigaction act = {
+ .sa_handler = cleanup,
+ };
+
+ fd = socket(PF_NETLINK, SOCK_RAW, NETLINK_AUDIT);
+ if (fd < 0) {
+ perror("Can't open netlink socket");
+ return -1;
+ }
+
+ if (sigaction(SIGTERM, &act, NULL) < 0 ||
+ sigaction(SIGINT, &act, NULL) < 0) {
+ perror("Can't set signal handler");
+ close(fd);
+ return -1;
+ }
+
+ audit_set(fd, AUDIT_STATUS_ENABLED, 1);
+ audit_set(fd, AUDIT_STATUS_PID, getpid());
+
+ while (1)
+ readlog(fd);
+}
diff --git a/tools/testing/selftests/netfilter/bridge_netfilter.sh b/tools/testing/selftests/netfilter/bridge_netfilter.sh
new file mode 100644
index 000000000000..659b3ab02c8b
--- /dev/null
+++ b/tools/testing/selftests/netfilter/bridge_netfilter.sh
@@ -0,0 +1,188 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test bridge netfilter + conntrack, a combination that doesn't really work,
+# with multicast/broadcast packets racing for hash table insertion.
+
+# eth0 br0 eth0
+# setup is: ns1 <->,ns0 <-> ns3
+# ns2 <-' `'-> ns4
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+ret=0
+
+sfx=$(mktemp -u "XXXXXXXX")
+ns0="ns0-$sfx"
+ns1="ns1-$sfx"
+ns2="ns2-$sfx"
+ns3="ns3-$sfx"
+ns4="ns4-$sfx"
+
+ebtables -V > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without ebtables"
+ exit $ksft_skip
+fi
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without ip tool"
+ exit $ksft_skip
+fi
+
+for i in $(seq 0 4); do
+ eval ip netns add \$ns$i
+done
+
+cleanup() {
+ for i in $(seq 0 4); do eval ip netns del \$ns$i;done
+}
+
+trap cleanup EXIT
+
+do_ping()
+{
+ fromns="$1"
+ dstip="$2"
+
+ ip netns exec $fromns ping -c 1 -q $dstip > /dev/null
+ if [ $? -ne 0 ]; then
+ echo "ERROR: ping from $fromns to $dstip"
+ ip netns exec ${ns0} nft list ruleset
+ ret=1
+ fi
+}
+
+bcast_ping()
+{
+ fromns="$1"
+ dstip="$2"
+
+ for i in $(seq 1 1000); do
+ ip netns exec $fromns ping -q -f -b -c 1 -q $dstip > /dev/null 2>&1
+ if [ $? -ne 0 ]; then
+ echo "ERROR: ping -b from $fromns to $dstip"
+ ip netns exec ${ns0} nft list ruleset
+ fi
+ done
+}
+
+ip link add veth1 netns ${ns0} type veth peer name eth0 netns ${ns1}
+if [ $? -ne 0 ]; then
+ echo "SKIP: Can't create veth device"
+ exit $ksft_skip
+fi
+
+ip link add veth2 netns ${ns0} type veth peer name eth0 netns $ns2
+ip link add veth3 netns ${ns0} type veth peer name eth0 netns $ns3
+ip link add veth4 netns ${ns0} type veth peer name eth0 netns $ns4
+
+ip -net ${ns0} link set lo up
+
+for i in $(seq 1 4); do
+ ip -net ${ns0} link set veth$i up
+done
+
+ip -net ${ns0} link add br0 type bridge stp_state 0 forward_delay 0 nf_call_iptables 1 nf_call_ip6tables 1 nf_call_arptables 1
+if [ $? -ne 0 ]; then
+ echo "SKIP: Can't create bridge br0"
+ exit $ksft_skip
+fi
+
+# make veth0,1,2 part of bridge.
+for i in $(seq 1 3); do
+ ip -net ${ns0} link set veth$i master br0
+done
+
+# add a macvlan on top of the bridge.
+MACVLAN_ADDR=ba:f3:13:37:42:23
+ip -net ${ns0} link add link br0 name macvlan0 type macvlan mode private
+ip -net ${ns0} link set macvlan0 address ${MACVLAN_ADDR}
+ip -net ${ns0} link set macvlan0 up
+ip -net ${ns0} addr add 10.23.0.1/24 dev macvlan0
+
+# add a macvlan on top of veth4.
+MACVLAN_ADDR=ba:f3:13:37:42:24
+ip -net ${ns0} link add link veth4 name macvlan4 type macvlan mode vepa
+ip -net ${ns0} link set macvlan4 address ${MACVLAN_ADDR}
+ip -net ${ns0} link set macvlan4 up
+
+# make the macvlan part of the bridge.
+# veth4 is not a bridge port, only the macvlan on top of it.
+ip -net ${ns0} link set macvlan4 master br0
+
+ip -net ${ns0} link set br0 up
+ip -net ${ns0} addr add 10.0.0.1/24 dev br0
+ip netns exec ${ns0} sysctl -q net.bridge.bridge-nf-call-iptables=1
+ret=$?
+if [ $ret -ne 0 ] ; then
+ echo "SKIP: bridge netfilter not available"
+ ret=$ksft_skip
+fi
+
+# for testing, so namespaces will reply to ping -b probes.
+ip netns exec ${ns0} sysctl -q net.ipv4.icmp_echo_ignore_broadcasts=0
+
+# enable conntrack in ns0 and drop broadcast packets in forward to
+# avoid them from getting confirmed in the postrouting hook before
+# the cloned skb is passed up the stack.
+ip netns exec ${ns0} nft -f - <<EOF
+table ip filter {
+ chain input {
+ type filter hook input priority 1; policy accept
+ iifname br0 counter
+ ct state new accept
+ }
+}
+
+table bridge filter {
+ chain forward {
+ type filter hook forward priority 0; policy accept
+ meta pkttype broadcast ip protocol icmp counter drop
+ }
+}
+EOF
+
+# place 1, 2 & 3 in same subnet, connected via ns0:br0.
+# ns4 is placed in same subnet as well, but its not
+# part of the bridge: the corresponding veth4 is not
+# part of the bridge, only its macvlan interface.
+for i in $(seq 1 4); do
+ eval ip -net \$ns$i link set lo up
+ eval ip -net \$ns$i link set eth0 up
+done
+for i in $(seq 1 2); do
+ eval ip -net \$ns$i addr add 10.0.0.1$i/24 dev eth0
+done
+
+ip -net ${ns3} addr add 10.23.0.13/24 dev eth0
+ip -net ${ns4} addr add 10.23.0.14/24 dev eth0
+
+# test basic connectivity
+do_ping ${ns1} 10.0.0.12
+do_ping ${ns3} 10.23.0.1
+do_ping ${ns4} 10.23.0.1
+
+if [ $ret -eq 0 ];then
+ echo "PASS: netns connectivity: ns1 can reach ns2, ns3 and ns4 can reach ns0"
+fi
+
+bcast_ping ${ns1} 10.0.0.255
+
+# This should deliver broadcast to macvlan0, which is on top of ns0:br0.
+bcast_ping ${ns3} 10.23.0.255
+
+# same, this time via veth4:macvlan4.
+bcast_ping ${ns4} 10.23.0.255
+
+read t < /proc/sys/kernel/tainted
+
+if [ $t -eq 0 ];then
+ echo PASS: kernel not tainted
+else
+ echo ERROR: kernel is tainted
+ ret=1
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/netfilter/config b/tools/testing/selftests/netfilter/config
index 4faf2ce021d9..7c42b1b2c69b 100644
--- a/tools/testing/selftests/netfilter/config
+++ b/tools/testing/selftests/netfilter/config
@@ -6,3 +6,4 @@ CONFIG_NFT_REDIR=m
CONFIG_NFT_MASQ=m
CONFIG_NFT_FLOW_OFFLOAD=m
CONFIG_NF_CT_NETLINK=m
+CONFIG_AUDIT=y
diff --git a/tools/testing/selftests/netfilter/connect_close.c b/tools/testing/selftests/netfilter/connect_close.c
new file mode 100644
index 000000000000..1c3b0add54c4
--- /dev/null
+++ b/tools/testing/selftests/netfilter/connect_close.c
@@ -0,0 +1,136 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <string.h>
+#include <unistd.h>
+#include <signal.h>
+
+#include <arpa/inet.h>
+#include <sys/socket.h>
+
+#define PORT 12345
+#define RUNTIME 10
+
+static struct {
+ unsigned int timeout;
+ unsigned int port;
+} opts = {
+ .timeout = RUNTIME,
+ .port = PORT,
+};
+
+static void handler(int sig)
+{
+ _exit(sig == SIGALRM ? 0 : 1);
+}
+
+static void set_timeout(void)
+{
+ struct sigaction action = {
+ .sa_handler = handler,
+ };
+
+ sigaction(SIGALRM, &action, NULL);
+
+ alarm(opts.timeout);
+}
+
+static void do_connect(const struct sockaddr_in *dst)
+{
+ int s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
+
+ if (s >= 0)
+ fcntl(s, F_SETFL, O_NONBLOCK);
+
+ connect(s, (struct sockaddr *)dst, sizeof(*dst));
+ close(s);
+}
+
+static void do_accept(const struct sockaddr_in *src)
+{
+ int c, one = 1, s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
+
+ if (s < 0)
+ return;
+
+ setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one));
+ setsockopt(s, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one));
+
+ bind(s, (struct sockaddr *)src, sizeof(*src));
+
+ listen(s, 16);
+
+ c = accept(s, NULL, NULL);
+ if (c >= 0)
+ close(c);
+
+ close(s);
+}
+
+static int accept_loop(void)
+{
+ struct sockaddr_in src = {
+ .sin_family = AF_INET,
+ .sin_port = htons(opts.port),
+ };
+
+ inet_pton(AF_INET, "127.0.0.1", &src.sin_addr);
+
+ set_timeout();
+
+ for (;;)
+ do_accept(&src);
+
+ return 1;
+}
+
+static int connect_loop(void)
+{
+ struct sockaddr_in dst = {
+ .sin_family = AF_INET,
+ .sin_port = htons(opts.port),
+ };
+
+ inet_pton(AF_INET, "127.0.0.1", &dst.sin_addr);
+
+ set_timeout();
+
+ for (;;)
+ do_connect(&dst);
+
+ return 1;
+}
+
+static void parse_opts(int argc, char **argv)
+{
+ int c;
+
+ while ((c = getopt(argc, argv, "t:p:")) != -1) {
+ switch (c) {
+ case 't':
+ opts.timeout = atoi(optarg);
+ break;
+ case 'p':
+ opts.port = atoi(optarg);
+ break;
+ }
+ }
+}
+
+int main(int argc, char *argv[])
+{
+ pid_t p;
+
+ parse_opts(argc, argv);
+
+ p = fork();
+ if (p < 0)
+ return 111;
+
+ if (p > 0)
+ return accept_loop();
+
+ return connect_loop();
+}
diff --git a/tools/testing/selftests/netfilter/conntrack_dump_flush.c b/tools/testing/selftests/netfilter/conntrack_dump_flush.c
new file mode 100644
index 000000000000..b11ea8ee6719
--- /dev/null
+++ b/tools/testing/selftests/netfilter/conntrack_dump_flush.c
@@ -0,0 +1,471 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+
+#include <time.h>
+#include <libmnl/libmnl.h>
+#include <netinet/ip.h>
+
+#include <linux/netlink.h>
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_conntrack.h>
+#include <linux/netfilter/nf_conntrack_tcp.h>
+#include "../kselftest_harness.h"
+
+#define TEST_ZONE_ID 123
+#define NF_CT_DEFAULT_ZONE_ID 0
+
+static int reply_counter;
+
+static int build_cta_tuple_v4(struct nlmsghdr *nlh, int type,
+ uint32_t src_ip, uint32_t dst_ip,
+ uint16_t src_port, uint16_t dst_port)
+{
+ struct nlattr *nest, *nest_ip, *nest_proto;
+
+ nest = mnl_attr_nest_start(nlh, type);
+ if (!nest)
+ return -1;
+
+ nest_ip = mnl_attr_nest_start(nlh, CTA_TUPLE_IP);
+ if (!nest_ip)
+ return -1;
+ mnl_attr_put_u32(nlh, CTA_IP_V4_SRC, src_ip);
+ mnl_attr_put_u32(nlh, CTA_IP_V4_DST, dst_ip);
+ mnl_attr_nest_end(nlh, nest_ip);
+
+ nest_proto = mnl_attr_nest_start(nlh, CTA_TUPLE_PROTO);
+ if (!nest_proto)
+ return -1;
+ mnl_attr_put_u8(nlh, CTA_PROTO_NUM, 6);
+ mnl_attr_put_u16(nlh, CTA_PROTO_SRC_PORT, htons(src_port));
+ mnl_attr_put_u16(nlh, CTA_PROTO_DST_PORT, htons(dst_port));
+ mnl_attr_nest_end(nlh, nest_proto);
+
+ mnl_attr_nest_end(nlh, nest);
+}
+
+static int build_cta_tuple_v6(struct nlmsghdr *nlh, int type,
+ struct in6_addr src_ip, struct in6_addr dst_ip,
+ uint16_t src_port, uint16_t dst_port)
+{
+ struct nlattr *nest, *nest_ip, *nest_proto;
+
+ nest = mnl_attr_nest_start(nlh, type);
+ if (!nest)
+ return -1;
+
+ nest_ip = mnl_attr_nest_start(nlh, CTA_TUPLE_IP);
+ if (!nest_ip)
+ return -1;
+ mnl_attr_put(nlh, CTA_IP_V6_SRC, sizeof(struct in6_addr), &src_ip);
+ mnl_attr_put(nlh, CTA_IP_V6_DST, sizeof(struct in6_addr), &dst_ip);
+ mnl_attr_nest_end(nlh, nest_ip);
+
+ nest_proto = mnl_attr_nest_start(nlh, CTA_TUPLE_PROTO);
+ if (!nest_proto)
+ return -1;
+ mnl_attr_put_u8(nlh, CTA_PROTO_NUM, 6);
+ mnl_attr_put_u16(nlh, CTA_PROTO_SRC_PORT, htons(src_port));
+ mnl_attr_put_u16(nlh, CTA_PROTO_DST_PORT, htons(dst_port));
+ mnl_attr_nest_end(nlh, nest_proto);
+
+ mnl_attr_nest_end(nlh, nest);
+}
+
+static int build_cta_proto(struct nlmsghdr *nlh)
+{
+ struct nlattr *nest, *nest_proto;
+
+ nest = mnl_attr_nest_start(nlh, CTA_PROTOINFO);
+ if (!nest)
+ return -1;
+
+ nest_proto = mnl_attr_nest_start(nlh, CTA_PROTOINFO_TCP);
+ if (!nest_proto)
+ return -1;
+ mnl_attr_put_u8(nlh, CTA_PROTOINFO_TCP_STATE, TCP_CONNTRACK_ESTABLISHED);
+ mnl_attr_put_u16(nlh, CTA_PROTOINFO_TCP_FLAGS_ORIGINAL, 0x0a0a);
+ mnl_attr_put_u16(nlh, CTA_PROTOINFO_TCP_FLAGS_REPLY, 0x0a0a);
+ mnl_attr_nest_end(nlh, nest_proto);
+
+ mnl_attr_nest_end(nlh, nest);
+}
+
+static int conntrack_data_insert(struct mnl_socket *sock, struct nlmsghdr *nlh,
+ uint16_t zone)
+{
+ char buf[MNL_SOCKET_BUFFER_SIZE];
+ struct nlmsghdr *rplnlh;
+ unsigned int portid;
+ int err, ret;
+
+ portid = mnl_socket_get_portid(sock);
+
+ ret = build_cta_proto(nlh);
+ if (ret < 0) {
+ perror("build_cta_proto");
+ return -1;
+ }
+ mnl_attr_put_u32(nlh, CTA_TIMEOUT, htonl(20000));
+ mnl_attr_put_u16(nlh, CTA_ZONE, htons(zone));
+
+ if (mnl_socket_sendto(sock, nlh, nlh->nlmsg_len) < 0) {
+ perror("mnl_socket_sendto");
+ return -1;
+ }
+
+ ret = mnl_socket_recvfrom(sock, buf, MNL_SOCKET_BUFFER_SIZE);
+ if (ret < 0) {
+ perror("mnl_socket_recvfrom");
+ return ret;
+ }
+
+ ret = mnl_cb_run(buf, ret, nlh->nlmsg_seq, portid, NULL, NULL);
+ if (ret < 0) {
+ if (errno == EEXIST) {
+ /* The entries are probably still there from a previous
+ * run. So we are good
+ */
+ return 0;
+ }
+ perror("mnl_cb_run");
+ return ret;
+ }
+
+ return 0;
+}
+
+static int conntrack_data_generate_v4(struct mnl_socket *sock, uint32_t src_ip,
+ uint32_t dst_ip, uint16_t zone)
+{
+ char buf[MNL_SOCKET_BUFFER_SIZE];
+ struct nlmsghdr *nlh;
+ struct nfgenmsg *nfh;
+ int ret;
+
+ nlh = mnl_nlmsg_put_header(buf);
+ nlh->nlmsg_type = (NFNL_SUBSYS_CTNETLINK << 8) | IPCTNL_MSG_CT_NEW;
+ nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE |
+ NLM_F_ACK | NLM_F_EXCL;
+ nlh->nlmsg_seq = time(NULL);
+
+ nfh = mnl_nlmsg_put_extra_header(nlh, sizeof(struct nfgenmsg));
+ nfh->nfgen_family = AF_INET;
+ nfh->version = NFNETLINK_V0;
+ nfh->res_id = 0;
+
+ ret = build_cta_tuple_v4(nlh, CTA_TUPLE_ORIG, src_ip, dst_ip, 12345, 443);
+ if (ret < 0) {
+ perror("build_cta_tuple_v4");
+ return ret;
+ }
+ ret = build_cta_tuple_v4(nlh, CTA_TUPLE_REPLY, dst_ip, src_ip, 443, 12345);
+ if (ret < 0) {
+ perror("build_cta_tuple_v4");
+ return ret;
+ }
+ return conntrack_data_insert(sock, nlh, zone);
+}
+
+static int conntrack_data_generate_v6(struct mnl_socket *sock,
+ struct in6_addr src_ip,
+ struct in6_addr dst_ip,
+ uint16_t zone)
+{
+ char buf[MNL_SOCKET_BUFFER_SIZE];
+ struct nlmsghdr *nlh;
+ struct nfgenmsg *nfh;
+ int ret;
+
+ nlh = mnl_nlmsg_put_header(buf);
+ nlh->nlmsg_type = (NFNL_SUBSYS_CTNETLINK << 8) | IPCTNL_MSG_CT_NEW;
+ nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE |
+ NLM_F_ACK | NLM_F_EXCL;
+ nlh->nlmsg_seq = time(NULL);
+
+ nfh = mnl_nlmsg_put_extra_header(nlh, sizeof(struct nfgenmsg));
+ nfh->nfgen_family = AF_INET6;
+ nfh->version = NFNETLINK_V0;
+ nfh->res_id = 0;
+
+ ret = build_cta_tuple_v6(nlh, CTA_TUPLE_ORIG, src_ip, dst_ip,
+ 12345, 443);
+ if (ret < 0) {
+ perror("build_cta_tuple_v6");
+ return ret;
+ }
+ ret = build_cta_tuple_v6(nlh, CTA_TUPLE_REPLY, dst_ip, src_ip,
+ 12345, 443);
+ if (ret < 0) {
+ perror("build_cta_tuple_v6");
+ return ret;
+ }
+ return conntrack_data_insert(sock, nlh, zone);
+}
+
+static int count_entries(const struct nlmsghdr *nlh, void *data)
+{
+ reply_counter++;
+}
+
+static int conntracK_count_zone(struct mnl_socket *sock, uint16_t zone)
+{
+ char buf[MNL_SOCKET_BUFFER_SIZE];
+ struct nlmsghdr *nlh, *rplnlh;
+ struct nfgenmsg *nfh;
+ struct nlattr *nest;
+ unsigned int portid;
+ int err, ret;
+
+ portid = mnl_socket_get_portid(sock);
+
+ nlh = mnl_nlmsg_put_header(buf);
+ nlh->nlmsg_type = (NFNL_SUBSYS_CTNETLINK << 8) | IPCTNL_MSG_CT_GET;
+ nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
+ nlh->nlmsg_seq = time(NULL);
+
+ nfh = mnl_nlmsg_put_extra_header(nlh, sizeof(struct nfgenmsg));
+ nfh->nfgen_family = AF_UNSPEC;
+ nfh->version = NFNETLINK_V0;
+ nfh->res_id = 0;
+
+ mnl_attr_put_u16(nlh, CTA_ZONE, htons(zone));
+
+ ret = mnl_socket_sendto(sock, nlh, nlh->nlmsg_len);
+ if (ret < 0) {
+ perror("mnl_socket_sendto");
+ return ret;
+ }
+
+ reply_counter = 0;
+ ret = mnl_socket_recvfrom(sock, buf, MNL_SOCKET_BUFFER_SIZE);
+ while (ret > 0) {
+ ret = mnl_cb_run(buf, ret, nlh->nlmsg_seq, portid,
+ count_entries, NULL);
+ if (ret <= MNL_CB_STOP)
+ break;
+
+ ret = mnl_socket_recvfrom(sock, buf, MNL_SOCKET_BUFFER_SIZE);
+ }
+ if (ret < 0) {
+ perror("mnl_socket_recvfrom");
+ return ret;
+ }
+
+ return reply_counter;
+}
+
+static int conntrack_flush_zone(struct mnl_socket *sock, uint16_t zone)
+{
+ char buf[MNL_SOCKET_BUFFER_SIZE];
+ struct nlmsghdr *nlh, *rplnlh;
+ struct nfgenmsg *nfh;
+ struct nlattr *nest;
+ unsigned int portid;
+ int err, ret;
+
+ portid = mnl_socket_get_portid(sock);
+
+ nlh = mnl_nlmsg_put_header(buf);
+ nlh->nlmsg_type = (NFNL_SUBSYS_CTNETLINK << 8) | IPCTNL_MSG_CT_DELETE;
+ nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+ nlh->nlmsg_seq = time(NULL);
+
+ nfh = mnl_nlmsg_put_extra_header(nlh, sizeof(struct nfgenmsg));
+ nfh->nfgen_family = AF_UNSPEC;
+ nfh->version = NFNETLINK_V0;
+ nfh->res_id = 0;
+
+ mnl_attr_put_u16(nlh, CTA_ZONE, htons(zone));
+
+ ret = mnl_socket_sendto(sock, nlh, nlh->nlmsg_len);
+ if (ret < 0) {
+ perror("mnl_socket_sendto");
+ return ret;
+ }
+
+ ret = mnl_socket_recvfrom(sock, buf, MNL_SOCKET_BUFFER_SIZE);
+ if (ret < 0) {
+ perror("mnl_socket_recvfrom");
+ return ret;
+ }
+
+ ret = mnl_cb_run(buf, ret, nlh->nlmsg_seq, portid, NULL, NULL);
+ if (ret < 0) {
+ perror("mnl_cb_run");
+ return ret;
+ }
+
+ return 0;
+}
+
+FIXTURE(conntrack_dump_flush)
+{
+ struct mnl_socket *sock;
+};
+
+FIXTURE_SETUP(conntrack_dump_flush)
+{
+ struct in6_addr src, dst;
+ int ret;
+
+ self->sock = mnl_socket_open(NETLINK_NETFILTER);
+ if (!self->sock) {
+ perror("mnl_socket_open");
+ exit(EXIT_FAILURE);
+ }
+
+ if (mnl_socket_bind(self->sock, 0, MNL_SOCKET_AUTOPID) < 0) {
+ perror("mnl_socket_bind");
+ exit(EXIT_FAILURE);
+ }
+
+ ret = conntracK_count_zone(self->sock, TEST_ZONE_ID);
+ if (ret < 0 && errno == EPERM)
+ SKIP(return, "Needs to be run as root");
+ else if (ret < 0 && errno == EOPNOTSUPP)
+ SKIP(return, "Kernel does not seem to support conntrack zones");
+
+ ret = conntrack_data_generate_v4(self->sock, 0xf0f0f0f0, 0xf1f1f1f1,
+ TEST_ZONE_ID);
+ EXPECT_EQ(ret, 0);
+ ret = conntrack_data_generate_v4(self->sock, 0xf2f2f2f2, 0xf3f3f3f3,
+ TEST_ZONE_ID + 1);
+ EXPECT_EQ(ret, 0);
+ ret = conntrack_data_generate_v4(self->sock, 0xf4f4f4f4, 0xf5f5f5f5,
+ TEST_ZONE_ID + 2);
+ EXPECT_EQ(ret, 0);
+ ret = conntrack_data_generate_v4(self->sock, 0xf6f6f6f6, 0xf7f7f7f7,
+ NF_CT_DEFAULT_ZONE_ID);
+ EXPECT_EQ(ret, 0);
+
+ src = (struct in6_addr) {{
+ .__u6_addr32 = {
+ 0xb80d0120,
+ 0x00000000,
+ 0x00000000,
+ 0x01000000
+ }
+ }};
+ dst = (struct in6_addr) {{
+ .__u6_addr32 = {
+ 0xb80d0120,
+ 0x00000000,
+ 0x00000000,
+ 0x02000000
+ }
+ }};
+ ret = conntrack_data_generate_v6(self->sock, src, dst,
+ TEST_ZONE_ID);
+ EXPECT_EQ(ret, 0);
+ src = (struct in6_addr) {{
+ .__u6_addr32 = {
+ 0xb80d0120,
+ 0x00000000,
+ 0x00000000,
+ 0x03000000
+ }
+ }};
+ dst = (struct in6_addr) {{
+ .__u6_addr32 = {
+ 0xb80d0120,
+ 0x00000000,
+ 0x00000000,
+ 0x04000000
+ }
+ }};
+ ret = conntrack_data_generate_v6(self->sock, src, dst,
+ TEST_ZONE_ID + 1);
+ EXPECT_EQ(ret, 0);
+ src = (struct in6_addr) {{
+ .__u6_addr32 = {
+ 0xb80d0120,
+ 0x00000000,
+ 0x00000000,
+ 0x05000000
+ }
+ }};
+ dst = (struct in6_addr) {{
+ .__u6_addr32 = {
+ 0xb80d0120,
+ 0x00000000,
+ 0x00000000,
+ 0x06000000
+ }
+ }};
+ ret = conntrack_data_generate_v6(self->sock, src, dst,
+ TEST_ZONE_ID + 2);
+ EXPECT_EQ(ret, 0);
+
+ src = (struct in6_addr) {{
+ .__u6_addr32 = {
+ 0xb80d0120,
+ 0x00000000,
+ 0x00000000,
+ 0x07000000
+ }
+ }};
+ dst = (struct in6_addr) {{
+ .__u6_addr32 = {
+ 0xb80d0120,
+ 0x00000000,
+ 0x00000000,
+ 0x08000000
+ }
+ }};
+ ret = conntrack_data_generate_v6(self->sock, src, dst,
+ NF_CT_DEFAULT_ZONE_ID);
+ EXPECT_EQ(ret, 0);
+
+ ret = conntracK_count_zone(self->sock, TEST_ZONE_ID);
+ EXPECT_GE(ret, 2);
+ if (ret > 2)
+ SKIP(return, "kernel does not support filtering by zone");
+}
+
+FIXTURE_TEARDOWN(conntrack_dump_flush)
+{
+}
+
+TEST_F(conntrack_dump_flush, test_dump_by_zone)
+{
+ int ret;
+
+ ret = conntracK_count_zone(self->sock, TEST_ZONE_ID);
+ EXPECT_EQ(ret, 2);
+}
+
+TEST_F(conntrack_dump_flush, test_flush_by_zone)
+{
+ int ret;
+
+ ret = conntrack_flush_zone(self->sock, TEST_ZONE_ID);
+ EXPECT_EQ(ret, 0);
+ ret = conntracK_count_zone(self->sock, TEST_ZONE_ID);
+ EXPECT_EQ(ret, 0);
+ ret = conntracK_count_zone(self->sock, TEST_ZONE_ID + 1);
+ EXPECT_EQ(ret, 2);
+ ret = conntracK_count_zone(self->sock, TEST_ZONE_ID + 2);
+ EXPECT_EQ(ret, 2);
+ ret = conntracK_count_zone(self->sock, NF_CT_DEFAULT_ZONE_ID);
+ EXPECT_EQ(ret, 2);
+}
+
+TEST_F(conntrack_dump_flush, test_flush_by_zone_default)
+{
+ int ret;
+
+ ret = conntrack_flush_zone(self->sock, NF_CT_DEFAULT_ZONE_ID);
+ EXPECT_EQ(ret, 0);
+ ret = conntracK_count_zone(self->sock, TEST_ZONE_ID);
+ EXPECT_EQ(ret, 2);
+ ret = conntracK_count_zone(self->sock, TEST_ZONE_ID + 1);
+ EXPECT_EQ(ret, 2);
+ ret = conntracK_count_zone(self->sock, TEST_ZONE_ID + 2);
+ EXPECT_EQ(ret, 2);
+ ret = conntracK_count_zone(self->sock, NF_CT_DEFAULT_ZONE_ID);
+ EXPECT_EQ(ret, 0);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/netfilter/conntrack_icmp_related.sh b/tools/testing/selftests/netfilter/conntrack_icmp_related.sh
index b48e1833bc89..76645aaf2b58 100755
--- a/tools/testing/selftests/netfilter/conntrack_icmp_related.sh
+++ b/tools/testing/selftests/netfilter/conntrack_icmp_related.sh
@@ -35,6 +35,8 @@ cleanup() {
for i in 1 2;do ip netns del nsrouter$i;done
}
+trap cleanup EXIT
+
ipv4() {
echo -n 192.168.$1.2
}
@@ -146,11 +148,17 @@ ip netns exec nsclient1 nft -f - <<EOF
table inet filter {
counter unknown { }
counter related { }
+ counter redir4 { }
+ counter redir6 { }
chain input {
type filter hook input priority 0; policy accept;
- meta l4proto { icmp, icmpv6 } ct state established,untracked accept
+ icmp type "redirect" ct state "related" counter name "redir4" accept
+ icmpv6 type "nd-redirect" ct state "related" counter name "redir6" accept
+
+ meta l4proto { icmp, icmpv6 } ct state established,untracked accept
meta l4proto { icmp, icmpv6 } ct state "related" counter name "related" accept
+
counter name "unknown" drop
}
}
@@ -279,5 +287,29 @@ else
echo "ERROR: icmp error RELATED state test has failed"
fi
-cleanup
+# add 'bad' route, expect icmp REDIRECT to be generated
+ip netns exec nsclient1 ip route add 192.168.1.42 via 192.168.1.1
+ip netns exec nsclient1 ip route add dead:1::42 via dead:1::1
+
+ip netns exec "nsclient1" ping -q -c 2 192.168.1.42 > /dev/null
+
+expect="packets 1 bytes 112"
+check_counter nsclient1 "redir4" "$expect"
+if [ $? -ne 0 ];then
+ ret=1
+fi
+
+ip netns exec "nsclient1" ping -c 1 dead:1::42 > /dev/null
+expect="packets 1 bytes 192"
+check_counter nsclient1 "redir6" "$expect"
+if [ $? -ne 0 ];then
+ ret=1
+fi
+
+if [ $ret -eq 0 ];then
+ echo "PASS: icmp redirects had RELATED state"
+else
+ echo "ERROR: icmp redirect RELATED state test has failed"
+fi
+
exit $ret
diff --git a/tools/testing/selftests/netfilter/conntrack_sctp_collision.sh b/tools/testing/selftests/netfilter/conntrack_sctp_collision.sh
new file mode 100755
index 000000000000..a924e595cfd8
--- /dev/null
+++ b/tools/testing/selftests/netfilter/conntrack_sctp_collision.sh
@@ -0,0 +1,89 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Testing For SCTP COLLISION SCENARIO as Below:
+#
+# 14:35:47.655279 IP CLIENT_IP.PORT > SERVER_IP.PORT: sctp (1) [INIT] [init tag: 2017837359]
+# 14:35:48.353250 IP SERVER_IP.PORT > CLIENT_IP.PORT: sctp (1) [INIT] [init tag: 1187206187]
+# 14:35:48.353275 IP CLIENT_IP.PORT > SERVER_IP.PORT: sctp (1) [INIT ACK] [init tag: 2017837359]
+# 14:35:48.353283 IP SERVER_IP.PORT > CLIENT_IP.PORT: sctp (1) [COOKIE ECHO]
+# 14:35:48.353977 IP CLIENT_IP.PORT > SERVER_IP.PORT: sctp (1) [COOKIE ACK]
+# 14:35:48.855335 IP SERVER_IP.PORT > CLIENT_IP.PORT: sctp (1) [INIT ACK] [init tag: 164579970]
+#
+# TOPO: SERVER_NS (link0)<--->(link1) ROUTER_NS (link2)<--->(link3) CLIENT_NS
+
+CLIENT_NS=$(mktemp -u client-XXXXXXXX)
+CLIENT_IP="198.51.200.1"
+CLIENT_PORT=1234
+
+SERVER_NS=$(mktemp -u server-XXXXXXXX)
+SERVER_IP="198.51.100.1"
+SERVER_PORT=1234
+
+ROUTER_NS=$(mktemp -u router-XXXXXXXX)
+CLIENT_GW="198.51.200.2"
+SERVER_GW="198.51.100.2"
+
+# setup the topo
+setup() {
+ ip net add $CLIENT_NS
+ ip net add $SERVER_NS
+ ip net add $ROUTER_NS
+ ip -n $SERVER_NS link add link0 type veth peer name link1 netns $ROUTER_NS
+ ip -n $CLIENT_NS link add link3 type veth peer name link2 netns $ROUTER_NS
+
+ ip -n $SERVER_NS link set link0 up
+ ip -n $SERVER_NS addr add $SERVER_IP/24 dev link0
+ ip -n $SERVER_NS route add $CLIENT_IP dev link0 via $SERVER_GW
+
+ ip -n $ROUTER_NS link set link1 up
+ ip -n $ROUTER_NS link set link2 up
+ ip -n $ROUTER_NS addr add $SERVER_GW/24 dev link1
+ ip -n $ROUTER_NS addr add $CLIENT_GW/24 dev link2
+ ip net exec $ROUTER_NS sysctl -wq net.ipv4.ip_forward=1
+
+ ip -n $CLIENT_NS link set link3 up
+ ip -n $CLIENT_NS addr add $CLIENT_IP/24 dev link3
+ ip -n $CLIENT_NS route add $SERVER_IP dev link3 via $CLIENT_GW
+
+ # simulate the delay on OVS upcall by setting up a delay for INIT_ACK with
+ # tc on $SERVER_NS side
+ tc -n $SERVER_NS qdisc add dev link0 root handle 1: htb
+ tc -n $SERVER_NS class add dev link0 parent 1: classid 1:1 htb rate 100mbit
+ tc -n $SERVER_NS filter add dev link0 parent 1: protocol ip u32 match ip protocol 132 \
+ 0xff match u8 2 0xff at 32 flowid 1:1
+ tc -n $SERVER_NS qdisc add dev link0 parent 1:1 handle 10: netem delay 1200ms
+
+ # simulate the ctstate check on OVS nf_conntrack
+ ip net exec $ROUTER_NS iptables -A FORWARD -m state --state INVALID,UNTRACKED -j DROP
+ ip net exec $ROUTER_NS iptables -A INPUT -p sctp -j DROP
+
+ # use a smaller number for assoc's max_retrans to reproduce the issue
+ modprobe sctp
+ ip net exec $CLIENT_NS sysctl -wq net.sctp.association_max_retrans=3
+}
+
+cleanup() {
+ ip net exec $CLIENT_NS pkill sctp_collision 2>&1 >/dev/null
+ ip net exec $SERVER_NS pkill sctp_collision 2>&1 >/dev/null
+ ip net del "$CLIENT_NS"
+ ip net del "$SERVER_NS"
+ ip net del "$ROUTER_NS"
+}
+
+do_test() {
+ ip net exec $SERVER_NS ./sctp_collision server \
+ $SERVER_IP $SERVER_PORT $CLIENT_IP $CLIENT_PORT &
+ ip net exec $CLIENT_NS ./sctp_collision client \
+ $CLIENT_IP $CLIENT_PORT $SERVER_IP $SERVER_PORT
+}
+
+# NOTE: one way to work around the issue is set a smaller hb_interval
+# ip net exec $CLIENT_NS sysctl -wq net.sctp.hb_interval=3500
+
+# run the test case
+trap cleanup EXIT
+setup && \
+echo "Test for SCTP Collision in nf_conntrack:" && \
+do_test && echo "PASS!"
+exit $?
diff --git a/tools/testing/selftests/netfilter/conntrack_tcp_unreplied.sh b/tools/testing/selftests/netfilter/conntrack_tcp_unreplied.sh
new file mode 100755
index 000000000000..e7d7bf13cff5
--- /dev/null
+++ b/tools/testing/selftests/netfilter/conntrack_tcp_unreplied.sh
@@ -0,0 +1,167 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Check that UNREPLIED tcp conntrack will eventually timeout.
+#
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+ret=0
+
+waittime=20
+sfx=$(mktemp -u "XXXXXXXX")
+ns1="ns1-$sfx"
+ns2="ns2-$sfx"
+
+nft --version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without nft tool"
+ exit $ksft_skip
+fi
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without ip tool"
+ exit $ksft_skip
+fi
+
+cleanup() {
+ ip netns pids $ns1 | xargs kill 2>/dev/null
+ ip netns pids $ns2 | xargs kill 2>/dev/null
+
+ ip netns del $ns1
+ ip netns del $ns2
+}
+
+ipv4() {
+ echo -n 192.168.$1.2
+}
+
+check_counter()
+{
+ ns=$1
+ name=$2
+ expect=$3
+ local lret=0
+
+ cnt=$(ip netns exec $ns2 nft list counter inet filter "$name" | grep -q "$expect")
+ if [ $? -ne 0 ]; then
+ echo "ERROR: counter $name in $ns2 has unexpected value (expected $expect)" 1>&2
+ ip netns exec $ns2 nft list counter inet filter "$name" 1>&2
+ lret=1
+ fi
+
+ return $lret
+}
+
+# Create test namespaces
+ip netns add $ns1 || exit 1
+
+trap cleanup EXIT
+
+ip netns add $ns2 || exit 1
+
+# Connect the namespace to the host using a veth pair
+ip -net $ns1 link add name veth1 type veth peer name veth2
+ip -net $ns1 link set netns $ns2 dev veth2
+
+ip -net $ns1 link set up dev lo
+ip -net $ns2 link set up dev lo
+ip -net $ns1 link set up dev veth1
+ip -net $ns2 link set up dev veth2
+
+ip -net $ns2 addr add 10.11.11.2/24 dev veth2
+ip -net $ns2 route add default via 10.11.11.1
+
+ip netns exec $ns2 sysctl -q net.ipv4.conf.veth2.forwarding=1
+
+# add a rule inside NS so we enable conntrack
+ip netns exec $ns1 iptables -A INPUT -m state --state established,related -j ACCEPT
+
+ip -net $ns1 addr add 10.11.11.1/24 dev veth1
+ip -net $ns1 route add 10.99.99.99 via 10.11.11.2
+
+# Check connectivity works
+ip netns exec $ns1 ping -q -c 2 10.11.11.2 >/dev/null || exit 1
+
+ip netns exec $ns2 nc -l -p 8080 < /dev/null &
+
+# however, conntrack entries are there
+
+ip netns exec $ns2 nft -f - <<EOF
+table inet filter {
+ counter connreq { }
+ counter redir { }
+ chain input {
+ type filter hook input priority 0; policy accept;
+ ct state new tcp flags syn ip daddr 10.99.99.99 tcp dport 80 counter name "connreq" accept
+ ct state new ct status dnat tcp dport 8080 counter name "redir" accept
+ }
+}
+EOF
+if [ $? -ne 0 ]; then
+ echo "ERROR: Could not load nft rules"
+ exit 1
+fi
+
+ip netns exec $ns2 sysctl -q net.netfilter.nf_conntrack_tcp_timeout_syn_sent=10
+
+echo "INFO: connect $ns1 -> $ns2 to the virtual ip"
+ip netns exec $ns1 bash -c 'while true ; do
+ nc -p 60000 10.99.99.99 80
+ sleep 1
+ done' &
+
+sleep 1
+
+ip netns exec $ns2 nft -f - <<EOF
+table inet nat {
+ chain prerouting {
+ type nat hook prerouting priority 0; policy accept;
+ ip daddr 10.99.99.99 tcp dport 80 redirect to :8080
+ }
+}
+EOF
+if [ $? -ne 0 ]; then
+ echo "ERROR: Could not load nat redirect"
+ exit 1
+fi
+
+count=$(ip netns exec $ns2 conntrack -L -p tcp --dport 80 2>/dev/null | wc -l)
+if [ $count -eq 0 ]; then
+ echo "ERROR: $ns2 did not pick up tcp connection from peer"
+ exit 1
+fi
+
+echo "INFO: NAT redirect added in ns $ns2, waiting for $waittime seconds for nat to take effect"
+for i in $(seq 1 $waittime); do
+ echo -n "."
+
+ sleep 1
+
+ count=$(ip netns exec $ns2 conntrack -L -p tcp --reply-port-src 8080 2>/dev/null | wc -l)
+ if [ $count -gt 0 ]; then
+ echo
+ echo "PASS: redirection took effect after $i seconds"
+ break
+ fi
+
+ m=$((i%20))
+ if [ $m -eq 0 ]; then
+ echo " waited for $i seconds"
+ fi
+done
+
+expect="packets 1 bytes 60"
+check_counter "$ns2" "redir" "$expect"
+if [ $? -ne 0 ]; then
+ ret=1
+fi
+
+if [ $ret -eq 0 ];then
+ echo "PASS: redirection counter has expected values"
+else
+ echo "ERROR: no tcp connection was redirected"
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/netfilter/conntrack_vrf.sh b/tools/testing/selftests/netfilter/conntrack_vrf.sh
new file mode 100755
index 000000000000..8b5ea9234588
--- /dev/null
+++ b/tools/testing/selftests/netfilter/conntrack_vrf.sh
@@ -0,0 +1,241 @@
+#!/bin/sh
+
+# This script demonstrates interaction of conntrack and vrf.
+# The vrf driver calls the netfilter hooks again, with oif/iif
+# pointing at the VRF device.
+#
+# For ingress, this means first iteration has iifname of lower/real
+# device. In this script, thats veth0.
+# Second iteration is iifname set to vrf device, tvrf in this script.
+#
+# For egress, this is reversed: first iteration has the vrf device,
+# second iteration is done with the lower/real/veth0 device.
+#
+# test_ct_zone_in demonstrates unexpected change of nftables
+# behavior # caused by commit 09e856d54bda5f28 "vrf: Reset skb conntrack
+# connection on VRF rcv"
+#
+# It was possible to assign conntrack zone to a packet (or mark it for
+# `notracking`) in the prerouting chain before conntrack, based on real iif.
+#
+# After the change, the zone assignment is lost and the zone is assigned based
+# on the VRF master interface (in case such a rule exists).
+# assignment is lost. Instead, assignment based on the `iif` matching
+# Thus it is impossible to distinguish packets based on the original
+# interface.
+#
+# test_masquerade_vrf and test_masquerade_veth0 demonstrate the problem
+# that was supposed to be fixed by the commit mentioned above to make sure
+# that any fix to test case 1 won't break masquerade again.
+
+ksft_skip=4
+
+IP0=172.30.30.1
+IP1=172.30.30.2
+PFXL=30
+ret=0
+
+sfx=$(mktemp -u "XXXXXXXX")
+ns0="ns0-$sfx"
+ns1="ns1-$sfx"
+
+cleanup()
+{
+ ip netns pids $ns0 | xargs kill 2>/dev/null
+ ip netns pids $ns1 | xargs kill 2>/dev/null
+
+ ip netns del $ns0 $ns1
+}
+
+nft --version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without nft tool"
+ exit $ksft_skip
+fi
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without ip tool"
+ exit $ksft_skip
+fi
+
+ip netns add "$ns0"
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not create net namespace $ns0"
+ exit $ksft_skip
+fi
+ip netns add "$ns1"
+
+trap cleanup EXIT
+
+ip netns exec $ns0 sysctl -q -w net.ipv4.conf.default.rp_filter=0
+ip netns exec $ns0 sysctl -q -w net.ipv4.conf.all.rp_filter=0
+ip netns exec $ns0 sysctl -q -w net.ipv4.conf.all.rp_filter=0
+
+ip link add veth0 netns "$ns0" type veth peer name veth0 netns "$ns1" > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not add veth device"
+ exit $ksft_skip
+fi
+
+ip -net $ns0 li add tvrf type vrf table 9876
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not add vrf device"
+ exit $ksft_skip
+fi
+
+ip -net $ns0 li set lo up
+
+ip -net $ns0 li set veth0 master tvrf
+ip -net $ns0 li set tvrf up
+ip -net $ns0 li set veth0 up
+ip -net $ns1 li set veth0 up
+
+ip -net $ns0 addr add $IP0/$PFXL dev veth0
+ip -net $ns1 addr add $IP1/$PFXL dev veth0
+
+ip netns exec $ns1 iperf3 -s > /dev/null 2>&1&
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not start iperf3"
+ exit $ksft_skip
+fi
+
+# test vrf ingress handling.
+# The incoming connection should be placed in conntrack zone 1,
+# as decided by the first iteration of the ruleset.
+test_ct_zone_in()
+{
+ip netns exec $ns0 nft -f - <<EOF
+table testct {
+ chain rawpre {
+ type filter hook prerouting priority raw;
+
+ iif { veth0, tvrf } counter meta nftrace set 1
+ iif veth0 counter ct zone set 1 counter return
+ iif tvrf counter ct zone set 2 counter return
+ ip protocol icmp counter
+ notrack counter
+ }
+
+ chain rawout {
+ type filter hook output priority raw;
+
+ oif veth0 counter ct zone set 1 counter return
+ oif tvrf counter ct zone set 2 counter return
+ notrack counter
+ }
+}
+EOF
+ ip netns exec $ns1 ping -W 1 -c 1 -I veth0 $IP0 > /dev/null
+
+ # should be in zone 1, not zone 2
+ count=$(ip netns exec $ns0 conntrack -L -s $IP1 -d $IP0 -p icmp --zone 1 2>/dev/null | wc -l)
+ if [ $count -eq 1 ]; then
+ echo "PASS: entry found in conntrack zone 1"
+ else
+ echo "FAIL: entry not found in conntrack zone 1"
+ count=$(ip netns exec $ns0 conntrack -L -s $IP1 -d $IP0 -p icmp --zone 2 2> /dev/null | wc -l)
+ if [ $count -eq 1 ]; then
+ echo "FAIL: entry found in zone 2 instead"
+ else
+ echo "FAIL: entry not in zone 1 or 2, dumping table"
+ ip netns exec $ns0 conntrack -L
+ ip netns exec $ns0 nft list ruleset
+ fi
+ fi
+}
+
+# add masq rule that gets evaluated w. outif set to vrf device.
+# This tests the first iteration of the packet through conntrack,
+# oifname is the vrf device.
+test_masquerade_vrf()
+{
+ local qdisc=$1
+
+ if [ "$qdisc" != "default" ]; then
+ tc -net $ns0 qdisc add dev tvrf root $qdisc
+ fi
+
+ ip netns exec $ns0 conntrack -F 2>/dev/null
+
+ip netns exec $ns0 nft -f - <<EOF
+flush ruleset
+table ip nat {
+ chain rawout {
+ type filter hook output priority raw;
+
+ oif tvrf ct state untracked counter
+ }
+ chain postrouting2 {
+ type filter hook postrouting priority mangle;
+
+ oif tvrf ct state untracked counter
+ }
+ chain postrouting {
+ type nat hook postrouting priority 0;
+ # NB: masquerade should always be combined with 'oif(name) bla',
+ # lack of this is intentional here, we want to exercise double-snat.
+ ip saddr 172.30.30.0/30 counter masquerade random
+ }
+}
+EOF
+ ip netns exec $ns0 ip vrf exec tvrf iperf3 -t 1 -c $IP1 >/dev/null
+ if [ $? -ne 0 ]; then
+ echo "FAIL: iperf3 connect failure with masquerade + sport rewrite on vrf device"
+ ret=1
+ return
+ fi
+
+ # must also check that nat table was evaluated on second (lower device) iteration.
+ ip netns exec $ns0 nft list table ip nat |grep -q 'counter packets 2' &&
+ ip netns exec $ns0 nft list table ip nat |grep -q 'untracked counter packets [1-9]'
+ if [ $? -eq 0 ]; then
+ echo "PASS: iperf3 connect with masquerade + sport rewrite on vrf device ($qdisc qdisc)"
+ else
+ echo "FAIL: vrf rules have unexpected counter value"
+ ret=1
+ fi
+
+ if [ "$qdisc" != "default" ]; then
+ tc -net $ns0 qdisc del dev tvrf root
+ fi
+}
+
+# add masq rule that gets evaluated w. outif set to veth device.
+# This tests the 2nd iteration of the packet through conntrack,
+# oifname is the lower device (veth0 in this case).
+test_masquerade_veth()
+{
+ ip netns exec $ns0 conntrack -F 2>/dev/null
+ip netns exec $ns0 nft -f - <<EOF
+flush ruleset
+table ip nat {
+ chain postrouting {
+ type nat hook postrouting priority 0;
+ meta oif veth0 ip saddr 172.30.30.0/30 counter masquerade random
+ }
+}
+EOF
+ ip netns exec $ns0 ip vrf exec tvrf iperf3 -t 1 -c $IP1 > /dev/null
+ if [ $? -ne 0 ]; then
+ echo "FAIL: iperf3 connect failure with masquerade + sport rewrite on veth device"
+ ret=1
+ return
+ fi
+
+ # must also check that nat table was evaluated on second (lower device) iteration.
+ ip netns exec $ns0 nft list table ip nat |grep -q 'counter packets 2'
+ if [ $? -eq 0 ]; then
+ echo "PASS: iperf3 connect with masquerade + sport rewrite on veth device"
+ else
+ echo "FAIL: vrf masq rule has unexpected counter value"
+ ret=1
+ fi
+}
+
+test_ct_zone_in
+test_masquerade_vrf "default"
+test_masquerade_vrf "pfifo"
+test_masquerade_veth
+
+exit $ret
diff --git a/tools/testing/selftests/netfilter/ipip-conntrack-mtu.sh b/tools/testing/selftests/netfilter/ipip-conntrack-mtu.sh
new file mode 100755
index 000000000000..eb9553e4986b
--- /dev/null
+++ b/tools/testing/selftests/netfilter/ipip-conntrack-mtu.sh
@@ -0,0 +1,207 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+# Conntrack needs to reassemble fragments in order to have complete
+# packets for rule matching. Reassembly can lead to packet loss.
+
+# Consider the following setup:
+# +--------+ +---------+ +--------+
+# |Router A|-------|Wanrouter|-------|Router B|
+# | |.IPIP..| |..IPIP.| |
+# +--------+ +---------+ +--------+
+# / mtu 1400 \
+# / \
+#+--------+ +--------+
+#|Client A| |Client B|
+#| | | |
+#+--------+ +--------+
+
+# Router A and Router B use IPIP tunnel interfaces to tunnel traffic
+# between Client A and Client B over WAN. Wanrouter has MTU 1400 set
+# on its interfaces.
+
+rnd=$(mktemp -u XXXXXXXX)
+rx=$(mktemp)
+
+r_a="ns-ra-$rnd"
+r_b="ns-rb-$rnd"
+r_w="ns-rw-$rnd"
+c_a="ns-ca-$rnd"
+c_b="ns-cb-$rnd"
+
+checktool (){
+ if ! $1 > /dev/null 2>&1; then
+ echo "SKIP: Could not $2"
+ exit $ksft_skip
+ fi
+}
+
+checktool "iptables --version" "run test without iptables"
+checktool "ip -Version" "run test without ip tool"
+checktool "which socat" "run test without socat"
+checktool "ip netns add ${r_a}" "create net namespace"
+
+for n in ${r_b} ${r_w} ${c_a} ${c_b};do
+ ip netns add ${n}
+done
+
+cleanup() {
+ for n in ${r_a} ${r_b} ${r_w} ${c_a} ${c_b};do
+ ip netns del ${n}
+ done
+ rm -f ${rx}
+}
+
+trap cleanup EXIT
+
+test_path() {
+ msg="$1"
+
+ ip netns exec ${c_b} socat -t 3 - udp4-listen:5000,reuseaddr > ${rx} < /dev/null &
+
+ sleep 1
+ for i in 1 2 3; do
+ head -c1400 /dev/zero | tr "\000" "a" | \
+ ip netns exec ${c_a} socat -t 1 -u STDIN UDP:192.168.20.2:5000
+ done
+
+ wait
+
+ bytes=$(wc -c < ${rx})
+
+ if [ $bytes -eq 1400 ];then
+ echo "OK: PMTU $msg connection tracking"
+ else
+ echo "FAIL: PMTU $msg connection tracking: got $bytes, expected 1400"
+ exit 1
+ fi
+}
+
+# Detailed setup for Router A
+# ---------------------------
+# Interfaces:
+# eth0: 10.2.2.1/24
+# eth1: 192.168.10.1/24
+# ipip0: No IP address, local 10.2.2.1 remote 10.4.4.1
+# Routes:
+# 192.168.20.0/24 dev ipip0 (192.168.20.0/24 is subnet of Client B)
+# 10.4.4.1 via 10.2.2.254 (Router B via Wanrouter)
+# No iptables rules at all.
+
+ip link add veth0 netns ${r_a} type veth peer name veth0 netns ${r_w}
+ip link add veth1 netns ${r_a} type veth peer name veth0 netns ${c_a}
+
+l_addr="10.2.2.1"
+r_addr="10.4.4.1"
+ip netns exec ${r_a} ip link add ipip0 type ipip local ${l_addr} remote ${r_addr} mode ipip || exit $ksft_skip
+
+for dev in lo veth0 veth1 ipip0; do
+ ip -net ${r_a} link set $dev up
+done
+
+ip -net ${r_a} addr add 10.2.2.1/24 dev veth0
+ip -net ${r_a} addr add 192.168.10.1/24 dev veth1
+
+ip -net ${r_a} route add 192.168.20.0/24 dev ipip0
+ip -net ${r_a} route add 10.4.4.0/24 via 10.2.2.254
+
+ip netns exec ${r_a} sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null
+
+# Detailed setup for Router B
+# ---------------------------
+# Interfaces:
+# eth0: 10.4.4.1/24
+# eth1: 192.168.20.1/24
+# ipip0: No IP address, local 10.4.4.1 remote 10.2.2.1
+# Routes:
+# 192.168.10.0/24 dev ipip0 (192.168.10.0/24 is subnet of Client A)
+# 10.2.2.1 via 10.4.4.254 (Router A via Wanrouter)
+# No iptables rules at all.
+
+ip link add veth0 netns ${r_b} type veth peer name veth1 netns ${r_w}
+ip link add veth1 netns ${r_b} type veth peer name veth0 netns ${c_b}
+
+l_addr="10.4.4.1"
+r_addr="10.2.2.1"
+
+ip netns exec ${r_b} ip link add ipip0 type ipip local ${l_addr} remote ${r_addr} mode ipip || exit $ksft_skip
+
+for dev in lo veth0 veth1 ipip0; do
+ ip -net ${r_b} link set $dev up
+done
+
+ip -net ${r_b} addr add 10.4.4.1/24 dev veth0
+ip -net ${r_b} addr add 192.168.20.1/24 dev veth1
+
+ip -net ${r_b} route add 192.168.10.0/24 dev ipip0
+ip -net ${r_b} route add 10.2.2.0/24 via 10.4.4.254
+ip netns exec ${r_b} sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null
+
+# Client A
+ip -net ${c_a} addr add 192.168.10.2/24 dev veth0
+ip -net ${c_a} link set dev lo up
+ip -net ${c_a} link set dev veth0 up
+ip -net ${c_a} route add default via 192.168.10.1
+
+# Client A
+ip -net ${c_b} addr add 192.168.20.2/24 dev veth0
+ip -net ${c_b} link set dev veth0 up
+ip -net ${c_b} link set dev lo up
+ip -net ${c_b} route add default via 192.168.20.1
+
+# Wan
+ip -net ${r_w} addr add 10.2.2.254/24 dev veth0
+ip -net ${r_w} addr add 10.4.4.254/24 dev veth1
+
+ip -net ${r_w} link set dev lo up
+ip -net ${r_w} link set dev veth0 up mtu 1400
+ip -net ${r_w} link set dev veth1 up mtu 1400
+
+ip -net ${r_a} link set dev veth0 mtu 1400
+ip -net ${r_b} link set dev veth0 mtu 1400
+
+ip netns exec ${r_w} sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null
+
+# Path MTU discovery
+# ------------------
+# Running tracepath from Client A to Client B shows PMTU discovery is working
+# as expected:
+#
+# clienta:~# tracepath 192.168.20.2
+# 1?: [LOCALHOST] pmtu 1500
+# 1: 192.168.10.1 0.867ms
+# 1: 192.168.10.1 0.302ms
+# 2: 192.168.10.1 0.312ms pmtu 1480
+# 2: no reply
+# 3: 192.168.10.1 0.510ms pmtu 1380
+# 3: 192.168.20.2 2.320ms reached
+# Resume: pmtu 1380 hops 3 back 3
+
+# ip netns exec ${c_a} traceroute --mtu 192.168.20.2
+
+# Router A has learned PMTU (1400) to Router B from Wanrouter.
+# Client A has learned PMTU (1400 - IPIP overhead = 1380) to Client B
+# from Router A.
+
+#Send large UDP packet
+#---------------------
+#Now we send a 1400 bytes UDP packet from Client A to Client B:
+
+# clienta:~# head -c1400 /dev/zero | tr "\000" "a" | socat -u STDIN UDP:192.168.20.2:5000
+test_path "without"
+
+# The IPv4 stack on Client A already knows the PMTU to Client B, so the
+# UDP packet is sent as two fragments (1380 + 20). Router A forwards the
+# fragments between eth1 and ipip0. The fragments fit into the tunnel and
+# reach their destination.
+
+#When sending the large UDP packet again, Router A now reassembles the
+#fragments before routing the packet over ipip0. The resulting IPIP
+#packet is too big (1400) for the tunnel PMTU (1380) to Router B, it is
+#dropped on Router A before sending.
+
+ip netns exec ${r_a} iptables -A FORWARD -m conntrack --ctstate NEW
+test_path "with"
diff --git a/tools/testing/selftests/netfilter/nf-queue.c b/tools/testing/selftests/netfilter/nf-queue.c
index 29c73bce38fa..9e56b9d47037 100644
--- a/tools/testing/selftests/netfilter/nf-queue.c
+++ b/tools/testing/selftests/netfilter/nf-queue.c
@@ -17,9 +17,12 @@
struct options {
bool count_packets;
+ bool gso_enabled;
int verbose;
unsigned int queue_num;
unsigned int timeout;
+ uint32_t verdict;
+ uint32_t delay_ms;
};
static unsigned int queue_stats[5];
@@ -27,7 +30,7 @@ static struct options opts;
static void help(const char *p)
{
- printf("Usage: %s [-c|-v [-vv] ] [-t timeout] [-q queue_num]\n", p);
+ printf("Usage: %s [-c|-v [-vv] ] [-t timeout] [-q queue_num] [-Qdst_queue ] [ -d ms_delay ] [-G]\n", p);
}
static int parse_attr_cb(const struct nlattr *attr, void *data)
@@ -162,7 +165,7 @@ nfq_build_cfg_params(char *buf, uint8_t mode, int range, int queue_num)
}
static struct nlmsghdr *
-nfq_build_verdict(char *buf, int id, int queue_num, int verd)
+nfq_build_verdict(char *buf, int id, int queue_num, uint32_t verd)
{
struct nfqnl_msg_verdict_hdr vh = {
.verdict = htonl(verd),
@@ -189,9 +192,6 @@ static void print_stats(void)
unsigned int last, total;
int i;
- if (!opts.count_packets)
- return;
-
total = 0;
last = queue_stats[0];
@@ -234,7 +234,8 @@ struct mnl_socket *open_queue(void)
nlh = nfq_build_cfg_params(buf, NFQNL_COPY_PACKET, 0xFFFF, queue_num);
- flags = NFQA_CFG_F_GSO | NFQA_CFG_F_UID_GID;
+ flags = opts.gso_enabled ? NFQA_CFG_F_GSO : 0;
+ flags |= NFQA_CFG_F_UID_GID;
mnl_attr_put_u32(nlh, NFQA_CFG_FLAGS, htonl(flags));
mnl_attr_put_u32(nlh, NFQA_CFG_MASK, htonl(flags));
@@ -255,6 +256,17 @@ struct mnl_socket *open_queue(void)
return nl;
}
+static void sleep_ms(uint32_t delay)
+{
+ struct timespec ts = { .tv_sec = delay / 1000 };
+
+ delay %= 1000;
+
+ ts.tv_nsec = delay * 1000llu * 1000llu;
+
+ nanosleep(&ts, NULL);
+}
+
static int mainloop(void)
{
unsigned int buflen = 64 * 1024 + MNL_SOCKET_BUFFER_SIZE;
@@ -278,7 +290,7 @@ static int mainloop(void)
ret = mnl_socket_recvfrom(nl, buf, buflen);
if (ret == -1) {
- if (errno == ENOBUFS)
+ if (errno == ENOBUFS || errno == EINTR)
continue;
if (errno == EAGAIN) {
@@ -298,7 +310,10 @@ static int mainloop(void)
}
id = ret - MNL_CB_OK;
- nlh = nfq_build_verdict(buf, id, opts.queue_num, NF_ACCEPT);
+ if (opts.delay_ms)
+ sleep_ms(opts.delay_ms);
+
+ nlh = nfq_build_verdict(buf, id, opts.queue_num, opts.verdict);
if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) < 0) {
perror("mnl_socket_sendto");
exit(EXIT_FAILURE);
@@ -314,7 +329,7 @@ static void parse_opts(int argc, char **argv)
{
int c;
- while ((c = getopt(argc, argv, "chvt:q:")) != -1) {
+ while ((c = getopt(argc, argv, "chvt:q:Q:d:G")) != -1) {
switch (c) {
case 'c':
opts.count_packets = true;
@@ -328,20 +343,48 @@ static void parse_opts(int argc, char **argv)
if (opts.queue_num > 0xffff)
opts.queue_num = 0;
break;
+ case 'Q':
+ opts.verdict = atoi(optarg);
+ if (opts.verdict > 0xffff) {
+ fprintf(stderr, "Expected destination queue number\n");
+ exit(1);
+ }
+
+ opts.verdict <<= 16;
+ opts.verdict |= NF_QUEUE;
+ break;
+ case 'd':
+ opts.delay_ms = atoi(optarg);
+ if (opts.delay_ms == 0) {
+ fprintf(stderr, "Expected nonzero delay (in milliseconds)\n");
+ exit(1);
+ }
+ break;
case 't':
opts.timeout = atoi(optarg);
break;
+ case 'G':
+ opts.gso_enabled = false;
+ break;
case 'v':
opts.verbose++;
break;
}
}
+
+ if (opts.verdict != NF_ACCEPT && (opts.verdict >> 16 == opts.queue_num)) {
+ fprintf(stderr, "Cannot use same destination and source queue\n");
+ exit(1);
+ }
}
int main(int argc, char *argv[])
{
int ret;
+ opts.verdict = NF_ACCEPT;
+ opts.gso_enabled = true;
+
parse_opts(argc, argv);
ret = mainloop();
diff --git a/tools/testing/selftests/netfilter/nf_nat_edemux.sh b/tools/testing/selftests/netfilter/nf_nat_edemux.sh
new file mode 100755
index 000000000000..a1aa8f4a5828
--- /dev/null
+++ b/tools/testing/selftests/netfilter/nf_nat_edemux.sh
@@ -0,0 +1,127 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test NAT source port clash resolution
+#
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+ret=0
+
+sfx=$(mktemp -u "XXXXXXXX")
+ns1="ns1-$sfx"
+ns2="ns2-$sfx"
+socatpid=0
+
+cleanup()
+{
+ [ $socatpid -gt 0 ] && kill $socatpid
+ ip netns del $ns1
+ ip netns del $ns2
+}
+
+socat -h > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without socat"
+ exit $ksft_skip
+fi
+
+iptables --version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without iptables"
+ exit $ksft_skip
+fi
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without ip tool"
+ exit $ksft_skip
+fi
+
+ip netns add "$ns1"
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not create net namespace $ns1"
+ exit $ksft_skip
+fi
+
+trap cleanup EXIT
+
+ip netns add $ns2
+
+# Connect the namespaces using a veth pair
+ip link add name veth2 type veth peer name veth1
+ip link set netns $ns1 dev veth1
+ip link set netns $ns2 dev veth2
+
+ip netns exec $ns1 ip link set up dev lo
+ip netns exec $ns1 ip link set up dev veth1
+ip netns exec $ns1 ip addr add 192.168.1.1/24 dev veth1
+
+ip netns exec $ns2 ip link set up dev lo
+ip netns exec $ns2 ip link set up dev veth2
+ip netns exec $ns2 ip addr add 192.168.1.2/24 dev veth2
+
+# Create a server in one namespace
+ip netns exec $ns1 socat -u TCP-LISTEN:5201,fork OPEN:/dev/null,wronly=1 &
+socatpid=$!
+
+# Restrict source port to just one so we don't have to exhaust
+# all others.
+ip netns exec $ns2 sysctl -q net.ipv4.ip_local_port_range="10000 10000"
+
+# add a virtual IP using DNAT
+ip netns exec $ns2 iptables -t nat -A OUTPUT -d 10.96.0.1/32 -p tcp --dport 443 -j DNAT --to-destination 192.168.1.1:5201
+
+# ... and route it to the other namespace
+ip netns exec $ns2 ip route add 10.96.0.1 via 192.168.1.1
+
+sleep 1
+
+# add a persistent connection from the other namespace
+ip netns exec $ns2 socat -t 10 - TCP:192.168.1.1:5201 > /dev/null &
+
+sleep 1
+
+# ip daddr:dport will be rewritten to 192.168.1.1 5201
+# NAT must reallocate source port 10000 because
+# 192.168.1.2:10000 -> 192.168.1.1:5201 is already in use
+echo test | ip netns exec $ns2 socat -t 3 -u STDIN TCP:10.96.0.1:443,connect-timeout=3 >/dev/null
+ret=$?
+
+# Check socat can connect to 10.96.0.1:443 (aka 192.168.1.1:5201).
+if [ $ret -eq 0 ]; then
+ echo "PASS: socat can connect via NAT'd address"
+else
+ echo "FAIL: socat cannot connect via NAT'd address"
+fi
+
+# check sport clashres.
+ip netns exec $ns1 iptables -t nat -A PREROUTING -p tcp --dport 5202 -j REDIRECT --to-ports 5201
+ip netns exec $ns1 iptables -t nat -A PREROUTING -p tcp --dport 5203 -j REDIRECT --to-ports 5201
+
+sleep 5 | ip netns exec $ns2 socat -t 5 -u STDIN TCP:192.168.1.1:5202,connect-timeout=5 >/dev/null &
+cpid1=$!
+sleep 1
+
+# if connect succeeds, client closes instantly due to EOF on stdin.
+# if connect hangs, it will time out after 5s.
+echo | ip netns exec $ns2 socat -t 3 -u STDIN TCP:192.168.1.1:5203,connect-timeout=5 >/dev/null &
+cpid2=$!
+
+time_then=$(date +%s)
+wait $cpid2
+rv=$?
+time_now=$(date +%s)
+
+# Check how much time has elapsed, expectation is for
+# 'cpid2' to connect and then exit (and no connect delay).
+delta=$((time_now - time_then))
+
+if [ $delta -lt 2 -a $rv -eq 0 ]; then
+ echo "PASS: could connect to service via redirected ports"
+else
+ echo "FAIL: socat cannot connect to service via redirect ($delta seconds elapsed, returned $rv)"
+ ret=1
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/netfilter/nft_audit.sh b/tools/testing/selftests/netfilter/nft_audit.sh
new file mode 100755
index 000000000000..99ed5bd6e840
--- /dev/null
+++ b/tools/testing/selftests/netfilter/nft_audit.sh
@@ -0,0 +1,245 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Check that audit logs generated for nft commands are as expected.
+
+SKIP_RC=4
+RC=0
+
+nft --version >/dev/null 2>&1 || {
+ echo "SKIP: missing nft tool"
+ exit $SKIP_RC
+}
+
+# Run everything in a separate network namespace
+[ "${1}" != "run" ] && { unshare -n "${0}" run; exit $?; }
+
+# give other scripts a chance to finish - audit_logread sees all activity
+sleep 1
+
+logfile=$(mktemp)
+rulefile=$(mktemp)
+echo "logging into $logfile"
+./audit_logread >"$logfile" &
+logread_pid=$!
+trap 'kill $logread_pid; rm -f $logfile $rulefile' EXIT
+exec 3<"$logfile"
+
+do_test() { # (cmd, log)
+ echo -n "testing for cmd: $1 ... "
+ cat <&3 >/dev/null
+ $1 >/dev/null || exit 1
+ sleep 0.1
+ res=$(diff -a -u <(echo "$2") - <&3)
+ [ $? -eq 0 ] && { echo "OK"; return; }
+ echo "FAIL"
+ grep -v '^\(---\|+++\|@@\)' <<< "$res"
+ ((RC--))
+}
+
+nft flush ruleset
+
+# adding tables, chains and rules
+
+for table in t1 t2; do
+ do_test "nft add table $table" \
+ "table=$table family=2 entries=1 op=nft_register_table"
+
+ do_test "nft add chain $table c1" \
+ "table=$table family=2 entries=1 op=nft_register_chain"
+
+ do_test "nft add chain $table c2; add chain $table c3" \
+ "table=$table family=2 entries=2 op=nft_register_chain"
+
+ cmd="add rule $table c1 counter"
+
+ do_test "nft $cmd" \
+ "table=$table family=2 entries=1 op=nft_register_rule"
+
+ do_test "nft $cmd; $cmd" \
+ "table=$table family=2 entries=2 op=nft_register_rule"
+
+ cmd=""
+ sep=""
+ for chain in c2 c3; do
+ for i in {1..3}; do
+ cmd+="$sep add rule $table $chain counter"
+ sep=";"
+ done
+ done
+ do_test "nft $cmd" \
+ "table=$table family=2 entries=6 op=nft_register_rule"
+done
+
+for ((i = 0; i < 500; i++)); do
+ echo "add rule t2 c3 counter accept comment \"rule $i\""
+done >$rulefile
+do_test "nft -f $rulefile" \
+'table=t2 family=2 entries=500 op=nft_register_rule'
+
+# adding sets and elements
+
+settype='type inet_service; counter'
+setelem='{ 22, 80, 443 }'
+setblock="{ $settype; elements = $setelem; }"
+do_test "nft add set t1 s $setblock" \
+"table=t1 family=2 entries=4 op=nft_register_set"
+
+do_test "nft add set t1 s2 $setblock; add set t1 s3 { $settype; }" \
+"table=t1 family=2 entries=5 op=nft_register_set"
+
+do_test "nft add element t1 s3 $setelem" \
+"table=t1 family=2 entries=3 op=nft_register_setelem"
+
+# adding counters
+
+do_test 'nft add counter t1 c1' \
+'table=t1 family=2 entries=1 op=nft_register_obj'
+
+do_test 'nft add counter t2 c1; add counter t2 c2' \
+'table=t2 family=2 entries=2 op=nft_register_obj'
+
+for ((i = 3; i <= 500; i++)); do
+ echo "add counter t2 c$i"
+done >$rulefile
+do_test "nft -f $rulefile" \
+'table=t2 family=2 entries=498 op=nft_register_obj'
+
+# adding/updating quotas
+
+do_test 'nft add quota t1 q1 { 10 bytes }' \
+'table=t1 family=2 entries=1 op=nft_register_obj'
+
+do_test 'nft add quota t2 q1 { 10 bytes }; add quota t2 q2 { 10 bytes }' \
+'table=t2 family=2 entries=2 op=nft_register_obj'
+
+for ((i = 3; i <= 500; i++)); do
+ echo "add quota t2 q$i { 10 bytes }"
+done >$rulefile
+do_test "nft -f $rulefile" \
+'table=t2 family=2 entries=498 op=nft_register_obj'
+
+# changing the quota value triggers obj update path
+do_test 'nft add quota t1 q1 { 20 bytes }' \
+'table=t1 family=2 entries=1 op=nft_register_obj'
+
+# resetting rules
+
+do_test 'nft reset rules t1 c2' \
+'table=t1 family=2 entries=3 op=nft_reset_rule'
+
+do_test 'nft reset rules table t1' \
+'table=t1 family=2 entries=3 op=nft_reset_rule
+table=t1 family=2 entries=3 op=nft_reset_rule
+table=t1 family=2 entries=3 op=nft_reset_rule'
+
+do_test 'nft reset rules t2 c3' \
+'table=t2 family=2 entries=189 op=nft_reset_rule
+table=t2 family=2 entries=188 op=nft_reset_rule
+table=t2 family=2 entries=126 op=nft_reset_rule'
+
+do_test 'nft reset rules t2' \
+'table=t2 family=2 entries=3 op=nft_reset_rule
+table=t2 family=2 entries=3 op=nft_reset_rule
+table=t2 family=2 entries=186 op=nft_reset_rule
+table=t2 family=2 entries=188 op=nft_reset_rule
+table=t2 family=2 entries=129 op=nft_reset_rule'
+
+do_test 'nft reset rules' \
+'table=t1 family=2 entries=3 op=nft_reset_rule
+table=t1 family=2 entries=3 op=nft_reset_rule
+table=t1 family=2 entries=3 op=nft_reset_rule
+table=t2 family=2 entries=3 op=nft_reset_rule
+table=t2 family=2 entries=3 op=nft_reset_rule
+table=t2 family=2 entries=180 op=nft_reset_rule
+table=t2 family=2 entries=188 op=nft_reset_rule
+table=t2 family=2 entries=135 op=nft_reset_rule'
+
+# resetting sets and elements
+
+elem=(22 ,80 ,443)
+relem=""
+for i in {1..3}; do
+ relem+="${elem[((i - 1))]}"
+ do_test "nft reset element t1 s { $relem }" \
+ "table=t1 family=2 entries=$i op=nft_reset_setelem"
+done
+
+do_test 'nft reset set t1 s' \
+'table=t1 family=2 entries=3 op=nft_reset_setelem'
+
+# resetting counters
+
+do_test 'nft reset counter t1 c1' \
+'table=t1 family=2 entries=1 op=nft_reset_obj'
+
+do_test 'nft reset counters t1' \
+'table=t1 family=2 entries=1 op=nft_reset_obj'
+
+do_test 'nft reset counters t2' \
+'table=t2 family=2 entries=342 op=nft_reset_obj
+table=t2 family=2 entries=158 op=nft_reset_obj'
+
+do_test 'nft reset counters' \
+'table=t1 family=2 entries=1 op=nft_reset_obj
+table=t2 family=2 entries=341 op=nft_reset_obj
+table=t2 family=2 entries=159 op=nft_reset_obj'
+
+# resetting quotas
+
+do_test 'nft reset quota t1 q1' \
+'table=t1 family=2 entries=1 op=nft_reset_obj'
+
+do_test 'nft reset quotas t1' \
+'table=t1 family=2 entries=1 op=nft_reset_obj'
+
+do_test 'nft reset quotas t2' \
+'table=t2 family=2 entries=315 op=nft_reset_obj
+table=t2 family=2 entries=185 op=nft_reset_obj'
+
+do_test 'nft reset quotas' \
+'table=t1 family=2 entries=1 op=nft_reset_obj
+table=t2 family=2 entries=314 op=nft_reset_obj
+table=t2 family=2 entries=186 op=nft_reset_obj'
+
+# deleting rules
+
+readarray -t handles < <(nft -a list chain t1 c1 | \
+ sed -n 's/.*counter.* handle \(.*\)$/\1/p')
+
+do_test "nft delete rule t1 c1 handle ${handles[0]}" \
+'table=t1 family=2 entries=1 op=nft_unregister_rule'
+
+cmd='delete rule t1 c1 handle'
+do_test "nft $cmd ${handles[1]}; $cmd ${handles[2]}" \
+'table=t1 family=2 entries=2 op=nft_unregister_rule'
+
+do_test 'nft flush chain t1 c2' \
+'table=t1 family=2 entries=3 op=nft_unregister_rule'
+
+do_test 'nft flush table t2' \
+'table=t2 family=2 entries=509 op=nft_unregister_rule'
+
+# deleting chains
+
+do_test 'nft delete chain t2 c2' \
+'table=t2 family=2 entries=1 op=nft_unregister_chain'
+
+# deleting sets and elements
+
+do_test 'nft delete element t1 s { 22 }' \
+'table=t1 family=2 entries=1 op=nft_unregister_setelem'
+
+do_test 'nft delete element t1 s { 80, 443 }' \
+'table=t1 family=2 entries=2 op=nft_unregister_setelem'
+
+do_test 'nft flush set t1 s2' \
+'table=t1 family=2 entries=3 op=nft_unregister_setelem'
+
+do_test 'nft delete set t1 s2' \
+'table=t1 family=2 entries=1 op=nft_unregister_set'
+
+do_test 'nft delete set t1 s3' \
+'table=t1 family=2 entries=1 op=nft_unregister_set'
+
+exit $RC
diff --git a/tools/testing/selftests/netfilter/nft_concat_range.sh b/tools/testing/selftests/netfilter/nft_concat_range.sh
index 5a4938d6dcf2..e908009576c7 100755
--- a/tools/testing/selftests/netfilter/nft_concat_range.sh
+++ b/tools/testing/selftests/netfilter/nft_concat_range.sh
@@ -23,15 +23,15 @@ TESTS="reported_issues correctness concurrency timeout"
# Set types, defined by TYPE_ variables below
TYPES="net_port port_net net6_port port_proto net6_port_mac net6_port_mac_proto
- net_port_net net_mac net_mac_icmp net6_mac_icmp net6_port_net6_port
- net_port_mac_proto_net"
+ net_port_net net_mac mac_net net_mac_icmp net6_mac_icmp
+ net6_port_net6_port net_port_mac_proto_net"
# Reported bugs, also described by TYPE_ variables below
-BUGS="flush_remove_add"
+BUGS="flush_remove_add reload"
# List of possible paths to pktgen script from kernel tree for performance tests
PKTGEN_SCRIPT_PATHS="
- ../../../samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh
+ ../../../../samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh
pktgen/pktgen_bench_xmit_mode_netif_receive.sh"
# Definition of set types:
@@ -91,7 +91,7 @@ src
start 1
count 5
src_delta 2000
-tools sendip nc bash
+tools sendip socat nc bash
proto udp
race_repeat 3
@@ -116,7 +116,7 @@ src
start 10
count 5
src_delta 2000
-tools sendip nc bash
+tools sendip socat nc bash
proto udp6
race_repeat 3
@@ -141,7 +141,7 @@ src
start 1
count 5
src_delta 2000
-tools sendip nc bash
+tools sendip socat nc bash
proto udp
race_repeat 0
@@ -163,7 +163,7 @@ src mac
start 10
count 5
src_delta 2000
-tools sendip nc bash
+tools sendip socat nc bash
proto udp6
race_repeat 0
@@ -185,7 +185,7 @@ src mac proto
start 10
count 5
src_delta 2000
-tools sendip nc bash
+tools sendip socat nc bash
proto udp6
race_repeat 0
@@ -207,7 +207,7 @@ src addr4
start 1
count 5
src_delta 2000
-tools sendip nc bash
+tools sendip socat nc bash
proto udp
race_repeat 3
@@ -227,7 +227,7 @@ src addr6 port
start 10
count 5
src_delta 2000
-tools sendip nc
+tools sendip socat nc
proto udp6
race_repeat 3
@@ -247,7 +247,7 @@ src mac proto addr4
start 1
count 5
src_delta 2000
-tools sendip nc bash
+tools sendip socat nc bash
proto udp
race_repeat 0
@@ -264,7 +264,7 @@ src mac
start 1
count 5
src_delta 2000
-tools sendip nc bash
+tools sendip socat nc bash
proto udp
race_repeat 0
@@ -277,6 +277,23 @@ perf_entries 1000
perf_proto ipv4
"
+TYPE_mac_net="
+display mac,net
+type_spec ether_addr . ipv4_addr
+chain_spec ether saddr . ip saddr
+dst
+src mac addr4
+start 1
+count 5
+src_delta 2000
+tools sendip socat nc bash
+proto udp
+
+race_repeat 0
+
+perf_duration 0
+"
+
TYPE_net_mac_icmp="
display net,mac - ICMP
type_spec ipv4_addr . ether_addr
@@ -320,7 +337,7 @@ src addr4
start 1
count 5
src_delta 2000
-tools sendip nc
+tools sendip socat nc
proto udp
race_repeat 3
@@ -337,6 +354,23 @@ TYPE_flush_remove_add="
display Add two elements, flush, re-add
"
+TYPE_reload="
+display net,mac with reload
+type_spec ipv4_addr . ether_addr
+chain_spec ip daddr . ether saddr
+dst addr4
+src mac
+start 1
+count 1
+src_delta 2000
+tools sendip socat nc bash
+proto udp
+
+race_repeat 0
+
+perf_duration 0
+"
+
# Set template for all tests, types and rules are filled in depending on test
set_template='
flush ruleset
@@ -507,6 +541,24 @@ setup_send_udp() {
dst_port=
src_addr4=
}
+ elif command -v socat -v >/dev/null; then
+ send_udp() {
+ if [ -n "${src_addr4}" ]; then
+ B ip addr add "${src_addr4}" dev veth_b
+ __socatbind=",bind=${src_addr4}"
+ if [ -n "${src_port}" ];then
+ __socatbind="${__socatbind}:${src_port}"
+ fi
+ fi
+
+ ip addr add "${dst_addr4}" dev veth_a 2>/dev/null
+ [ -z "${dst_port}" ] && dst_port=12345
+
+ echo "test4" | B socat -t 0.01 STDIN UDP4-DATAGRAM:${dst_addr4}:${dst_port}"${__socatbind}"
+
+ src_addr4=
+ src_port=
+ }
elif command -v nc >/dev/null; then
if nc -u -w0 1.1.1.1 1 2>/dev/null; then
# OpenBSD netcat
@@ -572,6 +624,29 @@ setup_send_udp6() {
dst_port=
src_addr6=
}
+ elif command -v socat -v >/dev/null; then
+ send_udp6() {
+ ip -6 addr add "${dst_addr6}" dev veth_a nodad \
+ 2>/dev/null
+
+ __socatbind6=
+
+ if [ -n "${src_addr6}" ]; then
+ if [ -n "${src_addr6} != "${src_addr6_added} ]; then
+ B ip addr add "${src_addr6}" dev veth_b nodad
+
+ src_addr6_added=${src_addr6}
+ fi
+
+ __socatbind6=",bind=[${src_addr6}]"
+
+ if [ -n "${src_port}" ] ;then
+ __socatbind6="${__socatbind6}:${src_port}"
+ fi
+ fi
+
+ echo "test6" | B socat -t 0.01 STDIN UDP6-DATAGRAM:[${dst_addr6}]:${dst_port}"${__socatbind6}"
+ }
elif command -v nc >/dev/null && nc -u -w0 1.1.1.1 1 2>/dev/null; then
# GNU netcat might not work with IPv6, try next tool
send_udp6() {
@@ -984,7 +1059,8 @@ format() {
fi
done
for f in ${src}; do
- __expr="${__expr} . "
+ [ "${__expr}" != "{ " ] && __expr="${__expr} . "
+
__start="$(eval format_"${f}" "${srcstart}")"
__end="$(eval format_"${f}" "${srcend}")"
@@ -1455,6 +1531,59 @@ test_bug_flush_remove_add() {
nft flush ruleset
}
+# - add ranged element, check that packets match it
+# - reload the set, check packets still match
+test_bug_reload() {
+ setup veth send_"${proto}" set || return ${KSELFTEST_SKIP}
+ rstart=${start}
+
+ range_size=1
+ for i in $(seq "${start}" $((start + count))); do
+ end=$((start + range_size))
+
+ # Avoid negative or zero-sized port ranges
+ if [ $((end / 65534)) -gt $((start / 65534)) ]; then
+ start=${end}
+ end=$((end + 1))
+ fi
+ srcstart=$((start + src_delta))
+ srcend=$((end + src_delta))
+
+ add "$(format)" || return 1
+ range_size=$((range_size + 1))
+ start=$((end + range_size))
+ done
+
+ # check kernel does allocate pcpu sctrach map
+ # for reload with no elemet add/delete
+ ( echo flush set inet filter test ;
+ nft list set inet filter test ) | nft -f -
+
+ start=${rstart}
+ range_size=1
+
+ for i in $(seq "${start}" $((start + count))); do
+ end=$((start + range_size))
+
+ # Avoid negative or zero-sized port ranges
+ if [ $((end / 65534)) -gt $((start / 65534)) ]; then
+ start=${end}
+ end=$((end + 1))
+ fi
+ srcstart=$((start + src_delta))
+ srcend=$((end + src_delta))
+
+ for j in $(seq ${start} $((range_size / 2 + 1)) ${end}); do
+ send_match "${j}" $((j + src_delta)) || return 1
+ done
+
+ range_size=$((range_size + 1))
+ start=$((end + range_size))
+ done
+
+ nft flush ruleset
+}
+
test_reported_issues() {
eval test_bug_"${subtest}"
}
@@ -1513,4 +1642,4 @@ for name in ${TESTS}; do
done
done
-[ ${passed} -eq 0 ] && exit ${KSELFTEST_SKIP}
+[ ${passed} -eq 0 ] && exit ${KSELFTEST_SKIP} || exit 0
diff --git a/tools/testing/selftests/netfilter/nft_conntrack_helper.sh b/tools/testing/selftests/netfilter/nft_conntrack_helper.sh
index edf0a48da6bf..faa7778d7bd1 100755
--- a/tools/testing/selftests/netfilter/nft_conntrack_helper.sh
+++ b/tools/testing/selftests/netfilter/nft_conntrack_helper.sh
@@ -94,28 +94,50 @@ check_for_helper()
local message=$2
local port=$3
- ip netns exec ${netns} conntrack -L -p tcp --dport $port 2> /dev/null |grep -q 'helper=ftp'
+ if echo $message |grep -q 'ipv6';then
+ local family="ipv6"
+ else
+ local family="ipv4"
+ fi
+
+ ip netns exec ${netns} conntrack -L -f $family -p tcp --dport $port 2> /dev/null |grep -q 'helper=ftp'
if [ $? -ne 0 ] ; then
- echo "FAIL: ${netns} did not show attached helper $message" 1>&2
- ret=1
+ if [ $autoassign -eq 0 ] ;then
+ echo "FAIL: ${netns} did not show attached helper $message" 1>&2
+ ret=1
+ else
+ echo "PASS: ${netns} did not show attached helper $message" 1>&2
+ fi
+ else
+ if [ $autoassign -eq 0 ] ;then
+ echo "PASS: ${netns} connection on port $port has ftp helper attached" 1>&2
+ else
+ echo "FAIL: ${netns} connection on port $port has ftp helper attached" 1>&2
+ ret=1
+ fi
fi
- echo "PASS: ${netns} connection on port $port has ftp helper attached" 1>&2
return 0
}
test_helper()
{
local port=$1
- local msg=$2
+ local autoassign=$2
+
+ if [ $autoassign -eq 0 ] ;then
+ msg="set via ruleset"
+ else
+ msg="auto-assign"
+ fi
sleep 3 | ip netns exec ${ns2} nc -w 2 -l -p $port > /dev/null &
- sleep 1
sleep 1 | ip netns exec ${ns1} nc -w 2 10.0.1.2 $port > /dev/null &
+ sleep 1
- check_for_helper "$ns1" "ip $msg" $port
- check_for_helper "$ns2" "ip $msg" $port
+ check_for_helper "$ns1" "ip $msg" $port $autoassign
+ check_for_helper "$ns2" "ip $msg" $port $autoassign
wait
@@ -128,8 +150,8 @@ test_helper()
sleep 3 | ip netns exec ${ns2} nc -w 2 -6 -l -p $port > /dev/null &
- sleep 1
sleep 1 | ip netns exec ${ns1} nc -w 2 -6 dead:1::2 $port > /dev/null &
+ sleep 1
check_for_helper "$ns1" "ipv6 $msg" $port
check_for_helper "$ns2" "ipv6 $msg" $port
@@ -167,9 +189,9 @@ if [ $? -ne 0 ];then
fi
fi
-test_helper 2121 "set via ruleset"
-ip netns exec ${ns1} sysctl -q 'net.netfilter.nf_conntrack_helper=1'
-ip netns exec ${ns2} sysctl -q 'net.netfilter.nf_conntrack_helper=1'
-test_helper 21 "auto-assign"
+test_helper 2121 0
+ip netns exec ${ns1} sysctl -qe 'net.netfilter.nf_conntrack_helper=1'
+ip netns exec ${ns2} sysctl -qe 'net.netfilter.nf_conntrack_helper=1'
+test_helper 21 1
exit $ret
diff --git a/tools/testing/selftests/netfilter/nft_fib.sh b/tools/testing/selftests/netfilter/nft_fib.sh
new file mode 100755
index 000000000000..dff476e45e77
--- /dev/null
+++ b/tools/testing/selftests/netfilter/nft_fib.sh
@@ -0,0 +1,273 @@
+#!/bin/bash
+#
+# This tests the fib expression.
+#
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+ret=0
+
+sfx=$(mktemp -u "XXXXXXXX")
+ns1="ns1-$sfx"
+ns2="ns2-$sfx"
+nsrouter="nsrouter-$sfx"
+timeout=4
+
+log_netns=$(sysctl -n net.netfilter.nf_log_all_netns)
+
+cleanup()
+{
+ ip netns del ${ns1}
+ ip netns del ${ns2}
+ ip netns del ${nsrouter}
+
+ [ $log_netns -eq 0 ] && sysctl -q net.netfilter.nf_log_all_netns=$log_netns
+}
+
+nft --version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without nft tool"
+ exit $ksft_skip
+fi
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without ip tool"
+ exit $ksft_skip
+fi
+
+ip netns add ${nsrouter}
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not create net namespace"
+ exit $ksft_skip
+fi
+
+trap cleanup EXIT
+
+dmesg | grep -q ' nft_rpfilter: '
+if [ $? -eq 0 ]; then
+ dmesg -c | grep ' nft_rpfilter: '
+ echo "WARN: a previous test run has failed" 1>&2
+fi
+
+sysctl -q net.netfilter.nf_log_all_netns=1
+ip netns add ${ns1}
+ip netns add ${ns2}
+
+load_ruleset() {
+ local netns=$1
+
+ip netns exec ${netns} nft -f /dev/stdin <<EOF
+table inet filter {
+ chain prerouting {
+ type filter hook prerouting priority 0; policy accept;
+ fib saddr . iif oif missing counter log prefix "$netns nft_rpfilter: " drop
+ }
+}
+EOF
+}
+
+load_pbr_ruleset() {
+ local netns=$1
+
+ip netns exec ${netns} nft -f /dev/stdin <<EOF
+table inet filter {
+ chain forward {
+ type filter hook forward priority raw;
+ fib saddr . iif oif gt 0 accept
+ log drop
+ }
+}
+EOF
+}
+
+load_ruleset_count() {
+ local netns=$1
+
+ip netns exec ${netns} nft -f /dev/stdin <<EOF
+table inet filter {
+ chain prerouting {
+ type filter hook prerouting priority 0; policy accept;
+ ip daddr 1.1.1.1 fib saddr . iif oif missing counter drop
+ ip6 daddr 1c3::c01d fib saddr . iif oif missing counter drop
+ }
+}
+EOF
+}
+
+check_drops() {
+ dmesg | grep -q ' nft_rpfilter: '
+ if [ $? -eq 0 ]; then
+ dmesg | grep ' nft_rpfilter: '
+ echo "FAIL: rpfilter did drop packets"
+ return 1
+ fi
+
+ return 0
+}
+
+check_fib_counter() {
+ local want=$1
+ local ns=$2
+ local address=$3
+
+ line=$(ip netns exec ${ns} nft list table inet filter | grep 'fib saddr . iif' | grep $address | grep "packets $want" )
+ ret=$?
+
+ if [ $ret -ne 0 ];then
+ echo "Netns $ns fib counter doesn't match expected packet count of $want for $address" 1>&2
+ ip netns exec ${ns} nft list table inet filter
+ return 1
+ fi
+
+ if [ $want -gt 0 ]; then
+ echo "PASS: fib expression did drop packets for $address"
+ fi
+
+ return 0
+}
+
+load_ruleset ${nsrouter}
+load_ruleset ${ns1}
+load_ruleset ${ns2}
+
+ip link add veth0 netns ${nsrouter} type veth peer name eth0 netns ${ns1} > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: No virtual ethernet pair device support in kernel"
+ exit $ksft_skip
+fi
+ip link add veth1 netns ${nsrouter} type veth peer name eth0 netns ${ns2}
+
+ip -net ${nsrouter} link set lo up
+ip -net ${nsrouter} link set veth0 up
+ip -net ${nsrouter} addr add 10.0.1.1/24 dev veth0
+ip -net ${nsrouter} addr add dead:1::1/64 dev veth0
+
+ip -net ${nsrouter} link set veth1 up
+ip -net ${nsrouter} addr add 10.0.2.1/24 dev veth1
+ip -net ${nsrouter} addr add dead:2::1/64 dev veth1
+
+ip -net ${ns1} link set lo up
+ip -net ${ns1} link set eth0 up
+
+ip -net ${ns2} link set lo up
+ip -net ${ns2} link set eth0 up
+
+ip -net ${ns1} addr add 10.0.1.99/24 dev eth0
+ip -net ${ns1} addr add dead:1::99/64 dev eth0
+ip -net ${ns1} route add default via 10.0.1.1
+ip -net ${ns1} route add default via dead:1::1
+
+ip -net ${ns2} addr add 10.0.2.99/24 dev eth0
+ip -net ${ns2} addr add dead:2::99/64 dev eth0
+ip -net ${ns2} route add default via 10.0.2.1
+ip -net ${ns2} route add default via dead:2::1
+
+test_ping() {
+ local daddr4=$1
+ local daddr6=$2
+
+ ip netns exec ${ns1} ping -c 1 -q $daddr4 > /dev/null
+ ret=$?
+ if [ $ret -ne 0 ];then
+ check_drops
+ echo "FAIL: ${ns1} cannot reach $daddr4, ret $ret" 1>&2
+ return 1
+ fi
+
+ ip netns exec ${ns1} ping -c 3 -q $daddr6 > /dev/null
+ ret=$?
+ if [ $ret -ne 0 ];then
+ check_drops
+ echo "FAIL: ${ns1} cannot reach $daddr6, ret $ret" 1>&2
+ return 1
+ fi
+
+ return 0
+}
+
+ip netns exec ${nsrouter} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
+ip netns exec ${nsrouter} sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
+ip netns exec ${nsrouter} sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
+ip netns exec ${nsrouter} sysctl net.ipv4.conf.all.rp_filter=0 > /dev/null
+ip netns exec ${nsrouter} sysctl net.ipv4.conf.veth0.rp_filter=0 > /dev/null
+
+sleep 3
+
+test_ping 10.0.2.1 dead:2::1 || exit 1
+check_drops || exit 1
+
+test_ping 10.0.2.99 dead:2::99 || exit 1
+check_drops || exit 1
+
+echo "PASS: fib expression did not cause unwanted packet drops"
+
+ip netns exec ${nsrouter} nft flush table inet filter
+
+ip -net ${ns1} route del default
+ip -net ${ns1} -6 route del default
+
+ip -net ${ns1} addr del 10.0.1.99/24 dev eth0
+ip -net ${ns1} addr del dead:1::99/64 dev eth0
+
+ip -net ${ns1} addr add 10.0.2.99/24 dev eth0
+ip -net ${ns1} addr add dead:2::99/64 dev eth0
+
+ip -net ${ns1} route add default via 10.0.2.1
+ip -net ${ns1} -6 route add default via dead:2::1
+
+ip -net ${nsrouter} addr add dead:2::1/64 dev veth0
+
+# switch to ruleset that doesn't log, this time
+# its expected that this does drop the packets.
+load_ruleset_count ${nsrouter}
+
+# ns1 has a default route, but nsrouter does not.
+# must not check return value, ping to 1.1.1.1 will
+# fail.
+check_fib_counter 0 ${nsrouter} 1.1.1.1 || exit 1
+check_fib_counter 0 ${nsrouter} 1c3::c01d || exit 1
+
+ip netns exec ${ns1} ping -c 1 -W 1 -q 1.1.1.1 > /dev/null
+check_fib_counter 1 ${nsrouter} 1.1.1.1 || exit 1
+
+sleep 2
+ip netns exec ${ns1} ping -c 3 -q 1c3::c01d > /dev/null
+check_fib_counter 3 ${nsrouter} 1c3::c01d || exit 1
+
+# delete all rules
+ip netns exec ${ns1} nft flush ruleset
+ip netns exec ${ns2} nft flush ruleset
+ip netns exec ${nsrouter} nft flush ruleset
+
+ip -net ${ns1} addr add 10.0.1.99/24 dev eth0
+ip -net ${ns1} addr add dead:1::99/64 dev eth0
+
+ip -net ${ns1} addr del 10.0.2.99/24 dev eth0
+ip -net ${ns1} addr del dead:2::99/64 dev eth0
+
+ip -net ${nsrouter} addr del dead:2::1/64 dev veth0
+
+# ... pbr ruleset for the router, check iif+oif.
+load_pbr_ruleset ${nsrouter}
+if [ $? -ne 0 ] ; then
+ echo "SKIP: Could not load fib forward ruleset"
+ exit $ksft_skip
+fi
+
+ip -net ${nsrouter} rule add from all table 128
+ip -net ${nsrouter} rule add from all iif veth0 table 129
+ip -net ${nsrouter} route add table 128 to 10.0.1.0/24 dev veth0
+ip -net ${nsrouter} route add table 129 to 10.0.2.0/24 dev veth1
+
+# drop main ipv4 table
+ip -net ${nsrouter} -4 rule delete table main
+
+test_ping 10.0.2.99 dead:2::99
+if [ $? -ne 0 ] ; then
+ ip -net ${nsrouter} nft list ruleset
+ echo "FAIL: fib mismatch in pbr setup"
+ exit 1
+fi
+
+echo "PASS: fib expression forward check with policy based routing"
+exit 0
diff --git a/tools/testing/selftests/netfilter/nft_flowtable.sh b/tools/testing/selftests/netfilter/nft_flowtable.sh
index d3e0809ab368..a32f490f7539 100755
--- a/tools/testing/selftests/netfilter/nft_flowtable.sh
+++ b/tools/testing/selftests/netfilter/nft_flowtable.sh
@@ -2,62 +2,57 @@
# SPDX-License-Identifier: GPL-2.0
#
# This tests basic flowtable functionality.
-# Creates following topology:
+# Creates following default topology:
#
# Originator (MTU 9000) <-Router1-> MTU 1500 <-Router2-> Responder (MTU 2000)
# Router1 is the one doing flow offloading, Router2 has no special
# purpose other than having a link that is smaller than either Originator
# and responder, i.e. TCPMSS announced values are too large and will still
# result in fragmentation and/or PMTU discovery.
+#
+# You can check with different Orgininator/Link/Responder MTU eg:
+# nft_flowtable.sh -o8000 -l1500 -r2000
+#
+
+sfx=$(mktemp -u "XXXXXXXX")
+ns1="ns1-$sfx"
+ns2="ns2-$sfx"
+nsr1="nsr1-$sfx"
+nsr2="nsr2-$sfx"
# Kselftest framework requirement - SKIP code is 4.
ksft_skip=4
ret=0
-ns1in=""
-ns2in=""
+nsin=""
ns1out=""
ns2out=""
log_netns=$(sysctl -n net.netfilter.nf_log_all_netns)
-nft --version > /dev/null 2>&1
-if [ $? -ne 0 ];then
- echo "SKIP: Could not run test without nft tool"
- exit $ksft_skip
-fi
-
-ip -Version > /dev/null 2>&1
-if [ $? -ne 0 ];then
- echo "SKIP: Could not run test without ip tool"
- exit $ksft_skip
-fi
-
-which nc > /dev/null 2>&1
-if [ $? -ne 0 ];then
- echo "SKIP: Could not run test without nc (netcat)"
- exit $ksft_skip
-fi
-
-ip netns add nsr1
-if [ $? -ne 0 ];then
- echo "SKIP: Could not create net namespace"
- exit $ksft_skip
-fi
+checktool (){
+ if ! $1 > /dev/null 2>&1; then
+ echo "SKIP: Could not $2"
+ exit $ksft_skip
+ fi
+}
-ip netns add ns1
-ip netns add ns2
+checktool "nft --version" "run test without nft tool"
+checktool "ip -Version" "run test without ip tool"
+checktool "which nc" "run test without nc (netcat)"
+checktool "ip netns add $nsr1" "create net namespace $nsr1"
-ip netns add nsr2
+ip netns add $ns1
+ip netns add $ns2
+ip netns add $nsr2
cleanup() {
- for i in 1 2; do
- ip netns del ns$i
- ip netns del nsr$i
- done
+ ip netns del $ns1
+ ip netns del $ns2
+ ip netns del $nsr1
+ ip netns del $nsr2
- rm -f "$ns1in" "$ns1out"
- rm -f "$ns2in" "$ns2out"
+ rm -f "$nsin" "$ns1out" "$ns2out"
[ $log_netns -eq 0 ] && sysctl -q net.netfilter.nf_log_all_netns=$log_netns
}
@@ -66,22 +61,21 @@ trap cleanup EXIT
sysctl -q net.netfilter.nf_log_all_netns=1
-ip link add veth0 netns nsr1 type veth peer name eth0 netns ns1
-ip link add veth1 netns nsr1 type veth peer name veth0 netns nsr2
+ip link add veth0 netns $nsr1 type veth peer name eth0 netns $ns1
+ip link add veth1 netns $nsr1 type veth peer name veth0 netns $nsr2
-ip link add veth1 netns nsr2 type veth peer name eth0 netns ns2
+ip link add veth1 netns $nsr2 type veth peer name eth0 netns $ns2
for dev in lo veth0 veth1; do
- for i in 1 2; do
- ip -net nsr$i link set $dev up
- done
+ ip -net $nsr1 link set $dev up
+ ip -net $nsr2 link set $dev up
done
-ip -net nsr1 addr add 10.0.1.1/24 dev veth0
-ip -net nsr1 addr add dead:1::1/64 dev veth0
+ip -net $nsr1 addr add 10.0.1.1/24 dev veth0
+ip -net $nsr1 addr add dead:1::1/64 dev veth0
-ip -net nsr2 addr add 10.0.2.1/24 dev veth1
-ip -net nsr2 addr add dead:2::1/64 dev veth1
+ip -net $nsr2 addr add 10.0.2.1/24 dev veth1
+ip -net $nsr2 addr add dead:2::1/64 dev veth1
# set different MTUs so we need to push packets coming from ns1 (large MTU)
# to ns2 (smaller MTU) to stack either to perform fragmentation (ip_no_pmtu_disc=1),
@@ -89,76 +83,100 @@ ip -net nsr2 addr add dead:2::1/64 dev veth1
# ns2 is going via nsr2 with a smaller mtu, so that TCPMSS announced by both peers
# is NOT the lowest link mtu.
-ip -net nsr1 link set veth0 mtu 9000
-ip -net ns1 link set eth0 mtu 9000
+omtu=9000
+lmtu=1500
+rmtu=2000
+
+usage(){
+ echo "nft_flowtable.sh [OPTIONS]"
+ echo
+ echo "MTU options"
+ echo " -o originator"
+ echo " -l link"
+ echo " -r responder"
+ exit 1
+}
+
+while getopts "o:l:r:" o
+do
+ case $o in
+ o) omtu=$OPTARG;;
+ l) lmtu=$OPTARG;;
+ r) rmtu=$OPTARG;;
+ *) usage;;
+ esac
+done
+
+if ! ip -net $nsr1 link set veth0 mtu $omtu; then
+ exit 1
+fi
+
+ip -net $ns1 link set eth0 mtu $omtu
+
+if ! ip -net $nsr2 link set veth1 mtu $rmtu; then
+ exit 1
+fi
-ip -net nsr2 link set veth1 mtu 2000
-ip -net ns2 link set eth0 mtu 2000
+ip -net $ns2 link set eth0 mtu $rmtu
# transfer-net between nsr1 and nsr2.
# these addresses are not used for connections.
-ip -net nsr1 addr add 192.168.10.1/24 dev veth1
-ip -net nsr1 addr add fee1:2::1/64 dev veth1
+ip -net $nsr1 addr add 192.168.10.1/24 dev veth1
+ip -net $nsr1 addr add fee1:2::1/64 dev veth1
-ip -net nsr2 addr add 192.168.10.2/24 dev veth0
-ip -net nsr2 addr add fee1:2::2/64 dev veth0
+ip -net $nsr2 addr add 192.168.10.2/24 dev veth0
+ip -net $nsr2 addr add fee1:2::2/64 dev veth0
-for i in 1 2; do
- ip netns exec nsr$i sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
- ip netns exec nsr$i sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
+for i in 0 1; do
+ ip netns exec $nsr1 sysctl net.ipv4.conf.veth$i.forwarding=1 > /dev/null
+ ip netns exec $nsr2 sysctl net.ipv4.conf.veth$i.forwarding=1 > /dev/null
+done
- ip -net ns$i link set lo up
- ip -net ns$i link set eth0 up
- ip -net ns$i addr add 10.0.$i.99/24 dev eth0
- ip -net ns$i route add default via 10.0.$i.1
- ip -net ns$i addr add dead:$i::99/64 dev eth0
- ip -net ns$i route add default via dead:$i::1
- ip netns exec ns$i sysctl net.ipv4.tcp_no_metrics_save=1 > /dev/null
+for ns in $ns1 $ns2;do
+ ip -net $ns link set lo up
+ ip -net $ns link set eth0 up
+ if ! ip netns exec $ns sysctl net.ipv4.tcp_no_metrics_save=1 > /dev/null; then
+ echo "ERROR: Check Originator/Responder values (problem during address addition)"
+ exit 1
+ fi
# don't set ip DF bit for first two tests
- ip netns exec ns$i sysctl net.ipv4.ip_no_pmtu_disc=1 > /dev/null
+ ip netns exec $ns sysctl net.ipv4.ip_no_pmtu_disc=1 > /dev/null
done
-ip -net nsr1 route add default via 192.168.10.2
-ip -net nsr2 route add default via 192.168.10.1
+ip -net $ns1 addr add 10.0.1.99/24 dev eth0
+ip -net $ns2 addr add 10.0.2.99/24 dev eth0
+ip -net $ns1 route add default via 10.0.1.1
+ip -net $ns2 route add default via 10.0.2.1
+ip -net $ns1 addr add dead:1::99/64 dev eth0
+ip -net $ns2 addr add dead:2::99/64 dev eth0
+ip -net $ns1 route add default via dead:1::1
+ip -net $ns2 route add default via dead:2::1
-ip netns exec nsr1 nft -f - <<EOF
+ip -net $nsr1 route add default via 192.168.10.2
+ip -net $nsr2 route add default via 192.168.10.1
+
+ip netns exec $nsr1 nft -f - <<EOF
table inet filter {
flowtable f1 {
hook ingress priority 0
devices = { veth0, veth1 }
}
+ counter routed_orig { }
+ counter routed_repl { }
+
chain forward {
type filter hook forward priority 0; policy drop;
# flow offloaded? Tag ct with mark 1, so we can detect when it fails.
- meta oif "veth1" tcp dport 12345 flow offload @f1 counter
-
- # use packet size to trigger 'should be offloaded by now'.
- # otherwise, if 'flow offload' expression never offloads, the
- # test will pass.
- tcp dport 12345 meta length gt 200 ct mark set 1 counter
-
- # this turns off flow offloading internally, so expect packets again
- tcp flags fin,rst ct mark set 0 accept
-
- # this allows large packets from responder, we need this as long
- # as PMTUd is off.
- # This rule is deleted for the last test, when we expect PMTUd
- # to kick in and ensure all packets meet mtu requirements.
- meta length gt 1500 accept comment something-to-grep-for
+ meta oif "veth1" tcp dport 12345 ct mark set 1 flow add @f1 counter name routed_orig accept
- # next line blocks connection w.o. working offload.
- # we only do this for reverse dir, because we expect packets to
- # enter slow path due to MTU mismatch of veth0 and veth1.
- tcp sport 12345 ct mark 1 counter log prefix "mark failure " drop
+ # count packets supposedly offloaded as per direction.
+ ct mark 1 counter name ct direction map { original : routed_orig, reply : routed_repl } accept
ct state established,related accept
- # for packets that we can't offload yet, i.e. SYN (any ct that is not confirmed)
- meta length lt 200 oif "veth1" tcp dport 12345 counter accept
-
meta nfproto ipv4 meta l4proto icmp accept
meta nfproto ipv6 meta l4proto icmpv6 accept
}
@@ -170,35 +188,51 @@ if [ $? -ne 0 ]; then
exit $ksft_skip
fi
+ip netns exec $ns2 nft -f - <<EOF
+table inet filter {
+ counter ip4dscp0 { }
+ counter ip4dscp3 { }
+
+ chain input {
+ type filter hook input priority 0; policy accept;
+ meta l4proto tcp goto {
+ ip dscp cs3 counter name ip4dscp3 accept
+ ip dscp 0 counter name ip4dscp0 accept
+ }
+ }
+}
+EOF
+
+if [ $? -ne 0 ]; then
+ echo "SKIP: Could not load nft ruleset"
+ exit $ksft_skip
+fi
+
# test basic connectivity
-ip netns exec ns1 ping -c 1 -q 10.0.2.99 > /dev/null
-if [ $? -ne 0 ];then
- echo "ERROR: ns1 cannot reach ns2" 1>&2
- bash
+if ! ip netns exec $ns1 ping -c 1 -q 10.0.2.99 > /dev/null; then
+ echo "ERROR: $ns1 cannot reach ns2" 1>&2
exit 1
fi
-ip netns exec ns2 ping -c 1 -q 10.0.1.99 > /dev/null
-if [ $? -ne 0 ];then
- echo "ERROR: ns2 cannot reach ns1" 1>&2
+if ! ip netns exec $ns2 ping -c 1 -q 10.0.1.99 > /dev/null; then
+ echo "ERROR: $ns2 cannot reach $ns1" 1>&2
exit 1
fi
if [ $ret -eq 0 ];then
- echo "PASS: netns routing/connectivity: ns1 can reach ns2"
+ echo "PASS: netns routing/connectivity: $ns1 can reach $ns2"
fi
-ns1in=$(mktemp)
+nsin=$(mktemp)
ns1out=$(mktemp)
-ns2in=$(mktemp)
ns2out=$(mktemp)
make_file()
{
name=$1
- who=$2
- SIZE=$((RANDOM % (1024 * 8)))
+ SIZE=$((RANDOM % (1024 * 128)))
+ SIZE=$((SIZE + (1024 * 8)))
TSIZE=$((SIZE * 1024))
dd if=/dev/urandom of="$name" bs=1024 count=$SIZE 2> /dev/null
@@ -209,14 +243,99 @@ make_file()
dd if=/dev/urandom conf=notrunc of="$name" bs=1 count=$SIZE 2> /dev/null
}
+check_counters()
+{
+ local what=$1
+ local ok=1
+
+ local orig=$(ip netns exec $nsr1 nft reset counter inet filter routed_orig | grep packets)
+ local repl=$(ip netns exec $nsr1 nft reset counter inet filter routed_repl | grep packets)
+
+ local orig_cnt=${orig#*bytes}
+ local repl_cnt=${repl#*bytes}
+
+ local fs=$(du -sb $nsin)
+ local max_orig=${fs%%/*}
+ local max_repl=$((max_orig/4))
+
+ if [ $orig_cnt -gt $max_orig ];then
+ echo "FAIL: $what: original counter $orig_cnt exceeds expected value $max_orig" 1>&2
+ ret=1
+ ok=0
+ fi
+
+ if [ $repl_cnt -gt $max_repl ];then
+ echo "FAIL: $what: reply counter $repl_cnt exceeds expected value $max_repl" 1>&2
+ ret=1
+ ok=0
+ fi
+
+ if [ $ok -eq 1 ]; then
+ echo "PASS: $what"
+ fi
+}
+
+check_dscp()
+{
+ local what=$1
+ local ok=1
+
+ local counter=$(ip netns exec $ns2 nft reset counter inet filter ip4dscp3 | grep packets)
+
+ local pc4=${counter%*bytes*}
+ local pc4=${pc4#*packets}
+
+ local counter=$(ip netns exec $ns2 nft reset counter inet filter ip4dscp0 | grep packets)
+ local pc4z=${counter%*bytes*}
+ local pc4z=${pc4z#*packets}
+
+ case "$what" in
+ "dscp_none")
+ if [ $pc4 -gt 0 ] || [ $pc4z -eq 0 ]; then
+ echo "FAIL: dscp counters do not match, expected dscp3 == 0, dscp0 > 0, but got $pc4,$pc4z" 1>&2
+ ret=1
+ ok=0
+ fi
+ ;;
+ "dscp_fwd")
+ if [ $pc4 -eq 0 ] || [ $pc4z -eq 0 ]; then
+ echo "FAIL: dscp counters do not match, expected dscp3 and dscp0 > 0 but got $pc4,$pc4z" 1>&2
+ ret=1
+ ok=0
+ fi
+ ;;
+ "dscp_ingress")
+ if [ $pc4 -eq 0 ] || [ $pc4z -gt 0 ]; then
+ echo "FAIL: dscp counters do not match, expected dscp3 > 0, dscp0 == 0 but got $pc4,$pc4z" 1>&2
+ ret=1
+ ok=0
+ fi
+ ;;
+ "dscp_egress")
+ if [ $pc4 -eq 0 ] || [ $pc4z -gt 0 ]; then
+ echo "FAIL: dscp counters do not match, expected dscp3 > 0, dscp0 == 0 but got $pc4,$pc4z" 1>&2
+ ret=1
+ ok=0
+ fi
+ ;;
+ *)
+ echo "FAIL: Unknown DSCP check" 1>&2
+ ret=1
+ ok=0
+ esac
+
+ if [ $ok -eq 1 ] ;then
+ echo "PASS: $what: dscp packet counters match"
+ fi
+}
+
check_transfer()
{
in=$1
out=$2
what=$3
- cmp "$in" "$out" > /dev/null 2>&1
- if [ $? -ne 0 ] ;then
+ if ! cmp "$in" "$out" > /dev/null 2>&1; then
echo "FAIL: file mismatch for $what" 1>&2
ls -l "$in"
ls -l "$out"
@@ -234,26 +353,39 @@ test_tcp_forwarding_ip()
local dstport=$4
local lret=0
- ip netns exec $nsb nc -w 5 -l -p 12345 < "$ns2in" > "$ns2out" &
+ ip netns exec $nsb nc -w 5 -l -p 12345 < "$nsin" > "$ns2out" &
lpid=$!
sleep 1
- ip netns exec $nsa nc -w 4 "$dstip" "$dstport" < "$ns1in" > "$ns1out" &
+ ip netns exec $nsa nc -w 4 "$dstip" "$dstport" < "$nsin" > "$ns1out" &
cpid=$!
- sleep 3
+ sleep 1
+
+ prev="$(ls -l $ns1out $ns2out)"
+ sleep 1
- kill $lpid
- kill $cpid
- wait
+ while [[ "$prev" != "$(ls -l $ns1out $ns2out)" ]]; do
+ sleep 1;
+ prev="$(ls -l $ns1out $ns2out)"
+ done
- check_transfer "$ns1in" "$ns2out" "ns1 -> ns2"
- if [ $? -ne 0 ];then
+ if test -d /proc/"$lpid"/; then
+ kill $lpid
+ fi
+
+ if test -d /proc/"$cpid"/; then
+ kill $cpid
+ fi
+
+ wait $lpid
+ wait $cpid
+
+ if ! check_transfer "$nsin" "$ns2out" "ns1 -> ns2"; then
lret=1
fi
- check_transfer "$ns2in" "$ns1out" "ns1 <- ns2"
- if [ $? -ne 0 ];then
+ if ! check_transfer "$nsin" "$ns1out" "ns1 <- ns2"; then
lret=1
fi
@@ -267,45 +399,107 @@ test_tcp_forwarding()
return $?
}
+test_tcp_forwarding_set_dscp()
+{
+ check_dscp "dscp_none"
+
+ip netns exec $nsr1 nft -f - <<EOF
+table netdev dscpmangle {
+ chain setdscp0 {
+ type filter hook ingress device "veth0" priority 0; policy accept
+ ip dscp set cs3
+ }
+}
+EOF
+if [ $? -eq 0 ]; then
+ test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345
+ check_dscp "dscp_ingress"
+
+ ip netns exec $nsr1 nft delete table netdev dscpmangle
+else
+ echo "SKIP: Could not load netdev:ingress for veth0"
+fi
+
+ip netns exec $nsr1 nft -f - <<EOF
+table netdev dscpmangle {
+ chain setdscp0 {
+ type filter hook egress device "veth1" priority 0; policy accept
+ ip dscp set cs3
+ }
+}
+EOF
+if [ $? -eq 0 ]; then
+ test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345
+ check_dscp "dscp_egress"
+
+ ip netns exec $nsr1 nft flush table netdev dscpmangle
+else
+ echo "SKIP: Could not load netdev:egress for veth1"
+fi
+
+ # partial. If flowtable really works, then both dscp-is-0 and dscp-is-cs3
+ # counters should have seen packets (before and after ft offload kicks in).
+ ip netns exec $nsr1 nft -a insert rule inet filter forward ip dscp set cs3
+ test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345
+ check_dscp "dscp_fwd"
+}
+
test_tcp_forwarding_nat()
{
local lret
+ local pmtu
test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345
lret=$?
+ pmtu=$3
+ what=$4
+
if [ $lret -eq 0 ] ; then
+ if [ $pmtu -eq 1 ] ;then
+ check_counters "flow offload for ns1/ns2 with masquerade and pmtu discovery $what"
+ else
+ echo "PASS: flow offload for ns1/ns2 with masquerade $what"
+ fi
+
test_tcp_forwarding_ip "$1" "$2" 10.6.6.6 1666
lret=$?
+ if [ $pmtu -eq 1 ] ;then
+ check_counters "flow offload for ns1/ns2 with dnat and pmtu discovery $what"
+ elif [ $lret -eq 0 ] ; then
+ echo "PASS: flow offload for ns1/ns2 with dnat $what"
+ fi
fi
return $lret
}
-make_file "$ns1in" "ns1"
-make_file "$ns2in" "ns2"
+make_file "$nsin"
# First test:
# No PMTU discovery, nsr1 is expected to fragment packets from ns1 to ns2 as needed.
-test_tcp_forwarding ns1 ns2
-if [ $? -eq 0 ] ;then
+# Due to MTU mismatch in both directions, all packets (except small packets like pure
+# acks) have to be handled by normal forwarding path. Therefore, packet counters
+# are not checked.
+if test_tcp_forwarding $ns1 $ns2; then
echo "PASS: flow offloaded for ns1/ns2"
else
echo "FAIL: flow offload for ns1/ns2:" 1>&2
- ip netns exec nsr1 nft list ruleset
+ ip netns exec $nsr1 nft list ruleset
ret=1
fi
# delete default route, i.e. ns2 won't be able to reach ns1 and
# will depend on ns1 being masqueraded in nsr1.
# expect ns1 has nsr1 address.
-ip -net ns2 route del default via 10.0.2.1
-ip -net ns2 route del default via dead:2::1
-ip -net ns2 route add 192.168.10.1 via 10.0.2.1
+ip -net $ns2 route del default via 10.0.2.1
+ip -net $ns2 route del default via dead:2::1
+ip -net $ns2 route add 192.168.10.1 via 10.0.2.1
# Second test:
-# Same, but with NAT enabled.
-ip netns exec nsr1 nft -f - <<EOF
+# Same, but with NAT enabled. Same as in first test: we expect normal forward path
+# to handle most packets.
+ip netns exec $nsr1 nft -f - <<EOF
table ip nat {
chain prerouting {
type nat hook prerouting priority 0; policy accept;
@@ -319,39 +513,117 @@ table ip nat {
}
EOF
-test_tcp_forwarding_nat ns1 ns2
+if ! test_tcp_forwarding_set_dscp $ns1 $ns2 0 ""; then
+ echo "FAIL: flow offload for ns1/ns2 with dscp update" 1>&2
+ exit 0
+fi
-if [ $? -eq 0 ] ;then
- echo "PASS: flow offloaded for ns1/ns2 with NAT"
-else
+if ! test_tcp_forwarding_nat $ns1 $ns2 0 ""; then
echo "FAIL: flow offload for ns1/ns2 with NAT" 1>&2
- ip netns exec nsr1 nft list ruleset
+ ip netns exec $nsr1 nft list ruleset
ret=1
fi
# Third test:
-# Same as second test, but with PMTU discovery enabled.
-handle=$(ip netns exec nsr1 nft -a list table inet filter | grep something-to-grep-for | cut -d \# -f 2)
+# Same as second test, but with PMTU discovery enabled. This
+# means that we expect the fastpath to handle packets as soon
+# as the endpoints adjust the packet size.
+ip netns exec $ns1 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null
+ip netns exec $ns2 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null
+
+# reset counters.
+# With pmtu in-place we'll also check that nft counters
+# are lower than file size and packets were forwarded via flowtable layer.
+# For earlier tests (large mtus), packets cannot be handled via flowtable
+# (except pure acks and other small packets).
+ip netns exec $nsr1 nft reset counters table inet filter >/dev/null
+
+if ! test_tcp_forwarding_nat $ns1 $ns2 1 ""; then
+ echo "FAIL: flow offload for ns1/ns2 with NAT and pmtu discovery" 1>&2
+ ip netns exec $nsr1 nft list ruleset
+fi
-ip netns exec nsr1 nft delete rule inet filter forward $handle
-if [ $? -ne 0 ] ;then
- echo "FAIL: Could not delete large-packet accept rule"
- exit 1
+# Another test:
+# Add bridge interface br0 to Router1, with NAT enabled.
+ip -net $nsr1 link add name br0 type bridge
+ip -net $nsr1 addr flush dev veth0
+ip -net $nsr1 link set up dev veth0
+ip -net $nsr1 link set veth0 master br0
+ip -net $nsr1 addr add 10.0.1.1/24 dev br0
+ip -net $nsr1 addr add dead:1::1/64 dev br0
+ip -net $nsr1 link set up dev br0
+
+ip netns exec $nsr1 sysctl net.ipv4.conf.br0.forwarding=1 > /dev/null
+
+# br0 with NAT enabled.
+ip netns exec $nsr1 nft -f - <<EOF
+flush table ip nat
+table ip nat {
+ chain prerouting {
+ type nat hook prerouting priority 0; policy accept;
+ meta iif "br0" ip daddr 10.6.6.6 tcp dport 1666 counter dnat ip to 10.0.2.99:12345
+ }
+
+ chain postrouting {
+ type nat hook postrouting priority 0; policy accept;
+ meta oifname "veth1" counter masquerade
+ }
+}
+EOF
+
+if ! test_tcp_forwarding_nat $ns1 $ns2 1 "on bridge"; then
+ echo "FAIL: flow offload for ns1/ns2 with bridge NAT" 1>&2
+ ip netns exec $nsr1 nft list ruleset
+ ret=1
fi
-ip netns exec ns1 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null
-ip netns exec ns2 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null
-test_tcp_forwarding_nat ns1 ns2
-if [ $? -eq 0 ] ;then
- echo "PASS: flow offloaded for ns1/ns2 with NAT and pmtu discovery"
-else
- echo "FAIL: flow offload for ns1/ns2 with NAT and pmtu discovery" 1>&2
- ip netns exec nsr1 nft list ruleset
+# Another test:
+# Add bridge interface br0 to Router1, with NAT and VLAN.
+ip -net $nsr1 link set veth0 nomaster
+ip -net $nsr1 link set down dev veth0
+ip -net $nsr1 link add link veth0 name veth0.10 type vlan id 10
+ip -net $nsr1 link set up dev veth0
+ip -net $nsr1 link set up dev veth0.10
+ip -net $nsr1 link set veth0.10 master br0
+
+ip -net $ns1 addr flush dev eth0
+ip -net $ns1 link add link eth0 name eth0.10 type vlan id 10
+ip -net $ns1 link set eth0 up
+ip -net $ns1 link set eth0.10 up
+ip -net $ns1 addr add 10.0.1.99/24 dev eth0.10
+ip -net $ns1 route add default via 10.0.1.1
+ip -net $ns1 addr add dead:1::99/64 dev eth0.10
+
+if ! test_tcp_forwarding_nat $ns1 $ns2 1 "bridge and VLAN"; then
+ echo "FAIL: flow offload for ns1/ns2 with bridge NAT and VLAN" 1>&2
+ ip netns exec $nsr1 nft list ruleset
+ ret=1
fi
-KEY_SHA="0x"$(ps -xaf | sha1sum | cut -d " " -f 1)
-KEY_AES="0x"$(ps -xaf | md5sum | cut -d " " -f 1)
+# restore test topology (remove bridge and VLAN)
+ip -net $nsr1 link set veth0 nomaster
+ip -net $nsr1 link set veth0 down
+ip -net $nsr1 link set veth0.10 down
+ip -net $nsr1 link delete veth0.10 type vlan
+ip -net $nsr1 link delete br0 type bridge
+ip -net $ns1 addr flush dev eth0.10
+ip -net $ns1 link set eth0.10 down
+ip -net $ns1 link set eth0 down
+ip -net $ns1 link delete eth0.10 type vlan
+
+# restore address in ns1 and nsr1
+ip -net $ns1 link set eth0 up
+ip -net $ns1 addr add 10.0.1.99/24 dev eth0
+ip -net $ns1 route add default via 10.0.1.1
+ip -net $ns1 addr add dead:1::99/64 dev eth0
+ip -net $ns1 route add default via dead:1::1
+ip -net $nsr1 addr add 10.0.1.1/24 dev veth0
+ip -net $nsr1 addr add dead:1::1/64 dev veth0
+ip -net $nsr1 link set up dev veth0
+
+KEY_SHA="0x"$(ps -af | sha1sum | cut -d " " -f 1)
+KEY_AES="0x"$(ps -af | md5sum | cut -d " " -f 1)
SPI1=$RANDOM
SPI2=$RANDOM
@@ -378,24 +650,23 @@ do_esp() {
}
-do_esp nsr1 192.168.10.1 192.168.10.2 10.0.1.0/24 10.0.2.0/24 $SPI1 $SPI2
+do_esp $nsr1 192.168.10.1 192.168.10.2 10.0.1.0/24 10.0.2.0/24 $SPI1 $SPI2
-do_esp nsr2 192.168.10.2 192.168.10.1 10.0.2.0/24 10.0.1.0/24 $SPI2 $SPI1
+do_esp $nsr2 192.168.10.2 192.168.10.1 10.0.2.0/24 10.0.1.0/24 $SPI2 $SPI1
-ip netns exec nsr1 nft delete table ip nat
+ip netns exec $nsr1 nft delete table ip nat
# restore default routes
-ip -net ns2 route del 192.168.10.1 via 10.0.2.1
-ip -net ns2 route add default via 10.0.2.1
-ip -net ns2 route add default via dead:2::1
+ip -net $ns2 route del 192.168.10.1 via 10.0.2.1
+ip -net $ns2 route add default via 10.0.2.1
+ip -net $ns2 route add default via dead:2::1
-test_tcp_forwarding ns1 ns2
-if [ $? -eq 0 ] ;then
- echo "PASS: ipsec tunnel mode for ns1/ns2"
+if test_tcp_forwarding $ns1 $ns2; then
+ check_counters "ipsec tunnel mode for ns1/ns2"
else
echo "FAIL: ipsec tunnel mode for ns1/ns2"
- ip netns exec nsr1 nft list ruleset 1>&2
- ip netns exec nsr1 cat /proc/net/xfrm_stat 1>&2
+ ip netns exec $nsr1 nft list ruleset 1>&2
+ ip netns exec $nsr1 cat /proc/net/xfrm_stat 1>&2
fi
exit $ret
diff --git a/tools/testing/selftests/netfilter/nft_meta.sh b/tools/testing/selftests/netfilter/nft_meta.sh
new file mode 100755
index 000000000000..f33154c04d34
--- /dev/null
+++ b/tools/testing/selftests/netfilter/nft_meta.sh
@@ -0,0 +1,142 @@
+#!/bin/bash
+
+# check iif/iifname/oifgroup/iiftype match.
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+sfx=$(mktemp -u "XXXXXXXX")
+ns0="ns0-$sfx"
+
+if ! nft --version > /dev/null 2>&1; then
+ echo "SKIP: Could not run test without nft tool"
+ exit $ksft_skip
+fi
+
+cleanup()
+{
+ ip netns del "$ns0"
+}
+
+ip netns add "$ns0"
+ip -net "$ns0" link set lo up
+ip -net "$ns0" addr add 127.0.0.1 dev lo
+
+trap cleanup EXIT
+
+currentyear=$(date +%Y)
+lastyear=$((currentyear-1))
+ip netns exec "$ns0" nft -f /dev/stdin <<EOF
+table inet filter {
+ counter iifcount {}
+ counter iifnamecount {}
+ counter iifgroupcount {}
+ counter iiftypecount {}
+ counter infproto4count {}
+ counter il4protocounter {}
+ counter imarkcounter {}
+ counter icpu0counter {}
+ counter ilastyearcounter {}
+ counter icurrentyearcounter {}
+
+ counter oifcount {}
+ counter oifnamecount {}
+ counter oifgroupcount {}
+ counter oiftypecount {}
+ counter onfproto4count {}
+ counter ol4protocounter {}
+ counter oskuidcounter {}
+ counter oskgidcounter {}
+ counter omarkcounter {}
+
+ chain input {
+ type filter hook input priority 0; policy accept;
+
+ meta iif lo counter name "iifcount"
+ meta iifname "lo" counter name "iifnamecount"
+ meta iifgroup "default" counter name "iifgroupcount"
+ meta iiftype "loopback" counter name "iiftypecount"
+ meta nfproto ipv4 counter name "infproto4count"
+ meta l4proto icmp counter name "il4protocounter"
+ meta mark 42 counter name "imarkcounter"
+ meta cpu 0 counter name "icpu0counter"
+ meta time "$lastyear-01-01" - "$lastyear-12-31" counter name ilastyearcounter
+ meta time "$currentyear-01-01" - "$currentyear-12-31" counter name icurrentyearcounter
+ }
+
+ chain output {
+ type filter hook output priority 0; policy accept;
+ meta oif lo counter name "oifcount" counter
+ meta oifname "lo" counter name "oifnamecount"
+ meta oifgroup "default" counter name "oifgroupcount"
+ meta oiftype "loopback" counter name "oiftypecount"
+ meta nfproto ipv4 counter name "onfproto4count"
+ meta l4proto icmp counter name "ol4protocounter"
+ meta skuid 0 counter name "oskuidcounter"
+ meta skgid 0 counter name "oskgidcounter"
+ meta mark 42 counter name "omarkcounter"
+ }
+}
+EOF
+
+if [ $? -ne 0 ]; then
+ echo "SKIP: Could not add test ruleset"
+ exit $ksft_skip
+fi
+
+ret=0
+
+check_one_counter()
+{
+ local cname="$1"
+ local want="packets $2"
+ local verbose="$3"
+
+ if ! ip netns exec "$ns0" nft list counter inet filter $cname | grep -q "$want"; then
+ echo "FAIL: $cname, want \"$want\", got"
+ ret=1
+ ip netns exec "$ns0" nft list counter inet filter $cname
+ fi
+}
+
+check_lo_counters()
+{
+ local want="$1"
+ local verbose="$2"
+ local counter
+
+ for counter in iifcount iifnamecount iifgroupcount iiftypecount infproto4count \
+ oifcount oifnamecount oifgroupcount oiftypecount onfproto4count \
+ il4protocounter icurrentyearcounter ol4protocounter \
+ ; do
+ check_one_counter "$counter" "$want" "$verbose"
+ done
+}
+
+check_lo_counters "0" false
+ip netns exec "$ns0" ping -q -c 1 127.0.0.1 -m 42 > /dev/null
+
+check_lo_counters "2" true
+
+check_one_counter oskuidcounter "1" true
+check_one_counter oskgidcounter "1" true
+check_one_counter imarkcounter "1" true
+check_one_counter omarkcounter "1" true
+check_one_counter ilastyearcounter "0" true
+
+if [ $ret -eq 0 ];then
+ echo "OK: nftables meta iif/oif counters at expected values"
+else
+ exit $ret
+fi
+
+#First CPU execution and counter
+taskset -p 01 $$ > /dev/null
+ip netns exec "$ns0" nft reset counters > /dev/null
+ip netns exec "$ns0" ping -q -c 1 127.0.0.1 > /dev/null
+check_one_counter icpu0counter "2" true
+
+if [ $ret -eq 0 ];then
+ echo "OK: nftables meta cpu counter at expected values"
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/netfilter/nft_nat.sh b/tools/testing/selftests/netfilter/nft_nat.sh
index d7e07f4c3d7f..dd40d9f6f259 100755
--- a/tools/testing/selftests/netfilter/nft_nat.sh
+++ b/tools/testing/selftests/netfilter/nft_nat.sh
@@ -374,6 +374,47 @@ EOF
return $lret
}
+test_local_dnat_portonly()
+{
+ local family=$1
+ local daddr=$2
+ local lret=0
+ local sr_s
+ local sr_r
+
+ip netns exec "$ns0" nft -f /dev/stdin <<EOF
+table $family nat {
+ chain output {
+ type nat hook output priority 0; policy accept;
+ meta l4proto tcp dnat to :2000
+
+ }
+}
+EOF
+ if [ $? -ne 0 ]; then
+ if [ $family = "inet" ];then
+ echo "SKIP: inet port test"
+ test_inet_nat=false
+ return
+ fi
+ echo "SKIP: Could not add $family dnat hook"
+ return
+ fi
+
+ echo SERVER-$family | ip netns exec "$ns1" timeout 5 socat -u STDIN TCP-LISTEN:2000 &
+ sc_s=$!
+
+ sleep 1
+
+ result=$(ip netns exec "$ns0" timeout 1 socat TCP:$daddr:2000 STDOUT)
+
+ if [ "$result" = "SERVER-inet" ];then
+ echo "PASS: inet port rewrite without l3 address"
+ else
+ echo "ERROR: inet port rewrite"
+ ret=1
+ fi
+}
test_masquerade6()
{
@@ -741,6 +782,300 @@ EOF
return $lret
}
+# test port shadowing.
+# create two listening services, one on router (ns0), one
+# on client (ns2), which is masqueraded from ns1 point of view.
+# ns2 sends udp packet coming from service port to ns1, on a highport.
+# Later, if n1 uses same highport to connect to ns0:service, packet
+# might be port-forwarded to ns2 instead.
+
+# second argument tells if we expect the 'fake-entry' to take effect
+# (CLIENT) or not (ROUTER).
+test_port_shadow()
+{
+ local test=$1
+ local expect=$2
+ local daddrc="10.0.1.99"
+ local daddrs="10.0.1.1"
+ local result=""
+ local logmsg=""
+
+ # make shadow entry, from client (ns2), going to (ns1), port 41404, sport 1405.
+ echo "fake-entry" | ip netns exec "$ns2" timeout 1 socat -u STDIN UDP:"$daddrc":41404,sourceport=1405
+
+ echo ROUTER | ip netns exec "$ns0" timeout 5 socat -u STDIN UDP4-LISTEN:1405 &
+ sc_r=$!
+
+ echo CLIENT | ip netns exec "$ns2" timeout 5 socat -u STDIN UDP4-LISTEN:1405,reuseport &
+ sc_c=$!
+
+ sleep 0.3
+
+ # ns1 tries to connect to ns0:1405. With default settings this should connect
+ # to client, it matches the conntrack entry created above.
+
+ result=$(echo "data" | ip netns exec "$ns1" timeout 1 socat - UDP:"$daddrs":1405,sourceport=41404)
+
+ if [ "$result" = "$expect" ] ;then
+ echo "PASS: portshadow test $test: got reply from ${expect}${logmsg}"
+ else
+ echo "ERROR: portshadow test $test: got reply from \"$result\", not $expect as intended"
+ ret=1
+ fi
+
+ kill $sc_r $sc_c 2>/dev/null
+
+ # flush udp entries for next test round, if any
+ ip netns exec "$ns0" conntrack -F >/dev/null 2>&1
+}
+
+# This prevents port shadow of router service via packet filter,
+# packets claiming to originate from service port from internal
+# network are dropped.
+test_port_shadow_filter()
+{
+ local family=$1
+
+ip netns exec "$ns0" nft -f /dev/stdin <<EOF
+table $family filter {
+ chain forward {
+ type filter hook forward priority 0; policy accept;
+ meta iif veth1 udp sport 1405 drop
+ }
+}
+EOF
+ test_port_shadow "port-filter" "ROUTER"
+
+ ip netns exec "$ns0" nft delete table $family filter
+}
+
+# This prevents port shadow of router service via notrack.
+test_port_shadow_notrack()
+{
+ local family=$1
+
+ip netns exec "$ns0" nft -f /dev/stdin <<EOF
+table $family raw {
+ chain prerouting {
+ type filter hook prerouting priority -300; policy accept;
+ meta iif veth0 udp dport 1405 notrack
+ }
+ chain output {
+ type filter hook output priority -300; policy accept;
+ meta oif veth0 udp sport 1405 notrack
+ }
+}
+EOF
+ test_port_shadow "port-notrack" "ROUTER"
+
+ ip netns exec "$ns0" nft delete table $family raw
+}
+
+# This prevents port shadow of router service via sport remap.
+test_port_shadow_pat()
+{
+ local family=$1
+
+ip netns exec "$ns0" nft -f /dev/stdin <<EOF
+table $family pat {
+ chain postrouting {
+ type nat hook postrouting priority -1; policy accept;
+ meta iif veth1 udp sport <= 1405 masquerade to : 1406-65535 random
+ }
+}
+EOF
+ test_port_shadow "pat" "ROUTER"
+
+ ip netns exec "$ns0" nft delete table $family pat
+}
+
+test_port_shadowing()
+{
+ local family="ip"
+
+ conntrack -h >/dev/null 2>&1
+ if [ $? -ne 0 ];then
+ echo "SKIP: Could not run nat port shadowing test without conntrack tool"
+ return
+ fi
+
+ socat -h > /dev/null 2>&1
+ if [ $? -ne 0 ];then
+ echo "SKIP: Could not run nat port shadowing test without socat tool"
+ return
+ fi
+
+ ip netns exec "$ns0" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
+ ip netns exec "$ns0" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
+
+ ip netns exec "$ns0" nft -f /dev/stdin <<EOF
+table $family nat {
+ chain postrouting {
+ type nat hook postrouting priority 0; policy accept;
+ meta oif veth0 masquerade
+ }
+}
+EOF
+ if [ $? -ne 0 ]; then
+ echo "SKIP: Could not add add $family masquerade hook"
+ return $ksft_skip
+ fi
+
+ # test default behaviour. Packet from ns1 to ns0 is redirected to ns2.
+ test_port_shadow "default" "CLIENT"
+
+ # test packet filter based mitigation: prevent forwarding of
+ # packets claiming to come from the service port.
+ test_port_shadow_filter "$family"
+
+ # test conntrack based mitigation: connections going or coming
+ # from router:service bypass connection tracking.
+ test_port_shadow_notrack "$family"
+
+ # test nat based mitigation: fowarded packets coming from service port
+ # are masqueraded with random highport.
+ test_port_shadow_pat "$family"
+
+ ip netns exec "$ns0" nft delete table $family nat
+}
+
+test_stateless_nat_ip()
+{
+ local lret=0
+
+ ip netns exec "$ns0" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
+ ip netns exec "$ns0" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
+
+ ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1
+ if [ $? -ne 0 ] ; then
+ echo "ERROR: cannot ping $ns1 from $ns2 before loading stateless rules"
+ return 1
+ fi
+
+ip netns exec "$ns0" nft -f /dev/stdin <<EOF
+table ip stateless {
+ map xlate_in {
+ typeof meta iifname . ip saddr . ip daddr : ip daddr
+ elements = {
+ "veth1" . 10.0.2.99 . 10.0.1.99 : 10.0.2.2,
+ }
+ }
+ map xlate_out {
+ typeof meta iifname . ip saddr . ip daddr : ip daddr
+ elements = {
+ "veth0" . 10.0.1.99 . 10.0.2.2 : 10.0.2.99
+ }
+ }
+
+ chain prerouting {
+ type filter hook prerouting priority -400; policy accept;
+ ip saddr set meta iifname . ip saddr . ip daddr map @xlate_in
+ ip daddr set meta iifname . ip saddr . ip daddr map @xlate_out
+ }
+}
+EOF
+ if [ $? -ne 0 ]; then
+ echo "SKIP: Could not add ip statless rules"
+ return $ksft_skip
+ fi
+
+ reset_counters
+
+ ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1
+ if [ $? -ne 0 ] ; then
+ echo "ERROR: cannot ping $ns1 from $ns2 with stateless rules"
+ lret=1
+ fi
+
+ # ns1 should have seen packets from .2.2, due to stateless rewrite.
+ expect="packets 1 bytes 84"
+ cnt=$(ip netns exec "$ns1" nft list counter inet filter ns0insl | grep -q "$expect")
+ if [ $? -ne 0 ]; then
+ bad_counter "$ns1" ns0insl "$expect" "test_stateless 1"
+ lret=1
+ fi
+
+ for dir in "in" "out" ; do
+ cnt=$(ip netns exec "$ns2" nft list counter inet filter ns1${dir} | grep -q "$expect")
+ if [ $? -ne 0 ]; then
+ bad_counter "$ns2" ns1$dir "$expect" "test_stateless 2"
+ lret=1
+ fi
+ done
+
+ # ns1 should not have seen packets from ns2, due to masquerade
+ expect="packets 0 bytes 0"
+ for dir in "in" "out" ; do
+ cnt=$(ip netns exec "$ns1" nft list counter inet filter ns2${dir} | grep -q "$expect")
+ if [ $? -ne 0 ]; then
+ bad_counter "$ns1" ns0$dir "$expect" "test_stateless 3"
+ lret=1
+ fi
+
+ cnt=$(ip netns exec "$ns0" nft list counter inet filter ns1${dir} | grep -q "$expect")
+ if [ $? -ne 0 ]; then
+ bad_counter "$ns0" ns1$dir "$expect" "test_stateless 4"
+ lret=1
+ fi
+ done
+
+ reset_counters
+
+ socat -h > /dev/null 2>&1
+ if [ $? -ne 0 ];then
+ echo "SKIP: Could not run stateless nat frag test without socat tool"
+ if [ $lret -eq 0 ]; then
+ return $ksft_skip
+ fi
+
+ ip netns exec "$ns0" nft delete table ip stateless
+ return $lret
+ fi
+
+ local tmpfile=$(mktemp)
+ dd if=/dev/urandom of=$tmpfile bs=4096 count=1 2>/dev/null
+
+ local outfile=$(mktemp)
+ ip netns exec "$ns1" timeout 3 socat -u UDP4-RECV:4233 OPEN:$outfile < /dev/null &
+ sc_r=$!
+
+ sleep 1
+ # re-do with large ping -> ip fragmentation
+ ip netns exec "$ns2" timeout 3 socat - UDP4-SENDTO:"10.0.1.99:4233" < "$tmpfile" > /dev/null
+ if [ $? -ne 0 ] ; then
+ echo "ERROR: failed to test udp $ns1 to $ns2 with stateless ip nat" 1>&2
+ lret=1
+ fi
+
+ wait
+
+ cmp "$tmpfile" "$outfile"
+ if [ $? -ne 0 ]; then
+ ls -l "$tmpfile" "$outfile"
+ echo "ERROR: in and output file mismatch when checking udp with stateless nat" 1>&2
+ lret=1
+ fi
+
+ rm -f "$tmpfile" "$outfile"
+
+ # ns1 should have seen packets from 2.2, due to stateless rewrite.
+ expect="packets 3 bytes 4164"
+ cnt=$(ip netns exec "$ns1" nft list counter inet filter ns0insl | grep -q "$expect")
+ if [ $? -ne 0 ]; then
+ bad_counter "$ns1" ns0insl "$expect" "test_stateless 5"
+ lret=1
+ fi
+
+ ip netns exec "$ns0" nft delete table ip stateless
+ if [ $? -ne 0 ]; then
+ echo "ERROR: Could not delete table ip stateless" 1>&2
+ lret=1
+ fi
+
+ test $lret -eq 0 && echo "PASS: IP statless for $ns2"
+
+ return $lret
+}
# ip netns exec "$ns0" ping -c 1 -q 10.0.$i.99
for i in 0 1 2; do
@@ -808,6 +1143,19 @@ table inet filter {
EOF
done
+# special case for stateless nat check, counter needs to
+# be done before (input) ip defragmentation
+ip netns exec ns1-$sfx nft -f /dev/stdin <<EOF
+table inet filter {
+ counter ns0insl {}
+
+ chain pre {
+ type filter hook prerouting priority -400; policy accept;
+ ip saddr 10.0.2.2 counter name "ns0insl"
+ }
+}
+EOF
+
sleep 3
# test basic connectivity
for i in 1 2; do
@@ -841,6 +1189,10 @@ fi
reset_counters
test_local_dnat ip
test_local_dnat6 ip6
+
+reset_counters
+test_local_dnat_portonly inet 10.0.1.99
+
reset_counters
$test_inet_nat && test_local_dnat inet
$test_inet_nat && test_local_dnat6 inet
@@ -861,6 +1213,9 @@ reset_counters
$test_inet_nat && test_redirect inet
$test_inet_nat && test_redirect6 inet
+test_port_shadowing
+test_stateless_nat_ip
+
if [ $ret -ne 0 ];then
echo -n "FAIL: "
nft --version
diff --git a/tools/testing/selftests/netfilter/nft_nat_zones.sh b/tools/testing/selftests/netfilter/nft_nat_zones.sh
new file mode 100755
index 000000000000..b9ab37380f33
--- /dev/null
+++ b/tools/testing/selftests/netfilter/nft_nat_zones.sh
@@ -0,0 +1,309 @@
+#!/bin/bash
+#
+# Test connection tracking zone and NAT source port reallocation support.
+#
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+# Don't increase too much, 2000 clients should work
+# just fine but script can then take several minutes with
+# KASAN/debug builds.
+maxclients=100
+
+have_iperf=1
+ret=0
+
+# client1---.
+# veth1-.
+# |
+# NAT Gateway --veth0--> Server
+# | |
+# veth2-' |
+# client2---' |
+# .... |
+# clientX----vethX---'
+
+# All clients share identical IP address.
+# NAT Gateway uses policy routing and conntrack zones to isolate client
+# namespaces. Each client connects to Server, each with colliding tuples:
+# clientsaddr:10000 -> serveraddr:dport
+# NAT Gateway is supposed to do port reallocation for each of the
+# connections.
+
+sfx=$(mktemp -u "XXXXXXXX")
+gw="ns-gw-$sfx"
+cl1="ns-cl1-$sfx"
+cl2="ns-cl2-$sfx"
+srv="ns-srv-$sfx"
+
+v4gc1=$(sysctl -n net.ipv4.neigh.default.gc_thresh1 2>/dev/null)
+v4gc2=$(sysctl -n net.ipv4.neigh.default.gc_thresh2 2>/dev/null)
+v4gc3=$(sysctl -n net.ipv4.neigh.default.gc_thresh3 2>/dev/null)
+v6gc1=$(sysctl -n net.ipv6.neigh.default.gc_thresh1 2>/dev/null)
+v6gc2=$(sysctl -n net.ipv6.neigh.default.gc_thresh2 2>/dev/null)
+v6gc3=$(sysctl -n net.ipv6.neigh.default.gc_thresh3 2>/dev/null)
+
+cleanup()
+{
+ ip netns del $gw
+ ip netns del $srv
+ for i in $(seq 1 $maxclients); do
+ ip netns del ns-cl$i-$sfx 2>/dev/null
+ done
+
+ sysctl -q net.ipv4.neigh.default.gc_thresh1=$v4gc1 2>/dev/null
+ sysctl -q net.ipv4.neigh.default.gc_thresh2=$v4gc2 2>/dev/null
+ sysctl -q net.ipv4.neigh.default.gc_thresh3=$v4gc3 2>/dev/null
+ sysctl -q net.ipv6.neigh.default.gc_thresh1=$v6gc1 2>/dev/null
+ sysctl -q net.ipv6.neigh.default.gc_thresh2=$v6gc2 2>/dev/null
+ sysctl -q net.ipv6.neigh.default.gc_thresh3=$v6gc3 2>/dev/null
+}
+
+nft --version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without nft tool"
+ exit $ksft_skip
+fi
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without ip tool"
+ exit $ksft_skip
+fi
+
+conntrack -V > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without conntrack tool"
+ exit $ksft_skip
+fi
+
+iperf3 -v >/dev/null 2>&1
+if [ $? -ne 0 ];then
+ have_iperf=0
+fi
+
+ip netns add "$gw"
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not create net namespace $gw"
+ exit $ksft_skip
+fi
+ip -net "$gw" link set lo up
+
+trap cleanup EXIT
+
+ip netns add "$srv"
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not create server netns $srv"
+ exit $ksft_skip
+fi
+
+ip link add veth0 netns "$gw" type veth peer name eth0 netns "$srv"
+ip -net "$gw" link set veth0 up
+ip -net "$srv" link set lo up
+ip -net "$srv" link set eth0 up
+
+sysctl -q net.ipv6.neigh.default.gc_thresh1=512 2>/dev/null
+sysctl -q net.ipv6.neigh.default.gc_thresh2=1024 2>/dev/null
+sysctl -q net.ipv6.neigh.default.gc_thresh3=4096 2>/dev/null
+sysctl -q net.ipv4.neigh.default.gc_thresh1=512 2>/dev/null
+sysctl -q net.ipv4.neigh.default.gc_thresh2=1024 2>/dev/null
+sysctl -q net.ipv4.neigh.default.gc_thresh3=4096 2>/dev/null
+
+for i in $(seq 1 $maxclients);do
+ cl="ns-cl$i-$sfx"
+
+ ip netns add "$cl"
+ if [ $? -ne 0 ];then
+ echo "SKIP: Could not create client netns $cl"
+ exit $ksft_skip
+ fi
+ ip link add veth$i netns "$gw" type veth peer name eth0 netns "$cl" > /dev/null 2>&1
+ if [ $? -ne 0 ];then
+ echo "SKIP: No virtual ethernet pair device support in kernel"
+ exit $ksft_skip
+ fi
+done
+
+for i in $(seq 1 $maxclients);do
+ cl="ns-cl$i-$sfx"
+ echo netns exec "$cl" ip link set lo up
+ echo netns exec "$cl" ip link set eth0 up
+ echo netns exec "$cl" sysctl -q net.ipv4.tcp_syn_retries=2
+ echo netns exec "$gw" ip link set veth$i up
+ echo netns exec "$gw" sysctl -q net.ipv4.conf.veth$i.arp_ignore=2
+ echo netns exec "$gw" sysctl -q net.ipv4.conf.veth$i.rp_filter=0
+
+ # clients have same IP addresses.
+ echo netns exec "$cl" ip addr add 10.1.0.3/24 dev eth0
+ echo netns exec "$cl" ip addr add dead:1::3/64 dev eth0
+ echo netns exec "$cl" ip route add default via 10.1.0.2 dev eth0
+ echo netns exec "$cl" ip route add default via dead:1::2 dev eth0
+
+ # NB: same addresses on client-facing interfaces.
+ echo netns exec "$gw" ip addr add 10.1.0.2/24 dev veth$i
+ echo netns exec "$gw" ip addr add dead:1::2/64 dev veth$i
+
+ # gw: policy routing
+ echo netns exec "$gw" ip route add 10.1.0.0/24 dev veth$i table $((1000+i))
+ echo netns exec "$gw" ip route add dead:1::0/64 dev veth$i table $((1000+i))
+ echo netns exec "$gw" ip route add 10.3.0.0/24 dev veth0 table $((1000+i))
+ echo netns exec "$gw" ip route add dead:3::0/64 dev veth0 table $((1000+i))
+ echo netns exec "$gw" ip rule add fwmark $i lookup $((1000+i))
+done | ip -batch /dev/stdin
+
+ip -net "$gw" addr add 10.3.0.1/24 dev veth0
+ip -net "$gw" addr add dead:3::1/64 dev veth0
+
+ip -net "$srv" addr add 10.3.0.99/24 dev eth0
+ip -net "$srv" addr add dead:3::99/64 dev eth0
+
+ip netns exec $gw nft -f /dev/stdin<<EOF
+table inet raw {
+ map iiftomark {
+ type ifname : mark
+ }
+
+ map iiftozone {
+ typeof iifname : ct zone
+ }
+
+ set inicmp {
+ flags dynamic
+ type ipv4_addr . ifname . ipv4_addr
+ }
+ set inflows {
+ flags dynamic
+ type ipv4_addr . inet_service . ifname . ipv4_addr . inet_service
+ }
+
+ set inflows6 {
+ flags dynamic
+ type ipv6_addr . inet_service . ifname . ipv6_addr . inet_service
+ }
+
+ chain prerouting {
+ type filter hook prerouting priority -64000; policy accept;
+ ct original zone set meta iifname map @iiftozone
+ meta mark set meta iifname map @iiftomark
+
+ tcp flags & (syn|ack) == ack add @inflows { ip saddr . tcp sport . meta iifname . ip daddr . tcp dport counter }
+ add @inflows6 { ip6 saddr . tcp sport . meta iifname . ip6 daddr . tcp dport counter }
+ ip protocol icmp add @inicmp { ip saddr . meta iifname . ip daddr counter }
+ }
+
+ chain nat_postrouting {
+ type nat hook postrouting priority 0; policy accept;
+ ct mark set meta mark meta oifname veth0 masquerade
+ }
+
+ chain mangle_prerouting {
+ type filter hook prerouting priority -100; policy accept;
+ ct direction reply meta mark set ct mark
+ }
+}
+EOF
+
+( echo add element inet raw iiftomark \{
+ for i in $(seq 1 $((maxclients-1))); do
+ echo \"veth$i\" : $i,
+ done
+ echo \"veth$maxclients\" : $maxclients \}
+ echo add element inet raw iiftozone \{
+ for i in $(seq 1 $((maxclients-1))); do
+ echo \"veth$i\" : $i,
+ done
+ echo \"veth$maxclients\" : $maxclients \}
+) | ip netns exec $gw nft -f /dev/stdin
+
+ip netns exec "$gw" sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null
+ip netns exec "$gw" sysctl -q net.ipv6.conf.all.forwarding=1 > /dev/null
+ip netns exec "$gw" sysctl -q net.ipv4.conf.all.rp_filter=0 >/dev/null
+
+# useful for debugging: allows to use 'ping' from clients to gateway.
+ip netns exec "$gw" sysctl -q net.ipv4.fwmark_reflect=1 > /dev/null
+ip netns exec "$gw" sysctl -q net.ipv6.fwmark_reflect=1 > /dev/null
+
+for i in $(seq 1 $maxclients); do
+ cl="ns-cl$i-$sfx"
+ ip netns exec $cl ping -i 0.5 -q -c 3 10.3.0.99 > /dev/null 2>&1 &
+ if [ $? -ne 0 ]; then
+ echo FAIL: Ping failure from $cl 1>&2
+ ret=1
+ break
+ fi
+done
+
+wait
+
+for i in $(seq 1 $maxclients); do
+ ip netns exec $gw nft get element inet raw inicmp "{ 10.1.0.3 . \"veth$i\" . 10.3.0.99 }" | grep -q "{ 10.1.0.3 . \"veth$i\" . 10.3.0.99 counter packets 3 bytes 252 }"
+ if [ $? -ne 0 ];then
+ ret=1
+ echo "FAIL: counter icmp mismatch for veth$i" 1>&2
+ ip netns exec $gw nft get element inet raw inicmp "{ 10.1.0.3 . \"veth$i\" . 10.3.0.99 }" 1>&2
+ break
+ fi
+done
+
+ip netns exec $gw nft get element inet raw inicmp "{ 10.3.0.99 . \"veth0\" . 10.3.0.1 }" | grep -q "{ 10.3.0.99 . \"veth0\" . 10.3.0.1 counter packets $((3 * $maxclients)) bytes $((252 * $maxclients)) }"
+if [ $? -ne 0 ];then
+ ret=1
+ echo "FAIL: counter icmp mismatch for veth0: { 10.3.0.99 . \"veth0\" . 10.3.0.1 counter packets $((3 * $maxclients)) bytes $((252 * $maxclients)) }"
+ ip netns exec $gw nft get element inet raw inicmp "{ 10.3.99 . \"veth0\" . 10.3.0.1 }" 1>&2
+fi
+
+if [ $ret -eq 0 ]; then
+ echo "PASS: ping test from all $maxclients namespaces"
+fi
+
+if [ $have_iperf -eq 0 ];then
+ echo "SKIP: iperf3 not installed"
+ if [ $ret -ne 0 ];then
+ exit $ret
+ fi
+ exit $ksft_skip
+fi
+
+ip netns exec $srv iperf3 -s > /dev/null 2>&1 &
+iperfpid=$!
+sleep 1
+
+for i in $(seq 1 $maxclients); do
+ if [ $ret -ne 0 ]; then
+ break
+ fi
+ cl="ns-cl$i-$sfx"
+ ip netns exec $cl iperf3 -c 10.3.0.99 --cport 10000 -n 1 > /dev/null
+ if [ $? -ne 0 ]; then
+ echo FAIL: Failure to connect for $cl 1>&2
+ ip netns exec $gw conntrack -S 1>&2
+ ret=1
+ fi
+done
+if [ $ret -eq 0 ];then
+ echo "PASS: iperf3 connections for all $maxclients net namespaces"
+fi
+
+kill $iperfpid
+wait
+
+for i in $(seq 1 $maxclients); do
+ ip netns exec $gw nft get element inet raw inflows "{ 10.1.0.3 . 10000 . \"veth$i\" . 10.3.0.99 . 5201 }" > /dev/null
+ if [ $? -ne 0 ];then
+ ret=1
+ echo "FAIL: can't find expected tcp entry for veth$i" 1>&2
+ break
+ fi
+done
+if [ $ret -eq 0 ];then
+ echo "PASS: Found client connection for all $maxclients net namespaces"
+fi
+
+ip netns exec $gw nft get element inet raw inflows "{ 10.3.0.99 . 5201 . \"veth0\" . 10.3.0.1 . 10000 }" > /dev/null
+if [ $? -ne 0 ];then
+ ret=1
+ echo "FAIL: cannot find return entry on veth0" 1>&2
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/netfilter/nft_queue.sh b/tools/testing/selftests/netfilter/nft_queue.sh
index 6898448b4266..e12729753351 100755
--- a/tools/testing/selftests/netfilter/nft_queue.sh
+++ b/tools/testing/selftests/netfilter/nft_queue.sh
@@ -12,14 +12,20 @@ sfx=$(mktemp -u "XXXXXXXX")
ns1="ns1-$sfx"
ns2="ns2-$sfx"
nsrouter="nsrouter-$sfx"
+timeout=4
cleanup()
{
+ ip netns pids ${ns1} | xargs kill 2>/dev/null
+ ip netns pids ${ns2} | xargs kill 2>/dev/null
+ ip netns pids ${nsrouter} | xargs kill 2>/dev/null
+
ip netns del ${ns1}
ip netns del ${ns2}
ip netns del ${nsrouter}
rm -f "$TMPFILE0"
rm -f "$TMPFILE1"
+ rm -f "$TMPFILE2" "$TMPFILE3"
}
nft --version > /dev/null 2>&1
@@ -42,6 +48,8 @@ fi
TMPFILE0=$(mktemp)
TMPFILE1=$(mktemp)
+TMPFILE2=$(mktemp)
+TMPFILE3=$(mktemp)
trap cleanup EXIT
ip netns add ${ns1}
@@ -83,7 +91,7 @@ load_ruleset() {
local name=$1
local prio=$2
-ip netns exec ${nsrouter} nft -f - <<EOF
+ip netns exec ${nsrouter} nft -f /dev/stdin <<EOF
table inet $name {
chain nfq {
ip protocol icmp queue bypass
@@ -105,6 +113,7 @@ table inet $name {
chain output {
type filter hook output priority $prio; policy accept;
tcp dport 12345 queue num 3
+ tcp sport 23456 queue num 3
jump nfq
}
chain post {
@@ -118,7 +127,7 @@ EOF
load_counter_ruleset() {
local prio=$1
-ip netns exec ${nsrouter} nft -f - <<EOF
+ip netns exec ${nsrouter} nft -f /dev/stdin <<EOF
table inet countrules {
chain pre {
type filter hook prerouting priority $prio; policy accept;
@@ -175,7 +184,7 @@ test_ping_router() {
test_queue_blackhole() {
local proto=$1
-ip netns exec ${nsrouter} nft -f - <<EOF
+ip netns exec ${nsrouter} nft -f /dev/stdin <<EOF
table $proto blackh {
chain forward {
type filter hook forward priority 0; policy accept;
@@ -184,10 +193,10 @@ table $proto blackh {
}
EOF
if [ $proto = "ip" ] ;then
- ip netns exec ${ns1} ping -c 1 -q 10.0.2.99 > /dev/null
+ ip netns exec ${ns1} ping -W 2 -c 1 -q 10.0.2.99 > /dev/null
lret=$?
elif [ $proto = "ip6" ]; then
- ip netns exec ${ns1} ping -c 1 -q dead:2::99 > /dev/null
+ ip netns exec ${ns1} ping -W 2 -c 1 -q dead:2::99 > /dev/null
lret=$?
else
lret=111
@@ -214,8 +223,8 @@ test_queue()
local last=""
# spawn nf-queue listeners
- ip netns exec ${nsrouter} ./nf-queue -c -q 0 -t 3 > "$TMPFILE0" &
- ip netns exec ${nsrouter} ./nf-queue -c -q 1 -t 3 > "$TMPFILE1" &
+ ip netns exec ${nsrouter} ./nf-queue -c -q 0 -t $timeout > "$TMPFILE0" &
+ ip netns exec ${nsrouter} ./nf-queue -c -q 1 -t $timeout > "$TMPFILE1" &
sleep 1
test_ping
ret=$?
@@ -250,11 +259,11 @@ test_queue()
test_tcp_forward()
{
- ip netns exec ${nsrouter} ./nf-queue -q 2 -t 10 &
+ ip netns exec ${nsrouter} ./nf-queue -q 2 -t $timeout &
local nfqpid=$!
tmpfile=$(mktemp) || exit 1
- dd conv=sparse status=none if=/dev/zero bs=1M count=100 of=$tmpfile
+ dd conv=sparse status=none if=/dev/zero bs=1M count=200 of=$tmpfile
ip netns exec ${ns2} nc -w 5 -l -p 12345 <"$tmpfile" >/dev/null &
local rpid=$!
@@ -270,15 +279,13 @@ test_tcp_forward()
test_tcp_localhost()
{
- tc -net "${nsrouter}" qdisc add dev lo root netem loss random 1%
-
tmpfile=$(mktemp) || exit 1
- dd conv=sparse status=none if=/dev/zero bs=1M count=900 of=$tmpfile
+ dd conv=sparse status=none if=/dev/zero bs=1M count=200 of=$tmpfile
ip netns exec ${nsrouter} nc -w 5 -l -p 12345 <"$tmpfile" >/dev/null &
local rpid=$!
- ip netns exec ${nsrouter} ./nf-queue -q 3 -t 30 &
+ ip netns exec ${nsrouter} ./nf-queue -q 3 -t $timeout &
local nfqpid=$!
sleep 1
@@ -287,6 +294,113 @@ test_tcp_localhost()
wait $rpid
[ $? -eq 0 ] && echo "PASS: tcp via loopback"
+ wait 2>/dev/null
+}
+
+test_tcp_localhost_connectclose()
+{
+ tmpfile=$(mktemp) || exit 1
+
+ ip netns exec ${nsrouter} ./connect_close -p 23456 -t $timeout &
+
+ ip netns exec ${nsrouter} ./nf-queue -q 3 -t $timeout &
+ local nfqpid=$!
+
+ sleep 1
+ rm -f "$tmpfile"
+
+ wait $rpid
+ [ $? -eq 0 ] && echo "PASS: tcp via loopback with connect/close"
+ wait 2>/dev/null
+}
+
+test_tcp_localhost_requeue()
+{
+ip netns exec ${nsrouter} nft -f /dev/stdin <<EOF
+flush ruleset
+table inet filter {
+ chain output {
+ type filter hook output priority 0; policy accept;
+ tcp dport 12345 limit rate 1/second burst 1 packets counter queue num 0
+ }
+ chain post {
+ type filter hook postrouting priority 0; policy accept;
+ tcp dport 12345 limit rate 1/second burst 1 packets counter queue num 0
+ }
+}
+EOF
+ tmpfile=$(mktemp) || exit 1
+ dd conv=sparse status=none if=/dev/zero bs=1M count=200 of=$tmpfile
+ ip netns exec ${nsrouter} nc -w 5 -l -p 12345 <"$tmpfile" >/dev/null &
+ local rpid=$!
+
+ ip netns exec ${nsrouter} ./nf-queue -c -q 1 -t $timeout > "$TMPFILE2" &
+
+ # nfqueue 1 will be called via output hook. But this time,
+ # re-queue the packet to nfqueue program on queue 2.
+ ip netns exec ${nsrouter} ./nf-queue -G -d 150 -c -q 0 -Q 1 -t $timeout > "$TMPFILE3" &
+
+ sleep 1
+ ip netns exec ${nsrouter} nc -w 5 127.0.0.1 12345 <"$tmpfile" > /dev/null
+ rm -f "$tmpfile"
+
+ wait
+
+ if ! diff -u "$TMPFILE2" "$TMPFILE3" ; then
+ echo "FAIL: lost packets during requeue?!" 1>&2
+ return
+ fi
+
+ echo "PASS: tcp via loopback and re-queueing"
+}
+
+test_icmp_vrf() {
+ ip -net $ns1 link add tvrf type vrf table 9876
+ if [ $? -ne 0 ];then
+ echo "SKIP: Could not add vrf device"
+ return
+ fi
+
+ ip -net $ns1 li set eth0 master tvrf
+ ip -net $ns1 li set tvrf up
+
+ ip -net $ns1 route add 10.0.2.0/24 via 10.0.1.1 dev eth0 table 9876
+ip netns exec ${ns1} nft -f /dev/stdin <<EOF
+flush ruleset
+table inet filter {
+ chain output {
+ type filter hook output priority 0; policy accept;
+ meta oifname "tvrf" icmp type echo-request counter queue num 1
+ meta oifname "eth0" icmp type echo-request counter queue num 1
+ }
+ chain post {
+ type filter hook postrouting priority 0; policy accept;
+ meta oifname "tvrf" icmp type echo-request counter queue num 1
+ meta oifname "eth0" icmp type echo-request counter queue num 1
+ }
+}
+EOF
+ ip netns exec ${ns1} ./nf-queue -q 1 -t $timeout &
+ local nfqpid=$!
+
+ sleep 1
+ ip netns exec ${ns1} ip vrf exec tvrf ping -c 1 10.0.2.99 > /dev/null
+
+ for n in output post; do
+ for d in tvrf eth0; do
+ ip netns exec ${ns1} nft list chain inet filter $n | grep -q "oifname \"$d\" icmp type echo-request counter packets 1"
+ if [ $? -ne 0 ] ; then
+ echo "FAIL: chain $n: icmp packet counter mismatch for device $d" 1>&2
+ ip netns exec ${ns1} nft list ruleset
+ ret=1
+ return
+ fi
+ done
+ done
+
+ wait $nfqpid
+ [ $? -eq 0 ] && echo "PASS: icmp+nfqueue via vrf"
+ wait 2>/dev/null
}
ip netns exec ${nsrouter} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
@@ -328,5 +442,8 @@ test_queue 20
test_tcp_forward
test_tcp_localhost
+test_tcp_localhost_connectclose
+test_tcp_localhost_requeue
+test_icmp_vrf
exit $ret
diff --git a/tools/testing/selftests/netfilter/nft_synproxy.sh b/tools/testing/selftests/netfilter/nft_synproxy.sh
new file mode 100755
index 000000000000..b62933b680d6
--- /dev/null
+++ b/tools/testing/selftests/netfilter/nft_synproxy.sh
@@ -0,0 +1,117 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+ret=0
+
+rnd=$(mktemp -u XXXXXXXX)
+nsr="nsr-$rnd" # synproxy machine
+ns1="ns1-$rnd" # iperf client
+ns2="ns2-$rnd" # iperf server
+
+checktool (){
+ if ! $1 > /dev/null 2>&1; then
+ echo "SKIP: Could not $2"
+ exit $ksft_skip
+ fi
+}
+
+checktool "nft --version" "run test without nft tool"
+checktool "ip -Version" "run test without ip tool"
+checktool "iperf3 --version" "run test without iperf3"
+checktool "ip netns add $nsr" "create net namespace"
+
+modprobe -q nf_conntrack
+
+ip netns add $ns1
+ip netns add $ns2
+
+cleanup() {
+ ip netns pids $ns1 | xargs kill 2>/dev/null
+ ip netns pids $ns2 | xargs kill 2>/dev/null
+ ip netns del $ns1
+ ip netns del $ns2
+
+ ip netns del $nsr
+}
+
+trap cleanup EXIT
+
+ip link add veth0 netns $nsr type veth peer name eth0 netns $ns1
+ip link add veth1 netns $nsr type veth peer name eth0 netns $ns2
+
+for dev in lo veth0 veth1; do
+ip -net $nsr link set $dev up
+done
+
+ip -net $nsr addr add 10.0.1.1/24 dev veth0
+ip -net $nsr addr add 10.0.2.1/24 dev veth1
+
+ip netns exec $nsr sysctl -q net.ipv4.conf.veth0.forwarding=1
+ip netns exec $nsr sysctl -q net.ipv4.conf.veth1.forwarding=1
+ip netns exec $nsr sysctl -q net.netfilter.nf_conntrack_tcp_loose=0
+
+for n in $ns1 $ns2; do
+ ip -net $n link set lo up
+ ip -net $n link set eth0 up
+done
+ip -net $ns1 addr add 10.0.1.99/24 dev eth0
+ip -net $ns2 addr add 10.0.2.99/24 dev eth0
+ip -net $ns1 route add default via 10.0.1.1
+ip -net $ns2 route add default via 10.0.2.1
+
+# test basic connectivity
+if ! ip netns exec $ns1 ping -c 1 -q 10.0.2.99 > /dev/null; then
+ echo "ERROR: $ns1 cannot reach $ns2" 1>&2
+ exit 1
+fi
+
+if ! ip netns exec $ns2 ping -c 1 -q 10.0.1.99 > /dev/null; then
+ echo "ERROR: $ns2 cannot reach $ns1" 1>&2
+ exit 1
+fi
+
+ip netns exec $ns2 iperf3 -s > /dev/null 2>&1 &
+# ip netns exec $nsr tcpdump -vvv -n -i veth1 tcp | head -n 10 &
+
+sleep 1
+
+ip netns exec $nsr nft -f - <<EOF
+table inet filter {
+ chain prerouting {
+ type filter hook prerouting priority -300; policy accept;
+ meta iif veth0 tcp flags syn counter notrack
+ }
+
+ chain forward {
+ type filter hook forward priority 0; policy accept;
+
+ ct state new,established counter accept
+
+ meta iif veth0 meta l4proto tcp ct state untracked,invalid synproxy mss 1460 sack-perm timestamp
+
+ ct state invalid counter drop
+
+ # make ns2 unreachable w.o. tcp synproxy
+ tcp flags syn counter drop
+ }
+}
+EOF
+if [ $? -ne 0 ]; then
+ echo "SKIP: Cannot add nft synproxy"
+ exit $ksft_skip
+fi
+
+ip netns exec $ns1 timeout 5 iperf3 -c 10.0.2.99 -n $((1 * 1024 * 1024)) > /dev/null
+
+if [ $? -ne 0 ]; then
+ echo "FAIL: iperf3 returned an error" 1>&2
+ ret=$?
+ ip netns exec $nsr nft list ruleset
+else
+ echo "PASS: synproxy connection successful"
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/netfilter/nft_trans_stress.sh b/tools/testing/selftests/netfilter/nft_trans_stress.sh
index f1affd12c4b1..2ffba45a78bf 100755
--- a/tools/testing/selftests/netfilter/nft_trans_stress.sh
+++ b/tools/testing/selftests/netfilter/nft_trans_stress.sh
@@ -9,8 +9,35 @@
# Kselftest framework requirement - SKIP code is 4.
ksft_skip=4
-testns=testns1
+testns=testns-$(mktemp -u "XXXXXXXX")
+tmp=""
+
tables="foo bar baz quux"
+global_ret=0
+eret=0
+lret=0
+
+cleanup() {
+ ip netns pids "$testns" | xargs kill 2>/dev/null
+ ip netns del "$testns"
+
+ rm -f "$tmp"
+}
+
+check_result()
+{
+ local r=$1
+ local OK="PASS"
+
+ if [ $r -ne 0 ] ;then
+ OK="FAIL"
+ global_ret=$r
+ fi
+
+ echo "$OK: nft $2 test returned $r"
+
+ eret=0
+}
nft --version > /dev/null 2>&1
if [ $? -ne 0 ];then
@@ -24,6 +51,7 @@ if [ $? -ne 0 ];then
exit $ksft_skip
fi
+trap cleanup EXIT
tmp=$(mktemp)
for table in $tables; do
@@ -59,20 +87,65 @@ done)
sleep 1
+ip netns exec "$testns" nft -f "$tmp"
for i in $(seq 1 10) ; do ip netns exec "$testns" nft -f "$tmp" & done
for table in $tables;do
- randsleep=$((RANDOM%10))
+ randsleep=$((RANDOM%2))
sleep $randsleep
- ip netns exec "$testns" nft delete table inet $table 2>/dev/null
+ ip netns exec "$testns" nft delete table inet $table
+ lret=$?
+ if [ $lret -ne 0 ]; then
+ eret=$lret
+ fi
+done
+
+check_result $eret "add/delete"
+
+for i in $(seq 1 10) ; do
+ (echo "flush ruleset"; cat "$tmp") | ip netns exec "$testns" nft -f /dev/stdin
+
+ lret=$?
+ if [ $lret -ne 0 ]; then
+ eret=$lret
+ fi
+done
+
+check_result $eret "reload"
+
+for i in $(seq 1 10) ; do
+ (echo "flush ruleset"; cat "$tmp"
+ echo "insert rule inet foo INPUT meta nftrace set 1"
+ echo "insert rule inet foo OUTPUT meta nftrace set 1"
+ ) | ip netns exec "$testns" nft -f /dev/stdin
+ lret=$?
+ if [ $lret -ne 0 ]; then
+ eret=$lret
+ fi
+
+ (echo "flush ruleset"; cat "$tmp"
+ ) | ip netns exec "$testns" nft -f /dev/stdin
+
+ lret=$?
+ if [ $lret -ne 0 ]; then
+ eret=$lret
+ fi
done
-randsleep=$((RANDOM%10))
-sleep $randsleep
+check_result $eret "add/delete with nftrace enabled"
+
+echo "insert rule inet foo INPUT meta nftrace set 1" >> $tmp
+echo "insert rule inet foo OUTPUT meta nftrace set 1" >> $tmp
-pkill -9 ping
+for i in $(seq 1 10) ; do
+ (echo "flush ruleset"; cat "$tmp") | ip netns exec "$testns" nft -f /dev/stdin
+
+ lret=$?
+ if [ $lret -ne 0 ]; then
+ eret=1
+ fi
+done
-wait
+check_result $lret "add/delete with nftrace enabled"
-rm -f "$tmp"
-ip netns del "$testns"
+exit $global_ret
diff --git a/tools/testing/selftests/netfilter/nft_zones_many.sh b/tools/testing/selftests/netfilter/nft_zones_many.sh
new file mode 100755
index 000000000000..5a8db0b48928
--- /dev/null
+++ b/tools/testing/selftests/netfilter/nft_zones_many.sh
@@ -0,0 +1,163 @@
+#!/bin/bash
+
+# Test insertion speed for packets with identical addresses/ports
+# that are all placed in distinct conntrack zones.
+
+sfx=$(mktemp -u "XXXXXXXX")
+ns="ns-$sfx"
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+zones=2000
+have_ct_tool=0
+ret=0
+
+cleanup()
+{
+ ip netns del $ns
+}
+
+checktool (){
+ if ! $1 > /dev/null 2>&1; then
+ echo "SKIP: Could not $2"
+ exit $ksft_skip
+ fi
+}
+
+checktool "nft --version" "run test without nft tool"
+checktool "ip -Version" "run test without ip tool"
+checktool "socat -V" "run test without socat tool"
+checktool "ip netns add $ns" "create net namespace"
+
+trap cleanup EXIT
+
+conntrack -V > /dev/null 2>&1
+if [ $? -eq 0 ];then
+ have_ct_tool=1
+fi
+
+ip -net "$ns" link set lo up
+
+test_zones() {
+ local max_zones=$1
+
+ip netns exec $ns sysctl -q net.netfilter.nf_conntrack_udp_timeout=3600
+ip netns exec $ns nft -f /dev/stdin<<EOF
+flush ruleset
+table inet raw {
+ map rndzone {
+ typeof numgen inc mod $max_zones : ct zone
+ }
+
+ chain output {
+ type filter hook output priority -64000; policy accept;
+ udp dport 12345 ct zone set numgen inc mod 65536 map @rndzone
+ }
+}
+EOF
+ (
+ echo "add element inet raw rndzone {"
+ for i in $(seq 1 $max_zones);do
+ echo -n "$i : $i"
+ if [ $i -lt $max_zones ]; then
+ echo ","
+ else
+ echo "}"
+ fi
+ done
+ ) | ip netns exec $ns nft -f /dev/stdin
+
+ local i=0
+ local j=0
+ local outerstart=$(date +%s%3N)
+ local stop=$outerstart
+
+ while [ $i -lt $max_zones ]; do
+ local start=$(date +%s%3N)
+ i=$((i + 1000))
+ j=$((j + 1))
+ # nft rule in output places each packet in a different zone.
+ dd if=/dev/zero of=/dev/stdout bs=8k count=1000 2>/dev/null | ip netns exec "$ns" socat STDIN UDP:127.0.0.1:12345,sourceport=12345
+ if [ $? -ne 0 ] ;then
+ ret=1
+ break
+ fi
+
+ stop=$(date +%s%3N)
+ local duration=$((stop-start))
+ echo "PASS: added 1000 entries in $duration ms (now $i total, loop $j)"
+ done
+
+ if [ $have_ct_tool -eq 1 ]; then
+ local count=$(ip netns exec "$ns" conntrack -C)
+ local duration=$((stop-outerstart))
+
+ if [ $count -eq $max_zones ]; then
+ echo "PASS: inserted $count entries from packet path in $duration ms total"
+ else
+ ip netns exec $ns conntrack -S 1>&2
+ echo "FAIL: inserted $count entries from packet path in $duration ms total, expected $max_zones entries"
+ ret=1
+ fi
+ fi
+
+ if [ $ret -ne 0 ];then
+ echo "FAIL: insert $max_zones entries from packet path" 1>&2
+ fi
+}
+
+test_conntrack_tool() {
+ local max_zones=$1
+
+ ip netns exec $ns conntrack -F >/dev/null 2>/dev/null
+
+ local outerstart=$(date +%s%3N)
+ local start=$(date +%s%3N)
+ local stop=$start
+ local i=0
+ while [ $i -lt $max_zones ]; do
+ i=$((i + 1))
+ ip netns exec "$ns" conntrack -I -s 1.1.1.1 -d 2.2.2.2 --protonum 6 \
+ --timeout 3600 --state ESTABLISHED --sport 12345 --dport 1000 --zone $i >/dev/null 2>&1
+ if [ $? -ne 0 ];then
+ ip netns exec "$ns" conntrack -I -s 1.1.1.1 -d 2.2.2.2 --protonum 6 \
+ --timeout 3600 --state ESTABLISHED --sport 12345 --dport 1000 --zone $i > /dev/null
+ echo "FAIL: conntrack -I returned an error"
+ ret=1
+ break
+ fi
+
+ if [ $((i%1000)) -eq 0 ];then
+ stop=$(date +%s%3N)
+
+ local duration=$((stop-start))
+ echo "PASS: added 1000 entries in $duration ms (now $i total)"
+ start=$stop
+ fi
+ done
+
+ local count=$(ip netns exec "$ns" conntrack -C)
+ local duration=$((stop-outerstart))
+
+ if [ $count -eq $max_zones ]; then
+ echo "PASS: inserted $count entries via ctnetlink in $duration ms"
+ else
+ ip netns exec $ns conntrack -S 1>&2
+ echo "FAIL: inserted $count entries via ctnetlink in $duration ms, expected $max_zones entries ($duration ms)"
+ ret=1
+ fi
+}
+
+test_zones $zones
+
+if [ $have_ct_tool -eq 1 ];then
+ test_conntrack_tool $zones
+else
+ echo "SKIP: Could not run ctnetlink insertion test without conntrack tool"
+ if [ $ret -eq 0 ];then
+ exit $ksft_skip
+ fi
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/netfilter/rpath.sh b/tools/testing/selftests/netfilter/rpath.sh
new file mode 100755
index 000000000000..5289c8447a41
--- /dev/null
+++ b/tools/testing/selftests/netfilter/rpath.sh
@@ -0,0 +1,169 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# return code to signal skipped test
+ksft_skip=4
+
+# search for legacy iptables (it uses the xtables extensions
+if iptables-legacy --version >/dev/null 2>&1; then
+ iptables='iptables-legacy'
+elif iptables --version >/dev/null 2>&1; then
+ iptables='iptables'
+else
+ iptables=''
+fi
+
+if ip6tables-legacy --version >/dev/null 2>&1; then
+ ip6tables='ip6tables-legacy'
+elif ip6tables --version >/dev/null 2>&1; then
+ ip6tables='ip6tables'
+else
+ ip6tables=''
+fi
+
+if nft --version >/dev/null 2>&1; then
+ nft='nft'
+else
+ nft=''
+fi
+
+if [ -z "$iptables$ip6tables$nft" ]; then
+ echo "SKIP: Test needs iptables, ip6tables or nft"
+ exit $ksft_skip
+fi
+
+sfx=$(mktemp -u "XXXXXXXX")
+ns1="ns1-$sfx"
+ns2="ns2-$sfx"
+trap "ip netns del $ns1; ip netns del $ns2" EXIT
+
+# create two netns, disable rp_filter in ns2 and
+# keep IPv6 address when moving into VRF
+ip netns add "$ns1"
+ip netns add "$ns2"
+ip netns exec "$ns2" sysctl -q net.ipv4.conf.all.rp_filter=0
+ip netns exec "$ns2" sysctl -q net.ipv4.conf.default.rp_filter=0
+ip netns exec "$ns2" sysctl -q net.ipv6.conf.all.keep_addr_on_down=1
+
+# a standard connection between the netns, should not trigger rp filter
+ip -net "$ns1" link add v0 type veth peer name v0 netns "$ns2"
+ip -net "$ns1" link set v0 up; ip -net "$ns2" link set v0 up
+ip -net "$ns1" a a 192.168.23.2/24 dev v0
+ip -net "$ns2" a a 192.168.23.1/24 dev v0
+ip -net "$ns1" a a fec0:23::2/64 dev v0 nodad
+ip -net "$ns2" a a fec0:23::1/64 dev v0 nodad
+
+# rp filter testing: ns1 sends packets via v0 which ns2 would route back via d0
+ip -net "$ns2" link add d0 type dummy
+ip -net "$ns2" link set d0 up
+ip -net "$ns1" a a 192.168.42.2/24 dev v0
+ip -net "$ns2" a a 192.168.42.1/24 dev d0
+ip -net "$ns1" a a fec0:42::2/64 dev v0 nodad
+ip -net "$ns2" a a fec0:42::1/64 dev d0 nodad
+
+# firewall matches to test
+[ -n "$iptables" ] && {
+ common='-t raw -A PREROUTING -s 192.168.0.0/16'
+ ip netns exec "$ns2" "$iptables" $common -m rpfilter
+ ip netns exec "$ns2" "$iptables" $common -m rpfilter --invert
+}
+[ -n "$ip6tables" ] && {
+ common='-t raw -A PREROUTING -s fec0::/16'
+ ip netns exec "$ns2" "$ip6tables" $common -m rpfilter
+ ip netns exec "$ns2" "$ip6tables" $common -m rpfilter --invert
+}
+[ -n "$nft" ] && ip netns exec "$ns2" $nft -f - <<EOF
+table inet t {
+ chain c {
+ type filter hook prerouting priority raw;
+ ip saddr 192.168.0.0/16 fib saddr . iif oif exists counter
+ ip6 saddr fec0::/16 fib saddr . iif oif exists counter
+ }
+}
+EOF
+
+die() {
+ echo "FAIL: $*"
+ #ip netns exec "$ns2" "$iptables" -t raw -vS
+ #ip netns exec "$ns2" "$ip6tables" -t raw -vS
+ #ip netns exec "$ns2" nft list ruleset
+ exit 1
+}
+
+# check rule counters, return true if rule did not match
+ipt_zero_rule() { # (command)
+ [ -n "$1" ] || return 0
+ ip netns exec "$ns2" "$1" -t raw -vS | grep -q -- "-m rpfilter -c 0 0"
+}
+ipt_zero_reverse_rule() { # (command)
+ [ -n "$1" ] || return 0
+ ip netns exec "$ns2" "$1" -t raw -vS | \
+ grep -q -- "-m rpfilter --invert -c 0 0"
+}
+nft_zero_rule() { # (family)
+ [ -n "$nft" ] || return 0
+ ip netns exec "$ns2" "$nft" list chain inet t c | \
+ grep -q "$1 saddr .* counter packets 0 bytes 0"
+}
+
+netns_ping() { # (netns, args...)
+ local netns="$1"
+ shift
+ ip netns exec "$netns" ping -q -c 1 -W 1 "$@" >/dev/null
+}
+
+clear_counters() {
+ [ -n "$iptables" ] && ip netns exec "$ns2" "$iptables" -t raw -Z
+ [ -n "$ip6tables" ] && ip netns exec "$ns2" "$ip6tables" -t raw -Z
+ if [ -n "$nft" ]; then
+ (
+ echo "delete table inet t";
+ ip netns exec "$ns2" $nft -s list table inet t;
+ ) | ip netns exec "$ns2" $nft -f -
+ fi
+}
+
+testrun() {
+ clear_counters
+
+ # test 1: martian traffic should fail rpfilter matches
+ netns_ping "$ns1" -I v0 192.168.42.1 && \
+ die "martian ping 192.168.42.1 succeeded"
+ netns_ping "$ns1" -I v0 fec0:42::1 && \
+ die "martian ping fec0:42::1 succeeded"
+
+ ipt_zero_rule "$iptables" || die "iptables matched martian"
+ ipt_zero_rule "$ip6tables" || die "ip6tables matched martian"
+ ipt_zero_reverse_rule "$iptables" && die "iptables not matched martian"
+ ipt_zero_reverse_rule "$ip6tables" && die "ip6tables not matched martian"
+ nft_zero_rule ip || die "nft IPv4 matched martian"
+ nft_zero_rule ip6 || die "nft IPv6 matched martian"
+
+ clear_counters
+
+ # test 2: rpfilter match should pass for regular traffic
+ netns_ping "$ns1" 192.168.23.1 || \
+ die "regular ping 192.168.23.1 failed"
+ netns_ping "$ns1" fec0:23::1 || \
+ die "regular ping fec0:23::1 failed"
+
+ ipt_zero_rule "$iptables" && die "iptables match not effective"
+ ipt_zero_rule "$ip6tables" && die "ip6tables match not effective"
+ ipt_zero_reverse_rule "$iptables" || die "iptables match over-effective"
+ ipt_zero_reverse_rule "$ip6tables" || die "ip6tables match over-effective"
+ nft_zero_rule ip && die "nft IPv4 match not effective"
+ nft_zero_rule ip6 && die "nft IPv6 match not effective"
+
+}
+
+testrun
+
+# repeat test with vrf device in $ns2
+ip -net "$ns2" link add vrf0 type vrf table 10
+ip -net "$ns2" link set vrf0 up
+ip -net "$ns2" link set v0 master vrf0
+
+testrun
+
+echo "PASS: netfilter reverse path match works as intended"
+exit 0
diff --git a/tools/testing/selftests/netfilter/sctp_collision.c b/tools/testing/selftests/netfilter/sctp_collision.c
new file mode 100644
index 000000000000..21bb1cfd8a85
--- /dev/null
+++ b/tools/testing/selftests/netfilter/sctp_collision.c
@@ -0,0 +1,99 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <arpa/inet.h>
+
+int main(int argc, char *argv[])
+{
+ struct sockaddr_in saddr = {}, daddr = {};
+ int sd, ret, len = sizeof(daddr);
+ struct timeval tv = {25, 0};
+ char buf[] = "hello";
+
+ if (argc != 6 || (strcmp(argv[1], "server") && strcmp(argv[1], "client"))) {
+ printf("%s <server|client> <LOCAL_IP> <LOCAL_PORT> <REMOTE_IP> <REMOTE_PORT>\n",
+ argv[0]);
+ return -1;
+ }
+
+ sd = socket(AF_INET, SOCK_SEQPACKET, IPPROTO_SCTP);
+ if (sd < 0) {
+ printf("Failed to create sd\n");
+ return -1;
+ }
+
+ saddr.sin_family = AF_INET;
+ saddr.sin_addr.s_addr = inet_addr(argv[2]);
+ saddr.sin_port = htons(atoi(argv[3]));
+
+ ret = bind(sd, (struct sockaddr *)&saddr, sizeof(saddr));
+ if (ret < 0) {
+ printf("Failed to bind to address\n");
+ goto out;
+ }
+
+ ret = listen(sd, 5);
+ if (ret < 0) {
+ printf("Failed to listen on port\n");
+ goto out;
+ }
+
+ daddr.sin_family = AF_INET;
+ daddr.sin_addr.s_addr = inet_addr(argv[4]);
+ daddr.sin_port = htons(atoi(argv[5]));
+
+ /* make test shorter than 25s */
+ ret = setsockopt(sd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv));
+ if (ret < 0) {
+ printf("Failed to setsockopt SO_RCVTIMEO\n");
+ goto out;
+ }
+
+ if (!strcmp(argv[1], "server")) {
+ sleep(1); /* wait a bit for client's INIT */
+ ret = connect(sd, (struct sockaddr *)&daddr, len);
+ if (ret < 0) {
+ printf("Failed to connect to peer\n");
+ goto out;
+ }
+ ret = recvfrom(sd, buf, sizeof(buf), 0, (struct sockaddr *)&daddr, &len);
+ if (ret < 0) {
+ printf("Failed to recv msg %d\n", ret);
+ goto out;
+ }
+ ret = sendto(sd, buf, strlen(buf) + 1, 0, (struct sockaddr *)&daddr, len);
+ if (ret < 0) {
+ printf("Failed to send msg %d\n", ret);
+ goto out;
+ }
+ printf("Server: sent! %d\n", ret);
+ }
+
+ if (!strcmp(argv[1], "client")) {
+ usleep(300000); /* wait a bit for server's listening */
+ ret = connect(sd, (struct sockaddr *)&daddr, len);
+ if (ret < 0) {
+ printf("Failed to connect to peer\n");
+ goto out;
+ }
+ sleep(1); /* wait a bit for server's delayed INIT_ACK to reproduce the issue */
+ ret = sendto(sd, buf, strlen(buf) + 1, 0, (struct sockaddr *)&daddr, len);
+ if (ret < 0) {
+ printf("Failed to send msg %d\n", ret);
+ goto out;
+ }
+ ret = recvfrom(sd, buf, sizeof(buf), 0, (struct sockaddr *)&daddr, &len);
+ if (ret < 0) {
+ printf("Failed to recv msg %d\n", ret);
+ goto out;
+ }
+ printf("Client: rcvd! %d\n", ret);
+ }
+ ret = 0;
+out:
+ close(sd);
+ return ret;
+}
diff --git a/tools/testing/selftests/netfilter/settings b/tools/testing/selftests/netfilter/settings
new file mode 100644
index 000000000000..6091b45d226b
--- /dev/null
+++ b/tools/testing/selftests/netfilter/settings
@@ -0,0 +1 @@
+timeout=120
diff --git a/tools/testing/selftests/netfilter/xt_string.sh b/tools/testing/selftests/netfilter/xt_string.sh
new file mode 100755
index 000000000000..1802653a4728
--- /dev/null
+++ b/tools/testing/selftests/netfilter/xt_string.sh
@@ -0,0 +1,128 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# return code to signal skipped test
+ksft_skip=4
+rc=0
+
+if ! iptables --version >/dev/null 2>&1; then
+ echo "SKIP: Test needs iptables"
+ exit $ksft_skip
+fi
+if ! ip -V >/dev/null 2>&1; then
+ echo "SKIP: Test needs iproute2"
+ exit $ksft_skip
+fi
+if ! nc -h >/dev/null 2>&1; then
+ echo "SKIP: Test needs netcat"
+ exit $ksft_skip
+fi
+
+pattern="foo bar baz"
+patlen=11
+hdrlen=$((20 + 8)) # IPv4 + UDP
+ns="ns-$(mktemp -u XXXXXXXX)"
+trap 'ip netns del $ns' EXIT
+ip netns add "$ns"
+ip -net "$ns" link add d0 type dummy
+ip -net "$ns" link set d0 up
+ip -net "$ns" addr add 10.1.2.1/24 dev d0
+
+#ip netns exec "$ns" tcpdump -npXi d0 &
+#tcpdump_pid=$!
+#trap 'kill $tcpdump_pid; ip netns del $ns' EXIT
+
+add_rule() { # (alg, from, to)
+ ip netns exec "$ns" \
+ iptables -A OUTPUT -o d0 -m string \
+ --string "$pattern" --algo $1 --from $2 --to $3
+}
+showrules() { # ()
+ ip netns exec "$ns" iptables -v -S OUTPUT | grep '^-A'
+}
+zerorules() {
+ ip netns exec "$ns" iptables -Z OUTPUT
+}
+countrule() { # (pattern)
+ showrules | grep -c -- "$*"
+}
+send() { # (offset)
+ ( for ((i = 0; i < $1 - $hdrlen; i++)); do
+ printf " "
+ done
+ printf "$pattern"
+ ) | ip netns exec "$ns" nc -w 1 -u 10.1.2.2 27374
+}
+
+add_rule bm 1000 1500
+add_rule bm 1400 1600
+add_rule kmp 1000 1500
+add_rule kmp 1400 1600
+
+zerorules
+send 0
+send $((1000 - $patlen))
+if [ $(countrule -c 0 0) -ne 4 ]; then
+ echo "FAIL: rules match data before --from"
+ showrules
+ ((rc--))
+fi
+
+zerorules
+send 1000
+send $((1400 - $patlen))
+if [ $(countrule -c 2) -ne 2 ]; then
+ echo "FAIL: only two rules should match at low offset"
+ showrules
+ ((rc--))
+fi
+
+zerorules
+send $((1500 - $patlen))
+if [ $(countrule -c 1) -ne 4 ]; then
+ echo "FAIL: all rules should match at end of packet"
+ showrules
+ ((rc--))
+fi
+
+zerorules
+send 1495
+if [ $(countrule -c 1) -ne 1 ]; then
+ echo "FAIL: only kmp with proper --to should match pattern spanning fragments"
+ showrules
+ ((rc--))
+fi
+
+zerorules
+send 1500
+if [ $(countrule -c 1) -ne 2 ]; then
+ echo "FAIL: two rules should match pattern at start of second fragment"
+ showrules
+ ((rc--))
+fi
+
+zerorules
+send $((1600 - $patlen))
+if [ $(countrule -c 1) -ne 2 ]; then
+ echo "FAIL: two rules should match pattern at end of largest --to"
+ showrules
+ ((rc--))
+fi
+
+zerorules
+send $((1600 - $patlen + 1))
+if [ $(countrule -c 1) -ne 0 ]; then
+ echo "FAIL: no rules should match pattern extending largest --to"
+ showrules
+ ((rc--))
+fi
+
+zerorules
+send 1600
+if [ $(countrule -c 1) -ne 0 ]; then
+ echo "FAIL: no rule should match pattern past largest --to"
+ showrules
+ ((rc--))
+fi
+
+exit $rc