aboutsummaryrefslogtreecommitdiffstatshomepage
diff options
context:
space:
mode:
authorJakub Kicinski <kuba@kernel.org>2025-09-02 16:56:07 -0700
committerJakub Kicinski <kuba@kernel.org>2025-09-02 16:57:00 -0700
commit41ec374bdece1f59f2511e8a3046bb6efa1ed48d (patch)
tree2e50492fd6e184b710416369aaced0eadefef0d8
parentnet: pcs: rzn1-miic: Correct MODCTRL register offset (diff)
parentselftests: net: Add a selftest for VXLAN with FDB nexthop groups (diff)
downloadwireguard-linux-41ec374bdece1f59f2511e8a3046bb6efa1ed48d.tar.xz
wireguard-linux-41ec374bdece1f59f2511e8a3046bb6efa1ed48d.zip
Merge branch 'vxlan-fix-npds-when-using-nexthop-objects'
Ido Schimmel says: ==================== vxlan: Fix NPDs when using nexthop objects With FDB nexthop groups, VXLAN FDB entries do not necessarily point to a remote destination but rather to an FDB nexthop group. This means that first_remote_{rcu,rtnl}() can return NULL and a few places in the driver were not ready for that, resulting in NULL pointer dereferences. Patches #1-#2 fix these NPDs. Note that vxlan_fdb_find_uc() still dereferences the remote returned by first_remote_rcu() without checking that it is not NULL, but this function is only invoked by a single driver which vetoes the creation of FDB nexthop groups. I will patch this in net-next to make the code less fragile. Patch #3 adds a selftests which exercises these code paths and tests basic Tx functionality with FDB nexthop groups. I verified that the test crashes the kernel without the first two patches. ==================== Link: https://patch.msgid.link/20250901065035.159644-1-idosch@nvidia.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
-rw-r--r--drivers/net/vxlan/vxlan_core.c18
-rw-r--r--drivers/net/vxlan/vxlan_private.h4
-rw-r--r--tools/testing/selftests/net/Makefile1
-rwxr-xr-xtools/testing/selftests/net/test_vxlan_nh.sh223
4 files changed, 237 insertions, 9 deletions
diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c
index f32be2e301f2..dab864bc733c 100644
--- a/drivers/net/vxlan/vxlan_core.c
+++ b/drivers/net/vxlan/vxlan_core.c
@@ -1445,6 +1445,10 @@ static enum skb_drop_reason vxlan_snoop(struct net_device *dev,
if (READ_ONCE(f->updated) != now)
WRITE_ONCE(f->updated, now);
+ /* Don't override an fdb with nexthop with a learnt entry */
+ if (rcu_access_pointer(f->nh))
+ return SKB_DROP_REASON_VXLAN_ENTRY_EXISTS;
+
if (likely(vxlan_addr_equal(&rdst->remote_ip, src_ip) &&
rdst->remote_ifindex == ifindex))
return SKB_NOT_DROPPED_YET;
@@ -1453,10 +1457,6 @@ static enum skb_drop_reason vxlan_snoop(struct net_device *dev,
if (f->state & (NUD_PERMANENT | NUD_NOARP))
return SKB_DROP_REASON_VXLAN_ENTRY_EXISTS;
- /* Don't override an fdb with nexthop with a learnt entry */
- if (rcu_access_pointer(f->nh))
- return SKB_DROP_REASON_VXLAN_ENTRY_EXISTS;
-
if (net_ratelimit())
netdev_info(dev,
"%pM migrated from %pIS to %pIS\n",
@@ -1877,6 +1877,7 @@ static int arp_reduce(struct net_device *dev, struct sk_buff *skb, __be32 vni)
n = neigh_lookup(&arp_tbl, &tip, dev);
if (n) {
+ struct vxlan_rdst *rdst = NULL;
struct vxlan_fdb *f;
struct sk_buff *reply;
@@ -1887,7 +1888,9 @@ static int arp_reduce(struct net_device *dev, struct sk_buff *skb, __be32 vni)
rcu_read_lock();
f = vxlan_find_mac_tx(vxlan, n->ha, vni);
- if (f && vxlan_addr_any(&(first_remote_rcu(f)->remote_ip))) {
+ if (f)
+ rdst = first_remote_rcu(f);
+ if (rdst && vxlan_addr_any(&rdst->remote_ip)) {
/* bridge-local neighbor */
neigh_release(n);
rcu_read_unlock();
@@ -2044,6 +2047,7 @@ static int neigh_reduce(struct net_device *dev, struct sk_buff *skb, __be32 vni)
n = neigh_lookup(ipv6_stub->nd_tbl, &msg->target, dev);
if (n) {
+ struct vxlan_rdst *rdst = NULL;
struct vxlan_fdb *f;
struct sk_buff *reply;
@@ -2053,7 +2057,9 @@ static int neigh_reduce(struct net_device *dev, struct sk_buff *skb, __be32 vni)
}
f = vxlan_find_mac_tx(vxlan, n->ha, vni);
- if (f && vxlan_addr_any(&(first_remote_rcu(f)->remote_ip))) {
+ if (f)
+ rdst = first_remote_rcu(f);
+ if (rdst && vxlan_addr_any(&rdst->remote_ip)) {
/* bridge-local neighbor */
neigh_release(n);
goto out;
diff --git a/drivers/net/vxlan/vxlan_private.h b/drivers/net/vxlan/vxlan_private.h
index 6c625fb29c6c..99fe772ad679 100644
--- a/drivers/net/vxlan/vxlan_private.h
+++ b/drivers/net/vxlan/vxlan_private.h
@@ -61,9 +61,7 @@ static inline struct hlist_head *vs_head(struct net *net, __be16 port)
return &vn->sock_list[hash_32(ntohs(port), PORT_HASH_BITS)];
}
-/* First remote destination for a forwarding entry.
- * Guaranteed to be non-NULL because remotes are never deleted.
- */
+/* First remote destination for a forwarding entry. */
static inline struct vxlan_rdst *first_remote_rcu(struct vxlan_fdb *fdb)
{
if (rcu_access_pointer(fdb->nh))
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index b31a71f2b372..c7e03e1d6f63 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -99,6 +99,7 @@ TEST_GEN_PROGS += bind_wildcard
TEST_GEN_PROGS += bind_timewait
TEST_PROGS += test_vxlan_mdb.sh
TEST_PROGS += test_bridge_neigh_suppress.sh
+TEST_PROGS += test_vxlan_nh.sh
TEST_PROGS += test_vxlan_nolocalbypass.sh
TEST_PROGS += test_bridge_backup_port.sh
TEST_PROGS += test_neigh.sh
diff --git a/tools/testing/selftests/net/test_vxlan_nh.sh b/tools/testing/selftests/net/test_vxlan_nh.sh
new file mode 100755
index 000000000000..20f3369f776b
--- /dev/null
+++ b/tools/testing/selftests/net/test_vxlan_nh.sh
@@ -0,0 +1,223 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source lib.sh
+TESTS="
+ basic_tx_ipv4
+ basic_tx_ipv6
+ learning
+ proxy_ipv4
+ proxy_ipv6
+"
+VERBOSE=0
+
+################################################################################
+# Utilities
+
+run_cmd()
+{
+ local cmd="$1"
+ local out
+ local stderr="2>/dev/null"
+
+ if [ "$VERBOSE" = "1" ]; then
+ echo "COMMAND: $cmd"
+ stderr=
+ fi
+
+ out=$(eval "$cmd" "$stderr")
+ rc=$?
+ if [ "$VERBOSE" -eq 1 ] && [ -n "$out" ]; then
+ echo " $out"
+ fi
+
+ return $rc
+}
+
+################################################################################
+# Cleanup
+
+exit_cleanup_all()
+{
+ cleanup_all_ns
+ exit "${EXIT_STATUS}"
+}
+
+################################################################################
+# Tests
+
+nh_stats_get()
+{
+ ip -n "$ns1" -s -j nexthop show id 10 | jq ".[][\"group_stats\"][][\"packets\"]"
+}
+
+tc_stats_get()
+{
+ tc_rule_handle_stats_get "dev dummy1 egress" 101 ".packets" "-n $ns1"
+}
+
+basic_tx_common()
+{
+ local af_str=$1; shift
+ local proto=$1; shift
+ local local_addr=$1; shift
+ local plen=$1; shift
+ local remote_addr=$1; shift
+
+ RET=0
+
+ # Test basic Tx functionality. Check that stats are incremented on
+ # both the FDB nexthop group and the egress device.
+
+ run_cmd "ip -n $ns1 link add name dummy1 up type dummy"
+ run_cmd "ip -n $ns1 route add $remote_addr/$plen dev dummy1"
+ run_cmd "tc -n $ns1 qdisc add dev dummy1 clsact"
+ run_cmd "tc -n $ns1 filter add dev dummy1 egress proto $proto pref 1 handle 101 flower ip_proto udp dst_ip $remote_addr dst_port 4789 action pass"
+
+ run_cmd "ip -n $ns1 address add $local_addr/$plen dev lo"
+
+ run_cmd "ip -n $ns1 nexthop add id 1 via $remote_addr fdb"
+ run_cmd "ip -n $ns1 nexthop add id 10 group 1 fdb"
+
+ run_cmd "ip -n $ns1 link add name vx0 up type vxlan id 10010 local $local_addr dstport 4789"
+ run_cmd "bridge -n $ns1 fdb add 00:11:22:33:44:55 dev vx0 self static nhid 10"
+
+ run_cmd "ip netns exec $ns1 mausezahn vx0 -a own -b 00:11:22:33:44:55 -c 1 -q"
+
+ busywait "$BUSYWAIT_TIMEOUT" until_counter_is "== 1" nh_stats_get > /dev/null
+ check_err $? "FDB nexthop group stats did not increase"
+
+ busywait "$BUSYWAIT_TIMEOUT" until_counter_is "== 1" tc_stats_get > /dev/null
+ check_err $? "tc filter stats did not increase"
+
+ log_test "VXLAN FDB nexthop: $af_str basic Tx"
+}
+
+basic_tx_ipv4()
+{
+ basic_tx_common "IPv4" ipv4 192.0.2.1 32 192.0.2.2
+}
+
+basic_tx_ipv6()
+{
+ basic_tx_common "IPv6" ipv6 2001:db8:1::1 128 2001:db8:1::2
+}
+
+learning()
+{
+ RET=0
+
+ # When learning is enabled on the VXLAN device, an incoming packet
+ # might try to refresh an FDB entry that points to an FDB nexthop group
+ # instead of an ordinary remote destination. Check that the kernel does
+ # not crash in this situation.
+
+ run_cmd "ip -n $ns1 address add 192.0.2.1/32 dev lo"
+ run_cmd "ip -n $ns1 address add 192.0.2.2/32 dev lo"
+
+ run_cmd "ip -n $ns1 nexthop add id 1 via 192.0.2.3 fdb"
+ run_cmd "ip -n $ns1 nexthop add id 10 group 1 fdb"
+
+ run_cmd "ip -n $ns1 link add name vx0 up type vxlan id 10010 local 192.0.2.1 dstport 12345 localbypass"
+ run_cmd "ip -n $ns1 link add name vx1 up type vxlan id 10020 local 192.0.2.2 dstport 54321 learning"
+
+ run_cmd "bridge -n $ns1 fdb add 00:11:22:33:44:55 dev vx0 self static dst 192.0.2.2 port 54321 vni 10020"
+ run_cmd "bridge -n $ns1 fdb add 00:aa:bb:cc:dd:ee dev vx1 self static nhid 10"
+
+ run_cmd "ip netns exec $ns1 mausezahn vx0 -a 00:aa:bb:cc:dd:ee -b 00:11:22:33:44:55 -c 1 -q"
+
+ log_test "VXLAN FDB nexthop: learning"
+}
+
+proxy_common()
+{
+ local af_str=$1; shift
+ local local_addr=$1; shift
+ local plen=$1; shift
+ local remote_addr=$1; shift
+ local neigh_addr=$1; shift
+ local ping_cmd=$1; shift
+
+ RET=0
+
+ # When the "proxy" option is enabled on the VXLAN device, the device
+ # will suppress ARP requests and IPv6 Neighbor Solicitation messages if
+ # it is able to reply on behalf of the remote host. That is, if a
+ # matching and valid neighbor entry is configured on the VXLAN device
+ # whose MAC address is not behind the "any" remote (0.0.0.0 / ::). The
+ # FDB entry for the neighbor's MAC address might point to an FDB
+ # nexthop group instead of an ordinary remote destination. Check that
+ # the kernel does not crash in this situation.
+
+ run_cmd "ip -n $ns1 address add $local_addr/$plen dev lo"
+
+ run_cmd "ip -n $ns1 nexthop add id 1 via $remote_addr fdb"
+ run_cmd "ip -n $ns1 nexthop add id 10 group 1 fdb"
+
+ run_cmd "ip -n $ns1 link add name vx0 up type vxlan id 10010 local $local_addr dstport 4789 proxy"
+
+ run_cmd "ip -n $ns1 neigh add $neigh_addr lladdr 00:11:22:33:44:55 nud perm dev vx0"
+
+ run_cmd "bridge -n $ns1 fdb add 00:11:22:33:44:55 dev vx0 self static nhid 10"
+
+ run_cmd "ip netns exec $ns1 $ping_cmd"
+
+ log_test "VXLAN FDB nexthop: $af_str proxy"
+}
+
+proxy_ipv4()
+{
+ proxy_common "IPv4" 192.0.2.1 32 192.0.2.2 192.0.2.3 \
+ "arping -b -c 1 -s 192.0.2.1 -I vx0 192.0.2.3"
+}
+
+proxy_ipv6()
+{
+ proxy_common "IPv6" 2001:db8:1::1 128 2001:db8:1::2 2001:db8:1::3 \
+ "ndisc6 -r 1 -s 2001:db8:1::1 -w 1 2001:db8:1::3 vx0"
+}
+
+################################################################################
+# Usage
+
+usage()
+{
+ cat <<EOF
+usage: ${0##*/} OPTS
+
+ -t <test> Test(s) to run (default: all)
+ (options: $TESTS)
+ -p Pause on fail
+ -v Verbose mode (show commands and output)
+EOF
+}
+
+################################################################################
+# Main
+
+while getopts ":t:pvh" opt; do
+ case $opt in
+ t) TESTS=$OPTARG;;
+ p) PAUSE_ON_FAIL=yes;;
+ v) VERBOSE=$((VERBOSE + 1));;
+ h) usage; exit 0;;
+ *) usage; exit 1;;
+ esac
+done
+
+require_command mausezahn
+require_command arping
+require_command ndisc6
+require_command jq
+
+if ! ip nexthop help 2>&1 | grep -q "stats"; then
+ echo "SKIP: iproute2 ip too old, missing nexthop stats support"
+ exit "$ksft_skip"
+fi
+
+trap exit_cleanup_all EXIT
+
+for t in $TESTS
+do
+ setup_ns ns1; $t; cleanup_all_ns;
+done