aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJakub Kicinski <kuba@kernel.org>2025-03-24 15:09:33 -0700
committerJakub Kicinski <kuba@kernel.org>2025-03-24 15:09:33 -0700
commit98b2c048e2e2f1810e106bd2a0b4ad50b241af1b (patch)
tree4eaf83b19ef62816daa49f829a52c3975aa52458
parentnet: introduce per netns packet chains (diff)
parentselftests: vxlan_bridge: Test flood with unresolved FDB entry (diff)
downloadlinux-rng-98b2c048e2e2f1810e106bd2a0b4ad50b241af1b.tar.xz
linux-rng-98b2c048e2e2f1810e106bd2a0b4ad50b241af1b.zip
Merge branch 'mlxsw-add-vxlan-to-the-same-hardware-domain-as-physical-bridge-ports'
Petr Machata says: ==================== mlxsw: Add VXLAN to the same hardware domain as physical bridge ports Amit Cohen writes: Packets which are trapped to CPU for forwarding in software data path are handled according to driver marking of skb->offload_{,l3}_fwd_mark. Packets which are marked as L2-forwarded in hardware, will not be flooded by the bridge to bridge ports which are in the same hardware domain as the ingress port. Currently, mlxsw does not add VXLAN bridge ports to the same hardware domain as physical bridge ports despite the fact that the device is able to forward packets to and from VXLAN tunnels in hardware. In some scenarios this can result in remote VTEPs receiving duplicate packets. To solve such packets duplication, add VXLAN bridge ports to the same hardware domain as other bridge ports. One complication is ARP suppression which requires the local VTEP to avoid flooding ARP packets to remote VTEPs if the local VTEP is able to reply on behalf of remote hosts. This is currently implemented by having the device flood ARP packets in hardware and trapping them during VXLAN encapsulation, but marking them with skb->offload_fwd_mark=1 so that the bridge will not re-flood them to physical bridge ports. The above scheme will break when VXLAN bridge ports are added to the same hardware domain as physical bridge ports as ARP packets that cannot be suppressed by the bridge will not be able to egress the VXLAN bridge ports due to hardware domain filtering. This is solved by trapping ARP packets when they enter the device and not marking them as being forwarded in hardware. Patch set overview: Patch #1 sets hardware to trap ARP packets at layer 2 Patches #2-#4 are preparations for setting hardwarwe domain of VXLAN Patch #5 sets hardware domain of VXLAN Patch #6 extends VXLAN flood test to verify that this set solves the packets duplication ==================== Link: https://patch.msgid.link/cover.1742224300.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum.c22
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum.h4
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c66
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c12
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/trap.h5
-rwxr-xr-xtools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh8
-rwxr-xr-xtools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh15
7 files changed, 83 insertions, 49 deletions
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index c7e6a3258244..3080ea032e7f 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -2409,8 +2409,6 @@ static const struct mlxsw_listener mlxsw_sp_listener[] = {
/* Multicast Router Traps */
MLXSW_SP_RXL_MARK(ACL1, TRAP_TO_CPU, MULTICAST, false),
MLXSW_SP_RXL_L3_MARK(ACL2, TRAP_TO_CPU, MULTICAST, false),
- /* NVE traps */
- MLXSW_SP_RXL_MARK(NVE_ENCAP_ARP, TRAP_TO_CPU, NEIGH_DISCOVERY, false),
};
static const struct mlxsw_listener mlxsw_sp1_listener[] = {
@@ -5232,25 +5230,13 @@ static int mlxsw_sp_netdevice_vxlan_event(struct mlxsw_sp *mlxsw_sp,
return 0;
if (!mlxsw_sp_bridge_vxlan_is_valid(upper_dev, extack))
return -EOPNOTSUPP;
- if (cu_info->linking) {
- if (!netif_running(dev))
- return 0;
- /* When the bridge is VLAN-aware, the VNI of the VxLAN
- * device needs to be mapped to a VLAN, but at this
- * point no VLANs are configured on the VxLAN device
- */
- if (br_vlan_enabled(upper_dev))
- return 0;
+ if (!netif_running(dev))
+ return 0;
+ if (cu_info->linking)
return mlxsw_sp_bridge_vxlan_join(mlxsw_sp, upper_dev,
dev, 0, extack);
- } else {
- /* VLANs were already flushed, which triggered the
- * necessary cleanup
- */
- if (br_vlan_enabled(upper_dev))
- return 0;
+ else
mlxsw_sp_bridge_vxlan_leave(mlxsw_sp, dev);
- }
break;
case NETDEV_PRE_UP:
upper_dev = netdev_master_upper_dev_get(dev);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index fa7082ee5183..37cd1d002b3b 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -661,10 +661,10 @@ bool mlxsw_sp_bridge_device_is_offloaded(const struct mlxsw_sp *mlxsw_sp,
const struct net_device *br_dev);
int mlxsw_sp_bridge_vxlan_join(struct mlxsw_sp *mlxsw_sp,
const struct net_device *br_dev,
- const struct net_device *vxlan_dev, u16 vid,
+ struct net_device *vxlan_dev, u16 vid,
struct netlink_ext_ack *extack);
void mlxsw_sp_bridge_vxlan_leave(struct mlxsw_sp *mlxsw_sp,
- const struct net_device *vxlan_dev);
+ struct net_device *vxlan_dev);
extern struct notifier_block mlxsw_sp_switchdev_notifier;
/* spectrum.c */
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
index 6397ff0dc951..a48bf342084d 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
@@ -2929,23 +2929,8 @@ void mlxsw_sp_port_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_port,
mlxsw_sp_bridge_port_put(mlxsw_sp->bridge, bridge_port);
}
-int mlxsw_sp_bridge_vxlan_join(struct mlxsw_sp *mlxsw_sp,
- const struct net_device *br_dev,
- const struct net_device *vxlan_dev, u16 vid,
- struct netlink_ext_ack *extack)
-{
- struct mlxsw_sp_bridge_device *bridge_device;
-
- bridge_device = mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, br_dev);
- if (WARN_ON(!bridge_device))
- return -EINVAL;
-
- return bridge_device->ops->vxlan_join(bridge_device, vxlan_dev, vid,
- extack);
-}
-
-void mlxsw_sp_bridge_vxlan_leave(struct mlxsw_sp *mlxsw_sp,
- const struct net_device *vxlan_dev)
+static void __mlxsw_sp_bridge_vxlan_leave(struct mlxsw_sp *mlxsw_sp,
+ const struct net_device *vxlan_dev)
{
struct vxlan_dev *vxlan = netdev_priv(vxlan_dev);
struct mlxsw_sp_fid *fid;
@@ -2963,6 +2948,47 @@ void mlxsw_sp_bridge_vxlan_leave(struct mlxsw_sp *mlxsw_sp,
mlxsw_sp_fid_put(fid);
}
+int mlxsw_sp_bridge_vxlan_join(struct mlxsw_sp *mlxsw_sp,
+ const struct net_device *br_dev,
+ struct net_device *vxlan_dev, u16 vid,
+ struct netlink_ext_ack *extack)
+{
+ struct mlxsw_sp_bridge_device *bridge_device;
+ struct mlxsw_sp_port *mlxsw_sp_port;
+ int err;
+
+ bridge_device = mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, br_dev);
+ if (WARN_ON(!bridge_device))
+ return -EINVAL;
+
+ mlxsw_sp_port = mlxsw_sp_port_dev_lower_find(bridge_device->dev);
+ if (!mlxsw_sp_port)
+ return -EINVAL;
+
+ err = bridge_device->ops->vxlan_join(bridge_device, vxlan_dev, vid,
+ extack);
+ if (err)
+ return err;
+
+ err = switchdev_bridge_port_offload(vxlan_dev, mlxsw_sp_port->dev,
+ NULL, NULL, NULL, false, extack);
+ if (err)
+ goto err_bridge_port_offload;
+
+ return 0;
+
+err_bridge_port_offload:
+ __mlxsw_sp_bridge_vxlan_leave(mlxsw_sp, vxlan_dev);
+ return err;
+}
+
+void mlxsw_sp_bridge_vxlan_leave(struct mlxsw_sp *mlxsw_sp,
+ struct net_device *vxlan_dev)
+{
+ switchdev_bridge_port_unoffload(vxlan_dev, NULL, NULL, NULL);
+ __mlxsw_sp_bridge_vxlan_leave(mlxsw_sp, vxlan_dev);
+}
+
static void
mlxsw_sp_switchdev_vxlan_addr_convert(const union vxlan_addr *vxlan_addr,
enum mlxsw_sp_l3proto *proto,
@@ -3867,7 +3893,7 @@ mlxsw_sp_switchdev_vxlan_vlan_add(struct mlxsw_sp *mlxsw_sp,
mlxsw_sp_fid_put(fid);
return -EINVAL;
}
- mlxsw_sp_bridge_vxlan_leave(mlxsw_sp, vxlan_dev);
+ __mlxsw_sp_bridge_vxlan_leave(mlxsw_sp, vxlan_dev);
mlxsw_sp_fid_put(fid);
return 0;
}
@@ -3883,7 +3909,7 @@ mlxsw_sp_switchdev_vxlan_vlan_add(struct mlxsw_sp *mlxsw_sp,
/* Fourth case: Thew new VLAN is PVID, which means the VLAN currently
* mapped to the VNI should be unmapped
*/
- mlxsw_sp_bridge_vxlan_leave(mlxsw_sp, vxlan_dev);
+ __mlxsw_sp_bridge_vxlan_leave(mlxsw_sp, vxlan_dev);
mlxsw_sp_fid_put(fid);
/* Fifth case: The new VLAN is also egress untagged, which means the
@@ -3923,7 +3949,7 @@ mlxsw_sp_switchdev_vxlan_vlan_del(struct mlxsw_sp *mlxsw_sp,
if (mlxsw_sp_fid_8021q_vid(fid) != vid)
goto out;
- mlxsw_sp_bridge_vxlan_leave(mlxsw_sp, vxlan_dev);
+ __mlxsw_sp_bridge_vxlan_leave(mlxsw_sp, vxlan_dev);
out:
mlxsw_sp_fid_put(fid);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c
index 1f9c1c86839f..b5c3f789c685 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c
@@ -959,18 +959,18 @@ static const struct mlxsw_sp_trap_item mlxsw_sp_trap_items_arr[] = {
},
{
.trap = MLXSW_SP_TRAP_CONTROL(ARP_REQUEST, NEIGH_DISCOVERY,
- MIRROR),
+ TRAP),
.listeners_arr = {
- MLXSW_SP_RXL_MARK(ROUTER_ARPBC, NEIGH_DISCOVERY,
- TRAP_TO_CPU, false),
+ MLXSW_SP_RXL_NO_MARK(ARPBC, NEIGH_DISCOVERY,
+ TRAP_TO_CPU, false),
},
},
{
.trap = MLXSW_SP_TRAP_CONTROL(ARP_RESPONSE, NEIGH_DISCOVERY,
- MIRROR),
+ TRAP),
.listeners_arr = {
- MLXSW_SP_RXL_MARK(ROUTER_ARPUC, NEIGH_DISCOVERY,
- TRAP_TO_CPU, false),
+ MLXSW_SP_RXL_NO_MARK(ARPUC, NEIGH_DISCOVERY,
+ TRAP_TO_CPU, false),
},
},
{
diff --git a/drivers/net/ethernet/mellanox/mlxsw/trap.h b/drivers/net/ethernet/mellanox/mlxsw/trap.h
index 83477c8e6971..80ee5c4825dc 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/trap.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/trap.h
@@ -29,6 +29,8 @@ enum {
MLXSW_TRAP_ID_FDB_MISMATCH = 0x3B,
MLXSW_TRAP_ID_FID_MISS = 0x3D,
MLXSW_TRAP_ID_DECAP_ECN0 = 0x40,
+ MLXSW_TRAP_ID_ARPBC = 0x50,
+ MLXSW_TRAP_ID_ARPUC = 0x51,
MLXSW_TRAP_ID_MTUERROR = 0x52,
MLXSW_TRAP_ID_TTLERROR = 0x53,
MLXSW_TRAP_ID_LBERROR = 0x54,
@@ -66,13 +68,10 @@ enum {
MLXSW_TRAP_ID_HOST_MISS_IPV6 = 0x92,
MLXSW_TRAP_ID_IPIP_DECAP_ERROR = 0xB1,
MLXSW_TRAP_ID_NVE_DECAP_ARP = 0xB8,
- MLXSW_TRAP_ID_NVE_ENCAP_ARP = 0xBD,
MLXSW_TRAP_ID_IPV4_BFD = 0xD0,
MLXSW_TRAP_ID_IPV6_BFD = 0xD1,
MLXSW_TRAP_ID_ROUTER_ALERT_IPV4 = 0xD6,
MLXSW_TRAP_ID_ROUTER_ALERT_IPV6 = 0xD7,
- MLXSW_TRAP_ID_ROUTER_ARPBC = 0xE0,
- MLXSW_TRAP_ID_ROUTER_ARPUC = 0xE1,
MLXSW_TRAP_ID_DISCARD_NON_ROUTABLE = 0x11A,
MLXSW_TRAP_ID_DISCARD_ROUTER2 = 0x130,
MLXSW_TRAP_ID_DISCARD_ROUTER3 = 0x131,
diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh
index 180c5eca556f..b43816dd998c 100755
--- a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh
+++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh
@@ -428,6 +428,14 @@ __test_flood()
test_flood()
{
__test_flood de:ad:be:ef:13:37 192.0.2.100 "flood"
+
+ # Add an entry with arbitrary destination IP. Verify that packets are
+ # not duplicated (this can happen if hardware floods the packets, and
+ # then traps them due to misconfiguration, so software data path repeats
+ # flooding and resends packets).
+ bridge fdb append dev vx1 00:00:00:00:00:00 dst 198.51.100.1 self
+ __test_flood de:ad:be:ef:13:37 192.0.2.100 "flood, unresolved FDB entry"
+ bridge fdb del dev vx1 00:00:00:00:00:00 dst 198.51.100.1 self
}
vxlan_fdb_add_del()
diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh
index fb9a34cb50c6..afc65647f673 100755
--- a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh
+++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh
@@ -539,6 +539,21 @@ test_flood()
10 10 0 10 0
__test_flood ca:fe:be:ef:13:37 198.51.100.100 20 "flood vlan 20" \
10 0 10 0 10
+
+ # Add entries with arbitrary destination IP. Verify that packets are
+ # not duplicated (this can happen if hardware floods the packets, and
+ # then traps them due to misconfiguration, so software data path repeats
+ # flooding and resends packets).
+ bridge fdb append dev vx10 00:00:00:00:00:00 dst 203.0.113.1 self
+ bridge fdb append dev vx20 00:00:00:00:00:00 dst 203.0.113.2 self
+
+ __test_flood de:ad:be:ef:13:37 192.0.2.100 10 \
+ "flood vlan 10, unresolved FDB entry" 10 10 0 10 0
+ __test_flood ca:fe:be:ef:13:37 198.51.100.100 20 \
+ "flood vlan 20, unresolved FDB entry" 10 0 10 0 10
+
+ bridge fdb del dev vx20 00:00:00:00:00:00 dst 203.0.113.2 self
+ bridge fdb del dev vx10 00:00:00:00:00:00 dst 203.0.113.1 self
}
vxlan_fdb_add_del()