diff options
author | 2025-03-24 15:09:33 -0700 | |
---|---|---|
committer | 2025-03-24 15:09:33 -0700 | |
commit | 98b2c048e2e2f1810e106bd2a0b4ad50b241af1b (patch) | |
tree | 4eaf83b19ef62816daa49f829a52c3975aa52458 | |
parent | net: introduce per netns packet chains (diff) | |
parent | selftests: vxlan_bridge: Test flood with unresolved FDB entry (diff) | |
download | wireguard-linux-98b2c048e2e2f1810e106bd2a0b4ad50b241af1b.tar.xz wireguard-linux-98b2c048e2e2f1810e106bd2a0b4ad50b241af1b.zip |
Merge branch 'mlxsw-add-vxlan-to-the-same-hardware-domain-as-physical-bridge-ports'
Petr Machata says:
====================
mlxsw: Add VXLAN to the same hardware domain as physical bridge ports
Amit Cohen writes:
Packets which are trapped to CPU for forwarding in software data path
are handled according to driver marking of skb->offload_{,l3}_fwd_mark.
Packets which are marked as L2-forwarded in hardware, will not be flooded
by the bridge to bridge ports which are in the same hardware domain as the
ingress port.
Currently, mlxsw does not add VXLAN bridge ports to the same hardware
domain as physical bridge ports despite the fact that the device is able
to forward packets to and from VXLAN tunnels in hardware. In some
scenarios this can result in remote VTEPs receiving duplicate packets.
To solve such packets duplication, add VXLAN bridge ports to the same
hardware domain as other bridge ports.
One complication is ARP suppression which requires the local VTEP to avoid
flooding ARP packets to remote VTEPs if the local VTEP is able to reply on
behalf of remote hosts. This is currently implemented by having the device
flood ARP packets in hardware and trapping them during VXLAN encapsulation,
but marking them with skb->offload_fwd_mark=1 so that the bridge will not
re-flood them to physical bridge ports.
The above scheme will break when VXLAN bridge ports are added to the same
hardware domain as physical bridge ports as ARP packets that cannot be
suppressed by the bridge will not be able to egress the VXLAN bridge ports
due to hardware domain filtering. This is solved by trapping ARP packets
when they enter the device and not marking them as being forwarded in
hardware.
Patch set overview:
Patch #1 sets hardware to trap ARP packets at layer 2
Patches #2-#4 are preparations for setting hardwarwe domain of VXLAN
Patch #5 sets hardware domain of VXLAN
Patch #6 extends VXLAN flood test to verify that this set solves the
packets duplication
====================
Link: https://patch.msgid.link/cover.1742224300.git.petrm@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
7 files changed, 83 insertions, 49 deletions
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index c7e6a3258244..3080ea032e7f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -2409,8 +2409,6 @@ static const struct mlxsw_listener mlxsw_sp_listener[] = { /* Multicast Router Traps */ MLXSW_SP_RXL_MARK(ACL1, TRAP_TO_CPU, MULTICAST, false), MLXSW_SP_RXL_L3_MARK(ACL2, TRAP_TO_CPU, MULTICAST, false), - /* NVE traps */ - MLXSW_SP_RXL_MARK(NVE_ENCAP_ARP, TRAP_TO_CPU, NEIGH_DISCOVERY, false), }; static const struct mlxsw_listener mlxsw_sp1_listener[] = { @@ -5232,25 +5230,13 @@ static int mlxsw_sp_netdevice_vxlan_event(struct mlxsw_sp *mlxsw_sp, return 0; if (!mlxsw_sp_bridge_vxlan_is_valid(upper_dev, extack)) return -EOPNOTSUPP; - if (cu_info->linking) { - if (!netif_running(dev)) - return 0; - /* When the bridge is VLAN-aware, the VNI of the VxLAN - * device needs to be mapped to a VLAN, but at this - * point no VLANs are configured on the VxLAN device - */ - if (br_vlan_enabled(upper_dev)) - return 0; + if (!netif_running(dev)) + return 0; + if (cu_info->linking) return mlxsw_sp_bridge_vxlan_join(mlxsw_sp, upper_dev, dev, 0, extack); - } else { - /* VLANs were already flushed, which triggered the - * necessary cleanup - */ - if (br_vlan_enabled(upper_dev)) - return 0; + else mlxsw_sp_bridge_vxlan_leave(mlxsw_sp, dev); - } break; case NETDEV_PRE_UP: upper_dev = netdev_master_upper_dev_get(dev); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index fa7082ee5183..37cd1d002b3b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -661,10 +661,10 @@ bool mlxsw_sp_bridge_device_is_offloaded(const struct mlxsw_sp *mlxsw_sp, const struct net_device *br_dev); int mlxsw_sp_bridge_vxlan_join(struct mlxsw_sp *mlxsw_sp, const struct net_device *br_dev, - const struct net_device *vxlan_dev, u16 vid, + struct net_device *vxlan_dev, u16 vid, struct netlink_ext_ack *extack); void mlxsw_sp_bridge_vxlan_leave(struct mlxsw_sp *mlxsw_sp, - const struct net_device *vxlan_dev); + struct net_device *vxlan_dev); extern struct notifier_block mlxsw_sp_switchdev_notifier; /* spectrum.c */ diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index 6397ff0dc951..a48bf342084d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -2929,23 +2929,8 @@ void mlxsw_sp_port_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_port, mlxsw_sp_bridge_port_put(mlxsw_sp->bridge, bridge_port); } -int mlxsw_sp_bridge_vxlan_join(struct mlxsw_sp *mlxsw_sp, - const struct net_device *br_dev, - const struct net_device *vxlan_dev, u16 vid, - struct netlink_ext_ack *extack) -{ - struct mlxsw_sp_bridge_device *bridge_device; - - bridge_device = mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, br_dev); - if (WARN_ON(!bridge_device)) - return -EINVAL; - - return bridge_device->ops->vxlan_join(bridge_device, vxlan_dev, vid, - extack); -} - -void mlxsw_sp_bridge_vxlan_leave(struct mlxsw_sp *mlxsw_sp, - const struct net_device *vxlan_dev) +static void __mlxsw_sp_bridge_vxlan_leave(struct mlxsw_sp *mlxsw_sp, + const struct net_device *vxlan_dev) { struct vxlan_dev *vxlan = netdev_priv(vxlan_dev); struct mlxsw_sp_fid *fid; @@ -2963,6 +2948,47 @@ void mlxsw_sp_bridge_vxlan_leave(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_fid_put(fid); } +int mlxsw_sp_bridge_vxlan_join(struct mlxsw_sp *mlxsw_sp, + const struct net_device *br_dev, + struct net_device *vxlan_dev, u16 vid, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp_bridge_device *bridge_device; + struct mlxsw_sp_port *mlxsw_sp_port; + int err; + + bridge_device = mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, br_dev); + if (WARN_ON(!bridge_device)) + return -EINVAL; + + mlxsw_sp_port = mlxsw_sp_port_dev_lower_find(bridge_device->dev); + if (!mlxsw_sp_port) + return -EINVAL; + + err = bridge_device->ops->vxlan_join(bridge_device, vxlan_dev, vid, + extack); + if (err) + return err; + + err = switchdev_bridge_port_offload(vxlan_dev, mlxsw_sp_port->dev, + NULL, NULL, NULL, false, extack); + if (err) + goto err_bridge_port_offload; + + return 0; + +err_bridge_port_offload: + __mlxsw_sp_bridge_vxlan_leave(mlxsw_sp, vxlan_dev); + return err; +} + +void mlxsw_sp_bridge_vxlan_leave(struct mlxsw_sp *mlxsw_sp, + struct net_device *vxlan_dev) +{ + switchdev_bridge_port_unoffload(vxlan_dev, NULL, NULL, NULL); + __mlxsw_sp_bridge_vxlan_leave(mlxsw_sp, vxlan_dev); +} + static void mlxsw_sp_switchdev_vxlan_addr_convert(const union vxlan_addr *vxlan_addr, enum mlxsw_sp_l3proto *proto, @@ -3867,7 +3893,7 @@ mlxsw_sp_switchdev_vxlan_vlan_add(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_fid_put(fid); return -EINVAL; } - mlxsw_sp_bridge_vxlan_leave(mlxsw_sp, vxlan_dev); + __mlxsw_sp_bridge_vxlan_leave(mlxsw_sp, vxlan_dev); mlxsw_sp_fid_put(fid); return 0; } @@ -3883,7 +3909,7 @@ mlxsw_sp_switchdev_vxlan_vlan_add(struct mlxsw_sp *mlxsw_sp, /* Fourth case: Thew new VLAN is PVID, which means the VLAN currently * mapped to the VNI should be unmapped */ - mlxsw_sp_bridge_vxlan_leave(mlxsw_sp, vxlan_dev); + __mlxsw_sp_bridge_vxlan_leave(mlxsw_sp, vxlan_dev); mlxsw_sp_fid_put(fid); /* Fifth case: The new VLAN is also egress untagged, which means the @@ -3923,7 +3949,7 @@ mlxsw_sp_switchdev_vxlan_vlan_del(struct mlxsw_sp *mlxsw_sp, if (mlxsw_sp_fid_8021q_vid(fid) != vid) goto out; - mlxsw_sp_bridge_vxlan_leave(mlxsw_sp, vxlan_dev); + __mlxsw_sp_bridge_vxlan_leave(mlxsw_sp, vxlan_dev); out: mlxsw_sp_fid_put(fid); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c index 1f9c1c86839f..b5c3f789c685 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c @@ -959,18 +959,18 @@ static const struct mlxsw_sp_trap_item mlxsw_sp_trap_items_arr[] = { }, { .trap = MLXSW_SP_TRAP_CONTROL(ARP_REQUEST, NEIGH_DISCOVERY, - MIRROR), + TRAP), .listeners_arr = { - MLXSW_SP_RXL_MARK(ROUTER_ARPBC, NEIGH_DISCOVERY, - TRAP_TO_CPU, false), + MLXSW_SP_RXL_NO_MARK(ARPBC, NEIGH_DISCOVERY, + TRAP_TO_CPU, false), }, }, { .trap = MLXSW_SP_TRAP_CONTROL(ARP_RESPONSE, NEIGH_DISCOVERY, - MIRROR), + TRAP), .listeners_arr = { - MLXSW_SP_RXL_MARK(ROUTER_ARPUC, NEIGH_DISCOVERY, - TRAP_TO_CPU, false), + MLXSW_SP_RXL_NO_MARK(ARPUC, NEIGH_DISCOVERY, + TRAP_TO_CPU, false), }, }, { diff --git a/drivers/net/ethernet/mellanox/mlxsw/trap.h b/drivers/net/ethernet/mellanox/mlxsw/trap.h index 83477c8e6971..80ee5c4825dc 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/trap.h +++ b/drivers/net/ethernet/mellanox/mlxsw/trap.h @@ -29,6 +29,8 @@ enum { MLXSW_TRAP_ID_FDB_MISMATCH = 0x3B, MLXSW_TRAP_ID_FID_MISS = 0x3D, MLXSW_TRAP_ID_DECAP_ECN0 = 0x40, + MLXSW_TRAP_ID_ARPBC = 0x50, + MLXSW_TRAP_ID_ARPUC = 0x51, MLXSW_TRAP_ID_MTUERROR = 0x52, MLXSW_TRAP_ID_TTLERROR = 0x53, MLXSW_TRAP_ID_LBERROR = 0x54, @@ -66,13 +68,10 @@ enum { MLXSW_TRAP_ID_HOST_MISS_IPV6 = 0x92, MLXSW_TRAP_ID_IPIP_DECAP_ERROR = 0xB1, MLXSW_TRAP_ID_NVE_DECAP_ARP = 0xB8, - MLXSW_TRAP_ID_NVE_ENCAP_ARP = 0xBD, MLXSW_TRAP_ID_IPV4_BFD = 0xD0, MLXSW_TRAP_ID_IPV6_BFD = 0xD1, MLXSW_TRAP_ID_ROUTER_ALERT_IPV4 = 0xD6, MLXSW_TRAP_ID_ROUTER_ALERT_IPV6 = 0xD7, - MLXSW_TRAP_ID_ROUTER_ARPBC = 0xE0, - MLXSW_TRAP_ID_ROUTER_ARPUC = 0xE1, MLXSW_TRAP_ID_DISCARD_NON_ROUTABLE = 0x11A, MLXSW_TRAP_ID_DISCARD_ROUTER2 = 0x130, MLXSW_TRAP_ID_DISCARD_ROUTER3 = 0x131, diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh index 180c5eca556f..b43816dd998c 100755 --- a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh +++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh @@ -428,6 +428,14 @@ __test_flood() test_flood() { __test_flood de:ad:be:ef:13:37 192.0.2.100 "flood" + + # Add an entry with arbitrary destination IP. Verify that packets are + # not duplicated (this can happen if hardware floods the packets, and + # then traps them due to misconfiguration, so software data path repeats + # flooding and resends packets). + bridge fdb append dev vx1 00:00:00:00:00:00 dst 198.51.100.1 self + __test_flood de:ad:be:ef:13:37 192.0.2.100 "flood, unresolved FDB entry" + bridge fdb del dev vx1 00:00:00:00:00:00 dst 198.51.100.1 self } vxlan_fdb_add_del() diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh index fb9a34cb50c6..afc65647f673 100755 --- a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh +++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh @@ -539,6 +539,21 @@ test_flood() 10 10 0 10 0 __test_flood ca:fe:be:ef:13:37 198.51.100.100 20 "flood vlan 20" \ 10 0 10 0 10 + + # Add entries with arbitrary destination IP. Verify that packets are + # not duplicated (this can happen if hardware floods the packets, and + # then traps them due to misconfiguration, so software data path repeats + # flooding and resends packets). + bridge fdb append dev vx10 00:00:00:00:00:00 dst 203.0.113.1 self + bridge fdb append dev vx20 00:00:00:00:00:00 dst 203.0.113.2 self + + __test_flood de:ad:be:ef:13:37 192.0.2.100 10 \ + "flood vlan 10, unresolved FDB entry" 10 10 0 10 0 + __test_flood ca:fe:be:ef:13:37 198.51.100.100 20 \ + "flood vlan 20, unresolved FDB entry" 10 0 10 0 10 + + bridge fdb del dev vx20 00:00:00:00:00:00 dst 203.0.113.2 self + bridge fdb del dev vx10 00:00:00:00:00:00 dst 203.0.113.1 self } vxlan_fdb_add_del() |