aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/drivers/net
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2023-03-17 08:05:50 +0000
committerDavid S. Miller <davem@davemloft.net>2023-03-17 08:05:50 +0000
commitabf36703d7046cd0aaf5ac5524d321851fb6bf03 (patch)
treea5c3e186cba39d49beef330f53768593647dbadf /drivers/net
parentMerge branch 'J784S4-CPSW9G-bindings' (diff)
parentselftests: net: Add VXLAN MDB test (diff)
downloadwireguard-linux-abf36703d7046cd0aaf5ac5524d321851fb6bf03.tar.xz
wireguard-linux-abf36703d7046cd0aaf5ac5524d321851fb6bf03.zip
Merge branch 'vxlan-MDB-support'
Ido Schimmel says: ==================== vxlan: Add MDB support tl;dr ===== This patchset implements MDB support in the VXLAN driver, allowing it to selectively forward IP multicast traffic to VTEPs with interested receivers instead of flooding it to all the VTEPs as BUM. The motivating use case is intra and inter subnet multicast forwarding using EVPN [1][2], which means that MDB entries are only installed by the user space control plane and no snooping is implemented, thereby avoiding a lot of unnecessary complexity in the kernel. Background ========== Both the bridge and VXLAN drivers have an FDB that allows them to forward Ethernet frames based on their destination MAC addresses and VLAN/VNI. These FDBs are managed using the same PF_BRIDGE/RTM_*NEIGH netlink messages and bridge(8) utility. However, only the bridge driver has an MDB that allows it to selectively forward IP multicast packets to bridge ports with interested receivers behind them, based on (S, G) and (*, G) MDB entries. When these packets reach the VXLAN driver they are flooded using the "all-zeros" FDB entry (00:00:00:00:00:00). The entry either includes the list of all the VTEPs in the tenant domain (when ingress replication is used) or the multicast address of the BUM tunnel (when P2MP tunnels are used), to which all the VTEPs join. Networks that make heavy use of multicast in the overlay can benefit from a solution that allows them to selectively forward IP multicast traffic only to VTEPs with interested receivers. Such a solution is described in the next section. Motivation ========== RFC 7432 [3] defines a "MAC/IP Advertisement route" (type 2) [4] that allows VTEPs in the EVPN network to advertise and learn reachability information for unicast MAC addresses. Traffic destined to a unicast MAC address can therefore be selectively forwarded to a single VTEP behind which the MAC is located. The same is not true for IP multicast traffic. Such traffic is simply flooded as BUM to all VTEPs in the broadcast domain (BD) / subnet, regardless if a VTEP has interested receivers for the multicast stream or not. This is especially problematic for overlay networks that make heavy use of multicast. The issue is addressed by RFC 9251 [1] that defines a "Selective Multicast Ethernet Tag Route" (type 6) [5] which allows VTEPs in the EVPN network to advertise multicast streams that they are interested in. This is done by having each VTEP suppress IGMP/MLD packets from being transmitted to the NVE network and instead communicate the information over BGP to other VTEPs. The draft in [2] further extends RFC 9251 with procedures to allow efficient forwarding of IP multicast traffic not only in a given subnet, but also between different subnets in a tenant domain. The required changes in the bridge driver to support the above were already merged in merge commit 8150f0cfb24f ("Merge branch 'bridge-mcast-extensions-for-evpn'"). However, full support entails MDB support in the VXLAN driver so that it will be able to selectively forward IP multicast traffic only to VTEPs with interested receivers. The implementation of this MDB is described in the next section. Implementation ============== The user interface is extended to allow user space to specify the destination VTEP(s) and related parameters. Example usage: # bridge mdb add dev vxlan0 port vxlan0 grp 239.1.1.1 permanent dst 198.51.100.1 # bridge mdb add dev vxlan0 port vxlan0 grp 239.1.1.1 permanent dst 192.0.2.1 $ bridge -d -s mdb show dev vxlan0 port vxlan0 grp 239.1.1.1 permanent filter_mode exclude proto static dst 192.0.2.1 0.00 dev vxlan0 port vxlan0 grp 239.1.1.1 permanent filter_mode exclude proto static dst 198.51.100.1 0.00 Since the MDB is fully managed by user space and since snooping is not implemented, only permanent entries can be installed and temporary entries are rejected by the kernel. The netlink interface is extended with a few new attributes in the RTM_NEWMDB / RTM_DELMDB request messages: [ struct nlmsghdr ] [ struct br_port_msg ] [ MDBA_SET_ENTRY ] struct br_mdb_entry [ MDBA_SET_ENTRY_ATTRS ] [ MDBE_ATTR_SOURCE ] struct in_addr / struct in6_addr [ MDBE_ATTR_SRC_LIST ] [ MDBE_SRC_LIST_ENTRY ] [ MDBE_SRCATTR_ADDRESS ] struct in_addr / struct in6_addr [ ...] [ MDBE_ATTR_GROUP_MODE ] u8 [ MDBE_ATTR_RTPORT ] u8 [ MDBE_ATTR_DST ] // new struct in_addr / struct in6_addr [ MDBE_ATTR_DST_PORT ] // new u16 [ MDBE_ATTR_VNI ] // new u32 [ MDBE_ATTR_IFINDEX ] // new s32 [ MDBE_ATTR_SRC_VNI ] // new u32 RTM_NEWMDB / RTM_DELMDB responses and notifications are extended with corresponding attributes. One MDB entry that can be installed in the VXLAN MDB, but not in the bridge MDB is the catchall entry (0.0.0.0 / ::). It is used to transmit unregistered multicast traffic that is not link-local and is especially useful when inter-subnet multicast forwarding is required. See patch #12 for a detailed explanation and motivation. It is similar to the "all-zeros" FDB entry that can be installed in the VXLAN FDB, but not the bridge FDB. "added_by_star_ex" entries -------------------------- The bridge driver automatically installs (S, G) MDB port group entries marked as "added_by_star_ex" whenever it detects that an (S, G) entry can prevent traffic from being forwarded via a port associated with an EXCLUDE (*, G) entry. The bridge will add the port to the port group of the (S, G) entry, thereby creating a new port group entry. The complexity associated with these entries is not trivial, but it needs to reside in the bridge driver because it automatically installs MDB entries in response to snooped IGMP / MLD packets. The same in not true for the VXLAN MDB which is entirely managed by user space who is fully capable of forming the correct replication lists on its own. In addition, the complexity associated with the "added_by_star_ex" entries in the VXLAN driver is higher compared to the bridge: Whenever a remote VTEP is added to the catchall entry, it needs to be added to all the existing MDB entries, as such a remote requested all the multicast traffic to be forwarded to it. Similarly, whenever an (*, G) or (S, G) entry is added, all the remotes associated with the catchall entry need to be added to it. Given the above, this patchset does not implement support for such entries. One argument against this decision can be that in the future someone might want to populate the VXLAN MDB in response to decapsulated IGMP / MLD packets and not according to EVPN routes. Regardless of my doubts regarding this possibility, it can be implemented using a new VXLAN device knob that will also enable the "added_by_star_ex" functionality. Testing ======= Tested using existing VXLAN and MDB selftests under "net/" and "net/forwarding/". Added a dedicated selftest in the last patch. Patchset overview ================= Patches #1-#3 are small preparations in the bridge driver. I plan to submit them separately together with an MDB dump test case. Patches #4-#6 are additional preparations centered around the extraction of the MDB netlink handlers from the bridge driver to the common rtnetlink code. This allows reusing the existing MDB netlink messages for the configuration of the VXLAN MDB. Patches #7-#9 include more small preparations in the common rtnetlink code and the VXLAN driver. Patch #10 implements the MDB control path in the VXLAN driver, which will allow user space to create, delete, replace and dump MDB entries. Patches #11-#12 implement the MDB data path in the VXLAN driver, allowing it to selectively forward IP multicast traffic according to the matched MDB entry. Patch #13 finally enables MDB support in the VXLAN driver. iproute2 patches can be found here [6]. Note that in order to fully support the specifications in [1] and [2], additional functionality is required from the data path. However, it can be achieved using existing kernel interfaces which is why it is not described here. Changelog ========= Since v1 [7]: Patch #9: Use htons() in 'case' instead of ntohs() in 'switch'. Since RFC [8]: Patch #3: Use NL_ASSERT_DUMP_CTX_FITS(). Patch #3: memset the entire context when moving to the next device. Patch #3: Reset sequence counters when moving to the next device. Patch #3: Use NL_SET_ERR_MSG_ATTR() in rtnl_validate_mdb_entry(). Patch #7: Remove restrictions regarding mixing of multicast and unicast remote destination IPs in an MDB entry. While such configuration does not make sense to me, it is no forbidden by the VXLAN FDB code and does not crash the kernel. Patch #7: Fix check regarding all-zeros MDB entry and source. Patch #11: New patch. [1] https://datatracker.ietf.org/doc/html/rfc9251 [2] https://datatracker.ietf.org/doc/html/draft-ietf-bess-evpn-irb-mcast [3] https://datatracker.ietf.org/doc/html/rfc7432 [4] https://datatracker.ietf.org/doc/html/rfc7432#section-7.2 [5] https://datatracker.ietf.org/doc/html/rfc9251#section-9.1 [6] https://github.com/idosch/iproute2/commits/submit/mdb_vxlan_rfc_v1 [7] https://lore.kernel.org/netdev/20230313145349.3557231-1-idosch@nvidia.com/ [8] https://lore.kernel.org/netdev/20230204170801.3897900-1-idosch@nvidia.com/ ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers/net')
-rw-r--r--drivers/net/vxlan/Makefile2
-rw-r--r--drivers/net/vxlan/vxlan_core.c78
-rw-r--r--drivers/net/vxlan/vxlan_mdb.c1462
-rw-r--r--drivers/net/vxlan/vxlan_private.h84
4 files changed, 1575 insertions, 51 deletions
diff --git a/drivers/net/vxlan/Makefile b/drivers/net/vxlan/Makefile
index d4c255499b72..91b8fec8b6cf 100644
--- a/drivers/net/vxlan/Makefile
+++ b/drivers/net/vxlan/Makefile
@@ -4,4 +4,4 @@
obj-$(CONFIG_VXLAN) += vxlan.o
-vxlan-objs := vxlan_core.o vxlan_multicast.o vxlan_vnifilter.o
+vxlan-objs := vxlan_core.o vxlan_multicast.o vxlan_vnifilter.o vxlan_mdb.o
diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c
index f2c30214cae8..e2e5f5dac7e6 100644
--- a/drivers/net/vxlan/vxlan_core.c
+++ b/drivers/net/vxlan/vxlan_core.c
@@ -71,53 +71,6 @@ static inline bool vxlan_collect_metadata(struct vxlan_sock *vs)
ip_tunnel_collect_metadata();
}
-#if IS_ENABLED(CONFIG_IPV6)
-static int vxlan_nla_get_addr(union vxlan_addr *ip, struct nlattr *nla)
-{
- if (nla_len(nla) >= sizeof(struct in6_addr)) {
- ip->sin6.sin6_addr = nla_get_in6_addr(nla);
- ip->sa.sa_family = AF_INET6;
- return 0;
- } else if (nla_len(nla) >= sizeof(__be32)) {
- ip->sin.sin_addr.s_addr = nla_get_in_addr(nla);
- ip->sa.sa_family = AF_INET;
- return 0;
- } else {
- return -EAFNOSUPPORT;
- }
-}
-
-static int vxlan_nla_put_addr(struct sk_buff *skb, int attr,
- const union vxlan_addr *ip)
-{
- if (ip->sa.sa_family == AF_INET6)
- return nla_put_in6_addr(skb, attr, &ip->sin6.sin6_addr);
- else
- return nla_put_in_addr(skb, attr, ip->sin.sin_addr.s_addr);
-}
-
-#else /* !CONFIG_IPV6 */
-
-static int vxlan_nla_get_addr(union vxlan_addr *ip, struct nlattr *nla)
-{
- if (nla_len(nla) >= sizeof(struct in6_addr)) {
- return -EAFNOSUPPORT;
- } else if (nla_len(nla) >= sizeof(__be32)) {
- ip->sin.sin_addr.s_addr = nla_get_in_addr(nla);
- ip->sa.sa_family = AF_INET;
- return 0;
- } else {
- return -EAFNOSUPPORT;
- }
-}
-
-static int vxlan_nla_put_addr(struct sk_buff *skb, int attr,
- const union vxlan_addr *ip)
-{
- return nla_put_in_addr(skb, attr, ip->sin.sin_addr.s_addr);
-}
-#endif
-
/* Find VXLAN socket based on network namespace, address family, UDP port,
* enabled unshareable flags and socket device binding (see l3mdev with
* non-default VRF).
@@ -2442,9 +2395,8 @@ static int encap_bypass_if_local(struct sk_buff *skb, struct net_device *dev,
return 0;
}
-static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
- __be32 default_vni, struct vxlan_rdst *rdst,
- bool did_rsc)
+void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
+ __be32 default_vni, struct vxlan_rdst *rdst, bool did_rsc)
{
struct dst_cache *dst_cache;
struct ip_tunnel_info *info;
@@ -2791,6 +2743,21 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev)
#endif
}
+ if (vxlan->cfg.flags & VXLAN_F_MDB) {
+ struct vxlan_mdb_entry *mdb_entry;
+
+ rcu_read_lock();
+ mdb_entry = vxlan_mdb_entry_skb_get(vxlan, skb, vni);
+ if (mdb_entry) {
+ netdev_tx_t ret;
+
+ ret = vxlan_mdb_xmit(vxlan, mdb_entry, skb);
+ rcu_read_unlock();
+ return ret;
+ }
+ rcu_read_unlock();
+ }
+
eth = eth_hdr(skb);
f = vxlan_find_mac(vxlan, eth->h_dest, vni);
did_rsc = false;
@@ -2926,8 +2893,14 @@ static int vxlan_init(struct net_device *dev)
if (err)
goto err_free_percpu;
+ err = vxlan_mdb_init(vxlan);
+ if (err)
+ goto err_gro_cells_destroy;
+
return 0;
+err_gro_cells_destroy:
+ gro_cells_destroy(&vxlan->gro_cells);
err_free_percpu:
free_percpu(dev->tstats);
err_vnigroup_uninit:
@@ -2952,6 +2925,8 @@ static void vxlan_uninit(struct net_device *dev)
{
struct vxlan_dev *vxlan = netdev_priv(dev);
+ vxlan_mdb_fini(vxlan);
+
if (vxlan->cfg.flags & VXLAN_F_VNIFILTER)
vxlan_vnigroup_uninit(vxlan);
@@ -3108,6 +3083,9 @@ static const struct net_device_ops vxlan_netdev_ether_ops = {
.ndo_fdb_del = vxlan_fdb_delete,
.ndo_fdb_dump = vxlan_fdb_dump,
.ndo_fdb_get = vxlan_fdb_get,
+ .ndo_mdb_add = vxlan_mdb_add,
+ .ndo_mdb_del = vxlan_mdb_del,
+ .ndo_mdb_dump = vxlan_mdb_dump,
.ndo_fill_metadata_dst = vxlan_fill_metadata_dst,
};
diff --git a/drivers/net/vxlan/vxlan_mdb.c b/drivers/net/vxlan/vxlan_mdb.c
new file mode 100644
index 000000000000..5e041622261a
--- /dev/null
+++ b/drivers/net/vxlan/vxlan_mdb.c
@@ -0,0 +1,1462 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/if_bridge.h>
+#include <linux/in.h>
+#include <linux/list.h>
+#include <linux/netdevice.h>
+#include <linux/netlink.h>
+#include <linux/rhashtable.h>
+#include <linux/rhashtable-types.h>
+#include <linux/rtnetlink.h>
+#include <linux/skbuff.h>
+#include <linux/types.h>
+#include <net/netlink.h>
+#include <net/vxlan.h>
+
+#include "vxlan_private.h"
+
+struct vxlan_mdb_entry_key {
+ union vxlan_addr src;
+ union vxlan_addr dst;
+ __be32 vni;
+};
+
+struct vxlan_mdb_entry {
+ struct rhash_head rhnode;
+ struct list_head remotes;
+ struct vxlan_mdb_entry_key key;
+ struct hlist_node mdb_node;
+ struct rcu_head rcu;
+};
+
+#define VXLAN_MDB_REMOTE_F_BLOCKED BIT(0)
+
+struct vxlan_mdb_remote {
+ struct list_head list;
+ struct vxlan_rdst __rcu *rd;
+ u8 flags;
+ u8 filter_mode;
+ u8 rt_protocol;
+ struct hlist_head src_list;
+ struct rcu_head rcu;
+};
+
+#define VXLAN_SGRP_F_DELETE BIT(0)
+
+struct vxlan_mdb_src_entry {
+ struct hlist_node node;
+ union vxlan_addr addr;
+ u8 flags;
+};
+
+struct vxlan_mdb_dump_ctx {
+ long reserved;
+ long entry_idx;
+ long remote_idx;
+};
+
+struct vxlan_mdb_config_src_entry {
+ union vxlan_addr addr;
+ struct list_head node;
+};
+
+struct vxlan_mdb_config {
+ struct vxlan_dev *vxlan;
+ struct vxlan_mdb_entry_key group;
+ struct list_head src_list;
+ union vxlan_addr remote_ip;
+ u32 remote_ifindex;
+ __be32 remote_vni;
+ __be16 remote_port;
+ u16 nlflags;
+ u8 flags;
+ u8 filter_mode;
+ u8 rt_protocol;
+};
+
+static const struct rhashtable_params vxlan_mdb_rht_params = {
+ .head_offset = offsetof(struct vxlan_mdb_entry, rhnode),
+ .key_offset = offsetof(struct vxlan_mdb_entry, key),
+ .key_len = sizeof(struct vxlan_mdb_entry_key),
+ .automatic_shrinking = true,
+};
+
+static int __vxlan_mdb_add(const struct vxlan_mdb_config *cfg,
+ struct netlink_ext_ack *extack);
+static int __vxlan_mdb_del(const struct vxlan_mdb_config *cfg,
+ struct netlink_ext_ack *extack);
+
+static void vxlan_br_mdb_entry_fill(const struct vxlan_dev *vxlan,
+ const struct vxlan_mdb_entry *mdb_entry,
+ const struct vxlan_mdb_remote *remote,
+ struct br_mdb_entry *e)
+{
+ const union vxlan_addr *dst = &mdb_entry->key.dst;
+
+ memset(e, 0, sizeof(*e));
+ e->ifindex = vxlan->dev->ifindex;
+ e->state = MDB_PERMANENT;
+
+ if (remote->flags & VXLAN_MDB_REMOTE_F_BLOCKED)
+ e->flags |= MDB_FLAGS_BLOCKED;
+
+ switch (dst->sa.sa_family) {
+ case AF_INET:
+ e->addr.u.ip4 = dst->sin.sin_addr.s_addr;
+ e->addr.proto = htons(ETH_P_IP);
+ break;
+#if IS_ENABLED(CONFIG_IPV6)
+ case AF_INET6:
+ e->addr.u.ip6 = dst->sin6.sin6_addr;
+ e->addr.proto = htons(ETH_P_IPV6);
+ break;
+#endif
+ }
+}
+
+static int vxlan_mdb_entry_info_fill_srcs(struct sk_buff *skb,
+ const struct vxlan_mdb_remote *remote)
+{
+ struct vxlan_mdb_src_entry *ent;
+ struct nlattr *nest;
+
+ if (hlist_empty(&remote->src_list))
+ return 0;
+
+ nest = nla_nest_start(skb, MDBA_MDB_EATTR_SRC_LIST);
+ if (!nest)
+ return -EMSGSIZE;
+
+ hlist_for_each_entry(ent, &remote->src_list, node) {
+ struct nlattr *nest_ent;
+
+ nest_ent = nla_nest_start(skb, MDBA_MDB_SRCLIST_ENTRY);
+ if (!nest_ent)
+ goto out_cancel_err;
+
+ if (vxlan_nla_put_addr(skb, MDBA_MDB_SRCATTR_ADDRESS,
+ &ent->addr) ||
+ nla_put_u32(skb, MDBA_MDB_SRCATTR_TIMER, 0))
+ goto out_cancel_err;
+
+ nla_nest_end(skb, nest_ent);
+ }
+
+ nla_nest_end(skb, nest);
+
+ return 0;
+
+out_cancel_err:
+ nla_nest_cancel(skb, nest);
+ return -EMSGSIZE;
+}
+
+static int vxlan_mdb_entry_info_fill(const struct vxlan_dev *vxlan,
+ struct sk_buff *skb,
+ const struct vxlan_mdb_entry *mdb_entry,
+ const struct vxlan_mdb_remote *remote)
+{
+ struct vxlan_rdst *rd = rtnl_dereference(remote->rd);
+ struct br_mdb_entry e;
+ struct nlattr *nest;
+
+ nest = nla_nest_start_noflag(skb, MDBA_MDB_ENTRY_INFO);
+ if (!nest)
+ return -EMSGSIZE;
+
+ vxlan_br_mdb_entry_fill(vxlan, mdb_entry, remote, &e);
+
+ if (nla_put_nohdr(skb, sizeof(e), &e) ||
+ nla_put_u32(skb, MDBA_MDB_EATTR_TIMER, 0))
+ goto nest_err;
+
+ if (!vxlan_addr_any(&mdb_entry->key.src) &&
+ vxlan_nla_put_addr(skb, MDBA_MDB_EATTR_SOURCE, &mdb_entry->key.src))
+ goto nest_err;
+
+ if (nla_put_u8(skb, MDBA_MDB_EATTR_RTPROT, remote->rt_protocol) ||
+ nla_put_u8(skb, MDBA_MDB_EATTR_GROUP_MODE, remote->filter_mode) ||
+ vxlan_mdb_entry_info_fill_srcs(skb, remote) ||
+ vxlan_nla_put_addr(skb, MDBA_MDB_EATTR_DST, &rd->remote_ip))
+ goto nest_err;
+
+ if (rd->remote_port && rd->remote_port != vxlan->cfg.dst_port &&
+ nla_put_u16(skb, MDBA_MDB_EATTR_DST_PORT,
+ be16_to_cpu(rd->remote_port)))
+ goto nest_err;
+
+ if (rd->remote_vni != vxlan->default_dst.remote_vni &&
+ nla_put_u32(skb, MDBA_MDB_EATTR_VNI, be32_to_cpu(rd->remote_vni)))
+ goto nest_err;
+
+ if (rd->remote_ifindex &&
+ nla_put_u32(skb, MDBA_MDB_EATTR_IFINDEX, rd->remote_ifindex))
+ goto nest_err;
+
+ if ((vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA) &&
+ mdb_entry->key.vni && nla_put_u32(skb, MDBA_MDB_EATTR_SRC_VNI,
+ be32_to_cpu(mdb_entry->key.vni)))
+ goto nest_err;
+
+ nla_nest_end(skb, nest);
+
+ return 0;
+
+nest_err:
+ nla_nest_cancel(skb, nest);
+ return -EMSGSIZE;
+}
+
+static int vxlan_mdb_entry_fill(const struct vxlan_dev *vxlan,
+ struct sk_buff *skb,
+ struct vxlan_mdb_dump_ctx *ctx,
+ const struct vxlan_mdb_entry *mdb_entry)
+{
+ int remote_idx = 0, s_remote_idx = ctx->remote_idx;
+ struct vxlan_mdb_remote *remote;
+ struct nlattr *nest;
+ int err = 0;
+
+ nest = nla_nest_start_noflag(skb, MDBA_MDB_ENTRY);
+ if (!nest)
+ return -EMSGSIZE;
+
+ list_for_each_entry(remote, &mdb_entry->remotes, list) {
+ if (remote_idx < s_remote_idx)
+ goto skip;
+
+ err = vxlan_mdb_entry_info_fill(vxlan, skb, mdb_entry, remote);
+ if (err)
+ break;
+skip:
+ remote_idx++;
+ }
+
+ ctx->remote_idx = err ? remote_idx : 0;
+ nla_nest_end(skb, nest);
+ return err;
+}
+
+static int vxlan_mdb_fill(const struct vxlan_dev *vxlan, struct sk_buff *skb,
+ struct vxlan_mdb_dump_ctx *ctx)
+{
+ int entry_idx = 0, s_entry_idx = ctx->entry_idx;
+ struct vxlan_mdb_entry *mdb_entry;
+ struct nlattr *nest;
+ int err = 0;
+
+ nest = nla_nest_start_noflag(skb, MDBA_MDB);
+ if (!nest)
+ return -EMSGSIZE;
+
+ hlist_for_each_entry(mdb_entry, &vxlan->mdb_list, mdb_node) {
+ if (entry_idx < s_entry_idx)
+ goto skip;
+
+ err = vxlan_mdb_entry_fill(vxlan, skb, ctx, mdb_entry);
+ if (err)
+ break;
+skip:
+ entry_idx++;
+ }
+
+ ctx->entry_idx = err ? entry_idx : 0;
+ nla_nest_end(skb, nest);
+ return err;
+}
+
+int vxlan_mdb_dump(struct net_device *dev, struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ struct vxlan_mdb_dump_ctx *ctx = (void *)cb->ctx;
+ struct vxlan_dev *vxlan = netdev_priv(dev);
+ struct br_port_msg *bpm;
+ struct nlmsghdr *nlh;
+ int err;
+
+ ASSERT_RTNL();
+
+ NL_ASSERT_DUMP_CTX_FITS(struct vxlan_mdb_dump_ctx);
+
+ nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, RTM_NEWMDB, sizeof(*bpm),
+ NLM_F_MULTI);
+ if (!nlh)
+ return -EMSGSIZE;
+
+ bpm = nlmsg_data(nlh);
+ memset(bpm, 0, sizeof(*bpm));
+ bpm->family = AF_BRIDGE;
+ bpm->ifindex = dev->ifindex;
+
+ err = vxlan_mdb_fill(vxlan, skb, ctx);
+
+ nlmsg_end(skb, nlh);
+
+ cb->seq = vxlan->mdb_seq;
+ nl_dump_check_consistent(cb, nlh);
+
+ return err;
+}
+
+static const struct nla_policy
+vxlan_mdbe_src_list_entry_pol[MDBE_SRCATTR_MAX + 1] = {
+ [MDBE_SRCATTR_ADDRESS] = NLA_POLICY_RANGE(NLA_BINARY,
+ sizeof(struct in_addr),
+ sizeof(struct in6_addr)),
+};
+
+static const struct nla_policy
+vxlan_mdbe_src_list_pol[MDBE_SRC_LIST_MAX + 1] = {
+ [MDBE_SRC_LIST_ENTRY] = NLA_POLICY_NESTED(vxlan_mdbe_src_list_entry_pol),
+};
+
+static struct netlink_range_validation vni_range = {
+ .max = VXLAN_N_VID - 1,
+};
+
+static const struct nla_policy vxlan_mdbe_attrs_pol[MDBE_ATTR_MAX + 1] = {
+ [MDBE_ATTR_SOURCE] = NLA_POLICY_RANGE(NLA_BINARY,
+ sizeof(struct in_addr),
+ sizeof(struct in6_addr)),
+ [MDBE_ATTR_GROUP_MODE] = NLA_POLICY_RANGE(NLA_U8, MCAST_EXCLUDE,
+ MCAST_INCLUDE),
+ [MDBE_ATTR_SRC_LIST] = NLA_POLICY_NESTED(vxlan_mdbe_src_list_pol),
+ [MDBE_ATTR_RTPROT] = NLA_POLICY_MIN(NLA_U8, RTPROT_STATIC),
+ [MDBE_ATTR_DST] = NLA_POLICY_RANGE(NLA_BINARY,
+ sizeof(struct in_addr),
+ sizeof(struct in6_addr)),
+ [MDBE_ATTR_DST_PORT] = { .type = NLA_U16 },
+ [MDBE_ATTR_VNI] = NLA_POLICY_FULL_RANGE(NLA_U32, &vni_range),
+ [MDBE_ATTR_IFINDEX] = NLA_POLICY_MIN(NLA_S32, 1),
+ [MDBE_ATTR_SRC_VNI] = NLA_POLICY_FULL_RANGE(NLA_U32, &vni_range),
+};
+
+static bool vxlan_mdb_is_valid_source(const struct nlattr *attr, __be16 proto,
+ struct netlink_ext_ack *extack)
+{
+ switch (proto) {
+ case htons(ETH_P_IP):
+ if (nla_len(attr) != sizeof(struct in_addr)) {
+ NL_SET_ERR_MSG_MOD(extack, "IPv4 invalid source address length");
+ return false;
+ }
+ if (ipv4_is_multicast(nla_get_in_addr(attr))) {
+ NL_SET_ERR_MSG_MOD(extack, "IPv4 multicast source address is not allowed");
+ return false;
+ }
+ break;
+#if IS_ENABLED(CONFIG_IPV6)
+ case htons(ETH_P_IPV6): {
+ struct in6_addr src;
+
+ if (nla_len(attr) != sizeof(struct in6_addr)) {
+ NL_SET_ERR_MSG_MOD(extack, "IPv6 invalid source address length");
+ return false;
+ }
+ src = nla_get_in6_addr(attr);
+ if (ipv6_addr_is_multicast(&src)) {
+ NL_SET_ERR_MSG_MOD(extack, "IPv6 multicast source address is not allowed");
+ return false;
+ }
+ break;
+ }
+#endif
+ default:
+ NL_SET_ERR_MSG_MOD(extack, "Invalid protocol used with source address");
+ return false;
+ }
+
+ return true;
+}
+
+static void vxlan_mdb_config_group_set(struct vxlan_mdb_config *cfg,
+ const struct br_mdb_entry *entry,
+ const struct nlattr *source_attr)
+{
+ struct vxlan_mdb_entry_key *group = &cfg->group;
+
+ switch (entry->addr.proto) {
+ case htons(ETH_P_IP):
+ group->dst.sa.sa_family = AF_INET;
+ group->dst.sin.sin_addr.s_addr = entry->addr.u.ip4;
+ break;
+#if IS_ENABLED(CONFIG_IPV6)
+ case htons(ETH_P_IPV6):
+ group->dst.sa.sa_family = AF_INET6;
+ group->dst.sin6.sin6_addr = entry->addr.u.ip6;
+ break;
+#endif
+ }
+
+ if (source_attr)
+ vxlan_nla_get_addr(&group->src, source_attr);
+}
+
+static bool vxlan_mdb_is_star_g(const struct vxlan_mdb_entry_key *group)
+{
+ return !vxlan_addr_any(&group->dst) && vxlan_addr_any(&group->src);
+}
+
+static bool vxlan_mdb_is_sg(const struct vxlan_mdb_entry_key *group)
+{
+ return !vxlan_addr_any(&group->dst) && !vxlan_addr_any(&group->src);
+}
+
+static int vxlan_mdb_config_src_entry_init(struct vxlan_mdb_config *cfg,
+ __be16 proto,
+ const struct nlattr *src_entry,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[MDBE_SRCATTR_MAX + 1];
+ struct vxlan_mdb_config_src_entry *src;
+ int err;
+
+ err = nla_parse_nested(tb, MDBE_SRCATTR_MAX, src_entry,
+ vxlan_mdbe_src_list_entry_pol, extack);
+ if (err)
+ return err;
+
+ if (NL_REQ_ATTR_CHECK(extack, src_entry, tb, MDBE_SRCATTR_ADDRESS))
+ return -EINVAL;
+
+ if (!vxlan_mdb_is_valid_source(tb[MDBE_SRCATTR_ADDRESS], proto,
+ extack))
+ return -EINVAL;
+
+ src = kzalloc(sizeof(*src), GFP_KERNEL);
+ if (!src)
+ return -ENOMEM;
+
+ err = vxlan_nla_get_addr(&src->addr, tb[MDBE_SRCATTR_ADDRESS]);
+ if (err)
+ goto err_free_src;
+
+ list_add_tail(&src->node, &cfg->src_list);
+
+ return 0;
+
+err_free_src:
+ kfree(src);
+ return err;
+}
+
+static void
+vxlan_mdb_config_src_entry_fini(struct vxlan_mdb_config_src_entry *src)
+{
+ list_del(&src->node);
+ kfree(src);
+}
+
+static int vxlan_mdb_config_src_list_init(struct vxlan_mdb_config *cfg,
+ __be16 proto,
+ const struct nlattr *src_list,
+ struct netlink_ext_ack *extack)
+{
+ struct vxlan_mdb_config_src_entry *src, *tmp;
+ struct nlattr *src_entry;
+ int rem, err;
+
+ nla_for_each_nested(src_entry, src_list, rem) {
+ err = vxlan_mdb_config_src_entry_init(cfg, proto, src_entry,
+ extack);
+ if (err)
+ goto err_src_entry_init;
+ }
+
+ return 0;
+
+err_src_entry_init:
+ list_for_each_entry_safe_reverse(src, tmp, &cfg->src_list, node)
+ vxlan_mdb_config_src_entry_fini(src);
+ return err;
+}
+
+static void vxlan_mdb_config_src_list_fini(struct vxlan_mdb_config *cfg)
+{
+ struct vxlan_mdb_config_src_entry *src, *tmp;
+
+ list_for_each_entry_safe_reverse(src, tmp, &cfg->src_list, node)
+ vxlan_mdb_config_src_entry_fini(src);
+}
+
+static int vxlan_mdb_config_attrs_init(struct vxlan_mdb_config *cfg,
+ const struct br_mdb_entry *entry,
+ const struct nlattr *set_attrs,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *mdbe_attrs[MDBE_ATTR_MAX + 1];
+ int err;
+
+ err = nla_parse_nested(mdbe_attrs, MDBE_ATTR_MAX, set_attrs,
+ vxlan_mdbe_attrs_pol, extack);
+ if (err)
+ return err;
+
+ if (NL_REQ_ATTR_CHECK(extack, set_attrs, mdbe_attrs, MDBE_ATTR_DST)) {
+ NL_SET_ERR_MSG_MOD(extack, "Missing remote destination IP address");
+ return -EINVAL;
+ }
+
+ if (mdbe_attrs[MDBE_ATTR_SOURCE] &&
+ !vxlan_mdb_is_valid_source(mdbe_attrs[MDBE_ATTR_SOURCE],
+ entry->addr.proto, extack))
+ return -EINVAL;
+
+ vxlan_mdb_config_group_set(cfg, entry, mdbe_attrs[MDBE_ATTR_SOURCE]);
+
+ /* rtnetlink code only validates that IPv4 group address is
+ * multicast.
+ */
+ if (!vxlan_addr_is_multicast(&cfg->group.dst) &&
+ !vxlan_addr_any(&cfg->group.dst)) {
+ NL_SET_ERR_MSG_MOD(extack, "Group address is not multicast");
+ return -EINVAL;
+ }
+
+ if (vxlan_addr_any(&cfg->group.dst) &&
+ mdbe_attrs[MDBE_ATTR_SOURCE]) {
+ NL_SET_ERR_MSG_MOD(extack, "Source cannot be specified for the all-zeros entry");
+ return -EINVAL;
+ }
+
+ if (vxlan_mdb_is_sg(&cfg->group))
+ cfg->filter_mode = MCAST_INCLUDE;
+
+ if (mdbe_attrs[MDBE_ATTR_GROUP_MODE]) {
+ if (!vxlan_mdb_is_star_g(&cfg->group)) {
+ NL_SET_ERR_MSG_MOD(extack, "Filter mode can only be set for (*, G) entries");
+ return -EINVAL;
+ }
+ cfg->filter_mode = nla_get_u8(mdbe_attrs[MDBE_ATTR_GROUP_MODE]);
+ }
+
+ if (mdbe_attrs[MDBE_ATTR_SRC_LIST]) {
+ if (!vxlan_mdb_is_star_g(&cfg->group)) {
+ NL_SET_ERR_MSG_MOD(extack, "Source list can only be set for (*, G) entries");
+ return -EINVAL;
+ }
+ if (!mdbe_attrs[MDBE_ATTR_GROUP_MODE]) {
+ NL_SET_ERR_MSG_MOD(extack, "Source list cannot be set without filter mode");
+ return -EINVAL;
+ }
+ err = vxlan_mdb_config_src_list_init(cfg, entry->addr.proto,
+ mdbe_attrs[MDBE_ATTR_SRC_LIST],
+ extack);
+ if (err)
+ return err;
+ }
+
+ if (vxlan_mdb_is_star_g(&cfg->group) && list_empty(&cfg->src_list) &&
+ cfg->filter_mode == MCAST_INCLUDE) {
+ NL_SET_ERR_MSG_MOD(extack, "Cannot add (*, G) INCLUDE with an empty source list");
+ return -EINVAL;
+ }
+
+ if (mdbe_attrs[MDBE_ATTR_RTPROT])
+ cfg->rt_protocol = nla_get_u8(mdbe_attrs[MDBE_ATTR_RTPROT]);
+
+ err = vxlan_nla_get_addr(&cfg->remote_ip, mdbe_attrs[MDBE_ATTR_DST]);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Invalid remote destination address");
+ goto err_src_list_fini;
+ }
+
+ if (mdbe_attrs[MDBE_ATTR_DST_PORT])
+ cfg->remote_port =
+ cpu_to_be16(nla_get_u16(mdbe_attrs[MDBE_ATTR_DST_PORT]));
+
+ if (mdbe_attrs[MDBE_ATTR_VNI])
+ cfg->remote_vni =
+ cpu_to_be32(nla_get_u32(mdbe_attrs[MDBE_ATTR_VNI]));
+
+ if (mdbe_attrs[MDBE_ATTR_IFINDEX]) {
+ cfg->remote_ifindex =
+ nla_get_s32(mdbe_attrs[MDBE_ATTR_IFINDEX]);
+ if (!__dev_get_by_index(cfg->vxlan->net, cfg->remote_ifindex)) {
+ NL_SET_ERR_MSG_MOD(extack, "Outgoing interface not found");
+ err = -EINVAL;
+ goto err_src_list_fini;
+ }
+ }
+
+ if (mdbe_attrs[MDBE_ATTR_SRC_VNI])
+ cfg->group.vni =
+ cpu_to_be32(nla_get_u32(mdbe_attrs[MDBE_ATTR_SRC_VNI]));
+
+ return 0;
+
+err_src_list_fini:
+ vxlan_mdb_config_src_list_fini(cfg);
+ return err;
+}
+
+static int vxlan_mdb_config_init(struct vxlan_mdb_config *cfg,
+ struct net_device *dev, struct nlattr *tb[],
+ u16 nlmsg_flags,
+ struct netlink_ext_ack *extack)
+{
+ struct br_mdb_entry *entry = nla_data(tb[MDBA_SET_ENTRY]);
+ struct vxlan_dev *vxlan = netdev_priv(dev);
+
+ memset(cfg, 0, sizeof(*cfg));
+ cfg->vxlan = vxlan;
+ cfg->group.vni = vxlan->default_dst.remote_vni;
+ INIT_LIST_HEAD(&cfg->src_list);
+ cfg->nlflags = nlmsg_flags;
+ cfg->filter_mode = MCAST_EXCLUDE;
+ cfg->rt_protocol = RTPROT_STATIC;
+ cfg->remote_vni = vxlan->default_dst.remote_vni;
+ cfg->remote_port = vxlan->cfg.dst_port;
+
+ if (entry->ifindex != dev->ifindex) {
+ NL_SET_ERR_MSG_MOD(extack, "Port net device must be the VXLAN net device");
+ return -EINVAL;
+ }
+
+ /* State is not part of the entry key and can be ignored on deletion
+ * requests.
+ */
+ if ((nlmsg_flags & (NLM_F_CREATE | NLM_F_REPLACE)) &&
+ entry->state != MDB_PERMANENT) {
+ NL_SET_ERR_MSG_MOD(extack, "MDB entry must be permanent");
+ return -EINVAL;
+ }
+
+ if (entry->flags) {
+ NL_SET_ERR_MSG_MOD(extack, "Invalid MDB entry flags");
+ return -EINVAL;
+ }
+
+ if (entry->vid) {
+ NL_SET_ERR_MSG_MOD(extack, "VID must not be specified");
+ return -EINVAL;
+ }
+
+ if (entry->addr.proto != htons(ETH_P_IP) &&
+ entry->addr.proto != htons(ETH_P_IPV6)) {
+ NL_SET_ERR_MSG_MOD(extack, "Group address must be an IPv4 / IPv6 address");
+ return -EINVAL;
+ }
+
+ if (NL_REQ_ATTR_CHECK(extack, NULL, tb, MDBA_SET_ENTRY_ATTRS)) {
+ NL_SET_ERR_MSG_MOD(extack, "Missing MDBA_SET_ENTRY_ATTRS attribute");
+ return -EINVAL;
+ }
+
+ return vxlan_mdb_config_attrs_init(cfg, entry, tb[MDBA_SET_ENTRY_ATTRS],
+ extack);
+}
+
+static void vxlan_mdb_config_fini(struct vxlan_mdb_config *cfg)
+{
+ vxlan_mdb_config_src_list_fini(cfg);
+}
+
+static struct vxlan_mdb_entry *
+vxlan_mdb_entry_lookup(struct vxlan_dev *vxlan,
+ const struct vxlan_mdb_entry_key *group)
+{
+ return rhashtable_lookup_fast(&vxlan->mdb_tbl, group,
+ vxlan_mdb_rht_params);
+}
+
+static struct vxlan_mdb_remote *
+vxlan_mdb_remote_lookup(const struct vxlan_mdb_entry *mdb_entry,
+ const union vxlan_addr *addr)
+{
+ struct vxlan_mdb_remote *remote;
+
+ list_for_each_entry(remote, &mdb_entry->remotes, list) {
+ struct vxlan_rdst *rd = rtnl_dereference(remote->rd);
+
+ if (vxlan_addr_equal(addr, &rd->remote_ip))
+ return remote;
+ }
+
+ return NULL;
+}
+
+static void vxlan_mdb_rdst_free(struct rcu_head *head)
+{
+ struct vxlan_rdst *rd = container_of(head, struct vxlan_rdst, rcu);
+
+ dst_cache_destroy(&rd->dst_cache);
+ kfree(rd);
+}
+
+static int vxlan_mdb_remote_rdst_init(const struct vxlan_mdb_config *cfg,
+ struct vxlan_mdb_remote *remote)
+{
+ struct vxlan_rdst *rd;
+ int err;
+
+ rd = kzalloc(sizeof(*rd), GFP_KERNEL);
+ if (!rd)
+ return -ENOMEM;
+
+ err = dst_cache_init(&rd->dst_cache, GFP_KERNEL);
+ if (err)
+ goto err_free_rdst;
+
+ rd->remote_ip = cfg->remote_ip;
+ rd->remote_port = cfg->remote_port;
+ rd->remote_vni = cfg->remote_vni;
+ rd->remote_ifindex = cfg->remote_ifindex;
+ rcu_assign_pointer(remote->rd, rd);
+
+ return 0;
+
+err_free_rdst:
+ kfree(rd);
+ return err;
+}
+
+static void vxlan_mdb_remote_rdst_fini(struct vxlan_rdst *rd)
+{
+ call_rcu(&rd->rcu, vxlan_mdb_rdst_free);
+}
+
+static int vxlan_mdb_remote_init(const struct vxlan_mdb_config *cfg,
+ struct vxlan_mdb_remote *remote)
+{
+ int err;
+
+ err = vxlan_mdb_remote_rdst_init(cfg, remote);
+ if (err)
+ return err;
+
+ remote->flags = cfg->flags;
+ remote->filter_mode = cfg->filter_mode;
+ remote->rt_protocol = cfg->rt_protocol;
+ INIT_HLIST_HEAD(&remote->src_list);
+
+ return 0;
+}
+
+static void vxlan_mdb_remote_fini(struct vxlan_dev *vxlan,
+ struct vxlan_mdb_remote *remote)
+{
+ WARN_ON_ONCE(!hlist_empty(&remote->src_list));
+ vxlan_mdb_remote_rdst_fini(rtnl_dereference(remote->rd));
+}
+
+static struct vxlan_mdb_src_entry *
+vxlan_mdb_remote_src_entry_lookup(const struct vxlan_mdb_remote *remote,
+ const union vxlan_addr *addr)
+{
+ struct vxlan_mdb_src_entry *ent;
+
+ hlist_for_each_entry(ent, &remote->src_list, node) {
+ if (vxlan_addr_equal(&ent->addr, addr))
+ return ent;
+ }
+
+ return NULL;
+}
+
+static struct vxlan_mdb_src_entry *
+vxlan_mdb_remote_src_entry_add(struct vxlan_mdb_remote *remote,
+ const union vxlan_addr *addr)
+{
+ struct vxlan_mdb_src_entry *ent;
+
+ ent = kzalloc(sizeof(*ent), GFP_KERNEL);
+ if (!ent)
+ return NULL;
+
+ ent->addr = *addr;
+ hlist_add_head(&ent->node, &remote->src_list);
+
+ return ent;
+}
+
+static void
+vxlan_mdb_remote_src_entry_del(struct vxlan_mdb_src_entry *ent)
+{
+ hlist_del(&ent->node);
+ kfree(ent);
+}
+
+static int
+vxlan_mdb_remote_src_fwd_add(const struct vxlan_mdb_config *cfg,
+ const union vxlan_addr *addr,
+ struct netlink_ext_ack *extack)
+{
+ struct vxlan_mdb_config sg_cfg;
+
+ memset(&sg_cfg, 0, sizeof(sg_cfg));
+ sg_cfg.vxlan = cfg->vxlan;
+ sg_cfg.group.src = *addr;
+ sg_cfg.group.dst = cfg->group.dst;
+ sg_cfg.group.vni = cfg->group.vni;
+ INIT_LIST_HEAD(&sg_cfg.src_list);
+ sg_cfg.remote_ip = cfg->remote_ip;
+ sg_cfg.remote_ifindex = cfg->remote_ifindex;
+ sg_cfg.remote_vni = cfg->remote_vni;
+ sg_cfg.remote_port = cfg->remote_port;
+ sg_cfg.nlflags = cfg->nlflags;
+ sg_cfg.filter_mode = MCAST_INCLUDE;
+ if (cfg->filter_mode == MCAST_EXCLUDE)
+ sg_cfg.flags = VXLAN_MDB_REMOTE_F_BLOCKED;
+ sg_cfg.rt_protocol = cfg->rt_protocol;
+
+ return __vxlan_mdb_add(&sg_cfg, extack);
+}
+
+static void
+vxlan_mdb_remote_src_fwd_del(struct vxlan_dev *vxlan,
+ const struct vxlan_mdb_entry_key *group,
+ const struct vxlan_mdb_remote *remote,
+ const union vxlan_addr *addr)
+{
+ struct vxlan_rdst *rd = rtnl_dereference(remote->rd);
+ struct vxlan_mdb_config sg_cfg;
+
+ memset(&sg_cfg, 0, sizeof(sg_cfg));
+ sg_cfg.vxlan = vxlan;
+ sg_cfg.group.src = *addr;
+ sg_cfg.group.dst = group->dst;
+ sg_cfg.group.vni = group->vni;
+ INIT_LIST_HEAD(&sg_cfg.src_list);
+ sg_cfg.remote_ip = rd->remote_ip;
+
+ __vxlan_mdb_del(&sg_cfg, NULL);
+}
+
+static int
+vxlan_mdb_remote_src_add(const struct vxlan_mdb_config *cfg,
+ struct vxlan_mdb_remote *remote,
+ const struct vxlan_mdb_config_src_entry *src,
+ struct netlink_ext_ack *extack)
+{
+ struct vxlan_mdb_src_entry *ent;
+ int err;
+
+ ent = vxlan_mdb_remote_src_entry_lookup(remote, &src->addr);
+ if (!ent) {
+ ent = vxlan_mdb_remote_src_entry_add(remote, &src->addr);
+ if (!ent)
+ return -ENOMEM;
+ } else if (!(cfg->nlflags & NLM_F_REPLACE)) {
+ NL_SET_ERR_MSG_MOD(extack, "Source entry already exists");
+ return -EEXIST;
+ }
+
+ err = vxlan_mdb_remote_src_fwd_add(cfg, &ent->addr, extack);
+ if (err)
+ goto err_src_del;
+
+ /* Clear flags in case source entry was marked for deletion as part of
+ * replace flow.
+ */
+ ent->flags = 0;
+
+ return 0;
+
+err_src_del:
+ vxlan_mdb_remote_src_entry_del(ent);
+ return err;
+}
+
+static void vxlan_mdb_remote_src_del(struct vxlan_dev *vxlan,
+ const struct vxlan_mdb_entry_key *group,
+ const struct vxlan_mdb_remote *remote,
+ struct vxlan_mdb_src_entry *ent)
+{
+ vxlan_mdb_remote_src_fwd_del(vxlan, group, remote, &ent->addr);
+ vxlan_mdb_remote_src_entry_del(ent);
+}
+
+static int vxlan_mdb_remote_srcs_add(const struct vxlan_mdb_config *cfg,
+ struct vxlan_mdb_remote *remote,
+ struct netlink_ext_ack *extack)
+{
+ struct vxlan_mdb_config_src_entry *src;
+ struct vxlan_mdb_src_entry *ent;
+ struct hlist_node *tmp;
+ int err;
+
+ list_for_each_entry(src, &cfg->src_list, node) {
+ err = vxlan_mdb_remote_src_add(cfg, remote, src, extack);
+ if (err)
+ goto err_src_del;
+ }
+
+ return 0;
+
+err_src_del:
+ hlist_for_each_entry_safe(ent, tmp, &remote->src_list, node)
+ vxlan_mdb_remote_src_del(cfg->vxlan, &cfg->group, remote, ent);
+ return err;
+}
+
+static void vxlan_mdb_remote_srcs_del(struct vxlan_dev *vxlan,
+ const struct vxlan_mdb_entry_key *group,
+ struct vxlan_mdb_remote *remote)
+{
+ struct vxlan_mdb_src_entry *ent;
+ struct hlist_node *tmp;
+
+ hlist_for_each_entry_safe(ent, tmp, &remote->src_list, node)
+ vxlan_mdb_remote_src_del(vxlan, group, remote, ent);
+}
+
+static size_t
+vxlan_mdb_nlmsg_src_list_size(const struct vxlan_mdb_entry_key *group,
+ const struct vxlan_mdb_remote *remote)
+{
+ struct vxlan_mdb_src_entry *ent;
+ size_t nlmsg_size;
+
+ if (hlist_empty(&remote->src_list))
+ return 0;
+
+ /* MDBA_MDB_EATTR_SRC_LIST */
+ nlmsg_size = nla_total_size(0);
+
+ hlist_for_each_entry(ent, &remote->src_list, node) {
+ /* MDBA_MDB_SRCLIST_ENTRY */
+ nlmsg_size += nla_total_size(0) +
+ /* MDBA_MDB_SRCATTR_ADDRESS */
+ nla_total_size(vxlan_addr_size(&group->dst)) +
+ /* MDBA_MDB_SRCATTR_TIMER */
+ nla_total_size(sizeof(u8));
+ }
+
+ return nlmsg_size;
+}
+
+static size_t vxlan_mdb_nlmsg_size(const struct vxlan_dev *vxlan,
+ const struct vxlan_mdb_entry *mdb_entry,
+ const struct vxlan_mdb_remote *remote)
+{
+ const struct vxlan_mdb_entry_key *group = &mdb_entry->key;
+ struct vxlan_rdst *rd = rtnl_dereference(remote->rd);
+ size_t nlmsg_size;
+
+ nlmsg_size = NLMSG_ALIGN(sizeof(struct br_port_msg)) +
+ /* MDBA_MDB */
+ nla_total_size(0) +
+ /* MDBA_MDB_ENTRY */
+ nla_total_size(0) +
+ /* MDBA_MDB_ENTRY_INFO */
+ nla_total_size(sizeof(struct br_mdb_entry)) +
+ /* MDBA_MDB_EATTR_TIMER */
+ nla_total_size(sizeof(u32));
+ /* MDBA_MDB_EATTR_SOURCE */
+ if (vxlan_mdb_is_sg(group))
+ nlmsg_size += nla_total_size(vxlan_addr_size(&group->dst));
+ /* MDBA_MDB_EATTR_RTPROT */
+ nlmsg_size += nla_total_size(sizeof(u8));
+ /* MDBA_MDB_EATTR_SRC_LIST */
+ nlmsg_size += vxlan_mdb_nlmsg_src_list_size(group, remote);
+ /* MDBA_MDB_EATTR_GROUP_MODE */
+ nlmsg_size += nla_total_size(sizeof(u8));
+ /* MDBA_MDB_EATTR_DST */
+ nlmsg_size += nla_total_size(vxlan_addr_size(&rd->remote_ip));
+ /* MDBA_MDB_EATTR_DST_PORT */
+ if (rd->remote_port && rd->remote_port != vxlan->cfg.dst_port)
+ nlmsg_size += nla_total_size(sizeof(u16));
+ /* MDBA_MDB_EATTR_VNI */
+ if (rd->remote_vni != vxlan->default_dst.remote_vni)
+ nlmsg_size += nla_total_size(sizeof(u32));
+ /* MDBA_MDB_EATTR_IFINDEX */
+ if (rd->remote_ifindex)
+ nlmsg_size += nla_total_size(sizeof(u32));
+ /* MDBA_MDB_EATTR_SRC_VNI */
+ if ((vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA) && group->vni)
+ nlmsg_size += nla_total_size(sizeof(u32));
+
+ return nlmsg_size;
+}
+
+static int vxlan_mdb_nlmsg_fill(const struct vxlan_dev *vxlan,
+ struct sk_buff *skb,
+ const struct vxlan_mdb_entry *mdb_entry,
+ const struct vxlan_mdb_remote *remote,
+ int type)
+{
+ struct nlattr *mdb_nest, *mdb_entry_nest;
+ struct br_port_msg *bpm;
+ struct nlmsghdr *nlh;
+
+ nlh = nlmsg_put(skb, 0, 0, type, sizeof(*bpm), 0);
+ if (!nlh)
+ return -EMSGSIZE;
+
+ bpm = nlmsg_data(nlh);
+ memset(bpm, 0, sizeof(*bpm));
+ bpm->family = AF_BRIDGE;
+ bpm->ifindex = vxlan->dev->ifindex;
+
+ mdb_nest = nla_nest_start_noflag(skb, MDBA_MDB);
+ if (!mdb_nest)
+ goto cancel;
+ mdb_entry_nest = nla_nest_start_noflag(skb, MDBA_MDB_ENTRY);
+ if (!mdb_entry_nest)
+ goto cancel;
+
+ if (vxlan_mdb_entry_info_fill(vxlan, skb, mdb_entry, remote))
+ goto cancel;
+
+ nla_nest_end(skb, mdb_entry_nest);
+ nla_nest_end(skb, mdb_nest);
+ nlmsg_end(skb, nlh);
+
+ return 0;
+
+cancel:
+ nlmsg_cancel(skb, nlh);
+ return -EMSGSIZE;
+}
+
+static void vxlan_mdb_remote_notify(const struct vxlan_dev *vxlan,
+ const struct vxlan_mdb_entry *mdb_entry,
+ const struct vxlan_mdb_remote *remote,
+ int type)
+{
+ struct net *net = dev_net(vxlan->dev);
+ struct sk_buff *skb;
+ int err = -ENOBUFS;
+
+ skb = nlmsg_new(vxlan_mdb_nlmsg_size(vxlan, mdb_entry, remote),
+ GFP_KERNEL);
+ if (!skb)
+ goto errout;
+
+ err = vxlan_mdb_nlmsg_fill(vxlan, skb, mdb_entry, remote, type);
+ if (err) {
+ kfree_skb(skb);
+ goto errout;
+ }
+
+ rtnl_notify(skb, net, 0, RTNLGRP_MDB, NULL, GFP_KERNEL);
+ return;
+errout:
+ rtnl_set_sk_err(net, RTNLGRP_MDB, err);
+}
+
+static int
+vxlan_mdb_remote_srcs_replace(const struct vxlan_mdb_config *cfg,
+ const struct vxlan_mdb_entry *mdb_entry,
+ struct vxlan_mdb_remote *remote,
+ struct netlink_ext_ack *extack)
+{
+ struct vxlan_dev *vxlan = cfg->vxlan;
+ struct vxlan_mdb_src_entry *ent;
+ struct hlist_node *tmp;
+ int err;
+
+ hlist_for_each_entry(ent, &remote->src_list, node)
+ ent->flags |= VXLAN_SGRP_F_DELETE;
+
+ err = vxlan_mdb_remote_srcs_add(cfg, remote, extack);
+ if (err)
+ goto err_clear_delete;
+
+ hlist_for_each_entry_safe(ent, tmp, &remote->src_list, node) {
+ if (ent->flags & VXLAN_SGRP_F_DELETE)
+ vxlan_mdb_remote_src_del(vxlan, &mdb_entry->key, remote,
+ ent);
+ }
+
+ return 0;
+
+err_clear_delete:
+ hlist_for_each_entry(ent, &remote->src_list, node)
+ ent->flags &= ~VXLAN_SGRP_F_DELETE;
+ return err;
+}
+
+static int vxlan_mdb_remote_replace(const struct vxlan_mdb_config *cfg,
+ const struct vxlan_mdb_entry *mdb_entry,
+ struct vxlan_mdb_remote *remote,
+ struct netlink_ext_ack *extack)
+{
+ struct vxlan_rdst *new_rd, *old_rd = rtnl_dereference(remote->rd);
+ struct vxlan_dev *vxlan = cfg->vxlan;
+ int err;
+
+ err = vxlan_mdb_remote_rdst_init(cfg, remote);
+ if (err)
+ return err;
+ new_rd = rtnl_dereference(remote->rd);
+
+ err = vxlan_mdb_remote_srcs_replace(cfg, mdb_entry, remote, extack);
+ if (err)
+ goto err_rdst_reset;
+
+ WRITE_ONCE(remote->flags, cfg->flags);
+ WRITE_ONCE(remote->filter_mode, cfg->filter_mode);
+ remote->rt_protocol = cfg->rt_protocol;
+ vxlan_mdb_remote_notify(vxlan, mdb_entry, remote, RTM_NEWMDB);
+
+ vxlan_mdb_remote_rdst_fini(old_rd);
+
+ return 0;
+
+err_rdst_reset:
+ rcu_assign_pointer(remote->rd, old_rd);
+ vxlan_mdb_remote_rdst_fini(new_rd);
+ return err;
+}
+
+static int vxlan_mdb_remote_add(const struct vxlan_mdb_config *cfg,
+ struct vxlan_mdb_entry *mdb_entry,
+ struct netlink_ext_ack *extack)
+{
+ struct vxlan_mdb_remote *remote;
+ int err;
+
+ remote = vxlan_mdb_remote_lookup(mdb_entry, &cfg->remote_ip);
+ if (remote) {
+ if (!(cfg->nlflags & NLM_F_REPLACE)) {
+ NL_SET_ERR_MSG_MOD(extack, "Replace not specified and MDB remote entry already exists");
+ return -EEXIST;
+ }
+ return vxlan_mdb_remote_replace(cfg, mdb_entry, remote, extack);
+ }
+
+ if (!(cfg->nlflags & NLM_F_CREATE)) {
+ NL_SET_ERR_MSG_MOD(extack, "Create not specified and entry does not exist");
+ return -ENOENT;
+ }
+
+ remote = kzalloc(sizeof(*remote), GFP_KERNEL);
+ if (!remote)
+ return -ENOMEM;
+
+ err = vxlan_mdb_remote_init(cfg, remote);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Failed to initialize remote MDB entry");
+ goto err_free_remote;
+ }
+
+ err = vxlan_mdb_remote_srcs_add(cfg, remote, extack);
+ if (err)
+ goto err_remote_fini;
+
+ list_add_rcu(&remote->list, &mdb_entry->remotes);
+ vxlan_mdb_remote_notify(cfg->vxlan, mdb_entry, remote, RTM_NEWMDB);
+
+ return 0;
+
+err_remote_fini:
+ vxlan_mdb_remote_fini(cfg->vxlan, remote);
+err_free_remote:
+ kfree(remote);
+ return err;
+}
+
+static void vxlan_mdb_remote_del(struct vxlan_dev *vxlan,
+ struct vxlan_mdb_entry *mdb_entry,
+ struct vxlan_mdb_remote *remote)
+{
+ vxlan_mdb_remote_notify(vxlan, mdb_entry, remote, RTM_DELMDB);
+ list_del_rcu(&remote->list);
+ vxlan_mdb_remote_srcs_del(vxlan, &mdb_entry->key, remote);
+ vxlan_mdb_remote_fini(vxlan, remote);
+ kfree_rcu(remote, rcu);
+}
+
+static struct vxlan_mdb_entry *
+vxlan_mdb_entry_get(struct vxlan_dev *vxlan,
+ const struct vxlan_mdb_entry_key *group)
+{
+ struct vxlan_mdb_entry *mdb_entry;
+ int err;
+
+ mdb_entry = vxlan_mdb_entry_lookup(vxlan, group);
+ if (mdb_entry)
+ return mdb_entry;
+
+ mdb_entry = kzalloc(sizeof(*mdb_entry), GFP_KERNEL);
+ if (!mdb_entry)
+ return ERR_PTR(-ENOMEM);
+
+ INIT_LIST_HEAD(&mdb_entry->remotes);
+ memcpy(&mdb_entry->key, group, sizeof(mdb_entry->key));
+ hlist_add_head(&mdb_entry->mdb_node, &vxlan->mdb_list);
+
+ err = rhashtable_lookup_insert_fast(&vxlan->mdb_tbl,
+ &mdb_entry->rhnode,
+ vxlan_mdb_rht_params);
+ if (err)
+ goto err_free_entry;
+
+ if (hlist_is_singular_node(&mdb_entry->mdb_node, &vxlan->mdb_list))
+ vxlan->cfg.flags |= VXLAN_F_MDB;
+
+ return mdb_entry;
+
+err_free_entry:
+ hlist_del(&mdb_entry->mdb_node);
+ kfree(mdb_entry);
+ return ERR_PTR(err);
+}
+
+static void vxlan_mdb_entry_put(struct vxlan_dev *vxlan,
+ struct vxlan_mdb_entry *mdb_entry)
+{
+ if (!list_empty(&mdb_entry->remotes))
+ return;
+
+ if (hlist_is_singular_node(&mdb_entry->mdb_node, &vxlan->mdb_list))
+ vxlan->cfg.flags &= ~VXLAN_F_MDB;
+
+ rhashtable_remove_fast(&vxlan->mdb_tbl, &mdb_entry->rhnode,
+ vxlan_mdb_rht_params);
+ hlist_del(&mdb_entry->mdb_node);
+ kfree_rcu(mdb_entry, rcu);
+}
+
+static int __vxlan_mdb_add(const struct vxlan_mdb_config *cfg,
+ struct netlink_ext_ack *extack)
+{
+ struct vxlan_dev *vxlan = cfg->vxlan;
+ struct vxlan_mdb_entry *mdb_entry;
+ int err;
+
+ mdb_entry = vxlan_mdb_entry_get(vxlan, &cfg->group);
+ if (IS_ERR(mdb_entry))
+ return PTR_ERR(mdb_entry);
+
+ err = vxlan_mdb_remote_add(cfg, mdb_entry, extack);
+ if (err)
+ goto err_entry_put;
+
+ vxlan->mdb_seq++;
+
+ return 0;
+
+err_entry_put:
+ vxlan_mdb_entry_put(vxlan, mdb_entry);
+ return err;
+}
+
+static int __vxlan_mdb_del(const struct vxlan_mdb_config *cfg,
+ struct netlink_ext_ack *extack)
+{
+ struct vxlan_dev *vxlan = cfg->vxlan;
+ struct vxlan_mdb_entry *mdb_entry;
+ struct vxlan_mdb_remote *remote;
+
+ mdb_entry = vxlan_mdb_entry_lookup(vxlan, &cfg->group);
+ if (!mdb_entry) {
+ NL_SET_ERR_MSG_MOD(extack, "Did not find MDB entry");
+ return -ENOENT;
+ }
+
+ remote = vxlan_mdb_remote_lookup(mdb_entry, &cfg->remote_ip);
+ if (!remote) {
+ NL_SET_ERR_MSG_MOD(extack, "Did not find MDB remote entry");
+ return -ENOENT;
+ }
+
+ vxlan_mdb_remote_del(vxlan, mdb_entry, remote);
+ vxlan_mdb_entry_put(vxlan, mdb_entry);
+
+ vxlan->mdb_seq++;
+
+ return 0;
+}
+
+int vxlan_mdb_add(struct net_device *dev, struct nlattr *tb[], u16 nlmsg_flags,
+ struct netlink_ext_ack *extack)
+{
+ struct vxlan_mdb_config cfg;
+ int err;
+
+ ASSERT_RTNL();
+
+ err = vxlan_mdb_config_init(&cfg, dev, tb, nlmsg_flags, extack);
+ if (err)
+ return err;
+
+ err = __vxlan_mdb_add(&cfg, extack);
+
+ vxlan_mdb_config_fini(&cfg);
+ return err;
+}
+
+int vxlan_mdb_del(struct net_device *dev, struct nlattr *tb[],
+ struct netlink_ext_ack *extack)
+{
+ struct vxlan_mdb_config cfg;
+ int err;
+
+ ASSERT_RTNL();
+
+ err = vxlan_mdb_config_init(&cfg, dev, tb, 0, extack);
+ if (err)
+ return err;
+
+ err = __vxlan_mdb_del(&cfg, extack);
+
+ vxlan_mdb_config_fini(&cfg);
+ return err;
+}
+
+struct vxlan_mdb_entry *vxlan_mdb_entry_skb_get(struct vxlan_dev *vxlan,
+ struct sk_buff *skb,
+ __be32 src_vni)
+{
+ struct vxlan_mdb_entry *mdb_entry;
+ struct vxlan_mdb_entry_key group;
+
+ if (!is_multicast_ether_addr(eth_hdr(skb)->h_dest) ||
+ is_broadcast_ether_addr(eth_hdr(skb)->h_dest))
+ return NULL;
+
+ /* When not in collect metadata mode, 'src_vni' is zero, but MDB
+ * entries are stored with the VNI of the VXLAN device.
+ */
+ if (!(vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA))
+ src_vni = vxlan->default_dst.remote_vni;
+
+ memset(&group, 0, sizeof(group));
+ group.vni = src_vni;
+
+ switch (skb->protocol) {
+ case htons(ETH_P_IP):
+ if (!pskb_may_pull(skb, sizeof(struct iphdr)))
+ return NULL;
+ group.dst.sa.sa_family = AF_INET;
+ group.dst.sin.sin_addr.s_addr = ip_hdr(skb)->daddr;
+ group.src.sa.sa_family = AF_INET;
+ group.src.sin.sin_addr.s_addr = ip_hdr(skb)->saddr;
+ break;
+#if IS_ENABLED(CONFIG_IPV6)
+ case htons(ETH_P_IPV6):
+ if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
+ return NULL;
+ group.dst.sa.sa_family = AF_INET6;
+ group.dst.sin6.sin6_addr = ipv6_hdr(skb)->daddr;
+ group.src.sa.sa_family = AF_INET6;
+ group.src.sin6.sin6_addr = ipv6_hdr(skb)->saddr;
+ break;
+#endif
+ default:
+ return NULL;
+ }
+
+ mdb_entry = vxlan_mdb_entry_lookup(vxlan, &group);
+ if (mdb_entry)
+ return mdb_entry;
+
+ memset(&group.src, 0, sizeof(group.src));
+ mdb_entry = vxlan_mdb_entry_lookup(vxlan, &group);
+ if (mdb_entry)
+ return mdb_entry;
+
+ /* No (S, G) or (*, G) found. Look up the all-zeros entry, but only if
+ * the destination IP address is not link-local multicast since we want
+ * to transmit such traffic together with broadcast and unknown unicast
+ * traffic.
+ */
+ switch (skb->protocol) {
+ case htons(ETH_P_IP):
+ if (ipv4_is_local_multicast(group.dst.sin.sin_addr.s_addr))
+ return NULL;
+ group.dst.sin.sin_addr.s_addr = 0;
+ break;
+#if IS_ENABLED(CONFIG_IPV6)
+ case htons(ETH_P_IPV6):
+ if (ipv6_addr_type(&group.dst.sin6.sin6_addr) &
+ IPV6_ADDR_LINKLOCAL)
+ return NULL;
+ memset(&group.dst.sin6.sin6_addr, 0,
+ sizeof(group.dst.sin6.sin6_addr));
+ break;
+#endif
+ default:
+ return NULL;
+ }
+
+ return vxlan_mdb_entry_lookup(vxlan, &group);
+}
+
+netdev_tx_t vxlan_mdb_xmit(struct vxlan_dev *vxlan,
+ const struct vxlan_mdb_entry *mdb_entry,
+ struct sk_buff *skb)
+{
+ struct vxlan_mdb_remote *remote, *fremote = NULL;
+ __be32 src_vni = mdb_entry->key.vni;
+
+ list_for_each_entry_rcu(remote, &mdb_entry->remotes, list) {
+ struct sk_buff *skb1;
+
+ if ((vxlan_mdb_is_star_g(&mdb_entry->key) &&
+ READ_ONCE(remote->filter_mode) == MCAST_INCLUDE) ||
+ (READ_ONCE(remote->flags) & VXLAN_MDB_REMOTE_F_BLOCKED))
+ continue;
+
+ if (!fremote) {
+ fremote = remote;
+ continue;
+ }
+
+ skb1 = skb_clone(skb, GFP_ATOMIC);
+ if (skb1)
+ vxlan_xmit_one(skb1, vxlan->dev, src_vni,
+ rcu_dereference(remote->rd), false);
+ }
+
+ if (fremote)
+ vxlan_xmit_one(skb, vxlan->dev, src_vni,
+ rcu_dereference(fremote->rd), false);
+ else
+ kfree_skb(skb);
+
+ return NETDEV_TX_OK;
+}
+
+static void vxlan_mdb_check_empty(void *ptr, void *arg)
+{
+ WARN_ON_ONCE(1);
+}
+
+static void vxlan_mdb_remotes_flush(struct vxlan_dev *vxlan,
+ struct vxlan_mdb_entry *mdb_entry)
+{
+ struct vxlan_mdb_remote *remote, *tmp;
+
+ list_for_each_entry_safe(remote, tmp, &mdb_entry->remotes, list)
+ vxlan_mdb_remote_del(vxlan, mdb_entry, remote);
+}
+
+static void vxlan_mdb_entries_flush(struct vxlan_dev *vxlan)
+{
+ struct vxlan_mdb_entry *mdb_entry;
+ struct hlist_node *tmp;
+
+ /* The removal of an entry cannot trigger the removal of another entry
+ * since entries are always added to the head of the list.
+ */
+ hlist_for_each_entry_safe(mdb_entry, tmp, &vxlan->mdb_list, mdb_node) {
+ vxlan_mdb_remotes_flush(vxlan, mdb_entry);
+ vxlan_mdb_entry_put(vxlan, mdb_entry);
+ }
+}
+
+int vxlan_mdb_init(struct vxlan_dev *vxlan)
+{
+ int err;
+
+ err = rhashtable_init(&vxlan->mdb_tbl, &vxlan_mdb_rht_params);
+ if (err)
+ return err;
+
+ INIT_HLIST_HEAD(&vxlan->mdb_list);
+
+ return 0;
+}
+
+void vxlan_mdb_fini(struct vxlan_dev *vxlan)
+{
+ vxlan_mdb_entries_flush(vxlan);
+ WARN_ON_ONCE(vxlan->cfg.flags & VXLAN_F_MDB);
+ rhashtable_free_and_destroy(&vxlan->mdb_tbl, vxlan_mdb_check_empty,
+ NULL);
+}
diff --git a/drivers/net/vxlan/vxlan_private.h b/drivers/net/vxlan/vxlan_private.h
index 599c3b4fdd5e..817fa3075842 100644
--- a/drivers/net/vxlan/vxlan_private.h
+++ b/drivers/net/vxlan/vxlan_private.h
@@ -85,6 +85,39 @@ bool vxlan_addr_equal(const union vxlan_addr *a, const union vxlan_addr *b)
return a->sin.sin_addr.s_addr == b->sin.sin_addr.s_addr;
}
+static inline int vxlan_nla_get_addr(union vxlan_addr *ip,
+ const struct nlattr *nla)
+{
+ if (nla_len(nla) >= sizeof(struct in6_addr)) {
+ ip->sin6.sin6_addr = nla_get_in6_addr(nla);
+ ip->sa.sa_family = AF_INET6;
+ return 0;
+ } else if (nla_len(nla) >= sizeof(__be32)) {
+ ip->sin.sin_addr.s_addr = nla_get_in_addr(nla);
+ ip->sa.sa_family = AF_INET;
+ return 0;
+ } else {
+ return -EAFNOSUPPORT;
+ }
+}
+
+static inline int vxlan_nla_put_addr(struct sk_buff *skb, int attr,
+ const union vxlan_addr *ip)
+{
+ if (ip->sa.sa_family == AF_INET6)
+ return nla_put_in6_addr(skb, attr, &ip->sin6.sin6_addr);
+ else
+ return nla_put_in_addr(skb, attr, ip->sin.sin_addr.s_addr);
+}
+
+static inline bool vxlan_addr_is_multicast(const union vxlan_addr *ip)
+{
+ if (ip->sa.sa_family == AF_INET6)
+ return ipv6_addr_is_multicast(&ip->sin6.sin6_addr);
+ else
+ return ipv4_is_multicast(ip->sin.sin_addr.s_addr);
+}
+
#else /* !CONFIG_IPV6 */
static inline
@@ -93,8 +126,41 @@ bool vxlan_addr_equal(const union vxlan_addr *a, const union vxlan_addr *b)
return a->sin.sin_addr.s_addr == b->sin.sin_addr.s_addr;
}
+static inline int vxlan_nla_get_addr(union vxlan_addr *ip,
+ const struct nlattr *nla)
+{
+ if (nla_len(nla) >= sizeof(struct in6_addr)) {
+ return -EAFNOSUPPORT;
+ } else if (nla_len(nla) >= sizeof(__be32)) {
+ ip->sin.sin_addr.s_addr = nla_get_in_addr(nla);
+ ip->sa.sa_family = AF_INET;
+ return 0;
+ } else {
+ return -EAFNOSUPPORT;
+ }
+}
+
+static inline int vxlan_nla_put_addr(struct sk_buff *skb, int attr,
+ const union vxlan_addr *ip)
+{
+ return nla_put_in_addr(skb, attr, ip->sin.sin_addr.s_addr);
+}
+
+static inline bool vxlan_addr_is_multicast(const union vxlan_addr *ip)
+{
+ return ipv4_is_multicast(ip->sin.sin_addr.s_addr);
+}
+
#endif
+static inline size_t vxlan_addr_size(const union vxlan_addr *ip)
+{
+ if (ip->sa.sa_family == AF_INET6)
+ return sizeof(struct in6_addr);
+ else
+ return sizeof(__be32);
+}
+
static inline struct vxlan_vni_node *
vxlan_vnifilter_lookup(struct vxlan_dev *vxlan, __be32 vni)
{
@@ -127,6 +193,8 @@ int vxlan_fdb_update(struct vxlan_dev *vxlan,
__be16 port, __be32 src_vni, __be32 vni,
__u32 ifindex, __u16 ndm_flags, u32 nhid,
bool swdev_notify, struct netlink_ext_ack *extack);
+void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
+ __be32 default_vni, struct vxlan_rdst *rdst, bool did_rsc);
int vxlan_vni_in_use(struct net *src_net, struct vxlan_dev *vxlan,
struct vxlan_config *conf, __be32 vni);
@@ -159,4 +227,20 @@ int vxlan_igmp_join(struct vxlan_dev *vxlan, union vxlan_addr *rip,
int rifindex);
int vxlan_igmp_leave(struct vxlan_dev *vxlan, union vxlan_addr *rip,
int rifindex);
+
+/* vxlan_mdb.c */
+int vxlan_mdb_dump(struct net_device *dev, struct sk_buff *skb,
+ struct netlink_callback *cb);
+int vxlan_mdb_add(struct net_device *dev, struct nlattr *tb[], u16 nlmsg_flags,
+ struct netlink_ext_ack *extack);
+int vxlan_mdb_del(struct net_device *dev, struct nlattr *tb[],
+ struct netlink_ext_ack *extack);
+struct vxlan_mdb_entry *vxlan_mdb_entry_skb_get(struct vxlan_dev *vxlan,
+ struct sk_buff *skb,
+ __be32 src_vni);
+netdev_tx_t vxlan_mdb_xmit(struct vxlan_dev *vxlan,
+ const struct vxlan_mdb_entry *mdb_entry,
+ struct sk_buff *skb);
+int vxlan_mdb_init(struct vxlan_dev *vxlan);
+void vxlan_mdb_fini(struct vxlan_dev *vxlan);
#endif