aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2017-06-20 13:37:04 -0400
committerDavid S. Miller <davem@davemloft.net>2017-06-20 13:37:04 -0400
commitcf67bebecb4b5564586a4674646382ca380ec205 (patch)
tree20c023f49c87ae453ec2ecb8f4196ade88452554
parentMerge branch 'net-more-skb_put-work' (diff)
parentvxlan: allow multiple VXLANs with same VNI for IPv6 link-local addresses (diff)
downloadlinux-dev-cf67bebecb4b5564586a4674646382ca380ec205.tar.xz
linux-dev-cf67bebecb4b5564586a4674646382ca380ec205.zip
Merge branch 'vxlan-cleanup-and-IPv6-link-local-support'
Matthias Schiffer says: ==================== vxlan: cleanup and IPv6 link-local support Running VXLANs over IPv6 link-local addresses allows to use them as a drop-in replacement for VLANs, avoiding to allocate additional outer IP addresses to run the VXLAN over. Since v1, I have added a lot more consistency checks to the address configuration, making sure address families and scopes match. To simplify the implementation, I also did some general refactoring of the configuration handling in the new first patch of the series. The second patch is more cleanup; is slightly touches OVS code, so that list is in CC this time, too. As in v1, the last two patches actually make VXLAN over IPv6 link-local work, and allow multiple VXLANs with the same VNI and port, as long as link-local addresses on different interfaces are used. As suggested, I now store in the flags field if the VXLAN uses link-local addresses or not. v3 removes log messages as suggested by Roopa Prabhu (as it is very unusual for errors in netlink requests to be printed to the kernel log.) The commit message of patch 5 has been extended to add a note about IPv4. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--drivers/net/vxlan.c412
-rw-r--r--include/net/vxlan.h3
-rw-r--r--net/openvswitch/vport-vxlan.c4
3 files changed, 255 insertions, 164 deletions
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 94ce98229828..653b2bb32be1 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -226,7 +226,8 @@ static struct vxlan_sock *vxlan_find_sock(struct net *net, sa_family_t family,
return NULL;
}
-static struct vxlan_dev *vxlan_vs_find_vni(struct vxlan_sock *vs, __be32 vni)
+static struct vxlan_dev *vxlan_vs_find_vni(struct vxlan_sock *vs, int ifindex,
+ __be32 vni)
{
struct vxlan_dev *vxlan;
@@ -235,17 +236,27 @@ static struct vxlan_dev *vxlan_vs_find_vni(struct vxlan_sock *vs, __be32 vni)
vni = 0;
hlist_for_each_entry_rcu(vxlan, vni_head(vs, vni), hlist) {
- if (vxlan->default_dst.remote_vni == vni)
- return vxlan;
+ if (vxlan->default_dst.remote_vni != vni)
+ continue;
+
+ if (IS_ENABLED(CONFIG_IPV6)) {
+ const struct vxlan_config *cfg = &vxlan->cfg;
+
+ if ((cfg->flags & VXLAN_F_IPV6_LINKLOCAL) &&
+ cfg->remote_ifindex != ifindex)
+ continue;
+ }
+
+ return vxlan;
}
return NULL;
}
/* Look up VNI in a per net namespace table */
-static struct vxlan_dev *vxlan_find_vni(struct net *net, __be32 vni,
- sa_family_t family, __be16 port,
- u32 flags)
+static struct vxlan_dev *vxlan_find_vni(struct net *net, int ifindex,
+ __be32 vni, sa_family_t family,
+ __be16 port, u32 flags)
{
struct vxlan_sock *vs;
@@ -253,7 +264,7 @@ static struct vxlan_dev *vxlan_find_vni(struct net *net, __be32 vni,
if (!vs)
return NULL;
- return vxlan_vs_find_vni(vs, vni);
+ return vxlan_vs_find_vni(vs, ifindex, vni);
}
/* Fill in neighbour message in skbuff. */
@@ -305,7 +316,7 @@ static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan,
if (rdst->remote_vni != vxlan->default_dst.remote_vni &&
nla_put_u32(skb, NDA_VNI, be32_to_cpu(rdst->remote_vni)))
goto nla_put_failure;
- if ((vxlan->flags & VXLAN_F_COLLECT_METADATA) && fdb->vni &&
+ if ((vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA) && fdb->vni &&
nla_put_u32(skb, NDA_SRC_VNI,
be32_to_cpu(fdb->vni)))
goto nla_put_failure;
@@ -419,7 +430,7 @@ static u32 eth_vni_hash(const unsigned char *addr, __be32 vni)
static inline struct hlist_head *vxlan_fdb_head(struct vxlan_dev *vxlan,
const u8 *mac, __be32 vni)
{
- if (vxlan->flags & VXLAN_F_COLLECT_METADATA)
+ if (vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA)
return &vxlan->fdb_head[eth_vni_hash(mac, vni)];
else
return &vxlan->fdb_head[eth_hash(mac)];
@@ -434,7 +445,7 @@ static struct vxlan_fdb *__vxlan_find_mac(struct vxlan_dev *vxlan,
hlist_for_each_entry_rcu(f, head, hlist) {
if (ether_addr_equal(mac, f->eth_addr)) {
- if (vxlan->flags & VXLAN_F_COLLECT_METADATA) {
+ if (vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA) {
if (vni == f->vni)
return f;
} else {
@@ -957,16 +968,24 @@ out:
*/
static bool vxlan_snoop(struct net_device *dev,
union vxlan_addr *src_ip, const u8 *src_mac,
- __be32 vni)
+ u32 src_ifindex, __be32 vni)
{
struct vxlan_dev *vxlan = netdev_priv(dev);
struct vxlan_fdb *f;
+ u32 ifindex = 0;
+
+#if IS_ENABLED(CONFIG_IPV6)
+ if (src_ip->sa.sa_family == AF_INET6 &&
+ (ipv6_addr_type(&src_ip->sin6.sin6_addr) & IPV6_ADDR_LINKLOCAL))
+ ifindex = src_ifindex;
+#endif
f = vxlan_find_mac(vxlan, src_mac, vni);
if (likely(f)) {
struct vxlan_rdst *rdst = first_remote_rcu(f);
- if (likely(vxlan_addr_equal(&rdst->remote_ip, src_ip)))
+ if (likely(vxlan_addr_equal(&rdst->remote_ip, src_ip) &&
+ rdst->remote_ifindex == ifindex))
return false;
/* Don't migrate static entries, drop packets */
@@ -993,7 +1012,7 @@ static bool vxlan_snoop(struct net_device *dev,
vxlan->cfg.dst_port,
vni,
vxlan->default_dst.remote_vni,
- 0, NTF_SELF);
+ ifindex, NTF_SELF);
spin_unlock(&vxlan->hash_lock);
}
@@ -1264,6 +1283,7 @@ static bool vxlan_set_mac(struct vxlan_dev *vxlan,
struct sk_buff *skb, __be32 vni)
{
union vxlan_addr saddr;
+ u32 ifindex = skb->dev->ifindex;
skb_reset_mac_header(skb);
skb->protocol = eth_type_trans(skb, vxlan->dev);
@@ -1284,8 +1304,8 @@ static bool vxlan_set_mac(struct vxlan_dev *vxlan,
#endif
}
- if ((vxlan->flags & VXLAN_F_LEARN) &&
- vxlan_snoop(skb->dev, &saddr, eth_hdr(skb)->h_source, vni))
+ if ((vxlan->cfg.flags & VXLAN_F_LEARN) &&
+ vxlan_snoop(skb->dev, &saddr, eth_hdr(skb)->h_source, ifindex, vni))
return false;
return true;
@@ -1351,7 +1371,7 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb)
vni = vxlan_vni(vxlan_hdr(skb)->vx_vni);
- vxlan = vxlan_vs_find_vni(vs, vni);
+ vxlan = vxlan_vs_find_vni(vs, skb->dev->ifindex, vni);
if (!vxlan)
goto drop;
@@ -1507,7 +1527,7 @@ static int arp_reduce(struct net_device *dev, struct sk_buff *skb, __be32 vni)
if (netif_rx_ni(reply) == NET_RX_DROP)
dev->stats.rx_dropped++;
- } else if (vxlan->flags & VXLAN_F_L3MISS) {
+ } else if (vxlan->cfg.flags & VXLAN_F_L3MISS) {
union vxlan_addr ipa = {
.sin.sin_addr.s_addr = tip,
.sin.sin_family = AF_INET,
@@ -1665,7 +1685,7 @@ static int neigh_reduce(struct net_device *dev, struct sk_buff *skb, __be32 vni)
if (netif_rx_ni(reply) == NET_RX_DROP)
dev->stats.rx_dropped++;
- } else if (vxlan->flags & VXLAN_F_L3MISS) {
+ } else if (vxlan->cfg.flags & VXLAN_F_L3MISS) {
union vxlan_addr ipa = {
.sin6.sin6_addr = msg->target,
.sin6.sin6_family = AF_INET6,
@@ -1698,7 +1718,7 @@ static bool route_shortcircuit(struct net_device *dev, struct sk_buff *skb)
return false;
pip = ip_hdr(skb);
n = neigh_lookup(&arp_tbl, &pip->daddr, dev);
- if (!n && (vxlan->flags & VXLAN_F_L3MISS)) {
+ if (!n && (vxlan->cfg.flags & VXLAN_F_L3MISS)) {
union vxlan_addr ipa = {
.sin.sin_addr.s_addr = pip->daddr,
.sin.sin_family = AF_INET,
@@ -1719,7 +1739,7 @@ static bool route_shortcircuit(struct net_device *dev, struct sk_buff *skb)
return false;
pip6 = ipv6_hdr(skb);
n = neigh_lookup(ipv6_stub->nd_tbl, &pip6->daddr, dev);
- if (!n && (vxlan->flags & VXLAN_F_L3MISS)) {
+ if (!n && (vxlan->cfg.flags & VXLAN_F_L3MISS)) {
union vxlan_addr ipa = {
.sin6.sin6_addr = pip6->daddr,
.sin6.sin6_family = AF_INET6,
@@ -1993,8 +2013,9 @@ static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan,
#endif
}
- if (dst_vxlan->flags & VXLAN_F_LEARN)
- vxlan_snoop(skb->dev, &loopback, eth_hdr(skb)->h_source, vni);
+ if (dst_vxlan->cfg.flags & VXLAN_F_LEARN)
+ vxlan_snoop(skb->dev, &loopback, eth_hdr(skb)->h_source, 0,
+ vni);
u64_stats_update_begin(&tx_stats->syncp);
tx_stats->tx_packets++;
@@ -2012,8 +2033,10 @@ static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan,
}
static int encap_bypass_if_local(struct sk_buff *skb, struct net_device *dev,
- struct vxlan_dev *vxlan, union vxlan_addr *daddr,
- __be16 dst_port, __be32 vni, struct dst_entry *dst,
+ struct vxlan_dev *vxlan,
+ union vxlan_addr *daddr,
+ __be16 dst_port, int dst_ifindex, __be32 vni,
+ struct dst_entry *dst,
u32 rt_flags)
{
#if IS_ENABLED(CONFIG_IPV6)
@@ -2029,9 +2052,9 @@ static int encap_bypass_if_local(struct sk_buff *skb, struct net_device *dev,
struct vxlan_dev *dst_vxlan;
dst_release(dst);
- dst_vxlan = vxlan_find_vni(vxlan->net, vni,
+ dst_vxlan = vxlan_find_vni(vxlan->net, dst_ifindex, vni,
daddr->sa.sa_family, dst_port,
- vxlan->flags);
+ vxlan->cfg.flags);
if (!dst_vxlan) {
dev->stats.tx_errors++;
kfree_skb(skb);
@@ -2061,8 +2084,9 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
struct dst_entry *ndst = NULL;
__be32 vni, label;
__u8 tos, ttl;
+ int ifindex;
int err;
- u32 flags = vxlan->flags;
+ u32 flags = vxlan->cfg.flags;
bool udp_sum = false;
bool xnet = !net_eq(vxlan->net, dev_net(vxlan->dev));
@@ -2081,6 +2105,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
dst_port = rdst->remote_port ? rdst->remote_port : vxlan->cfg.dst_port;
vni = (rdst->remote_vni) ? : default_vni;
+ ifindex = rdst->remote_ifindex;
local_ip = vxlan->cfg.saddr;
dst_cache = &rdst->dst_cache;
md->gbp = skb->mark;
@@ -2114,6 +2139,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
dst = &remote_ip;
dst_port = info->key.tp_dst ? : vxlan->cfg.dst_port;
vni = tunnel_id_to_key32(info->key.tun_id);
+ ifindex = 0;
dst_cache = &info->dst_cache;
if (info->options_len)
md = ip_tunnel_info_opts(info);
@@ -2131,8 +2157,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
struct rtable *rt;
__be16 df = 0;
- rt = vxlan_get_route(vxlan, dev, sock4, skb,
- rdst ? rdst->remote_ifindex : 0, tos,
+ rt = vxlan_get_route(vxlan, dev, sock4, skb, ifindex, tos,
dst->sin.sin_addr.s_addr,
&local_ip.sin.sin_addr.s_addr,
dst_port, src_port,
@@ -2145,8 +2170,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
/* Bypass encapsulation if the destination is local */
if (!info) {
err = encap_bypass_if_local(skb, dev, vxlan, dst,
- dst_port, vni, &rt->dst,
- rt->rt_flags);
+ dst_port, ifindex, vni,
+ &rt->dst, rt->rt_flags);
if (err)
goto out_unlock;
} else if (info->key.tun_flags & TUNNEL_DONT_FRAGMENT) {
@@ -2168,8 +2193,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
} else {
struct vxlan_sock *sock6 = rcu_dereference(vxlan->vn6_sock);
- ndst = vxlan6_get_route(vxlan, dev, sock6, skb,
- rdst ? rdst->remote_ifindex : 0, tos,
+ ndst = vxlan6_get_route(vxlan, dev, sock6, skb, ifindex, tos,
label, &dst->sin6.sin6_addr,
&local_ip.sin6.sin6_addr,
dst_port, src_port,
@@ -2184,8 +2208,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
u32 rt6i_flags = ((struct rt6_info *)ndst)->rt6i_flags;
err = encap_bypass_if_local(skb, dev, vxlan, dst,
- dst_port, vni, ndst,
- rt6i_flags);
+ dst_port, ifindex, vni,
+ ndst, rt6i_flags);
if (err)
goto out_unlock;
}
@@ -2244,7 +2268,7 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev)
skb_reset_mac_header(skb);
- if (vxlan->flags & VXLAN_F_COLLECT_METADATA) {
+ if (vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA) {
if (info && info->mode & IP_TUNNEL_INFO_BRIDGE &&
info->mode & IP_TUNNEL_INFO_TX) {
vni = tunnel_id_to_key32(info->key.tun_id);
@@ -2257,7 +2281,7 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev)
}
}
- if (vxlan->flags & VXLAN_F_PROXY) {
+ if (vxlan->cfg.flags & VXLAN_F_PROXY) {
eth = eth_hdr(skb);
if (ntohs(eth->h_proto) == ETH_P_ARP)
return arp_reduce(dev, skb, vni);
@@ -2277,7 +2301,7 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev)
f = vxlan_find_mac(vxlan, eth->h_dest, vni);
did_rsc = false;
- if (f && (f->flags & NTF_ROUTER) && (vxlan->flags & VXLAN_F_RSC) &&
+ if (f && (f->flags & NTF_ROUTER) && (vxlan->cfg.flags & VXLAN_F_RSC) &&
(ntohs(eth->h_proto) == ETH_P_IP ||
ntohs(eth->h_proto) == ETH_P_IPV6)) {
did_rsc = route_shortcircuit(dev, skb);
@@ -2288,7 +2312,7 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev)
if (f == NULL) {
f = vxlan_find_mac(vxlan, all_zeros_mac, vni);
if (f == NULL) {
- if ((vxlan->flags & VXLAN_F_L2MISS) &&
+ if ((vxlan->cfg.flags & VXLAN_F_L2MISS) &&
!is_multicast_ether_addr(eth->h_dest))
vxlan_fdb_miss(vxlan, eth->h_dest);
@@ -2484,10 +2508,7 @@ static int vxlan_change_mtu(struct net_device *dev, int new_mtu)
struct vxlan_rdst *dst = &vxlan->default_dst;
struct net_device *lowerdev = __dev_get_by_index(vxlan->net,
dst->remote_ifindex);
- bool use_ipv6 = false;
-
- if (dst->remote_ip.sa.sa_family == AF_INET6)
- use_ipv6 = true;
+ bool use_ipv6 = !!(vxlan->cfg.flags & VXLAN_F_IPV6);
/* This check is different than dev->max_mtu, because it looks at
* the lowerdev->mtu, rather than the static dev->max_mtu
@@ -2623,6 +2644,10 @@ static void vxlan_setup(struct net_device *dev)
netif_keep_dst(dev);
dev->priv_flags |= IFF_NO_QUEUE;
+ /* MTU range: 68 - 65535 */
+ dev->min_mtu = ETH_MIN_MTU;
+ dev->max_mtu = ETH_MAX_MTU;
+
INIT_LIST_HEAD(&vxlan->next);
spin_lock_init(&vxlan->hash_lock);
@@ -2630,9 +2655,8 @@ static void vxlan_setup(struct net_device *dev)
vxlan->age_timer.function = vxlan_cleanup;
vxlan->age_timer.data = (unsigned long) vxlan;
- vxlan->cfg.dst_port = htons(vxlan_port);
-
vxlan->dev = dev;
+ vxlan->net = dev_net(dev);
gro_cells_init(&vxlan->gro_cells, dev);
@@ -2701,11 +2725,19 @@ static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[])
}
}
+ if (tb[IFLA_MTU]) {
+ u32 mtu = nla_get_u32(data[IFLA_MTU]);
+
+ if (mtu < ETH_MIN_MTU || mtu > ETH_MAX_MTU)
+ return -EINVAL;
+ }
+
if (!data)
return -EINVAL;
if (data[IFLA_VXLAN_ID]) {
- __u32 id = nla_get_u32(data[IFLA_VXLAN_ID]);
+ u32 id = nla_get_u32(data[IFLA_VXLAN_ID]);
+
if (id >= VXLAN_N_VID)
return -ERANGE;
}
@@ -2821,7 +2853,7 @@ static int __vxlan_sock_add(struct vxlan_dev *vxlan, bool ipv6)
if (!vxlan->cfg.no_share) {
spin_lock(&vn->sock_lock);
vs = vxlan_find_sock(vxlan->net, ipv6 ? AF_INET6 : AF_INET,
- vxlan->cfg.dst_port, vxlan->flags);
+ vxlan->cfg.dst_port, vxlan->cfg.flags);
if (vs && !atomic_add_unless(&vs->refcnt, 1, 0)) {
spin_unlock(&vn->sock_lock);
return -EBUSY;
@@ -2830,7 +2862,7 @@ static int __vxlan_sock_add(struct vxlan_dev *vxlan, bool ipv6)
}
if (!vs)
vs = vxlan_socket_create(vxlan->net, ipv6,
- vxlan->cfg.dst_port, vxlan->flags);
+ vxlan->cfg.dst_port, vxlan->cfg.flags);
if (IS_ERR(vs))
return PTR_ERR(vs);
#if IS_ENABLED(CONFIG_IPV6)
@@ -2845,8 +2877,8 @@ static int __vxlan_sock_add(struct vxlan_dev *vxlan, bool ipv6)
static int vxlan_sock_add(struct vxlan_dev *vxlan)
{
- bool metadata = vxlan->flags & VXLAN_F_COLLECT_METADATA;
- bool ipv6 = vxlan->flags & VXLAN_F_IPV6 || metadata;
+ bool metadata = vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA;
+ bool ipv6 = vxlan->cfg.flags & VXLAN_F_IPV6 || metadata;
bool ipv4 = !ipv6 || metadata;
int ret = 0;
@@ -2866,116 +2898,172 @@ static int vxlan_sock_add(struct vxlan_dev *vxlan)
return ret;
}
-static int vxlan_dev_configure(struct net *src_net, struct net_device *dev,
- struct vxlan_config *conf,
- bool changelink)
+static int vxlan_config_validate(struct net *src_net, struct vxlan_config *conf,
+ struct net_device **lower,
+ struct vxlan_dev *old)
{
struct vxlan_net *vn = net_generic(src_net, vxlan_net_id);
- struct vxlan_dev *vxlan = netdev_priv(dev), *tmp;
- struct vxlan_rdst *dst = &vxlan->default_dst;
- unsigned short needed_headroom = ETH_HLEN;
+ struct vxlan_dev *tmp;
bool use_ipv6 = false;
- __be16 default_port = vxlan->cfg.dst_port;
- struct net_device *lowerdev = NULL;
- if (!changelink) {
- if (conf->flags & VXLAN_F_GPE) {
- /* For now, allow GPE only together with
- * COLLECT_METADATA. This can be relaxed later; in such
- * case, the other side of the PtP link will have to be
- * provided.
- */
- if ((conf->flags & ~VXLAN_F_ALLOWED_GPE) ||
- !(conf->flags & VXLAN_F_COLLECT_METADATA)) {
- pr_info("unsupported combination of extensions\n");
- return -EINVAL;
- }
- vxlan_raw_setup(dev);
- } else {
- vxlan_ether_setup(dev);
+ if (conf->flags & VXLAN_F_GPE) {
+ /* For now, allow GPE only together with
+ * COLLECT_METADATA. This can be relaxed later; in such
+ * case, the other side of the PtP link will have to be
+ * provided.
+ */
+ if ((conf->flags & ~VXLAN_F_ALLOWED_GPE) ||
+ !(conf->flags & VXLAN_F_COLLECT_METADATA)) {
+ return -EINVAL;
}
-
- /* MTU range: 68 - 65535 */
- dev->min_mtu = ETH_MIN_MTU;
- dev->max_mtu = ETH_MAX_MTU;
- vxlan->net = src_net;
}
- dst->remote_vni = conf->vni;
+ if (!conf->remote_ip.sa.sa_family && !conf->saddr.sa.sa_family) {
+ /* Unless IPv6 is explicitly requested, assume IPv4 */
+ conf->remote_ip.sa.sa_family = AF_INET;
+ conf->saddr.sa.sa_family = AF_INET;
+ } else if (!conf->remote_ip.sa.sa_family) {
+ conf->remote_ip.sa.sa_family = conf->saddr.sa.sa_family;
+ } else if (!conf->saddr.sa.sa_family) {
+ conf->saddr.sa.sa_family = conf->remote_ip.sa.sa_family;
+ }
- memcpy(&dst->remote_ip, &conf->remote_ip, sizeof(conf->remote_ip));
+ if (conf->saddr.sa.sa_family != conf->remote_ip.sa.sa_family)
+ return -EINVAL;
- /* Unless IPv6 is explicitly requested, assume IPv4 */
- if (!dst->remote_ip.sa.sa_family)
- dst->remote_ip.sa.sa_family = AF_INET;
+ if (vxlan_addr_multicast(&conf->saddr))
+ return -EINVAL;
- if (dst->remote_ip.sa.sa_family == AF_INET6 ||
- vxlan->cfg.saddr.sa.sa_family == AF_INET6) {
+ if (conf->saddr.sa.sa_family == AF_INET6) {
if (!IS_ENABLED(CONFIG_IPV6))
return -EPFNOSUPPORT;
use_ipv6 = true;
- vxlan->flags |= VXLAN_F_IPV6;
+ conf->flags |= VXLAN_F_IPV6;
+
+ if (!(conf->flags & VXLAN_F_COLLECT_METADATA)) {
+ int local_type =
+ ipv6_addr_type(&conf->saddr.sin6.sin6_addr);
+ int remote_type =
+ ipv6_addr_type(&conf->remote_ip.sin6.sin6_addr);
+
+ if (local_type & IPV6_ADDR_LINKLOCAL) {
+ if (!(remote_type & IPV6_ADDR_LINKLOCAL) &&
+ (remote_type != IPV6_ADDR_ANY))
+ return -EINVAL;
+
+ conf->flags |= VXLAN_F_IPV6_LINKLOCAL;
+ } else {
+ if (remote_type ==
+ (IPV6_ADDR_UNICAST | IPV6_ADDR_LINKLOCAL))
+ return -EINVAL;
+
+ conf->flags &= ~VXLAN_F_IPV6_LINKLOCAL;
+ }
+ }
}
- if (conf->label && !use_ipv6) {
- pr_info("label only supported in use with IPv6\n");
+ if (conf->label && !use_ipv6)
return -EINVAL;
- }
- if (conf->remote_ifindex &&
- conf->remote_ifindex != vxlan->cfg.remote_ifindex) {
- lowerdev = __dev_get_by_index(src_net, conf->remote_ifindex);
- dst->remote_ifindex = conf->remote_ifindex;
+ if (conf->remote_ifindex) {
+ struct net_device *lowerdev;
- if (!lowerdev) {
- pr_info("ifindex %d does not exist\n",
- dst->remote_ifindex);
+ lowerdev = __dev_get_by_index(src_net, conf->remote_ifindex);
+ if (!lowerdev)
return -ENODEV;
- }
#if IS_ENABLED(CONFIG_IPV6)
if (use_ipv6) {
struct inet6_dev *idev = __in6_dev_get(lowerdev);
- if (idev && idev->cnf.disable_ipv6) {
- pr_info("IPv6 is disabled via sysctl\n");
+ if (idev && idev->cnf.disable_ipv6)
return -EPERM;
- }
}
#endif
- if (!conf->mtu)
- dev->mtu = lowerdev->mtu -
- (use_ipv6 ? VXLAN6_HEADROOM : VXLAN_HEADROOM);
+ *lower = lowerdev;
+ } else {
+ if (vxlan_addr_multicast(&conf->remote_ip))
+ return -EINVAL;
- needed_headroom = lowerdev->hard_header_len;
- } else if (!conf->remote_ifindex &&
- vxlan_addr_multicast(&dst->remote_ip)) {
- pr_info("multicast destination requires interface to be specified\n");
- return -EINVAL;
+#if IS_ENABLED(CONFIG_IPV6)
+ if (conf->flags & VXLAN_F_IPV6_LINKLOCAL)
+ return -EINVAL;
+#endif
+
+ *lower = NULL;
}
- if (lowerdev) {
- dev->gso_max_size = lowerdev->gso_max_size;
- dev->gso_max_segs = lowerdev->gso_max_segs;
+ if (!conf->dst_port) {
+ if (conf->flags & VXLAN_F_GPE)
+ conf->dst_port = htons(4790); /* IANA VXLAN-GPE port */
+ else
+ conf->dst_port = htons(vxlan_port);
}
- if (conf->mtu) {
- int max_mtu = ETH_MAX_MTU;
+ if (!conf->age_interval)
+ conf->age_interval = FDB_AGE_DEFAULT;
- if (lowerdev)
- max_mtu = lowerdev->mtu;
+ list_for_each_entry(tmp, &vn->vxlan_list, next) {
+ if (tmp == old)
+ continue;
- max_mtu -= (use_ipv6 ? VXLAN6_HEADROOM : VXLAN_HEADROOM);
+ if (tmp->cfg.vni != conf->vni)
+ continue;
+ if (tmp->cfg.dst_port != conf->dst_port)
+ continue;
+ if ((tmp->cfg.flags & (VXLAN_F_RCV_FLAGS | VXLAN_F_IPV6)) !=
+ (conf->flags & (VXLAN_F_RCV_FLAGS | VXLAN_F_IPV6)))
+ continue;
- if (conf->mtu < dev->min_mtu || conf->mtu > dev->max_mtu)
- return -EINVAL;
+ if ((conf->flags & VXLAN_F_IPV6_LINKLOCAL) &&
+ tmp->cfg.remote_ifindex != conf->remote_ifindex)
+ continue;
+
+ return -EEXIST;
+ }
+
+ return 0;
+}
+
+static void vxlan_config_apply(struct net_device *dev,
+ struct vxlan_config *conf,
+ struct net_device *lowerdev, bool changelink)
+{
+ struct vxlan_dev *vxlan = netdev_priv(dev);
+ struct vxlan_rdst *dst = &vxlan->default_dst;
+ unsigned short needed_headroom = ETH_HLEN;
+ bool use_ipv6 = !!(conf->flags & VXLAN_F_IPV6);
+ int max_mtu = ETH_MAX_MTU;
+
+ if (!changelink) {
+ if (conf->flags & VXLAN_F_GPE)
+ vxlan_raw_setup(dev);
+ else
+ vxlan_ether_setup(dev);
+
+ if (conf->mtu)
+ dev->mtu = conf->mtu;
+ }
+
+ dst->remote_vni = conf->vni;
+
+ memcpy(&dst->remote_ip, &conf->remote_ip, sizeof(conf->remote_ip));
+
+ if (lowerdev) {
+ dst->remote_ifindex = conf->remote_ifindex;
- dev->mtu = conf->mtu;
+ dev->gso_max_size = lowerdev->gso_max_size;
+ dev->gso_max_segs = lowerdev->gso_max_segs;
- if (conf->mtu > max_mtu)
- dev->mtu = max_mtu;
+ needed_headroom = lowerdev->hard_header_len;
+
+ max_mtu = lowerdev->mtu - (use_ipv6 ? VXLAN6_HEADROOM :
+ VXLAN_HEADROOM);
}
+ if (dev->mtu > max_mtu)
+ dev->mtu = max_mtu;
+
if (use_ipv6 || conf->flags & VXLAN_F_COLLECT_METADATA)
needed_headroom += VXLAN6_HEADROOM;
else
@@ -2983,31 +3071,21 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev,
dev->needed_headroom = needed_headroom;
memcpy(&vxlan->cfg, conf, sizeof(*conf));
- if (!vxlan->cfg.dst_port) {
- if (conf->flags & VXLAN_F_GPE)
- vxlan->cfg.dst_port = htons(4790); /* IANA VXLAN-GPE port */
- else
- vxlan->cfg.dst_port = default_port;
- }
- vxlan->flags |= conf->flags;
+}
- if (!vxlan->cfg.age_interval)
- vxlan->cfg.age_interval = FDB_AGE_DEFAULT;
+static int vxlan_dev_configure(struct net *src_net, struct net_device *dev,
+ struct vxlan_config *conf,
+ bool changelink)
+{
+ struct vxlan_dev *vxlan = netdev_priv(dev);
+ struct net_device *lowerdev;
+ int ret;
- if (changelink)
- return 0;
+ ret = vxlan_config_validate(src_net, conf, &lowerdev, vxlan);
+ if (ret)
+ return ret;
- list_for_each_entry(tmp, &vn->vxlan_list, next) {
- if (tmp->cfg.vni == conf->vni &&
- (tmp->default_dst.remote_ip.sa.sa_family == AF_INET6 ||
- tmp->cfg.saddr.sa.sa_family == AF_INET6) == use_ipv6 &&
- tmp->cfg.dst_port == vxlan->cfg.dst_port &&
- (tmp->flags & VXLAN_F_RCV_FLAGS) ==
- (vxlan->flags & VXLAN_F_RCV_FLAGS)) {
- pr_info("duplicate VNI %u\n", be32_to_cpu(conf->vni));
- return -EEXIST;
- }
- }
+ vxlan_config_apply(dev, conf, lowerdev, changelink);
return 0;
}
@@ -3071,22 +3149,35 @@ static int vxlan_nl2conf(struct nlattr *tb[], struct nlattr *data[],
}
if (data[IFLA_VXLAN_GROUP]) {
+ if (changelink && (conf->remote_ip.sa.sa_family != AF_INET))
+ return -EOPNOTSUPP;
+
conf->remote_ip.sin.sin_addr.s_addr = nla_get_in_addr(data[IFLA_VXLAN_GROUP]);
+ conf->remote_ip.sa.sa_family = AF_INET;
} else if (data[IFLA_VXLAN_GROUP6]) {
if (!IS_ENABLED(CONFIG_IPV6))
return -EPFNOSUPPORT;
+ if (changelink && (conf->remote_ip.sa.sa_family != AF_INET6))
+ return -EOPNOTSUPP;
+
conf->remote_ip.sin6.sin6_addr = nla_get_in6_addr(data[IFLA_VXLAN_GROUP6]);
conf->remote_ip.sa.sa_family = AF_INET6;
}
if (data[IFLA_VXLAN_LOCAL]) {
+ if (changelink && (conf->saddr.sa.sa_family != AF_INET))
+ return -EOPNOTSUPP;
+
conf->saddr.sin.sin_addr.s_addr = nla_get_in_addr(data[IFLA_VXLAN_LOCAL]);
conf->saddr.sa.sa_family = AF_INET;
} else if (data[IFLA_VXLAN_LOCAL6]) {
if (!IS_ENABLED(CONFIG_IPV6))
return -EPFNOSUPPORT;
+ if (changelink && (conf->saddr.sa.sa_family != AF_INET6))
+ return -EOPNOTSUPP;
+
/* TODO: respect scope id */
conf->saddr.sin6.sin6_addr = nla_get_in6_addr(data[IFLA_VXLAN_LOCAL6]);
conf->saddr.sa.sa_family = AF_INET6;
@@ -3106,12 +3197,10 @@ static int vxlan_nl2conf(struct nlattr *tb[], struct nlattr *data[],
IPV6_FLOWLABEL_MASK;
if (data[IFLA_VXLAN_LEARNING]) {
- if (nla_get_u8(data[IFLA_VXLAN_LEARNING])) {
+ if (nla_get_u8(data[IFLA_VXLAN_LEARNING]))
conf->flags |= VXLAN_F_LEARN;
- } else {
+ else
conf->flags &= ~VXLAN_F_LEARN;
- vxlan->flags &= ~VXLAN_F_LEARN;
- }
} else if (!changelink) {
/* default to learn on a new device */
conf->flags |= VXLAN_F_LEARN;
@@ -3394,43 +3483,44 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev)
nla_put_u8(skb, IFLA_VXLAN_TOS, vxlan->cfg.tos) ||
nla_put_be32(skb, IFLA_VXLAN_LABEL, vxlan->cfg.label) ||
nla_put_u8(skb, IFLA_VXLAN_LEARNING,
- !!(vxlan->flags & VXLAN_F_LEARN)) ||
+ !!(vxlan->cfg.flags & VXLAN_F_LEARN)) ||
nla_put_u8(skb, IFLA_VXLAN_PROXY,
- !!(vxlan->flags & VXLAN_F_PROXY)) ||
- nla_put_u8(skb, IFLA_VXLAN_RSC, !!(vxlan->flags & VXLAN_F_RSC)) ||
+ !!(vxlan->cfg.flags & VXLAN_F_PROXY)) ||
+ nla_put_u8(skb, IFLA_VXLAN_RSC,
+ !!(vxlan->cfg.flags & VXLAN_F_RSC)) ||
nla_put_u8(skb, IFLA_VXLAN_L2MISS,
- !!(vxlan->flags & VXLAN_F_L2MISS)) ||
+ !!(vxlan->cfg.flags & VXLAN_F_L2MISS)) ||
nla_put_u8(skb, IFLA_VXLAN_L3MISS,
- !!(vxlan->flags & VXLAN_F_L3MISS)) ||
+ !!(vxlan->cfg.flags & VXLAN_F_L3MISS)) ||
nla_put_u8(skb, IFLA_VXLAN_COLLECT_METADATA,
- !!(vxlan->flags & VXLAN_F_COLLECT_METADATA)) ||
+ !!(vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA)) ||
nla_put_u32(skb, IFLA_VXLAN_AGEING, vxlan->cfg.age_interval) ||
nla_put_u32(skb, IFLA_VXLAN_LIMIT, vxlan->cfg.addrmax) ||
nla_put_be16(skb, IFLA_VXLAN_PORT, vxlan->cfg.dst_port) ||
nla_put_u8(skb, IFLA_VXLAN_UDP_CSUM,
- !(vxlan->flags & VXLAN_F_UDP_ZERO_CSUM_TX)) ||
+ !(vxlan->cfg.flags & VXLAN_F_UDP_ZERO_CSUM_TX)) ||
nla_put_u8(skb, IFLA_VXLAN_UDP_ZERO_CSUM6_TX,
- !!(vxlan->flags & VXLAN_F_UDP_ZERO_CSUM6_TX)) ||
+ !!(vxlan->cfg.flags & VXLAN_F_UDP_ZERO_CSUM6_TX)) ||
nla_put_u8(skb, IFLA_VXLAN_UDP_ZERO_CSUM6_RX,
- !!(vxlan->flags & VXLAN_F_UDP_ZERO_CSUM6_RX)) ||
+ !!(vxlan->cfg.flags & VXLAN_F_UDP_ZERO_CSUM6_RX)) ||
nla_put_u8(skb, IFLA_VXLAN_REMCSUM_TX,
- !!(vxlan->flags & VXLAN_F_REMCSUM_TX)) ||
+ !!(vxlan->cfg.flags & VXLAN_F_REMCSUM_TX)) ||
nla_put_u8(skb, IFLA_VXLAN_REMCSUM_RX,
- !!(vxlan->flags & VXLAN_F_REMCSUM_RX)))
+ !!(vxlan->cfg.flags & VXLAN_F_REMCSUM_RX)))
goto nla_put_failure;
if (nla_put(skb, IFLA_VXLAN_PORT_RANGE, sizeof(ports), &ports))
goto nla_put_failure;
- if (vxlan->flags & VXLAN_F_GBP &&
+ if (vxlan->cfg.flags & VXLAN_F_GBP &&
nla_put_flag(skb, IFLA_VXLAN_GBP))
goto nla_put_failure;
- if (vxlan->flags & VXLAN_F_GPE &&
+ if (vxlan->cfg.flags & VXLAN_F_GPE &&
nla_put_flag(skb, IFLA_VXLAN_GPE))
goto nla_put_failure;
- if (vxlan->flags & VXLAN_F_REMCSUM_NOPARTIAL &&
+ if (vxlan->cfg.flags & VXLAN_F_REMCSUM_NOPARTIAL &&
nla_put_flag(skb, IFLA_VXLAN_REMCSUM_NOPARTIAL))
goto nla_put_failure;
diff --git a/include/net/vxlan.h b/include/net/vxlan.h
index 49a59202f85e..b816a0a6686e 100644
--- a/include/net/vxlan.h
+++ b/include/net/vxlan.h
@@ -232,7 +232,6 @@ struct vxlan_dev {
struct net_device *dev;
struct net *net; /* netns for packet i/o */
struct vxlan_rdst default_dst; /* default destination */
- u32 flags; /* VXLAN_F_* in vxlan.h */
struct timer_list age_timer;
spinlock_t hash_lock;
@@ -259,6 +258,7 @@ struct vxlan_dev {
#define VXLAN_F_REMCSUM_NOPARTIAL 0x1000
#define VXLAN_F_COLLECT_METADATA 0x2000
#define VXLAN_F_GPE 0x4000
+#define VXLAN_F_IPV6_LINKLOCAL 0x8000
/* Flags that are used in the receive path. These flags must match in
* order for a socket to be shareable
@@ -273,6 +273,7 @@ struct vxlan_dev {
/* Flags that can be set together with VXLAN_F_GPE. */
#define VXLAN_F_ALLOWED_GPE (VXLAN_F_GPE | \
VXLAN_F_IPV6 | \
+ VXLAN_F_IPV6_LINKLOCAL | \
VXLAN_F_UDP_ZERO_CSUM_TX | \
VXLAN_F_UDP_ZERO_CSUM6_TX | \
VXLAN_F_UDP_ZERO_CSUM6_RX | \
diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c
index 869acb3b3d3f..7e6301b2ec4d 100644
--- a/net/openvswitch/vport-vxlan.c
+++ b/net/openvswitch/vport-vxlan.c
@@ -40,14 +40,14 @@ static int vxlan_get_options(const struct vport *vport, struct sk_buff *skb)
if (nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT, ntohs(dst_port)))
return -EMSGSIZE;
- if (vxlan->flags & VXLAN_F_GBP) {
+ if (vxlan->cfg.flags & VXLAN_F_GBP) {
struct nlattr *exts;
exts = nla_nest_start(skb, OVS_TUNNEL_ATTR_EXTENSION);
if (!exts)
return -EMSGSIZE;
- if (vxlan->flags & VXLAN_F_GBP &&
+ if (vxlan->cfg.flags & VXLAN_F_GBP &&
nla_put_flag(skb, OVS_VXLAN_EXT_GBP))
return -EMSGSIZE;