aboutsummaryrefslogtreecommitdiffstatshomepage
diff options
context:
space:
mode:
-rw-r--r--MAINTAINERS8
-rw-r--r--drivers/net/Kconfig1
-rw-r--r--drivers/net/vrf.c81
-rw-r--r--include/linux/netdevice.h36
-rw-r--r--include/net/l3mdev.h149
-rw-r--r--include/net/route.h3
-rw-r--r--include/net/vrf.h178
-rw-r--r--net/Kconfig1
-rw-r--r--net/Makefile3
-rw-r--r--net/ipv4/af_inet.c4
-rw-r--r--net/ipv4/fib_frontend.c12
-rw-r--r--net/ipv4/icmp.c8
-rw-r--r--net/ipv4/ip_fragment.c6
-rw-r--r--net/ipv4/ip_output.c2
-rw-r--r--net/ipv4/route.c15
-rw-r--r--net/ipv4/udp.c2
-rw-r--r--net/ipv4/xfrm4_policy.c8
-rw-r--r--net/ipv6/xfrm6_policy.c8
-rw-r--r--net/l3mdev/Kconfig10
-rw-r--r--net/l3mdev/Makefile5
-rw-r--r--net/l3mdev/l3mdev.c92
21 files changed, 356 insertions, 276 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index bcd263de4827..fa43fa2f30e4 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6095,6 +6095,13 @@ F: Documentation/auxdisplay/ks0108
F: drivers/auxdisplay/ks0108.c
F: include/linux/ks0108.h
+L3MDEV
+M: David Ahern <dsa@cumulusnetworks.com>
+L: netdev@vger.kernel.org
+S: Maintained
+F: net/l3mdev
+F: include/net/l3mdev.h
+
LAPB module
L: linux-x25@vger.kernel.org
S: Orphan
@@ -11266,7 +11273,6 @@ M: Shrijeet Mukherjee <shm@cumulusnetworks.com>
L: netdev@vger.kernel.org
S: Maintained
F: drivers/net/vrf.c
-F: include/net/vrf.h
F: Documentation/networking/vrf.txt
VT1211 HARDWARE MONITOR DRIVER
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index d18eb607bee6..b9ebd0d18a52 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -299,6 +299,7 @@ config NLMON
config NET_VRF
tristate "Virtual Routing and Forwarding (Lite)"
depends on IP_MULTIPLE_TABLES && IPV6_MULTIPLE_TABLES
+ depends on NET_L3_MASTER_DEV
---help---
This option enables the support for mapping interfaces into VRF's. The
support enables VRF devices.
diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index 4ecb3a3e516a..64f2ab663ffe 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -34,7 +34,7 @@
#include <net/rtnetlink.h>
#include <net/route.h>
#include <net/addrconf.h>
-#include <net/vrf.h>
+#include <net/l3mdev.h>
#define DRV_NAME "vrf"
#define DRV_VERSION "1.0"
@@ -44,6 +44,21 @@
#define vrf_master_get_rcu(dev) \
((struct net_device *)rcu_dereference(dev->rx_handler_data))
+struct slave {
+ struct list_head list;
+ struct net_device *dev;
+};
+
+struct slave_queue {
+ struct list_head all_slaves;
+};
+
+struct net_vrf {
+ struct slave_queue queue;
+ struct rtable *rth;
+ u32 tb_id;
+};
+
struct pcpu_dstats {
u64 tx_pkts;
u64 tx_bytes;
@@ -395,18 +410,15 @@ static void __vrf_insert_slave(struct slave_queue *queue, struct slave *slave)
static int do_vrf_add_slave(struct net_device *dev, struct net_device *port_dev)
{
- struct net_vrf_dev *vrf_ptr = kmalloc(sizeof(*vrf_ptr), GFP_KERNEL);
struct slave *slave = kzalloc(sizeof(*slave), GFP_KERNEL);
struct net_vrf *vrf = netdev_priv(dev);
struct slave_queue *queue = &vrf->queue;
int ret = -ENOMEM;
- if (!slave || !vrf_ptr)
+ if (!slave)
goto out_fail;
slave->dev = port_dev;
- vrf_ptr->ifindex = dev->ifindex;
- vrf_ptr->tb_id = vrf->tb_id;
/* register the packet handler for slave ports */
ret = netdev_rx_handler_register(port_dev, vrf_handle_frame, dev);
@@ -423,7 +435,6 @@ static int do_vrf_add_slave(struct net_device *dev, struct net_device *port_dev)
port_dev->flags |= IFF_SLAVE;
__vrf_insert_slave(queue, slave);
- rcu_assign_pointer(port_dev->vrf_ptr, vrf_ptr);
cycle_netdev(port_dev);
return 0;
@@ -431,14 +442,13 @@ static int do_vrf_add_slave(struct net_device *dev, struct net_device *port_dev)
out_unregister:
netdev_rx_handler_unregister(port_dev);
out_fail:
- kfree(vrf_ptr);
kfree(slave);
return ret;
}
static int vrf_add_slave(struct net_device *dev, struct net_device *port_dev)
{
- if (netif_is_vrf(port_dev) || vrf_is_slave(port_dev))
+ if (netif_is_l3_master(port_dev) || vrf_is_slave(port_dev))
return -EINVAL;
return do_vrf_add_slave(dev, port_dev);
@@ -447,21 +457,15 @@ static int vrf_add_slave(struct net_device *dev, struct net_device *port_dev)
/* inverse of do_vrf_add_slave */
static int do_vrf_del_slave(struct net_device *dev, struct net_device *port_dev)
{
- struct net_vrf_dev *vrf_ptr = rtnl_dereference(port_dev->vrf_ptr);
struct net_vrf *vrf = netdev_priv(dev);
struct slave_queue *queue = &vrf->queue;
struct slave *slave;
- RCU_INIT_POINTER(port_dev->vrf_ptr, NULL);
-
netdev_upper_dev_unlink(port_dev, dev);
port_dev->flags &= ~IFF_SLAVE;
netdev_rx_handler_unregister(port_dev);
- /* after netdev_rx_handler_unregister for synchronize_rcu */
- kfree(vrf_ptr);
-
cycle_netdev(port_dev);
slave = __vrf_find_slave_dev(queue, port_dev);
@@ -529,6 +533,33 @@ static const struct net_device_ops vrf_netdev_ops = {
.ndo_del_slave = vrf_del_slave,
};
+static u32 vrf_fib_table(const struct net_device *dev)
+{
+ struct net_vrf *vrf = netdev_priv(dev);
+
+ return vrf->tb_id;
+}
+
+static struct rtable *vrf_get_rtable(const struct net_device *dev,
+ const struct flowi4 *fl4)
+{
+ struct rtable *rth = NULL;
+
+ if (!(fl4->flowi4_flags & FLOWI_FLAG_VRFSRC)) {
+ struct net_vrf *vrf = netdev_priv(dev);
+
+ rth = vrf->rth;
+ atomic_inc(&rth->dst.__refcnt);
+ }
+
+ return rth;
+}
+
+static const struct l3mdev_ops vrf_l3mdev_ops = {
+ .l3mdev_fib_table = vrf_fib_table,
+ .l3mdev_get_rtable = vrf_get_rtable,
+};
+
static void vrf_get_drvinfo(struct net_device *dev,
struct ethtool_drvinfo *info)
{
@@ -546,6 +577,7 @@ static void vrf_setup(struct net_device *dev)
/* Initialize the device structure. */
dev->netdev_ops = &vrf_netdev_ops;
+ dev->l3mdev_ops = &vrf_l3mdev_ops;
dev->ethtool_ops = &vrf_ethtool_ops;
dev->destructor = free_netdev;
@@ -572,10 +604,6 @@ static int vrf_validate(struct nlattr *tb[], struct nlattr *data[])
static void vrf_dellink(struct net_device *dev, struct list_head *head)
{
- struct net_vrf_dev *vrf_ptr = rtnl_dereference(dev->vrf_ptr);
-
- RCU_INIT_POINTER(dev->vrf_ptr, NULL);
- kfree_rcu(vrf_ptr, rcu);
unregister_netdevice_queue(dev, head);
}
@@ -583,7 +611,6 @@ static int vrf_newlink(struct net *src_net, struct net_device *dev,
struct nlattr *tb[], struct nlattr *data[])
{
struct net_vrf *vrf = netdev_priv(dev);
- struct net_vrf_dev *vrf_ptr;
int err;
if (!data || !data[IFLA_VRF_TABLE])
@@ -591,26 +618,15 @@ static int vrf_newlink(struct net *src_net, struct net_device *dev,
vrf->tb_id = nla_get_u32(data[IFLA_VRF_TABLE]);
- dev->priv_flags |= IFF_VRF_MASTER;
-
- err = -ENOMEM;
- vrf_ptr = kmalloc(sizeof(*dev->vrf_ptr), GFP_KERNEL);
- if (!vrf_ptr)
- goto out_fail;
-
- vrf_ptr->ifindex = dev->ifindex;
- vrf_ptr->tb_id = vrf->tb_id;
+ dev->priv_flags |= IFF_L3MDEV_MASTER;
err = register_netdevice(dev);
if (err < 0)
goto out_fail;
- rcu_assign_pointer(dev->vrf_ptr, vrf_ptr);
-
return 0;
out_fail:
- kfree(vrf_ptr);
free_netdev(dev);
return err;
}
@@ -654,10 +670,9 @@ static int vrf_device_event(struct notifier_block *unused,
/* only care about unregister events to drop slave references */
if (event == NETDEV_UNREGISTER) {
- struct net_vrf_dev *vrf_ptr = rtnl_dereference(dev->vrf_ptr);
struct net_device *vrf_dev;
- if (!vrf_ptr || netif_is_vrf(dev))
+ if (netif_is_l3_master(dev))
goto out;
vrf_dev = netdev_master_upper_dev_get(dev);
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index d2ffeafc9998..b9450784ae06 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1258,7 +1258,7 @@ struct net_device_ops {
* @IFF_LIVE_ADDR_CHANGE: device supports hardware address
* change when it's running
* @IFF_MACVLAN: Macvlan device
- * @IFF_VRF_MASTER: device is a VRF master
+ * @IFF_L3MDEV_MASTER: device is an L3 master device
* @IFF_NO_QUEUE: device can run without qdisc attached
* @IFF_OPENVSWITCH: device is a Open vSwitch master
*/
@@ -1283,7 +1283,7 @@ enum netdev_priv_flags {
IFF_XMIT_DST_RELEASE_PERM = 1<<17,
IFF_IPVLAN_MASTER = 1<<18,
IFF_IPVLAN_SLAVE = 1<<19,
- IFF_VRF_MASTER = 1<<20,
+ IFF_L3MDEV_MASTER = 1<<20,
IFF_NO_QUEUE = 1<<21,
IFF_OPENVSWITCH = 1<<22,
};
@@ -1308,7 +1308,7 @@ enum netdev_priv_flags {
#define IFF_XMIT_DST_RELEASE_PERM IFF_XMIT_DST_RELEASE_PERM
#define IFF_IPVLAN_MASTER IFF_IPVLAN_MASTER
#define IFF_IPVLAN_SLAVE IFF_IPVLAN_SLAVE
-#define IFF_VRF_MASTER IFF_VRF_MASTER
+#define IFF_L3MDEV_MASTER IFF_L3MDEV_MASTER
#define IFF_NO_QUEUE IFF_NO_QUEUE
#define IFF_OPENVSWITCH IFF_OPENVSWITCH
@@ -1427,7 +1427,6 @@ enum netdev_priv_flags {
* @dn_ptr: DECnet specific data
* @ip6_ptr: IPv6 specific data
* @ax25_ptr: AX.25 specific data
- * @vrf_ptr: VRF specific data
* @ieee80211_ptr: IEEE 802.11 specific data, assign before registering
*
* @last_rx: Time of last Rx
@@ -1587,6 +1586,9 @@ struct net_device {
#ifdef CONFIG_NET_SWITCHDEV
const struct switchdev_ops *switchdev_ops;
#endif
+#ifdef CONFIG_NET_L3_MASTER_DEV
+ const struct l3mdev_ops *l3mdev_ops;
+#endif
const struct header_ops *header_ops;
@@ -1646,7 +1648,6 @@ struct net_device {
struct dn_dev __rcu *dn_ptr;
struct inet6_dev __rcu *ip6_ptr;
void *ax25_ptr;
- struct net_vrf_dev __rcu *vrf_ptr;
struct wireless_dev *ieee80211_ptr;
struct wpan_dev *ieee802154_ptr;
#if IS_ENABLED(CONFIG_MPLS_ROUTING)
@@ -3824,9 +3825,9 @@ static inline bool netif_supports_nofcs(struct net_device *dev)
return dev->priv_flags & IFF_SUPP_NOFCS;
}
-static inline bool netif_is_vrf(const struct net_device *dev)
+static inline bool netif_is_l3_master(const struct net_device *dev)
{
- return dev->priv_flags & IFF_VRF_MASTER;
+ return dev->priv_flags & IFF_L3MDEV_MASTER;
}
static inline bool netif_is_bridge_master(const struct net_device *dev)
@@ -3839,27 +3840,6 @@ static inline bool netif_is_ovs_master(const struct net_device *dev)
return dev->priv_flags & IFF_OPENVSWITCH;
}
-static inline bool netif_index_is_vrf(struct net *net, int ifindex)
-{
- bool rc = false;
-
-#if IS_ENABLED(CONFIG_NET_VRF)
- struct net_device *dev;
-
- if (ifindex == 0)
- return false;
-
- rcu_read_lock();
-
- dev = dev_get_by_index_rcu(net, ifindex);
- if (dev)
- rc = netif_is_vrf(dev);
-
- rcu_read_unlock();
-#endif
- return rc;
-}
-
/* This device needs to keep skb dst for qdisc enqueue or ndo_start_xmit() */
static inline void netif_keep_dst(struct net_device *dev)
{
diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h
new file mode 100644
index 000000000000..87cee05a0a17
--- /dev/null
+++ b/include/net/l3mdev.h
@@ -0,0 +1,149 @@
+/*
+ * include/net/l3mdev.h - L3 master device API
+ * Copyright (c) 2015 Cumulus Networks
+ * Copyright (c) 2015 David Ahern <dsa@cumulusnetworks.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+#ifndef _NET_L3MDEV_H_
+#define _NET_L3MDEV_H_
+
+/**
+ * struct l3mdev_ops - l3mdev operations
+ *
+ * @l3mdev_fib_table: Get FIB table id to use for lookups
+ *
+ * @l3mdev_get_rtable: Get cached IPv4 rtable (dst_entry) for device
+ */
+
+struct l3mdev_ops {
+ u32 (*l3mdev_fib_table)(const struct net_device *dev);
+ struct rtable * (*l3mdev_get_rtable)(const struct net_device *dev,
+ const struct flowi4 *fl4);
+};
+
+#ifdef CONFIG_NET_L3_MASTER_DEV
+
+int l3mdev_master_ifindex_rcu(struct net_device *dev);
+static inline int l3mdev_master_ifindex(struct net_device *dev)
+{
+ int ifindex;
+
+ rcu_read_lock();
+ ifindex = l3mdev_master_ifindex_rcu(dev);
+ rcu_read_unlock();
+
+ return ifindex;
+}
+
+/* get index of an interface to use for FIB lookups. For devices
+ * enslaved to an L3 master device FIB lookups are based on the
+ * master index
+ */
+static inline int l3mdev_fib_oif_rcu(struct net_device *dev)
+{
+ return l3mdev_master_ifindex_rcu(dev) ? : dev->ifindex;
+}
+
+static inline int l3mdev_fib_oif(struct net_device *dev)
+{
+ int oif;
+
+ rcu_read_lock();
+ oif = l3mdev_fib_oif_rcu(dev);
+ rcu_read_unlock();
+
+ return oif;
+}
+
+u32 l3mdev_fib_table_rcu(const struct net_device *dev);
+u32 l3mdev_fib_table_by_index(struct net *net, int ifindex);
+static inline u32 l3mdev_fib_table(const struct net_device *dev)
+{
+ u32 tb_id;
+
+ rcu_read_lock();
+ tb_id = l3mdev_fib_table_rcu(dev);
+ rcu_read_unlock();
+
+ return tb_id;
+}
+
+static inline struct rtable *l3mdev_get_rtable(const struct net_device *dev,
+ const struct flowi4 *fl4)
+{
+ if (netif_is_l3_master(dev) && dev->l3mdev_ops->l3mdev_get_rtable)
+ return dev->l3mdev_ops->l3mdev_get_rtable(dev, fl4);
+
+ return NULL;
+}
+
+static inline bool netif_index_is_l3_master(struct net *net, int ifindex)
+{
+ struct net_device *dev;
+ bool rc = false;
+
+ if (ifindex == 0)
+ return false;
+
+ rcu_read_lock();
+
+ dev = dev_get_by_index_rcu(net, ifindex);
+ if (dev)
+ rc = netif_is_l3_master(dev);
+
+ rcu_read_unlock();
+
+ return rc;
+}
+
+#else
+
+static inline int l3mdev_master_ifindex_rcu(struct net_device *dev)
+{
+ return 0;
+}
+static inline int l3mdev_master_ifindex(struct net_device *dev)
+{
+ return 0;
+}
+
+static inline int l3mdev_fib_oif_rcu(struct net_device *dev)
+{
+ return dev ? dev->ifindex : 0;
+}
+static inline int l3mdev_fib_oif(struct net_device *dev)
+{
+ return dev ? dev->ifindex : 0;
+}
+
+static inline u32 l3mdev_fib_table_rcu(const struct net_device *dev)
+{
+ return 0;
+}
+static inline u32 l3mdev_fib_table(const struct net_device *dev)
+{
+ return 0;
+}
+static inline u32 l3mdev_fib_table_by_index(struct net *net, int ifindex)
+{
+ return 0;
+}
+
+static inline struct rtable *l3mdev_get_rtable(const struct net_device *dev,
+ const struct flowi4 *fl4)
+{
+ return NULL;
+}
+
+static inline bool netif_index_is_l3_master(struct net *net, int ifindex)
+{
+ return false;
+}
+
+#endif
+
+#endif /* _NET_L3MDEV_H_ */
diff --git a/include/net/route.h b/include/net/route.h
index d1bd90bb3187..e211dc167db1 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -28,6 +28,7 @@
#include <net/inetpeer.h>
#include <net/flow.h>
#include <net/inet_sock.h>
+#include <net/l3mdev.h>
#include <linux/in_route.h>
#include <linux/rtnetlink.h>
#include <linux/rcupdate.h>
@@ -256,7 +257,7 @@ static inline void ip_route_connect_init(struct flowi4 *fl4, __be32 dst, __be32
if (inet_sk(sk)->transparent)
flow_flags |= FLOWI_FLAG_ANYSRC;
- if (netif_index_is_vrf(sock_net(sk), oif))
+ if (netif_index_is_l3_master(sock_net(sk), oif))
flow_flags |= FLOWI_FLAG_VRFSRC | FLOWI_FLAG_SKIP_NH_OIF;
flowi4_init_output(fl4, oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE,
diff --git a/include/net/vrf.h b/include/net/vrf.h
deleted file mode 100644
index 593e6094ddd4..000000000000
--- a/include/net/vrf.h
+++ /dev/null
@@ -1,178 +0,0 @@
-/*
- * include/net/net_vrf.h - adds vrf dev structure definitions
- * Copyright (c) 2015 Cumulus Networks
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
-
-#ifndef __LINUX_NET_VRF_H
-#define __LINUX_NET_VRF_H
-
-struct net_vrf_dev {
- struct rcu_head rcu;
- int ifindex; /* ifindex of master dev */
- u32 tb_id; /* table id for VRF */
-};
-
-struct slave {
- struct list_head list;
- struct net_device *dev;
-};
-
-struct slave_queue {
- struct list_head all_slaves;
-};
-
-struct net_vrf {
- struct slave_queue queue;
- struct rtable *rth;
- u32 tb_id;
-};
-
-
-#if IS_ENABLED(CONFIG_NET_VRF)
-/* called with rcu_read_lock() */
-static inline int vrf_master_ifindex_rcu(const struct net_device *dev)
-{
- struct net_vrf_dev *vrf_ptr;
- int ifindex = 0;
-
- if (!dev)
- return 0;
-
- if (netif_is_vrf(dev)) {
- ifindex = dev->ifindex;
- } else {
- vrf_ptr = rcu_dereference(dev->vrf_ptr);
- if (vrf_ptr)
- ifindex = vrf_ptr->ifindex;
- }
-
- return ifindex;
-}
-
-static inline int vrf_master_ifindex(const struct net_device *dev)
-{
- int ifindex;
-
- rcu_read_lock();
- ifindex = vrf_master_ifindex_rcu(dev);
- rcu_read_unlock();
-
- return ifindex;
-}
-
-/* called with rcu_read_lock */
-static inline u32 vrf_dev_table_rcu(const struct net_device *dev)
-{
- u32 tb_id = 0;
-
- if (dev) {
- struct net_vrf_dev *vrf_ptr;
-
- vrf_ptr = rcu_dereference(dev->vrf_ptr);
- if (vrf_ptr)
- tb_id = vrf_ptr->tb_id;
- }
- return tb_id;
-}
-
-static inline u32 vrf_dev_table(const struct net_device *dev)
-{
- u32 tb_id;
-
- rcu_read_lock();
- tb_id = vrf_dev_table_rcu(dev);
- rcu_read_unlock();
-
- return tb_id;
-}
-
-static inline u32 vrf_dev_table_ifindex(struct net *net, int ifindex)
-{
- struct net_device *dev;
- u32 tb_id = 0;
-
- if (!ifindex)
- return 0;
-
- rcu_read_lock();
-
- dev = dev_get_by_index_rcu(net, ifindex);
- if (dev)
- tb_id = vrf_dev_table_rcu(dev);
-
- rcu_read_unlock();
-
- return tb_id;
-}
-
-/* called with rtnl */
-static inline u32 vrf_dev_table_rtnl(const struct net_device *dev)
-{
- u32 tb_id = 0;
-
- if (dev) {
- struct net_vrf_dev *vrf_ptr;
-
- vrf_ptr = rtnl_dereference(dev->vrf_ptr);
- if (vrf_ptr)
- tb_id = vrf_ptr->tb_id;
- }
- return tb_id;
-}
-
-/* caller has already checked netif_is_vrf(dev) */
-static inline struct rtable *vrf_dev_get_rth(const struct net_device *dev)
-{
- struct rtable *rth = ERR_PTR(-ENETUNREACH);
- struct net_vrf *vrf = netdev_priv(dev);
-
- if (vrf) {
- rth = vrf->rth;
- atomic_inc(&rth->dst.__refcnt);
- }
- return rth;
-}
-
-#else
-static inline int vrf_master_ifindex_rcu(const struct net_device *dev)
-{
- return 0;
-}
-
-static inline int vrf_master_ifindex(const struct net_device *dev)
-{
- return 0;
-}
-
-static inline u32 vrf_dev_table_rcu(const struct net_device *dev)
-{
- return 0;
-}
-
-static inline u32 vrf_dev_table(const struct net_device *dev)
-{
- return 0;
-}
-
-static inline u32 vrf_dev_table_ifindex(struct net *net, int ifindex)
-{
- return 0;
-}
-
-static inline u32 vrf_dev_table_rtnl(const struct net_device *dev)
-{
- return 0;
-}
-
-static inline struct rtable *vrf_dev_get_rth(const struct net_device *dev)
-{
- return ERR_PTR(-ENETUNREACH);
-}
-#endif
-
-#endif /* __LINUX_NET_VRF_H */
diff --git a/net/Kconfig b/net/Kconfig
index 7021c1bf44d6..127da94ae25e 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -232,6 +232,7 @@ source "net/netlink/Kconfig"
source "net/mpls/Kconfig"
source "net/hsr/Kconfig"
source "net/switchdev/Kconfig"
+source "net/l3mdev/Kconfig"
config RPS
bool
diff --git a/net/Makefile b/net/Makefile
index 3995613e5510..a5d04098dfce 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -74,3 +74,6 @@ obj-$(CONFIG_HSR) += hsr/
ifneq ($(CONFIG_NET_SWITCHDEV),)
obj-y += switchdev/
endif
+ifneq ($(CONFIG_NET_L3_MASTER_DEV),)
+obj-y += l3mdev/
+endif
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 3af85eecbe11..11c4ca13ec3b 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -119,7 +119,7 @@
#ifdef CONFIG_IP_MROUTE
#include <linux/mroute.h>
#endif
-#include <net/vrf.h>
+#include <net/l3mdev.h>
/* The inetsw table contains everything that inet_create needs to
@@ -446,7 +446,7 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
goto out;
}
- tb_id = vrf_dev_table_ifindex(net, sk->sk_bound_dev_if) ? : tb_id;
+ tb_id = l3mdev_fib_table_by_index(net, sk->sk_bound_dev_if) ? : tb_id;
chk_addr_ret = inet_addr_type_table(net, addr->sin_addr.s_addr, tb_id);
/* Not specified by any standard per-se, however it breaks too
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 6fcbd215cdbc..fac172370276 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -45,7 +45,7 @@
#include <net/ip_fib.h>
#include <net/rtnetlink.h>
#include <net/xfrm.h>
-#include <net/vrf.h>
+#include <net/l3mdev.h>
#include <trace/events/fib.h>
#ifndef CONFIG_IP_MULTIPLE_TABLES
@@ -255,7 +255,7 @@ EXPORT_SYMBOL(inet_addr_type);
unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev,
__be32 addr)
{
- u32 rt_table = vrf_dev_table(dev) ? : RT_TABLE_LOCAL;
+ u32 rt_table = l3mdev_fib_table(dev) ? : RT_TABLE_LOCAL;
return __inet_dev_addr_type(net, dev, addr, rt_table);
}
@@ -268,7 +268,7 @@ unsigned int inet_addr_type_dev_table(struct net *net,
const struct net_device *dev,
__be32 addr)
{
- u32 rt_table = vrf_dev_table(dev) ? : RT_TABLE_LOCAL;
+ u32 rt_table = l3mdev_fib_table(dev) ? : RT_TABLE_LOCAL;
return __inet_dev_addr_type(net, NULL, addr, rt_table);
}
@@ -332,7 +332,7 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
bool dev_match;
fl4.flowi4_oif = 0;
- fl4.flowi4_iif = vrf_master_ifindex_rcu(dev);
+ fl4.flowi4_iif = l3mdev_master_ifindex_rcu(dev);
if (!fl4.flowi4_iif)
fl4.flowi4_iif = oif ? : LOOPBACK_IFINDEX;
fl4.daddr = src;
@@ -366,7 +366,7 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
if (nh->nh_dev == dev) {
dev_match = true;
break;
- } else if (vrf_master_ifindex_rcu(nh->nh_dev) == dev->ifindex) {
+ } else if (l3mdev_master_ifindex_rcu(nh->nh_dev) == dev->ifindex) {
dev_match = true;
break;
}
@@ -803,7 +803,7 @@ out:
static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
{
struct net *net = dev_net(ifa->ifa_dev->dev);
- u32 tb_id = vrf_dev_table_rtnl(ifa->ifa_dev->dev);
+ u32 tb_id = l3mdev_fib_table(ifa->ifa_dev->dev);
struct fib_table *tb;
struct fib_config cfg = {
.fc_protocol = RTPROT_KERNEL,
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index e5eb8ac4089d..6b96dee2800b 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -96,7 +96,7 @@
#include <net/xfrm.h>
#include <net/inet_common.h>
#include <net/ip_fib.h>
-#include <net/vrf.h>
+#include <net/l3mdev.h>
/*
* Build xmit assembly blocks
@@ -309,7 +309,7 @@ static bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt,
rc = false;
if (icmp_global_allow()) {
- int vif = vrf_master_ifindex(dst->dev);
+ int vif = l3mdev_master_ifindex(dst->dev);
struct inet_peer *peer;
peer = inet_getpeer_v4(net->ipv4.peers, fl4->daddr, vif, 1);
@@ -427,7 +427,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
fl4.flowi4_mark = mark;
fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos);
fl4.flowi4_proto = IPPROTO_ICMP;
- fl4.flowi4_oif = vrf_master_ifindex(skb->dev);
+ fl4.flowi4_oif = l3mdev_master_ifindex(skb->dev);
security_skb_classify_flow(skb, flowi4_to_flowi(&fl4));
rt = ip_route_output_key(net, &fl4);
if (IS_ERR(rt))
@@ -461,7 +461,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
fl4->flowi4_proto = IPPROTO_ICMP;
fl4->fl4_icmp_type = type;
fl4->fl4_icmp_code = code;
- fl4->flowi4_oif = vrf_master_ifindex(skb_in->dev);
+ fl4->flowi4_oif = l3mdev_master_ifindex(skb_in->dev);
security_skb_classify_flow(skb_in, flowi4_to_flowi(fl4));
rt = __ip_route_output_key(net, fl4);
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index fa7f15305f9a..9772b789adf3 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -48,7 +48,7 @@
#include <linux/inet.h>
#include <linux/netfilter_ipv4.h>
#include <net/inet_ecn.h>
-#include <net/vrf.h>
+#include <net/l3mdev.h>
/* NOTE. Logic of IP defragmentation is parallel to corresponding IPv6
* code now. If you change something here, _PLEASE_ update ipv6/reassembly.c
@@ -78,7 +78,7 @@ struct ipq {
u8 ecn; /* RFC3168 support */
u16 max_df_size; /* largest frag with DF set seen */
int iif;
- int vif; /* VRF device index */
+ int vif; /* L3 master device index */
unsigned int rid;
struct inet_peer *peer;
};
@@ -657,7 +657,7 @@ out_fail:
int ip_defrag(struct sk_buff *skb, u32 user)
{
struct net_device *dev = skb->dev ? : skb_dst(skb)->dev;
- int vif = vrf_master_ifindex_rcu(dev);
+ int vif = l3mdev_master_ifindex_rcu(dev);
struct net *net = dev_net(dev);
struct ipq *qp;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 06d2c87ed505..aff6766922e8 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1571,7 +1571,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
}
oif = arg->bound_dev_if;
- if (!oif && netif_index_is_vrf(net, skb->skb_iif))
+ if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
oif = skb->skb_iif;
flowi4_init_output(&fl4, oif,
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 8c84a6664b30..1441de1550e6 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -112,7 +112,7 @@
#endif
#include <net/secure_seq.h>
#include <net/ip_tunnels.h>
-#include <net/vrf.h>
+#include <net/l3mdev.h>
#define RT_FL_TOS(oldflp4) \
((oldflp4)->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK))
@@ -847,7 +847,7 @@ void ip_rt_send_redirect(struct sk_buff *skb)
return;
}
log_martians = IN_DEV_LOG_MARTIANS(in_dev);
- vif = vrf_master_ifindex_rcu(rt->dst.dev);
+ vif = l3mdev_master_ifindex_rcu(rt->dst.dev);
rcu_read_unlock();
net = dev_net(rt->dst.dev);
@@ -941,7 +941,7 @@ static int ip_error(struct sk_buff *skb)
}
peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr,
- vrf_master_ifindex(skb->dev), 1);
+ l3mdev_master_ifindex(skb->dev), 1);
send = true;
if (peer) {
@@ -1739,7 +1739,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
* Now we are ready to route packet.
*/
fl4.flowi4_oif = 0;
- fl4.flowi4_iif = vrf_master_ifindex_rcu(dev) ? : dev->ifindex;
+ fl4.flowi4_iif = l3mdev_fib_oif_rcu(dev);
fl4.flowi4_mark = skb->mark;
fl4.flowi4_tos = tos;
fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
@@ -2124,11 +2124,10 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4)
fl4->saddr = inet_select_addr(dev_out, 0,
RT_SCOPE_HOST);
}
- if (netif_is_vrf(dev_out) &&
- !(fl4->flowi4_flags & FLOWI_FLAG_VRFSRC)) {
- rth = vrf_dev_get_rth(dev_out);
+
+ rth = l3mdev_get_rtable(dev_out, fl4);
+ if (rth)
goto out;
- }
}
if (!fl4->daddr) {
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index f7d1d5e19e95..156ba75b6000 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1021,7 +1021,7 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
* device lookup source address from VRF table. This mimics
* behavior of ip_route_connect{_init}.
*/
- if (netif_index_is_vrf(net, ipc.oif)) {
+ if (netif_index_is_l3_master(net, ipc.oif)) {
flowi4_init_output(fl4, ipc.oif, sk->sk_mark, tos,
RT_SCOPE_UNIVERSE, sk->sk_protocol,
(flow_flags | FLOWI_FLAG_VRFSRC |
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 0304d1680ca2..f2606b9056bb 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -15,7 +15,7 @@
#include <net/dst.h>
#include <net/xfrm.h>
#include <net/ip.h>
-#include <net/vrf.h>
+#include <net/l3mdev.h>
static struct xfrm_policy_afinfo xfrm4_policy_afinfo;
@@ -111,10 +111,8 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)
struct flowi4 *fl4 = &fl->u.ip4;
int oif = 0;
- if (skb_dst(skb)) {
- oif = vrf_master_ifindex(skb_dst(skb)->dev) ?
- : skb_dst(skb)->dev->ifindex;
- }
+ if (skb_dst(skb))
+ oif = l3mdev_fib_oif(skb_dst(skb)->dev);
memset(fl4, 0, sizeof(struct flowi4));
fl4->flowi4_mark = skb->mark;
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 30caa289c5db..69cee4e0d728 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -20,7 +20,7 @@
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/ip6_route.h>
-#include <net/vrf.h>
+#include <net/l3mdev.h>
#if IS_ENABLED(CONFIG_IPV6_MIP6)
#include <net/mip6.h>
#endif
@@ -132,10 +132,8 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse)
nexthdr = nh[nhoff];
- if (skb_dst(skb)) {
- oif = vrf_master_ifindex(skb_dst(skb)->dev) ?
- : skb_dst(skb)->dev->ifindex;
- }
+ if (skb_dst(skb))
+ oif = l3mdev_fib_oif(skb_dst(skb)->dev);
memset(fl6, 0, sizeof(struct flowi6));
fl6->flowi6_mark = skb->mark;
diff --git a/net/l3mdev/Kconfig b/net/l3mdev/Kconfig
new file mode 100644
index 000000000000..5d47325037bc
--- /dev/null
+++ b/net/l3mdev/Kconfig
@@ -0,0 +1,10 @@
+#
+# Configuration for L3 master device support
+#
+
+config NET_L3_MASTER_DEV
+ bool "L3 Master device support"
+ depends on INET || IPV6
+ ---help---
+ This module provides glue between core networking code and device
+ drivers to support L3 master devices like VRF.
diff --git a/net/l3mdev/Makefile b/net/l3mdev/Makefile
new file mode 100644
index 000000000000..84a53a6f609a
--- /dev/null
+++ b/net/l3mdev/Makefile
@@ -0,0 +1,5 @@
+#
+# Makefile for the L3 device API
+#
+
+obj-$(CONFIG_NET_L3_MASTER_DEV) += l3mdev.o
diff --git a/net/l3mdev/l3mdev.c b/net/l3mdev/l3mdev.c
new file mode 100644
index 000000000000..ddf75ad41713
--- /dev/null
+++ b/net/l3mdev/l3mdev.c
@@ -0,0 +1,92 @@
+/*
+ * net/l3mdev/l3mdev.c - L3 master device implementation
+ * Copyright (c) 2015 Cumulus Networks
+ * Copyright (c) 2015 David Ahern <dsa@cumulusnetworks.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/netdevice.h>
+#include <net/l3mdev.h>
+
+/**
+ * l3mdev_master_ifindex - get index of L3 master device
+ * @dev: targeted interface
+ */
+
+int l3mdev_master_ifindex_rcu(struct net_device *dev)
+{
+ int ifindex = 0;
+
+ if (!dev)
+ return 0;
+
+ if (netif_is_l3_master(dev)) {
+ ifindex = dev->ifindex;
+ } else if (dev->flags & IFF_SLAVE) {
+ struct net_device *master;
+
+ master = netdev_master_upper_dev_get_rcu(dev);
+ if (master && netif_is_l3_master(master))
+ ifindex = master->ifindex;
+ }
+
+ return ifindex;
+}
+EXPORT_SYMBOL_GPL(l3mdev_master_ifindex_rcu);
+
+/**
+ * l3mdev_fib_table - get FIB table id associated with an L3
+ * master interface
+ * @dev: targeted interface
+ */
+
+u32 l3mdev_fib_table_rcu(const struct net_device *dev)
+{
+ u32 tb_id = 0;
+
+ if (!dev)
+ return 0;
+
+ if (netif_is_l3_master(dev)) {
+ if (dev->l3mdev_ops->l3mdev_fib_table)
+ tb_id = dev->l3mdev_ops->l3mdev_fib_table(dev);
+ } else if (dev->flags & IFF_SLAVE) {
+ /* Users of netdev_master_upper_dev_get_rcu need non-const,
+ * but current inet_*type functions take a const
+ */
+ struct net_device *_dev = (struct net_device *) dev;
+ const struct net_device *master;
+
+ master = netdev_master_upper_dev_get_rcu(_dev);
+ if (master && netif_is_l3_master(master) &&
+ master->l3mdev_ops->l3mdev_fib_table)
+ tb_id = master->l3mdev_ops->l3mdev_fib_table(master);
+ }
+
+ return tb_id;
+}
+EXPORT_SYMBOL_GPL(l3mdev_fib_table_rcu);
+
+u32 l3mdev_fib_table_by_index(struct net *net, int ifindex)
+{
+ struct net_device *dev;
+ u32 tb_id = 0;
+
+ if (!ifindex)
+ return 0;
+
+ rcu_read_lock();
+
+ dev = dev_get_by_index_rcu(net, ifindex);
+ if (dev)
+ tb_id = l3mdev_fib_table_rcu(dev);
+
+ rcu_read_unlock();
+
+ return tb_id;
+}
+EXPORT_SYMBOL_GPL(l3mdev_fib_table_by_index);