diff options
-rw-r--r-- | MAINTAINERS | 8 | ||||
-rw-r--r-- | drivers/net/Kconfig | 1 | ||||
-rw-r--r-- | drivers/net/vrf.c | 81 | ||||
-rw-r--r-- | include/linux/netdevice.h | 36 | ||||
-rw-r--r-- | include/net/l3mdev.h | 149 | ||||
-rw-r--r-- | include/net/route.h | 3 | ||||
-rw-r--r-- | include/net/vrf.h | 178 | ||||
-rw-r--r-- | net/Kconfig | 1 | ||||
-rw-r--r-- | net/Makefile | 3 | ||||
-rw-r--r-- | net/ipv4/af_inet.c | 4 | ||||
-rw-r--r-- | net/ipv4/fib_frontend.c | 12 | ||||
-rw-r--r-- | net/ipv4/icmp.c | 8 | ||||
-rw-r--r-- | net/ipv4/ip_fragment.c | 6 | ||||
-rw-r--r-- | net/ipv4/ip_output.c | 2 | ||||
-rw-r--r-- | net/ipv4/route.c | 15 | ||||
-rw-r--r-- | net/ipv4/udp.c | 2 | ||||
-rw-r--r-- | net/ipv4/xfrm4_policy.c | 8 | ||||
-rw-r--r-- | net/ipv6/xfrm6_policy.c | 8 | ||||
-rw-r--r-- | net/l3mdev/Kconfig | 10 | ||||
-rw-r--r-- | net/l3mdev/Makefile | 5 | ||||
-rw-r--r-- | net/l3mdev/l3mdev.c | 92 |
21 files changed, 356 insertions, 276 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index bcd263de4827..fa43fa2f30e4 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6095,6 +6095,13 @@ F: Documentation/auxdisplay/ks0108 F: drivers/auxdisplay/ks0108.c F: include/linux/ks0108.h +L3MDEV +M: David Ahern <dsa@cumulusnetworks.com> +L: netdev@vger.kernel.org +S: Maintained +F: net/l3mdev +F: include/net/l3mdev.h + LAPB module L: linux-x25@vger.kernel.org S: Orphan @@ -11266,7 +11273,6 @@ M: Shrijeet Mukherjee <shm@cumulusnetworks.com> L: netdev@vger.kernel.org S: Maintained F: drivers/net/vrf.c -F: include/net/vrf.h F: Documentation/networking/vrf.txt VT1211 HARDWARE MONITOR DRIVER diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index d18eb607bee6..b9ebd0d18a52 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -299,6 +299,7 @@ config NLMON config NET_VRF tristate "Virtual Routing and Forwarding (Lite)" depends on IP_MULTIPLE_TABLES && IPV6_MULTIPLE_TABLES + depends on NET_L3_MASTER_DEV ---help--- This option enables the support for mapping interfaces into VRF's. The support enables VRF devices. diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 4ecb3a3e516a..64f2ab663ffe 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -34,7 +34,7 @@ #include <net/rtnetlink.h> #include <net/route.h> #include <net/addrconf.h> -#include <net/vrf.h> +#include <net/l3mdev.h> #define DRV_NAME "vrf" #define DRV_VERSION "1.0" @@ -44,6 +44,21 @@ #define vrf_master_get_rcu(dev) \ ((struct net_device *)rcu_dereference(dev->rx_handler_data)) +struct slave { + struct list_head list; + struct net_device *dev; +}; + +struct slave_queue { + struct list_head all_slaves; +}; + +struct net_vrf { + struct slave_queue queue; + struct rtable *rth; + u32 tb_id; +}; + struct pcpu_dstats { u64 tx_pkts; u64 tx_bytes; @@ -395,18 +410,15 @@ static void __vrf_insert_slave(struct slave_queue *queue, struct slave *slave) static int do_vrf_add_slave(struct net_device *dev, struct net_device *port_dev) { - struct net_vrf_dev *vrf_ptr = kmalloc(sizeof(*vrf_ptr), GFP_KERNEL); struct slave *slave = kzalloc(sizeof(*slave), GFP_KERNEL); struct net_vrf *vrf = netdev_priv(dev); struct slave_queue *queue = &vrf->queue; int ret = -ENOMEM; - if (!slave || !vrf_ptr) + if (!slave) goto out_fail; slave->dev = port_dev; - vrf_ptr->ifindex = dev->ifindex; - vrf_ptr->tb_id = vrf->tb_id; /* register the packet handler for slave ports */ ret = netdev_rx_handler_register(port_dev, vrf_handle_frame, dev); @@ -423,7 +435,6 @@ static int do_vrf_add_slave(struct net_device *dev, struct net_device *port_dev) port_dev->flags |= IFF_SLAVE; __vrf_insert_slave(queue, slave); - rcu_assign_pointer(port_dev->vrf_ptr, vrf_ptr); cycle_netdev(port_dev); return 0; @@ -431,14 +442,13 @@ static int do_vrf_add_slave(struct net_device *dev, struct net_device *port_dev) out_unregister: netdev_rx_handler_unregister(port_dev); out_fail: - kfree(vrf_ptr); kfree(slave); return ret; } static int vrf_add_slave(struct net_device *dev, struct net_device *port_dev) { - if (netif_is_vrf(port_dev) || vrf_is_slave(port_dev)) + if (netif_is_l3_master(port_dev) || vrf_is_slave(port_dev)) return -EINVAL; return do_vrf_add_slave(dev, port_dev); @@ -447,21 +457,15 @@ static int vrf_add_slave(struct net_device *dev, struct net_device *port_dev) /* inverse of do_vrf_add_slave */ static int do_vrf_del_slave(struct net_device *dev, struct net_device *port_dev) { - struct net_vrf_dev *vrf_ptr = rtnl_dereference(port_dev->vrf_ptr); struct net_vrf *vrf = netdev_priv(dev); struct slave_queue *queue = &vrf->queue; struct slave *slave; - RCU_INIT_POINTER(port_dev->vrf_ptr, NULL); - netdev_upper_dev_unlink(port_dev, dev); port_dev->flags &= ~IFF_SLAVE; netdev_rx_handler_unregister(port_dev); - /* after netdev_rx_handler_unregister for synchronize_rcu */ - kfree(vrf_ptr); - cycle_netdev(port_dev); slave = __vrf_find_slave_dev(queue, port_dev); @@ -529,6 +533,33 @@ static const struct net_device_ops vrf_netdev_ops = { .ndo_del_slave = vrf_del_slave, }; +static u32 vrf_fib_table(const struct net_device *dev) +{ + struct net_vrf *vrf = netdev_priv(dev); + + return vrf->tb_id; +} + +static struct rtable *vrf_get_rtable(const struct net_device *dev, + const struct flowi4 *fl4) +{ + struct rtable *rth = NULL; + + if (!(fl4->flowi4_flags & FLOWI_FLAG_VRFSRC)) { + struct net_vrf *vrf = netdev_priv(dev); + + rth = vrf->rth; + atomic_inc(&rth->dst.__refcnt); + } + + return rth; +} + +static const struct l3mdev_ops vrf_l3mdev_ops = { + .l3mdev_fib_table = vrf_fib_table, + .l3mdev_get_rtable = vrf_get_rtable, +}; + static void vrf_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) { @@ -546,6 +577,7 @@ static void vrf_setup(struct net_device *dev) /* Initialize the device structure. */ dev->netdev_ops = &vrf_netdev_ops; + dev->l3mdev_ops = &vrf_l3mdev_ops; dev->ethtool_ops = &vrf_ethtool_ops; dev->destructor = free_netdev; @@ -572,10 +604,6 @@ static int vrf_validate(struct nlattr *tb[], struct nlattr *data[]) static void vrf_dellink(struct net_device *dev, struct list_head *head) { - struct net_vrf_dev *vrf_ptr = rtnl_dereference(dev->vrf_ptr); - - RCU_INIT_POINTER(dev->vrf_ptr, NULL); - kfree_rcu(vrf_ptr, rcu); unregister_netdevice_queue(dev, head); } @@ -583,7 +611,6 @@ static int vrf_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { struct net_vrf *vrf = netdev_priv(dev); - struct net_vrf_dev *vrf_ptr; int err; if (!data || !data[IFLA_VRF_TABLE]) @@ -591,26 +618,15 @@ static int vrf_newlink(struct net *src_net, struct net_device *dev, vrf->tb_id = nla_get_u32(data[IFLA_VRF_TABLE]); - dev->priv_flags |= IFF_VRF_MASTER; - - err = -ENOMEM; - vrf_ptr = kmalloc(sizeof(*dev->vrf_ptr), GFP_KERNEL); - if (!vrf_ptr) - goto out_fail; - - vrf_ptr->ifindex = dev->ifindex; - vrf_ptr->tb_id = vrf->tb_id; + dev->priv_flags |= IFF_L3MDEV_MASTER; err = register_netdevice(dev); if (err < 0) goto out_fail; - rcu_assign_pointer(dev->vrf_ptr, vrf_ptr); - return 0; out_fail: - kfree(vrf_ptr); free_netdev(dev); return err; } @@ -654,10 +670,9 @@ static int vrf_device_event(struct notifier_block *unused, /* only care about unregister events to drop slave references */ if (event == NETDEV_UNREGISTER) { - struct net_vrf_dev *vrf_ptr = rtnl_dereference(dev->vrf_ptr); struct net_device *vrf_dev; - if (!vrf_ptr || netif_is_vrf(dev)) + if (netif_is_l3_master(dev)) goto out; vrf_dev = netdev_master_upper_dev_get(dev); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index d2ffeafc9998..b9450784ae06 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1258,7 +1258,7 @@ struct net_device_ops { * @IFF_LIVE_ADDR_CHANGE: device supports hardware address * change when it's running * @IFF_MACVLAN: Macvlan device - * @IFF_VRF_MASTER: device is a VRF master + * @IFF_L3MDEV_MASTER: device is an L3 master device * @IFF_NO_QUEUE: device can run without qdisc attached * @IFF_OPENVSWITCH: device is a Open vSwitch master */ @@ -1283,7 +1283,7 @@ enum netdev_priv_flags { IFF_XMIT_DST_RELEASE_PERM = 1<<17, IFF_IPVLAN_MASTER = 1<<18, IFF_IPVLAN_SLAVE = 1<<19, - IFF_VRF_MASTER = 1<<20, + IFF_L3MDEV_MASTER = 1<<20, IFF_NO_QUEUE = 1<<21, IFF_OPENVSWITCH = 1<<22, }; @@ -1308,7 +1308,7 @@ enum netdev_priv_flags { #define IFF_XMIT_DST_RELEASE_PERM IFF_XMIT_DST_RELEASE_PERM #define IFF_IPVLAN_MASTER IFF_IPVLAN_MASTER #define IFF_IPVLAN_SLAVE IFF_IPVLAN_SLAVE -#define IFF_VRF_MASTER IFF_VRF_MASTER +#define IFF_L3MDEV_MASTER IFF_L3MDEV_MASTER #define IFF_NO_QUEUE IFF_NO_QUEUE #define IFF_OPENVSWITCH IFF_OPENVSWITCH @@ -1427,7 +1427,6 @@ enum netdev_priv_flags { * @dn_ptr: DECnet specific data * @ip6_ptr: IPv6 specific data * @ax25_ptr: AX.25 specific data - * @vrf_ptr: VRF specific data * @ieee80211_ptr: IEEE 802.11 specific data, assign before registering * * @last_rx: Time of last Rx @@ -1587,6 +1586,9 @@ struct net_device { #ifdef CONFIG_NET_SWITCHDEV const struct switchdev_ops *switchdev_ops; #endif +#ifdef CONFIG_NET_L3_MASTER_DEV + const struct l3mdev_ops *l3mdev_ops; +#endif const struct header_ops *header_ops; @@ -1646,7 +1648,6 @@ struct net_device { struct dn_dev __rcu *dn_ptr; struct inet6_dev __rcu *ip6_ptr; void *ax25_ptr; - struct net_vrf_dev __rcu *vrf_ptr; struct wireless_dev *ieee80211_ptr; struct wpan_dev *ieee802154_ptr; #if IS_ENABLED(CONFIG_MPLS_ROUTING) @@ -3824,9 +3825,9 @@ static inline bool netif_supports_nofcs(struct net_device *dev) return dev->priv_flags & IFF_SUPP_NOFCS; } -static inline bool netif_is_vrf(const struct net_device *dev) +static inline bool netif_is_l3_master(const struct net_device *dev) { - return dev->priv_flags & IFF_VRF_MASTER; + return dev->priv_flags & IFF_L3MDEV_MASTER; } static inline bool netif_is_bridge_master(const struct net_device *dev) @@ -3839,27 +3840,6 @@ static inline bool netif_is_ovs_master(const struct net_device *dev) return dev->priv_flags & IFF_OPENVSWITCH; } -static inline bool netif_index_is_vrf(struct net *net, int ifindex) -{ - bool rc = false; - -#if IS_ENABLED(CONFIG_NET_VRF) - struct net_device *dev; - - if (ifindex == 0) - return false; - - rcu_read_lock(); - - dev = dev_get_by_index_rcu(net, ifindex); - if (dev) - rc = netif_is_vrf(dev); - - rcu_read_unlock(); -#endif - return rc; -} - /* This device needs to keep skb dst for qdisc enqueue or ndo_start_xmit() */ static inline void netif_keep_dst(struct net_device *dev) { diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h new file mode 100644 index 000000000000..87cee05a0a17 --- /dev/null +++ b/include/net/l3mdev.h @@ -0,0 +1,149 @@ +/* + * include/net/l3mdev.h - L3 master device API + * Copyright (c) 2015 Cumulus Networks + * Copyright (c) 2015 David Ahern <dsa@cumulusnetworks.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ +#ifndef _NET_L3MDEV_H_ +#define _NET_L3MDEV_H_ + +/** + * struct l3mdev_ops - l3mdev operations + * + * @l3mdev_fib_table: Get FIB table id to use for lookups + * + * @l3mdev_get_rtable: Get cached IPv4 rtable (dst_entry) for device + */ + +struct l3mdev_ops { + u32 (*l3mdev_fib_table)(const struct net_device *dev); + struct rtable * (*l3mdev_get_rtable)(const struct net_device *dev, + const struct flowi4 *fl4); +}; + +#ifdef CONFIG_NET_L3_MASTER_DEV + +int l3mdev_master_ifindex_rcu(struct net_device *dev); +static inline int l3mdev_master_ifindex(struct net_device *dev) +{ + int ifindex; + + rcu_read_lock(); + ifindex = l3mdev_master_ifindex_rcu(dev); + rcu_read_unlock(); + + return ifindex; +} + +/* get index of an interface to use for FIB lookups. For devices + * enslaved to an L3 master device FIB lookups are based on the + * master index + */ +static inline int l3mdev_fib_oif_rcu(struct net_device *dev) +{ + return l3mdev_master_ifindex_rcu(dev) ? : dev->ifindex; +} + +static inline int l3mdev_fib_oif(struct net_device *dev) +{ + int oif; + + rcu_read_lock(); + oif = l3mdev_fib_oif_rcu(dev); + rcu_read_unlock(); + + return oif; +} + +u32 l3mdev_fib_table_rcu(const struct net_device *dev); +u32 l3mdev_fib_table_by_index(struct net *net, int ifindex); +static inline u32 l3mdev_fib_table(const struct net_device *dev) +{ + u32 tb_id; + + rcu_read_lock(); + tb_id = l3mdev_fib_table_rcu(dev); + rcu_read_unlock(); + + return tb_id; +} + +static inline struct rtable *l3mdev_get_rtable(const struct net_device *dev, + const struct flowi4 *fl4) +{ + if (netif_is_l3_master(dev) && dev->l3mdev_ops->l3mdev_get_rtable) + return dev->l3mdev_ops->l3mdev_get_rtable(dev, fl4); + + return NULL; +} + +static inline bool netif_index_is_l3_master(struct net *net, int ifindex) +{ + struct net_device *dev; + bool rc = false; + + if (ifindex == 0) + return false; + + rcu_read_lock(); + + dev = dev_get_by_index_rcu(net, ifindex); + if (dev) + rc = netif_is_l3_master(dev); + + rcu_read_unlock(); + + return rc; +} + +#else + +static inline int l3mdev_master_ifindex_rcu(struct net_device *dev) +{ + return 0; +} +static inline int l3mdev_master_ifindex(struct net_device *dev) +{ + return 0; +} + +static inline int l3mdev_fib_oif_rcu(struct net_device *dev) +{ + return dev ? dev->ifindex : 0; +} +static inline int l3mdev_fib_oif(struct net_device *dev) +{ + return dev ? dev->ifindex : 0; +} + +static inline u32 l3mdev_fib_table_rcu(const struct net_device *dev) +{ + return 0; +} +static inline u32 l3mdev_fib_table(const struct net_device *dev) +{ + return 0; +} +static inline u32 l3mdev_fib_table_by_index(struct net *net, int ifindex) +{ + return 0; +} + +static inline struct rtable *l3mdev_get_rtable(const struct net_device *dev, + const struct flowi4 *fl4) +{ + return NULL; +} + +static inline bool netif_index_is_l3_master(struct net *net, int ifindex) +{ + return false; +} + +#endif + +#endif /* _NET_L3MDEV_H_ */ diff --git a/include/net/route.h b/include/net/route.h index d1bd90bb3187..e211dc167db1 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -28,6 +28,7 @@ #include <net/inetpeer.h> #include <net/flow.h> #include <net/inet_sock.h> +#include <net/l3mdev.h> #include <linux/in_route.h> #include <linux/rtnetlink.h> #include <linux/rcupdate.h> @@ -256,7 +257,7 @@ static inline void ip_route_connect_init(struct flowi4 *fl4, __be32 dst, __be32 if (inet_sk(sk)->transparent) flow_flags |= FLOWI_FLAG_ANYSRC; - if (netif_index_is_vrf(sock_net(sk), oif)) + if (netif_index_is_l3_master(sock_net(sk), oif)) flow_flags |= FLOWI_FLAG_VRFSRC | FLOWI_FLAG_SKIP_NH_OIF; flowi4_init_output(fl4, oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE, diff --git a/include/net/vrf.h b/include/net/vrf.h deleted file mode 100644 index 593e6094ddd4..000000000000 --- a/include/net/vrf.h +++ /dev/null @@ -1,178 +0,0 @@ -/* - * include/net/net_vrf.h - adds vrf dev structure definitions - * Copyright (c) 2015 Cumulus Networks - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - */ - -#ifndef __LINUX_NET_VRF_H -#define __LINUX_NET_VRF_H - -struct net_vrf_dev { - struct rcu_head rcu; - int ifindex; /* ifindex of master dev */ - u32 tb_id; /* table id for VRF */ -}; - -struct slave { - struct list_head list; - struct net_device *dev; -}; - -struct slave_queue { - struct list_head all_slaves; -}; - -struct net_vrf { - struct slave_queue queue; - struct rtable *rth; - u32 tb_id; -}; - - -#if IS_ENABLED(CONFIG_NET_VRF) -/* called with rcu_read_lock() */ -static inline int vrf_master_ifindex_rcu(const struct net_device *dev) -{ - struct net_vrf_dev *vrf_ptr; - int ifindex = 0; - - if (!dev) - return 0; - - if (netif_is_vrf(dev)) { - ifindex = dev->ifindex; - } else { - vrf_ptr = rcu_dereference(dev->vrf_ptr); - if (vrf_ptr) - ifindex = vrf_ptr->ifindex; - } - - return ifindex; -} - -static inline int vrf_master_ifindex(const struct net_device *dev) -{ - int ifindex; - - rcu_read_lock(); - ifindex = vrf_master_ifindex_rcu(dev); - rcu_read_unlock(); - - return ifindex; -} - -/* called with rcu_read_lock */ -static inline u32 vrf_dev_table_rcu(const struct net_device *dev) -{ - u32 tb_id = 0; - - if (dev) { - struct net_vrf_dev *vrf_ptr; - - vrf_ptr = rcu_dereference(dev->vrf_ptr); - if (vrf_ptr) - tb_id = vrf_ptr->tb_id; - } - return tb_id; -} - -static inline u32 vrf_dev_table(const struct net_device *dev) -{ - u32 tb_id; - - rcu_read_lock(); - tb_id = vrf_dev_table_rcu(dev); - rcu_read_unlock(); - - return tb_id; -} - -static inline u32 vrf_dev_table_ifindex(struct net *net, int ifindex) -{ - struct net_device *dev; - u32 tb_id = 0; - - if (!ifindex) - return 0; - - rcu_read_lock(); - - dev = dev_get_by_index_rcu(net, ifindex); - if (dev) - tb_id = vrf_dev_table_rcu(dev); - - rcu_read_unlock(); - - return tb_id; -} - -/* called with rtnl */ -static inline u32 vrf_dev_table_rtnl(const struct net_device *dev) -{ - u32 tb_id = 0; - - if (dev) { - struct net_vrf_dev *vrf_ptr; - - vrf_ptr = rtnl_dereference(dev->vrf_ptr); - if (vrf_ptr) - tb_id = vrf_ptr->tb_id; - } - return tb_id; -} - -/* caller has already checked netif_is_vrf(dev) */ -static inline struct rtable *vrf_dev_get_rth(const struct net_device *dev) -{ - struct rtable *rth = ERR_PTR(-ENETUNREACH); - struct net_vrf *vrf = netdev_priv(dev); - - if (vrf) { - rth = vrf->rth; - atomic_inc(&rth->dst.__refcnt); - } - return rth; -} - -#else -static inline int vrf_master_ifindex_rcu(const struct net_device *dev) -{ - return 0; -} - -static inline int vrf_master_ifindex(const struct net_device *dev) -{ - return 0; -} - -static inline u32 vrf_dev_table_rcu(const struct net_device *dev) -{ - return 0; -} - -static inline u32 vrf_dev_table(const struct net_device *dev) -{ - return 0; -} - -static inline u32 vrf_dev_table_ifindex(struct net *net, int ifindex) -{ - return 0; -} - -static inline u32 vrf_dev_table_rtnl(const struct net_device *dev) -{ - return 0; -} - -static inline struct rtable *vrf_dev_get_rth(const struct net_device *dev) -{ - return ERR_PTR(-ENETUNREACH); -} -#endif - -#endif /* __LINUX_NET_VRF_H */ diff --git a/net/Kconfig b/net/Kconfig index 7021c1bf44d6..127da94ae25e 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -232,6 +232,7 @@ source "net/netlink/Kconfig" source "net/mpls/Kconfig" source "net/hsr/Kconfig" source "net/switchdev/Kconfig" +source "net/l3mdev/Kconfig" config RPS bool diff --git a/net/Makefile b/net/Makefile index 3995613e5510..a5d04098dfce 100644 --- a/net/Makefile +++ b/net/Makefile @@ -74,3 +74,6 @@ obj-$(CONFIG_HSR) += hsr/ ifneq ($(CONFIG_NET_SWITCHDEV),) obj-y += switchdev/ endif +ifneq ($(CONFIG_NET_L3_MASTER_DEV),) +obj-y += l3mdev/ +endif diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 3af85eecbe11..11c4ca13ec3b 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -119,7 +119,7 @@ #ifdef CONFIG_IP_MROUTE #include <linux/mroute.h> #endif -#include <net/vrf.h> +#include <net/l3mdev.h> /* The inetsw table contains everything that inet_create needs to @@ -446,7 +446,7 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) goto out; } - tb_id = vrf_dev_table_ifindex(net, sk->sk_bound_dev_if) ? : tb_id; + tb_id = l3mdev_fib_table_by_index(net, sk->sk_bound_dev_if) ? : tb_id; chk_addr_ret = inet_addr_type_table(net, addr->sin_addr.s_addr, tb_id); /* Not specified by any standard per-se, however it breaks too diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 6fcbd215cdbc..fac172370276 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -45,7 +45,7 @@ #include <net/ip_fib.h> #include <net/rtnetlink.h> #include <net/xfrm.h> -#include <net/vrf.h> +#include <net/l3mdev.h> #include <trace/events/fib.h> #ifndef CONFIG_IP_MULTIPLE_TABLES @@ -255,7 +255,7 @@ EXPORT_SYMBOL(inet_addr_type); unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev, __be32 addr) { - u32 rt_table = vrf_dev_table(dev) ? : RT_TABLE_LOCAL; + u32 rt_table = l3mdev_fib_table(dev) ? : RT_TABLE_LOCAL; return __inet_dev_addr_type(net, dev, addr, rt_table); } @@ -268,7 +268,7 @@ unsigned int inet_addr_type_dev_table(struct net *net, const struct net_device *dev, __be32 addr) { - u32 rt_table = vrf_dev_table(dev) ? : RT_TABLE_LOCAL; + u32 rt_table = l3mdev_fib_table(dev) ? : RT_TABLE_LOCAL; return __inet_dev_addr_type(net, NULL, addr, rt_table); } @@ -332,7 +332,7 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, bool dev_match; fl4.flowi4_oif = 0; - fl4.flowi4_iif = vrf_master_ifindex_rcu(dev); + fl4.flowi4_iif = l3mdev_master_ifindex_rcu(dev); if (!fl4.flowi4_iif) fl4.flowi4_iif = oif ? : LOOPBACK_IFINDEX; fl4.daddr = src; @@ -366,7 +366,7 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, if (nh->nh_dev == dev) { dev_match = true; break; - } else if (vrf_master_ifindex_rcu(nh->nh_dev) == dev->ifindex) { + } else if (l3mdev_master_ifindex_rcu(nh->nh_dev) == dev->ifindex) { dev_match = true; break; } @@ -803,7 +803,7 @@ out: static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa) { struct net *net = dev_net(ifa->ifa_dev->dev); - u32 tb_id = vrf_dev_table_rtnl(ifa->ifa_dev->dev); + u32 tb_id = l3mdev_fib_table(ifa->ifa_dev->dev); struct fib_table *tb; struct fib_config cfg = { .fc_protocol = RTPROT_KERNEL, diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index e5eb8ac4089d..6b96dee2800b 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -96,7 +96,7 @@ #include <net/xfrm.h> #include <net/inet_common.h> #include <net/ip_fib.h> -#include <net/vrf.h> +#include <net/l3mdev.h> /* * Build xmit assembly blocks @@ -309,7 +309,7 @@ static bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt, rc = false; if (icmp_global_allow()) { - int vif = vrf_master_ifindex(dst->dev); + int vif = l3mdev_master_ifindex(dst->dev); struct inet_peer *peer; peer = inet_getpeer_v4(net->ipv4.peers, fl4->daddr, vif, 1); @@ -427,7 +427,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) fl4.flowi4_mark = mark; fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos); fl4.flowi4_proto = IPPROTO_ICMP; - fl4.flowi4_oif = vrf_master_ifindex(skb->dev); + fl4.flowi4_oif = l3mdev_master_ifindex(skb->dev); security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); rt = ip_route_output_key(net, &fl4); if (IS_ERR(rt)) @@ -461,7 +461,7 @@ static struct rtable *icmp_route_lookup(struct net *net, fl4->flowi4_proto = IPPROTO_ICMP; fl4->fl4_icmp_type = type; fl4->fl4_icmp_code = code; - fl4->flowi4_oif = vrf_master_ifindex(skb_in->dev); + fl4->flowi4_oif = l3mdev_master_ifindex(skb_in->dev); security_skb_classify_flow(skb_in, flowi4_to_flowi(fl4)); rt = __ip_route_output_key(net, fl4); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index fa7f15305f9a..9772b789adf3 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -48,7 +48,7 @@ #include <linux/inet.h> #include <linux/netfilter_ipv4.h> #include <net/inet_ecn.h> -#include <net/vrf.h> +#include <net/l3mdev.h> /* NOTE. Logic of IP defragmentation is parallel to corresponding IPv6 * code now. If you change something here, _PLEASE_ update ipv6/reassembly.c @@ -78,7 +78,7 @@ struct ipq { u8 ecn; /* RFC3168 support */ u16 max_df_size; /* largest frag with DF set seen */ int iif; - int vif; /* VRF device index */ + int vif; /* L3 master device index */ unsigned int rid; struct inet_peer *peer; }; @@ -657,7 +657,7 @@ out_fail: int ip_defrag(struct sk_buff *skb, u32 user) { struct net_device *dev = skb->dev ? : skb_dst(skb)->dev; - int vif = vrf_master_ifindex_rcu(dev); + int vif = l3mdev_master_ifindex_rcu(dev); struct net *net = dev_net(dev); struct ipq *qp; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 06d2c87ed505..aff6766922e8 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1571,7 +1571,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, } oif = arg->bound_dev_if; - if (!oif && netif_index_is_vrf(net, skb->skb_iif)) + if (!oif && netif_index_is_l3_master(net, skb->skb_iif)) oif = skb->skb_iif; flowi4_init_output(&fl4, oif, diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 8c84a6664b30..1441de1550e6 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -112,7 +112,7 @@ #endif #include <net/secure_seq.h> #include <net/ip_tunnels.h> -#include <net/vrf.h> +#include <net/l3mdev.h> #define RT_FL_TOS(oldflp4) \ ((oldflp4)->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK)) @@ -847,7 +847,7 @@ void ip_rt_send_redirect(struct sk_buff *skb) return; } log_martians = IN_DEV_LOG_MARTIANS(in_dev); - vif = vrf_master_ifindex_rcu(rt->dst.dev); + vif = l3mdev_master_ifindex_rcu(rt->dst.dev); rcu_read_unlock(); net = dev_net(rt->dst.dev); @@ -941,7 +941,7 @@ static int ip_error(struct sk_buff *skb) } peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, - vrf_master_ifindex(skb->dev), 1); + l3mdev_master_ifindex(skb->dev), 1); send = true; if (peer) { @@ -1739,7 +1739,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, * Now we are ready to route packet. */ fl4.flowi4_oif = 0; - fl4.flowi4_iif = vrf_master_ifindex_rcu(dev) ? : dev->ifindex; + fl4.flowi4_iif = l3mdev_fib_oif_rcu(dev); fl4.flowi4_mark = skb->mark; fl4.flowi4_tos = tos; fl4.flowi4_scope = RT_SCOPE_UNIVERSE; @@ -2124,11 +2124,10 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4) fl4->saddr = inet_select_addr(dev_out, 0, RT_SCOPE_HOST); } - if (netif_is_vrf(dev_out) && - !(fl4->flowi4_flags & FLOWI_FLAG_VRFSRC)) { - rth = vrf_dev_get_rth(dev_out); + + rth = l3mdev_get_rtable(dev_out, fl4); + if (rth) goto out; - } } if (!fl4->daddr) { diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index f7d1d5e19e95..156ba75b6000 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1021,7 +1021,7 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) * device lookup source address from VRF table. This mimics * behavior of ip_route_connect{_init}. */ - if (netif_index_is_vrf(net, ipc.oif)) { + if (netif_index_is_l3_master(net, ipc.oif)) { flowi4_init_output(fl4, ipc.oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE, sk->sk_protocol, (flow_flags | FLOWI_FLAG_VRFSRC | diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 0304d1680ca2..f2606b9056bb 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -15,7 +15,7 @@ #include <net/dst.h> #include <net/xfrm.h> #include <net/ip.h> -#include <net/vrf.h> +#include <net/l3mdev.h> static struct xfrm_policy_afinfo xfrm4_policy_afinfo; @@ -111,10 +111,8 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) struct flowi4 *fl4 = &fl->u.ip4; int oif = 0; - if (skb_dst(skb)) { - oif = vrf_master_ifindex(skb_dst(skb)->dev) ? - : skb_dst(skb)->dev->ifindex; - } + if (skb_dst(skb)) + oif = l3mdev_fib_oif(skb_dst(skb)->dev); memset(fl4, 0, sizeof(struct flowi4)); fl4->flowi4_mark = skb->mark; diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 30caa289c5db..69cee4e0d728 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -20,7 +20,7 @@ #include <net/ip.h> #include <net/ipv6.h> #include <net/ip6_route.h> -#include <net/vrf.h> +#include <net/l3mdev.h> #if IS_ENABLED(CONFIG_IPV6_MIP6) #include <net/mip6.h> #endif @@ -132,10 +132,8 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) nexthdr = nh[nhoff]; - if (skb_dst(skb)) { - oif = vrf_master_ifindex(skb_dst(skb)->dev) ? - : skb_dst(skb)->dev->ifindex; - } + if (skb_dst(skb)) + oif = l3mdev_fib_oif(skb_dst(skb)->dev); memset(fl6, 0, sizeof(struct flowi6)); fl6->flowi6_mark = skb->mark; diff --git a/net/l3mdev/Kconfig b/net/l3mdev/Kconfig new file mode 100644 index 000000000000..5d47325037bc --- /dev/null +++ b/net/l3mdev/Kconfig @@ -0,0 +1,10 @@ +# +# Configuration for L3 master device support +# + +config NET_L3_MASTER_DEV + bool "L3 Master device support" + depends on INET || IPV6 + ---help--- + This module provides glue between core networking code and device + drivers to support L3 master devices like VRF. diff --git a/net/l3mdev/Makefile b/net/l3mdev/Makefile new file mode 100644 index 000000000000..84a53a6f609a --- /dev/null +++ b/net/l3mdev/Makefile @@ -0,0 +1,5 @@ +# +# Makefile for the L3 device API +# + +obj-$(CONFIG_NET_L3_MASTER_DEV) += l3mdev.o diff --git a/net/l3mdev/l3mdev.c b/net/l3mdev/l3mdev.c new file mode 100644 index 000000000000..ddf75ad41713 --- /dev/null +++ b/net/l3mdev/l3mdev.c @@ -0,0 +1,92 @@ +/* + * net/l3mdev/l3mdev.c - L3 master device implementation + * Copyright (c) 2015 Cumulus Networks + * Copyright (c) 2015 David Ahern <dsa@cumulusnetworks.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <linux/netdevice.h> +#include <net/l3mdev.h> + +/** + * l3mdev_master_ifindex - get index of L3 master device + * @dev: targeted interface + */ + +int l3mdev_master_ifindex_rcu(struct net_device *dev) +{ + int ifindex = 0; + + if (!dev) + return 0; + + if (netif_is_l3_master(dev)) { + ifindex = dev->ifindex; + } else if (dev->flags & IFF_SLAVE) { + struct net_device *master; + + master = netdev_master_upper_dev_get_rcu(dev); + if (master && netif_is_l3_master(master)) + ifindex = master->ifindex; + } + + return ifindex; +} +EXPORT_SYMBOL_GPL(l3mdev_master_ifindex_rcu); + +/** + * l3mdev_fib_table - get FIB table id associated with an L3 + * master interface + * @dev: targeted interface + */ + +u32 l3mdev_fib_table_rcu(const struct net_device *dev) +{ + u32 tb_id = 0; + + if (!dev) + return 0; + + if (netif_is_l3_master(dev)) { + if (dev->l3mdev_ops->l3mdev_fib_table) + tb_id = dev->l3mdev_ops->l3mdev_fib_table(dev); + } else if (dev->flags & IFF_SLAVE) { + /* Users of netdev_master_upper_dev_get_rcu need non-const, + * but current inet_*type functions take a const + */ + struct net_device *_dev = (struct net_device *) dev; + const struct net_device *master; + + master = netdev_master_upper_dev_get_rcu(_dev); + if (master && netif_is_l3_master(master) && + master->l3mdev_ops->l3mdev_fib_table) + tb_id = master->l3mdev_ops->l3mdev_fib_table(master); + } + + return tb_id; +} +EXPORT_SYMBOL_GPL(l3mdev_fib_table_rcu); + +u32 l3mdev_fib_table_by_index(struct net *net, int ifindex) +{ + struct net_device *dev; + u32 tb_id = 0; + + if (!ifindex) + return 0; + + rcu_read_lock(); + + dev = dev_get_by_index_rcu(net, ifindex); + if (dev) + tb_id = l3mdev_fib_table_rcu(dev); + + rcu_read_unlock(); + + return tb_id; +} +EXPORT_SYMBOL_GPL(l3mdev_fib_table_by_index); |