aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/net/vrf.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/net/vrf.c')
-rw-r--r--drivers/net/vrf.c431
1 files changed, 157 insertions, 274 deletions
diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index 9a9fabb900c1..dff08842f26d 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -42,12 +42,9 @@
#define DRV_NAME "vrf"
#define DRV_VERSION "1.0"
-#define vrf_master_get_rcu(dev) \
- ((struct net_device *)rcu_dereference(dev->rx_handler_data))
-
struct net_vrf {
- struct rtable *rth;
- struct rt6_info *rt6;
+ struct rtable __rcu *rth;
+ struct rt6_info __rcu *rt6;
u32 tb_id;
};
@@ -60,125 +57,12 @@ struct pcpu_dstats {
struct u64_stats_sync syncp;
};
-static struct dst_entry *vrf_ip_check(struct dst_entry *dst, u32 cookie)
-{
- return dst;
-}
-
-static int vrf_ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb)
-{
- return ip_local_out(net, sk, skb);
-}
-
-static unsigned int vrf_v4_mtu(const struct dst_entry *dst)
-{
- /* TO-DO: return max ethernet size? */
- return dst->dev->mtu;
-}
-
-static void vrf_dst_destroy(struct dst_entry *dst)
-{
- /* our dst lives forever - or until the device is closed */
-}
-
-static unsigned int vrf_default_advmss(const struct dst_entry *dst)
-{
- return 65535 - 40;
-}
-
-static struct dst_ops vrf_dst_ops = {
- .family = AF_INET,
- .local_out = vrf_ip_local_out,
- .check = vrf_ip_check,
- .mtu = vrf_v4_mtu,
- .destroy = vrf_dst_destroy,
- .default_advmss = vrf_default_advmss,
-};
-
-/* neighbor handling is done with actual device; do not want
- * to flip skb->dev for those ndisc packets. This really fails
- * for multiple next protocols (e.g., NEXTHDR_HOP). But it is
- * a start.
- */
-#if IS_ENABLED(CONFIG_IPV6)
-static bool check_ipv6_frame(const struct sk_buff *skb)
-{
- const struct ipv6hdr *ipv6h;
- struct ipv6hdr _ipv6h;
- bool rc = true;
-
- ipv6h = skb_header_pointer(skb, 0, sizeof(_ipv6h), &_ipv6h);
- if (!ipv6h)
- goto out;
-
- if (ipv6h->nexthdr == NEXTHDR_ICMP) {
- const struct icmp6hdr *icmph;
- struct icmp6hdr _icmph;
-
- icmph = skb_header_pointer(skb, sizeof(_ipv6h),
- sizeof(_icmph), &_icmph);
- if (!icmph)
- goto out;
-
- switch (icmph->icmp6_type) {
- case NDISC_ROUTER_SOLICITATION:
- case NDISC_ROUTER_ADVERTISEMENT:
- case NDISC_NEIGHBOUR_SOLICITATION:
- case NDISC_NEIGHBOUR_ADVERTISEMENT:
- case NDISC_REDIRECT:
- rc = false;
- break;
- }
- }
-
-out:
- return rc;
-}
-#else
-static bool check_ipv6_frame(const struct sk_buff *skb)
-{
- return false;
-}
-#endif
-
-static bool is_ip_rx_frame(struct sk_buff *skb)
-{
- switch (skb->protocol) {
- case htons(ETH_P_IP):
- return true;
- case htons(ETH_P_IPV6):
- return check_ipv6_frame(skb);
- }
- return false;
-}
-
static void vrf_tx_error(struct net_device *vrf_dev, struct sk_buff *skb)
{
vrf_dev->stats.tx_errors++;
kfree_skb(skb);
}
-/* note: already called with rcu_read_lock */
-static rx_handler_result_t vrf_handle_frame(struct sk_buff **pskb)
-{
- struct sk_buff *skb = *pskb;
-
- if (is_ip_rx_frame(skb)) {
- struct net_device *dev = vrf_master_get_rcu(skb->dev);
- struct pcpu_dstats *dstats = this_cpu_ptr(dev->dstats);
-
- u64_stats_update_begin(&dstats->syncp);
- dstats->rx_pkts++;
- dstats->rx_bytes += skb->len;
- u64_stats_update_end(&dstats->syncp);
-
- skb->dev = dev;
-
- return RX_HANDLER_ANOTHER;
- }
- return RX_HANDLER_PASS;
-}
-
static struct rtnl_link_stats64 *vrf_get_stats64(struct net_device *dev,
struct rtnl_link_stats64 *stats)
{
@@ -349,46 +233,6 @@ static netdev_tx_t vrf_xmit(struct sk_buff *skb, struct net_device *dev)
}
#if IS_ENABLED(CONFIG_IPV6)
-static struct dst_entry *vrf_ip6_check(struct dst_entry *dst, u32 cookie)
-{
- return dst;
-}
-
-static struct dst_ops vrf_dst_ops6 = {
- .family = AF_INET6,
- .local_out = ip6_local_out,
- .check = vrf_ip6_check,
- .mtu = vrf_v4_mtu,
- .destroy = vrf_dst_destroy,
- .default_advmss = vrf_default_advmss,
-};
-
-static int init_dst_ops6_kmem_cachep(void)
-{
- vrf_dst_ops6.kmem_cachep = kmem_cache_create("vrf_ip6_dst_cache",
- sizeof(struct rt6_info),
- 0,
- SLAB_HWCACHE_ALIGN,
- NULL);
-
- if (!vrf_dst_ops6.kmem_cachep)
- return -ENOMEM;
-
- return 0;
-}
-
-static void free_dst_ops6_kmem_cachep(void)
-{
- kmem_cache_destroy(vrf_dst_ops6.kmem_cachep);
-}
-
-static int vrf_input6(struct sk_buff *skb)
-{
- skb->dev->stats.rx_errors++;
- kfree_skb(skb);
- return 0;
-}
-
/* modelled after ip6_finish_output2 */
static int vrf_finish_output6(struct net *net, struct sock *sk,
struct sk_buff *skb)
@@ -429,67 +273,46 @@ static int vrf_output6(struct net *net, struct sock *sk, struct sk_buff *skb)
!(IP6CB(skb)->flags & IP6SKB_REROUTED));
}
-static void vrf_rt6_destroy(struct net_vrf *vrf)
+/* holding rtnl */
+static void vrf_rt6_release(struct net_vrf *vrf)
{
- dst_destroy(&vrf->rt6->dst);
- free_percpu(vrf->rt6->rt6i_pcpu);
- vrf->rt6 = NULL;
+ struct rt6_info *rt6 = rtnl_dereference(vrf->rt6);
+
+ rcu_assign_pointer(vrf->rt6, NULL);
+
+ if (rt6)
+ dst_release(&rt6->dst);
}
static int vrf_rt6_create(struct net_device *dev)
{
struct net_vrf *vrf = netdev_priv(dev);
- struct dst_entry *dst;
+ struct net *net = dev_net(dev);
+ struct fib6_table *rt6i_table;
struct rt6_info *rt6;
- int cpu;
int rc = -ENOMEM;
- rt6 = dst_alloc(&vrf_dst_ops6, dev, 0,
- DST_OBSOLETE_NONE,
- (DST_HOST | DST_NOPOLICY | DST_NOXFRM));
- if (!rt6)
+ rt6i_table = fib6_new_table(net, vrf->tb_id);
+ if (!rt6i_table)
goto out;
- dst = &rt6->dst;
-
- rt6->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_KERNEL);
- if (!rt6->rt6i_pcpu) {
- dst_destroy(dst);
+ rt6 = ip6_dst_alloc(net, dev,
+ DST_HOST | DST_NOPOLICY | DST_NOXFRM | DST_NOCACHE);
+ if (!rt6)
goto out;
- }
- for_each_possible_cpu(cpu) {
- struct rt6_info **p = per_cpu_ptr(rt6->rt6i_pcpu, cpu);
- *p = NULL;
- }
-
- memset(dst + 1, 0, sizeof(*rt6) - sizeof(*dst));
- INIT_LIST_HEAD(&rt6->rt6i_siblings);
- INIT_LIST_HEAD(&rt6->rt6i_uncached);
+ dst_hold(&rt6->dst);
- rt6->dst.input = vrf_input6;
+ rt6->rt6i_table = rt6i_table;
rt6->dst.output = vrf_output6;
+ rcu_assign_pointer(vrf->rt6, rt6);
- rt6->rt6i_table = fib6_get_table(dev_net(dev), vrf->tb_id);
-
- atomic_set(&rt6->dst.__refcnt, 2);
-
- vrf->rt6 = rt6;
rc = 0;
out:
return rc;
}
#else
-static int init_dst_ops6_kmem_cachep(void)
-{
- return 0;
-}
-
-static void free_dst_ops6_kmem_cachep(void)
-{
-}
-
-static void vrf_rt6_destroy(struct net_vrf *vrf)
+static void vrf_rt6_release(struct net_vrf *vrf)
{
}
@@ -557,38 +380,35 @@ static int vrf_output(struct net *net, struct sock *sk, struct sk_buff *skb)
!(IPCB(skb)->flags & IPSKB_REROUTED));
}
-static void vrf_rtable_destroy(struct net_vrf *vrf)
+/* holding rtnl */
+static void vrf_rtable_release(struct net_vrf *vrf)
{
- struct dst_entry *dst = (struct dst_entry *)vrf->rth;
+ struct rtable *rth = rtnl_dereference(vrf->rth);
- dst_destroy(dst);
- vrf->rth = NULL;
+ rcu_assign_pointer(vrf->rth, NULL);
+
+ if (rth)
+ dst_release(&rth->dst);
}
-static struct rtable *vrf_rtable_create(struct net_device *dev)
+static int vrf_rtable_create(struct net_device *dev)
{
struct net_vrf *vrf = netdev_priv(dev);
struct rtable *rth;
- rth = dst_alloc(&vrf_dst_ops, dev, 2,
- DST_OBSOLETE_NONE,
- (DST_HOST | DST_NOPOLICY | DST_NOXFRM));
- if (rth) {
- rth->dst.output = vrf_output;
- rth->rt_genid = rt_genid_ipv4(dev_net(dev));
- rth->rt_flags = 0;
- rth->rt_type = RTN_UNICAST;
- rth->rt_is_input = 0;
- rth->rt_iif = 0;
- rth->rt_pmtu = 0;
- rth->rt_gateway = 0;
- rth->rt_uses_gateway = 0;
- rth->rt_table_id = vrf->tb_id;
- INIT_LIST_HEAD(&rth->rt_uncached);
- rth->rt_uncached_list = NULL;
- }
+ if (!fib_new_table(dev_net(dev), vrf->tb_id))
+ return -ENOMEM;
- return rth;
+ rth = rt_dst_alloc(dev, 0, RTN_UNICAST, 1, 1, 0);
+ if (!rth)
+ return -ENOMEM;
+
+ rth->dst.output = vrf_output;
+ rth->rt_table_id = vrf->tb_id;
+
+ rcu_assign_pointer(vrf->rth, rth);
+
+ return 0;
}
/**************************** device handling ********************/
@@ -617,28 +437,14 @@ static int do_vrf_add_slave(struct net_device *dev, struct net_device *port_dev)
{
int ret;
- /* register the packet handler for slave ports */
- ret = netdev_rx_handler_register(port_dev, vrf_handle_frame, dev);
- if (ret) {
- netdev_err(port_dev,
- "Device %s failed to register rx_handler\n",
- port_dev->name);
- goto out_fail;
- }
-
ret = netdev_master_upper_dev_link(port_dev, dev, NULL, NULL);
if (ret < 0)
- goto out_unregister;
+ return ret;
port_dev->priv_flags |= IFF_L3MDEV_SLAVE;
cycle_netdev(port_dev);
return 0;
-
-out_unregister:
- netdev_rx_handler_unregister(port_dev);
-out_fail:
- return ret;
}
static int vrf_add_slave(struct net_device *dev, struct net_device *port_dev)
@@ -655,8 +461,6 @@ static int do_vrf_del_slave(struct net_device *dev, struct net_device *port_dev)
netdev_upper_dev_unlink(port_dev, dev);
port_dev->priv_flags &= ~IFF_L3MDEV_SLAVE;
- netdev_rx_handler_unregister(port_dev);
-
cycle_netdev(port_dev);
return 0;
@@ -673,8 +477,8 @@ static void vrf_dev_uninit(struct net_device *dev)
struct net_device *port_dev;
struct list_head *iter;
- vrf_rtable_destroy(vrf);
- vrf_rt6_destroy(vrf);
+ vrf_rtable_release(vrf);
+ vrf_rt6_release(vrf);
netdev_for_each_lower_dev(dev, port_dev, iter)
vrf_del_slave(dev, port_dev);
@@ -692,8 +496,7 @@ static int vrf_dev_init(struct net_device *dev)
goto out_nomem;
/* create the default dst which points back to us */
- vrf->rth = vrf_rtable_create(dev);
- if (!vrf->rth)
+ if (vrf_rtable_create(dev) != 0)
goto out_stats;
if (vrf_rt6_create(dev) != 0)
@@ -704,7 +507,7 @@ static int vrf_dev_init(struct net_device *dev)
return 0;
out_rth:
- vrf_rtable_destroy(vrf);
+ vrf_rtable_release(vrf);
out_stats:
free_percpu(dev->dstats);
dev->dstats = NULL;
@@ -736,8 +539,13 @@ static struct rtable *vrf_get_rtable(const struct net_device *dev,
if (!(fl4->flowi4_flags & FLOWI_FLAG_L3MDEV_SRC)) {
struct net_vrf *vrf = netdev_priv(dev);
- rth = vrf->rth;
- atomic_inc(&rth->dst.__refcnt);
+ rcu_read_lock();
+
+ rth = rcu_dereference(vrf->rth);
+ if (likely(rth))
+ dst_hold(&rth->dst);
+
+ rcu_read_unlock();
}
return rth;
@@ -759,6 +567,8 @@ static int vrf_get_saddr(struct net_device *dev, struct flowi4 *fl4)
fl4->flowi4_flags |= FLOWI_FLAG_SKIP_NH_OIF;
fl4->flowi4_iif = LOOPBACK_IFINDEX;
+ /* make sure oif is set to VRF device for lookup */
+ fl4->flowi4_oif = dev->ifindex;
fl4->flowi4_tos = tos & IPTOS_RT_MASK;
fl4->flowi4_scope = ((tos & RTO_ONLINK) ?
RT_SCOPE_LINK : RT_SCOPE_UNIVERSE);
@@ -779,19 +589,116 @@ static int vrf_get_saddr(struct net_device *dev, struct flowi4 *fl4)
}
#if IS_ENABLED(CONFIG_IPV6)
+/* neighbor handling is done with actual device; do not want
+ * to flip skb->dev for those ndisc packets. This really fails
+ * for multiple next protocols (e.g., NEXTHDR_HOP). But it is
+ * a start.
+ */
+static bool ipv6_ndisc_frame(const struct sk_buff *skb)
+{
+ const struct ipv6hdr *iph = ipv6_hdr(skb);
+ bool rc = false;
+
+ if (iph->nexthdr == NEXTHDR_ICMP) {
+ const struct icmp6hdr *icmph;
+ struct icmp6hdr _icmph;
+
+ icmph = skb_header_pointer(skb, sizeof(*iph),
+ sizeof(_icmph), &_icmph);
+ if (!icmph)
+ goto out;
+
+ switch (icmph->icmp6_type) {
+ case NDISC_ROUTER_SOLICITATION:
+ case NDISC_ROUTER_ADVERTISEMENT:
+ case NDISC_NEIGHBOUR_SOLICITATION:
+ case NDISC_NEIGHBOUR_ADVERTISEMENT:
+ case NDISC_REDIRECT:
+ rc = true;
+ break;
+ }
+ }
+
+out:
+ return rc;
+}
+
+static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev,
+ struct sk_buff *skb)
+{
+ /* if packet is NDISC keep the ingress interface */
+ if (!ipv6_ndisc_frame(skb)) {
+ skb->dev = vrf_dev;
+ skb->skb_iif = vrf_dev->ifindex;
+
+ skb_push(skb, skb->mac_len);
+ dev_queue_xmit_nit(skb, vrf_dev);
+ skb_pull(skb, skb->mac_len);
+
+ IP6CB(skb)->flags |= IP6SKB_L3SLAVE;
+ }
+
+ return skb;
+}
+
+#else
+static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev,
+ struct sk_buff *skb)
+{
+ return skb;
+}
+#endif
+
+static struct sk_buff *vrf_ip_rcv(struct net_device *vrf_dev,
+ struct sk_buff *skb)
+{
+ skb->dev = vrf_dev;
+ skb->skb_iif = vrf_dev->ifindex;
+
+ skb_push(skb, skb->mac_len);
+ dev_queue_xmit_nit(skb, vrf_dev);
+ skb_pull(skb, skb->mac_len);
+
+ return skb;
+}
+
+/* called with rcu lock held */
+static struct sk_buff *vrf_l3_rcv(struct net_device *vrf_dev,
+ struct sk_buff *skb,
+ u16 proto)
+{
+ switch (proto) {
+ case AF_INET:
+ return vrf_ip_rcv(vrf_dev, skb);
+ case AF_INET6:
+ return vrf_ip6_rcv(vrf_dev, skb);
+ }
+
+ return skb;
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
static struct dst_entry *vrf_get_rt6_dst(const struct net_device *dev,
const struct flowi6 *fl6)
{
- struct rt6_info *rt = NULL;
+ struct dst_entry *dst = NULL;
if (!(fl6->flowi6_flags & FLOWI_FLAG_L3MDEV_SRC)) {
struct net_vrf *vrf = netdev_priv(dev);
+ struct rt6_info *rt;
- rt = vrf->rt6;
- atomic_inc(&rt->dst.__refcnt);
+ rcu_read_lock();
+
+ rt = rcu_dereference(vrf->rt6);
+ if (likely(rt)) {
+ dst = &rt->dst;
+ dst_hold(dst);
+ }
+
+ rcu_read_unlock();
}
- return (struct dst_entry *)rt;
+ return dst;
}
#endif
@@ -799,6 +706,7 @@ static const struct l3mdev_ops vrf_l3mdev_ops = {
.l3mdev_fib_table = vrf_fib_table,
.l3mdev_get_rtable = vrf_get_rtable,
.l3mdev_get_saddr = vrf_get_saddr,
+ .l3mdev_l3_rcv = vrf_l3_rcv,
#if IS_ENABLED(CONFIG_IPV6)
.l3mdev_get_rt6_dst = vrf_get_rt6_dst,
#endif
@@ -946,19 +854,6 @@ static int __init vrf_init_module(void)
{
int rc;
- vrf_dst_ops.kmem_cachep =
- kmem_cache_create("vrf_ip_dst_cache",
- sizeof(struct rtable), 0,
- SLAB_HWCACHE_ALIGN,
- NULL);
-
- if (!vrf_dst_ops.kmem_cachep)
- return -ENOMEM;
-
- rc = init_dst_ops6_kmem_cachep();
- if (rc != 0)
- goto error2;
-
register_netdevice_notifier(&vrf_notifier_block);
rc = rtnl_link_register(&vrf_link_ops);
@@ -969,22 +864,10 @@ static int __init vrf_init_module(void)
error:
unregister_netdevice_notifier(&vrf_notifier_block);
- free_dst_ops6_kmem_cachep();
-error2:
- kmem_cache_destroy(vrf_dst_ops.kmem_cachep);
return rc;
}
-static void __exit vrf_cleanup_module(void)
-{
- rtnl_link_unregister(&vrf_link_ops);
- unregister_netdevice_notifier(&vrf_notifier_block);
- kmem_cache_destroy(vrf_dst_ops.kmem_cachep);
- free_dst_ops6_kmem_cachep();
-}
-
module_init(vrf_init_module);
-module_exit(vrf_cleanup_module);
MODULE_AUTHOR("Shrijeet Mukherjee, David Ahern");
MODULE_DESCRIPTION("Device driver to instantiate VRF domains");
MODULE_LICENSE("GPL");