From 0353f28231c79416191326810e7fe656b69c63b7 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Fri, 5 Apr 2019 16:30:33 -0700 Subject: neighbor: Add skip_cache argument to neigh_output A later patch allows an IPv6 gateway with an IPv4 route. The neighbor entry will exist in the v6 ndisc table and the cached header will contain the ipv6 protocol which is wrong for an IPv4 packet. For an IPv4 packet to use the v6 neighbor entry, neigh_output needs to skip the cached header and just use the output callback for the neigh entry. A future patchset can look at expanding the hh_cache to handle 2 protocols. For now, IPv6 gateways with an IPv4 route will take the extra overhead of generating the header. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- drivers/net/vrf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/net/vrf.c') diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 6d1a1abbed27..fd1337736aa0 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -370,7 +370,7 @@ static int vrf_finish_output6(struct net *net, struct sock *sk, neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false); if (!IS_ERR(neigh)) { sock_confirm_neigh(skb, neigh); - ret = neigh_output(neigh, skb); + ret = neigh_output(neigh, skb, false); rcu_read_unlock_bh(); return ret; } @@ -578,7 +578,7 @@ static int vrf_finish_output(struct net *net, struct sock *sk, struct sk_buff *s neigh = __neigh_create(&arp_tbl, &nexthop, dev, false); if (!IS_ERR(neigh)) { sock_confirm_neigh(skb, neigh); - ret = neigh_output(neigh, skb); + ret = neigh_output(neigh, skb, false); rcu_read_unlock_bh(); return ret; } -- cgit v1.2.3-59-g8ed1b From 5c9f7c1dfc2e0776551ef1ceb335187c6698d1ff Mon Sep 17 00:00:00 2001 From: David Ahern Date: Fri, 5 Apr 2019 16:30:34 -0700 Subject: ipv4: Add helpers for neigh lookup for nexthop A common theme in the output path is looking up a neigh entry for a nexthop, either the gateway in an rtable or a fallback to the daddr in the skb: 
nexthop = (__force u32)rt_nexthop(rt, ip_hdr(skb)->daddr); neigh = __ipv4_neigh_lookup_noref(dev, nexthop); if (unlikely(!neigh)) neigh = __neigh_create(&arp_tbl, &nexthop, dev, false); To allow the nexthop to be an IPv6 address we need to consider the family of the nexthop and then call __ipv{4,6}_neigh_lookup_noref based on it. To make this simpler, add an ip_neigh_gw4 helper similar to ip_neigh_gw6 added in an earlier patch which handles: neigh = __ipv4_neigh_lookup_noref(dev, nexthop); if (unlikely(!neigh)) neigh = __neigh_create(&arp_tbl, &nexthop, dev, false); And then add a second one, ip_neigh_for_gw, that calls either ip_neigh_gw4 or ip_neigh_gw6 based on the address family of the gateway. Update the output paths in the VRF driver and core v4 code to use ip_neigh_for_gw simplifying the family based lookup and making both ready for a v6 nexthop. ipv4_neigh_lookup has a different need - the potential to resolve a passed in address in addition to any gateway in the rtable or skb. Since this is a one-off, add ip_neigh_gw4 and ip_neigh_gw6 directly. The difference between __neigh_create used by the helpers and neigh_create called by ipv4_neigh_lookup is taking a refcount, so add rcu_read_lock_bh and bump the refcnt on the neigh entry. Signed-off-by: David Ahern Signed-off-by: David S. 
Miller --- drivers/net/vrf.c | 10 ++++------ include/net/route.h | 32 ++++++++++++++++++++++++++++++++ net/ipv4/ip_output.c | 11 ++++------- net/ipv4/route.c | 29 +++++++++++++++++++---------- 4 files changed, 59 insertions(+), 23 deletions(-) (limited to 'drivers/net/vrf.c') diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index fd1337736aa0..18d752ae554f 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -549,7 +549,7 @@ static int vrf_finish_output(struct net *net, struct sock *sk, struct sk_buff *s struct net_device *dev = dst->dev; unsigned int hh_len = LL_RESERVED_SPACE(dev); struct neighbour *neigh; - u32 nexthop; + bool is_v6gw = false; int ret = -EINVAL; nf_reset(skb); @@ -572,13 +572,11 @@ static int vrf_finish_output(struct net *net, struct sock *sk, struct sk_buff *s rcu_read_lock_bh(); - nexthop = (__force u32)rt_nexthop(rt, ip_hdr(skb)->daddr); - neigh = __ipv4_neigh_lookup_noref(dev, nexthop); - if (unlikely(!neigh)) - neigh = __neigh_create(&arp_tbl, &nexthop, dev, false); + neigh = ip_neigh_for_gw(rt, skb, &is_v6gw); if (!IS_ERR(neigh)) { sock_confirm_neigh(skb, neigh); - ret = neigh_output(neigh, skb, false); + /* if crossing protocols, can not use the cached header */ + ret = neigh_output(neigh, skb, is_v6gw); rcu_read_unlock_bh(); return ret; } diff --git a/include/net/route.h b/include/net/route.h index 5d28a2509b58..96f6c9ae33c2 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -29,6 +29,8 @@ #include #include #include +#include +#include #include #include #include @@ -350,4 +352,34 @@ static inline int ip4_dst_hoplimit(const struct dst_entry *dst) return hoplimit; } +static inline struct neighbour *ip_neigh_gw4(struct net_device *dev, + __be32 daddr) +{ + struct neighbour *neigh; + + neigh = __ipv4_neigh_lookup_noref(dev, daddr); + if (unlikely(!neigh)) + neigh = __neigh_create(&arp_tbl, &daddr, dev, false); + + return neigh; +} + +static inline struct neighbour *ip_neigh_for_gw(struct rtable *rt, + struct sk_buff *skb, + 
bool *is_v6gw) +{ + struct net_device *dev = rt->dst.dev; + struct neighbour *neigh; + + if (likely(rt->rt_gw_family == AF_INET)) { + neigh = ip_neigh_gw4(dev, rt->rt_gw4); + } else if (rt->rt_gw_family == AF_INET6) { + neigh = ip_neigh_gw6(dev, &rt->rt_gw6); + *is_v6gw = true; + } else { + neigh = ip_neigh_gw4(dev, ip_hdr(skb)->daddr); + } + return neigh; +} + #endif /* _ROUTE_H */ diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index cca4892b8cb2..4e42c1974ba2 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -188,7 +188,7 @@ static int ip_finish_output2(struct net *net, struct sock *sk, struct sk_buff *s struct net_device *dev = dst->dev; unsigned int hh_len = LL_RESERVED_SPACE(dev); struct neighbour *neigh; - u32 nexthop; + bool is_v6gw = false; if (rt->rt_type == RTN_MULTICAST) { IP_UPD_PO_STATS(net, IPSTATS_MIB_OUTMCAST, skb->len); @@ -218,16 +218,13 @@ static int ip_finish_output2(struct net *net, struct sock *sk, struct sk_buff *s } rcu_read_lock_bh(); - nexthop = (__force u32) rt_nexthop(rt, ip_hdr(skb)->daddr); - neigh = __ipv4_neigh_lookup_noref(dev, nexthop); - if (unlikely(!neigh)) - neigh = __neigh_create(&arp_tbl, &nexthop, dev, false); + neigh = ip_neigh_for_gw(rt, skb, &is_v6gw); if (!IS_ERR(neigh)) { int res; sock_confirm_neigh(skb, neigh); - res = neigh_output(neigh, skb, false); - + /* if crossing protocols, can not use the cached header */ + res = neigh_output(neigh, skb, is_v6gw); rcu_read_unlock_bh(); return res; } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 6e58acf0a87b..32ecb4c1c7e3 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -436,18 +436,27 @@ static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, { const struct rtable *rt = container_of(dst, struct rtable, dst); struct net_device *dev = dst->dev; - const __be32 *pkey = daddr; struct neighbour *n; - if (rt->rt_gw_family == AF_INET) - pkey = (const __be32 *) &rt->rt_gw4; - else if (skb) - pkey = &ip_hdr(skb)->daddr; - - n = 
__ipv4_neigh_lookup(dev, *(__force u32 *)pkey); - if (n) - return n; - return neigh_create(&arp_tbl, pkey, dev); + rcu_read_lock_bh(); + + if (likely(rt->rt_gw_family == AF_INET)) { + n = ip_neigh_gw4(dev, rt->rt_gw4); + } else if (rt->rt_gw_family == AF_INET6) { + n = ip_neigh_gw6(dev, &rt->rt_gw6); + } else { + __be32 pkey; + + pkey = skb ? ip_hdr(skb)->daddr : *((__be32 *) daddr); + n = ip_neigh_gw4(dev, pkey); + } + + if (n && !refcount_inc_not_zero(&n->refcnt)) + n = NULL; + + rcu_read_unlock_bh(); + + return n; } static void ipv4_confirm_neigh(const struct dst_entry *dst, const void *daddr) -- cgit v1.2.3-59-g8ed1b