path: root/net/ipv6/route.c
diff options
author Wei Wang <weiwan@google.com> 2019-06-20 17:36:41 -0700
committer David S. Miller <davem@davemloft.net> 2019-06-23 13:24:17 -0700
commit 7d9e5f422150ed00de744e02a80734d74cc9704d (patch)
tree 5649667efad829c536543d0feb5374ad42b93483 /net/ipv6/route.c
parent ipv6: convert rx data path to not take refcnt on dst (diff)
ipv6: convert major tx path to use RT6_LOOKUP_F_DST_NOREF
For the tx path, in most cases, we still have to take a refcnt on the dst because the caller is caching the dst somewhere. But it is still beneficial to make use of the RT6_LOOKUP_F_DST_NOREF flag while doing the route lookup. This is because the flag prevents manipulating the refcnt on net->ipv6.ip6_null_entry when doing fib6_rule_lookup() to traverse each routing table. The null_entry is a shared object, and constant updates on it cause false sharing. We converted the current major lookup function ip6_route_output_flags() to make use of RT6_LOOKUP_F_DST_NOREF. Together with the change in the rx path, we see a noticeable performance boost: I ran synflood tests between 2 hosts under the same switch. Both hosts have a 20G mlx NIC and 8 tx/rx queues. The sender sends a pure SYN flood with random src IPs and ports using trafgen. The receiver has a simple TCP listener on the target port. Both hosts have multiple custom rules: - For incoming packets, only the local table is traversed. - For outgoing packets, 3 tables are traversed to find the route. The packet processing rate on the receiver is as follows: - Before the fix: 3.78Mpps - After the fix: 5.50Mpps Signed-off-by: Wei Wang <weiwan@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv6/route.c')
1 files changed, 27 insertions, 2 deletions
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 66fc69ef5909..3975ae8e2440 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2415,8 +2415,9 @@ static struct rt6_info *ip6_pol_route_output(struct net *net,
return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, skb, flags);
-struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
- struct flowi6 *fl6, int flags)
+struct dst_entry *ip6_route_output_flags_noref(struct net *net,
+ const struct sock *sk,
+ struct flowi6 *fl6, int flags)
bool any_src;
@@ -2424,6 +2425,7 @@ struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
struct dst_entry *dst;
+ /* This function does not take refcnt on the dst */
dst = l3mdev_link_scope_lookup(net, fl6);
if (dst)
return dst;
@@ -2431,6 +2433,7 @@ struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
fl6->flowi6_iif = LOOPBACK_IFINDEX;
+ flags |= RT6_LOOKUP_F_DST_NOREF;
any_src = ipv6_addr_any(&fl6->saddr);
if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
(fl6->flowi6_oif && any_src))
@@ -2443,6 +2446,28 @@ struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
return fib6_rule_lookup(net, fl6, NULL, flags, ip6_pol_route_output);
+struct dst_entry *ip6_route_output_flags(struct net *net,
+ const struct sock *sk,
+ struct flowi6 *fl6,
+ int flags)
+ struct dst_entry *dst;
+ struct rt6_info *rt6;
+ rcu_read_lock();
+ dst = ip6_route_output_flags_noref(net, sk, fl6, flags);
+ rt6 = (struct rt6_info *)dst;
+ /* For dst cached in uncached_list, refcnt is already taken. */
+ if (list_empty(&rt6->rt6i_uncached) && !dst_hold_safe(dst)) {
+ dst = &net->ipv6.ip6_null_entry->dst;
+ dst_hold(dst);
+ }
+ rcu_read_unlock();
+ return dst;
struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)