From c5cff8561d2d0006e972bd114afd51f082fee77c Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Mon, 21 Aug 2017 09:47:10 -0700 Subject: ipv6: add rcu grace period before freeing fib6_node We currently keep rt->rt6i_node pointing to the fib6_node for the route. And some functions make use of this pointer to dereference the fib6_node from rt structure, e.g. rt6_check(). However, as there is neither refcount nor rcu taken when dereferencing rt->rt6i_node, it could potentially cause crashes as rt->rt6i_node could be set to NULL by other CPUs when doing a route deletion. This patch introduces an rcu grace period before freeing fib6_node and makes sure the functions that dereference it takes rcu_read_lock(). Note: there is no "Fixes" tag because this bug was there in a very early stage. Signed-off-by: Wei Wang Acked-by: Eric Dumazet Acked-by: Martin KaFai Lau Signed-off-by: David S. Miller --- net/ipv6/ip6_fib.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) (limited to 'net/ipv6/ip6_fib.c') diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 5cc0ea038198..a5ebf86f6be8 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -148,11 +148,23 @@ static struct fib6_node *node_alloc(void) return fn; } -static void node_free(struct fib6_node *fn) +static void node_free_immediate(struct fib6_node *fn) +{ + kmem_cache_free(fib6_node_kmem, fn); +} + +static void node_free_rcu(struct rcu_head *head) { + struct fib6_node *fn = container_of(head, struct fib6_node, rcu); + kmem_cache_free(fib6_node_kmem, fn); } +static void node_free(struct fib6_node *fn) +{ + call_rcu(&fn->rcu, node_free_rcu); +} + static void rt6_free_pcpu(struct rt6_info *non_pcpu_rt) { int cpu; @@ -601,9 +613,9 @@ insert_above: if (!in || !ln) { if (in) - node_free(in); + node_free_immediate(in); if (ln) - node_free(ln); + node_free_immediate(ln); return ERR_PTR(-ENOMEM); } @@ -1038,7 +1050,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, root, and then (in failure) stale node in main tree. */ - node_free(sfn); + node_free_immediate(sfn); err = PTR_ERR(sn); goto failure; } -- cgit v1.2.3-59-g8ed1b From 4e587ea71bf924f7dac621f1351653bd41e446cb Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Fri, 25 Aug 2017 15:03:10 -0700 Subject: ipv6: fix sparse warning on rt6i_node Commit c5cff8561d2d adds rcu grace period before freeing fib6_node. This generates a new sparse warning on rt->rt6i_node related code: net/ipv6/route.c:1394:30: error: incompatible types in comparison expression (different address spaces) ./include/net/ip6_fib.h:187:14: error: incompatible types in comparison expression (different address spaces) This commit adds "__rcu" tag for rt6i_node and makes sure corresponding rcu API is used for it. After this fix, sparse no longer generates the above warning. Fixes: c5cff8561d2d ("ipv6: add rcu grace period before freeing fib6_node") Signed-off-by: Wei Wang Acked-by: Eric Dumazet Acked-by: Martin KaFai Lau Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 2 +- net/ipv6/addrconf.c | 2 +- net/ipv6/ip6_fib.c | 11 +++++++---- net/ipv6/route.c | 3 ++- 4 files changed, 11 insertions(+), 7 deletions(-) (limited to 'net/ipv6/ip6_fib.c') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index e9c59db92942..af509f801084 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -105,7 +105,7 @@ struct rt6_info { * the same cache line. */ struct fib6_table *rt6i_table; - struct fib6_node *rt6i_node; + struct fib6_node __rcu *rt6i_node; struct in6_addr rt6i_gateway; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 3c46e9513a31..936e9ab4dda5 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -5556,7 +5556,7 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) * our DAD process, so we don't need * to do it again */ - if (!(ifp->rt->rt6i_node)) + if (!rcu_access_pointer(ifp->rt->rt6i_node)) ip6_ins_rt(ifp->rt); if (ifp->idev->cnf.forwarding) addrconf_join_anycast(ifp); diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index a5ebf86f6be8..10b4b1f8b838 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -889,7 +889,7 @@ add: rt->dst.rt6_next = iter; *ins = rt; - rt->rt6i_node = fn; + rcu_assign_pointer(rt->rt6i_node, fn); atomic_inc(&rt->rt6i_ref); if (!info->skip_notify) inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags); @@ -915,7 +915,7 @@ add: return err; *ins = rt; - rt->rt6i_node = fn; + rcu_assign_pointer(rt->rt6i_node, fn); rt->dst.rt6_next = iter->dst.rt6_next; atomic_inc(&rt->rt6i_ref); if (!info->skip_notify) @@ -1480,8 +1480,9 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp, int fib6_del(struct rt6_info *rt, struct nl_info *info) { + struct fib6_node *fn = rcu_dereference_protected(rt->rt6i_node, + lockdep_is_held(&rt->rt6i_table->tb6_lock)); struct net *net = info->nl_net; - struct fib6_node *fn = rt->rt6i_node; struct rt6_info **rtp; #if RT6_DEBUG >= 2 @@ -1670,7 +1671,9 @@ static int fib6_clean_node(struct fib6_walker *w) if (res) { #if RT6_DEBUG >= 2 pr_debug("%s: del failed: rt=%p@%p err=%d\n", - __func__, rt, rt->rt6i_node, res); + __func__, rt, + rcu_access_pointer(rt->rt6i_node), + res); #endif continue; } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 48c8c92dcbd3..86fb2411e2bd 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1383,7 +1383,8 @@ static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu) static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt) { return !(rt->rt6i_flags & RTF_CACHE) && - (rt->rt6i_flags & RTF_PCPU || rt->rt6i_node); + (rt->rt6i_flags & RTF_PCPU || + rcu_access_pointer(rt->rt6i_node)); } static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk, -- cgit v1.2.3-59-g8ed1b From 1e2ea8ad37be25a7cdcc974945935829d534d5d3 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 26 Aug 2017 20:10:10 +0800 Subject: ipv6: set dst.obsolete when a cached route has expired Now it doesn't check for the cached route expiration in ipv6's dst_ops->check(), because it trusts dst_gc that would clean the cached route up when it's expired. The problem is in dst_gc, it would clean the cached route only when it's refcount is 1. If some other module (like xfrm) keeps holding it and the module only release it when dst_ops->check() fails. But without checking for the cached route expiration, .check() may always return true. Meanwhile, without releasing the cached route, dst_gc couldn't del it. It will cause this cached route never to expire. This patch is to set dst.obsolete with DST_OBSOLETE_KILL in .gc when it's expired, and check obsolete != DST_OBSOLETE_FORCE_CHK in .check. Note that this is even needed when ipv6 dst_gc timer is removed one day. It would set dst.obsolete in .redirect and .update_pmtu instead, and check for cached route expiration when getting it, just like what ipv4 route does. Reported-by: Jianlin Shi Signed-off-by: Xin Long Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv6/ip6_fib.c | 4 +++- net/ipv6/route.c | 3 ++- 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'net/ipv6/ip6_fib.c') diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 10b4b1f8b838..e1c85bb4eac0 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1795,8 +1795,10 @@ static int fib6_age(struct rt6_info *rt, void *arg) } gc_args->more++; } else if (rt->rt6i_flags & RTF_CACHE) { + if (time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) + rt->dst.obsolete = DST_OBSOLETE_KILL; if (atomic_read(&rt->dst.__refcnt) == 1 && - time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) { + rt->dst.obsolete == DST_OBSOLETE_KILL) { RT6_TRACE("aging clone %p\n", rt); return -1; } else if (rt->rt6i_flags & RTF_GATEWAY) { diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 86fb2411e2bd..2d0e7798c793 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -440,7 +440,8 @@ static bool rt6_check_expired(const struct rt6_info *rt) if (time_after(jiffies, rt->dst.expires)) return true; } else if (rt->dst.from) { - return rt6_check_expired((struct rt6_info *) rt->dst.from); + return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK || + rt6_check_expired((struct rt6_info *)rt->dst.from); } return false; } -- cgit v1.2.3-59-g8ed1b