aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv6/route.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv6/route.c')
-rw-r--r--net/ipv6/route.c728
1 files changed, 628 insertions, 100 deletions
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index df5be3d5d3e5..be5e65c97652 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1,14 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Linux INET6 implementation
* FIB front-end.
*
* Authors:
* Pedro Roque <roque@di.fc.ul.pt>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
/* Changes:
@@ -104,7 +100,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
struct sk_buff *skb);
static int rt6_score_route(const struct fib6_nh *nh, u32 fib6_flags, int oif,
int strict);
-static size_t rt6_nlmsg_size(struct fib6_info *rt);
+static size_t rt6_nlmsg_size(struct fib6_info *f6i);
static int rt6_fill_node(struct net *net, struct sk_buff *skb,
struct fib6_info *rt, struct dst_entry *dst,
struct in6_addr *dest, struct in6_addr *src,
@@ -494,6 +490,45 @@ static bool __rt6_device_match(struct net *net, const struct fib6_nh *nh,
return false;
}
+struct fib6_nh_dm_arg {
+ struct net *net;
+ const struct in6_addr *saddr;
+ int oif;
+ int flags;
+ struct fib6_nh *nh;
+};
+
+static int __rt6_nh_dev_match(struct fib6_nh *nh, void *_arg)
+{
+ struct fib6_nh_dm_arg *arg = _arg;
+
+ arg->nh = nh;
+ return __rt6_device_match(arg->net, nh, arg->saddr, arg->oif,
+ arg->flags);
+}
+
+/* returns fib6_nh from nexthop or NULL */
+static struct fib6_nh *rt6_nh_dev_match(struct net *net, struct nexthop *nh,
+ struct fib6_result *res,
+ const struct in6_addr *saddr,
+ int oif, int flags)
+{
+ struct fib6_nh_dm_arg arg = {
+ .net = net,
+ .saddr = saddr,
+ .oif = oif,
+ .flags = flags,
+ };
+
+ if (nexthop_is_blackhole(nh))
+ return NULL;
+
+ if (nexthop_for_each_fib6_nh(nh, __rt6_nh_dev_match, &arg))
+ return arg.nh;
+
+ return NULL;
+}
+
static void rt6_device_match(struct net *net, struct fib6_result *res,
const struct in6_addr *saddr, int oif, int flags)
{
@@ -514,8 +549,19 @@ static void rt6_device_match(struct net *net, struct fib6_result *res,
}
for (spf6i = f6i; spf6i; spf6i = rcu_dereference(spf6i->fib6_next)) {
- nh = spf6i->fib6_nh;
- if (__rt6_device_match(net, nh, saddr, oif, flags)) {
+ bool matched = false;
+
+ if (unlikely(spf6i->nh)) {
+ nh = rt6_nh_dev_match(net, spf6i->nh, res, saddr,
+ oif, flags);
+ if (nh)
+ matched = true;
+ } else {
+ nh = spf6i->fib6_nh;
+ if (__rt6_device_match(net, nh, saddr, oif, flags))
+ matched = true;
+ }
+ if (matched) {
res->f6i = spf6i;
goto out;
}
@@ -719,6 +765,24 @@ out:
return rc;
}
+struct fib6_nh_frl_arg {
+ u32 flags;
+ int oif;
+ int strict;
+ int *mpri;
+ bool *do_rr;
+ struct fib6_nh *nh;
+};
+
+static int rt6_nh_find_match(struct fib6_nh *nh, void *_arg)
+{
+ struct fib6_nh_frl_arg *arg = _arg;
+
+ arg->nh = nh;
+ return find_match(nh, arg->flags, arg->oif, arg->strict,
+ arg->mpri, arg->do_rr);
+}
+
static void __find_rr_leaf(struct fib6_info *f6i_start,
struct fib6_info *nomatch, u32 metric,
struct fib6_result *res, struct fib6_info **cont,
@@ -729,6 +793,7 @@ static void __find_rr_leaf(struct fib6_info *f6i_start,
for (f6i = f6i_start;
f6i && f6i != nomatch;
f6i = rcu_dereference(f6i->fib6_next)) {
+ bool matched = false;
struct fib6_nh *nh;
if (cont && f6i->fib6_metric != metric) {
@@ -739,8 +804,34 @@ static void __find_rr_leaf(struct fib6_info *f6i_start,
if (fib6_check_expired(f6i))
continue;
- nh = f6i->fib6_nh;
- if (find_match(nh, f6i->fib6_flags, oif, strict, mpri, do_rr)) {
+ if (unlikely(f6i->nh)) {
+ struct fib6_nh_frl_arg arg = {
+ .flags = f6i->fib6_flags,
+ .oif = oif,
+ .strict = strict,
+ .mpri = mpri,
+ .do_rr = do_rr
+ };
+
+ if (nexthop_is_blackhole(f6i->nh)) {
+ res->fib6_flags = RTF_REJECT;
+ res->fib6_type = RTN_BLACKHOLE;
+ res->f6i = f6i;
+ res->nh = nexthop_fib6_nh(f6i->nh);
+ return;
+ }
+ if (nexthop_for_each_fib6_nh(f6i->nh, rt6_nh_find_match,
+ &arg)) {
+ matched = true;
+ nh = arg.nh;
+ }
+ } else {
+ nh = f6i->fib6_nh;
+ if (find_match(nh, f6i->fib6_flags, oif, strict,
+ mpri, do_rr))
+ matched = true;
+ }
+ if (matched) {
res->f6i = f6i;
res->nh = nh;
res->fib6_flags = f6i->fib6_flags;
@@ -1300,9 +1391,6 @@ static struct rt6_info *rt6_get_pcpu_route(const struct fib6_result *res)
pcpu_rt = this_cpu_read(*res->nh->rt6i_pcpu);
- if (pcpu_rt)
- ip6_hold_safe(NULL, &pcpu_rt);
-
return pcpu_rt;
}
@@ -1312,12 +1400,9 @@ static struct rt6_info *rt6_make_pcpu_route(struct net *net,
struct rt6_info *pcpu_rt, *prev, **p;
pcpu_rt = ip6_rt_pcpu_alloc(res);
- if (!pcpu_rt) {
- dst_hold(&net->ipv6.ip6_null_entry->dst);
- return net->ipv6.ip6_null_entry;
- }
+ if (!pcpu_rt)
+ return NULL;
- dst_hold(&pcpu_rt->dst);
p = this_cpu_ptr(res->nh->rt6i_pcpu);
prev = cmpxchg(p, NULL, pcpu_rt);
BUG_ON(prev);
@@ -1651,9 +1736,22 @@ out:
spin_unlock_bh(&rt6_exception_lock);
}
+static int rt6_nh_flush_exceptions(struct fib6_nh *nh, void *arg)
+{
+ struct fib6_info *f6i = arg;
+
+ fib6_nh_flush_exceptions(nh, f6i);
+
+ return 0;
+}
+
void rt6_flush_exceptions(struct fib6_info *f6i)
{
- fib6_nh_flush_exceptions(f6i->fib6_nh, f6i);
+ if (f6i->nh)
+ nexthop_for_each_fib6_nh(f6i->nh, rt6_nh_flush_exceptions,
+ f6i);
+ else
+ fib6_nh_flush_exceptions(f6i->fib6_nh, f6i);
}
/* Find cached rt in the hash table inside passed in rt
@@ -1740,6 +1838,23 @@ static int fib6_nh_remove_exception(const struct fib6_nh *nh, int plen,
return err;
}
+struct fib6_nh_excptn_arg {
+ struct rt6_info *rt;
+ int plen;
+};
+
+static int rt6_nh_remove_exception_rt(struct fib6_nh *nh, void *_arg)
+{
+ struct fib6_nh_excptn_arg *arg = _arg;
+ int err;
+
+ err = fib6_nh_remove_exception(nh, arg->plen, arg->rt);
+ if (err == 0)
+ return 1;
+
+ return 0;
+}
+
static int rt6_remove_exception_rt(struct rt6_info *rt)
{
struct fib6_info *from;
@@ -1748,6 +1863,20 @@ static int rt6_remove_exception_rt(struct rt6_info *rt)
if (!from || !(rt->rt6i_flags & RTF_CACHE))
return -EINVAL;
+ if (from->nh) {
+ struct fib6_nh_excptn_arg arg = {
+ .rt = rt,
+ .plen = from->fib6_src.plen
+ };
+ int rc;
+
+ /* rc = 1 means an entry was found */
+ rc = nexthop_for_each_fib6_nh(from->nh,
+ rt6_nh_remove_exception_rt,
+ &arg);
+ return rc ? 0 : -ENOENT;
+ }
+
return fib6_nh_remove_exception(from->fib6_nh,
from->fib6_src.plen, rt);
}
@@ -1778,9 +1907,33 @@ static void fib6_nh_update_exception(const struct fib6_nh *nh, int plen,
rt6_ex->stamp = jiffies;
}
+struct fib6_nh_match_arg {
+ const struct net_device *dev;
+ const struct in6_addr *gw;
+ struct fib6_nh *match;
+};
+
+/* determine if fib6_nh has given device and gateway */
+static int fib6_nh_find_match(struct fib6_nh *nh, void *_arg)
+{
+ struct fib6_nh_match_arg *arg = _arg;
+
+ if (arg->dev != nh->fib_nh_dev ||
+ (arg->gw && !nh->fib_nh_gw_family) ||
+ (!arg->gw && nh->fib_nh_gw_family) ||
+ (arg->gw && !ipv6_addr_equal(arg->gw, &nh->fib_nh_gw6)))
+ return 0;
+
+ arg->match = nh;
+
+ /* found a match, break the loop */
+ return 1;
+}
+
static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
{
struct fib6_info *from;
+ struct fib6_nh *fib6_nh;
rcu_read_lock();
@@ -1788,7 +1941,21 @@ static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
if (!from || !(rt->rt6i_flags & RTF_CACHE))
goto unlock;
- fib6_nh_update_exception(from->fib6_nh, from->fib6_src.plen, rt);
+ if (from->nh) {
+ struct fib6_nh_match_arg arg = {
+ .dev = rt->dst.dev,
+ .gw = &rt->rt6i_gateway,
+ };
+
+ nexthop_for_each_fib6_nh(from->nh, fib6_nh_find_match, &arg);
+
+ if (!arg.match)
+ return;
+ fib6_nh = arg.match;
+ } else {
+ fib6_nh = from->fib6_nh;
+ }
+ fib6_nh_update_exception(fib6_nh, from->fib6_src.plen, rt);
unlock:
rcu_read_unlock();
}
@@ -1950,11 +2117,34 @@ static void fib6_nh_age_exceptions(const struct fib6_nh *nh,
rcu_read_unlock_bh();
}
+struct fib6_nh_age_excptn_arg {
+ struct fib6_gc_args *gc_args;
+ unsigned long now;
+};
+
+static int rt6_nh_age_exceptions(struct fib6_nh *nh, void *_arg)
+{
+ struct fib6_nh_age_excptn_arg *arg = _arg;
+
+ fib6_nh_age_exceptions(nh, arg->gc_args, arg->now);
+ return 0;
+}
+
void rt6_age_exceptions(struct fib6_info *f6i,
struct fib6_gc_args *gc_args,
unsigned long now)
{
- fib6_nh_age_exceptions(f6i->fib6_nh, gc_args, now);
+ if (f6i->nh) {
+ struct fib6_nh_age_excptn_arg arg = {
+ .gc_args = gc_args,
+ .now = now
+ };
+
+ nexthop_for_each_fib6_nh(f6i->nh, rt6_nh_age_exceptions,
+ &arg);
+ } else {
+ fib6_nh_age_exceptions(f6i->fib6_nh, gc_args, now);
+ }
}
/* must be called with rcu lock held */
@@ -1993,9 +2183,12 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
const struct sk_buff *skb, int flags)
{
struct fib6_result res = {};
- struct rt6_info *rt;
+ struct rt6_info *rt = NULL;
int strict = 0;
+ WARN_ON_ONCE((flags & RT6_LOOKUP_F_DST_NOREF) &&
+ !rcu_read_lock_held());
+
strict |= flags & RT6_LOOKUP_F_IFACE;
strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
if (net->ipv6.devconf_all->forwarding == 0)
@@ -2004,23 +2197,15 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
rcu_read_lock();
fib6_table_lookup(net, table, oif, fl6, &res, strict);
- if (res.f6i == net->ipv6.fib6_null_entry) {
- rt = net->ipv6.ip6_null_entry;
- rcu_read_unlock();
- dst_hold(&rt->dst);
- return rt;
- }
+ if (res.f6i == net->ipv6.fib6_null_entry)
+ goto out;
fib6_select_path(net, &res, fl6, oif, false, skb, strict);
/*Search through exception table */
rt = rt6_find_cached_rt(&res, &fl6->daddr, &fl6->saddr);
if (rt) {
- if (ip6_hold_safe(net, &rt))
- dst_use_noref(&rt->dst, jiffies);
-
- rcu_read_unlock();
- return rt;
+ goto out;
} else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
!res.nh->fib_nh_gw_family)) {
/* Create a RTF_CACHE clone which will not be
@@ -2028,40 +2213,38 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
* the daddr in the skb during the neighbor look-up is different
* from the fl6->daddr used to look-up route here.
*/
- struct rt6_info *uncached_rt;
+ rt = ip6_rt_cache_alloc(&res, &fl6->daddr, NULL);
- uncached_rt = ip6_rt_cache_alloc(&res, &fl6->daddr, NULL);
-
- rcu_read_unlock();
-
- if (uncached_rt) {
- /* Uncached_rt's refcnt is taken during ip6_rt_cache_alloc()
- * No need for another dst_hold()
+ if (rt) {
+ /* 1 refcnt is taken during ip6_rt_cache_alloc().
+ * As rt6_uncached_list_add() does not consume refcnt,
+ * this refcnt is always returned to the caller even
+ * if caller sets RT6_LOOKUP_F_DST_NOREF flag.
*/
- rt6_uncached_list_add(uncached_rt);
+ rt6_uncached_list_add(rt);
atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
- } else {
- uncached_rt = net->ipv6.ip6_null_entry;
- dst_hold(&uncached_rt->dst);
- }
+ rcu_read_unlock();
- return uncached_rt;
+ return rt;
+ }
} else {
/* Get a percpu copy */
-
- struct rt6_info *pcpu_rt;
-
local_bh_disable();
- pcpu_rt = rt6_get_pcpu_route(&res);
+ rt = rt6_get_pcpu_route(&res);
- if (!pcpu_rt)
- pcpu_rt = rt6_make_pcpu_route(net, &res);
+ if (!rt)
+ rt = rt6_make_pcpu_route(net, &res);
local_bh_enable();
- rcu_read_unlock();
-
- return pcpu_rt;
}
+out:
+ if (!rt)
+ rt = net->ipv6.ip6_null_entry;
+ if (!(flags & RT6_LOOKUP_F_DST_NOREF))
+ ip6_hold_safe(net, &rt);
+ rcu_read_unlock();
+
+ return rt;
}
EXPORT_SYMBOL_GPL(ip6_pol_route);
@@ -2192,11 +2375,12 @@ u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
return mhash >> 1;
}
+/* Called with rcu held */
void ip6_route_input(struct sk_buff *skb)
{
const struct ipv6hdr *iph = ipv6_hdr(skb);
struct net *net = dev_net(skb->dev);
- int flags = RT6_LOOKUP_F_HAS_SADDR;
+ int flags = RT6_LOOKUP_F_HAS_SADDR | RT6_LOOKUP_F_DST_NOREF;
struct ip_tunnel_info *tun_info;
struct flowi6 fl6 = {
.flowi6_iif = skb->dev->ifindex,
@@ -2218,8 +2402,8 @@ void ip6_route_input(struct sk_buff *skb)
if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, flkeys);
skb_dst_drop(skb);
- skb_dst_set(skb,
- ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags));
+ skb_dst_set_noref(skb, ip6_route_input_lookup(net, skb->dev,
+ &fl6, skb, flags));
}
static struct rt6_info *ip6_pol_route_output(struct net *net,
@@ -2231,8 +2415,9 @@ static struct rt6_info *ip6_pol_route_output(struct net *net,
return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, skb, flags);
}
-struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
- struct flowi6 *fl6, int flags)
+struct dst_entry *ip6_route_output_flags_noref(struct net *net,
+ const struct sock *sk,
+ struct flowi6 *fl6, int flags)
{
bool any_src;
@@ -2240,6 +2425,7 @@ struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL)) {
struct dst_entry *dst;
+ /* This function does not take refcnt on the dst */
dst = l3mdev_link_scope_lookup(net, fl6);
if (dst)
return dst;
@@ -2247,6 +2433,7 @@ struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
fl6->flowi6_iif = LOOPBACK_IFINDEX;
+ flags |= RT6_LOOKUP_F_DST_NOREF;
any_src = ipv6_addr_any(&fl6->saddr);
if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
(fl6->flowi6_oif && any_src))
@@ -2259,6 +2446,28 @@ struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
return fib6_rule_lookup(net, fl6, NULL, flags, ip6_pol_route_output);
}
+EXPORT_SYMBOL_GPL(ip6_route_output_flags_noref);
+
+struct dst_entry *ip6_route_output_flags(struct net *net,
+ const struct sock *sk,
+ struct flowi6 *fl6,
+ int flags)
+{
+ struct dst_entry *dst;
+ struct rt6_info *rt6;
+
+ rcu_read_lock();
+ dst = ip6_route_output_flags_noref(net, sk, fl6, flags);
+ rt6 = (struct rt6_info *)dst;
+ /* For dst cached in uncached_list, refcnt is already taken. */
+ if (list_empty(&rt6->rt6i_uncached) && !dst_hold_safe(dst)) {
+ dst = &net->ipv6.ip6_null_entry->dst;
+ dst_hold(dst);
+ }
+ rcu_read_unlock();
+
+ return dst;
+}
EXPORT_SYMBOL_GPL(ip6_route_output_flags);
struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
@@ -2483,10 +2692,31 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
rcu_read_unlock();
return;
}
- res.nh = res.f6i->fib6_nh;
res.fib6_flags = res.f6i->fib6_flags;
res.fib6_type = res.f6i->fib6_type;
+ if (res.f6i->nh) {
+ struct fib6_nh_match_arg arg = {
+ .dev = dst->dev,
+ .gw = &rt6->rt6i_gateway,
+ };
+
+ nexthop_for_each_fib6_nh(res.f6i->nh,
+ fib6_nh_find_match, &arg);
+
+ /* fib6_info uses a nexthop that does not have fib6_nh
+ * using the dst->dev + gw. Should be impossible.
+ */
+ if (!arg.match) {
+ rcu_read_unlock();
+ return;
+ }
+
+ res.nh = arg.match;
+ } else {
+ res.nh = res.f6i->fib6_nh;
+ }
+
nrt6 = ip6_rt_cache_alloc(&res, daddr, saddr);
if (nrt6) {
rt6_do_update_pmtu(nrt6, mtu);
@@ -2593,6 +2823,21 @@ static bool ip6_redirect_nh_match(const struct fib6_result *res,
return true;
}
+struct fib6_nh_rd_arg {
+ struct fib6_result *res;
+ struct flowi6 *fl6;
+ const struct in6_addr *gw;
+ struct rt6_info **ret;
+};
+
+static int fib6_nh_redirect_match(struct fib6_nh *nh, void *_arg)
+{
+ struct fib6_nh_rd_arg *arg = _arg;
+
+ arg->res->nh = nh;
+ return ip6_redirect_nh_match(arg->res, arg->fl6, arg->gw, arg->ret);
+}
+
/* Handle redirects */
struct ip6rd_flowi {
struct flowi6 fl6;
@@ -2608,6 +2853,12 @@ static struct rt6_info *__ip6_route_redirect(struct net *net,
struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
struct rt6_info *ret = NULL;
struct fib6_result res = {};
+ struct fib6_nh_rd_arg arg = {
+ .res = &res,
+ .fl6 = fl6,
+ .gw = &rdfl->gateway,
+ .ret = &ret
+ };
struct fib6_info *rt;
struct fib6_node *fn;
@@ -2632,14 +2883,24 @@ static struct rt6_info *__ip6_route_redirect(struct net *net,
restart:
for_each_fib6_node_rt_rcu(fn) {
res.f6i = rt;
- res.nh = rt->fib6_nh;
-
if (fib6_check_expired(rt))
continue;
if (rt->fib6_flags & RTF_REJECT)
break;
- if (ip6_redirect_nh_match(&res, fl6, &rdfl->gateway, &ret))
- goto out;
+ if (unlikely(rt->nh)) {
+ if (nexthop_is_blackhole(rt->nh))
+ continue;
+ /* on match, res->nh is filled in and potentially ret */
+ if (nexthop_for_each_fib6_nh(rt->nh,
+ fib6_nh_redirect_match,
+ &arg))
+ goto out;
+ } else {
+ res.nh = rt->fib6_nh;
+ if (ip6_redirect_nh_match(&res, fl6, &rdfl->gateway,
+ &ret))
+ goto out;
+ }
}
if (!rt)
@@ -3283,6 +3544,16 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
goto out;
}
#endif
+ if (cfg->fc_nh_id) {
+ nh = nexthop_find_by_id(net, cfg->fc_nh_id);
+ if (!nh) {
+ NL_SET_ERR_MSG(extack, "Nexthop id does not exist");
+ goto out;
+ }
+ err = fib6_check_nexthop(nh, cfg, extack);
+ if (err)
+ goto out;
+ }
err = -ENOBUFS;
if (cfg->fc_nlinfo.nlh &&
@@ -3328,7 +3599,7 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
rt->fib6_table = table;
rt->fib6_metric = cfg->fc_metric;
- rt->fib6_type = cfg->fc_type;
+ rt->fib6_type = cfg->fc_type ? : RTN_UNICAST;
rt->fib6_flags = cfg->fc_flags & ~RTF_GATEWAY;
ipv6_addr_prefix(&rt->fib6_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
@@ -3346,7 +3617,7 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
goto out;
}
if (rt->fib6_src.plen) {
- NL_SET_ERR_MSG(extack, "Nexthops can not be used wtih source routing");
+ NL_SET_ERR_MSG(extack, "Nexthops can not be used with source routing");
goto out;
}
rt->nh = nh;
@@ -3460,6 +3731,12 @@ static int __ip6_del_rt_siblings(struct fib6_info *rt, struct fib6_config *cfg)
info->skip_notify = 1;
}
+ info->skip_notify_kernel = 1;
+ call_fib6_multipath_entry_notifiers(net,
+ FIB_EVENT_ENTRY_DEL,
+ rt,
+ rt->fib6_nsiblings,
+ NULL);
list_for_each_entry_safe(sibling, next_sibling,
&rt->fib6_siblings,
fib6_siblings) {
@@ -3514,6 +3791,30 @@ static int ip6_del_cached_rt(struct fib6_config *cfg, struct fib6_info *rt,
return 0;
}
+struct fib6_nh_del_cached_rt_arg {
+ struct fib6_config *cfg;
+ struct fib6_info *f6i;
+};
+
+static int fib6_nh_del_cached_rt(struct fib6_nh *nh, void *_arg)
+{
+ struct fib6_nh_del_cached_rt_arg *arg = _arg;
+ int rc;
+
+ rc = ip6_del_cached_rt(arg->cfg, arg->f6i, nh);
+ return rc != -ESRCH ? rc : 0;
+}
+
+static int ip6_del_cached_rt_nh(struct fib6_config *cfg, struct fib6_info *f6i)
+{
+ struct fib6_nh_del_cached_rt_arg arg = {
+ .cfg = cfg,
+ .f6i = f6i
+ };
+
+ return nexthop_for_each_fib6_nh(f6i->nh, fib6_nh_del_cached_rt, &arg);
+}
+
static int ip6_route_del(struct fib6_config *cfg,
struct netlink_ext_ack *extack)
{
@@ -3539,11 +3840,21 @@ static int ip6_route_del(struct fib6_config *cfg,
for_each_fib6_node_rt_rcu(fn) {
struct fib6_nh *nh;
- nh = rt->fib6_nh;
- if (cfg->fc_flags & RTF_CACHE) {
- int rc;
+ if (rt->nh && cfg->fc_nh_id &&
+ rt->nh->id != cfg->fc_nh_id)
+ continue;
- rc = ip6_del_cached_rt(cfg, rt, nh);
+ if (cfg->fc_flags & RTF_CACHE) {
+ int rc = 0;
+
+ if (rt->nh) {
+ rc = ip6_del_cached_rt_nh(cfg, rt);
+ } else if (cfg->fc_nh_id) {
+ continue;
+ } else {
+ nh = rt->fib6_nh;
+ rc = ip6_del_cached_rt(cfg, rt, nh);
+ }
if (rc != -ESRCH) {
rcu_read_unlock();
return rc;
@@ -3551,6 +3862,23 @@ static int ip6_route_del(struct fib6_config *cfg,
continue;
}
+ if (cfg->fc_metric && cfg->fc_metric != rt->fib6_metric)
+ continue;
+ if (cfg->fc_protocol &&
+ cfg->fc_protocol != rt->fib6_protocol)
+ continue;
+
+ if (rt->nh) {
+ if (!fib6_info_hold_safe(rt))
+ continue;
+ rcu_read_unlock();
+
+ return __ip6_del_rt(rt, &cfg->fc_nlinfo);
+ }
+ if (cfg->fc_nh_id)
+ continue;
+
+ nh = rt->fib6_nh;
if (cfg->fc_ifindex &&
(!nh->fib_nh_dev ||
nh->fib_nh_dev->ifindex != cfg->fc_ifindex))
@@ -3558,10 +3886,6 @@ static int ip6_route_del(struct fib6_config *cfg,
if (cfg->fc_flags & RTF_GATEWAY &&
!ipv6_addr_equal(&cfg->fc_gateway, &nh->fib_nh_gw6))
continue;
- if (cfg->fc_metric && cfg->fc_metric != rt->fib6_metric)
- continue;
- if (cfg->fc_protocol && cfg->fc_protocol != rt->fib6_protocol)
- continue;
if (!fib6_info_hold_safe(rt))
continue;
rcu_read_unlock();
@@ -3672,7 +3996,25 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
if (!res.f6i)
goto out;
- res.nh = res.f6i->fib6_nh;
+ if (res.f6i->nh) {
+ struct fib6_nh_match_arg arg = {
+ .dev = dst->dev,
+ .gw = &rt->rt6i_gateway,
+ };
+
+ nexthop_for_each_fib6_nh(res.f6i->nh,
+ fib6_nh_find_match, &arg);
+
+ /* fib6_info uses a nexthop that does not have fib6_nh
+ * using the dst->dev. Should be impossible
+ */
+ if (!arg.match)
+ goto out;
+ res.nh = arg.match;
+ } else {
+ res.nh = res.f6i->fib6_nh;
+ }
+
res.fib6_flags = res.f6i->fib6_flags;
res.fib6_type = res.f6i->fib6_type;
nrt = ip6_rt_cache_alloc(&res, &msg->dest, NULL);
@@ -4405,6 +4747,12 @@ static int rt6_mtu_change_route(struct fib6_info *f6i, void *p_arg)
return 0;
arg->f6i = f6i;
+ if (f6i->nh) {
+ /* fib6_nh_mtu_change only returns 0, so this is safe */
+ return nexthop_for_each_fib6_nh(f6i->nh, fib6_nh_mtu_change,
+ arg);
+ }
+
return fib6_nh_mtu_change(f6i->fib6_nh, arg);
}
@@ -4437,6 +4785,7 @@ static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
[RTA_IP_PROTO] = { .type = NLA_U8 },
[RTA_SPORT] = { .type = NLA_U16 },
[RTA_DPORT] = { .type = NLA_U16 },
+ [RTA_NH_ID] = { .type = NLA_U32 },
};
static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
@@ -4483,6 +4832,16 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
cfg->fc_flags |= (rtm->rtm_flags & RTNH_F_ONLINK);
+ if (tb[RTA_NH_ID]) {
+ if (tb[RTA_GATEWAY] || tb[RTA_OIF] ||
+ tb[RTA_MULTIPATH] || tb[RTA_ENCAP]) {
+ NL_SET_ERR_MSG(extack,
+ "Nexthop specification and nexthop id are mutually exclusive");
+ goto errout;
+ }
+ cfg->fc_nh_id = nla_get_u32(tb[RTA_NH_ID]);
+ }
+
if (tb[RTA_GATEWAY]) {
cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
cfg->fc_flags |= RTF_GATEWAY;
@@ -4626,6 +4985,7 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
{
struct fib6_info *rt_notif = NULL, *rt_last = NULL;
struct nl_info *info = &cfg->fc_nlinfo;
+ enum fib_event_type event_type;
struct fib6_config r_cfg;
struct rtnexthop *rtnh;
struct fib6_info *rt;
@@ -4697,12 +5057,23 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
rtnh = rtnh_next(rtnh, &remaining);
}
+ if (list_empty(&rt6_nh_list)) {
+ NL_SET_ERR_MSG(extack,
+ "Invalid nexthop configuration - no valid nexthops");
+ return -EINVAL;
+ }
+
/* for add and replace send one notification with all nexthops.
* Skip the notification in fib6_add_rt2node and send one with
* the full route when done
*/
info->skip_notify = 1;
+ /* For add and replace, send one notification with all nexthops. For
+ * append, send one notification with all appended nexthops.
+ */
+ info->skip_notify_kernel = 1;
+
err_nh = NULL;
list_for_each_entry(nh, &rt6_nh_list, next) {
err = __ip6_ins_rt(nh->fib6_info, info, extack);
@@ -4739,6 +5110,15 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
nhn++;
}
+ event_type = replace ? FIB_EVENT_ENTRY_REPLACE : FIB_EVENT_ENTRY_ADD;
+ err = call_fib6_multipath_entry_notifiers(info->nl_net, event_type,
+ rt_notif, nhn - 1, extack);
+ if (err) {
+ /* Delete all the siblings that were just added */
+ err_nh = NULL;
+ goto add_errout;
+ }
+
/* success ... tell user about new route */
ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
goto cleanup;
@@ -4817,6 +5197,12 @@ static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
if (err < 0)
return err;
+ if (cfg.fc_nh_id &&
+ !nexthop_find_by_id(sock_net(skb->sk), cfg.fc_nh_id)) {
+ NL_SET_ERR_MSG(extack, "Nexthop id does not exist");
+ return -EINVAL;
+ }
+
if (cfg.fc_mp)
return ip6_route_multipath_del(&cfg, extack);
else {
@@ -4844,20 +5230,46 @@ static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
return ip6_route_add(&cfg, GFP_KERNEL, extack);
}
-static size_t rt6_nlmsg_size(struct fib6_info *rt)
+/* add the overhead of this fib6_nh to nexthop_len */
+static int rt6_nh_nlmsg_size(struct fib6_nh *nh, void *arg)
{
- int nexthop_len = 0;
+ int *nexthop_len = arg;
- if (rt->nh)
- nexthop_len += nla_total_size(4); /* RTA_NH_ID */
+ *nexthop_len += nla_total_size(0) /* RTA_MULTIPATH */
+ + NLA_ALIGN(sizeof(struct rtnexthop))
+ + nla_total_size(16); /* RTA_GATEWAY */
+
+ if (nh->fib_nh_lws) {
+ /* RTA_ENCAP_TYPE */
+ *nexthop_len += lwtunnel_get_encap_size(nh->fib_nh_lws);
+ /* RTA_ENCAP */
+ *nexthop_len += nla_total_size(2);
+ }
- if (rt->fib6_nsiblings) {
- nexthop_len = nla_total_size(0) /* RTA_MULTIPATH */
- + NLA_ALIGN(sizeof(struct rtnexthop))
- + nla_total_size(16) /* RTA_GATEWAY */
- + lwtunnel_get_encap_size(rt->fib6_nh->fib_nh_lws);
+ return 0;
+}
- nexthop_len *= rt->fib6_nsiblings;
+static size_t rt6_nlmsg_size(struct fib6_info *f6i)
+{
+ int nexthop_len;
+
+ if (f6i->nh) {
+ nexthop_len = nla_total_size(4); /* RTA_NH_ID */
+ nexthop_for_each_fib6_nh(f6i->nh, rt6_nh_nlmsg_size,
+ &nexthop_len);
+ } else {
+ struct fib6_nh *nh = f6i->fib6_nh;
+
+ nexthop_len = 0;
+ if (f6i->fib6_nsiblings) {
+ nexthop_len = nla_total_size(0) /* RTA_MULTIPATH */
+ + NLA_ALIGN(sizeof(struct rtnexthop))
+ + nla_total_size(16) /* RTA_GATEWAY */
+ + lwtunnel_get_encap_size(nh->fib_nh_lws);
+
+ nexthop_len *= f6i->fib6_nsiblings;
+ }
+ nexthop_len += lwtunnel_get_encap_size(nh->fib_nh_lws);
}
return NLMSG_ALIGN(sizeof(struct rtmsg))
@@ -4873,7 +5285,6 @@ static size_t rt6_nlmsg_size(struct fib6_info *rt)
+ nla_total_size(sizeof(struct rta_cacheinfo))
+ nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
+ nla_total_size(1) /* RTA_PREF */
- + lwtunnel_get_encap_size(rt->fib6_nh->fib_nh_lws)
+ nexthop_len;
}
@@ -5074,9 +5485,27 @@ nla_put_failure:
return -EMSGSIZE;
}
+static int fib6_info_nh_uses_dev(struct fib6_nh *nh, void *arg)
+{
+ const struct net_device *dev = arg;
+
+ if (nh->fib_nh_dev == dev)
+ return 1;
+
+ return 0;
+}
+
static bool fib6_info_uses_dev(const struct fib6_info *f6i,
const struct net_device *dev)
{
+ if (f6i->nh) {
+ struct net_device *_dev = (struct net_device *)dev;
+
+ return !!nexthop_for_each_fib6_nh(f6i->nh,
+ fib6_info_nh_uses_dev,
+ _dev);
+ }
+
if (f6i->fib6_nh->fib_nh_dev == dev)
return true;
@@ -5093,33 +5522,129 @@ static bool fib6_info_uses_dev(const struct fib6_info *f6i,
return false;
}
-int rt6_dump_route(struct fib6_info *rt, void *p_arg)
+struct fib6_nh_exception_dump_walker {
+ struct rt6_rtnl_dump_arg *dump;
+ struct fib6_info *rt;
+ unsigned int flags;
+ unsigned int skip;
+ unsigned int count;
+};
+
+static int rt6_nh_dump_exceptions(struct fib6_nh *nh, void *arg)
+{
+ struct fib6_nh_exception_dump_walker *w = arg;
+ struct rt6_rtnl_dump_arg *dump = w->dump;
+ struct rt6_exception_bucket *bucket;
+ struct rt6_exception *rt6_ex;
+ int i, err;
+
+ bucket = fib6_nh_get_excptn_bucket(nh, NULL);
+ if (!bucket)
+ return 0;
+
+ for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
+ hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
+ if (w->skip) {
+ w->skip--;
+ continue;
+ }
+
+ /* Expiration of entries doesn't bump sernum, insertion
+ * does. Removal is triggered by insertion, so we can
+ * rely on the fact that if entries change between two
+ * partial dumps, this node is scanned again completely,
+ * see rt6_insert_exception() and fib6_dump_table().
+ *
+ * Count expired entries we go through as handled
+ * entries that we'll skip next time, in case of partial
+ * node dump. Otherwise, if entries expire meanwhile,
+ * we'll skip the wrong amount.
+ */
+ if (rt6_check_expired(rt6_ex->rt6i)) {
+ w->count++;
+ continue;
+ }
+
+ err = rt6_fill_node(dump->net, dump->skb, w->rt,
+ &rt6_ex->rt6i->dst, NULL, NULL, 0,
+ RTM_NEWROUTE,
+ NETLINK_CB(dump->cb->skb).portid,
+ dump->cb->nlh->nlmsg_seq, w->flags);
+ if (err)
+ return err;
+
+ w->count++;
+ }
+ bucket++;
+ }
+
+ return 0;
+}
+
+/* Return -1 if done with node, number of handled routes on partial dump */
+int rt6_dump_route(struct fib6_info *rt, void *p_arg, unsigned int skip)
{
struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
struct fib_dump_filter *filter = &arg->filter;
unsigned int flags = NLM_F_MULTI;
struct net *net = arg->net;
+ int count = 0;
if (rt == net->ipv6.fib6_null_entry)
- return 0;
+ return -1;
if ((filter->flags & RTM_F_PREFIX) &&
!(rt->fib6_flags & RTF_PREFIX_RT)) {
/* success since this is not a prefix route */
- return 1;
+ return -1;
}
- if (filter->filter_set) {
- if ((filter->rt_type && rt->fib6_type != filter->rt_type) ||
- (filter->dev && !fib6_info_uses_dev(rt, filter->dev)) ||
- (filter->protocol && rt->fib6_protocol != filter->protocol)) {
- return 1;
- }
+ if (filter->filter_set &&
+ ((filter->rt_type && rt->fib6_type != filter->rt_type) ||
+ (filter->dev && !fib6_info_uses_dev(rt, filter->dev)) ||
+ (filter->protocol && rt->fib6_protocol != filter->protocol))) {
+ return -1;
+ }
+
+ if (filter->filter_set ||
+ !filter->dump_routes || !filter->dump_exceptions) {
flags |= NLM_F_DUMP_FILTERED;
}
- return rt6_fill_node(net, arg->skb, rt, NULL, NULL, NULL, 0,
- RTM_NEWROUTE, NETLINK_CB(arg->cb->skb).portid,
- arg->cb->nlh->nlmsg_seq, flags);
+ if (filter->dump_routes) {
+ if (skip) {
+ skip--;
+ } else {
+ if (rt6_fill_node(net, arg->skb, rt, NULL, NULL, NULL,
+ 0, RTM_NEWROUTE,
+ NETLINK_CB(arg->cb->skb).portid,
+ arg->cb->nlh->nlmsg_seq, flags)) {
+ return 0;
+ }
+ count++;
+ }
+ }
+
+ if (filter->dump_exceptions) {
+ struct fib6_nh_exception_dump_walker w = { .dump = arg,
+ .rt = rt,
+ .flags = flags,
+ .skip = skip,
+ .count = 0 };
+ int err;
+
+ if (rt->nh) {
+ err = nexthop_for_each_fib6_nh(rt->nh,
+ rt6_nh_dump_exceptions,
+ &w);
+ } else {
+ err = rt6_nh_dump_exceptions(rt->fib6_nh, &w);
+ }
+
+ if (err)
+ return count += w.count;
+ }
+
+ return -1;
}
static int inet6_rtm_valid_getroute_req(struct sk_buff *skb,
@@ -5614,6 +6139,7 @@ static int __net_init ip6_route_net_init(struct net *net)
net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
ip6_template_metrics, true);
+ INIT_LIST_HEAD(&net->ipv6.ip6_null_entry->rt6i_uncached);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
net->ipv6.fib6_has_custom_rules = false;
@@ -5625,6 +6151,7 @@ static int __net_init ip6_route_net_init(struct net *net)
net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
ip6_template_metrics, true);
+ INIT_LIST_HEAD(&net->ipv6.ip6_prohibit_entry->rt6i_uncached);
net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
sizeof(*net->ipv6.ip6_blk_hole_entry),
@@ -5634,6 +6161,7 @@ static int __net_init ip6_route_net_init(struct net *net)
net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
ip6_template_metrics, true);
+ INIT_LIST_HEAD(&net->ipv6.ip6_blk_hole_entry->rt6i_uncached);
#endif
net->ipv6.sysctl.flush_delay = 0;