diff options
Diffstat (limited to 'net/ipv6/route.c')
-rw-r--r-- | net/ipv6/route.c | 647 |
1 files changed, 580 insertions, 67 deletions
diff --git a/net/ipv6/route.c b/net/ipv6/route.c index fada5a13bcb2..c4d285fe0adc 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1,14 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Linux INET6 implementation * FIB front-end. * * Authors: * Pedro Roque <roque@di.fc.ul.pt> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ /* Changes: @@ -104,7 +100,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb); static int rt6_score_route(const struct fib6_nh *nh, u32 fib6_flags, int oif, int strict); -static size_t rt6_nlmsg_size(struct fib6_info *rt); +static size_t rt6_nlmsg_size(struct fib6_info *f6i); static int rt6_fill_node(struct net *net, struct sk_buff *skb, struct fib6_info *rt, struct dst_entry *dst, struct in6_addr *dest, struct in6_addr *src, @@ -432,15 +428,21 @@ void fib6_select_path(const struct net *net, struct fib6_result *res, struct fib6_info *sibling, *next_sibling; struct fib6_info *match = res->f6i; - if (!match->fib6_nsiblings || have_oif_match) + if ((!match->fib6_nsiblings && !match->nh) || have_oif_match) goto out; /* We might have already computed the hash for ICMPv6 errors. In such * case it will always be non-zero. Otherwise now is the time to do it. */ - if (!fl6->mp_hash) + if (!fl6->mp_hash && + (!match->nh || nexthop_is_multipath(match->nh))) fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL); + if (unlikely(match->nh)) { + nexthop_path_fib6_result(res, fl6->mp_hash); + return; + } + if (fl6->mp_hash <= atomic_read(&match->fib6_nh->fib_nh_upper_bound)) goto out; @@ -488,6 +490,45 @@ static bool __rt6_device_match(struct net *net, const struct fib6_nh *nh, return false; } +struct fib6_nh_dm_arg { + struct net *net; + const struct in6_addr *saddr; + int oif; + int flags; + struct fib6_nh *nh; +}; + +static int __rt6_nh_dev_match(struct fib6_nh *nh, void *_arg) +{ + struct fib6_nh_dm_arg *arg = _arg; + + arg->nh = nh; + return __rt6_device_match(arg->net, nh, arg->saddr, arg->oif, + arg->flags); +} + +/* returns fib6_nh from nexthop or NULL */ +static struct fib6_nh *rt6_nh_dev_match(struct net *net, struct nexthop *nh, + struct fib6_result *res, + const struct in6_addr *saddr, + int oif, int flags) +{ + struct fib6_nh_dm_arg arg = { + .net = net, + .saddr = saddr, + .oif = oif, + .flags = flags, + }; + + if (nexthop_is_blackhole(nh)) + return NULL; + + if (nexthop_for_each_fib6_nh(nh, __rt6_nh_dev_match, &arg)) + return arg.nh; + + return NULL; +} + static void rt6_device_match(struct net *net, struct fib6_result *res, const struct in6_addr *saddr, int oif, int flags) { @@ -496,14 +537,31 @@ static void rt6_device_match(struct net *net, struct fib6_result *res, struct fib6_nh *nh; if (!oif && ipv6_addr_any(saddr)) { - nh = f6i->fib6_nh; + if (unlikely(f6i->nh)) { + nh = nexthop_fib6_nh(f6i->nh); + if (nexthop_is_blackhole(f6i->nh)) + goto out_blackhole; + } else { + nh = f6i->fib6_nh; + } if (!(nh->fib_nh_flags & RTNH_F_DEAD)) goto out; } for (spf6i = f6i; spf6i; spf6i = rcu_dereference(spf6i->fib6_next)) { - nh = spf6i->fib6_nh; - if (__rt6_device_match(net, nh, saddr, oif, flags)) { + bool matched = false; + + if (unlikely(spf6i->nh)) { + nh = rt6_nh_dev_match(net, spf6i->nh, res, saddr, + oif, flags); + if (nh) + matched = true; + } else { + nh = spf6i->fib6_nh; + if (__rt6_device_match(net, nh, saddr, oif, flags)) + matched = true; + } + if (matched) { res->f6i = spf6i; goto out; } @@ -515,7 +573,14 @@ static void rt6_device_match(struct net *net, struct fib6_result *res, goto out; } - nh = f6i->fib6_nh; + if (unlikely(f6i->nh)) { + nh = nexthop_fib6_nh(f6i->nh); + if (nexthop_is_blackhole(f6i->nh)) + goto out_blackhole; + } else { + nh = f6i->fib6_nh; + } + if (nh->fib_nh_flags & RTNH_F_DEAD) { res->f6i = net->ipv6.fib6_null_entry; nh = res->f6i->fib6_nh; @@ -524,6 +589,12 @@ out: res->nh = nh; res->fib6_type = res->f6i->fib6_type; res->fib6_flags = res->f6i->fib6_flags; + return; + +out_blackhole: + res->fib6_flags |= RTF_REJECT; + res->fib6_type = RTN_BLACKHOLE; + res->nh = nh; } #ifdef CONFIG_IPV6_ROUTER_PREF @@ -694,6 +765,24 @@ out: return rc; } +struct fib6_nh_frl_arg { + u32 flags; + int oif; + int strict; + int *mpri; + bool *do_rr; + struct fib6_nh *nh; +}; + +static int rt6_nh_find_match(struct fib6_nh *nh, void *_arg) +{ + struct fib6_nh_frl_arg *arg = _arg; + + arg->nh = nh; + return find_match(nh, arg->flags, arg->oif, arg->strict, + arg->mpri, arg->do_rr); +} + static void __find_rr_leaf(struct fib6_info *f6i_start, struct fib6_info *nomatch, u32 metric, struct fib6_result *res, struct fib6_info **cont, @@ -704,6 +793,7 @@ static void __find_rr_leaf(struct fib6_info *f6i_start, for (f6i = f6i_start; f6i && f6i != nomatch; f6i = rcu_dereference(f6i->fib6_next)) { + bool matched = false; struct fib6_nh *nh; if (cont && f6i->fib6_metric != metric) { @@ -714,8 +804,34 @@ static void __find_rr_leaf(struct fib6_info *f6i_start, if (fib6_check_expired(f6i)) continue; - nh = f6i->fib6_nh; - if (find_match(nh, f6i->fib6_flags, oif, strict, mpri, do_rr)) { + if (unlikely(f6i->nh)) { + struct fib6_nh_frl_arg arg = { + .flags = f6i->fib6_flags, + .oif = oif, + .strict = strict, + .mpri = mpri, + .do_rr = do_rr + }; + + if (nexthop_is_blackhole(f6i->nh)) { + res->fib6_flags = RTF_REJECT; + res->fib6_type = RTN_BLACKHOLE; + res->f6i = f6i; + res->nh = nexthop_fib6_nh(f6i->nh); + return; + } + if (nexthop_for_each_fib6_nh(f6i->nh, rt6_nh_find_match, + &arg)) { + matched = true; + nh = arg.nh; + } + } else { + nh = f6i->fib6_nh; + if (find_match(nh, f6i->fib6_flags, oif, strict, + mpri, do_rr)) + matched = true; + } + if (matched) { res->f6i = f6i; res->nh = nh; res->fib6_flags = f6i->fib6_flags; @@ -1117,6 +1233,8 @@ restart: rt = net->ipv6.ip6_null_entry; dst_hold(&rt->dst); goto out; + } else if (res.fib6_flags & RTF_REJECT) { + goto do_create; } fib6_select_path(net, &res, fl6, fl6->flowi6_oif, @@ -1128,6 +1246,7 @@ restart: if (ip6_hold_safe(net, &rt)) dst_use_noref(&rt->dst, jiffies); } else { +do_create: rt = ip6_create_rt_rcu(&res); } @@ -1268,10 +1387,9 @@ static struct rt6_info *ip6_rt_pcpu_alloc(const struct fib6_result *res) /* It should be called with rcu_read_lock() acquired */ static struct rt6_info *rt6_get_pcpu_route(const struct fib6_result *res) { - struct rt6_info *pcpu_rt, **p; + struct rt6_info *pcpu_rt; - p = this_cpu_ptr(res->nh->rt6i_pcpu); - pcpu_rt = *p; + pcpu_rt = this_cpu_read(*res->nh->rt6i_pcpu); if (pcpu_rt) ip6_hold_safe(NULL, &pcpu_rt); @@ -1624,9 +1742,22 @@ out: spin_unlock_bh(&rt6_exception_lock); } +static int rt6_nh_flush_exceptions(struct fib6_nh *nh, void *arg) +{ + struct fib6_info *f6i = arg; + + fib6_nh_flush_exceptions(nh, f6i); + + return 0; +} + void rt6_flush_exceptions(struct fib6_info *f6i) { - fib6_nh_flush_exceptions(f6i->fib6_nh, f6i); + if (f6i->nh) + nexthop_for_each_fib6_nh(f6i->nh, rt6_nh_flush_exceptions, + f6i); + else + fib6_nh_flush_exceptions(f6i->fib6_nh, f6i); } /* Find cached rt in the hash table inside passed in rt @@ -1713,6 +1844,23 @@ static int fib6_nh_remove_exception(const struct fib6_nh *nh, int plen, return err; } +struct fib6_nh_excptn_arg { + struct rt6_info *rt; + int plen; +}; + +static int rt6_nh_remove_exception_rt(struct fib6_nh *nh, void *_arg) +{ + struct fib6_nh_excptn_arg *arg = _arg; + int err; + + err = fib6_nh_remove_exception(nh, arg->plen, arg->rt); + if (err == 0) + return 1; + + return 0; +} + static int rt6_remove_exception_rt(struct rt6_info *rt) { struct fib6_info *from; @@ -1721,6 +1869,20 @@ static int rt6_remove_exception_rt(struct rt6_info *rt) if (!from || !(rt->rt6i_flags & RTF_CACHE)) return -EINVAL; + if (from->nh) { + struct fib6_nh_excptn_arg arg = { + .rt = rt, + .plen = from->fib6_src.plen + }; + int rc; + + /* rc = 1 means an entry was found */ + rc = nexthop_for_each_fib6_nh(from->nh, + rt6_nh_remove_exception_rt, + &arg); + return rc ? 0 : -ENOENT; + } + return fib6_nh_remove_exception(from->fib6_nh, from->fib6_src.plen, rt); } @@ -1751,9 +1913,33 @@ static void fib6_nh_update_exception(const struct fib6_nh *nh, int plen, rt6_ex->stamp = jiffies; } +struct fib6_nh_match_arg { + const struct net_device *dev; + const struct in6_addr *gw; + struct fib6_nh *match; +}; + +/* determine if fib6_nh has given device and gateway */ +static int fib6_nh_find_match(struct fib6_nh *nh, void *_arg) +{ + struct fib6_nh_match_arg *arg = _arg; + + if (arg->dev != nh->fib_nh_dev || + (arg->gw && !nh->fib_nh_gw_family) || + (!arg->gw && nh->fib_nh_gw_family) || + (arg->gw && !ipv6_addr_equal(arg->gw, &nh->fib_nh_gw6))) + return 0; + + arg->match = nh; + + /* found a match, break the loop */ + return 1; +} + static void rt6_update_exception_stamp_rt(struct rt6_info *rt) { struct fib6_info *from; + struct fib6_nh *fib6_nh; rcu_read_lock(); @@ -1761,7 +1947,21 @@ static void rt6_update_exception_stamp_rt(struct rt6_info *rt) if (!from || !(rt->rt6i_flags & RTF_CACHE)) goto unlock; - fib6_nh_update_exception(from->fib6_nh, from->fib6_src.plen, rt); + if (from->nh) { + struct fib6_nh_match_arg arg = { + .dev = rt->dst.dev, + .gw = &rt->rt6i_gateway, + }; + + nexthop_for_each_fib6_nh(from->nh, fib6_nh_find_match, &arg); + + if (!arg.match) + return; + fib6_nh = arg.match; + } else { + fib6_nh = from->fib6_nh; + } + fib6_nh_update_exception(fib6_nh, from->fib6_src.plen, rt); unlock: rcu_read_unlock(); } @@ -1923,11 +2123,34 @@ static void fib6_nh_age_exceptions(const struct fib6_nh *nh, rcu_read_unlock_bh(); } +struct fib6_nh_age_excptn_arg { + struct fib6_gc_args *gc_args; + unsigned long now; +}; + +static int rt6_nh_age_exceptions(struct fib6_nh *nh, void *_arg) +{ + struct fib6_nh_age_excptn_arg *arg = _arg; + + fib6_nh_age_exceptions(nh, arg->gc_args, arg->now); + return 0; +} + void rt6_age_exceptions(struct fib6_info *f6i, struct fib6_gc_args *gc_args, unsigned long now) { - fib6_nh_age_exceptions(f6i->fib6_nh, gc_args, now); + if (f6i->nh) { + struct fib6_nh_age_excptn_arg arg = { + .gc_args = gc_args, + .now = now + }; + + nexthop_for_each_fib6_nh(f6i->nh, rt6_nh_age_exceptions, + &arg); + } else { + fib6_nh_age_exceptions(f6i->fib6_nh, gc_args, now); + } } /* must be called with rcu lock held */ @@ -2456,10 +2679,31 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk, rcu_read_unlock(); return; } - res.nh = res.f6i->fib6_nh; res.fib6_flags = res.f6i->fib6_flags; res.fib6_type = res.f6i->fib6_type; + if (res.f6i->nh) { + struct fib6_nh_match_arg arg = { + .dev = dst->dev, + .gw = &rt6->rt6i_gateway, + }; + + nexthop_for_each_fib6_nh(res.f6i->nh, + fib6_nh_find_match, &arg); + + /* fib6_info uses a nexthop that does not have fib6_nh + * using the dst->dev + gw. Should be impossible. + */ + if (!arg.match) { + rcu_read_unlock(); + return; + } + + res.nh = arg.match; + } else { + res.nh = res.f6i->fib6_nh; + } + nrt6 = ip6_rt_cache_alloc(&res, daddr, saddr); if (nrt6) { rt6_do_update_pmtu(nrt6, mtu); @@ -2566,6 +2810,21 @@ static bool ip6_redirect_nh_match(const struct fib6_result *res, return true; } +struct fib6_nh_rd_arg { + struct fib6_result *res; + struct flowi6 *fl6; + const struct in6_addr *gw; + struct rt6_info **ret; +}; + +static int fib6_nh_redirect_match(struct fib6_nh *nh, void *_arg) +{ + struct fib6_nh_rd_arg *arg = _arg; + + arg->res->nh = nh; + return ip6_redirect_nh_match(arg->res, arg->fl6, arg->gw, arg->ret); +} + /* Handle redirects */ struct ip6rd_flowi { struct flowi6 fl6; @@ -2581,6 +2840,12 @@ static struct rt6_info *__ip6_route_redirect(struct net *net, struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6; struct rt6_info *ret = NULL; struct fib6_result res = {}; + struct fib6_nh_rd_arg arg = { + .res = &res, + .fl6 = fl6, + .gw = &rdfl->gateway, + .ret = &ret + }; struct fib6_info *rt; struct fib6_node *fn; @@ -2605,14 +2870,24 @@ static struct rt6_info *__ip6_route_redirect(struct net *net, restart: for_each_fib6_node_rt_rcu(fn) { res.f6i = rt; - res.nh = rt->fib6_nh; - if (fib6_check_expired(rt)) continue; if (rt->fib6_flags & RTF_REJECT) break; - if (ip6_redirect_nh_match(&res, fl6, &rdfl->gateway, &ret)) - goto out; + if (unlikely(rt->nh)) { + if (nexthop_is_blackhole(rt->nh)) + continue; + /* on match, res->nh is filled in and potentially ret */ + if (nexthop_for_each_fib6_nh(rt->nh, + fib6_nh_redirect_match, + &arg)) + goto out; + } else { + res.nh = rt->fib6_nh; + if (ip6_redirect_nh_match(&res, fl6, &rdfl->gateway, + &ret)) + goto out; + } } if (!rt) @@ -3114,7 +3389,7 @@ int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh, goto out; } } - goto set_dev; + goto pcpu_alloc; } if (cfg->fc_flags & RTF_GATEWAY) { @@ -3146,17 +3421,18 @@ int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh, !netif_carrier_ok(dev)) fib6_nh->fib_nh_flags |= RTNH_F_LINKDOWN; + err = fib_nh_common_init(&fib6_nh->nh_common, cfg->fc_encap, + cfg->fc_encap_type, cfg, gfp_flags, extack); + if (err) + goto out; + +pcpu_alloc: fib6_nh->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, gfp_flags); if (!fib6_nh->rt6i_pcpu) { err = -ENOMEM; goto out; } - err = fib_nh_common_init(&fib6_nh->nh_common, cfg->fc_encap, - cfg->fc_encap_type, cfg, gfp_flags, extack); - if (err) - goto out; -set_dev: fib6_nh->fib_nh_dev = dev; fib6_nh->fib_nh_oif = dev->ifindex; err = 0; @@ -3217,7 +3493,9 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg, { struct net *net = cfg->fc_nlinfo.nl_net; struct fib6_info *rt = NULL; + struct nexthop *nh = NULL; struct fib6_table *table; + struct fib6_nh *fib6_nh; int err = -EINVAL; int addr_type; @@ -3253,6 +3531,16 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg, goto out; } #endif + if (cfg->fc_nh_id) { + nh = nexthop_find_by_id(net, cfg->fc_nh_id); + if (!nh) { + NL_SET_ERR_MSG(extack, "Nexthop id does not exist"); + goto out; + } + err = fib6_check_nexthop(nh, cfg, extack); + if (err) + goto out; + } err = -ENOBUFS; if (cfg->fc_nlinfo.nlh && @@ -3270,7 +3558,7 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg, goto out; err = -ENOMEM; - rt = fib6_info_alloc(gfp_flags, true); + rt = fib6_info_alloc(gfp_flags, !nh); if (!rt) goto out; @@ -3310,19 +3598,35 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg, ipv6_addr_prefix(&rt->fib6_src.addr, &cfg->fc_src, cfg->fc_src_len); rt->fib6_src.plen = cfg->fc_src_len; #endif - err = fib6_nh_init(net, rt->fib6_nh, cfg, gfp_flags, extack); - if (err) - goto out; + if (nh) { + if (!nexthop_get(nh)) { + NL_SET_ERR_MSG(extack, "Nexthop has been deleted"); + goto out; + } + if (rt->fib6_src.plen) { + NL_SET_ERR_MSG(extack, "Nexthops can not be used with source routing"); + goto out; + } + rt->nh = nh; + fib6_nh = nexthop_fib6_nh(rt->nh); + } else { + err = fib6_nh_init(net, rt->fib6_nh, cfg, gfp_flags, extack); + if (err) + goto out; - /* We cannot add true routes via loopback here, - * they would result in kernel looping; promote them to reject routes - */ - addr_type = ipv6_addr_type(&cfg->fc_dst); - if (fib6_is_reject(cfg->fc_flags, rt->fib6_nh->fib_nh_dev, addr_type)) - rt->fib6_flags = RTF_REJECT | RTF_NONEXTHOP; + fib6_nh = rt->fib6_nh; + + /* We cannot add true routes via loopback here, they would + * result in kernel looping; promote them to reject routes + */ + addr_type = ipv6_addr_type(&cfg->fc_dst); + if (fib6_is_reject(cfg->fc_flags, rt->fib6_nh->fib_nh_dev, + addr_type)) + rt->fib6_flags = RTF_REJECT | RTF_NONEXTHOP; + } if (!ipv6_addr_any(&cfg->fc_prefsrc)) { - struct net_device *dev = fib6_info_nh_dev(rt); + struct net_device *dev = fib6_nh->fib_nh_dev; if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) { NL_SET_ERR_MSG(extack, "Invalid source address"); @@ -3414,6 +3718,12 @@ static int __ip6_del_rt_siblings(struct fib6_info *rt, struct fib6_config *cfg) info->skip_notify = 1; } + info->skip_notify_kernel = 1; + call_fib6_multipath_entry_notifiers(net, + FIB_EVENT_ENTRY_DEL, + rt, + rt->fib6_nsiblings, + NULL); list_for_each_entry_safe(sibling, next_sibling, &rt->fib6_siblings, fib6_siblings) { @@ -3468,6 +3778,30 @@ static int ip6_del_cached_rt(struct fib6_config *cfg, struct fib6_info *rt, return 0; } +struct fib6_nh_del_cached_rt_arg { + struct fib6_config *cfg; + struct fib6_info *f6i; +}; + +static int fib6_nh_del_cached_rt(struct fib6_nh *nh, void *_arg) +{ + struct fib6_nh_del_cached_rt_arg *arg = _arg; + int rc; + + rc = ip6_del_cached_rt(arg->cfg, arg->f6i, nh); + return rc != -ESRCH ? rc : 0; +} + +static int ip6_del_cached_rt_nh(struct fib6_config *cfg, struct fib6_info *f6i) +{ + struct fib6_nh_del_cached_rt_arg arg = { + .cfg = cfg, + .f6i = f6i + }; + + return nexthop_for_each_fib6_nh(f6i->nh, fib6_nh_del_cached_rt, &arg); +} + static int ip6_route_del(struct fib6_config *cfg, struct netlink_ext_ack *extack) { @@ -3493,11 +3827,20 @@ static int ip6_route_del(struct fib6_config *cfg, for_each_fib6_node_rt_rcu(fn) { struct fib6_nh *nh; - nh = rt->fib6_nh; - if (cfg->fc_flags & RTF_CACHE) { - int rc; + if (rt->nh && rt->nh->id != cfg->fc_nh_id) + continue; - rc = ip6_del_cached_rt(cfg, rt, nh); + if (cfg->fc_flags & RTF_CACHE) { + int rc = 0; + + if (rt->nh) { + rc = ip6_del_cached_rt_nh(cfg, rt); + } else if (cfg->fc_nh_id) { + continue; + } else { + nh = rt->fib6_nh; + rc = ip6_del_cached_rt(cfg, rt, nh); + } if (rc != -ESRCH) { rcu_read_unlock(); return rc; @@ -3505,6 +3848,23 @@ static int ip6_route_del(struct fib6_config *cfg, continue; } + if (cfg->fc_metric && cfg->fc_metric != rt->fib6_metric) + continue; + if (cfg->fc_protocol && + cfg->fc_protocol != rt->fib6_protocol) + continue; + + if (rt->nh) { + if (!fib6_info_hold_safe(rt)) + continue; + rcu_read_unlock(); + + return __ip6_del_rt(rt, &cfg->fc_nlinfo); + } + if (cfg->fc_nh_id) + continue; + + nh = rt->fib6_nh; if (cfg->fc_ifindex && (!nh->fib_nh_dev || nh->fib_nh_dev->ifindex != cfg->fc_ifindex)) @@ -3512,10 +3872,6 @@ static int ip6_route_del(struct fib6_config *cfg, if (cfg->fc_flags & RTF_GATEWAY && !ipv6_addr_equal(&cfg->fc_gateway, &nh->fib_nh_gw6)) continue; - if (cfg->fc_metric && cfg->fc_metric != rt->fib6_metric) - continue; - if (cfg->fc_protocol && cfg->fc_protocol != rt->fib6_protocol) - continue; if (!fib6_info_hold_safe(rt)) continue; rcu_read_unlock(); @@ -3626,7 +3982,25 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu if (!res.f6i) goto out; - res.nh = res.f6i->fib6_nh; + if (res.f6i->nh) { + struct fib6_nh_match_arg arg = { + .dev = dst->dev, + .gw = &rt->rt6i_gateway, + }; + + nexthop_for_each_fib6_nh(res.f6i->nh, + fib6_nh_find_match, &arg); + + /* fib6_info uses a nexthop that does not have fib6_nh + * using the dst->dev. Should be impossible + */ + if (!arg.match) + goto out; + res.nh = arg.match; + } else { + res.nh = res.f6i->fib6_nh; + } + res.fib6_flags = res.f6i->fib6_flags; res.fib6_type = res.f6i->fib6_type; nrt = ip6_rt_cache_alloc(&res, &msg->dest, NULL); @@ -3678,6 +4052,9 @@ static struct fib6_info *rt6_get_route_info(struct net *net, goto out; for_each_fib6_node_rt_rcu(fn) { + /* these routes do not use nexthops */ + if (rt->nh) + continue; if (rt->fib6_nh->fib_nh_dev->ifindex != ifindex) continue; if (!(rt->fib6_flags & RTF_ROUTEINFO) || @@ -3741,8 +4118,13 @@ struct fib6_info *rt6_get_dflt_router(struct net *net, rcu_read_lock(); for_each_fib6_node_rt_rcu(&table->tb6_root) { - struct fib6_nh *nh = rt->fib6_nh; + struct fib6_nh *nh; + + /* RA routes do not use nexthops */ + if (rt->nh) + continue; + nh = rt->fib6_nh; if (dev == nh->fib_nh_dev && ((rt->fib6_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) && ipv6_addr_equal(&nh->fib_nh_gw6, addr)) @@ -3993,7 +4375,8 @@ static int fib6_remove_prefsrc(struct fib6_info *rt, void *arg) struct net *net = ((struct arg_dev_net_ip *)arg)->net; struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr; - if (((void *)rt->fib6_nh->fib_nh_dev == dev || !dev) && + if (!rt->nh && + ((void *)rt->fib6_nh->fib_nh_dev == dev || !dev) && rt != net->ipv6.fib6_null_entry && ipv6_addr_equal(addr, &rt->fib6_prefsrc.addr)) { spin_lock_bh(&rt6_exception_lock); @@ -4021,8 +4404,13 @@ void rt6_remove_prefsrc(struct inet6_ifaddr *ifp) static int fib6_clean_tohost(struct fib6_info *rt, void *arg) { struct in6_addr *gateway = (struct in6_addr *)arg; - struct fib6_nh *nh = rt->fib6_nh; + struct fib6_nh *nh; + /* RA routes do not use nexthops */ + if (rt->nh) + return 0; + + nh = rt->fib6_nh; if (((rt->fib6_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) && nh->fib_nh_gw_family && ipv6_addr_equal(gateway, &nh->fib_nh_gw6)) return -1; @@ -4069,6 +4457,7 @@ static struct fib6_info *rt6_multipath_first_sibling(const struct fib6_info *rt) return NULL; } +/* only called for fib entries with builtin fib6_nh */ static bool rt6_is_dead(const struct fib6_info *rt) { if (rt->fib6_nh->fib_nh_flags & RTNH_F_DEAD || @@ -4147,7 +4536,7 @@ static int fib6_ifup(struct fib6_info *rt, void *p_arg) const struct arg_netdev_event *arg = p_arg; struct net *net = dev_net(arg->dev); - if (rt != net->ipv6.fib6_null_entry && + if (rt != net->ipv6.fib6_null_entry && !rt->nh && rt->fib6_nh->fib_nh_dev == arg->dev) { rt->fib6_nh->fib_nh_flags &= ~arg->nh_flags; fib6_update_sernum_upto_root(net, rt); @@ -4172,6 +4561,7 @@ void rt6_sync_up(struct net_device *dev, unsigned char nh_flags) fib6_clean_all(dev_net(dev), fib6_ifup, &arg); } +/* only called for fib entries with inline fib6_nh */ static bool rt6_multipath_uses_dev(const struct fib6_info *rt, const struct net_device *dev) { @@ -4232,7 +4622,7 @@ static int fib6_ifdown(struct fib6_info *rt, void *p_arg) const struct net_device *dev = arg->dev; struct net *net = dev_net(dev); - if (rt == net->ipv6.fib6_null_entry) + if (rt == net->ipv6.fib6_null_entry || rt->nh) return 0; switch (arg->event) { @@ -4343,6 +4733,12 @@ static int rt6_mtu_change_route(struct fib6_info *f6i, void *p_arg) return 0; arg->f6i = f6i; + if (f6i->nh) { + /* fib6_nh_mtu_change only returns 0, so this is safe */ + return nexthop_for_each_fib6_nh(f6i->nh, fib6_nh_mtu_change, + arg); + } + return fib6_nh_mtu_change(f6i->fib6_nh, arg); } @@ -4375,6 +4771,7 @@ static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = { [RTA_IP_PROTO] = { .type = NLA_U8 }, [RTA_SPORT] = { .type = NLA_U16 }, [RTA_DPORT] = { .type = NLA_U16 }, + [RTA_NH_ID] = { .type = NLA_U32 }, }; static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, @@ -4421,6 +4818,16 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, cfg->fc_flags |= (rtm->rtm_flags & RTNH_F_ONLINK); + if (tb[RTA_NH_ID]) { + if (tb[RTA_GATEWAY] || tb[RTA_OIF] || + tb[RTA_MULTIPATH] || tb[RTA_ENCAP]) { + NL_SET_ERR_MSG(extack, + "Nexthop specification and nexthop id are mutually exclusive"); + goto errout; + } + cfg->fc_nh_id = nla_get_u32(tb[RTA_NH_ID]); + } + if (tb[RTA_GATEWAY]) { cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]); cfg->fc_flags |= RTF_GATEWAY; @@ -4564,6 +4971,7 @@ static int ip6_route_multipath_add(struct fib6_config *cfg, { struct fib6_info *rt_notif = NULL, *rt_last = NULL; struct nl_info *info = &cfg->fc_nlinfo; + enum fib_event_type event_type; struct fib6_config r_cfg; struct rtnexthop *rtnh; struct fib6_info *rt; @@ -4641,6 +5049,11 @@ static int ip6_route_multipath_add(struct fib6_config *cfg, */ info->skip_notify = 1; + /* For add and replace, send one notification with all nexthops. For + * append, send one notification with all appended nexthops. + */ + info->skip_notify_kernel = 1; + err_nh = NULL; list_for_each_entry(nh, &rt6_nh_list, next) { err = __ip6_ins_rt(nh->fib6_info, info, extack); @@ -4677,6 +5090,15 @@ static int ip6_route_multipath_add(struct fib6_config *cfg, nhn++; } + event_type = replace ? FIB_EVENT_ENTRY_REPLACE : FIB_EVENT_ENTRY_ADD; + err = call_fib6_multipath_entry_notifiers(info->nl_net, event_type, + rt_notif, nhn - 1, extack); + if (err) { + /* Delete all the siblings that were just added */ + err_nh = NULL; + goto add_errout; + } + /* success ... tell user about new route */ ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags); goto cleanup; @@ -4755,6 +5177,12 @@ static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, if (err < 0) return err; + if (cfg.fc_nh_id && + !nexthop_find_by_id(sock_net(skb->sk), cfg.fc_nh_id)) { + NL_SET_ERR_MSG(extack, "Nexthop id does not exist"); + return -EINVAL; + } + if (cfg.fc_mp) return ip6_route_multipath_del(&cfg, extack); else { @@ -4782,17 +5210,46 @@ static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, return ip6_route_add(&cfg, GFP_KERNEL, extack); } -static size_t rt6_nlmsg_size(struct fib6_info *rt) +/* add the overhead of this fib6_nh to nexthop_len */ +static int rt6_nh_nlmsg_size(struct fib6_nh *nh, void *arg) +{ + int *nexthop_len = arg; + + *nexthop_len += nla_total_size(0) /* RTA_MULTIPATH */ + + NLA_ALIGN(sizeof(struct rtnexthop)) + + nla_total_size(16); /* RTA_GATEWAY */ + + if (nh->fib_nh_lws) { + /* RTA_ENCAP_TYPE */ + *nexthop_len += lwtunnel_get_encap_size(nh->fib_nh_lws); + /* RTA_ENCAP */ + *nexthop_len += nla_total_size(2); + } + + return 0; +} + +static size_t rt6_nlmsg_size(struct fib6_info *f6i) { - int nexthop_len = 0; + int nexthop_len; + + if (f6i->nh) { + nexthop_len = nla_total_size(4); /* RTA_NH_ID */ + nexthop_for_each_fib6_nh(f6i->nh, rt6_nh_nlmsg_size, + &nexthop_len); + } else { + struct fib6_nh *nh = f6i->fib6_nh; - if (rt->fib6_nsiblings) { - nexthop_len = nla_total_size(0) /* RTA_MULTIPATH */ - + NLA_ALIGN(sizeof(struct rtnexthop)) - + nla_total_size(16) /* RTA_GATEWAY */ - + lwtunnel_get_encap_size(rt->fib6_nh->fib_nh_lws); + nexthop_len = 0; + if (f6i->fib6_nsiblings) { + nexthop_len = nla_total_size(0) /* RTA_MULTIPATH */ + + NLA_ALIGN(sizeof(struct rtnexthop)) + + nla_total_size(16) /* RTA_GATEWAY */ + + lwtunnel_get_encap_size(nh->fib_nh_lws); - nexthop_len *= rt->fib6_nsiblings; + nexthop_len *= f6i->fib6_nsiblings; + } + nexthop_len += lwtunnel_get_encap_size(nh->fib_nh_lws); } return NLMSG_ALIGN(sizeof(struct rtmsg)) @@ -4808,10 +5265,38 @@ static size_t rt6_nlmsg_size(struct fib6_info *rt) + nla_total_size(sizeof(struct rta_cacheinfo)) + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */ + nla_total_size(1) /* RTA_PREF */ - + lwtunnel_get_encap_size(rt->fib6_nh->fib_nh_lws) + nexthop_len; } +static int rt6_fill_node_nexthop(struct sk_buff *skb, struct nexthop *nh, + unsigned char *flags) +{ + if (nexthop_is_multipath(nh)) { + struct nlattr *mp; + + mp = nla_nest_start(skb, RTA_MULTIPATH); + if (!mp) + goto nla_put_failure; + + if (nexthop_mpath_fill_node(skb, nh)) + goto nla_put_failure; + + nla_nest_end(skb, mp); + } else { + struct fib6_nh *fib6_nh; + + fib6_nh = nexthop_fib6_nh(nh); + if (fib_nexthop_info(skb, &fib6_nh->nh_common, + flags, false) < 0) + goto nla_put_failure; + } + + return 0; + +nla_put_failure: + return -EMSGSIZE; +} + static int rt6_fill_node(struct net *net, struct sk_buff *skb, struct fib6_info *rt, struct dst_entry *dst, struct in6_addr *dest, struct in6_addr *src, @@ -4821,6 +5306,7 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb, struct rt6_info *rt6 = (struct rt6_info *)dst; struct rt6key *rt6_dst, *rt6_src; u32 *pmetrics, table, rt6_flags; + unsigned char nh_flags = 0; struct nlmsghdr *nlh; struct rtmsg *rtm; long expires = 0; @@ -4940,9 +5426,18 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb, } nla_nest_end(skb, mp); - } else { - unsigned char nh_flags = 0; + } else if (rt->nh) { + if (nla_put_u32(skb, RTA_NH_ID, rt->nh->id)) + goto nla_put_failure; + if (nexthop_is_blackhole(rt->nh)) + rtm->rtm_type = RTN_BLACKHOLE; + + if (rt6_fill_node_nexthop(skb, rt->nh, &nh_flags) < 0) + goto nla_put_failure; + + rtm->rtm_flags |= nh_flags; + } else { if (fib_nexthop_info(skb, &rt->fib6_nh->nh_common, &nh_flags, false) < 0) goto nla_put_failure; @@ -4970,9 +5465,27 @@ nla_put_failure: return -EMSGSIZE; } +static int fib6_info_nh_uses_dev(struct fib6_nh *nh, void *arg) +{ + const struct net_device *dev = arg; + + if (nh->fib_nh_dev == dev) + return 1; + + return 0; +} + static bool fib6_info_uses_dev(const struct fib6_info *f6i, const struct net_device *dev) { + if (f6i->nh) { + struct net_device *_dev = (struct net_device *)dev; + + return !!nexthop_for_each_fib6_nh(f6i->nh, + fib6_info_nh_uses_dev, + _dev); + } + if (f6i->fib6_nh->fib_nh_dev == dev) return true; |