aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv6/route.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv6/route.c')
-rw-r--r--net/ipv6/route.c249
1 files changed, 119 insertions, 130 deletions
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index abcb5ae77319..e3226284e480 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -364,14 +364,11 @@ EXPORT_SYMBOL(ip6_dst_alloc);
static void ip6_dst_destroy(struct dst_entry *dst)
{
- struct dst_metrics *p = (struct dst_metrics *)DST_METRICS_PTR(dst);
struct rt6_info *rt = (struct rt6_info *)dst;
struct fib6_info *from;
struct inet6_dev *idev;
- if (p != &dst_default_metrics && refcount_dec_and_test(&p->refcnt))
- kfree(p);
-
+ ip_dst_metrics_put(dst);
rt6_uncached_list_del(rt);
idev = rt->rt6i_idev;
@@ -978,11 +975,7 @@ static void rt6_set_from(struct rt6_info *rt, struct fib6_info *from)
{
rt->rt6i_flags &= ~RTF_EXPIRES;
rcu_assign_pointer(rt->from, from);
- dst_init_metrics(&rt->dst, from->fib6_metrics->metrics, true);
- if (from->fib6_metrics != &dst_default_metrics) {
- rt->dst._metrics |= DST_METRICS_REFCOUNTED;
- refcount_inc(&from->fib6_metrics->refcnt);
- }
+ ip_dst_init_metrics(&rt->dst, from->fib6_metrics);
}
/* Caller must already hold reference to @ort */
@@ -1000,7 +993,6 @@ static void ip6_rt_copy_init(struct rt6_info *rt, struct fib6_info *ort)
#ifdef CONFIG_IPV6_SUBTREES
rt->rt6i_src = ort->fib6_src;
#endif
- rt->rt6i_prefsrc = ort->fib6_prefsrc;
}
static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
@@ -1454,11 +1446,6 @@ static int rt6_insert_exception(struct rt6_info *nrt,
if (ort->fib6_src.plen)
src_key = &nrt->rt6i_src.addr;
#endif
-
- /* Update rt6i_prefsrc as it could be changed
- * in rt6_remove_prefsrc()
- */
- nrt->rt6i_prefsrc = ort->fib6_prefsrc;
/* rt6_mtu_change() might lower mtu on ort.
* Only insert this exception route if its mtu
* is less than ort's mtu value.
@@ -1640,25 +1627,6 @@ static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
rcu_read_unlock();
}
-static void rt6_exceptions_remove_prefsrc(struct fib6_info *rt)
-{
- struct rt6_exception_bucket *bucket;
- struct rt6_exception *rt6_ex;
- int i;
-
- bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
- lockdep_is_held(&rt6_exception_lock));
-
- if (bucket) {
- for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
- hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
- rt6_ex->rt6i->rt6i_prefsrc.plen = 0;
- }
- bucket++;
- }
- }
-}
-
static bool rt6_mtu_change_route_allowed(struct inet6_dev *idev,
struct rt6_info *rt, int mtu)
{
@@ -2103,7 +2071,8 @@ struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
{
bool any_src;
- if (rt6_need_strict(&fl6->daddr)) {
+ if (ipv6_addr_type(&fl6->daddr) &
+ (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL)) {
struct dst_entry *dst;
dst = l3mdev_link_scope_lookup(net, fl6);
@@ -2373,15 +2342,14 @@ void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
{
const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
struct dst_entry *dst;
- struct flowi6 fl6;
-
- memset(&fl6, 0, sizeof(fl6));
- fl6.flowi6_oif = oif;
- fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
- fl6.daddr = iph->daddr;
- fl6.saddr = iph->saddr;
- fl6.flowlabel = ip6_flowinfo(iph);
- fl6.flowi6_uid = uid;
+ struct flowi6 fl6 = {
+ .flowi6_oif = oif,
+ .flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark),
+ .daddr = iph->daddr,
+ .saddr = iph->saddr,
+ .flowlabel = ip6_flowinfo(iph),
+ .flowi6_uid = uid,
+ };
dst = ip6_route_output(net, NULL, &fl6);
if (!dst->error)
@@ -2532,16 +2500,15 @@ void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
{
const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
struct dst_entry *dst;
- struct flowi6 fl6;
-
- memset(&fl6, 0, sizeof(fl6));
- fl6.flowi6_iif = LOOPBACK_IFINDEX;
- fl6.flowi6_oif = oif;
- fl6.flowi6_mark = mark;
- fl6.daddr = iph->daddr;
- fl6.saddr = iph->saddr;
- fl6.flowlabel = ip6_flowinfo(iph);
- fl6.flowi6_uid = uid;
+ struct flowi6 fl6 = {
+ .flowi6_iif = LOOPBACK_IFINDEX,
+ .flowi6_oif = oif,
+ .flowi6_mark = mark,
+ .daddr = iph->daddr,
+ .saddr = iph->saddr,
+ .flowlabel = ip6_flowinfo(iph),
+ .flowi6_uid = uid,
+ };
dst = ip6_route_redirect(net, &fl6, skb, &ipv6_hdr(skb)->saddr);
rt6_do_redirect(dst, NULL, skb);
@@ -2549,21 +2516,18 @@ void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
}
EXPORT_SYMBOL_GPL(ip6_redirect);
-void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
- u32 mark)
+void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif)
{
const struct ipv6hdr *iph = ipv6_hdr(skb);
const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
struct dst_entry *dst;
- struct flowi6 fl6;
-
- memset(&fl6, 0, sizeof(fl6));
- fl6.flowi6_iif = LOOPBACK_IFINDEX;
- fl6.flowi6_oif = oif;
- fl6.flowi6_mark = mark;
- fl6.daddr = msg->dest;
- fl6.saddr = iph->daddr;
- fl6.flowi6_uid = sock_net_uid(net, NULL);
+ struct flowi6 fl6 = {
+ .flowi6_iif = LOOPBACK_IFINDEX,
+ .flowi6_oif = oif,
+ .daddr = msg->dest,
+ .saddr = iph->daddr,
+ .flowi6_uid = sock_net_uid(net, NULL),
+ };
dst = ip6_route_redirect(net, &fl6, skb, &iph->saddr);
rt6_do_redirect(dst, NULL, skb);
@@ -2734,24 +2698,6 @@ out:
return entries > rt_max_size;
}
-static int ip6_convert_metrics(struct net *net, struct fib6_info *rt,
- struct fib6_config *cfg)
-{
- struct dst_metrics *p;
-
- if (!cfg->fc_mx)
- return 0;
-
- p = kzalloc(sizeof(*rt->fib6_metrics), GFP_KERNEL);
- if (unlikely(!p))
- return -ENOMEM;
-
- refcount_set(&p->refcnt, 1);
- rt->fib6_metrics = p;
-
- return ip_metrics_convert(net, cfg->fc_mx, cfg->fc_mx_len, p->metrics);
-}
-
static struct rt6_info *ip6_nh_lookup_table(struct net *net,
struct fib6_config *cfg,
const struct in6_addr *gw_addr,
@@ -3027,13 +2973,17 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
if (!rt)
goto out;
+ rt->fib6_metrics = ip_fib_metrics_init(net, cfg->fc_mx, cfg->fc_mx_len);
+ if (IS_ERR(rt->fib6_metrics)) {
+ err = PTR_ERR(rt->fib6_metrics);
+ /* Do not leave garbage there. */
+ rt->fib6_metrics = (struct dst_metrics *)&dst_default_metrics;
+ goto out;
+ }
+
if (cfg->fc_flags & RTF_ADDRCONF)
rt->dst_nocount = true;
- err = ip6_convert_metrics(net, rt, cfg);
- if (err < 0)
- goto out;
-
if (cfg->fc_flags & RTF_EXPIRES)
fib6_set_expires(rt, jiffies +
clock_t_to_jiffies(cfg->fc_expires));
@@ -3142,8 +3092,6 @@ install_route:
rt->fib6_nh.nh_dev = dev;
rt->fib6_table = table;
- cfg->fc_nlinfo.nl_net = dev_net(dev);
-
if (idev)
in6_dev_put(idev);
@@ -3635,23 +3583,23 @@ static void rtmsg_to_fib6_config(struct net *net,
struct in6_rtmsg *rtmsg,
struct fib6_config *cfg)
{
- memset(cfg, 0, sizeof(*cfg));
+ *cfg = (struct fib6_config){
+ .fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
+ : RT6_TABLE_MAIN,
+ .fc_ifindex = rtmsg->rtmsg_ifindex,
+ .fc_metric = rtmsg->rtmsg_metric,
+ .fc_expires = rtmsg->rtmsg_info,
+ .fc_dst_len = rtmsg->rtmsg_dst_len,
+ .fc_src_len = rtmsg->rtmsg_src_len,
+ .fc_flags = rtmsg->rtmsg_flags,
+ .fc_type = rtmsg->rtmsg_type,
- cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
- : RT6_TABLE_MAIN;
- cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
- cfg->fc_metric = rtmsg->rtmsg_metric;
- cfg->fc_expires = rtmsg->rtmsg_info;
- cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
- cfg->fc_src_len = rtmsg->rtmsg_src_len;
- cfg->fc_flags = rtmsg->rtmsg_flags;
- cfg->fc_type = rtmsg->rtmsg_type;
-
- cfg->fc_nlinfo.nl_net = net;
+ .fc_nlinfo.nl_net = net,
- cfg->fc_dst = rtmsg->rtmsg_dst;
- cfg->fc_src = rtmsg->rtmsg_src;
- cfg->fc_gateway = rtmsg->rtmsg_gateway;
+ .fc_dst = rtmsg->rtmsg_dst,
+ .fc_src = rtmsg->rtmsg_src,
+ .fc_gateway = rtmsg->rtmsg_gateway,
+ };
}
int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
@@ -3758,6 +3706,7 @@ struct fib6_info *addrconf_f6i_alloc(struct net *net,
if (!f6i)
return ERR_PTR(-ENOMEM);
+ f6i->fib6_metrics = ip_fib_metrics_init(net, NULL, 0);
f6i->dst_nocount = true;
f6i->dst_host = true;
f6i->fib6_protocol = RTPROT_KERNEL;
@@ -3800,8 +3749,6 @@ static int fib6_remove_prefsrc(struct fib6_info *rt, void *arg)
spin_lock_bh(&rt6_exception_lock);
/* remove prefsrc entry */
rt->fib6_prefsrc.plen = 0;
- /* need to update cache as well */
- rt6_exceptions_remove_prefsrc(rt);
spin_unlock_bh(&rt6_exception_lock);
}
return 0;
@@ -4079,8 +4026,12 @@ void rt6_sync_down_dev(struct net_device *dev, unsigned long event)
.event = event,
},
};
+ struct net *net = dev_net(dev);
- fib6_clean_all(dev_net(dev), fib6_ifdown, &arg);
+ if (net->ipv6.sysctl.skip_notify_on_dev_down)
+ fib6_clean_all_skip_notify(net, fib6_ifdown, &arg);
+ else
+ fib6_clean_all(net, fib6_ifdown, &arg);
}
void rt6_disable_ip(struct net_device *dev, unsigned long event)
@@ -4170,20 +4121,25 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
int err;
err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
- NULL);
+ extack);
if (err < 0)
goto errout;
err = -EINVAL;
rtm = nlmsg_data(nlh);
- memset(cfg, 0, sizeof(*cfg));
- cfg->fc_table = rtm->rtm_table;
- cfg->fc_dst_len = rtm->rtm_dst_len;
- cfg->fc_src_len = rtm->rtm_src_len;
- cfg->fc_flags = RTF_UP;
- cfg->fc_protocol = rtm->rtm_protocol;
- cfg->fc_type = rtm->rtm_type;
+ *cfg = (struct fib6_config){
+ .fc_table = rtm->rtm_table,
+ .fc_dst_len = rtm->rtm_dst_len,
+ .fc_src_len = rtm->rtm_src_len,
+ .fc_flags = RTF_UP,
+ .fc_protocol = rtm->rtm_protocol,
+ .fc_type = rtm->rtm_type,
+
+ .fc_nlinfo.portid = NETLINK_CB(skb).portid,
+ .fc_nlinfo.nlh = nlh,
+ .fc_nlinfo.nl_net = sock_net(skb->sk),
+ };
if (rtm->rtm_type == RTN_UNREACHABLE ||
rtm->rtm_type == RTN_BLACKHOLE ||
@@ -4199,10 +4155,6 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
cfg->fc_flags |= (rtm->rtm_flags & RTNH_F_ONLINK);
- cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
- cfg->fc_nlinfo.nlh = nlh;
- cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
-
if (tb[RTA_GATEWAY]) {
cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
cfg->fc_flags |= RTF_GATEWAY;
@@ -4815,28 +4767,52 @@ nla_put_failure:
return -EMSGSIZE;
}
+static bool fib6_info_uses_dev(const struct fib6_info *f6i,
+ const struct net_device *dev)
+{
+ if (f6i->fib6_nh.nh_dev == dev)
+ return true;
+
+ if (f6i->fib6_nsiblings) {
+ struct fib6_info *sibling, *next_sibling;
+
+ list_for_each_entry_safe(sibling, next_sibling,
+ &f6i->fib6_siblings, fib6_siblings) {
+ if (sibling->fib6_nh.nh_dev == dev)
+ return true;
+ }
+ }
+
+ return false;
+}
+
int rt6_dump_route(struct fib6_info *rt, void *p_arg)
{
struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
+ struct fib_dump_filter *filter = &arg->filter;
+ unsigned int flags = NLM_F_MULTI;
struct net *net = arg->net;
if (rt == net->ipv6.fib6_null_entry)
return 0;
- if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
- struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
-
- /* user wants prefix routes only */
- if (rtm->rtm_flags & RTM_F_PREFIX &&
- !(rt->fib6_flags & RTF_PREFIX_RT)) {
- /* success since this is not a prefix route */
+ if ((filter->flags & RTM_F_PREFIX) &&
+ !(rt->fib6_flags & RTF_PREFIX_RT)) {
+ /* success since this is not a prefix route */
+ return 1;
+ }
+ if (filter->filter_set) {
+ if ((filter->rt_type && rt->fib6_type != filter->rt_type) ||
+ (filter->dev && !fib6_info_uses_dev(rt, filter->dev)) ||
+ (filter->protocol && rt->fib6_protocol != filter->protocol)) {
return 1;
}
+ flags |= NLM_F_DUMP_FILTERED;
}
return rt6_fill_node(net, arg->skb, rt, NULL, NULL, NULL, 0,
RTM_NEWROUTE, NETLINK_CB(arg->cb->skb).portid,
- arg->cb->nlh->nlmsg_seq, NLM_F_MULTI);
+ arg->cb->nlh->nlmsg_seq, flags);
}
static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
@@ -4850,7 +4826,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
struct rt6_info *rt;
struct sk_buff *skb;
struct rtmsg *rtm;
- struct flowi6 fl6;
+ struct flowi6 fl6 = {};
bool fibmatch;
err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
@@ -4859,7 +4835,6 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
goto errout;
err = -EINVAL;
- memset(&fl6, 0, sizeof(fl6));
rtm = nlmsg_data(nlh);
fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH);
@@ -5084,7 +5059,10 @@ int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
return 0;
}
-struct ctl_table ipv6_route_table_template[] = {
+static int zero;
+static int one = 1;
+
+static struct ctl_table ipv6_route_table_template[] = {
{
.procname = "flush",
.data = &init_net.ipv6.sysctl.flush_delay,
@@ -5155,6 +5133,15 @@ struct ctl_table ipv6_route_table_template[] = {
.mode = 0644,
.proc_handler = proc_dointvec_ms_jiffies,
},
+ {
+ .procname = "skip_notify_on_dev_down",
+ .data = &init_net.ipv6.sysctl.skip_notify_on_dev_down,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ .extra1 = &zero,
+ .extra2 = &one,
+ },
{ }
};
@@ -5178,6 +5165,7 @@ struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
+ table[10].data = &net->ipv6.sysctl.skip_notify_on_dev_down;
/* Don't export sysctls to unprivileged users */
if (net->user_ns != &init_user_ns)
@@ -5242,6 +5230,7 @@ static int __net_init ip6_route_net_init(struct net *net)
net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
+ net->ipv6.sysctl.skip_notify_on_dev_down = 0;
net->ipv6.ip6_rt_gc_expire = 30*HZ;