aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv6/route.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv6/route.c')
-rw-r--r--net/ipv6/route.c165
1 files changed, 79 insertions, 86 deletions
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index cb32ce250db0..c8bc9b4ac328 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -61,6 +61,7 @@
#include <net/nexthop.h>
#include <net/lwtunnel.h>
#include <net/ip_tunnels.h>
+#include <net/l3mdev.h>
#include <asm/uaccess.h>
@@ -86,9 +87,9 @@ static void ip6_dst_ifdown(struct dst_entry *,
static int ip6_dst_gc(struct dst_ops *ops);
static int ip6_pkt_discard(struct sk_buff *skb);
-static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb);
+static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
static int ip6_pkt_prohibit(struct sk_buff *skb);
-static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb);
+static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
static void ip6_link_failure(struct sk_buff *skb);
static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
struct sk_buff *skb, u32 mtu);
@@ -142,6 +143,9 @@ static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
struct net_device *loopback_dev = net->loopback_dev;
int cpu;
+ if (dev == loopback_dev)
+ return;
+
for_each_possible_cpu(cpu) {
struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
struct rt6_info *rt;
@@ -151,14 +155,12 @@ static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
struct inet6_dev *rt_idev = rt->rt6i_idev;
struct net_device *rt_dev = rt->dst.dev;
- if (rt_idev && (rt_idev->dev == dev || !dev) &&
- rt_idev->dev != loopback_dev) {
+ if (rt_idev->dev == dev) {
rt->rt6i_idev = in6_dev_get(loopback_dev);
in6_dev_put(rt_idev);
}
- if (rt_dev && (rt_dev == dev || !dev) &&
- rt_dev != loopback_dev) {
+ if (rt_dev == dev) {
rt->dst.dev = loopback_dev;
dev_hold(rt->dst.dev);
dev_put(rt_dev);
@@ -247,12 +249,6 @@ static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
{
}
-static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
- unsigned long old)
-{
- return NULL;
-}
-
static struct dst_ops ip6_dst_blackhole_ops = {
.family = AF_INET6,
.destroy = ip6_dst_destroy,
@@ -261,7 +257,7 @@ static struct dst_ops ip6_dst_blackhole_ops = {
.default_advmss = ip6_default_advmss,
.update_pmtu = ip6_rt_blackhole_update_pmtu,
.redirect = ip6_rt_blackhole_redirect,
- .cow_metrics = ip6_rt_blackhole_cow_metrics,
+ .cow_metrics = dst_cow_metrics_generic,
.neigh_lookup = ip6_neigh_lookup,
};
@@ -308,7 +304,7 @@ static const struct rt6_info ip6_blk_hole_entry_template = {
.obsolete = DST_OBSOLETE_FORCE_CHK,
.error = -EINVAL,
.input = dst_discard,
- .output = dst_discard_sk,
+ .output = dst_discard_out,
},
.rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
.rt6i_protocol = RTPROT_KERNEL,
@@ -318,6 +314,15 @@ static const struct rt6_info ip6_blk_hole_entry_template = {
#endif
+static void rt6_info_init(struct rt6_info *rt)
+{
+ struct dst_entry *dst = &rt->dst;
+
+ memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
+ INIT_LIST_HEAD(&rt->rt6i_siblings);
+ INIT_LIST_HEAD(&rt->rt6i_uncached);
+}
+
/* allocate dst with ip6_dst_ops */
static struct rt6_info *__ip6_dst_alloc(struct net *net,
struct net_device *dev,
@@ -326,13 +331,9 @@ static struct rt6_info *__ip6_dst_alloc(struct net *net,
struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
0, DST_OBSOLETE_FORCE_CHK, flags);
- if (rt) {
- struct dst_entry *dst = &rt->dst;
+ if (rt)
+ rt6_info_init(rt);
- memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
- INIT_LIST_HEAD(&rt->rt6i_siblings);
- INIT_LIST_HEAD(&rt->rt6i_uncached);
- }
return rt;
}
@@ -421,31 +422,7 @@ static bool rt6_check_expired(const struct rt6_info *rt)
static int rt6_info_hash_nhsfn(unsigned int candidate_count,
const struct flowi6 *fl6)
{
- unsigned int val = fl6->flowi6_proto;
-
- val ^= ipv6_addr_hash(&fl6->daddr);
- val ^= ipv6_addr_hash(&fl6->saddr);
-
- /* Work only if this not encapsulated */
- switch (fl6->flowi6_proto) {
- case IPPROTO_UDP:
- case IPPROTO_TCP:
- case IPPROTO_SCTP:
- val ^= (__force u16)fl6->fl6_sport;
- val ^= (__force u16)fl6->fl6_dport;
- break;
-
- case IPPROTO_ICMPV6:
- val ^= (__force u16)fl6->fl6_icmp_type;
- val ^= (__force u16)fl6->fl6_icmp_code;
- break;
- }
- /* RFC6438 recommands to use flowlabel */
- val ^= (__force u32)fl6->flowlabel;
-
- /* Perhaps, we need to tune, this function? */
- val = val ^ (val >> 7) ^ (val >> 12);
- return val % candidate_count;
+ return get_hash_from_flowi6(fl6) % candidate_count;
}
static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
@@ -498,10 +475,10 @@ static inline struct rt6_info *rt6_device_match(struct net *net,
if (dev->flags & IFF_LOOPBACK) {
if (!sprt->rt6i_idev ||
sprt->rt6i_idev->dev->ifindex != oif) {
- if (flags & RT6_LOOKUP_F_IFACE && oif)
+ if (flags & RT6_LOOKUP_F_IFACE)
continue;
- if (local && (!oif ||
- local->rt6i_idev->dev->ifindex == oif))
+ if (local &&
+ local->rt6i_idev->dev->ifindex == oif)
continue;
}
local = sprt;
@@ -538,7 +515,7 @@ static void rt6_probe_deferred(struct work_struct *w)
container_of(w, struct __rt6_probe_work, work);
addrconf_addr_solict_mult(&work->target, &mcaddr);
- ndisc_send_ns(work->dev, NULL, &work->target, &mcaddr, NULL, NULL);
+ ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, NULL);
dev_put(work->dev);
kfree(work);
}
@@ -1068,6 +1045,9 @@ static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
saved_fn = fn;
+ if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
+ oif = 0;
+
redo_rt6_select:
rt = rt6_select(fn, oif, strict);
if (rt->rt6i_nsiblings)
@@ -1165,7 +1145,7 @@ void ip6_route_input(struct sk_buff *skb)
int flags = RT6_LOOKUP_F_HAS_SADDR;
struct ip_tunnel_info *tun_info;
struct flowi6 fl6 = {
- .flowi6_iif = skb->dev->ifindex,
+ .flowi6_iif = l3mdev_fib_oif(skb->dev),
.daddr = iph->daddr,
.saddr = iph->saddr,
.flowlabel = ip6_flowinfo(iph),
@@ -1189,15 +1169,22 @@ static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table
struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk,
struct flowi6 *fl6)
{
+ struct dst_entry *dst;
int flags = 0;
+ bool any_src;
+
+ dst = l3mdev_rt6_dst_by_oif(net, fl6);
+ if (dst)
+ return dst;
fl6->flowi6_iif = LOOPBACK_IFINDEX;
+ any_src = ipv6_addr_any(&fl6->saddr);
if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
- fl6->flowi6_oif)
+ (fl6->flowi6_oif && any_src))
flags |= RT6_LOOKUP_F_IFACE;
- if (!ipv6_addr_any(&fl6->saddr))
+ if (!any_src)
flags |= RT6_LOOKUP_F_HAS_SADDR;
else if (sk)
flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
@@ -1213,24 +1200,20 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori
rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
if (rt) {
- new = &rt->dst;
-
- memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
+ rt6_info_init(rt);
+ new = &rt->dst;
new->__use = 1;
new->input = dst_discard;
- new->output = dst_discard_sk;
+ new->output = dst_discard_out;
- if (dst_metrics_read_only(&ort->dst))
- new->_metrics = ort->dst._metrics;
- else
- dst_copy_metrics(new, &ort->dst);
+ dst_copy_metrics(new, &ort->dst);
rt->rt6i_idev = ort->rt6i_idev;
if (rt->rt6i_idev)
in6_dev_hold(rt->rt6i_idev);
rt->rt6i_gateway = ort->rt6i_gateway;
- rt->rt6i_flags = ort->rt6i_flags;
+ rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
rt->rt6i_metric = 0;
memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
@@ -1748,21 +1731,21 @@ static int ip6_convert_metrics(struct mx6_config *mxc,
return -EINVAL;
}
-int ip6_route_info_create(struct fib6_config *cfg, struct rt6_info **rt_ret)
+static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg)
{
- int err;
struct net *net = cfg->fc_nlinfo.nl_net;
struct rt6_info *rt = NULL;
struct net_device *dev = NULL;
struct inet6_dev *idev = NULL;
struct fib6_table *table;
int addr_type;
+ int err = -EINVAL;
if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
- return -EINVAL;
+ goto out;
#ifndef CONFIG_IPV6_SUBTREES
if (cfg->fc_src_len)
- return -EINVAL;
+ goto out;
#endif
if (cfg->fc_ifindex) {
err = -ENODEV;
@@ -1877,7 +1860,7 @@ int ip6_route_info_create(struct fib6_config *cfg, struct rt6_info **rt_ret)
switch (cfg->fc_type) {
case RTN_BLACKHOLE:
rt->dst.error = -EINVAL;
- rt->dst.output = dst_discard_sk;
+ rt->dst.output = dst_discard_out;
rt->dst.input = dst_discard;
break;
case RTN_PROHIBIT:
@@ -1982,9 +1965,7 @@ install_route:
cfg->fc_nlinfo.nl_net = dev_net(dev);
- *rt_ret = rt;
-
- return 0;
+ return rt;
out:
if (dev)
dev_put(dev);
@@ -1993,20 +1974,21 @@ out:
if (rt)
dst_free(&rt->dst);
- *rt_ret = NULL;
-
- return err;
+ return ERR_PTR(err);
}
int ip6_route_add(struct fib6_config *cfg)
{
struct mx6_config mxc = { .mx = NULL, };
- struct rt6_info *rt = NULL;
+ struct rt6_info *rt;
int err;
- err = ip6_route_info_create(cfg, &rt);
- if (err)
+ rt = ip6_route_info_create(cfg);
+ if (IS_ERR(rt)) {
+ err = PTR_ERR(rt);
+ rt = NULL;
goto out;
+ }
err = ip6_convert_metrics(&mxc, cfg);
if (err)
@@ -2098,7 +2080,6 @@ static int ip6_route_del(struct fib6_config *cfg)
static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
{
- struct net *net = dev_net(skb->dev);
struct netevent_redirect netevent;
struct rt6_info *rt, *nrt = NULL;
struct ndisc_options ndopts;
@@ -2159,7 +2140,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
}
rt = (struct rt6_info *) dst;
- if (rt == net->ipv6.ip6_null_entry) {
+ if (rt->rt6i_flags & RTF_REJECT) {
net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
return;
}
@@ -2288,7 +2269,6 @@ static struct rt6_info *rt6_add_route_info(struct net *net,
unsigned int pref)
{
struct fib6_config cfg = {
- .fc_table = RT6_TABLE_INFO,
.fc_metric = IP6_RT_PRIO_USER,
.fc_ifindex = ifindex,
.fc_dst_len = prefixlen,
@@ -2299,6 +2279,7 @@ static struct rt6_info *rt6_add_route_info(struct net *net,
.fc_nlinfo.nl_net = net,
};
+ cfg.fc_table = l3mdev_fib_table_by_index(net, ifindex) ? : RT6_TABLE_INFO;
cfg.fc_dst = *prefix;
cfg.fc_gateway = *gwaddr;
@@ -2339,7 +2320,7 @@ struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
unsigned int pref)
{
struct fib6_config cfg = {
- .fc_table = RT6_TABLE_DFLT,
+ .fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
.fc_metric = IP6_RT_PRIO_USER,
.fc_ifindex = dev->ifindex,
.fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
@@ -2386,7 +2367,8 @@ static void rtmsg_to_fib6_config(struct net *net,
{
memset(cfg, 0, sizeof(*cfg));
- cfg->fc_table = RT6_TABLE_MAIN;
+ cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
+ : RT6_TABLE_MAIN;
cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
cfg->fc_metric = rtmsg->rtmsg_metric;
cfg->fc_expires = rtmsg->rtmsg_info;
@@ -2470,7 +2452,7 @@ static int ip6_pkt_discard(struct sk_buff *skb)
return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
}
-static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb)
+static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
{
skb->dev = skb_dst(skb)->dev;
return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
@@ -2481,7 +2463,7 @@ static int ip6_pkt_prohibit(struct sk_buff *skb)
return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
}
-static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb)
+static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
{
skb->dev = skb_dst(skb)->dev;
return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
@@ -2495,6 +2477,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
const struct in6_addr *addr,
bool anycast)
{
+ u32 tb_id;
struct net *net = dev_net(idev->dev);
struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev,
DST_NOCOUNT);
@@ -2517,7 +2500,8 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
rt->rt6i_gateway = *addr;
rt->rt6i_dst.addr = *addr;
rt->rt6i_dst.plen = 128;
- rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
+ tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
+ rt->rt6i_table = fib6_get_table(net, tb_id);
rt->dst.flags |= DST_NOCACHE;
atomic_set(&rt->dst.__refcnt, 1);
@@ -2622,7 +2606,8 @@ void rt6_ifdown(struct net *net, struct net_device *dev)
fib6_clean_all(net, fib6_ifdown, &adn);
icmp6_clean_all(fib6_ifdown, &adn);
- rt6_uncached_list_flush_dev(net, dev);
+ if (dev)
+ rt6_uncached_list_flush_dev(net, dev);
}
struct rt6_mtu_change_arg {
@@ -2895,9 +2880,12 @@ static int ip6_route_multipath_add(struct fib6_config *cfg)
r_cfg.fc_encap_type = nla_get_u16(nla);
}
- err = ip6_route_info_create(&r_cfg, &rt);
- if (err)
+ rt = ip6_route_info_create(&r_cfg);
+ if (IS_ERR(rt)) {
+ err = PTR_ERR(rt);
+ rt = NULL;
goto cleanup;
+ }
err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
if (err) {
@@ -3276,6 +3264,11 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
} else {
fl6.flowi6_oif = oif;
+ if (netif_index_is_l3_master(net, oif)) {
+ fl6.flowi6_flags = FLOWI_FLAG_L3MDEV_SRC |
+ FLOWI_FLAG_SKIP_NH_OIF;
+ }
+
rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
}