From 9fbb704c3385adf5f9adfce7fd3c0bf31aa4da6d Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 13 Mar 2018 08:29:36 -0700 Subject: net/ipv6: Refactor gateway validation on route add Move gateway validation code from ip6_route_info_create into ip6_validate_gw. Code move plus adjustments to handle the potential reset of dev and idev and to make checkpatch happy. Signed-off-by: David Ahern Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller --- net/ipv6/route.c | 120 ++++++++++++++++++++++++++++++------------------------- 1 file changed, 66 insertions(+), 54 deletions(-) (limited to 'net') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 81711e3e2604..23ced851fdb1 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2550,7 +2550,7 @@ static struct rt6_info *ip6_nh_lookup_table(struct net *net, static int ip6_route_check_nh_onlink(struct net *net, struct fib6_config *cfg, - struct net_device *dev, + const struct net_device *dev, struct netlink_ext_ack *extack) { u32 tbid = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN; @@ -2626,6 +2626,68 @@ out: return err; } +static int ip6_validate_gw(struct net *net, struct fib6_config *cfg, + struct net_device **_dev, struct inet6_dev **idev, + struct netlink_ext_ack *extack) +{ + const struct in6_addr *gw_addr = &cfg->fc_gateway; + int gwa_type = ipv6_addr_type(gw_addr); + const struct net_device *dev = *_dev; + int err = -EINVAL; + + /* if gw_addr is local we will fail to detect this in case + * address is still TENTATIVE (DAD in progress). rt6_lookup() + * will return already-added prefix route via interface that + * prefix route was assigned to, which might be non-loopback. + */ + if (ipv6_chk_addr_and_flags(net, gw_addr, + gwa_type & IPV6_ADDR_LINKLOCAL ? + dev : NULL, 0, 0)) { + NL_SET_ERR_MSG(extack, "Invalid gateway address"); + goto out; + } + + if (gwa_type != (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_UNICAST)) { + /* IPv6 strictly inhibits using not link-local + * addresses as nexthop address. + * Otherwise, router will not able to send redirects. + * It is very good, but in some (rare!) circumstances + * (SIT, PtP, NBMA NOARP links) it is handy to allow + * some exceptions. --ANK + * We allow IPv4-mapped nexthops to support RFC4798-type + * addressing + */ + if (!(gwa_type & (IPV6_ADDR_UNICAST | IPV6_ADDR_MAPPED))) { + NL_SET_ERR_MSG(extack, "Invalid gateway address"); + goto out; + } + + if (cfg->fc_flags & RTNH_F_ONLINK) + err = ip6_route_check_nh_onlink(net, cfg, dev, extack); + else + err = ip6_route_check_nh(net, cfg, _dev, idev); + + if (err) + goto out; + } + + /* reload in case device was changed */ + dev = *_dev; + + err = -EINVAL; + if (!dev) { + NL_SET_ERR_MSG(extack, "Egress device not specified"); + goto out; + } else if (dev->flags & IFF_LOOPBACK) { + NL_SET_ERR_MSG(extack, + "Egress device can not be loopback device for this route"); + goto out; + } + err = 0; +out: + return err; +} + static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg, struct netlink_ext_ack *extack) { @@ -2808,61 +2870,11 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg, } if (cfg->fc_flags & RTF_GATEWAY) { - const struct in6_addr *gw_addr; - int gwa_type; - - gw_addr = &cfg->fc_gateway; - gwa_type = ipv6_addr_type(gw_addr); - - /* if gw_addr is local we will fail to detect this in case - * address is still TENTATIVE (DAD in progress). rt6_lookup() - * will return already-added prefix route via interface that - * prefix route was assigned to, which might be non-loopback. - */ - err = -EINVAL; - if (ipv6_chk_addr_and_flags(net, gw_addr, - gwa_type & IPV6_ADDR_LINKLOCAL ? - dev : NULL, 0, 0)) { - NL_SET_ERR_MSG(extack, "Invalid gateway address"); + err = ip6_validate_gw(net, cfg, &dev, &idev, extack); + if (err) goto out; - } - rt->rt6i_gateway = *gw_addr; - - if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) { - /* IPv6 strictly inhibits using not link-local - addresses as nexthop address. - Otherwise, router will not able to send redirects. - It is very good, but in some (rare!) circumstances - (SIT, PtP, NBMA NOARP links) it is handy to allow - some exceptions. --ANK - We allow IPv4-mapped nexthops to support RFC4798-type - addressing - */ - if (!(gwa_type & (IPV6_ADDR_UNICAST | - IPV6_ADDR_MAPPED))) { - NL_SET_ERR_MSG(extack, - "Invalid gateway address"); - goto out; - } - if (cfg->fc_flags & RTNH_F_ONLINK) { - err = ip6_route_check_nh_onlink(net, cfg, dev, - extack); - } else { - err = ip6_route_check_nh(net, cfg, &dev, &idev); - } - if (err) - goto out; - } - err = -EINVAL; - if (!dev) { - NL_SET_ERR_MSG(extack, "Egress device not specified"); - goto out; - } else if (dev->flags & IFF_LOOPBACK) { - NL_SET_ERR_MSG(extack, - "Egress device can not be loopback device for this route"); - goto out; - } + rt->rt6i_gateway = cfg->fc_gateway; } err = -ENODEV; -- cgit v1.2.3-59-g8ed1b From 232378e8db4780bc7145d7a0ee47f5f80a41ad6b Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 13 Mar 2018 08:29:37 -0700 Subject: net/ipv6: Change address check to always take a device argument ipv6_chk_addr_and_flags determines if an address is a local address and optionally if it is an address on a specific device. For example, it is called by ip6_route_info_create to determine if a given gateway address is a local address. The address check currently does not consider L3 domains and as a result does not allow a route to be added in one VRF if the nexthop points to an address in a second VRF. e.g., $ ip route add 2001:db8:1::/64 vrf r2 via 2001:db8:102::23 Error: Invalid gateway address. where 2001:db8:102::23 is an address on an interface in vrf r1. ipv6_chk_addr_and_flags needs to allow callers to always pass in a device with a separate argument to not limit the address to the specific device. The device is used used to determine the L3 domain of interest. To that end add an argument to skip the device check and update callers to always pass a device where possible and use the new argument to mean any address in the domain. Update a handful of users of ipv6_chk_addr with a NULL dev argument. This patch handles the change to these callers without adding the domain check. ip6_validate_gw needs to handle 2 cases - one where the device is given as part of the nexthop spec and the other where the device is resolved. There is at least 1 VRF case where deferring the check to only after the route lookup has resolved the device fails with an unintuitive error "RTNETLINK answers: No route to host" as opposed to the preferred "Error: Gateway can not be a local address." The 'no route to host' error is because of the fallback to a full lookup. The check is done twice to avoid this error. Signed-off-by: David Ahern Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller --- include/net/addrconf.h | 4 ++-- net/ipv6/addrconf.c | 11 ++++++++--- net/ipv6/anycast.c | 9 ++++++--- net/ipv6/datagram.c | 5 +++-- net/ipv6/ip6_tunnel.c | 12 ++++++++---- net/ipv6/ndisc.c | 2 +- net/ipv6/route.c | 19 +++++++++++++++---- 7 files changed, 43 insertions(+), 19 deletions(-) (limited to 'net') diff --git a/include/net/addrconf.h b/include/net/addrconf.h index c4185a7b0e90..132e5b95167a 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -69,8 +69,8 @@ int addrconf_set_dstaddr(struct net *net, void __user *arg); int ipv6_chk_addr(struct net *net, const struct in6_addr *addr, const struct net_device *dev, int strict); int ipv6_chk_addr_and_flags(struct net *net, const struct in6_addr *addr, - const struct net_device *dev, int strict, - u32 banned_flags); + const struct net_device *dev, bool skip_dev_check, + int strict, u32 banned_flags); #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) int ipv6_chk_home_addr(struct net *net, const struct in6_addr *addr); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index b5fd116c046a..0677b9732d56 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1851,19 +1851,24 @@ static int ipv6_count_addresses(const struct inet6_dev *idev) int ipv6_chk_addr(struct net *net, const struct in6_addr *addr, const struct net_device *dev, int strict) { - return ipv6_chk_addr_and_flags(net, addr, dev, strict, IFA_F_TENTATIVE); + return ipv6_chk_addr_and_flags(net, addr, dev, !dev, + strict, IFA_F_TENTATIVE); } EXPORT_SYMBOL(ipv6_chk_addr); int ipv6_chk_addr_and_flags(struct net *net, const struct in6_addr *addr, - const struct net_device *dev, int strict, - u32 banned_flags) + const struct net_device *dev, bool skip_dev_check, + int strict, u32 banned_flags) { unsigned int hash = inet6_addr_hash(net, addr); struct inet6_ifaddr *ifp; u32 ifp_flags; rcu_read_lock(); + + if (skip_dev_check) + dev = NULL; + hlist_for_each_entry_rcu(ifp, &inet6_addr_lst[hash], addr_lst) { if (!net_eq(dev_net(ifp->idev->dev), net)) continue; diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index c61718dba2e6..d580d4d456a5 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -66,7 +66,11 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr) return -EPERM; if (ipv6_addr_is_multicast(addr)) return -EINVAL; - if (ipv6_chk_addr(net, addr, NULL, 0)) + + if (ifindex) + dev = __dev_get_by_index(net, ifindex); + + if (ipv6_chk_addr_and_flags(net, addr, dev, true, 0, IFA_F_TENTATIVE)) return -EINVAL; pac = sock_kmalloc(sk, sizeof(struct ipv6_ac_socklist), GFP_KERNEL); @@ -90,8 +94,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr) dev = __dev_get_by_flags(net, IFF_UP, IFF_UP | IFF_LOOPBACK); } - } else - dev = __dev_get_by_index(net, ifindex); + } if (!dev) { err = -ENODEV; diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index fbf08ce3f5ab..b27333d7b099 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -801,8 +801,9 @@ int ip6_datagram_send_ctl(struct net *net, struct sock *sk, if (addr_type != IPV6_ADDR_ANY) { int strict = __ipv6_addr_src_scope(addr_type) <= IPV6_ADDR_SCOPE_LINKLOCAL; if (!(inet_sk(sk)->freebind || inet_sk(sk)->transparent) && - !ipv6_chk_addr(net, &src_info->ipi6_addr, - strict ? dev : NULL, 0) && + !ipv6_chk_addr_and_flags(net, &src_info->ipi6_addr, + dev, !strict, 0, + IFA_F_TENTATIVE) && !ipv6_chk_acast_addr_src(net, dev, &src_info->ipi6_addr)) err = -EINVAL; diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 5c045fa407da..456fcf942f95 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -758,9 +758,11 @@ int ip6_tnl_rcv_ctl(struct ip6_tnl *t, ldev = dev_get_by_index_rcu(net, p->link); if ((ipv6_addr_is_multicast(laddr) || - likely(ipv6_chk_addr(net, laddr, ldev, 0))) && + likely(ipv6_chk_addr_and_flags(net, laddr, ldev, false, + 0, IFA_F_TENTATIVE))) && ((p->flags & IP6_TNL_F_ALLOW_LOCAL_REMOTE) || - likely(!ipv6_chk_addr(net, raddr, NULL, 0)))) + likely(!ipv6_chk_addr_and_flags(net, raddr, ldev, true, + 0, IFA_F_TENTATIVE)))) ret = 1; } return ret; @@ -990,12 +992,14 @@ int ip6_tnl_xmit_ctl(struct ip6_tnl *t, if (p->link) ldev = dev_get_by_index_rcu(net, p->link); - if (unlikely(!ipv6_chk_addr(net, laddr, ldev, 0))) + if (unlikely(!ipv6_chk_addr_and_flags(net, laddr, ldev, false, + 0, IFA_F_TENTATIVE))) pr_warn("%s xmit: Local address not yet configured!\n", p->name); else if (!(p->flags & IP6_TNL_F_ALLOW_LOCAL_REMOTE) && !ipv6_addr_is_multicast(raddr) && - unlikely(ipv6_chk_addr(net, raddr, NULL, 0))) + unlikely(ipv6_chk_addr_and_flags(net, raddr, ldev, + true, 0, IFA_F_TENTATIVE))) pr_warn("%s xmit: Routing loop! Remote address found on this node!\n", p->name); else diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 8af5eef464c1..10024eb0c521 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -707,7 +707,7 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb) int probes = atomic_read(&neigh->probes); if (skb && ipv6_chk_addr_and_flags(dev_net(dev), &ipv6_hdr(skb)->saddr, - dev, 1, + dev, false, 1, IFA_F_TENTATIVE|IFA_F_OPTIMISTIC)) saddr = &ipv6_hdr(skb)->saddr; probes -= NEIGH_VAR(neigh->parms, UCAST_PROBES); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 23ced851fdb1..939d122e71b4 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2632,7 +2632,9 @@ static int ip6_validate_gw(struct net *net, struct fib6_config *cfg, { const struct in6_addr *gw_addr = &cfg->fc_gateway; int gwa_type = ipv6_addr_type(gw_addr); + bool skip_dev = gwa_type & IPV6_ADDR_LINKLOCAL ? false : true; const struct net_device *dev = *_dev; + bool need_addr_check = !dev; int err = -EINVAL; /* if gw_addr is local we will fail to detect this in case @@ -2640,10 +2642,9 @@ static int ip6_validate_gw(struct net *net, struct fib6_config *cfg, * will return already-added prefix route via interface that * prefix route was assigned to, which might be non-loopback. */ - if (ipv6_chk_addr_and_flags(net, gw_addr, - gwa_type & IPV6_ADDR_LINKLOCAL ? - dev : NULL, 0, 0)) { - NL_SET_ERR_MSG(extack, "Invalid gateway address"); + if (dev && + ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) { + NL_SET_ERR_MSG(extack, "Gateway can not be a local address"); goto out; } @@ -2683,6 +2684,16 @@ static int ip6_validate_gw(struct net *net, struct fib6_config *cfg, "Egress device can not be loopback device for this route"); goto out; } + + /* if we did not check gw_addr above, do so now that the + * egress device has been resolved. + */ + if (need_addr_check && + ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) { + NL_SET_ERR_MSG(extack, "Gateway can not be a local address"); + goto out; + } + err = 0; out: return err; -- cgit v1.2.3-59-g8ed1b From 1893ff20275b9b9be4892b5b5785788ce45abdea Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 13 Mar 2018 08:29:38 -0700 Subject: net/ipv6: Add l3mdev check to ipv6_chk_addr_and_flags Lookup the L3 master device for the passed in device. Only consider addresses on netdev's with the same master device. If the device is not enslaved or is NULL, then the l3mdev is NULL which means only devices not enslaved (ie, in the default domain) are considered. Signed-off-by: David Ahern Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 0677b9732d56..6fd4bbdc444f 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1856,22 +1856,37 @@ int ipv6_chk_addr(struct net *net, const struct in6_addr *addr, } EXPORT_SYMBOL(ipv6_chk_addr); +/* device argument is used to find the L3 domain of interest. If + * skip_dev_check is set, then the ifp device is not checked against + * the passed in dev argument. So the 2 cases for addresses checks are: + * 1. does the address exist in the L3 domain that dev is part of + * (skip_dev_check = true), or + * + * 2. does the address exist on the specific device + * (skip_dev_check = false) + */ int ipv6_chk_addr_and_flags(struct net *net, const struct in6_addr *addr, const struct net_device *dev, bool skip_dev_check, int strict, u32 banned_flags) { unsigned int hash = inet6_addr_hash(net, addr); + const struct net_device *l3mdev; struct inet6_ifaddr *ifp; u32 ifp_flags; rcu_read_lock(); + l3mdev = l3mdev_master_dev_rcu(dev); if (skip_dev_check) dev = NULL; hlist_for_each_entry_rcu(ifp, &inet6_addr_lst[hash], addr_lst) { if (!net_eq(dev_net(ifp->idev->dev), net)) continue; + + if (l3mdev_master_dev_rcu(ifp->idev->dev) != l3mdev) + continue; + /* Decouple optimistic from tentative for evaluation here. * Ban optimistic addresses explicitly, when required. */ -- cgit v1.2.3-59-g8ed1b