From d7e38611b81e6d7e14969c361f2b9fc07403a6c3 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 24 Oct 2018 12:58:59 -0700 Subject: net/ipv4: Put target net when address dump fails due to bad attributes If tgt_net is set based on IFA_TARGET_NETNSID attribute in the dump request, make sure all error paths call put_net. Fixes: 5fcd266a9f64 ("net/ipv4: Add support for dumping addresses for a specific device") Fixes: c33078e3dfb1 ("net/ipv4: Update inet_dump_ifaddr for strict data checking") Reported-by: Li RongQing Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/ipv4/devinet.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 63d5b58fbfdb..9250b309c742 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1761,7 +1761,7 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) struct net_device *dev; struct in_device *in_dev; struct hlist_head *head; - int err; + int err = 0; s_h = cb->args[0]; s_idx = idx = cb->args[1]; @@ -1771,12 +1771,15 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) err = inet_valid_dump_ifaddr_req(nlh, &fillargs, &tgt_net, skb->sk, cb); if (err < 0) - return err; + goto put_tgt_net; + err = 0; if (fillargs.ifindex) { dev = __dev_get_by_index(tgt_net, fillargs.ifindex); - if (!dev) - return -ENODEV; + if (!dev) { + err = -ENODEV; + goto put_tgt_net; + } in_dev = __in_dev_get_rtnl(dev); if (in_dev) { @@ -1821,7 +1824,7 @@ put_tgt_net: if (fillargs.netnsid >= 0) put_net(tgt_net); - return skb->len; + return err < 0 ? err : skb->len; } static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh, -- cgit v1.2.3-59-g8ed1b From 242afaa6968cde96824247ab984c24c466ca29f3 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 24 Oct 2018 12:59:00 -0700 Subject: net/ipv6: Put target net when address dump fails due to bad attributes If tgt_net is set based on IFA_TARGET_NETNSID attribute in the dump request, make sure all error paths call put_net. Fixes: 6371a71f3a3b ("net/ipv6: Add support for dumping addresses for a specific device") Fixes: ed6eff11790a ("net/ipv6: Update inet6_dump_addr for strict data checking") Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 45b84dd5c4eb..7eb09c86fa13 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -5089,23 +5089,25 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev; struct inet6_dev *idev; struct hlist_head *head; + int err = 0; s_h = cb->args[0]; s_idx = idx = cb->args[1]; s_ip_idx = cb->args[2]; if (cb->strict_check) { - int err; - err = inet6_valid_dump_ifaddr_req(nlh, &fillargs, &tgt_net, skb->sk, cb); if (err < 0) - return err; + goto put_tgt_net; + err = 0; if (fillargs.ifindex) { dev = __dev_get_by_index(tgt_net, fillargs.ifindex); - if (!dev) - return -ENODEV; + if (!dev) { + err = -ENODEV; + goto put_tgt_net; + } idev = __in6_dev_get(dev); if (idev) { err = in6_dump_addrs(idev, skb, cb, s_ip_idx, @@ -5144,7 +5146,7 @@ put_tgt_net: if (fillargs.netnsid >= 0) put_net(tgt_net); - return skb->len; + return err < 0 ? err : skb->len; } static int inet6_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) -- cgit v1.2.3-59-g8ed1b From ae677bbb4441309e1827e60413de92363153dccb Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 24 Oct 2018 12:59:01 -0700 Subject: net: Don't return invalid table id error when dumping all families When doing a route dump across all address families, do not error out if the table does not exist. This allows a route dump for AF_UNSPEC with a table id that may only exist for some of the families. Do return the table does not exist error if dumping routes for a specific family and the table does not exist. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/net/ip_fib.h | 1 + net/ipv4/fib_frontend.c | 4 ++++ net/ipv4/ipmr.c | 3 +++ net/ipv6/ip6_fib.c | 3 +++ net/ipv6/ip6mr.c | 3 +++ 5 files changed, 14 insertions(+) (limited to 'net') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index e8d9456bf36e..c5969762a8f4 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -226,6 +226,7 @@ struct fib_dump_filter { u32 table_id; /* filter_set is an optimization that an entry is set */ bool filter_set; + bool dump_all_families; unsigned char protocol; unsigned char rt_type; unsigned int flags; diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 5bf653f36911..6df95be96311 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -829,6 +829,7 @@ int ip_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh, return -EINVAL; } + filter->dump_all_families = (rtm->rtm_family == AF_UNSPEC); filter->flags = rtm->rtm_flags; filter->protocol = rtm->rtm_protocol; filter->rt_type = rtm->rtm_type; @@ -899,6 +900,9 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) if (filter.table_id) { tb = fib_get_table(net, filter.table_id); if (!tb) { + if (filter.dump_all_families) + return skb->len; + NL_SET_ERR_MSG(cb->extack, "ipv4: FIB table does not exist"); return -ENOENT; } diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 7a3e2acda94c..a6defbec4f1b 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -2542,6 +2542,9 @@ static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb) mrt = ipmr_get_table(sock_net(skb->sk), filter.table_id); if (!mrt) { + if (filter.dump_all_families) + return skb->len; + NL_SET_ERR_MSG(cb->extack, "ipv4: MR table does not exist"); return -ENOENT; } diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 2a058b408a6a..1b8bc008b53b 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -620,6 +620,9 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) if (arg.filter.table_id) { tb = fib6_get_table(net, arg.filter.table_id); if (!tb) { + if (arg.filter.dump_all_families) + return skb->len; + NL_SET_ERR_MSG_MOD(cb->extack, "FIB table does not exist"); return -ENOENT; } diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index c3317ffb09eb..e2ea691e42c6 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -2473,6 +2473,9 @@ static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb) mrt = ip6mr_get_table(sock_net(skb->sk), filter.table_id); if (!mrt) { + if (filter.dump_all_families) + return skb->len; + NL_SET_ERR_MSG_MOD(cb->extack, "MR table does not exist"); return -ENOENT; } -- cgit v1.2.3-59-g8ed1b From c63586dc9b3ed5d45ba82e16bf9e2170a55521e6 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 24 Oct 2018 12:59:02 -0700 Subject: net: rtnl_dump_all needs to propagate error from dumpit function If an address, route or netconf dump request is sent for AF_UNSPEC, then rtnl_dump_all is used to do the dump across all address families. If one of the dumpit functions fails (e.g., invalid attributes in the dump request) then rtnl_dump_all needs to propagate that error so the user gets an appropriate response instead of just getting no data. Fixes: effe67926624 ("net: Enable kernel side filtering of route dumps") Fixes: 5fcd266a9f64 ("net/ipv4: Add support for dumping addresses for a specific device") Fixes: 6371a71f3a3b ("net/ipv6: Add support for dumping addresses for a specific device") Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 0958c7be2c22..f679c7a7d761 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3333,6 +3333,7 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb) int idx; int s_idx = cb->family; int type = cb->nlh->nlmsg_type - RTM_BASE; + int ret = 0; if (s_idx == 0) s_idx = 1; @@ -3365,12 +3366,13 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb) cb->prev_seq = 0; cb->seq = 0; } - if (dumpit(skb, cb)) + ret = dumpit(skb, cb); + if (ret < 0) break; } cb->family = idx; - return skb->len; + return skb->len ? : ret; } struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev, -- cgit v1.2.3-59-g8ed1b From db4f1be3ca9b0ef7330763d07bf4ace83ad6f913 Mon Sep 17 00:00:00 2001 From: Sean Tranchetti Date: Tue, 23 Oct 2018 16:04:31 -0600 Subject: net: udp: fix handling of CHECKSUM_COMPLETE packets Current handling of CHECKSUM_COMPLETE packets by the UDP stack is incorrect for any packet that has an incorrect checksum value. udp4/6_csum_init() will both make a call to __skb_checksum_validate_complete() to initialize/validate the csum field when receiving a CHECKSUM_COMPLETE packet. When this packet fails validation, skb->csum will be overwritten with the pseudoheader checksum so the packet can be fully validated by software, but the skb->ip_summed value will be left as CHECKSUM_COMPLETE so that way the stack can later warn the user about their hardware spewing bad checksums. Unfortunately, leaving the SKB in this state can cause problems later on in the checksum calculation. Since the the packet is still marked as CHECKSUM_COMPLETE, udp_csum_pull_header() will SUBTRACT the checksum of the UDP header from skb->csum instead of adding it, leaving us with a garbage value in that field. Once we try to copy the packet to userspace in the udp4/6_recvmsg(), we'll make a call to skb_copy_and_csum_datagram_msg() to checksum the packet data and add it in the garbage skb->csum value to perform our final validation check. Since the value we're validating is not the proper checksum, it's possible that the folded value could come out to 0, causing us not to drop the packet. Instead, we believe that the packet was checksummed incorrectly by hardware since skb->ip_summed is still CHECKSUM_COMPLETE, and we attempt to warn the user with netdev_rx_csum_fault(skb->dev); Unfortunately, since this is the UDP path, skb->dev has been overwritten by skb->dev_scratch and is no longer a valid pointer, so we end up reading invalid memory. This patch addresses this problem in two ways: 1) Do not use the dev pointer when calling netdev_rx_csum_fault() from skb_copy_and_csum_datagram_msg(). Since this gets called from the UDP path where skb->dev has been overwritten, we have no way of knowing if the pointer is still valid. Also for the sake of consistency with the other uses of netdev_rx_csum_fault(), don't attempt to call it if the packet was checksummed by software. 2) Add better CHECKSUM_COMPLETE handling to udp4/6_csum_init(). If we receive a packet that's CHECKSUM_COMPLETE that fails verification (i.e. skb->csum_valid == 0), check who performed the calculation. It's possible that the checksum was done in software by the network stack earlier (such as Netfilter's CONNTRACK module), and if that says the checksum is bad, we can drop the packet immediately instead of waiting until we try and copy it to userspace. Otherwise, we need to mark the SKB as CHECKSUM_NONE, since the skb->csum field no longer contains the full packet checksum after the call to __skb_checksum_validate_complete(). Fixes: e6afc8ace6dd ("udp: remove headers from UDP packets before queueing") Fixes: c84d949057ca ("udp: copy skb->truesize in the first cache line") Cc: Sam Kumar Cc: Eric Dumazet Signed-off-by: Sean Tranchetti Signed-off-by: David S. Miller --- net/core/datagram.c | 5 +++-- net/ipv4/udp.c | 20 ++++++++++++++++++-- net/ipv6/ip6_checksum.c | 20 ++++++++++++++++++-- 3 files changed, 39 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/core/datagram.c b/net/core/datagram.c index 6a034eb538a1..57f3a6fcfc1e 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -808,8 +808,9 @@ int skb_copy_and_csum_datagram_msg(struct sk_buff *skb, return -EINVAL; } - if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE)) - netdev_rx_csum_fault(skb->dev); + if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) && + !skb->csum_complete_sw) + netdev_rx_csum_fault(NULL); } return 0; fault: diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index cf8252d05a01..7e048288fcab 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2120,8 +2120,24 @@ static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh, /* Note, we are only interested in != 0 or == 0, thus the * force to int. */ - return (__force int)skb_checksum_init_zero_check(skb, proto, uh->check, - inet_compute_pseudo); + err = (__force int)skb_checksum_init_zero_check(skb, proto, uh->check, + inet_compute_pseudo); + if (err) + return err; + + if (skb->ip_summed == CHECKSUM_COMPLETE && !skb->csum_valid) { + /* If SW calculated the value, we know it's bad */ + if (skb->csum_complete_sw) + return 1; + + /* HW says the value is bad. Let's validate that. + * skb->csum is no longer the full packet checksum, + * so don't treat it as such. + */ + skb_checksum_complete_unset(skb); + } + + return 0; } /* wrapper for udp_queue_rcv_skb tacking care of csum conversion and diff --git a/net/ipv6/ip6_checksum.c b/net/ipv6/ip6_checksum.c index 547515e8450a..377717045f8f 100644 --- a/net/ipv6/ip6_checksum.c +++ b/net/ipv6/ip6_checksum.c @@ -88,8 +88,24 @@ int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh, int proto) * Note, we are only interested in != 0 or == 0, thus the * force to int. */ - return (__force int)skb_checksum_init_zero_check(skb, proto, uh->check, - ip6_compute_pseudo); + err = (__force int)skb_checksum_init_zero_check(skb, proto, uh->check, + ip6_compute_pseudo); + if (err) + return err; + + if (skb->ip_summed == CHECKSUM_COMPLETE && !skb->csum_valid) { + /* If SW calculated the value, we know it's bad */ + if (skb->csum_complete_sw) + return 1; + + /* HW says the value is bad. Let's validate that. + * skb->csum is no longer the full packet checksum, + * so don't treat is as such. + */ + skb_checksum_complete_unset(skb); + } + + return 0; } EXPORT_SYMBOL(udp6_csum_init); -- cgit v1.2.3-59-g8ed1b From e72bde6b66299602087c8c2350d36a525e75d06e Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 24 Oct 2018 08:32:49 -0700 Subject: net: sched: Remove TCA_OPTIONS from policy Marco reported an error with hfsc: root@Calimero:~# tc qdisc add dev eth0 root handle 1:0 hfsc default 1 Error: Attribute failed policy validation. Apparently a few implementations pass TCA_OPTIONS as a binary instead of nested attribute, so drop TCA_OPTIONS from the policy. Fixes: 8b4c3cdd9dd8 ("net: sched: Add policy validation for tc attributes") Reported-by: Marco Berizzi Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/sched/sch_api.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 022bca98bde6..ca3b0f46de53 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1320,7 +1320,6 @@ check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w) const struct nla_policy rtm_tca_policy[TCA_MAX + 1] = { [TCA_KIND] = { .type = NLA_STRING }, - [TCA_OPTIONS] = { .type = NLA_NESTED }, [TCA_RATE] = { .type = NLA_BINARY, .len = sizeof(struct tc_estimator) }, [TCA_STAB] = { .type = NLA_NESTED }, -- cgit v1.2.3-59-g8ed1b From 4ed591c8ab44e711e56b8e021ffaf4f407c045f5 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 24 Oct 2018 13:58:39 -0700 Subject: net/ipv6: Allow onlink routes to have a device mismatch if it is the default route The intent of ip6_route_check_nh_onlink is to make sure the gateway given for an onlink route is not actually on a connected route for a different interface (e.g., 2001:db8:1::/64 is on dev eth1 and then an onlink route has a via 2001:db8:1::1 dev eth2). If the gateway lookup hits the default route then it most likely will be a different interface than the onlink route which is ok. Update ip6_route_check_nh_onlink to disregard the device mismatch if the gateway lookup hits the default route. Turns out the existing onlink tests are passing because there is no default route or it is an unreachable default, so update the onlink tests to have a default route other than unreachable. Fixes: fc1e64e1092f6 ("net/ipv6: Add support for onlink flag") Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/ipv6/route.c | 2 ++ tools/testing/selftests/net/fib-onlink-tests.sh | 14 +++++++------- 2 files changed, 9 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index e3226284e480..2a7423c39456 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2745,6 +2745,8 @@ static int ip6_route_check_nh_onlink(struct net *net, grt = ip6_nh_lookup_table(net, cfg, gw_addr, tbid, 0); if (grt) { if (!grt->dst.error && + /* ignore match if it is the default route */ + grt->from && !ipv6_addr_any(&grt->from->fib6_dst.addr) && (grt->rt6i_flags & flags || dev != grt->dst.dev)) { NL_SET_ERR_MSG(extack, "Nexthop has invalid gateway or device mismatch"); diff --git a/tools/testing/selftests/net/fib-onlink-tests.sh b/tools/testing/selftests/net/fib-onlink-tests.sh index 3991ad1a368d..864f865eee55 100755 --- a/tools/testing/selftests/net/fib-onlink-tests.sh +++ b/tools/testing/selftests/net/fib-onlink-tests.sh @@ -167,8 +167,8 @@ setup() # add vrf table ip li add ${VRF} type vrf table ${VRF_TABLE} ip li set ${VRF} up - ip ro add table ${VRF_TABLE} unreachable default - ip -6 ro add table ${VRF_TABLE} unreachable default + ip ro add table ${VRF_TABLE} unreachable default metric 8192 + ip -6 ro add table ${VRF_TABLE} unreachable default metric 8192 # create test interfaces ip li add ${NETIFS[p1]} type veth peer name ${NETIFS[p2]} @@ -185,20 +185,20 @@ setup() for n in 1 3 5 7; do ip li set ${NETIFS[p${n}]} up ip addr add ${V4ADDRS[p${n}]}/24 dev ${NETIFS[p${n}]} - ip addr add ${V6ADDRS[p${n}]}/64 dev ${NETIFS[p${n}]} + ip addr add ${V6ADDRS[p${n}]}/64 dev ${NETIFS[p${n}]} nodad done # move peer interfaces to namespace and add addresses for n in 2 4 6 8; do ip li set ${NETIFS[p${n}]} netns ${PEER_NS} up ip -netns ${PEER_NS} addr add ${V4ADDRS[p${n}]}/24 dev ${NETIFS[p${n}]} - ip -netns ${PEER_NS} addr add ${V6ADDRS[p${n}]}/64 dev ${NETIFS[p${n}]} + ip -netns ${PEER_NS} addr add ${V6ADDRS[p${n}]}/64 dev ${NETIFS[p${n}]} nodad done - set +e + ip -6 ro add default via ${V6ADDRS[p3]/::[0-9]/::64} + ip -6 ro add table ${VRF_TABLE} default via ${V6ADDRS[p7]/::[0-9]/::64} - # let DAD complete - assume default of 1 probe - sleep 1 + set +e } cleanup() -- cgit v1.2.3-59-g8ed1b From bf4cc40e9343bbe6c7400ff6f4feb46fb9338087 Mon Sep 17 00:00:00 2001 From: Bjørn Mork Date: Thu, 25 Oct 2018 21:18:25 +0200 Subject: net/{ipv4,ipv6}: Do not put target net if input nsid is invalid MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The cleanup path will put the target net when netnsid is set. So we must reset netnsid if the input is invalid. Fixes: d7e38611b81e ("net/ipv4: Put target net when address dump fails due to bad attributes") Fixes: 242afaa6968c ("net/ipv6: Put target net when address dump fails due to bad attributes") Cc: David Ahern Signed-off-by: Bjørn Mork Reviewed-by: David Ahern Signed-off-by: David S. Miller --- net/ipv4/devinet.c | 1 + net/ipv6/addrconf.c | 1 + 2 files changed, 2 insertions(+) (limited to 'net') diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 9250b309c742..a34602ae27de 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1704,6 +1704,7 @@ static int inet_valid_dump_ifaddr_req(const struct nlmsghdr *nlh, net = rtnl_get_net_ns_capable(sk, fillargs->netnsid); if (IS_ERR(net)) { + fillargs->netnsid = -1; NL_SET_ERR_MSG(extack, "ipv4: Invalid target network namespace id"); return PTR_ERR(net); } diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 7eb09c86fa13..63a808d5af15 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -5058,6 +5058,7 @@ static int inet6_valid_dump_ifaddr_req(const struct nlmsghdr *nlh, fillargs->netnsid = nla_get_s32(tb[i]); net = rtnl_get_net_ns_capable(sk, fillargs->netnsid); if (IS_ERR(net)) { + fillargs->netnsid = -1; NL_SET_ERR_MSG_MOD(extack, "Invalid target network namespace id"); return PTR_ERR(net); } -- cgit v1.2.3-59-g8ed1b From ee1abcf689353f36d9322231b4320926096bdee0 Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Wed, 24 Oct 2018 14:37:21 +0200 Subject: ipv6/ndisc: Preserve IPv6 control buffer if protocol error handlers are called Commit a61bbcf28a8c ("[NET]: Store skb->timestamp as offset to a base timestamp") introduces a neighbour control buffer and zeroes it out in ndisc_rcv(), as ndisc_recv_ns() uses it. Commit f2776ff04722 ("[IPV6]: Fix address/interface handling in UDP and DCCP, according to the scoping architecture.") introduces the usage of the IPv6 control buffer in protocol error handlers (e.g. inet6_iif() in present-day __udp6_lib_err()). Now, with commit b94f1c0904da ("ipv6: Use icmpv6_notify() to propagate redirect, instead of rt6_redirect()."), we call protocol error handlers from ndisc_redirect_rcv(), after the control buffer is already stolen and some parts are already zeroed out. This implies that inet6_iif() on this path will always return zero. This gives unexpected results on UDP socket lookup in __udp6_lib_err(), as we might actually need to match sockets for a given interface. Instead of always claiming the control buffer in ndisc_rcv(), do that only when needed. Fixes: b94f1c0904da ("ipv6: Use icmpv6_notify() to propagate redirect, instead of rt6_redirect().") Signed-off-by: Stefano Brivio Reviewed-by: Sabrina Dubroca Signed-off-by: David S. Miller --- net/ipv6/ndisc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index a25cfdd47c89..659ecf4e4b3c 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1732,10 +1732,9 @@ int ndisc_rcv(struct sk_buff *skb) return 0; } - memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb)); - switch (msg->icmph.icmp6_type) { case NDISC_NEIGHBOUR_SOLICITATION: + memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb)); ndisc_recv_ns(skb); break; -- cgit v1.2.3-59-g8ed1b From fb692ec4117f6fd25044cfb5720d6b79d400dc65 Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Thu, 25 Oct 2018 13:25:28 +0200 Subject: net/smc: fix smc_buf_unuse to use the lgr pointer The pointer to the link group is unset in the smc connection structure right before the call to smc_buf_unuse. Provide the lgr pointer to smc_buf_unuse explicitly. And move the call to smc_lgr_schedule_free_work to the end of smc_conn_free. Fixes: a6920d1d130c ("net/smc: handle unregistered buffers") Signed-off-by: Karsten Graul Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_core.c | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index e871368500e3..18daebcef181 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -122,22 +122,17 @@ static void __smc_lgr_unregister_conn(struct smc_connection *conn) sock_put(&smc->sk); /* sock_hold in smc_lgr_register_conn() */ } -/* Unregister connection and trigger lgr freeing if applicable +/* Unregister connection from lgr */ static void smc_lgr_unregister_conn(struct smc_connection *conn) { struct smc_link_group *lgr = conn->lgr; - int reduced = 0; write_lock_bh(&lgr->conns_lock); if (conn->alert_token_local) { - reduced = 1; __smc_lgr_unregister_conn(conn); } write_unlock_bh(&lgr->conns_lock); - if (!reduced || lgr->conns_num) - return; - smc_lgr_schedule_free_work(lgr); } /* Send delete link, either as client to request the initiation @@ -291,7 +286,8 @@ out: return rc; } -static void smc_buf_unuse(struct smc_connection *conn) +static void smc_buf_unuse(struct smc_connection *conn, + struct smc_link_group *lgr) { if (conn->sndbuf_desc) conn->sndbuf_desc->used = 0; @@ -301,8 +297,6 @@ static void smc_buf_unuse(struct smc_connection *conn) conn->rmb_desc->used = 0; } else { /* buf registration failed, reuse not possible */ - struct smc_link_group *lgr = conn->lgr; - write_lock_bh(&lgr->rmbs_lock); list_del(&conn->rmb_desc->list); write_unlock_bh(&lgr->rmbs_lock); @@ -315,16 +309,21 @@ static void smc_buf_unuse(struct smc_connection *conn) /* remove a finished connection from its link group */ void smc_conn_free(struct smc_connection *conn) { - if (!conn->lgr) + struct smc_link_group *lgr = conn->lgr; + + if (!lgr) return; - if (conn->lgr->is_smcd) { + if (lgr->is_smcd) { smc_ism_unset_conn(conn); tasklet_kill(&conn->rx_tsklet); } else { smc_cdc_tx_dismiss_slots(conn); } - smc_lgr_unregister_conn(conn); - smc_buf_unuse(conn); + smc_lgr_unregister_conn(conn); /* unsets conn->lgr */ + smc_buf_unuse(conn, lgr); /* allow buffer reuse */ + + if (!lgr->conns_num) + smc_lgr_schedule_free_work(lgr); } static void smc_link_clear(struct smc_link *lnk) -- cgit v1.2.3-59-g8ed1b From 5a2de63fd1a59c30c02526d427bc014b98adf508 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Fri, 26 Oct 2018 10:28:43 +0800 Subject: bridge: do not add port to router list when receives query with source 0.0.0.0 Based on RFC 4541, 2.1.1. IGMP Forwarding Rules The switch supporting IGMP snooping must maintain a list of multicast routers and the ports on which they are attached. This list can be constructed in any combination of the following ways: a) This list should be built by the snooping switch sending Multicast Router Solicitation messages as described in IGMP Multicast Router Discovery [MRDISC]. It may also snoop Multicast Router Advertisement messages sent by and to other nodes. b) The arrival port for IGMP Queries (sent by multicast routers) where the source address is not 0.0.0.0. We should not add the port to router list when receives query with source 0.0.0.0. Reported-by: Ying Xu Signed-off-by: Hangbin Liu Acked-by: Nikolay Aleksandrov Acked-by: Roopa Prabhu Signed-off-by: David S. Miller --- net/bridge/br_multicast.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 024139b51d3a..41cdafbf2ebe 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1422,7 +1422,15 @@ static void br_multicast_query_received(struct net_bridge *br, return; br_multicast_update_query_timer(br, query, max_delay); - br_multicast_mark_router(br, port); + + /* Based on RFC4541, section 2.1.1 IGMP Forwarding Rules, + * the arrival port for IGMP Queries where the source address + * is 0.0.0.0 should not be added to router port list. + */ + if ((saddr->proto == htons(ETH_P_IP) && saddr->u.ip4) || + (saddr->proto == htons(ETH_P_IPV6) && + !ipv6_addr_any(&saddr->u.ip6))) + br_multicast_mark_router(br, port); } static void br_ip4_multicast_query(struct net_bridge *br, -- cgit v1.2.3-59-g8ed1b From f64bf6b8ae802e93231155b0d92a619d896cd0bd Mon Sep 17 00:00:00 2001 From: Mike Manning Date: Fri, 26 Oct 2018 12:24:35 +0100 Subject: net: allow traceroute with a specified interface in a vrf Traceroute executed in a vrf succeeds if no device is given or if the vrf is given as the device, but fails if the interface is given as the device. This is for default UDP probes, it succeeds for TCP SYN or ICMP ECHO probes. As the skb bound dev is the interface and the sk dev is the vrf, sk lookup fails for ICMP_DEST_UNREACH and ICMP_TIME_EXCEEDED messages. The solution is for the secondary dev to be passed so that the interface is available for the device match to succeed, in the same way as is already done for non-error cases. Signed-off-by: Mike Manning Reviewed-by: David Ahern Signed-off-by: David S. Miller --- net/ipv4/udp.c | 4 ++-- net/ipv6/udp.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 7e048288fcab..ca3ed931f2a9 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -609,8 +609,8 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) struct net *net = dev_net(skb->dev); sk = __udp4_lib_lookup(net, iph->daddr, uh->dest, - iph->saddr, uh->source, skb->dev->ifindex, 0, - udptable, NULL); + iph->saddr, uh->source, skb->dev->ifindex, + inet_sdif(skb), udptable, NULL); if (!sk) { __ICMP_INC_STATS(net, ICMP_MIB_INERRORS); return; /* No socket for error */ diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 06d17ff3562f..d2d97d07ef27 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -478,7 +478,7 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, struct net *net = dev_net(skb->dev); sk = __udp6_lib_lookup(net, daddr, uh->dest, saddr, uh->source, - inet6_iif(skb), 0, udptable, skb); + inet6_iif(skb), inet6_sdif(skb), udptable, skb); if (!sk) { __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); -- cgit v1.2.3-59-g8ed1b From aab456dfa404f3a16d6f1780e62a6a8533c4d008 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 26 Oct 2018 09:33:27 -0700 Subject: net/neigh: fix NULL deref in pneigh_dump_table() pneigh can have NULL device pointer, so we need to make neigh_master_filtered() and neigh_ifindex_filtered() more robust. syzbot report : kasan: CONFIG_KASAN_INLINE enabled kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: 0000 [#1] PREEMPT SMP KASAN CPU: 0 PID: 15867 Comm: syz-executor2 Not tainted 4.19.0+ #276 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:__read_once_size include/linux/compiler.h:179 [inline] RIP: 0010:list_empty include/linux/list.h:203 [inline] RIP: 0010:netdev_master_upper_dev_get+0xa1/0x250 net/core/dev.c:6467 RSP: 0018:ffff8801bfaf7220 EFLAGS: 00010202 RAX: dffffc0000000000 RBX: 0000000000000001 RCX: ffffc90005e92000 RDX: 0000000000000016 RSI: ffffffff860b44d9 RDI: 0000000000000005 RBP: ffff8801bfaf72b0 R08: ffff8801c4c84080 R09: fffffbfff139a580 R10: fffffbfff139a580 R11: ffffffff89cd2c07 R12: 1ffff10037f5ee45 R13: 0000000000000000 R14: ffff8801bfaf7288 R15: 00000000000000b0 FS: 00007f65cc68d700(0000) GS:ffff8801dae00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000001b33a21000 CR3: 00000001c6116000 CR4: 00000000001406f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: neigh_master_filtered net/core/neighbour.c:2367 [inline] pneigh_dump_table net/core/neighbour.c:2456 [inline] neigh_dump_info+0x7a9/0x1ce0 net/core/neighbour.c:2577 netlink_dump+0x606/0x1080 net/netlink/af_netlink.c:2244 __netlink_dump_start+0x59a/0x7c0 net/netlink/af_netlink.c:2352 netlink_dump_start include/linux/netlink.h:216 [inline] rtnetlink_rcv_msg+0x809/0xc20 net/core/rtnetlink.c:4898 netlink_rcv_skb+0x172/0x440 net/netlink/af_netlink.c:2477 rtnetlink_rcv+0x1c/0x20 net/core/rtnetlink.c:4953 netlink_unicast_kernel net/netlink/af_netlink.c:1310 [inline] netlink_unicast+0x5a5/0x760 net/netlink/af_netlink.c:1336 netlink_sendmsg+0xa18/0xfc0 net/netlink/af_netlink.c:1917 sock_sendmsg_nosec net/socket.c:621 [inline] sock_sendmsg+0xd5/0x120 net/socket.c:631 sock_write_iter+0x35e/0x5c0 net/socket.c:900 call_write_iter include/linux/fs.h:1808 [inline] new_sync_write fs/read_write.c:474 [inline] __vfs_write+0x6b8/0x9f0 fs/read_write.c:487 vfs_write+0x1fc/0x560 fs/read_write.c:549 ksys_write+0x101/0x260 fs/read_write.c:598 __do_sys_write fs/read_write.c:610 [inline] __se_sys_write fs/read_write.c:607 [inline] __x64_sys_write+0x73/0xb0 fs/read_write.c:607 do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x457569 Fixes: 6f52f80e8530 ("net/neigh: Extend dump filter to proxy neighbor dumps") Signed-off-by: Eric Dumazet Cc: David Ahern Reported-by: syzbot Reviewed-by: David Ahern Tested-by: David Ahern Signed-off-by: David S. Miller --- net/core/neighbour.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/neighbour.c b/net/core/neighbour.c index ee605d9d8bd4..41954e42a2de 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -2364,7 +2364,7 @@ static bool neigh_master_filtered(struct net_device *dev, int master_idx) if (!master_idx) return false; - master = netdev_master_upper_dev_get(dev); + master = dev ? netdev_master_upper_dev_get(dev) : NULL; if (!master || master->ifindex != master_idx) return true; @@ -2373,7 +2373,7 @@ static bool neigh_master_filtered(struct net_device *dev, int master_idx) static bool neigh_ifindex_filtered(struct net_device *dev, int filter_idx) { - if (filter_idx && dev->ifindex != filter_idx) + if (filter_idx && (!dev || dev->ifindex != filter_idx)) return true; return false; -- cgit v1.2.3-59-g8ed1b