From 5038517119d50ed0240059b1d7fc2faa92371c08 Mon Sep 17 00:00:00 2001 From: Kadlecsik József Date: Sat, 25 Jan 2020 20:39:25 +0100 Subject: netfilter: ipset: fix suspicious RCU usage in find_set_and_id find_set_and_id() is called when the NFNL_SUBSYS_IPSET mutex is held. However, in the error path there can be a follow-up recvmsg() without the mutex held. Use the start() function of struct netlink_dump_control instead of dump() to verify and report if the specified set does not exist. Thanks to Pablo Neira Ayuso for helping me to understand the subleties of the netlink protocol. Reported-by: syzbot+fc69d7cb21258ab4ae4d@syzkaller.appspotmail.com Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipset/ip_set_core.c | 41 ++++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 20 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index cf895bc80871..69c107f9ba8d 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -1483,31 +1483,34 @@ ip_set_dump_policy[IPSET_ATTR_CMD_MAX + 1] = { }; static int -dump_init(struct netlink_callback *cb, struct ip_set_net *inst) +ip_set_dump_start(struct netlink_callback *cb) { struct nlmsghdr *nlh = nlmsg_hdr(cb->skb); int min_len = nlmsg_total_size(sizeof(struct nfgenmsg)); struct nlattr *cda[IPSET_ATTR_CMD_MAX + 1]; struct nlattr *attr = (void *)nlh + min_len; + struct sk_buff *skb = cb->skb; + struct ip_set_net *inst = ip_set_pernet(sock_net(skb->sk)); u32 dump_type; - ip_set_id_t index; int ret; ret = nla_parse(cda, IPSET_ATTR_CMD_MAX, attr, nlh->nlmsg_len - min_len, ip_set_dump_policy, NULL); if (ret) - return ret; + goto error; cb->args[IPSET_CB_PROTO] = nla_get_u8(cda[IPSET_ATTR_PROTOCOL]); if (cda[IPSET_ATTR_SETNAME]) { + ip_set_id_t index; struct ip_set *set; set = find_set_and_id(inst, nla_data(cda[IPSET_ATTR_SETNAME]), &index); - if (!set) - return -ENOENT; - + if (!set) { + ret = -ENOENT; + goto error; + } dump_type = DUMP_ONE; cb->args[IPSET_CB_INDEX] = index; } else { @@ -1523,10 +1526,17 @@ dump_init(struct netlink_callback *cb, struct ip_set_net *inst) cb->args[IPSET_CB_DUMP] = dump_type; return 0; + +error: + /* We have to create and send the error message manually :-( */ + if (nlh->nlmsg_flags & NLM_F_ACK) { + netlink_ack(cb->skb, nlh, ret, NULL); + } + return ret; } static int -ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb) +ip_set_dump_do(struct sk_buff *skb, struct netlink_callback *cb) { ip_set_id_t index = IPSET_INVALID_ID, max; struct ip_set *set = NULL; @@ -1537,18 +1547,8 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb) bool is_destroyed; int ret = 0; - if (!cb->args[IPSET_CB_DUMP]) { - ret = dump_init(cb, inst); - if (ret < 0) { - nlh = nlmsg_hdr(cb->skb); - /* We have to create and send the error message - * manually :-( - */ - if (nlh->nlmsg_flags & NLM_F_ACK) - netlink_ack(cb->skb, nlh, ret, NULL); - return ret; - } - } + if (!cb->args[IPSET_CB_DUMP]) + return -EINVAL; if (cb->args[IPSET_CB_INDEX] >= inst->ip_set_max) goto out; @@ -1684,7 +1684,8 @@ static int ip_set_dump(struct net *net, struct sock *ctnl, struct sk_buff *skb, { struct netlink_dump_control c = { - .dump = ip_set_dump_start, + .start = ip_set_dump_start, + .dump = ip_set_dump_do, .done = ip_set_dump_done, }; return netlink_dump_start(ctnl, skb, nlh, &c); -- cgit v1.2.3-59-g8ed1b From fac20b9e738523fc884ee3ea5be360a321cd8bad Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 30 Jan 2020 21:50:35 +0000 Subject: rxrpc: Fix use-after-free in rxrpc_put_local() Fix rxrpc_put_local() to not access local->debug_id after calling atomic_dec_return() as, unless that returned n==0, we no longer have the right to access the object. Fixes: 06d9532fa6b3 ("rxrpc: Fix read-after-free in rxrpc_queue_local()") Signed-off-by: David Howells --- net/rxrpc/local_object.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c index 36587260cabd..3aa179efcda4 100644 --- a/net/rxrpc/local_object.c +++ b/net/rxrpc/local_object.c @@ -364,11 +364,14 @@ void rxrpc_queue_local(struct rxrpc_local *local) void rxrpc_put_local(struct rxrpc_local *local) { const void *here = __builtin_return_address(0); + unsigned int debug_id; int n; if (local) { + debug_id = local->debug_id; + n = atomic_dec_return(&local->usage); - trace_rxrpc_local(local->debug_id, rxrpc_local_put, n, here); + trace_rxrpc_local(debug_id, rxrpc_local_put, n, here); if (n == 0) call_rcu(&local->rcu, rxrpc_local_rcu); -- cgit v1.2.3-59-g8ed1b From f71dbf2fb28489a79bde0dca1c8adfb9cdb20a6b Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 30 Jan 2020 21:50:36 +0000 Subject: rxrpc: Fix insufficient receive notification generation In rxrpc_input_data(), rxrpc_notify_socket() is called if the base sequence number of the packet is immediately following the hard-ack point at the end of the function. However, this isn't sufficient, since the recvmsg side may have been advancing the window and then overrun the position in which we're adding - at which point rx_hard_ack >= seq0 and no notification is generated. Fix this by always generating a notification at the end of the input function. Without this, a long call may stall, possibly indefinitely. Fixes: 248f219cb8bc ("rxrpc: Rewrite the data and ack handling code") Signed-off-by: David Howells --- net/rxrpc/input.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 96d54e5bf7bc..ef10fbf71b15 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -599,10 +599,8 @@ ack: false, true, rxrpc_propose_ack_input_data); - if (seq0 == READ_ONCE(call->rx_hard_ack) + 1) { - trace_rxrpc_notify_socket(call->debug_id, serial); - rxrpc_notify_socket(call); - } + trace_rxrpc_notify_socket(call->debug_id, serial); + rxrpc_notify_socket(call); unlock: spin_unlock(&call->input_lock); -- cgit v1.2.3-59-g8ed1b From 04d36d748fac349b068ef621611f454010054c58 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 30 Jan 2020 21:50:36 +0000 Subject: rxrpc: Fix missing active use pinning of rxrpc_local object The introduction of a split between the reference count on rxrpc_local objects and the usage count didn't quite go far enough. A number of kernel work items need to make use of the socket to perform transmission. These also need to get an active count on the local object to prevent the socket from being closed. Fix this by getting the active count in those places. Also split out the raw active count get/put functions as these places tend to hold refs on the rxrpc_local object already, so getting and putting an extra object ref is just a waste of time. The problem can lead to symptoms like: BUG: kernel NULL pointer dereference, address: 0000000000000018 .. CPU: 2 PID: 818 Comm: kworker/u9:0 Not tainted 5.5.0-fscache+ #51 ... RIP: 0010:selinux_socket_sendmsg+0x5/0x13 ... Call Trace: security_socket_sendmsg+0x2c/0x3e sock_sendmsg+0x1a/0x46 rxrpc_send_keepalive+0x131/0x1ae rxrpc_peer_keepalive_worker+0x219/0x34b process_one_work+0x18e/0x271 worker_thread+0x1a3/0x247 kthread+0xe6/0xeb ret_from_fork+0x1f/0x30 Fixes: 730c5fd42c1e ("rxrpc: Fix local endpoint refcounting") Signed-off-by: David Howells --- net/rxrpc/af_rxrpc.c | 2 ++ net/rxrpc/ar-internal.h | 10 ++++++++++ net/rxrpc/conn_event.c | 30 ++++++++++++++++++++---------- net/rxrpc/local_object.c | 18 +++++++----------- net/rxrpc/peer_event.c | 42 +++++++++++++++++++++++------------------- 5 files changed, 62 insertions(+), 40 deletions(-) (limited to 'net') diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 9d3c4d2d893a..fe42f986cd94 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -194,6 +194,7 @@ static int rxrpc_bind(struct socket *sock, struct sockaddr *saddr, int len) service_in_use: write_unlock(&local->services_lock); rxrpc_unuse_local(local); + rxrpc_put_local(local); ret = -EADDRINUSE; error_unlock: release_sock(&rx->sk); @@ -899,6 +900,7 @@ static int rxrpc_release_sock(struct sock *sk) rxrpc_purge_queue(&sk->sk_receive_queue); rxrpc_unuse_local(rx->local); + rxrpc_put_local(rx->local); rx->local = NULL; key_put(rx->key); rx->key = NULL; diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 5e99df80e80a..94441fee85bc 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -1021,6 +1021,16 @@ void rxrpc_unuse_local(struct rxrpc_local *); void rxrpc_queue_local(struct rxrpc_local *); void rxrpc_destroy_all_locals(struct rxrpc_net *); +static inline bool __rxrpc_unuse_local(struct rxrpc_local *local) +{ + return atomic_dec_return(&local->active_users) == 0; +} + +static inline bool __rxrpc_use_local(struct rxrpc_local *local) +{ + return atomic_fetch_add_unless(&local->active_users, 1, 0) != 0; +} + /* * misc.c */ diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index 808a4723f868..06fcff2ebbba 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -438,16 +438,12 @@ again: /* * connection-level event processor */ -void rxrpc_process_connection(struct work_struct *work) +static void rxrpc_do_process_connection(struct rxrpc_connection *conn) { - struct rxrpc_connection *conn = - container_of(work, struct rxrpc_connection, processor); struct sk_buff *skb; u32 abort_code = RX_PROTOCOL_ERROR; int ret; - rxrpc_see_connection(conn); - if (test_and_clear_bit(RXRPC_CONN_EV_CHALLENGE, &conn->events)) rxrpc_secure_connection(conn); @@ -475,18 +471,32 @@ void rxrpc_process_connection(struct work_struct *work) } } -out: - rxrpc_put_connection(conn); - _leave(""); return; requeue_and_leave: skb_queue_head(&conn->rx_queue, skb); - goto out; + return; protocol_error: if (rxrpc_abort_connection(conn, ret, abort_code) < 0) goto requeue_and_leave; rxrpc_free_skb(skb, rxrpc_skb_freed); - goto out; + return; +} + +void rxrpc_process_connection(struct work_struct *work) +{ + struct rxrpc_connection *conn = + container_of(work, struct rxrpc_connection, processor); + + rxrpc_see_connection(conn); + + if (__rxrpc_use_local(conn->params.local)) { + rxrpc_do_process_connection(conn); + rxrpc_unuse_local(conn->params.local); + } + + rxrpc_put_connection(conn); + _leave(""); + return; } diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c index 3aa179efcda4..a6c1349e965d 100644 --- a/net/rxrpc/local_object.c +++ b/net/rxrpc/local_object.c @@ -383,14 +383,11 @@ void rxrpc_put_local(struct rxrpc_local *local) */ struct rxrpc_local *rxrpc_use_local(struct rxrpc_local *local) { - unsigned int au; - local = rxrpc_get_local_maybe(local); if (!local) return NULL; - au = atomic_fetch_add_unless(&local->active_users, 1, 0); - if (au == 0) { + if (!__rxrpc_use_local(local)) { rxrpc_put_local(local); return NULL; } @@ -404,14 +401,11 @@ struct rxrpc_local *rxrpc_use_local(struct rxrpc_local *local) */ void rxrpc_unuse_local(struct rxrpc_local *local) { - unsigned int au; - if (local) { - au = atomic_dec_return(&local->active_users); - if (au == 0) + if (__rxrpc_unuse_local(local)) { + rxrpc_get_local(local); rxrpc_queue_local(local); - else - rxrpc_put_local(local); + } } } @@ -468,7 +462,7 @@ static void rxrpc_local_processor(struct work_struct *work) do { again = false; - if (atomic_read(&local->active_users) == 0) { + if (!__rxrpc_use_local(local)) { rxrpc_local_destroyer(local); break; } @@ -482,6 +476,8 @@ static void rxrpc_local_processor(struct work_struct *work) rxrpc_process_local_events(local); again = true; } + + __rxrpc_unuse_local(local); } while (again); rxrpc_put_local(local); diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c index 48f67a9b1037..923b263c401b 100644 --- a/net/rxrpc/peer_event.c +++ b/net/rxrpc/peer_event.c @@ -364,27 +364,31 @@ static void rxrpc_peer_keepalive_dispatch(struct rxrpc_net *rxnet, if (!rxrpc_get_peer_maybe(peer)) continue; - spin_unlock_bh(&rxnet->peer_hash_lock); - - keepalive_at = peer->last_tx_at + RXRPC_KEEPALIVE_TIME; - slot = keepalive_at - base; - _debug("%02x peer %u t=%d {%pISp}", - cursor, peer->debug_id, slot, &peer->srx.transport); + if (__rxrpc_use_local(peer->local)) { + spin_unlock_bh(&rxnet->peer_hash_lock); + + keepalive_at = peer->last_tx_at + RXRPC_KEEPALIVE_TIME; + slot = keepalive_at - base; + _debug("%02x peer %u t=%d {%pISp}", + cursor, peer->debug_id, slot, &peer->srx.transport); + + if (keepalive_at <= base || + keepalive_at > base + RXRPC_KEEPALIVE_TIME) { + rxrpc_send_keepalive(peer); + slot = RXRPC_KEEPALIVE_TIME; + } - if (keepalive_at <= base || - keepalive_at > base + RXRPC_KEEPALIVE_TIME) { - rxrpc_send_keepalive(peer); - slot = RXRPC_KEEPALIVE_TIME; + /* A transmission to this peer occurred since last we + * examined it so put it into the appropriate future + * bucket. + */ + slot += cursor; + slot &= mask; + spin_lock_bh(&rxnet->peer_hash_lock); + list_add_tail(&peer->keepalive_link, + &rxnet->peer_keepalive[slot & mask]); + rxrpc_unuse_local(peer->local); } - - /* A transmission to this peer occurred since last we examined - * it so put it into the appropriate future bucket. - */ - slot += cursor; - slot &= mask; - spin_lock_bh(&rxnet->peer_hash_lock); - list_add_tail(&peer->keepalive_link, - &rxnet->peer_keepalive[slot & mask]); rxrpc_put_peer_locked(peer); } -- cgit v1.2.3-59-g8ed1b From b9e0102a57d768bdb99cbbfa01225f73d58e03bc Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 28 Jan 2020 11:07:27 -0800 Subject: netfilter: Use kvcalloc Convert the uses of kvmalloc_array with __GFP_ZERO to the equivalent kvcalloc. Signed-off-by: Joe Perches Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_core.c | 3 +-- net/netfilter/x_tables.c | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index f4c4b467c87e..d1305423640f 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -2248,8 +2248,7 @@ void *nf_ct_alloc_hashtable(unsigned int *sizep, int nulls) BUILD_BUG_ON(sizeof(struct hlist_nulls_head) != sizeof(struct hlist_head)); nr_slots = *sizep = roundup(*sizep, PAGE_SIZE / sizeof(struct hlist_nulls_head)); - hash = kvmalloc_array(nr_slots, sizeof(struct hlist_nulls_head), - GFP_KERNEL | __GFP_ZERO); + hash = kvcalloc(nr_slots, sizeof(struct hlist_nulls_head), GFP_KERNEL); if (hash && nulls) for (i = 0; i < nr_slots; i++) diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index ce70c2576bb2..e27c6c5ba9df 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -939,14 +939,14 @@ EXPORT_SYMBOL(xt_check_entry_offsets); * * @size: number of entries * - * Return: NULL or kmalloc'd or vmalloc'd array + * Return: NULL or zeroed kmalloc'd or vmalloc'd array */ unsigned int *xt_alloc_entry_offsets(unsigned int size) { if (size > XT_MAX_TABLE_SIZE / sizeof(unsigned int)) return NULL; - return kvmalloc_array(size, sizeof(unsigned int), GFP_KERNEL | __GFP_ZERO); + return kvcalloc(size, sizeof(unsigned int), GFP_KERNEL); } EXPORT_SYMBOL(xt_alloc_entry_offsets); -- cgit v1.2.3-59-g8ed1b From 91bfaa15a379e9af24f71fb4ee08d8019b6e8ec7 Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Thu, 30 Jan 2020 18:04:35 +0200 Subject: netfilter: flowtable: Fix hardware flush order on nf_flow_table_cleanup On netdev down event, nf_flow_table_cleanup() is called for the relevant device and it cleans all the tables that are on that device. If one of those tables has hardware offload flag, nf_flow_table_iterate_cleanup flushes hardware and then runs the gc. But the gc can queue more hardware work, which will take time to execute. Instead first add the work, then flush it, to execute it now. Fixes: c29f74e0df7a ("netfilter: nf_flow_table: hardware offload support") Signed-off-by: Paul Blakey Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_flow_table_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index 7e91989a1b55..14a069c72bb2 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -529,9 +529,9 @@ static void nf_flow_table_do_cleanup(struct flow_offload *flow, void *data) static void nf_flow_table_iterate_cleanup(struct nf_flowtable *flowtable, struct net_device *dev) { - nf_flow_table_offload_flush(flowtable); nf_flow_table_iterate(flowtable, nf_flow_table_do_cleanup, dev); flush_delayed_work(&flowtable->gc_work); + nf_flow_table_offload_flush(flowtable); } void nf_flow_table_cleanup(struct net_device *dev) -- cgit v1.2.3-59-g8ed1b From 0f34f30a1be80f3f59efeaab596396bc698e7337 Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Thu, 30 Jan 2020 18:04:36 +0200 Subject: netfilter: flowtable: Fix missing flush hardware on table free If entries exist when freeing a hardware offload enabled table, we queue work for hardware while running the gc iteration. Execute it (flush) after queueing. Fixes: c29f74e0df7a ("netfilter: nf_flow_table: hardware offload support") Signed-off-by: Paul Blakey Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_flow_table_core.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index 14a069c72bb2..8af28e10b4e6 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -553,6 +553,7 @@ void nf_flow_table_free(struct nf_flowtable *flow_table) cancel_delayed_work_sync(&flow_table->gc_work); nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL); nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step, flow_table); + nf_flow_table_offload_flush(flow_table); rhashtable_destroy(&flow_table->rhashtable); } EXPORT_SYMBOL_GPL(nf_flow_table_free); -- cgit v1.2.3-59-g8ed1b From c22208b7ce3ef0c2c184ff0d9f6423614b1799d9 Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Thu, 30 Jan 2020 18:04:37 +0200 Subject: netfilter: flowtable: Fix setting forgotten NF_FLOW_HW_DEAD flag During the refactor this was accidently removed. Fixes: ae29045018c8 ("netfilter: flowtable: add nf_flow_offload_tuple() helper") Signed-off-by: Paul Blakey Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_flow_table_offload.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c index c8b70ffeef0c..83e1db37c3b0 100644 --- a/net/netfilter/nf_flow_table_offload.c +++ b/net/netfilter/nf_flow_table_offload.c @@ -675,6 +675,7 @@ static void flow_offload_work_del(struct flow_offload_work *offload) { flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_ORIGINAL); flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_REPLY); + set_bit(NF_FLOW_HW_DEAD, &offload->flow->flags); } static void flow_offload_tuple_stats(struct flow_offload_work *offload, -- cgit v1.2.3-59-g8ed1b From c13c48c00a6bc1febc73902505bdec0967bd7095 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 31 Jan 2020 09:14:47 -0800 Subject: tcp: clear tp->total_retrans in tcp_disconnect() total_retrans needs to be cleared in tcp_disconnect(). tcp_disconnect() is rarely used, but it is worth fixing it. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Eric Dumazet Cc: SeongJae Park Signed-off-by: Jakub Kicinski --- net/ipv4/tcp.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 484485ae74c2..dd57f1e36181 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2626,6 +2626,7 @@ int tcp_disconnect(struct sock *sk, int flags) tcp_set_ca_state(sk, TCP_CA_Open); tp->is_sack_reneg = 0; tcp_clear_retrans(tp); + tp->total_retrans = 0; inet_csk_delack_init(sk); /* Initialize rcv_mss to TCP_MIN_MSS to avoid division by 0 * issue in __tcp_select_window() -- cgit v1.2.3-59-g8ed1b From 2fbdd56251b5c62f96589f39eded277260de7267 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 31 Jan 2020 10:22:47 -0800 Subject: tcp: clear tp->delivered in tcp_disconnect() tp->delivered needs to be cleared in tcp_disconnect(). tcp_disconnect() is rarely used, but it is worth fixing it. Fixes: ddf1af6fa00e ("tcp: new delivery accounting") Signed-off-by: Eric Dumazet Cc: Yuchung Cheng Cc: Neal Cardwell Acked-by: Yuchung Cheng Acked-by: Neal Cardwell Acked-by: Soheil Hassas Yeganeh Signed-off-by: Jakub Kicinski --- net/ipv4/tcp.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index dd57f1e36181..a8ffdfb61f42 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2622,6 +2622,7 @@ int tcp_disconnect(struct sock *sk, int flags) tp->snd_cwnd = TCP_INIT_CWND; tp->snd_cwnd_cnt = 0; tp->window_clamp = 0; + tp->delivered = 0; tp->delivered_ce = 0; tcp_set_ca_state(sk, TCP_CA_Open); tp->is_sack_reneg = 0; -- cgit v1.2.3-59-g8ed1b From db7ffee6f3eb3683cdcaeddecc0a630a14546fe3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 31 Jan 2020 10:32:41 -0800 Subject: tcp: clear tp->data_segs{in|out} in tcp_disconnect() tp->data_segs_in and tp->data_segs_out need to be cleared in tcp_disconnect(). tcp_disconnect() is rarely used, but it is worth fixing it. Fixes: a44d6eacdaf5 ("tcp: Add RFC4898 tcpEStatsPerfDataSegsOut/In") Signed-off-by: Eric Dumazet Cc: Martin KaFai Lau Cc: Yuchung Cheng Cc: Neal Cardwell Acked-by: Neal Cardwell Signed-off-by: Jakub Kicinski --- net/ipv4/tcp.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index a8ffdfb61f42..3b601823f69a 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2643,6 +2643,8 @@ int tcp_disconnect(struct sock *sk, int flags) tp->bytes_acked = 0; tp->bytes_received = 0; tp->bytes_retrans = 0; + tp->data_segs_in = 0; + tp->data_segs_out = 0; tp->duplicate_sack[0].start_seq = 0; tp->duplicate_sack[0].end_seq = 0; tp->dsack_dups = 0; -- cgit v1.2.3-59-g8ed1b From 784f8344de750a41344f4bbbebb8507a730fc99c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 31 Jan 2020 10:44:50 -0800 Subject: tcp: clear tp->segs_{in|out} in tcp_disconnect() tp->segs_in and tp->segs_out need to be cleared in tcp_disconnect(). tcp_disconnect() is rarely used, but it is worth fixing it. Fixes: 2efd055c53c0 ("tcp: add tcpi_segs_in and tcpi_segs_out to tcp_info") Signed-off-by: Eric Dumazet Cc: Marcelo Ricardo Leitner Cc: Yuchung Cheng Cc: Neal Cardwell Acked-by: Neal Cardwell Signed-off-by: Jakub Kicinski --- net/ipv4/tcp.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 3b601823f69a..eb2d80519f8e 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2639,6 +2639,8 @@ int tcp_disconnect(struct sock *sk, int flags) sk->sk_rx_dst = NULL; tcp_saved_syn_free(tp); tp->compressed_ack = 0; + tp->segs_in = 0; + tp->segs_out = 0; tp->bytes_sent = 0; tp->bytes_acked = 0; tp->bytes_received = 0; -- cgit v1.2.3-59-g8ed1b From cb3c0e6bdf64d0d124e94ce43cbe4ccbb9b37f51 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 31 Jan 2020 15:27:04 -0800 Subject: cls_rsvp: fix rsvp_policy NLA_BINARY can be confusing, since .len value represents the max size of the blob. cls_rsvp really wants user space to provide long enough data for TCA_RSVP_DST and TCA_RSVP_SRC attributes. BUG: KMSAN: uninit-value in rsvp_get net/sched/cls_rsvp.h:258 [inline] BUG: KMSAN: uninit-value in gen_handle net/sched/cls_rsvp.h:402 [inline] BUG: KMSAN: uninit-value in rsvp_change+0x1ae9/0x4220 net/sched/cls_rsvp.h:572 CPU: 1 PID: 13228 Comm: syz-executor.1 Not tainted 5.5.0-rc5-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x1c9/0x220 lib/dump_stack.c:118 kmsan_report+0xf7/0x1e0 mm/kmsan/kmsan_report.c:118 __msan_warning+0x58/0xa0 mm/kmsan/kmsan_instr.c:215 rsvp_get net/sched/cls_rsvp.h:258 [inline] gen_handle net/sched/cls_rsvp.h:402 [inline] rsvp_change+0x1ae9/0x4220 net/sched/cls_rsvp.h:572 tc_new_tfilter+0x31fe/0x5010 net/sched/cls_api.c:2104 rtnetlink_rcv_msg+0xcb7/0x1570 net/core/rtnetlink.c:5415 netlink_rcv_skb+0x451/0x650 net/netlink/af_netlink.c:2477 rtnetlink_rcv+0x50/0x60 net/core/rtnetlink.c:5442 netlink_unicast_kernel net/netlink/af_netlink.c:1302 [inline] netlink_unicast+0xf9e/0x1100 net/netlink/af_netlink.c:1328 netlink_sendmsg+0x1248/0x14d0 net/netlink/af_netlink.c:1917 sock_sendmsg_nosec net/socket.c:639 [inline] sock_sendmsg net/socket.c:659 [inline] ____sys_sendmsg+0x12b6/0x1350 net/socket.c:2330 ___sys_sendmsg net/socket.c:2384 [inline] __sys_sendmsg+0x451/0x5f0 net/socket.c:2417 __do_sys_sendmsg net/socket.c:2426 [inline] __se_sys_sendmsg+0x97/0xb0 net/socket.c:2424 __x64_sys_sendmsg+0x4a/0x70 net/socket.c:2424 do_syscall_64+0xb8/0x160 arch/x86/entry/common.c:296 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x45b349 Code: ad b6 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 7b b6 fb ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007f269d43dc78 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 00007f269d43e6d4 RCX: 000000000045b349 RDX: 0000000000000000 RSI: 00000000200001c0 RDI: 0000000000000003 RBP: 000000000075bfc8 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00000000ffffffff R13: 00000000000009c2 R14: 00000000004cb338 R15: 000000000075bfd4 Uninit was created at: kmsan_save_stack_with_flags mm/kmsan/kmsan.c:144 [inline] kmsan_internal_poison_shadow+0x66/0xd0 mm/kmsan/kmsan.c:127 kmsan_slab_alloc+0x8a/0xe0 mm/kmsan/kmsan_hooks.c:82 slab_alloc_node mm/slub.c:2774 [inline] __kmalloc_node_track_caller+0xb40/0x1200 mm/slub.c:4382 __kmalloc_reserve net/core/skbuff.c:141 [inline] __alloc_skb+0x2fd/0xac0 net/core/skbuff.c:209 alloc_skb include/linux/skbuff.h:1049 [inline] netlink_alloc_large_skb net/netlink/af_netlink.c:1174 [inline] netlink_sendmsg+0x7d3/0x14d0 net/netlink/af_netlink.c:1892 sock_sendmsg_nosec net/socket.c:639 [inline] sock_sendmsg net/socket.c:659 [inline] ____sys_sendmsg+0x12b6/0x1350 net/socket.c:2330 ___sys_sendmsg net/socket.c:2384 [inline] __sys_sendmsg+0x451/0x5f0 net/socket.c:2417 __do_sys_sendmsg net/socket.c:2426 [inline] __se_sys_sendmsg+0x97/0xb0 net/socket.c:2424 __x64_sys_sendmsg+0x4a/0x70 net/socket.c:2424 do_syscall_64+0xb8/0x160 arch/x86/entry/common.c:296 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fixes: 6fa8c0144b77 ("[NET_SCHED]: Use nla_policy for attribute validation in classifiers") Signed-off-by: Eric Dumazet Reported-by: syzbot Acked-by: Cong Wang Signed-off-by: Jakub Kicinski --- net/sched/cls_rsvp.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h index c22624131949..d36949d9382c 100644 --- a/net/sched/cls_rsvp.h +++ b/net/sched/cls_rsvp.h @@ -463,10 +463,8 @@ static u32 gen_tunnel(struct rsvp_head *data) static const struct nla_policy rsvp_policy[TCA_RSVP_MAX + 1] = { [TCA_RSVP_CLASSID] = { .type = NLA_U32 }, - [TCA_RSVP_DST] = { .type = NLA_BINARY, - .len = RSVP_DST_LEN * sizeof(u32) }, - [TCA_RSVP_SRC] = { .type = NLA_BINARY, - .len = RSVP_DST_LEN * sizeof(u32) }, + [TCA_RSVP_DST] = { .len = RSVP_DST_LEN * sizeof(u32) }, + [TCA_RSVP_SRC] = { .len = RSVP_DST_LEN * sizeof(u32) }, [TCA_RSVP_PINFO] = { .len = sizeof(struct tc_rsvp_pinfo) }, }; -- cgit v1.2.3-59-g8ed1b From 9603d47bad4642118fa19fd1562569663d9235f6 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Sun, 2 Feb 2020 03:38:26 +0000 Subject: tcp: Reduce SYN resend delay if a suspicous ACK is received When closing a connection, the two acks that required to change closing socket's status to FIN_WAIT_2 and then TIME_WAIT could be processed in reverse order. This is possible in RSS disabled environments such as a connection inside a host. For example, expected state transitions and required packets for the disconnection will be similar to below flow. 00 (Process A) (Process B) 01 ESTABLISHED ESTABLISHED 02 close() 03 FIN_WAIT_1 04 ---FIN--> 05 CLOSE_WAIT 06 <--ACK--- 07 FIN_WAIT_2 08 <--FIN/ACK--- 09 TIME_WAIT 10 ---ACK--> 11 LAST_ACK 12 CLOSED CLOSED In some cases such as LINGER option applied socket, the FIN and FIN/ACK will be substituted to RST and RST/ACK, but there is no difference in the main logic. The acks in lines 6 and 8 are the acks. If the line 8 packet is processed before the line 6 packet, it will be just ignored as it is not a expected packet, and the later process of the line 6 packet will change the status of Process A to FIN_WAIT_2, but as it has already handled line 8 packet, it will not go to TIME_WAIT and thus will not send the line 10 packet to Process B. Thus, Process B will left in CLOSE_WAIT status, as below. 00 (Process A) (Process B) 01 ESTABLISHED ESTABLISHED 02 close() 03 FIN_WAIT_1 04 ---FIN--> 05 CLOSE_WAIT 06 (<--ACK---) 07 (<--FIN/ACK---) 08 (fired in right order) 09 <--FIN/ACK--- 10 <--ACK--- 11 (processed in reverse order) 12 FIN_WAIT_2 Later, if the Process B sends SYN to Process A for reconnection using the same port, Process A will responds with an ACK for the last flow, which has no increased sequence number. Thus, Process A will send RST, wait for TIMEOUT_INIT (one second in default), and then try reconnection. If reconnections are frequent, the one second latency spikes can be a big problem. Below is a tcpdump results of the problem: 14.436259 IP 127.0.0.1.45150 > 127.0.0.1.4242: Flags [S], seq 2560603644 14.436266 IP 127.0.0.1.4242 > 127.0.0.1.45150: Flags [.], ack 5, win 512 14.436271 IP 127.0.0.1.45150 > 127.0.0.1.4242: Flags [R], seq 2541101298 /* ONE SECOND DELAY */ 15.464613 IP 127.0.0.1.45150 > 127.0.0.1.4242: Flags [S], seq 2560603644 This commit mitigates the problem by reducing the delay for the next SYN if the suspicous ACK is received while in SYN_SENT state. Following commit will add a selftest, which can be also helpful for understanding of this issue. Signed-off-by: Eric Dumazet Signed-off-by: SeongJae Park Signed-off-by: Jakub Kicinski --- net/ipv4/tcp_input.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index e325b4506e25..316ebdf8151d 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5908,8 +5908,14 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, * the segment and return)" */ if (!after(TCP_SKB_CB(skb)->ack_seq, tp->snd_una) || - after(TCP_SKB_CB(skb)->ack_seq, tp->snd_nxt)) + after(TCP_SKB_CB(skb)->ack_seq, tp->snd_nxt)) { + /* Previous FIN/ACK or RST/ACK might be ignored. */ + if (icsk->icsk_retransmits == 0) + inet_csk_reset_xmit_timer(sk, + ICSK_TIME_RETRANS, + TCP_TIMEOUT_MIN, TCP_RTO_MAX); goto reset_and_undo; + } if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr && !between(tp->rx_opt.rcv_tsecr, tp->retrans_stamp, -- cgit v1.2.3-59-g8ed1b From 5273a191dca65a675dc0bcf3909e59c6933e2831 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 30 Jan 2020 21:50:36 +0000 Subject: rxrpc: Fix NULL pointer deref due to call->conn being cleared on disconnect When a call is disconnected, the connection pointer from the call is cleared to make sure it isn't used again and to prevent further attempted transmission for the call. Unfortunately, there might be a daemon trying to use it at the same time to transmit a packet. Fix this by keeping call->conn set, but setting a flag on the call to indicate disconnection instead. Remove also the bits in the transmission functions where the conn pointer is checked and a ref taken under spinlock as this is now redundant. Fixes: 8d94aa381dab ("rxrpc: Calls shouldn't hold socket refs") Signed-off-by: David Howells --- net/rxrpc/ar-internal.h | 1 + net/rxrpc/call_object.c | 4 ++-- net/rxrpc/conn_client.c | 3 +-- net/rxrpc/conn_object.c | 4 ++-- net/rxrpc/output.c | 27 +++++++++------------------ 5 files changed, 15 insertions(+), 24 deletions(-) (limited to 'net') diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 94441fee85bc..7d730c438404 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -490,6 +490,7 @@ enum rxrpc_call_flag { RXRPC_CALL_RX_HEARD, /* The peer responded at least once to this call */ RXRPC_CALL_RX_UNDERRUN, /* Got data underrun */ RXRPC_CALL_IS_INTR, /* The call is interruptible */ + RXRPC_CALL_DISCONNECTED, /* The call has been disconnected */ }; /* diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index a31c18c09894..dbdbc4f18b5e 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -493,7 +493,7 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call) _debug("RELEASE CALL %p (%d CONN %p)", call, call->debug_id, conn); - if (conn) + if (conn && !test_bit(RXRPC_CALL_DISCONNECTED, &call->flags)) rxrpc_disconnect_call(call); if (call->security) call->security->free_call_crypto(call); @@ -569,6 +569,7 @@ static void rxrpc_rcu_destroy_call(struct rcu_head *rcu) struct rxrpc_call *call = container_of(rcu, struct rxrpc_call, rcu); struct rxrpc_net *rxnet = call->rxnet; + rxrpc_put_connection(call->conn); rxrpc_put_peer(call->peer); kfree(call->rxtx_buffer); kfree(call->rxtx_annotations); @@ -590,7 +591,6 @@ void rxrpc_cleanup_call(struct rxrpc_call *call) ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE); ASSERT(test_bit(RXRPC_CALL_RELEASED, &call->flags)); - ASSERTCMP(call->conn, ==, NULL); rxrpc_cleanup_ring(call); rxrpc_free_skb(call->tx_pending, rxrpc_skb_cleaned); diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index 376370cd9285..ea7d4c21f889 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -785,6 +785,7 @@ void rxrpc_disconnect_client_call(struct rxrpc_call *call) u32 cid; spin_lock(&conn->channel_lock); + set_bit(RXRPC_CALL_DISCONNECTED, &call->flags); cid = call->cid; if (cid) { @@ -792,7 +793,6 @@ void rxrpc_disconnect_client_call(struct rxrpc_call *call) chan = &conn->channels[channel]; } trace_rxrpc_client(conn, channel, rxrpc_client_chan_disconnect); - call->conn = NULL; /* Calls that have never actually been assigned a channel can simply be * discarded. If the conn didn't get used either, it will follow @@ -908,7 +908,6 @@ out: spin_unlock(&rxnet->client_conn_cache_lock); out_2: spin_unlock(&conn->channel_lock); - rxrpc_put_connection(conn); _leave(""); return; diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index 38d718e90dc6..c0b3154f7a7e 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -171,6 +171,8 @@ void __rxrpc_disconnect_call(struct rxrpc_connection *conn, _enter("%d,%x", conn->debug_id, call->cid); + set_bit(RXRPC_CALL_DISCONNECTED, &call->flags); + if (rcu_access_pointer(chan->call) == call) { /* Save the result of the call so that we can repeat it if necessary * through the channel, whilst disposing of the actual call record. @@ -223,9 +225,7 @@ void rxrpc_disconnect_call(struct rxrpc_call *call) __rxrpc_disconnect_call(conn, call); spin_unlock(&conn->channel_lock); - call->conn = NULL; conn->idle_timestamp = jiffies; - rxrpc_put_connection(conn); } /* diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 935bb60fff56..bad3d2420344 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -129,7 +129,7 @@ static size_t rxrpc_fill_out_ack(struct rxrpc_connection *conn, int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping, rxrpc_serial_t *_serial) { - struct rxrpc_connection *conn = NULL; + struct rxrpc_connection *conn; struct rxrpc_ack_buffer *pkt; struct msghdr msg; struct kvec iov[2]; @@ -139,18 +139,14 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping, int ret; u8 reason; - spin_lock_bh(&call->lock); - if (call->conn) - conn = rxrpc_get_connection_maybe(call->conn); - spin_unlock_bh(&call->lock); - if (!conn) + if (test_bit(RXRPC_CALL_DISCONNECTED, &call->flags)) return -ECONNRESET; pkt = kzalloc(sizeof(*pkt), GFP_KERNEL); - if (!pkt) { - rxrpc_put_connection(conn); + if (!pkt) return -ENOMEM; - } + + conn = call->conn; msg.msg_name = &call->peer->srx.transport; msg.msg_namelen = call->peer->srx.transport_len; @@ -244,7 +240,6 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping, } out: - rxrpc_put_connection(conn); kfree(pkt); return ret; } @@ -254,7 +249,7 @@ out: */ int rxrpc_send_abort_packet(struct rxrpc_call *call) { - struct rxrpc_connection *conn = NULL; + struct rxrpc_connection *conn; struct rxrpc_abort_buffer pkt; struct msghdr msg; struct kvec iov[1]; @@ -271,13 +266,11 @@ int rxrpc_send_abort_packet(struct rxrpc_call *call) test_bit(RXRPC_CALL_TX_LAST, &call->flags)) return 0; - spin_lock_bh(&call->lock); - if (call->conn) - conn = rxrpc_get_connection_maybe(call->conn); - spin_unlock_bh(&call->lock); - if (!conn) + if (test_bit(RXRPC_CALL_DISCONNECTED, &call->flags)) return -ECONNRESET; + conn = call->conn; + msg.msg_name = &call->peer->srx.transport; msg.msg_namelen = call->peer->srx.transport_len; msg.msg_control = NULL; @@ -312,8 +305,6 @@ int rxrpc_send_abort_packet(struct rxrpc_call *call) trace_rxrpc_tx_packet(call->debug_id, &pkt.whdr, rxrpc_tx_point_call_abort); rxrpc_tx_backoff(call, ret); - - rxrpc_put_connection(conn); return ret; } -- cgit v1.2.3-59-g8ed1b From 2b5b8251bc9fe2f9118411f037862ee17cf81e97 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 3 Feb 2020 10:15:07 -0800 Subject: net: hsr: fix possible NULL deref in hsr_handle_frame() hsr_port_get_rcu() can return NULL, so we need to be careful. general protection fault, probably for non-canonical address 0xdffffc0000000006: 0000 [#1] PREEMPT SMP KASAN KASAN: null-ptr-deref in range [0x0000000000000030-0x0000000000000037] CPU: 1 PID: 10249 Comm: syz-executor.5 Not tainted 5.5.0-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:__read_once_size include/linux/compiler.h:199 [inline] RIP: 0010:hsr_addr_is_self+0x86/0x330 net/hsr/hsr_framereg.c:44 Code: 04 00 f3 f3 f3 65 48 8b 04 25 28 00 00 00 48 89 45 d0 31 c0 e8 6b ff 94 f9 4c 89 f2 48 b8 00 00 00 00 00 fc ff df 48 c1 ea 03 <80> 3c 02 00 0f 85 75 02 00 00 48 8b 43 30 49 39 c6 49 89 47 c0 0f RSP: 0018:ffffc90000da8a90 EFLAGS: 00010206 RAX: dffffc0000000000 RBX: 0000000000000000 RCX: ffffffff87e0cc33 RDX: 0000000000000006 RSI: ffffffff87e035d5 RDI: 0000000000000000 RBP: ffffc90000da8b20 R08: ffff88808e7de040 R09: ffffed1015d2707c R10: ffffed1015d2707b R11: ffff8880ae9383db R12: ffff8880a689bc5e R13: 1ffff920001b5153 R14: 0000000000000030 R15: ffffc90000da8af8 FS: 00007fd7a42be700(0000) GS:ffff8880ae900000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000001b32338000 CR3: 00000000a928c000 CR4: 00000000001406e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: hsr_handle_frame+0x1c5/0x630 net/hsr/hsr_slave.c:31 __netif_receive_skb_core+0xfbc/0x30b0 net/core/dev.c:5099 __netif_receive_skb_one_core+0xa8/0x1a0 net/core/dev.c:5196 __netif_receive_skb+0x2c/0x1d0 net/core/dev.c:5312 process_backlog+0x206/0x750 net/core/dev.c:6144 napi_poll net/core/dev.c:6582 [inline] net_rx_action+0x508/0x1120 net/core/dev.c:6650 __do_softirq+0x262/0x98c kernel/softirq.c:292 do_softirq_own_stack+0x2a/0x40 arch/x86/entry/entry_64.S:1082 Fixes: c5a759117210 ("net/hsr: Use list_head (and rcu) instead of array for slave devices.") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller --- net/hsr/hsr_slave.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/hsr/hsr_slave.c b/net/hsr/hsr_slave.c index ee561297d8a7..fbfd0db182b7 100644 --- a/net/hsr/hsr_slave.c +++ b/net/hsr/hsr_slave.c @@ -27,6 +27,8 @@ static rx_handler_result_t hsr_handle_frame(struct sk_buff **pskb) rcu_read_lock(); /* hsr->node_db, hsr->ports */ port = hsr_port_get_rcu(skb->dev); + if (!port) + goto finish_pass; if (hsr_addr_is_self(port->hsr, eth_hdr(skb)->h_source)) { /* Directly kill frames sent by ourselves */ -- cgit v1.2.3-59-g8ed1b From 599be01ee567b61f4471ee8078870847d0a11e8e Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sun, 2 Feb 2020 21:14:35 -0800 Subject: net_sched: fix an OOB access in cls_tcindex As Eric noticed, tcindex_alloc_perfect_hash() uses cp->hash to compute the size of memory allocation, but cp->hash is set again after the allocation, this caused an out-of-bound access. So we have to move all cp->hash initialization and computation before the memory allocation. Move cp->mask and cp->shift together as cp->hash may need them for computation too. Reported-and-tested-by: syzbot+35d4dea36c387813ed31@syzkaller.appspotmail.com Fixes: 331b72922c5f ("net: sched: RCU cls_tcindex") Cc: Eric Dumazet Cc: John Fastabend Cc: Jamal Hadi Salim Cc: Jiri Pirko Cc: Jakub Kicinski Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/cls_tcindex.c | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) (limited to 'net') diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index 3d4a1280352f..0323aee03de7 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -333,12 +333,31 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, cp->fall_through = p->fall_through; cp->tp = tp; + if (tb[TCA_TCINDEX_HASH]) + cp->hash = nla_get_u32(tb[TCA_TCINDEX_HASH]); + + if (tb[TCA_TCINDEX_MASK]) + cp->mask = nla_get_u16(tb[TCA_TCINDEX_MASK]); + + if (tb[TCA_TCINDEX_SHIFT]) + cp->shift = nla_get_u32(tb[TCA_TCINDEX_SHIFT]); + + if (!cp->hash) { + /* Hash not specified, use perfect hash if the upper limit + * of the hashing index is below the threshold. + */ + if ((cp->mask >> cp->shift) < PERFECT_HASH_THRESHOLD) + cp->hash = (cp->mask >> cp->shift) + 1; + else + cp->hash = DEFAULT_HASH_SIZE; + } + if (p->perfect) { int i; if (tcindex_alloc_perfect_hash(net, cp) < 0) goto errout; - for (i = 0; i < cp->hash; i++) + for (i = 0; i < min(cp->hash, p->hash); i++) cp->perfect[i].res = p->perfect[i].res; balloc = 1; } @@ -350,15 +369,6 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, if (old_r) cr = r->res; - if (tb[TCA_TCINDEX_HASH]) - cp->hash = nla_get_u32(tb[TCA_TCINDEX_HASH]); - - if (tb[TCA_TCINDEX_MASK]) - cp->mask = nla_get_u16(tb[TCA_TCINDEX_MASK]); - - if (tb[TCA_TCINDEX_SHIFT]) - cp->shift = nla_get_u32(tb[TCA_TCINDEX_SHIFT]); - err = -EBUSY; /* Hash already allocated, make sure that we still meet the @@ -376,16 +386,6 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, if (tb[TCA_TCINDEX_FALL_THROUGH]) cp->fall_through = nla_get_u32(tb[TCA_TCINDEX_FALL_THROUGH]); - if (!cp->hash) { - /* Hash not specified, use perfect hash if the upper limit - * of the hashing index is below the threshold. - */ - if ((cp->mask >> cp->shift) < PERFECT_HASH_THRESHOLD) - cp->hash = (cp->mask >> cp->shift) + 1; - else - cp->hash = DEFAULT_HASH_SIZE; - } - if (!cp->perfect && !cp->h) cp->alloc_hash = cp->hash; -- cgit v1.2.3-59-g8ed1b From 0d0d9a388a858e271bb70e71e99e7fe2a6fd6f64 Mon Sep 17 00:00:00 2001 From: Ridge Kennedy Date: Tue, 4 Feb 2020 12:24:00 +1300 Subject: l2tp: Allow duplicate session creation with UDP In the past it was possible to create multiple L2TPv3 sessions with the same session id as long as the sessions belonged to different tunnels. The resulting sessions had issues when used with IP encapsulated tunnels, but worked fine with UDP encapsulated ones. Some applications began to rely on this behaviour to avoid having to negotiate unique session ids. Some time ago a change was made to require session ids to be unique across all tunnels, breaking the applications making use of this "feature". This change relaxes the duplicate session id check to allow duplicates if both of the colliding sessions belong to UDP encapsulated tunnels. Fixes: dbdbc73b4478 ("l2tp: fix duplicate session creation") Signed-off-by: Ridge Kennedy Acked-by: James Chapman Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index c99223cb3338..fcb53ed1c4fb 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -320,8 +320,13 @@ int l2tp_session_register(struct l2tp_session *session, spin_lock_bh(&pn->l2tp_session_hlist_lock); + /* IP encap expects session IDs to be globally unique, while + * UDP encap doesn't. + */ hlist_for_each_entry(session_walk, g_head, global_hlist) - if (session_walk->session_id == session->session_id) { + if (session_walk->session_id == session->session_id && + (session_walk->tunnel->encap == L2TP_ENCAPTYPE_IP || + tunnel->encap == L2TP_ENCAPTYPE_IP)) { err = -EEXIST; goto err_tlock_pnlock; } -- cgit v1.2.3-59-g8ed1b