From 2bd624b4611ffee36422782d16e1c944d1351e98 Mon Sep 17 00:00:00 2001 From: Anoob Soman Date: Wed, 15 Feb 2017 20:25:39 +0000 Subject: packet: Do not call fanout_release from atomic contexts Commit 6664498280cf ("packet: call fanout_release, while UNREGISTERING a netdev"), unfortunately, introduced the following issues. 1. calling mutex_lock(&fanout_mutex) (fanout_release()) from inside rcu_read-side critical section. rcu_read_lock disables preemption, most often, which prohibits calling sleeping functions. [ ] include/linux/rcupdate.h:560 Illegal context switch in RCU read-side critical section! [ ] [ ] rcu_scheduler_active = 1, debug_locks = 0 [ ] 4 locks held by ovs-vswitchd/1969: [ ] #0: (cb_lock){++++++}, at: [] genl_rcv+0x19/0x40 [ ] #1: (ovs_mutex){+.+.+.}, at: [] ovs_vport_cmd_del+0x4a/0x100 [openvswitch] [ ] #2: (rtnl_mutex){+.+.+.}, at: [] rtnl_lock+0x17/0x20 [ ] #3: (rcu_read_lock){......}, at: [] packet_notifier+0x5/0x3f0 [ ] [ ] Call Trace: [ ] [] dump_stack+0x85/0xc4 [ ] [] lockdep_rcu_suspicious+0x107/0x110 [ ] [] ___might_sleep+0x57/0x210 [ ] [] __might_sleep+0x70/0x90 [ ] [] mutex_lock_nested+0x3c/0x3a0 [ ] [] ? vprintk_default+0x1f/0x30 [ ] [] ? printk+0x4d/0x4f [ ] [] fanout_release+0x1d/0xe0 [ ] [] packet_notifier+0x2f9/0x3f0 2. calling mutex_lock(&fanout_mutex) inside spin_lock(&po->bind_lock). "sleeping function called from invalid context" [ ] BUG: sleeping function called from invalid context at kernel/locking/mutex.c:620 [ ] in_atomic(): 1, irqs_disabled(): 0, pid: 1969, name: ovs-vswitchd [ ] INFO: lockdep is turned off. [ ] Call Trace: [ ] [] dump_stack+0x85/0xc4 [ ] [] ___might_sleep+0x202/0x210 [ ] [] __might_sleep+0x70/0x90 [ ] [] mutex_lock_nested+0x3c/0x3a0 [ ] [] fanout_release+0x1d/0xe0 [ ] [] packet_notifier+0x2f9/0x3f0 3. calling dev_remove_pack(&fanout->prot_hook), from inside spin_lock(&po->bind_lock) or rcu_read-side critical-section. dev_remove_pack() -> synchronize_net(), which might sleep. [ ] BUG: scheduling while atomic: ovs-vswitchd/1969/0x00000002 [ ] INFO: lockdep is turned off. [ ] Call Trace: [ ] [] dump_stack+0x85/0xc4 [ ] [] __schedule_bug+0x64/0x73 [ ] [] __schedule+0x6b/0xd10 [ ] [] schedule+0x6b/0x80 [ ] [] schedule_timeout+0x38d/0x410 [ ] [] synchronize_sched_expedited+0x53d/0x810 [ ] [] synchronize_rcu_expedited+0xe/0x10 [ ] [] synchronize_net+0x35/0x50 [ ] [] dev_remove_pack+0x13/0x20 [ ] [] fanout_release+0xbe/0xe0 [ ] [] packet_notifier+0x2f9/0x3f0 4. fanout_release() races with calls from different CPU. To fix the above problems, remove the call to fanout_release() under rcu_read_lock(). Instead, call __dev_remove_pack(&fanout->prot_hook) and netdev_run_todo will be happy that &dev->ptype_specific list is empty. In order to achieve this, I moved dev_{add,remove}_pack() out of fanout_{add,release} to __fanout_{link,unlink}. So, call to {,__}unregister_prot_hook() will make sure fanout->prot_hook is removed as well. Fixes: 6664498280cf ("packet: call fanout_release, while UNREGISTERING a netdev") Reported-by: Eric Dumazet Signed-off-by: Anoob Soman Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/packet/af_packet.c | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 0f03f6a53b4d..70f5b6a4683c 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1497,6 +1497,8 @@ static void __fanout_link(struct sock *sk, struct packet_sock *po) f->arr[f->num_members] = sk; smp_wmb(); f->num_members++; + if (f->num_members == 1) + dev_add_pack(&f->prot_hook); spin_unlock(&f->lock); } @@ -1513,6 +1515,8 @@ static void __fanout_unlink(struct sock *sk, struct packet_sock *po) BUG_ON(i >= f->num_members); f->arr[i] = f->arr[f->num_members - 1]; f->num_members--; + if (f->num_members == 0) + __dev_remove_pack(&f->prot_hook); spin_unlock(&f->lock); } @@ -1693,7 +1697,6 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags) match->prot_hook.func = packet_rcv_fanout; match->prot_hook.af_packet_priv = match; match->prot_hook.id_match = match_fanout_group; - dev_add_pack(&match->prot_hook); list_add(&match->list, &fanout_list); } err = -EINVAL; @@ -1718,7 +1721,12 @@ out: return err; } -static void fanout_release(struct sock *sk) +/* If pkt_sk(sk)->fanout->sk_ref is zero, this function removes + * pkt_sk(sk)->fanout from fanout_list and returns pkt_sk(sk)->fanout. + * It is the responsibility of the caller to call fanout_release_data() and + * free the returned packet_fanout (after synchronize_net()) + */ +static struct packet_fanout *fanout_release(struct sock *sk) { struct packet_sock *po = pkt_sk(sk); struct packet_fanout *f; @@ -1728,17 +1736,17 @@ static void fanout_release(struct sock *sk) if (f) { po->fanout = NULL; - if (atomic_dec_and_test(&f->sk_ref)) { + if (atomic_dec_and_test(&f->sk_ref)) list_del(&f->list); - dev_remove_pack(&f->prot_hook); - fanout_release_data(f); - kfree(f); - } + else + f = NULL; if (po->rollover) kfree_rcu(po->rollover, rcu); } mutex_unlock(&fanout_mutex); + + return f; } static bool packet_extra_vlan_len_allowed(const struct net_device *dev, @@ -2912,6 +2920,7 @@ static int packet_release(struct socket *sock) { struct sock *sk = sock->sk; struct packet_sock *po; + struct packet_fanout *f; struct net *net; union tpacket_req_u req_u; @@ -2951,9 +2960,14 @@ static int packet_release(struct socket *sock) packet_set_ring(sk, &req_u, 1, 1); } - fanout_release(sk); + f = fanout_release(sk); synchronize_net(); + + if (f) { + fanout_release_data(f); + kfree(f); + } /* * Now the socket is dead. No more input will appear. */ @@ -3905,7 +3919,6 @@ static int packet_notifier(struct notifier_block *this, } if (msg == NETDEV_UNREGISTER) { packet_cached_dev_reset(po); - fanout_release(sk); po->ifindex = -1; if (po->prot_hook.dev) dev_put(po->prot_hook.dev); -- cgit v1.2.3-59-g8ed1b From 5edabca9d4cff7f1f2b68f0bac55ef99d9798ba4 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Thu, 16 Feb 2017 17:22:46 +0100 Subject: dccp: fix freeing skb too early for IPV6_RECVPKTINFO In the current DCCP implementation an skb for a DCCP_PKT_REQUEST packet is forcibly freed via __kfree_skb in dccp_rcv_state_process if dccp_v6_conn_request successfully returns. However, if IPV6_RECVPKTINFO is set on a socket, the address of the skb is saved to ireq->pktopts and the ref count for skb is incremented in dccp_v6_conn_request, so skb is still in use. Nevertheless, it gets freed in dccp_rcv_state_process. Fix by calling consume_skb instead of doing goto discard and therefore calling __kfree_skb. Similar fixes for TCP: fb7e2399ec17f1004c0e0ccfd17439f8759ede01 [TCP]: skb is unexpectedly freed. 0aea76d35c9651d55bbaf746e7914e5f9ae5a25d tcp: SYN packets are now simply consumed Signed-off-by: Andrey Konovalov Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/dccp/input.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/dccp/input.c b/net/dccp/input.c index ba347184bda9..8fedc2d49770 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -606,7 +606,8 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, if (inet_csk(sk)->icsk_af_ops->conn_request(sk, skb) < 0) return 1; - goto discard; + consume_skb(skb); + return 0; } if (dh->dccph_type == DCCP_PKT_RESET) goto discard; -- cgit v1.2.3-59-g8ed1b From 4c03b862b12f980456f9de92db6d508a4999b788 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 17 Feb 2017 16:19:39 -0500 Subject: irda: Fix lockdep annotations in hashbin_delete(). A nested lock depth was added to the hasbin_delete() code but it doesn't actually work some well and results in tons of lockdep splats. Fix the code instead to properly drop the lock around the operation and just keep peeking the head of the hashbin queue. Reported-by: Dmitry Vyukov Tested-by: Dmitry Vyukov Signed-off-by: David S. Miller --- net/irda/irqueue.c | 34 ++++++++++++++++------------------ 1 file changed, 16 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/net/irda/irqueue.c b/net/irda/irqueue.c index acbe61c7e683..160dc89335e2 100644 --- a/net/irda/irqueue.c +++ b/net/irda/irqueue.c @@ -383,9 +383,6 @@ EXPORT_SYMBOL(hashbin_new); * for deallocating this structure if it's complex. If not the user can * just supply kfree, which should take care of the job. */ -#ifdef CONFIG_LOCKDEP -static int hashbin_lock_depth = 0; -#endif int hashbin_delete( hashbin_t* hashbin, FREE_FUNC free_func) { irda_queue_t* queue; @@ -396,22 +393,27 @@ int hashbin_delete( hashbin_t* hashbin, FREE_FUNC free_func) IRDA_ASSERT(hashbin->magic == HB_MAGIC, return -1;); /* Synchronize */ - if ( hashbin->hb_type & HB_LOCK ) { - spin_lock_irqsave_nested(&hashbin->hb_spinlock, flags, - hashbin_lock_depth++); - } + if (hashbin->hb_type & HB_LOCK) + spin_lock_irqsave(&hashbin->hb_spinlock, flags); /* * Free the entries in the hashbin, TODO: use hashbin_clear when * it has been shown to work */ for (i = 0; i < HASHBIN_SIZE; i ++ ) { - queue = dequeue_first((irda_queue_t**) &hashbin->hb_queue[i]); - while (queue ) { - if (free_func) - (*free_func)(queue); - queue = dequeue_first( - (irda_queue_t**) &hashbin->hb_queue[i]); + while (1) { + queue = dequeue_first((irda_queue_t**) &hashbin->hb_queue[i]); + + if (!queue) + break; + + if (free_func) { + if (hashbin->hb_type & HB_LOCK) + spin_unlock_irqrestore(&hashbin->hb_spinlock, flags); + free_func(queue); + if (hashbin->hb_type & HB_LOCK) + spin_lock_irqsave(&hashbin->hb_spinlock, flags); + } } } @@ -420,12 +422,8 @@ int hashbin_delete( hashbin_t* hashbin, FREE_FUNC free_func) hashbin->magic = ~HB_MAGIC; /* Release lock */ - if ( hashbin->hb_type & HB_LOCK) { + if (hashbin->hb_type & HB_LOCK) spin_unlock_irqrestore(&hashbin->hb_spinlock, flags); -#ifdef CONFIG_LOCKDEP - hashbin_lock_depth--; -#endif - } /* * Free the hashbin structure -- cgit v1.2.3-59-g8ed1b From 00ea1ceebe0d9f2dc1cc2b7bd575a00100c27869 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Sat, 18 Feb 2017 19:00:45 -0500 Subject: ipv6: release dst on error in ip6_dst_lookup_tail If ip6_dst_lookup_tail has acquired a dst and fails the IPv4-mapped check, release the dst before returning an error. Fixes: ec5e3b0a1d41 ("ipv6: Inhibit IPv4-mapped src address on the wire.") Signed-off-by: Willem de Bruijn Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/ip6_output.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index e164684456df..7cebee58e55b 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1022,8 +1022,10 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk, } #endif if (ipv6_addr_v4mapped(&fl6->saddr) && - !(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) - return -EAFNOSUPPORT; + !(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) { + err = -EAFNOSUPPORT; + goto out_err_release; + } return 0; -- cgit v1.2.3-59-g8ed1b