From 36d5fe6a000790f56039afe26834265db0a3ad4c Mon Sep 17 00:00:00 2001 From: Zoltan Kiss Date: Wed, 26 Mar 2014 22:37:45 +0000 Subject: core, nfqueue, openvswitch: Orphan frags in skb_zerocopy and handle errors skb_zerocopy can copy elements of the frags array between skbs, but it doesn't orphan them. Also, it doesn't handle errors, so this patch takes care of that as well, and modify the callers accordingly. skb_tx_error() is also added to the callers so they will signal the failed delivery towards the creator of the skb. Signed-off-by: Zoltan Kiss Signed-off-by: David S. Miller --- net/openvswitch/datapath.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'net/openvswitch') diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 8601b320b443..270b77dfac30 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -464,7 +464,9 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb, } nla->nla_len = nla_attr_size(skb->len); - skb_zerocopy(user_skb, skb, skb->len, hlen); + err = skb_zerocopy(user_skb, skb, skb->len, hlen); + if (err) + goto out; /* Pad OVS_PACKET_ATTR_PACKET if linear copy was performed */ if (!(dp->user_features & OVS_DP_F_UNALIGNED)) { @@ -478,6 +480,8 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb, err = genlmsg_unicast(ovs_dp_get_net(dp), user_skb, upcall_info->portid); out: + if (err) + skb_tx_error(skb); kfree_skb(nskb); return err; } -- cgit v1.2.3-59-g8ed1b From 4f647e0a3c37b8d5086214128614a136064110c3 Mon Sep 17 00:00:00 2001 From: Flavio Leitner Date: Thu, 27 Mar 2014 11:05:34 -0300 Subject: openvswitch: fix a possible deadlock and lockdep warning There are two problematic situations. A deadlock can happen when is_percpu is false because it can get interrupted while holding the spinlock. Then it executes ovs_flow_stats_update() in softirq context which tries to get the same lock. The second sitation is that when is_percpu is true, the code correctly disables BH but only for the local CPU, so the following can happen when locking the remote CPU without disabling BH: CPU#0 CPU#1 ovs_flow_stats_get() stats_read() +->spin_lock remote CPU#1 ovs_flow_stats_get() | stats_read() | ... +--> spin_lock remote CPU#0 | | | ovs_flow_stats_update() | ... | spin_lock local CPU#0 <--+ ovs_flow_stats_update() +---------------------------------- spin_lock local CPU#1 This patch disables BH for both cases fixing the deadlocks. Acked-by: Jesse Gross ================================= [ INFO: inconsistent lock state ] 3.14.0-rc8-00007-g632b06a #1 Tainted: G I --------------------------------- inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} usage. swapper/0/0 [HC0[0]:SC1[5]:HE1:SE0] takes: (&(&cpu_stats->lock)->rlock){+.?...}, at: [] ovs_flow_stats_update+0x51/0xd0 [openvswitch] {SOFTIRQ-ON-W} state was registered at: [] __lock_acquire+0x68f/0x1c40 [] lock_acquire+0xa2/0x1d0 [] _raw_spin_lock+0x3e/0x80 [] ovs_flow_stats_get+0xc4/0x1e0 [openvswitch] [] ovs_flow_cmd_fill_info+0x185/0x360 [openvswitch] [] ovs_flow_cmd_build_info.constprop.27+0x55/0x90 [openvswitch] [] ovs_flow_cmd_new_or_set+0x4dd/0x570 [openvswitch] [] genl_family_rcv_msg+0x1cd/0x3f0 [] genl_rcv_msg+0x8e/0xd0 [] netlink_rcv_skb+0xa9/0xc0 [] genl_rcv+0x28/0x40 [] netlink_unicast+0x100/0x1e0 [] netlink_sendmsg+0x347/0x770 [] sock_sendmsg+0x9c/0xe0 [] ___sys_sendmsg+0x3a9/0x3c0 [] __sys_sendmsg+0x51/0x90 [] SyS_sendmsg+0x12/0x20 [] system_call_fastpath+0x16/0x1b irq event stamp: 1740726 hardirqs last enabled at (1740726): [] ip6_finish_output2+0x4f0/0x840 hardirqs last disabled at (1740725): [] ip6_finish_output2+0x4ab/0x840 softirqs last enabled at (1740674): [] _local_bh_enable+0x22/0x50 softirqs last disabled at (1740675): [] irq_exit+0xc5/0xd0 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&(&cpu_stats->lock)->rlock); lock(&(&cpu_stats->lock)->rlock); *** DEADLOCK *** 5 locks held by swapper/0/0: #0: (((&ifa->dad_timer))){+.-...}, at: [] call_timer_fn+0x5/0x320 #1: (rcu_read_lock){.+.+..}, at: [] mld_sendpack+0x5/0x4a0 #2: (rcu_read_lock_bh){.+....}, at: [] ip6_finish_output2+0x59/0x840 #3: (rcu_read_lock_bh){.+....}, at: [] __dev_queue_xmit+0x5/0x9b0 #4: (rcu_read_lock){.+.+..}, at: [] internal_dev_xmit+0x5/0x110 [openvswitch] stack backtrace: CPU: 0 PID: 0 Comm: swapper/0 Tainted: G I 3.14.0-rc8-00007-g632b06a #1 Hardware name: /DX58SO, BIOS SOX5810J.86A.5599.2012.0529.2218 05/29/2012 0000000000000000 0fcf20709903df0c ffff88042d603808 ffffffff817cfe3c ffffffff81c134c0 ffff88042d603858 ffffffff817cb6da 0000000000000005 ffffffff00000001 ffff880400000000 0000000000000006 ffffffff81c134c0 Call Trace: [] dump_stack+0x4d/0x66 [] print_usage_bug+0x1f4/0x205 [] ? check_usage_backwards+0x180/0x180 [] mark_lock+0x223/0x2b0 [] __lock_acquire+0x623/0x1c40 [] ? __lock_is_held+0x57/0x80 [] ? masked_flow_lookup+0x236/0x250 [openvswitch] [] lock_acquire+0xa2/0x1d0 [] ? ovs_flow_stats_update+0x51/0xd0 [openvswitch] [] _raw_spin_lock+0x3e/0x80 [] ? ovs_flow_stats_update+0x51/0xd0 [openvswitch] [] ovs_flow_stats_update+0x51/0xd0 [openvswitch] [] ovs_dp_process_received_packet+0x84/0x120 [openvswitch] [] ? __lock_acquire+0x347/0x1c40 [] ovs_vport_receive+0x2a/0x30 [openvswitch] [] internal_dev_xmit+0x68/0x110 [openvswitch] [] ? internal_dev_xmit+0x5/0x110 [openvswitch] [] dev_hard_start_xmit+0x2e6/0x8b0 [] __dev_queue_xmit+0x417/0x9b0 [] ? __dev_queue_xmit+0x5/0x9b0 [] ? ip6_finish_output2+0x4f0/0x840 [] dev_queue_xmit+0x10/0x20 [] ip6_finish_output2+0x551/0x840 [] ? ip6_finish_output+0x9a/0x220 [] ip6_finish_output+0x9a/0x220 [] ip6_output+0x4f/0x1f0 [] mld_sendpack+0x1d9/0x4a0 [] mld_send_initial_cr.part.32+0x88/0xa0 [] ? addrconf_dad_completed+0x220/0x220 [] ipv6_mc_dad_complete+0x31/0x50 [] addrconf_dad_completed+0x147/0x220 [] ? addrconf_dad_completed+0x220/0x220 [] addrconf_dad_timer+0x19f/0x1c0 [] call_timer_fn+0x99/0x320 [] ? call_timer_fn+0x5/0x320 [] ? addrconf_dad_completed+0x220/0x220 [] run_timer_softirq+0x254/0x3b0 [] __do_softirq+0x12d/0x480 Signed-off-by: Flavio Leitner Signed-off-by: David S. Miller --- net/openvswitch/flow.c | 26 ++++++-------------------- 1 file changed, 6 insertions(+), 20 deletions(-) (limited to 'net/openvswitch') diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index dda451f4429c..2998989e76db 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -103,30 +103,24 @@ static void stats_read(struct flow_stats *stats, void ovs_flow_stats_get(struct sw_flow *flow, struct ovs_flow_stats *ovs_stats, unsigned long *used, __be16 *tcp_flags) { - int cpu, cur_cpu; + int cpu; *used = 0; *tcp_flags = 0; memset(ovs_stats, 0, sizeof(*ovs_stats)); + local_bh_disable(); if (!flow->stats.is_percpu) { stats_read(flow->stats.stat, ovs_stats, used, tcp_flags); } else { - cur_cpu = get_cpu(); for_each_possible_cpu(cpu) { struct flow_stats *stats; - if (cpu == cur_cpu) - local_bh_disable(); - stats = per_cpu_ptr(flow->stats.cpu_stats, cpu); stats_read(stats, ovs_stats, used, tcp_flags); - - if (cpu == cur_cpu) - local_bh_enable(); } - put_cpu(); } + local_bh_enable(); } static void stats_reset(struct flow_stats *stats) @@ -141,25 +135,17 @@ static void stats_reset(struct flow_stats *stats) void ovs_flow_stats_clear(struct sw_flow *flow) { - int cpu, cur_cpu; + int cpu; + local_bh_disable(); if (!flow->stats.is_percpu) { stats_reset(flow->stats.stat); } else { - cur_cpu = get_cpu(); - for_each_possible_cpu(cpu) { - - if (cpu == cur_cpu) - local_bh_disable(); - stats_reset(per_cpu_ptr(flow->stats.cpu_stats, cpu)); - - if (cpu == cur_cpu) - local_bh_enable(); } - put_cpu(); } + local_bh_enable(); } static int check_header(struct sk_buff *skb, int len) -- cgit v1.2.3-59-g8ed1b