From 8961b1940200ac5e91bee1c1bc69086365e1b7c9 Mon Sep 17 00:00:00 2001 From: Lino Sanfilippo Date: Sun, 30 Nov 2014 11:49:36 +0100 Subject: pxa168: close race between napi and irq activation In pxa168_eth_open() the irqs are enabled before napi. This opens a tiny time window in which the irq handler is processed, disables irqs but then is not able to schedule the not yet activated napi, leaving irqs disabled forever (since irqs are reenabled in napi poll function). Fix this race by activating napi before irqs are activated. Signed-off-by: Lino Sanfilippo Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/pxa168_eth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net/ethernet/marvell') diff --git a/drivers/net/ethernet/marvell/pxa168_eth.c b/drivers/net/ethernet/marvell/pxa168_eth.c index c3b209cd0660..a3e394c47819 100644 --- a/drivers/net/ethernet/marvell/pxa168_eth.c +++ b/drivers/net/ethernet/marvell/pxa168_eth.c @@ -1153,8 +1153,8 @@ static int pxa168_eth_open(struct net_device *dev) pep->rx_used_desc_q = 0; pep->rx_curr_desc_q = 0; netif_carrier_off(dev); - eth_port_start(dev); napi_enable(&pep->napi); + eth_port_start(dev); return 0; out_free_rx_skb: rxq_deinit(dev); -- cgit v1.2.3-59-g8ed1b From 6276288a4c1f755e6b65b0f2e1177855b0340e31 Mon Sep 17 00:00:00 2001 From: Lino Sanfilippo Date: Sun, 30 Nov 2014 12:51:31 +0100 Subject: skge: Unmask interrupts in case of spurious interrupts In case of a spurious interrupt dont forget to reenable the interrupts that have been masked by reading the interrupt source register. Signed-off-by: Lino Sanfilippo Acked-by: Mirko Lindner Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/skge.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/net/ethernet/marvell') diff --git a/drivers/net/ethernet/marvell/skge.c b/drivers/net/ethernet/marvell/skge.c index 264eab7d3b26..7173836fe361 100644 --- a/drivers/net/ethernet/marvell/skge.c +++ b/drivers/net/ethernet/marvell/skge.c @@ -3433,10 +3433,9 @@ static irqreturn_t skge_intr(int irq, void *dev_id) if (status & IS_HW_ERR) skge_error_irq(hw); - +out: skge_write32(hw, B0_IMSK, hw->intr_mask); skge_read32(hw, B0_IMSK); -out: spin_unlock(&hw->hw_lock); return IRQ_RETVAL(handled); -- cgit v1.2.3-59-g8ed1b From ea589e9b7838f5d1c3d4998f9fe08854872187fc Mon Sep 17 00:00:00 2001 From: Lino Sanfilippo Date: Sun, 30 Nov 2014 12:56:51 +0100 Subject: sky2: avoid pci write posting after disabling irqs In sky2_change_mtu setting B0_IMSK to 0 may be delayed due to PCI write posting which could result in irqs being still active when synchronize_irq is called. Since we are not prepared to handle any further irqs after synchronize_irq (our resources are freed after that) force the write by a consecutive read from the same register. Similar situation in sky2_all_down: Here we disabled irqs by a write to B0_IMSK but did not ensure that this write took place before synchronize_irq. Fix that too. Signed-off-by: Lino Sanfilippo Acked-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/sky2.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/net/ethernet/marvell') diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c index bd3366267039..f14544c8d73f 100644 --- a/drivers/net/ethernet/marvell/sky2.c +++ b/drivers/net/ethernet/marvell/sky2.c @@ -2419,6 +2419,7 @@ static int sky2_change_mtu(struct net_device *dev, int new_mtu) imask = sky2_read32(hw, B0_IMSK); sky2_write32(hw, B0_IMSK, 0); + sky2_read32(hw, B0_IMSK); dev->trans_start = jiffies; /* prevent tx timeout */ napi_disable(&hw->napi); @@ -3487,8 +3488,8 @@ static void sky2_all_down(struct sky2_hw *hw) int i; if (hw->flags & SKY2_HW_IRQ_SETUP) { - sky2_read32(hw, B0_IMSK); sky2_write32(hw, B0_IMSK, 0); + sky2_read32(hw, B0_IMSK); synchronize_irq(hw->pdev->irq); napi_disable(&hw->napi); -- cgit v1.2.3-59-g8ed1b From aebea2ba0f7495e1a1c9ea5e753d146cb2f6b845 Mon Sep 17 00:00:00 2001 From: willy tarreau Date: Tue, 2 Dec 2014 08:13:04 +0100 Subject: net: mvneta: fix Tx interrupt delay The mvneta driver sets the amount of Tx coalesce packets to 16 by default. Normally that does not cause any trouble since the driver uses a much larger Tx ring size (532 packets). But some sockets might run with very small buffers, much smaller than the equivalent of 16 packets. This is what ping is doing for example, by setting SNDBUF to 324 bytes rounded up to 2kB by the kernel. The problem is that there is no documented method to force a specific packet to emit an interrupt (eg: the last of the ring) nor is it possible to make the NIC emit an interrupt after a given delay. In this case, it causes trouble, because when ping sends packets over its raw socket, the few first packets leave the system, and the first 15 packets will be emitted without an IRQ being generated, so without the skbs being freed. And since the socket's buffer is small, there's no way to reach that amount of packets, and the ping ends up with "send: no buffer available" after sending 6 packets. Running with 3 instances of ping in parallel is enough to hide the problem, because with 6 packets per instance, that's 18 packets total, which is enough to grant a Tx interrupt before all are sent. The original driver in the LSP kernel worked around this design flaw by using a software timer to clean up the Tx descriptors. This timer was slow and caused terrible network performance on some Tx-bound workloads (such as routing) but was enough to make tools like ping work correctly. Instead here, we simply set the packet counts before interrupt to 1. This ensures that each packet sent will produce an interrupt. NAPI takes care of coalescing interrupts since the interrupt is disabled once generated. No measurable performance impact nor CPU usage were observed on small nor large packets, including when saturating the link on Tx, and this fixes tools like ping which rely on too small a send buffer. If one wants to increase this value for certain workloads where it is safe to do so, "ethtool -C $dev tx-frames" will override this default setting. This fix needs to be applied to stable kernels starting with 3.10. Tested-By: Maggie Mae Roxas Signed-off-by: Willy Tarreau Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvneta.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net/ethernet/marvell') diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index ade067de1689..bb4afe6ccc85 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -216,7 +216,7 @@ /* Various constants */ /* Coalescing */ -#define MVNETA_TXDONE_COAL_PKTS 16 +#define MVNETA_TXDONE_COAL_PKTS 1 #define MVNETA_RX_COAL_PKTS 32 #define MVNETA_RX_COAL_USEC 100 -- cgit v1.2.3-59-g8ed1b From 5f478b41033606d325e420df693162e2524c2b94 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 2 Dec 2014 04:30:59 -0800 Subject: net: mvneta: fix race condition in mvneta_tx() mvneta_tx() dereferences skb to get skb->len too late, as hardware might have completed the transmit and TX completion could have freed the skb from another cpu. Fixes: 71f6d1b31fb1 ("net: mvneta: replace Tx timer with a real interrupt") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvneta.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/net/ethernet/marvell') diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index bb4afe6ccc85..67a84cfaefa1 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -1721,6 +1721,7 @@ static int mvneta_tx(struct sk_buff *skb, struct net_device *dev) u16 txq_id = skb_get_queue_mapping(skb); struct mvneta_tx_queue *txq = &pp->txqs[txq_id]; struct mvneta_tx_desc *tx_desc; + int len = skb->len; int frags = 0; u32 tx_cmd; @@ -1788,7 +1789,7 @@ out: u64_stats_update_begin(&stats->syncp); stats->tx_packets++; - stats->tx_bytes += skb->len; + stats->tx_bytes += len; u64_stats_update_end(&stats->syncp); } else { dev->stats.tx_dropped++; -- cgit v1.2.3-59-g8ed1b