aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/net/ethernet/mellanox/mlx4/en_rx.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/net/ethernet/mellanox/mlx4/en_rx.c')
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_rx.c69
1 files changed, 41 insertions, 28 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index f2e8beddcf44..6562f78b07f4 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -688,18 +688,23 @@ out_loopback:
dev_kfree_skb_any(skb);
}
-static void mlx4_en_refill_rx_buffers(struct mlx4_en_priv *priv,
- struct mlx4_en_rx_ring *ring)
+static bool mlx4_en_refill_rx_buffers(struct mlx4_en_priv *priv,
+ struct mlx4_en_rx_ring *ring)
{
- int index = ring->prod & ring->size_mask;
+ u32 missing = ring->actual_size - (ring->prod - ring->cons);
- while ((u32) (ring->prod - ring->cons) < ring->actual_size) {
- if (mlx4_en_prepare_rx_desc(priv, ring, index,
+ /* Try to batch allocations, but not too much. */
+ if (missing < 8)
+ return false;
+ do {
+ if (mlx4_en_prepare_rx_desc(priv, ring,
+ ring->prod & ring->size_mask,
GFP_ATOMIC | __GFP_COLD))
break;
ring->prod++;
- index = ring->prod & ring->size_mask;
- }
+ } while (--missing);
+
+ return true;
}
/* When hardware doesn't strip the vlan, we need to calculate the checksum
@@ -788,7 +793,6 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
struct bpf_prog *xdp_prog;
int doorbell_pending;
struct sk_buff *skb;
- int tx_index;
int index;
int nr;
unsigned int length;
@@ -808,7 +812,6 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
rcu_read_lock();
xdp_prog = rcu_dereference(ring->xdp_prog);
doorbell_pending = 0;
- tx_index = (priv->tx_ring_num - priv->xdp_ring_num) + cq->ring;
/* We assume a 1:1 mapping between CQEs and Rx descriptors, so Rx
* descriptor offset can be deduced from the CQE index instead of
@@ -877,8 +880,6 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
*/
length = be32_to_cpu(cqe->byte_cnt);
length -= ring->fcs_del;
- ring->bytes += length;
- ring->packets++;
l2_tunnel = (dev->hw_enc_features & NETIF_F_RXCSUM) &&
(cqe->vlan_my_qpn & cpu_to_be32(MLX4_CQE_L2_TUNNEL));
@@ -904,22 +905,26 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
case XDP_PASS:
break;
case XDP_TX:
- if (likely(!mlx4_en_xmit_frame(frags, dev,
- length, tx_index,
+ if (likely(!mlx4_en_xmit_frame(ring, frags, dev,
+ length, cq->ring,
&doorbell_pending)))
goto consumed;
- goto xdp_drop; /* Drop on xmit failure */
+ goto xdp_drop_no_cnt; /* Drop on xmit failure */
default:
bpf_warn_invalid_xdp_action(act);
case XDP_ABORTED:
case XDP_DROP:
-xdp_drop:
+ ring->xdp_drop++;
+xdp_drop_no_cnt:
if (likely(mlx4_en_rx_recycle(ring, frags)))
goto consumed;
goto next;
}
}
+ ring->bytes += length;
+ ring->packets++;
+
if (likely(dev->features & NETIF_F_RXCSUM)) {
if (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_TCP |
MLX4_CQE_STATUS_UDP)) {
@@ -1081,15 +1086,20 @@ consumed:
out:
rcu_read_unlock();
- if (doorbell_pending)
- mlx4_en_xmit_doorbell(priv->tx_ring[tx_index]);
+ if (polled) {
+ if (doorbell_pending)
+ mlx4_en_xmit_doorbell(priv->tx_ring[TX_XDP][cq->ring]);
+
+ mlx4_cq_set_ci(&cq->mcq);
+ wmb(); /* ensure HW sees CQ consumer before we post new buffers */
+ ring->cons = cq->mcq.cons_index;
+ }
AVG_PERF_COUNTER(priv->pstats.rx_coal_avg, polled);
- mlx4_cq_set_ci(&cq->mcq);
- wmb(); /* ensure HW sees CQ consumer before we post new buffers */
- ring->cons = cq->mcq.cons_index;
- mlx4_en_refill_rx_buffers(priv, ring);
- mlx4_en_update_rx_prod_db(ring);
+
+ if (mlx4_en_refill_rx_buffers(priv, ring))
+ mlx4_en_update_rx_prod_db(ring);
+
return polled;
}
@@ -1131,14 +1141,17 @@ int mlx4_en_poll_rx_cq(struct napi_struct *napi, int budget)
return budget;
/* Current cpu is not according to smp_irq_affinity -
- * probably affinity changed. need to stop this NAPI
- * poll, and restart it on the right CPU
+ * probably affinity changed. Need to stop this NAPI
+ * poll, and restart it on the right CPU.
+ * Try to avoid returning a too small value (like 0),
+ * to not fool net_rx_action() and its netdev_budget
*/
- done = 0;
+ if (done)
+ done--;
}
/* Done for now */
- napi_complete_done(napi, done);
- mlx4_en_arm_cq(priv, cq);
+ if (napi_complete_done(napi, done))
+ mlx4_en_arm_cq(priv, cq);
return done;
}
@@ -1162,7 +1175,7 @@ void mlx4_en_calc_rx_buf(struct net_device *dev)
/* bpf requires buffers to be set up as 1 packet per page.
* This only works when num_frags == 1.
*/
- if (priv->xdp_ring_num) {
+ if (priv->tx_ring_num[TX_XDP]) {
dma_dir = PCI_DMA_BIDIRECTIONAL;
/* This will gain efficient xdp frame recycling at the expense
* of more costly truesize accounting