Diffstat (limited to 'drivers/net/ethernet/intel/i40e/i40e_txrx.c')
-rw-r--r--	drivers/net/ethernet/intel/i40e/i40e_txrx.c	212
1 file changed, 142 insertions(+), 70 deletions(-)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 3ce4900c0c43..635b3ac17877 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -465,10 +465,11 @@ static void i40e_fd_handle_status(struct i40e_ring *rx_ring,
I40E_RX_PROG_STATUS_DESC_QW1_ERROR_SHIFT;
if (error == BIT(I40E_RX_PROG_STATUS_DESC_FD_TBL_FULL_SHIFT)) {
+ pf->fd_inv = le32_to_cpu(rx_desc->wb.qword0.hi_dword.fd_id);
if ((rx_desc->wb.qword0.hi_dword.fd_id != 0) ||
(I40E_DEBUG_FD & pf->hw.debug_mask))
dev_warn(&pdev->dev, "ntuple filter loc = %d, could not be added\n",
- rx_desc->wb.qword0.hi_dword.fd_id);
+ pf->fd_inv);
/* Check if the programming error is for ATR.
* If so, auto disable ATR and set a state for
@@ -814,6 +815,8 @@ void i40e_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector)
* i40e_set_new_dynamic_itr - Find new ITR level
* @rc: structure containing ring performance data
*
+ * Returns true if ITR changed, false if not
+ *
* Stores a new ITR value based on packets and byte counts during
* the last interrupt. The advantage of per interrupt computation
* is faster updates and more accurate ITR for the current traffic
@@ -822,21 +825,32 @@ void i40e_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector)
* testing data as well as attempting to minimize response time
* while increasing bulk throughput.
**/
-static void i40e_set_new_dynamic_itr(struct i40e_ring_container *rc)
+static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc)
{
enum i40e_latency_range new_latency_range = rc->latency_range;
+ struct i40e_q_vector *qv = rc->ring->q_vector;
u32 new_itr = rc->itr;
int bytes_per_int;
+ int usecs;
if (rc->total_packets == 0 || !rc->itr)
- return;
+ return false;
/* simple throttlerate management
- * 0-10MB/s lowest (100000 ints/s)
+ * 0-10MB/s lowest (50000 ints/s)
* 10-20MB/s low (20000 ints/s)
- * 20-1249MB/s bulk (8000 ints/s)
+ * 20-1249MB/s bulk (18000 ints/s)
+ * > 40000 Rx packets per second (8000 ints/s)
+ *
+ * The math works out because the divisor is in 10^(-6), which
+ * turns the bytes/us input value into MB/s values. Be sure to
+ * use usecs, as the register values written are in 2 usec
+ * increments in the ITR registers, and to use the smoothed
+ * values that the countdown timer gives us.
*/
- bytes_per_int = rc->total_bytes / rc->itr;
+ usecs = (rc->itr << 1) * ITR_COUNTDOWN_START;
+ bytes_per_int = rc->total_bytes / usecs;
+
switch (new_latency_range) {
case I40E_LOWEST_LATENCY:
if (bytes_per_int > 10)
@@ -849,35 +863,52 @@ static void i40e_set_new_dynamic_itr(struct i40e_ring_container *rc)
new_latency_range = I40E_LOWEST_LATENCY;
break;
case I40E_BULK_LATENCY:
- if (bytes_per_int <= 20)
- new_latency_range = I40E_LOW_LATENCY;
- break;
+ case I40E_ULTRA_LATENCY:
default:
if (bytes_per_int <= 20)
new_latency_range = I40E_LOW_LATENCY;
break;
}
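
As a sanity check on the comment above, here is a minimal user-space sketch of the same arithmetic. The define values (I40E_ITR_20K == 0x0019, ITR_COUNTDOWN_START == 100) are assumptions modeled on the driver headers, used here only for illustration:

#include <stdio.h>

/* Assumed stand-ins for the driver defines in i40e_txrx.h / i40e.h. */
#define I40E_ITR_20K		0x0019	/* 25 register units of 2 usec = 50 usec */
#define ITR_COUNTDOWN_START	100	/* interrupts between ITR recalculations */

int main(void)
{
	unsigned int itr = I40E_ITR_20K;
	/* (itr << 1) converts 2-usec register units into usecs; the
	 * countdown factor matches the ~100-interrupt sample window.
	 */
	unsigned int usecs = (itr << 1) * ITR_COUNTDOWN_START;	/* 5000 */
	unsigned int total_bytes = 100000;	/* bytes seen in that window */

	/* bytes per usec is numerically MB/s: 100000 / 5000 == 20,
	 * i.e. 20 MB/s, right at the low/bulk boundary tested above.
	 */
	printf("bytes_per_int = %u\n", total_bytes / usecs);
	return 0;
}
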
+
+ /* This is to adjust Rx more aggressively when streaming small
+ * packets. The value of 40000 was picked as it is just beyond
+ * what the hardware can receive per second when in low latency
+ * mode.
+ */
+#define RX_ULTRA_PACKET_RATE 40000
+
+ if ((((rc->total_packets * 1000000) / usecs) > RX_ULTRA_PACKET_RATE) &&
+ (&qv->rx == rc))
+ new_latency_range = I40E_ULTRA_LATENCY;
+
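
The new ultra-latency test rescales the packet count to packets per second. A small sketch with the same assumed 5000-usec window as the example above:

#include <stdio.h>

#define RX_ULTRA_PACKET_RATE 40000

int main(void)
{
	unsigned int usecs = 5000;		/* assumed window, as above */
	unsigned int total_packets = 250;	/* small packets in the window */

	/* 250 pkts / 5000 us scaled by 10^6 us/s = 50000 pkts/s */
	unsigned long pps = (unsigned long)total_packets * 1000000 / usecs;

	printf("pps = %lu -> %s\n", pps,
	       pps > RX_ULTRA_PACKET_RATE ? "ultra latency" : "unchanged");
	return 0;
}
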
rc->latency_range = new_latency_range;
switch (new_latency_range) {
case I40E_LOWEST_LATENCY:
- new_itr = I40E_ITR_100K;
+ new_itr = I40E_ITR_50K;
break;
case I40E_LOW_LATENCY:
new_itr = I40E_ITR_20K;
break;
case I40E_BULK_LATENCY:
+ new_itr = I40E_ITR_18K;
+ break;
+ case I40E_ULTRA_LATENCY:
new_itr = I40E_ITR_8K;
break;
default:
break;
}
- if (new_itr != rc->itr)
- rc->itr = new_itr;
-
rc->total_bytes = 0;
rc->total_packets = 0;
+
+ if (new_itr != rc->itr) {
+ rc->itr = new_itr;
+ return true;
+ }
+
+ return false;
}
/**
@@ -923,6 +954,8 @@ int i40e_setup_tx_descriptors(struct i40e_ring *tx_ring)
if (!dev)
return -ENOMEM;
+ /* warn if we are about to overwrite the pointer */
+ WARN_ON(tx_ring->tx_bi);
bi_size = sizeof(struct i40e_tx_buffer) * tx_ring->count;
tx_ring->tx_bi = kzalloc(bi_size, GFP_KERNEL);
if (!tx_ring->tx_bi)
@@ -1083,6 +1116,8 @@ int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring)
struct device *dev = rx_ring->dev;
int bi_size;
+ /* warn if we are about to overwrite the pointer */
+ WARN_ON(rx_ring->rx_bi);
bi_size = sizeof(struct i40e_rx_buffer) * rx_ring->count;
rx_ring->rx_bi = kzalloc(bi_size, GFP_KERNEL);
if (!rx_ring->rx_bi)
@@ -1263,16 +1298,11 @@ static void i40e_receive_skb(struct i40e_ring *rx_ring,
struct sk_buff *skb, u16 vlan_tag)
{
struct i40e_q_vector *q_vector = rx_ring->q_vector;
- struct i40e_vsi *vsi = rx_ring->vsi;
- u64 flags = vsi->back->flags;
if (vlan_tag & VLAN_VID_MASK)
__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag);
- if (flags & I40E_FLAG_IN_NETPOLL)
- netif_rx(skb);
- else
- napi_gro_receive(&q_vector->napi, skb);
+ napi_gro_receive(&q_vector->napi, skb);
}
/**
@@ -1439,7 +1469,7 @@ static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget)
unsigned int total_rx_bytes = 0, total_rx_packets = 0;
u16 rx_packet_len, rx_header_len, rx_sph, rx_hbo;
u16 cleaned_count = I40E_DESC_UNUSED(rx_ring);
- const int current_node = numa_node_id();
+ const int current_node = numa_mem_id();
struct i40e_vsi *vsi = rx_ring->vsi;
u16 i = rx_ring->next_to_clean;
union i40e_rx_desc *rx_desc;
@@ -1517,6 +1547,7 @@ static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget)
cleaned_count++;
if (rx_hbo || rx_sph) {
int len;
+
if (rx_hbo)
len = I40E_RX_HDR_SIZE;
else
@@ -1702,9 +1733,6 @@ static int i40e_clean_rx_irq_1buf(struct i40e_ring *rx_ring, int budget)
/* ERR_MASK will only have valid bits if EOP set */
if (unlikely(rx_error & BIT(I40E_RX_DESC_ERROR_RXE_SHIFT))) {
dev_kfree_skb_any(skb);
- /* TODO: shouldn't we increment a counter indicating the
- * drop?
- */
continue;
}
@@ -1749,6 +1777,21 @@ static int i40e_clean_rx_irq_1buf(struct i40e_ring *rx_ring, int budget)
return total_rx_packets;
}
+static u32 i40e_buildreg_itr(const int type, const u16 itr)
+{
+ u32 val;
+
+ val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
+ I40E_PFINT_DYN_CTLN_CLEARPBA_MASK |
+ (type << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT) |
+ (itr << I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT);
+
+ return val;
+}
+
+/* a small macro to shorten up some long lines */
+#define INTREG I40E_PFINT_DYN_CTLN
+
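
For reference, a user-space sketch of the register value the new helper builds. The shift and mask values here (INTENA in bit 0, CLEARPBA in bit 1, ITR_INDX at shift 3, INTERVAL at shift 5) are stand-ins modeled on i40e_register.h, not a substitute for the real defines:

#include <stdint.h>
#include <stdio.h>

/* Stand-in register layout; the driver uses the i40e_register.h masks. */
#define INTENA_MASK	(1u << 0)
#define CLEARPBA_MASK	(1u << 1)
#define ITR_INDX_SHIFT	3
#define INTERVAL_SHIFT	5
#define I40E_RX_ITR	0
#define I40E_ITR_20K	0x0019

static uint32_t buildreg_itr(int type, uint16_t itr)
{
	return INTENA_MASK | CLEARPBA_MASK |
	       ((uint32_t)type << ITR_INDX_SHIFT) |
	       ((uint32_t)itr << INTERVAL_SHIFT);
}

int main(void)
{
	/* enable + clear PBA, Rx ITR index, 20K ints/s interval */
	printf("0x%08x\n", buildreg_itr(I40E_RX_ITR, I40E_ITR_20K));
	return 0;
}
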
/**
* i40e_update_enable_itr - Update itr and re-enable MSIX interrupt
* @vsi: the VSI we care about
@@ -1759,56 +1802,69 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi,
struct i40e_q_vector *q_vector)
{
struct i40e_hw *hw = &vsi->back->hw;
- u16 old_itr;
+ bool rx = false, tx = false;
+ u32 rxval, txval;
int vector;
- u32 val;
vector = (q_vector->v_idx + vsi->base_vector);
+
+ /* avoid dynamic calculation if in countdown mode OR if
+ * dynamic ITR is disabled for both Rx and Tx
+ */
+ rxval = txval = i40e_buildreg_itr(I40E_ITR_NONE, 0);
+
+ if (q_vector->itr_countdown > 0 ||
+ (!ITR_IS_DYNAMIC(vsi->rx_itr_setting) &&
+ !ITR_IS_DYNAMIC(vsi->tx_itr_setting))) {
+ goto enable_int;
+ }
+
if (ITR_IS_DYNAMIC(vsi->rx_itr_setting)) {
- old_itr = q_vector->rx.itr;
- i40e_set_new_dynamic_itr(&q_vector->rx);
- if (old_itr != q_vector->rx.itr) {
- val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
- I40E_PFINT_DYN_CTLN_CLEARPBA_MASK |
- (I40E_RX_ITR <<
- I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT) |
- (q_vector->rx.itr <<
- I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT);
- } else {
- val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
- I40E_PFINT_DYN_CTLN_CLEARPBA_MASK |
- (I40E_ITR_NONE <<
- I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT);
- }
- if (!test_bit(__I40E_DOWN, &vsi->state))
- wr32(hw, I40E_PFINT_DYN_CTLN(vector - 1), val);
- } else {
- i40e_irq_dynamic_enable(vsi,
- q_vector->v_idx + vsi->base_vector);
+ rx = i40e_set_new_dynamic_itr(&q_vector->rx);
+ rxval = i40e_buildreg_itr(I40E_RX_ITR, q_vector->rx.itr);
}
+
if (ITR_IS_DYNAMIC(vsi->tx_itr_setting)) {
- old_itr = q_vector->tx.itr;
- i40e_set_new_dynamic_itr(&q_vector->tx);
- if (old_itr != q_vector->tx.itr) {
- val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
- I40E_PFINT_DYN_CTLN_CLEARPBA_MASK |
- (I40E_TX_ITR <<
- I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT) |
- (q_vector->tx.itr <<
- I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT);
- } else {
- val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
- I40E_PFINT_DYN_CTLN_CLEARPBA_MASK |
- (I40E_ITR_NONE <<
- I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT);
- }
- if (!test_bit(__I40E_DOWN, &vsi->state))
- wr32(hw, I40E_PFINT_DYN_CTLN(q_vector->v_idx +
- vsi->base_vector - 1), val);
- } else {
- i40e_irq_dynamic_enable(vsi,
- q_vector->v_idx + vsi->base_vector);
+ tx = i40e_set_new_dynamic_itr(&q_vector->tx);
+ txval = i40e_buildreg_itr(I40E_TX_ITR, q_vector->tx.itr);
+ }
+
+ if (rx || tx) {
+ /* get the higher of the two ITR adjustments and
+ * use the same value for both ITR registers
+ * when in adaptive mode (Rx and/or Tx)
+ */
+ u16 itr = max(q_vector->tx.itr, q_vector->rx.itr);
+
+ q_vector->tx.itr = q_vector->rx.itr = itr;
+ txval = i40e_buildreg_itr(I40E_TX_ITR, itr);
+ tx = true;
+ rxval = i40e_buildreg_itr(I40E_RX_ITR, itr);
+ rx = true;
}
+
+ /* we only need to enable the interrupt once, but may
+ * need to update both ITR values
+ */
+ if (rx) {
+ /* set the INTENA_MSK_MASK so that this first write
+ * won't actually enable the interrupt, instead just
+ * updating the ITR (bit 31 in both PF and VF registers)
+ */
+ rxval |= BIT(31);
+ /* don't check _DOWN because interrupt isn't being enabled */
+ wr32(hw, INTREG(vector - 1), rxval);
+ }
+
+enable_int:
+ if (!test_bit(__I40E_DOWN, &vsi->state))
+ wr32(hw, INTREG(vector - 1), txval);
+
+ if (q_vector->itr_countdown)
+ q_vector->itr_countdown--;
+ else
+ q_vector->itr_countdown = ITR_COUNTDOWN_START;
+
}
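
A toy model of the itr_countdown behavior added above, assuming ITR_COUNTDOWN_START is 100 as in the driver's i40e.h: the dynamic ITR math runs on roughly one interrupt in a hundred, which is what lets the byte and packet counters accumulate into the smoothed values the earlier comment refers to.

#include <stdio.h>

#define ITR_COUNTDOWN_START 100	/* assumed from i40e.h */

int main(void)
{
	unsigned int countdown = 0, recalcs = 0, irqs;

	for (irqs = 0; irqs < 1000; irqs++) {
		if (countdown > 0) {
			countdown--;	/* skip the dynamic ITR math */
			continue;
		}
		recalcs++;		/* i40e_set_new_dynamic_itr() would run */
		countdown = ITR_COUNTDOWN_START;
	}
	printf("%u ITR recalculations in %u interrupts\n", recalcs, irqs);
	return 0;
}

The BIT(31) write above works because INTENA_MSK gates INTENA, so the Rx ITR can be updated without re-enabling the interrupt; only the final txval write, with bit 31 clear, actually re-arms it.
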
/**
@@ -1829,7 +1885,7 @@ int i40e_napi_poll(struct napi_struct *napi, int budget)
bool clean_complete = true;
bool arm_wb = false;
int budget_per_ring;
- int cleaned;
+ int work_done = 0;
if (test_bit(__I40E_DOWN, &vsi->state)) {
napi_complete(napi);
@@ -1842,24 +1898,34 @@ int i40e_napi_poll(struct napi_struct *napi, int budget)
i40e_for_each_ring(ring, q_vector->tx) {
clean_complete &= i40e_clean_tx_irq(ring, vsi->work_limit);
arm_wb |= ring->arm_wb;
+ ring->arm_wb = false;
}
+ /* Handle case where we are called by netpoll with a budget of 0 */
+ if (budget <= 0)
+ goto tx_only;
+
/* We attempt to distribute budget to each Rx queue fairly, but don't
* allow the budget to go below 1 because that would exit polling early.
*/
budget_per_ring = max(budget/q_vector->num_ringpairs, 1);
i40e_for_each_ring(ring, q_vector->rx) {
+ int cleaned;
+
if (ring_is_ps_enabled(ring))
cleaned = i40e_clean_rx_irq_ps(ring, budget_per_ring);
else
cleaned = i40e_clean_rx_irq_1buf(ring, budget_per_ring);
+
+ work_done += cleaned;
/* if we didn't clean as many as budgeted, we must be done */
clean_complete &= (budget_per_ring != cleaned);
}
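
Two quick cases of the budget split above, as a minimal sketch with hypothetical ring counts:

#include <stdio.h>

#define max(a, b) ((a) > (b) ? (a) : (b))

int main(void)
{
	int budget = 64;

	/* plenty of budget: 64 / 4 ring pairs = 16 per ring */
	printf("%d\n", max(budget / 4, 1));
	/* more ring pairs than budget: clamp to 1 so polling continues */
	printf("%d\n", max(budget / 128, 1));
	return 0;
}
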
/* If work not completed, return budget and polling will return */
if (!clean_complete) {
+tx_only:
if (arm_wb)
i40e_force_wb(vsi, q_vector);
return budget;
@@ -1869,7 +1935,7 @@ int i40e_napi_poll(struct napi_struct *napi, int budget)
q_vector->arm_wb_state = false;
/* Work is done so exit the polling mode and re-enable the interrupt */
- napi_complete(napi);
+ napi_complete_done(napi, work_done);
if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED) {
i40e_update_enable_itr(vsi, q_vector);
} else { /* Legacy mode */
@@ -2077,6 +2143,7 @@ static inline int i40e_tx_prepare_vlan_flags(struct sk_buff *skb,
/* else if it is a SW VLAN, check the next protocol and store the tag */
} else if (protocol == htons(ETH_P_8021Q)) {
struct vlan_hdr *vhdr, _vhdr;
+
vhdr = skb_header_pointer(skb, ETH_HLEN, sizeof(_vhdr), &_vhdr);
if (!vhdr)
return -EINVAL;
@@ -2120,6 +2187,7 @@ out:
* @tx_ring: ptr to the ring to send
* @skb: ptr to the skb we're sending
* @hdr_len: ptr to the size of the packet header
+ * @cd_type_cmd_tso_mss: ptr to u64 object
* @cd_tunneling: ptr to context descriptor bits
*
* Returns 0 if no TSO can happen, 1 if tso is going, or error
@@ -2179,6 +2247,7 @@ static int i40e_tso(struct i40e_ring *tx_ring, struct sk_buff *skb,
* @tx_ring: ptr to the ring to send
* @skb: ptr to the skb we're sending
* @tx_flags: the collected send information
+ * @cd_type_cmd_tso_mss: ptr to u64 object
*
* Returns 0 if no Tx timestamp can happen and 1 if the timestamp will happen
**/
@@ -2221,6 +2290,7 @@ static int i40e_tsyn(struct i40e_ring *tx_ring, struct sk_buff *skb,
* @tx_flags: pointer to Tx flags currently set
* @td_cmd: Tx descriptor command bits to set
* @td_offset: Tx descriptor header offsets to set
+ * @tx_ring: Tx descriptor ring
* @cd_tunneling: ptr to context desc bits
**/
static void i40e_tx_enable_csum(struct sk_buff *skb, u32 *tx_flags,
@@ -2736,6 +2806,7 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb,
u8 hdr_len = 0;
int tsyn;
int tso;
+
if (0 == i40e_xmit_descriptor_count(skb, tx_ring))
return NETDEV_TX_BUSY;
@@ -2768,10 +2839,11 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb,
if (tsyn)
tx_flags |= I40E_TX_FLAGS_TSYN;
- if (i40e_chk_linearize(skb, tx_flags))
+ if (i40e_chk_linearize(skb, tx_flags)) {
if (skb_linearize(skb))
goto out_drop;
-
+ tx_ring->tx_stats.tx_linearize++;
+ }
skb_tx_timestamp(skb);
/* always enable CRC insertion offload */