Diffstat (limited to 'drivers/net/ethernet/intel/i40e/i40e_txrx.c')
-rw-r--r-- | drivers/net/ethernet/intel/i40e/i40e_txrx.c | 212 |
1 files changed, 142 insertions, 70 deletions
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 3ce4900c0c43..635b3ac17877 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -465,10 +465,11 @@ static void i40e_fd_handle_status(struct i40e_ring *rx_ring,
 		  I40E_RX_PROG_STATUS_DESC_QW1_ERROR_SHIFT;
 
 	if (error == BIT(I40E_RX_PROG_STATUS_DESC_FD_TBL_FULL_SHIFT)) {
+		pf->fd_inv = le32_to_cpu(rx_desc->wb.qword0.hi_dword.fd_id);
 		if ((rx_desc->wb.qword0.hi_dword.fd_id != 0) ||
 		    (I40E_DEBUG_FD & pf->hw.debug_mask))
 			dev_warn(&pdev->dev, "ntuple filter loc = %d, could not be added\n",
-				 rx_desc->wb.qword0.hi_dword.fd_id);
+				 pf->fd_inv);
 
 		/* Check if the programming error is for ATR.
 		 * If so, auto disable ATR and set a state for
@@ -814,6 +815,8 @@ void i40e_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector)
  * i40e_set_new_dynamic_itr - Find new ITR level
  * @rc: structure containing ring performance data
  *
+ * Returns true if ITR changed, false if not
+ *
  * Stores a new ITR value based on packets and byte counts during
  * the last interrupt. The advantage of per interrupt computation
  * is faster updates and more accurate ITR for the current traffic
@@ -822,21 +825,32 @@ void i40e_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector)
  * testing data as well as attempting to minimize response time
  * while increasing bulk throughput.
  **/
-static void i40e_set_new_dynamic_itr(struct i40e_ring_container *rc)
+static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc)
 {
 	enum i40e_latency_range new_latency_range = rc->latency_range;
+	struct i40e_q_vector *qv = rc->ring->q_vector;
 	u32 new_itr = rc->itr;
 	int bytes_per_int;
+	int usecs;
 
 	if (rc->total_packets == 0 || !rc->itr)
-		return;
+		return false;
 
 	/* simple throttlerate management
-	 *   0-10MB/s   lowest (100000 ints/s)
+	 *   0-10MB/s   lowest (50000 ints/s)
 	 *  10-20MB/s   low    (20000 ints/s)
-	 *  20-1249MB/s bulk   (8000 ints/s)
+	 *  20-1249MB/s bulk   (18000 ints/s)
+	 *  > 40000 Rx packets per second (8000 ints/s)
+	 *
+	 * The math works out because the divisor is in 10^(-6) which
+	 * turns the bytes/us input value into MB/s values, but
+	 * make sure to use usecs, as the register values written
+	 * are in 2 usec increments in the ITR registers, and make sure
+	 * to use the smoothed values that the countdown timer gives us.
 	 */
-	bytes_per_int = rc->total_bytes / rc->itr;
+	usecs = (rc->itr << 1) * ITR_COUNTDOWN_START;
+	bytes_per_int = rc->total_bytes / usecs;
+
 	switch (new_latency_range) {
 	case I40E_LOWEST_LATENCY:
 		if (bytes_per_int > 10)
@@ -849,35 +863,52 @@ static void i40e_set_new_dynamic_itr(struct i40e_ring_container *rc)
 			new_latency_range = I40E_LOWEST_LATENCY;
 		break;
 	case I40E_BULK_LATENCY:
-		if (bytes_per_int <= 20)
-			new_latency_range = I40E_LOW_LATENCY;
-		break;
+	case I40E_ULTRA_LATENCY:
 	default:
 		if (bytes_per_int <= 20)
 			new_latency_range = I40E_LOW_LATENCY;
 		break;
 	}
+
+	/* this is to adjust RX more aggressively when streaming small
+	 * packets. The value of 40000 was picked as it is just beyond
+	 * what the hardware can receive per second if in low latency
+	 * mode.
+	 */
+#define RX_ULTRA_PACKET_RATE 40000
+
+	if ((((rc->total_packets * 1000000) / usecs) > RX_ULTRA_PACKET_RATE) &&
+	    (&qv->rx == rc))
+		new_latency_range = I40E_ULTRA_LATENCY;
+
 	rc->latency_range = new_latency_range;
 
 	switch (new_latency_range) {
 	case I40E_LOWEST_LATENCY:
-		new_itr = I40E_ITR_100K;
+		new_itr = I40E_ITR_50K;
 		break;
 	case I40E_LOW_LATENCY:
 		new_itr = I40E_ITR_20K;
 		break;
 	case I40E_BULK_LATENCY:
+		new_itr = I40E_ITR_18K;
+		break;
+	case I40E_ULTRA_LATENCY:
 		new_itr = I40E_ITR_8K;
 		break;
 	default:
 		break;
 	}
 
-	if (new_itr != rc->itr)
-		rc->itr = new_itr;
-
 	rc->total_bytes = 0;
 	rc->total_packets = 0;
+
+	if (new_itr != rc->itr) {
+		rc->itr = new_itr;
+		return true;
+	}
+
+	return false;
 }
 
 /**
@@ -923,6 +954,8 @@ int i40e_setup_tx_descriptors(struct i40e_ring *tx_ring)
 	if (!dev)
 		return -ENOMEM;
 
+	/* warn if we are about to overwrite the pointer */
+	WARN_ON(tx_ring->tx_bi);
 	bi_size = sizeof(struct i40e_tx_buffer) * tx_ring->count;
 	tx_ring->tx_bi = kzalloc(bi_size, GFP_KERNEL);
 	if (!tx_ring->tx_bi)
@@ -1083,6 +1116,8 @@ int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring)
 	struct device *dev = rx_ring->dev;
 	int bi_size;
 
+	/* warn if we are about to overwrite the pointer */
+	WARN_ON(rx_ring->rx_bi);
 	bi_size = sizeof(struct i40e_rx_buffer) * rx_ring->count;
 	rx_ring->rx_bi = kzalloc(bi_size, GFP_KERNEL);
 	if (!rx_ring->rx_bi)
@@ -1263,16 +1298,11 @@ static void i40e_receive_skb(struct i40e_ring *rx_ring,
 			     struct sk_buff *skb, u16 vlan_tag)
 {
 	struct i40e_q_vector *q_vector = rx_ring->q_vector;
-	struct i40e_vsi *vsi = rx_ring->vsi;
-	u64 flags = vsi->back->flags;
 
 	if (vlan_tag & VLAN_VID_MASK)
 		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag);
 
-	if (flags & I40E_FLAG_IN_NETPOLL)
-		netif_rx(skb);
-	else
-		napi_gro_receive(&q_vector->napi, skb);
+	napi_gro_receive(&q_vector->napi, skb);
 }
 
 /**
@@ -1439,7 +1469,7 @@ static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget)
 	unsigned int total_rx_bytes = 0, total_rx_packets = 0;
 	u16 rx_packet_len, rx_header_len, rx_sph, rx_hbo;
 	u16 cleaned_count = I40E_DESC_UNUSED(rx_ring);
-	const int current_node = numa_node_id();
+	const int current_node = numa_mem_id();
 	struct i40e_vsi *vsi = rx_ring->vsi;
 	u16 i = rx_ring->next_to_clean;
 	union i40e_rx_desc *rx_desc;
@@ -1517,6 +1547,7 @@ static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget)
 		cleaned_count++;
 		if (rx_hbo || rx_sph) {
 			int len;
+
 			if (rx_hbo)
 				len = I40E_RX_HDR_SIZE;
 			else
@@ -1702,9 +1733,6 @@ static int i40e_clean_rx_irq_1buf(struct i40e_ring *rx_ring, int budget)
 		/* ERR_MASK will only have valid bits if EOP set */
 		if (unlikely(rx_error & BIT(I40E_RX_DESC_ERROR_RXE_SHIFT))) {
 			dev_kfree_skb_any(skb);
-			/* TODO: shouldn't we increment a counter indicating the
-			 * drop?
-			 */
 			continue;
 		}
 
@@ -1749,6 +1777,21 @@ static int i40e_clean_rx_irq_1buf(struct i40e_ring *rx_ring, int budget)
 	return total_rx_packets;
 }
 
+static u32 i40e_buildreg_itr(const int type, const u16 itr)
+{
+	u32 val;
+
+	val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
+	      I40E_PFINT_DYN_CTLN_CLEARPBA_MASK |
+	      (type << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT) |
+	      (itr << I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT);
+
+	return val;
+}
+
+/* a small macro to shorten up some long lines */
+#define INTREG I40E_PFINT_DYN_CTLN
+
 /**
  * i40e_update_enable_itr - Update itr and re-enable MSIX interrupt
  * @vsi: the VSI we care about
@@ -1759,56 +1802,69 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi,
 					  struct i40e_q_vector *q_vector)
 {
 	struct i40e_hw *hw = &vsi->back->hw;
-	u16 old_itr;
+	bool rx = false, tx = false;
+	u32 rxval, txval;
 	int vector;
-	u32 val;
 
 	vector = (q_vector->v_idx + vsi->base_vector);
+
+	/* avoid dynamic calculation if in countdown mode OR if
+	 * all dynamic is disabled
+	 */
+	rxval = txval = i40e_buildreg_itr(I40E_ITR_NONE, 0);
+
+	if (q_vector->itr_countdown > 0 ||
+	    (!ITR_IS_DYNAMIC(vsi->rx_itr_setting) &&
+	     !ITR_IS_DYNAMIC(vsi->tx_itr_setting))) {
+		goto enable_int;
+	}
+
 	if (ITR_IS_DYNAMIC(vsi->rx_itr_setting)) {
-		old_itr = q_vector->rx.itr;
-		i40e_set_new_dynamic_itr(&q_vector->rx);
-		if (old_itr != q_vector->rx.itr) {
-			val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
-			I40E_PFINT_DYN_CTLN_CLEARPBA_MASK |
-			(I40E_RX_ITR <<
-			 I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT) |
-			(q_vector->rx.itr <<
-			 I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT);
-		} else {
-			val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
-			I40E_PFINT_DYN_CTLN_CLEARPBA_MASK |
-			(I40E_ITR_NONE <<
-			 I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT);
-		}
-		if (!test_bit(__I40E_DOWN, &vsi->state))
-			wr32(hw, I40E_PFINT_DYN_CTLN(vector - 1), val);
-	} else {
-		i40e_irq_dynamic_enable(vsi,
-					q_vector->v_idx + vsi->base_vector);
+		rx = i40e_set_new_dynamic_itr(&q_vector->rx);
+		rxval = i40e_buildreg_itr(I40E_RX_ITR, q_vector->rx.itr);
 	}
+
 	if (ITR_IS_DYNAMIC(vsi->tx_itr_setting)) {
-		old_itr = q_vector->tx.itr;
-		i40e_set_new_dynamic_itr(&q_vector->tx);
-		if (old_itr != q_vector->tx.itr) {
-			val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
-				I40E_PFINT_DYN_CTLN_CLEARPBA_MASK |
-				(I40E_TX_ITR <<
-				   I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT) |
-				(q_vector->tx.itr <<
-				   I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT);
-		} else {
-			val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
-				I40E_PFINT_DYN_CTLN_CLEARPBA_MASK |
-				(I40E_ITR_NONE <<
-				   I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT);
-		}
-		if (!test_bit(__I40E_DOWN, &vsi->state))
-			wr32(hw, I40E_PFINT_DYN_CTLN(q_vector->v_idx +
-			      vsi->base_vector - 1), val);
-	} else {
-		i40e_irq_dynamic_enable(vsi,
-					q_vector->v_idx + vsi->base_vector);
+		tx = i40e_set_new_dynamic_itr(&q_vector->tx);
+		txval = i40e_buildreg_itr(I40E_TX_ITR, q_vector->tx.itr);
+	}
+
+	if (rx || tx) {
+		/* get the higher of the two ITR adjustments and
+		 * use the same value for both ITR registers
+		 * when in adaptive mode (Rx and/or Tx)
+		 */
+		u16 itr = max(q_vector->tx.itr, q_vector->rx.itr);
+
+		q_vector->tx.itr = q_vector->rx.itr = itr;
+		txval = i40e_buildreg_itr(I40E_TX_ITR, itr);
+		tx = true;
+		rxval = i40e_buildreg_itr(I40E_RX_ITR, itr);
+		rx = true;
 	}
+
+	/* only need to enable the interrupt once, but need
+	 * to possibly update both ITR values
+	 */
+	if (rx) {
+		/* set the INTENA_MSK_MASK so that this first write
+		 * won't actually enable the interrupt, instead just
+		 * updating the ITR (it's bit 31 PF and VF)
+		 */
+		rxval |= BIT(31);
+		/* don't check _DOWN because interrupt isn't being enabled */
+		wr32(hw, INTREG(vector - 1), rxval);
+	}
+
+enable_int:
+	if (!test_bit(__I40E_DOWN, &vsi->state))
+		wr32(hw, INTREG(vector - 1), txval);
+
+	if (q_vector->itr_countdown)
+		q_vector->itr_countdown--;
+	else
+		q_vector->itr_countdown = ITR_COUNTDOWN_START;
+
 }
 
 /**
@@ -1829,7 +1885,7 @@ int i40e_napi_poll(struct napi_struct *napi, int budget)
 	bool clean_complete = true;
 	bool arm_wb = false;
 	int budget_per_ring;
-	int cleaned;
+	int work_done = 0;
 
 	if (test_bit(__I40E_DOWN, &vsi->state)) {
 		napi_complete(napi);
@@ -1842,24 +1898,34 @@ int i40e_napi_poll(struct napi_struct *napi, int budget)
 	i40e_for_each_ring(ring, q_vector->tx) {
 		clean_complete &= i40e_clean_tx_irq(ring, vsi->work_limit);
 		arm_wb |= ring->arm_wb;
+		ring->arm_wb = false;
 	}
 
+	/* Handle case where we are called by netpoll with a budget of 0 */
+	if (budget <= 0)
+		goto tx_only;
+
 	/* We attempt to distribute budget to each Rx queue fairly, but don't
 	 * allow the budget to go below 1 because that would exit polling early.
 	 */
 	budget_per_ring = max(budget/q_vector->num_ringpairs, 1);
 
 	i40e_for_each_ring(ring, q_vector->rx) {
+		int cleaned;
+
 		if (ring_is_ps_enabled(ring))
 			cleaned = i40e_clean_rx_irq_ps(ring, budget_per_ring);
 		else
 			cleaned = i40e_clean_rx_irq_1buf(ring, budget_per_ring);
+
+		work_done += cleaned;
 		/* if we didn't clean as many as budgeted, we must be done */
 		clean_complete &= (budget_per_ring != cleaned);
 	}
 
 	/* If work not completed, return budget and polling will return */
 	if (!clean_complete) {
+tx_only:
 		if (arm_wb)
 			i40e_force_wb(vsi, q_vector);
 		return budget;
@@ -1869,7 +1935,7 @@ int i40e_napi_poll(struct napi_struct *napi, int budget)
 	q_vector->arm_wb_state = false;
 
 	/* Work is done so exit the polling mode and re-enable the interrupt */
-	napi_complete(napi);
+	napi_complete_done(napi, work_done);
 	if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED) {
 		i40e_update_enable_itr(vsi, q_vector);
 	} else { /* Legacy mode */
@@ -2077,6 +2143,7 @@ static inline int i40e_tx_prepare_vlan_flags(struct sk_buff *skb,
 	/* else if it is a SW VLAN, check the next protocol and store the tag */
 	} else if (protocol == htons(ETH_P_8021Q)) {
 		struct vlan_hdr *vhdr, _vhdr;
+
 		vhdr = skb_header_pointer(skb, ETH_HLEN, sizeof(_vhdr), &_vhdr);
 		if (!vhdr)
 			return -EINVAL;
@@ -2120,6 +2187,7 @@ out:
  * @tx_ring:  ptr to the ring to send
  * @skb:      ptr to the skb we're sending
  * @hdr_len:  ptr to the size of the packet header
+ * @cd_type_cmd_tso_mss: ptr to u64 object
  * @cd_tunneling: ptr to context descriptor bits
  *
  * Returns 0 if no TSO can happen, 1 if tso is going, or error
@@ -2179,6 +2247,7 @@ static int i40e_tso(struct i40e_ring *tx_ring, struct sk_buff *skb,
  * @tx_ring:  ptr to the ring to send
  * @skb:      ptr to the skb we're sending
  * @tx_flags: the collected send information
+ * @cd_type_cmd_tso_mss: ptr to u64 object
  *
  * Returns 0 if no Tx timestamp can happen and 1 if the timestamp will happen
  **/
@@ -2221,6 +2290,7 @@ static int i40e_tsyn(struct i40e_ring *tx_ring, struct sk_buff *skb,
  * @tx_flags: pointer to Tx flags currently set
  * @td_cmd:   Tx descriptor command bits to set
  * @td_offset: Tx descriptor header offsets to set
+ * @tx_ring:  Tx descriptor ring
  * @cd_tunneling: ptr to context desc bits
  **/
 static void i40e_tx_enable_csum(struct sk_buff *skb, u32 *tx_flags,
@@ -2736,6 +2806,7 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb,
 	u8 hdr_len = 0;
 	int tsyn;
 	int tso;
+
 	if (0 == i40e_xmit_descriptor_count(skb, tx_ring))
 		return NETDEV_TX_BUSY;
 
@@ -2768,10 +2839,11 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb,
 	if (tsyn)
 		tx_flags |= I40E_TX_FLAGS_TSYN;
 
-	if (i40e_chk_linearize(skb, tx_flags))
+	if (i40e_chk_linearize(skb, tx_flags)) {
 		if (skb_linearize(skb))
 			goto out_drop;
-
+		tx_ring->tx_stats.tx_linearize++;
+	}
 	skb_tx_timestamp(skb);
 
 	/* always enable CRC insertion offload */
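The core of this patch is the rework of i40e_set_new_dynamic_itr() above: it scales the byte count by a smoothed window (itr in 2 usec units times the countdown length) instead of dividing by the raw ITR value, and adds an ULTRA bucket for small-packet Rx streams. The following is a minimal standalone sketch of that bucketing logic, not the driver code itself; the ITR_COUNTDOWN_START value and the traffic figures in main() are assumptions chosen only to make the example self-contained.

/* Standalone illustration of the adaptive ITR bucketing added above.
 * Not driver code: ITR_COUNTDOWN_START and the numbers in main() are
 * assumed values used only to keep the example self-contained.
 */
#include <stdio.h>

enum latency_range { LOWEST_LATENCY, LOW_LATENCY, BULK_LATENCY, ULTRA_LATENCY };

#define ITR_COUNTDOWN_START	100	/* assumed smoothing window, in interrupts */
#define RX_ULTRA_PACKET_RATE	40000	/* packets/sec threshold from the patch */

static enum latency_range pick_range(enum latency_range cur,
				     unsigned int itr, /* in 2 usec units */
				     unsigned long long total_bytes,
				     unsigned long long total_packets,
				     int is_rx)
{
	unsigned long long usecs, bytes_per_int;
	enum latency_range next = cur;

	if (!total_packets || !itr)
		return cur;

	/* ITR is stored in 2 usec units; scale by the countdown window */
	usecs = (unsigned long long)(itr << 1) * ITR_COUNTDOWN_START;
	bytes_per_int = total_bytes / usecs;	/* bytes/usec == MB/s */

	switch (cur) {
	case LOWEST_LATENCY:
		if (bytes_per_int > 10)
			next = LOW_LATENCY;
		break;
	case LOW_LATENCY:
		if (bytes_per_int > 20)
			next = BULK_LATENCY;
		else if (bytes_per_int <= 10)
			next = LOWEST_LATENCY;
		break;
	default:	/* BULK and ULTRA fall back together, as in the patch */
		if (bytes_per_int <= 20)
			next = LOW_LATENCY;
		break;
	}

	/* small-packet Rx streams are pushed to the slowest (8K ints/s) bucket */
	if (is_rx && (total_packets * 1000000ULL) / usecs > RX_ULTRA_PACKET_RATE)
		next = ULTRA_LATENCY;

	return next;
}

int main(void)
{
	/* 64-byte frames at ~1.2 Mpps while the ring runs at ~20K ints/s */
	printf("range = %d\n", pick_range(LOW_LATENCY, 25, 64ULL * 6000, 6000, 1));
	return 0;
}

With those numbers the sketch prints range = 3 (the ULTRA bucket), matching the intent of the patch: an Rx stream above 40000 packets per second is throttled down to the 8K interrupts/s rate rather than being treated as ordinary bulk traffic.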