aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/net/ethernet/intel/i40e/i40e_txrx.c
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2016-02-17 23:47:32 -0500
committerDavid S. Miller <davem@davemloft.net>2016-02-17 23:47:32 -0500
commitd8ef034730350f98c622eb7560dea28b258e9daf (patch)
tree2c4671a47144046cd2ed18dee21fdf2e29136482 /drivers/net/ethernet/intel/i40e/i40e_txrx.c
parentMerge branch 'rocker-worlds' (diff)
parenti40e/i40evf: Bump version (diff)
downloadlinux-dev-d8ef034730350f98c622eb7560dea28b258e9daf.tar.xz
linux-dev-d8ef034730350f98c622eb7560dea28b258e9daf.zip
Merge branch '40GbE' of git://git.kernel.org/pub/scm/linux/kernel/git/jkirsher/next-queue
Jeff Kirsher says: ==================== 40GbE Intel Wired LAN Driver Updates 2016-02-17 This series contains updates to i40e/i40evf only (again). Jesse moves sync_vsi_filters() up in the service_task because it may need to request a reset, and we do not want to wait another round of service task time. Refactored the enable_icr0() in order to allow it to be decided by the caller whether the CLEARPBA (clear pending events) bit will be set while re-enabling the interrupt. Also provides the "Don't Give Up" patch, where the driver will keep polling trying to allocate receive buffers until it succeeds. This should keep all receive queues running even in the face of memory pressure. Cleans up the debugging helpers by putting everything in hex to be consistent. Neerav updates the DCB firmware version related checkes specific to X710 and XL710 only since the checks are not required for X722 devices. Shannon adds the use of the new shared MAC filter bit for multicast and broadcast filters in order to make better use of the filters available from the device. Added a parameter to allow the driver to set the enable/disable of statistics gathering in the hardware switch. Also the L2 cloud filtering parameter is removed since it was never used. Anjali refactors the force_wb and WB_ON_ITR functionality since Force-WriteBack functionality in X710/XL710 devices has been moved out of the clean routine and into the service task, so we need to make sure WriteBack-On-ITR is separated out since it is still called from clean. Catherine changes the VF driver string to reflect all the products that are supported. Mitch refactors the packet split receive code to properly use half-pages for receives. Also changes the use of bitwise operators to logical operators on clean_complete variable, while making a witty reference to Mr. Spock. Cleans up (i.e. removes) the hsplit field in the ring structure and use the existing macro to detect packet split enablement, which allows debugfs dumps of the VSI to properly show which recevie routine is in use. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers/net/ethernet/intel/i40e/i40e_txrx.c')
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_txrx.c249
1 files changed, 166 insertions, 83 deletions
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 6d1dd60c5c91..0ffa9a89986c 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -1,7 +1,7 @@
/*******************************************************************************
*
* Intel Ethernet Controller XL710 Family Linux Driver
- * Copyright(c) 2013 - 2014 Intel Corporation.
+ * Copyright(c) 2013 - 2016 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
@@ -774,37 +774,48 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget)
}
/**
- * i40e_force_wb - Arm hardware to do a wb on noncache aligned descriptors
+ * i40e_enable_wb_on_itr - Arm hardware to do a wb, interrupts are not enabled
* @vsi: the VSI we care about
- * @q_vector: the vector on which to force writeback
+ * @q_vector: the vector on which to enable writeback
*
**/
-void i40e_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector)
+static void i40e_enable_wb_on_itr(struct i40e_vsi *vsi,
+ struct i40e_q_vector *q_vector)
{
u16 flags = q_vector->tx.ring[0].flags;
+ u32 val;
- if (flags & I40E_TXR_FLAGS_WB_ON_ITR) {
- u32 val;
+ if (!(flags & I40E_TXR_FLAGS_WB_ON_ITR))
+ return;
- if (q_vector->arm_wb_state)
- return;
+ if (q_vector->arm_wb_state)
+ return;
- if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED) {
- val = I40E_PFINT_DYN_CTLN_WB_ON_ITR_MASK |
- I40E_PFINT_DYN_CTLN_ITR_INDX_MASK; /* set noitr */
+ if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED) {
+ val = I40E_PFINT_DYN_CTLN_WB_ON_ITR_MASK |
+ I40E_PFINT_DYN_CTLN_ITR_INDX_MASK; /* set noitr */
- wr32(&vsi->back->hw,
- I40E_PFINT_DYN_CTLN(q_vector->v_idx +
- vsi->base_vector - 1),
- val);
- } else {
- val = I40E_PFINT_DYN_CTL0_WB_ON_ITR_MASK |
- I40E_PFINT_DYN_CTL0_ITR_INDX_MASK; /* set noitr */
+ wr32(&vsi->back->hw,
+ I40E_PFINT_DYN_CTLN(q_vector->v_idx + vsi->base_vector - 1),
+ val);
+ } else {
+ val = I40E_PFINT_DYN_CTL0_WB_ON_ITR_MASK |
+ I40E_PFINT_DYN_CTL0_ITR_INDX_MASK; /* set noitr */
- wr32(&vsi->back->hw, I40E_PFINT_DYN_CTL0, val);
- }
- q_vector->arm_wb_state = true;
- } else if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED) {
+ wr32(&vsi->back->hw, I40E_PFINT_DYN_CTL0, val);
+ }
+ q_vector->arm_wb_state = true;
+}
+
+/**
+ * i40e_force_wb - Issue SW Interrupt so HW does a wb
+ * @vsi: the VSI we care about
+ * @q_vector: the vector on which to force writeback
+ *
+ **/
+void i40e_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector)
+{
+ if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED) {
u32 val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
I40E_PFINT_DYN_CTLN_ITR_INDX_MASK | /* set noitr */
I40E_PFINT_DYN_CTLN_SWINT_TRIG_MASK |
@@ -1049,7 +1060,7 @@ void i40e_clean_rx_ring(struct i40e_ring *rx_ring)
if (rx_bi->page_dma) {
dma_unmap_page(dev,
rx_bi->page_dma,
- PAGE_SIZE / 2,
+ PAGE_SIZE,
DMA_FROM_DEVICE);
rx_bi->page_dma = 0;
}
@@ -1184,16 +1195,19 @@ static inline void i40e_release_rx_desc(struct i40e_ring *rx_ring, u32 val)
* i40e_alloc_rx_buffers_ps - Replace used receive buffers; packet split
* @rx_ring: ring to place buffers on
* @cleaned_count: number of buffers to replace
+ *
+ * Returns true if any errors on allocation
**/
-void i40e_alloc_rx_buffers_ps(struct i40e_ring *rx_ring, u16 cleaned_count)
+bool i40e_alloc_rx_buffers_ps(struct i40e_ring *rx_ring, u16 cleaned_count)
{
u16 i = rx_ring->next_to_use;
union i40e_rx_desc *rx_desc;
struct i40e_rx_buffer *bi;
+ const int current_node = numa_node_id();
/* do nothing if no valid netdev defined */
if (!rx_ring->netdev || !cleaned_count)
- return;
+ return false;
while (cleaned_count--) {
rx_desc = I40E_RX_DESC(rx_ring, i);
@@ -1201,56 +1215,79 @@ void i40e_alloc_rx_buffers_ps(struct i40e_ring *rx_ring, u16 cleaned_count)
if (bi->skb) /* desc is in use */
goto no_buffers;
+
+ /* If we've been moved to a different NUMA node, release the
+ * page so we can get a new one on the current node.
+ */
+ if (bi->page && page_to_nid(bi->page) != current_node) {
+ dma_unmap_page(rx_ring->dev,
+ bi->page_dma,
+ PAGE_SIZE,
+ DMA_FROM_DEVICE);
+ __free_page(bi->page);
+ bi->page = NULL;
+ bi->page_dma = 0;
+ rx_ring->rx_stats.realloc_count++;
+ } else if (bi->page) {
+ rx_ring->rx_stats.page_reuse_count++;
+ }
+
if (!bi->page) {
bi->page = alloc_page(GFP_ATOMIC);
if (!bi->page) {
rx_ring->rx_stats.alloc_page_failed++;
goto no_buffers;
}
- }
-
- if (!bi->page_dma) {
- /* use a half page if we're re-using */
- bi->page_offset ^= PAGE_SIZE / 2;
bi->page_dma = dma_map_page(rx_ring->dev,
bi->page,
- bi->page_offset,
- PAGE_SIZE / 2,
+ 0,
+ PAGE_SIZE,
DMA_FROM_DEVICE);
- if (dma_mapping_error(rx_ring->dev,
- bi->page_dma)) {
+ if (dma_mapping_error(rx_ring->dev, bi->page_dma)) {
rx_ring->rx_stats.alloc_page_failed++;
+ __free_page(bi->page);
+ bi->page = NULL;
bi->page_dma = 0;
+ bi->page_offset = 0;
goto no_buffers;
}
+ bi->page_offset = 0;
}
- dma_sync_single_range_for_device(rx_ring->dev,
- rx_ring->rx_bi[0].dma,
- i * rx_ring->rx_hdr_len,
- rx_ring->rx_hdr_len,
- DMA_FROM_DEVICE);
/* Refresh the desc even if buffer_addrs didn't change
* because each write-back erases this info.
*/
- rx_desc->read.pkt_addr = cpu_to_le64(bi->page_dma);
+ rx_desc->read.pkt_addr =
+ cpu_to_le64(bi->page_dma + bi->page_offset);
rx_desc->read.hdr_addr = cpu_to_le64(bi->dma);
i++;
if (i == rx_ring->count)
i = 0;
}
+ if (rx_ring->next_to_use != i)
+ i40e_release_rx_desc(rx_ring, i);
+
+ return false;
+
no_buffers:
if (rx_ring->next_to_use != i)
i40e_release_rx_desc(rx_ring, i);
+
+ /* make sure to come back via polling to try again after
+ * allocation failure
+ */
+ return true;
}
/**
* i40e_alloc_rx_buffers_1buf - Replace used receive buffers; single buffer
* @rx_ring: ring to place buffers on
* @cleaned_count: number of buffers to replace
+ *
+ * Returns true if any errors on allocation
**/
-void i40e_alloc_rx_buffers_1buf(struct i40e_ring *rx_ring, u16 cleaned_count)
+bool i40e_alloc_rx_buffers_1buf(struct i40e_ring *rx_ring, u16 cleaned_count)
{
u16 i = rx_ring->next_to_use;
union i40e_rx_desc *rx_desc;
@@ -1259,7 +1296,7 @@ void i40e_alloc_rx_buffers_1buf(struct i40e_ring *rx_ring, u16 cleaned_count)
/* do nothing if no valid netdev defined */
if (!rx_ring->netdev || !cleaned_count)
- return;
+ return false;
while (cleaned_count--) {
rx_desc = I40E_RX_DESC(rx_ring, i);
@@ -1267,8 +1304,10 @@ void i40e_alloc_rx_buffers_1buf(struct i40e_ring *rx_ring, u16 cleaned_count)
skb = bi->skb;
if (!skb) {
- skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
- rx_ring->rx_buf_len);
+ skb = __netdev_alloc_skb_ip_align(rx_ring->netdev,
+ rx_ring->rx_buf_len,
+ GFP_ATOMIC |
+ __GFP_NOWARN);
if (!skb) {
rx_ring->rx_stats.alloc_buff_failed++;
goto no_buffers;
@@ -1286,6 +1325,8 @@ void i40e_alloc_rx_buffers_1buf(struct i40e_ring *rx_ring, u16 cleaned_count)
if (dma_mapping_error(rx_ring->dev, bi->dma)) {
rx_ring->rx_stats.alloc_buff_failed++;
bi->dma = 0;
+ dev_kfree_skb(bi->skb);
+ bi->skb = NULL;
goto no_buffers;
}
}
@@ -1297,9 +1338,19 @@ void i40e_alloc_rx_buffers_1buf(struct i40e_ring *rx_ring, u16 cleaned_count)
i = 0;
}
+ if (rx_ring->next_to_use != i)
+ i40e_release_rx_desc(rx_ring, i);
+
+ return false;
+
no_buffers:
if (rx_ring->next_to_use != i)
i40e_release_rx_desc(rx_ring, i);
+
+ /* make sure to come back via polling to try again after
+ * allocation failure
+ */
+ return true;
}
/**
@@ -1483,18 +1534,19 @@ static inline void i40e_rx_hash(struct i40e_ring *ring,
*
* Returns true if there's any budget left (e.g. the clean is finished)
**/
-static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget)
+static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, const int budget)
{
unsigned int total_rx_bytes = 0, total_rx_packets = 0;
u16 rx_packet_len, rx_header_len, rx_sph, rx_hbo;
u16 cleaned_count = I40E_DESC_UNUSED(rx_ring);
- const int current_node = numa_mem_id();
struct i40e_vsi *vsi = rx_ring->vsi;
u16 i = rx_ring->next_to_clean;
union i40e_rx_desc *rx_desc;
u32 rx_error, rx_status;
+ bool failure = false;
u8 rx_ptype;
u64 qword;
+ u32 copysize;
if (budget <= 0)
return 0;
@@ -1505,7 +1557,9 @@ static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget)
u16 vlan_tag;
/* return some buffers to hardware, one at a time is too slow */
if (cleaned_count >= I40E_RX_BUFFER_WRITE) {
- i40e_alloc_rx_buffers_ps(rx_ring, cleaned_count);
+ failure = failure ||
+ i40e_alloc_rx_buffers_ps(rx_ring,
+ cleaned_count);
cleaned_count = 0;
}
@@ -1523,6 +1577,12 @@ static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget)
* DD bit is set.
*/
dma_rmb();
+ /* sync header buffer for reading */
+ dma_sync_single_range_for_cpu(rx_ring->dev,
+ rx_ring->rx_bi[0].dma,
+ i * rx_ring->rx_hdr_len,
+ rx_ring->rx_hdr_len,
+ DMA_FROM_DEVICE);
if (i40e_rx_is_programming_status(qword)) {
i40e_clean_programming_status(rx_ring, rx_desc);
I40E_RX_INCREMENT(rx_ring, i);
@@ -1531,10 +1591,13 @@ static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget)
rx_bi = &rx_ring->rx_bi[i];
skb = rx_bi->skb;
if (likely(!skb)) {
- skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
- rx_ring->rx_hdr_len);
+ skb = __netdev_alloc_skb_ip_align(rx_ring->netdev,
+ rx_ring->rx_hdr_len,
+ GFP_ATOMIC |
+ __GFP_NOWARN);
if (!skb) {
rx_ring->rx_stats.alloc_buff_failed++;
+ failure = true;
break;
}
@@ -1561,9 +1624,16 @@ static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget)
rx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >>
I40E_RXD_QW1_PTYPE_SHIFT;
- prefetch(rx_bi->page);
+ /* sync half-page for reading */
+ dma_sync_single_range_for_cpu(rx_ring->dev,
+ rx_bi->page_dma,
+ rx_bi->page_offset,
+ PAGE_SIZE / 2,
+ DMA_FROM_DEVICE);
+ prefetch(page_address(rx_bi->page) + rx_bi->page_offset);
rx_bi->skb = NULL;
cleaned_count++;
+ copysize = 0;
if (rx_hbo || rx_sph) {
int len;
@@ -1574,38 +1644,45 @@ static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget)
memcpy(__skb_put(skb, len), rx_bi->hdr_buf, len);
} else if (skb->len == 0) {
int len;
+ unsigned char *va = page_address(rx_bi->page) +
+ rx_bi->page_offset;
- len = (rx_packet_len > skb_headlen(skb) ?
- skb_headlen(skb) : rx_packet_len);
- memcpy(__skb_put(skb, len),
- rx_bi->page + rx_bi->page_offset,
- len);
- rx_bi->page_offset += len;
+ len = min(rx_packet_len, rx_ring->rx_hdr_len);
+ memcpy(__skb_put(skb, len), va, len);
+ copysize = len;
rx_packet_len -= len;
}
-
/* Get the rest of the data if this was a header split */
if (rx_packet_len) {
- skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
- rx_bi->page,
- rx_bi->page_offset,
- rx_packet_len);
-
- skb->len += rx_packet_len;
- skb->data_len += rx_packet_len;
- skb->truesize += rx_packet_len;
-
- if ((page_count(rx_bi->page) == 1) &&
- (page_to_nid(rx_bi->page) == current_node))
- get_page(rx_bi->page);
- else
+ skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
+ rx_bi->page,
+ rx_bi->page_offset + copysize,
+ rx_packet_len, I40E_RXBUFFER_2048);
+
+ get_page(rx_bi->page);
+ /* switch to the other half-page here; the allocation
+ * code programs the right addr into HW. If we haven't
+ * used this half-page, the address won't be changed,
+ * and HW can just use it next time through.
+ */
+ rx_bi->page_offset ^= PAGE_SIZE / 2;
+ /* If the page count is more than 2, then both halves
+ * of the page are used and we need to free it. Do it
+ * here instead of in the alloc code. Otherwise one
+ * of the half-pages might be released between now and
+ * then, and we wouldn't know which one to use.
+ */
+ if (page_count(rx_bi->page) > 2) {
+ dma_unmap_page(rx_ring->dev,
+ rx_bi->page_dma,
+ PAGE_SIZE,
+ DMA_FROM_DEVICE);
+ __free_page(rx_bi->page);
rx_bi->page = NULL;
+ rx_bi->page_dma = 0;
+ rx_ring->rx_stats.realloc_count++;
+ }
- dma_unmap_page(rx_ring->dev,
- rx_bi->page_dma,
- PAGE_SIZE / 2,
- DMA_FROM_DEVICE);
- rx_bi->page_dma = 0;
}
I40E_RX_INCREMENT(rx_ring, i);
@@ -1664,7 +1741,7 @@ static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget)
rx_ring->q_vector->rx.total_packets += total_rx_packets;
rx_ring->q_vector->rx.total_bytes += total_rx_bytes;
- return total_rx_packets;
+ return failure ? budget : total_rx_packets;
}
/**
@@ -1682,6 +1759,7 @@ static int i40e_clean_rx_irq_1buf(struct i40e_ring *rx_ring, int budget)
union i40e_rx_desc *rx_desc;
u32 rx_error, rx_status;
u16 rx_packet_len;
+ bool failure = false;
u8 rx_ptype;
u64 qword;
u16 i;
@@ -1692,7 +1770,9 @@ static int i40e_clean_rx_irq_1buf(struct i40e_ring *rx_ring, int budget)
u16 vlan_tag;
/* return some buffers to hardware, one at a time is too slow */
if (cleaned_count >= I40E_RX_BUFFER_WRITE) {
- i40e_alloc_rx_buffers_1buf(rx_ring, cleaned_count);
+ failure = failure ||
+ i40e_alloc_rx_buffers_1buf(rx_ring,
+ cleaned_count);
cleaned_count = 0;
}
@@ -1791,7 +1871,7 @@ static int i40e_clean_rx_irq_1buf(struct i40e_ring *rx_ring, int budget)
rx_ring->q_vector->rx.total_packets += total_rx_packets;
rx_ring->q_vector->rx.total_bytes += total_rx_bytes;
- return total_rx_packets;
+ return failure ? budget : total_rx_packets;
}
static u32 i40e_buildreg_itr(const int type, const u16 itr)
@@ -1799,7 +1879,9 @@ static u32 i40e_buildreg_itr(const int type, const u16 itr)
u32 val;
val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
- I40E_PFINT_DYN_CTLN_CLEARPBA_MASK |
+ /* Don't clear PBA because that can cause lost interrupts that
+ * came in while we were cleaning/polling
+ */
(type << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT) |
(itr << I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT);
@@ -1914,7 +1996,8 @@ int i40e_napi_poll(struct napi_struct *napi, int budget)
* budget and be more aggressive about cleaning up the Tx descriptors.
*/
i40e_for_each_ring(ring, q_vector->tx) {
- clean_complete &= i40e_clean_tx_irq(ring, vsi->work_limit);
+ clean_complete = clean_complete &&
+ i40e_clean_tx_irq(ring, vsi->work_limit);
arm_wb = arm_wb || ring->arm_wb;
ring->arm_wb = false;
}
@@ -1938,7 +2021,7 @@ int i40e_napi_poll(struct napi_struct *napi, int budget)
work_done += cleaned;
/* if we didn't clean as many as budgeted, we must be done */
- clean_complete &= (budget_per_ring != cleaned);
+ clean_complete = clean_complete && (budget_per_ring > cleaned);
}
/* If work not completed, return budget and polling will return */
@@ -1946,7 +2029,7 @@ int i40e_napi_poll(struct napi_struct *napi, int budget)
tx_only:
if (arm_wb) {
q_vector->tx.ring[0].tx_stats.tx_force_wb++;
- i40e_force_wb(vsi, q_vector);
+ i40e_enable_wb_on_itr(vsi, q_vector);
}
return budget;
}
@@ -1972,7 +2055,7 @@ tx_only:
qval = rd32(hw, I40E_QINT_TQCTL(0)) |
I40E_QINT_TQCTL_CAUSE_ENA_MASK;
wr32(hw, I40E_QINT_TQCTL(0), qval);
- i40e_irq_dynamic_enable_icr0(vsi->back);
+ i40e_irq_dynamic_enable_icr0(vsi->back, false);
}
return 0;
}