aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/net/ethernet/chelsio/cxgb4/sge.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/net/ethernet/chelsio/cxgb4/sge.c')
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/sge.c187
1 files changed, 133 insertions, 54 deletions
diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c
index b7b93e7a643d..b4eb4680a27c 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c
@@ -406,7 +406,7 @@ static void free_tx_desc(struct adapter *adap, struct sge_txq *q,
*/
static inline int reclaimable(const struct sge_txq *q)
{
- int hw_cidx = ntohs(q->stat->cidx);
+ int hw_cidx = ntohs(ACCESS_ONCE(q->stat->cidx));
hw_cidx -= q->cidx;
return hw_cidx < 0 ? hw_cidx + q->size : hw_cidx;
}
@@ -613,6 +613,7 @@ static unsigned int refill_fl(struct adapter *adap, struct sge_fl *q, int n,
PCI_DMA_FROMDEVICE);
if (unlikely(dma_mapping_error(adap->pdev_dev, mapping))) {
__free_pages(pg, s->fl_pg_order);
+ q->mapping_err++;
goto out; /* do not try small pages for this error */
}
mapping |= RX_LARGE_PG_BUF;
@@ -642,6 +643,7 @@ alloc_small_pages:
PCI_DMA_FROMDEVICE);
if (unlikely(dma_mapping_error(adap->pdev_dev, mapping))) {
put_page(pg);
+ q->mapping_err++;
goto out;
}
*d++ = cpu_to_be64(mapping);
@@ -663,6 +665,7 @@ out: cred = q->avail - cred;
if (unlikely(fl_starving(adap, q))) {
smp_wmb();
+ q->low++;
set_bit(q->cntxt_id - adap->sge.egr_start,
adap->sge.starving_fl);
}
@@ -1029,6 +1032,30 @@ static void inline_tx_skb(const struct sk_buff *skb, const struct sge_txq *q,
*p = 0;
}
+static void *inline_tx_skb_header(const struct sk_buff *skb,
+ const struct sge_txq *q, void *pos,
+ int length)
+{
+ u64 *p;
+ int left = (void *)q->stat - pos;
+
+ if (likely(length <= left)) {
+ memcpy(pos, skb->data, length);
+ pos += length;
+ } else {
+ memcpy(pos, skb->data, left);
+ memcpy(q->desc, skb->data + left, length - left);
+ pos = (void *)q->desc + (length - left);
+ }
+ /* 0-pad to multiple of 16 */
+ p = PTR_ALIGN(pos, 8);
+ if ((uintptr_t)p & 8) {
+ *p = 0;
+ return p + 1;
+ }
+ return p;
+}
+
/*
* Figure out what HW csum a packet wants and return the appropriate control
* bits.
@@ -1320,7 +1347,7 @@ out_free: dev_kfree_skb_any(skb);
*/
static inline void reclaim_completed_tx_imm(struct sge_txq *q)
{
- int hw_cidx = ntohs(q->stat->cidx);
+ int hw_cidx = ntohs(ACCESS_ONCE(q->stat->cidx));
int reclaim = hw_cidx - q->cidx;
if (reclaim < 0)
@@ -1542,24 +1569,50 @@ static void ofldtxq_stop(struct sge_ofld_txq *q, struct sk_buff *skb)
}
/**
- * service_ofldq - restart a suspended offload queue
+ * service_ofldq - service/restart a suspended offload queue
* @q: the offload queue
*
- * Services an offload Tx queue by moving packets from its packet queue
- * to the HW Tx ring. The function starts and ends with the queue locked.
+ * Services an offload Tx queue by moving packets from its Pending Send
+ * Queue to the Hardware TX ring. The function starts and ends with the
+ * Send Queue locked, but drops the lock while putting the skb at the
+ * head of the Send Queue onto the Hardware TX Ring. Dropping the lock
+ * allows more skbs to be added to the Send Queue by other threads.
+ * The packet being processed at the head of the Pending Send Queue is
+ * left on the queue in case we experience DMA Mapping errors, etc.
+ * and need to give up and restart later.
+ *
+ * service_ofldq() can be thought of as a task which opportunistically
+ * uses other threads execution contexts. We use the Offload Queue
+ * boolean "service_ofldq_running" to make sure that only one instance
+ * is ever running at a time ...
*/
static void service_ofldq(struct sge_ofld_txq *q)
{
- u64 *pos;
+ u64 *pos, *before, *end;
int credits;
struct sk_buff *skb;
+ struct sge_txq *txq;
+ unsigned int left;
unsigned int written = 0;
unsigned int flits, ndesc;
+ /* If another thread is currently in service_ofldq() processing the
+ * Pending Send Queue then there's nothing to do. Otherwise, flag
+ * that we're doing the work and continue. Examining/modifying
+ * the Offload Queue boolean "service_ofldq_running" must be done
+ * while holding the Pending Send Queue Lock.
+ */
+ if (q->service_ofldq_running)
+ return;
+ q->service_ofldq_running = true;
+
while ((skb = skb_peek(&q->sendq)) != NULL && !q->full) {
- /*
- * We drop the lock but leave skb on sendq, thus retaining
- * exclusive access to the state of the queue.
+ /* We drop the lock while we're working with the skb at the
+ * head of the Pending Send Queue. This allows more skbs to
+ * be added to the Pending Send Queue while we're working on
+ * this one. We don't need to lock to guard the TX Ring
+ * updates because only one thread of execution is ever
+ * allowed into service_ofldq() at a time.
*/
spin_unlock(&q->sendq.lock);
@@ -1583,9 +1636,32 @@ static void service_ofldq(struct sge_ofld_txq *q)
} else {
int last_desc, hdr_len = skb_transport_offset(skb);
- memcpy(pos, skb->data, hdr_len);
- write_sgl(skb, &q->q, (void *)pos + hdr_len,
- pos + flits, hdr_len,
+ /* The WR headers may not fit within one descriptor.
+ * So we need to deal with wrap-around here.
+ */
+ before = (u64 *)pos;
+ end = (u64 *)pos + flits;
+ txq = &q->q;
+ pos = (void *)inline_tx_skb_header(skb, &q->q,
+ (void *)pos,
+ hdr_len);
+ if (before > (u64 *)pos) {
+ left = (u8 *)end - (u8 *)txq->stat;
+ end = (void *)txq->desc + left;
+ }
+
+ /* If current position is already at the end of the
+ * ofld queue, reset the current to point to
+ * start of the queue and update the end ptr as well.
+ */
+ if (pos == (u64 *)txq->stat) {
+ left = (u8 *)end - (u8 *)txq->stat;
+ end = (void *)txq->desc + left;
+ pos = (void *)txq->desc;
+ }
+
+ write_sgl(skb, &q->q, (void *)pos,
+ end, hdr_len,
(dma_addr_t *)skb->head);
#ifdef CONFIG_NEED_DMA_MAP_STATE
skb->dev = q->adap->port[0];
@@ -1604,6 +1680,11 @@ static void service_ofldq(struct sge_ofld_txq *q)
written = 0;
}
+ /* Reacquire the Pending Send Queue Lock so we can unlink the
+ * skb we've just successfully transferred to the TX Ring and
+ * loop for the next skb which may be at the head of the
+ * Pending Send Queue.
+ */
spin_lock(&q->sendq.lock);
__skb_unlink(skb, &q->sendq);
if (is_ofld_imm(skb))
@@ -1611,6 +1692,11 @@ static void service_ofldq(struct sge_ofld_txq *q)
}
if (likely(written))
ring_tx_db(q->adap, &q->q, written);
+
+ /*Indicate that no thread is processing the Pending Send Queue
+ * currently.
+ */
+ q->service_ofldq_running = false;
}
/**
@@ -1624,9 +1710,19 @@ static int ofld_xmit(struct sge_ofld_txq *q, struct sk_buff *skb)
{
skb->priority = calc_tx_flits_ofld(skb); /* save for restart */
spin_lock(&q->sendq.lock);
+
+ /* Queue the new skb onto the Offload Queue's Pending Send Queue. If
+ * that results in this new skb being the only one on the queue, start
+ * servicing it. If there are other skbs already on the list, then
+ * either the queue is currently being processed or it's been stopped
+ * for some reason and it'll be restarted at a later time. Restart
+ * paths are triggered by events like experiencing a DMA Mapping Error
+ * or filling the Hardware TX Ring.
+ */
__skb_queue_tail(&q->sendq, skb);
if (q->sendq.qlen == 1)
service_ofldq(q);
+
spin_unlock(&q->sendq.lock);
return NET_XMIT_SUCCESS;
}
@@ -1864,7 +1960,6 @@ static void do_gro(struct sge_eth_rxq *rxq, const struct pkt_gl *gl,
skb->truesize += skb->data_len;
skb->ip_summed = CHECKSUM_UNNECESSARY;
skb_record_rx_queue(skb, rxq->rspq.idx);
- skb_mark_napi_id(skb, &rxq->rspq.napi);
pi = netdev_priv(skb->dev);
if (pi->rxtstamp)
cxgb4_sgetim_to_hwtstamp(adapter, skb_hwtstamps(skb),
@@ -2193,7 +2288,7 @@ static int napi_rx_handler(struct napi_struct *napi, int budget)
if (likely(work_done < budget)) {
int timer_index;
- napi_complete(napi);
+ napi_complete_done(napi, work_done);
timer_index = QINTR_TIMER_IDX_G(q->next_intr_params);
if (q->adaptive_rx) {
@@ -2460,7 +2555,8 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq,
iq->size = roundup(iq->size, 16);
iq->desc = alloc_ring(adap->pdev_dev, iq->size, iq->iqe_len, 0,
- &iq->phys_addr, NULL, 0, NUMA_NO_NODE);
+ &iq->phys_addr, NULL, 0,
+ dev_to_node(adap->pdev_dev));
if (!iq->desc)
return -ENOMEM;
@@ -2500,7 +2596,8 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq,
fl->size = roundup(fl->size, 8);
fl->desc = alloc_ring(adap->pdev_dev, fl->size, sizeof(__be64),
sizeof(struct rx_sw_desc), &fl->addr,
- &fl->sdesc, s->stat_len, NUMA_NO_NODE);
+ &fl->sdesc, s->stat_len,
+ dev_to_node(adap->pdev_dev));
if (!fl->desc)
goto fl_nomem;
@@ -2528,7 +2625,6 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq,
goto err;
netif_napi_add(dev, &iq->napi, napi_rx_handler, 64);
- napi_hash_add(&iq->napi);
iq->cur_desc = iq->desc;
iq->cidx = 0;
iq->gen = 1;
@@ -2574,8 +2670,9 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq,
* simple (and hopefully less wrong).
*/
if (!is_t4(adap->params.chip) && cong >= 0) {
- u32 param, val;
+ u32 param, val, ch_map = 0;
int i;
+ u16 cng_ch_bits_log = adap->params.arch.cng_ch_bits_log;
param = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DMAQ) |
FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DMAQ_CONM_CTXT) |
@@ -2587,9 +2684,9 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq,
CONMCTXT_CNGTPMODE_V(CONMCTXT_CNGTPMODE_CHANNEL_X);
for (i = 0; i < 4; i++) {
if (cong & (1 << i))
- val |=
- CONMCTXT_CNGCHMAP_V(1 << (i << 2));
+ ch_map |= 1 << (i << cng_ch_bits_log);
}
+ val |= CONMCTXT_CNGCHMAP_V(ch_map);
}
ret = t4_set_params(adap, adap->mbox, adap->pf, 0, 1,
&param, &val);
@@ -2884,7 +2981,7 @@ void t4_free_sge_resources(struct adapter *adap)
}
/* clean up RDMA and iSCSI Rx queues */
- t4_free_ofld_rxqs(adap, adap->sge.ofldqsets, adap->sge.ofldrxq);
+ t4_free_ofld_rxqs(adap, adap->sge.iscsiqsets, adap->sge.iscsirxq);
t4_free_ofld_rxqs(adap, adap->sge.rdmaqs, adap->sge.rdmarxq);
t4_free_ofld_rxqs(adap, adap->sge.rdmaciqs, adap->sge.rdmaciq);
@@ -3077,8 +3174,7 @@ static int t4_sge_init_soft(struct adapter *adap)
int t4_sge_init(struct adapter *adap)
{
struct sge *s = &adap->sge;
- u32 sge_control, sge_control2, sge_conm_ctrl;
- unsigned int ingpadboundary, ingpackboundary;
+ u32 sge_control, sge_conm_ctrl;
int ret, egress_threshold;
/*
@@ -3089,35 +3185,7 @@ int t4_sge_init(struct adapter *adap)
s->pktshift = PKTSHIFT_G(sge_control);
s->stat_len = (sge_control & EGRSTATUSPAGESIZE_F) ? 128 : 64;
- /* T4 uses a single control field to specify both the PCIe Padding and
- * Packing Boundary. T5 introduced the ability to specify these
- * separately. The actual Ingress Packet Data alignment boundary
- * within Packed Buffer Mode is the maximum of these two
- * specifications. (Note that it makes no real practical sense to
- * have the Pading Boudary be larger than the Packing Boundary but you
- * could set the chip up that way and, in fact, legacy T4 code would
- * end doing this because it would initialize the Padding Boundary and
- * leave the Packing Boundary initialized to 0 (16 bytes).)
- */
- ingpadboundary = 1 << (INGPADBOUNDARY_G(sge_control) +
- INGPADBOUNDARY_SHIFT_X);
- if (is_t4(adap->params.chip)) {
- s->fl_align = ingpadboundary;
- } else {
- /* T5 has a different interpretation of one of the PCIe Packing
- * Boundary values.
- */
- sge_control2 = t4_read_reg(adap, SGE_CONTROL2_A);
- ingpackboundary = INGPACKBOUNDARY_G(sge_control2);
- if (ingpackboundary == INGPACKBOUNDARY_16B_X)
- ingpackboundary = 16;
- else
- ingpackboundary = 1 << (ingpackboundary +
- INGPACKBOUNDARY_SHIFT_X);
-
- s->fl_align = max(ingpadboundary, ingpackboundary);
- }
-
+ s->fl_align = t4_fl_pkt_align(adap);
ret = t4_sge_init_soft(adap);
if (ret < 0)
return ret;
@@ -3135,10 +3203,21 @@ int t4_sge_init(struct adapter *adap)
* buffers.
*/
sge_conm_ctrl = t4_read_reg(adap, SGE_CONM_CTRL_A);
- if (is_t4(adap->params.chip))
+ switch (CHELSIO_CHIP_VERSION(adap->params.chip)) {
+ case CHELSIO_T4:
egress_threshold = EGRTHRESHOLD_G(sge_conm_ctrl);
- else
+ break;
+ case CHELSIO_T5:
egress_threshold = EGRTHRESHOLDPACKING_G(sge_conm_ctrl);
+ break;
+ case CHELSIO_T6:
+ egress_threshold = T6_EGRTHRESHOLDPACKING_G(sge_conm_ctrl);
+ break;
+ default:
+ dev_err(adap->pdev_dev, "Unsupported Chip version %d\n",
+ CHELSIO_CHIP_VERSION(adap->params.chip));
+ return -EINVAL;
+ }
s->fl_starve_thres = 2*egress_threshold + 1;
t4_idma_monitor_init(adap, &s->idma_monitor);