author	Jose Abreu <Jose.Abreu@synopsys.com>	2019-08-17 20:54:43 +0200
committer	David S. Miller <davem@davemloft.net>	2019-08-17 12:43:59 -0700
commit	67afd6d1cfdf0d0461fe3c4c922447f3e9b1c6ee (patch)
tree	bb67952bed5794bde765a624e0c5956c75ee5a2a /drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
parent	net: stmmac: xgmac: Correctly return that RX descriptor is not last one (diff)
net: stmmac: Add Split Header support and enable it in XGMAC cores
Add the support for Split Header feature in the RX path and enable it in
XGMAC cores. This neither impacts nor benefits bandwidth, but it does
reduce CPU usage: without the feature the entire packet is memcpy'ed,
while with the feature only the header is.

With Split Header disabled, 'perf stat -d' gives:

 86870.624945 task-clock (msec)       #   0.429 CPUs utilized
      1073352 context-switches        #   0.012 M/sec
            1 cpu-migrations          #   0.000 K/sec
          213 page-faults             #   0.002 K/sec
 327113872376 cycles                  #   3.766 GHz                    (62.53%)
  56618161216 instructions            #   0.17  insn per cycle         (75.06%)
  10742205071 branches                # 123.658 M/sec                  (75.36%)
    584309242 branch-misses           #   5.44% of all branches        (75.19%)
  17594787965 L1-dcache-loads         # 202.540 M/sec                  (74.88%)
   4003773131 L1-dcache-load-misses   #  22.76% of all L1-dcache hits  (74.89%)
   1313301468 LLC-loads               #  15.118 M/sec                  (49.75%)
    355906510 LLC-load-misses         #  27.10% of all LL-cache hits   (49.92%)

With Split Header enabled, 'perf stat -d' gives:

 49324.456539 task-clock (msec)       #   0.245 CPUs utilized
      2542387 context-switches        #   0.052 M/sec
            1 cpu-migrations          #   0.000 K/sec
          213 page-faults             #   0.004 K/sec
 177092791469 cycles                  #   3.590 GHz                    (62.30%)
  68555756017 instructions            #   0.39  insn per cycle         (75.16%)
  12697019382 branches                # 257.418 M/sec                  (74.81%)
    442081897 branch-misses           #   3.48% of all branches        (74.79%)
  20337958358 L1-dcache-loads         # 412.330 M/sec                  (75.46%)
   3820210140 L1-dcache-load-misses   #  18.78% of all L1-dcache hits  (75.35%)
   1257719198 LLC-loads               #  25.499 M/sec                  (49.73%)
    685543923 LLC-load-misses         #  54.51% of all LL-cache hits   (49.86%)

Changes from v2:
- Reword commit message (Jakub)

Changes from v1:
- Add performance info (David)
- Add missing dma_sync_single_for_device()

Signed-off-by: Jose Abreu <joabreu@synopsys.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
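For readers unfamiliar with the feature, here is a minimal sketch of what
split-header reception buys on the RX path. It is illustrative only: the
function and parameter names (assemble_split_frame, hdr_buf, pay_page) are
hypothetical, not stmmac API, though the kernel helpers it calls are real.

/* Hedged sketch of split-header RX assembly: the NIC has already DMA'ed
 * the L2/L3/L4 headers into hdr_buf and the payload into pay_page, so
 * the CPU copies only hlen bytes instead of the whole frame and attaches
 * the payload page as a zero-copy fragment.
 */
static struct sk_buff *assemble_split_frame(struct napi_struct *napi,
					    const void *hdr_buf,
					    unsigned int hlen,
					    struct page *pay_page,
					    unsigned int pay_len,
					    unsigned int truesize)
{
	struct sk_buff *skb = napi_alloc_skb(napi, hlen);

	if (!skb)
		return NULL;

	skb_copy_to_linear_data(skb, hdr_buf, hlen);	/* small header copy */
	skb_put(skb, hlen);

	/* Payload stays in its page; no memcpy of the bulk data. */
	skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, pay_page, 0,
			pay_len, truesize);
	return skb;
}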
Diffstat (limited to 'drivers/net/ethernet/stmicro/stmmac/stmmac_main.c')
-rw-r--r--	drivers/net/ethernet/stmicro/stmmac/stmmac_main.c	75
1 file changed, 74 insertions(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 05f0fa7a6f02..60e5f3584790 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -1201,6 +1201,17 @@ static int stmmac_init_rx_buffers(struct stmmac_priv *priv, struct dma_desc *p,
if (!buf->page)
return -ENOMEM;
+ if (priv->sph) {
+ buf->sec_page = page_pool_dev_alloc_pages(rx_q->page_pool);
+ if (!buf->sec_page)
+ return -ENOMEM;
+
+ buf->sec_addr = page_pool_get_dma_addr(buf->sec_page);
+ stmmac_set_desc_sec_addr(priv, p, buf->sec_addr);
+ } else {
+ buf->sec_page = NULL;
+ }
+
buf->addr = page_pool_get_dma_addr(buf->page);
stmmac_set_desc_addr(priv, p, buf->addr);
if (priv->dma_buf_sz == BUF_SIZE_16KiB)
@@ -1223,6 +1234,10 @@ static void stmmac_free_rx_buffer(struct stmmac_priv *priv, u32 queue, int i)
if (buf->page)
page_pool_put_page(rx_q->page_pool, buf->page, false);
buf->page = NULL;
+
+ if (buf->sec_page)
+ page_pool_put_page(rx_q->page_pool, buf->sec_page, false);
+ buf->sec_page = NULL;
}
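The two hunks above rely on the per-entry bookkeeping growing a second page
and a second DMA address. The structure change itself lives in stmmac.h and
is not part of this file's diff, but the shape implied by the
buf->sec_page/buf->sec_addr accesses is roughly:

/* Per-descriptor RX bookkeeping as implied by the hunks above
 * (the authoritative definition is in stmmac.h, outside this diff).
 */
struct stmmac_rx_buffer {
	struct page *page;	/* header buffer; whole frame when SPH is off */
	struct page *sec_page;	/* payload buffer; populated only when SPH is on */
	dma_addr_t addr;
	dma_addr_t sec_addr;
};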
/**
@@ -2596,6 +2611,12 @@ static int stmmac_hw_setup(struct net_device *dev, bool init_ptp)
stmmac_enable_tso(priv, priv->ioaddr, 1, chan);
}
+ /* Enable Split Header */
+ if (priv->sph && priv->hw->rx_csum) {
+ for (chan = 0; chan < rx_cnt; chan++)
+ stmmac_enable_sph(priv, priv->ioaddr, 1, chan);
+ }
+
/* Start the ball rolling... */
stmmac_start_all_dma(priv);
@@ -3315,6 +3336,17 @@ static inline void stmmac_rx_refill(struct stmmac_priv *priv, u32 queue)
break;
}
+ if (priv->sph && !buf->sec_page) {
+ buf->sec_page = page_pool_dev_alloc_pages(rx_q->page_pool);
+ if (!buf->sec_page)
+ break;
+
+ buf->sec_addr = page_pool_get_dma_addr(buf->sec_page);
+
+ dma_sync_single_for_device(priv->device, buf->sec_addr,
+ len, DMA_FROM_DEVICE);
+ }
+
buf->addr = page_pool_get_dma_addr(buf->page);
/* Sync whole allocation to device. This will invalidate old
@@ -3324,6 +3356,7 @@ static inline void stmmac_rx_refill(struct stmmac_priv *priv, u32 queue)
DMA_FROM_DEVICE);
stmmac_set_desc_addr(priv, p, buf->addr);
+ stmmac_set_desc_sec_addr(priv, p, buf->sec_addr);
stmmac_refill_desc3(priv, rx_q, p);
rx_q->rx_count_frames++;
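Condensing the refill logic above into one hedged helper (the helper itself
is hypothetical; 'len' is whatever sync length the caller computed, and the
page pool is assumed to have been created with PP_FLAG_DMA_MAP so that
page_pool_get_dma_addr() returns a live mapping):

/* Hypothetical condensation of the SPH part of stmmac_rx_refill():
 * replenish the payload page only after the RX path has consumed it,
 * then hand the buffer back to the device.
 */
static int sph_refill_payload(struct device *dev, struct page_pool *pool,
			      struct stmmac_rx_buffer *buf, unsigned int len)
{
	if (buf->sec_page)
		return 0;	/* previous payload page not consumed yet */

	buf->sec_page = page_pool_dev_alloc_pages(pool);
	if (!buf->sec_page)
		return -ENOMEM;

	buf->sec_addr = page_pool_get_dma_addr(buf->sec_page);
	dma_sync_single_for_device(dev, buf->sec_addr, len, DMA_FROM_DEVICE);
	return 0;
}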
@@ -3370,10 +3403,11 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
stmmac_display_ring(priv, rx_head, DMA_RX_SIZE, true);
}
while (count < limit) {
+ unsigned int hlen = 0, prev_len = 0;
enum pkt_hash_types hash_type;
struct stmmac_rx_buffer *buf;
- unsigned int prev_len = 0;
struct dma_desc *np, *p;
+ unsigned int sec_len;
int entry;
u32 hash;
@@ -3392,6 +3426,7 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
break;
read_again:
+ sec_len = 0;
entry = next_entry;
buf = &rx_q->buf_pool[entry];
@@ -3418,6 +3453,7 @@ read_again:
np = rx_q->dma_rx + next_entry;
prefetch(np);
+ prefetch(page_address(buf->page));
if (priv->extend_desc)
stmmac_rx_extended_status(priv, &priv->dev->stats,
@@ -3458,6 +3494,17 @@ read_again:
}
if (!skb) {
+ int ret = stmmac_get_rx_header_len(priv, p, &hlen);
+
+ if (priv->sph && !ret && (hlen > 0)) {
+ sec_len = len;
+ if (!(status & rx_not_ls))
+ sec_len = sec_len - hlen;
+ len = hlen;
+
+ prefetch(page_address(buf->sec_page));
+ }
+
skb = napi_alloc_skb(&ch->rx_napi, len);
if (!skb) {
priv->dev->stats.rx_dropped++;
@@ -3490,6 +3537,20 @@ read_again:
buf->page = NULL;
}
+ if (sec_len > 0) {
+ dma_sync_single_for_cpu(priv->device, buf->sec_addr,
+ sec_len, DMA_FROM_DEVICE);
+ skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
+ buf->sec_page, 0, sec_len,
+ priv->dma_buf_sz);
+
+ len += sec_len;
+
+ /* Data payload appended into SKB */
+ page_pool_release_page(rx_q->page_pool, buf->sec_page);
+ buf->sec_page = NULL;
+ }
+
if (likely(status & rx_not_ls))
goto read_again;
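A worked example may help with the length juggling above (the numbers are
illustrative): a 1514-byte frame arrives in a single descriptor, so
rx_not_ls is clear, and the hardware reports hlen = 66.

/* Illustrative walk-through of the accounting in the hunk above:
 *
 *   len      = 1514;	// from the RX descriptor
 *   hlen     = 66;	// from stmmac_get_rx_header_len()
 *   sec_len  = len;	// 1514
 *   sec_len -= hlen;	// 1448 payload bytes stay in sec_page
 *   len      = hlen;	// napi_alloc_skb() and the header copy
 *			// touch only 66 bytes
 *   ...
 *   len     += sec_len;	// back to 1514 once skb_add_rx_frag()
 *				// attaches the payload page
 */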
@@ -3664,6 +3725,8 @@ static int stmmac_set_features(struct net_device *netdev,
netdev_features_t features)
{
struct stmmac_priv *priv = netdev_priv(netdev);
+ bool sph_en;
+ u32 chan;
/* Keep the COE Type in case of csum is supporting */
if (features & NETIF_F_RXCSUM)
@@ -3675,6 +3738,10 @@ static int stmmac_set_features(struct net_device *netdev,
*/
stmmac_rx_ipc(priv, priv->hw);
+ sph_en = (priv->hw->rx_csum > 0) && priv->sph;
+ for (chan = 0; chan < priv->plat->rx_queues_to_use; chan++)
+ stmmac_enable_sph(priv, priv->ioaddr, sph_en, chan);
+
return 0;
}
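One subtlety in the hunk above: SPH quietly tracks the RX checksum knob, so
clearing NETIF_F_RXCSUM at runtime also turns SPH off on every RX channel.
A hedged condensation (the helper name is hypothetical; the real code is
inline in stmmac_set_features()):

/* Hypothetical condensation of the coupling above: SPH stays enabled
 * only while RX checksum offload is active, applied to all RX channels.
 */
static void stmmac_sph_follow_rx_csum(struct stmmac_priv *priv)
{
	bool sph_en = (priv->hw->rx_csum > 0) && priv->sph;
	u32 chan;

	for (chan = 0; chan < priv->plat->rx_queues_to_use; chan++)
		stmmac_enable_sph(priv, priv->ioaddr, sph_en, chan);
}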
@@ -4367,6 +4434,12 @@ int stmmac_dvr_probe(struct device *device,
dev_info(priv->device, "TSO feature enabled\n");
}
+ if (priv->dma_cap.sphen) {
+ ndev->hw_features |= NETIF_F_GRO;
+ priv->sph = true;
+ dev_info(priv->device, "SPH feature enabled\n");
+ }
+
if (priv->dma_cap.addr64) {
ret = dma_set_mask_and_coherent(device,
DMA_BIT_MASK(priv->dma_cap.addr64));
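Taken together, the probe-time capability check is what arms every hunk
above. A hedged end-to-end summary follows; the helper is purely
illustrative (the real code is inline in stmmac_dvr_probe()), and the GRO
rationale in the comment is an inference, not stated in the commit:

/* Illustrative summary of how priv->sph threads through the driver:
 * set once at probe from the hardware capability bit, it steers buffer
 * allocation (init/refill), per-channel enablement (hw_setup and
 * set_features) and the RX fast path shown in the hunks above.
 */
static void stmmac_sph_probe_init(struct stmmac_priv *priv,
				  struct net_device *ndev)
{
	if (!priv->dma_cap.sphen)
		return;

	priv->sph = true;
	/* Payload now lands in page frags, which GRO can coalesce. */
	ndev->hw_features |= NETIF_F_GRO;
	dev_info(priv->device, "SPH feature enabled\n");
}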