aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/net/ethernet/cavium/liquidio/octeon_network.h
diff options
context:
space:
mode:
authorVSR Burru <veerasenareddy.burru@cavium.com>2017-03-06 18:45:59 -0800
committerDavid S. Miller <davem@davemloft.net>2017-03-09 13:07:00 -0800
commit67e303e0c7683957eb4e530453705a43a6d4f966 (patch)
treefce0649052e8f551bea61bffc7788fb2faa4c0ee /drivers/net/ethernet/cavium/liquidio/octeon_network.h
parentnet: ipv6: Remove redundant RTA_OIF in multipath routes (diff)
downloadlinux-dev-67e303e0c7683957eb4e530453705a43a6d4f966.tar.xz
linux-dev-67e303e0c7683957eb4e530453705a43a6d4f966.zip
liquidio: improve UDP TX performance
Improve UDP TX performance by: * reducing the ring size from 2K to 512 * replacing the numerous streaming DMA allocations for info buffers and gather lists with one large consistent DMA allocation per ring BQL is not effective here. We reduced the ring size because there is heavy overhead with dma_map_single every so often. With iommu=on, dma_map_single in PF Tx data path was taking longer time (~700usec) for every ~250 packets. Debugged intel_iommu code, and found that PF driver is utilizing too many static IO virtual address mapping entries (for gather list entries and info buffers): about 100K entries for two PF's each using 8 rings. Also, finding an empty entry (in rbtree of device domain's iova mapping in kernel) during Tx path becomes a bottleneck every so often; the loop to find the empty entry goes through over 40K iterations; this is too costly and was the major overhead. Overhead is low when this loop quits quickly. Netperf benchmark numbers before and after patch: PF UDP TX +--------+--------+------------+------------+---------+ | | | Before | After | | | Number | | Patch | Patch | | | of | Packet | Throughput | Throughput | Percent | | Flows | Size | (Gbps) | (Gbps) | Change | +--------+--------+------------+------------+---------+ | | 360 | 0.52 | 0.93 | +78.9 | | 1 | 1024 | 1.62 | 2.84 | +75.3 | | | 1518 | 2.44 | 4.21 | +72.5 | +--------+--------+------------+------------+---------+ | | 360 | 0.45 | 1.59 | +253.3 | | 4 | 1024 | 1.34 | 5.48 | +308.9 | | | 1518 | 2.27 | 8.31 | +266.1 | +--------+--------+------------+------------+---------+ | | 360 | 0.40 | 1.61 | +302.5 | | 8 | 1024 | 1.64 | 4.24 | +158.5 | | | 1518 | 2.87 | 6.52 | +127.2 | +--------+--------+------------+------------+---------+ VF UDP TX +--------+--------+------------+------------+---------+ | | | Before | After | | | Number | | Patch | Patch | | | of | Packet | Throughput | Throughput | Percent | | Flows | Size | (Gbps) | (Gbps) | Change | +--------+--------+------------+------------+---------+ | | 360 | 1.28 | 1.49 | +16.4 | | 1 | 1024 | 4.44 | 4.39 | -1.1 | | | 1518 | 6.08 | 6.51 | +7.1 | +--------+--------+------------+------------+---------+ | | 360 | 2.35 | 2.35 | 0.0 | | 4 | 1024 | 6.41 | 8.07 | +25.9 | | | 1518 | 9.56 | 9.54 | -0.2 | +--------+--------+------------+------------+---------+ | | 360 | 3.41 | 3.65 | +7.0 | | 8 | 1024 | 9.35 | 9.34 | -0.1 | | | 1518 | 9.56 | 9.57 | +0.1 | +--------+--------+------------+------------+---------+ Signed-off-by: VSR Burru <veerasenareddy.burru@cavium.com> Signed-off-by: Felix Manlunas <felix.manlunas@cavium.com> Signed-off-by: Derek Chickles <derek.chickles@cavium.com> Signed-off-by: Raghu Vatsavayi <raghu.vatsavayi@cavium.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to '')
-rw-r--r--drivers/net/ethernet/cavium/liquidio/octeon_network.h43
1 files changed, 27 insertions, 16 deletions
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_network.h b/drivers/net/ethernet/cavium/liquidio/octeon_network.h
index 6bb89419006e..eef2a1e8a7e3 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_network.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_network.h
@@ -62,6 +62,9 @@ struct lio {
/** Array of gather component linked lists */
struct list_head *glist;
+ void **glists_virt_base;
+ dma_addr_t *glists_dma_base;
+ u32 glist_entry_size;
/** Pointer to the NIC properties for the Octeon device this network
* interface is associated with.
@@ -344,6 +347,29 @@ static inline void tx_buffer_free(void *buffer)
#define lio_dma_free(oct, size, virt_addr, dma_addr) \
dma_free_coherent(&(oct)->pci_dev->dev, size, virt_addr, dma_addr)
+static inline void *
+lio_alloc_info_buffer(struct octeon_device *oct,
+ struct octeon_droq *droq)
+{
+ void *virt_ptr;
+
+ virt_ptr = lio_dma_alloc(oct, (droq->max_count * OCT_DROQ_INFO_SIZE),
+ &droq->info_list_dma);
+ if (virt_ptr) {
+ droq->info_alloc_size = droq->max_count * OCT_DROQ_INFO_SIZE;
+ droq->info_base_addr = virt_ptr;
+ }
+
+ return virt_ptr;
+}
+
+static inline void lio_free_info_buffer(struct octeon_device *oct,
+ struct octeon_droq *droq)
+{
+ lio_dma_free(oct, droq->info_alloc_size, droq->info_base_addr,
+ droq->info_list_dma);
+}
+
static inline
void *get_rbd(struct sk_buff *skb)
{
@@ -359,22 +385,7 @@ void *get_rbd(struct sk_buff *skb)
static inline u64
lio_map_ring_info(struct octeon_droq *droq, u32 i)
{
- dma_addr_t dma_addr;
- struct octeon_device *oct = droq->oct_dev;
-
- dma_addr = dma_map_single(&oct->pci_dev->dev, &droq->info_list[i],
- OCT_DROQ_INFO_SIZE, DMA_FROM_DEVICE);
-
- WARN_ON(dma_mapping_error(&oct->pci_dev->dev, dma_addr));
-
- return (u64)dma_addr;
-}
-
-static inline void
-lio_unmap_ring_info(struct pci_dev *pci_dev,
- u64 info_ptr, u32 size)
-{
- dma_unmap_single(&pci_dev->dev, info_ptr, size, DMA_FROM_DEVICE);
+ return droq->info_list_dma + (i * sizeof(struct octeon_droq_info));
}
static inline u64