aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/net/ethernet/intel
diff options
context:
space:
mode:
authorJakub Kicinski <kuba@kernel.org>2022-02-09 18:17:54 -0800
committerJakub Kicinski <kuba@kernel.org>2022-02-09 18:40:56 -0800
commit1127170d457eb9bcc839ef7f2064634f92fe83e2 (patch)
tree228996f3ae0b734cadc7118a4d10efc1635acf23 /drivers/net/ethernet/intel
parentnet: drop_monitor: support drop reason (diff)
parentMerge branch 'Split bpf_sk_lookup remote_port field' (diff)
downloadlinux-dev-1127170d457eb9bcc839ef7f2064634f92fe83e2.tar.xz
linux-dev-1127170d457eb9bcc839ef7f2064634f92fe83e2.zip
Merge https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Daniel Borkmann says: ==================== pull-request: bpf-next 2022-02-09 We've added 126 non-merge commits during the last 16 day(s) which contain a total of 201 files changed, 4049 insertions(+), 2215 deletions(-). The main changes are: 1) Add custom BPF allocator for JITs that pack multiple programs into a huge page to reduce iTLB pressure, from Song Liu. 2) Add __user tagging support in vmlinux BTF and utilize it from BPF verifier when generating loads, from Yonghong Song. 3) Add per-socket fast path check guarding from cgroup/BPF overhead when used by only some sockets, from Pavel Begunkov. 4) Continued libbpf deprecation work of APIs/features and removal of their usage from samples, selftests, libbpf & bpftool, from Andrii Nakryiko and various others. 5) Improve BPF instruction set documentation by adding byte swap instructions and cleaning up load/store section, from Christoph Hellwig. 6) Switch BPF preload infra to light skeleton and remove libbpf dependency from it, from Alexei Starovoitov. 7) Fix architecture-agnostic macros in libbpf for accessing syscall arguments from BPF progs for non-x86 architectures, from Ilya Leoshkevich. 8) Rework port members in struct bpf_sk_lookup and struct bpf_sock to be of 16-bit field with anonymous zero padding, from Jakub Sitnicki. 9) Add new bpf_copy_from_user_task() helper to read memory from a different task than current. Add ability to create sleepable BPF iterator progs, from Kenny Yu. 10) Implement XSK batching for ice's zero-copy driver used by AF_XDP and utilize TX batching API from XSK buffer pool, from Maciej Fijalkowski. 11) Generate temporary netns names for BPF selftests to avoid naming collisions, from Hangbin Liu. 12) Implement bpf_core_types_are_compat() with limited recursion for in-kernel usage, from Matteo Croce. 13) Simplify pahole version detection and finally enable CONFIG_DEBUG_INFO_DWARF5 to be selected with CONFIG_DEBUG_INFO_BTF, from Nathan Chancellor. 
14) Misc minor fixes to libbpf and selftests from various folks. * https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next: (126 commits) selftests/bpf: Cover 4-byte load from remote_port in bpf_sk_lookup bpf: Make remote_port field in struct bpf_sk_lookup 16-bit wide libbpf: Fix compilation warning due to mismatched printf format selftests/bpf: Test BPF_KPROBE_SYSCALL macro libbpf: Add BPF_KPROBE_SYSCALL macro libbpf: Fix accessing the first syscall argument on s390 libbpf: Fix accessing the first syscall argument on arm64 libbpf: Allow overriding PT_REGS_PARM1{_CORE}_SYSCALL selftests/bpf: Skip test_bpf_syscall_macro's syscall_arg1 on arm64 and s390 libbpf: Fix accessing syscall arguments on riscv libbpf: Fix riscv register names libbpf: Fix accessing syscall arguments on powerpc selftests/bpf: Use PT_REGS_SYSCALL_REGS in bpf_syscall_macro libbpf: Add PT_REGS_SYSCALL_REGS macro selftests/bpf: Fix an endianness issue in bpf_syscall_macro test bpf: Fix bpf_prog_pack build HPAGE_PMD_SIZE bpf: Fix leftover header->pages in sparc and powerpc code. libbpf: Fix signedness bug in btf_dump_array_data() selftests/bpf: Do not export subtest as standalone test bpf, x86_64: Fail gracefully on bpf_jit_binary_pack_finalize failures ... ==================== Link: https://lore.kernel.org/r/20220209210050.8425-1-daniel@iogearbox.net Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'drivers/net/ethernet/intel')
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_txrx.c11
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_txrx.h1
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_xsk.c4
-rw-r--r--drivers/net/ethernet/intel/ice/ice_ethtool.c2
-rw-r--r--drivers/net/ethernet/intel/ice/ice_main.c4
-rw-r--r--drivers/net/ethernet/intel/ice/ice_txrx.c6
-rw-r--r--drivers/net/ethernet/intel/ice/ice_txrx.h10
-rw-r--r--drivers/net/ethernet/intel/ice/ice_txrx_lib.c15
-rw-r--r--drivers/net/ethernet/intel/ice/ice_xsk.c374
-rw-r--r--drivers/net/ethernet/intel/ice/ice_xsk.h27
10 files changed, 308 insertions, 146 deletions
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index a628f4b43fe8..0eae5858f2fe 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -830,8 +830,6 @@ void i40e_free_tx_resources(struct i40e_ring *tx_ring)
i40e_clean_tx_ring(tx_ring);
kfree(tx_ring->tx_bi);
tx_ring->tx_bi = NULL;
- kfree(tx_ring->xsk_descs);
- tx_ring->xsk_descs = NULL;
if (tx_ring->desc) {
dma_free_coherent(tx_ring->dev, tx_ring->size,
@@ -1431,13 +1429,6 @@ int i40e_setup_tx_descriptors(struct i40e_ring *tx_ring)
if (!tx_ring->tx_bi)
goto err;
- if (ring_is_xdp(tx_ring)) {
- tx_ring->xsk_descs = kcalloc(I40E_MAX_NUM_DESCRIPTORS, sizeof(*tx_ring->xsk_descs),
- GFP_KERNEL);
- if (!tx_ring->xsk_descs)
- goto err;
- }
-
u64_stats_init(&tx_ring->syncp);
/* round up to nearest 4K */
@@ -1461,8 +1452,6 @@ int i40e_setup_tx_descriptors(struct i40e_ring *tx_ring)
return 0;
err:
- kfree(tx_ring->xsk_descs);
- tx_ring->xsk_descs = NULL;
kfree(tx_ring->tx_bi);
tx_ring->tx_bi = NULL;
return -ENOMEM;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
index 324699ec930b..c471c2da313c 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
@@ -392,7 +392,6 @@ struct i40e_ring {
u16 rx_offset;
struct xdp_rxq_info xdp_rxq;
struct xsk_buff_pool *xsk_pool;
- struct xdp_desc *xsk_descs; /* For storing descriptors in the AF_XDP ZC path */
} ____cacheline_internodealigned_in_smp;
static inline bool ring_uses_build_skb(struct i40e_ring *ring)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
index 67e9844e2076..5a997b0d07d8 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
@@ -471,11 +471,11 @@ static void i40e_set_rs_bit(struct i40e_ring *xdp_ring)
**/
static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget)
{
- struct xdp_desc *descs = xdp_ring->xsk_descs;
+ struct xdp_desc *descs = xdp_ring->xsk_pool->tx_descs;
u32 nb_pkts, nb_processed = 0;
unsigned int total_bytes = 0;
- nb_pkts = xsk_tx_peek_release_desc_batch(xdp_ring->xsk_pool, descs, budget);
+ nb_pkts = xsk_tx_peek_release_desc_batch(xdp_ring->xsk_pool, budget);
if (!nb_pkts)
return true;
diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
index e2e3ef7fba7f..e3df0134dc77 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
@@ -2803,6 +2803,8 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring,
/* clone ring and setup updated count */
xdp_rings[i] = *vsi->xdp_rings[i];
xdp_rings[i].count = new_tx_cnt;
+ xdp_rings[i].next_dd = ICE_RING_QUARTER(&xdp_rings[i]) - 1;
+ xdp_rings[i].next_rs = ICE_RING_QUARTER(&xdp_rings[i]) - 1;
xdp_rings[i].desc = NULL;
xdp_rings[i].tx_buf = NULL;
err = ice_setup_tx_ring(&xdp_rings[i]);
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index f46af3b34074..63f43400a146 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -2495,10 +2495,10 @@ static int ice_xdp_alloc_setup_rings(struct ice_vsi *vsi)
xdp_ring->reg_idx = vsi->txq_map[xdp_q_idx];
xdp_ring->vsi = vsi;
xdp_ring->netdev = NULL;
- xdp_ring->next_dd = ICE_TX_THRESH - 1;
- xdp_ring->next_rs = ICE_TX_THRESH - 1;
xdp_ring->dev = dev;
xdp_ring->count = vsi->num_tx_desc;
+ xdp_ring->next_dd = ICE_RING_QUARTER(xdp_ring) - 1;
+ xdp_ring->next_rs = ICE_RING_QUARTER(xdp_ring) - 1;
WRITE_ONCE(vsi->xdp_rings[i], xdp_ring);
if (ice_setup_tx_ring(xdp_ring))
goto free_xdp_rings;
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
index c2258bee8ecb..7b9b3b750bf0 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
@@ -173,6 +173,8 @@ tx_skip_free:
tx_ring->next_to_use = 0;
tx_ring->next_to_clean = 0;
+ tx_ring->next_dd = ICE_RING_QUARTER(tx_ring) - 1;
+ tx_ring->next_rs = ICE_RING_QUARTER(tx_ring) - 1;
if (!tx_ring->netdev)
return;
@@ -1467,7 +1469,7 @@ int ice_napi_poll(struct napi_struct *napi, int budget)
bool wd;
if (tx_ring->xsk_pool)
- wd = ice_clean_tx_irq_zc(tx_ring, budget);
+ wd = ice_xmit_zc(tx_ring, ICE_DESC_UNUSED(tx_ring), budget);
else if (ice_ring_is_xdp(tx_ring))
wd = true;
else
@@ -1520,7 +1522,7 @@ int ice_napi_poll(struct napi_struct *napi, int budget)
/* Exit the polling mode, but don't re-enable interrupts if stack might
* poll us due to busy-polling
*/
- if (likely(napi_complete_done(napi, work_done))) {
+ if (napi_complete_done(napi, work_done)) {
ice_net_dim(q_vector);
ice_enable_interrupt(q_vector);
} else {
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h
index b7b3bd4816f0..466253ac2ee1 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.h
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.h
@@ -13,7 +13,6 @@
#define ICE_MAX_CHAINED_RX_BUFS 5
#define ICE_MAX_BUF_TXD 8
#define ICE_MIN_TX_LEN 17
-#define ICE_TX_THRESH 32
/* The size limit for a transmit buffer in a descriptor is (16K - 1).
* In order to align with the read requests we will align the value to
@@ -111,6 +110,8 @@ static inline int ice_skb_pad(void)
(u16)((((R)->next_to_clean > (R)->next_to_use) ? 0 : (R)->count) + \
(R)->next_to_clean - (R)->next_to_use - 1)
+#define ICE_RING_QUARTER(R) ((R)->count >> 2)
+
#define ICE_TX_FLAGS_TSO BIT(0)
#define ICE_TX_FLAGS_HW_VLAN BIT(1)
#define ICE_TX_FLAGS_SW_VLAN BIT(2)
@@ -321,17 +322,18 @@ struct ice_tx_ring {
u16 count; /* Number of descriptors */
u16 q_index; /* Queue number of ring */
/* stats structs */
- struct ice_q_stats stats;
- struct u64_stats_sync syncp;
struct ice_txq_stats tx_stats;
-
/* CL3 - 3rd cacheline starts here */
+ struct ice_q_stats stats;
+ struct u64_stats_sync syncp;
struct rcu_head rcu; /* to avoid race on free */
DECLARE_BITMAP(xps_state, ICE_TX_NBITS); /* XPS Config State */
struct ice_channel *ch;
struct ice_ptp_tx *tx_tstamps;
spinlock_t tx_lock;
u32 txq_teid; /* Added Tx queue TEID */
+ /* CL4 - 4th cacheline starts here */
+ u16 xdp_tx_active;
#define ICE_TX_FLAGS_RING_XDP BIT(0)
u8 flags;
u8 dcb_tc; /* Traffic class of ring */
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c
index 0e87b98e0966..eb21cec1d772 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c
@@ -222,6 +222,7 @@ ice_receive_skb(struct ice_rx_ring *rx_ring, struct sk_buff *skb, u16 vlan_tag)
static void ice_clean_xdp_irq(struct ice_tx_ring *xdp_ring)
{
unsigned int total_bytes = 0, total_pkts = 0;
+ u16 tx_thresh = ICE_RING_QUARTER(xdp_ring);
u16 ntc = xdp_ring->next_to_clean;
struct ice_tx_desc *next_dd_desc;
u16 next_dd = xdp_ring->next_dd;
@@ -233,7 +234,7 @@ static void ice_clean_xdp_irq(struct ice_tx_ring *xdp_ring)
cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE)))
return;
- for (i = 0; i < ICE_TX_THRESH; i++) {
+ for (i = 0; i < tx_thresh; i++) {
tx_buf = &xdp_ring->tx_buf[ntc];
total_bytes += tx_buf->bytecount;
@@ -254,9 +255,9 @@ static void ice_clean_xdp_irq(struct ice_tx_ring *xdp_ring)
}
next_dd_desc->cmd_type_offset_bsz = 0;
- xdp_ring->next_dd = xdp_ring->next_dd + ICE_TX_THRESH;
+ xdp_ring->next_dd = xdp_ring->next_dd + tx_thresh;
if (xdp_ring->next_dd > xdp_ring->count)
- xdp_ring->next_dd = ICE_TX_THRESH - 1;
+ xdp_ring->next_dd = tx_thresh - 1;
xdp_ring->next_to_clean = ntc;
ice_update_tx_ring_stats(xdp_ring, total_pkts, total_bytes);
}
@@ -269,12 +270,13 @@ static void ice_clean_xdp_irq(struct ice_tx_ring *xdp_ring)
*/
int ice_xmit_xdp_ring(void *data, u16 size, struct ice_tx_ring *xdp_ring)
{
+ u16 tx_thresh = ICE_RING_QUARTER(xdp_ring);
u16 i = xdp_ring->next_to_use;
struct ice_tx_desc *tx_desc;
struct ice_tx_buf *tx_buf;
dma_addr_t dma;
- if (ICE_DESC_UNUSED(xdp_ring) < ICE_TX_THRESH)
+ if (ICE_DESC_UNUSED(xdp_ring) < tx_thresh)
ice_clean_xdp_irq(xdp_ring);
if (!unlikely(ICE_DESC_UNUSED(xdp_ring))) {
@@ -300,13 +302,14 @@ int ice_xmit_xdp_ring(void *data, u16 size, struct ice_tx_ring *xdp_ring)
tx_desc->cmd_type_offset_bsz = ice_build_ctob(ICE_TX_DESC_CMD_EOP, 0,
size, 0);
+ xdp_ring->xdp_tx_active++;
i++;
if (i == xdp_ring->count) {
i = 0;
tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_rs);
tx_desc->cmd_type_offset_bsz |=
cpu_to_le64(ICE_TX_DESC_CMD_RS << ICE_TXD_QW1_CMD_S);
- xdp_ring->next_rs = ICE_TX_THRESH - 1;
+ xdp_ring->next_rs = tx_thresh - 1;
}
xdp_ring->next_to_use = i;
@@ -314,7 +317,7 @@ int ice_xmit_xdp_ring(void *data, u16 size, struct ice_tx_ring *xdp_ring)
tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_rs);
tx_desc->cmd_type_offset_bsz |=
cpu_to_le64(ICE_TX_DESC_CMD_RS << ICE_TXD_QW1_CMD_S);
- xdp_ring->next_rs += ICE_TX_THRESH;
+ xdp_ring->next_rs += tx_thresh;
}
return ICE_XDP_TX;
diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c
index feb874bde171..ed430d566274 100644
--- a/drivers/net/ethernet/intel/ice/ice_xsk.c
+++ b/drivers/net/ethernet/intel/ice/ice_xsk.c
@@ -327,6 +327,13 @@ int ice_xsk_pool_setup(struct ice_vsi *vsi, struct xsk_buff_pool *pool, u16 qid)
bool if_running, pool_present = !!pool;
int ret = 0, pool_failure = 0;
+ if (!is_power_of_2(vsi->rx_rings[qid]->count) ||
+ !is_power_of_2(vsi->tx_rings[qid]->count)) {
+ netdev_err(vsi->netdev, "Please align ring sizes to power of 2\n");
+ pool_failure = -EINVAL;
+ goto failure;
+ }
+
if_running = netif_running(vsi->netdev) && ice_is_xdp_ena_vsi(vsi);
if (if_running) {
@@ -349,6 +356,7 @@ xsk_pool_if_up:
netdev_err(vsi->netdev, "ice_qp_ena error = %d\n", ret);
}
+failure:
if (pool_failure) {
netdev_err(vsi->netdev, "Could not %sable buffer pool, error = %d\n",
pool_present ? "en" : "dis", pool_failure);
@@ -359,33 +367,28 @@ xsk_pool_if_up:
}
/**
- * ice_alloc_rx_bufs_zc - allocate a number of Rx buffers
- * @rx_ring: Rx ring
+ * ice_fill_rx_descs - pick buffers from XSK buffer pool and use them
+ * @pool: XSK Buffer pool to pull the buffers from
+ * @xdp: SW ring of xdp_buff that will hold the buffers
+ * @rx_desc: Pointer to Rx descriptors that will be filled
* @count: The number of buffers to allocate
*
* This function allocates a number of Rx buffers from the fill ring
* or the internal recycle mechanism and places them on the Rx ring.
*
- * Returns true if all allocations were successful, false if any fail.
+ * Note that ring wrap should be handled by caller of this function.
+ *
+ * Returns the number of allocated Rx descriptors
*/
-bool ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count)
+static u16 ice_fill_rx_descs(struct xsk_buff_pool *pool, struct xdp_buff **xdp,
+ union ice_32b_rx_flex_desc *rx_desc, u16 count)
{
- union ice_32b_rx_flex_desc *rx_desc;
- u16 ntu = rx_ring->next_to_use;
- struct xdp_buff **xdp;
- u32 nb_buffs, i;
dma_addr_t dma;
+ u16 buffs;
+ int i;
- rx_desc = ICE_RX_DESC(rx_ring, ntu);
- xdp = ice_xdp_buf(rx_ring, ntu);
-
- nb_buffs = min_t(u16, count, rx_ring->count - ntu);
- nb_buffs = xsk_buff_alloc_batch(rx_ring->xsk_pool, xdp, nb_buffs);
- if (!nb_buffs)
- return false;
-
- i = nb_buffs;
- while (i--) {
+ buffs = xsk_buff_alloc_batch(pool, xdp, count);
+ for (i = 0; i < buffs; i++) {
dma = xsk_buff_xdp_get_dma(*xdp);
rx_desc->read.pkt_addr = cpu_to_le64(dma);
rx_desc->wb.status_error0 = 0;
@@ -394,13 +397,77 @@ bool ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count)
xdp++;
}
+ return buffs;
+}
+
+/**
+ * __ice_alloc_rx_bufs_zc - allocate a number of Rx buffers
+ * @rx_ring: Rx ring
+ * @count: The number of buffers to allocate
+ *
+ * Place @count descriptors onto the Rx ring. Handle the ring wrap for
+ * the case where the space from next_to_use up to the end of the ring is
+ * less than @count. Finally do a tail bump.
+ *
+ * Returns true if all allocations were successful, false if any fail.
+ */
+static bool __ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count)
+{
+ union ice_32b_rx_flex_desc *rx_desc;
+ u32 nb_buffs_extra = 0, nb_buffs;
+ u16 ntu = rx_ring->next_to_use;
+ u16 total_count = count;
+ struct xdp_buff **xdp;
+
+ rx_desc = ICE_RX_DESC(rx_ring, ntu);
+ xdp = ice_xdp_buf(rx_ring, ntu);
+
+ if (ntu + count >= rx_ring->count) {
+ nb_buffs_extra = ice_fill_rx_descs(rx_ring->xsk_pool, xdp,
+ rx_desc,
+ rx_ring->count - ntu);
+ rx_desc = ICE_RX_DESC(rx_ring, 0);
+ xdp = ice_xdp_buf(rx_ring, 0);
+ ntu = 0;
+ count -= nb_buffs_extra;
+ ice_release_rx_desc(rx_ring, 0);
+ }
+
+ nb_buffs = ice_fill_rx_descs(rx_ring->xsk_pool, xdp, rx_desc, count);
+
ntu += nb_buffs;
if (ntu == rx_ring->count)
ntu = 0;
- ice_release_rx_desc(rx_ring, ntu);
+ if (rx_ring->next_to_use != ntu)
+ ice_release_rx_desc(rx_ring, ntu);
- return count == nb_buffs;
+ return total_count == (nb_buffs_extra + nb_buffs);
+}
+
+/**
+ * ice_alloc_rx_bufs_zc - allocate a number of Rx buffers
+ * @rx_ring: Rx ring
+ * @count: The number of buffers to allocate
+ *
+ * Wrapper for internal allocation routine; figure out how many tail
+ * bumps should take place based on the given threshold
+ *
+ * Returns true if all calls to internal alloc routine succeeded
+ */
+bool ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count)
+{
+ u16 rx_thresh = ICE_RING_QUARTER(rx_ring);
+ u16 batched, leftover, i, tail_bumps;
+
+ batched = ALIGN_DOWN(count, rx_thresh);
+ tail_bumps = batched / rx_thresh;
+ leftover = count & (rx_thresh - 1);
+
+ for (i = 0; i < tail_bumps; i++)
+ if (!__ice_alloc_rx_bufs_zc(rx_ring, rx_thresh))
+ return false;
+ return __ice_alloc_rx_bufs_zc(rx_ring, leftover);
}
/**
@@ -616,134 +683,221 @@ construct_skb:
}
/**
- * ice_xmit_zc - Completes AF_XDP entries, and cleans XDP entries
+ * ice_clean_xdp_tx_buf - Free and unmap XDP Tx buffer
* @xdp_ring: XDP Tx ring
- * @budget: max number of frames to xmit
+ * @tx_buf: Tx buffer to clean
+ */
+static void
+ice_clean_xdp_tx_buf(struct ice_tx_ring *xdp_ring, struct ice_tx_buf *tx_buf)
+{
+ xdp_return_frame((struct xdp_frame *)tx_buf->raw_buf);
+ xdp_ring->xdp_tx_active--;
+ dma_unmap_single(xdp_ring->dev, dma_unmap_addr(tx_buf, dma),
+ dma_unmap_len(tx_buf, len), DMA_TO_DEVICE);
+ dma_unmap_len_set(tx_buf, len, 0);
+}
+
+/**
+ * ice_clean_xdp_irq_zc - Reclaim resources after transmit completes on XDP ring
+ * @xdp_ring: XDP ring to clean
+ * @napi_budget: number of descriptors that NAPI allows us to clean
*
- * Returns true if cleanup/transmission is done.
+ * Returns count of cleaned descriptors
*/
-static bool ice_xmit_zc(struct ice_tx_ring *xdp_ring, int budget)
+static u16 ice_clean_xdp_irq_zc(struct ice_tx_ring *xdp_ring, int napi_budget)
{
- struct ice_tx_desc *tx_desc = NULL;
- bool work_done = true;
- struct xdp_desc desc;
- dma_addr_t dma;
+ u16 tx_thresh = ICE_RING_QUARTER(xdp_ring);
+ int budget = napi_budget / tx_thresh;
+ u16 next_dd = xdp_ring->next_dd;
+ u16 ntc, cleared_dds = 0;
- while (likely(budget-- > 0)) {
+ do {
+ struct ice_tx_desc *next_dd_desc;
+ u16 desc_cnt = xdp_ring->count;
struct ice_tx_buf *tx_buf;
+ u32 xsk_frames;
+ u16 i;
- if (unlikely(!ICE_DESC_UNUSED(xdp_ring))) {
- xdp_ring->tx_stats.tx_busy++;
- work_done = false;
+ next_dd_desc = ICE_TX_DESC(xdp_ring, next_dd);
+ if (!(next_dd_desc->cmd_type_offset_bsz &
+ cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE)))
break;
- }
-
- tx_buf = &xdp_ring->tx_buf[xdp_ring->next_to_use];
- if (!xsk_tx_peek_desc(xdp_ring->xsk_pool, &desc))
- break;
-
- dma = xsk_buff_raw_get_dma(xdp_ring->xsk_pool, desc.addr);
- xsk_buff_raw_dma_sync_for_device(xdp_ring->xsk_pool, dma,
- desc.len);
+ cleared_dds++;
+ xsk_frames = 0;
+ if (likely(!xdp_ring->xdp_tx_active)) {
+ xsk_frames = tx_thresh;
+ goto skip;
+ }
- tx_buf->bytecount = desc.len;
+ ntc = xdp_ring->next_to_clean;
- tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_to_use);
- tx_desc->buf_addr = cpu_to_le64(dma);
- tx_desc->cmd_type_offset_bsz =
- ice_build_ctob(ICE_TXD_LAST_DESC_CMD, 0, desc.len, 0);
+ for (i = 0; i < tx_thresh; i++) {
+ tx_buf = &xdp_ring->tx_buf[ntc];
- xdp_ring->next_to_use++;
- if (xdp_ring->next_to_use == xdp_ring->count)
- xdp_ring->next_to_use = 0;
- }
-
- if (tx_desc) {
- ice_xdp_ring_update_tail(xdp_ring);
- xsk_tx_release(xdp_ring->xsk_pool);
- }
+ if (tx_buf->raw_buf) {
+ ice_clean_xdp_tx_buf(xdp_ring, tx_buf);
+ tx_buf->raw_buf = NULL;
+ } else {
+ xsk_frames++;
+ }
- return budget > 0 && work_done;
+ ntc++;
+ if (ntc >= xdp_ring->count)
+ ntc = 0;
+ }
+skip:
+ xdp_ring->next_to_clean += tx_thresh;
+ if (xdp_ring->next_to_clean >= desc_cnt)
+ xdp_ring->next_to_clean -= desc_cnt;
+ if (xsk_frames)
+ xsk_tx_completed(xdp_ring->xsk_pool, xsk_frames);
+ next_dd_desc->cmd_type_offset_bsz = 0;
+ next_dd = next_dd + tx_thresh;
+ if (next_dd >= desc_cnt)
+ next_dd = tx_thresh - 1;
+ } while (budget--);
+
+ xdp_ring->next_dd = next_dd;
+
+ return cleared_dds * tx_thresh;
}
/**
- * ice_clean_xdp_tx_buf - Free and unmap XDP Tx buffer
- * @xdp_ring: XDP Tx ring
- * @tx_buf: Tx buffer to clean
+ * ice_xmit_pkt - produce a single HW Tx descriptor out of AF_XDP descriptor
+ * @xdp_ring: XDP ring to produce the HW Tx descriptor on
+ * @desc: AF_XDP descriptor to pull the DMA address and length from
+ * @total_bytes: bytes accumulator that will be used for stats update
*/
-static void
-ice_clean_xdp_tx_buf(struct ice_tx_ring *xdp_ring, struct ice_tx_buf *tx_buf)
+static void ice_xmit_pkt(struct ice_tx_ring *xdp_ring, struct xdp_desc *desc,
+ unsigned int *total_bytes)
{
- xdp_return_frame((struct xdp_frame *)tx_buf->raw_buf);
- dma_unmap_single(xdp_ring->dev, dma_unmap_addr(tx_buf, dma),
- dma_unmap_len(tx_buf, len), DMA_TO_DEVICE);
- dma_unmap_len_set(tx_buf, len, 0);
+ struct ice_tx_desc *tx_desc;
+ dma_addr_t dma;
+
+ dma = xsk_buff_raw_get_dma(xdp_ring->xsk_pool, desc->addr);
+ xsk_buff_raw_dma_sync_for_device(xdp_ring->xsk_pool, dma, desc->len);
+
+ tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_to_use++);
+ tx_desc->buf_addr = cpu_to_le64(dma);
+ tx_desc->cmd_type_offset_bsz = ice_build_ctob(ICE_TX_DESC_CMD_EOP,
+ 0, desc->len, 0);
+
+ *total_bytes += desc->len;
}
/**
- * ice_clean_tx_irq_zc - Completes AF_XDP entries, and cleans XDP entries
- * @xdp_ring: XDP Tx ring
- * @budget: NAPI budget
- *
- * Returns true if cleanup/tranmission is done.
+ * ice_xmit_pkt_batch - produce a batch of HW Tx descriptors out of AF_XDP descriptors
+ * @xdp_ring: XDP ring to produce the HW Tx descriptors on
+ * @descs: AF_XDP descriptors to pull the DMA addresses and lengths from
+ * @total_bytes: bytes accumulator that will be used for stats update
*/
-bool ice_clean_tx_irq_zc(struct ice_tx_ring *xdp_ring, int budget)
+static void ice_xmit_pkt_batch(struct ice_tx_ring *xdp_ring, struct xdp_desc *descs,
+ unsigned int *total_bytes)
{
- int total_packets = 0, total_bytes = 0;
- s16 ntc = xdp_ring->next_to_clean;
+ u16 tx_thresh = ICE_RING_QUARTER(xdp_ring);
+ u16 ntu = xdp_ring->next_to_use;
struct ice_tx_desc *tx_desc;
- struct ice_tx_buf *tx_buf;
- u32 xsk_frames = 0;
- bool xmit_done;
+ u32 i;
- tx_desc = ICE_TX_DESC(xdp_ring, ntc);
- tx_buf = &xdp_ring->tx_buf[ntc];
- ntc -= xdp_ring->count;
+ loop_unrolled_for(i = 0; i < PKTS_PER_BATCH; i++) {
+ dma_addr_t dma;
- do {
- if (!(tx_desc->cmd_type_offset_bsz &
- cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE)))
- break;
+ dma = xsk_buff_raw_get_dma(xdp_ring->xsk_pool, descs[i].addr);
+ xsk_buff_raw_dma_sync_for_device(xdp_ring->xsk_pool, dma, descs[i].len);
- total_bytes += tx_buf->bytecount;
- total_packets++;
+ tx_desc = ICE_TX_DESC(xdp_ring, ntu++);
+ tx_desc->buf_addr = cpu_to_le64(dma);
+ tx_desc->cmd_type_offset_bsz = ice_build_ctob(ICE_TX_DESC_CMD_EOP,
+ 0, descs[i].len, 0);
- if (tx_buf->raw_buf) {
- ice_clean_xdp_tx_buf(xdp_ring, tx_buf);
- tx_buf->raw_buf = NULL;
- } else {
- xsk_frames++;
- }
+ *total_bytes += descs[i].len;
+ }
- tx_desc->cmd_type_offset_bsz = 0;
- tx_buf++;
- tx_desc++;
- ntc++;
+ xdp_ring->next_to_use = ntu;
- if (unlikely(!ntc)) {
- ntc -= xdp_ring->count;
- tx_buf = xdp_ring->tx_buf;
- tx_desc = ICE_TX_DESC(xdp_ring, 0);
- }
+ if (xdp_ring->next_to_use > xdp_ring->next_rs) {
+ tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_rs);
+ tx_desc->cmd_type_offset_bsz |=
+ cpu_to_le64(ICE_TX_DESC_CMD_RS << ICE_TXD_QW1_CMD_S);
+ xdp_ring->next_rs += tx_thresh;
+ }
+}
- prefetch(tx_desc);
+/**
+ * ice_fill_tx_hw_ring - produce the number of Tx descriptors onto ring
+ * @xdp_ring: XDP ring to produce the HW Tx descriptors on
+ * @descs: AF_XDP descriptors to pull the DMA addresses and lengths from
+ * @nb_pkts: count of packets to be sent
+ * @total_bytes: bytes accumulator that will be used for stats update
+ */
+static void ice_fill_tx_hw_ring(struct ice_tx_ring *xdp_ring, struct xdp_desc *descs,
+ u32 nb_pkts, unsigned int *total_bytes)
+{
+ u16 tx_thresh = ICE_RING_QUARTER(xdp_ring);
+ u32 batched, leftover, i;
+
+ batched = ALIGN_DOWN(nb_pkts, PKTS_PER_BATCH);
+ leftover = nb_pkts & (PKTS_PER_BATCH - 1);
+ for (i = 0; i < batched; i += PKTS_PER_BATCH)
+ ice_xmit_pkt_batch(xdp_ring, &descs[i], total_bytes);
+ for (; i < batched + leftover; i++)
+ ice_xmit_pkt(xdp_ring, &descs[i], total_bytes);
+
+ if (xdp_ring->next_to_use > xdp_ring->next_rs) {
+ struct ice_tx_desc *tx_desc;
+
+ tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_rs);
+ tx_desc->cmd_type_offset_bsz |=
+ cpu_to_le64(ICE_TX_DESC_CMD_RS << ICE_TXD_QW1_CMD_S);
+ xdp_ring->next_rs += tx_thresh;
+ }
+}
- } while (likely(--budget));
+/**
+ * ice_xmit_zc - take entries from XSK Tx ring and place them onto HW Tx ring
+ * @xdp_ring: XDP ring to produce the HW Tx descriptors on
+ * @budget: number of free descriptors on HW Tx ring that can be used
+ * @napi_budget: number of descriptors that NAPI allows us to clean
+ *
+ * Returns true if there is no more work that needs to be done, false otherwise
+ */
+bool ice_xmit_zc(struct ice_tx_ring *xdp_ring, u32 budget, int napi_budget)
+{
+ struct xdp_desc *descs = xdp_ring->xsk_pool->tx_descs;
+ u16 tx_thresh = ICE_RING_QUARTER(xdp_ring);
+ u32 nb_pkts, nb_processed = 0;
+ unsigned int total_bytes = 0;
+
+ if (budget < tx_thresh)
+ budget += ice_clean_xdp_irq_zc(xdp_ring, napi_budget);
+
+ nb_pkts = xsk_tx_peek_release_desc_batch(xdp_ring->xsk_pool, budget);
+ if (!nb_pkts)
+ return true;
+
+ if (xdp_ring->next_to_use + nb_pkts >= xdp_ring->count) {
+ struct ice_tx_desc *tx_desc;
+
+ nb_processed = xdp_ring->count - xdp_ring->next_to_use;
+ ice_fill_tx_hw_ring(xdp_ring, descs, nb_processed, &total_bytes);
+ tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_rs);
+ tx_desc->cmd_type_offset_bsz |=
+ cpu_to_le64(ICE_TX_DESC_CMD_RS << ICE_TXD_QW1_CMD_S);
+ xdp_ring->next_rs = tx_thresh - 1;
+ xdp_ring->next_to_use = 0;
+ }
- ntc += xdp_ring->count;
- xdp_ring->next_to_clean = ntc;
+ ice_fill_tx_hw_ring(xdp_ring, &descs[nb_processed], nb_pkts - nb_processed,
+ &total_bytes);
- if (xsk_frames)
- xsk_tx_completed(xdp_ring->xsk_pool, xsk_frames);
+ ice_xdp_ring_update_tail(xdp_ring);
+ ice_update_tx_ring_stats(xdp_ring, nb_pkts, total_bytes);
if (xsk_uses_need_wakeup(xdp_ring->xsk_pool))
xsk_set_tx_need_wakeup(xdp_ring->xsk_pool);
- ice_update_tx_ring_stats(xdp_ring, total_packets, total_bytes);
- xmit_done = ice_xmit_zc(xdp_ring, ICE_DFLT_IRQ_WORK);
-
- return budget > 0 && xmit_done;
+ return nb_pkts < budget;
}
/**
diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.h b/drivers/net/ethernet/intel/ice/ice_xsk.h
index 4c7bd8e9dfc4..0cbb5793b5b8 100644
--- a/drivers/net/ethernet/intel/ice/ice_xsk.h
+++ b/drivers/net/ethernet/intel/ice/ice_xsk.h
@@ -6,19 +6,37 @@
#include "ice_txrx.h"
#include "ice.h"
+#define PKTS_PER_BATCH 8
+
+#ifdef __clang__
+#define loop_unrolled_for _Pragma("clang loop unroll_count(8)") for
+#elif __GNUC__ >= 4
+#define loop_unrolled_for _Pragma("GCC unroll 8") for
+#else
+#define loop_unrolled_for for
+#endif
+
struct ice_vsi;
#ifdef CONFIG_XDP_SOCKETS
int ice_xsk_pool_setup(struct ice_vsi *vsi, struct xsk_buff_pool *pool,
u16 qid);
int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget);
-bool ice_clean_tx_irq_zc(struct ice_tx_ring *xdp_ring, int budget);
int ice_xsk_wakeup(struct net_device *netdev, u32 queue_id, u32 flags);
bool ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count);
bool ice_xsk_any_rx_ring_ena(struct ice_vsi *vsi);
void ice_xsk_clean_rx_ring(struct ice_rx_ring *rx_ring);
void ice_xsk_clean_xdp_ring(struct ice_tx_ring *xdp_ring);
+bool ice_xmit_zc(struct ice_tx_ring *xdp_ring, u32 budget, int napi_budget);
#else
+static inline bool
+ice_xmit_zc(struct ice_tx_ring __always_unused *xdp_ring,
+ u32 __always_unused budget,
+ int __always_unused napi_budget)
+{
+ return false;
+}
+
static inline int
ice_xsk_pool_setup(struct ice_vsi __always_unused *vsi,
struct xsk_buff_pool __always_unused *pool,
@@ -35,13 +53,6 @@ ice_clean_rx_irq_zc(struct ice_rx_ring __always_unused *rx_ring,
}
static inline bool
-ice_clean_tx_irq_zc(struct ice_tx_ring __always_unused *xdp_ring,
- int __always_unused budget)
-{
- return false;
-}
-
-static inline bool
ice_alloc_rx_bufs_zc(struct ice_rx_ring __always_unused *rx_ring,
u16 __always_unused count)
{