Diffstat (limited to 'drivers/net/ethernet/mellanox/mlx5/core/en/xsk')
 drivers/net/ethernet/mellanox/mlx5/core/en/xsk/Makefile |   1
 drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c     | 192
 drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h     |  27
 drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c  | 223
 drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h  |  25
 drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c     | 111
 drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h     |  15
 drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.c   | 267
 drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.h   |  31
 9 files changed, 892 insertions(+), 0 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/Makefile new file mode 100644 index 000000000000..5ee42991900a --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/Makefile @@ -0,0 +1 @@ +subdir-ccflags-y += -I$(src)/../.. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c new file mode 100644 index 000000000000..6a55573ec8f2 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c @@ -0,0 +1,192 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. */ + +#include "rx.h" +#include "en/xdp.h" +#include <net/xdp_sock.h> + +/* RX data path */ + +bool mlx5e_xsk_pages_enough_umem(struct mlx5e_rq *rq, int count) +{ + /* Check in advance that we have enough frames, instead of allocating + * one-by-one, failing and moving frames to the Reuse Ring. + */ + return xsk_umem_has_addrs_rq(rq->umem, count); +} + +int mlx5e_xsk_page_alloc_umem(struct mlx5e_rq *rq, + struct mlx5e_dma_info *dma_info) +{ + struct xdp_umem *umem = rq->umem; + u64 handle; + + if (!xsk_umem_peek_addr_rq(umem, &handle)) + return -ENOMEM; + + dma_info->xsk.handle = handle + rq->buff.umem_headroom; + dma_info->xsk.data = xdp_umem_get_data(umem, dma_info->xsk.handle); + + /* No need to add headroom to the DMA address. In striding RQ case, we + * just provide pages for UMR, and headroom is counted at the setup + * stage when creating a WQE. In non-striding RQ case, headroom is + * accounted in mlx5e_alloc_rx_wqe. + */ + dma_info->addr = xdp_umem_get_dma(umem, handle); + + xsk_umem_discard_addr_rq(umem); + + dma_sync_single_for_device(rq->pdev, dma_info->addr, PAGE_SIZE, + DMA_BIDIRECTIONAL); + + return 0; +} + +static inline void mlx5e_xsk_recycle_frame(struct mlx5e_rq *rq, u64 handle) +{ + xsk_umem_fq_reuse(rq->umem, handle & rq->umem->chunk_mask); +} + +/* XSKRQ uses pages from UMEM, they must not be released. They are returned to + * the userspace if possible, and if not, this function is called to reuse them + * in the driver. + */ +void mlx5e_xsk_page_release(struct mlx5e_rq *rq, + struct mlx5e_dma_info *dma_info) +{ + mlx5e_xsk_recycle_frame(rq, dma_info->xsk.handle); +} + +/* Return a frame back to the hardware to fill in again. It is used by XDP when + * the XDP program returns XDP_TX or XDP_REDIRECT not to an XSKMAP. + */ +void mlx5e_xsk_zca_free(struct zero_copy_allocator *zca, unsigned long handle) +{ + struct mlx5e_rq *rq = container_of(zca, struct mlx5e_rq, zca); + + mlx5e_xsk_recycle_frame(rq, handle); +} + +static struct sk_buff *mlx5e_xsk_construct_skb(struct mlx5e_rq *rq, void *data, + u32 cqe_bcnt) +{ + struct sk_buff *skb; + + skb = napi_alloc_skb(rq->cq.napi, cqe_bcnt); + if (unlikely(!skb)) { + rq->stats->buff_alloc_err++; + return NULL; + } + + skb_put_data(skb, data, cqe_bcnt); + + return skb; +} + +struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, + struct mlx5e_mpw_info *wi, + u16 cqe_bcnt, + u32 head_offset, + u32 page_idx) +{ + struct mlx5e_dma_info *di = &wi->umr.dma_info[page_idx]; + u16 rx_headroom = rq->buff.headroom - rq->buff.umem_headroom; + u32 cqe_bcnt32 = cqe_bcnt; + void *va, *data; + u32 frag_size; + bool consumed; + + /* Check packet size. 
Note LRO doesn't use linear SKB */ + if (unlikely(cqe_bcnt > rq->hw_mtu)) { + rq->stats->oversize_pkts_sw_drop++; + return NULL; + } + + /* head_offset is not used in this function, because di->xsk.data and + * di->addr point directly to the necessary place. Furthermore, in the + * current implementation, one page = one packet = one frame, so + * head_offset should always be 0. + */ + WARN_ON_ONCE(head_offset); + + va = di->xsk.data; + data = va + rx_headroom; + frag_size = rq->buff.headroom + cqe_bcnt32; + + dma_sync_single_for_cpu(rq->pdev, di->addr, frag_size, DMA_BIDIRECTIONAL); + prefetch(data); + + rcu_read_lock(); + consumed = mlx5e_xdp_handle(rq, di, va, &rx_headroom, &cqe_bcnt32, true); + rcu_read_unlock(); + + /* Possible flows: + * - XDP_REDIRECT to XSKMAP: + * The page is owned by the userspace from now. + * - XDP_TX and other XDP_REDIRECTs: + * The page was returned by ZCA and recycled. + * - XDP_DROP: + * Recycle the page. + * - XDP_PASS: + * Allocate an SKB, copy the data and recycle the page. + * + * Pages to be recycled go to the Reuse Ring on MPWQE deallocation. Its + * size is the same as the Driver RX Ring's size, and pages for WQEs are + * allocated first from the Reuse Ring, so it has enough space. + */ + + if (likely(consumed)) { + if (likely(__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags))) + __set_bit(page_idx, wi->xdp_xmit_bitmap); /* non-atomic */ + return NULL; /* page/packet was consumed by XDP */ + } + + /* XDP_PASS: copy the data from the UMEM to a new SKB and reuse the + * frame. On SKB allocation failure, NULL is returned. + */ + return mlx5e_xsk_construct_skb(rq, data, cqe_bcnt32); +} + +struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq, + struct mlx5_cqe64 *cqe, + struct mlx5e_wqe_frag_info *wi, + u32 cqe_bcnt) +{ + struct mlx5e_dma_info *di = wi->di; + u16 rx_headroom = rq->buff.headroom - rq->buff.umem_headroom; + void *va, *data; + bool consumed; + u32 frag_size; + + /* wi->offset is not used in this function, because di->xsk.data and + * di->addr point directly to the necessary place. Furthermore, in the + * current implementation, one page = one packet = one frame, so + * wi->offset should always be 0. + */ + WARN_ON_ONCE(wi->offset); + + va = di->xsk.data; + data = va + rx_headroom; + frag_size = rq->buff.headroom + cqe_bcnt; + + dma_sync_single_for_cpu(rq->pdev, di->addr, frag_size, DMA_BIDIRECTIONAL); + prefetch(data); + + if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_RESP_SEND)) { + rq->stats->wqe_err++; + return NULL; + } + + rcu_read_lock(); + consumed = mlx5e_xdp_handle(rq, di, va, &rx_headroom, &cqe_bcnt, true); + rcu_read_unlock(); + + if (likely(consumed)) + return NULL; /* page/packet was consumed by XDP */ + + /* XDP_PASS: copy the data from the UMEM to a new SKB. The frame reuse + * will be handled by mlx5e_put_rx_frag. + * On SKB allocation failure, NULL is returned. + */ + return mlx5e_xsk_construct_skb(rq, data, cqe_bcnt); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h new file mode 100644 index 000000000000..307b923a1361 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. 
*/ + +#ifndef __MLX5_EN_XSK_RX_H__ +#define __MLX5_EN_XSK_RX_H__ + +#include "en.h" + +/* RX data path */ + +bool mlx5e_xsk_pages_enough_umem(struct mlx5e_rq *rq, int count); +int mlx5e_xsk_page_alloc_umem(struct mlx5e_rq *rq, + struct mlx5e_dma_info *dma_info); +void mlx5e_xsk_page_release(struct mlx5e_rq *rq, + struct mlx5e_dma_info *dma_info); +void mlx5e_xsk_zca_free(struct zero_copy_allocator *zca, unsigned long handle); +struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, + struct mlx5e_mpw_info *wi, + u16 cqe_bcnt, + u32 head_offset, + u32 page_idx); +struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq, + struct mlx5_cqe64 *cqe, + struct mlx5e_wqe_frag_info *wi, + u32 cqe_bcnt); + +#endif /* __MLX5_EN_XSK_RX_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c new file mode 100644 index 000000000000..aaffa6f68dc0 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c @@ -0,0 +1,223 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. */ + +#include "setup.h" +#include "en/params.h" + +bool mlx5e_validate_xsk_param(struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, + struct mlx5_core_dev *mdev) +{ + /* AF_XDP doesn't support frames larger than PAGE_SIZE, and the current + * mlx5e XDP implementation doesn't support multiple packets per page. + */ + if (xsk->chunk_size != PAGE_SIZE) + return false; + + /* Current MTU and XSK headroom don't allow packets to fit the frames. */ + if (mlx5e_rx_get_linear_frag_sz(params, xsk) > xsk->chunk_size) + return false; + + /* frag_sz is different for regular and XSK RQs, so ensure that linear + * SKB mode is possible. + */ + switch (params->rq_wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + return mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk); + default: /* MLX5_WQ_TYPE_CYCLIC */ + return mlx5e_rx_is_linear_skb(params, xsk); + } +} + +static void mlx5e_build_xskicosq_param(struct mlx5e_priv *priv, + u8 log_wq_size, + struct mlx5e_sq_param *param) +{ + void *sqc = param->sqc; + void *wq = MLX5_ADDR_OF(sqc, sqc, wq); + + mlx5e_build_sq_param_common(priv, param); + + MLX5_SET(wq, wq, log_wq_sz, log_wq_size); +} + +static void mlx5e_build_xsk_cparam(struct mlx5e_priv *priv, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, + struct mlx5e_channel_param *cparam) +{ + const u8 xskicosq_size = MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE; + + mlx5e_build_rq_param(priv, params, xsk, &cparam->rq); + mlx5e_build_xdpsq_param(priv, params, &cparam->xdp_sq); + mlx5e_build_xskicosq_param(priv, xskicosq_size, &cparam->icosq); + mlx5e_build_rx_cq_param(priv, params, xsk, &cparam->rx_cq); + mlx5e_build_tx_cq_param(priv, params, &cparam->tx_cq); + mlx5e_build_ico_cq_param(priv, xskicosq_size, &cparam->icosq_cq); +} + +int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, struct xdp_umem *umem, + struct mlx5e_channel *c) +{ + struct mlx5e_channel_param cparam = {}; + struct dim_cq_moder icocq_moder = {}; + int err; + + if (!mlx5e_validate_xsk_param(params, xsk, priv->mdev)) + return -EINVAL; + + mlx5e_build_xsk_cparam(priv, params, xsk, &cparam); + + err = mlx5e_open_cq(c, params->rx_cq_moderation, &cparam.rx_cq, &c->xskrq.cq); + if (unlikely(err)) + return err; + + err = mlx5e_open_rq(c, params, &cparam.rq, xsk, umem, &c->xskrq); + if (unlikely(err)) + goto err_close_rx_cq; + + err = mlx5e_open_cq(c, 
params->tx_cq_moderation, &cparam.tx_cq, &c->xsksq.cq); + if (unlikely(err)) + goto err_close_rq; + + /* Create a separate SQ, so that when the UMEM is disabled, we could + * close this SQ safely and stop receiving CQEs. In other case, e.g., if + * the XDPSQ was used instead, we might run into trouble when the UMEM + * is disabled and then reenabled, but the SQ continues receiving CQEs + * from the old UMEM. + */ + err = mlx5e_open_xdpsq(c, params, &cparam.xdp_sq, umem, &c->xsksq, true); + if (unlikely(err)) + goto err_close_tx_cq; + + err = mlx5e_open_cq(c, icocq_moder, &cparam.icosq_cq, &c->xskicosq.cq); + if (unlikely(err)) + goto err_close_sq; + + /* Create a dedicated SQ for posting NOPs whenever we need an IRQ to be + * triggered and NAPI to be called on the correct CPU. + */ + err = mlx5e_open_icosq(c, params, &cparam.icosq, &c->xskicosq); + if (unlikely(err)) + goto err_close_icocq; + + spin_lock_init(&c->xskicosq_lock); + + set_bit(MLX5E_CHANNEL_STATE_XSK, c->state); + + return 0; + +err_close_icocq: + mlx5e_close_cq(&c->xskicosq.cq); + +err_close_sq: + mlx5e_close_xdpsq(&c->xsksq); + +err_close_tx_cq: + mlx5e_close_cq(&c->xsksq.cq); + +err_close_rq: + mlx5e_close_rq(&c->xskrq); + +err_close_rx_cq: + mlx5e_close_cq(&c->xskrq.cq); + + return err; +} + +void mlx5e_close_xsk(struct mlx5e_channel *c) +{ + clear_bit(MLX5E_CHANNEL_STATE_XSK, c->state); + napi_synchronize(&c->napi); + + mlx5e_close_rq(&c->xskrq); + mlx5e_close_cq(&c->xskrq.cq); + mlx5e_close_icosq(&c->xskicosq); + mlx5e_close_cq(&c->xskicosq.cq); + mlx5e_close_xdpsq(&c->xsksq); + mlx5e_close_cq(&c->xsksq.cq); +} + +void mlx5e_activate_xsk(struct mlx5e_channel *c) +{ + set_bit(MLX5E_RQ_STATE_ENABLED, &c->xskrq.state); + /* TX queue is created active. */ + mlx5e_trigger_irq(&c->xskicosq); +} + +void mlx5e_deactivate_xsk(struct mlx5e_channel *c) +{ + mlx5e_deactivate_rq(&c->xskrq); + /* TX queue is disabled on close. 
*/ +} + +static int mlx5e_redirect_xsk_rqt(struct mlx5e_priv *priv, u16 ix, u32 rqn) +{ + struct mlx5e_redirect_rqt_param direct_rrp = { + .is_rss = false, + { + .rqn = rqn, + }, + }; + + u32 rqtn = priv->xsk_tir[ix].rqt.rqtn; + + return mlx5e_redirect_rqt(priv, rqtn, 1, direct_rrp); +} + +int mlx5e_xsk_redirect_rqt_to_channel(struct mlx5e_priv *priv, struct mlx5e_channel *c) +{ + return mlx5e_redirect_xsk_rqt(priv, c->ix, c->xskrq.rqn); +} + +int mlx5e_xsk_redirect_rqt_to_drop(struct mlx5e_priv *priv, u16 ix) +{ + return mlx5e_redirect_xsk_rqt(priv, ix, priv->drop_rq.rqn); +} + +int mlx5e_xsk_redirect_rqts_to_channels(struct mlx5e_priv *priv, struct mlx5e_channels *chs) +{ + int err, i; + + if (!priv->xsk.refcnt) + return 0; + + for (i = 0; i < chs->num; i++) { + struct mlx5e_channel *c = chs->c[i]; + + if (!test_bit(MLX5E_CHANNEL_STATE_XSK, c->state)) + continue; + + err = mlx5e_xsk_redirect_rqt_to_channel(priv, c); + if (unlikely(err)) + goto err_stop; + } + + return 0; + +err_stop: + for (i--; i >= 0; i--) { + if (!test_bit(MLX5E_CHANNEL_STATE_XSK, chs->c[i]->state)) + continue; + + mlx5e_xsk_redirect_rqt_to_drop(priv, i); + } + + return err; +} + +void mlx5e_xsk_redirect_rqts_to_drop(struct mlx5e_priv *priv, struct mlx5e_channels *chs) +{ + int i; + + if (!priv->xsk.refcnt) + return; + + for (i = 0; i < chs->num; i++) { + if (!test_bit(MLX5E_CHANNEL_STATE_XSK, chs->c[i]->state)) + continue; + + mlx5e_xsk_redirect_rqt_to_drop(priv, i); + } +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h new file mode 100644 index 000000000000..0dd11b81c046 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. */ + +#ifndef __MLX5_EN_XSK_SETUP_H__ +#define __MLX5_EN_XSK_SETUP_H__ + +#include "en.h" + +struct mlx5e_xsk_param; + +bool mlx5e_validate_xsk_param(struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, + struct mlx5_core_dev *mdev); +int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, struct xdp_umem *umem, + struct mlx5e_channel *c); +void mlx5e_close_xsk(struct mlx5e_channel *c); +void mlx5e_activate_xsk(struct mlx5e_channel *c); +void mlx5e_deactivate_xsk(struct mlx5e_channel *c); +int mlx5e_xsk_redirect_rqt_to_channel(struct mlx5e_priv *priv, struct mlx5e_channel *c); +int mlx5e_xsk_redirect_rqt_to_drop(struct mlx5e_priv *priv, u16 ix); +int mlx5e_xsk_redirect_rqts_to_channels(struct mlx5e_priv *priv, struct mlx5e_channels *chs); +void mlx5e_xsk_redirect_rqts_to_drop(struct mlx5e_priv *priv, struct mlx5e_channels *chs); + +#endif /* __MLX5_EN_XSK_SETUP_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c new file mode 100644 index 000000000000..35e188cf4ea4 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c @@ -0,0 +1,111 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. 
*/ + +#include "tx.h" +#include "umem.h" +#include "en/xdp.h" +#include "en/params.h" +#include <net/xdp_sock.h> + +int mlx5e_xsk_async_xmit(struct net_device *dev, u32 qid) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5e_params *params = &priv->channels.params; + struct mlx5e_channel *c; + u16 ix; + + if (unlikely(!mlx5e_xdp_is_open(priv))) + return -ENETDOWN; + + if (unlikely(!mlx5e_qid_get_ch_if_in_group(params, qid, MLX5E_RQ_GROUP_XSK, &ix))) + return -EINVAL; + + c = priv->channels.c[ix]; + + if (unlikely(!test_bit(MLX5E_CHANNEL_STATE_XSK, c->state))) + return -ENXIO; + + if (!napi_if_scheduled_mark_missed(&c->napi)) { + spin_lock(&c->xskicosq_lock); + mlx5e_trigger_irq(&c->xskicosq); + spin_unlock(&c->xskicosq_lock); + } + + return 0; +} + +/* When TX fails (because of the size of the packet), we need to get completions + * in order, so post a NOP to get a CQE. Since AF_XDP doesn't distinguish + * between successful TX and errors, handling in mlx5e_poll_xdpsq_cq is the + * same. + */ +static void mlx5e_xsk_tx_post_err(struct mlx5e_xdpsq *sq, + struct mlx5e_xdp_info *xdpi) +{ + u16 pi = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->pc); + struct mlx5e_xdp_wqe_info *wi = &sq->db.wqe_info[pi]; + struct mlx5e_tx_wqe *nopwqe; + + wi->num_wqebbs = 1; + wi->num_pkts = 1; + + nopwqe = mlx5e_post_nop(&sq->wq, sq->sqn, &sq->pc); + mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, xdpi); + sq->doorbell_cseg = &nopwqe->ctrl; +} + +bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget) +{ + struct xdp_umem *umem = sq->umem; + struct mlx5e_xdp_info xdpi; + struct mlx5e_xdp_xmit_data xdptxd; + bool work_done = true; + bool flush = false; + + xdpi.mode = MLX5E_XDP_XMIT_MODE_XSK; + + for (; budget; budget--) { + int check_result = sq->xmit_xdp_frame_check(sq); + struct xdp_desc desc; + + if (unlikely(check_result < 0)) { + work_done = false; + break; + } + + if (!xsk_umem_consume_tx(umem, &desc)) { + /* TX will get stuck until something wakes it up by + * triggering NAPI. Currently it's expected that the + * application calls sendto() if there are consumed, but + * not completed frames. + */ + break; + } + + xdptxd.dma_addr = xdp_umem_get_dma(umem, desc.addr); + xdptxd.data = xdp_umem_get_data(umem, desc.addr); + xdptxd.len = desc.len; + + dma_sync_single_for_device(sq->pdev, xdptxd.dma_addr, + xdptxd.len, DMA_BIDIRECTIONAL); + + if (unlikely(!sq->xmit_xdp_frame(sq, &xdptxd, &xdpi, check_result))) { + if (sq->mpwqe.wqe) + mlx5e_xdp_mpwqe_complete(sq); + + mlx5e_xsk_tx_post_err(sq, &xdpi); + } + + flush = true; + } + + if (flush) { + if (sq->mpwqe.wqe) + mlx5e_xdp_mpwqe_complete(sq); + mlx5e_xmit_xdp_doorbell(sq); + + xsk_umem_consume_tx_done(umem); + } + + return !(budget && work_done); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h new file mode 100644 index 000000000000..7add18bf78d8 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. 
*/ + +#ifndef __MLX5_EN_XSK_TX_H__ +#define __MLX5_EN_XSK_TX_H__ + +#include "en.h" + +/* TX data path */ + +int mlx5e_xsk_async_xmit(struct net_device *dev, u32 qid); + +bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget); + +#endif /* __MLX5_EN_XSK_TX_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.c new file mode 100644 index 000000000000..4baaa5788320 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.c @@ -0,0 +1,267 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. */ + +#include <net/xdp_sock.h> +#include "umem.h" +#include "setup.h" +#include "en/params.h" + +static int mlx5e_xsk_map_umem(struct mlx5e_priv *priv, + struct xdp_umem *umem) +{ + struct device *dev = priv->mdev->device; + u32 i; + + for (i = 0; i < umem->npgs; i++) { + dma_addr_t dma = dma_map_page(dev, umem->pgs[i], 0, PAGE_SIZE, + DMA_BIDIRECTIONAL); + + if (unlikely(dma_mapping_error(dev, dma))) + goto err_unmap; + umem->pages[i].dma = dma; + } + + return 0; + +err_unmap: + while (i--) { + dma_unmap_page(dev, umem->pages[i].dma, PAGE_SIZE, + DMA_BIDIRECTIONAL); + umem->pages[i].dma = 0; + } + + return -ENOMEM; +} + +static void mlx5e_xsk_unmap_umem(struct mlx5e_priv *priv, + struct xdp_umem *umem) +{ + struct device *dev = priv->mdev->device; + u32 i; + + for (i = 0; i < umem->npgs; i++) { + dma_unmap_page(dev, umem->pages[i].dma, PAGE_SIZE, + DMA_BIDIRECTIONAL); + umem->pages[i].dma = 0; + } +} + +static int mlx5e_xsk_get_umems(struct mlx5e_xsk *xsk) +{ + if (!xsk->umems) { + xsk->umems = kcalloc(MLX5E_MAX_NUM_CHANNELS, + sizeof(*xsk->umems), GFP_KERNEL); + if (unlikely(!xsk->umems)) + return -ENOMEM; + } + + xsk->refcnt++; + xsk->ever_used = true; + + return 0; +} + +static void mlx5e_xsk_put_umems(struct mlx5e_xsk *xsk) +{ + if (!--xsk->refcnt) { + kfree(xsk->umems); + xsk->umems = NULL; + } +} + +static int mlx5e_xsk_add_umem(struct mlx5e_xsk *xsk, struct xdp_umem *umem, u16 ix) +{ + int err; + + err = mlx5e_xsk_get_umems(xsk); + if (unlikely(err)) + return err; + + xsk->umems[ix] = umem; + return 0; +} + +static void mlx5e_xsk_remove_umem(struct mlx5e_xsk *xsk, u16 ix) +{ + xsk->umems[ix] = NULL; + + mlx5e_xsk_put_umems(xsk); +} + +static bool mlx5e_xsk_is_umem_sane(struct xdp_umem *umem) +{ + return umem->headroom <= 0xffff && umem->chunk_size_nohr <= 0xffff; +} + +void mlx5e_build_xsk_param(struct xdp_umem *umem, struct mlx5e_xsk_param *xsk) +{ + xsk->headroom = umem->headroom; + xsk->chunk_size = umem->chunk_size_nohr + umem->headroom; +} + +static int mlx5e_xsk_enable_locked(struct mlx5e_priv *priv, + struct xdp_umem *umem, u16 ix) +{ + struct mlx5e_params *params = &priv->channels.params; + struct mlx5e_xsk_param xsk; + struct mlx5e_channel *c; + int err; + + if (unlikely(mlx5e_xsk_get_umem(&priv->channels.params, &priv->xsk, ix))) + return -EBUSY; + + if (unlikely(!mlx5e_xsk_is_umem_sane(umem))) + return -EINVAL; + + err = mlx5e_xsk_map_umem(priv, umem); + if (unlikely(err)) + return err; + + err = mlx5e_xsk_add_umem(&priv->xsk, umem, ix); + if (unlikely(err)) + goto err_unmap_umem; + + mlx5e_build_xsk_param(umem, &xsk); + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { + /* XSK objects will be created on open. */ + goto validate_closed; + } + + if (!params->xdp_prog) { + /* XSK objects will be created when an XDP program is set, + * and the channels are reopened. 
+ */ + goto validate_closed; + } + + c = priv->channels.c[ix]; + + err = mlx5e_open_xsk(priv, params, &xsk, umem, c); + if (unlikely(err)) + goto err_remove_umem; + + mlx5e_activate_xsk(c); + + /* Don't wait for WQEs, because the newer xdpsock sample doesn't provide + * any Fill Ring entries at the setup stage. + */ + + err = mlx5e_xsk_redirect_rqt_to_channel(priv, priv->channels.c[ix]); + if (unlikely(err)) + goto err_deactivate; + + return 0; + +err_deactivate: + mlx5e_deactivate_xsk(c); + mlx5e_close_xsk(c); + +err_remove_umem: + mlx5e_xsk_remove_umem(&priv->xsk, ix); + +err_unmap_umem: + mlx5e_xsk_unmap_umem(priv, umem); + + return err; + +validate_closed: + /* Check the configuration in advance, rather than fail at a later stage + * (in mlx5e_xdp_set or on open) and end up with no channels. + */ + if (!mlx5e_validate_xsk_param(params, &xsk, priv->mdev)) { + err = -EINVAL; + goto err_remove_umem; + } + + return 0; +} + +static int mlx5e_xsk_disable_locked(struct mlx5e_priv *priv, u16 ix) +{ + struct xdp_umem *umem = mlx5e_xsk_get_umem(&priv->channels.params, + &priv->xsk, ix); + struct mlx5e_channel *c; + + if (unlikely(!umem)) + return -EINVAL; + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + goto remove_umem; + + /* XSK RQ and SQ are only created if XDP program is set. */ + if (!priv->channels.params.xdp_prog) + goto remove_umem; + + c = priv->channels.c[ix]; + mlx5e_xsk_redirect_rqt_to_drop(priv, ix); + mlx5e_deactivate_xsk(c); + mlx5e_close_xsk(c); + +remove_umem: + mlx5e_xsk_remove_umem(&priv->xsk, ix); + mlx5e_xsk_unmap_umem(priv, umem); + + return 0; +} + +static int mlx5e_xsk_enable_umem(struct mlx5e_priv *priv, struct xdp_umem *umem, + u16 ix) +{ + int err; + + mutex_lock(&priv->state_lock); + err = mlx5e_xsk_enable_locked(priv, umem, ix); + mutex_unlock(&priv->state_lock); + + return err; +} + +static int mlx5e_xsk_disable_umem(struct mlx5e_priv *priv, u16 ix) +{ + int err; + + mutex_lock(&priv->state_lock); + err = mlx5e_xsk_disable_locked(priv, ix); + mutex_unlock(&priv->state_lock); + + return err; +} + +int mlx5e_xsk_setup_umem(struct net_device *dev, struct xdp_umem *umem, u16 qid) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5e_params *params = &priv->channels.params; + u16 ix; + + if (unlikely(!mlx5e_qid_get_ch_if_in_group(params, qid, MLX5E_RQ_GROUP_XSK, &ix))) + return -EINVAL; + + return umem ? mlx5e_xsk_enable_umem(priv, umem, ix) : + mlx5e_xsk_disable_umem(priv, ix); +} + +int mlx5e_xsk_resize_reuseq(struct xdp_umem *umem, u32 nentries) +{ + struct xdp_umem_fq_reuse *reuseq; + + reuseq = xsk_reuseq_prepare(nentries); + if (unlikely(!reuseq)) + return -ENOMEM; + xsk_reuseq_free(xsk_reuseq_swap(umem, reuseq)); + + return 0; +} + +u16 mlx5e_xsk_first_unused_channel(struct mlx5e_params *params, struct mlx5e_xsk *xsk) +{ + u16 res = xsk->refcnt ? params->num_channels : 0; + + while (res) { + if (mlx5e_xsk_get_umem(params, xsk, res - 1)) + break; + --res; + } + + return res; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.h new file mode 100644 index 000000000000..25b4cbe58b54 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. 
*/ + +#ifndef __MLX5_EN_XSK_UMEM_H__ +#define __MLX5_EN_XSK_UMEM_H__ + +#include "en.h" + +static inline struct xdp_umem *mlx5e_xsk_get_umem(struct mlx5e_params *params, + struct mlx5e_xsk *xsk, u16 ix) +{ + if (!xsk || !xsk->umems) + return NULL; + + if (unlikely(ix >= params->num_channels)) + return NULL; + + return xsk->umems[ix]; +} + +struct mlx5e_xsk_param; +void mlx5e_build_xsk_param(struct xdp_umem *umem, struct mlx5e_xsk_param *xsk); + +/* .ndo_bpf callback. */ +int mlx5e_xsk_setup_umem(struct net_device *dev, struct xdp_umem *umem, u16 qid); + +int mlx5e_xsk_resize_reuseq(struct xdp_umem *umem, u32 nentries); + +u16 mlx5e_xsk_first_unused_channel(struct mlx5e_params *params, struct mlx5e_xsk *xsk); + +#endif /* __MLX5_EN_XSK_UMEM_H__ */ |