aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
diff options
context:
space:
mode:
authorShay Agroskin <shayag@mellanox.com>2019-03-14 14:54:07 +0200
committerSaeed Mahameed <saeedm@mellanox.com>2019-04-23 12:09:20 -0700
commitc2273219baa5097a4d7c1c162b992623534f34c1 (patch)
tree921448bbf8fa8fb8b73662fbab4ed5f8d89d9ffe /drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
parentnet/mlx5e: XDP, Add TX MPWQE session counter (diff)
downloadlinux-dev-c2273219baa5097a4d7c1c162b992623534f34c1.tar.xz
linux-dev-c2273219baa5097a4d7c1c162b992623534f34c1.zip
net/mlx5e: XDP, Inline small packets into the TX MPWQE in XDP xmit flow
Upon high packet rate with multiple CPUs TX workloads, much of the HCA's resources are spent on prefetching TX descriptors, thus affecting transmission rates. This patch comes to mitigate this problem by moving some workload to the CPU and reducing the HW data prefetch overhead for small packets (<= 256B). When forwarding packets with XDP, a packet that is smaller than a certain size (set to ~256 bytes) would be sent inline within its WQE TX descrptor (mem-copied), when the hardware tx queue is congested beyond a pre-defined water-mark. This is added to better utilize the HW resources (which now makes one less packet data prefetch) and allow better scalability, on the account of CPU usage (which now 'memcpy's the packet into the WQE). To load balance between HW and CPU and get max packet rate, we use watermarks to detect how much the HW is congested and move the work loads back and forth between HW and CPU. Performance: Tested packet rate for UDP 64Byte multi-stream over two dual port ConnectX-5 100Gbps NICs. CPU: Intel(R) Xeon(R) CPU E5-2680 v3 @ 2.50GHz * Tested with hyper-threading disabled XDP_TX: | | before | after | | | 24 rings | 51Mpps | 116Mpps | +126% | | 1 ring | 12Mpps | 12Mpps | same | XDP_REDIRECT: ** Below is the transmit rate, not the redirection rate which might be larger, and is not affected by this patch. | | before | after | | | 32 rings | 64Mpps | 92Mpps | +43% | | 1 ring | 6.4Mpps | 6.4Mpps | same | As we can see, feature significantly improves scaling, without hurting single ring performance. Signed-off-by: Shay Agroskin <shayag@mellanox.com> Signed-off-by: Tariq Toukan <tariqt@mellanox.com> Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Diffstat (limited to 'drivers/net/ethernet/mellanox/mlx5/core/en_stats.c')
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_stats.c8
1 files changed, 7 insertions, 1 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
index 80ee48dcc0a3..ca0ff3b3fbd1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
@@ -66,6 +66,7 @@ static const struct counter_desc sw_stats_desc[] = {
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_redirect) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_xmit) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_mpwqe) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_inlnw) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_full) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_err) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_cqe) },
@@ -81,6 +82,7 @@ static const struct counter_desc sw_stats_desc[] = {
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_cqe_err) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_xmit) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_mpwqe) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_inlnw) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_full) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_err) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xdp_cqes) },
@@ -163,6 +165,7 @@ static void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv)
s->rx_xdp_redirect += rq_stats->xdp_redirect;
s->rx_xdp_tx_xmit += xdpsq_stats->xmit;
s->rx_xdp_tx_mpwqe += xdpsq_stats->mpwqe;
+ s->rx_xdp_tx_inlnw += xdpsq_stats->inlnw;
s->rx_xdp_tx_full += xdpsq_stats->full;
s->rx_xdp_tx_err += xdpsq_stats->err;
s->rx_xdp_tx_cqe += xdpsq_stats->cqes;
@@ -188,7 +191,8 @@ static void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv)
s->ch_eq_rearm += ch_stats->eq_rearm;
/* xdp redirect */
s->tx_xdp_xmit += xdpsq_red_stats->xmit;
- s->tx_xdp_mpwqe += xdpsq_red_stats->mpwqe;
+ s->tx_xdp_mpwqe += xdpsq_red_stats->mpwqe;
+ s->tx_xdp_inlnw += xdpsq_red_stats->inlnw;
s->tx_xdp_full += xdpsq_red_stats->full;
s->tx_xdp_err += xdpsq_red_stats->err;
s->tx_xdp_cqes += xdpsq_red_stats->cqes;
@@ -1250,6 +1254,7 @@ static const struct counter_desc sq_stats_desc[] = {
static const struct counter_desc rq_xdpsq_stats_desc[] = {
{ MLX5E_DECLARE_RQ_XDPSQ_STAT(struct mlx5e_xdpsq_stats, xmit) },
{ MLX5E_DECLARE_RQ_XDPSQ_STAT(struct mlx5e_xdpsq_stats, mpwqe) },
+ { MLX5E_DECLARE_RQ_XDPSQ_STAT(struct mlx5e_xdpsq_stats, inlnw) },
{ MLX5E_DECLARE_RQ_XDPSQ_STAT(struct mlx5e_xdpsq_stats, full) },
{ MLX5E_DECLARE_RQ_XDPSQ_STAT(struct mlx5e_xdpsq_stats, err) },
{ MLX5E_DECLARE_RQ_XDPSQ_STAT(struct mlx5e_xdpsq_stats, cqes) },
@@ -1258,6 +1263,7 @@ static const struct counter_desc rq_xdpsq_stats_desc[] = {
static const struct counter_desc xdpsq_stats_desc[] = {
{ MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, xmit) },
{ MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, mpwqe) },
+ { MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, inlnw) },
{ MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, full) },
{ MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, err) },
{ MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, cqes) },