Diffstat (limited to 'drivers/net/ethernet/mellanox/mlx5/core/en')
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/params.c        | 108
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/params.h        | 118
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c        | 304
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h        |  43
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c | 335
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_gre.c    |  95
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c  | 151
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h          | 208
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c           | 231
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h           |  37
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/xsk/Makefile    |   1
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c        | 192
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h        |  27
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c     | 223
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h     |  25
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c        | 111
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h        |  15
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.c      | 267
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.h      |  31
19 files changed, 2199 insertions(+), 323 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
index d3744bffbae3..79301d116667 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
@@ -3,65 +3,102 @@
#include "en/params.h"
-u32 mlx5e_rx_get_linear_frag_sz(struct mlx5e_params *params)
+static inline bool mlx5e_rx_is_xdp(struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk)
{
- u16 hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu);
- u16 linear_rq_headroom = params->xdp_prog ?
- XDP_PACKET_HEADROOM : MLX5_RX_HEADROOM;
- u32 frag_sz;
+ return params->xdp_prog || xsk;
+}
+
+u16 mlx5e_get_linear_rq_headroom(struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk)
+{
+ u16 headroom = NET_IP_ALIGN;
+
+ if (mlx5e_rx_is_xdp(params, xsk)) {
+ headroom += XDP_PACKET_HEADROOM;
+ if (xsk)
+ headroom += xsk->headroom;
+ } else {
+ headroom += MLX5_RX_HEADROOM;
+ }
+
+ return headroom;
+}
+
+u32 mlx5e_rx_get_linear_frag_sz(struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk)
+{
+ u32 hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu);
+ u16 linear_rq_headroom = mlx5e_get_linear_rq_headroom(params, xsk);
+ u32 frag_sz = linear_rq_headroom + hw_mtu;
- linear_rq_headroom += NET_IP_ALIGN;
+ /* AF_XDP doesn't build SKBs in place. */
+ if (!xsk)
+ frag_sz = MLX5_SKB_FRAG_SZ(frag_sz);
- frag_sz = MLX5_SKB_FRAG_SZ(linear_rq_headroom + hw_mtu);
+ /* XDP in mlx5e doesn't support multiple packets per page. */
+ if (mlx5e_rx_is_xdp(params, xsk))
+ frag_sz = max_t(u32, frag_sz, PAGE_SIZE);
- if (params->xdp_prog && frag_sz < PAGE_SIZE)
- frag_sz = PAGE_SIZE;
+ /* Even if we can go with a smaller fragment size, we must not put
+ * multiple packets into a single frame.
+ */
+ if (xsk)
+ frag_sz = max_t(u32, frag_sz, xsk->chunk_size);
return frag_sz;
}
-u8 mlx5e_mpwqe_log_pkts_per_wqe(struct mlx5e_params *params)
+u8 mlx5e_mpwqe_log_pkts_per_wqe(struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk)
{
- u32 linear_frag_sz = mlx5e_rx_get_linear_frag_sz(params);
+ u32 linear_frag_sz = mlx5e_rx_get_linear_frag_sz(params, xsk);
return MLX5_MPWRQ_LOG_WQE_SZ - order_base_2(linear_frag_sz);
}
-bool mlx5e_rx_is_linear_skb(struct mlx5e_params *params)
+bool mlx5e_rx_is_linear_skb(struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk)
{
- u32 frag_sz = mlx5e_rx_get_linear_frag_sz(params);
+ /* AF_XDP allocates SKBs on XDP_PASS - ensure they don't occupy more
+ * than one page. For this, check both with and without xsk.
+ */
+ u32 linear_frag_sz = max(mlx5e_rx_get_linear_frag_sz(params, xsk),
+ mlx5e_rx_get_linear_frag_sz(params, NULL));
- return !params->lro_en && frag_sz <= PAGE_SIZE;
+ return !params->lro_en && linear_frag_sz <= PAGE_SIZE;
}
#define MLX5_MAX_MPWQE_LOG_WQE_STRIDE_SZ ((BIT(__mlx5_bit_sz(wq, log_wqe_stride_size)) - 1) + \
MLX5_MPWQE_LOG_STRIDE_SZ_BASE)
bool mlx5e_rx_mpwqe_is_linear_skb(struct mlx5_core_dev *mdev,
- struct mlx5e_params *params)
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk)
{
- u32 frag_sz = mlx5e_rx_get_linear_frag_sz(params);
+ u32 linear_frag_sz = mlx5e_rx_get_linear_frag_sz(params, xsk);
s8 signed_log_num_strides_param;
u8 log_num_strides;
- if (!mlx5e_rx_is_linear_skb(params))
+ if (!mlx5e_rx_is_linear_skb(params, xsk))
return false;
- if (order_base_2(frag_sz) > MLX5_MAX_MPWQE_LOG_WQE_STRIDE_SZ)
+ if (order_base_2(linear_frag_sz) > MLX5_MAX_MPWQE_LOG_WQE_STRIDE_SZ)
return false;
if (MLX5_CAP_GEN(mdev, ext_stride_num_range))
return true;
- log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ - order_base_2(frag_sz);
+ log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ - order_base_2(linear_frag_sz);
signed_log_num_strides_param =
(s8)log_num_strides - MLX5_MPWQE_LOG_NUM_STRIDES_BASE;
return signed_log_num_strides_param >= 0;
}
-u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5e_params *params)
+u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk)
{
- u8 log_pkts_per_wqe = mlx5e_mpwqe_log_pkts_per_wqe(params);
+ u8 log_pkts_per_wqe = mlx5e_mpwqe_log_pkts_per_wqe(params, xsk);
/* Numbers are unsigned, don't subtract to avoid underflow. */
if (params->log_rq_mtu_frames <
@@ -72,33 +109,30 @@ u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5e_params *params)
}
u8 mlx5e_mpwqe_get_log_stride_size(struct mlx5_core_dev *mdev,
- struct mlx5e_params *params)
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk)
{
- if (mlx5e_rx_mpwqe_is_linear_skb(mdev, params))
- return order_base_2(mlx5e_rx_get_linear_frag_sz(params));
+ if (mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk))
+ return order_base_2(mlx5e_rx_get_linear_frag_sz(params, xsk));
return MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev);
}
u8 mlx5e_mpwqe_get_log_num_strides(struct mlx5_core_dev *mdev,
- struct mlx5e_params *params)
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk)
{
return MLX5_MPWRQ_LOG_WQE_SZ -
- mlx5e_mpwqe_get_log_stride_size(mdev, params);
+ mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk);
}
u16 mlx5e_get_rq_headroom(struct mlx5_core_dev *mdev,
- struct mlx5e_params *params)
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk)
{
- u16 linear_rq_headroom = params->xdp_prog ?
- XDP_PACKET_HEADROOM : MLX5_RX_HEADROOM;
- bool is_linear_skb;
-
- linear_rq_headroom += NET_IP_ALIGN;
-
- is_linear_skb = (params->rq_wq_type == MLX5_WQ_TYPE_CYCLIC) ?
- mlx5e_rx_is_linear_skb(params) :
- mlx5e_rx_mpwqe_is_linear_skb(mdev, params);
+ bool is_linear_skb = (params->rq_wq_type == MLX5_WQ_TYPE_CYCLIC) ?
+ mlx5e_rx_is_linear_skb(params, xsk) :
+ mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk);
- return is_linear_skb ? linear_rq_headroom : 0;
+ return is_linear_skb ? mlx5e_get_linear_rq_headroom(params, xsk) : 0;
}
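
The headroom logic above composes additively: NET_IP_ALIGN, plus the XDP
headroom when an XDP program or XSK pool drives the RQ (plus the UMEM
headroom for XSK), or the default RX headroom otherwise. A minimal
userspace sketch of that arithmetic, assuming stand-in values for
NET_IP_ALIGN and MLX5_RX_HEADROOM (only XDP_PACKET_HEADROOM, 256, is
fixed by the BPF UAPI):

#include <stdio.h>
#include <stdbool.h>
#include <stdint.h>

#define NET_IP_ALIGN        2    /* assumption: common non-x86 value */
#define XDP_PACKET_HEADROOM 256  /* include/uapi/linux/bpf.h */
#define MLX5_RX_HEADROOM    64   /* hypothetical stand-in */

/* Mirrors mlx5e_get_linear_rq_headroom(): XDP/XSK RQs reserve the XDP
 * headroom (plus the per-UMEM headroom for XSK); plain RQs use the
 * driver default.
 */
static uint16_t linear_rq_headroom(bool xdp, bool xsk, uint16_t xsk_headroom)
{
	uint16_t headroom = NET_IP_ALIGN;

	if (xdp || xsk) {
		headroom += XDP_PACKET_HEADROOM;
		if (xsk)
			headroom += xsk_headroom;
	} else {
		headroom += MLX5_RX_HEADROOM;
	}
	return headroom;
}

int main(void)
{
	/* XSK RQ, zero UMEM headroom: 2 + 256 + 0 = 258 bytes */
	printf("%u\n", linear_rq_headroom(false, true, 0));
	/* plain RQ: 2 + 64 = 66 bytes */
	printf("%u\n", linear_rq_headroom(false, false, 0));
	return 0;
}

The non-XSK linear frag size then becomes MLX5_SKB_FRAG_SZ(headroom +
hw_mtu), rounded up to PAGE_SIZE under XDP, as the patched
mlx5e_rx_get_linear_frag_sz() shows.
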
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
index b106a0236f36..bd882b5ee9a7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
@@ -6,17 +6,119 @@
#include "en.h"
-u32 mlx5e_rx_get_linear_frag_sz(struct mlx5e_params *params);
-u8 mlx5e_mpwqe_log_pkts_per_wqe(struct mlx5e_params *params);
-bool mlx5e_rx_is_linear_skb(struct mlx5e_params *params);
+struct mlx5e_xsk_param {
+ u16 headroom;
+ u16 chunk_size;
+};
+
+struct mlx5e_rq_param {
+ u32 rqc[MLX5_ST_SZ_DW(rqc)];
+ struct mlx5_wq_param wq;
+ struct mlx5e_rq_frags_info frags_info;
+};
+
+struct mlx5e_sq_param {
+ u32 sqc[MLX5_ST_SZ_DW(sqc)];
+ struct mlx5_wq_param wq;
+ bool is_mpw;
+};
+
+struct mlx5e_cq_param {
+ u32 cqc[MLX5_ST_SZ_DW(cqc)];
+ struct mlx5_wq_param wq;
+ u16 eq_ix;
+ u8 cq_period_mode;
+};
+
+struct mlx5e_channel_param {
+ struct mlx5e_rq_param rq;
+ struct mlx5e_sq_param sq;
+ struct mlx5e_sq_param xdp_sq;
+ struct mlx5e_sq_param icosq;
+ struct mlx5e_cq_param rx_cq;
+ struct mlx5e_cq_param tx_cq;
+ struct mlx5e_cq_param icosq_cq;
+};
+
+static inline bool mlx5e_qid_get_ch_if_in_group(struct mlx5e_params *params,
+ u16 qid,
+ enum mlx5e_rq_group group,
+ u16 *ix)
+{
+ int nch = params->num_channels;
+ int ch = qid - nch * group;
+
+ if (ch < 0 || ch >= nch)
+ return false;
+
+ *ix = ch;
+ return true;
+}
+
+static inline void mlx5e_qid_get_ch_and_group(struct mlx5e_params *params,
+ u16 qid,
+ u16 *ix,
+ enum mlx5e_rq_group *group)
+{
+ u16 nch = params->num_channels;
+
+ *ix = qid % nch;
+ *group = qid / nch;
+}
+
+static inline bool mlx5e_qid_validate(struct mlx5e_params *params, u64 qid)
+{
+ return qid < params->num_channels * MLX5E_NUM_RQ_GROUPS;
+}
+
+/* Parameter calculations */
+
+u16 mlx5e_get_linear_rq_headroom(struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk);
+u32 mlx5e_rx_get_linear_frag_sz(struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk);
+u8 mlx5e_mpwqe_log_pkts_per_wqe(struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk);
+bool mlx5e_rx_is_linear_skb(struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk);
bool mlx5e_rx_mpwqe_is_linear_skb(struct mlx5_core_dev *mdev,
- struct mlx5e_params *params);
-u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5e_params *params);
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk);
+u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk);
u8 mlx5e_mpwqe_get_log_stride_size(struct mlx5_core_dev *mdev,
- struct mlx5e_params *params);
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk);
u8 mlx5e_mpwqe_get_log_num_strides(struct mlx5_core_dev *mdev,
- struct mlx5e_params *params);
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk);
u16 mlx5e_get_rq_headroom(struct mlx5_core_dev *mdev,
- struct mlx5e_params *params);
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk);
+
+/* Build queue parameters */
+
+void mlx5e_build_rq_param(struct mlx5e_priv *priv,
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk,
+ struct mlx5e_rq_param *param);
+void mlx5e_build_sq_param_common(struct mlx5e_priv *priv,
+ struct mlx5e_sq_param *param);
+void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv,
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk,
+ struct mlx5e_cq_param *param);
+void mlx5e_build_tx_cq_param(struct mlx5e_priv *priv,
+ struct mlx5e_params *params,
+ struct mlx5e_cq_param *param);
+void mlx5e_build_ico_cq_param(struct mlx5e_priv *priv,
+ u8 log_wq_size,
+ struct mlx5e_cq_param *param);
+void mlx5e_build_icosq_param(struct mlx5e_priv *priv,
+ u8 log_wq_size,
+ struct mlx5e_sq_param *param);
+void mlx5e_build_xdpsq_param(struct mlx5e_priv *priv,
+ struct mlx5e_params *params,
+ struct mlx5e_sq_param *param);
#endif /* __MLX5_EN_PARAMS_H__ */
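
The qid helpers above flatten a (channel, group) pair into a single
queue id: with nch channels, regular RQs occupy qids [0, nch) and XSK
RQs occupy [nch, 2*nch). A small sketch of the mapping, using plain
ints in place of the driver enums:

#include <stdio.h>
#include <stdbool.h>

#define NUM_RQ_GROUPS 2 /* assumption: regular + XSK */

static void qid_get_ch_and_group(int nch, int qid, int *ix, int *group)
{
	*ix = qid % nch;
	*group = qid / nch;
}

static bool qid_get_ch_if_in_group(int nch, int qid, int group, int *ix)
{
	int ch = qid - nch * group;

	if (ch < 0 || ch >= nch)
		return false;
	*ix = ch;
	return true;
}

static bool qid_validate(int nch, int qid)
{
	return qid < nch * NUM_RQ_GROUPS;
}

int main(void)
{
	int ix, group;

	qid_get_ch_and_group(8, 10, &ix, &group);
	printf("qid 10: channel %d, group %d\n", ix, group); /* 2, 1 */
	printf("qid 10 in group 1: %d\n",
	       qid_get_ch_if_in_group(8, 10, 1, &ix));       /* 1 */
	printf("qid 16 valid: %d\n", qid_validate(8, 16));    /* 0 */
	return 0;
}
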
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
index fe5d4d7f15ed..a6a52806be45 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
@@ -3,37 +3,53 @@
#include <net/vxlan.h>
#include <net/gre.h>
-#include "lib/vxlan.h"
+#include <net/geneve.h>
#include "en/tc_tun.h"
+#include "en_tc.h"
+
+struct mlx5e_tc_tunnel *mlx5e_get_tc_tun(struct net_device *tunnel_dev)
+{
+ if (netif_is_vxlan(tunnel_dev))
+ return &vxlan_tunnel;
+ else if (netif_is_geneve(tunnel_dev))
+ return &geneve_tunnel;
+ else if (netif_is_gretap(tunnel_dev) ||
+ netif_is_ip6gretap(tunnel_dev))
+ return &gre_tunnel;
+ else
+ return NULL;
+}
static int get_route_and_out_devs(struct mlx5e_priv *priv,
struct net_device *dev,
struct net_device **route_dev,
struct net_device **out_dev)
{
+ struct net_device *uplink_dev, *uplink_upper, *real_dev;
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
- struct net_device *uplink_dev, *uplink_upper;
bool dst_is_lag_dev;
+ real_dev = is_vlan_dev(dev) ? vlan_dev_real_dev(dev) : dev;
uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH);
uplink_upper = netdev_master_upper_dev_get(uplink_dev);
dst_is_lag_dev = (uplink_upper &&
netif_is_lag_master(uplink_upper) &&
- dev == uplink_upper &&
+ real_dev == uplink_upper &&
mlx5_lag_is_sriov(priv->mdev));
/* if the egress device isn't on the same HW e-switch or
* it's a LAG device, use the uplink
*/
- if (!netdev_port_same_parent_id(priv->netdev, dev) ||
+ if (!netdev_port_same_parent_id(priv->netdev, real_dev) ||
dst_is_lag_dev) {
- *route_dev = uplink_dev;
- *out_dev = *route_dev;
+ *route_dev = dev;
+ *out_dev = uplink_dev;
} else {
*route_dev = dev;
if (is_vlan_dev(*route_dev))
*out_dev = uplink_dev;
- else if (mlx5e_eswitch_rep(dev))
+ else if (mlx5e_eswitch_rep(dev) &&
+ mlx5e_is_valid_eswitch_fwd_dev(priv, dev))
*out_dev = *route_dev;
else
return -EOPNOTSUPP;
@@ -141,63 +157,15 @@ static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv,
return 0;
}
-static int mlx5e_gen_vxlan_header(char buf[], struct ip_tunnel_key *tun_key)
-{
- __be32 tun_id = tunnel_id_to_key32(tun_key->tun_id);
- struct udphdr *udp = (struct udphdr *)(buf);
- struct vxlanhdr *vxh = (struct vxlanhdr *)
- ((char *)udp + sizeof(struct udphdr));
-
- udp->dest = tun_key->tp_dst;
- vxh->vx_flags = VXLAN_HF_VNI;
- vxh->vx_vni = vxlan_vni_field(tun_id);
-
- return 0;
-}
-
-static int mlx5e_gen_gre_header(char buf[], struct ip_tunnel_key *tun_key)
-{
- __be32 tun_id = tunnel_id_to_key32(tun_key->tun_id);
- int hdr_len;
- struct gre_base_hdr *greh = (struct gre_base_hdr *)(buf);
-
- /* the HW does not calculate GRE csum or sequences */
- if (tun_key->tun_flags & (TUNNEL_CSUM | TUNNEL_SEQ))
- return -EOPNOTSUPP;
-
- greh->protocol = htons(ETH_P_TEB);
-
- /* GRE key */
- hdr_len = gre_calc_hlen(tun_key->tun_flags);
- greh->flags = gre_tnl_flags_to_gre_flags(tun_key->tun_flags);
- if (tun_key->tun_flags & TUNNEL_KEY) {
- __be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4);
-
- *ptr = tun_id;
- }
-
- return 0;
-}
-
static int mlx5e_gen_ip_tunnel_header(char buf[], __u8 *ip_proto,
struct mlx5e_encap_entry *e)
{
- int err = 0;
- struct ip_tunnel_key *key = &e->tun_info.key;
-
- if (e->tunnel_type == MLX5E_TC_TUNNEL_TYPE_VXLAN) {
- *ip_proto = IPPROTO_UDP;
- err = mlx5e_gen_vxlan_header(buf, key);
- } else if (e->tunnel_type == MLX5E_TC_TUNNEL_TYPE_GRETAP) {
- *ip_proto = IPPROTO_GRE;
- err = mlx5e_gen_gre_header(buf, key);
- } else {
- pr_warn("mlx5: Cannot generate tunnel header for tunnel type (%d)\n"
- , e->tunnel_type);
- err = -EOPNOTSUPP;
+ if (!e->tunnel) {
+ pr_warn("mlx5: Cannot generate tunnel header for this tunnel\n");
+ return -EOPNOTSUPP;
}
- return err;
+ return e->tunnel->generate_ip_tun_hdr(buf, ip_proto, e);
}
static char *gen_eth_tnl_hdr(char *buf, struct net_device *dev,
@@ -229,7 +197,7 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv,
struct mlx5e_encap_entry *e)
{
int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
- struct ip_tunnel_key *tun_key = &e->tun_info.key;
+ const struct ip_tunnel_key *tun_key = &e->tun_info->key;
struct net_device *out_dev, *route_dev;
struct neighbour *n = NULL;
struct flowi4 fl4 = {};
@@ -253,7 +221,7 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv,
ipv4_encap_size =
(is_vlan_dev(route_dev) ? VLAN_ETH_HLEN : ETH_HLEN) +
sizeof(struct iphdr) +
- e->tunnel_hlen;
+ e->tunnel->calc_hlen(e);
if (max_encap_size < ipv4_encap_size) {
mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n",
@@ -345,7 +313,7 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv,
struct mlx5e_encap_entry *e)
{
int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
- struct ip_tunnel_key *tun_key = &e->tun_info.key;
+ const struct ip_tunnel_key *tun_key = &e->tun_info->key;
struct net_device *out_dev, *route_dev;
struct neighbour *n = NULL;
struct flowi6 fl6 = {};
@@ -369,7 +337,7 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv,
ipv6_encap_size =
(is_vlan_dev(route_dev) ? VLAN_ETH_HLEN : ETH_HLEN) +
sizeof(struct ipv6hdr) +
- e->tunnel_hlen;
+ e->tunnel->calc_hlen(e);
if (max_encap_size < ipv6_encap_size) {
mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n",
@@ -455,27 +423,12 @@ out:
return err;
}
-int mlx5e_tc_tun_get_type(struct net_device *tunnel_dev)
-{
- if (netif_is_vxlan(tunnel_dev))
- return MLX5E_TC_TUNNEL_TYPE_VXLAN;
- else if (netif_is_gretap(tunnel_dev) ||
- netif_is_ip6gretap(tunnel_dev))
- return MLX5E_TC_TUNNEL_TYPE_GRETAP;
- else
- return MLX5E_TC_TUNNEL_TYPE_UNKNOWN;
-}
-
bool mlx5e_tc_tun_device_to_offload(struct mlx5e_priv *priv,
struct net_device *netdev)
{
- int tunnel_type = mlx5e_tc_tun_get_type(netdev);
+ struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(netdev);
- if (tunnel_type == MLX5E_TC_TUNNEL_TYPE_VXLAN &&
- MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap))
- return true;
- else if (tunnel_type == MLX5E_TC_TUNNEL_TYPE_GRETAP &&
- MLX5_CAP_ESW(priv->mdev, nvgre_encap_decap))
+ if (tunnel && tunnel->can_offload(priv))
return true;
else
return false;
@@ -486,71 +439,87 @@ int mlx5e_tc_tun_init_encap_attr(struct net_device *tunnel_dev,
struct mlx5e_encap_entry *e,
struct netlink_ext_ack *extack)
{
- e->tunnel_type = mlx5e_tc_tun_get_type(tunnel_dev);
-
- if (e->tunnel_type == MLX5E_TC_TUNNEL_TYPE_VXLAN) {
- int dst_port = be16_to_cpu(e->tun_info.key.tp_dst);
+ struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(tunnel_dev);
- if (!mlx5_vxlan_lookup_port(priv->mdev->vxlan, dst_port)) {
- NL_SET_ERR_MSG_MOD(extack,
- "vxlan udp dport was not registered with the HW");
- netdev_warn(priv->netdev,
- "%d isn't an offloaded vxlan udp dport\n",
- dst_port);
- return -EOPNOTSUPP;
- }
- e->reformat_type = MLX5_REFORMAT_TYPE_L2_TO_VXLAN;
- e->tunnel_hlen = VXLAN_HLEN;
- } else if (e->tunnel_type == MLX5E_TC_TUNNEL_TYPE_GRETAP) {
- e->reformat_type = MLX5_REFORMAT_TYPE_L2_TO_NVGRE;
- e->tunnel_hlen = gre_calc_hlen(e->tun_info.key.tun_flags);
- } else {
+ if (!tunnel) {
e->reformat_type = -1;
- e->tunnel_hlen = -1;
return -EOPNOTSUPP;
}
- return 0;
+
+ return tunnel->init_encap_attr(tunnel_dev, priv, e, extack);
+}
+
+int mlx5e_tc_tun_parse(struct net_device *filter_dev,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct flow_cls_offload *f,
+ void *headers_c,
+ void *headers_v, u8 *match_level)
+{
+ struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(filter_dev);
+ int err = 0;
+
+ if (!tunnel) {
+ netdev_warn(priv->netdev,
+ "decapsulation offload is not supported for %s net device\n",
+ mlx5e_netdev_kind(filter_dev));
+ err = -EOPNOTSUPP;
+ goto out;
+ }
+
+ *match_level = tunnel->match_level;
+
+ if (tunnel->parse_udp_ports) {
+ err = tunnel->parse_udp_ports(priv, spec, f,
+ headers_c, headers_v);
+ if (err)
+ goto out;
+ }
+
+ if (tunnel->parse_tunnel) {
+ err = tunnel->parse_tunnel(priv, spec, f,
+ headers_c, headers_v);
+ if (err)
+ goto out;
+ }
+
+out:
+ return err;
}
-static int mlx5e_tc_tun_parse_vxlan(struct mlx5e_priv *priv,
- struct mlx5_flow_spec *spec,
- struct tc_cls_flower_offload *f,
- void *headers_c,
- void *headers_v)
+int mlx5e_tc_tun_parse_udp_ports(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct flow_cls_offload *f,
+ void *headers_c,
+ void *headers_v)
{
- struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f);
+ struct flow_rule *rule = flow_cls_offload_flow_rule(f);
struct netlink_ext_ack *extack = f->common.extack;
- void *misc_c = MLX5_ADDR_OF(fte_match_param,
- spec->match_criteria,
- misc_parameters);
- void *misc_v = MLX5_ADDR_OF(fte_match_param,
- spec->match_value,
- misc_parameters);
struct flow_match_ports enc_ports;
- flow_rule_match_enc_ports(rule, &enc_ports);
-
/* Full udp dst port must be given */
- if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS) ||
- memchr_inv(&enc_ports.mask->dst, 0xff, sizeof(enc_ports.mask->dst))) {
+
+ if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS)) {
NL_SET_ERR_MSG_MOD(extack,
- "VXLAN decap filter must include enc_dst_port condition");
+ "UDP tunnel decap filter must include enc_dst_port condition");
netdev_warn(priv->netdev,
- "VXLAN decap filter must include enc_dst_port condition\n");
+ "UDP tunnel decap filter must include enc_dst_port condition\n");
return -EOPNOTSUPP;
}
- /* udp dst port must be knonwn as a VXLAN port */
- if (!mlx5_vxlan_lookup_port(priv->mdev->vxlan, be16_to_cpu(enc_ports.key->dst))) {
+ flow_rule_match_enc_ports(rule, &enc_ports);
+
+ if (memchr_inv(&enc_ports.mask->dst, 0xff,
+ sizeof(enc_ports.mask->dst))) {
NL_SET_ERR_MSG_MOD(extack,
- "Matched UDP port is not registered as a VXLAN port");
+ "UDP tunnel decap filter must match enc_dst_port fully");
netdev_warn(priv->netdev,
- "UDP port %d is not registered as a VXLAN port\n",
- be16_to_cpu(enc_ports.key->dst));
+ "UDP tunnel decap filter must match enc_dst_port fully\n");
return -EOPNOTSUPP;
}
- /* dst UDP port is valid here */
+ /* match on UDP protocol and dst port number */
+
MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_protocol);
MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, IPPROTO_UDP);
@@ -559,92 +528,15 @@ static int mlx5e_tc_tun_parse_vxlan(struct mlx5e_priv *priv,
MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport,
ntohs(enc_ports.key->dst));
+ /* UDP src port on outer header is generated by HW,
+ * so it is probably a bad idea to request matching it.
+ * Nonetheless, it is allowed.
+ */
+
MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_sport,
ntohs(enc_ports.mask->src));
MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_sport,
ntohs(enc_ports.key->src));
- /* match on VNI */
- if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) {
- struct flow_match_enc_keyid enc_keyid;
-
- flow_rule_match_enc_keyid(rule, &enc_keyid);
-
- MLX5_SET(fte_match_set_misc, misc_c, vxlan_vni,
- be32_to_cpu(enc_keyid.mask->keyid));
- MLX5_SET(fte_match_set_misc, misc_v, vxlan_vni,
- be32_to_cpu(enc_keyid.key->keyid));
- }
return 0;
}
-
-static int mlx5e_tc_tun_parse_gretap(struct mlx5e_priv *priv,
- struct mlx5_flow_spec *spec,
- struct tc_cls_flower_offload *f,
- void *outer_headers_c,
- void *outer_headers_v)
-{
- void *misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
- misc_parameters);
- void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
- misc_parameters);
- struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f);
-
- if (!MLX5_CAP_ESW(priv->mdev, nvgre_encap_decap)) {
- NL_SET_ERR_MSG_MOD(f->common.extack,
- "GRE HW offloading is not supported");
- netdev_warn(priv->netdev, "GRE HW offloading is not supported\n");
- return -EOPNOTSUPP;
- }
-
- MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, ip_protocol);
- MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
- ip_protocol, IPPROTO_GRE);
-
- /* gre protocol*/
- MLX5_SET_TO_ONES(fte_match_set_misc, misc_c, gre_protocol);
- MLX5_SET(fte_match_set_misc, misc_v, gre_protocol, ETH_P_TEB);
-
- /* gre key */
- if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) {
- struct flow_match_enc_keyid enc_keyid;
-
- flow_rule_match_enc_keyid(rule, &enc_keyid);
- MLX5_SET(fte_match_set_misc, misc_c,
- gre_key.key, be32_to_cpu(enc_keyid.mask->keyid));
- MLX5_SET(fte_match_set_misc, misc_v,
- gre_key.key, be32_to_cpu(enc_keyid.key->keyid));
- }
-
- return 0;
-}
-
-int mlx5e_tc_tun_parse(struct net_device *filter_dev,
- struct mlx5e_priv *priv,
- struct mlx5_flow_spec *spec,
- struct tc_cls_flower_offload *f,
- void *headers_c,
- void *headers_v, u8 *match_level)
-{
- int tunnel_type;
- int err = 0;
-
- tunnel_type = mlx5e_tc_tun_get_type(filter_dev);
- if (tunnel_type == MLX5E_TC_TUNNEL_TYPE_VXLAN) {
- *match_level = MLX5_MATCH_L4;
- err = mlx5e_tc_tun_parse_vxlan(priv, spec, f,
- headers_c, headers_v);
- } else if (tunnel_type == MLX5E_TC_TUNNEL_TYPE_GRETAP) {
- *match_level = MLX5_MATCH_L3;
- err = mlx5e_tc_tun_parse_gretap(priv, spec, f,
- headers_c, headers_v);
- } else {
- netdev_warn(priv->netdev,
- "decapsulation offload is not supported for %s (kind: \"%s\")\n",
- netdev_name(filter_dev),
- mlx5e_netdev_kind(filter_dev));
-
- return -EOPNOTSUPP;
- }
- return err;
-}
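
mlx5e_tc_tun_parse() now drives per-protocol callbacks instead of a
tunnel_type switch: match_level comes from the ops table,
parse_udp_ports runs first (UDP-based tunnels only, so GRE leaves it
NULL), then parse_tunnel. A hedged sketch of that dispatch shape with
illustrative callbacks, not the driver's real types:

#include <stdio.h>
#include <stddef.h>

struct tun_ops {
	int (*parse_udp_ports)(void); /* optional */
	int (*parse_tunnel)(void);    /* per-protocol header match */
};

static int parse_udp(void) { printf("udp ports ok\n"); return 0; }
static int parse_vni(void) { printf("vni ok\n"); return 0; }

static const struct tun_ops vxlan_like = {
	.parse_udp_ports = parse_udp,
	.parse_tunnel    = parse_vni,
};

/* Mirrors the mlx5e_tc_tun_parse() flow above */
static int tun_parse(const struct tun_ops *ops)
{
	int err;

	if (!ops)
		return -95; /* -EOPNOTSUPP: unknown tunnel netdev */
	if (ops->parse_udp_ports) {
		err = ops->parse_udp_ports();
		if (err)
			return err;
	}
	if (ops->parse_tunnel) {
		err = ops->parse_tunnel();
		if (err)
			return err;
	}
	return 0;
}

int main(void)
{
	return tun_parse(&vxlan_like);
}
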
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h
index b63f15de899d..c362b9225dc2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h
@@ -14,9 +14,41 @@
enum {
MLX5E_TC_TUNNEL_TYPE_UNKNOWN,
MLX5E_TC_TUNNEL_TYPE_VXLAN,
- MLX5E_TC_TUNNEL_TYPE_GRETAP
+ MLX5E_TC_TUNNEL_TYPE_GENEVE,
+ MLX5E_TC_TUNNEL_TYPE_GRETAP,
};
+struct mlx5e_tc_tunnel {
+ int tunnel_type;
+ enum mlx5_flow_match_level match_level;
+
+ bool (*can_offload)(struct mlx5e_priv *priv);
+ int (*calc_hlen)(struct mlx5e_encap_entry *e);
+ int (*init_encap_attr)(struct net_device *tunnel_dev,
+ struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e,
+ struct netlink_ext_ack *extack);
+ int (*generate_ip_tun_hdr)(char buf[],
+ __u8 *ip_proto,
+ struct mlx5e_encap_entry *e);
+ int (*parse_udp_ports)(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct flow_cls_offload *f,
+ void *headers_c,
+ void *headers_v);
+ int (*parse_tunnel)(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct flow_cls_offload *f,
+ void *headers_c,
+ void *headers_v);
+};
+
+extern struct mlx5e_tc_tunnel vxlan_tunnel;
+extern struct mlx5e_tc_tunnel geneve_tunnel;
+extern struct mlx5e_tc_tunnel gre_tunnel;
+
+struct mlx5e_tc_tunnel *mlx5e_get_tc_tun(struct net_device *tunnel_dev);
+
int mlx5e_tc_tun_init_encap_attr(struct net_device *tunnel_dev,
struct mlx5e_priv *priv,
struct mlx5e_encap_entry *e,
@@ -30,15 +62,20 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv,
struct net_device *mirred_dev,
struct mlx5e_encap_entry *e);
-int mlx5e_tc_tun_get_type(struct net_device *tunnel_dev);
bool mlx5e_tc_tun_device_to_offload(struct mlx5e_priv *priv,
struct net_device *netdev);
int mlx5e_tc_tun_parse(struct net_device *filter_dev,
struct mlx5e_priv *priv,
struct mlx5_flow_spec *spec,
- struct tc_cls_flower_offload *f,
+ struct flow_cls_offload *f,
void *headers_c,
void *headers_v, u8 *match_level);
+int mlx5e_tc_tun_parse_udp_ports(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct flow_cls_offload *f,
+ void *headers_c,
+ void *headers_v);
+
#endif //__MLX5_EN_TC_TUNNEL_H__
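
For scale, the calc_hlen() callback declared above feeds the encap-size
check in mlx5e_tc_tun_create_header_ipv4(): for VXLAN over untagged
IPv4 the reformat header is ETH_HLEN + IPv4 header + VXLAN_HLEN (8-byte
UDP plus 8-byte VXLAN) = 14 + 20 + 16 = 50 bytes, which must not exceed
max_encap_header_size. The arithmetic as a trivially runnable check:

#include <stdio.h>

int main(void)
{
	int eth_hlen = 14, iphdr = 20, udp = 8, vxlan = 8;

	/* ipv4_encap_size for an untagged VXLAN encap: 50 */
	printf("ipv4_encap_size = %d\n", eth_hlen + iphdr + udp + vxlan);
	return 0;
}
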
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c
new file mode 100644
index 000000000000..951ea26d96bc
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c
@@ -0,0 +1,335 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2018 Mellanox Technologies. */
+
+#include <net/geneve.h>
+#include "lib/geneve.h"
+#include "en/tc_tun.h"
+
+#define MLX5E_GENEVE_VER 0
+
+static bool mlx5e_tc_tun_can_offload_geneve(struct mlx5e_priv *priv)
+{
+ return !!(MLX5_CAP_GEN(priv->mdev, flex_parser_protocols) & MLX5_FLEX_PROTO_GENEVE);
+}
+
+static int mlx5e_tc_tun_calc_hlen_geneve(struct mlx5e_encap_entry *e)
+{
+ return sizeof(struct udphdr) +
+ sizeof(struct genevehdr) +
+ e->tun_info->options_len;
+}
+
+static int mlx5e_tc_tun_check_udp_dport_geneve(struct mlx5e_priv *priv,
+ struct flow_cls_offload *f)
+{
+ struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+ struct netlink_ext_ack *extack = f->common.extack;
+ struct flow_match_ports enc_ports;
+
+ if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS))
+ return -EOPNOTSUPP;
+
+ flow_rule_match_enc_ports(rule, &enc_ports);
+
+ /* Currently we support only default GENEVE
+ * port, so udp dst port must match.
+ */
+ if (be16_to_cpu(enc_ports.key->dst) != GENEVE_UDP_PORT) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Matched UDP dst port is not registered as a GENEVE port");
+ netdev_warn(priv->netdev,
+ "UDP port %d is not registered as a GENEVE port\n",
+ be16_to_cpu(enc_ports.key->dst));
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static int mlx5e_tc_tun_parse_udp_ports_geneve(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct flow_cls_offload *f,
+ void *headers_c,
+ void *headers_v)
+{
+ int err;
+
+ err = mlx5e_tc_tun_parse_udp_ports(priv, spec, f, headers_c, headers_v);
+ if (err)
+ return err;
+
+ return mlx5e_tc_tun_check_udp_dport_geneve(priv, f);
+}
+
+static int mlx5e_tc_tun_init_encap_attr_geneve(struct net_device *tunnel_dev,
+ struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e,
+ struct netlink_ext_ack *extack)
+{
+ e->tunnel = &geneve_tunnel;
+
+ /* Reformat type for GENEVE encap is similar to VXLAN:
+ * in both cases the HW adds in the same place a
+ * defined encapsulation header that the SW provides.
+ */
+ e->reformat_type = MLX5_REFORMAT_TYPE_L2_TO_VXLAN;
+ return 0;
+}
+
+static void mlx5e_tunnel_id_to_vni(__be64 tun_id, __u8 *vni)
+{
+#ifdef __BIG_ENDIAN
+ vni[0] = (__force __u8)(tun_id >> 16);
+ vni[1] = (__force __u8)(tun_id >> 8);
+ vni[2] = (__force __u8)tun_id;
+#else
+ vni[0] = (__force __u8)((__force u64)tun_id >> 40);
+ vni[1] = (__force __u8)((__force u64)tun_id >> 48);
+ vni[2] = (__force __u8)((__force u64)tun_id >> 56);
+#endif
+}
+
+static int mlx5e_gen_ip_tunnel_header_geneve(char buf[],
+ __u8 *ip_proto,
+ struct mlx5e_encap_entry *e)
+{
+ const struct ip_tunnel_info *tun_info = e->tun_info;
+ struct udphdr *udp = (struct udphdr *)(buf);
+ struct genevehdr *geneveh;
+
+ geneveh = (struct genevehdr *)((char *)udp + sizeof(struct udphdr));
+
+ *ip_proto = IPPROTO_UDP;
+
+ udp->dest = tun_info->key.tp_dst;
+
+ memset(geneveh, 0, sizeof(*geneveh));
+ geneveh->ver = MLX5E_GENEVE_VER;
+ geneveh->opt_len = tun_info->options_len / 4;
+ geneveh->oam = !!(tun_info->key.tun_flags & TUNNEL_OAM);
+ geneveh->critical = !!(tun_info->key.tun_flags & TUNNEL_CRIT_OPT);
+ mlx5e_tunnel_id_to_vni(tun_info->key.tun_id, geneveh->vni);
+ geneveh->proto_type = htons(ETH_P_TEB);
+
+ if (tun_info->key.tun_flags & TUNNEL_GENEVE_OPT) {
+ if (!geneveh->opt_len)
+ return -EOPNOTSUPP;
+ ip_tunnel_info_opts_get(geneveh->options, tun_info);
+ }
+
+ return 0;
+}
+
+static int mlx5e_tc_tun_parse_geneve_vni(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct flow_cls_offload *f)
+{
+ struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+ struct netlink_ext_ack *extack = f->common.extack;
+ struct flow_match_enc_keyid enc_keyid;
+ void *misc_c, *misc_v;
+
+ misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
+ misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
+
+ if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID))
+ return 0;
+
+ flow_rule_match_enc_keyid(rule, &enc_keyid);
+
+ if (!enc_keyid.mask->keyid)
+ return 0;
+
+ if (!MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, ft_field_support.outer_geneve_vni)) {
+ NL_SET_ERR_MSG_MOD(extack, "Matching on GENEVE VNI is not supported");
+ netdev_warn(priv->netdev, "Matching on GENEVE VNI is not supported\n");
+ return -EOPNOTSUPP;
+ }
+
+ MLX5_SET(fte_match_set_misc, misc_c, geneve_vni, be32_to_cpu(enc_keyid.mask->keyid));
+ MLX5_SET(fte_match_set_misc, misc_v, geneve_vni, be32_to_cpu(enc_keyid.key->keyid));
+
+ return 0;
+}
+
+static int mlx5e_tc_tun_parse_geneve_options(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct flow_cls_offload *f)
+{
+ u8 max_tlv_option_data_len = MLX5_CAP_GEN(priv->mdev, max_geneve_tlv_option_data_len);
+ u8 max_tlv_options = MLX5_CAP_GEN(priv->mdev, max_geneve_tlv_options);
+ struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+ struct netlink_ext_ack *extack = f->common.extack;
+ void *misc_c, *misc_v, *misc_3_c, *misc_3_v;
+ struct geneve_opt *option_key, *option_mask;
+ __be32 opt_data_key = 0, opt_data_mask = 0;
+ struct flow_match_enc_opts enc_opts;
+ int res = 0;
+
+ misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
+ misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
+ misc_3_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_3);
+ misc_3_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters_3);
+
+ if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_OPTS))
+ return 0;
+
+ flow_rule_match_enc_opts(rule, &enc_opts);
+
+ if (memchr_inv(&enc_opts.mask->data, 0, sizeof(enc_opts.mask->data)) &&
+ !MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev,
+ ft_field_support.geneve_tlv_option_0_data)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Matching on GENEVE options is not supported");
+ netdev_warn(priv->netdev,
+ "Matching on GENEVE options is not supported\n");
+ return -EOPNOTSUPP;
+ }
+
+ /* make sure that we're talking about GENEVE options */
+
+ if (enc_opts.key->dst_opt_type != TUNNEL_GENEVE_OPT) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Matching on GENEVE options: option type is not GENEVE");
+ netdev_warn(priv->netdev,
+ "Matching on GENEVE options: option type is not GENEVE\n");
+ return -EOPNOTSUPP;
+ }
+
+ if (enc_opts.mask->len &&
+ !MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev,
+ ft_field_support.outer_geneve_opt_len)) {
+ NL_SET_ERR_MSG_MOD(extack, "Matching on GENEVE options len is not supported");
+ netdev_warn(priv->netdev,
+ "Matching on GENEVE options len is not supported\n");
+ return -EOPNOTSUPP;
+ }
+
+ /* max_geneve_tlv_option_data_len comes in multiples of 4 bytes, and it
+ * doesn't include the TLV option header. 'geneve_opt_len' is a total
+ * len of all the options, including the headers, also multiples of 4
+ * bytes. Len that comes from the dissector is in bytes.
+ */
+
+ if ((enc_opts.key->len / 4) > ((max_tlv_option_data_len + 1) * max_tlv_options)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Matching on GENEVE options: unsupported options len");
+ netdev_warn(priv->netdev,
+ "Matching on GENEVE options: unsupported options len (len=%d)\n",
+ enc_opts.key->len);
+ return -EOPNOTSUPP;
+ }
+
+ MLX5_SET(fte_match_set_misc, misc_c, geneve_opt_len, enc_opts.mask->len / 4);
+ MLX5_SET(fte_match_set_misc, misc_v, geneve_opt_len, enc_opts.key->len / 4);
+
+ /* we support matching on one option only, so just get it */
+ option_key = (struct geneve_opt *)&enc_opts.key->data[0];
+ option_mask = (struct geneve_opt *)&enc_opts.mask->data[0];
+
+ if (option_key->length > max_tlv_option_data_len) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Matching on GENEVE options: unsupported option len");
+ netdev_warn(priv->netdev,
+ "Matching on GENEVE options: unsupported option len (key=%d, mask=%d)\n",
+ option_key->length, option_mask->length);
+ return -EOPNOTSUPP;
+ }
+
+ /* data can't be all 0 - fail to offload such rule */
+ if (!memchr_inv(option_key->opt_data, 0, option_key->length * 4)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Matching on GENEVE options: can't match on 0 data field");
+ netdev_warn(priv->netdev,
+ "Matching on GENEVE options: can't match on 0 data field\n");
+ return -EOPNOTSUPP;
+ }
+
+ /* add new GENEVE TLV options object */
+ res = mlx5_geneve_tlv_option_add(priv->mdev->geneve, option_key);
+ if (res) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Matching on GENEVE options: failed creating TLV opt object");
+ netdev_warn(priv->netdev,
+ "Matching on GENEVE options: failed creating TLV opt object (class:type:len = 0x%x:0x%x:%d)\n",
+ be16_to_cpu(option_key->opt_class),
+ option_key->type, option_key->length);
+ return res;
+ }
+
+ /* In general, after creating the object, need to query it
+ * in order to check which option data to set in misc3.
+ * But we support only geneve_tlv_option_0_data, so no
+ * point querying at this stage.
+ */
+
+ memcpy(&opt_data_key, option_key->opt_data, option_key->length * 4);
+ memcpy(&opt_data_mask, option_mask->opt_data, option_mask->length * 4);
+ MLX5_SET(fte_match_set_misc3, misc_3_v,
+ geneve_tlv_option_0_data, be32_to_cpu(opt_data_key));
+ MLX5_SET(fte_match_set_misc3, misc_3_c,
+ geneve_tlv_option_0_data, be32_to_cpu(opt_data_mask));
+
+ spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_3;
+
+ return 0;
+}
+
+static int mlx5e_tc_tun_parse_geneve_params(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct flow_cls_offload *f)
+{
+ void *misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
+ void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
+ struct netlink_ext_ack *extack = f->common.extack;
+
+ /* match on OAM - packets with OAM bit on should NOT be offloaded */
+
+ if (!MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, ft_field_support.outer_geneve_oam)) {
+ NL_SET_ERR_MSG_MOD(extack, "Matching on GENEVE OAM is not supported");
+ netdev_warn(priv->netdev, "Matching on GENEVE OAM is not supported\n");
+ return -EOPNOTSUPP;
+ }
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc_c, geneve_oam);
+ MLX5_SET(fte_match_set_misc, misc_v, geneve_oam, 0);
+
+ /* Match on GENEVE protocol. We support only Transparent Eth Bridge. */
+
+ if (MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev,
+ ft_field_support.outer_geneve_protocol_type)) {
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc_c, geneve_protocol_type);
+ MLX5_SET(fte_match_set_misc, misc_v, geneve_protocol_type, ETH_P_TEB);
+ }
+
+ return 0;
+}
+
+static int mlx5e_tc_tun_parse_geneve(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct flow_cls_offload *f,
+ void *headers_c,
+ void *headers_v)
+{
+ int err;
+
+ err = mlx5e_tc_tun_parse_geneve_params(priv, spec, f);
+ if (err)
+ return err;
+
+ err = mlx5e_tc_tun_parse_geneve_vni(priv, spec, f);
+ if (err)
+ return err;
+
+ return mlx5e_tc_tun_parse_geneve_options(priv, spec, f);
+}
+
+struct mlx5e_tc_tunnel geneve_tunnel = {
+ .tunnel_type = MLX5E_TC_TUNNEL_TYPE_GENEVE,
+ .match_level = MLX5_MATCH_L4,
+ .can_offload = mlx5e_tc_tun_can_offload_geneve,
+ .calc_hlen = mlx5e_tc_tun_calc_hlen_geneve,
+ .init_encap_attr = mlx5e_tc_tun_init_encap_attr_geneve,
+ .generate_ip_tun_hdr = mlx5e_gen_ip_tunnel_header_geneve,
+ .parse_udp_ports = mlx5e_tc_tun_parse_udp_ports_geneve,
+ .parse_tunnel = mlx5e_tc_tun_parse_geneve,
+};
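
mlx5e_tunnel_id_to_vni() extracts the 24-bit VNI from a __be64 tunnel
id, i.e. bytes 5..7 of the big-endian value regardless of host
endianness. A standalone sketch of the little-endian branch (assumes a
little-endian host):

#include <stdio.h>
#include <stdint.h>
#include <string.h>

int main(void)
{
	/* Tunnel id 0x123456 as it appears on the wire (network order):
	 * the 24-bit VNI occupies the last three bytes of the be64.
	 */
	uint8_t wire[8] = { 0, 0, 0, 0, 0, 0x12, 0x34, 0x56 };
	uint64_t be_id;
	uint8_t vni[3];

	memcpy(&be_id, wire, 8); /* be_id now holds the __be64 value */

	/* little-endian host branch of mlx5e_tunnel_id_to_vni() */
	vni[0] = (uint8_t)(be_id >> 40);
	vni[1] = (uint8_t)(be_id >> 48);
	vni[2] = (uint8_t)(be_id >> 56);

	printf("VNI = %02x%02x%02x\n", vni[0], vni[1], vni[2]); /* 123456 */
	return 0;
}
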
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_gre.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_gre.c
new file mode 100644
index 000000000000..58b13192df23
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_gre.c
@@ -0,0 +1,95 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2018 Mellanox Technologies. */
+
+#include <net/gre.h>
+#include "en/tc_tun.h"
+
+static bool mlx5e_tc_tun_can_offload_gretap(struct mlx5e_priv *priv)
+{
+ return !!MLX5_CAP_ESW(priv->mdev, nvgre_encap_decap);
+}
+
+static int mlx5e_tc_tun_calc_hlen_gretap(struct mlx5e_encap_entry *e)
+{
+ return gre_calc_hlen(e->tun_info->key.tun_flags);
+}
+
+static int mlx5e_tc_tun_init_encap_attr_gretap(struct net_device *tunnel_dev,
+ struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e,
+ struct netlink_ext_ack *extack)
+{
+ e->tunnel = &gre_tunnel;
+ e->reformat_type = MLX5_REFORMAT_TYPE_L2_TO_NVGRE;
+ return 0;
+}
+
+static int mlx5e_gen_ip_tunnel_header_gretap(char buf[],
+ __u8 *ip_proto,
+ struct mlx5e_encap_entry *e)
+{
+ const struct ip_tunnel_key *tun_key = &e->tun_info->key;
+ struct gre_base_hdr *greh = (struct gre_base_hdr *)(buf);
+ __be32 tun_id = tunnel_id_to_key32(tun_key->tun_id);
+ int hdr_len;
+
+ *ip_proto = IPPROTO_GRE;
+
+ /* the HW does not calculate GRE csum or sequences */
+ if (tun_key->tun_flags & (TUNNEL_CSUM | TUNNEL_SEQ))
+ return -EOPNOTSUPP;
+
+ greh->protocol = htons(ETH_P_TEB);
+
+ /* GRE key */
+ hdr_len = mlx5e_tc_tun_calc_hlen_gretap(e);
+ greh->flags = gre_tnl_flags_to_gre_flags(tun_key->tun_flags);
+ if (tun_key->tun_flags & TUNNEL_KEY) {
+ __be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4);
+ *ptr = tun_id;
+ }
+
+ return 0;
+}
+
+static int mlx5e_tc_tun_parse_gretap(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct flow_cls_offload *f,
+ void *headers_c,
+ void *headers_v)
+{
+ void *misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
+ void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
+ struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_protocol);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, IPPROTO_GRE);
+
+ /* gre protocol */
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc_c, gre_protocol);
+ MLX5_SET(fte_match_set_misc, misc_v, gre_protocol, ETH_P_TEB);
+
+ /* gre key */
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) {
+ struct flow_match_enc_keyid enc_keyid;
+
+ flow_rule_match_enc_keyid(rule, &enc_keyid);
+ MLX5_SET(fte_match_set_misc, misc_c,
+ gre_key.key, be32_to_cpu(enc_keyid.mask->keyid));
+ MLX5_SET(fte_match_set_misc, misc_v,
+ gre_key.key, be32_to_cpu(enc_keyid.key->keyid));
+ }
+
+ return 0;
+}
+
+struct mlx5e_tc_tunnel gre_tunnel = {
+ .tunnel_type = MLX5E_TC_TUNNEL_TYPE_GRETAP,
+ .match_level = MLX5_MATCH_L3,
+ .can_offload = mlx5e_tc_tun_can_offload_gretap,
+ .calc_hlen = mlx5e_tc_tun_calc_hlen_gretap,
+ .init_encap_attr = mlx5e_tc_tun_init_encap_attr_gretap,
+ .generate_ip_tun_hdr = mlx5e_gen_ip_tunnel_header_gretap,
+ .parse_udp_ports = NULL,
+ .parse_tunnel = mlx5e_tc_tun_parse_gretap,
+};
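
gre_calc_hlen(), used by the calc_hlen callback above, returns the
4-byte GRE base header plus 4 bytes per enabled CSUM/KEY/SEQ flag,
which is why the key is written at hdr_len - 4 in the header generator.
A sketch of that math with illustrative flag values (the kernel's
TUNNEL_* flags are __be16 constants):

#include <stdio.h>

#define TUNNEL_CSUM 0x01 /* illustrative values, not the kernel's */
#define TUNNEL_KEY  0x04
#define TUNNEL_SEQ  0x08

static int gre_hlen(unsigned int flags)
{
	int len = 4; /* struct gre_base_hdr */

	if (flags & TUNNEL_CSUM)
		len += 4;
	if (flags & TUNNEL_KEY)
		len += 4;
	if (flags & TUNNEL_SEQ)
		len += 4;
	return len;
}

int main(void)
{
	/* key only: 8 bytes, so the key sits at bytes 4..7 */
	printf("%d\n", gre_hlen(TUNNEL_KEY));
	return 0;
}
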
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c
new file mode 100644
index 000000000000..37b176801bcc
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c
@@ -0,0 +1,151 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2018 Mellanox Technologies. */
+
+#include <net/vxlan.h>
+#include "lib/vxlan.h"
+#include "en/tc_tun.h"
+
+static bool mlx5e_tc_tun_can_offload_vxlan(struct mlx5e_priv *priv)
+{
+ return !!MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap);
+}
+
+static int mlx5e_tc_tun_calc_hlen_vxlan(struct mlx5e_encap_entry *e)
+{
+ return VXLAN_HLEN;
+}
+
+static int mlx5e_tc_tun_check_udp_dport_vxlan(struct mlx5e_priv *priv,
+ struct flow_cls_offload *f)
+{
+ struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+ struct netlink_ext_ack *extack = f->common.extack;
+ struct flow_match_ports enc_ports;
+
+ if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS))
+ return -EOPNOTSUPP;
+
+ flow_rule_match_enc_ports(rule, &enc_ports);
+
+ /* check the UDP destination port validity */
+
+ if (!mlx5_vxlan_lookup_port(priv->mdev->vxlan,
+ be16_to_cpu(enc_ports.key->dst))) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Matched UDP dst port is not registered as a VXLAN port");
+ netdev_warn(priv->netdev,
+ "UDP port %d is not registered as a VXLAN port\n",
+ be16_to_cpu(enc_ports.key->dst));
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static int mlx5e_tc_tun_parse_udp_ports_vxlan(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct flow_cls_offload *f,
+ void *headers_c,
+ void *headers_v)
+{
+ int err = 0;
+
+ err = mlx5e_tc_tun_parse_udp_ports(priv, spec, f, headers_c, headers_v);
+ if (err)
+ return err;
+
+ return mlx5e_tc_tun_check_udp_dport_vxlan(priv, f);
+}
+
+static int mlx5e_tc_tun_init_encap_attr_vxlan(struct net_device *tunnel_dev,
+ struct mlx5e_priv *priv,
+ struct mlx5e_encap_entry *e,
+ struct netlink_ext_ack *extack)
+{
+ int dst_port = be16_to_cpu(e->tun_info->key.tp_dst);
+
+ e->tunnel = &vxlan_tunnel;
+
+ if (!mlx5_vxlan_lookup_port(priv->mdev->vxlan, dst_port)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "vxlan udp dport was not registered with the HW");
+ netdev_warn(priv->netdev,
+ "%d isn't an offloaded vxlan udp dport\n",
+ dst_port);
+ return -EOPNOTSUPP;
+ }
+
+ e->reformat_type = MLX5_REFORMAT_TYPE_L2_TO_VXLAN;
+ return 0;
+}
+
+static int mlx5e_gen_ip_tunnel_header_vxlan(char buf[],
+ __u8 *ip_proto,
+ struct mlx5e_encap_entry *e)
+{
+ const struct ip_tunnel_key *tun_key = &e->tun_info->key;
+ __be32 tun_id = tunnel_id_to_key32(tun_key->tun_id);
+ struct udphdr *udp = (struct udphdr *)(buf);
+ struct vxlanhdr *vxh;
+
+ vxh = (struct vxlanhdr *)((char *)udp + sizeof(struct udphdr));
+ *ip_proto = IPPROTO_UDP;
+
+ udp->dest = tun_key->tp_dst;
+ vxh->vx_flags = VXLAN_HF_VNI;
+ vxh->vx_vni = vxlan_vni_field(tun_id);
+
+ return 0;
+}
+
+static int mlx5e_tc_tun_parse_vxlan(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct flow_cls_offload *f,
+ void *headers_c,
+ void *headers_v)
+{
+ struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+ struct netlink_ext_ack *extack = f->common.extack;
+ struct flow_match_enc_keyid enc_keyid;
+ void *misc_c, *misc_v;
+
+ misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
+ misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
+
+ if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID))
+ return 0;
+
+ flow_rule_match_enc_keyid(rule, &enc_keyid);
+
+ if (!enc_keyid.mask->keyid)
+ return 0;
+
+ /* match on VNI is required */
+
+ if (!MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev,
+ ft_field_support.outer_vxlan_vni)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Matching on VXLAN VNI is not supported");
+ netdev_warn(priv->netdev,
+ "Matching on VXLAN VNI is not supported\n");
+ return -EOPNOTSUPP;
+ }
+
+ MLX5_SET(fte_match_set_misc, misc_c, vxlan_vni,
+ be32_to_cpu(enc_keyid.mask->keyid));
+ MLX5_SET(fte_match_set_misc, misc_v, vxlan_vni,
+ be32_to_cpu(enc_keyid.key->keyid));
+
+ return 0;
+}
+
+struct mlx5e_tc_tunnel vxlan_tunnel = {
+ .tunnel_type = MLX5E_TC_TUNNEL_TYPE_VXLAN,
+ .match_level = MLX5_MATCH_L4,
+ .can_offload = mlx5e_tc_tun_can_offload_vxlan,
+ .calc_hlen = mlx5e_tc_tun_calc_hlen_vxlan,
+ .init_encap_attr = mlx5e_tc_tun_init_encap_attr_vxlan,
+ .generate_ip_tun_hdr = mlx5e_gen_ip_tunnel_header_vxlan,
+ .parse_udp_ports = mlx5e_tc_tun_parse_udp_ports_vxlan,
+ .parse_tunnel = mlx5e_tc_tun_parse_vxlan,
+};
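
mlx5e_gen_ip_tunnel_header_vxlan() emits the 8-byte VXLAN header with
only the I (VNI-valid) flag set and the VNI in the upper 24 bits of the
second word (RFC 7348 layout). A sketch building the same bytes by
hand, without the kernel helpers:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t vni = 0x123456;
	uint8_t hdr[8] = {0};

	hdr[0] = 0x08;                   /* flags byte: only the I bit */
	hdr[4] = (uint8_t)(vni >> 16);   /* 24-bit VNI, network order, */
	hdr[5] = (uint8_t)(vni >> 8);    /* bytes 4..6 of the header   */
	hdr[6] = (uint8_t)vni;           /* hdr[7] is reserved, zero   */

	for (int i = 0; i < 8; i++)
		printf("%02x ", hdr[i]);
	printf("\n"); /* 08 00 00 00 12 34 56 00 */
	return 0;
}
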
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
new file mode 100644
index 000000000000..ddfe19adb3d9
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
@@ -0,0 +1,208 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#ifndef __MLX5_EN_TXRX_H___
+#define __MLX5_EN_TXRX_H___
+
+#include "en.h"
+
+#define MLX5E_SQ_NOPS_ROOM MLX5_SEND_WQE_MAX_WQEBBS
+#define MLX5E_SQ_STOP_ROOM (MLX5_SEND_WQE_MAX_WQEBBS +\
+ MLX5E_SQ_NOPS_ROOM)
+
+#ifndef CONFIG_MLX5_EN_TLS
+#define MLX5E_SQ_TLS_ROOM (0)
+#else
+/* TLS offload requires additional stop_room for:
+ * - a resync SKB.
+ * kTLS offload requires additional stop_room for:
+ * - static params WQE,
+ * - progress params WQE, and
+ * - resync DUMP per frag.
+ */
+#define MLX5E_SQ_TLS_ROOM \
+ (MLX5_SEND_WQE_MAX_WQEBBS + \
+ MLX5E_KTLS_STATIC_WQEBBS + MLX5E_KTLS_PROGRESS_WQEBBS + \
+ MAX_SKB_FRAGS * MLX5E_KTLS_MAX_DUMP_WQEBBS)
+#endif
+
+#define INL_HDR_START_SZ (sizeof(((struct mlx5_wqe_eth_seg *)NULL)->inline_hdr.start))
+
+static inline bool
+mlx5e_wqc_has_room_for(struct mlx5_wq_cyc *wq, u16 cc, u16 pc, u16 n)
+{
+ return (mlx5_wq_cyc_ctr2ix(wq, cc - pc) >= n) || (cc == pc);
+}
+
+static inline void *
+mlx5e_sq_fetch_wqe(struct mlx5e_txqsq *sq, size_t size, u16 *pi)
+{
+ struct mlx5_wq_cyc *wq = &sq->wq;
+ void *wqe;
+
+ *pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+ wqe = mlx5_wq_cyc_get_wqe(wq, *pi);
+ memset(wqe, 0, size);
+
+ return wqe;
+}
+
+static inline struct mlx5e_tx_wqe *
+mlx5e_post_nop(struct mlx5_wq_cyc *wq, u32 sqn, u16 *pc)
+{
+ u16 pi = mlx5_wq_cyc_ctr2ix(wq, *pc);
+ struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi);
+ struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
+
+ memset(cseg, 0, sizeof(*cseg));
+
+ cseg->opmod_idx_opcode = cpu_to_be32((*pc << 8) | MLX5_OPCODE_NOP);
+ cseg->qpn_ds = cpu_to_be32((sqn << 8) | 0x01);
+
+ (*pc)++;
+
+ return wqe;
+}
+
+static inline struct mlx5e_tx_wqe *
+mlx5e_post_nop_fence(struct mlx5_wq_cyc *wq, u32 sqn, u16 *pc)
+{
+ u16 pi = mlx5_wq_cyc_ctr2ix(wq, *pc);
+ struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi);
+ struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
+
+ memset(cseg, 0, sizeof(*cseg));
+
+ cseg->opmod_idx_opcode = cpu_to_be32((*pc << 8) | MLX5_OPCODE_NOP);
+ cseg->qpn_ds = cpu_to_be32((sqn << 8) | 0x01);
+ cseg->fm_ce_se = MLX5_FENCE_MODE_INITIATOR_SMALL;
+
+ (*pc)++;
+
+ return wqe;
+}
+
+static inline void
+mlx5e_fill_sq_frag_edge(struct mlx5e_txqsq *sq, struct mlx5_wq_cyc *wq,
+ u16 pi, u16 nnops)
+{
+ struct mlx5e_tx_wqe_info *edge_wi, *wi = &sq->db.wqe_info[pi];
+
+ edge_wi = wi + nnops;
+
+ /* fill sq frag edge with nops to avoid wqe wrapping two pages */
+ for (; wi < edge_wi; wi++) {
+ wi->skb = NULL;
+ wi->num_wqebbs = 1;
+ mlx5e_post_nop(wq, sq->sqn, &sq->pc);
+ }
+ sq->stats->nop += nnops;
+}
+
+static inline void
+mlx5e_notify_hw(struct mlx5_wq_cyc *wq, u16 pc, void __iomem *uar_map,
+ struct mlx5_wqe_ctrl_seg *ctrl)
+{
+ ctrl->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE;
+ /* ensure wqe is visible to device before updating doorbell record */
+ dma_wmb();
+
+ *wq->db = cpu_to_be32(pc);
+
+ /* ensure doorbell record is visible to device before ringing the
+ * doorbell
+ */
+ wmb();
+
+ mlx5_write64((__be32 *)ctrl, uar_map);
+}
+
+static inline bool mlx5e_transport_inline_tx_wqe(struct mlx5e_tx_wqe *wqe)
+{
+ return !!wqe->ctrl.tisn;
+}
+
+static inline void mlx5e_cq_arm(struct mlx5e_cq *cq)
+{
+ struct mlx5_core_cq *mcq;
+
+ mcq = &cq->mcq;
+ mlx5_cq_arm(mcq, MLX5_CQ_DB_REQ_NOT, mcq->uar->map, cq->wq.cc);
+}
+
+static inline struct mlx5e_sq_dma *
+mlx5e_dma_get(struct mlx5e_txqsq *sq, u32 i)
+{
+ return &sq->db.dma_fifo[i & sq->dma_fifo_mask];
+}
+
+static inline void
+mlx5e_dma_push(struct mlx5e_txqsq *sq, dma_addr_t addr, u32 size,
+ enum mlx5e_dma_map_type map_type)
+{
+ struct mlx5e_sq_dma *dma = mlx5e_dma_get(sq, sq->dma_fifo_pc++);
+
+ dma->addr = addr;
+ dma->size = size;
+ dma->type = map_type;
+}
+
+static inline void
+mlx5e_tx_dma_unmap(struct device *pdev, struct mlx5e_sq_dma *dma)
+{
+ switch (dma->type) {
+ case MLX5E_DMA_MAP_SINGLE:
+ dma_unmap_single(pdev, dma->addr, dma->size, DMA_TO_DEVICE);
+ break;
+ case MLX5E_DMA_MAP_PAGE:
+ dma_unmap_page(pdev, dma->addr, dma->size, DMA_TO_DEVICE);
+ break;
+ default:
+ WARN_ONCE(true, "mlx5e_tx_dma_unmap unknown DMA type!\n");
+ }
+}
+
+/* SW parser related functions */
+
+struct mlx5e_swp_spec {
+ __be16 l3_proto;
+ u8 l4_proto;
+ u8 is_tun;
+ __be16 tun_l3_proto;
+ u8 tun_l4_proto;
+};
+
+static inline void
+mlx5e_set_eseg_swp(struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg,
+ struct mlx5e_swp_spec *swp_spec)
+{
+ /* SWP offsets are in 2-bytes words */
+ eseg->swp_outer_l3_offset = skb_network_offset(skb) / 2;
+ if (swp_spec->l3_proto == htons(ETH_P_IPV6))
+ eseg->swp_flags |= MLX5_ETH_WQE_SWP_OUTER_L3_IPV6;
+ if (swp_spec->l4_proto) {
+ eseg->swp_outer_l4_offset = skb_transport_offset(skb) / 2;
+ if (swp_spec->l4_proto == IPPROTO_UDP)
+ eseg->swp_flags |= MLX5_ETH_WQE_SWP_OUTER_L4_UDP;
+ }
+
+ if (swp_spec->is_tun) {
+ eseg->swp_inner_l3_offset = skb_inner_network_offset(skb) / 2;
+ if (swp_spec->tun_l3_proto == htons(ETH_P_IPV6))
+ eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_IPV6;
+ } else { /* typically for ipsec when xfrm mode != XFRM_MODE_TUNNEL */
+ eseg->swp_inner_l3_offset = skb_network_offset(skb) / 2;
+ if (swp_spec->l3_proto == htons(ETH_P_IPV6))
+ eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_IPV6;
+ }
+ switch (swp_spec->tun_l4_proto) {
+ case IPPROTO_UDP:
+ eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L4_UDP;
+ /* fall through */
+ case IPPROTO_TCP:
+ eseg->swp_inner_l4_offset = skb_inner_transport_offset(skb) / 2;
+ break;
+ }
+}
+
+#endif
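
mlx5e_wqc_has_room_for() relies on the cyclic WQ size being a power of
two: (cc - pc) masked to the ring size counts the free slots, with the
cc == pc special case meaning an empty ring. A standalone check of the
wraparound math:

#include <stdio.h>
#include <stdbool.h>
#include <stdint.h>

static bool wqc_has_room_for(uint16_t sz_m1, uint16_t cc, uint16_t pc,
			     uint16_t n)
{
	/* same shape as mlx5_wq_cyc_ctr2ix(wq, cc - pc) >= n */
	return (((uint16_t)(cc - pc) & sz_m1) >= n) || (cc == pc);
}

int main(void)
{
	/* 8-entry ring: producer at 10, consumer at 5 -> 3 slots free */
	printf("%d\n", wqc_has_room_for(7, 5, 10, 3)); /* 1 */
	printf("%d\n", wqc_has_room_for(7, 5, 10, 4)); /* 0 */
	return 0;
}
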
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
index eb8ef78e5626..b0b982cf69bb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
@@ -31,11 +31,13 @@
*/
#include <linux/bpf_trace.h>
+#include <net/xdp_sock.h>
#include "en/xdp.h"
+#include "en/params.h"
-int mlx5e_xdp_max_mtu(struct mlx5e_params *params)
+int mlx5e_xdp_max_mtu(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk)
{
- int hr = NET_IP_ALIGN + XDP_PACKET_HEADROOM;
+ int hr = mlx5e_get_linear_rq_headroom(params, xsk);
/* Let S := SKB_DATA_ALIGN(sizeof(struct skb_shared_info)).
* The condition checked in mlx5e_rx_is_linear_skb is:
@@ -54,25 +56,70 @@ int mlx5e_xdp_max_mtu(struct mlx5e_params *params)
}
static inline bool
-mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_dma_info *di,
- struct xdp_buff *xdp)
+mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq,
+ struct mlx5e_dma_info *di, struct xdp_buff *xdp)
{
+ struct mlx5e_xdp_xmit_data xdptxd;
struct mlx5e_xdp_info xdpi;
+ struct xdp_frame *xdpf;
+ dma_addr_t dma_addr;
- xdpi.xdpf = convert_to_xdp_frame(xdp);
- if (unlikely(!xdpi.xdpf))
+ xdpf = convert_to_xdp_frame(xdp);
+ if (unlikely(!xdpf))
return false;
- xdpi.dma_addr = di->addr + (xdpi.xdpf->data - (void *)xdpi.xdpf);
- dma_sync_single_for_device(sq->pdev, xdpi.dma_addr,
- xdpi.xdpf->len, PCI_DMA_TODEVICE);
- xdpi.di = *di;
- return sq->xmit_xdp_frame(sq, &xdpi);
+ xdptxd.data = xdpf->data;
+ xdptxd.len = xdpf->len;
+
+ if (xdp->rxq->mem.type == MEM_TYPE_ZERO_COPY) {
+ /* The xdp_buff was in the UMEM and was copied into a newly
+ * allocated page. The UMEM page was returned via the ZCA, and
+ * this new page has to be mapped at this point and has to be
+ * unmapped and returned via xdp_return_frame on completion.
+ */
+
+ /* Prevent double recycling of the UMEM page. Even in case this
+ * function returns false, the xdp_buff shouldn't be recycled,
+ * as it was already done in xdp_convert_zc_to_xdp_frame.
+ */
+ __set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags); /* non-atomic */
+
+ xdpi.mode = MLX5E_XDP_XMIT_MODE_FRAME;
+
+ dma_addr = dma_map_single(sq->pdev, xdptxd.data, xdptxd.len,
+ DMA_TO_DEVICE);
+ if (dma_mapping_error(sq->pdev, dma_addr)) {
+ xdp_return_frame(xdpf);
+ return false;
+ }
+
+ xdptxd.dma_addr = dma_addr;
+ xdpi.frame.xdpf = xdpf;
+ xdpi.frame.dma_addr = dma_addr;
+ } else {
+ /* Driver assumes that convert_to_xdp_frame returns an xdp_frame
+ * that points to the same memory region as the original
+ * xdp_buff. It allows to map the memory only once and to use
+ * the DMA_BIDIRECTIONAL mode.
+ */
+
+ xdpi.mode = MLX5E_XDP_XMIT_MODE_PAGE;
+
+ dma_addr = di->addr + (xdpf->data - (void *)xdpf);
+ dma_sync_single_for_device(sq->pdev, dma_addr, xdptxd.len,
+ DMA_TO_DEVICE);
+
+ xdptxd.dma_addr = dma_addr;
+ xdpi.page.rq = rq;
+ xdpi.page.di = *di;
+ }
+
+ return sq->xmit_xdp_frame(sq, &xdptxd, &xdpi, 0);
}
/* returns true if packet was consumed by xdp */
bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di,
- void *va, u16 *rx_headroom, u32 *len)
+ void *va, u16 *rx_headroom, u32 *len, bool xsk)
{
struct bpf_prog *prog = READ_ONCE(rq->xdp_prog);
struct xdp_buff xdp;
@@ -86,16 +133,20 @@ bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di,
xdp_set_data_meta_invalid(&xdp);
xdp.data_end = xdp.data + *len;
xdp.data_hard_start = va;
+ if (xsk)
+ xdp.handle = di->xsk.handle;
xdp.rxq = &rq->xdp_rxq;
act = bpf_prog_run_xdp(prog, &xdp);
+ if (xsk)
+ xdp.handle += xdp.data - xdp.data_hard_start;
switch (act) {
case XDP_PASS:
*rx_headroom = xdp.data - xdp.data_hard_start;
*len = xdp.data_end - xdp.data;
return false;
case XDP_TX:
- if (unlikely(!mlx5e_xmit_xdp_buff(&rq->xdpsq, di, &xdp)))
+ if (unlikely(!mlx5e_xmit_xdp_buff(rq->xdpsq, rq, di, &xdp)))
goto xdp_abort;
__set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags); /* non-atomic */
return true;
@@ -106,7 +157,8 @@ bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di,
goto xdp_abort;
__set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags);
__set_bit(MLX5E_RQ_FLAG_XDP_REDIRECT, rq->flags);
- mlx5e_page_dma_unmap(rq, di);
+ if (!xsk)
+ mlx5e_page_dma_unmap(rq, di);
rq->stats->xdp_redirect++;
return true;
default:
@@ -160,7 +212,7 @@ static void mlx5e_xdp_mpwqe_session_start(struct mlx5e_xdpsq *sq)
stats->mpwqe++;
}
-static void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq *sq)
+void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq *sq)
{
struct mlx5_wq_cyc *wq = &sq->wq;
struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
@@ -183,32 +235,55 @@ static void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq *sq)
session->wqe = NULL; /* Close session */
}
+enum {
+ MLX5E_XDP_CHECK_OK = 1,
+ MLX5E_XDP_CHECK_START_MPWQE = 2,
+};
+
+static int mlx5e_xmit_xdp_frame_check_mpwqe(struct mlx5e_xdpsq *sq)
+{
+ if (unlikely(!sq->mpwqe.wqe)) {
+ if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc,
+ MLX5_SEND_WQE_MAX_WQEBBS))) {
+ /* SQ is full, ring doorbell */
+ mlx5e_xmit_xdp_doorbell(sq);
+ sq->stats->full++;
+ return -EBUSY;
+ }
+
+ return MLX5E_XDP_CHECK_START_MPWQE;
+ }
+
+ return MLX5E_XDP_CHECK_OK;
+}
+
static bool mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq,
- struct mlx5e_xdp_info *xdpi)
+ struct mlx5e_xdp_xmit_data *xdptxd,
+ struct mlx5e_xdp_info *xdpi,
+ int check_result)
{
struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
struct mlx5e_xdpsq_stats *stats = sq->stats;
- struct xdp_frame *xdpf = xdpi->xdpf;
-
- if (unlikely(sq->hw_mtu < xdpf->len)) {
+ if (unlikely(xdptxd->len > sq->hw_mtu)) {
stats->err++;
return false;
}
- if (unlikely(!session->wqe)) {
- if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc,
- MLX5_SEND_WQE_MAX_WQEBBS))) {
- /* SQ is full, ring doorbell */
- mlx5e_xmit_xdp_doorbell(sq);
- stats->full++;
- return false;
- }
+ if (!check_result)
+ check_result = mlx5e_xmit_xdp_frame_check_mpwqe(sq);
+ if (unlikely(check_result < 0))
+ return false;
+ if (check_result == MLX5E_XDP_CHECK_START_MPWQE) {
+ /* Start the session when nothing can fail, so it's guaranteed
+ * that if there is an active session, it has at least one dseg,
+ * and it's safe to complete it at any time.
+ */
mlx5e_xdp_mpwqe_session_start(sq);
}
- mlx5e_xdp_mpwqe_add_dseg(sq, xdpi, stats);
+ mlx5e_xdp_mpwqe_add_dseg(sq, xdptxd, stats);
if (unlikely(session->complete ||
session->ds_count == session->max_ds_count))
@@ -219,7 +294,22 @@ static bool mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq,
return true;
}
-static bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info *xdpi)
+static int mlx5e_xmit_xdp_frame_check(struct mlx5e_xdpsq *sq)
+{
+ if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, 1))) {
+ /* SQ is full, ring doorbell */
+ mlx5e_xmit_xdp_doorbell(sq);
+ sq->stats->full++;
+ return -EBUSY;
+ }
+
+ return MLX5E_XDP_CHECK_OK;
+}
+
+static bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq,
+ struct mlx5e_xdp_xmit_data *xdptxd,
+ struct mlx5e_xdp_info *xdpi,
+ int check_result)
{
struct mlx5_wq_cyc *wq = &sq->wq;
u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
@@ -229,9 +319,8 @@ static bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info *
struct mlx5_wqe_eth_seg *eseg = &wqe->eth;
struct mlx5_wqe_data_seg *dseg = wqe->data;
- struct xdp_frame *xdpf = xdpi->xdpf;
- dma_addr_t dma_addr = xdpi->dma_addr;
- unsigned int dma_len = xdpf->len;
+ dma_addr_t dma_addr = xdptxd->dma_addr;
+ u32 dma_len = xdptxd->len;
struct mlx5e_xdpsq_stats *stats = sq->stats;
@@ -242,18 +331,16 @@ static bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info *
return false;
}
- if (unlikely(!mlx5e_wqc_has_room_for(wq, sq->cc, sq->pc, 1))) {
- /* SQ is full, ring doorbell */
- mlx5e_xmit_xdp_doorbell(sq);
- stats->full++;
+ if (!check_result)
+ check_result = mlx5e_xmit_xdp_frame_check(sq);
+ if (unlikely(check_result < 0))
return false;
- }
cseg->fm_ce_se = 0;
/* copy the inline part if required */
if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) {
- memcpy(eseg->inline_hdr.start, xdpf->data, MLX5E_XDP_MIN_INLINE);
+ memcpy(eseg->inline_hdr.start, xdptxd->data, MLX5E_XDP_MIN_INLINE);
eseg->inline_hdr.sz = cpu_to_be16(MLX5E_XDP_MIN_INLINE);
dma_len -= MLX5E_XDP_MIN_INLINE;
dma_addr += MLX5E_XDP_MIN_INLINE;
@@ -277,7 +364,7 @@ static bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info *
static void mlx5e_free_xdpsq_desc(struct mlx5e_xdpsq *sq,
struct mlx5e_xdp_wqe_info *wi,
- struct mlx5e_rq *rq,
+ u32 *xsk_frames,
bool recycle)
{
struct mlx5e_xdp_info_fifo *xdpi_fifo = &sq->db.xdpi_fifo;
@@ -286,22 +373,32 @@ static void mlx5e_free_xdpsq_desc(struct mlx5e_xdpsq *sq,
for (i = 0; i < wi->num_pkts; i++) {
struct mlx5e_xdp_info xdpi = mlx5e_xdpi_fifo_pop(xdpi_fifo);
- if (rq) {
- /* XDP_TX */
- mlx5e_page_release(rq, &xdpi.di, recycle);
- } else {
- /* XDP_REDIRECT */
- dma_unmap_single(sq->pdev, xdpi.dma_addr,
- xdpi.xdpf->len, DMA_TO_DEVICE);
- xdp_return_frame(xdpi.xdpf);
+ switch (xdpi.mode) {
+ case MLX5E_XDP_XMIT_MODE_FRAME:
+ /* XDP_TX from the XSK RQ and XDP_REDIRECT */
+ dma_unmap_single(sq->pdev, xdpi.frame.dma_addr,
+ xdpi.frame.xdpf->len, DMA_TO_DEVICE);
+ xdp_return_frame(xdpi.frame.xdpf);
+ break;
+ case MLX5E_XDP_XMIT_MODE_PAGE:
+ /* XDP_TX from the regular RQ */
+ mlx5e_page_release_dynamic(xdpi.page.rq, &xdpi.page.di, recycle);
+ break;
+ case MLX5E_XDP_XMIT_MODE_XSK:
+ /* AF_XDP send */
+ (*xsk_frames)++;
+ break;
+ default:
+ WARN_ON_ONCE(true);
}
}
}
-bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq)
+bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq)
{
struct mlx5e_xdpsq *sq;
struct mlx5_cqe64 *cqe;
+ u32 xsk_frames = 0;
u16 sqcc;
int i;
@@ -343,10 +440,13 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq)
sqcc += wi->num_wqebbs;
- mlx5e_free_xdpsq_desc(sq, wi, rq, true);
+ mlx5e_free_xdpsq_desc(sq, wi, &xsk_frames, true);
} while (!last_wqe);
} while ((++i < MLX5E_TX_CQ_POLL_BUDGET) && (cqe = mlx5_cqwq_get_cqe(&cq->wq)));
+ if (xsk_frames)
+ xsk_umem_complete_tx(sq->umem, xsk_frames);
+
sq->stats->cqes += i;
mlx5_cqwq_update_db_record(&cq->wq);
@@ -358,8 +458,10 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq)
return (i == MLX5E_TX_CQ_POLL_BUDGET);
}
-void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq)
+void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq)
{
+ u32 xsk_frames = 0;
+
while (sq->cc != sq->pc) {
struct mlx5e_xdp_wqe_info *wi;
u16 ci;
@@ -369,8 +471,11 @@ void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq)
sq->cc += wi->num_wqebbs;
- mlx5e_free_xdpsq_desc(sq, wi, rq, false);
+ mlx5e_free_xdpsq_desc(sq, wi, &xsk_frames, false);
}
+
+ if (xsk_frames)
+ xsk_umem_complete_tx(sq->umem, xsk_frames);
}
int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
@@ -398,21 +503,27 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
for (i = 0; i < n; i++) {
struct xdp_frame *xdpf = frames[i];
+ struct mlx5e_xdp_xmit_data xdptxd;
struct mlx5e_xdp_info xdpi;
- xdpi.dma_addr = dma_map_single(sq->pdev, xdpf->data, xdpf->len,
- DMA_TO_DEVICE);
- if (unlikely(dma_mapping_error(sq->pdev, xdpi.dma_addr))) {
+ xdptxd.data = xdpf->data;
+ xdptxd.len = xdpf->len;
+ xdptxd.dma_addr = dma_map_single(sq->pdev, xdptxd.data,
+ xdptxd.len, DMA_TO_DEVICE);
+
+ if (unlikely(dma_mapping_error(sq->pdev, xdptxd.dma_addr))) {
xdp_return_frame_rx_napi(xdpf);
drops++;
continue;
}
- xdpi.xdpf = xdpf;
+ xdpi.mode = MLX5E_XDP_XMIT_MODE_FRAME;
+ xdpi.frame.xdpf = xdpf;
+ xdpi.frame.dma_addr = xdptxd.dma_addr;
- if (unlikely(!sq->xmit_xdp_frame(sq, &xdpi))) {
- dma_unmap_single(sq->pdev, xdpi.dma_addr,
- xdpf->len, DMA_TO_DEVICE);
+ if (unlikely(!sq->xmit_xdp_frame(sq, &xdptxd, &xdpi, 0))) {
+ dma_unmap_single(sq->pdev, xdptxd.dma_addr,
+ xdptxd.len, DMA_TO_DEVICE);
xdp_return_frame_rx_napi(xdpf);
drops++;
}
@@ -429,7 +540,7 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
void mlx5e_xdp_rx_poll_complete(struct mlx5e_rq *rq)
{
- struct mlx5e_xdpsq *xdpsq = &rq->xdpsq;
+ struct mlx5e_xdpsq *xdpsq = rq->xdpsq;
if (xdpsq->mpwqe.wqe)
mlx5e_xdp_mpwqe_complete(xdpsq);
@@ -444,6 +555,8 @@ void mlx5e_xdp_rx_poll_complete(struct mlx5e_rq *rq)
void mlx5e_set_xmit_fp(struct mlx5e_xdpsq *sq, bool is_mpw)
{
+ sq->xmit_xdp_frame_check = is_mpw ?
+ mlx5e_xmit_xdp_frame_check_mpwqe : mlx5e_xmit_xdp_frame_check;
sq->xmit_xdp_frame = is_mpw ?
mlx5e_xmit_xdp_frame_mpwqe : mlx5e_xmit_xdp_frame;
}
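[Editor's note] For clarity, the two members assigned here are presumably function pointers in struct mlx5e_xdpsq whose signatures match the static functions above; the typedef names below are illustrative, not taken from the source:

	typedef int (*mlx5e_fp_xmit_xdp_frame_check)(struct mlx5e_xdpsq *sq);
	typedef bool (*mlx5e_fp_xmit_xdp_frame)(struct mlx5e_xdpsq *sq,
						struct mlx5e_xdp_xmit_data *xdptxd,
						struct mlx5e_xdp_info *xdpi,
						int check_result);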
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
index 8b537a4b0840..b90923932668 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
@@ -33,17 +33,20 @@
#define __MLX5_EN_XDP_H__
#include "en.h"
+#include "en/txrx.h"
#define MLX5E_XDP_MIN_INLINE (ETH_HLEN + VLAN_HLEN)
#define MLX5E_XDP_TX_EMPTY_DS_COUNT \
(sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS)
#define MLX5E_XDP_TX_DS_COUNT (MLX5E_XDP_TX_EMPTY_DS_COUNT + 1 /* SG DS */)
-int mlx5e_xdp_max_mtu(struct mlx5e_params *params);
+struct mlx5e_xsk_param;
+int mlx5e_xdp_max_mtu(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk);
bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di,
- void *va, u16 *rx_headroom, u32 *len);
-bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq);
-void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq);
+ void *va, u16 *rx_headroom, u32 *len, bool xsk);
+void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq *sq);
+bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq);
+void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq);
void mlx5e_set_xmit_fp(struct mlx5e_xdpsq *sq, bool is_mpw);
void mlx5e_xdp_rx_poll_complete(struct mlx5e_rq *rq);
int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
@@ -66,6 +69,21 @@ static inline bool mlx5e_xdp_tx_is_enabled(struct mlx5e_priv *priv)
return test_bit(MLX5E_STATE_XDP_TX_ENABLED, &priv->state);
}
+static inline void mlx5e_xdp_set_open(struct mlx5e_priv *priv)
+{
+ set_bit(MLX5E_STATE_XDP_OPEN, &priv->state);
+}
+
+static inline void mlx5e_xdp_set_closed(struct mlx5e_priv *priv)
+{
+ clear_bit(MLX5E_STATE_XDP_OPEN, &priv->state);
+}
+
+static inline bool mlx5e_xdp_is_open(struct mlx5e_priv *priv)
+{
+ return test_bit(MLX5E_STATE_XDP_OPEN, &priv->state);
+}
+
static inline void mlx5e_xmit_xdp_doorbell(struct mlx5e_xdpsq *sq)
{
if (sq->doorbell_cseg) {
@@ -97,15 +115,14 @@ static inline void mlx5e_xdp_update_inline_state(struct mlx5e_xdpsq *sq)
}
static inline void
-mlx5e_xdp_mpwqe_add_dseg(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info *xdpi,
+mlx5e_xdp_mpwqe_add_dseg(struct mlx5e_xdpsq *sq,
+ struct mlx5e_xdp_xmit_data *xdptxd,
struct mlx5e_xdpsq_stats *stats)
{
struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
- dma_addr_t dma_addr = xdpi->dma_addr;
- struct xdp_frame *xdpf = xdpi->xdpf;
struct mlx5_wqe_data_seg *dseg =
(struct mlx5_wqe_data_seg *)session->wqe + session->ds_count;
- u16 dma_len = xdpf->len;
+ u32 dma_len = xdptxd->len;
session->pkt_count++;
@@ -124,7 +141,7 @@ mlx5e_xdp_mpwqe_add_dseg(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info *xdpi,
}
inline_dseg->byte_count = cpu_to_be32(dma_len | MLX5_INLINE_SEG);
- memcpy(inline_dseg->data, xdpf->data, dma_len);
+ memcpy(inline_dseg->data, xdptxd->data, dma_len);
session->ds_count += ds_cnt;
stats->inlnw++;
@@ -132,7 +149,7 @@ mlx5e_xdp_mpwqe_add_dseg(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info *xdpi,
}
no_inline:
- dseg->addr = cpu_to_be64(dma_addr);
+ dseg->addr = cpu_to_be64(xdptxd->dma_addr);
dseg->byte_count = cpu_to_be32(dma_len);
dseg->lkey = sq->mkey_be;
session->ds_count++;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/Makefile
new file mode 100644
index 000000000000..5ee42991900a
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/Makefile
@@ -0,0 +1 @@
+subdir-ccflags-y += -I$(src)/../..
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c
new file mode 100644
index 000000000000..6a55573ec8f2
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c
@@ -0,0 +1,192 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#include "rx.h"
+#include "en/xdp.h"
+#include <net/xdp_sock.h>
+
+/* RX data path */
+
+bool mlx5e_xsk_pages_enough_umem(struct mlx5e_rq *rq, int count)
+{
+ /* Check in advance that we have enough frames, instead of allocating
+ * one-by-one, failing and moving frames to the Reuse Ring.
+ */
+ return xsk_umem_has_addrs_rq(rq->umem, count);
+}
+
+int mlx5e_xsk_page_alloc_umem(struct mlx5e_rq *rq,
+ struct mlx5e_dma_info *dma_info)
+{
+ struct xdp_umem *umem = rq->umem;
+ u64 handle;
+
+ if (!xsk_umem_peek_addr_rq(umem, &handle))
+ return -ENOMEM;
+
+ dma_info->xsk.handle = handle + rq->buff.umem_headroom;
+ dma_info->xsk.data = xdp_umem_get_data(umem, dma_info->xsk.handle);
+
+ /* No need to add headroom to the DMA address. In the striding RQ
+ * case, we just provide pages for UMR, and headroom is accounted for
+ * at the setup stage when creating a WQE. In the non-striding RQ
+ * case, headroom is accounted for in mlx5e_alloc_rx_wqe.
+ */
+ dma_info->addr = xdp_umem_get_dma(umem, handle);
+
+ xsk_umem_discard_addr_rq(umem);
+
+ dma_sync_single_for_device(rq->pdev, dma_info->addr, PAGE_SIZE,
+ DMA_BIDIRECTIONAL);
+
+ return 0;
+}
+
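[Editor's note] Note the asymmetry established above: the stored XSK handle is advanced by the driver's extra UMEM headroom, while the DMA address is taken from the raw handle, for the reason given in the comment. A numeric illustration with made-up values:

	/* Illustration only, values are made up:
	 *   handle = 0x23000 (chunk start), rq->buff.umem_headroom = 64
	 *
	 *   dma_info->xsk.handle = 0x23040  (data position seen by XDP)
	 *   dma_info->xsk.data   = virtual address of 0x23040
	 *   dma_info->addr       = DMA address of 0x23000 (headroom is
	 *                          applied later, by the UMR WQE or by
	 *                          mlx5e_alloc_rx_wqe)
	 */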
+static inline void mlx5e_xsk_recycle_frame(struct mlx5e_rq *rq, u64 handle)
+{
+ xsk_umem_fq_reuse(rq->umem, handle & rq->umem->chunk_mask);
+}
+
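[Editor's note] The masking matters because mlx5e_xdp_handle advances xdp.handle by the post-program headroom offset; clearing the low bits recovers the chunk start before it goes on the Reuse Ring. A worked example, assuming aligned 4096-byte chunks (the only size mlx5e_validate_xsk_param accepts) and the aligned-mode chunk_mask of ~(chunk_size - 1):

	/* Example: chunk_size == PAGE_SIZE == 4096, chunk_mask == ~0xfffULL:
	 *
	 *   handle              = 0x23140 (chunk 0x23000 + 0x140 offset)
	 *   handle & chunk_mask = 0x23000 -> whole chunk back to the ring
	 */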
+/* The XSK RQ uses pages from the UMEM, so they must not be released. They are
+ * returned to userspace if possible; if not, this function is called to reuse
+ * them in the driver.
+ */
+void mlx5e_xsk_page_release(struct mlx5e_rq *rq,
+ struct mlx5e_dma_info *dma_info)
+{
+ mlx5e_xsk_recycle_frame(rq, dma_info->xsk.handle);
+}
+
+/* Return a frame to the hardware to be filled in again. It is used by XDP when
+ * the XDP program returns XDP_TX, or XDP_REDIRECT to a target other than an
+ * XSKMAP.
+ */
+void mlx5e_xsk_zca_free(struct zero_copy_allocator *zca, unsigned long handle)
+{
+ struct mlx5e_rq *rq = container_of(zca, struct mlx5e_rq, zca);
+
+ mlx5e_xsk_recycle_frame(rq, handle);
+}
+
+static struct sk_buff *mlx5e_xsk_construct_skb(struct mlx5e_rq *rq, void *data,
+ u32 cqe_bcnt)
+{
+ struct sk_buff *skb;
+
+ skb = napi_alloc_skb(rq->cq.napi, cqe_bcnt);
+ if (unlikely(!skb)) {
+ rq->stats->buff_alloc_err++;
+ return NULL;
+ }
+
+ skb_put_data(skb, data, cqe_bcnt);
+
+ return skb;
+}
+
+struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq,
+ struct mlx5e_mpw_info *wi,
+ u16 cqe_bcnt,
+ u32 head_offset,
+ u32 page_idx)
+{
+ struct mlx5e_dma_info *di = &wi->umr.dma_info[page_idx];
+ u16 rx_headroom = rq->buff.headroom - rq->buff.umem_headroom;
+ u32 cqe_bcnt32 = cqe_bcnt;
+ void *va, *data;
+ u32 frag_size;
+ bool consumed;
+
+ /* Check packet size. Note LRO doesn't use linear SKB */
+ if (unlikely(cqe_bcnt > rq->hw_mtu)) {
+ rq->stats->oversize_pkts_sw_drop++;
+ return NULL;
+ }
+
+ /* head_offset is not used in this function, because di->xsk.data and
+ * di->addr point directly to the necessary place. Furthermore, in the
+ * current implementation, one page = one packet = one frame, so
+ * head_offset should always be 0.
+ */
+ WARN_ON_ONCE(head_offset);
+
+ va = di->xsk.data;
+ data = va + rx_headroom;
+ frag_size = rq->buff.headroom + cqe_bcnt32;
+
+ dma_sync_single_for_cpu(rq->pdev, di->addr, frag_size, DMA_BIDIRECTIONAL);
+ prefetch(data);
+
+ rcu_read_lock();
+ consumed = mlx5e_xdp_handle(rq, di, va, &rx_headroom, &cqe_bcnt32, true);
+ rcu_read_unlock();
+
+ /* Possible flows:
+ * - XDP_REDIRECT to XSKMAP:
+ * The page is owned by userspace from now on.
+ * - XDP_TX and other XDP_REDIRECTs:
+ * The page was returned by ZCA and recycled.
+ * - XDP_DROP:
+ * Recycle the page.
+ * - XDP_PASS:
+ * Allocate an SKB, copy the data and recycle the page.
+ *
+ * Pages to be recycled go to the Reuse Ring on MPWQE deallocation. The
+ * Reuse Ring is the same size as the driver RX ring, and pages for WQEs
+ * are allocated from the Reuse Ring first, so it has enough space.
+ */
+
+ if (likely(consumed)) {
+ if (likely(__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)))
+ __set_bit(page_idx, wi->xdp_xmit_bitmap); /* non-atomic */
+ return NULL; /* page/packet was consumed by XDP */
+ }
+
+ /* XDP_PASS: copy the data from the UMEM to a new SKB and reuse the
+ * frame. On SKB allocation failure, NULL is returned.
+ */
+ return mlx5e_xsk_construct_skb(rq, data, cqe_bcnt32);
+}
+
+struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq,
+ struct mlx5_cqe64 *cqe,
+ struct mlx5e_wqe_frag_info *wi,
+ u32 cqe_bcnt)
+{
+ struct mlx5e_dma_info *di = wi->di;
+ u16 rx_headroom = rq->buff.headroom - rq->buff.umem_headroom;
+ void *va, *data;
+ bool consumed;
+ u32 frag_size;
+
+ /* wi->offset is not used in this function, because di->xsk.data and
+ * di->addr point directly to the necessary place. Furthermore, in the
+ * current implementation, one page = one packet = one frame, so
+ * wi->offset should always be 0.
+ */
+ WARN_ON_ONCE(wi->offset);
+
+ va = di->xsk.data;
+ data = va + rx_headroom;
+ frag_size = rq->buff.headroom + cqe_bcnt;
+
+ dma_sync_single_for_cpu(rq->pdev, di->addr, frag_size, DMA_BIDIRECTIONAL);
+ prefetch(data);
+
+ if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_RESP_SEND)) {
+ rq->stats->wqe_err++;
+ return NULL;
+ }
+
+ rcu_read_lock();
+ consumed = mlx5e_xdp_handle(rq, di, va, &rx_headroom, &cqe_bcnt, true);
+ rcu_read_unlock();
+
+ if (likely(consumed))
+ return NULL; /* page/packet was consumed by XDP */
+
+ /* XDP_PASS: copy the data from the UMEM to a new SKB. The frame reuse
+ * will be handled by mlx5e_put_rx_frag.
+ * On SKB allocation failure, NULL is returned.
+ */
+ return mlx5e_xsk_construct_skb(rq, data, cqe_bcnt);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h
new file mode 100644
index 000000000000..307b923a1361
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#ifndef __MLX5_EN_XSK_RX_H__
+#define __MLX5_EN_XSK_RX_H__
+
+#include "en.h"
+
+/* RX data path */
+
+bool mlx5e_xsk_pages_enough_umem(struct mlx5e_rq *rq, int count);
+int mlx5e_xsk_page_alloc_umem(struct mlx5e_rq *rq,
+ struct mlx5e_dma_info *dma_info);
+void mlx5e_xsk_page_release(struct mlx5e_rq *rq,
+ struct mlx5e_dma_info *dma_info);
+void mlx5e_xsk_zca_free(struct zero_copy_allocator *zca, unsigned long handle);
+struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq,
+ struct mlx5e_mpw_info *wi,
+ u16 cqe_bcnt,
+ u32 head_offset,
+ u32 page_idx);
+struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq,
+ struct mlx5_cqe64 *cqe,
+ struct mlx5e_wqe_frag_info *wi,
+ u32 cqe_bcnt);
+
+#endif /* __MLX5_EN_XSK_RX_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
new file mode 100644
index 000000000000..aaffa6f68dc0
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
@@ -0,0 +1,223 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#include "setup.h"
+#include "en/params.h"
+
+bool mlx5e_validate_xsk_param(struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk,
+ struct mlx5_core_dev *mdev)
+{
+ /* AF_XDP doesn't support frames larger than PAGE_SIZE, and the current
+ * mlx5e XDP implementation doesn't support multiple packets per page.
+ */
+ if (xsk->chunk_size != PAGE_SIZE)
+ return false;
+
+ /* The current MTU and XSK headroom don't allow packets to fit into the frames. */
+ if (mlx5e_rx_get_linear_frag_sz(params, xsk) > xsk->chunk_size)
+ return false;
+
+ /* frag_sz is different for regular and XSK RQs, so ensure that linear
+ * SKB mode is possible.
+ */
+ switch (params->rq_wq_type) {
+ case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
+ return mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk);
+ default: /* MLX5_WQ_TYPE_CYCLIC */
+ return mlx5e_rx_is_linear_skb(params, xsk);
+ }
+}
+
+static void mlx5e_build_xskicosq_param(struct mlx5e_priv *priv,
+ u8 log_wq_size,
+ struct mlx5e_sq_param *param)
+{
+ void *sqc = param->sqc;
+ void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
+
+ mlx5e_build_sq_param_common(priv, param);
+
+ MLX5_SET(wq, wq, log_wq_sz, log_wq_size);
+}
+
+static void mlx5e_build_xsk_cparam(struct mlx5e_priv *priv,
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk,
+ struct mlx5e_channel_param *cparam)
+{
+ const u8 xskicosq_size = MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE;
+
+ mlx5e_build_rq_param(priv, params, xsk, &cparam->rq);
+ mlx5e_build_xdpsq_param(priv, params, &cparam->xdp_sq);
+ mlx5e_build_xskicosq_param(priv, xskicosq_size, &cparam->icosq);
+ mlx5e_build_rx_cq_param(priv, params, xsk, &cparam->rx_cq);
+ mlx5e_build_tx_cq_param(priv, params, &cparam->tx_cq);
+ mlx5e_build_ico_cq_param(priv, xskicosq_size, &cparam->icosq_cq);
+}
+
+int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk, struct xdp_umem *umem,
+ struct mlx5e_channel *c)
+{
+ struct mlx5e_channel_param cparam = {};
+ struct dim_cq_moder icocq_moder = {};
+ int err;
+
+ if (!mlx5e_validate_xsk_param(params, xsk, priv->mdev))
+ return -EINVAL;
+
+ mlx5e_build_xsk_cparam(priv, params, xsk, &cparam);
+
+ err = mlx5e_open_cq(c, params->rx_cq_moderation, &cparam.rx_cq, &c->xskrq.cq);
+ if (unlikely(err))
+ return err;
+
+ err = mlx5e_open_rq(c, params, &cparam.rq, xsk, umem, &c->xskrq);
+ if (unlikely(err))
+ goto err_close_rx_cq;
+
+ err = mlx5e_open_cq(c, params->tx_cq_moderation, &cparam.tx_cq, &c->xsksq.cq);
+ if (unlikely(err))
+ goto err_close_rq;
+
+ /* Create a separate SQ, so that when the UMEM is disabled, we can
+ * close this SQ safely and stop receiving CQEs. Otherwise, e.g., if
+ * the XDPSQ were used instead, we might run into trouble when the UMEM
+ * is disabled and then re-enabled, but the SQ kept receiving CQEs
+ * from the old UMEM.
+ */
+ err = mlx5e_open_xdpsq(c, params, &cparam.xdp_sq, umem, &c->xsksq, true);
+ if (unlikely(err))
+ goto err_close_tx_cq;
+
+ err = mlx5e_open_cq(c, icocq_moder, &cparam.icosq_cq, &c->xskicosq.cq);
+ if (unlikely(err))
+ goto err_close_sq;
+
+ /* Create a dedicated SQ for posting NOPs whenever we need an IRQ to be
+ * triggered and NAPI to be called on the correct CPU.
+ */
+ err = mlx5e_open_icosq(c, params, &cparam.icosq, &c->xskicosq);
+ if (unlikely(err))
+ goto err_close_icocq;
+
+ spin_lock_init(&c->xskicosq_lock);
+
+ set_bit(MLX5E_CHANNEL_STATE_XSK, c->state);
+
+ return 0;
+
+err_close_icocq:
+ mlx5e_close_cq(&c->xskicosq.cq);
+
+err_close_sq:
+ mlx5e_close_xdpsq(&c->xsksq);
+
+err_close_tx_cq:
+ mlx5e_close_cq(&c->xsksq.cq);
+
+err_close_rq:
+ mlx5e_close_rq(&c->xskrq);
+
+err_close_rx_cq:
+ mlx5e_close_cq(&c->xskrq.cq);
+
+ return err;
+}
+
+void mlx5e_close_xsk(struct mlx5e_channel *c)
+{
+ clear_bit(MLX5E_CHANNEL_STATE_XSK, c->state);
+ napi_synchronize(&c->napi);
+
+ mlx5e_close_rq(&c->xskrq);
+ mlx5e_close_cq(&c->xskrq.cq);
+ mlx5e_close_icosq(&c->xskicosq);
+ mlx5e_close_cq(&c->xskicosq.cq);
+ mlx5e_close_xdpsq(&c->xsksq);
+ mlx5e_close_cq(&c->xsksq.cq);
+}
+
+void mlx5e_activate_xsk(struct mlx5e_channel *c)
+{
+ set_bit(MLX5E_RQ_STATE_ENABLED, &c->xskrq.state);
+ /* TX queue is created active. */
+ mlx5e_trigger_irq(&c->xskicosq);
+}
+
+void mlx5e_deactivate_xsk(struct mlx5e_channel *c)
+{
+ mlx5e_deactivate_rq(&c->xskrq);
+ /* TX queue is disabled on close. */
+}
+
+static int mlx5e_redirect_xsk_rqt(struct mlx5e_priv *priv, u16 ix, u32 rqn)
+{
+ struct mlx5e_redirect_rqt_param direct_rrp = {
+ .is_rss = false,
+ {
+ .rqn = rqn,
+ },
+ };
+
+ u32 rqtn = priv->xsk_tir[ix].rqt.rqtn;
+
+ return mlx5e_redirect_rqt(priv, rqtn, 1, direct_rrp);
+}
+
+int mlx5e_xsk_redirect_rqt_to_channel(struct mlx5e_priv *priv, struct mlx5e_channel *c)
+{
+ return mlx5e_redirect_xsk_rqt(priv, c->ix, c->xskrq.rqn);
+}
+
+int mlx5e_xsk_redirect_rqt_to_drop(struct mlx5e_priv *priv, u16 ix)
+{
+ return mlx5e_redirect_xsk_rqt(priv, ix, priv->drop_rq.rqn);
+}
+
+int mlx5e_xsk_redirect_rqts_to_channels(struct mlx5e_priv *priv, struct mlx5e_channels *chs)
+{
+ int err, i;
+
+ if (!priv->xsk.refcnt)
+ return 0;
+
+ for (i = 0; i < chs->num; i++) {
+ struct mlx5e_channel *c = chs->c[i];
+
+ if (!test_bit(MLX5E_CHANNEL_STATE_XSK, c->state))
+ continue;
+
+ err = mlx5e_xsk_redirect_rqt_to_channel(priv, c);
+ if (unlikely(err))
+ goto err_stop;
+ }
+
+ return 0;
+
+err_stop:
+ for (i--; i >= 0; i--) {
+ if (!test_bit(MLX5E_CHANNEL_STATE_XSK, chs->c[i]->state))
+ continue;
+
+ mlx5e_xsk_redirect_rqt_to_drop(priv, i);
+ }
+
+ return err;
+}
+
+void mlx5e_xsk_redirect_rqts_to_drop(struct mlx5e_priv *priv, struct mlx5e_channels *chs)
+{
+ int i;
+
+ if (!priv->xsk.refcnt)
+ return;
+
+ for (i = 0; i < chs->num; i++) {
+ if (!test_bit(MLX5E_CHANNEL_STATE_XSK, chs->c[i]->state))
+ continue;
+
+ mlx5e_xsk_redirect_rqt_to_drop(priv, i);
+ }
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h
new file mode 100644
index 000000000000..0dd11b81c046
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#ifndef __MLX5_EN_XSK_SETUP_H__
+#define __MLX5_EN_XSK_SETUP_H__
+
+#include "en.h"
+
+struct mlx5e_xsk_param;
+
+bool mlx5e_validate_xsk_param(struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk,
+ struct mlx5_core_dev *mdev);
+int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk, struct xdp_umem *umem,
+ struct mlx5e_channel *c);
+void mlx5e_close_xsk(struct mlx5e_channel *c);
+void mlx5e_activate_xsk(struct mlx5e_channel *c);
+void mlx5e_deactivate_xsk(struct mlx5e_channel *c);
+int mlx5e_xsk_redirect_rqt_to_channel(struct mlx5e_priv *priv, struct mlx5e_channel *c);
+int mlx5e_xsk_redirect_rqt_to_drop(struct mlx5e_priv *priv, u16 ix);
+int mlx5e_xsk_redirect_rqts_to_channels(struct mlx5e_priv *priv, struct mlx5e_channels *chs);
+void mlx5e_xsk_redirect_rqts_to_drop(struct mlx5e_priv *priv, struct mlx5e_channels *chs);
+
+#endif /* __MLX5_EN_XSK_SETUP_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c
new file mode 100644
index 000000000000..35e188cf4ea4
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c
@@ -0,0 +1,111 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#include "tx.h"
+#include "umem.h"
+#include "en/xdp.h"
+#include "en/params.h"
+#include <net/xdp_sock.h>
+
+int mlx5e_xsk_async_xmit(struct net_device *dev, u32 qid)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5e_params *params = &priv->channels.params;
+ struct mlx5e_channel *c;
+ u16 ix;
+
+ if (unlikely(!mlx5e_xdp_is_open(priv)))
+ return -ENETDOWN;
+
+ if (unlikely(!mlx5e_qid_get_ch_if_in_group(params, qid, MLX5E_RQ_GROUP_XSK, &ix)))
+ return -EINVAL;
+
+ c = priv->channels.c[ix];
+
+ if (unlikely(!test_bit(MLX5E_CHANNEL_STATE_XSK, c->state)))
+ return -ENXIO;
+
+ if (!napi_if_scheduled_mark_missed(&c->napi)) {
+ spin_lock(&c->xskicosq_lock);
+ mlx5e_trigger_irq(&c->xskicosq);
+ spin_unlock(&c->xskicosq_lock);
+ }
+
+ return 0;
+}
+
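[Editor's note] mlx5e_xsk_async_xmit above is the kernel half of the AF_XDP TX wakeup: if NAPI isn't already scheduled, a NOP posted on the dedicated xskicosq raises an IRQ on the channel's CPU. The userspace half is the application kicking the socket; a minimal sketch using the raw syscall (conventional usage, not part of this patch):

	/* Userspace sketch: after filling the AF_XDP TX ring, kick the
	 * kernel with a zero-length sendto() on the xsk socket fd.
	 */
	#include <sys/socket.h>

	static void xsk_kick_tx(int xsk_fd)
	{
		sendto(xsk_fd, NULL, 0, MSG_DONTWAIT, NULL, 0);
	}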
+/* When TX fails (because the packet exceeds the HW MTU), we still need to get
+ * completions in order, so post a NOP to get a CQE. Since AF_XDP doesn't
+ * distinguish between successful TX and errors, the handling in
+ * mlx5e_poll_xdpsq_cq is the same.
+ */
+static void mlx5e_xsk_tx_post_err(struct mlx5e_xdpsq *sq,
+ struct mlx5e_xdp_info *xdpi)
+{
+ u16 pi = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->pc);
+ struct mlx5e_xdp_wqe_info *wi = &sq->db.wqe_info[pi];
+ struct mlx5e_tx_wqe *nopwqe;
+
+ wi->num_wqebbs = 1;
+ wi->num_pkts = 1;
+
+ nopwqe = mlx5e_post_nop(&sq->wq, sq->sqn, &sq->pc);
+ mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, xdpi);
+ sq->doorbell_cseg = &nopwqe->ctrl;
+}
+
+bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget)
+{
+ struct xdp_umem *umem = sq->umem;
+ struct mlx5e_xdp_info xdpi;
+ struct mlx5e_xdp_xmit_data xdptxd;
+ bool work_done = true;
+ bool flush = false;
+
+ xdpi.mode = MLX5E_XDP_XMIT_MODE_XSK;
+
+ for (; budget; budget--) {
+ int check_result = sq->xmit_xdp_frame_check(sq);
+ struct xdp_desc desc;
+
+ if (unlikely(check_result < 0)) {
+ work_done = false;
+ break;
+ }
+
+ if (!xsk_umem_consume_tx(umem, &desc)) {
+ /* TX will get stuck until something wakes it up by
+ * triggering NAPI. Currently the application is
+ * expected to call sendto() if there are consumed
+ * but not yet completed frames.
+ */
+ break;
+ }
+
+ xdptxd.dma_addr = xdp_umem_get_dma(umem, desc.addr);
+ xdptxd.data = xdp_umem_get_data(umem, desc.addr);
+ xdptxd.len = desc.len;
+
+ dma_sync_single_for_device(sq->pdev, xdptxd.dma_addr,
+ xdptxd.len, DMA_BIDIRECTIONAL);
+
+ if (unlikely(!sq->xmit_xdp_frame(sq, &xdptxd, &xdpi, check_result))) {
+ if (sq->mpwqe.wqe)
+ mlx5e_xdp_mpwqe_complete(sq);
+
+ mlx5e_xsk_tx_post_err(sq, &xdpi);
+ }
+
+ flush = true;
+ }
+
+ if (flush) {
+ if (sq->mpwqe.wqe)
+ mlx5e_xdp_mpwqe_complete(sq);
+ mlx5e_xmit_xdp_doorbell(sq);
+
+ xsk_umem_consume_tx_done(umem);
+ }
+
+ return !(budget && work_done);
+}
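[Editor's note] The return value is true when the budget ran out or the SQ filled up, i.e. there is presumably more TX work pending. A hypothetical NAPI-poll call site (the real one is in en_txrx.c, outside this diff; the budget value is an assumption):

	/* Hypothetical: keep NAPI scheduled while the XSK SQ is busy. */
	busy |= mlx5e_xsk_tx(&c->xsksq, 64 /* assumed budget */);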
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h
new file mode 100644
index 000000000000..7add18bf78d8
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#ifndef __MLX5_EN_XSK_TX_H__
+#define __MLX5_EN_XSK_TX_H__
+
+#include "en.h"
+
+/* TX data path */
+
+int mlx5e_xsk_async_xmit(struct net_device *dev, u32 qid);
+
+bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget);
+
+#endif /* __MLX5_EN_XSK_TX_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.c
new file mode 100644
index 000000000000..4baaa5788320
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.c
@@ -0,0 +1,267 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#include <net/xdp_sock.h>
+#include "umem.h"
+#include "setup.h"
+#include "en/params.h"
+
+static int mlx5e_xsk_map_umem(struct mlx5e_priv *priv,
+ struct xdp_umem *umem)
+{
+ struct device *dev = priv->mdev->device;
+ u32 i;
+
+ for (i = 0; i < umem->npgs; i++) {
+ dma_addr_t dma = dma_map_page(dev, umem->pgs[i], 0, PAGE_SIZE,
+ DMA_BIDIRECTIONAL);
+
+ if (unlikely(dma_mapping_error(dev, dma)))
+ goto err_unmap;
+ umem->pages[i].dma = dma;
+ }
+
+ return 0;
+
+err_unmap:
+ while (i--) {
+ dma_unmap_page(dev, umem->pages[i].dma, PAGE_SIZE,
+ DMA_BIDIRECTIONAL);
+ umem->pages[i].dma = 0;
+ }
+
+ return -ENOMEM;
+}
+
+static void mlx5e_xsk_unmap_umem(struct mlx5e_priv *priv,
+ struct xdp_umem *umem)
+{
+ struct device *dev = priv->mdev->device;
+ u32 i;
+
+ for (i = 0; i < umem->npgs; i++) {
+ dma_unmap_page(dev, umem->pages[i].dma, PAGE_SIZE,
+ DMA_BIDIRECTIONAL);
+ umem->pages[i].dma = 0;
+ }
+}
+
+static int mlx5e_xsk_get_umems(struct mlx5e_xsk *xsk)
+{
+ if (!xsk->umems) {
+ xsk->umems = kcalloc(MLX5E_MAX_NUM_CHANNELS,
+ sizeof(*xsk->umems), GFP_KERNEL);
+ if (unlikely(!xsk->umems))
+ return -ENOMEM;
+ }
+
+ xsk->refcnt++;
+ xsk->ever_used = true;
+
+ return 0;
+}
+
+static void mlx5e_xsk_put_umems(struct mlx5e_xsk *xsk)
+{
+ if (!--xsk->refcnt) {
+ kfree(xsk->umems);
+ xsk->umems = NULL;
+ }
+}
+
+static int mlx5e_xsk_add_umem(struct mlx5e_xsk *xsk, struct xdp_umem *umem, u16 ix)
+{
+ int err;
+
+ err = mlx5e_xsk_get_umems(xsk);
+ if (unlikely(err))
+ return err;
+
+ xsk->umems[ix] = umem;
+ return 0;
+}
+
+static void mlx5e_xsk_remove_umem(struct mlx5e_xsk *xsk, u16 ix)
+{
+ xsk->umems[ix] = NULL;
+
+ mlx5e_xsk_put_umems(xsk);
+}
+
+static bool mlx5e_xsk_is_umem_sane(struct xdp_umem *umem)
+{
+ return umem->headroom <= 0xffff && umem->chunk_size_nohr <= 0xffff;
+}
+
+void mlx5e_build_xsk_param(struct xdp_umem *umem, struct mlx5e_xsk_param *xsk)
+{
+ xsk->headroom = umem->headroom;
+ xsk->chunk_size = umem->chunk_size_nohr + umem->headroom;
+}
+
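[Editor's note] A worked example of the derived parameters, tying this to the chunk_size == PAGE_SIZE requirement in mlx5e_validate_xsk_param:

	/* Example (4K pages): umem->headroom = 256, chunk_size_nohr = 3840
	 *   xsk->headroom   = 256
	 *   xsk->chunk_size = 3840 + 256 = 4096 == PAGE_SIZE -> accepted
	 * With chunk_size_nohr = 1792 the sum would be 2048, and
	 * mlx5e_validate_xsk_param would reject the UMEM.
	 */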
+static int mlx5e_xsk_enable_locked(struct mlx5e_priv *priv,
+ struct xdp_umem *umem, u16 ix)
+{
+ struct mlx5e_params *params = &priv->channels.params;
+ struct mlx5e_xsk_param xsk;
+ struct mlx5e_channel *c;
+ int err;
+
+ if (unlikely(mlx5e_xsk_get_umem(&priv->channels.params, &priv->xsk, ix)))
+ return -EBUSY;
+
+ if (unlikely(!mlx5e_xsk_is_umem_sane(umem)))
+ return -EINVAL;
+
+ err = mlx5e_xsk_map_umem(priv, umem);
+ if (unlikely(err))
+ return err;
+
+ err = mlx5e_xsk_add_umem(&priv->xsk, umem, ix);
+ if (unlikely(err))
+ goto err_unmap_umem;
+
+ mlx5e_build_xsk_param(umem, &xsk);
+
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+ /* XSK objects will be created on open. */
+ goto validate_closed;
+ }
+
+ if (!params->xdp_prog) {
+ /* XSK objects will be created when an XDP program is set,
+ * and the channels are reopened.
+ */
+ goto validate_closed;
+ }
+
+ c = priv->channels.c[ix];
+
+ err = mlx5e_open_xsk(priv, params, &xsk, umem, c);
+ if (unlikely(err))
+ goto err_remove_umem;
+
+ mlx5e_activate_xsk(c);
+
+ /* Don't wait for WQEs, because the newer xdpsock sample doesn't provide
+ * any Fill Ring entries at the setup stage.
+ */
+
+ err = mlx5e_xsk_redirect_rqt_to_channel(priv, priv->channels.c[ix]);
+ if (unlikely(err))
+ goto err_deactivate;
+
+ return 0;
+
+err_deactivate:
+ mlx5e_deactivate_xsk(c);
+ mlx5e_close_xsk(c);
+
+err_remove_umem:
+ mlx5e_xsk_remove_umem(&priv->xsk, ix);
+
+err_unmap_umem:
+ mlx5e_xsk_unmap_umem(priv, umem);
+
+ return err;
+
+validate_closed:
+ /* Check the configuration in advance, rather than fail at a later stage
+ * (in mlx5e_xdp_set or on open) and end up with no channels.
+ */
+ if (!mlx5e_validate_xsk_param(params, &xsk, priv->mdev)) {
+ err = -EINVAL;
+ goto err_remove_umem;
+ }
+
+ return 0;
+}
+
+static int mlx5e_xsk_disable_locked(struct mlx5e_priv *priv, u16 ix)
+{
+ struct xdp_umem *umem = mlx5e_xsk_get_umem(&priv->channels.params,
+ &priv->xsk, ix);
+ struct mlx5e_channel *c;
+
+ if (unlikely(!umem))
+ return -EINVAL;
+
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
+ goto remove_umem;
+
+ /* XSK RQ and SQ are only created if XDP program is set. */
+ if (!priv->channels.params.xdp_prog)
+ goto remove_umem;
+
+ c = priv->channels.c[ix];
+ mlx5e_xsk_redirect_rqt_to_drop(priv, ix);
+ mlx5e_deactivate_xsk(c);
+ mlx5e_close_xsk(c);
+
+remove_umem:
+ mlx5e_xsk_remove_umem(&priv->xsk, ix);
+ mlx5e_xsk_unmap_umem(priv, umem);
+
+ return 0;
+}
+
+static int mlx5e_xsk_enable_umem(struct mlx5e_priv *priv, struct xdp_umem *umem,
+ u16 ix)
+{
+ int err;
+
+ mutex_lock(&priv->state_lock);
+ err = mlx5e_xsk_enable_locked(priv, umem, ix);
+ mutex_unlock(&priv->state_lock);
+
+ return err;
+}
+
+static int mlx5e_xsk_disable_umem(struct mlx5e_priv *priv, u16 ix)
+{
+ int err;
+
+ mutex_lock(&priv->state_lock);
+ err = mlx5e_xsk_disable_locked(priv, ix);
+ mutex_unlock(&priv->state_lock);
+
+ return err;
+}
+
+int mlx5e_xsk_setup_umem(struct net_device *dev, struct xdp_umem *umem, u16 qid)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5e_params *params = &priv->channels.params;
+ u16 ix;
+
+ if (unlikely(!mlx5e_qid_get_ch_if_in_group(params, qid, MLX5E_RQ_GROUP_XSK, &ix)))
+ return -EINVAL;
+
+ return umem ? mlx5e_xsk_enable_umem(priv, umem, ix) :
+ mlx5e_xsk_disable_umem(priv, ix);
+}
+
+int mlx5e_xsk_resize_reuseq(struct xdp_umem *umem, u32 nentries)
+{
+ struct xdp_umem_fq_reuse *reuseq;
+
+ reuseq = xsk_reuseq_prepare(nentries);
+ if (unlikely(!reuseq))
+ return -ENOMEM;
+ xsk_reuseq_free(xsk_reuseq_swap(umem, reuseq));
+
+ return 0;
+}
+
+u16 mlx5e_xsk_first_unused_channel(struct mlx5e_params *params, struct mlx5e_xsk *xsk)
+{
+ u16 res = xsk->refcnt ? params->num_channels : 0;
+
+ while (res) {
+ if (mlx5e_xsk_get_umem(params, xsk, res - 1))
+ break;
+ --res;
+ }
+
+ return res;
+}
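[Editor's note] Despite its name, mlx5e_xsk_first_unused_channel returns one past the highest channel index that still holds a UMEM (or 0 when none are in use), telling the caller how many leading channels must be kept. For example:

	/* Example: refcnt > 0, params->num_channels = 8, UMEMs attached to
	 * channels 0 and 3 only:
	 *   res = 8..5 check indices 7..4 and find no UMEM;
	 *   res = 4 checks index 3, finds a UMEM and stops.
	 * Return value 4: channels 0..3 must stay, 4..7 are unused.
	 */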
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.h
new file mode 100644
index 000000000000..25b4cbe58b54
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#ifndef __MLX5_EN_XSK_UMEM_H__
+#define __MLX5_EN_XSK_UMEM_H__
+
+#include "en.h"
+
+static inline struct xdp_umem *mlx5e_xsk_get_umem(struct mlx5e_params *params,
+ struct mlx5e_xsk *xsk, u16 ix)
+{
+ if (!xsk || !xsk->umems)
+ return NULL;
+
+ if (unlikely(ix >= params->num_channels))
+ return NULL;
+
+ return xsk->umems[ix];
+}
+
+struct mlx5e_xsk_param;
+void mlx5e_build_xsk_param(struct xdp_umem *umem, struct mlx5e_xsk_param *xsk);
+
+/* .ndo_bpf callback. */
+int mlx5e_xsk_setup_umem(struct net_device *dev, struct xdp_umem *umem, u16 qid);
+
+int mlx5e_xsk_resize_reuseq(struct xdp_umem *umem, u32 nentries);
+
+u16 mlx5e_xsk_first_unused_channel(struct mlx5e_params *params, struct mlx5e_xsk *xsk);
+
+#endif /* __MLX5_EN_XSK_UMEM_H__ */