aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/net/ethernet/mellanox/mlx5/core/en
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/net/ethernet/mellanox/mlx5/core/en')
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/channels.c29
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/channels.h3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/dcbnl.h2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c16
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/fs.h191
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/fs_ethtool.h29
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c188
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.h13
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/health.h2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/htb.c722
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/htb.h46
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c10
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.h13
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/mod_hdr.c58
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/mod_hdr.h26
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/params.c678
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/params.h71
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c89
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h16
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/qos.c811
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/qos.h38
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c32
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c43
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c122
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c71
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c12
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/rss.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c192
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h9
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/selq.c266
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/selq.h53
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/accept.c31
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c154
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h116
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/csum.c62
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c104
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/drop.c30
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/goto.c124
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mark.c36
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c337
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred_nic.c51
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mpls.c99
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.c119
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.h30
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/police.c157
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ptype.c36
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/redirect_ingress.c78
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.c71
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.h14
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/trap.c38
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/tun.c63
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c228
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.h29
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan_mangle.c86
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs.h49
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_dmfs.c79
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_smfs.c380
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.c585
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.h74
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c77
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.h8
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_meter.c209
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_meter.h29
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c91
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.h6
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c409
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h24
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h43
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c35
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c84
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h6
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c33
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tir.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/trap.c24
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h88
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c218
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h16
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c41
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c243
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h35
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c48
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c17
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h12
86 files changed, 7026 insertions, 1897 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/channels.c b/drivers/net/ethernet/mellanox/mlx5/core/en/channels.c
index e7c14c0de0a7..48581ea3adcb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/channels.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/channels.c
@@ -10,28 +10,33 @@ unsigned int mlx5e_channels_get_num(struct mlx5e_channels *chs)
return chs->num;
}
-void mlx5e_channels_get_regular_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn)
+static struct mlx5e_channel *mlx5e_channels_get(struct mlx5e_channels *chs, unsigned int ix)
{
- struct mlx5e_channel *c;
+ WARN_ON_ONCE(ix >= mlx5e_channels_get_num(chs));
+ return chs->c[ix];
+}
- WARN_ON(ix >= mlx5e_channels_get_num(chs));
- c = chs->c[ix];
+bool mlx5e_channels_is_xsk(struct mlx5e_channels *chs, unsigned int ix)
+{
+ struct mlx5e_channel *c = mlx5e_channels_get(chs, ix);
- *rqn = c->rq.rqn;
+ return test_bit(MLX5E_CHANNEL_STATE_XSK, c->state);
}
-bool mlx5e_channels_get_xsk_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn)
+void mlx5e_channels_get_regular_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn)
{
- struct mlx5e_channel *c;
+ struct mlx5e_channel *c = mlx5e_channels_get(chs, ix);
- WARN_ON(ix >= mlx5e_channels_get_num(chs));
- c = chs->c[ix];
+ *rqn = c->rq.rqn;
+}
- if (!test_bit(MLX5E_CHANNEL_STATE_XSK, c->state))
- return false;
+void mlx5e_channels_get_xsk_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn)
+{
+ struct mlx5e_channel *c = mlx5e_channels_get(chs, ix);
+
+ WARN_ON_ONCE(!test_bit(MLX5E_CHANNEL_STATE_XSK, c->state));
*rqn = c->xskrq.rqn;
- return true;
}
bool mlx5e_channels_get_ptp_rqn(struct mlx5e_channels *chs, u32 *rqn)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/channels.h b/drivers/net/ethernet/mellanox/mlx5/core/en/channels.h
index ca00cbc827cb..637ca90daaa8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/channels.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/channels.h
@@ -9,8 +9,9 @@
struct mlx5e_channels;
unsigned int mlx5e_channels_get_num(struct mlx5e_channels *chs);
+bool mlx5e_channels_is_xsk(struct mlx5e_channels *chs, unsigned int ix);
void mlx5e_channels_get_regular_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn);
-bool mlx5e_channels_get_xsk_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn);
+void mlx5e_channels_get_xsk_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn);
bool mlx5e_channels_get_ptp_rqn(struct mlx5e_channels *chs, u32 *rqn);
#endif /* __MLX5_EN_CHANNELS_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/dcbnl.h b/drivers/net/ethernet/mellanox/mlx5/core/en/dcbnl.h
index 9976de8b9047..b59aee75de94 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/dcbnl.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/dcbnl.h
@@ -40,13 +40,11 @@ struct mlx5e_dcbx_dp {
};
void mlx5e_dcbnl_build_netdev(struct net_device *netdev);
-void mlx5e_dcbnl_build_rep_netdev(struct net_device *netdev);
void mlx5e_dcbnl_initialize(struct mlx5e_priv *priv);
void mlx5e_dcbnl_init_app(struct mlx5e_priv *priv);
void mlx5e_dcbnl_delete_app(struct mlx5e_priv *priv);
#else
static inline void mlx5e_dcbnl_build_netdev(struct net_device *netdev) {}
-static inline void mlx5e_dcbnl_build_rep_netdev(struct net_device *netdev) {}
static inline void mlx5e_dcbnl_initialize(struct mlx5e_priv *priv) {}
static inline void mlx5e_dcbnl_init_app(struct mlx5e_priv *priv) {}
static inline void mlx5e_dcbnl_delete_app(struct mlx5e_priv *priv) {}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c
index ae52e7f38306..b69f9d10ccbd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c
@@ -21,6 +21,7 @@ int mlx5e_devlink_port_register(struct mlx5e_priv *priv)
struct netdev_phys_item_id ppid = {};
struct devlink_port *dl_port;
unsigned int dl_port_index;
+ int ret;
if (mlx5_core_is_pf(priv->mdev)) {
attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL;
@@ -41,7 +42,13 @@ int mlx5e_devlink_port_register(struct mlx5e_priv *priv)
memset(dl_port, 0, sizeof(*dl_port));
devlink_port_attrs_set(dl_port, &attrs);
- return devlink_port_register(devlink, dl_port, dl_port_index);
+ if (!(priv->mdev->priv.flags & MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW))
+ devl_lock(devlink);
+ ret = devl_port_register(devlink, dl_port, dl_port_index);
+ if (!(priv->mdev->priv.flags & MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW))
+ devl_unlock(devlink);
+
+ return ret;
}
void mlx5e_devlink_port_type_eth_set(struct mlx5e_priv *priv)
@@ -54,8 +61,13 @@ void mlx5e_devlink_port_type_eth_set(struct mlx5e_priv *priv)
void mlx5e_devlink_port_unregister(struct mlx5e_priv *priv)
{
struct devlink_port *dl_port = mlx5e_devlink_get_dl_port(priv);
+ struct devlink *devlink = priv_to_devlink(priv->mdev);
- devlink_port_unregister(dl_port);
+ if (!(priv->mdev->priv.flags & MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW))
+ devl_lock(devlink);
+ devl_port_unregister(dl_port);
+ if (!(priv->mdev->priv.flags & MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW))
+ devl_unlock(devlink);
}
struct devlink_port *mlx5e_get_devlink_port(struct net_device *dev)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
index 678ffbb48a25..bf2741eb7f9b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
@@ -8,32 +8,17 @@
#include "lib/fs_ttc.h"
struct mlx5e_post_act;
+struct mlx5e_tc_table;
enum {
MLX5E_TC_FT_LEVEL = 0,
MLX5E_TC_TTC_FT_LEVEL,
+ MLX5E_TC_MISS_LEVEL,
};
-struct mlx5e_tc_table {
- /* Protects the dynamic assignment of the t parameter
- * which is the nic tc root table.
- */
- struct mutex t_lock;
- struct mlx5_flow_table *t;
- struct mlx5_fs_chains *chains;
- struct mlx5e_post_act *post_act;
-
- struct rhashtable ht;
-
- struct mod_hdr_tbl mod_hdr;
- struct mutex hairpin_tbl_lock; /* protects hairpin_tbl */
- DECLARE_HASHTABLE(hairpin_tbl, 8);
-
- struct notifier_block netdevice_nb;
- struct netdev_net_notifier netdevice_nn;
-
- struct mlx5_tc_ct_priv *ct;
- struct mapping_ctx *mapping;
+enum {
+ MLX5E_TC_PRIO = 0,
+ MLX5E_NIC_PRIO
};
struct mlx5e_flow_table {
@@ -104,108 +89,116 @@ enum {
#endif
};
-struct mlx5e_priv;
-
-#ifdef CONFIG_MLX5_EN_RXNFC
-
-struct mlx5e_ethtool_table {
- struct mlx5_flow_table *ft;
- int num_rules;
-};
-
-#define ETHTOOL_NUM_L3_L4_FTS 7
-#define ETHTOOL_NUM_L2_FTS 4
-
-struct mlx5e_ethtool_steering {
- struct mlx5e_ethtool_table l3_l4_ft[ETHTOOL_NUM_L3_L4_FTS];
- struct mlx5e_ethtool_table l2_ft[ETHTOOL_NUM_L2_FTS];
- struct list_head rules;
- int tot_num_rules;
-};
-
-void mlx5e_ethtool_init_steering(struct mlx5e_priv *priv);
-void mlx5e_ethtool_cleanup_steering(struct mlx5e_priv *priv);
-int mlx5e_ethtool_set_rxnfc(struct mlx5e_priv *priv, struct ethtool_rxnfc *cmd);
-int mlx5e_ethtool_get_rxnfc(struct mlx5e_priv *priv,
- struct ethtool_rxnfc *info, u32 *rule_locs);
-#else
-static inline void mlx5e_ethtool_init_steering(struct mlx5e_priv *priv) { }
-static inline void mlx5e_ethtool_cleanup_steering(struct mlx5e_priv *priv) { }
-static inline int mlx5e_ethtool_set_rxnfc(struct mlx5e_priv *priv, struct ethtool_rxnfc *cmd)
-{ return -EOPNOTSUPP; }
-static inline int mlx5e_ethtool_get_rxnfc(struct mlx5e_priv *priv,
- struct ethtool_rxnfc *info, u32 *rule_locs)
-{ return -EOPNOTSUPP; }
-#endif /* CONFIG_MLX5_EN_RXNFC */
+struct mlx5e_flow_steering;
+struct mlx5e_rx_res;
#ifdef CONFIG_MLX5_EN_ARFS
struct mlx5e_arfs_tables;
-int mlx5e_arfs_create_tables(struct mlx5e_priv *priv);
-void mlx5e_arfs_destroy_tables(struct mlx5e_priv *priv);
-int mlx5e_arfs_enable(struct mlx5e_priv *priv);
-int mlx5e_arfs_disable(struct mlx5e_priv *priv);
+int mlx5e_arfs_create_tables(struct mlx5e_flow_steering *fs,
+ struct mlx5e_rx_res *rx_res, bool ntuple);
+void mlx5e_arfs_destroy_tables(struct mlx5e_flow_steering *fs, bool ntuple);
+int mlx5e_arfs_enable(struct mlx5e_flow_steering *fs);
+int mlx5e_arfs_disable(struct mlx5e_flow_steering *fs);
int mlx5e_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb,
u16 rxq_index, u32 flow_id);
#else
-static inline int mlx5e_arfs_create_tables(struct mlx5e_priv *priv) { return 0; }
-static inline void mlx5e_arfs_destroy_tables(struct mlx5e_priv *priv) {}
-static inline int mlx5e_arfs_enable(struct mlx5e_priv *priv) { return -EOPNOTSUPP; }
-static inline int mlx5e_arfs_disable(struct mlx5e_priv *priv) { return -EOPNOTSUPP; }
+static inline int mlx5e_arfs_create_tables(struct mlx5e_flow_steering *fs,
+ struct mlx5e_rx_res *rx_res, bool ntuple)
+{ return 0; }
+static inline void mlx5e_arfs_destroy_tables(struct mlx5e_flow_steering *fs, bool ntuple) {}
+static inline int mlx5e_arfs_enable(struct mlx5e_flow_steering *fs)
+{ return -EOPNOTSUPP; }
+static inline int mlx5e_arfs_disable(struct mlx5e_flow_steering *fs)
+{ return -EOPNOTSUPP; }
#endif
#ifdef CONFIG_MLX5_EN_TLS
struct mlx5e_accel_fs_tcp;
#endif
+struct mlx5e_profile;
struct mlx5e_fs_udp;
struct mlx5e_fs_any;
struct mlx5e_ptp_fs;
-struct mlx5e_flow_steering {
- struct mlx5_flow_namespace *ns;
- struct mlx5_flow_namespace *egress_ns;
+void mlx5e_set_ttc_params(struct mlx5e_flow_steering *fs,
+ struct mlx5e_rx_res *rx_res,
+ struct ttc_params *ttc_params, bool tunnel);
+
+void mlx5e_destroy_ttc_table(struct mlx5e_flow_steering *fs);
+int mlx5e_create_ttc_table(struct mlx5e_flow_steering *fs,
+ struct mlx5e_rx_res *rx_res);
+
+void mlx5e_destroy_flow_table(struct mlx5e_flow_table *ft);
+
+void mlx5e_enable_cvlan_filter(struct mlx5e_flow_steering *fs, bool promisc);
+void mlx5e_disable_cvlan_filter(struct mlx5e_flow_steering *fs, bool promisc);
+
+int mlx5e_create_flow_steering(struct mlx5e_flow_steering *fs,
+ struct mlx5e_rx_res *rx_res,
+ const struct mlx5e_profile *profile,
+ struct net_device *netdev);
+void mlx5e_destroy_flow_steering(struct mlx5e_flow_steering *fs, bool ntuple,
+ const struct mlx5e_profile *profile);
+
+struct mlx5e_flow_steering *mlx5e_fs_init(const struct mlx5e_profile *profile,
+ struct mlx5_core_dev *mdev,
+ bool state_destroy);
+void mlx5e_fs_cleanup(struct mlx5e_flow_steering *fs);
+struct mlx5e_vlan_table *mlx5e_fs_get_vlan(struct mlx5e_flow_steering *fs);
+void mlx5e_fs_set_tc(struct mlx5e_flow_steering *fs, struct mlx5e_tc_table *tc);
+struct mlx5e_tc_table *mlx5e_fs_get_tc(struct mlx5e_flow_steering *fs);
+struct mlx5e_l2_table *mlx5e_fs_get_l2(struct mlx5e_flow_steering *fs);
+struct mlx5_flow_namespace *mlx5e_fs_get_ns(struct mlx5e_flow_steering *fs, bool egress);
+void mlx5e_fs_set_ns(struct mlx5e_flow_steering *fs, struct mlx5_flow_namespace *ns, bool egress);
#ifdef CONFIG_MLX5_EN_RXNFC
- struct mlx5e_ethtool_steering ethtool;
+struct mlx5e_ethtool_steering *mlx5e_fs_get_ethtool(struct mlx5e_flow_steering *fs);
#endif
- struct mlx5e_tc_table tc;
- struct mlx5e_promisc_table promisc;
- struct mlx5e_vlan_table *vlan;
- struct mlx5e_l2_table l2;
- struct mlx5_ttc_table *ttc;
- struct mlx5_ttc_table *inner_ttc;
+struct mlx5_ttc_table *mlx5e_fs_get_ttc(struct mlx5e_flow_steering *fs, bool inner);
+void mlx5e_fs_set_ttc(struct mlx5e_flow_steering *fs, struct mlx5_ttc_table *ttc, bool inner);
#ifdef CONFIG_MLX5_EN_ARFS
- struct mlx5e_arfs_tables *arfs;
+struct mlx5e_arfs_tables *mlx5e_fs_get_arfs(struct mlx5e_flow_steering *fs);
+void mlx5e_fs_set_arfs(struct mlx5e_flow_steering *fs, struct mlx5e_arfs_tables *arfs);
#endif
+struct mlx5e_ptp_fs *mlx5e_fs_get_ptp(struct mlx5e_flow_steering *fs);
+void mlx5e_fs_set_ptp(struct mlx5e_flow_steering *fs, struct mlx5e_ptp_fs *ptp_fs);
+struct mlx5e_fs_any *mlx5e_fs_get_any(struct mlx5e_flow_steering *fs);
+void mlx5e_fs_set_any(struct mlx5e_flow_steering *fs, struct mlx5e_fs_any *any);
+struct mlx5e_fs_udp *mlx5e_fs_get_udp(struct mlx5e_flow_steering *fs);
+void mlx5e_fs_set_udp(struct mlx5e_flow_steering *fs, struct mlx5e_fs_udp *udp);
#ifdef CONFIG_MLX5_EN_TLS
- struct mlx5e_accel_fs_tcp *accel_tcp;
+struct mlx5e_accel_fs_tcp *mlx5e_fs_get_accel_tcp(struct mlx5e_flow_steering *fs);
+void mlx5e_fs_set_accel_tcp(struct mlx5e_flow_steering *fs, struct mlx5e_accel_fs_tcp *accel_tcp);
#endif
- struct mlx5e_fs_udp *udp;
- struct mlx5e_fs_any *any;
- struct mlx5e_ptp_fs *ptp_fs;
-};
-
-void mlx5e_set_ttc_params(struct mlx5e_priv *priv,
- struct ttc_params *ttc_params, bool tunnel);
-
-void mlx5e_destroy_ttc_table(struct mlx5e_priv *priv);
-int mlx5e_create_ttc_table(struct mlx5e_priv *priv);
-
-void mlx5e_destroy_flow_table(struct mlx5e_flow_table *ft);
-
-void mlx5e_enable_cvlan_filter(struct mlx5e_priv *priv);
-void mlx5e_disable_cvlan_filter(struct mlx5e_priv *priv);
-
-int mlx5e_create_flow_steering(struct mlx5e_priv *priv);
-void mlx5e_destroy_flow_steering(struct mlx5e_priv *priv);
-
-int mlx5e_fs_init(struct mlx5e_priv *priv);
-void mlx5e_fs_cleanup(struct mlx5e_priv *priv);
-
-int mlx5e_add_vlan_trap(struct mlx5e_priv *priv, int trap_id, int tir_num);
-void mlx5e_remove_vlan_trap(struct mlx5e_priv *priv);
-int mlx5e_add_mac_trap(struct mlx5e_priv *priv, int trap_id, int tir_num);
-void mlx5e_remove_mac_trap(struct mlx5e_priv *priv);
+void mlx5e_fs_set_state_destroy(struct mlx5e_flow_steering *fs, bool state_destroy);
+void mlx5e_fs_set_vlan_strip_disable(struct mlx5e_flow_steering *fs, bool vlan_strip_disable);
+
+struct mlx5_core_dev *mlx5e_fs_get_mdev(struct mlx5e_flow_steering *fs);
+int mlx5e_add_vlan_trap(struct mlx5e_flow_steering *fs, int trap_id, int tir_num);
+void mlx5e_remove_vlan_trap(struct mlx5e_flow_steering *fs);
+int mlx5e_add_mac_trap(struct mlx5e_flow_steering *fs, int trap_id, int tir_num);
+void mlx5e_remove_mac_trap(struct mlx5e_flow_steering *fs);
+void mlx5e_fs_set_rx_mode_work(struct mlx5e_flow_steering *fs, struct net_device *netdev);
+int mlx5e_fs_vlan_rx_add_vid(struct mlx5e_flow_steering *fs,
+ struct net_device *netdev,
+ __be16 proto, u16 vid);
+int mlx5e_fs_vlan_rx_kill_vid(struct mlx5e_flow_steering *fs,
+ struct net_device *netdev,
+ __be16 proto, u16 vid);
+void mlx5e_fs_init_l2_addr(struct mlx5e_flow_steering *fs, struct net_device *netdev);
+
+#define fs_err(fs, fmt, ...) \
+ mlx5_core_err(mlx5e_fs_get_mdev(fs), fmt, ##__VA_ARGS__)
+
+#define fs_dbg(fs, fmt, ...) \
+ mlx5_core_dbg(mlx5e_fs_get_mdev(fs), fmt, ##__VA_ARGS__)
+
+#define fs_warn(fs, fmt, ...) \
+ mlx5_core_warn(mlx5e_fs_get_mdev(fs), fmt, ##__VA_ARGS__)
+
+#define fs_warn_once(fs, fmt, ...) \
+ mlx5_core_warn_once(mlx5e_fs_get_mdev(fs), fmt, ##__VA_ARGS__)
#endif /* __MLX5E_FLOW_STEER_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs_ethtool.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_ethtool.h
new file mode 100644
index 000000000000..9e276fd3c0cf
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_ethtool.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. */
+
+#ifndef __MLX5E_FS_ETHTOOL_H__
+#define __MLX5E_FS_ETHTOOL_H__
+
+struct mlx5e_priv;
+struct mlx5e_ethtool_steering;
+#ifdef CONFIG_MLX5_EN_RXNFC
+int mlx5e_ethtool_alloc(struct mlx5e_ethtool_steering **ethtool);
+void mlx5e_ethtool_free(struct mlx5e_ethtool_steering *ethtool);
+void mlx5e_ethtool_init_steering(struct mlx5e_flow_steering *fs);
+void mlx5e_ethtool_cleanup_steering(struct mlx5e_flow_steering *fs);
+int mlx5e_ethtool_set_rxnfc(struct mlx5e_priv *priv, struct ethtool_rxnfc *cmd);
+int mlx5e_ethtool_get_rxnfc(struct mlx5e_priv *priv,
+ struct ethtool_rxnfc *info, u32 *rule_locs);
+#else
+static inline int mlx5e_ethtool_alloc(struct mlx5e_ethtool_steering **ethtool)
+{ return 0; }
+static inline void mlx5e_ethtool_free(struct mlx5e_ethtool_steering *ethtool) { }
+static inline void mlx5e_ethtool_init_steering(struct mlx5e_flow_steering *fs) { }
+static inline void mlx5e_ethtool_cleanup_steering(struct mlx5e_flow_steering *fs) { }
+static inline int mlx5e_ethtool_set_rxnfc(struct mlx5e_priv *priv, struct ethtool_rxnfc *cmd)
+{ return -EOPNOTSUPP; }
+static inline int mlx5e_ethtool_get_rxnfc(struct mlx5e_priv *priv,
+ struct ethtool_rxnfc *info, u32 *rule_locs)
+{ return -EOPNOTSUPP; }
+#endif
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c
index 7aa25a5e29d7..03cb79adf912 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c
@@ -1,9 +1,9 @@
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2021, Mellanox Technologies inc. All rights reserved. */
-#include <linux/netdevice.h>
#include "en/fs_tt_redirect.h"
#include "fs_core.h"
+#include "mlx5_core.h"
enum fs_udp_type {
FS_IPV4_UDP,
@@ -74,17 +74,17 @@ static void fs_udp_set_dport_flow(struct mlx5_flow_spec *spec, enum fs_udp_type
}
struct mlx5_flow_handle *
-mlx5e_fs_tt_redirect_udp_add_rule(struct mlx5e_priv *priv,
+mlx5e_fs_tt_redirect_udp_add_rule(struct mlx5e_flow_steering *fs,
enum mlx5_traffic_types ttc_type,
u32 tir_num, u16 d_port)
{
+ struct mlx5e_fs_udp *fs_udp = mlx5e_fs_get_udp(fs);
enum fs_udp_type type = tt2fs_udp(ttc_type);
struct mlx5_flow_destination dest = {};
struct mlx5_flow_table *ft = NULL;
MLX5_DECLARE_FLOW_ACT(flow_act);
struct mlx5_flow_handle *rule;
struct mlx5_flow_spec *spec;
- struct mlx5e_fs_udp *fs_udp;
int err;
if (type == FS_UDP_NUM_TYPES)
@@ -94,7 +94,6 @@ mlx5e_fs_tt_redirect_udp_add_rule(struct mlx5e_priv *priv,
if (!spec)
return ERR_PTR(-ENOMEM);
- fs_udp = priv->fs.udp;
ft = fs_udp->tables[type].t;
fs_udp_set_dport_flow(spec, type, d_port);
@@ -106,31 +105,30 @@ mlx5e_fs_tt_redirect_udp_add_rule(struct mlx5e_priv *priv,
if (IS_ERR(rule)) {
err = PTR_ERR(rule);
- netdev_err(priv->netdev, "%s: add %s rule failed, err %d\n",
- __func__, fs_udp_type2str(type), err);
+ fs_err(fs, "%s: add %s rule failed, err %d\n",
+ __func__, fs_udp_type2str(type), err);
}
return rule;
}
-static int fs_udp_add_default_rule(struct mlx5e_priv *priv, enum fs_udp_type type)
+static int fs_udp_add_default_rule(struct mlx5e_flow_steering *fs, enum fs_udp_type type)
{
+ struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(fs, false);
+ struct mlx5e_fs_udp *fs_udp = mlx5e_fs_get_udp(fs);
struct mlx5e_flow_table *fs_udp_t;
struct mlx5_flow_destination dest;
MLX5_DECLARE_FLOW_ACT(flow_act);
struct mlx5_flow_handle *rule;
- struct mlx5e_fs_udp *fs_udp;
int err;
- fs_udp = priv->fs.udp;
fs_udp_t = &fs_udp->tables[type];
- dest = mlx5_ttc_get_default_dest(priv->fs.ttc, fs_udp2tt(type));
+ dest = mlx5_ttc_get_default_dest(ttc, fs_udp2tt(type));
rule = mlx5_add_flow_rules(fs_udp_t->t, NULL, &flow_act, &dest, 1);
if (IS_ERR(rule)) {
err = PTR_ERR(rule);
- netdev_err(priv->netdev,
- "%s: add default rule failed, fs type=%d, err %d\n",
- __func__, type, err);
+ fs_err(fs, "%s: add default rule failed, fs type=%d, err %d\n",
+ __func__, type, err);
return err;
}
@@ -206,33 +204,36 @@ out:
return err;
}
-static int fs_udp_create_table(struct mlx5e_priv *priv, enum fs_udp_type type)
+static int fs_udp_create_table(struct mlx5e_flow_steering *fs, enum fs_udp_type type)
{
- struct mlx5e_flow_table *ft = &priv->fs.udp->tables[type];
+ struct mlx5_flow_namespace *ns = mlx5e_fs_get_ns(fs, false);
+ struct mlx5e_fs_udp *fs_udp = mlx5e_fs_get_udp(fs);
struct mlx5_flow_table_attr ft_attr = {};
+ struct mlx5e_flow_table *ft;
int err;
+ ft = &fs_udp->tables[type];
ft->num_groups = 0;
ft_attr.max_fte = MLX5E_FS_UDP_TABLE_SIZE;
ft_attr.level = MLX5E_FS_TT_UDP_FT_LEVEL;
ft_attr.prio = MLX5E_NIC_PRIO;
- ft->t = mlx5_create_flow_table(priv->fs.ns, &ft_attr);
+ ft->t = mlx5_create_flow_table(ns, &ft_attr);
if (IS_ERR(ft->t)) {
err = PTR_ERR(ft->t);
ft->t = NULL;
return err;
}
- netdev_dbg(priv->netdev, "Created fs %s table id %u level %u\n",
- fs_udp_type2str(type), ft->t->id, ft->t->level);
+ mlx5_core_dbg(mlx5e_fs_get_mdev(fs), "Created fs %s table id %u level %u\n",
+ fs_udp_type2str(type), ft->t->id, ft->t->level);
err = fs_udp_create_groups(ft, type);
if (err)
goto err;
- err = fs_udp_add_default_rule(priv, type);
+ err = fs_udp_add_default_rule(fs, type);
if (err)
goto err;
@@ -253,17 +254,17 @@ static void fs_udp_destroy_table(struct mlx5e_fs_udp *fs_udp, int i)
fs_udp->tables[i].t = NULL;
}
-static int fs_udp_disable(struct mlx5e_priv *priv)
+static int fs_udp_disable(struct mlx5e_flow_steering *fs)
{
+ struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(fs, false);
int err, i;
for (i = 0; i < FS_UDP_NUM_TYPES; i++) {
/* Modify ttc rules destination to point back to the indir TIRs */
- err = mlx5_ttc_fwd_default_dest(priv->fs.ttc, fs_udp2tt(i));
+ err = mlx5_ttc_fwd_default_dest(ttc, fs_udp2tt(i));
if (err) {
- netdev_err(priv->netdev,
- "%s: modify ttc[%d] default destination failed, err(%d)\n",
- __func__, fs_udp2tt(i), err);
+ fs_err(fs, "%s: modify ttc[%d] default destination failed, err(%d)\n",
+ __func__, fs_udp2tt(i), err);
return err;
}
}
@@ -271,30 +272,31 @@ static int fs_udp_disable(struct mlx5e_priv *priv)
return 0;
}
-static int fs_udp_enable(struct mlx5e_priv *priv)
+static int fs_udp_enable(struct mlx5e_flow_steering *fs)
{
+ struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(fs, false);
+ struct mlx5e_fs_udp *udp = mlx5e_fs_get_udp(fs);
struct mlx5_flow_destination dest = {};
int err, i;
dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
for (i = 0; i < FS_UDP_NUM_TYPES; i++) {
- dest.ft = priv->fs.udp->tables[i].t;
+ dest.ft = udp->tables[i].t;
/* Modify ttc rules destination to point on the accel_fs FTs */
- err = mlx5_ttc_fwd_dest(priv->fs.ttc, fs_udp2tt(i), &dest);
+ err = mlx5_ttc_fwd_dest(ttc, fs_udp2tt(i), &dest);
if (err) {
- netdev_err(priv->netdev,
- "%s: modify ttc[%d] destination to accel failed, err(%d)\n",
- __func__, fs_udp2tt(i), err);
+ fs_err(fs, "%s: modify ttc[%d] destination to accel failed, err(%d)\n",
+ __func__, fs_udp2tt(i), err);
return err;
}
}
return 0;
}
-void mlx5e_fs_tt_redirect_udp_destroy(struct mlx5e_priv *priv)
+void mlx5e_fs_tt_redirect_udp_destroy(struct mlx5e_flow_steering *fs)
{
- struct mlx5e_fs_udp *fs_udp = priv->fs.udp;
+ struct mlx5e_fs_udp *fs_udp = mlx5e_fs_get_udp(fs);
int i;
if (!fs_udp)
@@ -303,48 +305,50 @@ void mlx5e_fs_tt_redirect_udp_destroy(struct mlx5e_priv *priv)
if (--fs_udp->ref_cnt)
return;
- fs_udp_disable(priv);
+ fs_udp_disable(fs);
for (i = 0; i < FS_UDP_NUM_TYPES; i++)
fs_udp_destroy_table(fs_udp, i);
kfree(fs_udp);
- priv->fs.udp = NULL;
+ mlx5e_fs_set_udp(fs, NULL);
}
-int mlx5e_fs_tt_redirect_udp_create(struct mlx5e_priv *priv)
+int mlx5e_fs_tt_redirect_udp_create(struct mlx5e_flow_steering *fs)
{
+ struct mlx5e_fs_udp *udp = mlx5e_fs_get_udp(fs);
int i, err;
- if (priv->fs.udp) {
- priv->fs.udp->ref_cnt++;
+ if (udp) {
+ udp->ref_cnt++;
return 0;
}
- priv->fs.udp = kzalloc(sizeof(*priv->fs.udp), GFP_KERNEL);
- if (!priv->fs.udp)
+ udp = kzalloc(sizeof(*udp), GFP_KERNEL);
+ if (!udp)
return -ENOMEM;
+ mlx5e_fs_set_udp(fs, udp);
for (i = 0; i < FS_UDP_NUM_TYPES; i++) {
- err = fs_udp_create_table(priv, i);
+ err = fs_udp_create_table(fs, i);
if (err)
goto err_destroy_tables;
}
- err = fs_udp_enable(priv);
+ err = fs_udp_enable(fs);
if (err)
goto err_destroy_tables;
- priv->fs.udp->ref_cnt = 1;
+ udp->ref_cnt = 1;
return 0;
err_destroy_tables:
while (--i >= 0)
- fs_udp_destroy_table(priv->fs.udp, i);
+ fs_udp_destroy_table(udp, i);
- kfree(priv->fs.udp);
- priv->fs.udp = NULL;
+ kfree(udp);
+ mlx5e_fs_set_udp(fs, NULL);
return err;
}
@@ -356,22 +360,21 @@ static void fs_any_set_ethertype_flow(struct mlx5_flow_spec *spec, u16 ether_typ
}
struct mlx5_flow_handle *
-mlx5e_fs_tt_redirect_any_add_rule(struct mlx5e_priv *priv,
+mlx5e_fs_tt_redirect_any_add_rule(struct mlx5e_flow_steering *fs,
u32 tir_num, u16 ether_type)
{
+ struct mlx5e_fs_any *fs_any = mlx5e_fs_get_any(fs);
struct mlx5_flow_destination dest = {};
struct mlx5_flow_table *ft = NULL;
MLX5_DECLARE_FLOW_ACT(flow_act);
struct mlx5_flow_handle *rule;
struct mlx5_flow_spec *spec;
- struct mlx5e_fs_any *fs_any;
int err;
spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
if (!spec)
return ERR_PTR(-ENOMEM);
- fs_any = priv->fs.any;
ft = fs_any->table.t;
fs_any_set_ethertype_flow(spec, ether_type);
@@ -383,31 +386,29 @@ mlx5e_fs_tt_redirect_any_add_rule(struct mlx5e_priv *priv,
if (IS_ERR(rule)) {
err = PTR_ERR(rule);
- netdev_err(priv->netdev, "%s: add ANY rule failed, err %d\n",
- __func__, err);
+ fs_err(fs, "%s: add ANY rule failed, err %d\n",
+ __func__, err);
}
return rule;
}
-static int fs_any_add_default_rule(struct mlx5e_priv *priv)
+static int fs_any_add_default_rule(struct mlx5e_flow_steering *fs)
{
+ struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(fs, false);
+ struct mlx5e_fs_any *fs_any = mlx5e_fs_get_any(fs);
struct mlx5e_flow_table *fs_any_t;
struct mlx5_flow_destination dest;
MLX5_DECLARE_FLOW_ACT(flow_act);
struct mlx5_flow_handle *rule;
- struct mlx5e_fs_any *fs_any;
int err;
- fs_any = priv->fs.any;
fs_any_t = &fs_any->table;
-
- dest = mlx5_ttc_get_default_dest(priv->fs.ttc, MLX5_TT_ANY);
+ dest = mlx5_ttc_get_default_dest(ttc, MLX5_TT_ANY);
rule = mlx5_add_flow_rules(fs_any_t->t, NULL, &flow_act, &dest, 1);
if (IS_ERR(rule)) {
err = PTR_ERR(rule);
- netdev_err(priv->netdev,
- "%s: add default rule failed, fs type=ANY, err %d\n",
- __func__, err);
+ fs_err(fs, "%s: add default rule failed, fs type=ANY, err %d\n",
+ __func__, err);
return err;
}
@@ -472,9 +473,11 @@ err:
return err;
}
-static int fs_any_create_table(struct mlx5e_priv *priv)
+static int fs_any_create_table(struct mlx5e_flow_steering *fs)
{
- struct mlx5e_flow_table *ft = &priv->fs.any->table;
+ struct mlx5_flow_namespace *ns = mlx5e_fs_get_ns(fs, false);
+ struct mlx5e_fs_any *fs_any = mlx5e_fs_get_any(fs);
+ struct mlx5e_flow_table *ft = &fs_any->table;
struct mlx5_flow_table_attr ft_attr = {};
int err;
@@ -484,21 +487,21 @@ static int fs_any_create_table(struct mlx5e_priv *priv)
ft_attr.level = MLX5E_FS_TT_ANY_FT_LEVEL;
ft_attr.prio = MLX5E_NIC_PRIO;
- ft->t = mlx5_create_flow_table(priv->fs.ns, &ft_attr);
+ ft->t = mlx5_create_flow_table(ns, &ft_attr);
if (IS_ERR(ft->t)) {
err = PTR_ERR(ft->t);
ft->t = NULL;
return err;
}
- netdev_dbg(priv->netdev, "Created fs ANY table id %u level %u\n",
- ft->t->id, ft->t->level);
+ mlx5_core_dbg(mlx5e_fs_get_mdev(fs), "Created fs ANY table id %u level %u\n",
+ ft->t->id, ft->t->level);
err = fs_any_create_groups(ft);
if (err)
goto err;
- err = fs_any_add_default_rule(priv);
+ err = fs_any_add_default_rule(fs);
if (err)
goto err;
@@ -509,35 +512,38 @@ err:
return err;
}
-static int fs_any_disable(struct mlx5e_priv *priv)
+static int fs_any_disable(struct mlx5e_flow_steering *fs)
{
+ struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(fs, false);
int err;
/* Modify ttc rules destination to point back to the indir TIRs */
- err = mlx5_ttc_fwd_default_dest(priv->fs.ttc, MLX5_TT_ANY);
+ err = mlx5_ttc_fwd_default_dest(ttc, MLX5_TT_ANY);
if (err) {
- netdev_err(priv->netdev,
- "%s: modify ttc[%d] default destination failed, err(%d)\n",
- __func__, MLX5_TT_ANY, err);
+ fs_err(fs,
+ "%s: modify ttc[%d] default destination failed, err(%d)\n",
+ __func__, MLX5_TT_ANY, err);
return err;
}
return 0;
}
-static int fs_any_enable(struct mlx5e_priv *priv)
+static int fs_any_enable(struct mlx5e_flow_steering *fs)
{
+ struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(fs, false);
+ struct mlx5e_fs_any *any = mlx5e_fs_get_any(fs);
struct mlx5_flow_destination dest = {};
int err;
dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
- dest.ft = priv->fs.any->table.t;
+ dest.ft = any->table.t;
/* Modify ttc rules destination to point on the accel_fs FTs */
- err = mlx5_ttc_fwd_dest(priv->fs.ttc, MLX5_TT_ANY, &dest);
+ err = mlx5_ttc_fwd_dest(ttc, MLX5_TT_ANY, &dest);
if (err) {
- netdev_err(priv->netdev,
- "%s: modify ttc[%d] destination to accel failed, err(%d)\n",
- __func__, MLX5_TT_ANY, err);
+ fs_err(fs,
+ "%s: modify ttc[%d] destination to accel failed, err(%d)\n",
+ __func__, MLX5_TT_ANY, err);
return err;
}
return 0;
@@ -553,9 +559,9 @@ static void fs_any_destroy_table(struct mlx5e_fs_any *fs_any)
fs_any->table.t = NULL;
}
-void mlx5e_fs_tt_redirect_any_destroy(struct mlx5e_priv *priv)
+void mlx5e_fs_tt_redirect_any_destroy(struct mlx5e_flow_steering *fs)
{
- struct mlx5e_fs_any *fs_any = priv->fs.any;
+ struct mlx5e_fs_any *fs_any = mlx5e_fs_get_any(fs);
if (!fs_any)
return;
@@ -563,43 +569,45 @@ void mlx5e_fs_tt_redirect_any_destroy(struct mlx5e_priv *priv)
if (--fs_any->ref_cnt)
return;
- fs_any_disable(priv);
+ fs_any_disable(fs);
fs_any_destroy_table(fs_any);
kfree(fs_any);
- priv->fs.any = NULL;
+ mlx5e_fs_set_any(fs, NULL);
}
-int mlx5e_fs_tt_redirect_any_create(struct mlx5e_priv *priv)
+int mlx5e_fs_tt_redirect_any_create(struct mlx5e_flow_steering *fs)
{
+ struct mlx5e_fs_any *fs_any = mlx5e_fs_get_any(fs);
int err;
- if (priv->fs.any) {
- priv->fs.any->ref_cnt++;
+ if (fs_any) {
+ fs_any->ref_cnt++;
return 0;
}
- priv->fs.any = kzalloc(sizeof(*priv->fs.any), GFP_KERNEL);
- if (!priv->fs.any)
+ fs_any = kzalloc(sizeof(*fs_any), GFP_KERNEL);
+ if (!fs_any)
return -ENOMEM;
+ mlx5e_fs_set_any(fs, fs_any);
- err = fs_any_create_table(priv);
+ err = fs_any_create_table(fs);
if (err)
return err;
- err = fs_any_enable(priv);
+ err = fs_any_enable(fs);
if (err)
goto err_destroy_table;
- priv->fs.any->ref_cnt = 1;
+ fs_any->ref_cnt = 1;
return 0;
err_destroy_table:
- fs_any_destroy_table(priv->fs.any);
+ fs_any_destroy_table(fs_any);
- kfree(priv->fs.any);
- priv->fs.any = NULL;
+ kfree(fs_any);
+ mlx5e_fs_set_any(fs, NULL);
return err;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.h
index 7a70c4f38fda..5780fd7ad507 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.h
@@ -4,23 +4,22 @@
#ifndef __MLX5E_FS_TT_REDIRECT_H__
#define __MLX5E_FS_TT_REDIRECT_H__
-#include "en.h"
#include "en/fs.h"
void mlx5e_fs_tt_redirect_del_rule(struct mlx5_flow_handle *rule);
/* UDP traffic type redirect */
struct mlx5_flow_handle *
-mlx5e_fs_tt_redirect_udp_add_rule(struct mlx5e_priv *priv,
+mlx5e_fs_tt_redirect_udp_add_rule(struct mlx5e_flow_steering *fs,
enum mlx5_traffic_types ttc_type,
u32 tir_num, u16 d_port);
-void mlx5e_fs_tt_redirect_udp_destroy(struct mlx5e_priv *priv);
-int mlx5e_fs_tt_redirect_udp_create(struct mlx5e_priv *priv);
+void mlx5e_fs_tt_redirect_udp_destroy(struct mlx5e_flow_steering *fs);
+int mlx5e_fs_tt_redirect_udp_create(struct mlx5e_flow_steering *fs);
/* ANY traffic type redirect*/
struct mlx5_flow_handle *
-mlx5e_fs_tt_redirect_any_add_rule(struct mlx5e_priv *priv,
+mlx5e_fs_tt_redirect_any_add_rule(struct mlx5e_flow_steering *fs,
u32 tir_num, u16 ether_type);
-void mlx5e_fs_tt_redirect_any_destroy(struct mlx5e_priv *priv);
-int mlx5e_fs_tt_redirect_any_create(struct mlx5e_priv *priv);
+void mlx5e_fs_tt_redirect_any_destroy(struct mlx5e_flow_steering *fs);
+int mlx5e_fs_tt_redirect_any_create(struct mlx5e_flow_steering *fs);
#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/health.h b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h
index d5b7110a4265..0107e4e73bb0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/health.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h
@@ -30,6 +30,8 @@ void mlx5e_reporter_rx_destroy(struct mlx5e_priv *priv);
void mlx5e_reporter_icosq_cqe_err(struct mlx5e_icosq *icosq);
void mlx5e_reporter_rq_cqe_err(struct mlx5e_rq *rq);
void mlx5e_reporter_rx_timeout(struct mlx5e_rq *rq);
+void mlx5e_reporter_icosq_suspend_recovery(struct mlx5e_channel *c);
+void mlx5e_reporter_icosq_resume_recovery(struct mlx5e_channel *c);
#define MLX5E_REPORTER_PER_Q_MAX_LEN 256
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/htb.c b/drivers/net/ethernet/mellanox/mlx5/core/en/htb.c
new file mode 100644
index 000000000000..6dac76fa58a3
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/htb.c
@@ -0,0 +1,722 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#include <net/pkt_cls.h>
+#include "htb.h"
+#include "en.h"
+#include "../qos.h"
+
+struct mlx5e_qos_node {
+ struct hlist_node hnode;
+ struct mlx5e_qos_node *parent;
+ u64 rate;
+ u32 bw_share;
+ u32 max_average_bw;
+ u32 hw_id;
+ u32 classid; /* 16-bit, except root. */
+ u16 qid;
+};
+
+struct mlx5e_htb {
+ DECLARE_HASHTABLE(qos_tc2node, order_base_2(MLX5E_QOS_MAX_LEAF_NODES));
+ DECLARE_BITMAP(qos_used_qids, MLX5E_QOS_MAX_LEAF_NODES);
+ struct mlx5_core_dev *mdev;
+ struct net_device *netdev;
+ struct mlx5e_priv *priv;
+ struct mlx5e_selq *selq;
+};
+
+#define MLX5E_QOS_QID_INNER 0xffff
+#define MLX5E_HTB_CLASSID_ROOT 0xffffffff
+
+/* Software representation of the QoS tree */
+
+int mlx5e_htb_enumerate_leaves(struct mlx5e_htb *htb, mlx5e_fp_htb_enumerate callback, void *data)
+{
+ struct mlx5e_qos_node *node = NULL;
+ int bkt, err;
+
+ hash_for_each(htb->qos_tc2node, bkt, node, hnode) {
+ if (node->qid == MLX5E_QOS_QID_INNER)
+ continue;
+ err = callback(data, node->qid, node->hw_id);
+ if (err)
+ return err;
+ }
+ return 0;
+}
+
+int mlx5e_htb_cur_leaf_nodes(struct mlx5e_htb *htb)
+{
+ int last;
+
+ last = find_last_bit(htb->qos_used_qids, mlx5e_qos_max_leaf_nodes(htb->mdev));
+ return last == mlx5e_qos_max_leaf_nodes(htb->mdev) ? 0 : last + 1;
+}
+
+static int mlx5e_htb_find_unused_qos_qid(struct mlx5e_htb *htb)
+{
+ int size = mlx5e_qos_max_leaf_nodes(htb->mdev);
+ struct mlx5e_priv *priv = htb->priv;
+ int res;
+
+ WARN_ONCE(!mutex_is_locked(&priv->state_lock), "%s: state_lock is not held\n", __func__);
+ res = find_first_zero_bit(htb->qos_used_qids, size);
+
+ return res == size ? -ENOSPC : res;
+}
+
+static struct mlx5e_qos_node *
+mlx5e_htb_node_create_leaf(struct mlx5e_htb *htb, u16 classid, u16 qid,
+ struct mlx5e_qos_node *parent)
+{
+ struct mlx5e_qos_node *node;
+
+ node = kzalloc(sizeof(*node), GFP_KERNEL);
+ if (!node)
+ return ERR_PTR(-ENOMEM);
+
+ node->parent = parent;
+
+ node->qid = qid;
+ __set_bit(qid, htb->qos_used_qids);
+
+ node->classid = classid;
+ hash_add_rcu(htb->qos_tc2node, &node->hnode, classid);
+
+ mlx5e_update_tx_netdev_queues(htb->priv);
+
+ return node;
+}
+
+static struct mlx5e_qos_node *mlx5e_htb_node_create_root(struct mlx5e_htb *htb)
+{
+ struct mlx5e_qos_node *node;
+
+ node = kzalloc(sizeof(*node), GFP_KERNEL);
+ if (!node)
+ return ERR_PTR(-ENOMEM);
+
+ node->qid = MLX5E_QOS_QID_INNER;
+ node->classid = MLX5E_HTB_CLASSID_ROOT;
+ hash_add_rcu(htb->qos_tc2node, &node->hnode, node->classid);
+
+ return node;
+}
+
+static struct mlx5e_qos_node *mlx5e_htb_node_find(struct mlx5e_htb *htb, u32 classid)
+{
+ struct mlx5e_qos_node *node = NULL;
+
+ hash_for_each_possible(htb->qos_tc2node, node, hnode, classid) {
+ if (node->classid == classid)
+ break;
+ }
+
+ return node;
+}
+
+static struct mlx5e_qos_node *mlx5e_htb_node_find_rcu(struct mlx5e_htb *htb, u32 classid)
+{
+ struct mlx5e_qos_node *node = NULL;
+
+ hash_for_each_possible_rcu(htb->qos_tc2node, node, hnode, classid) {
+ if (node->classid == classid)
+ break;
+ }
+
+ return node;
+}
+
+static void mlx5e_htb_node_delete(struct mlx5e_htb *htb, struct mlx5e_qos_node *node)
+{
+ hash_del_rcu(&node->hnode);
+ if (node->qid != MLX5E_QOS_QID_INNER) {
+ __clear_bit(node->qid, htb->qos_used_qids);
+ mlx5e_update_tx_netdev_queues(htb->priv);
+ }
+ /* Make sure this qid is no longer selected by mlx5e_select_queue, so
+ * that mlx5e_reactivate_qos_sq can safely restart the netdev TX queue.
+ */
+ synchronize_net();
+ kfree(node);
+}
+
+/* TX datapath API */
+
+int mlx5e_htb_get_txq_by_classid(struct mlx5e_htb *htb, u16 classid)
+{
+ struct mlx5e_qos_node *node;
+ u16 qid;
+ int res;
+
+ rcu_read_lock();
+
+ node = mlx5e_htb_node_find_rcu(htb, classid);
+ if (!node) {
+ res = -ENOENT;
+ goto out;
+ }
+ qid = READ_ONCE(node->qid);
+ if (qid == MLX5E_QOS_QID_INNER) {
+ res = -EINVAL;
+ goto out;
+ }
+ res = mlx5e_qid_from_qos(&htb->priv->channels, qid);
+
+out:
+ rcu_read_unlock();
+ return res;
+}
+
+/* HTB TC handlers */
+
+static int
+mlx5e_htb_root_add(struct mlx5e_htb *htb, u16 htb_maj_id, u16 htb_defcls,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5e_priv *priv = htb->priv;
+ struct mlx5e_qos_node *root;
+ bool opened;
+ int err;
+
+ qos_dbg(htb->mdev, "TC_HTB_CREATE handle %04x:, default :%04x\n", htb_maj_id, htb_defcls);
+
+ mlx5e_selq_prepare_htb(htb->selq, htb_maj_id, htb_defcls);
+
+ opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
+ if (opened) {
+ err = mlx5e_qos_alloc_queues(priv, &priv->channels);
+ if (err)
+ goto err_cancel_selq;
+ }
+
+ root = mlx5e_htb_node_create_root(htb);
+ if (IS_ERR(root)) {
+ err = PTR_ERR(root);
+ goto err_free_queues;
+ }
+
+ err = mlx5_qos_create_root_node(htb->mdev, &root->hw_id);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Firmware error. Try upgrading firmware.");
+ goto err_sw_node_delete;
+ }
+
+ mlx5e_selq_apply(htb->selq);
+
+ return 0;
+
+err_sw_node_delete:
+ mlx5e_htb_node_delete(htb, root);
+
+err_free_queues:
+ if (opened)
+ mlx5e_qos_close_all_queues(&priv->channels);
+err_cancel_selq:
+ mlx5e_selq_cancel(htb->selq);
+ return err;
+}
+
+static int mlx5e_htb_root_del(struct mlx5e_htb *htb)
+{
+ struct mlx5e_priv *priv = htb->priv;
+ struct mlx5e_qos_node *root;
+ int err;
+
+ qos_dbg(htb->mdev, "TC_HTB_DESTROY\n");
+
+ /* Wait until real_num_tx_queues is updated for mlx5e_select_queue,
+ * so that we can safely switch to its non-HTB non-PTP fastpath.
+ */
+ synchronize_net();
+
+ mlx5e_selq_prepare_htb(htb->selq, 0, 0);
+ mlx5e_selq_apply(htb->selq);
+
+ root = mlx5e_htb_node_find(htb, MLX5E_HTB_CLASSID_ROOT);
+ if (!root) {
+ qos_err(htb->mdev, "Failed to find the root node in the QoS tree\n");
+ return -ENOENT;
+ }
+ err = mlx5_qos_destroy_node(htb->mdev, root->hw_id);
+ if (err)
+ qos_err(htb->mdev, "Failed to destroy root node %u, err = %d\n",
+ root->hw_id, err);
+ mlx5e_htb_node_delete(htb, root);
+
+ mlx5e_qos_deactivate_all_queues(&priv->channels);
+ mlx5e_qos_close_all_queues(&priv->channels);
+
+ return err;
+}
+
+static int mlx5e_htb_convert_rate(struct mlx5e_htb *htb, u64 rate,
+ struct mlx5e_qos_node *parent, u32 *bw_share)
+{
+ u64 share = 0;
+
+ while (parent->classid != MLX5E_HTB_CLASSID_ROOT && !parent->max_average_bw)
+ parent = parent->parent;
+
+ if (parent->max_average_bw)
+ share = div64_u64(div_u64(rate * 100, BYTES_IN_MBIT),
+ parent->max_average_bw);
+ else
+ share = 101;
+
+ *bw_share = share == 0 ? 1 : share > 100 ? 0 : share;
+
+ qos_dbg(htb->mdev, "Convert: rate %llu, parent ceil %llu -> bw_share %u\n",
+ rate, (u64)parent->max_average_bw * BYTES_IN_MBIT, *bw_share);
+
+ return 0;
+}
+
+static void mlx5e_htb_convert_ceil(struct mlx5e_htb *htb, u64 ceil, u32 *max_average_bw)
+{
+ /* Hardware treats 0 as "unlimited", set at least 1. */
+ *max_average_bw = max_t(u32, div_u64(ceil, BYTES_IN_MBIT), 1);
+
+ qos_dbg(htb->mdev, "Convert: ceil %llu -> max_average_bw %u\n",
+ ceil, *max_average_bw);
+}
+
+int
+mlx5e_htb_leaf_alloc_queue(struct mlx5e_htb *htb, u16 classid,
+ u32 parent_classid, u64 rate, u64 ceil,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5e_qos_node *node, *parent;
+ struct mlx5e_priv *priv = htb->priv;
+ int qid;
+ int err;
+
+ qos_dbg(htb->mdev, "TC_HTB_LEAF_ALLOC_QUEUE classid %04x, parent %04x, rate %llu, ceil %llu\n",
+ classid, parent_classid, rate, ceil);
+
+ qid = mlx5e_htb_find_unused_qos_qid(htb);
+ if (qid < 0) {
+ NL_SET_ERR_MSG_MOD(extack, "Maximum amount of leaf classes is reached.");
+ return qid;
+ }
+
+ parent = mlx5e_htb_node_find(htb, parent_classid);
+ if (!parent)
+ return -EINVAL;
+
+ node = mlx5e_htb_node_create_leaf(htb, classid, qid, parent);
+ if (IS_ERR(node))
+ return PTR_ERR(node);
+
+ node->rate = rate;
+ mlx5e_htb_convert_rate(htb, rate, node->parent, &node->bw_share);
+ mlx5e_htb_convert_ceil(htb, ceil, &node->max_average_bw);
+
+ err = mlx5_qos_create_leaf_node(htb->mdev, node->parent->hw_id,
+ node->bw_share, node->max_average_bw,
+ &node->hw_id);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Firmware error when creating a leaf node.");
+ qos_err(htb->mdev, "Failed to create a leaf node (class %04x), err = %d\n",
+ classid, err);
+ mlx5e_htb_node_delete(htb, node);
+ return err;
+ }
+
+ if (test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+ err = mlx5e_open_qos_sq(priv, &priv->channels, node->qid, node->hw_id);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Error creating an SQ.");
+ qos_warn(htb->mdev, "Failed to create a QoS SQ (class %04x), err = %d\n",
+ classid, err);
+ } else {
+ mlx5e_activate_qos_sq(priv, node->qid, node->hw_id);
+ }
+ }
+
+ return mlx5e_qid_from_qos(&priv->channels, node->qid);
+}
+
+int
+mlx5e_htb_leaf_to_inner(struct mlx5e_htb *htb, u16 classid, u16 child_classid,
+ u64 rate, u64 ceil, struct netlink_ext_ack *extack)
+{
+ struct mlx5e_qos_node *node, *child;
+ struct mlx5e_priv *priv = htb->priv;
+ int err, tmp_err;
+ u32 new_hw_id;
+ u16 qid;
+
+ qos_dbg(htb->mdev, "TC_HTB_LEAF_TO_INNER classid %04x, upcoming child %04x, rate %llu, ceil %llu\n",
+ classid, child_classid, rate, ceil);
+
+ node = mlx5e_htb_node_find(htb, classid);
+ if (!node)
+ return -ENOENT;
+
+ err = mlx5_qos_create_inner_node(htb->mdev, node->parent->hw_id,
+ node->bw_share, node->max_average_bw,
+ &new_hw_id);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Firmware error when creating an inner node.");
+ qos_err(htb->mdev, "Failed to create an inner node (class %04x), err = %d\n",
+ classid, err);
+ return err;
+ }
+
+ /* Intentionally reuse the qid for the upcoming first child. */
+ child = mlx5e_htb_node_create_leaf(htb, child_classid, node->qid, node);
+ if (IS_ERR(child)) {
+ err = PTR_ERR(child);
+ goto err_destroy_hw_node;
+ }
+
+ child->rate = rate;
+ mlx5e_htb_convert_rate(htb, rate, node, &child->bw_share);
+ mlx5e_htb_convert_ceil(htb, ceil, &child->max_average_bw);
+
+ err = mlx5_qos_create_leaf_node(htb->mdev, new_hw_id, child->bw_share,
+ child->max_average_bw, &child->hw_id);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Firmware error when creating a leaf node.");
+ qos_err(htb->mdev, "Failed to create a leaf node (class %04x), err = %d\n",
+ classid, err);
+ goto err_delete_sw_node;
+ }
+
+ /* No fail point. */
+
+ qid = node->qid;
+ /* Pairs with mlx5e_htb_get_txq_by_classid. */
+ WRITE_ONCE(node->qid, MLX5E_QOS_QID_INNER);
+
+ if (test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+ mlx5e_deactivate_qos_sq(priv, qid);
+ mlx5e_close_qos_sq(priv, qid);
+ }
+
+ err = mlx5_qos_destroy_node(htb->mdev, node->hw_id);
+ if (err) /* Not fatal. */
+ qos_warn(htb->mdev, "Failed to destroy leaf node %u (class %04x), err = %d\n",
+ node->hw_id, classid, err);
+
+ node->hw_id = new_hw_id;
+
+ if (test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+ err = mlx5e_open_qos_sq(priv, &priv->channels, child->qid, child->hw_id);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Error creating an SQ.");
+ qos_warn(htb->mdev, "Failed to create a QoS SQ (class %04x), err = %d\n",
+ classid, err);
+ } else {
+ mlx5e_activate_qos_sq(priv, child->qid, child->hw_id);
+ }
+ }
+
+ return 0;
+
+err_delete_sw_node:
+ child->qid = MLX5E_QOS_QID_INNER;
+ mlx5e_htb_node_delete(htb, child);
+
+err_destroy_hw_node:
+ tmp_err = mlx5_qos_destroy_node(htb->mdev, new_hw_id);
+ if (tmp_err) /* Not fatal. */
+ qos_warn(htb->mdev, "Failed to roll back creation of an inner node %u (class %04x), err = %d\n",
+ new_hw_id, classid, tmp_err);
+ return err;
+}
+
+static struct mlx5e_qos_node *mlx5e_htb_node_find_by_qid(struct mlx5e_htb *htb, u16 qid)
+{
+ struct mlx5e_qos_node *node = NULL;
+ int bkt;
+
+ hash_for_each(htb->qos_tc2node, bkt, node, hnode)
+ if (node->qid == qid)
+ break;
+
+ return node;
+}
+
+int mlx5e_htb_leaf_del(struct mlx5e_htb *htb, u16 *classid,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5e_priv *priv = htb->priv;
+ struct mlx5e_qos_node *node;
+ struct netdev_queue *txq;
+ u16 qid, moved_qid;
+ bool opened;
+ int err;
+
+ qos_dbg(htb->mdev, "TC_HTB_LEAF_DEL classid %04x\n", *classid);
+
+ node = mlx5e_htb_node_find(htb, *classid);
+ if (!node)
+ return -ENOENT;
+
+ /* Store qid for reuse. */
+ qid = node->qid;
+
+ opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
+ if (opened) {
+ txq = netdev_get_tx_queue(htb->netdev,
+ mlx5e_qid_from_qos(&priv->channels, qid));
+ mlx5e_deactivate_qos_sq(priv, qid);
+ mlx5e_close_qos_sq(priv, qid);
+ }
+
+ err = mlx5_qos_destroy_node(htb->mdev, node->hw_id);
+ if (err) /* Not fatal. */
+ qos_warn(htb->mdev, "Failed to destroy leaf node %u (class %04x), err = %d\n",
+ node->hw_id, *classid, err);
+
+ mlx5e_htb_node_delete(htb, node);
+
+ moved_qid = mlx5e_htb_cur_leaf_nodes(htb);
+
+ if (moved_qid == 0) {
+ /* The last QoS SQ was just destroyed. */
+ if (opened)
+ mlx5e_reactivate_qos_sq(priv, qid, txq);
+ return 0;
+ }
+ moved_qid--;
+
+ if (moved_qid < qid) {
+ /* The highest QoS SQ was just destroyed. */
+ WARN(moved_qid != qid - 1, "Gaps in queue numeration: destroyed queue %u, the highest queue is %u",
+ qid, moved_qid);
+ if (opened)
+ mlx5e_reactivate_qos_sq(priv, qid, txq);
+ return 0;
+ }
+
+ WARN(moved_qid == qid, "Can't move node with qid %u to itself", qid);
+ qos_dbg(htb->mdev, "Moving QoS SQ %u to %u\n", moved_qid, qid);
+
+ node = mlx5e_htb_node_find_by_qid(htb, moved_qid);
+ WARN(!node, "Could not find a node with qid %u to move to queue %u",
+ moved_qid, qid);
+
+ /* Stop traffic to the old queue. */
+ WRITE_ONCE(node->qid, MLX5E_QOS_QID_INNER);
+ __clear_bit(moved_qid, priv->htb->qos_used_qids);
+
+ if (opened) {
+ txq = netdev_get_tx_queue(htb->netdev,
+ mlx5e_qid_from_qos(&priv->channels, moved_qid));
+ mlx5e_deactivate_qos_sq(priv, moved_qid);
+ mlx5e_close_qos_sq(priv, moved_qid);
+ }
+
+ /* Prevent packets from the old class from getting into the new one. */
+ mlx5e_reset_qdisc(htb->netdev, moved_qid);
+
+ __set_bit(qid, htb->qos_used_qids);
+ WRITE_ONCE(node->qid, qid);
+
+ if (test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+ err = mlx5e_open_qos_sq(priv, &priv->channels, node->qid, node->hw_id);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Error creating an SQ.");
+ qos_warn(htb->mdev, "Failed to create a QoS SQ (class %04x) while moving qid %u to %u, err = %d\n",
+ node->classid, moved_qid, qid, err);
+ } else {
+ mlx5e_activate_qos_sq(priv, node->qid, node->hw_id);
+ }
+ }
+
+ mlx5e_update_tx_netdev_queues(priv);
+ if (opened)
+ mlx5e_reactivate_qos_sq(priv, moved_qid, txq);
+
+ *classid = node->classid;
+ return 0;
+}
+
+int
+mlx5e_htb_leaf_del_last(struct mlx5e_htb *htb, u16 classid, bool force,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5e_qos_node *node, *parent;
+ struct mlx5e_priv *priv = htb->priv;
+ u32 old_hw_id, new_hw_id;
+ int err, saved_err = 0;
+ u16 qid;
+
+ qos_dbg(htb->mdev, "TC_HTB_LEAF_DEL_LAST%s classid %04x\n",
+ force ? "_FORCE" : "", classid);
+
+ node = mlx5e_htb_node_find(htb, classid);
+ if (!node)
+ return -ENOENT;
+
+ err = mlx5_qos_create_leaf_node(htb->mdev, node->parent->parent->hw_id,
+ node->parent->bw_share,
+ node->parent->max_average_bw,
+ &new_hw_id);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Firmware error when creating a leaf node.");
+ qos_err(htb->mdev, "Failed to create a leaf node (class %04x), err = %d\n",
+ classid, err);
+ if (!force)
+ return err;
+ saved_err = err;
+ }
+
+ /* Store qid for reuse and prevent clearing the bit. */
+ qid = node->qid;
+ /* Pairs with mlx5e_htb_get_txq_by_classid. */
+ WRITE_ONCE(node->qid, MLX5E_QOS_QID_INNER);
+
+ if (test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+ mlx5e_deactivate_qos_sq(priv, qid);
+ mlx5e_close_qos_sq(priv, qid);
+ }
+
+ /* Prevent packets from the old class from getting into the new one. */
+ mlx5e_reset_qdisc(htb->netdev, qid);
+
+ err = mlx5_qos_destroy_node(htb->mdev, node->hw_id);
+ if (err) /* Not fatal. */
+ qos_warn(htb->mdev, "Failed to destroy leaf node %u (class %04x), err = %d\n",
+ node->hw_id, classid, err);
+
+ parent = node->parent;
+ mlx5e_htb_node_delete(htb, node);
+
+ node = parent;
+ WRITE_ONCE(node->qid, qid);
+
+ /* Early return on error in force mode. Parent will still be an inner
+ * node to be deleted by a following delete operation.
+ */
+ if (saved_err)
+ return saved_err;
+
+ old_hw_id = node->hw_id;
+ node->hw_id = new_hw_id;
+
+ if (test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+ err = mlx5e_open_qos_sq(priv, &priv->channels, node->qid, node->hw_id);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Error creating an SQ.");
+ qos_warn(htb->mdev, "Failed to create a QoS SQ (class %04x), err = %d\n",
+ classid, err);
+ } else {
+ mlx5e_activate_qos_sq(priv, node->qid, node->hw_id);
+ }
+ }
+
+ err = mlx5_qos_destroy_node(htb->mdev, old_hw_id);
+ if (err) /* Not fatal. */
+ qos_warn(htb->mdev, "Failed to destroy leaf node %u (class %04x), err = %d\n",
+ node->hw_id, classid, err);
+
+ return 0;
+}
+
+static int
+mlx5e_htb_update_children(struct mlx5e_htb *htb, struct mlx5e_qos_node *node,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5e_qos_node *child;
+ int err = 0;
+ int bkt;
+
+ hash_for_each(htb->qos_tc2node, bkt, child, hnode) {
+ u32 old_bw_share = child->bw_share;
+ int err_one;
+
+ if (child->parent != node)
+ continue;
+
+ mlx5e_htb_convert_rate(htb, child->rate, node, &child->bw_share);
+ if (child->bw_share == old_bw_share)
+ continue;
+
+ err_one = mlx5_qos_update_node(htb->mdev, child->hw_id, child->bw_share,
+ child->max_average_bw, child->hw_id);
+ if (!err && err_one) {
+ err = err_one;
+
+ NL_SET_ERR_MSG_MOD(extack, "Firmware error when modifying a child node.");
+ qos_err(htb->mdev, "Failed to modify a child node (class %04x), err = %d\n",
+ node->classid, err);
+ }
+ }
+
+ return err;
+}
+
+int
+mlx5e_htb_node_modify(struct mlx5e_htb *htb, u16 classid, u64 rate, u64 ceil,
+ struct netlink_ext_ack *extack)
+{
+ u32 bw_share, max_average_bw;
+ struct mlx5e_qos_node *node;
+ bool ceil_changed = false;
+ int err;
+
+ qos_dbg(htb->mdev, "TC_HTB_LEAF_MODIFY classid %04x, rate %llu, ceil %llu\n",
+ classid, rate, ceil);
+
+ node = mlx5e_htb_node_find(htb, classid);
+ if (!node)
+ return -ENOENT;
+
+ node->rate = rate;
+ mlx5e_htb_convert_rate(htb, rate, node->parent, &bw_share);
+ mlx5e_htb_convert_ceil(htb, ceil, &max_average_bw);
+
+ err = mlx5_qos_update_node(htb->mdev, node->parent->hw_id, bw_share,
+ max_average_bw, node->hw_id);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Firmware error when modifying a node.");
+ qos_err(htb->mdev, "Failed to modify a node (class %04x), err = %d\n",
+ classid, err);
+ return err;
+ }
+
+ if (max_average_bw != node->max_average_bw)
+ ceil_changed = true;
+
+ node->bw_share = bw_share;
+ node->max_average_bw = max_average_bw;
+
+ if (ceil_changed)
+ err = mlx5e_htb_update_children(htb, node, extack);
+
+ return err;
+}
+
+struct mlx5e_htb *mlx5e_htb_alloc(void)
+{
+ return kvzalloc(sizeof(struct mlx5e_htb), GFP_KERNEL);
+}
+
+void mlx5e_htb_free(struct mlx5e_htb *htb)
+{
+ kvfree(htb);
+}
+
+int mlx5e_htb_init(struct mlx5e_htb *htb, struct tc_htb_qopt_offload *htb_qopt,
+ struct net_device *netdev, struct mlx5_core_dev *mdev,
+ struct mlx5e_selq *selq, struct mlx5e_priv *priv)
+{
+ htb->mdev = mdev;
+ htb->netdev = netdev;
+ htb->selq = selq;
+ htb->priv = priv;
+ hash_init(htb->qos_tc2node);
+ return mlx5e_htb_root_add(htb, htb_qopt->parent_classid, htb_qopt->classid,
+ htb_qopt->extack);
+}
+
+void mlx5e_htb_cleanup(struct mlx5e_htb *htb)
+{
+ mlx5e_htb_root_del(htb);
+}
+
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/htb.h b/drivers/net/ethernet/mellanox/mlx5/core/en/htb.h
new file mode 100644
index 000000000000..8386f1ea4559
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/htb.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#ifndef __MLX5E_EN_HTB_H_
+#define __MLX5E_EN_HTB_H_
+
+#include "qos.h"
+
+#define MLX5E_QOS_MAX_LEAF_NODES 256
+
+struct mlx5e_selq;
+struct mlx5e_htb;
+
+typedef int (*mlx5e_fp_htb_enumerate)(void *data, u16 qid, u32 hw_id);
+int mlx5e_htb_enumerate_leaves(struct mlx5e_htb *htb, mlx5e_fp_htb_enumerate callback, void *data);
+
+int mlx5e_htb_cur_leaf_nodes(struct mlx5e_htb *htb);
+
+/* TX datapath API */
+int mlx5e_htb_get_txq_by_classid(struct mlx5e_htb *htb, u16 classid);
+
+/* HTB TC handlers */
+
+int
+mlx5e_htb_leaf_alloc_queue(struct mlx5e_htb *htb, u16 classid,
+ u32 parent_classid, u64 rate, u64 ceil,
+ struct netlink_ext_ack *extack);
+int
+mlx5e_htb_leaf_to_inner(struct mlx5e_htb *htb, u16 classid, u16 child_classid,
+ u64 rate, u64 ceil, struct netlink_ext_ack *extack);
+int mlx5e_htb_leaf_del(struct mlx5e_htb *htb, u16 *classid,
+ struct netlink_ext_ack *extack);
+int
+mlx5e_htb_leaf_del_last(struct mlx5e_htb *htb, u16 classid, bool force,
+ struct netlink_ext_ack *extack);
+int
+mlx5e_htb_node_modify(struct mlx5e_htb *htb, u16 classid, u64 rate, u64 ceil,
+ struct netlink_ext_ack *extack);
+struct mlx5e_htb *mlx5e_htb_alloc(void);
+void mlx5e_htb_free(struct mlx5e_htb *htb);
+int mlx5e_htb_init(struct mlx5e_htb *htb, struct tc_htb_qopt_offload *htb_qopt,
+ struct net_device *netdev, struct mlx5_core_dev *mdev,
+ struct mlx5e_selq *selq, struct mlx5e_priv *priv);
+void mlx5e_htb_cleanup(struct mlx5e_htb *htb);
+#endif
+
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c
index d290d7276b8d..b4f3bd7d346e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c
@@ -20,7 +20,7 @@ mlx5e_hv_vhca_fill_ring_stats(struct mlx5e_priv *priv, int ch,
struct mlx5e_channel_stats *stats;
int tc;
- stats = &priv->channel_stats[ch];
+ stats = priv->channel_stats[ch];
data->rx_packets = stats->rq.packets;
data->rx_bytes = stats->rq.bytes;
@@ -120,14 +120,14 @@ static void mlx5e_hv_vhca_stats_cleanup(struct mlx5_hv_vhca_agent *agent)
cancel_delayed_work_sync(&priv->stats_agent.work);
}
-int mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv)
+void mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv)
{
int buf_len = mlx5e_hv_vhca_stats_buf_size(priv);
struct mlx5_hv_vhca_agent *agent;
priv->stats_agent.buf = kvzalloc(buf_len, GFP_KERNEL);
if (!priv->stats_agent.buf)
- return -ENOMEM;
+ return;
agent = mlx5_hv_vhca_agent_create(priv->mdev->hv_vhca,
MLX5_HV_VHCA_AGENT_STATS,
@@ -142,13 +142,11 @@ int mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv)
PTR_ERR(agent));
kvfree(priv->stats_agent.buf);
- return IS_ERR_OR_NULL(agent);
+ return;
}
priv->stats_agent.agent = agent;
INIT_DELAYED_WORK(&priv->stats_agent.work, mlx5e_hv_vhca_stats_work);
-
- return 0;
}
void mlx5e_hv_vhca_stats_destroy(struct mlx5e_priv *priv)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.h
index 664463faf77b..29c8c6d3260f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.h
@@ -7,19 +7,12 @@
#if IS_ENABLED(CONFIG_PCI_HYPERV_INTERFACE)
-int mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv);
+void mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv);
void mlx5e_hv_vhca_stats_destroy(struct mlx5e_priv *priv);
#else
-
-static inline int mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv)
-{
- return 0;
-}
-
-static inline void mlx5e_hv_vhca_stats_destroy(struct mlx5e_priv *priv)
-{
-}
+static inline void mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv) {}
+static inline void mlx5e_hv_vhca_stats_destroy(struct mlx5e_priv *priv) {}
#endif
#endif /* __MLX5_EN_STATS_VHCA_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/mod_hdr.c b/drivers/net/ethernet/mellanox/mlx5/core/en/mod_hdr.c
index 7edde4d536fd..17325c5d6516 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/mod_hdr.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/mod_hdr.c
@@ -155,3 +155,61 @@ struct mlx5_modify_hdr *mlx5e_mod_hdr_get(struct mlx5e_mod_hdr_handle *mh)
return mh->modify_hdr;
}
+char *
+mlx5e_mod_hdr_alloc(struct mlx5_core_dev *mdev, int namespace,
+ struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts)
+{
+ int new_num_actions, max_hw_actions;
+ size_t new_sz, old_sz;
+ void *ret;
+
+ if (mod_hdr_acts->num_actions < mod_hdr_acts->max_actions)
+ goto out;
+
+ max_hw_actions = mlx5e_mod_hdr_max_actions(mdev, namespace);
+ new_num_actions = min(max_hw_actions,
+ mod_hdr_acts->actions ?
+ mod_hdr_acts->max_actions * 2 : 1);
+ if (mod_hdr_acts->max_actions == new_num_actions)
+ return ERR_PTR(-ENOSPC);
+
+ new_sz = MLX5_MH_ACT_SZ * new_num_actions;
+ old_sz = mod_hdr_acts->max_actions * MLX5_MH_ACT_SZ;
+
+ if (mod_hdr_acts->is_static) {
+ ret = kzalloc(new_sz, GFP_KERNEL);
+ if (ret) {
+ memcpy(ret, mod_hdr_acts->actions, old_sz);
+ mod_hdr_acts->is_static = false;
+ }
+ } else {
+ ret = krealloc(mod_hdr_acts->actions, new_sz, GFP_KERNEL);
+ if (ret)
+ memset(ret + old_sz, 0, new_sz - old_sz);
+ }
+ if (!ret)
+ return ERR_PTR(-ENOMEM);
+
+ mod_hdr_acts->actions = ret;
+ mod_hdr_acts->max_actions = new_num_actions;
+
+out:
+ return mod_hdr_acts->actions + (mod_hdr_acts->num_actions * MLX5_MH_ACT_SZ);
+}
+
+void
+mlx5e_mod_hdr_dealloc(struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts)
+{
+ if (!mod_hdr_acts->is_static)
+ kfree(mod_hdr_acts->actions);
+
+ mod_hdr_acts->actions = NULL;
+ mod_hdr_acts->num_actions = 0;
+ mod_hdr_acts->max_actions = 0;
+}
+
+char *
+mlx5e_mod_hdr_get_item(struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts, int pos)
+{
+ return mod_hdr_acts->actions + (pos * MLX5_MH_ACT_SZ);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/mod_hdr.h b/drivers/net/ethernet/mellanox/mlx5/core/en/mod_hdr.h
index 33b23d8f9182..b8dac418d0a5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/mod_hdr.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/mod_hdr.h
@@ -7,14 +7,32 @@
#include <linux/hashtable.h>
#include <linux/mlx5/fs.h>
+#define MLX5_MH_ACT_SZ MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)
+
struct mlx5e_mod_hdr_handle;
struct mlx5e_tc_mod_hdr_acts {
int num_actions;
int max_actions;
+ bool is_static;
void *actions;
};
+#define DECLARE_MOD_HDR_ACTS_ACTIONS(name, len) \
+ u8 name[len][MLX5_MH_ACT_SZ] = {}
+
+#define DECLARE_MOD_HDR_ACTS(name, acts_arr) \
+ struct mlx5e_tc_mod_hdr_acts name = { \
+ .max_actions = ARRAY_SIZE(acts_arr), \
+ .is_static = true, \
+ .actions = acts_arr, \
+ }
+
+char *mlx5e_mod_hdr_alloc(struct mlx5_core_dev *mdev, int namespace,
+ struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts);
+void mlx5e_mod_hdr_dealloc(struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts);
+char *mlx5e_mod_hdr_get_item(struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts, int pos);
+
struct mlx5e_mod_hdr_handle *
mlx5e_mod_hdr_attach(struct mlx5_core_dev *mdev,
struct mod_hdr_tbl *tbl,
@@ -28,4 +46,12 @@ struct mlx5_modify_hdr *mlx5e_mod_hdr_get(struct mlx5e_mod_hdr_handle *mh);
void mlx5e_mod_hdr_tbl_init(struct mod_hdr_tbl *tbl);
void mlx5e_mod_hdr_tbl_destroy(struct mod_hdr_tbl *tbl);
+static inline int mlx5e_mod_hdr_max_actions(struct mlx5_core_dev *mdev, int namespace)
+{
+ if (namespace == MLX5_FLOW_NAMESPACE_FDB) /* FDB offloading */
+ return MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, max_modify_header_actions);
+ else /* namespace is MLX5_FLOW_NAMESPACE_KERNEL - NIC offloading */
+ return MLX5_CAP_FLOWTABLE_NIC_RX(mdev, max_modify_header_actions);
+}
+
#endif /* __MLX5E_EN_MOD_HDR_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
index f8c29022dbb2..29dd3a04c154 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
@@ -5,13 +5,213 @@
#include "en/txrx.h"
#include "en/port.h"
#include "en_accel/en_accel.h"
-#include "accel/ipsec.h"
-#include "fpga/ipsec.h"
+#include "en_accel/ipsec.h"
+#include <net/xdp_sock_drv.h>
-static bool mlx5e_rx_is_xdp(struct mlx5e_params *params,
- struct mlx5e_xsk_param *xsk)
+static u8 mlx5e_mpwrq_min_page_shift(struct mlx5_core_dev *mdev)
+{
+ u8 min_page_shift = MLX5_CAP_GEN_2(mdev, log_min_mkey_entity_size);
+
+ return min_page_shift ? : 12;
+}
+
+u8 mlx5e_mpwrq_page_shift(struct mlx5_core_dev *mdev, struct mlx5e_xsk_param *xsk)
+{
+ u8 req_page_shift = xsk ? order_base_2(xsk->chunk_size) : PAGE_SHIFT;
+ u8 min_page_shift = mlx5e_mpwrq_min_page_shift(mdev);
+
+ /* Regular RQ uses order-0 pages, the NIC must be able to map them. */
+ if (WARN_ON_ONCE(!xsk && req_page_shift < min_page_shift))
+ min_page_shift = req_page_shift;
+
+ return max(req_page_shift, min_page_shift);
+}
+
+enum mlx5e_mpwrq_umr_mode
+mlx5e_mpwrq_umr_mode(struct mlx5_core_dev *mdev, struct mlx5e_xsk_param *xsk)
+{
+ /* Different memory management schemes use different mechanisms to map
+ * user-mode memory. The stricter guarantees we have, the faster
+ * mechanisms we use:
+ * 1. MTT - direct mapping in page granularity.
+ * 2. KSM - indirect mapping to another MKey to arbitrary addresses, but
+ * all mappings have the same size.
+ * 3. KLM - indirect mapping to another MKey to arbitrary addresses, and
+ * mappings can have different sizes.
+ */
+ u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk);
+ bool unaligned = xsk ? xsk->unaligned : false;
+ bool oversized = false;
+
+ if (xsk) {
+ oversized = xsk->chunk_size < (1 << page_shift);
+ WARN_ON_ONCE(xsk->chunk_size > (1 << page_shift));
+ }
+
+ /* XSK frame size doesn't match the UMR page size, either because the
+ * frame size is not a power of two, or it's smaller than the minimal
+ * page size supported by the firmware.
+ * It's possible to receive packets bigger than MTU in certain setups.
+ * To avoid writing over the XSK frame boundary, the top region of each
+ * stride is mapped to a garbage page, resulting in two mappings of
+ * different sizes per frame.
+ */
+ if (oversized) {
+ /* An optimization for frame sizes equal to 3 * power_of_two.
+ * 3 KSMs point to the frame, and one KSM points to the garbage
+ * page, which works faster than KLM.
+ */
+ if (xsk->chunk_size % 3 == 0 && is_power_of_2(xsk->chunk_size / 3))
+ return MLX5E_MPWRQ_UMR_MODE_TRIPLE;
+
+ return MLX5E_MPWRQ_UMR_MODE_OVERSIZED;
+ }
+
+ /* XSK frames can start at arbitrary unaligned locations, but they all
+ * have the same size which is a power of two. It allows to optimize to
+ * one KSM per frame.
+ */
+ if (unaligned)
+ return MLX5E_MPWRQ_UMR_MODE_UNALIGNED;
+
+ /* XSK: frames are naturally aligned, MTT can be used.
+ * Non-XSK: Allocations happen in units of CPU pages, therefore, the
+ * mappings are naturally aligned.
+ */
+ return MLX5E_MPWRQ_UMR_MODE_ALIGNED;
+}
+
+u8 mlx5e_mpwrq_umr_entry_size(enum mlx5e_mpwrq_umr_mode mode)
+{
+ switch (mode) {
+ case MLX5E_MPWRQ_UMR_MODE_ALIGNED:
+ return sizeof(struct mlx5_mtt);
+ case MLX5E_MPWRQ_UMR_MODE_UNALIGNED:
+ return sizeof(struct mlx5_ksm);
+ case MLX5E_MPWRQ_UMR_MODE_OVERSIZED:
+ return sizeof(struct mlx5_klm) * 2;
+ case MLX5E_MPWRQ_UMR_MODE_TRIPLE:
+ return sizeof(struct mlx5_ksm) * 4;
+ }
+ WARN_ONCE(1, "MPWRQ UMR mode %d is not known\n", mode);
+ return 0;
+}
+
+u8 mlx5e_mpwrq_log_wqe_sz(struct mlx5_core_dev *mdev, u8 page_shift,
+ enum mlx5e_mpwrq_umr_mode umr_mode)
+{
+ u8 umr_entry_size = mlx5e_mpwrq_umr_entry_size(umr_mode);
+ u8 max_pages_per_wqe, max_log_mpwqe_size;
+ u16 max_wqe_size;
+
+ /* Keep in sync with MLX5_MPWRQ_MAX_PAGES_PER_WQE. */
+ max_wqe_size = mlx5e_get_max_sq_aligned_wqebbs(mdev) * MLX5_SEND_WQE_BB;
+ max_pages_per_wqe = ALIGN_DOWN(max_wqe_size - sizeof(struct mlx5e_umr_wqe),
+ MLX5_UMR_MTT_ALIGNMENT) / umr_entry_size;
+ max_log_mpwqe_size = ilog2(max_pages_per_wqe) + page_shift;
+
+ WARN_ON_ONCE(max_log_mpwqe_size < MLX5E_ORDER2_MAX_PACKET_MTU);
+
+ return min_t(u8, max_log_mpwqe_size, MLX5_MPWRQ_MAX_LOG_WQE_SZ);
+}
+
+u8 mlx5e_mpwrq_pages_per_wqe(struct mlx5_core_dev *mdev, u8 page_shift,
+ enum mlx5e_mpwrq_umr_mode umr_mode)
+{
+ u8 log_wqe_sz = mlx5e_mpwrq_log_wqe_sz(mdev, page_shift, umr_mode);
+ u8 pages_per_wqe;
+
+ pages_per_wqe = log_wqe_sz > page_shift ? (1 << (log_wqe_sz - page_shift)) : 1;
+
+ /* Two MTTs are needed to form an octword. The number of MTTs is encoded
+ * in octwords in a UMR WQE, so we need at least two to avoid mapping
+ * garbage addresses.
+ */
+ if (WARN_ON_ONCE(pages_per_wqe < 2 && umr_mode == MLX5E_MPWRQ_UMR_MODE_ALIGNED))
+ pages_per_wqe = 2;
+
+ /* Sanity check for further calculations to succeed. */
+ BUILD_BUG_ON(MLX5_MPWRQ_MAX_PAGES_PER_WQE > 64);
+ if (WARN_ON_ONCE(pages_per_wqe > MLX5_MPWRQ_MAX_PAGES_PER_WQE))
+ return MLX5_MPWRQ_MAX_PAGES_PER_WQE;
+
+ return pages_per_wqe;
+}
+
+u16 mlx5e_mpwrq_umr_wqe_sz(struct mlx5_core_dev *mdev, u8 page_shift,
+ enum mlx5e_mpwrq_umr_mode umr_mode)
+{
+ u8 pages_per_wqe = mlx5e_mpwrq_pages_per_wqe(mdev, page_shift, umr_mode);
+ u8 umr_entry_size = mlx5e_mpwrq_umr_entry_size(umr_mode);
+ u16 umr_wqe_sz;
+
+ umr_wqe_sz = sizeof(struct mlx5e_umr_wqe) +
+ ALIGN(pages_per_wqe * umr_entry_size, MLX5_UMR_MTT_ALIGNMENT);
+
+ WARN_ON_ONCE(DIV_ROUND_UP(umr_wqe_sz, MLX5_SEND_WQE_DS) > MLX5_WQE_CTRL_DS_MASK);
+
+ return umr_wqe_sz;
+}
+
+u8 mlx5e_mpwrq_umr_wqebbs(struct mlx5_core_dev *mdev, u8 page_shift,
+ enum mlx5e_mpwrq_umr_mode umr_mode)
+{
+ return DIV_ROUND_UP(mlx5e_mpwrq_umr_wqe_sz(mdev, page_shift, umr_mode),
+ MLX5_SEND_WQE_BB);
+}
+
+u8 mlx5e_mpwrq_mtts_per_wqe(struct mlx5_core_dev *mdev, u8 page_shift,
+ enum mlx5e_mpwrq_umr_mode umr_mode)
+{
+ u8 pages_per_wqe = mlx5e_mpwrq_pages_per_wqe(mdev, page_shift, umr_mode);
+
+ /* Add another page as a buffer between WQEs. This page will absorb
+ * write overflow by the hardware, when receiving packets larger than
+ * MTU. These oversize packets are dropped by the driver at a later
+ * stage.
+ */
+ return ALIGN(pages_per_wqe + 1,
+ MLX5_SEND_WQE_BB / mlx5e_mpwrq_umr_entry_size(umr_mode));
+}
+
+u32 mlx5e_mpwrq_max_num_entries(struct mlx5_core_dev *mdev,
+ enum mlx5e_mpwrq_umr_mode umr_mode)
{
- return params->xdp_prog || xsk;
+ /* Same limits apply to KSMs and KLMs. */
+ u32 klm_limit = min(MLX5E_MAX_RQ_NUM_KSMS,
+ 1 << MLX5_CAP_GEN(mdev, log_max_klm_list_size));
+
+ switch (umr_mode) {
+ case MLX5E_MPWRQ_UMR_MODE_ALIGNED:
+ return MLX5E_MAX_RQ_NUM_MTTS;
+ case MLX5E_MPWRQ_UMR_MODE_UNALIGNED:
+ return klm_limit;
+ case MLX5E_MPWRQ_UMR_MODE_OVERSIZED:
+ /* Each entry is two KLMs. */
+ return klm_limit / 2;
+ case MLX5E_MPWRQ_UMR_MODE_TRIPLE:
+ /* Each entry is four KSMs. */
+ return klm_limit / 4;
+ }
+ WARN_ONCE(1, "MPWRQ UMR mode %d is not known\n", umr_mode);
+ return 0;
+}
+
+static u8 mlx5e_mpwrq_max_log_rq_size(struct mlx5_core_dev *mdev, u8 page_shift,
+ enum mlx5e_mpwrq_umr_mode umr_mode)
+{
+ u8 mtts_per_wqe = mlx5e_mpwrq_mtts_per_wqe(mdev, page_shift, umr_mode);
+ u32 max_entries = mlx5e_mpwrq_max_num_entries(mdev, umr_mode);
+
+ return ilog2(max_entries / mtts_per_wqe);
+}
+
+u8 mlx5e_mpwrq_max_log_rq_pkts(struct mlx5_core_dev *mdev, u8 page_shift,
+ enum mlx5e_mpwrq_umr_mode umr_mode)
+{
+ return mlx5e_mpwrq_max_log_rq_size(mdev, page_shift, umr_mode) +
+ mlx5e_mpwrq_log_wqe_sz(mdev, page_shift, umr_mode) -
+ MLX5E_ORDER2_MAX_PACKET_MTU;
}
u16 mlx5e_get_linear_rq_headroom(struct mlx5e_params *params,
@@ -23,7 +223,7 @@ u16 mlx5e_get_linear_rq_headroom(struct mlx5e_params *params,
return xsk->headroom;
headroom = NET_IP_ALIGN;
- if (mlx5e_rx_is_xdp(params, xsk))
+ if (params->xdp_prog)
headroom += XDP_PACKET_HEADROOM;
else
headroom += MLX5_RX_HEADROOM;
@@ -31,70 +231,80 @@ u16 mlx5e_get_linear_rq_headroom(struct mlx5e_params *params,
return headroom;
}
-u32 mlx5e_rx_get_min_frag_sz(struct mlx5e_params *params,
- struct mlx5e_xsk_param *xsk)
+static u32 mlx5e_rx_get_linear_sz_xsk(struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk)
{
u32 hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu);
- u16 linear_rq_headroom = mlx5e_get_linear_rq_headroom(params, xsk);
- return linear_rq_headroom + hw_mtu;
+ return xsk->headroom + hw_mtu;
}
-static u32 mlx5e_rx_get_linear_frag_sz(struct mlx5e_params *params,
- struct mlx5e_xsk_param *xsk)
+static u32 mlx5e_rx_get_linear_sz_skb(struct mlx5e_params *params, bool xsk)
{
- u32 frag_sz = mlx5e_rx_get_min_frag_sz(params, xsk);
-
- /* AF_XDP doesn't build SKBs in place. */
- if (!xsk)
- frag_sz = MLX5_SKB_FRAG_SZ(frag_sz);
+ /* SKBs built on XDP_PASS on XSK RQs don't have headroom. */
+ u16 headroom = xsk ? 0 : mlx5e_get_linear_rq_headroom(params, NULL);
+ u32 hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu);
- /* XDP in mlx5e doesn't support multiple packets per page. AF_XDP is a
- * special case. It can run with frames smaller than a page, as it
- * doesn't allocate pages dynamically. However, here we pretend that
- * fragments are page-sized: it allows to treat XSK frames like pages
- * by redirecting alloc and free operations to XSK rings and by using
- * the fact there are no multiple packets per "page" (which is a frame).
- * The latter is important, because frames may come in a random order,
- * and we will have trouble assemblying a real page of multiple frames.
- */
- if (mlx5e_rx_is_xdp(params, xsk))
- frag_sz = max_t(u32, frag_sz, PAGE_SIZE);
+ return MLX5_SKB_FRAG_SZ(headroom + hw_mtu);
+}
- /* Even if we can go with a smaller fragment size, we must not put
- * multiple packets into a single frame.
+static u32 mlx5e_rx_get_linear_stride_sz(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk,
+ bool mpwqe)
+{
+ /* XSK frames are mapped as individual pages, because frames may come in
+ * an arbitrary order from random locations in the UMEM.
*/
if (xsk)
- frag_sz = max_t(u32, frag_sz, xsk->chunk_size);
+ return mpwqe ? 1 << mlx5e_mpwrq_page_shift(mdev, xsk) : PAGE_SIZE;
- return frag_sz;
+ /* XDP in mlx5e doesn't support multiple packets per page. */
+ if (params->xdp_prog)
+ return PAGE_SIZE;
+
+ return roundup_pow_of_two(mlx5e_rx_get_linear_sz_skb(params, false));
}
-u8 mlx5e_mpwqe_log_pkts_per_wqe(struct mlx5e_params *params,
- struct mlx5e_xsk_param *xsk)
+static u8 mlx5e_mpwqe_log_pkts_per_wqe(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk)
{
- u32 linear_frag_sz = mlx5e_rx_get_linear_frag_sz(params, xsk);
+ u32 linear_stride_sz = mlx5e_rx_get_linear_stride_sz(mdev, params, xsk, true);
+ enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk);
+ u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk);
- return MLX5_MPWRQ_LOG_WQE_SZ - order_base_2(linear_frag_sz);
+ return mlx5e_mpwrq_log_wqe_sz(mdev, page_shift, umr_mode) -
+ order_base_2(linear_stride_sz);
}
-bool mlx5e_rx_is_linear_skb(struct mlx5e_params *params,
+bool mlx5e_rx_is_linear_skb(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
struct mlx5e_xsk_param *xsk)
{
- /* AF_XDP allocates SKBs on XDP_PASS - ensure they don't occupy more
- * than one page. For this, check both with and without xsk.
+ if (params->packet_merge.type != MLX5E_PACKET_MERGE_NONE)
+ return false;
+
+ /* Both XSK and non-XSK cases allocate an SKB on XDP_PASS. Packet data
+ * must fit into a CPU page.
*/
- u32 linear_frag_sz = max(mlx5e_rx_get_linear_frag_sz(params, xsk),
- mlx5e_rx_get_linear_frag_sz(params, NULL));
+ if (mlx5e_rx_get_linear_sz_skb(params, xsk) > PAGE_SIZE)
+ return false;
+
+ /* XSK frames must be big enough to hold the packet data. */
+ if (xsk && mlx5e_rx_get_linear_sz_xsk(params, xsk) > xsk->chunk_size)
+ return false;
- return params->packet_merge.type == MLX5E_PACKET_MERGE_NONE &&
- linear_frag_sz <= PAGE_SIZE;
+ return true;
}
-bool mlx5e_verify_rx_mpwqe_strides(struct mlx5_core_dev *mdev,
- u8 log_stride_sz, u8 log_num_strides)
+static bool mlx5e_verify_rx_mpwqe_strides(struct mlx5_core_dev *mdev,
+ u8 log_stride_sz, u8 log_num_strides,
+ u8 page_shift,
+ enum mlx5e_mpwrq_umr_mode umr_mode)
{
- if (log_stride_sz + log_num_strides != MLX5_MPWRQ_LOG_WQE_SZ)
+ if (log_stride_sz + log_num_strides !=
+ mlx5e_mpwrq_log_wqe_sz(mdev, page_shift, umr_mode))
return false;
if (log_stride_sz < MLX5_MPWQE_LOG_STRIDE_SZ_BASE ||
@@ -114,28 +324,53 @@ bool mlx5e_rx_mpwqe_is_linear_skb(struct mlx5_core_dev *mdev,
struct mlx5e_params *params,
struct mlx5e_xsk_param *xsk)
{
- s8 log_num_strides;
+ enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk);
+ u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk);
+ u8 log_num_strides;
u8 log_stride_sz;
+ u8 log_wqe_sz;
+
+ if (!mlx5e_rx_is_linear_skb(mdev, params, xsk))
+ return false;
+
+ log_stride_sz = order_base_2(mlx5e_rx_get_linear_stride_sz(mdev, params, xsk, true));
+ log_wqe_sz = mlx5e_mpwrq_log_wqe_sz(mdev, page_shift, umr_mode);
- if (!mlx5e_rx_is_linear_skb(params, xsk))
+ if (log_wqe_sz < log_stride_sz)
return false;
- log_stride_sz = order_base_2(mlx5e_rx_get_linear_frag_sz(params, xsk));
- log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ - log_stride_sz;
+ log_num_strides = log_wqe_sz - log_stride_sz;
- return mlx5e_verify_rx_mpwqe_strides(mdev, log_stride_sz, log_num_strides);
+ return mlx5e_verify_rx_mpwqe_strides(mdev, log_stride_sz,
+ log_num_strides, page_shift,
+ umr_mode);
}
-u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5e_params *params,
+u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
struct mlx5e_xsk_param *xsk)
{
- u8 log_pkts_per_wqe = mlx5e_mpwqe_log_pkts_per_wqe(params, xsk);
+ enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk);
+ u8 log_pkts_per_wqe, page_shift, max_log_rq_size;
+
+ log_pkts_per_wqe = mlx5e_mpwqe_log_pkts_per_wqe(mdev, params, xsk);
+ page_shift = mlx5e_mpwrq_page_shift(mdev, xsk);
+ max_log_rq_size = mlx5e_mpwrq_max_log_rq_size(mdev, page_shift, umr_mode);
/* Numbers are unsigned, don't subtract to avoid underflow. */
if (params->log_rq_mtu_frames <
log_pkts_per_wqe + MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW)
return MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW;
+ /* Ethtool's rx_max_pending is calculated for regular RQ, that uses
+ * pages of PAGE_SIZE. Max length of an XSK RQ might differ if it uses a
+ * frame size not equal to PAGE_SIZE.
+ * A stricter condition is checked in mlx5e_mpwrq_validate_xsk, WARN on
+ * unexpected failure.
+ */
+ if (WARN_ON_ONCE(params->log_rq_mtu_frames > log_pkts_per_wqe + max_log_rq_size))
+ return max_log_rq_size;
+
return params->log_rq_mtu_frames - log_pkts_per_wqe;
}
@@ -165,7 +400,7 @@ u8 mlx5e_mpwqe_get_log_stride_size(struct mlx5_core_dev *mdev,
struct mlx5e_xsk_param *xsk)
{
if (mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk))
- return order_base_2(mlx5e_rx_get_linear_frag_sz(params, xsk));
+ return order_base_2(mlx5e_rx_get_linear_stride_sz(mdev, params, xsk, true));
return MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev);
}
@@ -174,20 +409,35 @@ u8 mlx5e_mpwqe_get_log_num_strides(struct mlx5_core_dev *mdev,
struct mlx5e_params *params,
struct mlx5e_xsk_param *xsk)
{
- return MLX5_MPWRQ_LOG_WQE_SZ -
+ enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk);
+ u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk);
+
+ return mlx5e_mpwrq_log_wqe_sz(mdev, page_shift, umr_mode) -
mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk);
}
+u8 mlx5e_mpwqe_get_min_wqe_bulk(unsigned int wq_sz)
+{
+#define UMR_WQE_BULK (2)
+ return min_t(unsigned int, UMR_WQE_BULK, wq_sz / 2 - 1);
+}
+
u16 mlx5e_get_rq_headroom(struct mlx5_core_dev *mdev,
struct mlx5e_params *params,
struct mlx5e_xsk_param *xsk)
{
- bool is_linear_skb = (params->rq_wq_type == MLX5_WQ_TYPE_CYCLIC) ?
- mlx5e_rx_is_linear_skb(params, xsk) :
- mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk);
+ u16 linear_headroom = mlx5e_get_linear_rq_headroom(params, xsk);
+
+ if (params->rq_wq_type == MLX5_WQ_TYPE_CYCLIC)
+ return linear_headroom;
+
+ if (mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk))
+ return linear_headroom;
+
+ if (params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO)
+ return linear_headroom;
- return is_linear_skb || params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO ?
- mlx5e_get_linear_rq_headroom(params, xsk) : 0;
+ return 0;
}
u16 mlx5e_calc_sq_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *params)
@@ -195,14 +445,14 @@ u16 mlx5e_calc_sq_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *par
bool is_mpwqe = MLX5E_GET_PFLAG(params, MLX5E_PFLAG_SKB_TX_MPWQE);
u16 stop_room;
- stop_room = mlx5e_tls_get_stop_room(mdev, params);
- stop_room += mlx5e_stop_room_for_wqe(MLX5_SEND_WQE_MAX_WQEBBS);
+ stop_room = mlx5e_ktls_get_stop_room(mdev, params);
+ stop_room += mlx5e_stop_room_for_max_wqe(mdev);
if (is_mpwqe)
- /* A MPWQE can take up to the maximum-sized WQE + all the normal
- * stop room can be taken if a new packet breaks the active
- * MPWQE session and allocates its WQEs right away.
+ /* A MPWQE can take up to the maximum cacheline-aligned WQE +
+ * all the normal stop room can be taken if a new packet breaks
+ * the active MPWQE session and allocates its WQEs right away.
*/
- stop_room += mlx5e_stop_room_for_wqe(MLX5_SEND_WQE_MAX_WQEBBS);
+ stop_room += mlx5e_stop_room_for_mpwqe(mdev);
return stop_room;
}
@@ -309,25 +559,46 @@ bool slow_pci_heuristic(struct mlx5_core_dev *mdev)
link_speed > MLX5E_SLOW_PCI_RATIO * pci_bw;
}
-bool mlx5e_striding_rq_possible(struct mlx5_core_dev *mdev,
- struct mlx5e_params *params)
+int mlx5e_mpwrq_validate_regular(struct mlx5_core_dev *mdev, struct mlx5e_params *params)
{
- if (!mlx5e_check_fragmented_striding_rq_cap(mdev))
- return false;
+ enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, NULL);
+ u8 page_shift = mlx5e_mpwrq_page_shift(mdev, NULL);
- if (mlx5_fpga_is_ipsec_device(mdev))
- return false;
+ if (!mlx5e_check_fragmented_striding_rq_cap(mdev, page_shift, umr_mode))
+ return -EOPNOTSUPP;
- if (params->xdp_prog) {
- /* XSK params are not considered here. If striding RQ is in use,
- * and an XSK is being opened, mlx5e_rx_mpwqe_is_linear_skb will
- * be called with the known XSK params.
- */
- if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL))
- return false;
+ if (params->xdp_prog && !mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL))
+ return -EINVAL;
+
+ return 0;
+}
+
+int mlx5e_mpwrq_validate_xsk(struct mlx5_core_dev *mdev, struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk)
+{
+ enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk);
+ u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk);
+ bool unaligned = xsk ? xsk->unaligned : false;
+ u16 max_mtu_pkts;
+
+ if (!mlx5e_check_fragmented_striding_rq_cap(mdev, page_shift, umr_mode))
+ return -EOPNOTSUPP;
+
+ if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk))
+ return -EINVAL;
+
+ /* Current RQ length is too big for the given frame size, the
+ * needed number of WQEs exceeds the maximum.
+ */
+ max_mtu_pkts = min_t(u8, MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE,
+ mlx5e_mpwrq_max_log_rq_pkts(mdev, page_shift, unaligned));
+ if (params->log_rq_mtu_frames > max_mtu_pkts) {
+ mlx5_core_err(mdev, "Current RQ length %d is too big for XSK with given frame size %u\n",
+ 1 << params->log_rq_mtu_frames, xsk->chunk_size);
+ return -EINVAL;
}
- return true;
+ return 0;
}
void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev,
@@ -340,7 +611,7 @@ void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev,
mlx5_core_info(mdev, "MLX5E: StrdRq(%d) RqSz(%ld) StrdSz(%ld) RxCqeCmprss(%d)\n",
params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ,
params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ ?
- BIT(mlx5e_mpwqe_get_log_rq_size(params, NULL)) :
+ BIT(mlx5e_mpwqe_get_log_rq_size(mdev, params, NULL)) :
BIT(params->log_rq_mtu_frames),
BIT(mlx5e_mpwqe_get_log_stride_size(mdev, params, NULL)),
MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS));
@@ -348,8 +619,7 @@ void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev,
void mlx5e_set_rq_type(struct mlx5_core_dev *mdev, struct mlx5e_params *params)
{
- params->rq_wq_type = mlx5e_striding_rq_possible(mdev, params) &&
- MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ) ?
+ params->rq_wq_type = MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ) ?
MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ :
MLX5_WQ_TYPE_CYCLIC;
}
@@ -359,15 +629,16 @@ void mlx5e_build_rq_params(struct mlx5_core_dev *mdev,
{
/* Prefer Striding RQ, unless any of the following holds:
* - Striding RQ configuration is not possible/supported.
- * - Slow PCI heuristic.
+ * - CQE compression is ON, and stride_index mini_cqe layout is not supported.
* - Legacy RQ would use linear SKB while Striding RQ would use non-linear.
*
* No XSK params: checking the availability of striding RQ in general.
*/
- if (!slow_pci_heuristic(mdev) &&
- mlx5e_striding_rq_possible(mdev, params) &&
+ if ((!MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS) ||
+ MLX5_CAP_GEN(mdev, mini_cqe_resp_stride_index)) &&
+ !mlx5e_mpwrq_validate_regular(mdev, params) &&
(mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL) ||
- !mlx5e_rx_is_linear_skb(params, NULL)))
+ !mlx5e_rx_is_linear_skb(mdev, params, NULL)))
MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ, true);
mlx5e_set_rq_type(mdev, params);
mlx5e_init_rq_type_params(mdev, params);
@@ -385,58 +656,132 @@ void mlx5e_build_create_cq_param(struct mlx5e_create_cq_param *ccp, struct mlx5e
};
}
+static int mlx5e_max_nonlinear_mtu(int first_frag_size, int frag_size, bool xdp)
+{
+ if (xdp)
+ /* XDP requires all fragments to be of the same size. */
+ return first_frag_size + (MLX5E_MAX_RX_FRAGS - 1) * frag_size;
+
+ /* Optimization for small packets: the last fragment is bigger than the others. */
+ return first_frag_size + (MLX5E_MAX_RX_FRAGS - 2) * frag_size + PAGE_SIZE;
+}
+
#define DEFAULT_FRAG_SIZE (2048)
-static void mlx5e_build_rq_frags_info(struct mlx5_core_dev *mdev,
- struct mlx5e_params *params,
- struct mlx5e_xsk_param *xsk,
- struct mlx5e_rq_frags_info *info)
+static int mlx5e_build_rq_frags_info(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk,
+ struct mlx5e_rq_frags_info *info)
{
u32 byte_count = MLX5E_SW2HW_MTU(params, params->sw_mtu);
int frag_size_max = DEFAULT_FRAG_SIZE;
+ int first_frag_size_max;
u32 buf_size = 0;
+ u16 headroom;
+ int max_mtu;
int i;
- if (mlx5_fpga_is_ipsec_device(mdev))
- byte_count += MLX5E_METADATA_ETHER_LEN;
-
- if (mlx5e_rx_is_linear_skb(params, xsk)) {
+ if (mlx5e_rx_is_linear_skb(mdev, params, xsk)) {
int frag_stride;
- frag_stride = mlx5e_rx_get_linear_frag_sz(params, xsk);
- frag_stride = roundup_pow_of_two(frag_stride);
+ frag_stride = mlx5e_rx_get_linear_stride_sz(mdev, params, xsk, false);
info->arr[0].frag_size = byte_count;
info->arr[0].frag_stride = frag_stride;
info->num_frags = 1;
- info->wqe_bulk = PAGE_SIZE / frag_stride;
+
+ /* N WQEs share the same page, N = PAGE_SIZE / frag_stride. The
+ * first WQE in the page is responsible for allocation of this
+ * page, this WQE's index is k*N. If WQEs [k*N+1; k*N+N-1] are
+ * still not completed, the allocation must stop before k*N.
+ */
+ info->wqe_index_mask = (PAGE_SIZE / frag_stride) - 1;
+
goto out;
}
- if (byte_count > PAGE_SIZE +
- (MLX5E_MAX_RX_FRAGS - 1) * frag_size_max)
+ headroom = mlx5e_get_linear_rq_headroom(params, xsk);
+ first_frag_size_max = SKB_WITH_OVERHEAD(frag_size_max - headroom);
+
+ max_mtu = mlx5e_max_nonlinear_mtu(first_frag_size_max, frag_size_max,
+ params->xdp_prog);
+ if (byte_count > max_mtu || params->xdp_prog) {
frag_size_max = PAGE_SIZE;
+ first_frag_size_max = SKB_WITH_OVERHEAD(frag_size_max - headroom);
+
+ max_mtu = mlx5e_max_nonlinear_mtu(first_frag_size_max, frag_size_max,
+ params->xdp_prog);
+ if (byte_count > max_mtu) {
+ mlx5_core_err(mdev, "MTU %u is too big for non-linear legacy RQ (max %d)\n",
+ params->sw_mtu, max_mtu);
+ return -EINVAL;
+ }
+ }
i = 0;
while (buf_size < byte_count) {
int frag_size = byte_count - buf_size;
- if (i < MLX5E_MAX_RX_FRAGS - 1)
+ if (i == 0)
+ frag_size = min(frag_size, first_frag_size_max);
+ else if (i < MLX5E_MAX_RX_FRAGS - 1)
frag_size = min(frag_size, frag_size_max);
info->arr[i].frag_size = frag_size;
- info->arr[i].frag_stride = roundup_pow_of_two(frag_size);
-
buf_size += frag_size;
+
+ if (params->xdp_prog) {
+ /* XDP multi buffer expects fragments of the same size. */
+ info->arr[i].frag_stride = frag_size_max;
+ } else {
+ if (i == 0) {
+ /* Ensure that headroom and tailroom are included. */
+ frag_size += headroom;
+ frag_size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+ }
+ info->arr[i].frag_stride = roundup_pow_of_two(frag_size);
+ }
+
i++;
}
info->num_frags = i;
- /* number of different wqes sharing a page */
- info->wqe_bulk = 1 + (info->num_frags % 2);
+
+ /* The last fragment of WQE with index 2*N may share the page with the
+ * first fragment of WQE with index 2*N+1 in certain cases. If WQE 2*N+1
+ * is not completed yet, WQE 2*N must not be allocated, as it's
+ * responsible for allocating a new page.
+ */
+ if (frag_size_max == PAGE_SIZE) {
+ /* No WQE can start in the middle of a page. */
+ info->wqe_index_mask = 0;
+ } else {
+ /* PAGE_SIZEs starting from 8192 don't use 2K-sized fragments,
+ * because there would be more than MLX5E_MAX_RX_FRAGS of them.
+ */
+ WARN_ON(PAGE_SIZE != 2 * DEFAULT_FRAG_SIZE);
+
+ /* Odd number of fragments allows to pack the last fragment of
+ * the previous WQE and the first fragment of the next WQE into
+ * the same page.
+ * As long as DEFAULT_FRAG_SIZE is 2048, and MLX5E_MAX_RX_FRAGS
+ * is 4, the last fragment can be bigger than the rest only if
+ * it's the fourth one, so WQEs consisting of 3 fragments will
+ * always share a page.
+ * When a page is shared, WQE bulk size is 2, otherwise just 1.
+ */
+ info->wqe_index_mask = info->num_frags % 2;
+ }
out:
- info->wqe_bulk = max_t(u8, info->wqe_bulk, 8);
+ /* Bulking optimization to skip allocation until at least 8 WQEs can be
+ * allocated in a row. At the same time, never start allocation when
+ * the page is still used by older WQEs.
+ */
+ info->wqe_bulk = max_t(u8, info->wqe_index_mask + 1, 8);
+
info->log_num_frags = order_base_2(info->num_frags);
+
+ return 0;
}
static u8 mlx5e_get_rqwq_log_stride(u8 wq_type, int ndsegs)
@@ -472,7 +817,7 @@ static u32 mlx5e_shampo_get_log_cq_size(struct mlx5_core_dev *mdev,
u16 num_strides = BIT(mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk));
int pkt_per_rsrv = BIT(mlx5e_shampo_get_log_pkt_per_rsrv(mdev, params));
u8 log_stride_sz = mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk);
- int wq_size = BIT(mlx5e_mpwqe_get_log_rq_size(params, xsk));
+ int wq_size = BIT(mlx5e_mpwqe_get_log_rq_size(mdev, params, xsk));
int wqe_size = BIT(log_stride_sz) * num_strides;
/* +1 is for the case that the pkt_per_rsrv dont consume the reservation
@@ -496,7 +841,7 @@ static void mlx5e_build_rx_cq_param(struct mlx5_core_dev *mdev,
if (params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO)
log_cq_size = mlx5e_shampo_get_log_cq_size(mdev, params, xsk);
else
- log_cq_size = mlx5e_mpwqe_get_log_rq_size(params, xsk) +
+ log_cq_size = mlx5e_mpwqe_get_log_rq_size(mdev, params, xsk) +
mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk);
break;
default: /* MLX5_WQ_TYPE_CYCLIC */
@@ -533,17 +878,22 @@ int mlx5e_build_rq_param(struct mlx5_core_dev *mdev,
void *rqc = param->rqc;
void *wq = MLX5_ADDR_OF(rqc, rqc, wq);
int ndsegs = 1;
+ int err;
switch (params->rq_wq_type) {
case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: {
u8 log_wqe_num_of_strides = mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk);
u8 log_wqe_stride_size = mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk);
+ enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk);
+ u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk);
if (!mlx5e_verify_rx_mpwqe_strides(mdev, log_wqe_stride_size,
- log_wqe_num_of_strides)) {
+ log_wqe_num_of_strides,
+ page_shift, umr_mode)) {
mlx5_core_err(mdev,
- "Bad RX MPWQE params: log_stride_size %u, log_num_strides %u\n",
- log_wqe_stride_size, log_wqe_num_of_strides);
+ "Bad RX MPWQE params: log_stride_size %u, log_num_strides %u, umr_mode %d\n",
+ log_wqe_stride_size, log_wqe_num_of_strides,
+ umr_mode);
return -EINVAL;
}
@@ -551,7 +901,7 @@ int mlx5e_build_rq_param(struct mlx5_core_dev *mdev,
log_wqe_num_of_strides - MLX5_MPWQE_LOG_NUM_STRIDES_BASE);
MLX5_SET(wq, wq, log_wqe_stride_size,
log_wqe_stride_size - MLX5_MPWQE_LOG_STRIDE_SZ_BASE);
- MLX5_SET(wq, wq, log_wq_sz, mlx5e_mpwqe_get_log_rq_size(params, xsk));
+ MLX5_SET(wq, wq, log_wq_sz, mlx5e_mpwqe_get_log_rq_size(mdev, params, xsk));
if (params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO) {
MLX5_SET(wq, wq, shampo_enable, true);
MLX5_SET(wq, wq, log_reservation_size,
@@ -572,7 +922,9 @@ int mlx5e_build_rq_param(struct mlx5_core_dev *mdev,
}
default: /* MLX5_WQ_TYPE_CYCLIC */
MLX5_SET(wq, wq, log_wq_sz, params->log_rq_mtu_frames);
- mlx5e_build_rq_frags_info(mdev, params, xsk, &param->frags_info);
+ err = mlx5e_build_rq_frags_info(mdev, params, xsk, &param->frags_info);
+ if (err)
+ return err;
ndsegs = param->frags_info.num_frags;
}
@@ -638,8 +990,8 @@ void mlx5e_build_sq_param(struct mlx5_core_dev *mdev,
void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
bool allow_swp;
- allow_swp = mlx5_geneve_tx_allowed(mdev) ||
- !!MLX5_IPSEC_DEV(mdev);
+ allow_swp =
+ mlx5_geneve_tx_allowed(mdev) || !!mlx5_ipsec_device_caps(mdev);
mlx5e_build_sq_param_common(mdev, param);
MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size);
MLX5_SET(sqc, sqc, allow_swp, allow_swp);
@@ -661,13 +1013,6 @@ static void mlx5e_build_ico_cq_param(struct mlx5_core_dev *mdev,
param->cq_period_mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
}
-static u8 mlx5e_get_rq_log_wq_sz(void *rqc)
-{
- void *wq = MLX5_ADDR_OF(rqc, rqc, wq);
-
- return MLX5_GET(wq, wq, log_wq_sz);
-}
-
/* This function calculates the maximum number of headers entries that are needed
* per WQE, the formula is based on the size of the reservations and the
* restriction we have about max packets for reservation that is equal to max
@@ -728,25 +1073,98 @@ static u32 mlx5e_shampo_icosq_sz(struct mlx5_core_dev *mdev,
return wqebbs;
}
+static u32 mlx5e_mpwrq_total_umr_wqebbs(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk)
+{
+ enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk);
+ u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk);
+ u8 umr_wqebbs;
+
+ umr_wqebbs = mlx5e_mpwrq_umr_wqebbs(mdev, page_shift, umr_mode);
+
+ return umr_wqebbs * (1 << mlx5e_mpwqe_get_log_rq_size(mdev, params, xsk));
+}
+
static u8 mlx5e_build_icosq_log_wq_sz(struct mlx5_core_dev *mdev,
struct mlx5e_params *params,
struct mlx5e_rq_param *rqp)
{
- u32 wqebbs;
+ u32 wqebbs, total_pages, useful_space;
/* MLX5_WQ_TYPE_CYCLIC */
if (params->rq_wq_type != MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ)
return MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE;
- wqebbs = MLX5E_UMR_WQEBBS * BIT(mlx5e_get_rq_log_wq_sz(rqp->rqc));
+ /* UMR WQEs for the regular RQ. */
+ wqebbs = mlx5e_mpwrq_total_umr_wqebbs(mdev, params, NULL);
+
+ /* If XDP program is attached, XSK may be turned on at any time without
+ * restarting the channel. ICOSQ must be big enough to fit UMR WQEs of
+ * both regular RQ and XSK RQ.
+ *
+ * XSK uses different values of page_shift, and the total number of UMR
+ * WQEBBs depends on it. This dependency is complex and not monotonic,
+ * especially taking into consideration that some of the parameters come
+ * from capabilities. Hence, we have to try all valid values of XSK
+ * frame size (and page_shift) to find the maximum.
+ */
+ if (params->xdp_prog) {
+ u32 max_xsk_wqebbs = 0;
+ u8 frame_shift;
+
+ for (frame_shift = XDP_UMEM_MIN_CHUNK_SHIFT;
+ frame_shift <= PAGE_SHIFT; frame_shift++) {
+ /* The headroom doesn't affect the calculation. */
+ struct mlx5e_xsk_param xsk = {
+ .chunk_size = 1 << frame_shift,
+ .unaligned = false,
+ };
+
+ /* XSK aligned mode. */
+ max_xsk_wqebbs = max(max_xsk_wqebbs,
+ mlx5e_mpwrq_total_umr_wqebbs(mdev, params, &xsk));
+
+ /* XSK unaligned mode, frame size is a power of two. */
+ xsk.unaligned = true;
+ max_xsk_wqebbs = max(max_xsk_wqebbs,
+ mlx5e_mpwrq_total_umr_wqebbs(mdev, params, &xsk));
+
+ /* XSK unaligned mode, frame size is not equal to stride size. */
+ xsk.chunk_size -= 1;
+ max_xsk_wqebbs = max(max_xsk_wqebbs,
+ mlx5e_mpwrq_total_umr_wqebbs(mdev, params, &xsk));
+
+ /* XSK unaligned mode, frame size is a triple power of two. */
+ xsk.chunk_size = (1 << frame_shift) / 4 * 3;
+ max_xsk_wqebbs = max(max_xsk_wqebbs,
+ mlx5e_mpwrq_total_umr_wqebbs(mdev, params, &xsk));
+ }
+
+ wqebbs += max_xsk_wqebbs;
+ }
+
if (params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO)
wqebbs += mlx5e_shampo_icosq_sz(mdev, params, rqp);
+
+ /* UMR WQEs don't cross the page boundary, they are padded with NOPs.
+ * This padding is always smaller than the max WQE size. That gives us
+ * at least (PAGE_SIZE - (max WQE size - MLX5_SEND_WQE_BB)) useful bytes
+ * per page. The number of pages is estimated as the total size of WQEs
+ * divided by the useful space in page, rounding up. If some WQEs don't
+ * fully fit into the useful space, they can occupy part of the padding,
+ * which proves this estimation to be correct (reserve enough space).
+ */
+ useful_space = PAGE_SIZE - mlx5e_get_max_sq_wqebbs(mdev) + MLX5_SEND_WQE_BB;
+ total_pages = DIV_ROUND_UP(wqebbs * MLX5_SEND_WQE_BB, useful_space);
+ wqebbs = total_pages * (PAGE_SIZE / MLX5_SEND_WQE_BB);
+
return max_t(u8, MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE, order_base_2(wqebbs));
}
static u8 mlx5e_build_async_icosq_log_wq_sz(struct mlx5_core_dev *mdev)
{
- if (mlx5e_accel_is_ktls_rx(mdev))
+ if (mlx5e_is_ktls_rx(mdev))
return MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE;
return MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE;
@@ -774,10 +1192,10 @@ static void mlx5e_build_async_icosq_param(struct mlx5_core_dev *mdev,
void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
mlx5e_build_sq_param_common(mdev, param);
- param->stop_room = mlx5e_stop_room_for_wqe(1); /* for XSK NOP */
- param->is_tls = mlx5e_accel_is_ktls_rx(mdev);
+ param->stop_room = mlx5e_stop_room_for_wqe(mdev, 1); /* for XSK NOP */
+ param->is_tls = mlx5e_is_ktls_rx(mdev);
if (param->is_tls)
- param->stop_room += mlx5e_stop_room_for_wqe(1); /* for TLS RX resync NOP */
+ param->stop_room += mlx5e_stop_room_for_wqe(mdev, 1); /* for TLS RX resync NOP */
MLX5_SET(sqc, sqc, reg_umr, MLX5_CAP_ETH(mdev, reg_umr_sq));
MLX5_SET(wq, wq, log_wq_sz, log_wq_size);
mlx5e_build_ico_cq_param(mdev, log_wq_size, &param->cqp);
@@ -785,6 +1203,7 @@ static void mlx5e_build_async_icosq_param(struct mlx5_core_dev *mdev,
void mlx5e_build_xdpsq_param(struct mlx5_core_dev *mdev,
struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk,
struct mlx5e_sq_param *param)
{
void *sqc = param->sqc;
@@ -793,6 +1212,7 @@ void mlx5e_build_xdpsq_param(struct mlx5_core_dev *mdev,
mlx5e_build_sq_param_common(mdev, param);
MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size);
param->is_mpw = MLX5E_GET_PFLAG(params, MLX5E_PFLAG_XDP_TX_MPWQE);
+ param->is_xdp_mb = !mlx5e_rx_is_linear_skb(mdev, params, xsk);
mlx5e_build_tx_cq_param(mdev, params, &param->cqp);
}
@@ -812,7 +1232,7 @@ int mlx5e_build_channel_param(struct mlx5_core_dev *mdev,
async_icosq_log_wq_sz = mlx5e_build_async_icosq_log_wq_sz(mdev);
mlx5e_build_sq_param(mdev, params, &cparam->txq_sq);
- mlx5e_build_xdpsq_param(mdev, params, &cparam->xdp_sq);
+ mlx5e_build_xdpsq_param(mdev, params, NULL, &cparam->xdp_sq);
mlx5e_build_icosq_param(mdev, icosq_log_wq_sz, &cparam->icosq);
mlx5e_build_async_icosq_param(mdev, async_icosq_log_wq_sz, &cparam->async_icosq);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
index 433e6967692d..034debd140bc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
@@ -9,6 +9,7 @@
struct mlx5e_xsk_param {
u16 headroom;
u16 chunk_size;
+ bool unaligned;
};
struct mlx5e_cq_param {
@@ -31,6 +32,7 @@ struct mlx5e_sq_param {
struct mlx5_wq_param wq;
bool is_mpw;
bool is_tls;
+ bool is_xdp_mb;
u16 stop_room;
};
@@ -51,37 +53,26 @@ struct mlx5e_create_sq_param {
u8 min_inline_mode;
};
-static inline bool mlx5e_qid_get_ch_if_in_group(struct mlx5e_params *params,
- u16 qid,
- enum mlx5e_rq_group group,
- u16 *ix)
-{
- int nch = params->num_channels;
- int ch = qid - nch * group;
-
- if (ch < 0 || ch >= nch)
- return false;
-
- *ix = ch;
- return true;
-}
-
-static inline void mlx5e_qid_get_ch_and_group(struct mlx5e_params *params,
- u16 qid,
- u16 *ix,
- enum mlx5e_rq_group *group)
-{
- u16 nch = params->num_channels;
-
- *ix = qid % nch;
- *group = qid / nch;
-}
-
-static inline bool mlx5e_qid_validate(const struct mlx5e_profile *profile,
- struct mlx5e_params *params, u64 qid)
-{
- return qid < params->num_channels * profile->rq_groups;
-}
+/* Striding RQ dynamic parameters */
+
+u8 mlx5e_mpwrq_page_shift(struct mlx5_core_dev *mdev, struct mlx5e_xsk_param *xsk);
+enum mlx5e_mpwrq_umr_mode
+mlx5e_mpwrq_umr_mode(struct mlx5_core_dev *mdev, struct mlx5e_xsk_param *xsk);
+u8 mlx5e_mpwrq_umr_entry_size(enum mlx5e_mpwrq_umr_mode mode);
+u8 mlx5e_mpwrq_log_wqe_sz(struct mlx5_core_dev *mdev, u8 page_shift,
+ enum mlx5e_mpwrq_umr_mode umr_mode);
+u8 mlx5e_mpwrq_pages_per_wqe(struct mlx5_core_dev *mdev, u8 page_shift,
+ enum mlx5e_mpwrq_umr_mode umr_mode);
+u16 mlx5e_mpwrq_umr_wqe_sz(struct mlx5_core_dev *mdev, u8 page_shift,
+ enum mlx5e_mpwrq_umr_mode umr_mode);
+u8 mlx5e_mpwrq_umr_wqebbs(struct mlx5_core_dev *mdev, u8 page_shift,
+ enum mlx5e_mpwrq_umr_mode umr_mode);
+u8 mlx5e_mpwrq_mtts_per_wqe(struct mlx5_core_dev *mdev, u8 page_shift,
+ enum mlx5e_mpwrq_umr_mode umr_mode);
+u32 mlx5e_mpwrq_max_num_entries(struct mlx5_core_dev *mdev,
+ enum mlx5e_mpwrq_umr_mode umr_mode);
+u8 mlx5e_mpwrq_max_log_rq_pkts(struct mlx5_core_dev *mdev, u8 page_shift,
+ enum mlx5e_mpwrq_umr_mode umr_mode);
/* Parameter calculations */
@@ -91,25 +82,23 @@ void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode)
void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode);
bool slow_pci_heuristic(struct mlx5_core_dev *mdev);
-bool mlx5e_striding_rq_possible(struct mlx5_core_dev *mdev, struct mlx5e_params *params);
+int mlx5e_mpwrq_validate_regular(struct mlx5_core_dev *mdev, struct mlx5e_params *params);
+int mlx5e_mpwrq_validate_xsk(struct mlx5_core_dev *mdev, struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk);
void mlx5e_build_rq_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params);
void mlx5e_set_rq_type(struct mlx5_core_dev *mdev, struct mlx5e_params *params);
void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params);
-bool mlx5e_verify_rx_mpwqe_strides(struct mlx5_core_dev *mdev,
- u8 log_stride_sz, u8 log_num_strides);
u16 mlx5e_get_linear_rq_headroom(struct mlx5e_params *params,
struct mlx5e_xsk_param *xsk);
-u32 mlx5e_rx_get_min_frag_sz(struct mlx5e_params *params,
- struct mlx5e_xsk_param *xsk);
-u8 mlx5e_mpwqe_log_pkts_per_wqe(struct mlx5e_params *params,
- struct mlx5e_xsk_param *xsk);
-bool mlx5e_rx_is_linear_skb(struct mlx5e_params *params,
+bool mlx5e_rx_is_linear_skb(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
struct mlx5e_xsk_param *xsk);
bool mlx5e_rx_mpwqe_is_linear_skb(struct mlx5_core_dev *mdev,
struct mlx5e_params *params,
struct mlx5e_xsk_param *xsk);
-u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5e_params *params,
+u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params,
struct mlx5e_xsk_param *xsk);
u8 mlx5e_shampo_get_log_hd_entry_size(struct mlx5_core_dev *mdev,
struct mlx5e_params *params);
@@ -129,6 +118,7 @@ u8 mlx5e_mpwqe_get_log_stride_size(struct mlx5_core_dev *mdev,
u8 mlx5e_mpwqe_get_log_num_strides(struct mlx5_core_dev *mdev,
struct mlx5e_params *params,
struct mlx5e_xsk_param *xsk);
+u8 mlx5e_mpwqe_get_min_wqe_bulk(unsigned int wq_sz);
u16 mlx5e_get_rq_headroom(struct mlx5_core_dev *mdev,
struct mlx5e_params *params,
struct mlx5e_xsk_param *xsk);
@@ -154,6 +144,7 @@ void mlx5e_build_tx_cq_param(struct mlx5_core_dev *mdev,
struct mlx5e_cq_param *param);
void mlx5e_build_xdpsq_param(struct mlx5_core_dev *mdev,
struct mlx5e_params *params,
+ struct mlx5e_xsk_param *xsk,
struct mlx5e_sq_param *param);
int mlx5e_build_channel_param(struct mlx5_core_dev *mdev,
struct mlx5e_params *params,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c
index 673f1c82d381..c9d5d8d93994 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c
@@ -309,8 +309,8 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv,
if (err)
return err;
- err = update_buffer_lossy(max_mtu, curr_pfc_en, prio2buffer, port_buff_cell_sz,
- xoff, &port_buffer, &update_buffer);
+ err = update_buffer_lossy(max_mtu, curr_pfc_en, prio2buffer, xoff,
+ port_buff_cell_sz, &port_buffer, &update_buffer);
if (err)
return err;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
index 18d542b1c5cb..8469e9c38670 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
@@ -79,19 +79,49 @@ void mlx5e_skb_cb_hwtstamp_handler(struct sk_buff *skb, int hwtstamp_type,
memset(skb->cb, 0, sizeof(struct mlx5e_skb_cb_hwtstamp));
}
+#define PTP_WQE_CTR2IDX(val) ((val) & ptpsq->ts_cqe_ctr_mask)
+
+static bool mlx5e_ptp_ts_cqe_drop(struct mlx5e_ptpsq *ptpsq, u16 skb_cc, u16 skb_id)
+{
+ return (ptpsq->ts_cqe_ctr_mask && (skb_cc != skb_id));
+}
+
+static void mlx5e_ptp_skb_fifo_ts_cqe_resync(struct mlx5e_ptpsq *ptpsq, u16 skb_cc, u16 skb_id)
+{
+ struct skb_shared_hwtstamps hwts = {};
+ struct sk_buff *skb;
+
+ ptpsq->cq_stats->resync_event++;
+
+ while (skb_cc != skb_id) {
+ skb = mlx5e_skb_fifo_pop(&ptpsq->skb_fifo);
+ hwts.hwtstamp = mlx5e_skb_cb_get_hwts(skb)->cqe_hwtstamp;
+ skb_tstamp_tx(skb, &hwts);
+ ptpsq->cq_stats->resync_cqe++;
+ skb_cc = PTP_WQE_CTR2IDX(ptpsq->skb_fifo_cc);
+ }
+}
+
static void mlx5e_ptp_handle_ts_cqe(struct mlx5e_ptpsq *ptpsq,
struct mlx5_cqe64 *cqe,
int budget)
{
- struct sk_buff *skb = mlx5e_skb_fifo_pop(&ptpsq->skb_fifo);
+ u16 skb_id = PTP_WQE_CTR2IDX(be16_to_cpu(cqe->wqe_counter));
+ u16 skb_cc = PTP_WQE_CTR2IDX(ptpsq->skb_fifo_cc);
struct mlx5e_txqsq *sq = &ptpsq->txqsq;
+ struct sk_buff *skb;
ktime_t hwtstamp;
if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
+ skb = mlx5e_skb_fifo_pop(&ptpsq->skb_fifo);
ptpsq->cq_stats->err_cqe++;
goto out;
}
+ if (mlx5e_ptp_ts_cqe_drop(ptpsq, skb_cc, skb_id))
+ mlx5e_ptp_skb_fifo_ts_cqe_resync(ptpsq, skb_cc, skb_id);
+
+ skb = mlx5e_skb_fifo_pop(&ptpsq->skb_fifo);
hwtstamp = mlx5e_cqe_ts_to_ns(sq->ptp_cyc2time, sq->clock, get_cqe_ts(cqe));
mlx5e_skb_cb_hwtstamp_handler(skb, MLX5E_SKB_CB_PORT_HWTSTAMP,
hwtstamp, ptpsq->cq_stats);
@@ -195,7 +225,6 @@ static int mlx5e_ptp_alloc_txqsq(struct mlx5e_ptp *c, int txq_ix,
int node;
sq->pdev = c->pdev;
- sq->tstamp = c->tstamp;
sq->clock = &mdev->clock;
sq->mkey_be = c->mkey_be;
sq->netdev = c->netdev;
@@ -242,6 +271,7 @@ static void mlx5e_ptp_destroy_sq(struct mlx5_core_dev *mdev, u32 sqn)
static int mlx5e_ptp_alloc_traffic_db(struct mlx5e_ptpsq *ptpsq, int numa)
{
int wq_sz = mlx5_wq_cyc_get_size(&ptpsq->txqsq.wq);
+ struct mlx5_core_dev *mdev = ptpsq->txqsq.mdev;
ptpsq->skb_fifo.fifo = kvzalloc_node(array_size(wq_sz, sizeof(*ptpsq->skb_fifo.fifo)),
GFP_KERNEL, numa);
@@ -251,7 +281,9 @@ static int mlx5e_ptp_alloc_traffic_db(struct mlx5e_ptpsq *ptpsq, int numa)
ptpsq->skb_fifo.pc = &ptpsq->skb_fifo_pc;
ptpsq->skb_fifo.cc = &ptpsq->skb_fifo_cc;
ptpsq->skb_fifo.mask = wq_sz - 1;
-
+ if (MLX5_CAP_GEN_2(mdev, ts_cqe_metadata_size2wqe_counter))
+ ptpsq->ts_cqe_ctr_mask =
+ (1 << MLX5_CAP_GEN_2(mdev, ts_cqe_metadata_size2wqe_counter)) - 1;
return 0;
}
@@ -449,7 +481,7 @@ static void mlx5e_ptp_build_sq_param(struct mlx5_core_dev *mdev,
wq = MLX5_ADDR_OF(sqc, sqc, wq);
MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size);
- param->stop_room = mlx5e_stop_room_for_wqe(MLX5_SEND_WQE_MAX_WQEBBS);
+ param->stop_room = mlx5e_stop_room_for_max_wqe(mdev);
mlx5e_build_tx_cq_param(mdev, params, &param->cqp);
}
@@ -590,37 +622,39 @@ static int mlx5e_ptp_set_state(struct mlx5e_ptp *c, struct mlx5e_params *params)
return bitmap_empty(c->state, MLX5E_PTP_STATE_NUM_STATES) ? -EINVAL : 0;
}
-static void mlx5e_ptp_rx_unset_fs(struct mlx5e_priv *priv)
+static void mlx5e_ptp_rx_unset_fs(struct mlx5e_flow_steering *fs)
{
- struct mlx5e_ptp_fs *ptp_fs = priv->fs.ptp_fs;
+ struct mlx5e_ptp_fs *ptp_fs = mlx5e_fs_get_ptp(fs);
if (!ptp_fs->valid)
return;
mlx5e_fs_tt_redirect_del_rule(ptp_fs->l2_rule);
- mlx5e_fs_tt_redirect_any_destroy(priv);
+ mlx5e_fs_tt_redirect_any_destroy(fs);
mlx5e_fs_tt_redirect_del_rule(ptp_fs->udp_v6_rule);
mlx5e_fs_tt_redirect_del_rule(ptp_fs->udp_v4_rule);
- mlx5e_fs_tt_redirect_udp_destroy(priv);
+ mlx5e_fs_tt_redirect_udp_destroy(fs);
ptp_fs->valid = false;
}
static int mlx5e_ptp_rx_set_fs(struct mlx5e_priv *priv)
{
u32 tirn = mlx5e_rx_res_get_tirn_ptp(priv->rx_res);
- struct mlx5e_ptp_fs *ptp_fs = priv->fs.ptp_fs;
+ struct mlx5e_flow_steering *fs = priv->fs;
struct mlx5_flow_handle *rule;
+ struct mlx5e_ptp_fs *ptp_fs;
int err;
+ ptp_fs = mlx5e_fs_get_ptp(fs);
if (ptp_fs->valid)
return 0;
- err = mlx5e_fs_tt_redirect_udp_create(priv);
+ err = mlx5e_fs_tt_redirect_udp_create(fs);
if (err)
goto out_free;
- rule = mlx5e_fs_tt_redirect_udp_add_rule(priv, MLX5_TT_IPV4_UDP,
+ rule = mlx5e_fs_tt_redirect_udp_add_rule(fs, MLX5_TT_IPV4_UDP,
tirn, PTP_EV_PORT);
if (IS_ERR(rule)) {
err = PTR_ERR(rule);
@@ -628,7 +662,7 @@ static int mlx5e_ptp_rx_set_fs(struct mlx5e_priv *priv)
}
ptp_fs->udp_v4_rule = rule;
- rule = mlx5e_fs_tt_redirect_udp_add_rule(priv, MLX5_TT_IPV6_UDP,
+ rule = mlx5e_fs_tt_redirect_udp_add_rule(fs, MLX5_TT_IPV6_UDP,
tirn, PTP_EV_PORT);
if (IS_ERR(rule)) {
err = PTR_ERR(rule);
@@ -636,11 +670,11 @@ static int mlx5e_ptp_rx_set_fs(struct mlx5e_priv *priv)
}
ptp_fs->udp_v6_rule = rule;
- err = mlx5e_fs_tt_redirect_any_create(priv);
+ err = mlx5e_fs_tt_redirect_any_create(fs);
if (err)
goto out_destroy_udp_v6_rule;
- rule = mlx5e_fs_tt_redirect_any_add_rule(priv, tirn, ETH_P_1588);
+ rule = mlx5e_fs_tt_redirect_any_add_rule(fs, tirn, ETH_P_1588);
if (IS_ERR(rule)) {
err = PTR_ERR(rule);
goto out_destroy_fs_any;
@@ -651,13 +685,13 @@ static int mlx5e_ptp_rx_set_fs(struct mlx5e_priv *priv)
return 0;
out_destroy_fs_any:
- mlx5e_fs_tt_redirect_any_destroy(priv);
+ mlx5e_fs_tt_redirect_any_destroy(fs);
out_destroy_udp_v6_rule:
mlx5e_fs_tt_redirect_del_rule(ptp_fs->udp_v6_rule);
out_destroy_udp_v4_rule:
mlx5e_fs_tt_redirect_del_rule(ptp_fs->udp_v4_rule);
out_destroy_fs_udp:
- mlx5e_fs_tt_redirect_udp_destroy(priv);
+ mlx5e_fs_tt_redirect_udp_destroy(fs);
out_free:
return err;
}
@@ -691,7 +725,7 @@ int mlx5e_ptp_open(struct mlx5e_priv *priv, struct mlx5e_params *params,
if (err)
goto err_free;
- netif_napi_add(netdev, &c->napi, mlx5e_ptp_napi_poll, 64);
+ netif_napi_add(netdev, &c->napi, mlx5e_ptp_napi_poll);
mlx5e_ptp_build_params(c, cparams, params);
@@ -737,6 +771,7 @@ void mlx5e_ptp_activate_channel(struct mlx5e_ptp *c)
if (test_bit(MLX5E_PTP_STATE_RX, c->state)) {
mlx5e_ptp_rx_set_fs(c->priv);
mlx5e_activate_rq(&c->rq);
+ mlx5e_trigger_napi_sched(&c->napi);
}
}
@@ -764,29 +799,31 @@ int mlx5e_ptp_get_rqn(struct mlx5e_ptp *c, u32 *rqn)
return 0;
}
-int mlx5e_ptp_alloc_rx_fs(struct mlx5e_priv *priv)
+int mlx5e_ptp_alloc_rx_fs(struct mlx5e_flow_steering *fs,
+ const struct mlx5e_profile *profile)
{
struct mlx5e_ptp_fs *ptp_fs;
- if (!priv->profile->rx_ptp_support)
+ if (!mlx5e_profile_feature_cap(profile, PTP_RX))
return 0;
ptp_fs = kzalloc(sizeof(*ptp_fs), GFP_KERNEL);
if (!ptp_fs)
return -ENOMEM;
+ mlx5e_fs_set_ptp(fs, ptp_fs);
- priv->fs.ptp_fs = ptp_fs;
return 0;
}
-void mlx5e_ptp_free_rx_fs(struct mlx5e_priv *priv)
+void mlx5e_ptp_free_rx_fs(struct mlx5e_flow_steering *fs,
+ const struct mlx5e_profile *profile)
{
- struct mlx5e_ptp_fs *ptp_fs = priv->fs.ptp_fs;
+ struct mlx5e_ptp_fs *ptp_fs = mlx5e_fs_get_ptp(fs);
- if (!priv->profile->rx_ptp_support)
+ if (!mlx5e_profile_feature_cap(profile, PTP_RX))
return;
- mlx5e_ptp_rx_unset_fs(priv);
+ mlx5e_ptp_rx_unset_fs(fs);
kfree(ptp_fs);
}
@@ -794,7 +831,7 @@ int mlx5e_ptp_rx_manage_fs(struct mlx5e_priv *priv, bool set)
{
struct mlx5e_ptp *c = priv->channels.ptp;
- if (!priv->profile->rx_ptp_support)
+ if (!mlx5e_profile_feature_cap(priv->profile, PTP_RX))
return 0;
if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
@@ -812,6 +849,6 @@ int mlx5e_ptp_rx_manage_fs(struct mlx5e_priv *priv, bool set)
netdev_WARN_ONCE(priv->netdev, "Don't try to remove PTP RX-FS rules");
return -EINVAL;
}
- mlx5e_ptp_rx_unset_fs(priv);
+ mlx5e_ptp_rx_unset_fs(priv->fs);
return 0;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h
index a71a32e00ebb..cc7efde88ac3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h
@@ -6,6 +6,7 @@
#include "en.h"
#include "en_stats.h"
+#include "en/txrx.h"
#include <linux/ptp_classify.h>
#define MLX5E_PTP_CHANNEL_IX 0
@@ -17,6 +18,7 @@ struct mlx5e_ptpsq {
u16 skb_fifo_pc;
struct mlx5e_skb_fifo skb_fifo;
struct mlx5e_ptp_cq_stats *cq_stats;
+ u16 ts_cqe_ctr_mask;
};
enum {
@@ -67,14 +69,24 @@ static inline bool mlx5e_use_ptpsq(struct sk_buff *skb)
fk.ports.dst == htons(PTP_EV_PORT));
}
+static inline bool mlx5e_ptpsq_fifo_has_room(struct mlx5e_txqsq *sq)
+{
+ if (!sq->ptpsq)
+ return true;
+
+ return mlx5e_skb_fifo_has_room(&sq->ptpsq->skb_fifo);
+}
+
int mlx5e_ptp_open(struct mlx5e_priv *priv, struct mlx5e_params *params,
u8 lag_port, struct mlx5e_ptp **cp);
void mlx5e_ptp_close(struct mlx5e_ptp *c);
void mlx5e_ptp_activate_channel(struct mlx5e_ptp *c);
void mlx5e_ptp_deactivate_channel(struct mlx5e_ptp *c);
int mlx5e_ptp_get_rqn(struct mlx5e_ptp *c, u32 *rqn);
-int mlx5e_ptp_alloc_rx_fs(struct mlx5e_priv *priv);
-void mlx5e_ptp_free_rx_fs(struct mlx5e_priv *priv);
+int mlx5e_ptp_alloc_rx_fs(struct mlx5e_flow_steering *fs,
+ const struct mlx5e_profile *profile);
+void mlx5e_ptp_free_rx_fs(struct mlx5e_flow_steering *fs,
+ const struct mlx5e_profile *profile);
int mlx5e_ptp_rx_manage_fs(struct mlx5e_priv *priv, bool set);
enum {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c
index 50977f01a050..2842195ee548 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c
@@ -1,11 +1,17 @@
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */
+#include <net/sch_generic.h>
+#include <net/pkt_cls.h>
#include "en.h"
#include "params.h"
#include "../qos.h"
+#include "en/htb.h"
-#define BYTES_IN_MBIT 125000
+struct qos_sq_callback_params {
+ struct mlx5e_priv *priv;
+ struct mlx5e_channels *chs;
+};
int mlx5e_qos_bytes_rate_check(struct mlx5_core_dev *mdev, u64 nbytes)
{
@@ -27,121 +33,14 @@ int mlx5e_qos_max_leaf_nodes(struct mlx5_core_dev *mdev)
return min(MLX5E_QOS_MAX_LEAF_NODES, mlx5_qos_max_leaf_nodes(mdev));
}
-int mlx5e_qos_cur_leaf_nodes(struct mlx5e_priv *priv)
-{
- int last = find_last_bit(priv->htb.qos_used_qids, mlx5e_qos_max_leaf_nodes(priv->mdev));
-
- return last == mlx5e_qos_max_leaf_nodes(priv->mdev) ? 0 : last + 1;
-}
-
-/* Software representation of the QoS tree (internal to this file) */
-
-static int mlx5e_find_unused_qos_qid(struct mlx5e_priv *priv)
-{
- int size = mlx5e_qos_max_leaf_nodes(priv->mdev);
- int res;
-
- WARN_ONCE(!mutex_is_locked(&priv->state_lock), "%s: state_lock is not held\n", __func__);
- res = find_first_zero_bit(priv->htb.qos_used_qids, size);
-
- return res == size ? -ENOSPC : res;
-}
-
-struct mlx5e_qos_node {
- struct hlist_node hnode;
- struct rcu_head rcu;
- struct mlx5e_qos_node *parent;
- u64 rate;
- u32 bw_share;
- u32 max_average_bw;
- u32 hw_id;
- u32 classid; /* 16-bit, except root. */
- u16 qid;
-};
-
-#define MLX5E_QOS_QID_INNER 0xffff
-#define MLX5E_HTB_CLASSID_ROOT 0xffffffff
-
-static struct mlx5e_qos_node *
-mlx5e_sw_node_create_leaf(struct mlx5e_priv *priv, u16 classid, u16 qid,
- struct mlx5e_qos_node *parent)
-{
- struct mlx5e_qos_node *node;
-
- node = kzalloc(sizeof(*node), GFP_KERNEL);
- if (!node)
- return ERR_PTR(-ENOMEM);
-
- node->parent = parent;
-
- node->qid = qid;
- __set_bit(qid, priv->htb.qos_used_qids);
-
- node->classid = classid;
- hash_add_rcu(priv->htb.qos_tc2node, &node->hnode, classid);
-
- mlx5e_update_tx_netdev_queues(priv);
-
- return node;
-}
-
-static struct mlx5e_qos_node *mlx5e_sw_node_create_root(struct mlx5e_priv *priv)
-{
- struct mlx5e_qos_node *node;
-
- node = kzalloc(sizeof(*node), GFP_KERNEL);
- if (!node)
- return ERR_PTR(-ENOMEM);
-
- node->qid = MLX5E_QOS_QID_INNER;
- node->classid = MLX5E_HTB_CLASSID_ROOT;
- hash_add_rcu(priv->htb.qos_tc2node, &node->hnode, node->classid);
-
- return node;
-}
-
-static struct mlx5e_qos_node *mlx5e_sw_node_find(struct mlx5e_priv *priv, u32 classid)
-{
- struct mlx5e_qos_node *node = NULL;
-
- hash_for_each_possible(priv->htb.qos_tc2node, node, hnode, classid) {
- if (node->classid == classid)
- break;
- }
-
- return node;
-}
-
-static struct mlx5e_qos_node *mlx5e_sw_node_find_rcu(struct mlx5e_priv *priv, u32 classid)
-{
- struct mlx5e_qos_node *node = NULL;
-
- hash_for_each_possible_rcu(priv->htb.qos_tc2node, node, hnode, classid) {
- if (node->classid == classid)
- break;
- }
-
- return node;
-}
-
-static void mlx5e_sw_node_delete(struct mlx5e_priv *priv, struct mlx5e_qos_node *node)
-{
- hash_del_rcu(&node->hnode);
- if (node->qid != MLX5E_QOS_QID_INNER) {
- __clear_bit(node->qid, priv->htb.qos_used_qids);
- mlx5e_update_tx_netdev_queues(priv);
- }
- kfree_rcu(node, rcu);
-}
-
/* TX datapath API */
-static u16 mlx5e_qid_from_qos(struct mlx5e_channels *chs, u16 qid)
+u16 mlx5e_qid_from_qos(struct mlx5e_channels *chs, u16 qid)
{
/* These channel params are safe to access from the datapath, because:
- * 1. This function is called only after checking priv->htb.maj_id != 0,
+ * 1. This function is called only after checking selq->htb_maj_id != 0,
* and the number of queues can't change while HTB offload is active.
- * 2. When priv->htb.maj_id becomes 0, synchronize_rcu waits for
+ * 2. When selq->htb_maj_id becomes 0, synchronize_rcu waits for
* mlx5e_select_queue to finish while holding priv->state_lock,
* preventing other code from changing the number of queues.
*/
@@ -150,30 +49,7 @@ static u16 mlx5e_qid_from_qos(struct mlx5e_channels *chs, u16 qid)
return (chs->params.num_channels + is_ptp) * mlx5e_get_dcb_num_tc(&chs->params) + qid;
}
-int mlx5e_get_txq_by_classid(struct mlx5e_priv *priv, u16 classid)
-{
- struct mlx5e_qos_node *node;
- u16 qid;
- int res;
-
- rcu_read_lock();
-
- node = mlx5e_sw_node_find_rcu(priv, classid);
- if (!node) {
- res = -ENOENT;
- goto out;
- }
- qid = READ_ONCE(node->qid);
- if (qid == MLX5E_QOS_QID_INNER) {
- res = -EINVAL;
- goto out;
- }
- res = mlx5e_qid_from_qos(&priv->channels, qid);
-
-out:
- rcu_read_unlock();
- return res;
-}
+/* SQ lifecycle */
static struct mlx5e_txqsq *mlx5e_get_qos_sq(struct mlx5e_priv *priv, int qid)
{
@@ -190,10 +66,8 @@ static struct mlx5e_txqsq *mlx5e_get_qos_sq(struct mlx5e_priv *priv, int qid)
return mlx5e_state_dereference(priv, qos_sqs[qid]);
}
-/* SQ lifecycle */
-
-static int mlx5e_open_qos_sq(struct mlx5e_priv *priv, struct mlx5e_channels *chs,
- struct mlx5e_qos_node *node)
+int mlx5e_open_qos_sq(struct mlx5e_priv *priv, struct mlx5e_channels *chs,
+ u16 node_qid, u32 hw_id)
{
struct mlx5e_create_cq_param ccp = {};
struct mlx5e_txqsq __rcu **qos_sqs;
@@ -206,13 +80,13 @@ static int mlx5e_open_qos_sq(struct mlx5e_priv *priv, struct mlx5e_channels *chs
params = &chs->params;
- txq_ix = mlx5e_qid_from_qos(chs, node->qid);
+ txq_ix = mlx5e_qid_from_qos(chs, node_qid);
- WARN_ON(node->qid > priv->htb.max_qos_sqs);
- if (node->qid == priv->htb.max_qos_sqs) {
+ WARN_ON(node_qid > priv->htb_max_qos_sqs);
+ if (node_qid == priv->htb_max_qos_sqs) {
struct mlx5e_sq_stats *stats, **stats_list = NULL;
- if (priv->htb.max_qos_sqs == 0) {
+ if (priv->htb_max_qos_sqs == 0) {
stats_list = kvcalloc(mlx5e_qos_max_leaf_nodes(priv->mdev),
sizeof(*stats_list),
GFP_KERNEL);
@@ -225,16 +99,16 @@ static int mlx5e_open_qos_sq(struct mlx5e_priv *priv, struct mlx5e_channels *chs
return -ENOMEM;
}
if (stats_list)
- WRITE_ONCE(priv->htb.qos_sq_stats, stats_list);
- WRITE_ONCE(priv->htb.qos_sq_stats[node->qid], stats);
- /* Order max_qos_sqs increment after writing the array pointer.
+ WRITE_ONCE(priv->htb_qos_sq_stats, stats_list);
+ WRITE_ONCE(priv->htb_qos_sq_stats[node_qid], stats);
+ /* Order htb_max_qos_sqs increment after writing the array pointer.
* Pairs with smp_load_acquire in en_stats.c.
*/
- smp_store_release(&priv->htb.max_qos_sqs, priv->htb.max_qos_sqs + 1);
+ smp_store_release(&priv->htb_max_qos_sqs, priv->htb_max_qos_sqs + 1);
}
- ix = node->qid % params->num_channels;
- qid = node->qid / params->num_channels;
+ ix = node_qid % params->num_channels;
+ qid = node_qid / params->num_channels;
c = chs->c[ix];
qos_sqs = mlx5e_state_dereference(priv, c->qos_sqs);
@@ -253,8 +127,8 @@ static int mlx5e_open_qos_sq(struct mlx5e_priv *priv, struct mlx5e_channels *chs
if (err)
goto err_free_sq;
err = mlx5e_open_txqsq(c, priv->tisn[c->lag_port][0], txq_ix, params,
- &param_sq, sq, 0, node->hw_id,
- priv->htb.qos_sq_stats[node->qid]);
+ &param_sq, sq, 0, hw_id,
+ priv->htb_qos_sq_stats[node_qid]);
if (err)
goto err_close_cq;
@@ -269,13 +143,29 @@ err_free_sq:
return err;
}
-static void mlx5e_activate_qos_sq(struct mlx5e_priv *priv, struct mlx5e_qos_node *node)
+static int mlx5e_open_qos_sq_cb_wrapper(void *data, u16 node_qid, u32 hw_id)
{
+ struct qos_sq_callback_params *cb_params = data;
+
+ return mlx5e_open_qos_sq(cb_params->priv, cb_params->chs, node_qid, hw_id);
+}
+
+int mlx5e_activate_qos_sq(void *data, u16 node_qid, u32 hw_id)
+{
+ struct mlx5e_priv *priv = data;
struct mlx5e_txqsq *sq;
+ u16 qid;
- sq = mlx5e_get_qos_sq(priv, node->qid);
+ sq = mlx5e_get_qos_sq(priv, node_qid);
- WRITE_ONCE(priv->txq2sq[mlx5e_qid_from_qos(&priv->channels, node->qid)], sq);
+ qid = mlx5e_qid_from_qos(&priv->channels, node_qid);
+
+ /* If it's a new queue, it will be marked as started at this point.
+ * Stop it before updating txq2sq.
+ */
+ mlx5e_tx_disable_queue(netdev_get_tx_queue(priv->netdev, qid));
+
+ priv->txq2sq[qid] = sq;
/* Make the change to txq2sq visible before the queue is started.
* As mlx5e_xmit runs under a spinlock, there is an implicit ACQUIRE,
@@ -283,11 +173,13 @@ static void mlx5e_activate_qos_sq(struct mlx5e_priv *priv, struct mlx5e_qos_node
*/
smp_wmb();
- qos_dbg(priv->mdev, "Activate QoS SQ qid %u\n", node->qid);
+ qos_dbg(priv->mdev, "Activate QoS SQ qid %u\n", node_qid);
mlx5e_activate_txqsq(sq);
+
+ return 0;
}
-static void mlx5e_deactivate_qos_sq(struct mlx5e_priv *priv, u16 qid)
+void mlx5e_deactivate_qos_sq(struct mlx5e_priv *priv, u16 qid)
{
struct mlx5e_txqsq *sq;
@@ -298,11 +190,16 @@ static void mlx5e_deactivate_qos_sq(struct mlx5e_priv *priv, u16 qid)
qos_dbg(priv->mdev, "Deactivate QoS SQ qid %u\n", qid);
mlx5e_deactivate_txqsq(sq);
- /* The queue is disabled, no synchronization with datapath is needed. */
priv->txq2sq[mlx5e_qid_from_qos(&priv->channels, qid)] = NULL;
+
+ /* Make the change to txq2sq visible before the queue is started again.
+ * As mlx5e_xmit runs under a spinlock, there is an implicit ACQUIRE,
+ * which pairs with this barrier.
+ */
+ smp_wmb();
}
-static void mlx5e_close_qos_sq(struct mlx5e_priv *priv, u16 qid)
+void mlx5e_close_qos_sq(struct mlx5e_priv *priv, u16 qid)
{
struct mlx5e_txqsq __rcu **qos_sqs;
struct mlx5e_params *params;
@@ -352,7 +249,7 @@ void mlx5e_qos_close_queues(struct mlx5e_channel *c)
kvfree(qos_sqs);
}
-static void mlx5e_qos_close_all_queues(struct mlx5e_channels *chs)
+void mlx5e_qos_close_all_queues(struct mlx5e_channels *chs)
{
int i;
@@ -360,7 +257,7 @@ static void mlx5e_qos_close_all_queues(struct mlx5e_channels *chs)
mlx5e_qos_close_queues(chs->c[i]);
}
-static int mlx5e_qos_alloc_queues(struct mlx5e_priv *priv, struct mlx5e_channels *chs)
+int mlx5e_qos_alloc_queues(struct mlx5e_priv *priv, struct mlx5e_channels *chs)
{
u16 qos_sqs_size;
int i;
@@ -396,24 +293,20 @@ err_free:
int mlx5e_qos_open_queues(struct mlx5e_priv *priv, struct mlx5e_channels *chs)
{
- struct mlx5e_qos_node *node = NULL;
- int bkt, err;
-
- if (!priv->htb.maj_id)
- return 0;
+ struct qos_sq_callback_params callback_params;
+ int err;
err = mlx5e_qos_alloc_queues(priv, chs);
if (err)
return err;
- hash_for_each(priv->htb.qos_tc2node, bkt, node, hnode) {
- if (node->qid == MLX5E_QOS_QID_INNER)
- continue;
- err = mlx5e_open_qos_sq(priv, chs, node);
- if (err) {
- mlx5e_qos_close_all_queues(chs);
- return err;
- }
+ callback_params.priv = priv;
+ callback_params.chs = chs;
+
+ err = mlx5e_htb_enumerate_leaves(priv->htb, mlx5e_open_qos_sq_cb_wrapper, &callback_params);
+ if (err) {
+ mlx5e_qos_close_all_queues(chs);
+ return err;
}
return 0;
@@ -421,14 +314,7 @@ int mlx5e_qos_open_queues(struct mlx5e_priv *priv, struct mlx5e_channels *chs)
void mlx5e_qos_activate_queues(struct mlx5e_priv *priv)
{
- struct mlx5e_qos_node *node = NULL;
- int bkt;
-
- hash_for_each(priv->htb.qos_tc2node, bkt, node, hnode) {
- if (node->qid == MLX5E_QOS_QID_INNER)
- continue;
- mlx5e_activate_qos_sq(priv, node);
- }
+ mlx5e_htb_enumerate_leaves(priv->htb, mlx5e_activate_qos_sq, priv);
}
void mlx5e_qos_deactivate_queues(struct mlx5e_channel *c)
@@ -457,7 +343,7 @@ void mlx5e_qos_deactivate_queues(struct mlx5e_channel *c)
}
}
-static void mlx5e_qos_deactivate_all_queues(struct mlx5e_channels *chs)
+void mlx5e_qos_deactivate_all_queues(struct mlx5e_channels *chs)
{
int i;
@@ -465,278 +351,14 @@ static void mlx5e_qos_deactivate_all_queues(struct mlx5e_channels *chs)
mlx5e_qos_deactivate_queues(chs->c[i]);
}
-/* HTB API */
-
-int mlx5e_htb_root_add(struct mlx5e_priv *priv, u16 htb_maj_id, u16 htb_defcls,
- struct netlink_ext_ack *extack)
-{
- struct mlx5e_qos_node *root;
- bool opened;
- int err;
-
- qos_dbg(priv->mdev, "TC_HTB_CREATE handle %04x:, default :%04x\n", htb_maj_id, htb_defcls);
-
- if (!mlx5_qos_is_supported(priv->mdev)) {
- NL_SET_ERR_MSG_MOD(extack,
- "Missing QoS capabilities. Try disabling SRIOV or use a supported device.");
- return -EOPNOTSUPP;
- }
-
- opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
- if (opened) {
- err = mlx5e_qos_alloc_queues(priv, &priv->channels);
- if (err)
- return err;
- }
-
- root = mlx5e_sw_node_create_root(priv);
- if (IS_ERR(root)) {
- err = PTR_ERR(root);
- goto err_free_queues;
- }
-
- err = mlx5_qos_create_root_node(priv->mdev, &root->hw_id);
- if (err) {
- NL_SET_ERR_MSG_MOD(extack, "Firmware error. Try upgrading firmware.");
- goto err_sw_node_delete;
- }
-
- WRITE_ONCE(priv->htb.defcls, htb_defcls);
- /* Order maj_id after defcls - pairs with
- * mlx5e_select_queue/mlx5e_select_htb_queues.
- */
- smp_store_release(&priv->htb.maj_id, htb_maj_id);
-
- return 0;
-
-err_sw_node_delete:
- mlx5e_sw_node_delete(priv, root);
-
-err_free_queues:
- if (opened)
- mlx5e_qos_close_all_queues(&priv->channels);
- return err;
-}
-
-int mlx5e_htb_root_del(struct mlx5e_priv *priv)
-{
- struct mlx5e_qos_node *root;
- int err;
-
- qos_dbg(priv->mdev, "TC_HTB_DESTROY\n");
-
- WRITE_ONCE(priv->htb.maj_id, 0);
- synchronize_rcu(); /* Sync with mlx5e_select_htb_queue and TX data path. */
-
- root = mlx5e_sw_node_find(priv, MLX5E_HTB_CLASSID_ROOT);
- if (!root) {
- qos_err(priv->mdev, "Failed to find the root node in the QoS tree\n");
- return -ENOENT;
- }
- err = mlx5_qos_destroy_node(priv->mdev, root->hw_id);
- if (err)
- qos_err(priv->mdev, "Failed to destroy root node %u, err = %d\n",
- root->hw_id, err);
- mlx5e_sw_node_delete(priv, root);
-
- mlx5e_qos_deactivate_all_queues(&priv->channels);
- mlx5e_qos_close_all_queues(&priv->channels);
-
- return err;
-}
-
-static int mlx5e_htb_convert_rate(struct mlx5e_priv *priv, u64 rate,
- struct mlx5e_qos_node *parent, u32 *bw_share)
-{
- u64 share = 0;
-
- while (parent->classid != MLX5E_HTB_CLASSID_ROOT && !parent->max_average_bw)
- parent = parent->parent;
-
- if (parent->max_average_bw)
- share = div64_u64(div_u64(rate * 100, BYTES_IN_MBIT),
- parent->max_average_bw);
- else
- share = 101;
-
- *bw_share = share == 0 ? 1 : share > 100 ? 0 : share;
-
- qos_dbg(priv->mdev, "Convert: rate %llu, parent ceil %llu -> bw_share %u\n",
- rate, (u64)parent->max_average_bw * BYTES_IN_MBIT, *bw_share);
-
- return 0;
-}
-
-static void mlx5e_htb_convert_ceil(struct mlx5e_priv *priv, u64 ceil, u32 *max_average_bw)
-{
- *max_average_bw = div_u64(ceil, BYTES_IN_MBIT);
-
- qos_dbg(priv->mdev, "Convert: ceil %llu -> max_average_bw %u\n",
- ceil, *max_average_bw);
-}
-
-int mlx5e_htb_leaf_alloc_queue(struct mlx5e_priv *priv, u16 classid,
- u32 parent_classid, u64 rate, u64 ceil,
- struct netlink_ext_ack *extack)
-{
- struct mlx5e_qos_node *node, *parent;
- int qid;
- int err;
-
- qos_dbg(priv->mdev, "TC_HTB_LEAF_ALLOC_QUEUE classid %04x, parent %04x, rate %llu, ceil %llu\n",
- classid, parent_classid, rate, ceil);
-
- qid = mlx5e_find_unused_qos_qid(priv);
- if (qid < 0) {
- NL_SET_ERR_MSG_MOD(extack, "Maximum amount of leaf classes is reached.");
- return qid;
- }
-
- parent = mlx5e_sw_node_find(priv, parent_classid);
- if (!parent)
- return -EINVAL;
-
- node = mlx5e_sw_node_create_leaf(priv, classid, qid, parent);
- if (IS_ERR(node))
- return PTR_ERR(node);
-
- node->rate = rate;
- mlx5e_htb_convert_rate(priv, rate, node->parent, &node->bw_share);
- mlx5e_htb_convert_ceil(priv, ceil, &node->max_average_bw);
-
- err = mlx5_qos_create_leaf_node(priv->mdev, node->parent->hw_id,
- node->bw_share, node->max_average_bw,
- &node->hw_id);
- if (err) {
- NL_SET_ERR_MSG_MOD(extack, "Firmware error when creating a leaf node.");
- qos_err(priv->mdev, "Failed to create a leaf node (class %04x), err = %d\n",
- classid, err);
- mlx5e_sw_node_delete(priv, node);
- return err;
- }
-
- if (test_bit(MLX5E_STATE_OPENED, &priv->state)) {
- err = mlx5e_open_qos_sq(priv, &priv->channels, node);
- if (err) {
- NL_SET_ERR_MSG_MOD(extack, "Error creating an SQ.");
- qos_warn(priv->mdev, "Failed to create a QoS SQ (class %04x), err = %d\n",
- classid, err);
- } else {
- mlx5e_activate_qos_sq(priv, node);
- }
- }
-
- return mlx5e_qid_from_qos(&priv->channels, node->qid);
-}
-
-int mlx5e_htb_leaf_to_inner(struct mlx5e_priv *priv, u16 classid, u16 child_classid,
- u64 rate, u64 ceil, struct netlink_ext_ack *extack)
-{
- struct mlx5e_qos_node *node, *child;
- int err, tmp_err;
- u32 new_hw_id;
- u16 qid;
-
- qos_dbg(priv->mdev, "TC_HTB_LEAF_TO_INNER classid %04x, upcoming child %04x, rate %llu, ceil %llu\n",
- classid, child_classid, rate, ceil);
-
- node = mlx5e_sw_node_find(priv, classid);
- if (!node)
- return -ENOENT;
-
- err = mlx5_qos_create_inner_node(priv->mdev, node->parent->hw_id,
- node->bw_share, node->max_average_bw,
- &new_hw_id);
- if (err) {
- NL_SET_ERR_MSG_MOD(extack, "Firmware error when creating an inner node.");
- qos_err(priv->mdev, "Failed to create an inner node (class %04x), err = %d\n",
- classid, err);
- return err;
- }
-
- /* Intentionally reuse the qid for the upcoming first child. */
- child = mlx5e_sw_node_create_leaf(priv, child_classid, node->qid, node);
- if (IS_ERR(child)) {
- err = PTR_ERR(child);
- goto err_destroy_hw_node;
- }
-
- child->rate = rate;
- mlx5e_htb_convert_rate(priv, rate, node, &child->bw_share);
- mlx5e_htb_convert_ceil(priv, ceil, &child->max_average_bw);
-
- err = mlx5_qos_create_leaf_node(priv->mdev, new_hw_id, child->bw_share,
- child->max_average_bw, &child->hw_id);
- if (err) {
- NL_SET_ERR_MSG_MOD(extack, "Firmware error when creating a leaf node.");
- qos_err(priv->mdev, "Failed to create a leaf node (class %04x), err = %d\n",
- classid, err);
- goto err_delete_sw_node;
- }
-
- /* No fail point. */
-
- qid = node->qid;
- /* Pairs with mlx5e_get_txq_by_classid. */
- WRITE_ONCE(node->qid, MLX5E_QOS_QID_INNER);
-
- if (test_bit(MLX5E_STATE_OPENED, &priv->state)) {
- mlx5e_deactivate_qos_sq(priv, qid);
- mlx5e_close_qos_sq(priv, qid);
- }
-
- err = mlx5_qos_destroy_node(priv->mdev, node->hw_id);
- if (err) /* Not fatal. */
- qos_warn(priv->mdev, "Failed to destroy leaf node %u (class %04x), err = %d\n",
- node->hw_id, classid, err);
-
- node->hw_id = new_hw_id;
-
- if (test_bit(MLX5E_STATE_OPENED, &priv->state)) {
- err = mlx5e_open_qos_sq(priv, &priv->channels, child);
- if (err) {
- NL_SET_ERR_MSG_MOD(extack, "Error creating an SQ.");
- qos_warn(priv->mdev, "Failed to create a QoS SQ (class %04x), err = %d\n",
- classid, err);
- } else {
- mlx5e_activate_qos_sq(priv, child);
- }
- }
-
- return 0;
-
-err_delete_sw_node:
- child->qid = MLX5E_QOS_QID_INNER;
- mlx5e_sw_node_delete(priv, child);
-
-err_destroy_hw_node:
- tmp_err = mlx5_qos_destroy_node(priv->mdev, new_hw_id);
- if (tmp_err) /* Not fatal. */
- qos_warn(priv->mdev, "Failed to roll back creation of an inner node %u (class %04x), err = %d\n",
- new_hw_id, classid, tmp_err);
- return err;
-}
-
-static struct mlx5e_qos_node *mlx5e_sw_node_find_by_qid(struct mlx5e_priv *priv, u16 qid)
-{
- struct mlx5e_qos_node *node = NULL;
- int bkt;
-
- hash_for_each(priv->htb.qos_tc2node, bkt, node, hnode)
- if (node->qid == qid)
- break;
-
- return node;
-}
-
-static void mlx5e_reactivate_qos_sq(struct mlx5e_priv *priv, u16 qid, struct netdev_queue *txq)
+void mlx5e_reactivate_qos_sq(struct mlx5e_priv *priv, u16 qid, struct netdev_queue *txq)
{
qos_dbg(priv->mdev, "Reactivate QoS SQ qid %u\n", qid);
netdev_tx_reset_queue(txq);
netif_tx_start_queue(txq);
}
-static void mlx5e_reset_qdisc(struct net_device *dev, u16 qid)
+void mlx5e_reset_qdisc(struct net_device *dev, u16 qid)
{
struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, qid);
struct Qdisc *qdisc = dev_queue->qdisc_sleeping;
@@ -749,251 +371,65 @@ static void mlx5e_reset_qdisc(struct net_device *dev, u16 qid)
spin_unlock_bh(qdisc_lock(qdisc));
}
-int mlx5e_htb_leaf_del(struct mlx5e_priv *priv, u16 *classid,
- struct netlink_ext_ack *extack)
-{
- struct mlx5e_qos_node *node;
- struct netdev_queue *txq;
- u16 qid, moved_qid;
- bool opened;
- int err;
-
- qos_dbg(priv->mdev, "TC_HTB_LEAF_DEL classid %04x\n", *classid);
-
- node = mlx5e_sw_node_find(priv, *classid);
- if (!node)
- return -ENOENT;
-
- /* Store qid for reuse. */
- qid = node->qid;
-
- opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
- if (opened) {
- txq = netdev_get_tx_queue(priv->netdev,
- mlx5e_qid_from_qos(&priv->channels, qid));
- mlx5e_deactivate_qos_sq(priv, qid);
- mlx5e_close_qos_sq(priv, qid);
- }
-
- err = mlx5_qos_destroy_node(priv->mdev, node->hw_id);
- if (err) /* Not fatal. */
- qos_warn(priv->mdev, "Failed to destroy leaf node %u (class %04x), err = %d\n",
- node->hw_id, *classid, err);
-
- mlx5e_sw_node_delete(priv, node);
-
- moved_qid = mlx5e_qos_cur_leaf_nodes(priv);
-
- if (moved_qid == 0) {
- /* The last QoS SQ was just destroyed. */
- if (opened)
- mlx5e_reactivate_qos_sq(priv, qid, txq);
- return 0;
- }
- moved_qid--;
-
- if (moved_qid < qid) {
- /* The highest QoS SQ was just destroyed. */
- WARN(moved_qid != qid - 1, "Gaps in queue numeration: destroyed queue %u, the highest queue is %u",
- qid, moved_qid);
- if (opened)
- mlx5e_reactivate_qos_sq(priv, qid, txq);
- return 0;
- }
-
- WARN(moved_qid == qid, "Can't move node with qid %u to itself", qid);
- qos_dbg(priv->mdev, "Moving QoS SQ %u to %u\n", moved_qid, qid);
-
- node = mlx5e_sw_node_find_by_qid(priv, moved_qid);
- WARN(!node, "Could not find a node with qid %u to move to queue %u",
- moved_qid, qid);
-
- /* Stop traffic to the old queue. */
- WRITE_ONCE(node->qid, MLX5E_QOS_QID_INNER);
- __clear_bit(moved_qid, priv->htb.qos_used_qids);
-
- if (opened) {
- txq = netdev_get_tx_queue(priv->netdev,
- mlx5e_qid_from_qos(&priv->channels, moved_qid));
- mlx5e_deactivate_qos_sq(priv, moved_qid);
- mlx5e_close_qos_sq(priv, moved_qid);
- }
-
- /* Prevent packets from the old class from getting into the new one. */
- mlx5e_reset_qdisc(priv->netdev, moved_qid);
-
- __set_bit(qid, priv->htb.qos_used_qids);
- WRITE_ONCE(node->qid, qid);
-
- if (test_bit(MLX5E_STATE_OPENED, &priv->state)) {
- err = mlx5e_open_qos_sq(priv, &priv->channels, node);
- if (err) {
- NL_SET_ERR_MSG_MOD(extack, "Error creating an SQ.");
- qos_warn(priv->mdev, "Failed to create a QoS SQ (class %04x) while moving qid %u to %u, err = %d\n",
- node->classid, moved_qid, qid, err);
- } else {
- mlx5e_activate_qos_sq(priv, node);
- }
- }
-
- mlx5e_update_tx_netdev_queues(priv);
- if (opened)
- mlx5e_reactivate_qos_sq(priv, moved_qid, txq);
-
- *classid = node->classid;
- return 0;
-}
-
-int mlx5e_htb_leaf_del_last(struct mlx5e_priv *priv, u16 classid, bool force,
- struct netlink_ext_ack *extack)
+int mlx5e_htb_setup_tc(struct mlx5e_priv *priv, struct tc_htb_qopt_offload *htb_qopt)
{
- struct mlx5e_qos_node *node, *parent;
- u32 old_hw_id, new_hw_id;
- int err, saved_err = 0;
- u16 qid;
-
- qos_dbg(priv->mdev, "TC_HTB_LEAF_DEL_LAST%s classid %04x\n",
- force ? "_FORCE" : "", classid);
-
- node = mlx5e_sw_node_find(priv, classid);
- if (!node)
- return -ENOENT;
-
- err = mlx5_qos_create_leaf_node(priv->mdev, node->parent->parent->hw_id,
- node->parent->bw_share,
- node->parent->max_average_bw,
- &new_hw_id);
- if (err) {
- NL_SET_ERR_MSG_MOD(extack, "Firmware error when creating a leaf node.");
- qos_err(priv->mdev, "Failed to create a leaf node (class %04x), err = %d\n",
- classid, err);
- if (!force)
- return err;
- saved_err = err;
- }
-
- /* Store qid for reuse and prevent clearing the bit. */
- qid = node->qid;
- /* Pairs with mlx5e_get_txq_by_classid. */
- WRITE_ONCE(node->qid, MLX5E_QOS_QID_INNER);
-
- if (test_bit(MLX5E_STATE_OPENED, &priv->state)) {
- mlx5e_deactivate_qos_sq(priv, qid);
- mlx5e_close_qos_sq(priv, qid);
- }
-
- /* Prevent packets from the old class from getting into the new one. */
- mlx5e_reset_qdisc(priv->netdev, qid);
-
- err = mlx5_qos_destroy_node(priv->mdev, node->hw_id);
- if (err) /* Not fatal. */
- qos_warn(priv->mdev, "Failed to destroy leaf node %u (class %04x), err = %d\n",
- node->hw_id, classid, err);
-
- parent = node->parent;
- mlx5e_sw_node_delete(priv, node);
+ struct mlx5e_htb *htb = priv->htb;
+ int res;
- node = parent;
- WRITE_ONCE(node->qid, qid);
+ if (!htb && htb_qopt->command != TC_HTB_CREATE)
+ return -EINVAL;
- /* Early return on error in force mode. Parent will still be an inner
- * node to be deleted by a following delete operation.
- */
- if (saved_err)
- return saved_err;
-
- old_hw_id = node->hw_id;
- node->hw_id = new_hw_id;
-
- if (test_bit(MLX5E_STATE_OPENED, &priv->state)) {
- err = mlx5e_open_qos_sq(priv, &priv->channels, node);
- if (err) {
- NL_SET_ERR_MSG_MOD(extack, "Error creating an SQ.");
- qos_warn(priv->mdev, "Failed to create a QoS SQ (class %04x), err = %d\n",
- classid, err);
- } else {
- mlx5e_activate_qos_sq(priv, node);
+ switch (htb_qopt->command) {
+ case TC_HTB_CREATE:
+ if (!mlx5_qos_is_supported(priv->mdev)) {
+ NL_SET_ERR_MSG_MOD(htb_qopt->extack,
+ "Missing QoS capabilities. Try disabling SRIOV or use a supported device.");
+ return -EOPNOTSUPP;
}
- }
-
- err = mlx5_qos_destroy_node(priv->mdev, old_hw_id);
- if (err) /* Not fatal. */
- qos_warn(priv->mdev, "Failed to destroy leaf node %u (class %04x), err = %d\n",
- node->hw_id, classid, err);
-
- return 0;
-}
-
-static int mlx5e_qos_update_children(struct mlx5e_priv *priv, struct mlx5e_qos_node *node,
- struct netlink_ext_ack *extack)
-{
- struct mlx5e_qos_node *child;
- int err = 0;
- int bkt;
-
- hash_for_each(priv->htb.qos_tc2node, bkt, child, hnode) {
- u32 old_bw_share = child->bw_share;
- int err_one;
-
- if (child->parent != node)
- continue;
-
- mlx5e_htb_convert_rate(priv, child->rate, node, &child->bw_share);
- if (child->bw_share == old_bw_share)
- continue;
-
- err_one = mlx5_qos_update_node(priv->mdev, child->hw_id, child->bw_share,
- child->max_average_bw, child->hw_id);
- if (!err && err_one) {
- err = err_one;
-
- NL_SET_ERR_MSG_MOD(extack, "Firmware error when modifying a child node.");
- qos_err(priv->mdev, "Failed to modify a child node (class %04x), err = %d\n",
- node->classid, err);
+ priv->htb = mlx5e_htb_alloc();
+ htb = priv->htb;
+ if (!htb)
+ return -ENOMEM;
+ res = mlx5e_htb_init(htb, htb_qopt, priv->netdev, priv->mdev, &priv->selq, priv);
+ if (res) {
+ mlx5e_htb_free(htb);
+ priv->htb = NULL;
}
+ return res;
+ case TC_HTB_DESTROY:
+ mlx5e_htb_cleanup(htb);
+ mlx5e_htb_free(htb);
+ priv->htb = NULL;
+ return 0;
+ case TC_HTB_LEAF_ALLOC_QUEUE:
+ res = mlx5e_htb_leaf_alloc_queue(htb, htb_qopt->classid, htb_qopt->parent_classid,
+ htb_qopt->rate, htb_qopt->ceil, htb_qopt->extack);
+ if (res < 0)
+ return res;
+ htb_qopt->qid = res;
+ return 0;
+ case TC_HTB_LEAF_TO_INNER:
+ return mlx5e_htb_leaf_to_inner(htb, htb_qopt->parent_classid, htb_qopt->classid,
+ htb_qopt->rate, htb_qopt->ceil, htb_qopt->extack);
+ case TC_HTB_LEAF_DEL:
+ return mlx5e_htb_leaf_del(htb, &htb_qopt->classid, htb_qopt->extack);
+ case TC_HTB_LEAF_DEL_LAST:
+ case TC_HTB_LEAF_DEL_LAST_FORCE:
+ return mlx5e_htb_leaf_del_last(htb, htb_qopt->classid,
+ htb_qopt->command == TC_HTB_LEAF_DEL_LAST_FORCE,
+ htb_qopt->extack);
+ case TC_HTB_NODE_MODIFY:
+ return mlx5e_htb_node_modify(htb, htb_qopt->classid, htb_qopt->rate, htb_qopt->ceil,
+ htb_qopt->extack);
+ case TC_HTB_LEAF_QUERY_QUEUE:
+ res = mlx5e_htb_get_txq_by_classid(htb, htb_qopt->classid);
+ if (res < 0)
+ return res;
+ htb_qopt->qid = res;
+ return 0;
+ default:
+ return -EOPNOTSUPP;
}
-
- return err;
-}
-
-int mlx5e_htb_node_modify(struct mlx5e_priv *priv, u16 classid, u64 rate, u64 ceil,
- struct netlink_ext_ack *extack)
-{
- u32 bw_share, max_average_bw;
- struct mlx5e_qos_node *node;
- bool ceil_changed = false;
- int err;
-
- qos_dbg(priv->mdev, "TC_HTB_LEAF_MODIFY classid %04x, rate %llu, ceil %llu\n",
- classid, rate, ceil);
-
- node = mlx5e_sw_node_find(priv, classid);
- if (!node)
- return -ENOENT;
-
- node->rate = rate;
- mlx5e_htb_convert_rate(priv, rate, node->parent, &bw_share);
- mlx5e_htb_convert_ceil(priv, ceil, &max_average_bw);
-
- err = mlx5_qos_update_node(priv->mdev, node->parent->hw_id, bw_share,
- max_average_bw, node->hw_id);
- if (err) {
- NL_SET_ERR_MSG_MOD(extack, "Firmware error when modifying a node.");
- qos_err(priv->mdev, "Failed to modify a node (class %04x), err = %d\n",
- classid, err);
- return err;
- }
-
- if (max_average_bw != node->max_average_bw)
- ceil_changed = true;
-
- node->bw_share = bw_share;
- node->max_average_bw = max_average_bw;
-
- if (ceil_changed)
- err = mlx5e_qos_update_children(priv, node, extack);
-
- return err;
}
struct mlx5e_mqprio_rl {
@@ -1079,3 +515,4 @@ int mlx5e_mqprio_rl_get_node_hw_id(struct mlx5e_mqprio_rl *rl, int tc, u32 *hw_i
*hw_id = rl->leaves_id[tc];
return 0;
}
+
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.h b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.h
index b7558907ba20..4947afa23b73 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.h
@@ -6,41 +6,39 @@
#include <linux/mlx5/driver.h>
-#define MLX5E_QOS_MAX_LEAF_NODES 256
+#define BYTES_IN_MBIT 125000
struct mlx5e_priv;
+struct mlx5e_htb;
struct mlx5e_channels;
struct mlx5e_channel;
+struct tc_htb_qopt_offload;
int mlx5e_qos_bytes_rate_check(struct mlx5_core_dev *mdev, u64 nbytes);
int mlx5e_qos_max_leaf_nodes(struct mlx5_core_dev *mdev);
-int mlx5e_qos_cur_leaf_nodes(struct mlx5e_priv *priv);
-
-/* TX datapath API */
-int mlx5e_get_txq_by_classid(struct mlx5e_priv *priv, u16 classid);
-struct mlx5e_txqsq *mlx5e_get_sq(struct mlx5e_priv *priv, int qid);
/* SQ lifecycle */
+int mlx5e_open_qos_sq(struct mlx5e_priv *priv, struct mlx5e_channels *chs,
+ u16 node_qid, u32 hw_id);
+int mlx5e_activate_qos_sq(void *data, u16 node_qid, u32 hw_id);
+void mlx5e_deactivate_qos_sq(struct mlx5e_priv *priv, u16 qid);
+void mlx5e_close_qos_sq(struct mlx5e_priv *priv, u16 qid);
+void mlx5e_reactivate_qos_sq(struct mlx5e_priv *priv, u16 qid, struct netdev_queue *txq);
+void mlx5e_reset_qdisc(struct net_device *dev, u16 qid);
+
int mlx5e_qos_open_queues(struct mlx5e_priv *priv, struct mlx5e_channels *chs);
void mlx5e_qos_activate_queues(struct mlx5e_priv *priv);
void mlx5e_qos_deactivate_queues(struct mlx5e_channel *c);
+void mlx5e_qos_deactivate_all_queues(struct mlx5e_channels *chs);
void mlx5e_qos_close_queues(struct mlx5e_channel *c);
+void mlx5e_qos_close_all_queues(struct mlx5e_channels *chs);
+int mlx5e_qos_alloc_queues(struct mlx5e_priv *priv, struct mlx5e_channels *chs);
+
+/* TX datapath API */
+u16 mlx5e_qid_from_qos(struct mlx5e_channels *chs, u16 qid);
/* HTB API */
-int mlx5e_htb_root_add(struct mlx5e_priv *priv, u16 htb_maj_id, u16 htb_defcls,
- struct netlink_ext_ack *extack);
-int mlx5e_htb_root_del(struct mlx5e_priv *priv);
-int mlx5e_htb_leaf_alloc_queue(struct mlx5e_priv *priv, u16 classid,
- u32 parent_classid, u64 rate, u64 ceil,
- struct netlink_ext_ack *extack);
-int mlx5e_htb_leaf_to_inner(struct mlx5e_priv *priv, u16 classid, u16 child_classid,
- u64 rate, u64 ceil, struct netlink_ext_ack *extack);
-int mlx5e_htb_leaf_del(struct mlx5e_priv *priv, u16 *classid,
- struct netlink_ext_ack *extack);
-int mlx5e_htb_leaf_del_last(struct mlx5e_priv *priv, u16 classid, bool force,
- struct netlink_ext_ack *extack);
-int mlx5e_htb_node_modify(struct mlx5e_priv *priv, u16 classid, u64 rate, u64 ceil,
- struct netlink_ext_ack *extack);
+int mlx5e_htb_setup_tc(struct mlx5e_priv *priv, struct tc_htb_qopt_offload *htb);
/* MQPRIO TX rate limit */
struct mlx5e_mqprio_rl;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c
index 9c076aa20306..b6f5c1bcdbcd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c
@@ -183,18 +183,7 @@ void mlx5e_rep_bond_unslave(struct mlx5_eswitch *esw,
static bool mlx5e_rep_is_lag_netdev(struct net_device *netdev)
{
- struct mlx5e_rep_priv *rpriv;
- struct mlx5e_priv *priv;
-
- /* A given netdev is not a representor or not a slave of LAG configuration */
- if (!mlx5e_eswitch_rep(netdev) || !netif_is_lag_port(netdev))
- return false;
-
- priv = netdev_priv(netdev);
- rpriv = priv->ppriv;
-
- /* Egress acl forward to vport is supported only non-uplink representor */
- return rpriv->rep->vport != MLX5_VPORT_UPLINK;
+ return netif_is_lag_port(netdev) && mlx5e_eswitch_vf_rep(netdev);
}
static void mlx5e_rep_changelowerstate_event(struct net_device *netdev, void *ptr)
@@ -210,9 +199,6 @@ static void mlx5e_rep_changelowerstate_event(struct net_device *netdev, void *pt
u16 fwd_vport_num;
int err;
- if (!mlx5e_rep_is_lag_netdev(netdev))
- return;
-
info = ptr;
lag_info = info->lower_state_info;
/* This is not an event of a representor becoming active slave */
@@ -266,9 +252,6 @@ static void mlx5e_rep_changeupper_event(struct net_device *netdev, void *ptr)
struct net_device *lag_dev;
struct mlx5e_priv *priv;
- if (!mlx5e_rep_is_lag_netdev(netdev))
- return;
-
priv = netdev_priv(netdev);
rpriv = priv->ppriv;
lag_dev = info->upper_dev;
@@ -293,6 +276,19 @@ static int mlx5e_rep_esw_bond_netevent(struct notifier_block *nb,
unsigned long event, void *ptr)
{
struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
+ struct mlx5e_rep_priv *rpriv;
+ struct mlx5e_rep_bond *bond;
+ struct mlx5e_priv *priv;
+
+ if (!mlx5e_rep_is_lag_netdev(netdev))
+ return NOTIFY_DONE;
+
+ bond = container_of(nb, struct mlx5e_rep_bond, nb);
+ priv = netdev_priv(netdev);
+ rpriv = mlx5_eswitch_get_uplink_priv(priv->mdev->priv.eswitch, REP_ETH);
+ /* Verify VF representor is on the same device of the bond handling the netevent. */
+ if (rpriv->uplink_priv.bond != bond)
+ return NOTIFY_DONE;
switch (event) {
case NETDEV_CHANGELOWERSTATE:
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c
index c6d2f8c78db7..8099a21e674c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c
@@ -164,6 +164,36 @@ static int mlx5_esw_bridge_port_changeupper(struct notifier_block *nb, void *ptr
return err;
}
+static int
+mlx5_esw_bridge_changeupper_validate_netdev(void *ptr)
+{
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ struct netdev_notifier_changeupper_info *info = ptr;
+ struct net_device *upper = info->upper_dev;
+ struct net_device *lower;
+ struct list_head *iter;
+
+ if (!netif_is_bridge_master(upper) || !netif_is_lag_master(dev))
+ return 0;
+
+ netdev_for_each_lower_dev(dev, lower, iter) {
+ struct mlx5_core_dev *mdev;
+ struct mlx5e_priv *priv;
+
+ if (!mlx5e_eswitch_rep(lower))
+ continue;
+
+ priv = netdev_priv(lower);
+ mdev = priv->mdev;
+ if (!mlx5_lag_is_active(mdev))
+ return -EAGAIN;
+ if (!mlx5_lag_is_shared_fdb(mdev))
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
static int mlx5_esw_bridge_switchdev_port_event(struct notifier_block *nb,
unsigned long event, void *ptr)
{
@@ -171,6 +201,7 @@ static int mlx5_esw_bridge_switchdev_port_event(struct notifier_block *nb,
switch (event) {
case NETDEV_PRECHANGEUPPER:
+ err = mlx5_esw_bridge_changeupper_validate_netdev(ptr);
break;
case NETDEV_CHANGEUPPER:
@@ -269,6 +300,12 @@ mlx5_esw_bridge_port_obj_attr_set(struct net_device *dev,
err = mlx5_esw_bridge_vlan_filtering_set(vport_num, esw_owner_vhca_id,
attr->u.vlan_filtering, br_offloads);
break;
+ case SWITCHDEV_ATTR_ID_BRIDGE_VLAN_PROTOCOL:
+ err = mlx5_esw_bridge_vlan_proto_set(vport_num,
+ esw_owner_vhca_id,
+ attr->u.vlan_protocol,
+ br_offloads);
+ break;
default:
err = -EOPNOTSUPP;
}
@@ -491,7 +528,7 @@ void mlx5e_rep_bridge_init(struct mlx5e_priv *priv)
}
br_offloads->netdev_nb.notifier_call = mlx5_esw_bridge_switchdev_port_event;
- err = register_netdevice_notifier(&br_offloads->netdev_nb);
+ err = register_netdevice_notifier_net(&init_net, &br_offloads->netdev_nb);
if (err) {
esw_warn(mdev, "Failed to register bridge offloads netdevice notifier (err=%d)\n",
err);
@@ -509,7 +546,9 @@ err_register_swdev_blk:
err_register_swdev:
destroy_workqueue(br_offloads->wq);
err_alloc_wq:
+ rtnl_lock();
mlx5_esw_bridge_cleanup(esw);
+ rtnl_unlock();
}
void mlx5e_rep_bridge_cleanup(struct mlx5e_priv *priv)
@@ -524,7 +563,7 @@ void mlx5e_rep_bridge_cleanup(struct mlx5e_priv *priv)
return;
cancel_delayed_work_sync(&br_offloads->update_work);
- unregister_netdevice_notifier(&br_offloads->netdev_nb);
+ unregister_netdevice_notifier_net(&init_net, &br_offloads->netdev_nb);
unregister_switchdev_blocking_notifier(&br_offloads->nb_blk);
unregister_switchdev_notifier(&br_offloads->nb);
destroy_workqueue(br_offloads->wq);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
index fcb0892c08a9..fac7e3ff2674 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
@@ -21,6 +21,7 @@
#include "en/tc/sample.h"
#include "en_accel/ipsec_rxtx.h"
#include "en/tc/int_port.h"
+#include "en/tc/act/act.h"
struct mlx5e_rep_indr_block_priv {
struct net_device *netdev;
@@ -263,14 +264,14 @@ int mlx5e_rep_tc_init(struct mlx5e_rep_priv *rpriv)
INIT_LIST_HEAD(&uplink_priv->unready_flows);
/* init shared tc flow table */
- err = mlx5e_tc_esw_init(&uplink_priv->tc_ht);
+ err = mlx5e_tc_esw_init(uplink_priv);
return err;
}
void mlx5e_rep_tc_cleanup(struct mlx5e_rep_priv *rpriv)
{
/* delete shared tc flow table */
- mlx5e_tc_esw_cleanup(&rpriv->uplink_priv.tc_ht);
+ mlx5e_tc_esw_cleanup(&rpriv->uplink_priv);
mutex_destroy(&rpriv->uplink_priv.unready_flows_lock);
}
@@ -511,12 +512,129 @@ mlx5e_rep_indr_setup_block(struct net_device *netdev, struct Qdisc *sch,
return 0;
}
+static int
+mlx5e_rep_indr_replace_act(struct mlx5e_rep_priv *rpriv,
+ struct flow_offload_action *fl_act)
+
+{
+ struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ enum mlx5_flow_namespace_type ns_type;
+ struct flow_action_entry *action;
+ struct mlx5e_tc_act *act;
+ bool add = false;
+ int i;
+
+ /* There is no use case currently for more than one action (e.g. pedit).
+ * when there will be, need to handle cleaning multiple actions on err.
+ */
+ if (!flow_offload_has_one_action(&fl_act->action))
+ return -EOPNOTSUPP;
+
+ if (esw && esw->mode == MLX5_ESWITCH_OFFLOADS)
+ ns_type = MLX5_FLOW_NAMESPACE_FDB;
+ else
+ ns_type = MLX5_FLOW_NAMESPACE_KERNEL;
+
+ flow_action_for_each(i, action, &fl_act->action) {
+ act = mlx5e_tc_act_get(action->id, ns_type);
+ if (!act)
+ continue;
+
+ if (!act->offload_action)
+ continue;
+
+ if (!act->offload_action(priv, fl_act, action))
+ add = true;
+ }
+
+ return add ? 0 : -EOPNOTSUPP;
+}
+
+static int
+mlx5e_rep_indr_destroy_act(struct mlx5e_rep_priv *rpriv,
+ struct flow_offload_action *fl_act)
+{
+ struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ enum mlx5_flow_namespace_type ns_type;
+ struct mlx5e_tc_act *act;
+
+ if (esw && esw->mode == MLX5_ESWITCH_OFFLOADS)
+ ns_type = MLX5_FLOW_NAMESPACE_FDB;
+ else
+ ns_type = MLX5_FLOW_NAMESPACE_KERNEL;
+
+ act = mlx5e_tc_act_get(fl_act->id, ns_type);
+ if (!act || !act->destroy_action)
+ return -EOPNOTSUPP;
+
+ return act->destroy_action(priv, fl_act);
+}
+
+static int
+mlx5e_rep_indr_stats_act(struct mlx5e_rep_priv *rpriv,
+ struct flow_offload_action *fl_act)
+
+{
+ struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ enum mlx5_flow_namespace_type ns_type;
+ struct mlx5e_tc_act *act;
+
+ if (esw && esw->mode == MLX5_ESWITCH_OFFLOADS)
+ ns_type = MLX5_FLOW_NAMESPACE_FDB;
+ else
+ ns_type = MLX5_FLOW_NAMESPACE_KERNEL;
+
+ act = mlx5e_tc_act_get(fl_act->id, ns_type);
+ if (!act || !act->stats_action)
+ return -EOPNOTSUPP;
+
+ return act->stats_action(priv, fl_act);
+}
+
+static int
+mlx5e_rep_indr_setup_act(struct mlx5e_rep_priv *rpriv,
+ struct flow_offload_action *fl_act)
+{
+ switch (fl_act->command) {
+ case FLOW_ACT_REPLACE:
+ return mlx5e_rep_indr_replace_act(rpriv, fl_act);
+ case FLOW_ACT_DESTROY:
+ return mlx5e_rep_indr_destroy_act(rpriv, fl_act);
+ case FLOW_ACT_STATS:
+ return mlx5e_rep_indr_stats_act(rpriv, fl_act);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int
+mlx5e_rep_indr_no_dev_setup(struct mlx5e_rep_priv *rpriv,
+ enum tc_setup_type type,
+ void *data)
+{
+ if (!data)
+ return -EOPNOTSUPP;
+
+ switch (type) {
+ case TC_SETUP_ACT:
+ return mlx5e_rep_indr_setup_act(rpriv, data);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
static
int mlx5e_rep_indr_setup_cb(struct net_device *netdev, struct Qdisc *sch, void *cb_priv,
enum tc_setup_type type, void *type_data,
void *data,
void (*cleanup)(struct flow_block_cb *block_cb))
{
+ if (!netdev)
+ return mlx5e_rep_indr_no_dev_setup(cb_priv, type, data);
+
switch (type) {
case TC_SETUP_BLOCK:
return mlx5e_rep_indr_setup_block(netdev, sch, cb_priv, type_data,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h
index d6c7c81690eb..7c9dd3a75f8a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h
@@ -66,7 +66,7 @@ mlx5e_rep_setup_tc(struct net_device *dev, enum tc_setup_type type,
static inline void
mlx5e_rep_tc_receive(struct mlx5_cqe64 *cqe, struct mlx5e_rq *rq,
- struct sk_buff *skb) {}
+ struct sk_buff *skb) { napi_gro_receive(rq->cq.napi, skb); }
#endif /* CONFIG_MLX5_CLS_ACT */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
index 74086eb556ae..5f6f95ad6888 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
@@ -62,6 +62,7 @@ static void mlx5e_reset_icosq_cc_pc(struct mlx5e_icosq *icosq)
static int mlx5e_rx_reporter_err_icosq_cqe_recover(void *ctx)
{
+ struct mlx5e_rq *xskrq = NULL;
struct mlx5_core_dev *mdev;
struct mlx5e_icosq *icosq;
struct net_device *dev;
@@ -70,7 +71,13 @@ static int mlx5e_rx_reporter_err_icosq_cqe_recover(void *ctx)
int err;
icosq = ctx;
+
+ mutex_lock(&icosq->channel->icosq_recovery_lock);
+
+ /* mlx5e_close_rq cancels this work before RQ and ICOSQ are killed. */
rq = &icosq->channel->rq;
+ if (test_bit(MLX5E_RQ_STATE_ENABLED, &icosq->channel->xskrq.state))
+ xskrq = &icosq->channel->xskrq;
mdev = icosq->channel->mdev;
dev = icosq->channel->netdev;
err = mlx5_core_query_sq_state(mdev, icosq->sqn, &state);
@@ -84,6 +91,9 @@ static int mlx5e_rx_reporter_err_icosq_cqe_recover(void *ctx)
goto out;
mlx5e_deactivate_rq(rq);
+ if (xskrq)
+ mlx5e_deactivate_rq(xskrq);
+
err = mlx5e_wait_for_icosq_flush(icosq);
if (err)
goto out;
@@ -97,35 +107,31 @@ static int mlx5e_rx_reporter_err_icosq_cqe_recover(void *ctx)
goto out;
mlx5e_reset_icosq_cc_pc(icosq);
+
mlx5e_free_rx_in_progress_descs(rq);
+ if (xskrq)
+ mlx5e_free_rx_in_progress_descs(xskrq);
+
clear_bit(MLX5E_SQ_STATE_RECOVERING, &icosq->state);
mlx5e_activate_icosq(icosq);
- mlx5e_activate_rq(rq);
+ mlx5e_activate_rq(rq);
rq->stats->recover++;
- return 0;
-out:
- clear_bit(MLX5E_SQ_STATE_RECOVERING, &icosq->state);
- return err;
-}
-
-static int mlx5e_rq_to_ready(struct mlx5e_rq *rq, int curr_state)
-{
- struct net_device *dev = rq->netdev;
- int err;
- err = mlx5e_modify_rq_state(rq, curr_state, MLX5_RQC_STATE_RST);
- if (err) {
- netdev_err(dev, "Failed to move rq 0x%x to reset\n", rq->rqn);
- return err;
- }
- err = mlx5e_modify_rq_state(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY);
- if (err) {
- netdev_err(dev, "Failed to move rq 0x%x to ready\n", rq->rqn);
- return err;
+ if (xskrq) {
+ mlx5e_activate_rq(xskrq);
+ xskrq->stats->recover++;
}
+ mlx5e_trigger_napi_icosq(icosq->channel);
+
+ mutex_unlock(&icosq->channel->icosq_recovery_lock);
+
return 0;
+out:
+ clear_bit(MLX5E_SQ_STATE_RECOVERING, &icosq->state);
+ mutex_unlock(&icosq->channel->icosq_recovery_lock);
+ return err;
}
static int mlx5e_rx_reporter_err_rq_cqe_recover(void *ctx)
@@ -134,19 +140,18 @@ static int mlx5e_rx_reporter_err_rq_cqe_recover(void *ctx)
int err;
mlx5e_deactivate_rq(rq);
- mlx5e_free_rx_descs(rq);
-
- err = mlx5e_rq_to_ready(rq, MLX5_RQC_STATE_ERR);
+ err = mlx5e_flush_rq(rq, MLX5_RQC_STATE_ERR);
+ clear_bit(MLX5E_RQ_STATE_RECOVERING, &rq->state);
if (err)
- goto out;
+ return err;
- clear_bit(MLX5E_RQ_STATE_RECOVERING, &rq->state);
mlx5e_activate_rq(rq);
rq->stats->recover++;
+ if (rq->channel)
+ mlx5e_trigger_napi_icosq(rq->channel);
+ else
+ mlx5e_trigger_napi_sched(rq->cq.napi);
return 0;
-out:
- clear_bit(MLX5E_RQ_STATE_RECOVERING, &rq->state);
- return err;
}
static int mlx5e_rx_reporter_timeout_recover(void *ctx)
@@ -706,6 +711,16 @@ void mlx5e_reporter_icosq_cqe_err(struct mlx5e_icosq *icosq)
mlx5e_health_report(priv, priv->rx_reporter, err_str, &err_ctx);
}
+void mlx5e_reporter_icosq_suspend_recovery(struct mlx5e_channel *c)
+{
+ mutex_lock(&c->icosq_recovery_lock);
+}
+
+void mlx5e_reporter_icosq_resume_recovery(struct mlx5e_channel *c)
+{
+ mutex_unlock(&c->icosq_recovery_lock);
+}
+
static const struct devlink_health_reporter_ops mlx5_rx_reporter_ops = {
.name = "rx",
.recover = mlx5e_rx_reporter_recover,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
index 4f4bc8726ec4..60bc5b577ab9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
@@ -466,6 +466,14 @@ static int mlx5e_tx_reporter_dump_sq(struct mlx5e_priv *priv, struct devlink_fms
return mlx5e_health_fmsg_named_obj_nest_end(fmsg);
}
+static int mlx5e_tx_reporter_timeout_dump(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg,
+ void *ctx)
+{
+ struct mlx5e_tx_timeout_ctx *to_ctx = ctx;
+
+ return mlx5e_tx_reporter_dump_sq(priv, fmsg, to_ctx->sq);
+}
+
static int mlx5e_tx_reporter_dump_all_sqs(struct mlx5e_priv *priv,
struct devlink_fmsg *fmsg)
{
@@ -561,11 +569,11 @@ int mlx5e_reporter_tx_timeout(struct mlx5e_txqsq *sq)
to_ctx.sq = sq;
err_ctx.ctx = &to_ctx;
err_ctx.recover = mlx5e_tx_reporter_timeout_recover;
- err_ctx.dump = mlx5e_tx_reporter_dump_sq;
+ err_ctx.dump = mlx5e_tx_reporter_timeout_dump;
snprintf(err_str, sizeof(err_str),
"TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x, usecs since last trans: %u",
sq->ch_ix, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc,
- jiffies_to_usecs(jiffies - sq->txq->trans_start));
+ jiffies_to_usecs(jiffies - READ_ONCE(sq->txq->trans_start)));
mlx5e_health_report(priv, priv->tx_reporter, err_str, &err_ctx);
return to_ctx.status;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c
index c1cdd8c2e37a..7f93426b88b3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c
@@ -442,7 +442,7 @@ int mlx5e_rss_packet_merge_set_param(struct mlx5e_rss *rss,
goto inner_tir;
err = mlx5e_tir_modify(tir, builder);
if (err) {
- mlx5e_rss_warn(rss->mdev, "Failed to update LRO state of indirect TIR %#x for traffic type %d: err = %d\n",
+ mlx5e_rss_warn(rss->mdev, "Failed to update packet merge state of indirect TIR %#x for traffic type %d: err = %d\n",
mlx5e_tir_get_tirn(tir), tt, err);
if (!final_err)
final_err = err;
@@ -457,7 +457,7 @@ inner_tir:
continue;
err = mlx5e_tir_modify(tir, builder);
if (err) {
- mlx5e_rss_warn(rss->mdev, "Failed to update LRO state of inner indirect TIR %#x for traffic type %d: err = %d\n",
+ mlx5e_rss_warn(rss->mdev, "Failed to update packet merge state of inner indirect TIR %#x for traffic type %d: err = %d\n",
mlx5e_tir_get_tirn(tir), tt, err);
if (!final_err)
final_err = err;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c
index 0015a81eb9a1..e1095bc36543 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c
@@ -24,8 +24,6 @@ struct mlx5e_rx_res {
struct {
struct mlx5e_rqt direct_rqt;
struct mlx5e_tir direct_tir;
- struct mlx5e_rqt xsk_rqt;
- struct mlx5e_tir xsk_tir;
} *channels;
struct {
@@ -37,7 +35,6 @@ struct mlx5e_rx_res {
/* API for rx_res_rss_* */
static int mlx5e_rx_res_rss_init_def(struct mlx5e_rx_res *res,
- const struct mlx5e_packet_merge_param *init_pkt_merge_param,
unsigned int init_nch)
{
bool inner_ft_support = res->features & MLX5E_RX_RES_FEATURE_INNER_FT;
@@ -52,7 +49,7 @@ static int mlx5e_rx_res_rss_init_def(struct mlx5e_rx_res *res,
return -ENOMEM;
err = mlx5e_rss_init(rss, res->mdev, inner_ft_support, res->drop_rqn,
- init_pkt_merge_param);
+ &res->pkt_merge_param);
if (err)
goto err_rss_free;
@@ -277,8 +274,7 @@ struct mlx5e_rx_res *mlx5e_rx_res_alloc(void)
return kvzalloc(sizeof(struct mlx5e_rx_res), GFP_KERNEL);
}
-static int mlx5e_rx_res_channels_init(struct mlx5e_rx_res *res,
- const struct mlx5e_packet_merge_param *init_pkt_merge_param)
+static int mlx5e_rx_res_channels_init(struct mlx5e_rx_res *res)
{
bool inner_ft_support = res->features & MLX5E_RX_RES_FEATURE_INNER_FT;
struct mlx5e_tir_builder *builder;
@@ -309,7 +305,7 @@ static int mlx5e_rx_res_channels_init(struct mlx5e_rx_res *res,
mlx5e_tir_builder_build_rqt(builder, res->mdev->mlx5e_res.hw_objs.td.tdn,
mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt),
inner_ft_support);
- mlx5e_tir_builder_build_packet_merge(builder, init_pkt_merge_param);
+ mlx5e_tir_builder_build_packet_merge(builder, &res->pkt_merge_param);
mlx5e_tir_builder_build_direct(builder);
err = mlx5e_tir_init(&res->channels[ix].direct_tir, builder, res->mdev, true);
@@ -322,48 +318,8 @@ static int mlx5e_rx_res_channels_init(struct mlx5e_rx_res *res,
mlx5e_tir_builder_clear(builder);
}
- if (!(res->features & MLX5E_RX_RES_FEATURE_XSK))
- goto out;
-
- for (ix = 0; ix < res->max_nch; ix++) {
- err = mlx5e_rqt_init_direct(&res->channels[ix].xsk_rqt,
- res->mdev, false, res->drop_rqn);
- if (err) {
- mlx5_core_warn(res->mdev, "Failed to create an XSK RQT: err = %d, ix = %u\n",
- err, ix);
- goto err_destroy_xsk_rqts;
- }
- }
-
- for (ix = 0; ix < res->max_nch; ix++) {
- mlx5e_tir_builder_build_rqt(builder, res->mdev->mlx5e_res.hw_objs.td.tdn,
- mlx5e_rqt_get_rqtn(&res->channels[ix].xsk_rqt),
- inner_ft_support);
- mlx5e_tir_builder_build_packet_merge(builder, init_pkt_merge_param);
- mlx5e_tir_builder_build_direct(builder);
-
- err = mlx5e_tir_init(&res->channels[ix].xsk_tir, builder, res->mdev, true);
- if (err) {
- mlx5_core_warn(res->mdev, "Failed to create an XSK TIR: err = %d, ix = %u\n",
- err, ix);
- goto err_destroy_xsk_tirs;
- }
-
- mlx5e_tir_builder_clear(builder);
- }
-
goto out;
-err_destroy_xsk_tirs:
- while (--ix >= 0)
- mlx5e_tir_destroy(&res->channels[ix].xsk_tir);
-
- ix = res->max_nch;
-err_destroy_xsk_rqts:
- while (--ix >= 0)
- mlx5e_rqt_destroy(&res->channels[ix].xsk_rqt);
-
- ix = res->max_nch;
err_destroy_direct_tirs:
while (--ix >= 0)
mlx5e_tir_destroy(&res->channels[ix].direct_tir);
@@ -422,12 +378,6 @@ static void mlx5e_rx_res_channels_destroy(struct mlx5e_rx_res *res)
for (ix = 0; ix < res->max_nch; ix++) {
mlx5e_tir_destroy(&res->channels[ix].direct_tir);
mlx5e_rqt_destroy(&res->channels[ix].direct_rqt);
-
- if (!(res->features & MLX5E_RX_RES_FEATURE_XSK))
- continue;
-
- mlx5e_tir_destroy(&res->channels[ix].xsk_tir);
- mlx5e_rqt_destroy(&res->channels[ix].xsk_rqt);
}
kvfree(res->channels);
@@ -454,11 +404,11 @@ int mlx5e_rx_res_init(struct mlx5e_rx_res *res, struct mlx5_core_dev *mdev,
res->pkt_merge_param = *init_pkt_merge_param;
init_rwsem(&res->pkt_merge_param_sem);
- err = mlx5e_rx_res_rss_init_def(res, init_pkt_merge_param, init_nch);
+ err = mlx5e_rx_res_rss_init_def(res, init_nch);
if (err)
goto err_out;
- err = mlx5e_rx_res_channels_init(res, init_pkt_merge_param);
+ err = mlx5e_rx_res_channels_init(res);
if (err)
goto err_rss_destroy;
@@ -493,13 +443,6 @@ u32 mlx5e_rx_res_get_tirn_direct(struct mlx5e_rx_res *res, unsigned int ix)
return mlx5e_tir_get_tirn(&res->channels[ix].direct_tir);
}
-u32 mlx5e_rx_res_get_tirn_xsk(struct mlx5e_rx_res *res, unsigned int ix)
-{
- WARN_ON(!(res->features & MLX5E_RX_RES_FEATURE_XSK));
-
- return mlx5e_tir_get_tirn(&res->channels[ix].xsk_tir);
-}
-
u32 mlx5e_rx_res_get_tirn_rss(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt)
{
struct mlx5e_rss *rss = res->rss[0];
@@ -525,56 +468,53 @@ static u32 mlx5e_rx_res_get_rqtn_direct(struct mlx5e_rx_res *res, unsigned int i
return mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt);
}
-void mlx5e_rx_res_channels_activate(struct mlx5e_rx_res *res, struct mlx5e_channels *chs)
+static void mlx5e_rx_res_channel_activate_direct(struct mlx5e_rx_res *res,
+ struct mlx5e_channels *chs,
+ unsigned int ix)
{
- unsigned int nch, ix;
+ u32 rqn = res->rss_rqns[ix];
int err;
- nch = mlx5e_channels_get_num(chs);
-
- for (ix = 0; ix < chs->num; ix++)
- mlx5e_channels_get_regular_rqn(chs, ix, &res->rss_rqns[ix]);
- res->rss_nch = chs->num;
+ err = mlx5e_rqt_redirect_direct(&res->channels[ix].direct_rqt, rqn);
+ if (err)
+ mlx5_core_warn(res->mdev, "Failed to redirect direct RQT %#x to RQ %#x (channel %u): err = %d\n",
+ mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt),
+ rqn, ix, err);
+}
- mlx5e_rx_res_rss_enable(res);
+static void mlx5e_rx_res_channel_deactivate_direct(struct mlx5e_rx_res *res,
+ unsigned int ix)
+{
+ int err;
- for (ix = 0; ix < nch; ix++) {
- u32 rqn;
+ err = mlx5e_rqt_redirect_direct(&res->channels[ix].direct_rqt, res->drop_rqn);
+ if (err)
+ mlx5_core_warn(res->mdev, "Failed to redirect direct RQT %#x to drop RQ %#x (channel %u): err = %d\n",
+ mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt),
+ res->drop_rqn, ix, err);
+}
- mlx5e_channels_get_regular_rqn(chs, ix, &rqn);
- err = mlx5e_rqt_redirect_direct(&res->channels[ix].direct_rqt, rqn);
- if (err)
- mlx5_core_warn(res->mdev, "Failed to redirect direct RQT %#x to RQ %#x (channel %u): err = %d\n",
- mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt),
- rqn, ix, err);
+void mlx5e_rx_res_channels_activate(struct mlx5e_rx_res *res, struct mlx5e_channels *chs)
+{
+ unsigned int nch, ix;
+ int err;
- if (!(res->features & MLX5E_RX_RES_FEATURE_XSK))
- continue;
+ nch = mlx5e_channels_get_num(chs);
- if (!mlx5e_channels_get_xsk_rqn(chs, ix, &rqn))
- rqn = res->drop_rqn;
- err = mlx5e_rqt_redirect_direct(&res->channels[ix].xsk_rqt, rqn);
- if (err)
- mlx5_core_warn(res->mdev, "Failed to redirect XSK RQT %#x to RQ %#x (channel %u): err = %d\n",
- mlx5e_rqt_get_rqtn(&res->channels[ix].xsk_rqt),
- rqn, ix, err);
+ for (ix = 0; ix < chs->num; ix++) {
+ if (mlx5e_channels_is_xsk(chs, ix))
+ mlx5e_channels_get_xsk_rqn(chs, ix, &res->rss_rqns[ix]);
+ else
+ mlx5e_channels_get_regular_rqn(chs, ix, &res->rss_rqns[ix]);
}
- for (ix = nch; ix < res->max_nch; ix++) {
- err = mlx5e_rqt_redirect_direct(&res->channels[ix].direct_rqt, res->drop_rqn);
- if (err)
- mlx5_core_warn(res->mdev, "Failed to redirect direct RQT %#x to drop RQ %#x (channel %u): err = %d\n",
- mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt),
- res->drop_rqn, ix, err);
+ res->rss_nch = chs->num;
- if (!(res->features & MLX5E_RX_RES_FEATURE_XSK))
- continue;
+ mlx5e_rx_res_rss_enable(res);
- err = mlx5e_rqt_redirect_direct(&res->channels[ix].xsk_rqt, res->drop_rqn);
- if (err)
- mlx5_core_warn(res->mdev, "Failed to redirect XSK RQT %#x to drop RQ %#x (channel %u): err = %d\n",
- mlx5e_rqt_get_rqtn(&res->channels[ix].xsk_rqt),
- res->drop_rqn, ix, err);
- }
+ for (ix = 0; ix < nch; ix++)
+ mlx5e_rx_res_channel_activate_direct(res, chs, ix);
+ for (ix = nch; ix < res->max_nch; ix++)
+ mlx5e_rx_res_channel_deactivate_direct(res, ix);
if (res->features & MLX5E_RX_RES_FEATURE_PTP) {
u32 rqn;
@@ -597,22 +537,8 @@ void mlx5e_rx_res_channels_deactivate(struct mlx5e_rx_res *res)
mlx5e_rx_res_rss_disable(res);
- for (ix = 0; ix < res->max_nch; ix++) {
- err = mlx5e_rqt_redirect_direct(&res->channels[ix].direct_rqt, res->drop_rqn);
- if (err)
- mlx5_core_warn(res->mdev, "Failed to redirect direct RQT %#x to drop RQ %#x (channel %u): err = %d\n",
- mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt),
- res->drop_rqn, ix, err);
-
- if (!(res->features & MLX5E_RX_RES_FEATURE_XSK))
- continue;
-
- err = mlx5e_rqt_redirect_direct(&res->channels[ix].xsk_rqt, res->drop_rqn);
- if (err)
- mlx5_core_warn(res->mdev, "Failed to redirect XSK RQT %#x to drop RQ %#x (channel %u): err = %d\n",
- mlx5e_rqt_get_rqtn(&res->channels[ix].xsk_rqt),
- res->drop_rqn, ix, err);
- }
+ for (ix = 0; ix < res->max_nch; ix++)
+ mlx5e_rx_res_channel_deactivate_direct(res, ix);
if (res->features & MLX5E_RX_RES_FEATURE_PTP) {
err = mlx5e_rqt_redirect_direct(&res->ptp.rqt, res->drop_rqn);
@@ -623,33 +549,17 @@ void mlx5e_rx_res_channels_deactivate(struct mlx5e_rx_res *res)
}
}
-int mlx5e_rx_res_xsk_activate(struct mlx5e_rx_res *res, struct mlx5e_channels *chs,
- unsigned int ix)
+void mlx5e_rx_res_xsk_update(struct mlx5e_rx_res *res, struct mlx5e_channels *chs,
+ unsigned int ix, bool xsk)
{
- u32 rqn;
- int err;
-
- if (!mlx5e_channels_get_xsk_rqn(chs, ix, &rqn))
- return -EINVAL;
-
- err = mlx5e_rqt_redirect_direct(&res->channels[ix].xsk_rqt, rqn);
- if (err)
- mlx5_core_warn(res->mdev, "Failed to redirect XSK RQT %#x to XSK RQ %#x (channel %u): err = %d\n",
- mlx5e_rqt_get_rqtn(&res->channels[ix].xsk_rqt),
- rqn, ix, err);
- return err;
-}
+ if (xsk)
+ mlx5e_channels_get_xsk_rqn(chs, ix, &res->rss_rqns[ix]);
+ else
+ mlx5e_channels_get_regular_rqn(chs, ix, &res->rss_rqns[ix]);
-int mlx5e_rx_res_xsk_deactivate(struct mlx5e_rx_res *res, unsigned int ix)
-{
- int err;
+ mlx5e_rx_res_rss_enable(res);
- err = mlx5e_rqt_redirect_direct(&res->channels[ix].xsk_rqt, res->drop_rqn);
- if (err)
- mlx5_core_warn(res->mdev, "Failed to redirect XSK RQT %#x to drop RQ %#x (channel %u): err = %d\n",
- mlx5e_rqt_get_rqtn(&res->channels[ix].xsk_rqt),
- res->drop_rqn, ix, err);
- return err;
+ mlx5e_rx_res_channel_activate_direct(res, chs, ix);
}
int mlx5e_rx_res_packet_merge_set_param(struct mlx5e_rx_res *res,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h
index b39b20a720e0..5d5f64fab60f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h
@@ -17,8 +17,7 @@ struct mlx5e_rss_params_hash;
enum mlx5e_rx_res_features {
MLX5E_RX_RES_FEATURE_INNER_FT = BIT(0),
- MLX5E_RX_RES_FEATURE_XSK = BIT(1),
- MLX5E_RX_RES_FEATURE_PTP = BIT(2),
+ MLX5E_RX_RES_FEATURE_PTP = BIT(1),
};
/* Setup */
@@ -32,7 +31,6 @@ void mlx5e_rx_res_free(struct mlx5e_rx_res *res);
/* TIRN getters for flow steering */
u32 mlx5e_rx_res_get_tirn_direct(struct mlx5e_rx_res *res, unsigned int ix);
-u32 mlx5e_rx_res_get_tirn_xsk(struct mlx5e_rx_res *res, unsigned int ix);
u32 mlx5e_rx_res_get_tirn_rss(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt);
u32 mlx5e_rx_res_get_tirn_rss_inner(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt);
u32 mlx5e_rx_res_get_tirn_ptp(struct mlx5e_rx_res *res);
@@ -40,9 +38,8 @@ u32 mlx5e_rx_res_get_tirn_ptp(struct mlx5e_rx_res *res);
/* Activate/deactivate API */
void mlx5e_rx_res_channels_activate(struct mlx5e_rx_res *res, struct mlx5e_channels *chs);
void mlx5e_rx_res_channels_deactivate(struct mlx5e_rx_res *res);
-int mlx5e_rx_res_xsk_activate(struct mlx5e_rx_res *res, struct mlx5e_channels *chs,
- unsigned int ix);
-int mlx5e_rx_res_xsk_deactivate(struct mlx5e_rx_res *res, unsigned int ix);
+void mlx5e_rx_res_xsk_update(struct mlx5e_rx_res *res, struct mlx5e_channels *chs,
+ unsigned int ix, bool xsk);
/* Configuration API */
void mlx5e_rx_res_rss_set_indir_uniform(struct mlx5e_rx_res *res, unsigned int nch);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/selq.c b/drivers/net/ethernet/mellanox/mlx5/core/en/selq.c
new file mode 100644
index 000000000000..f675b1926340
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/selq.c
@@ -0,0 +1,266 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#include "selq.h"
+#include <linux/slab.h>
+#include <linux/netdevice.h>
+#include <linux/rcupdate.h>
+#include "en.h"
+#include "en/ptp.h"
+#include "en/htb.h"
+
+struct mlx5e_selq_params {
+ unsigned int num_regular_queues;
+ unsigned int num_channels;
+ unsigned int num_tcs;
+ union {
+ u8 is_special_queues;
+ struct {
+ bool is_htb : 1;
+ bool is_ptp : 1;
+ };
+ };
+ u16 htb_maj_id;
+ u16 htb_defcls;
+};
+
+int mlx5e_selq_init(struct mlx5e_selq *selq, struct mutex *state_lock)
+{
+ struct mlx5e_selq_params *init_params;
+
+ selq->state_lock = state_lock;
+
+ selq->standby = kvzalloc(sizeof(*selq->standby), GFP_KERNEL);
+ if (!selq->standby)
+ return -ENOMEM;
+
+ init_params = kvzalloc(sizeof(*selq->active), GFP_KERNEL);
+ if (!init_params) {
+ kvfree(selq->standby);
+ selq->standby = NULL;
+ return -ENOMEM;
+ }
+ /* Assign dummy values, so that mlx5e_select_queue won't crash. */
+ *init_params = (struct mlx5e_selq_params) {
+ .num_regular_queues = 1,
+ .num_channels = 1,
+ .num_tcs = 1,
+ .is_htb = false,
+ .is_ptp = false,
+ .htb_maj_id = 0,
+ .htb_defcls = 0,
+ };
+ rcu_assign_pointer(selq->active, init_params);
+
+ return 0;
+}
+
+void mlx5e_selq_cleanup(struct mlx5e_selq *selq)
+{
+ WARN_ON_ONCE(selq->is_prepared);
+
+ kvfree(selq->standby);
+ selq->standby = NULL;
+ selq->is_prepared = true;
+
+ mlx5e_selq_apply(selq);
+
+ kvfree(selq->standby);
+ selq->standby = NULL;
+}
+
+void mlx5e_selq_prepare_params(struct mlx5e_selq *selq, struct mlx5e_params *params)
+{
+ struct mlx5e_selq_params *selq_active;
+
+ lockdep_assert_held(selq->state_lock);
+ WARN_ON_ONCE(selq->is_prepared);
+
+ selq->is_prepared = true;
+
+ selq_active = rcu_dereference_protected(selq->active,
+ lockdep_is_held(selq->state_lock));
+ *selq->standby = *selq_active;
+ selq->standby->num_channels = params->num_channels;
+ selq->standby->num_tcs = mlx5e_get_dcb_num_tc(params);
+ selq->standby->num_regular_queues =
+ selq->standby->num_channels * selq->standby->num_tcs;
+ selq->standby->is_ptp = MLX5E_GET_PFLAG(params, MLX5E_PFLAG_TX_PORT_TS);
+}
+
+bool mlx5e_selq_is_htb_enabled(struct mlx5e_selq *selq)
+{
+ struct mlx5e_selq_params *selq_active =
+ rcu_dereference_protected(selq->active, lockdep_is_held(selq->state_lock));
+
+ return selq_active->htb_maj_id;
+}
+
+void mlx5e_selq_prepare_htb(struct mlx5e_selq *selq, u16 htb_maj_id, u16 htb_defcls)
+{
+ struct mlx5e_selq_params *selq_active;
+
+ lockdep_assert_held(selq->state_lock);
+ WARN_ON_ONCE(selq->is_prepared);
+
+ selq->is_prepared = true;
+
+ selq_active = rcu_dereference_protected(selq->active,
+ lockdep_is_held(selq->state_lock));
+ *selq->standby = *selq_active;
+ selq->standby->is_htb = htb_maj_id;
+ selq->standby->htb_maj_id = htb_maj_id;
+ selq->standby->htb_defcls = htb_defcls;
+}
+
+void mlx5e_selq_apply(struct mlx5e_selq *selq)
+{
+ struct mlx5e_selq_params *old_params;
+
+ WARN_ON_ONCE(!selq->is_prepared);
+
+ selq->is_prepared = false;
+
+ old_params = rcu_replace_pointer(selq->active, selq->standby,
+ lockdep_is_held(selq->state_lock));
+ synchronize_net(); /* Wait until ndo_select_queue starts emitting correct values. */
+ selq->standby = old_params;
+}
+
+void mlx5e_selq_cancel(struct mlx5e_selq *selq)
+{
+ lockdep_assert_held(selq->state_lock);
+ WARN_ON_ONCE(!selq->is_prepared);
+
+ selq->is_prepared = false;
+}
+
+#ifdef CONFIG_MLX5_CORE_EN_DCB
+static int mlx5e_get_dscp_up(struct mlx5e_priv *priv, struct sk_buff *skb)
+{
+ int dscp_cp = 0;
+
+ if (skb->protocol == htons(ETH_P_IP))
+ dscp_cp = ipv4_get_dsfield(ip_hdr(skb)) >> 2;
+ else if (skb->protocol == htons(ETH_P_IPV6))
+ dscp_cp = ipv6_get_dsfield(ipv6_hdr(skb)) >> 2;
+
+ return priv->dcbx_dp.dscp2prio[dscp_cp];
+}
+#endif
+
+static int mlx5e_get_up(struct mlx5e_priv *priv, struct sk_buff *skb)
+{
+#ifdef CONFIG_MLX5_CORE_EN_DCB
+ if (READ_ONCE(priv->dcbx_dp.trust_state) == MLX5_QPTS_TRUST_DSCP)
+ return mlx5e_get_dscp_up(priv, skb);
+#endif
+ if (skb_vlan_tag_present(skb))
+ return skb_vlan_tag_get_prio(skb);
+ return 0;
+}
+
+static u16 mlx5e_select_ptpsq(struct net_device *dev, struct sk_buff *skb,
+ struct mlx5e_selq_params *selq)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ int up;
+
+ up = selq->num_tcs > 1 ? mlx5e_get_up(priv, skb) : 0;
+
+ return selq->num_regular_queues + up;
+}
+
+static int mlx5e_select_htb_queue(struct mlx5e_priv *priv, struct sk_buff *skb,
+ struct mlx5e_selq_params *selq)
+{
+ u16 classid;
+
+ /* Order maj_id before defcls - pairs with mlx5e_htb_root_add. */
+ if ((TC_H_MAJ(skb->priority) >> 16) == selq->htb_maj_id)
+ classid = TC_H_MIN(skb->priority);
+ else
+ classid = selq->htb_defcls;
+
+ if (!classid)
+ return 0;
+
+ return mlx5e_htb_get_txq_by_classid(priv->htb, classid);
+}
+
+u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb,
+ struct net_device *sb_dev)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ struct mlx5e_selq_params *selq;
+ int txq_ix, up;
+
+ selq = rcu_dereference_bh(priv->selq.active);
+
+ /* This is a workaround needed only for the mlx5e_netdev_change_profile
+ * flow that zeroes out the whole priv without unregistering the netdev
+ * and without preventing ndo_select_queue from being called.
+ */
+ if (unlikely(!selq))
+ return 0;
+
+ if (likely(!selq->is_special_queues)) {
+ /* No special queues, netdev_pick_tx returns one of the regular ones. */
+
+ txq_ix = netdev_pick_tx(dev, skb, NULL);
+
+ if (selq->num_tcs <= 1)
+ return txq_ix;
+
+ up = mlx5e_get_up(priv, skb);
+
+ /* Normalize any picked txq_ix to [0, num_channels),
+ * So we can return a txq_ix that matches the channel and
+ * packet UP.
+ */
+ return mlx5e_txq_to_ch_ix(txq_ix, selq->num_channels) +
+ up * selq->num_channels;
+ }
+
+ if (unlikely(selq->htb_maj_id)) {
+ /* num_tcs == 1, shortcut for PTP */
+
+ txq_ix = mlx5e_select_htb_queue(priv, skb, selq);
+ if (txq_ix > 0)
+ return txq_ix;
+
+ if (unlikely(selq->is_ptp && mlx5e_use_ptpsq(skb)))
+ return selq->num_channels;
+
+ txq_ix = netdev_pick_tx(dev, skb, NULL);
+
+ /* Fix netdev_pick_tx() not to choose ptp_channel and HTB txqs.
+ * If they are selected, switch to regular queues.
+ * Driver to select these queues only at mlx5e_select_ptpsq()
+ * and mlx5e_select_htb_queue().
+ */
+ return mlx5e_txq_to_ch_ix_htb(txq_ix, selq->num_channels);
+ }
+
+ /* PTP is enabled */
+
+ if (mlx5e_use_ptpsq(skb))
+ return mlx5e_select_ptpsq(dev, skb, selq);
+
+ txq_ix = netdev_pick_tx(dev, skb, NULL);
+
+ /* Normalize any picked txq_ix to [0, num_channels). Queues in range
+ * [0, num_regular_queues) will be mapped to the corresponding channel
+ * index, so that we can apply the packet's UP (if num_tcs > 1).
+ * If netdev_pick_tx() picks ptp_channel, switch to a regular queue,
+ * because driver should select the PTP only at mlx5e_select_ptpsq().
+ */
+ txq_ix = mlx5e_txq_to_ch_ix(txq_ix, selq->num_channels);
+
+ if (selq->num_tcs <= 1)
+ return txq_ix;
+
+ up = mlx5e_get_up(priv, skb);
+
+ return txq_ix + up * selq->num_channels;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/selq.h b/drivers/net/ethernet/mellanox/mlx5/core/en/selq.h
new file mode 100644
index 000000000000..fd590f80e4d1
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/selq.h
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#ifndef __MLX5_EN_SELQ_H__
+#define __MLX5_EN_SELQ_H__
+
+#include <linux/kernel.h>
+
+struct mlx5e_selq_params;
+
+struct mlx5e_selq {
+ struct mlx5e_selq_params __rcu *active;
+ struct mlx5e_selq_params *standby;
+ struct mutex *state_lock; /* points to priv->state_lock */
+ bool is_prepared;
+};
+
+struct mlx5e_params;
+struct net_device;
+struct sk_buff;
+
+int mlx5e_selq_init(struct mlx5e_selq *selq, struct mutex *state_lock);
+void mlx5e_selq_cleanup(struct mlx5e_selq *selq);
+void mlx5e_selq_prepare_params(struct mlx5e_selq *selq, struct mlx5e_params *params);
+void mlx5e_selq_prepare_htb(struct mlx5e_selq *selq, u16 htb_maj_id, u16 htb_defcls);
+bool mlx5e_selq_is_htb_enabled(struct mlx5e_selq *selq);
+void mlx5e_selq_apply(struct mlx5e_selq *selq);
+void mlx5e_selq_cancel(struct mlx5e_selq *selq);
+
+static inline u16 mlx5e_txq_to_ch_ix(u16 txq, u16 num_channels)
+{
+ while (unlikely(txq >= num_channels))
+ txq -= num_channels;
+ return txq;
+}
+
+static inline u16 mlx5e_txq_to_ch_ix_htb(u16 txq, u16 num_channels)
+{
+ if (unlikely(txq >= num_channels)) {
+ if (unlikely(txq >= num_channels << 3))
+ txq %= num_channels;
+ else
+ do
+ txq -= num_channels;
+ while (txq >= num_channels);
+ }
+ return txq;
+}
+
+u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb,
+ struct net_device *sb_dev);
+
+#endif /* __MLX5_EN_SELQ_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/accept.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/accept.c
new file mode 100644
index 000000000000..21aab96357b5
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/accept.c
@@ -0,0 +1,31 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include "act.h"
+#include "en/tc_priv.h"
+
+static bool
+tc_act_can_offload_accept(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ return true;
+}
+
+static int
+tc_act_parse_accept(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ attr->flags |= MLX5_ATTR_FLAG_ACCEPT;
+
+ return 0;
+}
+
+struct mlx5e_tc_act mlx5e_tc_act_accept = {
+ .can_offload = tc_act_can_offload_accept,
+ .parse_action = tc_act_parse_accept,
+};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c
new file mode 100644
index 000000000000..3337241cfd84
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c
@@ -0,0 +1,154 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include "act.h"
+#include "en/tc/post_act.h"
+#include "en/tc_priv.h"
+#include "mlx5_core.h"
+
+static struct mlx5e_tc_act *tc_acts_fdb[NUM_FLOW_ACTIONS] = {
+ [FLOW_ACTION_ACCEPT] = &mlx5e_tc_act_accept,
+ [FLOW_ACTION_DROP] = &mlx5e_tc_act_drop,
+ [FLOW_ACTION_TRAP] = &mlx5e_tc_act_trap,
+ [FLOW_ACTION_GOTO] = &mlx5e_tc_act_goto,
+ [FLOW_ACTION_REDIRECT] = &mlx5e_tc_act_mirred,
+ [FLOW_ACTION_MIRRED] = &mlx5e_tc_act_mirred,
+ [FLOW_ACTION_REDIRECT_INGRESS] = &mlx5e_tc_act_redirect_ingress,
+ [FLOW_ACTION_VLAN_PUSH] = &mlx5e_tc_act_vlan,
+ [FLOW_ACTION_VLAN_POP] = &mlx5e_tc_act_vlan,
+ [FLOW_ACTION_VLAN_MANGLE] = &mlx5e_tc_act_vlan_mangle,
+ [FLOW_ACTION_TUNNEL_ENCAP] = &mlx5e_tc_act_tun_encap,
+ [FLOW_ACTION_TUNNEL_DECAP] = &mlx5e_tc_act_tun_decap,
+ [FLOW_ACTION_MANGLE] = &mlx5e_tc_act_pedit,
+ [FLOW_ACTION_ADD] = &mlx5e_tc_act_pedit,
+ [FLOW_ACTION_CSUM] = &mlx5e_tc_act_csum,
+ [FLOW_ACTION_PTYPE] = &mlx5e_tc_act_ptype,
+ [FLOW_ACTION_SAMPLE] = &mlx5e_tc_act_sample,
+ [FLOW_ACTION_POLICE] = &mlx5e_tc_act_police,
+ [FLOW_ACTION_CT] = &mlx5e_tc_act_ct,
+ [FLOW_ACTION_MPLS_PUSH] = &mlx5e_tc_act_mpls_push,
+ [FLOW_ACTION_MPLS_POP] = &mlx5e_tc_act_mpls_pop,
+ [FLOW_ACTION_VLAN_PUSH_ETH] = &mlx5e_tc_act_vlan,
+ [FLOW_ACTION_VLAN_POP_ETH] = &mlx5e_tc_act_vlan,
+};
+
+static struct mlx5e_tc_act *tc_acts_nic[NUM_FLOW_ACTIONS] = {
+ [FLOW_ACTION_ACCEPT] = &mlx5e_tc_act_accept,
+ [FLOW_ACTION_DROP] = &mlx5e_tc_act_drop,
+ [FLOW_ACTION_GOTO] = &mlx5e_tc_act_goto,
+ [FLOW_ACTION_REDIRECT] = &mlx5e_tc_act_mirred_nic,
+ [FLOW_ACTION_MANGLE] = &mlx5e_tc_act_pedit,
+ [FLOW_ACTION_ADD] = &mlx5e_tc_act_pedit,
+ [FLOW_ACTION_CSUM] = &mlx5e_tc_act_csum,
+ [FLOW_ACTION_MARK] = &mlx5e_tc_act_mark,
+ [FLOW_ACTION_CT] = &mlx5e_tc_act_ct,
+};
+
+/**
+ * mlx5e_tc_act_get() - Get an action parser for an action id.
+ * @act_id: Flow action id.
+ * @ns_type: flow namespace type.
+ */
+struct mlx5e_tc_act *
+mlx5e_tc_act_get(enum flow_action_id act_id,
+ enum mlx5_flow_namespace_type ns_type)
+{
+ struct mlx5e_tc_act **tc_acts;
+
+ tc_acts = ns_type == MLX5_FLOW_NAMESPACE_FDB ? tc_acts_fdb : tc_acts_nic;
+
+ return tc_acts[act_id];
+}
+
+/**
+ * mlx5e_tc_act_init_parse_state() - Init a new parse_state.
+ * @parse_state: Parsing state.
+ * @flow: mlx5e tc flow being handled.
+ * @flow_action: flow action to parse.
+ * @extack: to set an error msg.
+ *
+ * The same parse_state should be passed to action parsers
+ * for tracking the current parsing state.
+ */
+void
+mlx5e_tc_act_init_parse_state(struct mlx5e_tc_act_parse_state *parse_state,
+ struct mlx5e_tc_flow *flow,
+ struct flow_action *flow_action,
+ struct netlink_ext_ack *extack)
+{
+ memset(parse_state, 0, sizeof(*parse_state));
+ parse_state->flow = flow;
+ parse_state->extack = extack;
+ parse_state->flow_action = flow_action;
+}
+
+void
+mlx5e_tc_act_reorder_flow_actions(struct flow_action *flow_action,
+ struct mlx5e_tc_flow_action *flow_action_reorder)
+{
+ struct flow_action_entry *act;
+ int i, j = 0;
+
+ flow_action_for_each(i, act, flow_action) {
+ /* Add CT action to be first. */
+ if (act->id == FLOW_ACTION_CT)
+ flow_action_reorder->entries[j++] = act;
+ }
+
+ flow_action_for_each(i, act, flow_action) {
+ if (act->id == FLOW_ACTION_CT)
+ continue;
+ flow_action_reorder->entries[j++] = act;
+ }
+}
+
+int
+mlx5e_tc_act_post_parse(struct mlx5e_tc_act_parse_state *parse_state,
+ struct flow_action *flow_action,
+ struct mlx5_flow_attr *attr,
+ enum mlx5_flow_namespace_type ns_type)
+{
+ struct flow_action_entry *act;
+ struct mlx5e_tc_act *tc_act;
+ struct mlx5e_priv *priv;
+ int err = 0, i;
+
+ priv = parse_state->flow->priv;
+
+ flow_action_for_each(i, act, flow_action) {
+ tc_act = mlx5e_tc_act_get(act->id, ns_type);
+ if (!tc_act || !tc_act->post_parse)
+ continue;
+
+ err = tc_act->post_parse(parse_state, priv, attr);
+ if (err)
+ goto out;
+ }
+
+out:
+ return err;
+}
+
+int
+mlx5e_tc_act_set_next_post_act(struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_attr *attr,
+ struct mlx5_flow_attr *next_attr)
+{
+ struct mlx5_core_dev *mdev = flow->priv->mdev;
+ struct mlx5e_tc_mod_hdr_acts *mod_acts;
+ int err;
+
+ mod_acts = &attr->parse_attr->mod_hdr_acts;
+
+ /* Set handle on current post act rule to next post act rule. */
+ err = mlx5e_tc_post_act_set_handle(mdev, next_attr->post_act_handle, mod_acts);
+ if (err) {
+ mlx5_core_warn(mdev, "Failed setting post action handle");
+ return err;
+ }
+
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
+ MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+
+ return 0;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h
new file mode 100644
index 000000000000..e1570ff056ae
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h
@@ -0,0 +1,116 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#ifndef __MLX5_EN_TC_ACT_H__
+#define __MLX5_EN_TC_ACT_H__
+
+#include <net/tc_act/tc_pedit.h>
+#include <net/flow_offload.h>
+#include <linux/netlink.h>
+#include "eswitch.h"
+#include "pedit.h"
+
+struct mlx5_flow_attr;
+
+struct mlx5e_tc_act_parse_state {
+ struct flow_action *flow_action;
+ struct mlx5e_tc_flow *flow;
+ struct netlink_ext_ack *extack;
+ u32 actions;
+ bool ct;
+ bool ct_clear;
+ bool encap;
+ bool decap;
+ bool mpls_push;
+ bool eth_push;
+ bool eth_pop;
+ bool ptype_host;
+ const struct ip_tunnel_info *tun_info;
+ struct mlx5e_mpls_info mpls_info;
+ int ifindexes[MLX5_MAX_FLOW_FWD_VPORTS];
+ int if_count;
+ struct mlx5_tc_ct_priv *ct_priv;
+};
+
+struct mlx5e_tc_act {
+ bool (*can_offload)(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr);
+
+ int (*parse_action)(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr);
+
+ int (*post_parse)(struct mlx5e_tc_act_parse_state *parse_state,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr);
+
+ bool (*is_multi_table_act)(struct mlx5e_priv *priv,
+ const struct flow_action_entry *act,
+ struct mlx5_flow_attr *attr);
+
+ int (*offload_action)(struct mlx5e_priv *priv,
+ struct flow_offload_action *fl_act,
+ struct flow_action_entry *act);
+
+ int (*destroy_action)(struct mlx5e_priv *priv,
+ struct flow_offload_action *fl_act);
+
+ int (*stats_action)(struct mlx5e_priv *priv,
+ struct flow_offload_action *fl_act);
+};
+
+struct mlx5e_tc_flow_action {
+ unsigned int num_entries;
+ struct flow_action_entry **entries;
+};
+
+extern struct mlx5e_tc_act mlx5e_tc_act_drop;
+extern struct mlx5e_tc_act mlx5e_tc_act_trap;
+extern struct mlx5e_tc_act mlx5e_tc_act_accept;
+extern struct mlx5e_tc_act mlx5e_tc_act_mark;
+extern struct mlx5e_tc_act mlx5e_tc_act_goto;
+extern struct mlx5e_tc_act mlx5e_tc_act_tun_encap;
+extern struct mlx5e_tc_act mlx5e_tc_act_tun_decap;
+extern struct mlx5e_tc_act mlx5e_tc_act_csum;
+extern struct mlx5e_tc_act mlx5e_tc_act_pedit;
+extern struct mlx5e_tc_act mlx5e_tc_act_vlan;
+extern struct mlx5e_tc_act mlx5e_tc_act_vlan_mangle;
+extern struct mlx5e_tc_act mlx5e_tc_act_mpls_push;
+extern struct mlx5e_tc_act mlx5e_tc_act_mpls_pop;
+extern struct mlx5e_tc_act mlx5e_tc_act_mirred;
+extern struct mlx5e_tc_act mlx5e_tc_act_mirred_nic;
+extern struct mlx5e_tc_act mlx5e_tc_act_ct;
+extern struct mlx5e_tc_act mlx5e_tc_act_sample;
+extern struct mlx5e_tc_act mlx5e_tc_act_ptype;
+extern struct mlx5e_tc_act mlx5e_tc_act_redirect_ingress;
+extern struct mlx5e_tc_act mlx5e_tc_act_police;
+
+struct mlx5e_tc_act *
+mlx5e_tc_act_get(enum flow_action_id act_id,
+ enum mlx5_flow_namespace_type ns_type);
+
+void
+mlx5e_tc_act_init_parse_state(struct mlx5e_tc_act_parse_state *parse_state,
+ struct mlx5e_tc_flow *flow,
+ struct flow_action *flow_action,
+ struct netlink_ext_ack *extack);
+
+void
+mlx5e_tc_act_reorder_flow_actions(struct flow_action *flow_action,
+ struct mlx5e_tc_flow_action *flow_action_reorder);
+
+int
+mlx5e_tc_act_post_parse(struct mlx5e_tc_act_parse_state *parse_state,
+ struct flow_action *flow_action,
+ struct mlx5_flow_attr *attr,
+ enum mlx5_flow_namespace_type ns_type);
+
+int
+mlx5e_tc_act_set_next_post_act(struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_attr *attr,
+ struct mlx5_flow_attr *next_attr);
+
+#endif /* __MLX5_EN_TC_ACT_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/csum.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/csum.c
new file mode 100644
index 000000000000..c0f08ae6a57f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/csum.c
@@ -0,0 +1,62 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include <linux/tc_act/tc_csum.h>
+#include "act.h"
+#include "en/tc_priv.h"
+
+static bool
+csum_offload_supported(struct mlx5e_priv *priv,
+ u32 action,
+ u32 update_flags,
+ struct netlink_ext_ack *extack)
+{
+ u32 prot_flags = TCA_CSUM_UPDATE_FLAG_IPV4HDR | TCA_CSUM_UPDATE_FLAG_TCP |
+ TCA_CSUM_UPDATE_FLAG_UDP;
+
+ /* The HW recalcs checksums only if re-writing headers */
+ if (!(action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "TC csum action is only offloaded with pedit");
+ netdev_warn(priv->netdev,
+ "TC csum action is only offloaded with pedit\n");
+ return false;
+ }
+
+ if (update_flags & ~prot_flags) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "can't offload TC csum action for some header/s");
+ netdev_warn(priv->netdev,
+ "can't offload TC csum action for some header/s - flags %#x\n",
+ update_flags);
+ return false;
+ }
+
+ return true;
+}
+
+static bool
+tc_act_can_offload_csum(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5e_tc_flow *flow = parse_state->flow;
+
+ return csum_offload_supported(flow->priv, attr->action,
+ act->csum_flags, parse_state->extack);
+}
+
+static int
+tc_act_parse_csum(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ return 0;
+}
+
+struct mlx5e_tc_act mlx5e_tc_act_csum = {
+ .can_offload = tc_act_can_offload_csum,
+ .parse_action = tc_act_parse_csum,
+};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c
new file mode 100644
index 000000000000..a829c94289c1
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c
@@ -0,0 +1,104 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include "act.h"
+#include "en/tc_priv.h"
+#include "en/tc_ct.h"
+
+static bool
+tc_act_can_offload_ct(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ bool clear_action = act->ct.action & TCA_CT_ACT_CLEAR;
+ struct netlink_ext_ack *extack = parse_state->extack;
+
+ if (parse_state->ct && !clear_action) {
+ NL_SET_ERR_MSG_MOD(extack, "Multiple CT actions are not supported");
+ return false;
+ }
+
+ return true;
+}
+
+static int
+tc_act_parse_ct(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ bool clear_action = act->ct.action & TCA_CT_ACT_CLEAR;
+ int err;
+
+ /* It's redundant to do ct clear more than once. */
+ if (clear_action && parse_state->ct_clear)
+ return 0;
+
+ err = mlx5_tc_ct_parse_action(parse_state->ct_priv, attr,
+ &attr->parse_attr->mod_hdr_acts,
+ act, parse_state->extack);
+ if (err)
+ return err;
+
+
+ if (mlx5e_is_eswitch_flow(parse_state->flow))
+ attr->esw_attr->split_count = attr->esw_attr->out_count;
+
+ if (clear_action) {
+ parse_state->ct_clear = true;
+ } else {
+ attr->flags |= MLX5_ATTR_FLAG_CT;
+ flow_flag_set(parse_state->flow, CT);
+ parse_state->ct = true;
+ }
+
+ return 0;
+}
+
+static int
+tc_act_post_parse_ct(struct mlx5e_tc_act_parse_state *parse_state,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5e_tc_mod_hdr_acts *mod_acts = &attr->parse_attr->mod_hdr_acts;
+ int err;
+
+ /* If ct action exist, we can ignore previous ct_clear actions */
+ if (parse_state->ct)
+ return 0;
+
+ if (parse_state->ct_clear) {
+ err = mlx5_tc_ct_set_ct_clear_regs(parse_state->ct_priv, mod_acts);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(parse_state->extack,
+ "Failed to set registers for ct clear");
+ return err;
+ }
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+
+ /* Prevent handling of additional, redundant clear actions */
+ parse_state->ct_clear = false;
+ }
+
+ return 0;
+}
+
+static bool
+tc_act_is_multi_table_act_ct(struct mlx5e_priv *priv,
+ const struct flow_action_entry *act,
+ struct mlx5_flow_attr *attr)
+{
+ if (act->ct.action & TCA_CT_ACT_CLEAR)
+ return false;
+
+ return true;
+}
+
+struct mlx5e_tc_act mlx5e_tc_act_ct = {
+ .can_offload = tc_act_can_offload_ct,
+ .parse_action = tc_act_parse_ct,
+ .is_multi_table_act = tc_act_is_multi_table_act_ct,
+ .post_parse = tc_act_post_parse_ct,
+};
+
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/drop.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/drop.c
new file mode 100644
index 000000000000..dd025a95c439
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/drop.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include "act.h"
+#include "en/tc_priv.h"
+
+static bool
+tc_act_can_offload_drop(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ return true;
+}
+
+static int
+tc_act_parse_drop(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
+
+ return 0;
+}
+
+struct mlx5e_tc_act mlx5e_tc_act_drop = {
+ .can_offload = tc_act_can_offload_drop,
+ .parse_action = tc_act_parse_drop,
+};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/goto.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/goto.c
new file mode 100644
index 000000000000..25174f68613e
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/goto.c
@@ -0,0 +1,124 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include "act.h"
+#include "en/tc_priv.h"
+#include "eswitch.h"
+
+static int
+validate_goto_chain(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_attr *attr,
+ const struct flow_action_entry *act,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
+ bool is_esw = mlx5e_is_eswitch_flow(flow);
+ bool ft_flow = mlx5e_is_ft_flow(flow);
+ u32 dest_chain = act->chain_index;
+ struct mlx5_fs_chains *chains;
+ struct mlx5_eswitch *esw;
+ u32 reformat_and_fwd;
+ u32 max_chain;
+
+ esw = priv->mdev->priv.eswitch;
+ chains = is_esw ? esw_chains(esw) : mlx5e_nic_chains(tc);
+ max_chain = mlx5_chains_get_chain_range(chains);
+ reformat_and_fwd = is_esw ?
+ MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, reformat_and_fwd_to_table) :
+ MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, reformat_and_fwd_to_table);
+
+ if (ft_flow) {
+ NL_SET_ERR_MSG_MOD(extack, "Goto action is not supported");
+ return -EOPNOTSUPP;
+ }
+
+ if (!mlx5_chains_backwards_supported(chains) &&
+ dest_chain <= attr->chain) {
+ NL_SET_ERR_MSG_MOD(extack, "Goto lower numbered chain isn't supported");
+ return -EOPNOTSUPP;
+ }
+
+ if (dest_chain > max_chain) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Requested destination chain is out of supported range");
+ return -EOPNOTSUPP;
+ }
+
+ if (attr->action & (MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT |
+ MLX5_FLOW_CONTEXT_ACTION_DECAP) &&
+ !reformat_and_fwd) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Goto chain is not allowed if action has reformat or decap");
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static bool
+tc_act_can_offload_goto(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ struct netlink_ext_ack *extack = parse_state->extack;
+ struct mlx5e_tc_flow *flow = parse_state->flow;
+
+ if (validate_goto_chain(flow->priv, flow, attr, act, extack))
+ return false;
+
+ return true;
+}
+
+static int
+tc_act_parse_goto(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ attr->dest_chain = act->chain_index;
+
+ return 0;
+}
+
+static int
+tc_act_post_parse_goto(struct mlx5e_tc_act_parse_state *parse_state,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr;
+ struct netlink_ext_ack *extack = parse_state->extack;
+ struct mlx5e_tc_flow *flow = parse_state->flow;
+
+ if (!attr->dest_chain)
+ return 0;
+
+ if (parse_state->decap) {
+ /* It can be supported if we'll create a mapping for
+ * the tunnel device only (without tunnel), and set
+ * this tunnel id with this decap flow.
+ *
+ * On restore (miss), we'll just set this saved tunnel
+ * device.
+ */
+
+ NL_SET_ERR_MSG_MOD(extack, "Decap with goto isn't supported");
+ netdev_warn(priv->netdev, "Decap with goto isn't supported");
+ return -EOPNOTSUPP;
+ }
+
+ if (!mlx5e_is_eswitch_flow(flow) && parse_attr->mirred_ifindex[0]) {
+ NL_SET_ERR_MSG_MOD(extack, "Mirroring goto chain rules isn't supported");
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+struct mlx5e_tc_act mlx5e_tc_act_goto = {
+ .can_offload = tc_act_can_offload_goto,
+ .parse_action = tc_act_parse_goto,
+ .post_parse = tc_act_post_parse_goto,
+};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mark.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mark.c
new file mode 100644
index 000000000000..e8d227595b3e
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mark.c
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include "act.h"
+#include "en_tc.h"
+
+static bool
+tc_act_can_offload_mark(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ if (act->mark & ~MLX5E_TC_FLOW_ID_MASK) {
+ NL_SET_ERR_MSG_MOD(parse_state->extack, "Bad flow mark, only 16 bit supported");
+ return false;
+ }
+
+ return true;
+}
+
+static int
+tc_act_parse_mark(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ attr->nic_attr->flow_tag = act->mark;
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+
+ return 0;
+}
+
+struct mlx5e_tc_act mlx5e_tc_act_mark = {
+ .can_offload = tc_act_can_offload_mark,
+ .parse_action = tc_act_parse_mark,
+};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c
new file mode 100644
index 000000000000..4ac7de3f6afa
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c
@@ -0,0 +1,337 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include <linux/if_macvlan.h>
+#include <linux/if_vlan.h>
+#include <net/bareudp.h>
+#include <net/bonding.h>
+#include "act.h"
+#include "vlan.h"
+#include "en/tc_tun_encap.h"
+#include "en/tc_priv.h"
+#include "en_rep.h"
+#include "lag/lag.h"
+
+static bool
+same_vf_reps(struct mlx5e_priv *priv, struct net_device *out_dev)
+{
+ return mlx5e_eswitch_vf_rep(priv->netdev) &&
+ priv->netdev == out_dev;
+}
+
+static int
+verify_uplink_forwarding(struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr,
+ struct net_device *out_dev,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_rep_priv *rep_priv;
+
+ /* Forwarding non encapsulated traffic between
+ * uplink ports is allowed only if
+ * termination_table_raw_traffic cap is set.
+ *
+ * Input vport was stored attr->in_rep.
+ * In LAG case, *priv* is the private data of
+ * uplink which may be not the input vport.
+ */
+ rep_priv = mlx5e_rep_to_rep_priv(attr->esw_attr->in_rep);
+
+ if (!(mlx5e_eswitch_uplink_rep(rep_priv->netdev) &&
+ mlx5e_eswitch_uplink_rep(out_dev)))
+ return 0;
+
+ if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev,
+ termination_table_raw_traffic)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "devices are both uplink, can't offload forwarding");
+ return -EOPNOTSUPP;
+ } else if (out_dev != rep_priv->netdev) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "devices are not the same uplink, can't offload forwarding");
+ return -EOPNOTSUPP;
+ }
+ return 0;
+}
+
+static bool
+is_duplicated_output_device(struct net_device *dev,
+ struct net_device *out_dev,
+ int *ifindexes, int if_count,
+ struct netlink_ext_ack *extack)
+{
+ int i;
+
+ for (i = 0; i < if_count; i++) {
+ if (ifindexes[i] == out_dev->ifindex) {
+ NL_SET_ERR_MSG_MOD(extack, "can't duplicate output to same device");
+ netdev_err(dev, "can't duplicate output to same device: %s\n",
+ out_dev->name);
+ return true;
+ }
+ }
+
+ return false;
+}
+
+static struct net_device *
+get_fdb_out_dev(struct net_device *uplink_dev, struct net_device *out_dev)
+{
+ struct net_device *fdb_out_dev = out_dev;
+ struct net_device *uplink_upper;
+
+ rcu_read_lock();
+ uplink_upper = netdev_master_upper_dev_get_rcu(uplink_dev);
+ if (uplink_upper && netif_is_lag_master(uplink_upper) &&
+ uplink_upper == out_dev) {
+ fdb_out_dev = uplink_dev;
+ } else if (netif_is_lag_master(out_dev)) {
+ fdb_out_dev = bond_option_active_slave_get_rcu(netdev_priv(out_dev));
+ if (fdb_out_dev &&
+ (!mlx5e_eswitch_rep(fdb_out_dev) ||
+ !netdev_port_same_parent_id(fdb_out_dev, uplink_dev)))
+ fdb_out_dev = NULL;
+ }
+ rcu_read_unlock();
+ return fdb_out_dev;
+}
+
+static bool
+tc_act_can_offload_mirred(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ struct netlink_ext_ack *extack = parse_state->extack;
+ struct mlx5e_tc_flow *flow = parse_state->flow;
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
+ struct net_device *out_dev = act->dev;
+ struct mlx5e_priv *priv = flow->priv;
+ struct mlx5_esw_flow_attr *esw_attr;
+
+ parse_attr = attr->parse_attr;
+ esw_attr = attr->esw_attr;
+
+ if (!out_dev) {
+ /* out_dev is NULL when filters with
+ * non-existing mirred device are replayed to
+ * the driver.
+ */
+ return false;
+ }
+
+ if (parse_state->mpls_push && !netif_is_bareudp(out_dev)) {
+ NL_SET_ERR_MSG_MOD(extack, "mpls is supported only through a bareudp device");
+ return false;
+ }
+
+ if (parse_state->eth_pop && !parse_state->mpls_push) {
+ NL_SET_ERR_MSG_MOD(extack, "vlan pop eth is supported only with mpls push");
+ return false;
+ }
+
+ if (flow_flag_test(parse_state->flow, L3_TO_L2_DECAP) && !parse_state->eth_push) {
+ NL_SET_ERR_MSG_MOD(extack, "mpls pop is only supported with vlan eth push");
+ return false;
+ }
+
+ if (mlx5e_is_ft_flow(flow) && out_dev == priv->netdev) {
+ /* Ignore forward to self rules generated
+ * by adding both mlx5 devs to the flow table
+ * block on a normal nft offload setup.
+ */
+ return false;
+ }
+
+ if (esw_attr->out_count >= MLX5_MAX_FLOW_FWD_VPORTS) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "can't support more output ports, can't offload forwarding");
+ netdev_warn(priv->netdev,
+ "can't support more than %d output ports, can't offload forwarding\n",
+ esw_attr->out_count);
+ return false;
+ }
+
+ if (parse_state->encap ||
+ netdev_port_same_parent_id(priv->netdev, out_dev) ||
+ netif_is_ovs_master(out_dev))
+ return true;
+
+ if (parse_attr->filter_dev != priv->netdev) {
+ /* All mlx5 devices are called to configure
+ * high level device filters. Therefore, the
+ * *attempt* to install a filter on invalid
+ * eswitch should not trigger an explicit error
+ */
+ return false;
+ }
+
+ NL_SET_ERR_MSG_MOD(extack, "devices are not on same switch HW, can't offload forwarding");
+
+ return false;
+}
+
+static int
+parse_mirred_encap(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr;
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+ struct net_device *out_dev = act->dev;
+
+ parse_attr->mirred_ifindex[esw_attr->out_count] = out_dev->ifindex;
+ parse_attr->tun_info[esw_attr->out_count] =
+ mlx5e_dup_tun_info(parse_state->tun_info);
+
+ if (!parse_attr->tun_info[esw_attr->out_count])
+ return -ENOMEM;
+
+ parse_state->encap = false;
+
+ if (parse_state->mpls_push) {
+ memcpy(&parse_attr->mpls_info[esw_attr->out_count],
+ &parse_state->mpls_info, sizeof(parse_state->mpls_info));
+ parse_state->mpls_push = false;
+ }
+ esw_attr->dests[esw_attr->out_count].flags |= MLX5_ESW_DEST_ENCAP;
+ esw_attr->out_count++;
+ /* attr->dests[].rep is resolved when we handle encap */
+
+ return 0;
+}
+
+static int
+parse_mirred(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr;
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+ struct netlink_ext_ack *extack = parse_state->extack;
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct net_device *out_dev = act->dev;
+ struct net_device *uplink_dev;
+ struct mlx5e_priv *out_priv;
+ struct mlx5_eswitch *esw;
+ bool is_uplink_rep;
+ int *ifindexes;
+ int if_count;
+ int err;
+
+ esw = priv->mdev->priv.eswitch;
+ uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH);
+ ifindexes = parse_state->ifindexes;
+ if_count = parse_state->if_count;
+
+ if (is_duplicated_output_device(priv->netdev, out_dev, ifindexes, if_count, extack))
+ return -EOPNOTSUPP;
+
+ parse_state->ifindexes[if_count] = out_dev->ifindex;
+ parse_state->if_count++;
+ is_uplink_rep = mlx5e_eswitch_uplink_rep(out_dev);
+ err = mlx5_lag_do_mirred(priv->mdev, out_dev);
+ if (err)
+ return err;
+
+ out_dev = get_fdb_out_dev(uplink_dev, out_dev);
+ if (!out_dev)
+ return -ENODEV;
+
+ if (is_vlan_dev(out_dev)) {
+ err = mlx5e_tc_act_vlan_add_push_action(priv, attr, &out_dev, extack);
+ if (err)
+ return err;
+ }
+
+ if (is_vlan_dev(parse_attr->filter_dev)) {
+ err = mlx5e_tc_act_vlan_add_pop_action(priv, attr, extack);
+ if (err)
+ return err;
+ }
+
+ if (netif_is_macvlan(out_dev))
+ out_dev = macvlan_dev_real_dev(out_dev);
+
+ err = verify_uplink_forwarding(priv, attr, out_dev, extack);
+ if (err)
+ return err;
+
+ if (!mlx5e_is_valid_eswitch_fwd_dev(priv, out_dev)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "devices are not on same switch HW, can't offload forwarding");
+ return -EOPNOTSUPP;
+ }
+
+ if (same_vf_reps(priv, out_dev)) {
+ NL_SET_ERR_MSG_MOD(extack, "can't forward from a VF to itself");
+ return -EOPNOTSUPP;
+ }
+
+ out_priv = netdev_priv(out_dev);
+ rpriv = out_priv->ppriv;
+ esw_attr->dests[esw_attr->out_count].rep = rpriv->rep;
+ esw_attr->dests[esw_attr->out_count].mdev = out_priv->mdev;
+
+ /* If output device is bond master then rules are not explicit
+ * so we don't attempt to count them.
+ */
+ if (is_uplink_rep && MLX5_CAP_PORT_SELECTION(priv->mdev, port_select_flow_table) &&
+ MLX5_CAP_GEN(priv->mdev, create_lag_when_not_master_up))
+ attr->lag.count = true;
+
+ esw_attr->out_count++;
+
+ return 0;
+}
+
+static int
+parse_mirred_ovs_master(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+ struct net_device *out_dev = act->dev;
+ int err;
+
+ err = mlx5e_set_fwd_to_int_port_actions(priv, attr, out_dev->ifindex,
+ MLX5E_TC_INT_PORT_EGRESS,
+ &attr->action, esw_attr->out_count);
+ if (err)
+ return err;
+
+ esw_attr->out_count++;
+ return 0;
+}
+
+static int
+tc_act_parse_mirred(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ struct net_device *out_dev = act->dev;
+ int err = -EOPNOTSUPP;
+
+ if (parse_state->encap)
+ err = parse_mirred_encap(parse_state, act, attr);
+ else if (netdev_port_same_parent_id(priv->netdev, out_dev))
+ err = parse_mirred(parse_state, act, priv, attr);
+ else if (netif_is_ovs_master(out_dev))
+ err = parse_mirred_ovs_master(parse_state, act, priv, attr);
+
+ if (err)
+ return err;
+
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+
+ return 0;
+}
+
+struct mlx5e_tc_act mlx5e_tc_act_mirred = {
+ .can_offload = tc_act_can_offload_mirred,
+ .parse_action = tc_act_parse_mirred,
+};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred_nic.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred_nic.c
new file mode 100644
index 000000000000..90b4c1b34776
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred_nic.c
@@ -0,0 +1,51 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include "act.h"
+#include "en/tc_priv.h"
+
+static bool
+tc_act_can_offload_mirred_nic(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ struct netlink_ext_ack *extack = parse_state->extack;
+ struct mlx5e_tc_flow *flow = parse_state->flow;
+ struct net_device *out_dev = act->dev;
+ struct mlx5e_priv *priv = flow->priv;
+
+ if (act->id != FLOW_ACTION_REDIRECT)
+ return false;
+
+ if (priv->netdev->netdev_ops != out_dev->netdev_ops ||
+ !mlx5e_same_hw_devs(priv, netdev_priv(out_dev))) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "devices are not on same switch HW, can't offload forwarding");
+ netdev_warn(priv->netdev,
+ "devices %s %s not on same switch HW, can't offload forwarding\n",
+ netdev_name(priv->netdev),
+ out_dev->name);
+ return false;
+ }
+
+ return true;
+}
+
+static int
+tc_act_parse_mirred_nic(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ attr->parse_attr->mirred_ifindex[0] = act->dev->ifindex;
+ flow_flag_set(parse_state->flow, HAIRPIN);
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+
+ return 0;
+}
+
+struct mlx5e_tc_act mlx5e_tc_act_mirred_nic = {
+ .can_offload = tc_act_can_offload_mirred_nic,
+ .parse_action = tc_act_parse_mirred_nic,
+};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mpls.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mpls.c
new file mode 100644
index 000000000000..f106190bf37c
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mpls.c
@@ -0,0 +1,99 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include <net/bareudp.h>
+#include "act.h"
+#include "en/tc_priv.h"
+
+static bool
+tc_act_can_offload_mpls_push(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ struct netlink_ext_ack *extack = parse_state->extack;
+ struct mlx5e_priv *priv = parse_state->flow->priv;
+
+ if (!MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, reformat_l2_to_l3_tunnel) ||
+ act->mpls_push.proto != htons(ETH_P_MPLS_UC)) {
+ NL_SET_ERR_MSG_MOD(extack, "mpls push is supported only for mpls_uc protocol");
+ return false;
+ }
+
+ return true;
+}
+
+static void
+copy_mpls_info(struct mlx5e_mpls_info *mpls_info,
+ const struct flow_action_entry *act)
+{
+ mpls_info->label = act->mpls_push.label;
+ mpls_info->tc = act->mpls_push.tc;
+ mpls_info->bos = act->mpls_push.bos;
+ mpls_info->ttl = act->mpls_push.ttl;
+}
+
+static int
+tc_act_parse_mpls_push(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ parse_state->mpls_push = true;
+ copy_mpls_info(&parse_state->mpls_info, act);
+
+ return 0;
+}
+
+static bool
+tc_act_can_offload_mpls_pop(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ struct netlink_ext_ack *extack = parse_state->extack;
+ struct net_device *filter_dev;
+
+ filter_dev = attr->parse_attr->filter_dev;
+
+ /* we only support mpls pop if it is the first action
+ * or it is second action after tunnel key unset
+ * and the filter net device is bareudp. Subsequent
+ * actions can be pedit and the last can be mirred
+ * egress redirect.
+ */
+ if ((act_index == 1 && !parse_state->decap) || act_index > 1) {
+ NL_SET_ERR_MSG_MOD(extack, "mpls pop supported only as first action or with decap");
+ return false;
+ }
+
+ if (!netif_is_bareudp(filter_dev)) {
+ NL_SET_ERR_MSG_MOD(extack, "mpls pop supported only on bareudp devices");
+ return false;
+ }
+
+ return true;
+}
+
+static int
+tc_act_parse_mpls_pop(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ attr->esw_attr->eth.h_proto = act->mpls_pop.proto;
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
+ flow_flag_set(parse_state->flow, L3_TO_L2_DECAP);
+
+ return 0;
+}
+
+struct mlx5e_tc_act mlx5e_tc_act_mpls_push = {
+ .can_offload = tc_act_can_offload_mpls_push,
+ .parse_action = tc_act_parse_mpls_push,
+};
+
+struct mlx5e_tc_act mlx5e_tc_act_mpls_pop = {
+ .can_offload = tc_act_can_offload_mpls_pop,
+ .parse_action = tc_act_parse_mpls_pop,
+};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.c
new file mode 100644
index 000000000000..47597c524e59
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.c
@@ -0,0 +1,119 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include <linux/if_vlan.h>
+#include "act.h"
+#include "pedit.h"
+#include "en/tc_priv.h"
+#include "en/mod_hdr.h"
+
+static int pedit_header_offsets[] = {
+ [FLOW_ACT_MANGLE_HDR_TYPE_ETH] = offsetof(struct pedit_headers, eth),
+ [FLOW_ACT_MANGLE_HDR_TYPE_IP4] = offsetof(struct pedit_headers, ip4),
+ [FLOW_ACT_MANGLE_HDR_TYPE_IP6] = offsetof(struct pedit_headers, ip6),
+ [FLOW_ACT_MANGLE_HDR_TYPE_TCP] = offsetof(struct pedit_headers, tcp),
+ [FLOW_ACT_MANGLE_HDR_TYPE_UDP] = offsetof(struct pedit_headers, udp),
+};
+
+#define pedit_header(_ph, _htype) ((void *)(_ph) + pedit_header_offsets[_htype])
+
+static int
+set_pedit_val(u8 hdr_type, u32 mask, u32 val, u32 offset,
+ struct pedit_headers_action *hdrs,
+ struct netlink_ext_ack *extack)
+{
+ u32 *curr_pmask, *curr_pval;
+
+ curr_pmask = (u32 *)(pedit_header(&hdrs->masks, hdr_type) + offset);
+ curr_pval = (u32 *)(pedit_header(&hdrs->vals, hdr_type) + offset);
+
+ if (*curr_pmask & mask) { /* disallow acting twice on the same location */
+ NL_SET_ERR_MSG_MOD(extack,
+ "curr_pmask and new mask same. Acting twice on same location");
+ goto out_err;
+ }
+
+ *curr_pmask |= mask;
+ *curr_pval |= (val & mask);
+
+ return 0;
+
+out_err:
+ return -EOPNOTSUPP;
+}
+
+int
+mlx5e_tc_act_pedit_parse_action(struct mlx5e_priv *priv,
+ const struct flow_action_entry *act, int namespace,
+ struct pedit_headers_action *hdrs,
+ struct netlink_ext_ack *extack)
+{
+ u8 cmd = (act->id == FLOW_ACTION_MANGLE) ? 0 : 1;
+ u8 htype = act->mangle.htype;
+ int err = -EOPNOTSUPP;
+ u32 mask, val, offset;
+
+ if (htype == FLOW_ACT_MANGLE_UNSPEC) {
+ NL_SET_ERR_MSG_MOD(extack, "legacy pedit isn't offloaded");
+ goto out_err;
+ }
+
+ if (!mlx5e_mod_hdr_max_actions(priv->mdev, namespace)) {
+ NL_SET_ERR_MSG_MOD(extack, "The pedit offload action is not supported");
+ goto out_err;
+ }
+
+ mask = act->mangle.mask;
+ val = act->mangle.val;
+ offset = act->mangle.offset;
+
+ err = set_pedit_val(htype, ~mask, val, offset, &hdrs[cmd], extack);
+ if (err)
+ goto out_err;
+
+ hdrs[cmd].pedits++;
+
+ return 0;
+out_err:
+ return err;
+}
+
+static bool
+tc_act_can_offload_pedit(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ return true;
+}
+
+static int
+tc_act_parse_pedit(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+ struct mlx5e_tc_flow *flow = parse_state->flow;
+ enum mlx5_flow_namespace_type ns_type;
+ int err;
+
+ ns_type = mlx5e_get_flow_namespace(flow);
+
+ err = mlx5e_tc_act_pedit_parse_action(flow->priv, act, ns_type, attr->parse_attr->hdrs,
+ parse_state->extack);
+ if (err)
+ return err;
+
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+
+ if (ns_type == MLX5_FLOW_NAMESPACE_FDB)
+ esw_attr->split_count = esw_attr->out_count;
+
+ return 0;
+}
+
+struct mlx5e_tc_act mlx5e_tc_act_pedit = {
+ .can_offload = tc_act_can_offload_pedit,
+ .parse_action = tc_act_parse_pedit,
+};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.h
new file mode 100644
index 000000000000..434c8bd710a2
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#ifndef __MLX5_EN_TC_ACT_PEDIT_H__
+#define __MLX5_EN_TC_ACT_PEDIT_H__
+
+#include "en_tc.h"
+
+struct pedit_headers {
+ struct ethhdr eth;
+ struct vlan_hdr vlan;
+ struct iphdr ip4;
+ struct ipv6hdr ip6;
+ struct tcphdr tcp;
+ struct udphdr udp;
+};
+
+struct pedit_headers_action {
+ struct pedit_headers vals;
+ struct pedit_headers masks;
+ u32 pedits;
+};
+
+int
+mlx5e_tc_act_pedit_parse_action(struct mlx5e_priv *priv,
+ const struct flow_action_entry *act, int namespace,
+ struct pedit_headers_action *hdrs,
+ struct netlink_ext_ack *extack);
+
+#endif /* __MLX5_EN_TC_ACT_PEDIT_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/police.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/police.c
new file mode 100644
index 000000000000..c8e5ca65bb6e
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/police.c
@@ -0,0 +1,157 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include "act.h"
+#include "en/tc_priv.h"
+
+static bool
+tc_act_can_offload_police(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ if (act->police.notexceed.act_id != FLOW_ACTION_PIPE &&
+ act->police.notexceed.act_id != FLOW_ACTION_ACCEPT) {
+ NL_SET_ERR_MSG_MOD(parse_state->extack,
+ "Offload not supported when conform action is not pipe or ok");
+ return false;
+ }
+ if (mlx5e_policer_validate(parse_state->flow_action, act,
+ parse_state->extack))
+ return false;
+
+ return !!mlx5e_get_flow_meters(parse_state->flow->priv->mdev);
+}
+
+static int
+fill_meter_params_from_act(const struct flow_action_entry *act,
+ struct mlx5e_flow_meter_params *params)
+{
+ params->index = act->hw_index;
+ if (act->police.rate_bytes_ps) {
+ params->mode = MLX5_RATE_LIMIT_BPS;
+ /* change rate to bits per second */
+ params->rate = act->police.rate_bytes_ps << 3;
+ params->burst = act->police.burst;
+ } else if (act->police.rate_pkt_ps) {
+ params->mode = MLX5_RATE_LIMIT_PPS;
+ params->rate = act->police.rate_pkt_ps;
+ params->burst = act->police.burst_pkt;
+ } else {
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static int
+tc_act_parse_police(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ int err;
+
+ err = fill_meter_params_from_act(act, &attr->meter_attr.params);
+ if (err)
+ return err;
+
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_EXECUTE_ASO;
+ attr->exe_aso_type = MLX5_EXE_ASO_FLOW_METER;
+
+ return 0;
+}
+
+static bool
+tc_act_is_multi_table_act_police(struct mlx5e_priv *priv,
+ const struct flow_action_entry *act,
+ struct mlx5_flow_attr *attr)
+{
+ return true;
+}
+
+static int
+tc_act_police_offload(struct mlx5e_priv *priv,
+ struct flow_offload_action *fl_act,
+ struct flow_action_entry *act)
+{
+ struct mlx5e_flow_meter_params params = {};
+ struct mlx5e_flow_meter_handle *meter;
+ int err = 0;
+
+ err = mlx5e_policer_validate(&fl_act->action, act, fl_act->extack);
+ if (err)
+ return err;
+
+ err = fill_meter_params_from_act(act, &params);
+ if (err)
+ return err;
+
+ meter = mlx5e_tc_meter_get(priv->mdev, &params);
+ if (IS_ERR(meter) && PTR_ERR(meter) == -ENOENT) {
+ meter = mlx5e_tc_meter_replace(priv->mdev, &params);
+ } else if (!IS_ERR(meter)) {
+ err = mlx5e_tc_meter_update(meter, &params);
+ mlx5e_tc_meter_put(meter);
+ }
+
+ if (IS_ERR(meter)) {
+ NL_SET_ERR_MSG_MOD(fl_act->extack, "Failed to get flow meter");
+ mlx5_core_err(priv->mdev, "Failed to get flow meter %d\n", params.index);
+ err = PTR_ERR(meter);
+ }
+
+ return err;
+}
+
+static int
+tc_act_police_destroy(struct mlx5e_priv *priv,
+ struct flow_offload_action *fl_act)
+{
+ struct mlx5e_flow_meter_params params = {};
+ struct mlx5e_flow_meter_handle *meter;
+
+ params.index = fl_act->index;
+ meter = mlx5e_tc_meter_get(priv->mdev, &params);
+ if (IS_ERR(meter)) {
+ NL_SET_ERR_MSG_MOD(fl_act->extack, "Failed to get flow meter");
+ mlx5_core_err(priv->mdev, "Failed to get flow meter %d\n", params.index);
+ return PTR_ERR(meter);
+ }
+ /* first put for the get and second for cleanup */
+ mlx5e_tc_meter_put(meter);
+ mlx5e_tc_meter_put(meter);
+ return 0;
+}
+
+static int
+tc_act_police_stats(struct mlx5e_priv *priv,
+ struct flow_offload_action *fl_act)
+{
+ struct mlx5e_flow_meter_params params = {};
+ struct mlx5e_flow_meter_handle *meter;
+ u64 bytes, packets, drops, lastuse;
+
+ params.index = fl_act->index;
+ meter = mlx5e_tc_meter_get(priv->mdev, &params);
+ if (IS_ERR(meter)) {
+ NL_SET_ERR_MSG_MOD(fl_act->extack, "Failed to get flow meter");
+ mlx5_core_err(priv->mdev, "Failed to get flow meter %d\n", params.index);
+ return PTR_ERR(meter);
+ }
+
+ mlx5e_tc_meter_get_stats(meter, &bytes, &packets, &drops, &lastuse);
+ flow_stats_update(&fl_act->stats, bytes, packets, drops, lastuse,
+ FLOW_ACTION_HW_STATS_DELAYED);
+ mlx5e_tc_meter_put(meter);
+ return 0;
+}
+
+struct mlx5e_tc_act mlx5e_tc_act_police = {
+ .can_offload = tc_act_can_offload_police,
+ .parse_action = tc_act_parse_police,
+ .is_multi_table_act = tc_act_is_multi_table_act_police,
+ .offload_action = tc_act_police_offload,
+ .destroy_action = tc_act_police_destroy,
+ .stats_action = tc_act_police_stats,
+};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ptype.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ptype.c
new file mode 100644
index 000000000000..6454b031ff7a
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ptype.c
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include "act.h"
+#include "en/tc_priv.h"
+
+static bool
+tc_act_can_offload_ptype(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ return true;
+}
+
+static int
+tc_act_parse_ptype(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ struct netlink_ext_ack *extack = parse_state->extack;
+
+ if (act->ptype != PACKET_HOST) {
+ NL_SET_ERR_MSG_MOD(extack, "skbedit ptype is only supported with type host");
+ return -EOPNOTSUPP;
+ }
+
+ parse_state->ptype_host = true;
+ return 0;
+}
+
+struct mlx5e_tc_act mlx5e_tc_act_ptype = {
+ .can_offload = tc_act_can_offload_ptype,
+ .parse_action = tc_act_parse_ptype,
+};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/redirect_ingress.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/redirect_ingress.c
new file mode 100644
index 000000000000..ad09a8a5f36e
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/redirect_ingress.c
@@ -0,0 +1,78 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include "act.h"
+#include "en/tc_priv.h"
+
+static bool
+tc_act_can_offload_redirect_ingress(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ struct netlink_ext_ack *extack = parse_state->extack;
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
+ struct net_device *out_dev = act->dev;
+ struct mlx5_esw_flow_attr *esw_attr;
+
+ parse_attr = attr->parse_attr;
+ esw_attr = attr->esw_attr;
+
+ if (!out_dev)
+ return false;
+
+ if (!netif_is_ovs_master(out_dev)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "redirect to ingress is supported only for OVS internal ports");
+ return false;
+ }
+
+ if (netif_is_ovs_master(parse_attr->filter_dev)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "redirect to ingress is not supported from internal port");
+ return false;
+ }
+
+ if (!parse_state->ptype_host) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "redirect to int port ingress requires ptype=host action");
+ return false;
+ }
+
+ if (esw_attr->out_count) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "redirect to int port ingress is supported only as single destination");
+ return false;
+ }
+
+ return true;
+}
+
+static int
+tc_act_parse_redirect_ingress(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+ struct net_device *out_dev = act->dev;
+ int err;
+
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+
+ err = mlx5e_set_fwd_to_int_port_actions(priv, attr, out_dev->ifindex,
+ MLX5E_TC_INT_PORT_INGRESS,
+ &attr->action, esw_attr->out_count);
+ if (err)
+ return err;
+
+ esw_attr->out_count++;
+
+ return 0;
+}
+
+struct mlx5e_tc_act mlx5e_tc_act_redirect_ingress = {
+ .can_offload = tc_act_can_offload_redirect_ingress,
+ .parse_action = tc_act_parse_redirect_ingress,
+};
+
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.c
new file mode 100644
index 000000000000..2c0196431302
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.c
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include <net/psample.h>
+#include "act.h"
+#include "en/tc_priv.h"
+#include "en/tc/act/sample.h"
+
+static bool
+tc_act_can_offload_sample(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ struct netlink_ext_ack *extack = parse_state->extack;
+ bool ct_nat;
+
+ ct_nat = attr->ct_attr.ct_action & TCA_CT_ACT_NAT;
+
+ if (flow_flag_test(parse_state->flow, CT) && ct_nat) {
+ NL_SET_ERR_MSG_MOD(extack, "Sample action with CT NAT is not supported");
+ return false;
+ }
+
+ return true;
+}
+
+static int
+tc_act_parse_sample(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5e_sample_attr *sample_attr = &attr->sample_attr;
+
+ sample_attr->rate = act->sample.rate;
+ sample_attr->group_num = act->sample.psample_group->group_num;
+
+ if (act->sample.truncate)
+ sample_attr->trunc_size = act->sample.trunc_size;
+
+ attr->flags |= MLX5_ATTR_FLAG_SAMPLE;
+ flow_flag_set(parse_state->flow, SAMPLE);
+
+ return 0;
+}
+
+bool
+mlx5e_tc_act_sample_is_multi_table(struct mlx5_core_dev *mdev,
+ struct mlx5_flow_attr *attr)
+{
+ if (MLX5_CAP_GEN(mdev, reg_c_preserve) ||
+ attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP)
+ return true;
+
+ return false;
+}
+
+static bool
+tc_act_is_multi_table_act_sample(struct mlx5e_priv *priv,
+ const struct flow_action_entry *act,
+ struct mlx5_flow_attr *attr)
+{
+ return mlx5e_tc_act_sample_is_multi_table(priv->mdev, attr);
+}
+
+struct mlx5e_tc_act mlx5e_tc_act_sample = {
+ .can_offload = tc_act_can_offload_sample,
+ .parse_action = tc_act_parse_sample,
+ .is_multi_table_act = tc_act_is_multi_table_act_sample,
+};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.h
new file mode 100644
index 000000000000..3efb3a15c5d2
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#ifndef __MLX5_EN_TC_ACT_SAMPLE_H__
+#define __MLX5_EN_TC_ACT_SAMPLE_H__
+
+#include <net/flow_offload.h>
+#include "en/tc_priv.h"
+
+bool
+mlx5e_tc_act_sample_is_multi_table(struct mlx5_core_dev *mdev,
+ struct mlx5_flow_attr *attr);
+
+#endif /* __MLX5_EN_TC_ACT_SAMPLE_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/trap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/trap.c
new file mode 100644
index 000000000000..53b270f652b9
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/trap.c
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include "act.h"
+#include "en/tc_priv.h"
+
+static bool
+tc_act_can_offload_trap(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ struct netlink_ext_ack *extack = parse_state->extack;
+
+ if (parse_state->flow_action->num_entries != 1) {
+ NL_SET_ERR_MSG_MOD(extack, "action trap is supported as a sole action only");
+ return false;
+ }
+
+ return true;
+}
+
+static int
+tc_act_parse_trap(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ attr->flags |= MLX5_ATTR_FLAG_SLOW_PATH;
+
+ return 0;
+}
+
+struct mlx5e_tc_act mlx5e_tc_act_trap = {
+ .can_offload = tc_act_can_offload_trap,
+ .parse_action = tc_act_parse_trap,
+};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/tun.c
new file mode 100644
index 000000000000..b4fa2de9711d
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/tun.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include "act.h"
+#include "en/tc_tun_encap.h"
+#include "en/tc_priv.h"
+
+static bool
+tc_act_can_offload_tun_encap(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ if (!act->tunnel) {
+ NL_SET_ERR_MSG_MOD(parse_state->extack,
+ "Zero tunnel attributes is not supported");
+ return false;
+ }
+
+ return true;
+}
+
+static int
+tc_act_parse_tun_encap(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ parse_state->tun_info = act->tunnel;
+ parse_state->encap = true;
+
+ return 0;
+}
+
+static bool
+tc_act_can_offload_tun_decap(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ return true;
+}
+
+static int
+tc_act_parse_tun_decap(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ parse_state->decap = true;
+
+ return 0;
+}
+
+struct mlx5e_tc_act mlx5e_tc_act_tun_encap = {
+ .can_offload = tc_act_can_offload_tun_encap,
+ .parse_action = tc_act_parse_tun_encap,
+};
+
+struct mlx5e_tc_act mlx5e_tc_act_tun_decap = {
+ .can_offload = tc_act_can_offload_tun_decap,
+ .parse_action = tc_act_parse_tun_decap,
+};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c
new file mode 100644
index 000000000000..b86ac604d0c2
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c
@@ -0,0 +1,228 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include <linux/if_vlan.h>
+#include "act.h"
+#include "vlan.h"
+#include "en/tc_priv.h"
+
+static int
+add_vlan_prio_tag_rewrite_action(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow_parse_attr *parse_attr,
+ u32 *action, struct netlink_ext_ack *extack)
+{
+ const struct flow_action_entry prio_tag_act = {
+ .vlan.vid = 0,
+ .vlan.prio =
+ MLX5_GET(fte_match_set_lyr_2_4,
+ mlx5e_get_match_headers_value(*action,
+ &parse_attr->spec),
+ first_prio) &
+ MLX5_GET(fte_match_set_lyr_2_4,
+ mlx5e_get_match_headers_criteria(*action,
+ &parse_attr->spec),
+ first_prio),
+ };
+
+ return mlx5e_tc_act_vlan_add_rewrite_action(priv, MLX5_FLOW_NAMESPACE_FDB,
+ &prio_tag_act, parse_attr, action,
+ extack);
+}
+
+static int
+parse_tc_vlan_action(struct mlx5e_priv *priv,
+ const struct flow_action_entry *act,
+ struct mlx5_esw_flow_attr *attr,
+ u32 *action,
+ struct netlink_ext_ack *extack,
+ struct mlx5e_tc_act_parse_state *parse_state)
+{
+ u8 vlan_idx = attr->total_vlan;
+
+ if (vlan_idx >= MLX5_FS_VLAN_DEPTH) {
+ NL_SET_ERR_MSG_MOD(extack, "Total vlans used is greater than supported");
+ return -EOPNOTSUPP;
+ }
+
+ switch (act->id) {
+ case FLOW_ACTION_VLAN_POP:
+ if (vlan_idx) {
+ if (!mlx5_eswitch_vlan_actions_supported(priv->mdev,
+ MLX5_FS_VLAN_DEPTH)) {
+ NL_SET_ERR_MSG_MOD(extack, "vlan pop action is not supported");
+ return -EOPNOTSUPP;
+ }
+
+ *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP_2;
+ } else {
+ *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
+ }
+ break;
+ case FLOW_ACTION_VLAN_PUSH:
+ attr->vlan_vid[vlan_idx] = act->vlan.vid;
+ attr->vlan_prio[vlan_idx] = act->vlan.prio;
+ attr->vlan_proto[vlan_idx] = act->vlan.proto;
+ if (!attr->vlan_proto[vlan_idx])
+ attr->vlan_proto[vlan_idx] = htons(ETH_P_8021Q);
+
+ if (vlan_idx) {
+ if (!mlx5_eswitch_vlan_actions_supported(priv->mdev,
+ MLX5_FS_VLAN_DEPTH)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "vlan push action is not supported for vlan depth > 1");
+ return -EOPNOTSUPP;
+ }
+
+ *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2;
+ } else {
+ if (!mlx5_eswitch_vlan_actions_supported(priv->mdev, 1) &&
+ (act->vlan.proto != htons(ETH_P_8021Q) ||
+ act->vlan.prio)) {
+ NL_SET_ERR_MSG_MOD(extack, "vlan push action is not supported");
+ return -EOPNOTSUPP;
+ }
+
+ *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH;
+ }
+ break;
+ case FLOW_ACTION_VLAN_POP_ETH:
+ parse_state->eth_pop = true;
+ break;
+ case FLOW_ACTION_VLAN_PUSH_ETH:
+ if (!flow_flag_test(parse_state->flow, L3_TO_L2_DECAP))
+ return -EOPNOTSUPP;
+ parse_state->eth_push = true;
+ memcpy(attr->eth.h_dest, act->vlan_push_eth.dst, ETH_ALEN);
+ memcpy(attr->eth.h_source, act->vlan_push_eth.src, ETH_ALEN);
+ break;
+ default:
+ NL_SET_ERR_MSG_MOD(extack, "Unexpected action id for VLAN");
+ return -EINVAL;
+ }
+
+ attr->total_vlan = vlan_idx + 1;
+
+ return 0;
+}
+
+int
+mlx5e_tc_act_vlan_add_push_action(struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr,
+ struct net_device **out_dev,
+ struct netlink_ext_ack *extack)
+{
+ struct net_device *vlan_dev = *out_dev;
+ struct flow_action_entry vlan_act = {
+ .id = FLOW_ACTION_VLAN_PUSH,
+ .vlan.vid = vlan_dev_vlan_id(vlan_dev),
+ .vlan.proto = vlan_dev_vlan_proto(vlan_dev),
+ .vlan.prio = 0,
+ };
+ int err;
+
+ err = parse_tc_vlan_action(priv, &vlan_act, attr->esw_attr, &attr->action, extack, NULL);
+ if (err)
+ return err;
+
+ rcu_read_lock();
+ *out_dev = dev_get_by_index_rcu(dev_net(vlan_dev), dev_get_iflink(vlan_dev));
+ rcu_read_unlock();
+ if (!*out_dev)
+ return -ENODEV;
+
+ if (is_vlan_dev(*out_dev))
+ err = mlx5e_tc_act_vlan_add_push_action(priv, attr, out_dev, extack);
+
+ return err;
+}
+
+int
+mlx5e_tc_act_vlan_add_pop_action(struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr,
+ struct netlink_ext_ack *extack)
+{
+ struct flow_action_entry vlan_act = {
+ .id = FLOW_ACTION_VLAN_POP,
+ };
+ int nest_level, err = 0;
+
+ nest_level = attr->parse_attr->filter_dev->lower_level -
+ priv->netdev->lower_level;
+ while (nest_level--) {
+ err = parse_tc_vlan_action(priv, &vlan_act, attr->esw_attr, &attr->action,
+ extack, NULL);
+ if (err)
+ return err;
+ }
+
+ return err;
+}
+
+static bool
+tc_act_can_offload_vlan(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ return true;
+}
+
+static int
+tc_act_parse_vlan(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+ int err;
+
+ if (act->id == FLOW_ACTION_VLAN_PUSH &&
+ (attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP)) {
+ /* Replace vlan pop+push with vlan modify */
+ attr->action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
+ err = mlx5e_tc_act_vlan_add_rewrite_action(priv, MLX5_FLOW_NAMESPACE_FDB, act,
+ attr->parse_attr, &attr->action,
+ parse_state->extack);
+ } else {
+ err = parse_tc_vlan_action(priv, act, esw_attr, &attr->action,
+ parse_state->extack, parse_state);
+ }
+
+ if (err)
+ return err;
+
+ esw_attr->split_count = esw_attr->out_count;
+
+ return 0;
+}
+
+static int
+tc_act_post_parse_vlan(struct mlx5e_tc_act_parse_state *parse_state,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr;
+ struct netlink_ext_ack *extack = parse_state->extack;
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ int err;
+
+ if (MLX5_CAP_GEN(esw->dev, prio_tag_required) &&
+ attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) {
+ /* For prio tag mode, replace vlan pop with rewrite vlan prio
+ * tag rewrite.
+ */
+ attr->action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
+ err = add_vlan_prio_tag_rewrite_action(priv, parse_attr,
+ &attr->action, extack);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+struct mlx5e_tc_act mlx5e_tc_act_vlan = {
+ .can_offload = tc_act_can_offload_vlan,
+ .parse_action = tc_act_parse_vlan,
+ .post_parse = tc_act_post_parse_vlan,
+};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.h
new file mode 100644
index 000000000000..2fa58c6f44eb
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#ifndef __MLX5_EN_TC_ACT_VLAN_H__
+#define __MLX5_EN_TC_ACT_VLAN_H__
+
+#include <net/flow_offload.h>
+#include "en/tc_priv.h"
+
+struct pedit_headers_action;
+
+int
+mlx5e_tc_act_vlan_add_push_action(struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr,
+ struct net_device **out_dev,
+ struct netlink_ext_ack *extack);
+
+int
+mlx5e_tc_act_vlan_add_pop_action(struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr,
+ struct netlink_ext_ack *extack);
+
+int
+mlx5e_tc_act_vlan_add_rewrite_action(struct mlx5e_priv *priv, int namespace,
+ const struct flow_action_entry *act,
+ struct mlx5e_tc_flow_parse_attr *parse_attr,
+ u32 *action, struct netlink_ext_ack *extack);
+
+#endif /* __MLX5_EN_TC_ACT_VLAN_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan_mangle.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan_mangle.c
new file mode 100644
index 000000000000..9a8a1a6bd99e
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan_mangle.c
@@ -0,0 +1,86 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include <linux/if_vlan.h>
+#include "act.h"
+#include "vlan.h"
+#include "en/tc_priv.h"
+
+struct pedit_headers_action;
+
+int
+mlx5e_tc_act_vlan_add_rewrite_action(struct mlx5e_priv *priv, int namespace,
+ const struct flow_action_entry *act,
+ struct mlx5e_tc_flow_parse_attr *parse_attr,
+ u32 *action, struct netlink_ext_ack *extack)
+{
+ u16 mask16 = VLAN_VID_MASK;
+ u16 val16 = act->vlan.vid & VLAN_VID_MASK;
+ const struct flow_action_entry pedit_act = {
+ .id = FLOW_ACTION_MANGLE,
+ .mangle.htype = FLOW_ACT_MANGLE_HDR_TYPE_ETH,
+ .mangle.offset = offsetof(struct vlan_ethhdr, h_vlan_TCI),
+ .mangle.mask = ~(u32)be16_to_cpu(*(__be16 *)&mask16),
+ .mangle.val = (u32)be16_to_cpu(*(__be16 *)&val16),
+ };
+ u8 match_prio_mask, match_prio_val;
+ void *headers_c, *headers_v;
+ int err;
+
+ headers_c = mlx5e_get_match_headers_criteria(*action, &parse_attr->spec);
+ headers_v = mlx5e_get_match_headers_value(*action, &parse_attr->spec);
+
+ if (!(MLX5_GET(fte_match_set_lyr_2_4, headers_c, cvlan_tag) &&
+ MLX5_GET(fte_match_set_lyr_2_4, headers_v, cvlan_tag))) {
+ NL_SET_ERR_MSG_MOD(extack, "VLAN rewrite action must have VLAN protocol match");
+ return -EOPNOTSUPP;
+ }
+
+ match_prio_mask = MLX5_GET(fte_match_set_lyr_2_4, headers_c, first_prio);
+ match_prio_val = MLX5_GET(fte_match_set_lyr_2_4, headers_v, first_prio);
+ if (act->vlan.prio != (match_prio_val & match_prio_mask)) {
+ NL_SET_ERR_MSG_MOD(extack, "Changing VLAN prio is not supported");
+ return -EOPNOTSUPP;
+ }
+
+ err = mlx5e_tc_act_pedit_parse_action(priv, &pedit_act, namespace, parse_attr->hdrs,
+ extack);
+ *action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+
+ return err;
+}
+
+static bool
+tc_act_can_offload_vlan_mangle(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ int act_index,
+ struct mlx5_flow_attr *attr)
+{
+ return true;
+}
+
+static int
+tc_act_parse_vlan_mangle(struct mlx5e_tc_act_parse_state *parse_state,
+ const struct flow_action_entry *act,
+ struct mlx5e_priv *priv,
+ struct mlx5_flow_attr *attr)
+{
+ enum mlx5_flow_namespace_type ns_type;
+ int err;
+
+ ns_type = mlx5e_get_flow_namespace(parse_state->flow);
+ err = mlx5e_tc_act_vlan_add_rewrite_action(priv, ns_type, act, attr->parse_attr,
+ &attr->action, parse_state->extack);
+ if (err)
+ return err;
+
+ if (ns_type == MLX5_FLOW_NAMESPACE_FDB)
+ attr->esw_attr->split_count = attr->esw_attr->out_count;
+
+ return 0;
+}
+
+struct mlx5e_tc_act mlx5e_tc_act_vlan_mangle = {
+ .can_offload = tc_act_can_offload_vlan_mangle,
+ .parse_action = tc_act_parse_vlan_mangle,
+};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs.h
new file mode 100644
index 000000000000..bb6b1a979ba1
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. */
+
+#ifndef __MLX5_EN_TC_CT_FS_H__
+#define __MLX5_EN_TC_CT_FS_H__
+
+struct mlx5_ct_fs {
+ const struct net_device *netdev;
+ struct mlx5_core_dev *dev;
+
+ /* private data */
+ void *priv_data[];
+};
+
+struct mlx5_ct_fs_rule {
+};
+
+struct mlx5_ct_fs_ops {
+ int (*init)(struct mlx5_ct_fs *fs, struct mlx5_flow_table *ct,
+ struct mlx5_flow_table *ct_nat, struct mlx5_flow_table *post_ct);
+ void (*destroy)(struct mlx5_ct_fs *fs);
+
+ struct mlx5_ct_fs_rule * (*ct_rule_add)(struct mlx5_ct_fs *fs,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_flow_attr *attr,
+ struct flow_rule *flow_rule);
+ void (*ct_rule_del)(struct mlx5_ct_fs *fs, struct mlx5_ct_fs_rule *fs_rule);
+
+ size_t priv_size;
+};
+
+static inline void *mlx5_ct_fs_priv(struct mlx5_ct_fs *fs)
+{
+ return &fs->priv_data;
+}
+
+struct mlx5_ct_fs_ops *mlx5_ct_fs_dmfs_ops_get(void);
+
+#if IS_ENABLED(CONFIG_MLX5_SW_STEERING)
+struct mlx5_ct_fs_ops *mlx5_ct_fs_smfs_ops_get(void);
+#else
+static inline struct mlx5_ct_fs_ops *
+mlx5_ct_fs_smfs_ops_get(void)
+{
+ return NULL;
+}
+#endif /* IS_ENABLED(CONFIG_MLX5_SW_STEERING) */
+
+#endif /* __MLX5_EN_TC_CT_FS_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_dmfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_dmfs.c
new file mode 100644
index 000000000000..ae4f55be48ce
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_dmfs.c
@@ -0,0 +1,79 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. */
+
+#include "en_tc.h"
+#include "en/tc_ct.h"
+#include "en/tc/ct_fs.h"
+
+#define ct_dbg(fmt, args...)\
+ netdev_dbg(fs->netdev, "ct_fs_dmfs debug: " fmt "\n", ##args)
+
+struct mlx5_ct_fs_dmfs_rule {
+ struct mlx5_ct_fs_rule fs_rule;
+ struct mlx5_flow_handle *rule;
+ struct mlx5_flow_attr *attr;
+};
+
+static int
+mlx5_ct_fs_dmfs_init(struct mlx5_ct_fs *fs, struct mlx5_flow_table *ct,
+ struct mlx5_flow_table *ct_nat, struct mlx5_flow_table *post_ct)
+{
+ return 0;
+}
+
+static void
+mlx5_ct_fs_dmfs_destroy(struct mlx5_ct_fs *fs)
+{
+}
+
+static struct mlx5_ct_fs_rule *
+mlx5_ct_fs_dmfs_ct_rule_add(struct mlx5_ct_fs *fs, struct mlx5_flow_spec *spec,
+ struct mlx5_flow_attr *attr, struct flow_rule *flow_rule)
+{
+ struct mlx5e_priv *priv = netdev_priv(fs->netdev);
+ struct mlx5_ct_fs_dmfs_rule *dmfs_rule;
+ int err;
+
+ dmfs_rule = kzalloc(sizeof(*dmfs_rule), GFP_KERNEL);
+ if (!dmfs_rule)
+ return ERR_PTR(-ENOMEM);
+
+ dmfs_rule->rule = mlx5_tc_rule_insert(priv, spec, attr);
+ if (IS_ERR(dmfs_rule->rule)) {
+ err = PTR_ERR(dmfs_rule->rule);
+ ct_dbg("Failed to add ct entry fs rule");
+ goto err_insert;
+ }
+
+ dmfs_rule->attr = attr;
+
+ return &dmfs_rule->fs_rule;
+
+err_insert:
+ kfree(dmfs_rule);
+ return ERR_PTR(err);
+}
+
+static void
+mlx5_ct_fs_dmfs_ct_rule_del(struct mlx5_ct_fs *fs, struct mlx5_ct_fs_rule *fs_rule)
+{
+ struct mlx5_ct_fs_dmfs_rule *dmfs_rule = container_of(fs_rule,
+ struct mlx5_ct_fs_dmfs_rule,
+ fs_rule);
+
+ mlx5_tc_rule_delete(netdev_priv(fs->netdev), dmfs_rule->rule, dmfs_rule->attr);
+ kfree(dmfs_rule);
+}
+
+static struct mlx5_ct_fs_ops dmfs_ops = {
+ .ct_rule_add = mlx5_ct_fs_dmfs_ct_rule_add,
+ .ct_rule_del = mlx5_ct_fs_dmfs_ct_rule_del,
+
+ .init = mlx5_ct_fs_dmfs_init,
+ .destroy = mlx5_ct_fs_dmfs_destroy,
+};
+
+struct mlx5_ct_fs_ops *mlx5_ct_fs_dmfs_ops_get(void)
+{
+ return &dmfs_ops;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_smfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_smfs.c
new file mode 100644
index 000000000000..2b80fe73549d
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_smfs.c
@@ -0,0 +1,380 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. */
+
+#include <linux/refcount.h>
+
+#include "en_tc.h"
+#include "en/tc_priv.h"
+#include "en/tc_ct.h"
+#include "en/tc/ct_fs.h"
+
+#include "lib/smfs.h"
+
+#define INIT_ERR_PREFIX "ct_fs_smfs init failed"
+#define ct_dbg(fmt, args...)\
+ netdev_dbg(fs->netdev, "ct_fs_smfs debug: " fmt "\n", ##args)
+#define MLX5_CT_TCP_FLAGS_MASK cpu_to_be16(be32_to_cpu(TCP_FLAG_RST | TCP_FLAG_FIN) >> 16)
+
+struct mlx5_ct_fs_smfs_matcher {
+ struct mlx5dr_matcher *dr_matcher;
+ struct list_head list;
+ int prio;
+ refcount_t ref;
+};
+
+struct mlx5_ct_fs_smfs_matchers {
+ struct mlx5_ct_fs_smfs_matcher smfs_matchers[6];
+ struct list_head used;
+};
+
+struct mlx5_ct_fs_smfs {
+ struct mlx5dr_table *ct_tbl, *ct_nat_tbl;
+ struct mlx5_ct_fs_smfs_matchers matchers;
+ struct mlx5_ct_fs_smfs_matchers matchers_nat;
+ struct mlx5dr_action *fwd_action;
+ struct mlx5_flow_table *ct_nat;
+ struct mutex lock; /* Guards matchers */
+};
+
+struct mlx5_ct_fs_smfs_rule {
+ struct mlx5_ct_fs_rule fs_rule;
+ struct mlx5dr_rule *rule;
+ struct mlx5dr_action *count_action;
+ struct mlx5_ct_fs_smfs_matcher *smfs_matcher;
+};
+
+static inline void
+mlx5_ct_fs_smfs_fill_mask(struct mlx5_ct_fs *fs, struct mlx5_flow_spec *spec, bool ipv4, bool tcp,
+ bool gre)
+{
+ void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers);
+
+ if (likely(MLX5_CAP_FLOWTABLE_NIC_RX(fs->dev, ft_field_support.outer_ip_version)))
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_version);
+ else
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ethertype);
+
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_protocol);
+ if (likely(ipv4)) {
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c,
+ src_ipv4_src_ipv6.ipv4_layout.ipv4);
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c,
+ dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
+ } else {
+ memset(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ 0xFF,
+ MLX5_FLD_SZ_BYTES(fte_match_set_lyr_2_4,
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6));
+ memset(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+ src_ipv4_src_ipv6.ipv6_layout.ipv6),
+ 0xFF,
+ MLX5_FLD_SZ_BYTES(fte_match_set_lyr_2_4,
+ src_ipv4_src_ipv6.ipv6_layout.ipv6));
+ }
+
+ if (likely(tcp)) {
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, tcp_sport);
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, tcp_dport);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags,
+ ntohs(MLX5_CT_TCP_FLAGS_MASK));
+ } else if (!gre) {
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, udp_sport);
+ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, udp_dport);
+ }
+
+ mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG, 0, MLX5_CT_ZONE_MASK);
+}
+
+static struct mlx5dr_matcher *
+mlx5_ct_fs_smfs_matcher_create(struct mlx5_ct_fs *fs, struct mlx5dr_table *tbl, bool ipv4,
+ bool tcp, bool gre, u32 priority)
+{
+ struct mlx5dr_matcher *dr_matcher;
+ struct mlx5_flow_spec *spec;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return ERR_PTR(-ENOMEM);
+
+ mlx5_ct_fs_smfs_fill_mask(fs, spec, ipv4, tcp, gre);
+ spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2 | MLX5_MATCH_OUTER_HEADERS;
+
+ dr_matcher = mlx5_smfs_matcher_create(tbl, priority, spec);
+ kvfree(spec);
+ if (!dr_matcher)
+ return ERR_PTR(-EINVAL);
+
+ return dr_matcher;
+}
+
+static struct mlx5_ct_fs_smfs_matcher *
+mlx5_ct_fs_smfs_matcher_get(struct mlx5_ct_fs *fs, bool nat, bool ipv4, bool tcp, bool gre)
+{
+ struct mlx5_ct_fs_smfs *fs_smfs = mlx5_ct_fs_priv(fs);
+ struct mlx5_ct_fs_smfs_matcher *m, *smfs_matcher;
+ struct mlx5_ct_fs_smfs_matchers *matchers;
+ struct mlx5dr_matcher *dr_matcher;
+ struct mlx5dr_table *tbl;
+ struct list_head *prev;
+ int prio;
+
+ matchers = nat ? &fs_smfs->matchers_nat : &fs_smfs->matchers;
+ smfs_matcher = &matchers->smfs_matchers[ipv4 * 3 + tcp * 2 + gre];
+
+ if (refcount_inc_not_zero(&smfs_matcher->ref))
+ return smfs_matcher;
+
+ mutex_lock(&fs_smfs->lock);
+
+ /* Retry with lock, as another thread might have already created the relevant matcher
+ * till we acquired the lock
+ */
+ if (refcount_inc_not_zero(&smfs_matcher->ref))
+ goto out_unlock;
+
+ // Find next available priority in sorted used list
+ prio = 0;
+ prev = &matchers->used;
+ list_for_each_entry(m, &matchers->used, list) {
+ prev = &m->list;
+
+ if (m->prio == prio)
+ prio = m->prio + 1;
+ else
+ break;
+ }
+
+ tbl = nat ? fs_smfs->ct_nat_tbl : fs_smfs->ct_tbl;
+ dr_matcher = mlx5_ct_fs_smfs_matcher_create(fs, tbl, ipv4, tcp, gre, prio);
+ if (IS_ERR(dr_matcher)) {
+ netdev_warn(fs->netdev,
+ "ct_fs_smfs: failed to create matcher (nat %d, ipv4 %d, tcp %d, gre %d), err: %ld\n",
+ nat, ipv4, tcp, gre, PTR_ERR(dr_matcher));
+
+ smfs_matcher = ERR_CAST(dr_matcher);
+ goto out_unlock;
+ }
+
+ smfs_matcher->dr_matcher = dr_matcher;
+ smfs_matcher->prio = prio;
+ list_add(&smfs_matcher->list, prev);
+ refcount_set(&smfs_matcher->ref, 1);
+
+out_unlock:
+ mutex_unlock(&fs_smfs->lock);
+ return smfs_matcher;
+}
+
+static void
+mlx5_ct_fs_smfs_matcher_put(struct mlx5_ct_fs *fs, struct mlx5_ct_fs_smfs_matcher *smfs_matcher)
+{
+ struct mlx5_ct_fs_smfs *fs_smfs = mlx5_ct_fs_priv(fs);
+
+ if (!refcount_dec_and_mutex_lock(&smfs_matcher->ref, &fs_smfs->lock))
+ return;
+
+ mlx5_smfs_matcher_destroy(smfs_matcher->dr_matcher);
+ list_del(&smfs_matcher->list);
+ mutex_unlock(&fs_smfs->lock);
+}
+
+static int
+mlx5_ct_fs_smfs_init(struct mlx5_ct_fs *fs, struct mlx5_flow_table *ct,
+ struct mlx5_flow_table *ct_nat, struct mlx5_flow_table *post_ct)
+{
+ struct mlx5dr_table *ct_tbl, *ct_nat_tbl, *post_ct_tbl;
+ struct mlx5_ct_fs_smfs *fs_smfs = mlx5_ct_fs_priv(fs);
+
+ post_ct_tbl = mlx5_smfs_table_get_from_fs_ft(post_ct);
+ ct_nat_tbl = mlx5_smfs_table_get_from_fs_ft(ct_nat);
+ ct_tbl = mlx5_smfs_table_get_from_fs_ft(ct);
+ fs_smfs->ct_nat = ct_nat;
+
+ if (!ct_tbl || !ct_nat_tbl || !post_ct_tbl) {
+ netdev_warn(fs->netdev, "ct_fs_smfs: failed to init, missing backing dr tables");
+ return -EOPNOTSUPP;
+ }
+
+ ct_dbg("using smfs steering");
+
+ fs_smfs->fwd_action = mlx5_smfs_action_create_dest_table(post_ct_tbl);
+ if (!fs_smfs->fwd_action) {
+ return -EINVAL;
+ }
+
+ fs_smfs->ct_tbl = ct_tbl;
+ fs_smfs->ct_nat_tbl = ct_nat_tbl;
+ mutex_init(&fs_smfs->lock);
+ INIT_LIST_HEAD(&fs_smfs->matchers.used);
+ INIT_LIST_HEAD(&fs_smfs->matchers_nat.used);
+
+ return 0;
+}
+
+static void
+mlx5_ct_fs_smfs_destroy(struct mlx5_ct_fs *fs)
+{
+ struct mlx5_ct_fs_smfs *fs_smfs = mlx5_ct_fs_priv(fs);
+
+ mlx5_smfs_action_destroy(fs_smfs->fwd_action);
+}
+
+static inline bool
+mlx5_tc_ct_valid_used_dissector_keys(const u32 used_keys)
+{
+#define DISS_BIT(name) BIT(FLOW_DISSECTOR_KEY_ ## name)
+ const u32 basic_keys = DISS_BIT(BASIC) | DISS_BIT(CONTROL) | DISS_BIT(META);
+ const u32 ipv4_tcp = basic_keys | DISS_BIT(IPV4_ADDRS) | DISS_BIT(PORTS) | DISS_BIT(TCP);
+ const u32 ipv6_tcp = basic_keys | DISS_BIT(IPV6_ADDRS) | DISS_BIT(PORTS) | DISS_BIT(TCP);
+ const u32 ipv4_udp = basic_keys | DISS_BIT(IPV4_ADDRS) | DISS_BIT(PORTS);
+ const u32 ipv6_udp = basic_keys | DISS_BIT(IPV6_ADDRS) | DISS_BIT(PORTS);
+ const u32 ipv4_gre = basic_keys | DISS_BIT(IPV4_ADDRS);
+ const u32 ipv6_gre = basic_keys | DISS_BIT(IPV6_ADDRS);
+
+ return (used_keys == ipv4_tcp || used_keys == ipv4_udp || used_keys == ipv6_tcp ||
+ used_keys == ipv6_udp || used_keys == ipv4_gre || used_keys == ipv6_gre);
+}
+
+static bool
+mlx5_ct_fs_smfs_ct_validate_flow_rule(struct mlx5_ct_fs *fs, struct flow_rule *flow_rule)
+{
+ struct flow_match_ipv4_addrs ipv4_addrs;
+ struct flow_match_ipv6_addrs ipv6_addrs;
+ struct flow_match_control control;
+ struct flow_match_basic basic;
+ struct flow_match_ports ports;
+ struct flow_match_tcp tcp;
+
+ if (!mlx5_tc_ct_valid_used_dissector_keys(flow_rule->match.dissector->used_keys)) {
+ ct_dbg("rule uses unexpected dissectors (0x%08x)",
+ flow_rule->match.dissector->used_keys);
+ return false;
+ }
+
+ flow_rule_match_basic(flow_rule, &basic);
+ flow_rule_match_control(flow_rule, &control);
+ flow_rule_match_ipv4_addrs(flow_rule, &ipv4_addrs);
+ flow_rule_match_ipv6_addrs(flow_rule, &ipv6_addrs);
+ if (basic.key->ip_proto != IPPROTO_GRE)
+ flow_rule_match_ports(flow_rule, &ports);
+ if (basic.key->ip_proto == IPPROTO_TCP)
+ flow_rule_match_tcp(flow_rule, &tcp);
+
+ if (basic.mask->n_proto != htons(0xFFFF) ||
+ (basic.key->n_proto != htons(ETH_P_IP) && basic.key->n_proto != htons(ETH_P_IPV6)) ||
+ basic.mask->ip_proto != 0xFF ||
+ (basic.key->ip_proto != IPPROTO_UDP && basic.key->ip_proto != IPPROTO_TCP &&
+ basic.key->ip_proto != IPPROTO_GRE)) {
+ ct_dbg("rule uses unexpected basic match (n_proto 0x%04x/0x%04x, ip_proto 0x%02x/0x%02x)",
+ ntohs(basic.key->n_proto), ntohs(basic.mask->n_proto),
+ basic.key->ip_proto, basic.mask->ip_proto);
+ return false;
+ }
+
+ if (basic.key->ip_proto != IPPROTO_GRE &&
+ (ports.mask->src != htons(0xFFFF) || ports.mask->dst != htons(0xFFFF))) {
+ ct_dbg("rule uses ports match (src 0x%04x, dst 0x%04x)",
+ ports.mask->src, ports.mask->dst);
+ return false;
+ }
+
+ if (basic.key->ip_proto == IPPROTO_TCP && tcp.mask->flags != MLX5_CT_TCP_FLAGS_MASK) {
+ ct_dbg("rule uses unexpected tcp match (flags 0x%02x)", tcp.mask->flags);
+ return false;
+ }
+
+ return true;
+}
+
+static struct mlx5_ct_fs_rule *
+mlx5_ct_fs_smfs_ct_rule_add(struct mlx5_ct_fs *fs, struct mlx5_flow_spec *spec,
+ struct mlx5_flow_attr *attr, struct flow_rule *flow_rule)
+{
+ struct mlx5_ct_fs_smfs *fs_smfs = mlx5_ct_fs_priv(fs);
+ struct mlx5_ct_fs_smfs_matcher *smfs_matcher;
+ struct mlx5_ct_fs_smfs_rule *smfs_rule;
+ struct mlx5dr_action *actions[5];
+ struct mlx5dr_rule *rule;
+ int num_actions = 0, err;
+ bool nat, tcp, ipv4, gre;
+
+ if (!mlx5_ct_fs_smfs_ct_validate_flow_rule(fs, flow_rule))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ smfs_rule = kzalloc(sizeof(*smfs_rule), GFP_KERNEL);
+ if (!smfs_rule)
+ return ERR_PTR(-ENOMEM);
+
+ smfs_rule->count_action = mlx5_smfs_action_create_flow_counter(mlx5_fc_id(attr->counter));
+ if (!smfs_rule->count_action) {
+ err = -EINVAL;
+ goto err_count;
+ }
+
+ actions[num_actions++] = smfs_rule->count_action;
+ actions[num_actions++] = attr->modify_hdr->action.dr_action;
+ actions[num_actions++] = fs_smfs->fwd_action;
+
+ nat = (attr->ft == fs_smfs->ct_nat);
+ ipv4 = mlx5e_tc_get_ip_version(spec, true) == 4;
+ tcp = MLX5_GET(fte_match_param, spec->match_value,
+ outer_headers.ip_protocol) == IPPROTO_TCP;
+ gre = MLX5_GET(fte_match_param, spec->match_value,
+ outer_headers.ip_protocol) == IPPROTO_GRE;
+
+ smfs_matcher = mlx5_ct_fs_smfs_matcher_get(fs, nat, ipv4, tcp, gre);
+ if (IS_ERR(smfs_matcher)) {
+ err = PTR_ERR(smfs_matcher);
+ goto err_matcher;
+ }
+
+ rule = mlx5_smfs_rule_create(smfs_matcher->dr_matcher, spec, num_actions, actions,
+ spec->flow_context.flow_source);
+ if (!rule) {
+ err = -EINVAL;
+ goto err_create;
+ }
+
+ smfs_rule->rule = rule;
+ smfs_rule->smfs_matcher = smfs_matcher;
+
+ return &smfs_rule->fs_rule;
+
+err_create:
+ mlx5_ct_fs_smfs_matcher_put(fs, smfs_matcher);
+err_matcher:
+ mlx5_smfs_action_destroy(smfs_rule->count_action);
+err_count:
+ kfree(smfs_rule);
+ return ERR_PTR(err);
+}
+
+static void
+mlx5_ct_fs_smfs_ct_rule_del(struct mlx5_ct_fs *fs, struct mlx5_ct_fs_rule *fs_rule)
+{
+ struct mlx5_ct_fs_smfs_rule *smfs_rule = container_of(fs_rule,
+ struct mlx5_ct_fs_smfs_rule,
+ fs_rule);
+
+ mlx5_smfs_rule_destroy(smfs_rule->rule);
+ mlx5_ct_fs_smfs_matcher_put(fs, smfs_rule->smfs_matcher);
+ mlx5_smfs_action_destroy(smfs_rule->count_action);
+ kfree(smfs_rule);
+}
+
+static struct mlx5_ct_fs_ops fs_smfs_ops = {
+ .ct_rule_add = mlx5_ct_fs_smfs_ct_rule_add,
+ .ct_rule_del = mlx5_ct_fs_smfs_ct_rule_del,
+
+ .init = mlx5_ct_fs_smfs_init,
+ .destroy = mlx5_ct_fs_smfs_destroy,
+
+ .priv_size = sizeof(struct mlx5_ct_fs_smfs),
+};
+
+struct mlx5_ct_fs_ops *
+mlx5_ct_fs_smfs_ops_get(void)
+{
+ return &fs_smfs_ops;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.c
new file mode 100644
index 000000000000..be74e1403328
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.c
@@ -0,0 +1,585 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include <linux/math64.h>
+#include "lib/aso.h"
+#include "en/tc/post_act.h"
+#include "meter.h"
+#include "en/tc_priv.h"
+
+#define MLX5_START_COLOR_SHIFT 28
+#define MLX5_METER_MODE_SHIFT 24
+#define MLX5_CBS_EXP_SHIFT 24
+#define MLX5_CBS_MAN_SHIFT 16
+#define MLX5_CIR_EXP_SHIFT 8
+
+/* cir = 8*(10^9)*cir_mantissa/(2^cir_exponent)) bits/s */
+#define MLX5_CONST_CIR 8000000000ULL
+#define MLX5_CALC_CIR(m, e) ((MLX5_CONST_CIR * (m)) >> (e))
+#define MLX5_MAX_CIR ((MLX5_CONST_CIR * 0x100) - 1)
+
+/* cbs = cbs_mantissa*2^cbs_exponent */
+#define MLX5_CALC_CBS(m, e) ((m) << (e))
+#define MLX5_MAX_CBS ((0x100ULL << 0x1F) - 1)
+#define MLX5_MAX_HW_CBS 0x7FFFFFFF
+
+struct mlx5e_flow_meter_aso_obj {
+ struct list_head entry;
+ int base_id;
+ int total_meters;
+
+ unsigned long meters_map[0]; /* must be at the end of this struct */
+};
+
+struct mlx5e_flow_meters {
+ enum mlx5_flow_namespace_type ns_type;
+ struct mlx5_aso *aso;
+ struct mutex aso_lock; /* Protects aso operations */
+ int log_granularity;
+ u32 pdn;
+
+ DECLARE_HASHTABLE(hashtbl, 8);
+
+ struct mutex sync_lock; /* protect flow meter operations */
+ struct list_head partial_list;
+ struct list_head full_list;
+
+ struct mlx5_core_dev *mdev;
+ struct mlx5e_post_act *post_act;
+};
+
+static void
+mlx5e_flow_meter_cir_calc(u64 cir, u8 *man, u8 *exp)
+{
+ s64 _cir, _delta, delta = S64_MAX;
+ u8 e, _man = 0, _exp = 0;
+ u64 m;
+
+ for (e = 0; e <= 0x1F; e++) { /* exp width 5bit */
+ m = cir << e;
+ if ((s64)m < 0) /* overflow */
+ break;
+ m = div64_u64(m, MLX5_CONST_CIR);
+ if (m > 0xFF) /* man width 8 bit */
+ continue;
+ _cir = MLX5_CALC_CIR(m, e);
+ _delta = cir - _cir;
+ if (_delta < delta) {
+ _man = m;
+ _exp = e;
+ if (!_delta)
+ goto found;
+ delta = _delta;
+ }
+ }
+
+found:
+ *man = _man;
+ *exp = _exp;
+}
+
+static void
+mlx5e_flow_meter_cbs_calc(u64 cbs, u8 *man, u8 *exp)
+{
+ s64 _cbs, _delta, delta = S64_MAX;
+ u8 e, _man = 0, _exp = 0;
+ u64 m;
+
+ for (e = 0; e <= 0x1F; e++) { /* exp width 5bit */
+ m = cbs >> e;
+ if (m > 0xFF) /* man width 8 bit */
+ continue;
+ _cbs = MLX5_CALC_CBS(m, e);
+ _delta = cbs - _cbs;
+ if (_delta < delta) {
+ _man = m;
+ _exp = e;
+ if (!_delta)
+ goto found;
+ delta = _delta;
+ }
+ }
+
+found:
+ *man = _man;
+ *exp = _exp;
+}
+
+int
+mlx5e_tc_meter_modify(struct mlx5_core_dev *mdev,
+ struct mlx5e_flow_meter_handle *meter,
+ struct mlx5e_flow_meter_params *meter_params)
+{
+ struct mlx5_wqe_aso_ctrl_seg *aso_ctrl;
+ struct mlx5_wqe_aso_data_seg *aso_data;
+ struct mlx5e_flow_meters *flow_meters;
+ u8 cir_man, cir_exp, cbs_man, cbs_exp;
+ struct mlx5_aso_wqe *aso_wqe;
+ unsigned long expires;
+ struct mlx5_aso *aso;
+ u64 rate, burst;
+ u8 ds_cnt;
+ int err;
+
+ rate = meter_params->rate;
+ burst = meter_params->burst;
+
+ /* HW treats each packet as 128 bytes in PPS mode */
+ if (meter_params->mode == MLX5_RATE_LIMIT_PPS) {
+ rate <<= 10;
+ burst <<= 7;
+ }
+
+ if (!rate || rate > MLX5_MAX_CIR || !burst || burst > MLX5_MAX_CBS)
+ return -EINVAL;
+
+ /* HW has limitation of total 31 bits for cbs */
+ if (burst > MLX5_MAX_HW_CBS) {
+ mlx5_core_warn(mdev,
+ "burst(%lld) is too large, use HW allowed value(%d)\n",
+ burst, MLX5_MAX_HW_CBS);
+ burst = MLX5_MAX_HW_CBS;
+ }
+
+ mlx5_core_dbg(mdev, "meter mode=%d\n", meter_params->mode);
+ mlx5e_flow_meter_cir_calc(rate, &cir_man, &cir_exp);
+ mlx5_core_dbg(mdev, "rate=%lld, cir=%lld, exp=%d, man=%d\n",
+ rate, MLX5_CALC_CIR(cir_man, cir_exp), cir_exp, cir_man);
+ mlx5e_flow_meter_cbs_calc(burst, &cbs_man, &cbs_exp);
+ mlx5_core_dbg(mdev, "burst=%lld, cbs=%lld, exp=%d, man=%d\n",
+ burst, MLX5_CALC_CBS((u64)cbs_man, cbs_exp), cbs_exp, cbs_man);
+
+ if (!cir_man || !cbs_man)
+ return -EINVAL;
+
+ flow_meters = meter->flow_meters;
+ aso = flow_meters->aso;
+
+ mutex_lock(&flow_meters->aso_lock);
+ aso_wqe = mlx5_aso_get_wqe(aso);
+ ds_cnt = DIV_ROUND_UP(sizeof(struct mlx5_aso_wqe_data), MLX5_SEND_WQE_DS);
+ mlx5_aso_build_wqe(aso, ds_cnt, aso_wqe, meter->obj_id,
+ MLX5_ACCESS_ASO_OPC_MOD_FLOW_METER);
+
+ aso_ctrl = &aso_wqe->aso_ctrl;
+ memset(aso_ctrl, 0, sizeof(*aso_ctrl));
+ aso_ctrl->data_mask_mode = MLX5_ASO_DATA_MASK_MODE_BYTEWISE_64BYTE << 6;
+ aso_ctrl->condition_1_0_operand = MLX5_ASO_ALWAYS_TRUE |
+ MLX5_ASO_ALWAYS_TRUE << 4;
+ aso_ctrl->data_offset_condition_operand = MLX5_ASO_LOGICAL_OR << 6;
+ aso_ctrl->data_mask = cpu_to_be64(0x80FFFFFFULL << (meter->idx ? 0 : 32));
+
+ aso_data = (struct mlx5_wqe_aso_data_seg *)(aso_wqe + 1);
+ memset(aso_data, 0, sizeof(*aso_data));
+ aso_data->bytewise_data[meter->idx * 8] = cpu_to_be32((0x1 << 31) | /* valid */
+ (MLX5_FLOW_METER_COLOR_GREEN << MLX5_START_COLOR_SHIFT));
+ if (meter_params->mode == MLX5_RATE_LIMIT_PPS)
+ aso_data->bytewise_data[meter->idx * 8] |=
+ cpu_to_be32(MLX5_FLOW_METER_MODE_NUM_PACKETS << MLX5_METER_MODE_SHIFT);
+ else
+ aso_data->bytewise_data[meter->idx * 8] |=
+ cpu_to_be32(MLX5_FLOW_METER_MODE_BYTES_IP_LENGTH << MLX5_METER_MODE_SHIFT);
+
+ aso_data->bytewise_data[meter->idx * 8 + 2] = cpu_to_be32((cbs_exp << MLX5_CBS_EXP_SHIFT) |
+ (cbs_man << MLX5_CBS_MAN_SHIFT) |
+ (cir_exp << MLX5_CIR_EXP_SHIFT) |
+ cir_man);
+
+ mlx5_aso_post_wqe(aso, true, &aso_wqe->ctrl);
+
+ /* With newer FW, the wait for the first ASO WQE is more than 2us, put the wait 10ms. */
+ expires = jiffies + msecs_to_jiffies(10);
+ do {
+ err = mlx5_aso_poll_cq(aso, true);
+ if (err)
+ usleep_range(2, 10);
+ } while (err && time_is_after_jiffies(expires));
+ mutex_unlock(&flow_meters->aso_lock);
+
+ return err;
+}
+
+static int
+mlx5e_flow_meter_create_aso_obj(struct mlx5e_flow_meters *flow_meters, int *obj_id)
+{
+ u32 in[MLX5_ST_SZ_DW(create_flow_meter_aso_obj_in)] = {};
+ u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
+ struct mlx5_core_dev *mdev = flow_meters->mdev;
+ void *obj;
+ int err;
+
+ MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
+ MLX5_SET(general_obj_in_cmd_hdr, in, obj_type,
+ MLX5_GENERAL_OBJECT_TYPES_FLOW_METER_ASO);
+ MLX5_SET(general_obj_in_cmd_hdr, in, log_obj_range, flow_meters->log_granularity);
+
+ obj = MLX5_ADDR_OF(create_flow_meter_aso_obj_in, in, flow_meter_aso_obj);
+ MLX5_SET(flow_meter_aso_obj, obj, meter_aso_access_pd, flow_meters->pdn);
+
+ err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+ if (!err) {
+ *obj_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
+ mlx5_core_dbg(mdev, "flow meter aso obj(0x%x) created\n", *obj_id);
+ }
+
+ return err;
+}
+
+static void
+mlx5e_flow_meter_destroy_aso_obj(struct mlx5_core_dev *mdev, u32 obj_id)
+{
+ u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {};
+ u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
+
+ MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
+ MLX5_SET(general_obj_in_cmd_hdr, in, obj_type,
+ MLX5_GENERAL_OBJECT_TYPES_FLOW_METER_ASO);
+ MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, obj_id);
+
+ mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+ mlx5_core_dbg(mdev, "flow meter aso obj(0x%x) destroyed\n", obj_id);
+}
+
+static struct mlx5e_flow_meter_handle *
+__mlx5e_flow_meter_alloc(struct mlx5e_flow_meters *flow_meters)
+{
+ struct mlx5_core_dev *mdev = flow_meters->mdev;
+ struct mlx5e_flow_meter_aso_obj *meters_obj;
+ struct mlx5e_flow_meter_handle *meter;
+ struct mlx5_fc *counter;
+ int err, pos, total;
+ u32 id;
+
+ meter = kzalloc(sizeof(*meter), GFP_KERNEL);
+ if (!meter)
+ return ERR_PTR(-ENOMEM);
+
+ counter = mlx5_fc_create(mdev, true);
+ if (IS_ERR(counter)) {
+ err = PTR_ERR(counter);
+ goto err_red_counter;
+ }
+ meter->red_counter = counter;
+
+ counter = mlx5_fc_create(mdev, true);
+ if (IS_ERR(counter)) {
+ err = PTR_ERR(counter);
+ goto err_green_counter;
+ }
+ meter->green_counter = counter;
+
+ meters_obj = list_first_entry_or_null(&flow_meters->partial_list,
+ struct mlx5e_flow_meter_aso_obj,
+ entry);
+ /* 2 meters in one object */
+ total = 1 << (flow_meters->log_granularity + 1);
+ if (!meters_obj) {
+ err = mlx5e_flow_meter_create_aso_obj(flow_meters, &id);
+ if (err) {
+ mlx5_core_err(mdev, "Failed to create flow meter ASO object\n");
+ goto err_create;
+ }
+
+ meters_obj = kzalloc(sizeof(*meters_obj) + BITS_TO_BYTES(total),
+ GFP_KERNEL);
+ if (!meters_obj) {
+ err = -ENOMEM;
+ goto err_mem;
+ }
+
+ meters_obj->base_id = id;
+ meters_obj->total_meters = total;
+ list_add(&meters_obj->entry, &flow_meters->partial_list);
+ pos = 0;
+ } else {
+ pos = find_first_zero_bit(meters_obj->meters_map, total);
+ if (bitmap_weight(meters_obj->meters_map, total) == total - 1) {
+ list_del(&meters_obj->entry);
+ list_add(&meters_obj->entry, &flow_meters->full_list);
+ }
+ }
+
+ bitmap_set(meters_obj->meters_map, pos, 1);
+ meter->flow_meters = flow_meters;
+ meter->meters_obj = meters_obj;
+ meter->obj_id = meters_obj->base_id + pos / 2;
+ meter->idx = pos % 2;
+
+ mlx5_core_dbg(mdev, "flow meter allocated, obj_id=0x%x, index=%d\n",
+ meter->obj_id, meter->idx);
+
+ return meter;
+
+err_mem:
+ mlx5e_flow_meter_destroy_aso_obj(mdev, id);
+err_create:
+ mlx5_fc_destroy(mdev, meter->green_counter);
+err_green_counter:
+ mlx5_fc_destroy(mdev, meter->red_counter);
+err_red_counter:
+ kfree(meter);
+ return ERR_PTR(err);
+}
+
+static void
+__mlx5e_flow_meter_free(struct mlx5e_flow_meter_handle *meter)
+{
+ struct mlx5e_flow_meters *flow_meters = meter->flow_meters;
+ struct mlx5_core_dev *mdev = flow_meters->mdev;
+ struct mlx5e_flow_meter_aso_obj *meters_obj;
+ int n, pos;
+
+ mlx5_fc_destroy(mdev, meter->green_counter);
+ mlx5_fc_destroy(mdev, meter->red_counter);
+
+ meters_obj = meter->meters_obj;
+ pos = (meter->obj_id - meters_obj->base_id) * 2 + meter->idx;
+ bitmap_clear(meters_obj->meters_map, pos, 1);
+ n = bitmap_weight(meters_obj->meters_map, meters_obj->total_meters);
+ if (n == 0) {
+ list_del(&meters_obj->entry);
+ mlx5e_flow_meter_destroy_aso_obj(mdev, meters_obj->base_id);
+ kfree(meters_obj);
+ } else if (n == meters_obj->total_meters - 1) {
+ list_del(&meters_obj->entry);
+ list_add(&meters_obj->entry, &flow_meters->partial_list);
+ }
+
+ mlx5_core_dbg(mdev, "flow meter freed, obj_id=0x%x, index=%d\n",
+ meter->obj_id, meter->idx);
+ kfree(meter);
+}
+
+static struct mlx5e_flow_meter_handle *
+__mlx5e_tc_meter_get(struct mlx5e_flow_meters *flow_meters, u32 index)
+{
+ struct mlx5e_flow_meter_handle *meter;
+
+ hash_for_each_possible(flow_meters->hashtbl, meter, hlist, index)
+ if (meter->params.index == index)
+ goto add_ref;
+
+ return ERR_PTR(-ENOENT);
+
+add_ref:
+ meter->refcnt++;
+
+ return meter;
+}
+
+struct mlx5e_flow_meter_handle *
+mlx5e_tc_meter_get(struct mlx5_core_dev *mdev, struct mlx5e_flow_meter_params *params)
+{
+ struct mlx5e_flow_meters *flow_meters;
+ struct mlx5e_flow_meter_handle *meter;
+
+ flow_meters = mlx5e_get_flow_meters(mdev);
+ if (!flow_meters)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ mutex_lock(&flow_meters->sync_lock);
+ meter = __mlx5e_tc_meter_get(flow_meters, params->index);
+ mutex_unlock(&flow_meters->sync_lock);
+
+ return meter;
+}
+
+static void
+__mlx5e_tc_meter_put(struct mlx5e_flow_meter_handle *meter)
+{
+ if (--meter->refcnt == 0) {
+ hash_del(&meter->hlist);
+ __mlx5e_flow_meter_free(meter);
+ }
+}
+
+void
+mlx5e_tc_meter_put(struct mlx5e_flow_meter_handle *meter)
+{
+ struct mlx5e_flow_meters *flow_meters = meter->flow_meters;
+
+ mutex_lock(&flow_meters->sync_lock);
+ __mlx5e_tc_meter_put(meter);
+ mutex_unlock(&flow_meters->sync_lock);
+}
+
+static struct mlx5e_flow_meter_handle *
+mlx5e_tc_meter_alloc(struct mlx5e_flow_meters *flow_meters,
+ struct mlx5e_flow_meter_params *params)
+{
+ struct mlx5e_flow_meter_handle *meter;
+
+ meter = __mlx5e_flow_meter_alloc(flow_meters);
+ if (IS_ERR(meter))
+ return meter;
+
+ hash_add(flow_meters->hashtbl, &meter->hlist, params->index);
+ meter->params.index = params->index;
+ meter->refcnt++;
+
+ return meter;
+}
+
+static int
+__mlx5e_tc_meter_update(struct mlx5e_flow_meter_handle *meter,
+ struct mlx5e_flow_meter_params *params)
+{
+ struct mlx5_core_dev *mdev = meter->flow_meters->mdev;
+ int err = 0;
+
+ if (meter->params.mode != params->mode || meter->params.rate != params->rate ||
+ meter->params.burst != params->burst) {
+ err = mlx5e_tc_meter_modify(mdev, meter, params);
+ if (err)
+ goto out;
+
+ meter->params.mode = params->mode;
+ meter->params.rate = params->rate;
+ meter->params.burst = params->burst;
+ }
+
+out:
+ return err;
+}
+
+int
+mlx5e_tc_meter_update(struct mlx5e_flow_meter_handle *meter,
+ struct mlx5e_flow_meter_params *params)
+{
+ struct mlx5_core_dev *mdev = meter->flow_meters->mdev;
+ struct mlx5e_flow_meters *flow_meters;
+ int err;
+
+ flow_meters = mlx5e_get_flow_meters(mdev);
+ if (!flow_meters)
+ return -EOPNOTSUPP;
+
+ mutex_lock(&flow_meters->sync_lock);
+ err = __mlx5e_tc_meter_update(meter, params);
+ mutex_unlock(&flow_meters->sync_lock);
+ return err;
+}
+
+struct mlx5e_flow_meter_handle *
+mlx5e_tc_meter_replace(struct mlx5_core_dev *mdev, struct mlx5e_flow_meter_params *params)
+{
+ struct mlx5e_flow_meters *flow_meters;
+ struct mlx5e_flow_meter_handle *meter;
+ int err;
+
+ flow_meters = mlx5e_get_flow_meters(mdev);
+ if (!flow_meters)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ mutex_lock(&flow_meters->sync_lock);
+ meter = __mlx5e_tc_meter_get(flow_meters, params->index);
+ if (IS_ERR(meter)) {
+ meter = mlx5e_tc_meter_alloc(flow_meters, params);
+ if (IS_ERR(meter)) {
+ err = PTR_ERR(meter);
+ goto err_get;
+ }
+ }
+
+ err = __mlx5e_tc_meter_update(meter, params);
+ if (err)
+ goto err_update;
+
+ mutex_unlock(&flow_meters->sync_lock);
+ return meter;
+
+err_update:
+ __mlx5e_tc_meter_put(meter);
+err_get:
+ mutex_unlock(&flow_meters->sync_lock);
+ return ERR_PTR(err);
+}
+
+enum mlx5_flow_namespace_type
+mlx5e_tc_meter_get_namespace(struct mlx5e_flow_meters *flow_meters)
+{
+ return flow_meters->ns_type;
+}
+
+struct mlx5e_flow_meters *
+mlx5e_flow_meters_init(struct mlx5e_priv *priv,
+ enum mlx5_flow_namespace_type ns_type,
+ struct mlx5e_post_act *post_act)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5e_flow_meters *flow_meters;
+ int err;
+
+ if (!(MLX5_CAP_GEN_64(mdev, general_obj_types) &
+ MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_FLOW_METER_ASO))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ if (IS_ERR_OR_NULL(post_act)) {
+ netdev_dbg(priv->netdev,
+ "flow meter offload is not supported, post action is missing\n");
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+
+ flow_meters = kzalloc(sizeof(*flow_meters), GFP_KERNEL);
+ if (!flow_meters)
+ return ERR_PTR(-ENOMEM);
+
+ err = mlx5_core_alloc_pd(mdev, &flow_meters->pdn);
+ if (err) {
+ mlx5_core_err(mdev, "Failed to alloc pd for flow meter aso, err=%d\n", err);
+ goto err_out;
+ }
+
+ flow_meters->aso = mlx5_aso_create(mdev, flow_meters->pdn);
+ if (IS_ERR(flow_meters->aso)) {
+ mlx5_core_warn(mdev, "Failed to create aso wqe for flow meter\n");
+ err = PTR_ERR(flow_meters->aso);
+ goto err_sq;
+ }
+
+ mutex_init(&flow_meters->sync_lock);
+ INIT_LIST_HEAD(&flow_meters->partial_list);
+ INIT_LIST_HEAD(&flow_meters->full_list);
+
+ flow_meters->ns_type = ns_type;
+ flow_meters->mdev = mdev;
+ flow_meters->post_act = post_act;
+ mutex_init(&flow_meters->aso_lock);
+ flow_meters->log_granularity = min_t(int, 6,
+ MLX5_CAP_QOS(mdev, log_meter_aso_max_alloc));
+
+ return flow_meters;
+
+err_sq:
+ mlx5_core_dealloc_pd(mdev, flow_meters->pdn);
+err_out:
+ kfree(flow_meters);
+ return ERR_PTR(err);
+}
+
+void
+mlx5e_flow_meters_cleanup(struct mlx5e_flow_meters *flow_meters)
+{
+ if (IS_ERR_OR_NULL(flow_meters))
+ return;
+
+ mlx5_aso_destroy(flow_meters->aso);
+ mlx5_core_dealloc_pd(flow_meters->mdev, flow_meters->pdn);
+ kfree(flow_meters);
+}
+
+void
+mlx5e_tc_meter_get_stats(struct mlx5e_flow_meter_handle *meter,
+ u64 *bytes, u64 *packets, u64 *drops, u64 *lastuse)
+{
+ u64 bytes1, packets1, lastuse1;
+ u64 bytes2, packets2, lastuse2;
+
+ mlx5_fc_query_cached(meter->green_counter, &bytes1, &packets1, &lastuse1);
+ mlx5_fc_query_cached(meter->red_counter, &bytes2, &packets2, &lastuse2);
+
+ *bytes = bytes1 + bytes2;
+ *packets = packets1 + packets2;
+ *drops = packets2;
+ *lastuse = max_t(u64, lastuse1, lastuse2);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.h
new file mode 100644
index 000000000000..6de6e8a16327
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.h
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#ifndef __MLX5_EN_FLOW_METER_H__
+#define __MLX5_EN_FLOW_METER_H__
+
+struct mlx5e_post_meter_priv;
+struct mlx5e_flow_meter_aso_obj;
+struct mlx5e_flow_meters;
+struct mlx5_flow_attr;
+
+enum mlx5e_flow_meter_mode {
+ MLX5_RATE_LIMIT_BPS,
+ MLX5_RATE_LIMIT_PPS,
+};
+
+struct mlx5e_flow_meter_params {
+ enum mlx5e_flow_meter_mode mode;
+ /* police action index */
+ u32 index;
+ u64 rate;
+ u64 burst;
+};
+
+struct mlx5e_flow_meter_handle {
+ struct mlx5e_flow_meters *flow_meters;
+ struct mlx5e_flow_meter_aso_obj *meters_obj;
+ u32 obj_id;
+ u8 idx;
+
+ int refcnt;
+ struct hlist_node hlist;
+ struct mlx5e_flow_meter_params params;
+
+ struct mlx5_fc *green_counter;
+ struct mlx5_fc *red_counter;
+};
+
+struct mlx5e_meter_attr {
+ struct mlx5e_flow_meter_params params;
+ struct mlx5e_flow_meter_handle *meter;
+ struct mlx5e_post_meter_priv *post_meter;
+};
+
+int
+mlx5e_tc_meter_modify(struct mlx5_core_dev *mdev,
+ struct mlx5e_flow_meter_handle *meter,
+ struct mlx5e_flow_meter_params *meter_params);
+
+struct mlx5e_flow_meter_handle *
+mlx5e_tc_meter_get(struct mlx5_core_dev *mdev, struct mlx5e_flow_meter_params *params);
+void
+mlx5e_tc_meter_put(struct mlx5e_flow_meter_handle *meter);
+int
+mlx5e_tc_meter_update(struct mlx5e_flow_meter_handle *meter,
+ struct mlx5e_flow_meter_params *params);
+struct mlx5e_flow_meter_handle *
+mlx5e_tc_meter_replace(struct mlx5_core_dev *mdev, struct mlx5e_flow_meter_params *params);
+
+enum mlx5_flow_namespace_type
+mlx5e_tc_meter_get_namespace(struct mlx5e_flow_meters *flow_meters);
+
+struct mlx5e_flow_meters *
+mlx5e_flow_meters_init(struct mlx5e_priv *priv,
+ enum mlx5_flow_namespace_type ns_type,
+ struct mlx5e_post_act *post_action);
+void
+mlx5e_flow_meters_cleanup(struct mlx5e_flow_meters *flow_meters);
+
+void
+mlx5e_tc_meter_get_stats(struct mlx5e_flow_meter_handle *meter,
+ u64 *bytes, u64 *packets, u64 *drops, u64 *lastuse);
+
+#endif /* __MLX5_EN_FLOW_METER_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c
index 31b4e39be2d3..4e48946c4c2a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#include "en/tc_priv.h"
#include "en_tc.h"
#include "post_act.h"
#include "mlx5_core.h"
@@ -21,9 +22,9 @@ struct mlx5e_post_act_handle {
u32 id;
};
-#define MLX5_POST_ACTION_BITS (mlx5e_tc_attr_to_reg_mappings[FTEID_TO_REG].mlen)
-#define MLX5_POST_ACTION_MAX GENMASK(MLX5_POST_ACTION_BITS - 1, 0)
-#define MLX5_POST_ACTION_MASK MLX5_POST_ACTION_MAX
+#define MLX5_POST_ACTION_BITS MLX5_REG_MAPPING_MBITS(FTEID_TO_REG)
+#define MLX5_POST_ACTION_MASK MLX5_REG_MAPPING_MASK(FTEID_TO_REG)
+#define MLX5_POST_ACTION_MAX MLX5_POST_ACTION_MASK
struct mlx5e_post_act *
mlx5e_tc_post_act_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains,
@@ -35,7 +36,7 @@ mlx5e_tc_post_act_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains,
int err;
if (!MLX5_CAP_FLOWTABLE_TYPE(priv->mdev, ignore_flow_level, table_type)) {
- if (priv->mdev->coredev_type != MLX5_COREDEV_VF)
+ if (priv->mdev->coredev_type == MLX5_COREDEV_PF)
mlx5_core_warn(priv->mdev, "firmware level support is missing\n");
err = -EOPNOTSUPP;
goto err_check;
@@ -75,21 +76,47 @@ mlx5e_tc_post_act_destroy(struct mlx5e_post_act *post_act)
kfree(post_act);
}
+int
+mlx5e_tc_post_act_offload(struct mlx5e_post_act *post_act,
+ struct mlx5e_post_act_handle *handle)
+{
+ struct mlx5_flow_spec *spec;
+ int err;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return -ENOMEM;
+
+ /* Post action rule matches on fte_id and executes original rule's tc rule action */
+ mlx5e_tc_match_to_reg_match(spec, FTEID_TO_REG, handle->id, MLX5_POST_ACTION_MASK);
+
+ handle->rule = mlx5e_tc_rule_offload(post_act->priv, spec, handle->attr);
+ if (IS_ERR(handle->rule)) {
+ err = PTR_ERR(handle->rule);
+ netdev_warn(post_act->priv->netdev, "Failed to add post action rule");
+ goto err_rule;
+ }
+
+ kvfree(spec);
+ return 0;
+
+err_rule:
+ kvfree(spec);
+ return err;
+}
+
struct mlx5e_post_act_handle *
mlx5e_tc_post_act_add(struct mlx5e_post_act *post_act, struct mlx5_flow_attr *attr)
{
u32 attr_sz = ns_to_attr_sz(post_act->ns_type);
- struct mlx5e_post_act_handle *handle = NULL;
- struct mlx5_flow_attr *post_attr = NULL;
- struct mlx5_flow_spec *spec = NULL;
+ struct mlx5e_post_act_handle *handle;
+ struct mlx5_flow_attr *post_attr;
int err;
handle = kzalloc(sizeof(*handle), GFP_KERNEL);
- spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
post_attr = mlx5_alloc_flow_attr(post_act->ns_type);
- if (!handle || !spec || !post_attr) {
+ if (!handle || !post_attr) {
kfree(post_attr);
- kvfree(spec);
kfree(handle);
return ERR_PTR(-ENOMEM);
}
@@ -100,7 +127,8 @@ mlx5e_tc_post_act_add(struct mlx5e_post_act *post_act, struct mlx5_flow_attr *at
post_attr->ft = post_act->ft;
post_attr->inner_match_level = MLX5_MATCH_NONE;
post_attr->outer_match_level = MLX5_MATCH_NONE;
- post_attr->action &= ~(MLX5_FLOW_CONTEXT_ACTION_DECAP);
+ post_attr->action &= ~MLX5_FLOW_CONTEXT_ACTION_DECAP;
+ post_attr->flags |= MLX5_ATTR_FLAG_NO_IN_PORT;
handle->ns_type = post_act->ns_type;
/* Splits were handled before post action */
@@ -112,36 +140,29 @@ mlx5e_tc_post_act_add(struct mlx5e_post_act *post_act, struct mlx5_flow_attr *at
if (err)
goto err_xarray;
- /* Post action rule matches on fte_id and executes original rule's
- * tc rule action
- */
- mlx5e_tc_match_to_reg_match(spec, FTEID_TO_REG,
- handle->id, MLX5_POST_ACTION_MASK);
-
- handle->rule = mlx5_tc_rule_insert(post_act->priv, spec, post_attr);
- if (IS_ERR(handle->rule)) {
- err = PTR_ERR(handle->rule);
- netdev_warn(post_act->priv->netdev, "Failed to add post action rule");
- goto err_rule;
- }
handle->attr = post_attr;
- kvfree(spec);
return handle;
-err_rule:
- xa_erase(&post_act->ids, handle->id);
err_xarray:
kfree(post_attr);
- kvfree(spec);
kfree(handle);
return ERR_PTR(err);
}
void
+mlx5e_tc_post_act_unoffload(struct mlx5e_post_act *post_act,
+ struct mlx5e_post_act_handle *handle)
+{
+ mlx5e_tc_rule_unoffload(post_act->priv, handle->rule, handle->attr);
+ handle->rule = NULL;
+}
+
+void
mlx5e_tc_post_act_del(struct mlx5e_post_act *post_act, struct mlx5e_post_act_handle *handle)
{
- mlx5_tc_rule_delete(post_act->priv, handle->rule, handle->attr);
+ if (!IS_ERR_OR_NULL(handle->rule))
+ mlx5e_tc_post_act_unoffload(post_act, handle);
xa_erase(&post_act->ids, handle->id);
kfree(handle->attr);
kfree(handle);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.h
index b530ec1981a5..f476774c0b75 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.h
@@ -24,6 +24,14 @@ mlx5e_tc_post_act_add(struct mlx5e_post_act *post_act, struct mlx5_flow_attr *at
void
mlx5e_tc_post_act_del(struct mlx5e_post_act *post_act, struct mlx5e_post_act_handle *handle);
+int
+mlx5e_tc_post_act_offload(struct mlx5e_post_act *post_act,
+ struct mlx5e_post_act_handle *handle);
+
+void
+mlx5e_tc_post_act_unoffload(struct mlx5e_post_act *post_act,
+ struct mlx5e_post_act_handle *handle);
+
struct mlx5_flow_table *
mlx5e_tc_post_act_get_ft(struct mlx5e_post_act *post_act);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_meter.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_meter.c
new file mode 100644
index 000000000000..8b77e822810e
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_meter.c
@@ -0,0 +1,209 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include "en/tc_priv.h"
+#include "post_meter.h"
+#include "en/tc/post_act.h"
+
+#define MLX5_PACKET_COLOR_BITS MLX5_REG_MAPPING_MBITS(PACKET_COLOR_TO_REG)
+#define MLX5_PACKET_COLOR_MASK MLX5_REG_MAPPING_MASK(PACKET_COLOR_TO_REG)
+
+struct mlx5e_post_meter_priv {
+ struct mlx5_flow_table *ft;
+ struct mlx5_flow_group *fg;
+ struct mlx5_flow_handle *fwd_green_rule;
+ struct mlx5_flow_handle *drop_red_rule;
+};
+
+struct mlx5_flow_table *
+mlx5e_post_meter_get_ft(struct mlx5e_post_meter_priv *post_meter)
+{
+ return post_meter->ft;
+}
+
+static int
+mlx5e_post_meter_table_create(struct mlx5e_priv *priv,
+ enum mlx5_flow_namespace_type ns_type,
+ struct mlx5e_post_meter_priv *post_meter)
+{
+ struct mlx5_flow_table_attr ft_attr = {};
+ struct mlx5_flow_namespace *root_ns;
+
+ root_ns = mlx5_get_flow_namespace(priv->mdev, ns_type);
+ if (!root_ns) {
+ mlx5_core_warn(priv->mdev, "Failed to get namespace for flow meter\n");
+ return -EOPNOTSUPP;
+ }
+
+ ft_attr.flags = MLX5_FLOW_TABLE_UNMANAGED;
+ ft_attr.prio = FDB_SLOW_PATH;
+ ft_attr.max_fte = 2;
+ ft_attr.level = 1;
+
+ post_meter->ft = mlx5_create_flow_table(root_ns, &ft_attr);
+ if (IS_ERR(post_meter->ft)) {
+ mlx5_core_warn(priv->mdev, "Failed to create post_meter table\n");
+ return PTR_ERR(post_meter->ft);
+ }
+
+ return 0;
+}
+
+static int
+mlx5e_post_meter_fg_create(struct mlx5e_priv *priv,
+ struct mlx5e_post_meter_priv *post_meter)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ void *misc2, *match_criteria;
+ u32 *flow_group_in;
+ int err = 0;
+
+ flow_group_in = kvzalloc(inlen, GFP_KERNEL);
+ if (!flow_group_in)
+ return -ENOMEM;
+
+ MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
+ MLX5_MATCH_MISC_PARAMETERS_2);
+ match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in,
+ match_criteria);
+ misc2 = MLX5_ADDR_OF(fte_match_param, match_criteria, misc_parameters_2);
+ MLX5_SET(fte_match_set_misc2, misc2, metadata_reg_c_5, MLX5_PACKET_COLOR_MASK);
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1);
+
+ post_meter->fg = mlx5_create_flow_group(post_meter->ft, flow_group_in);
+ if (IS_ERR(post_meter->fg)) {
+ mlx5_core_warn(priv->mdev, "Failed to create post_meter flow group\n");
+ err = PTR_ERR(post_meter->fg);
+ }
+
+ kvfree(flow_group_in);
+ return err;
+}
+
+static int
+mlx5e_post_meter_rules_create(struct mlx5e_priv *priv,
+ struct mlx5e_post_meter_priv *post_meter,
+ struct mlx5e_post_act *post_act,
+ struct mlx5_fc *green_counter,
+ struct mlx5_fc *red_counter)
+{
+ struct mlx5_flow_destination dest[2] = {};
+ struct mlx5_flow_act flow_act = {};
+ struct mlx5_flow_handle *rule;
+ struct mlx5_flow_spec *spec;
+ int err;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return -ENOMEM;
+
+ mlx5e_tc_match_to_reg_match(spec, PACKET_COLOR_TO_REG,
+ MLX5_FLOW_METER_COLOR_RED, MLX5_PACKET_COLOR_MASK);
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP |
+ MLX5_FLOW_CONTEXT_ACTION_COUNT;
+ flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
+ dest[0].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+ dest[0].counter_id = mlx5_fc_id(red_counter);
+
+ rule = mlx5_add_flow_rules(post_meter->ft, spec, &flow_act, dest, 1);
+ if (IS_ERR(rule)) {
+ mlx5_core_warn(priv->mdev, "Failed to create post_meter flow drop rule\n");
+ err = PTR_ERR(rule);
+ goto err_red;
+ }
+ post_meter->drop_red_rule = rule;
+
+ mlx5e_tc_match_to_reg_match(spec, PACKET_COLOR_TO_REG,
+ MLX5_FLOW_METER_COLOR_GREEN, MLX5_PACKET_COLOR_MASK);
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
+ MLX5_FLOW_CONTEXT_ACTION_COUNT;
+ dest[0].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ dest[0].ft = mlx5e_tc_post_act_get_ft(post_act);
+ dest[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+ dest[1].counter_id = mlx5_fc_id(green_counter);
+
+ rule = mlx5_add_flow_rules(post_meter->ft, spec, &flow_act, dest, 2);
+ if (IS_ERR(rule)) {
+ mlx5_core_warn(priv->mdev, "Failed to create post_meter flow fwd rule\n");
+ err = PTR_ERR(rule);
+ goto err_green;
+ }
+ post_meter->fwd_green_rule = rule;
+
+ kvfree(spec);
+ return 0;
+
+err_green:
+ mlx5_del_flow_rules(post_meter->drop_red_rule);
+err_red:
+ kvfree(spec);
+ return err;
+}
+
+static void
+mlx5e_post_meter_rules_destroy(struct mlx5e_post_meter_priv *post_meter)
+{
+ mlx5_del_flow_rules(post_meter->drop_red_rule);
+ mlx5_del_flow_rules(post_meter->fwd_green_rule);
+}
+
+static void
+mlx5e_post_meter_fg_destroy(struct mlx5e_post_meter_priv *post_meter)
+{
+ mlx5_destroy_flow_group(post_meter->fg);
+}
+
+static void
+mlx5e_post_meter_table_destroy(struct mlx5e_post_meter_priv *post_meter)
+{
+ mlx5_destroy_flow_table(post_meter->ft);
+}
+
+struct mlx5e_post_meter_priv *
+mlx5e_post_meter_init(struct mlx5e_priv *priv,
+ enum mlx5_flow_namespace_type ns_type,
+ struct mlx5e_post_act *post_act,
+ struct mlx5_fc *green_counter,
+ struct mlx5_fc *red_counter)
+{
+ struct mlx5e_post_meter_priv *post_meter;
+ int err;
+
+ post_meter = kzalloc(sizeof(*post_meter), GFP_KERNEL);
+ if (!post_meter)
+ return ERR_PTR(-ENOMEM);
+
+ err = mlx5e_post_meter_table_create(priv, ns_type, post_meter);
+ if (err)
+ goto err_ft;
+
+ err = mlx5e_post_meter_fg_create(priv, post_meter);
+ if (err)
+ goto err_fg;
+
+ err = mlx5e_post_meter_rules_create(priv, post_meter, post_act, green_counter,
+ red_counter);
+ if (err)
+ goto err_rules;
+
+ return post_meter;
+
+err_rules:
+ mlx5e_post_meter_fg_destroy(post_meter);
+err_fg:
+ mlx5e_post_meter_table_destroy(post_meter);
+err_ft:
+ kfree(post_meter);
+ return ERR_PTR(err);
+}
+
+void
+mlx5e_post_meter_cleanup(struct mlx5e_post_meter_priv *post_meter)
+{
+ mlx5e_post_meter_rules_destroy(post_meter);
+ mlx5e_post_meter_fg_destroy(post_meter);
+ mlx5e_post_meter_table_destroy(post_meter);
+ kfree(post_meter);
+}
+
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_meter.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_meter.h
new file mode 100644
index 000000000000..34d0e4b9fc7a
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_meter.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#ifndef __MLX5_EN_POST_METER_H__
+#define __MLX5_EN_POST_METER_H__
+
+#define packet_color_to_reg { \
+ .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_5, \
+ .moffset = 0, \
+ .mlen = 8, \
+ .soffset = MLX5_BYTE_OFF(fte_match_param, \
+ misc_parameters_2.metadata_reg_c_5), \
+}
+
+struct mlx5e_post_meter_priv;
+
+struct mlx5_flow_table *
+mlx5e_post_meter_get_ft(struct mlx5e_post_meter_priv *post_meter);
+
+struct mlx5e_post_meter_priv *
+mlx5e_post_meter_init(struct mlx5e_priv *priv,
+ enum mlx5_flow_namespace_type ns_type,
+ struct mlx5e_post_act *post_act,
+ struct mlx5_fc *green_counter,
+ struct mlx5_fc *red_counter);
+void
+mlx5e_post_meter_cleanup(struct mlx5e_post_meter_priv *post_meter);
+
+#endif /* __MLX5_EN_POST_METER_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c
index df6888c4793c..1cbd2eb9d04f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c
@@ -5,6 +5,8 @@
#include <net/psample.h>
#include "en/mapping.h"
#include "en/tc/post_act.h"
+#include "en/tc/act/sample.h"
+#include "en/mod_hdr.h"
#include "sample.h"
#include "eswitch.h"
#include "en_tc.h"
@@ -45,14 +47,12 @@ struct mlx5e_sample_flow {
struct mlx5_flow_handle *pre_rule;
struct mlx5_flow_attr *post_attr;
struct mlx5_flow_handle *post_rule;
- struct mlx5e_post_act_handle *post_act_handle;
};
struct mlx5e_sample_restore {
struct hlist_node hlist;
struct mlx5_modify_hdr *modify_hdr;
struct mlx5_flow_handle *rule;
- struct mlx5e_post_act_handle *post_act_handle;
u32 obj_id;
int count;
};
@@ -93,6 +93,7 @@ sampler_termtbl_create(struct mlx5e_tc_psample *tc_psample)
act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
dest.vport.num = esw->manager_vport;
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
tc_psample->termtbl_rule = mlx5_add_flow_rules(tc_psample->termtbl, NULL, &act, &dest, 1);
if (IS_ERR(tc_psample->termtbl_rule)) {
err = PTR_ERR(tc_psample->termtbl_rule);
@@ -230,69 +231,46 @@ sampler_put(struct mlx5e_tc_psample *tc_psample, struct mlx5e_sampler *sampler)
*/
static struct mlx5_modify_hdr *
sample_modify_hdr_get(struct mlx5_core_dev *mdev, u32 obj_id,
- struct mlx5e_post_act_handle *handle)
+ struct mlx5e_tc_mod_hdr_acts *mod_acts)
{
- struct mlx5e_tc_mod_hdr_acts mod_acts = {};
struct mlx5_modify_hdr *modify_hdr;
int err;
- err = mlx5e_tc_match_to_reg_set(mdev, &mod_acts, MLX5_FLOW_NAMESPACE_FDB,
+ err = mlx5e_tc_match_to_reg_set(mdev, mod_acts, MLX5_FLOW_NAMESPACE_FDB,
CHAIN_TO_REG, obj_id);
if (err)
goto err_set_regc0;
- if (handle) {
- err = mlx5e_tc_post_act_set_handle(mdev, handle, &mod_acts);
- if (err)
- goto err_post_act;
- }
-
modify_hdr = mlx5_modify_header_alloc(mdev, MLX5_FLOW_NAMESPACE_FDB,
- mod_acts.num_actions,
- mod_acts.actions);
+ mod_acts->num_actions,
+ mod_acts->actions);
if (IS_ERR(modify_hdr)) {
err = PTR_ERR(modify_hdr);
goto err_modify_hdr;
}
- dealloc_mod_hdr_actions(&mod_acts);
+ mlx5e_mod_hdr_dealloc(mod_acts);
return modify_hdr;
err_modify_hdr:
-err_post_act:
- dealloc_mod_hdr_actions(&mod_acts);
+ mlx5e_mod_hdr_dealloc(mod_acts);
err_set_regc0:
return ERR_PTR(err);
}
-static u32
-restore_hash(u32 obj_id, struct mlx5e_post_act_handle *post_act_handle)
-{
- return jhash_2words(obj_id, hash32_ptr(post_act_handle), 0);
-}
-
-static bool
-restore_equal(struct mlx5e_sample_restore *restore, u32 obj_id,
- struct mlx5e_post_act_handle *post_act_handle)
-{
- return restore->obj_id == obj_id && restore->post_act_handle == post_act_handle;
-}
-
static struct mlx5e_sample_restore *
sample_restore_get(struct mlx5e_tc_psample *tc_psample, u32 obj_id,
- struct mlx5e_post_act_handle *post_act_handle)
+ struct mlx5e_tc_mod_hdr_acts *mod_acts)
{
struct mlx5_eswitch *esw = tc_psample->esw;
struct mlx5_core_dev *mdev = esw->dev;
struct mlx5e_sample_restore *restore;
struct mlx5_modify_hdr *modify_hdr;
- u32 hash_key;
int err;
mutex_lock(&tc_psample->restore_lock);
- hash_key = restore_hash(obj_id, post_act_handle);
- hash_for_each_possible(tc_psample->restore_hashtbl, restore, hlist, hash_key)
- if (restore_equal(restore, obj_id, post_act_handle))
+ hash_for_each_possible(tc_psample->restore_hashtbl, restore, hlist, obj_id)
+ if (restore->obj_id == obj_id)
goto add_ref;
restore = kzalloc(sizeof(*restore), GFP_KERNEL);
@@ -301,9 +279,8 @@ sample_restore_get(struct mlx5e_tc_psample *tc_psample, u32 obj_id,
goto err_alloc;
}
restore->obj_id = obj_id;
- restore->post_act_handle = post_act_handle;
- modify_hdr = sample_modify_hdr_get(mdev, obj_id, post_act_handle);
+ modify_hdr = sample_modify_hdr_get(mdev, obj_id, mod_acts);
if (IS_ERR(modify_hdr)) {
err = PTR_ERR(modify_hdr);
goto err_modify_hdr;
@@ -316,7 +293,7 @@ sample_restore_get(struct mlx5e_tc_psample *tc_psample, u32 obj_id,
goto err_restore;
}
- hash_add(tc_psample->restore_hashtbl, &restore->hlist, hash_key);
+ hash_add(tc_psample->restore_hashtbl, &restore->hlist, obj_id);
add_ref:
restore->count++;
mutex_unlock(&tc_psample->restore_lock);
@@ -402,7 +379,7 @@ add_post_rule(struct mlx5_eswitch *esw, struct mlx5e_sample_flow *sample_flow,
post_attr->chain = 0;
post_attr->prio = 0;
post_attr->ft = default_tbl;
- post_attr->flags = MLX5_ESW_ATTR_FLAG_NO_IN_PORT;
+ post_attr->flags = MLX5_ATTR_FLAG_NO_IN_PORT;
/* When offloading sample and encap action, if there is no valid
* neigh data struct, a slow path rule is offloaded first. Source
@@ -491,16 +468,16 @@ del_post_rule(struct mlx5_eswitch *esw, struct mlx5e_sample_flow *sample_flow,
struct mlx5_flow_handle *
mlx5e_tc_sample_offload(struct mlx5e_tc_psample *tc_psample,
struct mlx5_flow_spec *spec,
- struct mlx5_flow_attr *attr,
- u32 tunnel_id)
+ struct mlx5_flow_attr *attr)
{
- struct mlx5e_post_act_handle *post_act_handle = NULL;
struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
struct mlx5_esw_flow_attr *pre_esw_attr;
struct mlx5_mapped_obj restore_obj = {};
+ struct mlx5e_tc_mod_hdr_acts *mod_acts;
struct mlx5e_sample_flow *sample_flow;
struct mlx5e_sample_attr *sample_attr;
struct mlx5_flow_attr *pre_attr;
+ u32 tunnel_id = attr->tunnel_id;
struct mlx5_eswitch *esw;
u32 default_tbl_id;
u32 obj_id;
@@ -512,7 +489,7 @@ mlx5e_tc_sample_offload(struct mlx5e_tc_psample *tc_psample,
sample_flow = kzalloc(sizeof(*sample_flow), GFP_KERNEL);
if (!sample_flow)
return ERR_PTR(-ENOMEM);
- sample_attr = attr->sample_attr;
+ sample_attr = &attr->sample_attr;
sample_attr->sample_flow = sample_flow;
/* For NICs with reg_c_preserve support or decap action, use
@@ -521,18 +498,11 @@ mlx5e_tc_sample_offload(struct mlx5e_tc_psample *tc_psample,
* original flow table.
*/
esw = tc_psample->esw;
- if (MLX5_CAP_GEN(esw->dev, reg_c_preserve) ||
- attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP) {
+ if (mlx5e_tc_act_sample_is_multi_table(esw->dev, attr)) {
struct mlx5_flow_table *ft;
ft = mlx5e_tc_post_act_get_ft(tc_psample->post_act);
default_tbl_id = ft->id;
- post_act_handle = mlx5e_tc_post_act_add(tc_psample->post_act, attr);
- if (IS_ERR(post_act_handle)) {
- err = PTR_ERR(post_act_handle);
- goto err_post_act;
- }
- sample_flow->post_act_handle = post_act_handle;
} else {
err = add_post_rule(esw, sample_flow, spec, attr, &default_tbl_id);
if (err)
@@ -545,6 +515,7 @@ mlx5e_tc_sample_offload(struct mlx5e_tc_psample *tc_psample,
err = PTR_ERR(sample_flow->sampler);
goto err_sampler;
}
+ sample_attr->sampler_id = sample_flow->sampler->sampler_id;
/* Create an id mapping reg_c0 value to sample object. */
restore_obj.type = MLX5_MAPPED_OBJ_SAMPLE;
@@ -558,7 +529,8 @@ mlx5e_tc_sample_offload(struct mlx5e_tc_psample *tc_psample,
sample_attr->restore_obj_id = obj_id;
/* Create sample restore context. */
- sample_flow->restore = sample_restore_get(tc_psample, obj_id, post_act_handle);
+ mod_acts = &attr->parse_attr->mod_hdr_acts;
+ sample_flow->restore = sample_restore_get(tc_psample, obj_id, mod_acts);
if (IS_ERR(sample_flow->restore)) {
err = PTR_ERR(sample_flow->restore);
goto err_sample_restore;
@@ -579,13 +551,13 @@ mlx5e_tc_sample_offload(struct mlx5e_tc_psample *tc_psample,
if (tunnel_id)
pre_attr->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
pre_attr->modify_hdr = sample_flow->restore->modify_hdr;
- pre_attr->flags = MLX5_ESW_ATTR_FLAG_SAMPLE;
+ pre_attr->flags = MLX5_ATTR_FLAG_SAMPLE;
pre_attr->inner_match_level = attr->inner_match_level;
pre_attr->outer_match_level = attr->outer_match_level;
pre_attr->chain = attr->chain;
pre_attr->prio = attr->prio;
- pre_attr->sample_attr = attr->sample_attr;
- sample_attr->sampler_id = sample_flow->sampler->sampler_id;
+ pre_attr->ft = attr->ft;
+ pre_attr->sample_attr = *sample_attr;
pre_esw_attr = pre_attr->esw_attr;
pre_esw_attr->in_mdev = esw_attr->in_mdev;
pre_esw_attr->in_rep = esw_attr->in_rep;
@@ -610,9 +582,6 @@ err_sampler:
if (sample_flow->post_rule)
del_post_rule(esw, sample_flow, attr);
err_post_rule:
- if (post_act_handle)
- mlx5e_tc_post_act_del(tc_psample->post_act, post_act_handle);
-err_post_act:
kfree(sample_flow);
return ERR_PTR(err);
}
@@ -632,15 +601,13 @@ mlx5e_tc_sample_unoffload(struct mlx5e_tc_psample *tc_psample,
* will hit fw syndromes.
*/
esw = tc_psample->esw;
- sample_flow = attr->sample_attr->sample_flow;
+ sample_flow = attr->sample_attr.sample_flow;
mlx5_eswitch_del_offloaded_rule(esw, sample_flow->pre_rule, sample_flow->pre_attr);
sample_restore_put(tc_psample, sample_flow->restore);
- mapping_remove(esw->offloads.reg_c0_obj_pool, attr->sample_attr->restore_obj_id);
+ mapping_remove(esw->offloads.reg_c0_obj_pool, attr->sample_attr.restore_obj_id);
sampler_put(tc_psample, sample_flow->sampler);
- if (sample_flow->post_act_handle)
- mlx5e_tc_post_act_del(tc_psample->post_act, sample_flow->post_act_handle);
- else
+ if (sample_flow->post_rule)
del_post_rule(esw, sample_flow, attr);
kfree(sample_flow->pre_attr);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.h
index 9ef8a49d7801..a569367eae4d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.h
@@ -26,8 +26,7 @@ void mlx5e_tc_sample_skb(struct sk_buff *skb, struct mlx5_mapped_obj *mapped_obj
struct mlx5_flow_handle *
mlx5e_tc_sample_offload(struct mlx5e_tc_psample *sample_priv,
struct mlx5_flow_spec *spec,
- struct mlx5_flow_attr *attr,
- u32 tunnel_id);
+ struct mlx5_flow_attr *attr);
void
mlx5e_tc_sample_unoffload(struct mlx5e_tc_psample *sample_priv,
@@ -45,8 +44,7 @@ mlx5e_tc_sample_cleanup(struct mlx5e_tc_psample *tc_psample);
static inline struct mlx5_flow_handle *
mlx5e_tc_sample_offload(struct mlx5e_tc_psample *tc_psample,
struct mlx5_flow_spec *spec,
- struct mlx5_flow_attr *attr,
- u32 tunnel_id)
+ struct mlx5_flow_attr *attr)
{ return ERR_PTR(-EOPNOTSUPP); }
static inline void
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
index 2445e2ae3324..864ce0c393e6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
@@ -14,18 +14,21 @@
#include <linux/workqueue.h>
#include <linux/refcount.h>
#include <linux/xarray.h>
+#include <linux/if_macvlan.h>
+#include <linux/debugfs.h>
#include "lib/fs_chains.h"
#include "en/tc_ct.h"
+#include "en/tc/ct_fs.h"
+#include "en/tc_priv.h"
#include "en/mod_hdr.h"
#include "en/mapping.h"
#include "en/tc/post_act.h"
#include "en.h"
#include "en_tc.h"
#include "en_rep.h"
+#include "fs_core.h"
-#define MLX5_CT_ZONE_BITS (mlx5e_tc_attr_to_reg_mappings[ZONE_TO_REG].mlen)
-#define MLX5_CT_ZONE_MASK GENMASK(MLX5_CT_ZONE_BITS - 1, 0)
#define MLX5_CT_STATE_ESTABLISHED_BIT BIT(1)
#define MLX5_CT_STATE_TRK_BIT BIT(2)
#define MLX5_CT_STATE_NAT_BIT BIT(3)
@@ -33,12 +36,27 @@
#define MLX5_CT_STATE_RELATED_BIT BIT(5)
#define MLX5_CT_STATE_INVALID_BIT BIT(6)
-#define MLX5_CT_LABELS_BITS (mlx5e_tc_attr_to_reg_mappings[LABELS_TO_REG].mlen)
-#define MLX5_CT_LABELS_MASK GENMASK(MLX5_CT_LABELS_BITS - 1, 0)
+#define MLX5_CT_LABELS_BITS MLX5_REG_MAPPING_MBITS(LABELS_TO_REG)
+#define MLX5_CT_LABELS_MASK MLX5_REG_MAPPING_MASK(LABELS_TO_REG)
+
+/* Statically allocate modify actions for
+ * ipv6 and port nat (5) + tuple fields (4) + nic mode zone restore (1) = 10.
+ * This will be increased dynamically if needed (for the ipv6 snat + dnat).
+ */
+#define MLX5_CT_MIN_MOD_ACTS 10
#define ct_dbg(fmt, args...)\
netdev_dbg(ct_priv->netdev, "ct_debug: " fmt "\n", ##args)
+struct mlx5_tc_ct_debugfs {
+ struct {
+ atomic_t offloaded;
+ atomic_t rx_dropped;
+ } stats;
+
+ struct dentry *root;
+};
+
struct mlx5_tc_ct_priv {
struct mlx5_core_dev *dev;
const struct net_device *netdev;
@@ -55,19 +73,23 @@ struct mlx5_tc_ct_priv {
struct mapping_ctx *labels_mapping;
enum mlx5_flow_namespace_type ns_type;
struct mlx5_fs_chains *chains;
+ struct mlx5_ct_fs *fs;
+ struct mlx5_ct_fs_ops *fs_ops;
spinlock_t ht_lock; /* protects ft entries */
+ struct workqueue_struct *wq;
+
+ struct mlx5_tc_ct_debugfs debugfs;
};
struct mlx5_ct_flow {
struct mlx5_flow_attr *pre_ct_attr;
struct mlx5_flow_handle *pre_ct_rule;
- struct mlx5e_post_act_handle *post_act_handle;
struct mlx5_ct_ft *ft;
u32 chain_mapping;
};
struct mlx5_ct_zone_rule {
- struct mlx5_flow_handle *rule;
+ struct mlx5_ct_fs_rule *rule;
struct mlx5e_mod_hdr_handle *mh;
struct mlx5_flow_attr *attr;
bool nat;
@@ -251,7 +273,8 @@ mlx5_tc_ct_rule_to_tuple(struct mlx5_ct_tuple *tuple, struct flow_rule *rule)
return -EOPNOTSUPP;
}
} else {
- return -EOPNOTSUPP;
+ if (tuple->ip_proto != IPPROTO_GRE)
+ return -EOPNOTSUPP;
}
return 0;
@@ -320,7 +343,33 @@ mlx5_tc_ct_rule_to_tuple_nat(struct mlx5_ct_tuple *tuple,
}
static int
-mlx5_tc_ct_set_tuple_match(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec,
+mlx5_tc_ct_get_flow_source_match(struct mlx5_tc_ct_priv *ct_priv,
+ struct net_device *ndev)
+{
+ struct mlx5e_priv *other_priv = netdev_priv(ndev);
+ struct mlx5_core_dev *mdev = ct_priv->dev;
+ bool vf_rep, uplink_rep;
+
+ vf_rep = mlx5e_eswitch_vf_rep(ndev) && mlx5_same_hw_devs(mdev, other_priv->mdev);
+ uplink_rep = mlx5e_eswitch_uplink_rep(ndev) && mlx5_same_hw_devs(mdev, other_priv->mdev);
+
+ if (vf_rep)
+ return MLX5_FLOW_CONTEXT_FLOW_SOURCE_LOCAL_VPORT;
+ if (uplink_rep)
+ return MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK;
+ if (is_vlan_dev(ndev))
+ return mlx5_tc_ct_get_flow_source_match(ct_priv, vlan_dev_real_dev(ndev));
+ if (netif_is_macvlan(ndev))
+ return mlx5_tc_ct_get_flow_source_match(ct_priv, macvlan_dev_real_dev(ndev));
+ if (mlx5e_get_tc_tun(ndev) || netif_is_lag_master(ndev))
+ return MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK;
+
+ return MLX5_FLOW_CONTEXT_FLOW_SOURCE_ANY_VPORT;
+}
+
+static int
+mlx5_tc_ct_set_tuple_match(struct mlx5_tc_ct_priv *ct_priv,
+ struct mlx5_flow_spec *spec,
struct flow_rule *rule)
{
void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
@@ -335,8 +384,7 @@ mlx5_tc_ct_set_tuple_match(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec,
flow_rule_match_basic(rule, &match);
- mlx5e_tc_set_ethertype(priv->mdev, &match, true, headers_c,
- headers_v);
+ mlx5e_tc_set_ethertype(ct_priv->dev, &match, true, headers_c, headers_v);
MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
match.mask->ip_proto);
MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
@@ -432,6 +480,23 @@ mlx5_tc_ct_set_tuple_match(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec,
ntohs(match.key->flags));
}
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_META)) {
+ struct flow_match_meta match;
+
+ flow_rule_match_meta(rule, &match);
+
+ if (match.key->ingress_ifindex & match.mask->ingress_ifindex) {
+ struct net_device *dev;
+
+ dev = dev_get_by_index(&init_net, match.key->ingress_ifindex);
+ if (dev && MLX5_CAP_ESW_FLOWTABLE(ct_priv->dev, flow_source))
+ spec->flow_context.flow_source =
+ mlx5_tc_ct_get_flow_source_match(ct_priv, dev);
+
+ dev_put(dev);
+ }
+ }
+
return 0;
}
@@ -456,7 +521,7 @@ mlx5_tc_ct_entry_del_rule(struct mlx5_tc_ct_priv *ct_priv,
ct_dbg("Deleting ct entry rule in zone %d", entry->tuple.zone);
- mlx5_tc_rule_delete(netdev_priv(ct_priv->netdev), zone_rule->rule, attr);
+ ct_priv->fs_ops->ct_rule_del(ct_priv->fs, zone_rule->rule);
mlx5_tc_ct_entry_destroy_mod_hdr(ct_priv, zone_rule->attr, zone_rule->mh);
mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id);
kfree(attr);
@@ -468,6 +533,8 @@ mlx5_tc_ct_entry_del_rules(struct mlx5_tc_ct_priv *ct_priv,
{
mlx5_tc_ct_entry_del_rule(ct_priv, entry, true);
mlx5_tc_ct_entry_del_rule(ct_priv, entry, false);
+
+ atomic_dec(&ct_priv->debugfs.stats.offloaded);
}
static struct flow_action_entry *
@@ -530,6 +597,12 @@ mlx5_tc_ct_entry_set_registers(struct mlx5_tc_ct_priv *ct_priv,
return 0;
}
+int mlx5_tc_ct_set_ct_clear_regs(struct mlx5_tc_ct_priv *priv,
+ struct mlx5e_tc_mod_hdr_acts *mod_acts)
+{
+ return mlx5_tc_ct_entry_set_registers(priv, mod_acts, 0, 0, 0, 0);
+}
+
static int
mlx5_tc_ct_parse_mangle_to_mod_act(struct flow_action_entry *act,
char *modact)
@@ -609,22 +682,15 @@ mlx5_tc_ct_entry_create_nat(struct mlx5_tc_ct_priv *ct_priv,
struct flow_action *flow_action = &flow_rule->action;
struct mlx5_core_dev *mdev = ct_priv->dev;
struct flow_action_entry *act;
- size_t action_size;
char *modact;
int err, i;
- action_size = MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto);
-
flow_action_for_each(i, act, flow_action) {
switch (act->id) {
case FLOW_ACTION_MANGLE: {
- err = alloc_mod_hdr_actions(mdev, ct_priv->ns_type,
- mod_acts);
- if (err)
- return err;
-
- modact = mod_acts->actions +
- mod_acts->num_actions * action_size;
+ modact = mlx5e_mod_hdr_alloc(mdev, ct_priv->ns_type, mod_acts);
+ if (IS_ERR(modact))
+ return PTR_ERR(modact);
err = mlx5_tc_ct_parse_mangle_to_mod_act(act, modact);
if (err)
@@ -650,9 +716,10 @@ mlx5_tc_ct_entry_create_mod_hdr(struct mlx5_tc_ct_priv *ct_priv,
struct mlx5_flow_attr *attr,
struct flow_rule *flow_rule,
struct mlx5e_mod_hdr_handle **mh,
- u8 zone_restore_id, bool nat)
+ u8 zone_restore_id, bool nat_table, bool has_nat)
{
- struct mlx5e_tc_mod_hdr_acts mod_acts = {};
+ DECLARE_MOD_HDR_ACTS_ACTIONS(actions_arr, MLX5_CT_MIN_MOD_ACTS);
+ DECLARE_MOD_HDR_ACTS(mod_acts, actions_arr);
struct flow_action_entry *meta;
u16 ct_state = 0;
int err;
@@ -665,11 +732,12 @@ mlx5_tc_ct_entry_create_mod_hdr(struct mlx5_tc_ct_priv *ct_priv,
&attr->ct_attr.ct_labels_id);
if (err)
return -EOPNOTSUPP;
- if (nat) {
- err = mlx5_tc_ct_entry_create_nat(ct_priv, flow_rule,
- &mod_acts);
- if (err)
- goto err_mapping;
+ if (nat_table) {
+ if (has_nat) {
+ err = mlx5_tc_ct_entry_create_nat(ct_priv, flow_rule, &mod_acts);
+ if (err)
+ goto err_mapping;
+ }
ct_state |= MLX5_CT_STATE_NAT_BIT;
}
@@ -684,7 +752,7 @@ mlx5_tc_ct_entry_create_mod_hdr(struct mlx5_tc_ct_priv *ct_priv,
if (err)
goto err_mapping;
- if (nat) {
+ if (nat_table && has_nat) {
attr->modify_hdr = mlx5_modify_header_alloc(ct_priv->dev, ct_priv->ns_type,
mod_acts.num_actions,
mod_acts.actions);
@@ -706,11 +774,11 @@ mlx5_tc_ct_entry_create_mod_hdr(struct mlx5_tc_ct_priv *ct_priv,
attr->modify_hdr = mlx5e_mod_hdr_get(*mh);
}
- dealloc_mod_hdr_actions(&mod_acts);
+ mlx5e_mod_hdr_dealloc(&mod_acts);
return 0;
err_mapping:
- dealloc_mod_hdr_actions(&mod_acts);
+ mlx5e_mod_hdr_dealloc(&mod_acts);
mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id);
return err;
}
@@ -752,7 +820,9 @@ mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv,
err = mlx5_tc_ct_entry_create_mod_hdr(ct_priv, attr, flow_rule,
&zone_rule->mh,
- zone_restore_id, nat);
+ zone_restore_id,
+ nat,
+ mlx5_tc_ct_entry_has_nat(entry));
if (err) {
ct_dbg("Failed to create ct entry mod hdr");
goto err_mod_hdr;
@@ -764,16 +834,20 @@ mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv,
attr->dest_chain = 0;
attr->dest_ft = mlx5e_tc_post_act_get_ft(ct_priv->post_act);
attr->ft = nat ? ct_priv->ct_nat : ct_priv->ct;
- attr->outer_match_level = MLX5_MATCH_L4;
+ if (entry->tuple.ip_proto == IPPROTO_TCP ||
+ entry->tuple.ip_proto == IPPROTO_UDP)
+ attr->outer_match_level = MLX5_MATCH_L4;
+ else
+ attr->outer_match_level = MLX5_MATCH_L3;
attr->counter = entry->counter->counter;
- attr->flags |= MLX5_ESW_ATTR_FLAG_NO_IN_PORT;
+ attr->flags |= MLX5_ATTR_FLAG_NO_IN_PORT;
if (ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB)
attr->esw_attr->in_mdev = priv->mdev;
- mlx5_tc_ct_set_tuple_match(netdev_priv(ct_priv->netdev), spec, flow_rule);
+ mlx5_tc_ct_set_tuple_match(ct_priv, spec, flow_rule);
mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG, entry->tuple.zone, MLX5_CT_ZONE_MASK);
- zone_rule->rule = mlx5_tc_rule_insert(priv, spec, attr);
+ zone_rule->rule = ct_priv->fs_ops->ct_rule_add(ct_priv->fs, spec, attr, flow_rule);
if (IS_ERR(zone_rule->rule)) {
err = PTR_ERR(zone_rule->rule);
ct_dbg("Failed to add ct entry rule, nat: %d", nat);
@@ -868,14 +942,11 @@ static void mlx5_tc_ct_entry_del_work(struct work_struct *work)
static void
__mlx5_tc_ct_entry_put(struct mlx5_ct_entry *entry)
{
- struct mlx5e_priv *priv;
-
if (!refcount_dec_and_test(&entry->refcnt))
return;
- priv = netdev_priv(entry->ct_priv->netdev);
INIT_WORK(&entry->work, mlx5_tc_ct_entry_del_work);
- queue_work(priv->wq, &entry->work);
+ queue_work(entry->ct_priv->wq, &entry->work);
}
static struct mlx5_ct_counter *
@@ -907,12 +978,9 @@ mlx5_tc_ct_shared_counter_get(struct mlx5_tc_ct_priv *ct_priv,
struct mlx5_ct_tuple rev_tuple = entry->tuple;
struct mlx5_ct_counter *shared_counter;
struct mlx5_ct_entry *rev_entry;
- __be16 tmp_port;
/* get the reversed tuple */
- tmp_port = rev_tuple.port.src;
- rev_tuple.port.src = rev_tuple.port.dst;
- rev_tuple.port.dst = tmp_port;
+ swap(rev_tuple.port.src, rev_tuple.port.dst);
if (rev_tuple.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
__be32 tmp_addr = rev_tuple.ip.src_v4;
@@ -987,6 +1055,7 @@ mlx5_tc_ct_entry_add_rules(struct mlx5_tc_ct_priv *ct_priv,
if (err)
goto err_nat;
+ atomic_inc(&ct_priv->debugfs.stats.offloaded);
return 0;
err_nat:
@@ -1114,7 +1183,6 @@ mlx5_tc_ct_block_flow_offload_del(struct mlx5_ct_ft *ft,
}
rhashtable_remove_fast(&ft->ct_entries_ht, &entry->node, cts_ht_params);
- mlx5_tc_ct_entry_remove_from_tuples(entry);
spin_unlock_bh(&ct_priv->ht_lock);
mlx5_tc_ct_entry_put(entry);
@@ -1184,16 +1252,20 @@ mlx5_tc_ct_skb_to_tuple(struct sk_buff *skb, struct mlx5_ct_tuple *tuple,
struct flow_keys flow_keys;
skb_reset_network_header(skb);
- skb_flow_dissect_flow_keys(skb, &flow_keys, 0);
+ skb_flow_dissect_flow_keys(skb, &flow_keys, FLOW_DISSECTOR_F_STOP_BEFORE_ENCAP);
tuple->zone = zone;
if (flow_keys.basic.ip_proto != IPPROTO_TCP &&
- flow_keys.basic.ip_proto != IPPROTO_UDP)
+ flow_keys.basic.ip_proto != IPPROTO_UDP &&
+ flow_keys.basic.ip_proto != IPPROTO_GRE)
return false;
- tuple->port.src = flow_keys.ports.src;
- tuple->port.dst = flow_keys.ports.dst;
+ if (flow_keys.basic.ip_proto == IPPROTO_TCP ||
+ flow_keys.basic.ip_proto == IPPROTO_UDP) {
+ tuple->port.src = flow_keys.ports.src;
+ tuple->port.dst = flow_keys.ports.dst;
+ }
tuple->n_proto = flow_keys.basic.n_proto;
tuple->ip_proto = flow_keys.basic.ip_proto;
@@ -1360,9 +1432,6 @@ mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv,
const struct flow_action_entry *act,
struct netlink_ext_ack *extack)
{
- bool clear_action = act->ct.action & TCA_CT_ACT_CLEAR;
- int err;
-
if (!priv) {
NL_SET_ERR_MSG_MOD(extack,
"offload of ct action isn't available");
@@ -1373,17 +1442,6 @@ mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv,
attr->ct_attr.ct_action = act->ct.action;
attr->ct_attr.nf_ft = act->ct.flow_table;
- if (!clear_action)
- goto out;
-
- err = mlx5_tc_ct_entry_set_registers(priv, mod_acts, 0, 0, 0, 0);
- if (err) {
- NL_SET_ERR_MSG_MOD(extack, "Failed to set registers for ct clear");
- return err;
- }
- attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
-
-out:
return 0;
}
@@ -1460,7 +1518,7 @@ static int tc_ct_pre_ct_add_rules(struct mlx5_ct_ft *ct_ft,
}
pre_ct->miss_rule = rule;
- dealloc_mod_hdr_actions(&pre_mod_acts);
+ mlx5e_mod_hdr_dealloc(&pre_mod_acts);
kvfree(spec);
return 0;
@@ -1469,7 +1527,7 @@ err_miss_rule:
err_flow_rule:
mlx5_modify_header_dealloc(dev, pre_ct->modify_hdr);
err_mapping:
- dealloc_mod_hdr_actions(&pre_mod_acts);
+ mlx5e_mod_hdr_dealloc(&pre_mod_acts);
kvfree(spec);
return err;
}
@@ -1702,6 +1760,7 @@ mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft)
if (!refcount_dec_and_test(&ft->refcount))
return;
+ flush_workqueue(ct_priv->wq);
nf_flow_table_offload_del_cb(ft->nf_ft,
mlx5_tc_ct_block_flow_offload, ft);
rhashtable_remove_fast(&ct_priv->zone_ht, &ft->node, zone_params);
@@ -1716,7 +1775,7 @@ mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft)
/* We translate the tc filter with CT action to the following HW model:
*
* +---------------------+
- * + ft prio (tc chain) +
+ * + ft prio (tc chain) +
* + original match +
* +---------------------+
* | set chain miss mapping
@@ -1726,7 +1785,7 @@ mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft)
* v
* +---------------------+
* + pre_ct/pre_ct_nat + if matches +-------------------------+
- * + zone+nat match +---------------->+ post_act (see below) +
+ * + zone+nat match +---------------->+ post_act (see below) +
* +---------------------+ set zone +-------------------------+
* | set zone
* v
@@ -1741,21 +1800,19 @@ mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft)
* | do nat (if needed)
* v
* +--------------+
- * + post_act + original filter actions
+ * + post_act + original filter actions
* + fte_id match +------------------------>
* +--------------+
*/
static struct mlx5_flow_handle *
__mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv,
- struct mlx5e_tc_flow *flow,
struct mlx5_flow_spec *orig_spec,
struct mlx5_flow_attr *attr)
{
bool nat = attr->ct_attr.ct_action & TCA_CT_ACT_NAT;
struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);
- struct mlx5e_tc_mod_hdr_acts pre_mod_acts = {};
+ struct mlx5e_tc_mod_hdr_acts *pre_mod_acts;
u32 attr_sz = ns_to_attr_sz(ct_priv->ns_type);
- struct mlx5e_post_act_handle *handle;
struct mlx5_flow_attr *pre_ct_attr;
struct mlx5_modify_hdr *mod_hdr;
struct mlx5_ct_flow *ct_flow;
@@ -1764,7 +1821,6 @@ __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv,
ct_flow = kzalloc(sizeof(*ct_flow), GFP_KERNEL);
if (!ct_flow) {
- kfree(ct_flow);
return ERR_PTR(-ENOMEM);
}
@@ -1778,14 +1834,6 @@ __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv,
}
ct_flow->ft = ft;
- handle = mlx5e_tc_post_act_add(ct_priv->post_act, attr);
- if (IS_ERR(handle)) {
- err = PTR_ERR(handle);
- ct_dbg("Failed to allocate post action handle");
- goto err_post_act_handle;
- }
- ct_flow->post_act_handle = handle;
-
/* Base flow attributes of both rules on original rule attribute */
ct_flow->pre_ct_attr = mlx5_alloc_flow_attr(ct_priv->ns_type);
if (!ct_flow->pre_ct_attr) {
@@ -1795,6 +1843,7 @@ __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv,
pre_ct_attr = ct_flow->pre_ct_attr;
memcpy(pre_ct_attr, attr, attr_sz);
+ pre_mod_acts = &pre_ct_attr->parse_attr->mod_hdr_acts;
/* Modify the original rule's action to fwd and modify, leave decap */
pre_ct_attr->action = attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP;
@@ -1813,30 +1862,22 @@ __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv,
}
ct_flow->chain_mapping = chain_mapping;
- err = mlx5e_tc_match_to_reg_set(priv->mdev, &pre_mod_acts, ct_priv->ns_type,
+ err = mlx5e_tc_match_to_reg_set(priv->mdev, pre_mod_acts, ct_priv->ns_type,
CHAIN_TO_REG, chain_mapping);
if (err) {
ct_dbg("Failed to set chain register mapping");
goto err_mapping;
}
- err = mlx5e_tc_post_act_set_handle(priv->mdev, handle, &pre_mod_acts);
- if (err) {
- ct_dbg("Failed to set post action handle");
- goto err_mapping;
- }
-
/* If original flow is decap, we do it before going into ct table
* so add a rewrite for the tunnel match_id.
*/
if ((pre_ct_attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP) &&
attr->chain == 0) {
- u32 tun_id = mlx5e_tc_get_flow_tun_id(flow);
-
- err = mlx5e_tc_match_to_reg_set(priv->mdev, &pre_mod_acts,
+ err = mlx5e_tc_match_to_reg_set(priv->mdev, pre_mod_acts,
ct_priv->ns_type,
TUNNEL_TO_REG,
- tun_id);
+ attr->tunnel_id);
if (err) {
ct_dbg("Failed to set tunnel register mapping");
goto err_mapping;
@@ -1844,8 +1885,8 @@ __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv,
}
mod_hdr = mlx5_modify_header_alloc(priv->mdev, ct_priv->ns_type,
- pre_mod_acts.num_actions,
- pre_mod_acts.actions);
+ pre_mod_acts->num_actions,
+ pre_mod_acts->actions);
if (IS_ERR(mod_hdr)) {
err = PTR_ERR(mod_hdr);
ct_dbg("Failed to create pre ct mod hdr");
@@ -1865,20 +1906,18 @@ __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv,
}
attr->ct_attr.ct_flow = ct_flow;
- dealloc_mod_hdr_actions(&pre_mod_acts);
+ mlx5e_mod_hdr_dealloc(pre_mod_acts);
return ct_flow->pre_ct_rule;
err_insert_orig:
mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr);
err_mapping:
- dealloc_mod_hdr_actions(&pre_mod_acts);
+ mlx5e_mod_hdr_dealloc(pre_mod_acts);
mlx5_chains_put_chain_mapping(ct_priv->chains, ct_flow->chain_mapping);
err_get_chain:
kfree(ct_flow->pre_ct_attr);
err_alloc_pre:
- mlx5e_tc_post_act_del(ct_priv->post_act, handle);
-err_post_act_handle:
mlx5_tc_ct_del_ft_cb(ct_priv, ft);
err_ft:
kfree(ct_flow);
@@ -1886,87 +1925,19 @@ err_ft:
return ERR_PTR(err);
}
-static struct mlx5_flow_handle *
-__mlx5_tc_ct_flow_offload_clear(struct mlx5_tc_ct_priv *ct_priv,
- struct mlx5_flow_spec *orig_spec,
- struct mlx5_flow_attr *attr,
- struct mlx5e_tc_mod_hdr_acts *mod_acts)
-{
- struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);
- u32 attr_sz = ns_to_attr_sz(ct_priv->ns_type);
- struct mlx5_flow_attr *pre_ct_attr;
- struct mlx5_modify_hdr *mod_hdr;
- struct mlx5_flow_handle *rule;
- struct mlx5_ct_flow *ct_flow;
- int err;
-
- ct_flow = kzalloc(sizeof(*ct_flow), GFP_KERNEL);
- if (!ct_flow)
- return ERR_PTR(-ENOMEM);
-
- /* Base esw attributes on original rule attribute */
- pre_ct_attr = mlx5_alloc_flow_attr(ct_priv->ns_type);
- if (!pre_ct_attr) {
- err = -ENOMEM;
- goto err_attr;
- }
-
- memcpy(pre_ct_attr, attr, attr_sz);
-
- mod_hdr = mlx5_modify_header_alloc(priv->mdev, ct_priv->ns_type,
- mod_acts->num_actions,
- mod_acts->actions);
- if (IS_ERR(mod_hdr)) {
- err = PTR_ERR(mod_hdr);
- ct_dbg("Failed to add create ct clear mod hdr");
- goto err_mod_hdr;
- }
-
- pre_ct_attr->modify_hdr = mod_hdr;
-
- rule = mlx5_tc_rule_insert(priv, orig_spec, pre_ct_attr);
- if (IS_ERR(rule)) {
- err = PTR_ERR(rule);
- ct_dbg("Failed to add ct clear rule");
- goto err_insert;
- }
-
- attr->ct_attr.ct_flow = ct_flow;
- ct_flow->pre_ct_attr = pre_ct_attr;
- ct_flow->pre_ct_rule = rule;
- return rule;
-
-err_insert:
- mlx5_modify_header_dealloc(priv->mdev, mod_hdr);
-err_mod_hdr:
- netdev_warn(priv->netdev,
- "Failed to offload ct clear flow, err %d\n", err);
- kfree(pre_ct_attr);
-err_attr:
- kfree(ct_flow);
-
- return ERR_PTR(err);
-}
-
struct mlx5_flow_handle *
mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv,
- struct mlx5e_tc_flow *flow,
struct mlx5_flow_spec *spec,
struct mlx5_flow_attr *attr,
struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts)
{
- bool clear_action = attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR;
struct mlx5_flow_handle *rule;
if (!priv)
return ERR_PTR(-EOPNOTSUPP);
mutex_lock(&priv->control_lock);
-
- if (clear_action)
- rule = __mlx5_tc_ct_flow_offload_clear(priv, spec, attr, mod_hdr_acts);
- else
- rule = __mlx5_tc_ct_flow_offload(priv, flow, spec, attr);
+ rule = __mlx5_tc_ct_flow_offload(priv, spec, attr);
mutex_unlock(&priv->control_lock);
return rule;
@@ -1974,21 +1945,17 @@ mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv,
static void
__mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *ct_priv,
- struct mlx5e_tc_flow *flow,
- struct mlx5_ct_flow *ct_flow)
+ struct mlx5_ct_flow *ct_flow,
+ struct mlx5_flow_attr *attr)
{
struct mlx5_flow_attr *pre_ct_attr = ct_flow->pre_ct_attr;
struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);
- mlx5_tc_rule_delete(priv, ct_flow->pre_ct_rule,
- pre_ct_attr);
+ mlx5_tc_rule_delete(priv, ct_flow->pre_ct_rule, pre_ct_attr);
mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr);
- if (ct_flow->post_act_handle) {
- mlx5_chains_put_chain_mapping(ct_priv->chains, ct_flow->chain_mapping);
- mlx5e_tc_post_act_del(ct_priv->post_act, ct_flow->post_act_handle);
- mlx5_tc_ct_del_ft_cb(ct_priv, ct_flow->ft);
- }
+ mlx5_chains_put_chain_mapping(ct_priv->chains, ct_flow->chain_mapping);
+ mlx5_tc_ct_del_ft_cb(ct_priv, ct_flow->ft);
kfree(ct_flow->pre_ct_attr);
kfree(ct_flow);
@@ -1996,7 +1963,6 @@ __mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *ct_priv,
void
mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *priv,
- struct mlx5e_tc_flow *flow,
struct mlx5_flow_attr *attr)
{
struct mlx5_ct_flow *ct_flow = attr->ct_attr.ct_flow;
@@ -2008,11 +1974,43 @@ mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *priv,
return;
mutex_lock(&priv->control_lock);
- __mlx5_tc_ct_delete_flow(priv, flow, ct_flow);
+ __mlx5_tc_ct_delete_flow(priv, ct_flow, attr);
mutex_unlock(&priv->control_lock);
}
static int
+mlx5_tc_ct_fs_init(struct mlx5_tc_ct_priv *ct_priv)
+{
+ struct mlx5_flow_table *post_ct = mlx5e_tc_post_act_get_ft(ct_priv->post_act);
+ struct mlx5_ct_fs_ops *fs_ops = mlx5_ct_fs_dmfs_ops_get();
+ int err;
+
+ if (ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB &&
+ ct_priv->dev->priv.steering->mode == MLX5_FLOW_STEERING_MODE_SMFS) {
+ ct_dbg("Using SMFS ct flow steering provider");
+ fs_ops = mlx5_ct_fs_smfs_ops_get();
+ }
+
+ ct_priv->fs = kzalloc(sizeof(*ct_priv->fs) + fs_ops->priv_size, GFP_KERNEL);
+ if (!ct_priv->fs)
+ return -ENOMEM;
+
+ ct_priv->fs->netdev = ct_priv->netdev;
+ ct_priv->fs->dev = ct_priv->dev;
+ ct_priv->fs_ops = fs_ops;
+
+ err = ct_priv->fs_ops->init(ct_priv->fs, ct_priv->ct, ct_priv->ct_nat, post_ct);
+ if (err)
+ goto err_init;
+
+ return 0;
+
+err_init:
+ kfree(ct_priv->fs);
+ return err;
+}
+
+static int
mlx5_tc_ct_init_check_esw_support(struct mlx5_eswitch *esw,
const char **err_msg)
{
@@ -2064,7 +2062,7 @@ mlx5_tc_ct_init_check_support(struct mlx5e_priv *priv,
/* Ignore_flow_level support isn't supported by default for VFs and so post_act
* won't be supported. Skip showing error msg.
*/
- if (priv->mdev->coredev_type != MLX5_COREDEV_VF)
+ if (priv->mdev->coredev_type == MLX5_COREDEV_PF)
err_msg = "post action is missing";
err = -EOPNOTSUPP;
goto out_err;
@@ -2079,6 +2077,29 @@ out_err:
return err;
}
+static void
+mlx5_ct_tc_create_dbgfs(struct mlx5_tc_ct_priv *ct_priv)
+{
+ bool is_fdb = ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB;
+ struct mlx5_tc_ct_debugfs *ct_dbgfs = &ct_priv->debugfs;
+ char dirname[16] = {};
+
+ if (sscanf(dirname, "ct_%s", is_fdb ? "fdb" : "nic") < 0)
+ return;
+
+ ct_dbgfs->root = debugfs_create_dir(dirname, mlx5_debugfs_get_dev_root(ct_priv->dev));
+ debugfs_create_atomic_t("offloaded", 0400, ct_dbgfs->root,
+ &ct_dbgfs->stats.offloaded);
+ debugfs_create_atomic_t("rx_dropped", 0400, ct_dbgfs->root,
+ &ct_dbgfs->stats.rx_dropped);
+}
+
+static void
+mlx5_ct_tc_remove_dbgfs(struct mlx5_tc_ct_priv *ct_priv)
+{
+ debugfs_remove_recursive(ct_priv->debugfs.root);
+}
+
#define INIT_ERR_PREFIX "tc ct offload init failed"
struct mlx5_tc_ct_priv *
@@ -2150,8 +2171,23 @@ mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains,
if (rhashtable_init(&ct_priv->ct_tuples_nat_ht, &tuples_nat_ht_params))
goto err_ct_tuples_nat_ht;
+ ct_priv->wq = alloc_ordered_workqueue("mlx5e_ct_priv_wq", 0);
+ if (!ct_priv->wq) {
+ err = -ENOMEM;
+ goto err_wq;
+ }
+
+ err = mlx5_tc_ct_fs_init(ct_priv);
+ if (err)
+ goto err_init_fs;
+
+ mlx5_ct_tc_create_dbgfs(ct_priv);
return ct_priv;
+err_init_fs:
+ destroy_workqueue(ct_priv->wq);
+err_wq:
+ rhashtable_destroy(&ct_priv->ct_tuples_nat_ht);
err_ct_tuples_nat_ht:
rhashtable_destroy(&ct_priv->ct_tuples_ht);
err_ct_tuples_ht:
@@ -2180,8 +2216,13 @@ mlx5_tc_ct_clean(struct mlx5_tc_ct_priv *ct_priv)
if (!ct_priv)
return;
+ destroy_workqueue(ct_priv->wq);
+ mlx5_ct_tc_remove_dbgfs(ct_priv);
chains = ct_priv->chains;
+ ct_priv->fs_ops->destroy(ct_priv->fs);
+ kfree(ct_priv->fs);
+
mlx5_chains_destroy_global_table(chains, ct_priv->ct_nat);
mlx5_chains_destroy_global_table(chains, ct_priv->ct);
mapping_destroy(ct_priv->zone_mapping);
@@ -2206,22 +2247,22 @@ mlx5e_tc_ct_restore_flow(struct mlx5_tc_ct_priv *ct_priv,
return true;
if (mapping_find(ct_priv->zone_mapping, zone_restore_id, &zone))
- return false;
+ goto out_inc_drop;
if (!mlx5_tc_ct_skb_to_tuple(skb, &tuple, zone))
- return false;
+ goto out_inc_drop;
spin_lock(&ct_priv->ht_lock);
entry = mlx5_tc_ct_entry_get(ct_priv, &tuple);
if (!entry) {
spin_unlock(&ct_priv->ht_lock);
- return false;
+ goto out_inc_drop;
}
if (IS_ERR(entry)) {
spin_unlock(&ct_priv->ht_lock);
- return false;
+ goto out_inc_drop;
}
spin_unlock(&ct_priv->ht_lock);
@@ -2229,4 +2270,8 @@ mlx5e_tc_ct_restore_flow(struct mlx5_tc_ct_priv *ct_priv,
__mlx5_tc_ct_entry_put(entry);
return true;
+
+out_inc_drop:
+ atomic_inc(&ct_priv->debugfs.stats.rx_dropped);
+ return false;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h
index 99662af1e41a..5bbd6b92840f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h
@@ -62,10 +62,11 @@ struct mlx5_ct_attr {
misc_parameters_2.metadata_reg_c_4),\
}
+/* 8 LSB of metadata C5 are reserved for packet color */
#define fteid_to_reg_ct {\
.mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_5,\
- .moffset = 0,\
- .mlen = 32,\
+ .moffset = 8,\
+ .mlen = 24,\
.soffset = MLX5_BYTE_OFF(fte_match_param,\
misc_parameters_2.metadata_reg_c_5),\
}
@@ -84,8 +85,8 @@ struct mlx5_ct_attr {
.mlen = ESW_ZONE_ID_BITS,\
}
-#define REG_MAPPING_MLEN(reg) (mlx5e_tc_attr_to_reg_mappings[reg].mlen)
-#define REG_MAPPING_MOFFSET(reg) (mlx5e_tc_attr_to_reg_mappings[reg].moffset)
+#define MLX5_CT_ZONE_BITS MLX5_REG_MAPPING_MBITS(ZONE_TO_REG)
+#define MLX5_CT_ZONE_MASK MLX5_REG_MAPPING_MASK(ZONE_TO_REG)
#if IS_ENABLED(CONFIG_MLX5_TC_CT)
@@ -116,19 +117,21 @@ mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv,
struct mlx5_flow_handle *
mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv,
- struct mlx5e_tc_flow *flow,
struct mlx5_flow_spec *spec,
struct mlx5_flow_attr *attr,
struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts);
void
mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *priv,
- struct mlx5e_tc_flow *flow,
struct mlx5_flow_attr *attr);
bool
mlx5e_tc_ct_restore_flow(struct mlx5_tc_ct_priv *ct_priv,
struct sk_buff *skb, u8 zone_restore_id);
+int
+mlx5_tc_ct_set_ct_clear_regs(struct mlx5_tc_ct_priv *priv,
+ struct mlx5e_tc_mod_hdr_acts *mod_acts);
+
#else /* CONFIG_MLX5_TC_CT */
static inline struct mlx5_tc_ct_priv *
@@ -171,6 +174,13 @@ mlx5_tc_ct_add_no_trk_match(struct mlx5_flow_spec *spec)
}
static inline int
+mlx5_tc_ct_set_ct_clear_regs(struct mlx5_tc_ct_priv *priv,
+ struct mlx5e_tc_mod_hdr_acts *mod_acts)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int
mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv,
struct mlx5_flow_attr *attr,
struct mlx5e_tc_mod_hdr_acts *mod_acts,
@@ -183,7 +193,6 @@ mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv,
static inline struct mlx5_flow_handle *
mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv,
- struct mlx5e_tc_flow *flow,
struct mlx5_flow_spec *spec,
struct mlx5_flow_attr *attr,
struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts)
@@ -193,7 +202,6 @@ mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv,
static inline void
mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *priv,
- struct mlx5e_tc_flow *flow,
struct mlx5_flow_attr *attr)
{
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h
index b689701ac7d8..2e42d7c5451e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h
@@ -5,11 +5,13 @@
#define __MLX5_EN_TC_PRIV_H__
#include "en_tc.h"
+#include "en/tc/act/act.h"
#define MLX5E_TC_FLOW_BASE (MLX5E_TC_FLAG_LAST_EXPORTED_BIT + 1)
#define MLX5E_TC_MAX_SPLITS 1
+
enum {
MLX5E_TC_FLOW_FLAG_INGRESS = MLX5E_TC_FLAG_INGRESS_BIT,
MLX5E_TC_FLOW_FLAG_EGRESS = MLX5E_TC_FLAG_EGRESS_BIT,
@@ -32,13 +34,17 @@ enum {
struct mlx5e_tc_flow_parse_attr {
const struct ip_tunnel_info *tun_info[MLX5_MAX_FLOW_FWD_VPORTS];
+ struct mlx5e_mpls_info mpls_info[MLX5_MAX_FLOW_FWD_VPORTS];
struct net_device *filter_dev;
struct mlx5_flow_spec spec;
+ struct pedit_headers_action hdrs[__PEDIT_CMD_MAX];
struct mlx5e_tc_mod_hdr_acts mod_hdr_acts;
int mirred_ifindex[MLX5_MAX_FLOW_FWD_VPORTS];
- struct ethhdr eth;
+ struct mlx5e_tc_act_parse_state parse_state;
};
+struct mlx5_fs_chains *mlx5e_nic_chains(struct mlx5e_tc_table *tc);
+
/* Helper struct for accessing a struct containing list_head array.
* Containing struct
* |- Helper array
@@ -90,6 +96,7 @@ struct mlx5e_tc_flow {
struct encap_flow_item encaps[MLX5_MAX_FLOW_FWD_VPORTS];
struct mlx5e_tc_flow *peer_flow;
struct mlx5e_mod_hdr_handle *mh; /* attached mod header instance */
+ struct mlx5e_mod_hdr_handle *slow_mh; /* attached mod header instance for slow path */
struct mlx5e_hairpin_entry *hpe; /* attached hairpin instance */
struct list_head hairpin; /* flows sharing the same hairpin */
struct list_head peer; /* flows with peer flow */
@@ -103,10 +110,21 @@ struct mlx5e_tc_flow {
struct rcu_head rcu_head;
struct completion init_done;
struct completion del_hw_done;
- int tunnel_id; /* the mapped tunnel id of this flow */
struct mlx5_flow_attr *attr;
+ struct list_head attrs;
+ u32 chain_mapping;
};
+struct mlx5_flow_handle *
+mlx5e_tc_rule_offload(struct mlx5e_priv *priv,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_flow_attr *attr);
+
+void
+mlx5e_tc_rule_unoffload(struct mlx5e_priv *priv,
+ struct mlx5_flow_handle *rule,
+ struct mlx5_flow_attr *attr);
+
u8 mlx5e_tc_get_ip_version(struct mlx5_flow_spec *spec, bool outer);
struct mlx5_flow_handle *
@@ -115,7 +133,17 @@ mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw,
struct mlx5_flow_spec *spec,
struct mlx5_flow_attr *attr);
+struct mlx5_flow_attr *
+mlx5e_tc_get_encap_attr(struct mlx5e_tc_flow *flow);
+
+void mlx5e_tc_unoffload_flow_post_acts(struct mlx5e_tc_flow *flow);
+int mlx5e_tc_offload_flow_post_acts(struct mlx5e_tc_flow *flow);
+
+bool mlx5e_is_eswitch_flow(struct mlx5e_tc_flow *flow);
+bool mlx5e_is_ft_flow(struct mlx5e_tc_flow *flow);
bool mlx5e_is_offloaded_flow(struct mlx5e_tc_flow *flow);
+int mlx5e_get_flow_namespace(struct mlx5e_tc_flow *flow);
+bool mlx5e_same_hw_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv);
static inline void __flow_flag_set(struct mlx5e_tc_flow *flow, unsigned long flag)
{
@@ -165,6 +193,7 @@ struct mlx5_flow_handle *
mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch *esw,
struct mlx5e_tc_flow *flow,
struct mlx5_flow_spec *spec);
+
void mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw,
struct mlx5e_tc_flow *flow,
struct mlx5_flow_attr *attr);
@@ -176,4 +205,14 @@ struct mlx5_fc *mlx5e_tc_get_counter(struct mlx5e_tc_flow *flow);
struct mlx5e_tc_int_port_priv *
mlx5e_get_int_port_priv(struct mlx5e_priv *priv);
+
+struct mlx5e_flow_meters *mlx5e_get_flow_meters(struct mlx5_core_dev *dev);
+
+void *mlx5e_get_match_headers_value(u32 flags, struct mlx5_flow_spec *spec);
+void *mlx5e_get_match_headers_criteria(u32 flags, struct mlx5_flow_spec *spec);
+
+int mlx5e_policer_validate(const struct flow_action *action,
+ const struct flow_action_entry *act,
+ struct netlink_ext_ack *extack);
+
#endif /* __MLX5_EN_TC_PRIV_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
index a5e450973225..e6f64d890fb3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
@@ -1,6 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/* Copyright (c) 2018 Mellanox Technologies. */
+#include <net/inet_ecn.h>
#include <net/vxlan.h>
#include <net/gre.h>
#include <net/geneve.h>
@@ -103,7 +104,7 @@ static int get_route_and_out_devs(struct mlx5e_priv *priv,
}
static int mlx5e_route_lookup_ipv4_get(struct mlx5e_priv *priv,
- struct net_device *mirred_dev,
+ struct net_device *dev,
struct mlx5e_tc_tun_route_attr *attr)
{
struct net_device *route_dev;
@@ -122,13 +123,13 @@ static int mlx5e_route_lookup_ipv4_get(struct mlx5e_priv *priv,
uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH);
attr->fl.fl4.flowi4_oif = uplink_dev->ifindex;
} else {
- struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(mirred_dev);
+ struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(dev);
if (tunnel && tunnel->get_remote_ifindex)
- attr->fl.fl4.flowi4_oif = tunnel->get_remote_ifindex(mirred_dev);
+ attr->fl.fl4.flowi4_oif = tunnel->get_remote_ifindex(dev);
}
- rt = ip_route_output_key(dev_net(mirred_dev), &attr->fl.fl4);
+ rt = ip_route_output_key(dev_net(dev), &attr->fl.fl4);
if (IS_ERR(rt))
return PTR_ERR(rt);
@@ -235,7 +236,7 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv,
int err;
/* add the IP fields */
- attr.fl.fl4.flowi4_tos = tun_key->tos;
+ attr.fl.fl4.flowi4_tos = tun_key->tos & ~INET_ECN_MASK;
attr.fl.fl4.daddr = tun_key->u.ipv4.dst;
attr.fl.fl4.saddr = tun_key->u.ipv4.src;
attr.ttl = tun_key->ttl;
@@ -350,7 +351,7 @@ int mlx5e_tc_tun_update_header_ipv4(struct mlx5e_priv *priv,
int err;
/* add the IP fields */
- attr.fl.fl4.flowi4_tos = tun_key->tos;
+ attr.fl.fl4.flowi4_tos = tun_key->tos & ~INET_ECN_MASK;
attr.fl.fl4.daddr = tun_key->u.ipv4.dst;
attr.fl.fl4.saddr = tun_key->u.ipv4.src;
attr.ttl = tun_key->ttl;
@@ -440,10 +441,10 @@ release_neigh:
#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
static int mlx5e_route_lookup_ipv6_get(struct mlx5e_priv *priv,
- struct net_device *mirred_dev,
+ struct net_device *dev,
struct mlx5e_tc_tun_route_attr *attr)
{
- struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(mirred_dev);
+ struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(dev);
struct net_device *route_dev;
struct net_device *out_dev;
struct dst_entry *dst;
@@ -451,8 +452,8 @@ static int mlx5e_route_lookup_ipv6_get(struct mlx5e_priv *priv,
int ret;
if (tunnel && tunnel->get_remote_ifindex)
- attr->fl.fl6.flowi6_oif = tunnel->get_remote_ifindex(mirred_dev);
- dst = ipv6_stub->ipv6_dst_lookup_flow(dev_net(mirred_dev), NULL, &attr->fl.fl6,
+ attr->fl.fl6.flowi6_oif = tunnel->get_remote_ifindex(dev);
+ dst = ipv6_stub->ipv6_dst_lookup_flow(dev_net(dev), NULL, &attr->fl.fl6,
NULL);
if (IS_ERR(dst))
return PTR_ERR(dst);
@@ -505,7 +506,7 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv,
int err;
attr.ttl = tun_key->ttl;
- attr.fl.fl6.flowlabel = ip6_make_flowinfo(RT_TOS(tun_key->tos), tun_key->label);
+ attr.fl.fl6.flowlabel = ip6_make_flowinfo(tun_key->tos, tun_key->label);
attr.fl.fl6.daddr = tun_key->u.ipv6.dst;
attr.fl.fl6.saddr = tun_key->u.ipv6.src;
@@ -619,7 +620,7 @@ int mlx5e_tc_tun_update_header_ipv6(struct mlx5e_priv *priv,
attr.ttl = tun_key->ttl;
- attr.fl.fl6.flowlabel = ip6_make_flowinfo(RT_TOS(tun_key->tos), tun_key->label);
+ attr.fl.fl6.flowlabel = ip6_make_flowinfo(tun_key->tos, tun_key->label);
attr.fl.fl6.daddr = tun_key->u.ipv6.dst;
attr.fl.fl6.saddr = tun_key->u.ipv6.src;
@@ -708,9 +709,11 @@ release_neigh:
int mlx5e_tc_tun_route_lookup(struct mlx5e_priv *priv,
struct mlx5_flow_spec *spec,
- struct mlx5_flow_attr *flow_attr)
+ struct mlx5_flow_attr *flow_attr,
+ struct net_device *filter_dev)
{
struct mlx5_esw_flow_attr *esw_attr = flow_attr->esw_attr;
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct mlx5e_tc_int_port *int_port;
TC_TUN_ROUTE_ATTR_INIT(attr);
u16 vport_num;
@@ -720,14 +723,14 @@ int mlx5e_tc_tun_route_lookup(struct mlx5e_priv *priv,
/* Addresses are swapped for decap */
attr.fl.fl4.saddr = esw_attr->rx_tun_attr->dst_ip.v4;
attr.fl.fl4.daddr = esw_attr->rx_tun_attr->src_ip.v4;
- err = mlx5e_route_lookup_ipv4_get(priv, priv->netdev, &attr);
+ err = mlx5e_route_lookup_ipv4_get(priv, filter_dev, &attr);
}
#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
else if (flow_attr->tun_ip_version == 6) {
/* Addresses are swapped for decap */
attr.fl.fl6.saddr = esw_attr->rx_tun_attr->dst_ip.v6;
attr.fl.fl6.daddr = esw_attr->rx_tun_attr->src_ip.v6;
- err = mlx5e_route_lookup_ipv6_get(priv, priv->netdev, &attr);
+ err = mlx5e_route_lookup_ipv6_get(priv, filter_dev, &attr);
}
#endif
else
@@ -745,7 +748,7 @@ int mlx5e_tc_tun_route_lookup(struct mlx5e_priv *priv,
esw_attr->rx_tun_attr->vni = MLX5_GET(fte_match_param, spec->match_value,
misc_parameters.vxlan_vni);
esw_attr->rx_tun_attr->decap_vport = vport_num;
- } else if (netif_is_ovs_master(attr.route_dev)) {
+ } else if (netif_is_ovs_master(attr.route_dev) && mlx5e_tc_int_port_supported(esw)) {
int_port = mlx5e_tc_int_port_get(mlx5e_get_int_port_priv(priv),
attr.route_dev->ifindex,
MLX5E_TC_INT_PORT_INGRESS);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h
index aa092eaeaec3..b38f693bbb52 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h
@@ -94,7 +94,8 @@ mlx5e_tc_tun_update_header_ipv6(struct mlx5e_priv *priv,
#endif
int mlx5e_tc_tun_route_lookup(struct mlx5e_priv *priv,
struct mlx5_flow_spec *spec,
- struct mlx5_flow_attr *attr);
+ struct mlx5_flow_attr *attr,
+ struct net_device *filter_dev);
bool mlx5e_tc_tun_device_to_offload(struct mlx5e_priv *priv,
struct net_device *netdev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
index 042b1abe1437..5aff97914367 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
@@ -173,19 +173,29 @@ void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
list_for_each_entry(flow, flow_list, tmp_list) {
if (!mlx5e_is_offloaded_flow(flow) || !flow_flag_test(flow, SLOW))
continue;
- attr = flow->attr;
- esw_attr = attr->esw_attr;
- spec = &attr->parse_attr->spec;
+ spec = &flow->attr->parse_attr->spec;
+
+ attr = mlx5e_tc_get_encap_attr(flow);
+ esw_attr = attr->esw_attr;
esw_attr->dests[flow->tmp_entry_index].pkt_reformat = e->pkt_reformat;
esw_attr->dests[flow->tmp_entry_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
/* Do not offload flows with unresolved neighbors */
if (!mlx5e_tc_flow_all_encaps_valid(esw_attr))
continue;
+
+ err = mlx5e_tc_offload_flow_post_acts(flow);
+ if (err) {
+ mlx5_core_warn(priv->mdev, "Failed to update flow post acts, %d\n",
+ err);
+ continue;
+ }
+
/* update from slow path rule to encap rule */
- rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, attr);
+ rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, flow->attr);
if (IS_ERR(rule)) {
+ mlx5e_tc_unoffload_flow_post_acts(flow);
err = PTR_ERR(rule);
mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
err);
@@ -214,12 +224,13 @@ void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
list_for_each_entry(flow, flow_list, tmp_list) {
if (!mlx5e_is_offloaded_flow(flow) || flow_flag_test(flow, SLOW))
continue;
- attr = flow->attr;
- esw_attr = attr->esw_attr;
- spec = &attr->parse_attr->spec;
+ spec = &flow->attr->parse_attr->spec;
/* update from encap rule to slow path rule */
rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec);
+
+ attr = mlx5e_tc_get_encap_attr(flow);
+ esw_attr = attr->esw_attr;
/* mark the flow's encap dest as non-valid */
esw_attr->dests[flow->tmp_entry_index].flags &= ~MLX5_ESW_DEST_ENCAP_VALID;
@@ -230,7 +241,8 @@ void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
continue;
}
- mlx5e_tc_unoffload_fdb_rules(esw, flow, attr);
+ mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
+ mlx5e_tc_unoffload_flow_post_acts(flow);
flow->rule[0] = rule;
/* was unset when fast path rule removed */
flow_flag_set(flow, OFFLOADED);
@@ -488,12 +500,17 @@ static void mlx5e_detach_encap_route(struct mlx5e_priv *priv,
int out_index);
void mlx5e_detach_encap(struct mlx5e_priv *priv,
- struct mlx5e_tc_flow *flow, int out_index)
+ struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_attr *attr,
+ int out_index)
{
struct mlx5e_encap_entry *e = flow->encaps[out_index].e;
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
- if (flow->attr->esw_attr->dests[out_index].flags &
+ if (!mlx5e_is_eswitch_flow(flow))
+ return;
+
+ if (attr->esw_attr->dests[out_index].flags &
MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE)
mlx5e_detach_encap_route(priv, flow, out_index);
@@ -733,6 +750,7 @@ static unsigned int mlx5e_route_tbl_get_last_update(struct mlx5e_priv *priv)
static int mlx5e_attach_encap_route(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_attr *attr,
struct mlx5e_encap_entry *e,
bool new_encap_entry,
unsigned long tbl_time_before,
@@ -740,6 +758,7 @@ static int mlx5e_attach_encap_route(struct mlx5e_priv *priv,
int mlx5e_attach_encap(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_attr *attr,
struct net_device *mirred_dev,
int out_index,
struct netlink_ext_ack *extack,
@@ -748,8 +767,8 @@ int mlx5e_attach_encap(struct mlx5e_priv *priv,
{
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct mlx5e_tc_flow_parse_attr *parse_attr;
- struct mlx5_flow_attr *attr = flow->attr;
const struct ip_tunnel_info *tun_info;
+ const struct mlx5e_mpls_info *mpls_info;
unsigned long tbl_time_before = 0;
struct mlx5e_encap_entry *e;
struct mlx5e_encap_key key;
@@ -760,6 +779,7 @@ int mlx5e_attach_encap(struct mlx5e_priv *priv,
parse_attr = attr->parse_attr;
tun_info = parse_attr->tun_info[out_index];
+ mpls_info = &parse_attr->mpls_info[out_index];
family = ip_tunnel_info_af(tun_info);
key.ip_tun_key = &tun_info->key;
key.tc_tunnel = mlx5e_get_tc_tun(mirred_dev);
@@ -810,6 +830,7 @@ int mlx5e_attach_encap(struct mlx5e_priv *priv,
goto out_err_init;
}
e->tun_info = tun_info;
+ memcpy(&e->mpls_info, mpls_info, sizeof(*mpls_info));
err = mlx5e_tc_tun_init_encap_attr(mirred_dev, priv, e, extack);
if (err)
goto out_err_init;
@@ -834,8 +855,8 @@ int mlx5e_attach_encap(struct mlx5e_priv *priv,
e->compl_result = 1;
attach_flow:
- err = mlx5e_attach_encap_route(priv, flow, e, entry_created, tbl_time_before,
- out_index);
+ err = mlx5e_attach_encap_route(priv, flow, attr, e, entry_created,
+ tbl_time_before, out_index);
if (err)
goto out_err;
@@ -885,20 +906,18 @@ int mlx5e_attach_decap(struct mlx5e_priv *priv,
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr;
struct mlx5_pkt_reformat_params reformat_params;
- struct mlx5e_tc_flow_parse_attr *parse_attr;
struct mlx5e_decap_entry *d;
struct mlx5e_decap_key key;
uintptr_t hash_key;
int err = 0;
- parse_attr = flow->attr->parse_attr;
- if (sizeof(parse_attr->eth) > MLX5_CAP_ESW(priv->mdev, max_encap_header_size)) {
+ if (sizeof(attr->eth) > MLX5_CAP_ESW(priv->mdev, max_encap_header_size)) {
NL_SET_ERR_MSG_MOD(extack,
"encap header larger than max supported");
return -EOPNOTSUPP;
}
- key.key = parse_attr->eth;
+ key.key = attr->eth;
hash_key = hash_decap_info(&key);
mutex_lock(&esw->offloads.decap_tbl_lock);
d = mlx5e_decap_get(priv, &key, hash_key);
@@ -928,8 +947,8 @@ int mlx5e_attach_decap(struct mlx5e_priv *priv,
memset(&reformat_params, 0, sizeof(reformat_params));
reformat_params.type = MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2;
- reformat_params.size = sizeof(parse_attr->eth);
- reformat_params.data = &parse_attr->eth;
+ reformat_params.size = sizeof(attr->eth);
+ reformat_params.data = &attr->eth;
d->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
&reformat_params,
MLX5_FLOW_NAMESPACE_FDB);
@@ -1159,7 +1178,7 @@ int mlx5e_attach_decap_route(struct mlx5e_priv *priv,
tbl_time_before = mlx5e_route_tbl_get_last_update(priv);
tbl_time_after = tbl_time_before;
- err = mlx5e_tc_tun_route_lookup(priv, &parse_attr->spec, attr);
+ err = mlx5e_tc_tun_route_lookup(priv, &parse_attr->spec, attr, parse_attr->filter_dev);
if (err || !esw_attr->rx_tun_attr->decap_vport)
goto out;
@@ -1198,6 +1217,7 @@ out:
static int mlx5e_attach_encap_route(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_attr *attr,
struct mlx5e_encap_entry *e,
bool new_encap_entry,
unsigned long tbl_time_before,
@@ -1206,7 +1226,6 @@ static int mlx5e_attach_encap_route(struct mlx5e_priv *priv,
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
unsigned long tbl_time_after = tbl_time_before;
struct mlx5e_tc_flow_parse_attr *parse_attr;
- struct mlx5_flow_attr *attr = flow->attr;
const struct ip_tunnel_info *tun_info;
struct mlx5_esw_flow_attr *esw_attr;
struct mlx5e_route_entry *r;
@@ -1357,17 +1376,19 @@ static void mlx5e_reoffload_encap(struct mlx5e_priv *priv,
list_for_each_entry(flow, encap_flows, tmp_list) {
struct mlx5e_tc_flow_parse_attr *parse_attr;
- struct mlx5_flow_attr *attr = flow->attr;
struct mlx5_esw_flow_attr *esw_attr;
struct mlx5_flow_handle *rule;
+ struct mlx5_flow_attr *attr;
struct mlx5_flow_spec *spec;
if (flow_flag_test(flow, FAILED))
continue;
+ spec = &flow->attr->parse_attr->spec;
+
+ attr = mlx5e_tc_get_encap_attr(flow);
esw_attr = attr->esw_attr;
parse_attr = attr->parse_attr;
- spec = &parse_attr->spec;
err = mlx5e_update_vf_tunnel(esw, esw_attr, &parse_attr->mod_hdr_acts,
e->out_dev, e->route_dev_ifindex,
@@ -1377,7 +1398,7 @@ static void mlx5e_reoffload_encap(struct mlx5e_priv *priv,
continue;
}
- err = mlx5e_tc_add_flow_mod_hdr(priv, parse_attr, flow);
+ err = mlx5e_tc_add_flow_mod_hdr(priv, flow, attr);
if (err) {
mlx5_core_warn(priv->mdev, "Failed to update flow mod_hdr err=%d",
err);
@@ -1389,9 +1410,18 @@ static void mlx5e_reoffload_encap(struct mlx5e_priv *priv,
esw_attr->dests[flow->tmp_entry_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
if (!mlx5e_tc_flow_all_encaps_valid(esw_attr))
goto offload_to_slow_path;
+
+ err = mlx5e_tc_offload_flow_post_acts(flow);
+ if (err) {
+ mlx5_core_warn(priv->mdev, "Failed to update flow post acts, %d\n",
+ err);
+ goto offload_to_slow_path;
+ }
+
/* update from slow path rule to encap rule */
- rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, attr);
+ rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, flow->attr);
if (IS_ERR(rule)) {
+ mlx5e_tc_unoffload_flow_post_acts(flow);
err = PTR_ERR(rule);
mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
err);
@@ -1480,7 +1510,7 @@ static void mlx5e_reoffload_decap(struct mlx5e_priv *priv,
parse_attr = attr->parse_attr;
spec = &parse_attr->spec;
- err = mlx5e_tc_tun_route_lookup(priv, spec, attr);
+ err = mlx5e_tc_tun_route_lookup(priv, spec, attr, parse_attr->filter_dev);
if (err) {
mlx5_core_warn(priv->mdev, "Failed to lookup route for flow, %d\n",
err);
@@ -1579,6 +1609,8 @@ mlx5e_init_fib_work_ipv4(struct mlx5e_priv *priv,
struct net_device *fib_dev;
fen_info = container_of(info, struct fib_entry_notifier_info, info);
+ if (fen_info->fi->nh)
+ return NULL;
fib_dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev;
if (!fib_dev || fib_dev->netdev_ops != &mlx5e_netdev_ops ||
fen_info->dst_len != 32)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h
index 3391504d9a08..d542b8476491 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h
@@ -7,15 +7,19 @@
#include "tc_priv.h"
void mlx5e_detach_encap(struct mlx5e_priv *priv,
- struct mlx5e_tc_flow *flow, int out_index);
+ struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_attr *attr,
+ int out_index);
int mlx5e_attach_encap(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_attr *attr,
struct net_device *mirred_dev,
int out_index,
struct netlink_ext_ack *extack,
struct net_device **encap_dev,
bool *encap_valid);
+
int mlx5e_attach_decap(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow,
struct netlink_ext_ack *extack);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c
index 60952b33b568..c5b1617d556f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c
@@ -30,16 +30,15 @@ static int generate_ip_tun_hdr(char buf[],
struct mlx5e_encap_entry *r)
{
const struct ip_tunnel_key *tun_key = &r->tun_info->key;
+ const struct mlx5e_mpls_info *mpls_info = &r->mpls_info;
struct udphdr *udp = (struct udphdr *)(buf);
struct mpls_shim_hdr *mpls;
- u32 tun_id;
- tun_id = be32_to_cpu(tunnel_id_to_key32(tun_key->tun_id));
mpls = (struct mpls_shim_hdr *)(udp + 1);
*ip_proto = IPPROTO_UDP;
udp->dest = tun_key->tp_dst;
- *mpls = mpls_entry_encode(tun_id, tun_key->ttl, tun_key->tos, true);
+ *mpls = mpls_entry_encode(mpls_info->label, mpls_info->ttl, mpls_info->tc, mpls_info->bos);
return 0;
}
@@ -60,37 +59,31 @@ static int parse_tunnel(struct mlx5e_priv *priv,
void *headers_v)
{
struct flow_rule *rule = flow_cls_offload_flow_rule(f);
- struct flow_match_enc_keyid enc_keyid;
struct flow_match_mpls match;
void *misc2_c;
void *misc2_v;
- misc2_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
- misc_parameters_2);
- misc2_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
- misc_parameters_2);
-
- if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_MPLS))
- return 0;
-
- if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID))
- return 0;
-
- flow_rule_match_enc_keyid(rule, &enc_keyid);
-
- if (!enc_keyid.mask->keyid)
- return 0;
-
if (!MLX5_CAP_ETH(priv->mdev, tunnel_stateless_mpls_over_udp) &&
!(MLX5_CAP_GEN(priv->mdev, flex_parser_protocols) & MLX5_FLEX_PROTO_CW_MPLS_UDP))
return -EOPNOTSUPP;
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID))
+ return -EOPNOTSUPP;
+
+ if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_MPLS))
+ return 0;
+
flow_rule_match_mpls(rule, &match);
/* Only support matching the first LSE */
if (match.mask->used_lses != 1)
return -EOPNOTSUPP;
+ misc2_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ misc_parameters_2);
+ misc2_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ misc_parameters_2);
+
MLX5_SET(fte_match_set_misc2, misc2_c,
outer_first_mpls_over_udp.mpls_label,
match.mask->ls[0].mpls_label);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tir.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tir.c
index da169b816665..d4239e3b3c88 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tir.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tir.c
@@ -88,9 +88,6 @@ void mlx5e_tir_builder_build_packet_merge(struct mlx5e_tir_builder *builder,
(MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ - rough_max_l2_l3_hdr_sz) >> 8);
MLX5_SET(tirc, tirc, lro_timeout_period_usecs, pkt_merge_param->timeout);
break;
- case MLX5E_PACKET_MERGE_SHAMPO:
- MLX5_SET(tirc, tirc, packet_merge_mask, MLX5_TIRC_PACKET_MERGE_MASK_SHAMPO);
- break;
default:
break;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c
index a55b066746cb..201ac7dd338f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c
@@ -14,19 +14,26 @@ static int mlx5e_trap_napi_poll(struct napi_struct *napi, int budget)
bool busy = false;
int work_done = 0;
+ rcu_read_lock();
+
ch_stats->poll++;
work_done = mlx5e_poll_rx_cq(&rq->cq, budget);
busy |= work_done == budget;
busy |= rq->post_wqes(rq);
- if (busy)
- return budget;
+ if (busy) {
+ work_done = budget;
+ goto out;
+ }
if (unlikely(!napi_complete_done(napi, work_done)))
- return work_done;
+ goto out;
mlx5e_cq_arm(&rq->cq);
+
+out:
+ rcu_read_unlock();
return work_done;
}
@@ -140,7 +147,7 @@ static struct mlx5e_trap *mlx5e_open_trap(struct mlx5e_priv *priv)
t->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.hw_objs.mkey);
t->stats = &priv->trap_stats.ch;
- netif_napi_add(netdev, &t->napi, mlx5e_trap_napi_poll, 64);
+ netif_napi_add(netdev, &t->napi, mlx5e_trap_napi_poll);
err = mlx5e_open_trap_rq(priv, t);
if (unlikely(err))
@@ -172,6 +179,7 @@ static void mlx5e_activate_trap(struct mlx5e_trap *trap)
{
napi_enable(&trap->napi);
mlx5e_activate_rq(&trap->rq);
+ mlx5e_trigger_napi_sched(&trap->napi);
}
void mlx5e_deactivate_trap(struct mlx5e_priv *priv)
@@ -222,12 +230,12 @@ static int mlx5e_handle_action_trap(struct mlx5e_priv *priv, int trap_id)
switch (trap_id) {
case DEVLINK_TRAP_GENERIC_ID_INGRESS_VLAN_FILTER:
- err = mlx5e_add_vlan_trap(priv, trap_id, mlx5e_trap_get_tirn(priv->en_trap));
+ err = mlx5e_add_vlan_trap(priv->fs, trap_id, mlx5e_trap_get_tirn(priv->en_trap));
if (err)
goto err_out;
break;
case DEVLINK_TRAP_GENERIC_ID_DMAC_FILTER:
- err = mlx5e_add_mac_trap(priv, trap_id, mlx5e_trap_get_tirn(priv->en_trap));
+ err = mlx5e_add_mac_trap(priv->fs, trap_id, mlx5e_trap_get_tirn(priv->en_trap));
if (err)
goto err_out;
break;
@@ -248,10 +256,10 @@ static int mlx5e_handle_action_drop(struct mlx5e_priv *priv, int trap_id)
{
switch (trap_id) {
case DEVLINK_TRAP_GENERIC_ID_INGRESS_VLAN_FILTER:
- mlx5e_remove_vlan_trap(priv);
+ mlx5e_remove_vlan_trap(priv->fs);
break;
case DEVLINK_TRAP_GENERIC_ID_DMAC_FILTER:
- mlx5e_remove_mac_trap(priv);
+ mlx5e_remove_mac_trap(priv->fs);
break;
default:
netdev_warn(priv->netdev, "%s: Unknown trap id %d\n", __func__, trap_id);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
index 4cdf8e5b24c2..853f312cd757 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
@@ -9,20 +9,28 @@
#define MLX5E_TX_WQE_EMPTY_DS_COUNT (sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS)
-/* The mult of MLX5_SEND_WQE_MAX_WQEBBS * MLX5_SEND_WQEBB_NUM_DS
- * (16 * 4 == 64) does not fit in the 6-bit DS field of Ctrl Segment.
- * We use a bound lower that MLX5_SEND_WQE_MAX_WQEBBS to let a
- * full-session WQE be cache-aligned.
+#define INL_HDR_START_SZ (sizeof(((struct mlx5_wqe_eth_seg *)NULL)->inline_hdr.start))
+
+/* IPSEC inline data includes:
+ * 1. ESP trailer: up to 255 bytes of padding, 1 byte for pad length, 1 byte for
+ * next header.
+ * 2. ESP authentication data: 16 bytes for ICV.
*/
-#if L1_CACHE_BYTES < 128
-#define MLX5E_TX_MPW_MAX_WQEBBS (MLX5_SEND_WQE_MAX_WQEBBS - 1)
-#else
-#define MLX5E_TX_MPW_MAX_WQEBBS (MLX5_SEND_WQE_MAX_WQEBBS - 2)
-#endif
+#define MLX5E_MAX_TX_IPSEC_DS DIV_ROUND_UP(sizeof(struct mlx5_wqe_inline_seg) + \
+ 255 + 1 + 1 + 16, MLX5_SEND_WQE_DS)
-#define MLX5E_TX_MPW_MAX_NUM_DS (MLX5E_TX_MPW_MAX_WQEBBS * MLX5_SEND_WQEBB_NUM_DS)
+/* 366 should be big enough to cover all L2, L3 and L4 headers with possible
+ * encapsulations.
+ */
+#define MLX5E_MAX_TX_INLINE_DS DIV_ROUND_UP(366 - INL_HDR_START_SZ + VLAN_HLEN, \
+ MLX5_SEND_WQE_DS)
-#define INL_HDR_START_SZ (sizeof(((struct mlx5_wqe_eth_seg *)NULL)->inline_hdr.start))
+/* Sync the calculation with mlx5e_sq_calc_wqe_attr. */
+#define MLX5E_MAX_TX_WQEBBS DIV_ROUND_UP(MLX5E_TX_WQE_EMPTY_DS_COUNT + \
+ MLX5E_MAX_TX_INLINE_DS + \
+ MLX5E_MAX_TX_IPSEC_DS + \
+ MAX_SKB_FRAGS + 1, \
+ MLX5_SEND_WQEBB_NUM_DS)
#define MLX5E_RX_ERR_CQE(cqe) (get_cqe_opcode(cqe) != MLX5_CQE_RESP_SEND)
@@ -57,10 +65,8 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget);
int mlx5e_poll_ico_cq(struct mlx5e_cq *cq);
/* RX */
-void mlx5e_page_dma_unmap(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info);
-void mlx5e_page_release_dynamic(struct mlx5e_rq *rq,
- struct mlx5e_dma_info *dma_info,
- bool recycle);
+void mlx5e_page_dma_unmap(struct mlx5e_rq *rq, struct page *page);
+void mlx5e_page_release_dynamic(struct mlx5e_rq *rq, struct page *page, bool recycle);
INDIRECT_CALLABLE_DECLARE(bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq));
INDIRECT_CALLABLE_DECLARE(bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq));
int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget);
@@ -68,13 +74,17 @@ void mlx5e_free_rx_descs(struct mlx5e_rq *rq);
void mlx5e_free_rx_in_progress_descs(struct mlx5e_rq *rq);
/* TX */
-u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb,
- struct net_device *sb_dev);
netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev);
bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget);
void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq);
static inline bool
+mlx5e_skb_fifo_has_room(struct mlx5e_skb_fifo *fifo)
+{
+ return (*fifo->pc - *fifo->cc) < fifo->mask;
+}
+
+static inline bool
mlx5e_wqc_has_room_for(struct mlx5_wq_cyc *wq, u16 cc, u16 pc, u16 n)
{
return (mlx5_wq_cyc_ctr2ix(wq, cc - pc) >= n) || (cc == pc);
@@ -167,6 +177,11 @@ static inline u16 mlx5e_txqsq_get_next_pi(struct mlx5e_txqsq *sq, u16 size)
return pi;
}
+static inline u16 mlx5e_shampo_get_cqe_header_index(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
+{
+ return be16_to_cpu(cqe->shampo.header_entry_index) & (rq->mpwqe.shampo->hd_per_wq - 1);
+}
+
struct mlx5e_shampo_umr {
u16 len;
};
@@ -303,9 +318,9 @@ mlx5e_tx_dma_unmap(struct device *pdev, struct mlx5e_sq_dma *dma)
void mlx5e_sq_xmit_simple(struct mlx5e_txqsq *sq, struct sk_buff *skb, bool xmit_more);
void mlx5e_tx_mpwqe_ensure_complete(struct mlx5e_txqsq *sq);
-static inline bool mlx5e_tx_mpwqe_is_full(struct mlx5e_tx_mpwqe *session)
+static inline bool mlx5e_tx_mpwqe_is_full(struct mlx5e_tx_mpwqe *session, u8 max_sq_mpw_wqebbs)
{
- return session->ds_count == MLX5E_TX_MPW_MAX_NUM_DS;
+ return session->ds_count == max_sq_mpw_wqebbs * MLX5_SEND_WQEBB_NUM_DS;
}
static inline void mlx5e_rqwq_reset(struct mlx5e_rq *rq)
@@ -426,9 +441,11 @@ mlx5e_set_eseg_swp(struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg,
}
}
-static inline u16 mlx5e_stop_room_for_wqe(u16 wqe_size)
+#define MLX5E_STOP_ROOM(wqebbs) ((wqebbs) * 2 - 1)
+
+static inline u16 mlx5e_stop_room_for_wqe(struct mlx5_core_dev *mdev, u16 wqe_size)
{
- BUILD_BUG_ON(PAGE_SIZE / MLX5_SEND_WQE_BB < MLX5_SEND_WQE_MAX_WQEBBS);
+ WARN_ON_ONCE(PAGE_SIZE / MLX5_SEND_WQE_BB < mlx5e_get_max_sq_wqebbs(mdev));
/* A WQE must not cross the page boundary, hence two conditions:
* 1. Its size must not exceed the page size.
@@ -438,19 +455,36 @@ static inline u16 mlx5e_stop_room_for_wqe(u16 wqe_size)
* stop room of X-1 + X.
* WQE size is also limited by the hardware limit.
*/
+ WARN_ONCE(wqe_size > mlx5e_get_max_sq_wqebbs(mdev),
+ "wqe_size %u is greater than max SQ WQEBBs %u",
+ wqe_size, mlx5e_get_max_sq_wqebbs(mdev));
- if (__builtin_constant_p(wqe_size))
- BUILD_BUG_ON(wqe_size > MLX5_SEND_WQE_MAX_WQEBBS);
- else
- WARN_ON_ONCE(wqe_size > MLX5_SEND_WQE_MAX_WQEBBS);
+ return MLX5E_STOP_ROOM(wqe_size);
+}
- return wqe_size * 2 - 1;
+static inline u16 mlx5e_stop_room_for_max_wqe(struct mlx5_core_dev *mdev)
+{
+ return MLX5E_STOP_ROOM(mlx5e_get_max_sq_wqebbs(mdev));
+}
+
+static inline u16 mlx5e_stop_room_for_mpwqe(struct mlx5_core_dev *mdev)
+{
+ u8 mpwqe_wqebbs = mlx5e_get_max_sq_aligned_wqebbs(mdev);
+
+ return mlx5e_stop_room_for_wqe(mdev, mpwqe_wqebbs);
}
static inline bool mlx5e_icosq_can_post_wqe(struct mlx5e_icosq *sq, u16 wqe_size)
{
- u16 room = sq->reserved_room + mlx5e_stop_room_for_wqe(wqe_size);
+ u16 room = sq->reserved_room + MLX5E_STOP_ROOM(wqe_size);
return mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, room);
}
+
+static inline struct mlx5e_mpw_info *mlx5e_get_mpw_info(struct mlx5e_rq *rq, int i)
+{
+ size_t isz = struct_size(rq->mpwqe.info, alloc_units, rq->mpwqe.pages_per_wqe);
+
+ return (struct mlx5e_mpw_info *)((char *)rq->mpwqe.info + array_size(i, isz));
+}
#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
index 2f0df5cc1a2d..20507ef2f956 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
@@ -57,12 +57,14 @@ int mlx5e_xdp_max_mtu(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk)
static inline bool
mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq,
- struct mlx5e_dma_info *di, struct xdp_buff *xdp)
+ struct page *page, struct xdp_buff *xdp)
{
+ struct skb_shared_info *sinfo = NULL;
struct mlx5e_xmit_data xdptxd;
struct mlx5e_xdp_info xdpi;
struct xdp_frame *xdpf;
dma_addr_t dma_addr;
+ int i;
xdpf = xdp_convert_buff_to_frame(xdp);
if (unlikely(!xdpf))
@@ -96,46 +98,77 @@ mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq,
xdptxd.dma_addr = dma_addr;
xdpi.frame.xdpf = xdpf;
xdpi.frame.dma_addr = dma_addr;
- } else {
- /* Driver assumes that xdp_convert_buff_to_frame returns
- * an xdp_frame that points to the same memory region as
- * the original xdp_buff. It allows to map the memory only
- * once and to use the DMA_BIDIRECTIONAL mode.
- */
- xdpi.mode = MLX5E_XDP_XMIT_MODE_PAGE;
+ if (unlikely(!INDIRECT_CALL_2(sq->xmit_xdp_frame, mlx5e_xmit_xdp_frame_mpwqe,
+ mlx5e_xmit_xdp_frame, sq, &xdptxd, NULL, 0)))
+ return false;
+
+ mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, &xdpi);
+ return true;
+ }
+
+ /* Driver assumes that xdp_convert_buff_to_frame returns an xdp_frame
+ * that points to the same memory region as the original xdp_buff. It
+ * allows to map the memory only once and to use the DMA_BIDIRECTIONAL
+ * mode.
+ */
+
+ xdpi.mode = MLX5E_XDP_XMIT_MODE_PAGE;
+ xdpi.page.rq = rq;
+
+ dma_addr = page_pool_get_dma_addr(page) + (xdpf->data - (void *)xdpf);
+ dma_sync_single_for_device(sq->pdev, dma_addr, xdptxd.len, DMA_BIDIRECTIONAL);
+
+ if (unlikely(xdp_frame_has_frags(xdpf))) {
+ sinfo = xdp_get_shared_info_from_frame(xdpf);
- dma_addr = di->addr + (xdpf->data - (void *)xdpf);
- dma_sync_single_for_device(sq->pdev, dma_addr, xdptxd.len,
- DMA_TO_DEVICE);
+ for (i = 0; i < sinfo->nr_frags; i++) {
+ skb_frag_t *frag = &sinfo->frags[i];
+ dma_addr_t addr;
+ u32 len;
- xdptxd.dma_addr = dma_addr;
- xdpi.page.rq = rq;
- xdpi.page.di = *di;
+ addr = page_pool_get_dma_addr(skb_frag_page(frag)) +
+ skb_frag_off(frag);
+ len = skb_frag_size(frag);
+ dma_sync_single_for_device(sq->pdev, addr, len,
+ DMA_BIDIRECTIONAL);
+ }
}
- return INDIRECT_CALL_2(sq->xmit_xdp_frame, mlx5e_xmit_xdp_frame_mpwqe,
- mlx5e_xmit_xdp_frame, sq, &xdptxd, &xdpi, 0);
+ xdptxd.dma_addr = dma_addr;
+
+ if (unlikely(!INDIRECT_CALL_2(sq->xmit_xdp_frame, mlx5e_xmit_xdp_frame_mpwqe,
+ mlx5e_xmit_xdp_frame, sq, &xdptxd, sinfo, 0)))
+ return false;
+
+ xdpi.page.page = page;
+ mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, &xdpi);
+
+ if (unlikely(xdp_frame_has_frags(xdpf))) {
+ for (i = 0; i < sinfo->nr_frags; i++) {
+ skb_frag_t *frag = &sinfo->frags[i];
+
+ xdpi.page.page = skb_frag_page(frag);
+ mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, &xdpi);
+ }
+ }
+
+ return true;
}
/* returns true if packet was consumed by xdp */
-bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di,
- u32 *len, struct xdp_buff *xdp)
+bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct page *page,
+ struct bpf_prog *prog, struct xdp_buff *xdp)
{
- struct bpf_prog *prog = rcu_dereference(rq->xdp_prog);
u32 act;
int err;
- if (!prog)
- return false;
-
act = bpf_prog_run_xdp(prog, xdp);
switch (act) {
case XDP_PASS:
- *len = xdp->data_end - xdp->data;
return false;
case XDP_TX:
- if (unlikely(!mlx5e_xmit_xdp_buff(rq->xdpsq, rq, di, xdp)))
+ if (unlikely(!mlx5e_xmit_xdp_buff(rq->xdpsq, rq, page, xdp)))
goto xdp_abort;
__set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags); /* non-atomic */
return true;
@@ -147,11 +180,11 @@ bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di,
__set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags);
__set_bit(MLX5E_RQ_FLAG_XDP_REDIRECT, rq->flags);
if (xdp->rxq->mem.type != MEM_TYPE_XSK_BUFF_POOL)
- mlx5e_page_dma_unmap(rq, di);
+ mlx5e_page_dma_unmap(rq, page);
rq->stats->xdp_redirect++;
return true;
default:
- bpf_warn_invalid_xdp_action(act);
+ bpf_warn_invalid_xdp_action(rq->netdev, prog, act);
fallthrough;
case XDP_ABORTED:
xdp_abort:
@@ -199,7 +232,7 @@ static void mlx5e_xdp_mpwqe_session_start(struct mlx5e_xdpsq *sq)
struct mlx5e_tx_wqe *wqe;
u16 pi;
- pi = mlx5e_xdpsq_get_next_pi(sq, MLX5E_TX_MPW_MAX_WQEBBS);
+ pi = mlx5e_xdpsq_get_next_pi(sq, sq->max_sq_mpw_wqebbs);
wqe = MLX5E_TX_FETCH_WQE(sq, pi);
net_prefetchw(wqe->data);
@@ -245,10 +278,8 @@ enum {
INDIRECT_CALLABLE_SCOPE int mlx5e_xmit_xdp_frame_check_mpwqe(struct mlx5e_xdpsq *sq)
{
if (unlikely(!sq->mpwqe.wqe)) {
- const u16 stop_room = mlx5e_stop_room_for_wqe(MLX5_SEND_WQE_MAX_WQEBBS);
-
if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc,
- stop_room))) {
+ sq->stop_room))) {
/* SQ is full, ring doorbell */
mlx5e_xmit_xdp_doorbell(sq);
sq->stats->full++;
@@ -262,12 +293,26 @@ INDIRECT_CALLABLE_SCOPE int mlx5e_xmit_xdp_frame_check_mpwqe(struct mlx5e_xdpsq
}
INDIRECT_CALLABLE_SCOPE bool
+mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd,
+ struct skb_shared_info *sinfo, int check_result);
+
+INDIRECT_CALLABLE_SCOPE bool
mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd,
- struct mlx5e_xdp_info *xdpi, int check_result)
+ struct skb_shared_info *sinfo, int check_result)
{
struct mlx5e_tx_mpwqe *session = &sq->mpwqe;
struct mlx5e_xdpsq_stats *stats = sq->stats;
+ if (unlikely(sinfo)) {
+ /* MPWQE is enabled, but a multi-buffer packet is queued for
+ * transmission. MPWQE can't send fragmented packets, so close
+ * the current session and fall back to a regular WQE.
+ */
+ if (unlikely(sq->mpwqe.wqe))
+ mlx5e_xdp_mpwqe_complete(sq);
+ return mlx5e_xmit_xdp_frame(sq, xdptxd, sinfo, 0);
+ }
+
if (unlikely(xdptxd->len > sq->hw_mtu)) {
stats->err++;
return false;
@@ -288,17 +333,16 @@ mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptx
mlx5e_xdp_mpwqe_add_dseg(sq, xdptxd, stats);
- if (unlikely(mlx5e_xdp_mpqwe_is_full(session)))
+ if (unlikely(mlx5e_xdp_mpwqe_is_full(session, sq->max_sq_mpw_wqebbs)))
mlx5e_xdp_mpwqe_complete(sq);
- mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, xdpi);
stats->xmit++;
return true;
}
-INDIRECT_CALLABLE_SCOPE int mlx5e_xmit_xdp_frame_check(struct mlx5e_xdpsq *sq)
+static int mlx5e_xmit_xdp_frame_check_stop_room(struct mlx5e_xdpsq *sq, int stop_room)
{
- if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, 1))) {
+ if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, stop_room))) {
/* SQ is full, ring doorbell */
mlx5e_xmit_xdp_doorbell(sq);
sq->stats->full++;
@@ -308,43 +352,76 @@ INDIRECT_CALLABLE_SCOPE int mlx5e_xmit_xdp_frame_check(struct mlx5e_xdpsq *sq)
return MLX5E_XDP_CHECK_OK;
}
+INDIRECT_CALLABLE_SCOPE int mlx5e_xmit_xdp_frame_check(struct mlx5e_xdpsq *sq)
+{
+ return mlx5e_xmit_xdp_frame_check_stop_room(sq, 1);
+}
+
INDIRECT_CALLABLE_SCOPE bool
mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd,
- struct mlx5e_xdp_info *xdpi, int check_result)
+ struct skb_shared_info *sinfo, int check_result)
{
struct mlx5_wq_cyc *wq = &sq->wq;
- u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
- struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi);
-
- struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
- struct mlx5_wqe_eth_seg *eseg = &wqe->eth;
- struct mlx5_wqe_data_seg *dseg = wqe->data;
+ struct mlx5_wqe_ctrl_seg *cseg;
+ struct mlx5_wqe_data_seg *dseg;
+ struct mlx5_wqe_eth_seg *eseg;
+ struct mlx5e_tx_wqe *wqe;
dma_addr_t dma_addr = xdptxd->dma_addr;
u32 dma_len = xdptxd->len;
+ u16 ds_cnt, inline_hdr_sz;
+ u8 num_wqebbs = 1;
+ int num_frags = 0;
+ u16 pi;
struct mlx5e_xdpsq_stats *stats = sq->stats;
- net_prefetchw(wqe);
-
if (unlikely(dma_len < MLX5E_XDP_MIN_INLINE || sq->hw_mtu < dma_len)) {
stats->err++;
return false;
}
- if (!check_result)
- check_result = mlx5e_xmit_xdp_frame_check(sq);
+ ds_cnt = MLX5E_TX_WQE_EMPTY_DS_COUNT + 1;
+ if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE)
+ ds_cnt++;
+
+ /* check_result must be 0 if sinfo is passed. */
+ if (!check_result) {
+ int stop_room = 1;
+
+ if (unlikely(sinfo)) {
+ ds_cnt += sinfo->nr_frags;
+ num_frags = sinfo->nr_frags;
+ num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
+ /* Assuming MLX5_CAP_GEN(mdev, max_wqe_sz_sq) is big
+ * enough to hold all fragments.
+ */
+ stop_room = MLX5E_STOP_ROOM(num_wqebbs);
+ }
+
+ check_result = mlx5e_xmit_xdp_frame_check_stop_room(sq, stop_room);
+ }
if (unlikely(check_result < 0))
return false;
- cseg->fm_ce_se = 0;
+ pi = mlx5e_xdpsq_get_next_pi(sq, num_wqebbs);
+ wqe = mlx5_wq_cyc_get_wqe(wq, pi);
+ net_prefetchw(wqe);
+
+ cseg = &wqe->ctrl;
+ eseg = &wqe->eth;
+ dseg = wqe->data;
+
+ inline_hdr_sz = 0;
/* copy the inline part if required */
if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) {
- memcpy(eseg->inline_hdr.start, xdptxd->data, MLX5E_XDP_MIN_INLINE);
- eseg->inline_hdr.sz = cpu_to_be16(MLX5E_XDP_MIN_INLINE);
+ memcpy(eseg->inline_hdr.start, xdptxd->data, sizeof(eseg->inline_hdr.start));
+ memcpy(dseg, xdptxd->data + sizeof(eseg->inline_hdr.start),
+ MLX5E_XDP_MIN_INLINE - sizeof(eseg->inline_hdr.start));
dma_len -= MLX5E_XDP_MIN_INLINE;
dma_addr += MLX5E_XDP_MIN_INLINE;
+ inline_hdr_sz = MLX5E_XDP_MIN_INLINE;
dseg++;
}
@@ -354,11 +431,45 @@ mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd,
cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_SEND);
- sq->pc++;
+ if (unlikely(test_bit(MLX5E_SQ_STATE_XDP_MULTIBUF, &sq->state))) {
+ u8 num_pkts = 1 + num_frags;
+ int i;
+
+ memset(&cseg->trailer, 0, sizeof(cseg->trailer));
+ memset(eseg, 0, sizeof(*eseg) - sizeof(eseg->trailer));
+
+ eseg->inline_hdr.sz = cpu_to_be16(inline_hdr_sz);
+ dseg->lkey = sq->mkey_be;
+
+ for (i = 0; i < num_frags; i++) {
+ skb_frag_t *frag = &sinfo->frags[i];
+ dma_addr_t addr;
+
+ addr = page_pool_get_dma_addr(skb_frag_page(frag)) +
+ skb_frag_off(frag);
+
+ dseg++;
+ dseg->addr = cpu_to_be64(addr);
+ dseg->byte_count = cpu_to_be32(skb_frag_size(frag));
+ dseg->lkey = sq->mkey_be;
+ }
+
+ cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
+
+ sq->db.wqe_info[pi] = (struct mlx5e_xdp_wqe_info) {
+ .num_wqebbs = num_wqebbs,
+ .num_pkts = num_pkts,
+ };
+
+ sq->pc += num_wqebbs;
+ } else {
+ cseg->fm_ce_se = 0;
+
+ sq->pc++;
+ }
sq->doorbell_cseg = cseg;
- mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, xdpi);
stats->xmit++;
return true;
}
@@ -384,7 +495,7 @@ static void mlx5e_free_xdpsq_desc(struct mlx5e_xdpsq *sq,
break;
case MLX5E_XDP_XMIT_MODE_PAGE:
/* XDP_TX from the regular RQ */
- mlx5e_page_release_dynamic(xdpi.page.rq, &xdpi.page.di, recycle);
+ mlx5e_page_release_dynamic(xdpi.page.rq, xdpi.page.page, recycle);
break;
case MLX5E_XDP_XMIT_MODE_XSK:
/* AF_XDP send */
@@ -537,12 +648,13 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
xdpi.frame.dma_addr = xdptxd.dma_addr;
ret = INDIRECT_CALL_2(sq->xmit_xdp_frame, mlx5e_xmit_xdp_frame_mpwqe,
- mlx5e_xmit_xdp_frame, sq, &xdptxd, &xdpi, 0);
+ mlx5e_xmit_xdp_frame, sq, &xdptxd, NULL, 0);
if (unlikely(!ret)) {
dma_unmap_single(sq->pdev, xdptxd.dma_addr,
xdptxd.len, DMA_TO_DEVICE);
break;
}
+ mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, &xdpi);
nxmit++;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
index 8d991c3b7a50..bc2d9034af5b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
@@ -38,7 +38,6 @@
#include "en/txrx.h"
#define MLX5E_XDP_MIN_INLINE (ETH_HLEN + VLAN_HLEN)
-#define MLX5E_XDP_TX_DS_COUNT (MLX5E_TX_WQE_EMPTY_DS_COUNT + 1 /* SG DS */)
#define MLX5E_XDP_INLINE_WQE_MAX_DS_CNT 16
#define MLX5E_XDP_INLINE_WQE_SZ_THRSD \
@@ -47,8 +46,8 @@
struct mlx5e_xsk_param;
int mlx5e_xdp_max_mtu(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk);
-bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di,
- u32 *len, struct xdp_buff *xdp);
+bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct page *page,
+ struct bpf_prog *prog, struct xdp_buff *xdp);
void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq *sq);
bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq);
void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq);
@@ -59,11 +58,11 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
INDIRECT_CALLABLE_DECLARE(bool mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq,
struct mlx5e_xmit_data *xdptxd,
- struct mlx5e_xdp_info *xdpi,
+ struct skb_shared_info *sinfo,
int check_result));
INDIRECT_CALLABLE_DECLARE(bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq,
struct mlx5e_xmit_data *xdptxd,
- struct mlx5e_xdp_info *xdpi,
+ struct skb_shared_info *sinfo,
int check_result));
INDIRECT_CALLABLE_DECLARE(int mlx5e_xmit_xdp_frame_check_mpwqe(struct mlx5e_xdpsq *sq));
INDIRECT_CALLABLE_DECLARE(int mlx5e_xmit_xdp_frame_check(struct mlx5e_xdpsq *sq));
@@ -123,12 +122,13 @@ static inline bool mlx5e_xdp_get_inline_state(struct mlx5e_xdpsq *sq, bool cur)
return cur;
}
-static inline bool mlx5e_xdp_mpqwe_is_full(struct mlx5e_tx_mpwqe *session)
+static inline bool mlx5e_xdp_mpwqe_is_full(struct mlx5e_tx_mpwqe *session, u8 max_sq_mpw_wqebbs)
{
if (session->inline_on)
return session->ds_count + MLX5E_XDP_INLINE_WQE_MAX_DS_CNT >
- MLX5E_TX_MPW_MAX_NUM_DS;
- return mlx5e_tx_mpwqe_is_full(session);
+ max_sq_mpw_wqebbs * MLX5_SEND_WQEBB_NUM_DS;
+
+ return mlx5e_tx_mpwqe_is_full(session, max_sq_mpw_wqebbs);
}
struct mlx5e_xdp_wqe_info {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c
index 7b562d2c8a19..ebada0c5af3c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c
@@ -11,13 +11,13 @@ static int mlx5e_xsk_map_pool(struct mlx5e_priv *priv,
{
struct device *dev = mlx5_core_dma_dev(priv->mdev);
- return xsk_pool_dma_map(pool, dev, 0);
+ return xsk_pool_dma_map(pool, dev, DMA_ATTR_SKIP_CPU_SYNC);
}
static void mlx5e_xsk_unmap_pool(struct mlx5e_priv *priv,
struct xsk_buff_pool *pool)
{
- return xsk_pool_dma_unmap(pool, 0);
+ return xsk_pool_dma_unmap(pool, DMA_ATTR_SKIP_CPU_SYNC);
}
static int mlx5e_xsk_get_pools(struct mlx5e_xsk *xsk)
@@ -72,6 +72,7 @@ void mlx5e_build_xsk_param(struct xsk_buff_pool *pool, struct mlx5e_xsk_param *x
{
xsk->headroom = xsk_pool_get_headroom(pool);
xsk->chunk_size = xsk_pool_get_chunk_size(pool);
+ xsk->unaligned = pool->unaligned;
}
static int mlx5e_xsk_enable_locked(struct mlx5e_priv *priv,
@@ -98,6 +99,15 @@ static int mlx5e_xsk_enable_locked(struct mlx5e_priv *priv,
mlx5e_build_xsk_param(pool, &xsk);
+ if (priv->channels.params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ &&
+ mlx5e_mpwrq_umr_mode(priv->mdev, &xsk) == MLX5E_MPWRQ_UMR_MODE_OVERSIZED) {
+ const char *recommendation = is_power_of_2(xsk.chunk_size) ?
+ "Upgrade firmware" : "Disable striding RQ";
+
+ mlx5_core_warn(priv->mdev, "Expected slowdown with XSK frame size %u. %s for better performance.\n",
+ xsk.chunk_size, recommendation);
+ }
+
if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
/* XSK objects will be created on open. */
goto validate_closed;
@@ -117,20 +127,18 @@ static int mlx5e_xsk_enable_locked(struct mlx5e_priv *priv,
goto err_remove_pool;
mlx5e_activate_xsk(c);
+ mlx5e_trigger_napi_icosq(c);
/* Don't wait for WQEs, because the newer xdpsock sample doesn't provide
* any Fill Ring entries at the setup stage.
*/
- err = mlx5e_rx_res_xsk_activate(priv->rx_res, &priv->channels, ix);
- if (unlikely(err))
- goto err_deactivate;
+ mlx5e_rx_res_xsk_update(priv->rx_res, &priv->channels, ix, true);
- return 0;
+ mlx5e_deactivate_rq(&c->rq);
+ mlx5e_flush_rq(&c->rq, MLX5_RQC_STATE_RDY);
-err_deactivate:
- mlx5e_deactivate_xsk(c);
- mlx5e_close_xsk(c);
+ return 0;
err_remove_pool:
mlx5e_xsk_remove_pool(&priv->xsk, ix);
@@ -169,7 +177,13 @@ static int mlx5e_xsk_disable_locked(struct mlx5e_priv *priv, u16 ix)
goto remove_pool;
c = priv->channels.c[ix];
- mlx5e_rx_res_xsk_deactivate(priv->rx_res, ix);
+
+ mlx5e_activate_rq(&c->rq);
+ mlx5e_trigger_napi_icosq(c);
+ mlx5e_wait_for_min_rx_wqes(&c->rq, MLX5E_RQ_WQES_TIMEOUT);
+
+ mlx5e_rx_res_xsk_update(priv->rx_res, &priv->channels, ix, false);
+
mlx5e_deactivate_xsk(c);
mlx5e_close_xsk(c);
@@ -207,11 +221,10 @@ int mlx5e_xsk_setup_pool(struct net_device *dev, struct xsk_buff_pool *pool, u16
{
struct mlx5e_priv *priv = netdev_priv(dev);
struct mlx5e_params *params = &priv->channels.params;
- u16 ix;
- if (unlikely(!mlx5e_qid_get_ch_if_in_group(params, qid, MLX5E_RQ_GROUP_XSK, &ix)))
+ if (unlikely(qid >= params->num_channels))
return -EINVAL;
- return pool ? mlx5e_xsk_enable_pool(priv, pool, ix) :
- mlx5e_xsk_disable_pool(priv, ix);
+ return pool ? mlx5e_xsk_enable_pool(priv, pool, qid) :
+ mlx5e_xsk_disable_pool(priv, qid);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c
index 8e7b877d8a12..c91b54d9ff27 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c
@@ -4,21 +4,225 @@
#include "rx.h"
#include "en/xdp.h"
#include <net/xdp_sock_drv.h>
+#include <linux/filter.h>
/* RX data path */
-static struct sk_buff *mlx5e_xsk_construct_skb(struct mlx5e_rq *rq, void *data,
- u32 cqe_bcnt)
+int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
{
+ struct mlx5e_mpw_info *wi = mlx5e_get_mpw_info(rq, ix);
+ struct mlx5e_icosq *icosq = rq->icosq;
+ struct mlx5_wq_cyc *wq = &icosq->wq;
+ struct mlx5e_umr_wqe *umr_wqe;
+ int batch, i;
+ u32 offset; /* 17-bit value with MTT. */
+ u16 pi;
+
+ if (unlikely(!xsk_buff_can_alloc(rq->xsk_pool, rq->mpwqe.pages_per_wqe)))
+ goto err;
+
+ BUILD_BUG_ON(sizeof(wi->alloc_units[0]) != sizeof(wi->alloc_units[0].xsk));
+ batch = xsk_buff_alloc_batch(rq->xsk_pool, (struct xdp_buff **)wi->alloc_units,
+ rq->mpwqe.pages_per_wqe);
+
+ /* If batch < pages_per_wqe, either:
+ * 1. Some (or all) descriptors were invalid.
+ * 2. dma_need_sync is true, and it fell back to allocating one frame.
+ * In either case, try to continue allocating frames one by one, until
+ * the first error, which will mean there are no more valid descriptors.
+ */
+ for (; batch < rq->mpwqe.pages_per_wqe; batch++) {
+ wi->alloc_units[batch].xsk = xsk_buff_alloc(rq->xsk_pool);
+ if (unlikely(!wi->alloc_units[batch].xsk))
+ goto err_reuse_batch;
+ }
+
+ pi = mlx5e_icosq_get_next_pi(icosq, rq->mpwqe.umr_wqebbs);
+ umr_wqe = mlx5_wq_cyc_get_wqe(wq, pi);
+ memcpy(umr_wqe, &rq->mpwqe.umr_wqe, sizeof(struct mlx5e_umr_wqe));
+
+ if (likely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_ALIGNED)) {
+ for (i = 0; i < batch; i++) {
+ dma_addr_t addr = xsk_buff_xdp_get_frame_dma(wi->alloc_units[i].xsk);
+
+ umr_wqe->inline_mtts[i] = (struct mlx5_mtt) {
+ .ptag = cpu_to_be64(addr | MLX5_EN_WR),
+ };
+ }
+ } else if (unlikely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_UNALIGNED)) {
+ for (i = 0; i < batch; i++) {
+ dma_addr_t addr = xsk_buff_xdp_get_frame_dma(wi->alloc_units[i].xsk);
+
+ umr_wqe->inline_ksms[i] = (struct mlx5_ksm) {
+ .key = rq->mkey_be,
+ .va = cpu_to_be64(addr),
+ };
+ }
+ } else if (likely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_TRIPLE)) {
+ u32 mapping_size = 1 << (rq->mpwqe.page_shift - 2);
+
+ for (i = 0; i < batch; i++) {
+ dma_addr_t addr = xsk_buff_xdp_get_frame_dma(wi->alloc_units[i].xsk);
+
+ umr_wqe->inline_ksms[i << 2] = (struct mlx5_ksm) {
+ .key = rq->mkey_be,
+ .va = cpu_to_be64(addr),
+ };
+ umr_wqe->inline_ksms[(i << 2) + 1] = (struct mlx5_ksm) {
+ .key = rq->mkey_be,
+ .va = cpu_to_be64(addr + mapping_size),
+ };
+ umr_wqe->inline_ksms[(i << 2) + 2] = (struct mlx5_ksm) {
+ .key = rq->mkey_be,
+ .va = cpu_to_be64(addr + mapping_size * 2),
+ };
+ umr_wqe->inline_ksms[(i << 2) + 3] = (struct mlx5_ksm) {
+ .key = rq->mkey_be,
+ .va = cpu_to_be64(rq->wqe_overflow.addr),
+ };
+ }
+ } else {
+ __be32 pad_size = cpu_to_be32((1 << rq->mpwqe.page_shift) -
+ rq->xsk_pool->chunk_size);
+ __be32 frame_size = cpu_to_be32(rq->xsk_pool->chunk_size);
+
+ for (i = 0; i < batch; i++) {
+ dma_addr_t addr = xsk_buff_xdp_get_frame_dma(wi->alloc_units[i].xsk);
+
+ umr_wqe->inline_klms[i << 1] = (struct mlx5_klm) {
+ .key = rq->mkey_be,
+ .va = cpu_to_be64(addr),
+ .bcount = frame_size,
+ };
+ umr_wqe->inline_klms[(i << 1) + 1] = (struct mlx5_klm) {
+ .key = rq->mkey_be,
+ .va = cpu_to_be64(rq->wqe_overflow.addr),
+ .bcount = pad_size,
+ };
+ }
+ }
+
+ bitmap_zero(wi->xdp_xmit_bitmap, rq->mpwqe.pages_per_wqe);
+ wi->consumed_strides = 0;
+
+ umr_wqe->ctrl.opmod_idx_opcode =
+ cpu_to_be32((icosq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) | MLX5_OPCODE_UMR);
+
+ /* Optimized for speed: keep in sync with mlx5e_mpwrq_umr_entry_size. */
+ offset = ix * rq->mpwqe.mtts_per_wqe;
+ if (likely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_ALIGNED))
+ offset = offset * sizeof(struct mlx5_mtt) / MLX5_OCTWORD;
+ else if (unlikely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_OVERSIZED))
+ offset = offset * sizeof(struct mlx5_klm) * 2 / MLX5_OCTWORD;
+ else if (unlikely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_TRIPLE))
+ offset = offset * sizeof(struct mlx5_ksm) * 4 / MLX5_OCTWORD;
+ umr_wqe->uctrl.xlt_offset = cpu_to_be16(offset);
+
+ icosq->db.wqe_info[pi] = (struct mlx5e_icosq_wqe_info) {
+ .wqe_type = MLX5E_ICOSQ_WQE_UMR_RX,
+ .num_wqebbs = rq->mpwqe.umr_wqebbs,
+ .umr.rq = rq,
+ };
+
+ icosq->pc += rq->mpwqe.umr_wqebbs;
+
+ icosq->doorbell_cseg = &umr_wqe->ctrl;
+
+ return 0;
+
+err_reuse_batch:
+ while (--batch >= 0)
+ xsk_buff_free(wi->alloc_units[batch].xsk);
+
+err:
+ rq->stats->buff_alloc_err++;
+ return -ENOMEM;
+}
+
+int mlx5e_xsk_alloc_rx_wqes_batched(struct mlx5e_rq *rq, u16 ix, int wqe_bulk)
+{
+ struct mlx5_wq_cyc *wq = &rq->wqe.wq;
+ struct xdp_buff **buffs;
+ u32 contig, alloc;
+ int i;
+
+ /* mlx5e_init_frags_partition creates a 1:1 mapping between
+ * rq->wqe.frags and rq->wqe.alloc_units, which allows us to
+ * allocate XDP buffers straight into alloc_units.
+ */
+ BUILD_BUG_ON(sizeof(rq->wqe.alloc_units[0]) !=
+ sizeof(rq->wqe.alloc_units[0].xsk));
+ buffs = (struct xdp_buff **)rq->wqe.alloc_units;
+ contig = mlx5_wq_cyc_get_size(wq) - ix;
+ if (wqe_bulk <= contig) {
+ alloc = xsk_buff_alloc_batch(rq->xsk_pool, buffs + ix, wqe_bulk);
+ } else {
+ alloc = xsk_buff_alloc_batch(rq->xsk_pool, buffs + ix, contig);
+ if (likely(alloc == contig))
+ alloc += xsk_buff_alloc_batch(rq->xsk_pool, buffs, wqe_bulk - contig);
+ }
+
+ for (i = 0; i < alloc; i++) {
+ int j = mlx5_wq_cyc_ctr2ix(wq, ix + i);
+ struct mlx5e_wqe_frag_info *frag;
+ struct mlx5e_rx_wqe_cyc *wqe;
+ dma_addr_t addr;
+
+ wqe = mlx5_wq_cyc_get_wqe(wq, j);
+ /* Assumes log_num_frags == 0. */
+ frag = &rq->wqe.frags[j];
+
+ addr = xsk_buff_xdp_get_frame_dma(frag->au->xsk);
+ wqe->data[0].addr = cpu_to_be64(addr + rq->buff.headroom);
+ }
+
+ return alloc;
+}
+
+int mlx5e_xsk_alloc_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk)
+{
+ struct mlx5_wq_cyc *wq = &rq->wqe.wq;
+ int i;
+
+ for (i = 0; i < wqe_bulk; i++) {
+ int j = mlx5_wq_cyc_ctr2ix(wq, ix + i);
+ struct mlx5e_wqe_frag_info *frag;
+ struct mlx5e_rx_wqe_cyc *wqe;
+ dma_addr_t addr;
+
+ wqe = mlx5_wq_cyc_get_wqe(wq, j);
+ /* Assumes log_num_frags == 0. */
+ frag = &rq->wqe.frags[j];
+
+ frag->au->xsk = xsk_buff_alloc(rq->xsk_pool);
+ if (unlikely(!frag->au->xsk))
+ return i;
+
+ addr = xsk_buff_xdp_get_frame_dma(frag->au->xsk);
+ wqe->data[0].addr = cpu_to_be64(addr + rq->buff.headroom);
+ }
+
+ return wqe_bulk;
+}
+
+static struct sk_buff *mlx5e_xsk_construct_skb(struct mlx5e_rq *rq, struct xdp_buff *xdp)
+{
+ u32 totallen = xdp->data_end - xdp->data_meta;
+ u32 metalen = xdp->data - xdp->data_meta;
struct sk_buff *skb;
- skb = napi_alloc_skb(rq->cq.napi, cqe_bcnt);
+ skb = napi_alloc_skb(rq->cq.napi, totallen);
if (unlikely(!skb)) {
rq->stats->buff_alloc_err++;
return NULL;
}
- skb_put_data(skb, data, cqe_bcnt);
+ skb_put_data(skb, xdp->data_meta, totallen);
+
+ if (metalen) {
+ skb_metadata_set(skb, metalen);
+ __skb_pull(skb, metalen);
+ }
return skb;
}
@@ -29,8 +233,8 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq,
u32 head_offset,
u32 page_idx)
{
- struct xdp_buff *xdp = wi->umr.dma_info[page_idx].xsk;
- u32 cqe_bcnt32 = cqe_bcnt;
+ struct xdp_buff *xdp = wi->alloc_units[page_idx].xsk;
+ struct bpf_prog *prog;
/* Check packet size. Note LRO doesn't use linear SKB */
if (unlikely(cqe_bcnt > rq->hw_mtu)) {
@@ -45,8 +249,7 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq,
*/
WARN_ON_ONCE(head_offset);
- xdp->data_end = xdp->data + cqe_bcnt32;
- xdp_set_data_meta_invalid(xdp);
+ xsk_buff_set_size(xdp, cqe_bcnt);
xsk_buff_dma_sync_for_cpu(xdp, rq->xsk_pool);
net_prefetch(xdp->data);
@@ -65,7 +268,8 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq,
* allocated first from the Reuse Ring, so it has enough space.
*/
- if (likely(mlx5e_xdp_handle(rq, NULL, &cqe_bcnt32, xdp))) {
+ prog = rcu_dereference(rq->xdp_prog);
+ if (likely(prog && mlx5e_xdp_handle(rq, NULL, prog, xdp))) {
if (likely(__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)))
__set_bit(page_idx, wi->xdp_xmit_bitmap); /* non-atomic */
return NULL; /* page/packet was consumed by XDP */
@@ -74,15 +278,15 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq,
/* XDP_PASS: copy the data from the UMEM to a new SKB and reuse the
* frame. On SKB allocation failure, NULL is returned.
*/
- return mlx5e_xsk_construct_skb(rq, xdp->data, cqe_bcnt32);
+ return mlx5e_xsk_construct_skb(rq, xdp);
}
struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq,
- struct mlx5_cqe64 *cqe,
struct mlx5e_wqe_frag_info *wi,
u32 cqe_bcnt)
{
- struct xdp_buff *xdp = wi->di->xsk;
+ struct xdp_buff *xdp = wi->au->xsk;
+ struct bpf_prog *prog;
/* wi->offset is not used in this function, because xdp->data and the
* DMA address point directly to the necessary place. Furthermore, the
@@ -91,22 +295,17 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq,
*/
WARN_ON_ONCE(wi->offset);
- xdp->data_end = xdp->data + cqe_bcnt;
- xdp_set_data_meta_invalid(xdp);
+ xsk_buff_set_size(xdp, cqe_bcnt);
xsk_buff_dma_sync_for_cpu(xdp, rq->xsk_pool);
net_prefetch(xdp->data);
- if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_RESP_SEND)) {
- rq->stats->wqe_err++;
- return NULL;
- }
-
- if (likely(mlx5e_xdp_handle(rq, NULL, &cqe_bcnt, xdp)))
+ prog = rcu_dereference(rq->xdp_prog);
+ if (likely(prog && mlx5e_xdp_handle(rq, NULL, prog, xdp)))
return NULL; /* page/packet was consumed by XDP */
/* XDP_PASS: copy the data from the UMEM to a new SKB. The frame reuse
- * will be handled by mlx5e_put_rx_frag.
+ * will be handled by mlx5e_free_rx_wqe.
* On SKB allocation failure, NULL is returned.
*/
- return mlx5e_xsk_construct_skb(rq, xdp->data, cqe_bcnt);
+ return mlx5e_xsk_construct_skb(rq, xdp);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h
index 7f88ccf67fdd..087c943bd8e9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h
@@ -5,48 +5,19 @@
#define __MLX5_EN_XSK_RX_H__
#include "en.h"
-#include <net/xdp_sock_drv.h>
/* RX data path */
+int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix);
+int mlx5e_xsk_alloc_rx_wqes_batched(struct mlx5e_rq *rq, u16 ix, int wqe_bulk);
+int mlx5e_xsk_alloc_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk);
struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq,
struct mlx5e_mpw_info *wi,
u16 cqe_bcnt,
u32 head_offset,
u32 page_idx);
struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq,
- struct mlx5_cqe64 *cqe,
struct mlx5e_wqe_frag_info *wi,
u32 cqe_bcnt);
-static inline int mlx5e_xsk_page_alloc_pool(struct mlx5e_rq *rq,
- struct mlx5e_dma_info *dma_info)
-{
- dma_info->xsk = xsk_buff_alloc(rq->xsk_pool);
- if (!dma_info->xsk)
- return -ENOMEM;
-
- /* Store the DMA address without headroom. In striding RQ case, we just
- * provide pages for UMR, and headroom is counted at the setup stage
- * when creating a WQE. In non-striding RQ case, headroom is accounted
- * in mlx5e_alloc_rx_wqe.
- */
- dma_info->addr = xsk_buff_xdp_get_frame_dma(dma_info->xsk);
-
- return 0;
-}
-
-static inline bool mlx5e_xsk_update_rx_wakeup(struct mlx5e_rq *rq, bool alloc_err)
-{
- if (!xsk_uses_need_wakeup(rq->xsk_pool))
- return alloc_err;
-
- if (unlikely(alloc_err))
- xsk_set_rx_need_wakeup(rq->xsk_pool);
- else
- xsk_clear_rx_need_wakeup(rq->xsk_pool);
-
- return false;
-}
-
#endif /* __MLX5_EN_XSK_RX_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
index 538bc2419bd8..ff03c43833bb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
@@ -4,24 +4,20 @@
#include "setup.h"
#include "en/params.h"
#include "en/txrx.h"
+#include "en/health.h"
+#include <net/xdp_sock_drv.h>
-/* It matches XDP_UMEM_MIN_CHUNK_SIZE, but as this constant is private and may
- * change unexpectedly, and mlx5e has a minimum valid stride size for striding
- * RQ, keep this check in the driver.
+/* The limitation of 2048 can be altered, but shouldn't go beyond the minimal
+ * stride size of striding RQ.
*/
-#define MLX5E_MIN_XSK_CHUNK_SIZE 2048
+#define MLX5E_MIN_XSK_CHUNK_SIZE max(2048, XDP_UMEM_MIN_CHUNK_SIZE)
bool mlx5e_validate_xsk_param(struct mlx5e_params *params,
struct mlx5e_xsk_param *xsk,
struct mlx5_core_dev *mdev)
{
/* AF_XDP doesn't support frames larger than PAGE_SIZE. */
- if (xsk->chunk_size > PAGE_SIZE ||
- xsk->chunk_size < MLX5E_MIN_XSK_CHUNK_SIZE)
- return false;
-
- /* Current MTU and XSK headroom don't allow packets to fit the frames. */
- if (mlx5e_rx_get_min_frag_sz(params, xsk) > xsk->chunk_size)
+ if (xsk->chunk_size > PAGE_SIZE || xsk->chunk_size < MLX5E_MIN_XSK_CHUNK_SIZE)
return false;
/* frag_sz is different for regular and XSK RQs, so ensure that linear
@@ -29,9 +25,9 @@ bool mlx5e_validate_xsk_param(struct mlx5e_params *params,
*/
switch (params->rq_wq_type) {
case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
- return mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk);
+ return !mlx5e_mpwrq_validate_xsk(mdev, params, xsk);
default: /* MLX5_WQ_TYPE_CYCLIC */
- return mlx5e_rx_is_linear_skb(params, xsk);
+ return mlx5e_rx_is_linear_skb(mdev, params, xsk);
}
}
@@ -42,7 +38,7 @@ static void mlx5e_build_xsk_cparam(struct mlx5_core_dev *mdev,
struct mlx5e_channel_param *cparam)
{
mlx5e_build_rq_param(mdev, params, xsk, q_counter, &cparam->rq);
- mlx5e_build_xdpsq_param(mdev, params, &cparam->xdp_sq);
+ mlx5e_build_xdpsq_param(mdev, params, xsk, &cparam->xdp_sq);
}
static int mlx5e_init_xsk_rq(struct mlx5e_channel *c,
@@ -63,13 +59,14 @@ static int mlx5e_init_xsk_rq(struct mlx5e_channel *c,
rq->clock = &mdev->clock;
rq->icosq = &c->icosq;
rq->ix = c->ix;
+ rq->channel = c;
rq->mdev = mdev;
rq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu);
rq->xdpsq = &c->rq_xdpsq;
rq->xsk_pool = pool;
- rq->stats = &c->priv->channel_stats[c->ix].xskrq;
+ rq->stats = &c->priv->channel_stats[c->ix]->xskrq;
rq->ptp_cyc2time = mlx5_rq_ts_translator(mdev);
- rq_xdp_ix = c->ix + params->num_channels * MLX5E_RQ_GROUP_XSK;
+ rq_xdp_ix = c->ix;
err = mlx5e_rq_set_handlers(rq, params, xsk);
if (err)
return err;
@@ -157,7 +154,7 @@ err_free_cparam:
void mlx5e_close_xsk(struct mlx5e_channel *c)
{
clear_bit(MLX5E_CHANNEL_STATE_XSK, c->state);
- synchronize_net(); /* Sync with the XSK wakeup and with NAPI. */
+ synchronize_net(); /* Sync with NAPI. */
mlx5e_close_rq(&c->xskrq);
mlx5e_close_cq(&c->xskrq.cq);
@@ -170,16 +167,25 @@ void mlx5e_close_xsk(struct mlx5e_channel *c)
void mlx5e_activate_xsk(struct mlx5e_channel *c)
{
+ /* ICOSQ recovery deactivates RQs. Suspend the recovery to avoid
+ * activating XSKRQ in the middle of recovery.
+ */
+ mlx5e_reporter_icosq_suspend_recovery(c);
set_bit(MLX5E_RQ_STATE_ENABLED, &c->xskrq.state);
- /* TX queue is created active. */
+ mlx5e_reporter_icosq_resume_recovery(c);
- spin_lock_bh(&c->async_icosq_lock);
- mlx5e_trigger_irq(&c->async_icosq);
- spin_unlock_bh(&c->async_icosq_lock);
+ /* TX queue is created active. */
}
void mlx5e_deactivate_xsk(struct mlx5e_channel *c)
{
- mlx5e_deactivate_rq(&c->xskrq);
+ /* ICOSQ recovery may reactivate XSKRQ if clear_bit is called in the
+ * middle of recovery. Suspend the recovery to avoid it.
+ */
+ mlx5e_reporter_icosq_suspend_recovery(c);
+ clear_bit(MLX5E_RQ_STATE_ENABLED, &c->xskrq.state);
+ mlx5e_reporter_icosq_resume_recovery(c);
+ synchronize_net(); /* Sync with NAPI to prevent mlx5e_post_rx_wqes. */
+
/* TX queue is disabled on close. */
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c
index 8e96260fce1d..367a9505ca4f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c
@@ -12,18 +12,14 @@ int mlx5e_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags)
struct mlx5e_priv *priv = netdev_priv(dev);
struct mlx5e_params *params = &priv->channels.params;
struct mlx5e_channel *c;
- u16 ix;
if (unlikely(!mlx5e_xdp_is_active(priv)))
return -ENETDOWN;
- if (unlikely(!mlx5e_qid_get_ch_if_in_group(params, qid, MLX5E_RQ_GROUP_XSK, &ix)))
+ if (unlikely(qid >= params->num_channels))
return -EINVAL;
- c = priv->channels.c[ix];
-
- if (unlikely(!test_bit(MLX5E_CHANNEL_STATE_XSK, c->state)))
- return -ENXIO;
+ c = priv->channels.c[qid];
if (!napi_if_scheduled_mark_missed(&c->napi)) {
/* To avoid WQE overrun, don't post a NOP if async_icosq is not
@@ -36,9 +32,7 @@ int mlx5e_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags)
if (test_and_set_bit(MLX5E_SQ_STATE_PENDING_XSK_TX, &c->async_icosq.state))
return 0;
- spin_lock_bh(&c->async_icosq_lock);
- mlx5e_trigger_irq(&c->async_icosq);
- spin_unlock_bh(&c->async_icosq_lock);
+ mlx5e_trigger_napi_icosq(c);
}
return 0;
@@ -103,12 +97,15 @@ bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget)
xsk_buff_raw_dma_sync_for_device(pool, xdptxd.dma_addr, xdptxd.len);
ret = INDIRECT_CALL_2(sq->xmit_xdp_frame, mlx5e_xmit_xdp_frame_mpwqe,
- mlx5e_xmit_xdp_frame, sq, &xdptxd, &xdpi, check_result);
+ mlx5e_xmit_xdp_frame, sq, &xdptxd, NULL,
+ check_result);
if (unlikely(!ret)) {
if (sq->mpwqe.wqe)
mlx5e_xdp_mpwqe_complete(sq);
mlx5e_xsk_tx_post_err(sq, &xdpi);
+ } else {
+ mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, &xdpi);
}
flush = true;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h
index a05085035f23..9c505158b975 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h
@@ -5,7 +5,6 @@
#define __MLX5_EN_XSK_TX_H__
#include "en.h"
-#include <net/xdp_sock_drv.h>
/* TX data path */
@@ -13,15 +12,4 @@ int mlx5e_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags);
bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget);
-static inline void mlx5e_xsk_update_tx_wakeup(struct mlx5e_xdpsq *sq)
-{
- if (!xsk_uses_need_wakeup(sq->xsk_pool))
- return;
-
- if (sq->pc != sq->cc)
- xsk_clear_tx_need_wakeup(sq->xsk_pool);
- else
- xsk_set_tx_need_wakeup(sq->xsk_pool);
-}
-
#endif /* __MLX5_EN_XSK_TX_H__ */