diff options
Diffstat (limited to '')
108 files changed, 5336 insertions, 2236 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig index 92056452a9e3..4ba1a78c6515 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig +++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig @@ -115,6 +115,7 @@ config MLX5_TC_CT config MLX5_TC_SAMPLE bool "MLX5 TC sample offload support" depends on MLX5_CLS_ACT + depends on PSAMPLE=y || PSAMPLE=n || MLX5_CORE=m default y help Say Y here if you want to support offloading sample rules via tc diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index e63bb9ceb9c0..fcfd38fa9e6c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -46,6 +46,15 @@ mlx5_core-$(CONFIG_MLX5_CLS_ACT) += en_tc.o en/rep/tc.o en/rep/neigh.o \ en/tc_tun_vxlan.o en/tc_tun_gre.o en/tc_tun_geneve.o \ en/tc_tun_mplsoudp.o diag/en_tc_tracepoint.o \ en/tc/post_act.o en/tc/int_port.o + +mlx5_core-$(CONFIG_MLX5_CLS_ACT) += en/tc/act/act.o en/tc/act/drop.o en/tc/act/trap.o \ + en/tc/act/accept.o en/tc/act/mark.o en/tc/act/goto.o \ + en/tc/act/tun.o en/tc/act/csum.o en/tc/act/pedit.o \ + en/tc/act/vlan.o en/tc/act/vlan_mangle.o en/tc/act/mpls.o \ + en/tc/act/mirred.o en/tc/act/mirred_nic.o \ + en/tc/act/ct.o en/tc/act/sample.o en/tc/act/ptype.o \ + en/tc/act/redirect_ingress.o + mlx5_core-$(CONFIG_MLX5_TC_CT) += en/tc_ct.o mlx5_core-$(CONFIG_MLX5_TC_SAMPLE) += en/tc/sample.o @@ -95,11 +104,12 @@ mlx5_core-$(CONFIG_MLX5_SW_STEERING) += steering/dr_domain.o steering/dr_table.o steering/dr_ste.o steering/dr_send.o \ steering/dr_ste_v0.o steering/dr_ste_v1.o \ steering/dr_cmd.o steering/dr_fw.o \ - steering/dr_action.o steering/fs_dr.o + steering/dr_action.o steering/fs_dr.o \ + steering/dr_dbg.o # # SF device # -mlx5_core-$(CONFIG_MLX5_SF) += sf/vhca_event.o sf/dev/dev.o sf/dev/driver.o +mlx5_core-$(CONFIG_MLX5_SF) += sf/vhca_event.o sf/dev/dev.o sf/dev/driver.o irq_affinity.o # # SF manager diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index f71ec4d9d68e..17fe05809653 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -148,8 +148,12 @@ static void cmd_ent_put(struct mlx5_cmd_work_ent *ent) if (!refcount_dec_and_test(&ent->refcnt)) return; - if (ent->idx >= 0) - cmd_free_index(ent->cmd, ent->idx); + if (ent->idx >= 0) { + struct mlx5_cmd *cmd = ent->cmd; + + cmd_free_index(cmd, ent->idx); + up(ent->page_queue ? &cmd->pages_sem : &cmd->sem); + } cmd_free_ent(ent); } @@ -339,6 +343,9 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_PAGE_FAULT_RESUME: case MLX5_CMD_OP_QUERY_ESW_FUNCTIONS: case MLX5_CMD_OP_DEALLOC_SF: + case MLX5_CMD_OP_DESTROY_UCTX: + case MLX5_CMD_OP_DESTROY_UMEM: + case MLX5_CMD_OP_MODIFY_RQT: return MLX5_CMD_STAT_OK; case MLX5_CMD_OP_QUERY_HCA_CAP: @@ -444,7 +451,6 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_MODIFY_TIS: case MLX5_CMD_OP_QUERY_TIS: case MLX5_CMD_OP_CREATE_RQT: - case MLX5_CMD_OP_MODIFY_RQT: case MLX5_CMD_OP_QUERY_RQT: case MLX5_CMD_OP_CREATE_FLOW_TABLE: @@ -464,9 +470,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_MODIFY_GENERAL_OBJECT: case MLX5_CMD_OP_QUERY_GENERAL_OBJECT: case MLX5_CMD_OP_CREATE_UCTX: - case MLX5_CMD_OP_DESTROY_UCTX: case MLX5_CMD_OP_CREATE_UMEM: - case MLX5_CMD_OP_DESTROY_UMEM: case MLX5_CMD_OP_ALLOC_MEMIC: case MLX5_CMD_OP_MODIFY_XRQ: case MLX5_CMD_OP_RELEASE_XRQ_ERROR: @@ -900,25 +904,6 @@ static bool opcode_allowed(struct mlx5_cmd *cmd, u16 opcode) return cmd->allowed_opcode == opcode; } -static int cmd_alloc_index_retry(struct mlx5_cmd *cmd) -{ - unsigned long alloc_end = jiffies + msecs_to_jiffies(1000); - int idx; - -retry: - idx = cmd_alloc_index(cmd); - if (idx < 0 && time_before(jiffies, alloc_end)) { - /* Index allocation can fail on heavy load of commands. This is a temporary - * situation as the current command already holds the semaphore, meaning that - * another command completion is being handled and it is expected to release - * the entry index soon. - */ - cpu_relax(); - goto retry; - } - return idx; -} - bool mlx5_cmd_is_down(struct mlx5_core_dev *dev) { return pci_channel_offline(dev->pdev) || @@ -946,7 +931,7 @@ static void cmd_work_handler(struct work_struct *work) sem = ent->page_queue ? &cmd->pages_sem : &cmd->sem; down(sem); if (!ent->page_queue) { - alloc_ret = cmd_alloc_index_retry(cmd); + alloc_ret = cmd_alloc_index(cmd); if (alloc_ret < 0) { mlx5_core_err_rl(dev, "failed to allocate command entry\n"); if (ent->callback) { @@ -1602,8 +1587,6 @@ static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool force vector = vec & 0xffffffff; for (i = 0; i < (1 << cmd->log_sz); i++) { if (test_bit(i, &vector)) { - struct semaphore *sem; - ent = cmd->ent_arr[i]; /* if we already completed the command, ignore it */ @@ -1626,10 +1609,6 @@ static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool force dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) cmd_ent_put(ent); - if (ent->page_queue) - sem = &cmd->pages_sem; - else - sem = &cmd->sem; ent->ts2 = ktime_get_ns(); memcpy(ent->out->first.data, ent->lay->out, sizeof(ent->lay->out)); dump_command(dev, ent, 0); @@ -1683,7 +1662,6 @@ static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool force */ complete(&ent->done); } - up(sem); } } } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cq.c b/drivers/net/ethernet/mellanox/mlx5/core/cq.c index 02e77ffe5c3e..5371ad0a12eb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cq.c @@ -164,13 +164,14 @@ int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq) MLX5_SET(destroy_cq_in, in, cqn, cq->cqn); MLX5_SET(destroy_cq_in, in, uid, cq->uid); err = mlx5_cmd_exec_in(dev, destroy_cq, in); + if (err) + return err; synchronize_irq(cq->irqn); - mlx5_cq_put(cq); wait_for_completion(&cq->free); - return err; + return 0; } EXPORT_SYMBOL(mlx5_core_destroy_cq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c index 07c8d9811bc8..10d195042ab5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c @@ -507,6 +507,8 @@ void mlx5_debug_cq_remove(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq) if (!mlx5_debugfs_root) return; - if (cq->dbg) + if (cq->dbg) { rem_res_tree(cq->dbg); + cq->dbg = NULL; + } } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c index a8b84d53dfb0..ba6dad97e308 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c @@ -538,7 +538,7 @@ int mlx5_rescan_drivers_locked(struct mlx5_core_dev *dev) return add_drivers(dev); } -static bool mlx5_same_hw_devs(struct mlx5_core_dev *dev, struct mlx5_core_dev *peer_dev) +bool mlx5_same_hw_devs(struct mlx5_core_dev *dev, struct mlx5_core_dev *peer_dev) { u64 fsystem_guid, psystem_guid; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index 1c98652b244a..d1093bb2d436 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -546,6 +546,13 @@ static int mlx5_devlink_enable_remote_dev_reset_get(struct devlink *devlink, u32 return 0; } +static int mlx5_devlink_eq_depth_validate(struct devlink *devlink, u32 id, + union devlink_param_value val, + struct netlink_ext_ack *extack) +{ + return (val.vu16 >= 64 && val.vu16 <= 4096) ? 0 : -EINVAL; +} + static const struct devlink_param mlx5_devlink_params[] = { DEVLINK_PARAM_DRIVER(MLX5_DEVLINK_PARAM_ID_FLOW_STEERING_MODE, "flow_steering_mode", DEVLINK_PARAM_TYPE_STRING, @@ -570,6 +577,10 @@ static const struct devlink_param mlx5_devlink_params[] = { DEVLINK_PARAM_GENERIC(ENABLE_REMOTE_DEV_RESET, BIT(DEVLINK_PARAM_CMODE_RUNTIME), mlx5_devlink_enable_remote_dev_reset_get, mlx5_devlink_enable_remote_dev_reset_set, NULL), + DEVLINK_PARAM_GENERIC(IO_EQ_SIZE, BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), + NULL, NULL, mlx5_devlink_eq_depth_validate), + DEVLINK_PARAM_GENERIC(EVENT_EQ_SIZE, BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), + NULL, NULL, mlx5_devlink_eq_depth_validate), }; static void mlx5_devlink_set_params_init_values(struct devlink *devlink) @@ -608,6 +619,16 @@ static void mlx5_devlink_set_params_init_values(struct devlink *devlink) value); } #endif + + value.vu32 = MLX5_COMP_EQ_SIZE; + devlink_param_driverinit_value_set(devlink, + DEVLINK_PARAM_GENERIC_ID_IO_EQ_SIZE, + value); + + value.vu32 = MLX5_NUM_ASYNC_EQE; + devlink_param_driverinit_value_set(devlink, + DEVLINK_PARAM_GENERIC_ID_EVENT_EQ_SIZE, + value); } static const struct devlink_param enable_eth_param = @@ -752,6 +773,66 @@ static void mlx5_devlink_auxdev_params_unregister(struct devlink *devlink) mlx5_devlink_eth_param_unregister(devlink); } +static int mlx5_devlink_max_uc_list_validate(struct devlink *devlink, u32 id, + union devlink_param_value val, + struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + + if (val.vu32 == 0) { + NL_SET_ERR_MSG_MOD(extack, "max_macs value must be greater than 0"); + return -EINVAL; + } + + if (!is_power_of_2(val.vu32)) { + NL_SET_ERR_MSG_MOD(extack, "Only power of 2 values are supported for max_macs"); + return -EINVAL; + } + + if (ilog2(val.vu32) > + MLX5_CAP_GEN_MAX(dev, log_max_current_uc_list)) { + NL_SET_ERR_MSG_MOD(extack, "max_macs value is out of the supported range"); + return -EINVAL; + } + + return 0; +} + +static const struct devlink_param max_uc_list_param = + DEVLINK_PARAM_GENERIC(MAX_MACS, BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), + NULL, NULL, mlx5_devlink_max_uc_list_validate); + +static int mlx5_devlink_max_uc_list_param_register(struct devlink *devlink) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + union devlink_param_value value; + int err; + + if (!MLX5_CAP_GEN_MAX(dev, log_max_current_uc_list_wr_supported)) + return 0; + + err = devlink_param_register(devlink, &max_uc_list_param); + if (err) + return err; + + value.vu32 = 1 << MLX5_CAP_GEN(dev, log_max_current_uc_list); + devlink_param_driverinit_value_set(devlink, + DEVLINK_PARAM_GENERIC_ID_MAX_MACS, + value); + return 0; +} + +static void +mlx5_devlink_max_uc_list_param_unregister(struct devlink *devlink) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + + if (!MLX5_CAP_GEN_MAX(dev, log_max_current_uc_list_wr_supported)) + return; + + devlink_param_unregister(devlink, &max_uc_list_param); +} + #define MLX5_TRAP_DROP(_id, _group_id) \ DEVLINK_TRAP_GENERIC(DROP, DROP, _id, \ DEVLINK_TRAP_GROUP_GENERIC_ID_##_group_id, \ @@ -811,6 +892,10 @@ int mlx5_devlink_register(struct devlink *devlink) if (err) goto auxdev_reg_err; + err = mlx5_devlink_max_uc_list_param_register(devlink); + if (err) + goto max_uc_list_err; + err = mlx5_devlink_traps_register(devlink); if (err) goto traps_reg_err; @@ -821,6 +906,8 @@ int mlx5_devlink_register(struct devlink *devlink) return 0; traps_reg_err: + mlx5_devlink_max_uc_list_param_unregister(devlink); +max_uc_list_err: mlx5_devlink_auxdev_params_unregister(devlink); auxdev_reg_err: devlink_params_unregister(devlink, mlx5_devlink_params, @@ -831,6 +918,7 @@ auxdev_reg_err: void mlx5_devlink_unregister(struct devlink *devlink) { mlx5_devlink_traps_unregister(devlink); + mlx5_devlink_max_uc_list_param_unregister(devlink); mlx5_devlink_auxdev_params_unregister(devlink); devlink_params_unregister(devlink, mlx5_devlink_params, ARRAY_SIZE(mlx5_devlink_params)); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index f0ac6b0d9653..812e6810cb3b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -145,7 +145,6 @@ struct page_pool; #define MLX5E_MIN_NUM_CHANNELS 0x1 #define MLX5E_MAX_NUM_CHANNELS (MLX5E_INDIR_RQT_SIZE / 2) -#define MLX5E_MAX_NUM_SQS (MLX5E_MAX_NUM_CHANNELS * MLX5E_MAX_NUM_TC) #define MLX5E_TX_CQ_POLL_BUDGET 128 #define MLX5E_TX_XSK_POLL_BUDGET 64 #define MLX5E_SQ_RECOVER_MIN_INTERVAL 500 /* msecs */ @@ -173,7 +172,7 @@ struct page_pool; #define MLX5E_KLM_ENTRIES_PER_WQE(wqe_size)\ ALIGN_DOWN(MLX5E_KLM_MAX_ENTRIES_PER_WQE(wqe_size), MLX5_UMR_KLM_ALIGNMENT) -#define MLX5E_MAX_KLM_PER_WQE(mdev) \ +#define MLX5E_MAX_KLM_PER_WQE \ MLX5E_KLM_ENTRIES_PER_WQE(MLX5E_TX_MPW_MAX_NUM_DS << MLX5_MKEY_BSF_OCTO_SIZE) #define MLX5E_MSG_LEVEL NETIF_MSG_LINK @@ -783,6 +782,8 @@ struct mlx5e_channel { DECLARE_BITMAP(state, MLX5E_CHANNEL_NUM_STATES); int ix; int cpu; + /* Sync between icosq recovery and XSK enable/disable. */ + struct mutex icosq_recovery_lock; }; struct mlx5e_ptp; @@ -875,10 +876,8 @@ struct mlx5e_trap; struct mlx5e_priv { /* priv data path fields - start */ - /* +1 for port ptp ts */ - struct mlx5e_txqsq *txq2sq[(MLX5E_MAX_NUM_CHANNELS + 1) * MLX5E_MAX_NUM_TC + - MLX5E_QOS_MAX_LEAF_NODES]; - int channel_tc2realtxq[MLX5E_MAX_NUM_CHANNELS][MLX5E_MAX_NUM_TC]; + struct mlx5e_txqsq **txq2sq; + int **channel_tc2realtxq; int port_ptp_tc2realtxq[MLX5E_MAX_NUM_TC]; #ifdef CONFIG_MLX5_CORE_EN_DCB struct mlx5e_dcbx_dp dcbx_dp; @@ -893,7 +892,7 @@ struct mlx5e_priv { struct mlx5e_channels channels; u32 tisn[MLX5_MAX_PORTS][MLX5E_MAX_NUM_TC]; struct mlx5e_rx_res *rx_res; - u32 tx_rates[MLX5E_MAX_NUM_SQS]; + u32 *tx_rates; struct mlx5e_flow_steering fs; @@ -909,7 +908,7 @@ struct mlx5e_priv { struct net_device *netdev; struct mlx5e_trap *en_trap; struct mlx5e_stats stats; - struct mlx5e_channel_stats channel_stats[MLX5E_MAX_NUM_CHANNELS]; + struct mlx5e_channel_stats **channel_stats; struct mlx5e_channel_stats trap_stats; struct mlx5e_ptp_stats ptp_stats; u16 stats_nch; @@ -956,6 +955,12 @@ struct mlx5e_rx_handlers { extern const struct mlx5e_rx_handlers mlx5e_rx_handlers_nic; +enum mlx5e_profile_feature { + MLX5E_PROFILE_FEATURE_PTP_RX, + MLX5E_PROFILE_FEATURE_PTP_TX, + MLX5E_PROFILE_FEATURE_QOS_HTB, +}; + struct mlx5e_profile { int (*init)(struct mlx5_core_dev *mdev, struct net_device *netdev); @@ -969,14 +974,18 @@ struct mlx5e_profile { int (*update_rx)(struct mlx5e_priv *priv); void (*update_stats)(struct mlx5e_priv *priv); void (*update_carrier)(struct mlx5e_priv *priv); + int (*max_nch_limit)(struct mlx5_core_dev *mdev); unsigned int (*stats_grps_num)(struct mlx5e_priv *priv); mlx5e_stats_grp_t *stats_grps; const struct mlx5e_rx_handlers *rx_handlers; int max_tc; u8 rq_groups; - bool rx_ptp_support; + u32 features; }; +#define mlx5e_profile_feature_cap(profile, feature) \ + ((profile)->features & BIT(MLX5E_PROFILE_FEATURE_##feature)) + void mlx5e_build_ptys2ethtool_map(void); bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev); @@ -1014,9 +1023,6 @@ int mlx5e_create_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param); void mlx5e_destroy_rq(struct mlx5e_rq *rq); struct mlx5e_sq_param; -int mlx5e_open_icosq(struct mlx5e_channel *c, struct mlx5e_params *params, - struct mlx5e_sq_param *param, struct mlx5e_icosq *sq); -void mlx5e_close_icosq(struct mlx5e_icosq *sq); int mlx5e_open_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params, struct mlx5e_sq_param *param, struct xsk_buff_pool *xsk_pool, struct mlx5e_xdpsq *sq, bool is_redirect); @@ -1057,7 +1063,6 @@ int mlx5e_safe_switch_params(struct mlx5e_priv *priv, mlx5e_fp_preactivate preactivate, void *context, bool reset); int mlx5e_update_tx_netdev_queues(struct mlx5e_priv *priv); -int mlx5e_num_channels_changed(struct mlx5e_priv *priv); int mlx5e_num_channels_changed_ctx(struct mlx5e_priv *priv, void *context); void mlx5e_activate_priv_channels(struct mlx5e_priv *priv); void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv); @@ -1148,9 +1153,12 @@ void mlx5e_ethtool_get_channels(struct mlx5e_priv *priv, int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv, struct ethtool_channels *ch); int mlx5e_ethtool_get_coalesce(struct mlx5e_priv *priv, - struct ethtool_coalesce *coal); + struct ethtool_coalesce *coal, + struct kernel_ethtool_coalesce *kernel_coal); int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv, - struct ethtool_coalesce *coal); + struct ethtool_coalesce *coal, + struct kernel_ethtool_coalesce *kernel_coal, + struct netlink_ext_ack *extack); int mlx5e_ethtool_get_link_ksettings(struct mlx5e_priv *priv, struct ethtool_link_ksettings *link_ksettings); int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv, @@ -1186,8 +1194,7 @@ int mlx5e_priv_init(struct mlx5e_priv *priv, struct mlx5_core_dev *mdev); void mlx5e_priv_cleanup(struct mlx5e_priv *priv); struct net_device * -mlx5e_create_netdev(struct mlx5_core_dev *mdev, const struct mlx5e_profile *profile, - unsigned int txqs, unsigned int rxqs); +mlx5e_create_netdev(struct mlx5_core_dev *mdev, const struct mlx5e_profile *profile); int mlx5e_attach_netdev(struct mlx5e_priv *priv); void mlx5e_detach_netdev(struct mlx5e_priv *priv); void mlx5e_destroy_netdev(struct mlx5e_priv *priv); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/health.h b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h index d5b7110a4265..0107e4e73bb0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/health.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h @@ -30,6 +30,8 @@ void mlx5e_reporter_rx_destroy(struct mlx5e_priv *priv); void mlx5e_reporter_icosq_cqe_err(struct mlx5e_icosq *icosq); void mlx5e_reporter_rq_cqe_err(struct mlx5e_rq *rq); void mlx5e_reporter_rx_timeout(struct mlx5e_rq *rq); +void mlx5e_reporter_icosq_suspend_recovery(struct mlx5e_channel *c); +void mlx5e_reporter_icosq_resume_recovery(struct mlx5e_channel *c); #define MLX5E_REPORTER_PER_Q_MAX_LEN 256 diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c index d290d7276b8d..b4f3bd7d346e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c @@ -20,7 +20,7 @@ mlx5e_hv_vhca_fill_ring_stats(struct mlx5e_priv *priv, int ch, struct mlx5e_channel_stats *stats; int tc; - stats = &priv->channel_stats[ch]; + stats = priv->channel_stats[ch]; data->rx_packets = stats->rq.packets; data->rx_bytes = stats->rq.bytes; @@ -120,14 +120,14 @@ static void mlx5e_hv_vhca_stats_cleanup(struct mlx5_hv_vhca_agent *agent) cancel_delayed_work_sync(&priv->stats_agent.work); } -int mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv) +void mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv) { int buf_len = mlx5e_hv_vhca_stats_buf_size(priv); struct mlx5_hv_vhca_agent *agent; priv->stats_agent.buf = kvzalloc(buf_len, GFP_KERNEL); if (!priv->stats_agent.buf) - return -ENOMEM; + return; agent = mlx5_hv_vhca_agent_create(priv->mdev->hv_vhca, MLX5_HV_VHCA_AGENT_STATS, @@ -142,13 +142,11 @@ int mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv) PTR_ERR(agent)); kvfree(priv->stats_agent.buf); - return IS_ERR_OR_NULL(agent); + return; } priv->stats_agent.agent = agent; INIT_DELAYED_WORK(&priv->stats_agent.work, mlx5e_hv_vhca_stats_work); - - return 0; } void mlx5e_hv_vhca_stats_destroy(struct mlx5e_priv *priv) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.h index 664463faf77b..29c8c6d3260f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.h @@ -7,19 +7,12 @@ #if IS_ENABLED(CONFIG_PCI_HYPERV_INTERFACE) -int mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv); +void mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv); void mlx5e_hv_vhca_stats_destroy(struct mlx5e_priv *priv); #else - -static inline int mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv) -{ - return 0; -} - -static inline void mlx5e_hv_vhca_stats_destroy(struct mlx5e_priv *priv) -{ -} +static inline void mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv) {} +static inline void mlx5e_hv_vhca_stats_destroy(struct mlx5e_priv *priv) {} #endif #endif /* __MLX5_EN_STATS_VHCA_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/mod_hdr.c b/drivers/net/ethernet/mellanox/mlx5/core/en/mod_hdr.c index 7edde4d536fd..17325c5d6516 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/mod_hdr.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/mod_hdr.c @@ -155,3 +155,61 @@ struct mlx5_modify_hdr *mlx5e_mod_hdr_get(struct mlx5e_mod_hdr_handle *mh) return mh->modify_hdr; } +char * +mlx5e_mod_hdr_alloc(struct mlx5_core_dev *mdev, int namespace, + struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts) +{ + int new_num_actions, max_hw_actions; + size_t new_sz, old_sz; + void *ret; + + if (mod_hdr_acts->num_actions < mod_hdr_acts->max_actions) + goto out; + + max_hw_actions = mlx5e_mod_hdr_max_actions(mdev, namespace); + new_num_actions = min(max_hw_actions, + mod_hdr_acts->actions ? + mod_hdr_acts->max_actions * 2 : 1); + if (mod_hdr_acts->max_actions == new_num_actions) + return ERR_PTR(-ENOSPC); + + new_sz = MLX5_MH_ACT_SZ * new_num_actions; + old_sz = mod_hdr_acts->max_actions * MLX5_MH_ACT_SZ; + + if (mod_hdr_acts->is_static) { + ret = kzalloc(new_sz, GFP_KERNEL); + if (ret) { + memcpy(ret, mod_hdr_acts->actions, old_sz); + mod_hdr_acts->is_static = false; + } + } else { + ret = krealloc(mod_hdr_acts->actions, new_sz, GFP_KERNEL); + if (ret) + memset(ret + old_sz, 0, new_sz - old_sz); + } + if (!ret) + return ERR_PTR(-ENOMEM); + + mod_hdr_acts->actions = ret; + mod_hdr_acts->max_actions = new_num_actions; + +out: + return mod_hdr_acts->actions + (mod_hdr_acts->num_actions * MLX5_MH_ACT_SZ); +} + +void +mlx5e_mod_hdr_dealloc(struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts) +{ + if (!mod_hdr_acts->is_static) + kfree(mod_hdr_acts->actions); + + mod_hdr_acts->actions = NULL; + mod_hdr_acts->num_actions = 0; + mod_hdr_acts->max_actions = 0; +} + +char * +mlx5e_mod_hdr_get_item(struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts, int pos) +{ + return mod_hdr_acts->actions + (pos * MLX5_MH_ACT_SZ); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/mod_hdr.h b/drivers/net/ethernet/mellanox/mlx5/core/en/mod_hdr.h index 33b23d8f9182..b8dac418d0a5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/mod_hdr.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/mod_hdr.h @@ -7,14 +7,32 @@ #include <linux/hashtable.h> #include <linux/mlx5/fs.h> +#define MLX5_MH_ACT_SZ MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto) + struct mlx5e_mod_hdr_handle; struct mlx5e_tc_mod_hdr_acts { int num_actions; int max_actions; + bool is_static; void *actions; }; +#define DECLARE_MOD_HDR_ACTS_ACTIONS(name, len) \ + u8 name[len][MLX5_MH_ACT_SZ] = {} + +#define DECLARE_MOD_HDR_ACTS(name, acts_arr) \ + struct mlx5e_tc_mod_hdr_acts name = { \ + .max_actions = ARRAY_SIZE(acts_arr), \ + .is_static = true, \ + .actions = acts_arr, \ + } + +char *mlx5e_mod_hdr_alloc(struct mlx5_core_dev *mdev, int namespace, + struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts); +void mlx5e_mod_hdr_dealloc(struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts); +char *mlx5e_mod_hdr_get_item(struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts, int pos); + struct mlx5e_mod_hdr_handle * mlx5e_mod_hdr_attach(struct mlx5_core_dev *mdev, struct mod_hdr_tbl *tbl, @@ -28,4 +46,12 @@ struct mlx5_modify_hdr *mlx5e_mod_hdr_get(struct mlx5e_mod_hdr_handle *mh); void mlx5e_mod_hdr_tbl_init(struct mod_hdr_tbl *tbl); void mlx5e_mod_hdr_tbl_destroy(struct mod_hdr_tbl *tbl); +static inline int mlx5e_mod_hdr_max_actions(struct mlx5_core_dev *mdev, int namespace) +{ + if (namespace == MLX5_FLOW_NAMESPACE_FDB) /* FDB offloading */ + return MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, max_modify_header_actions); + else /* namespace is MLX5_FLOW_NAMESPACE_KERNEL - NIC offloading */ + return MLX5_CAP_FLOWTABLE_NIC_RX(mdev, max_modify_header_actions); +} + #endif /* __MLX5E_EN_MOD_HDR_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c index f8c29022dbb2..66180ffb4606 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c @@ -717,7 +717,7 @@ static u32 mlx5e_shampo_icosq_sz(struct mlx5_core_dev *mdev, int wq_size = BIT(MLX5_GET(wq, wqc, log_wq_sz)); u32 wqebbs; - max_klm_per_umr = MLX5E_MAX_KLM_PER_WQE(mdev); + max_klm_per_umr = MLX5E_MAX_KLM_PER_WQE; max_hd_per_wqe = mlx5e_shampo_hd_per_wqe(mdev, params, rq_param); max_num_of_umr_per_wqe = max_hd_per_wqe / max_klm_per_umr; rest = max_hd_per_wqe % max_klm_per_umr; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c index 18d542b1c5cb..82baafd3c00c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c @@ -768,7 +768,7 @@ int mlx5e_ptp_alloc_rx_fs(struct mlx5e_priv *priv) { struct mlx5e_ptp_fs *ptp_fs; - if (!priv->profile->rx_ptp_support) + if (!mlx5e_profile_feature_cap(priv->profile, PTP_RX)) return 0; ptp_fs = kzalloc(sizeof(*ptp_fs), GFP_KERNEL); @@ -783,7 +783,7 @@ void mlx5e_ptp_free_rx_fs(struct mlx5e_priv *priv) { struct mlx5e_ptp_fs *ptp_fs = priv->fs.ptp_fs; - if (!priv->profile->rx_ptp_support) + if (!mlx5e_profile_feature_cap(priv->profile, PTP_RX)) return; mlx5e_ptp_rx_unset_fs(priv); @@ -794,7 +794,7 @@ int mlx5e_ptp_rx_manage_fs(struct mlx5e_priv *priv, bool set) { struct mlx5e_ptp *c = priv->channels.ptp; - if (!priv->profile->rx_ptp_support) + if (!mlx5e_profile_feature_cap(priv->profile, PTP_RX)) return 0; if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c index 50977f01a050..00449df98a5e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */ +#include <net/sch_generic.h> #include "en.h" #include "params.h" diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c index fcb0892c08a9..0991345c4ae5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c @@ -517,6 +517,9 @@ int mlx5e_rep_indr_setup_cb(struct net_device *netdev, struct Qdisc *sch, void * void *data, void (*cleanup)(struct flow_block_cb *block_cb)) { + if (!netdev) + return -EOPNOTSUPP; + switch (type) { case TC_SETUP_BLOCK: return mlx5e_rep_indr_setup_block(netdev, sch, cb_priv, type_data, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h index d6c7c81690eb..7c9dd3a75f8a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h @@ -66,7 +66,7 @@ mlx5e_rep_setup_tc(struct net_device *dev, enum tc_setup_type type, static inline void mlx5e_rep_tc_receive(struct mlx5_cqe64 *cqe, struct mlx5e_rq *rq, - struct sk_buff *skb) {} + struct sk_buff *skb) { napi_gro_receive(rq->cq.napi, skb); } #endif /* CONFIG_MLX5_CLS_ACT */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c index 74086eb556ae..2684e9da9f41 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c @@ -62,6 +62,7 @@ static void mlx5e_reset_icosq_cc_pc(struct mlx5e_icosq *icosq) static int mlx5e_rx_reporter_err_icosq_cqe_recover(void *ctx) { + struct mlx5e_rq *xskrq = NULL; struct mlx5_core_dev *mdev; struct mlx5e_icosq *icosq; struct net_device *dev; @@ -70,7 +71,13 @@ static int mlx5e_rx_reporter_err_icosq_cqe_recover(void *ctx) int err; icosq = ctx; + + mutex_lock(&icosq->channel->icosq_recovery_lock); + + /* mlx5e_close_rq cancels this work before RQ and ICOSQ are killed. */ rq = &icosq->channel->rq; + if (test_bit(MLX5E_RQ_STATE_ENABLED, &icosq->channel->xskrq.state)) + xskrq = &icosq->channel->xskrq; mdev = icosq->channel->mdev; dev = icosq->channel->netdev; err = mlx5_core_query_sq_state(mdev, icosq->sqn, &state); @@ -84,6 +91,9 @@ static int mlx5e_rx_reporter_err_icosq_cqe_recover(void *ctx) goto out; mlx5e_deactivate_rq(rq); + if (xskrq) + mlx5e_deactivate_rq(xskrq); + err = mlx5e_wait_for_icosq_flush(icosq); if (err) goto out; @@ -97,15 +107,28 @@ static int mlx5e_rx_reporter_err_icosq_cqe_recover(void *ctx) goto out; mlx5e_reset_icosq_cc_pc(icosq); + mlx5e_free_rx_in_progress_descs(rq); + if (xskrq) + mlx5e_free_rx_in_progress_descs(xskrq); + clear_bit(MLX5E_SQ_STATE_RECOVERING, &icosq->state); mlx5e_activate_icosq(icosq); - mlx5e_activate_rq(rq); + mlx5e_activate_rq(rq); rq->stats->recover++; + + if (xskrq) { + mlx5e_activate_rq(xskrq); + xskrq->stats->recover++; + } + + mutex_unlock(&icosq->channel->icosq_recovery_lock); + return 0; out: clear_bit(MLX5E_SQ_STATE_RECOVERING, &icosq->state); + mutex_unlock(&icosq->channel->icosq_recovery_lock); return err; } @@ -706,6 +729,16 @@ void mlx5e_reporter_icosq_cqe_err(struct mlx5e_icosq *icosq) mlx5e_health_report(priv, priv->rx_reporter, err_str, &err_ctx); } +void mlx5e_reporter_icosq_suspend_recovery(struct mlx5e_channel *c) +{ + mutex_lock(&c->icosq_recovery_lock); +} + +void mlx5e_reporter_icosq_resume_recovery(struct mlx5e_channel *c) +{ + mutex_unlock(&c->icosq_recovery_lock); +} + static const struct devlink_health_reporter_ops mlx5_rx_reporter_ops = { .name = "rx", .recover = mlx5e_rx_reporter_recover, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c index 4f4bc8726ec4..60bc5b577ab9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c @@ -466,6 +466,14 @@ static int mlx5e_tx_reporter_dump_sq(struct mlx5e_priv *priv, struct devlink_fms return mlx5e_health_fmsg_named_obj_nest_end(fmsg); } +static int mlx5e_tx_reporter_timeout_dump(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg, + void *ctx) +{ + struct mlx5e_tx_timeout_ctx *to_ctx = ctx; + + return mlx5e_tx_reporter_dump_sq(priv, fmsg, to_ctx->sq); +} + static int mlx5e_tx_reporter_dump_all_sqs(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg) { @@ -561,11 +569,11 @@ int mlx5e_reporter_tx_timeout(struct mlx5e_txqsq *sq) to_ctx.sq = sq; err_ctx.ctx = &to_ctx; err_ctx.recover = mlx5e_tx_reporter_timeout_recover; - err_ctx.dump = mlx5e_tx_reporter_dump_sq; + err_ctx.dump = mlx5e_tx_reporter_timeout_dump; snprintf(err_str, sizeof(err_str), "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x, usecs since last trans: %u", sq->ch_ix, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc, - jiffies_to_usecs(jiffies - sq->txq->trans_start)); + jiffies_to_usecs(jiffies - READ_ONCE(sq->txq->trans_start))); mlx5e_health_report(priv, priv->tx_reporter, err_str, &err_ctx); return to_ctx.status; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c index 142953847996..24c32f73040a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c @@ -13,6 +13,9 @@ struct mlx5e_rx_res { unsigned int max_nch; u32 drop_rqn; + struct mlx5e_packet_merge_param pkt_merge_param; + struct rw_semaphore pkt_merge_param_sem; + struct mlx5e_rss *rss[MLX5E_MAX_NUM_RSS]; bool rss_active; u32 rss_rqns[MLX5E_INDIR_RQT_SIZE]; @@ -34,7 +37,6 @@ struct mlx5e_rx_res { /* API for rx_res_rss_* */ static int mlx5e_rx_res_rss_init_def(struct mlx5e_rx_res *res, - const struct mlx5e_packet_merge_param *init_pkt_merge_param, unsigned int init_nch) { bool inner_ft_support = res->features & MLX5E_RX_RES_FEATURE_INNER_FT; @@ -49,7 +51,7 @@ static int mlx5e_rx_res_rss_init_def(struct mlx5e_rx_res *res, return -ENOMEM; err = mlx5e_rss_init(rss, res->mdev, inner_ft_support, res->drop_rqn, - init_pkt_merge_param); + &res->pkt_merge_param); if (err) goto err_rss_free; @@ -274,8 +276,7 @@ struct mlx5e_rx_res *mlx5e_rx_res_alloc(void) return kvzalloc(sizeof(struct mlx5e_rx_res), GFP_KERNEL); } -static int mlx5e_rx_res_channels_init(struct mlx5e_rx_res *res, - const struct mlx5e_packet_merge_param *init_pkt_merge_param) +static int mlx5e_rx_res_channels_init(struct mlx5e_rx_res *res) { bool inner_ft_support = res->features & MLX5E_RX_RES_FEATURE_INNER_FT; struct mlx5e_tir_builder *builder; @@ -306,7 +307,7 @@ static int mlx5e_rx_res_channels_init(struct mlx5e_rx_res *res, mlx5e_tir_builder_build_rqt(builder, res->mdev->mlx5e_res.hw_objs.td.tdn, mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt), inner_ft_support); - mlx5e_tir_builder_build_packet_merge(builder, init_pkt_merge_param); + mlx5e_tir_builder_build_packet_merge(builder, &res->pkt_merge_param); mlx5e_tir_builder_build_direct(builder); err = mlx5e_tir_init(&res->channels[ix].direct_tir, builder, res->mdev, true); @@ -336,7 +337,7 @@ static int mlx5e_rx_res_channels_init(struct mlx5e_rx_res *res, mlx5e_tir_builder_build_rqt(builder, res->mdev->mlx5e_res.hw_objs.td.tdn, mlx5e_rqt_get_rqtn(&res->channels[ix].xsk_rqt), inner_ft_support); - mlx5e_tir_builder_build_packet_merge(builder, init_pkt_merge_param); + mlx5e_tir_builder_build_packet_merge(builder, &res->pkt_merge_param); mlx5e_tir_builder_build_direct(builder); err = mlx5e_tir_init(&res->channels[ix].xsk_tir, builder, res->mdev, true); @@ -392,6 +393,7 @@ static int mlx5e_rx_res_ptp_init(struct mlx5e_rx_res *res) if (err) goto out; + /* Separated from the channels RQs, does not share pkt_merge state with them */ mlx5e_tir_builder_build_rqt(builder, res->mdev->mlx5e_res.hw_objs.td.tdn, mlx5e_rqt_get_rqtn(&res->ptp.rqt), inner_ft_support); @@ -447,11 +449,14 @@ int mlx5e_rx_res_init(struct mlx5e_rx_res *res, struct mlx5_core_dev *mdev, res->max_nch = max_nch; res->drop_rqn = drop_rqn; - err = mlx5e_rx_res_rss_init_def(res, init_pkt_merge_param, init_nch); + res->pkt_merge_param = *init_pkt_merge_param; + init_rwsem(&res->pkt_merge_param_sem); + + err = mlx5e_rx_res_rss_init_def(res, init_nch); if (err) goto err_out; - err = mlx5e_rx_res_channels_init(res, init_pkt_merge_param); + err = mlx5e_rx_res_channels_init(res); if (err) goto err_rss_destroy; @@ -513,7 +518,7 @@ u32 mlx5e_rx_res_get_tirn_ptp(struct mlx5e_rx_res *res) return mlx5e_tir_get_tirn(&res->ptp.tir); } -u32 mlx5e_rx_res_get_rqtn_direct(struct mlx5e_rx_res *res, unsigned int ix) +static u32 mlx5e_rx_res_get_rqtn_direct(struct mlx5e_rx_res *res, unsigned int ix) { return mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt); } @@ -656,6 +661,9 @@ int mlx5e_rx_res_packet_merge_set_param(struct mlx5e_rx_res *res, if (!builder) return -ENOMEM; + down_write(&res->pkt_merge_param_sem); + res->pkt_merge_param = *pkt_merge_param; + mlx5e_tir_builder_build_packet_merge(builder, pkt_merge_param); final_err = 0; @@ -681,6 +689,7 @@ int mlx5e_rx_res_packet_merge_set_param(struct mlx5e_rx_res *res, } } + up_write(&res->pkt_merge_param_sem); mlx5e_tir_builder_free(builder); return final_err; } @@ -689,3 +698,31 @@ struct mlx5e_rss_params_hash mlx5e_rx_res_get_current_hash(struct mlx5e_rx_res * { return mlx5e_rss_get_hash(res->rss[0]); } + +int mlx5e_rx_res_tls_tir_create(struct mlx5e_rx_res *res, unsigned int rxq, + struct mlx5e_tir *tir) +{ + bool inner_ft_support = res->features & MLX5E_RX_RES_FEATURE_INNER_FT; + struct mlx5e_tir_builder *builder; + u32 rqtn; + int err; + + builder = mlx5e_tir_builder_alloc(false); + if (!builder) + return -ENOMEM; + + rqtn = mlx5e_rx_res_get_rqtn_direct(res, rxq); + + mlx5e_tir_builder_build_rqt(builder, res->mdev->mlx5e_res.hw_objs.td.tdn, rqtn, + inner_ft_support); + mlx5e_tir_builder_build_direct(builder); + mlx5e_tir_builder_build_tls(builder); + down_read(&res->pkt_merge_param_sem); + mlx5e_tir_builder_build_packet_merge(builder, &res->pkt_merge_param); + err = mlx5e_tir_init(tir, builder, res->mdev, false); + up_read(&res->pkt_merge_param_sem); + + mlx5e_tir_builder_free(builder); + + return err; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h index d09f7d174a51..b39b20a720e0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h @@ -37,9 +37,6 @@ u32 mlx5e_rx_res_get_tirn_rss(struct mlx5e_rx_res *res, enum mlx5_traffic_types u32 mlx5e_rx_res_get_tirn_rss_inner(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt); u32 mlx5e_rx_res_get_tirn_ptp(struct mlx5e_rx_res *res); -/* RQTN getters for modules that create their own TIRs */ -u32 mlx5e_rx_res_get_rqtn_direct(struct mlx5e_rx_res *res, unsigned int ix); - /* Activate/deactivate API */ void mlx5e_rx_res_channels_activate(struct mlx5e_rx_res *res, struct mlx5e_channels *chs); void mlx5e_rx_res_channels_deactivate(struct mlx5e_rx_res *res); @@ -69,4 +66,7 @@ struct mlx5e_rss *mlx5e_rx_res_rss_get(struct mlx5e_rx_res *res, u32 rss_idx); /* Workaround for hairpin */ struct mlx5e_rss_params_hash mlx5e_rx_res_get_current_hash(struct mlx5e_rx_res *res); +/* Accel TIRs */ +int mlx5e_rx_res_tls_tir_create(struct mlx5e_rx_res *res, unsigned int rxq, + struct mlx5e_tir *tir); #endif /* __MLX5_EN_RX_RES_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/accept.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/accept.c new file mode 100644 index 000000000000..b0de6b999675 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/accept.c @@ -0,0 +1,31 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +#include "act.h" +#include "en/tc_priv.h" + +static bool +tc_act_can_offload_accept(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + int act_index) +{ + return true; +} + +static int +tc_act_parse_accept(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | + MLX5_FLOW_CONTEXT_ACTION_COUNT; + attr->flags |= MLX5_ESW_ATTR_FLAG_ACCEPT; + + return 0; +} + +struct mlx5e_tc_act mlx5e_tc_act_accept = { + .can_offload = tc_act_can_offload_accept, + .parse_action = tc_act_parse_accept, +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c new file mode 100644 index 000000000000..e600924e30ea --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c @@ -0,0 +1,103 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +#include "act.h" +#include "en/tc_priv.h" +#include "mlx5_core.h" + +/* Must be aligned with enum flow_action_id. */ +static struct mlx5e_tc_act *tc_acts_fdb[NUM_FLOW_ACTIONS] = { + &mlx5e_tc_act_accept, + &mlx5e_tc_act_drop, + &mlx5e_tc_act_trap, + &mlx5e_tc_act_goto, + &mlx5e_tc_act_mirred, + &mlx5e_tc_act_mirred, + &mlx5e_tc_act_redirect_ingress, + NULL, /* FLOW_ACTION_MIRRED_INGRESS, */ + &mlx5e_tc_act_vlan, + &mlx5e_tc_act_vlan, + &mlx5e_tc_act_vlan_mangle, + &mlx5e_tc_act_tun_encap, + &mlx5e_tc_act_tun_decap, + &mlx5e_tc_act_pedit, + &mlx5e_tc_act_pedit, + &mlx5e_tc_act_csum, + NULL, /* FLOW_ACTION_MARK, */ + &mlx5e_tc_act_ptype, + NULL, /* FLOW_ACTION_PRIORITY, */ + NULL, /* FLOW_ACTION_WAKE, */ + NULL, /* FLOW_ACTION_QUEUE, */ + &mlx5e_tc_act_sample, + NULL, /* FLOW_ACTION_POLICE, */ + &mlx5e_tc_act_ct, + NULL, /* FLOW_ACTION_CT_METADATA, */ + &mlx5e_tc_act_mpls_push, + &mlx5e_tc_act_mpls_pop, +}; + +/* Must be aligned with enum flow_action_id. */ +static struct mlx5e_tc_act *tc_acts_nic[NUM_FLOW_ACTIONS] = { + &mlx5e_tc_act_accept, + &mlx5e_tc_act_drop, + NULL, /* FLOW_ACTION_TRAP, */ + &mlx5e_tc_act_goto, + &mlx5e_tc_act_mirred_nic, + NULL, /* FLOW_ACTION_MIRRED, */ + NULL, /* FLOW_ACTION_REDIRECT_INGRESS, */ + NULL, /* FLOW_ACTION_MIRRED_INGRESS, */ + NULL, /* FLOW_ACTION_VLAN_PUSH, */ + NULL, /* FLOW_ACTION_VLAN_POP, */ + NULL, /* FLOW_ACTION_VLAN_MANGLE, */ + NULL, /* FLOW_ACTION_TUNNEL_ENCAP, */ + NULL, /* FLOW_ACTION_TUNNEL_DECAP, */ + &mlx5e_tc_act_pedit, + &mlx5e_tc_act_pedit, + &mlx5e_tc_act_csum, + &mlx5e_tc_act_mark, + NULL, /* FLOW_ACTION_PTYPE, */ + NULL, /* FLOW_ACTION_PRIORITY, */ + NULL, /* FLOW_ACTION_WAKE, */ + NULL, /* FLOW_ACTION_QUEUE, */ + NULL, /* FLOW_ACTION_SAMPLE, */ + NULL, /* FLOW_ACTION_POLICE, */ + &mlx5e_tc_act_ct, +}; + +/** + * mlx5e_tc_act_get() - Get an action parser for an action id. + * @act_id: Flow action id. + * @ns_type: flow namespace type. + */ +struct mlx5e_tc_act * +mlx5e_tc_act_get(enum flow_action_id act_id, + enum mlx5_flow_namespace_type ns_type) +{ + struct mlx5e_tc_act **tc_acts; + + tc_acts = ns_type == MLX5_FLOW_NAMESPACE_FDB ? tc_acts_fdb : tc_acts_nic; + + return tc_acts[act_id]; +} + +/** + * mlx5e_tc_act_init_parse_state() - Init a new parse_state. + * @parse_state: Parsing state. + * @flow: mlx5e tc flow being handled. + * @flow_action: flow action to parse. + * @extack: to set an error msg. + * + * The same parse_state should be passed to action parsers + * for tracking the current parsing state. + */ +void +mlx5e_tc_act_init_parse_state(struct mlx5e_tc_act_parse_state *parse_state, + struct mlx5e_tc_flow *flow, + struct flow_action *flow_action, + struct netlink_ext_ack *extack) +{ + memset(parse_state, 0, sizeof(*parse_state)); + parse_state->flow = flow; + parse_state->num_actions = flow_action->num_entries; + parse_state->extack = extack; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h new file mode 100644 index 000000000000..26efa33de56f --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h @@ -0,0 +1,75 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#ifndef __MLX5_EN_TC_ACT_H__ +#define __MLX5_EN_TC_ACT_H__ + +#include <net/tc_act/tc_pedit.h> +#include <net/flow_offload.h> +#include <linux/netlink.h> +#include "eswitch.h" +#include "pedit.h" + +struct mlx5_flow_attr; + +struct mlx5e_tc_act_parse_state { + unsigned int num_actions; + struct mlx5e_tc_flow *flow; + struct netlink_ext_ack *extack; + bool encap; + bool decap; + bool mpls_push; + bool ptype_host; + const struct ip_tunnel_info *tun_info; + struct pedit_headers_action hdrs[__PEDIT_CMD_MAX]; + int ifindexes[MLX5_MAX_FLOW_FWD_VPORTS]; + int if_count; + struct mlx5_tc_ct_priv *ct_priv; +}; + +struct mlx5e_tc_act { + bool (*can_offload)(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + int act_index); + + int (*parse_action)(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr); + + int (*post_parse)(struct mlx5e_tc_act_parse_state *parse_state, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr); +}; + +extern struct mlx5e_tc_act mlx5e_tc_act_drop; +extern struct mlx5e_tc_act mlx5e_tc_act_trap; +extern struct mlx5e_tc_act mlx5e_tc_act_accept; +extern struct mlx5e_tc_act mlx5e_tc_act_mark; +extern struct mlx5e_tc_act mlx5e_tc_act_goto; +extern struct mlx5e_tc_act mlx5e_tc_act_tun_encap; +extern struct mlx5e_tc_act mlx5e_tc_act_tun_decap; +extern struct mlx5e_tc_act mlx5e_tc_act_csum; +extern struct mlx5e_tc_act mlx5e_tc_act_pedit; +extern struct mlx5e_tc_act mlx5e_tc_act_vlan; +extern struct mlx5e_tc_act mlx5e_tc_act_vlan_mangle; +extern struct mlx5e_tc_act mlx5e_tc_act_mpls_push; +extern struct mlx5e_tc_act mlx5e_tc_act_mpls_pop; +extern struct mlx5e_tc_act mlx5e_tc_act_mirred; +extern struct mlx5e_tc_act mlx5e_tc_act_mirred_nic; +extern struct mlx5e_tc_act mlx5e_tc_act_ct; +extern struct mlx5e_tc_act mlx5e_tc_act_sample; +extern struct mlx5e_tc_act mlx5e_tc_act_ptype; +extern struct mlx5e_tc_act mlx5e_tc_act_redirect_ingress; + +struct mlx5e_tc_act * +mlx5e_tc_act_get(enum flow_action_id act_id, + enum mlx5_flow_namespace_type ns_type); + +void +mlx5e_tc_act_init_parse_state(struct mlx5e_tc_act_parse_state *parse_state, + struct mlx5e_tc_flow *flow, + struct flow_action *flow_action, + struct netlink_ext_ack *extack); + +#endif /* __MLX5_EN_TC_ACT_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/csum.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/csum.c new file mode 100644 index 000000000000..29920ef0180a --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/csum.c @@ -0,0 +1,61 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +#include <linux/tc_act/tc_csum.h> +#include "act.h" +#include "en/tc_priv.h" + +static bool +csum_offload_supported(struct mlx5e_priv *priv, + u32 action, + u32 update_flags, + struct netlink_ext_ack *extack) +{ + u32 prot_flags = TCA_CSUM_UPDATE_FLAG_IPV4HDR | TCA_CSUM_UPDATE_FLAG_TCP | + TCA_CSUM_UPDATE_FLAG_UDP; + + /* The HW recalcs checksums only if re-writing headers */ + if (!(action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)) { + NL_SET_ERR_MSG_MOD(extack, + "TC csum action is only offloaded with pedit"); + netdev_warn(priv->netdev, + "TC csum action is only offloaded with pedit\n"); + return false; + } + + if (update_flags & ~prot_flags) { + NL_SET_ERR_MSG_MOD(extack, + "can't offload TC csum action for some header/s"); + netdev_warn(priv->netdev, + "can't offload TC csum action for some header/s - flags %#x\n", + update_flags); + return false; + } + + return true; +} + +static bool +tc_act_can_offload_csum(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + int act_index) +{ + struct mlx5e_tc_flow *flow = parse_state->flow; + + return csum_offload_supported(flow->priv, flow->attr->action, + act->csum_flags, parse_state->extack); +} + +static int +tc_act_parse_csum(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + return 0; +} + +struct mlx5e_tc_act mlx5e_tc_act_csum = { + .can_offload = tc_act_can_offload_csum, + .parse_action = tc_act_parse_csum, +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c new file mode 100644 index 000000000000..06ec30cdb269 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +#include "act.h" +#include "en/tc_priv.h" +#include "en/tc_ct.h" + +static bool +tc_act_can_offload_ct(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + int act_index) +{ + struct netlink_ext_ack *extack = parse_state->extack; + + if (flow_flag_test(parse_state->flow, SAMPLE)) { + NL_SET_ERR_MSG_MOD(extack, + "Sample action with connection tracking is not supported"); + return false; + } + + return true; +} + +static int +tc_act_parse_ct(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + int err; + + err = mlx5_tc_ct_parse_action(parse_state->ct_priv, attr, + &attr->parse_attr->mod_hdr_acts, + act, parse_state->extack); + if (err) + return err; + + flow_flag_set(parse_state->flow, CT); + + if (mlx5e_is_eswitch_flow(parse_state->flow)) + attr->esw_attr->split_count = attr->esw_attr->out_count; + + return 0; +} + +struct mlx5e_tc_act mlx5e_tc_act_ct = { + .can_offload = tc_act_can_offload_ct, + .parse_action = tc_act_parse_ct, +}; + diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/drop.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/drop.c new file mode 100644 index 000000000000..2e29a23bed12 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/drop.c @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +#include "act.h" +#include "en/tc_priv.h" + +static bool +tc_act_can_offload_drop(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + int act_index) +{ + return true; +} + +static int +tc_act_parse_drop(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + attr->action |= MLX5_FLOW_CONTEXT_ACTION_DROP | + MLX5_FLOW_CONTEXT_ACTION_COUNT; + + return 0; +} + +struct mlx5e_tc_act mlx5e_tc_act_drop = { + .can_offload = tc_act_can_offload_drop, + .parse_action = tc_act_parse_drop, +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/goto.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/goto.c new file mode 100644 index 000000000000..f44515061228 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/goto.c @@ -0,0 +1,122 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +#include "act.h" +#include "en/tc_priv.h" +#include "eswitch.h" + +static int +validate_goto_chain(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + const struct flow_action_entry *act, + struct netlink_ext_ack *extack) +{ + bool is_esw = mlx5e_is_eswitch_flow(flow); + bool ft_flow = mlx5e_is_ft_flow(flow); + u32 dest_chain = act->chain_index; + struct mlx5_fs_chains *chains; + struct mlx5_eswitch *esw; + u32 reformat_and_fwd; + u32 max_chain; + + esw = priv->mdev->priv.eswitch; + chains = is_esw ? esw_chains(esw) : mlx5e_nic_chains(priv); + max_chain = mlx5_chains_get_chain_range(chains); + reformat_and_fwd = is_esw ? + MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, reformat_and_fwd_to_table) : + MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, reformat_and_fwd_to_table); + + if (ft_flow) { + NL_SET_ERR_MSG_MOD(extack, "Goto action is not supported"); + return -EOPNOTSUPP; + } + + if (!mlx5_chains_backwards_supported(chains) && + dest_chain <= flow->attr->chain) { + NL_SET_ERR_MSG_MOD(extack, "Goto lower numbered chain isn't supported"); + return -EOPNOTSUPP; + } + + if (dest_chain > max_chain) { + NL_SET_ERR_MSG_MOD(extack, + "Requested destination chain is out of supported range"); + return -EOPNOTSUPP; + } + + if (flow->attr->action & (MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT | + MLX5_FLOW_CONTEXT_ACTION_DECAP) && + !reformat_and_fwd) { + NL_SET_ERR_MSG_MOD(extack, + "Goto chain is not allowed if action has reformat or decap"); + return -EOPNOTSUPP; + } + + return 0; +} + +static bool +tc_act_can_offload_goto(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + int act_index) +{ + struct netlink_ext_ack *extack = parse_state->extack; + struct mlx5e_tc_flow *flow = parse_state->flow; + + if (validate_goto_chain(flow->priv, flow, act, extack)) + return false; + + return true; +} + +static int +tc_act_parse_goto(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | + MLX5_FLOW_CONTEXT_ACTION_COUNT; + attr->dest_chain = act->chain_index; + + return 0; +} + +static int +tc_act_post_parse_goto(struct mlx5e_tc_act_parse_state *parse_state, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr; + struct netlink_ext_ack *extack = parse_state->extack; + struct mlx5e_tc_flow *flow = parse_state->flow; + + if (!attr->dest_chain) + return 0; + + if (parse_state->decap) { + /* It can be supported if we'll create a mapping for + * the tunnel device only (without tunnel), and set + * this tunnel id with this decap flow. + * + * On restore (miss), we'll just set this saved tunnel + * device. + */ + + NL_SET_ERR_MSG_MOD(extack, "Decap with goto isn't supported"); + netdev_warn(priv->netdev, "Decap with goto isn't supported"); + return -EOPNOTSUPP; + } + + if (!mlx5e_is_eswitch_flow(flow) && parse_attr->mirred_ifindex[0]) { + NL_SET_ERR_MSG_MOD(extack, "Mirroring goto chain rules isn't supported"); + return -EOPNOTSUPP; + } + + return 0; +} + +struct mlx5e_tc_act mlx5e_tc_act_goto = { + .can_offload = tc_act_can_offload_goto, + .parse_action = tc_act_parse_goto, + .post_parse = tc_act_post_parse_goto, +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mark.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mark.c new file mode 100644 index 000000000000..d775c3d9edf3 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mark.c @@ -0,0 +1,35 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +#include "act.h" +#include "en_tc.h" + +static bool +tc_act_can_offload_mark(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + int act_index) +{ + if (act->mark & ~MLX5E_TC_FLOW_ID_MASK) { + NL_SET_ERR_MSG_MOD(parse_state->extack, "Bad flow mark, only 16 bit supported"); + return false; + } + + return true; +} + +static int +tc_act_parse_mark(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + attr->nic_attr->flow_tag = act->mark; + attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + + return 0; +} + +struct mlx5e_tc_act mlx5e_tc_act_mark = { + .can_offload = tc_act_can_offload_mark, + .parse_action = tc_act_parse_mark, +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c new file mode 100644 index 000000000000..c614fc7fdc9c --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c @@ -0,0 +1,307 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +#include <linux/if_macvlan.h> +#include <linux/if_vlan.h> +#include <net/bareudp.h> +#include <net/bonding.h> +#include "act.h" +#include "vlan.h" +#include "en/tc_tun_encap.h" +#include "en/tc_priv.h" +#include "en_rep.h" + +static bool +same_vf_reps(struct mlx5e_priv *priv, struct net_device *out_dev) +{ + return mlx5e_eswitch_vf_rep(priv->netdev) && + priv->netdev == out_dev; +} + +static int +verify_uplink_forwarding(struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr, + struct net_device *out_dev, + struct netlink_ext_ack *extack) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5e_rep_priv *rep_priv; + + /* Forwarding non encapsulated traffic between + * uplink ports is allowed only if + * termination_table_raw_traffic cap is set. + * + * Input vport was stored attr->in_rep. + * In LAG case, *priv* is the private data of + * uplink which may be not the input vport. + */ + rep_priv = mlx5e_rep_to_rep_priv(attr->esw_attr->in_rep); + + if (!(mlx5e_eswitch_uplink_rep(rep_priv->netdev) && + mlx5e_eswitch_uplink_rep(out_dev))) + return 0; + + if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, + termination_table_raw_traffic)) { + NL_SET_ERR_MSG_MOD(extack, + "devices are both uplink, can't offload forwarding"); + return -EOPNOTSUPP; + } else if (out_dev != rep_priv->netdev) { + NL_SET_ERR_MSG_MOD(extack, + "devices are not the same uplink, can't offload forwarding"); + return -EOPNOTSUPP; + } + return 0; +} + +static bool +is_duplicated_output_device(struct net_device *dev, + struct net_device *out_dev, + int *ifindexes, int if_count, + struct netlink_ext_ack *extack) +{ + int i; + + for (i = 0; i < if_count; i++) { + if (ifindexes[i] == out_dev->ifindex) { + NL_SET_ERR_MSG_MOD(extack, "can't duplicate output to same device"); + netdev_err(dev, "can't duplicate output to same device: %s\n", + out_dev->name); + return true; + } + } + + return false; +} + +static struct net_device * +get_fdb_out_dev(struct net_device *uplink_dev, struct net_device *out_dev) +{ + struct net_device *fdb_out_dev = out_dev; + struct net_device *uplink_upper; + + rcu_read_lock(); + uplink_upper = netdev_master_upper_dev_get_rcu(uplink_dev); + if (uplink_upper && netif_is_lag_master(uplink_upper) && + uplink_upper == out_dev) { + fdb_out_dev = uplink_dev; + } else if (netif_is_lag_master(out_dev)) { + fdb_out_dev = bond_option_active_slave_get_rcu(netdev_priv(out_dev)); + if (fdb_out_dev && + (!mlx5e_eswitch_rep(fdb_out_dev) || + !netdev_port_same_parent_id(fdb_out_dev, uplink_dev))) + fdb_out_dev = NULL; + } + rcu_read_unlock(); + return fdb_out_dev; +} + +static bool +tc_act_can_offload_mirred(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + int act_index) +{ + struct netlink_ext_ack *extack = parse_state->extack; + struct mlx5e_tc_flow *flow = parse_state->flow; + struct mlx5e_tc_flow_parse_attr *parse_attr; + struct net_device *out_dev = act->dev; + struct mlx5e_priv *priv = flow->priv; + struct mlx5_esw_flow_attr *esw_attr; + + parse_attr = flow->attr->parse_attr; + esw_attr = flow->attr->esw_attr; + + if (!out_dev) { + /* out_dev is NULL when filters with + * non-existing mirred device are replayed to + * the driver. + */ + return false; + } + + if (parse_state->mpls_push && !netif_is_bareudp(out_dev)) { + NL_SET_ERR_MSG_MOD(extack, "mpls is supported only through a bareudp device"); + return false; + } + + if (mlx5e_is_ft_flow(flow) && out_dev == priv->netdev) { + /* Ignore forward to self rules generated + * by adding both mlx5 devs to the flow table + * block on a normal nft offload setup. + */ + return false; + } + + if (esw_attr->out_count >= MLX5_MAX_FLOW_FWD_VPORTS) { + NL_SET_ERR_MSG_MOD(extack, + "can't support more output ports, can't offload forwarding"); + netdev_warn(priv->netdev, + "can't support more than %d output ports, can't offload forwarding\n", + esw_attr->out_count); + return false; + } + + if (parse_state->encap || + netdev_port_same_parent_id(priv->netdev, out_dev) || + netif_is_ovs_master(out_dev)) + return true; + + if (parse_attr->filter_dev != priv->netdev) { + /* All mlx5 devices are called to configure + * high level device filters. Therefore, the + * *attempt* to install a filter on invalid + * eswitch should not trigger an explicit error + */ + return false; + } + + NL_SET_ERR_MSG_MOD(extack, "devices are not on same switch HW, can't offload forwarding"); + + return false; +} + +static int +parse_mirred_encap(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + struct mlx5_flow_attr *attr) +{ + struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr; + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + struct net_device *out_dev = act->dev; + + parse_attr->mirred_ifindex[esw_attr->out_count] = out_dev->ifindex; + parse_attr->tun_info[esw_attr->out_count] = + mlx5e_dup_tun_info(parse_state->tun_info); + + if (!parse_attr->tun_info[esw_attr->out_count]) + return -ENOMEM; + + parse_state->encap = false; + esw_attr->dests[esw_attr->out_count].flags |= MLX5_ESW_DEST_ENCAP; + esw_attr->out_count++; + /* attr->dests[].rep is resolved when we handle encap */ + + return 0; +} + +static int +parse_mirred(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr; + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + struct netlink_ext_ack *extack = parse_state->extack; + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct net_device *out_dev = act->dev; + struct net_device *uplink_dev; + struct mlx5e_priv *out_priv; + struct mlx5_eswitch *esw; + int *ifindexes; + int if_count; + int err; + + esw = priv->mdev->priv.eswitch; + uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH); + ifindexes = parse_state->ifindexes; + if_count = parse_state->if_count; + + if (is_duplicated_output_device(priv->netdev, out_dev, ifindexes, if_count, extack)) + return -EOPNOTSUPP; + + parse_state->ifindexes[if_count] = out_dev->ifindex; + parse_state->if_count++; + + out_dev = get_fdb_out_dev(uplink_dev, out_dev); + if (!out_dev) + return -ENODEV; + + if (is_vlan_dev(out_dev)) { + err = mlx5e_tc_act_vlan_add_push_action(priv, attr, &out_dev, extack); + if (err) + return err; + } + + if (is_vlan_dev(parse_attr->filter_dev)) { + err = mlx5e_tc_act_vlan_add_pop_action(priv, attr, extack); + if (err) + return err; + } + + if (netif_is_macvlan(out_dev)) + out_dev = macvlan_dev_real_dev(out_dev); + + err = verify_uplink_forwarding(priv, attr, out_dev, extack); + if (err) + return err; + + if (!mlx5e_is_valid_eswitch_fwd_dev(priv, out_dev)) { + NL_SET_ERR_MSG_MOD(extack, + "devices are not on same switch HW, can't offload forwarding"); + return -EOPNOTSUPP; + } + + if (same_vf_reps(priv, out_dev)) { + NL_SET_ERR_MSG_MOD(extack, "can't forward from a VF to itself"); + return -EOPNOTSUPP; + } + + out_priv = netdev_priv(out_dev); + rpriv = out_priv->ppriv; + esw_attr->dests[esw_attr->out_count].rep = rpriv->rep; + esw_attr->dests[esw_attr->out_count].mdev = out_priv->mdev; + esw_attr->out_count++; + + return 0; +} + +static int +parse_mirred_ovs_master(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + struct net_device *out_dev = act->dev; + int err; + + err = mlx5e_set_fwd_to_int_port_actions(priv, attr, out_dev->ifindex, + MLX5E_TC_INT_PORT_EGRESS, + &attr->action, esw_attr->out_count); + if (err) + return err; + + esw_attr->out_count++; + return 0; +} + +static int +tc_act_parse_mirred(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + struct net_device *out_dev = act->dev; + int err = -EOPNOTSUPP; + + if (parse_state->encap) + err = parse_mirred_encap(parse_state, act, attr); + else if (netdev_port_same_parent_id(priv->netdev, out_dev)) + err = parse_mirred(parse_state, act, priv, attr); + else if (netif_is_ovs_master(out_dev)) + err = parse_mirred_ovs_master(parse_state, act, priv, attr); + + if (err) + return err; + + attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | + MLX5_FLOW_CONTEXT_ACTION_COUNT; + + return 0; +} + +struct mlx5e_tc_act mlx5e_tc_act_mirred = { + .can_offload = tc_act_can_offload_mirred, + .parse_action = tc_act_parse_mirred, +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred_nic.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred_nic.c new file mode 100644 index 000000000000..2c74567b6d25 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred_nic.c @@ -0,0 +1,51 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +#include "act.h" +#include "en/tc_priv.h" + +static bool +tc_act_can_offload_mirred_nic(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + int act_index) +{ + struct netlink_ext_ack *extack = parse_state->extack; + struct mlx5e_tc_flow *flow = parse_state->flow; + struct net_device *out_dev = act->dev; + struct mlx5e_priv *priv = flow->priv; + + if (act->id != FLOW_ACTION_REDIRECT) + return false; + + if (priv->netdev->netdev_ops != out_dev->netdev_ops || + !mlx5e_same_hw_devs(priv, netdev_priv(out_dev))) { + NL_SET_ERR_MSG_MOD(extack, + "devices are not on same switch HW, can't offload forwarding"); + netdev_warn(priv->netdev, + "devices %s %s not on same switch HW, can't offload forwarding\n", + netdev_name(priv->netdev), + out_dev->name); + return false; + } + + return true; +} + +static int +tc_act_parse_mirred_nic(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + attr->parse_attr->mirred_ifindex[0] = act->dev->ifindex; + flow_flag_set(parse_state->flow, HAIRPIN); + attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | + MLX5_FLOW_CONTEXT_ACTION_COUNT; + + return 0; +} + +struct mlx5e_tc_act mlx5e_tc_act_mirred_nic = { + .can_offload = tc_act_can_offload_mirred_nic, + .parse_action = tc_act_parse_mirred_nic, +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mpls.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mpls.c new file mode 100644 index 000000000000..784fc4f68b1e --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mpls.c @@ -0,0 +1,86 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +#include <net/bareudp.h> +#include "act.h" +#include "en/tc_priv.h" + +static bool +tc_act_can_offload_mpls_push(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + int act_index) +{ + struct netlink_ext_ack *extack = parse_state->extack; + struct mlx5e_priv *priv = parse_state->flow->priv; + + if (!MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, reformat_l2_to_l3_tunnel) || + act->mpls_push.proto != htons(ETH_P_MPLS_UC)) { + NL_SET_ERR_MSG_MOD(extack, "mpls push is supported only for mpls_uc protocol"); + return false; + } + + return true; +} + +static int +tc_act_parse_mpls_push(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + parse_state->mpls_push = true; + + return 0; +} + +static bool +tc_act_can_offload_mpls_pop(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + int act_index) +{ + struct netlink_ext_ack *extack = parse_state->extack; + struct mlx5e_tc_flow *flow = parse_state->flow; + struct net_device *filter_dev; + + filter_dev = flow->attr->parse_attr->filter_dev; + + /* we only support mpls pop if it is the first action + * and the filter net device is bareudp. Subsequent + * actions can be pedit and the last can be mirred + * egress redirect. + */ + if (act_index) { + NL_SET_ERR_MSG_MOD(extack, "mpls pop supported only as first action"); + return false; + } + + if (!netif_is_bareudp(filter_dev)) { + NL_SET_ERR_MSG_MOD(extack, "mpls pop supported only on bareudp devices"); + return false; + } + + return true; +} + +static int +tc_act_parse_mpls_pop(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + attr->parse_attr->eth.h_proto = act->mpls_pop.proto; + attr->action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; + flow_flag_set(parse_state->flow, L3_TO_L2_DECAP); + + return 0; +} + +struct mlx5e_tc_act mlx5e_tc_act_mpls_push = { + .can_offload = tc_act_can_offload_mpls_push, + .parse_action = tc_act_parse_mpls_push, +}; + +struct mlx5e_tc_act mlx5e_tc_act_mpls_pop = { + .can_offload = tc_act_can_offload_mpls_pop, + .parse_action = tc_act_parse_mpls_pop, +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.c new file mode 100644 index 000000000000..79addbbef087 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.c @@ -0,0 +1,165 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +#include <linux/if_vlan.h> +#include "act.h" +#include "pedit.h" +#include "en/tc_priv.h" +#include "en/mod_hdr.h" + +static int pedit_header_offsets[] = { + [FLOW_ACT_MANGLE_HDR_TYPE_ETH] = offsetof(struct pedit_headers, eth), + [FLOW_ACT_MANGLE_HDR_TYPE_IP4] = offsetof(struct pedit_headers, ip4), + [FLOW_ACT_MANGLE_HDR_TYPE_IP6] = offsetof(struct pedit_headers, ip6), + [FLOW_ACT_MANGLE_HDR_TYPE_TCP] = offsetof(struct pedit_headers, tcp), + [FLOW_ACT_MANGLE_HDR_TYPE_UDP] = offsetof(struct pedit_headers, udp), +}; + +#define pedit_header(_ph, _htype) ((void *)(_ph) + pedit_header_offsets[_htype]) + +static int +set_pedit_val(u8 hdr_type, u32 mask, u32 val, u32 offset, + struct pedit_headers_action *hdrs, + struct netlink_ext_ack *extack) +{ + u32 *curr_pmask, *curr_pval; + + curr_pmask = (u32 *)(pedit_header(&hdrs->masks, hdr_type) + offset); + curr_pval = (u32 *)(pedit_header(&hdrs->vals, hdr_type) + offset); + + if (*curr_pmask & mask) { /* disallow acting twice on the same location */ + NL_SET_ERR_MSG_MOD(extack, + "curr_pmask and new mask same. Acting twice on same location"); + goto out_err; + } + + *curr_pmask |= mask; + *curr_pval |= (val & mask); + + return 0; + +out_err: + return -EOPNOTSUPP; +} + +static int +parse_pedit_to_modify_hdr(struct mlx5e_priv *priv, + const struct flow_action_entry *act, int namespace, + struct mlx5e_tc_flow_parse_attr *parse_attr, + struct pedit_headers_action *hdrs, + struct netlink_ext_ack *extack) +{ + u8 cmd = (act->id == FLOW_ACTION_MANGLE) ? 0 : 1; + u8 htype = act->mangle.htype; + int err = -EOPNOTSUPP; + u32 mask, val, offset; + + if (htype == FLOW_ACT_MANGLE_UNSPEC) { + NL_SET_ERR_MSG_MOD(extack, "legacy pedit isn't offloaded"); + goto out_err; + } + + if (!mlx5e_mod_hdr_max_actions(priv->mdev, namespace)) { + NL_SET_ERR_MSG_MOD(extack, "The pedit offload action is not supported"); + goto out_err; + } + + mask = act->mangle.mask; + val = act->mangle.val; + offset = act->mangle.offset; + + err = set_pedit_val(htype, ~mask, val, offset, &hdrs[cmd], extack); + if (err) + goto out_err; + + hdrs[cmd].pedits++; + + return 0; +out_err: + return err; +} + +static int +parse_pedit_to_reformat(const struct flow_action_entry *act, + struct mlx5e_tc_flow_parse_attr *parse_attr, + struct netlink_ext_ack *extack) +{ + u32 mask, val, offset; + u32 *p; + + if (act->id != FLOW_ACTION_MANGLE) { + NL_SET_ERR_MSG_MOD(extack, "Unsupported action id"); + return -EOPNOTSUPP; + } + + if (act->mangle.htype != FLOW_ACT_MANGLE_HDR_TYPE_ETH) { + NL_SET_ERR_MSG_MOD(extack, "Only Ethernet modification is supported"); + return -EOPNOTSUPP; + } + + mask = ~act->mangle.mask; + val = act->mangle.val; + offset = act->mangle.offset; + p = (u32 *)&parse_attr->eth; + *(p + (offset >> 2)) |= (val & mask); + + return 0; +} + +int +mlx5e_tc_act_pedit_parse_action(struct mlx5e_priv *priv, + const struct flow_action_entry *act, int namespace, + struct mlx5e_tc_flow_parse_attr *parse_attr, + struct pedit_headers_action *hdrs, + struct mlx5e_tc_flow *flow, + struct netlink_ext_ack *extack) +{ + if (flow && flow_flag_test(flow, L3_TO_L2_DECAP)) + return parse_pedit_to_reformat(act, parse_attr, extack); + + return parse_pedit_to_modify_hdr(priv, act, namespace, parse_attr, hdrs, extack); +} + +static bool +tc_act_can_offload_pedit(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + int act_index) +{ + return true; +} + +static int +tc_act_parse_pedit(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + struct mlx5e_tc_flow *flow = parse_state->flow; + enum mlx5_flow_namespace_type ns_type; + int err; + + ns_type = mlx5e_get_flow_namespace(flow); + + err = mlx5e_tc_act_pedit_parse_action(flow->priv, act, ns_type, + attr->parse_attr, parse_state->hdrs, + flow, parse_state->extack); + if (err) + return err; + + if (flow_flag_test(flow, L3_TO_L2_DECAP)) + goto out; + + attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + + if (ns_type == MLX5_FLOW_NAMESPACE_FDB) + esw_attr->split_count = esw_attr->out_count; + +out: + return 0; +} + +struct mlx5e_tc_act mlx5e_tc_act_pedit = { + .can_offload = tc_act_can_offload_pedit, + .parse_action = tc_act_parse_pedit, +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.h new file mode 100644 index 000000000000..da8ab03af58f --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#ifndef __MLX5_EN_TC_ACT_PEDIT_H__ +#define __MLX5_EN_TC_ACT_PEDIT_H__ + +#include "en_tc.h" + +struct pedit_headers { + struct ethhdr eth; + struct vlan_hdr vlan; + struct iphdr ip4; + struct ipv6hdr ip6; + struct tcphdr tcp; + struct udphdr udp; +}; + +struct pedit_headers_action { + struct pedit_headers vals; + struct pedit_headers masks; + u32 pedits; +}; + +int +mlx5e_tc_act_pedit_parse_action(struct mlx5e_priv *priv, + const struct flow_action_entry *act, int namespace, + struct mlx5e_tc_flow_parse_attr *parse_attr, + struct pedit_headers_action *hdrs, + struct mlx5e_tc_flow *flow, + struct netlink_ext_ack *extack); + +#endif /* __MLX5_EN_TC_ACT_PEDIT_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ptype.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ptype.c new file mode 100644 index 000000000000..0819110193dc --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ptype.c @@ -0,0 +1,35 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +#include "act.h" +#include "en/tc_priv.h" + +static bool +tc_act_can_offload_ptype(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + int act_index) +{ + return true; +} + +static int +tc_act_parse_ptype(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + struct netlink_ext_ack *extack = parse_state->extack; + + if (act->ptype != PACKET_HOST) { + NL_SET_ERR_MSG_MOD(extack, "skbedit ptype is only supported with type host"); + return -EOPNOTSUPP; + } + + parse_state->ptype_host = true; + return 0; +} + +struct mlx5e_tc_act mlx5e_tc_act_ptype = { + .can_offload = tc_act_can_offload_ptype, + .parse_action = tc_act_parse_ptype, +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/redirect_ingress.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/redirect_ingress.c new file mode 100644 index 000000000000..1c32e24e528d --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/redirect_ingress.c @@ -0,0 +1,79 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +#include "act.h" +#include "en/tc_priv.h" + +static bool +tc_act_can_offload_redirect_ingress(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + int act_index) +{ + struct netlink_ext_ack *extack = parse_state->extack; + struct mlx5e_tc_flow *flow = parse_state->flow; + struct mlx5e_tc_flow_parse_attr *parse_attr; + struct net_device *out_dev = act->dev; + struct mlx5_esw_flow_attr *esw_attr; + + parse_attr = flow->attr->parse_attr; + esw_attr = flow->attr->esw_attr; + + if (!out_dev) + return false; + + if (!netif_is_ovs_master(out_dev)) { + NL_SET_ERR_MSG_MOD(extack, + "redirect to ingress is supported only for OVS internal ports"); + return false; + } + + if (netif_is_ovs_master(parse_attr->filter_dev)) { + NL_SET_ERR_MSG_MOD(extack, + "redirect to ingress is not supported from internal port"); + return false; + } + + if (!parse_state->ptype_host) { + NL_SET_ERR_MSG_MOD(extack, + "redirect to int port ingress requires ptype=host action"); + return false; + } + + if (esw_attr->out_count) { + NL_SET_ERR_MSG_MOD(extack, + "redirect to int port ingress is supported only as single destination"); + return false; + } + + return true; +} + +static int +tc_act_parse_redirect_ingress(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + struct net_device *out_dev = act->dev; + int err; + + attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | + MLX5_FLOW_CONTEXT_ACTION_COUNT; + + err = mlx5e_set_fwd_to_int_port_actions(priv, attr, out_dev->ifindex, + MLX5E_TC_INT_PORT_INGRESS, + &attr->action, esw_attr->out_count); + if (err) + return err; + + esw_attr->out_count++; + + return 0; +} + +struct mlx5e_tc_act mlx5e_tc_act_redirect_ingress = { + .can_offload = tc_act_can_offload_redirect_ingress, + .parse_action = tc_act_parse_redirect_ingress, +}; + diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.c new file mode 100644 index 000000000000..6699bdf5cf01 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/sample.c @@ -0,0 +1,51 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +#include <net/psample.h> +#include "act.h" +#include "en/tc_priv.h" + +static bool +tc_act_can_offload_sample(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + int act_index) +{ + struct netlink_ext_ack *extack = parse_state->extack; + + if (flow_flag_test(parse_state->flow, CT)) { + NL_SET_ERR_MSG_MOD(extack, + "Sample action with connection tracking is not supported"); + return false; + } + + return true; +} + +static int +tc_act_parse_sample(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + struct mlx5e_sample_attr *sample_attr; + + sample_attr = kzalloc(sizeof(*attr->sample_attr), GFP_KERNEL); + if (!sample_attr) + return -ENOMEM; + + sample_attr->rate = act->sample.rate; + sample_attr->group_num = act->sample.psample_group->group_num; + + if (act->sample.truncate) + sample_attr->trunc_size = act->sample.trunc_size; + + attr->sample_attr = sample_attr; + flow_flag_set(parse_state->flow, SAMPLE); + + return 0; +} + +struct mlx5e_tc_act mlx5e_tc_act_sample = { + .can_offload = tc_act_can_offload_sample, + .parse_action = tc_act_parse_sample, +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/trap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/trap.c new file mode 100644 index 000000000000..046b64c2cec4 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/trap.c @@ -0,0 +1,38 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +#include "act.h" +#include "en/tc_priv.h" + +static bool +tc_act_can_offload_trap(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + int act_index) +{ + struct netlink_ext_ack *extack = parse_state->extack; + + if (parse_state->num_actions != 1) { + NL_SET_ERR_MSG_MOD(extack, "action trap is supported as a sole action only"); + return false; + } + + return true; +} + +static int +tc_act_parse_trap(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | + MLX5_FLOW_CONTEXT_ACTION_COUNT; + attr->flags |= MLX5_ESW_ATTR_FLAG_SLOW_PATH; + + return 0; +} + +struct mlx5e_tc_act mlx5e_tc_act_trap = { + .can_offload = tc_act_can_offload_trap, + .parse_action = tc_act_parse_trap, +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/tun.c new file mode 100644 index 000000000000..6f4a2cf46afd --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/tun.c @@ -0,0 +1,61 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +#include "act.h" +#include "en/tc_tun_encap.h" +#include "en/tc_priv.h" + +static bool +tc_act_can_offload_tun_encap(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + int act_index) +{ + if (!act->tunnel) { + NL_SET_ERR_MSG_MOD(parse_state->extack, + "Zero tunnel attributes is not supported"); + return false; + } + + return true; +} + +static int +tc_act_parse_tun_encap(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + parse_state->tun_info = act->tunnel; + parse_state->encap = true; + + return 0; +} + +static bool +tc_act_can_offload_tun_decap(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + int act_index) +{ + return true; +} + +static int +tc_act_parse_tun_decap(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + parse_state->decap = true; + + return 0; +} + +struct mlx5e_tc_act mlx5e_tc_act_tun_encap = { + .can_offload = tc_act_can_offload_tun_encap, + .parse_action = tc_act_parse_tun_encap, +}; + +struct mlx5e_tc_act mlx5e_tc_act_tun_decap = { + .can_offload = tc_act_can_offload_tun_decap, + .parse_action = tc_act_parse_tun_decap, +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c new file mode 100644 index 000000000000..70fc0c2d8813 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c @@ -0,0 +1,218 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +#include <linux/if_vlan.h> +#include "act.h" +#include "vlan.h" +#include "en/tc_priv.h" + +static int +add_vlan_prio_tag_rewrite_action(struct mlx5e_priv *priv, + struct mlx5e_tc_flow_parse_attr *parse_attr, + struct pedit_headers_action *hdrs, + u32 *action, struct netlink_ext_ack *extack) +{ + const struct flow_action_entry prio_tag_act = { + .vlan.vid = 0, + .vlan.prio = + MLX5_GET(fte_match_set_lyr_2_4, + mlx5e_get_match_headers_value(*action, + &parse_attr->spec), + first_prio) & + MLX5_GET(fte_match_set_lyr_2_4, + mlx5e_get_match_headers_criteria(*action, + &parse_attr->spec), + first_prio), + }; + + return mlx5e_tc_act_vlan_add_rewrite_action(priv, MLX5_FLOW_NAMESPACE_FDB, + &prio_tag_act, parse_attr, hdrs, action, + extack); +} + +static int +parse_tc_vlan_action(struct mlx5e_priv *priv, + const struct flow_action_entry *act, + struct mlx5_esw_flow_attr *attr, + u32 *action, + struct netlink_ext_ack *extack) +{ + u8 vlan_idx = attr->total_vlan; + + if (vlan_idx >= MLX5_FS_VLAN_DEPTH) { + NL_SET_ERR_MSG_MOD(extack, "Total vlans used is greater than supported"); + return -EOPNOTSUPP; + } + + switch (act->id) { + case FLOW_ACTION_VLAN_POP: + if (vlan_idx) { + if (!mlx5_eswitch_vlan_actions_supported(priv->mdev, + MLX5_FS_VLAN_DEPTH)) { + NL_SET_ERR_MSG_MOD(extack, "vlan pop action is not supported"); + return -EOPNOTSUPP; + } + + *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP_2; + } else { + *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP; + } + break; + case FLOW_ACTION_VLAN_PUSH: + attr->vlan_vid[vlan_idx] = act->vlan.vid; + attr->vlan_prio[vlan_idx] = act->vlan.prio; + attr->vlan_proto[vlan_idx] = act->vlan.proto; + if (!attr->vlan_proto[vlan_idx]) + attr->vlan_proto[vlan_idx] = htons(ETH_P_8021Q); + + if (vlan_idx) { + if (!mlx5_eswitch_vlan_actions_supported(priv->mdev, + MLX5_FS_VLAN_DEPTH)) { + NL_SET_ERR_MSG_MOD(extack, + "vlan push action is not supported for vlan depth > 1"); + return -EOPNOTSUPP; + } + + *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2; + } else { + if (!mlx5_eswitch_vlan_actions_supported(priv->mdev, 1) && + (act->vlan.proto != htons(ETH_P_8021Q) || + act->vlan.prio)) { + NL_SET_ERR_MSG_MOD(extack, "vlan push action is not supported"); + return -EOPNOTSUPP; + } + + *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH; + } + break; + default: + NL_SET_ERR_MSG_MOD(extack, "Unexpected action id for VLAN"); + return -EINVAL; + } + + attr->total_vlan = vlan_idx + 1; + + return 0; +} + +int +mlx5e_tc_act_vlan_add_push_action(struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr, + struct net_device **out_dev, + struct netlink_ext_ack *extack) +{ + struct net_device *vlan_dev = *out_dev; + struct flow_action_entry vlan_act = { + .id = FLOW_ACTION_VLAN_PUSH, + .vlan.vid = vlan_dev_vlan_id(vlan_dev), + .vlan.proto = vlan_dev_vlan_proto(vlan_dev), + .vlan.prio = 0, + }; + int err; + + err = parse_tc_vlan_action(priv, &vlan_act, attr->esw_attr, &attr->action, extack); + if (err) + return err; + + rcu_read_lock(); + *out_dev = dev_get_by_index_rcu(dev_net(vlan_dev), dev_get_iflink(vlan_dev)); + rcu_read_unlock(); + if (!*out_dev) + return -ENODEV; + + if (is_vlan_dev(*out_dev)) + err = mlx5e_tc_act_vlan_add_push_action(priv, attr, out_dev, extack); + + return err; +} + +int +mlx5e_tc_act_vlan_add_pop_action(struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr, + struct netlink_ext_ack *extack) +{ + struct flow_action_entry vlan_act = { + .id = FLOW_ACTION_VLAN_POP, + }; + int nest_level, err = 0; + + nest_level = attr->parse_attr->filter_dev->lower_level - + priv->netdev->lower_level; + while (nest_level--) { + err = parse_tc_vlan_action(priv, &vlan_act, attr->esw_attr, &attr->action, + extack); + if (err) + return err; + } + + return err; +} + +static bool +tc_act_can_offload_vlan(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + int act_index) +{ + return true; +} + +static int +tc_act_parse_vlan(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + int err; + + if (act->id == FLOW_ACTION_VLAN_PUSH && + (attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP)) { + /* Replace vlan pop+push with vlan modify */ + attr->action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_POP; + err = mlx5e_tc_act_vlan_add_rewrite_action(priv, MLX5_FLOW_NAMESPACE_FDB, act, + attr->parse_attr, parse_state->hdrs, + &attr->action, parse_state->extack); + } else { + err = parse_tc_vlan_action(priv, act, esw_attr, &attr->action, + parse_state->extack); + } + + if (err) + return err; + + esw_attr->split_count = esw_attr->out_count; + + return 0; +} + +static int +tc_act_post_parse_vlan(struct mlx5e_tc_act_parse_state *parse_state, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr; + struct pedit_headers_action *hdrs = parse_state->hdrs; + struct netlink_ext_ack *extack = parse_state->extack; + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + int err; + + if (MLX5_CAP_GEN(esw->dev, prio_tag_required) && + attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) { + /* For prio tag mode, replace vlan pop with rewrite vlan prio + * tag rewrite. + */ + attr->action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_POP; + err = add_vlan_prio_tag_rewrite_action(priv, parse_attr, hdrs, + &attr->action, extack); + if (err) + return err; + } + + return 0; +} + +struct mlx5e_tc_act mlx5e_tc_act_vlan = { + .can_offload = tc_act_can_offload_vlan, + .parse_action = tc_act_parse_vlan, + .post_parse = tc_act_post_parse_vlan, +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.h new file mode 100644 index 000000000000..3d62f13ab61f --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#ifndef __MLX5_EN_TC_ACT_VLAN_H__ +#define __MLX5_EN_TC_ACT_VLAN_H__ + +#include <net/flow_offload.h> +#include "en/tc_priv.h" + +struct pedit_headers_action; + +int +mlx5e_tc_act_vlan_add_push_action(struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr, + struct net_device **out_dev, + struct netlink_ext_ack *extack); + +int +mlx5e_tc_act_vlan_add_pop_action(struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr, + struct netlink_ext_ack *extack); + +int +mlx5e_tc_act_vlan_add_rewrite_action(struct mlx5e_priv *priv, int namespace, + const struct flow_action_entry *act, + struct mlx5e_tc_flow_parse_attr *parse_attr, + struct pedit_headers_action *hdrs, + u32 *action, struct netlink_ext_ack *extack); + +#endif /* __MLX5_EN_TC_ACT_VLAN_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan_mangle.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan_mangle.c new file mode 100644 index 000000000000..63e36e7f53e3 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan_mangle.c @@ -0,0 +1,87 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +#include <linux/if_vlan.h> +#include "act.h" +#include "vlan.h" +#include "en/tc_priv.h" + +struct pedit_headers_action; + +int +mlx5e_tc_act_vlan_add_rewrite_action(struct mlx5e_priv *priv, int namespace, + const struct flow_action_entry *act, + struct mlx5e_tc_flow_parse_attr *parse_attr, + struct pedit_headers_action *hdrs, + u32 *action, struct netlink_ext_ack *extack) +{ + u16 mask16 = VLAN_VID_MASK; + u16 val16 = act->vlan.vid & VLAN_VID_MASK; + const struct flow_action_entry pedit_act = { + .id = FLOW_ACTION_MANGLE, + .mangle.htype = FLOW_ACT_MANGLE_HDR_TYPE_ETH, + .mangle.offset = offsetof(struct vlan_ethhdr, h_vlan_TCI), + .mangle.mask = ~(u32)be16_to_cpu(*(__be16 *)&mask16), + .mangle.val = (u32)be16_to_cpu(*(__be16 *)&val16), + }; + u8 match_prio_mask, match_prio_val; + void *headers_c, *headers_v; + int err; + + headers_c = mlx5e_get_match_headers_criteria(*action, &parse_attr->spec); + headers_v = mlx5e_get_match_headers_value(*action, &parse_attr->spec); + + if (!(MLX5_GET(fte_match_set_lyr_2_4, headers_c, cvlan_tag) && + MLX5_GET(fte_match_set_lyr_2_4, headers_v, cvlan_tag))) { + NL_SET_ERR_MSG_MOD(extack, "VLAN rewrite action must have VLAN protocol match"); + return -EOPNOTSUPP; + } + + match_prio_mask = MLX5_GET(fte_match_set_lyr_2_4, headers_c, first_prio); + match_prio_val = MLX5_GET(fte_match_set_lyr_2_4, headers_v, first_prio); + if (act->vlan.prio != (match_prio_val & match_prio_mask)) { + NL_SET_ERR_MSG_MOD(extack, "Changing VLAN prio is not supported"); + return -EOPNOTSUPP; + } + + err = mlx5e_tc_act_pedit_parse_action(priv, &pedit_act, namespace, parse_attr, hdrs, + NULL, extack); + *action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + + return err; +} + +static bool +tc_act_can_offload_vlan_mangle(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + int act_index) +{ + return true; +} + +static int +tc_act_parse_vlan_mangle(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + enum mlx5_flow_namespace_type ns_type; + int err; + + ns_type = mlx5e_get_flow_namespace(parse_state->flow); + err = mlx5e_tc_act_vlan_add_rewrite_action(priv, ns_type, act, + attr->parse_attr, parse_state->hdrs, + &attr->action, parse_state->extack); + if (err) + return err; + + if (ns_type == MLX5_FLOW_NAMESPACE_FDB) + attr->esw_attr->split_count = attr->esw_attr->out_count; + + return 0; +} + +struct mlx5e_tc_act mlx5e_tc_act_vlan_mangle = { + .can_offload = tc_act_can_offload_vlan_mangle, + .parse_action = tc_act_parse_vlan_mangle, +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c index df6888c4793c..ff4b4f8a5a9d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c @@ -5,6 +5,7 @@ #include <net/psample.h> #include "en/mapping.h" #include "en/tc/post_act.h" +#include "en/mod_hdr.h" #include "sample.h" #include "eswitch.h" #include "en_tc.h" @@ -255,12 +256,12 @@ sample_modify_hdr_get(struct mlx5_core_dev *mdev, u32 obj_id, goto err_modify_hdr; } - dealloc_mod_hdr_actions(&mod_acts); + mlx5e_mod_hdr_dealloc(&mod_acts); return modify_hdr; err_modify_hdr: err_post_act: - dealloc_mod_hdr_actions(&mod_acts); + mlx5e_mod_hdr_dealloc(&mod_acts); err_set_regc0: return ERR_PTR(err); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c index c1c6e74c79c4..4a0d38d219ed 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c @@ -14,6 +14,7 @@ #include <linux/workqueue.h> #include <linux/refcount.h> #include <linux/xarray.h> +#include <linux/if_macvlan.h> #include "lib/fs_chains.h" #include "en/tc_ct.h" @@ -36,6 +37,12 @@ #define MLX5_CT_LABELS_BITS (mlx5e_tc_attr_to_reg_mappings[LABELS_TO_REG].mlen) #define MLX5_CT_LABELS_MASK GENMASK(MLX5_CT_LABELS_BITS - 1, 0) +/* Statically allocate modify actions for + * ipv6 and port nat (5) + tuple fields (4) + nic mode zone restore (1) = 10. + * This will be increased dynamically if needed (for the ipv6 snat + dnat). + */ +#define MLX5_CT_MIN_MOD_ACTS 10 + #define ct_dbg(fmt, args...)\ netdev_dbg(ct_priv->netdev, "ct_debug: " fmt "\n", ##args) @@ -320,7 +327,33 @@ mlx5_tc_ct_rule_to_tuple_nat(struct mlx5_ct_tuple *tuple, } static int -mlx5_tc_ct_set_tuple_match(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec, +mlx5_tc_ct_get_flow_source_match(struct mlx5_tc_ct_priv *ct_priv, + struct net_device *ndev) +{ + struct mlx5e_priv *other_priv = netdev_priv(ndev); + struct mlx5_core_dev *mdev = ct_priv->dev; + bool vf_rep, uplink_rep; + + vf_rep = mlx5e_eswitch_vf_rep(ndev) && mlx5_same_hw_devs(mdev, other_priv->mdev); + uplink_rep = mlx5e_eswitch_uplink_rep(ndev) && mlx5_same_hw_devs(mdev, other_priv->mdev); + + if (vf_rep) + return MLX5_FLOW_CONTEXT_FLOW_SOURCE_LOCAL_VPORT; + if (uplink_rep) + return MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK; + if (is_vlan_dev(ndev)) + return mlx5_tc_ct_get_flow_source_match(ct_priv, vlan_dev_real_dev(ndev)); + if (netif_is_macvlan(ndev)) + return mlx5_tc_ct_get_flow_source_match(ct_priv, macvlan_dev_real_dev(ndev)); + if (mlx5e_get_tc_tun(ndev) || netif_is_lag_master(ndev)) + return MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK; + + return MLX5_FLOW_CONTEXT_FLOW_SOURCE_ANY_VPORT; +} + +static int +mlx5_tc_ct_set_tuple_match(struct mlx5_tc_ct_priv *ct_priv, + struct mlx5_flow_spec *spec, struct flow_rule *rule) { void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, @@ -335,8 +368,7 @@ mlx5_tc_ct_set_tuple_match(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec, flow_rule_match_basic(rule, &match); - mlx5e_tc_set_ethertype(priv->mdev, &match, true, headers_c, - headers_v); + mlx5e_tc_set_ethertype(ct_priv->dev, &match, true, headers_c, headers_v); MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol, match.mask->ip_proto); MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, @@ -432,6 +464,23 @@ mlx5_tc_ct_set_tuple_match(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec, ntohs(match.key->flags)); } + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_META)) { + struct flow_match_meta match; + + flow_rule_match_meta(rule, &match); + + if (match.key->ingress_ifindex & match.mask->ingress_ifindex) { + struct net_device *dev; + + dev = dev_get_by_index(&init_net, match.key->ingress_ifindex); + if (dev && MLX5_CAP_ESW_FLOWTABLE(ct_priv->dev, flow_source)) + spec->flow_context.flow_source = + mlx5_tc_ct_get_flow_source_match(ct_priv, dev); + + dev_put(dev); + } + } + return 0; } @@ -609,22 +658,15 @@ mlx5_tc_ct_entry_create_nat(struct mlx5_tc_ct_priv *ct_priv, struct flow_action *flow_action = &flow_rule->action; struct mlx5_core_dev *mdev = ct_priv->dev; struct flow_action_entry *act; - size_t action_size; char *modact; int err, i; - action_size = MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto); - flow_action_for_each(i, act, flow_action) { switch (act->id) { case FLOW_ACTION_MANGLE: { - err = alloc_mod_hdr_actions(mdev, ct_priv->ns_type, - mod_acts); - if (err) - return err; - - modact = mod_acts->actions + - mod_acts->num_actions * action_size; + modact = mlx5e_mod_hdr_alloc(mdev, ct_priv->ns_type, mod_acts); + if (IS_ERR(modact)) + return PTR_ERR(modact); err = mlx5_tc_ct_parse_mangle_to_mod_act(act, modact); if (err) @@ -652,7 +694,8 @@ mlx5_tc_ct_entry_create_mod_hdr(struct mlx5_tc_ct_priv *ct_priv, struct mlx5e_mod_hdr_handle **mh, u8 zone_restore_id, bool nat) { - struct mlx5e_tc_mod_hdr_acts mod_acts = {}; + DECLARE_MOD_HDR_ACTS_ACTIONS(actions_arr, MLX5_CT_MIN_MOD_ACTS); + DECLARE_MOD_HDR_ACTS(mod_acts, actions_arr); struct flow_action_entry *meta; u16 ct_state = 0; int err; @@ -706,11 +749,11 @@ mlx5_tc_ct_entry_create_mod_hdr(struct mlx5_tc_ct_priv *ct_priv, attr->modify_hdr = mlx5e_mod_hdr_get(*mh); } - dealloc_mod_hdr_actions(&mod_acts); + mlx5e_mod_hdr_dealloc(&mod_acts); return 0; err_mapping: - dealloc_mod_hdr_actions(&mod_acts); + mlx5e_mod_hdr_dealloc(&mod_acts); mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id); return err; } @@ -770,7 +813,7 @@ mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv, if (ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB) attr->esw_attr->in_mdev = priv->mdev; - mlx5_tc_ct_set_tuple_match(netdev_priv(ct_priv->netdev), spec, flow_rule); + mlx5_tc_ct_set_tuple_match(ct_priv, spec, flow_rule); mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG, entry->tuple.zone, MLX5_CT_ZONE_MASK); zone_rule->rule = mlx5_tc_rule_insert(priv, spec, attr); @@ -907,12 +950,9 @@ mlx5_tc_ct_shared_counter_get(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_tuple rev_tuple = entry->tuple; struct mlx5_ct_counter *shared_counter; struct mlx5_ct_entry *rev_entry; - __be16 tmp_port; /* get the reversed tuple */ - tmp_port = rev_tuple.port.src; - rev_tuple.port.src = rev_tuple.port.dst; - rev_tuple.port.dst = tmp_port; + swap(rev_tuple.port.src, rev_tuple.port.dst); if (rev_tuple.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { __be32 tmp_addr = rev_tuple.ip.src_v4; @@ -1356,9 +1396,13 @@ mlx5_tc_ct_match_add(struct mlx5_tc_ct_priv *priv, int mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv, struct mlx5_flow_attr *attr, + struct mlx5e_tc_mod_hdr_acts *mod_acts, const struct flow_action_entry *act, struct netlink_ext_ack *extack) { + bool clear_action = act->ct.action & TCA_CT_ACT_CLEAR; + int err; + if (!priv) { NL_SET_ERR_MSG_MOD(extack, "offload of ct action isn't available"); @@ -1369,6 +1413,17 @@ mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv, attr->ct_attr.ct_action = act->ct.action; attr->ct_attr.nf_ft = act->ct.flow_table; + if (!clear_action) + goto out; + + err = mlx5_tc_ct_entry_set_registers(priv, mod_acts, 0, 0, 0, 0); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Failed to set registers for ct clear"); + return err; + } + attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + +out: return 0; } @@ -1445,7 +1500,7 @@ static int tc_ct_pre_ct_add_rules(struct mlx5_ct_ft *ct_ft, } pre_ct->miss_rule = rule; - dealloc_mod_hdr_actions(&pre_mod_acts); + mlx5e_mod_hdr_dealloc(&pre_mod_acts); kvfree(spec); return 0; @@ -1454,7 +1509,7 @@ err_miss_rule: err_flow_rule: mlx5_modify_header_dealloc(dev, pre_ct->modify_hdr); err_mapping: - dealloc_mod_hdr_actions(&pre_mod_acts); + mlx5e_mod_hdr_dealloc(&pre_mod_acts); kvfree(spec); return err; } @@ -1850,14 +1905,14 @@ __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv, } attr->ct_attr.ct_flow = ct_flow; - dealloc_mod_hdr_actions(&pre_mod_acts); + mlx5e_mod_hdr_dealloc(&pre_mod_acts); return ct_flow->pre_ct_rule; err_insert_orig: mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr); err_mapping: - dealloc_mod_hdr_actions(&pre_mod_acts); + mlx5e_mod_hdr_dealloc(&pre_mod_acts); mlx5_chains_put_chain_mapping(ct_priv->chains, ct_flow->chain_mapping); err_get_chain: kfree(ct_flow->pre_ct_attr); @@ -1898,23 +1953,16 @@ __mlx5_tc_ct_flow_offload_clear(struct mlx5_tc_ct_priv *ct_priv, memcpy(pre_ct_attr, attr, attr_sz); - err = mlx5_tc_ct_entry_set_registers(ct_priv, mod_acts, 0, 0, 0, 0); - if (err) { - ct_dbg("Failed to set register for ct clear"); - goto err_set_registers; - } - mod_hdr = mlx5_modify_header_alloc(priv->mdev, ct_priv->ns_type, mod_acts->num_actions, mod_acts->actions); if (IS_ERR(mod_hdr)) { err = PTR_ERR(mod_hdr); ct_dbg("Failed to add create ct clear mod hdr"); - goto err_set_registers; + goto err_mod_hdr; } pre_ct_attr->modify_hdr = mod_hdr; - pre_ct_attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; rule = mlx5_tc_rule_insert(priv, orig_spec, pre_ct_attr); if (IS_ERR(rule)) { @@ -1930,7 +1978,7 @@ __mlx5_tc_ct_flow_offload_clear(struct mlx5_tc_ct_priv *ct_priv, err_insert: mlx5_modify_header_dealloc(priv->mdev, mod_hdr); -err_set_registers: +err_mod_hdr: netdev_warn(priv->netdev, "Failed to offload ct clear flow, err %d\n", err); kfree(pre_ct_attr); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h index 363329f4aac6..99662af1e41a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h @@ -110,6 +110,7 @@ int mlx5_tc_ct_add_no_trk_match(struct mlx5_flow_spec *spec); int mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv, struct mlx5_flow_attr *attr, + struct mlx5e_tc_mod_hdr_acts *mod_acts, const struct flow_action_entry *act, struct netlink_ext_ack *extack); @@ -172,6 +173,7 @@ mlx5_tc_ct_add_no_trk_match(struct mlx5_flow_spec *spec) static inline int mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv, struct mlx5_flow_attr *attr, + struct mlx5e_tc_mod_hdr_acts *mod_acts, const struct flow_action_entry *act, struct netlink_ext_ack *extack) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h index 8f64f2c8895a..f832c26ff2c3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h @@ -5,11 +5,14 @@ #define __MLX5_EN_TC_PRIV_H__ #include "en_tc.h" +#include "en/tc/act/act.h" #define MLX5E_TC_FLOW_BASE (MLX5E_TC_FLAG_LAST_EXPORTED_BIT + 1) #define MLX5E_TC_MAX_SPLITS 1 +#define mlx5e_nic_chains(priv) ((priv)->fs.tc.chains) + enum { MLX5E_TC_FLOW_FLAG_INGRESS = MLX5E_TC_FLAG_INGRESS_BIT, MLX5E_TC_FLOW_FLAG_EGRESS = MLX5E_TC_FLAG_EGRESS_BIT, @@ -37,6 +40,7 @@ struct mlx5e_tc_flow_parse_attr { struct mlx5e_tc_mod_hdr_acts mod_hdr_acts; int mirred_ifindex[MLX5_MAX_FLOW_FWD_VPORTS]; struct ethhdr eth; + struct mlx5e_tc_act_parse_state parse_state; }; /* Helper struct for accessing a struct containing list_head array. @@ -102,6 +106,7 @@ struct mlx5e_tc_flow { refcount_t refcnt; struct rcu_head rcu_head; struct completion init_done; + struct completion del_hw_done; int tunnel_id; /* the mapped tunnel id of this flow */ struct mlx5_flow_attr *attr; }; @@ -114,7 +119,11 @@ mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw, struct mlx5_flow_spec *spec, struct mlx5_flow_attr *attr); +bool mlx5e_is_eswitch_flow(struct mlx5e_tc_flow *flow); +bool mlx5e_is_ft_flow(struct mlx5e_tc_flow *flow); bool mlx5e_is_offloaded_flow(struct mlx5e_tc_flow *flow); +int mlx5e_get_flow_namespace(struct mlx5e_tc_flow *flow); +bool mlx5e_same_hw_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv); static inline void __flow_flag_set(struct mlx5e_tc_flow *flow, unsigned long flag) { @@ -175,4 +184,8 @@ struct mlx5_fc *mlx5e_tc_get_counter(struct mlx5e_tc_flow *flow); struct mlx5e_tc_int_port_priv * mlx5e_get_int_port_priv(struct mlx5e_priv *priv); + +void *mlx5e_get_match_headers_value(u32 flags, struct mlx5_flow_spec *spec); +void *mlx5e_get_match_headers_criteria(u32 flags, struct mlx5_flow_spec *spec); + #endif /* __MLX5_EN_TC_PRIV_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c index a5e450973225..33815246fead 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c @@ -103,7 +103,7 @@ static int get_route_and_out_devs(struct mlx5e_priv *priv, } static int mlx5e_route_lookup_ipv4_get(struct mlx5e_priv *priv, - struct net_device *mirred_dev, + struct net_device *dev, struct mlx5e_tc_tun_route_attr *attr) { struct net_device *route_dev; @@ -122,13 +122,13 @@ static int mlx5e_route_lookup_ipv4_get(struct mlx5e_priv *priv, uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH); attr->fl.fl4.flowi4_oif = uplink_dev->ifindex; } else { - struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(mirred_dev); + struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(dev); if (tunnel && tunnel->get_remote_ifindex) - attr->fl.fl4.flowi4_oif = tunnel->get_remote_ifindex(mirred_dev); + attr->fl.fl4.flowi4_oif = tunnel->get_remote_ifindex(dev); } - rt = ip_route_output_key(dev_net(mirred_dev), &attr->fl.fl4); + rt = ip_route_output_key(dev_net(dev), &attr->fl.fl4); if (IS_ERR(rt)) return PTR_ERR(rt); @@ -440,10 +440,10 @@ release_neigh: #if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6) static int mlx5e_route_lookup_ipv6_get(struct mlx5e_priv *priv, - struct net_device *mirred_dev, + struct net_device *dev, struct mlx5e_tc_tun_route_attr *attr) { - struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(mirred_dev); + struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(dev); struct net_device *route_dev; struct net_device *out_dev; struct dst_entry *dst; @@ -451,8 +451,8 @@ static int mlx5e_route_lookup_ipv6_get(struct mlx5e_priv *priv, int ret; if (tunnel && tunnel->get_remote_ifindex) - attr->fl.fl6.flowi6_oif = tunnel->get_remote_ifindex(mirred_dev); - dst = ipv6_stub->ipv6_dst_lookup_flow(dev_net(mirred_dev), NULL, &attr->fl.fl6, + attr->fl.fl6.flowi6_oif = tunnel->get_remote_ifindex(dev); + dst = ipv6_stub->ipv6_dst_lookup_flow(dev_net(dev), NULL, &attr->fl.fl6, NULL); if (IS_ERR(dst)) return PTR_ERR(dst); @@ -708,7 +708,8 @@ release_neigh: int mlx5e_tc_tun_route_lookup(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec, - struct mlx5_flow_attr *flow_attr) + struct mlx5_flow_attr *flow_attr, + struct net_device *filter_dev) { struct mlx5_esw_flow_attr *esw_attr = flow_attr->esw_attr; struct mlx5e_tc_int_port *int_port; @@ -720,14 +721,14 @@ int mlx5e_tc_tun_route_lookup(struct mlx5e_priv *priv, /* Addresses are swapped for decap */ attr.fl.fl4.saddr = esw_attr->rx_tun_attr->dst_ip.v4; attr.fl.fl4.daddr = esw_attr->rx_tun_attr->src_ip.v4; - err = mlx5e_route_lookup_ipv4_get(priv, priv->netdev, &attr); + err = mlx5e_route_lookup_ipv4_get(priv, filter_dev, &attr); } #if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6) else if (flow_attr->tun_ip_version == 6) { /* Addresses are swapped for decap */ attr.fl.fl6.saddr = esw_attr->rx_tun_attr->dst_ip.v6; attr.fl.fl6.daddr = esw_attr->rx_tun_attr->src_ip.v6; - err = mlx5e_route_lookup_ipv6_get(priv, priv->netdev, &attr); + err = mlx5e_route_lookup_ipv6_get(priv, filter_dev, &attr); } #endif else diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h index aa092eaeaec3..b38f693bbb52 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h @@ -94,7 +94,8 @@ mlx5e_tc_tun_update_header_ipv6(struct mlx5e_priv *priv, #endif int mlx5e_tc_tun_route_lookup(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec, - struct mlx5_flow_attr *attr); + struct mlx5_flow_attr *attr, + struct net_device *filter_dev); bool mlx5e_tc_tun_device_to_offload(struct mlx5e_priv *priv, struct net_device *netdev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c index 660cca73c36c..9918ed8c059b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c @@ -245,8 +245,14 @@ static void mlx5e_take_tmp_flow(struct mlx5e_tc_flow *flow, struct list_head *flow_list, int index) { - if (IS_ERR(mlx5e_flow_get(flow))) + if (IS_ERR(mlx5e_flow_get(flow))) { + /* Flow is being deleted concurrently. Wait for it to be + * unoffloaded from hardware, otherwise deleting encap will + * fail. + */ + wait_for_completion(&flow->del_hw_done); return; + } wait_for_completion(&flow->init_done); flow->tmp_entry_index = index; @@ -1153,7 +1159,7 @@ int mlx5e_attach_decap_route(struct mlx5e_priv *priv, tbl_time_before = mlx5e_route_tbl_get_last_update(priv); tbl_time_after = tbl_time_before; - err = mlx5e_tc_tun_route_lookup(priv, &parse_attr->spec, attr); + err = mlx5e_tc_tun_route_lookup(priv, &parse_attr->spec, attr, parse_attr->filter_dev); if (err || !esw_attr->rx_tun_attr->decap_vport) goto out; @@ -1474,7 +1480,7 @@ static void mlx5e_reoffload_decap(struct mlx5e_priv *priv, parse_attr = attr->parse_attr; spec = &parse_attr->spec; - err = mlx5e_tc_tun_route_lookup(priv, spec, attr); + err = mlx5e_tc_tun_route_lookup(priv, spec, attr, parse_attr->filter_dev); if (err) { mlx5_core_warn(priv->mdev, "Failed to lookup route for flow, %d\n", err); @@ -1573,6 +1579,8 @@ mlx5e_init_fib_work_ipv4(struct mlx5e_priv *priv, struct net_device *fib_dev; fen_info = container_of(info, struct fib_entry_notifier_info, info); + if (fen_info->fi->nh) + return NULL; fib_dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev; if (!fib_dev || fib_dev->netdev_ops != &mlx5e_netdev_ops || fen_info->dst_len != 32) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c index 2f0df5cc1a2d..338d65e2c9ce 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c @@ -151,7 +151,7 @@ bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di, rq->stats->xdp_redirect++; return true; default: - bpf_warn_invalid_xdp_action(act); + bpf_warn_invalid_xdp_action(rq->netdev, prog, act); fallthrough; case XDP_ABORTED: xdp_abort: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c index 7b562d2c8a19..279cd8f4e79f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c @@ -11,13 +11,13 @@ static int mlx5e_xsk_map_pool(struct mlx5e_priv *priv, { struct device *dev = mlx5_core_dma_dev(priv->mdev); - return xsk_pool_dma_map(pool, dev, 0); + return xsk_pool_dma_map(pool, dev, DMA_ATTR_SKIP_CPU_SYNC); } static void mlx5e_xsk_unmap_pool(struct mlx5e_priv *priv, struct xsk_buff_pool *pool) { - return xsk_pool_dma_unmap(pool, 0); + return xsk_pool_dma_unmap(pool, DMA_ATTR_SKIP_CPU_SYNC); } static int mlx5e_xsk_get_pools(struct mlx5e_xsk *xsk) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c index 538bc2419bd8..25eac9e20342 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c @@ -4,6 +4,7 @@ #include "setup.h" #include "en/params.h" #include "en/txrx.h" +#include "en/health.h" /* It matches XDP_UMEM_MIN_CHUNK_SIZE, but as this constant is private and may * change unexpectedly, and mlx5e has a minimum valid stride size for striding @@ -67,7 +68,7 @@ static int mlx5e_init_xsk_rq(struct mlx5e_channel *c, rq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); rq->xdpsq = &c->rq_xdpsq; rq->xsk_pool = pool; - rq->stats = &c->priv->channel_stats[c->ix].xskrq; + rq->stats = &c->priv->channel_stats[c->ix]->xskrq; rq->ptp_cyc2time = mlx5_rq_ts_translator(mdev); rq_xdp_ix = c->ix + params->num_channels * MLX5E_RQ_GROUP_XSK; err = mlx5e_rq_set_handlers(rq, params, xsk); @@ -170,7 +171,13 @@ void mlx5e_close_xsk(struct mlx5e_channel *c) void mlx5e_activate_xsk(struct mlx5e_channel *c) { + /* ICOSQ recovery deactivates RQs. Suspend the recovery to avoid + * activating XSKRQ in the middle of recovery. + */ + mlx5e_reporter_icosq_suspend_recovery(c); set_bit(MLX5E_RQ_STATE_ENABLED, &c->xskrq.state); + mlx5e_reporter_icosq_resume_recovery(c); + /* TX queue is created active. */ spin_lock_bh(&c->async_icosq_lock); @@ -180,6 +187,13 @@ void mlx5e_activate_xsk(struct mlx5e_channel *c) void mlx5e_deactivate_xsk(struct mlx5e_channel *c) { - mlx5e_deactivate_rq(&c->xskrq); + /* ICOSQ recovery may reactivate XSKRQ if clear_bit is called in the + * middle of recovery. Suspend the recovery to avoid it. + */ + mlx5e_reporter_icosq_suspend_recovery(c); + clear_bit(MLX5E_RQ_STATE_ENABLED, &c->xskrq.state); + mlx5e_reporter_icosq_resume_recovery(c); + synchronize_net(); /* Sync with NAPI to prevent mlx5e_post_rx_wqes. */ + /* TX queue is disabled on close. */ } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c index fb5397324aa4..2db9573a3fe6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c @@ -191,7 +191,7 @@ static void mlx5e_ipsec_set_swp(struct sk_buff *skb, eseg->swp_inner_l3_offset = skb_inner_network_offset(skb) / 2; eseg->swp_inner_l4_offset = (skb->csum_start + skb->head - skb->data) / 2; - if (skb->protocol == htons(ETH_P_IPV6)) + if (inner_ip_hdr(skb)->version == 6) eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_IPV6; break; default: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c index 62abce008c7b..96064a2033f7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c @@ -55,6 +55,7 @@ struct mlx5e_ktls_offload_context_rx { DECLARE_BITMAP(flags, MLX5E_NUM_PRIV_RX_FLAGS); /* resync */ + spinlock_t lock; /* protects resync fields */ struct mlx5e_ktls_rx_resync_ctx resync; struct list_head list; }; @@ -99,25 +100,6 @@ mlx5e_ktls_rx_resync_create_resp_list(void) return resp_list; } -static int mlx5e_ktls_create_tir(struct mlx5_core_dev *mdev, struct mlx5e_tir *tir, u32 rqtn) -{ - struct mlx5e_tir_builder *builder; - int err; - - builder = mlx5e_tir_builder_alloc(false); - if (!builder) - return -ENOMEM; - - mlx5e_tir_builder_build_rqt(builder, mdev->mlx5e_res.hw_objs.td.tdn, rqtn, false); - mlx5e_tir_builder_build_direct(builder); - mlx5e_tir_builder_build_tls(builder); - err = mlx5e_tir_init(tir, builder, mdev, false); - - mlx5e_tir_builder_free(builder); - - return err; -} - static void accel_rule_handle_work(struct work_struct *work) { struct mlx5e_ktls_offload_context_rx *priv_rx; @@ -386,14 +368,18 @@ static void resync_handle_seq_match(struct mlx5e_ktls_offload_context_rx *priv_r struct mlx5e_icosq *sq; bool trigger_poll; - memcpy(info->rec_seq, &priv_rx->resync.sw_rcd_sn_be, sizeof(info->rec_seq)); - sq = &c->async_icosq; ktls_resync = sq->ktls_resync; + trigger_poll = false; spin_lock_bh(&ktls_resync->lock); - list_add_tail(&priv_rx->list, &ktls_resync->list); - trigger_poll = !test_and_set_bit(MLX5E_SQ_STATE_PENDING_TLS_RX_RESYNC, &sq->state); + spin_lock_bh(&priv_rx->lock); + memcpy(info->rec_seq, &priv_rx->resync.sw_rcd_sn_be, sizeof(info->rec_seq)); + if (list_empty(&priv_rx->list)) { + list_add_tail(&priv_rx->list, &ktls_resync->list); + trigger_poll = !test_and_set_bit(MLX5E_SQ_STATE_PENDING_TLS_RX_RESYNC, &sq->state); + } + spin_unlock_bh(&priv_rx->lock); spin_unlock_bh(&ktls_resync->lock); if (!trigger_poll) @@ -604,7 +590,6 @@ int mlx5e_ktls_add_rx(struct net_device *netdev, struct sock *sk, struct mlx5_core_dev *mdev; struct mlx5e_priv *priv; int rxq, err; - u32 rqtn; tls_ctx = tls_get_ctx(sk); priv = netdev_priv(netdev); @@ -617,6 +602,8 @@ int mlx5e_ktls_add_rx(struct net_device *netdev, struct sock *sk, if (err) goto err_create_key; + INIT_LIST_HEAD(&priv_rx->list); + spin_lock_init(&priv_rx->lock); priv_rx->crypto_info = *(struct tls12_crypto_info_aes_gcm_128 *)crypto_info; @@ -624,13 +611,11 @@ int mlx5e_ktls_add_rx(struct net_device *netdev, struct sock *sk, priv_rx->rxq = rxq; priv_rx->sk = sk; - priv_rx->rq_stats = &priv->channel_stats[rxq].rq; + priv_rx->rq_stats = &priv->channel_stats[rxq]->rq; priv_rx->sw_stats = &priv->tls->sw_stats; mlx5e_set_ktls_rx_priv_ctx(tls_ctx, priv_rx); - rqtn = mlx5e_rx_res_get_rqtn_direct(priv->rx_res, rxq); - - err = mlx5e_ktls_create_tir(mdev, &priv_rx->tir, rqtn); + err = mlx5e_rx_res_tls_tir_create(priv->rx_res, rxq, &priv_rx->tir); if (err) goto err_create_tir; @@ -730,10 +715,14 @@ bool mlx5e_ktls_rx_handle_resync_list(struct mlx5e_channel *c, int budget) priv_rx = list_first_entry(&local_list, struct mlx5e_ktls_offload_context_rx, list); + spin_lock(&priv_rx->lock); cseg = post_static_params(sq, priv_rx); - if (IS_ERR(cseg)) + if (IS_ERR(cseg)) { + spin_unlock(&priv_rx->lock); break; - list_del(&priv_rx->list); + } + list_del_init(&priv_rx->list); + spin_unlock(&priv_rx->lock); db_cseg = cseg; } if (db_cseg) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c index fe5d82fa6e92..49cca6bd49a1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c @@ -556,7 +556,7 @@ static struct mlx5_flow_handle *arfs_add_rule(struct mlx5e_priv *priv, rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1); if (IS_ERR(rule)) { err = PTR_ERR(rule); - priv->channel_stats[arfs_rule->rxq].rq.arfs_err++; + priv->channel_stats[arfs_rule->rxq]->rq.arfs_err++; mlx5e_dbg(HW, priv, "%s: add rule(filter id=%d, rq idx=%d, ip proto=0x%x) failed,err=%d\n", __func__, arfs_rule->filter_id, arfs_rule->rxq, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index c2ea5fad48dd..57d755db1cf5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -314,7 +314,9 @@ void mlx5e_ethtool_get_ringparam(struct mlx5e_priv *priv, } static void mlx5e_get_ringparam(struct net_device *dev, - struct ethtool_ringparam *param) + struct ethtool_ringparam *param, + struct kernel_ethtool_ringparam *kernel_param, + struct netlink_ext_ack *extack) { struct mlx5e_priv *priv = netdev_priv(dev); @@ -380,7 +382,9 @@ unlock: } static int mlx5e_set_ringparam(struct net_device *dev, - struct ethtool_ringparam *param) + struct ethtool_ringparam *param, + struct kernel_ethtool_ringparam *kernel_param, + struct netlink_ext_ack *extack) { struct mlx5e_priv *priv = netdev_priv(dev); @@ -511,7 +515,8 @@ static int mlx5e_set_channels(struct net_device *dev, } int mlx5e_ethtool_get_coalesce(struct mlx5e_priv *priv, - struct ethtool_coalesce *coal) + struct ethtool_coalesce *coal, + struct kernel_ethtool_coalesce *kernel_coal) { struct dim_cq_moder *rx_moder, *tx_moder; @@ -528,6 +533,11 @@ int mlx5e_ethtool_get_coalesce(struct mlx5e_priv *priv, coal->tx_max_coalesced_frames = tx_moder->pkts; coal->use_adaptive_tx_coalesce = priv->channels.params.tx_dim_enabled; + kernel_coal->use_cqe_mode_rx = + MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_RX_CQE_BASED_MODER); + kernel_coal->use_cqe_mode_tx = + MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_TX_CQE_BASED_MODER); + return 0; } @@ -538,7 +548,7 @@ static int mlx5e_get_coalesce(struct net_device *netdev, { struct mlx5e_priv *priv = netdev_priv(netdev); - return mlx5e_ethtool_get_coalesce(priv, coal); + return mlx5e_ethtool_get_coalesce(priv, coal, kernel_coal); } #define MLX5E_MAX_COAL_TIME MLX5_MAX_CQ_PERIOD @@ -578,14 +588,26 @@ mlx5e_set_priv_channels_rx_coalesce(struct mlx5e_priv *priv, struct ethtool_coal } } +/* convert a boolean value of cq_mode to mlx5 period mode + * true : MLX5_CQ_PERIOD_MODE_START_FROM_CQE + * false : MLX5_CQ_PERIOD_MODE_START_FROM_EQE + */ +static int cqe_mode_to_period_mode(bool val) +{ + return val ? MLX5_CQ_PERIOD_MODE_START_FROM_CQE : MLX5_CQ_PERIOD_MODE_START_FROM_EQE; +} + int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv, - struct ethtool_coalesce *coal) + struct ethtool_coalesce *coal, + struct kernel_ethtool_coalesce *kernel_coal, + struct netlink_ext_ack *extack) { struct dim_cq_moder *rx_moder, *tx_moder; struct mlx5_core_dev *mdev = priv->mdev; struct mlx5e_params new_params; bool reset_rx, reset_tx; bool reset = true; + u8 cq_period_mode; int err = 0; if (!MLX5_CAP_GEN(mdev, cq_moderation)) @@ -605,6 +627,12 @@ int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv, return -ERANGE; } + if ((kernel_coal->use_cqe_mode_rx || kernel_coal->use_cqe_mode_tx) && + !MLX5_CAP_GEN(priv->mdev, cq_period_start_from_cqe)) { + NL_SET_ERR_MSG_MOD(extack, "cqe_mode_rx/tx is not supported on this device"); + return -EOPNOTSUPP; + } + mutex_lock(&priv->state_lock); new_params = priv->channels.params; @@ -621,6 +649,18 @@ int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv, reset_rx = !!coal->use_adaptive_rx_coalesce != priv->channels.params.rx_dim_enabled; reset_tx = !!coal->use_adaptive_tx_coalesce != priv->channels.params.tx_dim_enabled; + cq_period_mode = cqe_mode_to_period_mode(kernel_coal->use_cqe_mode_rx); + if (cq_period_mode != rx_moder->cq_period_mode) { + mlx5e_set_rx_cq_mode_params(&new_params, cq_period_mode); + reset_rx = true; + } + + cq_period_mode = cqe_mode_to_period_mode(kernel_coal->use_cqe_mode_tx); + if (cq_period_mode != tx_moder->cq_period_mode) { + mlx5e_set_tx_cq_mode_params(&new_params, cq_period_mode); + reset_tx = true; + } + if (reset_rx) { u8 mode = MLX5E_GET_PFLAG(&new_params, MLX5E_PFLAG_RX_CQE_BASED_MODER); @@ -656,9 +696,9 @@ static int mlx5e_set_coalesce(struct net_device *netdev, struct kernel_ethtool_coalesce *kernel_coal, struct netlink_ext_ack *extack) { - struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_priv *priv = netdev_priv(netdev); - return mlx5e_ethtool_set_coalesce(priv, coal); + return mlx5e_ethtool_set_coalesce(priv, coal, kernel_coal, extack); } static void ptys2ethtool_supported_link(struct mlx5_core_dev *mdev, @@ -1843,24 +1883,19 @@ static int set_pflag_cqe_based_moder(struct net_device *netdev, bool enable, bool is_rx_cq) { struct mlx5e_priv *priv = netdev_priv(netdev); - struct mlx5_core_dev *mdev = priv->mdev; - struct mlx5e_params new_params; - bool mode_changed; u8 cq_period_mode, current_cq_period_mode; + struct mlx5e_params new_params; + + if (enable && !MLX5_CAP_GEN(priv->mdev, cq_period_start_from_cqe)) + return -EOPNOTSUPP; + + cq_period_mode = cqe_mode_to_period_mode(enable); - cq_period_mode = enable ? - MLX5_CQ_PERIOD_MODE_START_FROM_CQE : - MLX5_CQ_PERIOD_MODE_START_FROM_EQE; current_cq_period_mode = is_rx_cq ? priv->channels.params.rx_cq_moderation.cq_period_mode : priv->channels.params.tx_cq_moderation.cq_period_mode; - mode_changed = cq_period_mode != current_cq_period_mode; - - if (cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE && - !MLX5_CAP_GEN(mdev, cq_period_start_from_cqe)) - return -EOPNOTSUPP; - if (!mode_changed) + if (cq_period_mode == current_cq_period_mode) return 0; new_params = priv->channels.params; @@ -1894,7 +1929,7 @@ int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool new_val if (curr_val == new_val) return 0; - if (new_val && !priv->profile->rx_ptp_support && rx_filter) { + if (new_val && !mlx5e_profile_feature_cap(priv->profile, PTP_RX) && rx_filter) { netdev_err(priv->netdev, "Profile doesn't support enabling of CQE compression while hardware time-stamping is enabled.\n"); return -EINVAL; @@ -2358,7 +2393,8 @@ static void mlx5e_get_rmon_stats(struct net_device *netdev, const struct ethtool_ops mlx5e_ethtool_ops = { .supported_coalesce_params = ETHTOOL_COALESCE_USECS | ETHTOOL_COALESCE_MAX_FRAMES | - ETHTOOL_COALESCE_USE_ADAPTIVE, + ETHTOOL_COALESCE_USE_ADAPTIVE | + ETHTOOL_COALESCE_USE_CQE, .get_drvinfo = mlx5e_get_drvinfo, .get_link = ethtool_op_get_link, .get_link_ext_state = mlx5e_get_link_ext_state, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 65571593ec5c..ac69e0aa09bf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -37,6 +37,7 @@ #include <net/geneve.h> #include <linux/bpf.h> #include <linux/if_bridge.h> +#include <linux/filter.h> #include <net/page_pool.h> #include <net/xdp_sock_drv.h> #include "eswitch.h" @@ -479,7 +480,7 @@ static int mlx5e_init_rxq_rq(struct mlx5e_channel *c, struct mlx5e_params *param rq->mdev = mdev; rq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); rq->xdpsq = &c->rq_xdpsq; - rq->stats = &c->priv->channel_stats[c->ix].rq; + rq->stats = &c->priv->channel_stats[c->ix]->rq; rq->ptp_cyc2time = mlx5_rq_ts_translator(mdev); err = mlx5e_rq_set_handlers(rq, params, NULL); if (err) @@ -1087,8 +1088,6 @@ void mlx5e_deactivate_rq(struct mlx5e_rq *rq) void mlx5e_close_rq(struct mlx5e_rq *rq) { cancel_work_sync(&rq->dim.work); - if (rq->icosq) - cancel_work_sync(&rq->icosq->recover_work); cancel_work_sync(&rq->recover_work); mlx5e_destroy_rq(rq); mlx5e_free_rx_descs(rq); @@ -1161,10 +1160,10 @@ static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c, sq->xsk_pool = xsk_pool; sq->stats = sq->xsk_pool ? - &c->priv->channel_stats[c->ix].xsksq : + &c->priv->channel_stats[c->ix]->xsksq : is_redirect ? - &c->priv->channel_stats[c->ix].xdpsq : - &c->priv->channel_stats[c->ix].rq_xdpsq; + &c->priv->channel_stats[c->ix]->xdpsq : + &c->priv->channel_stats[c->ix]->rq_xdpsq; param->wq.db_numa_node = cpu_to_node(c->cpu); err = mlx5_wq_cyc_create(mdev, ¶m->wq, sqc_wq, wq, &sq->wq_ctrl); @@ -1216,9 +1215,20 @@ static void mlx5e_icosq_err_cqe_work(struct work_struct *recover_work) mlx5e_reporter_icosq_cqe_err(sq); } +static void mlx5e_async_icosq_err_cqe_work(struct work_struct *recover_work) +{ + struct mlx5e_icosq *sq = container_of(recover_work, struct mlx5e_icosq, + recover_work); + + /* Not implemented yet. */ + + netdev_warn(sq->channel->netdev, "async_icosq recovery is not implemented\n"); +} + static int mlx5e_alloc_icosq(struct mlx5e_channel *c, struct mlx5e_sq_param *param, - struct mlx5e_icosq *sq) + struct mlx5e_icosq *sq, + work_func_t recover_work_func) { void *sqc_wq = MLX5_ADDR_OF(sqc, param->sqc, wq); struct mlx5_core_dev *mdev = c->mdev; @@ -1239,7 +1249,7 @@ static int mlx5e_alloc_icosq(struct mlx5e_channel *c, if (err) goto err_sq_wq_destroy; - INIT_WORK(&sq->recover_work, mlx5e_icosq_err_cqe_work); + INIT_WORK(&sq->recover_work, recover_work_func); return 0; @@ -1575,13 +1585,14 @@ void mlx5e_tx_err_cqe_work(struct work_struct *recover_work) mlx5e_reporter_tx_err_cqe(sq); } -int mlx5e_open_icosq(struct mlx5e_channel *c, struct mlx5e_params *params, - struct mlx5e_sq_param *param, struct mlx5e_icosq *sq) +static int mlx5e_open_icosq(struct mlx5e_channel *c, struct mlx5e_params *params, + struct mlx5e_sq_param *param, struct mlx5e_icosq *sq, + work_func_t recover_work_func) { struct mlx5e_create_sq_param csp = {}; int err; - err = mlx5e_alloc_icosq(c, param, sq); + err = mlx5e_alloc_icosq(c, param, sq, recover_work_func); if (err) return err; @@ -1620,7 +1631,7 @@ void mlx5e_deactivate_icosq(struct mlx5e_icosq *icosq) synchronize_net(); /* Sync with NAPI. */ } -void mlx5e_close_icosq(struct mlx5e_icosq *sq) +static void mlx5e_close_icosq(struct mlx5e_icosq *sq) { struct mlx5e_channel *c = sq->channel; @@ -1928,7 +1939,7 @@ static int mlx5e_open_sqs(struct mlx5e_channel *c, err = mlx5e_open_txqsq(c, c->priv->tisn[c->lag_port][tc], txq_ix, params, &cparam->txq_sq, &c->sq[tc], tc, qos_queue_group_id, - &c->priv->channel_stats[c->ix].sq[tc]); + &c->priv->channel_stats[c->ix]->sq[tc]); if (err) goto err_close_sqs; } @@ -2084,11 +2095,15 @@ static int mlx5e_open_queues(struct mlx5e_channel *c, spin_lock_init(&c->async_icosq_lock); - err = mlx5e_open_icosq(c, params, &cparam->async_icosq, &c->async_icosq); + err = mlx5e_open_icosq(c, params, &cparam->async_icosq, &c->async_icosq, + mlx5e_async_icosq_err_cqe_work); if (err) goto err_close_xdpsq_cq; - err = mlx5e_open_icosq(c, params, &cparam->icosq, &c->icosq); + mutex_init(&c->icosq_recovery_lock); + + err = mlx5e_open_icosq(c, params, &cparam->icosq, &c->icosq, + mlx5e_icosq_err_cqe_work); if (err) goto err_close_async_icosq; @@ -2156,9 +2171,12 @@ static void mlx5e_close_queues(struct mlx5e_channel *c) mlx5e_close_xdpsq(&c->xdpsq); if (c->xdp) mlx5e_close_xdpsq(&c->rq_xdpsq); + /* The same ICOSQ is used for UMRs for both RQ and XSKRQ. */ + cancel_work_sync(&c->icosq.recover_work); mlx5e_close_rq(&c->rq); mlx5e_close_sqs(c); mlx5e_close_icosq(&c->icosq); + mutex_destroy(&c->icosq_recovery_lock); mlx5e_close_icosq(&c->async_icosq); if (c->xdp) mlx5e_close_cq(&c->rq_xdpsq.cq); @@ -2176,6 +2194,30 @@ static u8 mlx5e_enumerate_lag_port(struct mlx5_core_dev *mdev, int ix) return (ix + port_aff_bias) % mlx5e_get_num_lag_ports(mdev); } +static int mlx5e_channel_stats_alloc(struct mlx5e_priv *priv, int ix, int cpu) +{ + if (ix > priv->stats_nch) { + netdev_warn(priv->netdev, "Unexpected channel stats index %d > %d\n", ix, + priv->stats_nch); + return -EINVAL; + } + + if (priv->channel_stats[ix]) + return 0; + + /* Asymmetric dynamic memory allocation. + * Freed in mlx5e_priv_arrays_free, not on channel closure. + */ + mlx5e_dbg(DRV, priv, "Creating channel stats %d\n", ix); + priv->channel_stats[ix] = kvzalloc_node(sizeof(**priv->channel_stats), + GFP_KERNEL, cpu_to_node(cpu)); + if (!priv->channel_stats[ix]) + return -ENOMEM; + priv->stats_nch++; + + return 0; +} + static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, struct mlx5e_params *params, struct mlx5e_channel_param *cparam, @@ -2193,6 +2235,10 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, if (err) return err; + err = mlx5e_channel_stats_alloc(priv, ix, cpu); + if (err) + return err; + c = kvzalloc_node(sizeof(*c), GFP_KERNEL, cpu_to_node(cpu)); if (!c) return -ENOMEM; @@ -2207,7 +2253,7 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, c->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.hw_objs.mkey); c->num_tc = mlx5e_get_dcb_num_tc(params); c->xdp = !!params->xdp_prog; - c->stats = &priv->channel_stats[ix].ch; + c->stats = &priv->channel_stats[ix]->ch; c->aff_mask = irq_get_effective_affinity_mask(irq); c->lag_port = mlx5e_enumerate_lag_port(priv->mdev, ix); @@ -2598,7 +2644,7 @@ static void mlx5e_set_default_xps_cpumasks(struct mlx5e_priv *priv, } } -int mlx5e_num_channels_changed(struct mlx5e_priv *priv) +static int mlx5e_num_channels_changed(struct mlx5e_priv *priv) { u16 count = priv->channels.params.num_channels; int err; @@ -3371,7 +3417,7 @@ void mlx5e_fold_sw_stats64(struct mlx5e_priv *priv, struct rtnl_link_stats64 *s) int i; for (i = 0; i < priv->stats_nch; i++) { - struct mlx5e_channel_stats *channel_stats = &priv->channel_stats[i]; + struct mlx5e_channel_stats *channel_stats = priv->channel_stats[i]; struct mlx5e_rq_stats *xskrq_stats = &channel_stats->xskrq; struct mlx5e_rq_stats *rq_stats = &channel_stats->rq; int j; @@ -3559,11 +3605,6 @@ static int set_feature_hw_gro(struct net_device *netdev, bool enable) new_params = priv->channels.params; if (enable) { - if (MLX5E_GET_PFLAG(&new_params, MLX5E_PFLAG_RX_CQE_COMPRESS)) { - netdev_warn(netdev, "Can't set HW-GRO when CQE compress is active\n"); - err = -EINVAL; - goto out; - } new_params.packet_merge.type = MLX5E_PACKET_MERGE_SHAMPO; new_params.packet_merge.shampo.match_criteria_type = MLX5_RQC_SHAMPO_MATCH_CRITERIA_TYPE_EXTENDED; @@ -3724,12 +3765,11 @@ static int set_feature_arfs(struct net_device *netdev, bool enable) static int mlx5e_handle_feature(struct net_device *netdev, netdev_features_t *features, - netdev_features_t wanted_features, netdev_features_t feature, mlx5e_feature_handler feature_handler) { - netdev_features_t changes = wanted_features ^ netdev->features; - bool enable = !!(wanted_features & feature); + netdev_features_t changes = *features ^ netdev->features; + bool enable = !!(*features & feature); int err; if (!(changes & feature)) @@ -3737,22 +3777,22 @@ static int mlx5e_handle_feature(struct net_device *netdev, err = feature_handler(netdev, enable); if (err) { + MLX5E_SET_FEATURE(features, feature, !enable); netdev_err(netdev, "%s feature %pNF failed, err %d\n", enable ? "Enable" : "Disable", &feature, err); return err; } - MLX5E_SET_FEATURE(features, feature, enable); return 0; } int mlx5e_set_features(struct net_device *netdev, netdev_features_t features) { - netdev_features_t oper_features = netdev->features; + netdev_features_t oper_features = features; int err = 0; #define MLX5E_HANDLE_FEATURE(feature, handler) \ - mlx5e_handle_feature(netdev, &oper_features, features, feature, handler) + mlx5e_handle_feature(netdev, &oper_features, feature, handler) err |= MLX5E_HANDLE_FEATURE(NETIF_F_LRO, set_feature_lro); err |= MLX5E_HANDLE_FEATURE(NETIF_F_GRO_HW, set_feature_hw_gro); @@ -3826,6 +3866,11 @@ static netdev_features_t mlx5e_fix_features(struct net_device *netdev, features &= ~NETIF_F_RXHASH; if (netdev->features & NETIF_F_RXHASH) netdev_warn(netdev, "Disabling rxhash, not supported when CQE compress is active\n"); + + if (features & NETIF_F_GRO_HW) { + netdev_warn(netdev, "Disabling HW-GRO, not supported when CQE compress is active\n"); + features &= ~NETIF_F_GRO_HW; + } } if (mlx5e_is_uplink_rep(priv)) @@ -4038,7 +4083,7 @@ int mlx5e_hwstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr) goto err_unlock; } - if (!priv->profile->rx_ptp_support) + if (!mlx5e_profile_feature_cap(priv->profile, PTP_RX)) err = mlx5e_hwstamp_config_no_ptp_rx(priv, config.rx_filter != HWTSTAMP_FILTER_NONE); else @@ -4773,15 +4818,22 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) } if (mlx5_vxlan_allowed(mdev->vxlan) || mlx5_geneve_tx_allowed(mdev)) { - netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL; - netdev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL; - netdev->vlan_features |= NETIF_F_GSO_UDP_TUNNEL; + netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL | + NETIF_F_GSO_UDP_TUNNEL_CSUM; + netdev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL | + NETIF_F_GSO_UDP_TUNNEL_CSUM; + netdev->gso_partial_features = NETIF_F_GSO_UDP_TUNNEL_CSUM; + netdev->vlan_features |= NETIF_F_GSO_UDP_TUNNEL | + NETIF_F_GSO_UDP_TUNNEL_CSUM; } if (mlx5e_tunnel_proto_supported_tx(mdev, IPPROTO_GRE)) { - netdev->hw_features |= NETIF_F_GSO_GRE; - netdev->hw_enc_features |= NETIF_F_GSO_GRE; - netdev->gso_partial_features |= NETIF_F_GSO_GRE; + netdev->hw_features |= NETIF_F_GSO_GRE | + NETIF_F_GSO_GRE_CSUM; + netdev->hw_enc_features |= NETIF_F_GSO_GRE | + NETIF_F_GSO_GRE_CSUM; + netdev->gso_partial_features |= NETIF_F_GSO_GRE | + NETIF_F_GSO_GRE_CSUM; } if (mlx5e_tunnel_proto_supported_tx(mdev, IPPROTO_IPIP)) { @@ -5093,9 +5145,23 @@ static const struct mlx5e_profile mlx5e_nic_profile = { .rq_groups = MLX5E_NUM_RQ_GROUPS(XSK), .stats_grps = mlx5e_nic_stats_grps, .stats_grps_num = mlx5e_nic_stats_grps_num, - .rx_ptp_support = true, + .features = BIT(MLX5E_PROFILE_FEATURE_PTP_RX) | + BIT(MLX5E_PROFILE_FEATURE_PTP_TX) | + BIT(MLX5E_PROFILE_FEATURE_QOS_HTB), }; +static int mlx5e_profile_max_num_channels(struct mlx5_core_dev *mdev, + const struct mlx5e_profile *profile) +{ + int nch; + + nch = mlx5e_get_max_num_channels(mdev); + + if (profile->max_nch_limit) + nch = min_t(int, nch, profile->max_nch_limit(mdev)); + return nch; +} + static unsigned int mlx5e_calc_max_nch(struct mlx5_core_dev *mdev, struct net_device *netdev, const struct mlx5e_profile *profile) @@ -5104,7 +5170,7 @@ mlx5e_calc_max_nch(struct mlx5_core_dev *mdev, struct net_device *netdev, unsigned int max_nch, tmp; /* core resources */ - max_nch = mlx5e_get_max_num_channels(mdev); + max_nch = mlx5e_profile_max_num_channels(mdev, profile); /* netdev rx queues */ tmp = netdev->num_rx_queues / max_t(u8, profile->rq_groups, 1); @@ -5128,12 +5194,17 @@ int mlx5e_priv_init(struct mlx5e_priv *priv, struct net_device *netdev, struct mlx5_core_dev *mdev) { + int nch, num_txqs, node, i; + + num_txqs = netdev->num_tx_queues; + nch = mlx5e_calc_max_nch(mdev, netdev, profile); + node = dev_to_node(mlx5_core_dma_dev(mdev)); + /* priv init */ priv->mdev = mdev; priv->netdev = netdev; priv->msglevel = MLX5E_MSG_LEVEL; - priv->max_nch = mlx5e_calc_max_nch(mdev, netdev, profile); - priv->stats_nch = priv->max_nch; + priv->max_nch = nch; priv->max_opened_tc = 1; if (!alloc_cpumask_var(&priv->scratchpad.cpumask, GFP_KERNEL)) @@ -5150,11 +5221,46 @@ int mlx5e_priv_init(struct mlx5e_priv *priv, if (!priv->wq) goto err_free_cpumask; + priv->txq2sq = kcalloc_node(num_txqs, sizeof(*priv->txq2sq), GFP_KERNEL, node); + if (!priv->txq2sq) + goto err_destroy_workqueue; + + priv->tx_rates = kcalloc_node(num_txqs, sizeof(*priv->tx_rates), GFP_KERNEL, node); + if (!priv->tx_rates) + goto err_free_txq2sq; + + priv->channel_tc2realtxq = + kcalloc_node(nch, sizeof(*priv->channel_tc2realtxq), GFP_KERNEL, node); + if (!priv->channel_tc2realtxq) + goto err_free_tx_rates; + + for (i = 0; i < nch; i++) { + priv->channel_tc2realtxq[i] = + kcalloc_node(profile->max_tc, sizeof(**priv->channel_tc2realtxq), + GFP_KERNEL, node); + if (!priv->channel_tc2realtxq[i]) + goto err_free_channel_tc2realtxq; + } + + priv->channel_stats = + kcalloc_node(nch, sizeof(*priv->channel_stats), GFP_KERNEL, node); + if (!priv->channel_stats) + goto err_free_channel_tc2realtxq; + return 0; +err_free_channel_tc2realtxq: + while (--i >= 0) + kfree(priv->channel_tc2realtxq[i]); + kfree(priv->channel_tc2realtxq); +err_free_tx_rates: + kfree(priv->tx_rates); +err_free_txq2sq: + kfree(priv->txq2sq); +err_destroy_workqueue: + destroy_workqueue(priv->wq); err_free_cpumask: free_cpumask_var(priv->scratchpad.cpumask); - return -ENOMEM; } @@ -5166,6 +5272,14 @@ void mlx5e_priv_cleanup(struct mlx5e_priv *priv) if (!priv->mdev) return; + for (i = 0; i < priv->stats_nch; i++) + kvfree(priv->channel_stats[i]); + kfree(priv->channel_stats); + for (i = 0; i < priv->max_nch; i++) + kfree(priv->channel_tc2realtxq[i]); + kfree(priv->channel_tc2realtxq); + kfree(priv->tx_rates); + kfree(priv->txq2sq); destroy_workqueue(priv->wq); free_cpumask_var(priv->scratchpad.cpumask); @@ -5181,13 +5295,44 @@ void mlx5e_priv_cleanup(struct mlx5e_priv *priv) memset(priv, 0, sizeof(*priv)); } +static unsigned int mlx5e_get_max_num_txqs(struct mlx5_core_dev *mdev, + const struct mlx5e_profile *profile) +{ + unsigned int nch, ptp_txqs, qos_txqs; + + nch = mlx5e_profile_max_num_channels(mdev, profile); + + ptp_txqs = MLX5_CAP_GEN(mdev, ts_cqe_to_dest_cqn) && + mlx5e_profile_feature_cap(profile, PTP_TX) ? + profile->max_tc : 0; + + qos_txqs = mlx5_qos_is_supported(mdev) && + mlx5e_profile_feature_cap(profile, QOS_HTB) ? + mlx5e_qos_max_leaf_nodes(mdev) : 0; + + return nch * profile->max_tc + ptp_txqs + qos_txqs; +} + +static unsigned int mlx5e_get_max_num_rxqs(struct mlx5_core_dev *mdev, + const struct mlx5e_profile *profile) +{ + unsigned int nch; + + nch = mlx5e_profile_max_num_channels(mdev, profile); + + return nch * profile->rq_groups; +} + struct net_device * -mlx5e_create_netdev(struct mlx5_core_dev *mdev, const struct mlx5e_profile *profile, - unsigned int txqs, unsigned int rxqs) +mlx5e_create_netdev(struct mlx5_core_dev *mdev, const struct mlx5e_profile *profile) { struct net_device *netdev; + unsigned int txqs, rxqs; int err; + txqs = mlx5e_get_max_num_txqs(mdev, profile); + rxqs = mlx5e_get_max_num_rxqs(mdev, profile); + netdev = alloc_etherdev_mqs(sizeof(struct mlx5e_priv), txqs, rxqs); if (!netdev) { mlx5_core_err(mdev, "alloc_etherdev_mqs() failed\n"); @@ -5432,22 +5577,10 @@ static int mlx5e_probe(struct auxiliary_device *adev, struct mlx5_core_dev *mdev = edev->mdev; struct net_device *netdev; pm_message_t state = {}; - unsigned int txqs, rxqs, ptp_txqs = 0; struct mlx5e_priv *priv; - int qos_sqs = 0; int err; - int nch; - - if (MLX5_CAP_GEN(mdev, ts_cqe_to_dest_cqn)) - ptp_txqs = profile->max_tc; - - if (mlx5_qos_is_supported(mdev)) - qos_sqs = mlx5e_qos_max_leaf_nodes(mdev); - nch = mlx5e_get_max_num_channels(mdev); - txqs = nch * profile->max_tc + ptp_txqs + qos_sqs; - rxqs = nch * profile->rq_groups; - netdev = mlx5e_create_netdev(mdev, profile, txqs, rxqs); + netdev = mlx5e_create_netdev(mdev, profile); if (!netdev) { mlx5_core_err(mdev, "mlx5e_create_netdev failed\n"); return -ENOMEM; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index e58a9ec42553..06d1f46f1688 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -50,6 +50,7 @@ #include "fs_core.h" #include "lib/mlx5.h" #include "lib/devcom.h" +#include "lib/vxlan.h" #define CREATE_TRACE_POINTS #include "diag/en_rep_tracepoint.h" #include "en_accel/ipsec.h" @@ -219,16 +220,22 @@ static int mlx5e_rep_get_sset_count(struct net_device *dev, int sset) } } -static void mlx5e_rep_get_ringparam(struct net_device *dev, - struct ethtool_ringparam *param) +static void +mlx5e_rep_get_ringparam(struct net_device *dev, + struct ethtool_ringparam *param, + struct kernel_ethtool_ringparam *kernel_param, + struct netlink_ext_ack *extack) { struct mlx5e_priv *priv = netdev_priv(dev); mlx5e_ethtool_get_ringparam(priv, param); } -static int mlx5e_rep_set_ringparam(struct net_device *dev, - struct ethtool_ringparam *param) +static int +mlx5e_rep_set_ringparam(struct net_device *dev, + struct ethtool_ringparam *param, + struct kernel_ethtool_ringparam *kernel_param, + struct netlink_ext_ack *extack) { struct mlx5e_priv *priv = netdev_priv(dev); @@ -258,7 +265,7 @@ static int mlx5e_rep_get_coalesce(struct net_device *netdev, { struct mlx5e_priv *priv = netdev_priv(netdev); - return mlx5e_ethtool_get_coalesce(priv, coal); + return mlx5e_ethtool_get_coalesce(priv, coal, kernel_coal); } static int mlx5e_rep_set_coalesce(struct net_device *netdev, @@ -268,7 +275,7 @@ static int mlx5e_rep_set_coalesce(struct net_device *netdev, { struct mlx5e_priv *priv = netdev_priv(netdev); - return mlx5e_ethtool_set_coalesce(priv, coal); + return mlx5e_ethtool_set_coalesce(priv, coal, kernel_coal, extack); } static u32 mlx5e_rep_get_rxfh_key_size(struct net_device *netdev) @@ -585,6 +592,12 @@ bool mlx5e_eswitch_vf_rep(const struct net_device *netdev) return netdev->netdev_ops == &mlx5e_netdev_ops_rep; } +static int mlx5e_rep_max_nch_limit(struct mlx5_core_dev *mdev) +{ + return (1 << MLX5_CAP_GEN(mdev, log_max_tir)) / + mlx5_eswitch_get_total_vports(mdev); +} + static void mlx5e_build_rep_params(struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); @@ -1027,6 +1040,7 @@ static void mlx5e_uplink_rep_enable(struct mlx5e_priv *priv) rtnl_lock(); if (netif_running(netdev)) mlx5e_open(netdev); + udp_tunnel_nic_reset_ntf(priv->netdev); netif_device_attach(netdev); rtnl_unlock(); } @@ -1048,6 +1062,7 @@ static void mlx5e_uplink_rep_disable(struct mlx5e_priv *priv) mlx5_notifier_unregister(mdev, &priv->events_nb); mlx5e_rep_tc_disable(priv); mlx5_lag_remove_netdev(mdev, priv->netdev); + mlx5_vxlan_reset_to_default(mdev->vxlan); } static MLX5E_DEFINE_STATS_GRP(sw_rep, 0); @@ -1080,6 +1095,10 @@ static mlx5e_stats_grp_t mlx5e_ul_rep_stats_grps[] = { &MLX5E_STATS_GRP(pme), &MLX5E_STATS_GRP(channels), &MLX5E_STATS_GRP(per_port_buff_congest), +#ifdef CONFIG_MLX5_EN_IPSEC + &MLX5E_STATS_GRP(ipsec_sw), + &MLX5E_STATS_GRP(ipsec_hw), +#endif }; static unsigned int mlx5e_ul_rep_stats_grps_num(struct mlx5e_priv *priv) @@ -1103,7 +1122,7 @@ static const struct mlx5e_profile mlx5e_rep_profile = { .rq_groups = MLX5E_NUM_RQ_GROUPS(REGULAR), .stats_grps = mlx5e_rep_stats_grps, .stats_grps_num = mlx5e_rep_stats_grps_num, - .rx_ptp_support = false, + .max_nch_limit = mlx5e_rep_max_nch_limit, }; static const struct mlx5e_profile mlx5e_uplink_rep_profile = { @@ -1124,7 +1143,6 @@ static const struct mlx5e_profile mlx5e_uplink_rep_profile = { .rq_groups = MLX5E_NUM_RQ_GROUPS(XSK), .stats_grps = mlx5e_ul_rep_stats_grps, .stats_grps_num = mlx5e_ul_rep_stats_grps_num, - .rx_ptp_support = false, }; /* e-Switch vport representors */ @@ -1175,14 +1193,10 @@ mlx5e_vport_vf_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) struct devlink_port *dl_port; struct net_device *netdev; struct mlx5e_priv *priv; - unsigned int txqs, rxqs; - int nch, err; + int err; profile = &mlx5e_rep_profile; - nch = mlx5e_get_max_num_channels(dev); - txqs = nch * profile->max_tc; - rxqs = nch * profile->rq_groups; - netdev = mlx5e_create_netdev(dev, profile, txqs, rxqs); + netdev = mlx5e_create_netdev(dev, profile); if (!netdev) { mlx5_core_warn(dev, "Failed to create representor netdev for vport %d\n", diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 96967b0a2441..e86ccc22fb82 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -37,6 +37,7 @@ #include <net/ip6_checksum.h> #include <net/page_pool.h> #include <net/inet_ecn.h> +#include <net/gro.h> #include <net/udp.h> #include <net/tcp.h> #include "en.h" @@ -278,8 +279,8 @@ static inline int mlx5e_page_alloc_pool(struct mlx5e_rq *rq, if (unlikely(!dma_info->page)) return -ENOMEM; - dma_info->addr = dma_map_page(rq->pdev, dma_info->page, 0, - PAGE_SIZE, rq->buff.map_dir); + dma_info->addr = dma_map_page_attrs(rq->pdev, dma_info->page, 0, PAGE_SIZE, + rq->buff.map_dir, DMA_ATTR_SKIP_CPU_SYNC); if (unlikely(dma_mapping_error(rq->pdev, dma_info->addr))) { page_pool_recycle_direct(rq->page_pool, dma_info->page); dma_info->page = NULL; @@ -300,7 +301,8 @@ static inline int mlx5e_page_alloc(struct mlx5e_rq *rq, void mlx5e_page_dma_unmap(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info) { - dma_unmap_page(rq->pdev, dma_info->addr, PAGE_SIZE, rq->buff.map_dir); + dma_unmap_page_attrs(rq->pdev, dma_info->addr, PAGE_SIZE, rq->buff.map_dir, + DMA_ATTR_SKIP_CPU_SYNC); } void mlx5e_page_release_dynamic(struct mlx5e_rq *rq, @@ -543,13 +545,13 @@ static int mlx5e_build_shampo_hd_umr(struct mlx5e_rq *rq, u16 klm_entries, u16 index) { struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo; - u16 entries, pi, i, header_offset, err, wqe_bbs, new_entries; + u16 entries, pi, header_offset, err, wqe_bbs, new_entries; u32 lkey = rq->mdev->mlx5e_res.hw_objs.mkey; struct page *page = shampo->last_page; u64 addr = shampo->last_addr; struct mlx5e_dma_info *dma_info; struct mlx5e_umr_wqe *umr_wqe; - int headroom; + int headroom, i; headroom = rq->buff.headroom; new_entries = klm_entries - (shampo->pi & (MLX5_UMR_KLM_ALIGNMENT - 1)); @@ -601,9 +603,7 @@ update_klm: err_unmap: while (--i >= 0) { - if (--index < 0) - index = shampo->hd_per_wq - 1; - dma_info = &shampo->info[index]; + dma_info = &shampo->info[--index]; if (!(i & (MLX5E_SHAMPO_WQ_HEADER_PER_PAGE - 1))) { dma_info->addr = ALIGN_DOWN(dma_info->addr, PAGE_SIZE); mlx5e_page_release(rq, dma_info, true); @@ -620,7 +620,7 @@ static int mlx5e_alloc_rx_hd_mpwqe(struct mlx5e_rq *rq) struct mlx5e_icosq *sq = rq->icosq; int i, err, max_klm_entries, len; - max_klm_entries = MLX5E_MAX_KLM_PER_WQE(rq->mdev); + max_klm_entries = MLX5E_MAX_KLM_PER_WQE; klm_entries = bitmap_find_window(shampo->bitmap, shampo->hd_per_wqe, shampo->hd_per_wq, shampo->pi); @@ -1604,6 +1604,12 @@ static void trigger_report(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) } } +static void mlx5e_handle_rx_err_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) +{ + trigger_report(rq, cqe); + rq->stats->wqe_err++; +} + static void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) { struct mlx5_wq_cyc *wq = &rq->wqe.wq; @@ -1617,8 +1623,7 @@ static void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) cqe_bcnt = be32_to_cpu(cqe->byte_cnt); if (unlikely(MLX5E_RX_ERR_CQE(cqe))) { - trigger_report(rq, cqe); - rq->stats->wqe_err++; + mlx5e_handle_rx_err_cqe(rq, cqe); goto free_wqe; } @@ -1671,7 +1676,7 @@ static void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) cqe_bcnt = be32_to_cpu(cqe->byte_cnt); if (unlikely(MLX5E_RX_ERR_CQE(cqe))) { - rq->stats->wqe_err++; + mlx5e_handle_rx_err_cqe(rq, cqe); goto free_wqe; } @@ -1720,8 +1725,7 @@ static void mlx5e_handle_rx_cqe_mpwrq_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 wi->consumed_strides += cstrides; if (unlikely(MLX5E_RX_ERR_CQE(cqe))) { - trigger_report(rq, cqe); - rq->stats->wqe_err++; + mlx5e_handle_rx_err_cqe(rq, cqe); goto mpwrq_cqe_out; } @@ -1989,8 +1993,7 @@ static void mlx5e_handle_rx_cqe_mpwrq_shampo(struct mlx5e_rq *rq, struct mlx5_cq wi->consumed_strides += cstrides; if (unlikely(MLX5E_RX_ERR_CQE(cqe))) { - trigger_report(rq, cqe); - stats->wqe_err++; + mlx5e_handle_rx_err_cqe(rq, cqe); goto mpwrq_cqe_out; } @@ -2059,8 +2062,7 @@ static void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cq wi->consumed_strides += cstrides; if (unlikely(MLX5E_RX_ERR_CQE(cqe))) { - trigger_report(rq, cqe); - rq->stats->wqe_err++; + mlx5e_handle_rx_err_cqe(rq, cqe); goto mpwrq_cqe_out; } @@ -2190,7 +2192,7 @@ static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq, priv = mlx5i_epriv(netdev); tstamp = &priv->tstamp; - stats = &priv->channel_stats[rq->ix].rq; + stats = rq->stats; flags_rqpn = be32_to_cpu(cqe->flags_rqpn); g = (flags_rqpn >> 28) & 3; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index 2a9bfc3ffa2e..26e326fe503c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -35,6 +35,7 @@ #include "en_accel/tls.h" #include "en_accel/en_accel.h" #include "en/ptp.h" +#include "en/port.h" static unsigned int stats_grps_num(struct mlx5e_priv *priv) { @@ -463,7 +464,7 @@ static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(sw) for (i = 0; i < priv->stats_nch; i++) { struct mlx5e_channel_stats *channel_stats = - &priv->channel_stats[i]; + priv->channel_stats[i]; int j; mlx5e_stats_grp_sw_update_stats_rq_stats(s, &channel_stats->rq); @@ -1158,12 +1159,99 @@ static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(phy) mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); } -void mlx5e_stats_fec_get(struct mlx5e_priv *priv, - struct ethtool_fec_stats *fec_stats) +static int fec_num_lanes(struct mlx5_core_dev *dev) +{ + u32 out[MLX5_ST_SZ_DW(pmlp_reg)] = {}; + u32 in[MLX5_ST_SZ_DW(pmlp_reg)] = {}; + int err; + + MLX5_SET(pmlp_reg, in, local_port, 1); + err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), + MLX5_REG_PMLP, 0, 0); + if (err) + return 0; + + return MLX5_GET(pmlp_reg, out, width); +} + +static int fec_active_mode(struct mlx5_core_dev *mdev) +{ + unsigned long fec_active_long; + u32 fec_active; + + if (mlx5e_get_fec_mode(mdev, &fec_active, NULL)) + return MLX5E_FEC_NOFEC; + + fec_active_long = fec_active; + return find_first_bit(&fec_active_long, sizeof(unsigned long) * BITS_PER_BYTE); +} + +#define MLX5E_STATS_SET_FEC_BLOCK(idx) ({ \ + fec_stats->corrected_blocks.lanes[(idx)] = \ + MLX5E_READ_CTR64_BE_F(ppcnt, phys_layer_cntrs, \ + fc_fec_corrected_blocks_lane##idx); \ + fec_stats->uncorrectable_blocks.lanes[(idx)] = \ + MLX5E_READ_CTR64_BE_F(ppcnt, phys_layer_cntrs, \ + fc_fec_uncorrectable_blocks_lane##idx); \ +}) + +static void fec_set_fc_stats(struct ethtool_fec_stats *fec_stats, + u32 *ppcnt, u8 lanes) +{ + if (lanes > 3) { /* 4 lanes */ + MLX5E_STATS_SET_FEC_BLOCK(3); + MLX5E_STATS_SET_FEC_BLOCK(2); + } + if (lanes > 1) /* 2 lanes */ + MLX5E_STATS_SET_FEC_BLOCK(1); + if (lanes > 0) /* 1 lane */ + MLX5E_STATS_SET_FEC_BLOCK(0); +} + +static void fec_set_rs_stats(struct ethtool_fec_stats *fec_stats, u32 *ppcnt) +{ + fec_stats->corrected_blocks.total = + MLX5E_READ_CTR64_BE_F(ppcnt, phys_layer_cntrs, + rs_fec_corrected_blocks); + fec_stats->uncorrectable_blocks.total = + MLX5E_READ_CTR64_BE_F(ppcnt, phys_layer_cntrs, + rs_fec_uncorrectable_blocks); +} + +static void fec_set_block_stats(struct mlx5e_priv *priv, + struct ethtool_fec_stats *fec_stats) +{ + struct mlx5_core_dev *mdev = priv->mdev; + u32 out[MLX5_ST_SZ_DW(ppcnt_reg)] = {}; + u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {}; + int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); + int mode = fec_active_mode(mdev); + + if (mode == MLX5E_FEC_NOFEC) + return; + + MLX5_SET(ppcnt_reg, in, local_port, 1); + MLX5_SET(ppcnt_reg, in, grp, MLX5_PHYSICAL_LAYER_COUNTERS_GROUP); + if (mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0)) + return; + + switch (mode) { + case MLX5E_FEC_RS_528_514: + case MLX5E_FEC_RS_544_514: + case MLX5E_FEC_LLRS_272_257_1: + fec_set_rs_stats(fec_stats, out); + return; + case MLX5E_FEC_FIRECODE: + fec_set_fc_stats(fec_stats, out, fec_num_lanes(mdev)); + } +} + +static void fec_set_corrected_bits_total(struct mlx5e_priv *priv, + struct ethtool_fec_stats *fec_stats) { u32 ppcnt_phy_statistical[MLX5_ST_SZ_DW(ppcnt_reg)]; struct mlx5_core_dev *mdev = priv->mdev; - u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {0}; + u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {}; int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); if (!MLX5_CAP_PCAM_FEATURE(mdev, ppcnt_statistical_group)) @@ -1181,6 +1269,13 @@ void mlx5e_stats_fec_get(struct mlx5e_priv *priv, phy_corrected_bits); } +void mlx5e_stats_fec_get(struct mlx5e_priv *priv, + struct ethtool_fec_stats *fec_stats) +{ + fec_set_corrected_bits_total(priv, fec_stats); + fec_set_block_stats(priv, fec_stats); +} + #define PPORT_ETH_EXT_OFF(c) \ MLX5_BYTE_OFF(ppcnt_reg, \ counter_set.eth_extended_cntrs_grp_data_layout.c##_high) @@ -2076,7 +2171,7 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(ptp) for (i = 0; i < NUM_PTP_CH_STATS; i++) sprintf(data + (idx++) * ETH_GSTRING_LEN, - ptp_ch_stats_desc[i].format); + "%s", ptp_ch_stats_desc[i].format); if (priv->tx_ptp_opened) { for (tc = 0; tc < priv->max_opened_tc; tc++) @@ -2197,21 +2292,21 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(channels) for (i = 0; i < max_nch; i++) for (j = 0; j < NUM_CH_STATS; j++) data[idx++] = - MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].ch, + MLX5E_READ_CTR64_CPU(&priv->channel_stats[i]->ch, ch_stats_desc, j); for (i = 0; i < max_nch; i++) { for (j = 0; j < NUM_RQ_STATS; j++) data[idx++] = - MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].rq, + MLX5E_READ_CTR64_CPU(&priv->channel_stats[i]->rq, rq_stats_desc, j); for (j = 0; j < NUM_XSKRQ_STATS * is_xsk; j++) data[idx++] = - MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].xskrq, + MLX5E_READ_CTR64_CPU(&priv->channel_stats[i]->xskrq, xskrq_stats_desc, j); for (j = 0; j < NUM_RQ_XDPSQ_STATS; j++) data[idx++] = - MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].rq_xdpsq, + MLX5E_READ_CTR64_CPU(&priv->channel_stats[i]->rq_xdpsq, rq_xdpsq_stats_desc, j); } @@ -2219,17 +2314,17 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(channels) for (i = 0; i < max_nch; i++) for (j = 0; j < NUM_SQ_STATS; j++) data[idx++] = - MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].sq[tc], + MLX5E_READ_CTR64_CPU(&priv->channel_stats[i]->sq[tc], sq_stats_desc, j); for (i = 0; i < max_nch; i++) { for (j = 0; j < NUM_XSKSQ_STATS * is_xsk; j++) data[idx++] = - MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].xsksq, + MLX5E_READ_CTR64_CPU(&priv->channel_stats[i]->xsksq, xsksq_stats_desc, j); for (j = 0; j < NUM_XDPSQ_STATS; j++) data[idx++] = - MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].xdpsq, + MLX5E_READ_CTR64_CPU(&priv->channel_stats[i]->xdpsq, xdpsq_stats_desc, j); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 835caa1c7b74..3d908a7e1406 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -39,10 +39,6 @@ #include <linux/rhashtable.h> #include <linux/refcount.h> #include <linux/completion.h> -#include <linux/if_macvlan.h> -#include <net/tc_act/tc_pedit.h> -#include <net/tc_act/tc_csum.h> -#include <net/psample.h> #include <net/arp.h> #include <net/ipv6_stubs.h> #include <net/bareudp.h> @@ -62,6 +58,7 @@ #include "en/mod_hdr.h" #include "en/tc_tun_encap.h" #include "en/tc/sample.h" +#include "en/tc/act/act.h" #include "lib/devcom.h" #include "lib/geneve.h" #include "lib/fs_chains.h" @@ -70,9 +67,6 @@ #include "lag/lag.h" #include "lag/mp.h" -#define nic_chains(priv) ((priv)->fs.tc.chains) -#define MLX5_MH_ACT_SZ MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto) - #define MLX5E_TC_TABLE_NUM_GROUPS 4 #define MLX5E_TC_TABLE_MAX_GROUP_SIZE BIT(18) @@ -209,12 +203,9 @@ mlx5e_tc_match_to_reg_set_and_get_id(struct mlx5_core_dev *mdev, char *modact; int err; - err = alloc_mod_hdr_actions(mdev, ns, mod_hdr_acts); - if (err) - return err; - - modact = mod_hdr_acts->actions + - (mod_hdr_acts->num_actions * MLX5_MH_ACT_SZ); + modact = mlx5e_mod_hdr_alloc(mdev, ns, mod_hdr_acts); + if (IS_ERR(modact)) + return PTR_ERR(modact); /* Firmware has 5bit length field and 0 means 32bits */ if (mlen == 32) @@ -333,7 +324,7 @@ void mlx5e_tc_match_to_reg_mod_hdr_change(struct mlx5_core_dev *mdev, int mlen = mlx5e_tc_attr_to_reg_mappings[type].mlen; char *modact; - modact = mod_hdr_acts->actions + (act_id * MLX5_MH_ACT_SZ); + modact = mlx5e_mod_hdr_get_item(mod_hdr_acts, act_id); /* Firmware has 5bit length field and 0 means 32bits */ if (mlen == 32) @@ -403,7 +394,7 @@ bool mlx5e_is_eswitch_flow(struct mlx5e_tc_flow *flow) return flow_flag_test(flow, ESWITCH); } -static bool mlx5e_is_ft_flow(struct mlx5e_tc_flow *flow) +bool mlx5e_is_ft_flow(struct mlx5e_tc_flow *flow) { return flow_flag_test(flow, FT); } @@ -413,7 +404,7 @@ bool mlx5e_is_offloaded_flow(struct mlx5e_tc_flow *flow) return flow_flag_test(flow, OFFLOADED); } -static int get_flow_name_space(struct mlx5e_tc_flow *flow) +int mlx5e_get_flow_namespace(struct mlx5e_tc_flow *flow) { return mlx5e_is_eswitch_flow(flow) ? MLX5_FLOW_NAMESPACE_FDB : MLX5_FLOW_NAMESPACE_KERNEL; @@ -424,7 +415,7 @@ get_mod_hdr_table(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - return get_flow_name_space(flow) == MLX5_FLOW_NAMESPACE_FDB ? + return mlx5e_get_flow_namespace(flow) == MLX5_FLOW_NAMESPACE_FDB ? &esw->offloads.mod_hdr : &priv->fs.tc.mod_hdr; } @@ -437,7 +428,7 @@ static int mlx5e_attach_mod_hdr(struct mlx5e_priv *priv, struct mlx5e_mod_hdr_handle *mh; mh = mlx5e_mod_hdr_attach(priv->mdev, get_mod_hdr_table(priv, flow), - get_flow_name_space(flow), + mlx5e_get_flow_namespace(flow), &parse_attr->mod_hdr_acts); if (IS_ERR(mh)) return PTR_ERR(mh); @@ -941,7 +932,7 @@ mlx5e_add_offloaded_nic_rule(struct mlx5e_priv *priv, struct mlx5_flow_attr *attr) { struct mlx5_flow_context *flow_context = &spec->flow_context; - struct mlx5_fs_chains *nic_chains = nic_chains(priv); + struct mlx5_fs_chains *nic_chains = mlx5e_nic_chains(priv); struct mlx5_nic_flow_attr *nic_attr = attr->nic_attr; struct mlx5e_tc_table *tc = &priv->fs.tc; struct mlx5_flow_destination dest[2] = {}; @@ -1076,7 +1067,7 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { err = mlx5e_attach_mod_hdr(priv, flow, parse_attr); - dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts); + mlx5e_mod_hdr_dealloc(&parse_attr->mod_hdr_acts); if (err) return err; } @@ -1095,7 +1086,7 @@ void mlx5e_del_offloaded_nic_rule(struct mlx5e_priv *priv, struct mlx5_flow_handle *rule, struct mlx5_flow_attr *attr) { - struct mlx5_fs_chains *nic_chains = nic_chains(priv); + struct mlx5_fs_chains *nic_chains = mlx5e_nic_chains(priv); mlx5_del_flow_rules(rule); @@ -1127,21 +1118,21 @@ static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv, mutex_lock(&priv->fs.tc.t_lock); if (!mlx5e_tc_num_filters(priv, MLX5_TC_FLAG(NIC_OFFLOAD)) && !IS_ERR_OR_NULL(tc->t)) { - mlx5_chains_put_table(nic_chains(priv), 0, 1, MLX5E_TC_FT_LEVEL); + mlx5_chains_put_table(mlx5e_nic_chains(priv), 0, 1, MLX5E_TC_FT_LEVEL); priv->fs.tc.t = NULL; } mutex_unlock(&priv->fs.tc.t_lock); - kvfree(attr->parse_attr); - if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) mlx5e_detach_mod_hdr(priv, flow); - mlx5_fc_destroy(priv->mdev, attr->counter); + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) + mlx5_fc_destroy(priv->mdev, attr->counter); if (flow_flag_test(flow, HAIRPIN)) mlx5e_hairpin_flow_del(priv, flow); + kvfree(attr->parse_attr); kfree(flow->attr); } @@ -1196,21 +1187,16 @@ void mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw, if (attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH) goto offload_rule_0; - if (flow_flag_test(flow, CT)) { - mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), flow, attr); - return; - } - - if (flow_flag_test(flow, SAMPLE)) { - mlx5e_tc_sample_unoffload(get_sample_priv(flow->priv), flow->rule[0], attr); - return; - } - if (attr->esw_attr->split_count) mlx5_eswitch_del_fwd_rule(esw, flow->rule[1], attr); + if (flow_flag_test(flow, CT)) + mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), flow, attr); + else if (flow_flag_test(flow, SAMPLE)) + mlx5e_tc_sample_unoffload(get_sample_priv(flow->priv), flow->rule[0], attr); + else offload_rule_0: - mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr); + mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr); } struct mlx5_flow_handle * @@ -1308,8 +1294,6 @@ static void remove_unready_flow(struct mlx5e_tc_flow *flow) mutex_unlock(&uplink_priv->unready_flows_lock); } -static bool same_hw_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv); - bool mlx5e_tc_is_vf_tunnel(struct net_device *out_dev, struct net_device *route_dev) { struct mlx5_core_dev *out_mdev, *route_mdev; @@ -1324,7 +1308,7 @@ bool mlx5e_tc_is_vf_tunnel(struct net_device *out_dev, struct net_device *route_ route_mdev->coredev_type != MLX5_COREDEV_VF) return false; - return same_hw_devs(out_priv, route_priv); + return mlx5e_same_hw_devs(out_priv, route_priv); } int mlx5e_tc_query_route_vport(struct net_device *out_dev, struct net_device *route_dev, u16 *vport) @@ -1371,7 +1355,7 @@ int mlx5e_tc_add_flow_mod_hdr(struct mlx5e_priv *priv, struct mlx5_modify_hdr *mod_hdr; mod_hdr = mlx5_modify_header_alloc(priv->mdev, - get_flow_name_space(flow), + mlx5e_get_flow_namespace(flow), mod_hdr_acts->num_actions, mod_hdr_acts->actions); if (IS_ERR(mod_hdr)) @@ -1445,7 +1429,9 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, MLX5_FLOW_NAMESPACE_FDB, VPORT_TO_REG, metadata); if (err) - return err; + goto err_out; + + attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; } } @@ -1461,13 +1447,15 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, if (attr->chain) { NL_SET_ERR_MSG_MOD(extack, "Internal port rule is only supported on chain 0"); - return -EOPNOTSUPP; + err = -EOPNOTSUPP; + goto err_out; } if (attr->dest_chain) { NL_SET_ERR_MSG_MOD(extack, "Internal port rule offload doesn't support goto action"); - return -EOPNOTSUPP; + err = -EOPNOTSUPP; + goto err_out; } int_port = mlx5e_tc_int_port_get(mlx5e_get_int_port_priv(priv), @@ -1475,8 +1463,10 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, flow_flag_test(flow, EGRESS) ? MLX5E_TC_INT_PORT_EGRESS : MLX5E_TC_INT_PORT_INGRESS); - if (IS_ERR(int_port)) - return PTR_ERR(int_port); + if (IS_ERR(int_port)) { + err = PTR_ERR(int_port); + goto err_out; + } esw_attr->int_port = int_port; } @@ -1600,6 +1590,7 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, else mlx5e_tc_unoffload_fdb_rules(esw, flow, attr); } + complete_all(&flow->del_hw_done); if (mlx5_flow_has_geneve_opt(flow)) mlx5_geneve_tlv_option_del(priv->mdev->geneve); @@ -1623,15 +1614,12 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, mlx5_tc_ct_match_del(get_ct_priv(priv), &flow->attr->ct_attr); if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { - dealloc_mod_hdr_actions(&attr->parse_attr->mod_hdr_acts); + mlx5e_mod_hdr_dealloc(&attr->parse_attr->mod_hdr_acts); if (vf_tun && attr->modify_hdr) mlx5_modify_header_dealloc(priv->mdev, attr->modify_hdr); else mlx5e_detach_mod_hdr(priv, flow); } - kfree(attr->sample_attr); - kvfree(attr->parse_attr); - kvfree(attr->esw_attr->rx_tun_attr); if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) mlx5_fc_destroy(esw_attr->counter_dev, attr->counter); @@ -1645,6 +1633,9 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, if (flow_flag_test(flow, L3_TO_L2_DECAP)) mlx5e_detach_decap(priv, flow); + kfree(attr->sample_attr); + kvfree(attr->esw_attr->rx_tun_attr); + kvfree(attr->parse_attr); kfree(flow->attr); } @@ -1947,6 +1938,111 @@ u8 mlx5e_tc_get_ip_version(struct mlx5_flow_spec *spec, bool outer) return ip_version; } +/* Tunnel device follows RFC 6040, see include/net/inet_ecn.h. + * And changes inner ip_ecn depending on inner and outer ip_ecn as follows: + * +---------+----------------------------------------+ + * |Arriving | Arriving Outer Header | + * | Inner +---------+---------+---------+----------+ + * | Header | Not-ECT | ECT(0) | ECT(1) | CE | + * +---------+---------+---------+---------+----------+ + * | Not-ECT | Not-ECT | Not-ECT | Not-ECT | <drop> | + * | ECT(0) | ECT(0) | ECT(0) | ECT(1) | CE* | + * | ECT(1) | ECT(1) | ECT(1) | ECT(1)* | CE* | + * | CE | CE | CE | CE | CE | + * +---------+---------+---------+---------+----------+ + * + * Tc matches on inner after decapsulation on tunnel device, but hw offload matches + * the inner ip_ecn value before hardware decap action. + * + * Cells marked are changed from original inner packet ip_ecn value during decap, and + * so matching those values on inner ip_ecn before decap will fail. + * + * The following helper allows offload when inner ip_ecn won't be changed by outer ip_ecn, + * except for the outer ip_ecn = CE, where in all cases inner ip_ecn will be changed to CE, + * and such we can drop the inner ip_ecn=CE match. + */ + +static int mlx5e_tc_verify_tunnel_ecn(struct mlx5e_priv *priv, + struct flow_cls_offload *f, + bool *match_inner_ecn) +{ + u8 outer_ecn_mask = 0, outer_ecn_key = 0, inner_ecn_mask = 0, inner_ecn_key = 0; + struct flow_rule *rule = flow_cls_offload_flow_rule(f); + struct netlink_ext_ack *extack = f->common.extack; + struct flow_match_ip match; + + *match_inner_ecn = true; + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IP)) { + flow_rule_match_enc_ip(rule, &match); + outer_ecn_key = match.key->tos & INET_ECN_MASK; + outer_ecn_mask = match.mask->tos & INET_ECN_MASK; + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) { + flow_rule_match_ip(rule, &match); + inner_ecn_key = match.key->tos & INET_ECN_MASK; + inner_ecn_mask = match.mask->tos & INET_ECN_MASK; + } + + if (outer_ecn_mask != 0 && outer_ecn_mask != INET_ECN_MASK) { + NL_SET_ERR_MSG_MOD(extack, "Partial match on enc_tos ecn bits isn't supported"); + netdev_warn(priv->netdev, "Partial match on enc_tos ecn bits isn't supported"); + return -EOPNOTSUPP; + } + + if (!outer_ecn_mask) { + if (!inner_ecn_mask) + return 0; + + NL_SET_ERR_MSG_MOD(extack, + "Matching on tos ecn bits without also matching enc_tos ecn bits isn't supported"); + netdev_warn(priv->netdev, + "Matching on tos ecn bits without also matching enc_tos ecn bits isn't supported"); + return -EOPNOTSUPP; + } + + if (inner_ecn_mask && inner_ecn_mask != INET_ECN_MASK) { + NL_SET_ERR_MSG_MOD(extack, + "Partial match on tos ecn bits with match on enc_tos ecn bits isn't supported"); + netdev_warn(priv->netdev, + "Partial match on tos ecn bits with match on enc_tos ecn bits isn't supported"); + return -EOPNOTSUPP; + } + + if (!inner_ecn_mask) + return 0; + + /* Both inner and outer have full mask on ecn */ + + if (outer_ecn_key == INET_ECN_ECT_1) { + /* inner ecn might change by DECAP action */ + + NL_SET_ERR_MSG_MOD(extack, "Match on enc_tos ecn = ECT(1) isn't supported"); + netdev_warn(priv->netdev, "Match on enc_tos ecn = ECT(1) isn't supported"); + return -EOPNOTSUPP; + } + + if (outer_ecn_key != INET_ECN_CE) + return 0; + + if (inner_ecn_key != INET_ECN_CE) { + /* Can't happen in software, as packet ecn will be changed to CE after decap */ + NL_SET_ERR_MSG_MOD(extack, + "Match on tos enc_tos ecn = CE while match on tos ecn != CE isn't supported"); + netdev_warn(priv->netdev, + "Match on tos enc_tos ecn = CE while match on tos ecn != CE isn't supported"); + return -EOPNOTSUPP; + } + + /* outer ecn = CE, inner ecn = CE, as decap will change inner ecn to CE in anycase, + * drop match on inner ecn + */ + *match_inner_ecn = false; + + return 0; +} + static int parse_tunnel_attr(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, struct mlx5_flow_spec *spec, @@ -2052,16 +2148,14 @@ static void *get_match_outer_headers_value(struct mlx5_flow_spec *spec) outer_headers); } -static void *get_match_headers_value(u32 flags, - struct mlx5_flow_spec *spec) +void *mlx5e_get_match_headers_value(u32 flags, struct mlx5_flow_spec *spec) { return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ? get_match_inner_headers_value(spec) : get_match_outer_headers_value(spec); } -static void *get_match_headers_criteria(u32 flags, - struct mlx5_flow_spec *spec) +void *mlx5e_get_match_headers_criteria(u32 flags, struct mlx5_flow_spec *spec) { return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ? get_match_inner_headers_criteria(spec) : @@ -2142,6 +2236,7 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, struct flow_rule *rule = flow_cls_offload_flow_rule(f); struct flow_dissector *dissector = rule->match.dissector; enum fs_flow_table_type fs_type; + bool match_inner_ecn = true; u16 addr_type = 0; u8 ip_proto = 0; u8 *match_level; @@ -2195,6 +2290,10 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, headers_c = get_match_inner_headers_criteria(spec); headers_v = get_match_inner_headers_value(spec); } + + err = mlx5e_tc_verify_tunnel_ecn(priv, f, &match_inner_ecn); + if (err) + return err; } err = mlx5e_flower_parse_meta(filter_dev, f); @@ -2418,10 +2517,12 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, struct flow_match_ip match; flow_rule_match_ip(rule, &match); - MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn, - match.mask->tos & 0x3); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn, - match.key->tos & 0x3); + if (match_inner_ecn) { + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn, + match.mask->tos & 0x3); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn, + match.key->tos & 0x3); + } MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp, match.mask->tos >> 2); @@ -2606,55 +2707,6 @@ static int parse_cls_flower(struct mlx5e_priv *priv, return err; } -struct pedit_headers { - struct ethhdr eth; - struct vlan_hdr vlan; - struct iphdr ip4; - struct ipv6hdr ip6; - struct tcphdr tcp; - struct udphdr udp; -}; - -struct pedit_headers_action { - struct pedit_headers vals; - struct pedit_headers masks; - u32 pedits; -}; - -static int pedit_header_offsets[] = { - [FLOW_ACT_MANGLE_HDR_TYPE_ETH] = offsetof(struct pedit_headers, eth), - [FLOW_ACT_MANGLE_HDR_TYPE_IP4] = offsetof(struct pedit_headers, ip4), - [FLOW_ACT_MANGLE_HDR_TYPE_IP6] = offsetof(struct pedit_headers, ip6), - [FLOW_ACT_MANGLE_HDR_TYPE_TCP] = offsetof(struct pedit_headers, tcp), - [FLOW_ACT_MANGLE_HDR_TYPE_UDP] = offsetof(struct pedit_headers, udp), -}; - -#define pedit_header(_ph, _htype) ((void *)(_ph) + pedit_header_offsets[_htype]) - -static int set_pedit_val(u8 hdr_type, u32 mask, u32 val, u32 offset, - struct pedit_headers_action *hdrs, - struct netlink_ext_ack *extack) -{ - u32 *curr_pmask, *curr_pval; - - curr_pmask = (u32 *)(pedit_header(&hdrs->masks, hdr_type) + offset); - curr_pval = (u32 *)(pedit_header(&hdrs->vals, hdr_type) + offset); - - if (*curr_pmask & mask) { /* disallow acting twice on the same location */ - NL_SET_ERR_MSG_MOD(extack, - "curr_pmask and new mask same. Acting twice on same location"); - goto out_err; - } - - *curr_pmask |= mask; - *curr_pval |= (val & mask); - - return 0; - -out_err: - return -EOPNOTSUPP; -} - struct mlx5_fields { u8 field; u8 field_bsize; @@ -2766,26 +2818,23 @@ static int offload_pedit_fields(struct mlx5e_priv *priv, struct netlink_ext_ack *extack) { struct pedit_headers *set_masks, *add_masks, *set_vals, *add_vals; - int i, action_size, first, last, next_z; void *headers_c, *headers_v, *action, *vals_p; u32 *s_masks_p, *a_masks_p, s_mask, a_mask; struct mlx5e_tc_mod_hdr_acts *mod_acts; - struct mlx5_fields *f; unsigned long mask, field_mask; - int err; + int i, first, last, next_z; + struct mlx5_fields *f; u8 cmd; mod_acts = &parse_attr->mod_hdr_acts; - headers_c = get_match_headers_criteria(*action_flags, &parse_attr->spec); - headers_v = get_match_headers_value(*action_flags, &parse_attr->spec); + headers_c = mlx5e_get_match_headers_criteria(*action_flags, &parse_attr->spec); + headers_v = mlx5e_get_match_headers_value(*action_flags, &parse_attr->spec); set_masks = &hdrs[0].masks; add_masks = &hdrs[1].masks; set_vals = &hdrs[0].vals; add_vals = &hdrs[1].vals; - action_size = MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto); - for (i = 0; i < ARRAY_SIZE(fields); i++) { bool skip; @@ -2853,18 +2902,16 @@ static int offload_pedit_fields(struct mlx5e_priv *priv, return -EOPNOTSUPP; } - err = alloc_mod_hdr_actions(priv->mdev, namespace, mod_acts); - if (err) { + action = mlx5e_mod_hdr_alloc(priv->mdev, namespace, mod_acts); + if (IS_ERR(action)) { NL_SET_ERR_MSG_MOD(extack, "too many pedit actions, can't offload"); mlx5_core_warn(priv->mdev, "mlx5: parsed %d pedit actions, can't do more\n", mod_acts->num_actions); - return err; + return PTR_ERR(action); } - action = mod_acts->actions + - (mod_acts->num_actions * action_size); MLX5_SET(set_action_in, action, action_type, cmd); MLX5_SET(set_action_in, action, field, f->field); @@ -2894,141 +2941,8 @@ static int offload_pedit_fields(struct mlx5e_priv *priv, return 0; } -static int mlx5e_flow_namespace_max_modify_action(struct mlx5_core_dev *mdev, - int namespace) -{ - if (namespace == MLX5_FLOW_NAMESPACE_FDB) /* FDB offloading */ - return MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, max_modify_header_actions); - else /* namespace is MLX5_FLOW_NAMESPACE_KERNEL - NIC offloading */ - return MLX5_CAP_FLOWTABLE_NIC_RX(mdev, max_modify_header_actions); -} - -int alloc_mod_hdr_actions(struct mlx5_core_dev *mdev, - int namespace, - struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts) -{ - int action_size, new_num_actions, max_hw_actions; - size_t new_sz, old_sz; - void *ret; - - if (mod_hdr_acts->num_actions < mod_hdr_acts->max_actions) - return 0; - - action_size = MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto); - - max_hw_actions = mlx5e_flow_namespace_max_modify_action(mdev, - namespace); - new_num_actions = min(max_hw_actions, - mod_hdr_acts->actions ? - mod_hdr_acts->max_actions * 2 : 1); - if (mod_hdr_acts->max_actions == new_num_actions) - return -ENOSPC; - - new_sz = action_size * new_num_actions; - old_sz = mod_hdr_acts->max_actions * action_size; - ret = krealloc(mod_hdr_acts->actions, new_sz, GFP_KERNEL); - if (!ret) - return -ENOMEM; - - memset(ret + old_sz, 0, new_sz - old_sz); - mod_hdr_acts->actions = ret; - mod_hdr_acts->max_actions = new_num_actions; - - return 0; -} - -void dealloc_mod_hdr_actions(struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts) -{ - kfree(mod_hdr_acts->actions); - mod_hdr_acts->actions = NULL; - mod_hdr_acts->num_actions = 0; - mod_hdr_acts->max_actions = 0; -} - static const struct pedit_headers zero_masks = {}; -static int -parse_pedit_to_modify_hdr(struct mlx5e_priv *priv, - const struct flow_action_entry *act, int namespace, - struct mlx5e_tc_flow_parse_attr *parse_attr, - struct pedit_headers_action *hdrs, - struct netlink_ext_ack *extack) -{ - u8 cmd = (act->id == FLOW_ACTION_MANGLE) ? 0 : 1; - int err = -EOPNOTSUPP; - u32 mask, val, offset; - u8 htype; - - htype = act->mangle.htype; - err = -EOPNOTSUPP; /* can't be all optimistic */ - - if (htype == FLOW_ACT_MANGLE_UNSPEC) { - NL_SET_ERR_MSG_MOD(extack, "legacy pedit isn't offloaded"); - goto out_err; - } - - if (!mlx5e_flow_namespace_max_modify_action(priv->mdev, namespace)) { - NL_SET_ERR_MSG_MOD(extack, - "The pedit offload action is not supported"); - goto out_err; - } - - mask = act->mangle.mask; - val = act->mangle.val; - offset = act->mangle.offset; - - err = set_pedit_val(htype, ~mask, val, offset, &hdrs[cmd], extack); - if (err) - goto out_err; - - hdrs[cmd].pedits++; - - return 0; -out_err: - return err; -} - -static int -parse_pedit_to_reformat(const struct flow_action_entry *act, - struct mlx5e_tc_flow_parse_attr *parse_attr, - struct netlink_ext_ack *extack) -{ - u32 mask, val, offset; - u32 *p; - - if (act->id != FLOW_ACTION_MANGLE) { - NL_SET_ERR_MSG_MOD(extack, "Unsupported action id"); - return -EOPNOTSUPP; - } - - if (act->mangle.htype != FLOW_ACT_MANGLE_HDR_TYPE_ETH) { - NL_SET_ERR_MSG_MOD(extack, "Only Ethernet modification is supported"); - return -EOPNOTSUPP; - } - - mask = ~act->mangle.mask; - val = act->mangle.val; - offset = act->mangle.offset; - p = (u32 *)&parse_attr->eth; - *(p + (offset >> 2)) |= (val & mask); - - return 0; -} - -static int parse_tc_pedit_action(struct mlx5e_priv *priv, - const struct flow_action_entry *act, int namespace, - struct mlx5e_tc_flow_parse_attr *parse_attr, - struct pedit_headers_action *hdrs, - struct mlx5e_tc_flow *flow, - struct netlink_ext_ack *extack) -{ - if (flow && flow_flag_test(flow, L3_TO_L2_DECAP)) - return parse_pedit_to_reformat(act, parse_attr, extack); - - return parse_pedit_to_modify_hdr(priv, act, namespace, - parse_attr, hdrs, extack); -} - static int alloc_tc_pedit_action(struct mlx5e_priv *priv, int namespace, struct mlx5e_tc_flow_parse_attr *parse_attr, struct pedit_headers_action *hdrs, @@ -3060,39 +2974,10 @@ static int alloc_tc_pedit_action(struct mlx5e_priv *priv, int namespace, return 0; out_dealloc_parsed_actions: - dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts); + mlx5e_mod_hdr_dealloc(&parse_attr->mod_hdr_acts); return err; } -static bool csum_offload_supported(struct mlx5e_priv *priv, - u32 action, - u32 update_flags, - struct netlink_ext_ack *extack) -{ - u32 prot_flags = TCA_CSUM_UPDATE_FLAG_IPV4HDR | TCA_CSUM_UPDATE_FLAG_TCP | - TCA_CSUM_UPDATE_FLAG_UDP; - - /* The HW recalcs checksums only if re-writing headers */ - if (!(action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)) { - NL_SET_ERR_MSG_MOD(extack, - "TC csum action is only offloaded with pedit"); - netdev_warn(priv->netdev, - "TC csum action is only offloaded with pedit\n"); - return false; - } - - if (update_flags & ~prot_flags) { - NL_SET_ERR_MSG_MOD(extack, - "can't offload TC csum action for some header/s"); - netdev_warn(priv->netdev, - "can't offload TC csum action for some header/s - flags %#x\n", - update_flags); - return false; - } - - return true; -} - struct ip_ttl_word { __u8 ttl; __u8 protocol; @@ -3215,8 +3100,8 @@ static bool modify_header_match_supported(struct mlx5e_priv *priv, u8 ip_proto; int i; - headers_c = get_match_headers_criteria(actions, spec); - headers_v = get_match_headers_value(actions, spec); + headers_c = mlx5e_get_match_headers_criteria(actions, spec); + headers_v = mlx5e_get_match_headers_value(actions, spec); ethertype = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ethertype); /* for non-IP we only re-write MACs, so we're okay */ @@ -3323,7 +3208,7 @@ static bool same_port_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv return priv->mdev == peer_priv->mdev; } -static bool same_hw_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv) +bool mlx5e_same_hw_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv) { struct mlx5_core_dev *fmdev, *pmdev; u64 fsystem_guid, psystem_guid; @@ -3337,126 +3222,45 @@ static bool same_hw_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv) return (fsystem_guid == psystem_guid); } -static bool same_vf_reps(struct mlx5e_priv *priv, - struct net_device *out_dev) -{ - return mlx5e_eswitch_vf_rep(priv->netdev) && - priv->netdev == out_dev; -} - -static int add_vlan_rewrite_action(struct mlx5e_priv *priv, int namespace, - const struct flow_action_entry *act, - struct mlx5e_tc_flow_parse_attr *parse_attr, - struct pedit_headers_action *hdrs, - u32 *action, struct netlink_ext_ack *extack) -{ - u16 mask16 = VLAN_VID_MASK; - u16 val16 = act->vlan.vid & VLAN_VID_MASK; - const struct flow_action_entry pedit_act = { - .id = FLOW_ACTION_MANGLE, - .mangle.htype = FLOW_ACT_MANGLE_HDR_TYPE_ETH, - .mangle.offset = offsetof(struct vlan_ethhdr, h_vlan_TCI), - .mangle.mask = ~(u32)be16_to_cpu(*(__be16 *)&mask16), - .mangle.val = (u32)be16_to_cpu(*(__be16 *)&val16), - }; - u8 match_prio_mask, match_prio_val; - void *headers_c, *headers_v; - int err; - - headers_c = get_match_headers_criteria(*action, &parse_attr->spec); - headers_v = get_match_headers_value(*action, &parse_attr->spec); - - if (!(MLX5_GET(fte_match_set_lyr_2_4, headers_c, cvlan_tag) && - MLX5_GET(fte_match_set_lyr_2_4, headers_v, cvlan_tag))) { - NL_SET_ERR_MSG_MOD(extack, - "VLAN rewrite action must have VLAN protocol match"); - return -EOPNOTSUPP; - } - - match_prio_mask = MLX5_GET(fte_match_set_lyr_2_4, headers_c, first_prio); - match_prio_val = MLX5_GET(fte_match_set_lyr_2_4, headers_v, first_prio); - if (act->vlan.prio != (match_prio_val & match_prio_mask)) { - NL_SET_ERR_MSG_MOD(extack, - "Changing VLAN prio is not supported"); - return -EOPNOTSUPP; - } - - err = parse_tc_pedit_action(priv, &pedit_act, namespace, parse_attr, hdrs, NULL, extack); - *action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; - - return err; -} - static int -add_vlan_prio_tag_rewrite_action(struct mlx5e_priv *priv, - struct mlx5e_tc_flow_parse_attr *parse_attr, - struct pedit_headers_action *hdrs, - u32 *action, struct netlink_ext_ack *extack) -{ - const struct flow_action_entry prio_tag_act = { - .vlan.vid = 0, - .vlan.prio = - MLX5_GET(fte_match_set_lyr_2_4, - get_match_headers_value(*action, - &parse_attr->spec), - first_prio) & - MLX5_GET(fte_match_set_lyr_2_4, - get_match_headers_criteria(*action, - &parse_attr->spec), - first_prio), - }; - - return add_vlan_rewrite_action(priv, MLX5_FLOW_NAMESPACE_FDB, - &prio_tag_act, parse_attr, hdrs, action, - extack); -} - -static int validate_goto_chain(struct mlx5e_priv *priv, - struct mlx5e_tc_flow *flow, - const struct flow_action_entry *act, - u32 actions, - struct netlink_ext_ack *extack) +parse_tc_actions(struct mlx5e_tc_act_parse_state *parse_state, + struct flow_action *flow_action) { - bool is_esw = mlx5e_is_eswitch_flow(flow); + struct netlink_ext_ack *extack = parse_state->extack; + struct mlx5e_tc_flow *flow = parse_state->flow; struct mlx5_flow_attr *attr = flow->attr; - bool ft_flow = mlx5e_is_ft_flow(flow); - u32 dest_chain = act->chain_index; - struct mlx5_fs_chains *chains; - struct mlx5_eswitch *esw; - u32 reformat_and_fwd; - u32 max_chain; + enum mlx5_flow_namespace_type ns_type; + struct mlx5e_priv *priv = flow->priv; + const struct flow_action_entry *act; + struct mlx5e_tc_act *tc_act; + int err, i; - esw = priv->mdev->priv.eswitch; - chains = is_esw ? esw_chains(esw) : nic_chains(priv); - max_chain = mlx5_chains_get_chain_range(chains); - reformat_and_fwd = is_esw ? - MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, reformat_and_fwd_to_table) : - MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, reformat_and_fwd_to_table); - - if (ft_flow) { - NL_SET_ERR_MSG_MOD(extack, "Goto action is not supported"); - return -EOPNOTSUPP; - } + ns_type = mlx5e_get_flow_namespace(flow); - if (!mlx5_chains_backwards_supported(chains) && - dest_chain <= attr->chain) { - NL_SET_ERR_MSG_MOD(extack, - "Goto lower numbered chain isn't supported"); - return -EOPNOTSUPP; - } + flow_action_for_each(i, act, flow_action) { + tc_act = mlx5e_tc_act_get(act->id, ns_type); + if (!tc_act) { + NL_SET_ERR_MSG_MOD(extack, "Not implemented offload action"); + return -EOPNOTSUPP; + } - if (dest_chain > max_chain) { - NL_SET_ERR_MSG_MOD(extack, - "Requested destination chain is out of supported range"); - return -EOPNOTSUPP; + if (!tc_act->can_offload(parse_state, act, i)) + return -EOPNOTSUPP; + + err = tc_act->parse_action(parse_state, act, priv, attr); + if (err) + return err; } - if (actions & (MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT | - MLX5_FLOW_CONTEXT_ACTION_DECAP) && - !reformat_and_fwd) { - NL_SET_ERR_MSG_MOD(extack, - "Goto chain is not allowed if action has reformat or decap"); - return -EOPNOTSUPP; + flow_action_for_each(i, act, flow_action) { + tc_act = mlx5e_tc_act_get(act->id, ns_type); + if (!tc_act || !tc_act->post_parse || + !tc_act->can_offload(parse_state, act, i)) + continue; + + err = tc_act->post_parse(parse_state, priv, attr); + if (err) + return err; } return 0; @@ -3477,19 +3281,19 @@ actions_prepare_mod_hdr_actions(struct mlx5e_priv *priv, !hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits) return 0; - ns_type = get_flow_name_space(flow); + ns_type = mlx5e_get_flow_namespace(flow); err = alloc_tc_pedit_action(priv, ns_type, parse_attr, hdrs, &attr->action, extack); if (err) return err; - /* In case all pedit actions are skipped, remove the MOD_HDR flag. */ if (parse_attr->mod_hdr_acts.num_actions > 0) return 0; + /* In case all pedit actions are skipped, remove the MOD_HDR flag. */ attr->action &= ~MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; - dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts); + mlx5e_mod_hdr_dealloc(&parse_attr->mod_hdr_acts); if (ns_type != MLX5_FLOW_NAMESPACE_FDB) return 0; @@ -3502,19 +3306,9 @@ actions_prepare_mod_hdr_actions(struct mlx5e_priv *priv, } static int -parse_tc_nic_actions(struct mlx5e_priv *priv, - struct flow_action *flow_action, - struct mlx5e_tc_flow *flow, - struct netlink_ext_ack *extack) +flow_action_supported(struct flow_action *flow_action, + struct netlink_ext_ack *extack) { - struct mlx5e_tc_flow_parse_attr *parse_attr; - struct mlx5_flow_attr *attr = flow->attr; - struct pedit_headers_action hdrs[2] = {}; - const struct flow_action_entry *act; - struct mlx5_nic_flow_attr *nic_attr; - u32 action = 0; - int err, i; - if (!flow_action_has_entries(flow_action)) { NL_SET_ERR_MSG_MOD(extack, "Flow action doesn't have any entries"); return -EINVAL; @@ -3526,106 +3320,35 @@ parse_tc_nic_actions(struct mlx5e_priv *priv, return -EOPNOTSUPP; } - nic_attr = attr->nic_attr; - nic_attr->flow_tag = MLX5_FS_DEFAULT_FLOW_TAG; - parse_attr = attr->parse_attr; - - flow_action_for_each(i, act, flow_action) { - switch (act->id) { - case FLOW_ACTION_ACCEPT: - action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | - MLX5_FLOW_CONTEXT_ACTION_COUNT; - break; - case FLOW_ACTION_DROP: - action |= MLX5_FLOW_CONTEXT_ACTION_DROP | - MLX5_FLOW_CONTEXT_ACTION_COUNT; - break; - case FLOW_ACTION_MANGLE: - case FLOW_ACTION_ADD: - err = parse_tc_pedit_action(priv, act, MLX5_FLOW_NAMESPACE_KERNEL, - parse_attr, hdrs, NULL, extack); - if (err) - return err; - - action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; - break; - case FLOW_ACTION_VLAN_MANGLE: - err = add_vlan_rewrite_action(priv, - MLX5_FLOW_NAMESPACE_KERNEL, - act, parse_attr, hdrs, - &action, extack); - if (err) - return err; - - break; - case FLOW_ACTION_CSUM: - if (csum_offload_supported(priv, action, - act->csum_flags, - extack)) - break; - - return -EOPNOTSUPP; - case FLOW_ACTION_REDIRECT: { - struct net_device *peer_dev = act->dev; - - if (priv->netdev->netdev_ops == peer_dev->netdev_ops && - same_hw_devs(priv, netdev_priv(peer_dev))) { - parse_attr->mirred_ifindex[0] = peer_dev->ifindex; - flow_flag_set(flow, HAIRPIN); - action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | - MLX5_FLOW_CONTEXT_ACTION_COUNT; - } else { - NL_SET_ERR_MSG_MOD(extack, - "device is not on same HW, can't offload"); - netdev_warn(priv->netdev, "device %s not on same HW, can't offload\n", - peer_dev->name); - return -EOPNOTSUPP; - } - } - break; - case FLOW_ACTION_MARK: { - u32 mark = act->mark; - - if (mark & ~MLX5E_TC_FLOW_ID_MASK) { - NL_SET_ERR_MSG_MOD(extack, - "Bad flow mark - only 16 bit is supported"); - return -EOPNOTSUPP; - } - - nic_attr->flow_tag = mark; - action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; - } - break; - case FLOW_ACTION_GOTO: - err = validate_goto_chain(priv, flow, act, action, - extack); - if (err) - return err; + return 0; +} - action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | - MLX5_FLOW_CONTEXT_ACTION_COUNT; - attr->dest_chain = act->chain_index; - break; - case FLOW_ACTION_CT: - err = mlx5_tc_ct_parse_action(get_ct_priv(priv), attr, act, extack); - if (err) - return err; +static int +parse_tc_nic_actions(struct mlx5e_priv *priv, + struct flow_action *flow_action, + struct mlx5e_tc_flow *flow, + struct netlink_ext_ack *extack) +{ + struct mlx5e_tc_act_parse_state *parse_state; + struct mlx5e_tc_flow_parse_attr *parse_attr; + struct mlx5_flow_attr *attr = flow->attr; + struct pedit_headers_action *hdrs; + int err; - flow_flag_set(flow, CT); - break; - default: - NL_SET_ERR_MSG_MOD(extack, - "The offload action is not supported in NIC action"); - return -EOPNOTSUPP; - } - } + err = flow_action_supported(flow_action, extack); + if (err) + return err; - attr->action = action; + attr->nic_attr->flow_tag = MLX5_FS_DEFAULT_FLOW_TAG; + parse_attr = attr->parse_attr; + parse_state = &parse_attr->parse_state; + mlx5e_tc_act_init_parse_state(parse_state, flow, flow_action, extack); + parse_state->ct_priv = get_ct_priv(priv); + hdrs = parse_state->hdrs; - if (attr->dest_chain && parse_attr->mirred_ifindex[0]) { - NL_SET_ERR_MSG(extack, "Mirroring goto chain rules isn't supported"); - return -EOPNOTSUPP; - } + err = parse_tc_actions(parse_state, flow_action); + if (err) + return err; err = actions_prepare_mod_hdr_actions(priv, flow, attr, hdrs, extack); if (err) @@ -3647,147 +3370,7 @@ static bool is_merged_eswitch_vfs(struct mlx5e_priv *priv, return (MLX5_CAP_ESW(priv->mdev, merged_eswitch) && mlx5e_eswitch_vf_rep(priv->netdev) && mlx5e_eswitch_vf_rep(peer_netdev) && - same_hw_devs(priv, peer_priv)); -} - -static int parse_tc_vlan_action(struct mlx5e_priv *priv, - const struct flow_action_entry *act, - struct mlx5_esw_flow_attr *attr, - u32 *action, - struct netlink_ext_ack *extack) -{ - u8 vlan_idx = attr->total_vlan; - - if (vlan_idx >= MLX5_FS_VLAN_DEPTH) { - NL_SET_ERR_MSG_MOD(extack, "Total vlans used is greater than supported"); - return -EOPNOTSUPP; - } - - switch (act->id) { - case FLOW_ACTION_VLAN_POP: - if (vlan_idx) { - if (!mlx5_eswitch_vlan_actions_supported(priv->mdev, - MLX5_FS_VLAN_DEPTH)) { - NL_SET_ERR_MSG_MOD(extack, - "vlan pop action is not supported"); - return -EOPNOTSUPP; - } - - *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP_2; - } else { - *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP; - } - break; - case FLOW_ACTION_VLAN_PUSH: - attr->vlan_vid[vlan_idx] = act->vlan.vid; - attr->vlan_prio[vlan_idx] = act->vlan.prio; - attr->vlan_proto[vlan_idx] = act->vlan.proto; - if (!attr->vlan_proto[vlan_idx]) - attr->vlan_proto[vlan_idx] = htons(ETH_P_8021Q); - - if (vlan_idx) { - if (!mlx5_eswitch_vlan_actions_supported(priv->mdev, - MLX5_FS_VLAN_DEPTH)) { - NL_SET_ERR_MSG_MOD(extack, - "vlan push action is not supported for vlan depth > 1"); - return -EOPNOTSUPP; - } - - *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2; - } else { - if (!mlx5_eswitch_vlan_actions_supported(priv->mdev, 1) && - (act->vlan.proto != htons(ETH_P_8021Q) || - act->vlan.prio)) { - NL_SET_ERR_MSG_MOD(extack, - "vlan push action is not supported"); - return -EOPNOTSUPP; - } - - *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH; - } - break; - default: - NL_SET_ERR_MSG_MOD(extack, "Unexpected action id for VLAN"); - return -EINVAL; - } - - attr->total_vlan = vlan_idx + 1; - - return 0; -} - -static struct net_device *get_fdb_out_dev(struct net_device *uplink_dev, - struct net_device *out_dev) -{ - struct net_device *fdb_out_dev = out_dev; - struct net_device *uplink_upper; - - rcu_read_lock(); - uplink_upper = netdev_master_upper_dev_get_rcu(uplink_dev); - if (uplink_upper && netif_is_lag_master(uplink_upper) && - uplink_upper == out_dev) { - fdb_out_dev = uplink_dev; - } else if (netif_is_lag_master(out_dev)) { - fdb_out_dev = bond_option_active_slave_get_rcu(netdev_priv(out_dev)); - if (fdb_out_dev && - (!mlx5e_eswitch_rep(fdb_out_dev) || - !netdev_port_same_parent_id(fdb_out_dev, uplink_dev))) - fdb_out_dev = NULL; - } - rcu_read_unlock(); - return fdb_out_dev; -} - -static int add_vlan_push_action(struct mlx5e_priv *priv, - struct mlx5_flow_attr *attr, - struct net_device **out_dev, - u32 *action, - struct netlink_ext_ack *extack) -{ - struct net_device *vlan_dev = *out_dev; - struct flow_action_entry vlan_act = { - .id = FLOW_ACTION_VLAN_PUSH, - .vlan.vid = vlan_dev_vlan_id(vlan_dev), - .vlan.proto = vlan_dev_vlan_proto(vlan_dev), - .vlan.prio = 0, - }; - int err; - - err = parse_tc_vlan_action(priv, &vlan_act, attr->esw_attr, action, extack); - if (err) - return err; - - rcu_read_lock(); - *out_dev = dev_get_by_index_rcu(dev_net(vlan_dev), dev_get_iflink(vlan_dev)); - rcu_read_unlock(); - if (!*out_dev) - return -ENODEV; - - if (is_vlan_dev(*out_dev)) - err = add_vlan_push_action(priv, attr, out_dev, action, extack); - - return err; -} - -static int add_vlan_pop_action(struct mlx5e_priv *priv, - struct mlx5_flow_attr *attr, - u32 *action, - struct netlink_ext_ack *extack) -{ - struct flow_action_entry vlan_act = { - .id = FLOW_ACTION_VLAN_POP, - }; - int nest_level, err = 0; - - nest_level = attr->parse_attr->filter_dev->lower_level - - priv->netdev->lower_level; - while (nest_level--) { - err = parse_tc_vlan_action(priv, &vlan_act, attr->esw_attr, action, extack); - if (err) - return err; - } - - return err; + mlx5e_same_hw_devs(priv, peer_priv)); } static bool same_hw_reps(struct mlx5e_priv *priv, @@ -3799,7 +3382,7 @@ static bool same_hw_reps(struct mlx5e_priv *priv, return mlx5e_eswitch_rep(priv->netdev) && mlx5e_eswitch_rep(peer_netdev) && - same_hw_devs(priv, peer_priv); + mlx5e_same_hw_devs(priv, peer_priv); } static bool is_lag_dev(struct mlx5e_priv *priv, @@ -3823,66 +3406,6 @@ bool mlx5e_is_valid_eswitch_fwd_dev(struct mlx5e_priv *priv, same_port_devs(priv, netdev_priv(out_dev)); } -static bool is_duplicated_output_device(struct net_device *dev, - struct net_device *out_dev, - int *ifindexes, int if_count, - struct netlink_ext_ack *extack) -{ - int i; - - for (i = 0; i < if_count; i++) { - if (ifindexes[i] == out_dev->ifindex) { - NL_SET_ERR_MSG_MOD(extack, - "can't duplicate output to same device"); - netdev_err(dev, "can't duplicate output to same device: %s\n", - out_dev->name); - return true; - } - } - - return false; -} - -static int verify_uplink_forwarding(struct mlx5e_priv *priv, - struct mlx5e_tc_flow *flow, - struct net_device *out_dev, - struct netlink_ext_ack *extack) -{ - struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr; - struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - struct mlx5e_rep_priv *rep_priv; - - /* Forwarding non encapsulated traffic between - * uplink ports is allowed only if - * termination_table_raw_traffic cap is set. - * - * Input vport was stored attr->in_rep. - * In LAG case, *priv* is the private data of - * uplink which may be not the input vport. - */ - rep_priv = mlx5e_rep_to_rep_priv(attr->in_rep); - - if (!(mlx5e_eswitch_uplink_rep(rep_priv->netdev) && - mlx5e_eswitch_uplink_rep(out_dev))) - return 0; - - if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, - termination_table_raw_traffic)) { - NL_SET_ERR_MSG_MOD(extack, - "devices are both uplink, can't offload forwarding"); - pr_err("devices %s %s are both uplink, can't offload forwarding\n", - priv->netdev->name, out_dev->name); - return -EOPNOTSUPP; - } else if (out_dev != rep_priv->netdev) { - NL_SET_ERR_MSG_MOD(extack, - "devices are not the same uplink, can't offload forwarding"); - pr_err("devices %s %s are both uplink but not the same, can't offload forwarding\n", - priv->netdev->name, out_dev->name); - return -EOPNOTSUPP; - } - return 0; -} - int mlx5e_set_fwd_to_int_port_actions(struct mlx5e_priv *priv, struct mlx5_flow_attr *attr, int ifindex, @@ -3922,384 +3445,33 @@ int mlx5e_set_fwd_to_int_port_actions(struct mlx5e_priv *priv, return 0; } -static int parse_tc_fdb_actions(struct mlx5e_priv *priv, - struct flow_action *flow_action, - struct mlx5e_tc_flow *flow, - struct netlink_ext_ack *extack) +static int +parse_tc_fdb_actions(struct mlx5e_priv *priv, + struct flow_action *flow_action, + struct mlx5e_tc_flow *flow, + struct netlink_ext_ack *extack) { - struct pedit_headers_action hdrs[2] = {}; - struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5e_tc_act_parse_state *parse_state; struct mlx5e_tc_flow_parse_attr *parse_attr; - struct mlx5e_rep_priv *rpriv = priv->ppriv; - struct mlx5e_sample_attr sample_attr = {}; - const struct ip_tunnel_info *info = NULL; struct mlx5_flow_attr *attr = flow->attr; - int ifindexes[MLX5_MAX_FLOW_FWD_VPORTS]; - bool ft_flow = mlx5e_is_ft_flow(flow); - const struct flow_action_entry *act; struct mlx5_esw_flow_attr *esw_attr; - bool encap = false, decap = false; - u32 action = attr->action; - int err, i, if_count = 0; - bool ptype_host = false; - bool mpls_push = false; - - if (!flow_action_has_entries(flow_action)) { - NL_SET_ERR_MSG_MOD(extack, "Flow action doesn't have any entries"); - return -EINVAL; - } + struct pedit_headers_action *hdrs; + int err; - if (!flow_action_hw_stats_check(flow_action, extack, - FLOW_ACTION_HW_STATS_DELAYED_BIT)) { - NL_SET_ERR_MSG_MOD(extack, "Flow action HW stats type is not supported"); - return -EOPNOTSUPP; - } + err = flow_action_supported(flow_action, extack); + if (err) + return err; esw_attr = attr->esw_attr; parse_attr = attr->parse_attr; + parse_state = &parse_attr->parse_state; + mlx5e_tc_act_init_parse_state(parse_state, flow, flow_action, extack); + parse_state->ct_priv = get_ct_priv(priv); + hdrs = parse_state->hdrs; - flow_action_for_each(i, act, flow_action) { - switch (act->id) { - case FLOW_ACTION_ACCEPT: - action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | - MLX5_FLOW_CONTEXT_ACTION_COUNT; - attr->flags |= MLX5_ESW_ATTR_FLAG_ACCEPT; - break; - case FLOW_ACTION_PTYPE: - if (act->ptype != PACKET_HOST) { - NL_SET_ERR_MSG_MOD(extack, - "skbedit ptype is only supported with type host"); - return -EOPNOTSUPP; - } - - ptype_host = true; - break; - case FLOW_ACTION_DROP: - action |= MLX5_FLOW_CONTEXT_ACTION_DROP | - MLX5_FLOW_CONTEXT_ACTION_COUNT; - break; - case FLOW_ACTION_TRAP: - if (!flow_offload_has_one_action(flow_action)) { - NL_SET_ERR_MSG_MOD(extack, - "action trap is supported as a sole action only"); - return -EOPNOTSUPP; - } - action |= (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | - MLX5_FLOW_CONTEXT_ACTION_COUNT); - attr->flags |= MLX5_ESW_ATTR_FLAG_SLOW_PATH; - break; - case FLOW_ACTION_MPLS_PUSH: - if (!MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, - reformat_l2_to_l3_tunnel) || - act->mpls_push.proto != htons(ETH_P_MPLS_UC)) { - NL_SET_ERR_MSG_MOD(extack, - "mpls push is supported only for mpls_uc protocol"); - return -EOPNOTSUPP; - } - mpls_push = true; - break; - case FLOW_ACTION_MPLS_POP: - /* we only support mpls pop if it is the first action - * and the filter net device is bareudp. Subsequent - * actions can be pedit and the last can be mirred - * egress redirect. - */ - if (i) { - NL_SET_ERR_MSG_MOD(extack, - "mpls pop supported only as first action"); - return -EOPNOTSUPP; - } - if (!netif_is_bareudp(parse_attr->filter_dev)) { - NL_SET_ERR_MSG_MOD(extack, - "mpls pop supported only on bareudp devices"); - return -EOPNOTSUPP; - } - - parse_attr->eth.h_proto = act->mpls_pop.proto; - action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; - flow_flag_set(flow, L3_TO_L2_DECAP); - break; - case FLOW_ACTION_MANGLE: - case FLOW_ACTION_ADD: - err = parse_tc_pedit_action(priv, act, MLX5_FLOW_NAMESPACE_FDB, - parse_attr, hdrs, flow, extack); - if (err) - return err; - - if (!flow_flag_test(flow, L3_TO_L2_DECAP)) { - action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; - esw_attr->split_count = esw_attr->out_count; - } - break; - case FLOW_ACTION_CSUM: - if (csum_offload_supported(priv, action, - act->csum_flags, extack)) - break; - - return -EOPNOTSUPP; - case FLOW_ACTION_REDIRECT_INGRESS: { - struct net_device *out_dev; - - out_dev = act->dev; - if (!out_dev) - return -EOPNOTSUPP; - - if (!netif_is_ovs_master(out_dev)) { - NL_SET_ERR_MSG_MOD(extack, - "redirect to ingress is supported only for OVS internal ports"); - return -EOPNOTSUPP; - } - - if (netif_is_ovs_master(parse_attr->filter_dev)) { - NL_SET_ERR_MSG_MOD(extack, - "redirect to ingress is not supported from internal port"); - return -EOPNOTSUPP; - } - - if (!ptype_host) { - NL_SET_ERR_MSG_MOD(extack, - "redirect to int port ingress requires ptype=host action"); - return -EOPNOTSUPP; - } - - if (esw_attr->out_count) { - NL_SET_ERR_MSG_MOD(extack, - "redirect to int port ingress is supported only as single destination"); - return -EOPNOTSUPP; - } - - action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | - MLX5_FLOW_CONTEXT_ACTION_COUNT; - - err = mlx5e_set_fwd_to_int_port_actions(priv, attr, out_dev->ifindex, - MLX5E_TC_INT_PORT_INGRESS, - &action, esw_attr->out_count); - if (err) - return err; - - esw_attr->out_count++; - - break; - } - case FLOW_ACTION_REDIRECT: - case FLOW_ACTION_MIRRED: { - struct mlx5e_priv *out_priv; - struct net_device *out_dev; - - out_dev = act->dev; - if (!out_dev) { - /* out_dev is NULL when filters with - * non-existing mirred device are replayed to - * the driver. - */ - return -EINVAL; - } - - if (mpls_push && !netif_is_bareudp(out_dev)) { - NL_SET_ERR_MSG_MOD(extack, - "mpls is supported only through a bareudp device"); - return -EOPNOTSUPP; - } - - if (ft_flow && out_dev == priv->netdev) { - /* Ignore forward to self rules generated - * by adding both mlx5 devs to the flow table - * block on a normal nft offload setup. - */ - return -EOPNOTSUPP; - } - - if (esw_attr->out_count >= MLX5_MAX_FLOW_FWD_VPORTS) { - NL_SET_ERR_MSG_MOD(extack, - "can't support more output ports, can't offload forwarding"); - netdev_warn(priv->netdev, - "can't support more than %d output ports, can't offload forwarding\n", - esw_attr->out_count); - return -EOPNOTSUPP; - } - - action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | - MLX5_FLOW_CONTEXT_ACTION_COUNT; - if (encap) { - parse_attr->mirred_ifindex[esw_attr->out_count] = - out_dev->ifindex; - parse_attr->tun_info[esw_attr->out_count] = - mlx5e_dup_tun_info(info); - if (!parse_attr->tun_info[esw_attr->out_count]) - return -ENOMEM; - encap = false; - esw_attr->dests[esw_attr->out_count].flags |= - MLX5_ESW_DEST_ENCAP; - esw_attr->out_count++; - /* attr->dests[].rep is resolved when we - * handle encap - */ - } else if (netdev_port_same_parent_id(priv->netdev, out_dev)) { - struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - struct net_device *uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH); - - if (is_duplicated_output_device(priv->netdev, - out_dev, - ifindexes, - if_count, - extack)) - return -EOPNOTSUPP; - - ifindexes[if_count] = out_dev->ifindex; - if_count++; - - out_dev = get_fdb_out_dev(uplink_dev, out_dev); - if (!out_dev) - return -ENODEV; - - if (is_vlan_dev(out_dev)) { - err = add_vlan_push_action(priv, attr, - &out_dev, - &action, extack); - if (err) - return err; - } - - if (is_vlan_dev(parse_attr->filter_dev)) { - err = add_vlan_pop_action(priv, attr, - &action, extack); - if (err) - return err; - } - - if (netif_is_macvlan(out_dev)) - out_dev = macvlan_dev_real_dev(out_dev); - - err = verify_uplink_forwarding(priv, flow, out_dev, extack); - if (err) - return err; - - if (!mlx5e_is_valid_eswitch_fwd_dev(priv, out_dev)) { - NL_SET_ERR_MSG_MOD(extack, - "devices are not on same switch HW, can't offload forwarding"); - return -EOPNOTSUPP; - } - - if (same_vf_reps(priv, out_dev)) { - NL_SET_ERR_MSG_MOD(extack, - "can't forward from a VF to itself"); - return -EOPNOTSUPP; - } - - out_priv = netdev_priv(out_dev); - rpriv = out_priv->ppriv; - esw_attr->dests[esw_attr->out_count].rep = rpriv->rep; - esw_attr->dests[esw_attr->out_count].mdev = out_priv->mdev; - esw_attr->out_count++; - } else if (netif_is_ovs_master(out_dev)) { - err = mlx5e_set_fwd_to_int_port_actions(priv, attr, - out_dev->ifindex, - MLX5E_TC_INT_PORT_EGRESS, - &action, - esw_attr->out_count); - if (err) - return err; - - esw_attr->out_count++; - } else if (parse_attr->filter_dev != priv->netdev) { - /* All mlx5 devices are called to configure - * high level device filters. Therefore, the - * *attempt* to install a filter on invalid - * eswitch should not trigger an explicit error - */ - return -EINVAL; - } else { - NL_SET_ERR_MSG_MOD(extack, - "devices are not on same switch HW, can't offload forwarding"); - netdev_warn(priv->netdev, - "devices %s %s not on same switch HW, can't offload forwarding\n", - priv->netdev->name, - out_dev->name); - return -EOPNOTSUPP; - } - } - break; - case FLOW_ACTION_TUNNEL_ENCAP: - info = act->tunnel; - if (info) { - encap = true; - } else { - NL_SET_ERR_MSG_MOD(extack, - "Zero tunnel attributes is not supported"); - return -EOPNOTSUPP; - } - - break; - case FLOW_ACTION_VLAN_PUSH: - case FLOW_ACTION_VLAN_POP: - if (act->id == FLOW_ACTION_VLAN_PUSH && - (action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP)) { - /* Replace vlan pop+push with vlan modify */ - action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_POP; - err = add_vlan_rewrite_action(priv, - MLX5_FLOW_NAMESPACE_FDB, - act, parse_attr, hdrs, - &action, extack); - } else { - err = parse_tc_vlan_action(priv, act, esw_attr, &action, extack); - } - if (err) - return err; - - esw_attr->split_count = esw_attr->out_count; - break; - case FLOW_ACTION_VLAN_MANGLE: - err = add_vlan_rewrite_action(priv, - MLX5_FLOW_NAMESPACE_FDB, - act, parse_attr, hdrs, - &action, extack); - if (err) - return err; - - esw_attr->split_count = esw_attr->out_count; - break; - case FLOW_ACTION_TUNNEL_DECAP: - decap = true; - break; - case FLOW_ACTION_GOTO: - err = validate_goto_chain(priv, flow, act, action, - extack); - if (err) - return err; - - action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | - MLX5_FLOW_CONTEXT_ACTION_COUNT; - attr->dest_chain = act->chain_index; - break; - case FLOW_ACTION_CT: - if (flow_flag_test(flow, SAMPLE)) { - NL_SET_ERR_MSG_MOD(extack, "Sample action with connection tracking is not supported"); - return -EOPNOTSUPP; - } - err = mlx5_tc_ct_parse_action(get_ct_priv(priv), attr, act, extack); - if (err) - return err; - - flow_flag_set(flow, CT); - esw_attr->split_count = esw_attr->out_count; - break; - case FLOW_ACTION_SAMPLE: - if (flow_flag_test(flow, CT)) { - NL_SET_ERR_MSG_MOD(extack, "Sample action with connection tracking is not supported"); - return -EOPNOTSUPP; - } - sample_attr.rate = act->sample.rate; - sample_attr.group_num = act->sample.psample_group->group_num; - if (act->sample.truncate) - sample_attr.trunc_size = act->sample.trunc_size; - flow_flag_set(flow, SAMPLE); - break; - default: - NL_SET_ERR_MSG_MOD(extack, - "The offload action is not supported in FDB action"); - return -EOPNOTSUPP; - } - } + err = parse_tc_actions(parse_state, flow_action); + if (err) + return err; /* Forward to/from internal port can only have 1 dest */ if ((netif_is_ovs_master(parse_attr->filter_dev) || esw_attr->dest_int_port) && @@ -4309,23 +3481,6 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, return -EOPNOTSUPP; } - /* always set IP version for indirect table handling */ - attr->ip_version = mlx5e_tc_get_ip_version(&parse_attr->spec, true); - - if (MLX5_CAP_GEN(esw->dev, prio_tag_required) && - action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) { - /* For prio tag mode, replace vlan pop with rewrite vlan prio - * tag rewrite. - */ - action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_POP; - err = add_vlan_prio_tag_rewrite_action(priv, parse_attr, hdrs, - &action, extack); - if (err) - return err; - } - - attr->action = action; - err = actions_prepare_mod_hdr_actions(priv, flow, attr, hdrs, extack); if (err) return err; @@ -4333,30 +3488,6 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, if (!actions_match_supported(priv, flow_action, parse_attr, flow, extack)) return -EOPNOTSUPP; - if (attr->dest_chain && decap) { - /* It can be supported if we'll create a mapping for - * the tunnel device only (without tunnel), and set - * this tunnel id with this decap flow. - * - * On restore (miss), we'll just set this saved tunnel - * device. - */ - - NL_SET_ERR_MSG(extack, "Decap with goto isn't supported"); - netdev_warn(priv->netdev, "Decap with goto isn't supported"); - return -EOPNOTSUPP; - } - - /* Allocate sample attribute only when there is a sample action and - * no errors after parsing. - */ - if (flow_flag_test(flow, SAMPLE)) { - attr->sample_attr = kzalloc(sizeof(*attr->sample_attr), GFP_KERNEL); - if (!attr->sample_attr) - return -ENOMEM; - *attr->sample_attr = sample_attr; - } - return 0; } @@ -4453,7 +3584,7 @@ mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size, flow->cookie = f->cookie; flow->priv = priv; - attr = mlx5_alloc_flow_attr(get_flow_name_space(flow)); + attr = mlx5_alloc_flow_attr(mlx5e_get_flow_namespace(flow)); if (!attr) goto err_free; @@ -4465,6 +3596,7 @@ mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size, INIT_LIST_HEAD(&flow->l3_to_l2_reformat); refcount_set(&flow->refcnt, 1); init_completion(&flow->init_done); + init_completion(&flow->del_hw_done); *__flow = flow; *__parse_attr = parse_attr; @@ -4547,6 +3679,9 @@ __mlx5e_add_fdb_flow(struct mlx5e_priv *priv, if (err) goto err_free; + /* always set IP version for indirect table handling */ + flow->attr->ip_version = mlx5e_tc_get_ip_version(&parse_attr->spec, true); + err = parse_tc_fdb_actions(priv, &rule->action, flow, extack); if (err) goto err_free; @@ -4708,7 +3843,7 @@ mlx5e_add_nic_flow(struct mlx5e_priv *priv, err_free: flow_flag_set(flow, FAILED); - dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts); + mlx5e_mod_hdr_dealloc(&parse_attr->mod_hdr_acts); mlx5e_flow_put(priv, flow); out: return err; @@ -5008,14 +4143,8 @@ static int scan_tc_matchall_fdb_actions(struct mlx5e_priv *priv, int mlx5e_tc_configure_matchall(struct mlx5e_priv *priv, struct tc_cls_matchall_offload *ma) { - struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct netlink_ext_ack *extack = ma->common.extack; - if (!mlx5_esw_qos_enabled(esw)) { - NL_SET_ERR_MSG_MOD(extack, "QoS is not supported on this device"); - return -EOPNOTSUPP; - } - if (ma->common.prio != 1) { NL_SET_ERR_MSG_MOD(extack, "only priority 1 is supported"); return -EINVAL; @@ -5057,7 +4186,7 @@ static void mlx5e_tc_hairpin_update_dead_peer(struct mlx5e_priv *priv, u16 peer_vhca_id; int bkt; - if (!same_hw_devs(priv, peer_priv)) + if (!mlx5e_same_hw_devs(priv, peer_priv)) return; peer_vhca_id = MLX5_CAP_GEN(peer_mdev, vhca_id); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h index fdb222793027..5ffae9b13066 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h @@ -151,7 +151,6 @@ enum { int mlx5e_tc_esw_init(struct rhashtable *tc_ht); void mlx5e_tc_esw_cleanup(struct rhashtable *tc_ht); -bool mlx5e_is_eswitch_flow(struct mlx5e_tc_flow *flow); int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv, struct flow_cls_offload *f, unsigned long flags); @@ -247,11 +246,6 @@ int mlx5e_tc_add_flow_mod_hdr(struct mlx5e_priv *priv, struct mlx5e_tc_flow_parse_attr *parse_attr, struct mlx5e_tc_flow *flow); -int alloc_mod_hdr_actions(struct mlx5_core_dev *mdev, - int namespace, - struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts); -void dealloc_mod_hdr_actions(struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts); - struct mlx5e_tc_flow; u32 mlx5e_tc_get_flow_tun_id(struct mlx5e_tc_flow *flow); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 792e0d6aa861..48a45aa54a3c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -19,6 +19,7 @@ #include "lib/clock.h" #include "diag/fw_tracer.h" #include "mlx5_irq.h" +#include "devlink.h" enum { MLX5_EQE_OWNER_INIT_VAL = 0x1, @@ -58,6 +59,8 @@ struct mlx5_eq_table { struct mutex lock; /* sync async eqs creations */ int num_comp_eqs; struct mlx5_irq_table *irq_table; + struct mlx5_irq **comp_irqs; + struct mlx5_irq *ctrl_irq; #ifdef CONFIG_RFS_ACCEL struct cpu_rmap *rmap; #endif @@ -265,8 +268,8 @@ create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u32 out[MLX5_ST_SZ_DW(create_eq_out)] = {0}; u8 log_eq_stride = ilog2(MLX5_EQE_SIZE); struct mlx5_priv *priv = &dev->priv; - u16 vecidx = param->irq_index; __be64 *pas; + u16 vecidx; void *eqc; int inlen; u32 *in; @@ -288,20 +291,16 @@ create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, mlx5_init_fbc(eq->frag_buf.frags, log_eq_stride, log_eq_size, &eq->fbc); init_eq_buf(eq); - eq->irq = mlx5_irq_request(dev, vecidx, param->affinity); - if (IS_ERR(eq->irq)) { - err = PTR_ERR(eq->irq); - goto err_buf; - } - + eq->irq = param->irq; vecidx = mlx5_irq_get_index(eq->irq); + inlen = MLX5_ST_SZ_BYTES(create_eq_in) + MLX5_FLD_SZ_BYTES(create_eq_in, pas[0]) * eq->frag_buf.npages; in = kvzalloc(inlen, GFP_KERNEL); if (!in) { err = -ENOMEM; - goto err_irq; + goto err_buf; } pas = (__be64 *)MLX5_ADDR_OF(create_eq_in, in, pas); @@ -345,8 +344,6 @@ err_eq: err_in: kvfree(in); -err_irq: - mlx5_irq_release(eq->irq); err_buf: mlx5_frag_buf_free(dev, &eq->frag_buf); return err; @@ -400,7 +397,6 @@ static int destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq) if (err) mlx5_core_warn(dev, "failed to destroy a previously created eq: eqn %d\n", eq->eqn); - mlx5_irq_release(eq->irq); mlx5_frag_buf_free(dev, &eq->frag_buf); return err; @@ -593,11 +589,8 @@ setup_async_eq(struct mlx5_core_dev *dev, struct mlx5_eq_async *eq, eq->irq_nb.notifier_call = mlx5_eq_async_int; spin_lock_init(&eq->lock); - if (!zalloc_cpumask_var(¶m->affinity, GFP_KERNEL)) - return -ENOMEM; err = create_async_eq(dev, &eq->core, param); - free_cpumask_var(param->affinity); if (err) { mlx5_core_warn(dev, "failed to create %s EQ %d\n", name, err); return err; @@ -622,17 +615,38 @@ static void cleanup_async_eq(struct mlx5_core_dev *dev, name, err); } +static u16 async_eq_depth_devlink_param_get(struct mlx5_core_dev *dev) +{ + struct devlink *devlink = priv_to_devlink(dev); + union devlink_param_value val; + int err; + + err = devlink_param_driverinit_value_get(devlink, + DEVLINK_PARAM_GENERIC_ID_EVENT_EQ_SIZE, + &val); + if (!err) + return val.vu32; + mlx5_core_dbg(dev, "Failed to get param. using default. err = %d\n", err); + return MLX5_NUM_ASYNC_EQE; +} static int create_async_eqs(struct mlx5_core_dev *dev) { struct mlx5_eq_table *table = dev->priv.eq_table; struct mlx5_eq_param param = {}; int err; + /* All the async_eqs are using single IRQ, request one IRQ and share its + * index among all the async_eqs of this device. + */ + table->ctrl_irq = mlx5_ctrl_irq_request(dev); + if (IS_ERR(table->ctrl_irq)) + return PTR_ERR(table->ctrl_irq); + MLX5_NB_INIT(&table->cq_err_nb, cq_err_event_notifier, CQ_ERROR); mlx5_eq_notifier_register(dev, &table->cq_err_nb); param = (struct mlx5_eq_param) { - .irq_index = MLX5_IRQ_EQ_CTRL, + .irq = table->ctrl_irq, .nent = MLX5_NUM_CMD_EQE, .mask[0] = 1ull << MLX5_EVENT_TYPE_CMD, }; @@ -645,8 +659,8 @@ static int create_async_eqs(struct mlx5_core_dev *dev) mlx5_cmd_allowed_opcode(dev, CMD_ALLOWED_OPCODE_ALL); param = (struct mlx5_eq_param) { - .irq_index = MLX5_IRQ_EQ_CTRL, - .nent = MLX5_NUM_ASYNC_EQE, + .irq = table->ctrl_irq, + .nent = async_eq_depth_devlink_param_get(dev), }; gather_async_events_mask(dev, param.mask); @@ -655,7 +669,7 @@ static int create_async_eqs(struct mlx5_core_dev *dev) goto err2; param = (struct mlx5_eq_param) { - .irq_index = MLX5_IRQ_EQ_CTRL, + .irq = table->ctrl_irq, .nent = /* TODO: sriov max_vf + */ 1, .mask[0] = 1ull << MLX5_EVENT_TYPE_PAGE_REQUEST, }; @@ -674,6 +688,7 @@ err2: err1: mlx5_cmd_allowed_opcode(dev, CMD_ALLOWED_OPCODE_ALL); mlx5_eq_notifier_unregister(dev, &table->cq_err_nb); + mlx5_ctrl_irq_release(table->ctrl_irq); return err; } @@ -688,6 +703,7 @@ static void destroy_async_eqs(struct mlx5_core_dev *dev) cleanup_async_eq(dev, &table->cmd_eq, "cmd"); mlx5_cmd_allowed_opcode(dev, CMD_ALLOWED_OPCODE_ALL); mlx5_eq_notifier_unregister(dev, &table->cq_err_nb); + mlx5_ctrl_irq_release(table->ctrl_irq); } struct mlx5_eq *mlx5_get_async_eq(struct mlx5_core_dev *dev) @@ -715,12 +731,10 @@ mlx5_eq_create_generic(struct mlx5_core_dev *dev, struct mlx5_eq *eq = kvzalloc(sizeof(*eq), GFP_KERNEL); int err; - if (!cpumask_available(param->affinity)) - return ERR_PTR(-EINVAL); - if (!eq) return ERR_PTR(-ENOMEM); + param->irq = dev->priv.eq_table->ctrl_irq; err = create_async_eq(dev, eq, param); if (err) { kvfree(eq); @@ -780,6 +794,54 @@ void mlx5_eq_update_ci(struct mlx5_eq *eq, u32 cc, bool arm) } EXPORT_SYMBOL(mlx5_eq_update_ci); +static void comp_irqs_release(struct mlx5_core_dev *dev) +{ + struct mlx5_eq_table *table = dev->priv.eq_table; + + if (mlx5_core_is_sf(dev)) + mlx5_irq_affinity_irqs_release(dev, table->comp_irqs, table->num_comp_eqs); + else + mlx5_irqs_release_vectors(table->comp_irqs, table->num_comp_eqs); + kfree(table->comp_irqs); +} + +static int comp_irqs_request(struct mlx5_core_dev *dev) +{ + struct mlx5_eq_table *table = dev->priv.eq_table; + int ncomp_eqs = table->num_comp_eqs; + u16 *cpus; + int ret; + int i; + + ncomp_eqs = table->num_comp_eqs; + table->comp_irqs = kcalloc(ncomp_eqs, sizeof(*table->comp_irqs), GFP_KERNEL); + if (!table->comp_irqs) + return -ENOMEM; + if (mlx5_core_is_sf(dev)) { + ret = mlx5_irq_affinity_irqs_request_auto(dev, ncomp_eqs, table->comp_irqs); + if (ret < 0) + goto free_irqs; + return ret; + } + + cpus = kcalloc(ncomp_eqs, sizeof(*cpus), GFP_KERNEL); + if (!cpus) { + ret = -ENOMEM; + goto free_irqs; + } + for (i = 0; i < ncomp_eqs; i++) + cpus[i] = cpumask_local_spread(i, dev->priv.numa_node); + ret = mlx5_irqs_request_vectors(dev, cpus, ncomp_eqs, table->comp_irqs); + kfree(cpus); + if (ret < 0) + goto free_irqs; + return ret; + +free_irqs: + kfree(table->comp_irqs); + return ret; +} + static void destroy_comp_eqs(struct mlx5_core_dev *dev) { struct mlx5_eq_table *table = dev->priv.eq_table; @@ -794,6 +856,22 @@ static void destroy_comp_eqs(struct mlx5_core_dev *dev) tasklet_disable(&eq->tasklet_ctx.task); kfree(eq); } + comp_irqs_release(dev); +} + +static u16 comp_eq_depth_devlink_param_get(struct mlx5_core_dev *dev) +{ + struct devlink *devlink = priv_to_devlink(dev); + union devlink_param_value val; + int err; + + err = devlink_param_driverinit_value_get(devlink, + DEVLINK_PARAM_GENERIC_ID_IO_EQ_SIZE, + &val); + if (!err) + return val.vu32; + mlx5_core_dbg(dev, "Failed to get param. using default. err = %d\n", err); + return MLX5_COMP_EQ_SIZE; } static int create_comp_eqs(struct mlx5_core_dev *dev) @@ -805,12 +883,13 @@ static int create_comp_eqs(struct mlx5_core_dev *dev) int err; int i; + ncomp_eqs = comp_irqs_request(dev); + if (ncomp_eqs < 0) + return ncomp_eqs; INIT_LIST_HEAD(&table->comp_eqs_list); - ncomp_eqs = table->num_comp_eqs; - nent = MLX5_COMP_EQ_SIZE; + nent = comp_eq_depth_devlink_param_get(dev); for (i = 0; i < ncomp_eqs; i++) { struct mlx5_eq_param param = {}; - int vecidx = i; eq = kzalloc(sizeof(*eq), GFP_KERNEL); if (!eq) { @@ -825,18 +904,11 @@ static int create_comp_eqs(struct mlx5_core_dev *dev) eq->irq_nb.notifier_call = mlx5_eq_comp_int; param = (struct mlx5_eq_param) { - .irq_index = vecidx, + .irq = table->comp_irqs[i], .nent = nent, }; - if (!zalloc_cpumask_var(¶m.affinity, GFP_KERNEL)) { - err = -ENOMEM; - goto clean_eq; - } - cpumask_set_cpu(cpumask_local_spread(i, dev->priv.numa_node), - param.affinity); err = create_map_eq(dev, &eq->core, ¶m); - free_cpumask_var(param.affinity); if (err) goto clean_eq; err = mlx5_eq_enable(dev, &eq->core, &eq->irq_nb); @@ -850,7 +922,9 @@ static int create_comp_eqs(struct mlx5_core_dev *dev) list_add_tail(&eq->list, &table->comp_eqs_list); } + table->num_comp_eqs = ncomp_eqs; return 0; + clean_eq: kfree(eq); clean: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.c index 425c91814b34..c275fe028b6d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.c @@ -14,6 +14,7 @@ #include "fs_core.h" #include "esw/indir_table.h" #include "lib/fs_chains.h" +#include "en/mod_hdr.h" #define MLX5_ESW_INDIR_TABLE_SIZE 128 #define MLX5_ESW_INDIR_TABLE_RECIRC_IDX_MAX (MLX5_ESW_INDIR_TABLE_SIZE - 2) @@ -226,7 +227,7 @@ static int mlx5_esw_indir_table_rule_get(struct mlx5_eswitch *esw, goto err_handle; } - dealloc_mod_hdr_actions(&mod_acts); + mlx5e_mod_hdr_dealloc(&mod_acts); rule->handle = handle; rule->vni = esw_attr->rx_tun_attr->vni; rule->mh = flow_act.modify_hdr; @@ -243,7 +244,7 @@ err_table: mlx5_modify_header_dealloc(esw->dev, flow_act.modify_hdr); err_mod_hdr_alloc: err_mod_hdr_regc1: - dealloc_mod_hdr_actions(&mod_acts); + mlx5e_mod_hdr_dealloc(&mod_acts); err_mod_hdr_regc0: err_ethertype: kfree(rule); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c index df277a6cddc0..9d17206d1625 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c @@ -431,7 +431,7 @@ int mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, int err = 0; if (!mlx5_esw_allowed(esw)) - return -EPERM; + return vlan ? -EPERM : 0; if (vlan || qos) set_flags = SET_VLAN_STRIP | SET_VLAN_INSERT; @@ -522,9 +522,7 @@ int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw, u16 vport, return PTR_ERR(evport); mutex_lock(&esw->state_lock); - err = mlx5_esw_qos_set_vport_min_rate(esw, evport, min_rate, NULL); - if (!err) - err = mlx5_esw_qos_set_vport_max_rate(esw, evport, max_rate, NULL); + err = mlx5_esw_qos_set_vport_rate(esw, evport, max_rate, min_rate); mutex_unlock(&esw->state_lock); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c index c6cc67cb4f6a..11bbcd5f5b8b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c @@ -130,7 +130,7 @@ static u32 esw_qos_calculate_min_rate_divider(struct mlx5_eswitch *esw, /* If vports min rate divider is 0 but their group has bw_share configured, then * need to set bw_share for vports to minimal value. */ - if (!group_level && !max_guarantee && group->bw_share) + if (!group_level && !max_guarantee && group && group->bw_share) return 1; return 0; } @@ -204,10 +204,8 @@ static int esw_qos_normalize_groups_min_rate(struct mlx5_eswitch *esw, u32 divid return 0; } -int mlx5_esw_qos_set_vport_min_rate(struct mlx5_eswitch *esw, - struct mlx5_vport *evport, - u32 min_rate, - struct netlink_ext_ack *extack) +static int esw_qos_set_vport_min_rate(struct mlx5_eswitch *esw, struct mlx5_vport *evport, + u32 min_rate, struct netlink_ext_ack *extack) { u32 fw_max_bw_share, previous_min_rate; bool min_rate_supported; @@ -231,10 +229,8 @@ int mlx5_esw_qos_set_vport_min_rate(struct mlx5_eswitch *esw, return err; } -int mlx5_esw_qos_set_vport_max_rate(struct mlx5_eswitch *esw, - struct mlx5_vport *evport, - u32 max_rate, - struct netlink_ext_ack *extack) +static int esw_qos_set_vport_max_rate(struct mlx5_eswitch *esw, struct mlx5_vport *evport, + u32 max_rate, struct netlink_ext_ack *extack) { u32 act_max_rate = max_rate; bool max_rate_supported; @@ -423,7 +419,7 @@ static int esw_qos_vport_update_group(struct mlx5_eswitch *esw, return err; /* Recalculate bw share weights of old and new groups */ - if (vport->qos.bw_share) { + if (vport->qos.bw_share || new_group->bw_share) { esw_qos_normalize_vports_min_rate(esw, curr_group, extack); esw_qos_normalize_vports_min_rate(esw, new_group, extack); } @@ -432,16 +428,13 @@ static int esw_qos_vport_update_group(struct mlx5_eswitch *esw, } static struct mlx5_esw_rate_group * -esw_qos_create_rate_group(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack) +__esw_qos_create_rate_group(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack) { u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; struct mlx5_esw_rate_group *group; u32 divider; int err; - if (!MLX5_CAP_QOS(esw->dev, log_esw_max_sched_depth)) - return ERR_PTR(-EOPNOTSUPP); - group = kzalloc(sizeof(*group), GFP_KERNEL); if (!group) return ERR_PTR(-ENOMEM); @@ -482,9 +475,32 @@ err_sched_elem: return ERR_PTR(err); } -static int esw_qos_destroy_rate_group(struct mlx5_eswitch *esw, - struct mlx5_esw_rate_group *group, - struct netlink_ext_ack *extack) +static int esw_qos_get(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack); +static void esw_qos_put(struct mlx5_eswitch *esw); + +static struct mlx5_esw_rate_group * +esw_qos_create_rate_group(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack) +{ + struct mlx5_esw_rate_group *group; + int err; + + if (!MLX5_CAP_QOS(esw->dev, log_esw_max_sched_depth)) + return ERR_PTR(-EOPNOTSUPP); + + err = esw_qos_get(esw, extack); + if (err) + return ERR_PTR(err); + + group = __esw_qos_create_rate_group(esw, extack); + if (IS_ERR(group)) + esw_qos_put(esw); + + return group; +} + +static int __esw_qos_destroy_rate_group(struct mlx5_eswitch *esw, + struct mlx5_esw_rate_group *group, + struct netlink_ext_ack *extack) { u32 divider; int err; @@ -503,7 +519,21 @@ static int esw_qos_destroy_rate_group(struct mlx5_eswitch *esw, NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR_ID failed"); trace_mlx5_esw_group_qos_destroy(esw->dev, group, group->tsar_ix); + kfree(group); + + return err; +} + +static int esw_qos_destroy_rate_group(struct mlx5_eswitch *esw, + struct mlx5_esw_rate_group *group, + struct netlink_ext_ack *extack) +{ + int err; + + err = __esw_qos_destroy_rate_group(esw, group, extack); + esw_qos_put(esw); + return err; } @@ -526,7 +556,7 @@ static bool esw_qos_element_type_supported(struct mlx5_core_dev *dev, int type) return false; } -void mlx5_esw_qos_create(struct mlx5_eswitch *esw) +static int esw_qos_create(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack) { u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; struct mlx5_core_dev *dev = esw->dev; @@ -534,14 +564,10 @@ void mlx5_esw_qos_create(struct mlx5_eswitch *esw) int err; if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling)) - return; + return -EOPNOTSUPP; if (!esw_qos_element_type_supported(dev, SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR)) - return; - - mutex_lock(&esw->state_lock); - if (esw->qos.enabled) - goto unlock; + return -EOPNOTSUPP; MLX5_SET(scheduling_context, tsar_ctx, element_type, SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR); @@ -555,75 +581,94 @@ void mlx5_esw_qos_create(struct mlx5_eswitch *esw) &esw->qos.root_tsar_ix); if (err) { esw_warn(dev, "E-Switch create root TSAR failed (%d)\n", err); - goto unlock; + return err; } INIT_LIST_HEAD(&esw->qos.groups); if (MLX5_CAP_QOS(dev, log_esw_max_sched_depth)) { - esw->qos.group0 = esw_qos_create_rate_group(esw, NULL); + esw->qos.group0 = __esw_qos_create_rate_group(esw, extack); if (IS_ERR(esw->qos.group0)) { esw_warn(dev, "E-Switch create rate group 0 failed (%ld)\n", PTR_ERR(esw->qos.group0)); + err = PTR_ERR(esw->qos.group0); goto err_group0; } } - esw->qos.enabled = true; -unlock: - mutex_unlock(&esw->state_lock); - return; + refcount_set(&esw->qos.refcnt, 1); + + return 0; err_group0: - err = mlx5_destroy_scheduling_element_cmd(esw->dev, - SCHEDULING_HIERARCHY_E_SWITCH, - esw->qos.root_tsar_ix); - if (err) - esw_warn(esw->dev, "E-Switch destroy root TSAR failed (%d)\n", err); - mutex_unlock(&esw->state_lock); + if (mlx5_destroy_scheduling_element_cmd(esw->dev, SCHEDULING_HIERARCHY_E_SWITCH, + esw->qos.root_tsar_ix)) + esw_warn(esw->dev, "E-Switch destroy root TSAR failed.\n"); + + return err; } -void mlx5_esw_qos_destroy(struct mlx5_eswitch *esw) +static void esw_qos_destroy(struct mlx5_eswitch *esw) { - struct devlink *devlink = priv_to_devlink(esw->dev); int err; - devlink_rate_nodes_destroy(devlink); - mutex_lock(&esw->state_lock); - if (!esw->qos.enabled) - goto unlock; - if (esw->qos.group0) - esw_qos_destroy_rate_group(esw, esw->qos.group0, NULL); + __esw_qos_destroy_rate_group(esw, esw->qos.group0, NULL); err = mlx5_destroy_scheduling_element_cmd(esw->dev, SCHEDULING_HIERARCHY_E_SWITCH, esw->qos.root_tsar_ix); if (err) esw_warn(esw->dev, "E-Switch destroy root TSAR failed (%d)\n", err); +} - esw->qos.enabled = false; -unlock: - mutex_unlock(&esw->state_lock); +static int esw_qos_get(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack) +{ + int err = 0; + + lockdep_assert_held(&esw->state_lock); + + if (!refcount_inc_not_zero(&esw->qos.refcnt)) { + /* esw_qos_create() set refcount to 1 only on success. + * No need to decrement on failure. + */ + err = esw_qos_create(esw, extack); + } + + return err; } -int mlx5_esw_qos_vport_enable(struct mlx5_eswitch *esw, struct mlx5_vport *vport, - u32 max_rate, u32 bw_share) +static void esw_qos_put(struct mlx5_eswitch *esw) +{ + lockdep_assert_held(&esw->state_lock); + if (refcount_dec_and_test(&esw->qos.refcnt)) + esw_qos_destroy(esw); +} + +static int esw_qos_vport_enable(struct mlx5_eswitch *esw, struct mlx5_vport *vport, + u32 max_rate, u32 bw_share, struct netlink_ext_ack *extack) { int err; lockdep_assert_held(&esw->state_lock); - if (!esw->qos.enabled) + if (vport->qos.enabled) return 0; - if (vport->qos.enabled) - return -EEXIST; + err = esw_qos_get(esw, extack); + if (err) + return err; vport->qos.group = esw->qos.group0; err = esw_qos_vport_create_sched_element(esw, vport, max_rate, bw_share); - if (!err) { - vport->qos.enabled = true; - trace_mlx5_esw_vport_qos_create(vport, bw_share, max_rate); - } + if (err) + goto err_out; + + vport->qos.enabled = true; + trace_mlx5_esw_vport_qos_create(vport, bw_share, max_rate); + + return 0; + +err_out: + esw_qos_put(esw); return err; } @@ -633,7 +678,7 @@ void mlx5_esw_qos_vport_disable(struct mlx5_eswitch *esw, struct mlx5_vport *vpo int err; lockdep_assert_held(&esw->state_lock); - if (!esw->qos.enabled || !vport->qos.enabled) + if (!vport->qos.enabled) return; WARN(vport->qos.group && vport->qos.group != esw->qos.group0, "Disabling QoS on port before detaching it from group"); @@ -645,8 +690,27 @@ void mlx5_esw_qos_vport_disable(struct mlx5_eswitch *esw, struct mlx5_vport *vpo esw_warn(esw->dev, "E-Switch destroy TSAR vport element failed (vport=%d,err=%d)\n", vport->vport, err); - vport->qos.enabled = false; + memset(&vport->qos, 0, sizeof(vport->qos)); trace_mlx5_esw_vport_qos_destroy(vport); + + esw_qos_put(esw); +} + +int mlx5_esw_qos_set_vport_rate(struct mlx5_eswitch *esw, struct mlx5_vport *vport, + u32 min_rate, u32 max_rate) +{ + int err; + + lockdep_assert_held(&esw->state_lock); + err = esw_qos_vport_enable(esw, vport, 0, 0, NULL); + if (err) + return err; + + err = esw_qos_set_vport_min_rate(esw, vport, min_rate, NULL); + if (!err) + err = esw_qos_set_vport_max_rate(esw, vport, max_rate, NULL); + + return err; } int mlx5_esw_qos_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num, u32 rate_mbps) @@ -654,22 +718,29 @@ int mlx5_esw_qos_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num, u32 u32 ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; struct mlx5_vport *vport; u32 bitmask; + int err; vport = mlx5_eswitch_get_vport(esw, vport_num); if (IS_ERR(vport)) return PTR_ERR(vport); - if (!vport->qos.enabled) - return -EOPNOTSUPP; - - MLX5_SET(scheduling_context, ctx, max_average_bw, rate_mbps); - bitmask = MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW; + mutex_lock(&esw->state_lock); + if (!vport->qos.enabled) { + /* Eswitch QoS wasn't enabled yet. Enable it and vport QoS. */ + err = esw_qos_vport_enable(esw, vport, rate_mbps, vport->qos.bw_share, NULL); + } else { + MLX5_SET(scheduling_context, ctx, max_average_bw, rate_mbps); + + bitmask = MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW; + err = mlx5_modify_scheduling_element_cmd(esw->dev, + SCHEDULING_HIERARCHY_E_SWITCH, + ctx, + vport->qos.esw_tsar_ix, + bitmask); + } + mutex_unlock(&esw->state_lock); - return mlx5_modify_scheduling_element_cmd(esw->dev, - SCHEDULING_HIERARCHY_E_SWITCH, - ctx, - vport->qos.esw_tsar_ix, - bitmask); + return err; } #define MLX5_LINKSPEED_UNIT 125000 /* 1Mbps in Bps */ @@ -728,7 +799,12 @@ int mlx5_esw_devlink_rate_leaf_tx_share_set(struct devlink_rate *rate_leaf, void return err; mutex_lock(&esw->state_lock); - err = mlx5_esw_qos_set_vport_min_rate(esw, vport, tx_share, extack); + err = esw_qos_vport_enable(esw, vport, 0, 0, extack); + if (err) + goto unlock; + + err = esw_qos_set_vport_min_rate(esw, vport, tx_share, extack); +unlock: mutex_unlock(&esw->state_lock); return err; } @@ -749,7 +825,12 @@ int mlx5_esw_devlink_rate_leaf_tx_max_set(struct devlink_rate *rate_leaf, void * return err; mutex_lock(&esw->state_lock); - err = mlx5_esw_qos_set_vport_max_rate(esw, vport, tx_max, extack); + err = esw_qos_vport_enable(esw, vport, 0, 0, extack); + if (err) + goto unlock; + + err = esw_qos_set_vport_max_rate(esw, vport, tx_max, extack); +unlock: mutex_unlock(&esw->state_lock); return err; } @@ -846,7 +927,9 @@ int mlx5_esw_qos_vport_update_group(struct mlx5_eswitch *esw, int err; mutex_lock(&esw->state_lock); - err = esw_qos_vport_update_group(esw, vport, group, extack); + err = esw_qos_vport_enable(esw, vport, 0, 0, extack); + if (!err) + err = esw_qos_vport_update_group(esw, vport, group, extack); mutex_unlock(&esw->state_lock); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h index 28451abe2d2f..0141e9d52037 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h @@ -6,18 +6,8 @@ #ifdef CONFIG_MLX5_ESWITCH -int mlx5_esw_qos_set_vport_min_rate(struct mlx5_eswitch *esw, - struct mlx5_vport *evport, - u32 min_rate, - struct netlink_ext_ack *extack); -int mlx5_esw_qos_set_vport_max_rate(struct mlx5_eswitch *esw, - struct mlx5_vport *evport, - u32 max_rate, - struct netlink_ext_ack *extack); -void mlx5_esw_qos_create(struct mlx5_eswitch *esw); -void mlx5_esw_qos_destroy(struct mlx5_eswitch *esw); -int mlx5_esw_qos_vport_enable(struct mlx5_eswitch *esw, struct mlx5_vport *vport, - u32 max_rate, u32 bw_share); +int mlx5_esw_qos_set_vport_rate(struct mlx5_eswitch *esw, struct mlx5_vport *evport, + u32 max_rate, u32 min_rate); void mlx5_esw_qos_vport_disable(struct mlx5_eswitch *esw, struct mlx5_vport *vport); int mlx5_esw_devlink_rate_leaf_tx_share_set(struct devlink_rate *rate_leaf, void *priv, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index ec136b499204..458ec0bca1b8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -781,9 +781,6 @@ static int esw_vport_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport) if (err) return err; - /* Attach vport to the eswitch rate limiter */ - mlx5_esw_qos_vport_enable(esw, vport, vport->qos.max_rate, vport->qos.bw_share); - if (mlx5_esw_is_manager_vport(esw, vport_num)) return 0; @@ -1260,8 +1257,6 @@ int mlx5_eswitch_enable_locked(struct mlx5_eswitch *esw, int mode, int num_vfs) mlx5_eswitch_update_num_of_vfs(esw, num_vfs); - mlx5_esw_qos_create(esw); - esw->mode = mode; if (mode == MLX5_ESWITCH_LEGACY) { @@ -1290,7 +1285,6 @@ abort: if (mode == MLX5_ESWITCH_OFFLOADS) mlx5_rescan_drivers(esw->dev); - mlx5_esw_qos_destroy(esw); mlx5_esw_acls_ns_cleanup(esw); return err; } @@ -1305,12 +1299,17 @@ abort: */ int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs) { + bool toggle_lag; int ret; if (!mlx5_esw_allowed(esw)) return 0; - mlx5_lag_disable_change(esw->dev); + toggle_lag = esw->mode == MLX5_ESWITCH_NONE; + + if (toggle_lag) + mlx5_lag_disable_change(esw->dev); + down_write(&esw->mode_lock); if (esw->mode == MLX5_ESWITCH_NONE) { ret = mlx5_eswitch_enable_locked(esw, MLX5_ESWITCH_LEGACY, num_vfs); @@ -1324,12 +1323,16 @@ int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs) esw->esw_funcs.num_vfs = num_vfs; } up_write(&esw->mode_lock); - mlx5_lag_enable_change(esw->dev); + + if (toggle_lag) + mlx5_lag_enable_change(esw->dev); + return ret; } void mlx5_eswitch_disable_locked(struct mlx5_eswitch *esw, bool clear_vf) { + struct devlink *devlink = priv_to_devlink(esw->dev); int old_mode; lockdep_assert_held_write(&esw->mode_lock); @@ -1359,7 +1362,8 @@ void mlx5_eswitch_disable_locked(struct mlx5_eswitch *esw, bool clear_vf) if (old_mode == MLX5_ESWITCH_OFFLOADS) mlx5_rescan_drivers(esw->dev); - mlx5_esw_qos_destroy(esw); + devlink_rate_nodes_destroy(devlink); + mlx5_esw_acls_ns_cleanup(esw); if (clear_vf) @@ -1568,10 +1572,16 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) lockdep_register_key(&esw->mode_lock_key); init_rwsem(&esw->mode_lock); lockdep_set_class(&esw->mode_lock, &esw->mode_lock_key); + refcount_set(&esw->qos.refcnt, 0); esw->enabled_vports = 0; esw->mode = MLX5_ESWITCH_NONE; esw->offloads.inline_mode = MLX5_INLINE_MODE_NONE; + if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, reformat) && + MLX5_CAP_ESW_FLOWTABLE_FDB(dev, decap)) + esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_BASIC; + else + esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_NONE; dev->priv.eswitch = esw; BLOCKING_INIT_NOTIFIER_HEAD(&esw->n_head); @@ -1601,6 +1611,7 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) esw->dev->priv.eswitch = NULL; destroy_workqueue(esw->work_queue); + WARN_ON(refcount_read(&esw->qos.refcnt)); lockdep_unregister_key(&esw->mode_lock_key); mutex_destroy(&esw->state_lock); WARN_ON(!xa_empty(&esw->offloads.vhca_map)); @@ -1690,82 +1701,6 @@ bool mlx5_esw_is_sf_vport(struct mlx5_eswitch *esw, u16 vport_num) return mlx5_esw_check_port_type(esw, vport_num, MLX5_ESW_VPT_SF); } -static bool -is_port_function_supported(struct mlx5_eswitch *esw, u16 vport_num) -{ - return vport_num == MLX5_VPORT_PF || - mlx5_eswitch_is_vf_vport(esw, vport_num) || - mlx5_esw_is_sf_vport(esw, vport_num); -} - -int mlx5_devlink_port_function_hw_addr_get(struct devlink_port *port, - u8 *hw_addr, int *hw_addr_len, - struct netlink_ext_ack *extack) -{ - struct mlx5_eswitch *esw; - struct mlx5_vport *vport; - int err = -EOPNOTSUPP; - u16 vport_num; - - esw = mlx5_devlink_eswitch_get(port->devlink); - if (IS_ERR(esw)) - return PTR_ERR(esw); - - vport_num = mlx5_esw_devlink_port_index_to_vport_num(port->index); - if (!is_port_function_supported(esw, vport_num)) - return -EOPNOTSUPP; - - vport = mlx5_eswitch_get_vport(esw, vport_num); - if (IS_ERR(vport)) { - NL_SET_ERR_MSG_MOD(extack, "Invalid port"); - return PTR_ERR(vport); - } - - mutex_lock(&esw->state_lock); - if (vport->enabled) { - ether_addr_copy(hw_addr, vport->info.mac); - *hw_addr_len = ETH_ALEN; - err = 0; - } - mutex_unlock(&esw->state_lock); - return err; -} - -int mlx5_devlink_port_function_hw_addr_set(struct devlink_port *port, - const u8 *hw_addr, int hw_addr_len, - struct netlink_ext_ack *extack) -{ - struct mlx5_eswitch *esw; - struct mlx5_vport *vport; - int err = -EOPNOTSUPP; - u16 vport_num; - - esw = mlx5_devlink_eswitch_get(port->devlink); - if (IS_ERR(esw)) { - NL_SET_ERR_MSG_MOD(extack, "Eswitch doesn't support set hw_addr"); - return PTR_ERR(esw); - } - - vport_num = mlx5_esw_devlink_port_index_to_vport_num(port->index); - if (!is_port_function_supported(esw, vport_num)) { - NL_SET_ERR_MSG_MOD(extack, "Port doesn't support set hw_addr"); - return -EINVAL; - } - vport = mlx5_eswitch_get_vport(esw, vport_num); - if (IS_ERR(vport)) { - NL_SET_ERR_MSG_MOD(extack, "Invalid port"); - return PTR_ERR(vport); - } - - mutex_lock(&esw->state_lock); - if (vport->enabled) - err = mlx5_esw_set_vport_mac_locked(esw, vport, hw_addr); - else - NL_SET_ERR_MSG_MOD(extack, "Eswitch vport is disabled"); - mutex_unlock(&esw->state_lock); - return err; -} - int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw, u16 vport, int link_state) { @@ -1822,8 +1757,10 @@ int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw, ivi->qos = evport->info.qos; ivi->spoofchk = evport->info.spoofchk; ivi->trusted = evport->info.trusted; - ivi->min_tx_rate = evport->qos.min_rate; - ivi->max_tx_rate = evport->qos.max_rate; + if (evport->qos.enabled) { + ivi->min_tx_rate = evport->qos.min_rate; + ivi->max_tx_rate = evport->qos.max_rate; + } mutex_unlock(&esw->state_lock); return 0; @@ -1934,7 +1871,7 @@ free_out: return err; } -u8 mlx5_eswitch_mode(struct mlx5_core_dev *dev) +u8 mlx5_eswitch_mode(const struct mlx5_core_dev *dev) { struct mlx5_eswitch *esw = dev->priv.eswitch; @@ -1948,7 +1885,7 @@ mlx5_eswitch_get_encap_mode(const struct mlx5_core_dev *dev) struct mlx5_eswitch *esw; esw = dev->priv.eswitch; - return mlx5_esw_allowed(esw) ? esw->offloads.encap : + return (mlx5_eswitch_mode(dev) == MLX5_ESWITCH_OFFLOADS) ? esw->offloads.encap : DEVLINK_ESWITCH_ENCAP_MODE_NONE; } EXPORT_SYMBOL(mlx5_eswitch_get_encap_mode); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 42f8ee2e5d9f..ead5e8acc8be 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -308,10 +308,14 @@ struct mlx5_eswitch { atomic64_t user_count; struct { - bool enabled; u32 root_tsar_ix; struct mlx5_esw_rate_group *group0; struct list_head groups; /* Protected by esw->state_lock */ + + /* Protected by esw->state_lock. + * Initially 0, meaning no QoS users and QoS is disabled. + */ + refcount_t refcnt; } qos; struct mlx5_esw_bridge_offloads *br_offloads; @@ -339,9 +343,6 @@ void mlx5_esw_match_metadata_free(struct mlx5_eswitch *esw, u32 metadata); int mlx5_esw_qos_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num, u32 rate_mbps); -bool mlx5_esw_vport_match_metadata_supported(const struct mlx5_eswitch *esw); -int mlx5_esw_offloads_vport_metadata_set(struct mlx5_eswitch *esw, bool enable); - /* E-Switch API */ int mlx5_eswitch_init(struct mlx5_core_dev *dev); void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw); @@ -516,11 +517,6 @@ int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw, int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, u16 vport, u16 vlan, u8 qos, u8 set_flags); -static inline bool mlx5_esw_qos_enabled(struct mlx5_eswitch *esw) -{ - return esw->qos.enabled; -} - static inline bool mlx5_eswitch_vlan_actions_supported(struct mlx5_core_dev *dev, u8 vlan_depth) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index f4eaa5893886..9a7b25692505 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -295,26 +295,28 @@ esw_setup_chain_src_port_rewrite(struct mlx5_flow_destination *dest, int *i) { struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; - int j, err; + int err; if (!(attr->flags & MLX5_ESW_ATTR_FLAG_SRC_REWRITE)) return -EOPNOTSUPP; - for (j = esw_attr->split_count; j < esw_attr->out_count; j++, (*i)++) { - err = esw_setup_chain_dest(dest, flow_act, chains, attr->dest_chain, 1, 0, *i); - if (err) - goto err_setup_chain; + /* flow steering cannot handle more than one dest with the same ft + * in a single flow + */ + if (esw_attr->out_count - esw_attr->split_count > 1) + return -EOPNOTSUPP; - if (esw_attr->dests[j].pkt_reformat) { - flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; - flow_act->pkt_reformat = esw_attr->dests[j].pkt_reformat; - } + err = esw_setup_chain_dest(dest, flow_act, chains, attr->dest_chain, 1, 0, *i); + if (err) + return err; + + if (esw_attr->dests[esw_attr->split_count].pkt_reformat) { + flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; + flow_act->pkt_reformat = esw_attr->dests[esw_attr->split_count].pkt_reformat; } - return 0; + (*i)++; -err_setup_chain: - esw_put_dest_tables_loop(esw, attr, esw_attr->split_count, j); - return err; + return 0; } static void esw_cleanup_chain_src_port_rewrite(struct mlx5_eswitch *esw, @@ -329,14 +331,25 @@ static bool esw_is_indir_table(struct mlx5_eswitch *esw, struct mlx5_flow_attr *attr) { struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + bool result = false; int i; - for (i = esw_attr->split_count; i < esw_attr->out_count; i++) + /* Indirect table is supported only for flows with in_port uplink + * and the destination is vport on the same eswitch as the uplink, + * return false in case at least one of destinations doesn't meet + * this criteria. + */ + for (i = esw_attr->split_count; i < esw_attr->out_count; i++) { if (esw_attr->dests[i].rep && mlx5_esw_indir_table_needed(esw, attr, esw_attr->dests[i].rep->vport, - esw_attr->dests[i].mdev)) - return true; - return false; + esw_attr->dests[i].mdev)) { + result = true; + } else { + result = false; + break; + } + } + return result; } static int @@ -2512,6 +2525,7 @@ static int esw_set_master_egress_rule(struct mlx5_core_dev *master, struct mlx5_eswitch *esw = master->priv.eswitch; struct mlx5_flow_table_attr ft_attr = { .max_fte = 1, .prio = 0, .level = 0, + .flags = MLX5_FLOW_TABLE_OTHER_VPORT, }; struct mlx5_flow_namespace *egress_ns; struct mlx5_flow_table *acl; @@ -3183,12 +3197,6 @@ int esw_offloads_enable(struct mlx5_eswitch *esw) u64 mapping_id; int err; - if (MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, reformat) && - MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, decap)) - esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_BASIC; - else - esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_NONE; - mutex_init(&esw->offloads.termtbl_mutex); mlx5_rdma_enable_roce(esw->dev); @@ -3286,7 +3294,6 @@ void esw_offloads_disable(struct mlx5_eswitch *esw) esw_offloads_metadata_uninit(esw); mlx5_rdma_disable_roce(esw->dev); mutex_destroy(&esw->offloads.termtbl_mutex); - esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_NONE; } static int esw_mode_from_devlink(u16 mode, u16 *mlx5_mode) @@ -3630,7 +3637,7 @@ int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, *encap = esw->offloads.encap; unlock: up_write(&esw->mode_lock); - return 0; + return err; } static bool @@ -3862,3 +3869,62 @@ u32 mlx5_eswitch_get_vport_metadata_for_set(struct mlx5_eswitch *esw, return vport->metadata; } EXPORT_SYMBOL(mlx5_eswitch_get_vport_metadata_for_set); + +static bool +is_port_function_supported(struct mlx5_eswitch *esw, u16 vport_num) +{ + return vport_num == MLX5_VPORT_PF || + mlx5_eswitch_is_vf_vport(esw, vport_num) || + mlx5_esw_is_sf_vport(esw, vport_num); +} + +int mlx5_devlink_port_function_hw_addr_get(struct devlink_port *port, + u8 *hw_addr, int *hw_addr_len, + struct netlink_ext_ack *extack) +{ + struct mlx5_eswitch *esw; + struct mlx5_vport *vport; + u16 vport_num; + + esw = mlx5_devlink_eswitch_get(port->devlink); + if (IS_ERR(esw)) + return PTR_ERR(esw); + + vport_num = mlx5_esw_devlink_port_index_to_vport_num(port->index); + if (!is_port_function_supported(esw, vport_num)) + return -EOPNOTSUPP; + + vport = mlx5_eswitch_get_vport(esw, vport_num); + if (IS_ERR(vport)) { + NL_SET_ERR_MSG_MOD(extack, "Invalid port"); + return PTR_ERR(vport); + } + + mutex_lock(&esw->state_lock); + ether_addr_copy(hw_addr, vport->info.mac); + *hw_addr_len = ETH_ALEN; + mutex_unlock(&esw->state_lock); + return 0; +} + +int mlx5_devlink_port_function_hw_addr_set(struct devlink_port *port, + const u8 *hw_addr, int hw_addr_len, + struct netlink_ext_ack *extack) +{ + struct mlx5_eswitch *esw; + u16 vport_num; + + esw = mlx5_devlink_eswitch_get(port->devlink); + if (IS_ERR(esw)) { + NL_SET_ERR_MSG_MOD(extack, "Eswitch doesn't support set hw_addr"); + return PTR_ERR(esw); + } + + vport_num = mlx5_esw_devlink_port_index_to_vport_num(port->index); + if (!is_port_function_supported(esw, vport_num)) { + NL_SET_ERR_MSG_MOD(extack, "Port doesn't support set hw_addr"); + return -EINVAL; + } + + return mlx5_eswitch_set_vport_mac(esw, vport_num, hw_addr); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index 750b21124a1a..dafe341358c7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -451,7 +451,8 @@ static int mlx5_set_extended_dest(struct mlx5_core_dev *dev, list_for_each_entry(dst, &fte->node.children, node.list) { if (dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_COUNTER) continue; - if (dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_VPORT && + if ((dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_VPORT || + dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_UPLINK) && dst->dest_attr.vport.flags & MLX5_FLOW_DEST_VPORT_REFORMAT_ID) num_encap++; num_fwd_destinations++; @@ -788,7 +789,8 @@ static int mlx5_cmd_packet_reformat_alloc(struct mlx5_flow_root_namespace *ns, int err; u32 *in; - if (namespace == MLX5_FLOW_NAMESPACE_FDB) + if (namespace == MLX5_FLOW_NAMESPACE_FDB || + namespace == MLX5_FLOW_NAMESPACE_FDB_BYPASS) max_encap_size = MLX5_CAP_ESW(dev, max_encap_header_size); else max_encap_size = MLX5_CAP_FLOWTABLE(dev, max_encap_header_size); @@ -860,6 +862,7 @@ static int mlx5_cmd_modify_header_alloc(struct mlx5_flow_root_namespace *ns, switch (namespace) { case MLX5_FLOW_NAMESPACE_FDB: + case MLX5_FLOW_NAMESPACE_FDB_BYPASS: max_actions = MLX5_CAP_ESW_FLOWTABLE_FDB(dev, max_modify_header_actions); table_type = FS_FT_FDB; break; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 386ab9a2d490..b628917e38e4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -1525,7 +1525,8 @@ static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1, struct mlx5_flow_destination *d2) { if (d1->type == d2->type) { - if ((d1->type == MLX5_FLOW_DESTINATION_TYPE_VPORT && + if (((d1->type == MLX5_FLOW_DESTINATION_TYPE_VPORT || + d1->type == MLX5_FLOW_DESTINATION_TYPE_UPLINK) && d1->vport.num == d2->vport.num && d1->vport.flags == d2->vport.flags && ((d1->vport.flags & MLX5_FLOW_DEST_VPORT_VHCA_ID) ? @@ -2206,6 +2207,22 @@ struct mlx5_flow_namespace *mlx5_get_fdb_sub_ns(struct mlx5_core_dev *dev, } EXPORT_SYMBOL(mlx5_get_fdb_sub_ns); +static bool is_nic_rx_ns(enum mlx5_flow_namespace_type type) +{ + switch (type) { + case MLX5_FLOW_NAMESPACE_BYPASS: + case MLX5_FLOW_NAMESPACE_LAG: + case MLX5_FLOW_NAMESPACE_OFFLOADS: + case MLX5_FLOW_NAMESPACE_ETHTOOL: + case MLX5_FLOW_NAMESPACE_KERNEL: + case MLX5_FLOW_NAMESPACE_LEFTOVERS: + case MLX5_FLOW_NAMESPACE_ANCHOR: + return true; + default: + return false; + } +} + struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev, enum mlx5_flow_namespace_type type) { @@ -2235,31 +2252,39 @@ struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev, if (steering->sniffer_tx_root_ns) return &steering->sniffer_tx_root_ns->ns; return NULL; - default: + case MLX5_FLOW_NAMESPACE_FDB_BYPASS: + root_ns = steering->fdb_root_ns; + prio = FDB_BYPASS_PATH; break; - } - - if (type == MLX5_FLOW_NAMESPACE_EGRESS || - type == MLX5_FLOW_NAMESPACE_EGRESS_KERNEL) { + case MLX5_FLOW_NAMESPACE_EGRESS: + case MLX5_FLOW_NAMESPACE_EGRESS_KERNEL: root_ns = steering->egress_root_ns; prio = type - MLX5_FLOW_NAMESPACE_EGRESS; - } else if (type == MLX5_FLOW_NAMESPACE_RDMA_RX) { + break; + case MLX5_FLOW_NAMESPACE_RDMA_RX: root_ns = steering->rdma_rx_root_ns; prio = RDMA_RX_BYPASS_PRIO; - } else if (type == MLX5_FLOW_NAMESPACE_RDMA_RX_KERNEL) { + break; + case MLX5_FLOW_NAMESPACE_RDMA_RX_KERNEL: root_ns = steering->rdma_rx_root_ns; prio = RDMA_RX_KERNEL_PRIO; - } else if (type == MLX5_FLOW_NAMESPACE_RDMA_TX) { + break; + case MLX5_FLOW_NAMESPACE_RDMA_TX: root_ns = steering->rdma_tx_root_ns; - } else if (type == MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS) { + break; + case MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS: root_ns = steering->rdma_rx_root_ns; prio = RDMA_RX_COUNTERS_PRIO; - } else if (type == MLX5_FLOW_NAMESPACE_RDMA_TX_COUNTERS) { + break; + case MLX5_FLOW_NAMESPACE_RDMA_TX_COUNTERS: root_ns = steering->rdma_tx_root_ns; prio = RDMA_TX_COUNTERS_PRIO; - } else { /* Must be NIC RX */ + break; + default: /* Must be NIC RX */ + WARN_ON(!is_nic_rx_ns(type)); root_ns = steering->root_ns; prio = type; + break; } if (!root_ns) @@ -2822,6 +2847,28 @@ static int create_fdb_fast_path(struct mlx5_flow_steering *steering) return 0; } +static int create_fdb_bypass(struct mlx5_flow_steering *steering) +{ + struct mlx5_flow_namespace *ns; + struct fs_prio *prio; + int i; + + prio = fs_create_prio(&steering->fdb_root_ns->ns, FDB_BYPASS_PATH, 0); + if (IS_ERR(prio)) + return PTR_ERR(prio); + + ns = fs_create_namespace(prio, MLX5_FLOW_TABLE_MISS_ACTION_DEF); + if (IS_ERR(ns)) + return PTR_ERR(ns); + + for (i = 0; i < MLX5_BY_PASS_NUM_REGULAR_PRIOS; i++) { + prio = fs_create_prio(ns, i, 1); + if (IS_ERR(prio)) + return PTR_ERR(prio); + } + return 0; +} + static int init_fdb_root_ns(struct mlx5_flow_steering *steering) { struct fs_prio *maj_prio; @@ -2831,12 +2878,10 @@ static int init_fdb_root_ns(struct mlx5_flow_steering *steering) if (!steering->fdb_root_ns) return -ENOMEM; - maj_prio = fs_create_prio(&steering->fdb_root_ns->ns, FDB_BYPASS_PATH, - 1); - if (IS_ERR(maj_prio)) { - err = PTR_ERR(maj_prio); + err = create_fdb_bypass(steering); + if (err) goto out_err; - } + err = create_fdb_fast_path(steering); if (err) goto out_err; @@ -3038,6 +3083,11 @@ int mlx5_init_fs(struct mlx5_core_dev *dev) steering->dev = dev; dev->priv.steering = steering; + if (mlx5_fs_dr_is_supported(dev)) + steering->mode = MLX5_FLOW_STEERING_MODE_SMFS; + else + steering->mode = MLX5_FLOW_STEERING_MODE_DMFS; + steering->fgs_cache = kmem_cache_create("mlx5_fs_fgs", sizeof(struct mlx5_flow_group), 0, 0, NULL); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index 7711db245c63..5469b08d635f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -203,7 +203,7 @@ struct mlx5_ft_underlay_qp { u32 qpn; }; -#define MLX5_FTE_MATCH_PARAM_RESERVED reserved_at_c00 +#define MLX5_FTE_MATCH_PARAM_RESERVED reserved_at_e00 /* Calculate the fte_match_param length and without the reserved length. * Make sure the reserved field is the last. */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c index 31c99d53faf7..b406e0367af6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c @@ -38,9 +38,10 @@ #include "fs_cmd.h" #define MLX5_FC_STATS_PERIOD msecs_to_jiffies(1000) +#define MLX5_FC_BULK_QUERY_ALLOC_PERIOD msecs_to_jiffies(180 * 1000) /* Max number of counters to query in bulk read is 32K */ #define MLX5_SW_MAX_COUNTERS_BULK BIT(15) -#define MLX5_SF_NUM_COUNTERS_BULK 6 +#define MLX5_INIT_COUNTERS_BULK 8 #define MLX5_FC_POOL_MAX_THRESHOLD BIT(18) #define MLX5_FC_POOL_USED_BUFF_RATIO 10 @@ -145,13 +146,15 @@ static void mlx5_fc_stats_remove(struct mlx5_core_dev *dev, spin_unlock(&fc_stats->counters_idr_lock); } -static int get_max_bulk_query_len(struct mlx5_core_dev *dev) +static int get_init_bulk_query_len(struct mlx5_core_dev *dev) { - int num_counters_bulk = mlx5_core_is_sf(dev) ? - MLX5_SF_NUM_COUNTERS_BULK : - MLX5_SW_MAX_COUNTERS_BULK; + return min_t(int, MLX5_INIT_COUNTERS_BULK, + (1 << MLX5_CAP_GEN(dev, log_max_flow_counter_bulk))); +} - return min_t(int, num_counters_bulk, +static int get_max_bulk_query_len(struct mlx5_core_dev *dev) +{ + return min_t(int, MLX5_SW_MAX_COUNTERS_BULK, (1 << MLX5_CAP_GEN(dev, log_max_flow_counter_bulk))); } @@ -177,7 +180,7 @@ static void mlx5_fc_stats_query_counter_range(struct mlx5_core_dev *dev, { struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; bool query_more_counters = (first->id <= last_id); - int max_bulk_len = get_max_bulk_query_len(dev); + int cur_bulk_len = fc_stats->bulk_query_len; u32 *data = fc_stats->bulk_query_out; struct mlx5_fc *counter = first; u32 bulk_base_id; @@ -189,7 +192,7 @@ static void mlx5_fc_stats_query_counter_range(struct mlx5_core_dev *dev, bulk_base_id = counter->id & ~0x3; /* number of counters to query inc. the last counter */ - bulk_len = min_t(int, max_bulk_len, + bulk_len = min_t(int, cur_bulk_len, ALIGN(last_id - bulk_base_id + 1, 4)); err = mlx5_cmd_fc_bulk_query(dev, bulk_base_id, bulk_len, @@ -230,6 +233,41 @@ static void mlx5_fc_release(struct mlx5_core_dev *dev, struct mlx5_fc *counter) mlx5_fc_free(dev, counter); } +static void mlx5_fc_stats_bulk_query_size_increase(struct mlx5_core_dev *dev) +{ + struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; + int max_bulk_len = get_max_bulk_query_len(dev); + unsigned long now = jiffies; + u32 *bulk_query_out_tmp; + int max_out_len; + + if (fc_stats->bulk_query_alloc_failed && + time_before(now, fc_stats->next_bulk_query_alloc)) + return; + + max_out_len = mlx5_cmd_fc_get_bulk_query_out_len(max_bulk_len); + bulk_query_out_tmp = kzalloc(max_out_len, GFP_KERNEL); + if (!bulk_query_out_tmp) { + mlx5_core_warn_once(dev, + "Can't increase flow counters bulk query buffer size, insufficient memory, bulk_size(%d)\n", + max_bulk_len); + fc_stats->bulk_query_alloc_failed = true; + fc_stats->next_bulk_query_alloc = + now + MLX5_FC_BULK_QUERY_ALLOC_PERIOD; + return; + } + + kfree(fc_stats->bulk_query_out); + fc_stats->bulk_query_out = bulk_query_out_tmp; + fc_stats->bulk_query_len = max_bulk_len; + if (fc_stats->bulk_query_alloc_failed) { + mlx5_core_info(dev, + "Flow counters bulk query buffer size increased, bulk_size(%d)\n", + max_bulk_len); + fc_stats->bulk_query_alloc_failed = false; + } +} + static void mlx5_fc_stats_work(struct work_struct *work) { struct mlx5_core_dev *dev = container_of(work, struct mlx5_core_dev, @@ -247,15 +285,22 @@ static void mlx5_fc_stats_work(struct work_struct *work) queue_delayed_work(fc_stats->wq, &fc_stats->work, fc_stats->sampling_interval); - llist_for_each_entry(counter, addlist, addlist) + llist_for_each_entry(counter, addlist, addlist) { mlx5_fc_stats_insert(dev, counter); + fc_stats->num_counters++; + } llist_for_each_entry_safe(counter, tmp, dellist, dellist) { mlx5_fc_stats_remove(dev, counter); mlx5_fc_release(dev, counter); + fc_stats->num_counters--; } + if (fc_stats->bulk_query_len < get_max_bulk_query_len(dev) && + fc_stats->num_counters > get_init_bulk_query_len(dev)) + mlx5_fc_stats_bulk_query_size_increase(dev); + if (time_before(now, fc_stats->next_query) || list_empty(&fc_stats->counters)) return; @@ -378,8 +423,8 @@ EXPORT_SYMBOL(mlx5_fc_destroy); int mlx5_init_fc_stats(struct mlx5_core_dev *dev) { struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; - int max_bulk_len; - int max_out_len; + int init_bulk_len; + int init_out_len; spin_lock_init(&fc_stats->counters_idr_lock); idr_init(&fc_stats->counters_idr); @@ -387,11 +432,12 @@ int mlx5_init_fc_stats(struct mlx5_core_dev *dev) init_llist_head(&fc_stats->addlist); init_llist_head(&fc_stats->dellist); - max_bulk_len = get_max_bulk_query_len(dev); - max_out_len = mlx5_cmd_fc_get_bulk_query_out_len(max_bulk_len); - fc_stats->bulk_query_out = kzalloc(max_out_len, GFP_KERNEL); + init_bulk_len = get_init_bulk_query_len(dev); + init_out_len = mlx5_cmd_fc_get_bulk_query_out_len(init_bulk_len); + fc_stats->bulk_query_out = kzalloc(init_out_len, GFP_KERNEL); if (!fc_stats->bulk_query_out) return -ENOMEM; + fc_stats->bulk_query_len = init_bulk_len; fc_stats->wq = create_singlethread_workqueue("mlx5_fc"); if (!fc_stats->wq) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index 64f1abc4dc36..737df402c927 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -420,6 +420,11 @@ static void print_health_info(struct mlx5_core_dev *dev) if (!ioread8(&h->synd)) return; + if (ioread32be(&h->fw_ver) == 0xFFFFFFFF) { + mlx5_log(dev, LOGLEVEL_ERR, "PCI slot is unavailable\n"); + return; + } + rfr_severity = ioread8(&h->rfr_severity); severity = mlx5_health_get_severity(rfr_severity); mlx5_log(dev, severity, "Health issue observed, %s, severity(%d) %s:\n", @@ -835,6 +840,9 @@ void mlx5_start_health_poll(struct mlx5_core_dev *dev) health->timer.expires = jiffies + msecs_to_jiffies(poll_interval_ms); add_timer(&health->timer); + + if (mlx5_core_is_pf(dev) && MLX5_CAP_MCAM_REG(dev, mrtc)) + queue_delayed_work(health->wq, &health->update_fw_log_ts_work, 0); } void mlx5_stop_health_poll(struct mlx5_core_dev *dev, bool disable_health) @@ -902,8 +910,6 @@ int mlx5_health_init(struct mlx5_core_dev *dev) INIT_WORK(&health->fatal_report_work, mlx5_fw_fatal_reporter_err_work); INIT_WORK(&health->report_work, mlx5_fw_reporter_err_work); INIT_DELAYED_WORK(&health->update_fw_log_ts_work, mlx5_health_log_ts_update); - if (mlx5_core_is_pf(dev)) - queue_delayed_work(health->wq, &health->update_fw_log_ts_work, 0); return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c index 962d41418ce7..f4f7eaf16446 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c @@ -67,7 +67,9 @@ static void mlx5i_get_ethtool_stats(struct net_device *dev, } static int mlx5i_set_ringparam(struct net_device *dev, - struct ethtool_ringparam *param) + struct ethtool_ringparam *param, + struct kernel_ethtool_ringparam *kernel_param, + struct netlink_ext_ack *extack) { struct mlx5e_priv *priv = mlx5i_epriv(dev); @@ -75,7 +77,9 @@ static int mlx5i_set_ringparam(struct net_device *dev, } static void mlx5i_get_ringparam(struct net_device *dev, - struct ethtool_ringparam *param) + struct ethtool_ringparam *param, + struct kernel_ethtool_ringparam *kernel_param, + struct netlink_ext_ack *extack) { struct mlx5e_priv *priv = mlx5i_epriv(dev); @@ -105,7 +109,7 @@ static int mlx5i_set_coalesce(struct net_device *netdev, { struct mlx5e_priv *priv = mlx5i_epriv(netdev); - return mlx5e_ethtool_set_coalesce(priv, coal); + return mlx5e_ethtool_set_coalesce(priv, coal, kernel_coal, extack); } static int mlx5i_get_coalesce(struct net_device *netdev, @@ -115,7 +119,7 @@ static int mlx5i_get_coalesce(struct net_device *netdev, { struct mlx5e_priv *priv = mlx5i_epriv(netdev); - return mlx5e_ethtool_get_coalesce(priv, coal); + return mlx5e_ethtool_get_coalesce(priv, coal, kernel_coal); } static int mlx5i_get_ts_info(struct net_device *netdev, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c index ea1efdecc88c..0a99a020a3b2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@ -110,14 +110,14 @@ void mlx5i_cleanup(struct mlx5e_priv *priv) static void mlx5i_grp_sw_update_stats(struct mlx5e_priv *priv) { - struct mlx5e_sw_stats s = { 0 }; + struct rtnl_link_stats64 s = {}; int i, j; for (i = 0; i < priv->stats_nch; i++) { struct mlx5e_channel_stats *channel_stats; struct mlx5e_rq_stats *rq_stats; - channel_stats = &priv->channel_stats[i]; + channel_stats = priv->channel_stats[i]; rq_stats = &channel_stats->rq; s.rx_packets += rq_stats->packets; @@ -128,11 +128,17 @@ static void mlx5i_grp_sw_update_stats(struct mlx5e_priv *priv) s.tx_packets += sq_stats->packets; s.tx_bytes += sq_stats->bytes; - s.tx_queue_dropped += sq_stats->dropped; + s.tx_dropped += sq_stats->dropped; } } - memcpy(&priv->stats.sw, &s, sizeof(s)); + memset(&priv->stats.sw, 0, sizeof(s)); + + priv->stats.sw.rx_packets = s.rx_packets; + priv->stats.sw.rx_bytes = s.rx_bytes; + priv->stats.sw.tx_packets = s.tx_packets; + priv->stats.sw.tx_bytes = s.tx_bytes; + priv->stats.sw.tx_queue_dropped = s.tx_dropped; } void mlx5i_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) @@ -443,7 +449,6 @@ static const struct mlx5e_profile mlx5i_nic_profile = { .rq_groups = MLX5E_NUM_RQ_GROUPS(REGULAR), .stats_grps = mlx5i_stats_grps, .stats_grps_num = mlx5i_stats_grps_num, - .rx_ptp_support = false, }; /* mlx5i netdev NDos */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c index 5308f23702bc..0b86e78dbc0e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c @@ -350,7 +350,6 @@ static const struct mlx5e_profile mlx5i_pkey_nic_profile = { .rx_handlers = &mlx5i_rx_handlers, .max_tc = MLX5I_MAX_NUM_TC, .rq_groups = MLX5E_NUM_RQ_GROUPS(REGULAR), - .rx_ptp_support = false, }; const struct mlx5e_profile *mlx5i_pkey_get_profile(void) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c b/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c new file mode 100644 index 000000000000..380a208ab137 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c @@ -0,0 +1,226 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#include "mlx5_core.h" +#include "mlx5_irq.h" +#include "pci_irq.h" + +static void cpu_put(struct mlx5_irq_pool *pool, int cpu) +{ + pool->irqs_per_cpu[cpu]--; +} + +static void cpu_get(struct mlx5_irq_pool *pool, int cpu) +{ + pool->irqs_per_cpu[cpu]++; +} + +/* Gets the least loaded CPU. e.g.: the CPU with least IRQs bound to it */ +static int cpu_get_least_loaded(struct mlx5_irq_pool *pool, + const struct cpumask *req_mask) +{ + int best_cpu = -1; + int cpu; + + for_each_cpu_and(cpu, req_mask, cpu_online_mask) { + /* CPU has zero IRQs on it. No need to search any more CPUs. */ + if (!pool->irqs_per_cpu[cpu]) { + best_cpu = cpu; + break; + } + if (best_cpu < 0) + best_cpu = cpu; + if (pool->irqs_per_cpu[cpu] < pool->irqs_per_cpu[best_cpu]) + best_cpu = cpu; + } + if (best_cpu == -1) { + /* There isn't online CPUs in req_mask */ + mlx5_core_err(pool->dev, "NO online CPUs in req_mask (%*pbl)\n", + cpumask_pr_args(req_mask)); + best_cpu = cpumask_first(cpu_online_mask); + } + pool->irqs_per_cpu[best_cpu]++; + return best_cpu; +} + +/* Creating an IRQ from irq_pool */ +static struct mlx5_irq * +irq_pool_request_irq(struct mlx5_irq_pool *pool, const struct cpumask *req_mask) +{ + cpumask_var_t auto_mask; + struct mlx5_irq *irq; + u32 irq_index; + int err; + + if (!zalloc_cpumask_var(&auto_mask, GFP_KERNEL)) + return ERR_PTR(-ENOMEM); + err = xa_alloc(&pool->irqs, &irq_index, NULL, pool->xa_num_irqs, GFP_KERNEL); + if (err) + return ERR_PTR(err); + if (pool->irqs_per_cpu) { + if (cpumask_weight(req_mask) > 1) + /* if req_mask contain more then one CPU, set the least loadad CPU + * of req_mask + */ + cpumask_set_cpu(cpu_get_least_loaded(pool, req_mask), auto_mask); + else + cpu_get(pool, cpumask_first(req_mask)); + } + irq = mlx5_irq_alloc(pool, irq_index, cpumask_empty(auto_mask) ? req_mask : auto_mask); + free_cpumask_var(auto_mask); + return irq; +} + +/* Looking for the IRQ with the smallest refcount that fits req_mask. + * If pool is sf_comp_pool, then we are looking for an IRQ with any of the + * requested CPUs in req_mask. + * for example: req_mask = 0xf, irq0_mask = 0x10, irq1_mask = 0x1. irq0_mask + * isn't subset of req_mask, so we will skip it. irq1_mask is subset of req_mask, + * we don't skip it. + * If pool is sf_ctrl_pool, then all IRQs have the same mask, so any IRQ will + * fit. And since mask is subset of itself, we will pass the first if bellow. + */ +static struct mlx5_irq * +irq_pool_find_least_loaded(struct mlx5_irq_pool *pool, const struct cpumask *req_mask) +{ + int start = pool->xa_num_irqs.min; + int end = pool->xa_num_irqs.max; + struct mlx5_irq *irq = NULL; + struct mlx5_irq *iter; + int irq_refcount = 0; + unsigned long index; + + lockdep_assert_held(&pool->lock); + xa_for_each_range(&pool->irqs, index, iter, start, end) { + struct cpumask *iter_mask = mlx5_irq_get_affinity_mask(iter); + int iter_refcount = mlx5_irq_read_locked(iter); + + if (!cpumask_subset(iter_mask, req_mask)) + /* skip IRQs with a mask which is not subset of req_mask */ + continue; + if (iter_refcount < pool->min_threshold) + /* If we found an IRQ with less than min_thres, return it */ + return iter; + if (!irq || iter_refcount < irq_refcount) { + /* In case we won't find an IRQ with less than min_thres, + * keep a pointer to the least used IRQ + */ + irq_refcount = iter_refcount; + irq = iter; + } + } + return irq; +} + +/** + * mlx5_irq_affinity_request - request an IRQ according to the given mask. + * @pool: IRQ pool to request from. + * @req_mask: cpumask requested for this IRQ. + * + * This function returns a pointer to IRQ, or ERR_PTR in case of error. + */ +struct mlx5_irq * +mlx5_irq_affinity_request(struct mlx5_irq_pool *pool, const struct cpumask *req_mask) +{ + struct mlx5_irq *least_loaded_irq, *new_irq; + + mutex_lock(&pool->lock); + least_loaded_irq = irq_pool_find_least_loaded(pool, req_mask); + if (least_loaded_irq && + mlx5_irq_read_locked(least_loaded_irq) < pool->min_threshold) + goto out; + /* We didn't find an IRQ with less than min_thres, try to allocate a new IRQ */ + new_irq = irq_pool_request_irq(pool, req_mask); + if (IS_ERR(new_irq)) { + if (!least_loaded_irq) { + /* We failed to create an IRQ and we didn't find an IRQ */ + mlx5_core_err(pool->dev, "Didn't find a matching IRQ. err = %ld\n", + PTR_ERR(new_irq)); + mutex_unlock(&pool->lock); + return new_irq; + } + /* We failed to create a new IRQ for the requested affinity, + * sharing existing IRQ. + */ + goto out; + } + least_loaded_irq = new_irq; + goto unlock; +out: + mlx5_irq_get_locked(least_loaded_irq); + if (mlx5_irq_read_locked(least_loaded_irq) > pool->max_threshold) + mlx5_core_dbg(pool->dev, "IRQ %u overloaded, pool_name: %s, %u EQs on this irq\n", + pci_irq_vector(pool->dev->pdev, + mlx5_irq_get_index(least_loaded_irq)), pool->name, + mlx5_irq_read_locked(least_loaded_irq) / MLX5_EQ_REFS_PER_IRQ); +unlock: + mutex_unlock(&pool->lock); + return least_loaded_irq; +} + +void mlx5_irq_affinity_irqs_release(struct mlx5_core_dev *dev, struct mlx5_irq **irqs, + int num_irqs) +{ + struct mlx5_irq_pool *pool = mlx5_irq_pool_get(dev); + int i; + + for (i = 0; i < num_irqs; i++) { + int cpu = cpumask_first(mlx5_irq_get_affinity_mask(irqs[i])); + + synchronize_irq(pci_irq_vector(pool->dev->pdev, + mlx5_irq_get_index(irqs[i]))); + if (mlx5_irq_put(irqs[i])) + if (pool->irqs_per_cpu) + cpu_put(pool, cpu); + } +} + +/** + * mlx5_irq_affinity_irqs_request_auto - request one or more IRQs for mlx5 device. + * @dev: mlx5 device that is requesting the IRQs. + * @nirqs: number of IRQs to request. + * @irqs: an output array of IRQs pointers. + * + * Each IRQ is bounded to at most 1 CPU. + * This function is requesting IRQs according to the default assignment. + * The default assignment policy is: + * - in each iteration, request the least loaded IRQ which is not bound to any + * CPU of the previous IRQs requested. + * + * This function returns the number of IRQs requested, (which might be smaller than + * @nirqs), if successful, or a negative error code in case of an error. + */ +int mlx5_irq_affinity_irqs_request_auto(struct mlx5_core_dev *dev, int nirqs, + struct mlx5_irq **irqs) +{ + struct mlx5_irq_pool *pool = mlx5_irq_pool_get(dev); + cpumask_var_t req_mask; + struct mlx5_irq *irq; + int i = 0; + + if (!zalloc_cpumask_var(&req_mask, GFP_KERNEL)) + return -ENOMEM; + cpumask_copy(req_mask, cpu_online_mask); + for (i = 0; i < nirqs; i++) { + if (mlx5_irq_pool_is_sf_pool(pool)) + irq = mlx5_irq_affinity_request(pool, req_mask); + else + /* In case SF pool doesn't exists, fallback to the PF IRQs. + * The PF IRQs are already allocated and binded to CPU + * at this point. Hence, only an index is needed. + */ + irq = mlx5_irq_request(dev, i, NULL); + if (IS_ERR(irq)) + break; + irqs[i] = irq; + cpumask_clear_cpu(cpumask_first(mlx5_irq_get_affinity_mask(irq)), req_mask); + mlx5_core_dbg(pool->dev, "IRQ %u mapped to cpu %*pbl, %u EQs on this irq\n", + pci_irq_vector(dev->pdev, mlx5_irq_get_index(irq)), + cpumask_pr_args(mlx5_irq_get_affinity_mask(irq)), + mlx5_irq_read_locked(irq) / MLX5_EQ_REFS_PER_IRQ); + } + free_cpumask_var(req_mask); + if (!i) + return PTR_ERR(irq); + return i; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c index 48d2ea690d7a..4ddf6b330a44 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c @@ -615,6 +615,7 @@ static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev, bool is_bonded, is_in_lag, mode_supported; int bond_status = 0; int num_slaves = 0; + int changed = 0; int idx; if (!netif_is_lag_master(upper)) @@ -653,27 +654,27 @@ static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev, */ is_in_lag = num_slaves == MLX5_MAX_PORTS && bond_status == 0x3; - if (!mlx5_lag_is_ready(ldev) && is_in_lag) { - NL_SET_ERR_MSG_MOD(info->info.extack, - "Can't activate LAG offload, PF is configured with more than 64 VFs"); - return 0; - } - /* Lag mode must be activebackup or hash. */ mode_supported = tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP || tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH; - if (is_in_lag && !mode_supported) - NL_SET_ERR_MSG_MOD(info->info.extack, - "Can't activate LAG offload, TX type isn't supported"); - is_bonded = is_in_lag && mode_supported; if (tracker->is_bonded != is_bonded) { tracker->is_bonded = is_bonded; - return 1; + changed = 1; } - return 0; + if (!is_in_lag) + return changed; + + if (!mlx5_lag_is_ready(ldev)) + NL_SET_ERR_MSG_MOD(info->info.extack, + "Can't activate LAG offload, PF is configured with more than 64 VFs"); + else if (!mode_supported) + NL_SET_ERR_MSG_MOD(info->info.extack, + "Can't activate LAG offload, TX type isn't supported"); + + return changed; } static int mlx5_handle_changelowerstate_event(struct mlx5_lag *ldev, @@ -716,9 +717,6 @@ static int mlx5_lag_netdev_event(struct notifier_block *this, ldev = container_of(this, struct mlx5_lag, nb); - if (!mlx5_lag_is_ready(ldev) && event == NETDEV_CHANGELOWERSTATE) - return NOTIFY_DONE; - tracker = ldev->tracker; switch (event) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c index bf4d3cbefa63..1ca01a5b6cdd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c @@ -268,10 +268,8 @@ static int mlx5_lag_fib_event(struct notifier_block *nb, fen_info = container_of(info, struct fib_entry_notifier_info, info); fi = fen_info->fi; - if (fi->nh) { - NL_SET_ERR_MSG_MOD(info->extack, "IPv4 route with nexthop objects is not supported"); - return notifier_from_errno(-EINVAL); - } + if (fi->nh) + return NOTIFY_DONE; fib_dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev; if (fib_dev != ldev->pf[MLX5_LAG_P1].netdev && fib_dev != ldev->pf[MLX5_LAG_P2].netdev) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c index ad63dd45c8fb..a6592f9c3c05 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c @@ -608,4 +608,5 @@ void mlx5_lag_port_sel_destroy(struct mlx5_lag *ldev) if (port_sel->tunnel) mlx5_destroy_ttc_table(port_sel->inner.ttc); mlx5_lag_destroy_definers(ldev); + memset(port_sel, 0, sizeof(*port_sel)); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c index 97e5845b4cfd..d5e47630e284 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c @@ -121,6 +121,9 @@ u32 mlx5_chains_get_nf_ft_chain(struct mlx5_fs_chains *chains) u32 mlx5_chains_get_prio_range(struct mlx5_fs_chains *chains) { + if (!mlx5_chains_prios_supported(chains)) + return 1; + if (mlx5_chains_ignore_flow_level_supported(chains)) return UINT_MAX; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c index 0dd96a6b140d..c1df0d3595d8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c @@ -31,11 +31,11 @@ static void tout_set(struct mlx5_core_dev *dev, u64 val, enum mlx5_timeouts_type dev->timeouts->to[type] = val; } -static void tout_set_def_val(struct mlx5_core_dev *dev) +void mlx5_tout_set_def_val(struct mlx5_core_dev *dev) { int i; - for (i = MLX5_TO_FW_PRE_INIT_TIMEOUT_MS; i < MAX_TIMEOUT_TYPES; i++) + for (i = 0; i < MAX_TIMEOUT_TYPES; i++) tout_set(dev, tout_def_sw_val[i], i); } @@ -45,7 +45,6 @@ int mlx5_tout_init(struct mlx5_core_dev *dev) if (!dev->timeouts) return -ENOMEM; - tout_set_def_val(dev); return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.h index 31faa5c17aa9..1c42ead782fa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.h @@ -34,6 +34,7 @@ int mlx5_tout_init(struct mlx5_core_dev *dev); void mlx5_tout_cleanup(struct mlx5_core_dev *dev); void mlx5_tout_query_iseg(struct mlx5_core_dev *dev); int mlx5_tout_query_dtor(struct mlx5_core_dev *dev); +void mlx5_tout_set_def_val(struct mlx5_core_dev *dev); u64 _mlx5_tout_ms(struct mlx5_core_dev *dev, enum mlx5_timeouts_types type); #define mlx5_tout_ms(dev, type) _mlx5_tout_ms(dev, MLX5_TO_##type##_MS) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index a92a92a52346..2c774f367199 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -98,6 +98,8 @@ enum { MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS = 0x1, }; +#define LOG_MAX_SUPPORTED_QPS 0xff + static struct mlx5_profile profile[] = { [0] = { .mask = 0, @@ -109,7 +111,7 @@ static struct mlx5_profile profile[] = { [2] = { .mask = MLX5_PROF_MASK_QP_SIZE | MLX5_PROF_MASK_MR_CACHE, - .log_max_qp = 18, + .log_max_qp = LOG_MAX_SUPPORTED_QPS, .mr_cache[0] = { .size = 500, .limit = 250 @@ -484,10 +486,26 @@ static int handle_hca_cap_odp(struct mlx5_core_dev *dev, void *set_ctx) return set_caps(dev, set_ctx, MLX5_SET_HCA_CAP_OP_MOD_ODP); } +static int max_uc_list_get_devlink_param(struct mlx5_core_dev *dev) +{ + struct devlink *devlink = priv_to_devlink(dev); + union devlink_param_value val; + int err; + + err = devlink_param_driverinit_value_get(devlink, + DEVLINK_PARAM_GENERIC_ID_MAX_MACS, + &val); + if (!err) + return val.vu32; + mlx5_core_dbg(dev, "Failed to get param. err = %d\n", err); + return err; +} + static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx) { struct mlx5_profile *prof = &dev->profile; void *set_hca_cap; + int max_uc_list; int err; err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL); @@ -507,7 +525,9 @@ static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx) to_fw_pkey_sz(dev, 128)); /* Check log_max_qp from HCA caps to set in current profile */ - if (MLX5_CAP_GEN_MAX(dev, log_max_qp) < prof->log_max_qp) { + if (prof->log_max_qp == LOG_MAX_SUPPORTED_QPS) { + prof->log_max_qp = MLX5_CAP_GEN_MAX(dev, log_max_qp); + } else if (MLX5_CAP_GEN_MAX(dev, log_max_qp) < prof->log_max_qp) { mlx5_core_warn(dev, "log_max_qp value in current profile is %d, changing it to HCA capability limit (%d)\n", prof->log_max_qp, MLX5_CAP_GEN_MAX(dev, log_max_qp)); @@ -561,6 +581,11 @@ static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx) if (MLX5_CAP_GEN(dev, roce_rw_supported)) MLX5_SET(cmd_hca_cap, set_hca_cap, roce, mlx5_is_roce_init_enabled(dev)); + max_uc_list = max_uc_list_get_devlink_param(dev); + if (max_uc_list > 0) + MLX5_SET(cmd_hca_cap, set_hca_cap, log_max_current_uc_list, + ilog2(max_uc_list)); + return set_caps(dev, set_ctx, MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE); } @@ -992,11 +1017,7 @@ static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot) if (mlx5_core_is_pf(dev)) pcie_print_link_status(dev->pdev); - err = mlx5_tout_init(dev); - if (err) { - mlx5_core_err(dev, "Failed initializing timeouts, aborting\n"); - return err; - } + mlx5_tout_set_def_val(dev); /* wait for firmware to accept initialization segments configurations */ @@ -1005,13 +1026,13 @@ static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot) if (err) { mlx5_core_err(dev, "Firmware over %llu MS in pre-initializing state, aborting\n", mlx5_tout_ms(dev, FW_PRE_INIT_TIMEOUT)); - goto err_tout_cleanup; + return err; } err = mlx5_cmd_init(dev); if (err) { mlx5_core_err(dev, "Failed initializing command interface, aborting\n"); - goto err_tout_cleanup; + return err; } mlx5_tout_query_iseg(dev); @@ -1075,18 +1096,16 @@ static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot) mlx5_set_driver_version(dev); - mlx5_start_health_poll(dev); - err = mlx5_query_hca_caps(dev); if (err) { mlx5_core_err(dev, "query hca failed\n"); - goto stop_health; + goto reclaim_boot_pages; } + mlx5_start_health_poll(dev); + return 0; -stop_health: - mlx5_stop_health_poll(dev, boot); reclaim_boot_pages: mlx5_reclaim_startup_pages(dev); err_disable_hca: @@ -1094,8 +1113,6 @@ err_disable_hca: err_cmd_cleanup: mlx5_cmd_set_state(dev, MLX5_CMDIF_STATE_DOWN); mlx5_cmd_cleanup(dev); -err_tout_cleanup: - mlx5_tout_cleanup(dev); return err; } @@ -1114,7 +1131,6 @@ static int mlx5_function_teardown(struct mlx5_core_dev *dev, bool boot) mlx5_core_disable_hca(dev, 0); mlx5_cmd_set_state(dev, MLX5_CMDIF_STATE_DOWN); mlx5_cmd_cleanup(dev); - mlx5_tout_cleanup(dev); return 0; } @@ -1476,6 +1492,12 @@ int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx) mlx5_debugfs_root); INIT_LIST_HEAD(&priv->traps); + err = mlx5_tout_init(dev); + if (err) { + mlx5_core_err(dev, "Failed initializing timeouts, aborting\n"); + goto err_timeout_init; + } + err = mlx5_health_init(dev); if (err) goto err_health_init; @@ -1501,6 +1523,8 @@ err_adev_init: err_pagealloc_init: mlx5_health_cleanup(dev); err_health_init: + mlx5_tout_cleanup(dev); +err_timeout_init: debugfs_remove(dev->priv.dbg_root); mutex_destroy(&priv->pgdir_mutex); mutex_destroy(&priv->alloc_mutex); @@ -1518,6 +1542,7 @@ void mlx5_mdev_uninit(struct mlx5_core_dev *dev) mlx5_adev_cleanup(dev); mlx5_pagealloc_cleanup(dev); mlx5_health_cleanup(dev); + mlx5_tout_cleanup(dev); debugfs_remove_recursive(dev->priv.dbg_root); mutex_destroy(&priv->pgdir_mutex); mutex_destroy(&priv->alloc_mutex); @@ -1604,12 +1629,28 @@ static void remove_one(struct pci_dev *pdev) mlx5_devlink_free(devlink); } +#define mlx5_pci_trace(dev, fmt, ...) ({ \ + struct mlx5_core_dev *__dev = (dev); \ + mlx5_core_info(__dev, "%s Device state = %d health sensors: %d pci_status: %d. " fmt, \ + __func__, __dev->state, mlx5_health_check_fatal_sensors(__dev), \ + __dev->pci_status, ##__VA_ARGS__); \ +}) + +static const char *result2str(enum pci_ers_result result) +{ + return result == PCI_ERS_RESULT_NEED_RESET ? "need reset" : + result == PCI_ERS_RESULT_DISCONNECT ? "disconnect" : + result == PCI_ERS_RESULT_RECOVERED ? "recovered" : + "unknown"; +} + static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev, pci_channel_state_t state) { struct mlx5_core_dev *dev = pci_get_drvdata(pdev); + enum pci_ers_result res; - mlx5_core_info(dev, "%s was called\n", __func__); + mlx5_pci_trace(dev, "Enter, pci channel state = %d\n", state); mlx5_enter_error_state(dev, false); mlx5_error_sw_reset(dev); @@ -1617,8 +1658,11 @@ static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev, mlx5_drain_health_wq(dev); mlx5_pci_disable_device(dev); - return state == pci_channel_io_perm_failure ? + res = state == pci_channel_io_perm_failure ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET; + + mlx5_pci_trace(dev, "Exit, result = %d, %s\n", res, result2str(res)); + return res; } /* wait for the device to show vital signs by waiting @@ -1652,28 +1696,34 @@ static int wait_vital(struct pci_dev *pdev) static pci_ers_result_t mlx5_pci_slot_reset(struct pci_dev *pdev) { + enum pci_ers_result res = PCI_ERS_RESULT_DISCONNECT; struct mlx5_core_dev *dev = pci_get_drvdata(pdev); int err; - mlx5_core_info(dev, "%s was called\n", __func__); + mlx5_pci_trace(dev, "Enter\n"); err = mlx5_pci_enable_device(dev); if (err) { mlx5_core_err(dev, "%s: mlx5_pci_enable_device failed with error code: %d\n", __func__, err); - return PCI_ERS_RESULT_DISCONNECT; + goto out; } pci_set_master(pdev); pci_restore_state(pdev); pci_save_state(pdev); - if (wait_vital(pdev)) { - mlx5_core_err(dev, "%s: wait_vital timed out\n", __func__); - return PCI_ERS_RESULT_DISCONNECT; + err = wait_vital(pdev); + if (err) { + mlx5_core_err(dev, "%s: wait vital failed with error code: %d\n", + __func__, err); + goto out; } - return PCI_ERS_RESULT_RECOVERED; + res = PCI_ERS_RESULT_RECOVERED; +out: + mlx5_pci_trace(dev, "Exit, err = %d, result = %d, %s\n", err, res, result2str(res)); + return res; } static void mlx5_pci_resume(struct pci_dev *pdev) @@ -1681,14 +1731,12 @@ static void mlx5_pci_resume(struct pci_dev *pdev) struct mlx5_core_dev *dev = pci_get_drvdata(pdev); int err; - mlx5_core_info(dev, "%s was called\n", __func__); + mlx5_pci_trace(dev, "Enter, loading driver..\n"); err = mlx5_load_one(dev); - if (err) - mlx5_core_err(dev, "%s: mlx5_load_one failed with error code: %d\n", - __func__, err); - else - mlx5_core_info(dev, "%s: device recovered\n", __func__); + + mlx5_pci_trace(dev, "Done, err = %d, device %s\n", err, + !err ? "recovered" : "Failed"); } static const struct pci_error_handlers mlx5_err_handler = { @@ -1809,12 +1857,13 @@ void mlx5_disable_device(struct mlx5_core_dev *dev) int mlx5_recover_device(struct mlx5_core_dev *dev) { - int ret = -EIO; + if (!mlx5_core_is_sf(dev)) { + mlx5_pci_disable_device(dev); + if (mlx5_pci_slot_reset(dev->pdev) != PCI_ERS_RESULT_RECOVERED) + return -EIO; + } - mlx5_pci_disable_device(dev); - if (mlx5_pci_slot_reset(dev->pdev) == PCI_ERS_RESULT_RECOVERED) - ret = mlx5_load_one(dev); - return ret; + return mlx5_load_one(dev); } static struct pci_driver mlx5_core_driver = { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index bb677329ea08..6f8baa0f2a73 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -305,5 +305,6 @@ static inline u32 mlx5_sriov_get_vf_total_msix(struct pci_dev *pdev) bool mlx5_eth_supported(struct mlx5_core_dev *dev); bool mlx5_rdma_supported(struct mlx5_core_dev *dev); bool mlx5_vnet_supported(struct mlx5_core_dev *dev); +bool mlx5_same_hw_devs(struct mlx5_core_dev *dev, struct mlx5_core_dev *peer_dev); #endif /* __MLX5_CORE_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h index 8116815663a7..23cb63fa4588 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h @@ -22,12 +22,40 @@ int mlx5_set_msix_vec_count(struct mlx5_core_dev *dev, int devfn, int msix_vec_count); int mlx5_get_default_msix_vec_count(struct mlx5_core_dev *dev, int num_vfs); +struct mlx5_irq *mlx5_ctrl_irq_request(struct mlx5_core_dev *dev); +void mlx5_ctrl_irq_release(struct mlx5_irq *ctrl_irq); struct mlx5_irq *mlx5_irq_request(struct mlx5_core_dev *dev, u16 vecidx, struct cpumask *affinity); -void mlx5_irq_release(struct mlx5_irq *irq); +int mlx5_irqs_request_vectors(struct mlx5_core_dev *dev, u16 *cpus, int nirqs, + struct mlx5_irq **irqs); +void mlx5_irqs_release_vectors(struct mlx5_irq **irqs, int nirqs); int mlx5_irq_attach_nb(struct mlx5_irq *irq, struct notifier_block *nb); int mlx5_irq_detach_nb(struct mlx5_irq *irq, struct notifier_block *nb); struct cpumask *mlx5_irq_get_affinity_mask(struct mlx5_irq *irq); int mlx5_irq_get_index(struct mlx5_irq *irq); +struct mlx5_irq_pool; +#ifdef CONFIG_MLX5_SF +int mlx5_irq_affinity_irqs_request_auto(struct mlx5_core_dev *dev, int nirqs, + struct mlx5_irq **irqs); +struct mlx5_irq *mlx5_irq_affinity_request(struct mlx5_irq_pool *pool, + const struct cpumask *req_mask); +void mlx5_irq_affinity_irqs_release(struct mlx5_core_dev *dev, struct mlx5_irq **irqs, + int num_irqs); +#else +static inline int mlx5_irq_affinity_irqs_request_auto(struct mlx5_core_dev *dev, int nirqs, + struct mlx5_irq **irqs) +{ + return -EOPNOTSUPP; +} + +static inline struct mlx5_irq * +mlx5_irq_affinity_request(struct mlx5_irq_pool *pool, const struct cpumask *req_mask) +{ + return ERR_PTR(-EOPNOTSUPP); +} + +static inline void mlx5_irq_affinity_irqs_release(struct mlx5_core_dev *dev, + struct mlx5_irq **irqs, int num_irqs) {} +#endif #endif /* __MLX5_IRQ_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c index 830444f927d4..90fec0649ef5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c @@ -7,15 +7,12 @@ #include <linux/mlx5/driver.h> #include "mlx5_core.h" #include "mlx5_irq.h" +#include "pci_irq.h" #include "lib/sf.h" #ifdef CONFIG_RFS_ACCEL #include <linux/cpu_rmap.h> #endif -#define MLX5_MAX_IRQ_NAME (32) -/* max irq_index is 2047, so four chars */ -#define MLX5_MAX_IRQ_IDX_CHARS (4) - #define MLX5_SFS_PER_CTRL_IRQ 64 #define MLX5_IRQ_CTRL_SF_MAX 8 /* min num of vectors for SFs to be enabled */ @@ -25,7 +22,6 @@ #define MLX5_EQ_SHARE_IRQ_MAX_CTRL (UINT_MAX) #define MLX5_EQ_SHARE_IRQ_MIN_COMP (1) #define MLX5_EQ_SHARE_IRQ_MIN_CTRL (4) -#define MLX5_EQ_REFS_PER_IRQ (2) struct mlx5_irq { struct atomic_notifier_head nh; @@ -37,16 +33,6 @@ struct mlx5_irq { int irqn; }; -struct mlx5_irq_pool { - char name[MLX5_MAX_IRQ_NAME - MLX5_MAX_IRQ_IDX_CHARS]; - struct xa_limit xa_num_irqs; - struct mutex lock; /* sync IRQs creations */ - struct xarray irqs; - u32 max_threshold; - u32 min_threshold; - struct mlx5_core_dev *dev; -}; - struct mlx5_irq_table { struct mlx5_irq_pool *pf_pool; struct mlx5_irq_pool *sf_ctrl_pool; @@ -153,18 +139,28 @@ static void irq_release(struct mlx5_irq *irq) kfree(irq); } -static void irq_put(struct mlx5_irq *irq) +int mlx5_irq_put(struct mlx5_irq *irq) { struct mlx5_irq_pool *pool = irq->pool; + int ret = 0; mutex_lock(&pool->lock); irq->refcount--; - if (!irq->refcount) + if (!irq->refcount) { irq_release(irq); + ret = 1; + } mutex_unlock(&pool->lock); + return ret; +} + +int mlx5_irq_read_locked(struct mlx5_irq *irq) +{ + lockdep_assert_held(&irq->pool->lock); + return irq->refcount; } -static int irq_get_locked(struct mlx5_irq *irq) +int mlx5_irq_get_locked(struct mlx5_irq *irq) { lockdep_assert_held(&irq->pool->lock); if (WARN_ON_ONCE(!irq->refcount)) @@ -178,7 +174,7 @@ static int irq_get(struct mlx5_irq *irq) int err; mutex_lock(&irq->pool->lock); - err = irq_get_locked(irq); + err = mlx5_irq_get_locked(irq); mutex_unlock(&irq->pool->lock); return err; } @@ -210,12 +206,8 @@ static void irq_set_name(struct mlx5_irq_pool *pool, char *name, int vecidx) snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", vecidx); } -static bool irq_pool_is_sf_pool(struct mlx5_irq_pool *pool) -{ - return !strncmp("mlx5_sf", pool->name, strlen("mlx5_sf")); -} - -static struct mlx5_irq *irq_request(struct mlx5_irq_pool *pool, int i) +struct mlx5_irq *mlx5_irq_alloc(struct mlx5_irq_pool *pool, int i, + const struct cpumask *affinity) { struct mlx5_core_dev *dev = pool->dev; char name[MLX5_MAX_IRQ_NAME]; @@ -226,7 +218,7 @@ static struct mlx5_irq *irq_request(struct mlx5_irq_pool *pool, int i) if (!irq) return ERR_PTR(-ENOMEM); irq->irqn = pci_irq_vector(dev->pdev, i); - if (!irq_pool_is_sf_pool(pool)) + if (!mlx5_irq_pool_is_sf_pool(pool)) irq_set_name(pool, name, i); else irq_sf_set_name(pool, name, i); @@ -244,6 +236,10 @@ static struct mlx5_irq *irq_request(struct mlx5_irq_pool *pool, int i) err = -ENOMEM; goto err_cpumask; } + if (affinity) { + cpumask_copy(irq->mask, affinity); + irq_set_affinity_hint(irq->irqn, irq->mask); + } irq->pool = pool; irq->refcount = 1; irq->index = i; @@ -255,6 +251,7 @@ static struct mlx5_irq *irq_request(struct mlx5_irq_pool *pool, int i) } return irq; err_xa: + irq_set_affinity_hint(irq->irqn, NULL); free_cpumask_var(irq->mask); err_cpumask: free_irq(irq->irqn, &irq->nh); @@ -275,7 +272,7 @@ int mlx5_irq_attach_nb(struct mlx5_irq *irq, struct notifier_block *nb) return -ENOENT; ret = atomic_notifier_chain_register(&irq->nh, nb); if (ret) - irq_put(irq); + mlx5_irq_put(irq); return ret; } @@ -284,7 +281,7 @@ int mlx5_irq_detach_nb(struct mlx5_irq *irq, struct notifier_block *nb) int err = 0; err = atomic_notifier_chain_unregister(&irq->nh, nb); - irq_put(irq); + mlx5_irq_put(irq); return err; } @@ -300,131 +297,121 @@ int mlx5_irq_get_index(struct mlx5_irq *irq) /* irq_pool API */ -/* creating an irq from irq_pool */ -static struct mlx5_irq *irq_pool_create_irq(struct mlx5_irq_pool *pool, - struct cpumask *affinity) +/* requesting an irq from a given pool according to given index */ +static struct mlx5_irq * +irq_pool_request_vector(struct mlx5_irq_pool *pool, int vecidx, + struct cpumask *affinity) { struct mlx5_irq *irq; - u32 irq_index; - int err; - err = xa_alloc(&pool->irqs, &irq_index, NULL, pool->xa_num_irqs, - GFP_KERNEL); - if (err) - return ERR_PTR(err); - irq = irq_request(pool, irq_index); - if (IS_ERR(irq)) - return irq; - cpumask_copy(irq->mask, affinity); - irq_set_affinity_hint(irq->irqn, irq->mask); + mutex_lock(&pool->lock); + irq = xa_load(&pool->irqs, vecidx); + if (irq) { + mlx5_irq_get_locked(irq); + goto unlock; + } + irq = mlx5_irq_alloc(pool, vecidx, affinity); +unlock: + mutex_unlock(&pool->lock); return irq; } -/* looking for the irq with the smallest refcount and the same affinity */ -static struct mlx5_irq *irq_pool_find_least_loaded(struct mlx5_irq_pool *pool, - struct cpumask *affinity) +static struct mlx5_irq_pool *sf_ctrl_irq_pool_get(struct mlx5_irq_table *irq_table) { - int start = pool->xa_num_irqs.min; - int end = pool->xa_num_irqs.max; - struct mlx5_irq *irq = NULL; - struct mlx5_irq *iter; - unsigned long index; + return irq_table->sf_ctrl_pool; +} - lockdep_assert_held(&pool->lock); - xa_for_each_range(&pool->irqs, index, iter, start, end) { - if (!cpumask_equal(iter->mask, affinity)) - continue; - if (iter->refcount < pool->min_threshold) - return iter; - if (!irq || iter->refcount < irq->refcount) - irq = iter; - } - return irq; +static struct mlx5_irq_pool *sf_irq_pool_get(struct mlx5_irq_table *irq_table) +{ + return irq_table->sf_comp_pool; } -/* requesting an irq from a given pool according to given affinity */ -static struct mlx5_irq *irq_pool_request_affinity(struct mlx5_irq_pool *pool, - struct cpumask *affinity) +struct mlx5_irq_pool *mlx5_irq_pool_get(struct mlx5_core_dev *dev) { - struct mlx5_irq *least_loaded_irq, *new_irq; + struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev); + struct mlx5_irq_pool *pool = NULL; - mutex_lock(&pool->lock); - least_loaded_irq = irq_pool_find_least_loaded(pool, affinity); - if (least_loaded_irq && - least_loaded_irq->refcount < pool->min_threshold) - goto out; - new_irq = irq_pool_create_irq(pool, affinity); - if (IS_ERR(new_irq)) { - if (!least_loaded_irq) { - mlx5_core_err(pool->dev, "Didn't find IRQ for cpu = %u\n", - cpumask_first(affinity)); - mutex_unlock(&pool->lock); - return new_irq; - } - /* We failed to create a new IRQ for the requested affinity, - * sharing existing IRQ. - */ - goto out; - } - least_loaded_irq = new_irq; - goto unlock; -out: - irq_get_locked(least_loaded_irq); - if (least_loaded_irq->refcount > pool->max_threshold) - mlx5_core_dbg(pool->dev, "IRQ %u overloaded, pool_name: %s, %u EQs on this irq\n", - least_loaded_irq->irqn, pool->name, - least_loaded_irq->refcount / MLX5_EQ_REFS_PER_IRQ); -unlock: - mutex_unlock(&pool->lock); - return least_loaded_irq; + if (mlx5_core_is_sf(dev)) + pool = sf_irq_pool_get(irq_table); + + /* In some configs, there won't be a pool of SFs IRQs. Hence, returning + * the PF IRQs pool in case the SF pool doesn't exist. + */ + return pool ? pool : irq_table->pf_pool; } -/* requesting an irq from a given pool according to given index */ -static struct mlx5_irq * -irq_pool_request_vector(struct mlx5_irq_pool *pool, int vecidx, - struct cpumask *affinity) +static struct mlx5_irq_pool *ctrl_irq_pool_get(struct mlx5_core_dev *dev) { - struct mlx5_irq *irq; + struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev); + struct mlx5_irq_pool *pool = NULL; - mutex_lock(&pool->lock); - irq = xa_load(&pool->irqs, vecidx); - if (irq) { - irq_get_locked(irq); - goto unlock; + if (mlx5_core_is_sf(dev)) + pool = sf_ctrl_irq_pool_get(irq_table); + + /* In some configs, there won't be a pool of SFs IRQs. Hence, returning + * the PF IRQs pool in case the SF pool doesn't exist. + */ + return pool ? pool : irq_table->pf_pool; +} + +/** + * mlx5_irqs_release - release one or more IRQs back to the system. + * @irqs: IRQs to be released. + * @nirqs: number of IRQs to be released. + */ +static void mlx5_irqs_release(struct mlx5_irq **irqs, int nirqs) +{ + int i; + + for (i = 0; i < nirqs; i++) { + synchronize_irq(irqs[i]->irqn); + mlx5_irq_put(irqs[i]); } - irq = irq_request(pool, vecidx); - if (IS_ERR(irq) || !affinity) - goto unlock; - cpumask_copy(irq->mask, affinity); - if (!irq_pool_is_sf_pool(pool) && !pool->xa_num_irqs.max && - cpumask_empty(irq->mask)) - cpumask_set_cpu(0, irq->mask); - irq_set_affinity_hint(irq->irqn, irq->mask); -unlock: - mutex_unlock(&pool->lock); - return irq; } -static struct mlx5_irq_pool *find_sf_irq_pool(struct mlx5_irq_table *irq_table, - int i, struct cpumask *affinity) +/** + * mlx5_ctrl_irq_release - release a ctrl IRQ back to the system. + * @ctrl_irq: ctrl IRQ to be released. + */ +void mlx5_ctrl_irq_release(struct mlx5_irq *ctrl_irq) { - if (cpumask_empty(affinity) && i == MLX5_IRQ_EQ_CTRL) - return irq_table->sf_ctrl_pool; - return irq_table->sf_comp_pool; + mlx5_irqs_release(&ctrl_irq, 1); } /** - * mlx5_irq_release - release an IRQ back to the system. - * @irq: irq to be released. + * mlx5_ctrl_irq_request - request a ctrl IRQ for mlx5 device. + * @dev: mlx5 device that requesting the IRQ. + * + * This function returns a pointer to IRQ, or ERR_PTR in case of error. */ -void mlx5_irq_release(struct mlx5_irq *irq) +struct mlx5_irq *mlx5_ctrl_irq_request(struct mlx5_core_dev *dev) { - synchronize_irq(irq->irqn); - irq_put(irq); + struct mlx5_irq_pool *pool = ctrl_irq_pool_get(dev); + cpumask_var_t req_mask; + struct mlx5_irq *irq; + + if (!zalloc_cpumask_var(&req_mask, GFP_KERNEL)) + return ERR_PTR(-ENOMEM); + cpumask_copy(req_mask, cpu_online_mask); + if (!mlx5_irq_pool_is_sf_pool(pool)) { + /* In case we are allocating a control IRQ for PF/VF */ + if (!pool->xa_num_irqs.max) { + cpumask_clear(req_mask); + /* In case we only have a single IRQ for PF/VF */ + cpumask_set_cpu(cpumask_first(cpu_online_mask), req_mask); + } + /* Allocate the IRQ in the last index of the pool */ + irq = irq_pool_request_vector(pool, pool->xa_num_irqs.max, req_mask); + } else { + irq = mlx5_irq_affinity_request(pool, req_mask); + } + + free_cpumask_var(req_mask); + return irq; } /** - * mlx5_irq_request - request an IRQ for mlx5 device. + * mlx5_irq_request - request an IRQ for mlx5 PF/VF device. * @dev: mlx5 device that requesting the IRQ. * @vecidx: vector index of the IRQ. This argument is ignore if affinity is * provided. @@ -439,23 +426,8 @@ struct mlx5_irq *mlx5_irq_request(struct mlx5_core_dev *dev, u16 vecidx, struct mlx5_irq_pool *pool; struct mlx5_irq *irq; - if (mlx5_core_is_sf(dev)) { - pool = find_sf_irq_pool(irq_table, vecidx, affinity); - if (!pool) - /* we don't have IRQs for SFs, using the PF IRQs */ - goto pf_irq; - if (cpumask_empty(affinity) && !strcmp(pool->name, "mlx5_sf_comp")) - /* In case an SF user request IRQ with vecidx */ - irq = irq_pool_request_vector(pool, vecidx, NULL); - else - irq = irq_pool_request_affinity(pool, affinity); - goto out; - } -pf_irq: pool = irq_table->pf_pool; - vecidx = (vecidx == MLX5_IRQ_EQ_CTRL) ? pool->xa_num_irqs.max : vecidx; irq = irq_pool_request_vector(pool, vecidx, affinity); -out: if (IS_ERR(irq)) return irq; mlx5_core_dbg(dev, "irq %u mapped to cpu %*pbl, %u EQs on this irq\n", @@ -464,6 +436,51 @@ out: return irq; } +/** + * mlx5_irqs_release_vectors - release one or more IRQs back to the system. + * @irqs: IRQs to be released. + * @nirqs: number of IRQs to be released. + */ +void mlx5_irqs_release_vectors(struct mlx5_irq **irqs, int nirqs) +{ + mlx5_irqs_release(irqs, nirqs); +} + +/** + * mlx5_irqs_request_vectors - request one or more IRQs for mlx5 device. + * @dev: mlx5 device that is requesting the IRQs. + * @cpus: CPUs array for binding the IRQs + * @nirqs: number of IRQs to request. + * @irqs: an output array of IRQs pointers. + * + * Each IRQ is bound to at most 1 CPU. + * This function is requests nirqs IRQs, starting from @vecidx. + * + * This function returns the number of IRQs requested, (which might be smaller than + * @nirqs), if successful, or a negative error code in case of an error. + */ +int mlx5_irqs_request_vectors(struct mlx5_core_dev *dev, u16 *cpus, int nirqs, + struct mlx5_irq **irqs) +{ + cpumask_var_t req_mask; + struct mlx5_irq *irq; + int i; + + if (!zalloc_cpumask_var(&req_mask, GFP_KERNEL)) + return -ENOMEM; + for (i = 0; i < nirqs; i++) { + cpumask_set_cpu(cpus[i], req_mask); + irq = mlx5_irq_request(dev, i, req_mask); + if (IS_ERR(irq)) + break; + cpumask_clear(req_mask); + irqs[i] = irq; + } + + free_cpumask_var(req_mask); + return i ? i : PTR_ERR(irq); +} + static struct mlx5_irq_pool * irq_pool_alloc(struct mlx5_core_dev *dev, int start, int size, char *name, u32 min_threshold, u32 max_threshold) @@ -479,7 +496,7 @@ irq_pool_alloc(struct mlx5_core_dev *dev, int start, int size, char *name, pool->xa_num_irqs.max = start + size - 1; if (name) snprintf(pool->name, MLX5_MAX_IRQ_NAME - MLX5_MAX_IRQ_IDX_CHARS, - name); + "%s", name); pool->min_threshold = min_threshold * MLX5_EQ_REFS_PER_IRQ; pool->max_threshold = max_threshold * MLX5_EQ_REFS_PER_IRQ; mlx5_core_dbg(dev, "pool->name = %s, pool->size = %d, pool->start = %d", @@ -500,6 +517,7 @@ static void irq_pool_free(struct mlx5_irq_pool *pool) irq_release(irq); xa_destroy(&pool->irqs); mutex_destroy(&pool->lock); + kfree(pool->irqs_per_cpu); kvfree(pool); } @@ -547,7 +565,17 @@ static int irq_pools_init(struct mlx5_core_dev *dev, int sf_vec, int pf_vec) err = PTR_ERR(table->sf_comp_pool); goto err_sf_ctrl; } + + table->sf_comp_pool->irqs_per_cpu = kcalloc(nr_cpu_ids, sizeof(u16), GFP_KERNEL); + if (!table->sf_comp_pool->irqs_per_cpu) { + err = -ENOMEM; + goto err_irqs_per_cpu; + } + return 0; + +err_irqs_per_cpu: + irq_pool_free(table->sf_comp_pool); err_sf_ctrl: irq_pool_free(table->sf_ctrl_pool); err_pf: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h new file mode 100644 index 000000000000..5c7e68bee43a --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#ifndef __PCI_IRQ_H__ +#define __PCI_IRQ_H__ + +#include <linux/mlx5/driver.h> + +#define MLX5_MAX_IRQ_NAME (32) +/* max irq_index is 2047, so four chars */ +#define MLX5_MAX_IRQ_IDX_CHARS (4) +#define MLX5_EQ_REFS_PER_IRQ (2) + +struct mlx5_irq; + +struct mlx5_irq_pool { + char name[MLX5_MAX_IRQ_NAME - MLX5_MAX_IRQ_IDX_CHARS]; + struct xa_limit xa_num_irqs; + struct mutex lock; /* sync IRQs creations */ + struct xarray irqs; + u32 max_threshold; + u32 min_threshold; + u16 *irqs_per_cpu; + struct mlx5_core_dev *dev; +}; + +struct mlx5_irq_pool *mlx5_irq_pool_get(struct mlx5_core_dev *dev); +static inline bool mlx5_irq_pool_is_sf_pool(struct mlx5_irq_pool *pool) +{ + return !strncmp("mlx5_sf", pool->name, strlen("mlx5_sf")); +} + +struct mlx5_irq *mlx5_irq_alloc(struct mlx5_irq_pool *pool, int i, + const struct cpumask *affinity); +int mlx5_irq_get_locked(struct mlx5_irq *irq); +int mlx5_irq_read_locked(struct mlx5_irq *irq); +int mlx5_irq_put(struct mlx5_irq *irq); + +#endif /* __PCI_IRQ_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c index f37db7cc32a6..7da012ff0d41 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c @@ -30,10 +30,7 @@ bool mlx5_sf_dev_allocated(const struct mlx5_core_dev *dev) { struct mlx5_sf_dev_table *table = dev->priv.sf_dev_table; - if (!mlx5_sf_dev_supported(dev)) - return false; - - return !xa_empty(&table->devices); + return table && !xa_empty(&table->devices); } static ssize_t sfnum_show(struct device *dev, struct device_attribute *attr, char *buf) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c index 252d6017387d..17aa348989cb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c @@ -247,7 +247,7 @@ int mlx5_sf_hw_table_init(struct mlx5_core_dev *dev) { struct mlx5_sf_hw_table *table; u16 max_ext_fn = 0; - u16 ext_base_id; + u16 ext_base_id = 0; u16 max_fn = 0; u16 base_id; int err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c index 07936841ce99..c61a5e83c78c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c @@ -1560,6 +1560,12 @@ dr_action_modify_check_is_ttl_modify(const void *sw_action) return sw_field == MLX5_ACTION_IN_FIELD_OUT_IP_TTL; } +static bool dr_action_modify_ttl_ignore(struct mlx5dr_domain *dmn) +{ + return !mlx5dr_ste_supp_ttl_cs_recalc(&dmn->info.caps) && + !MLX5_CAP_ESW_FLOWTABLE(dmn->mdev, fdb_ipv4_ttl_modify); +} + static int dr_actions_convert_modify_header(struct mlx5dr_action *action, u32 max_hw_actions, u32 num_sw_actions, @@ -1591,8 +1597,13 @@ static int dr_actions_convert_modify_header(struct mlx5dr_action *action, if (ret) return ret; - if (!(*modify_ttl)) - *modify_ttl = dr_action_modify_check_is_ttl_modify(sw_action); + if (!(*modify_ttl) && + dr_action_modify_check_is_ttl_modify(sw_action)) { + if (dr_action_modify_ttl_ignore(dmn)) + continue; + + *modify_ttl = true; + } /* Convert SW action to HW action */ ret = dr_action_modify_sw_to_hw(dmn, @@ -1631,7 +1642,7 @@ static int dr_actions_convert_modify_header(struct mlx5dr_action *action, * modify actions doesn't exceeds the limit */ hw_idx++; - if ((num_sw_actions + hw_idx - i) >= max_hw_actions) { + if (hw_idx >= max_hw_actions) { mlx5dr_dbg(dmn, "Modify header action number exceeds HW limit\n"); return -EINVAL; } @@ -1642,6 +1653,10 @@ static int dr_actions_convert_modify_header(struct mlx5dr_action *action, hw_idx++; } + /* if the resulting HW actions list is empty, add NOP action */ + if (!hw_idx) + hw_idx++; + *num_hw_actions = hw_idx; return 0; @@ -1792,7 +1807,7 @@ mlx5dr_action_create_dest_vport(struct mlx5dr_domain *dmn, int mlx5dr_action_destroy(struct mlx5dr_action *action) { - if (refcount_read(&action->refcount) > 1) + if (WARN_ON_ONCE(refcount_read(&action->refcount) > 1)) return -EBUSY; switch (action->action_type) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c index 1d8febed0d76..4dd619d238cc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c @@ -132,6 +132,13 @@ int mlx5dr_cmd_query_device(struct mlx5_core_dev *mdev, caps->isolate_vl_tc = MLX5_CAP_GEN(mdev, isolate_vl_tc_new); + /* geneve_tlv_option_0_exist is the indication of + * STE support for lookup type flex_parser_ok + */ + caps->flex_parser_ok_bits_supp = + MLX5_CAP_FLOWTABLE(mdev, + flow_table_properties_nic_receive.ft_field_support.geneve_tlv_option_0_exist); + if (caps->flex_protocols & MLX5_FLEX_PARSER_ICMP_V4_ENABLED) { caps->flex_parser_id_icmp_dw0 = MLX5_CAP_GEN(mdev, flex_parser_id_icmp_dw0); caps->flex_parser_id_icmp_dw1 = MLX5_CAP_GEN(mdev, flex_parser_id_icmp_dw1); @@ -152,7 +159,7 @@ int mlx5dr_cmd_query_device(struct mlx5_core_dev *mdev, caps->flex_parser_id_mpls_over_gre = MLX5_CAP_GEN(mdev, flex_parser_id_outer_first_mpls_over_gre); - if (caps->flex_protocols & mlx5_FLEX_PARSER_MPLS_OVER_UDP_ENABLED) + if (caps->flex_protocols & MLX5_FLEX_PARSER_MPLS_OVER_UDP_ENABLED) caps->flex_parser_id_mpls_over_udp = MLX5_CAP_GEN(mdev, flex_parser_id_outer_first_mpls_over_udp_label); @@ -599,7 +606,8 @@ static int mlx5dr_cmd_set_extended_dest(struct mlx5_core_dev *dev, for (i = 0; i < fte->dests_size; i++) { if (fte->dest_arr[i].type == MLX5_FLOW_DESTINATION_TYPE_COUNTER) continue; - if (fte->dest_arr[i].type == MLX5_FLOW_DESTINATION_TYPE_VPORT && + if ((fte->dest_arr[i].type == MLX5_FLOW_DESTINATION_TYPE_VPORT || + fte->dest_arr[i].type == MLX5_FLOW_DESTINATION_TYPE_UPLINK) && fte->dest_arr[i].vport.flags & MLX5_FLOW_DEST_VPORT_REFORMAT_ID) num_encap++; num_fwd_destinations++; @@ -724,12 +732,19 @@ int mlx5dr_cmd_set_fte(struct mlx5_core_dev *dev, case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE: id = fte->dest_arr[i].ft_id; break; + case MLX5_FLOW_DESTINATION_TYPE_UPLINK: case MLX5_FLOW_DESTINATION_TYPE_VPORT: - id = fte->dest_arr[i].vport.num; - MLX5_SET(dest_format_struct, in_dests, - destination_eswitch_owner_vhca_id_valid, - !!(fte->dest_arr[i].vport.flags & - MLX5_FLOW_DEST_VPORT_VHCA_ID)); + if (type == MLX5_FLOW_DESTINATION_TYPE_VPORT) { + id = fte->dest_arr[i].vport.num; + MLX5_SET(dest_format_struct, in_dests, + destination_eswitch_owner_vhca_id_valid, + !!(fte->dest_arr[i].vport.flags & + MLX5_FLOW_DEST_VPORT_VHCA_ID)); + } else { + id = 0; + MLX5_SET(dest_format_struct, in_dests, + destination_eswitch_owner_vhca_id_valid, 1); + } MLX5_SET(dest_format_struct, in_dests, destination_eswitch_owner_vhca_id, fte->dest_arr[i].vport.vhca_id); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c new file mode 100644 index 000000000000..2784cd59fefe --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c @@ -0,0 +1,649 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +#include <linux/debugfs.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/seq_file.h> +#include "dr_types.h" + +#define DR_DBG_PTR_TO_ID(p) ((u64)(uintptr_t)(p) & 0xFFFFFFFFULL) + +enum dr_dump_rec_type { + DR_DUMP_REC_TYPE_DOMAIN = 3000, + DR_DUMP_REC_TYPE_DOMAIN_INFO_FLEX_PARSER = 3001, + DR_DUMP_REC_TYPE_DOMAIN_INFO_DEV_ATTR = 3002, + DR_DUMP_REC_TYPE_DOMAIN_INFO_VPORT = 3003, + DR_DUMP_REC_TYPE_DOMAIN_INFO_CAPS = 3004, + DR_DUMP_REC_TYPE_DOMAIN_SEND_RING = 3005, + + DR_DUMP_REC_TYPE_TABLE = 3100, + DR_DUMP_REC_TYPE_TABLE_RX = 3101, + DR_DUMP_REC_TYPE_TABLE_TX = 3102, + + DR_DUMP_REC_TYPE_MATCHER = 3200, + DR_DUMP_REC_TYPE_MATCHER_MASK = 3201, + DR_DUMP_REC_TYPE_MATCHER_RX = 3202, + DR_DUMP_REC_TYPE_MATCHER_TX = 3203, + DR_DUMP_REC_TYPE_MATCHER_BUILDER = 3204, + + DR_DUMP_REC_TYPE_RULE = 3300, + DR_DUMP_REC_TYPE_RULE_RX_ENTRY_V0 = 3301, + DR_DUMP_REC_TYPE_RULE_TX_ENTRY_V0 = 3302, + DR_DUMP_REC_TYPE_RULE_RX_ENTRY_V1 = 3303, + DR_DUMP_REC_TYPE_RULE_TX_ENTRY_V1 = 3304, + + DR_DUMP_REC_TYPE_ACTION_ENCAP_L2 = 3400, + DR_DUMP_REC_TYPE_ACTION_ENCAP_L3 = 3401, + DR_DUMP_REC_TYPE_ACTION_MODIFY_HDR = 3402, + DR_DUMP_REC_TYPE_ACTION_DROP = 3403, + DR_DUMP_REC_TYPE_ACTION_QP = 3404, + DR_DUMP_REC_TYPE_ACTION_FT = 3405, + DR_DUMP_REC_TYPE_ACTION_CTR = 3406, + DR_DUMP_REC_TYPE_ACTION_TAG = 3407, + DR_DUMP_REC_TYPE_ACTION_VPORT = 3408, + DR_DUMP_REC_TYPE_ACTION_DECAP_L2 = 3409, + DR_DUMP_REC_TYPE_ACTION_DECAP_L3 = 3410, + DR_DUMP_REC_TYPE_ACTION_DEVX_TIR = 3411, + DR_DUMP_REC_TYPE_ACTION_PUSH_VLAN = 3412, + DR_DUMP_REC_TYPE_ACTION_POP_VLAN = 3413, + DR_DUMP_REC_TYPE_ACTION_SAMPLER = 3415, + DR_DUMP_REC_TYPE_ACTION_INSERT_HDR = 3420, + DR_DUMP_REC_TYPE_ACTION_REMOVE_HDR = 3421 +}; + +void mlx5dr_dbg_tbl_add(struct mlx5dr_table *tbl) +{ + mutex_lock(&tbl->dmn->dump_info.dbg_mutex); + list_add_tail(&tbl->dbg_node, &tbl->dmn->dbg_tbl_list); + mutex_unlock(&tbl->dmn->dump_info.dbg_mutex); +} + +void mlx5dr_dbg_tbl_del(struct mlx5dr_table *tbl) +{ + mutex_lock(&tbl->dmn->dump_info.dbg_mutex); + list_del(&tbl->dbg_node); + mutex_unlock(&tbl->dmn->dump_info.dbg_mutex); +} + +void mlx5dr_dbg_rule_add(struct mlx5dr_rule *rule) +{ + struct mlx5dr_domain *dmn = rule->matcher->tbl->dmn; + + mutex_lock(&dmn->dump_info.dbg_mutex); + list_add_tail(&rule->dbg_node, &rule->matcher->dbg_rule_list); + mutex_unlock(&dmn->dump_info.dbg_mutex); +} + +void mlx5dr_dbg_rule_del(struct mlx5dr_rule *rule) +{ + struct mlx5dr_domain *dmn = rule->matcher->tbl->dmn; + + mutex_lock(&dmn->dump_info.dbg_mutex); + list_del(&rule->dbg_node); + mutex_unlock(&dmn->dump_info.dbg_mutex); +} + +static u64 dr_dump_icm_to_idx(u64 icm_addr) +{ + return (icm_addr >> 6) & 0xffffffff; +} + +#define DR_HEX_SIZE 256 + +static void +dr_dump_hex_print(char hex[DR_HEX_SIZE], char *src, u32 size) +{ + if (WARN_ON_ONCE(DR_HEX_SIZE < 2 * size + 1)) + size = DR_HEX_SIZE / 2 - 1; /* truncate */ + + bin2hex(hex, src, size); + hex[2 * size] = 0; /* NULL-terminate */ +} + +static int +dr_dump_rule_action_mem(struct seq_file *file, const u64 rule_id, + struct mlx5dr_rule_action_member *action_mem) +{ + struct mlx5dr_action *action = action_mem->action; + const u64 action_id = DR_DBG_PTR_TO_ID(action); + + switch (action->action_type) { + case DR_ACTION_TYP_DROP: + seq_printf(file, "%d,0x%llx,0x%llx\n", + DR_DUMP_REC_TYPE_ACTION_DROP, action_id, rule_id); + break; + case DR_ACTION_TYP_FT: + if (action->dest_tbl->is_fw_tbl) + seq_printf(file, "%d,0x%llx,0x%llx,0x%x\n", + DR_DUMP_REC_TYPE_ACTION_FT, action_id, + rule_id, action->dest_tbl->fw_tbl.id); + else + seq_printf(file, "%d,0x%llx,0x%llx,0x%x\n", + DR_DUMP_REC_TYPE_ACTION_FT, action_id, + rule_id, action->dest_tbl->tbl->table_id); + + break; + case DR_ACTION_TYP_CTR: + seq_printf(file, "%d,0x%llx,0x%llx,0x%x\n", + DR_DUMP_REC_TYPE_ACTION_CTR, action_id, rule_id, + action->ctr->ctr_id + action->ctr->offset); + break; + case DR_ACTION_TYP_TAG: + seq_printf(file, "%d,0x%llx,0x%llx,0x%x\n", + DR_DUMP_REC_TYPE_ACTION_TAG, action_id, rule_id, + action->flow_tag->flow_tag); + break; + case DR_ACTION_TYP_MODIFY_HDR: + seq_printf(file, "%d,0x%llx,0x%llx,0x%x\n", + DR_DUMP_REC_TYPE_ACTION_MODIFY_HDR, action_id, + rule_id, action->rewrite->index); + break; + case DR_ACTION_TYP_VPORT: + seq_printf(file, "%d,0x%llx,0x%llx,0x%x\n", + DR_DUMP_REC_TYPE_ACTION_VPORT, action_id, rule_id, + action->vport->caps->num); + break; + case DR_ACTION_TYP_TNL_L2_TO_L2: + seq_printf(file, "%d,0x%llx,0x%llx\n", + DR_DUMP_REC_TYPE_ACTION_DECAP_L2, action_id, + rule_id); + break; + case DR_ACTION_TYP_TNL_L3_TO_L2: + seq_printf(file, "%d,0x%llx,0x%llx,0x%x\n", + DR_DUMP_REC_TYPE_ACTION_DECAP_L3, action_id, + rule_id, action->rewrite->index); + break; + case DR_ACTION_TYP_L2_TO_TNL_L2: + seq_printf(file, "%d,0x%llx,0x%llx,0x%x\n", + DR_DUMP_REC_TYPE_ACTION_ENCAP_L2, action_id, + rule_id, action->reformat->id); + break; + case DR_ACTION_TYP_L2_TO_TNL_L3: + seq_printf(file, "%d,0x%llx,0x%llx,0x%x\n", + DR_DUMP_REC_TYPE_ACTION_ENCAP_L3, action_id, + rule_id, action->reformat->id); + break; + case DR_ACTION_TYP_POP_VLAN: + seq_printf(file, "%d,0x%llx,0x%llx\n", + DR_DUMP_REC_TYPE_ACTION_POP_VLAN, action_id, + rule_id); + break; + case DR_ACTION_TYP_PUSH_VLAN: + seq_printf(file, "%d,0x%llx,0x%llx,0x%x\n", + DR_DUMP_REC_TYPE_ACTION_PUSH_VLAN, action_id, + rule_id, action->push_vlan->vlan_hdr); + break; + case DR_ACTION_TYP_INSERT_HDR: + seq_printf(file, "%d,0x%llx,0x%llx,0x%x,0x%x,0x%x\n", + DR_DUMP_REC_TYPE_ACTION_INSERT_HDR, action_id, + rule_id, action->reformat->id, + action->reformat->param_0, + action->reformat->param_1); + break; + case DR_ACTION_TYP_REMOVE_HDR: + seq_printf(file, "%d,0x%llx,0x%llx,0x%x,0x%x,0x%x\n", + DR_DUMP_REC_TYPE_ACTION_REMOVE_HDR, action_id, + rule_id, action->reformat->id, + action->reformat->param_0, + action->reformat->param_1); + break; + case DR_ACTION_TYP_SAMPLER: + seq_printf(file, + "%d,0x%llx,0x%llx,0x%x,0x%x,0x%x,0x%llx,0x%llx\n", + DR_DUMP_REC_TYPE_ACTION_SAMPLER, action_id, rule_id, + 0, 0, action->sampler->sampler_id, + action->sampler->rx_icm_addr, + action->sampler->tx_icm_addr); + break; + default: + return 0; + } + + return 0; +} + +static int +dr_dump_rule_mem(struct seq_file *file, struct mlx5dr_ste *ste, + bool is_rx, const u64 rule_id, u8 format_ver) +{ + char hw_ste_dump[DR_HEX_SIZE]; + u32 mem_rec_type; + + if (format_ver == MLX5_STEERING_FORMAT_CONNECTX_5) { + mem_rec_type = is_rx ? DR_DUMP_REC_TYPE_RULE_RX_ENTRY_V0 : + DR_DUMP_REC_TYPE_RULE_TX_ENTRY_V0; + } else { + mem_rec_type = is_rx ? DR_DUMP_REC_TYPE_RULE_RX_ENTRY_V1 : + DR_DUMP_REC_TYPE_RULE_TX_ENTRY_V1; + } + + dr_dump_hex_print(hw_ste_dump, (char *)ste->hw_ste, DR_STE_SIZE_REDUCED); + + seq_printf(file, "%d,0x%llx,0x%llx,%s\n", mem_rec_type, + dr_dump_icm_to_idx(mlx5dr_ste_get_icm_addr(ste)), rule_id, + hw_ste_dump); + + return 0; +} + +static int +dr_dump_rule_rx_tx(struct seq_file *file, struct mlx5dr_rule_rx_tx *rule_rx_tx, + bool is_rx, const u64 rule_id, u8 format_ver) +{ + struct mlx5dr_ste *ste_arr[DR_RULE_MAX_STES + DR_ACTION_MAX_STES]; + struct mlx5dr_ste *curr_ste = rule_rx_tx->last_rule_ste; + int ret, i; + + if (mlx5dr_rule_get_reverse_rule_members(ste_arr, curr_ste, &i)) + return 0; + + while (i--) { + ret = dr_dump_rule_mem(file, ste_arr[i], is_rx, rule_id, + format_ver); + if (ret < 0) + return ret; + } + + return 0; +} + +static int dr_dump_rule(struct seq_file *file, struct mlx5dr_rule *rule) +{ + struct mlx5dr_rule_action_member *action_mem; + const u64 rule_id = DR_DBG_PTR_TO_ID(rule); + struct mlx5dr_rule_rx_tx *rx = &rule->rx; + struct mlx5dr_rule_rx_tx *tx = &rule->tx; + u8 format_ver; + int ret; + + format_ver = rule->matcher->tbl->dmn->info.caps.sw_format_ver; + + seq_printf(file, "%d,0x%llx,0x%llx\n", DR_DUMP_REC_TYPE_RULE, rule_id, + DR_DBG_PTR_TO_ID(rule->matcher)); + + if (rx->nic_matcher) { + ret = dr_dump_rule_rx_tx(file, rx, true, rule_id, format_ver); + if (ret < 0) + return ret; + } + + if (tx->nic_matcher) { + ret = dr_dump_rule_rx_tx(file, tx, false, rule_id, format_ver); + if (ret < 0) + return ret; + } + + list_for_each_entry(action_mem, &rule->rule_actions_list, list) { + ret = dr_dump_rule_action_mem(file, rule_id, action_mem); + if (ret < 0) + return ret; + } + + return 0; +} + +static int +dr_dump_matcher_mask(struct seq_file *file, struct mlx5dr_match_param *mask, + u8 criteria, const u64 matcher_id) +{ + char dump[DR_HEX_SIZE]; + + seq_printf(file, "%d,0x%llx,", DR_DUMP_REC_TYPE_MATCHER_MASK, + matcher_id); + + if (criteria & DR_MATCHER_CRITERIA_OUTER) { + dr_dump_hex_print(dump, (char *)&mask->outer, sizeof(mask->outer)); + seq_printf(file, "%s,", dump); + } else { + seq_puts(file, ","); + } + + if (criteria & DR_MATCHER_CRITERIA_INNER) { + dr_dump_hex_print(dump, (char *)&mask->inner, sizeof(mask->inner)); + seq_printf(file, "%s,", dump); + } else { + seq_puts(file, ","); + } + + if (criteria & DR_MATCHER_CRITERIA_MISC) { + dr_dump_hex_print(dump, (char *)&mask->misc, sizeof(mask->misc)); + seq_printf(file, "%s,", dump); + } else { + seq_puts(file, ","); + } + + if (criteria & DR_MATCHER_CRITERIA_MISC2) { + dr_dump_hex_print(dump, (char *)&mask->misc2, sizeof(mask->misc2)); + seq_printf(file, "%s,", dump); + } else { + seq_puts(file, ","); + } + + if (criteria & DR_MATCHER_CRITERIA_MISC3) { + dr_dump_hex_print(dump, (char *)&mask->misc3, sizeof(mask->misc3)); + seq_printf(file, "%s\n", dump); + } else { + seq_puts(file, ",\n"); + } + + return 0; +} + +static int +dr_dump_matcher_builder(struct seq_file *file, struct mlx5dr_ste_build *builder, + u32 index, bool is_rx, const u64 matcher_id) +{ + seq_printf(file, "%d,0x%llx,%d,%d,0x%x\n", + DR_DUMP_REC_TYPE_MATCHER_BUILDER, matcher_id, index, is_rx, + builder->lu_type); + + return 0; +} + +static int +dr_dump_matcher_rx_tx(struct seq_file *file, bool is_rx, + struct mlx5dr_matcher_rx_tx *matcher_rx_tx, + const u64 matcher_id) +{ + enum dr_dump_rec_type rec_type; + int i, ret; + + rec_type = is_rx ? DR_DUMP_REC_TYPE_MATCHER_RX : + DR_DUMP_REC_TYPE_MATCHER_TX; + + seq_printf(file, "%d,0x%llx,0x%llx,%d,0x%llx,0x%llx\n", + rec_type, DR_DBG_PTR_TO_ID(matcher_rx_tx), + matcher_id, matcher_rx_tx->num_of_builders, + dr_dump_icm_to_idx(matcher_rx_tx->s_htbl->chunk->icm_addr), + dr_dump_icm_to_idx(matcher_rx_tx->e_anchor->chunk->icm_addr)); + + for (i = 0; i < matcher_rx_tx->num_of_builders; i++) { + ret = dr_dump_matcher_builder(file, + &matcher_rx_tx->ste_builder[i], + i, is_rx, matcher_id); + if (ret < 0) + return ret; + } + + return 0; +} + +static int +dr_dump_matcher(struct seq_file *file, struct mlx5dr_matcher *matcher) +{ + struct mlx5dr_matcher_rx_tx *rx = &matcher->rx; + struct mlx5dr_matcher_rx_tx *tx = &matcher->tx; + u64 matcher_id; + int ret; + + matcher_id = DR_DBG_PTR_TO_ID(matcher); + + seq_printf(file, "%d,0x%llx,0x%llx,%d\n", DR_DUMP_REC_TYPE_MATCHER, + matcher_id, DR_DBG_PTR_TO_ID(matcher->tbl), matcher->prio); + + ret = dr_dump_matcher_mask(file, &matcher->mask, + matcher->match_criteria, matcher_id); + if (ret < 0) + return ret; + + if (rx->nic_tbl) { + ret = dr_dump_matcher_rx_tx(file, true, rx, matcher_id); + if (ret < 0) + return ret; + } + + if (tx->nic_tbl) { + ret = dr_dump_matcher_rx_tx(file, false, tx, matcher_id); + if (ret < 0) + return ret; + } + + return 0; +} + +static int +dr_dump_matcher_all(struct seq_file *file, struct mlx5dr_matcher *matcher) +{ + struct mlx5dr_rule *rule; + int ret; + + ret = dr_dump_matcher(file, matcher); + if (ret < 0) + return ret; + + list_for_each_entry(rule, &matcher->dbg_rule_list, dbg_node) { + ret = dr_dump_rule(file, rule); + if (ret < 0) + return ret; + } + + return 0; +} + +static int +dr_dump_table_rx_tx(struct seq_file *file, bool is_rx, + struct mlx5dr_table_rx_tx *table_rx_tx, + const u64 table_id) +{ + enum dr_dump_rec_type rec_type; + + rec_type = is_rx ? DR_DUMP_REC_TYPE_TABLE_RX : + DR_DUMP_REC_TYPE_TABLE_TX; + + seq_printf(file, "%d,0x%llx,0x%llx\n", rec_type, table_id, + dr_dump_icm_to_idx(table_rx_tx->s_anchor->chunk->icm_addr)); + + return 0; +} + +static int dr_dump_table(struct seq_file *file, struct mlx5dr_table *table) +{ + struct mlx5dr_table_rx_tx *rx = &table->rx; + struct mlx5dr_table_rx_tx *tx = &table->tx; + int ret; + + seq_printf(file, "%d,0x%llx,0x%llx,%d,%d\n", DR_DUMP_REC_TYPE_TABLE, + DR_DBG_PTR_TO_ID(table), DR_DBG_PTR_TO_ID(table->dmn), + table->table_type, table->level); + + if (rx->nic_dmn) { + ret = dr_dump_table_rx_tx(file, true, rx, + DR_DBG_PTR_TO_ID(table)); + if (ret < 0) + return ret; + } + + if (tx->nic_dmn) { + ret = dr_dump_table_rx_tx(file, false, tx, + DR_DBG_PTR_TO_ID(table)); + if (ret < 0) + return ret; + } + return 0; +} + +static int dr_dump_table_all(struct seq_file *file, struct mlx5dr_table *tbl) +{ + struct mlx5dr_matcher *matcher; + int ret; + + ret = dr_dump_table(file, tbl); + if (ret < 0) + return ret; + + list_for_each_entry(matcher, &tbl->matcher_list, list_node) { + ret = dr_dump_matcher_all(file, matcher); + if (ret < 0) + return ret; + } + return 0; +} + +static int +dr_dump_send_ring(struct seq_file *file, struct mlx5dr_send_ring *ring, + const u64 domain_id) +{ + seq_printf(file, "%d,0x%llx,0x%llx,0x%x,0x%x\n", + DR_DUMP_REC_TYPE_DOMAIN_SEND_RING, DR_DBG_PTR_TO_ID(ring), + domain_id, ring->cq->mcq.cqn, ring->qp->qpn); + return 0; +} + +static int +dr_dump_domain_info_flex_parser(struct seq_file *file, + const char *flex_parser_name, + const u8 flex_parser_value, + const u64 domain_id) +{ + seq_printf(file, "%d,0x%llx,%s,0x%x\n", + DR_DUMP_REC_TYPE_DOMAIN_INFO_FLEX_PARSER, domain_id, + flex_parser_name, flex_parser_value); + return 0; +} + +static int +dr_dump_domain_info_caps(struct seq_file *file, struct mlx5dr_cmd_caps *caps, + const u64 domain_id) +{ + struct mlx5dr_cmd_vport_cap *vport_caps; + unsigned long i, vports_num; + + xa_for_each(&caps->vports.vports_caps_xa, vports_num, vport_caps) + ; /* count the number of vports in xarray */ + + seq_printf(file, "%d,0x%llx,0x%x,0x%llx,0x%llx,0x%x,%lu,%d\n", + DR_DUMP_REC_TYPE_DOMAIN_INFO_CAPS, domain_id, caps->gvmi, + caps->nic_rx_drop_address, caps->nic_tx_drop_address, + caps->flex_protocols, vports_num, caps->eswitch_manager); + + xa_for_each(&caps->vports.vports_caps_xa, i, vport_caps) { + vport_caps = xa_load(&caps->vports.vports_caps_xa, i); + + seq_printf(file, "%d,0x%llx,%lu,0x%x,0x%llx,0x%llx\n", + DR_DUMP_REC_TYPE_DOMAIN_INFO_VPORT, domain_id, i, + vport_caps->vport_gvmi, vport_caps->icm_address_rx, + vport_caps->icm_address_tx); + } + return 0; +} + +static int +dr_dump_domain_info(struct seq_file *file, struct mlx5dr_domain_info *info, + const u64 domain_id) +{ + int ret; + + ret = dr_dump_domain_info_caps(file, &info->caps, domain_id); + if (ret < 0) + return ret; + + ret = dr_dump_domain_info_flex_parser(file, "icmp_dw0", + info->caps.flex_parser_id_icmp_dw0, + domain_id); + if (ret < 0) + return ret; + + ret = dr_dump_domain_info_flex_parser(file, "icmp_dw1", + info->caps.flex_parser_id_icmp_dw1, + domain_id); + if (ret < 0) + return ret; + + ret = dr_dump_domain_info_flex_parser(file, "icmpv6_dw0", + info->caps.flex_parser_id_icmpv6_dw0, + domain_id); + if (ret < 0) + return ret; + + ret = dr_dump_domain_info_flex_parser(file, "icmpv6_dw1", + info->caps.flex_parser_id_icmpv6_dw1, + domain_id); + if (ret < 0) + return ret; + + return 0; +} + +static int +dr_dump_domain(struct seq_file *file, struct mlx5dr_domain *dmn) +{ + u64 domain_id = DR_DBG_PTR_TO_ID(dmn); + int ret; + + seq_printf(file, "%d,0x%llx,%d,0%x,%d,%s\n", DR_DUMP_REC_TYPE_DOMAIN, + domain_id, dmn->type, dmn->info.caps.gvmi, + dmn->info.supp_sw_steering, pci_name(dmn->mdev->pdev)); + + ret = dr_dump_domain_info(file, &dmn->info, domain_id); + if (ret < 0) + return ret; + + if (dmn->info.supp_sw_steering) { + ret = dr_dump_send_ring(file, dmn->send_ring, domain_id); + if (ret < 0) + return ret; + } + + return 0; +} + +static int dr_dump_domain_all(struct seq_file *file, struct mlx5dr_domain *dmn) +{ + struct mlx5dr_table *tbl; + int ret; + + mutex_lock(&dmn->dump_info.dbg_mutex); + mlx5dr_domain_lock(dmn); + + ret = dr_dump_domain(file, dmn); + if (ret < 0) + goto unlock_mutex; + + list_for_each_entry(tbl, &dmn->dbg_tbl_list, dbg_node) { + ret = dr_dump_table_all(file, tbl); + if (ret < 0) + break; + } + +unlock_mutex: + mlx5dr_domain_unlock(dmn); + mutex_unlock(&dmn->dump_info.dbg_mutex); + return ret; +} + +static int dr_dump_show(struct seq_file *file, void *priv) +{ + return dr_dump_domain_all(file, file->private); +} +DEFINE_SHOW_ATTRIBUTE(dr_dump); + +void mlx5dr_dbg_init_dump(struct mlx5dr_domain *dmn) +{ + struct mlx5_core_dev *dev = dmn->mdev; + char file_name[128]; + + if (dmn->type != MLX5DR_DOMAIN_TYPE_FDB) { + mlx5_core_warn(dev, + "Steering dump is not supported for NIC RX/TX domains\n"); + return; + } + + dmn->dump_info.steering_debugfs = + debugfs_create_dir("steering", dev->priv.dbg_root); + dmn->dump_info.fdb_debugfs = + debugfs_create_dir("fdb", dmn->dump_info.steering_debugfs); + + sprintf(file_name, "dmn_%p", dmn); + debugfs_create_file(file_name, 0444, dmn->dump_info.fdb_debugfs, + dmn, &dr_dump_fops); + + INIT_LIST_HEAD(&dmn->dbg_tbl_list); + mutex_init(&dmn->dump_info.dbg_mutex); +} + +void mlx5dr_dbg_uninit_dump(struct mlx5dr_domain *dmn) +{ + debugfs_remove_recursive(dmn->dump_info.steering_debugfs); + mutex_destroy(&dmn->dump_info.dbg_mutex); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.h new file mode 100644 index 000000000000..def6cf853eea --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +struct mlx5dr_dbg_dump_info { + struct mutex dbg_mutex; /* protect dbg lists */ + struct dentry *steering_debugfs; + struct dentry *fdb_debugfs; +}; + +void mlx5dr_dbg_init_dump(struct mlx5dr_domain *dmn); +void mlx5dr_dbg_uninit_dump(struct mlx5dr_domain *dmn); +void mlx5dr_dbg_tbl_add(struct mlx5dr_table *tbl); +void mlx5dr_dbg_tbl_del(struct mlx5dr_table *tbl); +void mlx5dr_dbg_rule_add(struct mlx5dr_rule *rule); +void mlx5dr_dbg_rule_del(struct mlx5dr_rule *rule); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c index 49089cbe897c..5fa7f9d6d8b9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c @@ -2,6 +2,7 @@ /* Copyright (c) 2019 Mellanox Technologies. */ #include <linux/mlx5/eswitch.h> +#include <linux/err.h> #include "dr_types.h" #define DR_DOMAIN_SW_STEERING_SUPPORTED(dmn, dmn_type) \ @@ -72,9 +73,9 @@ static int dr_domain_init_resources(struct mlx5dr_domain *dmn) } dmn->uar = mlx5_get_uars_page(dmn->mdev); - if (!dmn->uar) { + if (IS_ERR(dmn->uar)) { mlx5dr_err(dmn, "Couldn't allocate UAR\n"); - ret = -ENOMEM; + ret = PTR_ERR(dmn->uar); goto clean_pd; } @@ -135,25 +136,14 @@ static void dr_domain_fill_uplink_caps(struct mlx5dr_domain *dmn, static int dr_domain_query_vport(struct mlx5dr_domain *dmn, u16 vport_number, + bool other_vport, struct mlx5dr_cmd_vport_cap *vport_caps) { - u16 cmd_vport = vport_number; - bool other_vport = true; int ret; - if (vport_number == MLX5_VPORT_UPLINK) { - dr_domain_fill_uplink_caps(dmn, vport_caps); - return 0; - } - - if (dmn->info.caps.is_ecpf && vport_number == MLX5_VPORT_ECPF) { - other_vport = false; - cmd_vport = 0; - } - ret = mlx5dr_cmd_query_esw_vport_context(dmn->mdev, other_vport, - cmd_vport, + vport_number, &vport_caps->icm_address_rx, &vport_caps->icm_address_tx); if (ret) @@ -161,7 +151,7 @@ static int dr_domain_query_vport(struct mlx5dr_domain *dmn, ret = mlx5dr_cmd_query_gvmi(dmn->mdev, other_vport, - cmd_vport, + vport_number, &vport_caps->vport_gvmi); if (ret) return ret; @@ -174,11 +164,15 @@ static int dr_domain_query_vport(struct mlx5dr_domain *dmn, static int dr_domain_query_esw_mngr(struct mlx5dr_domain *dmn) { - return dr_domain_query_vport(dmn, - dmn->info.caps.is_ecpf ? MLX5_VPORT_ECPF : 0, + return dr_domain_query_vport(dmn, 0, false, &dmn->info.caps.vports.esw_manager_caps); } +static void dr_domain_query_uplink(struct mlx5dr_domain *dmn) +{ + dr_domain_fill_uplink_caps(dmn, &dmn->info.caps.vports.uplink_caps); +} + static struct mlx5dr_cmd_vport_cap * dr_domain_add_vport_cap(struct mlx5dr_domain *dmn, u16 vport) { @@ -190,7 +184,7 @@ dr_domain_add_vport_cap(struct mlx5dr_domain *dmn, u16 vport) if (!vport_caps) return NULL; - ret = dr_domain_query_vport(dmn, vport, vport_caps); + ret = dr_domain_query_vport(dmn, vport, true, vport_caps); if (ret) { kvfree(vport_caps); return NULL; @@ -207,16 +201,26 @@ dr_domain_add_vport_cap(struct mlx5dr_domain *dmn, u16 vport) return vport_caps; } +static bool dr_domain_is_esw_mgr_vport(struct mlx5dr_domain *dmn, u16 vport) +{ + struct mlx5dr_cmd_caps *caps = &dmn->info.caps; + + return (caps->is_ecpf && vport == MLX5_VPORT_ECPF) || + (!caps->is_ecpf && vport == 0); +} + struct mlx5dr_cmd_vport_cap * mlx5dr_domain_get_vport_cap(struct mlx5dr_domain *dmn, u16 vport) { struct mlx5dr_cmd_caps *caps = &dmn->info.caps; struct mlx5dr_cmd_vport_cap *vport_caps; - if ((caps->is_ecpf && vport == MLX5_VPORT_ECPF) || - (!caps->is_ecpf && vport == 0)) + if (dr_domain_is_esw_mgr_vport(dmn, vport)) return &caps->vports.esw_manager_caps; + if (vport == MLX5_VPORT_UPLINK) + return &caps->vports.uplink_caps; + vport_load: vport_caps = xa_load(&caps->vports.vports_caps_xa, vport); if (vport_caps) @@ -241,17 +245,6 @@ static void dr_domain_clear_vports(struct mlx5dr_domain *dmn) } } -static int dr_domain_query_uplink(struct mlx5dr_domain *dmn) -{ - struct mlx5dr_cmd_vport_cap *vport_caps; - - vport_caps = mlx5dr_domain_get_vport_cap(dmn, MLX5_VPORT_UPLINK); - if (!vport_caps) - return -EINVAL; - - return 0; -} - static int dr_domain_query_fdb_caps(struct mlx5_core_dev *mdev, struct mlx5dr_domain *dmn) { @@ -281,11 +274,7 @@ static int dr_domain_query_fdb_caps(struct mlx5_core_dev *mdev, goto free_vports_caps_xa; } - ret = dr_domain_query_uplink(dmn); - if (ret) { - mlx5dr_err(dmn, "Failed to query uplink vport caps (err: %d)", ret); - goto free_vports_caps_xa; - } + dr_domain_query_uplink(dmn); return 0; @@ -406,7 +395,7 @@ mlx5dr_domain_create(struct mlx5_core_dev *mdev, enum mlx5dr_domain_type type) } dr_domain_init_csum_recalc_fts(dmn); - + mlx5dr_dbg_init_dump(dmn); return dmn; uninit_caps: @@ -442,11 +431,12 @@ int mlx5dr_domain_sync(struct mlx5dr_domain *dmn, u32 flags) int mlx5dr_domain_destroy(struct mlx5dr_domain *dmn) { - if (refcount_read(&dmn->refcount) > 1) + if (WARN_ON_ONCE(refcount_read(&dmn->refcount) > 1)) return -EBUSY; /* make sure resources are not used by the hardware */ mlx5dr_cmd_sync_steering(dmn->mdev); + mlx5dr_dbg_uninit_dump(dmn); dr_domain_uninit_csum_recalc_fts(dmn); dr_domain_uninit_resources(dmn); dr_domain_caps_uninit(dmn); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c index 75c775bee351..e87cf498c77b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c @@ -141,6 +141,19 @@ static bool dr_mask_is_tnl_geneve_tlv_opt(struct mlx5dr_match_misc3 *misc3) } static bool +dr_matcher_supp_flex_parser_ok(struct mlx5dr_cmd_caps *caps) +{ + return caps->flex_parser_ok_bits_supp; +} + +static bool dr_mask_is_tnl_geneve_tlv_opt_exist_set(struct mlx5dr_match_misc *misc, + struct mlx5dr_domain *dmn) +{ + return dr_matcher_supp_flex_parser_ok(&dmn->info.caps) && + misc->geneve_tlv_option_0_exist; +} + +static bool dr_matcher_supp_tnl_geneve(struct mlx5dr_cmd_caps *caps) { return (caps->sw_format_ver == MLX5_STEERING_FORMAT_CONNECTX_6DX) || @@ -359,7 +372,7 @@ static bool dr_mask_is_tnl_mpls_over_gre(struct mlx5dr_match_param *mask, static int dr_matcher_supp_tnl_mpls_over_udp(struct mlx5dr_cmd_caps *caps) { - return caps->flex_protocols & mlx5_FLEX_PARSER_MPLS_OVER_UDP_ENABLED; + return caps->flex_protocols & MLX5_FLEX_PARSER_MPLS_OVER_UDP_ENABLED; } static bool dr_mask_is_tnl_mpls_over_udp(struct mlx5dr_match_param *mask, @@ -368,6 +381,12 @@ static bool dr_mask_is_tnl_mpls_over_udp(struct mlx5dr_match_param *mask, return DR_MASK_IS_OUTER_MPLS_OVER_UDP_SET(&mask->misc2) && dr_matcher_supp_tnl_mpls_over_udp(&dmn->info.caps); } + +static bool dr_mask_is_tnl_header_0_1_set(struct mlx5dr_match_misc5 *misc5) +{ + return misc5->tunnel_header_0 || misc5->tunnel_header_1; +} + int mlx5dr_matcher_select_builders(struct mlx5dr_matcher *matcher, struct mlx5dr_matcher_rx_tx *nic_matcher, enum mlx5dr_ipv outer_ipv, @@ -424,6 +443,9 @@ static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher, if (matcher->match_criteria & DR_MATCHER_CRITERIA_MISC4) mask.misc4 = matcher->mask.misc4; + if (matcher->match_criteria & DR_MATCHER_CRITERIA_MISC5) + mask.misc5 = matcher->mask.misc5; + ret = mlx5dr_ste_build_pre_check(dmn, matcher->match_criteria, &matcher->mask, NULL); if (ret) @@ -443,7 +465,8 @@ static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher, if (matcher->match_criteria & (DR_MATCHER_CRITERIA_OUTER | DR_MATCHER_CRITERIA_MISC | DR_MATCHER_CRITERIA_MISC2 | - DR_MATCHER_CRITERIA_MISC3)) { + DR_MATCHER_CRITERIA_MISC3 | + DR_MATCHER_CRITERIA_MISC5)) { inner = false; if (dr_mask_is_wqe_metadata_set(&mask.misc2)) @@ -511,6 +534,10 @@ static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher, mlx5dr_ste_build_tnl_geneve_tlv_opt(ste_ctx, &sb[idx++], &mask, &dmn->info.caps, inner, rx); + if (dr_mask_is_tnl_geneve_tlv_opt_exist_set(&mask.misc, dmn)) + mlx5dr_ste_build_tnl_geneve_tlv_opt_exist(ste_ctx, &sb[idx++], + &mask, &dmn->info.caps, + inner, rx); } else if (dr_mask_is_tnl_gtpu_any(&mask, dmn)) { if (dr_mask_is_tnl_gtpu_flex_parser_0(&mask, dmn)) mlx5dr_ste_build_tnl_gtpu_flex_parser_0(ste_ctx, &sb[idx++], @@ -525,6 +552,9 @@ static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher, if (dr_mask_is_tnl_gtpu(&mask, dmn)) mlx5dr_ste_build_tnl_gtpu(ste_ctx, &sb[idx++], &mask, inner, rx); + } else if (dr_mask_is_tnl_header_0_1_set(&mask.misc5)) { + mlx5dr_ste_build_tnl_header_0_1(ste_ctx, &sb[idx++], + &mask, inner, rx); } if (DR_MASK_IS_ETH_L4_MISC_SET(mask.misc3, outer)) @@ -653,10 +683,10 @@ static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher, return 0; } -static int dr_matcher_connect(struct mlx5dr_domain *dmn, - struct mlx5dr_matcher_rx_tx *curr_nic_matcher, - struct mlx5dr_matcher_rx_tx *next_nic_matcher, - struct mlx5dr_matcher_rx_tx *prev_nic_matcher) +static int dr_nic_matcher_connect(struct mlx5dr_domain *dmn, + struct mlx5dr_matcher_rx_tx *curr_nic_matcher, + struct mlx5dr_matcher_rx_tx *next_nic_matcher, + struct mlx5dr_matcher_rx_tx *prev_nic_matcher) { struct mlx5dr_table_rx_tx *nic_tbl = curr_nic_matcher->nic_tbl; struct mlx5dr_domain_rx_tx *nic_dmn = nic_tbl->nic_dmn; @@ -712,58 +742,50 @@ static int dr_matcher_connect(struct mlx5dr_domain *dmn, return 0; } -static int dr_matcher_add_to_tbl(struct mlx5dr_matcher *matcher) +int mlx5dr_matcher_add_to_tbl_nic(struct mlx5dr_domain *dmn, + struct mlx5dr_matcher_rx_tx *nic_matcher) { - struct mlx5dr_matcher *next_matcher, *prev_matcher, *tmp_matcher; - struct mlx5dr_table *tbl = matcher->tbl; - struct mlx5dr_domain *dmn = tbl->dmn; + struct mlx5dr_matcher_rx_tx *next_nic_matcher, *prev_nic_matcher, *tmp_nic_matcher; + struct mlx5dr_table_rx_tx *nic_tbl = nic_matcher->nic_tbl; bool first = true; int ret; - next_matcher = NULL; - list_for_each_entry(tmp_matcher, &tbl->matcher_list, matcher_list) { - if (tmp_matcher->prio >= matcher->prio) { - next_matcher = tmp_matcher; + /* If the nic matcher is already on its parent nic table list, + * then it is already connected to the chain of nic matchers. + */ + if (!list_empty(&nic_matcher->list_node)) + return 0; + + next_nic_matcher = NULL; + list_for_each_entry(tmp_nic_matcher, &nic_tbl->nic_matcher_list, list_node) { + if (tmp_nic_matcher->prio >= nic_matcher->prio) { + next_nic_matcher = tmp_nic_matcher; break; } first = false; } - prev_matcher = NULL; - if (next_matcher && !first) - prev_matcher = list_prev_entry(next_matcher, matcher_list); + prev_nic_matcher = NULL; + if (next_nic_matcher && !first) + prev_nic_matcher = list_prev_entry(next_nic_matcher, list_node); else if (!first) - prev_matcher = list_last_entry(&tbl->matcher_list, - struct mlx5dr_matcher, - matcher_list); - - if (dmn->type == MLX5DR_DOMAIN_TYPE_FDB || - dmn->type == MLX5DR_DOMAIN_TYPE_NIC_RX) { - ret = dr_matcher_connect(dmn, &matcher->rx, - next_matcher ? &next_matcher->rx : NULL, - prev_matcher ? &prev_matcher->rx : NULL); - if (ret) - return ret; - } + prev_nic_matcher = list_last_entry(&nic_tbl->nic_matcher_list, + struct mlx5dr_matcher_rx_tx, + list_node); - if (dmn->type == MLX5DR_DOMAIN_TYPE_FDB || - dmn->type == MLX5DR_DOMAIN_TYPE_NIC_TX) { - ret = dr_matcher_connect(dmn, &matcher->tx, - next_matcher ? &next_matcher->tx : NULL, - prev_matcher ? &prev_matcher->tx : NULL); - if (ret) - return ret; - } + ret = dr_nic_matcher_connect(dmn, nic_matcher, + next_nic_matcher, prev_nic_matcher); + if (ret) + return ret; - if (prev_matcher) - list_add(&matcher->matcher_list, &prev_matcher->matcher_list); - else if (next_matcher) - list_add_tail(&matcher->matcher_list, - &next_matcher->matcher_list); + if (prev_nic_matcher) + list_add(&nic_matcher->list_node, &prev_nic_matcher->list_node); + else if (next_nic_matcher) + list_add_tail(&nic_matcher->list_node, &next_nic_matcher->list_node); else - list_add(&matcher->matcher_list, &tbl->matcher_list); + list_add(&nic_matcher->list_node, &nic_matcher->nic_tbl->nic_matcher_list); - return 0; + return ret; } static void dr_matcher_uninit_nic(struct mlx5dr_matcher_rx_tx *nic_matcher) @@ -822,6 +844,9 @@ static int dr_matcher_init_nic(struct mlx5dr_matcher *matcher, struct mlx5dr_domain *dmn = matcher->tbl->dmn; int ret; + nic_matcher->prio = matcher->prio; + INIT_LIST_HEAD(&nic_matcher->list_node); + ret = dr_matcher_set_all_ste_builders(matcher, nic_matcher); if (ret) return ret; @@ -872,13 +897,12 @@ uninit_nic_rx: return ret; } -static int dr_matcher_init(struct mlx5dr_matcher *matcher, - struct mlx5dr_match_parameters *mask) +static int dr_matcher_copy_param(struct mlx5dr_matcher *matcher, + struct mlx5dr_match_parameters *mask) { + struct mlx5dr_domain *dmn = matcher->tbl->dmn; struct mlx5dr_match_parameters consumed_mask; - struct mlx5dr_table *tbl = matcher->tbl; - struct mlx5dr_domain *dmn = tbl->dmn; - int i, ret; + int i, ret = 0; if (matcher->match_criteria >= DR_MATCHER_CRITERIA_MAX) { mlx5dr_err(dmn, "Invalid match criteria attribute\n"); @@ -898,10 +922,36 @@ static int dr_matcher_init(struct mlx5dr_matcher *matcher, consumed_mask.match_sz = mask->match_sz; memcpy(consumed_mask.match_buf, mask->match_buf, mask->match_sz); mlx5dr_ste_copy_param(matcher->match_criteria, - &matcher->mask, &consumed_mask, - true); + &matcher->mask, &consumed_mask, true); + + /* Check that all mask data was consumed */ + for (i = 0; i < consumed_mask.match_sz; i++) { + if (!((u8 *)consumed_mask.match_buf)[i]) + continue; + + mlx5dr_dbg(dmn, + "Match param mask contains unsupported parameters\n"); + ret = -EOPNOTSUPP; + break; + } + + kfree(consumed_mask.match_buf); } + return ret; +} + +static int dr_matcher_init(struct mlx5dr_matcher *matcher, + struct mlx5dr_match_parameters *mask) +{ + struct mlx5dr_table *tbl = matcher->tbl; + struct mlx5dr_domain *dmn = tbl->dmn; + int ret; + + ret = dr_matcher_copy_param(matcher, mask); + if (ret) + return ret; + switch (dmn->type) { case MLX5DR_DOMAIN_TYPE_NIC_RX: matcher->rx.nic_tbl = &tbl->rx; @@ -919,24 +969,25 @@ static int dr_matcher_init(struct mlx5dr_matcher *matcher, default: WARN_ON(true); ret = -EINVAL; - goto free_consumed_mask; - } - - /* Check that all mask data was consumed */ - for (i = 0; i < consumed_mask.match_sz; i++) { - if (consumed_mask.match_buf[i]) { - mlx5dr_dbg(dmn, "Match param mask contains unsupported parameters\n"); - ret = -EOPNOTSUPP; - goto free_consumed_mask; - } } - ret = 0; -free_consumed_mask: - kfree(consumed_mask.match_buf); return ret; } +static void dr_matcher_add_to_dbg_list(struct mlx5dr_matcher *matcher) +{ + mutex_lock(&matcher->tbl->dmn->dump_info.dbg_mutex); + list_add(&matcher->list_node, &matcher->tbl->matcher_list); + mutex_unlock(&matcher->tbl->dmn->dump_info.dbg_mutex); +} + +static void dr_matcher_remove_from_dbg_list(struct mlx5dr_matcher *matcher) +{ + mutex_lock(&matcher->tbl->dmn->dump_info.dbg_mutex); + list_del(&matcher->list_node); + mutex_unlock(&matcher->tbl->dmn->dump_info.dbg_mutex); +} + struct mlx5dr_matcher * mlx5dr_matcher_create(struct mlx5dr_table *tbl, u32 priority, @@ -956,7 +1007,8 @@ mlx5dr_matcher_create(struct mlx5dr_table *tbl, matcher->prio = priority; matcher->match_criteria = match_criteria_enable; refcount_set(&matcher->refcount, 1); - INIT_LIST_HEAD(&matcher->matcher_list); + INIT_LIST_HEAD(&matcher->list_node); + INIT_LIST_HEAD(&matcher->dbg_rule_list); mlx5dr_domain_lock(tbl->dmn); @@ -964,16 +1016,12 @@ mlx5dr_matcher_create(struct mlx5dr_table *tbl, if (ret) goto free_matcher; - ret = dr_matcher_add_to_tbl(matcher); - if (ret) - goto matcher_uninit; + dr_matcher_add_to_dbg_list(matcher); mlx5dr_domain_unlock(tbl->dmn); return matcher; -matcher_uninit: - dr_matcher_uninit(matcher); free_matcher: mlx5dr_domain_unlock(tbl->dmn); kfree(matcher); @@ -982,10 +1030,10 @@ dec_ref: return NULL; } -static int dr_matcher_disconnect(struct mlx5dr_domain *dmn, - struct mlx5dr_table_rx_tx *nic_tbl, - struct mlx5dr_matcher_rx_tx *next_nic_matcher, - struct mlx5dr_matcher_rx_tx *prev_nic_matcher) +static int dr_matcher_disconnect_nic(struct mlx5dr_domain *dmn, + struct mlx5dr_table_rx_tx *nic_tbl, + struct mlx5dr_matcher_rx_tx *next_nic_matcher, + struct mlx5dr_matcher_rx_tx *prev_nic_matcher) { struct mlx5dr_domain_rx_tx *nic_dmn = nic_tbl->nic_dmn; struct mlx5dr_htbl_connect_info info; @@ -1012,43 +1060,34 @@ static int dr_matcher_disconnect(struct mlx5dr_domain *dmn, &info, true); } -static int dr_matcher_remove_from_tbl(struct mlx5dr_matcher *matcher) +int mlx5dr_matcher_remove_from_tbl_nic(struct mlx5dr_domain *dmn, + struct mlx5dr_matcher_rx_tx *nic_matcher) { - struct mlx5dr_matcher *prev_matcher, *next_matcher; - struct mlx5dr_table *tbl = matcher->tbl; - struct mlx5dr_domain *dmn = tbl->dmn; - int ret = 0; + struct mlx5dr_matcher_rx_tx *prev_nic_matcher, *next_nic_matcher; + struct mlx5dr_table_rx_tx *nic_tbl = nic_matcher->nic_tbl; + int ret; - if (list_is_last(&matcher->matcher_list, &tbl->matcher_list)) - next_matcher = NULL; - else - next_matcher = list_next_entry(matcher, matcher_list); + /* If the nic matcher is not on its parent nic table list, + * then it is detached - no need to disconnect it. + */ + if (list_empty(&nic_matcher->list_node)) + return 0; - if (matcher->matcher_list.prev == &tbl->matcher_list) - prev_matcher = NULL; + if (list_is_last(&nic_matcher->list_node, &nic_tbl->nic_matcher_list)) + next_nic_matcher = NULL; else - prev_matcher = list_prev_entry(matcher, matcher_list); - - if (dmn->type == MLX5DR_DOMAIN_TYPE_FDB || - dmn->type == MLX5DR_DOMAIN_TYPE_NIC_RX) { - ret = dr_matcher_disconnect(dmn, &tbl->rx, - next_matcher ? &next_matcher->rx : NULL, - prev_matcher ? &prev_matcher->rx : NULL); - if (ret) - return ret; - } + next_nic_matcher = list_next_entry(nic_matcher, list_node); - if (dmn->type == MLX5DR_DOMAIN_TYPE_FDB || - dmn->type == MLX5DR_DOMAIN_TYPE_NIC_TX) { - ret = dr_matcher_disconnect(dmn, &tbl->tx, - next_matcher ? &next_matcher->tx : NULL, - prev_matcher ? &prev_matcher->tx : NULL); - if (ret) - return ret; - } + if (nic_matcher->list_node.prev == &nic_tbl->nic_matcher_list) + prev_nic_matcher = NULL; + else + prev_nic_matcher = list_prev_entry(nic_matcher, list_node); - list_del(&matcher->matcher_list); + ret = dr_matcher_disconnect_nic(dmn, nic_tbl, next_nic_matcher, prev_nic_matcher); + if (ret) + return ret; + list_del_init(&nic_matcher->list_node); return 0; } @@ -1056,12 +1095,12 @@ int mlx5dr_matcher_destroy(struct mlx5dr_matcher *matcher) { struct mlx5dr_table *tbl = matcher->tbl; - if (refcount_read(&matcher->refcount) > 1) + if (WARN_ON_ONCE(refcount_read(&matcher->refcount) > 1)) return -EBUSY; mlx5dr_domain_lock(tbl->dmn); - dr_matcher_remove_from_tbl(matcher); + dr_matcher_remove_from_dbg_list(matcher); dr_matcher_uninit(matcher); refcount_dec(&matcher->tbl->refcount); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c index 6a390e981b09..b4374578425b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c @@ -5,11 +5,6 @@ #define DR_RULE_MAX_STE_CHAIN (DR_RULE_MAX_STES + DR_ACTION_MAX_STES) -struct mlx5dr_rule_action_member { - struct mlx5dr_action *action; - struct list_head list; -}; - static int dr_rule_append_to_miss_list(struct mlx5dr_ste_ctx *ste_ctx, struct mlx5dr_ste *new_last_ste, struct list_head *miss_list, @@ -979,14 +974,36 @@ static bool dr_rule_verify(struct mlx5dr_matcher *matcher, return false; } } + + if (match_criteria & DR_MATCHER_CRITERIA_MISC5) { + s_idx = offsetof(struct mlx5dr_match_param, misc5); + e_idx = min(s_idx + sizeof(param->misc5), value_size); + + if (!dr_rule_cmp_value_to_mask(mask_p, param_p, s_idx, e_idx)) { + mlx5dr_err(matcher->tbl->dmn, "Rule misc5 parameters contains a value not specified by mask\n"); + return false; + } + } return true; } static int dr_rule_destroy_rule_nic(struct mlx5dr_rule *rule, struct mlx5dr_rule_rx_tx *nic_rule) { + /* Check if this nic rule was actually created, or was it skipped + * and only the other type of the RX/TX nic rule was created. + */ + if (!nic_rule->last_rule_ste) + return 0; + mlx5dr_domain_nic_lock(nic_rule->nic_matcher->nic_tbl->nic_dmn); dr_rule_clean_rule_members(rule, nic_rule); + + nic_rule->nic_matcher->rules--; + if (!nic_rule->nic_matcher->rules) + mlx5dr_matcher_remove_from_tbl_nic(rule->matcher->tbl->dmn, + nic_rule->nic_matcher); + mlx5dr_domain_nic_unlock(nic_rule->nic_matcher->nic_tbl->nic_dmn); return 0; @@ -1003,6 +1020,8 @@ static int dr_rule_destroy_rule(struct mlx5dr_rule *rule) { struct mlx5dr_domain *dmn = rule->matcher->tbl->dmn; + mlx5dr_dbg_rule_del(rule); + switch (dmn->type) { case MLX5DR_DOMAIN_TYPE_NIC_RX: dr_rule_destroy_rule_nic(rule, &rule->rx); @@ -1091,24 +1110,28 @@ dr_rule_create_rule_nic(struct mlx5dr_rule *rule, mlx5dr_domain_nic_lock(nic_dmn); + ret = mlx5dr_matcher_add_to_tbl_nic(dmn, nic_matcher); + if (ret) + goto free_hw_ste; + ret = mlx5dr_matcher_select_builders(matcher, nic_matcher, dr_rule_get_ipv(¶m->outer), dr_rule_get_ipv(¶m->inner)); if (ret) - goto free_hw_ste; + goto remove_from_nic_tbl; /* Set the tag values inside the ste array */ ret = mlx5dr_ste_build_ste_arr(matcher, nic_matcher, param, hw_ste_arr); if (ret) - goto free_hw_ste; + goto remove_from_nic_tbl; /* Set the actions values/addresses inside the ste array */ ret = mlx5dr_actions_build_ste_arr(matcher, nic_matcher, actions, num_actions, hw_ste_arr, &new_hw_ste_arr_sz); if (ret) - goto free_hw_ste; + goto remove_from_nic_tbl; cur_htbl = nic_matcher->s_htbl; @@ -1155,6 +1178,8 @@ dr_rule_create_rule_nic(struct mlx5dr_rule *rule, if (htbl) mlx5dr_htbl_put(htbl); + nic_matcher->rules++; + mlx5dr_domain_nic_unlock(nic_dmn); kfree(hw_ste_arr); @@ -1168,6 +1193,10 @@ free_rule: list_del(&ste_info->send_list); kfree(ste_info); } + +remove_from_nic_tbl: + mlx5dr_matcher_remove_from_tbl_nic(dmn, nic_matcher); + free_hw_ste: mlx5dr_domain_nic_unlock(nic_dmn); kfree(hw_ste_arr); @@ -1257,6 +1286,8 @@ dr_rule_create_rule(struct mlx5dr_matcher *matcher, if (ret) goto remove_action_members; + INIT_LIST_HEAD(&rule->dbg_node); + mlx5dr_dbg_rule_add(rule); return rule; remove_action_members: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c index 219a5474a8a4..7e61742e58a0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c @@ -719,6 +719,8 @@ static void dr_ste_copy_mask_misc(char *mask, struct mlx5dr_match_misc *spec, bo spec->vxlan_vni = IFC_GET_CLR(fte_match_set_misc, mask, vxlan_vni, clr); spec->geneve_vni = IFC_GET_CLR(fte_match_set_misc, mask, geneve_vni, clr); + spec->geneve_tlv_option_0_exist = + IFC_GET_CLR(fte_match_set_misc, mask, geneve_tlv_option_0_exist, clr); spec->geneve_oam = IFC_GET_CLR(fte_match_set_misc, mask, geneve_oam, clr); spec->outer_ipv6_flow_label = @@ -880,6 +882,26 @@ static void dr_ste_copy_mask_misc4(char *mask, struct mlx5dr_match_misc4 *spec, IFC_GET_CLR(fte_match_set_misc4, mask, prog_sample_field_value_3, clr); } +static void dr_ste_copy_mask_misc5(char *mask, struct mlx5dr_match_misc5 *spec, bool clr) +{ + spec->macsec_tag_0 = + IFC_GET_CLR(fte_match_set_misc5, mask, macsec_tag_0, clr); + spec->macsec_tag_1 = + IFC_GET_CLR(fte_match_set_misc5, mask, macsec_tag_1, clr); + spec->macsec_tag_2 = + IFC_GET_CLR(fte_match_set_misc5, mask, macsec_tag_2, clr); + spec->macsec_tag_3 = + IFC_GET_CLR(fte_match_set_misc5, mask, macsec_tag_3, clr); + spec->tunnel_header_0 = + IFC_GET_CLR(fte_match_set_misc5, mask, tunnel_header_0, clr); + spec->tunnel_header_1 = + IFC_GET_CLR(fte_match_set_misc5, mask, tunnel_header_1, clr); + spec->tunnel_header_2 = + IFC_GET_CLR(fte_match_set_misc5, mask, tunnel_header_2, clr); + spec->tunnel_header_3 = + IFC_GET_CLR(fte_match_set_misc5, mask, tunnel_header_3, clr); +} + void mlx5dr_ste_copy_param(u8 match_criteria, struct mlx5dr_match_param *set_param, struct mlx5dr_match_parameters *mask, @@ -966,6 +988,20 @@ void mlx5dr_ste_copy_param(u8 match_criteria, } dr_ste_copy_mask_misc4(buff, &set_param->misc4, clr); } + + param_location += sizeof(struct mlx5dr_match_misc4); + + if (match_criteria & DR_MATCHER_CRITERIA_MISC5) { + if (mask->match_sz < param_location + + sizeof(struct mlx5dr_match_misc5)) { + memcpy(tail_param, data + param_location, + mask->match_sz - param_location); + buff = tail_param; + } else { + buff = data + param_location; + } + dr_ste_copy_mask_misc5(buff, &set_param->misc5, clr); + } } void mlx5dr_ste_build_eth_l2_src_dst(struct mlx5dr_ste_ctx *ste_ctx, @@ -1180,6 +1216,21 @@ void mlx5dr_ste_build_tnl_geneve_tlv_opt(struct mlx5dr_ste_ctx *ste_ctx, ste_ctx->build_tnl_geneve_tlv_opt_init(sb, mask); } +void mlx5dr_ste_build_tnl_geneve_tlv_opt_exist(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + struct mlx5dr_cmd_caps *caps, + bool inner, bool rx) +{ + if (!ste_ctx->build_tnl_geneve_tlv_opt_exist_init) + return; + + sb->rx = rx; + sb->caps = caps; + sb->inner = inner; + ste_ctx->build_tnl_geneve_tlv_opt_exist_init(sb, mask); +} + void mlx5dr_ste_build_tnl_gtpu(struct mlx5dr_ste_ctx *ste_ctx, struct mlx5dr_ste_build *sb, struct mlx5dr_match_param *mask, @@ -1269,6 +1320,16 @@ void mlx5dr_ste_build_flex_parser_1(struct mlx5dr_ste_ctx *ste_ctx, ste_ctx->build_flex_parser_1_init(sb, mask); } +void mlx5dr_ste_build_tnl_header_0_1(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) +{ + sb->rx = rx; + sb->inner = inner; + ste_ctx->build_tnl_header_0_1_init(sb, mask); +} + static struct mlx5dr_ste_ctx *mlx5dr_ste_ctx_arr[] = { [MLX5_STEERING_FORMAT_CONNECTX_5] = &ste_ctx_v0, [MLX5_STEERING_FORMAT_CONNECTX_6DX] = &ste_ctx_v1, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h index 2d52d065dc8b..ca8fa32b8680 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h @@ -135,12 +135,14 @@ struct mlx5dr_ste_ctx { void DR_STE_CTX_BUILDER(tnl_vxlan_gpe); void DR_STE_CTX_BUILDER(tnl_geneve); void DR_STE_CTX_BUILDER(tnl_geneve_tlv_opt); + void DR_STE_CTX_BUILDER(tnl_geneve_tlv_opt_exist); void DR_STE_CTX_BUILDER(register_0); void DR_STE_CTX_BUILDER(register_1); void DR_STE_CTX_BUILDER(src_gvmi_qpn); void DR_STE_CTX_BUILDER(flex_parser_0); void DR_STE_CTX_BUILDER(flex_parser_1); void DR_STE_CTX_BUILDER(tnl_gtpu); + void DR_STE_CTX_BUILDER(tnl_header_0_1); void DR_STE_CTX_BUILDER(tnl_gtpu_flex_parser_0); void DR_STE_CTX_BUILDER(tnl_gtpu_flex_parser_1); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c index b0649c2877dd..2d62950f7a29 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c @@ -80,6 +80,7 @@ enum { DR_STE_V0_LU_TYPE_GENERAL_PURPOSE = 0x18, DR_STE_V0_LU_TYPE_STEERING_REGISTERS_0 = 0x2f, DR_STE_V0_LU_TYPE_STEERING_REGISTERS_1 = 0x30, + DR_STE_V0_LU_TYPE_TUNNEL_HEADER = 0x34, DR_STE_V0_LU_TYPE_DONT_CARE = MLX5DR_STE_LU_TYPE_DONT_CARE, }; @@ -1704,7 +1705,7 @@ static void dr_ste_v0_set_flex_parser(u32 *misc4_field_id, u32 id = *misc4_field_id; u8 *parser_ptr; - if (parser_is_used[id]) + if (id >= DR_NUM_OF_FLEX_PARSERS || parser_is_used[id]) return; parser_is_used[id] = true; @@ -1875,6 +1876,27 @@ dr_ste_v0_build_tnl_gtpu_flex_parser_1_init(struct mlx5dr_ste_build *sb, sb->ste_build_tag_func = &dr_ste_v0_build_tnl_gtpu_flex_parser_1_tag; } +static int dr_ste_v0_build_tnl_header_0_1_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + uint8_t *tag) +{ + struct mlx5dr_match_misc5 *misc5 = &value->misc5; + + DR_STE_SET_TAG(tunnel_header, tag, tunnel_header_0, misc5, tunnel_header_0); + DR_STE_SET_TAG(tunnel_header, tag, tunnel_header_1, misc5, tunnel_header_1); + + return 0; +} + +static void dr_ste_v0_build_tnl_header_0_1_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + sb->lu_type = DR_STE_V0_LU_TYPE_TUNNEL_HEADER; + dr_ste_v0_build_tnl_header_0_1_tag(mask, sb, sb->bit_mask); + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v0_build_tnl_header_0_1_tag; +} + struct mlx5dr_ste_ctx ste_ctx_v0 = { /* Builders */ .build_eth_l2_src_dst_init = &dr_ste_v0_build_eth_l2_src_dst_init, @@ -1903,6 +1925,7 @@ struct mlx5dr_ste_ctx ste_ctx_v0 = { .build_flex_parser_0_init = &dr_ste_v0_build_flex_parser_0_init, .build_flex_parser_1_init = &dr_ste_v0_build_flex_parser_1_init, .build_tnl_gtpu_init = &dr_ste_v0_build_flex_parser_tnl_gtpu_init, + .build_tnl_header_0_1_init = &dr_ste_v0_build_tnl_header_0_1_init, .build_tnl_gtpu_flex_parser_0_init = &dr_ste_v0_build_tnl_gtpu_flex_parser_0_init, .build_tnl_gtpu_flex_parser_1_init = &dr_ste_v0_build_tnl_gtpu_flex_parser_1_init, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c index cb9cf67b0a02..6ca06800f1d9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c @@ -47,6 +47,7 @@ enum { DR_STE_V1_LU_TYPE_ETHL3_IPV4_MISC_I = 0x000f, DR_STE_V1_LU_TYPE_STEERING_REGISTERS_0 = 0x010f, DR_STE_V1_LU_TYPE_STEERING_REGISTERS_1 = 0x0110, + DR_STE_V1_LU_TYPE_FLEX_PARSER_OK = 0x0011, DR_STE_V1_LU_TYPE_FLEX_PARSER_0 = 0x0111, DR_STE_V1_LU_TYPE_FLEX_PARSER_1 = 0x0112, DR_STE_V1_LU_TYPE_ETHL4_MISC_O = 0x0113, @@ -1713,6 +1714,27 @@ dr_ste_v1_build_flex_parser_tnl_geneve_init(struct mlx5dr_ste_build *sb, sb->ste_build_tag_func = &dr_ste_v1_build_flex_parser_tnl_geneve_tag; } +static int dr_ste_v1_build_tnl_header_0_1_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + uint8_t *tag) +{ + struct mlx5dr_match_misc5 *misc5 = &value->misc5; + + DR_STE_SET_TAG(tunnel_header, tag, tunnel_header_0, misc5, tunnel_header_0); + DR_STE_SET_TAG(tunnel_header, tag, tunnel_header_1, misc5, tunnel_header_1); + + return 0; +} + +static void dr_ste_v1_build_tnl_header_0_1_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + sb->lu_type = DR_STE_V1_LU_TYPE_FLEX_PARSER_TNL_HEADER; + dr_ste_v1_build_tnl_header_0_1_tag(mask, sb, sb->bit_mask); + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_tnl_header_0_1_tag; +} + static int dr_ste_v1_build_register_0_tag(struct mlx5dr_match_param *value, struct mlx5dr_ste_build *sb, u8 *tag) @@ -1833,7 +1855,7 @@ static void dr_ste_v1_set_flex_parser(u32 *misc4_field_id, u32 id = *misc4_field_id; u8 *parser_ptr; - if (parser_is_used[id]) + if (id >= DR_NUM_OF_FLEX_PARSERS || parser_is_used[id]) return; parser_is_used[id] = true; @@ -1921,6 +1943,32 @@ dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_init(struct mlx5dr_ste_build *sb, sb->ste_build_tag_func = &dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_tag; } +static int +dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_exist_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + uint8_t *tag) +{ + u8 parser_id = sb->caps->flex_parser_id_geneve_tlv_option_0; + struct mlx5dr_match_misc *misc = &value->misc; + + if (misc->geneve_tlv_option_0_exist) { + MLX5_SET(ste_flex_parser_ok, tag, flex_parsers_ok, 1 << parser_id); + misc->geneve_tlv_option_0_exist = 0; + } + + return 0; +} + +static void +dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_exist_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + sb->lu_type = DR_STE_V1_LU_TYPE_FLEX_PARSER_OK; + dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_exist_tag(mask, sb, sb->bit_mask); + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_exist_tag; +} + static int dr_ste_v1_build_flex_parser_tnl_gtpu_tag(struct mlx5dr_match_param *value, struct mlx5dr_ste_build *sb, u8 *tag) @@ -2020,12 +2068,14 @@ struct mlx5dr_ste_ctx ste_ctx_v1 = { .build_tnl_vxlan_gpe_init = &dr_ste_v1_build_flex_parser_tnl_vxlan_gpe_init, .build_tnl_geneve_init = &dr_ste_v1_build_flex_parser_tnl_geneve_init, .build_tnl_geneve_tlv_opt_init = &dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_init, + .build_tnl_geneve_tlv_opt_exist_init = &dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_exist_init, .build_register_0_init = &dr_ste_v1_build_register_0_init, .build_register_1_init = &dr_ste_v1_build_register_1_init, .build_src_gvmi_qpn_init = &dr_ste_v1_build_src_gvmi_qpn_init, .build_flex_parser_0_init = &dr_ste_v1_build_flex_parser_0_init, .build_flex_parser_1_init = &dr_ste_v1_build_flex_parser_1_init, .build_tnl_gtpu_init = &dr_ste_v1_build_flex_parser_tnl_gtpu_init, + .build_tnl_header_0_1_init = &dr_ste_v1_build_tnl_header_0_1_init, .build_tnl_gtpu_flex_parser_0_init = &dr_ste_v1_build_tnl_gtpu_flex_parser_0_init, .build_tnl_gtpu_flex_parser_1_init = &dr_ste_v1_build_tnl_gtpu_flex_parser_1_init, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c index 30ae3cda6d2e..8ca110643cc0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c @@ -3,69 +3,66 @@ #include "dr_types.h" -int mlx5dr_table_set_miss_action(struct mlx5dr_table *tbl, - struct mlx5dr_action *action) +static int dr_table_set_miss_action_nic(struct mlx5dr_domain *dmn, + struct mlx5dr_table_rx_tx *nic_tbl, + struct mlx5dr_action *action) { - struct mlx5dr_matcher *last_matcher = NULL; + struct mlx5dr_matcher_rx_tx *last_nic_matcher = NULL; struct mlx5dr_htbl_connect_info info; struct mlx5dr_ste_htbl *last_htbl; int ret; + if (!list_empty(&nic_tbl->nic_matcher_list)) + last_nic_matcher = list_last_entry(&nic_tbl->nic_matcher_list, + struct mlx5dr_matcher_rx_tx, + list_node); + + if (last_nic_matcher) + last_htbl = last_nic_matcher->e_anchor; + else + last_htbl = nic_tbl->s_anchor; + + if (action) + nic_tbl->default_icm_addr = + nic_tbl->nic_dmn->type == DR_DOMAIN_NIC_TYPE_RX ? + action->dest_tbl->tbl->rx.s_anchor->chunk->icm_addr : + action->dest_tbl->tbl->tx.s_anchor->chunk->icm_addr; + else + nic_tbl->default_icm_addr = nic_tbl->nic_dmn->default_icm_addr; + + info.type = CONNECT_MISS; + info.miss_icm_addr = nic_tbl->default_icm_addr; + + ret = mlx5dr_ste_htbl_init_and_postsend(dmn, nic_tbl->nic_dmn, + last_htbl, &info, true); + if (ret) + mlx5dr_dbg(dmn, "Failed to set NIC RX/TX miss action, ret %d\n", ret); + + return ret; +} + +int mlx5dr_table_set_miss_action(struct mlx5dr_table *tbl, + struct mlx5dr_action *action) +{ + int ret; + if (action && action->action_type != DR_ACTION_TYP_FT) return -EOPNOTSUPP; mlx5dr_domain_lock(tbl->dmn); - if (!list_empty(&tbl->matcher_list)) - last_matcher = list_last_entry(&tbl->matcher_list, - struct mlx5dr_matcher, - matcher_list); - if (tbl->dmn->type == MLX5DR_DOMAIN_TYPE_NIC_RX || tbl->dmn->type == MLX5DR_DOMAIN_TYPE_FDB) { - if (last_matcher) - last_htbl = last_matcher->rx.e_anchor; - else - last_htbl = tbl->rx.s_anchor; - - tbl->rx.default_icm_addr = action ? - action->dest_tbl->tbl->rx.s_anchor->chunk->icm_addr : - tbl->rx.nic_dmn->default_icm_addr; - - info.type = CONNECT_MISS; - info.miss_icm_addr = tbl->rx.default_icm_addr; - - ret = mlx5dr_ste_htbl_init_and_postsend(tbl->dmn, - tbl->rx.nic_dmn, - last_htbl, - &info, true); - if (ret) { - mlx5dr_dbg(tbl->dmn, "Failed to set RX miss action, ret %d\n", ret); + ret = dr_table_set_miss_action_nic(tbl->dmn, &tbl->rx, action); + if (ret) goto out; - } } if (tbl->dmn->type == MLX5DR_DOMAIN_TYPE_NIC_TX || tbl->dmn->type == MLX5DR_DOMAIN_TYPE_FDB) { - if (last_matcher) - last_htbl = last_matcher->tx.e_anchor; - else - last_htbl = tbl->tx.s_anchor; - - tbl->tx.default_icm_addr = action ? - action->dest_tbl->tbl->tx.s_anchor->chunk->icm_addr : - tbl->tx.nic_dmn->default_icm_addr; - - info.type = CONNECT_MISS; - info.miss_icm_addr = tbl->tx.default_icm_addr; - - ret = mlx5dr_ste_htbl_init_and_postsend(tbl->dmn, - tbl->tx.nic_dmn, - last_htbl, &info, true); - if (ret) { - mlx5dr_dbg(tbl->dmn, "Failed to set TX miss action, ret %d\n", ret); + ret = dr_table_set_miss_action_nic(tbl->dmn, &tbl->tx, action); + if (ret) goto out; - } } /* Release old action */ @@ -122,6 +119,8 @@ static int dr_table_init_nic(struct mlx5dr_domain *dmn, struct mlx5dr_htbl_connect_info info; int ret; + INIT_LIST_HEAD(&nic_tbl->nic_matcher_list); + nic_tbl->default_icm_addr = nic_dmn->default_icm_addr; nic_tbl->s_anchor = mlx5dr_ste_htbl_alloc(dmn->ste_icm_pool, @@ -266,6 +265,8 @@ struct mlx5dr_table *mlx5dr_table_create(struct mlx5dr_domain *dmn, u32 level, u if (ret) goto uninit_tbl; + INIT_LIST_HEAD(&tbl->dbg_node); + mlx5dr_dbg_tbl_add(tbl); return tbl; uninit_tbl: @@ -281,9 +282,10 @@ int mlx5dr_table_destroy(struct mlx5dr_table *tbl) { int ret; - if (refcount_read(&tbl->refcount) > 1) + if (WARN_ON_ONCE(refcount_read(&tbl->refcount) > 1)) return -EBUSY; + mlx5dr_dbg_tbl_del(tbl); ret = dr_table_destroy_sw_owned_tbl(tbl); if (ret) return ret; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h index 3028b776da00..1b3d484b99be 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h @@ -11,6 +11,7 @@ #include "lib/mlx5.h" #include "mlx5_ifc_dr.h" #include "mlx5dr.h" +#include "dr_dbg.h" #define DR_RULE_MAX_STES 18 #define DR_ACTION_MAX_STES 5 @@ -104,7 +105,8 @@ enum mlx5dr_matcher_criteria { DR_MATCHER_CRITERIA_MISC2 = 1 << 3, DR_MATCHER_CRITERIA_MISC3 = 1 << 4, DR_MATCHER_CRITERIA_MISC4 = 1 << 5, - DR_MATCHER_CRITERIA_MAX = 1 << 6, + DR_MATCHER_CRITERIA_MISC5 = 1 << 6, + DR_MATCHER_CRITERIA_MAX = 1 << 7, }; enum mlx5dr_action_type { @@ -440,6 +442,11 @@ void mlx5dr_ste_build_tnl_geneve_tlv_opt(struct mlx5dr_ste_ctx *ste_ctx, struct mlx5dr_match_param *mask, struct mlx5dr_cmd_caps *caps, bool inner, bool rx); +void mlx5dr_ste_build_tnl_geneve_tlv_opt_exist(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + struct mlx5dr_cmd_caps *caps, + bool inner, bool rx); void mlx5dr_ste_build_tnl_gtpu(struct mlx5dr_ste_ctx *ste_ctx, struct mlx5dr_ste_build *sb, struct mlx5dr_match_param *mask, @@ -454,6 +461,10 @@ void mlx5dr_ste_build_tnl_gtpu_flex_parser_1(struct mlx5dr_ste_ctx *ste_ctx, struct mlx5dr_match_param *mask, struct mlx5dr_cmd_caps *caps, bool inner, bool rx); +void mlx5dr_ste_build_tnl_header_0_1(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); void mlx5dr_ste_build_general_purpose(struct mlx5dr_ste_ctx *ste_ctx, struct mlx5dr_ste_build *sb, struct mlx5dr_match_param *mask, @@ -494,57 +505,64 @@ struct mlx5dr_match_spec { /* Incoming packet Ethertype - this is the Ethertype * following the last VLAN tag of the packet */ - u32 ethertype:16; u32 smac_15_0:16; /* Source MAC address of incoming packet */ + u32 ethertype:16; + u32 dmac_47_16; /* Destination MAC address of incoming packet */ - /* VLAN ID of first VLAN tag in the incoming packet. + + u32 dmac_15_0:16; /* Destination MAC address of incoming packet */ + /* Priority of first VLAN tag in the incoming packet. * Valid only when cvlan_tag==1 or svlan_tag==1 */ - u32 first_vid:12; + u32 first_prio:3; /* CFI bit of first VLAN tag in the incoming packet. * Valid only when cvlan_tag==1 or svlan_tag==1 */ u32 first_cfi:1; - /* Priority of first VLAN tag in the incoming packet. + /* VLAN ID of first VLAN tag in the incoming packet. * Valid only when cvlan_tag==1 or svlan_tag==1 */ - u32 first_prio:3; - u32 dmac_15_0:16; /* Destination MAC address of incoming packet */ - /* TCP flags. ;Bit 0: FIN;Bit 1: SYN;Bit 2: RST;Bit 3: PSH;Bit 4: ACK; - * Bit 5: URG;Bit 6: ECE;Bit 7: CWR;Bit 8: NS + u32 first_vid:12; + + u32 ip_protocol:8; /* IP protocol */ + /* Differentiated Services Code Point derived from + * Traffic Class/TOS field of IPv6/v4 */ - u32 tcp_flags:9; - u32 ip_version:4; /* IP version */ - u32 frag:1; /* Packet is an IP fragment */ - /* The first vlan in the packet is s-vlan (0x8a88). - * cvlan_tag and svlan_tag cannot be set together + u32 ip_dscp:6; + /* Explicit Congestion Notification derived from + * Traffic Class/TOS field of IPv6/v4 */ - u32 svlan_tag:1; + u32 ip_ecn:2; /* The first vlan in the packet is c-vlan (0x8100). * cvlan_tag and svlan_tag cannot be set together */ u32 cvlan_tag:1; - /* Explicit Congestion Notification derived from - * Traffic Class/TOS field of IPv6/v4 + /* The first vlan in the packet is s-vlan (0x8a88). + * cvlan_tag and svlan_tag cannot be set together */ - u32 ip_ecn:2; - /* Differentiated Services Code Point derived from - * Traffic Class/TOS field of IPv6/v4 + u32 svlan_tag:1; + u32 frag:1; /* Packet is an IP fragment */ + u32 ip_version:4; /* IP version */ + /* TCP flags. ;Bit 0: FIN;Bit 1: SYN;Bit 2: RST;Bit 3: PSH;Bit 4: ACK; + * Bit 5: URG;Bit 6: ECE;Bit 7: CWR;Bit 8: NS */ - u32 ip_dscp:6; - u32 ip_protocol:8; /* IP protocol */ + u32 tcp_flags:9; + + /* TCP source port.;tcp and udp sport/dport are mutually exclusive */ + u32 tcp_sport:16; /* TCP destination port. * tcp and udp sport/dport are mutually exclusive */ u32 tcp_dport:16; - /* TCP source port.;tcp and udp sport/dport are mutually exclusive */ - u32 tcp_sport:16; + + u32 reserved_auto1:24; u32 ttl_hoplimit:8; - u32 reserved:24; - /* UDP destination port.;tcp and udp sport/dport are mutually exclusive */ - u32 udp_dport:16; + /* UDP source port.;tcp and udp sport/dport are mutually exclusive */ u32 udp_sport:16; + /* UDP destination port.;tcp and udp sport/dport are mutually exclusive */ + u32 udp_dport:16; + /* IPv6 source address of incoming packets * For IPv4 address use bits 31:0 (rest of the bits are reserved) * This field should be qualified by an appropriate ethertype @@ -588,96 +606,114 @@ struct mlx5dr_match_spec { }; struct mlx5dr_match_misc { - u32 source_sqn:24; /* Source SQN */ - u32 source_vhca_port:4; - /* used with GRE, sequence number exist when gre_s_present == 1 */ - u32 gre_s_present:1; - /* used with GRE, key exist when gre_k_present == 1 */ - u32 gre_k_present:1; - u32 reserved_auto1:1; /* used with GRE, checksum exist when gre_c_present == 1 */ u32 gre_c_present:1; + u32 reserved_auto1:1; + /* used with GRE, key exist when gre_k_present == 1 */ + u32 gre_k_present:1; + /* used with GRE, sequence number exist when gre_s_present == 1 */ + u32 gre_s_present:1; + u32 source_vhca_port:4; + u32 source_sqn:24; /* Source SQN */ + + u32 source_eswitch_owner_vhca_id:16; /* Source port.;0xffff determines wire port */ u32 source_port:16; - u32 source_eswitch_owner_vhca_id:16; - /* VLAN ID of first VLAN tag the inner header of the incoming packet. - * Valid only when inner_second_cvlan_tag ==1 or inner_second_svlan_tag ==1 - */ - u32 inner_second_vid:12; - /* CFI bit of first VLAN tag in the inner header of the incoming packet. - * Valid only when inner_second_cvlan_tag ==1 or inner_second_svlan_tag ==1 - */ - u32 inner_second_cfi:1; - /* Priority of second VLAN tag in the inner header of the incoming packet. - * Valid only when inner_second_cvlan_tag ==1 or inner_second_svlan_tag ==1 - */ - u32 inner_second_prio:3; - /* VLAN ID of first VLAN tag the outer header of the incoming packet. + + /* Priority of second VLAN tag in the outer header of the incoming packet. * Valid only when outer_second_cvlan_tag ==1 or outer_second_svlan_tag ==1 */ - u32 outer_second_vid:12; + u32 outer_second_prio:3; /* CFI bit of first VLAN tag in the outer header of the incoming packet. * Valid only when outer_second_cvlan_tag ==1 or outer_second_svlan_tag ==1 */ u32 outer_second_cfi:1; - /* Priority of second VLAN tag in the outer header of the incoming packet. + /* VLAN ID of first VLAN tag the outer header of the incoming packet. * Valid only when outer_second_cvlan_tag ==1 or outer_second_svlan_tag ==1 */ - u32 outer_second_prio:3; - u32 gre_protocol:16; /* GRE Protocol (outer) */ - u32 reserved_auto3:12; - /* The second vlan in the inner header of the packet is s-vlan (0x8a88). - * inner_second_cvlan_tag and inner_second_svlan_tag cannot be set together + u32 outer_second_vid:12; + /* Priority of second VLAN tag in the inner header of the incoming packet. + * Valid only when inner_second_cvlan_tag ==1 or inner_second_svlan_tag ==1 */ - u32 inner_second_svlan_tag:1; - /* The second vlan in the outer header of the packet is s-vlan (0x8a88). + u32 inner_second_prio:3; + /* CFI bit of first VLAN tag in the inner header of the incoming packet. + * Valid only when inner_second_cvlan_tag ==1 or inner_second_svlan_tag ==1 + */ + u32 inner_second_cfi:1; + /* VLAN ID of first VLAN tag the inner header of the incoming packet. + * Valid only when inner_second_cvlan_tag ==1 or inner_second_svlan_tag ==1 + */ + u32 inner_second_vid:12; + + u32 outer_second_cvlan_tag:1; + u32 inner_second_cvlan_tag:1; + /* The second vlan in the outer header of the packet is c-vlan (0x8100). * outer_second_cvlan_tag and outer_second_svlan_tag cannot be set together */ u32 outer_second_svlan_tag:1; /* The second vlan in the inner header of the packet is c-vlan (0x8100). * inner_second_cvlan_tag and inner_second_svlan_tag cannot be set together */ - u32 inner_second_cvlan_tag:1; - /* The second vlan in the outer header of the packet is c-vlan (0x8100). + u32 inner_second_svlan_tag:1; + /* The second vlan in the outer header of the packet is s-vlan (0x8a88). * outer_second_cvlan_tag and outer_second_svlan_tag cannot be set together */ - u32 outer_second_cvlan_tag:1; - u32 gre_key_l:8; /* GRE Key [7:0] (outer) */ + u32 reserved_auto2:12; + /* The second vlan in the inner header of the packet is s-vlan (0x8a88). + * inner_second_cvlan_tag and inner_second_svlan_tag cannot be set together + */ + u32 gre_protocol:16; /* GRE Protocol (outer) */ + u32 gre_key_h:24; /* GRE Key[31:8] (outer) */ - u32 reserved_auto4:8; + u32 gre_key_l:8; /* GRE Key [7:0] (outer) */ + u32 vxlan_vni:24; /* VXLAN VNI (outer) */ - u32 geneve_oam:1; /* GENEVE OAM field (outer) */ - u32 reserved_auto5:7; + u32 reserved_auto3:8; + u32 geneve_vni:24; /* GENEVE VNI field (outer) */ + u32 reserved_auto4:6; + u32 geneve_tlv_option_0_exist:1; + u32 geneve_oam:1; /* GENEVE OAM field (outer) */ + + u32 reserved_auto5:12; u32 outer_ipv6_flow_label:20; /* Flow label of incoming IPv6 packet (outer) */ + u32 reserved_auto6:12; u32 inner_ipv6_flow_label:20; /* Flow label of incoming IPv6 packet (inner) */ - u32 reserved_auto7:12; - u32 geneve_protocol_type:16; /* GENEVE protocol type (outer) */ + + u32 reserved_auto7:10; u32 geneve_opt_len:6; /* GENEVE OptLen (outer) */ - u32 reserved_auto8:10; + u32 geneve_protocol_type:16; /* GENEVE protocol type (outer) */ + + u32 reserved_auto8:8; u32 bth_dst_qp:24; /* Destination QP in BTH header */ - u32 reserved_auto9:8; - u8 reserved_auto10[20]; + + u32 reserved_auto9; + u32 outer_esp_spi; + u32 reserved_auto10[3]; }; struct mlx5dr_match_misc2 { - u32 outer_first_mpls_ttl:8; /* First MPLS TTL (outer) */ - u32 outer_first_mpls_s_bos:1; /* First MPLS S_BOS (outer) */ - u32 outer_first_mpls_exp:3; /* First MPLS EXP (outer) */ u32 outer_first_mpls_label:20; /* First MPLS LABEL (outer) */ - u32 inner_first_mpls_ttl:8; /* First MPLS TTL (inner) */ - u32 inner_first_mpls_s_bos:1; /* First MPLS S_BOS (inner) */ - u32 inner_first_mpls_exp:3; /* First MPLS EXP (inner) */ + u32 outer_first_mpls_exp:3; /* First MPLS EXP (outer) */ + u32 outer_first_mpls_s_bos:1; /* First MPLS S_BOS (outer) */ + u32 outer_first_mpls_ttl:8; /* First MPLS TTL (outer) */ + u32 inner_first_mpls_label:20; /* First MPLS LABEL (inner) */ - u32 outer_first_mpls_over_gre_ttl:8; /* last MPLS TTL (outer) */ - u32 outer_first_mpls_over_gre_s_bos:1; /* last MPLS S_BOS (outer) */ - u32 outer_first_mpls_over_gre_exp:3; /* last MPLS EXP (outer) */ + u32 inner_first_mpls_exp:3; /* First MPLS EXP (inner) */ + u32 inner_first_mpls_s_bos:1; /* First MPLS S_BOS (inner) */ + u32 inner_first_mpls_ttl:8; /* First MPLS TTL (inner) */ + u32 outer_first_mpls_over_gre_label:20; /* last MPLS LABEL (outer) */ - u32 outer_first_mpls_over_udp_ttl:8; /* last MPLS TTL (outer) */ - u32 outer_first_mpls_over_udp_s_bos:1; /* last MPLS S_BOS (outer) */ - u32 outer_first_mpls_over_udp_exp:3; /* last MPLS EXP (outer) */ + u32 outer_first_mpls_over_gre_exp:3; /* last MPLS EXP (outer) */ + u32 outer_first_mpls_over_gre_s_bos:1; /* last MPLS S_BOS (outer) */ + u32 outer_first_mpls_over_gre_ttl:8; /* last MPLS TTL (outer) */ + u32 outer_first_mpls_over_udp_label:20; /* last MPLS LABEL (outer) */ + u32 outer_first_mpls_over_udp_exp:3; /* last MPLS EXP (outer) */ + u32 outer_first_mpls_over_udp_s_bos:1; /* last MPLS S_BOS (outer) */ + u32 outer_first_mpls_over_udp_ttl:8; /* last MPLS TTL (outer) */ + u32 metadata_reg_c_7; /* metadata_reg_c_7 */ u32 metadata_reg_c_6; /* metadata_reg_c_6 */ u32 metadata_reg_c_5; /* metadata_reg_c_5 */ @@ -687,7 +723,7 @@ struct mlx5dr_match_misc2 { u32 metadata_reg_c_1; /* metadata_reg_c_1 */ u32 metadata_reg_c_0; /* metadata_reg_c_0 */ u32 metadata_reg_a; /* metadata_reg_a */ - u8 reserved_auto2[12]; + u32 reserved_auto1[3]; }; struct mlx5dr_match_misc3 { @@ -695,24 +731,34 @@ struct mlx5dr_match_misc3 { u32 outer_tcp_seq_num; u32 inner_tcp_ack_num; u32 outer_tcp_ack_num; - u32 outer_vxlan_gpe_vni:24; + u32 reserved_auto1:8; - u32 reserved_auto2:16; - u32 outer_vxlan_gpe_flags:8; + u32 outer_vxlan_gpe_vni:24; + u32 outer_vxlan_gpe_next_protocol:8; + u32 outer_vxlan_gpe_flags:8; + u32 reserved_auto2:16; + u32 icmpv4_header_data; u32 icmpv6_header_data; - u8 icmpv6_code; - u8 icmpv6_type; - u8 icmpv4_code; + u8 icmpv4_type; + u8 icmpv4_code; + u8 icmpv6_type; + u8 icmpv6_code; + u32 geneve_tlv_option_0_data; - u8 gtpu_msg_flags; - u8 gtpu_msg_type; + u32 gtpu_teid; + + u8 gtpu_msg_type; + u8 gtpu_msg_flags; + u32 reserved_auto3:16; + u32 gtpu_dw_2; u32 gtpu_first_ext_dw_0; u32 gtpu_dw_0; + u32 reserved_auto4; }; struct mlx5dr_match_misc4 { @@ -724,6 +770,18 @@ struct mlx5dr_match_misc4 { u32 prog_sample_field_id_2; u32 prog_sample_field_value_3; u32 prog_sample_field_id_3; + u32 reserved_auto1[8]; +}; + +struct mlx5dr_match_misc5 { + u32 macsec_tag_0; + u32 macsec_tag_1; + u32 macsec_tag_2; + u32 macsec_tag_3; + u32 tunnel_header_0; + u32 tunnel_header_1; + u32 tunnel_header_2; + u32 tunnel_header_3; }; struct mlx5dr_match_param { @@ -733,6 +791,7 @@ struct mlx5dr_match_param { struct mlx5dr_match_misc2 misc2; struct mlx5dr_match_misc3 misc3; struct mlx5dr_match_misc4 misc4; + struct mlx5dr_match_misc5 misc5; }; #define DR_MASK_IS_ICMPV4_SET(_misc3) ((_misc3)->icmpv4_type || \ @@ -764,6 +823,7 @@ struct mlx5dr_roce_cap { struct mlx5dr_vports { struct mlx5dr_cmd_vport_cap esw_manager_caps; + struct mlx5dr_cmd_vport_cap uplink_caps; struct xarray vports_caps_xa; }; @@ -788,6 +848,7 @@ struct mlx5dr_cmd_caps { u8 flex_parser_id_gtpu_teid; u8 flex_parser_id_gtpu_dw_2; u8 flex_parser_id_gtpu_first_ext_dw_0; + u8 flex_parser_ok_bits_supp; u8 max_ft_level; u16 roce_min_src_udp; u8 sw_format_ver; @@ -842,12 +903,15 @@ struct mlx5dr_domain { struct mlx5dr_domain_info info; struct xarray csum_fts_xa; struct mlx5dr_ste_ctx *ste_ctx; + struct list_head dbg_tbl_list; + struct mlx5dr_dbg_dump_info dump_info; }; struct mlx5dr_table_rx_tx { struct mlx5dr_ste_htbl *s_anchor; struct mlx5dr_domain_rx_tx *nic_dmn; u64 default_icm_addr; + struct list_head nic_matcher_list; }; struct mlx5dr_table { @@ -861,6 +925,7 @@ struct mlx5dr_table { struct list_head matcher_list; struct mlx5dr_action *miss_action; refcount_t refcount; + struct list_head dbg_node; }; struct mlx5dr_matcher_rx_tx { @@ -874,18 +939,21 @@ struct mlx5dr_matcher_rx_tx { u8 num_of_builders_arr[DR_RULE_IPV_MAX][DR_RULE_IPV_MAX]; u64 default_icm_addr; struct mlx5dr_table_rx_tx *nic_tbl; + u32 prio; + struct list_head list_node; + u32 rules; }; struct mlx5dr_matcher { struct mlx5dr_table *tbl; struct mlx5dr_matcher_rx_tx rx; struct mlx5dr_matcher_rx_tx tx; - struct list_head matcher_list; + struct list_head list_node; /* Used for both matchers and dbg managing */ u32 prio; struct mlx5dr_match_param mask; u8 match_criteria; refcount_t refcount; - struct mlx5dv_flow_matcher *dv_matcher; + struct list_head dbg_rule_list; }; struct mlx5dr_ste_action_modify_field { @@ -957,6 +1025,11 @@ struct mlx5dr_action_flow_tag { u32 flow_tag; }; +struct mlx5dr_rule_action_member { + struct mlx5dr_action *action; + struct list_head list; +}; + struct mlx5dr_action { enum mlx5dr_action_type action_type; refcount_t refcount; @@ -997,6 +1070,7 @@ struct mlx5dr_rule { struct mlx5dr_rule_rx_tx rx; struct mlx5dr_rule_rx_tx tx; struct list_head rule_actions_list; + struct list_head dbg_node; u32 flow_source; }; @@ -1049,6 +1123,11 @@ static inline void mlx5dr_domain_unlock(struct mlx5dr_domain *dmn) mlx5dr_domain_nic_unlock(&dmn->info.rx); } +int mlx5dr_matcher_add_to_tbl_nic(struct mlx5dr_domain *dmn, + struct mlx5dr_matcher_rx_tx *nic_matcher); +int mlx5dr_matcher_remove_from_tbl_nic(struct mlx5dr_domain *dmn, + struct mlx5dr_matcher_rx_tx *nic_matcher); + int mlx5dr_matcher_select_builders(struct mlx5dr_matcher *matcher, struct mlx5dr_matcher_rx_tx *nic_matcher, enum mlx5dr_ipv outer_ipv, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c index 2632d5ae9bc0..a476da2424f8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* Copyright (c) 2019 Mellanox Technologies */ +#include <linux/mlx5/vport.h> #include "mlx5_core.h" #include "fs_core.h" #include "fs_cmd.h" @@ -194,6 +195,15 @@ static struct mlx5dr_action *create_vport_action(struct mlx5dr_domain *domain, dest_attr->vport.vhca_id); } +static struct mlx5dr_action *create_uplink_action(struct mlx5dr_domain *domain, + struct mlx5_flow_rule *dst) +{ + struct mlx5_flow_destination *dest_attr = &dst->dest_attr; + + return mlx5dr_action_create_dest_vport(domain, MLX5_VPORT_UPLINK, 1, + dest_attr->vport.vhca_id); +} + static struct mlx5dr_action *create_ft_action(struct mlx5dr_domain *domain, struct mlx5_flow_rule *dst) { @@ -218,7 +228,8 @@ static struct mlx5dr_action *create_action_push_vlan(struct mlx5dr_domain *domai static bool contain_vport_reformat_action(struct mlx5_flow_rule *dst) { - return dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_VPORT && + return (dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_VPORT || + dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_UPLINK) && dst->dest_attr.vport.flags & MLX5_FLOW_DEST_VPORT_REFORMAT_ID; } @@ -411,8 +422,11 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns, fs_dr_actions[fs_dr_num_actions++] = tmp_action; term_actions[num_term_actions++].dest = tmp_action; break; + case MLX5_FLOW_DESTINATION_TYPE_UPLINK: case MLX5_FLOW_DESTINATION_TYPE_VPORT: - tmp_action = create_vport_action(domain, dst); + tmp_action = type == MLX5_FLOW_DESTINATION_TYPE_VPORT ? + create_vport_action(domain, dst) : + create_uplink_action(domain, dst); if (!tmp_action) { err = -ENOMEM; goto free_actions; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr.h index d2a937f69784..9604b2091358 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr.h @@ -447,6 +447,14 @@ struct mlx5_ifc_ste_flex_parser_1_bits { u8 flex_parser_4[0x20]; }; +struct mlx5_ifc_ste_flex_parser_ok_bits { + u8 flex_parser_3[0x20]; + u8 flex_parser_2[0x20]; + u8 flex_parsers_ok[0x8]; + u8 reserved_at_48[0x18]; + u8 flex_parser_0[0x20]; +}; + struct mlx5_ifc_ste_flex_parser_tnl_bits { u8 flex_parser_tunneling_header_63_32[0x20]; @@ -490,6 +498,14 @@ struct mlx5_ifc_ste_flex_parser_tnl_gtpu_bits { u8 reserved_at_40[0x40]; }; +struct mlx5_ifc_ste_tunnel_header_bits { + u8 tunnel_header_0[0x20]; + + u8 tunnel_header_1[0x20]; + + u8 reserved_at_40[0x40]; +}; + struct mlx5_ifc_ste_general_purpose_bits { u8 general_purpose_lookup_field[0x20]; |