diff options
Diffstat (limited to 'drivers/net/ethernet/mellanox/mlx5/core')
32 files changed, 1867 insertions, 787 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 3797cc7c1288..caa837e5e2b9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -1728,7 +1728,7 @@ int mlx5_cmd_init(struct mlx5_core_dev *dev) if (cmd->cmdif_rev > CMD_IF_REV) { dev_err(&dev->pdev->dev, "driver does not support command interface version. driver %d, firmware %d\n", CMD_IF_REV, cmd->cmdif_rev); - err = -ENOTSUPP; + err = -EOPNOTSUPP; goto err_free_page; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cq.c b/drivers/net/ethernet/mellanox/mlx5/core/cq.c index 32d4af9b594d..336d4738b807 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cq.c @@ -179,6 +179,8 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, mlx5_core_dbg(dev, "failed adding CP 0x%x to debug file system\n", cq->cqn); + cq->uar = dev->priv.uar; + return 0; err_cmd: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c index a9dbc28f6b97..a62f4b6a21a5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c @@ -71,6 +71,16 @@ void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv) if (dev_ctx->context) { spin_lock_irq(&priv->ctx_lock); list_add_tail(&dev_ctx->list, &priv->ctx_list); +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING + if (dev_ctx->intf->pfault) { + if (priv->pfault) { + mlx5_core_err(dev, "multiple page fault handlers not supported"); + } else { + priv->pfault_ctx = dev_ctx->context; + priv->pfault = dev_ctx->intf->pfault; + } + } +#endif spin_unlock_irq(&priv->ctx_lock); } else { kfree(dev_ctx); @@ -97,6 +107,15 @@ void mlx5_remove_device(struct mlx5_interface *intf, struct mlx5_priv *priv) if (!dev_ctx) return; +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING + spin_lock_irq(&priv->ctx_lock); + if 
(priv->pfault == dev_ctx->intf->pfault) + priv->pfault = NULL; + spin_unlock_irq(&priv->ctx_lock); + + synchronize_srcu(&priv->pfault_srcu); +#endif + spin_lock_irq(&priv->ctx_lock); list_del(&dev_ctx->list); spin_unlock_irq(&priv->ctx_lock); @@ -329,6 +348,20 @@ void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event, spin_unlock_irqrestore(&priv->ctx_lock, flags); } +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING +void mlx5_core_page_fault(struct mlx5_core_dev *dev, + struct mlx5_pagefault *pfault) +{ + struct mlx5_priv *priv = &dev->priv; + int srcu_idx; + + srcu_idx = srcu_read_lock(&priv->pfault_srcu); + if (priv->pfault) + priv->pfault(dev, priv->pfault_ctx, pfault); + srcu_read_unlock(&priv->pfault_srcu, srcu_idx); +} +#endif + void mlx5_dev_list_lock(void) { mutex_lock(&mlx5_intf_mutex); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 951dbd58594d..f6a6ded204f6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -51,6 +51,9 @@ #define MLX5_SET_CFG(p, f, v) MLX5_SET(create_flow_group_in, p, f, v) +#define MLX5E_HW2SW_MTU(hwmtu) ((hwmtu) - (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN)) +#define MLX5E_SW2HW_MTU(swmtu) ((swmtu) + (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN)) + #define MLX5E_MAX_NUM_TC 8 #define MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE 0x6 @@ -67,8 +70,13 @@ #define MLX5_RX_HEADROOM NET_SKB_PAD -#define MLX5_MPWRQ_LOG_STRIDE_SIZE 6 /* >= 6, HW restriction */ -#define MLX5_MPWRQ_LOG_STRIDE_SIZE_CQE_COMPRESS 8 /* >= 6, HW restriction */ +#define MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(mdev) \ + (6 + MLX5_CAP_GEN(mdev, cache_line_128byte)) /* HW restriction */ +#define MLX5_MPWRQ_LOG_STRIDE_SZ(mdev, req) \ + max_t(u32, MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(mdev), req) +#define MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev) MLX5_MPWRQ_LOG_STRIDE_SZ(mdev, 6) +#define MLX5_MPWRQ_CQE_CMPRS_LOG_STRIDE_SZ(mdev) MLX5_MPWRQ_LOG_STRIDE_SZ(mdev, 8) + #define MLX5_MPWRQ_LOG_WQE_SZ 
18 #define MLX5_MPWRQ_WQE_PAGE_ORDER (MLX5_MPWRQ_LOG_WQE_SZ - PAGE_SHIFT > 0 ? \ MLX5_MPWRQ_LOG_WQE_SZ - PAGE_SHIFT : 0) @@ -98,6 +106,7 @@ #define MLX5E_LOG_INDIR_RQT_SIZE 0x7 #define MLX5E_INDIR_RQT_SIZE BIT(MLX5E_LOG_INDIR_RQT_SIZE) +#define MLX5E_MIN_NUM_CHANNELS 0x1 #define MLX5E_MAX_NUM_CHANNELS (MLX5E_INDIR_RQT_SIZE >> 1) #define MLX5E_MAX_NUM_SQS (MLX5E_MAX_NUM_CHANNELS * MLX5E_MAX_NUM_TC) #define MLX5E_TX_CQ_POLL_BUDGET 128 @@ -111,8 +120,7 @@ #define MLX5E_XDP_IHS_DS_COUNT \ DIV_ROUND_UP(MLX5E_XDP_MIN_INLINE - 2, MLX5_SEND_WQE_DS) #define MLX5E_XDP_TX_DS_COUNT \ - (MLX5E_XDP_IHS_DS_COUNT + \ - (sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS) + 1 /* SG DS */) + ((sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS) + 1 /* SG DS */) #define MLX5E_XDP_TX_WQEBBS \ DIV_ROUND_UP(MLX5E_XDP_TX_DS_COUNT, MLX5_SEND_WQEBB_NUM_DS) @@ -259,6 +267,7 @@ struct mlx5e_tstamp { struct mlx5_core_dev *mdev; struct ptp_clock *ptp; struct ptp_clock_info ptp_info; + u8 *pps_pin_caps; }; enum { @@ -369,6 +378,7 @@ struct mlx5e_rq { unsigned long state; int ix; + u16 rx_headroom; struct mlx5e_rx_am am; /* Adaptive Moderation */ struct bpf_prog *xdp_prog; @@ -479,7 +489,7 @@ struct mlx5e_sq { /* control path */ struct mlx5_wq_ctrl wq_ctrl; - struct mlx5_uar uar; + struct mlx5_sq_bfreg bfreg; struct mlx5e_channel *channel; int tc; u32 rate_limit; @@ -568,8 +578,9 @@ struct mlx5e_vlan_table { unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)]; struct mlx5_flow_handle *active_vlans_rule[VLAN_N_VID]; struct mlx5_flow_handle *untagged_rule; - struct mlx5_flow_handle *any_vlan_rule; - bool filter_disabled; + struct mlx5_flow_handle *any_cvlan_rule; + struct mlx5_flow_handle *any_svlan_rule; + bool filter_disabled; }; struct mlx5e_l2_table { @@ -777,9 +788,11 @@ void mlx5e_fill_hwstamp(struct mlx5e_tstamp *clock, u64 timestamp, struct skb_shared_hwtstamps *hwts); void mlx5e_timestamp_init(struct mlx5e_priv *priv); void mlx5e_timestamp_cleanup(struct mlx5e_priv *priv); +void 
mlx5e_pps_event_handler(struct mlx5e_priv *priv, + struct ptp_clock_event *event); int mlx5e_hwstamp_set(struct net_device *dev, struct ifreq *ifr); int mlx5e_hwstamp_get(struct net_device *dev, struct ifreq *ifr); -void mlx5e_modify_rx_cqe_compression(struct mlx5e_priv *priv, bool val); +void mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool val); int mlx5e_vlan_rx_add_vid(struct net_device *dev, __always_unused __be16 proto, u16 vid); @@ -791,7 +804,8 @@ void mlx5e_disable_vlan_filter(struct mlx5e_priv *priv); int mlx5e_modify_rqs_vsd(struct mlx5e_priv *priv, bool vsd); int mlx5e_redirect_rqt(struct mlx5e_priv *priv, u32 rqtn, int sz, int ix); -void mlx5e_build_tir_ctx_hash(void *tirc, struct mlx5e_priv *priv); +void mlx5e_build_indir_tir_ctx_hash(struct mlx5e_priv *priv, void *tirc, + enum mlx5e_traffic_types tt); int mlx5e_open_locked(struct net_device *netdev); int mlx5e_close_locked(struct net_device *netdev); @@ -802,11 +816,12 @@ int mlx5e_get_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed); void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode); +void mlx5e_set_rq_type_params(struct mlx5e_priv *priv, u8 rq_type); static inline void mlx5e_tx_notify_hw(struct mlx5e_sq *sq, struct mlx5_wqe_ctrl_seg *ctrl, int bf_sz) { - u16 ofst = MLX5_BF_OFFSET + sq->bf_offset; + u16 ofst = sq->bf_offset; /* ensure wqe is visible to device before updating doorbell record */ dma_wmb(); @@ -832,7 +847,7 @@ static inline void mlx5e_cq_arm(struct mlx5e_cq *cq) struct mlx5_core_cq *mcq; mcq = &cq->mcq; - mlx5_cq_arm(mcq, MLX5_CQ_DB_REQ_NOT, mcq->uar->map, NULL, cq->wq.cc); + mlx5_cq_arm(mcq, MLX5_CQ_DB_REQ_NOT, mcq->uar->map, cq->wq.cc); } static inline u32 mlx5e_get_wqe_mtt_offset(struct mlx5e_rq *rq, u16 wqe_ix) @@ -840,12 +855,6 @@ static inline u32 mlx5e_get_wqe_mtt_offset(struct mlx5e_rq *rq, u16 wqe_ix) return wqe_ix * ALIGN(MLX5_MPWRQ_PAGES_PER_WQE, 8); } -static inline int mlx5e_get_max_num_channels(struct mlx5_core_dev 
*mdev) -{ - return min_t(int, mdev->priv.eq_table.num_comp_vectors, - MLX5E_MAX_NUM_CHANNELS); -} - extern const struct ethtool_ops mlx5e_ethtool_ops; #ifdef CONFIG_MLX5_CORE_EN_DCB extern const struct dcbnl_rtnl_ops mlx5e_dcbnl_ops; @@ -863,12 +872,12 @@ static inline void mlx5e_arfs_destroy_tables(struct mlx5e_priv *priv) {} static inline int mlx5e_arfs_enable(struct mlx5e_priv *priv) { - return -ENOTSUPP; + return -EOPNOTSUPP; } static inline int mlx5e_arfs_disable(struct mlx5e_priv *priv) { - return -ENOTSUPP; + return -EOPNOTSUPP; } #else int mlx5e_arfs_create_tables(struct mlx5e_priv *priv); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c index 746a92c13644..37e66eef6fb5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c @@ -37,6 +37,22 @@ enum { MLX5E_CYCLES_SHIFT = 23 }; +enum { + MLX5E_PIN_MODE_IN = 0x0, + MLX5E_PIN_MODE_OUT = 0x1, +}; + +enum { + MLX5E_OUT_PATTERN_PULSE = 0x0, + MLX5E_OUT_PATTERN_PERIODIC = 0x1, +}; + +enum { + MLX5E_EVENT_MODE_DISABLE = 0x0, + MLX5E_EVENT_MODE_REPETETIVE = 0x1, + MLX5E_EVENT_MODE_ONCE_TILL_ARM = 0x2, +}; + void mlx5e_fill_hwstamp(struct mlx5e_tstamp *tstamp, u64 timestamp, struct skb_shared_hwtstamps *hwts) { @@ -90,11 +106,12 @@ int mlx5e_hwstamp_set(struct net_device *dev, struct ifreq *ifr) return -ERANGE; } + mutex_lock(&priv->state_lock); /* RX HW timestamp */ switch (config.rx_filter) { case HWTSTAMP_FILTER_NONE: /* Reset CQE compression to Admin default */ - mlx5e_modify_rx_cqe_compression(priv, priv->params.rx_cqe_compress_def); + mlx5e_modify_rx_cqe_compression_locked(priv, priv->params.rx_cqe_compress_def); break; case HWTSTAMP_FILTER_ALL: case HWTSTAMP_FILTER_SOME: @@ -112,14 +129,16 @@ int mlx5e_hwstamp_set(struct net_device *dev, struct ifreq *ifr) case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: /* Disable CQE compression */ netdev_warn(dev, "Disabling cqe compression"); - 
mlx5e_modify_rx_cqe_compression(priv, false); + mlx5e_modify_rx_cqe_compression_locked(priv, false); config.rx_filter = HWTSTAMP_FILTER_ALL; break; default: + mutex_unlock(&priv->state_lock); return -ERANGE; } memcpy(&priv->tstamp.hwtstamp_config, &config, sizeof(config)); + mutex_unlock(&priv->state_lock); return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ? -EFAULT : 0; @@ -189,6 +208,18 @@ static int mlx5e_ptp_adjfreq(struct ptp_clock_info *ptp, s32 delta) int neg_adj = 0; struct mlx5e_tstamp *tstamp = container_of(ptp, struct mlx5e_tstamp, ptp_info); + struct mlx5e_priv *priv = + container_of(tstamp, struct mlx5e_priv, tstamp); + + if (MLX5_CAP_GEN(priv->mdev, pps_modify)) { + u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0}; + + /* For future use need to add a loop for finding all 1PPS out pins */ + MLX5_SET(mtpps_reg, in, pin_mode, MLX5E_PIN_MODE_OUT); + MLX5_SET(mtpps_reg, in, out_periodic_adjustment, delta & 0xFFFF); + + mlx5_set_mtpps(priv->mdev, in, sizeof(in)); + } if (delta < 0) { neg_adj = 1; @@ -208,6 +239,124 @@ static int mlx5e_ptp_adjfreq(struct ptp_clock_info *ptp, s32 delta) return 0; } +static int mlx5e_extts_configure(struct ptp_clock_info *ptp, + struct ptp_clock_request *rq, + int on) +{ + struct mlx5e_tstamp *tstamp = + container_of(ptp, struct mlx5e_tstamp, ptp_info); + struct mlx5e_priv *priv = + container_of(tstamp, struct mlx5e_priv, tstamp); + u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0}; + u8 pattern = 0; + int pin = -1; + int err = 0; + + if (!MLX5_CAP_GEN(priv->mdev, pps) || + !MLX5_CAP_GEN(priv->mdev, pps_modify)) + return -EOPNOTSUPP; + + if (rq->extts.index >= tstamp->ptp_info.n_pins) + return -EINVAL; + + if (on) { + pin = ptp_find_pin(tstamp->ptp, PTP_PF_EXTTS, rq->extts.index); + if (pin < 0) + return -EBUSY; + } + + if (rq->extts.flags & PTP_FALLING_EDGE) + pattern = 1; + + MLX5_SET(mtpps_reg, in, pin, pin); + MLX5_SET(mtpps_reg, in, pin_mode, MLX5E_PIN_MODE_IN); + MLX5_SET(mtpps_reg, in, pattern, pattern); + MLX5_SET(mtpps_reg, in, 
enable, on); + + err = mlx5_set_mtpps(priv->mdev, in, sizeof(in)); + if (err) + return err; + + return mlx5_set_mtppse(priv->mdev, pin, 0, + MLX5E_EVENT_MODE_REPETETIVE & on); +} + +static int mlx5e_perout_configure(struct ptp_clock_info *ptp, + struct ptp_clock_request *rq, + int on) +{ + struct mlx5e_tstamp *tstamp = + container_of(ptp, struct mlx5e_tstamp, ptp_info); + struct mlx5e_priv *priv = + container_of(tstamp, struct mlx5e_priv, tstamp); + u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0}; + u64 nsec_now, nsec_delta, time_stamp; + u64 cycles_now, cycles_delta; + struct timespec64 ts; + unsigned long flags; + int pin = -1; + s64 ns; + + if (!MLX5_CAP_GEN(priv->mdev, pps_modify)) + return -EOPNOTSUPP; + + if (rq->perout.index >= tstamp->ptp_info.n_pins) + return -EINVAL; + + if (on) { + pin = ptp_find_pin(tstamp->ptp, PTP_PF_PEROUT, + rq->perout.index); + if (pin < 0) + return -EBUSY; + } + + ts.tv_sec = rq->perout.period.sec; + ts.tv_nsec = rq->perout.period.nsec; + ns = timespec64_to_ns(&ts); + if (on) + if ((ns >> 1) != 500000000LL) + return -EINVAL; + ts.tv_sec = rq->perout.start.sec; + ts.tv_nsec = rq->perout.start.nsec; + ns = timespec64_to_ns(&ts); + cycles_now = mlx5_read_internal_timer(tstamp->mdev); + write_lock_irqsave(&tstamp->lock, flags); + nsec_now = timecounter_cyc2time(&tstamp->clock, cycles_now); + nsec_delta = ns - nsec_now; + cycles_delta = div64_u64(nsec_delta << tstamp->cycles.shift, + tstamp->cycles.mult); + write_unlock_irqrestore(&tstamp->lock, flags); + time_stamp = cycles_now + cycles_delta; + MLX5_SET(mtpps_reg, in, pin, pin); + MLX5_SET(mtpps_reg, in, pin_mode, MLX5E_PIN_MODE_OUT); + MLX5_SET(mtpps_reg, in, pattern, MLX5E_OUT_PATTERN_PERIODIC); + MLX5_SET(mtpps_reg, in, enable, on); + MLX5_SET64(mtpps_reg, in, time_stamp, time_stamp); + + return mlx5_set_mtpps(priv->mdev, in, sizeof(in)); +} + +static int mlx5e_ptp_enable(struct ptp_clock_info *ptp, + struct ptp_clock_request *rq, + int on) +{ + switch (rq->type) { + case 
PTP_CLK_REQ_EXTTS: + return mlx5e_extts_configure(ptp, rq, on); + case PTP_CLK_REQ_PEROUT: + return mlx5e_perout_configure(ptp, rq, on); + default: + return -EOPNOTSUPP; + } + return 0; +} + +static int mlx5e_ptp_verify(struct ptp_clock_info *ptp, unsigned int pin, + enum ptp_pin_function func, unsigned int chan) +{ + return (func == PTP_PF_PHYSYNC) ? -EOPNOTSUPP : 0; +} + static const struct ptp_clock_info mlx5e_ptp_clock_info = { .owner = THIS_MODULE, .max_adj = 100000000, @@ -221,6 +370,7 @@ static const struct ptp_clock_info mlx5e_ptp_clock_info = { .gettime64 = mlx5e_ptp_gettime, .settime64 = mlx5e_ptp_settime, .enable = NULL, + .verify = NULL, }; static void mlx5e_timestamp_init_config(struct mlx5e_tstamp *tstamp) @@ -229,6 +379,62 @@ static void mlx5e_timestamp_init_config(struct mlx5e_tstamp *tstamp) tstamp->hwtstamp_config.rx_filter = HWTSTAMP_FILTER_NONE; } +static int mlx5e_init_pin_config(struct mlx5e_tstamp *tstamp) +{ + int i; + + tstamp->ptp_info.pin_config = + kzalloc(sizeof(*tstamp->ptp_info.pin_config) * + tstamp->ptp_info.n_pins, GFP_KERNEL); + if (!tstamp->ptp_info.pin_config) + return -ENOMEM; + tstamp->ptp_info.enable = mlx5e_ptp_enable; + tstamp->ptp_info.verify = mlx5e_ptp_verify; + + for (i = 0; i < tstamp->ptp_info.n_pins; i++) { + snprintf(tstamp->ptp_info.pin_config[i].name, + sizeof(tstamp->ptp_info.pin_config[i].name), + "mlx5_pps%d", i); + tstamp->ptp_info.pin_config[i].index = i; + tstamp->ptp_info.pin_config[i].func = PTP_PF_NONE; + tstamp->ptp_info.pin_config[i].chan = i; + } + + return 0; +} + +static void mlx5e_get_pps_caps(struct mlx5e_priv *priv, + struct mlx5e_tstamp *tstamp) +{ + u32 out[MLX5_ST_SZ_DW(mtpps_reg)] = {0}; + + mlx5_query_mtpps(priv->mdev, out, sizeof(out)); + + tstamp->ptp_info.n_pins = MLX5_GET(mtpps_reg, out, + cap_number_of_pps_pins); + tstamp->ptp_info.n_ext_ts = MLX5_GET(mtpps_reg, out, + cap_max_num_of_pps_in_pins); + tstamp->ptp_info.n_per_out = MLX5_GET(mtpps_reg, out, + cap_max_num_of_pps_out_pins); + + 
tstamp->pps_pin_caps[0] = MLX5_GET(mtpps_reg, out, cap_pin_0_mode); + tstamp->pps_pin_caps[1] = MLX5_GET(mtpps_reg, out, cap_pin_1_mode); + tstamp->pps_pin_caps[2] = MLX5_GET(mtpps_reg, out, cap_pin_2_mode); + tstamp->pps_pin_caps[3] = MLX5_GET(mtpps_reg, out, cap_pin_3_mode); + tstamp->pps_pin_caps[4] = MLX5_GET(mtpps_reg, out, cap_pin_4_mode); + tstamp->pps_pin_caps[5] = MLX5_GET(mtpps_reg, out, cap_pin_5_mode); + tstamp->pps_pin_caps[6] = MLX5_GET(mtpps_reg, out, cap_pin_6_mode); + tstamp->pps_pin_caps[7] = MLX5_GET(mtpps_reg, out, cap_pin_7_mode); +} + +void mlx5e_pps_event_handler(struct mlx5e_priv *priv, + struct ptp_clock_event *event) +{ + struct mlx5e_tstamp *tstamp = &priv->tstamp; + + ptp_clock_event(tstamp->ptp, event); +} + void mlx5e_timestamp_init(struct mlx5e_priv *priv) { struct mlx5e_tstamp *tstamp = &priv->tstamp; @@ -272,6 +478,18 @@ void mlx5e_timestamp_init(struct mlx5e_priv *priv) tstamp->ptp_info = mlx5e_ptp_clock_info; snprintf(tstamp->ptp_info.name, 16, "mlx5 ptp"); + /* Initialize 1PPS data structures */ +#define MAX_PIN_NUM 8 + tstamp->pps_pin_caps = kzalloc(sizeof(u8) * MAX_PIN_NUM, GFP_KERNEL); + if (tstamp->pps_pin_caps) { + if (MLX5_CAP_GEN(priv->mdev, pps)) + mlx5e_get_pps_caps(priv, tstamp); + if (tstamp->ptp_info.n_pins) + mlx5e_init_pin_config(tstamp); + } else { + mlx5_core_warn(priv->mdev, "1PPS initialization failed\n"); + } + tstamp->ptp = ptp_clock_register(&tstamp->ptp_info, &priv->mdev->pdev->dev); if (IS_ERR(tstamp->ptp)) { @@ -293,5 +511,8 @@ void mlx5e_timestamp_cleanup(struct mlx5e_priv *priv) priv->tstamp.ptp = NULL; } + kfree(tstamp->pps_pin_caps); + kfree(tstamp->ptp_info.pin_config); + cancel_delayed_work_sync(&tstamp->overflow_work); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c index f175518ff07a..bd898d8deda0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c @@ 
-89,16 +89,10 @@ int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev) struct mlx5e_resources *res = &mdev->mlx5e_res; int err; - err = mlx5_alloc_map_uar(mdev, &res->cq_uar, false); - if (err) { - mlx5_core_err(mdev, "alloc_map uar failed, %d\n", err); - return err; - } - err = mlx5_core_alloc_pd(mdev, &res->pdn); if (err) { mlx5_core_err(mdev, "alloc pd failed, %d\n", err); - goto err_unmap_free_uar; + return err; } err = mlx5_core_alloc_transport_domain(mdev, &res->td.tdn); @@ -121,9 +115,6 @@ err_dealloc_transport_domain: mlx5_core_dealloc_transport_domain(mdev, res->td.tdn); err_dealloc_pd: mlx5_core_dealloc_pd(mdev, res->pdn); -err_unmap_free_uar: - mlx5_unmap_free_uar(mdev, &res->cq_uar); - return err; } @@ -134,7 +125,6 @@ void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev) mlx5_core_destroy_mkey(mdev, &res->mkey); mlx5_core_dealloc_transport_domain(mdev, res->td.tdn); mlx5_core_dealloc_pd(mdev, res->pdn); - mlx5_unmap_free_uar(mdev, &res->cq_uar); } int mlx5e_refresh_tirs_self_loopback(struct mlx5_core_dev *mdev, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c index f0b460f47f29..0523ed47f597 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c @@ -89,7 +89,7 @@ static int mlx5e_dcbnl_ieee_getets(struct net_device *netdev, int i; if (!MLX5_CAP_GEN(priv->mdev, ets)) - return -ENOTSUPP; + return -EOPNOTSUPP; ets->ets_cap = mlx5_max_tc(priv->mdev) + 1; for (i = 0; i < ets->ets_cap; i++) { @@ -236,7 +236,7 @@ static int mlx5e_dcbnl_ieee_setets(struct net_device *netdev, int err; if (!MLX5_CAP_GEN(priv->mdev, ets)) - return -ENOTSUPP; + return -EOPNOTSUPP; err = mlx5e_dbcnl_validate_ets(netdev, ets); if (err) @@ -402,7 +402,7 @@ static u8 mlx5e_dcbnl_setall(struct net_device *netdev) struct mlx5_core_dev *mdev = priv->mdev; struct ieee_ets ets; struct ieee_pfc pfc; - int err = -ENOTSUPP; + int err = 
-EOPNOTSUPP; int i; if (!MLX5_CAP_GEN(mdev, ets)) @@ -511,6 +511,11 @@ static void mlx5e_dcbnl_getpgtccfgtx(struct net_device *netdev, struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5_core_dev *mdev = priv->mdev; + if (!MLX5_CAP_GEN(priv->mdev, ets)) { + netdev_err(netdev, "%s, ets is not supported\n", __func__); + return; + } + if (priority >= CEE_DCBX_MAX_PRIO) { netdev_err(netdev, "%s, priority is out of range\n", __func__); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 33a399a8b5d5..a004a5a1a4c2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -170,7 +170,8 @@ static int mlx5e_get_sset_count(struct net_device *dev, int sset) case ETH_SS_STATS: return NUM_SW_COUNTERS + MLX5E_NUM_Q_CNTRS(priv) + - NUM_VPORT_COUNTERS + NUM_PPORT_COUNTERS + + NUM_VPORT_COUNTERS + NUM_PPORT_COUNTERS(priv) + + NUM_PCIE_COUNTERS(priv) + MLX5E_NUM_RQ_STATS(priv) + MLX5E_NUM_SQ_STATS(priv) + MLX5E_NUM_PFC_COUNTERS(priv) + @@ -218,6 +219,14 @@ static void mlx5e_fill_stats_strings(struct mlx5e_priv *priv, uint8_t *data) strcpy(data + (idx++) * ETH_GSTRING_LEN, pport_2819_stats_desc[i].format); + for (i = 0; i < NUM_PPORT_PHY_STATISTICAL_COUNTERS(priv); i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + pport_phy_statistical_stats_desc[i].format); + + for (i = 0; i < NUM_PCIE_PERF_COUNTERS(priv); i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + pcie_perf_stats_desc[i].format); + for (prio = 0; prio < NUM_PPORT_PRIO; prio++) { for (i = 0; i < NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS; i++) sprintf(data + (idx++) * ETH_GSTRING_LEN, @@ -330,6 +339,14 @@ static void mlx5e_get_ethtool_stats(struct net_device *dev, data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.RFC_2819_counters, pport_2819_stats_desc, i); + for (i = 0; i < NUM_PPORT_PHY_STATISTICAL_COUNTERS(priv); i++) + data[idx++] = 
MLX5E_READ_CTR64_BE(&priv->stats.pport.phy_statistical_counters, + pport_phy_statistical_stats_desc, i); + + for (i = 0; i < NUM_PCIE_PERF_COUNTERS(priv); i++) + data[idx++] = MLX5E_READ_CTR32_BE(&priv->stats.pcie.pcie_perf_counters, + pcie_perf_stats_desc, i); + for (prio = 0; prio < NUM_PPORT_PRIO; prio++) { for (i = 0; i < NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS; i++) data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.per_prio_counters[prio], @@ -535,7 +552,7 @@ static void mlx5e_get_channels(struct net_device *dev, { struct mlx5e_priv *priv = netdev_priv(dev); - ch->max_combined = mlx5e_get_max_num_channels(priv->mdev); + ch->max_combined = priv->profile->max_nch(priv->mdev); ch->combined_count = priv->params.num_channels; } @@ -543,7 +560,6 @@ static int mlx5e_set_channels(struct net_device *dev, struct ethtool_channels *ch) { struct mlx5e_priv *priv = netdev_priv(dev); - int ncv = mlx5e_get_max_num_channels(priv->mdev); unsigned int count = ch->combined_count; bool arfs_enabled; bool was_opened; @@ -554,16 +570,6 @@ static int mlx5e_set_channels(struct net_device *dev, __func__); return -EINVAL; } - if (ch->rx_count || ch->tx_count) { - netdev_info(dev, "%s: separate rx/tx count not supported\n", - __func__); - return -EINVAL; - } - if (count > ncv) { - netdev_info(dev, "%s: count (%d) > max (%d)\n", - __func__, count, ncv); - return -EINVAL; - } if (priv->params.num_channels == count) return 0; @@ -606,7 +612,7 @@ static int mlx5e_get_coalesce(struct net_device *netdev, struct mlx5e_priv *priv = netdev_priv(netdev); if (!MLX5_CAP_GEN(priv->mdev, cq_moderation)) - return -ENOTSUPP; + return -EOPNOTSUPP; coal->rx_coalesce_usecs = priv->params.rx_cq_moderation.usec; coal->rx_max_coalesced_frames = priv->params.rx_cq_moderation.pkts; @@ -631,7 +637,7 @@ static int mlx5e_set_coalesce(struct net_device *netdev, int i; if (!MLX5_CAP_GEN(mdev, cq_moderation)) - return -ENOTSUPP; + return -EOPNOTSUPP; mutex_lock(&priv->state_lock); @@ -991,15 +997,18 @@ static int 
mlx5e_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, static void mlx5e_modify_tirs_hash(struct mlx5e_priv *priv, void *in, int inlen) { - struct mlx5_core_dev *mdev = priv->mdev; void *tirc = MLX5_ADDR_OF(modify_tir_in, in, ctx); - int i; + struct mlx5_core_dev *mdev = priv->mdev; + int ctxlen = MLX5_ST_SZ_BYTES(tirc); + int tt; MLX5_SET(modify_tir_in, in, bitmask.hash, 1); - mlx5e_build_tir_ctx_hash(tirc, priv); - for (i = 0; i < MLX5E_NUM_INDIR_TIRS; i++) - mlx5_core_modify_tir(mdev, priv->indir_tir[i].tirn, in, inlen); + for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { + memset(tirc, 0, ctxlen); + mlx5e_build_indir_tir_ctx_hash(priv, tirc, tt); + mlx5_core_modify_tir(mdev, priv->indir_tir[tt].tirn, in, inlen); + } } static int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir, @@ -1007,6 +1016,7 @@ static int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir, { struct mlx5e_priv *priv = netdev_priv(dev); int inlen = MLX5_ST_SZ_BYTES(modify_tir_in); + bool hash_changed = false; void *in; if ((hfunc != ETH_RSS_HASH_NO_CHANGE) && @@ -1028,14 +1038,21 @@ static int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir, mlx5e_redirect_rqt(priv, rqtn, MLX5E_INDIR_RQT_SIZE, 0); } - if (key) + if (hfunc != ETH_RSS_HASH_NO_CHANGE && + hfunc != priv->params.rss_hfunc) { + priv->params.rss_hfunc = hfunc; + hash_changed = true; + } + + if (key) { memcpy(priv->params.toeplitz_hash_key, key, sizeof(priv->params.toeplitz_hash_key)); + hash_changed = hash_changed || + priv->params.rss_hfunc == ETH_RSS_HASH_TOP; + } - if (hfunc != ETH_RSS_HASH_NO_CHANGE) - priv->params.rss_hfunc = hfunc; - - mlx5e_modify_tirs_hash(priv, in, inlen); + if (hash_changed) + mlx5e_modify_tirs_hash(priv, in, inlen); mutex_unlock(&priv->state_lock); @@ -1307,7 +1324,7 @@ static int mlx5e_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol) u32 mlx5_wol_mode; if (!wol_supported) - return -ENOTSUPP; + return -EOPNOTSUPP; if (wol->wolopts & ~wol_supported) return -EINVAL; 
@@ -1437,7 +1454,7 @@ static int set_pflag_rx_cqe_based_moder(struct net_device *netdev, bool enable) if (rx_cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE && !MLX5_CAP_GEN(mdev, cq_period_start_from_cqe)) - return -ENOTSUPP; + return -EOPNOTSUPP; if (!rx_mode_changed) return 0; @@ -1459,28 +1476,20 @@ static int set_pflag_rx_cqe_compress(struct net_device *netdev, { struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5_core_dev *mdev = priv->mdev; - int err = 0; - bool reset; if (!MLX5_CAP_GEN(mdev, cqe_compression)) - return -ENOTSUPP; + return -EOPNOTSUPP; if (enable && priv->tstamp.hwtstamp_config.rx_filter != HWTSTAMP_FILTER_NONE) { netdev_err(netdev, "Can't enable cqe compression while timestamping is enabled.\n"); return -EINVAL; } - reset = test_bit(MLX5E_STATE_OPENED, &priv->state); - - if (reset) - mlx5e_close_locked(netdev); - - MLX5E_SET_PFLAG(priv, MLX5E_PFLAG_RX_CQE_COMPRESS, enable); + mlx5e_modify_rx_cqe_compression_locked(priv, enable); priv->params.rx_cqe_compress_def = enable; + mlx5e_set_rq_type_params(priv, priv->params.rq_wq_type); - if (reset) - err = mlx5e_open_locked(netdev); - return err; + return 0; } static int mlx5e_handle_pflag(struct net_device *netdev, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c index 1fe80de5d68f..f2762e45c8ae 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c @@ -150,7 +150,8 @@ static int mlx5e_vport_context_update_vlans(struct mlx5e_priv *priv) enum mlx5e_vlan_rule_type { MLX5E_VLAN_RULE_TYPE_UNTAGGED, - MLX5E_VLAN_RULE_TYPE_ANY_VID, + MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID, + MLX5E_VLAN_RULE_TYPE_ANY_STAG_VID, MLX5E_VLAN_RULE_TYPE_MATCH_VID, }; @@ -172,19 +173,31 @@ static int __mlx5e_add_vlan_rule(struct mlx5e_priv *priv, dest.ft = priv->fs.l2.ft.t; spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, 
outer_headers.vlan_tag); + switch (rule_type) { case MLX5E_VLAN_RULE_TYPE_UNTAGGED: rule_p = &priv->fs.vlan.untagged_rule; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.cvlan_tag); break; - case MLX5E_VLAN_RULE_TYPE_ANY_VID: - rule_p = &priv->fs.vlan.any_vlan_rule; - MLX5_SET(fte_match_param, spec->match_value, outer_headers.vlan_tag, 1); + case MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID: + rule_p = &priv->fs.vlan.any_cvlan_rule; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.cvlan_tag); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.cvlan_tag, 1); + break; + case MLX5E_VLAN_RULE_TYPE_ANY_STAG_VID: + rule_p = &priv->fs.vlan.any_svlan_rule; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.svlan_tag); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.svlan_tag, 1); break; default: /* MLX5E_VLAN_RULE_TYPE_MATCH_VID */ rule_p = &priv->fs.vlan.active_vlans_rule[vid]; - MLX5_SET(fte_match_param, spec->match_value, outer_headers.vlan_tag, 1); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.cvlan_tag); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.cvlan_tag, 1); MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.first_vid); MLX5_SET(fte_match_param, spec->match_value, outer_headers.first_vid, @@ -235,10 +248,16 @@ static void mlx5e_del_vlan_rule(struct mlx5e_priv *priv, priv->fs.vlan.untagged_rule = NULL; } break; - case MLX5E_VLAN_RULE_TYPE_ANY_VID: - if (priv->fs.vlan.any_vlan_rule) { - mlx5_del_flow_rules(priv->fs.vlan.any_vlan_rule); - priv->fs.vlan.any_vlan_rule = NULL; + case MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID: + if (priv->fs.vlan.any_cvlan_rule) { + mlx5_del_flow_rules(priv->fs.vlan.any_cvlan_rule); + priv->fs.vlan.any_cvlan_rule = NULL; + } + break; + case MLX5E_VLAN_RULE_TYPE_ANY_STAG_VID: + if (priv->fs.vlan.any_svlan_rule) { + mlx5_del_flow_rules(priv->fs.vlan.any_svlan_rule); + priv->fs.vlan.any_svlan_rule = 
NULL; } break; case MLX5E_VLAN_RULE_TYPE_MATCH_VID: @@ -252,6 +271,23 @@ static void mlx5e_del_vlan_rule(struct mlx5e_priv *priv, } } +static void mlx5e_del_any_vid_rules(struct mlx5e_priv *priv) +{ + mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID, 0); + mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_STAG_VID, 0); +} + +static int mlx5e_add_any_vid_rules(struct mlx5e_priv *priv) +{ + int err; + + err = mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID, 0); + if (err) + return err; + + return mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_STAG_VID, 0); +} + void mlx5e_enable_vlan_filter(struct mlx5e_priv *priv) { if (!priv->fs.vlan.filter_disabled) @@ -260,7 +296,7 @@ void mlx5e_enable_vlan_filter(struct mlx5e_priv *priv) priv->fs.vlan.filter_disabled = false; if (priv->netdev->flags & IFF_PROMISC) return; - mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_VID, 0); + mlx5e_del_any_vid_rules(priv); } void mlx5e_disable_vlan_filter(struct mlx5e_priv *priv) @@ -271,7 +307,7 @@ void mlx5e_disable_vlan_filter(struct mlx5e_priv *priv) priv->fs.vlan.filter_disabled = true; if (priv->netdev->flags & IFF_PROMISC) return; - mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_VID, 0); + mlx5e_add_any_vid_rules(priv); } int mlx5e_vlan_rx_add_vid(struct net_device *dev, __always_unused __be16 proto, @@ -308,7 +344,7 @@ static void mlx5e_add_vlan_rules(struct mlx5e_priv *priv) if (priv->fs.vlan.filter_disabled && !(priv->netdev->flags & IFF_PROMISC)) - mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_VID, 0); + mlx5e_add_any_vid_rules(priv); } static void mlx5e_del_vlan_rules(struct mlx5e_priv *priv) @@ -323,7 +359,7 @@ static void mlx5e_del_vlan_rules(struct mlx5e_priv *priv) if (priv->fs.vlan.filter_disabled && !(priv->netdev->flags & IFF_PROMISC)) - mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_VID, 0); + mlx5e_del_any_vid_rules(priv); } #define mlx5e_for_each_hash_node(hn, tmp, hash, i) \ @@ -503,8 +539,7 @@ void 
mlx5e_set_rx_mode_work(struct work_struct *work) if (enable_promisc) { mlx5e_add_l2_flow_rule(priv, &ea->promisc, MLX5E_PROMISC); if (!priv->fs.vlan.filter_disabled) - mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_VID, - 0); + mlx5e_add_any_vid_rules(priv); } if (enable_allmulti) mlx5e_add_l2_flow_rule(priv, &ea->allmulti, MLX5E_ALLMULTI); @@ -519,8 +554,7 @@ void mlx5e_set_rx_mode_work(struct work_struct *work) mlx5e_del_l2_flow_rule(priv, &ea->allmulti); if (disable_promisc) { if (!priv->fs.vlan.filter_disabled) - mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_VID, - 0); + mlx5e_del_any_vid_rules(priv); mlx5e_del_l2_flow_rule(priv, &ea->promisc); } @@ -976,11 +1010,13 @@ err_destroy_flow_table: return err; } -#define MLX5E_NUM_VLAN_GROUPS 2 +#define MLX5E_NUM_VLAN_GROUPS 3 #define MLX5E_VLAN_GROUP0_SIZE BIT(12) #define MLX5E_VLAN_GROUP1_SIZE BIT(1) +#define MLX5E_VLAN_GROUP2_SIZE BIT(0) #define MLX5E_VLAN_TABLE_SIZE (MLX5E_VLAN_GROUP0_SIZE +\ - MLX5E_VLAN_GROUP1_SIZE) + MLX5E_VLAN_GROUP1_SIZE +\ + MLX5E_VLAN_GROUP2_SIZE) static int __mlx5e_create_vlan_table_groups(struct mlx5e_flow_table *ft, u32 *in, int inlen) @@ -991,7 +1027,7 @@ static int __mlx5e_create_vlan_table_groups(struct mlx5e_flow_table *ft, u32 *in memset(in, 0, inlen); MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); - MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.vlan_tag); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.cvlan_tag); MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.first_vid); MLX5_SET_CFG(in, start_flow_index, ix); ix += MLX5E_VLAN_GROUP0_SIZE; @@ -1003,7 +1039,7 @@ static int __mlx5e_create_vlan_table_groups(struct mlx5e_flow_table *ft, u32 *in memset(in, 0, inlen); MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); - MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.vlan_tag); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.cvlan_tag); MLX5_SET_CFG(in, start_flow_index, ix); ix += MLX5E_VLAN_GROUP1_SIZE; 
MLX5_SET_CFG(in, end_flow_index, ix - 1); @@ -1012,6 +1048,17 @@ static int __mlx5e_create_vlan_table_groups(struct mlx5e_flow_table *ft, u32 *in goto err_destroy_groups; ft->num_groups++; + memset(in, 0, inlen); + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.svlan_tag); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_VLAN_GROUP2_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err_destroy_groups; + ft->num_groups++; + return 0; err_destroy_groups: @@ -1089,7 +1136,7 @@ int mlx5e_create_flow_steering(struct mlx5e_priv *priv) MLX5_FLOW_NAMESPACE_KERNEL); if (!priv->fs.ns) - return -EINVAL; + return -EOPNOTSUPP; err = mlx5e_arfs_create_tables(priv); if (err) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c index d088effd7160..d55fff0ba388 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c @@ -92,7 +92,7 @@ static struct mlx5e_ethtool_table *get_flow_table(struct mlx5e_priv *priv, ns = mlx5_get_flow_namespace(priv->mdev, MLX5_FLOW_NAMESPACE_ETHTOOL); if (!ns) - return ERR_PTR(-ENOTSUPP); + return ERR_PTR(-EOPNOTSUPP); table_size = min_t(u32, BIT(MLX5_CAP_FLOWTABLE(priv->mdev, flow_table_properties_nic_receive.log_max_ft_size)), @@ -237,9 +237,9 @@ static int set_flow_attrs(u32 *match_c, u32 *match_v, if ((fs->flow_type & FLOW_EXT) && (fs->m_ext.vlan_tci & cpu_to_be16(VLAN_VID_MASK))) { MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, - vlan_tag, 1); + cvlan_tag, 1); MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, - vlan_tag, 1); + cvlan_tag, 1); MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, first_vid, 0xfff); MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 1236b27b1493..8ef64c4db2c2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -31,6 +31,7 @@ */ #include <net/tc_act/tc_gact.h> +#include <linux/crash_dump.h> #include <net/pkt_cls.h> #include <linux/mlx5/fs.h> #include <net/vxlan.h> @@ -78,21 +79,30 @@ static bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev) MLX5_CAP_ETH(mdev, reg_umr_sq); } -static void mlx5e_set_rq_type_params(struct mlx5e_priv *priv, u8 rq_type) +void mlx5e_set_rq_type_params(struct mlx5e_priv *priv, u8 rq_type) { priv->params.rq_wq_type = rq_type; + priv->params.lro_wqe_sz = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ; switch (priv->params.rq_wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: - priv->params.log_rq_size = MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW; + priv->params.log_rq_size = is_kdump_kernel() ? + MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW : + MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW; priv->params.mpwqe_log_stride_sz = MLX5E_GET_PFLAG(priv, MLX5E_PFLAG_RX_CQE_COMPRESS) ? - MLX5_MPWRQ_LOG_STRIDE_SIZE_CQE_COMPRESS : - MLX5_MPWRQ_LOG_STRIDE_SIZE; + MLX5_MPWRQ_CQE_CMPRS_LOG_STRIDE_SZ(priv->mdev) : + MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(priv->mdev); priv->params.mpwqe_log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ - priv->params.mpwqe_log_stride_sz; break; default: /* MLX5_WQ_TYPE_LINKED_LIST */ - priv->params.log_rq_size = MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE; + priv->params.log_rq_size = is_kdump_kernel() ? 
+ MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE : + MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE; + + /* Extra room needed for build_skb */ + priv->params.lro_wqe_sz -= MLX5_RX_HEADROOM + + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); } priv->params.min_rx_wqes = mlx5_min_rx_wqes(priv->params.rq_wq_type, BIT(priv->params.log_rq_size)); @@ -268,6 +278,12 @@ static void mlx5e_update_pport_counters(struct mlx5e_priv *priv) MLX5_SET(ppcnt_reg, in, grp, MLX5_PHYSICAL_LAYER_COUNTERS_GROUP); mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); + if (MLX5_CAP_PCAM_FEATURE(mdev, ppcnt_statistical_group)) { + out = pstats->phy_statistical_counters; + MLX5_SET(ppcnt_reg, in, grp, MLX5_PHYSICAL_LAYER_STATISTICAL_GROUP); + mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); + } + MLX5_SET(ppcnt_reg, in, grp, MLX5_PER_PRIORITY_COUNTERS_GROUP); for (prio = 0; prio < NUM_PPORT_PRIO; prio++) { out = pstats->per_prio_counters[prio]; @@ -291,11 +307,34 @@ static void mlx5e_update_q_counter(struct mlx5e_priv *priv) &qcnt->rx_out_of_buffer); } +static void mlx5e_update_pcie_counters(struct mlx5e_priv *priv) +{ + struct mlx5e_pcie_stats *pcie_stats = &priv->stats.pcie; + struct mlx5_core_dev *mdev = priv->mdev; + int sz = MLX5_ST_SZ_BYTES(mpcnt_reg); + void *out; + u32 *in; + + if (!MLX5_CAP_MCAM_FEATURE(mdev, pcie_performance_group)) + return; + + in = mlx5_vzalloc(sz); + if (!in) + return; + + out = pcie_stats->pcie_perf_counters; + MLX5_SET(mpcnt_reg, in, grp, MLX5_PCIE_PERFORMANCE_COUNTERS_GROUP); + mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_MPCNT, 0, 0); + + kvfree(in); +} + void mlx5e_update_stats(struct mlx5e_priv *priv) { - mlx5e_update_q_counter(priv); - mlx5e_update_vport_counters(priv); + mlx5e_update_pcie_counters(priv); mlx5e_update_pport_counters(priv); + mlx5e_update_vport_counters(priv); + mlx5e_update_q_counter(priv); mlx5e_update_sw_counters(priv); } @@ -317,6 +356,8 @@ static void mlx5e_async_event(struct mlx5_core_dev *mdev, void *vpriv, enum 
mlx5_dev_event event, unsigned long param) { struct mlx5e_priv *priv = vpriv; + struct ptp_clock_event ptp_event; + struct mlx5_eqe *eqe = NULL; if (!test_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLED, &priv->state)) return; @@ -326,7 +367,15 @@ static void mlx5e_async_event(struct mlx5_core_dev *mdev, void *vpriv, case MLX5_DEV_EVENT_PORT_DOWN: queue_work(priv->wq, &priv->update_carrier_work); break; - + case MLX5_DEV_EVENT_PPS: + eqe = (struct mlx5_eqe *)param; + ptp_event.type = PTP_CLOCK_EXTTS; + ptp_event.index = eqe->data.pps.pin; + ptp_event.timestamp = + timecounter_cyc2time(&priv->tstamp.clock, + be64_to_cpu(eqe->data.pps.time_stamp)); + mlx5e_pps_event_handler(vpriv, &ptp_event); + break; default: break; } @@ -343,9 +392,6 @@ static void mlx5e_disable_async_events(struct mlx5e_priv *priv) synchronize_irq(mlx5_get_msix_vec(priv->mdev, MLX5_EQ_VEC_ASYNC)); } -#define MLX5E_HW2SW_MTU(hwmtu) (hwmtu - (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN)) -#define MLX5E_SW2HW_MTU(swmtu) (swmtu + (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN)) - static inline int mlx5e_get_wqe_mtt_sz(void) { /* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes. 
@@ -372,7 +418,7 @@ static inline void mlx5e_build_umr_wqe(struct mlx5e_rq *rq, struct mlx5e_sq *sq, cseg->imm = rq->mkey_be; ucseg->flags = MLX5_UMR_TRANSLATION_OFFSET_EN; - ucseg->klm_octowords = + ucseg->xlt_octowords = cpu_to_be16(MLX5_MTT_OCTW(MLX5_MPWRQ_PAGES_PER_WQE)); ucseg->bsf_octowords = cpu_to_be16(MLX5_MTT_OCTW(umr_wqe_mtt_offset)); @@ -534,9 +580,13 @@ static int mlx5e_create_rq(struct mlx5e_channel *c, goto err_rq_wq_destroy; } - rq->buff.map_dir = DMA_FROM_DEVICE; - if (rq->xdp_prog) + if (rq->xdp_prog) { rq->buff.map_dir = DMA_BIDIRECTIONAL; + rq->rx_headroom = XDP_PACKET_HEADROOM; + } else { + rq->buff.map_dir = DMA_FROM_DEVICE; + rq->rx_headroom = MLX5_RX_HEADROOM; + } switch (priv->params.rq_wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: @@ -586,7 +636,7 @@ static int mlx5e_create_rq(struct mlx5e_channel *c, byte_count = rq->buff.wqe_sz; /* calc the required page order */ - frag_sz = MLX5_RX_HEADROOM + + frag_sz = rq->rx_headroom + byte_count /* packet data */ + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); frag_sz = SKB_DATA_ALIGN(frag_sz); @@ -967,10 +1017,11 @@ static int mlx5e_create_sq(struct mlx5e_channel *c, sq->channel = c; sq->tc = tc; - err = mlx5_alloc_map_uar(mdev, &sq->uar, !!MLX5_CAP_GEN(mdev, bf)); + err = mlx5_alloc_bfreg(mdev, &sq->bfreg, MLX5_CAP_GEN(mdev, bf), false); if (err) return err; + sq->uar_map = sq->bfreg.map; param->wq.db_numa_node = cpu_to_node(c->cpu); err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq, @@ -979,17 +1030,12 @@ static int mlx5e_create_sq(struct mlx5e_channel *c, goto err_unmap_free_uar; sq->wq.db = &sq->wq.db[MLX5_SND_DBR]; - if (sq->uar.bf_map) { + if (sq->bfreg.wc) set_bit(MLX5E_SQ_STATE_BF_ENABLE, &sq->state); - sq->uar_map = sq->uar.bf_map; - } else { - sq->uar_map = sq->uar.map; - } + sq->bf_buf_size = (1 << MLX5_CAP_GEN(mdev, log_bf_reg_size)) / 2; sq->max_inline = param->max_inline; - sq->min_inline_mode = - MLX5_CAP_ETH(mdev, wqe_inline_mode) == MLX5_CAP_INLINE_MODE_VPORT_CONTEXT
? - param->min_inline_mode : 0; + sq->min_inline_mode = param->min_inline_mode; err = mlx5e_alloc_sq_db(sq, cpu_to_node(c->cpu)); if (err) @@ -1012,7 +1058,7 @@ err_sq_wq_destroy: mlx5_wq_destroy(&sq->wq_ctrl); err_unmap_free_uar: - mlx5_unmap_free_uar(mdev, &sq->uar); + mlx5_free_bfreg(mdev, &sq->bfreg); return err; } @@ -1024,7 +1070,7 @@ static void mlx5e_destroy_sq(struct mlx5e_sq *sq) mlx5e_free_sq_db(sq); mlx5_wq_destroy(&sq->wq_ctrl); - mlx5_unmap_free_uar(priv->mdev, &sq->uar); + mlx5_free_bfreg(priv->mdev, &sq->bfreg); } static int mlx5e_enable_sq(struct mlx5e_sq *sq, struct mlx5e_sq_param *param) @@ -1053,12 +1099,15 @@ static int mlx5e_enable_sq(struct mlx5e_sq *sq, struct mlx5e_sq_param *param) MLX5_SET(sqc, sqc, tis_num_0, param->type == MLX5E_SQ_ICO ? 0 : priv->tisn[sq->tc]); MLX5_SET(sqc, sqc, cqn, sq->cq.mcq.cqn); - MLX5_SET(sqc, sqc, min_wqe_inline_mode, sq->min_inline_mode); + + if (MLX5_CAP_ETH(mdev, wqe_inline_mode) == MLX5_CAP_INLINE_MODE_VPORT_CONTEXT) + MLX5_SET(sqc, sqc, min_wqe_inline_mode, sq->min_inline_mode); + MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST); MLX5_SET(sqc, sqc, tis_lst_sz, param->type == MLX5E_SQ_ICO ? 
0 : 1); MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC); - MLX5_SET(wq, wq, uar_page, sq->uar.index); + MLX5_SET(wq, wq, uar_page, sq->bfreg.index); MLX5_SET(wq, wq, log_wq_pg_sz, sq->wq_ctrl.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); MLX5_SET64(wq, wq, dbr_addr, sq->wq_ctrl.db.dma); @@ -1216,7 +1265,6 @@ static int mlx5e_create_cq(struct mlx5e_channel *c, mcq->comp = mlx5e_completion_event; mcq->event = mlx5e_cq_error_event; mcq->irqn = irqn; - mcq->uar = &mdev->mlx5e_res.cq_uar; for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) { struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, i); @@ -1265,7 +1313,7 @@ static int mlx5e_enable_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param) MLX5_SET(cqc, cqc, cq_period_mode, param->cq_period_mode); MLX5_SET(cqc, cqc, c_eqn, eqn); - MLX5_SET(cqc, cqc, uar_page, mcq->uar->index); + MLX5_SET(cqc, cqc, uar_page, mdev->priv.uar->index); MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); MLX5_SET64(cqc, cqc, dbr_addr, cq->wq_ctrl.db.dma); @@ -1472,6 +1520,14 @@ static int mlx5e_set_tx_maxrate(struct net_device *dev, int index, u32 rate) return err; } +static inline int mlx5e_get_max_num_channels(struct mlx5_core_dev *mdev) +{ + return is_kdump_kernel() ? 
+ MLX5E_MIN_NUM_CHANNELS : + min_t(int, mdev->priv.eq_table.num_comp_vectors, + MLX5E_MAX_NUM_CHANNELS); +} + static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, struct mlx5e_channel_param *cparam, struct mlx5e_channel **cp) @@ -1677,7 +1733,7 @@ static void mlx5e_build_common_cq_param(struct mlx5e_priv *priv, { void *cqc = param->cqc; - MLX5_SET(cqc, cqc, uar_page, priv->mdev->mlx5e_res.cq_uar.index); + MLX5_SET(cqc, cqc, uar_page, priv->mdev->priv.uar->index); } static void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv, @@ -1756,8 +1812,7 @@ static void mlx5e_build_xdpsq_param(struct mlx5e_priv *priv, MLX5_SET(wq, wq, log_wq_sz, priv->params.log_sq_size); param->max_inline = priv->params.tx_max_inline; - /* FOR XDP SQs will support only L2 inline mode */ - param->min_inline_mode = MLX5_INLINE_MODE_NONE; + param->min_inline_mode = priv->params.tx_min_inline_mode; param->type = MLX5E_SQ_XDP; } @@ -2022,8 +2077,23 @@ static void mlx5e_build_tir_ctx_lro(void *tirc, struct mlx5e_priv *priv) MLX5_SET(tirc, tirc, lro_timeout_period_usecs, priv->params.lro_timeout); } -void mlx5e_build_tir_ctx_hash(void *tirc, struct mlx5e_priv *priv) +void mlx5e_build_indir_tir_ctx_hash(struct mlx5e_priv *priv, void *tirc, + enum mlx5e_traffic_types tt) { + void *hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer); + +#define MLX5_HASH_IP (MLX5_HASH_FIELD_SEL_SRC_IP |\ + MLX5_HASH_FIELD_SEL_DST_IP) + +#define MLX5_HASH_IP_L4PORTS (MLX5_HASH_FIELD_SEL_SRC_IP |\ + MLX5_HASH_FIELD_SEL_DST_IP |\ + MLX5_HASH_FIELD_SEL_L4_SPORT |\ + MLX5_HASH_FIELD_SEL_L4_DPORT) + +#define MLX5_HASH_IP_IPSEC_SPI (MLX5_HASH_FIELD_SEL_SRC_IP |\ + MLX5_HASH_FIELD_SEL_DST_IP |\ + MLX5_HASH_FIELD_SEL_IPSEC_SPI) + MLX5_SET(tirc, tirc, rx_hash_fn, mlx5e_rx_hash_fn(priv->params.rss_hfunc)); if (priv->params.rss_hfunc == ETH_RSS_HASH_TOP) { @@ -2035,6 +2105,88 @@ void mlx5e_build_tir_ctx_hash(void *tirc, struct mlx5e_priv *priv) MLX5_SET(tirc, tirc, rx_hash_symmetric, 1); memcpy(rss_key, 
priv->params.toeplitz_hash_key, len); } + + switch (tt) { + case MLX5E_TT_IPV4_TCP: + MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, + MLX5_L3_PROT_TYPE_IPV4); + MLX5_SET(rx_hash_field_select, hfso, l4_prot_type, + MLX5_L4_PROT_TYPE_TCP); + MLX5_SET(rx_hash_field_select, hfso, selected_fields, + MLX5_HASH_IP_L4PORTS); + break; + + case MLX5E_TT_IPV6_TCP: + MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, + MLX5_L3_PROT_TYPE_IPV6); + MLX5_SET(rx_hash_field_select, hfso, l4_prot_type, + MLX5_L4_PROT_TYPE_TCP); + MLX5_SET(rx_hash_field_select, hfso, selected_fields, + MLX5_HASH_IP_L4PORTS); + break; + + case MLX5E_TT_IPV4_UDP: + MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, + MLX5_L3_PROT_TYPE_IPV4); + MLX5_SET(rx_hash_field_select, hfso, l4_prot_type, + MLX5_L4_PROT_TYPE_UDP); + MLX5_SET(rx_hash_field_select, hfso, selected_fields, + MLX5_HASH_IP_L4PORTS); + break; + + case MLX5E_TT_IPV6_UDP: + MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, + MLX5_L3_PROT_TYPE_IPV6); + MLX5_SET(rx_hash_field_select, hfso, l4_prot_type, + MLX5_L4_PROT_TYPE_UDP); + MLX5_SET(rx_hash_field_select, hfso, selected_fields, + MLX5_HASH_IP_L4PORTS); + break; + + case MLX5E_TT_IPV4_IPSEC_AH: + MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, + MLX5_L3_PROT_TYPE_IPV4); + MLX5_SET(rx_hash_field_select, hfso, selected_fields, + MLX5_HASH_IP_IPSEC_SPI); + break; + + case MLX5E_TT_IPV6_IPSEC_AH: + MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, + MLX5_L3_PROT_TYPE_IPV6); + MLX5_SET(rx_hash_field_select, hfso, selected_fields, + MLX5_HASH_IP_IPSEC_SPI); + break; + + case MLX5E_TT_IPV4_IPSEC_ESP: + MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, + MLX5_L3_PROT_TYPE_IPV4); + MLX5_SET(rx_hash_field_select, hfso, selected_fields, + MLX5_HASH_IP_IPSEC_SPI); + break; + + case MLX5E_TT_IPV6_IPSEC_ESP: + MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, + MLX5_L3_PROT_TYPE_IPV6); + MLX5_SET(rx_hash_field_select, hfso, selected_fields, + MLX5_HASH_IP_IPSEC_SPI); + break; + + case 
MLX5E_TT_IPV4: + MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, + MLX5_L3_PROT_TYPE_IPV4); + MLX5_SET(rx_hash_field_select, hfso, selected_fields, + MLX5_HASH_IP); + break; + + case MLX5E_TT_IPV6: + MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, + MLX5_L3_PROT_TYPE_IPV6); + MLX5_SET(rx_hash_field_select, hfso, selected_fields, + MLX5_HASH_IP); + break; + default: + WARN_ONCE(true, "%s: bad traffic type!\n", __func__); + } } static int mlx5e_modify_tirs_lro(struct mlx5e_priv *priv) @@ -2296,7 +2448,6 @@ static int mlx5e_create_drop_cq(struct mlx5e_priv *priv, mcq->comp = mlx5e_completion_event; mcq->event = mlx5e_cq_error_event; mcq->irqn = irqn; - mcq->uar = &mdev->mlx5e_res.cq_uar; cq->priv = priv; @@ -2404,110 +2555,13 @@ void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv) static void mlx5e_build_indir_tir_ctx(struct mlx5e_priv *priv, u32 *tirc, enum mlx5e_traffic_types tt) { - void *hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer); - MLX5_SET(tirc, tirc, transport_domain, priv->mdev->mlx5e_res.td.tdn); -#define MLX5_HASH_IP (MLX5_HASH_FIELD_SEL_SRC_IP |\ - MLX5_HASH_FIELD_SEL_DST_IP) - -#define MLX5_HASH_IP_L4PORTS (MLX5_HASH_FIELD_SEL_SRC_IP |\ - MLX5_HASH_FIELD_SEL_DST_IP |\ - MLX5_HASH_FIELD_SEL_L4_SPORT |\ - MLX5_HASH_FIELD_SEL_L4_DPORT) - -#define MLX5_HASH_IP_IPSEC_SPI (MLX5_HASH_FIELD_SEL_SRC_IP |\ - MLX5_HASH_FIELD_SEL_DST_IP |\ - MLX5_HASH_FIELD_SEL_IPSEC_SPI) - mlx5e_build_tir_ctx_lro(tirc, priv); MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT); MLX5_SET(tirc, tirc, indirect_table, priv->indir_rqt.rqtn); - mlx5e_build_tir_ctx_hash(tirc, priv); - - switch (tt) { - case MLX5E_TT_IPV4_TCP: - MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, - MLX5_L3_PROT_TYPE_IPV4); - MLX5_SET(rx_hash_field_select, hfso, l4_prot_type, - MLX5_L4_PROT_TYPE_TCP); - MLX5_SET(rx_hash_field_select, hfso, selected_fields, - MLX5_HASH_IP_L4PORTS); - break; - - case MLX5E_TT_IPV6_TCP: - MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, - 
MLX5_L3_PROT_TYPE_IPV6); - MLX5_SET(rx_hash_field_select, hfso, l4_prot_type, - MLX5_L4_PROT_TYPE_TCP); - MLX5_SET(rx_hash_field_select, hfso, selected_fields, - MLX5_HASH_IP_L4PORTS); - break; - - case MLX5E_TT_IPV4_UDP: - MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, - MLX5_L3_PROT_TYPE_IPV4); - MLX5_SET(rx_hash_field_select, hfso, l4_prot_type, - MLX5_L4_PROT_TYPE_UDP); - MLX5_SET(rx_hash_field_select, hfso, selected_fields, - MLX5_HASH_IP_L4PORTS); - break; - - case MLX5E_TT_IPV6_UDP: - MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, - MLX5_L3_PROT_TYPE_IPV6); - MLX5_SET(rx_hash_field_select, hfso, l4_prot_type, - MLX5_L4_PROT_TYPE_UDP); - MLX5_SET(rx_hash_field_select, hfso, selected_fields, - MLX5_HASH_IP_L4PORTS); - break; - - case MLX5E_TT_IPV4_IPSEC_AH: - MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, - MLX5_L3_PROT_TYPE_IPV4); - MLX5_SET(rx_hash_field_select, hfso, selected_fields, - MLX5_HASH_IP_IPSEC_SPI); - break; - - case MLX5E_TT_IPV6_IPSEC_AH: - MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, - MLX5_L3_PROT_TYPE_IPV6); - MLX5_SET(rx_hash_field_select, hfso, selected_fields, - MLX5_HASH_IP_IPSEC_SPI); - break; - - case MLX5E_TT_IPV4_IPSEC_ESP: - MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, - MLX5_L3_PROT_TYPE_IPV4); - MLX5_SET(rx_hash_field_select, hfso, selected_fields, - MLX5_HASH_IP_IPSEC_SPI); - break; - - case MLX5E_TT_IPV6_IPSEC_ESP: - MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, - MLX5_L3_PROT_TYPE_IPV6); - MLX5_SET(rx_hash_field_select, hfso, selected_fields, - MLX5_HASH_IP_IPSEC_SPI); - break; - - case MLX5E_TT_IPV4: - MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, - MLX5_L3_PROT_TYPE_IPV4); - MLX5_SET(rx_hash_field_select, hfso, selected_fields, - MLX5_HASH_IP); - break; - - case MLX5E_TT_IPV6: - MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, - MLX5_L3_PROT_TYPE_IPV6); - MLX5_SET(rx_hash_field_select, hfso, selected_fields, - MLX5_HASH_IP); - break; - default: - WARN_ONCE(true, - 
"mlx5e_build_indir_tir_ctx: bad traffic type!\n"); - } + mlx5e_build_indir_tir_ctx_hash(priv, tirc, tt); } static void mlx5e_build_direct_tir_ctx(struct mlx5e_priv *priv, u32 *tirc, @@ -2686,7 +2740,7 @@ mqprio: return mlx5e_setup_tc(dev, tc->tc); } -static struct rtnl_link_stats64 * +static void mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) { struct mlx5e_priv *priv = netdev_priv(dev); @@ -2729,7 +2783,6 @@ mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) stats->multicast = VPORT_COUNTER_GET(vstats, received_eth_multicast.packets); - return stats; } static void mlx5e_set_rx_mode(struct net_device *dev) @@ -2987,11 +3040,8 @@ static int mlx5e_set_vf_rate(struct net_device *dev, int vf, int min_tx_rate, struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5_core_dev *mdev = priv->mdev; - if (min_tx_rate) - return -EOPNOTSUPP; - return mlx5_eswitch_set_vport_rate(mdev->priv.eswitch, vf + 1, - max_tx_rate); + max_tx_rate, min_tx_rate); } static int mlx5_vport_link2ifla(u8 esw_link) @@ -3159,11 +3209,6 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog) bool reset, was_opened; int i; - if (prog && prog->xdp_adjust_head) { - netdev_err(netdev, "Does not support bpf_xdp_adjust_head()\n"); - return -EOPNOTSUPP; - } - mutex_lock(&priv->state_lock); if ((netdev->features & NETIF_F_LRO) && prog) { @@ -3331,7 +3376,7 @@ static const struct net_device_ops mlx5e_netdev_ops_sriov = { static int mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev) { if (MLX5_CAP_GEN(mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) - return -ENOTSUPP; + return -EOPNOTSUPP; if (!MLX5_CAP_GEN(mdev, eth_net_offloads) || !MLX5_CAP_GEN(mdev, nic_flow_table) || !MLX5_CAP_ETH(mdev, csum_cap) || @@ -3343,7 +3388,7 @@ static int mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev) < 3) { mlx5_core_warn(mdev, "Not creating net device, some required device capabilities are missing\n"); - return -ENOTSUPP; + return -EOPNOTSUPP; } 
if (!MLX5_CAP_ETH(mdev, self_lb_en_modifiable)) mlx5_core_warn(mdev, "Self loop back prevention is not supported\n"); @@ -3432,22 +3477,6 @@ void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode) MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE; } -static void mlx5e_query_min_inline(struct mlx5_core_dev *mdev, - u8 *min_inline_mode) -{ - switch (MLX5_CAP_ETH(mdev, wqe_inline_mode)) { - case MLX5_CAP_INLINE_MODE_L2: - *min_inline_mode = MLX5_INLINE_MODE_L2; - break; - case MLX5_CAP_INLINE_MODE_VPORT_CONTEXT: - mlx5_query_nic_vport_min_inline(mdev, 0, min_inline_mode); - break; - case MLX5_CAP_INLINE_MODE_NOT_REQUIRED: - *min_inline_mode = MLX5_INLINE_MODE_NONE; - break; - } -} - u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeout) { int i; @@ -3481,7 +3510,9 @@ static void mlx5e_build_nic_netdev_priv(struct mlx5_core_dev *mdev, priv->params.lro_timeout = mlx5e_choose_lro_timeout(mdev, MLX5E_DEFAULT_LRO_TIMEOUT); - priv->params.log_sq_size = MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE; + priv->params.log_sq_size = is_kdump_kernel() ? 
+ MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE : + MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE; /* set CQE compression */ priv->params.rx_cqe_compress_def = false; @@ -3495,6 +3526,9 @@ static void mlx5e_build_nic_netdev_priv(struct mlx5_core_dev *mdev, cqe_compress_heuristic(link_speed, pci_bw); } + MLX5E_SET_PFLAG(priv, MLX5E_PFLAG_RX_CQE_COMPRESS, + priv->params.rx_cqe_compress_def); + mlx5e_set_rq_priv_params(priv); if (priv->params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) priv->params.lro_en = true; @@ -3507,7 +3541,11 @@ static void mlx5e_build_nic_netdev_priv(struct mlx5_core_dev *mdev, priv->params.tx_cq_moderation.pkts = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS; priv->params.tx_max_inline = mlx5e_get_max_inline_cap(mdev); - mlx5e_query_min_inline(mdev, &priv->params.tx_min_inline_mode); + mlx5_query_min_inline(mdev, &priv->params.tx_min_inline_mode); + if (priv->params.tx_min_inline_mode == MLX5_INLINE_MODE_NONE && + !MLX5_CAP_ETH(mdev, wqe_vlan_insert)) + priv->params.tx_min_inline_mode = MLX5_INLINE_MODE_L2; + priv->params.num_tc = 1; priv->params.rss_hfunc = ETH_RSS_HASH_XOR; @@ -3517,16 +3555,9 @@ static void mlx5e_build_nic_netdev_priv(struct mlx5_core_dev *mdev, mlx5e_build_default_indir_rqt(mdev, priv->params.indirection_rqt, MLX5E_INDIR_RQT_SIZE, profile->max_nch(mdev)); - priv->params.lro_wqe_sz = - MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ - - /* Extra room needed for build_skb */ - MLX5_RX_HEADROOM - - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); - /* Initialize pflags */ MLX5E_SET_PFLAG(priv, MLX5E_PFLAG_RX_CQE_BASED_MODER, priv->params.rx_cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE); - MLX5E_SET_PFLAG(priv, MLX5E_PFLAG_RX_CQE_COMPRESS, priv->params.rx_cqe_compress_def); mutex_init(&priv->state_lock); @@ -3675,14 +3706,8 @@ static void mlx5e_nic_init(struct mlx5_core_dev *mdev, static void mlx5e_nic_cleanup(struct mlx5e_priv *priv) { - struct mlx5_core_dev *mdev = priv->mdev; - struct mlx5_eswitch *esw = mdev->priv.eswitch; - mlx5e_vxlan_cleanup(priv); 
- if (MLX5_CAP_GEN(mdev, vport_group_manager)) - mlx5_eswitch_unregister_vport_rep(esw, 0); - if (priv->xdp_prog) bpf_prog_put(priv->xdp_prog); } @@ -3807,9 +3832,14 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv) static void mlx5e_nic_disable(struct mlx5e_priv *priv) { + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5_eswitch *esw = mdev->priv.eswitch; + queue_work(priv->wq, &priv->set_rx_mode_work); + if (MLX5_CAP_GEN(mdev, vport_group_manager)) + mlx5_eswitch_unregister_vport_rep(esw, 0); mlx5e_disable_async_events(priv); - mlx5_lag_remove(priv->mdev); + mlx5_lag_remove(mdev); } static const struct mlx5e_profile mlx5e_nic_profile = { @@ -3941,6 +3971,19 @@ static void mlx5e_register_vport_rep(struct mlx5_core_dev *mdev) } } +static void mlx5e_unregister_vport_rep(struct mlx5_core_dev *mdev) +{ + struct mlx5_eswitch *esw = mdev->priv.eswitch; + int total_vfs = MLX5_TOTAL_VPORTS(mdev); + int vport; + + if (!MLX5_CAP_GEN(mdev, vport_group_manager)) + return; + + for (vport = 1; vport < total_vfs; vport++) + mlx5_eswitch_unregister_vport_rep(esw, vport); +} + void mlx5e_detach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); @@ -3987,6 +4030,7 @@ static int mlx5e_attach(struct mlx5_core_dev *mdev, void *vpriv) return err; } + mlx5e_register_vport_rep(mdev); return 0; } @@ -3998,6 +4042,7 @@ static void mlx5e_detach(struct mlx5_core_dev *mdev, void *vpriv) if (!netif_device_present(netdev)) return; + mlx5e_unregister_vport_rep(mdev); mlx5e_detach_netdev(mdev, netdev); mlx5e_destroy_mdev_resources(mdev); } @@ -4016,8 +4061,6 @@ static void *mlx5e_add(struct mlx5_core_dev *mdev) if (err) return NULL; - mlx5e_register_vport_rep(mdev); - if (MLX5_CAP_GEN(mdev, vport_group_manager)) ppriv = &esw->offloads.vport_reps[0]; @@ -4069,13 +4112,7 @@ void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, struct mlx5e_priv *priv) static void mlx5e_remove(struct mlx5_core_dev *mdev, void *vpriv) { - struct 
mlx5_eswitch *esw = mdev->priv.eswitch; - int total_vfs = MLX5_TOTAL_VPORTS(mdev); struct mlx5e_priv *priv = vpriv; - int vport; - - for (vport = 1; vport < total_vfs; vport++) - mlx5_eswitch_unregister_vport_rep(esw, vport); unregister_netdev(priv->netdev); mlx5e_detach(mdev, vpriv); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 850378893b25..2c864574a9d5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -374,13 +374,12 @@ int mlx5e_get_offload_stats(int attr_id, const struct net_device *dev, return -EINVAL; } -static struct rtnl_link_stats64 * +static void mlx5e_rep_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) { struct mlx5e_priv *priv = netdev_priv(dev); memcpy(stats, &priv->stats.vf_vport, sizeof(*stats)); - return stats; } static const struct switchdev_ops mlx5e_rep_switchdev_ops = { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 0e2fb3ed1790..3d371688fbbb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -30,9 +30,11 @@ * SOFTWARE. 
*/ +#include <linux/prefetch.h> #include <linux/ip.h> #include <linux/ipv6.h> #include <linux/tcp.h> +#include <linux/bpf_trace.h> #include <net/busy_poll.h> #include "en.h" #include "en_tc.h" @@ -92,19 +94,18 @@ static inline void mlx5e_cqes_update_owner(struct mlx5e_cq *cq, u32 cqcc, int n) static inline void mlx5e_decompress_cqe(struct mlx5e_rq *rq, struct mlx5e_cq *cq, u32 cqcc) { - u16 wqe_cnt_step; - cq->title.byte_cnt = cq->mini_arr[cq->mini_arr_idx].byte_cnt; cq->title.check_sum = cq->mini_arr[cq->mini_arr_idx].checksum; cq->title.op_own &= 0xf0; cq->title.op_own |= 0x01 & (cqcc >> cq->wq.log_sz); cq->title.wqe_counter = cpu_to_be16(cq->decmprs_wqe_counter); - wqe_cnt_step = - rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ ? - mpwrq_get_cqe_consumed_strides(&cq->title) : 1; - cq->decmprs_wqe_counter = - (cq->decmprs_wqe_counter + wqe_cnt_step) & rq->wq.sz_m1; + if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) + cq->decmprs_wqe_counter += + mpwrq_get_cqe_consumed_strides(&cq->title); + else + cq->decmprs_wqe_counter = + (cq->decmprs_wqe_counter + 1) & rq->wq.sz_m1; } static inline void mlx5e_decompress_cqe_no_hash(struct mlx5e_rq *rq, @@ -155,29 +156,26 @@ static inline u32 mlx5e_decompress_cqes_start(struct mlx5e_rq *rq, return mlx5e_decompress_cqes_cont(rq, cq, 1, budget_rem) - 1; } -void mlx5e_modify_rx_cqe_compression(struct mlx5e_priv *priv, bool val) +void mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool val) { bool was_opened; if (!MLX5_CAP_GEN(priv->mdev, cqe_compression)) return; - mutex_lock(&priv->state_lock); - if (MLX5E_GET_PFLAG(priv, MLX5E_PFLAG_RX_CQE_COMPRESS) == val) - goto unlock; + return; was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state); if (was_opened) mlx5e_close_locked(priv->netdev); MLX5E_SET_PFLAG(priv, MLX5E_PFLAG_RX_CQE_COMPRESS, val); + mlx5e_set_rq_type_params(priv, priv->params.rq_wq_type); if (was_opened) mlx5e_open_locked(priv->netdev); -unlock: - mutex_unlock(&priv->state_lock); } 
#define RQ_PAGE_SIZE(rq) ((1 << rq->buff.page_order) << PAGE_SHIFT) @@ -193,6 +191,9 @@ static inline bool mlx5e_rx_cache_put(struct mlx5e_rq *rq, return false; } + if (unlikely(page_is_pfmemalloc(dma_info->page))) + return false; + cache->page_cache[cache->tail] = *dma_info; cache->tail = tail_next; return true; @@ -264,7 +265,7 @@ int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix) if (unlikely(mlx5e_page_alloc_mapped(rq, di))) return -ENOMEM; - wqe->data.addr = cpu_to_be64(di->addr + MLX5_RX_HEADROOM); + wqe->data.addr = cpu_to_be64(di->addr + rq->rx_headroom); return 0; } @@ -644,10 +645,9 @@ static inline void mlx5e_xmit_xdp_doorbell(struct mlx5e_sq *sq) mlx5e_tx_notify_hw(sq, &wqe->ctrl, 0); } -static inline void mlx5e_xmit_xdp_frame(struct mlx5e_rq *rq, +static inline bool mlx5e_xmit_xdp_frame(struct mlx5e_rq *rq, struct mlx5e_dma_info *di, - unsigned int data_offset, - int len) + const struct xdp_buff *xdp) { struct mlx5e_sq *sq = &rq->channel->xdp_sq; struct mlx5_wq_cyc *wq = &sq->wq; @@ -658,10 +658,18 @@ static inline void mlx5e_xmit_xdp_frame(struct mlx5e_rq *rq, struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; struct mlx5_wqe_eth_seg *eseg = &wqe->eth; struct mlx5_wqe_data_seg *dseg; + u8 ds_cnt = MLX5E_XDP_TX_DS_COUNT; - dma_addr_t dma_addr = di->addr + data_offset + MLX5E_XDP_MIN_INLINE; - unsigned int dma_len = len - MLX5E_XDP_MIN_INLINE; - void *data = page_address(di->page) + data_offset; + ptrdiff_t data_offset = xdp->data - xdp->data_hard_start; + dma_addr_t dma_addr = di->addr + data_offset; + unsigned int dma_len = xdp->data_end - xdp->data; + + if (unlikely(dma_len < MLX5E_XDP_MIN_INLINE || + MLX5E_SW2HW_MTU(rq->netdev->mtu) < dma_len)) { + rq->stats.xdp_drop++; + mlx5e_page_release(rq, di, true); + return false; + } if (unlikely(!mlx5e_sq_has_room_for(sq, MLX5E_XDP_TX_WQEBBS))) { if (sq->db.xdp.doorbell) { @@ -671,7 +679,7 @@ static inline void mlx5e_xmit_xdp_frame(struct mlx5e_rq *rq, } rq->stats.xdp_tx_full++; 
mlx5e_page_release(rq, di, true); - return; + return false; } dma_sync_single_for_device(sq->pdev, dma_addr, dma_len, @@ -679,11 +687,17 @@ static inline void mlx5e_xmit_xdp_frame(struct mlx5e_rq *rq, memset(wqe, 0, sizeof(*wqe)); - /* copy the inline part */ - memcpy(eseg->inline_hdr_start, data, MLX5E_XDP_MIN_INLINE); - eseg->inline_hdr_sz = cpu_to_be16(MLX5E_XDP_MIN_INLINE); + dseg = (struct mlx5_wqe_data_seg *)eseg + 1; + /* copy the inline part if required */ + if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) { + memcpy(eseg->inline_hdr.start, xdp->data, MLX5E_XDP_MIN_INLINE); + eseg->inline_hdr.sz = cpu_to_be16(MLX5E_XDP_MIN_INLINE); + dma_len -= MLX5E_XDP_MIN_INLINE; + dma_addr += MLX5E_XDP_MIN_INLINE; - dseg = (struct mlx5_wqe_data_seg *)cseg + (MLX5E_XDP_TX_DS_COUNT - 1); + ds_cnt += MLX5E_XDP_IHS_DS_COUNT; + dseg++; + } /* write the dma part */ dseg->addr = cpu_to_be64(dma_addr); @@ -691,7 +705,7 @@ static inline void mlx5e_xmit_xdp_frame(struct mlx5e_rq *rq, dseg->lkey = sq->mkey_be; cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_SEND); - cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | MLX5E_XDP_TX_DS_COUNT); + cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); sq->db.xdp.di[pi] = *di; wi->opcode = MLX5_OPCODE_SEND; @@ -700,32 +714,39 @@ static inline void mlx5e_xmit_xdp_frame(struct mlx5e_rq *rq, sq->db.xdp.doorbell = true; rq->stats.xdp_tx++; + return true; } /* returns true if packet was consumed by xdp */ -static inline bool mlx5e_xdp_handle(struct mlx5e_rq *rq, - const struct bpf_prog *prog, - struct mlx5e_dma_info *di, - void *data, u16 len) +static inline int mlx5e_xdp_handle(struct mlx5e_rq *rq, + struct mlx5e_dma_info *di, + void *va, u16 *rx_headroom, u32 *len) { + const struct bpf_prog *prog = READ_ONCE(rq->xdp_prog); struct xdp_buff xdp; u32 act; if (!prog) return false; - xdp.data = data; - xdp.data_end = xdp.data + len; + xdp.data = va + *rx_headroom; + xdp.data_end = xdp.data + *len; + xdp.data_hard_start = va; + act = 
bpf_prog_run_xdp(prog, &xdp); switch (act) { case XDP_PASS: + *rx_headroom = xdp.data - xdp.data_hard_start; + *len = xdp.data_end - xdp.data; return false; case XDP_TX: - mlx5e_xmit_xdp_frame(rq, di, MLX5_RX_HEADROOM, len); + if (unlikely(!mlx5e_xmit_xdp_frame(rq, di, &xdp))) + trace_xdp_exception(rq->netdev, prog, act); return true; default: bpf_warn_invalid_xdp_action(act); case XDP_ABORTED: + trace_xdp_exception(rq->netdev, prog, act); case XDP_DROP: rq->stats.xdp_drop++; mlx5e_page_release(rq, di, true); @@ -740,15 +761,16 @@ struct sk_buff *skb_from_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, struct mlx5e_dma_info *di; struct sk_buff *skb; void *va, *data; + u16 rx_headroom = rq->rx_headroom; bool consumed; di = &rq->dma_info[wqe_counter]; va = page_address(di->page); - data = va + MLX5_RX_HEADROOM; + data = va + rx_headroom; dma_sync_single_range_for_cpu(rq->pdev, di->addr, - MLX5_RX_HEADROOM, + rx_headroom, rq->buff.wqe_sz, DMA_FROM_DEVICE); prefetch(data); @@ -760,8 +782,7 @@ struct sk_buff *skb_from_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, } rcu_read_lock(); - consumed = mlx5e_xdp_handle(rq, READ_ONCE(rq->xdp_prog), di, data, - cqe_bcnt); + consumed = mlx5e_xdp_handle(rq, di, va, &rx_headroom, &cqe_bcnt); rcu_read_unlock(); if (consumed) return NULL; /* page/packet was consumed by XDP */ @@ -777,7 +798,7 @@ struct sk_buff *skb_from_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, page_ref_inc(di->page); mlx5e_page_release(rq, di, true); - skb_reserve(skb, MLX5_RX_HEADROOM); + skb_reserve(skb, rx_headroom); skb_put(skb, cqe_bcnt); return skb; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx_am.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx_am.c index 1fffe48a93cc..cbfac06b7ffd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx_am.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx_am.c @@ -109,7 +109,6 @@ static bool mlx5e_am_on_top(struct mlx5e_rx_am *am) switch (am->tune_state) { case MLX5E_AM_PARKING_ON_TOP: case 
MLX5E_AM_PARKING_TIRED: - WARN_ONCE(true, "mlx5e_am_on_top: PARKING\n"); return true; case MLX5E_AM_GOING_RIGHT: return (am->steps_left > 1) && (am->steps_right == 1); @@ -123,7 +122,6 @@ static void mlx5e_am_turn(struct mlx5e_rx_am *am) switch (am->tune_state) { case MLX5E_AM_PARKING_ON_TOP: case MLX5E_AM_PARKING_TIRED: - WARN_ONCE(true, "mlx5e_am_turn: PARKING\n"); break; case MLX5E_AM_GOING_RIGHT: am->tune_state = MLX5E_AM_GOING_LEFT; @@ -144,7 +142,6 @@ static int mlx5e_am_step(struct mlx5e_rx_am *am) switch (am->tune_state) { case MLX5E_AM_PARKING_ON_TOP: case MLX5E_AM_PARKING_TIRED: - WARN_ONCE(true, "mlx5e_am_step: PARKING\n"); break; case MLX5E_AM_GOING_RIGHT: if (am->profile_ix == (MLX5E_PARAMS_AM_NUM_PROFILES - 1)) @@ -282,10 +279,8 @@ static void mlx5e_am_calc_stats(struct mlx5e_rx_am_sample *start, u32 delta_us = ktime_us_delta(end->time, start->time); unsigned int npkts = end->pkt_ctr - start->pkt_ctr; - if (!delta_us) { - WARN_ONCE(true, "mlx5e_am_calc_stats: delta_us=0\n"); + if (!delta_us) return; - } curr_stats->ppms = (npkts * USEC_PER_MSEC) / delta_us; curr_stats->epms = (MLX5E_AM_NEVENTS * USEC_PER_MSEC) / delta_us; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c index 65442c36a6e1..31e3cb7ee5fe 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c @@ -30,6 +30,7 @@ * SOFTWARE. 
*/ +#include <linux/prefetch.h> #include <linux/ip.h> #include <linux/udp.h> #include <net/udp.h> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index ba5db1dd23a9..53e4992d6511 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -39,7 +39,7 @@ #define MLX5E_READ_CTR32_CPU(ptr, dsc, i) \ (*(u32 *)((char *)ptr + dsc[i].offset)) #define MLX5E_READ_CTR32_BE(ptr, dsc, i) \ - be64_to_cpu(*(__be32 *)((char *)ptr + dsc[i].offset)) + be32_to_cpu(*(__be32 *)((char *)ptr + dsc[i].offset)) #define MLX5E_DECLARE_STAT(type, fld) #fld, offsetof(type, fld) #define MLX5E_DECLARE_RX_STAT(type, fld) "rx%d_"#fld, offsetof(type, fld) @@ -201,6 +201,12 @@ static const struct counter_desc vport_stats_desc[] = { #define PPORT_2819_GET(pstats, c) \ MLX5_GET64(ppcnt_reg, pstats->RFC_2819_counters, \ counter_set.eth_2819_cntrs_grp_data_layout.c##_high) +#define PPORT_PHY_STATISTICAL_OFF(c) \ + MLX5_BYTE_OFF(ppcnt_reg, \ + counter_set.phys_layer_statistical_cntrs.c##_high) +#define PPORT_PHY_STATISTICAL_GET(pstats, c) \ + MLX5_GET64(ppcnt_reg, (pstats)->phy_statistical_counters, \ + counter_set.phys_layer_statistical_cntrs.c##_high) #define PPORT_PER_PRIO_OFF(c) \ MLX5_BYTE_OFF(ppcnt_reg, \ counter_set.eth_per_prio_grp_data_layout.c##_high) @@ -215,6 +221,7 @@ struct mlx5e_pport_stats { __be64 RFC_2819_counters[MLX5_ST_SZ_QW(ppcnt_reg)]; __be64 per_prio_counters[NUM_PPORT_PRIO][MLX5_ST_SZ_QW(ppcnt_reg)]; __be64 phy_counters[MLX5_ST_SZ_QW(ppcnt_reg)]; + __be64 phy_statistical_counters[MLX5_ST_SZ_QW(ppcnt_reg)]; }; static const struct counter_desc pport_802_3_stats_desc[] = { @@ -260,6 +267,11 @@ static const struct counter_desc pport_2819_stats_desc[] = { { "rx_8192_to_10239_bytes_phy", PPORT_2819_OFF(ether_stats_pkts8192to10239octets) }, }; +static const struct counter_desc pport_phy_statistical_stats_desc[] = { + { "rx_symbol_errors_phy", 
PPORT_PHY_STATISTICAL_OFF(phy_symbol_errors) }, + { "rx_corrected_bits_phy", PPORT_PHY_STATISTICAL_OFF(phy_corrected_bits) }, +}; + static const struct counter_desc pport_per_prio_traffic_stats_desc[] = { { "rx_prio%d_bytes", PPORT_PER_PRIO_OFF(rx_octets) }, { "rx_prio%d_packets", PPORT_PER_PRIO_OFF(rx_frames) }, @@ -276,6 +288,21 @@ static const struct counter_desc pport_per_prio_pfc_stats_desc[] = { { "rx_%s_pause_transition", PPORT_PER_PRIO_OFF(rx_pause_transition) }, }; +#define PCIE_PERF_OFF(c) \ + MLX5_BYTE_OFF(mpcnt_reg, counter_set.pcie_perf_cntrs_grp_data_layout.c) +#define PCIE_PERF_GET(pcie_stats, c) \ + MLX5_GET(mpcnt_reg, (pcie_stats)->pcie_perf_counters, \ + counter_set.pcie_perf_cntrs_grp_data_layout.c) + +struct mlx5e_pcie_stats { + __be64 pcie_perf_counters[MLX5_ST_SZ_QW(mpcnt_reg)]; +}; + +static const struct counter_desc pcie_perf_stats_desc[] = { + { "rx_pci_signal_integrity", PCIE_PERF_OFF(rx_errors) }, + { "tx_pci_signal_integrity", PCIE_PERF_OFF(tx_errors) }, +}; + struct mlx5e_rq_stats { u64 packets; u64 bytes; @@ -360,15 +387,23 @@ static const struct counter_desc sq_stats_desc[] = { #define NUM_PPORT_802_3_COUNTERS ARRAY_SIZE(pport_802_3_stats_desc) #define NUM_PPORT_2863_COUNTERS ARRAY_SIZE(pport_2863_stats_desc) #define NUM_PPORT_2819_COUNTERS ARRAY_SIZE(pport_2819_stats_desc) +#define NUM_PPORT_PHY_STATISTICAL_COUNTERS(priv) \ + (ARRAY_SIZE(pport_phy_statistical_stats_desc) * \ + MLX5_CAP_PCAM_FEATURE((priv)->mdev, ppcnt_statistical_group)) +#define NUM_PCIE_PERF_COUNTERS(priv) \ + (ARRAY_SIZE(pcie_perf_stats_desc) * \ + MLX5_CAP_MCAM_FEATURE((priv)->mdev, pcie_performance_group)) #define NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS \ ARRAY_SIZE(pport_per_prio_traffic_stats_desc) #define NUM_PPORT_PER_PRIO_PFC_COUNTERS \ ARRAY_SIZE(pport_per_prio_pfc_stats_desc) -#define NUM_PPORT_COUNTERS (NUM_PPORT_802_3_COUNTERS + \ +#define NUM_PPORT_COUNTERS(priv) (NUM_PPORT_802_3_COUNTERS + \ NUM_PPORT_2863_COUNTERS + \ NUM_PPORT_2819_COUNTERS + \ + 
NUM_PPORT_PHY_STATISTICAL_COUNTERS(priv) + \ NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS * \ NUM_PPORT_PRIO) +#define NUM_PCIE_COUNTERS(priv) NUM_PCIE_PERF_COUNTERS(priv) #define NUM_RQ_STATS ARRAY_SIZE(rq_stats_desc) #define NUM_SQ_STATS ARRAY_SIZE(sq_stats_desc) @@ -378,6 +413,7 @@ struct mlx5e_stats { struct mlx5e_vport_stats vport; struct mlx5e_pport_stats pport; struct rtnl_link_stats64 vf_vport; + struct mlx5e_pcie_stats pcie; }; static const struct counter_desc mlx5e_pme_status_desc[] = { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index f8829b517156..44406a5ec15d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -161,15 +161,21 @@ static void mlx5e_detach_encap(struct mlx5e_priv *priv, } } +/* we get here also when setting rule to the FW failed, etc. It means that the + * flow rule itself might not exist, but some offloading related to the actions + * should be cleaned. 
+ */ static void mlx5e_tc_del_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5_fc *counter = NULL; - counter = mlx5_flow_rule_counter(flow->rule); - - mlx5_del_flow_rules(flow->rule); + if (!IS_ERR(flow->rule)) { + counter = mlx5_flow_rule_counter(flow->rule); + mlx5_del_flow_rules(flow->rule); + mlx5_fc_destroy(priv->mdev, counter); + } if (esw && esw->mode == SRIOV_OFFLOADS) { mlx5_eswitch_del_vlan_action(esw, flow->attr); @@ -177,8 +183,6 @@ static void mlx5e_tc_del_flow(struct mlx5e_priv *priv, mlx5e_detach_encap(priv, flow); } - mlx5_fc_destroy(priv->mdev, counter); - if (!mlx5e_tc_num_filters(priv) && (priv->fs.tc.t)) { mlx5_destroy_flow_table(priv->fs.tc.t); priv->fs.tc.t = NULL; @@ -225,6 +229,11 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv, void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers); + struct flow_dissector_key_control *enc_control = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_ENC_CONTROL, + f->key); + if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_PORTS)) { struct flow_dissector_key_ports *key = skb_flow_dissector_target(f->dissector, @@ -237,28 +246,34 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv, /* Full udp dst port must be given */ if (memchr_inv(&mask->dst, 0xff, sizeof(mask->dst))) - return -EOPNOTSUPP; - - /* udp src port isn't supported */ - if (memchr_inv(&mask->src, 0, sizeof(mask->src))) - return -EOPNOTSUPP; + goto vxlan_match_offload_err; if (mlx5e_vxlan_lookup_port(priv, be16_to_cpu(key->dst)) && MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap)) parse_vxlan_attr(spec, f); - else + else { + netdev_warn(priv->netdev, + "%d isn't an offloaded vxlan udp dport\n", be16_to_cpu(key->dst)); return -EOPNOTSUPP; + } MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_dport, ntohs(mask->dst)); MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport, ntohs(key->dst)); + 
MLX5_SET(fte_match_set_lyr_2_4, headers_c, + udp_sport, ntohs(mask->src)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + udp_sport, ntohs(key->src)); } else { /* udp dst port must be given */ - return -EOPNOTSUPP; +vxlan_match_offload_err: + netdev_warn(priv->netdev, + "IP tunnel decap offload supported only for vxlan, must set UDP dport\n"); + return -EOPNOTSUPP; } - if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) { + if (enc_control->addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { struct flow_dissector_key_ipv4_addrs *key = skb_flow_dissector_target(f->dissector, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS, @@ -280,10 +295,36 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv, MLX5_SET(fte_match_set_lyr_2_4, headers_v, dst_ipv4_dst_ipv6.ipv4_layout.ipv4, ntohl(key->dst)); - } - MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ethertype); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, ETH_P_IP); + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ethertype); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, ETH_P_IP); + } else if (enc_control->addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { + struct flow_dissector_key_ipv6_addrs *key = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS, + f->key); + struct flow_dissector_key_ipv6_addrs *mask = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS, + f->mask); + + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + src_ipv4_src_ipv6.ipv6_layout.ipv6), + &mask->src, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + src_ipv4_src_ipv6.ipv6_layout.ipv6), + &key->src, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6)); + + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + &mask->dst, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6)); + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + &key->dst, 
MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6)); + + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ethertype); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, ETH_P_IPV6); + } /* Enforce DMAC when offloading incoming tunneled flows. * Flow counters require a match on the DMAC. @@ -343,6 +384,7 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, f->key); switch (key->addr_type) { case FLOW_DISSECTOR_KEY_IPV4_ADDRS: + case FLOW_DISSECTOR_KEY_IPV6_ADDRS: if (parse_tunnel_attr(priv, spec, f)) return -EOPNOTSUPP; break; @@ -375,6 +417,10 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1); MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag, key->flags & FLOW_DIS_IS_FRAGMENT); + + /* the HW doesn't need L3 inline to match on frag=no */ + if (key->flags & FLOW_DIS_IS_FRAGMENT) + *min_inline = MLX5_INLINE_MODE_IP; } } @@ -438,8 +484,8 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, FLOW_DISSECTOR_KEY_VLAN, f->mask); if (mask->vlan_id || mask->vlan_priority) { - MLX5_SET(fte_match_set_lyr_2_4, headers_c, vlan_tag, 1); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, vlan_tag, 1); + MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, cvlan_tag, 1); MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_vid, mask->vlan_id); MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid, key->vlan_id); @@ -622,15 +668,15 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, return 0; } -static inline int cmp_encap_info(struct mlx5_encap_info *a, - struct mlx5_encap_info *b) +static inline int cmp_encap_info(struct ip_tunnel_key *a, + struct ip_tunnel_key *b) { return memcmp(a, b, sizeof(*a)); } -static inline int hash_encap_info(struct mlx5_encap_info *info) +static inline int hash_encap_info(struct ip_tunnel_key *key) { - return jhash(info, sizeof(*info), 0); + return jhash(key, sizeof(*key), 0); } static int mlx5e_route_lookup_ipv4(struct 
mlx5e_priv *priv, @@ -638,41 +684,76 @@ static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv, struct net_device **out_dev, struct flowi4 *fl4, struct neighbour **out_n, - __be32 *saddr, int *out_ttl) { + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct rtable *rt; struct neighbour *n = NULL; - int ttl; #if IS_ENABLED(CONFIG_INET) + int ret; + rt = ip_route_output_key(dev_net(mirred_dev), fl4); - if (IS_ERR(rt)) { - pr_warn("%s: no route to %pI4\n", __func__, &fl4->daddr); - return -EOPNOTSUPP; - } + ret = PTR_ERR_OR_ZERO(rt); + if (ret) + return ret; #else return -EOPNOTSUPP; #endif + /* if the egress device isn't on the same HW e-switch, we use the uplink */ + if (!switchdev_port_same_parent_id(priv->netdev, rt->dst.dev)) + *out_dev = mlx5_eswitch_get_uplink_netdev(esw); + else + *out_dev = rt->dst.dev; - if (!switchdev_port_same_parent_id(priv->netdev, rt->dst.dev)) { - pr_warn("%s: Can't offload the flow, netdevices aren't on the same HW e-switch\n", - __func__); - ip_rt_put(rt); - return -EOPNOTSUPP; - } - - ttl = ip4_dst_hoplimit(&rt->dst); + *out_ttl = ip4_dst_hoplimit(&rt->dst); n = dst_neigh_lookup(&rt->dst, &fl4->daddr); ip_rt_put(rt); if (!n) return -ENOMEM; *out_n = n; - *saddr = fl4->saddr; - *out_ttl = ttl; - *out_dev = rt->dst.dev; + return 0; +} + +static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv, + struct net_device *mirred_dev, + struct net_device **out_dev, + struct flowi6 *fl6, + struct neighbour **out_n, + int *out_ttl) +{ + struct neighbour *n = NULL; + struct dst_entry *dst; + +#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6) + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + int ret; + + dst = ip6_route_output(dev_net(mirred_dev), NULL, fl6); + ret = dst->error; + if (ret) { + dst_release(dst); + return ret; + } + + *out_ttl = ip6_dst_hoplimit(dst); + /* if the egress device isn't on the same HW e-switch, we use the uplink */ + if (!switchdev_port_same_parent_id(priv->netdev, dst->dev)) + *out_dev = 
mlx5_eswitch_get_uplink_netdev(esw); + else + *out_dev = dst->dev; +#else + return -EOPNOTSUPP; +#endif + + n = dst_neigh_lookup(dst, &fl6->daddr); + dst_release(dst); + if (!n) + return -ENOMEM; + + *out_n = n; return 0; } @@ -712,19 +793,52 @@ static int gen_vxlan_header_ipv4(struct net_device *out_dev, return encap_size; } +static int gen_vxlan_header_ipv6(struct net_device *out_dev, + char buf[], + unsigned char h_dest[ETH_ALEN], + int ttl, + struct in6_addr *daddr, + struct in6_addr *saddr, + __be16 udp_dst_port, + __be32 vx_vni) +{ + int encap_size = VXLAN_HLEN + sizeof(struct ipv6hdr) + ETH_HLEN; + struct ethhdr *eth = (struct ethhdr *)buf; + struct ipv6hdr *ip6h = (struct ipv6hdr *)((char *)eth + sizeof(struct ethhdr)); + struct udphdr *udp = (struct udphdr *)((char *)ip6h + sizeof(struct ipv6hdr)); + struct vxlanhdr *vxh = (struct vxlanhdr *)((char *)udp + sizeof(struct udphdr)); + + memset(buf, 0, encap_size); + + ether_addr_copy(eth->h_dest, h_dest); + ether_addr_copy(eth->h_source, out_dev->dev_addr); + eth->h_proto = htons(ETH_P_IPV6); + + ip6_flow_hdr(ip6h, 0, 0); + /* the HW fills up ipv6 payload len */ + ip6h->nexthdr = IPPROTO_UDP; + ip6h->hop_limit = ttl; + ip6h->daddr = *daddr; + ip6h->saddr = *saddr; + + udp->dest = udp_dst_port; + vxh->vx_flags = VXLAN_HF_VNI; + vxh->vx_vni = vxlan_vni_field(vx_vni); + + return encap_size; +} + static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv, struct net_device *mirred_dev, struct mlx5_encap_entry *e, struct net_device **out_dev) { int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size); + struct ip_tunnel_key *tun_key = &e->tun_info.key; + int encap_size, ttl, err; + struct neighbour *n = NULL; struct flowi4 fl4 = {}; - struct neighbour *n; char *encap_header; - int encap_size; - __be32 saddr; - int ttl; - int err; encap_header = kzalloc(max_encap_size, GFP_KERNEL); if (!encap_header) @@ -733,36 +847,108 @@ static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv, switch 
(e->tunnel_type) { case MLX5_HEADER_TYPE_VXLAN: fl4.flowi4_proto = IPPROTO_UDP; - fl4.fl4_dport = e->tun_info.tp_dst; + fl4.fl4_dport = tun_key->tp_dst; break; default: err = -EOPNOTSUPP; goto out; } - fl4.daddr = e->tun_info.daddr; + fl4.flowi4_tos = tun_key->tos; + fl4.daddr = tun_key->u.ipv4.dst; + fl4.saddr = tun_key->u.ipv4.src; err = mlx5e_route_lookup_ipv4(priv, mirred_dev, out_dev, - &fl4, &n, &saddr, &ttl); + &fl4, &n, &ttl); if (err) goto out; + if (!(n->nud_state & NUD_VALID)) { + pr_warn("%s: can't offload, neighbour to %pI4 invalid\n", __func__, &fl4.daddr); + err = -EOPNOTSUPP; + goto out; + } + e->n = n; e->out_dev = *out_dev; + neigh_ha_snapshot(e->h_dest, n, *out_dev); + + switch (e->tunnel_type) { + case MLX5_HEADER_TYPE_VXLAN: + encap_size = gen_vxlan_header_ipv4(*out_dev, encap_header, + e->h_dest, ttl, + fl4.daddr, + fl4.saddr, tun_key->tp_dst, + tunnel_id_to_key32(tun_key->tun_id)); + break; + default: + err = -EOPNOTSUPP; + goto out; + } + + err = mlx5_encap_alloc(priv->mdev, e->tunnel_type, + encap_size, encap_header, &e->encap_id); +out: + if (err && n) + neigh_release(n); + kfree(encap_header); + return err; +} + +static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv, + struct net_device *mirred_dev, + struct mlx5_encap_entry *e, + struct net_device **out_dev) + +{ + int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size); + struct ip_tunnel_key *tun_key = &e->tun_info.key; + int encap_size, err, ttl = 0; + struct neighbour *n = NULL; + struct flowi6 fl6 = {}; + char *encap_header; + + encap_header = kzalloc(max_encap_size, GFP_KERNEL); + if (!encap_header) + return -ENOMEM; + + switch (e->tunnel_type) { + case MLX5_HEADER_TYPE_VXLAN: + fl6.flowi6_proto = IPPROTO_UDP; + fl6.fl6_dport = tun_key->tp_dst; + break; + default: + err = -EOPNOTSUPP; + goto out; + } + + fl6.flowlabel = ip6_make_flowinfo(RT_TOS(tun_key->tos), tun_key->label); + fl6.daddr = tun_key->u.ipv6.dst; + fl6.saddr = tun_key->u.ipv6.src; + + err = 
mlx5e_route_lookup_ipv6(priv, mirred_dev, out_dev, + &fl6, &n, &ttl); + if (err) + goto out; + if (!(n->nud_state & NUD_VALID)) { - err = -ENOTSUPP; + pr_warn("%s: can't offload, neighbour to %pI6 invalid\n", __func__, &fl6.daddr); + err = -EOPNOTSUPP; goto out; } + e->n = n; + e->out_dev = *out_dev; + neigh_ha_snapshot(e->h_dest, n, *out_dev); switch (e->tunnel_type) { case MLX5_HEADER_TYPE_VXLAN: - encap_size = gen_vxlan_header_ipv4(*out_dev, encap_header, + encap_size = gen_vxlan_header_ipv6(*out_dev, encap_header, e->h_dest, ttl, - e->tun_info.daddr, - saddr, e->tun_info.tp_dst, - e->tun_info.tun_id); + &fl6.daddr, + &fl6.saddr, tun_key->tp_dst, + tunnel_id_to_key32(tun_key->tun_id)); break; default: err = -EOPNOTSUPP; @@ -772,6 +958,8 @@ static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv, err = mlx5_encap_alloc(priv->mdev, e->tunnel_type, encap_size, encap_header, &e->encap_id); out: + if (err && n) + neigh_release(n); kfree(encap_header); return err; } @@ -784,40 +972,38 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv, struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; unsigned short family = ip_tunnel_info_af(tun_info); struct ip_tunnel_key *key = &tun_info->key; - struct mlx5_encap_info info; struct mlx5_encap_entry *e; struct net_device *out_dev; + int tunnel_type, err = -EOPNOTSUPP; uintptr_t hash_key; bool found = false; - int tunnel_type; - int err; - /* udp dst port must be given */ + /* udp dst port must be set */ if (!memchr_inv(&key->tp_dst, 0, sizeof(key->tp_dst))) + goto vxlan_encap_offload_err; + + /* setting udp src port isn't supported */ + if (memchr_inv(&key->tp_src, 0, sizeof(key->tp_src))) { +vxlan_encap_offload_err: + netdev_warn(priv->netdev, + "must set udp dst port and not set udp src port\n"); return -EOPNOTSUPP; + } if (mlx5e_vxlan_lookup_port(priv, be16_to_cpu(key->tp_dst)) && MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap)) { - info.tp_dst = key->tp_dst; - info.tun_id = tunnel_id_to_key32(key->tun_id); 
tunnel_type = MLX5_HEADER_TYPE_VXLAN; } else { + netdev_warn(priv->netdev, + "%d isn't an offloaded vxlan udp dport\n", be16_to_cpu(key->tp_dst)); return -EOPNOTSUPP; } - switch (family) { - case AF_INET: - info.daddr = key->u.ipv4.dst; - break; - default: - return -EOPNOTSUPP; - } - - hash_key = hash_encap_info(&info); + hash_key = hash_encap_info(key); hash_for_each_possible_rcu(esw->offloads.encap_tbl, e, encap_hlist, hash_key) { - if (!cmp_encap_info(&e->tun_info, &info)) { + if (!cmp_encap_info(&e->tun_info.key, key)) { found = true; break; } @@ -832,11 +1018,15 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv, if (!e) return -ENOMEM; - e->tun_info = info; + e->tun_info = *tun_info; e->tunnel_type = tunnel_type; INIT_LIST_HEAD(&e->flows); - err = mlx5e_create_encap_header_ipv4(priv, mirred_dev, e, &out_dev); + if (family == AF_INET) + err = mlx5e_create_encap_header_ipv4(priv, mirred_dev, e, &out_dev); + else if (family == AF_INET6) + err = mlx5e_create_encap_header_ipv6(priv, mirred_dev, e, &out_dev); + if (err) goto out_err; @@ -986,7 +1176,7 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol, if (IS_ERR(flow->rule)) { err = PTR_ERR(flow->rule); - goto err_free; + goto err_del_rule; } err = rhashtable_insert_fast(&tc->ht, &flow->node, @@ -997,7 +1187,7 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol, goto out; err_del_rule: - mlx5_del_flow_rules(flow->rule); + mlx5e_tc_del_flow(priv, flow); err_free: kfree(flow); @@ -1050,10 +1240,14 @@ int mlx5e_stats_flower(struct mlx5e_priv *priv, mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse); + preempt_disable(); + tcf_exts_to_list(f->exts, &actions); list_for_each_entry(a, &actions, list) tcf_action_stats_update(a, bytes, packets, lastuse); + preempt_enable(); + return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index cfb68371c397..f193128bac4b 100644 --- 
a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -154,6 +154,8 @@ static inline unsigned int mlx5e_calc_min_inline(enum mlx5_inline_modes mode, int hlen; switch (mode) { + case MLX5_INLINE_MODE_NONE: + return 0; case MLX5_INLINE_MODE_TCP_UDP: hlen = eth_get_headlen(skb->data, skb_headlen(skb)); if (hlen == ETH_HLEN && !skb_vlan_tag_present(skb)) @@ -283,21 +285,23 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb) wi->num_bytes = num_bytes; - if (skb_vlan_tag_present(skb)) { - mlx5e_insert_vlan(eseg->inline_hdr_start, skb, ihs, &skb_data, - &skb_len); - ihs += VLAN_HLEN; - } else { - memcpy(eseg->inline_hdr_start, skb_data, ihs); - mlx5e_tx_skb_pull_inline(&skb_data, &skb_len, ihs); + ds_cnt = sizeof(*wqe) / MLX5_SEND_WQE_DS; + if (ihs) { + if (skb_vlan_tag_present(skb)) { + mlx5e_insert_vlan(eseg->inline_hdr.start, skb, ihs, &skb_data, &skb_len); + ihs += VLAN_HLEN; + } else { + memcpy(eseg->inline_hdr.start, skb_data, ihs); + mlx5e_tx_skb_pull_inline(&skb_data, &skb_len, ihs); + } + eseg->inline_hdr.sz = cpu_to_be16(ihs); + ds_cnt += DIV_ROUND_UP(ihs - sizeof(eseg->inline_hdr.start), MLX5_SEND_WQE_DS); + } else if (skb_vlan_tag_present(skb)) { + eseg->insert.type = cpu_to_be16(MLX5_ETH_WQE_INSERT_VLAN); + eseg->insert.vlan_tci = cpu_to_be16(skb_vlan_tag_get(skb)); } - eseg->inline_hdr_sz = cpu_to_be16(ihs); - - ds_cnt = sizeof(*wqe) / MLX5_SEND_WQE_DS; - ds_cnt += DIV_ROUND_UP(ihs - sizeof(eseg->inline_hdr_start), - MLX5_SEND_WQE_DS); - dseg = (struct mlx5_wqe_data_seg *)cseg + ds_cnt; + dseg = (struct mlx5_wqe_data_seg *)cseg + ds_cnt; wi->num_dma = 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 8ffcc8808e50..ea5d8d37a75c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -54,6 +54,7 @@ enum { MLX5_NUM_SPARE_EQE = 0x80, MLX5_NUM_ASYNC_EQE = 0x100, 
MLX5_NUM_CMD_EQE = 32, + MLX5_NUM_PF_DRAIN = 64, }; enum { @@ -153,6 +154,8 @@ static const char *eqe_type_str(u8 type) return "MLX5_EVENT_TYPE_PAGE_REQUEST"; case MLX5_EVENT_TYPE_PAGE_FAULT: return "MLX5_EVENT_TYPE_PAGE_FAULT"; + case MLX5_EVENT_TYPE_PPS_EVENT: + return "MLX5_EVENT_TYPE_PPS_EVENT"; default: return "Unrecognized event"; } @@ -188,10 +191,193 @@ static void eq_update_ci(struct mlx5_eq *eq, int arm) mb(); } -static int mlx5_eq_int(struct mlx5_core_dev *dev, struct mlx5_eq *eq) +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING +static void eqe_pf_action(struct work_struct *work) +{ + struct mlx5_pagefault *pfault = container_of(work, + struct mlx5_pagefault, + work); + struct mlx5_eq *eq = pfault->eq; + + mlx5_core_page_fault(eq->dev, pfault); + mempool_free(pfault, eq->pf_ctx.pool); +} + +static void eq_pf_process(struct mlx5_eq *eq) +{ + struct mlx5_core_dev *dev = eq->dev; + struct mlx5_eqe_page_fault *pf_eqe; + struct mlx5_pagefault *pfault; + struct mlx5_eqe *eqe; + int set_ci = 0; + + while ((eqe = next_eqe_sw(eq))) { + pfault = mempool_alloc(eq->pf_ctx.pool, GFP_ATOMIC); + if (!pfault) { + schedule_work(&eq->pf_ctx.work); + break; + } + + dma_rmb(); + pf_eqe = &eqe->data.page_fault; + pfault->event_subtype = eqe->sub_type; + pfault->bytes_committed = be32_to_cpu(pf_eqe->bytes_committed); + + mlx5_core_dbg(dev, + "PAGE_FAULT: subtype: 0x%02x, bytes_committed: 0x%06x\n", + eqe->sub_type, pfault->bytes_committed); + + switch (eqe->sub_type) { + case MLX5_PFAULT_SUBTYPE_RDMA: + /* RDMA based event */ + pfault->type = + be32_to_cpu(pf_eqe->rdma.pftype_token) >> 24; + pfault->token = + be32_to_cpu(pf_eqe->rdma.pftype_token) & + MLX5_24BIT_MASK; + pfault->rdma.r_key = + be32_to_cpu(pf_eqe->rdma.r_key); + pfault->rdma.packet_size = + be16_to_cpu(pf_eqe->rdma.packet_length); + pfault->rdma.rdma_op_len = + be32_to_cpu(pf_eqe->rdma.rdma_op_len); + pfault->rdma.rdma_va = + be64_to_cpu(pf_eqe->rdma.rdma_va); + mlx5_core_dbg(dev, + "PAGE_FAULT: type:0x%x, token: 
0x%06x, r_key: 0x%08x\n", + pfault->type, pfault->token, + pfault->rdma.r_key); + mlx5_core_dbg(dev, + "PAGE_FAULT: rdma_op_len: 0x%08x, rdma_va: 0x%016llx\n", + pfault->rdma.rdma_op_len, + pfault->rdma.rdma_va); + break; + + case MLX5_PFAULT_SUBTYPE_WQE: + /* WQE based event */ + pfault->type = + be32_to_cpu(pf_eqe->wqe.pftype_wq) >> 24; + pfault->token = + be32_to_cpu(pf_eqe->wqe.token); + pfault->wqe.wq_num = + be32_to_cpu(pf_eqe->wqe.pftype_wq) & + MLX5_24BIT_MASK; + pfault->wqe.wqe_index = + be16_to_cpu(pf_eqe->wqe.wqe_index); + pfault->wqe.packet_size = + be16_to_cpu(pf_eqe->wqe.packet_length); + mlx5_core_dbg(dev, + "PAGE_FAULT: type:0x%x, token: 0x%06x, wq_num: 0x%06x, wqe_index: 0x%04x\n", + pfault->type, pfault->token, + pfault->wqe.wq_num, + pfault->wqe.wqe_index); + break; + + default: + mlx5_core_warn(dev, + "Unsupported page fault event sub-type: 0x%02hhx\n", + eqe->sub_type); + /* Unsupported page faults should still be + * resolved by the page fault handler + */ + } + + pfault->eq = eq; + INIT_WORK(&pfault->work, eqe_pf_action); + queue_work(eq->pf_ctx.wq, &pfault->work); + + ++eq->cons_index; + ++set_ci; + + if (unlikely(set_ci >= MLX5_NUM_SPARE_EQE)) { + eq_update_ci(eq, 0); + set_ci = 0; + } + } + + eq_update_ci(eq, 1); +} + +static irqreturn_t mlx5_eq_pf_int(int irq, void *eq_ptr) +{ + struct mlx5_eq *eq = eq_ptr; + unsigned long flags; + + if (spin_trylock_irqsave(&eq->pf_ctx.lock, flags)) { + eq_pf_process(eq); + spin_unlock_irqrestore(&eq->pf_ctx.lock, flags); + } else { + schedule_work(&eq->pf_ctx.work); + } + + return IRQ_HANDLED; +} + +/* mempool_refill() was proposed but unfortunately wasn't accepted + * http://lkml.iu.edu/hypermail/linux/kernel/1512.1/05073.html + * Chip workaround. 
+ */ +static void mempool_refill(mempool_t *pool) +{ + while (pool->curr_nr < pool->min_nr) + mempool_free(mempool_alloc(pool, GFP_KERNEL), pool); +} + +static void eq_pf_action(struct work_struct *work) +{ + struct mlx5_eq *eq = container_of(work, struct mlx5_eq, pf_ctx.work); + + mempool_refill(eq->pf_ctx.pool); + + spin_lock_irq(&eq->pf_ctx.lock); + eq_pf_process(eq); + spin_unlock_irq(&eq->pf_ctx.lock); +} + +static int init_pf_ctx(struct mlx5_eq_pagefault *pf_ctx, const char *name) +{ + spin_lock_init(&pf_ctx->lock); + INIT_WORK(&pf_ctx->work, eq_pf_action); + + pf_ctx->wq = alloc_ordered_workqueue(name, + WQ_MEM_RECLAIM); + if (!pf_ctx->wq) + return -ENOMEM; + + pf_ctx->pool = mempool_create_kmalloc_pool + (MLX5_NUM_PF_DRAIN, sizeof(struct mlx5_pagefault)); + if (!pf_ctx->pool) + goto err_wq; + + return 0; +err_wq: + destroy_workqueue(pf_ctx->wq); + return -ENOMEM; +} + +int mlx5_core_page_fault_resume(struct mlx5_core_dev *dev, u32 token, + u32 wq_num, u8 type, int error) +{ + u32 out[MLX5_ST_SZ_DW(page_fault_resume_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(page_fault_resume_in)] = {0}; + + MLX5_SET(page_fault_resume_in, in, opcode, + MLX5_CMD_OP_PAGE_FAULT_RESUME); + MLX5_SET(page_fault_resume_in, in, error, !!error); + MLX5_SET(page_fault_resume_in, in, page_fault_type, type); + MLX5_SET(page_fault_resume_in, in, wq_number, wq_num); + MLX5_SET(page_fault_resume_in, in, token, token); + + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} +EXPORT_SYMBOL_GPL(mlx5_core_page_fault_resume); +#endif + +static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr) { + struct mlx5_eq *eq = eq_ptr; + struct mlx5_core_dev *dev = eq->dev; struct mlx5_eqe *eqe; - int eqes_found = 0; int set_ci = 0; u32 cqn = -1; u32 rsn; @@ -276,12 +462,6 @@ static int mlx5_eq_int(struct mlx5_core_dev *dev, struct mlx5_eq *eq) } break; -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - case MLX5_EVENT_TYPE_PAGE_FAULT: - mlx5_eq_pagefault(dev, eqe); - break; -#endif - #ifdef 
CONFIG_MLX5_CORE_EN case MLX5_EVENT_TYPE_NIC_VPORT_CHANGE: mlx5_eswitch_vport_event(dev->priv.eswitch, eqe); @@ -292,6 +472,10 @@ static int mlx5_eq_int(struct mlx5_core_dev *dev, struct mlx5_eq *eq) mlx5_port_module_event(dev, eqe); break; + case MLX5_EVENT_TYPE_PPS_EVENT: + if (dev->event) + dev->event(dev, MLX5_DEV_EVENT_PPS, (unsigned long)eqe); + break; default: mlx5_core_warn(dev, "Unhandled event 0x%x on EQ 0x%x\n", eqe->type, eq->eqn); @@ -299,7 +483,6 @@ static int mlx5_eq_int(struct mlx5_core_dev *dev, struct mlx5_eq *eq) } ++eq->cons_index; - eqes_found = 1; ++set_ci; /* The HCA will think the queue has overflowed if we @@ -319,17 +502,6 @@ static int mlx5_eq_int(struct mlx5_core_dev *dev, struct mlx5_eq *eq) if (cqn != -1) tasklet_schedule(&eq->tasklet_ctx.task); - return eqes_found; -} - -static irqreturn_t mlx5_msix_handler(int irq, void *eq_ptr) -{ - struct mlx5_eq *eq = eq_ptr; - struct mlx5_core_dev *dev = eq->dev; - - mlx5_eq_int(dev, eq); - - /* MSI-X vectors always belong to us */ return IRQ_HANDLED; } @@ -345,22 +517,32 @@ static void init_eq_buf(struct mlx5_eq *eq) } int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, - int nent, u64 mask, const char *name, struct mlx5_uar *uar) + int nent, u64 mask, const char *name, + enum mlx5_eq_type type) { u32 out[MLX5_ST_SZ_DW(create_eq_out)] = {0}; struct mlx5_priv *priv = &dev->priv; + irq_handler_t handler; __be64 *pas; void *eqc; int inlen; u32 *in; int err; + eq->type = type; eq->nent = roundup_pow_of_two(nent + MLX5_NUM_SPARE_EQE); eq->cons_index = 0; err = mlx5_buf_alloc(dev, eq->nent * MLX5_EQE_SIZE, &eq->buf); if (err) return err; +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING + if (type == MLX5_EQ_TYPE_PF) + handler = mlx5_eq_pf_int; + else +#endif + handler = mlx5_eq_int; + init_eq_buf(eq); inlen = MLX5_ST_SZ_BYTES(create_eq_in) + @@ -380,7 +562,7 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, eqc = MLX5_ADDR_OF(create_eq_in, in, 
eq_context_entry); MLX5_SET(eqc, eqc, log_eq_size, ilog2(eq->nent)); - MLX5_SET(eqc, eqc, uar_page, uar->index); + MLX5_SET(eqc, eqc, uar_page, priv->uar->index); MLX5_SET(eqc, eqc, intr, vecidx); MLX5_SET(eqc, eqc, log_page_size, eq->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); @@ -395,8 +577,8 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, eq->eqn = MLX5_GET(create_eq_out, out, eq_number); eq->irqn = priv->msix_arr[vecidx].vector; eq->dev = dev; - eq->doorbell = uar->map + MLX5_EQ_DOORBEL_OFFSET; - err = request_irq(eq->irqn, mlx5_msix_handler, 0, + eq->doorbell = priv->uar->map + MLX5_EQ_DOORBEL_OFFSET; + err = request_irq(eq->irqn, handler, 0, priv->irq_info[vecidx].name, eq); if (err) goto err_eq; @@ -405,11 +587,20 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, if (err) goto err_irq; - INIT_LIST_HEAD(&eq->tasklet_ctx.list); - INIT_LIST_HEAD(&eq->tasklet_ctx.process_list); - spin_lock_init(&eq->tasklet_ctx.lock); - tasklet_init(&eq->tasklet_ctx.task, mlx5_cq_tasklet_cb, - (unsigned long)&eq->tasklet_ctx); +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING + if (type == MLX5_EQ_TYPE_PF) { + err = init_pf_ctx(&eq->pf_ctx, name); + if (err) + goto err_irq; + } else +#endif + { + INIT_LIST_HEAD(&eq->tasklet_ctx.list); + INIT_LIST_HEAD(&eq->tasklet_ctx.process_list); + spin_lock_init(&eq->tasklet_ctx.lock); + tasklet_init(&eq->tasklet_ctx.task, mlx5_cq_tasklet_cb, + (unsigned long)&eq->tasklet_ctx); + } /* EQs are created in ARMED state */ @@ -444,7 +635,16 @@ int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq) mlx5_core_warn(dev, "failed to destroy a previously created eq: eqn %d\n", eq->eqn); synchronize_irq(eq->irqn); - tasklet_disable(&eq->tasklet_ctx.task); + + if (eq->type == MLX5_EQ_TYPE_COMP) { + tasklet_disable(&eq->tasklet_ctx.task); +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING + } else if (eq->type == MLX5_EQ_TYPE_PF) { + cancel_work_sync(&eq->pf_ctx.work); + 
destroy_workqueue(eq->pf_ctx.wq); + mempool_destroy(eq->pf_ctx.pool); +#endif + } mlx5_buf_free(dev, &eq->buf); return err; @@ -479,8 +679,6 @@ int mlx5_start_eqs(struct mlx5_core_dev *dev) u64 async_event_mask = MLX5_ASYNC_EVENT_MASK; int err; - if (MLX5_CAP_GEN(dev, pg)) - async_event_mask |= (1ull << MLX5_EVENT_TYPE_PAGE_FAULT); if (MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_ETH && MLX5_CAP_GEN(dev, vport_group_manager) && @@ -492,9 +690,12 @@ int mlx5_start_eqs(struct mlx5_core_dev *dev) else mlx5_core_dbg(dev, "port_module_event is not set\n"); + if (MLX5_CAP_GEN(dev, pps)) + async_event_mask |= (1ull << MLX5_EVENT_TYPE_PPS_EVENT); + err = mlx5_create_map_eq(dev, &table->cmd_eq, MLX5_EQ_VEC_CMD, MLX5_NUM_CMD_EQE, 1ull << MLX5_EVENT_TYPE_CMD, - "mlx5_cmd_eq", &dev->priv.uuari.uars[0]); + "mlx5_cmd_eq", MLX5_EQ_TYPE_ASYNC); if (err) { mlx5_core_warn(dev, "failed to create cmd EQ %d\n", err); return err; @@ -504,7 +705,7 @@ int mlx5_start_eqs(struct mlx5_core_dev *dev) err = mlx5_create_map_eq(dev, &table->async_eq, MLX5_EQ_VEC_ASYNC, MLX5_NUM_ASYNC_EQE, async_event_mask, - "mlx5_async_eq", &dev->priv.uuari.uars[0]); + "mlx5_async_eq", MLX5_EQ_TYPE_ASYNC); if (err) { mlx5_core_warn(dev, "failed to create async EQ %d\n", err); goto err1; @@ -514,13 +715,33 @@ int mlx5_start_eqs(struct mlx5_core_dev *dev) MLX5_EQ_VEC_PAGES, /* TODO: sriov max_vf + */ 1, 1 << MLX5_EVENT_TYPE_PAGE_REQUEST, "mlx5_pages_eq", - &dev->priv.uuari.uars[0]); + MLX5_EQ_TYPE_ASYNC); if (err) { mlx5_core_warn(dev, "failed to create pages EQ %d\n", err); goto err2; } +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING + if (MLX5_CAP_GEN(dev, pg)) { + err = mlx5_create_map_eq(dev, &table->pfault_eq, + MLX5_EQ_VEC_PFAULT, + MLX5_NUM_ASYNC_EQE, + 1 << MLX5_EVENT_TYPE_PAGE_FAULT, + "mlx5_page_fault_eq", + MLX5_EQ_TYPE_PF); + if (err) { + mlx5_core_warn(dev, "failed to create page fault EQ %d\n", + err); + goto err3; + } + } + return err; +err3: + mlx5_destroy_unmap_eq(dev, &table->pages_eq); +#else 
+ return err; +#endif err2: mlx5_destroy_unmap_eq(dev, &table->async_eq); @@ -536,6 +757,14 @@ int mlx5_stop_eqs(struct mlx5_core_dev *dev) struct mlx5_eq_table *table = &dev->priv.eq_table; int err; +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING + if (MLX5_CAP_GEN(dev, pg)) { + err = mlx5_destroy_unmap_eq(dev, &table->pfault_eq); + if (err) + return err; + } +#endif + err = mlx5_destroy_unmap_eq(dev, &table->pages_eq); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index f14d9c9ba773..fcd5bc7e31db 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -133,7 +133,7 @@ static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u32 vport, if (!MLX5_CAP_ESW(dev, vport_cvlan_strip) || !MLX5_CAP_ESW(dev, vport_cvlan_insert_if_not_exist)) - return -ENOTSUPP; + return -EOPNOTSUPP; esw_debug(dev, "Set Vport[%d] VLAN %d qos %d set=%x\n", vport, vlan, qos, set_flags); @@ -353,7 +353,7 @@ static int esw_create_legacy_fdb_table(struct mlx5_eswitch *esw, int nvports) root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB); if (!root_ns) { esw_warn(dev, "Failed to get FDB flow namespace\n"); - return -ENOMEM; + return -EOPNOTSUPP; } flow_group_in = mlx5_vzalloc(inlen); @@ -962,7 +962,7 @@ static int esw_vport_enable_egress_acl(struct mlx5_eswitch *esw, root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_ESW_EGRESS); if (!root_ns) { esw_warn(dev, "Failed to get E-Switch egress flow namespace\n"); - return -EIO; + return -EOPNOTSUPP; } flow_group_in = mlx5_vzalloc(inlen); @@ -979,7 +979,7 @@ static int esw_vport_enable_egress_acl(struct mlx5_eswitch *esw, MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria); - MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.vlan_tag); + 
MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.cvlan_tag); MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.first_vid); MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0); @@ -1079,7 +1079,7 @@ static int esw_vport_enable_ingress_acl(struct mlx5_eswitch *esw, root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_ESW_INGRESS); if (!root_ns) { esw_warn(dev, "Failed to get E-Switch ingress flow namespace\n"); - return -EIO; + return -EOPNOTSUPP; } flow_group_in = mlx5_vzalloc(inlen); @@ -1098,7 +1098,7 @@ static int esw_vport_enable_ingress_acl(struct mlx5_eswitch *esw, match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria); MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); - MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.vlan_tag); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.cvlan_tag); MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.smac_47_16); MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.smac_15_0); MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); @@ -1115,7 +1115,7 @@ static int esw_vport_enable_ingress_acl(struct mlx5_eswitch *esw, memset(flow_group_in, 0, inlen); MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); - MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.vlan_tag); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.cvlan_tag); MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1); MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1); @@ -1254,7 +1254,7 @@ static int esw_vport_ingress_config(struct mlx5_eswitch *esw, } if (vport->info.vlan || vport->info.qos) - MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.vlan_tag); + 
MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.cvlan_tag); if (vport->info.spoofchk) { MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.smac_47_16); @@ -1335,8 +1335,8 @@ static int esw_vport_egress_config(struct mlx5_eswitch *esw, } /* Allowed vlan rule */ - MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.vlan_tag); - MLX5_SET_TO_ONES(fte_match_param, spec->match_value, outer_headers.vlan_tag); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.cvlan_tag); + MLX5_SET_TO_ONES(fte_match_param, spec->match_value, outer_headers.cvlan_tag); MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.first_vid); MLX5_SET(fte_match_param, spec->match_value, outer_headers.first_vid, vport->info.vlan); @@ -1415,7 +1415,7 @@ static void esw_destroy_tsar(struct mlx5_eswitch *esw) } static int esw_vport_enable_qos(struct mlx5_eswitch *esw, int vport_num, - u32 initial_max_rate) + u32 initial_max_rate, u32 initial_bw_share) { u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0}; struct mlx5_vport *vport = &esw->vports[vport_num]; @@ -1439,6 +1439,7 @@ static int esw_vport_enable_qos(struct mlx5_eswitch *esw, int vport_num, esw->qos.root_tsar_id); MLX5_SET(scheduling_context, &sched_ctx, max_average_bw, initial_max_rate); + MLX5_SET(scheduling_context, &sched_ctx, bw_share, initial_bw_share); err = mlx5_create_scheduling_element_cmd(dev, SCHEDULING_HIERARCHY_E_SWITCH, @@ -1473,7 +1474,7 @@ static void esw_vport_disable_qos(struct mlx5_eswitch *esw, int vport_num) } static int esw_vport_qos_config(struct mlx5_eswitch *esw, int vport_num, - u32 max_rate) + u32 max_rate, u32 bw_share) { u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0}; struct mlx5_vport *vport = &esw->vports[vport_num]; @@ -1497,7 +1498,9 @@ static int esw_vport_qos_config(struct mlx5_eswitch *esw, int vport_num, esw->qos.root_tsar_id); MLX5_SET(scheduling_context, &sched_ctx, max_average_bw, max_rate); + 
MLX5_SET(scheduling_context, &sched_ctx, bw_share, bw_share); bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW; + bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_BW_SHARE; err = mlx5_modify_scheduling_element_cmd(dev, SCHEDULING_HIERARCHY_E_SWITCH, @@ -1563,7 +1566,8 @@ static void esw_enable_vport(struct mlx5_eswitch *esw, int vport_num, esw_apply_vport_conf(esw, vport); /* Attach vport to the eswitch rate limiter */ - if (esw_vport_enable_qos(esw, vport_num, vport->info.max_rate)) + if (esw_vport_enable_qos(esw, vport_num, vport->info.max_rate, + vport->qos.bw_share)) esw_warn(esw->dev, "Failed to attach vport %d to eswitch rate limiter", vport_num); /* Sync with current vport context */ @@ -1630,7 +1634,7 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) if (!MLX5_CAP_GEN(esw->dev, eswitch_flow_table) || !MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ft_support)) { esw_warn(esw->dev, "E-Switch FDB is not supported, aborting ...\n"); - return -ENOTSUPP; + return -EOPNOTSUPP; } if (!MLX5_CAP_ESW_INGRESS_ACL(esw->dev, ft_support)) @@ -1952,6 +1956,7 @@ int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw, ivi->qos = evport->info.qos; ivi->spoofchk = evport->info.spoofchk; ivi->trusted = evport->info.trusted; + ivi->min_tx_rate = evport->info.min_rate; ivi->max_tx_rate = evport->info.max_rate; mutex_unlock(&esw->state_lock); @@ -2046,23 +2051,103 @@ int mlx5_eswitch_set_vport_trust(struct mlx5_eswitch *esw, return 0; } -int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw, - int vport, u32 max_rate) +static u32 calculate_vports_min_rate_divider(struct mlx5_eswitch *esw) +{ + u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share); + struct mlx5_vport *evport; + u32 max_guarantee = 0; + int i; + + for (i = 0; i <= esw->total_vports; i++) { + evport = &esw->vports[i]; + if (!evport->enabled || evport->info.min_rate < max_guarantee) + continue; + max_guarantee = evport->info.min_rate; + } + + return 
max_t(u32, max_guarantee / fw_max_bw_share, 1); +} + +static int normalize_vports_min_rate(struct mlx5_eswitch *esw, u32 divider) +{ + u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share); + struct mlx5_vport *evport; + u32 vport_max_rate; + u32 vport_min_rate; + u32 bw_share; + int err; + int i; + + for (i = 0; i <= esw->total_vports; i++) { + evport = &esw->vports[i]; + if (!evport->enabled) + continue; + vport_min_rate = evport->info.min_rate; + vport_max_rate = evport->info.max_rate; + bw_share = MLX5_MIN_BW_SHARE; + + if (vport_min_rate) + bw_share = MLX5_RATE_TO_BW_SHARE(vport_min_rate, + divider, + fw_max_bw_share); + + if (bw_share == evport->qos.bw_share) + continue; + + err = esw_vport_qos_config(esw, i, vport_max_rate, + bw_share); + if (!err) + evport->qos.bw_share = bw_share; + else + return err; + } + + return 0; +} + +int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw, int vport, + u32 max_rate, u32 min_rate) { + u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share); + bool min_rate_supported = MLX5_CAP_QOS(esw->dev, esw_bw_share) && + fw_max_bw_share >= MLX5_MIN_BW_SHARE; + bool max_rate_supported = MLX5_CAP_QOS(esw->dev, esw_rate_limit); struct mlx5_vport *evport; + u32 previous_min_rate; + u32 divider; int err = 0; if (!ESW_ALLOWED(esw)) return -EPERM; if (!LEGAL_VPORT(esw, vport)) return -EINVAL; + if ((min_rate && !min_rate_supported) || (max_rate && !max_rate_supported)) + return -EOPNOTSUPP; mutex_lock(&esw->state_lock); evport = &esw->vports[vport]; - err = esw_vport_qos_config(esw, vport, max_rate); + + if (min_rate == evport->info.min_rate) + goto set_max_rate; + + previous_min_rate = evport->info.min_rate; + evport->info.min_rate = min_rate; + divider = calculate_vports_min_rate_divider(esw); + err = normalize_vports_min_rate(esw, divider); + if (err) { + evport->info.min_rate = previous_min_rate; + goto unlock; + } + +set_max_rate: + if (max_rate == evport->info.max_rate) + goto unlock; + + err = 
esw_vport_qos_config(esw, vport, max_rate, evport->qos.bw_share); if (!err) evport->info.max_rate = max_rate; +unlock: mutex_unlock(&esw->state_lock); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 8661dd3f542c..5b78883d5654 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -36,6 +36,7 @@ #include <linux/if_ether.h> #include <linux/if_link.h> #include <net/devlink.h> +#include <net/ip_tunnels.h> #include <linux/mlx5/device.h> #define MLX5_MAX_UC_PER_VPORT(dev) \ @@ -49,6 +50,11 @@ #define FDB_UPLINK_VPORT 0xffff +#define MLX5_MIN_BW_SHARE 1 + +#define MLX5_RATE_TO_BW_SHARE(rate, divider, limit) \ + min_t(u32, max_t(u32, (rate) / (divider), MLX5_MIN_BW_SHARE), limit) + /* L2 -mac address based- hash helpers */ struct l2addr_node { struct hlist_node hlist; @@ -115,6 +121,7 @@ struct mlx5_vport_info { u8 qos; u64 node_guid; int link_state; + u32 min_rate; u32 max_rate; bool spoofchk; bool trusted; @@ -137,6 +144,7 @@ struct mlx5_vport { struct { bool enabled; u32 esw_tsar_ix; + u32 bw_share; } qos; bool enabled; @@ -248,8 +256,8 @@ int mlx5_eswitch_set_vport_spoofchk(struct mlx5_eswitch *esw, int vport, bool spoofchk); int mlx5_eswitch_set_vport_trust(struct mlx5_eswitch *esw, int vport_num, bool setting); -int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw, - int vport, u32 max_rate); +int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw, int vport, + u32 max_rate, u32 min_rate); int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw, int vport, struct ifla_vf_info *ivi); int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw, @@ -274,18 +282,12 @@ enum { #define MLX5_FLOW_CONTEXT_ACTION_VLAN_POP 0x40 #define MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH 0x80 -struct mlx5_encap_info { - __be32 daddr; - __be32 tun_id; - __be16 tp_dst; -}; - struct mlx5_encap_entry { struct hlist_node encap_hlist; struct 
list_head flows; u32 encap_id; struct neighbour *n; - struct mlx5_encap_info tun_info; + struct ip_tunnel_info tun_info; unsigned char h_dest[ETH_ALEN]; /* destination eth addr */ struct net_device *out_dev; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 03293ed1cc22..4f5b0d47d5f3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -166,7 +166,7 @@ static int esw_add_vlan_action_check(struct mlx5_esw_flow_attr *attr, return 0; out_notsupp: - return -ENOTSUPP; + return -EOPNOTSUPP; } int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw, @@ -402,19 +402,18 @@ out: } #define MAX_PF_SQ 256 -#define ESW_OFFLOADS_NUM_ENTRIES (1 << 13) /* 8K */ #define ESW_OFFLOADS_NUM_GROUPS 4 static int esw_create_offloads_fdb_table(struct mlx5_eswitch *esw, int nvports) { int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + int table_size, ix, esw_size, err = 0; struct mlx5_core_dev *dev = esw->dev; struct mlx5_flow_namespace *root_ns; struct mlx5_flow_table *fdb = NULL; struct mlx5_flow_group *g; u32 *flow_group_in; void *match_criteria; - int table_size, ix, err = 0; u32 flags = 0; flow_group_in = mlx5_vzalloc(inlen); @@ -424,18 +423,23 @@ static int esw_create_offloads_fdb_table(struct mlx5_eswitch *esw, int nvports) root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB); if (!root_ns) { esw_warn(dev, "Failed to get FDB flow namespace\n"); + err = -EOPNOTSUPP; goto ns_err; } - esw_debug(dev, "Create offloads FDB table, log_max_size(%d)\n", - MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size)); + esw_debug(dev, "Create offloads FDB table, min (max esw size(2^%d), max counters(%d)*groups(%d))\n", + MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size), + MLX5_CAP_GEN(dev, max_flow_counter), ESW_OFFLOADS_NUM_GROUPS); + + esw_size = min_t(int, MLX5_CAP_GEN(dev, max_flow_counter) * 
ESW_OFFLOADS_NUM_GROUPS, + 1 << MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size)); if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, encap) && MLX5_CAP_ESW_FLOWTABLE_FDB(dev, decap)) flags |= MLX5_FLOW_TABLE_TUNNEL_EN; fdb = mlx5_create_auto_grouped_flow_table(root_ns, FDB_FAST_PATH, - ESW_OFFLOADS_NUM_ENTRIES, + esw_size, ESW_OFFLOADS_NUM_GROUPS, 0, flags); if (IS_ERR(fdb)) { @@ -535,7 +539,7 @@ static int esw_create_offloads_table(struct mlx5_eswitch *esw) ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_OFFLOADS); if (!ns) { esw_warn(esw->dev, "Failed to get offloads flow namespace\n"); - return -ENOMEM; + return -EOPNOTSUPP; } ft_offloads = mlx5_create_flow_table(ns, 0, dev->priv.sriov.num_vfs + 2, 0, 0); @@ -655,7 +659,7 @@ static int esw_offloads_start(struct mlx5_eswitch *esw) esw_warn(esw->dev, "Failed setting eswitch to offloads, err %d\n", err); err1 = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_LEGACY); if (err1) - esw_warn(esw->dev, "Failed setting eswitch back to legacy, err %d\n", err); + esw_warn(esw->dev, "Failed setting eswitch back to legacy, err %d\n", err1); } if (esw->offloads.inline_mode == MLX5_INLINE_MODE_NONE) { if (mlx5_eswitch_inline_mode_get(esw, @@ -674,9 +678,14 @@ int esw_offloads_init(struct mlx5_eswitch *esw, int nvports) int vport; int err; + /* disable PF RoCE so missed packets don't go through RoCE steering */ + mlx5_dev_list_lock(); + mlx5_remove_dev_by_protocol(esw->dev, MLX5_INTERFACE_PROTOCOL_IB); + mlx5_dev_list_unlock(); + err = esw_create_offloads_fdb_table(esw, nvports); if (err) - return err; + goto create_fdb_err; err = esw_create_offloads_table(esw); if (err) @@ -696,11 +705,6 @@ int esw_offloads_init(struct mlx5_eswitch *esw, int nvports) goto err_reps; } - /* disable PF RoCE so missed packets don't go through RoCE steering */ - mlx5_dev_list_lock(); - mlx5_remove_dev_by_protocol(esw->dev, MLX5_INTERFACE_PROTOCOL_IB); - mlx5_dev_list_unlock(); - return 0; err_reps: @@ -717,6 +721,13 @@ create_fg_err: create_ft_err: 
esw_destroy_offloads_fdb_table(esw); + +create_fdb_err: + /* enable back PF RoCE */ + mlx5_dev_list_lock(); + mlx5_add_dev_by_protocol(esw->dev, MLX5_INTERFACE_PROTOCOL_IB); + mlx5_dev_list_unlock(); + return err; } @@ -724,11 +735,6 @@ static int esw_offloads_stop(struct mlx5_eswitch *esw) { int err, err1, num_vfs = esw->dev->priv.sriov.num_vfs; - /* enable back PF RoCE */ - mlx5_dev_list_lock(); - mlx5_add_dev_by_protocol(esw->dev, MLX5_INTERFACE_PROTOCOL_IB); - mlx5_dev_list_unlock(); - mlx5_eswitch_disable_sriov(esw); err = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_LEGACY); if (err) { @@ -738,6 +744,11 @@ static int esw_offloads_stop(struct mlx5_eswitch *esw) esw_warn(esw->dev, "Failed setting eswitch back to offloads, err %d\n", err); } + /* enable back PF RoCE */ + mlx5_dev_list_lock(); + mlx5_add_dev_by_protocol(esw->dev, MLX5_INTERFACE_PROTOCOL_IB); + mlx5_dev_list_unlock(); + return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index c4478ecd8056..b64a781c7e85 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -322,7 +322,7 @@ int mlx5_cmd_update_fte(struct mlx5_core_dev *dev, flow_table_properties_nic_receive. 
flow_modify_en); if (!atomic_mod_cap) - return -ENOTSUPP; + return -EOPNOTSUPP; opmod = 1; return mlx5_cmd_set_fte(dev, opmod, modify_mask, ft, group_id, fte); @@ -473,10 +473,13 @@ int mlx5_encap_alloc(struct mlx5_core_dev *dev, int err; u32 *in; - if (size > MLX5_CAP_ESW(dev, max_encap_header_size)) + if (size > max_encap_size) { + mlx5_core_warn(dev, "encap size %zd too big, max supported is %d\n", + size, max_encap_size); return -EINVAL; + } - in = kzalloc(MLX5_ST_SZ_BYTES(alloc_encap_header_in) + max_encap_size, + in = kzalloc(MLX5_ST_SZ_BYTES(alloc_encap_header_in) + size, GFP_KERNEL); if (!in) return -ENOMEM; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 0ac7a2fc916c..2478516a61e2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -1232,10 +1232,18 @@ static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg, fs_for_each_fte(fte, fg) { nested_lock_ref_node(&fte->node, FS_MUTEX_CHILD); if (compare_match_value(&fg->mask, match_value, &fte->val) && - (flow_act->action & fte->action) && - flow_act->flow_tag == fte->flow_tag) { + (flow_act->action & fte->action)) { int old_action = fte->action; + if (fte->flow_tag != flow_act->flow_tag) { + mlx5_core_warn(get_dev(&fte->node), + "FTE flow tag %u already exists with different flow tag %u\n", + fte->flow_tag, + flow_act->flow_tag); + handle = ERR_PTR(-EEXIST); + goto unlock_fte; + } + fte->action |= flow_act->action; handle = add_rule_fte(fte, fg, dest, dest_num, old_action != flow_act->action); @@ -1665,7 +1673,7 @@ static int create_leaf_prios(struct mlx5_flow_namespace *ns, int prio, #define FLOW_TABLE_BIT_SZ 1 #define GET_FLOW_TABLE_CAP(dev, offset) \ - ((be32_to_cpu(*((__be32 *)(dev->hca_caps_cur[MLX5_CAP_FLOW_TABLE]) + \ + ((be32_to_cpu(*((__be32 *)(dev->caps.hca_cur[MLX5_CAP_FLOW_TABLE]) + \ offset / 32)) >> \ (32 - FLOW_TABLE_BIT_SZ - (offset & 0x1f))) 
& FLOW_TABLE_BIT_SZ) static bool has_required_caps(struct mlx5_core_dev *dev, struct node_caps *caps) @@ -1822,7 +1830,7 @@ static int create_anchor_flow_table(struct mlx5_flow_steering *steering) struct mlx5_flow_table *ft; ns = mlx5_get_flow_namespace(steering->dev, MLX5_FLOW_NAMESPACE_ANCHOR); - if (!ns) + if (WARN_ON(!ns)) return -EINVAL; ft = mlx5_create_flow_table(ns, ANCHOR_PRIO, ANCHOR_SIZE, ANCHOR_LEVEL, 0); if (IS_ERR(ft)) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c index 5718aada6605..d0bbefa08af7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c @@ -91,6 +91,20 @@ out: } EXPORT_SYMBOL(mlx5_core_query_vendor_id); +static int mlx5_get_pcam_reg(struct mlx5_core_dev *dev) +{ + return mlx5_query_pcam_reg(dev, dev->caps.pcam, + MLX5_PCAM_FEATURE_ENHANCED_FEATURES, + MLX5_PCAM_REGS_5000_TO_507F); +} + +static int mlx5_get_mcam_reg(struct mlx5_core_dev *dev) +{ + return mlx5_query_mcam_reg(dev, dev->caps.mcam, + MLX5_MCAM_FEATURE_ENHANCED_FEATURES, + MLX5_MCAM_REGS_FIRST_128); +} + int mlx5_query_hca_caps(struct mlx5_core_dev *dev) { int err; @@ -154,6 +168,12 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev) return err; } + if (MLX5_CAP_GEN(dev, pcam_reg)) + mlx5_get_pcam_reg(dev); + + if (MLX5_CAP_GEN(dev, mcam_reg)) + mlx5_get_mcam_reg(dev); + return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index 5bcf93422ee0..d0515391d33b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -231,21 +231,6 @@ static const char *hsynd_str(u8 synd) } } -static u16 get_maj(u32 fw) -{ - return fw >> 28; -} - -static u16 get_min(u32 fw) -{ - return fw >> 16 & 0xfff; -} - -static u16 get_sub(u32 fw) -{ - return fw & 0xffff; -} - static void print_health_info(struct mlx5_core_dev *dev) { struct mlx5_core_health 
*health = &dev->priv.health; @@ -263,13 +248,14 @@ static void print_health_info(struct mlx5_core_dev *dev) dev_err(&dev->pdev->dev, "assert_exit_ptr 0x%08x\n", ioread32be(&h->assert_exit_ptr)); dev_err(&dev->pdev->dev, "assert_callra 0x%08x\n", ioread32be(&h->assert_callra)); - fw = ioread32be(&h->fw_ver); - sprintf(fw_str, "%d.%d.%d", get_maj(fw), get_min(fw), get_sub(fw)); + sprintf(fw_str, "%d.%d.%d", fw_rev_maj(dev), fw_rev_min(dev), fw_rev_sub(dev)); dev_err(&dev->pdev->dev, "fw_ver %s\n", fw_str); dev_err(&dev->pdev->dev, "hw_id 0x%08x\n", ioread32be(&h->hw_id)); dev_err(&dev->pdev->dev, "irisc_index %d\n", ioread8(&h->irisc_index)); dev_err(&dev->pdev->dev, "synd 0x%x: %s\n", ioread8(&h->synd), hsynd_str(ioread8(&h->synd))); dev_err(&dev->pdev->dev, "ext_synd 0x%04x\n", ioread16be(&h->ext_synd)); + fw = ioread32be(&h->fw_ver); + dev_err(&dev->pdev->dev, "raw fw_ver 0x%08x\n", fw); } static unsigned long get_next_poll_jiffies(void) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 6547f22e6b9b..c4242a4e8130 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -152,6 +152,26 @@ static struct mlx5_profile profile[] = { .size = 8, .limit = 4 }, + .mr_cache[16] = { + .size = 8, + .limit = 4 + }, + .mr_cache[17] = { + .size = 8, + .limit = 4 + }, + .mr_cache[18] = { + .size = 8, + .limit = 4 + }, + .mr_cache[19] = { + .size = 4, + .limit = 2 + }, + .mr_cache[20] = { + .size = 4, + .limit = 2 + }, }, }; @@ -398,11 +418,11 @@ static int mlx5_core_get_caps_mode(struct mlx5_core_dev *dev, switch (cap_mode) { case HCA_CAP_OPMOD_GET_MAX: - memcpy(dev->hca_caps_max[cap_type], hca_caps, + memcpy(dev->caps.hca_max[cap_type], hca_caps, MLX5_UN_SZ_BYTES(hca_cap_union)); break; case HCA_CAP_OPMOD_GET_CUR: - memcpy(dev->hca_caps_cur[cap_type], hca_caps, + memcpy(dev->caps.hca_cur[cap_type], hca_caps, MLX5_UN_SZ_BYTES(hca_cap_union)); break; default: 
@@ -493,7 +513,7 @@ static int handle_hca_cap(struct mlx5_core_dev *dev) set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability); - memcpy(set_hca_cap, dev->hca_caps_cur[MLX5_CAP_GENERAL], + memcpy(set_hca_cap, dev->caps.hca_cur[MLX5_CAP_GENERAL], MLX5_ST_SZ_BYTES(cmd_hca_cap)); mlx5_core_dbg(dev, "Current Pkey table size %d Setting new size %d\n", @@ -517,8 +537,18 @@ static int handle_hca_cap(struct mlx5_core_dev *dev) /* disable cmdif checksum */ MLX5_SET(cmd_hca_cap, set_hca_cap, cmdif_checksum, 0); + /* If the HCA supports 4K UARs use it */ + if (MLX5_CAP_GEN_MAX(dev, uar_4k)) + MLX5_SET(cmd_hca_cap, set_hca_cap, uar_4k, 1); + MLX5_SET(cmd_hca_cap, set_hca_cap, log_uar_page_sz, PAGE_SHIFT - 12); + if (MLX5_CAP_GEN_MAX(dev, cache_line_128byte)) + MLX5_SET(cmd_hca_cap, + set_hca_cap, + cache_line_128byte, + cache_line_size() == 128 ? 1 : 0); + err = set_caps(dev, set_ctx, set_sz, MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE); @@ -739,7 +769,7 @@ static int alloc_comp_eqs(struct mlx5_core_dev *dev) snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", i); err = mlx5_create_map_eq(dev, eq, i + MLX5_EQ_VEC_COMP_BASE, nent, 0, - name, &dev->priv.uuari.uars[0]); + name, MLX5_EQ_TYPE_COMP); if (err) { kfree(eq); goto clean; @@ -807,7 +837,7 @@ static int mlx5_core_set_issi(struct mlx5_core_dev *dev) return 0; } - return -ENOTSUPP; + return -EOPNOTSUPP; } @@ -899,8 +929,6 @@ static int mlx5_init_once(struct mlx5_core_dev *dev, struct mlx5_priv *priv) goto out; } - MLX5_INIT_DOORBELL_LOCK(&priv->cq_uar_lock); - err = mlx5_init_cq_table(dev); if (err) { dev_err(&pdev->dev, "failed to initialize cq table\n"); @@ -1079,8 +1107,8 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, goto err_cleanup_once; } - err = mlx5_alloc_uuars(dev, &priv->uuari); - if (err) { + dev->priv.uar = mlx5_get_uars_page(dev); + if (!dev->priv.uar) { dev_err(&pdev->dev, "Failed allocating uar, aborting\n"); goto err_disable_msix; } @@ -1088,7 +1116,7 @@ static int 
mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, err = mlx5_start_eqs(dev); if (err) { dev_err(&pdev->dev, "Failed to start pages and async EQs\n"); - goto err_free_uar; + goto err_put_uars; } err = alloc_comp_eqs(dev); @@ -1154,8 +1182,8 @@ err_affinity_hints: err_stop_eqs: mlx5_stop_eqs(dev); -err_free_uar: - mlx5_free_uuars(dev, &priv->uuari); +err_put_uars: + mlx5_put_uars_page(dev, priv->uar); err_disable_msix: mlx5_disable_msix(dev); @@ -1195,7 +1223,8 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, { int err = 0; - mlx5_drain_health_wq(dev); + if (cleanup) + mlx5_drain_health_wq(dev); mutex_lock(&dev->intf_state_mutex); if (test_bit(MLX5_INTERFACE_STATE_DOWN, &dev->intf_state)) { @@ -1217,7 +1246,7 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, mlx5_irq_clear_affinity_hints(dev); free_comp_eqs(dev); mlx5_stop_eqs(dev); - mlx5_free_uuars(dev, &priv->uuari); + mlx5_put_uars_page(dev, priv->uar); mlx5_disable_msix(dev); if (cleanup) mlx5_cleanup_once(dev); @@ -1283,10 +1312,24 @@ static int init_one(struct pci_dev *pdev, spin_lock_init(&priv->ctx_lock); mutex_init(&dev->pci_status_mutex); mutex_init(&dev->intf_state_mutex); + +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING + err = init_srcu_struct(&priv->pfault_srcu); + if (err) { + dev_err(&pdev->dev, "init_srcu_struct failed with error code %d\n", + err); + goto clean_dev; + } +#endif + mutex_init(&priv->bfregs.reg_head.lock); + mutex_init(&priv->bfregs.wc_head.lock); + INIT_LIST_HEAD(&priv->bfregs.reg_head.list); + INIT_LIST_HEAD(&priv->bfregs.wc_head.list); + err = mlx5_pci_init(dev, priv); if (err) { dev_err(&pdev->dev, "mlx5_pci_init failed with error code %d\n", err); - goto clean_dev; + goto clean_srcu; } err = mlx5_health_init(dev); @@ -1303,9 +1346,7 @@ static int init_one(struct pci_dev *pdev, goto clean_health; } - err = request_module_nowait(MLX5_IB_MOD); - if (err) - pr_info("failed request module on %s\n", MLX5_IB_MOD); 
+ request_module_nowait(MLX5_IB_MOD); err = devlink_register(devlink, &pdev->dev); if (err) @@ -1320,7 +1361,11 @@ clean_health: mlx5_health_cleanup(dev); close_pci: mlx5_pci_close(dev, priv); +clean_srcu: +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING + cleanup_srcu_struct(&priv->pfault_srcu); clean_dev: +#endif pci_set_drvdata(pdev, NULL); devlink_free(devlink); @@ -1345,6 +1390,9 @@ static void remove_one(struct pci_dev *pdev) mlx5_pagealloc_cleanup(dev); mlx5_health_cleanup(dev); mlx5_pci_close(dev, priv); +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING + cleanup_srcu_struct(&priv->pfault_srcu); +#endif pci_set_drvdata(pdev, NULL); devlink_free(devlink); } @@ -1359,9 +1407,10 @@ static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev, mlx5_enter_error_state(dev); mlx5_unload_one(dev, priv, false); - /* In case of kernel call save the pci state */ + /* In case of kernel call save the pci state and drain the health wq */ if (state) { pci_save_state(pdev); + mlx5_drain_health_wq(dev); mlx5_pci_disable_device(dev); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index d4a99c9757cb..b3dabe6e8836 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -86,6 +86,8 @@ int mlx5_cmd_init_hca(struct mlx5_core_dev *dev); int mlx5_cmd_teardown_hca(struct mlx5_core_dev *dev); void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event, unsigned long param); +void mlx5_core_page_fault(struct mlx5_core_dev *dev, + struct mlx5_pagefault *pfault); void mlx5_port_module_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe); void mlx5_enter_error_state(struct mlx5_core_dev *dev); void mlx5_disable_device(struct mlx5_core_dev *dev); @@ -111,6 +113,11 @@ u32 mlx5_get_msix_vec(struct mlx5_core_dev *dev, int vecidx); struct mlx5_eq *mlx5_eqn2eq(struct mlx5_core_dev *dev, int eqn); void mlx5_cq_tasklet_cb(unsigned long data); 
+int mlx5_query_pcam_reg(struct mlx5_core_dev *dev, u32 *pcam, u8 feature_group, + u8 access_reg_group); +int mlx5_query_mcam_reg(struct mlx5_core_dev *dev, u32 *mcap, u8 feature_group, + u8 access_reg_group); + void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev); void mlx5_lag_remove(struct mlx5_core_dev *dev); @@ -136,6 +143,11 @@ void mlx5_encap_dealloc(struct mlx5_core_dev *dev, u32 encap_id); bool mlx5_lag_intf_add(struct mlx5_interface *intf, struct mlx5_priv *priv); +int mlx5_query_mtpps(struct mlx5_core_dev *dev, u32 *mtpps, u32 mtpps_size); +int mlx5_set_mtpps(struct mlx5_core_dev *mdev, u32 *mtpps, u32 mtpps_size); +int mlx5_query_mtppse(struct mlx5_core_dev *mdev, u8 pin, u8 *arm, u8 *mode); +int mlx5_set_mtppse(struct mlx5_core_dev *mdev, u8 pin, u8 arm, u8 mode); + void mlx5e_init(void); void mlx5e_cleanup(void); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c index d2ec9d232a70..141583daf5a2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -74,6 +74,30 @@ out: } EXPORT_SYMBOL_GPL(mlx5_core_access_reg); +int mlx5_query_pcam_reg(struct mlx5_core_dev *dev, u32 *pcam, u8 feature_group, + u8 access_reg_group) +{ + u32 in[MLX5_ST_SZ_DW(pcam_reg)] = {0}; + int sz = MLX5_ST_SZ_BYTES(pcam_reg); + + MLX5_SET(pcam_reg, in, feature_group, feature_group); + MLX5_SET(pcam_reg, in, access_reg_group, access_reg_group); + + return mlx5_core_access_reg(dev, in, sz, pcam, sz, MLX5_REG_PCAM, 0, 0); +} + +int mlx5_query_mcam_reg(struct mlx5_core_dev *dev, u32 *mcam, u8 feature_group, + u8 access_reg_group) +{ + u32 in[MLX5_ST_SZ_DW(mcam_reg)] = {0}; + int sz = MLX5_ST_SZ_BYTES(mcam_reg); + + MLX5_SET(mcam_reg, in, feature_group, feature_group); + MLX5_SET(mcam_reg, in, access_reg_group, access_reg_group); + + return mlx5_core_access_reg(dev, in, sz, mcam, sz, MLX5_REG_MCAM, 0, 0); +} + struct mlx5_reg_pcap { u8 rsvd0; u8 
port_num; @@ -620,7 +644,7 @@ static int mlx5_set_port_qetcr_reg(struct mlx5_core_dev *mdev, u32 *in, u32 out[MLX5_ST_SZ_DW(qtct_reg)]; if (!MLX5_CAP_GEN(mdev, ets)) - return -ENOTSUPP; + return -EOPNOTSUPP; return mlx5_core_access_reg(mdev, in, inlen, out, sizeof(out), MLX5_REG_QETCR, 0, 1); @@ -632,7 +656,7 @@ static int mlx5_query_port_qetcr_reg(struct mlx5_core_dev *mdev, u32 *out, u32 in[MLX5_ST_SZ_DW(qtct_reg)]; if (!MLX5_CAP_GEN(mdev, ets)) - return -ENOTSUPP; + return -EOPNOTSUPP; memset(in, 0, sizeof(in)); return mlx5_core_access_reg(mdev, in, sizeof(in), out, outlen, @@ -866,3 +890,51 @@ void mlx5_port_module_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe) module_num, mlx5_pme_status[module_status - 1], mlx5_pme_error[error_type]); } + +int mlx5_query_mtpps(struct mlx5_core_dev *mdev, u32 *mtpps, u32 mtpps_size) +{ + u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0}; + + return mlx5_core_access_reg(mdev, in, sizeof(in), mtpps, + mtpps_size, MLX5_REG_MTPPS, 0, 0); +} + +int mlx5_set_mtpps(struct mlx5_core_dev *mdev, u32 *mtpps, u32 mtpps_size) +{ + u32 out[MLX5_ST_SZ_DW(mtpps_reg)] = {0}; + + return mlx5_core_access_reg(mdev, mtpps, mtpps_size, out, + sizeof(out), MLX5_REG_MTPPS, 0, 1); +} + +int mlx5_query_mtppse(struct mlx5_core_dev *mdev, u8 pin, u8 *arm, u8 *mode) +{ + u32 out[MLX5_ST_SZ_DW(mtppse_reg)] = {0}; + u32 in[MLX5_ST_SZ_DW(mtppse_reg)] = {0}; + int err = 0; + + MLX5_SET(mtppse_reg, in, pin, pin); + + err = mlx5_core_access_reg(mdev, in, sizeof(in), out, + sizeof(out), MLX5_REG_MTPPSE, 0, 0); + if (err) + return err; + + *arm = MLX5_GET(mtppse_reg, in, event_arm); + *mode = MLX5_GET(mtppse_reg, in, event_generation_mode); + + return err; +} + +int mlx5_set_mtppse(struct mlx5_core_dev *mdev, u8 pin, u8 arm, u8 mode) +{ + u32 out[MLX5_ST_SZ_DW(mtppse_reg)] = {0}; + u32 in[MLX5_ST_SZ_DW(mtppse_reg)] = {0}; + + MLX5_SET(mtppse_reg, in, pin, pin); + MLX5_SET(mtppse_reg, in, event_arm, arm); + MLX5_SET(mtppse_reg, in, event_generation_mode, mode); + + 
return mlx5_core_access_reg(mdev, in, sizeof(in), out, + sizeof(out), MLX5_REG_MTPPSE, 0, 1); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c index d0a4005fe63a..cbbcef2884be 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c @@ -143,95 +143,6 @@ void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type) mlx5_core_put_rsc(common); } -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING -void mlx5_eq_pagefault(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe) -{ - struct mlx5_eqe_page_fault *pf_eqe = &eqe->data.page_fault; - int qpn = be32_to_cpu(pf_eqe->flags_qpn) & MLX5_QPN_MASK; - struct mlx5_core_rsc_common *common = mlx5_get_rsc(dev, qpn); - struct mlx5_core_qp *qp = - container_of(common, struct mlx5_core_qp, common); - struct mlx5_pagefault pfault; - - if (!qp) { - mlx5_core_warn(dev, "ODP event for non-existent QP %06x\n", - qpn); - return; - } - - pfault.event_subtype = eqe->sub_type; - pfault.flags = (be32_to_cpu(pf_eqe->flags_qpn) >> MLX5_QPN_BITS) & - (MLX5_PFAULT_REQUESTOR | MLX5_PFAULT_WRITE | MLX5_PFAULT_RDMA); - pfault.bytes_committed = be32_to_cpu( - pf_eqe->bytes_committed); - - mlx5_core_dbg(dev, - "PAGE_FAULT: subtype: 0x%02x, flags: 0x%02x,\n", - eqe->sub_type, pfault.flags); - - switch (eqe->sub_type) { - case MLX5_PFAULT_SUBTYPE_RDMA: - /* RDMA based event */ - pfault.rdma.r_key = - be32_to_cpu(pf_eqe->rdma.r_key); - pfault.rdma.packet_size = - be16_to_cpu(pf_eqe->rdma.packet_length); - pfault.rdma.rdma_op_len = - be32_to_cpu(pf_eqe->rdma.rdma_op_len); - pfault.rdma.rdma_va = - be64_to_cpu(pf_eqe->rdma.rdma_va); - mlx5_core_dbg(dev, - "PAGE_FAULT: qpn: 0x%06x, r_key: 0x%08x,\n", - qpn, pfault.rdma.r_key); - mlx5_core_dbg(dev, - "PAGE_FAULT: rdma_op_len: 0x%08x,\n", - pfault.rdma.rdma_op_len); - mlx5_core_dbg(dev, - "PAGE_FAULT: rdma_va: 0x%016llx,\n", - pfault.rdma.rdma_va); - mlx5_core_dbg(dev, - "PAGE_FAULT: bytes_committed: 
0x%06x\n", - pfault.bytes_committed); - break; - - case MLX5_PFAULT_SUBTYPE_WQE: - /* WQE based event */ - pfault.wqe.wqe_index = - be16_to_cpu(pf_eqe->wqe.wqe_index); - pfault.wqe.packet_size = - be16_to_cpu(pf_eqe->wqe.packet_length); - mlx5_core_dbg(dev, - "PAGE_FAULT: qpn: 0x%06x, wqe_index: 0x%04x,\n", - qpn, pfault.wqe.wqe_index); - mlx5_core_dbg(dev, - "PAGE_FAULT: bytes_committed: 0x%06x\n", - pfault.bytes_committed); - break; - - default: - mlx5_core_warn(dev, - "Unsupported page fault event sub-type: 0x%02hhx, QP %06x\n", - eqe->sub_type, qpn); - /* Unsupported page faults should still be resolved by the - * page fault handler - */ - } - - if (qp->pfault_handler) { - qp->pfault_handler(qp, &pfault); - } else { - mlx5_core_err(dev, - "ODP event for QP %08x, without a fault handler in QP\n", - qpn); - /* Page fault will remain unresolved. QP will hang until it is - * destroyed - */ - } - - mlx5_core_put_rsc(common); -} -#endif - static int create_qprqsq_common(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp, int rsc_type) @@ -506,31 +417,6 @@ int mlx5_core_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn) } EXPORT_SYMBOL_GPL(mlx5_core_xrcd_dealloc); -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING -int mlx5_core_page_fault_resume(struct mlx5_core_dev *dev, u32 qpn, - u8 flags, int error) -{ - u32 out[MLX5_ST_SZ_DW(page_fault_resume_out)] = {0}; - u32 in[MLX5_ST_SZ_DW(page_fault_resume_in)] = {0}; - - MLX5_SET(page_fault_resume_in, in, opcode, - MLX5_CMD_OP_PAGE_FAULT_RESUME); - MLX5_SET(page_fault_resume_in, in, qpn, qpn); - - if (flags & MLX5_PAGE_FAULT_RESUME_REQUESTOR) - MLX5_SET(page_fault_resume_in, in, req_res, 1); - if (flags & MLX5_PAGE_FAULT_RESUME_WRITE) - MLX5_SET(page_fault_resume_in, in, read_write, 1); - if (flags & MLX5_PAGE_FAULT_RESUME_RDMA) - MLX5_SET(page_fault_resume_in, in, rdma, 1); - if (error) - MLX5_SET(page_fault_resume_in, in, error, 1); - - return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); -} 
-EXPORT_SYMBOL_GPL(mlx5_core_page_fault_resume); -#endif - int mlx5_core_create_rq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen, struct mlx5_core_qp *rq) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/uar.c b/drivers/net/ethernet/mellanox/mlx5/core/uar.c index ab0b896621a0..2e6b0f290ddc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/uar.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/uar.c @@ -37,11 +37,6 @@ #include <linux/mlx5/cmd.h> #include "mlx5_core.h" -enum { - NUM_DRIVER_UARS = 4, - NUM_LOW_LAT_UUARS = 4, -}; - int mlx5_cmd_alloc_uar(struct mlx5_core_dev *dev, u32 *uarn) { u32 out[MLX5_ST_SZ_DW(alloc_uar_out)] = {0}; @@ -67,167 +62,269 @@ int mlx5_cmd_free_uar(struct mlx5_core_dev *dev, u32 uarn) } EXPORT_SYMBOL(mlx5_cmd_free_uar); -static int need_uuar_lock(int uuarn) +static int uars_per_sys_page(struct mlx5_core_dev *mdev) { - int tot_uuars = NUM_DRIVER_UARS * MLX5_BF_REGS_PER_PAGE; - - if (uuarn == 0 || tot_uuars - NUM_LOW_LAT_UUARS) - return 0; + if (MLX5_CAP_GEN(mdev, uar_4k)) + return MLX5_CAP_GEN(mdev, num_of_uars_per_page); return 1; } -int mlx5_alloc_uuars(struct mlx5_core_dev *dev, struct mlx5_uuar_info *uuari) +static u64 uar2pfn(struct mlx5_core_dev *mdev, u32 index) { - int tot_uuars = NUM_DRIVER_UARS * MLX5_BF_REGS_PER_PAGE; - struct mlx5_bf *bf; - phys_addr_t addr; - int err; + u32 system_page_index; + + if (MLX5_CAP_GEN(mdev, uar_4k)) + system_page_index = index >> (PAGE_SHIFT - MLX5_ADAPTER_PAGE_SHIFT); + else + system_page_index = index; + + return (pci_resource_start(mdev->pdev, 0) >> PAGE_SHIFT) + system_page_index; +} + +static void up_rel_func(struct kref *kref) +{ + struct mlx5_uars_page *up = container_of(kref, struct mlx5_uars_page, ref_count); + + list_del(&up->list); + if (mlx5_cmd_free_uar(up->mdev, up->index)) + mlx5_core_warn(up->mdev, "failed to free uar index %d\n", up->index); + kfree(up->reg_bitmap); + kfree(up->fp_bitmap); + kfree(up); +} + +static struct mlx5_uars_page *alloc_uars_page(struct 
mlx5_core_dev *mdev, + bool map_wc) +{ + struct mlx5_uars_page *up; + int err = -ENOMEM; + phys_addr_t pfn; + int bfregs; int i; - uuari->num_uars = NUM_DRIVER_UARS; - uuari->num_low_latency_uuars = NUM_LOW_LAT_UUARS; + bfregs = uars_per_sys_page(mdev) * MLX5_BFREGS_PER_UAR; + up = kzalloc(sizeof(*up), GFP_KERNEL); + if (!up) + return ERR_PTR(err); - mutex_init(&uuari->lock); - uuari->uars = kcalloc(uuari->num_uars, sizeof(*uuari->uars), GFP_KERNEL); - if (!uuari->uars) - return -ENOMEM; + up->mdev = mdev; + up->reg_bitmap = kcalloc(BITS_TO_LONGS(bfregs), sizeof(unsigned long), GFP_KERNEL); + if (!up->reg_bitmap) + goto error1; - uuari->bfs = kcalloc(tot_uuars, sizeof(*uuari->bfs), GFP_KERNEL); - if (!uuari->bfs) { - err = -ENOMEM; - goto out_uars; - } + up->fp_bitmap = kcalloc(BITS_TO_LONGS(bfregs), sizeof(unsigned long), GFP_KERNEL); + if (!up->fp_bitmap) + goto error1; - uuari->bitmap = kcalloc(BITS_TO_LONGS(tot_uuars), sizeof(*uuari->bitmap), - GFP_KERNEL); - if (!uuari->bitmap) { - err = -ENOMEM; - goto out_bfs; - } + for (i = 0; i < bfregs; i++) + if ((i % MLX5_BFREGS_PER_UAR) < MLX5_NON_FP_BFREGS_PER_UAR) + set_bit(i, up->reg_bitmap); + else + set_bit(i, up->fp_bitmap); - uuari->count = kcalloc(tot_uuars, sizeof(*uuari->count), GFP_KERNEL); - if (!uuari->count) { - err = -ENOMEM; - goto out_bitmap; - } + up->bfregs = bfregs; + up->fp_avail = bfregs * MLX5_FP_BFREGS_PER_UAR / MLX5_BFREGS_PER_UAR; + up->reg_avail = bfregs * MLX5_NON_FP_BFREGS_PER_UAR / MLX5_BFREGS_PER_UAR; - for (i = 0; i < uuari->num_uars; i++) { - err = mlx5_cmd_alloc_uar(dev, &uuari->uars[i].index); - if (err) - goto out_count; + err = mlx5_cmd_alloc_uar(mdev, &up->index); + if (err) { + mlx5_core_warn(mdev, "mlx5_cmd_alloc_uar() failed, %d\n", err); + goto error1; + } - addr = dev->iseg_base + ((phys_addr_t)(uuari->uars[i].index) << PAGE_SHIFT); - uuari->uars[i].map = ioremap(addr, PAGE_SIZE); - if (!uuari->uars[i].map) { - mlx5_cmd_free_uar(dev, uuari->uars[i].index); + pfn = 
uar2pfn(mdev, up->index); + if (map_wc) { + up->map = ioremap_wc(pfn << PAGE_SHIFT, PAGE_SIZE); + if (!up->map) { + err = -EAGAIN; + goto error2; + } + } else { + up->map = ioremap(pfn << PAGE_SHIFT, PAGE_SIZE); + if (!up->map) { err = -ENOMEM; - goto out_count; + goto error2; } - mlx5_core_dbg(dev, "allocated uar index 0x%x, mmaped at %p\n", - uuari->uars[i].index, uuari->uars[i].map); - } - - for (i = 0; i < tot_uuars; i++) { - bf = &uuari->bfs[i]; - - bf->buf_size = (1 << MLX5_CAP_GEN(dev, log_bf_reg_size)) / 2; - bf->uar = &uuari->uars[i / MLX5_BF_REGS_PER_PAGE]; - bf->regreg = uuari->uars[i / MLX5_BF_REGS_PER_PAGE].map; - bf->reg = NULL; /* Add WC support */ - bf->offset = (i % MLX5_BF_REGS_PER_PAGE) * - (1 << MLX5_CAP_GEN(dev, log_bf_reg_size)) + - MLX5_BF_OFFSET; - bf->need_lock = need_uuar_lock(i); - spin_lock_init(&bf->lock); - spin_lock_init(&bf->lock32); - bf->uuarn = i; } + kref_init(&up->ref_count); + mlx5_core_dbg(mdev, "allocated UAR page: index %d, total bfregs %d\n", + up->index, up->bfregs); + return up; + +error2: + if (mlx5_cmd_free_uar(mdev, up->index)) + mlx5_core_warn(mdev, "failed to free uar index %d\n", up->index); +error1: + kfree(up->fp_bitmap); + kfree(up->reg_bitmap); + kfree(up); + return ERR_PTR(err); +} - return 0; - -out_count: - for (i--; i >= 0; i--) { - iounmap(uuari->uars[i].map); - mlx5_cmd_free_uar(dev, uuari->uars[i].index); +struct mlx5_uars_page *mlx5_get_uars_page(struct mlx5_core_dev *mdev) +{ + struct mlx5_uars_page *ret; + + mutex_lock(&mdev->priv.bfregs.reg_head.lock); + if (list_empty(&mdev->priv.bfregs.reg_head.list)) { + ret = alloc_uars_page(mdev, false); + if (IS_ERR(ret)) { + ret = NULL; + goto out; + } + list_add(&ret->list, &mdev->priv.bfregs.reg_head.list); + } else { + ret = list_first_entry(&mdev->priv.bfregs.reg_head.list, + struct mlx5_uars_page, list); + kref_get(&ret->ref_count); } - kfree(uuari->count); +out: + mutex_unlock(&mdev->priv.bfregs.reg_head.lock); -out_bitmap: - kfree(uuari->bitmap); - 
-out_bfs: - kfree(uuari->bfs); + return ret; +} +EXPORT_SYMBOL(mlx5_get_uars_page); -out_uars: - kfree(uuari->uars); - return err; +void mlx5_put_uars_page(struct mlx5_core_dev *mdev, struct mlx5_uars_page *up) +{ + mutex_lock(&mdev->priv.bfregs.reg_head.lock); + kref_put(&up->ref_count, up_rel_func); + mutex_unlock(&mdev->priv.bfregs.reg_head.lock); } +EXPORT_SYMBOL(mlx5_put_uars_page); -int mlx5_free_uuars(struct mlx5_core_dev *dev, struct mlx5_uuar_info *uuari) +static unsigned long map_offset(struct mlx5_core_dev *mdev, int dbi) { - int i = uuari->num_uars; + /* return the offset in bytes from the start of the page to the + * blue flame area of the UAR + */ + return dbi / MLX5_BFREGS_PER_UAR * MLX5_ADAPTER_PAGE_SIZE + + (dbi % MLX5_BFREGS_PER_UAR) * + (1 << MLX5_CAP_GEN(mdev, log_bf_reg_size)) + MLX5_BF_OFFSET; +} - for (i--; i >= 0; i--) { - iounmap(uuari->uars[i].map); - mlx5_cmd_free_uar(dev, uuari->uars[i].index); +static int alloc_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg, + bool map_wc, bool fast_path) +{ + struct mlx5_bfreg_data *bfregs; + struct mlx5_uars_page *up; + struct list_head *head; + unsigned long *bitmap; + unsigned int *avail; + struct mutex *lock; /* pointer to right mutex */ + int dbi; + + bfregs = &mdev->priv.bfregs; + if (map_wc) { + head = &bfregs->wc_head.list; + lock = &bfregs->wc_head.lock; + } else { + head = &bfregs->reg_head.list; + lock = &bfregs->reg_head.lock; } - - kfree(uuari->count); - kfree(uuari->bitmap); - kfree(uuari->bfs); - kfree(uuari->uars); + mutex_lock(lock); + if (list_empty(head)) { + up = alloc_uars_page(mdev, map_wc); + if (IS_ERR(up)) { + mutex_unlock(lock); + return PTR_ERR(up); + } + list_add(&up->list, head); + } else { + up = list_entry(head->next, struct mlx5_uars_page, list); + kref_get(&up->ref_count); + } + if (fast_path) { + bitmap = up->fp_bitmap; + avail = &up->fp_avail; + } else { + bitmap = up->reg_bitmap; + avail = &up->reg_avail; + } + dbi = find_first_bit(bitmap, up->bfregs); 
+ clear_bit(dbi, bitmap); + (*avail)--; + if (!(*avail)) + list_del(&up->list); + + bfreg->map = up->map + map_offset(mdev, dbi); + bfreg->up = up; + bfreg->wc = map_wc; + bfreg->index = up->index + dbi / MLX5_BFREGS_PER_UAR; + mutex_unlock(lock); return 0; } -int mlx5_alloc_map_uar(struct mlx5_core_dev *mdev, struct mlx5_uar *uar, - bool map_wc) +int mlx5_alloc_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg, + bool map_wc, bool fast_path) { - phys_addr_t pfn; - phys_addr_t uar_bar_start; int err; - err = mlx5_cmd_alloc_uar(mdev, &uar->index); - if (err) { - mlx5_core_warn(mdev, "mlx5_cmd_alloc_uar() failed, %d\n", err); - return err; - } + err = alloc_bfreg(mdev, bfreg, map_wc, fast_path); + if (!err) + return 0; - uar_bar_start = pci_resource_start(mdev->pdev, 0); - pfn = (uar_bar_start >> PAGE_SHIFT) + uar->index; + if (err == -EAGAIN && map_wc) + return alloc_bfreg(mdev, bfreg, false, fast_path); - if (map_wc) { - uar->bf_map = ioremap_wc(pfn << PAGE_SHIFT, PAGE_SIZE); - if (!uar->bf_map) { - mlx5_core_warn(mdev, "ioremap_wc() failed\n"); - uar->map = ioremap(pfn << PAGE_SHIFT, PAGE_SIZE); - if (!uar->map) - goto err_free_uar; - } - } else { - uar->map = ioremap(pfn << PAGE_SHIFT, PAGE_SIZE); - if (!uar->map) - goto err_free_uar; - } + return err; +} +EXPORT_SYMBOL(mlx5_alloc_bfreg); - return 0; +static unsigned int addr_to_dbi_in_syspage(struct mlx5_core_dev *dev, + struct mlx5_uars_page *up, + struct mlx5_sq_bfreg *bfreg) +{ + unsigned int uar_idx; + unsigned int bfreg_idx; + unsigned int bf_reg_size; -err_free_uar: - mlx5_core_warn(mdev, "ioremap() failed\n"); - err = -ENOMEM; - mlx5_cmd_free_uar(mdev, uar->index); + bf_reg_size = 1 << MLX5_CAP_GEN(dev, log_bf_reg_size); - return err; + uar_idx = (bfreg->map - up->map) >> MLX5_ADAPTER_PAGE_SHIFT; + bfreg_idx = (((uintptr_t)bfreg->map % MLX5_ADAPTER_PAGE_SIZE) - MLX5_BF_OFFSET) / bf_reg_size; + + return uar_idx * MLX5_BFREGS_PER_UAR + bfreg_idx; } -EXPORT_SYMBOL(mlx5_alloc_map_uar); -void 
mlx5_unmap_free_uar(struct mlx5_core_dev *mdev, struct mlx5_uar *uar) +void mlx5_free_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg) { - if (uar->map) - iounmap(uar->map); - else - iounmap(uar->bf_map); - mlx5_cmd_free_uar(mdev, uar->index); + struct mlx5_bfreg_data *bfregs; + struct mlx5_uars_page *up; + struct mutex *lock; /* pointer to right mutex */ + unsigned int dbi; + bool fp; + unsigned int *avail; + unsigned long *bitmap; + struct list_head *head; + + bfregs = &mdev->priv.bfregs; + if (bfreg->wc) { + head = &bfregs->wc_head.list; + lock = &bfregs->wc_head.lock; + } else { + head = &bfregs->reg_head.list; + lock = &bfregs->reg_head.lock; + } + up = bfreg->up; + dbi = addr_to_dbi_in_syspage(mdev, up, bfreg); + fp = (dbi % MLX5_BFREGS_PER_UAR) >= MLX5_NON_FP_BFREGS_PER_UAR; + if (fp) { + avail = &up->fp_avail; + bitmap = up->fp_bitmap; + } else { + avail = &up->reg_avail; + bitmap = up->reg_bitmap; + } + mutex_lock(lock); + (*avail)++; + set_bit(dbi, bitmap); + if (*avail == 1) + list_add_tail(&up->list, head); + + kref_put(&up->ref_count, up_rel_func); + mutex_unlock(lock); } -EXPORT_SYMBOL(mlx5_unmap_free_uar); +EXPORT_SYMBOL(mlx5_free_bfreg); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index 269e4401c342..15c2294dd2b4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -127,6 +127,23 @@ int mlx5_query_nic_vport_min_inline(struct mlx5_core_dev *mdev, } EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_min_inline); +void mlx5_query_min_inline(struct mlx5_core_dev *mdev, + u8 *min_inline_mode) +{ + switch (MLX5_CAP_ETH(mdev, wqe_inline_mode)) { + case MLX5_CAP_INLINE_MODE_L2: + *min_inline_mode = MLX5_INLINE_MODE_L2; + break; + case MLX5_CAP_INLINE_MODE_VPORT_CONTEXT: + mlx5_query_nic_vport_min_inline(mdev, 0, min_inline_mode); + break; + case MLX5_CAP_INLINE_MODE_NOT_REQUIRED: + *min_inline_mode = MLX5_INLINE_MODE_NONE; + 
break; + } +} +EXPORT_SYMBOL_GPL(mlx5_query_min_inline); + int mlx5_modify_nic_vport_min_inline(struct mlx5_core_dev *mdev, u16 vport, u8 min_inline) { @@ -532,7 +549,7 @@ int mlx5_modify_nic_vport_node_guid(struct mlx5_core_dev *mdev, if (!MLX5_CAP_GEN(mdev, vport_group_manager)) return -EACCES; if (!MLX5_CAP_ESW(mdev, nic_vport_node_guid_modify)) - return -ENOTSUPP; + return -EOPNOTSUPP; in = mlx5_vzalloc(inlen); if (!in) |