diff options
Diffstat (limited to 'drivers/net/ethernet/mellanox')
84 files changed, 20797 insertions, 4685 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx4/Kconfig b/drivers/net/ethernet/mellanox/mlx4/Kconfig index 9ca3734ebb6b..5098e7f21987 100644 --- a/drivers/net/ethernet/mellanox/mlx4/Kconfig +++ b/drivers/net/ethernet/mellanox/mlx4/Kconfig @@ -24,13 +24,6 @@ config MLX4_EN_DCB If unsure, set to Y -config MLX4_EN_VXLAN - bool "VXLAN offloads Support" - default y - depends on MLX4_EN && VXLAN && !(MLX4_EN=y && VXLAN=m) - ---help--- - Say Y here if you want to use VXLAN offloads in the driver. - config MLX4_CORE tristate depends on PCI diff --git a/drivers/net/ethernet/mellanox/mlx4/alloc.c b/drivers/net/ethernet/mellanox/mlx4/alloc.c index 0c51c69f802f..249a4584401a 100644 --- a/drivers/net/ethernet/mellanox/mlx4/alloc.c +++ b/drivers/net/ethernet/mellanox/mlx4/alloc.c @@ -576,41 +576,48 @@ out: return res; } -/* - * Handling for queue buffers -- we allocate a bunch of memory and - * register it in a memory region at HCA virtual address 0. If the - * requested size is > max_direct, we split the allocation into - * multiple pages, so we don't require too much contiguous memory. - */ -int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct, - struct mlx4_buf *buf, gfp_t gfp) +static int mlx4_buf_direct_alloc(struct mlx4_dev *dev, int size, + struct mlx4_buf *buf, gfp_t gfp) { dma_addr_t t; - if (size <= max_direct) { - buf->nbufs = 1; - buf->npages = 1; - buf->page_shift = get_order(size) + PAGE_SHIFT; - buf->direct.buf = dma_alloc_coherent(&dev->persist->pdev->dev, - size, &t, gfp); - if (!buf->direct.buf) - return -ENOMEM; + buf->nbufs = 1; + buf->npages = 1; + buf->page_shift = get_order(size) + PAGE_SHIFT; + buf->direct.buf = + dma_zalloc_coherent(&dev->persist->pdev->dev, + size, &t, gfp); + if (!buf->direct.buf) + return -ENOMEM; - buf->direct.map = t; + buf->direct.map = t; - while (t & ((1 << buf->page_shift) - 1)) { - --buf->page_shift; - buf->npages *= 2; - } + while (t & ((1 << buf->page_shift) - 1)) { + --buf->page_shift; + buf->npages *= 2; + } - memset(buf->direct.buf, 0, size); + return 0; +} + +/* Handling for queue buffers -- we allocate a bunch of memory and + * register it in a memory region at HCA virtual address 0. If the + * requested size is > max_direct, we split the allocation into + * multiple pages, so we don't require too much contiguous memory. + */ +int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct, + struct mlx4_buf *buf, gfp_t gfp) +{ + if (size <= max_direct) { + return mlx4_buf_direct_alloc(dev, size, buf, gfp); } else { + dma_addr_t t; int i; - buf->direct.buf = NULL; - buf->nbufs = (size + PAGE_SIZE - 1) / PAGE_SIZE; - buf->npages = buf->nbufs; + buf->direct.buf = NULL; + buf->nbufs = (size + PAGE_SIZE - 1) / PAGE_SIZE; + buf->npages = buf->nbufs; buf->page_shift = PAGE_SHIFT; buf->page_list = kcalloc(buf->nbufs, sizeof(*buf->page_list), gfp); @@ -619,28 +626,12 @@ int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct, for (i = 0; i < buf->nbufs; ++i) { buf->page_list[i].buf = - dma_alloc_coherent(&dev->persist->pdev->dev, - PAGE_SIZE, - &t, gfp); + dma_zalloc_coherent(&dev->persist->pdev->dev, + PAGE_SIZE, &t, gfp); if (!buf->page_list[i].buf) goto err_free; buf->page_list[i].map = t; - - memset(buf->page_list[i].buf, 0, PAGE_SIZE); - } - - if (BITS_PER_LONG == 64) { - struct page **pages; - pages = kmalloc(sizeof *pages * buf->nbufs, gfp); - if (!pages) - goto err_free; - for (i = 0; i < buf->nbufs; ++i) - pages[i] = virt_to_page(buf->page_list[i].buf); - buf->direct.buf = vmap(pages, buf->nbufs, VM_MAP, PAGE_KERNEL); - kfree(pages); - if (!buf->direct.buf) - goto err_free; } } @@ -655,15 +646,11 @@ EXPORT_SYMBOL_GPL(mlx4_buf_alloc); void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf) { - int i; - - if (buf->nbufs == 1) + if (buf->nbufs == 1) { dma_free_coherent(&dev->persist->pdev->dev, size, - buf->direct.buf, - buf->direct.map); - else { - if (BITS_PER_LONG == 64) - vunmap(buf->direct.buf); + buf->direct.buf, buf->direct.map); + } else { + int i; for (i = 0; i < buf->nbufs; ++i) if (buf->page_list[i].buf) @@ -789,7 +776,7 @@ void mlx4_db_free(struct mlx4_dev *dev, struct mlx4_db *db) EXPORT_SYMBOL_GPL(mlx4_db_free); int mlx4_alloc_hwq_res(struct mlx4_dev *dev, struct mlx4_hwq_resources *wqres, - int size, int max_direct) + int size) { int err; @@ -799,7 +786,7 @@ int mlx4_alloc_hwq_res(struct mlx4_dev *dev, struct mlx4_hwq_resources *wqres, *wqres->db.db = 0; - err = mlx4_buf_alloc(dev, size, max_direct, &wqres->buf, GFP_KERNEL); + err = mlx4_buf_direct_alloc(dev, size, &wqres->buf, GFP_KERNEL); if (err) goto err_db; diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index e94ca1c3fc7c..f04a423ff79d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -2597,7 +2597,6 @@ int mlx4_cmd_use_events(struct mlx4_dev *dev) priv->cmd.free_head = 0; sema_init(&priv->cmd.event_sem, priv->cmd.max_cmds); - spin_lock_init(&priv->cmd.context_lock); for (priv->cmd.token_mask = 1; priv->cmd.token_mask < priv->cmd.max_cmds; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_cq.c b/drivers/net/ethernet/mellanox/mlx4/en_cq.c index af975a2b74c6..132cea655920 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_cq.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_cq.c @@ -73,22 +73,16 @@ int mlx4_en_create_cq(struct mlx4_en_priv *priv, */ set_dev_node(&mdev->dev->persist->pdev->dev, node); err = mlx4_alloc_hwq_res(mdev->dev, &cq->wqres, - cq->buf_size, 2 * PAGE_SIZE); + cq->buf_size); set_dev_node(&mdev->dev->persist->pdev->dev, mdev->dev->numa_node); if (err) goto err_cq; - err = mlx4_en_map_buffer(&cq->wqres.buf); - if (err) - goto err_res; - cq->buf = (struct mlx4_cqe *)cq->wqres.buf.direct.buf; *pcq = cq; return 0; -err_res: - mlx4_free_hwq_res(mdev->dev, &cq->wqres, cq->buf_size); err_cq: kfree(cq); *pcq = NULL; @@ -177,7 +171,6 @@ void mlx4_en_destroy_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq **pcq) struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_en_cq *cq = *pcq; - mlx4_en_unmap_buffer(&cq->wqres.buf); mlx4_free_hwq_res(mdev->dev, &cq->wqres, cq->buf_size); if (mlx4_is_eq_vector_valid(mdev->dev, priv->port, cq->vector) && cq->is_tx == RX) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c b/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c index f01918c63f28..b04760a5034b 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c @@ -37,6 +37,11 @@ #include "mlx4_en.h" #include "fw_qos.h" +enum { + MLX4_CEE_STATE_DOWN = 0, + MLX4_CEE_STATE_UP = 1, +}; + /* Definitions for QCN */ @@ -80,13 +85,205 @@ struct mlx4_congestion_control_mb_prio_802_1_qau_statistics { __be32 reserved3[4]; }; +static u8 mlx4_en_dcbnl_getcap(struct net_device *dev, int capid, u8 *cap) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + + switch (capid) { + case DCB_CAP_ATTR_PFC: + *cap = true; + break; + case DCB_CAP_ATTR_DCBX: + *cap = priv->dcbx_cap; + break; + case DCB_CAP_ATTR_PFC_TCS: + *cap = 1 << mlx4_max_tc(priv->mdev->dev); + break; + default: + *cap = false; + break; + } + + return 0; +} + +static u8 mlx4_en_dcbnl_getpfcstate(struct net_device *netdev) +{ + struct mlx4_en_priv *priv = netdev_priv(netdev); + + return priv->cee_config.pfc_state; +} + +static void mlx4_en_dcbnl_setpfcstate(struct net_device *netdev, u8 state) +{ + struct mlx4_en_priv *priv = netdev_priv(netdev); + + priv->cee_config.pfc_state = state; +} + +static void mlx4_en_dcbnl_get_pfc_cfg(struct net_device *netdev, int priority, + u8 *setting) +{ + struct mlx4_en_priv *priv = netdev_priv(netdev); + + *setting = priv->cee_config.dcb_pfc[priority]; +} + +static void mlx4_en_dcbnl_set_pfc_cfg(struct net_device *netdev, int priority, + u8 setting) +{ + struct mlx4_en_priv *priv = netdev_priv(netdev); + + priv->cee_config.dcb_pfc[priority] = setting; + priv->cee_config.pfc_state = true; +} + +static int mlx4_en_dcbnl_getnumtcs(struct net_device *netdev, int tcid, u8 *num) +{ + struct mlx4_en_priv *priv = netdev_priv(netdev); + + if (!(priv->flags & MLX4_EN_FLAG_DCB_ENABLED)) + return -EINVAL; + + if (tcid == DCB_NUMTCS_ATTR_PFC) + *num = mlx4_max_tc(priv->mdev->dev); + else + *num = 0; + + return 0; +} + +static u8 mlx4_en_dcbnl_set_all(struct net_device *netdev) +{ + struct mlx4_en_priv *priv = netdev_priv(netdev); + struct mlx4_en_dev *mdev = priv->mdev; + + if (!(priv->dcbx_cap & DCB_CAP_DCBX_VER_CEE)) + return 1; + + if (priv->cee_config.pfc_state) { + int tc; + + priv->prof->rx_pause = 0; + priv->prof->tx_pause = 0; + for (tc = 0; tc < CEE_DCBX_MAX_PRIO; tc++) { + u8 tc_mask = 1 << tc; + + switch (priv->cee_config.dcb_pfc[tc]) { + case pfc_disabled: + priv->prof->tx_ppp &= ~tc_mask; + priv->prof->rx_ppp &= ~tc_mask; + break; + case pfc_enabled_full: + priv->prof->tx_ppp |= tc_mask; + priv->prof->rx_ppp |= tc_mask; + break; + case pfc_enabled_tx: + priv->prof->tx_ppp |= tc_mask; + priv->prof->rx_ppp &= ~tc_mask; + break; + case pfc_enabled_rx: + priv->prof->tx_ppp &= ~tc_mask; + priv->prof->rx_ppp |= tc_mask; + break; + default: + break; + } + } + en_dbg(DRV, priv, "Set pfc on\n"); + } else { + priv->prof->rx_pause = 1; + priv->prof->tx_pause = 1; + en_dbg(DRV, priv, "Set pfc off\n"); + } + + if (mlx4_SET_PORT_general(mdev->dev, priv->port, + priv->rx_skb_size + ETH_FCS_LEN, + priv->prof->tx_pause, + priv->prof->tx_ppp, + priv->prof->rx_pause, + priv->prof->rx_ppp)) { + en_err(priv, "Failed setting pause params\n"); + return 1; + } + + return 0; +} + +static u8 mlx4_en_dcbnl_get_state(struct net_device *dev) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + + if (priv->flags & MLX4_EN_FLAG_DCB_ENABLED) + return MLX4_CEE_STATE_UP; + + return MLX4_CEE_STATE_DOWN; +} + +static u8 mlx4_en_dcbnl_set_state(struct net_device *dev, u8 state) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + int num_tcs = 0; + + if (!(priv->dcbx_cap & DCB_CAP_DCBX_VER_CEE)) + return 1; + + if (!!(state) == !!(priv->flags & MLX4_EN_FLAG_DCB_ENABLED)) + return 0; + + if (state) { + priv->flags |= MLX4_EN_FLAG_DCB_ENABLED; + num_tcs = IEEE_8021QAZ_MAX_TCS; + } else { + priv->flags &= ~MLX4_EN_FLAG_DCB_ENABLED; + } + + if (mlx4_en_setup_tc(dev, num_tcs)) + return 1; + + return 0; +} + +/* On success returns a non-zero 802.1p user priority bitmap + * otherwise returns 0 as the invalid user priority bitmap to + * indicate an error. + */ +static int mlx4_en_dcbnl_getapp(struct net_device *netdev, u8 idtype, u16 id) +{ + struct mlx4_en_priv *priv = netdev_priv(netdev); + struct dcb_app app = { + .selector = idtype, + .protocol = id, + }; + if (!(priv->dcbx_cap & DCB_CAP_DCBX_VER_CEE)) + return 0; + + return dcb_getapp(netdev, &app); +} + +static int mlx4_en_dcbnl_setapp(struct net_device *netdev, u8 idtype, + u16 id, u8 up) +{ + struct mlx4_en_priv *priv = netdev_priv(netdev); + struct dcb_app app; + + if (!(priv->dcbx_cap & DCB_CAP_DCBX_VER_CEE)) + return -EINVAL; + + memset(&app, 0, sizeof(struct dcb_app)); + app.selector = idtype; + app.protocol = id; + app.priority = up; + + return dcb_setapp(netdev, &app); +} + static int mlx4_en_dcbnl_ieee_getets(struct net_device *dev, struct ieee_ets *ets) { struct mlx4_en_priv *priv = netdev_priv(dev); struct ieee_ets *my_ets = &priv->ets; - /* No IEEE PFC settings available */ if (!my_ets) return -EINVAL; @@ -237,18 +434,51 @@ static int mlx4_en_dcbnl_ieee_setpfc(struct net_device *dev, static u8 mlx4_en_dcbnl_getdcbx(struct net_device *dev) { - return DCB_CAP_DCBX_HOST | DCB_CAP_DCBX_VER_IEEE; + struct mlx4_en_priv *priv = netdev_priv(dev); + + return priv->dcbx_cap; } static u8 mlx4_en_dcbnl_setdcbx(struct net_device *dev, u8 mode) { + struct mlx4_en_priv *priv = netdev_priv(dev); + struct ieee_ets ets = {0}; + struct ieee_pfc pfc = {0}; + + if (mode == priv->dcbx_cap) + return 0; + if ((mode & DCB_CAP_DCBX_LLD_MANAGED) || - (mode & DCB_CAP_DCBX_VER_CEE) || - !(mode & DCB_CAP_DCBX_VER_IEEE) || + ((mode & DCB_CAP_DCBX_VER_IEEE) && + (mode & DCB_CAP_DCBX_VER_CEE)) || !(mode & DCB_CAP_DCBX_HOST)) - return 1; + goto err; + + priv->dcbx_cap = mode; + + ets.ets_cap = IEEE_8021QAZ_MAX_TCS; + pfc.pfc_cap = IEEE_8021QAZ_MAX_TCS; + + if (mode & DCB_CAP_DCBX_VER_IEEE) { + if (mlx4_en_dcbnl_ieee_setets(dev, &ets)) + goto err; + if (mlx4_en_dcbnl_ieee_setpfc(dev, &pfc)) + goto err; + } else if (mode & DCB_CAP_DCBX_VER_CEE) { + if (mlx4_en_dcbnl_set_all(dev)) + goto err; + } else { + if (mlx4_en_dcbnl_ieee_setets(dev, &ets)) + goto err; + if (mlx4_en_dcbnl_ieee_setpfc(dev, &pfc)) + goto err; + if (mlx4_en_setup_tc(dev, 0)) + goto err; + } return 0; +err: + return 1; } #define MLX4_RATELIMIT_UNITS_IN_KB 100000 /* rate-limit HW unit in Kbps */ @@ -463,24 +693,46 @@ static int mlx4_en_dcbnl_ieee_getqcnstats(struct net_device *dev, } const struct dcbnl_rtnl_ops mlx4_en_dcbnl_ops = { - .ieee_getets = mlx4_en_dcbnl_ieee_getets, - .ieee_setets = mlx4_en_dcbnl_ieee_setets, - .ieee_getmaxrate = mlx4_en_dcbnl_ieee_getmaxrate, - .ieee_setmaxrate = mlx4_en_dcbnl_ieee_setmaxrate, - .ieee_getpfc = mlx4_en_dcbnl_ieee_getpfc, - .ieee_setpfc = mlx4_en_dcbnl_ieee_setpfc, + .ieee_getets = mlx4_en_dcbnl_ieee_getets, + .ieee_setets = mlx4_en_dcbnl_ieee_setets, + .ieee_getmaxrate = mlx4_en_dcbnl_ieee_getmaxrate, + .ieee_setmaxrate = mlx4_en_dcbnl_ieee_setmaxrate, + .ieee_getqcn = mlx4_en_dcbnl_ieee_getqcn, + .ieee_setqcn = mlx4_en_dcbnl_ieee_setqcn, + .ieee_getqcnstats = mlx4_en_dcbnl_ieee_getqcnstats, + .ieee_getpfc = mlx4_en_dcbnl_ieee_getpfc, + .ieee_setpfc = mlx4_en_dcbnl_ieee_setpfc, + + .getstate = mlx4_en_dcbnl_get_state, + .setstate = mlx4_en_dcbnl_set_state, + .getpfccfg = mlx4_en_dcbnl_get_pfc_cfg, + .setpfccfg = mlx4_en_dcbnl_set_pfc_cfg, + .setall = mlx4_en_dcbnl_set_all, + .getcap = mlx4_en_dcbnl_getcap, + .getnumtcs = mlx4_en_dcbnl_getnumtcs, + .getpfcstate = mlx4_en_dcbnl_getpfcstate, + .setpfcstate = mlx4_en_dcbnl_setpfcstate, + .getapp = mlx4_en_dcbnl_getapp, + .setapp = mlx4_en_dcbnl_setapp, .getdcbx = mlx4_en_dcbnl_getdcbx, .setdcbx = mlx4_en_dcbnl_setdcbx, - .ieee_getqcn = mlx4_en_dcbnl_ieee_getqcn, - .ieee_setqcn = mlx4_en_dcbnl_ieee_setqcn, - .ieee_getqcnstats = mlx4_en_dcbnl_ieee_getqcnstats, }; const struct dcbnl_rtnl_ops mlx4_en_dcbnl_pfc_ops = { .ieee_getpfc = mlx4_en_dcbnl_ieee_getpfc, .ieee_setpfc = mlx4_en_dcbnl_ieee_setpfc, + .setstate = mlx4_en_dcbnl_set_state, + .getpfccfg = mlx4_en_dcbnl_get_pfc_cfg, + .setpfccfg = mlx4_en_dcbnl_set_pfc_cfg, + .setall = mlx4_en_dcbnl_set_all, + .getnumtcs = mlx4_en_dcbnl_getnumtcs, + .getpfcstate = mlx4_en_dcbnl_getpfcstate, + .setpfcstate = mlx4_en_dcbnl_setpfcstate, + .getapp = mlx4_en_dcbnl_getapp, + .setapp = mlx4_en_dcbnl_setapp, + .getdcbx = mlx4_en_dcbnl_getdcbx, .setdcbx = mlx4_en_dcbnl_setdcbx, }; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c index c761194bb323..bdda17d2ea0f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c @@ -362,7 +362,7 @@ static void mlx4_en_get_ethtool_stats(struct net_device *dev, for (i = 0; i < NUM_MAIN_STATS; i++, bitmap_iterator_inc(&it)) if (bitmap_iterator_test(&it)) - data[index++] = ((unsigned long *)&priv->stats)[i]; + data[index++] = ((unsigned long *)&dev->stats)[i]; for (i = 0; i < NUM_PORT_STATS; i++, bitmap_iterator_inc(&it)) if (bitmap_iterator_test(&it)) @@ -1042,6 +1042,8 @@ static int mlx4_en_set_ringparam(struct net_device *dev, { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; + struct mlx4_en_port_profile new_prof; + struct mlx4_en_priv *tmp; u32 rx_size, tx_size; int port_up = 0; int err = 0; @@ -1061,22 +1063,25 @@ static int mlx4_en_set_ringparam(struct net_device *dev, tx_size == priv->tx_ring[0]->size) return 0; + tmp = kzalloc(sizeof(*tmp), GFP_KERNEL); + if (!tmp) + return -ENOMEM; + mutex_lock(&mdev->state_lock); + memcpy(&new_prof, priv->prof, sizeof(struct mlx4_en_port_profile)); + new_prof.tx_ring_size = tx_size; + new_prof.rx_ring_size = rx_size; + err = mlx4_en_try_alloc_resources(priv, tmp, &new_prof); + if (err) + goto out; + if (priv->port_up) { port_up = 1; mlx4_en_stop_port(dev, 1); } - mlx4_en_free_resources(priv); - - priv->prof->tx_ring_size = tx_size; - priv->prof->rx_ring_size = rx_size; + mlx4_en_safe_replace_resources(priv, tmp); - err = mlx4_en_alloc_resources(priv); - if (err) { - en_err(priv, "Failed reallocating port resources\n"); - goto out; - } if (port_up) { err = mlx4_en_start_port(dev); if (err) @@ -1084,8 +1089,8 @@ static int mlx4_en_set_ringparam(struct net_device *dev, } err = mlx4_en_moderation_update(priv); - out: + kfree(tmp); mutex_unlock(&mdev->state_lock); return err; } @@ -1107,7 +1112,7 @@ static u32 mlx4_en_get_rxfh_indir_size(struct net_device *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); - return priv->rx_ring_num; + return rounddown_pow_of_two(priv->rx_ring_num); } static u32 mlx4_en_get_rxfh_key_size(struct net_device *netdev) @@ -1141,19 +1146,17 @@ static int mlx4_en_get_rxfh(struct net_device *dev, u32 *ring_index, u8 *key, u8 *hfunc) { struct mlx4_en_priv *priv = netdev_priv(dev); - struct mlx4_en_rss_map *rss_map = &priv->rss_map; - int rss_rings; - size_t n = priv->rx_ring_num; + u32 n = mlx4_en_get_rxfh_indir_size(dev); + u32 i, rss_rings; int err = 0; - rss_rings = priv->prof->rss_rings ?: priv->rx_ring_num; - rss_rings = 1 << ilog2(rss_rings); + rss_rings = priv->prof->rss_rings ?: n; + rss_rings = rounddown_pow_of_two(rss_rings); - while (n--) { + for (i = 0; i < n; i++) { if (!ring_index) break; - ring_index[n] = rss_map->qps[n % rss_rings].qpn - - rss_map->base_qpn; + ring_index[i] = i % rss_rings; } if (key) memcpy(key, priv->rss_key, MLX4_EN_RSS_KEY_SIZE); @@ -1166,6 +1169,7 @@ static int mlx4_en_set_rxfh(struct net_device *dev, const u32 *ring_index, const u8 *key, const u8 hfunc) { struct mlx4_en_priv *priv = netdev_priv(dev); + u32 n = mlx4_en_get_rxfh_indir_size(dev); struct mlx4_en_dev *mdev = priv->mdev; int port_up = 0; int err = 0; @@ -1175,18 +1179,18 @@ static int mlx4_en_set_rxfh(struct net_device *dev, const u32 *ring_index, /* Calculate RSS table size and make sure flows are spread evenly * between rings */ - for (i = 0; i < priv->rx_ring_num; i++) { + for (i = 0; i < n; i++) { if (!ring_index) - continue; + break; if (i > 0 && !ring_index[i] && !rss_rings) rss_rings = i; - if (ring_index[i] != (i % (rss_rings ?: priv->rx_ring_num))) + if (ring_index[i] != (i % (rss_rings ?: n))) return -EINVAL; } if (!rss_rings) - rss_rings = priv->rx_ring_num; + rss_rings = n; /* RSS table size must be an order of 2 */ if (!is_power_of_2(rss_rings)) @@ -1714,6 +1718,8 @@ static int mlx4_en_set_channels(struct net_device *dev, { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; + struct mlx4_en_port_profile new_prof; + struct mlx4_en_priv *tmp; int port_up = 0; int err = 0; @@ -1723,25 +1729,35 @@ static int mlx4_en_set_channels(struct net_device *dev, !channel->tx_count || !channel->rx_count) return -EINVAL; - mutex_lock(&mdev->state_lock); - if (priv->port_up) { - port_up = 1; - mlx4_en_stop_port(dev, 1); + if (channel->tx_count * MLX4_EN_NUM_UP <= priv->xdp_ring_num) { + en_err(priv, "Minimum %d tx channels required with XDP on\n", + priv->xdp_ring_num / MLX4_EN_NUM_UP + 1); + return -EINVAL; } - mlx4_en_free_resources(priv); + tmp = kzalloc(sizeof(*tmp), GFP_KERNEL); + if (!tmp) + return -ENOMEM; - priv->num_tx_rings_p_up = channel->tx_count; - priv->tx_ring_num = channel->tx_count * MLX4_EN_NUM_UP; - priv->rx_ring_num = channel->rx_count; + mutex_lock(&mdev->state_lock); + memcpy(&new_prof, priv->prof, sizeof(struct mlx4_en_port_profile)); + new_prof.num_tx_rings_p_up = channel->tx_count; + new_prof.tx_ring_num = channel->tx_count * MLX4_EN_NUM_UP; + new_prof.rx_ring_num = channel->rx_count; - err = mlx4_en_alloc_resources(priv); - if (err) { - en_err(priv, "Failed reallocating port resources\n"); + err = mlx4_en_try_alloc_resources(priv, tmp, &new_prof); + if (err) goto out; + + if (priv->port_up) { + port_up = 1; + mlx4_en_stop_port(dev, 1); } - netif_set_real_num_tx_queues(dev, priv->tx_ring_num); + mlx4_en_safe_replace_resources(priv, tmp); + + netif_set_real_num_tx_queues(dev, priv->tx_ring_num - + priv->xdp_ring_num); netif_set_real_num_rx_queues(dev, priv->rx_ring_num); if (dev->num_tc) @@ -1757,8 +1773,8 @@ static int mlx4_en_set_channels(struct net_device *dev, } err = mlx4_en_moderation_update(priv); - out: + kfree(tmp); mutex_unlock(&mdev->state_lock); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index b4b258c8ca47..fedb829276f4 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -31,6 +31,7 @@ * */ +#include <linux/bpf.h> #include <linux/etherdevice.h> #include <linux/tcp.h> #include <linux/if_vlan.h> @@ -67,6 +68,18 @@ int mlx4_en_setup_tc(struct net_device *dev, u8 up) offset += priv->num_tx_rings_p_up; } +#ifdef CONFIG_MLX4_EN_DCB + if (!mlx4_is_slave(priv->mdev->dev)) { + if (up) { + if (priv->dcbx_cap) + priv->flags |= MLX4_EN_FLAG_DCB_ENABLED; + } else { + priv->flags &= ~MLX4_EN_FLAG_DCB_ENABLED; + priv->cee_config.pfc_state = false; + } + } +#endif /* CONFIG_MLX4_EN_DCB */ + return 0; } @@ -406,14 +419,18 @@ static int mlx4_en_vlan_rx_add_vid(struct net_device *dev, mutex_lock(&mdev->state_lock); if (mdev->device_up && priv->port_up) { err = mlx4_SET_VLAN_FLTR(mdev->dev, priv); - if (err) + if (err) { en_err(priv, "Failed configuring VLAN filter\n"); + goto out; + } } - if (mlx4_register_vlan(mdev->dev, priv->port, vid, &idx)) - en_dbg(HW, priv, "failed adding vlan %d\n", vid); - mutex_unlock(&mdev->state_lock); + err = mlx4_register_vlan(mdev->dev, priv->port, vid, &idx); + if (err) + en_dbg(HW, priv, "Failed adding vlan %d\n", vid); - return 0; +out: + mutex_unlock(&mdev->state_lock); + return err; } static int mlx4_en_vlan_rx_kill_vid(struct net_device *dev, @@ -421,7 +438,7 @@ static int mlx4_en_vlan_rx_kill_vid(struct net_device *dev, { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; - int err; + int err = 0; en_dbg(HW, priv, "Killing VID:%d\n", vid); @@ -438,7 +455,7 @@ static int mlx4_en_vlan_rx_kill_vid(struct net_device *dev, } mutex_unlock(&mdev->state_lock); - return 0; + return err; } static void mlx4_en_u64_to_mac(unsigned char dst_mac[ETH_ALEN + 2], u64 src_mac) @@ -1197,8 +1214,8 @@ static void mlx4_en_netpoll(struct net_device *dev) struct mlx4_en_cq *cq; int i; - for (i = 0; i < priv->rx_ring_num; i++) { - cq = priv->rx_cq[i]; + for (i = 0; i < priv->tx_ring_num; i++) { + cq = priv->tx_cq[i]; napi_schedule(&cq->napi); } } @@ -1296,15 +1313,16 @@ static void mlx4_en_tx_timeout(struct net_device *dev) } -static struct net_device_stats *mlx4_en_get_stats(struct net_device *dev) +static struct rtnl_link_stats64 * +mlx4_en_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) { struct mlx4_en_priv *priv = netdev_priv(dev); spin_lock_bh(&priv->stats_lock); - memcpy(&priv->ret_stats, &priv->stats, sizeof(priv->stats)); + netdev_stats_to_stats64(stats, &dev->stats); spin_unlock_bh(&priv->stats_lock); - return &priv->ret_stats; + return stats; } static void mlx4_en_set_default_moderation(struct mlx4_en_priv *priv) @@ -1505,6 +1523,24 @@ static void mlx4_en_free_affinity_hint(struct mlx4_en_priv *priv, int ring_idx) free_cpumask_var(priv->rx_ring[ring_idx]->affinity_mask); } +static void mlx4_en_init_recycle_ring(struct mlx4_en_priv *priv, + int tx_ring_idx) +{ + struct mlx4_en_tx_ring *tx_ring = priv->tx_ring[tx_ring_idx]; + int rr_index; + + rr_index = (priv->xdp_ring_num - priv->tx_ring_num) + tx_ring_idx; + if (rr_index >= 0) { + tx_ring->free_tx_desc = mlx4_en_recycle_tx_desc; + tx_ring->recycle_ring = priv->rx_ring[rr_index]; + en_dbg(DRV, priv, + "Set tx_ring[%d]->recycle_ring = rx_ring[%d]\n", + tx_ring_idx, rr_index); + } else { + tx_ring->recycle_ring = NULL; + } +} + int mlx4_en_start_port(struct net_device *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); @@ -1627,6 +1663,8 @@ int mlx4_en_start_port(struct net_device *dev) } tx_ring->tx_queue = netdev_get_tx_queue(dev, i); + mlx4_en_init_recycle_ring(priv, i); + /* Arm CQ for TX completions */ mlx4_en_arm_cq(priv, cq); @@ -1691,10 +1729,9 @@ int mlx4_en_start_port(struct net_device *dev) /* Schedule multicast task to populate multicast list */ queue_work(mdev->workqueue, &priv->rx_mode_task); -#ifdef CONFIG_MLX4_EN_VXLAN if (priv->mdev->dev->caps.tunnel_offload_mode == MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) - vxlan_get_rx_port(dev); -#endif + udp_tunnel_get_rx_info(dev); + priv->port_up = true; netif_tx_start_all_queues(dev); netif_device_attach(dev); @@ -1856,6 +1893,7 @@ static void mlx4_en_restart(struct work_struct *work) en_dbg(DRV, priv, "Watchdog task called for port %d\n", priv->port); + rtnl_lock(); mutex_lock(&mdev->state_lock); if (priv->port_up) { mlx4_en_stop_port(dev, 1); @@ -1863,6 +1901,7 @@ static void mlx4_en_restart(struct work_struct *work) en_err(priv, "Failed restarting port %d\n", priv->port); } mutex_unlock(&mdev->state_lock); + rtnl_unlock(); } static void mlx4_en_clear_stats(struct net_device *dev) @@ -1874,7 +1913,6 @@ static void mlx4_en_clear_stats(struct net_device *dev) if (mlx4_en_DUMP_ETH_STATS(mdev, priv->port, 1)) en_dbg(HW, priv, "Failed dumping statistics\n"); - memset(&priv->stats, 0, sizeof(priv->stats)); memset(&priv->pstats, 0, sizeof(priv->pstats)); memset(&priv->pkstats, 0, sizeof(priv->pkstats)); memset(&priv->port_stats, 0, sizeof(priv->port_stats)); @@ -1890,6 +1928,11 @@ static void mlx4_en_clear_stats(struct net_device *dev) priv->tx_ring[i]->bytes = 0; priv->tx_ring[i]->packets = 0; priv->tx_ring[i]->tx_csum = 0; + priv->tx_ring[i]->tx_dropped = 0; + priv->tx_ring[i]->queue_stopped = 0; + priv->tx_ring[i]->wake_queue = 0; + priv->tx_ring[i]->tso_packets = 0; + priv->tx_ring[i]->xmit_more = 0; } for (i = 0; i < priv->rx_ring_num; i++) { priv->rx_ring[i]->bytes = 0; @@ -1943,7 +1986,7 @@ static int mlx4_en_close(struct net_device *dev) return 0; } -void mlx4_en_free_resources(struct mlx4_en_priv *priv) +static void mlx4_en_free_resources(struct mlx4_en_priv *priv) { int i; @@ -1968,7 +2011,7 @@ void mlx4_en_free_resources(struct mlx4_en_priv *priv) } -int mlx4_en_alloc_resources(struct mlx4_en_priv *priv) +static int mlx4_en_alloc_resources(struct mlx4_en_priv *priv) { struct mlx4_en_port_profile *prof = priv->prof; int i; @@ -2025,11 +2068,91 @@ err: return -ENOMEM; } +static void mlx4_en_shutdown(struct net_device *dev) +{ + rtnl_lock(); + netif_device_detach(dev); + mlx4_en_close(dev); + rtnl_unlock(); +} + +static int mlx4_en_copy_priv(struct mlx4_en_priv *dst, + struct mlx4_en_priv *src, + struct mlx4_en_port_profile *prof) +{ + memcpy(&dst->hwtstamp_config, &prof->hwtstamp_config, + sizeof(dst->hwtstamp_config)); + dst->num_tx_rings_p_up = src->mdev->profile.num_tx_rings_p_up; + dst->tx_ring_num = prof->tx_ring_num; + dst->rx_ring_num = prof->rx_ring_num; + dst->flags = prof->flags; + dst->mdev = src->mdev; + dst->port = src->port; + dst->dev = src->dev; + dst->prof = prof; + dst->stride = roundup_pow_of_two(sizeof(struct mlx4_en_rx_desc) + + DS_SIZE * MLX4_EN_MAX_RX_FRAGS); + + dst->tx_ring = kzalloc(sizeof(struct mlx4_en_tx_ring *) * MAX_TX_RINGS, + GFP_KERNEL); + if (!dst->tx_ring) + return -ENOMEM; + + dst->tx_cq = kzalloc(sizeof(struct mlx4_en_cq *) * MAX_TX_RINGS, + GFP_KERNEL); + if (!dst->tx_cq) { + kfree(dst->tx_ring); + return -ENOMEM; + } + return 0; +} + +static void mlx4_en_update_priv(struct mlx4_en_priv *dst, + struct mlx4_en_priv *src) +{ + memcpy(dst->rx_ring, src->rx_ring, + sizeof(struct mlx4_en_rx_ring *) * src->rx_ring_num); + memcpy(dst->rx_cq, src->rx_cq, + sizeof(struct mlx4_en_cq *) * src->rx_ring_num); + memcpy(&dst->hwtstamp_config, &src->hwtstamp_config, + sizeof(dst->hwtstamp_config)); + dst->tx_ring_num = src->tx_ring_num; + dst->rx_ring_num = src->rx_ring_num; + dst->tx_ring = src->tx_ring; + dst->tx_cq = src->tx_cq; + memcpy(dst->prof, src->prof, sizeof(struct mlx4_en_port_profile)); +} + +int mlx4_en_try_alloc_resources(struct mlx4_en_priv *priv, + struct mlx4_en_priv *tmp, + struct mlx4_en_port_profile *prof) +{ + mlx4_en_copy_priv(tmp, priv, prof); + + if (mlx4_en_alloc_resources(tmp)) { + en_warn(priv, + "%s: Resource allocation failed, using previous configuration\n", + __func__); + kfree(tmp->tx_ring); + kfree(tmp->tx_cq); + return -ENOMEM; + } + return 0; +} + +void mlx4_en_safe_replace_resources(struct mlx4_en_priv *priv, + struct mlx4_en_priv *tmp) +{ + mlx4_en_free_resources(priv); + mlx4_en_update_priv(priv, tmp); +} void mlx4_en_destroy_netdev(struct net_device *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; + bool shutdown = mdev->dev->persist->interface_state & + MLX4_INTERFACE_STATE_SHUTDOWN; en_dbg(DRV, priv, "Destroying netdev on port:%d\n", priv->port); @@ -2037,7 +2160,10 @@ void mlx4_en_destroy_netdev(struct net_device *dev) if (priv->registered) { devlink_port_type_clear(mlx4_get_devlink_port(mdev->dev, priv->port)); - unregister_netdev(dev); + if (shutdown) + mlx4_en_shutdown(dev); + else + unregister_netdev(dev); } if (priv->allocated) @@ -2057,12 +2183,17 @@ void mlx4_en_destroy_netdev(struct net_device *dev) mdev->upper[priv->port] = NULL; mutex_unlock(&mdev->state_lock); +#ifdef CONFIG_RFS_ACCEL + mlx4_en_cleanup_filters(priv); +#endif + mlx4_en_free_resources(priv); kfree(priv->tx_ring); kfree(priv->tx_cq); - free_netdev(dev); + if (!shutdown) + free_netdev(dev); } static int mlx4_en_change_mtu(struct net_device *dev, int new_mtu) @@ -2078,6 +2209,11 @@ static int mlx4_en_change_mtu(struct net_device *dev, int new_mtu) en_err(priv, "Bad MTU size:%d.\n", new_mtu); return -EPERM; } + if (priv->xdp_ring_num && MLX4_EN_EFF_MTU(new_mtu) > FRAG_SZ0) { + en_err(priv, "MTU size:%d requires frags but XDP running\n", + new_mtu); + return -EOPNOTSUPP; + } dev->mtu = new_mtu; if (netif_running(dev)) { @@ -2335,7 +2471,6 @@ static int mlx4_en_get_phys_port_id(struct net_device *dev, return 0; } -#ifdef CONFIG_MLX4_EN_VXLAN static void mlx4_en_add_vxlan_offloads(struct work_struct *work) { int ret; @@ -2355,8 +2490,12 @@ out: } /* set offloads */ - priv->dev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_RXCSUM | - NETIF_F_TSO | NETIF_F_GSO_UDP_TUNNEL; + priv->dev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | + NETIF_F_RXCSUM | + NETIF_F_TSO | NETIF_F_TSO6 | + NETIF_F_GSO_UDP_TUNNEL | + NETIF_F_GSO_UDP_TUNNEL_CSUM | + NETIF_F_GSO_PARTIAL; } static void mlx4_en_del_vxlan_offloads(struct work_struct *work) @@ -2365,8 +2504,12 @@ static void mlx4_en_del_vxlan_offloads(struct work_struct *work) struct mlx4_en_priv *priv = container_of(work, struct mlx4_en_priv, vxlan_del_task); /* unset offloads */ - priv->dev->hw_enc_features &= ~(NETIF_F_IP_CSUM | NETIF_F_RXCSUM | - NETIF_F_TSO | NETIF_F_GSO_UDP_TUNNEL); + priv->dev->hw_enc_features &= ~(NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | + NETIF_F_RXCSUM | + NETIF_F_TSO | NETIF_F_TSO6 | + NETIF_F_GSO_UDP_TUNNEL | + NETIF_F_GSO_UDP_TUNNEL_CSUM | + NETIF_F_GSO_PARTIAL); ret = mlx4_SET_PORT_VXLAN(priv->mdev->dev, priv->port, VXLAN_STEER_BY_OUTER_MAC, 0); @@ -2377,15 +2520,19 @@ static void mlx4_en_del_vxlan_offloads(struct work_struct *work) } static void mlx4_en_add_vxlan_port(struct net_device *dev, - sa_family_t sa_family, __be16 port) + struct udp_tunnel_info *ti) { struct mlx4_en_priv *priv = netdev_priv(dev); + __be16 port = ti->port; __be16 current_port; - if (priv->mdev->dev->caps.tunnel_offload_mode != MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) + if (ti->type != UDP_TUNNEL_TYPE_VXLAN) return; - if (sa_family == AF_INET6) + if (ti->sa_family != AF_INET) + return; + + if (priv->mdev->dev->caps.tunnel_offload_mode != MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) return; current_port = priv->vxlan_port; @@ -2400,15 +2547,19 @@ static void mlx4_en_add_vxlan_port(struct net_device *dev, } static void mlx4_en_del_vxlan_port(struct net_device *dev, - sa_family_t sa_family, __be16 port) + struct udp_tunnel_info *ti) { struct mlx4_en_priv *priv = netdev_priv(dev); + __be16 port = ti->port; __be16 current_port; - if (priv->mdev->dev->caps.tunnel_offload_mode != MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) + if (ti->type != UDP_TUNNEL_TYPE_VXLAN) return; - if (sa_family == AF_INET6) + if (ti->sa_family != AF_INET) + return; + + if (priv->mdev->dev->caps.tunnel_offload_mode != MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) return; current_port = priv->vxlan_port; @@ -2425,9 +2576,24 @@ static netdev_features_t mlx4_en_features_check(struct sk_buff *skb, netdev_features_t features) { features = vlan_features_check(skb, features); - return vxlan_features_check(skb, features); + features = vxlan_features_check(skb, features); + + /* The ConnectX-3 doesn't support outer IPv6 checksums but it does + * support inner IPv6 checksums and segmentation so we need to + * strip that feature if this is an IPv6 encapsulated frame. + */ + if (skb->encapsulation && + (skb->ip_summed == CHECKSUM_PARTIAL)) { + struct mlx4_en_priv *priv = netdev_priv(dev); + + if (!priv->vxlan_port || + (ip_hdr(skb)->version != 4) || + (udp_hdr(skb)->dest != priv->vxlan_port)) + features &= ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); + } + + return features; } -#endif static int mlx4_en_set_tx_maxrate(struct net_device *dev, int queue_index, u32 maxrate) { @@ -2456,12 +2622,109 @@ static int mlx4_en_set_tx_maxrate(struct net_device *dev, int queue_index, u32 m return err; } +static int mlx4_xdp_set(struct net_device *dev, struct bpf_prog *prog) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = priv->mdev; + struct bpf_prog *old_prog; + int xdp_ring_num; + int port_up = 0; + int err; + int i; + + xdp_ring_num = prog ? ALIGN(priv->rx_ring_num, MLX4_EN_NUM_UP) : 0; + + /* No need to reconfigure buffers when simply swapping the + * program for a new one. + */ + if (priv->xdp_ring_num == xdp_ring_num) { + if (prog) { + prog = bpf_prog_add(prog, priv->rx_ring_num - 1); + if (IS_ERR(prog)) + return PTR_ERR(prog); + } + for (i = 0; i < priv->rx_ring_num; i++) { + /* This xchg is paired with READ_ONCE in the fastpath */ + old_prog = xchg(&priv->rx_ring[i]->xdp_prog, prog); + if (old_prog) + bpf_prog_put(old_prog); + } + return 0; + } + + if (priv->num_frags > 1) { + en_err(priv, "Cannot set XDP if MTU requires multiple frags\n"); + return -EOPNOTSUPP; + } + + if (priv->tx_ring_num < xdp_ring_num + MLX4_EN_NUM_UP) { + en_err(priv, + "Minimum %d tx channels required to run XDP\n", + (xdp_ring_num + MLX4_EN_NUM_UP) / MLX4_EN_NUM_UP); + return -EINVAL; + } + + if (prog) { + prog = bpf_prog_add(prog, priv->rx_ring_num - 1); + if (IS_ERR(prog)) + return PTR_ERR(prog); + } + + mutex_lock(&mdev->state_lock); + if (priv->port_up) { + port_up = 1; + mlx4_en_stop_port(dev, 1); + } + + priv->xdp_ring_num = xdp_ring_num; + netif_set_real_num_tx_queues(dev, priv->tx_ring_num - + priv->xdp_ring_num); + + for (i = 0; i < priv->rx_ring_num; i++) { + old_prog = xchg(&priv->rx_ring[i]->xdp_prog, prog); + if (old_prog) + bpf_prog_put(old_prog); + } + + if (port_up) { + err = mlx4_en_start_port(dev); + if (err) { + en_err(priv, "Failed starting port %d for XDP change\n", + priv->port); + queue_work(mdev->workqueue, &priv->watchdog_task); + } + } + + mutex_unlock(&mdev->state_lock); + return 0; +} + +static bool mlx4_xdp_attached(struct net_device *dev) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + + return !!priv->xdp_ring_num; +} + +static int mlx4_xdp(struct net_device *dev, struct netdev_xdp *xdp) +{ + switch (xdp->command) { + case XDP_SETUP_PROG: + return mlx4_xdp_set(dev, xdp->prog); + case XDP_QUERY_PROG: + xdp->prog_attached = mlx4_xdp_attached(dev); + return 0; + default: + return -EINVAL; + } +} + static const struct net_device_ops mlx4_netdev_ops = { .ndo_open = mlx4_en_open, .ndo_stop = mlx4_en_close, .ndo_start_xmit = mlx4_en_xmit, .ndo_select_queue = mlx4_en_select_queue, - .ndo_get_stats = mlx4_en_get_stats, + .ndo_get_stats64 = mlx4_en_get_stats64, .ndo_set_rx_mode = mlx4_en_set_rx_mode, .ndo_set_mac_address = mlx4_en_set_mac, .ndo_validate_addr = eth_validate_addr, @@ -2480,12 +2743,11 @@ static const struct net_device_ops mlx4_netdev_ops = { .ndo_rx_flow_steer = mlx4_en_filter_rfs, #endif .ndo_get_phys_port_id = mlx4_en_get_phys_port_id, -#ifdef CONFIG_MLX4_EN_VXLAN - .ndo_add_vxlan_port = mlx4_en_add_vxlan_port, - .ndo_del_vxlan_port = mlx4_en_del_vxlan_port, + .ndo_udp_tunnel_add = mlx4_en_add_vxlan_port, + .ndo_udp_tunnel_del = mlx4_en_del_vxlan_port, .ndo_features_check = mlx4_en_features_check, -#endif .ndo_set_tx_maxrate = mlx4_en_set_tx_maxrate, + .ndo_xdp = mlx4_xdp, }; static const struct net_device_ops mlx4_netdev_ops_master = { @@ -2493,7 +2755,7 @@ static const struct net_device_ops mlx4_netdev_ops_master = { .ndo_stop = mlx4_en_close, .ndo_start_xmit = mlx4_en_xmit, .ndo_select_queue = mlx4_en_select_queue, - .ndo_get_stats = mlx4_en_get_stats, + .ndo_get_stats64 = mlx4_en_get_stats64, .ndo_set_rx_mode = mlx4_en_set_rx_mode, .ndo_set_mac_address = mlx4_en_set_mac, .ndo_validate_addr = eth_validate_addr, @@ -2518,12 +2780,11 @@ static const struct net_device_ops mlx4_netdev_ops_master = { .ndo_rx_flow_steer = mlx4_en_filter_rfs, #endif .ndo_get_phys_port_id = mlx4_en_get_phys_port_id, -#ifdef CONFIG_MLX4_EN_VXLAN - .ndo_add_vxlan_port = mlx4_en_add_vxlan_port, - .ndo_del_vxlan_port = mlx4_en_del_vxlan_port, + .ndo_udp_tunnel_add = mlx4_en_add_vxlan_port, + .ndo_udp_tunnel_del = mlx4_en_del_vxlan_port, .ndo_features_check = mlx4_en_features_check, -#endif .ndo_set_tx_maxrate = mlx4_en_set_tx_maxrate, + .ndo_xdp = mlx4_xdp, }; struct mlx4_en_bond { @@ -2813,10 +3074,8 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, INIT_WORK(&priv->linkstate_task, mlx4_en_linkstate); INIT_DELAYED_WORK(&priv->stats_task, mlx4_en_do_get_stats); INIT_DELAYED_WORK(&priv->service_task, mlx4_en_service_task); -#ifdef CONFIG_MLX4_EN_VXLAN INIT_WORK(&priv->vxlan_add_task, mlx4_en_add_vxlan_offloads); INIT_WORK(&priv->vxlan_del_task, mlx4_en_del_vxlan_offloads); -#endif #ifdef CONFIG_RFS_ACCEL INIT_LIST_HEAD(&priv->filters); spin_lock_init(&priv->filters_lock); @@ -2856,6 +3115,14 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, priv->msg_enable = MLX4_EN_MSG_LEVEL; #ifdef CONFIG_MLX4_EN_DCB if (!mlx4_is_slave(priv->mdev->dev)) { + priv->dcbx_cap = DCB_CAP_DCBX_VER_CEE | DCB_CAP_DCBX_HOST | + DCB_CAP_DCBX_VER_IEEE; + priv->flags |= MLX4_EN_DCB_ENABLED; + priv->cee_config.pfc_state = false; + + for (i = 0; i < MLX4_EN_NUM_UP; i++) + priv->cee_config.dcb_pfc[i] = pfc_disabled; + if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ETS_CFG) { dev->dcbnl_ops = &mlx4_en_dcbnl_ops; } else { @@ -2907,7 +3174,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, /* Allocate page for receive rings */ err = mlx4_alloc_hwq_res(mdev->dev, &priv->res, - MLX4_EN_PAGE_SIZE, MLX4_EN_PAGE_SIZE); + MLX4_EN_PAGE_SIZE); if (err) { en_err(priv, "Failed to allocate page for rx qps\n"); goto out; @@ -2990,8 +3257,13 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, } if (mdev->dev->caps.tunnel_offload_mode == MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) { - dev->hw_features |= NETIF_F_GSO_UDP_TUNNEL; - dev->features |= NETIF_F_GSO_UDP_TUNNEL; + dev->hw_features |= NETIF_F_GSO_UDP_TUNNEL | + NETIF_F_GSO_UDP_TUNNEL_CSUM | + NETIF_F_GSO_PARTIAL; + dev->features |= NETIF_F_GSO_UDP_TUNNEL | + NETIF_F_GSO_UDP_TUNNEL_CSUM | + NETIF_F_GSO_PARTIAL; + dev->gso_partial_features = NETIF_F_GSO_UDP_TUNNEL_CSUM; } mdev->pndev[port] = dev; @@ -3071,6 +3343,8 @@ int mlx4_en_reset_config(struct net_device *dev, { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; + struct mlx4_en_port_profile new_prof; + struct mlx4_en_priv *tmp; int port_up = 0; int err = 0; @@ -3087,19 +3361,29 @@ int mlx4_en_reset_config(struct net_device *dev, return -EINVAL; } + tmp = kzalloc(sizeof(*tmp), GFP_KERNEL); + if (!tmp) + return -ENOMEM; + mutex_lock(&mdev->state_lock); + + memcpy(&new_prof, priv->prof, sizeof(struct mlx4_en_port_profile)); + memcpy(&new_prof.hwtstamp_config, &ts_config, sizeof(ts_config)); + + err = mlx4_en_try_alloc_resources(priv, tmp, &new_prof); + if (err) + goto out; + if (priv->port_up) { port_up = 1; mlx4_en_stop_port(dev, 1); } - mlx4_en_free_resources(priv); - en_warn(priv, "Changing device configuration rx filter(%x) rx vlan(%x)\n", - ts_config.rx_filter, !!(features & NETIF_F_HW_VLAN_CTAG_RX)); + ts_config.rx_filter, + !!(features & NETIF_F_HW_VLAN_CTAG_RX)); - priv->hwtstamp_config.tx_type = ts_config.tx_type; - priv->hwtstamp_config.rx_filter = ts_config.rx_filter; + mlx4_en_safe_replace_resources(priv, tmp); if (DEV_FEATURE_CHANGED(dev, features, NETIF_F_HW_VLAN_CTAG_RX)) { if (features & NETIF_F_HW_VLAN_CTAG_RX) @@ -3133,11 +3417,6 @@ int mlx4_en_reset_config(struct net_device *dev, dev->features &= ~NETIF_F_HW_VLAN_CTAG_RX; } - err = mlx4_en_alloc_resources(priv); - if (err) { - en_err(priv, "Failed reallocating port resources\n"); - goto out; - } if (port_up) { err = mlx4_en_start_port(dev); if (err) @@ -3146,6 +3425,8 @@ int mlx4_en_reset_config(struct net_device *dev, out: mutex_unlock(&mdev->state_lock); - netdev_features_change(dev); + kfree(tmp); + if (!err) + netdev_features_change(dev); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx4/en_port.c b/drivers/net/ethernet/mellanox/mlx4/en_port.c index 20b6c2e678b8..5aa8b751f417 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_port.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_port.c @@ -152,8 +152,9 @@ int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset) struct mlx4_counter tmp_counter_stats; struct mlx4_en_stat_out_mbox *mlx4_en_stats; struct mlx4_en_stat_out_flow_control_mbox *flowstats; - struct mlx4_en_priv *priv = netdev_priv(mdev->pndev[port]); - struct net_device_stats *stats = &priv->stats; + struct net_device *dev = mdev->pndev[port]; + struct mlx4_en_priv *priv = netdev_priv(dev); + struct net_device_stats *stats = &dev->stats; struct mlx4_cmd_mailbox *mailbox; u64 in_mod = reset << 8 | port; int err; @@ -188,6 +189,7 @@ int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset) } stats->tx_packets = 0; stats->tx_bytes = 0; + stats->tx_dropped = 0; priv->port_stats.tx_chksum_offload = 0; priv->port_stats.queue_stopped = 0; priv->port_stats.wake_queue = 0; @@ -199,6 +201,7 @@ int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset) stats->tx_packets += ring->packets; stats->tx_bytes += ring->bytes; + stats->tx_dropped += ring->tx_dropped; priv->port_stats.tx_chksum_offload += ring->tx_csum; priv->port_stats.queue_stopped += ring->queue_stopped; priv->port_stats.wake_queue += ring->wake_queue; @@ -237,21 +240,12 @@ int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset) stats->multicast = en_stats_adder(&mlx4_en_stats->MCAST_prio_0, &mlx4_en_stats->MCAST_prio_1, NUM_PRIORITIES); - stats->collisions = 0; stats->rx_dropped = be32_to_cpu(mlx4_en_stats->RDROP) + sw_rx_dropped; stats->rx_length_errors = be32_to_cpu(mlx4_en_stats->RdropLength); - stats->rx_over_errors = 0; stats->rx_crc_errors = be32_to_cpu(mlx4_en_stats->RCRC); - stats->rx_frame_errors = 0; stats->rx_fifo_errors = be32_to_cpu(mlx4_en_stats->RdropOvflw); - stats->rx_missed_errors = 0; - stats->tx_aborted_errors = 0; - stats->tx_carrier_errors = 0; - stats->tx_fifo_errors = 0; - stats->tx_heartbeat_errors = 0; - stats->tx_window_errors = 0; - stats->tx_dropped = be32_to_cpu(mlx4_en_stats->TDROP); + stats->tx_dropped += be32_to_cpu(mlx4_en_stats->TDROP); /* RX stats */ priv->pkstats.rx_multicast_packets = stats->multicast; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_resources.c b/drivers/net/ethernet/mellanox/mlx4/en_resources.c index 02e925d6f734..a6b0db0e0383 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_resources.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_resources.c @@ -107,37 +107,6 @@ int mlx4_en_change_mcast_lb(struct mlx4_en_priv *priv, struct mlx4_qp *qp, return ret; } -int mlx4_en_map_buffer(struct mlx4_buf *buf) -{ - struct page **pages; - int i; - - if (BITS_PER_LONG == 64 || buf->nbufs == 1) - return 0; - - pages = kmalloc(sizeof *pages * buf->nbufs, GFP_KERNEL); - if (!pages) - return -ENOMEM; - - for (i = 0; i < buf->nbufs; ++i) - pages[i] = virt_to_page(buf->page_list[i].buf); - - buf->direct.buf = vmap(pages, buf->nbufs, VM_MAP, PAGE_KERNEL); - kfree(pages); - if (!buf->direct.buf) - return -ENOMEM; - - return 0; -} - -void mlx4_en_unmap_buffer(struct mlx4_buf *buf) -{ - if (BITS_PER_LONG == 64 || buf->nbufs == 1) - return; - - vunmap(buf->direct.buf); -} - void mlx4_en_sqp_event(struct mlx4_qp *qp, enum mlx4_event event) { return; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index ca3a38421ee7..2040dad8611d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -32,6 +32,7 @@ */ #include <net/busy_poll.h> +#include <linux/bpf.h> #include <linux/mlx4/cq.h> #include <linux/slab.h> #include <linux/mlx4/qp.h> @@ -57,7 +58,7 @@ static int mlx4_alloc_pages(struct mlx4_en_priv *priv, struct page *page; dma_addr_t dma; - for (order = MLX4_EN_ALLOC_PREFER_ORDER; ;) { + for (order = frag_info->order; ;) { gfp_t gfp = _gfp; if (order) @@ -70,7 +71,7 @@ static int mlx4_alloc_pages(struct mlx4_en_priv *priv, return -ENOMEM; } dma = dma_map_page(priv->ddev, page, 0, PAGE_SIZE << order, - PCI_DMA_FROMDEVICE); + frag_info->dma_dir); if (dma_mapping_error(priv->ddev, dma)) { put_page(page); return -ENOMEM; @@ -124,7 +125,8 @@ out: while (i--) { if (page_alloc[i].page != ring_alloc[i].page) { dma_unmap_page(priv->ddev, page_alloc[i].dma, - page_alloc[i].page_size, PCI_DMA_FROMDEVICE); + page_alloc[i].page_size, + priv->frag_info[i].dma_dir); page = page_alloc[i].page; /* Revert changes done by mlx4_alloc_pages */ page_ref_sub(page, page_alloc[i].page_size / @@ -145,7 +147,7 @@ static void mlx4_en_free_frag(struct mlx4_en_priv *priv, if (next_frag_end > frags[i].page_size) dma_unmap_page(priv->ddev, frags[i].dma, frags[i].page_size, - PCI_DMA_FROMDEVICE); + frag_info->dma_dir); if (frags[i].page) put_page(frags[i].page); @@ -176,7 +178,8 @@ out: page_alloc = &ring->page_alloc[i]; dma_unmap_page(priv->ddev, page_alloc->dma, - page_alloc->page_size, PCI_DMA_FROMDEVICE); + page_alloc->page_size, + priv->frag_info[i].dma_dir); page = page_alloc->page; /* Revert changes done by mlx4_alloc_pages */ page_ref_sub(page, page_alloc->page_size / @@ -201,7 +204,7 @@ static void mlx4_en_destroy_allocator(struct mlx4_en_priv *priv, i, page_count(page_alloc->page)); dma_unmap_page(priv->ddev, page_alloc->dma, - page_alloc->page_size, PCI_DMA_FROMDEVICE); + page_alloc->page_size, frag_info->dma_dir); while (page_alloc->page_offset + frag_info->frag_stride < page_alloc->page_size) { put_page(page_alloc->page); @@ -244,6 +247,12 @@ static int mlx4_en_prepare_rx_desc(struct mlx4_en_priv *priv, struct mlx4_en_rx_alloc *frags = ring->rx_info + (index << priv->log_rx_info); + if (ring->page_cache.index > 0) { + frags[0] = ring->page_cache.buf[--ring->page_cache.index]; + rx_desc->data[0].addr = cpu_to_be64(frags[0].dma); + return 0; + } + return mlx4_en_alloc_frags(priv, rx_desc, frags, ring->page_alloc, gfp); } @@ -394,17 +403,11 @@ int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv, /* Allocate HW buffers on provided NUMA node */ set_dev_node(&mdev->dev->persist->pdev->dev, node); - err = mlx4_alloc_hwq_res(mdev->dev, &ring->wqres, - ring->buf_size, 2 * PAGE_SIZE); + err = mlx4_alloc_hwq_res(mdev->dev, &ring->wqres, ring->buf_size); set_dev_node(&mdev->dev->persist->pdev->dev, mdev->dev->numa_node); if (err) goto err_info; - err = mlx4_en_map_buffer(&ring->wqres.buf); - if (err) { - en_err(priv, "Failed to map RX buffer\n"); - goto err_hwq; - } ring->buf = ring->wqres.buf.direct.buf; ring->hwtstamp_rx_filter = priv->hwtstamp_config.rx_filter; @@ -412,8 +415,6 @@ int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv, *pring = ring; return 0; -err_hwq: - mlx4_free_hwq_res(mdev->dev, &ring->wqres, ring->buf_size); err_info: vfree(ring->rx_info); ring->rx_info = NULL; @@ -510,27 +511,55 @@ void mlx4_en_recover_from_oom(struct mlx4_en_priv *priv) } } +/* When the rx ring is running in page-per-packet mode, a released frame can go + * directly into a small cache, to avoid unmapping or touching the page + * allocator. In bpf prog performance scenarios, buffers are either forwarded + * or dropped, never converted to skbs, so every page can come directly from + * this cache when it is sized to be a multiple of the napi budget. + */ +bool mlx4_en_rx_recycle(struct mlx4_en_rx_ring *ring, + struct mlx4_en_rx_alloc *frame) +{ + struct mlx4_en_page_cache *cache = &ring->page_cache; + + if (cache->index >= MLX4_EN_CACHE_SIZE) + return false; + + cache->buf[cache->index++] = *frame; + return true; +} + void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring **pring, u32 size, u16 stride) { struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_en_rx_ring *ring = *pring; + struct bpf_prog *old_prog; - mlx4_en_unmap_buffer(&ring->wqres.buf); + old_prog = READ_ONCE(ring->xdp_prog); + if (old_prog) + bpf_prog_put(old_prog); mlx4_free_hwq_res(mdev->dev, &ring->wqres, size * stride + TXBB_SIZE); vfree(ring->rx_info); ring->rx_info = NULL; kfree(ring); *pring = NULL; -#ifdef CONFIG_RFS_ACCEL - mlx4_en_cleanup_filters(priv); -#endif } void mlx4_en_deactivate_rx_ring(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring *ring) { + int i; + + for (i = 0; i < ring->page_cache.index; i++) { + struct mlx4_en_rx_alloc *frame = &ring->page_cache.buf[i]; + + dma_unmap_page(priv->ddev, frame->dma, frame->page_size, + priv->frag_info[0].dma_dir); + put_page(frame->page); + } + ring->page_cache.index = 0; mlx4_en_free_rx_buf(priv, ring); if (ring->stride <= TXBB_SIZE) ring->buf -= TXBB_SIZE; @@ -752,7 +781,10 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud struct mlx4_en_rx_ring *ring = priv->rx_ring[cq->ring]; struct mlx4_en_rx_alloc *frags; struct mlx4_en_rx_desc *rx_desc; + struct bpf_prog *xdp_prog; + int doorbell_pending; struct sk_buff *skb; + int tx_index; int index; int nr; unsigned int length; @@ -768,6 +800,10 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud if (budget <= 0) return polled; + xdp_prog = READ_ONCE(ring->xdp_prog); + doorbell_pending = 0; + tx_index = (priv->tx_ring_num - priv->xdp_ring_num) + cq->ring; + /* We assume a 1:1 mapping between CQEs and Rx descriptors, so Rx * descriptor offset can be deduced from the CQE index instead of * reading 'cqe->index' */ @@ -844,6 +880,43 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud l2_tunnel = (dev->hw_enc_features & NETIF_F_RXCSUM) && (cqe->vlan_my_qpn & cpu_to_be32(MLX4_CQE_L2_TUNNEL)); + /* A bpf program gets first chance to drop the packet. It may + * read bytes but not past the end of the frag. + */ + if (xdp_prog) { + struct xdp_buff xdp; + dma_addr_t dma; + u32 act; + + dma = be64_to_cpu(rx_desc->data[0].addr); + dma_sync_single_for_cpu(priv->ddev, dma, + priv->frag_info[0].frag_size, + DMA_FROM_DEVICE); + + xdp.data = page_address(frags[0].page) + + frags[0].page_offset; + xdp.data_end = xdp.data + length; + + act = bpf_prog_run_xdp(xdp_prog, &xdp); + switch (act) { + case XDP_PASS: + break; + case XDP_TX: + if (!mlx4_en_xmit_frame(frags, dev, + length, tx_index, + &doorbell_pending)) + goto consumed; + break; + default: + bpf_warn_invalid_xdp_action(act); + case XDP_ABORTED: + case XDP_DROP: + if (mlx4_en_rx_recycle(ring, frags)) + goto consumed; + goto next; + } + } + if (likely(dev->features & NETIF_F_RXCSUM)) { if (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_TCP | MLX4_CQE_STATUS_UDP)) { @@ -995,6 +1068,7 @@ next: for (nr = 0; nr < priv->num_frags; nr++) mlx4_en_free_frag(priv, frags, nr); +consumed: ++cq->mcq.cons_index; index = (cq->mcq.cons_index) & ring->size_mask; cqe = mlx4_en_get_cqe(cq->buf, index, priv->cqe_size) + factor; @@ -1003,6 +1077,9 @@ next: } out: + if (doorbell_pending) + mlx4_en_xmit_doorbell(priv->tx_ring[tx_index]); + AVG_PERF_COUNTER(priv->pstats.rx_coal_avg, polled); mlx4_cq_set_ci(&cq->mcq); wmb(); /* ensure HW sees CQ consumer before we post new buffers */ @@ -1070,22 +1147,35 @@ static const int frag_sizes[] = { void mlx4_en_calc_rx_buf(struct net_device *dev) { + enum dma_data_direction dma_dir = PCI_DMA_FROMDEVICE; struct mlx4_en_priv *priv = netdev_priv(dev); - /* VLAN_HLEN is added twice,to support skb vlan tagged with multiple - * headers. (For example: ETH_P_8021Q and ETH_P_8021AD). - */ - int eff_mtu = dev->mtu + ETH_HLEN + (2 * VLAN_HLEN); + int eff_mtu = MLX4_EN_EFF_MTU(dev->mtu); + int order = MLX4_EN_ALLOC_PREFER_ORDER; + u32 align = SMP_CACHE_BYTES; int buf_size = 0; int i = 0; + /* bpf requires buffers to be set up as 1 packet per page. + * This only works when num_frags == 1. + */ + if (priv->xdp_ring_num) { + dma_dir = PCI_DMA_BIDIRECTIONAL; + /* This will gain efficient xdp frame recycling at the expense + * of more costly truesize accounting + */ + align = PAGE_SIZE; + order = 0; + } + while (buf_size < eff_mtu) { + priv->frag_info[i].order = order; priv->frag_info[i].frag_size = (eff_mtu > buf_size + frag_sizes[i]) ? frag_sizes[i] : eff_mtu - buf_size; priv->frag_info[i].frag_prefix_size = buf_size; priv->frag_info[i].frag_stride = - ALIGN(priv->frag_info[i].frag_size, - SMP_CACHE_BYTES); + ALIGN(priv->frag_info[i].frag_size, align); + priv->frag_info[i].dma_dir = dma_dir; buf_size += priv->frag_info[i].frag_size; i++; } diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index a386f047c1af..e2509bba3e7c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -41,6 +41,7 @@ #include <linux/vmalloc.h> #include <linux/tcp.h> #include <linux/ip.h> +#include <linux/ipv6.h> #include <linux/moduleparam.h> #include "mlx4_en.h" @@ -93,20 +94,13 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv, /* Allocate HW buffers on provided NUMA node */ set_dev_node(&mdev->dev->persist->pdev->dev, node); - err = mlx4_alloc_hwq_res(mdev->dev, &ring->wqres, ring->buf_size, - 2 * PAGE_SIZE); + err = mlx4_alloc_hwq_res(mdev->dev, &ring->wqres, ring->buf_size); set_dev_node(&mdev->dev->persist->pdev->dev, mdev->dev->numa_node); if (err) { en_err(priv, "Failed allocating hwq resources\n"); goto err_bounce; } - err = mlx4_en_map_buffer(&ring->wqres.buf); - if (err) { - en_err(priv, "Failed to map TX buffer\n"); - goto err_hwq_res; - } - ring->buf = ring->wqres.buf.direct.buf; en_dbg(DRV, priv, "Allocated TX ring (addr:%p) - buf:%p size:%d buf_size:%d dma:%llx\n", @@ -117,7 +111,7 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv, MLX4_RESERVE_ETH_BF_QP); if (err) { en_err(priv, "failed reserving qp for TX ring\n"); - goto err_map; + goto err_hwq_res; } err = mlx4_qp_alloc(mdev->dev, ring->qpn, &ring->qp, GFP_KERNEL); @@ -154,8 +148,6 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv, err_reserve: mlx4_qp_release_range(mdev->dev, ring->qpn, 1); -err_map: - mlx4_en_unmap_buffer(&ring->wqres.buf); err_hwq_res: mlx4_free_hwq_res(mdev->dev, &ring->wqres, ring->buf_size); err_bounce: @@ -182,7 +174,6 @@ void mlx4_en_destroy_tx_ring(struct mlx4_en_priv *priv, mlx4_qp_remove(mdev->dev, &ring->qp); mlx4_qp_free(mdev->dev, &ring->qp); mlx4_qp_release_range(priv->mdev->dev, ring->qpn, 1); - mlx4_en_unmap_buffer(&ring->wqres.buf); mlx4_free_hwq_res(mdev->dev, &ring->wqres, ring->buf_size); kfree(ring->bounce_buf); ring->bounce_buf = NULL; @@ -205,6 +196,7 @@ int mlx4_en_activate_tx_ring(struct mlx4_en_priv *priv, ring->last_nr_txbb = 1; memset(ring->tx_info, 0, ring->size * sizeof(struct mlx4_en_tx_info)); memset(ring->buf, 0, ring->buf_size); + ring->free_tx_desc = mlx4_en_free_tx_desc; ring->qp_state = MLX4_QP_STATE_RST; ring->doorbell_qpn = cpu_to_be32(ring->qp.qpn << 8); @@ -274,10 +266,10 @@ static void mlx4_en_stamp_wqe(struct mlx4_en_priv *priv, } -static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv, - struct mlx4_en_tx_ring *ring, - int index, u8 owner, u64 timestamp, - int napi_mode) +u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv, + struct mlx4_en_tx_ring *ring, + int index, u8 owner, u64 timestamp, + int napi_mode) { struct mlx4_en_tx_info *tx_info = &ring->tx_info[index]; struct mlx4_en_tx_desc *tx_desc = ring->buf + index * TXBB_SIZE; @@ -353,6 +345,27 @@ static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv, return tx_info->nr_txbb; } +u32 mlx4_en_recycle_tx_desc(struct mlx4_en_priv *priv, + struct mlx4_en_tx_ring *ring, + int index, u8 owner, u64 timestamp, + int napi_mode) +{ + struct mlx4_en_tx_info *tx_info = &ring->tx_info[index]; + struct mlx4_en_rx_alloc frame = { + .page = tx_info->page, + .dma = tx_info->map0_dma, + .page_offset = 0, + .page_size = PAGE_SIZE, + }; + + if (!mlx4_en_rx_recycle(ring->recycle_ring, &frame)) { + dma_unmap_page(priv->ddev, tx_info->map0_dma, + PAGE_SIZE, priv->frag_info[0].dma_dir); + put_page(tx_info->page); + } + + return tx_info->nr_txbb; +} int mlx4_en_free_tx_buf(struct net_device *dev, struct mlx4_en_tx_ring *ring) { @@ -371,7 +384,7 @@ int mlx4_en_free_tx_buf(struct net_device *dev, struct mlx4_en_tx_ring *ring) } while (ring->cons != ring->prod) { - ring->last_nr_txbb = mlx4_en_free_tx_desc(priv, ring, + ring->last_nr_txbb = ring->free_tx_desc(priv, ring, ring->cons & ring->size_mask, !!(ring->cons & ring->size), 0, 0 /* Non-NAPI caller */); @@ -453,7 +466,7 @@ static bool mlx4_en_process_tx_cq(struct net_device *dev, timestamp = mlx4_en_get_cqe_ts(cqe); /* free next descriptor */ - last_nr_txbb = mlx4_en_free_tx_desc( + last_nr_txbb = ring->free_tx_desc( priv, ring, ring_index, !!((ring_cons + txbbs_skipped) & ring->size), timestamp, napi_budget); @@ -485,6 +498,9 @@ static bool mlx4_en_process_tx_cq(struct net_device *dev, ACCESS_ONCE(ring->last_nr_txbb) = last_nr_txbb; ACCESS_ONCE(ring->cons) = ring_cons + txbbs_skipped; + if (ring->free_tx_desc == mlx4_en_recycle_tx_desc) + return done < budget; + netdev_tx_completed_queue(ring->tx_queue, packets, bytes); /* Wakeup Tx queue if this stopped, and ring is not full. @@ -640,8 +656,7 @@ static int get_real_size(const struct sk_buff *skb, static void build_inline_wqe(struct mlx4_en_tx_desc *tx_desc, const struct sk_buff *skb, const struct skb_shared_info *shinfo, - int real_size, u16 *vlan_tag, - int tx_ind, void *fragptr) + void *fragptr) { struct mlx4_wqe_inline_seg *inl = &tx_desc->inl; int spc = MLX4_INLINE_ALIGN - CTRL_SIZE - sizeof *inl; @@ -709,10 +724,66 @@ static void mlx4_bf_copy(void __iomem *dst, const void *src, __iowrite64_copy(dst, src, bytecnt / 8); } +void mlx4_en_xmit_doorbell(struct mlx4_en_tx_ring *ring) +{ + wmb(); + /* Since there is no iowrite*_native() that writes the + * value as is, without byteswapping - using the one + * the doesn't do byteswapping in the relevant arch + * endianness. + */ +#if defined(__LITTLE_ENDIAN) + iowrite32( +#else + iowrite32be( +#endif + ring->doorbell_qpn, + ring->bf.uar->map + MLX4_SEND_DOORBELL); +} + +static void mlx4_en_tx_write_desc(struct mlx4_en_tx_ring *ring, + struct mlx4_en_tx_desc *tx_desc, + union mlx4_wqe_qpn_vlan qpn_vlan, + int desc_size, int bf_index, + __be32 op_own, bool bf_ok, + bool send_doorbell) +{ + tx_desc->ctrl.qpn_vlan = qpn_vlan; + + if (bf_ok) { + op_own |= htonl((bf_index & 0xffff) << 8); + /* Ensure new descriptor hits memory + * before setting ownership of this descriptor to HW + */ + dma_wmb(); + tx_desc->ctrl.owner_opcode = op_own; + + wmb(); + + mlx4_bf_copy(ring->bf.reg + ring->bf.offset, &tx_desc->ctrl, + desc_size); + + wmb(); + + ring->bf.offset ^= ring->bf.buf_size; + } else { + /* Ensure new descriptor hits memory + * before setting ownership of this descriptor to HW + */ + dma_wmb(); + tx_desc->ctrl.owner_opcode = op_own; + if (send_doorbell) + mlx4_en_xmit_doorbell(ring); + else + ring->xmit_more++; + } +} + netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) { struct skb_shared_info *shinfo = skb_shinfo(skb); struct mlx4_en_priv *priv = netdev_priv(dev); + union mlx4_wqe_qpn_vlan qpn_vlan = {}; struct device *ddev = priv->ddev; struct mlx4_en_tx_ring *ring; struct mlx4_en_tx_desc *tx_desc; @@ -724,7 +795,6 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) int real_size; u32 index, bf_index; __be32 op_own; - u16 vlan_tag = 0; u16 vlan_proto = 0; int i_frag; int lso_header_size; @@ -734,20 +804,21 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) bool stop_queue; bool inline_ok; u32 ring_cons; - - if (!priv->port_up) - goto tx_drop; + bool bf_ok; tx_ind = skb_get_queue_mapping(skb); ring = priv->tx_ring[tx_ind]; + if (!priv->port_up) + goto tx_drop; + /* fetch ring->cons far ahead before needing it to avoid stall */ ring_cons = ACCESS_ONCE(ring->cons); real_size = get_real_size(skb, shinfo, dev, &lso_header_size, &inline_ok, &fragptr); if (unlikely(!real_size)) - goto tx_drop; + goto tx_drop_count; /* Align descriptor to TXBB size */ desc_size = ALIGN(real_size, TXBB_SIZE); @@ -755,12 +826,20 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) if (unlikely(nr_txbb > MAX_DESC_TXBBS)) { if (netif_msg_tx_err(priv)) en_warn(priv, "Oversized header or SG list\n"); - goto tx_drop; + goto tx_drop_count; } + bf_ok = ring->bf_enabled; if (skb_vlan_tag_present(skb)) { - vlan_tag = skb_vlan_tag_get(skb); + qpn_vlan.vlan_tag = cpu_to_be16(skb_vlan_tag_get(skb)); vlan_proto = be16_to_cpu(skb->vlan_proto); + if (vlan_proto == ETH_P_8021AD) + qpn_vlan.ins_vlan = MLX4_WQE_CTRL_INS_SVLAN; + else if (vlan_proto == ETH_P_8021Q) + qpn_vlan.ins_vlan = MLX4_WQE_CTRL_INS_CVLAN; + else + qpn_vlan.ins_vlan = 0; + bf_ok = false; } netdev_txq_bql_enqueue_prefetchw(ring->tx_queue); @@ -780,6 +859,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) else { tx_desc = (struct mlx4_en_tx_desc *) ring->bounce_buf; bounce = true; + bf_ok = false; } /* Save skb in tx_info ring */ @@ -916,12 +996,21 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) AVG_PERF_COUNTER(priv->pstats.tx_pktsz_avg, skb->len); if (tx_info->inl) - build_inline_wqe(tx_desc, skb, shinfo, real_size, &vlan_tag, - tx_ind, fragptr); + build_inline_wqe(tx_desc, skb, shinfo, fragptr); if (skb->encapsulation) { - struct iphdr *ipv4 = (struct iphdr *)skb_inner_network_header(skb); - if (ipv4->protocol == IPPROTO_TCP || ipv4->protocol == IPPROTO_UDP) + union { + struct iphdr *v4; + struct ipv6hdr *v6; + unsigned char *hdr; + } ip; + u8 proto; + + ip.hdr = skb_inner_network_header(skb); + proto = (ip.v4->version == 4) ? ip.v4->protocol : + ip.v6->nexthdr; + + if (proto == IPPROTO_TCP || proto == IPPROTO_UDP) op_own |= cpu_to_be32(MLX4_WQE_CTRL_IIP | MLX4_WQE_CTRL_ILP); else op_own |= cpu_to_be32(MLX4_WQE_CTRL_IIP); @@ -945,60 +1034,15 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) real_size = (real_size / 16) & 0x3f; - if (ring->bf_enabled && desc_size <= MAX_BF && !bounce && - !skb_vlan_tag_present(skb) && send_doorbell) { - tx_desc->ctrl.bf_qpn = ring->doorbell_qpn | - cpu_to_be32(real_size); - - op_own |= htonl((bf_index & 0xffff) << 8); - /* Ensure new descriptor hits memory - * before setting ownership of this descriptor to HW - */ - dma_wmb(); - tx_desc->ctrl.owner_opcode = op_own; - - wmb(); - - mlx4_bf_copy(ring->bf.reg + ring->bf.offset, &tx_desc->ctrl, - desc_size); + bf_ok &= desc_size <= MAX_BF && send_doorbell; - wmb(); - - ring->bf.offset ^= ring->bf.buf_size; - } else { - tx_desc->ctrl.vlan_tag = cpu_to_be16(vlan_tag); - if (vlan_proto == ETH_P_8021AD) - tx_desc->ctrl.ins_vlan = MLX4_WQE_CTRL_INS_SVLAN; - else if (vlan_proto == ETH_P_8021Q) - tx_desc->ctrl.ins_vlan = MLX4_WQE_CTRL_INS_CVLAN; - else - tx_desc->ctrl.ins_vlan = 0; + if (bf_ok) + qpn_vlan.bf_qpn = ring->doorbell_qpn | cpu_to_be32(real_size); + else + qpn_vlan.fence_size = real_size; - tx_desc->ctrl.fence_size = real_size; - - /* Ensure new descriptor hits memory - * before setting ownership of this descriptor to HW - */ - dma_wmb(); - tx_desc->ctrl.owner_opcode = op_own; - if (send_doorbell) { - wmb(); - /* Since there is no iowrite*_native() that writes the - * value as is, without byteswapping - using the one - * the doesn't do byteswapping in the relevant arch - * endianness. - */ -#if defined(__LITTLE_ENDIAN) - iowrite32( -#else - iowrite32be( -#endif - ring->doorbell_qpn, - ring->bf.uar->map + MLX4_SEND_DOORBELL); - } else { - ring->xmit_more++; - } - } + mlx4_en_tx_write_desc(ring, tx_desc, qpn_vlan, desc_size, bf_index, + op_own, bf_ok, send_doorbell); if (unlikely(stop_queue)) { /* If queue was emptied after the if (stop_queue) , and before @@ -1027,9 +1071,114 @@ tx_drop_unmap: PCI_DMA_TODEVICE); } +tx_drop_count: + ring->tx_dropped++; tx_drop: dev_kfree_skb_any(skb); - priv->stats.tx_dropped++; return NETDEV_TX_OK; } +netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_alloc *frame, + struct net_device *dev, unsigned int length, + int tx_ind, int *doorbell_pending) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + union mlx4_wqe_qpn_vlan qpn_vlan = {}; + struct mlx4_en_tx_ring *ring; + struct mlx4_en_tx_desc *tx_desc; + struct mlx4_wqe_data_seg *data; + struct mlx4_en_tx_info *tx_info; + int index, bf_index; + bool send_doorbell; + int nr_txbb = 1; + bool stop_queue; + dma_addr_t dma; + int real_size; + __be32 op_own; + u32 ring_cons; + bool bf_ok; + + BUILD_BUG_ON_MSG(ALIGN(CTRL_SIZE + DS_SIZE, TXBB_SIZE) != TXBB_SIZE, + "mlx4_en_xmit_frame requires minimum size tx desc"); + + ring = priv->tx_ring[tx_ind]; + + if (!priv->port_up) + goto tx_drop; + + if (mlx4_en_is_tx_ring_full(ring)) + goto tx_drop_count; + + /* fetch ring->cons far ahead before needing it to avoid stall */ + ring_cons = READ_ONCE(ring->cons); + + index = ring->prod & ring->size_mask; + tx_info = &ring->tx_info[index]; + + bf_ok = ring->bf_enabled; + + /* Track current inflight packets for performance analysis */ + AVG_PERF_COUNTER(priv->pstats.inflight_avg, + (u32)(ring->prod - ring_cons - 1)); + + bf_index = ring->prod; + tx_desc = ring->buf + index * TXBB_SIZE; + data = &tx_desc->data; + + dma = frame->dma; + + tx_info->page = frame->page; + frame->page = NULL; + tx_info->map0_dma = dma; + tx_info->map0_byte_count = length; + tx_info->nr_txbb = nr_txbb; + tx_info->nr_bytes = max_t(unsigned int, length, ETH_ZLEN); + tx_info->data_offset = (void *)data - (void *)tx_desc; + tx_info->ts_requested = 0; + tx_info->nr_maps = 1; + tx_info->linear = 1; + tx_info->inl = 0; + + dma_sync_single_for_device(priv->ddev, dma, length, PCI_DMA_TODEVICE); + + data->addr = cpu_to_be64(dma); + data->lkey = ring->mr_key; + dma_wmb(); + data->byte_count = cpu_to_be32(length); + + /* tx completion can avoid cache line miss for common cases */ + tx_desc->ctrl.srcrb_flags = priv->ctrl_flags; + + op_own = cpu_to_be32(MLX4_OPCODE_SEND) | + ((ring->prod & ring->size) ? + cpu_to_be32(MLX4_EN_BIT_DESC_OWN) : 0); + + ring->packets++; + ring->bytes += tx_info->nr_bytes; + AVG_PERF_COUNTER(priv->pstats.tx_pktsz_avg, length); + + ring->prod += nr_txbb; + + stop_queue = mlx4_en_is_tx_ring_full(ring); + send_doorbell = stop_queue || + *doorbell_pending > MLX4_EN_DOORBELL_BUDGET; + bf_ok &= send_doorbell; + + real_size = ((CTRL_SIZE + nr_txbb * DS_SIZE) / 16) & 0x3f; + + if (bf_ok) + qpn_vlan.bf_qpn = ring->doorbell_qpn | cpu_to_be32(real_size); + else + qpn_vlan.fence_size = real_size; + + mlx4_en_tx_write_desc(ring, tx_desc, qpn_vlan, TXBB_SIZE, bf_index, + op_own, bf_ok, send_doorbell); + *doorbell_pending = send_doorbell ? 0 : *doorbell_pending + 1; + + return NETDEV_TX_OK; + +tx_drop_count: + ring->tx_dropped++; +tx_drop: + return NETDEV_TX_BUSY; +} diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c index f613977455e0..cf8f8a72a801 100644 --- a/drivers/net/ethernet/mellanox/mlx4/eq.c +++ b/drivers/net/ethernet/mellanox/mlx4/eq.c @@ -1305,8 +1305,8 @@ int mlx4_init_eq_table(struct mlx4_dev *dev) return 0; err_out_unmap: - while (i >= 0) - mlx4_free_eq(dev, &priv->eq_table.eq[i--]); + while (i > 0) + mlx4_free_eq(dev, &priv->eq_table.eq[--i]); #ifdef CONFIG_RFS_ACCEL for (i = 1; i <= dev->caps.num_ports; i++) { if (mlx4_priv(dev)->port[i].rmap) { diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index e97094598b2d..d728704d0c7b 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -721,6 +721,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) #define QUERY_DEV_CAP_RSVD_LKEY_OFFSET 0x98 #define QUERY_DEV_CAP_MAX_ICM_SZ_OFFSET 0xa0 #define QUERY_DEV_CAP_ETH_BACKPL_OFFSET 0x9c +#define QUERY_DEV_CAP_DIAG_RPRT_PER_PORT 0x9c #define QUERY_DEV_CAP_FW_REASSIGN_MAC 0x9d #define QUERY_DEV_CAP_VXLAN 0x9e #define QUERY_DEV_CAP_MAD_DEMUX_OFFSET 0xb0 @@ -935,6 +936,9 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_ETH_BACKPL_AN_REP; if (field32 & (1 << 7)) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_RECOVERABLE_ERROR_EVENT; + MLX4_GET(field32, outbox, QUERY_DEV_CAP_DIAG_RPRT_PER_PORT); + if (field32 & (1 << 17)) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT; MLX4_GET(field, outbox, QUERY_DEV_CAP_FW_REASSIGN_MAC); if (field & 1<<6) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_REASSIGN_MAC_EN; @@ -1128,6 +1132,7 @@ int mlx4_QUERY_PORT(struct mlx4_dev *dev, int port, struct mlx4_port_cap *port_c port_cap->max_pkeys = 1 << (field & 0xf); MLX4_GET(field, outbox, QUERY_PORT_MAX_VL_OFFSET); port_cap->max_vl = field & 0xf; + port_cap->max_tc_eth = field >> 4; MLX4_GET(field, outbox, QUERY_PORT_MAX_MACVLAN_OFFSET); port_cap->log_max_macs = field & 0xf; port_cap->log_max_vlans = field >> 4; @@ -2456,6 +2461,42 @@ int mlx4_NOP(struct mlx4_dev *dev) MLX4_CMD_NATIVE); } +int mlx4_query_diag_counters(struct mlx4_dev *dev, u8 op_modifier, + const u32 offset[], + u32 value[], size_t array_len, u8 port) +{ + struct mlx4_cmd_mailbox *mailbox; + u32 *outbox; + size_t i; + int ret; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + outbox = mailbox->buf; + + ret = mlx4_cmd_box(dev, 0, mailbox->dma, port, op_modifier, + MLX4_CMD_DIAG_RPRT, MLX4_CMD_TIME_CLASS_A, + MLX4_CMD_NATIVE); + if (ret) + goto out; + + for (i = 0; i < array_len; i++) { + if (offset[i] > MLX4_MAILBOX_SIZE) { + ret = -EINVAL; + goto out; + } + + MLX4_GET(value[i], outbox, offset[i]); + } + +out: + mlx4_free_cmd_mailbox(dev, mailbox); + return ret; +} +EXPORT_SYMBOL(mlx4_query_diag_counters); + int mlx4_get_phys_port_id(struct mlx4_dev *dev) { u8 port; diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.h b/drivers/net/ethernet/mellanox/mlx4/fw.h index 7ea258af636a..cdbd76f10ced 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.h +++ b/drivers/net/ethernet/mellanox/mlx4/fw.h @@ -53,6 +53,7 @@ struct mlx4_port_cap { int ib_mtu; int max_port_width; int max_vl; + int max_tc_eth; int max_gids; int max_pkeys; u64 def_mac; diff --git a/drivers/net/ethernet/mellanox/mlx4/intf.c b/drivers/net/ethernet/mellanox/mlx4/intf.c index dec77d6f0ac9..0e8b7c44931f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/intf.c +++ b/drivers/net/ethernet/mellanox/mlx4/intf.c @@ -147,7 +147,7 @@ int mlx4_do_bond(struct mlx4_dev *dev, bool enable) if (enable) { dev->flags |= MLX4_FLAG_BONDED; } else { - ret = mlx4_virt2phy_port_map(dev, 1, 2); + ret = mlx4_virt2phy_port_map(dev, 1, 2); if (ret) { mlx4_err(dev, "Fail to reset port map\n"); return ret; @@ -218,6 +218,9 @@ void mlx4_unregister_device(struct mlx4_dev *dev) struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_interface *intf; + if (!(dev->persist->interface_state & MLX4_INTERFACE_STATE_UP)) + return; + mlx4_stop_catas_poll(dev); mutex_lock(&intf_mutex); diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 12c77a70abdb..7183ac4135d2 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -292,6 +292,7 @@ static int _mlx4_dev_port(struct mlx4_dev *dev, int port, dev->caps.pkey_table_len[port] = port_cap->max_pkeys; dev->caps.port_width_cap[port] = port_cap->max_port_width; dev->caps.eth_mtu_cap[port] = port_cap->eth_mtu; + dev->caps.max_tc_eth = port_cap->max_tc_eth; dev->caps.def_mac[port] = port_cap->def_mac; dev->caps.supported_type[port] = port_cap->supported_port_types; dev->caps.suggested_type[port] = port_cap->suggested_type; @@ -2599,7 +2600,7 @@ static int mlx4_setup_hca(struct mlx4_dev *dev) err = mlx4_init_uar_table(dev); if (err) { mlx4_err(dev, "Failed to initialize user access region table, aborting\n"); - return err; + return err; } err = mlx4_uar_alloc(dev, &priv->driver_uar); @@ -2969,6 +2970,7 @@ static int mlx4_init_port_info(struct mlx4_dev *dev, int port) mlx4_err(dev, "Failed to create mtu file for port %d\n", port); device_remove_file(&info->dev->persist->pdev->dev, &info->port_attr); + devlink_port_unregister(&info->devlink_port); info->port = -1; } @@ -2983,6 +2985,8 @@ static void mlx4_cleanup_port_info(struct mlx4_port_info *info) device_remove_file(&info->dev->persist->pdev->dev, &info->port_attr); device_remove_file(&info->dev->persist->pdev->dev, &info->port_mtu_attr); + devlink_port_unregister(&info->devlink_port); + #ifdef CONFIG_RFS_ACCEL free_irq_cpu_rmap(info->rmap); info->rmap = NULL; @@ -3222,6 +3226,7 @@ static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data, INIT_LIST_HEAD(&priv->pgdir_list); mutex_init(&priv->pgdir_mutex); + spin_lock_init(&priv->cmd.context_lock); INIT_LIST_HEAD(&priv->bf_list); mutex_init(&priv->bf_mutex); @@ -4134,8 +4139,11 @@ static void mlx4_shutdown(struct pci_dev *pdev) mlx4_info(persist->dev, "mlx4_shutdown was called\n"); mutex_lock(&persist->interface_state_mutex); - if (persist->interface_state & MLX4_INTERFACE_STATE_UP) + if (persist->interface_state & MLX4_INTERFACE_STATE_UP) { + /* Notify mlx4 clients that the kernel is being shut down */ + persist->interface_state |= MLX4_INTERFACE_STATE_SHUTDOWN; mlx4_unload_one(pdev); + } mutex_unlock(&persist->interface_state_mutex); } diff --git a/drivers/net/ethernet/mellanox/mlx4/mcg.c b/drivers/net/ethernet/mellanox/mlx4/mcg.c index 6aa73972d478..94b891c118c1 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mcg.c +++ b/drivers/net/ethernet/mellanox/mlx4/mcg.c @@ -618,8 +618,8 @@ static int remove_promisc_qp(struct mlx4_dev *dev, u8 port, err = mlx4_READ_ENTRY(dev, entry->index, mailbox); - if (err) - goto out_mailbox; + if (err) + goto out_mailbox; members_count = be32_to_cpu(mgm->members_count) & 0xffffff; @@ -657,8 +657,8 @@ static int remove_promisc_qp(struct mlx4_dev *dev, u8 port, err = mlx4_WRITE_ENTRY(dev, entry->index, mailbox); - if (err) - goto out_mailbox; + if (err) + goto out_mailbox; } } } @@ -1102,7 +1102,7 @@ int mlx4_qp_attach_common(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], struct mlx4_cmd_mailbox *mailbox; struct mlx4_mgm *mgm; u32 members_count; - int index, prev; + int index = -1, prev; int link = 0; int i; int err; @@ -1181,7 +1181,7 @@ int mlx4_qp_attach_common(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], goto out; out: - if (prot == MLX4_PROT_ETH) { + if (prot == MLX4_PROT_ETH && index != -1) { /* manage the steering entry for promisc mode */ if (new_entry) err = new_steering_entry(dev, port, steer, diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index 63b1aeae2c03..9099dbd04951 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -132,6 +132,7 @@ enum { MLX4_EN_NUM_UP) #define MLX4_EN_DEFAULT_TX_WORK 256 +#define MLX4_EN_DOORBELL_BUDGET 8 /* Target number of packets to coalesce with interrupt moderation */ #define MLX4_EN_RX_COAL_TARGET 44 @@ -164,6 +165,10 @@ enum { #define MLX4_LOOPBACK_TEST_PAYLOAD (HEADER_COPY_SIZE - ETH_HLEN) #define MLX4_EN_MIN_MTU 46 +/* VLAN_HLEN is added twice,to support skb vlan tagged with multiple + * headers. (For example: ETH_P_8021Q and ETH_P_8021AD). + */ +#define MLX4_EN_EFF_MTU(mtu) ((mtu) + ETH_HLEN + (2 * VLAN_HLEN)) #define ETH_BCAST 0xffffffffffffULL #define MLX4_EN_LOOPBACK_RETRIES 5 @@ -215,7 +220,10 @@ enum cq_type { struct mlx4_en_tx_info { - struct sk_buff *skb; + union { + struct sk_buff *skb; + struct page *page; + }; dma_addr_t map0_dma; u32 map0_byte_count; u32 nr_txbb; @@ -255,6 +263,14 @@ struct mlx4_en_rx_alloc { u32 page_size; }; +#define MLX4_EN_CACHE_SIZE (2 * NAPI_POLL_WEIGHT) +struct mlx4_en_page_cache { + u32 index; + struct mlx4_en_rx_alloc buf[MLX4_EN_CACHE_SIZE]; +}; + +struct mlx4_en_priv; + struct mlx4_en_tx_ring { /* cache line used and dirtied in tx completion * (mlx4_en_free_tx_buf()) @@ -270,6 +286,7 @@ struct mlx4_en_tx_ring { unsigned long tx_csum; unsigned long tso_packets; unsigned long xmit_more; + unsigned int tx_dropped; struct mlx4_bf bf; unsigned long queue_stopped; @@ -287,6 +304,11 @@ struct mlx4_en_tx_ring { __be32 mr_key; void *buf; struct mlx4_en_tx_info *tx_info; + struct mlx4_en_rx_ring *recycle_ring; + u32 (*free_tx_desc)(struct mlx4_en_priv *priv, + struct mlx4_en_tx_ring *ring, + int index, u8 owner, + u64 timestamp, int napi_mode); u8 *bounce_buf; struct mlx4_qp_context context; int qpn; @@ -318,6 +340,8 @@ struct mlx4_en_rx_ring { u8 fcs_del; void *buf; void *rx_info; + struct bpf_prog *xdp_prog; + struct mlx4_en_page_cache page_cache; unsigned long bytes; unsigned long packets; unsigned long csum_ok; @@ -352,12 +376,14 @@ struct mlx4_en_port_profile { u32 rx_ring_num; u32 tx_ring_size; u32 rx_ring_size; + u8 num_tx_rings_p_up; u8 rx_pause; u8 rx_ppp; u8 tx_pause; u8 tx_ppp; int rss_rings; int inline_thold; + struct hwtstamp_config hwtstamp_config; }; struct mlx4_en_profile { @@ -437,7 +463,9 @@ struct mlx4_en_mc_list { struct mlx4_en_frag_info { u16 frag_size; u16 frag_prefix_size; - u16 frag_stride; + u32 frag_stride; + enum dma_data_direction dma_dir; + int order; }; #ifdef CONFIG_MLX4_EN_DCB @@ -447,6 +475,17 @@ struct mlx4_en_frag_info { #define MLX4_EN_TC_ETS 7 +enum dcb_pfc_type { + pfc_disabled = 0, + pfc_enabled_full, + pfc_enabled_tx, + pfc_enabled_rx +}; + +struct mlx4_en_cee_config { + bool pfc_state; + enum dcb_pfc_type dcb_pfc[MLX4_EN_NUM_UP]; +}; #endif struct ethtool_flow_id { @@ -466,6 +505,9 @@ enum { MLX4_EN_FLAG_RX_FILTER_NEEDED = (1 << 3), MLX4_EN_FLAG_FORCE_PROMISC = (1 << 4), MLX4_EN_FLAG_RX_CSUM_NON_TCP_UDP = (1 << 5), +#ifdef CONFIG_MLX4_EN_DCB + MLX4_EN_FLAG_DCB_ENABLED = (1 << 6), +#endif }; #define PORT_BEACON_MAX_LIMIT (65535) @@ -482,8 +524,6 @@ struct mlx4_en_priv { struct mlx4_en_port_profile *prof; struct net_device *dev; unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)]; - struct net_device_stats stats; - struct net_device_stats ret_stats; struct mlx4_en_port_state port_state; spinlock_t stats_lock; struct ethtool_flow_id ethtool_rules[MAX_NUM_OF_FS_RULES]; @@ -535,6 +575,7 @@ struct mlx4_en_priv { struct mlx4_en_frag_info frag_info[MLX4_EN_MAX_RX_FRAGS]; u16 num_frags; u16 log_rx_info; + int xdp_ring_num; struct mlx4_en_tx_ring **tx_ring; struct mlx4_en_rx_ring *rx_ring[MAX_RX_RINGS]; @@ -546,10 +587,8 @@ struct mlx4_en_priv { struct work_struct linkstate_task; struct delayed_work stats_task; struct delayed_work service_task; -#ifdef CONFIG_MLX4_EN_VXLAN struct work_struct vxlan_add_task; struct work_struct vxlan_del_task; -#endif struct mlx4_en_perf_stats pstats; struct mlx4_en_pkt_stats pkstats; struct mlx4_en_counter_stats pf_stats; @@ -571,9 +610,12 @@ struct mlx4_en_priv { u32 counter_index; #ifdef CONFIG_MLX4_EN_DCB +#define MLX4_EN_DCB_ENABLED 0x3 struct ieee_ets ets; u16 maxrate[IEEE_8021QAZ_MAX_TCS]; enum dcbnl_cndd_states cndd_state[IEEE_8021QAZ_MAX_TCS]; + struct mlx4_en_cee_config cee_config; + u8 dcbx_cap; #endif #ifdef CONFIG_RFS_ACCEL spinlock_t filters_lock; @@ -624,8 +666,11 @@ void mlx4_en_set_stats_bitmap(struct mlx4_dev *dev, u8 rx_ppp, u8 rx_pause, u8 tx_ppp, u8 tx_pause); -void mlx4_en_free_resources(struct mlx4_en_priv *priv); -int mlx4_en_alloc_resources(struct mlx4_en_priv *priv); +int mlx4_en_try_alloc_resources(struct mlx4_en_priv *priv, + struct mlx4_en_priv *tmp, + struct mlx4_en_port_profile *prof); +void mlx4_en_safe_replace_resources(struct mlx4_en_priv *priv, + struct mlx4_en_priv *tmp); int mlx4_en_create_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq **pcq, int entries, int ring, enum cq_type mode, int node); @@ -640,6 +685,12 @@ void mlx4_en_tx_irq(struct mlx4_cq *mcq); u16 mlx4_en_select_queue(struct net_device *dev, struct sk_buff *skb, void *accel_priv, select_queue_fallback_t fallback); netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev); +netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_alloc *frame, + struct net_device *dev, unsigned int length, + int tx_ind, int *doorbell_pending); +void mlx4_en_xmit_doorbell(struct mlx4_en_tx_ring *ring); +bool mlx4_en_rx_recycle(struct mlx4_en_rx_ring *ring, + struct mlx4_en_rx_alloc *frame); int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring **pring, @@ -668,12 +719,18 @@ int mlx4_en_process_rx_cq(struct net_device *dev, int budget); int mlx4_en_poll_rx_cq(struct napi_struct *napi, int budget); int mlx4_en_poll_tx_cq(struct napi_struct *napi, int budget); +u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv, + struct mlx4_en_tx_ring *ring, + int index, u8 owner, u64 timestamp, + int napi_mode); +u32 mlx4_en_recycle_tx_desc(struct mlx4_en_priv *priv, + struct mlx4_en_tx_ring *ring, + int index, u8 owner, u64 timestamp, + int napi_mode); void mlx4_en_fill_qp_context(struct mlx4_en_priv *priv, int size, int stride, int is_tx, int rss, int qpn, int cqn, int user_prio, struct mlx4_qp_context *context); void mlx4_en_sqp_event(struct mlx4_qp *qp, enum mlx4_event event); -int mlx4_en_map_buffer(struct mlx4_buf *buf); -void mlx4_en_unmap_buffer(struct mlx4_buf *buf); int mlx4_en_change_mcast_lb(struct mlx4_en_priv *priv, struct mlx4_qp *qp, int loopback); void mlx4_en_calc_rx_buf(struct net_device *dev); diff --git a/drivers/net/ethernet/mellanox/mlx4/mr.c b/drivers/net/ethernet/mellanox/mlx4/mr.c index 93195191f45b..395b5463cfd9 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mr.c +++ b/drivers/net/ethernet/mellanox/mlx4/mr.c @@ -248,7 +248,7 @@ static void mlx4_free_mtt_range(struct mlx4_dev *dev, u32 offset, int order) offset, order); return; } - __mlx4_free_mtt_range(dev, offset, order); + __mlx4_free_mtt_range(dev, offset, order); } void mlx4_mtt_cleanup(struct mlx4_dev *dev, struct mlx4_mtt *mtt) diff --git a/drivers/net/ethernet/mellanox/mlx4/pd.c b/drivers/net/ethernet/mellanox/mlx4/pd.c index b3cc3ab63799..6fc156a3918d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/pd.c +++ b/drivers/net/ethernet/mellanox/mlx4/pd.c @@ -205,7 +205,9 @@ int mlx4_bf_alloc(struct mlx4_dev *dev, struct mlx4_bf *bf, int node) goto free_uar; } - uar->bf_map = io_mapping_map_wc(priv->bf_mapping, uar->index << PAGE_SHIFT); + uar->bf_map = io_mapping_map_wc(priv->bf_mapping, + uar->index << PAGE_SHIFT, + PAGE_SIZE); if (!uar->bf_map) { err = -ENOMEM; goto unamp_uar; diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c index 087b23b320cb..c5b2064297a1 100644 --- a/drivers/net/ethernet/mellanox/mlx4/port.c +++ b/drivers/net/ethernet/mellanox/mlx4/port.c @@ -52,6 +52,7 @@ #define MLX4_FLAG_V_IGNORE_FCS_MASK 0x2 #define MLX4_IGNORE_FCS_MASK 0x1 +#define MLX4_TC_MAX_NUMBER 8 void mlx4_init_mac_table(struct mlx4_dev *dev, struct mlx4_mac_table *table) { @@ -2015,3 +2016,14 @@ out: return ret; } EXPORT_SYMBOL(mlx4_get_module_info); + +int mlx4_max_tc(struct mlx4_dev *dev) +{ + u8 num_tc = dev->caps.max_tc_eth; + + if (!num_tc) + num_tc = MLX4_TC_MAX_NUMBER; + + return num_tc; +} +EXPORT_SYMBOL(mlx4_max_tc); diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index cd9b2b28df88..8b81114bdc72 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -2372,16 +2372,15 @@ static int mpt_free_res(struct mlx4_dev *dev, int slave, int op, int cmd, __mlx4_mpt_release(dev, index); break; case RES_OP_MAP_ICM: - index = get_param_l(&in_param); - id = index & mpt_mask(dev); - err = mr_res_start_move_to(dev, slave, id, - RES_MPT_RESERVED, &mpt); - if (err) - return err; - - __mlx4_mpt_free_icm(dev, mpt->key); - res_end_move(dev, slave, RES_MPT, id); + index = get_param_l(&in_param); + id = index & mpt_mask(dev); + err = mr_res_start_move_to(dev, slave, id, + RES_MPT_RESERVED, &mpt); + if (err) return err; + + __mlx4_mpt_free_icm(dev, mpt->key); + res_end_move(dev, slave, RES_MPT, id); break; default: err = -EINVAL; @@ -4253,9 +4252,8 @@ int mlx4_UPDATE_QP_wrapper(struct mlx4_dev *dev, int slave, (1ULL << MLX4_UPD_QP_PATH_MASK_ETH_SRC_CHECK_MC_LB)) && !(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_UPDATE_QP_SRC_CHECK_LB)) { - mlx4_warn(dev, - "Src check LB for slave %d isn't supported\n", - slave); + mlx4_warn(dev, "Src check LB for slave %d isn't supported\n", + slave); return -ENOTSUPP; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig index f5c3b9465d8d..aae46884bf93 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig +++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig @@ -4,6 +4,7 @@ config MLX5_CORE tristate "Mellanox Technologies ConnectX-4 and Connect-IB core driver" + depends on MAY_USE_DEVLINK depends on PCI default n ---help--- @@ -31,10 +32,3 @@ config MLX5_CORE_EN_DCB This flag is depended on the kernel's DCB support. If unsure, set to Y - -config MLX5_CORE_EN_VXLAN - bool "VXLAN offloads Support" - default y - depends on MLX5_CORE_EN && VXLAN && !(MLX5_CORE=y && VXLAN=m) - ---help--- - Say Y here if you want to use VXLAN offloads in the driver. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index bf65b71c7360..05cc1effc13c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -1,12 +1,13 @@ obj-$(CONFIG_MLX5_CORE) += mlx5_core.o mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ - health.o mcg.o cq.o srq.o alloc.o qp.o port.o mr.o pd.o \ - mad.o transobj.o vport.o sriov.o fs_cmd.o fs_core.o + health.o mcg.o cq.o srq.o alloc.o qp.o port.o mr.o pd.o \ + mad.o transobj.o vport.o sriov.o fs_cmd.o fs_core.o \ + fs_counters.o rl.o -mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o eswitch.o \ - en_main.o en_fs.o en_ethtool.o en_tx.o en_rx.o \ - en_txrx.o en_clock.o en_tc.o +mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o eswitch.o eswitch_offloads.o \ + en_main.o en_common.o en_fs.o en_ethtool.o en_tx.o \ + en_rx.o en_rx_am.o en_txrx.o en_clock.o vxlan.o \ + en_tc.o en_arfs.o en_rep.o en_fs_ethtool.o -mlx5_core-$(CONFIG_MLX5_CORE_EN_VXLAN) += vxlan.o mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index eb926e1ee71c..c2ec01a22d55 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -143,13 +143,14 @@ static struct mlx5_cmd_layout *get_inst(struct mlx5_cmd *cmd, int idx) return cmd->cmd_buf + (idx << cmd->log_stride); } -static u8 xor8_buf(void *buf, int len) +static u8 xor8_buf(void *buf, size_t offset, int len) { u8 *ptr = buf; u8 sum = 0; int i; + int end = len + offset; - for (i = 0; i < len; i++) + for (i = offset; i < end; i++) sum ^= ptr[i]; return sum; @@ -157,41 +158,49 @@ static u8 xor8_buf(void *buf, int len) static int verify_block_sig(struct mlx5_cmd_prot_block *block) { - if (xor8_buf(block->rsvd0, sizeof(*block) - sizeof(block->data) - 1) != 0xff) + size_t rsvd0_off = offsetof(struct mlx5_cmd_prot_block, rsvd0); + int xor_len = sizeof(*block) - sizeof(block->data) - 1; + + if (xor8_buf(block, rsvd0_off, xor_len) != 0xff) return -EINVAL; - if (xor8_buf(block, sizeof(*block)) != 0xff) + if (xor8_buf(block, 0, sizeof(*block)) != 0xff) return -EINVAL; return 0; } -static void calc_block_sig(struct mlx5_cmd_prot_block *block, u8 token, - int csum) +static void calc_block_sig(struct mlx5_cmd_prot_block *block) { - block->token = token; - if (csum) { - block->ctrl_sig = ~xor8_buf(block->rsvd0, sizeof(*block) - - sizeof(block->data) - 2); - block->sig = ~xor8_buf(block, sizeof(*block) - 1); - } + int ctrl_xor_len = sizeof(*block) - sizeof(block->data) - 2; + size_t rsvd0_off = offsetof(struct mlx5_cmd_prot_block, rsvd0); + + block->ctrl_sig = ~xor8_buf(block, rsvd0_off, ctrl_xor_len); + block->sig = ~xor8_buf(block, 0, sizeof(*block) - 1); } -static void calc_chain_sig(struct mlx5_cmd_msg *msg, u8 token, int csum) +static void calc_chain_sig(struct mlx5_cmd_msg *msg) { struct mlx5_cmd_mailbox *next = msg->next; - - while (next) { - calc_block_sig(next->buf, token, csum); + int size = msg->len; + int blen = size - min_t(int, sizeof(msg->first.data), size); + int n = (blen + MLX5_CMD_DATA_BLOCK_SIZE - 1) + / MLX5_CMD_DATA_BLOCK_SIZE; + int i = 0; + + for (i = 0; i < n && next; i++) { + calc_block_sig(next->buf); next = next->next; } } static void set_signature(struct mlx5_cmd_work_ent *ent, int csum) { - ent->lay->sig = ~xor8_buf(ent->lay, sizeof(*ent->lay)); - calc_chain_sig(ent->in, ent->token, csum); - calc_chain_sig(ent->out, ent->token, csum); + ent->lay->sig = ~xor8_buf(ent->lay, 0, sizeof(*ent->lay)); + if (csum) { + calc_chain_sig(ent->in); + calc_chain_sig(ent->out); + } } static void poll_timeout(struct mlx5_cmd_work_ent *ent) @@ -222,12 +231,17 @@ static int verify_signature(struct mlx5_cmd_work_ent *ent) struct mlx5_cmd_mailbox *next = ent->out->next; int err; u8 sig; + int size = ent->out->len; + int blen = size - min_t(int, sizeof(ent->out->first.data), size); + int n = (blen + MLX5_CMD_DATA_BLOCK_SIZE - 1) + / MLX5_CMD_DATA_BLOCK_SIZE; + int i = 0; - sig = xor8_buf(ent->lay, sizeof(*ent->lay)); + sig = xor8_buf(ent->lay, 0, sizeof(*ent->lay)); if (sig != 0xff) return -EINVAL; - while (next) { + for (i = 0; i < n && next; i++) { err = verify_block_sig(next->buf); if (err) return err; @@ -294,6 +308,13 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_DESTROY_FLOW_TABLE: case MLX5_CMD_OP_DESTROY_FLOW_GROUP: case MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY: + case MLX5_CMD_OP_DEALLOC_FLOW_COUNTER: + case MLX5_CMD_OP_2ERR_QP: + case MLX5_CMD_OP_2RST_QP: + case MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT: + case MLX5_CMD_OP_MODIFY_FLOW_TABLE: + case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY: + case MLX5_CMD_OP_SET_FLOW_TABLE_ROOT: return MLX5_CMD_STAT_OK; case MLX5_CMD_OP_QUERY_HCA_CAP: @@ -320,8 +341,6 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_RTR2RTS_QP: case MLX5_CMD_OP_RTS2RTS_QP: case MLX5_CMD_OP_SQERR2RTS_QP: - case MLX5_CMD_OP_2ERR_QP: - case MLX5_CMD_OP_2RST_QP: case MLX5_CMD_OP_QUERY_QP: case MLX5_CMD_OP_SQD_RTS_QP: case MLX5_CMD_OP_INIT2INIT_QP: @@ -341,7 +360,6 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT: case MLX5_CMD_OP_MODIFY_ESW_VPORT_CONTEXT: case MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT: - case MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT: case MLX5_CMD_OP_QUERY_ROCE_ADDRESS: case MLX5_CMD_OP_SET_ROCE_ADDRESS: case MLX5_CMD_OP_QUERY_HCA_VPORT_CONTEXT: @@ -389,12 +407,15 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_CREATE_RQT: case MLX5_CMD_OP_MODIFY_RQT: case MLX5_CMD_OP_QUERY_RQT: + case MLX5_CMD_OP_CREATE_FLOW_TABLE: case MLX5_CMD_OP_QUERY_FLOW_TABLE: case MLX5_CMD_OP_CREATE_FLOW_GROUP: case MLX5_CMD_OP_QUERY_FLOW_GROUP: - case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY: + case MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY: + case MLX5_CMD_OP_ALLOC_FLOW_COUNTER: + case MLX5_CMD_OP_QUERY_FLOW_COUNTER: *status = MLX5_DRIVER_STATUS_ABORTED; *synd = MLX5_DRIVER_SYND; return -EIO; @@ -406,178 +427,143 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, const char *mlx5_command_str(int command) { - switch (command) { - case MLX5_CMD_OP_QUERY_HCA_VPORT_CONTEXT: - return "QUERY_HCA_VPORT_CONTEXT"; - - case MLX5_CMD_OP_MODIFY_HCA_VPORT_CONTEXT: - return "MODIFY_HCA_VPORT_CONTEXT"; - - case MLX5_CMD_OP_QUERY_HCA_CAP: - return "QUERY_HCA_CAP"; - - case MLX5_CMD_OP_SET_HCA_CAP: - return "SET_HCA_CAP"; - - case MLX5_CMD_OP_QUERY_ADAPTER: - return "QUERY_ADAPTER"; - - case MLX5_CMD_OP_INIT_HCA: - return "INIT_HCA"; - - case MLX5_CMD_OP_TEARDOWN_HCA: - return "TEARDOWN_HCA"; - - case MLX5_CMD_OP_ENABLE_HCA: - return "MLX5_CMD_OP_ENABLE_HCA"; - - case MLX5_CMD_OP_DISABLE_HCA: - return "MLX5_CMD_OP_DISABLE_HCA"; - - case MLX5_CMD_OP_QUERY_PAGES: - return "QUERY_PAGES"; - - case MLX5_CMD_OP_MANAGE_PAGES: - return "MANAGE_PAGES"; - - case MLX5_CMD_OP_CREATE_MKEY: - return "CREATE_MKEY"; - - case MLX5_CMD_OP_QUERY_MKEY: - return "QUERY_MKEY"; - - case MLX5_CMD_OP_DESTROY_MKEY: - return "DESTROY_MKEY"; - - case MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS: - return "QUERY_SPECIAL_CONTEXTS"; - - case MLX5_CMD_OP_CREATE_EQ: - return "CREATE_EQ"; - - case MLX5_CMD_OP_DESTROY_EQ: - return "DESTROY_EQ"; - - case MLX5_CMD_OP_QUERY_EQ: - return "QUERY_EQ"; - - case MLX5_CMD_OP_CREATE_CQ: - return "CREATE_CQ"; - - case MLX5_CMD_OP_DESTROY_CQ: - return "DESTROY_CQ"; - - case MLX5_CMD_OP_QUERY_CQ: - return "QUERY_CQ"; - - case MLX5_CMD_OP_MODIFY_CQ: - return "MODIFY_CQ"; - - case MLX5_CMD_OP_CREATE_QP: - return "CREATE_QP"; - - case MLX5_CMD_OP_DESTROY_QP: - return "DESTROY_QP"; - - case MLX5_CMD_OP_RST2INIT_QP: - return "RST2INIT_QP"; - - case MLX5_CMD_OP_INIT2RTR_QP: - return "INIT2RTR_QP"; - - case MLX5_CMD_OP_RTR2RTS_QP: - return "RTR2RTS_QP"; - - case MLX5_CMD_OP_RTS2RTS_QP: - return "RTS2RTS_QP"; - - case MLX5_CMD_OP_SQERR2RTS_QP: - return "SQERR2RTS_QP"; - - case MLX5_CMD_OP_2ERR_QP: - return "2ERR_QP"; - - case MLX5_CMD_OP_2RST_QP: - return "2RST_QP"; - - case MLX5_CMD_OP_QUERY_QP: - return "QUERY_QP"; - - case MLX5_CMD_OP_MAD_IFC: - return "MAD_IFC"; - - case MLX5_CMD_OP_INIT2INIT_QP: - return "INIT2INIT_QP"; - - case MLX5_CMD_OP_CREATE_PSV: - return "CREATE_PSV"; - - case MLX5_CMD_OP_DESTROY_PSV: - return "DESTROY_PSV"; - - case MLX5_CMD_OP_CREATE_SRQ: - return "CREATE_SRQ"; - - case MLX5_CMD_OP_DESTROY_SRQ: - return "DESTROY_SRQ"; - - case MLX5_CMD_OP_QUERY_SRQ: - return "QUERY_SRQ"; - - case MLX5_CMD_OP_ARM_RQ: - return "ARM_RQ"; - - case MLX5_CMD_OP_CREATE_XRC_SRQ: - return "CREATE_XRC_SRQ"; - - case MLX5_CMD_OP_DESTROY_XRC_SRQ: - return "DESTROY_XRC_SRQ"; - - case MLX5_CMD_OP_QUERY_XRC_SRQ: - return "QUERY_XRC_SRQ"; - - case MLX5_CMD_OP_ARM_XRC_SRQ: - return "ARM_XRC_SRQ"; - - case MLX5_CMD_OP_ALLOC_PD: - return "ALLOC_PD"; - - case MLX5_CMD_OP_DEALLOC_PD: - return "DEALLOC_PD"; - - case MLX5_CMD_OP_ALLOC_UAR: - return "ALLOC_UAR"; - - case MLX5_CMD_OP_DEALLOC_UAR: - return "DEALLOC_UAR"; - - case MLX5_CMD_OP_ATTACH_TO_MCG: - return "ATTACH_TO_MCG"; - - case MLX5_CMD_OP_DETTACH_FROM_MCG: - return "DETTACH_FROM_MCG"; - - case MLX5_CMD_OP_ALLOC_XRCD: - return "ALLOC_XRCD"; - - case MLX5_CMD_OP_DEALLOC_XRCD: - return "DEALLOC_XRCD"; - - case MLX5_CMD_OP_ACCESS_REG: - return "MLX5_CMD_OP_ACCESS_REG"; - - case MLX5_CMD_OP_SET_WOL_ROL: - return "SET_WOL_ROL"; - - case MLX5_CMD_OP_QUERY_WOL_ROL: - return "QUERY_WOL_ROL"; - - case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT: - return "ADD_VXLAN_UDP_DPORT"; - - case MLX5_CMD_OP_DELETE_VXLAN_UDP_DPORT: - return "DELETE_VXLAN_UDP_DPORT"; +#define MLX5_COMMAND_STR_CASE(__cmd) case MLX5_CMD_OP_ ## __cmd: return #__cmd + switch (command) { + MLX5_COMMAND_STR_CASE(QUERY_HCA_CAP); + MLX5_COMMAND_STR_CASE(QUERY_ADAPTER); + MLX5_COMMAND_STR_CASE(INIT_HCA); + MLX5_COMMAND_STR_CASE(TEARDOWN_HCA); + MLX5_COMMAND_STR_CASE(ENABLE_HCA); + MLX5_COMMAND_STR_CASE(DISABLE_HCA); + MLX5_COMMAND_STR_CASE(QUERY_PAGES); + MLX5_COMMAND_STR_CASE(MANAGE_PAGES); + MLX5_COMMAND_STR_CASE(SET_HCA_CAP); + MLX5_COMMAND_STR_CASE(QUERY_ISSI); + MLX5_COMMAND_STR_CASE(SET_ISSI); + MLX5_COMMAND_STR_CASE(CREATE_MKEY); + MLX5_COMMAND_STR_CASE(QUERY_MKEY); + MLX5_COMMAND_STR_CASE(DESTROY_MKEY); + MLX5_COMMAND_STR_CASE(QUERY_SPECIAL_CONTEXTS); + MLX5_COMMAND_STR_CASE(PAGE_FAULT_RESUME); + MLX5_COMMAND_STR_CASE(CREATE_EQ); + MLX5_COMMAND_STR_CASE(DESTROY_EQ); + MLX5_COMMAND_STR_CASE(QUERY_EQ); + MLX5_COMMAND_STR_CASE(GEN_EQE); + MLX5_COMMAND_STR_CASE(CREATE_CQ); + MLX5_COMMAND_STR_CASE(DESTROY_CQ); + MLX5_COMMAND_STR_CASE(QUERY_CQ); + MLX5_COMMAND_STR_CASE(MODIFY_CQ); + MLX5_COMMAND_STR_CASE(CREATE_QP); + MLX5_COMMAND_STR_CASE(DESTROY_QP); + MLX5_COMMAND_STR_CASE(RST2INIT_QP); + MLX5_COMMAND_STR_CASE(INIT2RTR_QP); + MLX5_COMMAND_STR_CASE(RTR2RTS_QP); + MLX5_COMMAND_STR_CASE(RTS2RTS_QP); + MLX5_COMMAND_STR_CASE(SQERR2RTS_QP); + MLX5_COMMAND_STR_CASE(2ERR_QP); + MLX5_COMMAND_STR_CASE(2RST_QP); + MLX5_COMMAND_STR_CASE(QUERY_QP); + MLX5_COMMAND_STR_CASE(SQD_RTS_QP); + MLX5_COMMAND_STR_CASE(INIT2INIT_QP); + MLX5_COMMAND_STR_CASE(CREATE_PSV); + MLX5_COMMAND_STR_CASE(DESTROY_PSV); + MLX5_COMMAND_STR_CASE(CREATE_SRQ); + MLX5_COMMAND_STR_CASE(DESTROY_SRQ); + MLX5_COMMAND_STR_CASE(QUERY_SRQ); + MLX5_COMMAND_STR_CASE(ARM_RQ); + MLX5_COMMAND_STR_CASE(CREATE_XRC_SRQ); + MLX5_COMMAND_STR_CASE(DESTROY_XRC_SRQ); + MLX5_COMMAND_STR_CASE(QUERY_XRC_SRQ); + MLX5_COMMAND_STR_CASE(ARM_XRC_SRQ); + MLX5_COMMAND_STR_CASE(CREATE_DCT); + MLX5_COMMAND_STR_CASE(DESTROY_DCT); + MLX5_COMMAND_STR_CASE(DRAIN_DCT); + MLX5_COMMAND_STR_CASE(QUERY_DCT); + MLX5_COMMAND_STR_CASE(ARM_DCT_FOR_KEY_VIOLATION); + MLX5_COMMAND_STR_CASE(QUERY_VPORT_STATE); + MLX5_COMMAND_STR_CASE(MODIFY_VPORT_STATE); + MLX5_COMMAND_STR_CASE(QUERY_ESW_VPORT_CONTEXT); + MLX5_COMMAND_STR_CASE(MODIFY_ESW_VPORT_CONTEXT); + MLX5_COMMAND_STR_CASE(QUERY_NIC_VPORT_CONTEXT); + MLX5_COMMAND_STR_CASE(MODIFY_NIC_VPORT_CONTEXT); + MLX5_COMMAND_STR_CASE(QUERY_ROCE_ADDRESS); + MLX5_COMMAND_STR_CASE(SET_ROCE_ADDRESS); + MLX5_COMMAND_STR_CASE(QUERY_HCA_VPORT_CONTEXT); + MLX5_COMMAND_STR_CASE(MODIFY_HCA_VPORT_CONTEXT); + MLX5_COMMAND_STR_CASE(QUERY_HCA_VPORT_GID); + MLX5_COMMAND_STR_CASE(QUERY_HCA_VPORT_PKEY); + MLX5_COMMAND_STR_CASE(QUERY_VPORT_COUNTER); + MLX5_COMMAND_STR_CASE(ALLOC_Q_COUNTER); + MLX5_COMMAND_STR_CASE(DEALLOC_Q_COUNTER); + MLX5_COMMAND_STR_CASE(QUERY_Q_COUNTER); + MLX5_COMMAND_STR_CASE(ALLOC_PD); + MLX5_COMMAND_STR_CASE(DEALLOC_PD); + MLX5_COMMAND_STR_CASE(ALLOC_UAR); + MLX5_COMMAND_STR_CASE(DEALLOC_UAR); + MLX5_COMMAND_STR_CASE(CONFIG_INT_MODERATION); + MLX5_COMMAND_STR_CASE(ACCESS_REG); + MLX5_COMMAND_STR_CASE(ATTACH_TO_MCG); + MLX5_COMMAND_STR_CASE(DETTACH_FROM_MCG); + MLX5_COMMAND_STR_CASE(GET_DROPPED_PACKET_LOG); + MLX5_COMMAND_STR_CASE(MAD_IFC); + MLX5_COMMAND_STR_CASE(QUERY_MAD_DEMUX); + MLX5_COMMAND_STR_CASE(SET_MAD_DEMUX); + MLX5_COMMAND_STR_CASE(NOP); + MLX5_COMMAND_STR_CASE(ALLOC_XRCD); + MLX5_COMMAND_STR_CASE(DEALLOC_XRCD); + MLX5_COMMAND_STR_CASE(ALLOC_TRANSPORT_DOMAIN); + MLX5_COMMAND_STR_CASE(DEALLOC_TRANSPORT_DOMAIN); + MLX5_COMMAND_STR_CASE(QUERY_CONG_STATUS); + MLX5_COMMAND_STR_CASE(MODIFY_CONG_STATUS); + MLX5_COMMAND_STR_CASE(QUERY_CONG_PARAMS); + MLX5_COMMAND_STR_CASE(MODIFY_CONG_PARAMS); + MLX5_COMMAND_STR_CASE(QUERY_CONG_STATISTICS); + MLX5_COMMAND_STR_CASE(ADD_VXLAN_UDP_DPORT); + MLX5_COMMAND_STR_CASE(DELETE_VXLAN_UDP_DPORT); + MLX5_COMMAND_STR_CASE(SET_L2_TABLE_ENTRY); + MLX5_COMMAND_STR_CASE(QUERY_L2_TABLE_ENTRY); + MLX5_COMMAND_STR_CASE(DELETE_L2_TABLE_ENTRY); + MLX5_COMMAND_STR_CASE(SET_WOL_ROL); + MLX5_COMMAND_STR_CASE(QUERY_WOL_ROL); + MLX5_COMMAND_STR_CASE(CREATE_TIR); + MLX5_COMMAND_STR_CASE(MODIFY_TIR); + MLX5_COMMAND_STR_CASE(DESTROY_TIR); + MLX5_COMMAND_STR_CASE(QUERY_TIR); + MLX5_COMMAND_STR_CASE(CREATE_SQ); + MLX5_COMMAND_STR_CASE(MODIFY_SQ); + MLX5_COMMAND_STR_CASE(DESTROY_SQ); + MLX5_COMMAND_STR_CASE(QUERY_SQ); + MLX5_COMMAND_STR_CASE(CREATE_RQ); + MLX5_COMMAND_STR_CASE(MODIFY_RQ); + MLX5_COMMAND_STR_CASE(DESTROY_RQ); + MLX5_COMMAND_STR_CASE(QUERY_RQ); + MLX5_COMMAND_STR_CASE(CREATE_RMP); + MLX5_COMMAND_STR_CASE(MODIFY_RMP); + MLX5_COMMAND_STR_CASE(DESTROY_RMP); + MLX5_COMMAND_STR_CASE(QUERY_RMP); + MLX5_COMMAND_STR_CASE(CREATE_TIS); + MLX5_COMMAND_STR_CASE(MODIFY_TIS); + MLX5_COMMAND_STR_CASE(DESTROY_TIS); + MLX5_COMMAND_STR_CASE(QUERY_TIS); + MLX5_COMMAND_STR_CASE(CREATE_RQT); + MLX5_COMMAND_STR_CASE(MODIFY_RQT); + MLX5_COMMAND_STR_CASE(DESTROY_RQT); + MLX5_COMMAND_STR_CASE(QUERY_RQT); + MLX5_COMMAND_STR_CASE(SET_FLOW_TABLE_ROOT); + MLX5_COMMAND_STR_CASE(CREATE_FLOW_TABLE); + MLX5_COMMAND_STR_CASE(DESTROY_FLOW_TABLE); + MLX5_COMMAND_STR_CASE(QUERY_FLOW_TABLE); + MLX5_COMMAND_STR_CASE(CREATE_FLOW_GROUP); + MLX5_COMMAND_STR_CASE(DESTROY_FLOW_GROUP); + MLX5_COMMAND_STR_CASE(QUERY_FLOW_GROUP); + MLX5_COMMAND_STR_CASE(SET_FLOW_TABLE_ENTRY); + MLX5_COMMAND_STR_CASE(QUERY_FLOW_TABLE_ENTRY); + MLX5_COMMAND_STR_CASE(DELETE_FLOW_TABLE_ENTRY); + MLX5_COMMAND_STR_CASE(ALLOC_FLOW_COUNTER); + MLX5_COMMAND_STR_CASE(DEALLOC_FLOW_COUNTER); + MLX5_COMMAND_STR_CASE(QUERY_FLOW_COUNTER); + MLX5_COMMAND_STR_CASE(MODIFY_FLOW_TABLE); default: return "unknown command opcode"; } } @@ -634,11 +620,36 @@ static void dump_command(struct mlx5_core_dev *dev, pr_debug("\n"); } +static u16 msg_to_opcode(struct mlx5_cmd_msg *in) +{ + struct mlx5_inbox_hdr *hdr = (struct mlx5_inbox_hdr *)(in->first.data); + + return be16_to_cpu(hdr->opcode); +} + +static void cb_timeout_handler(struct work_struct *work) +{ + struct delayed_work *dwork = container_of(work, struct delayed_work, + work); + struct mlx5_cmd_work_ent *ent = container_of(dwork, + struct mlx5_cmd_work_ent, + cb_timeout_work); + struct mlx5_core_dev *dev = container_of(ent->cmd, struct mlx5_core_dev, + cmd); + + ent->ret = -ETIMEDOUT; + mlx5_core_warn(dev, "%s(0x%x) timeout. Will cause a leak of a command resource\n", + mlx5_command_str(msg_to_opcode(ent->in)), + msg_to_opcode(ent->in)); + mlx5_cmd_comp_handler(dev, 1UL << ent->idx); +} + static void cmd_work_handler(struct work_struct *work) { struct mlx5_cmd_work_ent *ent = container_of(work, struct mlx5_cmd_work_ent, work); struct mlx5_cmd *cmd = ent->cmd; struct mlx5_core_dev *dev = container_of(cmd, struct mlx5_core_dev, cmd); + unsigned long cb_timeout = msecs_to_jiffies(MLX5_CMD_TIMEOUT_MSEC); struct mlx5_cmd_layout *lay; struct semaphore *sem; unsigned long flags; @@ -659,7 +670,6 @@ static void cmd_work_handler(struct work_struct *work) spin_unlock_irqrestore(&cmd->alloc_lock, flags); } - ent->token = alloc_token(cmd); cmd->ent_arr[ent->idx] = ent; lay = get_inst(cmd, ent->idx); ent->lay = lay; @@ -679,6 +689,9 @@ static void cmd_work_handler(struct work_struct *work) dump_command(dev, ent, 1); ent->ts1 = ktime_get_ns(); + if (ent->callback) + schedule_delayed_work(&ent->cb_timeout_work, cb_timeout); + /* ring doorbell after the descriptor is valid */ mlx5_core_dbg(dev, "writing 0x%x to command doorbell\n", 1 << ent->idx); wmb(); @@ -723,13 +736,6 @@ static const char *deliv_status_to_str(u8 status) } } -static u16 msg_to_opcode(struct mlx5_cmd_msg *in) -{ - struct mlx5_inbox_hdr *hdr = (struct mlx5_inbox_hdr *)(in->first.data); - - return be16_to_cpu(hdr->opcode); -} - static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent) { unsigned long timeout = msecs_to_jiffies(MLX5_CMD_TIMEOUT_MSEC); @@ -738,13 +744,13 @@ static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent) if (cmd->mode == CMD_MODE_POLLING) { wait_for_completion(&ent->done); - err = ent->ret; - } else { - if (!wait_for_completion_timeout(&ent->done, timeout)) - err = -ETIMEDOUT; - else - err = 0; + } else if (!wait_for_completion_timeout(&ent->done, timeout)) { + ent->ret = -ETIMEDOUT; + mlx5_cmd_comp_handler(dev, 1UL << ent->idx); } + + err = ent->ret; + if (err == -ETIMEDOUT) { mlx5_core_warn(dev, "%s(0x%x) timeout. Will cause a leak of a command resource\n", mlx5_command_str(msg_to_opcode(ent->in)), @@ -773,7 +779,8 @@ static u8 *get_status_ptr(struct mlx5_outbox_hdr *out) static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in, struct mlx5_cmd_msg *out, void *uout, int uout_size, mlx5_cmd_cbk_t callback, - void *context, int page_queue, u8 *status) + void *context, int page_queue, u8 *status, + u8 token) { struct mlx5_cmd *cmd = &dev->cmd; struct mlx5_cmd_work_ent *ent; @@ -790,9 +797,12 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in, if (IS_ERR(ent)) return PTR_ERR(ent); + ent->token = token; + if (!callback) init_completion(&ent->done); + INIT_DELAYED_WORK(&ent->cb_timeout_work, cb_timeout_handler); INIT_WORK(&ent->work, cmd_work_handler); if (page_queue) { cmd_work_handler(&ent->work); @@ -802,28 +812,26 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in, goto out_free; } - if (!callback) { - err = wait_func(dev, ent); - if (err == -ETIMEDOUT) - goto out; - - ds = ent->ts2 - ent->ts1; - op = be16_to_cpu(((struct mlx5_inbox_hdr *)in->first.data)->opcode); - if (op < ARRAY_SIZE(cmd->stats)) { - stats = &cmd->stats[op]; - spin_lock_irq(&stats->lock); - stats->sum += ds; - ++stats->n; - spin_unlock_irq(&stats->lock); - } - mlx5_core_dbg_mask(dev, 1 << MLX5_CMD_TIME, - "fw exec time for %s is %lld nsec\n", - mlx5_command_str(op), ds); - *status = ent->status; - free_cmd(ent); - } + if (callback) + goto out; - return err; + err = wait_func(dev, ent); + if (err == -ETIMEDOUT) + goto out_free; + + ds = ent->ts2 - ent->ts1; + op = be16_to_cpu(((struct mlx5_inbox_hdr *)in->first.data)->opcode); + if (op < ARRAY_SIZE(cmd->stats)) { + stats = &cmd->stats[op]; + spin_lock_irq(&stats->lock); + stats->sum += ds; + ++stats->n; + spin_unlock_irq(&stats->lock); + } + mlx5_core_dbg_mask(dev, 1 << MLX5_CMD_TIME, + "fw exec time for %s is %lld nsec\n", + mlx5_command_str(op), ds); + *status = ent->status; out_free: free_cmd(ent); @@ -862,7 +870,8 @@ static const struct file_operations fops = { .write = dbg_write, }; -static int mlx5_copy_to_msg(struct mlx5_cmd_msg *to, void *from, int size) +static int mlx5_copy_to_msg(struct mlx5_cmd_msg *to, void *from, int size, + u8 token) { struct mlx5_cmd_prot_block *block; struct mlx5_cmd_mailbox *next; @@ -888,6 +897,7 @@ static int mlx5_copy_to_msg(struct mlx5_cmd_msg *to, void *from, int size) memcpy(block->data, from, copy); from += copy; size -= copy; + block->token = token; next = next->next; } @@ -957,7 +967,8 @@ static void free_cmd_box(struct mlx5_core_dev *dev, } static struct mlx5_cmd_msg *mlx5_alloc_cmd_msg(struct mlx5_core_dev *dev, - gfp_t flags, int size) + gfp_t flags, int size, + u8 token) { struct mlx5_cmd_mailbox *tmp, *head = NULL; struct mlx5_cmd_prot_block *block; @@ -986,6 +997,7 @@ static struct mlx5_cmd_msg *mlx5_alloc_cmd_msg(struct mlx5_core_dev *dev, tmp->next = head; block->next = cpu_to_be64(tmp->next ? tmp->next->dma : 0); block->block_num = cpu_to_be32(n - i - 1); + block->token = token; head = tmp; } msg->next = head; @@ -1213,41 +1225,30 @@ err_dbg: return err; } -void mlx5_cmd_use_events(struct mlx5_core_dev *dev) +static void mlx5_cmd_change_mod(struct mlx5_core_dev *dev, int mode) { struct mlx5_cmd *cmd = &dev->cmd; int i; for (i = 0; i < cmd->max_reg_cmds; i++) down(&cmd->sem); - down(&cmd->pages_sem); - flush_workqueue(cmd->wq); - - cmd->mode = CMD_MODE_EVENTS; + cmd->mode = mode; up(&cmd->pages_sem); for (i = 0; i < cmd->max_reg_cmds; i++) up(&cmd->sem); } -void mlx5_cmd_use_polling(struct mlx5_core_dev *dev) +void mlx5_cmd_use_events(struct mlx5_core_dev *dev) { - struct mlx5_cmd *cmd = &dev->cmd; - int i; - - for (i = 0; i < cmd->max_reg_cmds; i++) - down(&cmd->sem); - - down(&cmd->pages_sem); - - flush_workqueue(cmd->wq); - cmd->mode = CMD_MODE_POLLING; + mlx5_cmd_change_mod(dev, CMD_MODE_EVENTS); +} - up(&cmd->pages_sem); - for (i = 0; i < cmd->max_reg_cmds; i++) - up(&cmd->sem); +void mlx5_cmd_use_polling(struct mlx5_core_dev *dev) +{ + mlx5_cmd_change_mod(dev, CMD_MODE_POLLING); } static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg) @@ -1283,6 +1284,8 @@ void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec) struct semaphore *sem; ent = cmd->ent_arr[i]; + if (ent->callback) + cancel_delayed_work(&ent->cb_timeout_work); if (ent->page_queue) sem = &cmd->pages_sem; else @@ -1369,7 +1372,7 @@ static struct mlx5_cmd_msg *alloc_msg(struct mlx5_core_dev *dev, int in_size, } if (IS_ERR(msg)) - msg = mlx5_alloc_cmd_msg(dev, gfp, in_size); + msg = mlx5_alloc_cmd_msg(dev, gfp, in_size, 0); return msg; } @@ -1394,6 +1397,7 @@ static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, int err; u8 status = 0; u32 drv_synd; + u8 token; if (pci_channel_offline(dev->pdev) || dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { @@ -1412,20 +1416,22 @@ static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, return err; } - err = mlx5_copy_to_msg(inb, in, in_size); + token = alloc_token(&dev->cmd); + + err = mlx5_copy_to_msg(inb, in, in_size, token); if (err) { mlx5_core_warn(dev, "err %d\n", err); goto out_in; } - outb = mlx5_alloc_cmd_msg(dev, gfp, out_size); + outb = mlx5_alloc_cmd_msg(dev, gfp, out_size, token); if (IS_ERR(outb)) { err = PTR_ERR(outb); goto out_in; } err = mlx5_cmd_invoke(dev, inb, outb, out, out_size, callback, context, - pages_queue, &status); + pages_queue, &status, token); if (err) goto out_out; @@ -1493,7 +1499,7 @@ static int create_msg_cache(struct mlx5_core_dev *dev) INIT_LIST_HEAD(&cmd->cache.med.head); for (i = 0; i < NUM_LONG_LISTS; i++) { - msg = mlx5_alloc_cmd_msg(dev, GFP_KERNEL, LONG_LIST_SIZE); + msg = mlx5_alloc_cmd_msg(dev, GFP_KERNEL, LONG_LIST_SIZE, 0); if (IS_ERR(msg)) { err = PTR_ERR(msg); goto ex_err; @@ -1503,7 +1509,7 @@ static int create_msg_cache(struct mlx5_core_dev *dev) } for (i = 0; i < NUM_MED_LISTS; i++) { - msg = mlx5_alloc_cmd_msg(dev, GFP_KERNEL, MED_LIST_SIZE); + msg = mlx5_alloc_cmd_msg(dev, GFP_KERNEL, MED_LIST_SIZE, 0); if (IS_ERR(msg)) { err = PTR_ERR(msg); goto ex_err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cq.c b/drivers/net/ethernet/mellanox/mlx5/core/cq.c index b51e42d6fbec..873a631ad155 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cq.c @@ -39,6 +39,53 @@ #include <linux/mlx5/cq.h> #include "mlx5_core.h" +#define TASKLET_MAX_TIME 2 +#define TASKLET_MAX_TIME_JIFFIES msecs_to_jiffies(TASKLET_MAX_TIME) + +void mlx5_cq_tasklet_cb(unsigned long data) +{ + unsigned long flags; + unsigned long end = jiffies + TASKLET_MAX_TIME_JIFFIES; + struct mlx5_eq_tasklet *ctx = (struct mlx5_eq_tasklet *)data; + struct mlx5_core_cq *mcq; + struct mlx5_core_cq *temp; + + spin_lock_irqsave(&ctx->lock, flags); + list_splice_tail_init(&ctx->list, &ctx->process_list); + spin_unlock_irqrestore(&ctx->lock, flags); + + list_for_each_entry_safe(mcq, temp, &ctx->process_list, + tasklet_ctx.list) { + list_del_init(&mcq->tasklet_ctx.list); + mcq->tasklet_ctx.comp(mcq); + if (atomic_dec_and_test(&mcq->refcount)) + complete(&mcq->free); + if (time_after(jiffies, end)) + break; + } + + if (!list_empty(&ctx->process_list)) + tasklet_schedule(&ctx->task); +} + +static void mlx5_add_cq_to_tasklet(struct mlx5_core_cq *cq) +{ + unsigned long flags; + struct mlx5_eq_tasklet *tasklet_ctx = cq->tasklet_ctx.priv; + + spin_lock_irqsave(&tasklet_ctx->lock, flags); + /* When migrating CQs between EQs will be implemented, please note + * that you need to sync this point. It is possible that + * while migrating a CQ, completions on the old EQs could + * still arrive. + */ + if (list_empty_careful(&cq->tasklet_ctx.list)) { + atomic_inc(&cq->refcount); + list_add_tail(&cq->tasklet_ctx.list, &tasklet_ctx->list); + } + spin_unlock_irqrestore(&tasklet_ctx->lock, flags); +} + void mlx5_cq_completion(struct mlx5_core_dev *dev, u32 cqn) { struct mlx5_core_cq *cq; @@ -96,6 +143,13 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, struct mlx5_create_cq_mbox_out out; struct mlx5_destroy_cq_mbox_in din; struct mlx5_destroy_cq_mbox_out dout; + int eqn = MLX5_GET(cqc, MLX5_ADDR_OF(create_cq_in, in, cq_context), + c_eqn); + struct mlx5_eq *eq; + + eq = mlx5_eqn2eq(dev, eqn); + if (IS_ERR(eq)) + return PTR_ERR(eq); in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_CQ); memset(&out, 0, sizeof(out)); @@ -111,6 +165,11 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, cq->arm_sn = 0; atomic_set(&cq->refcount, 1); init_completion(&cq->free); + if (!cq->comp) + cq->comp = mlx5_add_cq_to_tasklet; + /* assuming CQ will be deleted before the EQ */ + cq->tasklet_ctx.priv = &eq->tasklet_ctx; + INIT_LIST_HEAD(&cq->tasklet_ctx.list); spin_lock_irq(&table->lock); err = radix_tree_insert(&table->tree, cq->cqn, cq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 24344aafbd36..bf722aa88cf0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -44,8 +44,12 @@ #include <linux/mlx5/vport.h> #include <linux/mlx5/transobj.h> #include <linux/rhashtable.h> +#include <net/switchdev.h> #include "wq.h" #include "mlx5_core.h" +#include "en_stats.h" + +#define MLX5_SET_CFG(p, f, v) MLX5_SET(create_flow_group_in, p, f, v) #define MLX5E_MAX_NUM_TC 8 @@ -57,263 +61,152 @@ #define MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE 0xa #define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE 0xd +#define MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW 0x1 +#define MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW 0x4 +#define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE_MPW 0x6 + +#define MLX5_MPWRQ_LOG_STRIDE_SIZE 6 /* >= 6, HW restriction */ +#define MLX5_MPWRQ_LOG_STRIDE_SIZE_CQE_COMPRESS 8 /* >= 6, HW restriction */ +#define MLX5_MPWRQ_LOG_WQE_SZ 17 +#define MLX5_MPWRQ_WQE_PAGE_ORDER (MLX5_MPWRQ_LOG_WQE_SZ - PAGE_SHIFT > 0 ? \ + MLX5_MPWRQ_LOG_WQE_SZ - PAGE_SHIFT : 0) +#define MLX5_MPWRQ_PAGES_PER_WQE BIT(MLX5_MPWRQ_WQE_PAGE_ORDER) +#define MLX5_MPWRQ_STRIDES_PER_PAGE (MLX5_MPWRQ_NUM_STRIDES >> \ + MLX5_MPWRQ_WQE_PAGE_ORDER) + +#define MLX5_MTT_OCTW(npages) (ALIGN(npages, 8) / 2) +#define MLX5E_REQUIRED_MTTS(rqs, wqes)\ + (rqs * wqes * ALIGN(MLX5_MPWRQ_PAGES_PER_WQE, 8)) +#define MLX5E_VALID_NUM_MTTS(num_mtts) (MLX5_MTT_OCTW(num_mtts) <= U16_MAX) + +#define MLX5_UMR_ALIGN (2048) +#define MLX5_MPWRQ_SMALL_PACKET_THRESHOLD (128) + #define MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ (64 * 1024) #define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC 0x10 +#define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE 0x3 #define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS 0x20 #define MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC 0x10 #define MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS 0x20 #define MLX5E_PARAMS_DEFAULT_MIN_RX_WQES 0x80 +#define MLX5E_PARAMS_DEFAULT_MIN_RX_WQES_MPW 0x2 #define MLX5E_LOG_INDIR_RQT_SIZE 0x7 #define MLX5E_INDIR_RQT_SIZE BIT(MLX5E_LOG_INDIR_RQT_SIZE) #define MLX5E_MAX_NUM_CHANNELS (MLX5E_INDIR_RQT_SIZE >> 1) +#define MLX5E_MAX_NUM_SQS (MLX5E_MAX_NUM_CHANNELS * MLX5E_MAX_NUM_TC) #define MLX5E_TX_CQ_POLL_BUDGET 128 #define MLX5E_UPDATE_STATS_INTERVAL 200 /* msecs */ #define MLX5E_SQ_BF_BUDGET 16 #define MLX5E_NUM_MAIN_GROUPS 9 +static inline u16 mlx5_min_rx_wqes(int wq_type, u32 wq_size) +{ + switch (wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + return min_t(u16, MLX5E_PARAMS_DEFAULT_MIN_RX_WQES_MPW, + wq_size / 2); + default: + return min_t(u16, MLX5E_PARAMS_DEFAULT_MIN_RX_WQES, + wq_size / 2); + } +} + +static inline int mlx5_min_log_rq_size(int wq_type) +{ + switch (wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + return MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW; + default: + return MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE; + } +} + +static inline int mlx5_max_log_rq_size(int wq_type) +{ + switch (wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + return MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE_MPW; + default: + return MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE; + } +} + +enum { + MLX5E_INLINE_MODE_L2, + MLX5E_INLINE_MODE_VPORT_CONTEXT, + MLX5_INLINE_MODE_NOT_REQUIRED, +}; + +struct mlx5e_tx_wqe { + struct mlx5_wqe_ctrl_seg ctrl; + struct mlx5_wqe_eth_seg eth; +}; + +struct mlx5e_rx_wqe { + struct mlx5_wqe_srq_next_seg next; + struct mlx5_wqe_data_seg data; +}; + +struct mlx5e_umr_wqe { + struct mlx5_wqe_ctrl_seg ctrl; + struct mlx5_wqe_umr_ctrl_seg uctrl; + struct mlx5_mkey_seg mkc; + struct mlx5_wqe_data_seg data; +}; + +static const char mlx5e_priv_flags[][ETH_GSTRING_LEN] = { + "rx_cqe_moder", +}; + +enum mlx5e_priv_flag { + MLX5E_PFLAG_RX_CQE_BASED_MODER = (1 << 0), +}; + +#define MLX5E_SET_PRIV_FLAG(priv, pflag, enable) \ + do { \ + if (enable) \ + priv->pflags |= pflag; \ + else \ + priv->pflags &= ~pflag; \ + } while (0) + #ifdef CONFIG_MLX5_CORE_EN_DCB #define MLX5E_MAX_BW_ALLOC 100 /* Max percentage of BW allocation */ -#define MLX5E_MIN_BW_ALLOC 1 /* Min percentage of BW allocation */ #endif -static const char vport_strings[][ETH_GSTRING_LEN] = { - /* vport statistics */ - "rx_packets", - "rx_bytes", - "tx_packets", - "tx_bytes", - "rx_error_packets", - "rx_error_bytes", - "tx_error_packets", - "tx_error_bytes", - "rx_unicast_packets", - "rx_unicast_bytes", - "tx_unicast_packets", - "tx_unicast_bytes", - "rx_multicast_packets", - "rx_multicast_bytes", - "tx_multicast_packets", - "tx_multicast_bytes", - "rx_broadcast_packets", - "rx_broadcast_bytes", - "tx_broadcast_packets", - "tx_broadcast_bytes", - - /* SW counters */ - "tso_packets", - "tso_bytes", - "tso_inner_packets", - "tso_inner_bytes", - "lro_packets", - "lro_bytes", - "rx_csum_good", - "rx_csum_none", - "rx_csum_sw", - "tx_csum_offload", - "tx_csum_inner", - "tx_queue_stopped", - "tx_queue_wake", - "tx_queue_dropped", - "rx_wqe_err", -}; - -struct mlx5e_vport_stats { - /* HW counters */ - u64 rx_packets; - u64 rx_bytes; - u64 tx_packets; - u64 tx_bytes; - u64 rx_error_packets; - u64 rx_error_bytes; - u64 tx_error_packets; - u64 tx_error_bytes; - u64 rx_unicast_packets; - u64 rx_unicast_bytes; - u64 tx_unicast_packets; - u64 tx_unicast_bytes; - u64 rx_multicast_packets; - u64 rx_multicast_bytes; - u64 tx_multicast_packets; - u64 tx_multicast_bytes; - u64 rx_broadcast_packets; - u64 rx_broadcast_bytes; - u64 tx_broadcast_packets; - u64 tx_broadcast_bytes; - - /* SW counters */ - u64 tso_packets; - u64 tso_bytes; - u64 tso_inner_packets; - u64 tso_inner_bytes; - u64 lro_packets; - u64 lro_bytes; - u64 rx_csum_good; - u64 rx_csum_none; - u64 rx_csum_sw; - u64 tx_csum_offload; - u64 tx_csum_inner; - u64 tx_queue_stopped; - u64 tx_queue_wake; - u64 tx_queue_dropped; - u64 rx_wqe_err; - -#define NUM_VPORT_COUNTERS 35 -}; - -static const char pport_strings[][ETH_GSTRING_LEN] = { - /* IEEE802.3 counters */ - "frames_tx", - "frames_rx", - "check_seq_err", - "alignment_err", - "octets_tx", - "octets_received", - "multicast_xmitted", - "broadcast_xmitted", - "multicast_rx", - "broadcast_rx", - "in_range_len_errors", - "out_of_range_len", - "too_long_errors", - "symbol_err", - "mac_control_tx", - "mac_control_rx", - "unsupported_op_rx", - "pause_ctrl_rx", - "pause_ctrl_tx", - - /* RFC2863 counters */ - "in_octets", - "in_ucast_pkts", - "in_discards", - "in_errors", - "in_unknown_protos", - "out_octets", - "out_ucast_pkts", - "out_discards", - "out_errors", - "in_multicast_pkts", - "in_broadcast_pkts", - "out_multicast_pkts", - "out_broadcast_pkts", - - /* RFC2819 counters */ - "drop_events", - "octets", - "pkts", - "broadcast_pkts", - "multicast_pkts", - "crc_align_errors", - "undersize_pkts", - "oversize_pkts", - "fragments", - "jabbers", - "collisions", - "p64octets", - "p65to127octets", - "p128to255octets", - "p256to511octets", - "p512to1023octets", - "p1024to1518octets", - "p1519to2047octets", - "p2048to4095octets", - "p4096to8191octets", - "p8192to10239octets", -}; - -#define NUM_IEEE_802_3_COUNTERS 19 -#define NUM_RFC_2863_COUNTERS 13 -#define NUM_RFC_2819_COUNTERS 21 -#define NUM_PPORT_COUNTERS (NUM_IEEE_802_3_COUNTERS + \ - NUM_RFC_2863_COUNTERS + \ - NUM_RFC_2819_COUNTERS) - -struct mlx5e_pport_stats { - __be64 IEEE_802_3_counters[NUM_IEEE_802_3_COUNTERS]; - __be64 RFC_2863_counters[NUM_RFC_2863_COUNTERS]; - __be64 RFC_2819_counters[NUM_RFC_2819_COUNTERS]; -}; - -static const char rq_stats_strings[][ETH_GSTRING_LEN] = { - "packets", - "bytes", - "csum_none", - "csum_sw", - "lro_packets", - "lro_bytes", - "wqe_err" -}; - -struct mlx5e_rq_stats { - u64 packets; - u64 bytes; - u64 csum_none; - u64 csum_sw; - u64 lro_packets; - u64 lro_bytes; - u64 wqe_err; -#define NUM_RQ_STATS 7 -}; - -static const char sq_stats_strings[][ETH_GSTRING_LEN] = { - "packets", - "bytes", - "tso_packets", - "tso_bytes", - "tso_inner_packets", - "tso_inner_bytes", - "csum_offload_inner", - "nop", - "csum_offload_none", - "stopped", - "wake", - "dropped", -}; - -struct mlx5e_sq_stats { - /* commonly accessed in data path */ - u64 packets; - u64 bytes; - u64 tso_packets; - u64 tso_bytes; - u64 tso_inner_packets; - u64 tso_inner_bytes; - u64 csum_offload_inner; - u64 nop; - /* less likely accessed in data path */ - u64 csum_offload_none; - u64 stopped; - u64 wake; - u64 dropped; -#define NUM_SQ_STATS 12 -}; - -struct mlx5e_stats { - struct mlx5e_vport_stats vport; - struct mlx5e_pport_stats pport; +struct mlx5e_cq_moder { + u16 usec; + u16 pkts; }; struct mlx5e_params { u8 log_sq_size; + u8 rq_wq_type; + u8 mpwqe_log_stride_sz; + u8 mpwqe_log_num_strides; u8 log_rq_size; u16 num_channels; u8 num_tc; - u16 rx_cq_moderation_usec; - u16 rx_cq_moderation_pkts; - u16 tx_cq_moderation_usec; - u16 tx_cq_moderation_pkts; + u8 rx_cq_period_mode; + bool rx_cqe_compress_admin; + bool rx_cqe_compress; + struct mlx5e_cq_moder rx_cq_moderation; + struct mlx5e_cq_moder tx_cq_moderation; u16 min_rx_wqes; bool lro_en; u32 lro_wqe_sz; u16 tx_max_inline; + u8 tx_min_inline_mode; u8 rss_hfunc; u8 toeplitz_hash_key[40]; u32 indirection_rqt[MLX5E_INDIR_RQT_SIZE]; + bool vlan_strip_disable; #ifdef CONFIG_MLX5_CORE_EN_DCB struct ieee_ets ets; #endif + bool rx_am_enabled; }; struct mlx5e_tstamp { @@ -330,7 +223,9 @@ struct mlx5e_tstamp { }; enum { - MLX5E_RQ_STATE_POST_WQES_ENABLE, + MLX5E_RQ_STATE_FLUSH, + MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, + MLX5E_RQ_STATE_AM, }; struct mlx5e_cq { @@ -338,37 +233,124 @@ struct mlx5e_cq { struct mlx5_cqwq wq; /* data path - accessed per napi poll */ + u16 event_ctr; struct napi_struct *napi; struct mlx5_core_cq mcq; struct mlx5e_channel *channel; struct mlx5e_priv *priv; + /* cqe decompression */ + struct mlx5_cqe64 title; + struct mlx5_mini_cqe8 mini_arr[MLX5_MINI_CQE_ARRAY_SIZE]; + u8 mini_arr_idx; + u16 decmprs_left; + u16 decmprs_wqe_counter; + /* control */ struct mlx5_wq_ctrl wq_ctrl; } ____cacheline_aligned_in_smp; +struct mlx5e_rq; +typedef void (*mlx5e_fp_handle_rx_cqe)(struct mlx5e_rq *rq, + struct mlx5_cqe64 *cqe); +typedef int (*mlx5e_fp_alloc_wqe)(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, + u16 ix); + +typedef void (*mlx5e_fp_dealloc_wqe)(struct mlx5e_rq *rq, u16 ix); + +struct mlx5e_dma_info { + struct page *page; + dma_addr_t addr; +}; + +struct mlx5e_rx_am_stats { + int ppms; /* packets per msec */ + int epms; /* events per msec */ +}; + +struct mlx5e_rx_am_sample { + ktime_t time; + unsigned int pkt_ctr; + u16 event_ctr; +}; + +struct mlx5e_rx_am { /* Adaptive Moderation */ + u8 state; + struct mlx5e_rx_am_stats prev_stats; + struct mlx5e_rx_am_sample start_sample; + struct work_struct work; + u8 profile_ix; + u8 mode; + u8 tune_state; + u8 steps_right; + u8 steps_left; + u8 tired; +}; + struct mlx5e_rq { /* data path */ struct mlx5_wq_ll wq; u32 wqe_sz; struct sk_buff **skb; + struct mlx5e_mpw_info *wqe_info; + __be32 mkey_be; + __be32 umr_mkey_be; struct device *pdev; struct net_device *netdev; struct mlx5e_tstamp *tstamp; struct mlx5e_rq_stats stats; struct mlx5e_cq cq; + mlx5e_fp_handle_rx_cqe handle_rx_cqe; + mlx5e_fp_alloc_wqe alloc_wqe; + mlx5e_fp_dealloc_wqe dealloc_wqe; unsigned long state; int ix; + u32 mpwqe_mtt_offset; + + struct mlx5e_rx_am am; /* Adaptive Moderation */ /* control */ struct mlx5_wq_ctrl wq_ctrl; + u8 wq_type; + u32 mpwqe_stride_sz; + u32 mpwqe_num_strides; u32 rqn; struct mlx5e_channel *channel; struct mlx5e_priv *priv; } ____cacheline_aligned_in_smp; +struct mlx5e_umr_dma_info { + __be64 *mtt; + __be64 *mtt_no_align; + dma_addr_t mtt_addr; + struct mlx5e_dma_info *dma_info; +}; + +struct mlx5e_mpw_info { + union { + struct mlx5e_dma_info dma_info; + struct mlx5e_umr_dma_info umr; + }; + u16 consumed_strides; + u16 skbs_frags[MLX5_MPWRQ_PAGES_PER_WQE]; + + void (*dma_pre_sync)(struct device *pdev, + struct mlx5e_mpw_info *wi, + u32 wqe_offset, u32 len); + void (*add_skb_frag)(struct mlx5e_rq *rq, + struct sk_buff *skb, + struct mlx5e_mpw_info *wi, + u32 page_idx, u32 frag_offset, u32 len); + void (*copy_skb_header)(struct device *pdev, + struct sk_buff *skb, + struct mlx5e_mpw_info *wi, + u32 page_idx, u32 offset, + u32 headlen); + void (*free_wqe)(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi); +}; + struct mlx5e_tx_wqe_info { u32 num_bytes; u8 num_wqebbs; @@ -387,10 +369,15 @@ struct mlx5e_sq_dma { }; enum { - MLX5E_SQ_STATE_WAKE_TXQ_ENABLE, + MLX5E_SQ_STATE_FLUSH, MLX5E_SQ_STATE_BF_ENABLE, }; +struct mlx5e_ico_wqe_info { + u8 opcode; + u8 num_wqebbs; +}; + struct mlx5e_sq { /* data path */ @@ -421,6 +408,7 @@ struct mlx5e_sq { u32 sqn; u16 bf_buf_size; u16 max_inline; + u8 min_inline_mode; u16 edge; struct device *pdev; struct mlx5e_tstamp *tstamp; @@ -432,6 +420,8 @@ struct mlx5e_sq { struct mlx5_uar uar; struct mlx5e_channel *channel; int tc; + struct mlx5e_ico_wqe_info *ico_wqe_info; + u32 rate_limit; } ____cacheline_aligned_in_smp; static inline bool mlx5e_sq_has_room_for(struct mlx5e_sq *sq, u16 n) @@ -448,6 +438,7 @@ struct mlx5e_channel { /* data path */ struct mlx5e_rq rq; struct mlx5e_sq sq[MLX5E_MAX_NUM_TC]; + struct mlx5e_sq icosq; /* internal control operations */ struct napi_struct napi; struct device *pdev; struct net_device *netdev; @@ -474,42 +465,42 @@ enum mlx5e_traffic_types { MLX5E_TT_IPV6, MLX5E_TT_ANY, MLX5E_NUM_TT, + MLX5E_NUM_INDIR_TIRS = MLX5E_TT_ANY, }; -#define IS_HASHING_TT(tt) (tt != MLX5E_TT_ANY) +enum { + MLX5E_STATE_ASYNC_EVENTS_ENABLED, + MLX5E_STATE_OPENED, + MLX5E_STATE_DESTROYING, +}; -enum mlx5e_rqt_ix { - MLX5E_INDIRECTION_RQT, - MLX5E_SINGLE_RQ_RQT, - MLX5E_NUM_RQT, +struct mlx5e_vxlan_db { + spinlock_t lock; /* protect vxlan table */ + struct radix_tree_root tree; }; -struct mlx5e_eth_addr_info { +struct mlx5e_l2_rule { u8 addr[ETH_ALEN + 2]; - u32 tt_vec; - struct mlx5_flow_rule *ft_rule[MLX5E_NUM_TT]; + struct mlx5_flow_rule *rule; }; -#define MLX5E_ETH_ADDR_HASH_SIZE (1 << BITS_PER_BYTE) - -struct mlx5e_eth_addr_db { - struct hlist_head netdev_uc[MLX5E_ETH_ADDR_HASH_SIZE]; - struct hlist_head netdev_mc[MLX5E_ETH_ADDR_HASH_SIZE]; - struct mlx5e_eth_addr_info broadcast; - struct mlx5e_eth_addr_info allmulti; - struct mlx5e_eth_addr_info promisc; - bool broadcast_enabled; - bool allmulti_enabled; - bool promisc_enabled; +struct mlx5e_flow_table { + int num_groups; + struct mlx5_flow_table *t; + struct mlx5_flow_group **g; }; -enum { - MLX5E_STATE_ASYNC_EVENTS_ENABLE, - MLX5E_STATE_OPENED, - MLX5E_STATE_DESTROYING, +#define MLX5E_L2_ADDR_HASH_SIZE BIT(BITS_PER_BYTE) + +struct mlx5e_tc_table { + struct mlx5_flow_table *t; + + struct rhashtable_params ht_params; + struct rhashtable ht; }; -struct mlx5e_vlan_db { +struct mlx5e_vlan_table { + struct mlx5e_flow_table ft; unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)]; struct mlx5_flow_rule *active_vlans_rule[VLAN_N_VID]; struct mlx5_flow_rule *untagged_rule; @@ -517,29 +508,112 @@ struct mlx5e_vlan_db { bool filter_disabled; }; -struct mlx5e_vxlan_db { - spinlock_t lock; /* protect vxlan table */ - struct radix_tree_root tree; +struct mlx5e_l2_table { + struct mlx5e_flow_table ft; + struct hlist_head netdev_uc[MLX5E_L2_ADDR_HASH_SIZE]; + struct hlist_head netdev_mc[MLX5E_L2_ADDR_HASH_SIZE]; + struct mlx5e_l2_rule broadcast; + struct mlx5e_l2_rule allmulti; + struct mlx5e_l2_rule promisc; + bool broadcast_enabled; + bool allmulti_enabled; + bool promisc_enabled; }; -struct mlx5e_flow_table { - int num_groups; - struct mlx5_flow_table *t; - struct mlx5_flow_group **g; +/* L3/L4 traffic type classifier */ +struct mlx5e_ttc_table { + struct mlx5e_flow_table ft; + struct mlx5_flow_rule *rules[MLX5E_NUM_TT]; }; -struct mlx5e_tc_flow_table { - struct mlx5_flow_table *t; +#define ARFS_HASH_SHIFT BITS_PER_BYTE +#define ARFS_HASH_SIZE BIT(BITS_PER_BYTE) +struct arfs_table { + struct mlx5e_flow_table ft; + struct mlx5_flow_rule *default_rule; + struct hlist_head rules_hash[ARFS_HASH_SIZE]; +}; - struct rhashtable_params ht_params; - struct rhashtable ht; +enum arfs_type { + ARFS_IPV4_TCP, + ARFS_IPV6_TCP, + ARFS_IPV4_UDP, + ARFS_IPV6_UDP, + ARFS_NUM_TYPES, +}; + +struct mlx5e_arfs_tables { + struct arfs_table arfs_tables[ARFS_NUM_TYPES]; + /* Protect aRFS rules list */ + spinlock_t arfs_lock; + struct list_head rules; + int last_filter_id; + struct workqueue_struct *wq; +}; + +/* NIC prio FTS */ +enum { + MLX5E_VLAN_FT_LEVEL = 0, + MLX5E_L2_FT_LEVEL, + MLX5E_TTC_FT_LEVEL, + MLX5E_ARFS_FT_LEVEL +}; + +struct mlx5e_ethtool_table { + struct mlx5_flow_table *ft; + int num_rules; +}; + +#define ETHTOOL_NUM_L3_L4_FTS 7 +#define ETHTOOL_NUM_L2_FTS 4 + +struct mlx5e_ethtool_steering { + struct mlx5e_ethtool_table l3_l4_ft[ETHTOOL_NUM_L3_L4_FTS]; + struct mlx5e_ethtool_table l2_ft[ETHTOOL_NUM_L2_FTS]; + struct list_head rules; + int tot_num_rules; +}; + +struct mlx5e_flow_steering { + struct mlx5_flow_namespace *ns; + struct mlx5e_ethtool_steering ethtool; + struct mlx5e_tc_table tc; + struct mlx5e_vlan_table vlan; + struct mlx5e_l2_table l2; + struct mlx5e_ttc_table ttc; + struct mlx5e_arfs_tables arfs; +}; + +struct mlx5e_rqt { + u32 rqtn; + bool enabled; }; -struct mlx5e_flow_tables { - struct mlx5_flow_namespace *ns; - struct mlx5e_tc_flow_table tc; - struct mlx5e_flow_table vlan; - struct mlx5e_flow_table main; +struct mlx5e_tir { + u32 tirn; + struct mlx5e_rqt rqt; + struct list_head list; +}; + +enum { + MLX5E_TC_PRIO = 0, + MLX5E_NIC_PRIO +}; + +struct mlx5e_profile { + void (*init)(struct mlx5_core_dev *mdev, + struct net_device *netdev, + const struct mlx5e_profile *profile, void *ppriv); + void (*cleanup)(struct mlx5e_priv *priv); + int (*init_rx)(struct mlx5e_priv *priv); + void (*cleanup_rx)(struct mlx5e_priv *priv); + int (*init_tx)(struct mlx5e_priv *priv); + void (*cleanup_tx)(struct mlx5e_priv *priv); + void (*enable)(struct mlx5e_priv *priv); + void (*disable)(struct mlx5e_priv *priv); + void (*update_stats)(struct mlx5e_priv *priv); + int (*max_nch)(struct mlx5_core_dev *mdev); + int max_tc; }; struct mlx5e_priv { @@ -550,46 +624,34 @@ struct mlx5e_priv { unsigned long state; struct mutex state_lock; /* Protects Interface state */ - struct mlx5_uar cq_uar; - u32 pdn; - u32 tdn; - struct mlx5_core_mkey mkey; + struct mlx5_core_mkey umr_mkey; struct mlx5e_rq drop_rq; struct mlx5e_channel **channel; u32 tisn[MLX5E_MAX_NUM_TC]; - u32 rqtn[MLX5E_NUM_RQT]; - u32 tirn[MLX5E_NUM_TT]; + struct mlx5e_rqt indir_rqt; + struct mlx5e_tir indir_tir[MLX5E_NUM_INDIR_TIRS]; + struct mlx5e_tir direct_tir[MLX5E_MAX_NUM_CHANNELS]; + u32 tx_rates[MLX5E_MAX_NUM_SQS]; - struct mlx5e_flow_tables fts; - struct mlx5e_eth_addr_db eth_addr; - struct mlx5e_vlan_db vlan; -#ifdef CONFIG_MLX5_CORE_EN_VXLAN + struct mlx5e_flow_steering fs; struct mlx5e_vxlan_db vxlan; -#endif struct mlx5e_params params; struct workqueue_struct *wq; struct work_struct update_carrier_work; struct work_struct set_rx_mode_work; + struct work_struct tx_timeout_work; struct delayed_work update_stats_work; + u32 pflags; struct mlx5_core_dev *mdev; struct net_device *netdev; struct mlx5e_stats stats; struct mlx5e_tstamp tstamp; -}; - -#define MLX5E_NET_IP_ALIGN 2 - -struct mlx5e_tx_wqe { - struct mlx5_wqe_ctrl_seg ctrl; - struct mlx5_wqe_eth_seg eth; -}; - -struct mlx5e_rx_wqe { - struct mlx5_wqe_srq_next_seg next; - struct mlx5_wqe_data_seg data; + u16 q_counter; + const struct mlx5e_profile *profile; + void *ppriv; }; enum mlx5e_link_mode { @@ -607,6 +669,7 @@ enum mlx5e_link_mode { MLX5E_10GBASE_ER = 14, MLX5E_40GBASE_SR4 = 15, MLX5E_40GBASE_LR4 = 16, + MLX5E_50GBASE_SR2 = 18, MLX5E_100GBASE_CR4 = 20, MLX5E_100GBASE_SR4 = 21, MLX5E_100GBASE_KR4 = 22, @@ -624,6 +687,9 @@ enum mlx5e_link_mode { #define MLX5E_PROT_MASK(link_mode) (1 << link_mode) + +void mlx5e_build_ptys2ethtool_map(void); + void mlx5e_send_nop(struct mlx5e_sq *sq, bool notify_hw); u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb, void *accel_priv, select_queue_fallback_t fallback); @@ -634,14 +700,52 @@ void mlx5e_cq_error_event(struct mlx5_core_cq *mcq, enum mlx5_event event); int mlx5e_napi_poll(struct napi_struct *napi, int budget); bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget); int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget); +void mlx5e_free_tx_descs(struct mlx5e_sq *sq); + +void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); +void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq); +int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix); +int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix); +void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix); +void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix); +void mlx5e_post_rx_fragmented_mpwqe(struct mlx5e_rq *rq); +void mlx5e_complete_rx_linear_mpwqe(struct mlx5e_rq *rq, + struct mlx5_cqe64 *cqe, + u16 byte_cnt, + struct mlx5e_mpw_info *wi, + struct sk_buff *skb); +void mlx5e_complete_rx_fragmented_mpwqe(struct mlx5e_rq *rq, + struct mlx5_cqe64 *cqe, + u16 byte_cnt, + struct mlx5e_mpw_info *wi, + struct sk_buff *skb); +void mlx5e_free_rx_linear_mpwqe(struct mlx5e_rq *rq, + struct mlx5e_mpw_info *wi); +void mlx5e_free_rx_fragmented_mpwqe(struct mlx5e_rq *rq, + struct mlx5e_mpw_info *wi); struct mlx5_cqe64 *mlx5e_get_cqe(struct mlx5e_cq *cq); +void mlx5e_rx_am(struct mlx5e_rq *rq); +void mlx5e_rx_am_work(struct work_struct *work); +struct mlx5e_cq_moder mlx5e_am_get_def_profile(u8 rx_cq_period_mode); + void mlx5e_update_stats(struct mlx5e_priv *priv); -int mlx5e_create_flow_tables(struct mlx5e_priv *priv); -void mlx5e_destroy_flow_tables(struct mlx5e_priv *priv); -void mlx5e_init_eth_addr(struct mlx5e_priv *priv); +int mlx5e_create_flow_steering(struct mlx5e_priv *priv); +void mlx5e_destroy_flow_steering(struct mlx5e_priv *priv); +void mlx5e_init_l2_addr(struct mlx5e_priv *priv); +void mlx5e_destroy_flow_table(struct mlx5e_flow_table *ft); +int mlx5e_ethtool_get_flow(struct mlx5e_priv *priv, struct ethtool_rxnfc *info, + int location); +int mlx5e_ethtool_get_all_flows(struct mlx5e_priv *priv, + struct ethtool_rxnfc *info, u32 *rule_locs); +int mlx5e_ethtool_flow_replace(struct mlx5e_priv *priv, + struct ethtool_rx_flow_spec *fs); +int mlx5e_ethtool_flow_remove(struct mlx5e_priv *priv, + int location); +void mlx5e_ethtool_init_steering(struct mlx5e_priv *priv); +void mlx5e_ethtool_cleanup_steering(struct mlx5e_priv *priv); void mlx5e_set_rx_mode_work(struct work_struct *work); void mlx5e_fill_hwstamp(struct mlx5e_tstamp *clock, u64 timestamp, @@ -650,6 +754,7 @@ void mlx5e_timestamp_init(struct mlx5e_priv *priv); void mlx5e_timestamp_cleanup(struct mlx5e_priv *priv); int mlx5e_hwstamp_set(struct net_device *dev, struct ifreq *ifr); int mlx5e_hwstamp_get(struct net_device *dev, struct ifreq *ifr); +void mlx5e_modify_rx_cqe_compression(struct mlx5e_priv *priv, bool val); int mlx5e_vlan_rx_add_vid(struct net_device *dev, __always_unused __be16 proto, u16 vid); @@ -658,16 +763,23 @@ int mlx5e_vlan_rx_kill_vid(struct net_device *dev, __always_unused __be16 proto, void mlx5e_enable_vlan_filter(struct mlx5e_priv *priv); void mlx5e_disable_vlan_filter(struct mlx5e_priv *priv); -int mlx5e_redirect_rqt(struct mlx5e_priv *priv, enum mlx5e_rqt_ix rqt_ix); +int mlx5e_modify_rqs_vsd(struct mlx5e_priv *priv, bool vsd); + +int mlx5e_redirect_rqt(struct mlx5e_priv *priv, u32 rqtn, int sz, int ix); void mlx5e_build_tir_ctx_hash(void *tirc, struct mlx5e_priv *priv); int mlx5e_open_locked(struct net_device *netdev); int mlx5e_close_locked(struct net_device *netdev); -void mlx5e_build_default_indir_rqt(u32 *indirection_rqt, int len, +void mlx5e_build_default_indir_rqt(struct mlx5_core_dev *mdev, + u32 *indirection_rqt, int len, int num_channels); +int mlx5e_get_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed); + +void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, + u8 cq_period_mode); static inline void mlx5e_tx_notify_hw(struct mlx5e_sq *sq, - struct mlx5e_tx_wqe *wqe, int bf_sz) + struct mlx5_wqe_ctrl_seg *ctrl, int bf_sz) { u16 ofst = MLX5_BF_OFFSET + sq->bf_offset; @@ -681,9 +793,9 @@ static inline void mlx5e_tx_notify_hw(struct mlx5e_sq *sq, */ wmb(); if (bf_sz) - __iowrite64_copy(sq->uar_map + ofst, &wqe->ctrl, bf_sz); + __iowrite64_copy(sq->uar_map + ofst, ctrl, bf_sz); else - mlx5_write64((__be32 *)&wqe->ctrl, sq->uar_map + ofst, NULL); + mlx5_write64((__be32 *)ctrl, sq->uar_map + ofst, NULL); /* flush the write-combining mapped buffer */ wmb(); @@ -710,6 +822,66 @@ extern const struct dcbnl_rtnl_ops mlx5e_dcbnl_ops; int mlx5e_dcbnl_ieee_setets_core(struct mlx5e_priv *priv, struct ieee_ets *ets); #endif +#ifndef CONFIG_RFS_ACCEL +static inline int mlx5e_arfs_create_tables(struct mlx5e_priv *priv) +{ + return 0; +} + +static inline void mlx5e_arfs_destroy_tables(struct mlx5e_priv *priv) {} + +static inline int mlx5e_arfs_enable(struct mlx5e_priv *priv) +{ + return -ENOTSUPP; +} + +static inline int mlx5e_arfs_disable(struct mlx5e_priv *priv) +{ + return -ENOTSUPP; +} +#else +int mlx5e_arfs_create_tables(struct mlx5e_priv *priv); +void mlx5e_arfs_destroy_tables(struct mlx5e_priv *priv); +int mlx5e_arfs_enable(struct mlx5e_priv *priv); +int mlx5e_arfs_disable(struct mlx5e_priv *priv); +int mlx5e_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb, + u16 rxq_index, u32 flow_id); +#endif + u16 mlx5e_get_max_inline_cap(struct mlx5_core_dev *mdev); +int mlx5e_create_tir(struct mlx5_core_dev *mdev, + struct mlx5e_tir *tir, u32 *in, int inlen); +void mlx5e_destroy_tir(struct mlx5_core_dev *mdev, + struct mlx5e_tir *tir); +int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev); +void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev); +int mlx5e_refresh_tirs_self_loopback_enable(struct mlx5_core_dev *mdev); + +struct mlx5_eswitch_rep; +int mlx5e_vport_rep_load(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep *rep); +void mlx5e_vport_rep_unload(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep *rep); +int mlx5e_nic_rep_load(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep); +void mlx5e_nic_rep_unload(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep *rep); +int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv); +void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv); +int mlx5e_attr_get(struct net_device *dev, struct switchdev_attr *attr); + +int mlx5e_create_direct_rqts(struct mlx5e_priv *priv); +void mlx5e_destroy_rqt(struct mlx5e_priv *priv, struct mlx5e_rqt *rqt); +int mlx5e_create_direct_tirs(struct mlx5e_priv *priv); +void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv); +int mlx5e_create_tises(struct mlx5e_priv *priv); +void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv); +int mlx5e_close(struct net_device *netdev); +int mlx5e_open(struct net_device *netdev); +void mlx5e_update_stats_work(struct work_struct *work); +void *mlx5e_create_netdev(struct mlx5_core_dev *mdev, + const struct mlx5e_profile *profile, void *ppriv); +void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, struct mlx5e_priv *priv); +struct rtnl_link_stats64 * +mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats); #endif /* __MLX5_EN_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c new file mode 100644 index 000000000000..a8cb38789774 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c @@ -0,0 +1,742 @@ +/* + * Copyright (c) 2016, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifdef CONFIG_RFS_ACCEL + +#include <linux/hash.h> +#include <linux/mlx5/fs.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include "en.h" + +struct arfs_tuple { + __be16 etype; + u8 ip_proto; + union { + __be32 src_ipv4; + struct in6_addr src_ipv6; + }; + union { + __be32 dst_ipv4; + struct in6_addr dst_ipv6; + }; + __be16 src_port; + __be16 dst_port; +}; + +struct arfs_rule { + struct mlx5e_priv *priv; + struct work_struct arfs_work; + struct mlx5_flow_rule *rule; + struct hlist_node hlist; + int rxq; + /* Flow ID passed to ndo_rx_flow_steer */ + int flow_id; + /* Filter ID returned by ndo_rx_flow_steer */ + int filter_id; + struct arfs_tuple tuple; +}; + +#define mlx5e_for_each_arfs_rule(hn, tmp, arfs_tables, i, j) \ + for (i = 0; i < ARFS_NUM_TYPES; i++) \ + mlx5e_for_each_hash_arfs_rule(hn, tmp, arfs_tables[i].rules_hash, j) + +#define mlx5e_for_each_hash_arfs_rule(hn, tmp, hash, j) \ + for (j = 0; j < ARFS_HASH_SIZE; j++) \ + hlist_for_each_entry_safe(hn, tmp, &hash[j], hlist) + +static enum mlx5e_traffic_types arfs_get_tt(enum arfs_type type) +{ + switch (type) { + case ARFS_IPV4_TCP: + return MLX5E_TT_IPV4_TCP; + case ARFS_IPV4_UDP: + return MLX5E_TT_IPV4_UDP; + case ARFS_IPV6_TCP: + return MLX5E_TT_IPV6_TCP; + case ARFS_IPV6_UDP: + return MLX5E_TT_IPV6_UDP; + default: + return -EINVAL; + } +} + +static int arfs_disable(struct mlx5e_priv *priv) +{ + struct mlx5_flow_destination dest; + struct mlx5e_tir *tir = priv->indir_tir; + int err = 0; + int tt; + int i; + + dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; + for (i = 0; i < ARFS_NUM_TYPES; i++) { + dest.tir_num = tir[i].tirn; + tt = arfs_get_tt(i); + /* Modify ttc rules destination to bypass the aRFS tables*/ + err = mlx5_modify_rule_destination(priv->fs.ttc.rules[tt], + &dest); + if (err) { + netdev_err(priv->netdev, + "%s: modify ttc destination failed\n", + __func__); + return err; + } + } + return 0; +} + +static void arfs_del_rules(struct mlx5e_priv *priv); + +int mlx5e_arfs_disable(struct mlx5e_priv *priv) +{ + arfs_del_rules(priv); + + return arfs_disable(priv); +} + +int mlx5e_arfs_enable(struct mlx5e_priv *priv) +{ + struct mlx5_flow_destination dest; + int err = 0; + int tt; + int i; + + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + for (i = 0; i < ARFS_NUM_TYPES; i++) { + dest.ft = priv->fs.arfs.arfs_tables[i].ft.t; + tt = arfs_get_tt(i); + /* Modify ttc rules destination to point on the aRFS FTs */ + err = mlx5_modify_rule_destination(priv->fs.ttc.rules[tt], + &dest); + if (err) { + netdev_err(priv->netdev, + "%s: modify ttc destination failed err=%d\n", + __func__, err); + arfs_disable(priv); + return err; + } + } + return 0; +} + +static void arfs_destroy_table(struct arfs_table *arfs_t) +{ + mlx5_del_flow_rule(arfs_t->default_rule); + mlx5e_destroy_flow_table(&arfs_t->ft); +} + +void mlx5e_arfs_destroy_tables(struct mlx5e_priv *priv) +{ + int i; + + if (!(priv->netdev->hw_features & NETIF_F_NTUPLE)) + return; + + arfs_del_rules(priv); + destroy_workqueue(priv->fs.arfs.wq); + for (i = 0; i < ARFS_NUM_TYPES; i++) { + if (!IS_ERR_OR_NULL(priv->fs.arfs.arfs_tables[i].ft.t)) + arfs_destroy_table(&priv->fs.arfs.arfs_tables[i]); + } +} + +static int arfs_add_default_rule(struct mlx5e_priv *priv, + enum arfs_type type) +{ + struct arfs_table *arfs_t = &priv->fs.arfs.arfs_tables[type]; + struct mlx5_flow_destination dest; + struct mlx5e_tir *tir = priv->indir_tir; + struct mlx5_flow_spec *spec; + int err = 0; + + spec = mlx5_vzalloc(sizeof(*spec)); + if (!spec) { + netdev_err(priv->netdev, "%s: alloc failed\n", __func__); + err = -ENOMEM; + goto out; + } + + dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; + switch (type) { + case ARFS_IPV4_TCP: + dest.tir_num = tir[MLX5E_TT_IPV4_TCP].tirn; + break; + case ARFS_IPV4_UDP: + dest.tir_num = tir[MLX5E_TT_IPV4_UDP].tirn; + break; + case ARFS_IPV6_TCP: + dest.tir_num = tir[MLX5E_TT_IPV6_TCP].tirn; + break; + case ARFS_IPV6_UDP: + dest.tir_num = tir[MLX5E_TT_IPV6_UDP].tirn; + break; + default: + err = -EINVAL; + goto out; + } + + arfs_t->default_rule = mlx5_add_flow_rule(arfs_t->ft.t, spec, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + MLX5_FS_DEFAULT_FLOW_TAG, + &dest); + if (IS_ERR(arfs_t->default_rule)) { + err = PTR_ERR(arfs_t->default_rule); + arfs_t->default_rule = NULL; + netdev_err(priv->netdev, "%s: add rule failed, arfs type=%d\n", + __func__, type); + } +out: + kvfree(spec); + return err; +} + +#define MLX5E_ARFS_NUM_GROUPS 2 +#define MLX5E_ARFS_GROUP1_SIZE BIT(12) +#define MLX5E_ARFS_GROUP2_SIZE BIT(0) +#define MLX5E_ARFS_TABLE_SIZE (MLX5E_ARFS_GROUP1_SIZE +\ + MLX5E_ARFS_GROUP2_SIZE) +static int arfs_create_groups(struct mlx5e_flow_table *ft, + enum arfs_type type) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + void *outer_headers_c; + int ix = 0; + u32 *in; + int err; + u8 *mc; + + ft->g = kcalloc(MLX5E_ARFS_NUM_GROUPS, + sizeof(*ft->g), GFP_KERNEL); + in = mlx5_vzalloc(inlen); + if (!in || !ft->g) { + kvfree(ft->g); + kvfree(in); + return -ENOMEM; + } + + mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); + outer_headers_c = MLX5_ADDR_OF(fte_match_param, mc, + outer_headers); + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, ethertype); + switch (type) { + case ARFS_IPV4_TCP: + case ARFS_IPV6_TCP: + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, tcp_dport); + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, tcp_sport); + break; + case ARFS_IPV4_UDP: + case ARFS_IPV6_UDP: + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, udp_dport); + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, udp_sport); + break; + default: + err = -EINVAL; + goto out; + } + + switch (type) { + case ARFS_IPV4_TCP: + case ARFS_IPV4_UDP: + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, + src_ipv4_src_ipv6.ipv4_layout.ipv4); + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, + dst_ipv4_dst_ipv6.ipv4_layout.ipv4); + break; + case ARFS_IPV6_TCP: + case ARFS_IPV6_UDP: + memset(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c, + src_ipv4_src_ipv6.ipv6_layout.ipv6), + 0xff, 16); + memset(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + 0xff, 16); + break; + default: + err = -EINVAL; + goto out; + } + + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_ARFS_GROUP1_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err; + ft->num_groups++; + + memset(in, 0, inlen); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_ARFS_GROUP2_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err; + ft->num_groups++; + + kvfree(in); + return 0; + +err: + err = PTR_ERR(ft->g[ft->num_groups]); + ft->g[ft->num_groups] = NULL; +out: + kvfree(in); + + return err; +} + +static int arfs_create_table(struct mlx5e_priv *priv, + enum arfs_type type) +{ + struct mlx5e_arfs_tables *arfs = &priv->fs.arfs; + struct mlx5e_flow_table *ft = &arfs->arfs_tables[type].ft; + int err; + + ft->t = mlx5_create_flow_table(priv->fs.ns, MLX5E_NIC_PRIO, + MLX5E_ARFS_TABLE_SIZE, MLX5E_ARFS_FT_LEVEL); + if (IS_ERR(ft->t)) { + err = PTR_ERR(ft->t); + ft->t = NULL; + return err; + } + + err = arfs_create_groups(ft, type); + if (err) + goto err; + + err = arfs_add_default_rule(priv, type); + if (err) + goto err; + + return 0; +err: + mlx5e_destroy_flow_table(ft); + return err; +} + +int mlx5e_arfs_create_tables(struct mlx5e_priv *priv) +{ + int err = 0; + int i; + + if (!(priv->netdev->hw_features & NETIF_F_NTUPLE)) + return 0; + + spin_lock_init(&priv->fs.arfs.arfs_lock); + INIT_LIST_HEAD(&priv->fs.arfs.rules); + priv->fs.arfs.wq = create_singlethread_workqueue("mlx5e_arfs"); + if (!priv->fs.arfs.wq) + return -ENOMEM; + + for (i = 0; i < ARFS_NUM_TYPES; i++) { + err = arfs_create_table(priv, i); + if (err) + goto err; + } + return 0; +err: + mlx5e_arfs_destroy_tables(priv); + return err; +} + +#define MLX5E_ARFS_EXPIRY_QUOTA 60 + +static void arfs_may_expire_flow(struct mlx5e_priv *priv) +{ + struct arfs_rule *arfs_rule; + struct hlist_node *htmp; + int quota = 0; + int i; + int j; + + HLIST_HEAD(del_list); + spin_lock_bh(&priv->fs.arfs.arfs_lock); + mlx5e_for_each_arfs_rule(arfs_rule, htmp, priv->fs.arfs.arfs_tables, i, j) { + if (quota++ > MLX5E_ARFS_EXPIRY_QUOTA) + break; + if (!work_pending(&arfs_rule->arfs_work) && + rps_may_expire_flow(priv->netdev, + arfs_rule->rxq, arfs_rule->flow_id, + arfs_rule->filter_id)) { + hlist_del_init(&arfs_rule->hlist); + hlist_add_head(&arfs_rule->hlist, &del_list); + } + } + spin_unlock_bh(&priv->fs.arfs.arfs_lock); + hlist_for_each_entry_safe(arfs_rule, htmp, &del_list, hlist) { + if (arfs_rule->rule) + mlx5_del_flow_rule(arfs_rule->rule); + hlist_del(&arfs_rule->hlist); + kfree(arfs_rule); + } +} + +static void arfs_del_rules(struct mlx5e_priv *priv) +{ + struct hlist_node *htmp; + struct arfs_rule *rule; + int i; + int j; + + HLIST_HEAD(del_list); + spin_lock_bh(&priv->fs.arfs.arfs_lock); + mlx5e_for_each_arfs_rule(rule, htmp, priv->fs.arfs.arfs_tables, i, j) { + hlist_del_init(&rule->hlist); + hlist_add_head(&rule->hlist, &del_list); + } + spin_unlock_bh(&priv->fs.arfs.arfs_lock); + + hlist_for_each_entry_safe(rule, htmp, &del_list, hlist) { + cancel_work_sync(&rule->arfs_work); + if (rule->rule) + mlx5_del_flow_rule(rule->rule); + hlist_del(&rule->hlist); + kfree(rule); + } +} + +static struct hlist_head * +arfs_hash_bucket(struct arfs_table *arfs_t, __be16 src_port, + __be16 dst_port) +{ + unsigned long l; + int bucket_idx; + + l = (__force unsigned long)src_port | + ((__force unsigned long)dst_port << 2); + + bucket_idx = hash_long(l, ARFS_HASH_SHIFT); + + return &arfs_t->rules_hash[bucket_idx]; +} + +static u8 arfs_get_ip_proto(const struct sk_buff *skb) +{ + return (skb->protocol == htons(ETH_P_IP)) ? + ip_hdr(skb)->protocol : ipv6_hdr(skb)->nexthdr; +} + +static struct arfs_table *arfs_get_table(struct mlx5e_arfs_tables *arfs, + u8 ip_proto, __be16 etype) +{ + if (etype == htons(ETH_P_IP) && ip_proto == IPPROTO_TCP) + return &arfs->arfs_tables[ARFS_IPV4_TCP]; + if (etype == htons(ETH_P_IP) && ip_proto == IPPROTO_UDP) + return &arfs->arfs_tables[ARFS_IPV4_UDP]; + if (etype == htons(ETH_P_IPV6) && ip_proto == IPPROTO_TCP) + return &arfs->arfs_tables[ARFS_IPV6_TCP]; + if (etype == htons(ETH_P_IPV6) && ip_proto == IPPROTO_UDP) + return &arfs->arfs_tables[ARFS_IPV6_UDP]; + + return NULL; +} + +static struct mlx5_flow_rule *arfs_add_rule(struct mlx5e_priv *priv, + struct arfs_rule *arfs_rule) +{ + struct mlx5e_arfs_tables *arfs = &priv->fs.arfs; + struct arfs_tuple *tuple = &arfs_rule->tuple; + struct mlx5_flow_rule *rule = NULL; + struct mlx5_flow_destination dest; + struct arfs_table *arfs_table; + struct mlx5_flow_spec *spec; + struct mlx5_flow_table *ft; + int err = 0; + + spec = mlx5_vzalloc(sizeof(*spec)); + if (!spec) { + netdev_err(priv->netdev, "%s: alloc failed\n", __func__); + err = -ENOMEM; + goto out; + } + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.ethertype); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.ethertype, + ntohs(tuple->etype)); + arfs_table = arfs_get_table(arfs, tuple->ip_proto, tuple->etype); + if (!arfs_table) { + err = -EINVAL; + goto out; + } + + ft = arfs_table->ft.t; + if (tuple->ip_proto == IPPROTO_TCP) { + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.tcp_dport); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.tcp_sport); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.tcp_dport, + ntohs(tuple->dst_port)); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.tcp_sport, + ntohs(tuple->src_port)); + } else { + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.udp_dport); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.udp_sport); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.udp_dport, + ntohs(tuple->dst_port)); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.udp_sport, + ntohs(tuple->src_port)); + } + if (tuple->etype == htons(ETH_P_IP)) { + memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4), + &tuple->src_ipv4, + 4); + memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4), + &tuple->dst_ipv4, + 4); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4); + } else { + memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6), + &tuple->src_ipv6, + 16); + memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + &tuple->dst_ipv6, + 16); + memset(MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6), + 0xff, + 16); + memset(MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + 0xff, + 16); + } + dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; + dest.tir_num = priv->direct_tir[arfs_rule->rxq].tirn; + rule = mlx5_add_flow_rule(ft, spec, MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + MLX5_FS_DEFAULT_FLOW_TAG, + &dest); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + netdev_err(priv->netdev, "%s: add rule(filter id=%d, rq idx=%d) failed, err=%d\n", + __func__, arfs_rule->filter_id, arfs_rule->rxq, err); + } + +out: + kvfree(spec); + return err ? ERR_PTR(err) : rule; +} + +static void arfs_modify_rule_rq(struct mlx5e_priv *priv, + struct mlx5_flow_rule *rule, u16 rxq) +{ + struct mlx5_flow_destination dst; + int err = 0; + + dst.type = MLX5_FLOW_DESTINATION_TYPE_TIR; + dst.tir_num = priv->direct_tir[rxq].tirn; + err = mlx5_modify_rule_destination(rule, &dst); + if (err) + netdev_warn(priv->netdev, + "Failed to modfiy aRFS rule destination to rq=%d\n", rxq); +} + +static void arfs_handle_work(struct work_struct *work) +{ + struct arfs_rule *arfs_rule = container_of(work, + struct arfs_rule, + arfs_work); + struct mlx5e_priv *priv = arfs_rule->priv; + struct mlx5_flow_rule *rule; + + mutex_lock(&priv->state_lock); + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { + spin_lock_bh(&priv->fs.arfs.arfs_lock); + hlist_del(&arfs_rule->hlist); + spin_unlock_bh(&priv->fs.arfs.arfs_lock); + + mutex_unlock(&priv->state_lock); + kfree(arfs_rule); + goto out; + } + mutex_unlock(&priv->state_lock); + + if (!arfs_rule->rule) { + rule = arfs_add_rule(priv, arfs_rule); + if (IS_ERR(rule)) + goto out; + arfs_rule->rule = rule; + } else { + arfs_modify_rule_rq(priv, arfs_rule->rule, + arfs_rule->rxq); + } +out: + arfs_may_expire_flow(priv); +} + +/* return L4 destination port from ip4/6 packets */ +static __be16 arfs_get_dst_port(const struct sk_buff *skb) +{ + char *transport_header; + + transport_header = skb_transport_header(skb); + if (arfs_get_ip_proto(skb) == IPPROTO_TCP) + return ((struct tcphdr *)transport_header)->dest; + return ((struct udphdr *)transport_header)->dest; +} + +/* return L4 source port from ip4/6 packets */ +static __be16 arfs_get_src_port(const struct sk_buff *skb) +{ + char *transport_header; + + transport_header = skb_transport_header(skb); + if (arfs_get_ip_proto(skb) == IPPROTO_TCP) + return ((struct tcphdr *)transport_header)->source; + return ((struct udphdr *)transport_header)->source; +} + +static struct arfs_rule *arfs_alloc_rule(struct mlx5e_priv *priv, + struct arfs_table *arfs_t, + const struct sk_buff *skb, + u16 rxq, u32 flow_id) +{ + struct arfs_rule *rule; + struct arfs_tuple *tuple; + + rule = kzalloc(sizeof(*rule), GFP_ATOMIC); + if (!rule) + return NULL; + + rule->priv = priv; + rule->rxq = rxq; + INIT_WORK(&rule->arfs_work, arfs_handle_work); + + tuple = &rule->tuple; + tuple->etype = skb->protocol; + if (tuple->etype == htons(ETH_P_IP)) { + tuple->src_ipv4 = ip_hdr(skb)->saddr; + tuple->dst_ipv4 = ip_hdr(skb)->daddr; + } else { + memcpy(&tuple->src_ipv6, &ipv6_hdr(skb)->saddr, + sizeof(struct in6_addr)); + memcpy(&tuple->dst_ipv6, &ipv6_hdr(skb)->daddr, + sizeof(struct in6_addr)); + } + tuple->ip_proto = arfs_get_ip_proto(skb); + tuple->src_port = arfs_get_src_port(skb); + tuple->dst_port = arfs_get_dst_port(skb); + + rule->flow_id = flow_id; + rule->filter_id = priv->fs.arfs.last_filter_id++ % RPS_NO_FILTER; + + hlist_add_head(&rule->hlist, + arfs_hash_bucket(arfs_t, tuple->src_port, + tuple->dst_port)); + return rule; +} + +static bool arfs_cmp_ips(struct arfs_tuple *tuple, + const struct sk_buff *skb) +{ + if (tuple->etype == htons(ETH_P_IP) && + tuple->src_ipv4 == ip_hdr(skb)->saddr && + tuple->dst_ipv4 == ip_hdr(skb)->daddr) + return true; + if (tuple->etype == htons(ETH_P_IPV6) && + (!memcmp(&tuple->src_ipv6, &ipv6_hdr(skb)->saddr, + sizeof(struct in6_addr))) && + (!memcmp(&tuple->dst_ipv6, &ipv6_hdr(skb)->daddr, + sizeof(struct in6_addr)))) + return true; + return false; +} + +static struct arfs_rule *arfs_find_rule(struct arfs_table *arfs_t, + const struct sk_buff *skb) +{ + struct arfs_rule *arfs_rule; + struct hlist_head *head; + __be16 src_port = arfs_get_src_port(skb); + __be16 dst_port = arfs_get_dst_port(skb); + + head = arfs_hash_bucket(arfs_t, src_port, dst_port); + hlist_for_each_entry(arfs_rule, head, hlist) { + if (arfs_rule->tuple.src_port == src_port && + arfs_rule->tuple.dst_port == dst_port && + arfs_cmp_ips(&arfs_rule->tuple, skb)) { + return arfs_rule; + } + } + + return NULL; +} + +int mlx5e_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb, + u16 rxq_index, u32 flow_id) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5e_arfs_tables *arfs = &priv->fs.arfs; + struct arfs_table *arfs_t; + struct arfs_rule *arfs_rule; + + if (skb->protocol != htons(ETH_P_IP) && + skb->protocol != htons(ETH_P_IPV6)) + return -EPROTONOSUPPORT; + + arfs_t = arfs_get_table(arfs, arfs_get_ip_proto(skb), skb->protocol); + if (!arfs_t) + return -EPROTONOSUPPORT; + + spin_lock_bh(&arfs->arfs_lock); + arfs_rule = arfs_find_rule(arfs_t, skb); + if (arfs_rule) { + if (arfs_rule->rxq == rxq_index) { + spin_unlock_bh(&arfs->arfs_lock); + return arfs_rule->filter_id; + } + arfs_rule->rxq = rxq_index; + } else { + arfs_rule = arfs_alloc_rule(priv, arfs_t, skb, + rxq_index, flow_id); + if (!arfs_rule) { + spin_unlock_bh(&arfs->arfs_lock); + return -ENOMEM; + } + } + queue_work(priv->fs.arfs.wq, &arfs_rule->arfs_work); + spin_unlock_bh(&arfs->arfs_lock); + return arfs_rule->filter_id; +} +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c index 2018eebe1531..847a8f3ac2b2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c @@ -93,6 +93,8 @@ int mlx5e_hwstamp_set(struct net_device *dev, struct ifreq *ifr) /* RX HW timestamp */ switch (config.rx_filter) { case HWTSTAMP_FILTER_NONE: + /* Reset CQE compression to Admin default */ + mlx5e_modify_rx_cqe_compression(priv, priv->params.rx_cqe_compress_admin); break; case HWTSTAMP_FILTER_ALL: case HWTSTAMP_FILTER_SOME: @@ -108,6 +110,8 @@ int mlx5e_hwstamp_set(struct net_device *dev, struct ifreq *ifr) case HWTSTAMP_FILTER_PTP_V2_EVENT: case HWTSTAMP_FILTER_PTP_V2_SYNC: case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: + /* Disable CQE compression */ + mlx5e_modify_rx_cqe_compression(priv, false); config.rx_filter = HWTSTAMP_FILTER_ALL; break; default: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c new file mode 100644 index 000000000000..9cce153e1035 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c @@ -0,0 +1,161 @@ +/* + * Copyright (c) 2016, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "en.h" + +/* mlx5e global resources should be placed in this file. + * Global resources are common to all the netdevices crated on the same nic. + */ + +int mlx5e_create_tir(struct mlx5_core_dev *mdev, + struct mlx5e_tir *tir, u32 *in, int inlen) +{ + int err; + + err = mlx5_core_create_tir(mdev, in, inlen, &tir->tirn); + if (err) + return err; + + list_add(&tir->list, &mdev->mlx5e_res.td.tirs_list); + + return 0; +} + +void mlx5e_destroy_tir(struct mlx5_core_dev *mdev, + struct mlx5e_tir *tir) +{ + mlx5_core_destroy_tir(mdev, tir->tirn); + list_del(&tir->list); +} + +static int mlx5e_create_mkey(struct mlx5_core_dev *mdev, u32 pdn, + struct mlx5_core_mkey *mkey) +{ + struct mlx5_create_mkey_mbox_in *in; + int err; + + in = mlx5_vzalloc(sizeof(*in)); + if (!in) + return -ENOMEM; + + in->seg.flags = MLX5_PERM_LOCAL_WRITE | + MLX5_PERM_LOCAL_READ | + MLX5_ACCESS_MODE_PA; + in->seg.flags_pd = cpu_to_be32(pdn | MLX5_MKEY_LEN64); + in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); + + err = mlx5_core_create_mkey(mdev, mkey, in, sizeof(*in), NULL, NULL, + NULL); + + kvfree(in); + + return err; +} + +int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev) +{ + struct mlx5e_resources *res = &mdev->mlx5e_res; + int err; + + err = mlx5_alloc_map_uar(mdev, &res->cq_uar, false); + if (err) { + mlx5_core_err(mdev, "alloc_map uar failed, %d\n", err); + return err; + } + + err = mlx5_core_alloc_pd(mdev, &res->pdn); + if (err) { + mlx5_core_err(mdev, "alloc pd failed, %d\n", err); + goto err_unmap_free_uar; + } + + err = mlx5_core_alloc_transport_domain(mdev, &res->td.tdn); + if (err) { + mlx5_core_err(mdev, "alloc td failed, %d\n", err); + goto err_dealloc_pd; + } + + err = mlx5e_create_mkey(mdev, res->pdn, &res->mkey); + if (err) { + mlx5_core_err(mdev, "create mkey failed, %d\n", err); + goto err_dealloc_transport_domain; + } + + INIT_LIST_HEAD(&mdev->mlx5e_res.td.tirs_list); + + return 0; + +err_dealloc_transport_domain: + mlx5_core_dealloc_transport_domain(mdev, res->td.tdn); +err_dealloc_pd: + mlx5_core_dealloc_pd(mdev, res->pdn); +err_unmap_free_uar: + mlx5_unmap_free_uar(mdev, &res->cq_uar); + + return err; +} + +void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev) +{ + struct mlx5e_resources *res = &mdev->mlx5e_res; + + mlx5_core_destroy_mkey(mdev, &res->mkey); + mlx5_core_dealloc_transport_domain(mdev, res->td.tdn); + mlx5_core_dealloc_pd(mdev, res->pdn); + mlx5_unmap_free_uar(mdev, &res->cq_uar); +} + +int mlx5e_refresh_tirs_self_loopback_enable(struct mlx5_core_dev *mdev) +{ + struct mlx5e_tir *tir; + void *in; + int inlen; + int err = 0; + + inlen = MLX5_ST_SZ_BYTES(modify_tir_in); + in = mlx5_vzalloc(inlen); + if (!in) + return -ENOMEM; + + MLX5_SET(modify_tir_in, in, bitmask.self_lb_en, 1); + + list_for_each_entry(tir, &mdev->mlx5e_res.td.tirs_list, list) { + err = mlx5_core_modify_tir(mdev, tir->tirn, in, inlen); + if (err) + goto out; + } + +out: + kvfree(in); + + return err; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c index 3036f279a8fd..762af16ed021 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c @@ -96,7 +96,7 @@ static void mlx5e_build_tc_tx_bw(struct ieee_ets *ets, u8 *tc_tx_bw, tc_tx_bw[i] = MLX5E_MAX_BW_ALLOC; break; case IEEE_8021QAZ_TSA_ETS: - tc_tx_bw[i] = ets->tc_tx_bw[i] ?: MLX5E_MIN_BW_ALLOC; + tc_tx_bw[i] = ets->tc_tx_bw[i]; break; } } @@ -127,25 +127,40 @@ int mlx5e_dcbnl_ieee_setets_core(struct mlx5e_priv *priv, struct ieee_ets *ets) return mlx5_set_port_tc_bw_alloc(mdev, tc_tx_bw); } -static int mlx5e_dbcnl_validate_ets(struct ieee_ets *ets) +static int mlx5e_dbcnl_validate_ets(struct net_device *netdev, + struct ieee_ets *ets) { int bw_sum = 0; int i; /* Validate Priority */ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { - if (ets->prio_tc[i] >= MLX5E_MAX_PRIORITY) + if (ets->prio_tc[i] >= MLX5E_MAX_PRIORITY) { + netdev_err(netdev, + "Failed to validate ETS: priority value greater than max(%d)\n", + MLX5E_MAX_PRIORITY); return -EINVAL; + } } /* Validate Bandwidth Sum */ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { - if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS) + if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS) { + if (!ets->tc_tx_bw[i]) { + netdev_err(netdev, + "Failed to validate ETS: BW 0 is illegal\n"); + return -EINVAL; + } + bw_sum += ets->tc_tx_bw[i]; + } } - if (bw_sum != 0 && bw_sum != 100) + if (bw_sum != 0 && bw_sum != 100) { + netdev_err(netdev, + "Failed to validate ETS: BW sum is illegal\n"); return -EINVAL; + } return 0; } @@ -155,7 +170,7 @@ static int mlx5e_dcbnl_ieee_setets(struct net_device *netdev, struct mlx5e_priv *priv = netdev_priv(netdev); int err; - err = mlx5e_dbcnl_validate_ets(ets); + err = mlx5e_dbcnl_validate_ets(netdev, ets); if (err) return err; @@ -174,8 +189,14 @@ static int mlx5e_dcbnl_ieee_getpfc(struct net_device *dev, { struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_pport_stats *pstats = &priv->stats.pport; + int i; pfc->pfc_cap = mlx5_max_tc(mdev) + 1; + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + pfc->requests[i] = PPORT_PER_PRIO_GET(pstats, i, tx_pause); + pfc->indications[i] = PPORT_PER_PRIO_GET(pstats, i, rx_pause); + } return mlx5_query_port_pfc(mdev, &pfc->pfc_en, NULL); } @@ -185,7 +206,6 @@ static int mlx5e_dcbnl_ieee_setpfc(struct net_device *dev, { struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5_core_dev *mdev = priv->mdev; - enum mlx5_port_status ps; u8 curr_pfc_en; int ret; @@ -194,14 +214,8 @@ static int mlx5e_dcbnl_ieee_setpfc(struct net_device *dev, if (pfc->pfc_en == curr_pfc_en) return 0; - mlx5_query_port_admin_status(mdev, &ps); - if (ps == MLX5_PORT_UP) - mlx5_set_port_admin_status(mdev, MLX5_PORT_DOWN); - ret = mlx5_set_port_pfc(mdev, pfc->pfc_en, pfc->pfc_en); - - if (ps == MLX5_PORT_UP) - mlx5_set_port_admin_status(mdev, MLX5_PORT_UP); + mlx5_toggle_port_link(mdev); return ret; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 3476ab844634..7a346bb2ed00 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -48,177 +48,229 @@ static void mlx5e_get_drvinfo(struct net_device *dev, sizeof(drvinfo->bus_info)); } -static const struct { - u32 supported; - u32 advertised; +struct ptys2ethtool_config { + __ETHTOOL_DECLARE_LINK_MODE_MASK(supported); + __ETHTOOL_DECLARE_LINK_MODE_MASK(advertised); u32 speed; -} ptys2ethtool_table[MLX5E_LINK_MODES_NUMBER] = { - [MLX5E_1000BASE_CX_SGMII] = { - .supported = SUPPORTED_1000baseKX_Full, - .advertised = ADVERTISED_1000baseKX_Full, - .speed = 1000, - }, - [MLX5E_1000BASE_KX] = { - .supported = SUPPORTED_1000baseKX_Full, - .advertised = ADVERTISED_1000baseKX_Full, - .speed = 1000, - }, - [MLX5E_10GBASE_CX4] = { - .supported = SUPPORTED_10000baseKX4_Full, - .advertised = ADVERTISED_10000baseKX4_Full, - .speed = 10000, - }, - [MLX5E_10GBASE_KX4] = { - .supported = SUPPORTED_10000baseKX4_Full, - .advertised = ADVERTISED_10000baseKX4_Full, - .speed = 10000, - }, - [MLX5E_10GBASE_KR] = { - .supported = SUPPORTED_10000baseKR_Full, - .advertised = ADVERTISED_10000baseKR_Full, - .speed = 10000, - }, - [MLX5E_20GBASE_KR2] = { - .supported = SUPPORTED_20000baseKR2_Full, - .advertised = ADVERTISED_20000baseKR2_Full, - .speed = 20000, - }, - [MLX5E_40GBASE_CR4] = { - .supported = SUPPORTED_40000baseCR4_Full, - .advertised = ADVERTISED_40000baseCR4_Full, - .speed = 40000, - }, - [MLX5E_40GBASE_KR4] = { - .supported = SUPPORTED_40000baseKR4_Full, - .advertised = ADVERTISED_40000baseKR4_Full, - .speed = 40000, - }, - [MLX5E_56GBASE_R4] = { - .supported = SUPPORTED_56000baseKR4_Full, - .advertised = ADVERTISED_56000baseKR4_Full, - .speed = 56000, - }, - [MLX5E_10GBASE_CR] = { - .supported = SUPPORTED_10000baseKR_Full, - .advertised = ADVERTISED_10000baseKR_Full, - .speed = 10000, - }, - [MLX5E_10GBASE_SR] = { - .supported = SUPPORTED_10000baseKR_Full, - .advertised = ADVERTISED_10000baseKR_Full, - .speed = 10000, - }, - [MLX5E_10GBASE_ER] = { - .supported = SUPPORTED_10000baseKR_Full, - .advertised = ADVERTISED_10000baseKR_Full, - .speed = 10000, - }, - [MLX5E_40GBASE_SR4] = { - .supported = SUPPORTED_40000baseSR4_Full, - .advertised = ADVERTISED_40000baseSR4_Full, - .speed = 40000, - }, - [MLX5E_40GBASE_LR4] = { - .supported = SUPPORTED_40000baseLR4_Full, - .advertised = ADVERTISED_40000baseLR4_Full, - .speed = 40000, - }, - [MLX5E_100GBASE_CR4] = { - .speed = 100000, - }, - [MLX5E_100GBASE_SR4] = { - .speed = 100000, - }, - [MLX5E_100GBASE_KR4] = { - .speed = 100000, - }, - [MLX5E_100GBASE_LR4] = { - .speed = 100000, - }, - [MLX5E_100BASE_TX] = { - .speed = 100, - }, - [MLX5E_1000BASE_T] = { - .supported = SUPPORTED_1000baseT_Full, - .advertised = ADVERTISED_1000baseT_Full, - .speed = 1000, - }, - [MLX5E_10GBASE_T] = { - .supported = SUPPORTED_10000baseT_Full, - .advertised = ADVERTISED_10000baseT_Full, - .speed = 1000, - }, - [MLX5E_25GBASE_CR] = { - .speed = 25000, - }, - [MLX5E_25GBASE_KR] = { - .speed = 25000, - }, - [MLX5E_25GBASE_SR] = { - .speed = 25000, - }, - [MLX5E_50GBASE_CR2] = { - .speed = 50000, - }, - [MLX5E_50GBASE_KR2] = { - .speed = 50000, - }, }; +static struct ptys2ethtool_config ptys2ethtool_table[MLX5E_LINK_MODES_NUMBER]; + +#define MLX5_BUILD_PTYS2ETHTOOL_CONFIG(reg_, speed_, ...) \ + ({ \ + struct ptys2ethtool_config *cfg; \ + const unsigned int modes[] = { __VA_ARGS__ }; \ + unsigned int i; \ + cfg = &ptys2ethtool_table[reg_]; \ + cfg->speed = speed_; \ + bitmap_zero(cfg->supported, \ + __ETHTOOL_LINK_MODE_MASK_NBITS); \ + bitmap_zero(cfg->advertised, \ + __ETHTOOL_LINK_MODE_MASK_NBITS); \ + for (i = 0 ; i < ARRAY_SIZE(modes) ; ++i) { \ + __set_bit(modes[i], cfg->supported); \ + __set_bit(modes[i], cfg->advertised); \ + } \ + }) + +void mlx5e_build_ptys2ethtool_map(void) +{ + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_1000BASE_CX_SGMII, SPEED_1000, + ETHTOOL_LINK_MODE_1000baseKX_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_1000BASE_KX, SPEED_1000, + ETHTOOL_LINK_MODE_1000baseKX_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_CX4, SPEED_10000, + ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_KX4, SPEED_10000, + ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_KR, SPEED_10000, + ETHTOOL_LINK_MODE_10000baseKR_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_20GBASE_KR2, SPEED_20000, + ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_40GBASE_CR4, SPEED_40000, + ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_40GBASE_KR4, SPEED_40000, + ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_56GBASE_R4, SPEED_56000, + ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_CR, SPEED_10000, + ETHTOOL_LINK_MODE_10000baseKR_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_SR, SPEED_10000, + ETHTOOL_LINK_MODE_10000baseKR_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_ER, SPEED_10000, + ETHTOOL_LINK_MODE_10000baseKR_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_40GBASE_SR4, SPEED_40000, + ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_40GBASE_LR4, SPEED_40000, + ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_50GBASE_SR2, SPEED_50000, + ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GBASE_CR4, SPEED_100000, + ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GBASE_SR4, SPEED_100000, + ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GBASE_KR4, SPEED_100000, + ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GBASE_LR4, SPEED_100000, + ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_T, SPEED_10000, + ETHTOOL_LINK_MODE_10000baseT_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_25GBASE_CR, SPEED_25000, + ETHTOOL_LINK_MODE_25000baseCR_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_25GBASE_KR, SPEED_25000, + ETHTOOL_LINK_MODE_25000baseKR_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_25GBASE_SR, SPEED_25000, + ETHTOOL_LINK_MODE_25000baseSR_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_50GBASE_CR2, SPEED_50000, + ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_50GBASE_KR2, SPEED_50000, + ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT); +} + +static unsigned long mlx5e_query_pfc_combined(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + u8 pfc_en_tx; + u8 pfc_en_rx; + int err; + + err = mlx5_query_port_pfc(mdev, &pfc_en_tx, &pfc_en_rx); + + return err ? 0 : pfc_en_tx | pfc_en_rx; +} + +static bool mlx5e_query_global_pause_combined(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + u32 rx_pause; + u32 tx_pause; + int err; + + err = mlx5_query_port_pause(mdev, &rx_pause, &tx_pause); + + return err ? false : rx_pause | tx_pause; +} + +#define MLX5E_NUM_Q_CNTRS(priv) (NUM_Q_COUNTERS * (!!priv->q_counter)) +#define MLX5E_NUM_RQ_STATS(priv) \ + (NUM_RQ_STATS * priv->params.num_channels * \ + test_bit(MLX5E_STATE_OPENED, &priv->state)) +#define MLX5E_NUM_SQ_STATS(priv) \ + (NUM_SQ_STATS * priv->params.num_channels * priv->params.num_tc * \ + test_bit(MLX5E_STATE_OPENED, &priv->state)) +#define MLX5E_NUM_PFC_COUNTERS(priv) \ + ((mlx5e_query_global_pause_combined(priv) + hweight8(mlx5e_query_pfc_combined(priv))) * \ + NUM_PPORT_PER_PRIO_PFC_COUNTERS) + static int mlx5e_get_sset_count(struct net_device *dev, int sset) { struct mlx5e_priv *priv = netdev_priv(dev); switch (sset) { case ETH_SS_STATS: - return NUM_VPORT_COUNTERS + NUM_PPORT_COUNTERS + - priv->params.num_channels * NUM_RQ_STATS + - priv->params.num_channels * priv->params.num_tc * - NUM_SQ_STATS; + return NUM_SW_COUNTERS + + MLX5E_NUM_Q_CNTRS(priv) + + NUM_VPORT_COUNTERS + NUM_PPORT_COUNTERS + + MLX5E_NUM_RQ_STATS(priv) + + MLX5E_NUM_SQ_STATS(priv) + + MLX5E_NUM_PFC_COUNTERS(priv); + case ETH_SS_PRIV_FLAGS: + return ARRAY_SIZE(mlx5e_priv_flags); /* fallthrough */ default: return -EOPNOTSUPP; } } +static void mlx5e_fill_stats_strings(struct mlx5e_priv *priv, uint8_t *data) +{ + int i, j, tc, prio, idx = 0; + unsigned long pfc_combined; + + /* SW counters */ + for (i = 0; i < NUM_SW_COUNTERS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, sw_stats_desc[i].format); + + /* Q counters */ + for (i = 0; i < MLX5E_NUM_Q_CNTRS(priv); i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, q_stats_desc[i].format); + + /* VPORT counters */ + for (i = 0; i < NUM_VPORT_COUNTERS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + vport_stats_desc[i].format); + + /* PPORT counters */ + for (i = 0; i < NUM_PPORT_802_3_COUNTERS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + pport_802_3_stats_desc[i].format); + + for (i = 0; i < NUM_PPORT_2863_COUNTERS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + pport_2863_stats_desc[i].format); + + for (i = 0; i < NUM_PPORT_2819_COUNTERS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + pport_2819_stats_desc[i].format); + + for (prio = 0; prio < NUM_PPORT_PRIO; prio++) { + for (i = 0; i < NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS; i++) + sprintf(data + (idx++) * ETH_GSTRING_LEN, + pport_per_prio_traffic_stats_desc[i].format, prio); + } + + pfc_combined = mlx5e_query_pfc_combined(priv); + for_each_set_bit(prio, &pfc_combined, NUM_PPORT_PRIO) { + for (i = 0; i < NUM_PPORT_PER_PRIO_PFC_COUNTERS; i++) { + char pfc_string[ETH_GSTRING_LEN]; + + snprintf(pfc_string, sizeof(pfc_string), "prio%d", prio); + sprintf(data + (idx++) * ETH_GSTRING_LEN, + pport_per_prio_pfc_stats_desc[i].format, pfc_string); + } + } + + if (mlx5e_query_global_pause_combined(priv)) { + for (i = 0; i < NUM_PPORT_PER_PRIO_PFC_COUNTERS; i++) { + sprintf(data + (idx++) * ETH_GSTRING_LEN, + pport_per_prio_pfc_stats_desc[i].format, "global"); + } + } + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + return; + + /* per channel counters */ + for (i = 0; i < priv->params.num_channels; i++) + for (j = 0; j < NUM_RQ_STATS; j++) + sprintf(data + (idx++) * ETH_GSTRING_LEN, + rq_stats_desc[j].format, i); + + for (tc = 0; tc < priv->params.num_tc; tc++) + for (i = 0; i < priv->params.num_channels; i++) + for (j = 0; j < NUM_SQ_STATS; j++) + sprintf(data + (idx++) * ETH_GSTRING_LEN, + sq_stats_desc[j].format, + priv->channeltc_to_txq_map[i][tc]); +} + static void mlx5e_get_strings(struct net_device *dev, uint32_t stringset, uint8_t *data) { - int i, j, tc, idx = 0; struct mlx5e_priv *priv = netdev_priv(dev); + int i; switch (stringset) { case ETH_SS_PRIV_FLAGS: + for (i = 0; i < ARRAY_SIZE(mlx5e_priv_flags); i++) + strcpy(data + i * ETH_GSTRING_LEN, mlx5e_priv_flags[i]); break; case ETH_SS_TEST: break; case ETH_SS_STATS: - /* VPORT counters */ - for (i = 0; i < NUM_VPORT_COUNTERS; i++) - strcpy(data + (idx++) * ETH_GSTRING_LEN, - vport_strings[i]); - - /* PPORT counters */ - for (i = 0; i < NUM_PPORT_COUNTERS; i++) - strcpy(data + (idx++) * ETH_GSTRING_LEN, - pport_strings[i]); - - /* per channel counters */ - for (i = 0; i < priv->params.num_channels; i++) - for (j = 0; j < NUM_RQ_STATS; j++) - sprintf(data + (idx++) * ETH_GSTRING_LEN, - "rx%d_%s", i, rq_stats_strings[j]); - - for (tc = 0; tc < priv->params.num_tc; tc++) - for (i = 0; i < priv->params.num_channels; i++) - for (j = 0; j < NUM_SQ_STATS; j++) - sprintf(data + - (idx++) * ETH_GSTRING_LEN, - "tx%d_%s", - priv->channeltc_to_txq_map[i][tc], - sq_stats_strings[j]); + mlx5e_fill_stats_strings(priv, data); break; } } @@ -227,7 +279,8 @@ static void mlx5e_get_ethtool_stats(struct net_device *dev, struct ethtool_stats *stats, u64 *data) { struct mlx5e_priv *priv = netdev_priv(dev); - int i, j, tc, idx = 0; + int i, j, tc, prio, idx = 0; + unsigned long pfc_combined; if (!data) return; @@ -237,35 +290,123 @@ static void mlx5e_get_ethtool_stats(struct net_device *dev, mlx5e_update_stats(priv); mutex_unlock(&priv->state_lock); + for (i = 0; i < NUM_SW_COUNTERS; i++) + data[idx++] = MLX5E_READ_CTR64_CPU(&priv->stats.sw, + sw_stats_desc, i); + + for (i = 0; i < MLX5E_NUM_Q_CNTRS(priv); i++) + data[idx++] = MLX5E_READ_CTR32_CPU(&priv->stats.qcnt, + q_stats_desc, i); + for (i = 0; i < NUM_VPORT_COUNTERS; i++) - data[idx++] = ((u64 *)&priv->stats.vport)[i]; + data[idx++] = MLX5E_READ_CTR64_BE(priv->stats.vport.query_vport_out, + vport_stats_desc, i); + + for (i = 0; i < NUM_PPORT_802_3_COUNTERS; i++) + data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.IEEE_802_3_counters, + pport_802_3_stats_desc, i); + + for (i = 0; i < NUM_PPORT_2863_COUNTERS; i++) + data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.RFC_2863_counters, + pport_2863_stats_desc, i); + + for (i = 0; i < NUM_PPORT_2819_COUNTERS; i++) + data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.RFC_2819_counters, + pport_2819_stats_desc, i); + + for (prio = 0; prio < NUM_PPORT_PRIO; prio++) { + for (i = 0; i < NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS; i++) + data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.per_prio_counters[prio], + pport_per_prio_traffic_stats_desc, i); + } - for (i = 0; i < NUM_PPORT_COUNTERS; i++) - data[idx++] = be64_to_cpu(((__be64 *)&priv->stats.pport)[i]); + pfc_combined = mlx5e_query_pfc_combined(priv); + for_each_set_bit(prio, &pfc_combined, NUM_PPORT_PRIO) { + for (i = 0; i < NUM_PPORT_PER_PRIO_PFC_COUNTERS; i++) { + data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.per_prio_counters[prio], + pport_per_prio_pfc_stats_desc, i); + } + } + + if (mlx5e_query_global_pause_combined(priv)) { + for (i = 0; i < NUM_PPORT_PER_PRIO_PFC_COUNTERS; i++) { + data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.per_prio_counters[0], + pport_per_prio_pfc_stats_desc, i); + } + } + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + return; /* per channel counters */ for (i = 0; i < priv->params.num_channels; i++) for (j = 0; j < NUM_RQ_STATS; j++) - data[idx++] = !test_bit(MLX5E_STATE_OPENED, - &priv->state) ? 0 : - ((u64 *)&priv->channel[i]->rq.stats)[j]; + data[idx++] = + MLX5E_READ_CTR64_CPU(&priv->channel[i]->rq.stats, + rq_stats_desc, j); for (tc = 0; tc < priv->params.num_tc; tc++) for (i = 0; i < priv->params.num_channels; i++) for (j = 0; j < NUM_SQ_STATS; j++) - data[idx++] = !test_bit(MLX5E_STATE_OPENED, - &priv->state) ? 0 : - ((u64 *)&priv->channel[i]->sq[tc].stats)[j]; + data[idx++] = MLX5E_READ_CTR64_CPU(&priv->channel[i]->sq[tc].stats, + sq_stats_desc, j); +} + +static u32 mlx5e_rx_wqes_to_packets(struct mlx5e_priv *priv, int rq_wq_type, + int num_wqe) +{ + int packets_per_wqe; + int stride_size; + int num_strides; + int wqe_size; + + if (rq_wq_type != MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) + return num_wqe; + + stride_size = 1 << priv->params.mpwqe_log_stride_sz; + num_strides = 1 << priv->params.mpwqe_log_num_strides; + wqe_size = stride_size * num_strides; + + packets_per_wqe = wqe_size / + ALIGN(ETH_DATA_LEN, stride_size); + return (1 << (order_base_2(num_wqe * packets_per_wqe) - 1)); +} + +static u32 mlx5e_packets_to_rx_wqes(struct mlx5e_priv *priv, int rq_wq_type, + int num_packets) +{ + int packets_per_wqe; + int stride_size; + int num_strides; + int wqe_size; + int num_wqes; + + if (rq_wq_type != MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) + return num_packets; + + stride_size = 1 << priv->params.mpwqe_log_stride_sz; + num_strides = 1 << priv->params.mpwqe_log_num_strides; + wqe_size = stride_size * num_strides; + + num_packets = (1 << order_base_2(num_packets)); + + packets_per_wqe = wqe_size / + ALIGN(ETH_DATA_LEN, stride_size); + num_wqes = DIV_ROUND_UP(num_packets, packets_per_wqe); + return 1 << (order_base_2(num_wqes)); } static void mlx5e_get_ringparam(struct net_device *dev, struct ethtool_ringparam *param) { struct mlx5e_priv *priv = netdev_priv(dev); + int rq_wq_type = priv->params.rq_wq_type; - param->rx_max_pending = 1 << MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE; + param->rx_max_pending = mlx5e_rx_wqes_to_packets(priv, rq_wq_type, + 1 << mlx5_max_log_rq_size(rq_wq_type)); param->tx_max_pending = 1 << MLX5E_PARAMS_MAXIMUM_LOG_SQ_SIZE; - param->rx_pending = 1 << priv->params.log_rq_size; + param->rx_pending = mlx5e_rx_wqes_to_packets(priv, rq_wq_type, + 1 << priv->params.log_rq_size); param->tx_pending = 1 << priv->params.log_sq_size; } @@ -274,9 +415,14 @@ static int mlx5e_set_ringparam(struct net_device *dev, { struct mlx5e_priv *priv = netdev_priv(dev); bool was_opened; + int rq_wq_type = priv->params.rq_wq_type; + u32 rx_pending_wqes; + u32 min_rq_size; + u32 max_rq_size; u16 min_rx_wqes; u8 log_rq_size; u8 log_sq_size; + u32 num_mtts; int err = 0; if (param->rx_jumbo_pending) { @@ -289,18 +435,36 @@ static int mlx5e_set_ringparam(struct net_device *dev, __func__); return -EINVAL; } - if (param->rx_pending < (1 << MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE)) { + + min_rq_size = mlx5e_rx_wqes_to_packets(priv, rq_wq_type, + 1 << mlx5_min_log_rq_size(rq_wq_type)); + max_rq_size = mlx5e_rx_wqes_to_packets(priv, rq_wq_type, + 1 << mlx5_max_log_rq_size(rq_wq_type)); + rx_pending_wqes = mlx5e_packets_to_rx_wqes(priv, rq_wq_type, + param->rx_pending); + + if (param->rx_pending < min_rq_size) { netdev_info(dev, "%s: rx_pending (%d) < min (%d)\n", __func__, param->rx_pending, - 1 << MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE); + min_rq_size); return -EINVAL; } - if (param->rx_pending > (1 << MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE)) { + if (param->rx_pending > max_rq_size) { netdev_info(dev, "%s: rx_pending (%d) > max (%d)\n", __func__, param->rx_pending, - 1 << MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE); + max_rq_size); + return -EINVAL; + } + + num_mtts = MLX5E_REQUIRED_MTTS(priv->params.num_channels, + rx_pending_wqes); + if (priv->params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ && + !MLX5E_VALID_NUM_MTTS(num_mtts)) { + netdev_info(dev, "%s: rx_pending (%d) request can't be satisfied, try to reduce.\n", + __func__, param->rx_pending); return -EINVAL; } + if (param->tx_pending < (1 << MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE)) { netdev_info(dev, "%s: tx_pending (%d) < min (%d)\n", __func__, param->tx_pending, @@ -314,10 +478,9 @@ static int mlx5e_set_ringparam(struct net_device *dev, return -EINVAL; } - log_rq_size = order_base_2(param->rx_pending); + log_rq_size = order_base_2(rx_pending_wqes); log_sq_size = order_base_2(param->tx_pending); - min_rx_wqes = min_t(u16, param->rx_pending - 1, - MLX5E_PARAMS_DEFAULT_MIN_RX_WQES); + min_rx_wqes = mlx5_min_rx_wqes(rq_wq_type, rx_pending_wqes); if (log_rq_size == priv->params.log_rq_size && log_sq_size == priv->params.log_sq_size && @@ -357,7 +520,9 @@ static int mlx5e_set_channels(struct net_device *dev, struct mlx5e_priv *priv = netdev_priv(dev); int ncv = mlx5e_get_max_num_channels(priv->mdev); unsigned int count = ch->combined_count; + bool arfs_enabled; bool was_opened; + u32 num_mtts; int err = 0; if (!count) { @@ -376,6 +541,14 @@ static int mlx5e_set_channels(struct net_device *dev, return -EINVAL; } + num_mtts = MLX5E_REQUIRED_MTTS(count, BIT(priv->params.log_rq_size)); + if (priv->params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ && + !MLX5E_VALID_NUM_MTTS(num_mtts)) { + netdev_info(dev, "%s: rx count (%d) request can't be satisfied, try to reduce.\n", + __func__, count); + return -EINVAL; + } + if (priv->params.num_channels == count) return 0; @@ -385,13 +558,27 @@ static int mlx5e_set_channels(struct net_device *dev, if (was_opened) mlx5e_close_locked(dev); + arfs_enabled = dev->features & NETIF_F_NTUPLE; + if (arfs_enabled) + mlx5e_arfs_disable(priv); + priv->params.num_channels = count; - mlx5e_build_default_indir_rqt(priv->params.indirection_rqt, + mlx5e_build_default_indir_rqt(priv->mdev, priv->params.indirection_rqt, MLX5E_INDIR_RQT_SIZE, count); if (was_opened) err = mlx5e_open_locked(dev); + if (err) + goto out; + if (arfs_enabled) { + err = mlx5e_arfs_enable(priv); + if (err) + netdev_err(dev, "%s: mlx5e_arfs_enable failed: %d\n", + __func__, err); + } + +out: mutex_unlock(&priv->state_lock); return err; @@ -405,10 +592,11 @@ static int mlx5e_get_coalesce(struct net_device *netdev, if (!MLX5_CAP_GEN(priv->mdev, cq_moderation)) return -ENOTSUPP; - coal->rx_coalesce_usecs = priv->params.rx_cq_moderation_usec; - coal->rx_max_coalesced_frames = priv->params.rx_cq_moderation_pkts; - coal->tx_coalesce_usecs = priv->params.tx_cq_moderation_usec; - coal->tx_max_coalesced_frames = priv->params.tx_cq_moderation_pkts; + coal->rx_coalesce_usecs = priv->params.rx_cq_moderation.usec; + coal->rx_max_coalesced_frames = priv->params.rx_cq_moderation.pkts; + coal->tx_coalesce_usecs = priv->params.tx_cq_moderation.usec; + coal->tx_max_coalesced_frames = priv->params.tx_cq_moderation.pkts; + coal->use_adaptive_rx_coalesce = priv->params.rx_am_enabled; return 0; } @@ -419,6 +607,10 @@ static int mlx5e_set_coalesce(struct net_device *netdev, struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5_core_dev *mdev = priv->mdev; struct mlx5e_channel *c; + bool restart = + !!coal->use_adaptive_rx_coalesce != priv->params.rx_am_enabled; + bool was_opened; + int err = 0; int tc; int i; @@ -426,12 +618,19 @@ static int mlx5e_set_coalesce(struct net_device *netdev, return -ENOTSUPP; mutex_lock(&priv->state_lock); - priv->params.tx_cq_moderation_usec = coal->tx_coalesce_usecs; - priv->params.tx_cq_moderation_pkts = coal->tx_max_coalesced_frames; - priv->params.rx_cq_moderation_usec = coal->rx_coalesce_usecs; - priv->params.rx_cq_moderation_pkts = coal->rx_max_coalesced_frames; - if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state); + if (was_opened && restart) { + mlx5e_close_locked(netdev); + priv->params.rx_am_enabled = !!coal->use_adaptive_rx_coalesce; + } + + priv->params.tx_cq_moderation.usec = coal->tx_coalesce_usecs; + priv->params.tx_cq_moderation.pkts = coal->tx_max_coalesced_frames; + priv->params.rx_cq_moderation.usec = coal->rx_coalesce_usecs; + priv->params.rx_cq_moderation.pkts = coal->rx_max_coalesced_frames; + + if (!was_opened || restart) goto out; for (i = 0; i < priv->params.num_channels; ++i) { @@ -450,35 +649,39 @@ static int mlx5e_set_coalesce(struct net_device *netdev, } out: + if (was_opened && restart) + err = mlx5e_open_locked(netdev); + mutex_unlock(&priv->state_lock); - return 0; + return err; } -static u32 ptys2ethtool_supported_link(u32 eth_proto_cap) +static void ptys2ethtool_supported_link(unsigned long *supported_modes, + u32 eth_proto_cap) { - int i; - u32 supported_modes = 0; + unsigned long proto_cap = eth_proto_cap; + int proto; - for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) { - if (eth_proto_cap & MLX5E_PROT_MASK(i)) - supported_modes |= ptys2ethtool_table[i].supported; - } - return supported_modes; + for_each_set_bit(proto, &proto_cap, MLX5E_LINK_MODES_NUMBER) + bitmap_or(supported_modes, supported_modes, + ptys2ethtool_table[proto].supported, + __ETHTOOL_LINK_MODE_MASK_NBITS); } -static u32 ptys2ethtool_adver_link(u32 eth_proto_cap) +static void ptys2ethtool_adver_link(unsigned long *advertising_modes, + u32 eth_proto_cap) { - int i; - u32 advertising_modes = 0; + unsigned long proto_cap = eth_proto_cap; + int proto; - for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) { - if (eth_proto_cap & MLX5E_PROT_MASK(i)) - advertising_modes |= ptys2ethtool_table[i].advertised; - } - return advertising_modes; + for_each_set_bit(proto, &proto_cap, MLX5E_LINK_MODES_NUMBER) + bitmap_or(advertising_modes, advertising_modes, + ptys2ethtool_table[proto].advertised, + __ETHTOOL_LINK_MODE_MASK_NBITS); } -static u32 ptys2ethtool_supported_port(u32 eth_proto_cap) +static void ptys2ethtool_supported_port(struct ethtool_link_ksettings *link_ksettings, + u32 eth_proto_cap) { if (eth_proto_cap & (MLX5E_PROT_MASK(MLX5E_10GBASE_CR) | MLX5E_PROT_MASK(MLX5E_10GBASE_SR) @@ -486,7 +689,7 @@ static u32 ptys2ethtool_supported_port(u32 eth_proto_cap) | MLX5E_PROT_MASK(MLX5E_40GBASE_SR4) | MLX5E_PROT_MASK(MLX5E_100GBASE_SR4) | MLX5E_PROT_MASK(MLX5E_1000BASE_CX_SGMII))) { - return SUPPORTED_FIBRE; + ethtool_link_ksettings_add_link_mode(link_ksettings, supported, FIBRE); } if (eth_proto_cap & (MLX5E_PROT_MASK(MLX5E_100GBASE_KR4) @@ -494,14 +697,32 @@ static u32 ptys2ethtool_supported_port(u32 eth_proto_cap) | MLX5E_PROT_MASK(MLX5E_10GBASE_KR) | MLX5E_PROT_MASK(MLX5E_10GBASE_KX4) | MLX5E_PROT_MASK(MLX5E_1000BASE_KX))) { - return SUPPORTED_Backplane; + ethtool_link_ksettings_add_link_mode(link_ksettings, supported, Backplane); } +} + +int mlx5e_get_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed) +{ + u32 max_speed = 0; + u32 proto_cap; + int err; + int i; + + err = mlx5_query_port_proto_cap(mdev, &proto_cap, MLX5_PTYS_EN); + if (err) + return err; + + for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) + if (proto_cap & MLX5E_PROT_MASK(i)) + max_speed = max(max_speed, ptys2ethtool_table[i].speed); + + *speed = max_speed; return 0; } static void get_speed_duplex(struct net_device *netdev, u32 eth_proto_oper, - struct ethtool_cmd *cmd) + struct ethtool_link_ksettings *link_ksettings) { int i; u32 speed = SPEED_UNKNOWN; @@ -518,23 +739,32 @@ static void get_speed_duplex(struct net_device *netdev, } } out: - ethtool_cmd_speed_set(cmd, speed); - cmd->duplex = duplex; + link_ksettings->base.speed = speed; + link_ksettings->base.duplex = duplex; } -static void get_supported(u32 eth_proto_cap, u32 *supported) +static void get_supported(u32 eth_proto_cap, + struct ethtool_link_ksettings *link_ksettings) { - *supported |= ptys2ethtool_supported_port(eth_proto_cap); - *supported |= ptys2ethtool_supported_link(eth_proto_cap); - *supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause; + unsigned long *supported = link_ksettings->link_modes.supported; + + ptys2ethtool_supported_port(link_ksettings, eth_proto_cap); + ptys2ethtool_supported_link(supported, eth_proto_cap); + ethtool_link_ksettings_add_link_mode(link_ksettings, supported, Pause); + ethtool_link_ksettings_add_link_mode(link_ksettings, supported, Asym_Pause); } static void get_advertising(u32 eth_proto_cap, u8 tx_pause, - u8 rx_pause, u32 *advertising) + u8 rx_pause, + struct ethtool_link_ksettings *link_ksettings) { - *advertising |= ptys2ethtool_adver_link(eth_proto_cap); - *advertising |= tx_pause ? ADVERTISED_Pause : 0; - *advertising |= (tx_pause ^ rx_pause) ? ADVERTISED_Asym_Pause : 0; + unsigned long *advertising = link_ksettings->link_modes.advertising; + + ptys2ethtool_adver_link(advertising, eth_proto_cap); + if (tx_pause) + ethtool_link_ksettings_add_link_mode(link_ksettings, advertising, Pause); + if (tx_pause ^ rx_pause) + ethtool_link_ksettings_add_link_mode(link_ksettings, advertising, Asym_Pause); } static u8 get_connector_port(u32 eth_proto) @@ -562,13 +792,16 @@ static u8 get_connector_port(u32 eth_proto) return PORT_OTHER; } -static void get_lp_advertising(u32 eth_proto_lp, u32 *lp_advertising) +static void get_lp_advertising(u32 eth_proto_lp, + struct ethtool_link_ksettings *link_ksettings) { - *lp_advertising = ptys2ethtool_adver_link(eth_proto_lp); + unsigned long *lp_advertising = link_ksettings->link_modes.lp_advertising; + + ptys2ethtool_adver_link(lp_advertising, eth_proto_lp); } -static int mlx5e_get_settings(struct net_device *netdev, - struct ethtool_cmd *cmd) +static int mlx5e_get_link_ksettings(struct net_device *netdev, + struct ethtool_link_ksettings *link_ksettings) { struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5_core_dev *mdev = priv->mdev; @@ -577,6 +810,8 @@ static int mlx5e_get_settings(struct net_device *netdev, u32 eth_proto_admin; u32 eth_proto_lp; u32 eth_proto_oper; + u8 an_disable_admin; + u8 an_status; int err; err = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN, 1); @@ -587,35 +822,49 @@ static int mlx5e_get_settings(struct net_device *netdev, goto err_query_ptys; } - eth_proto_cap = MLX5_GET(ptys_reg, out, eth_proto_capability); - eth_proto_admin = MLX5_GET(ptys_reg, out, eth_proto_admin); - eth_proto_oper = MLX5_GET(ptys_reg, out, eth_proto_oper); - eth_proto_lp = MLX5_GET(ptys_reg, out, eth_proto_lp_advertise); + eth_proto_cap = MLX5_GET(ptys_reg, out, eth_proto_capability); + eth_proto_admin = MLX5_GET(ptys_reg, out, eth_proto_admin); + eth_proto_oper = MLX5_GET(ptys_reg, out, eth_proto_oper); + eth_proto_lp = MLX5_GET(ptys_reg, out, eth_proto_lp_advertise); + an_disable_admin = MLX5_GET(ptys_reg, out, an_disable_admin); + an_status = MLX5_GET(ptys_reg, out, an_status); - cmd->supported = 0; - cmd->advertising = 0; + ethtool_link_ksettings_zero_link_mode(link_ksettings, supported); + ethtool_link_ksettings_zero_link_mode(link_ksettings, advertising); - get_supported(eth_proto_cap, &cmd->supported); - get_advertising(eth_proto_admin, 0, 0, &cmd->advertising); - get_speed_duplex(netdev, eth_proto_oper, cmd); + get_supported(eth_proto_cap, link_ksettings); + get_advertising(eth_proto_admin, 0, 0, link_ksettings); + get_speed_duplex(netdev, eth_proto_oper, link_ksettings); eth_proto_oper = eth_proto_oper ? eth_proto_oper : eth_proto_cap; - cmd->port = get_connector_port(eth_proto_oper); - get_lp_advertising(eth_proto_lp, &cmd->lp_advertising); + link_ksettings->base.port = get_connector_port(eth_proto_oper); + get_lp_advertising(eth_proto_lp, link_ksettings); - cmd->transceiver = XCVR_INTERNAL; + if (an_status == MLX5_AN_COMPLETE) + ethtool_link_ksettings_add_link_mode(link_ksettings, + lp_advertising, Autoneg); + + link_ksettings->base.autoneg = an_disable_admin ? AUTONEG_DISABLE : + AUTONEG_ENABLE; + ethtool_link_ksettings_add_link_mode(link_ksettings, supported, + Autoneg); + if (!an_disable_admin) + ethtool_link_ksettings_add_link_mode(link_ksettings, + advertising, Autoneg); err_query_ptys: return err; } -static u32 mlx5e_ethtool2ptys_adver_link(u32 link_modes) +static u32 mlx5e_ethtool2ptys_adver_link(const unsigned long *link_modes) { u32 i, ptys_modes = 0; for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) { - if (ptys2ethtool_table[i].advertised & link_modes) + if (bitmap_intersects(ptys2ethtool_table[i].advertised, + link_modes, + __ETHTOOL_LINK_MODE_MASK_NBITS)) ptys_modes |= MLX5E_PROT_MASK(i); } @@ -634,21 +883,25 @@ static u32 mlx5e_ethtool2ptys_speed_link(u32 speed) return speed_links; } -static int mlx5e_set_settings(struct net_device *netdev, - struct ethtool_cmd *cmd) +static int mlx5e_set_link_ksettings(struct net_device *netdev, + const struct ethtool_link_ksettings *link_ksettings) { struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5_core_dev *mdev = priv->mdev; + u32 eth_proto_cap, eth_proto_admin; + bool an_changes = false; + u8 an_disable_admin; + u8 an_disable_cap; + bool an_disable; u32 link_modes; + u8 an_status; u32 speed; - u32 eth_proto_cap, eth_proto_admin; - enum mlx5_port_status ps; int err; - speed = ethtool_cmd_speed(cmd); + speed = link_ksettings->base.speed; - link_modes = cmd->autoneg == AUTONEG_ENABLE ? - mlx5e_ethtool2ptys_adver_link(cmd->advertising) : + link_modes = link_ksettings->base.autoneg == AUTONEG_ENABLE ? + mlx5e_ethtool2ptys_adver_link(link_ksettings->link_modes.advertising) : mlx5e_ethtool2ptys_speed_link(speed); err = mlx5_query_port_proto_cap(mdev, ð_proto_cap, MLX5_PTYS_EN); @@ -673,15 +926,18 @@ static int mlx5e_set_settings(struct net_device *netdev, goto out; } - if (link_modes == eth_proto_admin) + mlx5_query_port_autoneg(mdev, MLX5_PTYS_EN, &an_status, + &an_disable_cap, &an_disable_admin); + + an_disable = link_ksettings->base.autoneg == AUTONEG_DISABLE; + an_changes = ((!an_disable && an_disable_admin) || + (an_disable && !an_disable_admin)); + + if (!an_changes && link_modes == eth_proto_admin) goto out; - mlx5_query_port_admin_status(mdev, &ps); - if (ps == MLX5_PORT_UP) - mlx5_set_port_admin_status(mdev, MLX5_PORT_DOWN); - mlx5_set_port_proto(mdev, link_modes, MLX5_PTYS_EN); - if (ps == MLX5_PORT_UP) - mlx5_set_port_admin_status(mdev, MLX5_PORT_UP); + mlx5_set_port_ptys(mdev, an_disable, link_modes, MLX5_PTYS_EN); + mlx5_toggle_port_link(mdev); out: return err; @@ -727,9 +983,8 @@ static void mlx5e_modify_tirs_hash(struct mlx5e_priv *priv, void *in, int inlen) MLX5_SET(modify_tir_in, in, bitmask.hash, 1); mlx5e_build_tir_ctx_hash(tirc, priv); - for (i = 0; i < MLX5E_NUM_TT; i++) - if (IS_HASHING_TT(i)) - mlx5_core_modify_tir(mdev, priv->tirn[i], in, inlen); + for (i = 0; i < MLX5E_NUM_INDIR_TIRS; i++) + mlx5_core_modify_tir(mdev, priv->indir_tir[i].tirn, in, inlen); } static int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir, @@ -751,9 +1006,11 @@ static int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir, mutex_lock(&priv->state_lock); if (indir) { + u32 rqtn = priv->indir_rqt.rqtn; + memcpy(priv->params.indirection_rqt, indir, sizeof(priv->params.indirection_rqt)); - mlx5e_redirect_rqt(priv, MLX5E_INDIRECTION_RQT); + mlx5e_redirect_rqt(priv, rqtn, MLX5E_INDIR_RQT_SIZE, 0); } if (key) @@ -782,6 +1039,15 @@ static int mlx5e_get_rxnfc(struct net_device *netdev, case ETHTOOL_GRXRINGS: info->data = priv->params.num_channels; break; + case ETHTOOL_GRXCLSRLCNT: + info->rule_cnt = priv->fs.ethtool.tot_num_rules; + break; + case ETHTOOL_GRXCLSRULE: + err = mlx5e_ethtool_get_flow(priv, info, info->fs.location); + break; + case ETHTOOL_GRXCLSRLALL: + err = mlx5e_ethtool_get_all_flows(priv, info, rule_locs); + break; default: err = -EOPNOTSUPP; break; @@ -1036,6 +1302,209 @@ static int mlx5e_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol) return mlx5_set_port_wol(mdev, mlx5_wol_mode); } +static int mlx5e_set_phys_id(struct net_device *dev, + enum ethtool_phys_id_state state) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + u16 beacon_duration; + + if (!MLX5_CAP_GEN(mdev, beacon_led)) + return -EOPNOTSUPP; + + switch (state) { + case ETHTOOL_ID_ACTIVE: + beacon_duration = MLX5_BEACON_DURATION_INF; + break; + case ETHTOOL_ID_INACTIVE: + beacon_duration = MLX5_BEACON_DURATION_OFF; + break; + default: + return -EOPNOTSUPP; + } + + return mlx5_set_port_beacon(mdev, beacon_duration); +} + +static int mlx5e_get_module_info(struct net_device *netdev, + struct ethtool_modinfo *modinfo) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *dev = priv->mdev; + int size_read = 0; + u8 data[4]; + + size_read = mlx5_query_module_eeprom(dev, 0, 2, data); + if (size_read < 2) + return -EIO; + + /* data[0] = identifier byte */ + switch (data[0]) { + case MLX5_MODULE_ID_QSFP: + modinfo->type = ETH_MODULE_SFF_8436; + modinfo->eeprom_len = ETH_MODULE_SFF_8436_LEN; + break; + case MLX5_MODULE_ID_QSFP_PLUS: + case MLX5_MODULE_ID_QSFP28: + /* data[1] = revision id */ + if (data[0] == MLX5_MODULE_ID_QSFP28 || data[1] >= 0x3) { + modinfo->type = ETH_MODULE_SFF_8636; + modinfo->eeprom_len = ETH_MODULE_SFF_8636_LEN; + } else { + modinfo->type = ETH_MODULE_SFF_8436; + modinfo->eeprom_len = ETH_MODULE_SFF_8436_LEN; + } + break; + case MLX5_MODULE_ID_SFP: + modinfo->type = ETH_MODULE_SFF_8472; + modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN; + break; + default: + netdev_err(priv->netdev, "%s: cable type not recognized:0x%x\n", + __func__, data[0]); + return -EINVAL; + } + + return 0; +} + +static int mlx5e_get_module_eeprom(struct net_device *netdev, + struct ethtool_eeprom *ee, + u8 *data) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + int offset = ee->offset; + int size_read; + int i = 0; + + if (!ee->len) + return -EINVAL; + + memset(data, 0, ee->len); + + while (i < ee->len) { + size_read = mlx5_query_module_eeprom(mdev, offset, ee->len - i, + data + i); + + if (!size_read) + /* Done reading */ + return 0; + + if (size_read < 0) { + netdev_err(priv->netdev, "%s: mlx5_query_eeprom failed:0x%x\n", + __func__, size_read); + return 0; + } + + i += size_read; + offset += size_read; + } + + return 0; +} + +typedef int (*mlx5e_pflag_handler)(struct net_device *netdev, bool enable); + +static int set_pflag_rx_cqe_based_moder(struct net_device *netdev, bool enable) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + bool rx_mode_changed; + u8 rx_cq_period_mode; + int err = 0; + bool reset; + + rx_cq_period_mode = enable ? + MLX5_CQ_PERIOD_MODE_START_FROM_CQE : + MLX5_CQ_PERIOD_MODE_START_FROM_EQE; + rx_mode_changed = rx_cq_period_mode != priv->params.rx_cq_period_mode; + + if (rx_cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE && + !MLX5_CAP_GEN(mdev, cq_period_start_from_cqe)) + return -ENOTSUPP; + + if (!rx_mode_changed) + return 0; + + reset = test_bit(MLX5E_STATE_OPENED, &priv->state); + if (reset) + mlx5e_close_locked(netdev); + + mlx5e_set_rx_cq_mode_params(&priv->params, rx_cq_period_mode); + + if (reset) + err = mlx5e_open_locked(netdev); + + return err; +} + +static int mlx5e_handle_pflag(struct net_device *netdev, + u32 wanted_flags, + enum mlx5e_priv_flag flag, + mlx5e_pflag_handler pflag_handler) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + bool enable = !!(wanted_flags & flag); + u32 changes = wanted_flags ^ priv->pflags; + int err; + + if (!(changes & flag)) + return 0; + + err = pflag_handler(netdev, enable); + if (err) { + netdev_err(netdev, "%s private flag 0x%x failed err %d\n", + enable ? "Enable" : "Disable", flag, err); + return err; + } + + MLX5E_SET_PRIV_FLAG(priv, flag, enable); + return 0; +} + +static int mlx5e_set_priv_flags(struct net_device *netdev, u32 pflags) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + int err; + + mutex_lock(&priv->state_lock); + + err = mlx5e_handle_pflag(netdev, pflags, + MLX5E_PFLAG_RX_CQE_BASED_MODER, + set_pflag_rx_cqe_based_moder); + + mutex_unlock(&priv->state_lock); + return err ? -EINVAL : 0; +} + +static u32 mlx5e_get_priv_flags(struct net_device *netdev) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + return priv->pflags; +} + +static int mlx5e_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd) +{ + int err = 0; + struct mlx5e_priv *priv = netdev_priv(dev); + + switch (cmd->cmd) { + case ETHTOOL_SRXCLSRLINS: + err = mlx5e_ethtool_flow_replace(priv, &cmd->fs); + break; + case ETHTOOL_SRXCLSRLDEL: + err = mlx5e_ethtool_flow_remove(priv, cmd->fs.location); + break; + default: + err = -EOPNOTSUPP; + break; + } + + return err; +} + const struct ethtool_ops mlx5e_ethtool_ops = { .get_drvinfo = mlx5e_get_drvinfo, .get_link = ethtool_op_get_link, @@ -1048,18 +1517,24 @@ const struct ethtool_ops mlx5e_ethtool_ops = { .set_channels = mlx5e_set_channels, .get_coalesce = mlx5e_get_coalesce, .set_coalesce = mlx5e_set_coalesce, - .get_settings = mlx5e_get_settings, - .set_settings = mlx5e_set_settings, + .get_link_ksettings = mlx5e_get_link_ksettings, + .set_link_ksettings = mlx5e_set_link_ksettings, .get_rxfh_key_size = mlx5e_get_rxfh_key_size, .get_rxfh_indir_size = mlx5e_get_rxfh_indir_size, .get_rxfh = mlx5e_get_rxfh, .set_rxfh = mlx5e_set_rxfh, .get_rxnfc = mlx5e_get_rxnfc, + .set_rxnfc = mlx5e_set_rxnfc, .get_tunable = mlx5e_get_tunable, .set_tunable = mlx5e_set_tunable, .get_pauseparam = mlx5e_get_pauseparam, .set_pauseparam = mlx5e_set_pauseparam, .get_ts_info = mlx5e_get_ts_info, + .set_phys_id = mlx5e_set_phys_id, .get_wol = mlx5e_get_wol, .set_wol = mlx5e_set_wol, + .get_module_info = mlx5e_get_module_info, + .get_module_eeprom = mlx5e_get_module_eeprom, + .get_priv_flags = mlx5e_get_priv_flags, + .set_priv_flags = mlx5e_set_priv_flags }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c index d00a24203410..1587a9fd5724 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c @@ -37,7 +37,10 @@ #include <linux/mlx5/fs.h> #include "en.h" -#define MLX5_SET_CFG(p, f, v) MLX5_SET(create_flow_group_in, p, f, v) +static int mlx5e_add_l2_flow_rule(struct mlx5e_priv *priv, + struct mlx5e_l2_rule *ai, int type); +static void mlx5e_del_l2_flow_rule(struct mlx5e_priv *priv, + struct mlx5e_l2_rule *ai); enum { MLX5E_FULLMATCH = 0, @@ -58,21 +61,21 @@ enum { MLX5E_ACTION_DEL = 2, }; -struct mlx5e_eth_addr_hash_node { +struct mlx5e_l2_hash_node { struct hlist_node hlist; u8 action; - struct mlx5e_eth_addr_info ai; + struct mlx5e_l2_rule ai; }; -static inline int mlx5e_hash_eth_addr(u8 *addr) +static inline int mlx5e_hash_l2(u8 *addr) { return addr[5]; } -static void mlx5e_add_eth_addr_to_hash(struct hlist_head *hash, u8 *addr) +static void mlx5e_add_l2_to_hash(struct hlist_head *hash, u8 *addr) { - struct mlx5e_eth_addr_hash_node *hn; - int ix = mlx5e_hash_eth_addr(addr); + struct mlx5e_l2_hash_node *hn; + int ix = mlx5e_hash_l2(addr); int found = 0; hlist_for_each_entry(hn, &hash[ix], hlist) @@ -96,371 +99,12 @@ static void mlx5e_add_eth_addr_to_hash(struct hlist_head *hash, u8 *addr) hlist_add_head(&hn->hlist, &hash[ix]); } -static void mlx5e_del_eth_addr_from_hash(struct mlx5e_eth_addr_hash_node *hn) +static void mlx5e_del_l2_from_hash(struct mlx5e_l2_hash_node *hn) { hlist_del(&hn->hlist); kfree(hn); } -static void mlx5e_del_eth_addr_from_flow_table(struct mlx5e_priv *priv, - struct mlx5e_eth_addr_info *ai) -{ - if (ai->tt_vec & BIT(MLX5E_TT_IPV6_IPSEC_ESP)) - mlx5_del_flow_rule(ai->ft_rule[MLX5E_TT_IPV6_IPSEC_ESP]); - - if (ai->tt_vec & BIT(MLX5E_TT_IPV4_IPSEC_ESP)) - mlx5_del_flow_rule(ai->ft_rule[MLX5E_TT_IPV4_IPSEC_ESP]); - - if (ai->tt_vec & BIT(MLX5E_TT_IPV6_IPSEC_AH)) - mlx5_del_flow_rule(ai->ft_rule[MLX5E_TT_IPV6_IPSEC_AH]); - - if (ai->tt_vec & BIT(MLX5E_TT_IPV4_IPSEC_AH)) - mlx5_del_flow_rule(ai->ft_rule[MLX5E_TT_IPV4_IPSEC_AH]); - - if (ai->tt_vec & BIT(MLX5E_TT_IPV6_TCP)) - mlx5_del_flow_rule(ai->ft_rule[MLX5E_TT_IPV6_TCP]); - - if (ai->tt_vec & BIT(MLX5E_TT_IPV4_TCP)) - mlx5_del_flow_rule(ai->ft_rule[MLX5E_TT_IPV4_TCP]); - - if (ai->tt_vec & BIT(MLX5E_TT_IPV6_UDP)) - mlx5_del_flow_rule(ai->ft_rule[MLX5E_TT_IPV6_UDP]); - - if (ai->tt_vec & BIT(MLX5E_TT_IPV4_UDP)) - mlx5_del_flow_rule(ai->ft_rule[MLX5E_TT_IPV4_UDP]); - - if (ai->tt_vec & BIT(MLX5E_TT_IPV6)) - mlx5_del_flow_rule(ai->ft_rule[MLX5E_TT_IPV6]); - - if (ai->tt_vec & BIT(MLX5E_TT_IPV4)) - mlx5_del_flow_rule(ai->ft_rule[MLX5E_TT_IPV4]); - - if (ai->tt_vec & BIT(MLX5E_TT_ANY)) - mlx5_del_flow_rule(ai->ft_rule[MLX5E_TT_ANY]); -} - -static int mlx5e_get_eth_addr_type(u8 *addr) -{ - if (is_unicast_ether_addr(addr)) - return MLX5E_UC; - - if ((addr[0] == 0x01) && - (addr[1] == 0x00) && - (addr[2] == 0x5e) && - !(addr[3] & 0x80)) - return MLX5E_MC_IPV4; - - if ((addr[0] == 0x33) && - (addr[1] == 0x33)) - return MLX5E_MC_IPV6; - - return MLX5E_MC_OTHER; -} - -static u32 mlx5e_get_tt_vec(struct mlx5e_eth_addr_info *ai, int type) -{ - int eth_addr_type; - u32 ret; - - switch (type) { - case MLX5E_FULLMATCH: - eth_addr_type = mlx5e_get_eth_addr_type(ai->addr); - switch (eth_addr_type) { - case MLX5E_UC: - ret = - BIT(MLX5E_TT_IPV4_TCP) | - BIT(MLX5E_TT_IPV6_TCP) | - BIT(MLX5E_TT_IPV4_UDP) | - BIT(MLX5E_TT_IPV6_UDP) | - BIT(MLX5E_TT_IPV4_IPSEC_AH) | - BIT(MLX5E_TT_IPV6_IPSEC_AH) | - BIT(MLX5E_TT_IPV4_IPSEC_ESP) | - BIT(MLX5E_TT_IPV6_IPSEC_ESP) | - BIT(MLX5E_TT_IPV4) | - BIT(MLX5E_TT_IPV6) | - BIT(MLX5E_TT_ANY) | - 0; - break; - - case MLX5E_MC_IPV4: - ret = - BIT(MLX5E_TT_IPV4_UDP) | - BIT(MLX5E_TT_IPV4) | - 0; - break; - - case MLX5E_MC_IPV6: - ret = - BIT(MLX5E_TT_IPV6_UDP) | - BIT(MLX5E_TT_IPV6) | - 0; - break; - - case MLX5E_MC_OTHER: - ret = - BIT(MLX5E_TT_ANY) | - 0; - break; - } - - break; - - case MLX5E_ALLMULTI: - ret = - BIT(MLX5E_TT_IPV4_UDP) | - BIT(MLX5E_TT_IPV6_UDP) | - BIT(MLX5E_TT_IPV4) | - BIT(MLX5E_TT_IPV6) | - BIT(MLX5E_TT_ANY) | - 0; - break; - - default: /* MLX5E_PROMISC */ - ret = - BIT(MLX5E_TT_IPV4_TCP) | - BIT(MLX5E_TT_IPV6_TCP) | - BIT(MLX5E_TT_IPV4_UDP) | - BIT(MLX5E_TT_IPV6_UDP) | - BIT(MLX5E_TT_IPV4_IPSEC_AH) | - BIT(MLX5E_TT_IPV6_IPSEC_AH) | - BIT(MLX5E_TT_IPV4_IPSEC_ESP) | - BIT(MLX5E_TT_IPV6_IPSEC_ESP) | - BIT(MLX5E_TT_IPV4) | - BIT(MLX5E_TT_IPV6) | - BIT(MLX5E_TT_ANY) | - 0; - break; - } - - return ret; -} - -static int __mlx5e_add_eth_addr_rule(struct mlx5e_priv *priv, - struct mlx5e_eth_addr_info *ai, - int type, u32 *mc, u32 *mv) -{ - struct mlx5_flow_destination dest; - u8 match_criteria_enable = 0; - struct mlx5_flow_rule **rule_p; - struct mlx5_flow_table *ft = priv->fts.main.t; - u8 *mc_dmac = MLX5_ADDR_OF(fte_match_param, mc, - outer_headers.dmac_47_16); - u8 *mv_dmac = MLX5_ADDR_OF(fte_match_param, mv, - outer_headers.dmac_47_16); - u32 *tirn = priv->tirn; - u32 tt_vec; - int err = 0; - - dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; - - switch (type) { - case MLX5E_FULLMATCH: - match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - eth_broadcast_addr(mc_dmac); - ether_addr_copy(mv_dmac, ai->addr); - break; - - case MLX5E_ALLMULTI: - match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - mc_dmac[0] = 0x01; - mv_dmac[0] = 0x01; - break; - - case MLX5E_PROMISC: - break; - } - - tt_vec = mlx5e_get_tt_vec(ai, type); - - if (tt_vec & BIT(MLX5E_TT_ANY)) { - rule_p = &ai->ft_rule[MLX5E_TT_ANY]; - dest.tir_num = tirn[MLX5E_TT_ANY]; - *rule_p = mlx5_add_flow_rule(ft, match_criteria_enable, mc, mv, - MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, - MLX5_FS_DEFAULT_FLOW_TAG, &dest); - if (IS_ERR_OR_NULL(*rule_p)) - goto err_del_ai; - ai->tt_vec |= BIT(MLX5E_TT_ANY); - } - - match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype); - - if (tt_vec & BIT(MLX5E_TT_IPV4)) { - rule_p = &ai->ft_rule[MLX5E_TT_IPV4]; - dest.tir_num = tirn[MLX5E_TT_IPV4]; - MLX5_SET(fte_match_param, mv, outer_headers.ethertype, - ETH_P_IP); - *rule_p = mlx5_add_flow_rule(ft, match_criteria_enable, mc, mv, - MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, - MLX5_FS_DEFAULT_FLOW_TAG, &dest); - if (IS_ERR_OR_NULL(*rule_p)) - goto err_del_ai; - ai->tt_vec |= BIT(MLX5E_TT_IPV4); - } - - if (tt_vec & BIT(MLX5E_TT_IPV6)) { - rule_p = &ai->ft_rule[MLX5E_TT_IPV6]; - dest.tir_num = tirn[MLX5E_TT_IPV6]; - MLX5_SET(fte_match_param, mv, outer_headers.ethertype, - ETH_P_IPV6); - *rule_p = mlx5_add_flow_rule(ft, match_criteria_enable, mc, mv, - MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, - MLX5_FS_DEFAULT_FLOW_TAG, &dest); - if (IS_ERR_OR_NULL(*rule_p)) - goto err_del_ai; - ai->tt_vec |= BIT(MLX5E_TT_IPV6); - } - - MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_protocol); - MLX5_SET(fte_match_param, mv, outer_headers.ip_protocol, IPPROTO_UDP); - - if (tt_vec & BIT(MLX5E_TT_IPV4_UDP)) { - rule_p = &ai->ft_rule[MLX5E_TT_IPV4_UDP]; - dest.tir_num = tirn[MLX5E_TT_IPV4_UDP]; - MLX5_SET(fte_match_param, mv, outer_headers.ethertype, - ETH_P_IP); - *rule_p = mlx5_add_flow_rule(ft, match_criteria_enable, mc, mv, - MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, - MLX5_FS_DEFAULT_FLOW_TAG, &dest); - if (IS_ERR_OR_NULL(*rule_p)) - goto err_del_ai; - ai->tt_vec |= BIT(MLX5E_TT_IPV4_UDP); - } - - if (tt_vec & BIT(MLX5E_TT_IPV6_UDP)) { - rule_p = &ai->ft_rule[MLX5E_TT_IPV6_UDP]; - dest.tir_num = tirn[MLX5E_TT_IPV6_UDP]; - MLX5_SET(fte_match_param, mv, outer_headers.ethertype, - ETH_P_IPV6); - *rule_p = mlx5_add_flow_rule(ft, match_criteria_enable, mc, mv, - MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, - MLX5_FS_DEFAULT_FLOW_TAG, &dest); - if (IS_ERR_OR_NULL(*rule_p)) - goto err_del_ai; - ai->tt_vec |= BIT(MLX5E_TT_IPV6_UDP); - } - - MLX5_SET(fte_match_param, mv, outer_headers.ip_protocol, IPPROTO_TCP); - - if (tt_vec & BIT(MLX5E_TT_IPV4_TCP)) { - rule_p = &ai->ft_rule[MLX5E_TT_IPV4_TCP]; - dest.tir_num = tirn[MLX5E_TT_IPV4_TCP]; - MLX5_SET(fte_match_param, mv, outer_headers.ethertype, - ETH_P_IP); - *rule_p = mlx5_add_flow_rule(ft, match_criteria_enable, mc, mv, - MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, - MLX5_FS_DEFAULT_FLOW_TAG, &dest); - if (IS_ERR_OR_NULL(*rule_p)) - goto err_del_ai; - ai->tt_vec |= BIT(MLX5E_TT_IPV4_TCP); - } - - if (tt_vec & BIT(MLX5E_TT_IPV6_TCP)) { - rule_p = &ai->ft_rule[MLX5E_TT_IPV6_TCP]; - dest.tir_num = tirn[MLX5E_TT_IPV6_TCP]; - MLX5_SET(fte_match_param, mv, outer_headers.ethertype, - ETH_P_IPV6); - *rule_p = mlx5_add_flow_rule(ft, match_criteria_enable, mc, mv, - MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, - MLX5_FS_DEFAULT_FLOW_TAG, &dest); - if (IS_ERR_OR_NULL(*rule_p)) - goto err_del_ai; - - ai->tt_vec |= BIT(MLX5E_TT_IPV6_TCP); - } - - MLX5_SET(fte_match_param, mv, outer_headers.ip_protocol, IPPROTO_AH); - - if (tt_vec & BIT(MLX5E_TT_IPV4_IPSEC_AH)) { - rule_p = &ai->ft_rule[MLX5E_TT_IPV4_IPSEC_AH]; - dest.tir_num = tirn[MLX5E_TT_IPV4_IPSEC_AH]; - MLX5_SET(fte_match_param, mv, outer_headers.ethertype, - ETH_P_IP); - *rule_p = mlx5_add_flow_rule(ft, match_criteria_enable, mc, mv, - MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, - MLX5_FS_DEFAULT_FLOW_TAG, &dest); - if (IS_ERR_OR_NULL(*rule_p)) - goto err_del_ai; - ai->tt_vec |= BIT(MLX5E_TT_IPV4_IPSEC_AH); - } - - if (tt_vec & BIT(MLX5E_TT_IPV6_IPSEC_AH)) { - rule_p = &ai->ft_rule[MLX5E_TT_IPV6_IPSEC_AH]; - dest.tir_num = tirn[MLX5E_TT_IPV6_IPSEC_AH]; - MLX5_SET(fte_match_param, mv, outer_headers.ethertype, - ETH_P_IPV6); - *rule_p = mlx5_add_flow_rule(ft, match_criteria_enable, mc, mv, - MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, - MLX5_FS_DEFAULT_FLOW_TAG, &dest); - if (IS_ERR_OR_NULL(*rule_p)) - goto err_del_ai; - ai->tt_vec |= BIT(MLX5E_TT_IPV6_IPSEC_AH); - } - - MLX5_SET(fte_match_param, mv, outer_headers.ip_protocol, IPPROTO_ESP); - - if (tt_vec & BIT(MLX5E_TT_IPV4_IPSEC_ESP)) { - rule_p = &ai->ft_rule[MLX5E_TT_IPV4_IPSEC_ESP]; - dest.tir_num = tirn[MLX5E_TT_IPV4_IPSEC_ESP]; - MLX5_SET(fte_match_param, mv, outer_headers.ethertype, - ETH_P_IP); - *rule_p = mlx5_add_flow_rule(ft, match_criteria_enable, mc, mv, - MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, - MLX5_FS_DEFAULT_FLOW_TAG, &dest); - if (IS_ERR_OR_NULL(*rule_p)) - goto err_del_ai; - ai->tt_vec |= BIT(MLX5E_TT_IPV4_IPSEC_ESP); - } - - if (tt_vec & BIT(MLX5E_TT_IPV6_IPSEC_ESP)) { - rule_p = &ai->ft_rule[MLX5E_TT_IPV6_IPSEC_ESP]; - dest.tir_num = tirn[MLX5E_TT_IPV6_IPSEC_ESP]; - MLX5_SET(fte_match_param, mv, outer_headers.ethertype, - ETH_P_IPV6); - *rule_p = mlx5_add_flow_rule(ft, match_criteria_enable, mc, mv, - MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, - MLX5_FS_DEFAULT_FLOW_TAG, &dest); - if (IS_ERR_OR_NULL(*rule_p)) - goto err_del_ai; - ai->tt_vec |= BIT(MLX5E_TT_IPV6_IPSEC_ESP); - } - - return 0; - -err_del_ai: - err = PTR_ERR(*rule_p); - *rule_p = NULL; - mlx5e_del_eth_addr_from_flow_table(priv, ai); - - return err; -} - -static int mlx5e_add_eth_addr_rule(struct mlx5e_priv *priv, - struct mlx5e_eth_addr_info *ai, int type) -{ - u32 *match_criteria; - u32 *match_value; - int err = 0; - - match_value = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param)); - match_criteria = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param)); - if (!match_value || !match_criteria) { - netdev_err(priv->netdev, "%s: alloc failed\n", __func__); - err = -ENOMEM; - goto add_eth_addr_rule_out; - } - - err = __mlx5e_add_eth_addr_rule(priv, ai, type, match_criteria, - match_value); - -add_eth_addr_rule_out: - kvfree(match_criteria); - kvfree(match_value); - - return err; -} - static int mlx5e_vport_context_update_vlans(struct mlx5e_priv *priv) { struct net_device *ndev = priv->netdev; @@ -472,7 +116,7 @@ static int mlx5e_vport_context_update_vlans(struct mlx5e_priv *priv) int i; list_size = 0; - for_each_set_bit(vlan, priv->vlan.active_vlans, VLAN_N_VID) + for_each_set_bit(vlan, priv->fs.vlan.active_vlans, VLAN_N_VID) list_size++; max_list_size = 1 << MLX5_CAP_GEN(priv->mdev, log_max_vlan_list); @@ -489,7 +133,7 @@ static int mlx5e_vport_context_update_vlans(struct mlx5e_priv *priv) return -ENOMEM; i = 0; - for_each_set_bit(vlan, priv->vlan.active_vlans, VLAN_N_VID) { + for_each_set_bit(vlan, priv->fs.vlan.active_vlans, VLAN_N_VID) { if (i >= list_size) break; vlans[i++] = vlan; @@ -512,37 +156,38 @@ enum mlx5e_vlan_rule_type { static int __mlx5e_add_vlan_rule(struct mlx5e_priv *priv, enum mlx5e_vlan_rule_type rule_type, - u16 vid, u32 *mc, u32 *mv) + u16 vid, struct mlx5_flow_spec *spec) { - struct mlx5_flow_table *ft = priv->fts.vlan.t; + struct mlx5_flow_table *ft = priv->fs.vlan.ft.t; struct mlx5_flow_destination dest; - u8 match_criteria_enable = 0; struct mlx5_flow_rule **rule_p; int err = 0; dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; - dest.ft = priv->fts.main.t; + dest.ft = priv->fs.l2.ft.t; - match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.vlan_tag); + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.vlan_tag); switch (rule_type) { case MLX5E_VLAN_RULE_TYPE_UNTAGGED: - rule_p = &priv->vlan.untagged_rule; + rule_p = &priv->fs.vlan.untagged_rule; break; case MLX5E_VLAN_RULE_TYPE_ANY_VID: - rule_p = &priv->vlan.any_vlan_rule; - MLX5_SET(fte_match_param, mv, outer_headers.vlan_tag, 1); + rule_p = &priv->fs.vlan.any_vlan_rule; + MLX5_SET(fte_match_param, spec->match_value, outer_headers.vlan_tag, 1); break; default: /* MLX5E_VLAN_RULE_TYPE_MATCH_VID */ - rule_p = &priv->vlan.active_vlans_rule[vid]; - MLX5_SET(fte_match_param, mv, outer_headers.vlan_tag, 1); - MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.first_vid); - MLX5_SET(fte_match_param, mv, outer_headers.first_vid, vid); + rule_p = &priv->fs.vlan.active_vlans_rule[vid]; + MLX5_SET(fte_match_param, spec->match_value, outer_headers.vlan_tag, 1); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.first_vid); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.first_vid, + vid); break; } - *rule_p = mlx5_add_flow_rule(ft, match_criteria_enable, mc, mv, + *rule_p = mlx5_add_flow_rule(ft, spec, MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, MLX5_FS_DEFAULT_FLOW_TAG, &dest); @@ -559,27 +204,21 @@ static int __mlx5e_add_vlan_rule(struct mlx5e_priv *priv, static int mlx5e_add_vlan_rule(struct mlx5e_priv *priv, enum mlx5e_vlan_rule_type rule_type, u16 vid) { - u32 *match_criteria; - u32 *match_value; + struct mlx5_flow_spec *spec; int err = 0; - match_value = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param)); - match_criteria = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param)); - if (!match_value || !match_criteria) { + spec = mlx5_vzalloc(sizeof(*spec)); + if (!spec) { netdev_err(priv->netdev, "%s: alloc failed\n", __func__); - err = -ENOMEM; - goto add_vlan_rule_out; + return -ENOMEM; } if (rule_type == MLX5E_VLAN_RULE_TYPE_MATCH_VID) mlx5e_vport_context_update_vlans(priv); - err = __mlx5e_add_vlan_rule(priv, rule_type, vid, match_criteria, - match_value); + err = __mlx5e_add_vlan_rule(priv, rule_type, vid, spec); -add_vlan_rule_out: - kvfree(match_criteria); - kvfree(match_value); + kvfree(spec); return err; } @@ -589,22 +228,22 @@ static void mlx5e_del_vlan_rule(struct mlx5e_priv *priv, { switch (rule_type) { case MLX5E_VLAN_RULE_TYPE_UNTAGGED: - if (priv->vlan.untagged_rule) { - mlx5_del_flow_rule(priv->vlan.untagged_rule); - priv->vlan.untagged_rule = NULL; + if (priv->fs.vlan.untagged_rule) { + mlx5_del_flow_rule(priv->fs.vlan.untagged_rule); + priv->fs.vlan.untagged_rule = NULL; } break; case MLX5E_VLAN_RULE_TYPE_ANY_VID: - if (priv->vlan.any_vlan_rule) { - mlx5_del_flow_rule(priv->vlan.any_vlan_rule); - priv->vlan.any_vlan_rule = NULL; + if (priv->fs.vlan.any_vlan_rule) { + mlx5_del_flow_rule(priv->fs.vlan.any_vlan_rule); + priv->fs.vlan.any_vlan_rule = NULL; } break; case MLX5E_VLAN_RULE_TYPE_MATCH_VID: mlx5e_vport_context_update_vlans(priv); - if (priv->vlan.active_vlans_rule[vid]) { - mlx5_del_flow_rule(priv->vlan.active_vlans_rule[vid]); - priv->vlan.active_vlans_rule[vid] = NULL; + if (priv->fs.vlan.active_vlans_rule[vid]) { + mlx5_del_flow_rule(priv->fs.vlan.active_vlans_rule[vid]); + priv->fs.vlan.active_vlans_rule[vid] = NULL; } mlx5e_vport_context_update_vlans(priv); break; @@ -613,10 +252,10 @@ static void mlx5e_del_vlan_rule(struct mlx5e_priv *priv, void mlx5e_enable_vlan_filter(struct mlx5e_priv *priv) { - if (!priv->vlan.filter_disabled) + if (!priv->fs.vlan.filter_disabled) return; - priv->vlan.filter_disabled = false; + priv->fs.vlan.filter_disabled = false; if (priv->netdev->flags & IFF_PROMISC) return; mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_VID, 0); @@ -624,10 +263,10 @@ void mlx5e_enable_vlan_filter(struct mlx5e_priv *priv) void mlx5e_disable_vlan_filter(struct mlx5e_priv *priv) { - if (priv->vlan.filter_disabled) + if (priv->fs.vlan.filter_disabled) return; - priv->vlan.filter_disabled = true; + priv->fs.vlan.filter_disabled = true; if (priv->netdev->flags & IFF_PROMISC) return; mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_VID, 0); @@ -638,7 +277,7 @@ int mlx5e_vlan_rx_add_vid(struct net_device *dev, __always_unused __be16 proto, { struct mlx5e_priv *priv = netdev_priv(dev); - set_bit(vid, priv->vlan.active_vlans); + set_bit(vid, priv->fs.vlan.active_vlans); return mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_VID, vid); } @@ -648,7 +287,7 @@ int mlx5e_vlan_rx_kill_vid(struct net_device *dev, __always_unused __be16 proto, { struct mlx5e_priv *priv = netdev_priv(dev); - clear_bit(vid, priv->vlan.active_vlans); + clear_bit(vid, priv->fs.vlan.active_vlans); mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_VID, vid); @@ -656,21 +295,21 @@ int mlx5e_vlan_rx_kill_vid(struct net_device *dev, __always_unused __be16 proto, } #define mlx5e_for_each_hash_node(hn, tmp, hash, i) \ - for (i = 0; i < MLX5E_ETH_ADDR_HASH_SIZE; i++) \ + for (i = 0; i < MLX5E_L2_ADDR_HASH_SIZE; i++) \ hlist_for_each_entry_safe(hn, tmp, &hash[i], hlist) -static void mlx5e_execute_action(struct mlx5e_priv *priv, - struct mlx5e_eth_addr_hash_node *hn) +static void mlx5e_execute_l2_action(struct mlx5e_priv *priv, + struct mlx5e_l2_hash_node *hn) { switch (hn->action) { case MLX5E_ACTION_ADD: - mlx5e_add_eth_addr_rule(priv, &hn->ai, MLX5E_FULLMATCH); + mlx5e_add_l2_flow_rule(priv, &hn->ai, MLX5E_FULLMATCH); hn->action = MLX5E_ACTION_NONE; break; case MLX5E_ACTION_DEL: - mlx5e_del_eth_addr_from_flow_table(priv, &hn->ai); - mlx5e_del_eth_addr_from_hash(hn); + mlx5e_del_l2_flow_rule(priv, &hn->ai); + mlx5e_del_l2_from_hash(hn); break; } } @@ -682,14 +321,14 @@ static void mlx5e_sync_netdev_addr(struct mlx5e_priv *priv) netif_addr_lock_bh(netdev); - mlx5e_add_eth_addr_to_hash(priv->eth_addr.netdev_uc, - priv->netdev->dev_addr); + mlx5e_add_l2_to_hash(priv->fs.l2.netdev_uc, + priv->netdev->dev_addr); netdev_for_each_uc_addr(ha, netdev) - mlx5e_add_eth_addr_to_hash(priv->eth_addr.netdev_uc, ha->addr); + mlx5e_add_l2_to_hash(priv->fs.l2.netdev_uc, ha->addr); netdev_for_each_mc_addr(ha, netdev) - mlx5e_add_eth_addr_to_hash(priv->eth_addr.netdev_mc, ha->addr); + mlx5e_add_l2_to_hash(priv->fs.l2.netdev_mc, ha->addr); netif_addr_unlock_bh(netdev); } @@ -699,17 +338,17 @@ static void mlx5e_fill_addr_array(struct mlx5e_priv *priv, int list_type, { bool is_uc = (list_type == MLX5_NVPRT_LIST_TYPE_UC); struct net_device *ndev = priv->netdev; - struct mlx5e_eth_addr_hash_node *hn; + struct mlx5e_l2_hash_node *hn; struct hlist_head *addr_list; struct hlist_node *tmp; int i = 0; int hi; - addr_list = is_uc ? priv->eth_addr.netdev_uc : priv->eth_addr.netdev_mc; + addr_list = is_uc ? priv->fs.l2.netdev_uc : priv->fs.l2.netdev_mc; if (is_uc) /* Make sure our own address is pushed first */ ether_addr_copy(addr_array[i++], ndev->dev_addr); - else if (priv->eth_addr.broadcast_enabled) + else if (priv->fs.l2.broadcast_enabled) ether_addr_copy(addr_array[i++], ndev->broadcast); mlx5e_for_each_hash_node(hn, tmp, addr_list, hi) { @@ -725,7 +364,7 @@ static void mlx5e_vport_context_update_addr_list(struct mlx5e_priv *priv, int list_type) { bool is_uc = (list_type == MLX5_NVPRT_LIST_TYPE_UC); - struct mlx5e_eth_addr_hash_node *hn; + struct mlx5e_l2_hash_node *hn; u8 (*addr_array)[ETH_ALEN] = NULL; struct hlist_head *addr_list; struct hlist_node *tmp; @@ -734,12 +373,12 @@ static void mlx5e_vport_context_update_addr_list(struct mlx5e_priv *priv, int err; int hi; - size = is_uc ? 0 : (priv->eth_addr.broadcast_enabled ? 1 : 0); + size = is_uc ? 0 : (priv->fs.l2.broadcast_enabled ? 1 : 0); max_size = is_uc ? 1 << MLX5_CAP_GEN(priv->mdev, log_max_current_uc_list) : 1 << MLX5_CAP_GEN(priv->mdev, log_max_current_mc_list); - addr_list = is_uc ? priv->eth_addr.netdev_uc : priv->eth_addr.netdev_mc; + addr_list = is_uc ? priv->fs.l2.netdev_uc : priv->fs.l2.netdev_mc; mlx5e_for_each_hash_node(hn, tmp, addr_list, hi) size++; @@ -770,7 +409,7 @@ out: static void mlx5e_vport_context_update(struct mlx5e_priv *priv) { - struct mlx5e_eth_addr_db *ea = &priv->eth_addr; + struct mlx5e_l2_table *ea = &priv->fs.l2; mlx5e_vport_context_update_addr_list(priv, MLX5_NVPRT_LIST_TYPE_UC); mlx5e_vport_context_update_addr_list(priv, MLX5_NVPRT_LIST_TYPE_MC); @@ -781,26 +420,26 @@ static void mlx5e_vport_context_update(struct mlx5e_priv *priv) static void mlx5e_apply_netdev_addr(struct mlx5e_priv *priv) { - struct mlx5e_eth_addr_hash_node *hn; + struct mlx5e_l2_hash_node *hn; struct hlist_node *tmp; int i; - mlx5e_for_each_hash_node(hn, tmp, priv->eth_addr.netdev_uc, i) - mlx5e_execute_action(priv, hn); + mlx5e_for_each_hash_node(hn, tmp, priv->fs.l2.netdev_uc, i) + mlx5e_execute_l2_action(priv, hn); - mlx5e_for_each_hash_node(hn, tmp, priv->eth_addr.netdev_mc, i) - mlx5e_execute_action(priv, hn); + mlx5e_for_each_hash_node(hn, tmp, priv->fs.l2.netdev_mc, i) + mlx5e_execute_l2_action(priv, hn); } static void mlx5e_handle_netdev_addr(struct mlx5e_priv *priv) { - struct mlx5e_eth_addr_hash_node *hn; + struct mlx5e_l2_hash_node *hn; struct hlist_node *tmp; int i; - mlx5e_for_each_hash_node(hn, tmp, priv->eth_addr.netdev_uc, i) + mlx5e_for_each_hash_node(hn, tmp, priv->fs.l2.netdev_uc, i) hn->action = MLX5E_ACTION_DEL; - mlx5e_for_each_hash_node(hn, tmp, priv->eth_addr.netdev_mc, i) + mlx5e_for_each_hash_node(hn, tmp, priv->fs.l2.netdev_mc, i) hn->action = MLX5E_ACTION_DEL; if (!test_bit(MLX5E_STATE_DESTROYING, &priv->state)) @@ -814,7 +453,7 @@ void mlx5e_set_rx_mode_work(struct work_struct *work) struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv, set_rx_mode_work); - struct mlx5e_eth_addr_db *ea = &priv->eth_addr; + struct mlx5e_l2_table *ea = &priv->fs.l2; struct net_device *ndev = priv->netdev; bool rx_mode_enable = !test_bit(MLX5E_STATE_DESTROYING, &priv->state); @@ -830,27 +469,27 @@ void mlx5e_set_rx_mode_work(struct work_struct *work) bool disable_broadcast = ea->broadcast_enabled && !broadcast_enabled; if (enable_promisc) { - mlx5e_add_eth_addr_rule(priv, &ea->promisc, MLX5E_PROMISC); - if (!priv->vlan.filter_disabled) + mlx5e_add_l2_flow_rule(priv, &ea->promisc, MLX5E_PROMISC); + if (!priv->fs.vlan.filter_disabled) mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_VID, 0); } if (enable_allmulti) - mlx5e_add_eth_addr_rule(priv, &ea->allmulti, MLX5E_ALLMULTI); + mlx5e_add_l2_flow_rule(priv, &ea->allmulti, MLX5E_ALLMULTI); if (enable_broadcast) - mlx5e_add_eth_addr_rule(priv, &ea->broadcast, MLX5E_FULLMATCH); + mlx5e_add_l2_flow_rule(priv, &ea->broadcast, MLX5E_FULLMATCH); mlx5e_handle_netdev_addr(priv); if (disable_broadcast) - mlx5e_del_eth_addr_from_flow_table(priv, &ea->broadcast); + mlx5e_del_l2_flow_rule(priv, &ea->broadcast); if (disable_allmulti) - mlx5e_del_eth_addr_from_flow_table(priv, &ea->allmulti); + mlx5e_del_l2_flow_rule(priv, &ea->allmulti); if (disable_promisc) { - if (!priv->vlan.filter_disabled) + if (!priv->fs.vlan.filter_disabled) mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_VID, 0); - mlx5e_del_eth_addr_from_flow_table(priv, &ea->promisc); + mlx5e_del_l2_flow_rule(priv, &ea->promisc); } ea->promisc_enabled = promisc_enabled; @@ -872,224 +511,441 @@ static void mlx5e_destroy_groups(struct mlx5e_flow_table *ft) ft->num_groups = 0; } -void mlx5e_init_eth_addr(struct mlx5e_priv *priv) +void mlx5e_init_l2_addr(struct mlx5e_priv *priv) { - ether_addr_copy(priv->eth_addr.broadcast.addr, priv->netdev->broadcast); + ether_addr_copy(priv->fs.l2.broadcast.addr, priv->netdev->broadcast); } -#define MLX5E_MAIN_GROUP0_SIZE BIT(3) -#define MLX5E_MAIN_GROUP1_SIZE BIT(1) -#define MLX5E_MAIN_GROUP2_SIZE BIT(0) -#define MLX5E_MAIN_GROUP3_SIZE BIT(14) -#define MLX5E_MAIN_GROUP4_SIZE BIT(13) -#define MLX5E_MAIN_GROUP5_SIZE BIT(11) -#define MLX5E_MAIN_GROUP6_SIZE BIT(2) -#define MLX5E_MAIN_GROUP7_SIZE BIT(1) -#define MLX5E_MAIN_GROUP8_SIZE BIT(0) -#define MLX5E_MAIN_TABLE_SIZE (MLX5E_MAIN_GROUP0_SIZE +\ - MLX5E_MAIN_GROUP1_SIZE +\ - MLX5E_MAIN_GROUP2_SIZE +\ - MLX5E_MAIN_GROUP3_SIZE +\ - MLX5E_MAIN_GROUP4_SIZE +\ - MLX5E_MAIN_GROUP5_SIZE +\ - MLX5E_MAIN_GROUP6_SIZE +\ - MLX5E_MAIN_GROUP7_SIZE +\ - MLX5E_MAIN_GROUP8_SIZE) - -static int __mlx5e_create_main_groups(struct mlx5e_flow_table *ft, u32 *in, - int inlen) +void mlx5e_destroy_flow_table(struct mlx5e_flow_table *ft) { - u8 *mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); - u8 *dmac = MLX5_ADDR_OF(create_flow_group_in, in, - match_criteria.outer_headers.dmac_47_16); + mlx5e_destroy_groups(ft); + kfree(ft->g); + mlx5_destroy_flow_table(ft->t); + ft->t = NULL; +} + +static void mlx5e_cleanup_ttc_rules(struct mlx5e_ttc_table *ttc) +{ + int i; + + for (i = 0; i < MLX5E_NUM_TT; i++) { + if (!IS_ERR_OR_NULL(ttc->rules[i])) { + mlx5_del_flow_rule(ttc->rules[i]); + ttc->rules[i] = NULL; + } + } +} + +static struct { + u16 etype; + u8 proto; +} ttc_rules[] = { + [MLX5E_TT_IPV4_TCP] = { + .etype = ETH_P_IP, + .proto = IPPROTO_TCP, + }, + [MLX5E_TT_IPV6_TCP] = { + .etype = ETH_P_IPV6, + .proto = IPPROTO_TCP, + }, + [MLX5E_TT_IPV4_UDP] = { + .etype = ETH_P_IP, + .proto = IPPROTO_UDP, + }, + [MLX5E_TT_IPV6_UDP] = { + .etype = ETH_P_IPV6, + .proto = IPPROTO_UDP, + }, + [MLX5E_TT_IPV4_IPSEC_AH] = { + .etype = ETH_P_IP, + .proto = IPPROTO_AH, + }, + [MLX5E_TT_IPV6_IPSEC_AH] = { + .etype = ETH_P_IPV6, + .proto = IPPROTO_AH, + }, + [MLX5E_TT_IPV4_IPSEC_ESP] = { + .etype = ETH_P_IP, + .proto = IPPROTO_ESP, + }, + [MLX5E_TT_IPV6_IPSEC_ESP] = { + .etype = ETH_P_IPV6, + .proto = IPPROTO_ESP, + }, + [MLX5E_TT_IPV4] = { + .etype = ETH_P_IP, + .proto = 0, + }, + [MLX5E_TT_IPV6] = { + .etype = ETH_P_IPV6, + .proto = 0, + }, + [MLX5E_TT_ANY] = { + .etype = 0, + .proto = 0, + }, +}; + +static struct mlx5_flow_rule *mlx5e_generate_ttc_rule(struct mlx5e_priv *priv, + struct mlx5_flow_table *ft, + struct mlx5_flow_destination *dest, + u16 etype, + u8 proto) +{ + struct mlx5_flow_rule *rule; + struct mlx5_flow_spec *spec; + int err = 0; + + spec = mlx5_vzalloc(sizeof(*spec)); + if (!spec) { + netdev_err(priv->netdev, "%s: alloc failed\n", __func__); + return ERR_PTR(-ENOMEM); + } + + if (proto) { + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_protocol); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_protocol, proto); + } + if (etype) { + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ethertype); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.ethertype, etype); + } + + rule = mlx5_add_flow_rule(ft, spec, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + MLX5_FS_DEFAULT_FLOW_TAG, + dest); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + netdev_err(priv->netdev, "%s: add rule failed\n", __func__); + } + + kvfree(spec); + return err ? ERR_PTR(err) : rule; +} + +static int mlx5e_generate_ttc_table_rules(struct mlx5e_priv *priv) +{ + struct mlx5_flow_destination dest; + struct mlx5e_ttc_table *ttc; + struct mlx5_flow_rule **rules; + struct mlx5_flow_table *ft; + int tt; int err; + + ttc = &priv->fs.ttc; + ft = ttc->ft.t; + rules = ttc->rules; + + dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; + for (tt = 0; tt < MLX5E_NUM_TT; tt++) { + if (tt == MLX5E_TT_ANY) + dest.tir_num = priv->direct_tir[0].tirn; + else + dest.tir_num = priv->indir_tir[tt].tirn; + rules[tt] = mlx5e_generate_ttc_rule(priv, ft, &dest, + ttc_rules[tt].etype, + ttc_rules[tt].proto); + if (IS_ERR(rules[tt])) + goto del_rules; + } + + return 0; + +del_rules: + err = PTR_ERR(rules[tt]); + rules[tt] = NULL; + mlx5e_cleanup_ttc_rules(ttc); + return err; +} + +#define MLX5E_TTC_NUM_GROUPS 3 +#define MLX5E_TTC_GROUP1_SIZE BIT(3) +#define MLX5E_TTC_GROUP2_SIZE BIT(1) +#define MLX5E_TTC_GROUP3_SIZE BIT(0) +#define MLX5E_TTC_TABLE_SIZE (MLX5E_TTC_GROUP1_SIZE +\ + MLX5E_TTC_GROUP2_SIZE +\ + MLX5E_TTC_GROUP3_SIZE) +static int mlx5e_create_ttc_table_groups(struct mlx5e_ttc_table *ttc) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5e_flow_table *ft = &ttc->ft; int ix = 0; + u32 *in; + int err; + u8 *mc; - memset(in, 0, inlen); - MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); - MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype); - MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_protocol); - MLX5_SET_CFG(in, start_flow_index, ix); - ix += MLX5E_MAIN_GROUP0_SIZE; - MLX5_SET_CFG(in, end_flow_index, ix - 1); - ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); - if (IS_ERR(ft->g[ft->num_groups])) - goto err_destroy_groups; - ft->num_groups++; + ft->g = kcalloc(MLX5E_TTC_NUM_GROUPS, + sizeof(*ft->g), GFP_KERNEL); + if (!ft->g) + return -ENOMEM; + in = mlx5_vzalloc(inlen); + if (!in) { + kfree(ft->g); + return -ENOMEM; + } - memset(in, 0, inlen); - MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + /* L4 Group */ + mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_protocol); MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype); + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); MLX5_SET_CFG(in, start_flow_index, ix); - ix += MLX5E_MAIN_GROUP1_SIZE; + ix += MLX5E_TTC_GROUP1_SIZE; MLX5_SET_CFG(in, end_flow_index, ix - 1); ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); if (IS_ERR(ft->g[ft->num_groups])) - goto err_destroy_groups; + goto err; ft->num_groups++; - memset(in, 0, inlen); + /* L3 Group */ + MLX5_SET(fte_match_param, mc, outer_headers.ip_protocol, 0); MLX5_SET_CFG(in, start_flow_index, ix); - ix += MLX5E_MAIN_GROUP2_SIZE; + ix += MLX5E_TTC_GROUP2_SIZE; MLX5_SET_CFG(in, end_flow_index, ix - 1); ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); if (IS_ERR(ft->g[ft->num_groups])) - goto err_destroy_groups; + goto err; ft->num_groups++; + /* Any Group */ memset(in, 0, inlen); - MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); - MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype); - MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_protocol); - eth_broadcast_addr(dmac); MLX5_SET_CFG(in, start_flow_index, ix); - ix += MLX5E_MAIN_GROUP3_SIZE; + ix += MLX5E_TTC_GROUP3_SIZE; MLX5_SET_CFG(in, end_flow_index, ix - 1); ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); if (IS_ERR(ft->g[ft->num_groups])) - goto err_destroy_groups; + goto err; ft->num_groups++; - memset(in, 0, inlen); - MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); - MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype); - eth_broadcast_addr(dmac); - MLX5_SET_CFG(in, start_flow_index, ix); - ix += MLX5E_MAIN_GROUP4_SIZE; - MLX5_SET_CFG(in, end_flow_index, ix - 1); - ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); - if (IS_ERR(ft->g[ft->num_groups])) - goto err_destroy_groups; - ft->num_groups++; + kvfree(in); + return 0; - memset(in, 0, inlen); - MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); - eth_broadcast_addr(dmac); - MLX5_SET_CFG(in, start_flow_index, ix); - ix += MLX5E_MAIN_GROUP5_SIZE; - MLX5_SET_CFG(in, end_flow_index, ix - 1); - ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); - if (IS_ERR(ft->g[ft->num_groups])) - goto err_destroy_groups; - ft->num_groups++; +err: + err = PTR_ERR(ft->g[ft->num_groups]); + ft->g[ft->num_groups] = NULL; + kvfree(in); - memset(in, 0, inlen); - MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); - MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype); - MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_protocol); - dmac[0] = 0x01; + return err; +} + +static void mlx5e_destroy_ttc_table(struct mlx5e_priv *priv) +{ + struct mlx5e_ttc_table *ttc = &priv->fs.ttc; + + mlx5e_cleanup_ttc_rules(ttc); + mlx5e_destroy_flow_table(&ttc->ft); +} + +static int mlx5e_create_ttc_table(struct mlx5e_priv *priv) +{ + struct mlx5e_ttc_table *ttc = &priv->fs.ttc; + struct mlx5e_flow_table *ft = &ttc->ft; + int err; + + ft->t = mlx5_create_flow_table(priv->fs.ns, MLX5E_NIC_PRIO, + MLX5E_TTC_TABLE_SIZE, MLX5E_TTC_FT_LEVEL); + if (IS_ERR(ft->t)) { + err = PTR_ERR(ft->t); + ft->t = NULL; + return err; + } + + err = mlx5e_create_ttc_table_groups(ttc); + if (err) + goto err; + + err = mlx5e_generate_ttc_table_rules(priv); + if (err) + goto err; + + return 0; +err: + mlx5e_destroy_flow_table(ft); + return err; +} + +static void mlx5e_del_l2_flow_rule(struct mlx5e_priv *priv, + struct mlx5e_l2_rule *ai) +{ + if (!IS_ERR_OR_NULL(ai->rule)) { + mlx5_del_flow_rule(ai->rule); + ai->rule = NULL; + } +} + +static int mlx5e_add_l2_flow_rule(struct mlx5e_priv *priv, + struct mlx5e_l2_rule *ai, int type) +{ + struct mlx5_flow_table *ft = priv->fs.l2.ft.t; + struct mlx5_flow_destination dest; + struct mlx5_flow_spec *spec; + int err = 0; + u8 *mc_dmac; + u8 *mv_dmac; + + spec = mlx5_vzalloc(sizeof(*spec)); + if (!spec) { + netdev_err(priv->netdev, "%s: alloc failed\n", __func__); + return -ENOMEM; + } + + mc_dmac = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + outer_headers.dmac_47_16); + mv_dmac = MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers.dmac_47_16); + + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest.ft = priv->fs.ttc.ft.t; + + switch (type) { + case MLX5E_FULLMATCH: + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + eth_broadcast_addr(mc_dmac); + ether_addr_copy(mv_dmac, ai->addr); + break; + + case MLX5E_ALLMULTI: + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + mc_dmac[0] = 0x01; + mv_dmac[0] = 0x01; + break; + + case MLX5E_PROMISC: + break; + } + + ai->rule = mlx5_add_flow_rule(ft, spec, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + MLX5_FS_DEFAULT_FLOW_TAG, &dest); + if (IS_ERR(ai->rule)) { + netdev_err(priv->netdev, "%s: add l2 rule(mac:%pM) failed\n", + __func__, mv_dmac); + err = PTR_ERR(ai->rule); + ai->rule = NULL; + } + + kvfree(spec); + + return err; +} + +#define MLX5E_NUM_L2_GROUPS 3 +#define MLX5E_L2_GROUP1_SIZE BIT(0) +#define MLX5E_L2_GROUP2_SIZE BIT(15) +#define MLX5E_L2_GROUP3_SIZE BIT(0) +#define MLX5E_L2_TABLE_SIZE (MLX5E_L2_GROUP1_SIZE +\ + MLX5E_L2_GROUP2_SIZE +\ + MLX5E_L2_GROUP3_SIZE) +static int mlx5e_create_l2_table_groups(struct mlx5e_l2_table *l2_table) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5e_flow_table *ft = &l2_table->ft; + int ix = 0; + u8 *mc_dmac; + u32 *in; + int err; + u8 *mc; + + ft->g = kcalloc(MLX5E_NUM_L2_GROUPS, sizeof(*ft->g), GFP_KERNEL); + if (!ft->g) + return -ENOMEM; + in = mlx5_vzalloc(inlen); + if (!in) { + kfree(ft->g); + return -ENOMEM; + } + + mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); + mc_dmac = MLX5_ADDR_OF(fte_match_param, mc, + outer_headers.dmac_47_16); + /* Flow Group for promiscuous */ MLX5_SET_CFG(in, start_flow_index, ix); - ix += MLX5E_MAIN_GROUP6_SIZE; + ix += MLX5E_L2_GROUP1_SIZE; MLX5_SET_CFG(in, end_flow_index, ix - 1); ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); if (IS_ERR(ft->g[ft->num_groups])) goto err_destroy_groups; ft->num_groups++; - memset(in, 0, inlen); + /* Flow Group for full match */ + eth_broadcast_addr(mc_dmac); MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); - MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype); - dmac[0] = 0x01; MLX5_SET_CFG(in, start_flow_index, ix); - ix += MLX5E_MAIN_GROUP7_SIZE; + ix += MLX5E_L2_GROUP2_SIZE; MLX5_SET_CFG(in, end_flow_index, ix - 1); ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); if (IS_ERR(ft->g[ft->num_groups])) goto err_destroy_groups; ft->num_groups++; - memset(in, 0, inlen); - MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); - dmac[0] = 0x01; + /* Flow Group for allmulti */ + eth_zero_addr(mc_dmac); + mc_dmac[0] = 0x01; MLX5_SET_CFG(in, start_flow_index, ix); - ix += MLX5E_MAIN_GROUP8_SIZE; + ix += MLX5E_L2_GROUP3_SIZE; MLX5_SET_CFG(in, end_flow_index, ix - 1); ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); if (IS_ERR(ft->g[ft->num_groups])) goto err_destroy_groups; ft->num_groups++; + kvfree(in); return 0; err_destroy_groups: err = PTR_ERR(ft->g[ft->num_groups]); ft->g[ft->num_groups] = NULL; mlx5e_destroy_groups(ft); + kvfree(in); return err; } -static int mlx5e_create_main_groups(struct mlx5e_flow_table *ft) +static void mlx5e_destroy_l2_table(struct mlx5e_priv *priv) { - u32 *in; - int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); - int err; - - in = mlx5_vzalloc(inlen); - if (!in) - return -ENOMEM; - - err = __mlx5e_create_main_groups(ft, in, inlen); - - kvfree(in); - return err; + mlx5e_destroy_flow_table(&priv->fs.l2.ft); } -static int mlx5e_create_main_flow_table(struct mlx5e_priv *priv) +static int mlx5e_create_l2_table(struct mlx5e_priv *priv) { - struct mlx5e_flow_table *ft = &priv->fts.main; + struct mlx5e_l2_table *l2_table = &priv->fs.l2; + struct mlx5e_flow_table *ft = &l2_table->ft; int err; ft->num_groups = 0; - ft->t = mlx5_create_flow_table(priv->fts.ns, 1, MLX5E_MAIN_TABLE_SIZE); + ft->t = mlx5_create_flow_table(priv->fs.ns, MLX5E_NIC_PRIO, + MLX5E_L2_TABLE_SIZE, MLX5E_L2_FT_LEVEL); if (IS_ERR(ft->t)) { err = PTR_ERR(ft->t); ft->t = NULL; return err; } - ft->g = kcalloc(MLX5E_NUM_MAIN_GROUPS, sizeof(*ft->g), GFP_KERNEL); - if (!ft->g) { - err = -ENOMEM; - goto err_destroy_main_flow_table; - } - err = mlx5e_create_main_groups(ft); + err = mlx5e_create_l2_table_groups(l2_table); if (err) - goto err_free_g; - return 0; + goto err_destroy_flow_table; -err_free_g: - kfree(ft->g); + return 0; -err_destroy_main_flow_table: +err_destroy_flow_table: mlx5_destroy_flow_table(ft->t); ft->t = NULL; return err; } -static void mlx5e_destroy_flow_table(struct mlx5e_flow_table *ft) -{ - mlx5e_destroy_groups(ft); - kfree(ft->g); - mlx5_destroy_flow_table(ft->t); - ft->t = NULL; -} - -static void mlx5e_destroy_main_flow_table(struct mlx5e_priv *priv) -{ - mlx5e_destroy_flow_table(&priv->fts.main); -} - #define MLX5E_NUM_VLAN_GROUPS 2 #define MLX5E_VLAN_GROUP0_SIZE BIT(12) #define MLX5E_VLAN_GROUP1_SIZE BIT(1) #define MLX5E_VLAN_TABLE_SIZE (MLX5E_VLAN_GROUP0_SIZE +\ MLX5E_VLAN_GROUP1_SIZE) -static int __mlx5e_create_vlan_groups(struct mlx5e_flow_table *ft, u32 *in, - int inlen) +static int __mlx5e_create_vlan_table_groups(struct mlx5e_flow_table *ft, u32 *in, + int inlen) { int err; int ix = 0; @@ -1128,7 +984,7 @@ err_destroy_groups: return err; } -static int mlx5e_create_vlan_groups(struct mlx5e_flow_table *ft) +static int mlx5e_create_vlan_table_groups(struct mlx5e_flow_table *ft) { u32 *in; int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); @@ -1138,19 +994,20 @@ static int mlx5e_create_vlan_groups(struct mlx5e_flow_table *ft) if (!in) return -ENOMEM; - err = __mlx5e_create_vlan_groups(ft, in, inlen); + err = __mlx5e_create_vlan_table_groups(ft, in, inlen); kvfree(in); return err; } -static int mlx5e_create_vlan_flow_table(struct mlx5e_priv *priv) +static int mlx5e_create_vlan_table(struct mlx5e_priv *priv) { - struct mlx5e_flow_table *ft = &priv->fts.vlan; + struct mlx5e_flow_table *ft = &priv->fs.vlan.ft; int err; ft->num_groups = 0; - ft->t = mlx5_create_flow_table(priv->fts.ns, 1, MLX5E_VLAN_TABLE_SIZE); + ft->t = mlx5_create_flow_table(priv->fs.ns, MLX5E_NIC_PRIO, + MLX5E_VLAN_TABLE_SIZE, MLX5E_VLAN_FT_LEVEL); if (IS_ERR(ft->t)) { err = PTR_ERR(ft->t); @@ -1160,65 +1017,93 @@ static int mlx5e_create_vlan_flow_table(struct mlx5e_priv *priv) ft->g = kcalloc(MLX5E_NUM_VLAN_GROUPS, sizeof(*ft->g), GFP_KERNEL); if (!ft->g) { err = -ENOMEM; - goto err_destroy_vlan_flow_table; + goto err_destroy_vlan_table; } - err = mlx5e_create_vlan_groups(ft); + err = mlx5e_create_vlan_table_groups(ft); if (err) goto err_free_g; + err = mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0); + if (err) + goto err_destroy_vlan_flow_groups; + return 0; +err_destroy_vlan_flow_groups: + mlx5e_destroy_groups(ft); err_free_g: kfree(ft->g); - -err_destroy_vlan_flow_table: +err_destroy_vlan_table: mlx5_destroy_flow_table(ft->t); ft->t = NULL; return err; } -static void mlx5e_destroy_vlan_flow_table(struct mlx5e_priv *priv) +static void mlx5e_destroy_vlan_table(struct mlx5e_priv *priv) { - mlx5e_destroy_flow_table(&priv->fts.vlan); + mlx5e_destroy_flow_table(&priv->fs.vlan.ft); } -int mlx5e_create_flow_tables(struct mlx5e_priv *priv) +int mlx5e_create_flow_steering(struct mlx5e_priv *priv) { int err; - priv->fts.ns = mlx5_get_flow_namespace(priv->mdev, + priv->fs.ns = mlx5_get_flow_namespace(priv->mdev, MLX5_FLOW_NAMESPACE_KERNEL); - if (!priv->fts.ns) + if (!priv->fs.ns) return -EINVAL; - err = mlx5e_create_vlan_flow_table(priv); - if (err) - return err; + err = mlx5e_arfs_create_tables(priv); + if (err) { + netdev_err(priv->netdev, "Failed to create arfs tables, err=%d\n", + err); + priv->netdev->hw_features &= ~NETIF_F_NTUPLE; + } - err = mlx5e_create_main_flow_table(priv); - if (err) - goto err_destroy_vlan_flow_table; + err = mlx5e_create_ttc_table(priv); + if (err) { + netdev_err(priv->netdev, "Failed to create ttc table, err=%d\n", + err); + goto err_destroy_arfs_tables; + } - err = mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0); - if (err) - goto err_destroy_main_flow_table; + err = mlx5e_create_l2_table(priv); + if (err) { + netdev_err(priv->netdev, "Failed to create l2 table, err=%d\n", + err); + goto err_destroy_ttc_table; + } + + err = mlx5e_create_vlan_table(priv); + if (err) { + netdev_err(priv->netdev, "Failed to create vlan table, err=%d\n", + err); + goto err_destroy_l2_table; + } + + mlx5e_ethtool_init_steering(priv); return 0; -err_destroy_main_flow_table: - mlx5e_destroy_main_flow_table(priv); -err_destroy_vlan_flow_table: - mlx5e_destroy_vlan_flow_table(priv); +err_destroy_l2_table: + mlx5e_destroy_l2_table(priv); +err_destroy_ttc_table: + mlx5e_destroy_ttc_table(priv); +err_destroy_arfs_tables: + mlx5e_arfs_destroy_tables(priv); return err; } -void mlx5e_destroy_flow_tables(struct mlx5e_priv *priv) +void mlx5e_destroy_flow_steering(struct mlx5e_priv *priv) { mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0); - mlx5e_destroy_main_flow_table(priv); - mlx5e_destroy_vlan_flow_table(priv); + mlx5e_destroy_vlan_table(priv); + mlx5e_destroy_l2_table(priv); + mlx5e_destroy_ttc_table(priv); + mlx5e_arfs_destroy_tables(priv); + mlx5e_ethtool_cleanup_steering(priv); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c new file mode 100644 index 000000000000..d17c24227900 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c @@ -0,0 +1,586 @@ +/* + * Copyright (c) 2016, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/mlx5/fs.h> +#include "en.h" + +struct mlx5e_ethtool_rule { + struct list_head list; + struct ethtool_rx_flow_spec flow_spec; + struct mlx5_flow_rule *rule; + struct mlx5e_ethtool_table *eth_ft; +}; + +static void put_flow_table(struct mlx5e_ethtool_table *eth_ft) +{ + if (!--eth_ft->num_rules) { + mlx5_destroy_flow_table(eth_ft->ft); + eth_ft->ft = NULL; + } +} + +#define MLX5E_ETHTOOL_L3_L4_PRIO 0 +#define MLX5E_ETHTOOL_L2_PRIO (MLX5E_ETHTOOL_L3_L4_PRIO + ETHTOOL_NUM_L3_L4_FTS) +#define MLX5E_ETHTOOL_NUM_ENTRIES 64000 +#define MLX5E_ETHTOOL_NUM_GROUPS 10 +static struct mlx5e_ethtool_table *get_flow_table(struct mlx5e_priv *priv, + struct ethtool_rx_flow_spec *fs, + int num_tuples) +{ + struct mlx5e_ethtool_table *eth_ft; + struct mlx5_flow_namespace *ns; + struct mlx5_flow_table *ft; + int max_tuples; + int table_size; + int prio; + + switch (fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT)) { + case TCP_V4_FLOW: + case UDP_V4_FLOW: + max_tuples = ETHTOOL_NUM_L3_L4_FTS; + prio = MLX5E_ETHTOOL_L3_L4_PRIO + (max_tuples - num_tuples); + eth_ft = &priv->fs.ethtool.l3_l4_ft[prio]; + break; + case IP_USER_FLOW: + max_tuples = ETHTOOL_NUM_L3_L4_FTS; + prio = MLX5E_ETHTOOL_L3_L4_PRIO + (max_tuples - num_tuples); + eth_ft = &priv->fs.ethtool.l3_l4_ft[prio]; + break; + case ETHER_FLOW: + max_tuples = ETHTOOL_NUM_L2_FTS; + prio = max_tuples - num_tuples; + eth_ft = &priv->fs.ethtool.l2_ft[prio]; + prio += MLX5E_ETHTOOL_L2_PRIO; + break; + default: + return ERR_PTR(-EINVAL); + } + + eth_ft->num_rules++; + if (eth_ft->ft) + return eth_ft; + + ns = mlx5_get_flow_namespace(priv->mdev, + MLX5_FLOW_NAMESPACE_ETHTOOL); + if (!ns) + return ERR_PTR(-ENOTSUPP); + + table_size = min_t(u32, BIT(MLX5_CAP_FLOWTABLE(priv->mdev, + flow_table_properties_nic_receive.log_max_ft_size)), + MLX5E_ETHTOOL_NUM_ENTRIES); + ft = mlx5_create_auto_grouped_flow_table(ns, prio, + table_size, + MLX5E_ETHTOOL_NUM_GROUPS, 0); + if (IS_ERR(ft)) + return (void *)ft; + + eth_ft->ft = ft; + return eth_ft; +} + +static void mask_spec(u8 *mask, u8 *val, size_t size) +{ + unsigned int i; + + for (i = 0; i < size; i++, mask++, val++) + *((u8 *)val) = *((u8 *)mask) & *((u8 *)val); +} + +static void set_ips(void *outer_headers_v, void *outer_headers_c, __be32 ip4src_m, + __be32 ip4src_v, __be32 ip4dst_m, __be32 ip4dst_v) +{ + if (ip4src_m) { + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v, + src_ipv4_src_ipv6.ipv4_layout.ipv4), + &ip4src_v, sizeof(ip4src_v)); + memset(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c, + src_ipv4_src_ipv6.ipv4_layout.ipv4), + 0xff, sizeof(ip4src_m)); + } + if (ip4dst_m) { + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v, + dst_ipv4_dst_ipv6.ipv4_layout.ipv4), + &ip4dst_v, sizeof(ip4dst_v)); + memset(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c, + dst_ipv4_dst_ipv6.ipv4_layout.ipv4), + 0xff, sizeof(ip4dst_m)); + } + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, + ethertype, ETH_P_IP); + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, + ethertype, 0xffff); +} + +static int set_flow_attrs(u32 *match_c, u32 *match_v, + struct ethtool_rx_flow_spec *fs) +{ + void *outer_headers_c = MLX5_ADDR_OF(fte_match_param, match_c, + outer_headers); + void *outer_headers_v = MLX5_ADDR_OF(fte_match_param, match_v, + outer_headers); + u32 flow_type = fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT); + struct ethtool_tcpip4_spec *l4_mask; + struct ethtool_tcpip4_spec *l4_val; + struct ethtool_usrip4_spec *l3_mask; + struct ethtool_usrip4_spec *l3_val; + struct ethhdr *eth_val; + struct ethhdr *eth_mask; + + switch (flow_type) { + case TCP_V4_FLOW: + l4_mask = &fs->m_u.tcp_ip4_spec; + l4_val = &fs->h_u.tcp_ip4_spec; + set_ips(outer_headers_v, outer_headers_c, l4_mask->ip4src, + l4_val->ip4src, l4_mask->ip4dst, l4_val->ip4dst); + + if (l4_mask->psrc) { + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, tcp_sport, + 0xffff); + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, tcp_sport, + ntohs(l4_val->psrc)); + } + if (l4_mask->pdst) { + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, tcp_dport, + 0xffff); + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, tcp_dport, + ntohs(l4_val->pdst)); + } + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, ip_protocol, + 0xffff); + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, ip_protocol, + IPPROTO_TCP); + break; + case UDP_V4_FLOW: + l4_mask = &fs->m_u.tcp_ip4_spec; + l4_val = &fs->h_u.tcp_ip4_spec; + set_ips(outer_headers_v, outer_headers_c, l4_mask->ip4src, + l4_val->ip4src, l4_mask->ip4dst, l4_val->ip4dst); + + if (l4_mask->psrc) { + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, udp_sport, + 0xffff); + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, udp_sport, + ntohs(l4_val->psrc)); + } + if (l4_mask->pdst) { + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, udp_dport, + 0xffff); + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, udp_dport, + ntohs(l4_val->pdst)); + } + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, ip_protocol, + 0xffff); + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, ip_protocol, + IPPROTO_UDP); + break; + case IP_USER_FLOW: + l3_mask = &fs->m_u.usr_ip4_spec; + l3_val = &fs->h_u.usr_ip4_spec; + set_ips(outer_headers_v, outer_headers_c, l3_mask->ip4src, + l3_val->ip4src, l3_mask->ip4dst, l3_val->ip4dst); + break; + case ETHER_FLOW: + eth_mask = &fs->m_u.ether_spec; + eth_val = &fs->h_u.ether_spec; + + mask_spec((u8 *)eth_mask, (u8 *)eth_val, sizeof(*eth_mask)); + ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, + outer_headers_c, smac_47_16), + eth_mask->h_source); + ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, + outer_headers_v, smac_47_16), + eth_val->h_source); + ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, + outer_headers_c, dmac_47_16), + eth_mask->h_dest); + ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, + outer_headers_v, dmac_47_16), + eth_val->h_dest); + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, ethertype, + ntohs(eth_mask->h_proto)); + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, ethertype, + ntohs(eth_val->h_proto)); + break; + default: + return -EINVAL; + } + + if ((fs->flow_type & FLOW_EXT) && + (fs->m_ext.vlan_tci & cpu_to_be16(VLAN_VID_MASK))) { + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, + vlan_tag, 1); + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, + vlan_tag, 1); + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, + first_vid, 0xfff); + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, + first_vid, ntohs(fs->h_ext.vlan_tci)); + } + if (fs->flow_type & FLOW_MAC_EXT && + !is_zero_ether_addr(fs->m_ext.h_dest)) { + ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, + outer_headers_c, dmac_47_16), + fs->m_ext.h_dest); + ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, + outer_headers_v, dmac_47_16), + fs->h_ext.h_dest); + } + + return 0; +} + +static void add_rule_to_list(struct mlx5e_priv *priv, + struct mlx5e_ethtool_rule *rule) +{ + struct mlx5e_ethtool_rule *iter; + struct list_head *head = &priv->fs.ethtool.rules; + + list_for_each_entry(iter, &priv->fs.ethtool.rules, list) { + if (iter->flow_spec.location > rule->flow_spec.location) + break; + head = &iter->list; + } + priv->fs.ethtool.tot_num_rules++; + list_add(&rule->list, head); +} + +static bool outer_header_zero(u32 *match_criteria) +{ + int size = MLX5_ST_SZ_BYTES(fte_match_param); + char *outer_headers_c = MLX5_ADDR_OF(fte_match_param, match_criteria, + outer_headers); + + return outer_headers_c[0] == 0 && !memcmp(outer_headers_c, + outer_headers_c + 1, + size - 1); +} + +static struct mlx5_flow_rule *add_ethtool_flow_rule(struct mlx5e_priv *priv, + struct mlx5_flow_table *ft, + struct ethtool_rx_flow_spec *fs) +{ + struct mlx5_flow_destination *dst = NULL; + struct mlx5_flow_spec *spec; + struct mlx5_flow_rule *rule; + int err = 0; + u32 action; + + spec = mlx5_vzalloc(sizeof(*spec)); + if (!spec) + return ERR_PTR(-ENOMEM); + err = set_flow_attrs(spec->match_criteria, spec->match_value, + fs); + if (err) + goto free; + + if (fs->ring_cookie == RX_CLS_FLOW_DISC) { + action = MLX5_FLOW_CONTEXT_ACTION_DROP; + } else { + dst = kzalloc(sizeof(*dst), GFP_KERNEL); + if (!dst) { + err = -ENOMEM; + goto free; + } + + dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR; + dst->tir_num = priv->direct_tir[fs->ring_cookie].tirn; + action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + } + + spec->match_criteria_enable = (!outer_header_zero(spec->match_criteria)); + rule = mlx5_add_flow_rule(ft, spec, action, + MLX5_FS_DEFAULT_FLOW_TAG, dst); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + netdev_err(priv->netdev, "%s: failed to add ethtool steering rule: %d\n", + __func__, err); + goto free; + } +free: + kvfree(spec); + kfree(dst); + return err ? ERR_PTR(err) : rule; +} + +static void del_ethtool_rule(struct mlx5e_priv *priv, + struct mlx5e_ethtool_rule *eth_rule) +{ + if (eth_rule->rule) + mlx5_del_flow_rule(eth_rule->rule); + list_del(ð_rule->list); + priv->fs.ethtool.tot_num_rules--; + put_flow_table(eth_rule->eth_ft); + kfree(eth_rule); +} + +static struct mlx5e_ethtool_rule *find_ethtool_rule(struct mlx5e_priv *priv, + int location) +{ + struct mlx5e_ethtool_rule *iter; + + list_for_each_entry(iter, &priv->fs.ethtool.rules, list) { + if (iter->flow_spec.location == location) + return iter; + } + return NULL; +} + +static struct mlx5e_ethtool_rule *get_ethtool_rule(struct mlx5e_priv *priv, + int location) +{ + struct mlx5e_ethtool_rule *eth_rule; + + eth_rule = find_ethtool_rule(priv, location); + if (eth_rule) + del_ethtool_rule(priv, eth_rule); + + eth_rule = kzalloc(sizeof(*eth_rule), GFP_KERNEL); + if (!eth_rule) + return ERR_PTR(-ENOMEM); + + add_rule_to_list(priv, eth_rule); + return eth_rule; +} + +#define MAX_NUM_OF_ETHTOOL_RULES BIT(10) + +#define all_ones(field) (field == (__force typeof(field))-1) +#define all_zeros_or_all_ones(field) \ + ((field) == 0 || (field) == (__force typeof(field))-1) + +static int validate_flow(struct mlx5e_priv *priv, + struct ethtool_rx_flow_spec *fs) +{ + struct ethtool_tcpip4_spec *l4_mask; + struct ethtool_usrip4_spec *l3_mask; + struct ethhdr *eth_mask; + int num_tuples = 0; + + if (fs->location >= MAX_NUM_OF_ETHTOOL_RULES) + return -EINVAL; + + if (fs->ring_cookie >= priv->params.num_channels && + fs->ring_cookie != RX_CLS_FLOW_DISC) + return -EINVAL; + + switch (fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT)) { + case ETHER_FLOW: + eth_mask = &fs->m_u.ether_spec; + if (!is_zero_ether_addr(eth_mask->h_dest)) + num_tuples++; + if (!is_zero_ether_addr(eth_mask->h_source)) + num_tuples++; + if (eth_mask->h_proto) + num_tuples++; + break; + case TCP_V4_FLOW: + case UDP_V4_FLOW: + if (fs->m_u.tcp_ip4_spec.tos) + return -EINVAL; + l4_mask = &fs->m_u.tcp_ip4_spec; + if (l4_mask->ip4src) { + if (!all_ones(l4_mask->ip4src)) + return -EINVAL; + num_tuples++; + } + if (l4_mask->ip4dst) { + if (!all_ones(l4_mask->ip4dst)) + return -EINVAL; + num_tuples++; + } + if (l4_mask->psrc) { + if (!all_ones(l4_mask->psrc)) + return -EINVAL; + num_tuples++; + } + if (l4_mask->pdst) { + if (!all_ones(l4_mask->pdst)) + return -EINVAL; + num_tuples++; + } + /* Flow is TCP/UDP */ + num_tuples++; + break; + case IP_USER_FLOW: + l3_mask = &fs->m_u.usr_ip4_spec; + if (l3_mask->l4_4_bytes || l3_mask->tos || l3_mask->proto || + fs->h_u.usr_ip4_spec.ip_ver != ETH_RX_NFC_IP4) + return -EINVAL; + if (l3_mask->ip4src) { + if (!all_ones(l3_mask->ip4src)) + return -EINVAL; + num_tuples++; + } + if (l3_mask->ip4dst) { + if (!all_ones(l3_mask->ip4dst)) + return -EINVAL; + num_tuples++; + } + /* Flow is IPv4 */ + num_tuples++; + break; + default: + return -EINVAL; + } + if ((fs->flow_type & FLOW_EXT)) { + if (fs->m_ext.vlan_etype || + (fs->m_ext.vlan_tci != cpu_to_be16(VLAN_VID_MASK))) + return -EINVAL; + + if (fs->m_ext.vlan_tci) { + if (be16_to_cpu(fs->h_ext.vlan_tci) >= VLAN_N_VID) + return -EINVAL; + } + num_tuples++; + } + + if (fs->flow_type & FLOW_MAC_EXT && + !is_zero_ether_addr(fs->m_ext.h_dest)) + num_tuples++; + + return num_tuples; +} + +int mlx5e_ethtool_flow_replace(struct mlx5e_priv *priv, + struct ethtool_rx_flow_spec *fs) +{ + struct mlx5e_ethtool_table *eth_ft; + struct mlx5e_ethtool_rule *eth_rule; + struct mlx5_flow_rule *rule; + int num_tuples; + int err; + + num_tuples = validate_flow(priv, fs); + if (num_tuples <= 0) { + netdev_warn(priv->netdev, "%s: flow is not valid\n", __func__); + return -EINVAL; + } + + eth_ft = get_flow_table(priv, fs, num_tuples); + if (IS_ERR(eth_ft)) + return PTR_ERR(eth_ft); + + eth_rule = get_ethtool_rule(priv, fs->location); + if (IS_ERR(eth_rule)) { + put_flow_table(eth_ft); + return PTR_ERR(eth_rule); + } + + eth_rule->flow_spec = *fs; + eth_rule->eth_ft = eth_ft; + if (!eth_ft->ft) { + err = -EINVAL; + goto del_ethtool_rule; + } + rule = add_ethtool_flow_rule(priv, eth_ft->ft, fs); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + goto del_ethtool_rule; + } + + eth_rule->rule = rule; + + return 0; + +del_ethtool_rule: + del_ethtool_rule(priv, eth_rule); + + return err; +} + +int mlx5e_ethtool_flow_remove(struct mlx5e_priv *priv, + int location) +{ + struct mlx5e_ethtool_rule *eth_rule; + int err = 0; + + if (location >= MAX_NUM_OF_ETHTOOL_RULES) + return -ENOSPC; + + eth_rule = find_ethtool_rule(priv, location); + if (!eth_rule) { + err = -ENOENT; + goto out; + } + + del_ethtool_rule(priv, eth_rule); +out: + return err; +} + +int mlx5e_ethtool_get_flow(struct mlx5e_priv *priv, struct ethtool_rxnfc *info, + int location) +{ + struct mlx5e_ethtool_rule *eth_rule; + + if (location < 0 || location >= MAX_NUM_OF_ETHTOOL_RULES) + return -EINVAL; + + list_for_each_entry(eth_rule, &priv->fs.ethtool.rules, list) { + if (eth_rule->flow_spec.location == location) { + info->fs = eth_rule->flow_spec; + return 0; + } + } + + return -ENOENT; +} + +int mlx5e_ethtool_get_all_flows(struct mlx5e_priv *priv, struct ethtool_rxnfc *info, + u32 *rule_locs) +{ + int location = 0; + int idx = 0; + int err = 0; + + while ((!err || err == -ENOENT) && idx < info->rule_cnt) { + err = mlx5e_ethtool_get_flow(priv, info, location); + if (!err) + rule_locs[idx++] = location; + location++; + } + return err; +} + +void mlx5e_ethtool_cleanup_steering(struct mlx5e_priv *priv) +{ + struct mlx5e_ethtool_rule *iter; + struct mlx5e_ethtool_rule *temp; + + list_for_each_entry_safe(iter, temp, &priv->fs.ethtool.rules, list) + del_ethtool_rule(priv, iter); +} + +void mlx5e_ethtool_init_steering(struct mlx5e_priv *priv) +{ + INIT_LIST_HEAD(&priv->fs.ethtool.rules); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 94fef705890b..2459c7f3db8d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -40,27 +40,33 @@ #include "vxlan.h" struct mlx5e_rq_param { - u32 rqc[MLX5_ST_SZ_DW(rqc)]; - struct mlx5_wq_param wq; + u32 rqc[MLX5_ST_SZ_DW(rqc)]; + struct mlx5_wq_param wq; + bool am_enabled; }; struct mlx5e_sq_param { u32 sqc[MLX5_ST_SZ_DW(sqc)]; struct mlx5_wq_param wq; u16 max_inline; + u8 min_inline_mode; + bool icosq; }; struct mlx5e_cq_param { u32 cqc[MLX5_ST_SZ_DW(cqc)]; struct mlx5_wq_param wq; u16 eq_ix; + u8 cq_period_mode; }; struct mlx5e_channel_param { struct mlx5e_rq_param rq; struct mlx5e_sq_param sq; + struct mlx5e_sq_param icosq; struct mlx5e_cq_param rx_cq; struct mlx5e_cq_param tx_cq; + struct mlx5e_cq_param icosq_cq; }; static void mlx5e_update_carrier(struct mlx5e_priv *priv) @@ -71,10 +77,13 @@ static void mlx5e_update_carrier(struct mlx5e_priv *priv) port_state = mlx5_query_vport_state(mdev, MLX5_QUERY_VPORT_STATE_IN_OP_MOD_VNIC_VPORT, 0); - if (port_state == VPORT_STATE_UP) + if (port_state == VPORT_STATE_UP) { + netdev_info(priv->netdev, "Link up\n"); netif_carrier_on(priv->netdev); - else + } else { + netdev_info(priv->netdev, "Link down\n"); netif_carrier_off(priv->netdev); + } } static void mlx5e_update_carrier_work(struct work_struct *work) @@ -88,111 +97,86 @@ static void mlx5e_update_carrier_work(struct work_struct *work) mutex_unlock(&priv->state_lock); } -static void mlx5e_update_pport_counters(struct mlx5e_priv *priv) +static void mlx5e_tx_timeout_work(struct work_struct *work) { - struct mlx5_core_dev *mdev = priv->mdev; - struct mlx5e_pport_stats *s = &priv->stats.pport; - u32 *in; - u32 *out; - int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); - - in = mlx5_vzalloc(sz); - out = mlx5_vzalloc(sz); - if (!in || !out) - goto free_out; - - MLX5_SET(ppcnt_reg, in, local_port, 1); - - MLX5_SET(ppcnt_reg, in, grp, MLX5_IEEE_802_3_COUNTERS_GROUP); - mlx5_core_access_reg(mdev, in, sz, out, - sz, MLX5_REG_PPCNT, 0, 0); - memcpy(s->IEEE_802_3_counters, - MLX5_ADDR_OF(ppcnt_reg, out, counter_set), - sizeof(s->IEEE_802_3_counters)); - - MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2863_COUNTERS_GROUP); - mlx5_core_access_reg(mdev, in, sz, out, - sz, MLX5_REG_PPCNT, 0, 0); - memcpy(s->RFC_2863_counters, - MLX5_ADDR_OF(ppcnt_reg, out, counter_set), - sizeof(s->RFC_2863_counters)); - - MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2819_COUNTERS_GROUP); - mlx5_core_access_reg(mdev, in, sz, out, - sz, MLX5_REG_PPCNT, 0, 0); - memcpy(s->RFC_2819_counters, - MLX5_ADDR_OF(ppcnt_reg, out, counter_set), - sizeof(s->RFC_2819_counters)); + struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv, + tx_timeout_work); + int err; -free_out: - kvfree(in); - kvfree(out); + rtnl_lock(); + mutex_lock(&priv->state_lock); + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + goto unlock; + mlx5e_close_locked(priv->netdev); + err = mlx5e_open_locked(priv->netdev); + if (err) + netdev_err(priv->netdev, "mlx5e_open_locked failed recovering from a tx_timeout, err(%d).\n", + err); +unlock: + mutex_unlock(&priv->state_lock); + rtnl_unlock(); } -void mlx5e_update_stats(struct mlx5e_priv *priv) +static void mlx5e_update_sw_counters(struct mlx5e_priv *priv) { - struct mlx5_core_dev *mdev = priv->mdev; - struct mlx5e_vport_stats *s = &priv->stats.vport; + struct mlx5e_sw_stats *s = &priv->stats.sw; struct mlx5e_rq_stats *rq_stats; struct mlx5e_sq_stats *sq_stats; - u32 in[MLX5_ST_SZ_DW(query_vport_counter_in)]; - u32 *out; - int outlen = MLX5_ST_SZ_BYTES(query_vport_counter_out); - u64 tx_offload_none; + u64 tx_offload_none = 0; int i, j; - out = mlx5_vzalloc(outlen); - if (!out) - return; - - /* Collect firts the SW counters and then HW for consistency */ - s->rx_packets = 0; - s->rx_bytes = 0; - s->tx_packets = 0; - s->tx_bytes = 0; - s->tso_packets = 0; - s->tso_bytes = 0; - s->tso_inner_packets = 0; - s->tso_inner_bytes = 0; - s->tx_queue_stopped = 0; - s->tx_queue_wake = 0; - s->tx_queue_dropped = 0; - s->tx_csum_inner = 0; - tx_offload_none = 0; - s->lro_packets = 0; - s->lro_bytes = 0; - s->rx_csum_none = 0; - s->rx_csum_sw = 0; - s->rx_wqe_err = 0; + memset(s, 0, sizeof(*s)); for (i = 0; i < priv->params.num_channels; i++) { rq_stats = &priv->channel[i]->rq.stats; s->rx_packets += rq_stats->packets; s->rx_bytes += rq_stats->bytes; - s->lro_packets += rq_stats->lro_packets; - s->lro_bytes += rq_stats->lro_bytes; + s->rx_lro_packets += rq_stats->lro_packets; + s->rx_lro_bytes += rq_stats->lro_bytes; s->rx_csum_none += rq_stats->csum_none; - s->rx_csum_sw += rq_stats->csum_sw; + s->rx_csum_complete += rq_stats->csum_complete; + s->rx_csum_unnecessary_inner += rq_stats->csum_unnecessary_inner; s->rx_wqe_err += rq_stats->wqe_err; + s->rx_mpwqe_filler += rq_stats->mpwqe_filler; + s->rx_mpwqe_frag += rq_stats->mpwqe_frag; + s->rx_buff_alloc_err += rq_stats->buff_alloc_err; + s->rx_cqe_compress_blks += rq_stats->cqe_compress_blks; + s->rx_cqe_compress_pkts += rq_stats->cqe_compress_pkts; for (j = 0; j < priv->params.num_tc; j++) { sq_stats = &priv->channel[i]->sq[j].stats; s->tx_packets += sq_stats->packets; s->tx_bytes += sq_stats->bytes; - s->tso_packets += sq_stats->tso_packets; - s->tso_bytes += sq_stats->tso_bytes; - s->tso_inner_packets += sq_stats->tso_inner_packets; - s->tso_inner_bytes += sq_stats->tso_inner_bytes; + s->tx_tso_packets += sq_stats->tso_packets; + s->tx_tso_bytes += sq_stats->tso_bytes; + s->tx_tso_inner_packets += sq_stats->tso_inner_packets; + s->tx_tso_inner_bytes += sq_stats->tso_inner_bytes; s->tx_queue_stopped += sq_stats->stopped; s->tx_queue_wake += sq_stats->wake; s->tx_queue_dropped += sq_stats->dropped; - s->tx_csum_inner += sq_stats->csum_offload_inner; - tx_offload_none += sq_stats->csum_offload_none; + s->tx_xmit_more += sq_stats->xmit_more; + s->tx_csum_partial_inner += sq_stats->csum_partial_inner; + tx_offload_none += sq_stats->csum_none; } } - /* HW counters */ + /* Update calculated offload counters */ + s->tx_csum_partial = s->tx_packets - tx_offload_none - s->tx_csum_partial_inner; + s->rx_csum_unnecessary = s->rx_packets - s->rx_csum_none - s->rx_csum_complete; + + s->link_down_events_phy = MLX5_GET(ppcnt_reg, + priv->stats.pport.phy_counters, + counter_set.phys_layer_cntrs.link_down_events); +} + +static void mlx5e_update_vport_counters(struct mlx5e_priv *priv) +{ + int outlen = MLX5_ST_SZ_BYTES(query_vport_counter_out); + u32 *out = (u32 *)priv->stats.vport.query_vport_out; + u32 in[MLX5_ST_SZ_DW(query_vport_counter_in)]; + struct mlx5_core_dev *mdev = priv->mdev; + memset(in, 0, sizeof(in)); MLX5_SET(query_vport_counter_in, in, opcode, @@ -202,66 +186,79 @@ void mlx5e_update_stats(struct mlx5e_priv *priv) memset(out, 0, outlen); - if (mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen)) + mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen); +} + +static void mlx5e_update_pport_counters(struct mlx5e_priv *priv) +{ + struct mlx5e_pport_stats *pstats = &priv->stats.pport; + struct mlx5_core_dev *mdev = priv->mdev; + int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); + int prio; + void *out; + u32 *in; + + in = mlx5_vzalloc(sz); + if (!in) goto free_out; -#define MLX5_GET_CTR(p, x) \ - MLX5_GET64(query_vport_counter_out, p, x) - - s->rx_error_packets = - MLX5_GET_CTR(out, received_errors.packets); - s->rx_error_bytes = - MLX5_GET_CTR(out, received_errors.octets); - s->tx_error_packets = - MLX5_GET_CTR(out, transmit_errors.packets); - s->tx_error_bytes = - MLX5_GET_CTR(out, transmit_errors.octets); - - s->rx_unicast_packets = - MLX5_GET_CTR(out, received_eth_unicast.packets); - s->rx_unicast_bytes = - MLX5_GET_CTR(out, received_eth_unicast.octets); - s->tx_unicast_packets = - MLX5_GET_CTR(out, transmitted_eth_unicast.packets); - s->tx_unicast_bytes = - MLX5_GET_CTR(out, transmitted_eth_unicast.octets); - - s->rx_multicast_packets = - MLX5_GET_CTR(out, received_eth_multicast.packets); - s->rx_multicast_bytes = - MLX5_GET_CTR(out, received_eth_multicast.octets); - s->tx_multicast_packets = - MLX5_GET_CTR(out, transmitted_eth_multicast.packets); - s->tx_multicast_bytes = - MLX5_GET_CTR(out, transmitted_eth_multicast.octets); - - s->rx_broadcast_packets = - MLX5_GET_CTR(out, received_eth_broadcast.packets); - s->rx_broadcast_bytes = - MLX5_GET_CTR(out, received_eth_broadcast.octets); - s->tx_broadcast_packets = - MLX5_GET_CTR(out, transmitted_eth_broadcast.packets); - s->tx_broadcast_bytes = - MLX5_GET_CTR(out, transmitted_eth_broadcast.octets); + MLX5_SET(ppcnt_reg, in, local_port, 1); + + out = pstats->IEEE_802_3_counters; + MLX5_SET(ppcnt_reg, in, grp, MLX5_IEEE_802_3_COUNTERS_GROUP); + mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); - /* Update calculated offload counters */ - s->tx_csum_offload = s->tx_packets - tx_offload_none - s->tx_csum_inner; - s->rx_csum_good = s->rx_packets - s->rx_csum_none - - s->rx_csum_sw; + out = pstats->RFC_2863_counters; + MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2863_COUNTERS_GROUP); + mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); + + out = pstats->RFC_2819_counters; + MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2819_COUNTERS_GROUP); + mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); + + out = pstats->phy_counters; + MLX5_SET(ppcnt_reg, in, grp, MLX5_PHYSICAL_LAYER_COUNTERS_GROUP); + mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); + + MLX5_SET(ppcnt_reg, in, grp, MLX5_PER_PRIORITY_COUNTERS_GROUP); + for (prio = 0; prio < NUM_PPORT_PRIO; prio++) { + out = pstats->per_prio_counters[prio]; + MLX5_SET(ppcnt_reg, in, prio_tc, prio); + mlx5_core_access_reg(mdev, in, sz, out, sz, + MLX5_REG_PPCNT, 0, 0); + } - mlx5e_update_pport_counters(priv); free_out: - kvfree(out); + kvfree(in); } -static void mlx5e_update_stats_work(struct work_struct *work) +static void mlx5e_update_q_counter(struct mlx5e_priv *priv) +{ + struct mlx5e_qcounter_stats *qcnt = &priv->stats.qcnt; + + if (!priv->q_counter) + return; + + mlx5_core_query_out_of_buffer(priv->mdev, priv->q_counter, + &qcnt->rx_out_of_buffer); +} + +void mlx5e_update_stats(struct mlx5e_priv *priv) +{ + mlx5e_update_q_counter(priv); + mlx5e_update_vport_counters(priv); + mlx5e_update_pport_counters(priv); + mlx5e_update_sw_counters(priv); +} + +void mlx5e_update_stats_work(struct work_struct *work) { struct delayed_work *dwork = to_delayed_work(work); struct mlx5e_priv *priv = container_of(dwork, struct mlx5e_priv, update_stats_work); mutex_lock(&priv->state_lock); if (test_bit(MLX5E_STATE_OPENED, &priv->state)) { - mlx5e_update_stats(priv); + priv->profile->update_stats(priv); queue_delayed_work(priv->wq, dwork, msecs_to_jiffies(MLX5E_UPDATE_STATS_INTERVAL)); } @@ -273,7 +270,7 @@ static void mlx5e_async_event(struct mlx5_core_dev *mdev, void *vpriv, { struct mlx5e_priv *priv = vpriv; - if (!test_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state)) + if (!test_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLED, &priv->state)) return; switch (event) { @@ -289,12 +286,12 @@ static void mlx5e_async_event(struct mlx5_core_dev *mdev, void *vpriv, static void mlx5e_enable_async_events(struct mlx5e_priv *priv) { - set_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state); + set_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLED, &priv->state); } static void mlx5e_disable_async_events(struct mlx5e_priv *priv) { - clear_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state); + clear_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLED, &priv->state); synchronize_irq(mlx5_get_msix_vec(priv->mdev, MLX5_EQ_VEC_ASYNC)); } @@ -309,6 +306,7 @@ static int mlx5e_create_rq(struct mlx5e_channel *c, struct mlx5_core_dev *mdev = priv->mdev; void *rqc = param->rqc; void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq); + u32 byte_count; int wq_sz; int err; int i; @@ -323,32 +321,64 @@ static int mlx5e_create_rq(struct mlx5e_channel *c, rq->wq.db = &rq->wq.db[MLX5_RCV_DBR]; wq_sz = mlx5_wq_ll_get_size(&rq->wq); - rq->skb = kzalloc_node(wq_sz * sizeof(*rq->skb), GFP_KERNEL, - cpu_to_node(c->cpu)); - if (!rq->skb) { - err = -ENOMEM; - goto err_rq_wq_destroy; - } - rq->wqe_sz = (priv->params.lro_en) ? priv->params.lro_wqe_sz : - MLX5E_SW2HW_MTU(priv->netdev->mtu); - rq->wqe_sz = SKB_DATA_ALIGN(rq->wqe_sz + MLX5E_NET_IP_ALIGN); + switch (priv->params.rq_wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + rq->wqe_info = kzalloc_node(wq_sz * sizeof(*rq->wqe_info), + GFP_KERNEL, cpu_to_node(c->cpu)); + if (!rq->wqe_info) { + err = -ENOMEM; + goto err_rq_wq_destroy; + } + rq->handle_rx_cqe = mlx5e_handle_rx_cqe_mpwrq; + rq->alloc_wqe = mlx5e_alloc_rx_mpwqe; + rq->dealloc_wqe = mlx5e_dealloc_rx_mpwqe; + + rq->mpwqe_mtt_offset = c->ix * + MLX5E_REQUIRED_MTTS(1, BIT(priv->params.log_rq_size)); + + rq->mpwqe_stride_sz = BIT(priv->params.mpwqe_log_stride_sz); + rq->mpwqe_num_strides = BIT(priv->params.mpwqe_log_num_strides); + rq->wqe_sz = rq->mpwqe_stride_sz * rq->mpwqe_num_strides; + byte_count = rq->wqe_sz; + break; + default: /* MLX5_WQ_TYPE_LINKED_LIST */ + rq->skb = kzalloc_node(wq_sz * sizeof(*rq->skb), GFP_KERNEL, + cpu_to_node(c->cpu)); + if (!rq->skb) { + err = -ENOMEM; + goto err_rq_wq_destroy; + } + rq->handle_rx_cqe = mlx5e_handle_rx_cqe; + rq->alloc_wqe = mlx5e_alloc_rx_wqe; + rq->dealloc_wqe = mlx5e_dealloc_rx_wqe; + + rq->wqe_sz = (priv->params.lro_en) ? + priv->params.lro_wqe_sz : + MLX5E_SW2HW_MTU(priv->netdev->mtu); + rq->wqe_sz = SKB_DATA_ALIGN(rq->wqe_sz); + byte_count = rq->wqe_sz; + byte_count |= MLX5_HW_START_PADDING; + } for (i = 0; i < wq_sz; i++) { struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(&rq->wq, i); - u32 byte_count = rq->wqe_sz - MLX5E_NET_IP_ALIGN; - wqe->data.lkey = c->mkey_be; - wqe->data.byte_count = - cpu_to_be32(byte_count | MLX5_HW_START_PADDING); + wqe->data.byte_count = cpu_to_be32(byte_count); } + INIT_WORK(&rq->am.work, mlx5e_rx_am_work); + rq->am.mode = priv->params.rx_cq_period_mode; + + rq->wq_type = priv->params.rq_wq_type; rq->pdev = c->pdev; rq->netdev = c->netdev; rq->tstamp = &priv->tstamp; rq->channel = c; rq->ix = c->ix; rq->priv = c->priv; + rq->mkey_be = c->mkey_be; + rq->umr_mkey_be = cpu_to_be32(c->priv->umr_mkey.key); return 0; @@ -360,7 +390,14 @@ err_rq_wq_destroy: static void mlx5e_destroy_rq(struct mlx5e_rq *rq) { - kfree(rq->skb); + switch (rq->wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + kfree(rq->wqe_info); + break; + default: /* MLX5_WQ_TYPE_LINKED_LIST */ + kfree(rq->skb); + } + mlx5_wq_destroy(&rq->wq_ctrl); } @@ -388,7 +425,7 @@ static int mlx5e_enable_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param) MLX5_SET(rqc, rqc, cqn, rq->cq.mcq.cqn); MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST); - MLX5_SET(rqc, rqc, flush_in_error_en, 1); + MLX5_SET(rqc, rqc, vsd, priv->params.vlan_strip_disable); MLX5_SET(wq, wq, log_wq_pg_sz, rq->wq_ctrl.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); MLX5_SET64(wq, wq, dbr_addr, rq->wq_ctrl.db.dma); @@ -403,7 +440,8 @@ static int mlx5e_enable_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param) return err; } -static int mlx5e_modify_rq(struct mlx5e_rq *rq, int curr_state, int next_state) +static int mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state, + int next_state) { struct mlx5e_channel *c = rq->channel; struct mlx5e_priv *priv = c->priv; @@ -431,6 +469,36 @@ static int mlx5e_modify_rq(struct mlx5e_rq *rq, int curr_state, int next_state) return err; } +static int mlx5e_modify_rq_vsd(struct mlx5e_rq *rq, bool vsd) +{ + struct mlx5e_channel *c = rq->channel; + struct mlx5e_priv *priv = c->priv; + struct mlx5_core_dev *mdev = priv->mdev; + + void *in; + void *rqc; + int inlen; + int err; + + inlen = MLX5_ST_SZ_BYTES(modify_rq_in); + in = mlx5_vzalloc(inlen); + if (!in) + return -ENOMEM; + + rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx); + + MLX5_SET(modify_rq_in, in, rq_state, MLX5_RQC_STATE_RDY); + MLX5_SET64(modify_rq_in, in, modify_bitmask, MLX5_RQ_BITMASK_VSD); + MLX5_SET(rqc, rqc, vsd, vsd); + MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RDY); + + err = mlx5_core_modify_rq(mdev, rq->rqn, in, inlen); + + kvfree(in); + + return err; +} + static void mlx5e_disable_rq(struct mlx5e_rq *rq) { mlx5_core_destroy_rq(rq->priv->mdev, rq->rqn); @@ -453,10 +521,33 @@ static int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq) return -ETIMEDOUT; } +static void mlx5e_free_rx_descs(struct mlx5e_rq *rq) +{ + struct mlx5_wq_ll *wq = &rq->wq; + struct mlx5e_rx_wqe *wqe; + __be16 wqe_ix_be; + u16 wqe_ix; + + /* UMR WQE (if in progress) is always at wq->head */ + if (test_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state)) + mlx5e_free_rx_fragmented_mpwqe(rq, &rq->wqe_info[wq->head]); + + while (!mlx5_wq_ll_is_empty(wq)) { + wqe_ix_be = *wq->tail_next; + wqe_ix = be16_to_cpu(wqe_ix_be); + wqe = mlx5_wq_ll_get_wqe(&rq->wq, wqe_ix); + rq->dealloc_wqe(rq, wqe_ix); + mlx5_wq_ll_pop(&rq->wq, wqe_ix_be, + &wqe->next.next_wqe_index); + } +} + static int mlx5e_open_rq(struct mlx5e_channel *c, struct mlx5e_rq_param *param, struct mlx5e_rq *rq) { + struct mlx5e_sq *sq = &c->icosq; + u16 pi = sq->pc & sq->wq.sz_m1; int err; err = mlx5e_create_rq(c, param, rq); @@ -467,12 +558,16 @@ static int mlx5e_open_rq(struct mlx5e_channel *c, if (err) goto err_destroy_rq; - err = mlx5e_modify_rq(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY); + err = mlx5e_modify_rq_state(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY); if (err) goto err_disable_rq; - set_bit(MLX5E_RQ_STATE_POST_WQES_ENABLE, &rq->state); - mlx5e_send_nop(&c->sq[0], true); /* trigger mlx5e_post_rx_wqes() */ + if (param->am_enabled) + set_bit(MLX5E_RQ_STATE_AM, &c->rq.state); + + sq->ico_wqe_info[pi].opcode = MLX5_OPCODE_NOP; + sq->ico_wqe_info[pi].num_wqebbs = 1; + mlx5e_send_nop(sq, true); /* trigger mlx5e_post_rx_wqes() */ return 0; @@ -486,17 +581,12 @@ err_destroy_rq: static void mlx5e_close_rq(struct mlx5e_rq *rq) { - clear_bit(MLX5E_RQ_STATE_POST_WQES_ENABLE, &rq->state); + set_bit(MLX5E_RQ_STATE_FLUSH, &rq->state); napi_synchronize(&rq->channel->napi); /* prevent mlx5e_post_rx_wqes */ - - mlx5e_modify_rq(rq, MLX5_RQC_STATE_RDY, MLX5_RQC_STATE_ERR); - while (!mlx5_wq_ll_is_empty(&rq->wq)) - msleep(20); - - /* avoid destroying rq before mlx5e_poll_rx_cq() is done with it */ - napi_synchronize(&rq->channel->napi); + cancel_work_sync(&rq->am.work); mlx5e_disable_rq(rq); + mlx5e_free_rx_descs(rq); mlx5e_destroy_rq(rq); } @@ -538,10 +628,9 @@ static int mlx5e_create_sq(struct mlx5e_channel *c, void *sqc = param->sqc; void *sqc_wq = MLX5_ADDR_OF(sqc, sqc, wq); - int txq_ix; int err; - err = mlx5_alloc_map_uar(mdev, &sq->uar, true); + err = mlx5_alloc_map_uar(mdev, &sq->uar, !!MLX5_CAP_GEN(mdev, bf)); if (err) return err; @@ -561,13 +650,32 @@ static int mlx5e_create_sq(struct mlx5e_channel *c, } sq->bf_buf_size = (1 << MLX5_CAP_GEN(mdev, log_bf_reg_size)) / 2; sq->max_inline = param->max_inline; + sq->min_inline_mode = + MLX5_CAP_ETH(mdev, wqe_inline_mode) == MLX5E_INLINE_MODE_VPORT_CONTEXT ? + param->min_inline_mode : 0; err = mlx5e_alloc_sq_db(sq, cpu_to_node(c->cpu)); if (err) goto err_sq_wq_destroy; - txq_ix = c->ix + tc * priv->params.num_channels; - sq->txq = netdev_get_tx_queue(priv->netdev, txq_ix); + if (param->icosq) { + u8 wq_sz = mlx5_wq_cyc_get_size(&sq->wq); + + sq->ico_wqe_info = kzalloc_node(sizeof(*sq->ico_wqe_info) * + wq_sz, + GFP_KERNEL, + cpu_to_node(c->cpu)); + if (!sq->ico_wqe_info) { + err = -ENOMEM; + goto err_free_sq_db; + } + } else { + int txq_ix; + + txq_ix = c->ix + tc * priv->params.num_channels; + sq->txq = netdev_get_tx_queue(priv->netdev, txq_ix); + priv->txq_to_sq_map[txq_ix] = sq; + } sq->pdev = c->pdev; sq->tstamp = &priv->tstamp; @@ -576,10 +684,12 @@ static int mlx5e_create_sq(struct mlx5e_channel *c, sq->tc = tc; sq->edge = (sq->wq.sz_m1 + 1) - MLX5_SEND_WQE_MAX_WQEBBS; sq->bf_budget = MLX5E_SQ_BF_BUDGET; - priv->txq_to_sq_map[txq_ix] = sq; return 0; +err_free_sq_db: + mlx5e_free_sq_db(sq); + err_sq_wq_destroy: mlx5_wq_destroy(&sq->wq_ctrl); @@ -594,6 +704,7 @@ static void mlx5e_destroy_sq(struct mlx5e_sq *sq) struct mlx5e_channel *c = sq->channel; struct mlx5e_priv *priv = c->priv; + kfree(sq->ico_wqe_info); mlx5e_free_sq_db(sq); mlx5_wq_destroy(&sq->wq_ctrl); mlx5_unmap_free_uar(priv->mdev, &sq->uar); @@ -622,10 +733,11 @@ static int mlx5e_enable_sq(struct mlx5e_sq *sq, struct mlx5e_sq_param *param) memcpy(sqc, param->sqc, sizeof(param->sqc)); - MLX5_SET(sqc, sqc, tis_num_0, priv->tisn[sq->tc]); - MLX5_SET(sqc, sqc, cqn, c->sq[sq->tc].cq.mcq.cqn); + MLX5_SET(sqc, sqc, tis_num_0, param->icosq ? 0 : priv->tisn[sq->tc]); + MLX5_SET(sqc, sqc, cqn, sq->cq.mcq.cqn); + MLX5_SET(sqc, sqc, min_wqe_inline_mode, sq->min_inline_mode); MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST); - MLX5_SET(sqc, sqc, tis_lst_sz, 1); + MLX5_SET(sqc, sqc, tis_lst_sz, param->icosq ? 0 : 1); MLX5_SET(sqc, sqc, flush_in_error_en, 1); MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC); @@ -644,7 +756,8 @@ static int mlx5e_enable_sq(struct mlx5e_sq *sq, struct mlx5e_sq_param *param) return err; } -static int mlx5e_modify_sq(struct mlx5e_sq *sq, int curr_state, int next_state) +static int mlx5e_modify_sq(struct mlx5e_sq *sq, int curr_state, + int next_state, bool update_rl, int rl_index) { struct mlx5e_channel *c = sq->channel; struct mlx5e_priv *priv = c->priv; @@ -664,6 +777,10 @@ static int mlx5e_modify_sq(struct mlx5e_sq *sq, int curr_state, int next_state) MLX5_SET(modify_sq_in, in, sq_state, curr_state); MLX5_SET(sqc, sqc, state, next_state); + if (update_rl && next_state == MLX5_SQC_STATE_RDY) { + MLX5_SET64(modify_sq_in, in, modify_bitmask, 1); + MLX5_SET(sqc, sqc, packet_pacing_rate_limit_index, rl_index); + } err = mlx5_core_modify_sq(mdev, sq->sqn, in, inlen); @@ -679,6 +796,8 @@ static void mlx5e_disable_sq(struct mlx5e_sq *sq) struct mlx5_core_dev *mdev = priv->mdev; mlx5_core_destroy_sq(mdev, sq->sqn); + if (sq->rate_limit) + mlx5_rl_remove_rate(mdev, sq->rate_limit); } static int mlx5e_open_sq(struct mlx5e_channel *c, @@ -696,13 +815,15 @@ static int mlx5e_open_sq(struct mlx5e_channel *c, if (err) goto err_destroy_sq; - err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RST, MLX5_SQC_STATE_RDY); + err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RST, MLX5_SQC_STATE_RDY, + false, 0); if (err) goto err_disable_sq; - set_bit(MLX5E_SQ_STATE_WAKE_TXQ_ENABLE, &sq->state); - netdev_tx_reset_queue(sq->txq); - netif_tx_start_queue(sq->txq); + if (sq->txq) { + netdev_tx_reset_queue(sq->txq); + netif_tx_start_queue(sq->txq); + } return 0; @@ -723,22 +844,20 @@ static inline void netif_tx_disable_queue(struct netdev_queue *txq) static void mlx5e_close_sq(struct mlx5e_sq *sq) { - clear_bit(MLX5E_SQ_STATE_WAKE_TXQ_ENABLE, &sq->state); - napi_synchronize(&sq->channel->napi); /* prevent netif_tx_wake_queue */ - netif_tx_disable_queue(sq->txq); - - /* ensure hw is notified of all pending wqes */ - if (mlx5e_sq_has_room_for(sq, 1)) - mlx5e_send_nop(sq, true); + set_bit(MLX5E_SQ_STATE_FLUSH, &sq->state); + /* prevent netif_tx_wake_queue */ + napi_synchronize(&sq->channel->napi); - mlx5e_modify_sq(sq, MLX5_SQC_STATE_RDY, MLX5_SQC_STATE_ERR); - while (sq->cc != sq->pc) /* wait till sq is empty */ - msleep(20); + if (sq->txq) { + netif_tx_disable_queue(sq->txq); - /* avoid destroying sq before mlx5e_poll_tx_cq() is done with it */ - napi_synchronize(&sq->channel->napi); + /* last doorbell out, godspeed .. */ + if (mlx5e_sq_has_room_for(sq, 1)) + mlx5e_send_nop(sq, true); + } mlx5e_disable_sq(sq); + mlx5e_free_tx_descs(sq); mlx5e_destroy_sq(sq); } @@ -776,7 +895,7 @@ static int mlx5e_create_cq(struct mlx5e_channel *c, mcq->comp = mlx5e_completion_event; mcq->event = mlx5e_cq_error_event; mcq->irqn = irqn; - mcq->uar = &priv->cq_uar; + mcq->uar = &mdev->mlx5e_res.cq_uar; for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) { struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, i); @@ -823,6 +942,7 @@ static int mlx5e_enable_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param) mlx5_vector2eqn(mdev, param->eq_ix, &eqn, &irqn_not_used); + MLX5_SET(cqc, cqc, cq_period_mode, param->cq_period_mode); MLX5_SET(cqc, cqc, c_eqn, eqn); MLX5_SET(cqc, cqc, uar_page, mcq->uar->index); MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift - @@ -852,8 +972,7 @@ static void mlx5e_disable_cq(struct mlx5e_cq *cq) static int mlx5e_open_cq(struct mlx5e_channel *c, struct mlx5e_cq_param *param, struct mlx5e_cq *cq, - u16 moderation_usecs, - u16 moderation_frames) + struct mlx5e_cq_moder moderation) { int err; struct mlx5e_priv *priv = c->priv; @@ -869,8 +988,8 @@ static int mlx5e_open_cq(struct mlx5e_channel *c, if (MLX5_CAP_GEN(mdev, cq_moderation)) mlx5_core_modify_cq_moderation(mdev, &cq->mcq, - moderation_usecs, - moderation_frames); + moderation.usec, + moderation.pkts); return 0; err_destroy_cq: @@ -899,8 +1018,7 @@ static int mlx5e_open_tx_cqs(struct mlx5e_channel *c, for (tc = 0; tc < c->num_tc; tc++) { err = mlx5e_open_cq(c, &cparam->tx_cq, &c->sq[tc].cq, - priv->params.tx_cq_moderation_usec, - priv->params.tx_cq_moderation_pkts); + priv->params.tx_cq_moderation); if (err) goto err_close_tx_cqs; } @@ -955,19 +1073,96 @@ static void mlx5e_build_channeltc_to_txq_map(struct mlx5e_priv *priv, int ix) { int i; - for (i = 0; i < MLX5E_MAX_NUM_TC; i++) + for (i = 0; i < priv->profile->max_tc; i++) priv->channeltc_to_txq_map[ix][i] = ix + i * priv->params.num_channels; } +static int mlx5e_set_sq_maxrate(struct net_device *dev, + struct mlx5e_sq *sq, u32 rate) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + u16 rl_index = 0; + int err; + + if (rate == sq->rate_limit) + /* nothing to do */ + return 0; + + if (sq->rate_limit) + /* remove current rl index to free space to next ones */ + mlx5_rl_remove_rate(mdev, sq->rate_limit); + + sq->rate_limit = 0; + + if (rate) { + err = mlx5_rl_add_rate(mdev, rate, &rl_index); + if (err) { + netdev_err(dev, "Failed configuring rate %u: %d\n", + rate, err); + return err; + } + } + + err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RDY, + MLX5_SQC_STATE_RDY, true, rl_index); + if (err) { + netdev_err(dev, "Failed configuring rate %u: %d\n", + rate, err); + /* remove the rate from the table */ + if (rate) + mlx5_rl_remove_rate(mdev, rate); + return err; + } + + sq->rate_limit = rate; + return 0; +} + +static int mlx5e_set_tx_maxrate(struct net_device *dev, int index, u32 rate) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_sq *sq = priv->txq_to_sq_map[index]; + int err = 0; + + if (!mlx5_rl_is_supported(mdev)) { + netdev_err(dev, "Rate limiting is not supported on this device\n"); + return -EINVAL; + } + + /* rate is given in Mb/sec, HW config is in Kb/sec */ + rate = rate << 10; + + /* Check whether rate in valid range, 0 is always valid */ + if (rate && !mlx5_rl_is_in_range(mdev, rate)) { + netdev_err(dev, "TX rate %u, is not in range\n", rate); + return -ERANGE; + } + + mutex_lock(&priv->state_lock); + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) + err = mlx5e_set_sq_maxrate(dev, sq, rate); + if (!err) + priv->tx_rates[index] = rate; + mutex_unlock(&priv->state_lock); + + return err; +} + static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, struct mlx5e_channel_param *cparam, struct mlx5e_channel **cp) { + struct mlx5e_cq_moder icosq_cq_moder = {0, 0}; struct net_device *netdev = priv->netdev; + struct mlx5e_cq_moder rx_cq_profile; int cpu = mlx5e_get_cpu(priv, ix); struct mlx5e_channel *c; + struct mlx5e_sq *sq; int err; + int i; c = kzalloc_node(sizeof(*c), GFP_KERNEL, cpu_to_node(cpu)); if (!c) @@ -978,29 +1173,51 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, c->cpu = cpu; c->pdev = &priv->mdev->pdev->dev; c->netdev = priv->netdev; - c->mkey_be = cpu_to_be32(priv->mkey.key); + c->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.mkey.key); c->num_tc = priv->params.num_tc; + if (priv->params.rx_am_enabled) + rx_cq_profile = mlx5e_am_get_def_profile(priv->params.rx_cq_period_mode); + else + rx_cq_profile = priv->params.rx_cq_moderation; + mlx5e_build_channeltc_to_txq_map(priv, ix); netif_napi_add(netdev, &c->napi, mlx5e_napi_poll, 64); - err = mlx5e_open_tx_cqs(c, cparam); + err = mlx5e_open_cq(c, &cparam->icosq_cq, &c->icosq.cq, icosq_cq_moder); if (err) goto err_napi_del; + err = mlx5e_open_tx_cqs(c, cparam); + if (err) + goto err_close_icosq_cq; + err = mlx5e_open_cq(c, &cparam->rx_cq, &c->rq.cq, - priv->params.rx_cq_moderation_usec, - priv->params.rx_cq_moderation_pkts); + rx_cq_profile); if (err) goto err_close_tx_cqs; napi_enable(&c->napi); - err = mlx5e_open_sqs(c, cparam); + err = mlx5e_open_sq(c, 0, &cparam->icosq, &c->icosq); if (err) goto err_disable_napi; + err = mlx5e_open_sqs(c, cparam); + if (err) + goto err_close_icosq; + + for (i = 0; i < priv->params.num_tc; i++) { + u32 txq_ix = priv->channeltc_to_txq_map[ix][i]; + + if (priv->tx_rates[txq_ix]) { + sq = priv->txq_to_sq_map[txq_ix]; + mlx5e_set_sq_maxrate(priv->netdev, sq, + priv->tx_rates[txq_ix]); + } + } + err = mlx5e_open_rq(c, &cparam->rq, &c->rq); if (err) goto err_close_sqs; @@ -1013,6 +1230,9 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, err_close_sqs: mlx5e_close_sqs(c); +err_close_icosq: + mlx5e_close_sq(&c->icosq); + err_disable_napi: napi_disable(&c->napi); mlx5e_close_cq(&c->rq.cq); @@ -1020,6 +1240,9 @@ err_disable_napi: err_close_tx_cqs: mlx5e_close_tx_cqs(c); +err_close_icosq_cq: + mlx5e_close_cq(&c->icosq.cq); + err_napi_del: netif_napi_del(&c->napi); napi_hash_del(&c->napi); @@ -1032,9 +1255,11 @@ static void mlx5e_close_channel(struct mlx5e_channel *c) { mlx5e_close_rq(&c->rq); mlx5e_close_sqs(c); + mlx5e_close_sq(&c->icosq); napi_disable(&c->napi); mlx5e_close_cq(&c->rq.cq); mlx5e_close_tx_cqs(c); + mlx5e_close_cq(&c->icosq.cq); netif_napi_del(&c->napi); napi_hash_del(&c->napi); @@ -1049,14 +1274,28 @@ static void mlx5e_build_rq_param(struct mlx5e_priv *priv, void *rqc = param->rqc; void *wq = MLX5_ADDR_OF(rqc, rqc, wq); - MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_LINKED_LIST); + switch (priv->params.rq_wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + MLX5_SET(wq, wq, log_wqe_num_of_strides, + priv->params.mpwqe_log_num_strides - 9); + MLX5_SET(wq, wq, log_wqe_stride_size, + priv->params.mpwqe_log_stride_sz - 6); + MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ); + break; + default: /* MLX5_WQ_TYPE_LINKED_LIST */ + MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_LINKED_LIST); + } + MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN); MLX5_SET(wq, wq, log_wq_stride, ilog2(sizeof(struct mlx5e_rx_wqe))); MLX5_SET(wq, wq, log_wq_sz, priv->params.log_rq_size); - MLX5_SET(wq, wq, pd, priv->pdn); + MLX5_SET(wq, wq, pd, priv->mdev->mlx5e_res.pdn); + MLX5_SET(rqc, rqc, counter_set_id, priv->q_counter); param->wq.buf_numa_node = dev_to_node(&priv->mdev->pdev->dev); param->wq.linear = 1; + + param->am_enabled = priv->params.rx_am_enabled; } static void mlx5e_build_drop_rq_param(struct mlx5e_rq_param *param) @@ -1068,18 +1307,29 @@ static void mlx5e_build_drop_rq_param(struct mlx5e_rq_param *param) MLX5_SET(wq, wq, log_wq_stride, ilog2(sizeof(struct mlx5e_rx_wqe))); } +static void mlx5e_build_sq_param_common(struct mlx5e_priv *priv, + struct mlx5e_sq_param *param) +{ + void *sqc = param->sqc; + void *wq = MLX5_ADDR_OF(sqc, sqc, wq); + + MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB)); + MLX5_SET(wq, wq, pd, priv->mdev->mlx5e_res.pdn); + + param->wq.buf_numa_node = dev_to_node(&priv->mdev->pdev->dev); +} + static void mlx5e_build_sq_param(struct mlx5e_priv *priv, struct mlx5e_sq_param *param) { void *sqc = param->sqc; void *wq = MLX5_ADDR_OF(sqc, sqc, wq); + mlx5e_build_sq_param_common(priv, param); MLX5_SET(wq, wq, log_wq_sz, priv->params.log_sq_size); - MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB)); - MLX5_SET(wq, wq, pd, priv->pdn); - param->wq.buf_numa_node = dev_to_node(&priv->mdev->pdev->dev); param->max_inline = priv->params.tx_max_inline; + param->min_inline_mode = priv->params.tx_min_inline_mode; } static void mlx5e_build_common_cq_param(struct mlx5e_priv *priv, @@ -1087,17 +1337,33 @@ static void mlx5e_build_common_cq_param(struct mlx5e_priv *priv, { void *cqc = param->cqc; - MLX5_SET(cqc, cqc, uar_page, priv->cq_uar.index); + MLX5_SET(cqc, cqc, uar_page, priv->mdev->mlx5e_res.cq_uar.index); } static void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv, struct mlx5e_cq_param *param) { void *cqc = param->cqc; + u8 log_cq_size; - MLX5_SET(cqc, cqc, log_cq_size, priv->params.log_rq_size); + switch (priv->params.rq_wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + log_cq_size = priv->params.log_rq_size + + priv->params.mpwqe_log_num_strides; + break; + default: /* MLX5_WQ_TYPE_LINKED_LIST */ + log_cq_size = priv->params.log_rq_size; + } + + MLX5_SET(cqc, cqc, log_cq_size, log_cq_size); + if (priv->params.rx_cqe_compress) { + MLX5_SET(cqc, cqc, mini_cqe_res_format, MLX5_CQE_FORMAT_CSUM); + MLX5_SET(cqc, cqc, cqe_comp_en, 1); + } mlx5e_build_common_cq_param(priv, param); + + param->cq_period_mode = priv->params.rx_cq_period_mode; } static void mlx5e_build_tx_cq_param(struct mlx5e_priv *priv, @@ -1105,25 +1371,56 @@ static void mlx5e_build_tx_cq_param(struct mlx5e_priv *priv, { void *cqc = param->cqc; - MLX5_SET(cqc, cqc, log_cq_size, priv->params.log_sq_size); + MLX5_SET(cqc, cqc, log_cq_size, priv->params.log_sq_size); mlx5e_build_common_cq_param(priv, param); + + param->cq_period_mode = MLX5_CQ_PERIOD_MODE_START_FROM_EQE; +} + +static void mlx5e_build_ico_cq_param(struct mlx5e_priv *priv, + struct mlx5e_cq_param *param, + u8 log_wq_size) +{ + void *cqc = param->cqc; + + MLX5_SET(cqc, cqc, log_cq_size, log_wq_size); + + mlx5e_build_common_cq_param(priv, param); + + param->cq_period_mode = MLX5_CQ_PERIOD_MODE_START_FROM_EQE; } -static void mlx5e_build_channel_param(struct mlx5e_priv *priv, - struct mlx5e_channel_param *cparam) +static void mlx5e_build_icosq_param(struct mlx5e_priv *priv, + struct mlx5e_sq_param *param, + u8 log_wq_size) { - memset(cparam, 0, sizeof(*cparam)); + void *sqc = param->sqc; + void *wq = MLX5_ADDR_OF(sqc, sqc, wq); + + mlx5e_build_sq_param_common(priv, param); + + MLX5_SET(wq, wq, log_wq_sz, log_wq_size); + MLX5_SET(sqc, sqc, reg_umr, MLX5_CAP_ETH(priv->mdev, reg_umr_sq)); + + param->icosq = true; +} + +static void mlx5e_build_channel_param(struct mlx5e_priv *priv, struct mlx5e_channel_param *cparam) +{ + u8 icosq_log_wq_sz = MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE; mlx5e_build_rq_param(priv, &cparam->rq); mlx5e_build_sq_param(priv, &cparam->sq); + mlx5e_build_icosq_param(priv, &cparam->icosq, icosq_log_wq_sz); mlx5e_build_rx_cq_param(priv, &cparam->rx_cq); mlx5e_build_tx_cq_param(priv, &cparam->tx_cq); + mlx5e_build_ico_cq_param(priv, &cparam->icosq_cq, icosq_log_wq_sz); } static int mlx5e_open_channels(struct mlx5e_priv *priv) { - struct mlx5e_channel_param cparam; + struct mlx5e_channel_param *cparam; int nch = priv->params.num_channels; int err = -ENOMEM; int i; @@ -1135,12 +1432,15 @@ static int mlx5e_open_channels(struct mlx5e_priv *priv) priv->txq_to_sq_map = kcalloc(nch * priv->params.num_tc, sizeof(struct mlx5e_sq *), GFP_KERNEL); - if (!priv->channel || !priv->txq_to_sq_map) + cparam = kzalloc(sizeof(struct mlx5e_channel_param), GFP_KERNEL); + + if (!priv->channel || !priv->txq_to_sq_map || !cparam) goto err_free_txq_to_sq_map; - mlx5e_build_channel_param(priv, &cparam); + mlx5e_build_channel_param(priv, cparam); + for (i = 0; i < nch; i++) { - err = mlx5e_open_channel(priv, i, &cparam, &priv->channel[i]); + err = mlx5e_open_channel(priv, i, cparam, &priv->channel[i]); if (err) goto err_close_channels; } @@ -1151,6 +1451,12 @@ static int mlx5e_open_channels(struct mlx5e_priv *priv) goto err_close_channels; } + /* FIXME: This is a W/A for tx timeout watch dog false alarm when + * polling for inactive tx queues. + */ + netif_tx_start_all_queues(priv->netdev); + + kfree(cparam); return 0; err_close_channels: @@ -1160,6 +1466,7 @@ err_close_channels: err_free_txq_to_sq_map: kfree(priv->txq_to_sq_map); kfree(priv->channel); + kfree(cparam); return err; } @@ -1168,6 +1475,12 @@ static void mlx5e_close_channels(struct mlx5e_priv *priv) { int i; + /* FIXME: This is a W/A only for tx timeout watch dog false alarm when + * polling for inactive tx queues. + */ + netif_tx_stop_all_queues(priv->netdev); + netif_tx_disable(priv->netdev); + for (i = 0; i < priv->params.num_channels; i++) mlx5e_close_channel(priv->channel[i]); @@ -1199,48 +1512,37 @@ static void mlx5e_fill_indir_rqt_rqns(struct mlx5e_priv *priv, void *rqtc) for (i = 0; i < MLX5E_INDIR_RQT_SIZE; i++) { int ix = i; + u32 rqn; if (priv->params.rss_hfunc == ETH_RSS_HASH_XOR) ix = mlx5e_bits_invert(i, MLX5E_LOG_INDIR_RQT_SIZE); ix = priv->params.indirection_rqt[ix]; - MLX5_SET(rqtc, rqtc, rq_num[i], - test_bit(MLX5E_STATE_OPENED, &priv->state) ? - priv->channel[ix]->rq.rqn : - priv->drop_rq.rqn); + rqn = test_bit(MLX5E_STATE_OPENED, &priv->state) ? + priv->channel[ix]->rq.rqn : + priv->drop_rq.rqn; + MLX5_SET(rqtc, rqtc, rq_num[i], rqn); } } -static void mlx5e_fill_rqt_rqns(struct mlx5e_priv *priv, void *rqtc, - enum mlx5e_rqt_ix rqt_ix) +static void mlx5e_fill_direct_rqt_rqn(struct mlx5e_priv *priv, void *rqtc, + int ix) { + u32 rqn = test_bit(MLX5E_STATE_OPENED, &priv->state) ? + priv->channel[ix]->rq.rqn : + priv->drop_rq.rqn; - switch (rqt_ix) { - case MLX5E_INDIRECTION_RQT: - mlx5e_fill_indir_rqt_rqns(priv, rqtc); - - break; - - default: /* MLX5E_SINGLE_RQ_RQT */ - MLX5_SET(rqtc, rqtc, rq_num[0], - test_bit(MLX5E_STATE_OPENED, &priv->state) ? - priv->channel[0]->rq.rqn : - priv->drop_rq.rqn); - - break; - } + MLX5_SET(rqtc, rqtc, rq_num[0], rqn); } -static int mlx5e_create_rqt(struct mlx5e_priv *priv, enum mlx5e_rqt_ix rqt_ix) +static int mlx5e_create_rqt(struct mlx5e_priv *priv, int sz, + int ix, struct mlx5e_rqt *rqt) { struct mlx5_core_dev *mdev = priv->mdev; - u32 *in; void *rqtc; int inlen; - int sz; int err; - - sz = (rqt_ix == MLX5E_SINGLE_RQ_RQT) ? 1 : MLX5E_INDIR_RQT_SIZE; + u32 *in; inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * sz; in = mlx5_vzalloc(inlen); @@ -1252,26 +1554,62 @@ static int mlx5e_create_rqt(struct mlx5e_priv *priv, enum mlx5e_rqt_ix rqt_ix) MLX5_SET(rqtc, rqtc, rqt_actual_size, sz); MLX5_SET(rqtc, rqtc, rqt_max_size, sz); - mlx5e_fill_rqt_rqns(priv, rqtc, rqt_ix); + if (sz > 1) /* RSS */ + mlx5e_fill_indir_rqt_rqns(priv, rqtc); + else + mlx5e_fill_direct_rqt_rqn(priv, rqtc, ix); - err = mlx5_core_create_rqt(mdev, in, inlen, &priv->rqtn[rqt_ix]); + err = mlx5_core_create_rqt(mdev, in, inlen, &rqt->rqtn); + if (!err) + rqt->enabled = true; kvfree(in); + return err; +} + +void mlx5e_destroy_rqt(struct mlx5e_priv *priv, struct mlx5e_rqt *rqt) +{ + rqt->enabled = false; + mlx5_core_destroy_rqt(priv->mdev, rqt->rqtn); +} + +static int mlx5e_create_indirect_rqts(struct mlx5e_priv *priv) +{ + struct mlx5e_rqt *rqt = &priv->indir_rqt; + + return mlx5e_create_rqt(priv, MLX5E_INDIR_RQT_SIZE, 0, rqt); +} + +int mlx5e_create_direct_rqts(struct mlx5e_priv *priv) +{ + struct mlx5e_rqt *rqt; + int err; + int ix; + + for (ix = 0; ix < priv->profile->max_nch(priv->mdev); ix++) { + rqt = &priv->direct_tir[ix].rqt; + err = mlx5e_create_rqt(priv, 1 /*size */, ix, rqt); + if (err) + goto err_destroy_rqts; + } + + return 0; + +err_destroy_rqts: + for (ix--; ix >= 0; ix--) + mlx5e_destroy_rqt(priv, &priv->direct_tir[ix].rqt); return err; } -int mlx5e_redirect_rqt(struct mlx5e_priv *priv, enum mlx5e_rqt_ix rqt_ix) +int mlx5e_redirect_rqt(struct mlx5e_priv *priv, u32 rqtn, int sz, int ix) { struct mlx5_core_dev *mdev = priv->mdev; - u32 *in; void *rqtc; int inlen; - int sz; + u32 *in; int err; - sz = (rqt_ix == MLX5E_SINGLE_RQ_RQT) ? 1 : MLX5E_INDIR_RQT_SIZE; - inlen = MLX5_ST_SZ_BYTES(modify_rqt_in) + sizeof(u32) * sz; in = mlx5_vzalloc(inlen); if (!in) @@ -1280,27 +1618,36 @@ int mlx5e_redirect_rqt(struct mlx5e_priv *priv, enum mlx5e_rqt_ix rqt_ix) rqtc = MLX5_ADDR_OF(modify_rqt_in, in, ctx); MLX5_SET(rqtc, rqtc, rqt_actual_size, sz); - - mlx5e_fill_rqt_rqns(priv, rqtc, rqt_ix); + if (sz > 1) /* RSS */ + mlx5e_fill_indir_rqt_rqns(priv, rqtc); + else + mlx5e_fill_direct_rqt_rqn(priv, rqtc, ix); MLX5_SET(modify_rqt_in, in, bitmask.rqn_list, 1); - err = mlx5_core_modify_rqt(mdev, priv->rqtn[rqt_ix], in, inlen); + err = mlx5_core_modify_rqt(mdev, rqtn, in, inlen); kvfree(in); return err; } -static void mlx5e_destroy_rqt(struct mlx5e_priv *priv, enum mlx5e_rqt_ix rqt_ix) -{ - mlx5_core_destroy_rqt(priv->mdev, priv->rqtn[rqt_ix]); -} - static void mlx5e_redirect_rqts(struct mlx5e_priv *priv) { - mlx5e_redirect_rqt(priv, MLX5E_INDIRECTION_RQT); - mlx5e_redirect_rqt(priv, MLX5E_SINGLE_RQ_RQT); + u32 rqtn; + int ix; + + if (priv->indir_rqt.enabled) { + rqtn = priv->indir_rqt.rqtn; + mlx5e_redirect_rqt(priv, rqtn, MLX5E_INDIR_RQT_SIZE, 0); + } + + for (ix = 0; ix < priv->params.num_channels; ix++) { + if (!priv->direct_tir[ix].rqt.enabled) + continue; + rqtn = priv->direct_tir[ix].rqt.rqtn; + mlx5e_redirect_rqt(priv, rqtn, 1, ix); + } } static void mlx5e_build_tir_ctx_lro(void *tirc, struct mlx5e_priv *priv) @@ -1345,6 +1692,7 @@ static int mlx5e_modify_tirs_lro(struct mlx5e_priv *priv) int inlen; int err; int tt; + int ix; inlen = MLX5_ST_SZ_BYTES(modify_tir_in); in = mlx5_vzalloc(inlen); @@ -1356,53 +1704,26 @@ static int mlx5e_modify_tirs_lro(struct mlx5e_priv *priv) mlx5e_build_tir_ctx_lro(tirc, priv); - for (tt = 0; tt < MLX5E_NUM_TT; tt++) { - err = mlx5_core_modify_tir(mdev, priv->tirn[tt], in, inlen); + for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { + err = mlx5_core_modify_tir(mdev, priv->indir_tir[tt].tirn, in, + inlen); if (err) - break; + goto free_in; } - kvfree(in); - - return err; -} - -static int mlx5e_refresh_tir_self_loopback_enable(struct mlx5_core_dev *mdev, - u32 tirn) -{ - void *in; - int inlen; - int err; - - inlen = MLX5_ST_SZ_BYTES(modify_tir_in); - in = mlx5_vzalloc(inlen); - if (!in) - return -ENOMEM; - - MLX5_SET(modify_tir_in, in, bitmask.self_lb_en, 1); - - err = mlx5_core_modify_tir(mdev, tirn, in, inlen); + for (ix = 0; ix < priv->profile->max_nch(priv->mdev); ix++) { + err = mlx5_core_modify_tir(mdev, priv->direct_tir[ix].tirn, + in, inlen); + if (err) + goto free_in; + } +free_in: kvfree(in); return err; } -static int mlx5e_refresh_tirs_self_loopback_enable(struct mlx5e_priv *priv) -{ - int err; - int i; - - for (i = 0; i < MLX5E_NUM_TT; i++) { - err = mlx5e_refresh_tir_self_loopback_enable(priv->mdev, - priv->tirn[i]); - if (err) - return err; - } - - return 0; -} - static int mlx5e_set_mtu(struct mlx5e_priv *priv, u16 mtu) { struct mlx5_core_dev *mdev = priv->mdev; @@ -1464,13 +1785,17 @@ static void mlx5e_netdev_set_tcs(struct net_device *netdev) netdev_set_num_tc(netdev, ntc); + /* Map netdev TCs to offset 0 + * We have our own UP to TXQ mapping for QoS + */ for (tc = 0; tc < ntc; tc++) - netdev_set_tc_queue(netdev, tc, nch, tc * nch); + netdev_set_tc_queue(netdev, tc, nch, 0); } int mlx5e_open_locked(struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; int num_txqs; int err; @@ -1482,10 +1807,6 @@ int mlx5e_open_locked(struct net_device *netdev) netif_set_real_num_tx_queues(netdev, num_txqs); netif_set_real_num_rx_queues(netdev, priv->params.num_channels); - err = mlx5e_set_dev_port_mtu(netdev); - if (err) - goto err_clear_state_opened_flag; - err = mlx5e_open_channels(priv); if (err) { netdev_err(netdev, "%s: mlx5e_open_channels failed, %d\n", @@ -1493,7 +1814,7 @@ int mlx5e_open_locked(struct net_device *netdev) goto err_clear_state_opened_flag; } - err = mlx5e_refresh_tirs_self_loopback_enable(priv); + err = mlx5e_refresh_tirs_self_loopback_enable(priv->mdev); if (err) { netdev_err(netdev, "%s: mlx5e_refresh_tirs_self_loopback_enable failed, %d\n", __func__, err); @@ -1503,9 +1824,17 @@ int mlx5e_open_locked(struct net_device *netdev) mlx5e_redirect_rqts(priv); mlx5e_update_carrier(priv); mlx5e_timestamp_init(priv); +#ifdef CONFIG_RFS_ACCEL + priv->netdev->rx_cpu_rmap = priv->mdev->rmap; +#endif + if (priv->profile->update_stats) + queue_delayed_work(priv->wq, &priv->update_stats_work, 0); - queue_delayed_work(priv->wq, &priv->update_stats_work, 0); - + if (MLX5_CAP_GEN(mdev, vport_group_manager)) { + err = mlx5e_add_sqs_fwd_rules(priv); + if (err) + goto err_close_channels; + } return 0; err_close_channels: @@ -1515,7 +1844,7 @@ err_clear_state_opened_flag: return err; } -static int mlx5e_open(struct net_device *netdev) +int mlx5e_open(struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); int err; @@ -1530,6 +1859,7 @@ static int mlx5e_open(struct net_device *netdev) int mlx5e_close_locked(struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; /* May already be CLOSED in case a previous configuration operation * (e.g RX/TX queue size change) that involves close&open failed. @@ -1539,6 +1869,9 @@ int mlx5e_close_locked(struct net_device *netdev) clear_bit(MLX5E_STATE_OPENED, &priv->state); + if (MLX5_CAP_GEN(mdev, vport_group_manager)) + mlx5e_remove_sqs_fwd_rules(priv); + mlx5e_timestamp_cleanup(priv); netif_carrier_off(priv->netdev); mlx5e_redirect_rqts(priv); @@ -1547,7 +1880,7 @@ int mlx5e_close_locked(struct net_device *netdev) return 0; } -static int mlx5e_close(struct net_device *netdev) +int mlx5e_close(struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); int err; @@ -1606,7 +1939,7 @@ static int mlx5e_create_drop_cq(struct mlx5e_priv *priv, mcq->comp = mlx5e_completion_event; mcq->event = mlx5e_cq_error_event; mcq->irqn = irqn; - mcq->uar = &priv->cq_uar; + mcq->uar = &mdev->mlx5e_res.cq_uar; cq->priv = priv; @@ -1672,7 +2005,7 @@ static int mlx5e_create_tis(struct mlx5e_priv *priv, int tc) memset(in, 0, sizeof(in)); MLX5_SET(tisc, tisc, prio, tc << 1); - MLX5_SET(tisc, tisc, transport_domain, priv->tdn); + MLX5_SET(tisc, tisc, transport_domain, mdev->mlx5e_res.td.tdn); return mlx5_core_create_tis(mdev, in, sizeof(in), &priv->tisn[tc]); } @@ -1682,12 +2015,12 @@ static void mlx5e_destroy_tis(struct mlx5e_priv *priv, int tc) mlx5_core_destroy_tis(priv->mdev, priv->tisn[tc]); } -static int mlx5e_create_tises(struct mlx5e_priv *priv) +int mlx5e_create_tises(struct mlx5e_priv *priv) { int err; int tc; - for (tc = 0; tc < MLX5E_MAX_NUM_TC; tc++) { + for (tc = 0; tc < priv->profile->max_tc; tc++) { err = mlx5e_create_tis(priv, tc); if (err) goto err_close_tises; @@ -1702,19 +2035,20 @@ err_close_tises: return err; } -static void mlx5e_destroy_tises(struct mlx5e_priv *priv) +void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv) { int tc; - for (tc = 0; tc < MLX5E_MAX_NUM_TC; tc++) + for (tc = 0; tc < priv->profile->max_tc; tc++) mlx5e_destroy_tis(priv, tc); } -static void mlx5e_build_tir_ctx(struct mlx5e_priv *priv, u32 *tirc, int tt) +static void mlx5e_build_indir_tir_ctx(struct mlx5e_priv *priv, u32 *tirc, + enum mlx5e_traffic_types tt) { void *hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer); - MLX5_SET(tirc, tirc, transport_domain, priv->tdn); + MLX5_SET(tirc, tirc, transport_domain, priv->mdev->mlx5e_res.td.tdn); #define MLX5_HASH_IP (MLX5_HASH_FIELD_SEL_SRC_IP |\ MLX5_HASH_FIELD_SEL_DST_IP) @@ -1731,19 +2065,8 @@ static void mlx5e_build_tir_ctx(struct mlx5e_priv *priv, u32 *tirc, int tt) mlx5e_build_tir_ctx_lro(tirc, priv); MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT); - - switch (tt) { - case MLX5E_TT_ANY: - MLX5_SET(tirc, tirc, indirect_table, - priv->rqtn[MLX5E_SINGLE_RQ_RQT]); - MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_INVERTED_XOR8); - break; - default: - MLX5_SET(tirc, tirc, indirect_table, - priv->rqtn[MLX5E_INDIRECTION_RQT]); - mlx5e_build_tir_ctx_hash(tirc, priv); - break; - } + MLX5_SET(tirc, tirc, indirect_table, priv->indir_rqt.rqtn); + mlx5e_build_tir_ctx_hash(tirc, priv); switch (tt) { case MLX5E_TT_IPV4_TCP: @@ -1823,64 +2146,132 @@ static void mlx5e_build_tir_ctx(struct mlx5e_priv *priv, u32 *tirc, int tt) MLX5_SET(rx_hash_field_select, hfso, selected_fields, MLX5_HASH_IP); break; + default: + WARN_ONCE(true, + "mlx5e_build_indir_tir_ctx: bad traffic type!\n"); } } -static int mlx5e_create_tir(struct mlx5e_priv *priv, int tt) +static void mlx5e_build_direct_tir_ctx(struct mlx5e_priv *priv, u32 *tirc, + u32 rqtn) { - struct mlx5_core_dev *mdev = priv->mdev; - u32 *in; + MLX5_SET(tirc, tirc, transport_domain, priv->mdev->mlx5e_res.td.tdn); + + mlx5e_build_tir_ctx_lro(tirc, priv); + + MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT); + MLX5_SET(tirc, tirc, indirect_table, rqtn); + MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_INVERTED_XOR8); +} + +static int mlx5e_create_indirect_tirs(struct mlx5e_priv *priv) +{ + struct mlx5e_tir *tir; void *tirc; int inlen; int err; + u32 *in; + int tt; inlen = MLX5_ST_SZ_BYTES(create_tir_in); in = mlx5_vzalloc(inlen); if (!in) return -ENOMEM; - tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); + for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { + memset(in, 0, inlen); + tir = &priv->indir_tir[tt]; + tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); + mlx5e_build_indir_tir_ctx(priv, tirc, tt); + err = mlx5e_create_tir(priv->mdev, tir, in, inlen); + if (err) + goto err_destroy_tirs; + } - mlx5e_build_tir_ctx(priv, tirc, tt); + kvfree(in); + + return 0; - err = mlx5_core_create_tir(mdev, in, inlen, &priv->tirn[tt]); +err_destroy_tirs: + for (tt--; tt >= 0; tt--) + mlx5e_destroy_tir(priv->mdev, &priv->indir_tir[tt]); kvfree(in); return err; } -static void mlx5e_destroy_tir(struct mlx5e_priv *priv, int tt) -{ - mlx5_core_destroy_tir(priv->mdev, priv->tirn[tt]); -} - -static int mlx5e_create_tirs(struct mlx5e_priv *priv) +int mlx5e_create_direct_tirs(struct mlx5e_priv *priv) { + int nch = priv->profile->max_nch(priv->mdev); + struct mlx5e_tir *tir; + void *tirc; + int inlen; int err; - int i; + u32 *in; + int ix; + + inlen = MLX5_ST_SZ_BYTES(create_tir_in); + in = mlx5_vzalloc(inlen); + if (!in) + return -ENOMEM; - for (i = 0; i < MLX5E_NUM_TT; i++) { - err = mlx5e_create_tir(priv, i); + for (ix = 0; ix < nch; ix++) { + memset(in, 0, inlen); + tir = &priv->direct_tir[ix]; + tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); + mlx5e_build_direct_tir_ctx(priv, tirc, + priv->direct_tir[ix].rqt.rqtn); + err = mlx5e_create_tir(priv->mdev, tir, in, inlen); if (err) - goto err_destroy_tirs; + goto err_destroy_ch_tirs; } + kvfree(in); + return 0; -err_destroy_tirs: - for (i--; i >= 0; i--) - mlx5e_destroy_tir(priv, i); +err_destroy_ch_tirs: + for (ix--; ix >= 0; ix--) + mlx5e_destroy_tir(priv->mdev, &priv->direct_tir[ix]); + + kvfree(in); return err; } -static void mlx5e_destroy_tirs(struct mlx5e_priv *priv) +static void mlx5e_destroy_indirect_tirs(struct mlx5e_priv *priv) +{ + int i; + + for (i = 0; i < MLX5E_NUM_INDIR_TIRS; i++) + mlx5e_destroy_tir(priv->mdev, &priv->indir_tir[i]); +} + +void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv) { + int nch = priv->profile->max_nch(priv->mdev); int i; - for (i = 0; i < MLX5E_NUM_TT; i++) - mlx5e_destroy_tir(priv, i); + for (i = 0; i < nch; i++) + mlx5e_destroy_tir(priv->mdev, &priv->direct_tir[i]); +} + +int mlx5e_modify_rqs_vsd(struct mlx5e_priv *priv, bool vsd) +{ + int err = 0; + int i; + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + return 0; + + for (i = 0; i < priv->params.num_channels; i++) { + err = mlx5e_modify_rq_vsd(&priv->channel[i]->rq, vsd); + if (err) + return err; + } + + return 0; } static int mlx5e_setup_tc(struct net_device *netdev, u8 tc) @@ -1923,6 +2314,8 @@ static int mlx5e_ndo_setup_tc(struct net_device *dev, u32 handle, return mlx5e_configure_flower(priv, proto, tc->cls_flower); case TC_CLSFLOWER_DESTROY: return mlx5e_delete_flower(priv, tc->cls_flower); + case TC_CLSFLOWER_STATS: + return mlx5e_stats_flower(priv, tc->cls_flower); } default: return -EOPNOTSUPP; @@ -1935,23 +2328,41 @@ mqprio: return mlx5e_setup_tc(dev, tc->tc); } -static struct rtnl_link_stats64 * +struct rtnl_link_stats64 * mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) { struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5e_sw_stats *sstats = &priv->stats.sw; struct mlx5e_vport_stats *vstats = &priv->stats.vport; - - stats->rx_packets = vstats->rx_packets; - stats->rx_bytes = vstats->rx_bytes; - stats->tx_packets = vstats->tx_packets; - stats->tx_bytes = vstats->tx_bytes; - stats->multicast = vstats->rx_multicast_packets + - vstats->tx_multicast_packets; - stats->tx_errors = vstats->tx_error_packets; - stats->rx_errors = vstats->rx_error_packets; - stats->tx_dropped = vstats->tx_queue_dropped; - stats->rx_crc_errors = 0; - stats->rx_length_errors = 0; + struct mlx5e_pport_stats *pstats = &priv->stats.pport; + + stats->rx_packets = sstats->rx_packets; + stats->rx_bytes = sstats->rx_bytes; + stats->tx_packets = sstats->tx_packets; + stats->tx_bytes = sstats->tx_bytes; + + stats->rx_dropped = priv->stats.qcnt.rx_out_of_buffer; + stats->tx_dropped = sstats->tx_queue_dropped; + + stats->rx_length_errors = + PPORT_802_3_GET(pstats, a_in_range_length_errors) + + PPORT_802_3_GET(pstats, a_out_of_range_length_field) + + PPORT_802_3_GET(pstats, a_frame_too_long_errors); + stats->rx_crc_errors = + PPORT_802_3_GET(pstats, a_frame_check_sequence_errors); + stats->rx_frame_errors = PPORT_802_3_GET(pstats, a_alignment_errors); + stats->tx_aborted_errors = PPORT_2863_GET(pstats, if_out_discards); + stats->tx_carrier_errors = + PPORT_802_3_GET(pstats, a_symbol_error_during_carrier); + stats->rx_errors = stats->rx_length_errors + stats->rx_crc_errors + + stats->rx_frame_errors; + stats->tx_errors = stats->tx_aborted_errors + stats->tx_carrier_errors; + + /* vport multicast also counts packets that are dropped due to steering + * or rx out of buffer + */ + stats->multicast = + VPORT_COUNTER_GET(vstats, received_eth_multicast.packets); return stats; } @@ -1980,50 +2391,154 @@ static int mlx5e_set_mac(struct net_device *netdev, void *addr) return 0; } -static int mlx5e_set_features(struct net_device *netdev, - netdev_features_t features) +#define MLX5E_SET_FEATURE(netdev, feature, enable) \ + do { \ + if (enable) \ + netdev->features |= feature; \ + else \ + netdev->features &= ~feature; \ + } while (0) + +typedef int (*mlx5e_feature_handler)(struct net_device *netdev, bool enable); + +static int set_feature_lro(struct net_device *netdev, bool enable) { struct mlx5e_priv *priv = netdev_priv(netdev); - int err = 0; - netdev_features_t changes = features ^ netdev->features; + bool was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state); + int err; mutex_lock(&priv->state_lock); - if (changes & NETIF_F_LRO) { - bool was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state); - - if (was_opened) - mlx5e_close_locked(priv->netdev); - - priv->params.lro_en = !!(features & NETIF_F_LRO); - err = mlx5e_modify_tirs_lro(priv); - if (err) - mlx5_core_warn(priv->mdev, "lro modify failed, %d\n", - err); + if (was_opened && (priv->params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST)) + mlx5e_close_locked(priv->netdev); - if (was_opened) - err = mlx5e_open_locked(priv->netdev); + priv->params.lro_en = enable; + err = mlx5e_modify_tirs_lro(priv); + if (err) { + netdev_err(netdev, "lro modify failed, %d\n", err); + priv->params.lro_en = !enable; } + if (was_opened && (priv->params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST)) + mlx5e_open_locked(priv->netdev); + mutex_unlock(&priv->state_lock); - if (changes & NETIF_F_HW_VLAN_CTAG_FILTER) { - if (features & NETIF_F_HW_VLAN_CTAG_FILTER) - mlx5e_enable_vlan_filter(priv); - else - mlx5e_disable_vlan_filter(priv); - } + return err; +} + +static int set_feature_vlan_filter(struct net_device *netdev, bool enable) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + if (enable) + mlx5e_enable_vlan_filter(priv); + else + mlx5e_disable_vlan_filter(priv); + + return 0; +} + +static int set_feature_tc_num_filters(struct net_device *netdev, bool enable) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); - if ((changes & NETIF_F_HW_TC) && !(features & NETIF_F_HW_TC) && - mlx5e_tc_num_filters(priv)) { + if (!enable && mlx5e_tc_num_filters(priv)) { netdev_err(netdev, "Active offloaded tc filters, can't turn hw_tc_offload off\n"); return -EINVAL; } + return 0; +} + +static int set_feature_rx_all(struct net_device *netdev, bool enable) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + + return mlx5_set_port_fcs(mdev, !enable); +} + +static int set_feature_rx_vlan(struct net_device *netdev, bool enable) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + int err; + + mutex_lock(&priv->state_lock); + + priv->params.vlan_strip_disable = !enable; + err = mlx5e_modify_rqs_vsd(priv, !enable); + if (err) + priv->params.vlan_strip_disable = enable; + + mutex_unlock(&priv->state_lock); + return err; } +#ifdef CONFIG_RFS_ACCEL +static int set_feature_arfs(struct net_device *netdev, bool enable) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + int err; + + if (enable) + err = mlx5e_arfs_enable(priv); + else + err = mlx5e_arfs_disable(priv); + + return err; +} +#endif + +static int mlx5e_handle_feature(struct net_device *netdev, + netdev_features_t wanted_features, + netdev_features_t feature, + mlx5e_feature_handler feature_handler) +{ + netdev_features_t changes = wanted_features ^ netdev->features; + bool enable = !!(wanted_features & feature); + int err; + + if (!(changes & feature)) + return 0; + + err = feature_handler(netdev, enable); + if (err) { + netdev_err(netdev, "%s feature 0x%llx failed err %d\n", + enable ? "Enable" : "Disable", feature, err); + return err; + } + + MLX5E_SET_FEATURE(netdev, feature, enable); + return 0; +} + +static int mlx5e_set_features(struct net_device *netdev, + netdev_features_t features) +{ + int err; + + err = mlx5e_handle_feature(netdev, features, NETIF_F_LRO, + set_feature_lro); + err |= mlx5e_handle_feature(netdev, features, + NETIF_F_HW_VLAN_CTAG_FILTER, + set_feature_vlan_filter); + err |= mlx5e_handle_feature(netdev, features, NETIF_F_HW_TC, + set_feature_tc_num_filters); + err |= mlx5e_handle_feature(netdev, features, NETIF_F_RXALL, + set_feature_rx_all); + err |= mlx5e_handle_feature(netdev, features, NETIF_F_HW_VLAN_CTAG_RX, + set_feature_rx_vlan); +#ifdef CONFIG_RFS_ACCEL + err |= mlx5e_handle_feature(netdev, features, NETIF_F_NTUPLE, + set_feature_arfs); +#endif + + return err ? -EINVAL : 0; +} + #define MXL5_HW_MIN_MTU 64 #define MXL5E_MIN_MTU (MXL5_HW_MIN_MTU + ETH_FCS_LEN) @@ -2035,6 +2550,7 @@ static int mlx5e_change_mtu(struct net_device *netdev, int new_mtu) u16 max_mtu; u16 min_mtu; int err = 0; + bool reset; mlx5_query_port_max_mtu(mdev, &max_mtu, 1); @@ -2050,13 +2566,18 @@ static int mlx5e_change_mtu(struct net_device *netdev, int new_mtu) mutex_lock(&priv->state_lock); + reset = !priv->params.lro_en && + (priv->params.rq_wq_type != + MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ); + was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state); - if (was_opened) + if (was_opened && reset) mlx5e_close_locked(netdev); netdev->mtu = new_mtu; + mlx5e_set_dev_port_mtu(netdev); - if (was_opened) + if (was_opened && reset) err = mlx5e_open_locked(netdev); mutex_unlock(&priv->state_lock); @@ -2093,6 +2614,21 @@ static int mlx5e_set_vf_vlan(struct net_device *dev, int vf, u16 vlan, u8 qos) vlan, qos); } +static int mlx5e_set_vf_spoofchk(struct net_device *dev, int vf, bool setting) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + + return mlx5_eswitch_set_vport_spoofchk(mdev->priv.eswitch, vf + 1, setting); +} + +static int mlx5e_set_vf_trust(struct net_device *dev, int vf, bool setting) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + + return mlx5_eswitch_set_vport_trust(mdev->priv.eswitch, vf + 1, setting); +} static int mlx5_vport_link2ifla(u8 esw_link) { switch (esw_link) { @@ -2149,27 +2685,32 @@ static int mlx5e_get_vf_stats(struct net_device *dev, vf_stats); } -#if IS_ENABLED(CONFIG_MLX5_CORE_EN_VXLAN) static void mlx5e_add_vxlan_port(struct net_device *netdev, - sa_family_t sa_family, __be16 port) + struct udp_tunnel_info *ti) { struct mlx5e_priv *priv = netdev_priv(netdev); + if (ti->type != UDP_TUNNEL_TYPE_VXLAN) + return; + if (!mlx5e_vxlan_allowed(priv->mdev)) return; - mlx5e_vxlan_queue_work(priv, sa_family, be16_to_cpu(port), 1); + mlx5e_vxlan_queue_work(priv, ti->sa_family, be16_to_cpu(ti->port), 1); } static void mlx5e_del_vxlan_port(struct net_device *netdev, - sa_family_t sa_family, __be16 port) + struct udp_tunnel_info *ti) { struct mlx5e_priv *priv = netdev_priv(netdev); + if (ti->type != UDP_TUNNEL_TYPE_VXLAN) + return; + if (!mlx5e_vxlan_allowed(priv->mdev)) return; - mlx5e_vxlan_queue_work(priv, sa_family, be16_to_cpu(port), 0); + mlx5e_vxlan_queue_work(priv, ti->sa_family, be16_to_cpu(ti->port), 0); } static netdev_features_t mlx5e_vxlan_features_check(struct mlx5e_priv *priv, @@ -2221,7 +2762,29 @@ static netdev_features_t mlx5e_features_check(struct sk_buff *skb, return features; } -#endif + +static void mlx5e_tx_timeout(struct net_device *dev) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + bool sched_work = false; + int i; + + netdev_err(dev, "TX timeout detected\n"); + + for (i = 0; i < priv->params.num_channels * priv->params.num_tc; i++) { + struct mlx5e_sq *sq = priv->txq_to_sq_map[i]; + + if (!netif_xmit_stopped(netdev_get_tx_queue(dev, i))) + continue; + sched_work = true; + set_bit(MLX5E_SQ_STATE_FLUSH, &sq->state); + netdev_err(dev, "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x\n", + i, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc); + } + + if (sched_work && test_bit(MLX5E_STATE_OPENED, &priv->state)) + schedule_work(&priv->tx_timeout_work); +} static const struct net_device_ops mlx5e_netdev_ops_basic = { .ndo_open = mlx5e_open, @@ -2237,6 +2800,11 @@ static const struct net_device_ops mlx5e_netdev_ops_basic = { .ndo_set_features = mlx5e_set_features, .ndo_change_mtu = mlx5e_change_mtu, .ndo_do_ioctl = mlx5e_ioctl, + .ndo_set_tx_maxrate = mlx5e_set_tx_maxrate, +#ifdef CONFIG_RFS_ACCEL + .ndo_rx_flow_steer = mlx5e_rx_flow_steer, +#endif + .ndo_tx_timeout = mlx5e_tx_timeout, }; static const struct net_device_ops mlx5e_netdev_ops_sriov = { @@ -2253,16 +2821,21 @@ static const struct net_device_ops mlx5e_netdev_ops_sriov = { .ndo_set_features = mlx5e_set_features, .ndo_change_mtu = mlx5e_change_mtu, .ndo_do_ioctl = mlx5e_ioctl, -#ifdef CONFIG_MLX5_CORE_EN_VXLAN - .ndo_add_vxlan_port = mlx5e_add_vxlan_port, - .ndo_del_vxlan_port = mlx5e_del_vxlan_port, + .ndo_udp_tunnel_add = mlx5e_add_vxlan_port, + .ndo_udp_tunnel_del = mlx5e_del_vxlan_port, + .ndo_set_tx_maxrate = mlx5e_set_tx_maxrate, .ndo_features_check = mlx5e_features_check, +#ifdef CONFIG_RFS_ACCEL + .ndo_rx_flow_steer = mlx5e_rx_flow_steer, #endif .ndo_set_vf_mac = mlx5e_set_vf_mac, .ndo_set_vf_vlan = mlx5e_set_vf_vlan, + .ndo_set_vf_spoofchk = mlx5e_set_vf_spoofchk, + .ndo_set_vf_trust = mlx5e_set_vf_trust, .ndo_get_vf_config = mlx5e_get_vf_config, .ndo_set_vf_link_state = mlx5e_set_vf_link_state, .ndo_get_vf_stats = mlx5e_get_vf_stats, + .ndo_tx_timeout = mlx5e_tx_timeout, }; static int mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev) @@ -2317,51 +2890,187 @@ static void mlx5e_ets_init(struct mlx5e_priv *priv) } #endif -void mlx5e_build_default_indir_rqt(u32 *indirection_rqt, int len, +void mlx5e_build_default_indir_rqt(struct mlx5_core_dev *mdev, + u32 *indirection_rqt, int len, int num_channels) { + int node = mdev->priv.numa_node; + int node_num_of_cores; int i; + if (node == -1) + node = first_online_node; + + node_num_of_cores = cpumask_weight(cpumask_of_node(node)); + + if (node_num_of_cores) + num_channels = min_t(int, num_channels, node_num_of_cores); + for (i = 0; i < len; i++) indirection_rqt[i] = i % num_channels; } -static void mlx5e_build_netdev_priv(struct mlx5_core_dev *mdev, - struct net_device *netdev, - int num_channels) +static bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev) +{ + return MLX5_CAP_GEN(mdev, striding_rq) && + MLX5_CAP_GEN(mdev, umr_ptr_rlky) && + MLX5_CAP_ETH(mdev, reg_umr_sq); +} + +static int mlx5e_get_pci_bw(struct mlx5_core_dev *mdev, u32 *pci_bw) +{ + enum pcie_link_width width; + enum pci_bus_speed speed; + int err = 0; + + err = pcie_get_minimum_link(mdev->pdev, &speed, &width); + if (err) + return err; + + if (speed == PCI_SPEED_UNKNOWN || width == PCIE_LNK_WIDTH_UNKNOWN) + return -EINVAL; + + switch (speed) { + case PCIE_SPEED_2_5GT: + *pci_bw = 2500 * width; + break; + case PCIE_SPEED_5_0GT: + *pci_bw = 5000 * width; + break; + case PCIE_SPEED_8_0GT: + *pci_bw = 8000 * width; + break; + default: + return -EINVAL; + } + + return 0; +} + +static bool cqe_compress_heuristic(u32 link_speed, u32 pci_bw) +{ + return (link_speed && pci_bw && + (pci_bw < 40000) && (pci_bw < link_speed)); +} + +void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode) +{ + params->rx_cq_period_mode = cq_period_mode; + + params->rx_cq_moderation.pkts = + MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS; + params->rx_cq_moderation.usec = + MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC; + + if (cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE) + params->rx_cq_moderation.usec = + MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE; +} + +static void mlx5e_query_min_inline(struct mlx5_core_dev *mdev, + u8 *min_inline_mode) +{ + switch (MLX5_CAP_ETH(mdev, wqe_inline_mode)) { + case MLX5E_INLINE_MODE_L2: + *min_inline_mode = MLX5_INLINE_MODE_L2; + break; + case MLX5E_INLINE_MODE_VPORT_CONTEXT: + mlx5_query_nic_vport_min_inline(mdev, + min_inline_mode); + break; + case MLX5_INLINE_MODE_NOT_REQUIRED: + *min_inline_mode = MLX5_INLINE_MODE_NONE; + break; + } +} + +static void mlx5e_build_nic_netdev_priv(struct mlx5_core_dev *mdev, + struct net_device *netdev, + const struct mlx5e_profile *profile, + void *ppriv) { struct mlx5e_priv *priv = netdev_priv(netdev); + u32 link_speed = 0; + u32 pci_bw = 0; + u8 cq_period_mode = MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ? + MLX5_CQ_PERIOD_MODE_START_FROM_CQE : + MLX5_CQ_PERIOD_MODE_START_FROM_EQE; priv->params.log_sq_size = MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE; - priv->params.log_rq_size = - MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE; - priv->params.rx_cq_moderation_usec = - MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC; - priv->params.rx_cq_moderation_pkts = - MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS; - priv->params.tx_cq_moderation_usec = + priv->params.rq_wq_type = mlx5e_check_fragmented_striding_rq_cap(mdev) ? + MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ : + MLX5_WQ_TYPE_LINKED_LIST; + + /* set CQE compression */ + priv->params.rx_cqe_compress_admin = false; + if (MLX5_CAP_GEN(mdev, cqe_compression) && + MLX5_CAP_GEN(mdev, vport_group_manager)) { + mlx5e_get_max_linkspeed(mdev, &link_speed); + mlx5e_get_pci_bw(mdev, &pci_bw); + mlx5_core_dbg(mdev, "Max link speed = %d, PCI BW = %d\n", + link_speed, pci_bw); + priv->params.rx_cqe_compress_admin = + cqe_compress_heuristic(link_speed, pci_bw); + } + + priv->params.rx_cqe_compress = priv->params.rx_cqe_compress_admin; + + switch (priv->params.rq_wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + priv->params.log_rq_size = MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW; + priv->params.mpwqe_log_stride_sz = + priv->params.rx_cqe_compress ? + MLX5_MPWRQ_LOG_STRIDE_SIZE_CQE_COMPRESS : + MLX5_MPWRQ_LOG_STRIDE_SIZE; + priv->params.mpwqe_log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ - + priv->params.mpwqe_log_stride_sz; + priv->params.lro_en = true; + break; + default: /* MLX5_WQ_TYPE_LINKED_LIST */ + priv->params.log_rq_size = MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE; + } + + mlx5_core_info(mdev, + "MLX5E: StrdRq(%d) RqSz(%ld) StrdSz(%ld) RxCqeCmprss(%d)\n", + priv->params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ, + BIT(priv->params.log_rq_size), + BIT(priv->params.mpwqe_log_stride_sz), + priv->params.rx_cqe_compress_admin); + + priv->params.min_rx_wqes = mlx5_min_rx_wqes(priv->params.rq_wq_type, + BIT(priv->params.log_rq_size)); + + priv->params.rx_am_enabled = MLX5_CAP_GEN(mdev, cq_moderation); + mlx5e_set_rx_cq_mode_params(&priv->params, cq_period_mode); + + priv->params.tx_cq_moderation.usec = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC; - priv->params.tx_cq_moderation_pkts = + priv->params.tx_cq_moderation.pkts = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS; priv->params.tx_max_inline = mlx5e_get_max_inline_cap(mdev); - priv->params.min_rx_wqes = - MLX5E_PARAMS_DEFAULT_MIN_RX_WQES; + mlx5e_query_min_inline(mdev, &priv->params.tx_min_inline_mode); priv->params.num_tc = 1; priv->params.rss_hfunc = ETH_RSS_HASH_XOR; netdev_rss_key_fill(priv->params.toeplitz_hash_key, sizeof(priv->params.toeplitz_hash_key)); - mlx5e_build_default_indir_rqt(priv->params.indirection_rqt, - MLX5E_INDIR_RQT_SIZE, num_channels); + mlx5e_build_default_indir_rqt(mdev, priv->params.indirection_rqt, + MLX5E_INDIR_RQT_SIZE, profile->max_nch(mdev)); priv->params.lro_wqe_sz = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ; + /* Initialize pflags */ + MLX5E_SET_PRIV_FLAG(priv, MLX5E_PFLAG_RX_CQE_BASED_MODER, + priv->params.rx_cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE); + priv->mdev = mdev; priv->netdev = netdev; - priv->params.num_channels = num_channels; + priv->params.num_channels = profile->max_nch(mdev); + priv->profile = profile; + priv->ppriv = ppriv; #ifdef CONFIG_MLX5_CORE_EN_DCB mlx5e_ets_init(priv); @@ -2371,6 +3080,7 @@ static void mlx5e_build_netdev_priv(struct mlx5_core_dev *mdev, INIT_WORK(&priv->update_carrier_work, mlx5e_update_carrier_work); INIT_WORK(&priv->set_rx_mode_work, mlx5e_set_rx_mode_work); + INIT_WORK(&priv->tx_timeout_work, mlx5e_tx_timeout_work); INIT_DELAYED_WORK(&priv->update_stats_work, mlx5e_update_stats_work); } @@ -2386,10 +3096,16 @@ static void mlx5e_set_netdev_dev_addr(struct net_device *netdev) } } -static void mlx5e_build_netdev(struct net_device *netdev) +static const struct switchdev_ops mlx5e_switchdev_ops = { + .switchdev_port_attr_get = mlx5e_attr_get, +}; + +static void mlx5e_build_nic_netdev(struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5_core_dev *mdev = priv->mdev; + bool fcs_supported; + bool fcs_enabled; SET_NETDEV_DEV(netdev, &mdev->pdev->dev); @@ -2424,205 +3140,340 @@ static void mlx5e_build_netdev(struct net_device *netdev) netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER; if (mlx5e_vxlan_allowed(mdev)) { - netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL; + netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL | + NETIF_F_GSO_UDP_TUNNEL_CSUM | + NETIF_F_GSO_PARTIAL; netdev->hw_enc_features |= NETIF_F_IP_CSUM; - netdev->hw_enc_features |= NETIF_F_RXCSUM; + netdev->hw_enc_features |= NETIF_F_IPV6_CSUM; netdev->hw_enc_features |= NETIF_F_TSO; netdev->hw_enc_features |= NETIF_F_TSO6; - netdev->hw_enc_features |= NETIF_F_RXHASH; netdev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL; + netdev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL_CSUM | + NETIF_F_GSO_PARTIAL; + netdev->gso_partial_features = NETIF_F_GSO_UDP_TUNNEL_CSUM; } + mlx5_query_port_fcs(mdev, &fcs_supported, &fcs_enabled); + + if (fcs_supported) + netdev->hw_features |= NETIF_F_RXALL; + netdev->features = netdev->hw_features; if (!priv->params.lro_en) netdev->features &= ~NETIF_F_LRO; + if (fcs_enabled) + netdev->features &= ~NETIF_F_RXALL; + #define FT_CAP(f) MLX5_CAP_FLOWTABLE(mdev, flow_table_properties_nic_receive.f) if (FT_CAP(flow_modify_en) && FT_CAP(modify_root) && FT_CAP(identified_miss_table_mode) && - FT_CAP(flow_table_modify)) - priv->netdev->hw_features |= NETIF_F_HW_TC; + FT_CAP(flow_table_modify)) { + netdev->hw_features |= NETIF_F_HW_TC; +#ifdef CONFIG_RFS_ACCEL + netdev->hw_features |= NETIF_F_NTUPLE; +#endif + } netdev->features |= NETIF_F_HIGHDMA; netdev->priv_flags |= IFF_UNICAST_FLT; mlx5e_set_netdev_dev_addr(netdev); + +#ifdef CONFIG_NET_SWITCHDEV + if (MLX5_CAP_GEN(mdev, vport_group_manager)) + netdev->switchdev_ops = &mlx5e_switchdev_ops; +#endif } -static int mlx5e_create_mkey(struct mlx5e_priv *priv, u32 pdn, - struct mlx5_core_mkey *mkey) +static void mlx5e_create_q_counter(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + int err; + + err = mlx5_core_alloc_q_counter(mdev, &priv->q_counter); + if (err) { + mlx5_core_warn(mdev, "alloc queue counter failed, %d\n", err); + priv->q_counter = 0; + } +} + +static void mlx5e_destroy_q_counter(struct mlx5e_priv *priv) +{ + if (!priv->q_counter) + return; + + mlx5_core_dealloc_q_counter(priv->mdev, priv->q_counter); +} + +static int mlx5e_create_umr_mkey(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; struct mlx5_create_mkey_mbox_in *in; + struct mlx5_mkey_seg *mkc; + int inlen = sizeof(*in); + u64 npages = MLX5E_REQUIRED_MTTS(priv->profile->max_nch(mdev), + BIT(MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE_MPW)); int err; - in = mlx5_vzalloc(sizeof(*in)); + in = mlx5_vzalloc(inlen); if (!in) return -ENOMEM; - in->seg.flags = MLX5_PERM_LOCAL_WRITE | - MLX5_PERM_LOCAL_READ | - MLX5_ACCESS_MODE_PA; - in->seg.flags_pd = cpu_to_be32(pdn | MLX5_MKEY_LEN64); - in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); + mkc = &in->seg; + mkc->status = MLX5_MKEY_STATUS_FREE; + mkc->flags = MLX5_PERM_UMR_EN | + MLX5_PERM_LOCAL_READ | + MLX5_PERM_LOCAL_WRITE | + MLX5_ACCESS_MODE_MTT; + + npages = min_t(u32, ALIGN(U16_MAX, 4) * 2, npages); + + mkc->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); + mkc->flags_pd = cpu_to_be32(mdev->mlx5e_res.pdn); + mkc->len = cpu_to_be64(npages << PAGE_SHIFT); + mkc->xlt_oct_size = cpu_to_be32(MLX5_MTT_OCTW(npages)); + mkc->log2_page_size = PAGE_SHIFT; - err = mlx5_core_create_mkey(mdev, mkey, in, sizeof(*in), NULL, NULL, - NULL); + err = mlx5_core_create_mkey(mdev, &priv->umr_mkey, in, inlen, NULL, + NULL, NULL); kvfree(in); return err; } -static void *mlx5e_create_netdev(struct mlx5_core_dev *mdev) +static void mlx5e_nic_init(struct mlx5_core_dev *mdev, + struct net_device *netdev, + const struct mlx5e_profile *profile, + void *ppriv) { - struct net_device *netdev; - struct mlx5e_priv *priv; - int nch = mlx5e_get_max_num_channels(mdev); - int err; + struct mlx5e_priv *priv = netdev_priv(netdev); - if (mlx5e_check_required_hca_cap(mdev)) - return NULL; + mlx5e_build_nic_netdev_priv(mdev, netdev, profile, ppriv); + mlx5e_build_nic_netdev(netdev); + mlx5e_vxlan_init(priv); +} - netdev = alloc_etherdev_mqs(sizeof(struct mlx5e_priv), - nch * MLX5E_MAX_NUM_TC, - nch); - if (!netdev) { - mlx5_core_err(mdev, "alloc_etherdev_mqs() failed\n"); - return NULL; - } +static void mlx5e_nic_cleanup(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5_eswitch *esw = mdev->priv.eswitch; - mlx5e_build_netdev_priv(mdev, netdev, nch); - mlx5e_build_netdev(netdev); + mlx5e_vxlan_cleanup(priv); - netif_carrier_off(netdev); + if (MLX5_CAP_GEN(mdev, vport_group_manager)) + mlx5_eswitch_unregister_vport_rep(esw, 0); +} - priv = netdev_priv(netdev); +static int mlx5e_init_nic_rx(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + int err; + int i; - priv->wq = create_singlethread_workqueue("mlx5e"); - if (!priv->wq) - goto err_free_netdev; + err = mlx5e_create_indirect_rqts(priv); + if (err) { + mlx5_core_warn(mdev, "create indirect rqts failed, %d\n", err); + return err; + } - err = mlx5_alloc_map_uar(mdev, &priv->cq_uar, false); + err = mlx5e_create_direct_rqts(priv); if (err) { - mlx5_core_err(mdev, "alloc_map uar failed, %d\n", err); - goto err_destroy_wq; + mlx5_core_warn(mdev, "create direct rqts failed, %d\n", err); + goto err_destroy_indirect_rqts; } - err = mlx5_core_alloc_pd(mdev, &priv->pdn); + err = mlx5e_create_indirect_tirs(priv); if (err) { - mlx5_core_err(mdev, "alloc pd failed, %d\n", err); - goto err_unmap_free_uar; + mlx5_core_warn(mdev, "create indirect tirs failed, %d\n", err); + goto err_destroy_direct_rqts; } - err = mlx5_core_alloc_transport_domain(mdev, &priv->tdn); + err = mlx5e_create_direct_tirs(priv); if (err) { - mlx5_core_err(mdev, "alloc td failed, %d\n", err); - goto err_dealloc_pd; + mlx5_core_warn(mdev, "create direct tirs failed, %d\n", err); + goto err_destroy_indirect_tirs; } - err = mlx5e_create_mkey(priv, priv->pdn, &priv->mkey); + err = mlx5e_create_flow_steering(priv); if (err) { - mlx5_core_err(mdev, "create mkey failed, %d\n", err); - goto err_dealloc_transport_domain; + mlx5_core_warn(mdev, "create flow steering failed, %d\n", err); + goto err_destroy_direct_tirs; } + err = mlx5e_tc_init(priv); + if (err) + goto err_destroy_flow_steering; + + return 0; + +err_destroy_flow_steering: + mlx5e_destroy_flow_steering(priv); +err_destroy_direct_tirs: + mlx5e_destroy_direct_tirs(priv); +err_destroy_indirect_tirs: + mlx5e_destroy_indirect_tirs(priv); +err_destroy_direct_rqts: + for (i = 0; i < priv->profile->max_nch(mdev); i++) + mlx5e_destroy_rqt(priv, &priv->direct_tir[i].rqt); +err_destroy_indirect_rqts: + mlx5e_destroy_rqt(priv, &priv->indir_rqt); + return err; +} + +static void mlx5e_cleanup_nic_rx(struct mlx5e_priv *priv) +{ + int i; + + mlx5e_tc_cleanup(priv); + mlx5e_destroy_flow_steering(priv); + mlx5e_destroy_direct_tirs(priv); + mlx5e_destroy_indirect_tirs(priv); + for (i = 0; i < priv->profile->max_nch(priv->mdev); i++) + mlx5e_destroy_rqt(priv, &priv->direct_tir[i].rqt); + mlx5e_destroy_rqt(priv, &priv->indir_rqt); +} + +static int mlx5e_init_nic_tx(struct mlx5e_priv *priv) +{ + int err; + err = mlx5e_create_tises(priv); if (err) { - mlx5_core_warn(mdev, "create tises failed, %d\n", err); - goto err_destroy_mkey; + mlx5_core_warn(priv->mdev, "create tises failed, %d\n", err); + return err; } - err = mlx5e_open_drop_rq(priv); - if (err) { - mlx5_core_err(mdev, "open drop rq failed, %d\n", err); - goto err_destroy_tises; +#ifdef CONFIG_MLX5_CORE_EN_DCB + mlx5e_dcbnl_ieee_setets_core(priv, &priv->params.ets); +#endif + return 0; +} + +static void mlx5e_nic_enable(struct mlx5e_priv *priv) +{ + struct net_device *netdev = priv->netdev; + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5_eswitch *esw = mdev->priv.eswitch; + struct mlx5_eswitch_rep rep; + + if (mlx5e_vxlan_allowed(mdev)) { + rtnl_lock(); + udp_tunnel_get_rx_info(netdev); + rtnl_unlock(); } - err = mlx5e_create_rqt(priv, MLX5E_INDIRECTION_RQT); - if (err) { - mlx5_core_warn(mdev, "create rqt(INDIR) failed, %d\n", err); - goto err_close_drop_rq; + mlx5e_enable_async_events(priv); + queue_work(priv->wq, &priv->set_rx_mode_work); + + if (MLX5_CAP_GEN(mdev, vport_group_manager)) { + mlx5_query_nic_vport_mac_address(mdev, 0, rep.hw_id); + rep.load = mlx5e_nic_rep_load; + rep.unload = mlx5e_nic_rep_unload; + rep.vport = 0; + rep.priv_data = priv; + mlx5_eswitch_register_vport_rep(esw, &rep); } +} - err = mlx5e_create_rqt(priv, MLX5E_SINGLE_RQ_RQT); - if (err) { - mlx5_core_warn(mdev, "create rqt(SINGLE) failed, %d\n", err); - goto err_destroy_rqt_indir; +static void mlx5e_nic_disable(struct mlx5e_priv *priv) +{ + queue_work(priv->wq, &priv->set_rx_mode_work); + mlx5e_disable_async_events(priv); +} + +static const struct mlx5e_profile mlx5e_nic_profile = { + .init = mlx5e_nic_init, + .cleanup = mlx5e_nic_cleanup, + .init_rx = mlx5e_init_nic_rx, + .cleanup_rx = mlx5e_cleanup_nic_rx, + .init_tx = mlx5e_init_nic_tx, + .cleanup_tx = mlx5e_cleanup_nic_tx, + .enable = mlx5e_nic_enable, + .disable = mlx5e_nic_disable, + .update_stats = mlx5e_update_stats, + .max_nch = mlx5e_get_max_num_channels, + .max_tc = MLX5E_MAX_NUM_TC, +}; + +void *mlx5e_create_netdev(struct mlx5_core_dev *mdev, + const struct mlx5e_profile *profile, void *ppriv) +{ + struct net_device *netdev; + struct mlx5e_priv *priv; + int nch = profile->max_nch(mdev); + int err; + + netdev = alloc_etherdev_mqs(sizeof(struct mlx5e_priv), + nch * profile->max_tc, + nch); + if (!netdev) { + mlx5_core_err(mdev, "alloc_etherdev_mqs() failed\n"); + return NULL; } - err = mlx5e_create_tirs(priv); + profile->init(mdev, netdev, profile, ppriv); + + netif_carrier_off(netdev); + + priv = netdev_priv(netdev); + + priv->wq = create_singlethread_workqueue("mlx5e"); + if (!priv->wq) + goto err_free_netdev; + + err = mlx5e_create_umr_mkey(priv); if (err) { - mlx5_core_warn(mdev, "create tirs failed, %d\n", err); - goto err_destroy_rqt_single; + mlx5_core_err(mdev, "create umr mkey failed, %d\n", err); + goto err_destroy_wq; } - err = mlx5e_create_flow_tables(priv); + err = profile->init_tx(priv); + if (err) + goto err_destroy_umr_mkey; + + err = mlx5e_open_drop_rq(priv); if (err) { - mlx5_core_warn(mdev, "create flow tables failed, %d\n", err); - goto err_destroy_tirs; + mlx5_core_err(mdev, "open drop rq failed, %d\n", err); + goto err_cleanup_tx; } - mlx5e_init_eth_addr(priv); + err = profile->init_rx(priv); + if (err) + goto err_close_drop_rq; - mlx5e_vxlan_init(priv); + mlx5e_create_q_counter(priv); - err = mlx5e_tc_init(priv); - if (err) - goto err_destroy_flow_tables; + mlx5e_init_l2_addr(priv); -#ifdef CONFIG_MLX5_CORE_EN_DCB - mlx5e_dcbnl_ieee_setets_core(priv, &priv->params.ets); -#endif + mlx5e_set_dev_port_mtu(netdev); err = register_netdev(netdev); if (err) { mlx5_core_err(mdev, "register_netdev failed, %d\n", err); - goto err_tc_cleanup; + goto err_dealloc_q_counters; } - if (mlx5e_vxlan_allowed(mdev)) - vxlan_get_rx_port(netdev); - - mlx5e_enable_async_events(priv); - queue_work(priv->wq, &priv->set_rx_mode_work); + if (profile->enable) + profile->enable(priv); return priv; -err_tc_cleanup: - mlx5e_tc_cleanup(priv); - -err_destroy_flow_tables: - mlx5e_destroy_flow_tables(priv); - -err_destroy_tirs: - mlx5e_destroy_tirs(priv); - -err_destroy_rqt_single: - mlx5e_destroy_rqt(priv, MLX5E_SINGLE_RQ_RQT); - -err_destroy_rqt_indir: - mlx5e_destroy_rqt(priv, MLX5E_INDIRECTION_RQT); +err_dealloc_q_counters: + mlx5e_destroy_q_counter(priv); + profile->cleanup_rx(priv); err_close_drop_rq: mlx5e_close_drop_rq(priv); -err_destroy_tises: - mlx5e_destroy_tises(priv); +err_cleanup_tx: + profile->cleanup_tx(priv); -err_destroy_mkey: - mlx5_core_destroy_mkey(mdev, &priv->mkey); - -err_dealloc_transport_domain: - mlx5_core_dealloc_transport_domain(mdev, priv->tdn); - -err_dealloc_pd: - mlx5_core_dealloc_pd(mdev, priv->pdn); - -err_unmap_free_uar: - mlx5_unmap_free_uar(mdev, &priv->cq_uar); +err_destroy_umr_mkey: + mlx5_core_destroy_mkey(mdev, &priv->umr_mkey); err_destroy_wq: destroy_workqueue(priv->wq); @@ -2633,45 +3484,100 @@ err_free_netdev: return NULL; } -static void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, void *vpriv) +static void mlx5e_register_vport_rep(struct mlx5_core_dev *mdev) { - struct mlx5e_priv *priv = vpriv; + struct mlx5_eswitch *esw = mdev->priv.eswitch; + int total_vfs = MLX5_TOTAL_VPORTS(mdev); + int vport; + u8 mac[ETH_ALEN]; + + if (!MLX5_CAP_GEN(mdev, vport_group_manager)) + return; + + mlx5_query_nic_vport_mac_address(mdev, 0, mac); + + for (vport = 1; vport < total_vfs; vport++) { + struct mlx5_eswitch_rep rep; + + rep.load = mlx5e_vport_rep_load; + rep.unload = mlx5e_vport_rep_unload; + rep.vport = vport; + ether_addr_copy(rep.hw_id, mac); + mlx5_eswitch_register_vport_rep(esw, &rep); + } +} + +static void *mlx5e_add(struct mlx5_core_dev *mdev) +{ + struct mlx5_eswitch *esw = mdev->priv.eswitch; + void *ppriv = NULL; + void *ret; + + if (mlx5e_check_required_hca_cap(mdev)) + return NULL; + + if (mlx5e_create_mdev_resources(mdev)) + return NULL; + + mlx5e_register_vport_rep(mdev); + + if (MLX5_CAP_GEN(mdev, vport_group_manager)) + ppriv = &esw->offloads.vport_reps[0]; + + ret = mlx5e_create_netdev(mdev, &mlx5e_nic_profile, ppriv); + if (!ret) { + mlx5e_destroy_mdev_resources(mdev); + return NULL; + } + return ret; +} + +void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, struct mlx5e_priv *priv) +{ + const struct mlx5e_profile *profile = priv->profile; struct net_device *netdev = priv->netdev; set_bit(MLX5E_STATE_DESTROYING, &priv->state); + if (profile->disable) + profile->disable(priv); - queue_work(priv->wq, &priv->set_rx_mode_work); - mlx5e_disable_async_events(priv); flush_workqueue(priv->wq); if (test_bit(MLX5_INTERFACE_STATE_SHUTDOWN, &mdev->intf_state)) { netif_device_detach(netdev); - mutex_lock(&priv->state_lock); - if (test_bit(MLX5E_STATE_OPENED, &priv->state)) - mlx5e_close_locked(netdev); - mutex_unlock(&priv->state_lock); + mlx5e_close(netdev); } else { unregister_netdev(netdev); } - mlx5e_tc_cleanup(priv); - mlx5e_vxlan_cleanup(priv); - mlx5e_destroy_flow_tables(priv); - mlx5e_destroy_tirs(priv); - mlx5e_destroy_rqt(priv, MLX5E_SINGLE_RQ_RQT); - mlx5e_destroy_rqt(priv, MLX5E_INDIRECTION_RQT); + mlx5e_destroy_q_counter(priv); + profile->cleanup_rx(priv); mlx5e_close_drop_rq(priv); - mlx5e_destroy_tises(priv); - mlx5_core_destroy_mkey(priv->mdev, &priv->mkey); - mlx5_core_dealloc_transport_domain(priv->mdev, priv->tdn); - mlx5_core_dealloc_pd(priv->mdev, priv->pdn); - mlx5_unmap_free_uar(priv->mdev, &priv->cq_uar); + profile->cleanup_tx(priv); + mlx5_core_destroy_mkey(priv->mdev, &priv->umr_mkey); cancel_delayed_work_sync(&priv->update_stats_work); destroy_workqueue(priv->wq); + if (profile->cleanup) + profile->cleanup(priv); if (!test_bit(MLX5_INTERFACE_STATE_SHUTDOWN, &mdev->intf_state)) free_netdev(netdev); } +static void mlx5e_remove(struct mlx5_core_dev *mdev, void *vpriv) +{ + struct mlx5_eswitch *esw = mdev->priv.eswitch; + int total_vfs = MLX5_TOTAL_VPORTS(mdev); + struct mlx5e_priv *priv = vpriv; + int vport; + + mlx5e_destroy_netdev(mdev, priv); + + for (vport = 1; vport < total_vfs; vport++) + mlx5_eswitch_unregister_vport_rep(esw, vport); + + mlx5e_destroy_mdev_resources(mdev); +} + static void *mlx5e_get_netdev(void *vpriv) { struct mlx5e_priv *priv = vpriv; @@ -2680,8 +3586,8 @@ static void *mlx5e_get_netdev(void *vpriv) } static struct mlx5_interface mlx5e_interface = { - .add = mlx5e_create_netdev, - .remove = mlx5e_destroy_netdev, + .add = mlx5e_add, + .remove = mlx5e_remove, .event = mlx5e_async_event, .protocol = MLX5_INTERFACE_PROTOCOL_ETH, .get_dev = mlx5e_get_netdev, @@ -2689,6 +3595,7 @@ static struct mlx5_interface mlx5e_interface = { void mlx5e_init(void) { + mlx5e_build_ptys2ethtool_map(); mlx5_register_interface(&mlx5e_interface); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c new file mode 100644 index 000000000000..134de4a11f1d --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -0,0 +1,431 @@ +/* + * Copyright (c) 2016, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <generated/utsrelease.h> +#include <linux/mlx5/fs.h> +#include <net/switchdev.h> +#include <net/pkt_cls.h> + +#include "eswitch.h" +#include "en.h" +#include "en_tc.h" + +static const char mlx5e_rep_driver_name[] = "mlx5e_rep"; + +static void mlx5e_rep_get_drvinfo(struct net_device *dev, + struct ethtool_drvinfo *drvinfo) +{ + strlcpy(drvinfo->driver, mlx5e_rep_driver_name, + sizeof(drvinfo->driver)); + strlcpy(drvinfo->version, UTS_RELEASE, sizeof(drvinfo->version)); +} + +static const struct counter_desc sw_rep_stats_desc[] = { + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_bytes) }, +}; + +#define NUM_VPORT_REP_COUNTERS ARRAY_SIZE(sw_rep_stats_desc) + +static void mlx5e_rep_get_strings(struct net_device *dev, + u32 stringset, uint8_t *data) +{ + int i; + + switch (stringset) { + case ETH_SS_STATS: + for (i = 0; i < NUM_VPORT_REP_COUNTERS; i++) + strcpy(data + (i * ETH_GSTRING_LEN), + sw_rep_stats_desc[i].format); + break; + } +} + +static void mlx5e_update_sw_rep_counters(struct mlx5e_priv *priv) +{ + struct mlx5e_sw_stats *s = &priv->stats.sw; + struct mlx5e_rq_stats *rq_stats; + struct mlx5e_sq_stats *sq_stats; + int i, j; + + memset(s, 0, sizeof(*s)); + for (i = 0; i < priv->params.num_channels; i++) { + rq_stats = &priv->channel[i]->rq.stats; + + s->rx_packets += rq_stats->packets; + s->rx_bytes += rq_stats->bytes; + + for (j = 0; j < priv->params.num_tc; j++) { + sq_stats = &priv->channel[i]->sq[j].stats; + + s->tx_packets += sq_stats->packets; + s->tx_bytes += sq_stats->bytes; + } + } +} + +static void mlx5e_rep_get_ethtool_stats(struct net_device *dev, + struct ethtool_stats *stats, u64 *data) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + int i; + + if (!data) + return; + + mutex_lock(&priv->state_lock); + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) + mlx5e_update_sw_rep_counters(priv); + mutex_unlock(&priv->state_lock); + + for (i = 0; i < NUM_VPORT_REP_COUNTERS; i++) + data[i] = MLX5E_READ_CTR64_CPU(&priv->stats.sw, + sw_rep_stats_desc, i); +} + +static int mlx5e_rep_get_sset_count(struct net_device *dev, int sset) +{ + switch (sset) { + case ETH_SS_STATS: + return NUM_VPORT_REP_COUNTERS; + default: + return -EOPNOTSUPP; + } +} + +static const struct ethtool_ops mlx5e_rep_ethtool_ops = { + .get_drvinfo = mlx5e_rep_get_drvinfo, + .get_link = ethtool_op_get_link, + .get_strings = mlx5e_rep_get_strings, + .get_sset_count = mlx5e_rep_get_sset_count, + .get_ethtool_stats = mlx5e_rep_get_ethtool_stats, +}; + +int mlx5e_attr_get(struct net_device *dev, struct switchdev_attr *attr) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_eswitch_rep *rep = priv->ppriv; + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + + if (esw->mode == SRIOV_NONE) + return -EOPNOTSUPP; + + switch (attr->id) { + case SWITCHDEV_ATTR_ID_PORT_PARENT_ID: + attr->u.ppid.id_len = ETH_ALEN; + ether_addr_copy(attr->u.ppid.id, rep->hw_id); + break; + default: + return -EOPNOTSUPP; + } + + return 0; +} + +int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv) + +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_eswitch_rep *rep = priv->ppriv; + struct mlx5e_channel *c; + int n, tc, err, num_sqs = 0; + u16 *sqs; + + sqs = kcalloc(priv->params.num_channels * priv->params.num_tc, sizeof(u16), GFP_KERNEL); + if (!sqs) + return -ENOMEM; + + for (n = 0; n < priv->params.num_channels; n++) { + c = priv->channel[n]; + for (tc = 0; tc < c->num_tc; tc++) + sqs[num_sqs++] = c->sq[tc].sqn; + } + + err = mlx5_eswitch_sqs2vport_start(esw, rep, sqs, num_sqs); + + kfree(sqs); + return err; +} + +int mlx5e_nic_rep_load(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep) +{ + struct mlx5e_priv *priv = rep->priv_data; + + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) + return mlx5e_add_sqs_fwd_rules(priv); + return 0; +} + +void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_eswitch_rep *rep = priv->ppriv; + + mlx5_eswitch_sqs2vport_stop(esw, rep); +} + +void mlx5e_nic_rep_unload(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep *rep) +{ + struct mlx5e_priv *priv = rep->priv_data; + + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) + mlx5e_remove_sqs_fwd_rules(priv); + + /* clean (and re-init) existing uplink offloaded TC rules */ + mlx5e_tc_cleanup(priv); + mlx5e_tc_init(priv); +} + +static int mlx5e_rep_get_phys_port_name(struct net_device *dev, + char *buf, size_t len) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_eswitch_rep *rep = priv->ppriv; + int ret; + + ret = snprintf(buf, len, "%d", rep->vport - 1); + if (ret >= len) + return -EOPNOTSUPP; + + return 0; +} + +static int mlx5e_rep_ndo_setup_tc(struct net_device *dev, u32 handle, + __be16 proto, struct tc_to_netdev *tc) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + if (TC_H_MAJ(handle) != TC_H_MAJ(TC_H_INGRESS)) + return -EOPNOTSUPP; + + switch (tc->type) { + case TC_SETUP_CLSFLOWER: + switch (tc->cls_flower->command) { + case TC_CLSFLOWER_REPLACE: + return mlx5e_configure_flower(priv, proto, tc->cls_flower); + case TC_CLSFLOWER_DESTROY: + return mlx5e_delete_flower(priv, tc->cls_flower); + case TC_CLSFLOWER_STATS: + return mlx5e_stats_flower(priv, tc->cls_flower); + } + default: + return -EOPNOTSUPP; + } +} + +static const struct switchdev_ops mlx5e_rep_switchdev_ops = { + .switchdev_port_attr_get = mlx5e_attr_get, +}; + +static const struct net_device_ops mlx5e_netdev_ops_rep = { + .ndo_open = mlx5e_open, + .ndo_stop = mlx5e_close, + .ndo_start_xmit = mlx5e_xmit, + .ndo_get_phys_port_name = mlx5e_rep_get_phys_port_name, + .ndo_setup_tc = mlx5e_rep_ndo_setup_tc, + .ndo_get_stats64 = mlx5e_get_stats, +}; + +static void mlx5e_build_rep_netdev_priv(struct mlx5_core_dev *mdev, + struct net_device *netdev, + const struct mlx5e_profile *profile, + void *ppriv) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + u8 cq_period_mode = MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ? + MLX5_CQ_PERIOD_MODE_START_FROM_CQE : + MLX5_CQ_PERIOD_MODE_START_FROM_EQE; + + priv->params.log_sq_size = + MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE; + priv->params.rq_wq_type = MLX5_WQ_TYPE_LINKED_LIST; + priv->params.log_rq_size = MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE; + + priv->params.min_rx_wqes = mlx5_min_rx_wqes(priv->params.rq_wq_type, + BIT(priv->params.log_rq_size)); + + priv->params.rx_am_enabled = MLX5_CAP_GEN(mdev, cq_moderation); + mlx5e_set_rx_cq_mode_params(&priv->params, cq_period_mode); + + priv->params.tx_max_inline = mlx5e_get_max_inline_cap(mdev); + priv->params.num_tc = 1; + + priv->params.lro_wqe_sz = + MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ; + + priv->mdev = mdev; + priv->netdev = netdev; + priv->params.num_channels = profile->max_nch(mdev); + priv->profile = profile; + priv->ppriv = ppriv; + + mutex_init(&priv->state_lock); + + INIT_DELAYED_WORK(&priv->update_stats_work, mlx5e_update_stats_work); +} + +static void mlx5e_build_rep_netdev(struct net_device *netdev) +{ + netdev->netdev_ops = &mlx5e_netdev_ops_rep; + + netdev->watchdog_timeo = 15 * HZ; + + netdev->ethtool_ops = &mlx5e_rep_ethtool_ops; + +#ifdef CONFIG_NET_SWITCHDEV + netdev->switchdev_ops = &mlx5e_rep_switchdev_ops; +#endif + + netdev->features |= NETIF_F_VLAN_CHALLENGED | NETIF_F_HW_TC; + netdev->hw_features |= NETIF_F_HW_TC; + + eth_hw_addr_random(netdev); +} + +static void mlx5e_init_rep(struct mlx5_core_dev *mdev, + struct net_device *netdev, + const struct mlx5e_profile *profile, + void *ppriv) +{ + mlx5e_build_rep_netdev_priv(mdev, netdev, profile, ppriv); + mlx5e_build_rep_netdev(netdev); +} + +static int mlx5e_init_rep_rx(struct mlx5e_priv *priv) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_eswitch_rep *rep = priv->ppriv; + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5_flow_rule *flow_rule; + int err; + int i; + + err = mlx5e_create_direct_rqts(priv); + if (err) { + mlx5_core_warn(mdev, "create direct rqts failed, %d\n", err); + return err; + } + + err = mlx5e_create_direct_tirs(priv); + if (err) { + mlx5_core_warn(mdev, "create direct tirs failed, %d\n", err); + goto err_destroy_direct_rqts; + } + + flow_rule = mlx5_eswitch_create_vport_rx_rule(esw, + rep->vport, + priv->direct_tir[0].tirn); + if (IS_ERR(flow_rule)) { + err = PTR_ERR(flow_rule); + goto err_destroy_direct_tirs; + } + rep->vport_rx_rule = flow_rule; + + err = mlx5e_tc_init(priv); + if (err) + goto err_del_flow_rule; + + return 0; + +err_del_flow_rule: + mlx5_del_flow_rule(rep->vport_rx_rule); +err_destroy_direct_tirs: + mlx5e_destroy_direct_tirs(priv); +err_destroy_direct_rqts: + for (i = 0; i < priv->params.num_channels; i++) + mlx5e_destroy_rqt(priv, &priv->direct_tir[i].rqt); + return err; +} + +static void mlx5e_cleanup_rep_rx(struct mlx5e_priv *priv) +{ + struct mlx5_eswitch_rep *rep = priv->ppriv; + int i; + + mlx5e_tc_cleanup(priv); + mlx5_del_flow_rule(rep->vport_rx_rule); + mlx5e_destroy_direct_tirs(priv); + for (i = 0; i < priv->params.num_channels; i++) + mlx5e_destroy_rqt(priv, &priv->direct_tir[i].rqt); +} + +static int mlx5e_init_rep_tx(struct mlx5e_priv *priv) +{ + int err; + + err = mlx5e_create_tises(priv); + if (err) { + mlx5_core_warn(priv->mdev, "create tises failed, %d\n", err); + return err; + } + return 0; +} + +static int mlx5e_get_rep_max_num_channels(struct mlx5_core_dev *mdev) +{ +#define MLX5E_PORT_REPRESENTOR_NCH 1 + return MLX5E_PORT_REPRESENTOR_NCH; +} + +static struct mlx5e_profile mlx5e_rep_profile = { + .init = mlx5e_init_rep, + .init_rx = mlx5e_init_rep_rx, + .cleanup_rx = mlx5e_cleanup_rep_rx, + .init_tx = mlx5e_init_rep_tx, + .cleanup_tx = mlx5e_cleanup_nic_tx, + .update_stats = mlx5e_update_sw_rep_counters, + .max_nch = mlx5e_get_rep_max_num_channels, + .max_tc = 1, +}; + +int mlx5e_vport_rep_load(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep *rep) +{ + rep->priv_data = mlx5e_create_netdev(esw->dev, &mlx5e_rep_profile, rep); + if (!rep->priv_data) { + pr_warn("Failed to create representor for vport %d\n", + rep->vport); + return -EINVAL; + } + return 0; +} + +void mlx5e_vport_rep_unload(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep *rep) +{ + struct mlx5e_priv *priv = rep->priv_data; + + mlx5e_destroy_netdev(esw->dev, priv); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 58d4e2f962c3..e7c969df3dad 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -42,13 +42,149 @@ static inline bool mlx5e_rx_hw_stamp(struct mlx5e_tstamp *tstamp) return tstamp->hwtstamp_config.rx_filter == HWTSTAMP_FILTER_ALL; } -static inline int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, - struct mlx5e_rx_wqe *wqe, u16 ix) +static inline void mlx5e_read_cqe_slot(struct mlx5e_cq *cq, u32 cqcc, + void *data) +{ + u32 ci = cqcc & cq->wq.sz_m1; + + memcpy(data, mlx5_cqwq_get_wqe(&cq->wq, ci), sizeof(struct mlx5_cqe64)); +} + +static inline void mlx5e_read_title_slot(struct mlx5e_rq *rq, + struct mlx5e_cq *cq, u32 cqcc) +{ + mlx5e_read_cqe_slot(cq, cqcc, &cq->title); + cq->decmprs_left = be32_to_cpu(cq->title.byte_cnt); + cq->decmprs_wqe_counter = be16_to_cpu(cq->title.wqe_counter); + rq->stats.cqe_compress_blks++; +} + +static inline void mlx5e_read_mini_arr_slot(struct mlx5e_cq *cq, u32 cqcc) +{ + mlx5e_read_cqe_slot(cq, cqcc, cq->mini_arr); + cq->mini_arr_idx = 0; +} + +static inline void mlx5e_cqes_update_owner(struct mlx5e_cq *cq, u32 cqcc, int n) +{ + u8 op_own = (cqcc >> cq->wq.log_sz) & 1; + u32 wq_sz = 1 << cq->wq.log_sz; + u32 ci = cqcc & cq->wq.sz_m1; + u32 ci_top = min_t(u32, wq_sz, ci + n); + + for (; ci < ci_top; ci++, n--) { + struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, ci); + + cqe->op_own = op_own; + } + + if (unlikely(ci == wq_sz)) { + op_own = !op_own; + for (ci = 0; ci < n; ci++) { + struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, ci); + + cqe->op_own = op_own; + } + } +} + +static inline void mlx5e_decompress_cqe(struct mlx5e_rq *rq, + struct mlx5e_cq *cq, u32 cqcc) +{ + u16 wqe_cnt_step; + + cq->title.byte_cnt = cq->mini_arr[cq->mini_arr_idx].byte_cnt; + cq->title.check_sum = cq->mini_arr[cq->mini_arr_idx].checksum; + cq->title.op_own &= 0xf0; + cq->title.op_own |= 0x01 & (cqcc >> cq->wq.log_sz); + cq->title.wqe_counter = cpu_to_be16(cq->decmprs_wqe_counter); + + wqe_cnt_step = + rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ ? + mpwrq_get_cqe_consumed_strides(&cq->title) : 1; + cq->decmprs_wqe_counter = + (cq->decmprs_wqe_counter + wqe_cnt_step) & rq->wq.sz_m1; +} + +static inline void mlx5e_decompress_cqe_no_hash(struct mlx5e_rq *rq, + struct mlx5e_cq *cq, u32 cqcc) +{ + mlx5e_decompress_cqe(rq, cq, cqcc); + cq->title.rss_hash_type = 0; + cq->title.rss_hash_result = 0; +} + +static inline u32 mlx5e_decompress_cqes_cont(struct mlx5e_rq *rq, + struct mlx5e_cq *cq, + int update_owner_only, + int budget_rem) +{ + u32 cqcc = cq->wq.cc + update_owner_only; + u32 cqe_count; + u32 i; + + cqe_count = min_t(u32, cq->decmprs_left, budget_rem); + + for (i = update_owner_only; i < cqe_count; + i++, cq->mini_arr_idx++, cqcc++) { + if (cq->mini_arr_idx == MLX5_MINI_CQE_ARRAY_SIZE) + mlx5e_read_mini_arr_slot(cq, cqcc); + + mlx5e_decompress_cqe_no_hash(rq, cq, cqcc); + rq->handle_rx_cqe(rq, &cq->title); + } + mlx5e_cqes_update_owner(cq, cq->wq.cc, cqcc - cq->wq.cc); + cq->wq.cc = cqcc; + cq->decmprs_left -= cqe_count; + rq->stats.cqe_compress_pkts += cqe_count; + + return cqe_count; +} + +static inline u32 mlx5e_decompress_cqes_start(struct mlx5e_rq *rq, + struct mlx5e_cq *cq, + int budget_rem) +{ + mlx5e_read_title_slot(rq, cq, cq->wq.cc); + mlx5e_read_mini_arr_slot(cq, cq->wq.cc + 1); + mlx5e_decompress_cqe(rq, cq, cq->wq.cc); + rq->handle_rx_cqe(rq, &cq->title); + cq->mini_arr_idx++; + + return mlx5e_decompress_cqes_cont(rq, cq, 1, budget_rem) - 1; +} + +void mlx5e_modify_rx_cqe_compression(struct mlx5e_priv *priv, bool val) +{ + bool was_opened; + + if (!MLX5_CAP_GEN(priv->mdev, cqe_compression)) + return; + + mutex_lock(&priv->state_lock); + + if (priv->params.rx_cqe_compress == val) + goto unlock; + + was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state); + if (was_opened) + mlx5e_close_locked(priv->netdev); + + priv->params.rx_cqe_compress = val; + + if (was_opened) + mlx5e_open_locked(priv->netdev); + +unlock: + mutex_unlock(&priv->state_lock); +} + +int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix) { struct sk_buff *skb; dma_addr_t dma_addr; - skb = netdev_alloc_skb(rq->netdev, rq->wqe_sz); + skb = napi_alloc_skb(rq->cq.napi, rq->wqe_sz); if (unlikely(!skb)) return -ENOMEM; @@ -62,10 +198,9 @@ static inline int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, if (unlikely(dma_mapping_error(rq->pdev, dma_addr))) goto err_free_skb; - skb_reserve(skb, MLX5E_NET_IP_ALIGN); - *((dma_addr_t *)skb->cb) = dma_addr; - wqe->data.addr = cpu_to_be64(dma_addr + MLX5E_NET_IP_ALIGN); + wqe->data.addr = cpu_to_be64(dma_addr); + wqe->data.lkey = rq->mkey_be; rq->skb[ix] = skb; @@ -77,18 +212,416 @@ err_free_skb: return -ENOMEM; } +void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix) +{ + struct sk_buff *skb = rq->skb[ix]; + + if (skb) { + rq->skb[ix] = NULL; + dma_unmap_single(rq->pdev, + *((dma_addr_t *)skb->cb), + rq->wqe_sz, + DMA_FROM_DEVICE); + dev_kfree_skb(skb); + } +} + +static inline int mlx5e_mpwqe_strides_per_page(struct mlx5e_rq *rq) +{ + return rq->mpwqe_num_strides >> MLX5_MPWRQ_WQE_PAGE_ORDER; +} + +static inline void +mlx5e_dma_pre_sync_linear_mpwqe(struct device *pdev, + struct mlx5e_mpw_info *wi, + u32 wqe_offset, u32 len) +{ + dma_sync_single_for_cpu(pdev, wi->dma_info.addr + wqe_offset, + len, DMA_FROM_DEVICE); +} + +static inline void +mlx5e_dma_pre_sync_fragmented_mpwqe(struct device *pdev, + struct mlx5e_mpw_info *wi, + u32 wqe_offset, u32 len) +{ + /* No dma pre sync for fragmented MPWQE */ +} + +static inline void +mlx5e_add_skb_frag_linear_mpwqe(struct mlx5e_rq *rq, + struct sk_buff *skb, + struct mlx5e_mpw_info *wi, + u32 page_idx, u32 frag_offset, + u32 len) +{ + unsigned int truesize = ALIGN(len, rq->mpwqe_stride_sz); + + wi->skbs_frags[page_idx]++; + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, + &wi->dma_info.page[page_idx], frag_offset, + len, truesize); +} + +static inline void +mlx5e_add_skb_frag_fragmented_mpwqe(struct mlx5e_rq *rq, + struct sk_buff *skb, + struct mlx5e_mpw_info *wi, + u32 page_idx, u32 frag_offset, + u32 len) +{ + unsigned int truesize = ALIGN(len, rq->mpwqe_stride_sz); + + dma_sync_single_for_cpu(rq->pdev, + wi->umr.dma_info[page_idx].addr + frag_offset, + len, DMA_FROM_DEVICE); + wi->skbs_frags[page_idx]++; + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, + wi->umr.dma_info[page_idx].page, frag_offset, + len, truesize); +} + +static inline void +mlx5e_copy_skb_header_linear_mpwqe(struct device *pdev, + struct sk_buff *skb, + struct mlx5e_mpw_info *wi, + u32 page_idx, u32 offset, + u32 headlen) +{ + struct page *page = &wi->dma_info.page[page_idx]; + + skb_copy_to_linear_data(skb, page_address(page) + offset, + ALIGN(headlen, sizeof(long))); +} + +static inline void +mlx5e_copy_skb_header_fragmented_mpwqe(struct device *pdev, + struct sk_buff *skb, + struct mlx5e_mpw_info *wi, + u32 page_idx, u32 offset, + u32 headlen) +{ + u16 headlen_pg = min_t(u32, headlen, PAGE_SIZE - offset); + struct mlx5e_dma_info *dma_info = &wi->umr.dma_info[page_idx]; + unsigned int len; + + /* Aligning len to sizeof(long) optimizes memcpy performance */ + len = ALIGN(headlen_pg, sizeof(long)); + dma_sync_single_for_cpu(pdev, dma_info->addr + offset, len, + DMA_FROM_DEVICE); + skb_copy_to_linear_data_offset(skb, 0, + page_address(dma_info->page) + offset, + len); + if (unlikely(offset + headlen > PAGE_SIZE)) { + dma_info++; + headlen_pg = len; + len = ALIGN(headlen - headlen_pg, sizeof(long)); + dma_sync_single_for_cpu(pdev, dma_info->addr, len, + DMA_FROM_DEVICE); + skb_copy_to_linear_data_offset(skb, headlen_pg, + page_address(dma_info->page), + len); + } +} + +static u32 mlx5e_get_wqe_mtt_offset(struct mlx5e_rq *rq, u16 wqe_ix) +{ + return rq->mpwqe_mtt_offset + + wqe_ix * ALIGN(MLX5_MPWRQ_PAGES_PER_WQE, 8); +} + +static void mlx5e_build_umr_wqe(struct mlx5e_rq *rq, + struct mlx5e_sq *sq, + struct mlx5e_umr_wqe *wqe, + u16 ix) +{ + struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; + struct mlx5_wqe_umr_ctrl_seg *ucseg = &wqe->uctrl; + struct mlx5_wqe_data_seg *dseg = &wqe->data; + struct mlx5e_mpw_info *wi = &rq->wqe_info[ix]; + u8 ds_cnt = DIV_ROUND_UP(sizeof(*wqe), MLX5_SEND_WQE_DS); + u32 umr_wqe_mtt_offset = mlx5e_get_wqe_mtt_offset(rq, ix); + + memset(wqe, 0, sizeof(*wqe)); + cseg->opmod_idx_opcode = + cpu_to_be32((sq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) | + MLX5_OPCODE_UMR); + cseg->qpn_ds = cpu_to_be32((sq->sqn << MLX5_WQE_CTRL_QPN_SHIFT) | + ds_cnt); + cseg->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE; + cseg->imm = rq->umr_mkey_be; + + ucseg->flags = MLX5_UMR_TRANSLATION_OFFSET_EN; + ucseg->klm_octowords = + cpu_to_be16(MLX5_MTT_OCTW(MLX5_MPWRQ_PAGES_PER_WQE)); + ucseg->bsf_octowords = + cpu_to_be16(MLX5_MTT_OCTW(umr_wqe_mtt_offset)); + ucseg->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE); + + dseg->lkey = sq->mkey_be; + dseg->addr = cpu_to_be64(wi->umr.mtt_addr); +} + +static void mlx5e_post_umr_wqe(struct mlx5e_rq *rq, u16 ix) +{ + struct mlx5e_sq *sq = &rq->channel->icosq; + struct mlx5_wq_cyc *wq = &sq->wq; + struct mlx5e_umr_wqe *wqe; + u8 num_wqebbs = DIV_ROUND_UP(sizeof(*wqe), MLX5_SEND_WQE_BB); + u16 pi; + + /* fill sq edge with nops to avoid wqe wrap around */ + while ((pi = (sq->pc & wq->sz_m1)) > sq->edge) { + sq->ico_wqe_info[pi].opcode = MLX5_OPCODE_NOP; + sq->ico_wqe_info[pi].num_wqebbs = 1; + mlx5e_send_nop(sq, true); + } + + wqe = mlx5_wq_cyc_get_wqe(wq, pi); + mlx5e_build_umr_wqe(rq, sq, wqe, ix); + sq->ico_wqe_info[pi].opcode = MLX5_OPCODE_UMR; + sq->ico_wqe_info[pi].num_wqebbs = num_wqebbs; + sq->pc += num_wqebbs; + mlx5e_tx_notify_hw(sq, &wqe->ctrl, 0); +} + +static inline int mlx5e_get_wqe_mtt_sz(void) +{ + /* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes. + * To avoid copying garbage after the mtt array, we allocate + * a little more. + */ + return ALIGN(MLX5_MPWRQ_PAGES_PER_WQE * sizeof(__be64), + MLX5_UMR_MTT_ALIGNMENT); +} + +static int mlx5e_alloc_and_map_page(struct mlx5e_rq *rq, + struct mlx5e_mpw_info *wi, + int i) +{ + struct page *page; + + page = dev_alloc_page(); + if (unlikely(!page)) + return -ENOMEM; + + wi->umr.dma_info[i].page = page; + wi->umr.dma_info[i].addr = dma_map_page(rq->pdev, page, 0, PAGE_SIZE, + PCI_DMA_FROMDEVICE); + if (unlikely(dma_mapping_error(rq->pdev, wi->umr.dma_info[i].addr))) { + put_page(page); + return -ENOMEM; + } + wi->umr.mtt[i] = cpu_to_be64(wi->umr.dma_info[i].addr | MLX5_EN_WR); + + return 0; +} + +static int mlx5e_alloc_rx_fragmented_mpwqe(struct mlx5e_rq *rq, + struct mlx5e_rx_wqe *wqe, + u16 ix) +{ + struct mlx5e_mpw_info *wi = &rq->wqe_info[ix]; + int mtt_sz = mlx5e_get_wqe_mtt_sz(); + u64 dma_offset = (u64)mlx5e_get_wqe_mtt_offset(rq, ix) << PAGE_SHIFT; + int i; + + wi->umr.dma_info = kmalloc(sizeof(*wi->umr.dma_info) * + MLX5_MPWRQ_PAGES_PER_WQE, + GFP_ATOMIC); + if (unlikely(!wi->umr.dma_info)) + goto err_out; + + /* We allocate more than mtt_sz as we will align the pointer */ + wi->umr.mtt_no_align = kzalloc(mtt_sz + MLX5_UMR_ALIGN - 1, + GFP_ATOMIC); + if (unlikely(!wi->umr.mtt_no_align)) + goto err_free_umr; + + wi->umr.mtt = PTR_ALIGN(wi->umr.mtt_no_align, MLX5_UMR_ALIGN); + wi->umr.mtt_addr = dma_map_single(rq->pdev, wi->umr.mtt, mtt_sz, + PCI_DMA_TODEVICE); + if (unlikely(dma_mapping_error(rq->pdev, wi->umr.mtt_addr))) + goto err_free_mtt; + + for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) { + if (unlikely(mlx5e_alloc_and_map_page(rq, wi, i))) + goto err_unmap; + page_ref_add(wi->umr.dma_info[i].page, + mlx5e_mpwqe_strides_per_page(rq)); + wi->skbs_frags[i] = 0; + } + + wi->consumed_strides = 0; + wi->dma_pre_sync = mlx5e_dma_pre_sync_fragmented_mpwqe; + wi->add_skb_frag = mlx5e_add_skb_frag_fragmented_mpwqe; + wi->copy_skb_header = mlx5e_copy_skb_header_fragmented_mpwqe; + wi->free_wqe = mlx5e_free_rx_fragmented_mpwqe; + wqe->data.lkey = rq->umr_mkey_be; + wqe->data.addr = cpu_to_be64(dma_offset); + + return 0; + +err_unmap: + while (--i >= 0) { + dma_unmap_page(rq->pdev, wi->umr.dma_info[i].addr, PAGE_SIZE, + PCI_DMA_FROMDEVICE); + page_ref_sub(wi->umr.dma_info[i].page, + mlx5e_mpwqe_strides_per_page(rq)); + put_page(wi->umr.dma_info[i].page); + } + dma_unmap_single(rq->pdev, wi->umr.mtt_addr, mtt_sz, PCI_DMA_TODEVICE); + +err_free_mtt: + kfree(wi->umr.mtt_no_align); + +err_free_umr: + kfree(wi->umr.dma_info); + +err_out: + return -ENOMEM; +} + +void mlx5e_free_rx_fragmented_mpwqe(struct mlx5e_rq *rq, + struct mlx5e_mpw_info *wi) +{ + int mtt_sz = mlx5e_get_wqe_mtt_sz(); + int i; + + for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) { + dma_unmap_page(rq->pdev, wi->umr.dma_info[i].addr, PAGE_SIZE, + PCI_DMA_FROMDEVICE); + page_ref_sub(wi->umr.dma_info[i].page, + mlx5e_mpwqe_strides_per_page(rq) - wi->skbs_frags[i]); + put_page(wi->umr.dma_info[i].page); + } + dma_unmap_single(rq->pdev, wi->umr.mtt_addr, mtt_sz, PCI_DMA_TODEVICE); + kfree(wi->umr.mtt_no_align); + kfree(wi->umr.dma_info); +} + +void mlx5e_post_rx_fragmented_mpwqe(struct mlx5e_rq *rq) +{ + struct mlx5_wq_ll *wq = &rq->wq; + struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(wq, wq->head); + + clear_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state); + + if (unlikely(test_bit(MLX5E_RQ_STATE_FLUSH, &rq->state))) { + mlx5e_free_rx_fragmented_mpwqe(rq, &rq->wqe_info[wq->head]); + return; + } + + mlx5_wq_ll_push(wq, be16_to_cpu(wqe->next.next_wqe_index)); + rq->stats.mpwqe_frag++; + + /* ensure wqes are visible to device before updating doorbell record */ + dma_wmb(); + + mlx5_wq_ll_update_db_record(wq); +} + +static int mlx5e_alloc_rx_linear_mpwqe(struct mlx5e_rq *rq, + struct mlx5e_rx_wqe *wqe, + u16 ix) +{ + struct mlx5e_mpw_info *wi = &rq->wqe_info[ix]; + gfp_t gfp_mask; + int i; + + gfp_mask = GFP_ATOMIC | __GFP_COLD | __GFP_MEMALLOC; + wi->dma_info.page = alloc_pages_node(NUMA_NO_NODE, gfp_mask, + MLX5_MPWRQ_WQE_PAGE_ORDER); + if (unlikely(!wi->dma_info.page)) + return -ENOMEM; + + wi->dma_info.addr = dma_map_page(rq->pdev, wi->dma_info.page, 0, + rq->wqe_sz, PCI_DMA_FROMDEVICE); + if (unlikely(dma_mapping_error(rq->pdev, wi->dma_info.addr))) { + put_page(wi->dma_info.page); + return -ENOMEM; + } + + /* We split the high-order page into order-0 ones and manage their + * reference counter to minimize the memory held by small skb fragments + */ + split_page(wi->dma_info.page, MLX5_MPWRQ_WQE_PAGE_ORDER); + for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) { + page_ref_add(&wi->dma_info.page[i], + mlx5e_mpwqe_strides_per_page(rq)); + wi->skbs_frags[i] = 0; + } + + wi->consumed_strides = 0; + wi->dma_pre_sync = mlx5e_dma_pre_sync_linear_mpwqe; + wi->add_skb_frag = mlx5e_add_skb_frag_linear_mpwqe; + wi->copy_skb_header = mlx5e_copy_skb_header_linear_mpwqe; + wi->free_wqe = mlx5e_free_rx_linear_mpwqe; + wqe->data.lkey = rq->mkey_be; + wqe->data.addr = cpu_to_be64(wi->dma_info.addr); + + return 0; +} + +void mlx5e_free_rx_linear_mpwqe(struct mlx5e_rq *rq, + struct mlx5e_mpw_info *wi) +{ + int i; + + dma_unmap_page(rq->pdev, wi->dma_info.addr, rq->wqe_sz, + PCI_DMA_FROMDEVICE); + for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) { + page_ref_sub(&wi->dma_info.page[i], + mlx5e_mpwqe_strides_per_page(rq) - wi->skbs_frags[i]); + put_page(&wi->dma_info.page[i]); + } +} + +int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix) +{ + int err; + + err = mlx5e_alloc_rx_linear_mpwqe(rq, wqe, ix); + if (unlikely(err)) { + err = mlx5e_alloc_rx_fragmented_mpwqe(rq, wqe, ix); + if (unlikely(err)) + return err; + set_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state); + mlx5e_post_umr_wqe(rq, ix); + return -EBUSY; + } + + return 0; +} + +void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) +{ + struct mlx5e_mpw_info *wi = &rq->wqe_info[ix]; + + wi->free_wqe(rq, wi); +} + +#define RQ_CANNOT_POST(rq) \ + (test_bit(MLX5E_RQ_STATE_FLUSH, &rq->state) || \ + test_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state)) + bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq) { struct mlx5_wq_ll *wq = &rq->wq; - if (unlikely(!test_bit(MLX5E_RQ_STATE_POST_WQES_ENABLE, &rq->state))) + if (unlikely(RQ_CANNOT_POST(rq))) return false; while (!mlx5_wq_ll_is_full(wq)) { struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(wq, wq->head); + int err; - if (unlikely(mlx5e_alloc_rx_wqe(rq, wqe, wq->head))) + err = rq->alloc_wqe(rq, wqe, wq->head); + if (unlikely(err)) { + if (err != -EBUSY) + rq->stats.buff_alloc_err++; break; + } mlx5_wq_ll_push(wq, be16_to_cpu(wqe->next.next_wqe_index)); } @@ -101,26 +634,35 @@ bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq) return !mlx5_wq_ll_is_full(wq); } -static void mlx5e_lro_update_hdr(struct sk_buff *skb, struct mlx5_cqe64 *cqe) +static void mlx5e_lro_update_hdr(struct sk_buff *skb, struct mlx5_cqe64 *cqe, + u32 cqe_bcnt) { - struct ethhdr *eth = (struct ethhdr *)(skb->data); - struct iphdr *ipv4 = (struct iphdr *)(skb->data + ETH_HLEN); - struct ipv6hdr *ipv6 = (struct ipv6hdr *)(skb->data + ETH_HLEN); + struct ethhdr *eth = (struct ethhdr *)(skb->data); + struct iphdr *ipv4; + struct ipv6hdr *ipv6; struct tcphdr *tcp; + int network_depth = 0; + __be16 proto; + u16 tot_len; u8 l4_hdr_type = get_cqe_l4_hdr_type(cqe); int tcp_ack = ((CQE_L4_HDR_TYPE_TCP_ACK_NO_DATA == l4_hdr_type) || (CQE_L4_HDR_TYPE_TCP_ACK_AND_DATA == l4_hdr_type)); - u16 tot_len = be32_to_cpu(cqe->byte_cnt) - ETH_HLEN; + skb->mac_len = ETH_HLEN; + proto = __vlan_get_protocol(skb, eth->h_proto, &network_depth); + + ipv4 = (struct iphdr *)(skb->data + network_depth); + ipv6 = (struct ipv6hdr *)(skb->data + network_depth); + tot_len = cqe_bcnt - network_depth; - if (eth->h_proto == htons(ETH_P_IP)) { - tcp = (struct tcphdr *)(skb->data + ETH_HLEN + + if (proto == htons(ETH_P_IP)) { + tcp = (struct tcphdr *)(skb->data + network_depth + sizeof(struct iphdr)); ipv6 = NULL; skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; } else { - tcp = (struct tcphdr *)(skb->data + ETH_HLEN + + tcp = (struct tcphdr *)(skb->data + network_depth + sizeof(struct ipv6hdr)); ipv4 = NULL; skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6; @@ -176,35 +718,43 @@ static inline void mlx5e_handle_csum(struct net_device *netdev, if (lro) { skb->ip_summed = CHECKSUM_UNNECESSARY; - } else if (likely(is_first_ethertype_ip(skb))) { + return; + } + + if (is_first_ethertype_ip(skb)) { skb->ip_summed = CHECKSUM_COMPLETE; skb->csum = csum_unfold((__force __sum16)cqe->check_sum); - rq->stats.csum_sw++; - } else { - goto csum_none; + rq->stats.csum_complete++; + return; } - return; - + if (likely((cqe->hds_ip_ext & CQE_L3_OK) && + (cqe->hds_ip_ext & CQE_L4_OK))) { + skb->ip_summed = CHECKSUM_UNNECESSARY; + if (cqe_is_tunneled(cqe)) { + skb->csum_level = 1; + skb->encapsulation = 1; + rq->stats.csum_unnecessary_inner++; + } + return; + } csum_none: skb->ip_summed = CHECKSUM_NONE; rq->stats.csum_none++; } static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe, + u32 cqe_bcnt, struct mlx5e_rq *rq, struct sk_buff *skb) { struct net_device *netdev = rq->netdev; - u32 cqe_bcnt = be32_to_cpu(cqe->byte_cnt); struct mlx5e_tstamp *tstamp = rq->tstamp; int lro_num_seg; - skb_put(skb, cqe_bcnt); - lro_num_seg = be32_to_cpu(cqe->srqn) >> 24; if (lro_num_seg > 1) { - mlx5e_lro_update_hdr(skb, cqe); + mlx5e_lro_update_hdr(skb, cqe, cqe_bcnt); skb_shinfo(skb)->gso_size = DIV_ROUND_UP(cqe_bcnt, lro_num_seg); rq->stats.lro_packets++; rq->stats.lro_bytes += cqe_bcnt; @@ -213,10 +763,6 @@ static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe, if (unlikely(mlx5e_rx_hw_stamp(tstamp))) mlx5e_fill_hwstamp(tstamp, get_cqe_ts(cqe), skb_hwtstamps(skb)); - mlx5e_handle_csum(netdev, cqe, rq, skb, !!lro_num_seg); - - skb->protocol = eth_type_trans(skb, netdev); - skb_record_rx_queue(skb, rq->ix); if (likely(netdev->features & NETIF_F_RXHASH)) @@ -227,52 +773,168 @@ static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe, be16_to_cpu(cqe->vlan_info)); skb->mark = be32_to_cpu(cqe->sop_drop_qpn) & MLX5E_TC_FLOW_ID_MASK; + + mlx5e_handle_csum(netdev, cqe, rq, skb, !!lro_num_seg); + skb->protocol = eth_type_trans(skb, netdev); +} + +static inline void mlx5e_complete_rx_cqe(struct mlx5e_rq *rq, + struct mlx5_cqe64 *cqe, + u32 cqe_bcnt, + struct sk_buff *skb) +{ + rq->stats.packets++; + rq->stats.bytes += cqe_bcnt; + mlx5e_build_rx_skb(cqe, cqe_bcnt, rq, skb); + napi_gro_receive(rq->cq.napi, skb); +} + +void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) +{ + struct mlx5e_rx_wqe *wqe; + struct sk_buff *skb; + __be16 wqe_counter_be; + u16 wqe_counter; + u32 cqe_bcnt; + + wqe_counter_be = cqe->wqe_counter; + wqe_counter = be16_to_cpu(wqe_counter_be); + wqe = mlx5_wq_ll_get_wqe(&rq->wq, wqe_counter); + skb = rq->skb[wqe_counter]; + prefetch(skb->data); + rq->skb[wqe_counter] = NULL; + + dma_unmap_single(rq->pdev, + *((dma_addr_t *)skb->cb), + rq->wqe_sz, + DMA_FROM_DEVICE); + + if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) { + rq->stats.wqe_err++; + dev_kfree_skb(skb); + goto wq_ll_pop; + } + + cqe_bcnt = be32_to_cpu(cqe->byte_cnt); + skb_put(skb, cqe_bcnt); + + mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); + +wq_ll_pop: + mlx5_wq_ll_pop(&rq->wq, wqe_counter_be, + &wqe->next.next_wqe_index); +} + +static inline void mlx5e_mpwqe_fill_rx_skb(struct mlx5e_rq *rq, + struct mlx5_cqe64 *cqe, + struct mlx5e_mpw_info *wi, + u32 cqe_bcnt, + struct sk_buff *skb) +{ + u32 consumed_bytes = ALIGN(cqe_bcnt, rq->mpwqe_stride_sz); + u16 stride_ix = mpwrq_get_cqe_stride_index(cqe); + u32 wqe_offset = stride_ix * rq->mpwqe_stride_sz; + u32 head_offset = wqe_offset & (PAGE_SIZE - 1); + u32 page_idx = wqe_offset >> PAGE_SHIFT; + u32 head_page_idx = page_idx; + u16 headlen = min_t(u16, MLX5_MPWRQ_SMALL_PACKET_THRESHOLD, cqe_bcnt); + u32 frag_offset = head_offset + headlen; + u16 byte_cnt = cqe_bcnt - headlen; + + if (unlikely(frag_offset >= PAGE_SIZE)) { + page_idx++; + frag_offset -= PAGE_SIZE; + } + wi->dma_pre_sync(rq->pdev, wi, wqe_offset, consumed_bytes); + + while (byte_cnt) { + u32 pg_consumed_bytes = + min_t(u32, PAGE_SIZE - frag_offset, byte_cnt); + + wi->add_skb_frag(rq, skb, wi, page_idx, frag_offset, + pg_consumed_bytes); + byte_cnt -= pg_consumed_bytes; + frag_offset = 0; + page_idx++; + } + /* copy header */ + wi->copy_skb_header(rq->pdev, skb, wi, head_page_idx, head_offset, + headlen); + /* skb linear part was allocated with headlen and aligned to long */ + skb->tail += headlen; + skb->len += headlen; +} + +void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) +{ + u16 cstrides = mpwrq_get_cqe_consumed_strides(cqe); + u16 wqe_id = be16_to_cpu(cqe->wqe_id); + struct mlx5e_mpw_info *wi = &rq->wqe_info[wqe_id]; + struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(&rq->wq, wqe_id); + struct sk_buff *skb; + u16 cqe_bcnt; + + wi->consumed_strides += cstrides; + + if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) { + rq->stats.wqe_err++; + goto mpwrq_cqe_out; + } + + if (unlikely(mpwrq_is_filler_cqe(cqe))) { + rq->stats.mpwqe_filler++; + goto mpwrq_cqe_out; + } + + skb = napi_alloc_skb(rq->cq.napi, + ALIGN(MLX5_MPWRQ_SMALL_PACKET_THRESHOLD, + sizeof(long))); + if (unlikely(!skb)) { + rq->stats.buff_alloc_err++; + goto mpwrq_cqe_out; + } + + prefetch(skb->data); + cqe_bcnt = mpwrq_get_cqe_byte_cnt(cqe); + + mlx5e_mpwqe_fill_rx_skb(rq, cqe, wi, cqe_bcnt, skb); + mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); + +mpwrq_cqe_out: + if (likely(wi->consumed_strides < rq->mpwqe_num_strides)) + return; + + wi->free_wqe(rq, wi); + mlx5_wq_ll_pop(&rq->wq, cqe->wqe_id, &wqe->next.next_wqe_index); } int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget) { struct mlx5e_rq *rq = container_of(cq, struct mlx5e_rq, cq); - int work_done; + int work_done = 0; - for (work_done = 0; work_done < budget; work_done++) { - struct mlx5e_rx_wqe *wqe; - struct mlx5_cqe64 *cqe; - struct sk_buff *skb; - __be16 wqe_counter_be; - u16 wqe_counter; + if (unlikely(test_bit(MLX5E_RQ_STATE_FLUSH, &rq->state))) + return 0; - cqe = mlx5e_get_cqe(cq); - if (!cqe) - break; + if (cq->decmprs_left) + work_done += mlx5e_decompress_cqes_cont(rq, cq, 0, budget); - mlx5_cqwq_pop(&cq->wq); - - wqe_counter_be = cqe->wqe_counter; - wqe_counter = be16_to_cpu(wqe_counter_be); - wqe = mlx5_wq_ll_get_wqe(&rq->wq, wqe_counter); - skb = rq->skb[wqe_counter]; - prefetch(skb->data); - rq->skb[wqe_counter] = NULL; + for (; work_done < budget; work_done++) { + struct mlx5_cqe64 *cqe = mlx5e_get_cqe(cq); - dma_unmap_single(rq->pdev, - *((dma_addr_t *)skb->cb), - rq->wqe_sz, - DMA_FROM_DEVICE); + if (!cqe) + break; - if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) { - rq->stats.wqe_err++; - dev_kfree_skb(skb); - goto wq_ll_pop; + if (mlx5_get_cqe_format(cqe) == MLX5_COMPRESSED) { + work_done += + mlx5e_decompress_cqes_start(rq, cq, + budget - work_done); + continue; } - mlx5e_build_rx_skb(cqe, rq, skb); - rq->stats.packets++; - rq->stats.bytes += be32_to_cpu(cqe->byte_cnt); - napi_gro_receive(cq->napi, skb); + mlx5_cqwq_pop(&cq->wq); -wq_ll_pop: - mlx5_wq_ll_pop(&rq->wq, wqe_counter_be, - &wqe->next.next_wqe_index); + rq->handle_rx_cqe(rq, cqe); } mlx5_cqwq_update_db_record(&cq->wq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx_am.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx_am.c new file mode 100644 index 000000000000..1fffe48a93cc --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx_am.c @@ -0,0 +1,335 @@ +/* + * Copyright (c) 2016, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "en.h" + +/* Adaptive moderation profiles */ +#define MLX5E_AM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE 256 +#define MLX5E_RX_AM_DEF_PROFILE_CQE 1 +#define MLX5E_RX_AM_DEF_PROFILE_EQE 1 +#define MLX5E_PARAMS_AM_NUM_PROFILES 5 + +/* All profiles sizes must be MLX5E_PARAMS_AM_NUM_PROFILES */ +#define MLX5_AM_EQE_PROFILES { \ + {1, MLX5E_AM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \ + {8, MLX5E_AM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \ + {64, MLX5E_AM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \ + {128, MLX5E_AM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \ + {256, MLX5E_AM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \ +} + +#define MLX5_AM_CQE_PROFILES { \ + {2, 256}, \ + {8, 128}, \ + {16, 64}, \ + {32, 64}, \ + {64, 64} \ +} + +static const struct mlx5e_cq_moder +profile[MLX5_CQ_PERIOD_NUM_MODES][MLX5E_PARAMS_AM_NUM_PROFILES] = { + MLX5_AM_EQE_PROFILES, + MLX5_AM_CQE_PROFILES, +}; + +static inline struct mlx5e_cq_moder mlx5e_am_get_profile(u8 cq_period_mode, int ix) +{ + return profile[cq_period_mode][ix]; +} + +struct mlx5e_cq_moder mlx5e_am_get_def_profile(u8 rx_cq_period_mode) +{ + int default_profile_ix; + + if (rx_cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE) + default_profile_ix = MLX5E_RX_AM_DEF_PROFILE_CQE; + else /* MLX5_CQ_PERIOD_MODE_START_FROM_EQE */ + default_profile_ix = MLX5E_RX_AM_DEF_PROFILE_EQE; + + return profile[rx_cq_period_mode][default_profile_ix]; +} + +/* Adaptive moderation logic */ +enum { + MLX5E_AM_START_MEASURE, + MLX5E_AM_MEASURE_IN_PROGRESS, + MLX5E_AM_APPLY_NEW_PROFILE, +}; + +enum { + MLX5E_AM_PARKING_ON_TOP, + MLX5E_AM_PARKING_TIRED, + MLX5E_AM_GOING_RIGHT, + MLX5E_AM_GOING_LEFT, +}; + +enum { + MLX5E_AM_STATS_WORSE, + MLX5E_AM_STATS_SAME, + MLX5E_AM_STATS_BETTER, +}; + +enum { + MLX5E_AM_STEPPED, + MLX5E_AM_TOO_TIRED, + MLX5E_AM_ON_EDGE, +}; + +static bool mlx5e_am_on_top(struct mlx5e_rx_am *am) +{ + switch (am->tune_state) { + case MLX5E_AM_PARKING_ON_TOP: + case MLX5E_AM_PARKING_TIRED: + WARN_ONCE(true, "mlx5e_am_on_top: PARKING\n"); + return true; + case MLX5E_AM_GOING_RIGHT: + return (am->steps_left > 1) && (am->steps_right == 1); + default: /* MLX5E_AM_GOING_LEFT */ + return (am->steps_right > 1) && (am->steps_left == 1); + } +} + +static void mlx5e_am_turn(struct mlx5e_rx_am *am) +{ + switch (am->tune_state) { + case MLX5E_AM_PARKING_ON_TOP: + case MLX5E_AM_PARKING_TIRED: + WARN_ONCE(true, "mlx5e_am_turn: PARKING\n"); + break; + case MLX5E_AM_GOING_RIGHT: + am->tune_state = MLX5E_AM_GOING_LEFT; + am->steps_left = 0; + break; + case MLX5E_AM_GOING_LEFT: + am->tune_state = MLX5E_AM_GOING_RIGHT; + am->steps_right = 0; + break; + } +} + +static int mlx5e_am_step(struct mlx5e_rx_am *am) +{ + if (am->tired == (MLX5E_PARAMS_AM_NUM_PROFILES * 2)) + return MLX5E_AM_TOO_TIRED; + + switch (am->tune_state) { + case MLX5E_AM_PARKING_ON_TOP: + case MLX5E_AM_PARKING_TIRED: + WARN_ONCE(true, "mlx5e_am_step: PARKING\n"); + break; + case MLX5E_AM_GOING_RIGHT: + if (am->profile_ix == (MLX5E_PARAMS_AM_NUM_PROFILES - 1)) + return MLX5E_AM_ON_EDGE; + am->profile_ix++; + am->steps_right++; + break; + case MLX5E_AM_GOING_LEFT: + if (am->profile_ix == 0) + return MLX5E_AM_ON_EDGE; + am->profile_ix--; + am->steps_left++; + break; + } + + am->tired++; + return MLX5E_AM_STEPPED; +} + +static void mlx5e_am_park_on_top(struct mlx5e_rx_am *am) +{ + am->steps_right = 0; + am->steps_left = 0; + am->tired = 0; + am->tune_state = MLX5E_AM_PARKING_ON_TOP; +} + +static void mlx5e_am_park_tired(struct mlx5e_rx_am *am) +{ + am->steps_right = 0; + am->steps_left = 0; + am->tune_state = MLX5E_AM_PARKING_TIRED; +} + +static void mlx5e_am_exit_parking(struct mlx5e_rx_am *am) +{ + am->tune_state = am->profile_ix ? MLX5E_AM_GOING_LEFT : + MLX5E_AM_GOING_RIGHT; + mlx5e_am_step(am); +} + +static int mlx5e_am_stats_compare(struct mlx5e_rx_am_stats *curr, + struct mlx5e_rx_am_stats *prev) +{ + int diff; + + if (!prev->ppms) + return curr->ppms ? MLX5E_AM_STATS_BETTER : + MLX5E_AM_STATS_SAME; + + diff = curr->ppms - prev->ppms; + if (((100 * abs(diff)) / prev->ppms) > 10) /* more than 10% diff */ + return (diff > 0) ? MLX5E_AM_STATS_BETTER : + MLX5E_AM_STATS_WORSE; + + if (!prev->epms) + return curr->epms ? MLX5E_AM_STATS_WORSE : + MLX5E_AM_STATS_SAME; + + diff = curr->epms - prev->epms; + if (((100 * abs(diff)) / prev->epms) > 10) /* more than 10% diff */ + return (diff < 0) ? MLX5E_AM_STATS_BETTER : + MLX5E_AM_STATS_WORSE; + + return MLX5E_AM_STATS_SAME; +} + +static bool mlx5e_am_decision(struct mlx5e_rx_am_stats *curr_stats, + struct mlx5e_rx_am *am) +{ + int prev_state = am->tune_state; + int prev_ix = am->profile_ix; + int stats_res; + int step_res; + + switch (am->tune_state) { + case MLX5E_AM_PARKING_ON_TOP: + stats_res = mlx5e_am_stats_compare(curr_stats, &am->prev_stats); + if (stats_res != MLX5E_AM_STATS_SAME) + mlx5e_am_exit_parking(am); + break; + + case MLX5E_AM_PARKING_TIRED: + am->tired--; + if (!am->tired) + mlx5e_am_exit_parking(am); + break; + + case MLX5E_AM_GOING_RIGHT: + case MLX5E_AM_GOING_LEFT: + stats_res = mlx5e_am_stats_compare(curr_stats, &am->prev_stats); + if (stats_res != MLX5E_AM_STATS_BETTER) + mlx5e_am_turn(am); + + if (mlx5e_am_on_top(am)) { + mlx5e_am_park_on_top(am); + break; + } + + step_res = mlx5e_am_step(am); + switch (step_res) { + case MLX5E_AM_ON_EDGE: + mlx5e_am_park_on_top(am); + break; + case MLX5E_AM_TOO_TIRED: + mlx5e_am_park_tired(am); + break; + } + + break; + } + + if ((prev_state != MLX5E_AM_PARKING_ON_TOP) || + (am->tune_state != MLX5E_AM_PARKING_ON_TOP)) + am->prev_stats = *curr_stats; + + return am->profile_ix != prev_ix; +} + +static void mlx5e_am_sample(struct mlx5e_rq *rq, + struct mlx5e_rx_am_sample *s) +{ + s->time = ktime_get(); + s->pkt_ctr = rq->stats.packets; + s->event_ctr = rq->cq.event_ctr; +} + +#define MLX5E_AM_NEVENTS 64 + +static void mlx5e_am_calc_stats(struct mlx5e_rx_am_sample *start, + struct mlx5e_rx_am_sample *end, + struct mlx5e_rx_am_stats *curr_stats) +{ + /* u32 holds up to 71 minutes, should be enough */ + u32 delta_us = ktime_us_delta(end->time, start->time); + unsigned int npkts = end->pkt_ctr - start->pkt_ctr; + + if (!delta_us) { + WARN_ONCE(true, "mlx5e_am_calc_stats: delta_us=0\n"); + return; + } + + curr_stats->ppms = (npkts * USEC_PER_MSEC) / delta_us; + curr_stats->epms = (MLX5E_AM_NEVENTS * USEC_PER_MSEC) / delta_us; +} + +void mlx5e_rx_am_work(struct work_struct *work) +{ + struct mlx5e_rx_am *am = container_of(work, struct mlx5e_rx_am, + work); + struct mlx5e_rq *rq = container_of(am, struct mlx5e_rq, am); + struct mlx5e_cq_moder cur_profile = profile[am->mode][am->profile_ix]; + + mlx5_core_modify_cq_moderation(rq->priv->mdev, &rq->cq.mcq, + cur_profile.usec, cur_profile.pkts); + + am->state = MLX5E_AM_START_MEASURE; +} + +void mlx5e_rx_am(struct mlx5e_rq *rq) +{ + struct mlx5e_rx_am *am = &rq->am; + struct mlx5e_rx_am_sample end_sample; + struct mlx5e_rx_am_stats curr_stats; + u16 nevents; + + switch (am->state) { + case MLX5E_AM_MEASURE_IN_PROGRESS: + nevents = rq->cq.event_ctr - am->start_sample.event_ctr; + if (nevents < MLX5E_AM_NEVENTS) + break; + mlx5e_am_sample(rq, &end_sample); + mlx5e_am_calc_stats(&am->start_sample, &end_sample, + &curr_stats); + if (mlx5e_am_decision(&curr_stats, am)) { + am->state = MLX5E_AM_APPLY_NEW_PROFILE; + schedule_work(&am->work); + break; + } + /* fall through */ + case MLX5E_AM_START_MEASURE: + mlx5e_am_sample(rq, &am->start_sample); + am->state = MLX5E_AM_MEASURE_IN_PROGRESS; + break; + case MLX5E_AM_APPLY_NEW_PROFILE: + break; + } +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h new file mode 100644 index 000000000000..499487ce3b53 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -0,0 +1,358 @@ +/* + * Copyright (c) 2015-2016, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __MLX5_EN_STATS_H__ +#define __MLX5_EN_STATS_H__ + +#define MLX5E_READ_CTR64_CPU(ptr, dsc, i) \ + (*(u64 *)((char *)ptr + dsc[i].offset)) +#define MLX5E_READ_CTR64_BE(ptr, dsc, i) \ + be64_to_cpu(*(__be64 *)((char *)ptr + dsc[i].offset)) +#define MLX5E_READ_CTR32_CPU(ptr, dsc, i) \ + (*(u32 *)((char *)ptr + dsc[i].offset)) +#define MLX5E_READ_CTR32_BE(ptr, dsc, i) \ + be64_to_cpu(*(__be32 *)((char *)ptr + dsc[i].offset)) + +#define MLX5E_DECLARE_STAT(type, fld) #fld, offsetof(type, fld) +#define MLX5E_DECLARE_RX_STAT(type, fld) "rx%d_"#fld, offsetof(type, fld) +#define MLX5E_DECLARE_TX_STAT(type, fld) "tx%d_"#fld, offsetof(type, fld) + +struct counter_desc { + char format[ETH_GSTRING_LEN]; + int offset; /* Byte offset */ +}; + +struct mlx5e_sw_stats { + u64 rx_packets; + u64 rx_bytes; + u64 tx_packets; + u64 tx_bytes; + u64 tx_tso_packets; + u64 tx_tso_bytes; + u64 tx_tso_inner_packets; + u64 tx_tso_inner_bytes; + u64 rx_lro_packets; + u64 rx_lro_bytes; + u64 rx_csum_unnecessary; + u64 rx_csum_none; + u64 rx_csum_complete; + u64 rx_csum_unnecessary_inner; + u64 tx_csum_partial; + u64 tx_csum_partial_inner; + u64 tx_queue_stopped; + u64 tx_queue_wake; + u64 tx_queue_dropped; + u64 tx_xmit_more; + u64 rx_wqe_err; + u64 rx_mpwqe_filler; + u64 rx_mpwqe_frag; + u64 rx_buff_alloc_err; + u64 rx_cqe_compress_blks; + u64 rx_cqe_compress_pkts; + + /* Special handling counters */ + u64 link_down_events_phy; +}; + +static const struct counter_desc sw_stats_desc[] = { + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tso_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tso_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tso_inner_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tso_inner_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_lro_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_lro_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_unnecessary) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_none) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_complete) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_unnecessary_inner) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_partial) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_partial_inner) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_stopped) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_wake) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_dropped) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xmit_more) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_wqe_err) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_mpwqe_filler) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_mpwqe_frag) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_buff_alloc_err) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_blks) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_pkts) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, link_down_events_phy) }, +}; + +struct mlx5e_qcounter_stats { + u32 rx_out_of_buffer; +}; + +static const struct counter_desc q_stats_desc[] = { + { MLX5E_DECLARE_STAT(struct mlx5e_qcounter_stats, rx_out_of_buffer) }, +}; + +#define VPORT_COUNTER_OFF(c) MLX5_BYTE_OFF(query_vport_counter_out, c) +#define VPORT_COUNTER_GET(vstats, c) MLX5_GET64(query_vport_counter_out, \ + vstats->query_vport_out, c) + +struct mlx5e_vport_stats { + __be64 query_vport_out[MLX5_ST_SZ_QW(query_vport_counter_out)]; +}; + +static const struct counter_desc vport_stats_desc[] = { + { "rx_vport_unicast_packets", + VPORT_COUNTER_OFF(received_eth_unicast.packets) }, + { "rx_vport_unicast_bytes", + VPORT_COUNTER_OFF(received_eth_unicast.octets) }, + { "tx_vport_unicast_packets", + VPORT_COUNTER_OFF(transmitted_eth_unicast.packets) }, + { "tx_vport_unicast_bytes", + VPORT_COUNTER_OFF(transmitted_eth_unicast.octets) }, + { "rx_vport_multicast_packets", + VPORT_COUNTER_OFF(received_eth_multicast.packets) }, + { "rx_vport_multicast_bytes", + VPORT_COUNTER_OFF(received_eth_multicast.octets) }, + { "tx_vport_multicast_packets", + VPORT_COUNTER_OFF(transmitted_eth_multicast.packets) }, + { "tx_vport_multicast_bytes", + VPORT_COUNTER_OFF(transmitted_eth_multicast.octets) }, + { "rx_vport_broadcast_packets", + VPORT_COUNTER_OFF(received_eth_broadcast.packets) }, + { "rx_vport_broadcast_bytes", + VPORT_COUNTER_OFF(received_eth_broadcast.octets) }, + { "tx_vport_broadcast_packets", + VPORT_COUNTER_OFF(transmitted_eth_broadcast.packets) }, + { "tx_vport_broadcast_bytes", + VPORT_COUNTER_OFF(transmitted_eth_broadcast.octets) }, + { "rx_vport_rdma_unicast_packets", + VPORT_COUNTER_OFF(received_ib_unicast.packets) }, + { "rx_vport_rdma_unicast_bytes", + VPORT_COUNTER_OFF(received_ib_unicast.octets) }, + { "tx_vport_rdma_unicast_packets", + VPORT_COUNTER_OFF(transmitted_ib_unicast.packets) }, + { "tx_vport_rdma_unicast_bytes", + VPORT_COUNTER_OFF(transmitted_ib_unicast.octets) }, + { "rx_vport_rdma_multicast_packets", + VPORT_COUNTER_OFF(received_ib_multicast.packets) }, + { "rx_vport_rdma_multicast_bytes", + VPORT_COUNTER_OFF(received_ib_multicast.octets) }, + { "tx_vport_rdma_multicast_packets", + VPORT_COUNTER_OFF(transmitted_ib_multicast.packets) }, + { "tx_vport_rdma_multicast_bytes", + VPORT_COUNTER_OFF(transmitted_ib_multicast.octets) }, +}; + +#define PPORT_802_3_OFF(c) \ + MLX5_BYTE_OFF(ppcnt_reg, \ + counter_set.eth_802_3_cntrs_grp_data_layout.c##_high) +#define PPORT_802_3_GET(pstats, c) \ + MLX5_GET64(ppcnt_reg, pstats->IEEE_802_3_counters, \ + counter_set.eth_802_3_cntrs_grp_data_layout.c##_high) +#define PPORT_2863_OFF(c) \ + MLX5_BYTE_OFF(ppcnt_reg, \ + counter_set.eth_2863_cntrs_grp_data_layout.c##_high) +#define PPORT_2863_GET(pstats, c) \ + MLX5_GET64(ppcnt_reg, pstats->RFC_2863_counters, \ + counter_set.eth_2863_cntrs_grp_data_layout.c##_high) +#define PPORT_2819_OFF(c) \ + MLX5_BYTE_OFF(ppcnt_reg, \ + counter_set.eth_2819_cntrs_grp_data_layout.c##_high) +#define PPORT_2819_GET(pstats, c) \ + MLX5_GET64(ppcnt_reg, pstats->RFC_2819_counters, \ + counter_set.eth_2819_cntrs_grp_data_layout.c##_high) +#define PPORT_PER_PRIO_OFF(c) \ + MLX5_BYTE_OFF(ppcnt_reg, \ + counter_set.eth_per_prio_grp_data_layout.c##_high) +#define PPORT_PER_PRIO_GET(pstats, prio, c) \ + MLX5_GET64(ppcnt_reg, pstats->per_prio_counters[prio], \ + counter_set.eth_per_prio_grp_data_layout.c##_high) +#define NUM_PPORT_PRIO 8 + +struct mlx5e_pport_stats { + __be64 IEEE_802_3_counters[MLX5_ST_SZ_QW(ppcnt_reg)]; + __be64 RFC_2863_counters[MLX5_ST_SZ_QW(ppcnt_reg)]; + __be64 RFC_2819_counters[MLX5_ST_SZ_QW(ppcnt_reg)]; + __be64 per_prio_counters[NUM_PPORT_PRIO][MLX5_ST_SZ_QW(ppcnt_reg)]; + __be64 phy_counters[MLX5_ST_SZ_QW(ppcnt_reg)]; +}; + +static const struct counter_desc pport_802_3_stats_desc[] = { + { "tx_packets_phy", PPORT_802_3_OFF(a_frames_transmitted_ok) }, + { "rx_packets_phy", PPORT_802_3_OFF(a_frames_received_ok) }, + { "rx_crc_errors_phy", PPORT_802_3_OFF(a_frame_check_sequence_errors) }, + { "tx_bytes_phy", PPORT_802_3_OFF(a_octets_transmitted_ok) }, + { "rx_bytes_phy", PPORT_802_3_OFF(a_octets_received_ok) }, + { "tx_multicast_phy", PPORT_802_3_OFF(a_multicast_frames_xmitted_ok) }, + { "tx_broadcast_phy", PPORT_802_3_OFF(a_broadcast_frames_xmitted_ok) }, + { "rx_multicast_phy", PPORT_802_3_OFF(a_multicast_frames_received_ok) }, + { "rx_broadcast_phy", PPORT_802_3_OFF(a_broadcast_frames_received_ok) }, + { "rx_in_range_len_errors_phy", PPORT_802_3_OFF(a_in_range_length_errors) }, + { "rx_out_of_range_len_phy", PPORT_802_3_OFF(a_out_of_range_length_field) }, + { "rx_oversize_pkts_phy", PPORT_802_3_OFF(a_frame_too_long_errors) }, + { "rx_symbol_err_phy", PPORT_802_3_OFF(a_symbol_error_during_carrier) }, + { "tx_mac_control_phy", PPORT_802_3_OFF(a_mac_control_frames_transmitted) }, + { "rx_mac_control_phy", PPORT_802_3_OFF(a_mac_control_frames_received) }, + { "rx_unsupported_op_phy", PPORT_802_3_OFF(a_unsupported_opcodes_received) }, + { "rx_pause_ctrl_phy", PPORT_802_3_OFF(a_pause_mac_ctrl_frames_received) }, + { "tx_pause_ctrl_phy", PPORT_802_3_OFF(a_pause_mac_ctrl_frames_transmitted) }, +}; + +static const struct counter_desc pport_2863_stats_desc[] = { + { "rx_discards_phy", PPORT_2863_OFF(if_in_discards) }, + { "tx_discards_phy", PPORT_2863_OFF(if_out_discards) }, + { "tx_errors_phy", PPORT_2863_OFF(if_out_errors) }, +}; + +static const struct counter_desc pport_2819_stats_desc[] = { + { "rx_undersize_pkts_phy", PPORT_2819_OFF(ether_stats_undersize_pkts) }, + { "rx_fragments_phy", PPORT_2819_OFF(ether_stats_fragments) }, + { "rx_jabbers_phy", PPORT_2819_OFF(ether_stats_jabbers) }, + { "rx_64_bytes_phy", PPORT_2819_OFF(ether_stats_pkts64octets) }, + { "rx_65_to_127_bytes_phy", PPORT_2819_OFF(ether_stats_pkts65to127octets) }, + { "rx_128_to_255_bytes_phy", PPORT_2819_OFF(ether_stats_pkts128to255octets) }, + { "rx_256_to_511_bytes_phy", PPORT_2819_OFF(ether_stats_pkts256to511octets) }, + { "rx_512_to_1023_bytes_phy", PPORT_2819_OFF(ether_stats_pkts512to1023octets) }, + { "rx_1024_to_1518_bytes_phy", PPORT_2819_OFF(ether_stats_pkts1024to1518octets) }, + { "rx_1519_to_2047_bytes_phy", PPORT_2819_OFF(ether_stats_pkts1519to2047octets) }, + { "rx_2048_to_4095_bytes_phy", PPORT_2819_OFF(ether_stats_pkts2048to4095octets) }, + { "rx_4096_to_8191_bytes_phy", PPORT_2819_OFF(ether_stats_pkts4096to8191octets) }, + { "rx_8192_to_10239_bytes_phy", PPORT_2819_OFF(ether_stats_pkts8192to10239octets) }, +}; + +static const struct counter_desc pport_per_prio_traffic_stats_desc[] = { + { "rx_prio%d_bytes", PPORT_PER_PRIO_OFF(rx_octets) }, + { "rx_prio%d_packets", PPORT_PER_PRIO_OFF(rx_frames) }, + { "tx_prio%d_bytes", PPORT_PER_PRIO_OFF(tx_octets) }, + { "tx_prio%d_packets", PPORT_PER_PRIO_OFF(tx_frames) }, +}; + +static const struct counter_desc pport_per_prio_pfc_stats_desc[] = { + /* %s is "global" or "prio{i}" */ + { "rx_%s_pause", PPORT_PER_PRIO_OFF(rx_pause) }, + { "rx_%s_pause_duration", PPORT_PER_PRIO_OFF(rx_pause_duration) }, + { "tx_%s_pause", PPORT_PER_PRIO_OFF(tx_pause) }, + { "tx_%s_pause_duration", PPORT_PER_PRIO_OFF(tx_pause_duration) }, + { "rx_%s_pause_transition", PPORT_PER_PRIO_OFF(rx_pause_transition) }, +}; + +struct mlx5e_rq_stats { + u64 packets; + u64 bytes; + u64 csum_complete; + u64 csum_unnecessary_inner; + u64 csum_none; + u64 lro_packets; + u64 lro_bytes; + u64 wqe_err; + u64 mpwqe_filler; + u64 mpwqe_frag; + u64 buff_alloc_err; + u64 cqe_compress_blks; + u64 cqe_compress_pkts; +}; + +static const struct counter_desc rq_stats_desc[] = { + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, packets) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, bytes) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_complete) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_unnecessary_inner) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_none) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, lro_packets) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, lro_bytes) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, wqe_err) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, mpwqe_filler) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, mpwqe_frag) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, buff_alloc_err) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cqe_compress_blks) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cqe_compress_pkts) }, +}; + +struct mlx5e_sq_stats { + /* commonly accessed in data path */ + u64 packets; + u64 bytes; + u64 xmit_more; + u64 tso_packets; + u64 tso_bytes; + u64 tso_inner_packets; + u64 tso_inner_bytes; + u64 csum_partial_inner; + u64 nop; + /* less likely accessed in data path */ + u64 csum_none; + u64 stopped; + u64 wake; + u64 dropped; +}; + +static const struct counter_desc sq_stats_desc[] = { + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, packets) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, bytes) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tso_packets) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tso_bytes) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tso_inner_packets) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tso_inner_bytes) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, csum_partial_inner) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, nop) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, csum_none) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, stopped) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, wake) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, dropped) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, xmit_more) }, +}; + +#define NUM_SW_COUNTERS ARRAY_SIZE(sw_stats_desc) +#define NUM_Q_COUNTERS ARRAY_SIZE(q_stats_desc) +#define NUM_VPORT_COUNTERS ARRAY_SIZE(vport_stats_desc) +#define NUM_PPORT_802_3_COUNTERS ARRAY_SIZE(pport_802_3_stats_desc) +#define NUM_PPORT_2863_COUNTERS ARRAY_SIZE(pport_2863_stats_desc) +#define NUM_PPORT_2819_COUNTERS ARRAY_SIZE(pport_2819_stats_desc) +#define NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS \ + ARRAY_SIZE(pport_per_prio_traffic_stats_desc) +#define NUM_PPORT_PER_PRIO_PFC_COUNTERS \ + ARRAY_SIZE(pport_per_prio_pfc_stats_desc) +#define NUM_PPORT_COUNTERS (NUM_PPORT_802_3_COUNTERS + \ + NUM_PPORT_2863_COUNTERS + \ + NUM_PPORT_2819_COUNTERS + \ + NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS * \ + NUM_PPORT_PRIO) +#define NUM_RQ_STATS ARRAY_SIZE(rq_stats_desc) +#define NUM_SQ_STATS ARRAY_SIZE(sq_stats_desc) + +struct mlx5e_stats { + struct mlx5e_sw_stats sw; + struct mlx5e_qcounter_stats qcnt; + struct mlx5e_vport_stats vport; + struct mlx5e_pport_stats pport; +}; + +#endif /* __MLX5_EN_STATS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index b3de09f13425..22cfc4ac1837 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -37,8 +37,11 @@ #include <linux/mlx5/fs.h> #include <linux/mlx5/device.h> #include <linux/rhashtable.h> +#include <net/switchdev.h> +#include <net/tc_act/tc_mirred.h> #include "en.h" #include "en_tc.h" +#include "eswitch.h" struct mlx5e_tc_flow { struct rhash_head node; @@ -46,64 +49,109 @@ struct mlx5e_tc_flow { struct mlx5_flow_rule *rule; }; -#define MLX5E_TC_FLOW_TABLE_NUM_ENTRIES 1024 -#define MLX5E_TC_FLOW_TABLE_NUM_GROUPS 4 +#define MLX5E_TC_TABLE_NUM_ENTRIES 1024 +#define MLX5E_TC_TABLE_NUM_GROUPS 4 -static struct mlx5_flow_rule *mlx5e_tc_add_flow(struct mlx5e_priv *priv, - u32 *match_c, u32 *match_v, - u32 action, u32 flow_tag) +static struct mlx5_flow_rule *mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + u32 action, u32 flow_tag) { - struct mlx5_flow_destination dest = { - .type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE, - {.ft = priv->fts.vlan.t}, - }; + struct mlx5_core_dev *dev = priv->mdev; + struct mlx5_flow_destination dest = { 0 }; + struct mlx5_fc *counter = NULL; struct mlx5_flow_rule *rule; bool table_created = false; - if (IS_ERR_OR_NULL(priv->fts.tc.t)) { - priv->fts.tc.t = - mlx5_create_auto_grouped_flow_table(priv->fts.ns, 0, - MLX5E_TC_FLOW_TABLE_NUM_ENTRIES, - MLX5E_TC_FLOW_TABLE_NUM_GROUPS); - if (IS_ERR(priv->fts.tc.t)) { + if (action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest.ft = priv->fs.vlan.ft.t; + } else if (action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { + counter = mlx5_fc_create(dev, true); + if (IS_ERR(counter)) + return ERR_CAST(counter); + + dest.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; + dest.counter = counter; + } + + if (IS_ERR_OR_NULL(priv->fs.tc.t)) { + priv->fs.tc.t = + mlx5_create_auto_grouped_flow_table(priv->fs.ns, + MLX5E_TC_PRIO, + MLX5E_TC_TABLE_NUM_ENTRIES, + MLX5E_TC_TABLE_NUM_GROUPS, + 0); + if (IS_ERR(priv->fs.tc.t)) { netdev_err(priv->netdev, "Failed to create tc offload table\n"); - return ERR_CAST(priv->fts.tc.t); + rule = ERR_CAST(priv->fs.tc.t); + goto err_create_ft; } table_created = true; } - rule = mlx5_add_flow_rule(priv->fts.tc.t, MLX5_MATCH_OUTER_HEADERS, - match_c, match_v, + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + rule = mlx5_add_flow_rule(priv->fs.tc.t, spec, action, flow_tag, - action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST ? &dest : NULL); + &dest); - if (IS_ERR(rule) && table_created) { - mlx5_destroy_flow_table(priv->fts.tc.t); - priv->fts.tc.t = NULL; + if (IS_ERR(rule)) + goto err_add_rule; + + return rule; + +err_add_rule: + if (table_created) { + mlx5_destroy_flow_table(priv->fs.tc.t); + priv->fs.tc.t = NULL; } +err_create_ft: + mlx5_fc_destroy(dev, counter); return rule; } +static struct mlx5_flow_rule *mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + u32 action, u32 dst_vport) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_eswitch_rep *rep = priv->ppriv; + u32 src_vport; + + if (rep->vport) /* set source vport for the flow */ + src_vport = rep->vport; + else + src_vport = FDB_UPLINK_VPORT; + + return mlx5_eswitch_add_offloaded_rule(esw, spec, action, src_vport, dst_vport); +} + static void mlx5e_tc_del_flow(struct mlx5e_priv *priv, struct mlx5_flow_rule *rule) { + struct mlx5_fc *counter = NULL; + + counter = mlx5_flow_rule_counter(rule); + mlx5_del_flow_rule(rule); - if (!mlx5e_tc_num_filters(priv)) { - mlx5_destroy_flow_table(priv->fts.tc.t); - priv->fts.tc.t = NULL; + mlx5_fc_destroy(priv->mdev, counter); + + if (!mlx5e_tc_num_filters(priv) && (priv->fs.tc.t)) { + mlx5_destroy_flow_table(priv->fs.tc.t); + priv->fs.tc.t = NULL; } } -static int parse_cls_flower(struct mlx5e_priv *priv, - u32 *match_c, u32 *match_v, +static int parse_cls_flower(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec, struct tc_cls_flower_offload *f) { - void *headers_c = MLX5_ADDR_OF(fte_match_param, match_c, outer_headers); - void *headers_v = MLX5_ADDR_OF(fte_match_param, match_v, outer_headers); + void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + outer_headers); + void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers); u16 addr_type = 0; u8 ip_proto = 0; @@ -122,7 +170,7 @@ static int parse_cls_flower(struct mlx5e_priv *priv, if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CONTROL)) { struct flow_dissector_key_control *key = skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_BASIC, + FLOW_DISSECTOR_KEY_CONTROL, f->key); addr_type = key->addr_type; } @@ -266,10 +314,11 @@ static int parse_cls_flower(struct mlx5e_priv *priv, return 0; } -static int parse_tc_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, - u32 *action, u32 *flow_tag) +static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, + u32 *action, u32 *flow_tag) { const struct tc_action *a; + LIST_HEAD(actions); if (tc_no_actions(exts)) return -EINVAL; @@ -277,13 +326,17 @@ static int parse_tc_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, *flow_tag = MLX5_FS_DEFAULT_FLOW_TAG; *action = 0; - tc_for_each_action(a, exts) { + tcf_exts_to_list(exts, &actions); + list_for_each_entry(a, &actions, list) { /* Only support a single action per rule */ if (*action) return -EINVAL; if (is_tcf_gact_shot(a)) { *action |= MLX5_FLOW_CONTEXT_ACTION_DROP; + if (MLX5_CAP_FLOWTABLE(priv->mdev, + flow_table_properties_nic_receive.flow_counter)) + *action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; continue; } @@ -307,17 +360,68 @@ static int parse_tc_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, return 0; } +static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, + u32 *action, u32 *dest_vport) +{ + const struct tc_action *a; + LIST_HEAD(actions); + + if (tc_no_actions(exts)) + return -EINVAL; + + *action = 0; + + tcf_exts_to_list(exts, &actions); + list_for_each_entry(a, &actions, list) { + /* Only support a single action per rule */ + if (*action) + return -EINVAL; + + if (is_tcf_gact_shot(a)) { + *action = MLX5_FLOW_CONTEXT_ACTION_DROP | + MLX5_FLOW_CONTEXT_ACTION_COUNT; + continue; + } + + if (is_tcf_mirred_redirect(a)) { + int ifindex = tcf_mirred_ifindex(a); + struct net_device *out_dev; + struct mlx5e_priv *out_priv; + struct mlx5_eswitch_rep *out_rep; + + out_dev = __dev_get_by_index(dev_net(priv->netdev), ifindex); + + if (!switchdev_port_same_parent_id(priv->netdev, out_dev)) { + pr_err("devices %s %s not on same switch HW, can't offload forwarding\n", + priv->netdev->name, out_dev->name); + return -EINVAL; + } + + out_priv = netdev_priv(out_dev); + out_rep = out_priv->ppriv; + if (out_rep->vport == 0) + *dest_vport = FDB_UPLINK_VPORT; + else + *dest_vport = out_rep->vport; + *action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + continue; + } + + return -EINVAL; + } + return 0; +} + int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol, struct tc_cls_flower_offload *f) { - struct mlx5e_tc_flow_table *tc = &priv->fts.tc; - u32 *match_c; - u32 *match_v; + struct mlx5e_tc_table *tc = &priv->fs.tc; int err = 0; - u32 flow_tag; - u32 action; + u32 flow_tag, action, dest_vport = 0; struct mlx5e_tc_flow *flow; + struct mlx5_flow_spec *spec; struct mlx5_flow_rule *old = NULL; + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; flow = rhashtable_lookup_fast(&tc->ht, &f->cookie, tc->ht_params); @@ -326,49 +430,53 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol, else flow = kzalloc(sizeof(*flow), GFP_KERNEL); - match_c = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL); - match_v = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL); - if (!match_c || !match_v || !flow) { + spec = mlx5_vzalloc(sizeof(*spec)); + if (!spec || !flow) { err = -ENOMEM; goto err_free; } flow->cookie = f->cookie; - err = parse_cls_flower(priv, match_c, match_v, f); + err = parse_cls_flower(priv, spec, f); if (err < 0) goto err_free; - err = parse_tc_actions(priv, f->exts, &action, &flow_tag); - if (err < 0) + if (esw && esw->mode == SRIOV_OFFLOADS) { + err = parse_tc_fdb_actions(priv, f->exts, &action, &dest_vport); + if (err < 0) + goto err_free; + flow->rule = mlx5e_tc_add_fdb_flow(priv, spec, action, dest_vport); + } else { + err = parse_tc_nic_actions(priv, f->exts, &action, &flow_tag); + if (err < 0) + goto err_free; + flow->rule = mlx5e_tc_add_nic_flow(priv, spec, action, flow_tag); + } + + if (IS_ERR(flow->rule)) { + err = PTR_ERR(flow->rule); goto err_free; + } err = rhashtable_insert_fast(&tc->ht, &flow->node, tc->ht_params); if (err) - goto err_free; - - flow->rule = mlx5e_tc_add_flow(priv, match_c, match_v, action, - flow_tag); - if (IS_ERR(flow->rule)) { - err = PTR_ERR(flow->rule); - goto err_hash_del; - } + goto err_del_rule; if (old) mlx5e_tc_del_flow(priv, old); goto out; -err_hash_del: - rhashtable_remove_fast(&tc->ht, &flow->node, tc->ht_params); +err_del_rule: + mlx5_del_flow_rule(flow->rule); err_free: if (!old) kfree(flow); out: - kfree(match_c); - kfree(match_v); + kvfree(spec); return err; } @@ -376,7 +484,7 @@ int mlx5e_delete_flower(struct mlx5e_priv *priv, struct tc_cls_flower_offload *f) { struct mlx5e_tc_flow *flow; - struct mlx5e_tc_flow_table *tc = &priv->fts.tc; + struct mlx5e_tc_table *tc = &priv->fs.tc; flow = rhashtable_lookup_fast(&tc->ht, &f->cookie, tc->ht_params); @@ -392,6 +500,36 @@ int mlx5e_delete_flower(struct mlx5e_priv *priv, return 0; } +int mlx5e_stats_flower(struct mlx5e_priv *priv, + struct tc_cls_flower_offload *f) +{ + struct mlx5e_tc_table *tc = &priv->fs.tc; + struct mlx5e_tc_flow *flow; + struct tc_action *a; + struct mlx5_fc *counter; + LIST_HEAD(actions); + u64 bytes; + u64 packets; + u64 lastuse; + + flow = rhashtable_lookup_fast(&tc->ht, &f->cookie, + tc->ht_params); + if (!flow) + return -EINVAL; + + counter = mlx5_flow_rule_counter(flow->rule); + if (!counter) + return 0; + + mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse); + + tcf_exts_to_list(f->exts, &actions); + list_for_each_entry(a, &actions, list) + tcf_action_stats_update(a, bytes, packets, lastuse); + + return 0; +} + static const struct rhashtable_params mlx5e_tc_flow_ht_params = { .head_offset = offsetof(struct mlx5e_tc_flow, node), .key_offset = offsetof(struct mlx5e_tc_flow, cookie), @@ -401,7 +539,7 @@ static const struct rhashtable_params mlx5e_tc_flow_ht_params = { int mlx5e_tc_init(struct mlx5e_priv *priv) { - struct mlx5e_tc_flow_table *tc = &priv->fts.tc; + struct mlx5e_tc_table *tc = &priv->fs.tc; tc->ht_params = mlx5e_tc_flow_ht_params; return rhashtable_init(&tc->ht, &tc->ht_params); @@ -418,12 +556,12 @@ static void _mlx5e_tc_del_flow(void *ptr, void *arg) void mlx5e_tc_cleanup(struct mlx5e_priv *priv) { - struct mlx5e_tc_flow_table *tc = &priv->fts.tc; + struct mlx5e_tc_table *tc = &priv->fs.tc; rhashtable_free_and_destroy(&tc->ht, _mlx5e_tc_del_flow, priv); - if (!IS_ERR_OR_NULL(priv->fts.tc.t)) { - mlx5_destroy_flow_table(priv->fts.tc.t); - priv->fts.tc.t = NULL; + if (!IS_ERR_OR_NULL(tc->t)) { + mlx5_destroy_flow_table(tc->t); + tc->t = NULL; } } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h index d677428dc10f..34bf903fc886 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h @@ -43,9 +43,12 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol, int mlx5e_delete_flower(struct mlx5e_priv *priv, struct tc_cls_flower_offload *f); +int mlx5e_stats_flower(struct mlx5e_priv *priv, + struct tc_cls_flower_offload *f); + static inline int mlx5e_tc_num_filters(struct mlx5e_priv *priv) { - return atomic_read(&priv->fts.tc.ht.nelems); + return atomic_read(&priv->fs.tc.ht.nelems); } #endif /* __MLX5_EN_TC_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 1ffc7cb6f78c..eb0e72537f10 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -54,10 +54,11 @@ void mlx5e_send_nop(struct mlx5e_sq *sq, bool notify_hw) sq->skb[pi] = NULL; sq->pc++; + sq->stats.nop++; if (notify_hw) { cseg->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE; - mlx5e_tx_notify_hw(sq, wqe, 0); + mlx5e_tx_notify_hw(sq, &wqe->ctrl, 0); } } @@ -109,12 +110,68 @@ u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb, { struct mlx5e_priv *priv = netdev_priv(dev); int channel_ix = fallback(dev, skb); - int up = (netdev_get_num_tc(dev) && skb_vlan_tag_present(skb)) ? - skb->vlan_tci >> VLAN_PRIO_SHIFT : 0; + int up = 0; + + if (!netdev_get_num_tc(dev)) + return channel_ix; + + if (skb_vlan_tag_present(skb)) + up = skb->vlan_tci >> VLAN_PRIO_SHIFT; + + /* channel_ix can be larger than num_channels since + * dev->num_real_tx_queues = num_channels * num_tc + */ + if (channel_ix >= priv->params.num_channels) + channel_ix = reciprocal_scale(channel_ix, + priv->params.num_channels); return priv->channeltc_to_txq_map[channel_ix][up]; } +static inline int mlx5e_skb_l2_header_offset(struct sk_buff *skb) +{ +#define MLX5E_MIN_INLINE (ETH_HLEN + VLAN_HLEN) + + return max(skb_network_offset(skb), MLX5E_MIN_INLINE); +} + +static inline int mlx5e_skb_l3_header_offset(struct sk_buff *skb) +{ + struct flow_keys keys; + + if (skb_transport_header_was_set(skb)) + return skb_transport_offset(skb); + else if (skb_flow_dissect_flow_keys(skb, &keys, 0)) + return keys.control.thoff; + else + return mlx5e_skb_l2_header_offset(skb); +} + +static inline unsigned int mlx5e_calc_min_inline(enum mlx5_inline_modes mode, + struct sk_buff *skb) +{ + int hlen; + + switch (mode) { + case MLX5_INLINE_MODE_TCP_UDP: + hlen = eth_get_headlen(skb->data, skb_headlen(skb)); + if (hlen == ETH_HLEN && !skb_vlan_tag_present(skb)) + hlen += VLAN_HLEN; + return hlen; + case MLX5_INLINE_MODE_IP: + /* When transport header is set to zero, it means no transport + * header. When transport header is set to 0xff's, it means + * transport header wasn't set. + */ + if (skb_transport_offset(skb)) + return mlx5e_skb_l3_header_offset(skb); + /* fall through */ + case MLX5_INLINE_MODE_L2: + default: + return mlx5e_skb_l2_header_offset(skb); + } +} + static inline u16 mlx5e_get_inline_hdr_size(struct mlx5e_sq *sq, struct sk_buff *skb, bool bf) { @@ -122,8 +179,6 @@ static inline u16 mlx5e_get_inline_hdr_size(struct mlx5e_sq *sq, * headers and occur before the data gather. * Therefore these headers must be copied into the WQE */ -#define MLX5E_MIN_INLINE ETH_HLEN - if (bf) { u16 ihs = skb_headlen(skb); @@ -133,8 +188,7 @@ static inline u16 mlx5e_get_inline_hdr_size(struct mlx5e_sq *sq, if (ihs <= sq->max_inline) return skb_headlen(skb); } - - return MLX5E_MIN_INLINE; + return mlx5e_calc_min_inline(sq->min_inline_mode, skb); } static inline void mlx5e_tx_skb_pull_inline(unsigned char **skb_data, @@ -191,12 +245,12 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb) if (skb->encapsulation) { eseg->cs_flags |= MLX5_ETH_WQE_L3_INNER_CSUM | MLX5_ETH_WQE_L4_INNER_CSUM; - sq->stats.csum_offload_inner++; + sq->stats.csum_partial_inner++; } else { eseg->cs_flags |= MLX5_ETH_WQE_L4_CSUM; } } else - sq->stats.csum_offload_none++; + sq->stats.csum_none++; if (sq->cc != sq->prev_cc) { sq->prev_cc = sq->cc; @@ -302,6 +356,7 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb) sq->stats.stopped++; } + sq->stats.xmit_more += skb->xmit_more; if (!skb->xmit_more || netif_xmit_stopped(sq->txq)) { int bf_sz = 0; @@ -309,14 +364,15 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb) bf_sz = wi->num_wqebbs << 3; cseg->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE; - mlx5e_tx_notify_hw(sq, wqe, bf_sz); + mlx5e_tx_notify_hw(sq, &wqe->ctrl, bf_sz); } /* fill sq edge with nops to avoid wqe wrap around */ while ((sq->pc & wq->sz_m1) > sq->edge) mlx5e_send_nop(sq, false); - sq->bf_budget = bf ? sq->bf_budget - 1 : 0; + if (bf) + sq->bf_budget--; sq->stats.packets++; sq->stats.bytes += num_bytes; @@ -350,6 +406,9 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) sq = container_of(cq, struct mlx5e_sq, cq); + if (unlikely(test_bit(MLX5E_SQ_STATE_FLUSH, &sq->state))) + return false; + npkts = 0; nbytes = 0; @@ -387,7 +446,6 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) wi = &sq->wqe_info[ci]; if (unlikely(!skb)) { /* nop */ - sq->stats.nop++; sqcc++; continue; } @@ -426,11 +484,39 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) netdev_tx_completed_queue(sq->txq, npkts, nbytes); if (netif_tx_queue_stopped(sq->txq) && - mlx5e_sq_has_room_for(sq, MLX5E_SQ_STOP_ROOM) && - likely(test_bit(MLX5E_SQ_STATE_WAKE_TXQ_ENABLE, &sq->state))) { - netif_tx_wake_queue(sq->txq); - sq->stats.wake++; + mlx5e_sq_has_room_for(sq, MLX5E_SQ_STOP_ROOM)) { + netif_tx_wake_queue(sq->txq); + sq->stats.wake++; } return (i == MLX5E_TX_CQ_POLL_BUDGET); } + +void mlx5e_free_tx_descs(struct mlx5e_sq *sq) +{ + struct mlx5e_tx_wqe_info *wi; + struct sk_buff *skb; + u16 ci; + int i; + + while (sq->cc != sq->pc) { + ci = sq->cc & sq->wq.sz_m1; + skb = sq->skb[ci]; + wi = &sq->wqe_info[ci]; + + if (!skb) { /* nop */ + sq->cc++; + continue; + } + + for (i = 0; i < wi->num_dma; i++) { + struct mlx5e_sq_dma *dma = + mlx5e_dma_get(sq, sq->dma_fifo_cc++); + + mlx5e_tx_dma_unmap(sq->pdev, dma); + } + + dev_kfree_skb_any(skb); + sq->cc += wi->num_wqebbs; + } +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c index 9bb4395aceeb..9bf33bb69210 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c @@ -49,6 +49,62 @@ struct mlx5_cqe64 *mlx5e_get_cqe(struct mlx5e_cq *cq) return cqe; } +static void mlx5e_poll_ico_cq(struct mlx5e_cq *cq) +{ + struct mlx5e_sq *sq = container_of(cq, struct mlx5e_sq, cq); + struct mlx5_wq_cyc *wq; + struct mlx5_cqe64 *cqe; + u16 sqcc; + + if (unlikely(test_bit(MLX5E_SQ_STATE_FLUSH, &sq->state))) + return; + + cqe = mlx5e_get_cqe(cq); + if (likely(!cqe)) + return; + + wq = &sq->wq; + + /* sq->cc must be updated only after mlx5_cqwq_update_db_record(), + * otherwise a cq overrun may occur + */ + sqcc = sq->cc; + + do { + u16 ci = be16_to_cpu(cqe->wqe_counter) & wq->sz_m1; + struct mlx5e_ico_wqe_info *icowi = &sq->ico_wqe_info[ci]; + + mlx5_cqwq_pop(&cq->wq); + sqcc += icowi->num_wqebbs; + + if (unlikely((cqe->op_own >> 4) != MLX5_CQE_REQ)) { + WARN_ONCE(true, "mlx5e: Bad OP in ICOSQ CQE: 0x%x\n", + cqe->op_own); + break; + } + + switch (icowi->opcode) { + case MLX5_OPCODE_NOP: + break; + case MLX5_OPCODE_UMR: + mlx5e_post_rx_fragmented_mpwqe(&sq->channel->rq); + break; + default: + WARN_ONCE(true, + "mlx5e: Bad OPCODE in ICOSQ WQE info: 0x%x\n", + icowi->opcode); + } + + } while ((cqe = mlx5e_get_cqe(cq))); + + mlx5_cqwq_update_db_record(&cq->wq); + + /* ensure cq space is freed before enabling more cqes */ + wmb(); + + sq->cc = sqcc; +} + int mlx5e_napi_poll(struct napi_struct *napi, int budget) { struct mlx5e_channel *c = container_of(napi, struct mlx5e_channel, @@ -64,6 +120,9 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) work_done = mlx5e_poll_rx_cq(&c->rq.cq, budget); busy |= work_done == budget; + + mlx5e_poll_ico_cq(&c->icosq.cq); + busy |= mlx5e_post_rx_wqes(&c->rq); if (busy) @@ -79,7 +138,12 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) for (i = 0; i < c->num_tc; i++) mlx5e_cq_arm(&c->sq[i].cq); + + if (test_bit(MLX5E_RQ_STATE_AM, &c->rq.state)) + mlx5e_rx_am(&c->rq); + mlx5e_cq_arm(&c->rq.cq); + mlx5e_cq_arm(&c->icosq.cq); return work_done; } @@ -88,8 +152,8 @@ void mlx5e_completion_event(struct mlx5_core_cq *mcq) { struct mlx5e_cq *cq = container_of(mcq, struct mlx5e_cq, mcq); + cq->event_ctr++; set_bit(MLX5E_CHANNEL_NAPI_SCHED, &cq->channel->flags); - barrier(); napi_schedule(cq->napi); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 18fccec72c5d..0e30602ef76d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -202,7 +202,7 @@ static int mlx5_eq_int(struct mlx5_core_dev *dev, struct mlx5_eq *eq) struct mlx5_eqe *eqe; int eqes_found = 0; int set_ci = 0; - u32 cqn; + u32 cqn = -1; u32 rsn; u8 port; @@ -320,6 +320,9 @@ static int mlx5_eq_int(struct mlx5_core_dev *dev, struct mlx5_eq *eq) eq_update_ci(eq, 1); + if (cqn != -1) + tasklet_schedule(&eq->tasklet_ctx.task); + return eqes_found; } @@ -403,6 +406,12 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, if (err) goto err_irq; + INIT_LIST_HEAD(&eq->tasklet_ctx.list); + INIT_LIST_HEAD(&eq->tasklet_ctx.process_list); + spin_lock_init(&eq->tasklet_ctx.lock); + tasklet_init(&eq->tasklet_ctx.task, mlx5_cq_tasklet_cb, + (unsigned long)&eq->tasklet_ctx); + /* EQs are created in ARMED state */ eq_update_ci(eq, 1); @@ -436,6 +445,7 @@ int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq) mlx5_core_warn(dev, "failed to destroy a previously created eq: eqn %d\n", eq->eqn); synchronize_irq(eq->irqn); + tasklet_disable(&eq->tasklet_ctx.task); mlx5_buf_free(dev, &eq->buf); return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index bc3d9f8a75c1..b247949df135 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -40,17 +40,6 @@ #define UPLINK_VPORT 0xFFFF -#define MLX5_DEBUG_ESWITCH_MASK BIT(3) - -#define esw_info(dev, format, ...) \ - pr_info("(%s): E-Switch: " format, (dev)->priv.name, ##__VA_ARGS__) - -#define esw_warn(dev, format, ...) \ - pr_warn("(%s): E-Switch: " format, (dev)->priv.name, ##__VA_ARGS__) - -#define esw_debug(dev, format, ...) \ - mlx5_core_dbg_mask(dev, MLX5_DEBUG_ESWITCH_MASK, format, ##__VA_ARGS__) - enum { MLX5_ACTION_NONE = 0, MLX5_ACTION_ADD = 1, @@ -77,16 +66,23 @@ struct vport_addr { u8 action; u32 vport; struct mlx5_flow_rule *flow_rule; /* SRIOV only */ + /* A flag indicating that mac was added due to mc promiscuous vport */ + bool mc_promisc; }; enum { UC_ADDR_CHANGE = BIT(0), MC_ADDR_CHANGE = BIT(1), + PROMISC_CHANGE = BIT(3), }; /* Vport context events */ #define SRIOV_VPORT_EVENTS (UC_ADDR_CHANGE | \ - MC_ADDR_CHANGE) + MC_ADDR_CHANGE | \ + PROMISC_CHANGE) + +int esw_offloads_init(struct mlx5_eswitch *esw, int nvports); +void esw_offloads_cleanup(struct mlx5_eswitch *esw, int nvports); static int arm_vport_context_events_cmd(struct mlx5_core_dev *dev, u16 vport, u32 events_mask) @@ -116,6 +112,9 @@ static int arm_vport_context_events_cmd(struct mlx5_core_dev *dev, u16 vport, if (events_mask & MC_ADDR_CHANGE) MLX5_SET(nic_vport_context, nic_vport_ctx, event_on_mc_address_change, 1); + if (events_mask & PROMISC_CHANGE) + MLX5_SET(nic_vport_context, nic_vport_ctx, + event_on_promisc_change, 1); err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); if (err) @@ -323,30 +322,45 @@ static void del_l2_table_entry(struct mlx5_core_dev *dev, u32 index) /* E-Switch FDB */ static struct mlx5_flow_rule * -esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u8 mac[ETH_ALEN], u32 vport) +__esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u32 vport, bool rx_rule, + u8 mac_c[ETH_ALEN], u8 mac_v[ETH_ALEN]) { - int match_header = MLX5_MATCH_OUTER_HEADERS; - struct mlx5_flow_destination dest; + int match_header = (is_zero_ether_addr(mac_c) ? 0 : + MLX5_MATCH_OUTER_HEADERS); struct mlx5_flow_rule *flow_rule = NULL; - u32 *match_v; - u32 *match_c; - u8 *dmac_v; - u8 *dmac_c; - - match_v = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL); - match_c = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL); - if (!match_v || !match_c) { + struct mlx5_flow_destination dest; + struct mlx5_flow_spec *spec; + void *mv_misc = NULL; + void *mc_misc = NULL; + u8 *dmac_v = NULL; + u8 *dmac_c = NULL; + + if (rx_rule) + match_header |= MLX5_MATCH_MISC_PARAMETERS; + + spec = mlx5_vzalloc(sizeof(*spec)); + if (!spec) { pr_warn("FDB: Failed to alloc match parameters\n"); - goto out; + return NULL; } - dmac_v = MLX5_ADDR_OF(fte_match_param, match_v, + dmac_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers.dmac_47_16); - dmac_c = MLX5_ADDR_OF(fte_match_param, match_c, + dmac_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers.dmac_47_16); - ether_addr_copy(dmac_v, mac); - /* Match criteria mask */ - memset(dmac_c, 0xff, 6); + if (match_header & MLX5_MATCH_OUTER_HEADERS) { + ether_addr_copy(dmac_v, mac_v); + ether_addr_copy(dmac_c, mac_c); + } + + if (match_header & MLX5_MATCH_MISC_PARAMETERS) { + mv_misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters); + mc_misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + misc_parameters); + MLX5_SET(fte_match_set_misc, mv_misc, source_port, UPLINK_VPORT); + MLX5_SET_TO_ONES(fte_match_set_misc, mc_misc, source_port); + } dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; dest.vport_num = vport; @@ -354,26 +368,56 @@ esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u8 mac[ETH_ALEN], u32 vport) esw_debug(esw->dev, "\tFDB add rule dmac_v(%pM) dmac_c(%pM) -> vport(%d)\n", dmac_v, dmac_c, vport); + spec->match_criteria_enable = match_header; flow_rule = - mlx5_add_flow_rule(esw->fdb_table.fdb, - match_header, - match_c, - match_v, + mlx5_add_flow_rule(esw->fdb_table.fdb, spec, MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, 0, &dest); - if (IS_ERR_OR_NULL(flow_rule)) { + if (IS_ERR(flow_rule)) { pr_warn( "FDB: Failed to add flow rule: dmac_v(%pM) dmac_c(%pM) -> vport(%d), err(%ld)\n", dmac_v, dmac_c, vport, PTR_ERR(flow_rule)); flow_rule = NULL; } -out: - kfree(match_v); - kfree(match_c); + + kvfree(spec); return flow_rule; } -static int esw_create_fdb_table(struct mlx5_eswitch *esw, int nvports) +static struct mlx5_flow_rule * +esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u8 mac[ETH_ALEN], u32 vport) +{ + u8 mac_c[ETH_ALEN]; + + eth_broadcast_addr(mac_c); + return __esw_fdb_set_vport_rule(esw, vport, false, mac_c, mac); +} + +static struct mlx5_flow_rule * +esw_fdb_set_vport_allmulti_rule(struct mlx5_eswitch *esw, u32 vport) +{ + u8 mac_c[ETH_ALEN]; + u8 mac_v[ETH_ALEN]; + + eth_zero_addr(mac_c); + eth_zero_addr(mac_v); + mac_c[0] = 0x01; + mac_v[0] = 0x01; + return __esw_fdb_set_vport_rule(esw, vport, false, mac_c, mac_v); +} + +static struct mlx5_flow_rule * +esw_fdb_set_vport_promisc_rule(struct mlx5_eswitch *esw, u32 vport) +{ + u8 mac_c[ETH_ALEN]; + u8 mac_v[ETH_ALEN]; + + eth_zero_addr(mac_c); + eth_zero_addr(mac_v); + return __esw_fdb_set_vport_rule(esw, vport, true, mac_c, mac_v); +} + +static int esw_create_legacy_fdb_table(struct mlx5_eswitch *esw, int nvports) { int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); struct mlx5_core_dev *dev = esw->dev; @@ -401,47 +445,97 @@ static int esw_create_fdb_table(struct mlx5_eswitch *esw, int nvports) memset(flow_group_in, 0, inlen); table_size = BIT(MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size)); - fdb = mlx5_create_flow_table(root_ns, 0, table_size); - if (IS_ERR_OR_NULL(fdb)) { + fdb = mlx5_create_flow_table(root_ns, 0, table_size, 0); + if (IS_ERR(fdb)) { err = PTR_ERR(fdb); esw_warn(dev, "Failed to create FDB Table err %d\n", err); goto out; } + esw->fdb_table.fdb = fdb; + /* Addresses group : Full match unicast/multicast addresses */ MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria); dmac = MLX5_ADDR_OF(fte_match_param, match_criteria, outer_headers.dmac_47_16); MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); - MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, table_size - 1); + /* Preserve 2 entries for allmulti and promisc rules*/ + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, table_size - 3); eth_broadcast_addr(dmac); - g = mlx5_create_flow_group(fdb, flow_group_in); - if (IS_ERR_OR_NULL(g)) { + if (IS_ERR(g)) { err = PTR_ERR(g); esw_warn(dev, "Failed to create flow group err(%d)\n", err); goto out; } + esw->fdb_table.legacy.addr_grp = g; + + /* Allmulti group : One rule that forwards any mcast traffic */ + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, + MLX5_MATCH_OUTER_HEADERS); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, table_size - 2); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, table_size - 2); + eth_zero_addr(dmac); + dmac[0] = 0x01; + g = mlx5_create_flow_group(fdb, flow_group_in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + esw_warn(dev, "Failed to create allmulti flow group err(%d)\n", err); + goto out; + } + esw->fdb_table.legacy.allmulti_grp = g; + + /* Promiscuous group : + * One rule that forward all unmatched traffic from previous groups + */ + eth_zero_addr(dmac); + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, + MLX5_MATCH_MISC_PARAMETERS); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_port); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, table_size - 1); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, table_size - 1); + g = mlx5_create_flow_group(fdb, flow_group_in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + esw_warn(dev, "Failed to create promisc flow group err(%d)\n", err); + goto out; + } + esw->fdb_table.legacy.promisc_grp = g; - esw->fdb_table.addr_grp = g; - esw->fdb_table.fdb = fdb; out: - kfree(flow_group_in); - if (err && !IS_ERR_OR_NULL(fdb)) - mlx5_destroy_flow_table(fdb); + if (err) { + if (!IS_ERR_OR_NULL(esw->fdb_table.legacy.allmulti_grp)) { + mlx5_destroy_flow_group(esw->fdb_table.legacy.allmulti_grp); + esw->fdb_table.legacy.allmulti_grp = NULL; + } + if (!IS_ERR_OR_NULL(esw->fdb_table.legacy.addr_grp)) { + mlx5_destroy_flow_group(esw->fdb_table.legacy.addr_grp); + esw->fdb_table.legacy.addr_grp = NULL; + } + if (!IS_ERR_OR_NULL(esw->fdb_table.fdb)) { + mlx5_destroy_flow_table(esw->fdb_table.fdb); + esw->fdb_table.fdb = NULL; + } + } + + kvfree(flow_group_in); return err; } -static void esw_destroy_fdb_table(struct mlx5_eswitch *esw) +static void esw_destroy_legacy_fdb_table(struct mlx5_eswitch *esw) { if (!esw->fdb_table.fdb) return; esw_debug(esw->dev, "Destroy FDB Table\n"); - mlx5_destroy_flow_group(esw->fdb_table.addr_grp); + mlx5_destroy_flow_group(esw->fdb_table.legacy.promisc_grp); + mlx5_destroy_flow_group(esw->fdb_table.legacy.allmulti_grp); + mlx5_destroy_flow_group(esw->fdb_table.legacy.addr_grp); mlx5_destroy_flow_table(esw->fdb_table.fdb); esw->fdb_table.fdb = NULL; - esw->fdb_table.addr_grp = NULL; + esw->fdb_table.legacy.addr_grp = NULL; + esw->fdb_table.legacy.allmulti_grp = NULL; + esw->fdb_table.legacy.promisc_grp = NULL; } /* E-Switch vport UC/MC lists management */ @@ -473,7 +567,8 @@ static int esw_add_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) if (err) goto abort; - if (esw->fdb_table.fdb) /* SRIOV is enabled: Forward UC MAC to vport */ + /* SRIOV is enabled: Forward UC MAC to vport */ + if (esw->fdb_table.fdb && esw->mode == SRIOV_LEGACY) vaddr->flow_rule = esw_fdb_set_vport_rule(esw, mac, vport); esw_debug(esw->dev, "\tADDED UC MAC: vport[%d] %pM index:%d fr(%p)\n", @@ -511,6 +606,53 @@ static int esw_del_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) return 0; } +static void update_allmulti_vports(struct mlx5_eswitch *esw, + struct vport_addr *vaddr, + struct esw_mc_addr *esw_mc) +{ + u8 *mac = vaddr->node.addr; + u32 vport_idx = 0; + + for (vport_idx = 0; vport_idx < esw->total_vports; vport_idx++) { + struct mlx5_vport *vport = &esw->vports[vport_idx]; + struct hlist_head *vport_hash = vport->mc_list; + struct vport_addr *iter_vaddr = + l2addr_hash_find(vport_hash, + mac, + struct vport_addr); + if (IS_ERR_OR_NULL(vport->allmulti_rule) || + vaddr->vport == vport_idx) + continue; + switch (vaddr->action) { + case MLX5_ACTION_ADD: + if (iter_vaddr) + continue; + iter_vaddr = l2addr_hash_add(vport_hash, mac, + struct vport_addr, + GFP_KERNEL); + if (!iter_vaddr) { + esw_warn(esw->dev, + "ALL-MULTI: Failed to add MAC(%pM) to vport[%d] DB\n", + mac, vport_idx); + continue; + } + iter_vaddr->vport = vport_idx; + iter_vaddr->flow_rule = + esw_fdb_set_vport_rule(esw, + mac, + vport_idx); + iter_vaddr->mc_promisc = true; + break; + case MLX5_ACTION_DEL: + if (!iter_vaddr) + continue; + mlx5_del_flow_rule(iter_vaddr->flow_rule); + l2addr_hash_del(iter_vaddr); + break; + } + } +} + static int esw_add_mc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) { struct hlist_head *hash = esw->mc_table; @@ -531,8 +673,17 @@ static int esw_add_mc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) esw_mc->uplink_rule = /* Forward MC MAC to Uplink */ esw_fdb_set_vport_rule(esw, mac, UPLINK_VPORT); + + /* Add this multicast mac to all the mc promiscuous vports */ + update_allmulti_vports(esw, vaddr, esw_mc); + add: - esw_mc->refcnt++; + /* If the multicast mac is added as a result of mc promiscuous vport, + * don't increment the multicast ref count + */ + if (!vaddr->mc_promisc) + esw_mc->refcnt++; + /* Forward MC MAC to vport */ vaddr->flow_rule = esw_fdb_set_vport_rule(esw, mac, vport); esw_debug(esw->dev, @@ -568,9 +719,15 @@ static int esw_del_mc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) mlx5_del_flow_rule(vaddr->flow_rule); vaddr->flow_rule = NULL; - if (--esw_mc->refcnt) + /* If the multicast mac is added as a result of mc promiscuous vport, + * don't decrement the multicast ref count. + */ + if (vaddr->mc_promisc || (--esw_mc->refcnt > 0)) return 0; + /* Remove this multicast mac from all the mc promiscuous vports */ + update_allmulti_vports(esw, vaddr, esw_mc); + if (esw_mc->uplink_rule) mlx5_del_flow_rule(esw_mc->uplink_rule); @@ -643,10 +800,13 @@ static void esw_update_vport_addr_list(struct mlx5_eswitch *esw, addr->action = MLX5_ACTION_DEL; } + if (!vport->enabled) + goto out; + err = mlx5_query_nic_vport_mac_list(esw->dev, vport_num, list_type, mac_list, &size); if (err) - return; + goto out; esw_debug(esw->dev, "vport[%d] context update %s list size (%d)\n", vport_num, is_uc ? "UC" : "MC", size); @@ -660,6 +820,24 @@ static void esw_update_vport_addr_list(struct mlx5_eswitch *esw, addr = l2addr_hash_find(hash, mac_list[i], struct vport_addr); if (addr) { addr->action = MLX5_ACTION_NONE; + /* If this mac was previously added because of allmulti + * promiscuous rx mode, its now converted to be original + * vport mac. + */ + if (addr->mc_promisc) { + struct esw_mc_addr *esw_mc = + l2addr_hash_find(esw->mc_table, + mac_list[i], + struct esw_mc_addr); + if (!esw_mc) { + esw_warn(esw->dev, + "Failed to MAC(%pM) in mcast DB\n", + mac_list[i]); + continue; + } + esw_mc->refcnt++; + addr->mc_promisc = false; + } continue; } @@ -674,13 +852,121 @@ static void esw_update_vport_addr_list(struct mlx5_eswitch *esw, addr->vport = vport_num; addr->action = MLX5_ACTION_ADD; } +out: kfree(mac_list); } -static void esw_vport_change_handler(struct work_struct *work) +/* Sync vport UC/MC list from vport context + * Must be called after esw_update_vport_addr_list + */ +static void esw_update_vport_mc_promisc(struct mlx5_eswitch *esw, u32 vport_num) +{ + struct mlx5_vport *vport = &esw->vports[vport_num]; + struct l2addr_node *node; + struct vport_addr *addr; + struct hlist_head *hash; + struct hlist_node *tmp; + int hi; + + hash = vport->mc_list; + + for_each_l2hash_node(node, tmp, esw->mc_table, hi) { + u8 *mac = node->addr; + + addr = l2addr_hash_find(hash, mac, struct vport_addr); + if (addr) { + if (addr->action == MLX5_ACTION_DEL) + addr->action = MLX5_ACTION_NONE; + continue; + } + addr = l2addr_hash_add(hash, mac, struct vport_addr, + GFP_KERNEL); + if (!addr) { + esw_warn(esw->dev, + "Failed to add allmulti MAC(%pM) to vport[%d] DB\n", + mac, vport_num); + continue; + } + addr->vport = vport_num; + addr->action = MLX5_ACTION_ADD; + addr->mc_promisc = true; + } +} + +/* Apply vport rx mode to HW FDB table */ +static void esw_apply_vport_rx_mode(struct mlx5_eswitch *esw, u32 vport_num, + bool promisc, bool mc_promisc) +{ + struct esw_mc_addr *allmulti_addr = esw->mc_promisc; + struct mlx5_vport *vport = &esw->vports[vport_num]; + + if (IS_ERR_OR_NULL(vport->allmulti_rule) != mc_promisc) + goto promisc; + + if (mc_promisc) { + vport->allmulti_rule = + esw_fdb_set_vport_allmulti_rule(esw, vport_num); + if (!allmulti_addr->uplink_rule) + allmulti_addr->uplink_rule = + esw_fdb_set_vport_allmulti_rule(esw, + UPLINK_VPORT); + allmulti_addr->refcnt++; + } else if (vport->allmulti_rule) { + mlx5_del_flow_rule(vport->allmulti_rule); + vport->allmulti_rule = NULL; + + if (--allmulti_addr->refcnt > 0) + goto promisc; + + if (allmulti_addr->uplink_rule) + mlx5_del_flow_rule(allmulti_addr->uplink_rule); + allmulti_addr->uplink_rule = NULL; + } + +promisc: + if (IS_ERR_OR_NULL(vport->promisc_rule) != promisc) + return; + + if (promisc) { + vport->promisc_rule = esw_fdb_set_vport_promisc_rule(esw, + vport_num); + } else if (vport->promisc_rule) { + mlx5_del_flow_rule(vport->promisc_rule); + vport->promisc_rule = NULL; + } +} + +/* Sync vport rx mode from vport context */ +static void esw_update_vport_rx_mode(struct mlx5_eswitch *esw, u32 vport_num) +{ + struct mlx5_vport *vport = &esw->vports[vport_num]; + int promisc_all = 0; + int promisc_uc = 0; + int promisc_mc = 0; + int err; + + err = mlx5_query_nic_vport_promisc(esw->dev, + vport_num, + &promisc_uc, + &promisc_mc, + &promisc_all); + if (err) + return; + esw_debug(esw->dev, "vport[%d] context update rx mode promisc_all=%d, all_multi=%d\n", + vport_num, promisc_all, promisc_mc); + + if (!vport->trusted || !vport->enabled) { + promisc_uc = 0; + promisc_mc = 0; + promisc_all = 0; + } + + esw_apply_vport_rx_mode(esw, vport_num, promisc_all, + (promisc_all || promisc_mc)); +} + +static void esw_vport_change_handle_locked(struct mlx5_vport *vport) { - struct mlx5_vport *vport = - container_of(work, struct mlx5_vport, vport_change_handler); struct mlx5_core_dev *dev = vport->dev; struct mlx5_eswitch *esw = dev->priv.eswitch; u8 mac[ETH_ALEN]; @@ -699,6 +985,15 @@ static void esw_vport_change_handler(struct work_struct *work) if (vport->enabled_events & MC_ADDR_CHANGE) { esw_update_vport_addr_list(esw, vport->vport, MLX5_NVPRT_LIST_TYPE_MC); + } + + if (vport->enabled_events & PROMISC_CHANGE) { + esw_update_vport_rx_mode(esw, vport->vport); + if (!IS_ERR_OR_NULL(vport->allmulti_rule)) + esw_update_vport_mc_promisc(esw, vport->vport); + } + + if (vport->enabled_events & (PROMISC_CHANGE | MC_ADDR_CHANGE)) { esw_apply_vport_addr_list(esw, vport->vport, MLX5_NVPRT_LIST_TYPE_MC); } @@ -709,15 +1004,459 @@ static void esw_vport_change_handler(struct work_struct *work) vport->enabled_events); } +static void esw_vport_change_handler(struct work_struct *work) +{ + struct mlx5_vport *vport = + container_of(work, struct mlx5_vport, vport_change_handler); + struct mlx5_eswitch *esw = vport->dev->priv.eswitch; + + mutex_lock(&esw->state_lock); + esw_vport_change_handle_locked(vport); + mutex_unlock(&esw->state_lock); +} + +static void esw_vport_enable_egress_acl(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_group *vlan_grp = NULL; + struct mlx5_flow_group *drop_grp = NULL; + struct mlx5_core_dev *dev = esw->dev; + struct mlx5_flow_namespace *root_ns; + struct mlx5_flow_table *acl; + void *match_criteria; + u32 *flow_group_in; + /* The egress acl table contains 2 rules: + * 1)Allow traffic with vlan_tag=vst_vlan_id + * 2)Drop all other traffic. + */ + int table_size = 2; + int err = 0; + + if (!MLX5_CAP_ESW_EGRESS_ACL(dev, ft_support) || + !IS_ERR_OR_NULL(vport->egress.acl)) + return; + + esw_debug(dev, "Create vport[%d] egress ACL log_max_size(%d)\n", + vport->vport, MLX5_CAP_ESW_EGRESS_ACL(dev, log_max_ft_size)); + + root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_ESW_EGRESS); + if (!root_ns) { + esw_warn(dev, "Failed to get E-Switch egress flow namespace\n"); + return; + } + + flow_group_in = mlx5_vzalloc(inlen); + if (!flow_group_in) + return; + + acl = mlx5_create_vport_flow_table(root_ns, 0, table_size, 0, vport->vport); + if (IS_ERR(acl)) { + err = PTR_ERR(acl); + esw_warn(dev, "Failed to create E-Switch vport[%d] egress flow Table, err(%d)\n", + vport->vport, err); + goto out; + } + + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.vlan_tag); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.first_vid); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0); + + vlan_grp = mlx5_create_flow_group(acl, flow_group_in); + if (IS_ERR(vlan_grp)) { + err = PTR_ERR(vlan_grp); + esw_warn(dev, "Failed to create E-Switch vport[%d] egress allowed vlans flow group, err(%d)\n", + vport->vport, err); + goto out; + } + + memset(flow_group_in, 0, inlen); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1); + drop_grp = mlx5_create_flow_group(acl, flow_group_in); + if (IS_ERR(drop_grp)) { + err = PTR_ERR(drop_grp); + esw_warn(dev, "Failed to create E-Switch vport[%d] egress drop flow group, err(%d)\n", + vport->vport, err); + goto out; + } + + vport->egress.acl = acl; + vport->egress.drop_grp = drop_grp; + vport->egress.allowed_vlans_grp = vlan_grp; +out: + kvfree(flow_group_in); + if (err && !IS_ERR_OR_NULL(vlan_grp)) + mlx5_destroy_flow_group(vlan_grp); + if (err && !IS_ERR_OR_NULL(acl)) + mlx5_destroy_flow_table(acl); +} + +static void esw_vport_cleanup_egress_rules(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + if (!IS_ERR_OR_NULL(vport->egress.allowed_vlan)) + mlx5_del_flow_rule(vport->egress.allowed_vlan); + + if (!IS_ERR_OR_NULL(vport->egress.drop_rule)) + mlx5_del_flow_rule(vport->egress.drop_rule); + + vport->egress.allowed_vlan = NULL; + vport->egress.drop_rule = NULL; +} + +static void esw_vport_disable_egress_acl(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + if (IS_ERR_OR_NULL(vport->egress.acl)) + return; + + esw_debug(esw->dev, "Destroy vport[%d] E-Switch egress ACL\n", vport->vport); + + esw_vport_cleanup_egress_rules(esw, vport); + mlx5_destroy_flow_group(vport->egress.allowed_vlans_grp); + mlx5_destroy_flow_group(vport->egress.drop_grp); + mlx5_destroy_flow_table(vport->egress.acl); + vport->egress.allowed_vlans_grp = NULL; + vport->egress.drop_grp = NULL; + vport->egress.acl = NULL; +} + +static void esw_vport_enable_ingress_acl(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_core_dev *dev = esw->dev; + struct mlx5_flow_namespace *root_ns; + struct mlx5_flow_table *acl; + struct mlx5_flow_group *g; + void *match_criteria; + u32 *flow_group_in; + /* The ingress acl table contains 4 groups + * (2 active rules at the same time - + * 1 allow rule from one of the first 3 groups. + * 1 drop rule from the last group): + * 1)Allow untagged traffic with smac=original mac. + * 2)Allow untagged traffic. + * 3)Allow traffic with smac=original mac. + * 4)Drop all other traffic. + */ + int table_size = 4; + int err = 0; + + if (!MLX5_CAP_ESW_INGRESS_ACL(dev, ft_support) || + !IS_ERR_OR_NULL(vport->ingress.acl)) + return; + + esw_debug(dev, "Create vport[%d] ingress ACL log_max_size(%d)\n", + vport->vport, MLX5_CAP_ESW_INGRESS_ACL(dev, log_max_ft_size)); + + root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_ESW_INGRESS); + if (!root_ns) { + esw_warn(dev, "Failed to get E-Switch ingress flow namespace\n"); + return; + } + + flow_group_in = mlx5_vzalloc(inlen); + if (!flow_group_in) + return; + + acl = mlx5_create_vport_flow_table(root_ns, 0, table_size, 0, vport->vport); + if (IS_ERR(acl)) { + err = PTR_ERR(acl); + esw_warn(dev, "Failed to create E-Switch vport[%d] ingress flow Table, err(%d)\n", + vport->vport, err); + goto out; + } + vport->ingress.acl = acl; + + match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria); + + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.vlan_tag); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.smac_47_16); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.smac_15_0); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0); + + g = mlx5_create_flow_group(acl, flow_group_in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + esw_warn(dev, "Failed to create E-Switch vport[%d] ingress untagged spoofchk flow group, err(%d)\n", + vport->vport, err); + goto out; + } + vport->ingress.allow_untagged_spoofchk_grp = g; + + memset(flow_group_in, 0, inlen); + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.vlan_tag); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1); + + g = mlx5_create_flow_group(acl, flow_group_in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + esw_warn(dev, "Failed to create E-Switch vport[%d] ingress untagged flow group, err(%d)\n", + vport->vport, err); + goto out; + } + vport->ingress.allow_untagged_only_grp = g; + + memset(flow_group_in, 0, inlen); + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.smac_47_16); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.smac_15_0); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 2); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 2); + + g = mlx5_create_flow_group(acl, flow_group_in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + esw_warn(dev, "Failed to create E-Switch vport[%d] ingress spoofchk flow group, err(%d)\n", + vport->vport, err); + goto out; + } + vport->ingress.allow_spoofchk_only_grp = g; + + memset(flow_group_in, 0, inlen); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 3); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 3); + + g = mlx5_create_flow_group(acl, flow_group_in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + esw_warn(dev, "Failed to create E-Switch vport[%d] ingress drop flow group, err(%d)\n", + vport->vport, err); + goto out; + } + vport->ingress.drop_grp = g; + +out: + if (err) { + if (!IS_ERR_OR_NULL(vport->ingress.allow_spoofchk_only_grp)) + mlx5_destroy_flow_group( + vport->ingress.allow_spoofchk_only_grp); + if (!IS_ERR_OR_NULL(vport->ingress.allow_untagged_only_grp)) + mlx5_destroy_flow_group( + vport->ingress.allow_untagged_only_grp); + if (!IS_ERR_OR_NULL(vport->ingress.allow_untagged_spoofchk_grp)) + mlx5_destroy_flow_group( + vport->ingress.allow_untagged_spoofchk_grp); + if (!IS_ERR_OR_NULL(vport->ingress.acl)) + mlx5_destroy_flow_table(vport->ingress.acl); + } + + kvfree(flow_group_in); +} + +static void esw_vport_cleanup_ingress_rules(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + if (!IS_ERR_OR_NULL(vport->ingress.drop_rule)) + mlx5_del_flow_rule(vport->ingress.drop_rule); + + if (!IS_ERR_OR_NULL(vport->ingress.allow_rule)) + mlx5_del_flow_rule(vport->ingress.allow_rule); + + vport->ingress.drop_rule = NULL; + vport->ingress.allow_rule = NULL; +} + +static void esw_vport_disable_ingress_acl(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + if (IS_ERR_OR_NULL(vport->ingress.acl)) + return; + + esw_debug(esw->dev, "Destroy vport[%d] E-Switch ingress ACL\n", vport->vport); + + esw_vport_cleanup_ingress_rules(esw, vport); + mlx5_destroy_flow_group(vport->ingress.allow_spoofchk_only_grp); + mlx5_destroy_flow_group(vport->ingress.allow_untagged_only_grp); + mlx5_destroy_flow_group(vport->ingress.allow_untagged_spoofchk_grp); + mlx5_destroy_flow_group(vport->ingress.drop_grp); + mlx5_destroy_flow_table(vport->ingress.acl); + vport->ingress.acl = NULL; + vport->ingress.drop_grp = NULL; + vport->ingress.allow_spoofchk_only_grp = NULL; + vport->ingress.allow_untagged_only_grp = NULL; + vport->ingress.allow_untagged_spoofchk_grp = NULL; +} + +static int esw_vport_ingress_config(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + struct mlx5_flow_spec *spec; + u8 smac[ETH_ALEN]; + int err = 0; + u8 *smac_v; + + if (vport->spoofchk) { + err = mlx5_query_nic_vport_mac_address(esw->dev, vport->vport, smac); + if (err) { + esw_warn(esw->dev, + "vport[%d] configure ingress rules failed, query smac failed, err(%d)\n", + vport->vport, err); + return err; + } + + if (!is_valid_ether_addr(smac)) { + mlx5_core_warn(esw->dev, + "vport[%d] configure ingress rules failed, illegal mac with spoofchk\n", + vport->vport); + return -EPERM; + } + } + + esw_vport_cleanup_ingress_rules(esw, vport); + + if (!vport->vlan && !vport->qos && !vport->spoofchk) { + esw_vport_disable_ingress_acl(esw, vport); + return 0; + } + + esw_vport_enable_ingress_acl(esw, vport); + + esw_debug(esw->dev, + "vport[%d] configure ingress rules, vlan(%d) qos(%d)\n", + vport->vport, vport->vlan, vport->qos); + + spec = mlx5_vzalloc(sizeof(*spec)); + if (!spec) { + err = -ENOMEM; + esw_warn(esw->dev, "vport[%d] configure ingress rules failed, err(%d)\n", + vport->vport, err); + goto out; + } + + if (vport->vlan || vport->qos) + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.vlan_tag); + + if (vport->spoofchk) { + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.smac_47_16); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.smac_15_0); + smac_v = MLX5_ADDR_OF(fte_match_param, + spec->match_value, + outer_headers.smac_47_16); + ether_addr_copy(smac_v, smac); + } + + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + vport->ingress.allow_rule = + mlx5_add_flow_rule(vport->ingress.acl, spec, + MLX5_FLOW_CONTEXT_ACTION_ALLOW, + 0, NULL); + if (IS_ERR(vport->ingress.allow_rule)) { + err = PTR_ERR(vport->ingress.allow_rule); + pr_warn("vport[%d] configure ingress allow rule, err(%d)\n", + vport->vport, err); + vport->ingress.allow_rule = NULL; + goto out; + } + + memset(spec, 0, sizeof(*spec)); + vport->ingress.drop_rule = + mlx5_add_flow_rule(vport->ingress.acl, spec, + MLX5_FLOW_CONTEXT_ACTION_DROP, + 0, NULL); + if (IS_ERR(vport->ingress.drop_rule)) { + err = PTR_ERR(vport->ingress.drop_rule); + pr_warn("vport[%d] configure ingress drop rule, err(%d)\n", + vport->vport, err); + vport->ingress.drop_rule = NULL; + goto out; + } + +out: + if (err) + esw_vport_cleanup_ingress_rules(esw, vport); + kvfree(spec); + return err; +} + +static int esw_vport_egress_config(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + struct mlx5_flow_spec *spec; + int err = 0; + + esw_vport_cleanup_egress_rules(esw, vport); + + if (!vport->vlan && !vport->qos) { + esw_vport_disable_egress_acl(esw, vport); + return 0; + } + + esw_vport_enable_egress_acl(esw, vport); + + esw_debug(esw->dev, + "vport[%d] configure egress rules, vlan(%d) qos(%d)\n", + vport->vport, vport->vlan, vport->qos); + + spec = mlx5_vzalloc(sizeof(*spec)); + if (!spec) { + err = -ENOMEM; + esw_warn(esw->dev, "vport[%d] configure egress rules failed, err(%d)\n", + vport->vport, err); + goto out; + } + + /* Allowed vlan rule */ + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.vlan_tag); + MLX5_SET_TO_ONES(fte_match_param, spec->match_value, outer_headers.vlan_tag); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.first_vid); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.first_vid, vport->vlan); + + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + vport->egress.allowed_vlan = + mlx5_add_flow_rule(vport->egress.acl, spec, + MLX5_FLOW_CONTEXT_ACTION_ALLOW, + 0, NULL); + if (IS_ERR(vport->egress.allowed_vlan)) { + err = PTR_ERR(vport->egress.allowed_vlan); + pr_warn("vport[%d] configure egress allowed vlan rule failed, err(%d)\n", + vport->vport, err); + vport->egress.allowed_vlan = NULL; + goto out; + } + + /* Drop others rule (star rule) */ + memset(spec, 0, sizeof(*spec)); + vport->egress.drop_rule = + mlx5_add_flow_rule(vport->egress.acl, spec, + MLX5_FLOW_CONTEXT_ACTION_DROP, + 0, NULL); + if (IS_ERR(vport->egress.drop_rule)) { + err = PTR_ERR(vport->egress.drop_rule); + pr_warn("vport[%d] configure egress drop rule failed, err(%d)\n", + vport->vport, err); + vport->egress.drop_rule = NULL; + } +out: + kvfree(spec); + return err; +} + static void esw_enable_vport(struct mlx5_eswitch *esw, int vport_num, int enable_events) { struct mlx5_vport *vport = &esw->vports[vport_num]; - unsigned long flags; + mutex_lock(&esw->state_lock); WARN_ON(vport->enabled); esw_debug(esw->dev, "Enabling VPORT(%d)\n", vport_num); + + /* Only VFs need ACLs for VST and spoofchk filtering */ + if (vport_num && esw->mode == SRIOV_LEGACY) { + esw_vport_ingress_config(esw, vport); + esw_vport_egress_config(esw, vport); + } + mlx5_modify_vport_admin_state(esw->dev, MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT, vport_num, @@ -725,53 +1464,29 @@ static void esw_enable_vport(struct mlx5_eswitch *esw, int vport_num, /* Sync with current vport context */ vport->enabled_events = enable_events; - esw_vport_change_handler(&vport->vport_change_handler); - - spin_lock_irqsave(&vport->lock, flags); vport->enabled = true; - spin_unlock_irqrestore(&vport->lock, flags); - arm_vport_context_events_cmd(esw->dev, vport_num, enable_events); + /* only PF is trusted by default */ + vport->trusted = (vport_num) ? false : true; + esw_vport_change_handle_locked(vport); esw->enabled_vports++; esw_debug(esw->dev, "Enabled VPORT(%d)\n", vport_num); -} - -static void esw_cleanup_vport(struct mlx5_eswitch *esw, u16 vport_num) -{ - struct mlx5_vport *vport = &esw->vports[vport_num]; - struct l2addr_node *node; - struct vport_addr *addr; - struct hlist_node *tmp; - int hi; - - for_each_l2hash_node(node, tmp, vport->uc_list, hi) { - addr = container_of(node, struct vport_addr, node); - addr->action = MLX5_ACTION_DEL; - } - esw_apply_vport_addr_list(esw, vport_num, MLX5_NVPRT_LIST_TYPE_UC); - - for_each_l2hash_node(node, tmp, vport->mc_list, hi) { - addr = container_of(node, struct vport_addr, node); - addr->action = MLX5_ACTION_DEL; - } - esw_apply_vport_addr_list(esw, vport_num, MLX5_NVPRT_LIST_TYPE_MC); + mutex_unlock(&esw->state_lock); } static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num) { struct mlx5_vport *vport = &esw->vports[vport_num]; - unsigned long flags; if (!vport->enabled) return; esw_debug(esw->dev, "Disabling vport(%d)\n", vport_num); /* Mark this vport as disabled to discard new events */ - spin_lock_irqsave(&vport->lock, flags); vport->enabled = false; - vport->enabled_events = 0; - spin_unlock_irqrestore(&vport->lock, flags); + + synchronize_irq(mlx5_get_msix_vec(esw->dev, MLX5_EQ_VEC_ASYNC)); mlx5_modify_vport_admin_state(esw->dev, MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT, @@ -781,16 +1496,26 @@ static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num) flush_workqueue(esw->work_queue); /* Disable events from this vport */ arm_vport_context_events_cmd(esw->dev, vport->vport, 0); - /* We don't assume VFs will cleanup after themselves */ - esw_cleanup_vport(esw, vport_num); + mutex_lock(&esw->state_lock); + /* We don't assume VFs will cleanup after themselves. + * Calling vport change handler while vport is disabled will cleanup + * the vport resources. + */ + esw_vport_change_handle_locked(vport); + vport->enabled_events = 0; + if (vport_num && esw->mode == SRIOV_LEGACY) { + esw_vport_disable_egress_acl(esw, vport); + esw_vport_disable_ingress_acl(esw, vport); + } esw->enabled_vports--; + mutex_unlock(&esw->state_lock); } /* Public E-Switch API */ -int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs) +int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) { int err; - int i; + int i, enabled_events; if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager) || MLX5_CAP_GEN(esw->dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) @@ -802,16 +1527,26 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs) return -ENOTSUPP; } - esw_info(esw->dev, "E-Switch enable SRIOV: nvfs(%d)\n", nvfs); + if (!MLX5_CAP_ESW_INGRESS_ACL(esw->dev, ft_support)) + esw_warn(esw->dev, "E-Switch ingress ACL is not supported by FW\n"); + + if (!MLX5_CAP_ESW_EGRESS_ACL(esw->dev, ft_support)) + esw_warn(esw->dev, "E-Switch engress ACL is not supported by FW\n"); + esw_info(esw->dev, "E-Switch enable SRIOV: nvfs(%d) mode (%d)\n", nvfs, mode); + esw->mode = mode; esw_disable_vport(esw, 0); - err = esw_create_fdb_table(esw, nvfs + 1); + if (mode == SRIOV_LEGACY) + err = esw_create_legacy_fdb_table(esw, nvfs + 1); + else + err = esw_offloads_init(esw, nvfs + 1); if (err) goto abort; + enabled_events = (mode == SRIOV_LEGACY) ? SRIOV_VPORT_EVENTS : UC_ADDR_CHANGE; for (i = 0; i <= nvfs; i++) - esw_enable_vport(esw, i, SRIOV_VPORT_EVENTS); + esw_enable_vport(esw, i, enabled_events); esw_info(esw->dev, "SRIOV enabled: active vports(%d)\n", esw->enabled_vports); @@ -819,25 +1554,38 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs) abort: esw_enable_vport(esw, 0, UC_ADDR_CHANGE); + esw->mode = SRIOV_NONE; return err; } void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) { + struct esw_mc_addr *mc_promisc; + int nvports; int i; if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager) || MLX5_CAP_GEN(esw->dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) return; - esw_info(esw->dev, "disable SRIOV: active vports(%d)\n", - esw->enabled_vports); + esw_info(esw->dev, "disable SRIOV: active vports(%d) mode(%d)\n", + esw->enabled_vports, esw->mode); + + mc_promisc = esw->mc_promisc; + nvports = esw->enabled_vports; for (i = 0; i < esw->total_vports; i++) esw_disable_vport(esw, i); - esw_destroy_fdb_table(esw); + if (mc_promisc && mc_promisc->uplink_rule) + mlx5_del_flow_rule(mc_promisc->uplink_rule); + + if (esw->mode == SRIOV_LEGACY) + esw_destroy_legacy_fdb_table(esw); + else if (esw->mode == SRIOV_OFFLOADS) + esw_offloads_cleanup(esw, nvports); + esw->mode = SRIOV_NONE; /* VPORT 0 (PF) must be enabled back with non-sriov configuration */ esw_enable_vport(esw, 0, UC_ADDR_CHANGE); } @@ -845,7 +1593,8 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) int mlx5_eswitch_init(struct mlx5_core_dev *dev) { int l2_table_size = 1 << MLX5_CAP_GEN(dev, log_max_l2_table); - int total_vports = 1 + pci_sriov_get_totalvfs(dev->pdev); + int total_vports = MLX5_TOTAL_VPORTS(dev); + struct esw_mc_addr *mc_promisc; struct mlx5_eswitch *esw; int vport_num; int err; @@ -874,6 +1623,13 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) } esw->l2_table.size = l2_table_size; + mc_promisc = kzalloc(sizeof(*mc_promisc), GFP_KERNEL); + if (!mc_promisc) { + err = -ENOMEM; + goto abort; + } + esw->mc_promisc = mc_promisc; + esw->work_queue = create_singlethread_workqueue("mlx5_esw_wq"); if (!esw->work_queue) { err = -ENOMEM; @@ -887,6 +1643,16 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) goto abort; } + esw->offloads.vport_reps = + kzalloc(total_vports * sizeof(struct mlx5_eswitch_rep), + GFP_KERNEL); + if (!esw->offloads.vport_reps) { + err = -ENOMEM; + goto abort; + } + + mutex_init(&esw->state_lock); + for (vport_num = 0; vport_num < total_vports; vport_num++) { struct mlx5_vport *vport = &esw->vports[vport_num]; @@ -894,11 +1660,11 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) vport->dev = dev; INIT_WORK(&vport->vport_change_handler, esw_vport_change_handler); - spin_lock_init(&vport->lock); } esw->total_vports = total_vports; esw->enabled_vports = 0; + esw->mode = SRIOV_NONE; dev->priv.eswitch = esw; esw_enable_vport(esw, 0, UC_ADDR_CHANGE); @@ -909,6 +1675,7 @@ abort: destroy_workqueue(esw->work_queue); kfree(esw->l2_table.bitmap); kfree(esw->vports); + kfree(esw->offloads.vport_reps); kfree(esw); return err; } @@ -925,6 +1692,8 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) esw->dev->priv.eswitch = NULL; destroy_workqueue(esw->work_queue); kfree(esw->l2_table.bitmap); + kfree(esw->mc_promisc); + kfree(esw->offloads.vport_reps); kfree(esw->vports); kfree(esw); } @@ -942,10 +1711,8 @@ void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe) } vport = &esw->vports[vport_num]; - spin_lock(&vport->lock); if (vport->enabled) queue_work(esw->work_queue, &vport->vport_change_handler); - spin_unlock(&vport->lock); } /* Vport Administration */ @@ -953,9 +1720,23 @@ void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe) (esw && MLX5_CAP_GEN(esw->dev, vport_group_manager) && mlx5_core_is_pf(esw->dev)) #define LEGAL_VPORT(esw, vport) (vport >= 0 && vport < esw->total_vports) +static void node_guid_gen_from_mac(u64 *node_guid, u8 mac[ETH_ALEN]) +{ + ((u8 *)node_guid)[7] = mac[0]; + ((u8 *)node_guid)[6] = mac[1]; + ((u8 *)node_guid)[5] = mac[2]; + ((u8 *)node_guid)[4] = 0xff; + ((u8 *)node_guid)[3] = 0xfe; + ((u8 *)node_guid)[2] = mac[3]; + ((u8 *)node_guid)[1] = mac[4]; + ((u8 *)node_guid)[0] = mac[5]; +} + int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, int vport, u8 mac[ETH_ALEN]) { + struct mlx5_vport *evport; + u64 node_guid; int err = 0; if (!ESW_ALLOWED(esw)) @@ -963,6 +1744,15 @@ int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, if (!LEGAL_VPORT(esw, vport)) return -EINVAL; + evport = &esw->vports[vport]; + + if (evport->spoofchk && !is_valid_ether_addr(mac)) { + mlx5_core_warn(esw->dev, + "MAC invalidation is not allowed when spoofchk is on, vport(%d)\n", + vport); + return -EPERM; + } + err = mlx5_modify_nic_vport_mac_address(esw->dev, vport, mac); if (err) { mlx5_core_warn(esw->dev, @@ -971,6 +1761,17 @@ int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, return err; } + node_guid_gen_from_mac(&node_guid, mac); + err = mlx5_modify_nic_vport_node_guid(esw->dev, vport, node_guid); + if (err) + mlx5_core_warn(esw->dev, + "Failed to set vport %d node guid, err = %d. RDMA_CM will not function properly for this VF.\n", + vport, err); + + mutex_lock(&esw->state_lock); + if (evport->enabled && esw->mode == SRIOV_LEGACY) + err = esw_vport_ingress_config(esw, evport); + mutex_unlock(&esw->state_lock); return err; } @@ -990,6 +1791,7 @@ int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw, int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw, int vport, struct ifla_vf_info *ivi) { + struct mlx5_vport *evport; u16 vlan; u8 qos; @@ -998,6 +1800,8 @@ int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw, if (!LEGAL_VPORT(esw, vport)) return -EINVAL; + evport = &esw->vports[vport]; + memset(ivi, 0, sizeof(*ivi)); ivi->vf = vport - 1; @@ -1008,7 +1812,7 @@ int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw, query_esw_vport_cvlan(esw->dev, vport, &vlan, &qos); ivi->vlan = vlan; ivi->qos = qos; - ivi->spoofchk = 0; + ivi->spoofchk = evport->spoofchk; return 0; } @@ -1016,6 +1820,8 @@ int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw, int mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, int vport, u16 vlan, u8 qos) { + struct mlx5_vport *evport; + int err = 0; int set = 0; if (!ESW_ALLOWED(esw)) @@ -1026,7 +1832,73 @@ int mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, if (vlan || qos) set = 1; - return modify_esw_vport_cvlan(esw->dev, vport, vlan, qos, set); + evport = &esw->vports[vport]; + + err = modify_esw_vport_cvlan(esw->dev, vport, vlan, qos, set); + if (err) + return err; + + mutex_lock(&esw->state_lock); + evport->vlan = vlan; + evport->qos = qos; + if (evport->enabled && esw->mode == SRIOV_LEGACY) { + err = esw_vport_ingress_config(esw, evport); + if (err) + goto out; + err = esw_vport_egress_config(esw, evport); + } + +out: + mutex_unlock(&esw->state_lock); + return err; +} + +int mlx5_eswitch_set_vport_spoofchk(struct mlx5_eswitch *esw, + int vport, bool spoofchk) +{ + struct mlx5_vport *evport; + bool pschk; + int err = 0; + + if (!ESW_ALLOWED(esw)) + return -EPERM; + if (!LEGAL_VPORT(esw, vport)) + return -EINVAL; + + evport = &esw->vports[vport]; + + mutex_lock(&esw->state_lock); + pschk = evport->spoofchk; + evport->spoofchk = spoofchk; + if (evport->enabled && esw->mode == SRIOV_LEGACY) { + err = esw_vport_ingress_config(esw, evport); + if (err) + evport->spoofchk = pschk; + } + mutex_unlock(&esw->state_lock); + + return err; +} + +int mlx5_eswitch_set_vport_trust(struct mlx5_eswitch *esw, + int vport, bool setting) +{ + struct mlx5_vport *evport; + + if (!ESW_ALLOWED(esw)) + return -EPERM; + if (!LEGAL_VPORT(esw, vport)) + return -EINVAL; + + evport = &esw->vports[vport]; + + mutex_lock(&esw->state_lock); + evport->trusted = setting; + if (evport->enabled) + esw_vport_change_handle_locked(evport); + mutex_unlock(&esw->state_lock); + + return 0; } int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 3416a428f70f..a96140971d77 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -35,6 +35,7 @@ #include <linux/if_ether.h> #include <linux/if_link.h> +#include <net/devlink.h> #include <linux/mlx5/device.h> #define MLX5_MAX_UC_PER_VPORT(dev) \ @@ -46,6 +47,8 @@ #define MLX5_L2_ADDR_HASH_SIZE (BIT(BITS_PER_BYTE)) #define MLX5_L2_ADDR_HASH(addr) (addr[5]) +#define FDB_UPLINK_VPORT 0xffff + /* L2 -mac address based- hash helpers */ struct l2addr_node { struct hlist_node hlist; @@ -88,18 +91,40 @@ struct l2addr_node { kfree(ptr); \ }) +struct vport_ingress { + struct mlx5_flow_table *acl; + struct mlx5_flow_group *allow_untagged_spoofchk_grp; + struct mlx5_flow_group *allow_spoofchk_only_grp; + struct mlx5_flow_group *allow_untagged_only_grp; + struct mlx5_flow_group *drop_grp; + struct mlx5_flow_rule *allow_rule; + struct mlx5_flow_rule *drop_rule; +}; + +struct vport_egress { + struct mlx5_flow_table *acl; + struct mlx5_flow_group *allowed_vlans_grp; + struct mlx5_flow_group *drop_grp; + struct mlx5_flow_rule *allowed_vlan; + struct mlx5_flow_rule *drop_rule; +}; + struct mlx5_vport { struct mlx5_core_dev *dev; int vport; struct hlist_head uc_list[MLX5_L2_ADDR_HASH_SIZE]; struct hlist_head mc_list[MLX5_L2_ADDR_HASH_SIZE]; + struct mlx5_flow_rule *promisc_rule; + struct mlx5_flow_rule *allmulti_rule; struct work_struct vport_change_handler; - /* This spinlock protects access to vport data, between - * "esw_vport_disable" and ongoing interrupt "mlx5_eswitch_vport_event" - * once vport marked as disabled new interrupts are discarded. - */ - spinlock_t lock; /* vport events sync */ + struct vport_ingress ingress; + struct vport_egress egress; + + u16 vlan; + u8 qos; + bool spoofchk; + bool trusted; bool enabled; u16 enabled_events; }; @@ -112,7 +137,50 @@ struct mlx5_l2_table { struct mlx5_eswitch_fdb { void *fdb; - struct mlx5_flow_group *addr_grp; + union { + struct legacy_fdb { + struct mlx5_flow_group *addr_grp; + struct mlx5_flow_group *allmulti_grp; + struct mlx5_flow_group *promisc_grp; + } legacy; + + struct offloads_fdb { + struct mlx5_flow_table *fdb; + struct mlx5_flow_group *send_to_vport_grp; + struct mlx5_flow_group *miss_grp; + struct mlx5_flow_rule *miss_rule; + } offloads; + }; +}; + +enum { + SRIOV_NONE, + SRIOV_LEGACY, + SRIOV_OFFLOADS +}; + +struct mlx5_esw_sq { + struct mlx5_flow_rule *send_to_vport_rule; + struct list_head list; +}; + +struct mlx5_eswitch_rep { + int (*load)(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep *rep); + void (*unload)(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep *rep); + u16 vport; + struct mlx5_flow_rule *vport_rx_rule; + void *priv_data; + struct list_head vport_sqs_list; + bool valid; + u8 hw_id[ETH_ALEN]; +}; + +struct mlx5_esw_offload { + struct mlx5_flow_table *ft_offloads; + struct mlx5_flow_group *vport_rx_group; + struct mlx5_eswitch_rep *vport_reps; }; struct mlx5_eswitch { @@ -124,13 +192,20 @@ struct mlx5_eswitch { struct mlx5_vport *vports; int total_vports; int enabled_vports; + /* Synchronize between vport change events + * and async SRIOV admin state changes + */ + struct mutex state_lock; + struct esw_mc_addr *mc_promisc; + struct mlx5_esw_offload offloads; + int mode; }; /* E-Switch API */ int mlx5_eswitch_init(struct mlx5_core_dev *dev); void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw); void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe); -int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs); +int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode); void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw); int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, int vport, u8 mac[ETH_ALEN]); @@ -138,10 +213,46 @@ int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw, int vport, int link_state); int mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, int vport, u16 vlan, u8 qos); +int mlx5_eswitch_set_vport_spoofchk(struct mlx5_eswitch *esw, + int vport, bool spoofchk); +int mlx5_eswitch_set_vport_trust(struct mlx5_eswitch *esw, + int vport_num, bool setting); int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw, int vport, struct ifla_vf_info *ivi); int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw, int vport, struct ifla_vf_stats *vf_stats); +struct mlx5_flow_spec; + +struct mlx5_flow_rule * +mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, + struct mlx5_flow_spec *spec, + u32 action, u32 src_vport, u32 dst_vport); +struct mlx5_flow_rule * +mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, u32 tirn); + +int mlx5_eswitch_sqs2vport_start(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep *rep, + u16 *sqns_array, int sqns_num); +void mlx5_eswitch_sqs2vport_stop(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep *rep); + +int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode); +int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode); +void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep *rep); +void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw, + int vport); + +#define MLX5_DEBUG_ESWITCH_MASK BIT(3) + +#define esw_info(dev, format, ...) \ + pr_info("(%s): E-Switch: " format, (dev)->priv.name, ##__VA_ARGS__) + +#define esw_warn(dev, format, ...) \ + pr_warn("(%s): E-Switch: " format, (dev)->priv.name, ##__VA_ARGS__) + +#define esw_debug(dev, format, ...) \ + mlx5_core_dbg_mask(dev, MLX5_DEBUG_ESWITCH_MASK, format, ##__VA_ARGS__) #endif /* __MLX5_ESWITCH_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c new file mode 100644 index 000000000000..7de40e6b0c25 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -0,0 +1,646 @@ +/* + * Copyright (c) 2016, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/etherdevice.h> +#include <linux/mlx5/driver.h> +#include <linux/mlx5/mlx5_ifc.h> +#include <linux/mlx5/vport.h> +#include <linux/mlx5/fs.h> +#include "mlx5_core.h" +#include "eswitch.h" + +enum { + FDB_FAST_PATH = 0, + FDB_SLOW_PATH +}; + +struct mlx5_flow_rule * +mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, + struct mlx5_flow_spec *spec, + u32 action, u32 src_vport, u32 dst_vport) +{ + struct mlx5_flow_destination dest = { 0 }; + struct mlx5_fc *counter = NULL; + struct mlx5_flow_rule *rule; + void *misc; + + if (esw->mode != SRIOV_OFFLOADS) + return ERR_PTR(-EOPNOTSUPP); + + if (action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { + dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; + dest.vport_num = dst_vport; + action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + } else if (action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { + counter = mlx5_fc_create(esw->dev, true); + if (IS_ERR(counter)) + return ERR_CAST(counter); + dest.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; + dest.counter = counter; + } + + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); + MLX5_SET(fte_match_set_misc, misc, source_port, src_vport); + + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); + MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); + + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS | + MLX5_MATCH_MISC_PARAMETERS; + + rule = mlx5_add_flow_rule((struct mlx5_flow_table *)esw->fdb_table.fdb, + spec, action, 0, &dest); + + if (IS_ERR(rule)) + mlx5_fc_destroy(esw->dev, counter); + + return rule; +} + +static struct mlx5_flow_rule * +mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport, u32 sqn) +{ + struct mlx5_flow_destination dest; + struct mlx5_flow_rule *flow_rule; + struct mlx5_flow_spec *spec; + void *misc; + + spec = mlx5_vzalloc(sizeof(*spec)); + if (!spec) { + esw_warn(esw->dev, "FDB: Failed to alloc match parameters\n"); + flow_rule = ERR_PTR(-ENOMEM); + goto out; + } + + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); + MLX5_SET(fte_match_set_misc, misc, source_sqn, sqn); + MLX5_SET(fte_match_set_misc, misc, source_port, 0x0); /* source vport is 0 */ + + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); + MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_sqn); + MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); + + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS; + dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; + dest.vport_num = vport; + + flow_rule = mlx5_add_flow_rule(esw->fdb_table.offloads.fdb, spec, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + 0, &dest); + if (IS_ERR(flow_rule)) + esw_warn(esw->dev, "FDB: Failed to add send to vport rule err %ld\n", PTR_ERR(flow_rule)); +out: + kvfree(spec); + return flow_rule; +} + +void mlx5_eswitch_sqs2vport_stop(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep *rep) +{ + struct mlx5_esw_sq *esw_sq, *tmp; + + if (esw->mode != SRIOV_OFFLOADS) + return; + + list_for_each_entry_safe(esw_sq, tmp, &rep->vport_sqs_list, list) { + mlx5_del_flow_rule(esw_sq->send_to_vport_rule); + list_del(&esw_sq->list); + kfree(esw_sq); + } +} + +int mlx5_eswitch_sqs2vport_start(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep *rep, + u16 *sqns_array, int sqns_num) +{ + struct mlx5_flow_rule *flow_rule; + struct mlx5_esw_sq *esw_sq; + int vport; + int err; + int i; + + if (esw->mode != SRIOV_OFFLOADS) + return 0; + + vport = rep->vport == 0 ? + FDB_UPLINK_VPORT : rep->vport; + + for (i = 0; i < sqns_num; i++) { + esw_sq = kzalloc(sizeof(*esw_sq), GFP_KERNEL); + if (!esw_sq) { + err = -ENOMEM; + goto out_err; + } + + /* Add re-inject rule to the PF/representor sqs */ + flow_rule = mlx5_eswitch_add_send_to_vport_rule(esw, + vport, + sqns_array[i]); + if (IS_ERR(flow_rule)) { + err = PTR_ERR(flow_rule); + kfree(esw_sq); + goto out_err; + } + esw_sq->send_to_vport_rule = flow_rule; + list_add(&esw_sq->list, &rep->vport_sqs_list); + } + return 0; + +out_err: + mlx5_eswitch_sqs2vport_stop(esw, rep); + return err; +} + +static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw) +{ + struct mlx5_flow_destination dest; + struct mlx5_flow_rule *flow_rule = NULL; + struct mlx5_flow_spec *spec; + int err = 0; + + spec = mlx5_vzalloc(sizeof(*spec)); + if (!spec) { + esw_warn(esw->dev, "FDB: Failed to alloc match parameters\n"); + err = -ENOMEM; + goto out; + } + + dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; + dest.vport_num = 0; + + flow_rule = mlx5_add_flow_rule(esw->fdb_table.offloads.fdb, spec, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + 0, &dest); + if (IS_ERR(flow_rule)) { + err = PTR_ERR(flow_rule); + esw_warn(esw->dev, "FDB: Failed to add miss flow rule err %d\n", err); + goto out; + } + + esw->fdb_table.offloads.miss_rule = flow_rule; +out: + kvfree(spec); + return err; +} + +#define MAX_PF_SQ 256 +#define ESW_OFFLOADS_NUM_ENTRIES (1 << 13) /* 8K */ +#define ESW_OFFLOADS_NUM_GROUPS 4 + +static int esw_create_offloads_fdb_table(struct mlx5_eswitch *esw, int nvports) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_core_dev *dev = esw->dev; + struct mlx5_flow_namespace *root_ns; + struct mlx5_flow_table *fdb = NULL; + struct mlx5_flow_group *g; + u32 *flow_group_in; + void *match_criteria; + int table_size, ix, err = 0; + + flow_group_in = mlx5_vzalloc(inlen); + if (!flow_group_in) + return -ENOMEM; + + root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB); + if (!root_ns) { + esw_warn(dev, "Failed to get FDB flow namespace\n"); + goto ns_err; + } + + esw_debug(dev, "Create offloads FDB table, log_max_size(%d)\n", + MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size)); + + fdb = mlx5_create_auto_grouped_flow_table(root_ns, FDB_FAST_PATH, + ESW_OFFLOADS_NUM_ENTRIES, + ESW_OFFLOADS_NUM_GROUPS, 0); + if (IS_ERR(fdb)) { + err = PTR_ERR(fdb); + esw_warn(dev, "Failed to create Fast path FDB Table err %d\n", err); + goto fast_fdb_err; + } + esw->fdb_table.fdb = fdb; + + table_size = nvports + MAX_PF_SQ + 1; + fdb = mlx5_create_flow_table(root_ns, FDB_SLOW_PATH, table_size, 0); + if (IS_ERR(fdb)) { + err = PTR_ERR(fdb); + esw_warn(dev, "Failed to create slow path FDB Table err %d\n", err); + goto slow_fdb_err; + } + esw->fdb_table.offloads.fdb = fdb; + + /* create send-to-vport group */ + memset(flow_group_in, 0, inlen); + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, + MLX5_MATCH_MISC_PARAMETERS); + + match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria); + + MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_sqn); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_port); + + ix = nvports + MAX_PF_SQ; + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ix - 1); + + g = mlx5_create_flow_group(fdb, flow_group_in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + esw_warn(dev, "Failed to create send-to-vport flow group err(%d)\n", err); + goto send_vport_err; + } + esw->fdb_table.offloads.send_to_vport_grp = g; + + /* create miss group */ + memset(flow_group_in, 0, inlen); + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, 0); + + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ix); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ix + 1); + + g = mlx5_create_flow_group(fdb, flow_group_in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + esw_warn(dev, "Failed to create miss flow group err(%d)\n", err); + goto miss_err; + } + esw->fdb_table.offloads.miss_grp = g; + + err = esw_add_fdb_miss_rule(esw); + if (err) + goto miss_rule_err; + + return 0; + +miss_rule_err: + mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp); +miss_err: + mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp); +send_vport_err: + mlx5_destroy_flow_table(esw->fdb_table.offloads.fdb); +slow_fdb_err: + mlx5_destroy_flow_table(esw->fdb_table.fdb); +fast_fdb_err: +ns_err: + kvfree(flow_group_in); + return err; +} + +static void esw_destroy_offloads_fdb_table(struct mlx5_eswitch *esw) +{ + if (!esw->fdb_table.fdb) + return; + + esw_debug(esw->dev, "Destroy offloads FDB Table\n"); + mlx5_del_flow_rule(esw->fdb_table.offloads.miss_rule); + mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp); + mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp); + + mlx5_destroy_flow_table(esw->fdb_table.offloads.fdb); + mlx5_destroy_flow_table(esw->fdb_table.fdb); +} + +static int esw_create_offloads_table(struct mlx5_eswitch *esw) +{ + struct mlx5_flow_namespace *ns; + struct mlx5_flow_table *ft_offloads; + struct mlx5_core_dev *dev = esw->dev; + int err = 0; + + ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_OFFLOADS); + if (!ns) { + esw_warn(esw->dev, "Failed to get offloads flow namespace\n"); + return -ENOMEM; + } + + ft_offloads = mlx5_create_flow_table(ns, 0, dev->priv.sriov.num_vfs + 2, 0); + if (IS_ERR(ft_offloads)) { + err = PTR_ERR(ft_offloads); + esw_warn(esw->dev, "Failed to create offloads table, err %d\n", err); + return err; + } + + esw->offloads.ft_offloads = ft_offloads; + return 0; +} + +static void esw_destroy_offloads_table(struct mlx5_eswitch *esw) +{ + struct mlx5_esw_offload *offloads = &esw->offloads; + + mlx5_destroy_flow_table(offloads->ft_offloads); +} + +static int esw_create_vport_rx_group(struct mlx5_eswitch *esw) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_group *g; + struct mlx5_priv *priv = &esw->dev->priv; + u32 *flow_group_in; + void *match_criteria, *misc; + int err = 0; + int nvports = priv->sriov.num_vfs + 2; + + flow_group_in = mlx5_vzalloc(inlen); + if (!flow_group_in) + return -ENOMEM; + + /* create vport rx group */ + memset(flow_group_in, 0, inlen); + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, + MLX5_MATCH_MISC_PARAMETERS); + + match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria); + misc = MLX5_ADDR_OF(fte_match_param, match_criteria, misc_parameters); + MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); + + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, nvports - 1); + + g = mlx5_create_flow_group(esw->offloads.ft_offloads, flow_group_in); + + if (IS_ERR(g)) { + err = PTR_ERR(g); + mlx5_core_warn(esw->dev, "Failed to create vport rx group err %d\n", err); + goto out; + } + + esw->offloads.vport_rx_group = g; +out: + kfree(flow_group_in); + return err; +} + +static void esw_destroy_vport_rx_group(struct mlx5_eswitch *esw) +{ + mlx5_destroy_flow_group(esw->offloads.vport_rx_group); +} + +struct mlx5_flow_rule * +mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, u32 tirn) +{ + struct mlx5_flow_destination dest; + struct mlx5_flow_rule *flow_rule; + struct mlx5_flow_spec *spec; + void *misc; + + spec = mlx5_vzalloc(sizeof(*spec)); + if (!spec) { + esw_warn(esw->dev, "Failed to alloc match parameters\n"); + flow_rule = ERR_PTR(-ENOMEM); + goto out; + } + + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); + MLX5_SET(fte_match_set_misc, misc, source_port, vport); + + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); + MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); + + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS; + dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; + dest.tir_num = tirn; + + flow_rule = mlx5_add_flow_rule(esw->offloads.ft_offloads, spec, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + 0, &dest); + if (IS_ERR(flow_rule)) { + esw_warn(esw->dev, "fs offloads: Failed to add vport rx rule err %ld\n", PTR_ERR(flow_rule)); + goto out; + } + +out: + kvfree(spec); + return flow_rule; +} + +static int esw_offloads_start(struct mlx5_eswitch *esw) +{ + int err, err1, num_vfs = esw->dev->priv.sriov.num_vfs; + + if (esw->mode != SRIOV_LEGACY) { + esw_warn(esw->dev, "Can't set offloads mode, SRIOV legacy not enabled\n"); + return -EINVAL; + } + + mlx5_eswitch_disable_sriov(esw); + err = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_OFFLOADS); + if (err) { + esw_warn(esw->dev, "Failed setting eswitch to offloads, err %d\n", err); + err1 = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_LEGACY); + if (err1) + esw_warn(esw->dev, "Failed setting eswitch back to legacy, err %d\n", err); + } + return err; +} + +int esw_offloads_init(struct mlx5_eswitch *esw, int nvports) +{ + struct mlx5_eswitch_rep *rep; + int vport; + int err; + + err = esw_create_offloads_fdb_table(esw, nvports); + if (err) + return err; + + err = esw_create_offloads_table(esw); + if (err) + goto create_ft_err; + + err = esw_create_vport_rx_group(esw); + if (err) + goto create_fg_err; + + for (vport = 0; vport < nvports; vport++) { + rep = &esw->offloads.vport_reps[vport]; + if (!rep->valid) + continue; + + err = rep->load(esw, rep); + if (err) + goto err_reps; + } + return 0; + +err_reps: + for (vport--; vport >= 0; vport--) { + rep = &esw->offloads.vport_reps[vport]; + if (!rep->valid) + continue; + rep->unload(esw, rep); + } + esw_destroy_vport_rx_group(esw); + +create_fg_err: + esw_destroy_offloads_table(esw); + +create_ft_err: + esw_destroy_offloads_fdb_table(esw); + return err; +} + +static int esw_offloads_stop(struct mlx5_eswitch *esw) +{ + int err, err1, num_vfs = esw->dev->priv.sriov.num_vfs; + + mlx5_eswitch_disable_sriov(esw); + err = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_LEGACY); + if (err) { + esw_warn(esw->dev, "Failed setting eswitch to legacy, err %d\n", err); + err1 = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_OFFLOADS); + if (err1) + esw_warn(esw->dev, "Failed setting eswitch back to offloads, err %d\n", err); + } + + return err; +} + +void esw_offloads_cleanup(struct mlx5_eswitch *esw, int nvports) +{ + struct mlx5_eswitch_rep *rep; + int vport; + + for (vport = 0; vport < nvports; vport++) { + rep = &esw->offloads.vport_reps[vport]; + if (!rep->valid) + continue; + rep->unload(esw, rep); + } + + esw_destroy_vport_rx_group(esw); + esw_destroy_offloads_table(esw); + esw_destroy_offloads_fdb_table(esw); +} + +static int esw_mode_from_devlink(u16 mode, u16 *mlx5_mode) +{ + switch (mode) { + case DEVLINK_ESWITCH_MODE_LEGACY: + *mlx5_mode = SRIOV_LEGACY; + break; + case DEVLINK_ESWITCH_MODE_SWITCHDEV: + *mlx5_mode = SRIOV_OFFLOADS; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int esw_mode_to_devlink(u16 mlx5_mode, u16 *mode) +{ + switch (mlx5_mode) { + case SRIOV_LEGACY: + *mode = DEVLINK_ESWITCH_MODE_LEGACY; + break; + case SRIOV_OFFLOADS: + *mode = DEVLINK_ESWITCH_MODE_SWITCHDEV; + break; + default: + return -EINVAL; + } + + return 0; +} + +int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode) +{ + struct mlx5_core_dev *dev; + u16 cur_mlx5_mode, mlx5_mode = 0; + + dev = devlink_priv(devlink); + + if (!MLX5_CAP_GEN(dev, vport_group_manager)) + return -EOPNOTSUPP; + + cur_mlx5_mode = dev->priv.eswitch->mode; + + if (cur_mlx5_mode == SRIOV_NONE) + return -EOPNOTSUPP; + + if (esw_mode_from_devlink(mode, &mlx5_mode)) + return -EINVAL; + + if (cur_mlx5_mode == mlx5_mode) + return 0; + + if (mode == DEVLINK_ESWITCH_MODE_SWITCHDEV) + return esw_offloads_start(dev->priv.eswitch); + else if (mode == DEVLINK_ESWITCH_MODE_LEGACY) + return esw_offloads_stop(dev->priv.eswitch); + else + return -EINVAL; +} + +int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode) +{ + struct mlx5_core_dev *dev; + + dev = devlink_priv(devlink); + + if (!MLX5_CAP_GEN(dev, vport_group_manager)) + return -EOPNOTSUPP; + + if (dev->priv.eswitch->mode == SRIOV_NONE) + return -EOPNOTSUPP; + + return esw_mode_to_devlink(dev->priv.eswitch->mode, mode); +} + +void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep *rep) +{ + struct mlx5_esw_offload *offloads = &esw->offloads; + + memcpy(&offloads->vport_reps[rep->vport], rep, + sizeof(struct mlx5_eswitch_rep)); + + INIT_LIST_HEAD(&offloads->vport_reps[rep->vport].vport_sqs_list); + offloads->vport_reps[rep->vport].valid = true; +} + +void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw, + int vport) +{ + struct mlx5_esw_offload *offloads = &esw->offloads; + struct mlx5_eswitch_rep *rep; + + rep = &offloads->vport_reps[vport]; + + if (esw->mode == SRIOV_OFFLOADS && esw->vports[vport].enabled) + rep->unload(esw, rep); + + offloads->vport_reps[vport].valid = false; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index f46f1db0fc00..287ade151ec8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -50,6 +50,10 @@ int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev, MLX5_CMD_OP_SET_FLOW_TABLE_ROOT); MLX5_SET(set_flow_table_root_in, in, table_type, ft->type); MLX5_SET(set_flow_table_root_in, in, table_id, ft->id); + if (ft->vport) { + MLX5_SET(set_flow_table_root_in, in, vport_number, ft->vport); + MLX5_SET(set_flow_table_root_in, in, other_vport, 1); + } memset(out, 0, sizeof(out)); return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, @@ -57,6 +61,7 @@ int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev, } int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev, + u16 vport, enum fs_flow_table_type type, unsigned int level, unsigned int log_size, struct mlx5_flow_table *next_ft, unsigned int *table_id) @@ -77,6 +82,10 @@ int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev, MLX5_SET(create_flow_table_in, in, table_type, type); MLX5_SET(create_flow_table_in, in, level, level); MLX5_SET(create_flow_table_in, in, log_size, log_size); + if (vport) { + MLX5_SET(create_flow_table_in, in, vport_number, vport); + MLX5_SET(create_flow_table_in, in, other_vport, 1); + } memset(out, 0, sizeof(out)); err = mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, @@ -101,6 +110,10 @@ int mlx5_cmd_destroy_flow_table(struct mlx5_core_dev *dev, MLX5_CMD_OP_DESTROY_FLOW_TABLE); MLX5_SET(destroy_flow_table_in, in, table_type, ft->type); MLX5_SET(destroy_flow_table_in, in, table_id, ft->id); + if (ft->vport) { + MLX5_SET(destroy_flow_table_in, in, vport_number, ft->vport); + MLX5_SET(destroy_flow_table_in, in, other_vport, 1); + } return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out)); @@ -120,6 +133,10 @@ int mlx5_cmd_modify_flow_table(struct mlx5_core_dev *dev, MLX5_CMD_OP_MODIFY_FLOW_TABLE); MLX5_SET(modify_flow_table_in, in, table_type, ft->type); MLX5_SET(modify_flow_table_in, in, table_id, ft->id); + if (ft->vport) { + MLX5_SET(modify_flow_table_in, in, vport_number, ft->vport); + MLX5_SET(modify_flow_table_in, in, other_vport, 1); + } MLX5_SET(modify_flow_table_in, in, modify_field_select, MLX5_MODIFY_FLOW_TABLE_MISS_TABLE_ID); if (next_ft) { @@ -148,6 +165,10 @@ int mlx5_cmd_create_flow_group(struct mlx5_core_dev *dev, MLX5_CMD_OP_CREATE_FLOW_GROUP); MLX5_SET(create_flow_group_in, in, table_type, ft->type); MLX5_SET(create_flow_group_in, in, table_id, ft->id); + if (ft->vport) { + MLX5_SET(create_flow_group_in, in, vport_number, ft->vport); + MLX5_SET(create_flow_group_in, in, other_vport, 1); + } err = mlx5_cmd_exec_check_status(dev, in, inlen, out, @@ -174,6 +195,10 @@ int mlx5_cmd_destroy_flow_group(struct mlx5_core_dev *dev, MLX5_SET(destroy_flow_group_in, in, table_type, ft->type); MLX5_SET(destroy_flow_group_in, in, table_id, ft->id); MLX5_SET(destroy_flow_group_in, in, group_id, group_id); + if (ft->vport) { + MLX5_SET(destroy_flow_group_in, in, vport_number, ft->vport); + MLX5_SET(destroy_flow_group_in, in, other_vport, 1); + } return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out)); @@ -207,22 +232,29 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, MLX5_SET(set_fte_in, in, table_type, ft->type); MLX5_SET(set_fte_in, in, table_id, ft->id); MLX5_SET(set_fte_in, in, flow_index, fte->index); + if (ft->vport) { + MLX5_SET(set_fte_in, in, vport_number, ft->vport); + MLX5_SET(set_fte_in, in, other_vport, 1); + } in_flow_context = MLX5_ADDR_OF(set_fte_in, in, flow_context); MLX5_SET(flow_context, in_flow_context, group_id, group_id); MLX5_SET(flow_context, in_flow_context, flow_tag, fte->flow_tag); MLX5_SET(flow_context, in_flow_context, action, fte->action); - MLX5_SET(flow_context, in_flow_context, destination_list_size, - fte->dests_size); in_match_value = MLX5_ADDR_OF(flow_context, in_flow_context, match_value); memcpy(in_match_value, &fte->val, MLX5_ST_SZ_BYTES(fte_match_param)); + in_dests = MLX5_ADDR_OF(flow_context, in_flow_context, destination); if (fte->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { - in_dests = MLX5_ADDR_OF(flow_context, in_flow_context, destination); + int list_size = 0; + list_for_each_entry(dst, &fte->node.children, node.list) { unsigned int id; + if (dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_COUNTER) + continue; + MLX5_SET(dest_format_struct, in_dests, destination_type, dst->dest_attr.type); if (dst->dest_attr.type == @@ -233,8 +265,31 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, } MLX5_SET(dest_format_struct, in_dests, destination_id, id); in_dests += MLX5_ST_SZ_BYTES(dest_format_struct); + list_size++; } + + MLX5_SET(flow_context, in_flow_context, destination_list_size, + list_size); } + + if (fte->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { + int list_size = 0; + + list_for_each_entry(dst, &fte->node.children, node.list) { + if (dst->dest_attr.type != + MLX5_FLOW_DESTINATION_TYPE_COUNTER) + continue; + + MLX5_SET(flow_counter_list, in_dests, flow_counter_id, + dst->dest_attr.counter->id); + in_dests += MLX5_ST_SZ_BYTES(dest_format_struct); + list_size++; + } + + MLX5_SET(flow_context, in_flow_context, flow_counter_list_size, + list_size); + } + memset(out, 0, sizeof(out)); err = mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out)); @@ -254,18 +309,16 @@ int mlx5_cmd_create_fte(struct mlx5_core_dev *dev, int mlx5_cmd_update_fte(struct mlx5_core_dev *dev, struct mlx5_flow_table *ft, unsigned group_id, + int modify_mask, struct fs_fte *fte) { int opmod; - int modify_mask; int atomic_mod_cap = MLX5_CAP_FLOWTABLE(dev, flow_table_properties_nic_receive. flow_modify_en); if (!atomic_mod_cap) return -ENOTSUPP; opmod = 1; - modify_mask = 1 << - MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST; return mlx5_cmd_set_fte(dev, opmod, modify_mask, ft, group_id, fte); } @@ -285,8 +338,145 @@ int mlx5_cmd_delete_fte(struct mlx5_core_dev *dev, MLX5_SET(delete_fte_in, in, table_type, ft->type); MLX5_SET(delete_fte_in, in, table_id, ft->id); MLX5_SET(delete_fte_in, in, flow_index, index); + if (ft->vport) { + MLX5_SET(delete_fte_in, in, vport_number, ft->vport); + MLX5_SET(delete_fte_in, in, other_vport, 1); + } err = mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out)); return err; } + +int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u16 *id) +{ + u32 in[MLX5_ST_SZ_DW(alloc_flow_counter_in)]; + u32 out[MLX5_ST_SZ_DW(alloc_flow_counter_out)]; + int err; + + memset(in, 0, sizeof(in)); + memset(out, 0, sizeof(out)); + + MLX5_SET(alloc_flow_counter_in, in, opcode, + MLX5_CMD_OP_ALLOC_FLOW_COUNTER); + + err = mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, + sizeof(out)); + if (err) + return err; + + *id = MLX5_GET(alloc_flow_counter_out, out, flow_counter_id); + + return 0; +} + +int mlx5_cmd_fc_free(struct mlx5_core_dev *dev, u16 id) +{ + u32 in[MLX5_ST_SZ_DW(dealloc_flow_counter_in)]; + u32 out[MLX5_ST_SZ_DW(dealloc_flow_counter_out)]; + + memset(in, 0, sizeof(in)); + memset(out, 0, sizeof(out)); + + MLX5_SET(dealloc_flow_counter_in, in, opcode, + MLX5_CMD_OP_DEALLOC_FLOW_COUNTER); + MLX5_SET(dealloc_flow_counter_in, in, flow_counter_id, id); + + return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, + sizeof(out)); +} + +int mlx5_cmd_fc_query(struct mlx5_core_dev *dev, u16 id, + u64 *packets, u64 *bytes) +{ + u32 out[MLX5_ST_SZ_BYTES(query_flow_counter_out) + + MLX5_ST_SZ_BYTES(traffic_counter)]; + u32 in[MLX5_ST_SZ_DW(query_flow_counter_in)]; + void *stats; + int err = 0; + + memset(in, 0, sizeof(in)); + memset(out, 0, sizeof(out)); + + MLX5_SET(query_flow_counter_in, in, opcode, + MLX5_CMD_OP_QUERY_FLOW_COUNTER); + MLX5_SET(query_flow_counter_in, in, op_mod, 0); + MLX5_SET(query_flow_counter_in, in, flow_counter_id, id); + + err = mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out)); + if (err) + return err; + + stats = MLX5_ADDR_OF(query_flow_counter_out, out, flow_statistics); + *packets = MLX5_GET64(traffic_counter, stats, packets); + *bytes = MLX5_GET64(traffic_counter, stats, octets); + + return 0; +} + +struct mlx5_cmd_fc_bulk { + u16 id; + int num; + int outlen; + u32 out[0]; +}; + +struct mlx5_cmd_fc_bulk * +mlx5_cmd_fc_bulk_alloc(struct mlx5_core_dev *dev, u16 id, int num) +{ + struct mlx5_cmd_fc_bulk *b; + int outlen = + MLX5_ST_SZ_BYTES(query_flow_counter_out) + + MLX5_ST_SZ_BYTES(traffic_counter) * num; + + b = kzalloc(sizeof(*b) + outlen, GFP_KERNEL); + if (!b) + return NULL; + + b->id = id; + b->num = num; + b->outlen = outlen; + + return b; +} + +void mlx5_cmd_fc_bulk_free(struct mlx5_cmd_fc_bulk *b) +{ + kfree(b); +} + +int +mlx5_cmd_fc_bulk_query(struct mlx5_core_dev *dev, struct mlx5_cmd_fc_bulk *b) +{ + u32 in[MLX5_ST_SZ_DW(query_flow_counter_in)]; + + memset(in, 0, sizeof(in)); + + MLX5_SET(query_flow_counter_in, in, opcode, + MLX5_CMD_OP_QUERY_FLOW_COUNTER); + MLX5_SET(query_flow_counter_in, in, op_mod, 0); + MLX5_SET(query_flow_counter_in, in, flow_counter_id, b->id); + MLX5_SET(query_flow_counter_in, in, num_of_counters, b->num); + + return mlx5_cmd_exec_check_status(dev, in, sizeof(in), + b->out, b->outlen); +} + +void mlx5_cmd_fc_bulk_get(struct mlx5_core_dev *dev, + struct mlx5_cmd_fc_bulk *b, u16 id, + u64 *packets, u64 *bytes) +{ + int index = id - b->id; + void *stats; + + if (index < 0 || index >= b->num) { + mlx5_core_warn(dev, "Flow counter id (0x%x) out of range (0x%x..0x%x). Counter ignored.\n", + id, b->id, b->id + b->num - 1); + return; + } + + stats = MLX5_ADDR_OF(query_flow_counter_out, b->out, + flow_statistics[index]); + *packets = MLX5_GET64(traffic_counter, stats, packets); + *bytes = MLX5_GET64(traffic_counter, stats, octets); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h index 9814d4784803..158844cef82b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h @@ -34,6 +34,7 @@ #define _MLX5_FS_CMD_ int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev, + u16 vport, enum fs_flow_table_type type, unsigned int level, unsigned int log_size, struct mlx5_flow_table *next_ft, unsigned int *table_id); @@ -61,6 +62,7 @@ int mlx5_cmd_create_fte(struct mlx5_core_dev *dev, int mlx5_cmd_update_fte(struct mlx5_core_dev *dev, struct mlx5_flow_table *ft, unsigned group_id, + int modify_mask, struct fs_fte *fte); int mlx5_cmd_delete_fte(struct mlx5_core_dev *dev, @@ -69,4 +71,21 @@ int mlx5_cmd_delete_fte(struct mlx5_core_dev *dev, int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev, struct mlx5_flow_table *ft); + +int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u16 *id); +int mlx5_cmd_fc_free(struct mlx5_core_dev *dev, u16 id); +int mlx5_cmd_fc_query(struct mlx5_core_dev *dev, u16 id, + u64 *packets, u64 *bytes); + +struct mlx5_cmd_fc_bulk; + +struct mlx5_cmd_fc_bulk * +mlx5_cmd_fc_bulk_alloc(struct mlx5_core_dev *dev, u16 id, int num); +void mlx5_cmd_fc_bulk_free(struct mlx5_cmd_fc_bulk *b); +int +mlx5_cmd_fc_bulk_query(struct mlx5_core_dev *dev, struct mlx5_cmd_fc_bulk *b); +void mlx5_cmd_fc_bulk_get(struct mlx5_core_dev *dev, + struct mlx5_cmd_fc_bulk *b, u16 id, + u64 *packets, u64 *bytes); + #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 89cce97d46c6..3d6c1f65e586 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -40,18 +40,18 @@ #define INIT_TREE_NODE_ARRAY_SIZE(...) (sizeof((struct init_tree_node[]){__VA_ARGS__}) /\ sizeof(struct init_tree_node)) -#define ADD_PRIO(num_prios_val, min_level_val, max_ft_val, caps_val,\ +#define ADD_PRIO(num_prios_val, min_level_val, num_levels_val, caps_val,\ ...) {.type = FS_TYPE_PRIO,\ .min_ft_level = min_level_val,\ - .max_ft = max_ft_val,\ + .num_levels = num_levels_val,\ .num_leaf_prios = num_prios_val,\ .caps = caps_val,\ .children = (struct init_tree_node[]) {__VA_ARGS__},\ .ar_size = INIT_TREE_NODE_ARRAY_SIZE(__VA_ARGS__) \ } -#define ADD_MULTIPLE_PRIO(num_prios_val, max_ft_val, ...)\ - ADD_PRIO(num_prios_val, 0, max_ft_val, {},\ +#define ADD_MULTIPLE_PRIO(num_prios_val, num_levels_val, ...)\ + ADD_PRIO(num_prios_val, 0, num_levels_val, {},\ __VA_ARGS__)\ #define ADD_NS(...) {.type = FS_TYPE_NAMESPACE,\ @@ -67,19 +67,35 @@ #define FS_REQUIRED_CAPS(...) {.arr_sz = INIT_CAPS_ARRAY_SIZE(__VA_ARGS__), \ .caps = (long[]) {__VA_ARGS__} } -#define LEFTOVERS_MAX_FT 1 +#define FS_CHAINING_CAPS FS_REQUIRED_CAPS(FS_CAP(flow_table_properties_nic_receive.flow_modify_en), \ + FS_CAP(flow_table_properties_nic_receive.modify_root), \ + FS_CAP(flow_table_properties_nic_receive.identified_miss_table_mode), \ + FS_CAP(flow_table_properties_nic_receive.flow_table_modify)) + +#define LEFTOVERS_NUM_LEVELS 1 #define LEFTOVERS_NUM_PRIOS 1 -#define BY_PASS_PRIO_MAX_FT 1 -#define BY_PASS_MIN_LEVEL (KENREL_MIN_LEVEL + MLX5_BY_PASS_NUM_PRIOS +\ - LEFTOVERS_MAX_FT) -#define KERNEL_MAX_FT 3 -#define KERNEL_NUM_PRIOS 2 -#define KENREL_MIN_LEVEL 2 +#define BY_PASS_PRIO_NUM_LEVELS 1 +#define BY_PASS_MIN_LEVEL (ETHTOOL_MIN_LEVEL + MLX5_BY_PASS_NUM_PRIOS +\ + LEFTOVERS_NUM_PRIOS) + +#define ETHTOOL_PRIO_NUM_LEVELS 1 +#define ETHTOOL_NUM_PRIOS 11 +#define ETHTOOL_MIN_LEVEL (KERNEL_MIN_LEVEL + ETHTOOL_NUM_PRIOS) +/* Vlan, mac, ttc, aRFS */ +#define KERNEL_NIC_PRIO_NUM_LEVELS 4 +#define KERNEL_NIC_NUM_PRIOS 1 +/* One more level for tc */ +#define KERNEL_MIN_LEVEL (KERNEL_NIC_PRIO_NUM_LEVELS + 1) -#define ANCHOR_MAX_FT 1 +#define ANCHOR_NUM_LEVELS 1 #define ANCHOR_NUM_PRIOS 1 #define ANCHOR_MIN_LEVEL (BY_PASS_MIN_LEVEL + 1) + +#define OFFLOADS_MAX_FT 1 +#define OFFLOADS_NUM_PRIOS 1 +#define OFFLOADS_MIN_LEVEL (ANCHOR_MIN_LEVEL + 1) + struct node_caps { size_t arr_sz; long *caps; @@ -92,27 +108,30 @@ static struct init_tree_node { int min_ft_level; int num_leaf_prios; int prio; - int max_ft; + int num_levels; } root_fs = { .type = FS_TYPE_NAMESPACE, - .ar_size = 4, + .ar_size = 6, .children = (struct init_tree_node[]) { ADD_PRIO(0, BY_PASS_MIN_LEVEL, 0, - FS_REQUIRED_CAPS(FS_CAP(flow_table_properties_nic_receive.flow_modify_en), - FS_CAP(flow_table_properties_nic_receive.modify_root), - FS_CAP(flow_table_properties_nic_receive.identified_miss_table_mode), - FS_CAP(flow_table_properties_nic_receive.flow_table_modify)), - ADD_NS(ADD_MULTIPLE_PRIO(MLX5_BY_PASS_NUM_PRIOS, BY_PASS_PRIO_MAX_FT))), - ADD_PRIO(0, KENREL_MIN_LEVEL, 0, {}, - ADD_NS(ADD_MULTIPLE_PRIO(KERNEL_NUM_PRIOS, KERNEL_MAX_FT))), + FS_CHAINING_CAPS, + ADD_NS(ADD_MULTIPLE_PRIO(MLX5_BY_PASS_NUM_PRIOS, + BY_PASS_PRIO_NUM_LEVELS))), + ADD_PRIO(0, OFFLOADS_MIN_LEVEL, 0, {}, + ADD_NS(ADD_MULTIPLE_PRIO(OFFLOADS_NUM_PRIOS, OFFLOADS_MAX_FT))), + ADD_PRIO(0, ETHTOOL_MIN_LEVEL, 0, + FS_CHAINING_CAPS, + ADD_NS(ADD_MULTIPLE_PRIO(ETHTOOL_NUM_PRIOS, + ETHTOOL_PRIO_NUM_LEVELS))), + ADD_PRIO(0, KERNEL_MIN_LEVEL, 0, {}, + ADD_NS(ADD_MULTIPLE_PRIO(1, 1), + ADD_MULTIPLE_PRIO(KERNEL_NIC_NUM_PRIOS, + KERNEL_NIC_PRIO_NUM_LEVELS))), ADD_PRIO(0, BY_PASS_MIN_LEVEL, 0, - FS_REQUIRED_CAPS(FS_CAP(flow_table_properties_nic_receive.flow_modify_en), - FS_CAP(flow_table_properties_nic_receive.modify_root), - FS_CAP(flow_table_properties_nic_receive.identified_miss_table_mode), - FS_CAP(flow_table_properties_nic_receive.flow_table_modify)), - ADD_NS(ADD_MULTIPLE_PRIO(LEFTOVERS_NUM_PRIOS, LEFTOVERS_MAX_FT))), + FS_CHAINING_CAPS, + ADD_NS(ADD_MULTIPLE_PRIO(LEFTOVERS_NUM_PRIOS, LEFTOVERS_NUM_LEVELS))), ADD_PRIO(0, ANCHOR_MIN_LEVEL, 0, {}, - ADD_NS(ADD_MULTIPLE_PRIO(ANCHOR_NUM_PRIOS, ANCHOR_MAX_FT))), + ADD_NS(ADD_MULTIPLE_PRIO(ANCHOR_NUM_PRIOS, ANCHOR_NUM_LEVELS))), } }; @@ -222,19 +241,6 @@ static struct fs_prio *find_prio(struct mlx5_flow_namespace *ns, return NULL; } -static unsigned int find_next_free_level(struct fs_prio *prio) -{ - if (!list_empty(&prio->node.children)) { - struct mlx5_flow_table *ft; - - ft = list_last_entry(&prio->node.children, - struct mlx5_flow_table, - node.list); - return ft->level + 1; - } - return prio->start_level; -} - static bool masked_memcmp(void *mask, void *val1, void *val2, size_t size) { unsigned int i; @@ -351,6 +357,7 @@ static void del_rule(struct fs_node *node) struct mlx5_flow_group *fg; struct fs_fte *fte; u32 *match_value; + int modify_mask; struct mlx5_core_dev *dev = get_dev(node); int match_len = MLX5_ST_SZ_BYTES(fte_match_param); int err; @@ -374,8 +381,11 @@ static void del_rule(struct fs_node *node) } if ((fte->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) && --fte->dests_size) { + modify_mask = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST), err = mlx5_cmd_update_fte(dev, ft, - fg->id, fte); + fg->id, + modify_mask, + fte); if (err) pr_warn("%s can't del rule fg id=%d fte_index=%d\n", __func__, fg->id, fte->index); @@ -464,7 +474,7 @@ static struct mlx5_flow_group *alloc_flow_group(u32 *create_fg_in) return fg; } -static struct mlx5_flow_table *alloc_flow_table(int level, int max_fte, +static struct mlx5_flow_table *alloc_flow_table(int level, u16 vport, int max_fte, enum fs_flow_table_type table_type) { struct mlx5_flow_table *ft; @@ -476,6 +486,7 @@ static struct mlx5_flow_table *alloc_flow_table(int level, int max_fte, ft->level = level; ft->node.type = FS_TYPE_FLOW_TABLE; ft->type = table_type; + ft->vport = vport; ft->max_fte = max_fte; INIT_LIST_HEAD(&ft->fwd_rules); mutex_init(&ft->lock); @@ -615,12 +626,13 @@ static int update_root_ft_create(struct mlx5_flow_table *ft, struct fs_prio return err; } -static int mlx5_modify_rule_destination(struct mlx5_flow_rule *rule, - struct mlx5_flow_destination *dest) +int mlx5_modify_rule_destination(struct mlx5_flow_rule *rule, + struct mlx5_flow_destination *dest) { struct mlx5_flow_table *ft; struct mlx5_flow_group *fg; struct fs_fte *fte; + int modify_mask = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST); int err = 0; fs_get_obj(fte, rule->node.parent); @@ -632,7 +644,9 @@ static int mlx5_modify_rule_destination(struct mlx5_flow_rule *rule, memcpy(&rule->dest_attr, dest, sizeof(*dest)); err = mlx5_cmd_update_fte(get_dev(&ft->node), - ft, fg->id, fte); + ft, fg->id, + modify_mask, + fte); unlock_ref_node(&fte->node); return err; @@ -693,9 +707,23 @@ static int connect_flow_table(struct mlx5_core_dev *dev, struct mlx5_flow_table return err; } -struct mlx5_flow_table *mlx5_create_flow_table(struct mlx5_flow_namespace *ns, - int prio, - int max_fte) +static void list_add_flow_table(struct mlx5_flow_table *ft, + struct fs_prio *prio) +{ + struct list_head *prev = &prio->node.children; + struct mlx5_flow_table *iter; + + fs_for_each_ft(iter, prio) { + if (iter->level > ft->level) + break; + prev = &iter->node.list; + } + list_add(&ft->node.list, prev); +} + +static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespace *ns, + u16 vport, int prio, + int max_fte, u32 level) { struct mlx5_flow_table *next_ft = NULL; struct mlx5_flow_table *ft; @@ -716,12 +744,16 @@ struct mlx5_flow_table *mlx5_create_flow_table(struct mlx5_flow_namespace *ns, err = -EINVAL; goto unlock_root; } - if (fs_prio->num_ft == fs_prio->max_ft) { + if (level >= fs_prio->num_levels) { err = -ENOSPC; goto unlock_root; } - - ft = alloc_flow_table(find_next_free_level(fs_prio), + /* The level is related to the + * priority level range. + */ + level += fs_prio->start_level; + ft = alloc_flow_table(level, + vport, roundup_pow_of_two(max_fte), root->table_type); if (!ft) { @@ -732,7 +764,7 @@ struct mlx5_flow_table *mlx5_create_flow_table(struct mlx5_flow_namespace *ns, tree_init_node(&ft->node, 1, del_flow_table); log_table_sz = ilog2(ft->max_fte); next_ft = find_next_chained_ft(fs_prio); - err = mlx5_cmd_create_flow_table(root->dev, ft->type, ft->level, + err = mlx5_cmd_create_flow_table(root->dev, ft->vport, ft->type, ft->level, log_table_sz, next_ft, &ft->id); if (err) goto free_ft; @@ -742,7 +774,7 @@ struct mlx5_flow_table *mlx5_create_flow_table(struct mlx5_flow_namespace *ns, goto destroy_ft; lock_ref_node(&fs_prio->node); tree_add_node(&ft->node, &fs_prio->node); - list_add_tail(&ft->node.list, &fs_prio->node.children); + list_add_flow_table(ft, fs_prio); fs_prio->num_ft++; unlock_ref_node(&fs_prio->node); mutex_unlock(&root->chain_lock); @@ -756,17 +788,32 @@ unlock_root: return ERR_PTR(err); } +struct mlx5_flow_table *mlx5_create_flow_table(struct mlx5_flow_namespace *ns, + int prio, int max_fte, + u32 level) +{ + return __mlx5_create_flow_table(ns, 0, prio, max_fte, level); +} + +struct mlx5_flow_table *mlx5_create_vport_flow_table(struct mlx5_flow_namespace *ns, + int prio, int max_fte, + u32 level, u16 vport) +{ + return __mlx5_create_flow_table(ns, vport, prio, max_fte, level); +} + struct mlx5_flow_table *mlx5_create_auto_grouped_flow_table(struct mlx5_flow_namespace *ns, int prio, int num_flow_table_entries, - int max_num_groups) + int max_num_groups, + u32 level) { struct mlx5_flow_table *ft; if (max_num_groups > num_flow_table_entries) return ERR_PTR(-EINVAL); - ft = mlx5_create_flow_table(ns, prio, num_flow_table_entries); + ft = mlx5_create_flow_table(ns, prio, num_flow_table_entries, level); if (IS_ERR(ft)) return ft; @@ -850,6 +897,7 @@ static struct mlx5_flow_rule *add_rule_fte(struct fs_fte *fte, { struct mlx5_flow_table *ft; struct mlx5_flow_rule *rule; + int modify_mask = 0; int err; rule = alloc_rule(dest); @@ -865,14 +913,20 @@ static struct mlx5_flow_rule *add_rule_fte(struct fs_fte *fte, list_add(&rule->node.list, &fte->node.children); else list_add_tail(&rule->node.list, &fte->node.children); - if (dest) + if (dest) { fte->dests_size++; + + modify_mask |= dest->type == MLX5_FLOW_DESTINATION_TYPE_COUNTER ? + BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_FLOW_COUNTERS) : + BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST); + } + if (fte->dests_size == 1 || !dest) err = mlx5_cmd_create_fte(get_dev(&ft->node), ft, fg->id, fte); else err = mlx5_cmd_update_fte(get_dev(&ft->node), - ft, fg->id, fte); + ft, fg->id, modify_mask, fte); if (err) goto free_rule; @@ -1065,11 +1119,53 @@ unlock_fg: return rule; } +struct mlx5_fc *mlx5_flow_rule_counter(struct mlx5_flow_rule *rule) +{ + struct mlx5_flow_rule *dst; + struct fs_fte *fte; + + fs_get_obj(fte, rule->node.parent); + + fs_for_each_dst(dst, fte) { + if (dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_COUNTER) + return dst->dest_attr.counter; + } + + return NULL; +} + +static bool counter_is_valid(struct mlx5_fc *counter, u32 action) +{ + if (!(action & MLX5_FLOW_CONTEXT_ACTION_COUNT)) + return !counter; + + if (!counter) + return false; + + /* Hardware support counter for a drop action only */ + return action == (MLX5_FLOW_CONTEXT_ACTION_DROP | MLX5_FLOW_CONTEXT_ACTION_COUNT); +} + +static bool dest_is_valid(struct mlx5_flow_destination *dest, + u32 action, + struct mlx5_flow_table *ft) +{ + if (dest && (dest->type == MLX5_FLOW_DESTINATION_TYPE_COUNTER)) + return counter_is_valid(dest->counter, action); + + if (!(action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST)) + return true; + + if (!dest || ((dest->type == + MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) && + (dest->ft->level <= ft->level))) + return false; + return true; +} + static struct mlx5_flow_rule * _mlx5_add_flow_rule(struct mlx5_flow_table *ft, - u8 match_criteria_enable, - u32 *match_criteria, - u32 *match_value, + struct mlx5_flow_spec *spec, u32 action, u32 flow_tag, struct mlx5_flow_destination *dest) @@ -1077,28 +1173,29 @@ _mlx5_add_flow_rule(struct mlx5_flow_table *ft, struct mlx5_flow_group *g; struct mlx5_flow_rule *rule; - if ((action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) && !dest) + if (!dest_is_valid(dest, action, ft)) return ERR_PTR(-EINVAL); nested_lock_ref_node(&ft->node, FS_MUTEX_GRANDPARENT); fs_for_each_fg(g, ft) if (compare_match_criteria(g->mask.match_criteria_enable, - match_criteria_enable, + spec->match_criteria_enable, g->mask.match_criteria, - match_criteria)) { - rule = add_rule_fg(g, match_value, + spec->match_criteria)) { + rule = add_rule_fg(g, spec->match_value, action, flow_tag, dest); if (!IS_ERR(rule) || PTR_ERR(rule) != -ENOSPC) goto unlock; } - g = create_autogroup(ft, match_criteria_enable, match_criteria); + g = create_autogroup(ft, spec->match_criteria_enable, + spec->match_criteria); if (IS_ERR(g)) { rule = (void *)g; goto unlock; } - rule = add_rule_fg(g, match_value, + rule = add_rule_fg(g, spec->match_value, action, flow_tag, dest); if (IS_ERR(rule)) { /* Remove assumes refcount > 0 and autogroup creates a group @@ -1122,9 +1219,7 @@ static bool fwd_next_prio_supported(struct mlx5_flow_table *ft) struct mlx5_flow_rule * mlx5_add_flow_rule(struct mlx5_flow_table *ft, - u8 match_criteria_enable, - u32 *match_criteria, - u32 *match_value, + struct mlx5_flow_spec *spec, u32 action, u32 flow_tag, struct mlx5_flow_destination *dest) @@ -1155,8 +1250,7 @@ mlx5_add_flow_rule(struct mlx5_flow_table *ft, } } - rule = _mlx5_add_flow_rule(ft, match_criteria_enable, match_criteria, - match_value, action, flow_tag, dest); + rule = _mlx5_add_flow_rule(ft, spec, action, flow_tag, dest); if (sw_action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) { if (!IS_ERR_OR_NULL(rule) && @@ -1207,8 +1301,8 @@ static int update_root_ft_destroy(struct mlx5_flow_table *ft) ft->id); return err; } - root->root_ft = new_root_ft; } + root->root_ft = new_root_ft; return 0; } @@ -1274,30 +1368,47 @@ void mlx5_destroy_flow_group(struct mlx5_flow_group *fg) struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev, enum mlx5_flow_namespace_type type) { - struct mlx5_flow_root_namespace *root_ns = dev->priv.root_ns; + struct mlx5_flow_steering *steering = dev->priv.steering; + struct mlx5_flow_root_namespace *root_ns; int prio; struct fs_prio *fs_prio; struct mlx5_flow_namespace *ns; - if (!root_ns) + if (!steering) return NULL; switch (type) { case MLX5_FLOW_NAMESPACE_BYPASS: + case MLX5_FLOW_NAMESPACE_OFFLOADS: + case MLX5_FLOW_NAMESPACE_ETHTOOL: case MLX5_FLOW_NAMESPACE_KERNEL: case MLX5_FLOW_NAMESPACE_LEFTOVERS: case MLX5_FLOW_NAMESPACE_ANCHOR: prio = type; break; case MLX5_FLOW_NAMESPACE_FDB: - if (dev->priv.fdb_root_ns) - return &dev->priv.fdb_root_ns->ns; + if (steering->fdb_root_ns) + return &steering->fdb_root_ns->ns; + else + return NULL; + case MLX5_FLOW_NAMESPACE_ESW_EGRESS: + if (steering->esw_egress_root_ns) + return &steering->esw_egress_root_ns->ns; + else + return NULL; + case MLX5_FLOW_NAMESPACE_ESW_INGRESS: + if (steering->esw_ingress_root_ns) + return &steering->esw_ingress_root_ns->ns; else return NULL; default: return NULL; } + root_ns = steering->root_ns; + if (!root_ns) + return NULL; + fs_prio = find_prio(&root_ns->ns, prio); if (!fs_prio) return NULL; @@ -1311,7 +1422,7 @@ struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev, EXPORT_SYMBOL(mlx5_get_flow_namespace); static struct fs_prio *fs_create_prio(struct mlx5_flow_namespace *ns, - unsigned prio, int max_ft) + unsigned int prio, int num_levels) { struct fs_prio *fs_prio; @@ -1322,7 +1433,7 @@ static struct fs_prio *fs_create_prio(struct mlx5_flow_namespace *ns, fs_prio->node.type = FS_TYPE_PRIO; tree_init_node(&fs_prio->node, 1, NULL); tree_add_node(&fs_prio->node, &ns->node); - fs_prio->max_ft = max_ft; + fs_prio->num_levels = num_levels; fs_prio->prio = prio; list_add_tail(&fs_prio->node.list, &ns->node.children); @@ -1353,14 +1464,14 @@ static struct mlx5_flow_namespace *fs_create_namespace(struct fs_prio *prio) return ns; } -static int create_leaf_prios(struct mlx5_flow_namespace *ns, struct init_tree_node - *prio_metadata) +static int create_leaf_prios(struct mlx5_flow_namespace *ns, int prio, + struct init_tree_node *prio_metadata) { struct fs_prio *fs_prio; int i; for (i = 0; i < prio_metadata->num_leaf_prios; i++) { - fs_prio = fs_create_prio(ns, i, prio_metadata->max_ft); + fs_prio = fs_create_prio(ns, prio++, prio_metadata->num_levels); if (IS_ERR(fs_prio)) return PTR_ERR(fs_prio); } @@ -1383,13 +1494,13 @@ static bool has_required_caps(struct mlx5_core_dev *dev, struct node_caps *caps) return true; } -static int init_root_tree_recursive(struct mlx5_core_dev *dev, +static int init_root_tree_recursive(struct mlx5_flow_steering *steering, struct init_tree_node *init_node, struct fs_node *fs_parent_node, struct init_tree_node *init_parent_node, - int index) + int prio) { - int max_ft_level = MLX5_CAP_FLOWTABLE(dev, + int max_ft_level = MLX5_CAP_FLOWTABLE(steering->dev, flow_table_properties_nic_receive. max_ft_level); struct mlx5_flow_namespace *fs_ns; @@ -1400,13 +1511,13 @@ static int init_root_tree_recursive(struct mlx5_core_dev *dev, if (init_node->type == FS_TYPE_PRIO) { if ((init_node->min_ft_level > max_ft_level) || - !has_required_caps(dev, &init_node->caps)) + !has_required_caps(steering->dev, &init_node->caps)) return 0; fs_get_obj(fs_ns, fs_parent_node); if (init_node->num_leaf_prios) - return create_leaf_prios(fs_ns, init_node); - fs_prio = fs_create_prio(fs_ns, index, init_node->max_ft); + return create_leaf_prios(fs_ns, prio, init_node); + fs_prio = fs_create_prio(fs_ns, prio, init_node->num_levels); if (IS_ERR(fs_prio)) return PTR_ERR(fs_prio); base = &fs_prio->node; @@ -1419,17 +1530,22 @@ static int init_root_tree_recursive(struct mlx5_core_dev *dev, } else { return -EINVAL; } + prio = 0; for (i = 0; i < init_node->ar_size; i++) { - err = init_root_tree_recursive(dev, &init_node->children[i], - base, init_node, i); + err = init_root_tree_recursive(steering, &init_node->children[i], + base, init_node, prio); if (err) return err; + if (init_node->children[i].type == FS_TYPE_PRIO && + init_node->children[i].num_leaf_prios) { + prio += init_node->children[i].num_leaf_prios; + } } return 0; } -static int init_root_tree(struct mlx5_core_dev *dev, +static int init_root_tree(struct mlx5_flow_steering *steering, struct init_tree_node *init_node, struct fs_node *fs_parent_node) { @@ -1439,7 +1555,7 @@ static int init_root_tree(struct mlx5_core_dev *dev, fs_get_obj(fs_ns, fs_parent_node); for (i = 0; i < init_node->ar_size; i++) { - err = init_root_tree_recursive(dev, &init_node->children[i], + err = init_root_tree_recursive(steering, &init_node->children[i], &fs_ns->node, init_node, i); if (err) @@ -1448,7 +1564,7 @@ static int init_root_tree(struct mlx5_core_dev *dev, return 0; } -static struct mlx5_flow_root_namespace *create_root_ns(struct mlx5_core_dev *dev, +static struct mlx5_flow_root_namespace *create_root_ns(struct mlx5_flow_steering *steering, enum fs_flow_table_type table_type) { @@ -1460,7 +1576,7 @@ static struct mlx5_flow_root_namespace *create_root_ns(struct mlx5_core_dev *dev if (!root_ns) return NULL; - root_ns->dev = dev; + root_ns->dev = steering->dev; root_ns->table_type = table_type; ns = &root_ns->ns; @@ -1479,9 +1595,9 @@ static int set_prio_attrs_in_ns(struct mlx5_flow_namespace *ns, int acc_level) struct fs_prio *prio; fs_for_each_prio(prio, ns) { - /* This updates prio start_level and max_ft */ + /* This updates prio start_level and num_levels */ set_prio_attrs_in_prio(prio, acc_level); - acc_level += prio->max_ft; + acc_level += prio->num_levels; } return acc_level; } @@ -1493,11 +1609,11 @@ static void set_prio_attrs_in_prio(struct fs_prio *prio, int acc_level) prio->start_level = acc_level; fs_for_each_ns(ns, prio) - /* This updates start_level and max_ft of ns's priority descendants */ + /* This updates start_level and num_levels of ns's priority descendants */ acc_level_ns = set_prio_attrs_in_ns(ns, acc_level); - if (!prio->max_ft) - prio->max_ft = acc_level_ns - prio->start_level; - WARN_ON(prio->max_ft < acc_level_ns - prio->start_level); + if (!prio->num_levels) + prio->num_levels = acc_level_ns - prio->start_level; + WARN_ON(prio->num_levels < acc_level_ns - prio->start_level); } static void set_prio_attrs(struct mlx5_flow_root_namespace *root_ns) @@ -1508,200 +1624,184 @@ static void set_prio_attrs(struct mlx5_flow_root_namespace *root_ns) fs_for_each_prio(prio, ns) { set_prio_attrs_in_prio(prio, start_level); - start_level += prio->max_ft; + start_level += prio->num_levels; } } #define ANCHOR_PRIO 0 #define ANCHOR_SIZE 1 -static int create_anchor_flow_table(struct mlx5_core_dev - *dev) +#define ANCHOR_LEVEL 0 +static int create_anchor_flow_table(struct mlx5_flow_steering *steering) { struct mlx5_flow_namespace *ns = NULL; struct mlx5_flow_table *ft; - ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_ANCHOR); + ns = mlx5_get_flow_namespace(steering->dev, MLX5_FLOW_NAMESPACE_ANCHOR); if (!ns) return -EINVAL; - ft = mlx5_create_flow_table(ns, ANCHOR_PRIO, ANCHOR_SIZE); + ft = mlx5_create_flow_table(ns, ANCHOR_PRIO, ANCHOR_SIZE, ANCHOR_LEVEL); if (IS_ERR(ft)) { - mlx5_core_err(dev, "Failed to create last anchor flow table"); + mlx5_core_err(steering->dev, "Failed to create last anchor flow table"); return PTR_ERR(ft); } return 0; } -static int init_root_ns(struct mlx5_core_dev *dev) +static int init_root_ns(struct mlx5_flow_steering *steering) { - dev->priv.root_ns = create_root_ns(dev, FS_FT_NIC_RX); - if (IS_ERR_OR_NULL(dev->priv.root_ns)) + steering->root_ns = create_root_ns(steering, FS_FT_NIC_RX); + if (IS_ERR_OR_NULL(steering->root_ns)) goto cleanup; - if (init_root_tree(dev, &root_fs, &dev->priv.root_ns->ns.node)) + if (init_root_tree(steering, &root_fs, &steering->root_ns->ns.node)) goto cleanup; - set_prio_attrs(dev->priv.root_ns); + set_prio_attrs(steering->root_ns); - if (create_anchor_flow_table(dev)) + if (create_anchor_flow_table(steering)) goto cleanup; return 0; cleanup: - mlx5_cleanup_fs(dev); + mlx5_cleanup_fs(steering->dev); return -ENOMEM; } -static void cleanup_single_prio_root_ns(struct mlx5_core_dev *dev, - struct mlx5_flow_root_namespace *root_ns) +static void clean_tree(struct fs_node *node) { - struct fs_node *prio; - - if (!root_ns) - return; + if (node) { + struct fs_node *iter; + struct fs_node *temp; - if (!list_empty(&root_ns->ns.node.children)) { - prio = list_first_entry(&root_ns->ns.node.children, - struct fs_node, - list); - if (tree_remove_node(prio)) - mlx5_core_warn(dev, - "Flow steering priority wasn't destroyed, refcount > 1\n"); + list_for_each_entry_safe(iter, temp, &node->children, list) + clean_tree(iter); + tree_remove_node(node); } - if (tree_remove_node(&root_ns->ns.node)) - mlx5_core_warn(dev, - "Flow steering namespace wasn't destroyed, refcount > 1\n"); - root_ns = NULL; } -static void destroy_flow_tables(struct fs_prio *prio) +static void cleanup_root_ns(struct mlx5_flow_root_namespace *root_ns) { - struct mlx5_flow_table *iter; - struct mlx5_flow_table *tmp; + if (!root_ns) + return; - fs_for_each_ft_safe(iter, tmp, prio) - mlx5_destroy_flow_table(iter); + clean_tree(&root_ns->ns.node); } -static void cleanup_root_ns(struct mlx5_core_dev *dev) +void mlx5_cleanup_fs(struct mlx5_core_dev *dev) { - struct mlx5_flow_root_namespace *root_ns = dev->priv.root_ns; - struct fs_prio *iter_prio; + struct mlx5_flow_steering *steering = dev->priv.steering; - if (!MLX5_CAP_GEN(dev, nic_flow_table)) + if (MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) return; - if (!root_ns) - return; + cleanup_root_ns(steering->root_ns); + cleanup_root_ns(steering->esw_egress_root_ns); + cleanup_root_ns(steering->esw_ingress_root_ns); + cleanup_root_ns(steering->fdb_root_ns); + mlx5_cleanup_fc_stats(dev); + kfree(steering); +} - /* stage 1 */ - fs_for_each_prio(iter_prio, &root_ns->ns) { - struct fs_node *node; - struct mlx5_flow_namespace *iter_ns; - - fs_for_each_ns_or_ft(node, iter_prio) { - if (node->type == FS_TYPE_FLOW_TABLE) - continue; - fs_get_obj(iter_ns, node); - while (!list_empty(&iter_ns->node.children)) { - struct fs_prio *obj_iter_prio2; - struct fs_node *iter_prio2 = - list_first_entry(&iter_ns->node.children, - struct fs_node, - list); - - fs_get_obj(obj_iter_prio2, iter_prio2); - destroy_flow_tables(obj_iter_prio2); - if (tree_remove_node(iter_prio2)) { - mlx5_core_warn(dev, - "Priority %d wasn't destroyed, refcount > 1\n", - obj_iter_prio2->prio); - return; - } - } - } - } +static int init_fdb_root_ns(struct mlx5_flow_steering *steering) +{ + struct fs_prio *prio; - /* stage 2 */ - fs_for_each_prio(iter_prio, &root_ns->ns) { - while (!list_empty(&iter_prio->node.children)) { - struct fs_node *iter_ns = - list_first_entry(&iter_prio->node.children, - struct fs_node, - list); - if (tree_remove_node(iter_ns)) { - mlx5_core_warn(dev, - "Namespace wasn't destroyed, refcount > 1\n"); - return; - } - } - } + steering->fdb_root_ns = create_root_ns(steering, FS_FT_FDB); + if (!steering->fdb_root_ns) + return -ENOMEM; - /* stage 3 */ - while (!list_empty(&root_ns->ns.node.children)) { - struct fs_prio *obj_prio_node; - struct fs_node *prio_node = - list_first_entry(&root_ns->ns.node.children, - struct fs_node, - list); - - fs_get_obj(obj_prio_node, prio_node); - if (tree_remove_node(prio_node)) { - mlx5_core_warn(dev, - "Priority %d wasn't destroyed, refcount > 1\n", - obj_prio_node->prio); - return; - } - } + prio = fs_create_prio(&steering->fdb_root_ns->ns, 0, 1); + if (IS_ERR(prio)) + goto out_err; - if (tree_remove_node(&root_ns->ns.node)) { - mlx5_core_warn(dev, - "root namespace wasn't destroyed, refcount > 1\n"); - return; - } + prio = fs_create_prio(&steering->fdb_root_ns->ns, 1, 1); + if (IS_ERR(prio)) + goto out_err; - dev->priv.root_ns = NULL; + set_prio_attrs(steering->fdb_root_ns); + return 0; + +out_err: + cleanup_root_ns(steering->fdb_root_ns); + steering->fdb_root_ns = NULL; + return PTR_ERR(prio); } -void mlx5_cleanup_fs(struct mlx5_core_dev *dev) +static int init_ingress_acl_root_ns(struct mlx5_flow_steering *steering) { - cleanup_root_ns(dev); - cleanup_single_prio_root_ns(dev, dev->priv.fdb_root_ns); + struct fs_prio *prio; + + steering->esw_egress_root_ns = create_root_ns(steering, FS_FT_ESW_EGRESS_ACL); + if (!steering->esw_egress_root_ns) + return -ENOMEM; + + /* create 1 prio*/ + prio = fs_create_prio(&steering->esw_egress_root_ns->ns, 0, + MLX5_TOTAL_VPORTS(steering->dev)); + return PTR_ERR_OR_ZERO(prio); } -static int init_fdb_root_ns(struct mlx5_core_dev *dev) +static int init_egress_acl_root_ns(struct mlx5_flow_steering *steering) { struct fs_prio *prio; - dev->priv.fdb_root_ns = create_root_ns(dev, FS_FT_FDB); - if (!dev->priv.fdb_root_ns) + steering->esw_ingress_root_ns = create_root_ns(steering, FS_FT_ESW_INGRESS_ACL); + if (!steering->esw_ingress_root_ns) return -ENOMEM; - /* Create single prio */ - prio = fs_create_prio(&dev->priv.fdb_root_ns->ns, 0, 1); - if (IS_ERR(prio)) { - cleanup_single_prio_root_ns(dev, dev->priv.fdb_root_ns); - return PTR_ERR(prio); - } else { - return 0; - } + /* create 1 prio*/ + prio = fs_create_prio(&steering->esw_ingress_root_ns->ns, 0, + MLX5_TOTAL_VPORTS(steering->dev)); + return PTR_ERR_OR_ZERO(prio); } int mlx5_init_fs(struct mlx5_core_dev *dev) { + struct mlx5_flow_steering *steering; int err = 0; - if (MLX5_CAP_GEN(dev, nic_flow_table)) { - err = init_root_ns(dev); + if (MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) + return 0; + + err = mlx5_init_fc_stats(dev); + if (err) + return err; + + steering = kzalloc(sizeof(*steering), GFP_KERNEL); + if (!steering) + return -ENOMEM; + steering->dev = dev; + dev->priv.steering = steering; + + if (MLX5_CAP_GEN(dev, nic_flow_table) && + MLX5_CAP_FLOWTABLE_NIC_RX(dev, ft_support)) { + err = init_root_ns(steering); if (err) - return err; + goto err; } + if (MLX5_CAP_GEN(dev, eswitch_flow_table)) { - err = init_fdb_root_ns(dev); - if (err) - cleanup_root_ns(dev); + if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, ft_support)) { + err = init_fdb_root_ns(steering); + if (err) + goto err; + } + if (MLX5_CAP_ESW_EGRESS_ACL(dev, ft_support)) { + err = init_egress_acl_root_ns(steering); + if (err) + goto err; + } + if (MLX5_CAP_ESW_INGRESS_ACL(dev, ft_support)) { + err = init_ingress_acl_root_ns(steering); + if (err) + goto err; + } } + return 0; +err: + mlx5_cleanup_fs(dev); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index f37a6248a27b..9cffb6aeb4e9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -45,14 +45,24 @@ enum fs_node_type { }; enum fs_flow_table_type { - FS_FT_NIC_RX = 0x0, - FS_FT_FDB = 0X4, + FS_FT_NIC_RX = 0x0, + FS_FT_ESW_EGRESS_ACL = 0x2, + FS_FT_ESW_INGRESS_ACL = 0x3, + FS_FT_FDB = 0X4, }; enum fs_fte_status { FS_FTE_STATUS_EXISTING = 1UL << 0, }; +struct mlx5_flow_steering { + struct mlx5_core_dev *dev; + struct mlx5_flow_root_namespace *root_ns; + struct mlx5_flow_root_namespace *fdb_root_ns; + struct mlx5_flow_root_namespace *esw_egress_root_ns; + struct mlx5_flow_root_namespace *esw_ingress_root_ns; +}; + struct fs_node { struct list_head list; struct list_head children; @@ -79,6 +89,7 @@ struct mlx5_flow_rule { struct mlx5_flow_table { struct fs_node node; u32 id; + u16 vport; unsigned int max_fte; unsigned int level; enum fs_flow_table_type type; @@ -93,6 +104,29 @@ struct mlx5_flow_table { struct list_head fwd_rules; }; +struct mlx5_fc_cache { + u64 packets; + u64 bytes; + u64 lastuse; +}; + +struct mlx5_fc { + struct rb_node node; + struct list_head list; + + /* last{packets,bytes} members are used when calculating the delta since + * last reading + */ + u64 lastpackets; + u64 lastbytes; + + u16 id; + bool deleted; + bool aging; + + struct mlx5_fc_cache cache ____cacheline_aligned_in_smp; +}; + /* Type of children is mlx5_flow_rule */ struct fs_fte { struct fs_node node; @@ -102,12 +136,13 @@ struct fs_fte { u32 index; u32 action; enum fs_fte_status status; + struct mlx5_fc *counter; }; /* Type of children is mlx5_flow_table/namespace */ struct fs_prio { struct fs_node node; - unsigned int max_ft; + unsigned int num_levels; unsigned int start_level; unsigned int prio; unsigned int num_ft; @@ -143,6 +178,9 @@ struct mlx5_flow_root_namespace { struct mutex chain_lock; }; +int mlx5_init_fc_stats(struct mlx5_core_dev *dev); +void mlx5_cleanup_fc_stats(struct mlx5_core_dev *dev); + int mlx5_init_fs(struct mlx5_core_dev *dev); void mlx5_cleanup_fs(struct mlx5_core_dev *dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c new file mode 100644 index 000000000000..3a9195b4169d --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c @@ -0,0 +1,318 @@ +/* + * Copyright (c) 2016, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/mlx5/driver.h> +#include <linux/mlx5/fs.h> +#include <linux/rbtree.h> +#include "mlx5_core.h" +#include "fs_core.h" +#include "fs_cmd.h" + +#define MLX5_FC_STATS_PERIOD msecs_to_jiffies(1000) + +/* locking scheme: + * + * It is the responsibility of the user to prevent concurrent calls or bad + * ordering to mlx5_fc_create(), mlx5_fc_destroy() and accessing a reference + * to struct mlx5_fc. + * e.g en_tc.c is protected by RTNL lock of its caller, and will never call a + * dump (access to struct mlx5_fc) after a counter is destroyed. + * + * access to counter list: + * - create (user context) + * - mlx5_fc_create() only adds to an addlist to be used by + * mlx5_fc_stats_query_work(). addlist is protected by a spinlock. + * - spawn thread to do the actual destroy + * + * - destroy (user context) + * - mark a counter as deleted + * - spawn thread to do the actual del + * + * - dump (user context) + * user should not call dump after destroy + * + * - query (single thread workqueue context) + * destroy/dump - no conflict (see destroy) + * query/dump - packets and bytes might be inconsistent (since update is not + * atomic) + * query/create - no conflict (see create) + * since every create/destroy spawn the work, only after necessary time has + * elapsed, the thread will actually query the hardware. + */ + +static void mlx5_fc_stats_insert(struct rb_root *root, struct mlx5_fc *counter) +{ + struct rb_node **new = &root->rb_node; + struct rb_node *parent = NULL; + + while (*new) { + struct mlx5_fc *this = container_of(*new, struct mlx5_fc, node); + int result = counter->id - this->id; + + parent = *new; + if (result < 0) + new = &((*new)->rb_left); + else + new = &((*new)->rb_right); + } + + /* Add new node and rebalance tree. */ + rb_link_node(&counter->node, parent, new); + rb_insert_color(&counter->node, root); +} + +static struct rb_node *mlx5_fc_stats_query(struct mlx5_core_dev *dev, + struct mlx5_fc *first, + u16 last_id) +{ + struct mlx5_cmd_fc_bulk *b; + struct rb_node *node = NULL; + u16 afirst_id; + int num; + int err; + int max_bulk = 1 << MLX5_CAP_GEN(dev, log_max_flow_counter_bulk); + + /* first id must be aligned to 4 when using bulk query */ + afirst_id = first->id & ~0x3; + + /* number of counters to query inc. the last counter */ + num = ALIGN(last_id - afirst_id + 1, 4); + if (num > max_bulk) { + num = max_bulk; + last_id = afirst_id + num - 1; + } + + b = mlx5_cmd_fc_bulk_alloc(dev, afirst_id, num); + if (!b) { + mlx5_core_err(dev, "Error allocating resources for bulk query\n"); + return NULL; + } + + err = mlx5_cmd_fc_bulk_query(dev, b); + if (err) { + mlx5_core_err(dev, "Error doing bulk query: %d\n", err); + goto out; + } + + for (node = &first->node; node; node = rb_next(node)) { + struct mlx5_fc *counter = rb_entry(node, struct mlx5_fc, node); + struct mlx5_fc_cache *c = &counter->cache; + u64 packets; + u64 bytes; + + if (counter->id > last_id) + break; + + mlx5_cmd_fc_bulk_get(dev, b, + counter->id, &packets, &bytes); + + if (c->packets == packets) + continue; + + c->packets = packets; + c->bytes = bytes; + c->lastuse = jiffies; + } + +out: + mlx5_cmd_fc_bulk_free(b); + + return node; +} + +static void mlx5_fc_stats_work(struct work_struct *work) +{ + struct mlx5_core_dev *dev = container_of(work, struct mlx5_core_dev, + priv.fc_stats.work.work); + struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; + unsigned long now = jiffies; + struct mlx5_fc *counter = NULL; + struct mlx5_fc *last = NULL; + struct rb_node *node; + LIST_HEAD(tmplist); + + spin_lock(&fc_stats->addlist_lock); + + list_splice_tail_init(&fc_stats->addlist, &tmplist); + + if (!list_empty(&tmplist) || !RB_EMPTY_ROOT(&fc_stats->counters)) + queue_delayed_work(fc_stats->wq, &fc_stats->work, MLX5_FC_STATS_PERIOD); + + spin_unlock(&fc_stats->addlist_lock); + + list_for_each_entry(counter, &tmplist, list) + mlx5_fc_stats_insert(&fc_stats->counters, counter); + + node = rb_first(&fc_stats->counters); + while (node) { + counter = rb_entry(node, struct mlx5_fc, node); + + node = rb_next(node); + + if (counter->deleted) { + rb_erase(&counter->node, &fc_stats->counters); + + mlx5_cmd_fc_free(dev, counter->id); + + kfree(counter); + continue; + } + + last = counter; + } + + if (time_before(now, fc_stats->next_query) || !last) + return; + + node = rb_first(&fc_stats->counters); + while (node) { + counter = rb_entry(node, struct mlx5_fc, node); + + node = mlx5_fc_stats_query(dev, counter, last->id); + } + + fc_stats->next_query = now + MLX5_FC_STATS_PERIOD; +} + +struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging) +{ + struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; + struct mlx5_fc *counter; + int err; + + counter = kzalloc(sizeof(*counter), GFP_KERNEL); + if (!counter) + return ERR_PTR(-ENOMEM); + + err = mlx5_cmd_fc_alloc(dev, &counter->id); + if (err) + goto err_out; + + if (aging) { + counter->aging = true; + + spin_lock(&fc_stats->addlist_lock); + list_add(&counter->list, &fc_stats->addlist); + spin_unlock(&fc_stats->addlist_lock); + + mod_delayed_work(fc_stats->wq, &fc_stats->work, 0); + } + + return counter; + +err_out: + kfree(counter); + + return ERR_PTR(err); +} + +void mlx5_fc_destroy(struct mlx5_core_dev *dev, struct mlx5_fc *counter) +{ + struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; + + if (!counter) + return; + + if (counter->aging) { + counter->deleted = true; + mod_delayed_work(fc_stats->wq, &fc_stats->work, 0); + return; + } + + mlx5_cmd_fc_free(dev, counter->id); + kfree(counter); +} + +int mlx5_init_fc_stats(struct mlx5_core_dev *dev) +{ + struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; + + fc_stats->counters = RB_ROOT; + INIT_LIST_HEAD(&fc_stats->addlist); + spin_lock_init(&fc_stats->addlist_lock); + + fc_stats->wq = create_singlethread_workqueue("mlx5_fc"); + if (!fc_stats->wq) + return -ENOMEM; + + INIT_DELAYED_WORK(&fc_stats->work, mlx5_fc_stats_work); + + return 0; +} + +void mlx5_cleanup_fc_stats(struct mlx5_core_dev *dev) +{ + struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; + struct mlx5_fc *counter; + struct mlx5_fc *tmp; + struct rb_node *node; + + cancel_delayed_work_sync(&dev->priv.fc_stats.work); + destroy_workqueue(dev->priv.fc_stats.wq); + dev->priv.fc_stats.wq = NULL; + + list_for_each_entry_safe(counter, tmp, &fc_stats->addlist, list) { + list_del(&counter->list); + + mlx5_cmd_fc_free(dev, counter->id); + + kfree(counter); + } + + node = rb_first(&fc_stats->counters); + while (node) { + counter = rb_entry(node, struct mlx5_fc, node); + + node = rb_next(node); + + rb_erase(&counter->node, &fc_stats->counters); + + mlx5_cmd_fc_free(dev, counter->id); + + kfree(counter); + } +} + +void mlx5_fc_query_cached(struct mlx5_fc *counter, + u64 *bytes, u64 *packets, u64 *lastuse) +{ + struct mlx5_fc_cache c; + + c = counter->cache; + + *bytes = c.bytes - counter->lastbytes; + *packets = c.packets - counter->lastpackets; + *lastuse = c.lastuse; + + counter->lastbytes = c.bytes; + counter->lastpackets = c.packets; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c index 75c7ae6a5cc4..77fc1aa26114 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c @@ -151,6 +151,12 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev) return err; } + if (MLX5_CAP_GEN(dev, qos)) { + err = mlx5_core_get_caps(dev, MLX5_CAP_QOS); + if (err) + return err; + } + return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index f5deb642d0d6..1a05fb965c8d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -108,15 +108,21 @@ static int in_fatal(struct mlx5_core_dev *dev) void mlx5_enter_error_state(struct mlx5_core_dev *dev) { + mutex_lock(&dev->intf_state_mutex); if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) - return; + goto unlock; mlx5_core_err(dev, "start\n"); - if (pci_channel_offline(dev->pdev) || in_fatal(dev)) + if (pci_channel_offline(dev->pdev) || in_fatal(dev)) { dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; + trigger_cmd_completions(dev); + } mlx5_core_event(dev, MLX5_DEV_EVENT_SYS_ERROR, 0); mlx5_core_err(dev, "end\n"); + +unlock: + mutex_unlock(&dev->intf_state_mutex); } static void mlx5_handle_bad_state(struct mlx5_core_dev *dev) @@ -176,11 +182,11 @@ static const char *hsynd_str(u8 synd) case MLX5_HEALTH_SYNDR_EQ_ERR: return "EQ error"; case MLX5_HEALTH_SYNDR_EQ_INV: - return "Invalid EQ refrenced"; + return "Invalid EQ referenced"; case MLX5_HEALTH_SYNDR_FFSER_ERR: return "FFSER error"; case MLX5_HEALTH_SYNDR_HIGH_TEMP: - return "High temprature"; + return "High temperature"; default: return "unrecognized error"; } @@ -245,7 +251,6 @@ static void poll_health(unsigned long data) u32 count; if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { - trigger_cmd_completions(dev); mod_timer(&health->timer, get_next_poll_jiffies()); return; } @@ -267,7 +272,7 @@ static void poll_health(unsigned long data) if (in_fatal(dev) && !health->sick) { health->sick = true; print_health_info(dev); - queue_work(health->wq, &health->work); + schedule_work(&health->work); } } @@ -296,7 +301,7 @@ void mlx5_health_cleanup(struct mlx5_core_dev *dev) { struct mlx5_core_health *health = &dev->priv.health; - destroy_workqueue(health->wq); + flush_work(&health->work); } int mlx5_health_init(struct mlx5_core_dev *dev) @@ -311,10 +316,7 @@ int mlx5_health_init(struct mlx5_core_dev *dev) strcpy(name, "mlx5_health"); strcat(name, dev_name(&dev->pdev->dev)); - health->wq = create_singlethread_workqueue(name); kfree(name); - if (!health->wq) - return -ENOMEM; INIT_WORK(&health->work, health_care); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 6892746fd10d..2385bae92672 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -48,6 +48,10 @@ #include <linux/kmod.h> #include <linux/delay.h> #include <linux/mlx5/mlx5_ifc.h> +#ifdef CONFIG_RFS_ACCEL +#include <linux/cpu_rmap.h> +#endif +#include <net/devlink.h> #include "mlx5_core.h" #include "fs_core.h" #ifdef CONFIG_MLX5_CORE_EN @@ -660,11 +664,34 @@ int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn, } EXPORT_SYMBOL(mlx5_vector2eqn); +struct mlx5_eq *mlx5_eqn2eq(struct mlx5_core_dev *dev, int eqn) +{ + struct mlx5_eq_table *table = &dev->priv.eq_table; + struct mlx5_eq *eq; + + spin_lock(&table->lock); + list_for_each_entry(eq, &table->comp_eqs_list, list) + if (eq->eqn == eqn) { + spin_unlock(&table->lock); + return eq; + } + + spin_unlock(&table->lock); + + return ERR_PTR(-ENOENT); +} + static void free_comp_eqs(struct mlx5_core_dev *dev) { struct mlx5_eq_table *table = &dev->priv.eq_table; struct mlx5_eq *eq, *n; +#ifdef CONFIG_RFS_ACCEL + if (dev->rmap) { + free_irq_cpu_rmap(dev->rmap); + dev->rmap = NULL; + } +#endif spin_lock(&table->lock); list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) { list_del(&eq->list); @@ -691,6 +718,11 @@ static int alloc_comp_eqs(struct mlx5_core_dev *dev) INIT_LIST_HEAD(&table->comp_eqs_list); ncomp_vec = table->num_comp_vectors; nent = MLX5_COMP_EQ_SIZE; +#ifdef CONFIG_RFS_ACCEL + dev->rmap = alloc_irq_cpu_rmap(ncomp_vec); + if (!dev->rmap) + return -ENOMEM; +#endif for (i = 0; i < ncomp_vec; i++) { eq = kzalloc(sizeof(*eq), GFP_KERNEL); if (!eq) { @@ -698,6 +730,10 @@ static int alloc_comp_eqs(struct mlx5_core_dev *dev) goto clean; } +#ifdef CONFIG_RFS_ACCEL + irq_cpu_rmap_add(dev->rmap, + dev->priv.msix_arr[i + MLX5_EQ_VEC_COMP_BASE].vector); +#endif snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", i); err = mlx5_create_map_eq(dev, eq, i + MLX5_EQ_VEC_COMP_BASE, nent, 0, @@ -1109,6 +1145,13 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) dev_err(&pdev->dev, "Failed to init flow steering\n"); goto err_fs; } + + err = mlx5_init_rl_table(dev); + if (err) { + dev_err(&pdev->dev, "Failed to init rate limiting\n"); + goto err_rl; + } + #ifdef CONFIG_MLX5_CORE_EN err = mlx5_eswitch_init(dev); if (err) { @@ -1148,6 +1191,8 @@ err_sriov: mlx5_eswitch_cleanup(dev->priv.eswitch); #endif err_reg_dev: + mlx5_cleanup_rl_table(dev); +err_rl: mlx5_cleanup_fs(dev); err_fs: mlx5_cleanup_mkey_table(dev); @@ -1218,6 +1263,7 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) mlx5_eswitch_cleanup(dev->priv.eswitch); #endif + mlx5_cleanup_rl_table(dev); mlx5_cleanup_fs(dev); mlx5_cleanup_mkey_table(dev); mlx5_cleanup_srq_table(dev); @@ -1270,19 +1316,28 @@ struct mlx5_core_event_handler { void *data); }; +static const struct devlink_ops mlx5_devlink_ops = { +#ifdef CONFIG_MLX5_CORE_EN + .eswitch_mode_set = mlx5_devlink_eswitch_mode_set, + .eswitch_mode_get = mlx5_devlink_eswitch_mode_get, +#endif +}; static int init_one(struct pci_dev *pdev, const struct pci_device_id *id) { struct mlx5_core_dev *dev; + struct devlink *devlink; struct mlx5_priv *priv; int err; - dev = kzalloc(sizeof(*dev), GFP_KERNEL); - if (!dev) { + devlink = devlink_alloc(&mlx5_devlink_ops, sizeof(*dev)); + if (!devlink) { dev_err(&pdev->dev, "kzalloc failed\n"); return -ENOMEM; } + + dev = devlink_priv(devlink); priv = &dev->priv; priv->pci_dev_data = id->driver_data; @@ -1319,15 +1374,21 @@ static int init_one(struct pci_dev *pdev, goto clean_health; } + err = devlink_register(devlink, &pdev->dev); + if (err) + goto clean_load; + return 0; +clean_load: + mlx5_unload_one(dev, priv); clean_health: mlx5_health_cleanup(dev); close_pci: mlx5_pci_close(dev, priv); clean_dev: pci_set_drvdata(pdev, NULL); - kfree(dev); + devlink_free(devlink); return err; } @@ -1335,8 +1396,10 @@ clean_dev: static void remove_one(struct pci_dev *pdev) { struct mlx5_core_dev *dev = pci_get_drvdata(pdev); + struct devlink *devlink = priv_to_devlink(dev); struct mlx5_priv *priv = &dev->priv; + devlink_unregister(devlink); if (mlx5_unload_one(dev, priv)) { dev_err(&dev->pdev->dev, "mlx5_unload_one failed\n"); mlx5_health_cleanup(dev); @@ -1345,7 +1408,7 @@ static void remove_one(struct pci_dev *pdev) mlx5_health_cleanup(dev); mlx5_pci_close(dev, priv); pci_set_drvdata(pdev, NULL); - kfree(dev); + devlink_free(devlink); } static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev, @@ -1357,15 +1420,43 @@ static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev, dev_info(&pdev->dev, "%s was called\n", __func__); mlx5_enter_error_state(dev); mlx5_unload_one(dev, priv); + pci_save_state(pdev); mlx5_pci_disable_device(dev); return state == pci_channel_io_perm_failure ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET; } +/* wait for the device to show vital signs by waiting + * for the health counter to start counting. + */ +static int wait_vital(struct pci_dev *pdev) +{ + struct mlx5_core_dev *dev = pci_get_drvdata(pdev); + struct mlx5_core_health *health = &dev->priv.health; + const int niter = 100; + u32 last_count = 0; + u32 count; + int i; + + for (i = 0; i < niter; i++) { + count = ioread32be(health->health_counter); + if (count && count != 0xffffffff) { + if (last_count && last_count != count) { + dev_info(&pdev->dev, "Counter value 0x%x after %d iterations\n", count, i); + return 0; + } + last_count = count; + } + msleep(50); + } + + return -ETIMEDOUT; +} + static pci_ers_result_t mlx5_pci_slot_reset(struct pci_dev *pdev) { struct mlx5_core_dev *dev = pci_get_drvdata(pdev); - int err = 0; + int err; dev_info(&pdev->dev, "%s was called\n", __func__); @@ -1375,11 +1466,16 @@ static pci_ers_result_t mlx5_pci_slot_reset(struct pci_dev *pdev) , __func__, err); return PCI_ERS_RESULT_DISCONNECT; } + pci_set_master(pdev); - pci_set_power_state(pdev, PCI_D0); pci_restore_state(pdev); - return err ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED; + if (wait_vital(pdev)) { + dev_err(&pdev->dev, "%s: wait_vital timed out\n", __func__); + return PCI_ERS_RESULT_DISCONNECT; + } + + return PCI_ERS_RESULT_RECOVERED; } void mlx5_disable_device(struct mlx5_core_dev *dev) @@ -1387,48 +1483,6 @@ void mlx5_disable_device(struct mlx5_core_dev *dev) mlx5_pci_err_detected(dev->pdev, 0); } -/* wait for the device to show vital signs. For now we check - * that we can read the device ID and that the health buffer - * shows a non zero value which is different than 0xffffffff - */ -static void wait_vital(struct pci_dev *pdev) -{ - struct mlx5_core_dev *dev = pci_get_drvdata(pdev); - struct mlx5_core_health *health = &dev->priv.health; - const int niter = 100; - u32 count; - u16 did; - int i; - - /* Wait for firmware to be ready after reset */ - msleep(1000); - for (i = 0; i < niter; i++) { - if (pci_read_config_word(pdev, 2, &did)) { - dev_warn(&pdev->dev, "failed reading config word\n"); - break; - } - if (did == pdev->device) { - dev_info(&pdev->dev, "device ID correctly read after %d iterations\n", i); - break; - } - msleep(50); - } - if (i == niter) - dev_warn(&pdev->dev, "%s-%d: could not read device ID\n", __func__, __LINE__); - - for (i = 0; i < niter; i++) { - count = ioread32be(health->health_counter); - if (count && count != 0xffffffff) { - dev_info(&pdev->dev, "Counter value 0x%x after %d iterations\n", count, i); - break; - } - msleep(50); - } - - if (i == niter) - dev_warn(&pdev->dev, "%s-%d: could not read device ID\n", __func__, __LINE__); -} - static void mlx5_pci_resume(struct pci_dev *pdev) { struct mlx5_core_dev *dev = pci_get_drvdata(pdev); @@ -1437,9 +1491,6 @@ static void mlx5_pci_resume(struct pci_dev *pdev) dev_info(&pdev->dev, "%s was called\n", __func__); - pci_save_state(pdev); - wait_vital(pdev); - err = mlx5_load_one(dev, priv); if (err) dev_err(&pdev->dev, "%s: mlx5_load_one failed with error code: %d\n" @@ -1473,8 +1524,9 @@ static const struct pci_device_id mlx5_core_pci_table[] = { { PCI_VDEVICE(MELLANOX, 0x1014), MLX5_PCI_DEV_IS_VF}, /* ConnectX-4 VF */ { PCI_VDEVICE(MELLANOX, 0x1015) }, /* ConnectX-4LX */ { PCI_VDEVICE(MELLANOX, 0x1016), MLX5_PCI_DEV_IS_VF}, /* ConnectX-4LX VF */ - { PCI_VDEVICE(MELLANOX, 0x1017) }, /* ConnectX-5 */ + { PCI_VDEVICE(MELLANOX, 0x1017) }, /* ConnectX-5, PCIe 3.0 */ { PCI_VDEVICE(MELLANOX, 0x1018), MLX5_PCI_DEV_IS_VF}, /* ConnectX-5 VF */ + { PCI_VDEVICE(MELLANOX, 0x1019) }, /* ConnectX-5, PCIe 4.0 */ { 0, } }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 0b0b226c789e..2f86ec6fcf25 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -42,6 +42,8 @@ #define DRIVER_VERSION "3.0-1" #define DRIVER_RELDATE "January 2015" +#define MLX5_TOTAL_VPORTS(mdev) (1 + pci_sriov_get_totalvfs(mdev->pdev)) + extern int mlx5_core_debug_mask; #define mlx5_core_dbg(__dev, format, ...) \ @@ -100,6 +102,8 @@ int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id); int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev); cycle_t mlx5_read_internal_timer(struct mlx5_core_dev *dev); u32 mlx5_get_msix_vec(struct mlx5_core_dev *dev, int vecidx); +struct mlx5_eq *mlx5_eqn2eq(struct mlx5_core_dev *dev, int eqn); +void mlx5_cq_tasklet_cb(unsigned long data); void mlx5e_init(void); void mlx5e_cleanup(void); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c index 9eeee0545f1c..32dea3524cee 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c @@ -345,7 +345,6 @@ retry: func_id, npages, err); goto out_4k; } - dev->priv.fw_pages += npages; err = mlx5_cmd_status_to_err(&out.hdr); if (err) { @@ -373,6 +372,33 @@ out_free: return err; } +static int reclaim_pages_cmd(struct mlx5_core_dev *dev, + struct mlx5_manage_pages_inbox *in, int in_size, + struct mlx5_manage_pages_outbox *out, int out_size) +{ + struct fw_page *fwp; + struct rb_node *p; + u32 npages; + u32 i = 0; + + if (dev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR) + return mlx5_cmd_exec_check_status(dev, (u32 *)in, in_size, + (u32 *)out, out_size); + + npages = be32_to_cpu(in->num_entries); + + p = rb_first(&dev->priv.page_root); + while (p && i < npages) { + fwp = rb_entry(p, struct fw_page, rb_node); + out->pas[i] = cpu_to_be64(fwp->addr); + p = rb_next(p); + i++; + } + + out->num_entries = cpu_to_be32(i); + return 0; +} + static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages, int *nclaimed) { @@ -398,15 +424,9 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages, in.func_id = cpu_to_be16(func_id); in.num_entries = cpu_to_be32(npages); mlx5_core_dbg(dev, "npages %d, outlen %d\n", npages, outlen); - err = mlx5_cmd_exec(dev, &in, sizeof(in), out, outlen); + err = reclaim_pages_cmd(dev, &in, sizeof(in), out, outlen); if (err) { - mlx5_core_err(dev, "failed reclaiming pages\n"); - goto out_free; - } - dev->priv.fw_pages -= npages; - - if (out->hdr.status) { - err = mlx5_cmd_status_to_err(&out->hdr); + mlx5_core_err(dev, "failed reclaiming pages: err %d\n", err); goto out_free; } @@ -417,13 +437,15 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages, err = -EINVAL; goto out_free; } - if (nclaimed) - *nclaimed = num_claimed; for (i = 0; i < num_claimed; i++) { addr = be64_to_cpu(out->pas[i]); free_4k(dev, addr); } + + if (nclaimed) + *nclaimed = num_claimed; + dev->priv.fw_pages -= num_claimed; if (func_id) dev->priv.vfs_pages -= num_claimed; @@ -514,14 +536,10 @@ int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev) p = rb_first(&dev->priv.page_root); if (p) { fwp = rb_entry(p, struct fw_page, rb_node); - if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { - free_4k(dev, fwp->addr); - nclaimed = 1; - } else { - err = reclaim_pages(dev, fwp->func_id, - optimal_reclaimed_pages(), - &nclaimed); - } + err = reclaim_pages(dev, fwp->func_id, + optimal_reclaimed_pages(), + &nclaimed); + if (err) { mlx5_core_warn(dev, "failed reclaiming pages (%d)\n", err); @@ -536,6 +554,13 @@ int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev) } } while (p); + WARN(dev->priv.fw_pages, + "FW pages counter is %d after reclaiming all pages\n", + dev->priv.fw_pages); + WARN(dev->priv.vfs_pages, + "VFs FW pages counter is %d after reclaiming all pages\n", + dev->priv.vfs_pages); + return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c index 53cc1e2c693b..752c08127138 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -115,6 +115,19 @@ int mlx5_query_port_ptys(struct mlx5_core_dev *dev, u32 *ptys, } EXPORT_SYMBOL_GPL(mlx5_query_port_ptys); +int mlx5_set_port_beacon(struct mlx5_core_dev *dev, u16 beacon_duration) +{ + u32 out[MLX5_ST_SZ_DW(mlcr_reg)]; + u32 in[MLX5_ST_SZ_DW(mlcr_reg)]; + + memset(in, 0, sizeof(in)); + MLX5_SET(mlcr_reg, in, local_port, 1); + MLX5_SET(mlcr_reg, in, beacon_duration, beacon_duration); + + return mlx5_core_access_reg(dev, in, sizeof(in), out, + sizeof(out), MLX5_REG_MLCR, 0, 1); +} + int mlx5_query_port_proto_cap(struct mlx5_core_dev *dev, u32 *proto_cap, int proto_mask) { @@ -189,15 +202,24 @@ int mlx5_query_port_proto_oper(struct mlx5_core_dev *dev, } EXPORT_SYMBOL_GPL(mlx5_query_port_proto_oper); -int mlx5_set_port_proto(struct mlx5_core_dev *dev, u32 proto_admin, - int proto_mask) +int mlx5_set_port_ptys(struct mlx5_core_dev *dev, bool an_disable, + u32 proto_admin, int proto_mask) { - u32 in[MLX5_ST_SZ_DW(ptys_reg)]; u32 out[MLX5_ST_SZ_DW(ptys_reg)]; + u32 in[MLX5_ST_SZ_DW(ptys_reg)]; + u8 an_disable_admin; + u8 an_disable_cap; + u8 an_status; + + mlx5_query_port_autoneg(dev, proto_mask, &an_status, + &an_disable_cap, &an_disable_admin); + if (!an_disable_cap && an_disable) + return -EPERM; memset(in, 0, sizeof(in)); MLX5_SET(ptys_reg, in, local_port, 1); + MLX5_SET(ptys_reg, in, an_disable_admin, an_disable); MLX5_SET(ptys_reg, in, proto_mask, proto_mask); if (proto_mask == MLX5_PTYS_EN) MLX5_SET(ptys_reg, in, eth_proto_admin, proto_admin); @@ -207,7 +229,19 @@ int mlx5_set_port_proto(struct mlx5_core_dev *dev, u32 proto_admin, return mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), MLX5_REG_PTYS, 0, 1); } -EXPORT_SYMBOL_GPL(mlx5_set_port_proto); +EXPORT_SYMBOL_GPL(mlx5_set_port_ptys); + +/* This function should be used after setting a port register only */ +void mlx5_toggle_port_link(struct mlx5_core_dev *dev) +{ + enum mlx5_port_status ps; + + mlx5_query_port_admin_status(dev, &ps); + mlx5_set_port_admin_status(dev, MLX5_PORT_DOWN); + if (ps == MLX5_PORT_UP) + mlx5_set_port_admin_status(dev, MLX5_PORT_UP); +} +EXPORT_SYMBOL_GPL(mlx5_toggle_port_link); int mlx5_set_port_admin_status(struct mlx5_core_dev *dev, enum mlx5_port_status status) @@ -297,6 +331,82 @@ void mlx5_query_port_oper_mtu(struct mlx5_core_dev *dev, u16 *oper_mtu, } EXPORT_SYMBOL_GPL(mlx5_query_port_oper_mtu); +static int mlx5_query_module_num(struct mlx5_core_dev *dev, int *module_num) +{ + u32 out[MLX5_ST_SZ_DW(pmlp_reg)]; + u32 in[MLX5_ST_SZ_DW(pmlp_reg)]; + int module_mapping; + int err; + + memset(in, 0, sizeof(in)); + + MLX5_SET(pmlp_reg, in, local_port, 1); + + err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), + MLX5_REG_PMLP, 0, 0); + if (err) + return err; + + module_mapping = MLX5_GET(pmlp_reg, out, lane0_module_mapping); + *module_num = module_mapping & MLX5_EEPROM_IDENTIFIER_BYTE_MASK; + + return 0; +} + +int mlx5_query_module_eeprom(struct mlx5_core_dev *dev, + u16 offset, u16 size, u8 *data) +{ + u32 out[MLX5_ST_SZ_DW(mcia_reg)]; + u32 in[MLX5_ST_SZ_DW(mcia_reg)]; + int module_num; + u16 i2c_addr; + int status; + int err; + void *ptr = MLX5_ADDR_OF(mcia_reg, out, dword_0); + + err = mlx5_query_module_num(dev, &module_num); + if (err) + return err; + + memset(in, 0, sizeof(in)); + size = min_t(int, size, MLX5_EEPROM_MAX_BYTES); + + if (offset < MLX5_EEPROM_PAGE_LENGTH && + offset + size > MLX5_EEPROM_PAGE_LENGTH) + /* Cross pages read, read until offset 256 in low page */ + size -= offset + size - MLX5_EEPROM_PAGE_LENGTH; + + i2c_addr = MLX5_I2C_ADDR_LOW; + if (offset >= MLX5_EEPROM_PAGE_LENGTH) { + i2c_addr = MLX5_I2C_ADDR_HIGH; + offset -= MLX5_EEPROM_PAGE_LENGTH; + } + + MLX5_SET(mcia_reg, in, l, 0); + MLX5_SET(mcia_reg, in, module, module_num); + MLX5_SET(mcia_reg, in, i2c_device_address, i2c_addr); + MLX5_SET(mcia_reg, in, page_number, 0); + MLX5_SET(mcia_reg, in, device_address, offset); + MLX5_SET(mcia_reg, in, size, size); + + err = mlx5_core_access_reg(dev, in, sizeof(in), out, + sizeof(out), MLX5_REG_MCIA, 0, 0); + if (err) + return err; + + status = MLX5_GET(mcia_reg, out, status); + if (status) { + mlx5_core_err(dev, "query_mcia_reg failed: status: 0x%x\n", + status); + return -EIO; + } + + memcpy(data, ptr, size); + + return size; +} +EXPORT_SYMBOL_GPL(mlx5_query_module_eeprom); + static int mlx5_query_port_pvlc(struct mlx5_core_dev *dev, u32 *pvlc, int pvlc_size, u8 local_port) { @@ -429,6 +539,25 @@ int mlx5_query_port_pfc(struct mlx5_core_dev *dev, u8 *pfc_en_tx, u8 *pfc_en_rx) } EXPORT_SYMBOL_GPL(mlx5_query_port_pfc); +void mlx5_query_port_autoneg(struct mlx5_core_dev *dev, int proto_mask, + u8 *an_status, + u8 *an_disable_cap, u8 *an_disable_admin) +{ + u32 out[MLX5_ST_SZ_DW(ptys_reg)]; + + *an_status = 0; + *an_disable_cap = 0; + *an_disable_admin = 0; + + if (mlx5_query_port_ptys(dev, out, sizeof(out), proto_mask, 1)) + return; + + *an_status = MLX5_GET(ptys_reg, out, an_status); + *an_disable_cap = MLX5_GET(ptys_reg, out, an_disable_cap); + *an_disable_admin = MLX5_GET(ptys_reg, out, an_disable_admin); +} +EXPORT_SYMBOL_GPL(mlx5_query_port_autoneg); + int mlx5_max_tc(struct mlx5_core_dev *mdev) { u8 num_tc = MLX5_CAP_GEN(mdev, max_tc) ? : 8; @@ -607,3 +736,52 @@ int mlx5_query_port_wol(struct mlx5_core_dev *mdev, u8 *wol_mode) return err; } EXPORT_SYMBOL_GPL(mlx5_query_port_wol); + +static int mlx5_query_ports_check(struct mlx5_core_dev *mdev, u32 *out, + int outlen) +{ + u32 in[MLX5_ST_SZ_DW(pcmr_reg)]; + + memset(in, 0, sizeof(in)); + MLX5_SET(pcmr_reg, in, local_port, 1); + + return mlx5_core_access_reg(mdev, in, sizeof(in), out, + outlen, MLX5_REG_PCMR, 0, 0); +} + +static int mlx5_set_ports_check(struct mlx5_core_dev *mdev, u32 *in, int inlen) +{ + u32 out[MLX5_ST_SZ_DW(pcmr_reg)]; + + return mlx5_core_access_reg(mdev, in, inlen, out, + sizeof(out), MLX5_REG_PCMR, 0, 1); +} + +int mlx5_set_port_fcs(struct mlx5_core_dev *mdev, u8 enable) +{ + u32 in[MLX5_ST_SZ_DW(pcmr_reg)]; + + memset(in, 0, sizeof(in)); + MLX5_SET(pcmr_reg, in, local_port, 1); + MLX5_SET(pcmr_reg, in, fcs_chk, enable); + + return mlx5_set_ports_check(mdev, in, sizeof(in)); +} + +void mlx5_query_port_fcs(struct mlx5_core_dev *mdev, bool *supported, + bool *enabled) +{ + u32 out[MLX5_ST_SZ_DW(pcmr_reg)]; + /* Default values for FW which do not support MLX5_REG_PCMR */ + *supported = false; + *enabled = true; + + if (!MLX5_CAP_GEN(mdev, ports_check)) + return; + + if (mlx5_query_ports_check(mdev, out, sizeof(out))) + return; + + *supported = !!(MLX5_GET(pcmr_reg, out, fcs_cap)); + *enabled = !!(MLX5_GET(pcmr_reg, out, fcs_chk)); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c index def289375ecb..b82d65802d96 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c @@ -418,7 +418,7 @@ int mlx5_core_xrcd_alloc(struct mlx5_core_dev *dev, u32 *xrcdn) if (out.hdr.status) err = mlx5_cmd_status_to_err(&out.hdr); else - *xrcdn = be32_to_cpu(out.xrcdn); + *xrcdn = be32_to_cpu(out.xrcdn) & 0xffffff; return err; } @@ -538,3 +538,71 @@ void mlx5_core_destroy_sq_tracked(struct mlx5_core_dev *dev, mlx5_core_destroy_sq(dev, sq->qpn); } EXPORT_SYMBOL(mlx5_core_destroy_sq_tracked); + +int mlx5_core_alloc_q_counter(struct mlx5_core_dev *dev, u16 *counter_id) +{ + u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)]; + u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)]; + int err; + + memset(in, 0, sizeof(in)); + memset(out, 0, sizeof(out)); + + MLX5_SET(alloc_q_counter_in, in, opcode, MLX5_CMD_OP_ALLOC_Q_COUNTER); + err = mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out)); + if (!err) + *counter_id = MLX5_GET(alloc_q_counter_out, out, + counter_set_id); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_core_alloc_q_counter); + +int mlx5_core_dealloc_q_counter(struct mlx5_core_dev *dev, u16 counter_id) +{ + u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)]; + u32 out[MLX5_ST_SZ_DW(dealloc_q_counter_out)]; + + memset(in, 0, sizeof(in)); + memset(out, 0, sizeof(out)); + + MLX5_SET(dealloc_q_counter_in, in, opcode, + MLX5_CMD_OP_DEALLOC_Q_COUNTER); + MLX5_SET(dealloc_q_counter_in, in, counter_set_id, counter_id); + return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, + sizeof(out)); +} +EXPORT_SYMBOL_GPL(mlx5_core_dealloc_q_counter); + +int mlx5_core_query_q_counter(struct mlx5_core_dev *dev, u16 counter_id, + int reset, void *out, int out_size) +{ + u32 in[MLX5_ST_SZ_DW(query_q_counter_in)]; + + memset(in, 0, sizeof(in)); + + MLX5_SET(query_q_counter_in, in, opcode, MLX5_CMD_OP_QUERY_Q_COUNTER); + MLX5_SET(query_q_counter_in, in, clear, reset); + MLX5_SET(query_q_counter_in, in, counter_set_id, counter_id); + return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, out_size); +} +EXPORT_SYMBOL_GPL(mlx5_core_query_q_counter); + +int mlx5_core_query_out_of_buffer(struct mlx5_core_dev *dev, u16 counter_id, + u32 *out_of_buffer) +{ + int outlen = MLX5_ST_SZ_BYTES(query_q_counter_out); + void *out; + int err; + + out = mlx5_vzalloc(outlen); + if (!out) + return -ENOMEM; + + err = mlx5_core_query_q_counter(dev, counter_id, 0, out, outlen); + if (!err) + *out_of_buffer = MLX5_GET(query_q_counter_out, out, + out_of_buffer); + + kfree(out); + return err; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rl.c b/drivers/net/ethernet/mellanox/mlx5/core/rl.c new file mode 100644 index 000000000000..c07c28bd3d55 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/rl.c @@ -0,0 +1,209 @@ +/* + * Copyright (c) 2013-2016, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/mlx5/driver.h> +#include <linux/mlx5/cmd.h> +#include "mlx5_core.h" + +/* Finds an entry where we can register the given rate + * If the rate already exists, return the entry where it is registered, + * otherwise return the first available entry. + * If the table is full, return NULL + */ +static struct mlx5_rl_entry *find_rl_entry(struct mlx5_rl_table *table, + u32 rate) +{ + struct mlx5_rl_entry *ret_entry = NULL; + bool empty_found = false; + int i; + + for (i = 0; i < table->max_size; i++) { + if (table->rl_entry[i].rate == rate) + return &table->rl_entry[i]; + if (!empty_found && !table->rl_entry[i].rate) { + empty_found = true; + ret_entry = &table->rl_entry[i]; + } + } + + return ret_entry; +} + +static int mlx5_set_rate_limit_cmd(struct mlx5_core_dev *dev, + u32 rate, u16 index) +{ + u32 in[MLX5_ST_SZ_DW(set_rate_limit_in)]; + u32 out[MLX5_ST_SZ_DW(set_rate_limit_out)]; + + memset(in, 0, sizeof(in)); + memset(out, 0, sizeof(out)); + + MLX5_SET(set_rate_limit_in, in, opcode, + MLX5_CMD_OP_SET_RATE_LIMIT); + MLX5_SET(set_rate_limit_in, in, rate_limit_index, index); + MLX5_SET(set_rate_limit_in, in, rate_limit, rate); + + return mlx5_cmd_exec_check_status(dev, in, sizeof(in), + out, sizeof(out)); +} + +bool mlx5_rl_is_in_range(struct mlx5_core_dev *dev, u32 rate) +{ + struct mlx5_rl_table *table = &dev->priv.rl_table; + + return (rate <= table->max_rate && rate >= table->min_rate); +} +EXPORT_SYMBOL(mlx5_rl_is_in_range); + +int mlx5_rl_add_rate(struct mlx5_core_dev *dev, u32 rate, u16 *index) +{ + struct mlx5_rl_table *table = &dev->priv.rl_table; + struct mlx5_rl_entry *entry; + int err = 0; + + mutex_lock(&table->rl_lock); + + if (!rate || !mlx5_rl_is_in_range(dev, rate)) { + mlx5_core_err(dev, "Invalid rate: %u, should be %u to %u\n", + rate, table->min_rate, table->max_rate); + err = -EINVAL; + goto out; + } + + entry = find_rl_entry(table, rate); + if (!entry) { + mlx5_core_err(dev, "Max number of %u rates reached\n", + table->max_size); + err = -ENOSPC; + goto out; + } + if (entry->refcount) { + /* rate already configured */ + entry->refcount++; + } else { + /* new rate limit */ + err = mlx5_set_rate_limit_cmd(dev, rate, entry->index); + if (err) { + mlx5_core_err(dev, "Failed configuring rate: %u (%d)\n", + rate, err); + goto out; + } + entry->rate = rate; + entry->refcount = 1; + } + *index = entry->index; + +out: + mutex_unlock(&table->rl_lock); + return err; +} +EXPORT_SYMBOL(mlx5_rl_add_rate); + +void mlx5_rl_remove_rate(struct mlx5_core_dev *dev, u32 rate) +{ + struct mlx5_rl_table *table = &dev->priv.rl_table; + struct mlx5_rl_entry *entry = NULL; + + /* 0 is a reserved value for unlimited rate */ + if (rate == 0) + return; + + mutex_lock(&table->rl_lock); + entry = find_rl_entry(table, rate); + if (!entry || !entry->refcount) { + mlx5_core_warn(dev, "Rate %u is not configured\n", rate); + goto out; + } + + entry->refcount--; + if (!entry->refcount) { + /* need to remove rate */ + mlx5_set_rate_limit_cmd(dev, 0, entry->index); + entry->rate = 0; + } + +out: + mutex_unlock(&table->rl_lock); +} +EXPORT_SYMBOL(mlx5_rl_remove_rate); + +int mlx5_init_rl_table(struct mlx5_core_dev *dev) +{ + struct mlx5_rl_table *table = &dev->priv.rl_table; + int i; + + mutex_init(&table->rl_lock); + if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, packet_pacing)) { + table->max_size = 0; + return 0; + } + + /* First entry is reserved for unlimited rate */ + table->max_size = MLX5_CAP_QOS(dev, packet_pacing_rate_table_size) - 1; + table->max_rate = MLX5_CAP_QOS(dev, packet_pacing_max_rate); + table->min_rate = MLX5_CAP_QOS(dev, packet_pacing_min_rate); + + table->rl_entry = kcalloc(table->max_size, sizeof(struct mlx5_rl_entry), + GFP_KERNEL); + if (!table->rl_entry) + return -ENOMEM; + + /* The index represents the index in HW rate limit table + * Index 0 is reserved for unlimited rate + */ + for (i = 0; i < table->max_size; i++) + table->rl_entry[i].index = i + 1; + + /* Index 0 is reserved */ + mlx5_core_info(dev, "Rate limit: %u rates are supported, range: %uMbps to %uMbps\n", + table->max_size, + table->min_rate >> 10, + table->max_rate >> 10); + + return 0; +} + +void mlx5_cleanup_rl_table(struct mlx5_core_dev *dev) +{ + struct mlx5_rl_table *table = &dev->priv.rl_table; + int i; + + /* Clear all configured rates */ + for (i = 0; i < table->max_size; i++) + if (table->rl_entry[i].rate) + mlx5_set_rate_limit_cmd(dev, 0, + table->rl_entry[i].index); + + kfree(dev->priv.rl_table.rl_entry); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c index 7b24386794f9..b380a6bc1f85 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c @@ -140,7 +140,7 @@ int mlx5_core_sriov_configure(struct pci_dev *pdev, int num_vfs) struct mlx5_core_sriov *sriov = &dev->priv.sriov; int err; - mlx5_core_dbg(dev, "requsted num_vfs %d\n", num_vfs); + mlx5_core_dbg(dev, "requested num_vfs %d\n", num_vfs); if (!mlx5_core_is_pf(dev)) return -EPERM; @@ -167,7 +167,7 @@ int mlx5_core_sriov_configure(struct pci_dev *pdev, int num_vfs) mlx5_core_init_vfs(dev, num_vfs); #ifdef CONFIG_MLX5_CORE_EN - mlx5_eswitch_enable_sriov(dev->priv.eswitch, num_vfs); + mlx5_eswitch_enable_sriov(dev->priv.eswitch, num_vfs, SRIOV_LEGACY); #endif return num_vfs; @@ -209,7 +209,8 @@ int mlx5_sriov_init(struct mlx5_core_dev *dev) mlx5_core_init_vfs(dev, cur_vfs); #ifdef CONFIG_MLX5_CORE_EN if (cur_vfs) - mlx5_eswitch_enable_sriov(dev->priv.eswitch, cur_vfs); + mlx5_eswitch_enable_sriov(dev->priv.eswitch, cur_vfs, + SRIOV_LEGACY); #endif enable_vfs(dev, cur_vfs); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/srq.c b/drivers/net/ethernet/mellanox/mlx5/core/srq.c index 04bc522605a0..c07f4d01b70e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/srq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/srq.c @@ -63,12 +63,12 @@ void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type) complete(&srq->free); } -static int get_pas_size(void *srqc) +static int get_pas_size(struct mlx5_srq_attr *in) { - u32 log_page_size = MLX5_GET(srqc, srqc, log_page_size) + 12; - u32 log_srq_size = MLX5_GET(srqc, srqc, log_srq_size); - u32 log_rq_stride = MLX5_GET(srqc, srqc, log_rq_stride); - u32 page_offset = MLX5_GET(srqc, srqc, page_offset); + u32 log_page_size = in->log_page_size + 12; + u32 log_srq_size = in->log_size; + u32 log_rq_stride = in->wqe_shift; + u32 page_offset = in->page_offset; u32 po_quanta = 1 << (log_page_size - 6); u32 rq_sz = 1 << (log_srq_size + 4 + log_rq_stride); u32 page_size = 1 << log_page_size; @@ -78,57 +78,58 @@ static int get_pas_size(void *srqc) return rq_num_pas * sizeof(u64); } -static void rmpc_srqc_reformat(void *srqc, void *rmpc, bool srqc_to_rmpc) +static void set_wq(void *wq, struct mlx5_srq_attr *in) { - void *wq = MLX5_ADDR_OF(rmpc, rmpc, wq); - - if (srqc_to_rmpc) { - switch (MLX5_GET(srqc, srqc, state)) { - case MLX5_SRQC_STATE_GOOD: - MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY); - break; - case MLX5_SRQC_STATE_ERROR: - MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_ERR); - break; - default: - pr_warn("%s: %d: Unknown srq state = 0x%x\n", __func__, - __LINE__, MLX5_GET(srqc, srqc, state)); - MLX5_SET(rmpc, rmpc, state, MLX5_GET(srqc, srqc, state)); - } - - MLX5_SET(wq, wq, wq_signature, MLX5_GET(srqc, srqc, wq_signature)); - MLX5_SET(wq, wq, log_wq_pg_sz, MLX5_GET(srqc, srqc, log_page_size)); - MLX5_SET(wq, wq, log_wq_stride, MLX5_GET(srqc, srqc, log_rq_stride) + 4); - MLX5_SET(wq, wq, log_wq_sz, MLX5_GET(srqc, srqc, log_srq_size)); - MLX5_SET(wq, wq, page_offset, MLX5_GET(srqc, srqc, page_offset)); - MLX5_SET(wq, wq, lwm, MLX5_GET(srqc, srqc, lwm)); - MLX5_SET(wq, wq, pd, MLX5_GET(srqc, srqc, pd)); - MLX5_SET64(wq, wq, dbr_addr, MLX5_GET64(srqc, srqc, dbr_addr)); - } else { - switch (MLX5_GET(rmpc, rmpc, state)) { - case MLX5_RMPC_STATE_RDY: - MLX5_SET(srqc, srqc, state, MLX5_SRQC_STATE_GOOD); - break; - case MLX5_RMPC_STATE_ERR: - MLX5_SET(srqc, srqc, state, MLX5_SRQC_STATE_ERROR); - break; - default: - pr_warn("%s: %d: Unknown rmp state = 0x%x\n", - __func__, __LINE__, - MLX5_GET(rmpc, rmpc, state)); - MLX5_SET(srqc, srqc, state, - MLX5_GET(rmpc, rmpc, state)); - } - - MLX5_SET(srqc, srqc, wq_signature, MLX5_GET(wq, wq, wq_signature)); - MLX5_SET(srqc, srqc, log_page_size, MLX5_GET(wq, wq, log_wq_pg_sz)); - MLX5_SET(srqc, srqc, log_rq_stride, MLX5_GET(wq, wq, log_wq_stride) - 4); - MLX5_SET(srqc, srqc, log_srq_size, MLX5_GET(wq, wq, log_wq_sz)); - MLX5_SET(srqc, srqc, page_offset, MLX5_GET(wq, wq, page_offset)); - MLX5_SET(srqc, srqc, lwm, MLX5_GET(wq, wq, lwm)); - MLX5_SET(srqc, srqc, pd, MLX5_GET(wq, wq, pd)); - MLX5_SET64(srqc, srqc, dbr_addr, MLX5_GET64(wq, wq, dbr_addr)); - } + MLX5_SET(wq, wq, wq_signature, !!(in->flags + & MLX5_SRQ_FLAG_WQ_SIG)); + MLX5_SET(wq, wq, log_wq_pg_sz, in->log_page_size); + MLX5_SET(wq, wq, log_wq_stride, in->wqe_shift + 4); + MLX5_SET(wq, wq, log_wq_sz, in->log_size); + MLX5_SET(wq, wq, page_offset, in->page_offset); + MLX5_SET(wq, wq, lwm, in->lwm); + MLX5_SET(wq, wq, pd, in->pd); + MLX5_SET64(wq, wq, dbr_addr, in->db_record); +} + +static void set_srqc(void *srqc, struct mlx5_srq_attr *in) +{ + MLX5_SET(srqc, srqc, wq_signature, !!(in->flags + & MLX5_SRQ_FLAG_WQ_SIG)); + MLX5_SET(srqc, srqc, log_page_size, in->log_page_size); + MLX5_SET(srqc, srqc, log_rq_stride, in->wqe_shift); + MLX5_SET(srqc, srqc, log_srq_size, in->log_size); + MLX5_SET(srqc, srqc, page_offset, in->page_offset); + MLX5_SET(srqc, srqc, lwm, in->lwm); + MLX5_SET(srqc, srqc, pd, in->pd); + MLX5_SET64(srqc, srqc, dbr_addr, in->db_record); + MLX5_SET(srqc, srqc, xrcd, in->xrcd); + MLX5_SET(srqc, srqc, cqn, in->cqn); +} + +static void get_wq(void *wq, struct mlx5_srq_attr *in) +{ + if (MLX5_GET(wq, wq, wq_signature)) + in->flags &= MLX5_SRQ_FLAG_WQ_SIG; + in->log_page_size = MLX5_GET(wq, wq, log_wq_pg_sz); + in->wqe_shift = MLX5_GET(wq, wq, log_wq_stride) - 4; + in->log_size = MLX5_GET(wq, wq, log_wq_sz); + in->page_offset = MLX5_GET(wq, wq, page_offset); + in->lwm = MLX5_GET(wq, wq, lwm); + in->pd = MLX5_GET(wq, wq, pd); + in->db_record = MLX5_GET64(wq, wq, dbr_addr); +} + +static void get_srqc(void *srqc, struct mlx5_srq_attr *in) +{ + if (MLX5_GET(srqc, srqc, wq_signature)) + in->flags &= MLX5_SRQ_FLAG_WQ_SIG; + in->log_page_size = MLX5_GET(srqc, srqc, log_page_size); + in->wqe_shift = MLX5_GET(srqc, srqc, log_rq_stride); + in->log_size = MLX5_GET(srqc, srqc, log_srq_size); + in->page_offset = MLX5_GET(srqc, srqc, page_offset); + in->lwm = MLX5_GET(srqc, srqc, lwm); + in->pd = MLX5_GET(srqc, srqc, pd); + in->db_record = MLX5_GET64(srqc, srqc, dbr_addr); } struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn) @@ -149,19 +150,36 @@ struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn) EXPORT_SYMBOL(mlx5_core_get_srq); static int create_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - struct mlx5_create_srq_mbox_in *in, int inlen) + struct mlx5_srq_attr *in) { - struct mlx5_create_srq_mbox_out out; + u32 create_out[MLX5_ST_SZ_DW(create_srq_out)] = {0}; + void *create_in; + void *srqc; + void *pas; + int pas_size; + int inlen; int err; - memset(&out, 0, sizeof(out)); + pas_size = get_pas_size(in); + inlen = MLX5_ST_SZ_BYTES(create_srq_in) + pas_size; + create_in = mlx5_vzalloc(inlen); + if (!create_in) + return -ENOMEM; + + srqc = MLX5_ADDR_OF(create_srq_in, create_in, srq_context_entry); + pas = MLX5_ADDR_OF(create_srq_in, create_in, pas); - in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_SRQ); + set_srqc(srqc, in); + memcpy(pas, in->pas, pas_size); - err = mlx5_cmd_exec_check_status(dev, (u32 *)in, inlen, (u32 *)(&out), - sizeof(out)); + MLX5_SET(create_srq_in, create_in, opcode, + MLX5_CMD_OP_CREATE_SRQ); - srq->srqn = be32_to_cpu(out.srqn) & 0xffffff; + err = mlx5_cmd_exec_check_status(dev, create_in, inlen, create_out, + sizeof(create_out)); + kvfree(create_in); + if (!err) + srq->srqn = MLX5_GET(create_srq_out, create_out, srqn); return err; } @@ -169,67 +187,75 @@ static int create_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, static int destroy_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq) { - struct mlx5_destroy_srq_mbox_in in; - struct mlx5_destroy_srq_mbox_out out; + u32 srq_in[MLX5_ST_SZ_DW(destroy_srq_in)] = {0}; + u32 srq_out[MLX5_ST_SZ_DW(destroy_srq_out)] = {0}; - memset(&in, 0, sizeof(in)); - memset(&out, 0, sizeof(out)); - in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DESTROY_SRQ); - in.srqn = cpu_to_be32(srq->srqn); + MLX5_SET(destroy_srq_in, srq_in, opcode, + MLX5_CMD_OP_DESTROY_SRQ); + MLX5_SET(destroy_srq_in, srq_in, srqn, srq->srqn); - return mlx5_cmd_exec_check_status(dev, (u32 *)(&in), sizeof(in), - (u32 *)(&out), sizeof(out)); + return mlx5_cmd_exec_check_status(dev, srq_in, sizeof(srq_in), + srq_out, sizeof(srq_out)); } static int arm_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, u16 lwm, int is_srq) { - struct mlx5_arm_srq_mbox_in in; - struct mlx5_arm_srq_mbox_out out; - - memset(&in, 0, sizeof(in)); - memset(&out, 0, sizeof(out)); + /* arm_srq structs missing using identical xrc ones */ + u32 srq_in[MLX5_ST_SZ_DW(arm_xrc_srq_in)] = {0}; + u32 srq_out[MLX5_ST_SZ_DW(arm_xrc_srq_out)] = {0}; - in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_ARM_RQ); - in.hdr.opmod = cpu_to_be16(!!is_srq); - in.srqn = cpu_to_be32(srq->srqn); - in.lwm = cpu_to_be16(lwm); + MLX5_SET(arm_xrc_srq_in, srq_in, opcode, MLX5_CMD_OP_ARM_XRC_SRQ); + MLX5_SET(arm_xrc_srq_in, srq_in, xrc_srqn, srq->srqn); + MLX5_SET(arm_xrc_srq_in, srq_in, lwm, lwm); - return mlx5_cmd_exec_check_status(dev, (u32 *)(&in), - sizeof(in), (u32 *)(&out), - sizeof(out)); + return mlx5_cmd_exec_check_status(dev, srq_in, sizeof(srq_in), + srq_out, sizeof(srq_out)); } static int query_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - struct mlx5_query_srq_mbox_out *out) + struct mlx5_srq_attr *out) { - struct mlx5_query_srq_mbox_in in; + u32 srq_in[MLX5_ST_SZ_DW(query_srq_in)] = {0}; + u32 *srq_out; + void *srqc; + int err; - memset(&in, 0, sizeof(in)); + srq_out = mlx5_vzalloc(MLX5_ST_SZ_BYTES(query_srq_out)); + if (!srq_out) + return -ENOMEM; - in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_SRQ); - in.srqn = cpu_to_be32(srq->srqn); + MLX5_SET(query_srq_in, srq_in, opcode, + MLX5_CMD_OP_QUERY_SRQ); + MLX5_SET(query_srq_in, srq_in, srqn, srq->srqn); + err = mlx5_cmd_exec_check_status(dev, srq_in, sizeof(srq_in), + srq_out, + MLX5_ST_SZ_BYTES(query_srq_out)); + if (err) + goto out; - return mlx5_cmd_exec_check_status(dev, (u32 *)(&in), sizeof(in), - (u32 *)out, sizeof(*out)); + srqc = MLX5_ADDR_OF(query_srq_out, srq_out, srq_context_entry); + get_srqc(srqc, out); + if (MLX5_GET(srqc, srqc, state) != MLX5_SRQC_STATE_GOOD) + out->flags |= MLX5_SRQ_FLAG_ERR; +out: + kvfree(srq_out); + return err; } static int create_xrc_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - struct mlx5_create_srq_mbox_in *in, - int srq_inlen) + struct mlx5_srq_attr *in) { u32 create_out[MLX5_ST_SZ_DW(create_xrc_srq_out)]; void *create_in; - void *srqc; void *xrc_srqc; void *pas; int pas_size; int inlen; int err; - srqc = MLX5_ADDR_OF(create_srq_in, in, srq_context_entry); - pas_size = get_pas_size(srqc); + pas_size = get_pas_size(in); inlen = MLX5_ST_SZ_BYTES(create_xrc_srq_in) + pas_size; create_in = mlx5_vzalloc(inlen); if (!create_in) @@ -239,7 +265,8 @@ static int create_xrc_srq_cmd(struct mlx5_core_dev *dev, xrc_srq_context_entry); pas = MLX5_ADDR_OF(create_xrc_srq_in, create_in, pas); - memcpy(xrc_srqc, srqc, MLX5_ST_SZ_BYTES(srqc)); + set_srqc(xrc_srqc, in); + MLX5_SET(xrc_srqc, xrc_srqc, user_index, in->user_index); memcpy(pas, in->pas, pas_size); MLX5_SET(create_xrc_srq_in, create_in, opcode, MLX5_CMD_OP_CREATE_XRC_SRQ); @@ -293,11 +320,10 @@ static int arm_xrc_srq_cmd(struct mlx5_core_dev *dev, static int query_xrc_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - struct mlx5_query_srq_mbox_out *out) + struct mlx5_srq_attr *out) { u32 xrcsrq_in[MLX5_ST_SZ_DW(query_xrc_srq_in)]; u32 *xrcsrq_out; - void *srqc; void *xrc_srqc; int err; @@ -317,8 +343,9 @@ static int query_xrc_srq_cmd(struct mlx5_core_dev *dev, xrc_srqc = MLX5_ADDR_OF(query_xrc_srq_out, xrcsrq_out, xrc_srq_context_entry); - srqc = MLX5_ADDR_OF(query_srq_out, out, srq_context_entry); - memcpy(srqc, xrc_srqc, MLX5_ST_SZ_BYTES(srqc)); + get_srqc(xrc_srqc, out); + if (MLX5_GET(xrc_srqc, xrc_srqc, state) != MLX5_XRC_SRQC_STATE_GOOD) + out->flags |= MLX5_SRQ_FLAG_ERR; out: kvfree(xrcsrq_out); @@ -326,26 +353,27 @@ out: } static int create_rmp_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - struct mlx5_create_srq_mbox_in *in, int srq_inlen) + struct mlx5_srq_attr *in) { void *create_in; void *rmpc; - void *srqc; + void *wq; int pas_size; int inlen; int err; - srqc = MLX5_ADDR_OF(create_srq_in, in, srq_context_entry); - pas_size = get_pas_size(srqc); + pas_size = get_pas_size(in); inlen = MLX5_ST_SZ_BYTES(create_rmp_in) + pas_size; create_in = mlx5_vzalloc(inlen); if (!create_in) return -ENOMEM; rmpc = MLX5_ADDR_OF(create_rmp_in, create_in, ctx); + wq = MLX5_ADDR_OF(rmpc, rmpc, wq); + MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY); + set_wq(wq, in); memcpy(MLX5_ADDR_OF(rmpc, rmpc, wq.pas), in->pas, pas_size); - rmpc_srqc_reformat(srqc, rmpc, true); err = mlx5_core_create_rmp(dev, create_in, inlen, &srq->srqn); @@ -390,11 +418,10 @@ static int arm_rmp_cmd(struct mlx5_core_dev *dev, } static int query_rmp_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - struct mlx5_query_srq_mbox_out *out) + struct mlx5_srq_attr *out) { u32 *rmp_out; void *rmpc; - void *srqc; int err; rmp_out = mlx5_vzalloc(MLX5_ST_SZ_BYTES(query_rmp_out)); @@ -405,9 +432,10 @@ static int query_rmp_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, if (err) goto out; - srqc = MLX5_ADDR_OF(query_srq_out, out, srq_context_entry); rmpc = MLX5_ADDR_OF(query_rmp_out, rmp_out, rmp_context); - rmpc_srqc_reformat(srqc, rmpc, false); + get_wq(MLX5_ADDR_OF(rmpc, rmpc, wq), out); + if (MLX5_GET(rmpc, rmpc, state) != MLX5_RMPC_STATE_RDY) + out->flags |= MLX5_SRQ_FLAG_ERR; out: kvfree(rmp_out); @@ -416,15 +444,14 @@ out: static int create_srq_split(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - struct mlx5_create_srq_mbox_in *in, - int inlen, int is_xrc) + struct mlx5_srq_attr *in) { if (!dev->issi) - return create_srq_cmd(dev, srq, in, inlen); + return create_srq_cmd(dev, srq, in); else if (srq->common.res == MLX5_RES_XSRQ) - return create_xrc_srq_cmd(dev, srq, in, inlen); + return create_xrc_srq_cmd(dev, srq, in); else - return create_rmp_cmd(dev, srq, in, inlen); + return create_rmp_cmd(dev, srq, in); } static int destroy_srq_split(struct mlx5_core_dev *dev, @@ -439,15 +466,17 @@ static int destroy_srq_split(struct mlx5_core_dev *dev, } int mlx5_core_create_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - struct mlx5_create_srq_mbox_in *in, int inlen, - int is_xrc) + struct mlx5_srq_attr *in) { int err; struct mlx5_srq_table *table = &dev->priv.srq_table; - srq->common.res = is_xrc ? MLX5_RES_XSRQ : MLX5_RES_SRQ; + if (in->type == IB_SRQT_XRC) + srq->common.res = MLX5_RES_XSRQ; + else + srq->common.res = MLX5_RES_SRQ; - err = create_srq_split(dev, srq, in, inlen, is_xrc); + err = create_srq_split(dev, srq, in); if (err) return err; @@ -502,7 +531,7 @@ int mlx5_core_destroy_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq) EXPORT_SYMBOL(mlx5_core_destroy_srq); int mlx5_core_query_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - struct mlx5_query_srq_mbox_out *out) + struct mlx5_srq_attr *out) { if (!dev->issi) return query_srq_cmd(dev, srq, out); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c index 03a5093ffeb7..28274a6fbafe 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c @@ -85,6 +85,7 @@ int mlx5_core_create_rq(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *rqn) return err; } +EXPORT_SYMBOL(mlx5_core_create_rq); int mlx5_core_modify_rq(struct mlx5_core_dev *dev, u32 rqn, u32 *in, int inlen) { @@ -110,6 +111,7 @@ void mlx5_core_destroy_rq(struct mlx5_core_dev *dev, u32 rqn) mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out)); } +EXPORT_SYMBOL(mlx5_core_destroy_rq); int mlx5_core_query_rq(struct mlx5_core_dev *dev, u32 rqn, u32 *out) { @@ -430,6 +432,7 @@ int mlx5_core_create_rqt(struct mlx5_core_dev *dev, u32 *in, int inlen, return err; } +EXPORT_SYMBOL(mlx5_core_create_rqt); int mlx5_core_modify_rqt(struct mlx5_core_dev *dev, u32 rqtn, u32 *in, int inlen) @@ -455,3 +458,4 @@ void mlx5_core_destroy_rqt(struct mlx5_core_dev *dev, u32 rqtn) mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out)); } +EXPORT_SYMBOL(mlx5_core_destroy_rqt); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index b69dadcfb897..21365d06982b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -135,6 +135,18 @@ static int mlx5_modify_nic_vport_context(struct mlx5_core_dev *mdev, void *in, return mlx5_cmd_exec_check_status(mdev, in, inlen, out, sizeof(out)); } +void mlx5_query_nic_vport_min_inline(struct mlx5_core_dev *mdev, + u8 *min_inline_mode) +{ + u32 out[MLX5_ST_SZ_DW(query_nic_vport_context_out)] = {0}; + + mlx5_query_nic_vport_context(mdev, 0, out, sizeof(out)); + + *min_inline_mode = MLX5_GET(query_nic_vport_context_out, out, + nic_vport_context.min_wqe_inline_mode); +} +EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_min_inline); + int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev, u16 vport, u8 *addr) { @@ -508,6 +520,41 @@ int mlx5_query_nic_vport_node_guid(struct mlx5_core_dev *mdev, u64 *node_guid) } EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_node_guid); +int mlx5_modify_nic_vport_node_guid(struct mlx5_core_dev *mdev, + u32 vport, u64 node_guid) +{ + int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in); + void *nic_vport_context; + void *in; + int err; + + if (!vport) + return -EINVAL; + if (!MLX5_CAP_GEN(mdev, vport_group_manager)) + return -EACCES; + if (!MLX5_CAP_ESW(mdev, nic_vport_node_guid_modify)) + return -ENOTSUPP; + + in = mlx5_vzalloc(inlen); + if (!in) + return -ENOMEM; + + MLX5_SET(modify_nic_vport_context_in, in, + field_select.node_guid, 1); + MLX5_SET(modify_nic_vport_context_in, in, vport_number, vport); + MLX5_SET(modify_nic_vport_context_in, in, other_vport, !!vport); + + nic_vport_context = MLX5_ADDR_OF(modify_nic_vport_context_in, + in, nic_vport_context); + MLX5_SET64(nic_vport_context, nic_vport_context, node_guid, node_guid); + + err = mlx5_modify_nic_vport_context(mdev, in, inlen); + + kvfree(in); + + return err; +} + int mlx5_query_nic_vport_qkey_viol_cntr(struct mlx5_core_dev *mdev, u16 *qkey_viol_cntr) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c index f2fd1ef16da7..e25a73ed2981 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c @@ -72,8 +72,8 @@ static int mlx5e_vxlan_core_del_port_cmd(struct mlx5_core_dev *mdev, u16 port) u32 in[MLX5_ST_SZ_DW(delete_vxlan_udp_dport_in)]; u32 out[MLX5_ST_SZ_DW(delete_vxlan_udp_dport_out)]; - memset(&in, 0, sizeof(in)); - memset(&out, 0, sizeof(out)); + memset(in, 0, sizeof(in)); + memset(out, 0, sizeof(out)); MLX5_SET(delete_vxlan_udp_dport_in, in, opcode, MLX5_CMD_OP_DELETE_VXLAN_UDP_DPORT); @@ -105,6 +105,9 @@ static void mlx5e_vxlan_add_port(struct work_struct *work) struct mlx5e_vxlan *vxlan; int err; + if (mlx5e_vxlan_lookup_port(priv, port)) + goto free_work; + if (mlx5e_vxlan_core_add_port_cmd(priv->mdev, port)) goto free_work; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.h b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.h index 217ac530a514..5def12c048e3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.h @@ -48,18 +48,12 @@ struct mlx5e_vxlan_work { static inline bool mlx5e_vxlan_allowed(struct mlx5_core_dev *mdev) { - return IS_ENABLED(CONFIG_MLX5_CORE_EN_VXLAN) && - (MLX5_CAP_ETH(mdev, tunnel_stateless_vxlan) && + return (MLX5_CAP_ETH(mdev, tunnel_stateless_vxlan) && mlx5_core_is_pf(mdev)); } -#ifdef CONFIG_MLX5_CORE_EN_VXLAN void mlx5e_vxlan_init(struct mlx5e_priv *priv); void mlx5e_vxlan_cleanup(struct mlx5e_priv *priv); -#else -static inline void mlx5e_vxlan_init(struct mlx5e_priv *priv) {} -static inline void mlx5e_vxlan_cleanup(struct mlx5e_priv *priv) {} -#endif void mlx5e_vxlan_queue_work(struct mlx5e_priv *priv, sa_family_t sa_family, u16 port, int add); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.c b/drivers/net/ethernet/mellanox/mlx5/core/wq.c index ce21ee5b2357..821a087c7ae2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/wq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.c @@ -75,14 +75,14 @@ int mlx5_wq_cyc_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node); if (err) { - mlx5_core_warn(mdev, "mlx5_db_alloc() failed, %d\n", err); + mlx5_core_warn(mdev, "mlx5_db_alloc_node() failed, %d\n", err); return err; } err = mlx5_buf_alloc_node(mdev, mlx5_wq_cyc_get_byte_size(wq), &wq_ctrl->buf, param->buf_numa_node); if (err) { - mlx5_core_warn(mdev, "mlx5_buf_alloc() failed, %d\n", err); + mlx5_core_warn(mdev, "mlx5_buf_alloc_node() failed, %d\n", err); goto err_db_free; } @@ -111,14 +111,14 @@ int mlx5_cqwq_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node); if (err) { - mlx5_core_warn(mdev, "mlx5_db_alloc() failed, %d\n", err); + mlx5_core_warn(mdev, "mlx5_db_alloc_node() failed, %d\n", err); return err; } err = mlx5_buf_alloc_node(mdev, mlx5_cqwq_get_byte_size(wq), &wq_ctrl->buf, param->buf_numa_node); if (err) { - mlx5_core_warn(mdev, "mlx5_buf_alloc() failed, %d\n", err); + mlx5_core_warn(mdev, "mlx5_buf_alloc_node() failed, %d\n", err); goto err_db_free; } @@ -148,13 +148,14 @@ int mlx5_wq_ll_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node); if (err) { - mlx5_core_warn(mdev, "mlx5_db_alloc() failed, %d\n", err); + mlx5_core_warn(mdev, "mlx5_db_alloc_node() failed, %d\n", err); return err; } - err = mlx5_buf_alloc(mdev, mlx5_wq_ll_get_byte_size(wq), &wq_ctrl->buf); + err = mlx5_buf_alloc_node(mdev, mlx5_wq_ll_get_byte_size(wq), + &wq_ctrl->buf, param->buf_numa_node); if (err) { - mlx5_core_warn(mdev, "mlx5_buf_alloc() failed, %d\n", err); + mlx5_core_warn(mdev, "mlx5_buf_alloc_node() failed, %d\n", err); goto err_db_free; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/Kconfig b/drivers/net/ethernet/mellanox/mlxsw/Kconfig index 2ad7f67854d5..5989f7cb5462 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/Kconfig +++ b/drivers/net/ethernet/mellanox/mlxsw/Kconfig @@ -50,3 +50,11 @@ config MLXSW_SPECTRUM To compile this driver as a module, choose M here: the module will be called mlxsw_spectrum. + +config MLXSW_SPECTRUM_DCB + bool "Data Center Bridging (DCB) support" + depends on MLXSW_SPECTRUM && DCB + default y + ---help--- + Say Y here if you want to use Data Center Bridging (DCB) in the + driver. diff --git a/drivers/net/ethernet/mellanox/mlxsw/Makefile b/drivers/net/ethernet/mellanox/mlxsw/Makefile index 584cac444852..d20ae1838a64 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/Makefile +++ b/drivers/net/ethernet/mellanox/mlxsw/Makefile @@ -7,4 +7,6 @@ obj-$(CONFIG_MLXSW_SWITCHX2) += mlxsw_switchx2.o mlxsw_switchx2-objs := switchx2.o obj-$(CONFIG_MLXSW_SPECTRUM) += mlxsw_spectrum.o mlxsw_spectrum-objs := spectrum.o spectrum_buffers.o \ - spectrum_switchdev.o + spectrum_switchdev.o spectrum_router.o \ + spectrum_kvdl.o +mlxsw_spectrum-$(CONFIG_MLXSW_SPECTRUM_DCB) += spectrum_dcb.o diff --git a/drivers/net/ethernet/mellanox/mlxsw/cmd.h b/drivers/net/ethernet/mellanox/mlxsw/cmd.h index cd63b8263688..28271bedd957 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/cmd.h +++ b/drivers/net/ethernet/mellanox/mlxsw/cmd.h @@ -105,6 +105,7 @@ enum mlxsw_cmd_opcode { MLXSW_CMD_OPCODE_SW2HW_EQ = 0x013, MLXSW_CMD_OPCODE_HW2SW_EQ = 0x014, MLXSW_CMD_OPCODE_QUERY_EQ = 0x015, + MLXSW_CMD_OPCODE_QUERY_RESOURCES = 0x101, }; static inline const char *mlxsw_cmd_opcode_str(u16 opcode) @@ -144,6 +145,8 @@ static inline const char *mlxsw_cmd_opcode_str(u16 opcode) return "HW2SW_EQ"; case MLXSW_CMD_OPCODE_QUERY_EQ: return "QUERY_EQ"; + case MLXSW_CMD_OPCODE_QUERY_RESOURCES: + return "QUERY_RESOURCES"; default: return "*UNKNOWN*"; } @@ -500,6 +503,35 @@ static inline int mlxsw_cmd_unmap_fa(struct mlxsw_core *mlxsw_core) return mlxsw_cmd_exec_none(mlxsw_core, MLXSW_CMD_OPCODE_UNMAP_FA, 0, 0); } +/* QUERY_RESOURCES - Query chip resources + * -------------------------------------- + * OpMod == 0 (N/A) , INMmod is index + * ---------------------------------- + * The QUERY_RESOURCES command retrieves information related to chip resources + * by resource ID. Every command returns 32 entries. INmod is being use as base. + * for example, index 1 will return entries 32-63. When the tables end and there + * are no more sources in the table, will return resource id 0xFFF to indicate + * it. + */ +static inline int mlxsw_cmd_query_resources(struct mlxsw_core *mlxsw_core, + char *out_mbox, int index) +{ + return mlxsw_cmd_exec_out(mlxsw_core, MLXSW_CMD_OPCODE_QUERY_RESOURCES, + 0, index, false, out_mbox, + MLXSW_CMD_MBOX_SIZE); +} + +/* cmd_mbox_query_resource_id + * The resource id. 0xFFFF indicates table's end. + */ +MLXSW_ITEM32_INDEXED(cmd_mbox, query_resource, id, 0x00, 16, 16, 0x8, 0, false); + +/* cmd_mbox_query_resource_data + * The resource + */ +MLXSW_ITEM64_INDEXED(cmd_mbox, query_resource, data, + 0x00, 0, 40, 0x8, 0, false); + /* CONFIG_PROFILE (Set) - Configure Switch Profile * ------------------------------ * OpMod == 1 (Set), INMmod == 0 (N/A) @@ -607,6 +639,24 @@ MLXSW_ITEM32(cmd_mbox, config_profile, */ MLXSW_ITEM32(cmd_mbox, config_profile, set_ar_sec, 0x0C, 15, 1); +/* cmd_mbox_config_set_kvd_linear_size + * Capability bit. Setting a bit to 1 configures the profile + * according to the mailbox contents. + */ +MLXSW_ITEM32(cmd_mbox, config_profile, set_kvd_linear_size, 0x0C, 24, 1); + +/* cmd_mbox_config_set_kvd_hash_single_size + * Capability bit. Setting a bit to 1 configures the profile + * according to the mailbox contents. + */ +MLXSW_ITEM32(cmd_mbox, config_profile, set_kvd_hash_single_size, 0x0C, 25, 1); + +/* cmd_mbox_config_set_kvd_hash_double_size + * Capability bit. Setting a bit to 1 configures the profile + * according to the mailbox contents. + */ +MLXSW_ITEM32(cmd_mbox, config_profile, set_kvd_hash_double_size, 0x0C, 26, 1); + /* cmd_mbox_config_profile_max_vepa_channels * Maximum number of VEPA channels per port (0 through 16) * 0 - multi-channel VEPA is disabled @@ -733,6 +783,31 @@ MLXSW_ITEM32(cmd_mbox, config_profile, adaptive_routing_group_cap, 0x4C, 0, 16); */ MLXSW_ITEM32(cmd_mbox, config_profile, arn, 0x50, 31, 1); +/* cmd_mbox_config_kvd_linear_size + * KVD Linear Size + * Valid for Spectrum only + * Allowed values are 128*N where N=0 or higher + */ +MLXSW_ITEM32(cmd_mbox, config_profile, kvd_linear_size, 0x54, 0, 24); + +/* cmd_mbox_config_kvd_hash_single_size + * KVD Hash single-entries size + * Valid for Spectrum only + * Allowed values are 128*N where N=0 or higher + * Must be greater or equal to cap_min_kvd_hash_single_size + * Must be smaller or equal to cap_kvd_size - kvd_linear_size + */ +MLXSW_ITEM32(cmd_mbox, config_profile, kvd_hash_single_size, 0x58, 0, 24); + +/* cmd_mbox_config_kvd_hash_double_size + * KVD Hash double-entries size (units of single-size entries) + * Valid for Spectrum only + * Allowed values are 128*N where N=0 or higher + * Must be either 0 or greater or equal to cap_min_kvd_hash_double_size + * Must be smaller or equal to cap_kvd_size - kvd_linear_size + */ +MLXSW_ITEM32(cmd_mbox, config_profile, kvd_hash_double_size, 0x5C, 0, 24); + /* cmd_mbox_config_profile_swid_config_mask * Modify Switch Partition Configuration mask. When set, the configu- * ration value for the Switch Partition are taken from the mailbox. diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index f69f6280519f..068ee65a960b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -44,7 +44,7 @@ #include <linux/seq_file.h> #include <linux/u64_stats_sync.h> #include <linux/netdevice.h> -#include <linux/wait.h> +#include <linux/completion.h> #include <linux/skbuff.h> #include <linux/etherdevice.h> #include <linux/types.h> @@ -55,8 +55,10 @@ #include <linux/mutex.h> #include <linux/rcupdate.h> #include <linux/slab.h> +#include <linux/workqueue.h> #include <asm/byteorder.h> #include <net/devlink.h> +#include <trace/events/devlink.h> #include "core.h" #include "item.h" @@ -73,6 +75,8 @@ static const char mlxsw_core_driver_name[] = "mlxsw_core"; static struct dentry *mlxsw_core_dbg_root; +static struct workqueue_struct *mlxsw_wq; + struct mlxsw_core_pcpu_stats { u64 trap_rx_packets[MLXSW_TRAP_ID_MAX]; u64 trap_rx_bytes[MLXSW_TRAP_ID_MAX]; @@ -93,11 +97,9 @@ struct mlxsw_core { struct list_head rx_listener_list; struct list_head event_listener_list; struct { - struct sk_buff *resp_skb; - u64 tid; - wait_queue_head_t wait; - bool trans_active; - struct mutex lock; /* One EMAD transaction at a time. */ + atomic64_t tid; + struct list_head trans_list; + spinlock_t trans_list_lock; /* protects trans_list writes */ bool use_emad; } emad; struct mlxsw_core_pcpu_stats __percpu *pcpu_stats; @@ -109,11 +111,18 @@ struct mlxsw_core { struct { u8 *mapping; /* lag_id+port_index to local_port mapping */ } lag; + struct mlxsw_resources resources; struct mlxsw_hwmon *hwmon; unsigned long driver_priv[0]; /* driver_priv has to be always the last item */ }; +void *mlxsw_core_driver_priv(struct mlxsw_core *mlxsw_core) +{ + return mlxsw_core->driver_priv; +} +EXPORT_SYMBOL(mlxsw_core_driver_priv); + struct mlxsw_rx_listener_item { struct list_head list; struct mlxsw_rx_listener rxl; @@ -284,7 +293,7 @@ static void mlxsw_emad_pack_reg_tlv(char *reg_tlv, static void mlxsw_emad_pack_op_tlv(char *op_tlv, const struct mlxsw_reg_info *reg, enum mlxsw_core_reg_access_type type, - struct mlxsw_core *mlxsw_core) + u64 tid) { mlxsw_emad_op_tlv_type_set(op_tlv, MLXSW_EMAD_TLV_TYPE_OP); mlxsw_emad_op_tlv_len_set(op_tlv, MLXSW_EMAD_OP_TLV_LEN); @@ -300,7 +309,7 @@ static void mlxsw_emad_pack_op_tlv(char *op_tlv, MLXSW_EMAD_OP_TLV_METHOD_WRITE); mlxsw_emad_op_tlv_class_set(op_tlv, MLXSW_EMAD_OP_TLV_CLASS_REG_ACCESS); - mlxsw_emad_op_tlv_tid_set(op_tlv, mlxsw_core->emad.tid); + mlxsw_emad_op_tlv_tid_set(op_tlv, tid); } static int mlxsw_emad_construct_eth_hdr(struct sk_buff *skb) @@ -322,7 +331,7 @@ static void mlxsw_emad_construct(struct sk_buff *skb, const struct mlxsw_reg_info *reg, char *payload, enum mlxsw_core_reg_access_type type, - struct mlxsw_core *mlxsw_core) + u64 tid) { char *buf; @@ -333,7 +342,7 @@ static void mlxsw_emad_construct(struct sk_buff *skb, mlxsw_emad_pack_reg_tlv(buf, reg, payload); buf = skb_push(skb, MLXSW_EMAD_OP_TLV_LEN * sizeof(u32)); - mlxsw_emad_pack_op_tlv(buf, reg, type, mlxsw_core); + mlxsw_emad_pack_op_tlv(buf, reg, type, tid); mlxsw_emad_construct_eth_hdr(skb); } @@ -370,58 +379,16 @@ static bool mlxsw_emad_is_resp(const struct sk_buff *skb) return (mlxsw_emad_op_tlv_r_get(op_tlv) == MLXSW_EMAD_OP_TLV_RESPONSE); } -#define MLXSW_EMAD_TIMEOUT_MS 200 - -static int __mlxsw_emad_transmit(struct mlxsw_core *mlxsw_core, - struct sk_buff *skb, - const struct mlxsw_tx_info *tx_info) -{ - int err; - int ret; - - mlxsw_core->emad.trans_active = true; - - err = mlxsw_core_skb_transmit(mlxsw_core->driver_priv, skb, tx_info); - if (err) { - dev_err(mlxsw_core->bus_info->dev, "Failed to transmit EMAD (tid=%llx)\n", - mlxsw_core->emad.tid); - dev_kfree_skb(skb); - goto trans_inactive_out; - } - - ret = wait_event_timeout(mlxsw_core->emad.wait, - !(mlxsw_core->emad.trans_active), - msecs_to_jiffies(MLXSW_EMAD_TIMEOUT_MS)); - if (!ret) { - dev_warn(mlxsw_core->bus_info->dev, "EMAD timed-out (tid=%llx)\n", - mlxsw_core->emad.tid); - err = -EIO; - goto trans_inactive_out; - } - - return 0; - -trans_inactive_out: - mlxsw_core->emad.trans_active = false; - return err; -} - -static int mlxsw_emad_process_status(struct mlxsw_core *mlxsw_core, - char *op_tlv) +static int mlxsw_emad_process_status(char *op_tlv, + enum mlxsw_emad_op_tlv_status *p_status) { - enum mlxsw_emad_op_tlv_status status; - u64 tid; - - status = mlxsw_emad_op_tlv_status_get(op_tlv); - tid = mlxsw_emad_op_tlv_tid_get(op_tlv); + *p_status = mlxsw_emad_op_tlv_status_get(op_tlv); - switch (status) { + switch (*p_status) { case MLXSW_EMAD_OP_TLV_STATUS_SUCCESS: return 0; case MLXSW_EMAD_OP_TLV_STATUS_BUSY: case MLXSW_EMAD_OP_TLV_STATUS_MESSAGE_RECEIPT_ACK: - dev_warn(mlxsw_core->bus_info->dev, "Reg access status again (tid=%llx,status=%x(%s))\n", - tid, status, mlxsw_emad_op_tlv_status_str(status)); return -EAGAIN; case MLXSW_EMAD_OP_TLV_STATUS_VERSION_NOT_SUPPORTED: case MLXSW_EMAD_OP_TLV_STATUS_UNKNOWN_TLV: @@ -432,70 +399,157 @@ static int mlxsw_emad_process_status(struct mlxsw_core *mlxsw_core, case MLXSW_EMAD_OP_TLV_STATUS_RESOURCE_NOT_AVAILABLE: case MLXSW_EMAD_OP_TLV_STATUS_INTERNAL_ERROR: default: - dev_err(mlxsw_core->bus_info->dev, "Reg access status failed (tid=%llx,status=%x(%s))\n", - tid, status, mlxsw_emad_op_tlv_status_str(status)); return -EIO; } } -static int mlxsw_emad_process_status_skb(struct mlxsw_core *mlxsw_core, - struct sk_buff *skb) +static int +mlxsw_emad_process_status_skb(struct sk_buff *skb, + enum mlxsw_emad_op_tlv_status *p_status) +{ + return mlxsw_emad_process_status(mlxsw_emad_op_tlv(skb), p_status); +} + +struct mlxsw_reg_trans { + struct list_head list; + struct list_head bulk_list; + struct mlxsw_core *core; + struct sk_buff *tx_skb; + struct mlxsw_tx_info tx_info; + struct delayed_work timeout_dw; + unsigned int retries; + u64 tid; + struct completion completion; + atomic_t active; + mlxsw_reg_trans_cb_t *cb; + unsigned long cb_priv; + const struct mlxsw_reg_info *reg; + enum mlxsw_core_reg_access_type type; + int err; + enum mlxsw_emad_op_tlv_status emad_status; + struct rcu_head rcu; +}; + +#define MLXSW_EMAD_TIMEOUT_MS 200 + +static void mlxsw_emad_trans_timeout_schedule(struct mlxsw_reg_trans *trans) { - return mlxsw_emad_process_status(mlxsw_core, mlxsw_emad_op_tlv(skb)); + unsigned long timeout = msecs_to_jiffies(MLXSW_EMAD_TIMEOUT_MS); + + mlxsw_core_schedule_dw(&trans->timeout_dw, timeout); } static int mlxsw_emad_transmit(struct mlxsw_core *mlxsw_core, - struct sk_buff *skb, - const struct mlxsw_tx_info *tx_info) + struct mlxsw_reg_trans *trans) { - struct sk_buff *trans_skb; - int n_retry; + struct sk_buff *skb; int err; - n_retry = 0; -retry: - /* We copy the EMAD to a new skb, since we might need - * to retransmit it in case of failure. - */ - trans_skb = skb_copy(skb, GFP_KERNEL); - if (!trans_skb) { - err = -ENOMEM; - goto out; + skb = skb_copy(trans->tx_skb, GFP_KERNEL); + if (!skb) + return -ENOMEM; + + trace_devlink_hwmsg(priv_to_devlink(mlxsw_core), false, 0, + skb->data + mlxsw_core->driver->txhdr_len, + skb->len - mlxsw_core->driver->txhdr_len); + + atomic_set(&trans->active, 1); + err = mlxsw_core_skb_transmit(mlxsw_core, skb, &trans->tx_info); + if (err) { + dev_kfree_skb(skb); + return err; } + mlxsw_emad_trans_timeout_schedule(trans); + return 0; +} - err = __mlxsw_emad_transmit(mlxsw_core, trans_skb, tx_info); - if (!err) { - struct sk_buff *resp_skb = mlxsw_core->emad.resp_skb; +static void mlxsw_emad_trans_finish(struct mlxsw_reg_trans *trans, int err) +{ + struct mlxsw_core *mlxsw_core = trans->core; + + dev_kfree_skb(trans->tx_skb); + spin_lock_bh(&mlxsw_core->emad.trans_list_lock); + list_del_rcu(&trans->list); + spin_unlock_bh(&mlxsw_core->emad.trans_list_lock); + trans->err = err; + complete(&trans->completion); +} + +static void mlxsw_emad_transmit_retry(struct mlxsw_core *mlxsw_core, + struct mlxsw_reg_trans *trans) +{ + int err; - err = mlxsw_emad_process_status_skb(mlxsw_core, resp_skb); - if (err) - dev_kfree_skb(resp_skb); - if (!err || err != -EAGAIN) - goto out; + if (trans->retries < MLXSW_EMAD_MAX_RETRY) { + trans->retries++; + err = mlxsw_emad_transmit(trans->core, trans); + if (err == 0) + return; + } else { + err = -EIO; } - if (n_retry++ < MLXSW_EMAD_MAX_RETRY) - goto retry; + mlxsw_emad_trans_finish(trans, err); +} -out: - dev_kfree_skb(skb); - mlxsw_core->emad.tid++; - return err; +static void mlxsw_emad_trans_timeout_work(struct work_struct *work) +{ + struct mlxsw_reg_trans *trans = container_of(work, + struct mlxsw_reg_trans, + timeout_dw.work); + + if (!atomic_dec_and_test(&trans->active)) + return; + + mlxsw_emad_transmit_retry(trans->core, trans); } +static void mlxsw_emad_process_response(struct mlxsw_core *mlxsw_core, + struct mlxsw_reg_trans *trans, + struct sk_buff *skb) +{ + int err; + + if (!atomic_dec_and_test(&trans->active)) + return; + + err = mlxsw_emad_process_status_skb(skb, &trans->emad_status); + if (err == -EAGAIN) { + mlxsw_emad_transmit_retry(mlxsw_core, trans); + } else { + if (err == 0) { + char *op_tlv = mlxsw_emad_op_tlv(skb); + + if (trans->cb) + trans->cb(mlxsw_core, + mlxsw_emad_reg_payload(op_tlv), + trans->reg->len, trans->cb_priv); + } + mlxsw_emad_trans_finish(trans, err); + } +} + +/* called with rcu read lock held */ static void mlxsw_emad_rx_listener_func(struct sk_buff *skb, u8 local_port, void *priv) { struct mlxsw_core *mlxsw_core = priv; + struct mlxsw_reg_trans *trans; - if (mlxsw_emad_is_resp(skb) && - mlxsw_core->emad.trans_active && - mlxsw_emad_get_tid(skb) == mlxsw_core->emad.tid) { - mlxsw_core->emad.resp_skb = skb; - mlxsw_core->emad.trans_active = false; - wake_up(&mlxsw_core->emad.wait); - } else { - dev_kfree_skb(skb); + trace_devlink_hwmsg(priv_to_devlink(mlxsw_core), true, 0, + skb->data, skb->len); + + if (!mlxsw_emad_is_resp(skb)) + goto free_skb; + + list_for_each_entry_rcu(trans, &mlxsw_core->emad.trans_list, list) { + if (mlxsw_emad_get_tid(skb) == trans->tid) { + mlxsw_emad_process_response(mlxsw_core, trans, skb); + break; + } } + +free_skb: + dev_kfree_skb(skb); } static const struct mlxsw_rx_listener mlxsw_emad_rx_listener = { @@ -522,18 +576,19 @@ static int mlxsw_emad_traps_set(struct mlxsw_core *mlxsw_core) static int mlxsw_emad_init(struct mlxsw_core *mlxsw_core) { + u64 tid; int err; /* Set the upper 32 bits of the transaction ID field to a random * number. This allows us to discard EMADs addressed to other * devices. */ - get_random_bytes(&mlxsw_core->emad.tid, 4); - mlxsw_core->emad.tid = mlxsw_core->emad.tid << 32; + get_random_bytes(&tid, 4); + tid <<= 32; + atomic64_set(&mlxsw_core->emad.tid, tid); - init_waitqueue_head(&mlxsw_core->emad.wait); - mlxsw_core->emad.trans_active = false; - mutex_init(&mlxsw_core->emad.lock); + INIT_LIST_HEAD(&mlxsw_core->emad.trans_list); + spin_lock_init(&mlxsw_core->emad.trans_list_lock); err = mlxsw_core_rx_listener_register(mlxsw_core, &mlxsw_emad_rx_listener, @@ -591,6 +646,59 @@ static struct sk_buff *mlxsw_emad_alloc(const struct mlxsw_core *mlxsw_core, return skb; } +static int mlxsw_emad_reg_access(struct mlxsw_core *mlxsw_core, + const struct mlxsw_reg_info *reg, + char *payload, + enum mlxsw_core_reg_access_type type, + struct mlxsw_reg_trans *trans, + struct list_head *bulk_list, + mlxsw_reg_trans_cb_t *cb, + unsigned long cb_priv, u64 tid) +{ + struct sk_buff *skb; + int err; + + dev_dbg(mlxsw_core->bus_info->dev, "EMAD reg access (tid=%llx,reg_id=%x(%s),type=%s)\n", + trans->tid, reg->id, mlxsw_reg_id_str(reg->id), + mlxsw_core_reg_access_type_str(type)); + + skb = mlxsw_emad_alloc(mlxsw_core, reg->len); + if (!skb) + return -ENOMEM; + + list_add_tail(&trans->bulk_list, bulk_list); + trans->core = mlxsw_core; + trans->tx_skb = skb; + trans->tx_info.local_port = MLXSW_PORT_CPU_PORT; + trans->tx_info.is_emad = true; + INIT_DELAYED_WORK(&trans->timeout_dw, mlxsw_emad_trans_timeout_work); + trans->tid = tid; + init_completion(&trans->completion); + trans->cb = cb; + trans->cb_priv = cb_priv; + trans->reg = reg; + trans->type = type; + + mlxsw_emad_construct(skb, reg, payload, type, trans->tid); + mlxsw_core->driver->txhdr_construct(skb, &trans->tx_info); + + spin_lock_bh(&mlxsw_core->emad.trans_list_lock); + list_add_tail_rcu(&trans->list, &mlxsw_core->emad.trans_list); + spin_unlock_bh(&mlxsw_core->emad.trans_list_lock); + err = mlxsw_emad_transmit(mlxsw_core, trans); + if (err) + goto err_out; + return 0; + +err_out: + spin_lock_bh(&mlxsw_core->emad.trans_list_lock); + list_del_rcu(&trans->list); + spin_unlock_bh(&mlxsw_core->emad.trans_list_lock); + list_del(&trans->bulk_list); + dev_kfree_skb(trans->tx_skb); + return err; +} + /***************** * Core functions *****************/ @@ -680,24 +788,6 @@ static const struct file_operations mlxsw_core_rx_stats_dbg_ops = { .llseek = seq_lseek }; -static void mlxsw_core_buf_dump_dbg(struct mlxsw_core *mlxsw_core, - const char *buf, size_t size) -{ - __be32 *m = (__be32 *) buf; - int i; - int count = size / sizeof(__be32); - - for (i = count - 1; i >= 0; i--) - if (m[i]) - break; - i++; - count = i ? i : 1; - for (i = 0; i < count; i += 4) - dev_dbg(mlxsw_core->bus_info->dev, "%04x - %08x %08x %08x %08x\n", - i * 4, be32_to_cpu(m[i]), be32_to_cpu(m[i + 1]), - be32_to_cpu(m[i + 2]), be32_to_cpu(m[i + 3])); -} - int mlxsw_core_driver_register(struct mlxsw_driver *mlxsw_driver) { spin_lock(&mlxsw_core_driver_list_lock); @@ -795,8 +885,7 @@ static int mlxsw_devlink_port_split(struct devlink *devlink, return -EINVAL; if (!mlxsw_core->driver->port_split) return -EOPNOTSUPP; - return mlxsw_core->driver->port_split(mlxsw_core->driver_priv, - port_index, count); + return mlxsw_core->driver->port_split(mlxsw_core, port_index, count); } static int mlxsw_devlink_port_unsplit(struct devlink *devlink, @@ -808,13 +897,171 @@ static int mlxsw_devlink_port_unsplit(struct devlink *devlink, return -EINVAL; if (!mlxsw_core->driver->port_unsplit) return -EOPNOTSUPP; - return mlxsw_core->driver->port_unsplit(mlxsw_core->driver_priv, - port_index); + return mlxsw_core->driver->port_unsplit(mlxsw_core, port_index); +} + +static int +mlxsw_devlink_sb_pool_get(struct devlink *devlink, + unsigned int sb_index, u16 pool_index, + struct devlink_sb_pool_info *pool_info) +{ + struct mlxsw_core *mlxsw_core = devlink_priv(devlink); + struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver; + + if (!mlxsw_driver->sb_pool_get) + return -EOPNOTSUPP; + return mlxsw_driver->sb_pool_get(mlxsw_core, sb_index, + pool_index, pool_info); +} + +static int +mlxsw_devlink_sb_pool_set(struct devlink *devlink, + unsigned int sb_index, u16 pool_index, u32 size, + enum devlink_sb_threshold_type threshold_type) +{ + struct mlxsw_core *mlxsw_core = devlink_priv(devlink); + struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver; + + if (!mlxsw_driver->sb_pool_set) + return -EOPNOTSUPP; + return mlxsw_driver->sb_pool_set(mlxsw_core, sb_index, + pool_index, size, threshold_type); +} + +static void *__dl_port(struct devlink_port *devlink_port) +{ + return container_of(devlink_port, struct mlxsw_core_port, devlink_port); +} + +static int mlxsw_devlink_sb_port_pool_get(struct devlink_port *devlink_port, + unsigned int sb_index, u16 pool_index, + u32 *p_threshold) +{ + struct mlxsw_core *mlxsw_core = devlink_priv(devlink_port->devlink); + struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver; + struct mlxsw_core_port *mlxsw_core_port = __dl_port(devlink_port); + + if (!mlxsw_driver->sb_port_pool_get) + return -EOPNOTSUPP; + return mlxsw_driver->sb_port_pool_get(mlxsw_core_port, sb_index, + pool_index, p_threshold); +} + +static int mlxsw_devlink_sb_port_pool_set(struct devlink_port *devlink_port, + unsigned int sb_index, u16 pool_index, + u32 threshold) +{ + struct mlxsw_core *mlxsw_core = devlink_priv(devlink_port->devlink); + struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver; + struct mlxsw_core_port *mlxsw_core_port = __dl_port(devlink_port); + + if (!mlxsw_driver->sb_port_pool_set) + return -EOPNOTSUPP; + return mlxsw_driver->sb_port_pool_set(mlxsw_core_port, sb_index, + pool_index, threshold); +} + +static int +mlxsw_devlink_sb_tc_pool_bind_get(struct devlink_port *devlink_port, + unsigned int sb_index, u16 tc_index, + enum devlink_sb_pool_type pool_type, + u16 *p_pool_index, u32 *p_threshold) +{ + struct mlxsw_core *mlxsw_core = devlink_priv(devlink_port->devlink); + struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver; + struct mlxsw_core_port *mlxsw_core_port = __dl_port(devlink_port); + + if (!mlxsw_driver->sb_tc_pool_bind_get) + return -EOPNOTSUPP; + return mlxsw_driver->sb_tc_pool_bind_get(mlxsw_core_port, sb_index, + tc_index, pool_type, + p_pool_index, p_threshold); +} + +static int +mlxsw_devlink_sb_tc_pool_bind_set(struct devlink_port *devlink_port, + unsigned int sb_index, u16 tc_index, + enum devlink_sb_pool_type pool_type, + u16 pool_index, u32 threshold) +{ + struct mlxsw_core *mlxsw_core = devlink_priv(devlink_port->devlink); + struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver; + struct mlxsw_core_port *mlxsw_core_port = __dl_port(devlink_port); + + if (!mlxsw_driver->sb_tc_pool_bind_set) + return -EOPNOTSUPP; + return mlxsw_driver->sb_tc_pool_bind_set(mlxsw_core_port, sb_index, + tc_index, pool_type, + pool_index, threshold); +} + +static int mlxsw_devlink_sb_occ_snapshot(struct devlink *devlink, + unsigned int sb_index) +{ + struct mlxsw_core *mlxsw_core = devlink_priv(devlink); + struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver; + + if (!mlxsw_driver->sb_occ_snapshot) + return -EOPNOTSUPP; + return mlxsw_driver->sb_occ_snapshot(mlxsw_core, sb_index); +} + +static int mlxsw_devlink_sb_occ_max_clear(struct devlink *devlink, + unsigned int sb_index) +{ + struct mlxsw_core *mlxsw_core = devlink_priv(devlink); + struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver; + + if (!mlxsw_driver->sb_occ_max_clear) + return -EOPNOTSUPP; + return mlxsw_driver->sb_occ_max_clear(mlxsw_core, sb_index); +} + +static int +mlxsw_devlink_sb_occ_port_pool_get(struct devlink_port *devlink_port, + unsigned int sb_index, u16 pool_index, + u32 *p_cur, u32 *p_max) +{ + struct mlxsw_core *mlxsw_core = devlink_priv(devlink_port->devlink); + struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver; + struct mlxsw_core_port *mlxsw_core_port = __dl_port(devlink_port); + + if (!mlxsw_driver->sb_occ_port_pool_get) + return -EOPNOTSUPP; + return mlxsw_driver->sb_occ_port_pool_get(mlxsw_core_port, sb_index, + pool_index, p_cur, p_max); +} + +static int +mlxsw_devlink_sb_occ_tc_port_bind_get(struct devlink_port *devlink_port, + unsigned int sb_index, u16 tc_index, + enum devlink_sb_pool_type pool_type, + u32 *p_cur, u32 *p_max) +{ + struct mlxsw_core *mlxsw_core = devlink_priv(devlink_port->devlink); + struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver; + struct mlxsw_core_port *mlxsw_core_port = __dl_port(devlink_port); + + if (!mlxsw_driver->sb_occ_tc_port_bind_get) + return -EOPNOTSUPP; + return mlxsw_driver->sb_occ_tc_port_bind_get(mlxsw_core_port, + sb_index, tc_index, + pool_type, p_cur, p_max); } static const struct devlink_ops mlxsw_devlink_ops = { - .port_split = mlxsw_devlink_port_split, - .port_unsplit = mlxsw_devlink_port_unsplit, + .port_split = mlxsw_devlink_port_split, + .port_unsplit = mlxsw_devlink_port_unsplit, + .sb_pool_get = mlxsw_devlink_sb_pool_get, + .sb_pool_set = mlxsw_devlink_sb_pool_set, + .sb_port_pool_get = mlxsw_devlink_sb_port_pool_get, + .sb_port_pool_set = mlxsw_devlink_sb_port_pool_set, + .sb_tc_pool_bind_get = mlxsw_devlink_sb_tc_pool_bind_get, + .sb_tc_pool_bind_set = mlxsw_devlink_sb_tc_pool_bind_set, + .sb_occ_snapshot = mlxsw_devlink_sb_occ_snapshot, + .sb_occ_max_clear = mlxsw_devlink_sb_occ_max_clear, + .sb_occ_port_pool_get = mlxsw_devlink_sb_occ_port_pool_get, + .sb_occ_tc_port_bind_get = mlxsw_devlink_sb_occ_tc_port_bind_get, }; int mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, @@ -864,7 +1111,8 @@ int mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, } } - err = mlxsw_bus->init(bus_priv, mlxsw_core, mlxsw_driver->profile); + err = mlxsw_bus->init(bus_priv, mlxsw_core, mlxsw_driver->profile, + &mlxsw_core->resources); if (err) goto err_bus_init; @@ -872,16 +1120,15 @@ int mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, if (err) goto err_emad_init; - err = mlxsw_hwmon_init(mlxsw_core, mlxsw_bus_info, &mlxsw_core->hwmon); - if (err) - goto err_hwmon_init; - err = devlink_register(devlink, mlxsw_bus_info->dev); if (err) goto err_devlink_register; - err = mlxsw_driver->init(mlxsw_core->driver_priv, mlxsw_core, - mlxsw_bus_info); + err = mlxsw_hwmon_init(mlxsw_core, mlxsw_bus_info, &mlxsw_core->hwmon); + if (err) + goto err_hwmon_init; + + err = mlxsw_driver->init(mlxsw_core, mlxsw_bus_info); if (err) goto err_driver_init; @@ -892,11 +1139,11 @@ int mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, return 0; err_debugfs_init: - mlxsw_core->driver->fini(mlxsw_core->driver_priv); + mlxsw_core->driver->fini(mlxsw_core); err_driver_init: +err_hwmon_init: devlink_unregister(devlink); err_devlink_register: -err_hwmon_init: mlxsw_emad_fini(mlxsw_core); err_emad_init: mlxsw_bus->fini(bus_priv); @@ -918,7 +1165,7 @@ void mlxsw_core_bus_device_unregister(struct mlxsw_core *mlxsw_core) struct devlink *devlink = priv_to_devlink(mlxsw_core); mlxsw_core_debugfs_fini(mlxsw_core); - mlxsw_core->driver->fini(mlxsw_core->driver_priv); + mlxsw_core->driver->fini(mlxsw_core); devlink_unregister(devlink); mlxsw_emad_fini(mlxsw_core); mlxsw_core->bus->fini(mlxsw_core->bus_priv); @@ -929,26 +1176,17 @@ void mlxsw_core_bus_device_unregister(struct mlxsw_core *mlxsw_core) } EXPORT_SYMBOL(mlxsw_core_bus_device_unregister); -static struct mlxsw_core *__mlxsw_core_get(void *driver_priv) -{ - return container_of(driver_priv, struct mlxsw_core, driver_priv); -} - -bool mlxsw_core_skb_transmit_busy(void *driver_priv, +bool mlxsw_core_skb_transmit_busy(struct mlxsw_core *mlxsw_core, const struct mlxsw_tx_info *tx_info) { - struct mlxsw_core *mlxsw_core = __mlxsw_core_get(driver_priv); - return mlxsw_core->bus->skb_transmit_busy(mlxsw_core->bus_priv, tx_info); } EXPORT_SYMBOL(mlxsw_core_skb_transmit_busy); -int mlxsw_core_skb_transmit(void *driver_priv, struct sk_buff *skb, +int mlxsw_core_skb_transmit(struct mlxsw_core *mlxsw_core, struct sk_buff *skb, const struct mlxsw_tx_info *tx_info) { - struct mlxsw_core *mlxsw_core = __mlxsw_core_get(driver_priv); - return mlxsw_core->bus->skb_transmit(mlxsw_core->bus_priv, skb, tx_info); } @@ -1108,56 +1346,112 @@ void mlxsw_core_event_listener_unregister(struct mlxsw_core *mlxsw_core, } EXPORT_SYMBOL(mlxsw_core_event_listener_unregister); +static u64 mlxsw_core_tid_get(struct mlxsw_core *mlxsw_core) +{ + return atomic64_inc_return(&mlxsw_core->emad.tid); +} + static int mlxsw_core_reg_access_emad(struct mlxsw_core *mlxsw_core, const struct mlxsw_reg_info *reg, char *payload, - enum mlxsw_core_reg_access_type type) + enum mlxsw_core_reg_access_type type, + struct list_head *bulk_list, + mlxsw_reg_trans_cb_t *cb, + unsigned long cb_priv) { + u64 tid = mlxsw_core_tid_get(mlxsw_core); + struct mlxsw_reg_trans *trans; int err; - char *op_tlv; - struct sk_buff *skb; - struct mlxsw_tx_info tx_info = { - .local_port = MLXSW_PORT_CPU_PORT, - .is_emad = true, - }; - skb = mlxsw_emad_alloc(mlxsw_core, reg->len); - if (!skb) + trans = kzalloc(sizeof(*trans), GFP_KERNEL); + if (!trans) return -ENOMEM; - mlxsw_emad_construct(skb, reg, payload, type, mlxsw_core); - mlxsw_core->driver->txhdr_construct(skb, &tx_info); + err = mlxsw_emad_reg_access(mlxsw_core, reg, payload, type, trans, + bulk_list, cb, cb_priv, tid); + if (err) { + kfree(trans); + return err; + } + return 0; +} - dev_dbg(mlxsw_core->bus_info->dev, "EMAD send (tid=%llx)\n", - mlxsw_core->emad.tid); - mlxsw_core_buf_dump_dbg(mlxsw_core, skb->data, skb->len); +int mlxsw_reg_trans_query(struct mlxsw_core *mlxsw_core, + const struct mlxsw_reg_info *reg, char *payload, + struct list_head *bulk_list, + mlxsw_reg_trans_cb_t *cb, unsigned long cb_priv) +{ + return mlxsw_core_reg_access_emad(mlxsw_core, reg, payload, + MLXSW_CORE_REG_ACCESS_TYPE_QUERY, + bulk_list, cb, cb_priv); +} +EXPORT_SYMBOL(mlxsw_reg_trans_query); - err = mlxsw_emad_transmit(mlxsw_core, skb, &tx_info); - if (!err) { - op_tlv = mlxsw_emad_op_tlv(mlxsw_core->emad.resp_skb); - memcpy(payload, mlxsw_emad_reg_payload(op_tlv), - reg->len); +int mlxsw_reg_trans_write(struct mlxsw_core *mlxsw_core, + const struct mlxsw_reg_info *reg, char *payload, + struct list_head *bulk_list, + mlxsw_reg_trans_cb_t *cb, unsigned long cb_priv) +{ + return mlxsw_core_reg_access_emad(mlxsw_core, reg, payload, + MLXSW_CORE_REG_ACCESS_TYPE_WRITE, + bulk_list, cb, cb_priv); +} +EXPORT_SYMBOL(mlxsw_reg_trans_write); - dev_dbg(mlxsw_core->bus_info->dev, "EMAD recv (tid=%llx)\n", - mlxsw_core->emad.tid - 1); - mlxsw_core_buf_dump_dbg(mlxsw_core, - mlxsw_core->emad.resp_skb->data, - mlxsw_core->emad.resp_skb->len); +static int mlxsw_reg_trans_wait(struct mlxsw_reg_trans *trans) +{ + struct mlxsw_core *mlxsw_core = trans->core; + int err; - dev_kfree_skb(mlxsw_core->emad.resp_skb); - } + wait_for_completion(&trans->completion); + cancel_delayed_work_sync(&trans->timeout_dw); + err = trans->err; + if (trans->retries) + dev_warn(mlxsw_core->bus_info->dev, "EMAD retries (%d/%d) (tid=%llx)\n", + trans->retries, MLXSW_EMAD_MAX_RETRY, trans->tid); + if (err) + dev_err(mlxsw_core->bus_info->dev, "EMAD reg access failed (tid=%llx,reg_id=%x(%s),type=%s,status=%x(%s))\n", + trans->tid, trans->reg->id, + mlxsw_reg_id_str(trans->reg->id), + mlxsw_core_reg_access_type_str(trans->type), + trans->emad_status, + mlxsw_emad_op_tlv_status_str(trans->emad_status)); + + list_del(&trans->bulk_list); + kfree_rcu(trans, rcu); return err; } +int mlxsw_reg_trans_bulk_wait(struct list_head *bulk_list) +{ + struct mlxsw_reg_trans *trans; + struct mlxsw_reg_trans *tmp; + int sum_err = 0; + int err; + + list_for_each_entry_safe(trans, tmp, bulk_list, bulk_list) { + err = mlxsw_reg_trans_wait(trans); + if (err && sum_err == 0) + sum_err = err; /* first error to be returned */ + } + return sum_err; +} +EXPORT_SYMBOL(mlxsw_reg_trans_bulk_wait); + static int mlxsw_core_reg_access_cmd(struct mlxsw_core *mlxsw_core, const struct mlxsw_reg_info *reg, char *payload, enum mlxsw_core_reg_access_type type) { + enum mlxsw_emad_op_tlv_status status; int err, n_retry; char *in_mbox, *out_mbox, *tmp; + dev_dbg(mlxsw_core->bus_info->dev, "Reg cmd access (reg_id=%x(%s),type=%s)\n", + reg->id, mlxsw_reg_id_str(reg->id), + mlxsw_core_reg_access_type_str(type)); + in_mbox = mlxsw_cmd_mbox_alloc(); if (!in_mbox) return -ENOMEM; @@ -1168,7 +1462,8 @@ static int mlxsw_core_reg_access_cmd(struct mlxsw_core *mlxsw_core, goto free_in_mbox; } - mlxsw_emad_pack_op_tlv(in_mbox, reg, type, mlxsw_core); + mlxsw_emad_pack_op_tlv(in_mbox, reg, type, + mlxsw_core_tid_get(mlxsw_core)); tmp = in_mbox + MLXSW_EMAD_OP_TLV_LEN * sizeof(u32); mlxsw_emad_pack_reg_tlv(tmp, reg, payload); @@ -1176,60 +1471,61 @@ static int mlxsw_core_reg_access_cmd(struct mlxsw_core *mlxsw_core, retry: err = mlxsw_cmd_access_reg(mlxsw_core, in_mbox, out_mbox); if (!err) { - err = mlxsw_emad_process_status(mlxsw_core, out_mbox); - if (err == -EAGAIN && n_retry++ < MLXSW_EMAD_MAX_RETRY) - goto retry; + err = mlxsw_emad_process_status(out_mbox, &status); + if (err) { + if (err == -EAGAIN && n_retry++ < MLXSW_EMAD_MAX_RETRY) + goto retry; + dev_err(mlxsw_core->bus_info->dev, "Reg cmd access status failed (status=%x(%s))\n", + status, mlxsw_emad_op_tlv_status_str(status)); + } } if (!err) memcpy(payload, mlxsw_emad_reg_payload(out_mbox), reg->len); - mlxsw_core->emad.tid++; mlxsw_cmd_mbox_free(out_mbox); free_in_mbox: mlxsw_cmd_mbox_free(in_mbox); + if (err) + dev_err(mlxsw_core->bus_info->dev, "Reg cmd access failed (reg_id=%x(%s),type=%s)\n", + reg->id, mlxsw_reg_id_str(reg->id), + mlxsw_core_reg_access_type_str(type)); return err; } +static void mlxsw_core_reg_access_cb(struct mlxsw_core *mlxsw_core, + char *payload, size_t payload_len, + unsigned long cb_priv) +{ + char *orig_payload = (char *) cb_priv; + + memcpy(orig_payload, payload, payload_len); +} + static int mlxsw_core_reg_access(struct mlxsw_core *mlxsw_core, const struct mlxsw_reg_info *reg, char *payload, enum mlxsw_core_reg_access_type type) { - u64 cur_tid; + LIST_HEAD(bulk_list); int err; - if (mutex_lock_interruptible(&mlxsw_core->emad.lock)) { - dev_err(mlxsw_core->bus_info->dev, "Reg access interrupted (reg_id=%x(%s),type=%s)\n", - reg->id, mlxsw_reg_id_str(reg->id), - mlxsw_core_reg_access_type_str(type)); - return -EINTR; - } - - cur_tid = mlxsw_core->emad.tid; - dev_dbg(mlxsw_core->bus_info->dev, "Reg access (tid=%llx,reg_id=%x(%s),type=%s)\n", - cur_tid, reg->id, mlxsw_reg_id_str(reg->id), - mlxsw_core_reg_access_type_str(type)); - /* During initialization EMAD interface is not available to us, * so we default to command interface. We switch to EMAD interface * after setting the appropriate traps. */ if (!mlxsw_core->emad.use_emad) - err = mlxsw_core_reg_access_cmd(mlxsw_core, reg, - payload, type); - else - err = mlxsw_core_reg_access_emad(mlxsw_core, reg, + return mlxsw_core_reg_access_cmd(mlxsw_core, reg, payload, type); + err = mlxsw_core_reg_access_emad(mlxsw_core, reg, + payload, type, &bulk_list, + mlxsw_core_reg_access_cb, + (unsigned long) payload); if (err) - dev_err(mlxsw_core->bus_info->dev, "Reg access failed (tid=%llx,reg_id=%x(%s),type=%s)\n", - cur_tid, reg->id, mlxsw_reg_id_str(reg->id), - mlxsw_core_reg_access_type_str(type)); - - mutex_unlock(&mlxsw_core->emad.lock); - return err; + return err; + return mlxsw_reg_trans_bulk_wait(&bulk_list); } int mlxsw_reg_query(struct mlxsw_core *mlxsw_core, @@ -1358,6 +1654,52 @@ void mlxsw_core_lag_mapping_clear(struct mlxsw_core *mlxsw_core, } EXPORT_SYMBOL(mlxsw_core_lag_mapping_clear); +struct mlxsw_resources *mlxsw_core_resources_get(struct mlxsw_core *mlxsw_core) +{ + return &mlxsw_core->resources; +} +EXPORT_SYMBOL(mlxsw_core_resources_get); + +int mlxsw_core_port_init(struct mlxsw_core *mlxsw_core, + struct mlxsw_core_port *mlxsw_core_port, u8 local_port, + struct net_device *dev, bool split, u32 split_group) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_core); + struct devlink_port *devlink_port = &mlxsw_core_port->devlink_port; + + if (split) + devlink_port_split_set(devlink_port, split_group); + devlink_port_type_eth_set(devlink_port, dev); + return devlink_port_register(devlink, devlink_port, local_port); +} +EXPORT_SYMBOL(mlxsw_core_port_init); + +void mlxsw_core_port_fini(struct mlxsw_core_port *mlxsw_core_port) +{ + struct devlink_port *devlink_port = &mlxsw_core_port->devlink_port; + + devlink_port_unregister(devlink_port); +} +EXPORT_SYMBOL(mlxsw_core_port_fini); + +static void mlxsw_core_buf_dump_dbg(struct mlxsw_core *mlxsw_core, + const char *buf, size_t size) +{ + __be32 *m = (__be32 *) buf; + int i; + int count = size / sizeof(__be32); + + for (i = count - 1; i >= 0; i--) + if (m[i]) + break; + i++; + count = i ? i : 1; + for (i = 0; i < count; i += 4) + dev_dbg(mlxsw_core->bus_info->dev, "%04x - %08x %08x %08x %08x\n", + i * 4, be32_to_cpu(m[i]), be32_to_cpu(m[i + 1]), + be32_to_cpu(m[i + 2]), be32_to_cpu(m[i + 3])); +} + int mlxsw_cmd_exec(struct mlxsw_core *mlxsw_core, u16 opcode, u8 opcode_mod, u32 in_mod, bool out_mbox_direct, char *in_mbox, size_t in_mbox_size, @@ -1400,17 +1742,35 @@ int mlxsw_cmd_exec(struct mlxsw_core *mlxsw_core, u16 opcode, u8 opcode_mod, } EXPORT_SYMBOL(mlxsw_cmd_exec); +int mlxsw_core_schedule_dw(struct delayed_work *dwork, unsigned long delay) +{ + return queue_delayed_work(mlxsw_wq, dwork, delay); +} +EXPORT_SYMBOL(mlxsw_core_schedule_dw); + static int __init mlxsw_core_module_init(void) { - mlxsw_core_dbg_root = debugfs_create_dir(mlxsw_core_driver_name, NULL); - if (!mlxsw_core_dbg_root) + int err; + + mlxsw_wq = alloc_workqueue(mlxsw_core_driver_name, WQ_MEM_RECLAIM, 0); + if (!mlxsw_wq) return -ENOMEM; + mlxsw_core_dbg_root = debugfs_create_dir(mlxsw_core_driver_name, NULL); + if (!mlxsw_core_dbg_root) { + err = -ENOMEM; + goto err_debugfs_create_dir; + } return 0; + +err_debugfs_create_dir: + destroy_workqueue(mlxsw_wq); + return err; } static void __exit mlxsw_core_module_exit(void) { debugfs_remove_recursive(mlxsw_core_dbg_root); + destroy_workqueue(mlxsw_wq); } module_init(mlxsw_core_module_init); diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h index c73d1c0792a6..d3476ead9982 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core.h @@ -43,6 +43,8 @@ #include <linux/gfp.h> #include <linux/types.h> #include <linux/skbuff.h> +#include <linux/workqueue.h> +#include <net/devlink.h> #include "trap.h" #include "reg.h" @@ -61,6 +63,8 @@ struct mlxsw_driver; struct mlxsw_bus; struct mlxsw_bus_info; +void *mlxsw_core_driver_priv(struct mlxsw_core *mlxsw_core); + int mlxsw_core_driver_register(struct mlxsw_driver *mlxsw_driver); void mlxsw_core_driver_unregister(struct mlxsw_driver *mlxsw_driver); @@ -74,10 +78,9 @@ struct mlxsw_tx_info { bool is_emad; }; -bool mlxsw_core_skb_transmit_busy(void *driver_priv, +bool mlxsw_core_skb_transmit_busy(struct mlxsw_core *mlxsw_core, const struct mlxsw_tx_info *tx_info); - -int mlxsw_core_skb_transmit(void *driver_priv, struct sk_buff *skb, +int mlxsw_core_skb_transmit(struct mlxsw_core *mlxsw_core, struct sk_buff *skb, const struct mlxsw_tx_info *tx_info); struct mlxsw_rx_listener { @@ -106,6 +109,19 @@ void mlxsw_core_event_listener_unregister(struct mlxsw_core *mlxsw_core, const struct mlxsw_event_listener *el, void *priv); +typedef void mlxsw_reg_trans_cb_t(struct mlxsw_core *mlxsw_core, char *payload, + size_t payload_len, unsigned long cb_priv); + +int mlxsw_reg_trans_query(struct mlxsw_core *mlxsw_core, + const struct mlxsw_reg_info *reg, char *payload, + struct list_head *bulk_list, + mlxsw_reg_trans_cb_t *cb, unsigned long cb_priv); +int mlxsw_reg_trans_write(struct mlxsw_core *mlxsw_core, + const struct mlxsw_reg_info *reg, char *payload, + struct list_head *bulk_list, + mlxsw_reg_trans_cb_t *cb, unsigned long cb_priv); +int mlxsw_reg_trans_bulk_wait(struct list_head *bulk_list); + int mlxsw_reg_query(struct mlxsw_core *mlxsw_core, const struct mlxsw_reg_info *reg, char *payload); int mlxsw_reg_write(struct mlxsw_core *mlxsw_core, @@ -131,6 +147,26 @@ u8 mlxsw_core_lag_mapping_get(struct mlxsw_core *mlxsw_core, void mlxsw_core_lag_mapping_clear(struct mlxsw_core *mlxsw_core, u16 lag_id, u8 local_port); +struct mlxsw_core_port { + struct devlink_port devlink_port; +}; + +static inline void * +mlxsw_core_port_driver_priv(struct mlxsw_core_port *mlxsw_core_port) +{ + /* mlxsw_core_port is ensured to always be the first field in driver + * port structure. + */ + return mlxsw_core_port; +} + +int mlxsw_core_port_init(struct mlxsw_core *mlxsw_core, + struct mlxsw_core_port *mlxsw_core_port, u8 local_port, + struct net_device *dev, bool split, u32 split_group); +void mlxsw_core_port_fini(struct mlxsw_core_port *mlxsw_core_port); + +int mlxsw_core_schedule_dw(struct delayed_work *dwork, unsigned long delay); + #define MLXSW_CONFIG_PROFILE_SWID_COUNT 8 struct mlxsw_swid_config { @@ -154,7 +190,8 @@ struct mlxsw_config_profile { used_max_ib_mc:1, used_max_pkey:1, used_ar_sec:1, - used_adaptive_routing_group_cap:1; + used_adaptive_routing_group_cap:1, + used_kvd_sizes:1; u8 max_vepa_channels; u16 max_lag; u16 max_port_per_lag; @@ -175,6 +212,10 @@ struct mlxsw_config_profile { u8 ar_sec; u16 adaptive_routing_group_cap; u8 arn; + u32 kvd_linear_size; + u32 kvd_hash_single_size; + u32 kvd_hash_double_size; + u8 resource_query_enable; struct mlxsw_swid_config swid_config[MLXSW_CONFIG_PROFILE_SWID_COUNT]; }; @@ -183,21 +224,61 @@ struct mlxsw_driver { const char *kind; struct module *owner; size_t priv_size; - int (*init)(void *driver_priv, struct mlxsw_core *mlxsw_core, + int (*init)(struct mlxsw_core *mlxsw_core, const struct mlxsw_bus_info *mlxsw_bus_info); - void (*fini)(void *driver_priv); - int (*port_split)(void *driver_priv, u8 local_port, unsigned int count); - int (*port_unsplit)(void *driver_priv, u8 local_port); + void (*fini)(struct mlxsw_core *mlxsw_core); + int (*port_split)(struct mlxsw_core *mlxsw_core, u8 local_port, + unsigned int count); + int (*port_unsplit)(struct mlxsw_core *mlxsw_core, u8 local_port); + int (*sb_pool_get)(struct mlxsw_core *mlxsw_core, + unsigned int sb_index, u16 pool_index, + struct devlink_sb_pool_info *pool_info); + int (*sb_pool_set)(struct mlxsw_core *mlxsw_core, + unsigned int sb_index, u16 pool_index, u32 size, + enum devlink_sb_threshold_type threshold_type); + int (*sb_port_pool_get)(struct mlxsw_core_port *mlxsw_core_port, + unsigned int sb_index, u16 pool_index, + u32 *p_threshold); + int (*sb_port_pool_set)(struct mlxsw_core_port *mlxsw_core_port, + unsigned int sb_index, u16 pool_index, + u32 threshold); + int (*sb_tc_pool_bind_get)(struct mlxsw_core_port *mlxsw_core_port, + unsigned int sb_index, u16 tc_index, + enum devlink_sb_pool_type pool_type, + u16 *p_pool_index, u32 *p_threshold); + int (*sb_tc_pool_bind_set)(struct mlxsw_core_port *mlxsw_core_port, + unsigned int sb_index, u16 tc_index, + enum devlink_sb_pool_type pool_type, + u16 pool_index, u32 threshold); + int (*sb_occ_snapshot)(struct mlxsw_core *mlxsw_core, + unsigned int sb_index); + int (*sb_occ_max_clear)(struct mlxsw_core *mlxsw_core, + unsigned int sb_index); + int (*sb_occ_port_pool_get)(struct mlxsw_core_port *mlxsw_core_port, + unsigned int sb_index, u16 pool_index, + u32 *p_cur, u32 *p_max); + int (*sb_occ_tc_port_bind_get)(struct mlxsw_core_port *mlxsw_core_port, + unsigned int sb_index, u16 tc_index, + enum devlink_sb_pool_type pool_type, + u32 *p_cur, u32 *p_max); void (*txhdr_construct)(struct sk_buff *skb, const struct mlxsw_tx_info *tx_info); u8 txhdr_len; const struct mlxsw_config_profile *profile; }; +struct mlxsw_resources { + u8 max_span_valid:1; + u8 max_span; +}; + +struct mlxsw_resources *mlxsw_core_resources_get(struct mlxsw_core *mlxsw_core); + struct mlxsw_bus { const char *kind; int (*init)(void *bus_priv, struct mlxsw_core *mlxsw_core, - const struct mlxsw_config_profile *profile); + const struct mlxsw_config_profile *profile, + struct mlxsw_resources *resources); void (*fini)(void *bus_priv); bool (*skb_transmit_busy)(void *bus_priv, const struct mlxsw_tx_info *tx_info); diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index 7f4173c8eda3..1d1360c178bb 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -1154,6 +1154,61 @@ mlxsw_pci_config_profile_swid_config(struct mlxsw_pci *mlxsw_pci, mlxsw_cmd_mbox_config_profile_swid_config_mask_set(mbox, index, mask); } +#define MLXSW_RESOURCES_TABLE_END_ID 0xffff +#define MLXSW_MAX_SPAN_ID 0x2420 +#define MLXSW_RESOURCES_QUERY_MAX_QUERIES 100 +#define MLXSW_RESOURCES_PER_QUERY 32 + +static void mlxsw_pci_resources_query_parse(int id, u64 val, + struct mlxsw_resources *resources) +{ + switch (id) { + case MLXSW_MAX_SPAN_ID: + resources->max_span = val; + resources->max_span_valid = 1; + break; + default: + break; + } +} + +static int mlxsw_pci_resources_query(struct mlxsw_pci *mlxsw_pci, char *mbox, + struct mlxsw_resources *resources, + u8 query_enabled) +{ + int index, i; + u64 data; + u16 id; + int err; + + /* Not all the versions support resources query */ + if (!query_enabled) + return 0; + + mlxsw_cmd_mbox_zero(mbox); + + for (index = 0; index < MLXSW_RESOURCES_QUERY_MAX_QUERIES; index++) { + err = mlxsw_cmd_query_resources(mlxsw_pci->core, mbox, index); + if (err) + return err; + + for (i = 0; i < MLXSW_RESOURCES_PER_QUERY; i++) { + id = mlxsw_cmd_mbox_query_resource_id_get(mbox, i); + data = mlxsw_cmd_mbox_query_resource_data_get(mbox, i); + + if (id == MLXSW_RESOURCES_TABLE_END_ID) + return 0; + + mlxsw_pci_resources_query_parse(id, data, resources); + } + } + + /* If after MLXSW_RESOURCES_QUERY_MAX_QUERIES we still didn't get + * MLXSW_RESOURCES_TABLE_END_ID, something went bad in the FW. + */ + return -EIO; +} + static int mlxsw_pci_config_profile(struct mlxsw_pci *mlxsw_pci, char *mbox, const struct mlxsw_config_profile *profile) { @@ -1255,6 +1310,20 @@ static int mlxsw_pci_config_profile(struct mlxsw_pci *mlxsw_pci, char *mbox, mlxsw_cmd_mbox_config_profile_adaptive_routing_group_cap_set( mbox, profile->adaptive_routing_group_cap); } + if (profile->used_kvd_sizes) { + mlxsw_cmd_mbox_config_profile_set_kvd_linear_size_set( + mbox, 1); + mlxsw_cmd_mbox_config_profile_kvd_linear_size_set( + mbox, profile->kvd_linear_size); + mlxsw_cmd_mbox_config_profile_set_kvd_hash_single_size_set( + mbox, 1); + mlxsw_cmd_mbox_config_profile_kvd_hash_single_size_set( + mbox, profile->kvd_hash_single_size); + mlxsw_cmd_mbox_config_profile_set_kvd_hash_double_size_set( + mbox, 1); + mlxsw_cmd_mbox_config_profile_kvd_hash_double_size_set( + mbox, profile->kvd_hash_double_size); + } for (i = 0; i < MLXSW_CONFIG_PROFILE_SWID_COUNT; i++) mlxsw_pci_config_profile_swid_config(mlxsw_pci, mbox, i, @@ -1390,7 +1459,8 @@ static void mlxsw_pci_mbox_free(struct mlxsw_pci *mlxsw_pci, } static int mlxsw_pci_init(void *bus_priv, struct mlxsw_core *mlxsw_core, - const struct mlxsw_config_profile *profile) + const struct mlxsw_config_profile *profile, + struct mlxsw_resources *resources) { struct mlxsw_pci *mlxsw_pci = bus_priv; struct pci_dev *pdev = mlxsw_pci->pdev; @@ -1449,6 +1519,11 @@ static int mlxsw_pci_init(void *bus_priv, struct mlxsw_core *mlxsw_core, if (err) goto err_boardinfo; + err = mlxsw_pci_resources_query(mlxsw_pci, mbox, resources, + profile->resource_query_enable); + if (err) + goto err_query_resources; + err = mlxsw_pci_config_profile(mlxsw_pci, mbox, profile); if (err) goto err_config_profile; @@ -1471,6 +1546,7 @@ err_request_eq_irq: mlxsw_pci_aqs_fini(mlxsw_pci); err_aqs_init: err_config_profile: +err_query_resources: err_boardinfo: mlxsw_pci_fw_area_fini(mlxsw_pci); err_fw_area_init: diff --git a/drivers/net/ethernet/mellanox/mlxsw/port.h b/drivers/net/ethernet/mellanox/mlxsw/port.h index f33b997f2b61..af371a82c35b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/port.h +++ b/drivers/net/ethernet/mellanox/mlxsw/port.h @@ -56,6 +56,7 @@ #define MLXSW_PORT_PHY_BITS_MASK (MLXSW_PORT_MAX_PHY_PORTS - 1) #define MLXSW_PORT_CPU_PORT 0x0 +#define MLXSW_PORT_ROUTER_PORT (MLXSW_PORT_MAX_PHY_PORTS + 2) #define MLXSW_PORT_DONT_CARE (MLXSW_PORT_MAX_PORTS) diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index ffe4c0305733..1721098eef13 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -1,9 +1,10 @@ /* * drivers/net/ethernet/mellanox/mlxsw/reg.h * Copyright (c) 2015 Mellanox Technologies. All rights reserved. - * Copyright (c) 2015 Ido Schimmel <idosch@mellanox.com> + * Copyright (c) 2015-2016 Ido Schimmel <idosch@mellanox.com> * Copyright (c) 2015 Elad Raz <eladr@mellanox.com> - * Copyright (c) 2015 Jiri Pirko <jiri@mellanox.com> + * Copyright (c) 2015-2016 Jiri Pirko <jiri@mellanox.com> + * Copyright (c) 2016 Yotam Gigi <yotamg@mellanox.com> * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -386,7 +387,9 @@ enum mlxsw_reg_sfd_rec_action { /* forward and trap, trap_id is FDB_TRAP */ MLXSW_REG_SFD_REC_ACTION_MIRROR_TO_CPU = 1, /* trap and do not forward, trap_id is FDB_TRAP */ - MLXSW_REG_SFD_REC_ACTION_TRAP = 3, + MLXSW_REG_SFD_REC_ACTION_TRAP = 2, + /* forward to IP router */ + MLXSW_REG_SFD_REC_ACTION_FORWARD_IP_ROUTER = 3, MLXSW_REG_SFD_REC_ACTION_DISCARD_ERROR = 15, }; @@ -1805,6 +1808,184 @@ static inline void mlxsw_reg_spvmlr_pack(char *payload, u8 local_port, } } +/* QTCT - QoS Switch Traffic Class Table + * ------------------------------------- + * Configures the mapping between the packet switch priority and the + * traffic class on the transmit port. + */ +#define MLXSW_REG_QTCT_ID 0x400A +#define MLXSW_REG_QTCT_LEN 0x08 + +static const struct mlxsw_reg_info mlxsw_reg_qtct = { + .id = MLXSW_REG_QTCT_ID, + .len = MLXSW_REG_QTCT_LEN, +}; + +/* reg_qtct_local_port + * Local port number. + * Access: Index + * + * Note: CPU port is not supported. + */ +MLXSW_ITEM32(reg, qtct, local_port, 0x00, 16, 8); + +/* reg_qtct_sub_port + * Virtual port within the physical port. + * Should be set to 0 when virtual ports are not enabled on the port. + * Access: Index + */ +MLXSW_ITEM32(reg, qtct, sub_port, 0x00, 8, 8); + +/* reg_qtct_switch_prio + * Switch priority. + * Access: Index + */ +MLXSW_ITEM32(reg, qtct, switch_prio, 0x00, 0, 4); + +/* reg_qtct_tclass + * Traffic class. + * Default values: + * switch_prio 0 : tclass 1 + * switch_prio 1 : tclass 0 + * switch_prio i : tclass i, for i > 1 + * Access: RW + */ +MLXSW_ITEM32(reg, qtct, tclass, 0x04, 0, 4); + +static inline void mlxsw_reg_qtct_pack(char *payload, u8 local_port, + u8 switch_prio, u8 tclass) +{ + MLXSW_REG_ZERO(qtct, payload); + mlxsw_reg_qtct_local_port_set(payload, local_port); + mlxsw_reg_qtct_switch_prio_set(payload, switch_prio); + mlxsw_reg_qtct_tclass_set(payload, tclass); +} + +/* QEEC - QoS ETS Element Configuration Register + * --------------------------------------------- + * Configures the ETS elements. + */ +#define MLXSW_REG_QEEC_ID 0x400D +#define MLXSW_REG_QEEC_LEN 0x1C + +static const struct mlxsw_reg_info mlxsw_reg_qeec = { + .id = MLXSW_REG_QEEC_ID, + .len = MLXSW_REG_QEEC_LEN, +}; + +/* reg_qeec_local_port + * Local port number. + * Access: Index + * + * Note: CPU port is supported. + */ +MLXSW_ITEM32(reg, qeec, local_port, 0x00, 16, 8); + +enum mlxsw_reg_qeec_hr { + MLXSW_REG_QEEC_HIERARCY_PORT, + MLXSW_REG_QEEC_HIERARCY_GROUP, + MLXSW_REG_QEEC_HIERARCY_SUBGROUP, + MLXSW_REG_QEEC_HIERARCY_TC, +}; + +/* reg_qeec_element_hierarchy + * 0 - Port + * 1 - Group + * 2 - Subgroup + * 3 - Traffic Class + * Access: Index + */ +MLXSW_ITEM32(reg, qeec, element_hierarchy, 0x04, 16, 4); + +/* reg_qeec_element_index + * The index of the element in the hierarchy. + * Access: Index + */ +MLXSW_ITEM32(reg, qeec, element_index, 0x04, 0, 8); + +/* reg_qeec_next_element_index + * The index of the next (lower) element in the hierarchy. + * Access: RW + * + * Note: Reserved for element_hierarchy 0. + */ +MLXSW_ITEM32(reg, qeec, next_element_index, 0x08, 0, 8); + +enum { + MLXSW_REG_QEEC_BYTES_MODE, + MLXSW_REG_QEEC_PACKETS_MODE, +}; + +/* reg_qeec_pb + * Packets or bytes mode. + * 0 - Bytes mode + * 1 - Packets mode + * Access: RW + * + * Note: Used for max shaper configuration. For Spectrum, packets mode + * is supported only for traffic classes of CPU port. + */ +MLXSW_ITEM32(reg, qeec, pb, 0x0C, 28, 1); + +/* reg_qeec_mase + * Max shaper configuration enable. Enables configuration of the max + * shaper on this ETS element. + * 0 - Disable + * 1 - Enable + * Access: RW + */ +MLXSW_ITEM32(reg, qeec, mase, 0x10, 31, 1); + +/* A large max rate will disable the max shaper. */ +#define MLXSW_REG_QEEC_MAS_DIS 200000000 /* Kbps */ + +/* reg_qeec_max_shaper_rate + * Max shaper information rate. + * For CPU port, can only be configured for port hierarchy. + * When in bytes mode, value is specified in units of 1000bps. + * Access: RW + */ +MLXSW_ITEM32(reg, qeec, max_shaper_rate, 0x10, 0, 28); + +/* reg_qeec_de + * DWRR configuration enable. Enables configuration of the dwrr and + * dwrr_weight. + * 0 - Disable + * 1 - Enable + * Access: RW + */ +MLXSW_ITEM32(reg, qeec, de, 0x18, 31, 1); + +/* reg_qeec_dwrr + * Transmission selection algorithm to use on the link going down from + * the ETS element. + * 0 - Strict priority + * 1 - DWRR + * Access: RW + */ +MLXSW_ITEM32(reg, qeec, dwrr, 0x18, 15, 1); + +/* reg_qeec_dwrr_weight + * DWRR weight on the link going down from the ETS element. The + * percentage of bandwidth guaranteed to an ETS element within + * its hierarchy. The sum of all weights across all ETS elements + * within one hierarchy should be equal to 100. Reserved when + * transmission selection algorithm is strict priority. + * Access: RW + */ +MLXSW_ITEM32(reg, qeec, dwrr_weight, 0x18, 0, 8); + +static inline void mlxsw_reg_qeec_pack(char *payload, u8 local_port, + enum mlxsw_reg_qeec_hr hr, u8 index, + u8 next_index) +{ + MLXSW_REG_ZERO(qeec, payload); + mlxsw_reg_qeec_local_port_set(payload, local_port); + mlxsw_reg_qeec_element_hierarchy_set(payload, hr); + mlxsw_reg_qeec_element_index_set(payload, index); + mlxsw_reg_qeec_next_element_index_set(payload, next_index); +} + /* PMLP - Ports Module to Local Port Register * ------------------------------------------ * Configures the assignment of modules to local ports. @@ -2141,6 +2322,145 @@ static inline void mlxsw_reg_paos_pack(char *payload, u8 local_port, mlxsw_reg_paos_e_set(payload, 1); } +/* PFCC - Ports Flow Control Configuration Register + * ------------------------------------------------ + * Configures and retrieves the per port flow control configuration. + */ +#define MLXSW_REG_PFCC_ID 0x5007 +#define MLXSW_REG_PFCC_LEN 0x20 + +static const struct mlxsw_reg_info mlxsw_reg_pfcc = { + .id = MLXSW_REG_PFCC_ID, + .len = MLXSW_REG_PFCC_LEN, +}; + +/* reg_pfcc_local_port + * Local port number. + * Access: Index + */ +MLXSW_ITEM32(reg, pfcc, local_port, 0x00, 16, 8); + +/* reg_pfcc_pnat + * Port number access type. Determines the way local_port is interpreted: + * 0 - Local port number. + * 1 - IB / label port number. + * Access: Index + */ +MLXSW_ITEM32(reg, pfcc, pnat, 0x00, 14, 2); + +/* reg_pfcc_shl_cap + * Send to higher layers capabilities: + * 0 - No capability of sending Pause and PFC frames to higher layers. + * 1 - Device has capability of sending Pause and PFC frames to higher + * layers. + * Access: RO + */ +MLXSW_ITEM32(reg, pfcc, shl_cap, 0x00, 1, 1); + +/* reg_pfcc_shl_opr + * Send to higher layers operation: + * 0 - Pause and PFC frames are handled by the port (default). + * 1 - Pause and PFC frames are handled by the port and also sent to + * higher layers. Only valid if shl_cap = 1. + * Access: RW + */ +MLXSW_ITEM32(reg, pfcc, shl_opr, 0x00, 0, 1); + +/* reg_pfcc_ppan + * Pause policy auto negotiation. + * 0 - Disabled. Generate / ignore Pause frames based on pptx / pprtx. + * 1 - Enabled. When auto-negotiation is performed, set the Pause policy + * based on the auto-negotiation resolution. + * Access: RW + * + * Note: The auto-negotiation advertisement is set according to pptx and + * pprtx. When PFC is set on Tx / Rx, ppan must be set to 0. + */ +MLXSW_ITEM32(reg, pfcc, ppan, 0x04, 28, 4); + +/* reg_pfcc_prio_mask_tx + * Bit per priority indicating if Tx flow control policy should be + * updated based on bit pfctx. + * Access: WO + */ +MLXSW_ITEM32(reg, pfcc, prio_mask_tx, 0x04, 16, 8); + +/* reg_pfcc_prio_mask_rx + * Bit per priority indicating if Rx flow control policy should be + * updated based on bit pfcrx. + * Access: WO + */ +MLXSW_ITEM32(reg, pfcc, prio_mask_rx, 0x04, 0, 8); + +/* reg_pfcc_pptx + * Admin Pause policy on Tx. + * 0 - Never generate Pause frames (default). + * 1 - Generate Pause frames according to Rx buffer threshold. + * Access: RW + */ +MLXSW_ITEM32(reg, pfcc, pptx, 0x08, 31, 1); + +/* reg_pfcc_aptx + * Active (operational) Pause policy on Tx. + * 0 - Never generate Pause frames. + * 1 - Generate Pause frames according to Rx buffer threshold. + * Access: RO + */ +MLXSW_ITEM32(reg, pfcc, aptx, 0x08, 30, 1); + +/* reg_pfcc_pfctx + * Priority based flow control policy on Tx[7:0]. Per-priority bit mask: + * 0 - Never generate priority Pause frames on the specified priority + * (default). + * 1 - Generate priority Pause frames according to Rx buffer threshold on + * the specified priority. + * Access: RW + * + * Note: pfctx and pptx must be mutually exclusive. + */ +MLXSW_ITEM32(reg, pfcc, pfctx, 0x08, 16, 8); + +/* reg_pfcc_pprx + * Admin Pause policy on Rx. + * 0 - Ignore received Pause frames (default). + * 1 - Respect received Pause frames. + * Access: RW + */ +MLXSW_ITEM32(reg, pfcc, pprx, 0x0C, 31, 1); + +/* reg_pfcc_aprx + * Active (operational) Pause policy on Rx. + * 0 - Ignore received Pause frames. + * 1 - Respect received Pause frames. + * Access: RO + */ +MLXSW_ITEM32(reg, pfcc, aprx, 0x0C, 30, 1); + +/* reg_pfcc_pfcrx + * Priority based flow control policy on Rx[7:0]. Per-priority bit mask: + * 0 - Ignore incoming priority Pause frames on the specified priority + * (default). + * 1 - Respect incoming priority Pause frames on the specified priority. + * Access: RW + */ +MLXSW_ITEM32(reg, pfcc, pfcrx, 0x0C, 16, 8); + +#define MLXSW_REG_PFCC_ALL_PRIO 0xFF + +static inline void mlxsw_reg_pfcc_prio_pack(char *payload, u8 pfc_en) +{ + mlxsw_reg_pfcc_prio_mask_tx_set(payload, MLXSW_REG_PFCC_ALL_PRIO); + mlxsw_reg_pfcc_prio_mask_rx_set(payload, MLXSW_REG_PFCC_ALL_PRIO); + mlxsw_reg_pfcc_pfctx_set(payload, pfc_en); + mlxsw_reg_pfcc_pfcrx_set(payload, pfc_en); +} + +static inline void mlxsw_reg_pfcc_pack(char *payload, u8 local_port) +{ + MLXSW_REG_ZERO(pfcc, payload); + mlxsw_reg_pfcc_local_port_set(payload, local_port); +} + /* PPCNT - Ports Performance Counters Register * ------------------------------------------- * The PPCNT register retrieves per port performance counters. @@ -2180,6 +2500,12 @@ MLXSW_ITEM32(reg, ppcnt, local_port, 0x00, 16, 8); */ MLXSW_ITEM32(reg, ppcnt, pnat, 0x00, 14, 2); +enum mlxsw_reg_ppcnt_grp { + MLXSW_REG_PPCNT_IEEE_8023_CNT = 0x0, + MLXSW_REG_PPCNT_PRIO_CNT = 0x10, + MLXSW_REG_PPCNT_TC_CNT = 0x11, +}; + /* reg_ppcnt_grp * Performance counter group. * Group 63 indicates all groups. Only valid on Set() operation with @@ -2215,6 +2541,8 @@ MLXSW_ITEM32(reg, ppcnt, clr, 0x04, 31, 1); */ MLXSW_ITEM32(reg, ppcnt, prio_tc, 0x04, 0, 5); +/* Ethernet IEEE 802.3 Counter Group */ + /* reg_ppcnt_a_frames_transmitted_ok * Access: RO */ @@ -2329,15 +2657,177 @@ MLXSW_ITEM64(reg, ppcnt, a_pause_mac_ctrl_frames_received, MLXSW_ITEM64(reg, ppcnt, a_pause_mac_ctrl_frames_transmitted, 0x08 + 0x90, 0, 64); -static inline void mlxsw_reg_ppcnt_pack(char *payload, u8 local_port) +/* Ethernet Per Priority Group Counters */ + +/* reg_ppcnt_rx_octets + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, rx_octets, 0x08 + 0x00, 0, 64); + +/* reg_ppcnt_rx_frames + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, rx_frames, 0x08 + 0x20, 0, 64); + +/* reg_ppcnt_tx_octets + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, tx_octets, 0x08 + 0x28, 0, 64); + +/* reg_ppcnt_tx_frames + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, tx_frames, 0x08 + 0x48, 0, 64); + +/* reg_ppcnt_rx_pause + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, rx_pause, 0x08 + 0x50, 0, 64); + +/* reg_ppcnt_rx_pause_duration + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, rx_pause_duration, 0x08 + 0x58, 0, 64); + +/* reg_ppcnt_tx_pause + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, tx_pause, 0x08 + 0x60, 0, 64); + +/* reg_ppcnt_tx_pause_duration + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, tx_pause_duration, 0x08 + 0x68, 0, 64); + +/* reg_ppcnt_rx_pause_transition + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, tx_pause_transition, 0x08 + 0x70, 0, 64); + +/* Ethernet Per Traffic Group Counters */ + +/* reg_ppcnt_tc_transmit_queue + * Contains the transmit queue depth in cells of traffic class + * selected by prio_tc and the port selected by local_port. + * The field cannot be cleared. + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, tc_transmit_queue, 0x08 + 0x00, 0, 64); + +/* reg_ppcnt_tc_no_buffer_discard_uc + * The number of unicast packets dropped due to lack of shared + * buffer resources. + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, tc_no_buffer_discard_uc, 0x08 + 0x08, 0, 64); + +static inline void mlxsw_reg_ppcnt_pack(char *payload, u8 local_port, + enum mlxsw_reg_ppcnt_grp grp, + u8 prio_tc) { MLXSW_REG_ZERO(ppcnt, payload); mlxsw_reg_ppcnt_swid_set(payload, 0); mlxsw_reg_ppcnt_local_port_set(payload, local_port); mlxsw_reg_ppcnt_pnat_set(payload, 0); - mlxsw_reg_ppcnt_grp_set(payload, 0); + mlxsw_reg_ppcnt_grp_set(payload, grp); mlxsw_reg_ppcnt_clr_set(payload, 0); - mlxsw_reg_ppcnt_prio_tc_set(payload, 0); + mlxsw_reg_ppcnt_prio_tc_set(payload, prio_tc); +} + +/* PPTB - Port Prio To Buffer Register + * ----------------------------------- + * Configures the switch priority to buffer table. + */ +#define MLXSW_REG_PPTB_ID 0x500B +#define MLXSW_REG_PPTB_LEN 0x10 + +static const struct mlxsw_reg_info mlxsw_reg_pptb = { + .id = MLXSW_REG_PPTB_ID, + .len = MLXSW_REG_PPTB_LEN, +}; + +enum { + MLXSW_REG_PPTB_MM_UM, + MLXSW_REG_PPTB_MM_UNICAST, + MLXSW_REG_PPTB_MM_MULTICAST, +}; + +/* reg_pptb_mm + * Mapping mode. + * 0 - Map both unicast and multicast packets to the same buffer. + * 1 - Map only unicast packets. + * 2 - Map only multicast packets. + * Access: Index + * + * Note: SwitchX-2 only supports the first option. + */ +MLXSW_ITEM32(reg, pptb, mm, 0x00, 28, 2); + +/* reg_pptb_local_port + * Local port number. + * Access: Index + */ +MLXSW_ITEM32(reg, pptb, local_port, 0x00, 16, 8); + +/* reg_pptb_um + * Enables the update of the untagged_buf field. + * Access: RW + */ +MLXSW_ITEM32(reg, pptb, um, 0x00, 8, 1); + +/* reg_pptb_pm + * Enables the update of the prio_to_buff field. + * Bit <i> is a flag for updating the mapping for switch priority <i>. + * Access: RW + */ +MLXSW_ITEM32(reg, pptb, pm, 0x00, 0, 8); + +/* reg_pptb_prio_to_buff + * Mapping of switch priority <i> to one of the allocated receive port + * buffers. + * Access: RW + */ +MLXSW_ITEM_BIT_ARRAY(reg, pptb, prio_to_buff, 0x04, 0x04, 4); + +/* reg_pptb_pm_msb + * Enables the update of the prio_to_buff field. + * Bit <i> is a flag for updating the mapping for switch priority <i+8>. + * Access: RW + */ +MLXSW_ITEM32(reg, pptb, pm_msb, 0x08, 24, 8); + +/* reg_pptb_untagged_buff + * Mapping of untagged frames to one of the allocated receive port buffers. + * Access: RW + * + * Note: In SwitchX-2 this field must be mapped to buffer 8. Reserved for + * Spectrum, as it maps untagged packets based on the default switch priority. + */ +MLXSW_ITEM32(reg, pptb, untagged_buff, 0x08, 0, 4); + +/* reg_pptb_prio_to_buff_msb + * Mapping of switch priority <i+8> to one of the allocated receive port + * buffers. + * Access: RW + */ +MLXSW_ITEM_BIT_ARRAY(reg, pptb, prio_to_buff_msb, 0x0C, 0x04, 4); + +#define MLXSW_REG_PPTB_ALL_PRIO 0xFF + +static inline void mlxsw_reg_pptb_pack(char *payload, u8 local_port) +{ + MLXSW_REG_ZERO(pptb, payload); + mlxsw_reg_pptb_mm_set(payload, MLXSW_REG_PPTB_MM_UM); + mlxsw_reg_pptb_local_port_set(payload, local_port); + mlxsw_reg_pptb_pm_set(payload, MLXSW_REG_PPTB_ALL_PRIO); + mlxsw_reg_pptb_pm_msb_set(payload, MLXSW_REG_PPTB_ALL_PRIO); +} + +static inline void mlxsw_reg_pptb_prio_to_buff_pack(char *payload, u8 prio, + u8 buff) +{ + mlxsw_reg_pptb_prio_to_buff_set(payload, prio, buff); + mlxsw_reg_pptb_prio_to_buff_msb_set(payload, prio, buff); } /* PBMC - Port Buffer Management Control Register @@ -2346,7 +2836,7 @@ static inline void mlxsw_reg_ppcnt_pack(char *payload, u8 local_port) * allocation for different Prios, and the Pause threshold management. */ #define MLXSW_REG_PBMC_ID 0x500C -#define MLXSW_REG_PBMC_LEN 0x68 +#define MLXSW_REG_PBMC_LEN 0x6C static const struct mlxsw_reg_info mlxsw_reg_pbmc = { .id = MLXSW_REG_PBMC_ID, @@ -2374,6 +2864,8 @@ MLXSW_ITEM32(reg, pbmc, xoff_timer_value, 0x04, 16, 16); */ MLXSW_ITEM32(reg, pbmc, xoff_refresh, 0x04, 0, 16); +#define MLXSW_REG_PBMC_PORT_SHARED_BUF_IDX 11 + /* reg_pbmc_buf_lossy * The field indicates if the buffer is lossy. * 0 - Lossless @@ -2398,6 +2890,30 @@ MLXSW_ITEM32_INDEXED(reg, pbmc, buf_epsb, 0x0C, 24, 1, 0x08, 0x00, false); */ MLXSW_ITEM32_INDEXED(reg, pbmc, buf_size, 0x0C, 0, 16, 0x08, 0x00, false); +/* reg_pbmc_buf_xoff_threshold + * Once the amount of data in the buffer goes above this value, device + * starts sending PFC frames for all priorities associated with the + * buffer. Units are represented in cells. Reserved in case of lossy + * buffer. + * Access: RW + * + * Note: In Spectrum, reserved for buffer[9]. + */ +MLXSW_ITEM32_INDEXED(reg, pbmc, buf_xoff_threshold, 0x0C, 16, 16, + 0x08, 0x04, false); + +/* reg_pbmc_buf_xon_threshold + * When the amount of data in the buffer goes below this value, device + * stops sending PFC frames for the priorities associated with the + * buffer. Units are represented in cells. Reserved in case of lossy + * buffer. + * Access: RW + * + * Note: In Spectrum, reserved for buffer[9]. + */ +MLXSW_ITEM32_INDEXED(reg, pbmc, buf_xon_threshold, 0x0C, 0, 16, + 0x08, 0x04, false); + static inline void mlxsw_reg_pbmc_pack(char *payload, u8 local_port, u16 xoff_timer_value, u16 xoff_refresh) { @@ -2416,6 +2932,17 @@ static inline void mlxsw_reg_pbmc_lossy_buffer_pack(char *payload, mlxsw_reg_pbmc_buf_size_set(payload, buf_index, size); } +static inline void mlxsw_reg_pbmc_lossless_buffer_pack(char *payload, + int buf_index, u16 size, + u16 threshold) +{ + mlxsw_reg_pbmc_buf_lossy_set(payload, buf_index, 0); + mlxsw_reg_pbmc_buf_epsb_set(payload, buf_index, 0); + mlxsw_reg_pbmc_buf_size_set(payload, buf_index, size); + mlxsw_reg_pbmc_buf_xoff_threshold_set(payload, buf_index, threshold); + mlxsw_reg_pbmc_buf_xon_threshold_set(payload, buf_index, threshold); +} + /* PSPA - Port Switch Partition Allocation * --------------------------------------- * Controls the association of a port with a switch partition and enables @@ -2695,6 +3222,1194 @@ static inline void mlxsw_reg_hpkt_pack(char *payload, u8 action, u16 trap_id) mlxsw_reg_hpkt_ctrl_set(payload, MLXSW_REG_HPKT_CTRL_PACKET_DEFAULT); } +/* RGCR - Router General Configuration Register + * -------------------------------------------- + * The register is used for setting up the router configuration. + */ +#define MLXSW_REG_RGCR_ID 0x8001 +#define MLXSW_REG_RGCR_LEN 0x28 + +static const struct mlxsw_reg_info mlxsw_reg_rgcr = { + .id = MLXSW_REG_RGCR_ID, + .len = MLXSW_REG_RGCR_LEN, +}; + +/* reg_rgcr_ipv4_en + * IPv4 router enable. + * Access: RW + */ +MLXSW_ITEM32(reg, rgcr, ipv4_en, 0x00, 31, 1); + +/* reg_rgcr_ipv6_en + * IPv6 router enable. + * Access: RW + */ +MLXSW_ITEM32(reg, rgcr, ipv6_en, 0x00, 30, 1); + +/* reg_rgcr_max_router_interfaces + * Defines the maximum number of active router interfaces for all virtual + * routers. + * Access: RW + */ +MLXSW_ITEM32(reg, rgcr, max_router_interfaces, 0x10, 0, 16); + +/* reg_rgcr_usp + * Update switch priority and packet color. + * 0 - Preserve the value of Switch Priority and packet color. + * 1 - Recalculate the value of Switch Priority and packet color. + * Access: RW + * + * Note: Not supported by SwitchX and SwitchX-2. + */ +MLXSW_ITEM32(reg, rgcr, usp, 0x18, 20, 1); + +/* reg_rgcr_pcp_rw + * Indicates how to handle the pcp_rewrite_en value: + * 0 - Preserve the value of pcp_rewrite_en. + * 2 - Disable PCP rewrite. + * 3 - Enable PCP rewrite. + * Access: RW + * + * Note: Not supported by SwitchX and SwitchX-2. + */ +MLXSW_ITEM32(reg, rgcr, pcp_rw, 0x18, 16, 2); + +/* reg_rgcr_activity_dis + * Activity disable: + * 0 - Activity will be set when an entry is hit (default). + * 1 - Activity will not be set when an entry is hit. + * + * Bit 0 - Disable activity bit in Router Algorithmic LPM Unicast Entry + * (RALUE). + * Bit 1 - Disable activity bit in Router Algorithmic LPM Unicast Host + * Entry (RAUHT). + * Bits 2:7 are reserved. + * Access: RW + * + * Note: Not supported by SwitchX, SwitchX-2 and Switch-IB. + */ +MLXSW_ITEM32(reg, rgcr, activity_dis, 0x20, 0, 8); + +static inline void mlxsw_reg_rgcr_pack(char *payload, bool ipv4_en) +{ + MLXSW_REG_ZERO(rgcr, payload); + mlxsw_reg_rgcr_ipv4_en_set(payload, ipv4_en); +} + +/* RITR - Router Interface Table Register + * -------------------------------------- + * The register is used to configure the router interface table. + */ +#define MLXSW_REG_RITR_ID 0x8002 +#define MLXSW_REG_RITR_LEN 0x40 + +static const struct mlxsw_reg_info mlxsw_reg_ritr = { + .id = MLXSW_REG_RITR_ID, + .len = MLXSW_REG_RITR_LEN, +}; + +/* reg_ritr_enable + * Enables routing on the router interface. + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, enable, 0x00, 31, 1); + +/* reg_ritr_ipv4 + * IPv4 routing enable. Enables routing of IPv4 traffic on the router + * interface. + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, ipv4, 0x00, 29, 1); + +/* reg_ritr_ipv6 + * IPv6 routing enable. Enables routing of IPv6 traffic on the router + * interface. + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, ipv6, 0x00, 28, 1); + +enum mlxsw_reg_ritr_if_type { + MLXSW_REG_RITR_VLAN_IF, + MLXSW_REG_RITR_FID_IF, + MLXSW_REG_RITR_SP_IF, +}; + +/* reg_ritr_type + * Router interface type. + * 0 - VLAN interface. + * 1 - FID interface. + * 2 - Sub-port interface. + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, type, 0x00, 23, 3); + +enum { + MLXSW_REG_RITR_RIF_CREATE, + MLXSW_REG_RITR_RIF_DEL, +}; + +/* reg_ritr_op + * Opcode: + * 0 - Create or edit RIF. + * 1 - Delete RIF. + * Reserved for SwitchX-2. For Spectrum, editing of interface properties + * is not supported. An interface must be deleted and re-created in order + * to update properties. + * Access: WO + */ +MLXSW_ITEM32(reg, ritr, op, 0x00, 20, 2); + +/* reg_ritr_rif + * Router interface index. A pointer to the Router Interface Table. + * Access: Index + */ +MLXSW_ITEM32(reg, ritr, rif, 0x00, 0, 16); + +/* reg_ritr_ipv4_fe + * IPv4 Forwarding Enable. + * Enables routing of IPv4 traffic on the router interface. When disabled, + * forwarding is blocked but local traffic (traps and IP2ME) will be enabled. + * Not supported in SwitchX-2. + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, ipv4_fe, 0x04, 29, 1); + +/* reg_ritr_ipv6_fe + * IPv6 Forwarding Enable. + * Enables routing of IPv6 traffic on the router interface. When disabled, + * forwarding is blocked but local traffic (traps and IP2ME) will be enabled. + * Not supported in SwitchX-2. + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, ipv6_fe, 0x04, 28, 1); + +/* reg_ritr_lb_en + * Loop-back filter enable for unicast packets. + * If the flag is set then loop-back filter for unicast packets is + * implemented on the RIF. Multicast packets are always subject to + * loop-back filtering. + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, lb_en, 0x04, 24, 1); + +/* reg_ritr_virtual_router + * Virtual router ID associated with the router interface. + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, virtual_router, 0x04, 0, 16); + +/* reg_ritr_mtu + * Router interface MTU. + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, mtu, 0x34, 0, 16); + +/* reg_ritr_if_swid + * Switch partition ID. + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, if_swid, 0x08, 24, 8); + +/* reg_ritr_if_mac + * Router interface MAC address. + * In Spectrum, all MAC addresses must have the same 38 MSBits. + * Access: RW + */ +MLXSW_ITEM_BUF(reg, ritr, if_mac, 0x12, 6); + +/* VLAN Interface */ + +/* reg_ritr_vlan_if_vid + * VLAN ID. + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, vlan_if_vid, 0x08, 0, 12); + +/* FID Interface */ + +/* reg_ritr_fid_if_fid + * Filtering ID. Used to connect a bridge to the router. Only FIDs from + * the vFID range are supported. + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, fid_if_fid, 0x08, 0, 16); + +static inline void mlxsw_reg_ritr_fid_set(char *payload, + enum mlxsw_reg_ritr_if_type rif_type, + u16 fid) +{ + if (rif_type == MLXSW_REG_RITR_FID_IF) + mlxsw_reg_ritr_fid_if_fid_set(payload, fid); + else + mlxsw_reg_ritr_vlan_if_vid_set(payload, fid); +} + +/* Sub-port Interface */ + +/* reg_ritr_sp_if_lag + * LAG indication. When this bit is set the system_port field holds the + * LAG identifier. + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, sp_if_lag, 0x08, 24, 1); + +/* reg_ritr_sp_system_port + * Port unique indentifier. When lag bit is set, this field holds the + * lag_id in bits 0:9. + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, sp_if_system_port, 0x08, 0, 16); + +/* reg_ritr_sp_if_vid + * VLAN ID. + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, sp_if_vid, 0x18, 0, 12); + +static inline void mlxsw_reg_ritr_rif_pack(char *payload, u16 rif) +{ + MLXSW_REG_ZERO(ritr, payload); + mlxsw_reg_ritr_rif_set(payload, rif); +} + +static inline void mlxsw_reg_ritr_sp_if_pack(char *payload, bool lag, + u16 system_port, u16 vid) +{ + mlxsw_reg_ritr_sp_if_lag_set(payload, lag); + mlxsw_reg_ritr_sp_if_system_port_set(payload, system_port); + mlxsw_reg_ritr_sp_if_vid_set(payload, vid); +} + +static inline void mlxsw_reg_ritr_pack(char *payload, bool enable, + enum mlxsw_reg_ritr_if_type type, + u16 rif, u16 mtu, const char *mac) +{ + bool op = enable ? MLXSW_REG_RITR_RIF_CREATE : MLXSW_REG_RITR_RIF_DEL; + + MLXSW_REG_ZERO(ritr, payload); + mlxsw_reg_ritr_enable_set(payload, enable); + mlxsw_reg_ritr_ipv4_set(payload, 1); + mlxsw_reg_ritr_type_set(payload, type); + mlxsw_reg_ritr_op_set(payload, op); + mlxsw_reg_ritr_rif_set(payload, rif); + mlxsw_reg_ritr_ipv4_fe_set(payload, 1); + mlxsw_reg_ritr_lb_en_set(payload, 1); + mlxsw_reg_ritr_mtu_set(payload, mtu); + mlxsw_reg_ritr_if_mac_memcpy_to(payload, mac); +} + +/* RATR - Router Adjacency Table Register + * -------------------------------------- + * The RATR register is used to configure the Router Adjacency (next-hop) + * Table. + */ +#define MLXSW_REG_RATR_ID 0x8008 +#define MLXSW_REG_RATR_LEN 0x2C + +static const struct mlxsw_reg_info mlxsw_reg_ratr = { + .id = MLXSW_REG_RATR_ID, + .len = MLXSW_REG_RATR_LEN, +}; + +enum mlxsw_reg_ratr_op { + /* Read */ + MLXSW_REG_RATR_OP_QUERY_READ = 0, + /* Read and clear activity */ + MLXSW_REG_RATR_OP_QUERY_READ_CLEAR = 2, + /* Write Adjacency entry */ + MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY = 1, + /* Write Adjacency entry only if the activity is cleared. + * The write may not succeed if the activity is set. There is not + * direct feedback if the write has succeeded or not, however + * the get will reveal the actual entry (SW can compare the get + * response to the set command). + */ + MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY_ON_ACTIVITY = 3, +}; + +/* reg_ratr_op + * Note that Write operation may also be used for updating + * counter_set_type and counter_index. In this case all other + * fields must not be updated. + * Access: OP + */ +MLXSW_ITEM32(reg, ratr, op, 0x00, 28, 4); + +/* reg_ratr_v + * Valid bit. Indicates if the adjacency entry is valid. + * Note: the device may need some time before reusing an invalidated + * entry. During this time the entry can not be reused. It is + * recommended to use another entry before reusing an invalidated + * entry (e.g. software can put it at the end of the list for + * reusing). Trying to access an invalidated entry not yet cleared + * by the device results with failure indicating "Try Again" status. + * When valid is '0' then egress_router_interface,trap_action, + * adjacency_parameters and counters are reserved + * Access: RW + */ +MLXSW_ITEM32(reg, ratr, v, 0x00, 24, 1); + +/* reg_ratr_a + * Activity. Set for new entries. Set if a packet lookup has hit on + * the specific entry. To clear the a bit, use "clear activity". + * Access: RO + */ +MLXSW_ITEM32(reg, ratr, a, 0x00, 16, 1); + +/* reg_ratr_adjacency_index_low + * Bits 15:0 of index into the adjacency table. + * For SwitchX and SwitchX-2, the adjacency table is linear and + * used for adjacency entries only. + * For Spectrum, the index is to the KVD linear. + * Access: Index + */ +MLXSW_ITEM32(reg, ratr, adjacency_index_low, 0x04, 0, 16); + +/* reg_ratr_egress_router_interface + * Range is 0 .. cap_max_router_interfaces - 1 + * Access: RW + */ +MLXSW_ITEM32(reg, ratr, egress_router_interface, 0x08, 0, 16); + +enum mlxsw_reg_ratr_trap_action { + MLXSW_REG_RATR_TRAP_ACTION_NOP, + MLXSW_REG_RATR_TRAP_ACTION_TRAP, + MLXSW_REG_RATR_TRAP_ACTION_MIRROR_TO_CPU, + MLXSW_REG_RATR_TRAP_ACTION_MIRROR, + MLXSW_REG_RATR_TRAP_ACTION_DISCARD_ERRORS, +}; + +/* reg_ratr_trap_action + * see mlxsw_reg_ratr_trap_action + * Access: RW + */ +MLXSW_ITEM32(reg, ratr, trap_action, 0x0C, 28, 4); + +enum mlxsw_reg_ratr_trap_id { + MLXSW_REG_RATR_TRAP_ID_RTR_EGRESS0 = 0, + MLXSW_REG_RATR_TRAP_ID_RTR_EGRESS1 = 1, +}; + +/* reg_ratr_adjacency_index_high + * Bits 23:16 of the adjacency_index. + * Access: Index + */ +MLXSW_ITEM32(reg, ratr, adjacency_index_high, 0x0C, 16, 8); + +/* reg_ratr_trap_id + * Trap ID to be reported to CPU. + * Trap-ID is RTR_EGRESS0 or RTR_EGRESS1. + * For trap_action of NOP, MIRROR and DISCARD_ERROR + * Access: RW + */ +MLXSW_ITEM32(reg, ratr, trap_id, 0x0C, 0, 8); + +/* reg_ratr_eth_destination_mac + * MAC address of the destination next-hop. + * Access: RW + */ +MLXSW_ITEM_BUF(reg, ratr, eth_destination_mac, 0x12, 6); + +static inline void +mlxsw_reg_ratr_pack(char *payload, + enum mlxsw_reg_ratr_op op, bool valid, + u32 adjacency_index, u16 egress_rif) +{ + MLXSW_REG_ZERO(ratr, payload); + mlxsw_reg_ratr_op_set(payload, op); + mlxsw_reg_ratr_v_set(payload, valid); + mlxsw_reg_ratr_adjacency_index_low_set(payload, adjacency_index); + mlxsw_reg_ratr_adjacency_index_high_set(payload, adjacency_index >> 16); + mlxsw_reg_ratr_egress_router_interface_set(payload, egress_rif); +} + +static inline void mlxsw_reg_ratr_eth_entry_pack(char *payload, + const char *dest_mac) +{ + mlxsw_reg_ratr_eth_destination_mac_memcpy_to(payload, dest_mac); +} + +/* RALTA - Router Algorithmic LPM Tree Allocation Register + * ------------------------------------------------------- + * RALTA is used to allocate the LPM trees of the SHSPM method. + */ +#define MLXSW_REG_RALTA_ID 0x8010 +#define MLXSW_REG_RALTA_LEN 0x04 + +static const struct mlxsw_reg_info mlxsw_reg_ralta = { + .id = MLXSW_REG_RALTA_ID, + .len = MLXSW_REG_RALTA_LEN, +}; + +/* reg_ralta_op + * opcode (valid for Write, must be 0 on Read) + * 0 - allocate a tree + * 1 - deallocate a tree + * Access: OP + */ +MLXSW_ITEM32(reg, ralta, op, 0x00, 28, 2); + +enum mlxsw_reg_ralxx_protocol { + MLXSW_REG_RALXX_PROTOCOL_IPV4, + MLXSW_REG_RALXX_PROTOCOL_IPV6, +}; + +/* reg_ralta_protocol + * Protocol. + * Deallocation opcode: Reserved. + * Access: RW + */ +MLXSW_ITEM32(reg, ralta, protocol, 0x00, 24, 4); + +/* reg_ralta_tree_id + * An identifier (numbered from 1..cap_shspm_max_trees-1) representing + * the tree identifier (managed by software). + * Note that tree_id 0 is allocated for a default-route tree. + * Access: Index + */ +MLXSW_ITEM32(reg, ralta, tree_id, 0x00, 0, 8); + +static inline void mlxsw_reg_ralta_pack(char *payload, bool alloc, + enum mlxsw_reg_ralxx_protocol protocol, + u8 tree_id) +{ + MLXSW_REG_ZERO(ralta, payload); + mlxsw_reg_ralta_op_set(payload, !alloc); + mlxsw_reg_ralta_protocol_set(payload, protocol); + mlxsw_reg_ralta_tree_id_set(payload, tree_id); +} + +/* RALST - Router Algorithmic LPM Structure Tree Register + * ------------------------------------------------------ + * RALST is used to set and query the structure of an LPM tree. + * The structure of the tree must be sorted as a sorted binary tree, while + * each node is a bin that is tagged as the length of the prefixes the lookup + * will refer to. Therefore, bin X refers to a set of entries with prefixes + * of X bits to match with the destination address. The bin 0 indicates + * the default action, when there is no match of any prefix. + */ +#define MLXSW_REG_RALST_ID 0x8011 +#define MLXSW_REG_RALST_LEN 0x104 + +static const struct mlxsw_reg_info mlxsw_reg_ralst = { + .id = MLXSW_REG_RALST_ID, + .len = MLXSW_REG_RALST_LEN, +}; + +/* reg_ralst_root_bin + * The bin number of the root bin. + * 0<root_bin=<(length of IP address) + * For a default-route tree configure 0xff + * Access: RW + */ +MLXSW_ITEM32(reg, ralst, root_bin, 0x00, 16, 8); + +/* reg_ralst_tree_id + * Tree identifier numbered from 1..(cap_shspm_max_trees-1). + * Access: Index + */ +MLXSW_ITEM32(reg, ralst, tree_id, 0x00, 0, 8); + +#define MLXSW_REG_RALST_BIN_NO_CHILD 0xff +#define MLXSW_REG_RALST_BIN_OFFSET 0x04 +#define MLXSW_REG_RALST_BIN_COUNT 128 + +/* reg_ralst_left_child_bin + * Holding the children of the bin according to the stored tree's structure. + * For trees composed of less than 4 blocks, the bins in excess are reserved. + * Note that tree_id 0 is allocated for a default-route tree, bins are 0xff + * Access: RW + */ +MLXSW_ITEM16_INDEXED(reg, ralst, left_child_bin, 0x04, 8, 8, 0x02, 0x00, false); + +/* reg_ralst_right_child_bin + * Holding the children of the bin according to the stored tree's structure. + * For trees composed of less than 4 blocks, the bins in excess are reserved. + * Note that tree_id 0 is allocated for a default-route tree, bins are 0xff + * Access: RW + */ +MLXSW_ITEM16_INDEXED(reg, ralst, right_child_bin, 0x04, 0, 8, 0x02, 0x00, + false); + +static inline void mlxsw_reg_ralst_pack(char *payload, u8 root_bin, u8 tree_id) +{ + MLXSW_REG_ZERO(ralst, payload); + + /* Initialize all bins to have no left or right child */ + memset(payload + MLXSW_REG_RALST_BIN_OFFSET, + MLXSW_REG_RALST_BIN_NO_CHILD, MLXSW_REG_RALST_BIN_COUNT * 2); + + mlxsw_reg_ralst_root_bin_set(payload, root_bin); + mlxsw_reg_ralst_tree_id_set(payload, tree_id); +} + +static inline void mlxsw_reg_ralst_bin_pack(char *payload, u8 bin_number, + u8 left_child_bin, + u8 right_child_bin) +{ + int bin_index = bin_number - 1; + + mlxsw_reg_ralst_left_child_bin_set(payload, bin_index, left_child_bin); + mlxsw_reg_ralst_right_child_bin_set(payload, bin_index, + right_child_bin); +} + +/* RALTB - Router Algorithmic LPM Tree Binding Register + * ---------------------------------------------------- + * RALTB is used to bind virtual router and protocol to an allocated LPM tree. + */ +#define MLXSW_REG_RALTB_ID 0x8012 +#define MLXSW_REG_RALTB_LEN 0x04 + +static const struct mlxsw_reg_info mlxsw_reg_raltb = { + .id = MLXSW_REG_RALTB_ID, + .len = MLXSW_REG_RALTB_LEN, +}; + +/* reg_raltb_virtual_router + * Virtual Router ID + * Range is 0..cap_max_virtual_routers-1 + * Access: Index + */ +MLXSW_ITEM32(reg, raltb, virtual_router, 0x00, 16, 16); + +/* reg_raltb_protocol + * Protocol. + * Access: Index + */ +MLXSW_ITEM32(reg, raltb, protocol, 0x00, 12, 4); + +/* reg_raltb_tree_id + * Tree to be used for the {virtual_router, protocol} + * Tree identifier numbered from 1..(cap_shspm_max_trees-1). + * By default, all Unicast IPv4 and IPv6 are bound to tree_id 0. + * Access: RW + */ +MLXSW_ITEM32(reg, raltb, tree_id, 0x00, 0, 8); + +static inline void mlxsw_reg_raltb_pack(char *payload, u16 virtual_router, + enum mlxsw_reg_ralxx_protocol protocol, + u8 tree_id) +{ + MLXSW_REG_ZERO(raltb, payload); + mlxsw_reg_raltb_virtual_router_set(payload, virtual_router); + mlxsw_reg_raltb_protocol_set(payload, protocol); + mlxsw_reg_raltb_tree_id_set(payload, tree_id); +} + +/* RALUE - Router Algorithmic LPM Unicast Entry Register + * ----------------------------------------------------- + * RALUE is used to configure and query LPM entries that serve + * the Unicast protocols. + */ +#define MLXSW_REG_RALUE_ID 0x8013 +#define MLXSW_REG_RALUE_LEN 0x38 + +static const struct mlxsw_reg_info mlxsw_reg_ralue = { + .id = MLXSW_REG_RALUE_ID, + .len = MLXSW_REG_RALUE_LEN, +}; + +/* reg_ralue_protocol + * Protocol. + * Access: Index + */ +MLXSW_ITEM32(reg, ralue, protocol, 0x00, 24, 4); + +enum mlxsw_reg_ralue_op { + /* Read operation. If entry doesn't exist, the operation fails. */ + MLXSW_REG_RALUE_OP_QUERY_READ = 0, + /* Clear on read operation. Used to read entry and + * clear Activity bit. + */ + MLXSW_REG_RALUE_OP_QUERY_CLEAR = 1, + /* Write operation. Used to write a new entry to the table. All RW + * fields are written for new entry. Activity bit is set + * for new entries. + */ + MLXSW_REG_RALUE_OP_WRITE_WRITE = 0, + /* Update operation. Used to update an existing route entry and + * only update the RW fields that are detailed in the field + * op_u_mask. If entry doesn't exist, the operation fails. + */ + MLXSW_REG_RALUE_OP_WRITE_UPDATE = 1, + /* Clear activity. The Activity bit (the field a) is cleared + * for the entry. + */ + MLXSW_REG_RALUE_OP_WRITE_CLEAR = 2, + /* Delete operation. Used to delete an existing entry. If entry + * doesn't exist, the operation fails. + */ + MLXSW_REG_RALUE_OP_WRITE_DELETE = 3, +}; + +/* reg_ralue_op + * Operation. + * Access: OP + */ +MLXSW_ITEM32(reg, ralue, op, 0x00, 20, 3); + +/* reg_ralue_a + * Activity. Set for new entries. Set if a packet lookup has hit on the + * specific entry, only if the entry is a route. To clear the a bit, use + * "clear activity" op. + * Enabled by activity_dis in RGCR + * Access: RO + */ +MLXSW_ITEM32(reg, ralue, a, 0x00, 16, 1); + +/* reg_ralue_virtual_router + * Virtual Router ID + * Range is 0..cap_max_virtual_routers-1 + * Access: Index + */ +MLXSW_ITEM32(reg, ralue, virtual_router, 0x04, 16, 16); + +#define MLXSW_REG_RALUE_OP_U_MASK_ENTRY_TYPE BIT(0) +#define MLXSW_REG_RALUE_OP_U_MASK_BMP_LEN BIT(1) +#define MLXSW_REG_RALUE_OP_U_MASK_ACTION BIT(2) + +/* reg_ralue_op_u_mask + * opcode update mask. + * On read operation, this field is reserved. + * This field is valid for update opcode, otherwise - reserved. + * This field is a bitmask of the fields that should be updated. + * Access: WO + */ +MLXSW_ITEM32(reg, ralue, op_u_mask, 0x04, 8, 3); + +/* reg_ralue_prefix_len + * Number of bits in the prefix of the LPM route. + * Note that for IPv6 prefixes, if prefix_len>64 the entry consumes + * two entries in the physical HW table. + * Access: Index + */ +MLXSW_ITEM32(reg, ralue, prefix_len, 0x08, 0, 8); + +/* reg_ralue_dip* + * The prefix of the route or of the marker that the object of the LPM + * is compared with. The most significant bits of the dip are the prefix. + * The list significant bits must be '0' if the prefix_len is smaller + * than 128 for IPv6 or smaller than 32 for IPv4. + * IPv4 address uses bits dip[31:0] and bits dip[127:32] are reserved. + * Access: Index + */ +MLXSW_ITEM32(reg, ralue, dip4, 0x18, 0, 32); + +enum mlxsw_reg_ralue_entry_type { + MLXSW_REG_RALUE_ENTRY_TYPE_MARKER_ENTRY = 1, + MLXSW_REG_RALUE_ENTRY_TYPE_ROUTE_ENTRY = 2, + MLXSW_REG_RALUE_ENTRY_TYPE_MARKER_AND_ROUTE_ENTRY = 3, +}; + +/* reg_ralue_entry_type + * Entry type. + * Note - for Marker entries, the action_type and action fields are reserved. + * Access: RW + */ +MLXSW_ITEM32(reg, ralue, entry_type, 0x1C, 30, 2); + +/* reg_ralue_bmp_len + * The best match prefix length in the case that there is no match for + * longer prefixes. + * If (entry_type != MARKER_ENTRY), bmp_len must be equal to prefix_len + * Note for any update operation with entry_type modification this + * field must be set. + * Access: RW + */ +MLXSW_ITEM32(reg, ralue, bmp_len, 0x1C, 16, 8); + +enum mlxsw_reg_ralue_action_type { + MLXSW_REG_RALUE_ACTION_TYPE_REMOTE, + MLXSW_REG_RALUE_ACTION_TYPE_LOCAL, + MLXSW_REG_RALUE_ACTION_TYPE_IP2ME, +}; + +/* reg_ralue_action_type + * Action Type + * Indicates how the IP address is connected. + * It can be connected to a local subnet through local_erif or can be + * on a remote subnet connected through a next-hop router, + * or transmitted to the CPU. + * Reserved when entry_type = MARKER_ENTRY + * Access: RW + */ +MLXSW_ITEM32(reg, ralue, action_type, 0x1C, 0, 2); + +enum mlxsw_reg_ralue_trap_action { + MLXSW_REG_RALUE_TRAP_ACTION_NOP, + MLXSW_REG_RALUE_TRAP_ACTION_TRAP, + MLXSW_REG_RALUE_TRAP_ACTION_MIRROR_TO_CPU, + MLXSW_REG_RALUE_TRAP_ACTION_MIRROR, + MLXSW_REG_RALUE_TRAP_ACTION_DISCARD_ERROR, +}; + +/* reg_ralue_trap_action + * Trap action. + * For IP2ME action, only NOP and MIRROR are possible. + * Access: RW + */ +MLXSW_ITEM32(reg, ralue, trap_action, 0x20, 28, 4); + +/* reg_ralue_trap_id + * Trap ID to be reported to CPU. + * Trap ID is RTR_INGRESS0 or RTR_INGRESS1. + * For trap_action of NOP, MIRROR and DISCARD_ERROR, trap_id is reserved. + * Access: RW + */ +MLXSW_ITEM32(reg, ralue, trap_id, 0x20, 0, 9); + +/* reg_ralue_adjacency_index + * Points to the first entry of the group-based ECMP. + * Only relevant in case of REMOTE action. + * Access: RW + */ +MLXSW_ITEM32(reg, ralue, adjacency_index, 0x24, 0, 24); + +/* reg_ralue_ecmp_size + * Amount of sequential entries starting + * from the adjacency_index (the number of ECMPs). + * The valid range is 1-64, 512, 1024, 2048 and 4096. + * Reserved when trap_action is TRAP or DISCARD_ERROR. + * Only relevant in case of REMOTE action. + * Access: RW + */ +MLXSW_ITEM32(reg, ralue, ecmp_size, 0x28, 0, 13); + +/* reg_ralue_local_erif + * Egress Router Interface. + * Only relevant in case of LOCAL action. + * Access: RW + */ +MLXSW_ITEM32(reg, ralue, local_erif, 0x24, 0, 16); + +/* reg_ralue_v + * Valid bit for the tunnel_ptr field. + * If valid = 0 then trap to CPU as IP2ME trap ID. + * If valid = 1 and the packet format allows NVE or IPinIP tunnel + * decapsulation then tunnel decapsulation is done. + * If valid = 1 and packet format does not allow NVE or IPinIP tunnel + * decapsulation then trap as IP2ME trap ID. + * Only relevant in case of IP2ME action. + * Access: RW + */ +MLXSW_ITEM32(reg, ralue, v, 0x24, 31, 1); + +/* reg_ralue_tunnel_ptr + * Tunnel Pointer for NVE or IPinIP tunnel decapsulation. + * For Spectrum, pointer to KVD Linear. + * Only relevant in case of IP2ME action. + * Access: RW + */ +MLXSW_ITEM32(reg, ralue, tunnel_ptr, 0x24, 0, 24); + +static inline void mlxsw_reg_ralue_pack(char *payload, + enum mlxsw_reg_ralxx_protocol protocol, + enum mlxsw_reg_ralue_op op, + u16 virtual_router, u8 prefix_len) +{ + MLXSW_REG_ZERO(ralue, payload); + mlxsw_reg_ralue_protocol_set(payload, protocol); + mlxsw_reg_ralue_op_set(payload, op); + mlxsw_reg_ralue_virtual_router_set(payload, virtual_router); + mlxsw_reg_ralue_prefix_len_set(payload, prefix_len); + mlxsw_reg_ralue_entry_type_set(payload, + MLXSW_REG_RALUE_ENTRY_TYPE_ROUTE_ENTRY); + mlxsw_reg_ralue_bmp_len_set(payload, prefix_len); +} + +static inline void mlxsw_reg_ralue_pack4(char *payload, + enum mlxsw_reg_ralxx_protocol protocol, + enum mlxsw_reg_ralue_op op, + u16 virtual_router, u8 prefix_len, + u32 dip) +{ + mlxsw_reg_ralue_pack(payload, protocol, op, virtual_router, prefix_len); + mlxsw_reg_ralue_dip4_set(payload, dip); +} + +static inline void +mlxsw_reg_ralue_act_remote_pack(char *payload, + enum mlxsw_reg_ralue_trap_action trap_action, + u16 trap_id, u32 adjacency_index, u16 ecmp_size) +{ + mlxsw_reg_ralue_action_type_set(payload, + MLXSW_REG_RALUE_ACTION_TYPE_REMOTE); + mlxsw_reg_ralue_trap_action_set(payload, trap_action); + mlxsw_reg_ralue_trap_id_set(payload, trap_id); + mlxsw_reg_ralue_adjacency_index_set(payload, adjacency_index); + mlxsw_reg_ralue_ecmp_size_set(payload, ecmp_size); +} + +static inline void +mlxsw_reg_ralue_act_local_pack(char *payload, + enum mlxsw_reg_ralue_trap_action trap_action, + u16 trap_id, u16 local_erif) +{ + mlxsw_reg_ralue_action_type_set(payload, + MLXSW_REG_RALUE_ACTION_TYPE_LOCAL); + mlxsw_reg_ralue_trap_action_set(payload, trap_action); + mlxsw_reg_ralue_trap_id_set(payload, trap_id); + mlxsw_reg_ralue_local_erif_set(payload, local_erif); +} + +static inline void +mlxsw_reg_ralue_act_ip2me_pack(char *payload) +{ + mlxsw_reg_ralue_action_type_set(payload, + MLXSW_REG_RALUE_ACTION_TYPE_IP2ME); +} + +/* RAUHT - Router Algorithmic LPM Unicast Host Table Register + * ---------------------------------------------------------- + * The RAUHT register is used to configure and query the Unicast Host table in + * devices that implement the Algorithmic LPM. + */ +#define MLXSW_REG_RAUHT_ID 0x8014 +#define MLXSW_REG_RAUHT_LEN 0x74 + +static const struct mlxsw_reg_info mlxsw_reg_rauht = { + .id = MLXSW_REG_RAUHT_ID, + .len = MLXSW_REG_RAUHT_LEN, +}; + +enum mlxsw_reg_rauht_type { + MLXSW_REG_RAUHT_TYPE_IPV4, + MLXSW_REG_RAUHT_TYPE_IPV6, +}; + +/* reg_rauht_type + * Access: Index + */ +MLXSW_ITEM32(reg, rauht, type, 0x00, 24, 2); + +enum mlxsw_reg_rauht_op { + MLXSW_REG_RAUHT_OP_QUERY_READ = 0, + /* Read operation */ + MLXSW_REG_RAUHT_OP_QUERY_CLEAR_ON_READ = 1, + /* Clear on read operation. Used to read entry and clear + * activity bit. + */ + MLXSW_REG_RAUHT_OP_WRITE_ADD = 0, + /* Add. Used to write a new entry to the table. All R/W fields are + * relevant for new entry. Activity bit is set for new entries. + */ + MLXSW_REG_RAUHT_OP_WRITE_UPDATE = 1, + /* Update action. Used to update an existing route entry and + * only update the following fields: + * trap_action, trap_id, mac, counter_set_type, counter_index + */ + MLXSW_REG_RAUHT_OP_WRITE_CLEAR_ACTIVITY = 2, + /* Clear activity. A bit is cleared for the entry. */ + MLXSW_REG_RAUHT_OP_WRITE_DELETE = 3, + /* Delete entry */ + MLXSW_REG_RAUHT_OP_WRITE_DELETE_ALL = 4, + /* Delete all host entries on a RIF. In this command, dip + * field is reserved. + */ +}; + +/* reg_rauht_op + * Access: OP + */ +MLXSW_ITEM32(reg, rauht, op, 0x00, 20, 3); + +/* reg_rauht_a + * Activity. Set for new entries. Set if a packet lookup has hit on + * the specific entry. + * To clear the a bit, use "clear activity" op. + * Enabled by activity_dis in RGCR + * Access: RO + */ +MLXSW_ITEM32(reg, rauht, a, 0x00, 16, 1); + +/* reg_rauht_rif + * Router Interface + * Access: Index + */ +MLXSW_ITEM32(reg, rauht, rif, 0x00, 0, 16); + +/* reg_rauht_dip* + * Destination address. + * Access: Index + */ +MLXSW_ITEM32(reg, rauht, dip4, 0x1C, 0x0, 32); + +enum mlxsw_reg_rauht_trap_action { + MLXSW_REG_RAUHT_TRAP_ACTION_NOP, + MLXSW_REG_RAUHT_TRAP_ACTION_TRAP, + MLXSW_REG_RAUHT_TRAP_ACTION_MIRROR_TO_CPU, + MLXSW_REG_RAUHT_TRAP_ACTION_MIRROR, + MLXSW_REG_RAUHT_TRAP_ACTION_DISCARD_ERRORS, +}; + +/* reg_rauht_trap_action + * Access: RW + */ +MLXSW_ITEM32(reg, rauht, trap_action, 0x60, 28, 4); + +enum mlxsw_reg_rauht_trap_id { + MLXSW_REG_RAUHT_TRAP_ID_RTR_EGRESS0, + MLXSW_REG_RAUHT_TRAP_ID_RTR_EGRESS1, +}; + +/* reg_rauht_trap_id + * Trap ID to be reported to CPU. + * Trap-ID is RTR_EGRESS0 or RTR_EGRESS1. + * For trap_action of NOP, MIRROR and DISCARD_ERROR, + * trap_id is reserved. + * Access: RW + */ +MLXSW_ITEM32(reg, rauht, trap_id, 0x60, 0, 9); + +/* reg_rauht_counter_set_type + * Counter set type for flow counters + * Access: RW + */ +MLXSW_ITEM32(reg, rauht, counter_set_type, 0x68, 24, 8); + +/* reg_rauht_counter_index + * Counter index for flow counters + * Access: RW + */ +MLXSW_ITEM32(reg, rauht, counter_index, 0x68, 0, 24); + +/* reg_rauht_mac + * MAC address. + * Access: RW + */ +MLXSW_ITEM_BUF(reg, rauht, mac, 0x6E, 6); + +static inline void mlxsw_reg_rauht_pack(char *payload, + enum mlxsw_reg_rauht_op op, u16 rif, + const char *mac) +{ + MLXSW_REG_ZERO(rauht, payload); + mlxsw_reg_rauht_op_set(payload, op); + mlxsw_reg_rauht_rif_set(payload, rif); + mlxsw_reg_rauht_mac_memcpy_to(payload, mac); +} + +static inline void mlxsw_reg_rauht_pack4(char *payload, + enum mlxsw_reg_rauht_op op, u16 rif, + const char *mac, u32 dip) +{ + mlxsw_reg_rauht_pack(payload, op, rif, mac); + mlxsw_reg_rauht_dip4_set(payload, dip); +} + +/* RALEU - Router Algorithmic LPM ECMP Update Register + * --------------------------------------------------- + * The register enables updating the ECMP section in the action for multiple + * LPM Unicast entries in a single operation. The update is executed to + * all entries of a {virtual router, protocol} tuple using the same ECMP group. + */ +#define MLXSW_REG_RALEU_ID 0x8015 +#define MLXSW_REG_RALEU_LEN 0x28 + +static const struct mlxsw_reg_info mlxsw_reg_raleu = { + .id = MLXSW_REG_RALEU_ID, + .len = MLXSW_REG_RALEU_LEN, +}; + +/* reg_raleu_protocol + * Protocol. + * Access: Index + */ +MLXSW_ITEM32(reg, raleu, protocol, 0x00, 24, 4); + +/* reg_raleu_virtual_router + * Virtual Router ID + * Range is 0..cap_max_virtual_routers-1 + * Access: Index + */ +MLXSW_ITEM32(reg, raleu, virtual_router, 0x00, 0, 16); + +/* reg_raleu_adjacency_index + * Adjacency Index used for matching on the existing entries. + * Access: Index + */ +MLXSW_ITEM32(reg, raleu, adjacency_index, 0x10, 0, 24); + +/* reg_raleu_ecmp_size + * ECMP Size used for matching on the existing entries. + * Access: Index + */ +MLXSW_ITEM32(reg, raleu, ecmp_size, 0x14, 0, 13); + +/* reg_raleu_new_adjacency_index + * New Adjacency Index. + * Access: WO + */ +MLXSW_ITEM32(reg, raleu, new_adjacency_index, 0x20, 0, 24); + +/* reg_raleu_new_ecmp_size + * New ECMP Size. + * Access: WO + */ +MLXSW_ITEM32(reg, raleu, new_ecmp_size, 0x24, 0, 13); + +static inline void mlxsw_reg_raleu_pack(char *payload, + enum mlxsw_reg_ralxx_protocol protocol, + u16 virtual_router, + u32 adjacency_index, u16 ecmp_size, + u32 new_adjacency_index, + u16 new_ecmp_size) +{ + MLXSW_REG_ZERO(raleu, payload); + mlxsw_reg_raleu_protocol_set(payload, protocol); + mlxsw_reg_raleu_virtual_router_set(payload, virtual_router); + mlxsw_reg_raleu_adjacency_index_set(payload, adjacency_index); + mlxsw_reg_raleu_ecmp_size_set(payload, ecmp_size); + mlxsw_reg_raleu_new_adjacency_index_set(payload, new_adjacency_index); + mlxsw_reg_raleu_new_ecmp_size_set(payload, new_ecmp_size); +} + +/* RAUHTD - Router Algorithmic LPM Unicast Host Table Dump Register + * ---------------------------------------------------------------- + * The RAUHTD register allows dumping entries from the Router Unicast Host + * Table. For a given session an entry is dumped no more than one time. The + * first RAUHTD access after reset is a new session. A session ends when the + * num_rec response is smaller than num_rec request or for IPv4 when the + * num_entries is smaller than 4. The clear activity affect the current session + * or the last session if a new session has not started. + */ +#define MLXSW_REG_RAUHTD_ID 0x8018 +#define MLXSW_REG_RAUHTD_BASE_LEN 0x20 +#define MLXSW_REG_RAUHTD_REC_LEN 0x20 +#define MLXSW_REG_RAUHTD_REC_MAX_NUM 32 +#define MLXSW_REG_RAUHTD_LEN (MLXSW_REG_RAUHTD_BASE_LEN + \ + MLXSW_REG_RAUHTD_REC_MAX_NUM * MLXSW_REG_RAUHTD_REC_LEN) +#define MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC 4 + +static const struct mlxsw_reg_info mlxsw_reg_rauhtd = { + .id = MLXSW_REG_RAUHTD_ID, + .len = MLXSW_REG_RAUHTD_LEN, +}; + +#define MLXSW_REG_RAUHTD_FILTER_A BIT(0) +#define MLXSW_REG_RAUHTD_FILTER_RIF BIT(3) + +/* reg_rauhtd_filter_fields + * if a bit is '0' then the relevant field is ignored and dump is done + * regardless of the field value + * Bit0 - filter by activity: entry_a + * Bit3 - filter by entry rip: entry_rif + * Access: Index + */ +MLXSW_ITEM32(reg, rauhtd, filter_fields, 0x00, 0, 8); + +enum mlxsw_reg_rauhtd_op { + MLXSW_REG_RAUHTD_OP_DUMP, + MLXSW_REG_RAUHTD_OP_DUMP_AND_CLEAR, +}; + +/* reg_rauhtd_op + * Access: OP + */ +MLXSW_ITEM32(reg, rauhtd, op, 0x04, 24, 2); + +/* reg_rauhtd_num_rec + * At request: number of records requested + * At response: number of records dumped + * For IPv4, each record has 4 entries at request and up to 4 entries + * at response + * Range is 0..MLXSW_REG_RAUHTD_REC_MAX_NUM + * Access: Index + */ +MLXSW_ITEM32(reg, rauhtd, num_rec, 0x04, 0, 8); + +/* reg_rauhtd_entry_a + * Dump only if activity has value of entry_a + * Reserved if filter_fields bit0 is '0' + * Access: Index + */ +MLXSW_ITEM32(reg, rauhtd, entry_a, 0x08, 16, 1); + +enum mlxsw_reg_rauhtd_type { + MLXSW_REG_RAUHTD_TYPE_IPV4, + MLXSW_REG_RAUHTD_TYPE_IPV6, +}; + +/* reg_rauhtd_type + * Dump only if record type is: + * 0 - IPv4 + * 1 - IPv6 + * Access: Index + */ +MLXSW_ITEM32(reg, rauhtd, type, 0x08, 0, 4); + +/* reg_rauhtd_entry_rif + * Dump only if RIF has value of entry_rif + * Reserved if filter_fields bit3 is '0' + * Access: Index + */ +MLXSW_ITEM32(reg, rauhtd, entry_rif, 0x0C, 0, 16); + +static inline void mlxsw_reg_rauhtd_pack(char *payload, + enum mlxsw_reg_rauhtd_type type) +{ + MLXSW_REG_ZERO(rauhtd, payload); + mlxsw_reg_rauhtd_filter_fields_set(payload, MLXSW_REG_RAUHTD_FILTER_A); + mlxsw_reg_rauhtd_op_set(payload, MLXSW_REG_RAUHTD_OP_DUMP_AND_CLEAR); + mlxsw_reg_rauhtd_num_rec_set(payload, MLXSW_REG_RAUHTD_REC_MAX_NUM); + mlxsw_reg_rauhtd_entry_a_set(payload, 1); + mlxsw_reg_rauhtd_type_set(payload, type); +} + +/* reg_rauhtd_ipv4_rec_num_entries + * Number of valid entries in this record: + * 0 - 1 valid entry + * 1 - 2 valid entries + * 2 - 3 valid entries + * 3 - 4 valid entries + * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, rauhtd, ipv4_rec_num_entries, + MLXSW_REG_RAUHTD_BASE_LEN, 28, 2, + MLXSW_REG_RAUHTD_REC_LEN, 0x00, false); + +/* reg_rauhtd_rec_type + * Record type. + * 0 - IPv4 + * 1 - IPv6 + * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, rauhtd, rec_type, MLXSW_REG_RAUHTD_BASE_LEN, 24, 2, + MLXSW_REG_RAUHTD_REC_LEN, 0x00, false); + +#define MLXSW_REG_RAUHTD_IPV4_ENT_LEN 0x8 + +/* reg_rauhtd_ipv4_ent_a + * Activity. Set for new entries. Set if a packet lookup has hit on the + * specific entry. + * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, rauhtd, ipv4_ent_a, MLXSW_REG_RAUHTD_BASE_LEN, 16, 1, + MLXSW_REG_RAUHTD_IPV4_ENT_LEN, 0x00, false); + +/* reg_rauhtd_ipv4_ent_rif + * Router interface. + * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, rauhtd, ipv4_ent_rif, MLXSW_REG_RAUHTD_BASE_LEN, 0, + 16, MLXSW_REG_RAUHTD_IPV4_ENT_LEN, 0x00, false); + +/* reg_rauhtd_ipv4_ent_dip + * Destination IPv4 address. + * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, rauhtd, ipv4_ent_dip, MLXSW_REG_RAUHTD_BASE_LEN, 0, + 32, MLXSW_REG_RAUHTD_IPV4_ENT_LEN, 0x04, false); + +static inline void mlxsw_reg_rauhtd_ent_ipv4_unpack(char *payload, + int ent_index, u16 *p_rif, + u32 *p_dip) +{ + *p_rif = mlxsw_reg_rauhtd_ipv4_ent_rif_get(payload, ent_index); + *p_dip = mlxsw_reg_rauhtd_ipv4_ent_dip_get(payload, ent_index); +} + /* MFCR - Management Fan Control Register * -------------------------------------- * This register controls the settings of the Fan Speed PWM mechanism. @@ -2929,6 +4644,123 @@ static inline void mlxsw_reg_mtmp_unpack(char *payload, unsigned int *p_temp, mlxsw_reg_mtmp_sensor_name_memcpy_from(payload, sensor_name); } +/* MPAT - Monitoring Port Analyzer Table + * ------------------------------------- + * MPAT Register is used to query and configure the Switch PortAnalyzer Table. + * For an enabled analyzer, all fields except e (enable) cannot be modified. + */ +#define MLXSW_REG_MPAT_ID 0x901A +#define MLXSW_REG_MPAT_LEN 0x78 + +static const struct mlxsw_reg_info mlxsw_reg_mpat = { + .id = MLXSW_REG_MPAT_ID, + .len = MLXSW_REG_MPAT_LEN, +}; + +/* reg_mpat_pa_id + * Port Analyzer ID. + * Access: Index + */ +MLXSW_ITEM32(reg, mpat, pa_id, 0x00, 28, 4); + +/* reg_mpat_system_port + * A unique port identifier for the final destination of the packet. + * Access: RW + */ +MLXSW_ITEM32(reg, mpat, system_port, 0x00, 0, 16); + +/* reg_mpat_e + * Enable. Indicating the Port Analyzer is enabled. + * Access: RW + */ +MLXSW_ITEM32(reg, mpat, e, 0x04, 31, 1); + +/* reg_mpat_qos + * Quality Of Service Mode. + * 0: CONFIGURED - QoS parameters (Switch Priority, and encapsulation + * PCP, DEI, DSCP or VL) are configured. + * 1: MAINTAIN - QoS parameters (Switch Priority, Color) are the + * same as in the original packet that has triggered the mirroring. For + * SPAN also the pcp,dei are maintained. + * Access: RW + */ +MLXSW_ITEM32(reg, mpat, qos, 0x04, 26, 1); + +/* reg_mpat_be + * Best effort mode. Indicates mirroring traffic should not cause packet + * drop or back pressure, but will discard the mirrored packets. Mirrored + * packets will be forwarded on a best effort manner. + * 0: Do not discard mirrored packets + * 1: Discard mirrored packets if causing congestion + * Access: RW + */ +MLXSW_ITEM32(reg, mpat, be, 0x04, 25, 1); + +static inline void mlxsw_reg_mpat_pack(char *payload, u8 pa_id, + u16 system_port, bool e) +{ + MLXSW_REG_ZERO(mpat, payload); + mlxsw_reg_mpat_pa_id_set(payload, pa_id); + mlxsw_reg_mpat_system_port_set(payload, system_port); + mlxsw_reg_mpat_e_set(payload, e); + mlxsw_reg_mpat_qos_set(payload, 1); + mlxsw_reg_mpat_be_set(payload, 1); +} + +/* MPAR - Monitoring Port Analyzer Register + * ---------------------------------------- + * MPAR register is used to query and configure the port analyzer port mirroring + * properties. + */ +#define MLXSW_REG_MPAR_ID 0x901B +#define MLXSW_REG_MPAR_LEN 0x08 + +static const struct mlxsw_reg_info mlxsw_reg_mpar = { + .id = MLXSW_REG_MPAR_ID, + .len = MLXSW_REG_MPAR_LEN, +}; + +/* reg_mpar_local_port + * The local port to mirror the packets from. + * Access: Index + */ +MLXSW_ITEM32(reg, mpar, local_port, 0x00, 16, 8); + +enum mlxsw_reg_mpar_i_e { + MLXSW_REG_MPAR_TYPE_EGRESS, + MLXSW_REG_MPAR_TYPE_INGRESS, +}; + +/* reg_mpar_i_e + * Ingress/Egress + * Access: Index + */ +MLXSW_ITEM32(reg, mpar, i_e, 0x00, 0, 4); + +/* reg_mpar_enable + * Enable mirroring + * By default, port mirroring is disabled for all ports. + * Access: RW + */ +MLXSW_ITEM32(reg, mpar, enable, 0x04, 31, 1); + +/* reg_mpar_pa_id + * Port Analyzer ID. + * Access: RW + */ +MLXSW_ITEM32(reg, mpar, pa_id, 0x04, 0, 4); + +static inline void mlxsw_reg_mpar_pack(char *payload, u8 local_port, + enum mlxsw_reg_mpar_i_e i_e, + bool enable, u8 pa_id) +{ + MLXSW_REG_ZERO(mpar, payload); + mlxsw_reg_mpar_local_port_set(payload, local_port); + mlxsw_reg_mpar_enable_set(payload, enable); + mlxsw_reg_mpar_i_e_set(payload, i_e); + mlxsw_reg_mpar_pa_id_set(payload, pa_id); +} + /* MLCR - Management LED Control Register * -------------------------------------- * Controls the system LEDs. @@ -2985,9 +4817,10 @@ static const struct mlxsw_reg_info mlxsw_reg_sbpr = { .len = MLXSW_REG_SBPR_LEN, }; -enum mlxsw_reg_sbpr_dir { - MLXSW_REG_SBPR_DIR_INGRESS, - MLXSW_REG_SBPR_DIR_EGRESS, +/* shared direstion enum for SBPR, SBCM, SBPM */ +enum mlxsw_reg_sbxx_dir { + MLXSW_REG_SBXX_DIR_INGRESS, + MLXSW_REG_SBXX_DIR_EGRESS, }; /* reg_sbpr_dir @@ -3020,7 +4853,7 @@ enum mlxsw_reg_sbpr_mode { MLXSW_ITEM32(reg, sbpr, mode, 0x08, 0, 4); static inline void mlxsw_reg_sbpr_pack(char *payload, u8 pool, - enum mlxsw_reg_sbpr_dir dir, + enum mlxsw_reg_sbxx_dir dir, enum mlxsw_reg_sbpr_mode mode, u32 size) { MLXSW_REG_ZERO(sbpr, payload); @@ -3062,11 +4895,6 @@ MLXSW_ITEM32(reg, sbcm, local_port, 0x00, 16, 8); */ MLXSW_ITEM32(reg, sbcm, pg_buff, 0x00, 8, 6); -enum mlxsw_reg_sbcm_dir { - MLXSW_REG_SBCM_DIR_INGRESS, - MLXSW_REG_SBCM_DIR_EGRESS, -}; - /* reg_sbcm_dir * Direction. * Access: Index @@ -3079,6 +4907,10 @@ MLXSW_ITEM32(reg, sbcm, dir, 0x00, 0, 2); */ MLXSW_ITEM32(reg, sbcm, min_buff, 0x18, 0, 24); +/* shared max_buff limits for dynamic threshold for SBCM, SBPM */ +#define MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN 1 +#define MLXSW_REG_SBXX_DYN_MAX_BUFF_MAX 14 + /* reg_sbcm_max_buff * When the pool associated to the port-pg/tclass is configured to * static, Maximum buffer size for the limiter configured in cells. @@ -3099,7 +4931,7 @@ MLXSW_ITEM32(reg, sbcm, max_buff, 0x1C, 0, 24); MLXSW_ITEM32(reg, sbcm, pool, 0x24, 0, 4); static inline void mlxsw_reg_sbcm_pack(char *payload, u8 local_port, u8 pg_buff, - enum mlxsw_reg_sbcm_dir dir, + enum mlxsw_reg_sbxx_dir dir, u32 min_buff, u32 max_buff, u8 pool) { MLXSW_REG_ZERO(sbcm, payload); @@ -3111,8 +4943,8 @@ static inline void mlxsw_reg_sbcm_pack(char *payload, u8 local_port, u8 pg_buff, mlxsw_reg_sbcm_pool_set(payload, pool); } -/* SBPM - Shared Buffer Class Management Register - * ---------------------------------------------- +/* SBPM - Shared Buffer Port Management Register + * --------------------------------------------- * The SBPM register configures and retrieves the shared buffer allocation * and configuration according to Port-Pool, including the definition * of the associated quota. @@ -3139,17 +4971,33 @@ MLXSW_ITEM32(reg, sbpm, local_port, 0x00, 16, 8); */ MLXSW_ITEM32(reg, sbpm, pool, 0x00, 8, 4); -enum mlxsw_reg_sbpm_dir { - MLXSW_REG_SBPM_DIR_INGRESS, - MLXSW_REG_SBPM_DIR_EGRESS, -}; - /* reg_sbpm_dir * Direction. * Access: Index */ MLXSW_ITEM32(reg, sbpm, dir, 0x00, 0, 2); +/* reg_sbpm_buff_occupancy + * Current buffer occupancy in cells. + * Access: RO + */ +MLXSW_ITEM32(reg, sbpm, buff_occupancy, 0x10, 0, 24); + +/* reg_sbpm_clr + * Clear Max Buffer Occupancy + * When this bit is set, max_buff_occupancy field is cleared (and a + * new max value is tracked from the time the clear was performed). + * Access: OP + */ +MLXSW_ITEM32(reg, sbpm, clr, 0x14, 31, 1); + +/* reg_sbpm_max_buff_occupancy + * Maximum value of buffer occupancy in cells monitored. Cleared by + * writing to the clr field. + * Access: RO + */ +MLXSW_ITEM32(reg, sbpm, max_buff_occupancy, 0x14, 0, 24); + /* reg_sbpm_min_buff * Minimum buffer size for the limiter, in cells. * Access: RW @@ -3170,17 +5018,25 @@ MLXSW_ITEM32(reg, sbpm, min_buff, 0x18, 0, 24); MLXSW_ITEM32(reg, sbpm, max_buff, 0x1C, 0, 24); static inline void mlxsw_reg_sbpm_pack(char *payload, u8 local_port, u8 pool, - enum mlxsw_reg_sbpm_dir dir, + enum mlxsw_reg_sbxx_dir dir, bool clr, u32 min_buff, u32 max_buff) { MLXSW_REG_ZERO(sbpm, payload); mlxsw_reg_sbpm_local_port_set(payload, local_port); mlxsw_reg_sbpm_pool_set(payload, pool); mlxsw_reg_sbpm_dir_set(payload, dir); + mlxsw_reg_sbpm_clr_set(payload, clr); mlxsw_reg_sbpm_min_buff_set(payload, min_buff); mlxsw_reg_sbpm_max_buff_set(payload, max_buff); } +static inline void mlxsw_reg_sbpm_unpack(char *payload, u32 *p_buff_occupancy, + u32 *p_max_buff_occupancy) +{ + *p_buff_occupancy = mlxsw_reg_sbpm_buff_occupancy_get(payload); + *p_max_buff_occupancy = mlxsw_reg_sbpm_max_buff_occupancy_get(payload); +} + /* SBMM - Shared Buffer Multicast Management Register * -------------------------------------------------- * The SBMM register configures and retrieves the shared buffer allocation @@ -3236,6 +5092,143 @@ static inline void mlxsw_reg_sbmm_pack(char *payload, u8 prio, u32 min_buff, mlxsw_reg_sbmm_pool_set(payload, pool); } +/* SBSR - Shared Buffer Status Register + * ------------------------------------ + * The SBSR register retrieves the shared buffer occupancy according to + * Port-Pool. Note that this register enables reading a large amount of data. + * It is the user's responsibility to limit the amount of data to ensure the + * response can match the maximum transfer unit. In case the response exceeds + * the maximum transport unit, it will be truncated with no special notice. + */ +#define MLXSW_REG_SBSR_ID 0xB005 +#define MLXSW_REG_SBSR_BASE_LEN 0x5C /* base length, without records */ +#define MLXSW_REG_SBSR_REC_LEN 0x8 /* record length */ +#define MLXSW_REG_SBSR_REC_MAX_COUNT 120 +#define MLXSW_REG_SBSR_LEN (MLXSW_REG_SBSR_BASE_LEN + \ + MLXSW_REG_SBSR_REC_LEN * \ + MLXSW_REG_SBSR_REC_MAX_COUNT) + +static const struct mlxsw_reg_info mlxsw_reg_sbsr = { + .id = MLXSW_REG_SBSR_ID, + .len = MLXSW_REG_SBSR_LEN, +}; + +/* reg_sbsr_clr + * Clear Max Buffer Occupancy. When this bit is set, the max_buff_occupancy + * field is cleared (and a new max value is tracked from the time the clear + * was performed). + * Access: OP + */ +MLXSW_ITEM32(reg, sbsr, clr, 0x00, 31, 1); + +/* reg_sbsr_ingress_port_mask + * Bit vector for all ingress network ports. + * Indicates which of the ports (for which the relevant bit is set) + * are affected by the set operation. Configuration of any other port + * does not change. + * Access: Index + */ +MLXSW_ITEM_BIT_ARRAY(reg, sbsr, ingress_port_mask, 0x10, 0x20, 1); + +/* reg_sbsr_pg_buff_mask + * Bit vector for all switch priority groups. + * Indicates which of the priorities (for which the relevant bit is set) + * are affected by the set operation. Configuration of any other priority + * does not change. + * Range is 0..cap_max_pg_buffers - 1 + * Access: Index + */ +MLXSW_ITEM_BIT_ARRAY(reg, sbsr, pg_buff_mask, 0x30, 0x4, 1); + +/* reg_sbsr_egress_port_mask + * Bit vector for all egress network ports. + * Indicates which of the ports (for which the relevant bit is set) + * are affected by the set operation. Configuration of any other port + * does not change. + * Access: Index + */ +MLXSW_ITEM_BIT_ARRAY(reg, sbsr, egress_port_mask, 0x34, 0x20, 1); + +/* reg_sbsr_tclass_mask + * Bit vector for all traffic classes. + * Indicates which of the traffic classes (for which the relevant bit is + * set) are affected by the set operation. Configuration of any other + * traffic class does not change. + * Range is 0..cap_max_tclass - 1 + * Access: Index + */ +MLXSW_ITEM_BIT_ARRAY(reg, sbsr, tclass_mask, 0x54, 0x8, 1); + +static inline void mlxsw_reg_sbsr_pack(char *payload, bool clr) +{ + MLXSW_REG_ZERO(sbsr, payload); + mlxsw_reg_sbsr_clr_set(payload, clr); +} + +/* reg_sbsr_rec_buff_occupancy + * Current buffer occupancy in cells. + * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, sbsr, rec_buff_occupancy, MLXSW_REG_SBSR_BASE_LEN, + 0, 24, MLXSW_REG_SBSR_REC_LEN, 0x00, false); + +/* reg_sbsr_rec_max_buff_occupancy + * Maximum value of buffer occupancy in cells monitored. Cleared by + * writing to the clr field. + * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, sbsr, rec_max_buff_occupancy, MLXSW_REG_SBSR_BASE_LEN, + 0, 24, MLXSW_REG_SBSR_REC_LEN, 0x04, false); + +static inline void mlxsw_reg_sbsr_rec_unpack(char *payload, int rec_index, + u32 *p_buff_occupancy, + u32 *p_max_buff_occupancy) +{ + *p_buff_occupancy = + mlxsw_reg_sbsr_rec_buff_occupancy_get(payload, rec_index); + *p_max_buff_occupancy = + mlxsw_reg_sbsr_rec_max_buff_occupancy_get(payload, rec_index); +} + +/* SBIB - Shared Buffer Internal Buffer Register + * --------------------------------------------- + * The SBIB register configures per port buffers for internal use. The internal + * buffers consume memory on the port buffers (note that the port buffers are + * used also by PBMC). + * + * For Spectrum this is used for egress mirroring. + */ +#define MLXSW_REG_SBIB_ID 0xB006 +#define MLXSW_REG_SBIB_LEN 0x10 + +static const struct mlxsw_reg_info mlxsw_reg_sbib = { + .id = MLXSW_REG_SBIB_ID, + .len = MLXSW_REG_SBIB_LEN, +}; + +/* reg_sbib_local_port + * Local port number + * Not supported for CPU port and router port + * Access: Index + */ +MLXSW_ITEM32(reg, sbib, local_port, 0x00, 16, 8); + +/* reg_sbib_buff_size + * Units represented in cells + * Allowed range is 0 to (cap_max_headroom_size - 1) + * Default is 0 + * Access: RW + */ +MLXSW_ITEM32(reg, sbib, buff_size, 0x08, 0, 24); + +static inline void mlxsw_reg_sbib_pack(char *payload, u8 local_port, + u32 buff_size) +{ + MLXSW_REG_ZERO(sbib, payload); + mlxsw_reg_sbib_local_port_set(payload, local_port); + mlxsw_reg_sbib_buff_size_set(payload, buff_size); +} + static inline const char *mlxsw_reg_id_str(u16 reg_id) { switch (reg_id) { @@ -3283,6 +5276,10 @@ static inline const char *mlxsw_reg_id_str(u16 reg_id) return "SFMR"; case MLXSW_REG_SPVMLR_ID: return "SPVMLR"; + case MLXSW_REG_QTCT_ID: + return "QTCT"; + case MLXSW_REG_QEEC_ID: + return "QEEC"; case MLXSW_REG_PMLP_ID: return "PMLP"; case MLXSW_REG_PMTU_ID: @@ -3293,8 +5290,12 @@ static inline const char *mlxsw_reg_id_str(u16 reg_id) return "PPAD"; case MLXSW_REG_PAOS_ID: return "PAOS"; + case MLXSW_REG_PFCC_ID: + return "PFCC"; case MLXSW_REG_PPCNT_ID: return "PPCNT"; + case MLXSW_REG_PPTB_ID: + return "PPTB"; case MLXSW_REG_PBMC_ID: return "PBMC"; case MLXSW_REG_PSPA_ID: @@ -3303,6 +5304,26 @@ static inline const char *mlxsw_reg_id_str(u16 reg_id) return "HTGT"; case MLXSW_REG_HPKT_ID: return "HPKT"; + case MLXSW_REG_RGCR_ID: + return "RGCR"; + case MLXSW_REG_RITR_ID: + return "RITR"; + case MLXSW_REG_RATR_ID: + return "RATR"; + case MLXSW_REG_RALTA_ID: + return "RALTA"; + case MLXSW_REG_RALST_ID: + return "RALST"; + case MLXSW_REG_RALTB_ID: + return "RALTB"; + case MLXSW_REG_RALUE_ID: + return "RALUE"; + case MLXSW_REG_RAUHT_ID: + return "RAUHT"; + case MLXSW_REG_RALEU_ID: + return "RALEU"; + case MLXSW_REG_RAUHTD_ID: + return "RAUHTD"; case MLXSW_REG_MFCR_ID: return "MFCR"; case MLXSW_REG_MFSC_ID: @@ -3311,6 +5332,10 @@ static inline const char *mlxsw_reg_id_str(u16 reg_id) return "MFSM"; case MLXSW_REG_MTCAP_ID: return "MTCAP"; + case MLXSW_REG_MPAT_ID: + return "MPAT"; + case MLXSW_REG_MPAR_ID: + return "MPAR"; case MLXSW_REG_MTMP_ID: return "MTMP"; case MLXSW_REG_MLCR_ID: @@ -3323,6 +5348,10 @@ static inline const char *mlxsw_reg_id_str(u16 reg_id) return "SBPM"; case MLXSW_REG_SBMM_ID: return "SBMM"; + case MLXSW_REG_SBSR_ID: + return "SBSR"; + case MLXSW_REG_SBIB_ID: + return "SBIB"; default: return "*UNKNOWN*"; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 668b2f465ca5..d48873bcbddf 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -49,9 +49,14 @@ #include <linux/jiffies.h> #include <linux/bitops.h> #include <linux/list.h> -#include <net/devlink.h> +#include <linux/notifier.h> +#include <linux/dcbnl.h> +#include <linux/inetdevice.h> #include <net/switchdev.h> #include <generated/utsrelease.h> +#include <net/pkt_cls.h> +#include <net/tc_act/tc_mirred.h> +#include <net/netevent.h> #include "spectrum.h" #include "core.h" @@ -131,6 +136,8 @@ MLXSW_ITEM32(tx, hdr, fid, 0x08, 0, 16); */ MLXSW_ITEM32(tx, hdr, type, 0x0C, 0, 4); +static bool mlxsw_sp_port_dev_check(const struct net_device *dev); + static void mlxsw_sp_txhdr_construct(struct sk_buff *skb, const struct mlxsw_tx_info *tx_info) { @@ -159,6 +166,303 @@ static int mlxsw_sp_base_mac_get(struct mlxsw_sp *mlxsw_sp) return 0; } +static int mlxsw_sp_span_init(struct mlxsw_sp *mlxsw_sp) +{ + struct mlxsw_resources *resources; + int i; + + resources = mlxsw_core_resources_get(mlxsw_sp->core); + if (!resources->max_span_valid) + return -EIO; + + mlxsw_sp->span.entries_count = resources->max_span; + mlxsw_sp->span.entries = kcalloc(mlxsw_sp->span.entries_count, + sizeof(struct mlxsw_sp_span_entry), + GFP_KERNEL); + if (!mlxsw_sp->span.entries) + return -ENOMEM; + + for (i = 0; i < mlxsw_sp->span.entries_count; i++) + INIT_LIST_HEAD(&mlxsw_sp->span.entries[i].bound_ports_list); + + return 0; +} + +static void mlxsw_sp_span_fini(struct mlxsw_sp *mlxsw_sp) +{ + int i; + + for (i = 0; i < mlxsw_sp->span.entries_count; i++) { + struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i]; + + WARN_ON_ONCE(!list_empty(&curr->bound_ports_list)); + } + kfree(mlxsw_sp->span.entries); +} + +static struct mlxsw_sp_span_entry * +mlxsw_sp_span_entry_create(struct mlxsw_sp_port *port) +{ + struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp; + struct mlxsw_sp_span_entry *span_entry; + char mpat_pl[MLXSW_REG_MPAT_LEN]; + u8 local_port = port->local_port; + int index; + int i; + int err; + + /* find a free entry to use */ + index = -1; + for (i = 0; i < mlxsw_sp->span.entries_count; i++) { + if (!mlxsw_sp->span.entries[i].used) { + index = i; + span_entry = &mlxsw_sp->span.entries[i]; + break; + } + } + if (index < 0) + return NULL; + + /* create a new port analayzer entry for local_port */ + mlxsw_reg_mpat_pack(mpat_pl, index, local_port, true); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl); + if (err) + return NULL; + + span_entry->used = true; + span_entry->id = index; + span_entry->ref_count = 0; + span_entry->local_port = local_port; + return span_entry; +} + +static void mlxsw_sp_span_entry_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_span_entry *span_entry) +{ + u8 local_port = span_entry->local_port; + char mpat_pl[MLXSW_REG_MPAT_LEN]; + int pa_id = span_entry->id; + + mlxsw_reg_mpat_pack(mpat_pl, pa_id, local_port, false); + mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl); + span_entry->used = false; +} + +struct mlxsw_sp_span_entry *mlxsw_sp_span_entry_find(struct mlxsw_sp_port *port) +{ + struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp; + int i; + + for (i = 0; i < mlxsw_sp->span.entries_count; i++) { + struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i]; + + if (curr->used && curr->local_port == port->local_port) + return curr; + } + return NULL; +} + +struct mlxsw_sp_span_entry *mlxsw_sp_span_entry_get(struct mlxsw_sp_port *port) +{ + struct mlxsw_sp_span_entry *span_entry; + + span_entry = mlxsw_sp_span_entry_find(port); + if (span_entry) { + span_entry->ref_count++; + return span_entry; + } + + return mlxsw_sp_span_entry_create(port); +} + +static int mlxsw_sp_span_entry_put(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_span_entry *span_entry) +{ + if (--span_entry->ref_count == 0) + mlxsw_sp_span_entry_destroy(mlxsw_sp, span_entry); + return 0; +} + +static bool mlxsw_sp_span_is_egress_mirror(struct mlxsw_sp_port *port) +{ + struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp; + struct mlxsw_sp_span_inspected_port *p; + int i; + + for (i = 0; i < mlxsw_sp->span.entries_count; i++) { + struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i]; + + list_for_each_entry(p, &curr->bound_ports_list, list) + if (p->local_port == port->local_port && + p->type == MLXSW_SP_SPAN_EGRESS) + return true; + } + + return false; +} + +static int mlxsw_sp_span_mtu_to_buffsize(int mtu) +{ + return MLXSW_SP_BYTES_TO_CELLS(mtu * 5 / 2) + 1; +} + +static int mlxsw_sp_span_port_mtu_update(struct mlxsw_sp_port *port, u16 mtu) +{ + struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp; + char sbib_pl[MLXSW_REG_SBIB_LEN]; + int err; + + /* If port is egress mirrored, the shared buffer size should be + * updated according to the mtu value + */ + if (mlxsw_sp_span_is_egress_mirror(port)) { + mlxsw_reg_sbib_pack(sbib_pl, port->local_port, + mlxsw_sp_span_mtu_to_buffsize(mtu)); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl); + if (err) { + netdev_err(port->dev, "Could not update shared buffer for mirroring\n"); + return err; + } + } + + return 0; +} + +static struct mlxsw_sp_span_inspected_port * +mlxsw_sp_span_entry_bound_port_find(struct mlxsw_sp_port *port, + struct mlxsw_sp_span_entry *span_entry) +{ + struct mlxsw_sp_span_inspected_port *p; + + list_for_each_entry(p, &span_entry->bound_ports_list, list) + if (port->local_port == p->local_port) + return p; + return NULL; +} + +static int +mlxsw_sp_span_inspected_port_bind(struct mlxsw_sp_port *port, + struct mlxsw_sp_span_entry *span_entry, + enum mlxsw_sp_span_type type) +{ + struct mlxsw_sp_span_inspected_port *inspected_port; + struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp; + char mpar_pl[MLXSW_REG_MPAR_LEN]; + char sbib_pl[MLXSW_REG_SBIB_LEN]; + int pa_id = span_entry->id; + int err; + + /* if it is an egress SPAN, bind a shared buffer to it */ + if (type == MLXSW_SP_SPAN_EGRESS) { + mlxsw_reg_sbib_pack(sbib_pl, port->local_port, + mlxsw_sp_span_mtu_to_buffsize(port->dev->mtu)); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl); + if (err) { + netdev_err(port->dev, "Could not create shared buffer for mirroring\n"); + return err; + } + } + + /* bind the port to the SPAN entry */ + mlxsw_reg_mpar_pack(mpar_pl, port->local_port, type, true, pa_id); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpar), mpar_pl); + if (err) + goto err_mpar_reg_write; + + inspected_port = kzalloc(sizeof(*inspected_port), GFP_KERNEL); + if (!inspected_port) { + err = -ENOMEM; + goto err_inspected_port_alloc; + } + inspected_port->local_port = port->local_port; + inspected_port->type = type; + list_add_tail(&inspected_port->list, &span_entry->bound_ports_list); + + return 0; + +err_mpar_reg_write: +err_inspected_port_alloc: + if (type == MLXSW_SP_SPAN_EGRESS) { + mlxsw_reg_sbib_pack(sbib_pl, port->local_port, 0); + mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl); + } + return err; +} + +static void +mlxsw_sp_span_inspected_port_unbind(struct mlxsw_sp_port *port, + struct mlxsw_sp_span_entry *span_entry, + enum mlxsw_sp_span_type type) +{ + struct mlxsw_sp_span_inspected_port *inspected_port; + struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp; + char mpar_pl[MLXSW_REG_MPAR_LEN]; + char sbib_pl[MLXSW_REG_SBIB_LEN]; + int pa_id = span_entry->id; + + inspected_port = mlxsw_sp_span_entry_bound_port_find(port, span_entry); + if (!inspected_port) + return; + + /* remove the inspected port */ + mlxsw_reg_mpar_pack(mpar_pl, port->local_port, type, false, pa_id); + mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpar), mpar_pl); + + /* remove the SBIB buffer if it was egress SPAN */ + if (type == MLXSW_SP_SPAN_EGRESS) { + mlxsw_reg_sbib_pack(sbib_pl, port->local_port, 0); + mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl); + } + + mlxsw_sp_span_entry_put(mlxsw_sp, span_entry); + + list_del(&inspected_port->list); + kfree(inspected_port); +} + +static int mlxsw_sp_span_mirror_add(struct mlxsw_sp_port *from, + struct mlxsw_sp_port *to, + enum mlxsw_sp_span_type type) +{ + struct mlxsw_sp *mlxsw_sp = from->mlxsw_sp; + struct mlxsw_sp_span_entry *span_entry; + int err; + + span_entry = mlxsw_sp_span_entry_get(to); + if (!span_entry) + return -ENOENT; + + netdev_dbg(from->dev, "Adding inspected port to SPAN entry %d\n", + span_entry->id); + + err = mlxsw_sp_span_inspected_port_bind(from, span_entry, type); + if (err) + goto err_port_bind; + + return 0; + +err_port_bind: + mlxsw_sp_span_entry_put(mlxsw_sp, span_entry); + return err; +} + +static void mlxsw_sp_span_mirror_remove(struct mlxsw_sp_port *from, + struct mlxsw_sp_port *to, + enum mlxsw_sp_span_type type) +{ + struct mlxsw_sp_span_entry *span_entry; + + span_entry = mlxsw_sp_span_entry_find(to); + if (!span_entry) { + netdev_err(from->dev, "no span entry found\n"); + return; + } + + netdev_dbg(from->dev, "removing inspected port from SPAN entry %d\n", + span_entry->id); + mlxsw_sp_span_inspected_port_unbind(from, span_entry, type); +} + static int mlxsw_sp_port_admin_status_set(struct mlxsw_sp_port *mlxsw_sp_port, bool is_up) { @@ -171,23 +475,6 @@ static int mlxsw_sp_port_admin_status_set(struct mlxsw_sp_port *mlxsw_sp_port, return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(paos), paos_pl); } -static int mlxsw_sp_port_oper_status_get(struct mlxsw_sp_port *mlxsw_sp_port, - bool *p_is_up) -{ - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; - char paos_pl[MLXSW_REG_PAOS_LEN]; - u8 oper_status; - int err; - - mlxsw_reg_paos_pack(paos_pl, mlxsw_sp_port->local_port, 0); - err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(paos), paos_pl); - if (err) - return err; - oper_status = mlxsw_reg_paos_oper_status_get(paos_pl); - *p_is_up = oper_status == MLXSW_PORT_ADMIN_STATUS_UP ? true : false; - return 0; -} - static int mlxsw_sp_port_dev_addr_set(struct mlxsw_sp_port *mlxsw_sp_port, unsigned char *addr) { @@ -209,23 +496,6 @@ static int mlxsw_sp_port_dev_addr_init(struct mlxsw_sp_port *mlxsw_sp_port) return mlxsw_sp_port_dev_addr_set(mlxsw_sp_port, addr); } -static int mlxsw_sp_port_stp_state_set(struct mlxsw_sp_port *mlxsw_sp_port, - u16 vid, enum mlxsw_reg_spms_state state) -{ - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; - char *spms_pl; - int err; - - spms_pl = kmalloc(MLXSW_REG_SPMS_LEN, GFP_KERNEL); - if (!spms_pl) - return -ENOMEM; - mlxsw_reg_spms_pack(spms_pl, mlxsw_sp_port->local_port); - mlxsw_reg_spms_vid_pack(spms_pl, vid, state); - err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(spms), spms_pl); - kfree(spms_pl); - return err; -} - static int mlxsw_sp_port_mtu_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 mtu) { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; @@ -247,15 +517,23 @@ static int mlxsw_sp_port_mtu_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 mtu) return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pmtu), pmtu_pl); } -static int mlxsw_sp_port_swid_set(struct mlxsw_sp_port *mlxsw_sp_port, u8 swid) +static int __mlxsw_sp_port_swid_set(struct mlxsw_sp *mlxsw_sp, u8 local_port, + u8 swid) { - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; char pspa_pl[MLXSW_REG_PSPA_LEN]; - mlxsw_reg_pspa_pack(pspa_pl, swid, mlxsw_sp_port->local_port); + mlxsw_reg_pspa_pack(pspa_pl, swid, local_port); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pspa), pspa_pl); } +static int mlxsw_sp_port_swid_set(struct mlxsw_sp_port *mlxsw_sp_port, u8 swid) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + + return __mlxsw_sp_port_swid_set(mlxsw_sp, mlxsw_sp_port->local_port, + swid); +} + static int mlxsw_sp_port_vp_mode_set(struct mlxsw_sp_port *mlxsw_sp_port, bool enable) { @@ -307,7 +585,7 @@ mlxsw_sp_port_system_port_mapping_set(struct mlxsw_sp_port *mlxsw_sp_port) static int mlxsw_sp_port_module_info_get(struct mlxsw_sp *mlxsw_sp, u8 local_port, u8 *p_module, - u8 *p_width) + u8 *p_width, u8 *p_lane) { char pmlp_pl[MLXSW_REG_PMLP_LEN]; int err; @@ -318,6 +596,7 @@ static int mlxsw_sp_port_module_info_get(struct mlxsw_sp *mlxsw_sp, return err; *p_module = mlxsw_reg_pmlp_module_get(pmlp_pl, 0); *p_width = mlxsw_reg_pmlp_width_get(pmlp_pl); + *p_lane = mlxsw_reg_pmlp_tx_lane_get(pmlp_pl, 0); return 0; } @@ -379,7 +658,7 @@ static netdev_tx_t mlxsw_sp_port_xmit(struct sk_buff *skb, u64 len; int err; - if (mlxsw_core_skb_transmit_busy(mlxsw_sp, &tx_info)) + if (mlxsw_core_skb_transmit_busy(mlxsw_sp->core, &tx_info)) return NETDEV_TX_BUSY; if (unlikely(skb_headroom(skb) < MLXSW_TXHDR_LEN)) { @@ -399,11 +678,15 @@ static netdev_tx_t mlxsw_sp_port_xmit(struct sk_buff *skb, } mlxsw_sp_txhdr_construct(skb, &tx_info); - len = skb->len; + /* TX header is consumed by HW on the way so we shouldn't count its + * bytes as being sent. + */ + len = skb->len - MLXSW_TXHDR_LEN; + /* Due to a race we might fail here because of a full queue. In that * unlikely case we simply drop the packet. */ - err = mlxsw_core_skb_transmit(mlxsw_sp, skb, &tx_info); + err = mlxsw_core_skb_transmit(mlxsw_sp->core, skb, &tx_info); if (!err) { pcpu_stats = this_cpu_ptr(mlxsw_sp_port->pcpu_stats); @@ -438,16 +721,94 @@ static int mlxsw_sp_port_set_mac_address(struct net_device *dev, void *p) return 0; } +static void mlxsw_sp_pg_buf_pack(char *pbmc_pl, int pg_index, int mtu, + bool pause_en, bool pfc_en, u16 delay) +{ + u16 pg_size = 2 * MLXSW_SP_BYTES_TO_CELLS(mtu); + + delay = pfc_en ? mlxsw_sp_pfc_delay_get(mtu, delay) : + MLXSW_SP_PAUSE_DELAY; + + if (pause_en || pfc_en) + mlxsw_reg_pbmc_lossless_buffer_pack(pbmc_pl, pg_index, + pg_size + delay, pg_size); + else + mlxsw_reg_pbmc_lossy_buffer_pack(pbmc_pl, pg_index, pg_size); +} + +int __mlxsw_sp_port_headroom_set(struct mlxsw_sp_port *mlxsw_sp_port, int mtu, + u8 *prio_tc, bool pause_en, + struct ieee_pfc *my_pfc) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + u8 pfc_en = !!my_pfc ? my_pfc->pfc_en : 0; + u16 delay = !!my_pfc ? my_pfc->delay : 0; + char pbmc_pl[MLXSW_REG_PBMC_LEN]; + int i, j, err; + + mlxsw_reg_pbmc_pack(pbmc_pl, mlxsw_sp_port->local_port, 0, 0); + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(pbmc), pbmc_pl); + if (err) + return err; + + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + bool configure = false; + bool pfc = false; + + for (j = 0; j < IEEE_8021QAZ_MAX_TCS; j++) { + if (prio_tc[j] == i) { + pfc = pfc_en & BIT(j); + configure = true; + break; + } + } + + if (!configure) + continue; + mlxsw_sp_pg_buf_pack(pbmc_pl, i, mtu, pause_en, pfc, delay); + } + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pbmc), pbmc_pl); +} + +static int mlxsw_sp_port_headroom_set(struct mlxsw_sp_port *mlxsw_sp_port, + int mtu, bool pause_en) +{ + u8 def_prio_tc[IEEE_8021QAZ_MAX_TCS] = {0}; + bool dcb_en = !!mlxsw_sp_port->dcb.ets; + struct ieee_pfc *my_pfc; + u8 *prio_tc; + + prio_tc = dcb_en ? mlxsw_sp_port->dcb.ets->prio_tc : def_prio_tc; + my_pfc = dcb_en ? mlxsw_sp_port->dcb.pfc : NULL; + + return __mlxsw_sp_port_headroom_set(mlxsw_sp_port, mtu, prio_tc, + pause_en, my_pfc); +} + static int mlxsw_sp_port_change_mtu(struct net_device *dev, int mtu) { struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + bool pause_en = mlxsw_sp_port_is_pause_en(mlxsw_sp_port); int err; - err = mlxsw_sp_port_mtu_set(mlxsw_sp_port, mtu); + err = mlxsw_sp_port_headroom_set(mlxsw_sp_port, mtu, pause_en); if (err) return err; + err = mlxsw_sp_span_port_mtu_update(mlxsw_sp_port, mtu); + if (err) + goto err_span_port_mtu_update; + err = mlxsw_sp_port_mtu_set(mlxsw_sp_port, mtu); + if (err) + goto err_port_mtu_set; dev->mtu = mtu; return 0; + +err_port_mtu_set: + mlxsw_sp_span_port_mtu_update(mlxsw_sp_port, dev->mtu); +err_span_port_mtu_update: + mlxsw_sp_port_headroom_set(mlxsw_sp_port, dev->mtu, pause_en); + return err; } static struct rtnl_link_stats64 * @@ -550,94 +911,8 @@ static int mlxsw_sp_port_vlan_mode_trans(struct mlxsw_sp_port *mlxsw_sp_port) return 0; } -static struct mlxsw_sp_vfid * -mlxsw_sp_vfid_find(const struct mlxsw_sp *mlxsw_sp, u16 vid) -{ - struct mlxsw_sp_vfid *vfid; - - list_for_each_entry(vfid, &mlxsw_sp->port_vfids.list, list) { - if (vfid->vid == vid) - return vfid; - } - - return NULL; -} - -static u16 mlxsw_sp_avail_vfid_get(const struct mlxsw_sp *mlxsw_sp) -{ - return find_first_zero_bit(mlxsw_sp->port_vfids.mapped, - MLXSW_SP_VFID_PORT_MAX); -} - -static int __mlxsw_sp_vfid_create(struct mlxsw_sp *mlxsw_sp, u16 vfid) -{ - u16 fid = mlxsw_sp_vfid_to_fid(vfid); - char sfmr_pl[MLXSW_REG_SFMR_LEN]; - - mlxsw_reg_sfmr_pack(sfmr_pl, MLXSW_REG_SFMR_OP_CREATE_FID, fid, 0); - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfmr), sfmr_pl); -} - -static void __mlxsw_sp_vfid_destroy(struct mlxsw_sp *mlxsw_sp, u16 vfid) -{ - u16 fid = mlxsw_sp_vfid_to_fid(vfid); - char sfmr_pl[MLXSW_REG_SFMR_LEN]; - - mlxsw_reg_sfmr_pack(sfmr_pl, MLXSW_REG_SFMR_OP_DESTROY_FID, fid, 0); - mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfmr), sfmr_pl); -} - -static struct mlxsw_sp_vfid *mlxsw_sp_vfid_create(struct mlxsw_sp *mlxsw_sp, - u16 vid) -{ - struct device *dev = mlxsw_sp->bus_info->dev; - struct mlxsw_sp_vfid *vfid; - u16 n_vfid; - int err; - - n_vfid = mlxsw_sp_avail_vfid_get(mlxsw_sp); - if (n_vfid == MLXSW_SP_VFID_PORT_MAX) { - dev_err(dev, "No available vFIDs\n"); - return ERR_PTR(-ERANGE); - } - - err = __mlxsw_sp_vfid_create(mlxsw_sp, n_vfid); - if (err) { - dev_err(dev, "Failed to create vFID=%d\n", n_vfid); - return ERR_PTR(err); - } - - vfid = kzalloc(sizeof(*vfid), GFP_KERNEL); - if (!vfid) - goto err_allocate_vfid; - - vfid->vfid = n_vfid; - vfid->vid = vid; - - list_add(&vfid->list, &mlxsw_sp->port_vfids.list); - set_bit(n_vfid, mlxsw_sp->port_vfids.mapped); - - return vfid; - -err_allocate_vfid: - __mlxsw_sp_vfid_destroy(mlxsw_sp, n_vfid); - return ERR_PTR(-ENOMEM); -} - -static void mlxsw_sp_vfid_destroy(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_vfid *vfid) -{ - clear_bit(vfid->vfid, mlxsw_sp->port_vfids.mapped); - list_del(&vfid->list); - - __mlxsw_sp_vfid_destroy(mlxsw_sp, vfid->vfid); - - kfree(vfid); -} - static struct mlxsw_sp_port * -mlxsw_sp_port_vport_create(struct mlxsw_sp_port *mlxsw_sp_port, - struct mlxsw_sp_vfid *vfid) +mlxsw_sp_port_vport_create(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid) { struct mlxsw_sp_port *mlxsw_sp_vport; @@ -655,8 +930,7 @@ mlxsw_sp_port_vport_create(struct mlxsw_sp_port *mlxsw_sp_port, mlxsw_sp_vport->stp_state = BR_STATE_FORWARDING; mlxsw_sp_vport->lagged = mlxsw_sp_port->lagged; mlxsw_sp_vport->lag_id = mlxsw_sp_port->lag_id; - mlxsw_sp_vport->vport.vfid = vfid; - mlxsw_sp_vport->vport.vid = vfid->vid; + mlxsw_sp_vport->vport.vid = vid; list_add(&mlxsw_sp_vport->vport.list, &mlxsw_sp_port->vports_list); @@ -669,13 +943,12 @@ static void mlxsw_sp_port_vport_destroy(struct mlxsw_sp_port *mlxsw_sp_vport) kfree(mlxsw_sp_vport); } -int mlxsw_sp_port_add_vid(struct net_device *dev, __be16 __always_unused proto, - u16 vid) +static int mlxsw_sp_port_add_vid(struct net_device *dev, + __be16 __always_unused proto, u16 vid) { struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; struct mlxsw_sp_port *mlxsw_sp_vport; - struct mlxsw_sp_vfid *vfid; + bool untagged = vid == 1; int err; /* VLAN 0 is added to HW filter when device goes up, but it is @@ -684,37 +957,12 @@ int mlxsw_sp_port_add_vid(struct net_device *dev, __be16 __always_unused proto, if (!vid) return 0; - if (mlxsw_sp_port_vport_find(mlxsw_sp_port, vid)) { - netdev_warn(dev, "VID=%d already configured\n", vid); + if (mlxsw_sp_port_vport_find(mlxsw_sp_port, vid)) return 0; - } - - vfid = mlxsw_sp_vfid_find(mlxsw_sp, vid); - if (!vfid) { - vfid = mlxsw_sp_vfid_create(mlxsw_sp, vid); - if (IS_ERR(vfid)) { - netdev_err(dev, "Failed to create vFID for VID=%d\n", - vid); - return PTR_ERR(vfid); - } - } - mlxsw_sp_vport = mlxsw_sp_port_vport_create(mlxsw_sp_port, vfid); - if (!mlxsw_sp_vport) { - netdev_err(dev, "Failed to create vPort for VID=%d\n", vid); - err = -ENOMEM; - goto err_port_vport_create; - } - - if (!vfid->nr_vports) { - err = mlxsw_sp_vport_flood_set(mlxsw_sp_vport, vfid->vfid, - true, false); - if (err) { - netdev_err(dev, "Failed to setup flooding for vFID=%d\n", - vfid->vfid); - goto err_vport_flood_set; - } - } + mlxsw_sp_vport = mlxsw_sp_port_vport_create(mlxsw_sp_port, vid); + if (!mlxsw_sp_vport) + return -ENOMEM; /* When adding the first VLAN interface on a bridged port we need to * transition all the active 802.1Q bridge VLANs to use explicit @@ -722,77 +970,36 @@ int mlxsw_sp_port_add_vid(struct net_device *dev, __be16 __always_unused proto, */ if (list_is_singular(&mlxsw_sp_port->vports_list)) { err = mlxsw_sp_port_vp_mode_trans(mlxsw_sp_port); - if (err) { - netdev_err(dev, "Failed to set to Virtual mode\n"); + if (err) goto err_port_vp_mode_trans; - } - } - - err = mlxsw_sp_port_vid_to_fid_set(mlxsw_sp_vport, - MLXSW_REG_SVFA_MT_PORT_VID_TO_FID, - true, - mlxsw_sp_vfid_to_fid(vfid->vfid), - vid); - if (err) { - netdev_err(dev, "Failed to map {Port, VID=%d} to vFID=%d\n", - vid, vfid->vfid); - goto err_port_vid_to_fid_set; } err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_vport, vid, false); - if (err) { - netdev_err(dev, "Failed to disable learning for VID=%d\n", vid); + if (err) goto err_port_vid_learning_set; - } - err = mlxsw_sp_port_vlan_set(mlxsw_sp_vport, vid, vid, true, false); - if (err) { - netdev_err(dev, "Failed to set VLAN membership for VID=%d\n", - vid); + err = mlxsw_sp_port_vlan_set(mlxsw_sp_vport, vid, vid, true, untagged); + if (err) goto err_port_add_vid; - } - - err = mlxsw_sp_port_stp_state_set(mlxsw_sp_vport, vid, - MLXSW_REG_SPMS_STATE_FORWARDING); - if (err) { - netdev_err(dev, "Failed to set STP state for VID=%d\n", vid); - goto err_port_stp_state_set; - } - - vfid->nr_vports++; return 0; -err_port_stp_state_set: - mlxsw_sp_port_vlan_set(mlxsw_sp_vport, vid, vid, false, false); err_port_add_vid: mlxsw_sp_port_vid_learning_set(mlxsw_sp_vport, vid, true); err_port_vid_learning_set: - mlxsw_sp_port_vid_to_fid_set(mlxsw_sp_vport, - MLXSW_REG_SVFA_MT_PORT_VID_TO_FID, false, - mlxsw_sp_vfid_to_fid(vfid->vfid), vid); -err_port_vid_to_fid_set: if (list_is_singular(&mlxsw_sp_port->vports_list)) mlxsw_sp_port_vlan_mode_trans(mlxsw_sp_port); err_port_vp_mode_trans: - if (!vfid->nr_vports) - mlxsw_sp_vport_flood_set(mlxsw_sp_vport, vfid->vfid, false, - false); -err_vport_flood_set: mlxsw_sp_port_vport_destroy(mlxsw_sp_vport); -err_port_vport_create: - if (!vfid->nr_vports) - mlxsw_sp_vfid_destroy(mlxsw_sp, vfid); return err; } -int mlxsw_sp_port_kill_vid(struct net_device *dev, - __be16 __always_unused proto, u16 vid) +static int mlxsw_sp_port_kill_vid(struct net_device *dev, + __be16 __always_unused proto, u16 vid) { struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); struct mlxsw_sp_port *mlxsw_sp_vport; - struct mlxsw_sp_vfid *vfid; - int err; + struct mlxsw_sp_fid *f; /* VLAN 0 is removed from HW filter when device goes down, but * it is reserved in our case, so simply return. @@ -801,82 +1008,218 @@ int mlxsw_sp_port_kill_vid(struct net_device *dev, return 0; mlxsw_sp_vport = mlxsw_sp_port_vport_find(mlxsw_sp_port, vid); - if (!mlxsw_sp_vport) { - netdev_warn(dev, "VID=%d does not exist\n", vid); + if (WARN_ON(!mlxsw_sp_vport)) return 0; - } - vfid = mlxsw_sp_vport->vport.vfid; + mlxsw_sp_port_vlan_set(mlxsw_sp_vport, vid, vid, false, false); - err = mlxsw_sp_port_stp_state_set(mlxsw_sp_vport, vid, - MLXSW_REG_SPMS_STATE_DISCARDING); - if (err) { - netdev_err(dev, "Failed to set STP state for VID=%d\n", vid); - return err; - } + mlxsw_sp_port_vid_learning_set(mlxsw_sp_vport, vid, true); - err = mlxsw_sp_port_vlan_set(mlxsw_sp_vport, vid, vid, false, false); - if (err) { - netdev_err(dev, "Failed to set VLAN membership for VID=%d\n", - vid); - return err; + /* Drop FID reference. If this was the last reference the + * resources will be freed. + */ + f = mlxsw_sp_vport_fid_get(mlxsw_sp_vport); + if (f && !WARN_ON(!f->leave)) + f->leave(mlxsw_sp_vport); + + /* When removing the last VLAN interface on a bridged port we need to + * transition all active 802.1Q bridge VLANs to use VID to FID + * mappings and set port's mode to VLAN mode. + */ + if (list_is_singular(&mlxsw_sp_port->vports_list)) + mlxsw_sp_port_vlan_mode_trans(mlxsw_sp_port); + + mlxsw_sp_port_vport_destroy(mlxsw_sp_vport); + + return 0; +} + +static int mlxsw_sp_port_get_phys_port_name(struct net_device *dev, char *name, + size_t len) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + u8 module = mlxsw_sp_port->mapping.module; + u8 width = mlxsw_sp_port->mapping.width; + u8 lane = mlxsw_sp_port->mapping.lane; + int err; + + if (!mlxsw_sp_port->split) + err = snprintf(name, len, "p%d", module + 1); + else + err = snprintf(name, len, "p%ds%d", module + 1, + lane / width); + + if (err >= len) + return -EINVAL; + + return 0; +} + +static struct mlxsw_sp_port_mall_tc_entry * +mlxsw_sp_port_mirror_entry_find(struct mlxsw_sp_port *port, + unsigned long cookie) { + struct mlxsw_sp_port_mall_tc_entry *mall_tc_entry; + + list_for_each_entry(mall_tc_entry, &port->mall_tc_list, list) + if (mall_tc_entry->cookie == cookie) + return mall_tc_entry; + + return NULL; +} + +static int +mlxsw_sp_port_add_cls_matchall_mirror(struct mlxsw_sp_port *mlxsw_sp_port, + struct tc_cls_matchall_offload *cls, + const struct tc_action *a, + bool ingress) +{ + struct mlxsw_sp_port_mall_tc_entry *mall_tc_entry; + struct net *net = dev_net(mlxsw_sp_port->dev); + enum mlxsw_sp_span_type span_type; + struct mlxsw_sp_port *to_port; + struct net_device *to_dev; + int ifindex; + int err; + + ifindex = tcf_mirred_ifindex(a); + to_dev = __dev_get_by_index(net, ifindex); + if (!to_dev) { + netdev_err(mlxsw_sp_port->dev, "Could not find requested device\n"); + return -EINVAL; } - err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_vport, vid, true); - if (err) { - netdev_err(dev, "Failed to enable learning for VID=%d\n", vid); - return err; + if (!mlxsw_sp_port_dev_check(to_dev)) { + netdev_err(mlxsw_sp_port->dev, "Cannot mirror to a non-spectrum port"); + return -ENOTSUPP; } + to_port = netdev_priv(to_dev); - err = mlxsw_sp_port_vid_to_fid_set(mlxsw_sp_vport, - MLXSW_REG_SVFA_MT_PORT_VID_TO_FID, - false, - mlxsw_sp_vfid_to_fid(vfid->vfid), - vid); - if (err) { - netdev_err(dev, "Failed to invalidate {Port, VID=%d} to vFID=%d mapping\n", - vid, vfid->vfid); - return err; + mall_tc_entry = kzalloc(sizeof(*mall_tc_entry), GFP_KERNEL); + if (!mall_tc_entry) + return -ENOMEM; + + mall_tc_entry->cookie = cls->cookie; + mall_tc_entry->type = MLXSW_SP_PORT_MALL_MIRROR; + mall_tc_entry->mirror.to_local_port = to_port->local_port; + mall_tc_entry->mirror.ingress = ingress; + list_add_tail(&mall_tc_entry->list, &mlxsw_sp_port->mall_tc_list); + + span_type = ingress ? MLXSW_SP_SPAN_INGRESS : MLXSW_SP_SPAN_EGRESS; + err = mlxsw_sp_span_mirror_add(mlxsw_sp_port, to_port, span_type); + if (err) + goto err_mirror_add; + return 0; + +err_mirror_add: + list_del(&mall_tc_entry->list); + kfree(mall_tc_entry); + return err; +} + +static int mlxsw_sp_port_add_cls_matchall(struct mlxsw_sp_port *mlxsw_sp_port, + __be16 protocol, + struct tc_cls_matchall_offload *cls, + bool ingress) +{ + const struct tc_action *a; + LIST_HEAD(actions); + int err; + + if (!tc_single_action(cls->exts)) { + netdev_err(mlxsw_sp_port->dev, "only singular actions are supported\n"); + return -ENOTSUPP; } - /* When removing the last VLAN interface on a bridged port we need to - * transition all active 802.1Q bridge VLANs to use VID to FID - * mappings and set port's mode to VLAN mode. - */ - if (list_is_singular(&mlxsw_sp_port->vports_list)) { - err = mlxsw_sp_port_vlan_mode_trans(mlxsw_sp_port); - if (err) { - netdev_err(dev, "Failed to set to VLAN mode\n"); + tcf_exts_to_list(cls->exts, &actions); + list_for_each_entry(a, &actions, list) { + if (!is_tcf_mirred_mirror(a) || protocol != htons(ETH_P_ALL)) + return -ENOTSUPP; + + err = mlxsw_sp_port_add_cls_matchall_mirror(mlxsw_sp_port, cls, + a, ingress); + if (err) return err; - } } - vfid->nr_vports--; - mlxsw_sp_port_vport_destroy(mlxsw_sp_vport); + return 0; +} - /* Destroy the vFID if no vPorts are assigned to it anymore. */ - if (!vfid->nr_vports) - mlxsw_sp_vfid_destroy(mlxsw_sp_port->mlxsw_sp, vfid); +static void mlxsw_sp_port_del_cls_matchall(struct mlxsw_sp_port *mlxsw_sp_port, + struct tc_cls_matchall_offload *cls) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct mlxsw_sp_port_mall_tc_entry *mall_tc_entry; + enum mlxsw_sp_span_type span_type; + struct mlxsw_sp_port *to_port; + + mall_tc_entry = mlxsw_sp_port_mirror_entry_find(mlxsw_sp_port, + cls->cookie); + if (!mall_tc_entry) { + netdev_dbg(mlxsw_sp_port->dev, "tc entry not found on port\n"); + return; + } - return 0; + switch (mall_tc_entry->type) { + case MLXSW_SP_PORT_MALL_MIRROR: + to_port = mlxsw_sp->ports[mall_tc_entry->mirror.to_local_port]; + span_type = mall_tc_entry->mirror.ingress ? + MLXSW_SP_SPAN_INGRESS : MLXSW_SP_SPAN_EGRESS; + + mlxsw_sp_span_mirror_remove(mlxsw_sp_port, to_port, span_type); + break; + default: + WARN_ON(1); + } + + list_del(&mall_tc_entry->list); + kfree(mall_tc_entry); +} + +static int mlxsw_sp_setup_tc(struct net_device *dev, u32 handle, + __be16 proto, struct tc_to_netdev *tc) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + bool ingress = TC_H_MAJ(handle) == TC_H_MAJ(TC_H_INGRESS); + + if (tc->type == TC_SETUP_MATCHALL) { + switch (tc->cls_mall->command) { + case TC_CLSMATCHALL_REPLACE: + return mlxsw_sp_port_add_cls_matchall(mlxsw_sp_port, + proto, + tc->cls_mall, + ingress); + case TC_CLSMATCHALL_DESTROY: + mlxsw_sp_port_del_cls_matchall(mlxsw_sp_port, + tc->cls_mall); + return 0; + default: + return -EINVAL; + } + } + + return -ENOTSUPP; } static const struct net_device_ops mlxsw_sp_port_netdev_ops = { .ndo_open = mlxsw_sp_port_open, .ndo_stop = mlxsw_sp_port_stop, .ndo_start_xmit = mlxsw_sp_port_xmit, + .ndo_setup_tc = mlxsw_sp_setup_tc, .ndo_set_rx_mode = mlxsw_sp_set_rx_mode, .ndo_set_mac_address = mlxsw_sp_port_set_mac_address, .ndo_change_mtu = mlxsw_sp_port_change_mtu, .ndo_get_stats64 = mlxsw_sp_port_get_stats64, .ndo_vlan_rx_add_vid = mlxsw_sp_port_add_vid, .ndo_vlan_rx_kill_vid = mlxsw_sp_port_kill_vid, + .ndo_neigh_construct = mlxsw_sp_router_neigh_construct, + .ndo_neigh_destroy = mlxsw_sp_router_neigh_destroy, .ndo_fdb_add = switchdev_port_fdb_add, .ndo_fdb_del = switchdev_port_fdb_del, .ndo_fdb_dump = switchdev_port_fdb_dump, .ndo_bridge_setlink = switchdev_port_bridge_setlink, .ndo_bridge_getlink = switchdev_port_bridge_getlink, .ndo_bridge_dellink = switchdev_port_bridge_dellink, + .ndo_get_phys_port_name = mlxsw_sp_port_get_phys_port_name, }; static void mlxsw_sp_port_get_drvinfo(struct net_device *dev, @@ -897,12 +1240,74 @@ static void mlxsw_sp_port_get_drvinfo(struct net_device *dev, sizeof(drvinfo->bus_info)); } +static void mlxsw_sp_port_get_pauseparam(struct net_device *dev, + struct ethtool_pauseparam *pause) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + + pause->rx_pause = mlxsw_sp_port->link.rx_pause; + pause->tx_pause = mlxsw_sp_port->link.tx_pause; +} + +static int mlxsw_sp_port_pause_set(struct mlxsw_sp_port *mlxsw_sp_port, + struct ethtool_pauseparam *pause) +{ + char pfcc_pl[MLXSW_REG_PFCC_LEN]; + + mlxsw_reg_pfcc_pack(pfcc_pl, mlxsw_sp_port->local_port); + mlxsw_reg_pfcc_pprx_set(pfcc_pl, pause->rx_pause); + mlxsw_reg_pfcc_pptx_set(pfcc_pl, pause->tx_pause); + + return mlxsw_reg_write(mlxsw_sp_port->mlxsw_sp->core, MLXSW_REG(pfcc), + pfcc_pl); +} + +static int mlxsw_sp_port_set_pauseparam(struct net_device *dev, + struct ethtool_pauseparam *pause) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + bool pause_en = pause->tx_pause || pause->rx_pause; + int err; + + if (mlxsw_sp_port->dcb.pfc && mlxsw_sp_port->dcb.pfc->pfc_en) { + netdev_err(dev, "PFC already enabled on port\n"); + return -EINVAL; + } + + if (pause->autoneg) { + netdev_err(dev, "PAUSE frames autonegotiation isn't supported\n"); + return -EINVAL; + } + + err = mlxsw_sp_port_headroom_set(mlxsw_sp_port, dev->mtu, pause_en); + if (err) { + netdev_err(dev, "Failed to configure port's headroom\n"); + return err; + } + + err = mlxsw_sp_port_pause_set(mlxsw_sp_port, pause); + if (err) { + netdev_err(dev, "Failed to set PAUSE parameters\n"); + goto err_port_pause_configure; + } + + mlxsw_sp_port->link.rx_pause = pause->rx_pause; + mlxsw_sp_port->link.tx_pause = pause->tx_pause; + + return 0; + +err_port_pause_configure: + pause_en = mlxsw_sp_port_is_pause_en(mlxsw_sp_port); + mlxsw_sp_port_headroom_set(mlxsw_sp_port, dev->mtu, pause_en); + return err; +} + struct mlxsw_sp_port_hw_stats { char str[ETH_GSTRING_LEN]; u64 (*getter)(char *payload); }; -static const struct mlxsw_sp_port_hw_stats mlxsw_sp_port_hw_stats[] = { +static struct mlxsw_sp_port_hw_stats mlxsw_sp_port_hw_stats[] = { { .str = "a_frames_transmitted_ok", .getter = mlxsw_reg_ppcnt_a_frames_transmitted_ok_get, @@ -983,6 +1388,90 @@ static const struct mlxsw_sp_port_hw_stats mlxsw_sp_port_hw_stats[] = { #define MLXSW_SP_PORT_HW_STATS_LEN ARRAY_SIZE(mlxsw_sp_port_hw_stats) +static struct mlxsw_sp_port_hw_stats mlxsw_sp_port_hw_prio_stats[] = { + { + .str = "rx_octets_prio", + .getter = mlxsw_reg_ppcnt_rx_octets_get, + }, + { + .str = "rx_frames_prio", + .getter = mlxsw_reg_ppcnt_rx_frames_get, + }, + { + .str = "tx_octets_prio", + .getter = mlxsw_reg_ppcnt_tx_octets_get, + }, + { + .str = "tx_frames_prio", + .getter = mlxsw_reg_ppcnt_tx_frames_get, + }, + { + .str = "rx_pause_prio", + .getter = mlxsw_reg_ppcnt_rx_pause_get, + }, + { + .str = "rx_pause_duration_prio", + .getter = mlxsw_reg_ppcnt_rx_pause_duration_get, + }, + { + .str = "tx_pause_prio", + .getter = mlxsw_reg_ppcnt_tx_pause_get, + }, + { + .str = "tx_pause_duration_prio", + .getter = mlxsw_reg_ppcnt_tx_pause_duration_get, + }, +}; + +#define MLXSW_SP_PORT_HW_PRIO_STATS_LEN ARRAY_SIZE(mlxsw_sp_port_hw_prio_stats) + +static u64 mlxsw_reg_ppcnt_tc_transmit_queue_bytes_get(char *ppcnt_pl) +{ + u64 transmit_queue = mlxsw_reg_ppcnt_tc_transmit_queue_get(ppcnt_pl); + + return MLXSW_SP_CELLS_TO_BYTES(transmit_queue); +} + +static struct mlxsw_sp_port_hw_stats mlxsw_sp_port_hw_tc_stats[] = { + { + .str = "tc_transmit_queue_tc", + .getter = mlxsw_reg_ppcnt_tc_transmit_queue_bytes_get, + }, + { + .str = "tc_no_buffer_discard_uc_tc", + .getter = mlxsw_reg_ppcnt_tc_no_buffer_discard_uc_get, + }, +}; + +#define MLXSW_SP_PORT_HW_TC_STATS_LEN ARRAY_SIZE(mlxsw_sp_port_hw_tc_stats) + +#define MLXSW_SP_PORT_ETHTOOL_STATS_LEN (MLXSW_SP_PORT_HW_STATS_LEN + \ + (MLXSW_SP_PORT_HW_PRIO_STATS_LEN + \ + MLXSW_SP_PORT_HW_TC_STATS_LEN) * \ + IEEE_8021QAZ_MAX_TCS) + +static void mlxsw_sp_port_get_prio_strings(u8 **p, int prio) +{ + int i; + + for (i = 0; i < MLXSW_SP_PORT_HW_PRIO_STATS_LEN; i++) { + snprintf(*p, ETH_GSTRING_LEN, "%s_%d", + mlxsw_sp_port_hw_prio_stats[i].str, prio); + *p += ETH_GSTRING_LEN; + } +} + +static void mlxsw_sp_port_get_tc_strings(u8 **p, int tc) +{ + int i; + + for (i = 0; i < MLXSW_SP_PORT_HW_TC_STATS_LEN; i++) { + snprintf(*p, ETH_GSTRING_LEN, "%s_%d", + mlxsw_sp_port_hw_tc_stats[i].str, tc); + *p += ETH_GSTRING_LEN; + } +} + static void mlxsw_sp_port_get_strings(struct net_device *dev, u32 stringset, u8 *data) { @@ -996,6 +1485,13 @@ static void mlxsw_sp_port_get_strings(struct net_device *dev, ETH_GSTRING_LEN); p += ETH_GSTRING_LEN; } + + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) + mlxsw_sp_port_get_prio_strings(&p, i); + + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) + mlxsw_sp_port_get_tc_strings(&p, i); + break; } } @@ -1023,26 +1519,80 @@ static int mlxsw_sp_port_set_phys_id(struct net_device *dev, return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mlcr), mlcr_pl); } -static void mlxsw_sp_port_get_stats(struct net_device *dev, - struct ethtool_stats *stats, u64 *data) +static int +mlxsw_sp_get_hw_stats_by_group(struct mlxsw_sp_port_hw_stats **p_hw_stats, + int *p_len, enum mlxsw_reg_ppcnt_grp grp) +{ + switch (grp) { + case MLXSW_REG_PPCNT_IEEE_8023_CNT: + *p_hw_stats = mlxsw_sp_port_hw_stats; + *p_len = MLXSW_SP_PORT_HW_STATS_LEN; + break; + case MLXSW_REG_PPCNT_PRIO_CNT: + *p_hw_stats = mlxsw_sp_port_hw_prio_stats; + *p_len = MLXSW_SP_PORT_HW_PRIO_STATS_LEN; + break; + case MLXSW_REG_PPCNT_TC_CNT: + *p_hw_stats = mlxsw_sp_port_hw_tc_stats; + *p_len = MLXSW_SP_PORT_HW_TC_STATS_LEN; + break; + default: + WARN_ON(1); + return -ENOTSUPP; + } + return 0; +} + +static void __mlxsw_sp_port_get_stats(struct net_device *dev, + enum mlxsw_reg_ppcnt_grp grp, int prio, + u64 *data, int data_index) { struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct mlxsw_sp_port_hw_stats *hw_stats; char ppcnt_pl[MLXSW_REG_PPCNT_LEN]; - int i; + int i, len; int err; - mlxsw_reg_ppcnt_pack(ppcnt_pl, mlxsw_sp_port->local_port); + err = mlxsw_sp_get_hw_stats_by_group(&hw_stats, &len, grp); + if (err) + return; + mlxsw_reg_ppcnt_pack(ppcnt_pl, mlxsw_sp_port->local_port, grp, prio); err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ppcnt), ppcnt_pl); - for (i = 0; i < MLXSW_SP_PORT_HW_STATS_LEN; i++) - data[i] = !err ? mlxsw_sp_port_hw_stats[i].getter(ppcnt_pl) : 0; + for (i = 0; i < len; i++) + data[data_index + i] = !err ? hw_stats[i].getter(ppcnt_pl) : 0; +} + +static void mlxsw_sp_port_get_stats(struct net_device *dev, + struct ethtool_stats *stats, u64 *data) +{ + int i, data_index = 0; + + /* IEEE 802.3 Counters */ + __mlxsw_sp_port_get_stats(dev, MLXSW_REG_PPCNT_IEEE_8023_CNT, 0, + data, data_index); + data_index = MLXSW_SP_PORT_HW_STATS_LEN; + + /* Per-Priority Counters */ + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + __mlxsw_sp_port_get_stats(dev, MLXSW_REG_PPCNT_PRIO_CNT, i, + data, data_index); + data_index += MLXSW_SP_PORT_HW_PRIO_STATS_LEN; + } + + /* Per-TC Counters */ + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + __mlxsw_sp_port_get_stats(dev, MLXSW_REG_PPCNT_TC_CNT, i, + data, data_index); + data_index += MLXSW_SP_PORT_HW_TC_STATS_LEN; + } } static int mlxsw_sp_port_get_sset_count(struct net_device *dev, int sset) { switch (sset) { case ETH_SS_STATS: - return MLXSW_SP_PORT_HW_STATS_LEN; + return MLXSW_SP_PORT_ETHTOOL_STATS_LEN; default: return -EOPNOTSUPP; } @@ -1263,7 +1813,8 @@ static int mlxsw_sp_port_get_settings(struct net_device *dev, cmd->supported = mlxsw_sp_from_ptys_supported_port(eth_proto_cap) | mlxsw_sp_from_ptys_supported_link(eth_proto_cap) | - SUPPORTED_Pause | SUPPORTED_Asym_Pause; + SUPPORTED_Pause | SUPPORTED_Asym_Pause | + SUPPORTED_Autoneg; cmd->advertising = mlxsw_sp_from_ptys_advert_link(eth_proto_admin); mlxsw_sp_from_ptys_speed_duplex(netif_carrier_ok(dev), eth_proto_oper, cmd); @@ -1322,7 +1873,6 @@ static int mlxsw_sp_port_set_settings(struct net_device *dev, u32 eth_proto_new; u32 eth_proto_cap; u32 eth_proto_admin; - bool is_up; int err; speed = ethtool_cmd_speed(cmd); @@ -1354,12 +1904,7 @@ static int mlxsw_sp_port_set_settings(struct net_device *dev, return err; } - err = mlxsw_sp_port_oper_status_get(mlxsw_sp_port, &is_up); - if (err) { - netdev_err(dev, "Failed to get oper status"); - return err; - } - if (!is_up) + if (!netif_running(dev)) return 0; err = mlxsw_sp_port_admin_status_set(mlxsw_sp_port, false); @@ -1380,6 +1925,8 @@ static int mlxsw_sp_port_set_settings(struct net_device *dev, static const struct ethtool_ops mlxsw_sp_port_ethtool_ops = { .get_drvinfo = mlxsw_sp_port_get_drvinfo, .get_link = ethtool_op_get_link, + .get_pauseparam = mlxsw_sp_port_get_pauseparam, + .set_pauseparam = mlxsw_sp_port_set_pauseparam, .get_strings = mlxsw_sp_port_get_strings, .set_phys_id = mlxsw_sp_port_set_phys_id, .get_ethtool_stats = mlxsw_sp_port_get_stats, @@ -1402,12 +1949,124 @@ mlxsw_sp_port_speed_by_width_set(struct mlxsw_sp_port *mlxsw_sp_port, u8 width) return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl); } -static int __mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, - bool split, u8 module, u8 width) +int mlxsw_sp_port_ets_set(struct mlxsw_sp_port *mlxsw_sp_port, + enum mlxsw_reg_qeec_hr hr, u8 index, u8 next_index, + bool dwrr, u8 dwrr_weight) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char qeec_pl[MLXSW_REG_QEEC_LEN]; + + mlxsw_reg_qeec_pack(qeec_pl, mlxsw_sp_port->local_port, hr, index, + next_index); + mlxsw_reg_qeec_de_set(qeec_pl, true); + mlxsw_reg_qeec_dwrr_set(qeec_pl, dwrr); + mlxsw_reg_qeec_dwrr_weight_set(qeec_pl, dwrr_weight); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(qeec), qeec_pl); +} + +int mlxsw_sp_port_ets_maxrate_set(struct mlxsw_sp_port *mlxsw_sp_port, + enum mlxsw_reg_qeec_hr hr, u8 index, + u8 next_index, u32 maxrate) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char qeec_pl[MLXSW_REG_QEEC_LEN]; + + mlxsw_reg_qeec_pack(qeec_pl, mlxsw_sp_port->local_port, hr, index, + next_index); + mlxsw_reg_qeec_mase_set(qeec_pl, true); + mlxsw_reg_qeec_max_shaper_rate_set(qeec_pl, maxrate); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(qeec), qeec_pl); +} + +int mlxsw_sp_port_prio_tc_set(struct mlxsw_sp_port *mlxsw_sp_port, + u8 switch_prio, u8 tclass) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char qtct_pl[MLXSW_REG_QTCT_LEN]; + + mlxsw_reg_qtct_pack(qtct_pl, mlxsw_sp_port->local_port, switch_prio, + tclass); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(qtct), qtct_pl); +} + +static int mlxsw_sp_port_ets_init(struct mlxsw_sp_port *mlxsw_sp_port) +{ + int err, i; + + /* Setup the elements hierarcy, so that each TC is linked to + * one subgroup, which are all member in the same group. + */ + err = mlxsw_sp_port_ets_set(mlxsw_sp_port, + MLXSW_REG_QEEC_HIERARCY_GROUP, 0, 0, false, + 0); + if (err) + return err; + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + err = mlxsw_sp_port_ets_set(mlxsw_sp_port, + MLXSW_REG_QEEC_HIERARCY_SUBGROUP, i, + 0, false, 0); + if (err) + return err; + } + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + err = mlxsw_sp_port_ets_set(mlxsw_sp_port, + MLXSW_REG_QEEC_HIERARCY_TC, i, i, + false, 0); + if (err) + return err; + } + + /* Make sure the max shaper is disabled in all hierarcies that + * support it. + */ + err = mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port, + MLXSW_REG_QEEC_HIERARCY_PORT, 0, 0, + MLXSW_REG_QEEC_MAS_DIS); + if (err) + return err; + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + err = mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port, + MLXSW_REG_QEEC_HIERARCY_SUBGROUP, + i, 0, + MLXSW_REG_QEEC_MAS_DIS); + if (err) + return err; + } + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + err = mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port, + MLXSW_REG_QEEC_HIERARCY_TC, + i, i, + MLXSW_REG_QEEC_MAS_DIS); + if (err) + return err; + } + + /* Map all priorities to traffic class 0. */ + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + err = mlxsw_sp_port_prio_tc_set(mlxsw_sp_port, i, 0); + if (err) + return err; + } + + return 0; +} + +static int mlxsw_sp_port_pvid_vport_create(struct mlxsw_sp_port *mlxsw_sp_port) +{ + mlxsw_sp_port->pvid = 1; + + return mlxsw_sp_port_add_vid(mlxsw_sp_port->dev, 0, 1); +} + +static int mlxsw_sp_port_pvid_vport_destroy(struct mlxsw_sp_port *mlxsw_sp_port) +{ + return mlxsw_sp_port_kill_vid(mlxsw_sp_port->dev, 0, 1); +} + +static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, + bool split, u8 module, u8 width, u8 lane) { - struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); struct mlxsw_sp_port *mlxsw_sp_port; - struct devlink_port *devlink_port; struct net_device *dev; size_t bytes; int err; @@ -1420,6 +2079,9 @@ static int __mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, mlxsw_sp_port->mlxsw_sp = mlxsw_sp; mlxsw_sp_port->local_port = local_port; mlxsw_sp_port->split = split; + mlxsw_sp_port->mapping.module = module; + mlxsw_sp_port->mapping.width = width; + mlxsw_sp_port->mapping.lane = lane; bytes = DIV_ROUND_UP(VLAN_N_VID, BITS_PER_BYTE); mlxsw_sp_port->active_vlans = kzalloc(bytes, GFP_KERNEL); if (!mlxsw_sp_port->active_vlans) { @@ -1432,6 +2094,7 @@ static int __mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, goto err_port_untagged_vlans_alloc; } INIT_LIST_HEAD(&mlxsw_sp_port->vports_list); + INIT_LIST_HEAD(&mlxsw_sp_port->mall_tc_list); mlxsw_sp_port->pcpu_stats = netdev_alloc_pcpu_stats(struct mlxsw_sp_port_pcpu_stats); @@ -1443,6 +2106,13 @@ static int __mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, dev->netdev_ops = &mlxsw_sp_port_netdev_ops; dev->ethtool_ops = &mlxsw_sp_port_ethtool_ops; + err = mlxsw_sp_port_swid_set(mlxsw_sp_port, 0); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to set SWID\n", + mlxsw_sp_port->local_port); + goto err_port_swid_set; + } + err = mlxsw_sp_port_dev_addr_init(mlxsw_sp_port); if (err) { dev_err(mlxsw_sp->bus_info->dev, "Port %d: Unable to init port mac address\n", @@ -1453,23 +2123,14 @@ static int __mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, netif_carrier_off(dev); dev->features |= NETIF_F_NETNS_LOCAL | NETIF_F_LLTX | NETIF_F_SG | - NETIF_F_HW_VLAN_CTAG_FILTER; + NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_TC; + dev->hw_features |= NETIF_F_HW_TC; /* Each packet needs to have a Tx header (metadata) on top all other * headers. */ dev->hard_header_len += MLXSW_TXHDR_LEN; - devlink_port = &mlxsw_sp_port->devlink_port; - if (mlxsw_sp_port->split) - devlink_port_split_set(devlink_port, module); - err = devlink_port_register(devlink, devlink_port, local_port); - if (err) { - dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to register devlink port\n", - mlxsw_sp_port->local_port); - goto err_devlink_port_register; - } - err = mlxsw_sp_port_system_port_mapping_set(mlxsw_sp_port); if (err) { dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to set system port mapping\n", @@ -1477,13 +2138,6 @@ static int __mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, goto err_port_system_port_mapping_set; } - err = mlxsw_sp_port_swid_set(mlxsw_sp_port, 0); - if (err) { - dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to set SWID\n", - mlxsw_sp_port->local_port); - goto err_port_swid_set; - } - err = mlxsw_sp_port_speed_by_width_set(mlxsw_sp_port, width); if (err) { dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to enable speeds\n", @@ -1509,7 +2163,30 @@ static int __mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, goto err_port_buffers_init; } + err = mlxsw_sp_port_ets_init(mlxsw_sp_port); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to initialize ETS\n", + mlxsw_sp_port->local_port); + goto err_port_ets_init; + } + + /* ETS and buffers must be initialized before DCB. */ + err = mlxsw_sp_port_dcb_init(mlxsw_sp_port); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to initialize DCB\n", + mlxsw_sp_port->local_port); + goto err_port_dcb_init; + } + + err = mlxsw_sp_port_pvid_vport_create(mlxsw_sp_port); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to create PVID vPort\n", + mlxsw_sp_port->local_port); + goto err_port_pvid_vport_create; + } + mlxsw_sp_port_switchdev_init(mlxsw_sp_port); + mlxsw_sp->ports[local_port] = mlxsw_sp_port; err = register_netdev(dev); if (err) { dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to register netdev\n", @@ -1517,27 +2194,35 @@ static int __mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, goto err_register_netdev; } - devlink_port_type_eth_set(devlink_port, dev); - - err = mlxsw_sp_port_vlan_init(mlxsw_sp_port); - if (err) - goto err_port_vlan_init; + err = mlxsw_core_port_init(mlxsw_sp->core, &mlxsw_sp_port->core_port, + mlxsw_sp_port->local_port, dev, + mlxsw_sp_port->split, module); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to init core port\n", + mlxsw_sp_port->local_port); + goto err_core_port_init; + } - mlxsw_sp->ports[local_port] = mlxsw_sp_port; return 0; -err_port_vlan_init: +err_core_port_init: unregister_netdev(dev); err_register_netdev: + mlxsw_sp->ports[local_port] = NULL; + mlxsw_sp_port_switchdev_fini(mlxsw_sp_port); + mlxsw_sp_port_pvid_vport_destroy(mlxsw_sp_port); +err_port_pvid_vport_create: + mlxsw_sp_port_dcb_fini(mlxsw_sp_port); +err_port_dcb_init: +err_port_ets_init: err_port_buffers_init: err_port_admin_status_set: err_port_mtu_set: err_port_speed_by_width_set: -err_port_swid_set: err_port_system_port_mapping_set: - devlink_port_unregister(&mlxsw_sp_port->devlink_port); -err_devlink_port_register: err_dev_addr_init: + mlxsw_sp_port_swid_set(mlxsw_sp_port, MLXSW_PORT_SWID_DISABLED_PORT); +err_port_swid_set: free_percpu(mlxsw_sp_port->pcpu_stats); err_alloc_stats: kfree(mlxsw_sp_port->untagged_vlans); @@ -1548,64 +2233,24 @@ err_port_active_vlans_alloc: return err; } -static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, - bool split, u8 module, u8 width, u8 lane) -{ - int err; - - err = mlxsw_sp_port_module_map(mlxsw_sp, local_port, module, width, - lane); - if (err) - return err; - - err = __mlxsw_sp_port_create(mlxsw_sp, local_port, split, module, - width); - if (err) - goto err_port_create; - - return 0; - -err_port_create: - mlxsw_sp_port_module_unmap(mlxsw_sp, local_port); - return err; -} - -static void mlxsw_sp_port_vports_fini(struct mlxsw_sp_port *mlxsw_sp_port) -{ - struct net_device *dev = mlxsw_sp_port->dev; - struct mlxsw_sp_port *mlxsw_sp_vport, *tmp; - - list_for_each_entry_safe(mlxsw_sp_vport, tmp, - &mlxsw_sp_port->vports_list, vport.list) { - u16 vid = mlxsw_sp_vport_vid_get(mlxsw_sp_vport); - - /* vPorts created for VLAN devices should already be gone - * by now, since we unregistered the port netdev. - */ - WARN_ON(is_vlan_dev(mlxsw_sp_vport->dev)); - mlxsw_sp_port_kill_vid(dev, 0, vid); - } -} - static void mlxsw_sp_port_remove(struct mlxsw_sp *mlxsw_sp, u8 local_port) { struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp->ports[local_port]; - struct devlink_port *devlink_port; if (!mlxsw_sp_port) return; - mlxsw_sp->ports[local_port] = NULL; - devlink_port = &mlxsw_sp_port->devlink_port; - devlink_port_type_clear(devlink_port); + mlxsw_core_port_fini(&mlxsw_sp_port->core_port); unregister_netdev(mlxsw_sp_port->dev); /* This calls ndo_stop */ - devlink_port_unregister(devlink_port); - mlxsw_sp_port_vports_fini(mlxsw_sp_port); + mlxsw_sp->ports[local_port] = NULL; mlxsw_sp_port_switchdev_fini(mlxsw_sp_port); + mlxsw_sp_port_pvid_vport_destroy(mlxsw_sp_port); + mlxsw_sp_port_dcb_fini(mlxsw_sp_port); mlxsw_sp_port_swid_set(mlxsw_sp_port, MLXSW_PORT_SWID_DISABLED_PORT); mlxsw_sp_port_module_unmap(mlxsw_sp, mlxsw_sp_port->local_port); free_percpu(mlxsw_sp_port->pcpu_stats); kfree(mlxsw_sp_port->untagged_vlans); kfree(mlxsw_sp_port->active_vlans); + WARN_ON_ONCE(!list_empty(&mlxsw_sp_port->vports_list)); free_netdev(mlxsw_sp_port->dev); } @@ -1620,8 +2265,8 @@ static void mlxsw_sp_ports_remove(struct mlxsw_sp *mlxsw_sp) static int mlxsw_sp_ports_create(struct mlxsw_sp *mlxsw_sp) { + u8 module, width, lane; size_t alloc_size; - u8 module, width; int i; int err; @@ -1632,13 +2277,14 @@ static int mlxsw_sp_ports_create(struct mlxsw_sp *mlxsw_sp) for (i = 1; i < MLXSW_PORT_MAX_PORTS; i++) { err = mlxsw_sp_port_module_info_get(mlxsw_sp, i, &module, - &width); + &width, &lane); if (err) goto err_port_module_info_get; if (!width) continue; mlxsw_sp->port_to_module[i] = module; - err = __mlxsw_sp_port_create(mlxsw_sp, i, false, module, width); + err = mlxsw_sp_port_create(mlxsw_sp, i, false, module, width, + lane); if (err) goto err_port_create; } @@ -1659,11 +2305,85 @@ static u8 mlxsw_sp_cluster_base_port_get(u8 local_port) return local_port - offset; } -static int mlxsw_sp_port_split(void *priv, u8 local_port, unsigned int count) +static int mlxsw_sp_port_split_create(struct mlxsw_sp *mlxsw_sp, u8 base_port, + u8 module, unsigned int count) { - struct mlxsw_sp *mlxsw_sp = priv; - struct mlxsw_sp_port *mlxsw_sp_port; u8 width = MLXSW_PORT_MODULE_MAX_WIDTH / count; + int err, i; + + for (i = 0; i < count; i++) { + err = mlxsw_sp_port_module_map(mlxsw_sp, base_port + i, module, + width, i * width); + if (err) + goto err_port_module_map; + } + + for (i = 0; i < count; i++) { + err = __mlxsw_sp_port_swid_set(mlxsw_sp, base_port + i, 0); + if (err) + goto err_port_swid_set; + } + + for (i = 0; i < count; i++) { + err = mlxsw_sp_port_create(mlxsw_sp, base_port + i, true, + module, width, i * width); + if (err) + goto err_port_create; + } + + return 0; + +err_port_create: + for (i--; i >= 0; i--) + mlxsw_sp_port_remove(mlxsw_sp, base_port + i); + i = count; +err_port_swid_set: + for (i--; i >= 0; i--) + __mlxsw_sp_port_swid_set(mlxsw_sp, base_port + i, + MLXSW_PORT_SWID_DISABLED_PORT); + i = count; +err_port_module_map: + for (i--; i >= 0; i--) + mlxsw_sp_port_module_unmap(mlxsw_sp, base_port + i); + return err; +} + +static void mlxsw_sp_port_unsplit_create(struct mlxsw_sp *mlxsw_sp, + u8 base_port, unsigned int count) +{ + u8 local_port, module, width = MLXSW_PORT_MODULE_MAX_WIDTH; + int i; + + /* Split by four means we need to re-create two ports, otherwise + * only one. + */ + count = count / 2; + + for (i = 0; i < count; i++) { + local_port = base_port + i * 2; + module = mlxsw_sp->port_to_module[local_port]; + + mlxsw_sp_port_module_map(mlxsw_sp, local_port, module, width, + 0); + } + + for (i = 0; i < count; i++) + __mlxsw_sp_port_swid_set(mlxsw_sp, base_port + i * 2, 0); + + for (i = 0; i < count; i++) { + local_port = base_port + i * 2; + module = mlxsw_sp->port_to_module[local_port]; + + mlxsw_sp_port_create(mlxsw_sp, local_port, false, module, + width, 0); + } +} + +static int mlxsw_sp_port_split(struct mlxsw_core *mlxsw_core, u8 local_port, + unsigned int count) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); + struct mlxsw_sp_port *mlxsw_sp_port; u8 module, cur_width, base_port; int i; int err; @@ -1675,18 +2395,14 @@ static int mlxsw_sp_port_split(void *priv, u8 local_port, unsigned int count) return -EINVAL; } + module = mlxsw_sp_port->mapping.module; + cur_width = mlxsw_sp_port->mapping.width; + if (count != 2 && count != 4) { netdev_err(mlxsw_sp_port->dev, "Port can only be split into 2 or 4 ports\n"); return -EINVAL; } - err = mlxsw_sp_port_module_info_get(mlxsw_sp, local_port, &module, - &cur_width); - if (err) { - netdev_err(mlxsw_sp_port->dev, "Failed to get port's width\n"); - return err; - } - if (cur_width != MLXSW_PORT_MODULE_MAX_WIDTH) { netdev_err(mlxsw_sp_port->dev, "Port cannot be split further\n"); return -EINVAL; @@ -1711,36 +2427,26 @@ static int mlxsw_sp_port_split(void *priv, u8 local_port, unsigned int count) for (i = 0; i < count; i++) mlxsw_sp_port_remove(mlxsw_sp, base_port + i); - for (i = 0; i < count; i++) { - err = mlxsw_sp_port_create(mlxsw_sp, base_port + i, true, - module, width, i * width); - if (err) { - dev_err(mlxsw_sp->bus_info->dev, "Failed to create split port\n"); - goto err_port_create; - } + err = mlxsw_sp_port_split_create(mlxsw_sp, base_port, module, count); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Failed to create split ports\n"); + goto err_port_split_create; } return 0; -err_port_create: - for (i--; i >= 0; i--) - mlxsw_sp_port_remove(mlxsw_sp, base_port + i); - for (i = 0; i < count / 2; i++) { - module = mlxsw_sp->port_to_module[base_port + i * 2]; - mlxsw_sp_port_create(mlxsw_sp, base_port + i * 2, false, - module, MLXSW_PORT_MODULE_MAX_WIDTH, 0); - } +err_port_split_create: + mlxsw_sp_port_unsplit_create(mlxsw_sp, base_port, count); return err; } -static int mlxsw_sp_port_unsplit(void *priv, u8 local_port) +static int mlxsw_sp_port_unsplit(struct mlxsw_core *mlxsw_core, u8 local_port) { - struct mlxsw_sp *mlxsw_sp = priv; + struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); struct mlxsw_sp_port *mlxsw_sp_port; - u8 module, cur_width, base_port; + u8 cur_width, base_port; unsigned int count; int i; - int err; mlxsw_sp_port = mlxsw_sp->ports[local_port]; if (!mlxsw_sp_port) { @@ -1754,12 +2460,7 @@ static int mlxsw_sp_port_unsplit(void *priv, u8 local_port) return -EINVAL; } - err = mlxsw_sp_port_module_info_get(mlxsw_sp, local_port, &module, - &cur_width); - if (err) { - netdev_err(mlxsw_sp_port->dev, "Failed to get port's width\n"); - return err; - } + cur_width = mlxsw_sp_port->mapping.width; count = cur_width == 1 ? 4 : 2; base_port = mlxsw_sp_cluster_base_port_get(local_port); @@ -1771,14 +2472,7 @@ static int mlxsw_sp_port_unsplit(void *priv, u8 local_port) for (i = 0; i < count; i++) mlxsw_sp_port_remove(mlxsw_sp, base_port + i); - for (i = 0; i < count / 2; i++) { - module = mlxsw_sp->port_to_module[base_port + i * 2]; - err = mlxsw_sp_port_create(mlxsw_sp, base_port + i * 2, false, - module, MLXSW_PORT_MODULE_MAX_WIDTH, - 0); - if (err) - dev_err(mlxsw_sp->bus_info->dev, "Failed to reinstantiate port\n"); - } + mlxsw_sp_port_unsplit_create(mlxsw_sp, base_port, count); return 0; } @@ -1793,11 +2487,8 @@ static void mlxsw_sp_pude_event_func(const struct mlxsw_reg_info *reg, local_port = mlxsw_reg_pude_local_port_get(pude_pl); mlxsw_sp_port = mlxsw_sp->ports[local_port]; - if (!mlxsw_sp_port) { - dev_warn(mlxsw_sp->bus_info->dev, "Port %d: Link event received for non-existent port\n", - local_port); + if (!mlxsw_sp_port) return; - } status = mlxsw_reg_pude_oper_status_get(pude_pl); if (status == MLXSW_PORT_OPER_STATUS_UP) { @@ -1952,6 +2643,51 @@ static const struct mlxsw_rx_listener mlxsw_sp_rx_listener[] = { .local_port = MLXSW_PORT_DONT_CARE, .trap_id = MLXSW_TRAP_ID_IGMP_V3_REPORT, }, + { + .func = mlxsw_sp_rx_listener_func, + .local_port = MLXSW_PORT_DONT_CARE, + .trap_id = MLXSW_TRAP_ID_ARPBC, + }, + { + .func = mlxsw_sp_rx_listener_func, + .local_port = MLXSW_PORT_DONT_CARE, + .trap_id = MLXSW_TRAP_ID_ARPUC, + }, + { + .func = mlxsw_sp_rx_listener_func, + .local_port = MLXSW_PORT_DONT_CARE, + .trap_id = MLXSW_TRAP_ID_MTUERROR, + }, + { + .func = mlxsw_sp_rx_listener_func, + .local_port = MLXSW_PORT_DONT_CARE, + .trap_id = MLXSW_TRAP_ID_TTLERROR, + }, + { + .func = mlxsw_sp_rx_listener_func, + .local_port = MLXSW_PORT_DONT_CARE, + .trap_id = MLXSW_TRAP_ID_LBERROR, + }, + { + .func = mlxsw_sp_rx_listener_func, + .local_port = MLXSW_PORT_DONT_CARE, + .trap_id = MLXSW_TRAP_ID_OSPF, + }, + { + .func = mlxsw_sp_rx_listener_func, + .local_port = MLXSW_PORT_DONT_CARE, + .trap_id = MLXSW_TRAP_ID_IP2ME, + }, + { + .func = mlxsw_sp_rx_listener_func, + .local_port = MLXSW_PORT_DONT_CARE, + .trap_id = MLXSW_TRAP_ID_RTR_INGRESS0, + }, + { + .func = mlxsw_sp_rx_listener_func, + .local_port = MLXSW_PORT_DONT_CARE, + .trap_id = MLXSW_TRAP_ID_HOST_MISS_IPV4, + }, }; static int mlxsw_sp_traps_init(struct mlxsw_sp *mlxsw_sp) @@ -1992,7 +2728,7 @@ err_rx_trap_set: mlxsw_sp); err_rx_listener_register: for (i--; i >= 0; i--) { - mlxsw_reg_hpkt_pack(hpkt_pl, MLXSW_REG_HPKT_ACTION_FORWARD, + mlxsw_reg_hpkt_pack(hpkt_pl, MLXSW_REG_HPKT_ACTION_DISCARD, mlxsw_sp_rx_listener[i].trap_id); mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(hpkt), hpkt_pl); @@ -2009,7 +2745,7 @@ static void mlxsw_sp_traps_fini(struct mlxsw_sp *mlxsw_sp) int i; for (i = 0; i < ARRAY_SIZE(mlxsw_sp_rx_listener); i++) { - mlxsw_reg_hpkt_pack(hpkt_pl, MLXSW_REG_HPKT_ACTION_FORWARD, + mlxsw_reg_hpkt_pack(hpkt_pl, MLXSW_REG_HPKT_ACTION_DISCARD, mlxsw_sp_rx_listener[i].trap_id); mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(hpkt), hpkt_pl); @@ -2080,16 +2816,16 @@ static int mlxsw_sp_lag_init(struct mlxsw_sp *mlxsw_sp) return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(slcr), slcr_pl); } -static int mlxsw_sp_init(void *priv, struct mlxsw_core *mlxsw_core, +static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core, const struct mlxsw_bus_info *mlxsw_bus_info) { - struct mlxsw_sp *mlxsw_sp = priv; + struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); int err; mlxsw_sp->core = mlxsw_core; mlxsw_sp->bus_info = mlxsw_bus_info; - INIT_LIST_HEAD(&mlxsw_sp->port_vfids.list); - INIT_LIST_HEAD(&mlxsw_sp->br_vfids.list); + INIT_LIST_HEAD(&mlxsw_sp->fids); + INIT_LIST_HEAD(&mlxsw_sp->vfids.list); INIT_LIST_HEAD(&mlxsw_sp->br_mids.list); err = mlxsw_sp_base_mac_get(mlxsw_sp); @@ -2098,16 +2834,10 @@ static int mlxsw_sp_init(void *priv, struct mlxsw_core *mlxsw_core, return err; } - err = mlxsw_sp_ports_create(mlxsw_sp); - if (err) { - dev_err(mlxsw_sp->bus_info->dev, "Failed to create ports\n"); - return err; - } - err = mlxsw_sp_event_register(mlxsw_sp, MLXSW_TRAP_ID_PUDE); if (err) { dev_err(mlxsw_sp->bus_info->dev, "Failed to register for PUDE events\n"); - goto err_event_register; + return err; } err = mlxsw_sp_traps_init(mlxsw_sp); @@ -2140,28 +2870,59 @@ static int mlxsw_sp_init(void *priv, struct mlxsw_core *mlxsw_core, goto err_switchdev_init; } + err = mlxsw_sp_router_init(mlxsw_sp); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize router\n"); + goto err_router_init; + } + + err = mlxsw_sp_span_init(mlxsw_sp); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Failed to init span system\n"); + goto err_span_init; + } + + err = mlxsw_sp_ports_create(mlxsw_sp); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Failed to create ports\n"); + goto err_ports_create; + } + return 0; +err_ports_create: + mlxsw_sp_span_fini(mlxsw_sp); +err_span_init: + mlxsw_sp_router_fini(mlxsw_sp); +err_router_init: + mlxsw_sp_switchdev_fini(mlxsw_sp); err_switchdev_init: err_lag_init: + mlxsw_sp_buffers_fini(mlxsw_sp); err_buffers_init: err_flood_init: mlxsw_sp_traps_fini(mlxsw_sp); err_rx_listener_register: mlxsw_sp_event_unregister(mlxsw_sp, MLXSW_TRAP_ID_PUDE); -err_event_register: - mlxsw_sp_ports_remove(mlxsw_sp); return err; } -static void mlxsw_sp_fini(void *priv) +static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core) { - struct mlxsw_sp *mlxsw_sp = priv; + struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); + int i; + mlxsw_sp_ports_remove(mlxsw_sp); + mlxsw_sp_span_fini(mlxsw_sp); + mlxsw_sp_router_fini(mlxsw_sp); mlxsw_sp_switchdev_fini(mlxsw_sp); + mlxsw_sp_buffers_fini(mlxsw_sp); mlxsw_sp_traps_fini(mlxsw_sp); mlxsw_sp_event_unregister(mlxsw_sp, MLXSW_TRAP_ID_PUDE); - mlxsw_sp_ports_remove(mlxsw_sp); + WARN_ON(!list_empty(&mlxsw_sp->vfids.list)); + WARN_ON(!list_empty(&mlxsw_sp->fids)); + for (i = 0; i < MLXSW_SP_RIF_MAX; i++) + WARN_ON_ONCE(mlxsw_sp->rifs[i]); } static struct mlxsw_config_profile mlxsw_sp_config_profile = { @@ -2192,37 +2953,667 @@ static struct mlxsw_config_profile mlxsw_sp_config_profile = { .max_ib_mc = 0, .used_max_pkey = 1, .max_pkey = 0, + .used_kvd_sizes = 1, + .kvd_linear_size = MLXSW_SP_KVD_LINEAR_SIZE, + .kvd_hash_single_size = MLXSW_SP_KVD_HASH_SINGLE_SIZE, + .kvd_hash_double_size = MLXSW_SP_KVD_HASH_DOUBLE_SIZE, .swid_config = { { .used_type = 1, .type = MLXSW_PORT_SWID_TYPE_ETH, } }, + .resource_query_enable = 1, }; static struct mlxsw_driver mlxsw_sp_driver = { - .kind = MLXSW_DEVICE_KIND_SPECTRUM, - .owner = THIS_MODULE, - .priv_size = sizeof(struct mlxsw_sp), - .init = mlxsw_sp_init, - .fini = mlxsw_sp_fini, - .port_split = mlxsw_sp_port_split, - .port_unsplit = mlxsw_sp_port_unsplit, - .txhdr_construct = mlxsw_sp_txhdr_construct, - .txhdr_len = MLXSW_TXHDR_LEN, - .profile = &mlxsw_sp_config_profile, + .kind = MLXSW_DEVICE_KIND_SPECTRUM, + .owner = THIS_MODULE, + .priv_size = sizeof(struct mlxsw_sp), + .init = mlxsw_sp_init, + .fini = mlxsw_sp_fini, + .port_split = mlxsw_sp_port_split, + .port_unsplit = mlxsw_sp_port_unsplit, + .sb_pool_get = mlxsw_sp_sb_pool_get, + .sb_pool_set = mlxsw_sp_sb_pool_set, + .sb_port_pool_get = mlxsw_sp_sb_port_pool_get, + .sb_port_pool_set = mlxsw_sp_sb_port_pool_set, + .sb_tc_pool_bind_get = mlxsw_sp_sb_tc_pool_bind_get, + .sb_tc_pool_bind_set = mlxsw_sp_sb_tc_pool_bind_set, + .sb_occ_snapshot = mlxsw_sp_sb_occ_snapshot, + .sb_occ_max_clear = mlxsw_sp_sb_occ_max_clear, + .sb_occ_port_pool_get = mlxsw_sp_sb_occ_port_pool_get, + .sb_occ_tc_port_bind_get = mlxsw_sp_sb_occ_tc_port_bind_get, + .txhdr_construct = mlxsw_sp_txhdr_construct, + .txhdr_len = MLXSW_TXHDR_LEN, + .profile = &mlxsw_sp_config_profile, }; -static int -mlxsw_sp_port_fdb_flush_by_port(const struct mlxsw_sp_port *mlxsw_sp_port) +static bool mlxsw_sp_port_dev_check(const struct net_device *dev) +{ + return dev->netdev_ops == &mlxsw_sp_port_netdev_ops; +} + +static struct mlxsw_sp_port *mlxsw_sp_port_dev_lower_find(struct net_device *dev) +{ + struct net_device *lower_dev; + struct list_head *iter; + + if (mlxsw_sp_port_dev_check(dev)) + return netdev_priv(dev); + + netdev_for_each_all_lower_dev(dev, lower_dev, iter) { + if (mlxsw_sp_port_dev_check(lower_dev)) + return netdev_priv(lower_dev); + } + return NULL; +} + +static struct mlxsw_sp *mlxsw_sp_lower_get(struct net_device *dev) +{ + struct mlxsw_sp_port *mlxsw_sp_port; + + mlxsw_sp_port = mlxsw_sp_port_dev_lower_find(dev); + return mlxsw_sp_port ? mlxsw_sp_port->mlxsw_sp : NULL; +} + +static struct mlxsw_sp_port *mlxsw_sp_port_dev_lower_find_rcu(struct net_device *dev) +{ + struct net_device *lower_dev; + struct list_head *iter; + + if (mlxsw_sp_port_dev_check(dev)) + return netdev_priv(dev); + + netdev_for_each_all_lower_dev_rcu(dev, lower_dev, iter) { + if (mlxsw_sp_port_dev_check(lower_dev)) + return netdev_priv(lower_dev); + } + return NULL; +} + +struct mlxsw_sp_port *mlxsw_sp_port_lower_dev_hold(struct net_device *dev) +{ + struct mlxsw_sp_port *mlxsw_sp_port; + + rcu_read_lock(); + mlxsw_sp_port = mlxsw_sp_port_dev_lower_find_rcu(dev); + if (mlxsw_sp_port) + dev_hold(mlxsw_sp_port->dev); + rcu_read_unlock(); + return mlxsw_sp_port; +} + +void mlxsw_sp_port_dev_put(struct mlxsw_sp_port *mlxsw_sp_port) +{ + dev_put(mlxsw_sp_port->dev); +} + +static bool mlxsw_sp_rif_should_config(struct mlxsw_sp_rif *r, + unsigned long event) +{ + switch (event) { + case NETDEV_UP: + if (!r) + return true; + r->ref_count++; + return false; + case NETDEV_DOWN: + if (r && --r->ref_count == 0) + return true; + /* It is possible we already removed the RIF ourselves + * if it was assigned to a netdev that is now a bridge + * or LAG slave. + */ + return false; + } + + return false; +} + +static int mlxsw_sp_avail_rif_get(struct mlxsw_sp *mlxsw_sp) +{ + int i; + + for (i = 0; i < MLXSW_SP_RIF_MAX; i++) + if (!mlxsw_sp->rifs[i]) + return i; + + return MLXSW_SP_RIF_MAX; +} + +static void mlxsw_sp_vport_rif_sp_attr_get(struct mlxsw_sp_port *mlxsw_sp_vport, + bool *p_lagged, u16 *p_system_port) +{ + u8 local_port = mlxsw_sp_vport->local_port; + + *p_lagged = mlxsw_sp_vport->lagged; + *p_system_port = *p_lagged ? mlxsw_sp_vport->lag_id : local_port; +} + +static int mlxsw_sp_vport_rif_sp_op(struct mlxsw_sp_port *mlxsw_sp_vport, + struct net_device *l3_dev, u16 rif, + bool create) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_vport->mlxsw_sp; + bool lagged = mlxsw_sp_vport->lagged; + char ritr_pl[MLXSW_REG_RITR_LEN]; + u16 system_port; + + mlxsw_reg_ritr_pack(ritr_pl, create, MLXSW_REG_RITR_SP_IF, rif, + l3_dev->mtu, l3_dev->dev_addr); + + mlxsw_sp_vport_rif_sp_attr_get(mlxsw_sp_vport, &lagged, &system_port); + mlxsw_reg_ritr_sp_if_pack(ritr_pl, lagged, system_port, + mlxsw_sp_vport_vid_get(mlxsw_sp_vport)); + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl); +} + +static void mlxsw_sp_vport_rif_sp_leave(struct mlxsw_sp_port *mlxsw_sp_vport); + +static struct mlxsw_sp_fid * +mlxsw_sp_rfid_alloc(u16 fid, struct net_device *l3_dev) +{ + struct mlxsw_sp_fid *f; + + f = kzalloc(sizeof(*f), GFP_KERNEL); + if (!f) + return NULL; + + f->leave = mlxsw_sp_vport_rif_sp_leave; + f->ref_count = 0; + f->dev = l3_dev; + f->fid = fid; + + return f; +} + +static struct mlxsw_sp_rif * +mlxsw_sp_rif_alloc(u16 rif, struct net_device *l3_dev, struct mlxsw_sp_fid *f) +{ + struct mlxsw_sp_rif *r; + + r = kzalloc(sizeof(*r), GFP_KERNEL); + if (!r) + return NULL; + + ether_addr_copy(r->addr, l3_dev->dev_addr); + r->mtu = l3_dev->mtu; + r->ref_count = 1; + r->dev = l3_dev; + r->rif = rif; + r->f = f; + + return r; +} + +static struct mlxsw_sp_rif * +mlxsw_sp_vport_rif_sp_create(struct mlxsw_sp_port *mlxsw_sp_vport, + struct net_device *l3_dev) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_vport->mlxsw_sp; + struct mlxsw_sp_fid *f; + struct mlxsw_sp_rif *r; + u16 fid, rif; + int err; + + rif = mlxsw_sp_avail_rif_get(mlxsw_sp); + if (rif == MLXSW_SP_RIF_MAX) + return ERR_PTR(-ERANGE); + + err = mlxsw_sp_vport_rif_sp_op(mlxsw_sp_vport, l3_dev, rif, true); + if (err) + return ERR_PTR(err); + + fid = mlxsw_sp_rif_sp_to_fid(rif); + err = mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, fid, true); + if (err) + goto err_rif_fdb_op; + + f = mlxsw_sp_rfid_alloc(fid, l3_dev); + if (!f) { + err = -ENOMEM; + goto err_rfid_alloc; + } + + r = mlxsw_sp_rif_alloc(rif, l3_dev, f); + if (!r) { + err = -ENOMEM; + goto err_rif_alloc; + } + + f->r = r; + mlxsw_sp->rifs[rif] = r; + + return r; + +err_rif_alloc: + kfree(f); +err_rfid_alloc: + mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, fid, false); +err_rif_fdb_op: + mlxsw_sp_vport_rif_sp_op(mlxsw_sp_vport, l3_dev, rif, false); + return ERR_PTR(err); +} + +static void mlxsw_sp_vport_rif_sp_destroy(struct mlxsw_sp_port *mlxsw_sp_vport, + struct mlxsw_sp_rif *r) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_vport->mlxsw_sp; + struct net_device *l3_dev = r->dev; + struct mlxsw_sp_fid *f = r->f; + u16 fid = f->fid; + u16 rif = r->rif; + + mlxsw_sp->rifs[rif] = NULL; + f->r = NULL; + + kfree(r); + + kfree(f); + + mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, fid, false); + + mlxsw_sp_vport_rif_sp_op(mlxsw_sp_vport, l3_dev, rif, false); +} + +static int mlxsw_sp_vport_rif_sp_join(struct mlxsw_sp_port *mlxsw_sp_vport, + struct net_device *l3_dev) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_vport->mlxsw_sp; + struct mlxsw_sp_rif *r; + + r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev); + if (!r) { + r = mlxsw_sp_vport_rif_sp_create(mlxsw_sp_vport, l3_dev); + if (IS_ERR(r)) + return PTR_ERR(r); + } + + mlxsw_sp_vport_fid_set(mlxsw_sp_vport, r->f); + r->f->ref_count++; + + netdev_dbg(mlxsw_sp_vport->dev, "Joined FID=%d\n", r->f->fid); + + return 0; +} + +static void mlxsw_sp_vport_rif_sp_leave(struct mlxsw_sp_port *mlxsw_sp_vport) +{ + struct mlxsw_sp_fid *f = mlxsw_sp_vport_fid_get(mlxsw_sp_vport); + + netdev_dbg(mlxsw_sp_vport->dev, "Left FID=%d\n", f->fid); + + mlxsw_sp_vport_fid_set(mlxsw_sp_vport, NULL); + if (--f->ref_count == 0) + mlxsw_sp_vport_rif_sp_destroy(mlxsw_sp_vport, f->r); +} + +static int mlxsw_sp_inetaddr_vport_event(struct net_device *l3_dev, + struct net_device *port_dev, + unsigned long event, u16 vid) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(port_dev); + struct mlxsw_sp_port *mlxsw_sp_vport; + + mlxsw_sp_vport = mlxsw_sp_port_vport_find(mlxsw_sp_port, vid); + if (WARN_ON(!mlxsw_sp_vport)) + return -EINVAL; + + switch (event) { + case NETDEV_UP: + return mlxsw_sp_vport_rif_sp_join(mlxsw_sp_vport, l3_dev); + case NETDEV_DOWN: + mlxsw_sp_vport_rif_sp_leave(mlxsw_sp_vport); + break; + } + + return 0; +} + +static int mlxsw_sp_inetaddr_port_event(struct net_device *port_dev, + unsigned long event) +{ + if (netif_is_bridge_port(port_dev) || netif_is_lag_port(port_dev)) + return 0; + + return mlxsw_sp_inetaddr_vport_event(port_dev, port_dev, event, 1); +} + +static int __mlxsw_sp_inetaddr_lag_event(struct net_device *l3_dev, + struct net_device *lag_dev, + unsigned long event, u16 vid) +{ + struct net_device *port_dev; + struct list_head *iter; + int err; + + netdev_for_each_lower_dev(lag_dev, port_dev, iter) { + if (mlxsw_sp_port_dev_check(port_dev)) { + err = mlxsw_sp_inetaddr_vport_event(l3_dev, port_dev, + event, vid); + if (err) + return err; + } + } + + return 0; +} + +static int mlxsw_sp_inetaddr_lag_event(struct net_device *lag_dev, + unsigned long event) +{ + if (netif_is_bridge_port(lag_dev)) + return 0; + + return __mlxsw_sp_inetaddr_lag_event(lag_dev, lag_dev, event, 1); +} + +static struct mlxsw_sp_fid *mlxsw_sp_bridge_fid_get(struct mlxsw_sp *mlxsw_sp, + struct net_device *l3_dev) +{ + u16 fid; + + if (is_vlan_dev(l3_dev)) + fid = vlan_dev_vlan_id(l3_dev); + else if (mlxsw_sp->master_bridge.dev == l3_dev) + fid = 1; + else + return mlxsw_sp_vfid_find(mlxsw_sp, l3_dev); + + return mlxsw_sp_fid_find(mlxsw_sp, fid); +} + +static enum mlxsw_flood_table_type mlxsw_sp_flood_table_type_get(u16 fid) +{ + return mlxsw_sp_fid_is_vfid(fid) ? MLXSW_REG_SFGC_TABLE_TYPE_FID : + MLXSW_REG_SFGC_TABLE_TYPE_FID_OFFEST; +} + +static u16 mlxsw_sp_flood_table_index_get(u16 fid) +{ + return mlxsw_sp_fid_is_vfid(fid) ? mlxsw_sp_fid_to_vfid(fid) : fid; +} + +static int mlxsw_sp_router_port_flood_set(struct mlxsw_sp *mlxsw_sp, u16 fid, + bool set) +{ + enum mlxsw_flood_table_type table_type; + char *sftr_pl; + u16 index; + int err; + + sftr_pl = kmalloc(MLXSW_REG_SFTR_LEN, GFP_KERNEL); + if (!sftr_pl) + return -ENOMEM; + + table_type = mlxsw_sp_flood_table_type_get(fid); + index = mlxsw_sp_flood_table_index_get(fid); + mlxsw_reg_sftr_pack(sftr_pl, MLXSW_SP_FLOOD_TABLE_BM, index, table_type, + 1, MLXSW_PORT_ROUTER_PORT, set); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sftr), sftr_pl); + + kfree(sftr_pl); + return err; +} + +static enum mlxsw_reg_ritr_if_type mlxsw_sp_rif_type_get(u16 fid) +{ + if (mlxsw_sp_fid_is_vfid(fid)) + return MLXSW_REG_RITR_FID_IF; + else + return MLXSW_REG_RITR_VLAN_IF; +} + +static int mlxsw_sp_rif_bridge_op(struct mlxsw_sp *mlxsw_sp, + struct net_device *l3_dev, + u16 fid, u16 rif, + bool create) +{ + enum mlxsw_reg_ritr_if_type rif_type; + char ritr_pl[MLXSW_REG_RITR_LEN]; + + rif_type = mlxsw_sp_rif_type_get(fid); + mlxsw_reg_ritr_pack(ritr_pl, create, rif_type, rif, l3_dev->mtu, + l3_dev->dev_addr); + mlxsw_reg_ritr_fid_set(ritr_pl, rif_type, fid); + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl); +} + +static int mlxsw_sp_rif_bridge_create(struct mlxsw_sp *mlxsw_sp, + struct net_device *l3_dev, + struct mlxsw_sp_fid *f) +{ + struct mlxsw_sp_rif *r; + u16 rif; + int err; + + rif = mlxsw_sp_avail_rif_get(mlxsw_sp); + if (rif == MLXSW_SP_RIF_MAX) + return -ERANGE; + + err = mlxsw_sp_router_port_flood_set(mlxsw_sp, f->fid, true); + if (err) + return err; + + err = mlxsw_sp_rif_bridge_op(mlxsw_sp, l3_dev, f->fid, rif, true); + if (err) + goto err_rif_bridge_op; + + err = mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, f->fid, true); + if (err) + goto err_rif_fdb_op; + + r = mlxsw_sp_rif_alloc(rif, l3_dev, f); + if (!r) { + err = -ENOMEM; + goto err_rif_alloc; + } + + f->r = r; + mlxsw_sp->rifs[rif] = r; + + netdev_dbg(l3_dev, "RIF=%d created\n", rif); + + return 0; + +err_rif_alloc: + mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, f->fid, false); +err_rif_fdb_op: + mlxsw_sp_rif_bridge_op(mlxsw_sp, l3_dev, f->fid, rif, false); +err_rif_bridge_op: + mlxsw_sp_router_port_flood_set(mlxsw_sp, f->fid, false); + return err; +} + +void mlxsw_sp_rif_bridge_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_rif *r) +{ + struct net_device *l3_dev = r->dev; + struct mlxsw_sp_fid *f = r->f; + u16 rif = r->rif; + + mlxsw_sp->rifs[rif] = NULL; + f->r = NULL; + + kfree(r); + + mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, f->fid, false); + + mlxsw_sp_rif_bridge_op(mlxsw_sp, l3_dev, f->fid, rif, false); + + mlxsw_sp_router_port_flood_set(mlxsw_sp, f->fid, false); + + netdev_dbg(l3_dev, "RIF=%d destroyed\n", rif); +} + +static int mlxsw_sp_inetaddr_bridge_event(struct net_device *l3_dev, + struct net_device *br_dev, + unsigned long event) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(l3_dev); + struct mlxsw_sp_fid *f; + + /* FID can either be an actual FID if the L3 device is the + * VLAN-aware bridge or a VLAN device on top. Otherwise, the + * L3 device is a VLAN-unaware bridge and we get a vFID. + */ + f = mlxsw_sp_bridge_fid_get(mlxsw_sp, l3_dev); + if (WARN_ON(!f)) + return -EINVAL; + + switch (event) { + case NETDEV_UP: + return mlxsw_sp_rif_bridge_create(mlxsw_sp, l3_dev, f); + case NETDEV_DOWN: + mlxsw_sp_rif_bridge_destroy(mlxsw_sp, f->r); + break; + } + + return 0; +} + +static int mlxsw_sp_inetaddr_vlan_event(struct net_device *vlan_dev, + unsigned long event) +{ + struct net_device *real_dev = vlan_dev_real_dev(vlan_dev); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(vlan_dev); + u16 vid = vlan_dev_vlan_id(vlan_dev); + + if (mlxsw_sp_port_dev_check(real_dev)) + return mlxsw_sp_inetaddr_vport_event(vlan_dev, real_dev, event, + vid); + else if (netif_is_lag_master(real_dev)) + return __mlxsw_sp_inetaddr_lag_event(vlan_dev, real_dev, event, + vid); + else if (netif_is_bridge_master(real_dev) && + mlxsw_sp->master_bridge.dev == real_dev) + return mlxsw_sp_inetaddr_bridge_event(vlan_dev, real_dev, + event); + + return 0; +} + +static int mlxsw_sp_inetaddr_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct in_ifaddr *ifa = (struct in_ifaddr *) ptr; + struct net_device *dev = ifa->ifa_dev->dev; + struct mlxsw_sp *mlxsw_sp; + struct mlxsw_sp_rif *r; + int err = 0; + + mlxsw_sp = mlxsw_sp_lower_get(dev); + if (!mlxsw_sp) + goto out; + + r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); + if (!mlxsw_sp_rif_should_config(r, event)) + goto out; + + if (mlxsw_sp_port_dev_check(dev)) + err = mlxsw_sp_inetaddr_port_event(dev, event); + else if (netif_is_lag_master(dev)) + err = mlxsw_sp_inetaddr_lag_event(dev, event); + else if (netif_is_bridge_master(dev)) + err = mlxsw_sp_inetaddr_bridge_event(dev, dev, event); + else if (is_vlan_dev(dev)) + err = mlxsw_sp_inetaddr_vlan_event(dev, event); + +out: + return notifier_from_errno(err); +} + +static int mlxsw_sp_rif_edit(struct mlxsw_sp *mlxsw_sp, u16 rif, + const char *mac, int mtu) +{ + char ritr_pl[MLXSW_REG_RITR_LEN]; + int err; + + mlxsw_reg_ritr_rif_pack(ritr_pl, rif); + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl); + if (err) + return err; + + mlxsw_reg_ritr_mtu_set(ritr_pl, mtu); + mlxsw_reg_ritr_if_mac_memcpy_to(ritr_pl, mac); + mlxsw_reg_ritr_op_set(ritr_pl, MLXSW_REG_RITR_RIF_CREATE); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl); +} + +static int mlxsw_sp_netdevice_router_port_event(struct net_device *dev) +{ + struct mlxsw_sp *mlxsw_sp; + struct mlxsw_sp_rif *r; + int err; + + mlxsw_sp = mlxsw_sp_lower_get(dev); + if (!mlxsw_sp) + return 0; + + r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); + if (!r) + return 0; + + err = mlxsw_sp_rif_fdb_op(mlxsw_sp, r->addr, r->f->fid, false); + if (err) + return err; + + err = mlxsw_sp_rif_edit(mlxsw_sp, r->rif, dev->dev_addr, dev->mtu); + if (err) + goto err_rif_edit; + + err = mlxsw_sp_rif_fdb_op(mlxsw_sp, dev->dev_addr, r->f->fid, true); + if (err) + goto err_rif_fdb_op; + + ether_addr_copy(r->addr, dev->dev_addr); + r->mtu = dev->mtu; + + netdev_dbg(dev, "Updated RIF=%d\n", r->rif); + + return 0; + +err_rif_fdb_op: + mlxsw_sp_rif_edit(mlxsw_sp, r->rif, r->addr, r->mtu); +err_rif_edit: + mlxsw_sp_rif_fdb_op(mlxsw_sp, r->addr, r->f->fid, true); + return err; +} + +static bool mlxsw_sp_lag_port_fid_member(struct mlxsw_sp_port *lag_port, + u16 fid) +{ + if (mlxsw_sp_fid_is_vfid(fid)) + return mlxsw_sp_port_vport_find_by_fid(lag_port, fid); + else + return test_bit(fid, lag_port->active_vlans); +} + +static bool mlxsw_sp_port_fdb_should_flush(struct mlxsw_sp_port *mlxsw_sp_port, + u16 fid) { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; - char sfdf_pl[MLXSW_REG_SFDF_LEN]; + u8 local_port = mlxsw_sp_port->local_port; + u16 lag_id = mlxsw_sp_port->lag_id; + int i, count = 0; - mlxsw_reg_sfdf_pack(sfdf_pl, MLXSW_REG_SFDF_FLUSH_PER_PORT); - mlxsw_reg_sfdf_system_port_set(sfdf_pl, mlxsw_sp_port->local_port); + if (!mlxsw_sp_port->lagged) + return true; - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfdf), sfdf_pl); + for (i = 0; i < MLXSW_SP_PORT_PER_LAG_MAX; i++) { + struct mlxsw_sp_port *lag_port; + + lag_port = mlxsw_sp_port_lagged_get(mlxsw_sp, lag_id, i); + if (!lag_port || lag_port->local_port == local_port) + continue; + if (mlxsw_sp_lag_port_fid_member(lag_port, fid)) + count++; + } + + return !count; } static int @@ -2237,17 +3628,8 @@ mlxsw_sp_port_fdb_flush_by_port_fid(const struct mlxsw_sp_port *mlxsw_sp_port, mlxsw_reg_sfdf_port_fid_system_port_set(sfdf_pl, mlxsw_sp_port->local_port); - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfdf), sfdf_pl); -} - -static int -mlxsw_sp_port_fdb_flush_by_lag_id(const struct mlxsw_sp_port *mlxsw_sp_port) -{ - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; - char sfdf_pl[MLXSW_REG_SFDF_LEN]; - - mlxsw_reg_sfdf_pack(sfdf_pl, MLXSW_REG_SFDF_FLUSH_PER_LAG); - mlxsw_reg_sfdf_lag_id_set(sfdf_pl, mlxsw_sp_port->lag_id); + netdev_dbg(mlxsw_sp_port->dev, "FDB flushed using Port=%d, FID=%d\n", + mlxsw_sp_port->local_port, fid); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfdf), sfdf_pl); } @@ -2263,71 +3645,64 @@ mlxsw_sp_port_fdb_flush_by_lag_id_fid(const struct mlxsw_sp_port *mlxsw_sp_port, mlxsw_reg_sfdf_fid_set(sfdf_pl, fid); mlxsw_reg_sfdf_lag_fid_lag_id_set(sfdf_pl, mlxsw_sp_port->lag_id); + netdev_dbg(mlxsw_sp_port->dev, "FDB flushed using LAG ID=%d, FID=%d\n", + mlxsw_sp_port->lag_id, fid); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfdf), sfdf_pl); } -static int -__mlxsw_sp_port_fdb_flush(const struct mlxsw_sp_port *mlxsw_sp_port) +int mlxsw_sp_port_fdb_flush(struct mlxsw_sp_port *mlxsw_sp_port, u16 fid) { - int err, last_err = 0; - u16 vid; - - for (vid = 1; vid < VLAN_N_VID - 1; vid++) { - err = mlxsw_sp_port_fdb_flush_by_port_fid(mlxsw_sp_port, vid); - if (err) - last_err = err; - } + if (!mlxsw_sp_port_fdb_should_flush(mlxsw_sp_port, fid)) + return 0; - return last_err; + if (mlxsw_sp_port->lagged) + return mlxsw_sp_port_fdb_flush_by_lag_id_fid(mlxsw_sp_port, + fid); + else + return mlxsw_sp_port_fdb_flush_by_port_fid(mlxsw_sp_port, fid); } -static int -__mlxsw_sp_port_fdb_flush_lagged(const struct mlxsw_sp_port *mlxsw_sp_port) +static void mlxsw_sp_master_bridge_gone_sync(struct mlxsw_sp *mlxsw_sp) { - int err, last_err = 0; - u16 vid; - - for (vid = 1; vid < VLAN_N_VID - 1; vid++) { - err = mlxsw_sp_port_fdb_flush_by_lag_id_fid(mlxsw_sp_port, vid); - if (err) - last_err = err; - } + struct mlxsw_sp_fid *f, *tmp; - return last_err; + list_for_each_entry_safe(f, tmp, &mlxsw_sp->fids, list) + if (--f->ref_count == 0) + mlxsw_sp_fid_destroy(mlxsw_sp, f); + else + WARN_ON_ONCE(1); } -static int mlxsw_sp_port_fdb_flush(struct mlxsw_sp_port *mlxsw_sp_port) +static bool mlxsw_sp_master_bridge_check(struct mlxsw_sp *mlxsw_sp, + struct net_device *br_dev) { - if (!list_empty(&mlxsw_sp_port->vports_list)) - if (mlxsw_sp_port->lagged) - return __mlxsw_sp_port_fdb_flush_lagged(mlxsw_sp_port); - else - return __mlxsw_sp_port_fdb_flush(mlxsw_sp_port); - else - if (mlxsw_sp_port->lagged) - return mlxsw_sp_port_fdb_flush_by_lag_id(mlxsw_sp_port); - else - return mlxsw_sp_port_fdb_flush_by_port(mlxsw_sp_port); + return !mlxsw_sp->master_bridge.dev || + mlxsw_sp->master_bridge.dev == br_dev; } -static int mlxsw_sp_vport_fdb_flush(struct mlxsw_sp_port *mlxsw_sp_vport) +static void mlxsw_sp_master_bridge_inc(struct mlxsw_sp *mlxsw_sp, + struct net_device *br_dev) { - u16 vfid = mlxsw_sp_vport_vfid_get(mlxsw_sp_vport); - u16 fid = mlxsw_sp_vfid_to_fid(vfid); - - if (mlxsw_sp_vport->lagged) - return mlxsw_sp_port_fdb_flush_by_lag_id_fid(mlxsw_sp_vport, - fid); - else - return mlxsw_sp_port_fdb_flush_by_port_fid(mlxsw_sp_vport, fid); + mlxsw_sp->master_bridge.dev = br_dev; + mlxsw_sp->master_bridge.ref_count++; } -static bool mlxsw_sp_port_dev_check(const struct net_device *dev) +static void mlxsw_sp_master_bridge_dec(struct mlxsw_sp *mlxsw_sp) { - return dev->netdev_ops == &mlxsw_sp_port_netdev_ops; + if (--mlxsw_sp->master_bridge.ref_count == 0) { + mlxsw_sp->master_bridge.dev = NULL; + /* It's possible upper VLAN devices are still holding + * references to underlying FIDs. Drop the reference + * and release the resources if it was the last one. + * If it wasn't, then something bad happened. + */ + mlxsw_sp_master_bridge_gone_sync(mlxsw_sp); + } } -static int mlxsw_sp_port_bridge_join(struct mlxsw_sp_port *mlxsw_sp_port) +static int mlxsw_sp_port_bridge_join(struct mlxsw_sp_port *mlxsw_sp_port, + struct net_device *br_dev) { struct net_device *dev = mlxsw_sp_port->dev; int err; @@ -2341,6 +3716,8 @@ static int mlxsw_sp_port_bridge_join(struct mlxsw_sp_port *mlxsw_sp_port) if (err) return err; + mlxsw_sp_master_bridge_inc(mlxsw_sp_port->mlxsw_sp, br_dev); + mlxsw_sp_port->learning = 1; mlxsw_sp_port->learning_sync = 1; mlxsw_sp_port->uc_flood = 1; @@ -2349,16 +3726,14 @@ static int mlxsw_sp_port_bridge_join(struct mlxsw_sp_port *mlxsw_sp_port) return 0; } -static int mlxsw_sp_port_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_port, - bool flush_fdb) +static void mlxsw_sp_port_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_port) { struct net_device *dev = mlxsw_sp_port->dev; - if (flush_fdb && mlxsw_sp_port_fdb_flush(mlxsw_sp_port)) - netdev_err(mlxsw_sp_port->dev, "Failed to flush FDB\n"); - mlxsw_sp_port_pvid_set(mlxsw_sp_port, 1); + mlxsw_sp_master_bridge_dec(mlxsw_sp_port->mlxsw_sp); + mlxsw_sp_port->learning = 0; mlxsw_sp_port->learning_sync = 0; mlxsw_sp_port->uc_flood = 0; @@ -2367,28 +3742,7 @@ static int mlxsw_sp_port_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_port, /* Add implicit VLAN interface in the device, so that untagged * packets will be classified to the default vFID. */ - return mlxsw_sp_port_add_vid(dev, 0, 1); -} - -static bool mlxsw_sp_master_bridge_check(struct mlxsw_sp *mlxsw_sp, - struct net_device *br_dev) -{ - return !mlxsw_sp->master_bridge.dev || - mlxsw_sp->master_bridge.dev == br_dev; -} - -static void mlxsw_sp_master_bridge_inc(struct mlxsw_sp *mlxsw_sp, - struct net_device *br_dev) -{ - mlxsw_sp->master_bridge.dev = br_dev; - mlxsw_sp->master_bridge.ref_count++; -} - -static void mlxsw_sp_master_bridge_dec(struct mlxsw_sp *mlxsw_sp, - struct net_device *br_dev) -{ - if (--mlxsw_sp->master_bridge.ref_count == 0) - mlxsw_sp->master_bridge.dev = NULL; + mlxsw_sp_port_add_vid(dev, 0, 1); } static int mlxsw_sp_lag_create(struct mlxsw_sp *mlxsw_sp, u16 lag_id) @@ -2504,6 +3858,45 @@ static int mlxsw_sp_port_lag_index_get(struct mlxsw_sp *mlxsw_sp, return -EBUSY; } +static void +mlxsw_sp_port_pvid_vport_lag_join(struct mlxsw_sp_port *mlxsw_sp_port, + u16 lag_id) +{ + struct mlxsw_sp_port *mlxsw_sp_vport; + struct mlxsw_sp_fid *f; + + mlxsw_sp_vport = mlxsw_sp_port_vport_find(mlxsw_sp_port, 1); + if (WARN_ON(!mlxsw_sp_vport)) + return; + + /* If vPort is assigned a RIF, then leave it since it's no + * longer valid. + */ + f = mlxsw_sp_vport_fid_get(mlxsw_sp_vport); + if (f) + f->leave(mlxsw_sp_vport); + + mlxsw_sp_vport->lag_id = lag_id; + mlxsw_sp_vport->lagged = 1; +} + +static void +mlxsw_sp_port_pvid_vport_lag_leave(struct mlxsw_sp_port *mlxsw_sp_port) +{ + struct mlxsw_sp_port *mlxsw_sp_vport; + struct mlxsw_sp_fid *f; + + mlxsw_sp_vport = mlxsw_sp_port_vport_find(mlxsw_sp_port, 1); + if (WARN_ON(!mlxsw_sp_vport)) + return; + + f = mlxsw_sp_vport_fid_get(mlxsw_sp_vport); + if (f) + f->leave(mlxsw_sp_vport); + + mlxsw_sp_vport->lagged = 0; +} + static int mlxsw_sp_port_lag_join(struct mlxsw_sp_port *mlxsw_sp_port, struct net_device *lag_dev) { @@ -2539,6 +3932,9 @@ static int mlxsw_sp_port_lag_join(struct mlxsw_sp_port *mlxsw_sp_port, mlxsw_sp_port->lag_id = lag_id; mlxsw_sp_port->lagged = 1; lag->ref_count++; + + mlxsw_sp_port_pvid_vport_lag_join(mlxsw_sp_port, lag_id); + return 0; err_col_port_enable: @@ -2549,65 +3945,35 @@ err_col_port_add: return err; } -static int mlxsw_sp_vport_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_vport, - struct net_device *br_dev, - bool flush_fdb); - -static int mlxsw_sp_port_lag_leave(struct mlxsw_sp_port *mlxsw_sp_port, - struct net_device *lag_dev) +static void mlxsw_sp_port_lag_leave(struct mlxsw_sp_port *mlxsw_sp_port, + struct net_device *lag_dev) { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; - struct mlxsw_sp_port *mlxsw_sp_vport; - struct mlxsw_sp_upper *lag; u16 lag_id = mlxsw_sp_port->lag_id; - int err; + struct mlxsw_sp_upper *lag; if (!mlxsw_sp_port->lagged) - return 0; + return; lag = mlxsw_sp_lag_get(mlxsw_sp, lag_id); WARN_ON(lag->ref_count == 0); - err = mlxsw_sp_lag_col_port_disable(mlxsw_sp_port, lag_id); - if (err) - return err; - err = mlxsw_sp_lag_col_port_remove(mlxsw_sp_port, lag_id); - if (err) - return err; - - /* In case we leave a LAG device that has bridges built on top, - * then their teardown sequence is never issued and we need to - * invoke the necessary cleanup routines ourselves. - */ - list_for_each_entry(mlxsw_sp_vport, &mlxsw_sp_port->vports_list, - vport.list) { - struct net_device *br_dev; - - if (!mlxsw_sp_vport->bridged) - continue; - - br_dev = mlxsw_sp_vport_br_get(mlxsw_sp_vport); - mlxsw_sp_vport_bridge_leave(mlxsw_sp_vport, br_dev, false); - } + mlxsw_sp_lag_col_port_disable(mlxsw_sp_port, lag_id); + mlxsw_sp_lag_col_port_remove(mlxsw_sp_port, lag_id); if (mlxsw_sp_port->bridged) { mlxsw_sp_port_active_vlans_del(mlxsw_sp_port); - mlxsw_sp_port_bridge_leave(mlxsw_sp_port, false); - mlxsw_sp_master_bridge_dec(mlxsw_sp, NULL); + mlxsw_sp_port_bridge_leave(mlxsw_sp_port); } - if (lag->ref_count == 1) { - if (mlxsw_sp_port_fdb_flush_by_lag_id(mlxsw_sp_port)) - netdev_err(mlxsw_sp_port->dev, "Failed to flush FDB\n"); - err = mlxsw_sp_lag_destroy(mlxsw_sp, lag_id); - if (err) - return err; - } + if (lag->ref_count == 1) + mlxsw_sp_lag_destroy(mlxsw_sp, lag_id); mlxsw_core_lag_mapping_clear(mlxsw_sp->core, lag_id, mlxsw_sp_port->local_port); mlxsw_sp_port->lagged = 0; lag->ref_count--; - return 0; + + mlxsw_sp_port_pvid_vport_lag_leave(mlxsw_sp_port); } static int mlxsw_sp_lag_dist_port_add(struct mlxsw_sp_port *mlxsw_sp_port, @@ -2656,42 +4022,25 @@ static int mlxsw_sp_port_vlan_link(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid = vlan_dev_vlan_id(vlan_dev); mlxsw_sp_vport = mlxsw_sp_port_vport_find(mlxsw_sp_port, vid); - if (!mlxsw_sp_vport) { - WARN_ON(!mlxsw_sp_vport); + if (WARN_ON(!mlxsw_sp_vport)) return -EINVAL; - } mlxsw_sp_vport->dev = vlan_dev; return 0; } -static int mlxsw_sp_port_vlan_unlink(struct mlxsw_sp_port *mlxsw_sp_port, - struct net_device *vlan_dev) +static void mlxsw_sp_port_vlan_unlink(struct mlxsw_sp_port *mlxsw_sp_port, + struct net_device *vlan_dev) { struct mlxsw_sp_port *mlxsw_sp_vport; u16 vid = vlan_dev_vlan_id(vlan_dev); mlxsw_sp_vport = mlxsw_sp_port_vport_find(mlxsw_sp_port, vid); - if (!mlxsw_sp_vport) { - WARN_ON(!mlxsw_sp_vport); - return -EINVAL; - } - - /* When removing a VLAN device while still bridged we should first - * remove it from the bridge, as we receive the bridge's notification - * when the vPort is already gone. - */ - if (mlxsw_sp_vport->bridged) { - struct net_device *br_dev; - - br_dev = mlxsw_sp_vport_br_get(mlxsw_sp_vport); - mlxsw_sp_vport_bridge_leave(mlxsw_sp_vport, br_dev, true); - } + if (WARN_ON(!mlxsw_sp_vport)) + return; mlxsw_sp_vport->dev = mlxsw_sp_port->dev; - - return 0; } static int mlxsw_sp_netdevice_port_upper_event(struct net_device *dev, @@ -2701,7 +4050,7 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *dev, struct mlxsw_sp_port *mlxsw_sp_port; struct net_device *upper_dev; struct mlxsw_sp *mlxsw_sp; - int err; + int err = 0; mlxsw_sp_port = netdev_priv(dev); mlxsw_sp = mlxsw_sp_port->mlxsw_sp; @@ -2710,73 +4059,56 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *dev, switch (event) { case NETDEV_PRECHANGEUPPER: upper_dev = info->upper_dev; - if (!info->master || !info->linking) + if (!is_vlan_dev(upper_dev) && + !netif_is_lag_master(upper_dev) && + !netif_is_bridge_master(upper_dev)) + return -EINVAL; + if (!info->linking) break; /* HW limitation forbids to put ports to multiple bridges. */ if (netif_is_bridge_master(upper_dev) && !mlxsw_sp_master_bridge_check(mlxsw_sp, upper_dev)) - return NOTIFY_BAD; + return -EINVAL; if (netif_is_lag_master(upper_dev) && !mlxsw_sp_master_lag_check(mlxsw_sp, upper_dev, info->upper_info)) - return NOTIFY_BAD; + return -EINVAL; + if (netif_is_lag_master(upper_dev) && vlan_uses_dev(dev)) + return -EINVAL; + if (netif_is_lag_port(dev) && is_vlan_dev(upper_dev) && + !netif_is_lag_master(vlan_dev_real_dev(upper_dev))) + return -EINVAL; break; case NETDEV_CHANGEUPPER: upper_dev = info->upper_dev; if (is_vlan_dev(upper_dev)) { - if (info->linking) { + if (info->linking) err = mlxsw_sp_port_vlan_link(mlxsw_sp_port, upper_dev); - if (err) { - netdev_err(dev, "Failed to link VLAN device\n"); - return NOTIFY_BAD; - } - } else { - err = mlxsw_sp_port_vlan_unlink(mlxsw_sp_port, - upper_dev); - if (err) { - netdev_err(dev, "Failed to unlink VLAN device\n"); - return NOTIFY_BAD; - } - } + else + mlxsw_sp_port_vlan_unlink(mlxsw_sp_port, + upper_dev); } else if (netif_is_bridge_master(upper_dev)) { - if (info->linking) { - err = mlxsw_sp_port_bridge_join(mlxsw_sp_port); - if (err) { - netdev_err(dev, "Failed to join bridge\n"); - return NOTIFY_BAD; - } - mlxsw_sp_master_bridge_inc(mlxsw_sp, upper_dev); - } else { - err = mlxsw_sp_port_bridge_leave(mlxsw_sp_port, - true); - mlxsw_sp_master_bridge_dec(mlxsw_sp, upper_dev); - if (err) { - netdev_err(dev, "Failed to leave bridge\n"); - return NOTIFY_BAD; - } - } + if (info->linking) + err = mlxsw_sp_port_bridge_join(mlxsw_sp_port, + upper_dev); + else + mlxsw_sp_port_bridge_leave(mlxsw_sp_port); } else if (netif_is_lag_master(upper_dev)) { - if (info->linking) { + if (info->linking) err = mlxsw_sp_port_lag_join(mlxsw_sp_port, upper_dev); - if (err) { - netdev_err(dev, "Failed to join link aggregation\n"); - return NOTIFY_BAD; - } - } else { - err = mlxsw_sp_port_lag_leave(mlxsw_sp_port, - upper_dev); - if (err) { - netdev_err(dev, "Failed to leave link aggregation\n"); - return NOTIFY_BAD; - } - } + else + mlxsw_sp_port_lag_leave(mlxsw_sp_port, + upper_dev); + } else { + err = -EINVAL; + WARN_ON(1); } break; } - return NOTIFY_DONE; + return err; } static int mlxsw_sp_netdevice_port_lower_event(struct net_device *dev, @@ -2800,7 +4132,7 @@ static int mlxsw_sp_netdevice_port_lower_event(struct net_device *dev, break; } - return NOTIFY_DONE; + return 0; } static int mlxsw_sp_netdevice_port_event(struct net_device *dev, @@ -2814,7 +4146,7 @@ static int mlxsw_sp_netdevice_port_event(struct net_device *dev, return mlxsw_sp_netdevice_port_lower_event(dev, event, ptr); } - return NOTIFY_DONE; + return 0; } static int mlxsw_sp_netdevice_lag_event(struct net_device *lag_dev, @@ -2827,218 +4159,230 @@ static int mlxsw_sp_netdevice_lag_event(struct net_device *lag_dev, netdev_for_each_lower_dev(lag_dev, dev, iter) { if (mlxsw_sp_port_dev_check(dev)) { ret = mlxsw_sp_netdevice_port_event(dev, event, ptr); - if (ret == NOTIFY_BAD) + if (ret) return ret; } } - return NOTIFY_DONE; + return 0; } -static struct mlxsw_sp_vfid * -mlxsw_sp_br_vfid_find(const struct mlxsw_sp *mlxsw_sp, - const struct net_device *br_dev) +static int mlxsw_sp_master_bridge_vlan_link(struct mlxsw_sp *mlxsw_sp, + struct net_device *vlan_dev) { - struct mlxsw_sp_vfid *vfid; - - list_for_each_entry(vfid, &mlxsw_sp->br_vfids.list, list) { - if (vfid->br_dev == br_dev) - return vfid; + u16 fid = vlan_dev_vlan_id(vlan_dev); + struct mlxsw_sp_fid *f; + + f = mlxsw_sp_fid_find(mlxsw_sp, fid); + if (!f) { + f = mlxsw_sp_fid_create(mlxsw_sp, fid); + if (IS_ERR(f)) + return PTR_ERR(f); } - return NULL; + f->ref_count++; + + return 0; +} + +static void mlxsw_sp_master_bridge_vlan_unlink(struct mlxsw_sp *mlxsw_sp, + struct net_device *vlan_dev) +{ + u16 fid = vlan_dev_vlan_id(vlan_dev); + struct mlxsw_sp_fid *f; + + f = mlxsw_sp_fid_find(mlxsw_sp, fid); + if (f && f->r) + mlxsw_sp_rif_bridge_destroy(mlxsw_sp, f->r); + if (f && --f->ref_count == 0) + mlxsw_sp_fid_destroy(mlxsw_sp, f); } -static u16 mlxsw_sp_vfid_to_br_vfid(u16 vfid) +static int mlxsw_sp_netdevice_bridge_event(struct net_device *br_dev, + unsigned long event, void *ptr) { - return vfid - MLXSW_SP_VFID_PORT_MAX; + struct netdev_notifier_changeupper_info *info; + struct net_device *upper_dev; + struct mlxsw_sp *mlxsw_sp; + int err; + + mlxsw_sp = mlxsw_sp_lower_get(br_dev); + if (!mlxsw_sp) + return 0; + if (br_dev != mlxsw_sp->master_bridge.dev) + return 0; + + info = ptr; + + switch (event) { + case NETDEV_CHANGEUPPER: + upper_dev = info->upper_dev; + if (!is_vlan_dev(upper_dev)) + break; + if (info->linking) { + err = mlxsw_sp_master_bridge_vlan_link(mlxsw_sp, + upper_dev); + if (err) + return err; + } else { + mlxsw_sp_master_bridge_vlan_unlink(mlxsw_sp, upper_dev); + } + break; + } + + return 0; } -static u16 mlxsw_sp_br_vfid_to_vfid(u16 br_vfid) +static u16 mlxsw_sp_avail_vfid_get(const struct mlxsw_sp *mlxsw_sp) { - return MLXSW_SP_VFID_PORT_MAX + br_vfid; + return find_first_zero_bit(mlxsw_sp->vfids.mapped, + MLXSW_SP_VFID_MAX); } -static u16 mlxsw_sp_avail_br_vfid_get(const struct mlxsw_sp *mlxsw_sp) +static int mlxsw_sp_vfid_op(struct mlxsw_sp *mlxsw_sp, u16 fid, bool create) { - return find_first_zero_bit(mlxsw_sp->br_vfids.mapped, - MLXSW_SP_VFID_BR_MAX); + char sfmr_pl[MLXSW_REG_SFMR_LEN]; + + mlxsw_reg_sfmr_pack(sfmr_pl, !create, fid, 0); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfmr), sfmr_pl); } -static struct mlxsw_sp_vfid *mlxsw_sp_br_vfid_create(struct mlxsw_sp *mlxsw_sp, - struct net_device *br_dev) +static void mlxsw_sp_vport_vfid_leave(struct mlxsw_sp_port *mlxsw_sp_vport); + +static struct mlxsw_sp_fid *mlxsw_sp_vfid_create(struct mlxsw_sp *mlxsw_sp, + struct net_device *br_dev) { struct device *dev = mlxsw_sp->bus_info->dev; - struct mlxsw_sp_vfid *vfid; - u16 n_vfid; + struct mlxsw_sp_fid *f; + u16 vfid, fid; int err; - n_vfid = mlxsw_sp_br_vfid_to_vfid(mlxsw_sp_avail_br_vfid_get(mlxsw_sp)); - if (n_vfid == MLXSW_SP_VFID_MAX) { + vfid = mlxsw_sp_avail_vfid_get(mlxsw_sp); + if (vfid == MLXSW_SP_VFID_MAX) { dev_err(dev, "No available vFIDs\n"); return ERR_PTR(-ERANGE); } - err = __mlxsw_sp_vfid_create(mlxsw_sp, n_vfid); + fid = mlxsw_sp_vfid_to_fid(vfid); + err = mlxsw_sp_vfid_op(mlxsw_sp, fid, true); if (err) { - dev_err(dev, "Failed to create vFID=%d\n", n_vfid); + dev_err(dev, "Failed to create FID=%d\n", fid); return ERR_PTR(err); } - vfid = kzalloc(sizeof(*vfid), GFP_KERNEL); - if (!vfid) + f = kzalloc(sizeof(*f), GFP_KERNEL); + if (!f) goto err_allocate_vfid; - vfid->vfid = n_vfid; - vfid->br_dev = br_dev; + f->leave = mlxsw_sp_vport_vfid_leave; + f->fid = fid; + f->dev = br_dev; - list_add(&vfid->list, &mlxsw_sp->br_vfids.list); - set_bit(mlxsw_sp_vfid_to_br_vfid(n_vfid), mlxsw_sp->br_vfids.mapped); + list_add(&f->list, &mlxsw_sp->vfids.list); + set_bit(vfid, mlxsw_sp->vfids.mapped); - return vfid; + return f; err_allocate_vfid: - __mlxsw_sp_vfid_destroy(mlxsw_sp, n_vfid); + mlxsw_sp_vfid_op(mlxsw_sp, fid, false); return ERR_PTR(-ENOMEM); } -static void mlxsw_sp_br_vfid_destroy(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_vfid *vfid) +static void mlxsw_sp_vfid_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fid *f) { - u16 br_vfid = mlxsw_sp_vfid_to_br_vfid(vfid->vfid); + u16 vfid = mlxsw_sp_fid_to_vfid(f->fid); + u16 fid = f->fid; + + clear_bit(vfid, mlxsw_sp->vfids.mapped); + list_del(&f->list); - clear_bit(br_vfid, mlxsw_sp->br_vfids.mapped); - list_del(&vfid->list); + if (f->r) + mlxsw_sp_rif_bridge_destroy(mlxsw_sp, f->r); - __mlxsw_sp_vfid_destroy(mlxsw_sp, vfid->vfid); + kfree(f); - kfree(vfid); + mlxsw_sp_vfid_op(mlxsw_sp, fid, false); } -static int mlxsw_sp_vport_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_vport, - struct net_device *br_dev, - bool flush_fdb) +static int mlxsw_sp_vport_fid_map(struct mlxsw_sp_port *mlxsw_sp_vport, u16 fid, + bool valid) { - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_vport->mlxsw_sp; + enum mlxsw_reg_svfa_mt mt = MLXSW_REG_SVFA_MT_PORT_VID_TO_FID; u16 vid = mlxsw_sp_vport_vid_get(mlxsw_sp_vport); - struct net_device *dev = mlxsw_sp_vport->dev; - struct mlxsw_sp_vfid *vfid, *new_vfid; - int err; - - vfid = mlxsw_sp_br_vfid_find(mlxsw_sp, br_dev); - if (!vfid) { - WARN_ON(!vfid); - return -EINVAL; - } - - /* We need a vFID to go back to after leaving the bridge's vFID. */ - new_vfid = mlxsw_sp_vfid_find(mlxsw_sp, vid); - if (!new_vfid) { - new_vfid = mlxsw_sp_vfid_create(mlxsw_sp, vid); - if (IS_ERR(new_vfid)) { - netdev_err(dev, "Failed to create vFID for VID=%d\n", - vid); - return PTR_ERR(new_vfid); - } - } - /* Invalidate existing {Port, VID} to vFID mapping and create a new - * one for the new vFID. - */ - err = mlxsw_sp_port_vid_to_fid_set(mlxsw_sp_vport, - MLXSW_REG_SVFA_MT_PORT_VID_TO_FID, - false, - mlxsw_sp_vfid_to_fid(vfid->vfid), - vid); - if (err) { - netdev_err(dev, "Failed to invalidate {Port, VID} to vFID=%d mapping\n", - vfid->vfid); - goto err_port_vid_to_fid_invalidate; - } + return mlxsw_sp_port_vid_to_fid_set(mlxsw_sp_vport, mt, valid, fid, + vid); +} - err = mlxsw_sp_port_vid_to_fid_set(mlxsw_sp_vport, - MLXSW_REG_SVFA_MT_PORT_VID_TO_FID, - true, - mlxsw_sp_vfid_to_fid(new_vfid->vfid), - vid); - if (err) { - netdev_err(dev, "Failed to map {Port, VID} to vFID=%d\n", - new_vfid->vfid); - goto err_port_vid_to_fid_validate; - } +static int mlxsw_sp_vport_vfid_join(struct mlxsw_sp_port *mlxsw_sp_vport, + struct net_device *br_dev) +{ + struct mlxsw_sp_fid *f; + int err; - err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_vport, vid, false); - if (err) { - netdev_err(dev, "Failed to disable learning\n"); - goto err_port_vid_learning_set; + f = mlxsw_sp_vfid_find(mlxsw_sp_vport->mlxsw_sp, br_dev); + if (!f) { + f = mlxsw_sp_vfid_create(mlxsw_sp_vport->mlxsw_sp, br_dev); + if (IS_ERR(f)) + return PTR_ERR(f); } - err = mlxsw_sp_vport_flood_set(mlxsw_sp_vport, vfid->vfid, false, - false); - if (err) { - netdev_err(dev, "Failed clear to clear flooding\n"); + err = mlxsw_sp_vport_flood_set(mlxsw_sp_vport, f->fid, true); + if (err) goto err_vport_flood_set; - } - - err = mlxsw_sp_port_stp_state_set(mlxsw_sp_vport, vid, - MLXSW_REG_SPMS_STATE_FORWARDING); - if (err) { - netdev_err(dev, "Failed to set STP state\n"); - goto err_port_stp_state_set; - } - if (flush_fdb && mlxsw_sp_vport_fdb_flush(mlxsw_sp_vport)) - netdev_err(dev, "Failed to flush FDB\n"); + err = mlxsw_sp_vport_fid_map(mlxsw_sp_vport, f->fid, true); + if (err) + goto err_vport_fid_map; - /* Switch between the vFIDs and destroy the old one if needed. */ - new_vfid->nr_vports++; - mlxsw_sp_vport->vport.vfid = new_vfid; - vfid->nr_vports--; - if (!vfid->nr_vports) - mlxsw_sp_br_vfid_destroy(mlxsw_sp, vfid); + mlxsw_sp_vport_fid_set(mlxsw_sp_vport, f); + f->ref_count++; - mlxsw_sp_vport->learning = 0; - mlxsw_sp_vport->learning_sync = 0; - mlxsw_sp_vport->uc_flood = 0; - mlxsw_sp_vport->bridged = 0; + netdev_dbg(mlxsw_sp_vport->dev, "Joined FID=%d\n", f->fid); return 0; -err_port_stp_state_set: +err_vport_fid_map: + mlxsw_sp_vport_flood_set(mlxsw_sp_vport, f->fid, false); err_vport_flood_set: -err_port_vid_learning_set: -err_port_vid_to_fid_validate: -err_port_vid_to_fid_invalidate: - /* Rollback vFID only if new. */ - if (!new_vfid->nr_vports) - mlxsw_sp_vfid_destroy(mlxsw_sp, new_vfid); + if (!f->ref_count) + mlxsw_sp_vfid_destroy(mlxsw_sp_vport->mlxsw_sp, f); return err; } +static void mlxsw_sp_vport_vfid_leave(struct mlxsw_sp_port *mlxsw_sp_vport) +{ + struct mlxsw_sp_fid *f = mlxsw_sp_vport_fid_get(mlxsw_sp_vport); + + netdev_dbg(mlxsw_sp_vport->dev, "Left FID=%d\n", f->fid); + + mlxsw_sp_vport_fid_map(mlxsw_sp_vport, f->fid, false); + + mlxsw_sp_vport_flood_set(mlxsw_sp_vport, f->fid, false); + + mlxsw_sp_port_fdb_flush(mlxsw_sp_vport, f->fid); + + mlxsw_sp_vport_fid_set(mlxsw_sp_vport, NULL); + if (--f->ref_count == 0) + mlxsw_sp_vfid_destroy(mlxsw_sp_vport->mlxsw_sp, f); +} + static int mlxsw_sp_vport_bridge_join(struct mlxsw_sp_port *mlxsw_sp_vport, struct net_device *br_dev) { - struct mlxsw_sp_vfid *old_vfid = mlxsw_sp_vport->vport.vfid; - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_vport->mlxsw_sp; + struct mlxsw_sp_fid *f = mlxsw_sp_vport_fid_get(mlxsw_sp_vport); u16 vid = mlxsw_sp_vport_vid_get(mlxsw_sp_vport); struct net_device *dev = mlxsw_sp_vport->dev; - struct mlxsw_sp_vfid *vfid; int err; - vfid = mlxsw_sp_br_vfid_find(mlxsw_sp, br_dev); - if (!vfid) { - vfid = mlxsw_sp_br_vfid_create(mlxsw_sp, br_dev); - if (IS_ERR(vfid)) { - netdev_err(dev, "Failed to create bridge vFID\n"); - return PTR_ERR(vfid); - } - } + if (f && !WARN_ON(!f->leave)) + f->leave(mlxsw_sp_vport); - err = mlxsw_sp_vport_flood_set(mlxsw_sp_vport, vfid->vfid, true, false); + err = mlxsw_sp_vport_vfid_join(mlxsw_sp_vport, br_dev); if (err) { - netdev_err(dev, "Failed to setup flooding for vFID=%d\n", - vfid->vfid); - goto err_port_flood_set; + netdev_err(dev, "Failed to join vFID\n"); + return err; } err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_vport, vid, true); @@ -3047,38 +4391,6 @@ static int mlxsw_sp_vport_bridge_join(struct mlxsw_sp_port *mlxsw_sp_vport, goto err_port_vid_learning_set; } - /* We need to invalidate existing {Port, VID} to vFID mapping and - * create a new one for the bridge's vFID. - */ - err = mlxsw_sp_port_vid_to_fid_set(mlxsw_sp_vport, - MLXSW_REG_SVFA_MT_PORT_VID_TO_FID, - false, - mlxsw_sp_vfid_to_fid(old_vfid->vfid), - vid); - if (err) { - netdev_err(dev, "Failed to invalidate {Port, VID} to vFID=%d mapping\n", - old_vfid->vfid); - goto err_port_vid_to_fid_invalidate; - } - - err = mlxsw_sp_port_vid_to_fid_set(mlxsw_sp_vport, - MLXSW_REG_SVFA_MT_PORT_VID_TO_FID, - true, - mlxsw_sp_vfid_to_fid(vfid->vfid), - vid); - if (err) { - netdev_err(dev, "Failed to map {Port, VID} to vFID=%d\n", - vfid->vfid); - goto err_port_vid_to_fid_validate; - } - - /* Switch between the vFIDs and destroy the old one if needed. */ - vfid->nr_vports++; - mlxsw_sp_vport->vport.vfid = vfid; - old_vfid->nr_vports--; - if (!old_vfid->nr_vports) - mlxsw_sp_vfid_destroy(mlxsw_sp, old_vfid); - mlxsw_sp_vport->learning = 1; mlxsw_sp_vport->learning_sync = 1; mlxsw_sp_vport->uc_flood = 1; @@ -3086,20 +4398,25 @@ static int mlxsw_sp_vport_bridge_join(struct mlxsw_sp_port *mlxsw_sp_vport, return 0; -err_port_vid_to_fid_validate: - mlxsw_sp_port_vid_to_fid_set(mlxsw_sp_vport, - MLXSW_REG_SVFA_MT_PORT_VID_TO_FID, false, - mlxsw_sp_vfid_to_fid(old_vfid->vfid), vid); -err_port_vid_to_fid_invalidate: - mlxsw_sp_port_vid_learning_set(mlxsw_sp_vport, vid, false); err_port_vid_learning_set: - mlxsw_sp_vport_flood_set(mlxsw_sp_vport, vfid->vfid, false, false); -err_port_flood_set: - if (!vfid->nr_vports) - mlxsw_sp_br_vfid_destroy(mlxsw_sp, vfid); + mlxsw_sp_vport_vfid_leave(mlxsw_sp_vport); return err; } +static void mlxsw_sp_vport_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_vport) +{ + u16 vid = mlxsw_sp_vport_vid_get(mlxsw_sp_vport); + + mlxsw_sp_port_vid_learning_set(mlxsw_sp_vport, vid, false); + + mlxsw_sp_vport_vfid_leave(mlxsw_sp_vport); + + mlxsw_sp_vport->learning = 0; + mlxsw_sp_vport->learning_sync = 0; + mlxsw_sp_vport->uc_flood = 0; + mlxsw_sp_vport->bridged = 0; +} + static bool mlxsw_sp_port_master_bridge_check(const struct mlxsw_sp_port *mlxsw_sp_port, const struct net_device *br_dev) @@ -3108,7 +4425,9 @@ mlxsw_sp_port_master_bridge_check(const struct mlxsw_sp_port *mlxsw_sp_port, list_for_each_entry(mlxsw_sp_vport, &mlxsw_sp_port->vports_list, vport.list) { - if (mlxsw_sp_vport_br_get(mlxsw_sp_vport) == br_dev) + struct net_device *dev = mlxsw_sp_vport_dev_get(mlxsw_sp_vport); + + if (dev && dev == br_dev) return false; } @@ -3123,56 +4442,39 @@ static int mlxsw_sp_netdevice_vport_event(struct net_device *dev, struct netdev_notifier_changeupper_info *info = ptr; struct mlxsw_sp_port *mlxsw_sp_vport; struct net_device *upper_dev; - int err; + int err = 0; mlxsw_sp_vport = mlxsw_sp_port_vport_find(mlxsw_sp_port, vid); switch (event) { case NETDEV_PRECHANGEUPPER: upper_dev = info->upper_dev; - if (!info->master || !info->linking) - break; if (!netif_is_bridge_master(upper_dev)) - return NOTIFY_BAD; + return -EINVAL; + if (!info->linking) + break; /* We can't have multiple VLAN interfaces configured on * the same port and being members in the same bridge. */ if (!mlxsw_sp_port_master_bridge_check(mlxsw_sp_port, upper_dev)) - return NOTIFY_BAD; + return -EINVAL; break; case NETDEV_CHANGEUPPER: upper_dev = info->upper_dev; - if (!info->master) - break; if (info->linking) { - if (!mlxsw_sp_vport) { - WARN_ON(!mlxsw_sp_vport); - return NOTIFY_BAD; - } + if (WARN_ON(!mlxsw_sp_vport)) + return -EINVAL; err = mlxsw_sp_vport_bridge_join(mlxsw_sp_vport, upper_dev); - if (err) { - netdev_err(dev, "Failed to join bridge\n"); - return NOTIFY_BAD; - } } else { - /* We ignore bridge's unlinking notifications if vPort - * is gone, since we already left the bridge when the - * VLAN device was unlinked from the real device. - */ if (!mlxsw_sp_vport) - return NOTIFY_DONE; - err = mlxsw_sp_vport_bridge_leave(mlxsw_sp_vport, - upper_dev, true); - if (err) { - netdev_err(dev, "Failed to leave bridge\n"); - return NOTIFY_BAD; - } + return 0; + mlxsw_sp_vport_bridge_leave(mlxsw_sp_vport); } } - return NOTIFY_DONE; + return err; } static int mlxsw_sp_netdevice_lag_vport_event(struct net_device *lag_dev, @@ -3187,12 +4489,12 @@ static int mlxsw_sp_netdevice_lag_vport_event(struct net_device *lag_dev, if (mlxsw_sp_port_dev_check(dev)) { ret = mlxsw_sp_netdevice_vport_event(dev, event, ptr, vid); - if (ret == NOTIFY_BAD) + if (ret) return ret; } } - return NOTIFY_DONE; + return 0; } static int mlxsw_sp_netdevice_vlan_event(struct net_device *vlan_dev, @@ -3208,41 +4510,58 @@ static int mlxsw_sp_netdevice_vlan_event(struct net_device *vlan_dev, return mlxsw_sp_netdevice_lag_vport_event(real_dev, event, ptr, vid); - return NOTIFY_DONE; + return 0; } static int mlxsw_sp_netdevice_event(struct notifier_block *unused, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); - - if (mlxsw_sp_port_dev_check(dev)) - return mlxsw_sp_netdevice_port_event(dev, event, ptr); - - if (netif_is_lag_master(dev)) - return mlxsw_sp_netdevice_lag_event(dev, event, ptr); - - if (is_vlan_dev(dev)) - return mlxsw_sp_netdevice_vlan_event(dev, event, ptr); - - return NOTIFY_DONE; + int err = 0; + + if (event == NETDEV_CHANGEADDR || event == NETDEV_CHANGEMTU) + err = mlxsw_sp_netdevice_router_port_event(dev); + else if (mlxsw_sp_port_dev_check(dev)) + err = mlxsw_sp_netdevice_port_event(dev, event, ptr); + else if (netif_is_lag_master(dev)) + err = mlxsw_sp_netdevice_lag_event(dev, event, ptr); + else if (netif_is_bridge_master(dev)) + err = mlxsw_sp_netdevice_bridge_event(dev, event, ptr); + else if (is_vlan_dev(dev)) + err = mlxsw_sp_netdevice_vlan_event(dev, event, ptr); + + return notifier_from_errno(err); } static struct notifier_block mlxsw_sp_netdevice_nb __read_mostly = { .notifier_call = mlxsw_sp_netdevice_event, }; +static struct notifier_block mlxsw_sp_inetaddr_nb __read_mostly = { + .notifier_call = mlxsw_sp_inetaddr_event, + .priority = 10, /* Must be called before FIB notifier block */ +}; + +static struct notifier_block mlxsw_sp_router_netevent_nb __read_mostly = { + .notifier_call = mlxsw_sp_router_netevent_event, +}; + static int __init mlxsw_sp_module_init(void) { int err; register_netdevice_notifier(&mlxsw_sp_netdevice_nb); + register_inetaddr_notifier(&mlxsw_sp_inetaddr_nb); + register_netevent_notifier(&mlxsw_sp_router_netevent_nb); + err = mlxsw_core_driver_register(&mlxsw_sp_driver); if (err) goto err_core_driver_register; return 0; err_core_driver_register: + unregister_netevent_notifier(&mlxsw_sp_router_netevent_nb); + unregister_inetaddr_notifier(&mlxsw_sp_inetaddr_nb); unregister_netdevice_notifier(&mlxsw_sp_netdevice_nb); return err; } @@ -3250,6 +4569,8 @@ err_core_driver_register: static void __exit mlxsw_sp_module_exit(void) { mlxsw_core_driver_unregister(&mlxsw_sp_driver); + unregister_netevent_notifier(&mlxsw_sp_router_netevent_nb); + unregister_inetaddr_notifier(&mlxsw_sp_inetaddr_nb); unregister_netdevice_notifier(&mlxsw_sp_netdevice_nb); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 4b8abaf06321..ac48abebe904 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -39,19 +39,22 @@ #include <linux/types.h> #include <linux/netdevice.h> +#include <linux/rhashtable.h> #include <linux/bitops.h> #include <linux/if_vlan.h> #include <linux/list.h> +#include <linux/dcbnl.h> +#include <linux/in6.h> #include <net/switchdev.h> -#include <net/devlink.h> #include "port.h" #include "core.h" #define MLXSW_SP_VFID_BASE VLAN_N_VID -#define MLXSW_SP_VFID_PORT_MAX 512 /* Non-bridged VLAN interfaces */ -#define MLXSW_SP_VFID_BR_MAX 8192 /* Bridged VLAN interfaces */ -#define MLXSW_SP_VFID_MAX (MLXSW_SP_VFID_PORT_MAX + MLXSW_SP_VFID_BR_MAX) +#define MLXSW_SP_VFID_MAX 6656 /* Bridged VLAN interfaces */ + +#define MLXSW_SP_RFID_BASE 15360 +#define MLXSW_SP_RIF_MAX 800 #define MLXSW_SP_LAG_MAX 64 #define MLXSW_SP_PORT_PER_LAG_MAX 16 @@ -60,8 +63,36 @@ #define MLXSW_SP_PORTS_PER_CLUSTER_MAX 4 +#define MLXSW_SP_LPM_TREE_MIN 2 /* trees 0 and 1 are reserved */ +#define MLXSW_SP_LPM_TREE_MAX 22 +#define MLXSW_SP_LPM_TREE_COUNT (MLXSW_SP_LPM_TREE_MAX - MLXSW_SP_LPM_TREE_MIN) + +#define MLXSW_SP_VIRTUAL_ROUTER_MAX 256 + #define MLXSW_SP_PORT_BASE_SPEED 25000 /* Mb/s */ +#define MLXSW_SP_BYTES_PER_CELL 96 + +#define MLXSW_SP_BYTES_TO_CELLS(b) DIV_ROUND_UP(b, MLXSW_SP_BYTES_PER_CELL) +#define MLXSW_SP_CELLS_TO_BYTES(c) (c * MLXSW_SP_BYTES_PER_CELL) + +#define MLXSW_SP_KVD_LINEAR_SIZE 65536 /* entries */ +#define MLXSW_SP_KVD_HASH_SINGLE_SIZE 163840 /* entries */ +#define MLXSW_SP_KVD_HASH_DOUBLE_SIZE 32768 /* entries */ + +/* Maximum delay buffer needed in case of PAUSE frames, in cells. + * Assumes 100m cable and maximum MTU. + */ +#define MLXSW_SP_PAUSE_DELAY 612 + +#define MLXSW_SP_CELL_FACTOR 2 /* 2 * cell_size / (IPG + cell_size + 1) */ + +static inline u16 mlxsw_sp_pfc_delay_get(int mtu, u16 delay) +{ + delay = MLXSW_SP_BYTES_TO_CELLS(DIV_ROUND_UP(delay, BITS_PER_BYTE)); + return MLXSW_SP_CELL_FACTOR * delay + MLXSW_SP_BYTES_TO_CELLS(mtu); +} + struct mlxsw_sp_port; struct mlxsw_sp_upper { @@ -69,12 +100,22 @@ struct mlxsw_sp_upper { unsigned int ref_count; }; -struct mlxsw_sp_vfid { +struct mlxsw_sp_fid { + void (*leave)(struct mlxsw_sp_port *mlxsw_sp_vport); struct list_head list; - u16 nr_vports; - u16 vfid; /* Starting at 0 */ - struct net_device *br_dev; - u16 vid; + unsigned int ref_count; + struct net_device *dev; + struct mlxsw_sp_rif *r; + u16 fid; +}; + +struct mlxsw_sp_rif { + struct net_device *dev; + unsigned int ref_count; + struct mlxsw_sp_fid *f; + unsigned char addr[ETH_ALEN]; + int mtu; + u16 rif; }; struct mlxsw_sp_mid { @@ -97,23 +138,144 @@ static inline u16 mlxsw_sp_fid_to_vfid(u16 fid) static inline bool mlxsw_sp_fid_is_vfid(u16 fid) { - return fid >= MLXSW_SP_VFID_BASE; + return fid >= MLXSW_SP_VFID_BASE && fid < MLXSW_SP_RFID_BASE; } -struct mlxsw_sp { +static inline bool mlxsw_sp_fid_is_rfid(u16 fid) +{ + return fid >= MLXSW_SP_RFID_BASE; +} + +static inline u16 mlxsw_sp_rif_sp_to_fid(u16 rif) +{ + return MLXSW_SP_RFID_BASE + rif; +} + +struct mlxsw_sp_sb_pr { + enum mlxsw_reg_sbpr_mode mode; + u32 size; +}; + +struct mlxsw_cp_sb_occ { + u32 cur; + u32 max; +}; + +struct mlxsw_sp_sb_cm { + u32 min_buff; + u32 max_buff; + u8 pool; + struct mlxsw_cp_sb_occ occ; +}; + +struct mlxsw_sp_sb_pm { + u32 min_buff; + u32 max_buff; + struct mlxsw_cp_sb_occ occ; +}; + +#define MLXSW_SP_SB_POOL_COUNT 4 +#define MLXSW_SP_SB_TC_COUNT 8 + +struct mlxsw_sp_sb { + struct mlxsw_sp_sb_pr prs[2][MLXSW_SP_SB_POOL_COUNT]; struct { - struct list_head list; - unsigned long mapped[BITS_TO_LONGS(MLXSW_SP_VFID_PORT_MAX)]; - } port_vfids; + struct mlxsw_sp_sb_cm cms[2][MLXSW_SP_SB_TC_COUNT]; + struct mlxsw_sp_sb_pm pms[2][MLXSW_SP_SB_POOL_COUNT]; + } ports[MLXSW_PORT_MAX_PORTS]; +}; + +#define MLXSW_SP_PREFIX_COUNT (sizeof(struct in6_addr) * BITS_PER_BYTE) + +struct mlxsw_sp_prefix_usage { + DECLARE_BITMAP(b, MLXSW_SP_PREFIX_COUNT); +}; + +enum mlxsw_sp_l3proto { + MLXSW_SP_L3_PROTO_IPV4, + MLXSW_SP_L3_PROTO_IPV6, +}; + +struct mlxsw_sp_lpm_tree { + u8 id; /* tree ID */ + unsigned int ref_count; + enum mlxsw_sp_l3proto proto; + struct mlxsw_sp_prefix_usage prefix_usage; +}; + +struct mlxsw_sp_fib; + +struct mlxsw_sp_vr { + u16 id; /* virtual router ID */ + bool used; + enum mlxsw_sp_l3proto proto; + u32 tb_id; /* kernel fib table id */ + struct mlxsw_sp_lpm_tree *lpm_tree; + struct mlxsw_sp_fib *fib; +}; + +enum mlxsw_sp_span_type { + MLXSW_SP_SPAN_EGRESS, + MLXSW_SP_SPAN_INGRESS +}; + +struct mlxsw_sp_span_inspected_port { + struct list_head list; + enum mlxsw_sp_span_type type; + u8 local_port; +}; + +struct mlxsw_sp_span_entry { + u8 local_port; + bool used; + struct list_head bound_ports_list; + int ref_count; + int id; +}; + +enum mlxsw_sp_port_mall_action_type { + MLXSW_SP_PORT_MALL_MIRROR, +}; + +struct mlxsw_sp_port_mall_mirror_tc_entry { + u8 to_local_port; + bool ingress; +}; + +struct mlxsw_sp_port_mall_tc_entry { + struct list_head list; + unsigned long cookie; + enum mlxsw_sp_port_mall_action_type type; + union { + struct mlxsw_sp_port_mall_mirror_tc_entry mirror; + }; +}; + +struct mlxsw_sp_router { + struct mlxsw_sp_lpm_tree lpm_trees[MLXSW_SP_LPM_TREE_COUNT]; + struct mlxsw_sp_vr vrs[MLXSW_SP_VIRTUAL_ROUTER_MAX]; + struct rhashtable neigh_ht; + struct { + struct delayed_work dw; + unsigned long interval; /* ms */ + } neighs_update; + struct delayed_work nexthop_probe_dw; +#define MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL 5000 /* ms */ + struct list_head nexthop_group_list; + struct list_head nexthop_neighs_list; +}; + +struct mlxsw_sp { struct { struct list_head list; - unsigned long mapped[BITS_TO_LONGS(MLXSW_SP_VFID_BR_MAX)]; - } br_vfids; + DECLARE_BITMAP(mapped, MLXSW_SP_VFID_MAX); + } vfids; struct { struct list_head list; - unsigned long mapped[BITS_TO_LONGS(MLXSW_SP_MID_MAX)]; + DECLARE_BITMAP(mapped, MLXSW_SP_MID_MAX); } br_mids; - unsigned long active_fids[BITS_TO_LONGS(VLAN_N_VID)]; + struct list_head fids; /* VLAN-aware bridge FIDs */ + struct mlxsw_sp_rif *rifs[MLXSW_SP_RIF_MAX]; struct mlxsw_sp_port **ports; struct mlxsw_core *core; const struct mlxsw_bus_info *bus_info; @@ -130,6 +292,16 @@ struct mlxsw_sp { struct mlxsw_sp_upper master_bridge; struct mlxsw_sp_upper lags[MLXSW_SP_LAG_MAX]; u8 port_to_module[MLXSW_PORT_MAX_PORTS]; + struct mlxsw_sp_sb sb; + struct mlxsw_sp_router router; + struct { + DECLARE_BITMAP(usage, MLXSW_SP_KVD_LINEAR_SIZE); + } kvdl; + + struct { + struct mlxsw_sp_span_entry *entries; + int entries_count; + } span; }; static inline struct mlxsw_sp_upper * @@ -148,6 +320,7 @@ struct mlxsw_sp_port_pcpu_stats { }; struct mlxsw_sp_port { + struct mlxsw_core_port core_port; /* must be first */ struct net_device *dev; struct mlxsw_sp_port_pcpu_stats __percpu *pcpu_stats; struct mlxsw_sp *mlxsw_sp; @@ -163,17 +336,41 @@ struct mlxsw_sp_port { u16 lag_id; struct { struct list_head list; - struct mlxsw_sp_vfid *vfid; + struct mlxsw_sp_fid *f; u16 vid; } vport; + struct { + u8 tx_pause:1, + rx_pause:1; + } link; + struct { + struct ieee_ets *ets; + struct ieee_maxrate *maxrate; + struct ieee_pfc *pfc; + } dcb; + struct { + u8 module; + u8 width; + u8 lane; + } mapping; /* 802.1Q bridge VLANs */ unsigned long *active_vlans; unsigned long *untagged_vlans; /* VLAN interfaces */ struct list_head vports_list; - struct devlink_port devlink_port; + /* TC handles */ + struct list_head mall_tc_list; }; +struct mlxsw_sp_port *mlxsw_sp_port_lower_dev_hold(struct net_device *dev); +void mlxsw_sp_port_dev_put(struct mlxsw_sp_port *mlxsw_sp_port); + +static inline bool +mlxsw_sp_port_is_pause_en(const struct mlxsw_sp_port *mlxsw_sp_port) +{ + return mlxsw_sp_port->link.tx_pause || mlxsw_sp_port->link.rx_pause; +} + static inline struct mlxsw_sp_port * mlxsw_sp_port_lagged_get(struct mlxsw_sp *mlxsw_sp, u16 lag_id, u8 port_index) { @@ -186,28 +383,38 @@ mlxsw_sp_port_lagged_get(struct mlxsw_sp *mlxsw_sp, u16 lag_id, u8 port_index) return mlxsw_sp_port && mlxsw_sp_port->lagged ? mlxsw_sp_port : NULL; } +static inline u16 +mlxsw_sp_vport_vid_get(const struct mlxsw_sp_port *mlxsw_sp_vport) +{ + return mlxsw_sp_vport->vport.vid; +} + static inline bool mlxsw_sp_port_is_vport(const struct mlxsw_sp_port *mlxsw_sp_port) { - return mlxsw_sp_port->vport.vfid; + u16 vid = mlxsw_sp_vport_vid_get(mlxsw_sp_port); + + return vid != 0; } -static inline struct net_device * -mlxsw_sp_vport_br_get(const struct mlxsw_sp_port *mlxsw_sp_vport) +static inline void mlxsw_sp_vport_fid_set(struct mlxsw_sp_port *mlxsw_sp_vport, + struct mlxsw_sp_fid *f) { - return mlxsw_sp_vport->vport.vfid->br_dev; + mlxsw_sp_vport->vport.f = f; } -static inline u16 -mlxsw_sp_vport_vid_get(const struct mlxsw_sp_port *mlxsw_sp_vport) +static inline struct mlxsw_sp_fid * +mlxsw_sp_vport_fid_get(const struct mlxsw_sp_port *mlxsw_sp_vport) { - return mlxsw_sp_vport->vport.vid; + return mlxsw_sp_vport->vport.f; } -static inline u16 -mlxsw_sp_vport_vfid_get(const struct mlxsw_sp_port *mlxsw_sp_vport) +static inline struct net_device * +mlxsw_sp_vport_dev_get(const struct mlxsw_sp_port *mlxsw_sp_vport) { - return mlxsw_sp_vport->vport.vfid->vfid; + struct mlxsw_sp_fid *f = mlxsw_sp_vport_fid_get(mlxsw_sp_vport); + + return f ? f->dev : NULL; } static inline struct mlxsw_sp_port * @@ -225,27 +432,99 @@ mlxsw_sp_port_vport_find(const struct mlxsw_sp_port *mlxsw_sp_port, u16 vid) } static inline struct mlxsw_sp_port * -mlxsw_sp_port_vport_find_by_vfid(const struct mlxsw_sp_port *mlxsw_sp_port, - u16 vfid) +mlxsw_sp_port_vport_find_by_fid(const struct mlxsw_sp_port *mlxsw_sp_port, + u16 fid) { struct mlxsw_sp_port *mlxsw_sp_vport; list_for_each_entry(mlxsw_sp_vport, &mlxsw_sp_port->vports_list, vport.list) { - if (mlxsw_sp_vport_vfid_get(mlxsw_sp_vport) == vfid) + struct mlxsw_sp_fid *f = mlxsw_sp_vport_fid_get(mlxsw_sp_vport); + + if (f && f->fid == fid) return mlxsw_sp_vport; } return NULL; } +static inline struct mlxsw_sp_fid *mlxsw_sp_fid_find(struct mlxsw_sp *mlxsw_sp, + u16 fid) +{ + struct mlxsw_sp_fid *f; + + list_for_each_entry(f, &mlxsw_sp->fids, list) + if (f->fid == fid) + return f; + + return NULL; +} + +static inline struct mlxsw_sp_fid * +mlxsw_sp_vfid_find(const struct mlxsw_sp *mlxsw_sp, + const struct net_device *br_dev) +{ + struct mlxsw_sp_fid *f; + + list_for_each_entry(f, &mlxsw_sp->vfids.list, list) + if (f->dev == br_dev) + return f; + + return NULL; +} + +static inline struct mlxsw_sp_rif * +mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp, + const struct net_device *dev) +{ + int i; + + for (i = 0; i < MLXSW_SP_RIF_MAX; i++) + if (mlxsw_sp->rifs[i] && mlxsw_sp->rifs[i]->dev == dev) + return mlxsw_sp->rifs[i]; + + return NULL; +} + enum mlxsw_sp_flood_table { MLXSW_SP_FLOOD_TABLE_UC, MLXSW_SP_FLOOD_TABLE_BM, }; int mlxsw_sp_buffers_init(struct mlxsw_sp *mlxsw_sp); +void mlxsw_sp_buffers_fini(struct mlxsw_sp *mlxsw_sp); int mlxsw_sp_port_buffers_init(struct mlxsw_sp_port *mlxsw_sp_port); +int mlxsw_sp_sb_pool_get(struct mlxsw_core *mlxsw_core, + unsigned int sb_index, u16 pool_index, + struct devlink_sb_pool_info *pool_info); +int mlxsw_sp_sb_pool_set(struct mlxsw_core *mlxsw_core, + unsigned int sb_index, u16 pool_index, u32 size, + enum devlink_sb_threshold_type threshold_type); +int mlxsw_sp_sb_port_pool_get(struct mlxsw_core_port *mlxsw_core_port, + unsigned int sb_index, u16 pool_index, + u32 *p_threshold); +int mlxsw_sp_sb_port_pool_set(struct mlxsw_core_port *mlxsw_core_port, + unsigned int sb_index, u16 pool_index, + u32 threshold); +int mlxsw_sp_sb_tc_pool_bind_get(struct mlxsw_core_port *mlxsw_core_port, + unsigned int sb_index, u16 tc_index, + enum devlink_sb_pool_type pool_type, + u16 *p_pool_index, u32 *p_threshold); +int mlxsw_sp_sb_tc_pool_bind_set(struct mlxsw_core_port *mlxsw_core_port, + unsigned int sb_index, u16 tc_index, + enum devlink_sb_pool_type pool_type, + u16 pool_index, u32 threshold); +int mlxsw_sp_sb_occ_snapshot(struct mlxsw_core *mlxsw_core, + unsigned int sb_index); +int mlxsw_sp_sb_occ_max_clear(struct mlxsw_core *mlxsw_core, + unsigned int sb_index); +int mlxsw_sp_sb_occ_port_pool_get(struct mlxsw_core_port *mlxsw_core_port, + unsigned int sb_index, u16 pool_index, + u32 *p_cur, u32 *p_max); +int mlxsw_sp_sb_occ_tc_port_bind_get(struct mlxsw_core_port *mlxsw_core_port, + unsigned int sb_index, u16 tc_index, + enum devlink_sb_pool_type pool_type, + u32 *p_cur, u32 *p_max); int mlxsw_sp_switchdev_init(struct mlxsw_sp *mlxsw_sp); void mlxsw_sp_switchdev_fini(struct mlxsw_sp *mlxsw_sp); @@ -257,13 +536,61 @@ int mlxsw_sp_port_vid_to_fid_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid); int mlxsw_sp_port_vlan_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid_begin, u16 vid_end, bool is_member, bool untagged); -int mlxsw_sp_port_add_vid(struct net_device *dev, __be16 __always_unused proto, - u16 vid); -int mlxsw_sp_port_kill_vid(struct net_device *dev, - __be16 __always_unused proto, u16 vid); -int mlxsw_sp_vport_flood_set(struct mlxsw_sp_port *mlxsw_sp_vport, u16 vfid, - bool set, bool only_uc); +int mlxsw_sp_vport_flood_set(struct mlxsw_sp_port *mlxsw_sp_vport, u16 fid, + bool set); void mlxsw_sp_port_active_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port); int mlxsw_sp_port_pvid_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid); +int mlxsw_sp_port_fdb_flush(struct mlxsw_sp_port *mlxsw_sp_port, u16 fid); +int mlxsw_sp_rif_fdb_op(struct mlxsw_sp *mlxsw_sp, const char *mac, u16 fid, + bool adding); +struct mlxsw_sp_fid *mlxsw_sp_fid_create(struct mlxsw_sp *mlxsw_sp, u16 fid); +void mlxsw_sp_fid_destroy(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fid *f); +void mlxsw_sp_rif_bridge_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_rif *r); +int mlxsw_sp_port_ets_set(struct mlxsw_sp_port *mlxsw_sp_port, + enum mlxsw_reg_qeec_hr hr, u8 index, u8 next_index, + bool dwrr, u8 dwrr_weight); +int mlxsw_sp_port_prio_tc_set(struct mlxsw_sp_port *mlxsw_sp_port, + u8 switch_prio, u8 tclass); +int __mlxsw_sp_port_headroom_set(struct mlxsw_sp_port *mlxsw_sp_port, int mtu, + u8 *prio_tc, bool pause_en, + struct ieee_pfc *my_pfc); +int mlxsw_sp_port_ets_maxrate_set(struct mlxsw_sp_port *mlxsw_sp_port, + enum mlxsw_reg_qeec_hr hr, u8 index, + u8 next_index, u32 maxrate); + +#ifdef CONFIG_MLXSW_SPECTRUM_DCB + +int mlxsw_sp_port_dcb_init(struct mlxsw_sp_port *mlxsw_sp_port); +void mlxsw_sp_port_dcb_fini(struct mlxsw_sp_port *mlxsw_sp_port); + +#else + +static inline int mlxsw_sp_port_dcb_init(struct mlxsw_sp_port *mlxsw_sp_port) +{ + return 0; +} + +static inline void mlxsw_sp_port_dcb_fini(struct mlxsw_sp_port *mlxsw_sp_port) +{} + +#endif + +int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp); +void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp); +int mlxsw_sp_router_fib4_add(struct mlxsw_sp_port *mlxsw_sp_port, + const struct switchdev_obj_ipv4_fib *fib4, + struct switchdev_trans *trans); +int mlxsw_sp_router_fib4_del(struct mlxsw_sp_port *mlxsw_sp_port, + const struct switchdev_obj_ipv4_fib *fib4); +int mlxsw_sp_router_neigh_construct(struct net_device *dev, + struct neighbour *n); +void mlxsw_sp_router_neigh_destroy(struct net_device *dev, + struct neighbour *n); +int mlxsw_sp_router_netevent_event(struct notifier_block *unused, + unsigned long event, void *ptr); + +int mlxsw_sp_kvdl_alloc(struct mlxsw_sp *mlxsw_sp, unsigned int entry_count); +void mlxsw_sp_kvdl_free(struct mlxsw_sp *mlxsw_sp, int entry_index); #endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c index d59195e3f7fb..953b214f38d0 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c @@ -34,36 +34,140 @@ #include <linux/kernel.h> #include <linux/types.h> +#include <linux/dcbnl.h> +#include <linux/if_ether.h> +#include <linux/list.h> #include "spectrum.h" #include "core.h" #include "port.h" #include "reg.h" -struct mlxsw_sp_pb { - u8 index; - u16 size; -}; +static struct mlxsw_sp_sb_pr *mlxsw_sp_sb_pr_get(struct mlxsw_sp *mlxsw_sp, + u8 pool, + enum mlxsw_reg_sbxx_dir dir) +{ + return &mlxsw_sp->sb.prs[dir][pool]; +} -#define MLXSW_SP_PB(_index, _size) \ - { \ - .index = _index, \ - .size = _size, \ +static struct mlxsw_sp_sb_cm *mlxsw_sp_sb_cm_get(struct mlxsw_sp *mlxsw_sp, + u8 local_port, u8 pg_buff, + enum mlxsw_reg_sbxx_dir dir) +{ + return &mlxsw_sp->sb.ports[local_port].cms[dir][pg_buff]; +} + +static struct mlxsw_sp_sb_pm *mlxsw_sp_sb_pm_get(struct mlxsw_sp *mlxsw_sp, + u8 local_port, u8 pool, + enum mlxsw_reg_sbxx_dir dir) +{ + return &mlxsw_sp->sb.ports[local_port].pms[dir][pool]; +} + +static int mlxsw_sp_sb_pr_write(struct mlxsw_sp *mlxsw_sp, u8 pool, + enum mlxsw_reg_sbxx_dir dir, + enum mlxsw_reg_sbpr_mode mode, u32 size) +{ + char sbpr_pl[MLXSW_REG_SBPR_LEN]; + struct mlxsw_sp_sb_pr *pr; + int err; + + mlxsw_reg_sbpr_pack(sbpr_pl, pool, dir, mode, size); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbpr), sbpr_pl); + if (err) + return err; + + pr = mlxsw_sp_sb_pr_get(mlxsw_sp, pool, dir); + pr->mode = mode; + pr->size = size; + return 0; +} + +static int mlxsw_sp_sb_cm_write(struct mlxsw_sp *mlxsw_sp, u8 local_port, + u8 pg_buff, enum mlxsw_reg_sbxx_dir dir, + u32 min_buff, u32 max_buff, u8 pool) +{ + char sbcm_pl[MLXSW_REG_SBCM_LEN]; + int err; + + mlxsw_reg_sbcm_pack(sbcm_pl, local_port, pg_buff, dir, + min_buff, max_buff, pool); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbcm), sbcm_pl); + if (err) + return err; + if (pg_buff < MLXSW_SP_SB_TC_COUNT) { + struct mlxsw_sp_sb_cm *cm; + + cm = mlxsw_sp_sb_cm_get(mlxsw_sp, local_port, pg_buff, dir); + cm->min_buff = min_buff; + cm->max_buff = max_buff; + cm->pool = pool; } + return 0; +} + +static int mlxsw_sp_sb_pm_write(struct mlxsw_sp *mlxsw_sp, u8 local_port, + u8 pool, enum mlxsw_reg_sbxx_dir dir, + u32 min_buff, u32 max_buff) +{ + char sbpm_pl[MLXSW_REG_SBPM_LEN]; + struct mlxsw_sp_sb_pm *pm; + int err; + + mlxsw_reg_sbpm_pack(sbpm_pl, local_port, pool, dir, false, + min_buff, max_buff); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbpm), sbpm_pl); + if (err) + return err; + + pm = mlxsw_sp_sb_pm_get(mlxsw_sp, local_port, pool, dir); + pm->min_buff = min_buff; + pm->max_buff = max_buff; + return 0; +} + +static int mlxsw_sp_sb_pm_occ_clear(struct mlxsw_sp *mlxsw_sp, u8 local_port, + u8 pool, enum mlxsw_reg_sbxx_dir dir, + struct list_head *bulk_list) +{ + char sbpm_pl[MLXSW_REG_SBPM_LEN]; + + mlxsw_reg_sbpm_pack(sbpm_pl, local_port, pool, dir, true, 0, 0); + return mlxsw_reg_trans_query(mlxsw_sp->core, MLXSW_REG(sbpm), sbpm_pl, + bulk_list, NULL, 0); +} + +static void mlxsw_sp_sb_pm_occ_query_cb(struct mlxsw_core *mlxsw_core, + char *sbpm_pl, size_t sbpm_pl_len, + unsigned long cb_priv) +{ + struct mlxsw_sp_sb_pm *pm = (struct mlxsw_sp_sb_pm *) cb_priv; + + mlxsw_reg_sbpm_unpack(sbpm_pl, &pm->occ.cur, &pm->occ.max); +} + +static int mlxsw_sp_sb_pm_occ_query(struct mlxsw_sp *mlxsw_sp, u8 local_port, + u8 pool, enum mlxsw_reg_sbxx_dir dir, + struct list_head *bulk_list) +{ + char sbpm_pl[MLXSW_REG_SBPM_LEN]; + struct mlxsw_sp_sb_pm *pm; + + pm = mlxsw_sp_sb_pm_get(mlxsw_sp, local_port, pool, dir); + mlxsw_reg_sbpm_pack(sbpm_pl, local_port, pool, dir, false, 0, 0); + return mlxsw_reg_trans_query(mlxsw_sp->core, MLXSW_REG(sbpm), sbpm_pl, + bulk_list, + mlxsw_sp_sb_pm_occ_query_cb, + (unsigned long) pm); +} -static const struct mlxsw_sp_pb mlxsw_sp_pbs[] = { - MLXSW_SP_PB(0, 208), - MLXSW_SP_PB(1, 208), - MLXSW_SP_PB(2, 208), - MLXSW_SP_PB(3, 208), - MLXSW_SP_PB(4, 208), - MLXSW_SP_PB(5, 208), - MLXSW_SP_PB(6, 208), - MLXSW_SP_PB(7, 208), - MLXSW_SP_PB(9, 208), +static const u16 mlxsw_sp_pbs[] = { + [0] = 2 * MLXSW_SP_BYTES_TO_CELLS(ETH_FRAME_LEN), + [9] = 2 * MLXSW_SP_BYTES_TO_CELLS(MLXSW_PORT_MAX_MTU), }; #define MLXSW_SP_PBS_LEN ARRAY_SIZE(mlxsw_sp_pbs) +#define MLXSW_SP_PB_UNUSED 8 static int mlxsw_sp_port_pb_init(struct mlxsw_sp_port *mlxsw_sp_port) { @@ -73,194 +177,206 @@ static int mlxsw_sp_port_pb_init(struct mlxsw_sp_port *mlxsw_sp_port) mlxsw_reg_pbmc_pack(pbmc_pl, mlxsw_sp_port->local_port, 0xffff, 0xffff / 2); for (i = 0; i < MLXSW_SP_PBS_LEN; i++) { - const struct mlxsw_sp_pb *pb; - - pb = &mlxsw_sp_pbs[i]; - mlxsw_reg_pbmc_lossy_buffer_pack(pbmc_pl, pb->index, pb->size); + if (i == MLXSW_SP_PB_UNUSED) + continue; + mlxsw_reg_pbmc_lossy_buffer_pack(pbmc_pl, i, mlxsw_sp_pbs[i]); } + mlxsw_reg_pbmc_lossy_buffer_pack(pbmc_pl, + MLXSW_REG_PBMC_PORT_SHARED_BUF_IDX, 0); return mlxsw_reg_write(mlxsw_sp_port->mlxsw_sp->core, MLXSW_REG(pbmc), pbmc_pl); } -#define MLXSW_SP_SB_BYTES_PER_CELL 96 +static int mlxsw_sp_port_pb_prio_init(struct mlxsw_sp_port *mlxsw_sp_port) +{ + char pptb_pl[MLXSW_REG_PPTB_LEN]; + int i; -struct mlxsw_sp_sb_pool { - u8 pool; - enum mlxsw_reg_sbpr_dir dir; - enum mlxsw_reg_sbpr_mode mode; - u32 size; -}; + mlxsw_reg_pptb_pack(pptb_pl, mlxsw_sp_port->local_port); + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) + mlxsw_reg_pptb_prio_to_buff_pack(pptb_pl, i, 0); + return mlxsw_reg_write(mlxsw_sp_port->mlxsw_sp->core, MLXSW_REG(pptb), + pptb_pl); +} + +static int mlxsw_sp_port_headroom_init(struct mlxsw_sp_port *mlxsw_sp_port) +{ + int err; + + err = mlxsw_sp_port_pb_init(mlxsw_sp_port); + if (err) + return err; + return mlxsw_sp_port_pb_prio_init(mlxsw_sp_port); +} -#define MLXSW_SP_SB_POOL_INGRESS_SIZE \ - ((15000000 - (2 * 20000 * MLXSW_PORT_MAX_PORTS)) / \ - MLXSW_SP_SB_BYTES_PER_CELL) -#define MLXSW_SP_SB_POOL_EGRESS_SIZE \ - ((14000000 - (8 * 1500 * MLXSW_PORT_MAX_PORTS)) / \ - MLXSW_SP_SB_BYTES_PER_CELL) - -#define MLXSW_SP_SB_POOL(_pool, _dir, _mode, _size) \ - { \ - .pool = _pool, \ - .dir = _dir, \ - .mode = _mode, \ - .size = _size, \ +#define MLXSW_SP_SB_PR_INGRESS_SIZE \ + (15000000 - (2 * 20000 * MLXSW_PORT_MAX_PORTS)) +#define MLXSW_SP_SB_PR_INGRESS_MNG_SIZE (200 * 1000) +#define MLXSW_SP_SB_PR_EGRESS_SIZE \ + (14000000 - (8 * 1500 * MLXSW_PORT_MAX_PORTS)) + +#define MLXSW_SP_SB_PR(_mode, _size) \ + { \ + .mode = _mode, \ + .size = _size, \ } -#define MLXSW_SP_SB_POOL_INGRESS(_pool, _size) \ - MLXSW_SP_SB_POOL(_pool, MLXSW_REG_SBPR_DIR_INGRESS, \ - MLXSW_REG_SBPR_MODE_DYNAMIC, _size) - -#define MLXSW_SP_SB_POOL_EGRESS(_pool, _size) \ - MLXSW_SP_SB_POOL(_pool, MLXSW_REG_SBPR_DIR_EGRESS, \ - MLXSW_REG_SBPR_MODE_DYNAMIC, _size) - -static const struct mlxsw_sp_sb_pool mlxsw_sp_sb_pools[] = { - MLXSW_SP_SB_POOL_INGRESS(0, MLXSW_SP_SB_POOL_INGRESS_SIZE), - MLXSW_SP_SB_POOL_INGRESS(1, 0), - MLXSW_SP_SB_POOL_INGRESS(2, 0), - MLXSW_SP_SB_POOL_INGRESS(3, 0), - MLXSW_SP_SB_POOL_EGRESS(0, MLXSW_SP_SB_POOL_EGRESS_SIZE), - MLXSW_SP_SB_POOL_EGRESS(1, 0), - MLXSW_SP_SB_POOL_EGRESS(2, 0), - MLXSW_SP_SB_POOL_EGRESS(2, MLXSW_SP_SB_POOL_EGRESS_SIZE), +static const struct mlxsw_sp_sb_pr mlxsw_sp_sb_prs_ingress[] = { + MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, + MLXSW_SP_BYTES_TO_CELLS(MLXSW_SP_SB_PR_INGRESS_SIZE)), + MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0), + MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0), + MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, + MLXSW_SP_BYTES_TO_CELLS(MLXSW_SP_SB_PR_INGRESS_MNG_SIZE)), }; -#define MLXSW_SP_SB_POOLS_LEN ARRAY_SIZE(mlxsw_sp_sb_pools) +#define MLXSW_SP_SB_PRS_INGRESS_LEN ARRAY_SIZE(mlxsw_sp_sb_prs_ingress) + +static const struct mlxsw_sp_sb_pr mlxsw_sp_sb_prs_egress[] = { + MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, + MLXSW_SP_BYTES_TO_CELLS(MLXSW_SP_SB_PR_EGRESS_SIZE)), + MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0), + MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0), + MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0), +}; -static int mlxsw_sp_sb_pools_init(struct mlxsw_sp *mlxsw_sp) +#define MLXSW_SP_SB_PRS_EGRESS_LEN ARRAY_SIZE(mlxsw_sp_sb_prs_egress) + +static int __mlxsw_sp_sb_prs_init(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_reg_sbxx_dir dir, + const struct mlxsw_sp_sb_pr *prs, + size_t prs_len) { - char sbpr_pl[MLXSW_REG_SBPR_LEN]; int i; int err; - for (i = 0; i < MLXSW_SP_SB_POOLS_LEN; i++) { - const struct mlxsw_sp_sb_pool *pool; + for (i = 0; i < prs_len; i++) { + const struct mlxsw_sp_sb_pr *pr; - pool = &mlxsw_sp_sb_pools[i]; - mlxsw_reg_sbpr_pack(sbpr_pl, pool->pool, pool->dir, - pool->mode, pool->size); - err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbpr), sbpr_pl); + pr = &prs[i]; + err = mlxsw_sp_sb_pr_write(mlxsw_sp, i, dir, + pr->mode, pr->size); if (err) return err; } return 0; } -struct mlxsw_sp_sb_cm { - union { - u8 pg; - u8 tc; - } u; - enum mlxsw_reg_sbcm_dir dir; - u32 min_buff; - u32 max_buff; - u8 pool; -}; +static int mlxsw_sp_sb_prs_init(struct mlxsw_sp *mlxsw_sp) +{ + int err; -#define MLXSW_SP_SB_CM(_pg_tc, _dir, _min_buff, _max_buff, _pool) \ - { \ - .u.pg = _pg_tc, \ - .dir = _dir, \ - .min_buff = _min_buff, \ - .max_buff = _max_buff, \ - .pool = _pool, \ + err = __mlxsw_sp_sb_prs_init(mlxsw_sp, MLXSW_REG_SBXX_DIR_INGRESS, + mlxsw_sp_sb_prs_ingress, + MLXSW_SP_SB_PRS_INGRESS_LEN); + if (err) + return err; + return __mlxsw_sp_sb_prs_init(mlxsw_sp, MLXSW_REG_SBXX_DIR_EGRESS, + mlxsw_sp_sb_prs_egress, + MLXSW_SP_SB_PRS_EGRESS_LEN); +} + +#define MLXSW_SP_SB_CM(_min_buff, _max_buff, _pool) \ + { \ + .min_buff = _min_buff, \ + .max_buff = _max_buff, \ + .pool = _pool, \ } -#define MLXSW_SP_SB_CM_INGRESS(_pg, _min_buff, _max_buff) \ - MLXSW_SP_SB_CM(_pg, MLXSW_REG_SBCM_DIR_INGRESS, \ - _min_buff, _max_buff, 0) - -#define MLXSW_SP_SB_CM_EGRESS(_tc, _min_buff, _max_buff) \ - MLXSW_SP_SB_CM(_tc, MLXSW_REG_SBCM_DIR_EGRESS, \ - _min_buff, _max_buff, 0) - -#define MLXSW_SP_CPU_PORT_SB_CM_EGRESS(_tc) \ - MLXSW_SP_SB_CM(_tc, MLXSW_REG_SBCM_DIR_EGRESS, 104, 2, 3) - -static const struct mlxsw_sp_sb_cm mlxsw_sp_sb_cms[] = { - MLXSW_SP_SB_CM_INGRESS(0, 10000 / MLXSW_SP_SB_BYTES_PER_CELL, 8), - MLXSW_SP_SB_CM_INGRESS(1, 0, 0), - MLXSW_SP_SB_CM_INGRESS(2, 0, 0), - MLXSW_SP_SB_CM_INGRESS(3, 0, 0), - MLXSW_SP_SB_CM_INGRESS(4, 0, 0), - MLXSW_SP_SB_CM_INGRESS(5, 0, 0), - MLXSW_SP_SB_CM_INGRESS(6, 0, 0), - MLXSW_SP_SB_CM_INGRESS(7, 0, 0), - MLXSW_SP_SB_CM_INGRESS(9, 20000 / MLXSW_SP_SB_BYTES_PER_CELL, 0xff), - MLXSW_SP_SB_CM_EGRESS(0, 1500 / MLXSW_SP_SB_BYTES_PER_CELL, 9), - MLXSW_SP_SB_CM_EGRESS(1, 1500 / MLXSW_SP_SB_BYTES_PER_CELL, 9), - MLXSW_SP_SB_CM_EGRESS(2, 1500 / MLXSW_SP_SB_BYTES_PER_CELL, 9), - MLXSW_SP_SB_CM_EGRESS(3, 1500 / MLXSW_SP_SB_BYTES_PER_CELL, 9), - MLXSW_SP_SB_CM_EGRESS(4, 1500 / MLXSW_SP_SB_BYTES_PER_CELL, 9), - MLXSW_SP_SB_CM_EGRESS(5, 1500 / MLXSW_SP_SB_BYTES_PER_CELL, 9), - MLXSW_SP_SB_CM_EGRESS(6, 1500 / MLXSW_SP_SB_BYTES_PER_CELL, 9), - MLXSW_SP_SB_CM_EGRESS(7, 1500 / MLXSW_SP_SB_BYTES_PER_CELL, 9), - MLXSW_SP_SB_CM_EGRESS(8, 0, 0), - MLXSW_SP_SB_CM_EGRESS(9, 0, 0), - MLXSW_SP_SB_CM_EGRESS(10, 0, 0), - MLXSW_SP_SB_CM_EGRESS(11, 0, 0), - MLXSW_SP_SB_CM_EGRESS(12, 0, 0), - MLXSW_SP_SB_CM_EGRESS(13, 0, 0), - MLXSW_SP_SB_CM_EGRESS(14, 0, 0), - MLXSW_SP_SB_CM_EGRESS(15, 0, 0), - MLXSW_SP_SB_CM_EGRESS(16, 1, 0xff), +static const struct mlxsw_sp_sb_cm mlxsw_sp_sb_cms_ingress[] = { + MLXSW_SP_SB_CM(MLXSW_SP_BYTES_TO_CELLS(10000), 8, 0), + MLXSW_SP_SB_CM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN, 0), + MLXSW_SP_SB_CM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN, 0), + MLXSW_SP_SB_CM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN, 0), + MLXSW_SP_SB_CM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN, 0), + MLXSW_SP_SB_CM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN, 0), + MLXSW_SP_SB_CM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN, 0), + MLXSW_SP_SB_CM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN, 0), + MLXSW_SP_SB_CM(0, 0, 0), /* dummy, this PG does not exist */ + MLXSW_SP_SB_CM(MLXSW_SP_BYTES_TO_CELLS(20000), 1, 3), }; -#define MLXSW_SP_SB_CMS_LEN ARRAY_SIZE(mlxsw_sp_sb_cms) +#define MLXSW_SP_SB_CMS_INGRESS_LEN ARRAY_SIZE(mlxsw_sp_sb_cms_ingress) + +static const struct mlxsw_sp_sb_cm mlxsw_sp_sb_cms_egress[] = { + MLXSW_SP_SB_CM(MLXSW_SP_BYTES_TO_CELLS(1500), 9, 0), + MLXSW_SP_SB_CM(MLXSW_SP_BYTES_TO_CELLS(1500), 9, 0), + MLXSW_SP_SB_CM(MLXSW_SP_BYTES_TO_CELLS(1500), 9, 0), + MLXSW_SP_SB_CM(MLXSW_SP_BYTES_TO_CELLS(1500), 9, 0), + MLXSW_SP_SB_CM(MLXSW_SP_BYTES_TO_CELLS(1500), 9, 0), + MLXSW_SP_SB_CM(MLXSW_SP_BYTES_TO_CELLS(1500), 9, 0), + MLXSW_SP_SB_CM(MLXSW_SP_BYTES_TO_CELLS(1500), 9, 0), + MLXSW_SP_SB_CM(MLXSW_SP_BYTES_TO_CELLS(1500), 9, 0), + MLXSW_SP_SB_CM(0, 0, 0), + MLXSW_SP_SB_CM(0, 0, 0), + MLXSW_SP_SB_CM(0, 0, 0), + MLXSW_SP_SB_CM(0, 0, 0), + MLXSW_SP_SB_CM(0, 0, 0), + MLXSW_SP_SB_CM(0, 0, 0), + MLXSW_SP_SB_CM(0, 0, 0), + MLXSW_SP_SB_CM(0, 0, 0), + MLXSW_SP_SB_CM(1, 0xff, 0), +}; + +#define MLXSW_SP_SB_CMS_EGRESS_LEN ARRAY_SIZE(mlxsw_sp_sb_cms_egress) + +#define MLXSW_SP_CPU_PORT_SB_CM MLXSW_SP_SB_CM(0, 0, 0) static const struct mlxsw_sp_sb_cm mlxsw_sp_cpu_port_sb_cms[] = { - MLXSW_SP_CPU_PORT_SB_CM_EGRESS(0), - MLXSW_SP_CPU_PORT_SB_CM_EGRESS(1), - MLXSW_SP_CPU_PORT_SB_CM_EGRESS(2), - MLXSW_SP_CPU_PORT_SB_CM_EGRESS(3), - MLXSW_SP_CPU_PORT_SB_CM_EGRESS(4), - MLXSW_SP_CPU_PORT_SB_CM_EGRESS(5), - MLXSW_SP_CPU_PORT_SB_CM_EGRESS(6), - MLXSW_SP_CPU_PORT_SB_CM_EGRESS(7), - MLXSW_SP_CPU_PORT_SB_CM_EGRESS(8), - MLXSW_SP_CPU_PORT_SB_CM_EGRESS(9), - MLXSW_SP_CPU_PORT_SB_CM_EGRESS(10), - MLXSW_SP_CPU_PORT_SB_CM_EGRESS(11), - MLXSW_SP_CPU_PORT_SB_CM_EGRESS(12), - MLXSW_SP_CPU_PORT_SB_CM_EGRESS(13), - MLXSW_SP_CPU_PORT_SB_CM_EGRESS(14), - MLXSW_SP_CPU_PORT_SB_CM_EGRESS(15), - MLXSW_SP_CPU_PORT_SB_CM_EGRESS(16), - MLXSW_SP_CPU_PORT_SB_CM_EGRESS(17), - MLXSW_SP_CPU_PORT_SB_CM_EGRESS(18), - MLXSW_SP_CPU_PORT_SB_CM_EGRESS(19), - MLXSW_SP_CPU_PORT_SB_CM_EGRESS(20), - MLXSW_SP_CPU_PORT_SB_CM_EGRESS(21), - MLXSW_SP_CPU_PORT_SB_CM_EGRESS(22), - MLXSW_SP_CPU_PORT_SB_CM_EGRESS(23), - MLXSW_SP_CPU_PORT_SB_CM_EGRESS(24), - MLXSW_SP_CPU_PORT_SB_CM_EGRESS(25), - MLXSW_SP_CPU_PORT_SB_CM_EGRESS(26), - MLXSW_SP_CPU_PORT_SB_CM_EGRESS(27), - MLXSW_SP_CPU_PORT_SB_CM_EGRESS(28), - MLXSW_SP_CPU_PORT_SB_CM_EGRESS(29), - MLXSW_SP_CPU_PORT_SB_CM_EGRESS(30), - MLXSW_SP_CPU_PORT_SB_CM_EGRESS(31), + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_SB_CM(MLXSW_SP_BYTES_TO_CELLS(10000), 0, 0), + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, }; #define MLXSW_SP_CPU_PORT_SB_MCS_LEN \ ARRAY_SIZE(mlxsw_sp_cpu_port_sb_cms) -static int mlxsw_sp_sb_cms_init(struct mlxsw_sp *mlxsw_sp, u8 local_port, - const struct mlxsw_sp_sb_cm *cms, - size_t cms_len) +static int __mlxsw_sp_sb_cms_init(struct mlxsw_sp *mlxsw_sp, u8 local_port, + enum mlxsw_reg_sbxx_dir dir, + const struct mlxsw_sp_sb_cm *cms, + size_t cms_len) { - char sbcm_pl[MLXSW_REG_SBCM_LEN]; int i; int err; for (i = 0; i < cms_len; i++) { const struct mlxsw_sp_sb_cm *cm; + if (i == 8 && dir == MLXSW_REG_SBXX_DIR_INGRESS) + continue; /* PG number 8 does not exist, skip it */ cm = &cms[i]; - mlxsw_reg_sbcm_pack(sbcm_pl, local_port, cm->u.pg, cm->dir, - cm->min_buff, cm->max_buff, cm->pool); - err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbcm), sbcm_pl); + err = mlxsw_sp_sb_cm_write(mlxsw_sp, local_port, i, dir, + cm->min_buff, cm->max_buff, + cm->pool); if (err) return err; } @@ -269,105 +385,120 @@ static int mlxsw_sp_sb_cms_init(struct mlxsw_sp *mlxsw_sp, u8 local_port, static int mlxsw_sp_port_sb_cms_init(struct mlxsw_sp_port *mlxsw_sp_port) { - return mlxsw_sp_sb_cms_init(mlxsw_sp_port->mlxsw_sp, - mlxsw_sp_port->local_port, mlxsw_sp_sb_cms, - MLXSW_SP_SB_CMS_LEN); + int err; + + err = __mlxsw_sp_sb_cms_init(mlxsw_sp_port->mlxsw_sp, + mlxsw_sp_port->local_port, + MLXSW_REG_SBXX_DIR_INGRESS, + mlxsw_sp_sb_cms_ingress, + MLXSW_SP_SB_CMS_INGRESS_LEN); + if (err) + return err; + return __mlxsw_sp_sb_cms_init(mlxsw_sp_port->mlxsw_sp, + mlxsw_sp_port->local_port, + MLXSW_REG_SBXX_DIR_EGRESS, + mlxsw_sp_sb_cms_egress, + MLXSW_SP_SB_CMS_EGRESS_LEN); } static int mlxsw_sp_cpu_port_sb_cms_init(struct mlxsw_sp *mlxsw_sp) { - return mlxsw_sp_sb_cms_init(mlxsw_sp, 0, mlxsw_sp_cpu_port_sb_cms, - MLXSW_SP_CPU_PORT_SB_MCS_LEN); + return __mlxsw_sp_sb_cms_init(mlxsw_sp, 0, MLXSW_REG_SBXX_DIR_EGRESS, + mlxsw_sp_cpu_port_sb_cms, + MLXSW_SP_CPU_PORT_SB_MCS_LEN); } -struct mlxsw_sp_sb_pm { - u8 pool; - enum mlxsw_reg_sbpm_dir dir; - u32 min_buff; - u32 max_buff; +#define MLXSW_SP_SB_PM(_min_buff, _max_buff) \ + { \ + .min_buff = _min_buff, \ + .max_buff = _max_buff, \ + } + +static const struct mlxsw_sp_sb_pm mlxsw_sp_sb_pms_ingress[] = { + MLXSW_SP_SB_PM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MAX), + MLXSW_SP_SB_PM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN), + MLXSW_SP_SB_PM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN), + MLXSW_SP_SB_PM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MAX), }; -#define MLXSW_SP_SB_PM(_pool, _dir, _min_buff, _max_buff) \ - { \ - .pool = _pool, \ - .dir = _dir, \ - .min_buff = _min_buff, \ - .max_buff = _max_buff, \ - } +#define MLXSW_SP_SB_PMS_INGRESS_LEN ARRAY_SIZE(mlxsw_sp_sb_pms_ingress) -#define MLXSW_SP_SB_PM_INGRESS(_pool, _min_buff, _max_buff) \ - MLXSW_SP_SB_PM(_pool, MLXSW_REG_SBPM_DIR_INGRESS, \ - _min_buff, _max_buff) - -#define MLXSW_SP_SB_PM_EGRESS(_pool, _min_buff, _max_buff) \ - MLXSW_SP_SB_PM(_pool, MLXSW_REG_SBPM_DIR_EGRESS, \ - _min_buff, _max_buff) - -static const struct mlxsw_sp_sb_pm mlxsw_sp_sb_pms[] = { - MLXSW_SP_SB_PM_INGRESS(0, 0, 0xff), - MLXSW_SP_SB_PM_INGRESS(1, 0, 0), - MLXSW_SP_SB_PM_INGRESS(2, 0, 0), - MLXSW_SP_SB_PM_INGRESS(3, 0, 0), - MLXSW_SP_SB_PM_EGRESS(0, 0, 7), - MLXSW_SP_SB_PM_EGRESS(1, 0, 0), - MLXSW_SP_SB_PM_EGRESS(2, 0, 0), - MLXSW_SP_SB_PM_EGRESS(3, 0, 0), +static const struct mlxsw_sp_sb_pm mlxsw_sp_sb_pms_egress[] = { + MLXSW_SP_SB_PM(0, 7), + MLXSW_SP_SB_PM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN), + MLXSW_SP_SB_PM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN), + MLXSW_SP_SB_PM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN), }; -#define MLXSW_SP_SB_PMS_LEN ARRAY_SIZE(mlxsw_sp_sb_pms) +#define MLXSW_SP_SB_PMS_EGRESS_LEN ARRAY_SIZE(mlxsw_sp_sb_pms_egress) -static int mlxsw_sp_port_sb_pms_init(struct mlxsw_sp_port *mlxsw_sp_port) +static int __mlxsw_sp_port_sb_pms_init(struct mlxsw_sp *mlxsw_sp, u8 local_port, + enum mlxsw_reg_sbxx_dir dir, + const struct mlxsw_sp_sb_pm *pms, + size_t pms_len) { - char sbpm_pl[MLXSW_REG_SBPM_LEN]; int i; int err; - for (i = 0; i < MLXSW_SP_SB_PMS_LEN; i++) { + for (i = 0; i < pms_len; i++) { const struct mlxsw_sp_sb_pm *pm; - pm = &mlxsw_sp_sb_pms[i]; - mlxsw_reg_sbpm_pack(sbpm_pl, mlxsw_sp_port->local_port, - pm->pool, pm->dir, - pm->min_buff, pm->max_buff); - err = mlxsw_reg_write(mlxsw_sp_port->mlxsw_sp->core, - MLXSW_REG(sbpm), sbpm_pl); + pm = &pms[i]; + err = mlxsw_sp_sb_pm_write(mlxsw_sp, local_port, i, dir, + pm->min_buff, pm->max_buff); if (err) return err; } return 0; } +static int mlxsw_sp_port_sb_pms_init(struct mlxsw_sp_port *mlxsw_sp_port) +{ + int err; + + err = __mlxsw_sp_port_sb_pms_init(mlxsw_sp_port->mlxsw_sp, + mlxsw_sp_port->local_port, + MLXSW_REG_SBXX_DIR_INGRESS, + mlxsw_sp_sb_pms_ingress, + MLXSW_SP_SB_PMS_INGRESS_LEN); + if (err) + return err; + return __mlxsw_sp_port_sb_pms_init(mlxsw_sp_port->mlxsw_sp, + mlxsw_sp_port->local_port, + MLXSW_REG_SBXX_DIR_EGRESS, + mlxsw_sp_sb_pms_egress, + MLXSW_SP_SB_PMS_EGRESS_LEN); +} + struct mlxsw_sp_sb_mm { - u8 prio; u32 min_buff; u32 max_buff; u8 pool; }; -#define MLXSW_SP_SB_MM(_prio, _min_buff, _max_buff, _pool) \ - { \ - .prio = _prio, \ - .min_buff = _min_buff, \ - .max_buff = _max_buff, \ - .pool = _pool, \ +#define MLXSW_SP_SB_MM(_min_buff, _max_buff, _pool) \ + { \ + .min_buff = _min_buff, \ + .max_buff = _max_buff, \ + .pool = _pool, \ } static const struct mlxsw_sp_sb_mm mlxsw_sp_sb_mms[] = { - MLXSW_SP_SB_MM(0, 20000 / MLXSW_SP_SB_BYTES_PER_CELL, 0xff, 0), - MLXSW_SP_SB_MM(1, 20000 / MLXSW_SP_SB_BYTES_PER_CELL, 0xff, 0), - MLXSW_SP_SB_MM(2, 20000 / MLXSW_SP_SB_BYTES_PER_CELL, 0xff, 0), - MLXSW_SP_SB_MM(3, 20000 / MLXSW_SP_SB_BYTES_PER_CELL, 0xff, 0), - MLXSW_SP_SB_MM(4, 20000 / MLXSW_SP_SB_BYTES_PER_CELL, 0xff, 0), - MLXSW_SP_SB_MM(5, 20000 / MLXSW_SP_SB_BYTES_PER_CELL, 0xff, 0), - MLXSW_SP_SB_MM(6, 20000 / MLXSW_SP_SB_BYTES_PER_CELL, 0xff, 0), - MLXSW_SP_SB_MM(7, 20000 / MLXSW_SP_SB_BYTES_PER_CELL, 0xff, 0), - MLXSW_SP_SB_MM(8, 20000 / MLXSW_SP_SB_BYTES_PER_CELL, 0xff, 0), - MLXSW_SP_SB_MM(9, 20000 / MLXSW_SP_SB_BYTES_PER_CELL, 0xff, 0), - MLXSW_SP_SB_MM(10, 20000 / MLXSW_SP_SB_BYTES_PER_CELL, 0xff, 0), - MLXSW_SP_SB_MM(11, 20000 / MLXSW_SP_SB_BYTES_PER_CELL, 0xff, 0), - MLXSW_SP_SB_MM(12, 20000 / MLXSW_SP_SB_BYTES_PER_CELL, 0xff, 0), - MLXSW_SP_SB_MM(13, 20000 / MLXSW_SP_SB_BYTES_PER_CELL, 0xff, 0), - MLXSW_SP_SB_MM(14, 20000 / MLXSW_SP_SB_BYTES_PER_CELL, 0xff, 0), + MLXSW_SP_SB_MM(MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), + MLXSW_SP_SB_MM(MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), + MLXSW_SP_SB_MM(MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), + MLXSW_SP_SB_MM(MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), + MLXSW_SP_SB_MM(MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), + MLXSW_SP_SB_MM(MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), + MLXSW_SP_SB_MM(MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), + MLXSW_SP_SB_MM(MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), + MLXSW_SP_SB_MM(MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), + MLXSW_SP_SB_MM(MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), + MLXSW_SP_SB_MM(MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), + MLXSW_SP_SB_MM(MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), + MLXSW_SP_SB_MM(MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), + MLXSW_SP_SB_MM(MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), + MLXSW_SP_SB_MM(MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), }; #define MLXSW_SP_SB_MMS_LEN ARRAY_SIZE(mlxsw_sp_sb_mms) @@ -382,7 +513,7 @@ static int mlxsw_sp_sb_mms_init(struct mlxsw_sp *mlxsw_sp) const struct mlxsw_sp_sb_mm *mc; mc = &mlxsw_sp_sb_mms[i]; - mlxsw_reg_sbmm_pack(sbmm_pl, mc->prio, mc->min_buff, + mlxsw_reg_sbmm_pack(sbmm_pl, i, mc->min_buff, mc->max_buff, mc->pool); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbmm), sbmm_pl); if (err) @@ -391,26 +522,39 @@ static int mlxsw_sp_sb_mms_init(struct mlxsw_sp *mlxsw_sp) return 0; } +#define MLXSW_SP_SB_SIZE (16 * 1024 * 1024) + int mlxsw_sp_buffers_init(struct mlxsw_sp *mlxsw_sp) { int err; - err = mlxsw_sp_sb_pools_init(mlxsw_sp); + err = mlxsw_sp_sb_prs_init(mlxsw_sp); if (err) return err; err = mlxsw_sp_cpu_port_sb_cms_init(mlxsw_sp); if (err) return err; err = mlxsw_sp_sb_mms_init(mlxsw_sp); + if (err) + return err; + return devlink_sb_register(priv_to_devlink(mlxsw_sp->core), 0, + MLXSW_SP_SB_SIZE, + MLXSW_SP_SB_POOL_COUNT, + MLXSW_SP_SB_POOL_COUNT, + MLXSW_SP_SB_TC_COUNT, + MLXSW_SP_SB_TC_COUNT); +} - return err; +void mlxsw_sp_buffers_fini(struct mlxsw_sp *mlxsw_sp) +{ + devlink_sb_unregister(priv_to_devlink(mlxsw_sp->core), 0); } int mlxsw_sp_port_buffers_init(struct mlxsw_sp_port *mlxsw_sp_port) { int err; - err = mlxsw_sp_port_pb_init(mlxsw_sp_port); + err = mlxsw_sp_port_headroom_init(mlxsw_sp_port); if (err) return err; err = mlxsw_sp_port_sb_cms_init(mlxsw_sp_port); @@ -420,3 +564,390 @@ int mlxsw_sp_port_buffers_init(struct mlxsw_sp_port *mlxsw_sp_port) return err; } + +static u8 pool_get(u16 pool_index) +{ + return pool_index % MLXSW_SP_SB_POOL_COUNT; +} + +static u16 pool_index_get(u8 pool, enum mlxsw_reg_sbxx_dir dir) +{ + u16 pool_index; + + pool_index = pool; + if (dir == MLXSW_REG_SBXX_DIR_EGRESS) + pool_index += MLXSW_SP_SB_POOL_COUNT; + return pool_index; +} + +static enum mlxsw_reg_sbxx_dir dir_get(u16 pool_index) +{ + return pool_index < MLXSW_SP_SB_POOL_COUNT ? + MLXSW_REG_SBXX_DIR_INGRESS : MLXSW_REG_SBXX_DIR_EGRESS; +} + +int mlxsw_sp_sb_pool_get(struct mlxsw_core *mlxsw_core, + unsigned int sb_index, u16 pool_index, + struct devlink_sb_pool_info *pool_info) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); + u8 pool = pool_get(pool_index); + enum mlxsw_reg_sbxx_dir dir = dir_get(pool_index); + struct mlxsw_sp_sb_pr *pr = mlxsw_sp_sb_pr_get(mlxsw_sp, pool, dir); + + pool_info->pool_type = dir; + pool_info->size = MLXSW_SP_CELLS_TO_BYTES(pr->size); + pool_info->threshold_type = pr->mode; + return 0; +} + +int mlxsw_sp_sb_pool_set(struct mlxsw_core *mlxsw_core, + unsigned int sb_index, u16 pool_index, u32 size, + enum devlink_sb_threshold_type threshold_type) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); + u8 pool = pool_get(pool_index); + enum mlxsw_reg_sbxx_dir dir = dir_get(pool_index); + enum mlxsw_reg_sbpr_mode mode = threshold_type; + u32 pool_size = MLXSW_SP_BYTES_TO_CELLS(size); + + return mlxsw_sp_sb_pr_write(mlxsw_sp, pool, dir, mode, pool_size); +} + +#define MLXSW_SP_SB_THRESHOLD_TO_ALPHA_OFFSET (-2) /* 3->1, 16->14 */ + +static u32 mlxsw_sp_sb_threshold_out(struct mlxsw_sp *mlxsw_sp, u8 pool, + enum mlxsw_reg_sbxx_dir dir, u32 max_buff) +{ + struct mlxsw_sp_sb_pr *pr = mlxsw_sp_sb_pr_get(mlxsw_sp, pool, dir); + + if (pr->mode == MLXSW_REG_SBPR_MODE_DYNAMIC) + return max_buff - MLXSW_SP_SB_THRESHOLD_TO_ALPHA_OFFSET; + return MLXSW_SP_CELLS_TO_BYTES(max_buff); +} + +static int mlxsw_sp_sb_threshold_in(struct mlxsw_sp *mlxsw_sp, u8 pool, + enum mlxsw_reg_sbxx_dir dir, u32 threshold, + u32 *p_max_buff) +{ + struct mlxsw_sp_sb_pr *pr = mlxsw_sp_sb_pr_get(mlxsw_sp, pool, dir); + + if (pr->mode == MLXSW_REG_SBPR_MODE_DYNAMIC) { + int val; + + val = threshold + MLXSW_SP_SB_THRESHOLD_TO_ALPHA_OFFSET; + if (val < MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN || + val > MLXSW_REG_SBXX_DYN_MAX_BUFF_MAX) + return -EINVAL; + *p_max_buff = val; + } else { + *p_max_buff = MLXSW_SP_BYTES_TO_CELLS(threshold); + } + return 0; +} + +int mlxsw_sp_sb_port_pool_get(struct mlxsw_core_port *mlxsw_core_port, + unsigned int sb_index, u16 pool_index, + u32 *p_threshold) +{ + struct mlxsw_sp_port *mlxsw_sp_port = + mlxsw_core_port_driver_priv(mlxsw_core_port); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + u8 local_port = mlxsw_sp_port->local_port; + u8 pool = pool_get(pool_index); + enum mlxsw_reg_sbxx_dir dir = dir_get(pool_index); + struct mlxsw_sp_sb_pm *pm = mlxsw_sp_sb_pm_get(mlxsw_sp, local_port, + pool, dir); + + *p_threshold = mlxsw_sp_sb_threshold_out(mlxsw_sp, pool, dir, + pm->max_buff); + return 0; +} + +int mlxsw_sp_sb_port_pool_set(struct mlxsw_core_port *mlxsw_core_port, + unsigned int sb_index, u16 pool_index, + u32 threshold) +{ + struct mlxsw_sp_port *mlxsw_sp_port = + mlxsw_core_port_driver_priv(mlxsw_core_port); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + u8 local_port = mlxsw_sp_port->local_port; + u8 pool = pool_get(pool_index); + enum mlxsw_reg_sbxx_dir dir = dir_get(pool_index); + u32 max_buff; + int err; + + err = mlxsw_sp_sb_threshold_in(mlxsw_sp, pool, dir, + threshold, &max_buff); + if (err) + return err; + + return mlxsw_sp_sb_pm_write(mlxsw_sp, local_port, pool, dir, + 0, max_buff); +} + +int mlxsw_sp_sb_tc_pool_bind_get(struct mlxsw_core_port *mlxsw_core_port, + unsigned int sb_index, u16 tc_index, + enum devlink_sb_pool_type pool_type, + u16 *p_pool_index, u32 *p_threshold) +{ + struct mlxsw_sp_port *mlxsw_sp_port = + mlxsw_core_port_driver_priv(mlxsw_core_port); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + u8 local_port = mlxsw_sp_port->local_port; + u8 pg_buff = tc_index; + enum mlxsw_reg_sbxx_dir dir = pool_type; + struct mlxsw_sp_sb_cm *cm = mlxsw_sp_sb_cm_get(mlxsw_sp, local_port, + pg_buff, dir); + + *p_threshold = mlxsw_sp_sb_threshold_out(mlxsw_sp, cm->pool, dir, + cm->max_buff); + *p_pool_index = pool_index_get(cm->pool, pool_type); + return 0; +} + +int mlxsw_sp_sb_tc_pool_bind_set(struct mlxsw_core_port *mlxsw_core_port, + unsigned int sb_index, u16 tc_index, + enum devlink_sb_pool_type pool_type, + u16 pool_index, u32 threshold) +{ + struct mlxsw_sp_port *mlxsw_sp_port = + mlxsw_core_port_driver_priv(mlxsw_core_port); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + u8 local_port = mlxsw_sp_port->local_port; + u8 pg_buff = tc_index; + enum mlxsw_reg_sbxx_dir dir = pool_type; + u8 pool = pool_get(pool_index); + u32 max_buff; + int err; + + if (dir != dir_get(pool_index)) + return -EINVAL; + + err = mlxsw_sp_sb_threshold_in(mlxsw_sp, pool, dir, + threshold, &max_buff); + if (err) + return err; + + return mlxsw_sp_sb_cm_write(mlxsw_sp, local_port, pg_buff, dir, + 0, max_buff, pool); +} + +#define MASKED_COUNT_MAX \ + (MLXSW_REG_SBSR_REC_MAX_COUNT / (MLXSW_SP_SB_TC_COUNT * 2)) + +struct mlxsw_sp_sb_sr_occ_query_cb_ctx { + u8 masked_count; + u8 local_port_1; +}; + +static void mlxsw_sp_sb_sr_occ_query_cb(struct mlxsw_core *mlxsw_core, + char *sbsr_pl, size_t sbsr_pl_len, + unsigned long cb_priv) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); + struct mlxsw_sp_sb_sr_occ_query_cb_ctx cb_ctx; + u8 masked_count; + u8 local_port; + int rec_index = 0; + struct mlxsw_sp_sb_cm *cm; + int i; + + memcpy(&cb_ctx, &cb_priv, sizeof(cb_ctx)); + + masked_count = 0; + for (local_port = cb_ctx.local_port_1; + local_port < MLXSW_PORT_MAX_PORTS; local_port++) { + if (!mlxsw_sp->ports[local_port]) + continue; + for (i = 0; i < MLXSW_SP_SB_TC_COUNT; i++) { + cm = mlxsw_sp_sb_cm_get(mlxsw_sp, local_port, i, + MLXSW_REG_SBXX_DIR_INGRESS); + mlxsw_reg_sbsr_rec_unpack(sbsr_pl, rec_index++, + &cm->occ.cur, &cm->occ.max); + } + if (++masked_count == cb_ctx.masked_count) + break; + } + masked_count = 0; + for (local_port = cb_ctx.local_port_1; + local_port < MLXSW_PORT_MAX_PORTS; local_port++) { + if (!mlxsw_sp->ports[local_port]) + continue; + for (i = 0; i < MLXSW_SP_SB_TC_COUNT; i++) { + cm = mlxsw_sp_sb_cm_get(mlxsw_sp, local_port, i, + MLXSW_REG_SBXX_DIR_EGRESS); + mlxsw_reg_sbsr_rec_unpack(sbsr_pl, rec_index++, + &cm->occ.cur, &cm->occ.max); + } + if (++masked_count == cb_ctx.masked_count) + break; + } +} + +int mlxsw_sp_sb_occ_snapshot(struct mlxsw_core *mlxsw_core, + unsigned int sb_index) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); + struct mlxsw_sp_sb_sr_occ_query_cb_ctx cb_ctx; + unsigned long cb_priv; + LIST_HEAD(bulk_list); + char *sbsr_pl; + u8 masked_count; + u8 local_port_1; + u8 local_port = 0; + int i; + int err; + int err2; + + sbsr_pl = kmalloc(MLXSW_REG_SBSR_LEN, GFP_KERNEL); + if (!sbsr_pl) + return -ENOMEM; + +next_batch: + local_port++; + local_port_1 = local_port; + masked_count = 0; + mlxsw_reg_sbsr_pack(sbsr_pl, false); + for (i = 0; i < MLXSW_SP_SB_TC_COUNT; i++) { + mlxsw_reg_sbsr_pg_buff_mask_set(sbsr_pl, i, 1); + mlxsw_reg_sbsr_tclass_mask_set(sbsr_pl, i, 1); + } + for (; local_port < MLXSW_PORT_MAX_PORTS; local_port++) { + if (!mlxsw_sp->ports[local_port]) + continue; + mlxsw_reg_sbsr_ingress_port_mask_set(sbsr_pl, local_port, 1); + mlxsw_reg_sbsr_egress_port_mask_set(sbsr_pl, local_port, 1); + for (i = 0; i < MLXSW_SP_SB_POOL_COUNT; i++) { + err = mlxsw_sp_sb_pm_occ_query(mlxsw_sp, local_port, i, + MLXSW_REG_SBXX_DIR_INGRESS, + &bulk_list); + if (err) + goto out; + err = mlxsw_sp_sb_pm_occ_query(mlxsw_sp, local_port, i, + MLXSW_REG_SBXX_DIR_EGRESS, + &bulk_list); + if (err) + goto out; + } + if (++masked_count == MASKED_COUNT_MAX) + goto do_query; + } + +do_query: + cb_ctx.masked_count = masked_count; + cb_ctx.local_port_1 = local_port_1; + memcpy(&cb_priv, &cb_ctx, sizeof(cb_ctx)); + err = mlxsw_reg_trans_query(mlxsw_core, MLXSW_REG(sbsr), sbsr_pl, + &bulk_list, mlxsw_sp_sb_sr_occ_query_cb, + cb_priv); + if (err) + goto out; + if (local_port < MLXSW_PORT_MAX_PORTS) + goto next_batch; + +out: + err2 = mlxsw_reg_trans_bulk_wait(&bulk_list); + if (!err) + err = err2; + kfree(sbsr_pl); + return err; +} + +int mlxsw_sp_sb_occ_max_clear(struct mlxsw_core *mlxsw_core, + unsigned int sb_index) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); + LIST_HEAD(bulk_list); + char *sbsr_pl; + unsigned int masked_count; + u8 local_port = 0; + int i; + int err; + int err2; + + sbsr_pl = kmalloc(MLXSW_REG_SBSR_LEN, GFP_KERNEL); + if (!sbsr_pl) + return -ENOMEM; + +next_batch: + local_port++; + masked_count = 0; + mlxsw_reg_sbsr_pack(sbsr_pl, true); + for (i = 0; i < MLXSW_SP_SB_TC_COUNT; i++) { + mlxsw_reg_sbsr_pg_buff_mask_set(sbsr_pl, i, 1); + mlxsw_reg_sbsr_tclass_mask_set(sbsr_pl, i, 1); + } + for (; local_port < MLXSW_PORT_MAX_PORTS; local_port++) { + if (!mlxsw_sp->ports[local_port]) + continue; + mlxsw_reg_sbsr_ingress_port_mask_set(sbsr_pl, local_port, 1); + mlxsw_reg_sbsr_egress_port_mask_set(sbsr_pl, local_port, 1); + for (i = 0; i < MLXSW_SP_SB_POOL_COUNT; i++) { + err = mlxsw_sp_sb_pm_occ_clear(mlxsw_sp, local_port, i, + MLXSW_REG_SBXX_DIR_INGRESS, + &bulk_list); + if (err) + goto out; + err = mlxsw_sp_sb_pm_occ_clear(mlxsw_sp, local_port, i, + MLXSW_REG_SBXX_DIR_EGRESS, + &bulk_list); + if (err) + goto out; + } + if (++masked_count == MASKED_COUNT_MAX) + goto do_query; + } + +do_query: + err = mlxsw_reg_trans_query(mlxsw_core, MLXSW_REG(sbsr), sbsr_pl, + &bulk_list, NULL, 0); + if (err) + goto out; + if (local_port < MLXSW_PORT_MAX_PORTS) + goto next_batch; + +out: + err2 = mlxsw_reg_trans_bulk_wait(&bulk_list); + if (!err) + err = err2; + kfree(sbsr_pl); + return err; +} + +int mlxsw_sp_sb_occ_port_pool_get(struct mlxsw_core_port *mlxsw_core_port, + unsigned int sb_index, u16 pool_index, + u32 *p_cur, u32 *p_max) +{ + struct mlxsw_sp_port *mlxsw_sp_port = + mlxsw_core_port_driver_priv(mlxsw_core_port); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + u8 local_port = mlxsw_sp_port->local_port; + u8 pool = pool_get(pool_index); + enum mlxsw_reg_sbxx_dir dir = dir_get(pool_index); + struct mlxsw_sp_sb_pm *pm = mlxsw_sp_sb_pm_get(mlxsw_sp, local_port, + pool, dir); + + *p_cur = MLXSW_SP_CELLS_TO_BYTES(pm->occ.cur); + *p_max = MLXSW_SP_CELLS_TO_BYTES(pm->occ.max); + return 0; +} + +int mlxsw_sp_sb_occ_tc_port_bind_get(struct mlxsw_core_port *mlxsw_core_port, + unsigned int sb_index, u16 tc_index, + enum devlink_sb_pool_type pool_type, + u32 *p_cur, u32 *p_max) +{ + struct mlxsw_sp_port *mlxsw_sp_port = + mlxsw_core_port_driver_priv(mlxsw_core_port); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + u8 local_port = mlxsw_sp_port->local_port; + u8 pg_buff = tc_index; + enum mlxsw_reg_sbxx_dir dir = pool_type; + struct mlxsw_sp_sb_cm *cm = mlxsw_sp_sb_cm_get(mlxsw_sp, local_port, + pg_buff, dir); + + *p_cur = MLXSW_SP_CELLS_TO_BYTES(cm->occ.cur); + *p_max = MLXSW_SP_CELLS_TO_BYTES(cm->occ.max); + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c new file mode 100644 index 000000000000..b6ed7f7c531e --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c @@ -0,0 +1,486 @@ +/* + * drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c + * Copyright (c) 2016 Mellanox Technologies. All rights reserved. + * Copyright (c) 2016 Ido Schimmel <idosch@mellanox.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <linux/netdevice.h> +#include <linux/string.h> +#include <linux/bitops.h> +#include <net/dcbnl.h> + +#include "spectrum.h" +#include "reg.h" + +static u8 mlxsw_sp_dcbnl_getdcbx(struct net_device __always_unused *dev) +{ + return DCB_CAP_DCBX_HOST | DCB_CAP_DCBX_VER_IEEE; +} + +static u8 mlxsw_sp_dcbnl_setdcbx(struct net_device __always_unused *dev, + u8 mode) +{ + return (mode != (DCB_CAP_DCBX_HOST | DCB_CAP_DCBX_VER_IEEE)) ? 1 : 0; +} + +static int mlxsw_sp_dcbnl_ieee_getets(struct net_device *dev, + struct ieee_ets *ets) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + + memcpy(ets, mlxsw_sp_port->dcb.ets, sizeof(*ets)); + + return 0; +} + +static int mlxsw_sp_port_ets_validate(struct mlxsw_sp_port *mlxsw_sp_port, + struct ieee_ets *ets) +{ + struct net_device *dev = mlxsw_sp_port->dev; + bool has_ets_tc = false; + int i, tx_bw_sum = 0; + + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + switch (ets->tc_tsa[i]) { + case IEEE_8021QAZ_TSA_STRICT: + break; + case IEEE_8021QAZ_TSA_ETS: + has_ets_tc = true; + tx_bw_sum += ets->tc_tx_bw[i]; + break; + default: + netdev_err(dev, "Only strict priority and ETS are supported\n"); + return -EINVAL; + } + + if (ets->prio_tc[i] >= IEEE_8021QAZ_MAX_TCS) { + netdev_err(dev, "Invalid TC\n"); + return -EINVAL; + } + } + + if (has_ets_tc && tx_bw_sum != 100) { + netdev_err(dev, "Total ETS bandwidth should equal 100\n"); + return -EINVAL; + } + + return 0; +} + +static int mlxsw_sp_port_pg_prio_map(struct mlxsw_sp_port *mlxsw_sp_port, + u8 *prio_tc) +{ + char pptb_pl[MLXSW_REG_PPTB_LEN]; + int i; + + mlxsw_reg_pptb_pack(pptb_pl, mlxsw_sp_port->local_port); + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) + mlxsw_reg_pptb_prio_to_buff_pack(pptb_pl, i, prio_tc[i]); + + return mlxsw_reg_write(mlxsw_sp_port->mlxsw_sp->core, MLXSW_REG(pptb), + pptb_pl); +} + +static bool mlxsw_sp_ets_has_pg(u8 *prio_tc, u8 pg) +{ + int i; + + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) + if (prio_tc[i] == pg) + return true; + return false; +} + +static int mlxsw_sp_port_pg_destroy(struct mlxsw_sp_port *mlxsw_sp_port, + u8 *old_prio_tc, u8 *new_prio_tc) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char pbmc_pl[MLXSW_REG_PBMC_LEN]; + int err, i; + + mlxsw_reg_pbmc_pack(pbmc_pl, mlxsw_sp_port->local_port, 0, 0); + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(pbmc), pbmc_pl); + if (err) + return err; + + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + u8 pg = old_prio_tc[i]; + + if (!mlxsw_sp_ets_has_pg(new_prio_tc, pg)) + mlxsw_reg_pbmc_lossy_buffer_pack(pbmc_pl, pg, 0); + } + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pbmc), pbmc_pl); +} + +static int mlxsw_sp_port_headroom_set(struct mlxsw_sp_port *mlxsw_sp_port, + struct ieee_ets *ets) +{ + bool pause_en = mlxsw_sp_port_is_pause_en(mlxsw_sp_port); + struct ieee_ets *my_ets = mlxsw_sp_port->dcb.ets; + struct net_device *dev = mlxsw_sp_port->dev; + int err; + + /* Create the required PGs, but don't destroy existing ones, as + * traffic is still directed to them. + */ + err = __mlxsw_sp_port_headroom_set(mlxsw_sp_port, dev->mtu, + ets->prio_tc, pause_en, + mlxsw_sp_port->dcb.pfc); + if (err) { + netdev_err(dev, "Failed to configure port's headroom\n"); + return err; + } + + err = mlxsw_sp_port_pg_prio_map(mlxsw_sp_port, ets->prio_tc); + if (err) { + netdev_err(dev, "Failed to set PG-priority mapping\n"); + goto err_port_prio_pg_map; + } + + err = mlxsw_sp_port_pg_destroy(mlxsw_sp_port, my_ets->prio_tc, + ets->prio_tc); + if (err) + netdev_warn(dev, "Failed to remove ununsed PGs\n"); + + return 0; + +err_port_prio_pg_map: + mlxsw_sp_port_pg_destroy(mlxsw_sp_port, ets->prio_tc, my_ets->prio_tc); + return err; +} + +static int __mlxsw_sp_dcbnl_ieee_setets(struct mlxsw_sp_port *mlxsw_sp_port, + struct ieee_ets *ets) +{ + struct ieee_ets *my_ets = mlxsw_sp_port->dcb.ets; + struct net_device *dev = mlxsw_sp_port->dev; + int i, err; + + /* Egress configuration. */ + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + bool dwrr = ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS; + u8 weight = ets->tc_tx_bw[i]; + + err = mlxsw_sp_port_ets_set(mlxsw_sp_port, + MLXSW_REG_QEEC_HIERARCY_SUBGROUP, i, + 0, dwrr, weight); + if (err) { + netdev_err(dev, "Failed to link subgroup ETS element %d to group\n", + i); + goto err_port_ets_set; + } + } + + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + err = mlxsw_sp_port_prio_tc_set(mlxsw_sp_port, i, + ets->prio_tc[i]); + if (err) { + netdev_err(dev, "Failed to map prio %d to TC %d\n", i, + ets->prio_tc[i]); + goto err_port_prio_tc_set; + } + } + + /* Ingress configuration. */ + err = mlxsw_sp_port_headroom_set(mlxsw_sp_port, ets); + if (err) + goto err_port_headroom_set; + + return 0; + +err_port_headroom_set: + i = IEEE_8021QAZ_MAX_TCS; +err_port_prio_tc_set: + for (i--; i >= 0; i--) + mlxsw_sp_port_prio_tc_set(mlxsw_sp_port, i, my_ets->prio_tc[i]); + i = IEEE_8021QAZ_MAX_TCS; +err_port_ets_set: + for (i--; i >= 0; i--) { + bool dwrr = my_ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS; + u8 weight = my_ets->tc_tx_bw[i]; + + err = mlxsw_sp_port_ets_set(mlxsw_sp_port, + MLXSW_REG_QEEC_HIERARCY_SUBGROUP, i, + 0, dwrr, weight); + } + return err; +} + +static int mlxsw_sp_dcbnl_ieee_setets(struct net_device *dev, + struct ieee_ets *ets) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + int err; + + err = mlxsw_sp_port_ets_validate(mlxsw_sp_port, ets); + if (err) + return err; + + err = __mlxsw_sp_dcbnl_ieee_setets(mlxsw_sp_port, ets); + if (err) + return err; + + memcpy(mlxsw_sp_port->dcb.ets, ets, sizeof(*ets)); + mlxsw_sp_port->dcb.ets->ets_cap = IEEE_8021QAZ_MAX_TCS; + + return 0; +} + +static int mlxsw_sp_dcbnl_ieee_getmaxrate(struct net_device *dev, + struct ieee_maxrate *maxrate) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + + memcpy(maxrate, mlxsw_sp_port->dcb.maxrate, sizeof(*maxrate)); + + return 0; +} + +static int mlxsw_sp_dcbnl_ieee_setmaxrate(struct net_device *dev, + struct ieee_maxrate *maxrate) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + struct ieee_maxrate *my_maxrate = mlxsw_sp_port->dcb.maxrate; + int err, i; + + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + err = mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port, + MLXSW_REG_QEEC_HIERARCY_SUBGROUP, + i, 0, + maxrate->tc_maxrate[i]); + if (err) { + netdev_err(dev, "Failed to set maxrate for TC %d\n", i); + goto err_port_ets_maxrate_set; + } + } + + memcpy(mlxsw_sp_port->dcb.maxrate, maxrate, sizeof(*maxrate)); + + return 0; + +err_port_ets_maxrate_set: + for (i--; i >= 0; i--) + mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port, + MLXSW_REG_QEEC_HIERARCY_SUBGROUP, + i, 0, my_maxrate->tc_maxrate[i]); + return err; +} + +static int mlxsw_sp_port_pfc_cnt_get(struct mlxsw_sp_port *mlxsw_sp_port, + u8 prio) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct ieee_pfc *my_pfc = mlxsw_sp_port->dcb.pfc; + char ppcnt_pl[MLXSW_REG_PPCNT_LEN]; + int err; + + mlxsw_reg_ppcnt_pack(ppcnt_pl, mlxsw_sp_port->local_port, + MLXSW_REG_PPCNT_PRIO_CNT, prio); + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ppcnt), ppcnt_pl); + if (err) + return err; + + my_pfc->requests[prio] = mlxsw_reg_ppcnt_tx_pause_get(ppcnt_pl); + my_pfc->indications[prio] = mlxsw_reg_ppcnt_rx_pause_get(ppcnt_pl); + + return 0; +} + +static int mlxsw_sp_dcbnl_ieee_getpfc(struct net_device *dev, + struct ieee_pfc *pfc) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + int err, i; + + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + err = mlxsw_sp_port_pfc_cnt_get(mlxsw_sp_port, i); + if (err) { + netdev_err(dev, "Failed to get PFC count for priority %d\n", + i); + return err; + } + } + + memcpy(pfc, mlxsw_sp_port->dcb.pfc, sizeof(*pfc)); + + return 0; +} + +static int mlxsw_sp_port_pfc_set(struct mlxsw_sp_port *mlxsw_sp_port, + struct ieee_pfc *pfc) +{ + char pfcc_pl[MLXSW_REG_PFCC_LEN]; + + mlxsw_reg_pfcc_pack(pfcc_pl, mlxsw_sp_port->local_port); + mlxsw_reg_pfcc_pprx_set(pfcc_pl, mlxsw_sp_port->link.rx_pause); + mlxsw_reg_pfcc_pptx_set(pfcc_pl, mlxsw_sp_port->link.tx_pause); + mlxsw_reg_pfcc_prio_pack(pfcc_pl, pfc->pfc_en); + + return mlxsw_reg_write(mlxsw_sp_port->mlxsw_sp->core, MLXSW_REG(pfcc), + pfcc_pl); +} + +static int mlxsw_sp_dcbnl_ieee_setpfc(struct net_device *dev, + struct ieee_pfc *pfc) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + bool pause_en = mlxsw_sp_port_is_pause_en(mlxsw_sp_port); + int err; + + if (pause_en && pfc->pfc_en) { + netdev_err(dev, "PAUSE frames already enabled on port\n"); + return -EINVAL; + } + + err = __mlxsw_sp_port_headroom_set(mlxsw_sp_port, dev->mtu, + mlxsw_sp_port->dcb.ets->prio_tc, + pause_en, pfc); + if (err) { + netdev_err(dev, "Failed to configure port's headroom for PFC\n"); + return err; + } + + err = mlxsw_sp_port_pfc_set(mlxsw_sp_port, pfc); + if (err) { + netdev_err(dev, "Failed to configure PFC\n"); + goto err_port_pfc_set; + } + + memcpy(mlxsw_sp_port->dcb.pfc, pfc, sizeof(*pfc)); + mlxsw_sp_port->dcb.pfc->pfc_cap = IEEE_8021QAZ_MAX_TCS; + + return 0; + +err_port_pfc_set: + __mlxsw_sp_port_headroom_set(mlxsw_sp_port, dev->mtu, + mlxsw_sp_port->dcb.ets->prio_tc, pause_en, + mlxsw_sp_port->dcb.pfc); + return err; +} + +static const struct dcbnl_rtnl_ops mlxsw_sp_dcbnl_ops = { + .ieee_getets = mlxsw_sp_dcbnl_ieee_getets, + .ieee_setets = mlxsw_sp_dcbnl_ieee_setets, + .ieee_getmaxrate = mlxsw_sp_dcbnl_ieee_getmaxrate, + .ieee_setmaxrate = mlxsw_sp_dcbnl_ieee_setmaxrate, + .ieee_getpfc = mlxsw_sp_dcbnl_ieee_getpfc, + .ieee_setpfc = mlxsw_sp_dcbnl_ieee_setpfc, + + .getdcbx = mlxsw_sp_dcbnl_getdcbx, + .setdcbx = mlxsw_sp_dcbnl_setdcbx, +}; + +static int mlxsw_sp_port_ets_init(struct mlxsw_sp_port *mlxsw_sp_port) +{ + mlxsw_sp_port->dcb.ets = kzalloc(sizeof(*mlxsw_sp_port->dcb.ets), + GFP_KERNEL); + if (!mlxsw_sp_port->dcb.ets) + return -ENOMEM; + + mlxsw_sp_port->dcb.ets->ets_cap = IEEE_8021QAZ_MAX_TCS; + + return 0; +} + +static void mlxsw_sp_port_ets_fini(struct mlxsw_sp_port *mlxsw_sp_port) +{ + kfree(mlxsw_sp_port->dcb.ets); +} + +static int mlxsw_sp_port_maxrate_init(struct mlxsw_sp_port *mlxsw_sp_port) +{ + int i; + + mlxsw_sp_port->dcb.maxrate = kmalloc(sizeof(*mlxsw_sp_port->dcb.maxrate), + GFP_KERNEL); + if (!mlxsw_sp_port->dcb.maxrate) + return -ENOMEM; + + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) + mlxsw_sp_port->dcb.maxrate->tc_maxrate[i] = MLXSW_REG_QEEC_MAS_DIS; + + return 0; +} + +static void mlxsw_sp_port_maxrate_fini(struct mlxsw_sp_port *mlxsw_sp_port) +{ + kfree(mlxsw_sp_port->dcb.maxrate); +} + +static int mlxsw_sp_port_pfc_init(struct mlxsw_sp_port *mlxsw_sp_port) +{ + mlxsw_sp_port->dcb.pfc = kzalloc(sizeof(*mlxsw_sp_port->dcb.pfc), + GFP_KERNEL); + if (!mlxsw_sp_port->dcb.pfc) + return -ENOMEM; + + mlxsw_sp_port->dcb.pfc->pfc_cap = IEEE_8021QAZ_MAX_TCS; + + return 0; +} + +static void mlxsw_sp_port_pfc_fini(struct mlxsw_sp_port *mlxsw_sp_port) +{ + kfree(mlxsw_sp_port->dcb.pfc); +} + +int mlxsw_sp_port_dcb_init(struct mlxsw_sp_port *mlxsw_sp_port) +{ + int err; + + err = mlxsw_sp_port_ets_init(mlxsw_sp_port); + if (err) + return err; + err = mlxsw_sp_port_maxrate_init(mlxsw_sp_port); + if (err) + goto err_port_maxrate_init; + err = mlxsw_sp_port_pfc_init(mlxsw_sp_port); + if (err) + goto err_port_pfc_init; + + mlxsw_sp_port->dev->dcbnl_ops = &mlxsw_sp_dcbnl_ops; + + return 0; + +err_port_pfc_init: + mlxsw_sp_port_maxrate_fini(mlxsw_sp_port); +err_port_maxrate_init: + mlxsw_sp_port_ets_fini(mlxsw_sp_port); + return err; +} + +void mlxsw_sp_port_dcb_fini(struct mlxsw_sp_port *mlxsw_sp_port) +{ + mlxsw_sp_port_pfc_fini(mlxsw_sp_port); + mlxsw_sp_port_maxrate_fini(mlxsw_sp_port); + mlxsw_sp_port_ets_fini(mlxsw_sp_port); +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c new file mode 100644 index 000000000000..ac321e8e5c1a --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c @@ -0,0 +1,91 @@ +/* + * drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c + * Copyright (c) 2016 Mellanox Technologies. All rights reserved. + * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <linux/kernel.h> +#include <linux/bitops.h> + +#include "spectrum.h" + +#define MLXSW_SP_KVDL_SINGLE_BASE 0 +#define MLXSW_SP_KVDL_SINGLE_SIZE 16384 +#define MLXSW_SP_KVDL_CHUNKS_BASE \ + (MLXSW_SP_KVDL_SINGLE_BASE + MLXSW_SP_KVDL_SINGLE_SIZE) +#define MLXSW_SP_KVDL_CHUNKS_SIZE \ + (MLXSW_SP_KVD_LINEAR_SIZE - MLXSW_SP_KVDL_CHUNKS_BASE) +#define MLXSW_SP_CHUNK_MAX 32 + +int mlxsw_sp_kvdl_alloc(struct mlxsw_sp *mlxsw_sp, unsigned int entry_count) +{ + int entry_index; + int size; + int type_base; + int type_size; + int type_entries; + + if (entry_count == 0 || entry_count > MLXSW_SP_CHUNK_MAX) { + return -EINVAL; + } else if (entry_count == 1) { + type_base = MLXSW_SP_KVDL_SINGLE_BASE; + type_size = MLXSW_SP_KVDL_SINGLE_SIZE; + type_entries = 1; + } else { + type_base = MLXSW_SP_KVDL_CHUNKS_BASE; + type_size = MLXSW_SP_KVDL_CHUNKS_SIZE; + type_entries = MLXSW_SP_CHUNK_MAX; + } + + entry_index = type_base; + size = type_base + type_size; + for_each_clear_bit_from(entry_index, mlxsw_sp->kvdl.usage, size) { + int i; + + for (i = 0; i < type_entries; i++) + set_bit(entry_index + i, mlxsw_sp->kvdl.usage); + return entry_index; + } + return -ENOBUFS; +} + +void mlxsw_sp_kvdl_free(struct mlxsw_sp *mlxsw_sp, int entry_index) +{ + int type_entries; + int i; + + if (entry_index < MLXSW_SP_KVDL_CHUNKS_BASE) + type_entries = 1; + else + type_entries = MLXSW_SP_CHUNK_MAX; + for (i = 0; i < type_entries; i++) + clear_bit(entry_index + i, mlxsw_sp->kvdl.usage); +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c new file mode 100644 index 000000000000..3f5c51da6d3e --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -0,0 +1,1863 @@ +/* + * drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c + * Copyright (c) 2016 Mellanox Technologies. All rights reserved. + * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com> + * Copyright (c) 2016 Ido Schimmel <idosch@mellanox.com> + * Copyright (c) 2016 Yotam Gigi <yotamg@mellanox.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/rhashtable.h> +#include <linux/bitops.h> +#include <linux/in6.h> +#include <linux/notifier.h> +#include <net/netevent.h> +#include <net/neighbour.h> +#include <net/arp.h> + +#include "spectrum.h" +#include "core.h" +#include "reg.h" + +#define mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) \ + for_each_set_bit(prefix, (prefix_usage)->b, MLXSW_SP_PREFIX_COUNT) + +static bool +mlxsw_sp_prefix_usage_subset(struct mlxsw_sp_prefix_usage *prefix_usage1, + struct mlxsw_sp_prefix_usage *prefix_usage2) +{ + unsigned char prefix; + + mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage1) { + if (!test_bit(prefix, prefix_usage2->b)) + return false; + } + return true; +} + +static bool +mlxsw_sp_prefix_usage_eq(struct mlxsw_sp_prefix_usage *prefix_usage1, + struct mlxsw_sp_prefix_usage *prefix_usage2) +{ + return !memcmp(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1)); +} + +static bool +mlxsw_sp_prefix_usage_none(struct mlxsw_sp_prefix_usage *prefix_usage) +{ + struct mlxsw_sp_prefix_usage prefix_usage_none = {{ 0 } }; + + return mlxsw_sp_prefix_usage_eq(prefix_usage, &prefix_usage_none); +} + +static void +mlxsw_sp_prefix_usage_cpy(struct mlxsw_sp_prefix_usage *prefix_usage1, + struct mlxsw_sp_prefix_usage *prefix_usage2) +{ + memcpy(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1)); +} + +static void +mlxsw_sp_prefix_usage_zero(struct mlxsw_sp_prefix_usage *prefix_usage) +{ + memset(prefix_usage, 0, sizeof(*prefix_usage)); +} + +static void +mlxsw_sp_prefix_usage_set(struct mlxsw_sp_prefix_usage *prefix_usage, + unsigned char prefix_len) +{ + set_bit(prefix_len, prefix_usage->b); +} + +static void +mlxsw_sp_prefix_usage_clear(struct mlxsw_sp_prefix_usage *prefix_usage, + unsigned char prefix_len) +{ + clear_bit(prefix_len, prefix_usage->b); +} + +struct mlxsw_sp_fib_key { + struct net_device *dev; + unsigned char addr[sizeof(struct in6_addr)]; + unsigned char prefix_len; +}; + +enum mlxsw_sp_fib_entry_type { + MLXSW_SP_FIB_ENTRY_TYPE_REMOTE, + MLXSW_SP_FIB_ENTRY_TYPE_LOCAL, + MLXSW_SP_FIB_ENTRY_TYPE_TRAP, +}; + +struct mlxsw_sp_nexthop_group; + +struct mlxsw_sp_fib_entry { + struct rhash_head ht_node; + struct mlxsw_sp_fib_key key; + enum mlxsw_sp_fib_entry_type type; + unsigned int ref_count; + u16 rif; /* used for action local */ + struct mlxsw_sp_vr *vr; + struct list_head nexthop_group_node; + struct mlxsw_sp_nexthop_group *nh_group; +}; + +struct mlxsw_sp_fib { + struct rhashtable ht; + unsigned long prefix_ref_count[MLXSW_SP_PREFIX_COUNT]; + struct mlxsw_sp_prefix_usage prefix_usage; +}; + +static const struct rhashtable_params mlxsw_sp_fib_ht_params = { + .key_offset = offsetof(struct mlxsw_sp_fib_entry, key), + .head_offset = offsetof(struct mlxsw_sp_fib_entry, ht_node), + .key_len = sizeof(struct mlxsw_sp_fib_key), + .automatic_shrinking = true, +}; + +static int mlxsw_sp_fib_entry_insert(struct mlxsw_sp_fib *fib, + struct mlxsw_sp_fib_entry *fib_entry) +{ + unsigned char prefix_len = fib_entry->key.prefix_len; + int err; + + err = rhashtable_insert_fast(&fib->ht, &fib_entry->ht_node, + mlxsw_sp_fib_ht_params); + if (err) + return err; + if (fib->prefix_ref_count[prefix_len]++ == 0) + mlxsw_sp_prefix_usage_set(&fib->prefix_usage, prefix_len); + return 0; +} + +static void mlxsw_sp_fib_entry_remove(struct mlxsw_sp_fib *fib, + struct mlxsw_sp_fib_entry *fib_entry) +{ + unsigned char prefix_len = fib_entry->key.prefix_len; + + if (--fib->prefix_ref_count[prefix_len] == 0) + mlxsw_sp_prefix_usage_clear(&fib->prefix_usage, prefix_len); + rhashtable_remove_fast(&fib->ht, &fib_entry->ht_node, + mlxsw_sp_fib_ht_params); +} + +static struct mlxsw_sp_fib_entry * +mlxsw_sp_fib_entry_create(struct mlxsw_sp_fib *fib, const void *addr, + size_t addr_len, unsigned char prefix_len, + struct net_device *dev) +{ + struct mlxsw_sp_fib_entry *fib_entry; + + fib_entry = kzalloc(sizeof(*fib_entry), GFP_KERNEL); + if (!fib_entry) + return NULL; + fib_entry->key.dev = dev; + memcpy(fib_entry->key.addr, addr, addr_len); + fib_entry->key.prefix_len = prefix_len; + return fib_entry; +} + +static void mlxsw_sp_fib_entry_destroy(struct mlxsw_sp_fib_entry *fib_entry) +{ + kfree(fib_entry); +} + +static struct mlxsw_sp_fib_entry * +mlxsw_sp_fib_entry_lookup(struct mlxsw_sp_fib *fib, const void *addr, + size_t addr_len, unsigned char prefix_len, + struct net_device *dev) +{ + struct mlxsw_sp_fib_key key; + + memset(&key, 0, sizeof(key)); + key.dev = dev; + memcpy(key.addr, addr, addr_len); + key.prefix_len = prefix_len; + return rhashtable_lookup_fast(&fib->ht, &key, mlxsw_sp_fib_ht_params); +} + +static struct mlxsw_sp_fib *mlxsw_sp_fib_create(void) +{ + struct mlxsw_sp_fib *fib; + int err; + + fib = kzalloc(sizeof(*fib), GFP_KERNEL); + if (!fib) + return ERR_PTR(-ENOMEM); + err = rhashtable_init(&fib->ht, &mlxsw_sp_fib_ht_params); + if (err) + goto err_rhashtable_init; + return fib; + +err_rhashtable_init: + kfree(fib); + return ERR_PTR(err); +} + +static void mlxsw_sp_fib_destroy(struct mlxsw_sp_fib *fib) +{ + rhashtable_destroy(&fib->ht); + kfree(fib); +} + +static struct mlxsw_sp_lpm_tree * +mlxsw_sp_lpm_tree_find_unused(struct mlxsw_sp *mlxsw_sp, bool one_reserved) +{ + static struct mlxsw_sp_lpm_tree *lpm_tree; + int i; + + for (i = 0; i < MLXSW_SP_LPM_TREE_COUNT; i++) { + lpm_tree = &mlxsw_sp->router.lpm_trees[i]; + if (lpm_tree->ref_count == 0) { + if (one_reserved) + one_reserved = false; + else + return lpm_tree; + } + } + return NULL; +} + +static int mlxsw_sp_lpm_tree_alloc(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_lpm_tree *lpm_tree) +{ + char ralta_pl[MLXSW_REG_RALTA_LEN]; + + mlxsw_reg_ralta_pack(ralta_pl, true, lpm_tree->proto, lpm_tree->id); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl); +} + +static int mlxsw_sp_lpm_tree_free(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_lpm_tree *lpm_tree) +{ + char ralta_pl[MLXSW_REG_RALTA_LEN]; + + mlxsw_reg_ralta_pack(ralta_pl, false, lpm_tree->proto, lpm_tree->id); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl); +} + +static int +mlxsw_sp_lpm_tree_left_struct_set(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_prefix_usage *prefix_usage, + struct mlxsw_sp_lpm_tree *lpm_tree) +{ + char ralst_pl[MLXSW_REG_RALST_LEN]; + u8 root_bin = 0; + u8 prefix; + u8 last_prefix = MLXSW_REG_RALST_BIN_NO_CHILD; + + mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) + root_bin = prefix; + + mlxsw_reg_ralst_pack(ralst_pl, root_bin, lpm_tree->id); + mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) { + if (prefix == 0) + continue; + mlxsw_reg_ralst_bin_pack(ralst_pl, prefix, last_prefix, + MLXSW_REG_RALST_BIN_NO_CHILD); + last_prefix = prefix; + } + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl); +} + +static struct mlxsw_sp_lpm_tree * +mlxsw_sp_lpm_tree_create(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_prefix_usage *prefix_usage, + enum mlxsw_sp_l3proto proto, bool one_reserved) +{ + struct mlxsw_sp_lpm_tree *lpm_tree; + int err; + + lpm_tree = mlxsw_sp_lpm_tree_find_unused(mlxsw_sp, one_reserved); + if (!lpm_tree) + return ERR_PTR(-EBUSY); + lpm_tree->proto = proto; + err = mlxsw_sp_lpm_tree_alloc(mlxsw_sp, lpm_tree); + if (err) + return ERR_PTR(err); + + err = mlxsw_sp_lpm_tree_left_struct_set(mlxsw_sp, prefix_usage, + lpm_tree); + if (err) + goto err_left_struct_set; + return lpm_tree; + +err_left_struct_set: + mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree); + return ERR_PTR(err); +} + +static int mlxsw_sp_lpm_tree_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_lpm_tree *lpm_tree) +{ + return mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree); +} + +static struct mlxsw_sp_lpm_tree * +mlxsw_sp_lpm_tree_get(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_prefix_usage *prefix_usage, + enum mlxsw_sp_l3proto proto, bool one_reserved) +{ + struct mlxsw_sp_lpm_tree *lpm_tree; + int i; + + for (i = 0; i < MLXSW_SP_LPM_TREE_COUNT; i++) { + lpm_tree = &mlxsw_sp->router.lpm_trees[i]; + if (lpm_tree->proto == proto && + mlxsw_sp_prefix_usage_eq(&lpm_tree->prefix_usage, + prefix_usage)) + goto inc_ref_count; + } + lpm_tree = mlxsw_sp_lpm_tree_create(mlxsw_sp, prefix_usage, + proto, one_reserved); + if (IS_ERR(lpm_tree)) + return lpm_tree; + +inc_ref_count: + lpm_tree->ref_count++; + return lpm_tree; +} + +static int mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_lpm_tree *lpm_tree) +{ + if (--lpm_tree->ref_count == 0) + return mlxsw_sp_lpm_tree_destroy(mlxsw_sp, lpm_tree); + return 0; +} + +static void mlxsw_sp_lpm_init(struct mlxsw_sp *mlxsw_sp) +{ + struct mlxsw_sp_lpm_tree *lpm_tree; + int i; + + for (i = 0; i < MLXSW_SP_LPM_TREE_COUNT; i++) { + lpm_tree = &mlxsw_sp->router.lpm_trees[i]; + lpm_tree->id = i + MLXSW_SP_LPM_TREE_MIN; + } +} + +static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp) +{ + struct mlxsw_sp_vr *vr; + int i; + + for (i = 0; i < MLXSW_SP_VIRTUAL_ROUTER_MAX; i++) { + vr = &mlxsw_sp->router.vrs[i]; + if (!vr->used) + return vr; + } + return NULL; +} + +static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_vr *vr) +{ + char raltb_pl[MLXSW_REG_RALTB_LEN]; + + mlxsw_reg_raltb_pack(raltb_pl, vr->id, vr->proto, vr->lpm_tree->id); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl); +} + +static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_vr *vr) +{ + char raltb_pl[MLXSW_REG_RALTB_LEN]; + + /* Bind to tree 0 which is default */ + mlxsw_reg_raltb_pack(raltb_pl, vr->id, vr->proto, 0); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl); +} + +static u32 mlxsw_sp_fix_tb_id(u32 tb_id) +{ + /* For our purpose, squash main and local table into one */ + if (tb_id == RT_TABLE_LOCAL) + tb_id = RT_TABLE_MAIN; + return tb_id; +} + +static struct mlxsw_sp_vr *mlxsw_sp_vr_find(struct mlxsw_sp *mlxsw_sp, + u32 tb_id, + enum mlxsw_sp_l3proto proto) +{ + struct mlxsw_sp_vr *vr; + int i; + + tb_id = mlxsw_sp_fix_tb_id(tb_id); + for (i = 0; i < MLXSW_SP_VIRTUAL_ROUTER_MAX; i++) { + vr = &mlxsw_sp->router.vrs[i]; + if (vr->used && vr->proto == proto && vr->tb_id == tb_id) + return vr; + } + return NULL; +} + +static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp, + unsigned char prefix_len, + u32 tb_id, + enum mlxsw_sp_l3proto proto) +{ + struct mlxsw_sp_prefix_usage req_prefix_usage; + struct mlxsw_sp_lpm_tree *lpm_tree; + struct mlxsw_sp_vr *vr; + int err; + + vr = mlxsw_sp_vr_find_unused(mlxsw_sp); + if (!vr) + return ERR_PTR(-EBUSY); + vr->fib = mlxsw_sp_fib_create(); + if (IS_ERR(vr->fib)) + return ERR_CAST(vr->fib); + + vr->proto = proto; + vr->tb_id = tb_id; + mlxsw_sp_prefix_usage_zero(&req_prefix_usage); + mlxsw_sp_prefix_usage_set(&req_prefix_usage, prefix_len); + lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage, + proto, true); + if (IS_ERR(lpm_tree)) { + err = PTR_ERR(lpm_tree); + goto err_tree_get; + } + vr->lpm_tree = lpm_tree; + err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, vr); + if (err) + goto err_tree_bind; + + vr->used = true; + return vr; + +err_tree_bind: + mlxsw_sp_lpm_tree_put(mlxsw_sp, vr->lpm_tree); +err_tree_get: + mlxsw_sp_fib_destroy(vr->fib); + + return ERR_PTR(err); +} + +static void mlxsw_sp_vr_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_vr *vr) +{ + mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, vr); + mlxsw_sp_lpm_tree_put(mlxsw_sp, vr->lpm_tree); + mlxsw_sp_fib_destroy(vr->fib); + vr->used = false; +} + +static int +mlxsw_sp_vr_lpm_tree_check(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr, + struct mlxsw_sp_prefix_usage *req_prefix_usage) +{ + struct mlxsw_sp_lpm_tree *lpm_tree; + + if (mlxsw_sp_prefix_usage_eq(req_prefix_usage, + &vr->lpm_tree->prefix_usage)) + return 0; + + lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, req_prefix_usage, + vr->proto, false); + if (IS_ERR(lpm_tree)) { + /* We failed to get a tree according to the required + * prefix usage. However, the current tree might be still good + * for us if our requirement is subset of the prefixes used + * in the tree. + */ + if (mlxsw_sp_prefix_usage_subset(req_prefix_usage, + &vr->lpm_tree->prefix_usage)) + return 0; + return PTR_ERR(lpm_tree); + } + + mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, vr); + mlxsw_sp_lpm_tree_put(mlxsw_sp, vr->lpm_tree); + vr->lpm_tree = lpm_tree; + return mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, vr); +} + +static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp, + unsigned char prefix_len, + u32 tb_id, + enum mlxsw_sp_l3proto proto) +{ + struct mlxsw_sp_vr *vr; + int err; + + tb_id = mlxsw_sp_fix_tb_id(tb_id); + vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id, proto); + if (!vr) { + vr = mlxsw_sp_vr_create(mlxsw_sp, prefix_len, tb_id, proto); + if (IS_ERR(vr)) + return vr; + } else { + struct mlxsw_sp_prefix_usage req_prefix_usage; + + mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, + &vr->fib->prefix_usage); + mlxsw_sp_prefix_usage_set(&req_prefix_usage, prefix_len); + /* Need to replace LPM tree in case new prefix is required. */ + err = mlxsw_sp_vr_lpm_tree_check(mlxsw_sp, vr, + &req_prefix_usage); + if (err) + return ERR_PTR(err); + } + return vr; +} + +static void mlxsw_sp_vr_put(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr) +{ + /* Destroy virtual router entity in case the associated FIB is empty + * and allow it to be used for other tables in future. Otherwise, + * check if some prefix usage did not disappear and change tree if + * that is the case. Note that in case new, smaller tree cannot be + * allocated, the original one will be kept being used. + */ + if (mlxsw_sp_prefix_usage_none(&vr->fib->prefix_usage)) + mlxsw_sp_vr_destroy(mlxsw_sp, vr); + else + mlxsw_sp_vr_lpm_tree_check(mlxsw_sp, vr, + &vr->fib->prefix_usage); +} + +static void mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp) +{ + struct mlxsw_sp_vr *vr; + int i; + + for (i = 0; i < MLXSW_SP_VIRTUAL_ROUTER_MAX; i++) { + vr = &mlxsw_sp->router.vrs[i]; + vr->id = i; + } +} + +struct mlxsw_sp_neigh_key { + unsigned char addr[sizeof(struct in6_addr)]; + struct net_device *dev; +}; + +struct mlxsw_sp_neigh_entry { + struct rhash_head ht_node; + struct mlxsw_sp_neigh_key key; + u16 rif; + struct neighbour *n; + bool offloaded; + struct delayed_work dw; + struct mlxsw_sp_port *mlxsw_sp_port; + unsigned char ha[ETH_ALEN]; + struct list_head nexthop_list; /* list of nexthops using + * this neigh entry + */ + struct list_head nexthop_neighs_list_node; +}; + +static const struct rhashtable_params mlxsw_sp_neigh_ht_params = { + .key_offset = offsetof(struct mlxsw_sp_neigh_entry, key), + .head_offset = offsetof(struct mlxsw_sp_neigh_entry, ht_node), + .key_len = sizeof(struct mlxsw_sp_neigh_key), +}; + +static int +mlxsw_sp_neigh_entry_insert(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_neigh_entry *neigh_entry) +{ + return rhashtable_insert_fast(&mlxsw_sp->router.neigh_ht, + &neigh_entry->ht_node, + mlxsw_sp_neigh_ht_params); +} + +static void +mlxsw_sp_neigh_entry_remove(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_neigh_entry *neigh_entry) +{ + rhashtable_remove_fast(&mlxsw_sp->router.neigh_ht, + &neigh_entry->ht_node, + mlxsw_sp_neigh_ht_params); +} + +static void mlxsw_sp_router_neigh_update_hw(struct work_struct *work); + +static struct mlxsw_sp_neigh_entry * +mlxsw_sp_neigh_entry_create(const void *addr, size_t addr_len, + struct net_device *dev, u16 rif, + struct neighbour *n) +{ + struct mlxsw_sp_neigh_entry *neigh_entry; + + neigh_entry = kzalloc(sizeof(*neigh_entry), GFP_ATOMIC); + if (!neigh_entry) + return NULL; + memcpy(neigh_entry->key.addr, addr, addr_len); + neigh_entry->key.dev = dev; + neigh_entry->rif = rif; + neigh_entry->n = n; + INIT_DELAYED_WORK(&neigh_entry->dw, mlxsw_sp_router_neigh_update_hw); + INIT_LIST_HEAD(&neigh_entry->nexthop_list); + return neigh_entry; +} + +static void +mlxsw_sp_neigh_entry_destroy(struct mlxsw_sp_neigh_entry *neigh_entry) +{ + kfree(neigh_entry); +} + +static struct mlxsw_sp_neigh_entry * +mlxsw_sp_neigh_entry_lookup(struct mlxsw_sp *mlxsw_sp, const void *addr, + size_t addr_len, struct net_device *dev) +{ + struct mlxsw_sp_neigh_key key = {{ 0 } }; + + memcpy(key.addr, addr, addr_len); + key.dev = dev; + return rhashtable_lookup_fast(&mlxsw_sp->router.neigh_ht, + &key, mlxsw_sp_neigh_ht_params); +} + +int mlxsw_sp_router_neigh_construct(struct net_device *dev, + struct neighbour *n) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct mlxsw_sp_neigh_entry *neigh_entry; + struct mlxsw_sp_rif *r; + u32 dip; + int err; + + if (n->tbl != &arp_tbl) + return 0; + + dip = ntohl(*((__be32 *) n->primary_key)); + neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, &dip, sizeof(dip), + n->dev); + if (neigh_entry) { + WARN_ON(neigh_entry->n != n); + return 0; + } + + r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, n->dev); + if (WARN_ON(!r)) + return -EINVAL; + + neigh_entry = mlxsw_sp_neigh_entry_create(&dip, sizeof(dip), n->dev, + r->rif, n); + if (!neigh_entry) + return -ENOMEM; + err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry); + if (err) + goto err_neigh_entry_insert; + return 0; + +err_neigh_entry_insert: + mlxsw_sp_neigh_entry_destroy(neigh_entry); + return err; +} + +void mlxsw_sp_router_neigh_destroy(struct net_device *dev, + struct neighbour *n) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct mlxsw_sp_neigh_entry *neigh_entry; + u32 dip; + + if (n->tbl != &arp_tbl) + return; + + dip = ntohl(*((__be32 *) n->primary_key)); + neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, &dip, sizeof(dip), + n->dev); + if (!neigh_entry) + return; + mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry); + mlxsw_sp_neigh_entry_destroy(neigh_entry); +} + +static void +mlxsw_sp_router_neighs_update_interval_init(struct mlxsw_sp *mlxsw_sp) +{ + unsigned long interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME); + + mlxsw_sp->router.neighs_update.interval = jiffies_to_msecs(interval); +} + +static void mlxsw_sp_router_neigh_ent_ipv4_process(struct mlxsw_sp *mlxsw_sp, + char *rauhtd_pl, + int ent_index) +{ + struct net_device *dev; + struct neighbour *n; + __be32 dipn; + u32 dip; + u16 rif; + + mlxsw_reg_rauhtd_ent_ipv4_unpack(rauhtd_pl, ent_index, &rif, &dip); + + if (!mlxsw_sp->rifs[rif]) { + dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n"); + return; + } + + dipn = htonl(dip); + dev = mlxsw_sp->rifs[rif]->dev; + n = neigh_lookup(&arp_tbl, &dipn, dev); + if (!n) { + netdev_err(dev, "Failed to find matching neighbour for IP=%pI4h\n", + &dip); + return; + } + + netdev_dbg(dev, "Updating neighbour with IP=%pI4h\n", &dip); + neigh_event_send(n, NULL); + neigh_release(n); +} + +static void mlxsw_sp_router_neigh_rec_ipv4_process(struct mlxsw_sp *mlxsw_sp, + char *rauhtd_pl, + int rec_index) +{ + u8 num_entries; + int i; + + num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl, + rec_index); + /* Hardware starts counting at 0, so add 1. */ + num_entries++; + + /* Each record consists of several neighbour entries. */ + for (i = 0; i < num_entries; i++) { + int ent_index; + + ent_index = rec_index * MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC + i; + mlxsw_sp_router_neigh_ent_ipv4_process(mlxsw_sp, rauhtd_pl, + ent_index); + } + +} + +static void mlxsw_sp_router_neigh_rec_process(struct mlxsw_sp *mlxsw_sp, + char *rauhtd_pl, int rec_index) +{ + switch (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, rec_index)) { + case MLXSW_REG_RAUHTD_TYPE_IPV4: + mlxsw_sp_router_neigh_rec_ipv4_process(mlxsw_sp, rauhtd_pl, + rec_index); + break; + case MLXSW_REG_RAUHTD_TYPE_IPV6: + WARN_ON_ONCE(1); + break; + } +} + +static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp) +{ + char *rauhtd_pl; + u8 num_rec; + int i, err; + + rauhtd_pl = kmalloc(MLXSW_REG_RAUHTD_LEN, GFP_KERNEL); + if (!rauhtd_pl) + return -ENOMEM; + + /* Make sure the neighbour's netdev isn't removed in the + * process. + */ + rtnl_lock(); + do { + mlxsw_reg_rauhtd_pack(rauhtd_pl, MLXSW_REG_RAUHTD_TYPE_IPV4); + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(rauhtd), + rauhtd_pl); + if (err) { + dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to dump neighbour talbe\n"); + break; + } + num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl); + for (i = 0; i < num_rec; i++) + mlxsw_sp_router_neigh_rec_process(mlxsw_sp, rauhtd_pl, + i); + } while (num_rec); + rtnl_unlock(); + + kfree(rauhtd_pl); + return err; +} + +static void mlxsw_sp_router_neighs_update_nh(struct mlxsw_sp *mlxsw_sp) +{ + struct mlxsw_sp_neigh_entry *neigh_entry; + + /* Take RTNL mutex here to prevent lists from changes */ + rtnl_lock(); + list_for_each_entry(neigh_entry, &mlxsw_sp->router.nexthop_neighs_list, + nexthop_neighs_list_node) { + /* If this neigh have nexthops, make the kernel think this neigh + * is active regardless of the traffic. + */ + if (!list_empty(&neigh_entry->nexthop_list)) + neigh_event_send(neigh_entry->n, NULL); + } + rtnl_unlock(); +} + +static void +mlxsw_sp_router_neighs_update_work_schedule(struct mlxsw_sp *mlxsw_sp) +{ + unsigned long interval = mlxsw_sp->router.neighs_update.interval; + + mlxsw_core_schedule_dw(&mlxsw_sp->router.neighs_update.dw, + msecs_to_jiffies(interval)); +} + +static void mlxsw_sp_router_neighs_update_work(struct work_struct *work) +{ + struct mlxsw_sp *mlxsw_sp = container_of(work, struct mlxsw_sp, + router.neighs_update.dw.work); + int err; + + err = mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp); + if (err) + dev_err(mlxsw_sp->bus_info->dev, "Could not update kernel for neigh activity"); + + mlxsw_sp_router_neighs_update_nh(mlxsw_sp); + + mlxsw_sp_router_neighs_update_work_schedule(mlxsw_sp); +} + +static void mlxsw_sp_router_probe_unresolved_nexthops(struct work_struct *work) +{ + struct mlxsw_sp_neigh_entry *neigh_entry; + struct mlxsw_sp *mlxsw_sp = container_of(work, struct mlxsw_sp, + router.nexthop_probe_dw.work); + + /* Iterate over nexthop neighbours, find those who are unresolved and + * send arp on them. This solves the chicken-egg problem when + * the nexthop wouldn't get offloaded until the neighbor is resolved + * but it wouldn't get resolved ever in case traffic is flowing in HW + * using different nexthop. + * + * Take RTNL mutex here to prevent lists from changes. + */ + rtnl_lock(); + list_for_each_entry(neigh_entry, &mlxsw_sp->router.nexthop_neighs_list, + nexthop_neighs_list_node) { + if (!(neigh_entry->n->nud_state & NUD_VALID) && + !list_empty(&neigh_entry->nexthop_list)) + neigh_event_send(neigh_entry->n, NULL); + } + rtnl_unlock(); + + mlxsw_core_schedule_dw(&mlxsw_sp->router.nexthop_probe_dw, + MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL); +} + +static void +mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_neigh_entry *neigh_entry, + bool removing); + +static void mlxsw_sp_router_neigh_update_hw(struct work_struct *work) +{ + struct mlxsw_sp_neigh_entry *neigh_entry = + container_of(work, struct mlxsw_sp_neigh_entry, dw.work); + struct neighbour *n = neigh_entry->n; + struct mlxsw_sp_port *mlxsw_sp_port = neigh_entry->mlxsw_sp_port; + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char rauht_pl[MLXSW_REG_RAUHT_LEN]; + struct net_device *dev; + bool entry_connected; + u8 nud_state; + bool updating; + bool removing; + bool adding; + u32 dip; + int err; + + read_lock_bh(&n->lock); + dip = ntohl(*((__be32 *) n->primary_key)); + memcpy(neigh_entry->ha, n->ha, sizeof(neigh_entry->ha)); + nud_state = n->nud_state; + dev = n->dev; + read_unlock_bh(&n->lock); + + entry_connected = nud_state & NUD_VALID; + adding = (!neigh_entry->offloaded) && entry_connected; + updating = neigh_entry->offloaded && entry_connected; + removing = neigh_entry->offloaded && !entry_connected; + + if (adding || updating) { + mlxsw_reg_rauht_pack4(rauht_pl, MLXSW_REG_RAUHT_OP_WRITE_ADD, + neigh_entry->rif, + neigh_entry->ha, dip); + err = mlxsw_reg_write(mlxsw_sp->core, + MLXSW_REG(rauht), rauht_pl); + if (err) { + netdev_err(dev, "Could not add neigh %pI4h\n", &dip); + neigh_entry->offloaded = false; + } else { + neigh_entry->offloaded = true; + } + mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, false); + } else if (removing) { + mlxsw_reg_rauht_pack4(rauht_pl, MLXSW_REG_RAUHT_OP_WRITE_DELETE, + neigh_entry->rif, + neigh_entry->ha, dip); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), + rauht_pl); + if (err) { + netdev_err(dev, "Could not delete neigh %pI4h\n", &dip); + neigh_entry->offloaded = true; + } else { + neigh_entry->offloaded = false; + } + mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, true); + } + + neigh_release(n); + mlxsw_sp_port_dev_put(mlxsw_sp_port); +} + +int mlxsw_sp_router_netevent_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct mlxsw_sp_neigh_entry *neigh_entry; + struct mlxsw_sp_port *mlxsw_sp_port; + struct mlxsw_sp *mlxsw_sp; + unsigned long interval; + struct net_device *dev; + struct neigh_parms *p; + struct neighbour *n; + u32 dip; + + switch (event) { + case NETEVENT_DELAY_PROBE_TIME_UPDATE: + p = ptr; + + /* We don't care about changes in the default table. */ + if (!p->dev || p->tbl != &arp_tbl) + return NOTIFY_DONE; + + /* We are in atomic context and can't take RTNL mutex, + * so use RCU variant to walk the device chain. + */ + mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(p->dev); + if (!mlxsw_sp_port) + return NOTIFY_DONE; + + mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + interval = jiffies_to_msecs(NEIGH_VAR(p, DELAY_PROBE_TIME)); + mlxsw_sp->router.neighs_update.interval = interval; + + mlxsw_sp_port_dev_put(mlxsw_sp_port); + break; + case NETEVENT_NEIGH_UPDATE: + n = ptr; + dev = n->dev; + + if (n->tbl != &arp_tbl) + return NOTIFY_DONE; + + mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(dev); + if (!mlxsw_sp_port) + return NOTIFY_DONE; + + mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + dip = ntohl(*((__be32 *) n->primary_key)); + neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, + &dip, + sizeof(__be32), + dev); + if (WARN_ON(!neigh_entry) || WARN_ON(neigh_entry->n != n)) { + mlxsw_sp_port_dev_put(mlxsw_sp_port); + return NOTIFY_DONE; + } + neigh_entry->mlxsw_sp_port = mlxsw_sp_port; + + /* Take a reference to ensure the neighbour won't be + * destructed until we drop the reference in delayed + * work. + */ + neigh_clone(n); + if (!mlxsw_core_schedule_dw(&neigh_entry->dw, 0)) { + neigh_release(n); + mlxsw_sp_port_dev_put(mlxsw_sp_port); + } + break; + } + + return NOTIFY_DONE; +} + +static int mlxsw_sp_neigh_init(struct mlxsw_sp *mlxsw_sp) +{ + int err; + + err = rhashtable_init(&mlxsw_sp->router.neigh_ht, + &mlxsw_sp_neigh_ht_params); + if (err) + return err; + + /* Initialize the polling interval according to the default + * table. + */ + mlxsw_sp_router_neighs_update_interval_init(mlxsw_sp); + + /* Create the delayed works for the activity_update */ + INIT_DELAYED_WORK(&mlxsw_sp->router.neighs_update.dw, + mlxsw_sp_router_neighs_update_work); + INIT_DELAYED_WORK(&mlxsw_sp->router.nexthop_probe_dw, + mlxsw_sp_router_probe_unresolved_nexthops); + mlxsw_core_schedule_dw(&mlxsw_sp->router.neighs_update.dw, 0); + mlxsw_core_schedule_dw(&mlxsw_sp->router.nexthop_probe_dw, 0); + return 0; +} + +static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp) +{ + cancel_delayed_work_sync(&mlxsw_sp->router.neighs_update.dw); + cancel_delayed_work_sync(&mlxsw_sp->router.nexthop_probe_dw); + rhashtable_destroy(&mlxsw_sp->router.neigh_ht); +} + +struct mlxsw_sp_nexthop { + struct list_head neigh_list_node; /* member of neigh entry list */ + struct mlxsw_sp_nexthop_group *nh_grp; /* pointer back to the group + * this belongs to + */ + u8 should_offload:1, /* set indicates this neigh is connected and + * should be put to KVD linear area of this group. + */ + offloaded:1, /* set in case the neigh is actually put into + * KVD linear area of this group. + */ + update:1; /* set indicates that MAC of this neigh should be + * updated in HW + */ + struct mlxsw_sp_neigh_entry *neigh_entry; +}; + +struct mlxsw_sp_nexthop_group { + struct list_head list; /* node in mlxsw->router.nexthop_group_list */ + struct list_head fib_list; /* list of fib entries that use this group */ + u8 adj_index_valid:1; + u32 adj_index; + u16 ecmp_size; + u16 count; + struct mlxsw_sp_nexthop nexthops[0]; +}; + +static int mlxsw_sp_adj_index_mass_update_vr(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_vr *vr, + u32 adj_index, u16 ecmp_size, + u32 new_adj_index, + u16 new_ecmp_size) +{ + char raleu_pl[MLXSW_REG_RALEU_LEN]; + + mlxsw_reg_raleu_pack(raleu_pl, vr->proto, vr->id, + adj_index, ecmp_size, + new_adj_index, new_ecmp_size); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raleu), raleu_pl); +} + +static int mlxsw_sp_adj_index_mass_update(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp, + u32 old_adj_index, u16 old_ecmp_size) +{ + struct mlxsw_sp_fib_entry *fib_entry; + struct mlxsw_sp_vr *vr = NULL; + int err; + + list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) { + if (vr == fib_entry->vr) + continue; + vr = fib_entry->vr; + err = mlxsw_sp_adj_index_mass_update_vr(mlxsw_sp, vr, + old_adj_index, + old_ecmp_size, + nh_grp->adj_index, + nh_grp->ecmp_size); + if (err) + return err; + } + return 0; +} + +static int mlxsw_sp_nexthop_mac_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index, + struct mlxsw_sp_nexthop *nh) +{ + struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry; + char ratr_pl[MLXSW_REG_RATR_LEN]; + + mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY, + true, adj_index, neigh_entry->rif); + mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl); +} + +static int +mlxsw_sp_nexthop_group_mac_update(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp) +{ + u32 adj_index = nh_grp->adj_index; /* base */ + struct mlxsw_sp_nexthop *nh; + int i; + int err; + + for (i = 0; i < nh_grp->count; i++) { + nh = &nh_grp->nexthops[i]; + + if (!nh->should_offload) { + nh->offloaded = 0; + continue; + } + + if (nh->update) { + err = mlxsw_sp_nexthop_mac_update(mlxsw_sp, + adj_index, nh); + if (err) + return err; + nh->update = 0; + nh->offloaded = 1; + } + adj_index++; + } + return 0; +} + +static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry); + +static int +mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp) +{ + struct mlxsw_sp_fib_entry *fib_entry; + int err; + + list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) { + err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry); + if (err) + return err; + } + return 0; +} + +static void +mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp) +{ + struct mlxsw_sp_nexthop *nh; + bool offload_change = false; + u32 adj_index; + u16 ecmp_size = 0; + bool old_adj_index_valid; + u32 old_adj_index; + u16 old_ecmp_size; + int ret; + int i; + int err; + + for (i = 0; i < nh_grp->count; i++) { + nh = &nh_grp->nexthops[i]; + + if (nh->should_offload ^ nh->offloaded) { + offload_change = true; + if (nh->should_offload) + nh->update = 1; + } + if (nh->should_offload) + ecmp_size++; + } + if (!offload_change) { + /* Nothing was added or removed, so no need to reallocate. Just + * update MAC on existing adjacency indexes. + */ + err = mlxsw_sp_nexthop_group_mac_update(mlxsw_sp, nh_grp); + if (err) { + dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n"); + goto set_trap; + } + return; + } + if (!ecmp_size) + /* No neigh of this group is connected so we just set + * the trap and let everthing flow through kernel. + */ + goto set_trap; + + ret = mlxsw_sp_kvdl_alloc(mlxsw_sp, ecmp_size); + if (ret < 0) { + /* We ran out of KVD linear space, just set the + * trap and let everything flow through kernel. + */ + dev_warn(mlxsw_sp->bus_info->dev, "Failed to allocate KVD linear area for nexthop group.\n"); + goto set_trap; + } + adj_index = ret; + old_adj_index_valid = nh_grp->adj_index_valid; + old_adj_index = nh_grp->adj_index; + old_ecmp_size = nh_grp->ecmp_size; + nh_grp->adj_index_valid = 1; + nh_grp->adj_index = adj_index; + nh_grp->ecmp_size = ecmp_size; + err = mlxsw_sp_nexthop_group_mac_update(mlxsw_sp, nh_grp); + if (err) { + dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n"); + goto set_trap; + } + + if (!old_adj_index_valid) { + /* The trap was set for fib entries, so we have to call + * fib entry update to unset it and use adjacency index. + */ + err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp); + if (err) { + dev_warn(mlxsw_sp->bus_info->dev, "Failed to add adjacency index to fib entries.\n"); + goto set_trap; + } + return; + } + + err = mlxsw_sp_adj_index_mass_update(mlxsw_sp, nh_grp, + old_adj_index, old_ecmp_size); + mlxsw_sp_kvdl_free(mlxsw_sp, old_adj_index); + if (err) { + dev_warn(mlxsw_sp->bus_info->dev, "Failed to mass-update adjacency index for nexthop group.\n"); + goto set_trap; + } + return; + +set_trap: + old_adj_index_valid = nh_grp->adj_index_valid; + nh_grp->adj_index_valid = 0; + for (i = 0; i < nh_grp->count; i++) { + nh = &nh_grp->nexthops[i]; + nh->offloaded = 0; + } + err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp); + if (err) + dev_warn(mlxsw_sp->bus_info->dev, "Failed to set traps for fib entries.\n"); + if (old_adj_index_valid) + mlxsw_sp_kvdl_free(mlxsw_sp, nh_grp->adj_index); +} + +static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh, + bool removing) +{ + if (!removing && !nh->should_offload) + nh->should_offload = 1; + else if (removing && nh->offloaded) + nh->should_offload = 0; + nh->update = 1; +} + +static void +mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_neigh_entry *neigh_entry, + bool removing) +{ + struct mlxsw_sp_nexthop *nh; + + /* Take RTNL mutex here to prevent lists from changes */ + rtnl_lock(); + list_for_each_entry(nh, &neigh_entry->nexthop_list, + neigh_list_node) { + __mlxsw_sp_nexthop_neigh_update(nh, removing); + mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp); + } + rtnl_unlock(); +} + +static int mlxsw_sp_nexthop_init(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp, + struct mlxsw_sp_nexthop *nh, + struct fib_nh *fib_nh) +{ + struct mlxsw_sp_neigh_entry *neigh_entry; + u32 gwip = ntohl(fib_nh->nh_gw); + struct net_device *dev = fib_nh->nh_dev; + struct neighbour *n; + u8 nud_state; + + neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, &gwip, + sizeof(gwip), dev); + if (!neigh_entry) { + __be32 gwipn = htonl(gwip); + + n = neigh_create(&arp_tbl, &gwipn, dev); + if (IS_ERR(n)) + return PTR_ERR(n); + neigh_event_send(n, NULL); + neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, &gwip, + sizeof(gwip), dev); + if (!neigh_entry) { + neigh_release(n); + return -EINVAL; + } + } else { + /* Take a reference of neigh here ensuring that neigh would + * not be detructed before the nexthop entry is finished. + * The second branch takes the reference in neith_create() + */ + n = neigh_entry->n; + neigh_clone(n); + } + + /* If that is the first nexthop connected to that neigh, add to + * nexthop_neighs_list + */ + if (list_empty(&neigh_entry->nexthop_list)) + list_add_tail(&neigh_entry->nexthop_neighs_list_node, + &mlxsw_sp->router.nexthop_neighs_list); + + nh->nh_grp = nh_grp; + nh->neigh_entry = neigh_entry; + list_add_tail(&nh->neigh_list_node, &neigh_entry->nexthop_list); + read_lock_bh(&n->lock); + nud_state = n->nud_state; + read_unlock_bh(&n->lock); + __mlxsw_sp_nexthop_neigh_update(nh, !(nud_state & NUD_VALID)); + + return 0; +} + +static void mlxsw_sp_nexthop_fini(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh) +{ + struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry; + + list_del(&nh->neigh_list_node); + + /* If that is the last nexthop connected to that neigh, remove from + * nexthop_neighs_list + */ + if (list_empty(&nh->neigh_entry->nexthop_list)) + list_del(&nh->neigh_entry->nexthop_neighs_list_node); + + neigh_release(neigh_entry->n); +} + +static struct mlxsw_sp_nexthop_group * +mlxsw_sp_nexthop_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi) +{ + struct mlxsw_sp_nexthop_group *nh_grp; + struct mlxsw_sp_nexthop *nh; + struct fib_nh *fib_nh; + size_t alloc_size; + int i; + int err; + + alloc_size = sizeof(*nh_grp) + + fi->fib_nhs * sizeof(struct mlxsw_sp_nexthop); + nh_grp = kzalloc(alloc_size, GFP_KERNEL); + if (!nh_grp) + return ERR_PTR(-ENOMEM); + INIT_LIST_HEAD(&nh_grp->fib_list); + nh_grp->count = fi->fib_nhs; + for (i = 0; i < nh_grp->count; i++) { + nh = &nh_grp->nexthops[i]; + fib_nh = &fi->fib_nh[i]; + err = mlxsw_sp_nexthop_init(mlxsw_sp, nh_grp, nh, fib_nh); + if (err) + goto err_nexthop_init; + } + list_add_tail(&nh_grp->list, &mlxsw_sp->router.nexthop_group_list); + mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp); + return nh_grp; + +err_nexthop_init: + for (i--; i >= 0; i--) + mlxsw_sp_nexthop_fini(mlxsw_sp, nh); + kfree(nh_grp); + return ERR_PTR(err); +} + +static void +mlxsw_sp_nexthop_group_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp) +{ + struct mlxsw_sp_nexthop *nh; + int i; + + list_del(&nh_grp->list); + for (i = 0; i < nh_grp->count; i++) { + nh = &nh_grp->nexthops[i]; + mlxsw_sp_nexthop_fini(mlxsw_sp, nh); + } + kfree(nh_grp); +} + +static bool mlxsw_sp_nexthop_match(struct mlxsw_sp_nexthop *nh, + struct fib_info *fi) +{ + int i; + + for (i = 0; i < fi->fib_nhs; i++) { + struct fib_nh *fib_nh = &fi->fib_nh[i]; + u32 gwip = ntohl(fib_nh->nh_gw); + + if (memcmp(nh->neigh_entry->key.addr, + &gwip, sizeof(u32)) == 0 && + nh->neigh_entry->key.dev == fib_nh->nh_dev) + return true; + } + return false; +} + +static bool mlxsw_sp_nexthop_group_match(struct mlxsw_sp_nexthop_group *nh_grp, + struct fib_info *fi) +{ + int i; + + if (nh_grp->count != fi->fib_nhs) + return false; + for (i = 0; i < nh_grp->count; i++) { + struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i]; + + if (!mlxsw_sp_nexthop_match(nh, fi)) + return false; + } + return true; +} + +static struct mlxsw_sp_nexthop_group * +mlxsw_sp_nexthop_group_find(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi) +{ + struct mlxsw_sp_nexthop_group *nh_grp; + + list_for_each_entry(nh_grp, &mlxsw_sp->router.nexthop_group_list, + list) { + if (mlxsw_sp_nexthop_group_match(nh_grp, fi)) + return nh_grp; + } + return NULL; +} + +static int mlxsw_sp_nexthop_group_get(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry, + struct fib_info *fi) +{ + struct mlxsw_sp_nexthop_group *nh_grp; + + nh_grp = mlxsw_sp_nexthop_group_find(mlxsw_sp, fi); + if (!nh_grp) { + nh_grp = mlxsw_sp_nexthop_group_create(mlxsw_sp, fi); + if (IS_ERR(nh_grp)) + return PTR_ERR(nh_grp); + } + list_add_tail(&fib_entry->nexthop_group_node, &nh_grp->fib_list); + fib_entry->nh_group = nh_grp; + return 0; +} + +static void mlxsw_sp_nexthop_group_put(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) +{ + struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group; + + list_del(&fib_entry->nexthop_group_node); + if (!list_empty(&nh_grp->fib_list)) + return; + mlxsw_sp_nexthop_group_destroy(mlxsw_sp, nh_grp); +} + +static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) +{ + char rgcr_pl[MLXSW_REG_RGCR_LEN]; + + mlxsw_reg_rgcr_pack(rgcr_pl, true); + mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, MLXSW_SP_RIF_MAX); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl); +} + +static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp) +{ + char rgcr_pl[MLXSW_REG_RGCR_LEN]; + + mlxsw_reg_rgcr_pack(rgcr_pl, false); + mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl); +} + +int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) +{ + int err; + + INIT_LIST_HEAD(&mlxsw_sp->router.nexthop_neighs_list); + INIT_LIST_HEAD(&mlxsw_sp->router.nexthop_group_list); + err = __mlxsw_sp_router_init(mlxsw_sp); + if (err) + return err; + mlxsw_sp_lpm_init(mlxsw_sp); + mlxsw_sp_vrs_init(mlxsw_sp); + err = mlxsw_sp_neigh_init(mlxsw_sp); + if (err) + goto err_neigh_init; + return 0; + +err_neigh_init: + __mlxsw_sp_router_fini(mlxsw_sp); + return err; +} + +void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp) +{ + mlxsw_sp_neigh_fini(mlxsw_sp); + __mlxsw_sp_router_fini(mlxsw_sp); +} + +static int mlxsw_sp_fib_entry_op4_remote(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry, + enum mlxsw_reg_ralue_op op) +{ + char ralue_pl[MLXSW_REG_RALUE_LEN]; + u32 *p_dip = (u32 *) fib_entry->key.addr; + struct mlxsw_sp_vr *vr = fib_entry->vr; + enum mlxsw_reg_ralue_trap_action trap_action; + u16 trap_id = 0; + u32 adjacency_index = 0; + u16 ecmp_size = 0; + + /* In case the nexthop group adjacency index is valid, use it + * with provided ECMP size. Otherwise, setup trap and pass + * traffic to kernel. + */ + if (fib_entry->nh_group->adj_index_valid) { + trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP; + adjacency_index = fib_entry->nh_group->adj_index; + ecmp_size = fib_entry->nh_group->ecmp_size; + } else { + trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP; + trap_id = MLXSW_TRAP_ID_RTR_INGRESS0; + } + + mlxsw_reg_ralue_pack4(ralue_pl, vr->proto, op, vr->id, + fib_entry->key.prefix_len, *p_dip); + mlxsw_reg_ralue_act_remote_pack(ralue_pl, trap_action, trap_id, + adjacency_index, ecmp_size); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl); +} + +static int mlxsw_sp_fib_entry_op4_local(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry, + enum mlxsw_reg_ralue_op op) +{ + char ralue_pl[MLXSW_REG_RALUE_LEN]; + u32 *p_dip = (u32 *) fib_entry->key.addr; + struct mlxsw_sp_vr *vr = fib_entry->vr; + + mlxsw_reg_ralue_pack4(ralue_pl, vr->proto, op, vr->id, + fib_entry->key.prefix_len, *p_dip); + mlxsw_reg_ralue_act_local_pack(ralue_pl, + MLXSW_REG_RALUE_TRAP_ACTION_NOP, 0, + fib_entry->rif); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl); +} + +static int mlxsw_sp_fib_entry_op4_trap(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry, + enum mlxsw_reg_ralue_op op) +{ + char ralue_pl[MLXSW_REG_RALUE_LEN]; + u32 *p_dip = (u32 *) fib_entry->key.addr; + struct mlxsw_sp_vr *vr = fib_entry->vr; + + mlxsw_reg_ralue_pack4(ralue_pl, vr->proto, op, vr->id, + fib_entry->key.prefix_len, *p_dip); + mlxsw_reg_ralue_act_ip2me_pack(ralue_pl); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl); +} + +static int mlxsw_sp_fib_entry_op4(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry, + enum mlxsw_reg_ralue_op op) +{ + switch (fib_entry->type) { + case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE: + return mlxsw_sp_fib_entry_op4_remote(mlxsw_sp, fib_entry, op); + case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL: + return mlxsw_sp_fib_entry_op4_local(mlxsw_sp, fib_entry, op); + case MLXSW_SP_FIB_ENTRY_TYPE_TRAP: + return mlxsw_sp_fib_entry_op4_trap(mlxsw_sp, fib_entry, op); + } + return -EINVAL; +} + +static int mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry, + enum mlxsw_reg_ralue_op op) +{ + switch (fib_entry->vr->proto) { + case MLXSW_SP_L3_PROTO_IPV4: + return mlxsw_sp_fib_entry_op4(mlxsw_sp, fib_entry, op); + case MLXSW_SP_L3_PROTO_IPV6: + return -EINVAL; + } + return -EINVAL; +} + +static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) +{ + return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry, + MLXSW_REG_RALUE_OP_WRITE_WRITE); +} + +static int mlxsw_sp_fib_entry_del(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) +{ + return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry, + MLXSW_REG_RALUE_OP_WRITE_DELETE); +} + +struct mlxsw_sp_router_fib4_add_info { + struct switchdev_trans_item tritem; + struct mlxsw_sp *mlxsw_sp; + struct mlxsw_sp_fib_entry *fib_entry; +}; + +static void mlxsw_sp_router_fib4_add_info_destroy(void const *data) +{ + const struct mlxsw_sp_router_fib4_add_info *info = data; + struct mlxsw_sp_fib_entry *fib_entry = info->fib_entry; + struct mlxsw_sp *mlxsw_sp = info->mlxsw_sp; + struct mlxsw_sp_vr *vr = fib_entry->vr; + + mlxsw_sp_fib_entry_destroy(fib_entry); + mlxsw_sp_vr_put(mlxsw_sp, vr); + kfree(info); +} + +static int +mlxsw_sp_router_fib4_entry_init(struct mlxsw_sp *mlxsw_sp, + const struct switchdev_obj_ipv4_fib *fib4, + struct mlxsw_sp_fib_entry *fib_entry) +{ + struct fib_info *fi = fib4->fi; + + if (fib4->type == RTN_LOCAL || fib4->type == RTN_BROADCAST) { + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP; + return 0; + } + if (fib4->type != RTN_UNICAST) + return -EINVAL; + + if (fi->fib_scope != RT_SCOPE_UNIVERSE) { + struct mlxsw_sp_rif *r; + + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL; + r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, fi->fib_dev); + if (!r) + return -EINVAL; + fib_entry->rif = r->rif; + return 0; + } + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE; + return mlxsw_sp_nexthop_group_get(mlxsw_sp, fib_entry, fi); +} + +static void +mlxsw_sp_router_fib4_entry_fini(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) +{ + if (fib_entry->type != MLXSW_SP_FIB_ENTRY_TYPE_REMOTE) + return; + mlxsw_sp_nexthop_group_put(mlxsw_sp, fib_entry); +} + +static struct mlxsw_sp_fib_entry * +mlxsw_sp_fib_entry_get(struct mlxsw_sp *mlxsw_sp, + const struct switchdev_obj_ipv4_fib *fib4) +{ + struct mlxsw_sp_fib_entry *fib_entry; + struct fib_info *fi = fib4->fi; + struct mlxsw_sp_vr *vr; + int err; + + vr = mlxsw_sp_vr_get(mlxsw_sp, fib4->dst_len, fib4->tb_id, + MLXSW_SP_L3_PROTO_IPV4); + if (IS_ERR(vr)) + return ERR_CAST(vr); + + fib_entry = mlxsw_sp_fib_entry_lookup(vr->fib, &fib4->dst, + sizeof(fib4->dst), + fib4->dst_len, fi->fib_dev); + if (fib_entry) { + /* Already exists, just take a reference */ + fib_entry->ref_count++; + return fib_entry; + } + fib_entry = mlxsw_sp_fib_entry_create(vr->fib, &fib4->dst, + sizeof(fib4->dst), + fib4->dst_len, fi->fib_dev); + if (!fib_entry) { + err = -ENOMEM; + goto err_fib_entry_create; + } + fib_entry->vr = vr; + fib_entry->ref_count = 1; + + err = mlxsw_sp_router_fib4_entry_init(mlxsw_sp, fib4, fib_entry); + if (err) + goto err_fib4_entry_init; + + return fib_entry; + +err_fib4_entry_init: + mlxsw_sp_fib_entry_destroy(fib_entry); +err_fib_entry_create: + mlxsw_sp_vr_put(mlxsw_sp, vr); + + return ERR_PTR(err); +} + +static struct mlxsw_sp_fib_entry * +mlxsw_sp_fib_entry_find(struct mlxsw_sp *mlxsw_sp, + const struct switchdev_obj_ipv4_fib *fib4) +{ + struct mlxsw_sp_vr *vr; + + vr = mlxsw_sp_vr_find(mlxsw_sp, fib4->tb_id, MLXSW_SP_L3_PROTO_IPV4); + if (!vr) + return NULL; + + return mlxsw_sp_fib_entry_lookup(vr->fib, &fib4->dst, + sizeof(fib4->dst), fib4->dst_len, + fib4->fi->fib_dev); +} + +void mlxsw_sp_fib_entry_put(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) +{ + struct mlxsw_sp_vr *vr = fib_entry->vr; + + if (--fib_entry->ref_count == 0) { + mlxsw_sp_router_fib4_entry_fini(mlxsw_sp, fib_entry); + mlxsw_sp_fib_entry_destroy(fib_entry); + } + mlxsw_sp_vr_put(mlxsw_sp, vr); +} + +static int +mlxsw_sp_router_fib4_add_prepare(struct mlxsw_sp_port *mlxsw_sp_port, + const struct switchdev_obj_ipv4_fib *fib4, + struct switchdev_trans *trans) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct mlxsw_sp_router_fib4_add_info *info; + struct mlxsw_sp_fib_entry *fib_entry; + int err; + + fib_entry = mlxsw_sp_fib_entry_get(mlxsw_sp, fib4); + if (IS_ERR(fib_entry)) + return PTR_ERR(fib_entry); + + info = kmalloc(sizeof(*info), GFP_KERNEL); + if (!info) { + err = -ENOMEM; + goto err_alloc_info; + } + info->mlxsw_sp = mlxsw_sp; + info->fib_entry = fib_entry; + switchdev_trans_item_enqueue(trans, info, + mlxsw_sp_router_fib4_add_info_destroy, + &info->tritem); + return 0; + +err_alloc_info: + mlxsw_sp_fib_entry_put(mlxsw_sp, fib_entry); + return err; +} + +static int +mlxsw_sp_router_fib4_add_commit(struct mlxsw_sp_port *mlxsw_sp_port, + const struct switchdev_obj_ipv4_fib *fib4, + struct switchdev_trans *trans) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct mlxsw_sp_router_fib4_add_info *info; + struct mlxsw_sp_fib_entry *fib_entry; + struct mlxsw_sp_vr *vr; + int err; + + info = switchdev_trans_item_dequeue(trans); + fib_entry = info->fib_entry; + kfree(info); + + if (fib_entry->ref_count != 1) + return 0; + + vr = fib_entry->vr; + err = mlxsw_sp_fib_entry_insert(vr->fib, fib_entry); + if (err) + goto err_fib_entry_insert; + err = mlxsw_sp_fib_entry_update(mlxsw_sp_port->mlxsw_sp, fib_entry); + if (err) + goto err_fib_entry_add; + return 0; + +err_fib_entry_add: + mlxsw_sp_fib_entry_remove(vr->fib, fib_entry); +err_fib_entry_insert: + mlxsw_sp_fib_entry_put(mlxsw_sp, fib_entry); + return err; +} + +int mlxsw_sp_router_fib4_add(struct mlxsw_sp_port *mlxsw_sp_port, + const struct switchdev_obj_ipv4_fib *fib4, + struct switchdev_trans *trans) +{ + if (switchdev_trans_ph_prepare(trans)) + return mlxsw_sp_router_fib4_add_prepare(mlxsw_sp_port, + fib4, trans); + return mlxsw_sp_router_fib4_add_commit(mlxsw_sp_port, + fib4, trans); +} + +int mlxsw_sp_router_fib4_del(struct mlxsw_sp_port *mlxsw_sp_port, + const struct switchdev_obj_ipv4_fib *fib4) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct mlxsw_sp_fib_entry *fib_entry; + + fib_entry = mlxsw_sp_fib_entry_find(mlxsw_sp, fib4); + if (!fib_entry) { + dev_warn(mlxsw_sp->bus_info->dev, "Failed to find FIB4 entry being removed.\n"); + return -ENOENT; + } + + if (fib_entry->ref_count == 1) { + mlxsw_sp_fib_entry_del(mlxsw_sp, fib_entry); + mlxsw_sp_fib_entry_remove(fib_entry->vr->fib, fib_entry); + } + + mlxsw_sp_fib_entry_put(mlxsw_sp, fib_entry); + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index 9cd6f472234a..7b654c517b91 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -55,13 +55,10 @@ static u16 mlxsw_sp_port_vid_to_fid_get(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid) { + struct mlxsw_sp_fid *f = mlxsw_sp_vport_fid_get(mlxsw_sp_port); u16 fid = vid; - if (mlxsw_sp_port_is_vport(mlxsw_sp_port)) { - u16 vfid = mlxsw_sp_vport_vfid_get(mlxsw_sp_port); - - fid = mlxsw_sp_vfid_to_fid(vfid); - } + fid = f ? f->fid : fid; if (!fid) fid = mlxsw_sp_port->pvid; @@ -169,14 +166,9 @@ static int mlxsw_sp_port_attr_stp_state_set(struct mlxsw_sp_port *mlxsw_sp_port, return mlxsw_sp_port_stp_state_set(mlxsw_sp_port, state); } -static bool mlxsw_sp_vfid_is_vport_br(u16 vfid) -{ - return vfid >= MLXSW_SP_VFID_PORT_MAX; -} - static int __mlxsw_sp_port_flood_set(struct mlxsw_sp_port *mlxsw_sp_port, - u16 idx_begin, u16 idx_end, bool set, - bool only_uc) + u16 idx_begin, u16 idx_end, bool uc_set, + bool bm_set) { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; u16 local_port = mlxsw_sp_port->local_port; @@ -185,43 +177,32 @@ static int __mlxsw_sp_port_flood_set(struct mlxsw_sp_port *mlxsw_sp_port, char *sftr_pl; int err; - if (mlxsw_sp_port_is_vport(mlxsw_sp_port)) { + if (mlxsw_sp_port_is_vport(mlxsw_sp_port)) table_type = MLXSW_REG_SFGC_TABLE_TYPE_FID; - if (mlxsw_sp_vfid_is_vport_br(idx_begin)) - local_port = mlxsw_sp_port->local_port; - else - local_port = MLXSW_PORT_CPU_PORT; - } else { + else table_type = MLXSW_REG_SFGC_TABLE_TYPE_FID_OFFEST; - } sftr_pl = kmalloc(MLXSW_REG_SFTR_LEN, GFP_KERNEL); if (!sftr_pl) return -ENOMEM; mlxsw_reg_sftr_pack(sftr_pl, MLXSW_SP_FLOOD_TABLE_UC, idx_begin, - table_type, range, local_port, set); + table_type, range, local_port, uc_set); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sftr), sftr_pl); if (err) goto buffer_out; - /* Flooding control allows one to decide whether a given port will - * flood unicast traffic for which there is no FDB entry. - */ - if (only_uc) - goto buffer_out; - mlxsw_reg_sftr_pack(sftr_pl, MLXSW_SP_FLOOD_TABLE_BM, idx_begin, - table_type, range, local_port, set); + table_type, range, local_port, bm_set); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sftr), sftr_pl); if (err) goto err_flood_bm_set; - else - goto buffer_out; + + goto buffer_out; err_flood_bm_set: mlxsw_reg_sftr_pack(sftr_pl, MLXSW_SP_FLOOD_TABLE_UC, idx_begin, - table_type, range, local_port, !set); + table_type, range, local_port, !uc_set); mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sftr), sftr_pl); buffer_out: kfree(sftr_pl); @@ -236,7 +217,8 @@ static int mlxsw_sp_port_uc_flood_set(struct mlxsw_sp_port *mlxsw_sp_port, int err; if (mlxsw_sp_port_is_vport(mlxsw_sp_port)) { - u16 vfid = mlxsw_sp_vport_vfid_get(mlxsw_sp_port); + u16 fid = mlxsw_sp_vport_fid_get(mlxsw_sp_port)->fid; + u16 vfid = mlxsw_sp_fid_to_vfid(fid); return __mlxsw_sp_port_flood_set(mlxsw_sp_port, vfid, vfid, set, true); @@ -260,14 +242,16 @@ err_port_flood_set: return err; } -int mlxsw_sp_vport_flood_set(struct mlxsw_sp_port *mlxsw_sp_vport, u16 vfid, - bool set, bool only_uc) +int mlxsw_sp_vport_flood_set(struct mlxsw_sp_port *mlxsw_sp_vport, u16 fid, + bool set) { + u16 vfid; + /* In case of vFIDs, index into the flooding table is relative to * the start of the vFIDs range. */ - return __mlxsw_sp_port_flood_set(mlxsw_sp_vport, vfid, vfid, set, - only_uc); + vfid = mlxsw_sp_fid_to_vfid(fid); + return __mlxsw_sp_port_flood_set(mlxsw_sp_vport, vfid, vfid, set, set); } static int mlxsw_sp_port_attr_br_flags_set(struct mlxsw_sp_port *mlxsw_sp_port, @@ -383,6 +367,192 @@ static int mlxsw_sp_port_attr_set(struct net_device *dev, return err; } +static int mlxsw_sp_fid_op(struct mlxsw_sp *mlxsw_sp, u16 fid, bool create) +{ + char sfmr_pl[MLXSW_REG_SFMR_LEN]; + + mlxsw_reg_sfmr_pack(sfmr_pl, !create, fid, fid); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfmr), sfmr_pl); +} + +static int mlxsw_sp_fid_map(struct mlxsw_sp *mlxsw_sp, u16 fid, bool valid) +{ + enum mlxsw_reg_svfa_mt mt = MLXSW_REG_SVFA_MT_VID_TO_FID; + char svfa_pl[MLXSW_REG_SVFA_LEN]; + + mlxsw_reg_svfa_pack(svfa_pl, 0, mt, valid, fid, fid); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(svfa), svfa_pl); +} + +static struct mlxsw_sp_fid *mlxsw_sp_fid_alloc(u16 fid) +{ + struct mlxsw_sp_fid *f; + + f = kzalloc(sizeof(*f), GFP_KERNEL); + if (!f) + return NULL; + + f->fid = fid; + + return f; +} + +struct mlxsw_sp_fid *mlxsw_sp_fid_create(struct mlxsw_sp *mlxsw_sp, u16 fid) +{ + struct mlxsw_sp_fid *f; + int err; + + err = mlxsw_sp_fid_op(mlxsw_sp, fid, true); + if (err) + return ERR_PTR(err); + + /* Although all the ports member in the FID might be using a + * {Port, VID} to FID mapping, we create a global VID-to-FID + * mapping. This allows a port to transition to VLAN mode, + * knowing the global mapping exists. + */ + err = mlxsw_sp_fid_map(mlxsw_sp, fid, true); + if (err) + goto err_fid_map; + + f = mlxsw_sp_fid_alloc(fid); + if (!f) { + err = -ENOMEM; + goto err_allocate_fid; + } + + list_add(&f->list, &mlxsw_sp->fids); + + return f; + +err_allocate_fid: + mlxsw_sp_fid_map(mlxsw_sp, fid, false); +err_fid_map: + mlxsw_sp_fid_op(mlxsw_sp, fid, false); + return ERR_PTR(err); +} + +void mlxsw_sp_fid_destroy(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fid *f) +{ + u16 fid = f->fid; + + list_del(&f->list); + + if (f->r) + mlxsw_sp_rif_bridge_destroy(mlxsw_sp, f->r); + + kfree(f); + + mlxsw_sp_fid_map(mlxsw_sp, fid, false); + + mlxsw_sp_fid_op(mlxsw_sp, fid, false); +} + +static int __mlxsw_sp_port_fid_join(struct mlxsw_sp_port *mlxsw_sp_port, + u16 fid) +{ + struct mlxsw_sp_fid *f; + + if (test_bit(fid, mlxsw_sp_port->active_vlans)) + return 0; + + f = mlxsw_sp_fid_find(mlxsw_sp_port->mlxsw_sp, fid); + if (!f) { + f = mlxsw_sp_fid_create(mlxsw_sp_port->mlxsw_sp, fid); + if (IS_ERR(f)) + return PTR_ERR(f); + } + + f->ref_count++; + + netdev_dbg(mlxsw_sp_port->dev, "Joined FID=%d\n", fid); + + return 0; +} + +static void __mlxsw_sp_port_fid_leave(struct mlxsw_sp_port *mlxsw_sp_port, + u16 fid) +{ + struct mlxsw_sp_fid *f; + + f = mlxsw_sp_fid_find(mlxsw_sp_port->mlxsw_sp, fid); + if (WARN_ON(!f)) + return; + + netdev_dbg(mlxsw_sp_port->dev, "Left FID=%d\n", fid); + + mlxsw_sp_port_fdb_flush(mlxsw_sp_port, fid); + + if (--f->ref_count == 0) + mlxsw_sp_fid_destroy(mlxsw_sp_port->mlxsw_sp, f); +} + +static int mlxsw_sp_port_fid_map(struct mlxsw_sp_port *mlxsw_sp_port, u16 fid, + bool valid) +{ + enum mlxsw_reg_svfa_mt mt = MLXSW_REG_SVFA_MT_PORT_VID_TO_FID; + + /* If port doesn't have vPorts, then it can use the global + * VID-to-FID mapping. + */ + if (list_empty(&mlxsw_sp_port->vports_list)) + return 0; + + return mlxsw_sp_port_vid_to_fid_set(mlxsw_sp_port, mt, valid, fid, fid); +} + +static int mlxsw_sp_port_fid_join(struct mlxsw_sp_port *mlxsw_sp_port, + u16 fid_begin, u16 fid_end) +{ + int fid, err; + + for (fid = fid_begin; fid <= fid_end; fid++) { + err = __mlxsw_sp_port_fid_join(mlxsw_sp_port, fid); + if (err) + goto err_port_fid_join; + } + + err = __mlxsw_sp_port_flood_set(mlxsw_sp_port, fid_begin, fid_end, + mlxsw_sp_port->uc_flood, true); + if (err) + goto err_port_flood_set; + + for (fid = fid_begin; fid <= fid_end; fid++) { + err = mlxsw_sp_port_fid_map(mlxsw_sp_port, fid, true); + if (err) + goto err_port_fid_map; + } + + return 0; + +err_port_fid_map: + for (fid--; fid >= fid_begin; fid--) + mlxsw_sp_port_fid_map(mlxsw_sp_port, fid, false); + __mlxsw_sp_port_flood_set(mlxsw_sp_port, fid_begin, fid_end, false, + false); +err_port_flood_set: + fid = fid_end; +err_port_fid_join: + for (fid--; fid >= fid_begin; fid--) + __mlxsw_sp_port_fid_leave(mlxsw_sp_port, fid); + return err; +} + +static void mlxsw_sp_port_fid_leave(struct mlxsw_sp_port *mlxsw_sp_port, + u16 fid_begin, u16 fid_end) +{ + int fid; + + for (fid = fid_begin; fid <= fid_end; fid++) + mlxsw_sp_port_fid_map(mlxsw_sp_port, fid, false); + + __mlxsw_sp_port_flood_set(mlxsw_sp_port, fid_begin, fid_end, false, + false); + + for (fid = fid_begin; fid <= fid_end; fid++) + __mlxsw_sp_port_fid_leave(mlxsw_sp_port, fid); +} + static int __mlxsw_sp_port_pvid_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid) { @@ -440,74 +610,6 @@ err_port_allow_untagged_set: return err; } -static int mlxsw_sp_fid_create(struct mlxsw_sp *mlxsw_sp, u16 fid) -{ - char sfmr_pl[MLXSW_REG_SFMR_LEN]; - int err; - - mlxsw_reg_sfmr_pack(sfmr_pl, MLXSW_REG_SFMR_OP_CREATE_FID, fid, fid); - err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfmr), sfmr_pl); - - if (err) - return err; - - set_bit(fid, mlxsw_sp->active_fids); - return 0; -} - -static void mlxsw_sp_fid_destroy(struct mlxsw_sp *mlxsw_sp, u16 fid) -{ - char sfmr_pl[MLXSW_REG_SFMR_LEN]; - - clear_bit(fid, mlxsw_sp->active_fids); - - mlxsw_reg_sfmr_pack(sfmr_pl, MLXSW_REG_SFMR_OP_DESTROY_FID, - fid, fid); - mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfmr), sfmr_pl); -} - -static int mlxsw_sp_port_fid_map(struct mlxsw_sp_port *mlxsw_sp_port, u16 fid) -{ - enum mlxsw_reg_svfa_mt mt; - - if (!list_empty(&mlxsw_sp_port->vports_list)) - mt = MLXSW_REG_SVFA_MT_PORT_VID_TO_FID; - else - mt = MLXSW_REG_SVFA_MT_VID_TO_FID; - - return mlxsw_sp_port_vid_to_fid_set(mlxsw_sp_port, mt, true, fid, fid); -} - -static int mlxsw_sp_port_fid_unmap(struct mlxsw_sp_port *mlxsw_sp_port, u16 fid) -{ - enum mlxsw_reg_svfa_mt mt; - - if (list_empty(&mlxsw_sp_port->vports_list)) - return 0; - - mt = MLXSW_REG_SVFA_MT_PORT_VID_TO_FID; - return mlxsw_sp_port_vid_to_fid_set(mlxsw_sp_port, mt, false, fid, fid); -} - -static int mlxsw_sp_port_add_vids(struct net_device *dev, u16 vid_begin, - u16 vid_end) -{ - u16 vid; - int err; - - for (vid = vid_begin; vid <= vid_end; vid++) { - err = mlxsw_sp_port_add_vid(dev, 0, vid); - if (err) - goto err_port_add_vid; - } - return 0; - -err_port_add_vid: - for (vid--; vid >= vid_begin; vid--) - mlxsw_sp_port_kill_vid(dev, 0, vid); - return err; -} - static int __mlxsw_sp_port_vlans_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid_begin, u16 vid_end, bool is_member, bool untagged) @@ -533,57 +635,17 @@ static int __mlxsw_sp_port_vlans_add(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid_begin, u16 vid_end, bool flag_untagged, bool flag_pvid) { - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; struct net_device *dev = mlxsw_sp_port->dev; - u16 vid, last_visited_vid, old_pvid; - enum mlxsw_reg_svfa_mt mt; + u16 vid, old_pvid; int err; - /* In case this is invoked with BRIDGE_FLAGS_SELF and port is - * not bridged, then packets ingressing through the port with - * the specified VIDs will be directed to CPU. - */ if (!mlxsw_sp_port->bridged) - return mlxsw_sp_port_add_vids(dev, vid_begin, vid_end); - - for (vid = vid_begin; vid <= vid_end; vid++) { - if (!test_bit(vid, mlxsw_sp->active_fids)) { - err = mlxsw_sp_fid_create(mlxsw_sp, vid); - if (err) { - netdev_err(dev, "Failed to create FID=%d\n", - vid); - return err; - } - - /* When creating a FID, we set a VID to FID mapping - * regardless of the port's mode. - */ - mt = MLXSW_REG_SVFA_MT_VID_TO_FID; - err = mlxsw_sp_port_vid_to_fid_set(mlxsw_sp_port, mt, - true, vid, vid); - if (err) { - netdev_err(dev, "Failed to create FID=VID=%d mapping\n", - vid); - goto err_port_vid_to_fid_set; - } - } - } - - /* Set FID mapping according to port's mode */ - for (vid = vid_begin; vid <= vid_end; vid++) { - err = mlxsw_sp_port_fid_map(mlxsw_sp_port, vid); - if (err) { - netdev_err(dev, "Failed to map FID=%d", vid); - last_visited_vid = --vid; - goto err_port_fid_map; - } - } + return -EINVAL; - err = __mlxsw_sp_port_flood_set(mlxsw_sp_port, vid_begin, vid_end, - true, false); + err = mlxsw_sp_port_fid_join(mlxsw_sp_port, vid_begin, vid_end); if (err) { - netdev_err(dev, "Failed to configure flooding\n"); - goto err_port_flood_set; + netdev_err(dev, "Failed to join FIDs\n"); + return err; } err = __mlxsw_sp_port_vlans_set(mlxsw_sp_port, vid_begin, vid_end, @@ -628,10 +690,6 @@ static int __mlxsw_sp_port_vlans_add(struct mlxsw_sp_port *mlxsw_sp_port, return 0; -err_port_vid_to_fid_set: - mlxsw_sp_fid_destroy(mlxsw_sp, vid); - return err; - err_port_stp_state_set: for (vid = vid_begin; vid <= vid_end; vid++) clear_bit(vid, mlxsw_sp_port->active_vlans); @@ -641,13 +699,7 @@ err_port_pvid_set: __mlxsw_sp_port_vlans_set(mlxsw_sp_port, vid_begin, vid_end, false, false); err_port_vlans_set: - __mlxsw_sp_port_flood_set(mlxsw_sp_port, vid_begin, vid_end, false, - false); -err_port_flood_set: - last_visited_vid = vid_end; -err_port_fid_map: - for (vid = last_visited_vid; vid >= vid_begin; vid--) - mlxsw_sp_port_fid_unmap(mlxsw_sp_port, vid); + mlxsw_sp_port_fid_leave(mlxsw_sp_port, vid_begin, vid_end); return err; } @@ -678,9 +730,10 @@ static enum mlxsw_reg_sfd_op mlxsw_sp_sfd_op(bool adding) MLXSW_REG_SFD_OP_WRITE_REMOVE; } -static int mlxsw_sp_port_fdb_uc_op(struct mlxsw_sp *mlxsw_sp, u8 local_port, - const char *mac, u16 fid, bool adding, - bool dynamic) +static int __mlxsw_sp_port_fdb_uc_op(struct mlxsw_sp *mlxsw_sp, u8 local_port, + const char *mac, u16 fid, bool adding, + enum mlxsw_reg_sfd_rec_action action, + bool dynamic) { char *sfd_pl; int err; @@ -691,14 +744,29 @@ static int mlxsw_sp_port_fdb_uc_op(struct mlxsw_sp *mlxsw_sp, u8 local_port, mlxsw_reg_sfd_pack(sfd_pl, mlxsw_sp_sfd_op(adding), 0); mlxsw_reg_sfd_uc_pack(sfd_pl, 0, mlxsw_sp_sfd_rec_policy(dynamic), - mac, fid, MLXSW_REG_SFD_REC_ACTION_NOP, - local_port); + mac, fid, action, local_port); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfd), sfd_pl); kfree(sfd_pl); return err; } +static int mlxsw_sp_port_fdb_uc_op(struct mlxsw_sp *mlxsw_sp, u8 local_port, + const char *mac, u16 fid, bool adding, + bool dynamic) +{ + return __mlxsw_sp_port_fdb_uc_op(mlxsw_sp, local_port, mac, fid, adding, + MLXSW_REG_SFD_REC_ACTION_NOP, dynamic); +} + +int mlxsw_sp_rif_fdb_op(struct mlxsw_sp *mlxsw_sp, const char *mac, u16 fid, + bool adding) +{ + return __mlxsw_sp_port_fdb_uc_op(mlxsw_sp, 0, mac, fid, adding, + MLXSW_REG_SFD_REC_ACTION_FORWARD_IP_ROUTER, + false); +} + static int mlxsw_sp_port_fdb_uc_lag_op(struct mlxsw_sp *mlxsw_sp, u16 lag_id, const char *mac, u16 fid, u16 lag_vid, bool adding, bool dynamic) @@ -903,6 +971,11 @@ static int mlxsw_sp_port_obj_add(struct net_device *dev, SWITCHDEV_OBJ_PORT_VLAN(obj), trans); break; + case SWITCHDEV_OBJ_ID_IPV4_FIB: + err = mlxsw_sp_router_fib4_add(mlxsw_sp_port, + SWITCHDEV_OBJ_IPV4_FIB(obj), + trans); + break; case SWITCHDEV_OBJ_ID_PORT_FDB: err = mlxsw_sp_port_fdb_static_add(mlxsw_sp_port, SWITCHDEV_OBJ_PORT_FDB(obj), @@ -921,34 +994,15 @@ static int mlxsw_sp_port_obj_add(struct net_device *dev, return err; } -static int mlxsw_sp_port_kill_vids(struct net_device *dev, u16 vid_begin, - u16 vid_end) -{ - u16 vid; - int err; - - for (vid = vid_begin; vid <= vid_end; vid++) { - err = mlxsw_sp_port_kill_vid(dev, 0, vid); - if (err) - return err; - } - - return 0; -} - static int __mlxsw_sp_port_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port, - u16 vid_begin, u16 vid_end, bool init) + u16 vid_begin, u16 vid_end) { struct net_device *dev = mlxsw_sp_port->dev; u16 vid, pvid; int err; - /* In case this is invoked with BRIDGE_FLAGS_SELF and port is - * not bridged, then prevent packets ingressing through the - * port with the specified VIDs from being trapped to CPU. - */ - if (!init && !mlxsw_sp_port->bridged) - return mlxsw_sp_port_kill_vids(dev, vid_begin, vid_end); + if (!mlxsw_sp_port->bridged) + return -EINVAL; err = __mlxsw_sp_port_vlans_set(mlxsw_sp_port, vid_begin, vid_end, false, false); @@ -958,9 +1012,6 @@ static int __mlxsw_sp_port_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port, return err; } - if (init) - goto out; - pvid = mlxsw_sp_port->pvid; if (pvid >= vid_begin && pvid <= vid_end) { err = mlxsw_sp_port_pvid_set(mlxsw_sp_port, 0); @@ -970,23 +1021,8 @@ static int __mlxsw_sp_port_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port, } } - err = __mlxsw_sp_port_flood_set(mlxsw_sp_port, vid_begin, vid_end, - false, false); - if (err) { - netdev_err(dev, "Failed to clear flooding\n"); - return err; - } + mlxsw_sp_port_fid_leave(mlxsw_sp_port, vid_begin, vid_end); - for (vid = vid_begin; vid <= vid_end; vid++) { - /* Remove FID mapping in case of Virtual mode */ - err = mlxsw_sp_port_fid_unmap(mlxsw_sp_port, vid); - if (err) { - netdev_err(dev, "Failed to unmap FID=%d", vid); - return err; - } - } - -out: /* Changing activity bits only if HW operation succeded */ for (vid = vid_begin; vid <= vid_end; vid++) clear_bit(vid, mlxsw_sp_port->active_vlans); @@ -997,8 +1033,8 @@ out: static int mlxsw_sp_port_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port, const struct switchdev_obj_port_vlan *vlan) { - return __mlxsw_sp_port_vlans_del(mlxsw_sp_port, - vlan->vid_begin, vlan->vid_end, false); + return __mlxsw_sp_port_vlans_del(mlxsw_sp_port, vlan->vid_begin, + vlan->vid_end); } void mlxsw_sp_port_active_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port) @@ -1006,7 +1042,7 @@ void mlxsw_sp_port_active_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port) u16 vid; for_each_set_bit(vid, mlxsw_sp_port->active_vlans, VLAN_N_VID) - __mlxsw_sp_port_vlans_del(mlxsw_sp_port, vid, vid, false); + __mlxsw_sp_port_vlans_del(mlxsw_sp_port, vid, vid); } static int @@ -1081,6 +1117,10 @@ static int mlxsw_sp_port_obj_del(struct net_device *dev, err = mlxsw_sp_port_vlans_del(mlxsw_sp_port, SWITCHDEV_OBJ_PORT_VLAN(obj)); break; + case SWITCHDEV_OBJ_ID_IPV4_FIB: + err = mlxsw_sp_router_fib4_del(mlxsw_sp_port, + SWITCHDEV_OBJ_IPV4_FIB(obj)); + break; case SWITCHDEV_OBJ_ID_PORT_FDB: err = mlxsw_sp_port_fdb_static_del(mlxsw_sp_port, SWITCHDEV_OBJ_PORT_FDB(obj)); @@ -1118,7 +1158,8 @@ static int mlxsw_sp_port_fdb_dump(struct mlxsw_sp_port *mlxsw_sp_port, { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; struct mlxsw_sp_port *tmp; - u16 vport_fid = 0; + struct mlxsw_sp_fid *f; + u16 vport_fid; char *sfd_pl; char mac[ETH_ALEN]; u16 fid; @@ -1133,12 +1174,8 @@ static int mlxsw_sp_port_fdb_dump(struct mlxsw_sp_port *mlxsw_sp_port, if (!sfd_pl) return -ENOMEM; - if (mlxsw_sp_port_is_vport(mlxsw_sp_port)) { - u16 tmp; - - tmp = mlxsw_sp_vport_vfid_get(mlxsw_sp_port); - vport_fid = mlxsw_sp_vfid_to_fid(tmp); - } + f = mlxsw_sp_vport_fid_get(mlxsw_sp_port); + vport_fid = f ? f->fid : 0; mlxsw_reg_sfd_pack(sfd_pl, MLXSW_REG_SFD_OP_QUERY_DUMP, 0); do { @@ -1310,11 +1347,10 @@ static void mlxsw_sp_fdb_notify_mac_process(struct mlxsw_sp *mlxsw_sp, } if (mlxsw_sp_fid_is_vfid(fid)) { - u16 vfid = mlxsw_sp_fid_to_vfid(fid); struct mlxsw_sp_port *mlxsw_sp_vport; - mlxsw_sp_vport = mlxsw_sp_port_vport_find_by_vfid(mlxsw_sp_port, - vfid); + mlxsw_sp_vport = mlxsw_sp_port_vport_find_by_fid(mlxsw_sp_port, + fid); if (!mlxsw_sp_vport) { netdev_err(mlxsw_sp_port->dev, "Failed to find a matching vPort following FDB notification\n"); goto just_remove; @@ -1370,11 +1406,10 @@ static void mlxsw_sp_fdb_notify_mac_lag_process(struct mlxsw_sp *mlxsw_sp, } if (mlxsw_sp_fid_is_vfid(fid)) { - u16 vfid = mlxsw_sp_fid_to_vfid(fid); struct mlxsw_sp_port *mlxsw_sp_vport; - mlxsw_sp_vport = mlxsw_sp_port_vport_find_by_vfid(mlxsw_sp_port, - vfid); + mlxsw_sp_vport = mlxsw_sp_port_vport_find_by_fid(mlxsw_sp_port, + fid); if (!mlxsw_sp_vport) { netdev_err(mlxsw_sp_port->dev, "Failed to find a matching vPort following FDB notification\n"); goto just_remove; @@ -1438,8 +1473,8 @@ static void mlxsw_sp_fdb_notify_rec_process(struct mlxsw_sp *mlxsw_sp, static void mlxsw_sp_fdb_notify_work_schedule(struct mlxsw_sp *mlxsw_sp) { - schedule_delayed_work(&mlxsw_sp->fdb_notify.dw, - msecs_to_jiffies(mlxsw_sp->fdb_notify.interval)); + mlxsw_core_schedule_dw(&mlxsw_sp->fdb_notify.dw, + msecs_to_jiffies(mlxsw_sp->fdb_notify.interval)); } static void mlxsw_sp_fdb_notify_work(struct work_struct *work) @@ -1495,14 +1530,6 @@ static void mlxsw_sp_fdb_fini(struct mlxsw_sp *mlxsw_sp) cancel_delayed_work_sync(&mlxsw_sp->fdb_notify.dw); } -static void mlxsw_sp_fids_fini(struct mlxsw_sp *mlxsw_sp) -{ - u16 fid; - - for_each_set_bit(fid, mlxsw_sp->active_fids, VLAN_N_VID) - mlxsw_sp_fid_destroy(mlxsw_sp, fid); -} - int mlxsw_sp_switchdev_init(struct mlxsw_sp *mlxsw_sp) { return mlxsw_sp_fdb_init(mlxsw_sp); @@ -1511,33 +1538,6 @@ int mlxsw_sp_switchdev_init(struct mlxsw_sp *mlxsw_sp) void mlxsw_sp_switchdev_fini(struct mlxsw_sp *mlxsw_sp) { mlxsw_sp_fdb_fini(mlxsw_sp); - mlxsw_sp_fids_fini(mlxsw_sp); -} - -int mlxsw_sp_port_vlan_init(struct mlxsw_sp_port *mlxsw_sp_port) -{ - struct net_device *dev = mlxsw_sp_port->dev; - int err; - - /* Allow only untagged packets to ingress and tag them internally - * with VID 1. - */ - mlxsw_sp_port->pvid = 1; - err = __mlxsw_sp_port_vlans_del(mlxsw_sp_port, 0, VLAN_N_VID - 1, - true); - if (err) { - netdev_err(dev, "Unable to init VLANs\n"); - return err; - } - - /* Add implicit VLAN interface in the device, so that untagged - * packets will be classified to the default vFID. - */ - err = mlxsw_sp_port_add_vid(dev, 0, 1); - if (err) - netdev_err(dev, "Failed to configure default vFID\n"); - - return err; } void mlxsw_sp_port_switchdev_init(struct mlxsw_sp_port *mlxsw_sp_port) diff --git a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c index 7a60a26759b6..377daa4d509c 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c +++ b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c @@ -43,7 +43,6 @@ #include <linux/device.h> #include <linux/skbuff.h> #include <linux/if_vlan.h> -#include <net/devlink.h> #include <net/switchdev.h> #include <generated/utsrelease.h> @@ -75,11 +74,11 @@ struct mlxsw_sx_port_pcpu_stats { }; struct mlxsw_sx_port { + struct mlxsw_core_port core_port; /* must be first */ struct net_device *dev; struct mlxsw_sx_port_pcpu_stats __percpu *pcpu_stats; struct mlxsw_sx *mlxsw_sx; u8 local_port; - struct devlink_port devlink_port; }; /* tx_hdr_version @@ -303,7 +302,7 @@ static netdev_tx_t mlxsw_sx_port_xmit(struct sk_buff *skb, u64 len; int err; - if (mlxsw_core_skb_transmit_busy(mlxsw_sx, &tx_info)) + if (mlxsw_core_skb_transmit_busy(mlxsw_sx->core, &tx_info)) return NETDEV_TX_BUSY; if (unlikely(skb_headroom(skb) < MLXSW_TXHDR_LEN)) { @@ -317,11 +316,14 @@ static netdev_tx_t mlxsw_sx_port_xmit(struct sk_buff *skb, } } mlxsw_sx_txhdr_construct(skb, &tx_info); - len = skb->len; + /* TX header is consumed by HW on the way so we shouldn't count its + * bytes as being sent. + */ + len = skb->len - MLXSW_TXHDR_LEN; /* Due to a race we might fail here because of a full queue. In that * unlikely case we simply drop the packet. */ - err = mlxsw_core_skb_transmit(mlxsw_sx, skb, &tx_info); + err = mlxsw_core_skb_transmit(mlxsw_sx->core, skb, &tx_info); if (!err) { pcpu_stats = this_cpu_ptr(mlxsw_sx_port->pcpu_stats); @@ -518,7 +520,8 @@ static void mlxsw_sx_port_get_stats(struct net_device *dev, int i; int err; - mlxsw_reg_ppcnt_pack(ppcnt_pl, mlxsw_sx_port->local_port); + mlxsw_reg_ppcnt_pack(ppcnt_pl, mlxsw_sx_port->local_port, + MLXSW_REG_PPCNT_IEEE_8023_CNT, 0); err = mlxsw_reg_query(mlxsw_sx->core, MLXSW_REG(ppcnt), ppcnt_pl); for (i = 0; i < MLXSW_SX_PORT_HW_STATS_LEN; i++) data[i] = !err ? mlxsw_sx_port_hw_stats[i].getter(ppcnt_pl) : 0; @@ -955,9 +958,7 @@ mlxsw_sx_port_mac_learning_mode_set(struct mlxsw_sx_port *mlxsw_sx_port, static int mlxsw_sx_port_create(struct mlxsw_sx *mlxsw_sx, u8 local_port) { - struct devlink *devlink = priv_to_devlink(mlxsw_sx->core); struct mlxsw_sx_port *mlxsw_sx_port; - struct devlink_port *devlink_port; struct net_device *dev; bool usable; int err; @@ -1011,14 +1012,6 @@ static int mlxsw_sx_port_create(struct mlxsw_sx *mlxsw_sx, u8 local_port) goto port_not_usable; } - devlink_port = &mlxsw_sx_port->devlink_port; - err = devlink_port_register(devlink, devlink_port, local_port); - if (err) { - dev_err(mlxsw_sx->bus_info->dev, "Port %d: Failed to register devlink port\n", - mlxsw_sx_port->local_port); - goto err_devlink_port_register; - } - err = mlxsw_sx_port_system_port_mapping_set(mlxsw_sx_port); if (err) { dev_err(mlxsw_sx->bus_info->dev, "Port %d: Failed to set system port mapping\n", @@ -1076,11 +1069,19 @@ static int mlxsw_sx_port_create(struct mlxsw_sx *mlxsw_sx, u8 local_port) goto err_register_netdev; } - devlink_port_type_eth_set(devlink_port, dev); + err = mlxsw_core_port_init(mlxsw_sx->core, &mlxsw_sx_port->core_port, + mlxsw_sx_port->local_port, dev, false, 0); + if (err) { + dev_err(mlxsw_sx->bus_info->dev, "Port %d: Failed to init core port\n", + mlxsw_sx_port->local_port); + goto err_core_port_init; + } mlxsw_sx->ports[local_port] = mlxsw_sx_port; return 0; +err_core_port_init: + unregister_netdev(dev); err_register_netdev: err_port_mac_learning_mode_set: err_port_stp_state_set: @@ -1089,8 +1090,6 @@ err_port_mtu_set: err_port_speed_set: err_port_swid_set: err_port_system_port_mapping_set: - devlink_port_unregister(&mlxsw_sx_port->devlink_port); -err_devlink_port_register: port_not_usable: err_port_module_check: err_dev_addr_get: @@ -1103,15 +1102,12 @@ err_alloc_stats: static void mlxsw_sx_port_remove(struct mlxsw_sx *mlxsw_sx, u8 local_port) { struct mlxsw_sx_port *mlxsw_sx_port = mlxsw_sx->ports[local_port]; - struct devlink_port *devlink_port; if (!mlxsw_sx_port) return; - devlink_port = &mlxsw_sx_port->devlink_port; - devlink_port_type_clear(devlink_port); + mlxsw_core_port_fini(&mlxsw_sx_port->core_port); unregister_netdev(mlxsw_sx_port->dev); /* This calls ndo_stop */ mlxsw_sx_port_swid_set(mlxsw_sx_port, MLXSW_PORT_SWID_DISABLED_PORT); - devlink_port_unregister(devlink_port); free_percpu(mlxsw_sx_port->pcpu_stats); free_netdev(mlxsw_sx_port->dev); } @@ -1454,10 +1450,10 @@ static int mlxsw_sx_flood_init(struct mlxsw_sx *mlxsw_sx) return mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(sgcr), sgcr_pl); } -static int mlxsw_sx_init(void *priv, struct mlxsw_core *mlxsw_core, +static int mlxsw_sx_init(struct mlxsw_core *mlxsw_core, const struct mlxsw_bus_info *mlxsw_bus_info) { - struct mlxsw_sx *mlxsw_sx = priv; + struct mlxsw_sx *mlxsw_sx = mlxsw_core_driver_priv(mlxsw_core); int err; mlxsw_sx->core = mlxsw_core; @@ -1504,9 +1500,9 @@ err_event_register: return err; } -static void mlxsw_sx_fini(void *priv) +static void mlxsw_sx_fini(struct mlxsw_core *mlxsw_core) { - struct mlxsw_sx *mlxsw_sx = priv; + struct mlxsw_sx *mlxsw_sx = mlxsw_core_driver_priv(mlxsw_core); mlxsw_sx_traps_fini(mlxsw_sx); mlxsw_sx_event_unregister(mlxsw_sx, MLXSW_TRAP_ID_PUDE); @@ -1545,6 +1541,7 @@ static struct mlxsw_config_profile mlxsw_sx_config_profile = { .type = MLXSW_PORT_SWID_TYPE_ETH, } }, + .resource_query_enable = 0, }; static struct mlxsw_driver mlxsw_sx_driver = { diff --git a/drivers/net/ethernet/mellanox/mlxsw/trap.h b/drivers/net/ethernet/mellanox/mlxsw/trap.h index 53a9550be75e..ed8e30186400 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/trap.h +++ b/drivers/net/ethernet/mellanox/mlxsw/trap.h @@ -54,6 +54,15 @@ enum { MLXSW_TRAP_ID_IGMP_V2_REPORT = 0x32, MLXSW_TRAP_ID_IGMP_V2_LEAVE = 0x33, MLXSW_TRAP_ID_IGMP_V3_REPORT = 0x34, + MLXSW_TRAP_ID_ARPBC = 0x50, + MLXSW_TRAP_ID_ARPUC = 0x51, + MLXSW_TRAP_ID_MTUERROR = 0x52, + MLXSW_TRAP_ID_TTLERROR = 0x53, + MLXSW_TRAP_ID_LBERROR = 0x54, + MLXSW_TRAP_ID_OSPF = 0x55, + MLXSW_TRAP_ID_IP2ME = 0x5F, + MLXSW_TRAP_ID_RTR_INGRESS0 = 0x70, + MLXSW_TRAP_ID_HOST_MISS_IPV4 = 0x90, MLXSW_TRAP_ID_MAX = 0x1FF }; |