Diffstat (limited to 'drivers/net/ethernet/mellanox')
148 files changed, 17407 insertions, 7220 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx4/Kconfig b/drivers/net/ethernet/mellanox/mlx4/Kconfig index f200b8c420d5..ff8057ed97ee 100644 --- a/drivers/net/ethernet/mellanox/mlx4/Kconfig +++ b/drivers/net/ethernet/mellanox/mlx4/Kconfig @@ -4,7 +4,6 @@ config MLX4_EN tristate "Mellanox Technologies 1/10/40Gbit Ethernet support" - depends on MAY_USE_DEVLINK depends on PCI && NETDEVICES && ETHERNET && INET select MLX4_CORE imply PTP_1588_CLOCK diff --git a/drivers/net/ethernet/mellanox/mlx4/alloc.c b/drivers/net/ethernet/mellanox/mlx4/alloc.c index 9af34e03892c..b330020dc0d6 100644 --- a/drivers/net/ethernet/mellanox/mlx4/alloc.c +++ b/drivers/net/ethernet/mellanox/mlx4/alloc.c @@ -185,8 +185,7 @@ int mlx4_bitmap_init(struct mlx4_bitmap *bitmap, u32 num, u32 mask, bitmap->avail = num - reserved_top - reserved_bot; bitmap->effective_len = bitmap->avail; spin_lock_init(&bitmap->lock); - bitmap->table = kcalloc(BITS_TO_LONGS(bitmap->max), sizeof(long), - GFP_KERNEL); + bitmap->table = bitmap_zalloc(bitmap->max, GFP_KERNEL); if (!bitmap->table) return -ENOMEM; @@ -197,7 +196,7 @@ int mlx4_bitmap_init(struct mlx4_bitmap *bitmap, u32 num, u32 mask, void mlx4_bitmap_cleanup(struct mlx4_bitmap *bitmap) { - kfree(bitmap->table); + bitmap_free(bitmap->table); } struct mlx4_zone_allocator { @@ -584,8 +583,8 @@ static int mlx4_buf_direct_alloc(struct mlx4_dev *dev, int size, buf->npages = 1; buf->page_shift = get_order(size) + PAGE_SHIFT; buf->direct.buf = - dma_zalloc_coherent(&dev->persist->pdev->dev, - size, &t, GFP_KERNEL); + dma_alloc_coherent(&dev->persist->pdev->dev, size, &t, + GFP_KERNEL); if (!buf->direct.buf) return -ENOMEM; @@ -624,8 +623,8 @@ int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct, for (i = 0; i < buf->nbufs; ++i) { buf->page_list[i].buf = - dma_zalloc_coherent(&dev->persist->pdev->dev, - PAGE_SIZE, &t, GFP_KERNEL); + dma_alloc_coherent(&dev->persist->pdev->dev, + PAGE_SIZE, &t, GFP_KERNEL); if (!buf->page_list[i].buf) goto err_free; diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index e65bc3c95630..a5d5d6fc1da0 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -2645,6 +2645,8 @@ int mlx4_cmd_use_events(struct mlx4_dev *dev) if (!priv->cmd.context) return -ENOMEM; + if (mlx4_is_mfunc(dev)) + mutex_lock(&priv->cmd.slave_cmd_mutex); down_write(&priv->cmd.switch_sem); for (i = 0; i < priv->cmd.max_cmds; ++i) { priv->cmd.context[i].token = i; @@ -2670,6 +2672,8 @@ int mlx4_cmd_use_events(struct mlx4_dev *dev) down(&priv->cmd.poll_sem); priv->cmd.use_events = 1; up_write(&priv->cmd.switch_sem); + if (mlx4_is_mfunc(dev)) + mutex_unlock(&priv->cmd.slave_cmd_mutex); return err; } @@ -2682,6 +2686,8 @@ void mlx4_cmd_use_polling(struct mlx4_dev *dev) struct mlx4_priv *priv = mlx4_priv(dev); int i; + if (mlx4_is_mfunc(dev)) + mutex_lock(&priv->cmd.slave_cmd_mutex); down_write(&priv->cmd.switch_sem); priv->cmd.use_events = 0; @@ -2689,9 +2695,12 @@ void mlx4_cmd_use_polling(struct mlx4_dev *dev) down(&priv->cmd.event_sem); kfree(priv->cmd.context); + priv->cmd.context = NULL; up(&priv->cmd.poll_sem); up_write(&priv->cmd.switch_sem); + if (mlx4_is_mfunc(dev)) + mutex_unlock(&priv->cmd.slave_cmd_mutex); } struct mlx4_cmd_mailbox *mlx4_alloc_cmd_mailbox(struct mlx4_dev *dev) @@ -3274,7 +3283,7 @@ int mlx4_set_vf_link_state(struct mlx4_dev *dev, int port, int vf, int link_stat mlx4_warn(dev, "unknown value for link_state %02x on slave %d port %d\n", link_state, slave, port); 
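An aside on the mlx4/alloc.c hunks above: two open-coded patterns are swapped for dedicated kernel APIs. kcalloc(BITS_TO_LONGS(n), sizeof(long), GFP_KERNEL) becomes bitmap_zalloc(), paired with bitmap_free(); and dma_zalloc_coherent() becomes dma_alloc_coherent(), which itself returns zeroed memory on kernels of this vintage, making the zalloc variant redundant. A minimal sketch of the bitmap pairing, using a hypothetical my_bitmap type rather than the driver's struct mlx4_bitmap:

#include <linux/bitmap.h>

struct my_bitmap {			/* hypothetical stand-in for mlx4_bitmap */
	unsigned long	*table;
	u32		max;
};

static int my_bitmap_init(struct my_bitmap *bm, u32 nbits)
{
	/* One call sizes and zeroes the array; no BITS_TO_LONGS() math. */
	bm->table = bitmap_zalloc(nbits, GFP_KERNEL);
	if (!bm->table)
		return -ENOMEM;
	bm->max = nbits;
	return 0;
}

static void my_bitmap_cleanup(struct my_bitmap *bm)
{
	bitmap_free(bm->table);		/* always pair with bitmap_zalloc() */
}

The same allocation/free conversion repeats in mlx5/core/alloc.c further down in this diff.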
return -EINVAL; - }; + } s_info = &priv->mfunc.master.vf_admin[slave].vport[port]; s_info->link_state = link_state; diff --git a/drivers/net/ethernet/mellanox/mlx4/cq.c b/drivers/net/ethernet/mellanox/mlx4/cq.c index d8e9a323122e..65f8a4b6ed0c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cq.c +++ b/drivers/net/ethernet/mellanox/mlx4/cq.c @@ -144,9 +144,9 @@ void mlx4_cq_event(struct mlx4_dev *dev, u32 cqn, int event_type) } static int mlx4_SW2HW_CQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox, - int cq_num) + int cq_num, u8 opmod) { - return mlx4_cmd(dev, mailbox->dma, cq_num, 0, + return mlx4_cmd(dev, mailbox->dma, cq_num, opmod, MLX4_CMD_SW2HW_CQ, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); } @@ -287,11 +287,63 @@ static void mlx4_cq_free_icm(struct mlx4_dev *dev, int cqn) __mlx4_cq_free_icm(dev, cqn); } +static int mlx4_init_user_cqes(void *buf, int entries, int cqe_size) +{ + int entries_per_copy = PAGE_SIZE / cqe_size; + void *init_ents; + int err = 0; + int i; + + init_ents = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!init_ents) + return -ENOMEM; + + /* Populate a list of CQ entries to reduce the number of + * copy_to_user calls. 0xcc is the initialization value + * required by the FW. + */ + memset(init_ents, 0xcc, PAGE_SIZE); + + if (entries_per_copy < entries) { + for (i = 0; i < entries / entries_per_copy; i++) { + err = copy_to_user((void __user *)buf, init_ents, PAGE_SIZE) ? + -EFAULT : 0; + if (err) + goto out; + + buf += PAGE_SIZE; + } + } else { + err = copy_to_user((void __user *)buf, init_ents, entries * cqe_size) ? + -EFAULT : 0; + } + +out: + kfree(init_ents); + + return err; +} + +static void mlx4_init_kernel_cqes(struct mlx4_buf *buf, + int entries, + int cqe_size) +{ + int i; + + if (buf->nbufs == 1) + memset(buf->direct.buf, 0xcc, entries * cqe_size); + else + for (i = 0; i < buf->npages; i++) + memset(buf->page_list[i].buf, 0xcc, + 1UL << buf->page_shift); +} + int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, struct mlx4_mtt *mtt, struct mlx4_uar *uar, u64 db_rec, struct mlx4_cq *cq, unsigned vector, int collapsed, - int timestamp_en) + int timestamp_en, void *buf_addr, bool user_cq) { + bool sw_cq_init = dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_SW_CQ_INIT; struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_cq_table *cq_table = &priv->cq_table; struct mlx4_cmd_mailbox *mailbox; @@ -336,7 +388,20 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, cq_context->mtt_base_addr_l = cpu_to_be32(mtt_addr & 0xffffffff); cq_context->db_rec_addr = cpu_to_be64(db_rec); - err = mlx4_SW2HW_CQ(dev, mailbox, cq->cqn); + if (sw_cq_init) { + if (user_cq) { + err = mlx4_init_user_cqes(buf_addr, nent, + dev->caps.cqe_size); + if (err) + sw_cq_init = false; + } else { + mlx4_init_kernel_cqes(buf_addr, nent, + dev->caps.cqe_size); + } + } + + err = mlx4_SW2HW_CQ(dev, mailbox, cq->cqn, sw_cq_init); + mlx4_free_cmd_mailbox(dev, mailbox); if (err) goto err_radix; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_cq.c b/drivers/net/ethernet/mellanox/mlx4/en_cq.c index 1e487acb4667..74d466796b7c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_cq.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_cq.c @@ -54,11 +54,8 @@ int mlx4_en_create_cq(struct mlx4_en_priv *priv, cq = kzalloc_node(sizeof(*cq), GFP_KERNEL, node); if (!cq) { - cq = kzalloc(sizeof(*cq), GFP_KERNEL); - if (!cq) { - en_err(priv, "Failed to allocate CQ structure\n"); - return -ENOMEM; - } + en_err(priv, "Failed to allocate CQ structure\n"); + return -ENOMEM; } cq->size = entries; @@ -143,7 +140,7 @@ int 
mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq, cq->mcq.usage = MLX4_RES_USAGE_DRIVER; err = mlx4_cq_alloc(mdev->dev, cq->size, &cq->wqres.mtt, &mdev->priv_uar, cq->wqres.db.dma, &cq->mcq, - cq->vector, 0, timestamp_en); + cq->vector, 0, timestamp_en, &cq->wqres.buf, false); if (err) goto free_eq; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 6b88881b8e35..c1438ae52a11 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -3360,7 +3360,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, dev->addr_len = ETH_ALEN; mlx4_en_u64_to_mac(dev->dev_addr, mdev->dev->caps.def_mac[priv->port]); if (!is_valid_ether_addr(dev->dev_addr)) { - en_err(priv, "Port: %d, invalid mac burned: %pM, quiting\n", + en_err(priv, "Port: %d, invalid mac burned: %pM, quitting\n", priv->port, dev->dev_addr); err = -EINVAL; goto out; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index db00bf1c23f5..6c01314e87b0 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -271,11 +271,8 @@ int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv, ring = kzalloc_node(sizeof(*ring), GFP_KERNEL, node); if (!ring) { - ring = kzalloc(sizeof(*ring), GFP_KERNEL); - if (!ring) { - en_err(priv, "Failed to allocate RX ring structure\n"); - return -ENOMEM; - } + en_err(priv, "Failed to allocate RX ring structure\n"); + return -ENOMEM; } ring->prod = 0; @@ -620,6 +617,8 @@ static int get_fixed_ipv6_csum(__wsum hw_checksum, struct sk_buff *skb, } #endif +#define short_frame(size) ((size) <= ETH_ZLEN + ETH_FCS_LEN) + /* We reach this function only after checking that any of * the (IPv4 | IPv6) bits are set in cqe->status. */ @@ -627,9 +626,20 @@ static int check_csum(struct mlx4_cqe *cqe, struct sk_buff *skb, void *va, netdev_features_t dev_features) { __wsum hw_checksum = 0; + void *hdr; + + /* CQE csum doesn't cover padding octets in short ethernet + * frames. And the pad field is appended prior to calculating + * and appending the FCS field. + * + * Detecting these padded frames requires to verify and parse + * IP headers, so we simply force all those small frames to skip + * checksum complete. 
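For reference on the short_frame() test above: ETH_ZLEN is 60 and ETH_FCS_LEN is 4, so the macro flags any frame of 64 bytes or less — exactly the minimum-size frames that may have had zero padding appended before the FCS was computed, and therefore the frames whose hardware checksum cannot be trusted for CHECKSUM_COMPLETE.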
+ */ + if (short_frame(skb->len)) + return -EINVAL; - void *hdr = (u8 *)va + sizeof(struct ethhdr); - + hdr = (u8 *)va + sizeof(struct ethhdr); hw_checksum = csum_unfold((__force __sum16)cqe->checksum); if (cqe->vlan_my_qpn & cpu_to_be32(MLX4_CQE_CVLAN_PRESENT_MASK) && @@ -822,6 +832,11 @@ xdp_drop_no_cnt: skb_record_rx_queue(skb, cq_ring); if (likely(dev->features & NETIF_F_RXCSUM)) { + /* TODO: For IP non TCP/UDP packets when csum complete is + * not an option (not supported or any other reason) we can + * actually check cqe IPOK status bit and report + * CHECKSUM_UNNECESSARY rather than CHECKSUM_NONE + */ if ((cqe->status & cpu_to_be16(MLX4_CQE_STATUS_TCP | MLX4_CQE_STATUS_UDP)) && (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPOK)) && @@ -875,7 +890,7 @@ csum_none: skb->data_len = length; napi_gro_frags(&cq->napi); } else { - skb->vlan_tci = 0; + __vlan_hwaccel_clear_tag(skb); skb_clear_hash(skb); } next: diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index 6f5153afcab4..2cbd2bd7c67c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -57,11 +57,8 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv, ring = kzalloc_node(sizeof(*ring), GFP_KERNEL, node); if (!ring) { - ring = kzalloc(sizeof(*ring), GFP_KERNEL); - if (!ring) { - en_err(priv, "Failed allocating TX ring\n"); - return -ENOMEM; - } + en_err(priv, "Failed allocating TX ring\n"); + return -ENOMEM; } ring->size = size; diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c index 2df92dbd38e1..a5be27772b8e 100644 --- a/drivers/net/ethernet/mellanox/mlx4/eq.c +++ b/drivers/net/ethernet/mellanox/mlx4/eq.c @@ -100,7 +100,7 @@ static void eq_set_ci(struct mlx4_eq *eq, int req_not) req_not << 31), eq->doorbell); /* We still want ordering, just not swabbing, so add a barrier */ - mb(); + wmb(); } static struct mlx4_eqe *get_eqe(struct mlx4_eq *eq, u32 entry, u8 eqe_factor, @@ -558,6 +558,7 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) mlx4_dbg(dev, "%s: MLX4_EVENT_TYPE_SRQ_LIMIT. srq_no=0x%x, eq 0x%x\n", __func__, be32_to_cpu(eqe->event.srq.srqn), eq->eqn); + /* fall through */ case MLX4_EVENT_TYPE_SRQ_CATAS_ERROR: if (mlx4_is_master(dev)) { /* forward only to slave owning the SRQ */ @@ -820,7 +821,7 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) !!(eqe->owner & 0x80) ^ !!(eq->cons_index & eq->nent) ? 
"HW" : "SW"); break; - }; + } ++eq->cons_index; eqes_found = 1; diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index babcfd9c0571..6e501af0e532 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -166,6 +166,7 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags) [37] = "sl to vl mapping table change event support", [38] = "user MAC support", [39] = "Report driver version to FW support", + [40] = "SW CQ initialization support", }; int i; @@ -1098,6 +1099,8 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_FSM; if (field32 & (1 << 21)) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_80_VFS; + if (field32 & (1 << 23)) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_SW_CQ_INIT; for (i = 1; i <= dev_cap->num_ports; i++) { err = mlx4_QUERY_PORT(dev, i, dev_cap->port_cap + i); @@ -2064,9 +2067,11 @@ int mlx4_QUERY_HCA(struct mlx4_dev *dev, { struct mlx4_cmd_mailbox *mailbox; __be32 *outbox; + u64 qword_field; u32 dword_field; - int err; + u16 word_field; u8 byte_field; + int err; static const u8 a0_dmfs_query_hw_steering[] = { [0] = MLX4_STEERING_DMFS_A0_DEFAULT, [1] = MLX4_STEERING_DMFS_A0_DYNAMIC, @@ -2094,19 +2099,32 @@ int mlx4_QUERY_HCA(struct mlx4_dev *dev, /* QPC/EEC/CQC/EQC/RDMARC attributes */ - MLX4_GET(param->qpc_base, outbox, INIT_HCA_QPC_BASE_OFFSET); - MLX4_GET(param->log_num_qps, outbox, INIT_HCA_LOG_QP_OFFSET); - MLX4_GET(param->srqc_base, outbox, INIT_HCA_SRQC_BASE_OFFSET); - MLX4_GET(param->log_num_srqs, outbox, INIT_HCA_LOG_SRQ_OFFSET); - MLX4_GET(param->cqc_base, outbox, INIT_HCA_CQC_BASE_OFFSET); - MLX4_GET(param->log_num_cqs, outbox, INIT_HCA_LOG_CQ_OFFSET); - MLX4_GET(param->altc_base, outbox, INIT_HCA_ALTC_BASE_OFFSET); - MLX4_GET(param->auxc_base, outbox, INIT_HCA_AUXC_BASE_OFFSET); - MLX4_GET(param->eqc_base, outbox, INIT_HCA_EQC_BASE_OFFSET); - MLX4_GET(param->log_num_eqs, outbox, INIT_HCA_LOG_EQ_OFFSET); - MLX4_GET(param->num_sys_eqs, outbox, INIT_HCA_NUM_SYS_EQS_OFFSET); - MLX4_GET(param->rdmarc_base, outbox, INIT_HCA_RDMARC_BASE_OFFSET); - MLX4_GET(param->log_rd_per_qp, outbox, INIT_HCA_LOG_RD_OFFSET); + MLX4_GET(qword_field, outbox, INIT_HCA_QPC_BASE_OFFSET); + param->qpc_base = qword_field & ~((u64)0x1f); + MLX4_GET(byte_field, outbox, INIT_HCA_LOG_QP_OFFSET); + param->log_num_qps = byte_field & 0x1f; + MLX4_GET(qword_field, outbox, INIT_HCA_SRQC_BASE_OFFSET); + param->srqc_base = qword_field & ~((u64)0x1f); + MLX4_GET(byte_field, outbox, INIT_HCA_LOG_SRQ_OFFSET); + param->log_num_srqs = byte_field & 0x1f; + MLX4_GET(qword_field, outbox, INIT_HCA_CQC_BASE_OFFSET); + param->cqc_base = qword_field & ~((u64)0x1f); + MLX4_GET(byte_field, outbox, INIT_HCA_LOG_CQ_OFFSET); + param->log_num_cqs = byte_field & 0x1f; + MLX4_GET(qword_field, outbox, INIT_HCA_ALTC_BASE_OFFSET); + param->altc_base = qword_field; + MLX4_GET(qword_field, outbox, INIT_HCA_AUXC_BASE_OFFSET); + param->auxc_base = qword_field; + MLX4_GET(qword_field, outbox, INIT_HCA_EQC_BASE_OFFSET); + param->eqc_base = qword_field & ~((u64)0x1f); + MLX4_GET(byte_field, outbox, INIT_HCA_LOG_EQ_OFFSET); + param->log_num_eqs = byte_field & 0x1f; + MLX4_GET(word_field, outbox, INIT_HCA_NUM_SYS_EQS_OFFSET); + param->num_sys_eqs = word_field & 0xfff; + MLX4_GET(qword_field, outbox, INIT_HCA_RDMARC_BASE_OFFSET); + param->rdmarc_base = qword_field & ~((u64)0x1f); + MLX4_GET(byte_field, outbox, INIT_HCA_LOG_RD_OFFSET); + param->log_rd_per_qp = byte_field & 0x7; 
MLX4_GET(dword_field, outbox, INIT_HCA_FLAGS_OFFSET); if (dword_field & (1 << INIT_HCA_DEVICE_MANAGED_FLOW_STEERING_EN)) { @@ -2125,22 +2143,21 @@ int mlx4_QUERY_HCA(struct mlx4_dev *dev, /* steering attributes */ if (param->steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) { MLX4_GET(param->mc_base, outbox, INIT_HCA_FS_BASE_OFFSET); - MLX4_GET(param->log_mc_entry_sz, outbox, - INIT_HCA_FS_LOG_ENTRY_SZ_OFFSET); - MLX4_GET(param->log_mc_table_sz, outbox, - INIT_HCA_FS_LOG_TABLE_SZ_OFFSET); - MLX4_GET(byte_field, outbox, - INIT_HCA_FS_A0_OFFSET); + MLX4_GET(byte_field, outbox, INIT_HCA_FS_LOG_ENTRY_SZ_OFFSET); + param->log_mc_entry_sz = byte_field & 0x1f; + MLX4_GET(byte_field, outbox, INIT_HCA_FS_LOG_TABLE_SZ_OFFSET); + param->log_mc_table_sz = byte_field & 0x1f; + MLX4_GET(byte_field, outbox, INIT_HCA_FS_A0_OFFSET); param->dmfs_high_steer_mode = a0_dmfs_query_hw_steering[(byte_field >> 6) & 3]; } else { MLX4_GET(param->mc_base, outbox, INIT_HCA_MC_BASE_OFFSET); - MLX4_GET(param->log_mc_entry_sz, outbox, - INIT_HCA_LOG_MC_ENTRY_SZ_OFFSET); - MLX4_GET(param->log_mc_hash_sz, outbox, - INIT_HCA_LOG_MC_HASH_SZ_OFFSET); - MLX4_GET(param->log_mc_table_sz, outbox, - INIT_HCA_LOG_MC_TABLE_SZ_OFFSET); + MLX4_GET(byte_field, outbox, INIT_HCA_LOG_MC_ENTRY_SZ_OFFSET); + param->log_mc_entry_sz = byte_field & 0x1f; + MLX4_GET(byte_field, outbox, INIT_HCA_LOG_MC_HASH_SZ_OFFSET); + param->log_mc_hash_sz = byte_field & 0x1f; + MLX4_GET(byte_field, outbox, INIT_HCA_LOG_MC_TABLE_SZ_OFFSET); + param->log_mc_table_sz = byte_field & 0x1f; } /* CX3 is capable of extending CQEs/EQEs from 32 to 64 bytes */ @@ -2164,15 +2181,18 @@ int mlx4_QUERY_HCA(struct mlx4_dev *dev, /* TPT attributes */ MLX4_GET(param->dmpt_base, outbox, INIT_HCA_DMPT_BASE_OFFSET); - MLX4_GET(param->mw_enabled, outbox, INIT_HCA_TPT_MW_OFFSET); - MLX4_GET(param->log_mpt_sz, outbox, INIT_HCA_LOG_MPT_SZ_OFFSET); + MLX4_GET(byte_field, outbox, INIT_HCA_TPT_MW_OFFSET); + param->mw_enabled = byte_field >> 7; + MLX4_GET(byte_field, outbox, INIT_HCA_LOG_MPT_SZ_OFFSET); + param->log_mpt_sz = byte_field & 0x3f; MLX4_GET(param->mtt_base, outbox, INIT_HCA_MTT_BASE_OFFSET); MLX4_GET(param->cmpt_base, outbox, INIT_HCA_CMPT_BASE_OFFSET); /* UAR attributes */ MLX4_GET(param->uar_page_sz, outbox, INIT_HCA_UAR_PAGE_SZ_OFFSET); - MLX4_GET(param->log_uar_sz, outbox, INIT_HCA_LOG_UAR_SZ_OFFSET); + MLX4_GET(byte_field, outbox, INIT_HCA_LOG_UAR_SZ_OFFSET); + param->log_uar_sz = byte_field & 0xf; /* phv_check enable */ MLX4_GET(byte_field, outbox, INIT_HCA_CACHELINE_SZ_OFFSET); diff --git a/drivers/net/ethernet/mellanox/mlx4/icm.c b/drivers/net/ethernet/mellanox/mlx4/icm.c index 4b4351141b94..d89a3da89e5a 100644 --- a/drivers/net/ethernet/mellanox/mlx4/icm.c +++ b/drivers/net/ethernet/mellanox/mlx4/icm.c @@ -57,12 +57,12 @@ static void mlx4_free_icm_pages(struct mlx4_dev *dev, struct mlx4_icm_chunk *chu int i; if (chunk->nsg > 0) - pci_unmap_sg(dev->persist->pdev, chunk->mem, chunk->npages, - PCI_DMA_BIDIRECTIONAL); + dma_unmap_sg(&dev->persist->pdev->dev, chunk->sg, chunk->npages, + DMA_BIDIRECTIONAL); for (i = 0; i < chunk->npages; ++i) - __free_pages(sg_page(&chunk->mem[i]), - get_order(chunk->mem[i].length)); + __free_pages(sg_page(&chunk->sg[i]), + get_order(chunk->sg[i].length)); } static void mlx4_free_icm_coherent(struct mlx4_dev *dev, struct mlx4_icm_chunk *chunk) @@ -71,9 +71,9 @@ static void mlx4_free_icm_coherent(struct mlx4_dev *dev, struct mlx4_icm_chunk * for (i = 0; i < chunk->npages; ++i) dma_free_coherent(&dev->persist->pdev->dev, - 
chunk->mem[i].length, - lowmem_page_address(sg_page(&chunk->mem[i])), - sg_dma_address(&chunk->mem[i])); + chunk->buf[i].size, + chunk->buf[i].addr, + chunk->buf[i].dma_addr); } void mlx4_free_icm(struct mlx4_dev *dev, struct mlx4_icm *icm, int coherent) @@ -111,22 +111,21 @@ static int mlx4_alloc_icm_pages(struct scatterlist *mem, int order, return 0; } -static int mlx4_alloc_icm_coherent(struct device *dev, struct scatterlist *mem, - int order, gfp_t gfp_mask) +static int mlx4_alloc_icm_coherent(struct device *dev, struct mlx4_icm_buf *buf, + int order, gfp_t gfp_mask) { - void *buf = dma_alloc_coherent(dev, PAGE_SIZE << order, - &sg_dma_address(mem), gfp_mask); - if (!buf) + buf->addr = dma_alloc_coherent(dev, PAGE_SIZE << order, + &buf->dma_addr, gfp_mask); + if (!buf->addr) return -ENOMEM; - if (offset_in_page(buf)) { - dma_free_coherent(dev, PAGE_SIZE << order, - buf, sg_dma_address(mem)); + if (offset_in_page(buf->addr)) { + dma_free_coherent(dev, PAGE_SIZE << order, buf->addr, + buf->dma_addr); return -ENOMEM; } - sg_set_buf(mem, buf, PAGE_SIZE << order); - sg_dma_len(mem) = PAGE_SIZE << order; + buf->size = PAGE_SIZE << order; return 0; } @@ -159,21 +158,21 @@ struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages, while (npages > 0) { if (!chunk) { - chunk = kmalloc_node(sizeof(*chunk), + chunk = kzalloc_node(sizeof(*chunk), gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN), dev->numa_node); if (!chunk) { - chunk = kmalloc(sizeof(*chunk), + chunk = kzalloc(sizeof(*chunk), gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN)); if (!chunk) goto fail; } + chunk->coherent = coherent; - sg_init_table(chunk->mem, MLX4_ICM_CHUNK_LEN); - chunk->npages = 0; - chunk->nsg = 0; + if (!coherent) + sg_init_table(chunk->sg, MLX4_ICM_CHUNK_LEN); list_add_tail(&chunk->list, &icm->chunk_list); } @@ -186,10 +185,10 @@ struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages, if (coherent) ret = mlx4_alloc_icm_coherent(&dev->persist->pdev->dev, - &chunk->mem[chunk->npages], - cur_order, mask); + &chunk->buf[chunk->npages], + cur_order, mask); else - ret = mlx4_alloc_icm_pages(&chunk->mem[chunk->npages], + ret = mlx4_alloc_icm_pages(&chunk->sg[chunk->npages], cur_order, mask, dev->numa_node); @@ -205,9 +204,9 @@ struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages, if (coherent) ++chunk->nsg; else if (chunk->npages == MLX4_ICM_CHUNK_LEN) { - chunk->nsg = pci_map_sg(dev->persist->pdev, chunk->mem, - chunk->npages, - PCI_DMA_BIDIRECTIONAL); + chunk->nsg = dma_map_sg(&dev->persist->pdev->dev, + chunk->sg, chunk->npages, + DMA_BIDIRECTIONAL); if (chunk->nsg <= 0) goto fail; @@ -220,9 +219,8 @@ struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages, } if (!coherent && chunk) { - chunk->nsg = pci_map_sg(dev->persist->pdev, chunk->mem, - chunk->npages, - PCI_DMA_BIDIRECTIONAL); + chunk->nsg = dma_map_sg(&dev->persist->pdev->dev, chunk->sg, + chunk->npages, DMA_BIDIRECTIONAL); if (chunk->nsg <= 0) goto fail; @@ -320,7 +318,7 @@ void *mlx4_table_find(struct mlx4_icm_table *table, u32 obj, u64 idx; struct mlx4_icm_chunk *chunk; struct mlx4_icm *icm; - struct page *page = NULL; + void *addr = NULL; if (!table->lowmem) return NULL; @@ -336,28 +334,49 @@ void *mlx4_table_find(struct mlx4_icm_table *table, u32 obj, list_for_each_entry(chunk, &icm->chunk_list, list) { for (i = 0; i < chunk->npages; ++i) { + dma_addr_t dma_addr; + size_t len; + + if (table->coherent) { + len = chunk->buf[i].size; + dma_addr = chunk->buf[i].dma_addr; + addr = chunk->buf[i].addr; + } else { + struct page *page; 
+ + len = sg_dma_len(&chunk->sg[i]); + dma_addr = sg_dma_address(&chunk->sg[i]); + + /* XXX: we should never do this for highmem + * allocation. This function either needs + * to be split, or the kernel virtual address + * return needs to be made optional. + */ + page = sg_page(&chunk->sg[i]); + addr = lowmem_page_address(page); + } + if (dma_handle && dma_offset >= 0) { - if (sg_dma_len(&chunk->mem[i]) > dma_offset) - *dma_handle = sg_dma_address(&chunk->mem[i]) + - dma_offset; - dma_offset -= sg_dma_len(&chunk->mem[i]); + if (len > dma_offset) + *dma_handle = dma_addr + dma_offset; + dma_offset -= len; } + /* * DMA mapping can merge pages but not split them, * so if we found the page, dma_handle has already * been assigned to. */ - if (chunk->mem[i].length > offset) { - page = sg_page(&chunk->mem[i]); + if (len > offset) goto out; - } - offset -= chunk->mem[i].length; + offset -= len; } } + addr = NULL; out: mutex_unlock(&table->mutex); - return page ? lowmem_page_address(page) + offset : NULL; + return addr ? addr + offset : NULL; } int mlx4_table_get_range(struct mlx4_dev *dev, struct mlx4_icm_table *table, diff --git a/drivers/net/ethernet/mellanox/mlx4/icm.h b/drivers/net/ethernet/mellanox/mlx4/icm.h index c9169a490557..d199874b1c07 100644 --- a/drivers/net/ethernet/mellanox/mlx4/icm.h +++ b/drivers/net/ethernet/mellanox/mlx4/icm.h @@ -47,11 +47,21 @@ enum { MLX4_ICM_PAGE_SIZE = 1 << MLX4_ICM_PAGE_SHIFT, }; +struct mlx4_icm_buf { + void *addr; + size_t size; + dma_addr_t dma_addr; +}; + struct mlx4_icm_chunk { struct list_head list; int npages; int nsg; - struct scatterlist mem[MLX4_ICM_CHUNK_LEN]; + bool coherent; + union { + struct scatterlist sg[MLX4_ICM_CHUNK_LEN]; + struct mlx4_icm_buf buf[MLX4_ICM_CHUNK_LEN]; + }; }; struct mlx4_icm { @@ -114,12 +124,18 @@ static inline void mlx4_icm_next(struct mlx4_icm_iter *iter) static inline dma_addr_t mlx4_icm_addr(struct mlx4_icm_iter *iter) { - return sg_dma_address(&iter->chunk->mem[iter->page_idx]); + if (iter->chunk->coherent) + return iter->chunk->buf[iter->page_idx].dma_addr; + else + return sg_dma_address(&iter->chunk->sg[iter->page_idx]); } static inline unsigned long mlx4_icm_size(struct mlx4_icm_iter *iter) { - return sg_dma_len(&iter->chunk->mem[iter->page_idx]); + if (iter->chunk->coherent) + return iter->chunk->buf[iter->page_idx].size; + else + return sg_dma_len(&iter->chunk->sg[iter->page_idx]); } int mlx4_MAP_ICM_AUX(struct mlx4_dev *dev, struct mlx4_icm *icm); diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 6a046030e873..1f6e16d5ea6b 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -63,7 +63,7 @@ struct workqueue_struct *mlx4_wq; #ifdef CONFIG_MLX4_DEBUG -int mlx4_debug_level = 0; +int mlx4_debug_level; /* 0 by default */ module_param_named(debug_level, mlx4_debug_level, int, 0644); MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0"); @@ -83,7 +83,7 @@ MODULE_PARM_DESC(msi_x, "0 - don't use MSI-X, 1 - use MSI-X, >1 - limit number o static uint8_t num_vfs[3] = {0, 0, 0}; static int num_vfs_argc; -module_param_array(num_vfs, byte , &num_vfs_argc, 0444); +module_param_array(num_vfs, byte, &num_vfs_argc, 0444); MODULE_PARM_DESC(num_vfs, "enable #num_vfs functions if num_vfs > 0\n" "num_vfs=port1,port2,port1+2"); @@ -313,7 +313,7 @@ int mlx4_check_port_params(struct mlx4_dev *dev, for (i = 0; i < dev->caps.num_ports - 1; i++) { if (port_type[i] != port_type[i + 1]) { mlx4_err(dev, "Only same port types 
supported on this HCA, aborting\n"); - return -EINVAL; + return -EOPNOTSUPP; } } } @@ -322,7 +322,7 @@ int mlx4_check_port_params(struct mlx4_dev *dev, if (!(port_type[i] & dev->caps.supported_type[i+1])) { mlx4_err(dev, "Requested port type for port %d is not supported on this HCA\n", i + 1); - return -EINVAL; + return -EOPNOTSUPP; } } return 0; @@ -1188,8 +1188,7 @@ static int __set_port_type(struct mlx4_port_info *info, mlx4_err(mdev, "Requested port type for port %d is not supported on this HCA\n", info->port); - err = -EINVAL; - goto err_sup; + return -EOPNOTSUPP; } mlx4_stop_sense(mdev); @@ -1211,7 +1210,7 @@ static int __set_port_type(struct mlx4_port_info *info, for (i = 1; i <= mdev->caps.num_ports; i++) { if (mdev->caps.possible_type[i] == MLX4_PORT_TYPE_AUTO) { mdev->caps.possible_type[i] = mdev->caps.port_type[i]; - err = -EINVAL; + err = -EOPNOTSUPP; } } } @@ -1237,7 +1236,7 @@ static int __set_port_type(struct mlx4_port_info *info, out: mlx4_start_sense(mdev); mutex_unlock(&priv->port_mutex); -err_sup: + return err; } @@ -3252,7 +3251,7 @@ disable_sriov: free_mem: dev->persist->num_vfs = 0; kfree(dev->dev_vfs); - dev->dev_vfs = NULL; + dev->dev_vfs = NULL; return dev_flags & ~MLX4_FLAG_MASTER; } @@ -3982,6 +3981,7 @@ static int mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id) if (ret) goto err_params_unregister; + devlink_params_publish(devlink); pci_save_state(pdev); return 0; diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index 31bd56727022..4356f3a58002 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -2719,13 +2719,13 @@ static int qp_get_mtt_size(struct mlx4_qp_context *qpc) int total_pages; int total_mem; int page_offset = (be32_to_cpu(qpc->params2) >> 6) & 0x3f; + int tot; sq_size = 1 << (log_sq_size + log_sq_sride + 4); rq_size = (srq|rss|xrc) ? 0 : (1 << (log_rq_size + log_rq_stride + 4)); total_mem = sq_size + rq_size; - total_pages = - roundup_pow_of_two((total_mem + (page_offset << 6)) >> - page_shift); + tot = (total_mem + (page_offset << 6)) >> page_shift; + total_pages = !tot ? 
1 : roundup_pow_of_two(tot); return total_pages; } @@ -4729,7 +4729,6 @@ static void rem_slave_srqs(struct mlx4_dev *dev, int slave) struct res_srq *tmp; int state; u64 in_param; - LIST_HEAD(tlist); int srqn; int err; @@ -4795,7 +4794,6 @@ static void rem_slave_cqs(struct mlx4_dev *dev, int slave) struct res_cq *tmp; int state; u64 in_param; - LIST_HEAD(tlist); int cqn; int err; @@ -4858,7 +4856,6 @@ static void rem_slave_mrs(struct mlx4_dev *dev, int slave) struct res_mpt *tmp; int state; u64 in_param; - LIST_HEAD(tlist); int mptn; int err; @@ -4926,7 +4923,6 @@ static void rem_slave_mtts(struct mlx4_dev *dev, int slave) struct res_mtt *mtt; struct res_mtt *tmp; int state; - LIST_HEAD(tlist); int base; int err; @@ -5115,7 +5111,6 @@ static void rem_slave_eqs(struct mlx4_dev *dev, int slave) struct res_eq *tmp; int err; int state; - LIST_HEAD(tlist); int eqn; err = move_all_busy(dev, slave, RES_EQ); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig index 37a551436e4a..6debffb8336b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig +++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig @@ -4,7 +4,6 @@ config MLX5_CORE tristate "Mellanox 5th generation network adapters (ConnectX series) core driver" - depends on MAY_USE_DEVLINK depends on PCI imply PTP_1588_CLOCK imply VXLAN diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index d324a3884462..1a16f6d73cbc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -12,17 +12,17 @@ obj-$(CONFIG_MLX5_CORE) += mlx5_core.o # mlx5 core basic # mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ - health.o mcg.o cq.o srq.o alloc.o qp.o port.o mr.o pd.o \ - mad.o transobj.o vport.o sriov.o fs_cmd.o fs_core.o \ - fs_counters.o rl.o lag.o dev.o wq.o lib/gid.o \ - diag/fs_tracepoint.o diag/fw_tracer.o + health.o mcg.o cq.o alloc.o qp.o port.o mr.o pd.o \ + transobj.o vport.o sriov.o fs_cmd.o fs_core.o \ + fs_counters.o rl.o lag.o dev.o events.o wq.o lib/gid.o \ + lib/devcom.o diag/fs_tracepoint.o diag/fw_tracer.o # # Netdev basic # mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \ en_tx.o en_rx.o en_dim.o en_txrx.o en/xdp.o en_stats.o \ - en_selftest.o en/port.o + en_selftest.o en/port.o en/monitor_stats.o en/reporter_tx.o # # Netdev extra @@ -30,12 +30,12 @@ mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \ mlx5_core-$(CONFIG_MLX5_EN_ARFS) += en_arfs.o mlx5_core-$(CONFIG_MLX5_EN_RXNFC) += en_fs_ethtool.o mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o en/port_buffer.o -mlx5_core-$(CONFIG_MLX5_ESWITCH) += en_rep.o en_tc.o +mlx5_core-$(CONFIG_MLX5_ESWITCH) += en_rep.o en_tc.o en/tc_tun.o lib/port_tun.o lag_mp.o # # Core extra # -mlx5_core-$(CONFIG_MLX5_ESWITCH) += eswitch.o eswitch_offloads.o +mlx5_core-$(CONFIG_MLX5_ESWITCH) += eswitch.o eswitch_offloads.o ecpf.o mlx5_core-$(CONFIG_MLX5_MPFS) += lib/mpfs.o mlx5_core-$(CONFIG_VXLAN) += lib/vxlan.o mlx5_core-$(CONFIG_PTP_1588_CLOCK) += lib/clock.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c index 456f30007ad6..9008e17126db 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c @@ -63,8 +63,8 @@ static void *mlx5_dma_zalloc_coherent_node(struct mlx5_core_dev *dev, mutex_lock(&priv->alloc_mutex); original_node 
= dev_to_node(&dev->pdev->dev); set_dev_node(&dev->pdev->dev, node); - cpu_handle = dma_zalloc_coherent(&dev->pdev->dev, size, - dma_handle, GFP_KERNEL); + cpu_handle = dma_alloc_coherent(&dev->pdev->dev, size, dma_handle, + GFP_KERNEL); set_dev_node(&dev->pdev->dev, original_node); mutex_unlock(&priv->alloc_mutex); return cpu_handle; @@ -186,10 +186,7 @@ static struct mlx5_db_pgdir *mlx5_alloc_db_pgdir(struct mlx5_core_dev *dev, if (!pgdir) return NULL; - pgdir->bitmap = kcalloc(BITS_TO_LONGS(db_per_page), - sizeof(unsigned long), - GFP_KERNEL); - + pgdir->bitmap = bitmap_zalloc(db_per_page, GFP_KERNEL); if (!pgdir->bitmap) { kfree(pgdir); return NULL; @@ -200,7 +197,7 @@ static struct mlx5_db_pgdir *mlx5_alloc_db_pgdir(struct mlx5_core_dev *dev, pgdir->db_page = mlx5_dma_zalloc_coherent_node(dev, PAGE_SIZE, &pgdir->db_dma, node); if (!pgdir->db_page) { - kfree(pgdir->bitmap); + bitmap_free(pgdir->bitmap); kfree(pgdir); return NULL; } @@ -280,7 +277,7 @@ void mlx5_db_free(struct mlx5_core_dev *dev, struct mlx5_db *db) dma_free_coherent(&(dev->pdev->dev), PAGE_SIZE, db->u.pgdir->db_page, db->u.pgdir->db_dma); list_del(&db->u.pgdir->list); - kfree(db->u.pgdir->bitmap); + bitmap_free(db->u.pgdir->bitmap); kfree(db->u.pgdir); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index a5a0823e5ada..be48c6440251 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -40,9 +40,11 @@ #include <linux/random.h> #include <linux/io-mapping.h> #include <linux/mlx5/driver.h> +#include <linux/mlx5/eq.h> #include <linux/debugfs.h> #include "mlx5_core.h" +#include "lib/eq.h" enum { CMD_IF_REV = 5, @@ -313,6 +315,8 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_FPGA_DESTROY_QP: case MLX5_CMD_OP_DESTROY_GENERAL_OBJECT: case MLX5_CMD_OP_DEALLOC_MEMIC: + case MLX5_CMD_OP_PAGE_FAULT_RESUME: + case MLX5_CMD_OP_QUERY_HOST_PARAMS: return MLX5_CMD_STAT_OK; case MLX5_CMD_OP_QUERY_HCA_CAP: @@ -326,7 +330,6 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_CREATE_MKEY: case MLX5_CMD_OP_QUERY_MKEY: case MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS: - case MLX5_CMD_OP_PAGE_FAULT_RESUME: case MLX5_CMD_OP_CREATE_EQ: case MLX5_CMD_OP_QUERY_EQ: case MLX5_CMD_OP_GEN_EQE: @@ -371,6 +374,8 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_QUERY_VPORT_COUNTER: case MLX5_CMD_OP_ALLOC_Q_COUNTER: case MLX5_CMD_OP_QUERY_Q_COUNTER: + case MLX5_CMD_OP_SET_MONITOR_COUNTER: + case MLX5_CMD_OP_ARM_MONITOR_COUNTER: case MLX5_CMD_OP_SET_PP_RATE_LIMIT: case MLX5_CMD_OP_QUERY_RATE_LIMIT: case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT: @@ -520,6 +525,8 @@ const char *mlx5_command_str(int command) MLX5_COMMAND_STR_CASE(ALLOC_Q_COUNTER); MLX5_COMMAND_STR_CASE(DEALLOC_Q_COUNTER); MLX5_COMMAND_STR_CASE(QUERY_Q_COUNTER); + MLX5_COMMAND_STR_CASE(SET_MONITOR_COUNTER); + MLX5_COMMAND_STR_CASE(ARM_MONITOR_COUNTER); MLX5_COMMAND_STR_CASE(SET_PP_RATE_LIMIT); MLX5_COMMAND_STR_CASE(QUERY_RATE_LIMIT); MLX5_COMMAND_STR_CASE(CREATE_SCHEDULING_ELEMENT); @@ -621,6 +628,7 @@ const char *mlx5_command_str(int command) MLX5_COMMAND_STR_CASE(QUERY_MODIFY_HEADER_CONTEXT); MLX5_COMMAND_STR_CASE(ALLOC_MEMIC); MLX5_COMMAND_STR_CASE(DEALLOC_MEMIC); + MLX5_COMMAND_STR_CASE(QUERY_HOST_PARAMS); default: return "unknown command opcode"; } } @@ -805,6 +813,8 @@ static u16 msg_to_opcode(struct mlx5_cmd_msg *in) return MLX5_GET(mbox_in, 
in->first.data, opcode); } +static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced); + static void cb_timeout_handler(struct work_struct *work) { struct delayed_work *dwork = container_of(work, struct delayed_work, @@ -1412,14 +1422,32 @@ static void mlx5_cmd_change_mod(struct mlx5_core_dev *dev, int mode) up(&cmd->sem); } +static int cmd_comp_notifier(struct notifier_block *nb, + unsigned long type, void *data) +{ + struct mlx5_core_dev *dev; + struct mlx5_cmd *cmd; + struct mlx5_eqe *eqe; + + cmd = mlx5_nb_cof(nb, struct mlx5_cmd, nb); + dev = container_of(cmd, struct mlx5_core_dev, cmd); + eqe = data; + + mlx5_cmd_comp_handler(dev, be32_to_cpu(eqe->data.cmd.vector), false); + + return NOTIFY_OK; +} void mlx5_cmd_use_events(struct mlx5_core_dev *dev) { + MLX5_NB_INIT(&dev->cmd.nb, cmd_comp_notifier, CMD); + mlx5_eq_notifier_register(dev, &dev->cmd.nb); mlx5_cmd_change_mod(dev, CMD_MODE_EVENTS); } void mlx5_cmd_use_polling(struct mlx5_core_dev *dev) { mlx5_cmd_change_mod(dev, CMD_MODE_POLLING); + mlx5_eq_notifier_unregister(dev, &dev->cmd.nb); } static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg) @@ -1435,7 +1463,7 @@ static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg) } } -void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced) +static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced) { struct mlx5_cmd *cmd = &dev->cmd; struct mlx5_cmd_work_ent *ent; @@ -1533,7 +1561,47 @@ void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced) } } } -EXPORT_SYMBOL(mlx5_cmd_comp_handler); + +void mlx5_cmd_trigger_completions(struct mlx5_core_dev *dev) +{ + unsigned long flags; + u64 vector; + + /* wait for pending handlers to complete */ + mlx5_eq_synchronize_cmd_irq(dev); + spin_lock_irqsave(&dev->cmd.alloc_lock, flags); + vector = ~dev->cmd.bitmask & ((1ul << (1 << dev->cmd.log_sz)) - 1); + if (!vector) + goto no_trig; + + vector |= MLX5_TRIGGERED_CMD_COMP; + spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags); + + mlx5_core_dbg(dev, "vector 0x%llx\n", vector); + mlx5_cmd_comp_handler(dev, vector, true); + return; + +no_trig: + spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags); +} + +void mlx5_cmd_flush(struct mlx5_core_dev *dev) +{ + struct mlx5_cmd *cmd = &dev->cmd; + int i; + + for (i = 0; i < cmd->max_reg_cmds; i++) + while (down_trylock(&cmd->sem)) + mlx5_cmd_trigger_completions(dev); + + while (down_trylock(&cmd->pages_sem)) + mlx5_cmd_trigger_completions(dev); + + /* Unlock cmdif */ + up(&cmd->pages_sem); + for (i = 0; i < cmd->max_reg_cmds; i++) + up(&cmd->sem); +} static int status_to_err(u8 status) { @@ -1663,12 +1731,57 @@ int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, } EXPORT_SYMBOL(mlx5_cmd_exec); -int mlx5_cmd_exec_cb(struct mlx5_core_dev *dev, void *in, int in_size, - void *out, int out_size, mlx5_cmd_cbk_t callback, - void *context) +void mlx5_cmd_init_async_ctx(struct mlx5_core_dev *dev, + struct mlx5_async_ctx *ctx) +{ + ctx->dev = dev; + /* Starts at 1 to avoid doing wake_up if we are not cleaning up */ + atomic_set(&ctx->num_inflight, 1); + init_waitqueue_head(&ctx->wait); +} +EXPORT_SYMBOL(mlx5_cmd_init_async_ctx); + +/** + * mlx5_cmd_cleanup_async_ctx - Clean up an async_ctx + * @ctx: The ctx to clean + * + * Upon return all callbacks given to mlx5_cmd_exec_cb() have been called. The + * caller must ensure that mlx5_cmd_exec_cb() is not called during or after + * the call mlx5_cleanup_async_ctx(). 
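The new mlx5_async_ctx API above replaces the old mlx5_cmd_exec_cb(dev, ...) form with a context that counts commands in flight: num_inflight starts at 1, each mlx5_cmd_exec_cb() takes a reference via atomic_inc_not_zero(), and cleanup drops the initial reference and waits for the count to reach zero. A hedged usage sketch, with my_obj and my_done as hypothetical caller-side names:

struct my_obj {
	struct mlx5_async_work work;	/* must stay alive until my_done() runs */
};

static void my_done(int status, struct mlx5_async_work *work)
{
	struct my_obj *obj = container_of(work, struct my_obj, work);

	/* Invoked from the command-completion path once the FW answers. */
}

	struct mlx5_async_ctx ctx;

	mlx5_cmd_init_async_ctx(mdev, &ctx);
	err = mlx5_cmd_exec_cb(&ctx, in, sizeof(in), out, sizeof(out),
			       my_done, &obj->work);
	/* ... more async commands may be issued against the same ctx ... */
	mlx5_cmd_cleanup_async_ctx(&ctx);	/* returns after all callbacks ran */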
+ */ +void mlx5_cmd_cleanup_async_ctx(struct mlx5_async_ctx *ctx) +{ + atomic_dec(&ctx->num_inflight); + wait_event(ctx->wait, atomic_read(&ctx->num_inflight) == 0); +} +EXPORT_SYMBOL(mlx5_cmd_cleanup_async_ctx); + +static void mlx5_cmd_exec_cb_handler(int status, void *_work) { - return cmd_exec(dev, in, in_size, out, out_size, callback, context, - false); + struct mlx5_async_work *work = _work; + struct mlx5_async_ctx *ctx = work->ctx; + + work->user_callback(status, work); + if (atomic_dec_and_test(&ctx->num_inflight)) + wake_up(&ctx->wait); +} + +int mlx5_cmd_exec_cb(struct mlx5_async_ctx *ctx, void *in, int in_size, + void *out, int out_size, mlx5_async_cbk_t callback, + struct mlx5_async_work *work) +{ + int ret; + + work->ctx = ctx; + work->user_callback = callback; + if (WARN_ON(!atomic_inc_not_zero(&ctx->num_inflight))) + return -EIO; + ret = cmd_exec(ctx->dev, in, in_size, out, out_size, + mlx5_cmd_exec_cb_handler, work, false); + if (ret && atomic_dec_and_test(&ctx->num_inflight)) + wake_up(&ctx->wait); + + return ret; } EXPORT_SYMBOL(mlx5_cmd_exec_cb); @@ -1741,8 +1854,8 @@ static int alloc_cmd_page(struct mlx5_core_dev *dev, struct mlx5_cmd *cmd) { struct device *ddev = &dev->pdev->dev; - cmd->cmd_alloc_buf = dma_zalloc_coherent(ddev, MLX5_ADAPTER_PAGE_SIZE, - &cmd->alloc_dma, GFP_KERNEL); + cmd->cmd_alloc_buf = dma_alloc_coherent(ddev, MLX5_ADAPTER_PAGE_SIZE, + &cmd->alloc_dma, GFP_KERNEL); if (!cmd->cmd_alloc_buf) return -ENOMEM; @@ -1756,9 +1869,9 @@ static int alloc_cmd_page(struct mlx5_core_dev *dev, struct mlx5_cmd *cmd) dma_free_coherent(ddev, MLX5_ADAPTER_PAGE_SIZE, cmd->cmd_alloc_buf, cmd->alloc_dma); - cmd->cmd_alloc_buf = dma_zalloc_coherent(ddev, - 2 * MLX5_ADAPTER_PAGE_SIZE - 1, - &cmd->alloc_dma, GFP_KERNEL); + cmd->cmd_alloc_buf = dma_alloc_coherent(ddev, + 2 * MLX5_ADAPTER_PAGE_SIZE - 1, + &cmd->alloc_dma, GFP_KERNEL); if (!cmd->cmd_alloc_buf) return -ENOMEM; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cq.c b/drivers/net/ethernet/mellanox/mlx5/core/cq.c index 4b85abb5c9f7..713a17ee3751 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cq.c @@ -38,6 +38,7 @@ #include <rdma/ib_verbs.h> #include <linux/mlx5/cq.h> #include "mlx5_core.h" +#include "lib/eq.h" #define TASKLET_MAX_TIME 2 #define TASKLET_MAX_TIME_JIFFIES msecs_to_jiffies(TASKLET_MAX_TIME) @@ -92,10 +93,10 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, u32 dout[MLX5_ST_SZ_DW(destroy_cq_out)]; u32 out[MLX5_ST_SZ_DW(create_cq_out)]; u32 din[MLX5_ST_SZ_DW(destroy_cq_in)]; - struct mlx5_eq *eq; + struct mlx5_eq_comp *eq; int err; - eq = mlx5_eqn2eq(dev, eqn); + eq = mlx5_eqn2comp_eq(dev, eqn); if (IS_ERR(eq)) return PTR_ERR(eq); @@ -119,12 +120,12 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, INIT_LIST_HEAD(&cq->tasklet_ctx.list); /* Add to comp EQ CQ tree to recv comp events */ - err = mlx5_eq_add_cq(eq, cq); + err = mlx5_eq_add_cq(&eq->core, cq); if (err) goto err_cmd; /* Add to async EQ CQ tree to recv async events */ - err = mlx5_eq_add_cq(&dev->priv.eq_table.async_eq, cq); + err = mlx5_eq_add_cq(mlx5_get_async_eq(dev), cq); if (err) goto err_cq_add; @@ -139,7 +140,7 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, return 0; err_cq_add: - mlx5_eq_del_cq(eq, cq); + mlx5_eq_del_cq(&eq->core, cq); err_cmd: memset(din, 0, sizeof(din)); memset(dout, 0, sizeof(dout)); @@ -157,11 +158,11 @@ int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct 
mlx5_core_cq *cq) u32 in[MLX5_ST_SZ_DW(destroy_cq_in)] = {0}; int err; - err = mlx5_eq_del_cq(&dev->priv.eq_table.async_eq, cq); + err = mlx5_eq_del_cq(mlx5_get_async_eq(dev), cq); if (err) return err; - err = mlx5_eq_del_cq(cq->eq, cq); + err = mlx5_eq_del_cq(&cq->eq->core, cq); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c index 90fabd612b6c..a11e22d0b0cc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c @@ -36,6 +36,7 @@ #include <linux/mlx5/cq.h> #include <linux/mlx5/driver.h> #include "mlx5_core.h" +#include "lib/eq.h" enum { QP_PID, @@ -349,6 +350,16 @@ out: return param; } +static int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq, + u32 *out, int outlen) +{ + u32 in[MLX5_ST_SZ_DW(query_eq_in)] = {}; + + MLX5_SET(query_eq_in, in, opcode, MLX5_CMD_OP_QUERY_EQ); + MLX5_SET(query_eq_in, in, eq_number, eq->eqn); + return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); +} + static u64 eq_read_field(struct mlx5_core_dev *dev, struct mlx5_eq *eq, int index) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c index 37ba7c78859d..ebc046fa97d3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c @@ -45,75 +45,11 @@ struct mlx5_device_context { unsigned long state; }; -struct mlx5_delayed_event { - struct list_head list; - struct mlx5_core_dev *dev; - enum mlx5_dev_event event; - unsigned long param; -}; - enum { MLX5_INTERFACE_ADDED, MLX5_INTERFACE_ATTACHED, }; -static void add_delayed_event(struct mlx5_priv *priv, - struct mlx5_core_dev *dev, - enum mlx5_dev_event event, - unsigned long param) -{ - struct mlx5_delayed_event *delayed_event; - - delayed_event = kzalloc(sizeof(*delayed_event), GFP_ATOMIC); - if (!delayed_event) { - mlx5_core_err(dev, "event %d is missed\n", event); - return; - } - - mlx5_core_dbg(dev, "Accumulating event %d\n", event); - delayed_event->dev = dev; - delayed_event->event = event; - delayed_event->param = param; - list_add_tail(&delayed_event->list, &priv->waiting_events_list); -} - -static void delayed_event_release(struct mlx5_device_context *dev_ctx, - struct mlx5_priv *priv) -{ - struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv); - struct mlx5_delayed_event *de; - struct mlx5_delayed_event *n; - struct list_head temp; - - INIT_LIST_HEAD(&temp); - - spin_lock_irq(&priv->ctx_lock); - - priv->is_accum_events = false; - list_splice_init(&priv->waiting_events_list, &temp); - if (!dev_ctx->context) - goto out; - list_for_each_entry_safe(de, n, &temp, list) - dev_ctx->intf->event(dev, dev_ctx->context, de->event, de->param); - -out: - spin_unlock_irq(&priv->ctx_lock); - - list_for_each_entry_safe(de, n, &temp, list) { - list_del(&de->list); - kfree(de); - } -} - -/* accumulating events that can come after mlx5_ib calls to - * ib_register_device, till adding that interface to the events list. 
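The dev.c deletions here reflect this series' move of mlx5 event delivery away from per-interface ->event() callbacks (and the delayed-event accumulation above) to notifier chains on the EQs. A consumer now embeds a struct mlx5_nb, binds it to an event type with MLX5_NB_INIT(), and registers it, as the cmd.c and fw_tracer hunks elsewhere in this diff do. The shape in isolation, with my_state and my_handler as hypothetical names:

struct my_state {
	struct mlx5_nb nb;
};

static int my_handler(struct notifier_block *nb, unsigned long type, void *data)
{
	struct my_state *st = mlx5_nb_cof(nb, struct my_state, nb);
	struct mlx5_eqe *eqe = data;	/* raw event queue entry */

	/* Runs from EQ interrupt context; keep the handler short. */
	return NOTIFY_OK;
}

	MLX5_NB_INIT(&st->nb, my_handler, PORT_CHANGE); /* MLX5_EVENT_TYPE_PORT_CHANGE */
	mlx5_eq_notifier_register(dev, &st->nb);
	/* ... */
	mlx5_eq_notifier_unregister(dev, &st->nb);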
- */ -static void delayed_event_start(struct mlx5_priv *priv) -{ - spin_lock_irq(&priv->ctx_lock); - priv->is_accum_events = true; - spin_unlock_irq(&priv->ctx_lock); -} void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv) { @@ -129,8 +65,6 @@ void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv) dev_ctx->intf = intf; - delayed_event_start(priv); - dev_ctx->context = intf->add(dev); if (dev_ctx->context) { set_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state); @@ -139,22 +73,9 @@ void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv) spin_lock_irq(&priv->ctx_lock); list_add_tail(&dev_ctx->list, &priv->ctx_list); - -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - if (dev_ctx->intf->pfault) { - if (priv->pfault) { - mlx5_core_err(dev, "multiple page fault handlers not supported"); - } else { - priv->pfault_ctx = dev_ctx->context; - priv->pfault = dev_ctx->intf->pfault; - } - } -#endif spin_unlock_irq(&priv->ctx_lock); } - delayed_event_release(dev_ctx, priv); - if (!dev_ctx->context) kfree(dev_ctx); } @@ -179,15 +100,6 @@ void mlx5_remove_device(struct mlx5_interface *intf, struct mlx5_priv *priv) if (!dev_ctx) return; -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - spin_lock_irq(&priv->ctx_lock); - if (priv->pfault == dev_ctx->intf->pfault) - priv->pfault = NULL; - spin_unlock_irq(&priv->ctx_lock); - - synchronize_srcu(&priv->pfault_srcu); -#endif - spin_lock_irq(&priv->ctx_lock); list_del(&dev_ctx->list); spin_unlock_irq(&priv->ctx_lock); @@ -207,26 +119,20 @@ static void mlx5_attach_interface(struct mlx5_interface *intf, struct mlx5_priv if (!dev_ctx) return; - delayed_event_start(priv); if (intf->attach) { if (test_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state)) - goto out; + return; if (intf->attach(dev, dev_ctx->context)) - goto out; - + return; set_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state); } else { if (test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state)) - goto out; + return; dev_ctx->context = intf->add(dev); if (!dev_ctx->context) - goto out; - + return; set_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state); } - -out: - delayed_event_release(dev_ctx, priv); } void mlx5_attach_device(struct mlx5_core_dev *dev) @@ -350,28 +256,6 @@ void mlx5_reload_interface(struct mlx5_core_dev *mdev, int protocol) mutex_unlock(&mlx5_intf_mutex); } -void *mlx5_get_protocol_dev(struct mlx5_core_dev *mdev, int protocol) -{ - struct mlx5_priv *priv = &mdev->priv; - struct mlx5_device_context *dev_ctx; - unsigned long flags; - void *result = NULL; - - spin_lock_irqsave(&priv->ctx_lock, flags); - - list_for_each_entry(dev_ctx, &mdev->priv.ctx_list, list) - if ((dev_ctx->intf->protocol == protocol) && - dev_ctx->intf->get_dev) { - result = dev_ctx->intf->get_dev(dev_ctx->context); - break; - } - - spin_unlock_irqrestore(&priv->ctx_lock, flags); - - return result; -} -EXPORT_SYMBOL(mlx5_get_protocol_dev); - /* Must be called with intf_mutex held */ void mlx5_add_dev_by_protocol(struct mlx5_core_dev *dev, int protocol) { @@ -422,44 +306,6 @@ struct mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev) return res; } -void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event, - unsigned long param) -{ - struct mlx5_priv *priv = &dev->priv; - struct mlx5_device_context *dev_ctx; - unsigned long flags; - - spin_lock_irqsave(&priv->ctx_lock, flags); - - if (priv->is_accum_events) - add_delayed_event(priv, dev, event, param); - - /* After mlx5_detach_device, the dev_ctx->intf is still set and dev_ctx is - * still in priv->ctx_list. 
In this case, only notify the dev_ctx if its - * ADDED or ATTACHED bit are set. - */ - list_for_each_entry(dev_ctx, &priv->ctx_list, list) - if (dev_ctx->intf->event && - (test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state) || - test_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state))) - dev_ctx->intf->event(dev, dev_ctx->context, event, param); - - spin_unlock_irqrestore(&priv->ctx_lock, flags); -} - -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING -void mlx5_core_page_fault(struct mlx5_core_dev *dev, - struct mlx5_pagefault *pfault) -{ - struct mlx5_priv *priv = &dev->priv; - int srcu_idx; - - srcu_idx = srcu_read_lock(&priv->pfault_srcu); - if (priv->pfault) - priv->pfault(dev, priv->pfault_ctx, pfault); - srcu_read_unlock(&priv->pfault_srcu, srcu_idx); -} -#endif void mlx5_dev_list_lock(void) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c index 0f11fff32a9b..8ecac81a385d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c @@ -161,10 +161,10 @@ static void print_misc_parameters_hdrs(struct trace_seq *p, PRINT_MASKED_VAL(name, p, format); \ } DECLARE_MASK_VAL(u64, gre_key) = { - .m = MLX5_GET(fte_match_set_misc, mask, gre_key_h) << 8 | - MLX5_GET(fte_match_set_misc, mask, gre_key_l), - .v = MLX5_GET(fte_match_set_misc, value, gre_key_h) << 8 | - MLX5_GET(fte_match_set_misc, value, gre_key_l)}; + .m = MLX5_GET(fte_match_set_misc, mask, gre_key.nvgre.hi) << 8 | + MLX5_GET(fte_match_set_misc, mask, gre_key.nvgre.lo), + .v = MLX5_GET(fte_match_set_misc, value, gre_key.nvgre.hi) << 8 | + MLX5_GET(fte_match_set_misc, value, gre_key.nvgre.lo)}; PRINT_MASKED_VAL(gre_key, p, "%llu"); PRINT_MASKED_VAL_MISC(u32, source_sqn, source_sqn, p, "%u"); @@ -258,6 +258,8 @@ const char *parse_fs_dst(struct trace_seq *p, return ret; } +EXPORT_TRACEPOINT_SYMBOL(mlx5_fs_add_ft); +EXPORT_TRACEPOINT_SYMBOL(mlx5_fs_del_ft); EXPORT_TRACEPOINT_SYMBOL(mlx5_fs_add_fg); EXPORT_TRACEPOINT_SYMBOL(mlx5_fs_del_fg); EXPORT_TRACEPOINT_SYMBOL(mlx5_fs_set_fte); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h index d027ce00c8ce..a4cf123e3f17 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h @@ -61,6 +61,41 @@ const char *parse_fs_dst(struct trace_seq *p, const struct mlx5_flow_destination *dst, u32 counter_id); +TRACE_EVENT(mlx5_fs_add_ft, + TP_PROTO(const struct mlx5_flow_table *ft), + TP_ARGS(ft), + TP_STRUCT__entry( + __field(const struct mlx5_flow_table *, ft) + __field(u32, id) + __field(u32, level) + __field(u32, type) + ), + TP_fast_assign( + __entry->ft = ft; + __entry->id = ft->id; + __entry->level = ft->level; + __entry->type = ft->type; + ), + TP_printk("ft=%p id=%u level=%u type=%u \n", + __entry->ft, __entry->id, __entry->level, __entry->type) + ); + +TRACE_EVENT(mlx5_fs_del_ft, + TP_PROTO(const struct mlx5_flow_table *ft), + TP_ARGS(ft), + TP_STRUCT__entry( + __field(const struct mlx5_flow_table *, ft) + __field(u32, id) + ), + TP_fast_assign( + __entry->ft = ft; + __entry->id = ft->id; + + ), + TP_printk("ft=%p id=%u\n", + __entry->ft, __entry->id) + ); + TRACE_EVENT(mlx5_fs_add_fg, TP_PROTO(const struct mlx5_flow_group *fg), TP_ARGS(fg), diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c index 
d4ec93bde4de..6999f4486e9e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c @@ -30,6 +30,7 @@ * SOFTWARE. */ #define CREATE_TRACE_POINTS +#include "lib/eq.h" #include "fw_tracer.h" #include "fw_tracer_tracepoint.h" @@ -846,9 +847,9 @@ free_tracer: return ERR_PTR(err); } -/* Create HW resources + start tracer - * must be called before Async EQ is created - */ +static int fw_tracer_event(struct notifier_block *nb, unsigned long action, void *data); + +/* Create HW resources + start tracer */ int mlx5_fw_tracer_init(struct mlx5_fw_tracer *tracer) { struct mlx5_core_dev *dev; @@ -874,6 +875,9 @@ int mlx5_fw_tracer_init(struct mlx5_fw_tracer *tracer) goto err_dealloc_pd; } + MLX5_NB_INIT(&tracer->nb, fw_tracer_event, DEVICE_TRACER); + mlx5_eq_notifier_register(dev, &tracer->nb); + mlx5_fw_tracer_start(tracer); return 0; @@ -883,9 +887,7 @@ err_dealloc_pd: return err; } -/* Stop tracer + Cleanup HW resources - * must be called after Async EQ is destroyed - */ +/* Stop tracer + Cleanup HW resources */ void mlx5_fw_tracer_cleanup(struct mlx5_fw_tracer *tracer) { if (IS_ERR_OR_NULL(tracer)) @@ -893,7 +895,7 @@ void mlx5_fw_tracer_cleanup(struct mlx5_fw_tracer *tracer) mlx5_core_dbg(tracer->dev, "FWTracer: Cleanup, is owner ? (%d)\n", tracer->owner); - + mlx5_eq_notifier_unregister(tracer->dev, &tracer->nb); cancel_work_sync(&tracer->ownership_change_work); cancel_work_sync(&tracer->handle_traces_work); @@ -922,12 +924,11 @@ void mlx5_fw_tracer_destroy(struct mlx5_fw_tracer *tracer) kfree(tracer); } -void mlx5_fw_tracer_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe) +static int fw_tracer_event(struct notifier_block *nb, unsigned long action, void *data) { - struct mlx5_fw_tracer *tracer = dev->tracer; - - if (!tracer) - return; + struct mlx5_fw_tracer *tracer = mlx5_nb_cof(nb, struct mlx5_fw_tracer, nb); + struct mlx5_core_dev *dev = tracer->dev; + struct mlx5_eqe *eqe = data; switch (eqe->sub_type) { case MLX5_TRACER_SUBTYPE_OWNERSHIP_CHANGE: @@ -942,6 +943,8 @@ void mlx5_fw_tracer_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe) mlx5_core_dbg(dev, "FWTracer: Event with unrecognized subtype: sub_type %d\n", eqe->sub_type); } + + return NOTIFY_OK; } EXPORT_TRACEPOINT_SYMBOL(mlx5_fw); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h index 0347f2dd5cee..a8b8747f2b61 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h @@ -55,6 +55,7 @@ struct mlx5_fw_tracer { struct mlx5_core_dev *dev; + struct mlx5_nb nb; bool owner; u8 trc_ver; struct workqueue_struct *work_queue; @@ -170,6 +171,5 @@ struct mlx5_fw_tracer *mlx5_fw_tracer_create(struct mlx5_core_dev *dev); int mlx5_fw_tracer_init(struct mlx5_fw_tracer *tracer); void mlx5_fw_tracer_cleanup(struct mlx5_fw_tracer *tracer); void mlx5_fw_tracer_destroy(struct mlx5_fw_tracer *tracer); -void mlx5_fw_tracer_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe); #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c new file mode 100644 index 000000000000..4746f2d28fb6 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c @@ -0,0 +1,112 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. 
*/ + +#include "ecpf.h" + +bool mlx5_read_embedded_cpu(struct mlx5_core_dev *dev) +{ + return (ioread32be(&dev->iseg->initializing) >> MLX5_ECPU_BIT_NUM) & 1; +} + +static int mlx5_peer_pf_enable_hca(struct mlx5_core_dev *dev) +{ + u32 out[MLX5_ST_SZ_DW(enable_hca_out)] = {}; + u32 in[MLX5_ST_SZ_DW(enable_hca_in)] = {}; + + MLX5_SET(enable_hca_in, in, opcode, MLX5_CMD_OP_ENABLE_HCA); + MLX5_SET(enable_hca_in, in, function_id, 0); + MLX5_SET(enable_hca_in, in, embedded_cpu_function, 0); + return mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); +} + +static int mlx5_peer_pf_disable_hca(struct mlx5_core_dev *dev) +{ + u32 out[MLX5_ST_SZ_DW(disable_hca_out)] = {}; + u32 in[MLX5_ST_SZ_DW(disable_hca_in)] = {}; + + MLX5_SET(disable_hca_in, in, opcode, MLX5_CMD_OP_DISABLE_HCA); + MLX5_SET(disable_hca_in, in, function_id, 0); + MLX5_SET(enable_hca_in, in, embedded_cpu_function, 0); + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} + +static int mlx5_peer_pf_init(struct mlx5_core_dev *dev) +{ + int err; + + err = mlx5_peer_pf_enable_hca(dev); + if (err) + mlx5_core_err(dev, "Failed to enable peer PF HCA err(%d)\n", + err); + + return err; +} + +static void mlx5_peer_pf_cleanup(struct mlx5_core_dev *dev) +{ + int err; + + err = mlx5_peer_pf_disable_hca(dev); + if (err) { + mlx5_core_err(dev, "Failed to disable peer PF HCA err(%d)\n", + err); + return; + } + + err = mlx5_wait_for_pages(dev, &dev->priv.peer_pf_pages); + if (err) + mlx5_core_warn(dev, "Timeout reclaiming peer PF pages err(%d)\n", + err); +} + +int mlx5_ec_init(struct mlx5_core_dev *dev) +{ + int err = 0; + + if (!mlx5_core_is_ecpf(dev)) + return 0; + + /* ECPF shall enable HCA for peer PF in the same way a PF + * does this for its VFs. + */ + err = mlx5_peer_pf_init(dev); + if (err) + return err; + + return 0; +} + +void mlx5_ec_cleanup(struct mlx5_core_dev *dev) +{ + if (!mlx5_core_is_ecpf(dev)) + return; + + mlx5_peer_pf_cleanup(dev); +} + +static int mlx5_query_host_params_context(struct mlx5_core_dev *dev, + u32 *out, int outlen) +{ + u32 in[MLX5_ST_SZ_DW(query_host_params_in)] = {}; + + MLX5_SET(query_host_params_in, in, opcode, + MLX5_CMD_OP_QUERY_HOST_PARAMS); + + return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); +} + +int mlx5_query_host_params_num_vfs(struct mlx5_core_dev *dev, int *num_vf) +{ + u32 out[MLX5_ST_SZ_DW(query_host_params_out)] = {}; + int err; + + err = mlx5_query_host_params_context(dev, out, sizeof(out)); + if (err) + return err; + + *num_vf = MLX5_GET(query_host_params_out, out, + host_params_context.host_num_of_vfs); + mlx5_core_dbg(dev, "host_num_of_vfs %d\n", *num_vf); + + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h new file mode 100644 index 000000000000..346372df218f --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. 
*/ + +#ifndef __MLX5_ECPF_H__ +#define __MLX5_ECPF_H__ + +#include <linux/mlx5/driver.h> +#include "mlx5_core.h" + +#ifdef CONFIG_MLX5_ESWITCH + +enum { + MLX5_ECPU_BIT_NUM = 23, +}; + +bool mlx5_read_embedded_cpu(struct mlx5_core_dev *dev); +int mlx5_ec_init(struct mlx5_core_dev *dev); +void mlx5_ec_cleanup(struct mlx5_core_dev *dev); +int mlx5_query_host_params_num_vfs(struct mlx5_core_dev *dev, int *num_vf); + +#else /* CONFIG_MLX5_ESWITCH */ + +static inline bool +mlx5_read_embedded_cpu(struct mlx5_core_dev *dev) { return false; } +static inline int mlx5_ec_init(struct mlx5_core_dev *dev) { return 0; } +static inline void mlx5_ec_cleanup(struct mlx5_core_dev *dev) {} +static inline int +mlx5_query_host_params_num_vfs(struct mlx5_core_dev *dev, int *num_vf) +{ return -EOPNOTSUPP; } + +#endif /* CONFIG_MLX5_ESWITCH */ + +#endif /* __MLX5_ECPF_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 118324802926..d3eaf2ceaa39 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -49,6 +49,7 @@ #include <net/switchdev.h> #include <net/xdp.h> #include <linux/net_dim.h> +#include <linux/bits.h> #include "wq.h" #include "mlx5_core.h" #include "en_stats.h" @@ -75,15 +76,14 @@ struct page_pool; #define MLX5_SKB_FRAG_SZ(len) (SKB_DATA_ALIGN(len) + \ SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) +#define MLX5E_RX_MAX_HEAD (256) + #define MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(mdev) \ (6 + MLX5_CAP_GEN(mdev, cache_line_128byte)) /* HW restriction */ #define MLX5_MPWRQ_LOG_STRIDE_SZ(mdev, req) \ max_t(u32, MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(mdev), req) -#define MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev) MLX5_MPWRQ_LOG_STRIDE_SZ(mdev, 6) -#define MLX5_MPWRQ_CQE_CMPRS_LOG_STRIDE_SZ(mdev) MLX5_MPWRQ_LOG_STRIDE_SZ(mdev, 8) -#define MLX5E_MPWQE_STRIDE_SZ(mdev, cqe_cmprs) \ - (cqe_cmprs ? MLX5_MPWRQ_CQE_CMPRS_LOG_STRIDE_SZ(mdev) : \ - MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev)) +#define MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev) \ + MLX5_MPWRQ_LOG_STRIDE_SZ(mdev, order_base_2(MLX5E_RX_MAX_HEAD)) #define MLX5_MPWRQ_LOG_WQE_SZ 18 #define MLX5_MPWRQ_WQE_PAGE_ORDER (MLX5_MPWRQ_LOG_WQE_SZ - PAGE_SHIFT > 0 ? \ @@ -118,8 +118,6 @@ struct page_pool; #define MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW 0x2 -#define MLX5E_RX_MAX_HEAD (256) - #define MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ (64 * 1024) #define MLX5E_DEFAULT_LRO_TIMEOUT 32 #define MLX5E_LRO_TIMEOUT_ARR_SIZE 4 @@ -147,9 +145,6 @@ struct page_pool; MLX5_UMR_MTT_ALIGNMENT)) #define MLX5E_UMR_WQEBBS \ (DIV_ROUND_UP(MLX5E_UMR_WQE_INLINE_SZ, MLX5_SEND_WQE_BB)) -#define MLX5E_ICOSQ_MAX_WQEBBS MLX5E_UMR_WQEBBS - -#define MLX5E_NUM_MAIN_GROUPS 9 #define MLX5E_MSG_LEVEL NETIF_MSG_LINK @@ -178,8 +173,7 @@ static inline int mlx5e_get_max_num_channels(struct mlx5_core_dev *mdev) { return is_kdump_kernel() ? 
MLX5E_MIN_NUM_CHANNELS : - min_t(int, mdev->priv.eq_table.num_comp_vectors, - MLX5E_MAX_NUM_CHANNELS); + min_t(int, mlx5_comp_vectors_count(mdev), MLX5E_MAX_NUM_CHANNELS); } /* Use this function to get max num channels after netdev was created */ @@ -214,22 +208,24 @@ struct mlx5e_umr_wqe { extern const char mlx5e_self_tests[][ETH_GSTRING_LEN]; enum mlx5e_priv_flag { - MLX5E_PFLAG_RX_CQE_BASED_MODER = (1 << 0), - MLX5E_PFLAG_TX_CQE_BASED_MODER = (1 << 1), - MLX5E_PFLAG_RX_CQE_COMPRESS = (1 << 2), - MLX5E_PFLAG_RX_STRIDING_RQ = (1 << 3), - MLX5E_PFLAG_RX_NO_CSUM_COMPLETE = (1 << 4), + MLX5E_PFLAG_RX_CQE_BASED_MODER, + MLX5E_PFLAG_TX_CQE_BASED_MODER, + MLX5E_PFLAG_RX_CQE_COMPRESS, + MLX5E_PFLAG_RX_STRIDING_RQ, + MLX5E_PFLAG_RX_NO_CSUM_COMPLETE, + MLX5E_PFLAG_XDP_TX_MPWQE, + MLX5E_NUM_PFLAGS, /* Keep last */ }; #define MLX5E_SET_PFLAG(params, pflag, enable) \ do { \ if (enable) \ - (params)->pflags |= (pflag); \ + (params)->pflags |= BIT(pflag); \ else \ - (params)->pflags &= ~(pflag); \ + (params)->pflags &= ~(BIT(pflag)); \ } while (0) -#define MLX5E_GET_PFLAG(params, pflag) (!!((params)->pflags & (pflag))) +#define MLX5E_GET_PFLAG(params, pflag) (!!((params)->pflags & (BIT(pflag)))) #ifdef CONFIG_MLX5_CORE_EN_DCB #define MLX5E_MAX_BW_ALLOC 100 /* Max percentage of BW allocation */ @@ -247,9 +243,6 @@ struct mlx5e_params { bool lro_en; u32 lro_wqe_sz; u8 tx_min_inline_mode; - u8 rss_hfunc; - u8 toeplitz_hash_key[40]; - u32 indirection_rqt[MLX5E_INDIR_RQT_SIZE]; bool vlan_strip_disable; bool scatter_fcs_en; bool rx_dim_enabled; @@ -313,16 +306,18 @@ struct mlx5e_cq { struct mlx5_core_cq mcq; struct mlx5e_channel *channel; + /* control */ + struct mlx5_core_dev *mdev; + struct mlx5_wq_ctrl wq_ctrl; +} ____cacheline_aligned_in_smp; + +struct mlx5e_cq_decomp { /* cqe decompression */ struct mlx5_cqe64 title; struct mlx5_mini_cqe8 mini_arr[MLX5_MINI_CQE_ARRAY_SIZE]; u8 mini_arr_idx; - u16 decmprs_left; - u16 decmprs_wqe_counter; - - /* control */ - struct mlx5_core_dev *mdev; - struct mlx5_wq_ctrl wq_ctrl; + u16 left; + u16 wqe_counter; } ____cacheline_aligned_in_smp; struct mlx5e_tx_wqe_info { @@ -349,7 +344,6 @@ enum { MLX5E_SQ_STATE_IPSEC, MLX5E_SQ_STATE_AM, MLX5E_SQ_STATE_TLS, - MLX5E_SQ_STATE_REDIRECT, }; struct mlx5e_sq_wqe_info { @@ -393,10 +387,7 @@ struct mlx5e_txqsq { struct mlx5e_channel *channel; int txq_ix; u32 rate_limit; - struct mlx5e_txqsq_recover { - struct work_struct recover_work; - u64 last_recover; - } recover; + struct work_struct recover_work; } ____cacheline_aligned_in_smp; struct mlx5e_dma_info { @@ -410,24 +401,51 @@ struct mlx5e_xdp_info { struct mlx5e_dma_info di; }; +struct mlx5e_xdp_info_fifo { + struct mlx5e_xdp_info *xi; + u32 *cc; + u32 *pc; + u32 mask; +}; + +struct mlx5e_xdp_wqe_info { + u8 num_wqebbs; + u8 num_ds; +}; + +struct mlx5e_xdp_mpwqe { + /* Current MPWQE session */ + struct mlx5e_tx_wqe *wqe; + u8 ds_count; + u8 max_ds_count; +}; + +struct mlx5e_xdpsq; +typedef bool (*mlx5e_fp_xmit_xdp_frame)(struct mlx5e_xdpsq*, + struct mlx5e_xdp_info*); struct mlx5e_xdpsq { /* data path */ /* dirtied @completion */ + u32 xdpi_fifo_cc; u16 cc; bool redirect_flush; /* dirtied @xmit */ - u16 pc ____cacheline_aligned_in_smp; - bool doorbell; + u32 xdpi_fifo_pc ____cacheline_aligned_in_smp; + u16 pc; + struct mlx5_wqe_ctrl_seg *doorbell_cseg; + struct mlx5e_xdp_mpwqe mpwqe; struct mlx5e_cq cq; /* read only */ struct mlx5_wq_cyc wq; struct mlx5e_xdpsq_stats *stats; + mlx5e_fp_xmit_xdp_frame xmit_xdp_frame; struct { - struct mlx5e_xdp_info *xdpi; + struct 
mlx5e_xdp_wqe_info *wqe_info; + struct mlx5e_xdp_info_fifo xdpi_fifo; } db; void __iomem *uar_map; u32 sqn; @@ -559,6 +577,7 @@ struct mlx5e_rq { struct net_device *netdev; struct mlx5e_rq_stats *stats; struct mlx5e_cq cq; + struct mlx5e_cq_decomp cqd; struct mlx5e_page_cache page_cache; struct hwtstamp_config *tstamp; struct mlx5_clock *clock; @@ -616,6 +635,7 @@ struct mlx5e_channel { struct hwtstamp_config *tstamp; int ix; int cpu; + cpumask_var_t xps_cpumask; }; struct mlx5e_channels { @@ -633,9 +653,9 @@ struct mlx5e_channel_stats { } ____cacheline_aligned_in_smp; enum { - MLX5E_STATE_ASYNC_EVENTS_ENABLED, MLX5E_STATE_OPENED, MLX5E_STATE_DESTROYING, + MLX5E_STATE_XDP_TX_ENABLED, }; struct mlx5e_rqt { @@ -654,6 +674,20 @@ enum { MLX5E_NIC_PRIO }; +struct mlx5e_rss_params { + u32 indirection_rqt[MLX5E_INDIR_RQT_SIZE]; + u32 rx_hash_fields[MLX5E_NUM_INDIR_TIRS]; + u8 toeplitz_hash_key[40]; + u8 hfunc; +}; + +struct mlx5e_modify_sq_param { + int curr_state; + int next_state; + int rl_update; + int rl_index; +}; + struct mlx5e_priv { /* priv data path fields - start */ struct mlx5e_txqsq *txq2sq[MLX5E_MAX_NUM_CHANNELS * MLX5E_MAX_NUM_TC]; @@ -674,6 +708,7 @@ struct mlx5e_priv { struct mlx5e_tir indir_tir[MLX5E_NUM_INDIR_TIRS]; struct mlx5e_tir inner_indir_tir[MLX5E_NUM_INDIR_TIRS]; struct mlx5e_tir direct_tir[MLX5E_MAX_NUM_CHANNELS]; + struct mlx5e_rss_params rss_params; u32 tx_rates[MLX5E_MAX_NUM_SQS]; struct mlx5e_flow_steering fs; @@ -683,6 +718,8 @@ struct mlx5e_priv { struct work_struct set_rx_mode_work; struct work_struct tx_timeout_work; struct work_struct update_stats_work; + struct work_struct monitor_counters_work; + struct mlx5_nb monitor_counters_nb; struct mlx5_core_dev *mdev; struct net_device *netdev; @@ -692,6 +729,8 @@ struct mlx5e_priv { struct hwtstamp_config tstamp; u16 q_counter; u16 drop_rq_q_counter; + struct notifier_block events_nb; + #ifdef CONFIG_MLX5_CORE_EN_DCB struct mlx5e_dcbx dcbx; #endif @@ -704,6 +743,7 @@ struct mlx5e_priv { #ifdef CONFIG_MLX5_EN_TLS struct mlx5e_tls *tls; #endif + struct devlink_health_reporter *tx_reporter; }; struct mlx5e_profile { @@ -769,6 +809,8 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, struct mlx5e_wqe_frag_info *wi, u32 cqe_bcnt); void mlx5e_update_stats(struct mlx5e_priv *priv); +void mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats); +void mlx5e_fold_sw_stats64(struct mlx5e_priv *priv, struct rtnl_link_stats64 *s); void mlx5e_init_l2_addr(struct mlx5e_priv *priv); int mlx5e_self_test_num(struct mlx5e_priv *priv); @@ -799,9 +841,11 @@ struct mlx5e_redirect_rqt_param { int mlx5e_redirect_rqt(struct mlx5e_priv *priv, u32 rqtn, int sz, struct mlx5e_redirect_rqt_param rrp); -void mlx5e_build_indir_tir_ctx_hash(struct mlx5e_params *params, - enum mlx5e_traffic_types tt, +void mlx5e_build_indir_tir_ctx_hash(struct mlx5e_rss_params *rss_params, + const struct mlx5e_tirc_config *ttconfig, void *tirc, bool inner); +void mlx5e_modify_tirs_hash(struct mlx5e_priv *priv, void *in, int inlen); +struct mlx5e_tirc_config mlx5e_tirc_get_default_config(enum mlx5e_traffic_types tt); int mlx5e_open_locked(struct net_device *netdev); int mlx5e_close_locked(struct net_device *netdev); @@ -814,9 +858,10 @@ void mlx5e_close_channels(struct mlx5e_channels *chs); * switching channels */ typedef int (*mlx5e_fp_hw_modify)(struct mlx5e_priv *priv); -void mlx5e_switch_priv_channels(struct mlx5e_priv *priv, - struct mlx5e_channels *new_chs, - mlx5e_fp_hw_modify hw_modify); +int 
mlx5e_safe_reopen_channels(struct mlx5e_priv *priv); +int mlx5e_safe_switch_channels(struct mlx5e_priv *priv, + struct mlx5e_channels *new_chs, + mlx5e_fp_hw_modify hw_modify); void mlx5e_activate_priv_channels(struct mlx5e_priv *priv); void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv); @@ -830,6 +875,11 @@ void mlx5e_set_rq_type(struct mlx5_core_dev *mdev, struct mlx5e_params *params); void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params); +int mlx5e_modify_sq(struct mlx5_core_dev *mdev, u32 sqn, + struct mlx5e_modify_sq_param *p); +void mlx5e_activate_txqsq(struct mlx5e_txqsq *sq); +void mlx5e_tx_disable_queue(struct netdev_queue *txq); + static inline bool mlx5e_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev) { return (MLX5_CAP_ETH(mdev, tunnel_stateless_gre) && @@ -931,14 +981,16 @@ int mlx5e_create_tis(struct mlx5_core_dev *mdev, int tc, void mlx5e_destroy_tis(struct mlx5_core_dev *mdev, u32 tisn); int mlx5e_create_tises(struct mlx5e_priv *priv); -void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv); +void mlx5e_update_carrier(struct mlx5e_priv *priv); int mlx5e_close(struct net_device *netdev); int mlx5e_open(struct net_device *netdev); +void mlx5e_update_ndo_stats(struct mlx5e_priv *priv); void mlx5e_queue_update_stats(struct mlx5e_priv *priv); int mlx5e_bits_invert(unsigned long a, int size); typedef int (*change_hw_mtu_cb)(struct mlx5e_priv *priv); +int mlx5e_set_dev_port_mtu(struct mlx5e_priv *priv); int mlx5e_change_mtu(struct net_device *netdev, int new_mtu, change_hw_mtu_cb set_mtu_cb); @@ -962,12 +1014,20 @@ int mlx5e_ethtool_get_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesce *coal); int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesce *coal); +int mlx5e_ethtool_get_link_ksettings(struct mlx5e_priv *priv, + struct ethtool_link_ksettings *link_ksettings); +int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv, + const struct ethtool_link_ksettings *link_ksettings); u32 mlx5e_ethtool_get_rxfh_key_size(struct mlx5e_priv *priv); u32 mlx5e_ethtool_get_rxfh_indir_size(struct mlx5e_priv *priv); int mlx5e_ethtool_get_ts_info(struct mlx5e_priv *priv, struct ethtool_ts_info *info); int mlx5e_ethtool_flash_device(struct mlx5e_priv *priv, struct ethtool_flash *flash); +void mlx5e_ethtool_get_pauseparam(struct mlx5e_priv *priv, + struct ethtool_pauseparam *pauseparam); +int mlx5e_ethtool_set_pauseparam(struct mlx5e_priv *priv, + struct ethtool_pauseparam *pauseparam); /* mlx5e generic netdev management API */ int mlx5e_netdev_init(struct net_device *netdev, @@ -983,12 +1043,26 @@ int mlx5e_attach_netdev(struct mlx5e_priv *priv); void mlx5e_detach_netdev(struct mlx5e_priv *priv); void mlx5e_destroy_netdev(struct mlx5e_priv *priv); void mlx5e_build_nic_params(struct mlx5_core_dev *mdev, + struct mlx5e_rss_params *rss_params, struct mlx5e_params *params, u16 max_channels, u16 mtu); void mlx5e_build_rq_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params); -void mlx5e_build_rss_params(struct mlx5e_params *params); +void mlx5e_build_rss_params(struct mlx5e_rss_params *rss_params, + u16 num_channels); u8 mlx5e_params_calculate_tx_min_inline(struct mlx5_core_dev *mdev); void mlx5e_rx_dim_work(struct work_struct *work); void mlx5e_tx_dim_work(struct work_struct *work); + +void mlx5e_add_vxlan_port(struct net_device *netdev, struct udp_tunnel_info *ti); +void mlx5e_del_vxlan_port(struct net_device *netdev, struct udp_tunnel_info *ti); +netdev_features_t mlx5e_features_check(struct sk_buff *skb, 
+ struct net_device *netdev, + netdev_features_t features); +#ifdef CONFIG_MLX5_ESWITCH +int mlx5e_set_vf_mac(struct net_device *dev, int vf, u8 *mac); +int mlx5e_set_vf_rate(struct net_device *dev, int vf, int min_tx_rate, int max_tx_rate); +int mlx5e_get_vf_config(struct net_device *dev, int vf, struct ifla_vf_info *ivi); +int mlx5e_get_vf_stats(struct net_device *dev, int vf, struct ifla_vf_stats *vf_stats); +#endif #endif /* __MLX5_EN_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h index 1431232c9a09..be5961ff24cc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h @@ -73,6 +73,22 @@ enum mlx5e_traffic_types { MLX5E_NUM_INDIR_TIRS = MLX5E_TT_ANY, }; +struct mlx5e_tirc_config { + u8 l3_prot_type; + u8 l4_prot_type; + u32 rx_hash_fields; +}; + +#define MLX5_HASH_IP (MLX5_HASH_FIELD_SEL_SRC_IP |\ + MLX5_HASH_FIELD_SEL_DST_IP) +#define MLX5_HASH_IP_L4PORTS (MLX5_HASH_FIELD_SEL_SRC_IP |\ + MLX5_HASH_FIELD_SEL_DST_IP |\ + MLX5_HASH_FIELD_SEL_L4_SPORT |\ + MLX5_HASH_FIELD_SEL_L4_DPORT) +#define MLX5_HASH_IP_IPSEC_SPI (MLX5_HASH_FIELD_SEL_SRC_IP |\ + MLX5_HASH_FIELD_SEL_DST_IP |\ + MLX5_HASH_FIELD_SEL_IPSEC_SPI) + enum mlx5e_tunnel_types { MLX5E_TT_IPV4_GRE, MLX5E_TT_IPV6_GRE, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.c new file mode 100644 index 000000000000..7cd5b02e0f10 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.c @@ -0,0 +1,169 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2018 Mellanox Technologies. */ + +#include "en.h" +#include "monitor_stats.h" +#include "lib/eq.h" + +/* Driver will set the following watch counters list: + * Ppcnt.802_3: + * a_in_range_length_errors Type: 0x0, Counter: 0x0, group_id = N/A + * a_out_of_range_length_field Type: 0x0, Counter: 0x1, group_id = N/A + * a_frame_too_long_errors Type: 0x0, Counter: 0x2, group_id = N/A + * a_frame_check_sequence_errors Type: 0x0, Counter: 0x3, group_id = N/A + * a_alignment_errors Type: 0x0, Counter: 0x4, group_id = N/A + * if_out_discards Type: 0x0, Counter: 0x5, group_id = N/A + * Q_Counters: + * Q[index].rx_out_of_buffer Type: 0x1, Counter: 0x4, group_id = counter_ix + */ + +#define NUM_REQ_PPCNT_COUNTER_S1 MLX5_CMD_SET_MONITOR_NUM_PPCNT_COUNTER_SET1 +#define NUM_REQ_Q_COUNTERS_S1 MLX5_CMD_SET_MONITOR_NUM_Q_COUNTERS_SET1 + +int mlx5e_monitor_counter_supported(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + + if (!MLX5_CAP_GEN(mdev, max_num_of_monitor_counters)) + return false; + if (MLX5_CAP_PCAM_REG(mdev, ppcnt) && + MLX5_CAP_GEN(mdev, num_ppcnt_monitor_counters) < + NUM_REQ_PPCNT_COUNTER_S1) + return false; + if (MLX5_CAP_GEN(mdev, num_q_monitor_counters) < + NUM_REQ_Q_COUNTERS_S1) + return false; + return true; +} + +void mlx5e_monitor_counter_arm(struct mlx5e_priv *priv) +{ + u32 in[MLX5_ST_SZ_DW(arm_monitor_counter_in)] = {}; + u32 out[MLX5_ST_SZ_DW(arm_monitor_counter_out)] = {}; + + MLX5_SET(arm_monitor_counter_in, in, opcode, + MLX5_CMD_OP_ARM_MONITOR_COUNTER); + mlx5_cmd_exec(priv->mdev, in, sizeof(in), out, sizeof(out)); +} + +static void mlx5e_monitor_counters_work(struct work_struct *work) +{ + struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv, + monitor_counters_work); + + mutex_lock(&priv->state_lock); + mlx5e_update_ndo_stats(priv); + mutex_unlock(&priv->state_lock); + mlx5e_monitor_counter_arm(priv); +} + 
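+/* Monitor counters operate as a one-shot, re-arming cycle: + * mlx5e_monitor_counter_arm() requests a single + * MLX5_EVENT_TYPE_MONITOR_COUNTER EQE from the device. When a watched + * counter changes, the event handler below queues + * mlx5e_monitor_counters_work(), which refreshes the stats under the state + * lock and re-arms the device, so the event rate stays bounded no matter how + * often the counters change. + */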
+static int mlx5e_monitor_event_handler(struct notifier_block *nb, + unsigned long event, void *eqe) +{ + struct mlx5e_priv *priv = mlx5_nb_cof(nb, struct mlx5e_priv, + monitor_counters_nb); + queue_work(priv->wq, &priv->monitor_counters_work); + return NOTIFY_OK; +} + +static void mlx5e_monitor_counter_start(struct mlx5e_priv *priv) +{ + MLX5_NB_INIT(&priv->monitor_counters_nb, mlx5e_monitor_event_handler, + MONITOR_COUNTER); + mlx5_eq_notifier_register(priv->mdev, &priv->monitor_counters_nb); +} + +static void mlx5e_monitor_counter_stop(struct mlx5e_priv *priv) +{ + mlx5_eq_notifier_unregister(priv->mdev, &priv->monitor_counters_nb); + cancel_work_sync(&priv->monitor_counters_work); +} + +static int fill_monitor_counter_ppcnt_set1(int cnt, u32 *in) +{ + enum mlx5_monitor_counter_ppcnt ppcnt_cnt; + + for (ppcnt_cnt = 0; + ppcnt_cnt < NUM_REQ_PPCNT_COUNTER_S1; + ppcnt_cnt++, cnt++) { + MLX5_SET(set_monitor_counter_in, in, + monitor_counter[cnt].type, + MLX5_QUERY_MONITOR_CNT_TYPE_PPCNT); + MLX5_SET(set_monitor_counter_in, in, + monitor_counter[cnt].counter, + ppcnt_cnt); + } + return ppcnt_cnt; +} + +static int fill_monitor_counter_q_counter_set1(int cnt, int q_counter, u32 *in) +{ + MLX5_SET(set_monitor_counter_in, in, + monitor_counter[cnt].type, + MLX5_QUERY_MONITOR_CNT_TYPE_Q_COUNTER); + MLX5_SET(set_monitor_counter_in, in, + monitor_counter[cnt].counter, + MLX5_QUERY_MONITOR_Q_COUNTER_RX_OUT_OF_BUFFER); + MLX5_SET(set_monitor_counter_in, in, + monitor_counter[cnt].counter_group_id, + q_counter); + return 1; +} + +/* Check mlx5e_monitor_counter_supported() before calling this function. */ +static void mlx5e_set_monitor_counter(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + int max_num_of_counters = MLX5_CAP_GEN(mdev, max_num_of_monitor_counters); + int num_q_counters = MLX5_CAP_GEN(mdev, num_q_monitor_counters); + int num_ppcnt_counters = !MLX5_CAP_PCAM_REG(mdev, ppcnt) ? 
0 : + MLX5_CAP_GEN(mdev, num_ppcnt_monitor_counters); + u32 in[MLX5_ST_SZ_DW(set_monitor_counter_in)] = {}; + u32 out[MLX5_ST_SZ_DW(set_monitor_counter_out)] = {}; + int q_counter = priv->q_counter; + int cnt = 0; + + if (num_ppcnt_counters >= NUM_REQ_PPCNT_COUNTER_S1 && + max_num_of_counters >= (NUM_REQ_PPCNT_COUNTER_S1 + cnt)) + cnt += fill_monitor_counter_ppcnt_set1(cnt, in); + + if (num_q_counters >= NUM_REQ_Q_COUNTERS_S1 && + max_num_of_counters >= (NUM_REQ_Q_COUNTERS_S1 + cnt) && + q_counter) + cnt += fill_monitor_counter_q_counter_set1(cnt, q_counter, in); + + MLX5_SET(set_monitor_counter_in, in, num_of_counters, cnt); + MLX5_SET(set_monitor_counter_in, in, opcode, + MLX5_CMD_OP_SET_MONITOR_COUNTER); + + mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); +} + +/* Check mlx5e_monitor_counter_supported() before calling this function. */ +void mlx5e_monitor_counter_init(struct mlx5e_priv *priv) +{ + INIT_WORK(&priv->monitor_counters_work, mlx5e_monitor_counters_work); + mlx5e_monitor_counter_start(priv); + mlx5e_set_monitor_counter(priv); + mlx5e_monitor_counter_arm(priv); + queue_work(priv->wq, &priv->update_stats_work); +} + +static void mlx5e_monitor_counter_disable(struct mlx5e_priv *priv) +{ + u32 in[MLX5_ST_SZ_DW(set_monitor_counter_in)] = {}; + u32 out[MLX5_ST_SZ_DW(set_monitor_counter_out)] = {}; + + MLX5_SET(set_monitor_counter_in, in, num_of_counters, 0); + MLX5_SET(set_monitor_counter_in, in, opcode, + MLX5_CMD_OP_SET_MONITOR_COUNTER); + + mlx5_cmd_exec(priv->mdev, in, sizeof(in), out, sizeof(out)); +} + +/* Check mlx5e_monitor_counter_supported() before calling this function. */ +void mlx5e_monitor_counter_cleanup(struct mlx5e_priv *priv) +{ + mlx5e_monitor_counter_disable(priv); + mlx5e_monitor_counter_stop(priv); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.h new file mode 100644 index 000000000000..e1ac4b3d22fb --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2018 Mellanox Technologies. 
*/ + +#ifndef __MLX5_MONITOR_H__ +#define __MLX5_MONITOR_H__ + +int mlx5e_monitor_counter_supported(struct mlx5e_priv *priv); +void mlx5e_monitor_counter_init(struct mlx5e_priv *priv); +void mlx5e_monitor_counter_cleanup(struct mlx5e_priv *priv); +void mlx5e_monitor_counter_arm(struct mlx5e_priv *priv); + +#endif /* __MLX5_MONITOR_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c index 4a37713023be..d5e5afbdca6d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c @@ -63,66 +63,165 @@ static const u32 mlx5e_link_speed[MLX5E_LINK_MODES_NUMBER] = { [MLX5E_50GBASE_KR2] = 50000, }; -u32 mlx5e_port_ptys2speed(u32 eth_proto_oper) +static const u32 mlx5e_ext_link_speed[MLX5E_EXT_LINK_MODES_NUMBER] = { + [MLX5E_SGMII_100M] = 100, + [MLX5E_1000BASE_X_SGMII] = 1000, + [MLX5E_5GBASE_R] = 5000, + [MLX5E_10GBASE_XFI_XAUI_1] = 10000, + [MLX5E_40GBASE_XLAUI_4_XLPPI_4] = 40000, + [MLX5E_25GAUI_1_25GBASE_CR_KR] = 25000, + [MLX5E_50GAUI_2_LAUI_2_50GBASE_CR2_KR2] = 50000, + [MLX5E_50GAUI_1_LAUI_1_50GBASE_CR_KR] = 50000, + [MLX5E_CAUI_4_100GBASE_CR4_KR4] = 100000, + [MLX5E_200GAUI_4_200GBASE_CR4_KR4] = 200000, + [MLX5E_400GAUI_8] = 400000, +}; + +static void mlx5e_port_get_speed_arr(struct mlx5_core_dev *mdev, + const u32 **arr, u32 *size) +{ + bool ext = MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet); + + *size = ext ? ARRAY_SIZE(mlx5e_ext_link_speed) : + ARRAY_SIZE(mlx5e_link_speed); + *arr = ext ? mlx5e_ext_link_speed : mlx5e_link_speed; +} + +int mlx5_port_query_eth_proto(struct mlx5_core_dev *dev, u8 port, bool ext, + struct mlx5e_port_eth_proto *eproto) +{ + u32 out[MLX5_ST_SZ_DW(ptys_reg)]; + int err; + + if (!eproto) + return -EINVAL; + + err = mlx5_query_port_ptys(dev, out, sizeof(out), MLX5_PTYS_EN, port); + if (err) + return err; + + eproto->cap = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, + eth_proto_capability); + eproto->admin = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, eth_proto_admin); + eproto->oper = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, eth_proto_oper); + return 0; +} + +void mlx5_port_query_eth_autoneg(struct mlx5_core_dev *dev, u8 *an_status, + u8 *an_disable_cap, u8 *an_disable_admin) +{ + u32 out[MLX5_ST_SZ_DW(ptys_reg)]; + + *an_status = 0; + *an_disable_cap = 0; + *an_disable_admin = 0; + + if (mlx5_query_port_ptys(dev, out, sizeof(out), MLX5_PTYS_EN, 1)) + return; + + *an_status = MLX5_GET(ptys_reg, out, an_status); + *an_disable_cap = MLX5_GET(ptys_reg, out, an_disable_cap); + *an_disable_admin = MLX5_GET(ptys_reg, out, an_disable_admin); +} + +int mlx5_port_set_eth_ptys(struct mlx5_core_dev *dev, bool an_disable, + u32 proto_admin, bool ext) +{ + u32 out[MLX5_ST_SZ_DW(ptys_reg)]; + u32 in[MLX5_ST_SZ_DW(ptys_reg)]; + u8 an_disable_admin; + u8 an_disable_cap; + u8 an_status; + + mlx5_port_query_eth_autoneg(dev, &an_status, &an_disable_cap, + &an_disable_admin); + if (!an_disable_cap && an_disable) + return -EPERM; + + memset(in, 0, sizeof(in)); + + MLX5_SET(ptys_reg, in, local_port, 1); + MLX5_SET(ptys_reg, in, an_disable_admin, an_disable); + MLX5_SET(ptys_reg, in, proto_mask, MLX5_PTYS_EN); + if (ext) + MLX5_SET(ptys_reg, in, ext_eth_proto_admin, proto_admin); + else + MLX5_SET(ptys_reg, in, eth_proto_admin, proto_admin); + + return mlx5_core_access_reg(dev, in, sizeof(in), out, + sizeof(out), MLX5_REG_PTYS, 0, 1); +} + +u32 mlx5e_port_ptys2speed(struct mlx5_core_dev *mdev, u32 eth_proto_oper) { unsigned long temp = eth_proto_oper; + const u32 
*table; u32 speed = 0; + u32 max_size; int i; - i = find_first_bit(&temp, MLX5E_LINK_MODES_NUMBER); - if (i < MLX5E_LINK_MODES_NUMBER) - speed = mlx5e_link_speed[i]; - + mlx5e_port_get_speed_arr(mdev, &table, &max_size); + i = find_first_bit(&temp, max_size); + if (i < max_size) + speed = table[i]; return speed; } int mlx5e_port_linkspeed(struct mlx5_core_dev *mdev, u32 *speed) { - u32 out[MLX5_ST_SZ_DW(ptys_reg)] = {}; - u32 eth_proto_oper; + struct mlx5e_port_eth_proto eproto; + bool ext; int err; - err = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN, 1); + ext = MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet); + err = mlx5_port_query_eth_proto(mdev, 1, ext, &eproto); if (err) - return err; + goto out; - eth_proto_oper = MLX5_GET(ptys_reg, out, eth_proto_oper); - *speed = mlx5e_port_ptys2speed(eth_proto_oper); + *speed = mlx5e_port_ptys2speed(mdev, eproto.oper); if (!(*speed)) err = -EINVAL; +out: return err; } int mlx5e_port_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed) { + struct mlx5e_port_eth_proto eproto; u32 max_speed = 0; - u32 proto_cap; + const u32 *table; + u32 max_size; + bool ext; int err; int i; - err = mlx5_query_port_proto_cap(mdev, &proto_cap, MLX5_PTYS_EN); + ext = MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet); + err = mlx5_port_query_eth_proto(mdev, 1, ext, &eproto); if (err) return err; - for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) - if (proto_cap & MLX5E_PROT_MASK(i)) - max_speed = max(max_speed, mlx5e_link_speed[i]); + mlx5e_port_get_speed_arr(mdev, &table, &max_size); + for (i = 0; i < max_size; ++i) + if (eproto.cap & MLX5E_PROT_MASK(i)) + max_speed = max(max_speed, table[i]); *speed = max_speed; return 0; } -u32 mlx5e_port_speed2linkmodes(u32 speed) +u32 mlx5e_port_speed2linkmodes(struct mlx5_core_dev *mdev, u32 speed) { u32 link_modes = 0; + const u32 *table; + u32 max_size; int i; - for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) { - if (mlx5e_link_speed[i] == speed) + mlx5e_port_get_speed_arr(mdev, &table, &max_size); + for (i = 0; i < max_size; ++i) { + if (table[i] == speed) link_modes |= MLX5E_PROT_MASK(i); } - return link_modes; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.h b/drivers/net/ethernet/mellanox/mlx5/core/en/port.h index cd2160b8c9bf..70f536ec51c4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.h @@ -36,10 +36,22 @@ #include <linux/mlx5/driver.h> #include "en.h" -u32 mlx5e_port_ptys2speed(u32 eth_proto_oper); +struct mlx5e_port_eth_proto { + u32 cap; + u32 admin; + u32 oper; +}; + +int mlx5_port_query_eth_proto(struct mlx5_core_dev *dev, u8 port, bool ext, + struct mlx5e_port_eth_proto *eproto); +void mlx5_port_query_eth_autoneg(struct mlx5_core_dev *dev, u8 *an_status, + u8 *an_disable_cap, u8 *an_disable_admin); +int mlx5_port_set_eth_ptys(struct mlx5_core_dev *dev, bool an_disable, + u32 proto_admin, bool ext); +u32 mlx5e_port_ptys2speed(struct mlx5_core_dev *mdev, u32 eth_proto_oper); int mlx5e_port_linkspeed(struct mlx5_core_dev *mdev, u32 *speed); int mlx5e_port_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed); -u32 mlx5e_port_speed2linkmodes(u32 speed); +u32 mlx5e_port_speed2linkmodes(struct mlx5_core_dev *mdev, u32 speed); int mlx5e_port_query_pbmc(struct mlx5_core_dev *mdev, void *out); int mlx5e_port_set_pbmc(struct mlx5_core_dev *mdev, void *in); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c index eac245a93f91..4ab0d030b544 100644 --- 
a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c @@ -122,7 +122,9 @@ out: return err; } -/* xoff = ((301+2.16 * len [m]) * speed [Gbps] + 2.72 MTU [B]) */ +/* xoff = ((301+2.16 * len [m]) * speed [Gbps] + 2.72 MTU [B]) + * minimum speed value is 40Gbps + */ static u32 calculate_xoff(struct mlx5e_priv *priv, unsigned int mtu) { u32 speed; @@ -130,10 +132,9 @@ static u32 calculate_xoff(struct mlx5e_priv *priv, unsigned int mtu) int err; err = mlx5e_port_linkspeed(priv->mdev, &speed); - if (err) { - mlx5_core_warn(priv->mdev, "cannot get port speed\n"); - return 0; - } + if (err) + speed = SPEED_40000; + speed = max_t(u32, speed, SPEED_40000); xoff = (301 + 216 * priv->dcbx.cable_len / 100) * speed / 1000 + 272 * mtu / 100; @@ -142,7 +143,7 @@ static u32 calculate_xoff(struct mlx5e_priv *priv, unsigned int mtu) } static int update_xoff_threshold(struct mlx5e_port_buffer *port_buffer, - u32 xoff, unsigned int mtu) + u32 xoff, unsigned int max_mtu) { int i; @@ -154,11 +155,12 @@ static int update_xoff_threshold(struct mlx5e_port_buffer *port_buffer, } if (port_buffer->buffer[i].size < - (xoff + mtu + (1 << MLX5E_BUFFER_CELL_SHIFT))) + (xoff + max_mtu + (1 << MLX5E_BUFFER_CELL_SHIFT))) return -ENOMEM; port_buffer->buffer[i].xoff = port_buffer->buffer[i].size - xoff; - port_buffer->buffer[i].xon = port_buffer->buffer[i].xoff - mtu; + port_buffer->buffer[i].xon = + port_buffer->buffer[i].xoff - max_mtu; } return 0; @@ -166,7 +168,7 @@ static int update_xoff_threshold(struct mlx5e_port_buffer *port_buffer, /** * update_buffer_lossy() - * mtu: device's MTU + * max_mtu: netdev's max_mtu * pfc_en: <input> current pfc configuration * buffer: <input> current prio to buffer mapping * xoff: <input> xoff value @@ -183,7 +185,7 @@ static int update_xoff_threshold(struct mlx5e_port_buffer *port_buffer, * Return 0 if no error. * Set change to true if buffer configuration is modified. 
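* * For example: if pfc_en enables PFC on priorities 0-3 and the buffer mapping * assigns all of them to buffer 0, buffer 0 must be lossless and receives * xoff/xon thresholds via update_xoff_threshold(); a buffer whose mapped * priorities are all PFC-disabled is marked lossy and needs no thresholds.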
*/ -static int update_buffer_lossy(unsigned int mtu, +static int update_buffer_lossy(unsigned int max_mtu, u8 pfc_en, u8 *buffer, u32 xoff, struct mlx5e_port_buffer *port_buffer, bool *change) @@ -220,7 +222,7 @@ static int update_buffer_lossy(unsigned int mtu, } if (changed) { - err = update_xoff_threshold(port_buffer, xoff, mtu); + err = update_xoff_threshold(port_buffer, xoff, max_mtu); if (err) return err; @@ -230,6 +232,7 @@ static int update_buffer_lossy(unsigned int mtu, return 0; } +#define MINIMUM_MAX_MTU 9216 int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, u32 change, unsigned int mtu, struct ieee_pfc *pfc, @@ -241,12 +244,14 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, bool update_prio2buffer = false; u8 buffer[MLX5E_MAX_PRIORITY]; bool update_buffer = false; + unsigned int max_mtu; u32 total_used = 0; u8 curr_pfc_en; int err; int i; mlx5e_dbg(HW, priv, "%s: change=%x\n", __func__, change); + max_mtu = max_t(unsigned int, priv->netdev->max_mtu, MINIMUM_MAX_MTU); err = mlx5e_port_query_buffer(priv, &port_buffer); if (err) @@ -254,7 +259,7 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, if (change & MLX5E_PORT_BUFFER_CABLE_LEN) { update_buffer = true; - err = update_xoff_threshold(&port_buffer, xoff, mtu); + err = update_xoff_threshold(&port_buffer, xoff, max_mtu); if (err) return err; } @@ -264,7 +269,7 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, if (err) return err; - err = update_buffer_lossy(mtu, pfc->pfc_en, buffer, xoff, + err = update_buffer_lossy(max_mtu, pfc->pfc_en, buffer, xoff, &port_buffer, &update_buffer); if (err) return err; @@ -276,8 +281,8 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, if (err) return err; - err = update_buffer_lossy(mtu, curr_pfc_en, prio2buffer, xoff, - &port_buffer, &update_buffer); + err = update_buffer_lossy(max_mtu, curr_pfc_en, prio2buffer, + xoff, &port_buffer, &update_buffer); if (err) return err; } @@ -301,7 +306,7 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, return -EINVAL; update_buffer = true; - err = update_xoff_threshold(&port_buffer, xoff, mtu); + err = update_xoff_threshold(&port_buffer, xoff, max_mtu); if (err) return err; } @@ -309,7 +314,7 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, /* Need to update buffer configuration if xoff value is changed */ if (!update_buffer && xoff != priv->dcbx.xoff) { update_buffer = true; - err = update_xoff_threshold(&port_buffer, xoff, mtu); + err = update_xoff_threshold(&port_buffer, xoff, max_mtu); if (err) return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter.h b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter.h new file mode 100644 index 000000000000..e78e92753d73 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2019 Mellanox Technologies. 
*/ + +#ifndef __MLX5E_EN_REPORTER_H +#define __MLX5E_EN_REPORTER_H + +#include <linux/mlx5/driver.h> +#include "en.h" + +int mlx5e_tx_reporter_create(struct mlx5e_priv *priv); +void mlx5e_tx_reporter_destroy(struct mlx5e_priv *priv); +void mlx5e_tx_reporter_err_cqe(struct mlx5e_txqsq *sq); +int mlx5e_tx_reporter_timeout(struct mlx5e_txqsq *sq); + +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c new file mode 100644 index 000000000000..476dd97f7f2f --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c @@ -0,0 +1,314 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2019 Mellanox Technologies. */ + +#include <net/devlink.h> +#include "reporter.h" +#include "lib/eq.h" + +#define MLX5E_TX_REPORTER_PER_SQ_MAX_LEN 256 + +struct mlx5e_tx_err_ctx { + int (*recover)(struct mlx5e_txqsq *sq); + struct mlx5e_txqsq *sq; +}; + +static int mlx5e_wait_for_sq_flush(struct mlx5e_txqsq *sq) +{ + unsigned long exp_time = jiffies + msecs_to_jiffies(2000); + + while (time_before(jiffies, exp_time)) { + if (sq->cc == sq->pc) + return 0; + + msleep(20); + } + + netdev_err(sq->channel->netdev, + "Wait for SQ 0x%x flush timeout (sq cc = 0x%x, sq pc = 0x%x)\n", + sq->sqn, sq->cc, sq->pc); + + return -ETIMEDOUT; +} + +static void mlx5e_reset_txqsq_cc_pc(struct mlx5e_txqsq *sq) +{ + WARN_ONCE(sq->cc != sq->pc, + "SQ 0x%x: cc (0x%x) != pc (0x%x)\n", + sq->sqn, sq->cc, sq->pc); + sq->cc = 0; + sq->dma_fifo_cc = 0; + sq->pc = 0; +} + +static int mlx5e_sq_to_ready(struct mlx5e_txqsq *sq, int curr_state) +{ + struct mlx5_core_dev *mdev = sq->channel->mdev; + struct net_device *dev = sq->channel->netdev; + struct mlx5e_modify_sq_param msp = {0}; + int err; + + msp.curr_state = curr_state; + msp.next_state = MLX5_SQC_STATE_RST; + + err = mlx5e_modify_sq(mdev, sq->sqn, &msp); + if (err) { + netdev_err(dev, "Failed to move sq 0x%x to reset\n", sq->sqn); + return err; + } + + memset(&msp, 0, sizeof(msp)); + msp.curr_state = MLX5_SQC_STATE_RST; + msp.next_state = MLX5_SQC_STATE_RDY; + + err = mlx5e_modify_sq(mdev, sq->sqn, &msp); + if (err) { + netdev_err(dev, "Failed to move sq 0x%x to ready\n", sq->sqn); + return err; + } + + return 0; +} + +static int mlx5e_tx_reporter_err_cqe_recover(struct mlx5e_txqsq *sq) +{ + struct mlx5_core_dev *mdev = sq->channel->mdev; + struct net_device *dev = sq->channel->netdev; + u8 state; + int err; + + if (!test_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state)) + return 0; + + err = mlx5_core_query_sq_state(mdev, sq->sqn, &state); + if (err) { + netdev_err(dev, "Failed to query SQ 0x%x state. err = %d\n", + sq->sqn, err); + return err; + } + + if (state != MLX5_SQC_STATE_ERR) { + netdev_err(dev, "SQ 0x%x not in ERROR state\n", sq->sqn); + return -EINVAL; + } + + mlx5e_tx_disable_queue(sq->txq); + + err = mlx5e_wait_for_sq_flush(sq); + if (err) + return err; + + /* At this point, no new packets will arrive from the stack as TXQ is + * marked with QUEUE_STATE_DRV_XOFF. In addition, NAPI cleared all + * pending WQEs. It is now safe to reset the SQ. 
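+ * The reset drives the SQ through the MODIFY_SQ state transitions + * ERR -> RST -> RDY (mlx5e_sq_to_ready()), then restores the cc/pc + * counters and re-activates the queue.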
+ */ + + err = mlx5e_sq_to_ready(sq, state); + if (err) + return err; + + mlx5e_reset_txqsq_cc_pc(sq); + sq->stats->recover++; + mlx5e_activate_txqsq(sq); + + return 0; +} + +static int mlx5_tx_health_report(struct devlink_health_reporter *tx_reporter, + char *err_str, + struct mlx5e_tx_err_ctx *err_ctx) +{ + if (IS_ERR_OR_NULL(tx_reporter)) { + netdev_err(err_ctx->sq->channel->netdev, "%s", err_str); + return err_ctx->recover(err_ctx->sq); + } + + return devlink_health_report(tx_reporter, err_str, err_ctx); +} + +void mlx5e_tx_reporter_err_cqe(struct mlx5e_txqsq *sq) +{ + char err_str[MLX5E_TX_REPORTER_PER_SQ_MAX_LEN]; + struct mlx5e_tx_err_ctx err_ctx = {0}; + + err_ctx.sq = sq; + err_ctx.recover = mlx5e_tx_reporter_err_cqe_recover; + snprintf(err_str, sizeof(err_str), "ERR CQE on SQ: 0x%x", sq->sqn); + + mlx5_tx_health_report(sq->channel->priv->tx_reporter, err_str, + &err_ctx); +} + +static int mlx5e_tx_reporter_timeout_recover(struct mlx5e_txqsq *sq) +{ + struct mlx5_eq_comp *eq = sq->cq.mcq.eq; + u32 eqe_count; + int ret; + + netdev_err(sq->channel->netdev, "EQ 0x%x: Cons = 0x%x, irqn = 0x%x\n", + eq->core.eqn, eq->core.cons_index, eq->core.irqn); + + eqe_count = mlx5_eq_poll_irq_disabled(eq); + ret = !eqe_count; + if (!eqe_count) { + clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); + return ret; + } + + netdev_err(sq->channel->netdev, "Recover %d eqes on EQ 0x%x\n", + eqe_count, eq->core.eqn); + sq->channel->stats->eq_rearm++; + return ret; +} + +int mlx5e_tx_reporter_timeout(struct mlx5e_txqsq *sq) +{ + char err_str[MLX5E_TX_REPORTER_PER_SQ_MAX_LEN]; + struct mlx5e_tx_err_ctx err_ctx; + + err_ctx.sq = sq; + err_ctx.recover = mlx5e_tx_reporter_timeout_recover; + snprintf(err_str, sizeof(err_str), + "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x, usecs since last trans: %u\n", + sq->channel->ix, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc, + jiffies_to_usecs(jiffies - sq->txq->trans_start)); + + return mlx5_tx_health_report(sq->channel->priv->tx_reporter, err_str, + &err_ctx); +} + +/* state lock cannot be grabbed within this function. + * It can cause a deadlock or a read-after-free. + */ +static int mlx5e_tx_reporter_recover_from_ctx(struct mlx5e_tx_err_ctx *err_ctx) +{ + return err_ctx->recover(err_ctx->sq); +} + +static int mlx5e_tx_reporter_recover_all(struct mlx5e_priv *priv) +{ + int err = 0; + + rtnl_lock(); + mutex_lock(&priv->state_lock); + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + goto out; + + err = mlx5e_safe_reopen_channels(priv); + +out: + mutex_unlock(&priv->state_lock); + rtnl_unlock(); + + return err; +} + +static int mlx5e_tx_reporter_recover(struct devlink_health_reporter *reporter, + void *context) +{ + struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); + struct mlx5e_tx_err_ctx *err_ctx = context; + + return err_ctx ? 
mlx5e_tx_reporter_recover_from_ctx(err_ctx) : + mlx5e_tx_reporter_recover_all(priv); +} + +static int +mlx5e_tx_reporter_build_diagnose_output(struct devlink_fmsg *fmsg, + u32 sqn, u8 state, bool stopped) +{ + int err; + + err = devlink_fmsg_obj_nest_start(fmsg); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "sqn", sqn); + if (err) + return err; + + err = devlink_fmsg_u8_pair_put(fmsg, "HW state", state); + if (err) + return err; + + err = devlink_fmsg_bool_pair_put(fmsg, "stopped", stopped); + if (err) + return err; + + err = devlink_fmsg_obj_nest_end(fmsg); + if (err) + return err; + + return 0; +} + +static int mlx5e_tx_reporter_diagnose(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg) +{ + struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); + int i, err = 0; + + mutex_lock(&priv->state_lock); + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + goto unlock; + + err = devlink_fmsg_arr_pair_nest_start(fmsg, "SQs"); + if (err) + goto unlock; + + for (i = 0; i < priv->channels.num * priv->channels.params.num_tc; + i++) { + struct mlx5e_txqsq *sq = priv->txq2sq[i]; + u8 state; + + err = mlx5_core_query_sq_state(priv->mdev, sq->sqn, &state); + if (err) + break; + + err = mlx5e_tx_reporter_build_diagnose_output(fmsg, sq->sqn, + state, + netif_xmit_stopped(sq->txq)); + if (err) + break; + } + err = devlink_fmsg_arr_pair_nest_end(fmsg); + if (err) + goto unlock; + +unlock: + mutex_unlock(&priv->state_lock); + return err; +} + +static const struct devlink_health_reporter_ops mlx5_tx_reporter_ops = { + .name = "tx", + .recover = mlx5e_tx_reporter_recover, + .diagnose = mlx5e_tx_reporter_diagnose, +}; + +#define MLX5_REPORTER_TX_GRACEFUL_PERIOD 500 + +int mlx5e_tx_reporter_create(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + struct devlink *devlink = priv_to_devlink(mdev); + + priv->tx_reporter = + devlink_health_reporter_create(devlink, &mlx5_tx_reporter_ops, + MLX5_REPORTER_TX_GRACEFUL_PERIOD, + true, priv); + if (IS_ERR(priv->tx_reporter)) + netdev_warn(priv->netdev, + "Failed to create tx reporter, err = %ld\n", + PTR_ERR(priv->tx_reporter)); + return IS_ERR_OR_NULL(priv->tx_reporter); +} + +void mlx5e_tx_reporter_destroy(struct mlx5e_priv *priv) +{ + if (IS_ERR_OR_NULL(priv->tx_reporter)) + return; + + devlink_health_reporter_destroy(priv->tx_reporter); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c new file mode 100644 index 000000000000..eec07b34b4ad --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c @@ -0,0 +1,648 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2018 Mellanox Technologies. 
*/ + +#include <net/vxlan.h> +#include <net/gre.h> +#include "lib/vxlan.h" +#include "en/tc_tun.h" + +static int get_route_and_out_devs(struct mlx5e_priv *priv, + struct net_device *dev, + struct net_device **route_dev, + struct net_device **out_dev) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct net_device *uplink_dev, *uplink_upper; + bool dst_is_lag_dev; + + uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH); + uplink_upper = netdev_master_upper_dev_get(uplink_dev); + dst_is_lag_dev = (uplink_upper && + netif_is_lag_master(uplink_upper) && + dev == uplink_upper && + mlx5_lag_is_sriov(priv->mdev)); + + /* if the egress device isn't on the same HW e-switch or + * it's a LAG device, use the uplink + */ + if (!netdev_port_same_parent_id(priv->netdev, dev) || + dst_is_lag_dev) { + *route_dev = uplink_dev; + *out_dev = *route_dev; + } else { + *route_dev = dev; + if (is_vlan_dev(*route_dev)) + *out_dev = uplink_dev; + else if (mlx5e_eswitch_rep(dev)) + *out_dev = *route_dev; + else + return -EOPNOTSUPP; + } + + if (!(mlx5e_eswitch_rep(*out_dev) && + mlx5e_is_uplink_rep(netdev_priv(*out_dev)))) + return -EOPNOTSUPP; + + return 0; +} + +static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv, + struct net_device *mirred_dev, + struct net_device **out_dev, + struct net_device **route_dev, + struct flowi4 *fl4, + struct neighbour **out_n, + u8 *out_ttl) +{ + struct rtable *rt; + struct neighbour *n = NULL; + +#if IS_ENABLED(CONFIG_INET) + struct mlx5_core_dev *mdev = priv->mdev; + struct net_device *uplink_dev; + int ret; + + if (mlx5_lag_is_multipath(mdev)) { + struct mlx5_eswitch *esw = mdev->priv.eswitch; + + uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH); + fl4->flowi4_oif = uplink_dev->ifindex; + } + + rt = ip_route_output_key(dev_net(mirred_dev), fl4); + ret = PTR_ERR_OR_ZERO(rt); + if (ret) + return ret; + + if (mlx5_lag_is_multipath(mdev) && !rt->rt_gateway) + return -ENETUNREACH; +#else + return -EOPNOTSUPP; +#endif + + ret = get_route_and_out_devs(priv, rt->dst.dev, route_dev, out_dev); + if (ret < 0) + return ret; + + if (!(*out_ttl)) + *out_ttl = ip4_dst_hoplimit(&rt->dst); + n = dst_neigh_lookup(&rt->dst, &fl4->daddr); + ip_rt_put(rt); + if (!n) + return -ENOMEM; + + *out_n = n; + return 0; +} + +static const char *mlx5e_netdev_kind(struct net_device *dev) +{ + if (dev->rtnl_link_ops) + return dev->rtnl_link_ops->kind; + else + return ""; +} + +static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv, + struct net_device *mirred_dev, + struct net_device **out_dev, + struct net_device **route_dev, + struct flowi6 *fl6, + struct neighbour **out_n, + u8 *out_ttl) +{ + struct neighbour *n = NULL; + struct dst_entry *dst; + +#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6) + int ret; + + ret = ipv6_stub->ipv6_dst_lookup(dev_net(mirred_dev), NULL, &dst, + fl6); + if (ret < 0) + return ret; + + if (!(*out_ttl)) + *out_ttl = ip6_dst_hoplimit(dst); + + ret = get_route_and_out_devs(priv, dst->dev, route_dev, out_dev); + if (ret < 0) + return ret; +#else + return -EOPNOTSUPP; +#endif + + n = dst_neigh_lookup(dst, &fl6->daddr); + dst_release(dst); + if (!n) + return -ENOMEM; + + *out_n = n; + return 0; +} + +static int mlx5e_gen_vxlan_header(char buf[], struct ip_tunnel_key *tun_key) +{ + __be32 tun_id = tunnel_id_to_key32(tun_key->tun_id); + struct udphdr *udp = (struct udphdr *)(buf); + struct vxlanhdr *vxh = (struct vxlanhdr *) + ((char *)udp + sizeof(struct udphdr)); + + udp->dest = tun_key->tp_dst; + vxh->vx_flags = VXLAN_HF_VNI; + 
vxh->vx_vni = vxlan_vni_field(tun_id); + + return 0; +} + +static int mlx5e_gen_gre_header(char buf[], struct ip_tunnel_key *tun_key) +{ + __be32 tun_id = tunnel_id_to_key32(tun_key->tun_id); + int hdr_len; + struct gre_base_hdr *greh = (struct gre_base_hdr *)(buf); + + /* the HW does not calculate GRE csum or sequences */ + if (tun_key->tun_flags & (TUNNEL_CSUM | TUNNEL_SEQ)) + return -EOPNOTSUPP; + + greh->protocol = htons(ETH_P_TEB); + + /* GRE key */ + hdr_len = gre_calc_hlen(tun_key->tun_flags); + greh->flags = gre_tnl_flags_to_gre_flags(tun_key->tun_flags); + if (tun_key->tun_flags & TUNNEL_KEY) { + __be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4); + + *ptr = tun_id; + } + + return 0; +} + +static int mlx5e_gen_ip_tunnel_header(char buf[], __u8 *ip_proto, + struct mlx5e_encap_entry *e) +{ + int err = 0; + struct ip_tunnel_key *key = &e->tun_info.key; + + if (e->tunnel_type == MLX5E_TC_TUNNEL_TYPE_VXLAN) { + *ip_proto = IPPROTO_UDP; + err = mlx5e_gen_vxlan_header(buf, key); + } else if (e->tunnel_type == MLX5E_TC_TUNNEL_TYPE_GRETAP) { + *ip_proto = IPPROTO_GRE; + err = mlx5e_gen_gre_header(buf, key); + } else { + pr_warn("mlx5: Cannot generate tunnel header for tunnel type (%d)\n" + , e->tunnel_type); + err = -EOPNOTSUPP; + } + + return err; +} + +static char *gen_eth_tnl_hdr(char *buf, struct net_device *dev, + struct mlx5e_encap_entry *e, + u16 proto) +{ + struct ethhdr *eth = (struct ethhdr *)buf; + char *ip; + + ether_addr_copy(eth->h_dest, e->h_dest); + ether_addr_copy(eth->h_source, dev->dev_addr); + if (is_vlan_dev(dev)) { + struct vlan_hdr *vlan = (struct vlan_hdr *) + ((char *)eth + ETH_HLEN); + ip = (char *)vlan + VLAN_HLEN; + eth->h_proto = vlan_dev_vlan_proto(dev); + vlan->h_vlan_TCI = htons(vlan_dev_vlan_id(dev)); + vlan->h_vlan_encapsulated_proto = htons(proto); + } else { + eth->h_proto = htons(proto); + ip = (char *)eth + ETH_HLEN; + } + + return ip; +} + +int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv, + struct net_device *mirred_dev, + struct mlx5e_encap_entry *e) +{ + int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size); + struct ip_tunnel_key *tun_key = &e->tun_info.key; + struct net_device *out_dev, *route_dev; + struct neighbour *n = NULL; + struct flowi4 fl4 = {}; + int ipv4_encap_size; + char *encap_header; + u8 nud_state, ttl; + struct iphdr *ip; + int err; + + /* add the IP fields */ + fl4.flowi4_tos = tun_key->tos; + fl4.daddr = tun_key->u.ipv4.dst; + fl4.saddr = tun_key->u.ipv4.src; + ttl = tun_key->ttl; + + err = mlx5e_route_lookup_ipv4(priv, mirred_dev, &out_dev, &route_dev, + &fl4, &n, &ttl); + if (err) + return err; + + ipv4_encap_size = + (is_vlan_dev(route_dev) ? VLAN_ETH_HLEN : ETH_HLEN) + + sizeof(struct iphdr) + + e->tunnel_hlen; + + if (max_encap_size < ipv4_encap_size) { + mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n", + ipv4_encap_size, max_encap_size); + return -EOPNOTSUPP; + } + + encap_header = kzalloc(ipv4_encap_size, GFP_KERNEL); + if (!encap_header) + return -ENOMEM; + + /* used by mlx5e_detach_encap to lookup a neigh hash table + * entry in the neigh hash table when a user deletes a rule + */ + e->m_neigh.dev = n->dev; + e->m_neigh.family = n->ops->family; + memcpy(&e->m_neigh.dst_ip, n->primary_key, n->tbl->key_len); + e->out_dev = out_dev; + e->route_dev = route_dev; + + /* It's important to add the neigh to the hash table before checking + * the neigh validity state. 
So if we get a notification when the + * neigh changes its validity state, we will find the relevant neigh + * in the hash. + */ + err = mlx5e_rep_encap_entry_attach(netdev_priv(out_dev), e); + if (err) + goto free_encap; + + read_lock_bh(&n->lock); + nud_state = n->nud_state; + ether_addr_copy(e->h_dest, n->ha); + read_unlock_bh(&n->lock); + + /* add ethernet header */ + ip = (struct iphdr *)gen_eth_tnl_hdr(encap_header, route_dev, e, + ETH_P_IP); + + /* add ip header */ + ip->tos = tun_key->tos; + ip->version = 0x4; + ip->ihl = 0x5; + ip->ttl = ttl; + ip->daddr = fl4.daddr; + ip->saddr = fl4.saddr; + + /* add tunneling protocol header */ + err = mlx5e_gen_ip_tunnel_header((char *)ip + sizeof(struct iphdr), + &ip->protocol, e); + if (err) + goto destroy_neigh_entry; + + e->encap_size = ipv4_encap_size; + e->encap_header = encap_header; + + if (!(nud_state & NUD_VALID)) { + neigh_event_send(n, NULL); + /* the encap entry will be made valid on neigh update event + * and not used before that. + */ + goto out; + } + + err = mlx5_packet_reformat_alloc(priv->mdev, + e->reformat_type, + ipv4_encap_size, encap_header, + MLX5_FLOW_NAMESPACE_FDB, + &e->encap_id); + if (err) + goto destroy_neigh_entry; + + e->flags |= MLX5_ENCAP_ENTRY_VALID; + mlx5e_rep_queue_neigh_stats_work(netdev_priv(out_dev)); + neigh_release(n); + return err; + +destroy_neigh_entry: + mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e); +free_encap: + kfree(encap_header); +out: + if (n) + neigh_release(n); + return err; +} + +int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv, + struct net_device *mirred_dev, + struct mlx5e_encap_entry *e) +{ + int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size); + struct ip_tunnel_key *tun_key = &e->tun_info.key; + struct net_device *out_dev, *route_dev; + struct neighbour *n = NULL; + struct flowi6 fl6 = {}; + struct ipv6hdr *ip6h; + int ipv6_encap_size; + char *encap_header; + u8 nud_state, ttl; + int err; + + ttl = tun_key->ttl; + + fl6.flowlabel = ip6_make_flowinfo(RT_TOS(tun_key->tos), tun_key->label); + fl6.daddr = tun_key->u.ipv6.dst; + fl6.saddr = tun_key->u.ipv6.src; + + err = mlx5e_route_lookup_ipv6(priv, mirred_dev, &out_dev, &route_dev, + &fl6, &n, &ttl); + if (err) + return err; + + ipv6_encap_size = + (is_vlan_dev(route_dev) ? VLAN_ETH_HLEN : ETH_HLEN) + + sizeof(struct ipv6hdr) + + e->tunnel_hlen; + + if (max_encap_size < ipv6_encap_size) { + mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n", + ipv6_encap_size, max_encap_size); + return -EOPNOTSUPP; + } + + encap_header = kzalloc(ipv6_encap_size, GFP_KERNEL); + if (!encap_header) + return -ENOMEM; + + /* used by mlx5e_detach_encap to lookup a neigh hash table + * entry in the neigh hash table when a user deletes a rule + */ + e->m_neigh.dev = n->dev; + e->m_neigh.family = n->ops->family; + memcpy(&e->m_neigh.dst_ip, n->primary_key, n->tbl->key_len); + e->out_dev = out_dev; + e->route_dev = route_dev; + + /* It's important to add the neigh to the hash table before checking + * the neigh validity state. So if we get a notification when the + * neigh changes its validity state, we will find the relevant neigh + * in the hash. 
+ */ + err = mlx5e_rep_encap_entry_attach(netdev_priv(out_dev), e); + if (err) + goto free_encap; + + read_lock_bh(&n->lock); + nud_state = n->nud_state; + ether_addr_copy(e->h_dest, n->ha); + read_unlock_bh(&n->lock); + + /* add ethernet header */ + ip6h = (struct ipv6hdr *)gen_eth_tnl_hdr(encap_header, route_dev, e, + ETH_P_IPV6); + + /* add ip header */ + ip6_flow_hdr(ip6h, tun_key->tos, 0); + /* the HW fills up ipv6 payload len */ + ip6h->hop_limit = ttl; + ip6h->daddr = fl6.daddr; + ip6h->saddr = fl6.saddr; + + /* add tunneling protocol header */ + err = mlx5e_gen_ip_tunnel_header((char *)ip6h + sizeof(struct ipv6hdr), + &ip6h->nexthdr, e); + if (err) + goto destroy_neigh_entry; + + e->encap_size = ipv6_encap_size; + e->encap_header = encap_header; + + if (!(nud_state & NUD_VALID)) { + neigh_event_send(n, NULL); + /* the encap entry will be made valid on neigh update event + * and not used before that. + */ + goto out; + } + + err = mlx5_packet_reformat_alloc(priv->mdev, + e->reformat_type, + ipv6_encap_size, encap_header, + MLX5_FLOW_NAMESPACE_FDB, + &e->encap_id); + if (err) + goto destroy_neigh_entry; + + e->flags |= MLX5_ENCAP_ENTRY_VALID; + mlx5e_rep_queue_neigh_stats_work(netdev_priv(out_dev)); + neigh_release(n); + return err; + +destroy_neigh_entry: + mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e); +free_encap: + kfree(encap_header); +out: + if (n) + neigh_release(n); + return err; +} + +int mlx5e_tc_tun_get_type(struct net_device *tunnel_dev) +{ + if (netif_is_vxlan(tunnel_dev)) + return MLX5E_TC_TUNNEL_TYPE_VXLAN; + else if (netif_is_gretap(tunnel_dev) || + netif_is_ip6gretap(tunnel_dev)) + return MLX5E_TC_TUNNEL_TYPE_GRETAP; + else + return MLX5E_TC_TUNNEL_TYPE_UNKNOWN; +} + +bool mlx5e_tc_tun_device_to_offload(struct mlx5e_priv *priv, + struct net_device *netdev) +{ + int tunnel_type = mlx5e_tc_tun_get_type(netdev); + + if (tunnel_type == MLX5E_TC_TUNNEL_TYPE_VXLAN && + MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap)) + return true; + else if (tunnel_type == MLX5E_TC_TUNNEL_TYPE_GRETAP && + MLX5_CAP_ESW(priv->mdev, nvgre_encap_decap)) + return true; + else + return false; +} + +int mlx5e_tc_tun_init_encap_attr(struct net_device *tunnel_dev, + struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e, + struct netlink_ext_ack *extack) +{ + e->tunnel_type = mlx5e_tc_tun_get_type(tunnel_dev); + + if (e->tunnel_type == MLX5E_TC_TUNNEL_TYPE_VXLAN) { + int dst_port = be16_to_cpu(e->tun_info.key.tp_dst); + + if (!mlx5_vxlan_lookup_port(priv->mdev->vxlan, dst_port)) { + NL_SET_ERR_MSG_MOD(extack, + "vxlan udp dport was not registered with the HW"); + netdev_warn(priv->netdev, + "%d isn't an offloaded vxlan udp dport\n", + dst_port); + return -EOPNOTSUPP; + } + e->reformat_type = MLX5_REFORMAT_TYPE_L2_TO_VXLAN; + e->tunnel_hlen = VXLAN_HLEN; + } else if (e->tunnel_type == MLX5E_TC_TUNNEL_TYPE_GRETAP) { + e->reformat_type = MLX5_REFORMAT_TYPE_L2_TO_NVGRE; + e->tunnel_hlen = gre_calc_hlen(e->tun_info.key.tun_flags); + } else { + e->reformat_type = -1; + e->tunnel_hlen = -1; + return -EOPNOTSUPP; + } + return 0; +} + +static int mlx5e_tc_tun_parse_vxlan(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct tc_cls_flower_offload *f, + void *headers_c, + void *headers_v) +{ + struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f); + struct netlink_ext_ack *extack = f->common.extack; + void *misc_c = MLX5_ADDR_OF(fte_match_param, + spec->match_criteria, + misc_parameters); + void *misc_v = MLX5_ADDR_OF(fte_match_param, + spec->match_value, + misc_parameters); + struct 
flow_match_ports enc_ports; + + flow_rule_match_enc_ports(rule, &enc_ports); + + /* Full udp dst port must be given */ + if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS) || + memchr_inv(&enc_ports.mask->dst, 0xff, sizeof(enc_ports.mask->dst))) { + NL_SET_ERR_MSG_MOD(extack, + "VXLAN decap filter must include enc_dst_port condition"); + netdev_warn(priv->netdev, + "VXLAN decap filter must include enc_dst_port condition\n"); + return -EOPNOTSUPP; + } + + /* udp dst port must be known as a VXLAN port */ + if (!mlx5_vxlan_lookup_port(priv->mdev->vxlan, be16_to_cpu(enc_ports.key->dst))) { + NL_SET_ERR_MSG_MOD(extack, + "Matched UDP port is not registered as a VXLAN port"); + netdev_warn(priv->netdev, + "UDP port %d is not registered as a VXLAN port\n", + be16_to_cpu(enc_ports.key->dst)); + return -EOPNOTSUPP; + } + + /* dst UDP port is valid here */ + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_protocol); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, IPPROTO_UDP); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_dport, + ntohs(enc_ports.mask->dst)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport, + ntohs(enc_ports.key->dst)); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_sport, + ntohs(enc_ports.mask->src)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_sport, + ntohs(enc_ports.key->src)); + + /* match on VNI */ + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) { + struct flow_match_enc_keyid enc_keyid; + + flow_rule_match_enc_keyid(rule, &enc_keyid); + + MLX5_SET(fte_match_set_misc, misc_c, vxlan_vni, + be32_to_cpu(enc_keyid.mask->keyid)); + MLX5_SET(fte_match_set_misc, misc_v, vxlan_vni, + be32_to_cpu(enc_keyid.key->keyid)); + } + return 0; +} + +static int mlx5e_tc_tun_parse_gretap(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct tc_cls_flower_offload *f, + void *outer_headers_c, + void *outer_headers_v) +{ + void *misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + misc_parameters); + void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters); + struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f); + + if (!MLX5_CAP_ESW(priv->mdev, nvgre_encap_decap)) { + NL_SET_ERR_MSG_MOD(f->common.extack, + "GRE HW offloading is not supported"); + netdev_warn(priv->netdev, "GRE HW offloading is not supported\n"); + return -EOPNOTSUPP; + } + + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, ip_protocol); + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, + ip_protocol, IPPROTO_GRE); + + /* gre protocol */ + MLX5_SET_TO_ONES(fte_match_set_misc, misc_c, gre_protocol); + MLX5_SET(fte_match_set_misc, misc_v, gre_protocol, ETH_P_TEB); + + /* gre key */ + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) { + struct flow_match_enc_keyid enc_keyid; + + flow_rule_match_enc_keyid(rule, &enc_keyid); + MLX5_SET(fte_match_set_misc, misc_c, + gre_key.key, be32_to_cpu(enc_keyid.mask->keyid)); + MLX5_SET(fte_match_set_misc, misc_v, + gre_key.key, be32_to_cpu(enc_keyid.key->keyid)); + } + + return 0; +} + +int mlx5e_tc_tun_parse(struct net_device *filter_dev, + struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct tc_cls_flower_offload *f, + void *headers_c, + void *headers_v, u8 *match_level) +{ + int tunnel_type; + int err = 0; + + tunnel_type = mlx5e_tc_tun_get_type(filter_dev); + if (tunnel_type == MLX5E_TC_TUNNEL_TYPE_VXLAN) { + *match_level = MLX5_MATCH_L4; + err = mlx5e_tc_tun_parse_vxlan(priv, spec, f, + headers_c, headers_v); + } else if (tunnel_type == 
MLX5E_TC_TUNNEL_TYPE_GRETAP) { + *match_level = MLX5_MATCH_L3; + err = mlx5e_tc_tun_parse_gretap(priv, spec, f, + headers_c, headers_v); + } else { + netdev_warn(priv->netdev, + "decapsulation offload is not supported for %s net device (%d)\n", + mlx5e_netdev_kind(filter_dev), tunnel_type); + return -EOPNOTSUPP; + } + return err; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h new file mode 100644 index 000000000000..b63f15de899d --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2018 Mellanox Technologies. */ + +#ifndef __MLX5_EN_TC_TUNNEL_H__ +#define __MLX5_EN_TC_TUNNEL_H__ + +#include <linux/netdevice.h> +#include <linux/mlx5/fs.h> +#include <net/pkt_cls.h> +#include <linux/netlink.h> +#include "en.h" +#include "en_rep.h" + +enum { + MLX5E_TC_TUNNEL_TYPE_UNKNOWN, + MLX5E_TC_TUNNEL_TYPE_VXLAN, + MLX5E_TC_TUNNEL_TYPE_GRETAP +}; + +int mlx5e_tc_tun_init_encap_attr(struct net_device *tunnel_dev, + struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e, + struct netlink_ext_ack *extack); + +int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv, + struct net_device *mirred_dev, + struct mlx5e_encap_entry *e); + +int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv, + struct net_device *mirred_dev, + struct mlx5e_encap_entry *e); + +int mlx5e_tc_tun_get_type(struct net_device *tunnel_dev); +bool mlx5e_tc_tun_device_to_offload(struct mlx5e_priv *priv, + struct net_device *netdev); + +int mlx5e_tc_tun_parse(struct net_device *filter_dev, + struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct tc_cls_flower_offload *f, + void *headers_c, + void *headers_v, u8 *match_level); + +#endif //__MLX5_EN_TC_TUNNEL_H__ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c index ad6d471d00dd..cad34d6f5f45 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c @@ -33,6 +33,26 @@ #include <linux/bpf_trace.h> #include "en/xdp.h" +int mlx5e_xdp_max_mtu(struct mlx5e_params *params) +{ + int hr = NET_IP_ALIGN + XDP_PACKET_HEADROOM; + + /* Let S := SKB_DATA_ALIGN(sizeof(struct skb_shared_info)). + * The condition checked in mlx5e_rx_is_linear_skb is: + * SKB_DATA_ALIGN(sw_mtu + hard_mtu + hr) + S <= PAGE_SIZE (1) + * (Note that hw_mtu == sw_mtu + hard_mtu.) + * What is returned from this function is: + * max_mtu = PAGE_SIZE - S - hr - hard_mtu (2) + * After assigning sw_mtu := max_mtu, the left side of (1) turns to + * SKB_DATA_ALIGN(PAGE_SIZE - S) + S, which is equal to PAGE_SIZE, + * because both PAGE_SIZE and S are already aligned. Any number greater + * than max_mtu would make the left side of (1) greater than PAGE_SIZE, + * so max_mtu is the maximum MTU allowed. 
+ */ + + return MLX5E_HW2SW_MTU(params, SKB_MAX_HEAD(hr)); +} + static inline bool mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_dma_info *di, struct xdp_buff *xdp) @@ -47,7 +67,7 @@ mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_dma_info *di, xdpi.xdpf->len, PCI_DMA_TODEVICE); xdpi.di = *di; - return mlx5e_xmit_xdp_frame(sq, &xdpi); + return sq->xmit_xdp_frame(sq, &xdpi); } /* returns true if packet was consumed by xdp */ @@ -102,7 +122,98 @@ xdp_abort: } } -bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info *xdpi) +static void mlx5e_xdp_mpwqe_session_start(struct mlx5e_xdpsq *sq) +{ + struct mlx5e_xdp_mpwqe *session = &sq->mpwqe; + struct mlx5_wq_cyc *wq = &sq->wq; + u8 wqebbs; + u16 pi; + + mlx5e_xdpsq_fetch_wqe(sq, &session->wqe); + + prefetchw(session->wqe->data); + session->ds_count = MLX5E_XDP_TX_EMPTY_DS_COUNT; + + pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); + +/* The product of MLX5_SEND_WQE_MAX_WQEBBS * MLX5_SEND_WQEBB_NUM_DS + * (16 * 4 == 64) does not fit in the 6-bit DS field of Ctrl Segment. + * We use a bound lower than MLX5_SEND_WQE_MAX_WQEBBS to let a + * full-session WQE be cache-aligned. + */ +#if L1_CACHE_BYTES < 128 +#define MLX5E_XDP_MPW_MAX_WQEBBS (MLX5_SEND_WQE_MAX_WQEBBS - 1) +#else +#define MLX5E_XDP_MPW_MAX_WQEBBS (MLX5_SEND_WQE_MAX_WQEBBS - 2) +#endif + + wqebbs = min_t(u16, mlx5_wq_cyc_get_contig_wqebbs(wq, pi), + MLX5E_XDP_MPW_MAX_WQEBBS); + + session->max_ds_count = MLX5_SEND_WQEBB_NUM_DS * wqebbs; +} + +static void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq *sq) +{ + struct mlx5_wq_cyc *wq = &sq->wq; + struct mlx5e_xdp_mpwqe *session = &sq->mpwqe; + struct mlx5_wqe_ctrl_seg *cseg = &session->wqe->ctrl; + u16 ds_count = session->ds_count; + u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); + struct mlx5e_xdp_wqe_info *wi = &sq->db.wqe_info[pi]; + + cseg->opmod_idx_opcode = + cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_ENHANCED_MPSW); + cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_count); + + wi->num_wqebbs = DIV_ROUND_UP(ds_count, MLX5_SEND_WQEBB_NUM_DS); + wi->num_ds = ds_count - MLX5E_XDP_TX_EMPTY_DS_COUNT; + + sq->pc += wi->num_wqebbs; + + sq->doorbell_cseg = cseg; + + session->wqe = NULL; /* Close session */ +} + +static bool mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq, + struct mlx5e_xdp_info *xdpi) +{ + struct mlx5e_xdp_mpwqe *session = &sq->mpwqe; + struct mlx5e_xdpsq_stats *stats = sq->stats; + + dma_addr_t dma_addr = xdpi->dma_addr; + struct xdp_frame *xdpf = xdpi->xdpf; + unsigned int dma_len = xdpf->len; + + if (unlikely(sq->hw_mtu < dma_len)) { + stats->err++; + return false; + } + + if (unlikely(!session->wqe)) { + if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, + MLX5_SEND_WQE_MAX_WQEBBS))) { + /* SQ is full, ring doorbell */ + mlx5e_xmit_xdp_doorbell(sq); + stats->full++; + return false; + } + + mlx5e_xdp_mpwqe_session_start(sq); + } + + mlx5e_xdp_mpwqe_add_dseg(sq, dma_addr, dma_len); + + if (unlikely(session->ds_count == session->max_ds_count)) + mlx5e_xdp_mpwqe_complete(sq); + + mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, xdpi); + stats->xmit++; + return true; +} + +static bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info *xdpi) { struct mlx5_wq_cyc *wq = &sq->wq; u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); @@ -126,11 +237,8 @@ bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info *xdpi) } if (unlikely(!mlx5e_wqc_has_room_for(wq, sq->cc, sq->pc, 1))) { - if (sq->doorbell) { - /* SQ is full, ring doorbell */ - mlx5e_xmit_xdp_doorbell(sq); - sq->doorbell = false; - } +
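The MLX5E_XDP_MPW_MAX_WQEBBS bound defined above exists because the ctrl segment's DS-count field is 6 bits wide: a full 16-WQEBB session would need 16 * 4 = 64 data segments, one more than the field can encode, and dropping one (or two, on 128-byte cachelines) WQEBBs also keeps a full session cache-aligned. A small arithmetic sketch, with the two constants assumed from the mlx5 definitions quoted in this hunk:

#include <assert.h>

#define SEND_WQE_MAX_WQEBBS 16 /* assumed value of MLX5_SEND_WQE_MAX_WQEBBS */
#define SEND_WQEBB_NUM_DS    4 /* assumed value of MLX5_SEND_WQEBB_NUM_DS */
#define DS_COUNT_BITS        6 /* width of the DS count in the ctrl segment */

int main(void)
{
	unsigned int max_encodable = (1u << DS_COUNT_BITS) - 1;               /* 63 */
	unsigned int full_session = SEND_WQE_MAX_WQEBBS * SEND_WQEBB_NUM_DS;  /* 64 */

	/* 64 cannot be represented in 6 bits, so the session is capped... */
	assert(full_session > max_encodable);
	/* ...and one WQEBB less already fits: 15 * 4 == 60 <= 63. */
	assert((SEND_WQE_MAX_WQEBBS - 1) * SEND_WQEBB_NUM_DS <= max_encodable);
	return 0;
}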
/* SQ is full, ring doorbell */ + mlx5e_xmit_xdp_doorbell(sq); stats->full++; return false; } @@ -152,23 +260,20 @@ bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info *xdpi) cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_SEND); - /* move page to reference to sq responsibility, - * and mark so it's not put back in page-cache. - */ - sq->db.xdpi[pi] = *xdpi; sq->pc++; - sq->doorbell = true; + sq->doorbell_cseg = cseg; + mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, xdpi); stats->xmit++; return true; } -bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq) +bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq) { + struct mlx5e_xdp_info_fifo *xdpi_fifo; struct mlx5e_xdpsq *sq; struct mlx5_cqe64 *cqe; - struct mlx5e_rq *rq; bool is_redirect; u16 sqcc; int i; @@ -182,8 +287,8 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq) if (!cqe) return false; - is_redirect = test_bit(MLX5E_SQ_STATE_REDIRECT, &sq->state); - rq = container_of(sq, struct mlx5e_rq, xdpsq); + is_redirect = !rq; + xdpi_fifo = &sq->db.xdpi_fifo; /* sq->cc must be updated only after mlx5_cqwq_update_db_record(), * otherwise a cq overrun may occur @@ -199,20 +304,33 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq) wqe_counter = be16_to_cpu(cqe->wqe_counter); + if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ)) + netdev_WARN_ONCE(sq->channel->netdev, + "Bad OP in XDPSQ CQE: 0x%x\n", + get_cqe_opcode(cqe)); + do { - u16 ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc); - struct mlx5e_xdp_info *xdpi = &sq->db.xdpi[ci]; + struct mlx5e_xdp_wqe_info *wi; + u16 ci, j; last_wqe = (sqcc == wqe_counter); - sqcc++; - - if (is_redirect) { - xdp_return_frame(xdpi->xdpf); - dma_unmap_single(sq->pdev, xdpi->dma_addr, - xdpi->xdpf->len, DMA_TO_DEVICE); - } else { - /* Recycle RX page */ - mlx5e_page_release(rq, &xdpi->di, true); + ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc); + wi = &sq->db.wqe_info[ci]; + + sqcc += wi->num_wqebbs; + + for (j = 0; j < wi->num_ds; j++) { + struct mlx5e_xdp_info xdpi = + mlx5e_xdpi_fifo_pop(xdpi_fifo); + + if (is_redirect) { + dma_unmap_single(sq->pdev, xdpi.dma_addr, + xdpi.xdpf->len, DMA_TO_DEVICE); + xdp_return_frame(xdpi.xdpf); + } else { + /* Recycle RX page */ + mlx5e_page_release(rq, &xdpi.di, true); + } } } while (!last_wqe); } while ((++i < MLX5E_TX_CQ_POLL_BUDGET) && (cqe = mlx5_cqwq_get_cqe(&cq->wq))); @@ -228,27 +346,32 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq) return (i == MLX5E_TX_CQ_POLL_BUDGET); } -void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq) +void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq) { - struct mlx5e_rq *rq; - bool is_redirect; - - is_redirect = test_bit(MLX5E_SQ_STATE_REDIRECT, &sq->state); - rq = is_redirect ? 
NULL : container_of(sq, struct mlx5e_rq, xdpsq); + struct mlx5e_xdp_info_fifo *xdpi_fifo = &sq->db.xdpi_fifo; + bool is_redirect = !rq; while (sq->cc != sq->pc) { - u16 ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->cc); - struct mlx5e_xdp_info *xdpi = &sq->db.xdpi[ci]; - - sq->cc++; - - if (is_redirect) { - xdp_return_frame(xdpi->xdpf); - dma_unmap_single(sq->pdev, xdpi->dma_addr, - xdpi->xdpf->len, DMA_TO_DEVICE); - } else { - /* Recycle RX page */ - mlx5e_page_release(rq, &xdpi->di, false); + struct mlx5e_xdp_wqe_info *wi; + u16 ci, i; + + ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->cc); + wi = &sq->db.wqe_info[ci]; + + sq->cc += wi->num_wqebbs; + + for (i = 0; i < wi->num_ds; i++) { + struct mlx5e_xdp_info xdpi = + mlx5e_xdpi_fifo_pop(xdpi_fifo); + + if (is_redirect) { + dma_unmap_single(sq->pdev, xdpi.dma_addr, + xdpi.xdpf->len, DMA_TO_DEVICE); + xdp_return_frame(xdpi.xdpf); + } else { + /* Recycle RX page */ + mlx5e_page_release(rq, &xdpi.di, false); + } } } } @@ -262,7 +385,8 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, int sq_num; int i; - if (unlikely(!test_bit(MLX5E_STATE_OPENED, &priv->state))) + /* this flag is sufficient, no need to test internal sq state */ + if (unlikely(!mlx5e_xdp_tx_is_enabled(priv))) return -ENETDOWN; if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) @@ -275,9 +399,6 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, sq = &priv->channels.c[sq_num]->xdpsq; - if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state))) - return -ENETDOWN; - for (i = 0; i < n; i++) { struct xdp_frame *xdpf = frames[i]; struct mlx5e_xdp_info xdpi; @@ -292,7 +413,7 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, xdpi.xdpf = xdpf; - if (unlikely(!mlx5e_xmit_xdp_frame(sq, &xdpi))) { + if (unlikely(!sq->xmit_xdp_frame(sq, &xdpi))) { dma_unmap_single(sq->pdev, xdpi.dma_addr, xdpf->len, DMA_TO_DEVICE); xdp_return_frame_rx_napi(xdpf); @@ -300,8 +421,33 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, } } - if (flags & XDP_XMIT_FLUSH) + if (flags & XDP_XMIT_FLUSH) { + if (sq->mpwqe.wqe) + mlx5e_xdp_mpwqe_complete(sq); mlx5e_xmit_xdp_doorbell(sq); + } return n - drops; } + +void mlx5e_xdp_rx_poll_complete(struct mlx5e_rq *rq) +{ + struct mlx5e_xdpsq *xdpsq = &rq->xdpsq; + + if (xdpsq->mpwqe.wqe) + mlx5e_xdp_mpwqe_complete(xdpsq); + + mlx5e_xmit_xdp_doorbell(xdpsq); + + if (xdpsq->redirect_flush) { + xdp_do_flush_map(); + xdpsq->redirect_flush = false; + } +} + +void mlx5e_set_xmit_fp(struct mlx5e_xdpsq *sq, bool is_mpw) +{ + sq->xmit_xdp_frame = is_mpw ? 
+ mlx5e_xmit_xdp_frame_mpwqe : mlx5e_xmit_xdp_frame; +} + diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h index 6dfab045925f..553956cadc8a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h @@ -34,30 +34,81 @@ #include "en.h" -#define MLX5E_XDP_MAX_MTU ((int)(PAGE_SIZE - \ - MLX5_SKB_FRAG_SZ(XDP_PACKET_HEADROOM))) #define MLX5E_XDP_MIN_INLINE (ETH_HLEN + VLAN_HLEN) -#define MLX5E_XDP_TX_DS_COUNT \ - ((sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS) + 1 /* SG DS */) +#define MLX5E_XDP_TX_EMPTY_DS_COUNT \ + (sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS) +#define MLX5E_XDP_TX_DS_COUNT (MLX5E_XDP_TX_EMPTY_DS_COUNT + 1 /* SG DS */) +int mlx5e_xdp_max_mtu(struct mlx5e_params *params); bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di, void *va, u16 *rx_headroom, u32 *len); -bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq); -void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq); - -bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info *xdpi); +bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq); +void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq); +void mlx5e_set_xmit_fp(struct mlx5e_xdpsq *sq, bool is_mpw); +void mlx5e_xdp_rx_poll_complete(struct mlx5e_rq *rq); int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, u32 flags); +static inline void mlx5e_xdp_tx_enable(struct mlx5e_priv *priv) +{ + set_bit(MLX5E_STATE_XDP_TX_ENABLED, &priv->state); +} + +static inline void mlx5e_xdp_tx_disable(struct mlx5e_priv *priv) +{ + clear_bit(MLX5E_STATE_XDP_TX_ENABLED, &priv->state); + /* let other device's napi(s) see our new state */ + synchronize_rcu(); +} + +static inline bool mlx5e_xdp_tx_is_enabled(struct mlx5e_priv *priv) +{ + return test_bit(MLX5E_STATE_XDP_TX_ENABLED, &priv->state); +} + static inline void mlx5e_xmit_xdp_doorbell(struct mlx5e_xdpsq *sq) { + if (sq->doorbell_cseg) { + mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, sq->doorbell_cseg); + sq->doorbell_cseg = NULL; + } +} + +static inline void +mlx5e_xdp_mpwqe_add_dseg(struct mlx5e_xdpsq *sq, dma_addr_t dma_addr, u16 dma_len) +{ + struct mlx5e_xdp_mpwqe *session = &sq->mpwqe; + struct mlx5_wqe_data_seg *dseg = + (struct mlx5_wqe_data_seg *)session->wqe + session->ds_count++; + + dseg->addr = cpu_to_be64(dma_addr); + dseg->byte_count = cpu_to_be32(dma_len); + dseg->lkey = sq->mkey_be; +} + +static inline void mlx5e_xdpsq_fetch_wqe(struct mlx5e_xdpsq *sq, + struct mlx5e_tx_wqe **wqe) +{ struct mlx5_wq_cyc *wq = &sq->wq; - struct mlx5e_tx_wqe *wqe; - u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc - 1); /* last pi */ + u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); - wqe = mlx5_wq_cyc_get_wqe(wq, pi); + *wqe = mlx5_wq_cyc_get_wqe(wq, pi); + memset(*wqe, 0, sizeof(**wqe)); +} - mlx5e_notify_hw(wq, sq->pc, sq->uar_map, &wqe->ctrl); +static inline void +mlx5e_xdpi_fifo_push(struct mlx5e_xdp_info_fifo *fifo, + struct mlx5e_xdp_info *xi) +{ + u32 i = (*fifo->pc)++ & fifo->mask; + + fifo->xi[i] = *xi; +} + +static inline struct mlx5e_xdp_info +mlx5e_xdpi_fifo_pop(struct mlx5e_xdp_info_fifo *fifo) +{ + return fifo->xi[(*fifo->cc)++ & fifo->mask]; } #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c index 128a82b1dbfc..53608afd39b6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c +++ 
b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c @@ -254,11 +254,13 @@ struct sk_buff *mlx5e_ipsec_handle_tx_skb(struct net_device *netdev, struct mlx5e_ipsec_metadata *mdata; struct mlx5e_ipsec_sa_entry *sa_entry; struct xfrm_state *x; + struct sec_path *sp; if (!xo) return skb; - if (unlikely(skb->sp->len != 1)) { + sp = skb_sec_path(skb); + if (unlikely(sp->len != 1)) { atomic64_inc(&priv->ipsec->sw_stats.ipsec_tx_drop_bundle); goto drop; } @@ -305,10 +307,11 @@ mlx5e_ipsec_build_sp(struct net_device *netdev, struct sk_buff *skb, struct mlx5e_priv *priv = netdev_priv(netdev); struct xfrm_offload *xo; struct xfrm_state *xs; + struct sec_path *sp; u32 sa_handle; - skb->sp = secpath_dup(skb->sp); - if (unlikely(!skb->sp)) { + sp = secpath_set(skb); + if (unlikely(!sp)) { atomic64_inc(&priv->ipsec->sw_stats.ipsec_rx_drop_sp_alloc); return NULL; } @@ -320,8 +323,9 @@ mlx5e_ipsec_build_sp(struct net_device *netdev, struct sk_buff *skb, return NULL; } - skb->sp->xvec[skb->sp->len++] = xs; - skb->sp->olen++; + sp = skb_sec_path(skb); + sp->xvec[sp->len++] = xs; + sp->olen++; xo = xfrm_offload(skb); xo->flags = CRYPTO_DONE; @@ -372,10 +376,11 @@ struct sk_buff *mlx5e_ipsec_handle_rx_skb(struct net_device *netdev, bool mlx5e_ipsec_feature_check(struct sk_buff *skb, struct net_device *netdev, netdev_features_t features) { + struct sec_path *sp = skb_sec_path(skb); struct xfrm_state *x; - if (skb->sp && skb->sp->len) { - x = skb->sp->xvec[0]; + if (sp && sp->len) { + x = sp->xvec[0]; if (x && x->xso.offload_handle) return true; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c index 3078491cc0d0..1539cf3de5dc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c @@ -45,7 +45,9 @@ int mlx5e_create_tir(struct mlx5_core_dev *mdev, if (err) return err; + mutex_lock(&mdev->mlx5e_res.td.list_lock); list_add(&tir->list, &mdev->mlx5e_res.td.tirs_list); + mutex_unlock(&mdev->mlx5e_res.td.list_lock); return 0; } @@ -53,8 +55,10 @@ int mlx5e_create_tir(struct mlx5_core_dev *mdev, void mlx5e_destroy_tir(struct mlx5_core_dev *mdev, struct mlx5e_tir *tir) { + mutex_lock(&mdev->mlx5e_res.td.list_lock); mlx5_core_destroy_tir(mdev, tir->tirn); list_del(&tir->list); + mutex_unlock(&mdev->mlx5e_res.td.list_lock); } static int mlx5e_create_mkey(struct mlx5_core_dev *mdev, u32 pdn, @@ -114,6 +118,7 @@ int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev) } INIT_LIST_HEAD(&mdev->mlx5e_res.td.tirs_list); + mutex_init(&mdev->mlx5e_res.td.list_lock); return 0; @@ -141,15 +146,17 @@ int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb) { struct mlx5_core_dev *mdev = priv->mdev; struct mlx5e_tir *tir; - int err = -ENOMEM; + int err = 0; u32 tirn = 0; int inlen; void *in; inlen = MLX5_ST_SZ_BYTES(modify_tir_in); in = kvzalloc(inlen, GFP_KERNEL); - if (!in) + if (!in) { + err = -ENOMEM; goto out; + } if (enable_uc_lb) MLX5_SET(modify_tir_in, in, ctx.self_lb_block, @@ -157,6 +164,7 @@ int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb) MLX5_SET(modify_tir_in, in, bitmask.self_lb_en, 1); + mutex_lock(&mdev->mlx5e_res.td.list_lock); list_for_each_entry(tir, &mdev->mlx5e_res.td.tirs_list, list) { tirn = tir->tirn; err = mlx5_core_modify_tir(mdev, tirn, in, inlen); @@ -168,6 +176,7 @@ out: kvfree(in); if (err) netdev_err(priv->netdev, "refresh tir(0x%x) failed, %d\n", tirn, err); + mutex_unlock(&mdev->mlx5e_res.td.list_lock); return err; } diff 
--git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c index 722998d68564..554672edf8c3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c @@ -1126,9 +1126,7 @@ static void mlx5e_trust_update_sq_inline_mode(struct mlx5e_priv *priv) priv->channels.params.tx_min_inline_mode) goto out; - if (mlx5e_open_channels(priv, &new_channels)) - goto out; - mlx5e_switch_priv_channels(priv, &new_channels, NULL); + mlx5e_safe_switch_channels(priv, &new_channels, NULL); out: mutex_unlock(&priv->state_lock); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index f480763dcd0d..78dc8fe2a83c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -63,86 +63,158 @@ struct ptys2ethtool_config { __ETHTOOL_DECLARE_LINK_MODE_MASK(advertised); }; -static struct ptys2ethtool_config ptys2ethtool_table[MLX5E_LINK_MODES_NUMBER]; +static +struct ptys2ethtool_config ptys2legacy_ethtool_table[MLX5E_LINK_MODES_NUMBER]; +static +struct ptys2ethtool_config ptys2ext_ethtool_table[MLX5E_EXT_LINK_MODES_NUMBER]; -#define MLX5_BUILD_PTYS2ETHTOOL_CONFIG(reg_, ...) \ +#define MLX5_BUILD_PTYS2ETHTOOL_CONFIG(reg_, table, ...) \ ({ \ struct ptys2ethtool_config *cfg; \ const unsigned int modes[] = { __VA_ARGS__ }; \ - unsigned int i; \ - cfg = &ptys2ethtool_table[reg_]; \ + unsigned int i, bit, idx; \ + cfg = &ptys2##table##_ethtool_table[reg_]; \ bitmap_zero(cfg->supported, \ __ETHTOOL_LINK_MODE_MASK_NBITS); \ bitmap_zero(cfg->advertised, \ __ETHTOOL_LINK_MODE_MASK_NBITS); \ for (i = 0 ; i < ARRAY_SIZE(modes) ; ++i) { \ - __set_bit(modes[i], cfg->supported); \ - __set_bit(modes[i], cfg->advertised); \ + bit = modes[i] % 64; \ + idx = modes[i] / 64; \ + __set_bit(bit, &cfg->supported[idx]); \ + __set_bit(bit, &cfg->advertised[idx]); \ } \ }) void mlx5e_build_ptys2ethtool_map(void) { - MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_1000BASE_CX_SGMII, + memset(ptys2legacy_ethtool_table, 0, sizeof(ptys2legacy_ethtool_table)); + memset(ptys2ext_ethtool_table, 0, sizeof(ptys2ext_ethtool_table)); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_1000BASE_CX_SGMII, legacy, ETHTOOL_LINK_MODE_1000baseKX_Full_BIT); - MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_1000BASE_KX, + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_1000BASE_KX, legacy, ETHTOOL_LINK_MODE_1000baseKX_Full_BIT); - MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_CX4, + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_CX4, legacy, ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT); - MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_KX4, + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_KX4, legacy, ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT); - MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_KR, + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_KR, legacy, ETHTOOL_LINK_MODE_10000baseKR_Full_BIT); - MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_20GBASE_KR2, + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_20GBASE_KR2, legacy, ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT); - MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_40GBASE_CR4, + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_40GBASE_CR4, legacy, ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT); - MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_40GBASE_KR4, + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_40GBASE_KR4, legacy, ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT); - MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_56GBASE_R4, + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_56GBASE_R4, legacy, 
ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT); - MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_CR, + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_CR, legacy, ETHTOOL_LINK_MODE_10000baseKR_Full_BIT); - MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_SR, + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_SR, legacy, ETHTOOL_LINK_MODE_10000baseKR_Full_BIT); - MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_ER, + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_ER, legacy, ETHTOOL_LINK_MODE_10000baseKR_Full_BIT); - MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_40GBASE_SR4, + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_40GBASE_SR4, legacy, ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT); - MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_40GBASE_LR4, + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_40GBASE_LR4, legacy, ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT); - MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_50GBASE_SR2, + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_50GBASE_SR2, legacy, ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT); - MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GBASE_CR4, + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GBASE_CR4, legacy, ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT); - MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GBASE_SR4, + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GBASE_SR4, legacy, ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT); - MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GBASE_KR4, + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GBASE_KR4, legacy, ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT); - MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GBASE_LR4, + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GBASE_LR4, legacy, ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT); - MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_T, + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_T, legacy, ETHTOOL_LINK_MODE_10000baseT_Full_BIT); - MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_25GBASE_CR, + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_25GBASE_CR, legacy, ETHTOOL_LINK_MODE_25000baseCR_Full_BIT); - MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_25GBASE_KR, + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_25GBASE_KR, legacy, ETHTOOL_LINK_MODE_25000baseKR_Full_BIT); - MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_25GBASE_SR, + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_25GBASE_SR, legacy, ETHTOOL_LINK_MODE_25000baseSR_Full_BIT); - MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_50GBASE_CR2, + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_50GBASE_CR2, legacy, ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT); - MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_50GBASE_KR2, + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_50GBASE_KR2, legacy, ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_SGMII_100M, ext, + ETHTOOL_LINK_MODE_100baseT_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_1000BASE_X_SGMII, ext, + ETHTOOL_LINK_MODE_1000baseT_Full_BIT, + ETHTOOL_LINK_MODE_1000baseKX_Full_BIT, + ETHTOOL_LINK_MODE_1000baseX_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_5GBASE_R, ext, + ETHTOOL_LINK_MODE_5000baseT_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_XFI_XAUI_1, ext, + ETHTOOL_LINK_MODE_10000baseT_Full_BIT, + ETHTOOL_LINK_MODE_10000baseKR_Full_BIT, + ETHTOOL_LINK_MODE_10000baseR_FEC_BIT, + ETHTOOL_LINK_MODE_10000baseCR_Full_BIT, + ETHTOOL_LINK_MODE_10000baseSR_Full_BIT, + ETHTOOL_LINK_MODE_10000baseLR_Full_BIT, + ETHTOOL_LINK_MODE_10000baseER_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_40GBASE_XLAUI_4_XLPPI_4, ext, + ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT, + ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT, + ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT, + ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_25GAUI_1_25GBASE_CR_KR, ext, + 
ETHTOOL_LINK_MODE_25000baseCR_Full_BIT, + ETHTOOL_LINK_MODE_25000baseKR_Full_BIT, + ETHTOOL_LINK_MODE_25000baseSR_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_50GAUI_2_LAUI_2_50GBASE_CR2_KR2, + ext, + ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT, + ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT, + ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_50GAUI_1_LAUI_1_50GBASE_CR_KR, ext, + ETHTOOL_LINK_MODE_50000baseKR_Full_BIT, + ETHTOOL_LINK_MODE_50000baseSR_Full_BIT, + ETHTOOL_LINK_MODE_50000baseCR_Full_BIT, + ETHTOOL_LINK_MODE_50000baseLR_ER_FR_Full_BIT, + ETHTOOL_LINK_MODE_50000baseDR_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_CAUI_4_100GBASE_CR4_KR4, ext, + ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT, + ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT, + ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT, + ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GAUI_2_100GBASE_CR2_KR2, ext, + ETHTOOL_LINK_MODE_100000baseKR2_Full_BIT, + ETHTOOL_LINK_MODE_100000baseSR2_Full_BIT, + ETHTOOL_LINK_MODE_100000baseCR2_Full_BIT, + ETHTOOL_LINK_MODE_100000baseLR2_ER2_FR2_Full_BIT, + ETHTOOL_LINK_MODE_100000baseDR2_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_200GAUI_4_200GBASE_CR4_KR4, ext, + ETHTOOL_LINK_MODE_200000baseKR4_Full_BIT, + ETHTOOL_LINK_MODE_200000baseSR4_Full_BIT, + ETHTOOL_LINK_MODE_200000baseLR4_ER4_FR4_Full_BIT, + ETHTOOL_LINK_MODE_200000baseDR4_Full_BIT, + ETHTOOL_LINK_MODE_200000baseCR4_Full_BIT); +} + +static void mlx5e_ethtool_get_speed_arr(struct mlx5_core_dev *mdev, + struct ptys2ethtool_config **arr, + u32 *size) +{ + bool ext = MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet); + + *arr = ext ? ptys2ext_ethtool_table : ptys2legacy_ethtool_table; + *size = ext ? ARRAY_SIZE(ptys2ext_ethtool_table) : + ARRAY_SIZE(ptys2legacy_ethtool_table); } -static const char mlx5e_priv_flags[][ETH_GSTRING_LEN] = { - "rx_cqe_moder", - "tx_cqe_moder", - "rx_cqe_compress", - "rx_striding_rq", - "rx_no_csum_complete", +typedef int (*mlx5e_pflag_handler)(struct net_device *netdev, bool enable); + +struct pflag_desc { + char name[ETH_GSTRING_LEN]; + mlx5e_pflag_handler handler; }; +static const struct pflag_desc mlx5e_priv_flags[MLX5E_NUM_PFLAGS]; + int mlx5e_ethtool_get_sset_count(struct mlx5e_priv *priv, int sset) { int i, num_stats = 0; @@ -153,7 +225,7 @@ int mlx5e_ethtool_get_sset_count(struct mlx5e_priv *priv, int sset) num_stats += mlx5e_stats_grps[i].get_num_stats(priv); return num_stats; case ETH_SS_PRIV_FLAGS: - return ARRAY_SIZE(mlx5e_priv_flags); + return MLX5E_NUM_PFLAGS; case ETH_SS_TEST: return mlx5e_self_test_num(priv); /* fallthrough */ @@ -183,8 +255,9 @@ void mlx5e_ethtool_get_strings(struct mlx5e_priv *priv, u32 stringset, u8 *data) switch (stringset) { case ETH_SS_PRIV_FLAGS: - for (i = 0; i < ARRAY_SIZE(mlx5e_priv_flags); i++) - strcpy(data + i * ETH_GSTRING_LEN, mlx5e_priv_flags[i]); + for (i = 0; i < MLX5E_NUM_PFLAGS; i++) + strcpy(data + i * ETH_GSTRING_LEN, + mlx5e_priv_flags[i].name); break; case ETH_SS_TEST: @@ -296,11 +369,7 @@ int mlx5e_ethtool_set_ringparam(struct mlx5e_priv *priv, goto unlock; } - err = mlx5e_open_channels(priv, &new_channels); - if (err) - goto unlock; - - mlx5e_switch_priv_channels(priv, &new_channels, NULL); + err = mlx5e_safe_switch_channels(priv, &new_channels, NULL); unlock: mutex_unlock(&priv->state_lock); @@ -352,32 +421,32 @@ int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv, new_channels.params = priv->channels.params; new_channels.params.num_channels = count; - if 
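The reworked MLX5_BUILD_PTYS2ETHTOOL_CONFIG macro earlier in this hunk splits each ethtool link-mode number into a word index (modes[i] / 64) and an in-word bit (modes[i] % 64) because the extended table now carries modes, such as the 200G entries, whose bit numbers exceed 63 and no longer fit in a single unsigned long. A generic sketch of the same word/bit split in plain C, using the platform word width rather than the macro's hard-coded 64:

#include <limits.h>
#include <stdio.h>

#define BITS_PER_WORD (sizeof(unsigned long) * CHAR_BIT)
#define BITMAP_WORDS(nbits) (((nbits) + BITS_PER_WORD - 1) / BITS_PER_WORD)

/* Set bit 'nr' of a bitmap stored as an array of unsigned long. */
static void bitmap_set_bit(unsigned long *map, unsigned int nr)
{
	map[nr / BITS_PER_WORD] |= 1UL << (nr % BITS_PER_WORD);
}

int main(void)
{
	/* Enough room for link-mode bits above 63. */
	unsigned long modes[BITMAP_WORDS(128)] = { 0 };

	bitmap_set_bit(modes, 12); /* a legacy-range mode: lands in word 0 */
	bitmap_set_bit(modes, 70); /* an extended-range mode: lands in word 1 */

	printf("word0=%#lx word1=%#lx\n", modes[0], modes[1]);
	return 0;
}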
(!netif_is_rxfh_configured(priv->netdev)) - mlx5e_build_default_indir_rqt(new_channels.params.indirection_rqt, - MLX5E_INDIR_RQT_SIZE, count); if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { priv->channels.params = new_channels.params; + if (!netif_is_rxfh_configured(priv->netdev)) + mlx5e_build_default_indir_rqt(priv->rss_params.indirection_rqt, + MLX5E_INDIR_RQT_SIZE, count); goto out; } - /* Create fresh channels with new parameters */ - err = mlx5e_open_channels(priv, &new_channels); - if (err) - goto out; - arfs_enabled = priv->netdev->features & NETIF_F_NTUPLE; if (arfs_enabled) mlx5e_arfs_disable(priv); + if (!netif_is_rxfh_configured(priv->netdev)) + mlx5e_build_default_indir_rqt(priv->rss_params.indirection_rqt, + MLX5E_INDIR_RQT_SIZE, count); + /* Switch to new channels, set new parameters and close old ones */ - mlx5e_switch_priv_channels(priv, &new_channels, NULL); + err = mlx5e_safe_switch_channels(priv, &new_channels, NULL); if (arfs_enabled) { - err = mlx5e_arfs_enable(priv); - if (err) + int err2 = mlx5e_arfs_enable(priv); + + if (err2) netdev_err(priv->netdev, "%s: mlx5e_arfs_enable failed: %d\n", - __func__, err); + __func__, err2); } out: @@ -503,12 +572,7 @@ int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv, goto out; } - /* open fresh channels with new coal parameters */ - err = mlx5e_open_channels(priv, &new_channels); - if (err) - goto out; - - mlx5e_switch_priv_channels(priv, &new_channels, NULL); + err = mlx5e_safe_switch_channels(priv, &new_channels, NULL); out: mutex_unlock(&priv->state_lock); @@ -523,27 +587,37 @@ static int mlx5e_set_coalesce(struct net_device *netdev, return mlx5e_ethtool_set_coalesce(priv, coal); } -static void ptys2ethtool_supported_link(unsigned long *supported_modes, +static void ptys2ethtool_supported_link(struct mlx5_core_dev *mdev, + unsigned long *supported_modes, u32 eth_proto_cap) { unsigned long proto_cap = eth_proto_cap; + struct ptys2ethtool_config *table; + u32 max_size; int proto; - for_each_set_bit(proto, &proto_cap, MLX5E_LINK_MODES_NUMBER) + mlx5e_ethtool_get_speed_arr(mdev, &table, &max_size); + for_each_set_bit(proto, &proto_cap, max_size) bitmap_or(supported_modes, supported_modes, - ptys2ethtool_table[proto].supported, + table[proto].supported, __ETHTOOL_LINK_MODE_MASK_NBITS); } static void ptys2ethtool_adver_link(unsigned long *advertising_modes, - u32 eth_proto_cap) + u32 eth_proto_cap, bool ext) { unsigned long proto_cap = eth_proto_cap; + struct ptys2ethtool_config *table; + u32 max_size; int proto; - for_each_set_bit(proto, &proto_cap, MLX5E_LINK_MODES_NUMBER) + table = ext ? ptys2ext_ethtool_table : ptys2legacy_ethtool_table; + max_size = ext ? 
ARRAY_SIZE(ptys2ext_ethtool_table) : + ARRAY_SIZE(ptys2legacy_ethtool_table); + + for_each_set_bit(proto, &proto_cap, max_size) bitmap_or(advertising_modes, advertising_modes, - ptys2ethtool_table[proto].advertised, + table[proto].advertised, __ETHTOOL_LINK_MODE_MASK_NBITS); } @@ -693,13 +767,14 @@ static void get_speed_duplex(struct net_device *netdev, u32 eth_proto_oper, struct ethtool_link_ksettings *link_ksettings) { + struct mlx5e_priv *priv = netdev_priv(netdev); u32 speed = SPEED_UNKNOWN; u8 duplex = DUPLEX_UNKNOWN; if (!netif_carrier_ok(netdev)) goto out; - speed = mlx5e_port_ptys2speed(eth_proto_oper); + speed = mlx5e_port_ptys2speed(priv->mdev, eth_proto_oper); if (!speed) { speed = SPEED_UNKNOWN; goto out; @@ -712,22 +787,22 @@ out: link_ksettings->base.duplex = duplex; } -static void get_supported(u32 eth_proto_cap, +static void get_supported(struct mlx5_core_dev *mdev, u32 eth_proto_cap, struct ethtool_link_ksettings *link_ksettings) { unsigned long *supported = link_ksettings->link_modes.supported; + ptys2ethtool_supported_link(mdev, supported, eth_proto_cap); - ptys2ethtool_supported_link(supported, eth_proto_cap); ethtool_link_ksettings_add_link_mode(link_ksettings, supported, Pause); } -static void get_advertising(u32 eth_proto_cap, u8 tx_pause, - u8 rx_pause, - struct ethtool_link_ksettings *link_ksettings) +static void get_advertising(u32 eth_proto_cap, u8 tx_pause, u8 rx_pause, + struct ethtool_link_ksettings *link_ksettings, + bool ext) { unsigned long *advertising = link_ksettings->link_modes.advertising; + ptys2ethtool_adver_link(advertising, eth_proto_cap, ext); - ptys2ethtool_adver_link(advertising, eth_proto_cap); if (rx_pause) ethtool_link_ksettings_add_link_mode(link_ksettings, advertising, Pause); if (tx_pause ^ rx_pause) @@ -777,18 +852,18 @@ static u8 get_connector_port(u32 eth_proto, u8 connector_type) return PORT_OTHER; } -static void get_lp_advertising(u32 eth_proto_lp, +static void get_lp_advertising(struct mlx5_core_dev *mdev, u32 eth_proto_lp, struct ethtool_link_ksettings *link_ksettings) { unsigned long *lp_advertising = link_ksettings->link_modes.lp_advertising; + bool ext = MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet); - ptys2ethtool_adver_link(lp_advertising, eth_proto_lp); + ptys2ethtool_adver_link(lp_advertising, eth_proto_lp, ext); } -static int mlx5e_get_link_ksettings(struct net_device *netdev, - struct ethtool_link_ksettings *link_ksettings) +int mlx5e_ethtool_get_link_ksettings(struct mlx5e_priv *priv, + struct ethtool_link_ksettings *link_ksettings) { - struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5_core_dev *mdev = priv->mdev; u32 out[MLX5_ST_SZ_DW(ptys_reg)] = {0}; u32 rx_pause = 0; @@ -800,31 +875,50 @@ static int mlx5e_get_link_ksettings(struct net_device *netdev, u8 an_disable_admin; u8 an_status; u8 connector_type; + bool admin_ext; + bool ext; int err; err = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN, 1); if (err) { - netdev_err(netdev, "%s: query port ptys failed: %d\n", + netdev_err(priv->netdev, "%s: query port ptys failed: %d\n", __func__, err); goto err_query_regs; } + ext = MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet); + eth_proto_cap = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, + eth_proto_capability); + eth_proto_admin = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, + eth_proto_admin); + /* Fields: eth_proto_admin and ext_eth_proto_admin are + * mutually exclusive. Hence try reading legacy advertising + * when extended advertising is zero. 
+ * admin_ext indicates how eth_proto_admin should be + * interpreted + */ + admin_ext = ext; + if (ext && !eth_proto_admin) { + eth_proto_admin = MLX5_GET_ETH_PROTO(ptys_reg, out, false, + eth_proto_admin); + admin_ext = false; + } - eth_proto_cap = MLX5_GET(ptys_reg, out, eth_proto_capability); - eth_proto_admin = MLX5_GET(ptys_reg, out, eth_proto_admin); - eth_proto_oper = MLX5_GET(ptys_reg, out, eth_proto_oper); - eth_proto_lp = MLX5_GET(ptys_reg, out, eth_proto_lp_advertise); - an_disable_admin = MLX5_GET(ptys_reg, out, an_disable_admin); - an_status = MLX5_GET(ptys_reg, out, an_status); - connector_type = MLX5_GET(ptys_reg, out, connector_type); + eth_proto_oper = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, + eth_proto_oper); + eth_proto_lp = MLX5_GET(ptys_reg, out, eth_proto_lp_advertise); + an_disable_admin = MLX5_GET(ptys_reg, out, an_disable_admin); + an_status = MLX5_GET(ptys_reg, out, an_status); + connector_type = MLX5_GET(ptys_reg, out, connector_type); mlx5_query_port_pause(mdev, &rx_pause, &tx_pause); ethtool_link_ksettings_zero_link_mode(link_ksettings, supported); ethtool_link_ksettings_zero_link_mode(link_ksettings, advertising); - get_supported(eth_proto_cap, link_ksettings); - get_advertising(eth_proto_admin, tx_pause, rx_pause, link_ksettings); - get_speed_duplex(netdev, eth_proto_oper, link_ksettings); + get_supported(mdev, eth_proto_cap, link_ksettings); + get_advertising(eth_proto_admin, tx_pause, rx_pause, link_ksettings, + admin_ext); + get_speed_duplex(priv->netdev, eth_proto_oper, link_ksettings); eth_proto_oper = eth_proto_oper ? eth_proto_oper : eth_proto_cap; @@ -832,7 +926,7 @@ static int mlx5e_get_link_ksettings(struct net_device *netdev, connector_type); ptys2ethtool_supported_advertised_port(link_ksettings, eth_proto_admin, connector_type); - get_lp_advertising(eth_proto_lp, link_ksettings); + get_lp_advertising(mdev, eth_proto_lp, link_ksettings); if (an_status == MLX5_AN_COMPLETE) ethtool_link_ksettings_add_link_mode(link_ksettings, @@ -843,9 +937,12 @@ static int mlx5e_get_link_ksettings(struct net_device *netdev, ethtool_link_ksettings_add_link_mode(link_ksettings, supported, Autoneg); - if (get_fec_supported_advertised(mdev, link_ksettings)) - netdev_dbg(netdev, "%s: FEC caps query failed: %d\n", + err = get_fec_supported_advertised(mdev, link_ksettings); + if (err) { + netdev_dbg(priv->netdev, "%s: FEC caps query failed: %d\n", __func__, err); + err = 0; /* don't fail caps query because of FEC error */ + } if (!an_disable_admin) ethtool_link_ksettings_add_link_mode(link_ksettings, @@ -855,12 +952,22 @@ err_query_regs: return err; } +static int mlx5e_get_link_ksettings(struct net_device *netdev, + struct ethtool_link_ksettings *link_ksettings) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + return mlx5e_ethtool_get_link_ksettings(priv, link_ksettings); +} + static u32 mlx5e_ethtool2ptys_adver_link(const unsigned long *link_modes) { u32 i, ptys_modes = 0; for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) { - if (bitmap_intersects(ptys2ethtool_table[i].advertised, + if (*ptys2legacy_ethtool_table[i].advertised == 0) + continue; + if (bitmap_intersects(ptys2legacy_ethtool_table[i].advertised, link_modes, __ETHTOOL_LINK_MODE_MASK_NBITS)) ptys_modes |= MLX5E_PROT_MASK(i); @@ -869,14 +976,34 @@ static u32 mlx5e_ethtool2ptys_adver_link(const unsigned long *link_modes) return ptys_modes; } -static int mlx5e_set_link_ksettings(struct net_device *netdev, - const struct ethtool_link_ksettings *link_ksettings) +static u32 mlx5e_ethtool2ptys_ext_adver_link(const 
unsigned long *link_modes) +{ + u32 i, ptys_modes = 0; + unsigned long modes[2]; + + for (i = 0; i < MLX5E_EXT_LINK_MODES_NUMBER; ++i) { + if (*ptys2ext_ethtool_table[i].advertised == 0) + continue; + memset(modes, 0, sizeof(modes)); + bitmap_and(modes, ptys2ext_ethtool_table[i].advertised, + link_modes, __ETHTOOL_LINK_MODE_MASK_NBITS); + + if (modes[0] == ptys2ext_ethtool_table[i].advertised[0] && + modes[1] == ptys2ext_ethtool_table[i].advertised[1]) + ptys_modes |= MLX5E_PROT_MASK(i); + } + return ptys_modes; +} + +int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv, + const struct ethtool_link_ksettings *link_ksettings) { - struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5_core_dev *mdev = priv->mdev; - u32 eth_proto_cap, eth_proto_admin; + struct mlx5e_port_eth_proto eproto; bool an_changes = false; u8 an_disable_admin; + bool ext_supported; + bool ext_requested; u8 an_disable_cap; bool an_disable; u32 link_modes; @@ -884,54 +1011,66 @@ static int mlx5e_set_link_ksettings(struct net_device *netdev, u32 speed; int err; - speed = link_ksettings->base.speed; + u32 (*ethtool2ptys_adver_func)(const unsigned long *adver); - link_modes = link_ksettings->base.autoneg == AUTONEG_ENABLE ? - mlx5e_ethtool2ptys_adver_link(link_ksettings->link_modes.advertising) : - mlx5e_port_speed2linkmodes(speed); +#define MLX5E_PTYS_EXT ((1ULL << ETHTOOL_LINK_MODE_50000baseKR_Full_BIT) - 1) + + ext_requested = !!(link_ksettings->link_modes.advertising[0] > + MLX5E_PTYS_EXT || + link_ksettings->link_modes.advertising[1]); + ext_supported = MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet); + ext_requested &= ext_supported; - err = mlx5_query_port_proto_cap(mdev, &eth_proto_cap, MLX5_PTYS_EN); + speed = link_ksettings->base.speed; + ethtool2ptys_adver_func = ext_requested ? + mlx5e_ethtool2ptys_ext_adver_link : + mlx5e_ethtool2ptys_adver_link; + err = mlx5_port_query_eth_proto(mdev, 1, ext_requested, &eproto); if (err) { - netdev_err(netdev, "%s: query port eth proto cap failed: %d\n", + netdev_err(priv->netdev, "%s: query port eth proto failed: %d\n", __func__, err); goto out; } + link_modes = link_ksettings->base.autoneg == AUTONEG_ENABLE ?
+ ethtool2ptys_adver_func(link_ksettings->link_modes.advertising) : + mlx5e_port_speed2linkmodes(mdev, speed); - link_modes = link_modes & eth_proto_cap; + link_modes = link_modes & eproto.cap; if (!link_modes) { - netdev_err(netdev, "%s: Not supported link mode(s) requested", + netdev_err(priv->netdev, "%s: Not supported link mode(s) requested", __func__); err = -EINVAL; goto out; } - err = mlx5_query_port_proto_admin(mdev, &eth_proto_admin, MLX5_PTYS_EN); - if (err) { - netdev_err(netdev, "%s: query port eth proto admin failed: %d\n", - __func__, err); - goto out; - } - - mlx5_query_port_autoneg(mdev, MLX5_PTYS_EN, &an_status, - &an_disable_cap, &an_disable_admin); + mlx5_port_query_eth_autoneg(mdev, &an_status, &an_disable_cap, + &an_disable_admin); an_disable = link_ksettings->base.autoneg == AUTONEG_DISABLE; an_changes = ((!an_disable && an_disable_admin) || (an_disable && !an_disable_admin)); - if (!an_changes && link_modes == eth_proto_admin) + if (!an_changes && link_modes == eproto.admin) goto out; - mlx5_set_port_ptys(mdev, an_disable, link_modes, MLX5_PTYS_EN); + mlx5_port_set_eth_ptys(mdev, an_disable, link_modes, ext_requested); mlx5_toggle_port_link(mdev); out: return err; } +static int mlx5e_set_link_ksettings(struct net_device *netdev, + const struct ethtool_link_ksettings *link_ksettings) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + return mlx5e_ethtool_set_link_ksettings(priv, link_ksettings); +} + u32 mlx5e_ethtool_get_rxfh_key_size(struct mlx5e_priv *priv) { - return sizeof(priv->channels.params.toeplitz_hash_key); + return sizeof(priv->rss_params.toeplitz_hash_key); } static u32 mlx5e_get_rxfh_key_size(struct net_device *netdev) @@ -957,50 +1096,27 @@ static int mlx5e_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, u8 *hfunc) { struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_rss_params *rss = &priv->rss_params; if (indir) - memcpy(indir, priv->channels.params.indirection_rqt, - sizeof(priv->channels.params.indirection_rqt)); + memcpy(indir, rss->indirection_rqt, + sizeof(rss->indirection_rqt)); if (key) - memcpy(key, priv->channels.params.toeplitz_hash_key, - sizeof(priv->channels.params.toeplitz_hash_key)); + memcpy(key, rss->toeplitz_hash_key, + sizeof(rss->toeplitz_hash_key)); if (hfunc) - *hfunc = priv->channels.params.rss_hfunc; + *hfunc = rss->hfunc; return 0; } -static void mlx5e_modify_tirs_hash(struct mlx5e_priv *priv, void *in, int inlen) -{ - void *tirc = MLX5_ADDR_OF(modify_tir_in, in, ctx); - struct mlx5_core_dev *mdev = priv->mdev; - int ctxlen = MLX5_ST_SZ_BYTES(tirc); - int tt; - - MLX5_SET(modify_tir_in, in, bitmask.hash, 1); - - for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { - memset(tirc, 0, ctxlen); - mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc, false); - mlx5_core_modify_tir(mdev, priv->indir_tir[tt].tirn, in, inlen); - } - - if (!mlx5e_tunnel_inner_ft_supported(priv->mdev)) - return; - - for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { - memset(tirc, 0, ctxlen); - mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc, true); - mlx5_core_modify_tir(mdev, priv->inner_indir_tir[tt].tirn, in, inlen); - } -} - static int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir, const u8 *key, const u8 hfunc) { struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5e_rss_params *rss = &priv->rss_params; int inlen = MLX5_ST_SZ_BYTES(modify_tir_in); bool hash_changed = false; void *in; @@ -1016,15 +1132,14 @@ static int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir,
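MLX5E_PTYS_EXT above is every bit below ETHTOOL_LINK_MODE_50000baseKR_Full_BIT, so the ext_requested test is a mask trick: for word zero of the advertising bitmap, "word > mask" holds exactly when some bit at or above the threshold is set, and any bit in word one is extended by construction. A self-contained sketch of the comparison (the threshold value here is illustrative, not the real ethtool bit number):

#include <assert.h>

/* Bits below 'k' form the mask (1ULL << k) - 1; a word compares greater
 * than that mask iff at least one bit at position >= k is set. */
static int has_bit_at_or_above(unsigned long long word, unsigned int k)
{
	return word > ((1ULL << k) - 1);
}

int main(void)
{
	unsigned int k = 52; /* illustrative threshold */

	assert(!has_bit_at_or_above((1ULL << k) - 1, k)); /* only low bits set */
	assert(has_bit_at_or_above(1ULL << k, k));        /* exactly the threshold bit */
	assert(has_bit_at_or_above((1ULL << k) | 1, k));  /* mixed low and high */
	return 0;
}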
mutex_lock(&priv->state_lock); - if (hfunc != ETH_RSS_HASH_NO_CHANGE && - hfunc != priv->channels.params.rss_hfunc) { - priv->channels.params.rss_hfunc = hfunc; + if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != rss->hfunc) { + rss->hfunc = hfunc; hash_changed = true; } if (indir) { - memcpy(priv->channels.params.indirection_rqt, indir, - sizeof(priv->channels.params.indirection_rqt)); + memcpy(rss->indirection_rqt, indir, + sizeof(rss->indirection_rqt)); if (test_bit(MLX5E_STATE_OPENED, &priv->state)) { u32 rqtn = priv->indir_rqt.rqtn; @@ -1032,7 +1147,7 @@ static int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir, .is_rss = true, { .rss = { - .hfunc = priv->channels.params.rss_hfunc, + .hfunc = rss->hfunc, .channels = &priv->channels, }, }, @@ -1043,10 +1158,9 @@ static int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir, } if (key) { - memcpy(priv->channels.params.toeplitz_hash_key, key, - sizeof(priv->channels.params.toeplitz_hash_key)); - hash_changed = hash_changed || - priv->channels.params.rss_hfunc == ETH_RSS_HASH_TOP; + memcpy(rss->toeplitz_hash_key, key, + sizeof(rss->toeplitz_hash_key)); + hash_changed = hash_changed || rss->hfunc == ETH_RSS_HASH_TOP; } if (hash_changed) @@ -1150,25 +1264,31 @@ static int mlx5e_set_tunable(struct net_device *dev, return err; } -static void mlx5e_get_pauseparam(struct net_device *netdev, - struct ethtool_pauseparam *pauseparam) +void mlx5e_ethtool_get_pauseparam(struct mlx5e_priv *priv, + struct ethtool_pauseparam *pauseparam) { - struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5_core_dev *mdev = priv->mdev; int err; err = mlx5_query_port_pause(mdev, &pauseparam->rx_pause, &pauseparam->tx_pause); if (err) { - netdev_err(netdev, "%s: mlx5_query_port_pause failed:0x%x\n", + netdev_err(priv->netdev, "%s: mlx5_query_port_pause failed:0x%x\n", __func__, err); } } -static int mlx5e_set_pauseparam(struct net_device *netdev, - struct ethtool_pauseparam *pauseparam) +static void mlx5e_get_pauseparam(struct net_device *netdev, + struct ethtool_pauseparam *pauseparam) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + mlx5e_ethtool_get_pauseparam(priv, pauseparam); +} + +int mlx5e_ethtool_set_pauseparam(struct mlx5e_priv *priv, + struct ethtool_pauseparam *pauseparam) { - struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5_core_dev *mdev = priv->mdev; int err; @@ -1179,13 +1299,21 @@ static int mlx5e_set_pauseparam(struct net_device *netdev, pauseparam->rx_pause ? 1 : 0, pauseparam->tx_pause ? 
1 : 0); if (err) { - netdev_err(netdev, "%s: mlx5_set_port_pause failed:0x%x\n", + netdev_err(priv->netdev, "%s: mlx5_set_port_pause failed:0x%x\n", __func__, err); } return err; } +static int mlx5e_set_pauseparam(struct net_device *netdev, + struct ethtool_pauseparam *pauseparam) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + return mlx5e_ethtool_set_pauseparam(priv, pauseparam); +} + int mlx5e_ethtool_get_ts_info(struct mlx5e_priv *priv, struct ethtool_ts_info *info) { @@ -1458,7 +1586,7 @@ static int mlx5e_get_module_info(struct net_device *netdev, break; case MLX5_MODULE_ID_SFP: modinfo->type = ETH_MODULE_SFF_8472; - modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN; + modinfo->eeprom_len = MLX5_EEPROM_PAGE_LENGTH; break; default: netdev_err(priv->netdev, "%s: cable type not recognized:0x%x\n", @@ -1505,8 +1633,6 @@ static int mlx5e_get_module_eeprom(struct net_device *netdev, return 0; } -typedef int (*mlx5e_pflag_handler)(struct net_device *netdev, bool enable); - static int set_pflag_cqe_based_moder(struct net_device *netdev, bool enable, bool is_rx_cq) { @@ -1515,7 +1641,6 @@ static int set_pflag_cqe_based_moder(struct net_device *netdev, bool enable, struct mlx5e_channels new_channels = {}; bool mode_changed; u8 cq_period_mode, current_cq_period_mode; - int err = 0; cq_period_mode = enable ? MLX5_CQ_PERIOD_MODE_START_FROM_CQE : @@ -1543,12 +1668,7 @@ static int set_pflag_cqe_based_moder(struct net_device *netdev, bool enable, return 0; } - err = mlx5e_open_channels(priv, &new_channels); - if (err) - return err; - - mlx5e_switch_priv_channels(priv, &new_channels, NULL); - return 0; + return mlx5e_safe_switch_channels(priv, &new_channels, NULL); } static int set_pflag_tx_cqe_based_moder(struct net_device *netdev, bool enable) @@ -1581,11 +1701,10 @@ int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool new_val return 0; } - err = mlx5e_open_channels(priv, &new_channels); + err = mlx5e_safe_switch_channels(priv, &new_channels, NULL); if (err) return err; - mlx5e_switch_priv_channels(priv, &new_channels, NULL); mlx5e_dbg(DRV, priv, "MLX5E: RxCqeCmprss was turned %s\n", MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS) ? 
"ON" : "OFF"); @@ -1618,7 +1737,6 @@ static int set_pflag_rx_striding_rq(struct net_device *netdev, bool enable) struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5_core_dev *mdev = priv->mdev; struct mlx5e_channels new_channels = {}; - int err; if (enable) { if (!mlx5e_check_fragmented_striding_rq_cap(mdev)) @@ -1640,12 +1758,7 @@ static int set_pflag_rx_striding_rq(struct net_device *netdev, bool enable) return 0; } - err = mlx5e_open_channels(priv, &new_channels); - if (err) - return err; - - mlx5e_switch_priv_channels(priv, &new_channels, NULL); - return 0; + return mlx5e_safe_switch_channels(priv, &new_channels, NULL); } static int set_pflag_rx_no_csum_complete(struct net_device *netdev, bool enable) @@ -1655,7 +1768,8 @@ static int set_pflag_rx_no_csum_complete(struct net_device *netdev, bool enable) struct mlx5e_channel *c; int i; - if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + if (!test_bit(MLX5E_STATE_OPENED, &priv->state) || + priv->channels.params.xdp_prog) return 0; for (i = 0; i < channels->num; i++) { @@ -1669,23 +1783,54 @@ static int set_pflag_rx_no_csum_complete(struct net_device *netdev, bool enable) return 0; } +static int set_pflag_xdp_tx_mpwqe(struct net_device *netdev, bool enable) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_channels new_channels = {}; + int err; + + if (enable && !MLX5_CAP_ETH(mdev, enhanced_multi_pkt_send_wqe)) + return -EOPNOTSUPP; + + new_channels.params = priv->channels.params; + + MLX5E_SET_PFLAG(&new_channels.params, MLX5E_PFLAG_XDP_TX_MPWQE, enable); + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { + priv->channels.params = new_channels.params; + return 0; + } + + err = mlx5e_safe_switch_channels(priv, &new_channels, NULL); + return err; +} + +static const struct pflag_desc mlx5e_priv_flags[MLX5E_NUM_PFLAGS] = { + { "rx_cqe_moder", set_pflag_rx_cqe_based_moder }, + { "tx_cqe_moder", set_pflag_tx_cqe_based_moder }, + { "rx_cqe_compress", set_pflag_rx_cqe_compress }, + { "rx_striding_rq", set_pflag_rx_striding_rq }, + { "rx_no_csum_complete", set_pflag_rx_no_csum_complete }, + { "xdp_tx_mpwqe", set_pflag_xdp_tx_mpwqe }, +}; + static int mlx5e_handle_pflag(struct net_device *netdev, u32 wanted_flags, - enum mlx5e_priv_flag flag, - mlx5e_pflag_handler pflag_handler) + enum mlx5e_priv_flag flag) { struct mlx5e_priv *priv = netdev_priv(netdev); - bool enable = !!(wanted_flags & flag); + bool enable = !!(wanted_flags & BIT(flag)); u32 changes = wanted_flags ^ priv->channels.params.pflags; int err; - if (!(changes & flag)) + if (!(changes & BIT(flag))) return 0; - err = pflag_handler(netdev, enable); + err = mlx5e_priv_flags[flag].handler(netdev, enable); if (err) { - netdev_err(netdev, "%s private flag 0x%x failed err %d\n", - enable ? "Enable" : "Disable", flag, err); + netdev_err(netdev, "%s private flag '%s' failed err %d\n", + enable ? 
"Enable" : "Disable", mlx5e_priv_flags[flag].name, err); return err; } @@ -1696,38 +1841,17 @@ static int mlx5e_handle_pflag(struct net_device *netdev, static int mlx5e_set_priv_flags(struct net_device *netdev, u32 pflags) { struct mlx5e_priv *priv = netdev_priv(netdev); + enum mlx5e_priv_flag pflag; int err; mutex_lock(&priv->state_lock); - err = mlx5e_handle_pflag(netdev, pflags, - MLX5E_PFLAG_RX_CQE_BASED_MODER, - set_pflag_rx_cqe_based_moder); - if (err) - goto out; - - err = mlx5e_handle_pflag(netdev, pflags, - MLX5E_PFLAG_TX_CQE_BASED_MODER, - set_pflag_tx_cqe_based_moder); - if (err) - goto out; - err = mlx5e_handle_pflag(netdev, pflags, - MLX5E_PFLAG_RX_CQE_COMPRESS, - set_pflag_rx_cqe_compress); - if (err) - goto out; - - err = mlx5e_handle_pflag(netdev, pflags, - MLX5E_PFLAG_RX_STRIDING_RQ, - set_pflag_rx_striding_rq); - if (err) - goto out; - - err = mlx5e_handle_pflag(netdev, pflags, - MLX5E_PFLAG_RX_NO_CSUM_COMPLETE, - set_pflag_rx_no_csum_complete); + for (pflag = 0; pflag < MLX5E_NUM_PFLAGS; pflag++) { + err = mlx5e_handle_pflag(netdev, pflags, pflag); + if (err) + break; + } -out: mutex_unlock(&priv->state_lock); /* Need to fix some features.. */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c index c18dcebe1462..4421c10f58ae 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c @@ -771,6 +771,112 @@ void mlx5e_ethtool_init_steering(struct mlx5e_priv *priv) INIT_LIST_HEAD(&priv->fs.ethtool.rules); } +static enum mlx5e_traffic_types flow_type_to_traffic_type(u32 flow_type) +{ + switch (flow_type) { + case TCP_V4_FLOW: + return MLX5E_TT_IPV4_TCP; + case TCP_V6_FLOW: + return MLX5E_TT_IPV6_TCP; + case UDP_V4_FLOW: + return MLX5E_TT_IPV4_UDP; + case UDP_V6_FLOW: + return MLX5E_TT_IPV6_UDP; + case AH_V4_FLOW: + return MLX5E_TT_IPV4_IPSEC_AH; + case AH_V6_FLOW: + return MLX5E_TT_IPV6_IPSEC_AH; + case ESP_V4_FLOW: + return MLX5E_TT_IPV4_IPSEC_ESP; + case ESP_V6_FLOW: + return MLX5E_TT_IPV6_IPSEC_ESP; + case IPV4_FLOW: + return MLX5E_TT_IPV4; + case IPV6_FLOW: + return MLX5E_TT_IPV6; + default: + return MLX5E_NUM_INDIR_TIRS; + } +} + +static int mlx5e_set_rss_hash_opt(struct mlx5e_priv *priv, + struct ethtool_rxnfc *nfc) +{ + int inlen = MLX5_ST_SZ_BYTES(modify_tir_in); + enum mlx5e_traffic_types tt; + u8 rx_hash_field = 0; + void *in; + + tt = flow_type_to_traffic_type(nfc->flow_type); + if (tt == MLX5E_NUM_INDIR_TIRS) + return -EINVAL; + + /* RSS does not support anything other than hashing to queues + * on src IP, dest IP, TCP/UDP src port and TCP/UDP dest + * port. 
+ */ + if (nfc->flow_type != TCP_V4_FLOW && + nfc->flow_type != TCP_V6_FLOW && + nfc->flow_type != UDP_V4_FLOW && + nfc->flow_type != UDP_V6_FLOW) + return -EOPNOTSUPP; + + if (nfc->data & ~(RXH_IP_SRC | RXH_IP_DST | + RXH_L4_B_0_1 | RXH_L4_B_2_3)) + return -EOPNOTSUPP; + + if (nfc->data & RXH_IP_SRC) + rx_hash_field |= MLX5_HASH_FIELD_SEL_SRC_IP; + if (nfc->data & RXH_IP_DST) + rx_hash_field |= MLX5_HASH_FIELD_SEL_DST_IP; + if (nfc->data & RXH_L4_B_0_1) + rx_hash_field |= MLX5_HASH_FIELD_SEL_L4_SPORT; + if (nfc->data & RXH_L4_B_2_3) + rx_hash_field |= MLX5_HASH_FIELD_SEL_L4_DPORT; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + mutex_lock(&priv->state_lock); + + if (rx_hash_field == priv->rss_params.rx_hash_fields[tt]) + goto out; + + priv->rss_params.rx_hash_fields[tt] = rx_hash_field; + mlx5e_modify_tirs_hash(priv, in, inlen); + +out: + mutex_unlock(&priv->state_lock); + kvfree(in); + return 0; +} + +static int mlx5e_get_rss_hash_opt(struct mlx5e_priv *priv, + struct ethtool_rxnfc *nfc) +{ + enum mlx5e_traffic_types tt; + u32 hash_field = 0; + + tt = flow_type_to_traffic_type(nfc->flow_type); + if (tt == MLX5E_NUM_INDIR_TIRS) + return -EINVAL; + + hash_field = priv->rss_params.rx_hash_fields[tt]; + nfc->data = 0; + + if (hash_field & MLX5_HASH_FIELD_SEL_SRC_IP) + nfc->data |= RXH_IP_SRC; + if (hash_field & MLX5_HASH_FIELD_SEL_DST_IP) + nfc->data |= RXH_IP_DST; + if (hash_field & MLX5_HASH_FIELD_SEL_L4_SPORT) + nfc->data |= RXH_L4_B_0_1; + if (hash_field & MLX5_HASH_FIELD_SEL_L4_DPORT) + nfc->data |= RXH_L4_B_2_3; + + return 0; +} + int mlx5e_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd) { int err = 0; @@ -783,6 +889,9 @@ int mlx5e_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd) case ETHTOOL_SRXCLSRLDEL: err = mlx5e_ethtool_flow_remove(priv, cmd->fs.location); break; + case ETHTOOL_SRXFH: + err = mlx5e_set_rss_hash_opt(priv, cmd); + break; default: err = -EOPNOTSUPP; break; @@ -810,6 +919,9 @@ int mlx5e_get_rxnfc(struct net_device *dev, case ETHTOOL_GRXCLSRLALL: err = mlx5e_ethtool_get_all_flows(priv, info, rule_locs); break; + case ETHTOOL_GRXFH: + err = mlx5e_get_rss_hash_opt(priv, info); + break; default: err = -EOPNOTSUPP; break; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index b70cb6fd164c..46157e2a1e5a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -35,6 +35,7 @@ #include <linux/mlx5/fs.h> #include <net/vxlan.h> #include <linux/bpf.h> +#include <linux/if_bridge.h> #include <net/page_pool.h> #include "eswitch.h" #include "en.h" @@ -49,6 +50,9 @@ #include "lib/clock.h" #include "en/port.h" #include "en/xdp.h" +#include "lib/eq.h" +#include "en/monitor_stats.h" +#include "en/reporter.h" struct mlx5e_rq_param { u32 rqc[MLX5_ST_SZ_DW(rqc)]; @@ -59,6 +63,7 @@ struct mlx5e_rq_param { struct mlx5e_sq_param { u32 sqc[MLX5_ST_SZ_DW(sqc)]; struct mlx5_wq_param wq; + bool is_mpw; }; struct mlx5e_cq_param { @@ -168,8 +173,7 @@ static u8 mlx5e_mpwqe_get_log_stride_size(struct mlx5_core_dev *mdev, if (mlx5e_rx_mpwqe_is_linear_skb(mdev, params)) return order_base_2(mlx5e_rx_get_linear_frag_sz(params)); - return MLX5E_MPWQE_STRIDE_SZ(mdev, - MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)); + return MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev); } static u8 mlx5e_mpwqe_get_log_num_strides(struct mlx5_core_dev *mdev, @@ -228,7 +232,7 @@ void mlx5e_set_rq_type(struct mlx5_core_dev *mdev, struct mlx5e_params 
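mlx5e_set_rss_hash_opt() and mlx5e_get_rss_hash_opt() above translate between the ethtool RXH_* flags and the device hash-field selectors in both directions; keeping the two mappings symmetric is what lets ETHTOOL_GRXFH report exactly the fields that ETHTOOL_SRXFH configured. A minimal sketch of such a symmetric translation (all constants below are illustrative placeholders, not the real RXH_* or MLX5_HASH_FIELD_SEL_* values):

#include <assert.h>
#include <stdint.h>

#define SEL_SRC_IP   (1u << 0) /* placeholder device-side selector bits */
#define SEL_DST_IP   (1u << 1)
#define SEL_L4_SPORT (1u << 2)
#define SEL_L4_DPORT (1u << 3)

#define HASH_IP_SRC  (1u << 0) /* placeholder ethtool-side bits */
#define HASH_IP_DST  (1u << 1)
#define HASH_L4_B01  (1u << 2)
#define HASH_L4_B23  (1u << 3)

static uint32_t hash_to_sel(uint32_t h) /* the 'set' direction */
{
	uint32_t s = 0;

	if (h & HASH_IP_SRC) s |= SEL_SRC_IP;
	if (h & HASH_IP_DST) s |= SEL_DST_IP;
	if (h & HASH_L4_B01) s |= SEL_L4_SPORT;
	if (h & HASH_L4_B23) s |= SEL_L4_DPORT;
	return s;
}

static uint32_t sel_to_hash(uint32_t s) /* the 'get' direction */
{
	uint32_t h = 0;

	if (s & SEL_SRC_IP)   h |= HASH_IP_SRC;
	if (s & SEL_DST_IP)   h |= HASH_IP_DST;
	if (s & SEL_L4_SPORT) h |= HASH_L4_B01;
	if (s & SEL_L4_DPORT) h |= HASH_L4_B23;
	return h;
}

int main(void)
{
	uint32_t h = HASH_IP_SRC | HASH_IP_DST | HASH_L4_B23;

	/* the 'get' path must report exactly what the 'set' path stored */
	assert(sel_to_hash(hash_to_sel(h)) == h);
	return 0;
}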
*params) MLX5_WQ_TYPE_CYCLIC; } -static void mlx5e_update_carrier(struct mlx5e_priv *priv) +void mlx5e_update_carrier(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; u8 port_state; @@ -267,7 +271,7 @@ void mlx5e_update_stats(struct mlx5e_priv *priv) mlx5e_stats_grps[i].update_stats(priv); } -static void mlx5e_update_ndo_stats(struct mlx5e_priv *priv) +void mlx5e_update_ndo_stats(struct mlx5e_priv *priv) { int i; @@ -298,33 +302,35 @@ void mlx5e_queue_update_stats(struct mlx5e_priv *priv) queue_work(priv->wq, &priv->update_stats_work); } -static void mlx5e_async_event(struct mlx5_core_dev *mdev, void *vpriv, - enum mlx5_dev_event event, unsigned long param) +static int async_event(struct notifier_block *nb, unsigned long event, void *data) { - struct mlx5e_priv *priv = vpriv; + struct mlx5e_priv *priv = container_of(nb, struct mlx5e_priv, events_nb); + struct mlx5_eqe *eqe = data; - if (!test_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLED, &priv->state)) - return; + if (event != MLX5_EVENT_TYPE_PORT_CHANGE) + return NOTIFY_DONE; - switch (event) { - case MLX5_DEV_EVENT_PORT_UP: - case MLX5_DEV_EVENT_PORT_DOWN: + switch (eqe->sub_type) { + case MLX5_PORT_CHANGE_SUBTYPE_DOWN: + case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE: queue_work(priv->wq, &priv->update_carrier_work); break; default: - break; + return NOTIFY_DONE; } + + return NOTIFY_OK; } static void mlx5e_enable_async_events(struct mlx5e_priv *priv) { - set_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLED, &priv->state); + priv->events_nb.notifier_call = async_event; + mlx5_notifier_register(priv->mdev, &priv->events_nb); } static void mlx5e_disable_async_events(struct mlx5e_priv *priv) { - clear_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLED, &priv->state); - synchronize_irq(pci_irq_vector(priv->mdev->pdev, MLX5_EQ_VEC_ASYNC)); + mlx5_notifier_unregister(priv->mdev, &priv->events_nb); } static inline void mlx5e_build_umr_wqe(struct mlx5e_rq *rq, @@ -945,7 +951,11 @@ static int mlx5e_open_rq(struct mlx5e_channel *c, if (params->rx_dim_enabled) __set_bit(MLX5E_RQ_STATE_AM, &c->rq.state); - if (params->pflags & MLX5E_PFLAG_RX_NO_CSUM_COMPLETE) + /* We disable csum_complete when XDP is enabled since + * XDP programs might manipulate packets which will render + * skb->checksum incorrect. 
+ */ + if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_NO_CSUM_COMPLETE) || c->xdp) __set_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &c->rq.state); return 0; @@ -988,18 +998,42 @@ static void mlx5e_close_rq(struct mlx5e_rq *rq) static void mlx5e_free_xdpsq_db(struct mlx5e_xdpsq *sq) { - kvfree(sq->db.xdpi); + kvfree(sq->db.xdpi_fifo.xi); + kvfree(sq->db.wqe_info); +} + +static int mlx5e_alloc_xdpsq_fifo(struct mlx5e_xdpsq *sq, int numa) +{ + struct mlx5e_xdp_info_fifo *xdpi_fifo = &sq->db.xdpi_fifo; + int wq_sz = mlx5_wq_cyc_get_size(&sq->wq); + int dsegs_per_wq = wq_sz * MLX5_SEND_WQEBB_NUM_DS; + + xdpi_fifo->xi = kvzalloc_node(sizeof(*xdpi_fifo->xi) * dsegs_per_wq, + GFP_KERNEL, numa); + if (!xdpi_fifo->xi) + return -ENOMEM; + + xdpi_fifo->pc = &sq->xdpi_fifo_pc; + xdpi_fifo->cc = &sq->xdpi_fifo_cc; + xdpi_fifo->mask = dsegs_per_wq - 1; + + return 0; } static int mlx5e_alloc_xdpsq_db(struct mlx5e_xdpsq *sq, int numa) { int wq_sz = mlx5_wq_cyc_get_size(&sq->wq); + int err; - sq->db.xdpi = kvzalloc_node(array_size(wq_sz, sizeof(*sq->db.xdpi)), - GFP_KERNEL, numa); - if (!sq->db.xdpi) { - mlx5e_free_xdpsq_db(sq); + sq->db.wqe_info = kvzalloc_node(sizeof(*sq->db.wqe_info) * wq_sz, + GFP_KERNEL, numa); + if (!sq->db.wqe_info) return -ENOMEM; + + err = mlx5e_alloc_xdpsq_fifo(sq, numa); + if (err) { + mlx5e_free_xdpsq_db(sq); + return err; } return 0; @@ -1131,7 +1165,7 @@ static int mlx5e_alloc_txqsq_db(struct mlx5e_txqsq *sq, int numa) return 0; } -static void mlx5e_sq_recover(struct work_struct *work); +static void mlx5e_tx_err_cqe_work(struct work_struct *recover_work); static int mlx5e_alloc_txqsq(struct mlx5e_channel *c, int txq_ix, struct mlx5e_params *params, @@ -1153,7 +1187,7 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c, sq->uar_map = mdev->mlx5e_res.bfreg.map; sq->min_inline_mode = params->tx_min_inline_mode; sq->stats = &c->priv->channel_stats[c->ix].sq[tc]; - INIT_WORK(&sq->recover.recover_work, mlx5e_sq_recover); + INIT_WORK(&sq->recover_work, mlx5e_tx_err_cqe_work); if (MLX5_IPSEC_DEV(c->priv->mdev)) set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state); if (mlx5_accel_is_tls_device(c->priv->mdev)) @@ -1241,15 +1275,8 @@ static int mlx5e_create_sq(struct mlx5_core_dev *mdev, return err; } -struct mlx5e_modify_sq_param { - int curr_state; - int next_state; - bool rl_update; - int rl_index; -}; - -static int mlx5e_modify_sq(struct mlx5_core_dev *mdev, u32 sqn, - struct mlx5e_modify_sq_param *p) +int mlx5e_modify_sq(struct mlx5_core_dev *mdev, u32 sqn, + struct mlx5e_modify_sq_param *p) { void *in; void *sqc; @@ -1347,17 +1374,7 @@ err_free_txqsq: return err; } -static void mlx5e_reset_txqsq_cc_pc(struct mlx5e_txqsq *sq) -{ - WARN_ONCE(sq->cc != sq->pc, - "SQ 0x%x: cc (0x%x) != pc (0x%x)\n", - sq->sqn, sq->cc, sq->pc); - sq->cc = 0; - sq->dma_fifo_cc = 0; - sq->pc = 0; -} - -static void mlx5e_activate_txqsq(struct mlx5e_txqsq *sq) +void mlx5e_activate_txqsq(struct mlx5e_txqsq *sq) { sq->txq = netdev_get_tx_queue(sq->channel->netdev, sq->txq_ix); clear_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state); @@ -1366,7 +1383,7 @@ static void mlx5e_activate_txqsq(struct mlx5e_txqsq *sq) netif_tx_start_queue(sq->txq); } -static inline void netif_tx_disable_queue(struct netdev_queue *txq) +void mlx5e_tx_disable_queue(struct netdev_queue *txq) { __netif_tx_lock_bh(txq); netif_tx_stop_queue(txq); @@ -1382,7 +1399,7 @@ static void mlx5e_deactivate_txqsq(struct mlx5e_txqsq *sq) /* prevent netif_tx_wake_queue */ napi_synchronize(&c->napi); - netif_tx_disable_queue(sq->txq); + mlx5e_tx_disable_queue(sq->txq); /* last 
doorbell out, godspeed .. */ if (mlx5e_wqc_has_room_for(wq, sq->cc, sq->pc, 1)) { @@ -1402,6 +1419,7 @@ static void mlx5e_close_txqsq(struct mlx5e_txqsq *sq) struct mlx5_rate_limit rl = {0}; cancel_work_sync(&sq->dim.work); + cancel_work_sync(&sq->recover_work); mlx5e_destroy_sq(mdev, sq->sqn); if (sq->rate_limit) { rl.rate = sq->rate_limit; @@ -1411,105 +1429,12 @@ static void mlx5e_close_txqsq(struct mlx5e_txqsq *sq) mlx5e_free_txqsq(sq); } -static int mlx5e_wait_for_sq_flush(struct mlx5e_txqsq *sq) +static void mlx5e_tx_err_cqe_work(struct work_struct *recover_work) { - unsigned long exp_time = jiffies + msecs_to_jiffies(2000); + struct mlx5e_txqsq *sq = container_of(recover_work, struct mlx5e_txqsq, + recover_work); - while (time_before(jiffies, exp_time)) { - if (sq->cc == sq->pc) - return 0; - - msleep(20); - } - - netdev_err(sq->channel->netdev, - "Wait for SQ 0x%x flush timeout (sq cc = 0x%x, sq pc = 0x%x)\n", - sq->sqn, sq->cc, sq->pc); - - return -ETIMEDOUT; -} - -static int mlx5e_sq_to_ready(struct mlx5e_txqsq *sq, int curr_state) -{ - struct mlx5_core_dev *mdev = sq->channel->mdev; - struct net_device *dev = sq->channel->netdev; - struct mlx5e_modify_sq_param msp = {0}; - int err; - - msp.curr_state = curr_state; - msp.next_state = MLX5_SQC_STATE_RST; - - err = mlx5e_modify_sq(mdev, sq->sqn, &msp); - if (err) { - netdev_err(dev, "Failed to move sq 0x%x to reset\n", sq->sqn); - return err; - } - - memset(&msp, 0, sizeof(msp)); - msp.curr_state = MLX5_SQC_STATE_RST; - msp.next_state = MLX5_SQC_STATE_RDY; - - err = mlx5e_modify_sq(mdev, sq->sqn, &msp); - if (err) { - netdev_err(dev, "Failed to move sq 0x%x to ready\n", sq->sqn); - return err; - } - - return 0; -} - -static void mlx5e_sq_recover(struct work_struct *work) -{ - struct mlx5e_txqsq_recover *recover = - container_of(work, struct mlx5e_txqsq_recover, - recover_work); - struct mlx5e_txqsq *sq = container_of(recover, struct mlx5e_txqsq, - recover); - struct mlx5_core_dev *mdev = sq->channel->mdev; - struct net_device *dev = sq->channel->netdev; - u8 state; - int err; - - err = mlx5_core_query_sq_state(mdev, sq->sqn, &state); - if (err) { - netdev_err(dev, "Failed to query SQ 0x%x state. err = %d\n", - sq->sqn, err); - return; - } - - if (state != MLX5_RQC_STATE_ERR) { - netdev_err(dev, "SQ 0x%x not in ERROR state\n", sq->sqn); - return; - } - - netif_tx_disable_queue(sq->txq); - - if (mlx5e_wait_for_sq_flush(sq)) - return; - - /* If the interval between two consecutive recovers per SQ is too - * short, don't recover to avoid infinite loop of ERR_CQE -> recover. - * If we reached this state, there is probably a bug that needs to be - * fixed. let's keep the queue close and let tx timeout cleanup. - */ - if (jiffies_to_msecs(jiffies - recover->last_recover) < - MLX5E_SQ_RECOVER_MIN_INTERVAL) { - netdev_err(dev, "Recover SQ 0x%x canceled, too many error CQEs\n", - sq->sqn); - return; - } - - /* At this point, no new packets will arrive from the stack as TXQ is - * marked with QUEUE_STATE_DRV_XOFF. In addition, NAPI cleared all - * pending WQEs. SQ can safely reset the SQ. 
- */ - if (mlx5e_sq_to_ready(sq, state)) - return; - - mlx5e_reset_txqsq_cc_pc(sq); - sq->stats->recover++; - recover->last_recover = jiffies; - mlx5e_activate_txqsq(sq); + mlx5e_tx_reporter_err_cqe(sq); } static int mlx5e_open_icosq(struct mlx5e_channel *c, @@ -1558,11 +1483,8 @@ static int mlx5e_open_xdpsq(struct mlx5e_channel *c, struct mlx5e_xdpsq *sq, bool is_redirect) { - unsigned int ds_cnt = MLX5E_XDP_TX_DS_COUNT; struct mlx5e_create_sq_param csp = {}; - unsigned int inline_hdr_sz = 0; int err; - int i; err = mlx5e_alloc_xdpsq(c, params, param, sq, is_redirect); if (err) @@ -1573,30 +1495,40 @@ static int mlx5e_open_xdpsq(struct mlx5e_channel *c, csp.cqn = sq->cq.mcq.cqn; csp.wq_ctrl = &sq->wq_ctrl; csp.min_inline_mode = sq->min_inline_mode; - if (is_redirect) - set_bit(MLX5E_SQ_STATE_REDIRECT, &sq->state); set_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); err = mlx5e_create_sq_rdy(c->mdev, param, &csp, &sq->sqn); if (err) goto err_free_xdpsq; - if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) { - inline_hdr_sz = MLX5E_XDP_MIN_INLINE; - ds_cnt++; - } + mlx5e_set_xmit_fp(sq, param->is_mpw); + + if (!param->is_mpw) { + unsigned int ds_cnt = MLX5E_XDP_TX_DS_COUNT; + unsigned int inline_hdr_sz = 0; + int i; + + if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) { + inline_hdr_sz = MLX5E_XDP_MIN_INLINE; + ds_cnt++; + } - /* Pre initialize fixed WQE fields */ - for (i = 0; i < mlx5_wq_cyc_get_size(&sq->wq); i++) { - struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(&sq->wq, i); - struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; - struct mlx5_wqe_eth_seg *eseg = &wqe->eth; - struct mlx5_wqe_data_seg *dseg; + /* Pre initialize fixed WQE fields */ + for (i = 0; i < mlx5_wq_cyc_get_size(&sq->wq); i++) { + struct mlx5e_xdp_wqe_info *wi = &sq->db.wqe_info[i]; + struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(&sq->wq, i); + struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; + struct mlx5_wqe_eth_seg *eseg = &wqe->eth; + struct mlx5_wqe_data_seg *dseg; - cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); - eseg->inline_hdr.sz = cpu_to_be16(inline_hdr_sz); + cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); + eseg->inline_hdr.sz = cpu_to_be16(inline_hdr_sz); - dseg = (struct mlx5_wqe_data_seg *)cseg + (ds_cnt - 1); - dseg->lkey = sq->mkey_be; + dseg = (struct mlx5_wqe_data_seg *)cseg + (ds_cnt - 1); + dseg->lkey = sq->mkey_be; + + wi->num_wqebbs = 1; + wi->num_ds = 1; + } } return 0; @@ -1608,7 +1540,7 @@ err_free_xdpsq: return err; } -static void mlx5e_close_xdpsq(struct mlx5e_xdpsq *sq) +static void mlx5e_close_xdpsq(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq) { struct mlx5e_channel *c = sq->channel; @@ -1616,7 +1548,7 @@ static void mlx5e_close_xdpsq(struct mlx5e_xdpsq *sq) napi_synchronize(&c->napi); mlx5e_destroy_sq(c->mdev, sq->sqn); - mlx5e_free_xdpsq_descs(sq); + mlx5e_free_xdpsq_descs(sq, rq); mlx5e_free_xdpsq(sq); } @@ -1769,11 +1701,6 @@ static void mlx5e_close_cq(struct mlx5e_cq *cq) mlx5e_free_cq(cq); } -static int mlx5e_get_cpu(struct mlx5e_priv *priv, int ix) -{ - return cpumask_first(priv->mdev->priv.irq_info[ix].mask); -} - static int mlx5e_open_tx_cqs(struct mlx5e_channel *c, struct mlx5e_params *params, struct mlx5e_channel_param *cparam) @@ -1919,14 +1846,37 @@ static int mlx5e_set_tx_maxrate(struct net_device *dev, int index, u32 rate) return err; } +static int mlx5e_alloc_xps_cpumask(struct mlx5e_channel *c, + struct mlx5e_params *params) +{ + int num_comp_vectors = mlx5_comp_vectors_count(c->mdev); + int irq; + + if (!zalloc_cpumask_var(&c->xps_cpumask, GFP_KERNEL)) + return -ENOMEM; + 
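/* The loop below stripes the device's completion vectors over channels
 * round-robin: channel ix owns vectors ix, ix + num_channels,
 * ix + 2 * num_channels, ..., and its XPS mask collects the first CPU of
 * each such vector's IRQ affinity. A self-contained illustration of the
 * striping (vector and channel counts are arbitrary example values):
 */
#include <stdio.h>

int main(void)
{
	int num_comp_vectors = 8, num_channels = 3, ix, irq;

	for (ix = 0; ix < num_channels; ix++) {
		printf("channel %d <- vectors", ix);
		for (irq = ix; irq < num_comp_vectors; irq += num_channels)
			printf(" %d", irq);
		printf("\n");	/* channel 0 <- vectors 0 3 6, and so on */
	}
	return 0;
}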
+ for (irq = c->ix; irq < num_comp_vectors; irq += params->num_channels) { + int cpu = cpumask_first(mlx5_comp_irq_get_affinity_mask(c->mdev, irq)); + + cpumask_set_cpu(cpu, c->xps_cpumask); + } + + return 0; +} + +static void mlx5e_free_xps_cpumask(struct mlx5e_channel *c) +{ + free_cpumask_var(c->xps_cpumask); +} + static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, struct mlx5e_params *params, struct mlx5e_channel_param *cparam, struct mlx5e_channel **cp) { + int cpu = cpumask_first(mlx5_comp_irq_get_affinity_mask(priv->mdev, ix)); struct net_dim_cq_moder icocq_moder = {0, 0}; struct net_device *netdev = priv->netdev; - int cpu = mlx5e_get_cpu(priv, ix); struct mlx5e_channel *c; unsigned int irq; int err; @@ -1951,9 +1901,12 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, c->num_tc = params->num_tc; c->xdp = !!params->xdp_prog; c->stats = &priv->channel_stats[ix].ch; - c->irq_desc = irq_to_desc(irq); + err = mlx5e_alloc_xps_cpumask(c, params); + if (err) + goto err_free_channel; + netif_napi_add(netdev, &c->napi, mlx5e_napi_poll, 64); err = mlx5e_open_cq(c, icocq_moder, &cparam->icosq_cq, &c->icosq.cq); @@ -2009,7 +1962,7 @@ err_close_rq: err_close_xdp_sq: if (c->xdp) - mlx5e_close_xdpsq(&c->rq.xdpsq); + mlx5e_close_xdpsq(&c->rq.xdpsq, &c->rq); err_close_sqs: mlx5e_close_sqs(c); @@ -2036,6 +1989,9 @@ err_close_icosq_cq: err_napi_del: netif_napi_del(&c->napi); + mlx5e_free_xps_cpumask(c); + +err_free_channel: kvfree(c); return err; @@ -2048,7 +2004,7 @@ static void mlx5e_activate_channel(struct mlx5e_channel *c) for (tc = 0; tc < c->num_tc; tc++) mlx5e_activate_txqsq(&c->sq[tc]); mlx5e_activate_rq(&c->rq); - netif_set_xps_queue(c->netdev, get_cpu_mask(c->cpu), c->ix); + netif_set_xps_queue(c->netdev, c->xps_cpumask, c->ix); } static void mlx5e_deactivate_channel(struct mlx5e_channel *c) @@ -2062,10 +2018,10 @@ static void mlx5e_deactivate_channel(struct mlx5e_channel *c) static void mlx5e_close_channel(struct mlx5e_channel *c) { - mlx5e_close_xdpsq(&c->xdpsq); + mlx5e_close_xdpsq(&c->xdpsq, NULL); mlx5e_close_rq(&c->rq); if (c->xdp) - mlx5e_close_xdpsq(&c->rq.xdpsq); + mlx5e_close_xdpsq(&c->rq.xdpsq, &c->rq); mlx5e_close_sqs(c); mlx5e_close_icosq(&c->icosq); napi_disable(&c->napi); @@ -2076,6 +2032,7 @@ static void mlx5e_close_channel(struct mlx5e_channel *c) mlx5e_close_tx_cqs(c); mlx5e_close_cq(&c->icosq.cq); netif_napi_del(&c->napi); + mlx5e_free_xps_cpumask(c); kvfree(c); } @@ -2232,6 +2189,8 @@ static void mlx5e_build_common_cq_param(struct mlx5e_priv *priv, void *cqc = param->cqc; MLX5_SET(cqc, cqc, uar_page, priv->mdev->priv.uar->index); + if (MLX5_CAP_GEN(priv->mdev, cqe_128_always) && cache_line_size() >= 128) + MLX5_SET(cqc, cqc, cqe_sz, CQE_STRIDE_128_PAD); } static void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv, @@ -2308,6 +2267,7 @@ static void mlx5e_build_xdpsq_param(struct mlx5e_priv *priv, mlx5e_build_sq_param_common(priv, param); MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size); + param->is_mpw = MLX5E_GET_PFLAG(params, MLX5E_PFLAG_XDP_TX_MPWQE); } static void mlx5e_build_channel_param(struct mlx5e_priv *priv, @@ -2346,6 +2306,10 @@ int mlx5e_open_channels(struct mlx5e_priv *priv, goto err_close_channels; } + if (!IS_ERR_OR_NULL(priv->tx_reporter)) + devlink_health_reporter_state_update(priv->tx_reporter, + DEVLINK_HEALTH_REPORTER_STATE_HEALTHY); + kvfree(cparam); return 0; @@ -2510,7 +2474,7 @@ static void mlx5e_fill_rqt_rqns(struct mlx5e_priv *priv, int sz, if (rrp.rss.hfunc == ETH_RSS_HASH_XOR) ix = mlx5e_bits_invert(i, ilog2(sz)); - 
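/* Back in mlx5e_alloc_xdpsq_fifo() above, the xdpi completion FIFO uses
 * free-running producer/consumer counters (pc/cc) and a power-of-two
 * mask, so no slot is wasted on full/empty disambiguation. A sketch of
 * that scheme with hypothetical names (in the driver the counters live
 * in the SQ and entries are stored by value, not as pointers):
 */
#include <stdint.h>
#include <stddef.h>

#define SLOTS 16			/* must be a power of two */

struct xfifo {
	uint32_t pc, cc;		/* free-running; wraparound is fine */
	uint32_t mask;			/* SLOTS - 1 */
	void *slot[SLOTS];
};

static int xfifo_push(struct xfifo *f, void *v)
{
	if (f->pc - f->cc > f->mask)	/* SLOTS entries already in flight */
		return -1;
	f->slot[f->pc++ & f->mask] = v;
	return 0;
}

static void *xfifo_pop(struct xfifo *f)
{
	if (f->pc == f->cc)		/* empty */
		return NULL;
	return f->slot[f->cc++ & f->mask];
}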
ix = priv->channels.params.indirection_rqt[ix]; + ix = priv->rss_params.indirection_rqt[ix]; rqn = rrp.rss.channels->c[ix]->rq.rqn; } else { rqn = rrp.rqn; @@ -2593,7 +2557,7 @@ static void mlx5e_redirect_rqts_to_channels(struct mlx5e_priv *priv, { .rss = { .channels = chs, - .hfunc = chs->params.rss_hfunc, + .hfunc = priv->rss_params.hfunc, } }, }; @@ -2613,6 +2577,54 @@ static void mlx5e_redirect_rqts_to_drop(struct mlx5e_priv *priv) mlx5e_redirect_rqts(priv, drop_rrp); } +static const struct mlx5e_tirc_config tirc_default_config[MLX5E_NUM_INDIR_TIRS] = { + [MLX5E_TT_IPV4_TCP] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4, + .l4_prot_type = MLX5_L4_PROT_TYPE_TCP, + .rx_hash_fields = MLX5_HASH_IP_L4PORTS, + }, + [MLX5E_TT_IPV6_TCP] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6, + .l4_prot_type = MLX5_L4_PROT_TYPE_TCP, + .rx_hash_fields = MLX5_HASH_IP_L4PORTS, + }, + [MLX5E_TT_IPV4_UDP] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4, + .l4_prot_type = MLX5_L4_PROT_TYPE_UDP, + .rx_hash_fields = MLX5_HASH_IP_L4PORTS, + }, + [MLX5E_TT_IPV6_UDP] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6, + .l4_prot_type = MLX5_L4_PROT_TYPE_UDP, + .rx_hash_fields = MLX5_HASH_IP_L4PORTS, + }, + [MLX5E_TT_IPV4_IPSEC_AH] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4, + .l4_prot_type = 0, + .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI, + }, + [MLX5E_TT_IPV6_IPSEC_AH] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6, + .l4_prot_type = 0, + .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI, + }, + [MLX5E_TT_IPV4_IPSEC_ESP] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4, + .l4_prot_type = 0, + .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI, + }, + [MLX5E_TT_IPV6_IPSEC_ESP] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6, + .l4_prot_type = 0, + .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI, + }, + [MLX5E_TT_IPV4] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4, + .l4_prot_type = 0, + .rx_hash_fields = MLX5_HASH_IP, + }, + [MLX5E_TT_IPV6] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6, + .l4_prot_type = 0, + .rx_hash_fields = MLX5_HASH_IP, + }, +}; + +struct mlx5e_tirc_config mlx5e_tirc_get_default_config(enum mlx5e_traffic_types tt) +{ + return tirc_default_config[tt]; +} + static void mlx5e_build_tir_ctx_lro(struct mlx5e_params *params, void *tirc) { if (!params->lro_en) @@ -2628,116 +2640,68 @@ static void mlx5e_build_tir_ctx_lro(struct mlx5e_params *params, void *tirc) MLX5_SET(tirc, tirc, lro_timeout_period_usecs, params->lro_timeout); } -void mlx5e_build_indir_tir_ctx_hash(struct mlx5e_params *params, - enum mlx5e_traffic_types tt, +void mlx5e_build_indir_tir_ctx_hash(struct mlx5e_rss_params *rss_params, + const struct mlx5e_tirc_config *ttconfig, void *tirc, bool inner) { void *hfso = inner ? 
MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_inner) : MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer); -#define MLX5_HASH_IP (MLX5_HASH_FIELD_SEL_SRC_IP |\ - MLX5_HASH_FIELD_SEL_DST_IP) - -#define MLX5_HASH_IP_L4PORTS (MLX5_HASH_FIELD_SEL_SRC_IP |\ - MLX5_HASH_FIELD_SEL_DST_IP |\ - MLX5_HASH_FIELD_SEL_L4_SPORT |\ - MLX5_HASH_FIELD_SEL_L4_DPORT) - -#define MLX5_HASH_IP_IPSEC_SPI (MLX5_HASH_FIELD_SEL_SRC_IP |\ - MLX5_HASH_FIELD_SEL_DST_IP |\ - MLX5_HASH_FIELD_SEL_IPSEC_SPI) - - MLX5_SET(tirc, tirc, rx_hash_fn, mlx5e_rx_hash_fn(params->rss_hfunc)); - if (params->rss_hfunc == ETH_RSS_HASH_TOP) { + MLX5_SET(tirc, tirc, rx_hash_fn, mlx5e_rx_hash_fn(rss_params->hfunc)); + if (rss_params->hfunc == ETH_RSS_HASH_TOP) { void *rss_key = MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key); size_t len = MLX5_FLD_SZ_BYTES(tirc, rx_hash_toeplitz_key); MLX5_SET(tirc, tirc, rx_hash_symmetric, 1); - memcpy(rss_key, params->toeplitz_hash_key, len); + memcpy(rss_key, rss_params->toeplitz_hash_key, len); } + MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, + ttconfig->l3_prot_type); + MLX5_SET(rx_hash_field_select, hfso, l4_prot_type, + ttconfig->l4_prot_type); + MLX5_SET(rx_hash_field_select, hfso, selected_fields, + ttconfig->rx_hash_fields); +} - switch (tt) { - case MLX5E_TT_IPV4_TCP: - MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, - MLX5_L3_PROT_TYPE_IPV4); - MLX5_SET(rx_hash_field_select, hfso, l4_prot_type, - MLX5_L4_PROT_TYPE_TCP); - MLX5_SET(rx_hash_field_select, hfso, selected_fields, - MLX5_HASH_IP_L4PORTS); - break; - - case MLX5E_TT_IPV6_TCP: - MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, - MLX5_L3_PROT_TYPE_IPV6); - MLX5_SET(rx_hash_field_select, hfso, l4_prot_type, - MLX5_L4_PROT_TYPE_TCP); - MLX5_SET(rx_hash_field_select, hfso, selected_fields, - MLX5_HASH_IP_L4PORTS); - break; - - case MLX5E_TT_IPV4_UDP: - MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, - MLX5_L3_PROT_TYPE_IPV4); - MLX5_SET(rx_hash_field_select, hfso, l4_prot_type, - MLX5_L4_PROT_TYPE_UDP); - MLX5_SET(rx_hash_field_select, hfso, selected_fields, - MLX5_HASH_IP_L4PORTS); - break; - - case MLX5E_TT_IPV6_UDP: - MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, - MLX5_L3_PROT_TYPE_IPV6); - MLX5_SET(rx_hash_field_select, hfso, l4_prot_type, - MLX5_L4_PROT_TYPE_UDP); - MLX5_SET(rx_hash_field_select, hfso, selected_fields, - MLX5_HASH_IP_L4PORTS); - break; - - case MLX5E_TT_IPV4_IPSEC_AH: - MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, - MLX5_L3_PROT_TYPE_IPV4); - MLX5_SET(rx_hash_field_select, hfso, selected_fields, - MLX5_HASH_IP_IPSEC_SPI); - break; +static void mlx5e_update_rx_hash_fields(struct mlx5e_tirc_config *ttconfig, + enum mlx5e_traffic_types tt, + u32 rx_hash_fields) +{ + *ttconfig = tirc_default_config[tt]; + ttconfig->rx_hash_fields = rx_hash_fields; +} - case MLX5E_TT_IPV6_IPSEC_AH: - MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, - MLX5_L3_PROT_TYPE_IPV6); - MLX5_SET(rx_hash_field_select, hfso, selected_fields, - MLX5_HASH_IP_IPSEC_SPI); - break; +void mlx5e_modify_tirs_hash(struct mlx5e_priv *priv, void *in, int inlen) +{ + void *tirc = MLX5_ADDR_OF(modify_tir_in, in, ctx); + struct mlx5e_rss_params *rss = &priv->rss_params; + struct mlx5_core_dev *mdev = priv->mdev; + int ctxlen = MLX5_ST_SZ_BYTES(tirc); + struct mlx5e_tirc_config ttconfig; + int tt; - case MLX5E_TT_IPV4_IPSEC_ESP: - MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, - MLX5_L3_PROT_TYPE_IPV4); - MLX5_SET(rx_hash_field_select, hfso, selected_fields, - MLX5_HASH_IP_IPSEC_SPI); - break; + MLX5_SET(modify_tir_in, in, 
bitmask.hash, 1); - case MLX5E_TT_IPV6_IPSEC_ESP: - MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, - MLX5_L3_PROT_TYPE_IPV6); - MLX5_SET(rx_hash_field_select, hfso, selected_fields, - MLX5_HASH_IP_IPSEC_SPI); - break; + for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { + memset(tirc, 0, ctxlen); + mlx5e_update_rx_hash_fields(&ttconfig, tt, + rss->rx_hash_fields[tt]); + mlx5e_build_indir_tir_ctx_hash(rss, &ttconfig, tirc, false); + mlx5_core_modify_tir(mdev, priv->indir_tir[tt].tirn, in, inlen); + } - case MLX5E_TT_IPV4: - MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, - MLX5_L3_PROT_TYPE_IPV4); - MLX5_SET(rx_hash_field_select, hfso, selected_fields, - MLX5_HASH_IP); - break; + if (!mlx5e_tunnel_inner_ft_supported(priv->mdev)) + return; - case MLX5E_TT_IPV6: - MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, - MLX5_L3_PROT_TYPE_IPV6); - MLX5_SET(rx_hash_field_select, hfso, selected_fields, - MLX5_HASH_IP); - break; - default: - WARN_ONCE(true, "%s: bad traffic type!\n", __func__); + for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { + memset(tirc, 0, ctxlen); + mlx5e_update_rx_hash_fields(&ttconfig, tt, + rss->rx_hash_fields[tt]); + mlx5e_build_indir_tir_ctx_hash(rss, &ttconfig, tirc, true); + mlx5_core_modify_tir(mdev, priv->inner_indir_tir[tt].tirn, in, + inlen); } } @@ -2794,7 +2758,8 @@ static void mlx5e_build_inner_indir_tir_ctx(struct mlx5e_priv *priv, MLX5_SET(tirc, tirc, indirect_table, priv->indir_rqt.rqtn); MLX5_SET(tirc, tirc, tunneled_offload_en, 0x1); - mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc, true); + mlx5e_build_indir_tir_ctx_hash(&priv->rss_params, + &tirc_default_config[tt], tirc, true); } static int mlx5e_set_mtu(struct mlx5_core_dev *mdev, @@ -2825,7 +2790,7 @@ static void mlx5e_query_mtu(struct mlx5_core_dev *mdev, *mtu = MLX5E_HW2SW_MTU(params, hw_mtu); } -static int mlx5e_set_dev_port_mtu(struct mlx5e_priv *priv) +int mlx5e_set_dev_port_mtu(struct mlx5e_priv *priv) { struct mlx5e_params *params = &priv->channels.params; struct net_device *netdev = priv->netdev; @@ -2903,9 +2868,10 @@ void mlx5e_activate_priv_channels(struct mlx5e_priv *priv) mlx5e_build_tx2sq_maps(priv); mlx5e_activate_channels(&priv->channels); + mlx5e_xdp_tx_enable(priv); netif_tx_start_all_queues(priv->netdev); - if (MLX5_ESWITCH_MANAGER(priv->mdev)) + if (mlx5e_is_vport_rep(priv)) mlx5e_add_sqs_fwd_rules(priv); mlx5e_wait_channels_min_rx_wqes(&priv->channels); @@ -2916,7 +2882,7 @@ void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv) { mlx5e_redirect_rqts_to_drop(priv); - if (MLX5_ESWITCH_MANAGER(priv->mdev)) + if (mlx5e_is_vport_rep(priv)) mlx5e_remove_sqs_fwd_rules(priv); /* FIXME: This is a W/A only for tx timeout watch dog false alarm when @@ -2924,16 +2890,18 @@ void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv) */ netif_tx_stop_all_queues(priv->netdev); netif_tx_disable(priv->netdev); + mlx5e_xdp_tx_disable(priv); mlx5e_deactivate_channels(&priv->channels); } -void mlx5e_switch_priv_channels(struct mlx5e_priv *priv, - struct mlx5e_channels *new_chs, - mlx5e_fp_hw_modify hw_modify) +static void mlx5e_switch_priv_channels(struct mlx5e_priv *priv, + struct mlx5e_channels *new_chs, + mlx5e_fp_hw_modify hw_modify) { struct net_device *netdev = priv->netdev; int new_num_txqs; int carrier_ok; + new_num_txqs = new_chs->num * new_chs->params.num_tc; carrier_ok = netif_carrier_ok(netdev); @@ -2959,6 +2927,28 @@ void mlx5e_switch_priv_channels(struct mlx5e_priv *priv, netif_carrier_on(netdev); } +int mlx5e_safe_switch_channels(struct mlx5e_priv *priv, + 
struct mlx5e_channels *new_chs, + mlx5e_fp_hw_modify hw_modify) +{ + int err; + + err = mlx5e_open_channels(priv, new_chs); + if (err) + return err; + + mlx5e_switch_priv_channels(priv, new_chs, hw_modify); + return 0; +} + +int mlx5e_safe_reopen_channels(struct mlx5e_priv *priv) +{ + struct mlx5e_channels new_channels = {}; + + new_channels.params = priv->channels.params; + return mlx5e_safe_switch_channels(priv, &new_channels, NULL); +} + void mlx5e_timestamp_init(struct mlx5e_priv *priv) { priv->tstamp.tx_type = HWTSTAMP_TX_OFF; @@ -3168,10 +3158,11 @@ err_close_tises: return err; } -void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv) +static void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv) { int tc; + mlx5e_tx_reporter_destroy(priv); for (tc = 0; tc < priv->profile->max_tc; tc++) mlx5e_destroy_tis(priv->mdev, priv->tisn[tc]); } @@ -3186,7 +3177,9 @@ static void mlx5e_build_indir_tir_ctx(struct mlx5e_priv *priv, MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT); MLX5_SET(tirc, tirc, indirect_table, priv->indir_rqt.rqtn); - mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc, false); + + mlx5e_build_indir_tir_ctx_hash(&priv->rss_params, + &tirc_default_config[tt], tirc, false); } static void mlx5e_build_direct_tir_ctx(struct mlx5e_priv *priv, u32 rqtn, u32 *tirc) @@ -3372,13 +3365,12 @@ static int mlx5e_setup_tc_mqprio(struct net_device *netdev, goto out; } - err = mlx5e_open_channels(priv, &new_channels); + err = mlx5e_safe_switch_channels(priv, &new_channels, NULL); if (err) goto out; priv->max_opened_tc = max_t(u8, priv->max_opened_tc, new_channels.params.num_tc); - mlx5e_switch_priv_channels(priv, &new_channels, NULL); out: mutex_unlock(&priv->state_lock); return err; @@ -3391,11 +3383,14 @@ static int mlx5e_setup_tc_cls_flower(struct mlx5e_priv *priv, { switch (cls_flower->command) { case TC_CLSFLOWER_REPLACE: - return mlx5e_configure_flower(priv, cls_flower, flags); + return mlx5e_configure_flower(priv->netdev, priv, cls_flower, + flags); case TC_CLSFLOWER_DESTROY: - return mlx5e_delete_flower(priv, cls_flower, flags); + return mlx5e_delete_flower(priv->netdev, priv, cls_flower, + flags); case TC_CLSFLOWER_STATS: - return mlx5e_stats_flower(priv, cls_flower, flags); + return mlx5e_stats_flower(priv->netdev, priv, cls_flower, + flags); default: return -EOPNOTSUPP; } @@ -3408,7 +3403,8 @@ static int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data, switch (type) { case TC_SETUP_CLSFLOWER: - return mlx5e_setup_tc_cls_flower(priv, type_data, MLX5E_TC_INGRESS); + return mlx5e_setup_tc_cls_flower(priv, type_data, MLX5E_TC_INGRESS | + MLX5E_TC_NIC_OFFLOAD); default: return -EOPNOTSUPP; } @@ -3451,16 +3447,39 @@ static int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type, } } -static void +void mlx5e_fold_sw_stats64(struct mlx5e_priv *priv, struct rtnl_link_stats64 *s) +{ + int i; + + for (i = 0; i < mlx5e_get_netdev_max_channels(priv->netdev); i++) { + struct mlx5e_channel_stats *channel_stats = &priv->channel_stats[i]; + struct mlx5e_rq_stats *rq_stats = &channel_stats->rq; + int j; + + s->rx_packets += rq_stats->packets; + s->rx_bytes += rq_stats->bytes; + + for (j = 0; j < priv->max_opened_tc; j++) { + struct mlx5e_sq_stats *sq_stats = &channel_stats->sq[j]; + + s->tx_packets += sq_stats->packets; + s->tx_bytes += sq_stats->bytes; + s->tx_dropped += sq_stats->dropped; + } + } +} + +void mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) { struct mlx5e_priv *priv = netdev_priv(dev); - struct mlx5e_sw_stats 
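/* mlx5e_safe_switch_channels() above is deliberately make-before-break:
 * the replacement channel set is fully created before the live one is
 * torn down, so a failed reconfiguration leaves traffic flowing on the
 * old channels. Callers (mqprio, LRO, MTU below) stage the new params in
 * a scratch mlx5e_channels and hand it over. A userspace analog of the
 * pattern, names hypothetical:
 */
#include <stdlib.h>
#include <string.h>

struct chan_set { int num; };

static int safe_switch(struct chan_set **live, const struct chan_set *want)
{
	struct chan_set *next = malloc(sizeof(*next));

	if (!next)
		return -1;		/* *live untouched on failure */
	memcpy(next, want, sizeof(*next));
	free(*live);			/* retire the old set only now */
	*live = next;
	return 0;
}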
*sstats = &priv->stats.sw; struct mlx5e_vport_stats *vstats = &priv->stats.vport; struct mlx5e_pport_stats *pstats = &priv->stats.pport; - /* update HW stats in background for next time */ - mlx5e_queue_update_stats(priv); + if (!mlx5e_monitor_counter_supported(priv)) { + /* update HW stats in background for next time */ + mlx5e_queue_update_stats(priv); + } if (mlx5e_is_uplink_rep(priv)) { stats->rx_packets = PPORT_802_3_GET(pstats, a_frames_received_ok); @@ -3468,12 +3487,7 @@ mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) stats->tx_packets = PPORT_802_3_GET(pstats, a_frames_transmitted_ok); stats->tx_bytes = PPORT_802_3_GET(pstats, a_octets_transmitted_ok); } else { - mlx5e_grp_sw_update_stats(priv); - stats->rx_packets = sstats->rx_packets; - stats->rx_bytes = sstats->rx_bytes; - stats->tx_packets = sstats->tx_packets; - stats->tx_bytes = sstats->tx_bytes; - stats->tx_dropped = sstats->tx_queue_dropped; + mlx5e_fold_sw_stats64(priv, stats); } stats->rx_dropped = priv->stats.qcnt.rx_out_of_buffer; @@ -3566,11 +3580,7 @@ static int set_feature_lro(struct net_device *netdev, bool enable) goto out; } - err = mlx5e_open_channels(priv, &new_channels); - if (err) - goto out; - - mlx5e_switch_priv_channels(priv, &new_channels, mlx5e_modify_tirs_lro); + err = mlx5e_safe_switch_channels(priv, &new_channels, mlx5e_modify_tirs_lro); out: mutex_unlock(&priv->state_lock); return err; @@ -3593,7 +3603,7 @@ static int set_feature_tc_num_filters(struct net_device *netdev, bool enable) { struct mlx5e_priv *priv = netdev_priv(netdev); - if (!enable && mlx5e_tc_num_filters(priv)) { + if (!enable && mlx5e_tc_num_filters(priv, MLX5E_TC_NIC_OFFLOAD)) { netdev_err(netdev, "Active offloaded tc filters, can't turn hw_tc_offload off\n"); return -EINVAL; @@ -3767,7 +3777,7 @@ int mlx5e_change_mtu(struct net_device *netdev, int new_mtu, if (params->xdp_prog && !mlx5e_rx_is_linear_skb(priv->mdev, &new_channels.params)) { netdev_err(netdev, "MTU(%d) > %d is not allowed while XDP enabled\n", - new_mtu, MLX5E_XDP_MAX_MTU); + new_mtu, mlx5e_xdp_max_mtu(params)); err = -EINVAL; goto out; } @@ -3788,11 +3798,10 @@ int mlx5e_change_mtu(struct net_device *netdev, int new_mtu, goto out; } - err = mlx5e_open_channels(priv, &new_channels); + err = mlx5e_safe_switch_channels(priv, &new_channels, set_mtu_cb); if (err) goto out; - mlx5e_switch_priv_channels(priv, &new_channels, set_mtu_cb); netdev->mtu = new_channels.params.sw_mtu; out: @@ -3895,7 +3904,7 @@ static int mlx5e_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) } #ifdef CONFIG_MLX5_ESWITCH -static int mlx5e_set_vf_mac(struct net_device *dev, int vf, u8 *mac) +int mlx5e_set_vf_mac(struct net_device *dev, int vf, u8 *mac) { struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5_core_dev *mdev = priv->mdev; @@ -3932,8 +3941,8 @@ static int mlx5e_set_vf_trust(struct net_device *dev, int vf, bool setting) return mlx5_eswitch_set_vport_trust(mdev->priv.eswitch, vf + 1, setting); } -static int mlx5e_set_vf_rate(struct net_device *dev, int vf, int min_tx_rate, - int max_tx_rate) +int mlx5e_set_vf_rate(struct net_device *dev, int vf, int min_tx_rate, + int max_tx_rate) { struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5_core_dev *mdev = priv->mdev; @@ -3974,8 +3983,8 @@ static int mlx5e_set_vf_link_state(struct net_device *dev, int vf, mlx5_ifla_link2vport(link_state)); } -static int mlx5e_get_vf_config(struct net_device *dev, - int vf, struct ifla_vf_info *ivi) +int mlx5e_get_vf_config(struct net_device *dev, + int vf, struct 
ifla_vf_info *ivi) { struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5_core_dev *mdev = priv->mdev; @@ -3988,8 +3997,8 @@ static int mlx5e_get_vf_config(struct net_device *dev, return 0; } -static int mlx5e_get_vf_stats(struct net_device *dev, - int vf, struct ifla_vf_stats *vf_stats) +int mlx5e_get_vf_stats(struct net_device *dev, + int vf, struct ifla_vf_stats *vf_stats) { struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5_core_dev *mdev = priv->mdev; @@ -4050,8 +4059,7 @@ static void mlx5e_vxlan_queue_work(struct mlx5e_priv *priv, u16 port, int add) queue_work(priv->wq, &vxlan_work->work); } -static void mlx5e_add_vxlan_port(struct net_device *netdev, - struct udp_tunnel_info *ti) +void mlx5e_add_vxlan_port(struct net_device *netdev, struct udp_tunnel_info *ti) { struct mlx5e_priv *priv = netdev_priv(netdev); @@ -4064,8 +4072,7 @@ static void mlx5e_add_vxlan_port(struct net_device *netdev, mlx5e_vxlan_queue_work(priv, be16_to_cpu(ti->port), 1); } -static void mlx5e_del_vxlan_port(struct net_device *netdev, - struct udp_tunnel_info *ti) +void mlx5e_del_vxlan_port(struct net_device *netdev, struct udp_tunnel_info *ti) { struct mlx5e_priv *priv = netdev_priv(netdev); @@ -4115,9 +4122,9 @@ out: return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); } -static netdev_features_t mlx5e_features_check(struct sk_buff *skb, - struct net_device *netdev, - netdev_features_t features) +netdev_features_t mlx5e_features_check(struct sk_buff *skb, + struct net_device *netdev, + netdev_features_t features) { struct mlx5e_priv *priv = netdev_priv(netdev); @@ -4137,31 +4144,13 @@ static netdev_features_t mlx5e_features_check(struct sk_buff *skb, return features; } -static bool mlx5e_tx_timeout_eq_recover(struct net_device *dev, - struct mlx5e_txqsq *sq) -{ - struct mlx5_eq *eq = sq->cq.mcq.eq; - u32 eqe_count; - - netdev_err(dev, "EQ 0x%x: Cons = 0x%x, irqn = 0x%x\n", - eq->eqn, eq->cons_index, eq->irqn); - - eqe_count = mlx5_eq_poll_irq_disabled(eq); - if (!eqe_count) - return false; - - netdev_err(dev, "Recover %d eqes on EQ 0x%x\n", eqe_count, eq->eqn); - sq->channel->stats->eq_rearm++; - return true; -} - static void mlx5e_tx_timeout_work(struct work_struct *work) { struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv, tx_timeout_work); - struct net_device *dev = priv->netdev; - bool reopen_channels = false; - int i, err; + bool report_failed = false; + int err; + int i; rtnl_lock(); mutex_lock(&priv->state_lock); @@ -4170,34 +4159,24 @@ static void mlx5e_tx_timeout_work(struct work_struct *work) goto unlock; for (i = 0; i < priv->channels.num * priv->channels.params.num_tc; i++) { - struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, i); + struct netdev_queue *dev_queue = + netdev_get_tx_queue(priv->netdev, i); struct mlx5e_txqsq *sq = priv->txq2sq[i]; if (!netif_xmit_stopped(dev_queue)) continue; - netdev_err(dev, - "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x, usecs since last trans: %u\n", - i, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc, - jiffies_to_usecs(jiffies - dev_queue->trans_start)); - - /* If we recover a lost interrupt, most likely TX timeout will - * be resolved, skip reopening channels - */ - if (!mlx5e_tx_timeout_eq_recover(dev, sq)) { - clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); - reopen_channels = true; - } + if (mlx5e_tx_reporter_timeout(sq)) + report_failed = true; } - if (!reopen_channels) + if (!report_failed) goto unlock; - mlx5e_close_locked(dev); - err = mlx5e_open_locked(dev); + err = 
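/* The timeout handler above now delegates to a devlink health reporter
 * (mlx5e_tx_reporter_timeout) instead of rearming the EQ by hand. A
 * hedged sketch of how such a reporter is wired up -- the ops layout and
 * create() signature follow the devlink API of this period, and the
 * recover body is an assumption:
 */
static int sketch_tx_recover(struct devlink_health_reporter *reporter,
			     void *ctx)
{
	/* e.g. reopen channels on the priv bound to the reporter */
	return 0;
}

static const struct devlink_health_reporter_ops sketch_tx_ops = {
	.name = "tx",
	.recover = sketch_tx_recover,
};

/* created once per netdev, roughly:
 *	priv->tx_reporter = devlink_health_reporter_create(devlink,
 *				&sketch_tx_ops, 0, true, priv);
 * and driven from userspace with:
 *	devlink health show pci/0000:00:08.0 reporter tx
 *	devlink health recover pci/0000:00:08.0 reporter tx
 */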
mlx5e_safe_reopen_channels(priv); if (err) netdev_err(priv->netdev, - "mlx5e_open_locked failed recovering from a tx_timeout, err(%d).\n", + "mlx5e_safe_reopen_channels failed recovering from a tx_timeout, err(%d).\n", err); unlock: @@ -4233,7 +4212,8 @@ static int mlx5e_xdp_allowed(struct mlx5e_priv *priv, struct bpf_prog *prog) if (!mlx5e_rx_is_linear_skb(priv->mdev, &new_channels.params)) { netdev_warn(netdev, "XDP is not allowed with MTU(%d) > %d\n", - new_channels.params.sw_mtu, MLX5E_XDP_MAX_MTU); + new_channels.params.sw_mtu, + mlx5e_xdp_max_mtu(&new_channels.params)); return -EINVAL; } @@ -4342,6 +4322,61 @@ static int mlx5e_xdp(struct net_device *dev, struct netdev_bpf *xdp) } } +#ifdef CONFIG_MLX5_ESWITCH +static int mlx5e_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, + struct net_device *dev, u32 filter_mask, + int nlflags) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + u8 mode, setting; + int err; + + err = mlx5_eswitch_get_vepa(mdev->priv.eswitch, &setting); + if (err) + return err; + mode = setting ? BRIDGE_MODE_VEPA : BRIDGE_MODE_VEB; + return ndo_dflt_bridge_getlink(skb, pid, seq, dev, + mode, + 0, 0, nlflags, filter_mask, NULL); +} + +static int mlx5e_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh, + u16 flags, struct netlink_ext_ack *extack) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + struct nlattr *attr, *br_spec; + u16 mode = BRIDGE_MODE_UNDEF; + u8 setting; + int rem; + + br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); + if (!br_spec) + return -EINVAL; + + nla_for_each_nested(attr, br_spec, rem) { + if (nla_type(attr) != IFLA_BRIDGE_MODE) + continue; + + if (nla_len(attr) < sizeof(mode)) + return -EINVAL; + + mode = nla_get_u16(attr); + if (mode > BRIDGE_MODE_VEPA) + return -EINVAL; + + break; + } + + if (mode == BRIDGE_MODE_UNDEF) + return -EINVAL; + + setting = (mode == BRIDGE_MODE_VEPA) ? 
1 : 0; + return mlx5_eswitch_set_vepa(mdev->priv.eswitch, setting); +} +#endif + const struct net_device_ops mlx5e_netdev_ops = { .ndo_open = mlx5e_open, .ndo_stop = mlx5e_close, @@ -4368,6 +4403,9 @@ const struct net_device_ops mlx5e_netdev_ops = { .ndo_rx_flow_steer = mlx5e_rx_flow_steer, #endif #ifdef CONFIG_MLX5_ESWITCH + .ndo_bridge_setlink = mlx5e_bridge_setlink, + .ndo_bridge_getlink = mlx5e_bridge_getlink, + /* SRIOV E-Switch NDOs */ .ndo_set_vf_mac = mlx5e_set_vf_mac, .ndo_set_vf_vlan = mlx5e_set_vf_vlan, @@ -4377,8 +4415,6 @@ const struct net_device_ops mlx5e_netdev_ops = { .ndo_get_vf_config = mlx5e_get_vf_config, .ndo_set_vf_link_state = mlx5e_set_vf_link_state, .ndo_get_vf_stats = mlx5e_get_vf_stats, - .ndo_has_offload_stats = mlx5e_has_offload_stats, - .ndo_get_offload_stats = mlx5e_get_offload_stats, #endif }; @@ -4524,15 +4560,23 @@ void mlx5e_build_rq_params(struct mlx5_core_dev *mdev, mlx5e_init_rq_type_params(mdev, params); } -void mlx5e_build_rss_params(struct mlx5e_params *params) +void mlx5e_build_rss_params(struct mlx5e_rss_params *rss_params, + u16 num_channels) { - params->rss_hfunc = ETH_RSS_HASH_XOR; - netdev_rss_key_fill(params->toeplitz_hash_key, sizeof(params->toeplitz_hash_key)); - mlx5e_build_default_indir_rqt(params->indirection_rqt, - MLX5E_INDIR_RQT_SIZE, params->num_channels); + enum mlx5e_traffic_types tt; + + rss_params->hfunc = ETH_RSS_HASH_TOP; + netdev_rss_key_fill(rss_params->toeplitz_hash_key, + sizeof(rss_params->toeplitz_hash_key)); + mlx5e_build_default_indir_rqt(rss_params->indirection_rqt, + MLX5E_INDIR_RQT_SIZE, num_channels); + for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) + rss_params->rx_hash_fields[tt] = + tirc_default_config[tt].rx_hash_fields; } void mlx5e_build_nic_params(struct mlx5_core_dev *mdev, + struct mlx5e_rss_params *rss_params, struct mlx5e_params *params, u16 max_channels, u16 mtu) { @@ -4548,6 +4592,10 @@ void mlx5e_build_nic_params(struct mlx5_core_dev *mdev, MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE : MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE; + /* XDP SQ */ + MLX5E_SET_PFLAG(params, MLX5E_PFLAG_XDP_TX_MPWQE, + MLX5_CAP_ETH(mdev, enhanced_multi_pkt_send_wqe)); + /* set CQE compression */ params->rx_cqe_compress_def = false; if (MLX5_CAP_GEN(mdev, cqe_compression) && @@ -4581,7 +4629,7 @@ void mlx5e_build_nic_params(struct mlx5_core_dev *mdev, params->tx_min_inline_mode = mlx5e_params_calculate_tx_min_inline(mdev); /* RSS */ - mlx5e_build_rss_params(params); + mlx5e_build_rss_params(rss_params, params->num_channels); } static void mlx5e_set_netdev_dev_addr(struct net_device *netdev) @@ -4596,12 +4644,6 @@ static void mlx5e_set_netdev_dev_addr(struct net_device *netdev) } } -#if IS_ENABLED(CONFIG_MLX5_ESWITCH) -static const struct switchdev_ops mlx5e_switchdev_ops = { - .switchdev_port_attr_get = mlx5e_attr_get, -}; -#endif - static void mlx5e_build_nic_netdev(struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); @@ -4711,12 +4753,6 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) netdev->priv_flags |= IFF_UNICAST_FLT; mlx5e_set_netdev_dev_addr(netdev); - -#if IS_ENABLED(CONFIG_MLX5_ESWITCH) - if (MLX5_ESWITCH_MANAGER(mdev)) - netdev->switchdev_ops = &mlx5e_switchdev_ops; -#endif - mlx5e_ipsec_build_netdev(priv); mlx5e_tls_build_netdev(priv); } @@ -4754,14 +4790,16 @@ static int mlx5e_nic_init(struct mlx5_core_dev *mdev, void *ppriv) { struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_rss_params *rss = &priv->rss_params; int err; err = mlx5e_netdev_init(netdev, priv, mdev, profile, ppriv); 
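/* mlx5e_build_rss_params() above switches the default hash function from
 * XOR to Toeplitz and seeds per-traffic-type hash fields from
 * tirc_default_config. The default indirection fill it relies on is the
 * driver's usual round-robin of channels across the table; a
 * self-contained demo (table size and channel count are example values):
 */
#include <stdio.h>

#define INDIR_RQT_SIZE 256

int main(void)
{
	unsigned int rqt[INDIR_RQT_SIZE], num_channels = 6, i;

	for (i = 0; i < INDIR_RQT_SIZE; i++)
		rqt[i] = i % num_channels;	/* entry i -> channel i mod n */

	/* prints: rqt[0]=0 rqt[6]=0 rqt[7]=1 */
	printf("rqt[0]=%u rqt[6]=%u rqt[7]=%u\n", rqt[0], rqt[6], rqt[7]);
	return 0;
}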
if (err) return err; - mlx5e_build_nic_params(mdev, &priv->channels.params, - mlx5e_get_netdev_max_channels(netdev), netdev->mtu); + mlx5e_build_nic_params(mdev, rss, &priv->channels.params, + mlx5e_get_netdev_max_channels(netdev), + netdev->mtu); mlx5e_timestamp_init(priv); @@ -4867,6 +4905,7 @@ static int mlx5e_init_nic_tx(struct mlx5e_priv *priv) #ifdef CONFIG_MLX5_CORE_EN_DCB mlx5e_dcbnl_initialize(priv); #endif + mlx5e_tx_reporter_create(priv); return 0; } @@ -4891,9 +4930,8 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv) mlx5_lag_add(mdev, netdev); mlx5e_enable_async_events(priv); - - if (MLX5_ESWITCH_MANAGER(priv->mdev)) - mlx5e_register_vport_reps(priv); + if (mlx5e_monitor_counter_supported(priv)) + mlx5e_monitor_counter_init(priv); if (netdev->reg_state != NETREG_REGISTERED) return; @@ -4927,8 +4965,8 @@ static void mlx5e_nic_disable(struct mlx5e_priv *priv) queue_work(priv->wq, &priv->set_rx_mode_work); - if (MLX5_ESWITCH_MANAGER(priv->mdev)) - mlx5e_unregister_vport_reps(priv); + if (mlx5e_monitor_counter_supported(priv)) + mlx5e_monitor_counter_cleanup(priv); mlx5e_disable_async_events(priv); mlx5_lag_remove(mdev); @@ -4981,7 +5019,7 @@ int mlx5e_netdev_init(struct net_device *netdev, netif_carrier_off(netdev); #ifdef CONFIG_MLX5_EN_ARFS - netdev->rx_cpu_rmap = mdev->rmap; + netdev->rx_cpu_rmap = mlx5_eq_table_get_rmap(mdev); #endif return 0; @@ -5036,7 +5074,7 @@ int mlx5e_attach_netdev(struct mlx5e_priv *priv) if (priv->channels.params.num_channels > max_nch) { mlx5_core_warn(priv->mdev, "MLX5E: Reducing number of channels to %d\n", max_nch); priv->channels.params.num_channels = max_nch; - mlx5e_build_default_indir_rqt(priv->channels.params.indirection_rqt, + mlx5e_build_default_indir_rqt(priv->rss_params.indirection_rqt, MLX5E_INDIR_RQT_SIZE, max_nch); } @@ -5125,7 +5163,6 @@ static void mlx5e_detach(struct mlx5_core_dev *mdev, void *vpriv) static void *mlx5e_add(struct mlx5_core_dev *mdev) { struct net_device *netdev; - void *rpriv = NULL; void *priv; int err; int nch; @@ -5135,20 +5172,18 @@ static void *mlx5e_add(struct mlx5_core_dev *mdev) return NULL; #ifdef CONFIG_MLX5_ESWITCH - if (MLX5_ESWITCH_MANAGER(mdev)) { - rpriv = mlx5e_alloc_nic_rep_priv(mdev); - if (!rpriv) { - mlx5_core_warn(mdev, "Failed to alloc NIC rep priv data\n"); - return NULL; - } + if (MLX5_ESWITCH_MANAGER(mdev) && + mlx5_eswitch_mode(mdev->priv.eswitch) == SRIOV_OFFLOADS) { + mlx5e_rep_register_vport_reps(mdev); + return mdev; } #endif nch = mlx5e_get_max_num_channels(mdev); - netdev = mlx5e_create_netdev(mdev, &mlx5e_nic_profile, nch, rpriv); + netdev = mlx5e_create_netdev(mdev, &mlx5e_nic_profile, nch, NULL); if (!netdev) { mlx5_core_err(mdev, "mlx5e_create_netdev failed\n"); - goto err_free_rpriv; + return NULL; } priv = netdev_priv(netdev); @@ -5174,30 +5209,26 @@ err_detach: mlx5e_detach(mdev, priv); err_destroy_netdev: mlx5e_destroy_netdev(priv); -err_free_rpriv: - kfree(rpriv); return NULL; } static void mlx5e_remove(struct mlx5_core_dev *mdev, void *vpriv) { - struct mlx5e_priv *priv = vpriv; - void *ppriv = priv->ppriv; + struct mlx5e_priv *priv; +#ifdef CONFIG_MLX5_ESWITCH + if (MLX5_ESWITCH_MANAGER(mdev) && vpriv == mdev) { + mlx5e_rep_unregister_vport_reps(mdev); + return; + } +#endif + priv = vpriv; #ifdef CONFIG_MLX5_CORE_EN_DCB mlx5e_dcbnl_delete_app(priv); #endif unregister_netdev(priv->netdev); mlx5e_detach(mdev, vpriv); mlx5e_destroy_netdev(priv); - kfree(ppriv); -} - -static void *mlx5e_get_netdev(void *vpriv) -{ - struct mlx5e_priv *priv = vpriv; - - return 
priv->netdev; } static struct mlx5_interface mlx5e_interface = { @@ -5205,9 +5236,7 @@ static struct mlx5_interface mlx5e_interface = { .remove = mlx5e_remove, .attach = mlx5e_attach, .detach = mlx5e_detach, - .event = mlx5e_async_event, .protocol = MLX5_INTERFACE_PROTOCOL_ETH, - .get_dev = mlx5e_get_netdev, }; void mlx5e_init(void) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 820fe85100b0..a66b6ed80b30 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -42,14 +42,26 @@ #include "en.h" #include "en_rep.h" #include "en_tc.h" +#include "en/tc_tun.h" #include "fs_core.h" +#include "lib/port_tun.h" -#define MLX5E_REP_PARAMS_LOG_SQ_SIZE \ - max(0x6, MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE) +#define MLX5E_REP_PARAMS_DEF_LOG_SQ_SIZE \ + max(0x7, MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE) #define MLX5E_REP_PARAMS_DEF_NUM_CHANNELS 1 static const char mlx5e_rep_driver_name[] = "mlx5e_rep"; +struct mlx5e_rep_indr_block_priv { + struct net_device *netdev; + struct mlx5e_rep_priv *rpriv; + + struct list_head list; +}; + +static void mlx5e_rep_indr_unregister_block(struct mlx5e_rep_priv *rpriv, + struct net_device *netdev); + static void mlx5e_rep_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *drvinfo) { @@ -99,7 +111,7 @@ static void mlx5e_rep_get_strings(struct net_device *dev, } } -static void mlx5e_rep_update_hw_counters(struct mlx5e_priv *priv) +static void mlx5e_vf_rep_update_hw_counters(struct mlx5e_priv *priv) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5e_rep_priv *rpriv = priv->ppriv; @@ -122,29 +134,45 @@ static void mlx5e_rep_update_hw_counters(struct mlx5e_priv *priv) vport_stats->tx_bytes = vf_stats.rx_bytes; } -static void mlx5e_rep_update_sw_counters(struct mlx5e_priv *priv) +static void mlx5e_uplink_rep_update_hw_counters(struct mlx5e_priv *priv) { - struct mlx5e_sw_stats *s = &priv->stats.sw; - struct mlx5e_rq_stats *rq_stats; - struct mlx5e_sq_stats *sq_stats; - int i, j; + struct mlx5e_pport_stats *pstats = &priv->stats.pport; + struct rtnl_link_stats64 *vport_stats; - memset(s, 0, sizeof(*s)); - for (i = 0; i < priv->channels.num; i++) { - struct mlx5e_channel *c = priv->channels.c[i]; + mlx5e_grp_802_3_update_stats(priv); - rq_stats = c->rq.stats; + vport_stats = &priv->stats.vf_vport; - s->rx_packets += rq_stats->packets; - s->rx_bytes += rq_stats->bytes; + vport_stats->rx_packets = PPORT_802_3_GET(pstats, a_frames_received_ok); + vport_stats->rx_bytes = PPORT_802_3_GET(pstats, a_octets_received_ok); + vport_stats->tx_packets = PPORT_802_3_GET(pstats, a_frames_transmitted_ok); + vport_stats->tx_bytes = PPORT_802_3_GET(pstats, a_octets_transmitted_ok); +} - for (j = 0; j < priv->channels.params.num_tc; j++) { - sq_stats = c->sq[j].stats; +static void mlx5e_rep_update_hw_counters(struct mlx5e_priv *priv) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; - s->tx_packets += sq_stats->packets; - s->tx_bytes += sq_stats->bytes; - } - } + if (rep->vport == MLX5_VPORT_UPLINK) + mlx5e_uplink_rep_update_hw_counters(priv); + else + mlx5e_vf_rep_update_hw_counters(priv); +} + +static void mlx5e_rep_update_sw_counters(struct mlx5e_priv *priv) +{ + struct mlx5e_sw_stats *s = &priv->stats.sw; + struct rtnl_link_stats64 stats64 = {}; + + memset(s, 0, sizeof(*s)); + mlx5e_fold_sw_stats64(priv, &stats64); + + s->rx_packets = stats64.rx_packets; + s->rx_bytes = stats64.rx_bytes; + s->tx_packets = 
stats64.tx_packets; + s->tx_bytes = stats64.tx_bytes; + s->tx_queue_dropped = stats64.tx_dropped; } static void mlx5e_rep_get_ethtool_stats(struct net_device *dev, @@ -157,8 +185,7 @@ static void mlx5e_rep_get_ethtool_stats(struct net_device *dev, return; mutex_lock(&priv->state_lock); - if (test_bit(MLX5E_STATE_OPENED, &priv->state)) - mlx5e_rep_update_sw_counters(priv); + mlx5e_rep_update_sw_counters(priv); mlx5e_rep_update_hw_counters(priv); mutex_unlock(&priv->state_lock); @@ -257,6 +284,22 @@ static int mlx5e_rep_set_channels(struct net_device *dev, return 0; } +static int mlx5e_rep_get_coalesce(struct net_device *netdev, + struct ethtool_coalesce *coal) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + return mlx5e_ethtool_get_coalesce(priv, coal); +} + +static int mlx5e_rep_set_coalesce(struct net_device *netdev, + struct ethtool_coalesce *coal) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + return mlx5e_ethtool_set_coalesce(priv, coal); +} + static u32 mlx5e_rep_get_rxfh_key_size(struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); @@ -271,7 +314,39 @@ static u32 mlx5e_rep_get_rxfh_indir_size(struct net_device *netdev) return mlx5e_ethtool_get_rxfh_indir_size(priv); } -static const struct ethtool_ops mlx5e_rep_ethtool_ops = { +static void mlx5e_uplink_rep_get_pauseparam(struct net_device *netdev, + struct ethtool_pauseparam *pauseparam) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + mlx5e_ethtool_get_pauseparam(priv, pauseparam); +} + +static int mlx5e_uplink_rep_set_pauseparam(struct net_device *netdev, + struct ethtool_pauseparam *pauseparam) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + return mlx5e_ethtool_set_pauseparam(priv, pauseparam); +} + +static int mlx5e_uplink_rep_get_link_ksettings(struct net_device *netdev, + struct ethtool_link_ksettings *link_ksettings) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + return mlx5e_ethtool_get_link_ksettings(priv, link_ksettings); +} + +static int mlx5e_uplink_rep_set_link_ksettings(struct net_device *netdev, + const struct ethtool_link_ksettings *link_ksettings) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + return mlx5e_ethtool_set_link_ksettings(priv, link_ksettings); +} + +static const struct ethtool_ops mlx5e_vf_rep_ethtool_ops = { .get_drvinfo = mlx5e_rep_get_drvinfo, .get_link = ethtool_op_get_link, .get_strings = mlx5e_rep_get_strings, @@ -281,27 +356,58 @@ static const struct ethtool_ops mlx5e_rep_ethtool_ops = { .set_ringparam = mlx5e_rep_set_ringparam, .get_channels = mlx5e_rep_get_channels, .set_channels = mlx5e_rep_set_channels, + .get_coalesce = mlx5e_rep_get_coalesce, + .set_coalesce = mlx5e_rep_set_coalesce, .get_rxfh_key_size = mlx5e_rep_get_rxfh_key_size, .get_rxfh_indir_size = mlx5e_rep_get_rxfh_indir_size, }; -int mlx5e_attr_get(struct net_device *dev, struct switchdev_attr *attr) +static const struct ethtool_ops mlx5e_uplink_rep_ethtool_ops = { + .get_drvinfo = mlx5e_rep_get_drvinfo, + .get_link = ethtool_op_get_link, + .get_strings = mlx5e_rep_get_strings, + .get_sset_count = mlx5e_rep_get_sset_count, + .get_ethtool_stats = mlx5e_rep_get_ethtool_stats, + .get_ringparam = mlx5e_rep_get_ringparam, + .set_ringparam = mlx5e_rep_set_ringparam, + .get_channels = mlx5e_rep_get_channels, + .set_channels = mlx5e_rep_set_channels, + .get_coalesce = mlx5e_rep_get_coalesce, + .set_coalesce = mlx5e_rep_set_coalesce, + .get_link_ksettings = mlx5e_uplink_rep_get_link_ksettings, + .set_link_ksettings = mlx5e_uplink_rep_set_link_ksettings, + 
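/* Note: VF reps and the uplink rep now carry distinct ethtool ops; only
 * the uplink exposes pause and link-settings controls, which make sense
 * solely on a physical port. Presumably selected by vport when the rep
 * netdev is built, along these lines (sketch, not from this hunk):
 *
 *	netdev->ethtool_ops = rep->vport == MLX5_VPORT_UPLINK ?
 *		&mlx5e_uplink_rep_ethtool_ops : &mlx5e_vf_rep_ethtool_ops;
 */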
.get_rxfh_key_size = mlx5e_rep_get_rxfh_key_size, + .get_rxfh_indir_size = mlx5e_rep_get_rxfh_indir_size, + .get_pauseparam = mlx5e_uplink_rep_get_pauseparam, + .set_pauseparam = mlx5e_uplink_rep_set_pauseparam, +}; + +static int mlx5e_rep_get_port_parent_id(struct net_device *dev, + struct netdev_phys_item_id *ppid) { struct mlx5e_priv *priv = netdev_priv(dev); - struct mlx5e_rep_priv *rpriv = priv->ppriv; - struct mlx5_eswitch_rep *rep = rpriv->rep; struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct net_device *uplink_upper = NULL; + struct mlx5e_priv *uplink_priv = NULL; + struct net_device *uplink_dev; if (esw->mode == SRIOV_NONE) return -EOPNOTSUPP; - switch (attr->id) { - case SWITCHDEV_ATTR_ID_PORT_PARENT_ID: - attr->u.ppid.id_len = ETH_ALEN; - ether_addr_copy(attr->u.ppid.id, rep->hw_id); - break; - default: - return -EOPNOTSUPP; + uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH); + if (uplink_dev) { + uplink_upper = netdev_master_upper_dev_get(uplink_dev); + uplink_priv = netdev_priv(uplink_dev); + } + + ppid->id_len = ETH_ALEN; + if (uplink_upper && mlx5_lag_is_sriov(uplink_priv->mdev)) { + ether_addr_copy(ppid->id, uplink_upper->dev_addr); + } else { + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; + + ether_addr_copy(ppid->id, rep->hw_id); } return 0; @@ -474,6 +580,10 @@ static void mlx5e_rep_update_flows(struct mlx5e_priv *priv, if (neigh_connected && !(e->flags & MLX5_ENCAP_ENTRY_VALID)) { ether_addr_copy(e->h_dest, ha); ether_addr_copy(eth->h_dest, ha); + /* Update the encap source mac, in case that we delete + * the flows when encap source mac changed. + */ + ether_addr_copy(eth->h_source, e->route_dev->dev_addr); mlx5e_tc_encap_flows_add(priv, e); } @@ -519,6 +629,186 @@ static void mlx5e_rep_neigh_update(struct work_struct *work) neigh_release(n); } +static struct mlx5e_rep_indr_block_priv * +mlx5e_rep_indr_block_priv_lookup(struct mlx5e_rep_priv *rpriv, + struct net_device *netdev) +{ + struct mlx5e_rep_indr_block_priv *cb_priv; + + /* All callback list access should be protected by RTNL. 
*/ + ASSERT_RTNL(); + + list_for_each_entry(cb_priv, + &rpriv->uplink_priv.tc_indr_block_priv_list, + list) + if (cb_priv->netdev == netdev) + return cb_priv; + + return NULL; +} + +static void mlx5e_rep_indr_clean_block_privs(struct mlx5e_rep_priv *rpriv) +{ + struct mlx5e_rep_indr_block_priv *cb_priv, *temp; + struct list_head *head = &rpriv->uplink_priv.tc_indr_block_priv_list; + + list_for_each_entry_safe(cb_priv, temp, head, list) { + mlx5e_rep_indr_unregister_block(rpriv, cb_priv->netdev); + kfree(cb_priv); + } +} + +static int +mlx5e_rep_indr_offload(struct net_device *netdev, + struct tc_cls_flower_offload *flower, + struct mlx5e_rep_indr_block_priv *indr_priv) +{ + struct mlx5e_priv *priv = netdev_priv(indr_priv->rpriv->netdev); + int flags = MLX5E_TC_EGRESS | MLX5E_TC_ESW_OFFLOAD; + int err = 0; + + switch (flower->command) { + case TC_CLSFLOWER_REPLACE: + err = mlx5e_configure_flower(netdev, priv, flower, flags); + break; + case TC_CLSFLOWER_DESTROY: + err = mlx5e_delete_flower(netdev, priv, flower, flags); + break; + case TC_CLSFLOWER_STATS: + err = mlx5e_stats_flower(netdev, priv, flower, flags); + break; + default: + err = -EOPNOTSUPP; + } + + return err; +} + +static int mlx5e_rep_indr_setup_block_cb(enum tc_setup_type type, + void *type_data, void *indr_priv) +{ + struct mlx5e_rep_indr_block_priv *priv = indr_priv; + + switch (type) { + case TC_SETUP_CLSFLOWER: + return mlx5e_rep_indr_offload(priv->netdev, type_data, priv); + default: + return -EOPNOTSUPP; + } +} + +static int +mlx5e_rep_indr_setup_tc_block(struct net_device *netdev, + struct mlx5e_rep_priv *rpriv, + struct tc_block_offload *f) +{ + struct mlx5e_rep_indr_block_priv *indr_priv; + int err = 0; + + if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS) + return -EOPNOTSUPP; + + switch (f->command) { + case TC_BLOCK_BIND: + indr_priv = mlx5e_rep_indr_block_priv_lookup(rpriv, netdev); + if (indr_priv) + return -EEXIST; + + indr_priv = kmalloc(sizeof(*indr_priv), GFP_KERNEL); + if (!indr_priv) + return -ENOMEM; + + indr_priv->netdev = netdev; + indr_priv->rpriv = rpriv; + list_add(&indr_priv->list, + &rpriv->uplink_priv.tc_indr_block_priv_list); + + err = tcf_block_cb_register(f->block, + mlx5e_rep_indr_setup_block_cb, + indr_priv, indr_priv, f->extack); + if (err) { + list_del(&indr_priv->list); + kfree(indr_priv); + } + + return err; + case TC_BLOCK_UNBIND: + indr_priv = mlx5e_rep_indr_block_priv_lookup(rpriv, netdev); + if (!indr_priv) + return -ENOENT; + + tcf_block_cb_unregister(f->block, + mlx5e_rep_indr_setup_block_cb, + indr_priv); + list_del(&indr_priv->list); + kfree(indr_priv); + + return 0; + default: + return -EOPNOTSUPP; + } + return 0; +} + +static +int mlx5e_rep_indr_setup_tc_cb(struct net_device *netdev, void *cb_priv, + enum tc_setup_type type, void *type_data) +{ + switch (type) { + case TC_SETUP_BLOCK: + return mlx5e_rep_indr_setup_tc_block(netdev, cb_priv, + type_data); + default: + return -EOPNOTSUPP; + } +} + +static int mlx5e_rep_indr_register_block(struct mlx5e_rep_priv *rpriv, + struct net_device *netdev) +{ + int err; + + err = __tc_indr_block_cb_register(netdev, rpriv, + mlx5e_rep_indr_setup_tc_cb, + rpriv); + if (err) { + struct mlx5e_priv *priv = netdev_priv(rpriv->netdev); + + mlx5_core_err(priv->mdev, "Failed to register remote block notifier for %s err=%d\n", + netdev_name(netdev), err); + } + return err; +} + +static void mlx5e_rep_indr_unregister_block(struct mlx5e_rep_priv *rpriv, + struct net_device *netdev) +{ + __tc_indr_block_cb_unregister(netdev, 
mlx5e_rep_indr_setup_tc_cb, + rpriv); +} + +static int mlx5e_nic_rep_netdevice_event(struct notifier_block *nb, + unsigned long event, void *ptr) +{ + struct mlx5e_rep_priv *rpriv = container_of(nb, struct mlx5e_rep_priv, + uplink_priv.netdevice_nb); + struct mlx5e_priv *priv = netdev_priv(rpriv->netdev); + struct net_device *netdev = netdev_notifier_info_to_dev(ptr); + + if (!mlx5e_tc_tun_device_to_offload(priv, netdev)) + return NOTIFY_OK; + + switch (event) { + case NETDEV_REGISTER: + mlx5e_rep_indr_register_block(rpriv, netdev); + break; + case NETDEV_UNREGISTER: + mlx5e_rep_indr_unregister_block(rpriv, netdev); + break; + } + return NOTIFY_OK; +} + static struct mlx5e_neigh_hash_entry * mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv, struct mlx5e_neigh *m_neigh); @@ -755,14 +1045,23 @@ static void mlx5e_rep_neigh_entry_destroy(struct mlx5e_priv *priv, int mlx5e_rep_encap_entry_attach(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e) { + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv; + struct mlx5_tun_entropy *tun_entropy = &uplink_priv->tun_entropy; struct mlx5e_neigh_hash_entry *nhe; int err; + err = mlx5_tun_entropy_refcount_inc(tun_entropy, e->reformat_type); + if (err) + return err; nhe = mlx5e_rep_neigh_entry_lookup(priv, &e->m_neigh); if (!nhe) { err = mlx5e_rep_neigh_entry_create(priv, e, &nhe); - if (err) + if (err) { + mlx5_tun_entropy_refcount_dec(tun_entropy, + e->reformat_type); return err; + } } list_add(&e->encap_list, &nhe->encap_list); return 0; @@ -771,6 +1070,9 @@ int mlx5e_rep_encap_entry_attach(struct mlx5e_priv *priv, void mlx5e_rep_encap_entry_detach(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e) { + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv; + struct mlx5_tun_entropy *tun_entropy = &uplink_priv->tun_entropy; struct mlx5e_neigh_hash_entry *nhe; list_del(&e->encap_list); @@ -778,9 +1080,10 @@ void mlx5e_rep_encap_entry_detach(struct mlx5e_priv *priv, if (list_empty(&nhe->encap_list)) mlx5e_rep_neigh_entry_destroy(priv, nhe); + mlx5_tun_entropy_refcount_dec(tun_entropy, e->reformat_type); } -static int mlx5e_rep_open(struct net_device *dev) +static int mlx5e_vf_rep_open(struct net_device *dev) { struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5e_rep_priv *rpriv = priv->ppriv; @@ -794,7 +1097,8 @@ static int mlx5e_rep_open(struct net_device *dev) if (!mlx5_modify_vport_admin_state(priv->mdev, MLX5_VPORT_STATE_OP_MOD_ESW_VPORT, - rep->vport, MLX5_VPORT_ADMIN_STATE_UP)) + rep->vport, 1, + MLX5_VPORT_ADMIN_STATE_UP)) netif_carrier_on(dev); unlock: @@ -802,7 +1106,7 @@ unlock: return err; } -static int mlx5e_rep_close(struct net_device *dev) +static int mlx5e_vf_rep_close(struct net_device *dev) { struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5e_rep_priv *rpriv = priv->ppriv; @@ -812,7 +1116,8 @@ static int mlx5e_rep_close(struct net_device *dev) mutex_lock(&priv->state_lock); mlx5_modify_vport_admin_state(priv->mdev, MLX5_VPORT_STATE_OP_MOD_ESW_VPORT, - rep->vport, MLX5_VPORT_ADMIN_STATE_DOWN); + rep->vport, 1, + MLX5_VPORT_ADMIN_STATE_DOWN); ret = mlx5e_close_locked(dev); mutex_unlock(&priv->state_lock); return ret; @@ -824,9 +1129,18 @@ static int mlx5e_rep_get_phys_port_name(struct net_device *dev, struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5e_rep_priv *rpriv = priv->ppriv; struct mlx5_eswitch_rep *rep = rpriv->rep; + unsigned int fn; int ret; - ret = snprintf(buf, len, "%d", rep->vport - 1); + fn 
= PCI_FUNC(priv->mdev->pdev->devfn); + if (fn >= MLX5_MAX_PORTS) + return -EOPNOTSUPP; + + if (rep->vport == MLX5_VPORT_UPLINK) + ret = snprintf(buf, len, "p%d", fn); + else + ret = snprintf(buf, len, "pf%dvf%d", fn, rep->vport - 1); + if (ret >= len) return -EOPNOTSUPP; @@ -839,24 +1153,14 @@ mlx5e_rep_setup_tc_cls_flower(struct mlx5e_priv *priv, { switch (cls_flower->command) { case TC_CLSFLOWER_REPLACE: - return mlx5e_configure_flower(priv, cls_flower, flags); + return mlx5e_configure_flower(priv->netdev, priv, cls_flower, + flags); case TC_CLSFLOWER_DESTROY: - return mlx5e_delete_flower(priv, cls_flower, flags); + return mlx5e_delete_flower(priv->netdev, priv, cls_flower, + flags); case TC_CLSFLOWER_STATS: - return mlx5e_stats_flower(priv, cls_flower, flags); - default: - return -EOPNOTSUPP; - } -} - -static int mlx5e_rep_setup_tc_cb_egdev(enum tc_setup_type type, void *type_data, - void *cb_priv) -{ - struct mlx5e_priv *priv = cb_priv; - - switch (type) { - case TC_SETUP_CLSFLOWER: - return mlx5e_rep_setup_tc_cls_flower(priv, type_data, MLX5E_TC_EGRESS); + return mlx5e_stats_flower(priv->netdev, priv, cls_flower, + flags); default: return -EOPNOTSUPP; } @@ -869,7 +1173,8 @@ static int mlx5e_rep_setup_tc_cb(enum tc_setup_type type, void *type_data, switch (type) { case TC_SETUP_CLSFLOWER: - return mlx5e_rep_setup_tc_cls_flower(priv, type_data, MLX5E_TC_INGRESS); + return mlx5e_rep_setup_tc_cls_flower(priv, type_data, MLX5E_TC_INGRESS | + MLX5E_TC_ESW_OFFLOAD); default: return -EOPNOTSUPP; } @@ -908,43 +1213,23 @@ static int mlx5e_rep_setup_tc(struct net_device *dev, enum tc_setup_type type, bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv) { - struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5e_rep_priv *rpriv = priv->ppriv; struct mlx5_eswitch_rep *rep; if (!MLX5_ESWITCH_MANAGER(priv->mdev)) return false; - rep = rpriv->rep; - if (esw->mode == SRIOV_OFFLOADS && - rep && rep->vport == FDB_UPLINK_VPORT) - return true; - - return false; -} - -static bool mlx5e_is_vf_vport_rep(struct mlx5e_priv *priv) -{ - struct mlx5e_rep_priv *rpriv = priv->ppriv; - struct mlx5_eswitch_rep *rep; - - if (!MLX5_ESWITCH_MANAGER(priv->mdev)) + if (!rpriv) /* non vport rep mlx5e instances don't use this field */ return false; rep = rpriv->rep; - if (rep && rep->vport != FDB_UPLINK_VPORT) - return true; - - return false; + return (rep->vport == MLX5_VPORT_UPLINK); } -bool mlx5e_has_offload_stats(const struct net_device *dev, int attr_id) +static bool mlx5e_rep_has_offload_stats(const struct net_device *dev, int attr_id) { - struct mlx5e_priv *priv = netdev_priv(dev); - switch (attr_id) { case IFLA_OFFLOAD_XSTATS_CPU_HIT: - if (mlx5e_is_vf_vport_rep(priv) || mlx5e_is_uplink_rep(priv)) return true; } @@ -956,22 +1241,13 @@ mlx5e_get_sw_stats64(const struct net_device *dev, struct rtnl_link_stats64 *stats) { struct mlx5e_priv *priv = netdev_priv(dev); - struct mlx5e_sw_stats *sstats = &priv->stats.sw; - - mlx5e_rep_update_sw_counters(priv); - - stats->rx_packets = sstats->rx_packets; - stats->rx_bytes = sstats->rx_bytes; - stats->tx_packets = sstats->tx_packets; - stats->tx_bytes = sstats->tx_bytes; - - stats->tx_dropped = sstats->tx_queue_dropped; + mlx5e_fold_sw_stats64(priv, stats); return 0; } -int mlx5e_get_offload_stats(int attr_id, const struct net_device *dev, - void *sp) +static int mlx5e_rep_get_offload_stats(int attr_id, const struct net_device *dev, + void *sp) { switch (attr_id) { case IFLA_OFFLOAD_XSTATS_CPU_HIT: @@ -982,7 +1258,7 @@ int mlx5e_get_offload_stats(int attr_id, const 
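Concretely, assuming PCI function 0, the naming logic above yields the following (readable from /sys/class/net/<dev>/phys_port_name):

/*
 * uplink rep            -> "p0"
 * rep of VF 0 (vport 1) -> "pf0vf0"
 * rep of VF 3 (vport 4) -> "pf0vf3"
 */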
struct net_device *dev, } static void -mlx5e_rep_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) +mlx5e_vf_rep_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) { struct mlx5e_priv *priv = netdev_priv(dev); @@ -991,37 +1267,104 @@ mlx5e_rep_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) memcpy(stats, &priv->stats.vf_vport, sizeof(*stats)); } -static const struct switchdev_ops mlx5e_rep_switchdev_ops = { - .switchdev_port_attr_get = mlx5e_attr_get, -}; - -static int mlx5e_change_rep_mtu(struct net_device *netdev, int new_mtu) +static int mlx5e_vf_rep_change_mtu(struct net_device *netdev, int new_mtu) { return mlx5e_change_mtu(netdev, new_mtu, NULL); } -static const struct net_device_ops mlx5e_netdev_ops_rep = { - .ndo_open = mlx5e_rep_open, - .ndo_stop = mlx5e_rep_close, +static int mlx5e_uplink_rep_change_mtu(struct net_device *netdev, int new_mtu) +{ + return mlx5e_change_mtu(netdev, new_mtu, mlx5e_set_dev_port_mtu); +} + +static int mlx5e_uplink_rep_set_mac(struct net_device *netdev, void *addr) +{ + struct sockaddr *saddr = addr; + + if (!is_valid_ether_addr(saddr->sa_data)) + return -EADDRNOTAVAIL; + + ether_addr_copy(netdev->dev_addr, saddr->sa_data); + return 0; +} + +static int mlx5e_uplink_rep_set_vf_vlan(struct net_device *dev, int vf, u16 vlan, u8 qos, + __be16 vlan_proto) +{ + netdev_warn_once(dev, "legacy vf vlan setting isn't supported in switchdev mode\n"); + + if (vlan != 0) + return -EOPNOTSUPP; + + /* allow setting 0-vid for compatibility with libvirt */ + return 0; +} + +static const struct net_device_ops mlx5e_netdev_ops_vf_rep = { + .ndo_open = mlx5e_vf_rep_open, + .ndo_stop = mlx5e_vf_rep_close, + .ndo_start_xmit = mlx5e_xmit, + .ndo_get_phys_port_name = mlx5e_rep_get_phys_port_name, + .ndo_setup_tc = mlx5e_rep_setup_tc, + .ndo_get_stats64 = mlx5e_vf_rep_get_stats, + .ndo_has_offload_stats = mlx5e_rep_has_offload_stats, + .ndo_get_offload_stats = mlx5e_rep_get_offload_stats, + .ndo_change_mtu = mlx5e_vf_rep_change_mtu, + .ndo_get_port_parent_id = mlx5e_rep_get_port_parent_id, +}; + +static const struct net_device_ops mlx5e_netdev_ops_uplink_rep = { + .ndo_open = mlx5e_open, + .ndo_stop = mlx5e_close, .ndo_start_xmit = mlx5e_xmit, + .ndo_set_mac_address = mlx5e_uplink_rep_set_mac, .ndo_get_phys_port_name = mlx5e_rep_get_phys_port_name, .ndo_setup_tc = mlx5e_rep_setup_tc, - .ndo_get_stats64 = mlx5e_rep_get_stats, - .ndo_has_offload_stats = mlx5e_has_offload_stats, - .ndo_get_offload_stats = mlx5e_get_offload_stats, - .ndo_change_mtu = mlx5e_change_rep_mtu, + .ndo_get_stats64 = mlx5e_get_stats, + .ndo_has_offload_stats = mlx5e_rep_has_offload_stats, + .ndo_get_offload_stats = mlx5e_rep_get_offload_stats, + .ndo_change_mtu = mlx5e_uplink_rep_change_mtu, + .ndo_udp_tunnel_add = mlx5e_add_vxlan_port, + .ndo_udp_tunnel_del = mlx5e_del_vxlan_port, + .ndo_features_check = mlx5e_features_check, + .ndo_set_vf_mac = mlx5e_set_vf_mac, + .ndo_set_vf_rate = mlx5e_set_vf_rate, + .ndo_get_vf_config = mlx5e_get_vf_config, + .ndo_get_vf_stats = mlx5e_get_vf_stats, + .ndo_set_vf_vlan = mlx5e_uplink_rep_set_vf_vlan, + .ndo_get_port_parent_id = mlx5e_rep_get_port_parent_id, }; -static void mlx5e_build_rep_params(struct mlx5_core_dev *mdev, - struct mlx5e_params *params, u16 mtu) +bool mlx5e_eswitch_rep(struct net_device *netdev) +{ + if (netdev->netdev_ops == &mlx5e_netdev_ops_vf_rep || + netdev->netdev_ops == &mlx5e_netdev_ops_uplink_rep) + return true; + + return false; +} + +static void mlx5e_build_rep_params(struct net_device 
*netdev) { + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_params *params; + u8 cq_period_mode = MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ? MLX5_CQ_PERIOD_MODE_START_FROM_CQE : MLX5_CQ_PERIOD_MODE_START_FROM_EQE; + params = &priv->channels.params; params->hard_mtu = MLX5E_ETH_HARD_MTU; - params->sw_mtu = mtu; - params->log_sq_size = MLX5E_REP_PARAMS_LOG_SQ_SIZE; + params->sw_mtu = netdev->mtu; + + /* SQ */ + if (rep->vport == MLX5_VPORT_UPLINK) + params->log_sq_size = MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE; + else + params->log_sq_size = MLX5E_REP_PARAMS_DEF_LOG_SQ_SIZE; /* RQ */ mlx5e_build_rq_params(mdev, params); @@ -1035,24 +1378,36 @@ static void mlx5e_build_rep_params(struct mlx5_core_dev *mdev, mlx5_query_min_inline(mdev, ¶ms->tx_min_inline_mode); /* RSS */ - mlx5e_build_rss_params(params); + mlx5e_build_rss_params(&priv->rss_params, params->num_channels); } static void mlx5e_build_rep_netdev(struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; struct mlx5_core_dev *mdev = priv->mdev; - u16 max_mtu; - netdev->netdev_ops = &mlx5e_netdev_ops_rep; + if (rep->vport == MLX5_VPORT_UPLINK) { + SET_NETDEV_DEV(netdev, &priv->mdev->pdev->dev); + netdev->netdev_ops = &mlx5e_netdev_ops_uplink_rep; + /* we want a persistent mac for the uplink rep */ + mlx5_query_nic_vport_mac_address(mdev, 0, netdev->dev_addr); + netdev->ethtool_ops = &mlx5e_uplink_rep_ethtool_ops; +#ifdef CONFIG_MLX5_CORE_EN_DCB + if (MLX5_CAP_GEN(mdev, qos)) + netdev->dcbnl_ops = &mlx5e_dcbnl_ops; +#endif + } else { + netdev->netdev_ops = &mlx5e_netdev_ops_vf_rep; + eth_hw_addr_random(netdev); + netdev->ethtool_ops = &mlx5e_vf_rep_ethtool_ops; + } netdev->watchdog_timeo = 15 * HZ; - netdev->ethtool_ops = &mlx5e_rep_ethtool_ops; - - netdev->switchdev_ops = &mlx5e_rep_switchdev_ops; - netdev->features |= NETIF_F_VLAN_CHALLENGED | NETIF_F_HW_TC | NETIF_F_NETNS_LOCAL; + netdev->features |= NETIF_F_HW_TC | NETIF_F_NETNS_LOCAL; netdev->hw_features |= NETIF_F_HW_TC; netdev->hw_features |= NETIF_F_SG; @@ -1063,13 +1418,10 @@ static void mlx5e_build_rep_netdev(struct net_device *netdev) netdev->hw_features |= NETIF_F_TSO6; netdev->hw_features |= NETIF_F_RXCSUM; - netdev->features |= netdev->hw_features; - - eth_hw_addr_random(netdev); + if (rep->vport != MLX5_VPORT_UPLINK) + netdev->features |= NETIF_F_VLAN_CHALLENGED; - netdev->min_mtu = ETH_MIN_MTU; - mlx5_query_port_max_mtu(mdev, &max_mtu, 1); - netdev->max_mtu = MLX5E_HW2SW_MTU(&priv->channels.params, max_mtu); + netdev->features |= netdev->hw_features; } static int mlx5e_init_rep(struct mlx5_core_dev *mdev, @@ -1086,7 +1438,7 @@ static int mlx5e_init_rep(struct mlx5_core_dev *mdev, priv->channels.params.num_channels = MLX5E_REP_PARAMS_DEF_NUM_CHANNELS; - mlx5e_build_rep_params(mdev, &priv->channels.params, netdev->mtu); + mlx5e_build_rep_params(netdev); mlx5e_build_rep_netdev(netdev); mlx5e_timestamp_init(priv); @@ -1209,94 +1561,193 @@ static void mlx5e_cleanup_rep_rx(struct mlx5e_priv *priv) static int mlx5e_init_rep_tx(struct mlx5e_priv *priv) { - int err; + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_rep_uplink_priv *uplink_priv; + int tc, err; err = mlx5e_create_tises(priv); if (err) { mlx5_core_warn(priv->mdev, "create tises failed, %d\n", err); return err; } + + if (rpriv->rep->vport == 
MLX5_VPORT_UPLINK) { + uplink_priv = &rpriv->uplink_priv; + + INIT_LIST_HEAD(&uplink_priv->unready_flows); + + /* init shared tc flow table */ + err = mlx5e_tc_esw_init(&uplink_priv->tc_ht); + if (err) + goto destroy_tises; + + mlx5_init_port_tun_entropy(&uplink_priv->tun_entropy, priv->mdev); + + /* init indirect block notifications */ + INIT_LIST_HEAD(&uplink_priv->tc_indr_block_priv_list); + uplink_priv->netdevice_nb.notifier_call = mlx5e_nic_rep_netdevice_event; + err = register_netdevice_notifier(&uplink_priv->netdevice_nb); + if (err) { + mlx5_core_err(priv->mdev, "Failed to register netdev notifier\n"); + goto tc_esw_cleanup; + } + } + return 0; + +tc_esw_cleanup: + mlx5e_tc_esw_cleanup(&uplink_priv->tc_ht); +destroy_tises: + for (tc = 0; tc < priv->profile->max_tc; tc++) + mlx5e_destroy_tis(priv->mdev, priv->tisn[tc]); + return err; } -static const struct mlx5e_profile mlx5e_rep_profile = { - .init = mlx5e_init_rep, - .cleanup = mlx5e_cleanup_rep, - .init_rx = mlx5e_init_rep_rx, - .cleanup_rx = mlx5e_cleanup_rep_rx, - .init_tx = mlx5e_init_rep_tx, - .cleanup_tx = mlx5e_cleanup_nic_tx, - .update_stats = mlx5e_rep_update_hw_counters, - .update_carrier = NULL, - .rx_handlers.handle_rx_cqe = mlx5e_handle_rx_cqe_rep, - .rx_handlers.handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq, - .max_tc = 1, -}; +static void mlx5e_cleanup_rep_tx(struct mlx5e_priv *priv) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + int tc; -/* e-Switch vport representors */ + for (tc = 0; tc < priv->profile->max_tc; tc++) + mlx5e_destroy_tis(priv->mdev, priv->tisn[tc]); -static int -mlx5e_nic_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) + if (rpriv->rep->vport == MLX5_VPORT_UPLINK) { + /* clean indirect TC block notifications */ + unregister_netdevice_notifier(&rpriv->uplink_priv.netdevice_nb); + mlx5e_rep_indr_clean_block_privs(rpriv); + + /* delete shared tc flow table */ + mlx5e_tc_esw_cleanup(&rpriv->uplink_priv.tc_ht); + } +} + +static void mlx5e_vf_rep_enable(struct mlx5e_priv *priv) { - struct mlx5e_rep_priv *rpriv = mlx5e_rep_to_rep_priv(rep); - struct mlx5e_priv *priv = netdev_priv(rpriv->netdev); + struct net_device *netdev = priv->netdev; + struct mlx5_core_dev *mdev = priv->mdev; + u16 max_mtu; - int err; + netdev->min_mtu = ETH_MIN_MTU; + mlx5_query_port_max_mtu(mdev, &max_mtu, 1); + netdev->max_mtu = MLX5E_HW2SW_MTU(&priv->channels.params, max_mtu); +} - if (test_bit(MLX5E_STATE_OPENED, &priv->state)) { - err = mlx5e_add_sqs_fwd_rules(priv); - if (err) - return err; +static int uplink_rep_async_event(struct notifier_block *nb, unsigned long event, void *data) +{ + struct mlx5e_priv *priv = container_of(nb, struct mlx5e_priv, events_nb); + + if (event == MLX5_EVENT_TYPE_PORT_CHANGE) { + struct mlx5_eqe *eqe = data; + + switch (eqe->sub_type) { + case MLX5_PORT_CHANGE_SUBTYPE_DOWN: + case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE: + queue_work(priv->wq, &priv->update_carrier_work); + break; + default: + return NOTIFY_DONE; + } + + return NOTIFY_OK; } - err = mlx5e_rep_neigh_init(rpriv); - if (err) - goto err_remove_sqs; + if (event == MLX5_DEV_EVENT_PORT_AFFINITY) { + struct mlx5e_rep_priv *rpriv = priv->ppriv; - /* init shared tc flow table */ - err = mlx5e_tc_esw_init(&rpriv->tc_ht); - if (err) - goto err_neigh_cleanup; + queue_work(priv->wq, &rpriv->uplink_priv.reoffload_flows_work); - return 0; + return NOTIFY_OK; + } -err_neigh_cleanup: - mlx5e_rep_neigh_cleanup(rpriv); -err_remove_sqs: - mlx5e_remove_sqs_fwd_rules(priv); - return err; + return NOTIFY_DONE; } -static void 
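The uplink-only state set up in mlx5e_init_rep_tx() is torn down in mlx5e_cleanup_rep_tx(); pairing the two side by side, in the order the code above executes them:

/*
 * init_rep_tx (uplink)            cleanup_rep_tx (uplink)
 *   mlx5e_create_tises()            destroy tises
 *   mlx5e_tc_esw_init(tc_ht)        unregister netdevice_nb
 *   mlx5_init_port_tun_entropy()    mlx5e_rep_indr_clean_block_privs()
 *   register netdevice_nb           mlx5e_tc_esw_cleanup(tc_ht)
 */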
-mlx5e_nic_rep_unload(struct mlx5_eswitch_rep *rep) +static void mlx5e_uplink_rep_enable(struct mlx5e_priv *priv) { - struct mlx5e_rep_priv *rpriv = mlx5e_rep_to_rep_priv(rep); - struct mlx5e_priv *priv = netdev_priv(rpriv->netdev); + struct net_device *netdev = priv->netdev; + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_rep_priv *rpriv = priv->ppriv; + u16 max_mtu; + + netdev->min_mtu = ETH_MIN_MTU; + mlx5_query_port_max_mtu(priv->mdev, &max_mtu, 1); + netdev->max_mtu = MLX5E_HW2SW_MTU(&priv->channels.params, max_mtu); + mlx5e_set_dev_port_mtu(priv); - if (test_bit(MLX5E_STATE_OPENED, &priv->state)) - mlx5e_remove_sqs_fwd_rules(priv); + INIT_WORK(&rpriv->uplink_priv.reoffload_flows_work, + mlx5e_tc_reoffload_flows_work); - /* clean uplink offloaded TC rules, delete shared tc flow table */ - mlx5e_tc_esw_cleanup(&rpriv->tc_ht); + mlx5_lag_add(mdev, netdev); + priv->events_nb.notifier_call = uplink_rep_async_event; + mlx5_notifier_register(mdev, &priv->events_nb); +#ifdef CONFIG_MLX5_CORE_EN_DCB + mlx5e_dcbnl_initialize(priv); + mlx5e_dcbnl_init_app(priv); +#endif +} - mlx5e_rep_neigh_cleanup(rpriv); +static void mlx5e_uplink_rep_disable(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_rep_priv *rpriv = priv->ppriv; + +#ifdef CONFIG_MLX5_CORE_EN_DCB + mlx5e_dcbnl_delete_app(priv); +#endif + mlx5_notifier_unregister(mdev, &priv->events_nb); + cancel_work_sync(&rpriv->uplink_priv.reoffload_flows_work); + mlx5_lag_remove(mdev); } +static const struct mlx5e_profile mlx5e_vf_rep_profile = { + .init = mlx5e_init_rep, + .cleanup = mlx5e_cleanup_rep, + .init_rx = mlx5e_init_rep_rx, + .cleanup_rx = mlx5e_cleanup_rep_rx, + .init_tx = mlx5e_init_rep_tx, + .cleanup_tx = mlx5e_cleanup_rep_tx, + .enable = mlx5e_vf_rep_enable, + .update_stats = mlx5e_vf_rep_update_hw_counters, + .rx_handlers.handle_rx_cqe = mlx5e_handle_rx_cqe_rep, + .rx_handlers.handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq, + .max_tc = 1, +}; + +static const struct mlx5e_profile mlx5e_uplink_rep_profile = { + .init = mlx5e_init_rep, + .cleanup = mlx5e_cleanup_rep, + .init_rx = mlx5e_init_rep_rx, + .cleanup_rx = mlx5e_cleanup_rep_rx, + .init_tx = mlx5e_init_rep_tx, + .cleanup_tx = mlx5e_cleanup_rep_tx, + .enable = mlx5e_uplink_rep_enable, + .disable = mlx5e_uplink_rep_disable, + .update_stats = mlx5e_uplink_rep_update_hw_counters, + .update_carrier = mlx5e_update_carrier, + .rx_handlers.handle_rx_cqe = mlx5e_handle_rx_cqe_rep, + .rx_handlers.handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq, + .max_tc = MLX5E_MAX_NUM_TC, +}; + +/* e-Switch vport representors */ static int mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) { - struct mlx5e_rep_priv *uplink_rpriv; + const struct mlx5e_profile *profile; struct mlx5e_rep_priv *rpriv; struct net_device *netdev; - struct mlx5e_priv *upriv; int nch, err; rpriv = kzalloc(sizeof(*rpriv), GFP_KERNEL); if (!rpriv) return -ENOMEM; + /* rpriv->rep to be looked up when profile->init() is called */ + rpriv->rep = rep; + nch = mlx5e_get_max_num_channels(dev); - netdev = mlx5e_create_netdev(dev, &mlx5e_rep_profile, nch, rpriv); + profile = (rep->vport == MLX5_VPORT_UPLINK) ? 
&mlx5e_uplink_rep_profile : &mlx5e_vf_rep_profile; + netdev = mlx5e_create_netdev(dev, profile, nch, rpriv); if (!netdev) { pr_warn("Failed to create representor netdev for vport %d\n", rep->vport); @@ -1305,15 +1756,20 @@ mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) } rpriv->netdev = netdev; - rpriv->rep = rep; rep->rep_if[REP_ETH].priv = rpriv; INIT_LIST_HEAD(&rpriv->vport_sqs_list); + if (rep->vport == MLX5_VPORT_UPLINK) { + err = mlx5e_create_mdev_resources(dev); + if (err) + goto err_destroy_netdev; + } + err = mlx5e_attach_netdev(netdev_priv(netdev)); if (err) { pr_warn("Failed to attach representor netdev for vport %d\n", rep->vport); - goto err_destroy_netdev; + goto err_destroy_mdev_resources; } err = mlx5e_rep_neigh_init(rpriv); @@ -1323,32 +1779,25 @@ mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) goto err_detach_netdev; } - uplink_rpriv = mlx5_eswitch_get_uplink_priv(dev->priv.eswitch, REP_ETH); - upriv = netdev_priv(uplink_rpriv->netdev); - err = tc_setup_cb_egdev_register(netdev, mlx5e_rep_setup_tc_cb_egdev, - upriv); - if (err) - goto err_neigh_cleanup; - err = register_netdev(netdev); if (err) { pr_warn("Failed to register representor netdev for vport %d\n", rep->vport); - goto err_egdev_cleanup; + goto err_neigh_cleanup; } return 0; -err_egdev_cleanup: - tc_setup_cb_egdev_unregister(netdev, mlx5e_rep_setup_tc_cb_egdev, - upriv); - err_neigh_cleanup: mlx5e_rep_neigh_cleanup(rpriv); err_detach_netdev: mlx5e_detach_netdev(netdev_priv(netdev)); +err_destroy_mdev_resources: + if (rep->vport == MLX5_VPORT_UPLINK) + mlx5e_destroy_mdev_resources(dev); + err_destroy_netdev: mlx5e_destroy_netdev(netdev_priv(netdev)); kfree(rpriv); @@ -1361,18 +1810,13 @@ mlx5e_vport_rep_unload(struct mlx5_eswitch_rep *rep) struct mlx5e_rep_priv *rpriv = mlx5e_rep_to_rep_priv(rep); struct net_device *netdev = rpriv->netdev; struct mlx5e_priv *priv = netdev_priv(netdev); - struct mlx5e_rep_priv *uplink_rpriv; void *ppriv = priv->ppriv; - struct mlx5e_priv *upriv; unregister_netdev(netdev); - uplink_rpriv = mlx5_eswitch_get_uplink_priv(priv->mdev->priv.eswitch, - REP_ETH); - upriv = netdev_priv(uplink_rpriv->netdev); - tc_setup_cb_egdev_unregister(netdev, mlx5e_rep_setup_tc_cb_egdev, - upriv); mlx5e_rep_neigh_cleanup(rpriv); mlx5e_detach_netdev(priv); + if (rep->vport == MLX5_VPORT_UPLINK) + mlx5e_destroy_mdev_resources(priv->mdev); mlx5e_destroy_netdev(priv); kfree(ppriv); /* mlx5e_rep_priv */ } @@ -1386,72 +1830,21 @@ static void *mlx5e_vport_rep_get_proto_dev(struct mlx5_eswitch_rep *rep) return rpriv->netdev; } -static void mlx5e_rep_register_vf_vports(struct mlx5e_priv *priv) -{ - struct mlx5_core_dev *mdev = priv->mdev; - struct mlx5_eswitch *esw = mdev->priv.eswitch; - int total_vfs = MLX5_TOTAL_VPORTS(mdev); - int vport; - - for (vport = 1; vport < total_vfs; vport++) { - struct mlx5_eswitch_rep_if rep_if = {}; - - rep_if.load = mlx5e_vport_rep_load; - rep_if.unload = mlx5e_vport_rep_unload; - rep_if.get_proto_dev = mlx5e_vport_rep_get_proto_dev; - mlx5_eswitch_register_vport_rep(esw, vport, &rep_if, REP_ETH); - } -} - -static void mlx5e_rep_unregister_vf_vports(struct mlx5e_priv *priv) +void mlx5e_rep_register_vport_reps(struct mlx5_core_dev *mdev) { - struct mlx5_core_dev *mdev = priv->mdev; struct mlx5_eswitch *esw = mdev->priv.eswitch; - int total_vfs = MLX5_TOTAL_VPORTS(mdev); - int vport; - - for (vport = 1; vport < total_vfs; vport++) - mlx5_eswitch_unregister_vport_rep(esw, vport, REP_ETH); -} - -void 
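/* Ordering note: rpriv->rep is assigned before mlx5e_create_netdev()
 * because profile->init() (mlx5e_init_rep) already branches on
 * rep->vport when picking params, netdev ops and ethtool ops; this is
 * what the "rpriv->rep to be looked up when profile->init() is called"
 * comment above records.
 */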
mlx5e_register_vport_reps(struct mlx5e_priv *priv) -{ - struct mlx5_core_dev *mdev = priv->mdev; - struct mlx5_eswitch *esw = mdev->priv.eswitch; - struct mlx5_eswitch_rep_if rep_if; - struct mlx5e_rep_priv *rpriv; - - rpriv = priv->ppriv; - rpriv->netdev = priv->netdev; + struct mlx5_eswitch_rep_if rep_if = {}; - rep_if.load = mlx5e_nic_rep_load; - rep_if.unload = mlx5e_nic_rep_unload; + rep_if.load = mlx5e_vport_rep_load; + rep_if.unload = mlx5e_vport_rep_unload; rep_if.get_proto_dev = mlx5e_vport_rep_get_proto_dev; - rep_if.priv = rpriv; - INIT_LIST_HEAD(&rpriv->vport_sqs_list); - mlx5_eswitch_register_vport_rep(esw, 0, &rep_if, REP_ETH); /* UPLINK PF vport*/ - - mlx5e_rep_register_vf_vports(priv); /* VFs vports */ -} - -void mlx5e_unregister_vport_reps(struct mlx5e_priv *priv) -{ - struct mlx5_core_dev *mdev = priv->mdev; - struct mlx5_eswitch *esw = mdev->priv.eswitch; - mlx5e_rep_unregister_vf_vports(priv); /* VFs vports */ - mlx5_eswitch_unregister_vport_rep(esw, 0, REP_ETH); /* UPLINK PF*/ + mlx5_eswitch_register_vport_reps(esw, &rep_if, REP_ETH); } -void *mlx5e_alloc_nic_rep_priv(struct mlx5_core_dev *mdev) +void mlx5e_rep_unregister_vport_reps(struct mlx5_core_dev *mdev) { struct mlx5_eswitch *esw = mdev->priv.eswitch; - struct mlx5e_rep_priv *rpriv; - - rpriv = kzalloc(sizeof(*rpriv), GFP_KERNEL); - if (!rpriv) - return NULL; - rpriv->rep = &esw->offloads.vport_reps[0]; - return rpriv; + mlx5_eswitch_unregister_vport_reps(esw, REP_ETH); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h index 844d32d5c29f..83b573b1abac 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h @@ -37,6 +37,7 @@ #include <linux/rhashtable.h> #include "eswitch.h" #include "en.h" +#include "lib/port_tun.h" #ifdef CONFIG_MLX5_ESWITCH struct mlx5e_neigh_update_table { @@ -53,13 +54,38 @@ struct mlx5e_neigh_update_table { unsigned long min_interval; /* jiffies */ }; +struct mlx5_rep_uplink_priv { + /* Filters DB - instantiated by the uplink representor and shared by + * the uplink's VFs + */ + struct rhashtable tc_ht; + + /* indirect block callbacks are invoked on bind/unbind events + * on registered higher level devices (e.g. 
tunnel devices) + * + * tc_indr_block_priv_list is used to look up indirect callback + * private data + * + * netdevice_nb is the netdev events notifier - used to register + * tunnel devices for block events + * + */ + struct list_head tc_indr_block_priv_list; + struct notifier_block netdevice_nb; + + struct mlx5_tun_entropy tun_entropy; + + struct list_head unready_flows; + struct work_struct reoffload_flows_work; +}; + struct mlx5e_rep_priv { struct mlx5_eswitch_rep *rep; struct mlx5e_neigh_update_table neigh_update; struct net_device *netdev; struct mlx5_flow_handle *vport_rx_rule; struct list_head vport_sqs_list; - struct rhashtable tc_ht; /* valid for uplink rep */ + struct mlx5_rep_uplink_priv uplink_priv; /* valid for uplink rep */ }; static inline @@ -128,7 +154,10 @@ struct mlx5e_encap_entry { unsigned char h_dest[ETH_ALEN]; /* destination eth addr */ struct net_device *out_dev; + struct net_device *route_dev; int tunnel_type; + int tunnel_hlen; + int reformat_type; u8 flags; char *encap_header; int encap_size; @@ -140,16 +169,12 @@ struct mlx5e_rep_sq { }; void *mlx5e_alloc_nic_rep_priv(struct mlx5_core_dev *mdev); -void mlx5e_register_vport_reps(struct mlx5e_priv *priv); -void mlx5e_unregister_vport_reps(struct mlx5e_priv *priv); +void mlx5e_rep_register_vport_reps(struct mlx5_core_dev *mdev); +void mlx5e_rep_unregister_vport_reps(struct mlx5_core_dev *mdev); bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv); int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv); void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv); -int mlx5e_get_offload_stats(int attr_id, const struct net_device *dev, void *sp); -bool mlx5e_has_offload_stats(const struct net_device *dev, int attr_id); - -int mlx5e_attr_get(struct net_device *dev, struct switchdev_attr *attr); void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); int mlx5e_rep_encap_entry_attach(struct mlx5e_priv *priv, @@ -158,12 +183,17 @@ void mlx5e_rep_encap_entry_detach(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e); void mlx5e_rep_queue_neigh_stats_work(struct mlx5e_priv *priv); + +bool mlx5e_eswitch_rep(struct net_device *netdev); + #else /* CONFIG_MLX5_ESWITCH */ -static inline void mlx5e_register_vport_reps(struct mlx5e_priv *priv) {} -static inline void mlx5e_unregister_vport_reps(struct mlx5e_priv *priv) {} static inline bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv) { return false; } static inline int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv) { return 0; } static inline void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv) {} #endif +static inline bool mlx5e_is_vport_rep(struct mlx5e_priv *priv) +{ + return (MLX5_ESWITCH_MANAGER(priv->mdev) && priv->ppriv); +} #endif /* __MLX5E_REP_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 0b5ef6d4e815..c3b3002ff62f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -52,40 +52,45 @@ static inline bool mlx5e_rx_hw_stamp(struct hwtstamp_config *config) return config->rx_filter == HWTSTAMP_FILTER_ALL; } -static inline void mlx5e_read_cqe_slot(struct mlx5e_cq *cq, u32 cqcc, - void *data) +static inline void mlx5e_read_cqe_slot(struct mlx5_cqwq *wq, + u32 cqcc, void *data) { - u32 ci = mlx5_cqwq_ctr2ix(&cq->wq, cqcc); + u32 ci = mlx5_cqwq_ctr2ix(wq, cqcc); - memcpy(data, mlx5_cqwq_get_wqe(&cq->wq, ci), sizeof(struct mlx5_cqe64)); + memcpy(data, mlx5_cqwq_get_wqe(wq, ci), sizeof(struct mlx5_cqe64)); } static
inline void mlx5e_read_title_slot(struct mlx5e_rq *rq, - struct mlx5e_cq *cq, u32 cqcc) + struct mlx5_cqwq *wq, + u32 cqcc) { - mlx5e_read_cqe_slot(cq, cqcc, &cq->title); - cq->decmprs_left = be32_to_cpu(cq->title.byte_cnt); - cq->decmprs_wqe_counter = be16_to_cpu(cq->title.wqe_counter); + struct mlx5e_cq_decomp *cqd = &rq->cqd; + struct mlx5_cqe64 *title = &cqd->title; + + mlx5e_read_cqe_slot(wq, cqcc, title); + cqd->left = be32_to_cpu(title->byte_cnt); + cqd->wqe_counter = be16_to_cpu(title->wqe_counter); rq->stats->cqe_compress_blks++; } -static inline void mlx5e_read_mini_arr_slot(struct mlx5e_cq *cq, u32 cqcc) +static inline void mlx5e_read_mini_arr_slot(struct mlx5_cqwq *wq, + struct mlx5e_cq_decomp *cqd, + u32 cqcc) { - mlx5e_read_cqe_slot(cq, cqcc, cq->mini_arr); - cq->mini_arr_idx = 0; + mlx5e_read_cqe_slot(wq, cqcc, cqd->mini_arr); + cqd->mini_arr_idx = 0; } -static inline void mlx5e_cqes_update_owner(struct mlx5e_cq *cq, u32 cqcc, int n) +static inline void mlx5e_cqes_update_owner(struct mlx5_cqwq *wq, int n) { - struct mlx5_cqwq *wq = &cq->wq; - + u32 cqcc = wq->cc; u8 op_own = mlx5_cqwq_get_ctr_wrap_cnt(wq, cqcc) & 1; u32 ci = mlx5_cqwq_ctr2ix(wq, cqcc); u32 wq_sz = mlx5_cqwq_get_size(wq); u32 ci_top = min_t(u32, wq_sz, ci + n); for (; ci < ci_top; ci++, n--) { - struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, ci); + struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(wq, ci); cqe->op_own = op_own; } @@ -93,7 +98,7 @@ static inline void mlx5e_cqes_update_owner(struct mlx5e_cq *cq, u32 cqcc, int n) if (unlikely(ci == wq_sz)) { op_own = !op_own; for (ci = 0; ci < n; ci++) { - struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, ci); + struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(wq, ci); cqe->op_own = op_own; } @@ -101,68 +106,79 @@ static inline void mlx5e_cqes_update_owner(struct mlx5e_cq *cq, u32 cqcc, int n) } static inline void mlx5e_decompress_cqe(struct mlx5e_rq *rq, - struct mlx5e_cq *cq, u32 cqcc) + struct mlx5_cqwq *wq, + u32 cqcc) { - cq->title.byte_cnt = cq->mini_arr[cq->mini_arr_idx].byte_cnt; - cq->title.check_sum = cq->mini_arr[cq->mini_arr_idx].checksum; - cq->title.op_own &= 0xf0; - cq->title.op_own |= 0x01 & (cqcc >> cq->wq.fbc.log_sz); - cq->title.wqe_counter = cpu_to_be16(cq->decmprs_wqe_counter); + struct mlx5e_cq_decomp *cqd = &rq->cqd; + struct mlx5_mini_cqe8 *mini_cqe = &cqd->mini_arr[cqd->mini_arr_idx]; + struct mlx5_cqe64 *title = &cqd->title; + + title->byte_cnt = mini_cqe->byte_cnt; + title->check_sum = mini_cqe->checksum; + title->op_own &= 0xf0; + title->op_own |= 0x01 & (cqcc >> wq->fbc.log_sz); + title->wqe_counter = cpu_to_be16(cqd->wqe_counter); if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) - cq->decmprs_wqe_counter += - mpwrq_get_cqe_consumed_strides(&cq->title); + cqd->wqe_counter += mpwrq_get_cqe_consumed_strides(title); else - cq->decmprs_wqe_counter = - mlx5_wq_cyc_ctr2ix(&rq->wqe.wq, cq->decmprs_wqe_counter + 1); + cqd->wqe_counter = + mlx5_wq_cyc_ctr2ix(&rq->wqe.wq, cqd->wqe_counter + 1); } static inline void mlx5e_decompress_cqe_no_hash(struct mlx5e_rq *rq, - struct mlx5e_cq *cq, u32 cqcc) + struct mlx5_cqwq *wq, + u32 cqcc) { - mlx5e_decompress_cqe(rq, cq, cqcc); - cq->title.rss_hash_type = 0; - cq->title.rss_hash_result = 0; + struct mlx5e_cq_decomp *cqd = &rq->cqd; + + mlx5e_decompress_cqe(rq, wq, cqcc); + cqd->title.rss_hash_type = 0; + cqd->title.rss_hash_result = 0; } static inline u32 mlx5e_decompress_cqes_cont(struct mlx5e_rq *rq, - struct mlx5e_cq *cq, + struct mlx5_cqwq *wq, int update_owner_only, int budget_rem) { - u32 
cqcc = cq->wq.cc + update_owner_only; + struct mlx5e_cq_decomp *cqd = &rq->cqd; + u32 cqcc = wq->cc + update_owner_only; u32 cqe_count; u32 i; - cqe_count = min_t(u32, cq->decmprs_left, budget_rem); + cqe_count = min_t(u32, cqd->left, budget_rem); for (i = update_owner_only; i < cqe_count; - i++, cq->mini_arr_idx++, cqcc++) { - if (cq->mini_arr_idx == MLX5_MINI_CQE_ARRAY_SIZE) - mlx5e_read_mini_arr_slot(cq, cqcc); + i++, cqd->mini_arr_idx++, cqcc++) { + if (cqd->mini_arr_idx == MLX5_MINI_CQE_ARRAY_SIZE) + mlx5e_read_mini_arr_slot(wq, cqd, cqcc); - mlx5e_decompress_cqe_no_hash(rq, cq, cqcc); - rq->handle_rx_cqe(rq, &cq->title); + mlx5e_decompress_cqe_no_hash(rq, wq, cqcc); + rq->handle_rx_cqe(rq, &cqd->title); } - mlx5e_cqes_update_owner(cq, cq->wq.cc, cqcc - cq->wq.cc); - cq->wq.cc = cqcc; - cq->decmprs_left -= cqe_count; + mlx5e_cqes_update_owner(wq, cqcc - wq->cc); + wq->cc = cqcc; + cqd->left -= cqe_count; rq->stats->cqe_compress_pkts += cqe_count; return cqe_count; } static inline u32 mlx5e_decompress_cqes_start(struct mlx5e_rq *rq, - struct mlx5e_cq *cq, + struct mlx5_cqwq *wq, int budget_rem) { - mlx5e_read_title_slot(rq, cq, cq->wq.cc); - mlx5e_read_mini_arr_slot(cq, cq->wq.cc + 1); - mlx5e_decompress_cqe(rq, cq, cq->wq.cc); - rq->handle_rx_cqe(rq, &cq->title); - cq->mini_arr_idx++; + struct mlx5e_cq_decomp *cqd = &rq->cqd; + u32 cc = wq->cc; - return mlx5e_decompress_cqes_cont(rq, cq, 1, budget_rem) - 1; + mlx5e_read_title_slot(rq, wq, cc); + mlx5e_read_mini_arr_slot(wq, cqd, cc + 1); + mlx5e_decompress_cqe(rq, wq, cc); + rq->handle_rx_cqe(rq, &cqd->title); + cqd->mini_arr_idx++; + + return mlx5e_decompress_cqes_cont(rq, wq, 1, budget_rem) - 1; } static inline bool mlx5e_page_is_reserved(struct page *page) @@ -369,7 +385,7 @@ mlx5e_add_skb_frag(struct mlx5e_rq *rq, struct sk_buff *skb, static inline void mlx5e_copy_skb_header(struct device *pdev, struct sk_buff *skb, struct mlx5e_dma_info *dma_info, - int offset_from, int offset_to, u32 headlen) + int offset_from, u32 headlen) { const void *from = page_address(dma_info->page) + offset_from; /* Aligning len to sizeof(long) optimizes memcpy performance */ @@ -377,24 +393,7 @@ mlx5e_copy_skb_header(struct device *pdev, struct sk_buff *skb, dma_sync_single_for_cpu(pdev, dma_info->addr + offset_from, len, DMA_FROM_DEVICE); - skb_copy_to_linear_data_offset(skb, offset_to, from, len); -} - -static inline void -mlx5e_copy_skb_header_mpwqe(struct device *pdev, - struct sk_buff *skb, - struct mlx5e_dma_info *dma_info, - u32 offset, u32 headlen) -{ - u16 headlen_pg = min_t(u32, headlen, PAGE_SIZE - offset); - - mlx5e_copy_skb_header(pdev, skb, dma_info, offset, 0, headlen_pg); - - if (unlikely(offset + headlen > PAGE_SIZE)) { - dma_info++; - mlx5e_copy_skb_header(pdev, skb, dma_info, 0, headlen_pg, - headlen - headlen_pg); - } + skb_copy_to_linear_data(skb, from, len); } static void @@ -554,9 +553,9 @@ static inline void mlx5e_poll_ico_single_cqe(struct mlx5e_cq *cq, mlx5_cqwq_pop(&cq->wq); - if (unlikely((cqe->op_own >> 4) != MLX5_CQE_REQ)) { + if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ)) { netdev_WARN_ONCE(cq->channel->netdev, - "Bad OP in ICOSQ CQE: 0x%x\n", cqe->op_own); + "Bad OP in ICOSQ CQE: 0x%x\n", get_cqe_opcode(cqe)); return; } @@ -693,7 +692,14 @@ static inline bool is_last_ethertype_ip(struct sk_buff *skb, int *network_depth, { *proto = ((struct ethhdr *)skb->data)->h_proto; *proto = __vlan_get_protocol(skb, *proto, network_depth); - return (*proto == htons(ETH_P_IP) || *proto == htons(ETH_P_IPV6)); + + if (*proto == 
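Background for the decompress refactor above, for readers new to CQE compression (a sketch of the layout as this code consumes it; the exact wire format is in the device PRM):

/*
 * compressed session: [title CQE (64B, shared fields)]
 *                     [mini CQE array: 8 x 8B {byte_cnt, checksum, ...}] ...
 * decompress loop: for each mini CQE, patch rq->cqd.title (byte_cnt,
 * check_sum, wqe_counter, op_own ownership bit) and feed it to
 * rq->handle_rx_cqe(); the refactor only moves this bookkeeping from
 * mlx5e_cq into struct mlx5e_cq_decomp (rq->cqd) and passes the
 * mlx5_cqwq explicitly.
 */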
htons(ETH_P_IP)) + return pskb_may_pull(skb, *network_depth + sizeof(struct iphdr)); + + if (*proto == htons(ETH_P_IPV6)) + return pskb_may_pull(skb, *network_depth + sizeof(struct ipv6hdr)); + + return false; } static inline void mlx5e_enable_ecn(struct mlx5e_rq *rq, struct sk_buff *skb) @@ -713,23 +719,76 @@ static inline void mlx5e_enable_ecn(struct mlx5e_rq *rq, struct sk_buff *skb) rq->stats->ecn_mark += !!rc; } -static u32 mlx5e_get_fcs(const struct sk_buff *skb) +static u8 get_ip_proto(struct sk_buff *skb, int network_depth, __be16 proto) { - const void *fcs_bytes; - u32 _fcs_bytes; + void *ip_p = skb->data + network_depth; + + return (proto == htons(ETH_P_IP)) ? ((struct iphdr *)ip_p)->protocol : + ((struct ipv6hdr *)ip_p)->nexthdr; +} + +#define short_frame(size) ((size) <= ETH_ZLEN + ETH_FCS_LEN) - fcs_bytes = skb_header_pointer(skb, skb->len - ETH_FCS_LEN, - ETH_FCS_LEN, &_fcs_bytes); +#define MAX_PADDING 8 - return __get_unaligned_cpu32(fcs_bytes); +static void +tail_padding_csum_slow(struct sk_buff *skb, int offset, int len, + struct mlx5e_rq_stats *stats) +{ + stats->csum_complete_tail_slow++; + skb->csum = csum_block_add(skb->csum, + skb_checksum(skb, offset, len, 0), + offset); } -static u8 get_ip_proto(struct sk_buff *skb, int network_depth, __be16 proto) +static void +tail_padding_csum(struct sk_buff *skb, int offset, + struct mlx5e_rq_stats *stats) { - void *ip_p = skb->data + network_depth; + u8 tail_padding[MAX_PADDING]; + int len = skb->len - offset; + void *tail; - return (proto == htons(ETH_P_IP)) ? ((struct iphdr *)ip_p)->protocol : - ((struct ipv6hdr *)ip_p)->nexthdr; + if (unlikely(len > MAX_PADDING)) { + tail_padding_csum_slow(skb, offset, len, stats); + return; + } + + tail = skb_header_pointer(skb, offset, len, tail_padding); + if (unlikely(!tail)) { + tail_padding_csum_slow(skb, offset, len, stats); + return; + } + + stats->csum_complete_tail++; + skb->csum = csum_block_add(skb->csum, csum_partial(tail, len, 0), offset); +} + +static void +mlx5e_skb_padding_csum(struct sk_buff *skb, int network_depth, __be16 proto, + struct mlx5e_rq_stats *stats) +{ + struct ipv6hdr *ip6; + struct iphdr *ip4; + int pkt_len; + + switch (proto) { + case htons(ETH_P_IP): + ip4 = (struct iphdr *)(skb->data + network_depth); + pkt_len = network_depth + ntohs(ip4->tot_len); + break; + case htons(ETH_P_IPV6): + ip6 = (struct ipv6hdr *)(skb->data + network_depth); + pkt_len = network_depth + sizeof(*ip6) + ntohs(ip6->payload_len); + break; + default: + return; + } + + if (likely(pkt_len >= skb->len)) + return; + + tail_padding_csum(skb, pkt_len, stats); } static inline void mlx5e_handle_csum(struct net_device *netdev, @@ -751,7 +810,19 @@ static inline void mlx5e_handle_csum(struct net_device *netdev, return; } - if (unlikely(test_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &rq->state))) + /* True when explicitly set via priv flag, or XDP prog is loaded */ + if (test_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &rq->state)) + goto csum_unnecessary; + + /* CQE csum doesn't cover padding octets in short ethernet + * frames. The pad field is appended prior to calculating + * and appending the FCS field. + * + * Detecting these padded frames requires verifying and parsing + * IP headers, so we simply force all those small frames to be + * CHECKSUM_UNNECESSARY even if they are not padded.
+ */ + if (short_frame(skb->len)) goto csum_unnecessary; if (likely(is_last_ethertype_ip(skb, &network_depth, &proto))) { @@ -768,18 +839,15 @@ static inline void mlx5e_handle_csum(struct net_device *netdev, skb->csum = csum_partial(skb->data + ETH_HLEN, network_depth - ETH_HLEN, skb->csum); - if (unlikely(netdev->features & NETIF_F_RXFCS)) - skb->csum = csum_block_add(skb->csum, - (__force __wsum)mlx5e_get_fcs(skb), - skb->len - ETH_FCS_LEN); + + mlx5e_skb_padding_csum(skb, network_depth, proto, stats); stats->csum_complete++; return; } csum_unnecessary: if (likely((cqe->hds_ip_ext & CQE_L3_OK) && - ((cqe->hds_ip_ext & CQE_L4_OK) || - (get_cqe_l4_hdr_type(cqe) == CQE_L4_HDR_TYPE_NONE)))) { + (cqe->hds_ip_ext & CQE_L4_OK))) { skb->ip_summed = CHECKSUM_UNNECESSARY; if (cqe_is_tunneled(cqe)) { skb->csum_level = 1; @@ -898,7 +966,7 @@ mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, prefetchw(va); /* xdp_frame data area */ prefetch(data); - if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) { + if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_RESP_SEND)) { rq->stats->wqe_err++; return NULL; } @@ -930,7 +998,7 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, u16 byte_cnt = cqe_bcnt - headlen; struct sk_buff *skb; - if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) { + if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_RESP_SEND)) { rq->stats->wqe_err++; return NULL; } @@ -960,8 +1028,7 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, } /* copy header */ - mlx5e_copy_skb_header(rq->pdev, skb, head_wi->di, head_wi->offset, - 0, headlen); + mlx5e_copy_skb_header(rq->pdev, skb, head_wi->di, head_wi->offset, headlen); /* skb linear part was allocated with headlen and aligned to long */ skb->tail += headlen; skb->len += headlen; @@ -1083,8 +1150,7 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w di++; } /* copy header */ - mlx5e_copy_skb_header_mpwqe(rq->pdev, skb, head_di, - head_offset, headlen); + mlx5e_copy_skb_header(rq->pdev, skb, head_di, head_offset, headlen); /* skb linear part was allocated with headlen and aligned to long */ skb->tail += headlen; skb->len += headlen; @@ -1154,7 +1220,7 @@ void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) wi->consumed_strides += cstrides; - if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) { + if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_RESP_SEND)) { rq->stats->wqe_err++; goto mpwrq_cqe_out; } @@ -1190,17 +1256,17 @@ mpwrq_cqe_out: int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget) { struct mlx5e_rq *rq = container_of(cq, struct mlx5e_rq, cq); - struct mlx5e_xdpsq *xdpsq = &rq->xdpsq; + struct mlx5_cqwq *cqwq = &cq->wq; struct mlx5_cqe64 *cqe; int work_done = 0; if (unlikely(!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state))) return 0; - if (cq->decmprs_left) - work_done += mlx5e_decompress_cqes_cont(rq, cq, 0, budget); + if (rq->cqd.left) + work_done += mlx5e_decompress_cqes_cont(rq, cqwq, 0, budget); - cqe = mlx5_cqwq_get_cqe(&cq->wq); + cqe = mlx5_cqwq_get_cqe(cqwq); if (!cqe) { if (unlikely(work_done)) goto out; @@ -1210,28 +1276,21 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget) do { if (mlx5_get_cqe_format(cqe) == MLX5_COMPRESSED) { work_done += - mlx5e_decompress_cqes_start(rq, cq, + mlx5e_decompress_cqes_start(rq, cqwq, budget - work_done); continue; } - mlx5_cqwq_pop(&cq->wq); + mlx5_cqwq_pop(cqwq); rq->handle_rx_cqe(rq, cqe); - } while ((++work_done < budget) && (cqe = 
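The tail-padding fixup above relies on the algebra of the 16-bit one's-complement checksum: a block's partial sum can be folded into a larger sum at any even offset (odd offsets need a byte swap, which the kernel's csum_block_add() handles). A self-contained userspace model of that property — csum_partial()/csum_block_add() here are simplified local stand-ins, not the kernel implementations:

#include <stdio.h>
#include <stdint.h>
#include <string.h>

static uint32_t fold16(uint32_t sum)
{
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return sum;
}

/* big-endian 16-bit one's-complement sum over buf[0..len) */
static uint32_t csum_partial(const uint8_t *buf, size_t len, uint32_t sum)
{
	size_t i;

	for (i = 0; i + 1 < len; i += 2)
		sum += ((uint32_t)buf[i] << 8) | buf[i + 1];
	if (len & 1)
		sum += (uint32_t)buf[len - 1] << 8;
	return fold16(sum);
}

static uint32_t csum_block_add(uint32_t sum, uint32_t sum2, size_t offset)
{
	if (offset & 1)	/* block starts on an odd byte: its bytes swap lanes */
		sum2 = ((sum2 & 0xff) << 8) | (sum2 >> 8);
	return fold16(sum + sum2);
}

int main(void)
{
	uint8_t frame[60];	/* 46-byte IP packet padded to 60B min frame */
	size_t pkt_len = 46;

	memset(frame, 0xab, pkt_len);				/* packet */
	memset(frame + pkt_len, 0x5a, sizeof(frame) - pkt_len);	/* padding */

	uint32_t want = csum_partial(frame, sizeof(frame), 0);	/* full skb */
	uint32_t hw = csum_partial(frame, pkt_len, 0);	/* device: no padding */
	uint32_t fixed = csum_block_add(hw,
			csum_partial(frame + pkt_len, sizeof(frame) - pkt_len, 0),
			pkt_len);	/* driver folds the tail back in */

	printf("want=%04x fixed=%04x match=%d\n", want, fixed, want == fixed);
	return 0;
}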
mlx5_cqwq_get_cqe(&cq->wq))); + } while ((++work_done < budget) && (cqe = mlx5_cqwq_get_cqe(cqwq))); out: - if (xdpsq->doorbell) { - mlx5e_xmit_xdp_doorbell(xdpsq); - xdpsq->doorbell = false; - } - - if (xdpsq->redirect_flush) { - xdp_do_flush_map(); - xdpsq->redirect_flush = false; - } + if (rq->xdp_prog) + mlx5e_xdp_rx_poll_complete(rq); - mlx5_cqwq_update_db_record(&cq->wq); + mlx5_cqwq_update_db_record(cqwq); /* ensure cq space is freed before enabling more cqes */ wmb(); @@ -1292,8 +1351,14 @@ static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq, skb->protocol = *((__be16 *)(skb->data)); - skb->ip_summed = CHECKSUM_COMPLETE; - skb->csum = csum_unfold((__force __sum16)cqe->check_sum); + if (netdev->features & NETIF_F_RXCSUM) { + skb->ip_summed = CHECKSUM_COMPLETE; + skb->csum = csum_unfold((__force __sum16)cqe->check_sum); + stats->csum_complete++; + } else { + skb->ip_summed = CHECKSUM_NONE; + stats->csum_none++; + } if (unlikely(mlx5e_rx_hw_stamp(tstamp))) skb_hwtstamps(skb)->hwtstamp = @@ -1312,7 +1377,6 @@ static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq, skb->dev = netdev; - stats->csum_complete++; stats->packets++; stats->bytes += cqe_bcnt; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index 4337afd610d7..b75aa8b8bf04 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -30,6 +30,7 @@ * SOFTWARE. */ +#include "lib/mlx5.h" #include "en.h" #include "en_accel/ipsec.h" #include "en_accel/tls.h" @@ -58,6 +59,8 @@ static const struct counter_desc sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_unnecessary) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_none) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_complete) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_complete_tail) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_complete_tail_slow) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_unnecessary_inner) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_drop) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_redirect) }, @@ -126,9 +129,9 @@ static int mlx5e_grp_sw_fill_stats(struct mlx5e_priv *priv, u64 *data, int idx) return idx; } -void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv) +static void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv) { - struct mlx5e_sw_stats temp, *s = &temp; + struct mlx5e_sw_stats *s = &priv->stats.sw; int i; memset(s, 0, sizeof(*s)); @@ -150,6 +153,8 @@ void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv) s->rx_removed_vlan_packets += rq_stats->removed_vlan_packets; s->rx_csum_none += rq_stats->csum_none; s->rx_csum_complete += rq_stats->csum_complete; + s->rx_csum_complete_tail += rq_stats->csum_complete_tail; + s->rx_csum_complete_tail_slow += rq_stats->csum_complete_tail_slow; s->rx_csum_unnecessary += rq_stats->csum_unnecessary; s->rx_csum_unnecessary_inner += rq_stats->csum_unnecessary_inner; s->rx_xdp_drop += rq_stats->xdp_drop; @@ -211,8 +216,6 @@ void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv) s->tx_cqes += sq_stats->cqes; } } - - memcpy(&priv->stats.sw, s, sizeof(*s)); } static const struct counter_desc q_stats_desc[] = { @@ -480,7 +483,10 @@ static int mlx5e_grp_802_3_fill_stats(struct mlx5e_priv *priv, u64 *data, return idx; } -static void mlx5e_grp_802_3_update_stats(struct mlx5e_priv *priv) +#define MLX5_BASIC_PPCNT_SUPPORTED(mdev) \ + (MLX5_CAP_GEN(mdev, pcam_reg) ? 
MLX5_CAP_PCAM_REG(mdev, ppcnt) : 1) + +void mlx5e_grp_802_3_update_stats(struct mlx5e_priv *priv) { struct mlx5e_pport_stats *pstats = &priv->stats.pport; struct mlx5_core_dev *mdev = priv->mdev; @@ -488,6 +494,9 @@ static void mlx5e_grp_802_3_update_stats(struct mlx5e_priv *priv) int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); void *out; + if (!MLX5_BASIC_PPCNT_SUPPORTED(mdev)) + return; + MLX5_SET(ppcnt_reg, in, local_port, 1); out = pstats->IEEE_802_3_counters; MLX5_SET(ppcnt_reg, in, grp, MLX5_IEEE_802_3_COUNTERS_GROUP); @@ -600,6 +609,9 @@ static void mlx5e_grp_2819_update_stats(struct mlx5e_priv *priv) int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); void *out; + if (!MLX5_BASIC_PPCNT_SUPPORTED(mdev)) + return; + MLX5_SET(ppcnt_reg, in, local_port, 1); out = pstats->RFC_2819_counters; MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2819_COUNTERS_GROUP); @@ -934,7 +946,7 @@ static const struct counter_desc pport_per_prio_pfc_stats_desc[] = { }; static const struct counter_desc pport_pfc_stall_stats_desc[] = { - { "tx_pause_storm_warning_events ", PPORT_PER_PRIO_OFF(device_stall_minor_watermark_cnt) }, + { "tx_pause_storm_warning_events", PPORT_PER_PRIO_OFF(device_stall_minor_watermark_cnt) }, { "tx_pause_storm_error_events", PPORT_PER_PRIO_OFF(device_stall_critical_watermark_cnt) }, }; @@ -1075,6 +1087,9 @@ static void mlx5e_grp_per_prio_update_stats(struct mlx5e_priv *priv) int prio; void *out; + if (!MLX5_BASIC_PPCNT_SUPPORTED(mdev)) + return; + MLX5_SET(ppcnt_reg, in, local_port, 1); MLX5_SET(ppcnt_reg, in, grp, MLX5_PER_PRIORITY_COUNTERS_GROUP); for (prio = 0; prio < NUM_PPORT_PRIO; prio++) { @@ -1086,13 +1101,13 @@ static void mlx5e_grp_per_prio_update_stats(struct mlx5e_priv *priv) } static const struct counter_desc mlx5e_pme_status_desc[] = { - { "module_unplug", 8 }, + { "module_unplug", sizeof(u64) * MLX5_MODULE_STATUS_UNPLUGGED }, }; static const struct counter_desc mlx5e_pme_error_desc[] = { - { "module_bus_stuck", 16 }, /* bus stuck (I2C or data shorted) */ - { "module_high_temp", 48 }, /* high temperature */ - { "module_bad_shorted", 56 }, /* bad or shorted cable/module */ + { "module_bus_stuck", sizeof(u64) * MLX5_MODULE_EVENT_ERROR_BUS_STUCK }, + { "module_high_temp", sizeof(u64) * MLX5_MODULE_EVENT_ERROR_HIGH_TEMPERATURE }, + { "module_bad_shorted", sizeof(u64) * MLX5_MODULE_EVENT_ERROR_BAD_CABLE }, }; #define NUM_PME_STATUS_STATS ARRAY_SIZE(mlx5e_pme_status_desc) @@ -1120,15 +1135,17 @@ static int mlx5e_grp_pme_fill_strings(struct mlx5e_priv *priv, u8 *data, static int mlx5e_grp_pme_fill_stats(struct mlx5e_priv *priv, u64 *data, int idx) { - struct mlx5_priv *mlx5_priv = &priv->mdev->priv; + struct mlx5_pme_stats pme_stats; int i; + mlx5_get_pme_stats(priv->mdev, &pme_stats); + for (i = 0; i < NUM_PME_STATUS_STATS; i++) - data[idx++] = MLX5E_READ_CTR64_CPU(mlx5_priv->pme_stats.status_counters, + data[idx++] = MLX5E_READ_CTR64_CPU(pme_stats.status_counters, mlx5e_pme_status_desc, i); for (i = 0; i < NUM_PME_ERR_STATS; i++) - data[idx++] = MLX5E_READ_CTR64_CPU(mlx5_priv->pme_stats.error_counters, + data[idx++] = MLX5E_READ_CTR64_CPU(pme_stats.error_counters, mlx5e_pme_error_desc, i); return idx; @@ -1177,6 +1194,8 @@ static const struct counter_desc rq_stats_desc[] = { { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, packets) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, bytes) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_complete) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_complete_tail) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_complete_tail_slow) }, { 
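The new capability gate above decodes as follows (a reading of the macro, not a behavior change for pre-PCAM devices):

/*
 * MLX5_BASIC_PPCNT_SUPPORTED(mdev):
 *   pcam_reg set   -> honor the PCAM ppcnt bit (may be 0)
 *   pcam_reg clear -> 1, i.e. assume PPCNT exists, as before
 * the 802.3 / RFC 2819 / per-prio update routines now return early
 * instead of issuing a PPCNT access the device would reject.
 */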
MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_unnecessary) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_unnecessary_inner) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_none) }, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index 3ff69ddae2d3..16c3b785f282 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -71,6 +71,8 @@ struct mlx5e_sw_stats { u64 rx_csum_unnecessary; u64 rx_csum_none; u64 rx_csum_complete; + u64 rx_csum_complete_tail; + u64 rx_csum_complete_tail_slow; u64 rx_csum_unnecessary_inner; u64 rx_xdp_drop; u64 rx_xdp_redirect; @@ -181,6 +183,8 @@ struct mlx5e_rq_stats { u64 packets; u64 bytes; u64 csum_complete; + u64 csum_complete_tail; + u64 csum_complete_tail_slow; u64 csum_unnecessary; u64 csum_unnecessary_inner; u64 csum_none; @@ -277,6 +281,6 @@ struct mlx5e_stats_grp { extern const struct mlx5e_stats_grp mlx5e_stats_grps[]; extern const int mlx5e_num_stats_grps; -void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv); +void mlx5e_grp_802_3_update_stats(struct mlx5e_priv *priv); #endif /* __MLX5_EN_STATS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 9dabe9d4b279..d75dc44eb2ff 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -38,21 +38,20 @@ #include <linux/mlx5/fs.h> #include <linux/mlx5/device.h> #include <linux/rhashtable.h> -#include <net/switchdev.h> #include <net/tc_act/tc_mirred.h> #include <net/tc_act/tc_vlan.h> #include <net/tc_act/tc_tunnel_key.h> #include <net/tc_act/tc_pedit.h> #include <net/tc_act/tc_csum.h> -#include <net/vxlan.h> #include <net/arp.h> #include "en.h" #include "en_rep.h" #include "en_tc.h" #include "eswitch.h" -#include "lib/vxlan.h" #include "fs_core.h" #include "en/port.h" +#include "en/tc_tun.h" +#include "lib/devcom.h" struct mlx5_nic_flow_attr { u32 action; @@ -69,25 +68,56 @@ struct mlx5_nic_flow_attr { enum { MLX5E_TC_FLOW_INGRESS = MLX5E_TC_INGRESS, MLX5E_TC_FLOW_EGRESS = MLX5E_TC_EGRESS, - MLX5E_TC_FLOW_ESWITCH = BIT(MLX5E_TC_FLOW_BASE), - MLX5E_TC_FLOW_NIC = BIT(MLX5E_TC_FLOW_BASE + 1), - MLX5E_TC_FLOW_OFFLOADED = BIT(MLX5E_TC_FLOW_BASE + 2), - MLX5E_TC_FLOW_HAIRPIN = BIT(MLX5E_TC_FLOW_BASE + 3), - MLX5E_TC_FLOW_HAIRPIN_RSS = BIT(MLX5E_TC_FLOW_BASE + 4), - MLX5E_TC_FLOW_SLOW = BIT(MLX5E_TC_FLOW_BASE + 5), + MLX5E_TC_FLOW_ESWITCH = MLX5E_TC_ESW_OFFLOAD, + MLX5E_TC_FLOW_NIC = MLX5E_TC_NIC_OFFLOAD, + MLX5E_TC_FLOW_OFFLOADED = BIT(MLX5E_TC_FLOW_BASE), + MLX5E_TC_FLOW_HAIRPIN = BIT(MLX5E_TC_FLOW_BASE + 1), + MLX5E_TC_FLOW_HAIRPIN_RSS = BIT(MLX5E_TC_FLOW_BASE + 2), + MLX5E_TC_FLOW_SLOW = BIT(MLX5E_TC_FLOW_BASE + 3), + MLX5E_TC_FLOW_DUP = BIT(MLX5E_TC_FLOW_BASE + 4), + MLX5E_TC_FLOW_NOT_READY = BIT(MLX5E_TC_FLOW_BASE + 5), }; #define MLX5E_TC_MAX_SPLITS 1 +/* Helper struct for accessing a struct containing list_head array. + * Containing struct + * |- Helper array + * [0] Helper item 0 + * |- list_head item 0 + * |- index (0) + * [1] Helper item 1 + * |- list_head item 1 + * |- index (1) + * To access the containing struct from one of the list_head items: + * 1. Get the helper item from the list_head item using + * helper item = + * container_of(list_head item, helper struct type, list_head field) + * 2. 
Get the containing struct from the helper item and its index in the array: + * containing struct = + * container_of(helper item, containing struct type, helper field[index]) + */ +struct encap_flow_item { + struct list_head list; + int index; +}; + struct mlx5e_tc_flow { struct rhash_head node; struct mlx5e_priv *priv; u64 cookie; u16 flags; struct mlx5_flow_handle *rule[MLX5E_TC_MAX_SPLITS + 1]; - struct list_head encap; /* flows sharing the same encap ID */ + /* Flow can be associated with multiple encap IDs. + * The number of encaps is bounded by the number of supported + * destinations. + */ + struct encap_flow_item encaps[MLX5_MAX_FLOW_FWD_VPORTS]; + struct mlx5e_tc_flow *peer_flow; struct list_head mod_hdr; /* flows sharing the same mod hdr ID */ struct list_head hairpin; /* flows sharing the same hairpin */ + struct list_head peer; /* flows with peer flow */ + struct list_head unready; /* flows not ready to be offloaded (e.g. due to a missing route) */ union { struct mlx5_esw_flow_attr esw_attr[0]; struct mlx5_nic_flow_attr nic_attr[0]; @@ -95,11 +125,13 @@ struct mlx5e_tc_flow { }; struct mlx5e_tc_flow_parse_attr { - struct ip_tunnel_info tun_info; + struct ip_tunnel_info tun_info[MLX5_MAX_FLOW_FWD_VPORTS]; + struct net_device *filter_dev; struct mlx5_flow_spec spec; int num_mod_hdr_actions; + int max_mod_hdr_actions; void *mod_hdr_actions; - int mirred_ifindex; + int mirred_ifindex[MLX5_MAX_FLOW_FWD_VPORTS]; }; #define MLX5E_TC_TABLE_NUM_GROUPS 4 @@ -316,7 +348,7 @@ static void mlx5e_hairpin_fill_rqt_rqns(struct mlx5e_hairpin *hp, void *rqtc) for (i = 0; i < sz; i++) { ix = i; - if (priv->channels.params.rss_hfunc == ETH_RSS_HASH_XOR) + if (priv->rss_params.hfunc == ETH_RSS_HASH_XOR) ix = mlx5e_bits_invert(i, ilog2(sz)); ix = indirection_rqt[ix]; rqn = hp->pair->rqn[ix]; @@ -360,13 +392,15 @@ static int mlx5e_hairpin_create_indirect_tirs(struct mlx5e_hairpin *hp) void *tirc; for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { + struct mlx5e_tirc_config ttconfig = mlx5e_tirc_get_default_config(tt); + memset(in, 0, MLX5_ST_SZ_BYTES(create_tir_in)); tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); MLX5_SET(tirc, tirc, transport_domain, hp->tdn); MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT); MLX5_SET(tirc, tirc, indirect_table, hp->indir_rqt.rqtn); - mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc, false); + mlx5e_build_indir_tir_ctx_hash(&priv->rss_params, &ttconfig, tirc, false); err = mlx5_core_create_tir(hp->func_mdev, in, MLX5_ST_SZ_BYTES(create_tir_in), &hp->indir_tirn[tt]); @@ -569,7 +603,7 @@ static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv, struct mlx5e_tc_flow_parse_attr *parse_attr, struct netlink_ext_ack *extack) { - int peer_ifindex = parse_attr->mirred_ifindex; + int peer_ifindex = parse_attr->mirred_ifindex[0]; struct mlx5_hairpin_params params; struct mlx5_core_dev *peer_mdev; struct mlx5e_hairpin_entry *hpe; @@ -802,7 +836,7 @@ static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv, mlx5_del_flow_rules(flow->rule[0]); mlx5_fc_destroy(priv->mdev, counter); - if (!mlx5e_tc_num_filters(priv) && priv->fs.tc.t) { + if (!mlx5e_tc_num_filters(priv, MLX5E_TC_NIC_OFFLOAD) && priv->fs.tc.t) { mlx5_destroy_flow_table(priv->fs.tc.t); priv->fs.tc.t = NULL; } @@ -815,14 +849,15 @@ static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv, } static void mlx5e_detach_encap(struct mlx5e_priv *priv, - struct mlx5e_tc_flow *flow); + struct mlx5e_tc_flow *flow, int out_index); static int mlx5e_attach_encap(struct mlx5e_priv *priv, - struct ip_tunnel_info
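The two-step container_of recovery described for encap_flow_item near the top of this hunk can be tried in plain userspace C. In this sketch, list_head and container_of are stubbed-down local stand-ins and struct flow is a hypothetical containing struct:

#include <stdio.h>
#include <stddef.h>

struct list_head { struct list_head *next, *prev; };

/* classic container_of; works with a runtime array index in 'member' */
#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - (size_t)&((type *)0)->member))

struct encap_flow_item {
	struct list_head list;
	int index;
};

struct flow {				/* hypothetical containing struct */
	int cookie;
	struct encap_flow_item encaps[2];
};

int main(void)
{
	struct flow f = { .cookie = 42 };
	struct list_head *lh;
	struct encap_flow_item *efi;
	struct flow *owner;

	f.encaps[1].index = 1;
	lh = &f.encaps[1].list;		/* as it would be found on a list */

	/* step 1: list_head -> helper item */
	efi = container_of(lh, struct encap_flow_item, list);
	/* step 2: helper item + stored index -> containing struct */
	owner = container_of(efi, struct flow, encaps[efi->index]);

	printf("cookie=%d\n", owner->cookie);	/* prints 42 */
	return 0;
}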
*tun_info, + struct mlx5e_tc_flow *flow, struct net_device *mirred_dev, + int out_index, + struct netlink_ext_ack *extack, struct net_device **encap_dev, - struct mlx5e_tc_flow *flow, - struct netlink_ext_ack *extack); + bool *encap_valid); static struct mlx5_flow_handle * mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw, @@ -836,7 +871,7 @@ mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw, if (IS_ERR(rule)) return rule; - if (attr->mirror_count) { + if (attr->split_count) { flow->rule[1] = mlx5_eswitch_add_fwd_rule(esw, spec, attr); if (IS_ERR(flow->rule[1])) { mlx5_eswitch_del_offloaded_rule(esw, rule, attr); @@ -855,7 +890,7 @@ mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw, { flow->flags &= ~MLX5E_TC_FLOW_OFFLOADED; - if (attr->mirror_count) + if (attr->split_count) mlx5_eswitch_del_fwd_rule(esw, flow->rule[1], attr); mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr); @@ -871,7 +906,7 @@ mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch *esw, memcpy(slow_attr, flow->esw_attr, sizeof(*slow_attr)); slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; - slow_attr->mirror_count = 0; + slow_attr->split_count = 0; slow_attr->dest_chain = FDB_SLOW_PATH_CHAIN; rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, slow_attr); @@ -888,27 +923,49 @@ mlx5e_tc_unoffload_from_slow_path(struct mlx5_eswitch *esw, { memcpy(slow_attr, flow->esw_attr, sizeof(*slow_attr)); slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; - slow_attr->mirror_count = 0; + slow_attr->split_count = 0; slow_attr->dest_chain = FDB_SLOW_PATH_CHAIN; mlx5e_tc_unoffload_fdb_rules(esw, flow, slow_attr); flow->flags &= ~MLX5E_TC_FLOW_SLOW; } +static void add_unready_flow(struct mlx5e_tc_flow *flow) +{ + struct mlx5_rep_uplink_priv *uplink_priv; + struct mlx5e_rep_priv *rpriv; + struct mlx5_eswitch *esw; + + esw = flow->priv->mdev->priv.eswitch; + rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + uplink_priv = &rpriv->uplink_priv; + + flow->flags |= MLX5E_TC_FLOW_NOT_READY; + list_add_tail(&flow->unready, &uplink_priv->unready_flows); +} + +static void remove_unready_flow(struct mlx5e_tc_flow *flow) +{ + list_del(&flow->unready); + flow->flags &= ~MLX5E_TC_FLOW_NOT_READY; +} + static int mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, - struct mlx5e_tc_flow_parse_attr *parse_attr, struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; u32 max_chain = mlx5_eswitch_get_chain_range(esw); struct mlx5_esw_flow_attr *attr = flow->esw_attr; + struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr; u16 max_prio = mlx5_eswitch_get_prio_range(esw); struct net_device *out_dev, *encap_dev = NULL; struct mlx5_fc *counter = NULL; struct mlx5e_rep_priv *rpriv; struct mlx5e_priv *out_priv; - int err = 0, encap_err = 0; + bool encap_valid = true; + int err = 0; + int out_index; if (!mlx5_eswitch_prios_supported(esw) && attr->prio != 1) { NL_SET_ERR_MSG(extack, "E-switch priorities unsupported, upgrade FW"); @@ -927,20 +984,24 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, goto err_max_prio_chain; } - if (attr->action & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT) { + for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) { + int mirred_ifindex; + + if (!(attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP)) + continue; + + mirred_ifindex = parse_attr->mirred_ifindex[out_index]; out_dev = __dev_get_by_index(dev_net(priv->netdev), - attr->parse_attr->mirred_ifindex); - encap_err = mlx5e_attach_encap(priv, &parse_attr->tun_info, - out_dev, 
&encap_dev, flow, - extack); - if (encap_err && encap_err != -EAGAIN) { - err = encap_err; + mirred_ifindex); + err = mlx5e_attach_encap(priv, flow, out_dev, out_index, + extack, &encap_dev, &encap_valid); + if (err) goto err_attach_encap; - } + out_priv = netdev_priv(encap_dev); rpriv = out_priv->ppriv; - attr->out_rep[attr->out_count] = rpriv->rep; - attr->out_mdev[attr->out_count++] = out_priv->mdev; + attr->dests[out_index].rep = rpriv->rep; + attr->dests[out_index].mdev = out_priv->mdev; } err = mlx5_eswitch_add_vlan_action(esw, attr); @@ -955,7 +1016,7 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, } if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { - counter = mlx5_fc_create(esw->dev, true); + counter = mlx5_fc_create(attr->counter_dev, true); if (IS_ERR(counter)) { err = PTR_ERR(counter); goto err_create_counter; @@ -964,10 +1025,11 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, attr->counter = counter; } - /* we get here if (1) there's no error or when - * (2) there's an encap action and we're on -EAGAIN (no valid neigh) + /* we get here if one of the following takes place: + * (1) there's no error + * (2) there's an encap action and we don't have valid neigh */ - if (encap_err == -EAGAIN) { + if (!encap_valid) { /* continue with goto slow path rule instead */ struct mlx5_esw_flow_attr slow_attr; @@ -984,15 +1046,16 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, return 0; err_add_rule: - mlx5_fc_destroy(esw->dev, counter); + mlx5_fc_destroy(attr->counter_dev, counter); err_create_counter: if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) mlx5e_detach_mod_hdr(priv, flow); err_mod_hdr: mlx5_eswitch_del_vlan_action(esw, attr); err_add_vlan: - if (attr->action & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT) - mlx5e_detach_encap(priv, flow); + for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) + if (attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP) + mlx5e_detach_encap(priv, flow, out_index); err_attach_encap: err_max_prio_chain: return err; @@ -1004,6 +1067,13 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5_esw_flow_attr *attr = flow->esw_attr; struct mlx5_esw_flow_attr slow_attr; + int out_index; + + if (flow->flags & MLX5E_TC_FLOW_NOT_READY) { + remove_unready_flow(flow); + kvfree(attr->parse_attr); + return; + } if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) { if (flow->flags & MLX5E_TC_FLOW_SLOW) @@ -1014,16 +1084,16 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, mlx5_eswitch_del_vlan_action(esw, attr); - if (attr->action & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT) { - mlx5e_detach_encap(priv, flow); - kvfree(attr->parse_attr); - } + for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) + if (attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP) + mlx5e_detach_encap(priv, flow, out_index); + kvfree(attr->parse_attr); if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) mlx5e_detach_mod_hdr(priv, flow); if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) - mlx5_fc_destroy(esw->dev, attr->counter); + mlx5_fc_destroy(attr->counter_dev, attr->counter); } void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv, @@ -1033,10 +1103,12 @@ void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv, struct mlx5_esw_flow_attr slow_attr, *esw_attr; struct mlx5_flow_handle *rule; struct mlx5_flow_spec *spec; + struct encap_flow_item *efi; struct mlx5e_tc_flow *flow; int err; - err = mlx5_packet_reformat_alloc(priv->mdev, e->tunnel_type, + err = 
mlx5_packet_reformat_alloc(priv->mdev, + e->reformat_type, e->encap_size, e->encap_header, MLX5_FLOW_NAMESPACE_FDB, &e->encap_id); @@ -1048,11 +1120,31 @@ void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv, e->flags |= MLX5_ENCAP_ENTRY_VALID; mlx5e_rep_queue_neigh_stats_work(priv); - list_for_each_entry(flow, &e->flows, encap) { + list_for_each_entry(efi, &e->flows, list) { + bool all_flow_encaps_valid = true; + int i; + + flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]); esw_attr = flow->esw_attr; - esw_attr->encap_id = e->encap_id; spec = &esw_attr->parse_attr->spec; + esw_attr->dests[efi->index].encap_id = e->encap_id; + esw_attr->dests[efi->index].flags |= MLX5_ESW_DEST_ENCAP_VALID; + /* Flow can be associated with multiple encap entries. + * Before offloading the flow verify that all of them have + * a valid neighbour. + */ + for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) { + if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP)) + continue; + if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP_VALID)) { + all_flow_encaps_valid = false; + break; + } + } + /* Do not offload flows with unresolved neighbors */ + if (!all_flow_encaps_valid) + continue; /* update from slow path rule to encap rule */ rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, esw_attr); if (IS_ERR(rule)) { @@ -1075,14 +1167,18 @@ void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv, struct mlx5_esw_flow_attr slow_attr; struct mlx5_flow_handle *rule; struct mlx5_flow_spec *spec; + struct encap_flow_item *efi; struct mlx5e_tc_flow *flow; int err; - list_for_each_entry(flow, &e->flows, encap) { + list_for_each_entry(efi, &e->flows, list) { + flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]); spec = &flow->esw_attr->parse_attr->spec; /* update from encap rule to slow path rule */ rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec, &slow_attr); + /* mark the flow's encap dest as non-valid */ + flow->esw_attr->dests[efi->index].flags &= ~MLX5_ESW_DEST_ENCAP_VALID; if (IS_ERR(rule)) { err = PTR_ERR(rule); @@ -1130,9 +1226,12 @@ void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe) return; list_for_each_entry(e, &nhe->encap_list, encap_list) { + struct encap_flow_item *efi; if (!(e->flags & MLX5_ENCAP_ENTRY_VALID)) continue; - list_for_each_entry(flow, &e->flows, encap) { + list_for_each_entry(efi, &e->flows, list) { + flow = container_of(efi, struct mlx5e_tc_flow, + encaps[efi->index]); if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) { counter = mlx5e_tc_get_counter(flow); mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse); @@ -1162,11 +1261,11 @@ void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe) } static void mlx5e_detach_encap(struct mlx5e_priv *priv, - struct mlx5e_tc_flow *flow) + struct mlx5e_tc_flow *flow, int out_index) { - struct list_head *next = flow->encap.next; + struct list_head *next = flow->encaps[out_index].list.next; - list_del(&flow->encap); + list_del(&flow->encaps[out_index].list); if (list_empty(next)) { struct mlx5e_encap_entry *e; @@ -1182,177 +1281,137 @@ static void mlx5e_detach_encap(struct mlx5e_priv *priv, } } -static void mlx5e_tc_del_flow(struct mlx5e_priv *priv, - struct mlx5e_tc_flow *flow) +static void __mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow) { - if (flow->flags & MLX5E_TC_FLOW_ESWITCH) - mlx5e_tc_del_fdb_flow(priv, flow); - else - mlx5e_tc_del_nic_flow(priv, flow); + struct mlx5_eswitch *esw = flow->priv->mdev->priv.eswitch; + + if (!(flow->flags & MLX5E_TC_FLOW_ESWITCH) || + 
!(flow->flags & MLX5E_TC_FLOW_DUP)) + return; + + mutex_lock(&esw->offloads.peer_mutex); + list_del(&flow->peer); + mutex_unlock(&esw->offloads.peer_mutex); + + flow->flags &= ~MLX5E_TC_FLOW_DUP; + + mlx5e_tc_del_fdb_flow(flow->peer_flow->priv, flow->peer_flow); + kvfree(flow->peer_flow); + flow->peer_flow = NULL; } -static void parse_vxlan_attr(struct mlx5_flow_spec *spec, - struct tc_cls_flower_offload *f) +static void mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow) { - void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, - outer_headers); - void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, - outer_headers); - void *misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, - misc_parameters); - void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, - misc_parameters); + struct mlx5_core_dev *dev = flow->priv->mdev; + struct mlx5_devcom *devcom = dev->priv.devcom; + struct mlx5_eswitch *peer_esw; - MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_protocol); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, IPPROTO_UDP); + peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); + if (!peer_esw) + return; + + __mlx5e_tc_del_fdb_peer_flow(flow); + mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); +} - if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_KEYID)) { - struct flow_dissector_key_keyid *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_ENC_KEYID, - f->key); - struct flow_dissector_key_keyid *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_ENC_KEYID, - f->mask); - MLX5_SET(fte_match_set_misc, misc_c, vxlan_vni, - be32_to_cpu(mask->keyid)); - MLX5_SET(fte_match_set_misc, misc_v, vxlan_vni, - be32_to_cpu(key->keyid)); +static void mlx5e_tc_del_flow(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow) +{ + if (flow->flags & MLX5E_TC_FLOW_ESWITCH) { + mlx5e_tc_del_fdb_peer_flow(flow); + mlx5e_tc_del_fdb_flow(priv, flow); + } else { + mlx5e_tc_del_nic_flow(priv, flow); } } + static int parse_tunnel_attr(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec, - struct tc_cls_flower_offload *f) + struct tc_cls_flower_offload *f, + struct net_device *filter_dev, u8 *match_level) { struct netlink_ext_ack *extack = f->common.extack; void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers); void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers); + struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f); + struct flow_match_control enc_control; + int err; - struct flow_dissector_key_control *enc_control = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_ENC_CONTROL, - f->key); - - if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_PORTS)) { - struct flow_dissector_key_ports *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_ENC_PORTS, - f->key); - struct flow_dissector_key_ports *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_ENC_PORTS, - f->mask); - - /* Full udp dst port must be given */ - if (memchr_inv(&mask->dst, 0xff, sizeof(mask->dst))) - goto vxlan_match_offload_err; - - if (mlx5_vxlan_lookup_port(priv->mdev->vxlan, be16_to_cpu(key->dst)) && - MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap)) - parse_vxlan_attr(spec, f); - else { - NL_SET_ERR_MSG_MOD(extack, - "port isn't an offloaded vxlan udp dport"); - netdev_warn(priv->netdev, - "%d isn't an offloaded vxlan udp dport\n", be16_to_cpu(key->dst)); - return -EOPNOTSUPP; - } - 
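The list walk in mlx5e_tc_encap_flows_add() above recovers the owning mlx5e_tc_flow from an embedded encap_flow_item via container_of() on an indexed array member, exactly the helper-array pattern the new comment at the top of this change describes. A minimal, self-contained userspace sketch of that pattern follows; the names helper_item and owner are illustrative rather than from the patch, and it assumes a GCC-style offsetof() that accepts a runtime array index, which is what the kernel's container_of() relies on here.

/* Helper-array container_of pattern, userspace sketch (illustrative names). */
#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct helper_item {
	int index;	/* slot this item occupies in the owner's array */
};

struct owner {
	int id;
	struct helper_item items[4];	/* the helper array */
};

int main(void)
{
	struct owner o = { .id = 42 };
	struct helper_item *hi;
	int i;

	for (i = 0; i < 4; i++)
		o.items[i].index = i;

	/* Given only a pointer to one array slot, subtract exactly that
	 * slot's offset to reach the owner; the index is not a
	 * compile-time constant, hence the GCC offsetof() assumption. */
	hi = &o.items[2];
	printf("owner id: %d\n",
	       container_of(hi, struct owner, items[hi->index])->id);
	return 0;
}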
- MLX5_SET(fte_match_set_lyr_2_4, headers_c, - udp_dport, ntohs(mask->dst)); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, - udp_dport, ntohs(key->dst)); - - MLX5_SET(fte_match_set_lyr_2_4, headers_c, - udp_sport, ntohs(mask->src)); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, - udp_sport, ntohs(key->src)); - } else { /* udp dst port must be given */ -vxlan_match_offload_err: + err = mlx5e_tc_tun_parse(filter_dev, priv, spec, f, + headers_c, headers_v, match_level); + if (err) { NL_SET_ERR_MSG_MOD(extack, - "IP tunnel decap offload supported only for vxlan, must set UDP dport"); - netdev_warn(priv->netdev, - "IP tunnel decap offload supported only for vxlan, must set UDP dport\n"); - return -EOPNOTSUPP; + "failed to parse tunnel attributes"); + return err; } - if (enc_control->addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { - struct flow_dissector_key_ipv4_addrs *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS, - f->key); - struct flow_dissector_key_ipv4_addrs *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS, - f->mask); + flow_rule_match_enc_control(rule, &enc_control); + + if (enc_control.key->addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { + struct flow_match_ipv4_addrs match; + + flow_rule_match_enc_ipv4_addrs(rule, &match); MLX5_SET(fte_match_set_lyr_2_4, headers_c, src_ipv4_src_ipv6.ipv4_layout.ipv4, - ntohl(mask->src)); + ntohl(match.mask->src)); MLX5_SET(fte_match_set_lyr_2_4, headers_v, src_ipv4_src_ipv6.ipv4_layout.ipv4, - ntohl(key->src)); + ntohl(match.key->src)); MLX5_SET(fte_match_set_lyr_2_4, headers_c, dst_ipv4_dst_ipv6.ipv4_layout.ipv4, - ntohl(mask->dst)); + ntohl(match.mask->dst)); MLX5_SET(fte_match_set_lyr_2_4, headers_v, dst_ipv4_dst_ipv6.ipv4_layout.ipv4, - ntohl(key->dst)); + ntohl(match.key->dst)); MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ethertype); MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, ETH_P_IP); - } else if (enc_control->addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { - struct flow_dissector_key_ipv6_addrs *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS, - f->key); - struct flow_dissector_key_ipv6_addrs *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS, - f->mask); + } else if (enc_control.key->addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { + struct flow_match_ipv6_addrs match; + flow_rule_match_enc_ipv6_addrs(rule, &match); memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, src_ipv4_src_ipv6.ipv6_layout.ipv6), - &mask->src, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6)); + &match.mask->src, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6)); memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, src_ipv4_src_ipv6.ipv6_layout.ipv6), - &key->src, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6)); + &match.key->src, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6)); memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, dst_ipv4_dst_ipv6.ipv6_layout.ipv6), - &mask->dst, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6)); + &match.mask->dst, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6)); memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, dst_ipv4_dst_ipv6.ipv6_layout.ipv6), - &key->dst, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6)); + &match.key->dst, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6)); MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ethertype); MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, ETH_P_IPV6); } - if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_IP)) { - struct flow_dissector_key_ip *key = - skb_flow_dissector_target(f->dissector, - 
FLOW_DISSECTOR_KEY_ENC_IP, - f->key); - struct flow_dissector_key_ip *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_ENC_IP, - f->mask); + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IP)) { + struct flow_match_ip match; - MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn, mask->tos & 0x3); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn, key->tos & 0x3); + flow_rule_match_enc_ip(rule, &match); + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn, + match.mask->tos & 0x3); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn, + match.key->tos & 0x3); - MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp, mask->tos >> 2); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp, key->tos >> 2); + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp, + match.mask->tos >> 2); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp, + match.key->tos >> 2); - MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit, mask->ttl); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit, key->ttl); + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit, + match.mask->ttl); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit, + match.key->ttl); - if (mask->ttl && + if (match.mask->ttl && !MLX5_CAP_ESW_FLOWTABLE_FDB (priv->mdev, ft_field_support.outer_ipv4_ttl)) { @@ -1381,7 +1440,8 @@ vxlan_match_offload_err: static int __parse_cls_flower(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec, struct tc_cls_flower_offload *f, - u8 *match_level) + struct net_device *filter_dev, + u8 *match_level, u8 *tunnel_match_level) { struct netlink_ext_ack *extack = f->common.extack; void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, @@ -1392,12 +1452,14 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, misc_parameters); void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); + struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f); + struct flow_dissector *dissector = rule->match.dissector; u16 addr_type = 0; u8 ip_proto = 0; *match_level = MLX5_MATCH_NONE; - if (f->dissector->used_keys & + if (dissector->used_keys & ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) | BIT(FLOW_DISSECTOR_KEY_BASIC) | BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) | @@ -1416,23 +1478,21 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, BIT(FLOW_DISSECTOR_KEY_ENC_IP))) { NL_SET_ERR_MSG_MOD(extack, "Unsupported key"); netdev_warn(priv->netdev, "Unsupported key used: 0x%x\n", - f->dissector->used_keys); + dissector->used_keys); return -EOPNOTSUPP; } - if ((dissector_uses_key(f->dissector, - FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) || - dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_KEYID) || - dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_PORTS)) && - dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_CONTROL)) { - struct flow_dissector_key_control *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_ENC_CONTROL, - f->key); - switch (key->addr_type) { + if ((flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) || + flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID) || + flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS)) && + flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL)) { + struct flow_match_control match; + + flow_rule_match_enc_control(rule, &match); + switch (match.key->addr_type) { case FLOW_DISSECTOR_KEY_IPV4_ADDRS: case FLOW_DISSECTOR_KEY_IPV6_ADDRS: - if (parse_tunnel_attr(priv, spec, f)) + if (parse_tunnel_attr(priv, spec, f, filter_dev, tunnel_match_level)) return -EOPNOTSUPP; break; 
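The hunks around this point replace the open-coded skb_flow_dissector_target() pairs with the flow_rule_match_*() helpers of the then-new flow offload API. Below is a short sketch of the idiom, shown for the ENC_IP key converted just above; program_hw_tos() is a hypothetical stand-in for the driver's MLX5_SET() plumbing, not a real function.

/* flow_rule_match_*() idiom, sketch; program_hw_tos() is hypothetical. */
static void example_parse_enc_ip(struct tc_cls_flower_offload *f)
{
	struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f);

	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IP)) {
		struct flow_match_ip match;

		/* One call fills key and mask together, replacing the
		 * paired skb_flow_dissector_target() lookups per key. */
		flow_rule_match_enc_ip(rule, &match);
		program_hw_tos(match.key->tos, match.mask->tos);
	}
}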
default: @@ -1448,35 +1508,27 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, inner_headers); } - if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_BASIC)) { - struct flow_dissector_key_basic *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_BASIC, - f->key); - struct flow_dissector_key_basic *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_BASIC, - f->mask); + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) { + struct flow_match_basic match; + + flow_rule_match_basic(rule, &match); MLX5_SET(fte_match_set_lyr_2_4, headers_c, ethertype, - ntohs(mask->n_proto)); + ntohs(match.mask->n_proto)); MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, - ntohs(key->n_proto)); + ntohs(match.key->n_proto)); - if (mask->n_proto) + if (match.mask->n_proto) *match_level = MLX5_MATCH_L2; } - if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_VLAN)) { - struct flow_dissector_key_vlan *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_VLAN, - f->key); - struct flow_dissector_key_vlan *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_VLAN, - f->mask); - if (mask->vlan_id || mask->vlan_priority || mask->vlan_tpid) { - if (key->vlan_tpid == htons(ETH_P_8021AD)) { + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) { + struct flow_match_vlan match; + + flow_rule_match_vlan(rule, &match); + if (match.mask->vlan_id || + match.mask->vlan_priority || + match.mask->vlan_tpid) { + if (match.key->vlan_tpid == htons(ETH_P_8021AD)) { MLX5_SET(fte_match_set_lyr_2_4, headers_c, svlan_tag, 1); MLX5_SET(fte_match_set_lyr_2_4, headers_v, @@ -1488,11 +1540,15 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, cvlan_tag, 1); } - MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_vid, mask->vlan_id); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid, key->vlan_id); + MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_vid, + match.mask->vlan_id); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid, + match.key->vlan_id); - MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_prio, mask->vlan_priority); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_prio, key->vlan_priority); + MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_prio, + match.mask->vlan_priority); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_prio, + match.key->vlan_priority); *match_level = MLX5_MATCH_L2; } @@ -1502,17 +1558,14 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, *match_level = MLX5_MATCH_L2; } - if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CVLAN)) { - struct flow_dissector_key_vlan *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_CVLAN, - f->key); - struct flow_dissector_key_vlan *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_CVLAN, - f->mask); - if (mask->vlan_id || mask->vlan_priority || mask->vlan_tpid) { - if (key->vlan_tpid == htons(ETH_P_8021AD)) { + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN)) { + struct flow_match_vlan match; + + flow_rule_match_vlan(rule, &match); + if (match.mask->vlan_id || + match.mask->vlan_priority || + match.mask->vlan_tpid) { + if (match.key->vlan_tpid == htons(ETH_P_8021AD)) { MLX5_SET(fte_match_set_misc, misc_c, outer_second_svlan_tag, 1); MLX5_SET(fte_match_set_misc, misc_v, @@ -1525,69 +1578,58 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, } MLX5_SET(fte_match_set_misc, misc_c, outer_second_vid, - mask->vlan_id); + match.mask->vlan_id); MLX5_SET(fte_match_set_misc, misc_v, outer_second_vid, - 
key->vlan_id); + match.key->vlan_id); MLX5_SET(fte_match_set_misc, misc_c, outer_second_prio, - mask->vlan_priority); + match.mask->vlan_priority); MLX5_SET(fte_match_set_misc, misc_v, outer_second_prio, - key->vlan_priority); + match.key->vlan_priority); *match_level = MLX5_MATCH_L2; } } - if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ETH_ADDRS)) { - struct flow_dissector_key_eth_addrs *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_ETH_ADDRS, - f->key); - struct flow_dissector_key_eth_addrs *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_ETH_ADDRS, - f->mask); + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) { + struct flow_match_eth_addrs match; + flow_rule_match_eth_addrs(rule, &match); ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, dmac_47_16), - mask->dst); + match.mask->dst); ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, dmac_47_16), - key->dst); + match.key->dst); ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, smac_47_16), - mask->src); + match.mask->src); ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, smac_47_16), - key->src); + match.key->src); - if (!is_zero_ether_addr(mask->src) || !is_zero_ether_addr(mask->dst)) + if (!is_zero_ether_addr(match.mask->src) || + !is_zero_ether_addr(match.mask->dst)) *match_level = MLX5_MATCH_L2; } - if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CONTROL)) { - struct flow_dissector_key_control *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_CONTROL, - f->key); + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) { + struct flow_match_control match; - struct flow_dissector_key_control *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_CONTROL, - f->mask); - addr_type = key->addr_type; + flow_rule_match_control(rule, &match); + addr_type = match.key->addr_type; /* the HW doesn't support frag first/later */ - if (mask->flags & FLOW_DIS_FIRST_FRAG) + if (match.mask->flags & FLOW_DIS_FIRST_FRAG) return -EOPNOTSUPP; - if (mask->flags & FLOW_DIS_IS_FRAGMENT) { + if (match.mask->flags & FLOW_DIS_IS_FRAGMENT) { MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1); MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag, - key->flags & FLOW_DIS_IS_FRAGMENT); + match.key->flags & FLOW_DIS_IS_FRAGMENT); /* the HW doesn't need L3 inline to match on frag=no */ - if (!(key->flags & FLOW_DIS_IS_FRAGMENT)) + if (!(match.key->flags & FLOW_DIS_IS_FRAGMENT)) *match_level = MLX5_MATCH_L2; /* *** L2 attributes parsing up to here *** */ else @@ -1595,102 +1637,85 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, } } - if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_BASIC)) { - struct flow_dissector_key_basic *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_BASIC, - f->key); - struct flow_dissector_key_basic *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_BASIC, - f->mask); - ip_proto = key->ip_proto; + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) { + struct flow_match_basic match; + + flow_rule_match_basic(rule, &match); + ip_proto = match.key->ip_proto; MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol, - mask->ip_proto); + match.mask->ip_proto); MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, - key->ip_proto); + match.key->ip_proto); - if (mask->ip_proto) + if (match.mask->ip_proto) *match_level = MLX5_MATCH_L3; } if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { - struct flow_dissector_key_ipv4_addrs 
*key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_IPV4_ADDRS, - f->key); - struct flow_dissector_key_ipv4_addrs *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_IPV4_ADDRS, - f->mask); + struct flow_match_ipv4_addrs match; + flow_rule_match_ipv4_addrs(rule, &match); memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, src_ipv4_src_ipv6.ipv4_layout.ipv4), - &mask->src, sizeof(mask->src)); + &match.mask->src, sizeof(match.mask->src)); memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, src_ipv4_src_ipv6.ipv4_layout.ipv4), - &key->src, sizeof(key->src)); + &match.key->src, sizeof(match.key->src)); memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, dst_ipv4_dst_ipv6.ipv4_layout.ipv4), - &mask->dst, sizeof(mask->dst)); + &match.mask->dst, sizeof(match.mask->dst)); memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, dst_ipv4_dst_ipv6.ipv4_layout.ipv4), - &key->dst, sizeof(key->dst)); + &match.key->dst, sizeof(match.key->dst)); - if (mask->src || mask->dst) + if (match.mask->src || match.mask->dst) *match_level = MLX5_MATCH_L3; } if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { - struct flow_dissector_key_ipv6_addrs *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_IPV6_ADDRS, - f->key); - struct flow_dissector_key_ipv6_addrs *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_IPV6_ADDRS, - f->mask); + struct flow_match_ipv6_addrs match; + flow_rule_match_ipv6_addrs(rule, &match); memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, src_ipv4_src_ipv6.ipv6_layout.ipv6), - &mask->src, sizeof(mask->src)); + &match.mask->src, sizeof(match.mask->src)); memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, src_ipv4_src_ipv6.ipv6_layout.ipv6), - &key->src, sizeof(key->src)); + &match.key->src, sizeof(match.key->src)); memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, dst_ipv4_dst_ipv6.ipv6_layout.ipv6), - &mask->dst, sizeof(mask->dst)); + &match.mask->dst, sizeof(match.mask->dst)); memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, dst_ipv4_dst_ipv6.ipv6_layout.ipv6), - &key->dst, sizeof(key->dst)); + &match.key->dst, sizeof(match.key->dst)); - if (ipv6_addr_type(&mask->src) != IPV6_ADDR_ANY || - ipv6_addr_type(&mask->dst) != IPV6_ADDR_ANY) + if (ipv6_addr_type(&match.mask->src) != IPV6_ADDR_ANY || + ipv6_addr_type(&match.mask->dst) != IPV6_ADDR_ANY) *match_level = MLX5_MATCH_L3; } - if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_IP)) { - struct flow_dissector_key_ip *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_IP, - f->key); - struct flow_dissector_key_ip *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_IP, - f->mask); + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) { + struct flow_match_ip match; - MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn, mask->tos & 0x3); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn, key->tos & 0x3); + flow_rule_match_ip(rule, &match); + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn, + match.mask->tos & 0x3); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn, + match.key->tos & 0x3); - MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp, mask->tos >> 2); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp, key->tos >> 2); + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp, + match.mask->tos >> 2); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp, + match.key->tos >> 2); - MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit, mask->ttl); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, 
ttl_hoplimit, key->ttl); + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit, + match.mask->ttl); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit, + match.key->ttl); - if (mask->ttl && + if (match.mask->ttl && !MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, ft_field_support.outer_ipv4_ttl)) { NL_SET_ERR_MSG_MOD(extack, @@ -1698,44 +1723,39 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, return -EOPNOTSUPP; } - if (mask->tos || mask->ttl) + if (match.mask->tos || match.mask->ttl) *match_level = MLX5_MATCH_L3; } /* *** L3 attributes parsing up to here *** */ - if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_PORTS)) { - struct flow_dissector_key_ports *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_PORTS, - f->key); - struct flow_dissector_key_ports *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_PORTS, - f->mask); + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) { + struct flow_match_ports match; + + flow_rule_match_ports(rule, &match); switch (ip_proto) { case IPPROTO_TCP: MLX5_SET(fte_match_set_lyr_2_4, headers_c, - tcp_sport, ntohs(mask->src)); + tcp_sport, ntohs(match.mask->src)); MLX5_SET(fte_match_set_lyr_2_4, headers_v, - tcp_sport, ntohs(key->src)); + tcp_sport, ntohs(match.key->src)); MLX5_SET(fte_match_set_lyr_2_4, headers_c, - tcp_dport, ntohs(mask->dst)); + tcp_dport, ntohs(match.mask->dst)); MLX5_SET(fte_match_set_lyr_2_4, headers_v, - tcp_dport, ntohs(key->dst)); + tcp_dport, ntohs(match.key->dst)); break; case IPPROTO_UDP: MLX5_SET(fte_match_set_lyr_2_4, headers_c, - udp_sport, ntohs(mask->src)); + udp_sport, ntohs(match.mask->src)); MLX5_SET(fte_match_set_lyr_2_4, headers_v, - udp_sport, ntohs(key->src)); + udp_sport, ntohs(match.key->src)); MLX5_SET(fte_match_set_lyr_2_4, headers_c, - udp_dport, ntohs(mask->dst)); + udp_dport, ntohs(match.mask->dst)); MLX5_SET(fte_match_set_lyr_2_4, headers_v, - udp_dport, ntohs(key->dst)); + udp_dport, ntohs(match.key->dst)); break; default: NL_SET_ERR_MSG_MOD(extack, @@ -1745,26 +1765,20 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, return -EINVAL; } - if (mask->src || mask->dst) + if (match.mask->src || match.mask->dst) *match_level = MLX5_MATCH_L4; } - if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_TCP)) { - struct flow_dissector_key_tcp *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_TCP, - f->key); - struct flow_dissector_key_tcp *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_TCP, - f->mask); + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) { + struct flow_match_tcp match; + flow_rule_match_tcp(rule, &match); MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags, - ntohs(mask->flags)); + ntohs(match.mask->flags)); MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_flags, - ntohs(key->flags)); + ntohs(match.key->flags)); - if (mask->flags) + if (match.mask->flags) *match_level = MLX5_MATCH_L4; } @@ -1774,21 +1788,22 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, static int parse_cls_flower(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, struct mlx5_flow_spec *spec, - struct tc_cls_flower_offload *f) + struct tc_cls_flower_offload *f, + struct net_device *filter_dev) { struct netlink_ext_ack *extack = f->common.extack; struct mlx5_core_dev *dev = priv->mdev; struct mlx5_eswitch *esw = dev->priv.eswitch; struct mlx5e_rep_priv *rpriv = priv->ppriv; + u8 match_level, tunnel_match_level = MLX5_MATCH_NONE; struct mlx5_eswitch_rep *rep; - u8 match_level; int err; - err = 
__parse_cls_flower(priv, spec, f, &match_level); + err = __parse_cls_flower(priv, spec, f, filter_dev, &match_level, &tunnel_match_level); if (!err && (flow->flags & MLX5E_TC_FLOW_ESWITCH)) { rep = rpriv->rep; - if (rep->vport != FDB_UPLINK_VPORT && + if (rep->vport != MLX5_VPORT_UPLINK && (esw->offloads.inline_mode != MLX5_INLINE_MODE_NONE && esw->offloads.inline_mode < match_level)) { NL_SET_ERR_MSG_MOD(extack, @@ -1800,10 +1815,12 @@ static int parse_cls_flower(struct mlx5e_priv *priv, } } - if (flow->flags & MLX5E_TC_FLOW_ESWITCH) + if (flow->flags & MLX5E_TC_FLOW_ESWITCH) { flow->esw_attr->match_level = match_level; - else + flow->esw_attr->tunnel_match_level = tunnel_match_level; + } else { flow->nic_attr->match_level = match_level; + } return err; } @@ -1816,27 +1833,29 @@ struct pedit_headers { struct udphdr udp; }; +struct pedit_headers_action { + struct pedit_headers vals; + struct pedit_headers masks; + u32 pedits; +}; + static int pedit_header_offsets[] = { - [TCA_PEDIT_KEY_EX_HDR_TYPE_ETH] = offsetof(struct pedit_headers, eth), - [TCA_PEDIT_KEY_EX_HDR_TYPE_IP4] = offsetof(struct pedit_headers, ip4), - [TCA_PEDIT_KEY_EX_HDR_TYPE_IP6] = offsetof(struct pedit_headers, ip6), - [TCA_PEDIT_KEY_EX_HDR_TYPE_TCP] = offsetof(struct pedit_headers, tcp), - [TCA_PEDIT_KEY_EX_HDR_TYPE_UDP] = offsetof(struct pedit_headers, udp), + [FLOW_ACT_MANGLE_HDR_TYPE_ETH] = offsetof(struct pedit_headers, eth), + [FLOW_ACT_MANGLE_HDR_TYPE_IP4] = offsetof(struct pedit_headers, ip4), + [FLOW_ACT_MANGLE_HDR_TYPE_IP6] = offsetof(struct pedit_headers, ip6), + [FLOW_ACT_MANGLE_HDR_TYPE_TCP] = offsetof(struct pedit_headers, tcp), + [FLOW_ACT_MANGLE_HDR_TYPE_UDP] = offsetof(struct pedit_headers, udp), }; #define pedit_header(_ph, _htype) ((void *)(_ph) + pedit_header_offsets[_htype]) static int set_pedit_val(u8 hdr_type, u32 mask, u32 val, u32 offset, - struct pedit_headers *masks, - struct pedit_headers *vals) + struct pedit_headers_action *hdrs) { u32 *curr_pmask, *curr_pval; - if (hdr_type >= __PEDIT_HDR_TYPE_MAX) - goto out_err; - - curr_pmask = (u32 *)(pedit_header(masks, hdr_type) + offset); - curr_pval = (u32 *)(pedit_header(vals, hdr_type) + offset); + curr_pmask = (u32 *)(pedit_header(&hdrs->masks, hdr_type) + offset); + curr_pval = (u32 *)(pedit_header(&hdrs->vals, hdr_type) + offset); if (*curr_pmask & mask) /* disallow acting twice on the same location */ goto out_err; @@ -1888,12 +1907,11 @@ static struct mlx5_fields fields[] = { OFFLOAD(UDP_DPORT, 2, udp.dest, 0), }; -/* On input attr->num_mod_hdr_actions tells how many HW actions can be parsed at - * max from the SW pedit action. On success, it says how many HW actions were - * actually parsed. +/* On input attr->max_mod_hdr_actions tells how many HW actions can be parsed at + * max from the SW pedit action. On success, attr->num_mod_hdr_actions + * says how many HW actions were actually parsed. 
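The rewritten comment here introduces the split between max_mod_hdr_actions (capacity of the allocated buffer) and num_mod_hdr_actions (how much of it is filled), which is what lets the several pedit actions of one flow append into a single shared modify-header buffer. A rough sketch of that append-style accounting; mod_hdr_buf, ACTION_SZ and append_action() below are stand-in names, not driver symbols.

/* Append-style mod_hdr accounting, sketch (stand-in names). */
struct mod_hdr_buf {
	void *actions;	/* ACTION_SZ-sized HW action slots */
	int num;	/* slots already filled by earlier pedit parses */
	int max;	/* capacity, fixed when the buffer is allocated */
};

static int append_action(struct mod_hdr_buf *buf, const void *one_action)
{
	if (buf->num == buf->max)	/* more SW keys than HW slots */
		return -ENOSPC;

	/* Write at the current fill offset, mirroring the
	 * mod_hdr_actions + num_mod_hdr_actions * action_size
	 * arithmetic in offload_pedit_fields(). */
	memcpy((char *)buf->actions + buf->num * ACTION_SZ,
	       one_action, ACTION_SZ);
	buf->num++;
	return 0;
}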
*/ -static int offload_pedit_fields(struct pedit_headers *masks, - struct pedit_headers *vals, +static int offload_pedit_fields(struct pedit_headers_action *hdrs, struct mlx5e_tc_flow_parse_attr *parse_attr, struct netlink_ext_ack *extack) { @@ -1908,15 +1926,17 @@ static int offload_pedit_fields(struct pedit_headers *masks, __be16 mask_be16; void *action; - set_masks = &masks[TCA_PEDIT_KEY_EX_CMD_SET]; - add_masks = &masks[TCA_PEDIT_KEY_EX_CMD_ADD]; - set_vals = &vals[TCA_PEDIT_KEY_EX_CMD_SET]; - add_vals = &vals[TCA_PEDIT_KEY_EX_CMD_ADD]; + set_masks = &hdrs[0].masks; + add_masks = &hdrs[1].masks; + set_vals = &hdrs[0].vals; + add_vals = &hdrs[1].vals; action_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto); - action = parse_attr->mod_hdr_actions; - max_actions = parse_attr->num_mod_hdr_actions; - nactions = 0; + action = parse_attr->mod_hdr_actions + + parse_attr->num_mod_hdr_actions * action_size; + + max_actions = parse_attr->max_mod_hdr_actions; + nactions = parse_attr->num_mod_hdr_actions; for (i = 0; i < ARRAY_SIZE(fields); i++) { f = &fields[i]; @@ -2007,12 +2027,14 @@ static int offload_pedit_fields(struct pedit_headers *masks, } static int alloc_mod_hdr_actions(struct mlx5e_priv *priv, - const struct tc_action *a, int namespace, + struct pedit_headers_action *hdrs, + int namespace, struct mlx5e_tc_flow_parse_attr *parse_attr) { int nkeys, action_size, max_actions; - nkeys = tcf_pedit_nkeys(a); + nkeys = hdrs[TCA_PEDIT_KEY_EX_CMD_SET].pedits + + hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits; action_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto); if (namespace == MLX5_FLOW_NAMESPACE_FDB) /* FDB offloading */ @@ -2027,62 +2049,67 @@ static int alloc_mod_hdr_actions(struct mlx5e_priv *priv, if (!parse_attr->mod_hdr_actions) return -ENOMEM; - parse_attr->num_mod_hdr_actions = max_actions; + parse_attr->max_mod_hdr_actions = max_actions; return 0; } static const struct pedit_headers zero_masks = {}; static int parse_tc_pedit_action(struct mlx5e_priv *priv, - const struct tc_action *a, int namespace, + const struct flow_action_entry *act, int namespace, struct mlx5e_tc_flow_parse_attr *parse_attr, + struct pedit_headers_action *hdrs, struct netlink_ext_ack *extack) { - struct pedit_headers masks[__PEDIT_CMD_MAX], vals[__PEDIT_CMD_MAX], *cmd_masks; - int nkeys, i, err = -EOPNOTSUPP; + u8 cmd = (act->id == FLOW_ACTION_MANGLE) ? 
0 : 1; + int err = -EOPNOTSUPP; u32 mask, val, offset; - u8 cmd, htype; + u8 htype; - nkeys = tcf_pedit_nkeys(a); + htype = act->mangle.htype; + err = -EOPNOTSUPP; /* can't be all optimistic */ - memset(masks, 0, sizeof(struct pedit_headers) * __PEDIT_CMD_MAX); - memset(vals, 0, sizeof(struct pedit_headers) * __PEDIT_CMD_MAX); + if (htype == FLOW_ACT_MANGLE_UNSPEC) { + NL_SET_ERR_MSG_MOD(extack, "legacy pedit isn't offloaded"); + goto out_err; + } - for (i = 0; i < nkeys; i++) { - htype = tcf_pedit_htype(a, i); - cmd = tcf_pedit_cmd(a, i); - err = -EOPNOTSUPP; /* can't be all optimistic */ + mask = act->mangle.mask; + val = act->mangle.val; + offset = act->mangle.offset; - if (htype == TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK) { - NL_SET_ERR_MSG_MOD(extack, - "legacy pedit isn't offloaded"); - goto out_err; - } + err = set_pedit_val(htype, ~mask, val, offset, &hdrs[cmd]); + if (err) + goto out_err; - if (cmd != TCA_PEDIT_KEY_EX_CMD_SET && cmd != TCA_PEDIT_KEY_EX_CMD_ADD) { - NL_SET_ERR_MSG_MOD(extack, "pedit cmd isn't offloaded"); - goto out_err; - } + hdrs[cmd].pedits++; + + return 0; +out_err: + return err; +} - mask = tcf_pedit_mask(a, i); - val = tcf_pedit_val(a, i); - offset = tcf_pedit_offset(a, i); +static int alloc_tc_pedit_action(struct mlx5e_priv *priv, int namespace, + struct mlx5e_tc_flow_parse_attr *parse_attr, + struct pedit_headers_action *hdrs, + struct netlink_ext_ack *extack) +{ + struct pedit_headers *cmd_masks; + int err; + u8 cmd; - err = set_pedit_val(htype, ~mask, val, offset, &masks[cmd], &vals[cmd]); + if (!parse_attr->mod_hdr_actions) { + err = alloc_mod_hdr_actions(priv, hdrs, namespace, parse_attr); if (err) goto out_err; } - err = alloc_mod_hdr_actions(priv, a, namespace, parse_attr); - if (err) - goto out_err; - - err = offload_pedit_fields(masks, vals, parse_attr, extack); + err = offload_pedit_fields(hdrs, parse_attr, extack); if (err < 0) goto out_dealloc_parsed_actions; for (cmd = 0; cmd < __PEDIT_CMD_MAX; cmd++) { - cmd_masks = &masks[cmd]; + cmd_masks = &hdrs[cmd].masks; if (memcmp(cmd_masks, &zero_masks, sizeof(zero_masks))) { NL_SET_ERR_MSG_MOD(extack, "attempt to offload an unsupported field"); @@ -2131,19 +2158,69 @@ static bool csum_offload_supported(struct mlx5e_priv *priv, return true; } +struct ip_ttl_word { + __u8 ttl; + __u8 protocol; + __sum16 check; +}; + +struct ipv6_hoplimit_word { + __be16 payload_len; + __u8 nexthdr; + __u8 hop_limit; +}; + +static bool is_action_keys_supported(const struct flow_action_entry *act) +{ + u32 mask, offset; + u8 htype; + + htype = act->mangle.htype; + offset = act->mangle.offset; + mask = ~act->mangle.mask; + /* For IPv4 & IPv6 header check 4 byte word, + * to determine that modified fields + * are NOT ttl & hop_limit only. 
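The 4-byte-word check described in the comment just above works because pedit masks operate on 32-bit words: overlaying a small struct on the word that contains ttl (or hop_limit) exposes whether any sibling field in that word is also being rewritten. Here is a compact sketch of the IPv4 side, inverted into an "is ttl only" predicate; ipv4_action_is_ttl_only() is an illustrative wrapper, not a patch symbol.

/* Overlay the 32-bit pedit mask on the word holding ttl; set bits in the
 * sibling fields mean the action rewrites more than ttl alone. */
static bool ipv4_action_is_ttl_only(u32 act_mask, u32 offset)
{
	u32 mask = ~act_mask;	/* mangle.mask marks preserved bits, so invert */
	struct ip_ttl_word *w = (struct ip_ttl_word *)&mask;

	return offset == offsetof(struct iphdr, ttl) &&
	       !w->protocol && !w->check;
}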
+ */ + if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP4) { + struct ip_ttl_word *ttl_word = + (struct ip_ttl_word *)&mask; + + if (offset != offsetof(struct iphdr, ttl) || + ttl_word->protocol || + ttl_word->check) { + return true; + } + } else if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP6) { + struct ipv6_hoplimit_word *hoplimit_word = + (struct ipv6_hoplimit_word *)&mask; + + if (offset != offsetof(struct ipv6hdr, payload_len) || + hoplimit_word->payload_len || + hoplimit_word->nexthdr) { + return true; + } + } + return false; +} + static bool modify_header_match_supported(struct mlx5_flow_spec *spec, - struct tcf_exts *exts, + struct flow_action *flow_action, + u32 actions, struct netlink_ext_ack *extack) { - const struct tc_action *a; + const struct flow_action_entry *act; bool modify_ip_header; - LIST_HEAD(actions); - u8 htype, ip_proto; void *headers_v; u16 ethertype; - int nkeys, i; + u8 ip_proto; + int i; + + if (actions & MLX5_FLOW_CONTEXT_ACTION_DECAP) + headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, inner_headers); + else + headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers); - headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers); ethertype = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ethertype); /* for non-IP we only re-write MACs, so we're okay */ @@ -2151,20 +2228,14 @@ static bool modify_header_match_supported(struct mlx5_flow_spec *spec, goto out_ok; modify_ip_header = false; - tcf_exts_for_each_action(i, a, exts) { - int k; - - if (!is_tcf_pedit(a)) + flow_action_for_each(i, act, flow_action) { + if (act->id != FLOW_ACTION_MANGLE && + act->id != FLOW_ACTION_ADD) continue; - nkeys = tcf_pedit_nkeys(a); - for (k = 0; k < nkeys; k++) { - htype = tcf_pedit_htype(a, k); - if (htype == TCA_PEDIT_KEY_EX_HDR_TYPE_IP4 || - htype == TCA_PEDIT_KEY_EX_HDR_TYPE_IP6) { - modify_ip_header = true; - break; - } + if (is_action_keys_supported(act)) { + modify_ip_header = true; + break; } } @@ -2182,7 +2253,7 @@ out_ok: } static bool actions_match_supported(struct mlx5e_priv *priv, - struct tcf_exts *exts, + struct flow_action *flow_action, struct mlx5e_tc_flow_parse_attr *parse_attr, struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack) @@ -2199,7 +2270,8 @@ static bool actions_match_supported(struct mlx5e_priv *priv, return false; if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) - return modify_header_match_supported(&parse_attr->spec, exts, + return modify_header_match_supported(&parse_attr->spec, + flow_action, actions, extack); return true; @@ -2219,57 +2291,54 @@ static bool same_hw_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv) return (fsystem_guid == psystem_guid); } -static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, +static int parse_tc_nic_actions(struct mlx5e_priv *priv, + struct flow_action *flow_action, struct mlx5e_tc_flow_parse_attr *parse_attr, struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack) { struct mlx5_nic_flow_attr *attr = flow->nic_attr; - const struct tc_action *a; - LIST_HEAD(actions); + struct pedit_headers_action hdrs[2] = {}; + const struct flow_action_entry *act; u32 action = 0; int err, i; - if (!tcf_exts_has_actions(exts)) + if (!flow_action_has_entries(flow_action)) return -EINVAL; attr->flow_tag = MLX5_FS_DEFAULT_FLOW_TAG; - tcf_exts_for_each_action(i, a, exts) { - if (is_tcf_gact_shot(a)) { + flow_action_for_each(i, act, flow_action) { + switch (act->id) { + case FLOW_ACTION_DROP: action |= MLX5_FLOW_CONTEXT_ACTION_DROP; if 
(MLX5_CAP_FLOWTABLE(priv->mdev, flow_table_properties_nic_receive.flow_counter)) action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; - continue; - } - - if (is_tcf_pedit(a)) { - err = parse_tc_pedit_action(priv, a, MLX5_FLOW_NAMESPACE_KERNEL, - parse_attr, extack); + break; + case FLOW_ACTION_MANGLE: + case FLOW_ACTION_ADD: + err = parse_tc_pedit_action(priv, act, MLX5_FLOW_NAMESPACE_KERNEL, + parse_attr, hdrs, extack); if (err) return err; action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR | MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; - continue; - } - - if (is_tcf_csum(a)) { + break; + case FLOW_ACTION_CSUM: if (csum_offload_supported(priv, action, - tcf_csum_update_flags(a), + act->csum_flags, extack)) - continue; + break; return -EOPNOTSUPP; - } - - if (is_tcf_mirred_egress_redirect(a)) { - struct net_device *peer_dev = tcf_mirred_dev(a); + case FLOW_ACTION_REDIRECT: { + struct net_device *peer_dev = act->dev; if (priv->netdev->netdev_ops == peer_dev->netdev_ops && same_hw_devs(priv, netdev_priv(peer_dev))) { - parse_attr->mirred_ifindex = peer_dev->ifindex; + parse_attr->mirred_ifindex[0] = peer_dev->ifindex; flow->flags |= MLX5E_TC_FLOW_HAIRPIN; action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_COUNT; @@ -2280,11 +2349,10 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, peer_dev->name); return -EINVAL; } - continue; - } - - if (is_tcf_skbedit_mark(a)) { - u32 mark = tcf_skbedit_mark(a); + } + break; + case FLOW_ACTION_MARK: { + u32 mark = act->mark; if (mark & ~MLX5E_TC_FLOW_ID_MASK) { NL_SET_ERR_MSG_MOD(extack, @@ -2294,70 +2362,47 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, attr->flow_tag = mark; action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; - continue; + } + break; + default: + return -EINVAL; } + } - return -EINVAL; + if (hdrs[TCA_PEDIT_KEY_EX_CMD_SET].pedits || + hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits) { + err = alloc_tc_pedit_action(priv, MLX5_FLOW_NAMESPACE_KERNEL, + parse_attr, hdrs, extack); + if (err) + return err; } attr->action = action; - if (!actions_match_supported(priv, exts, parse_attr, flow, extack)) + if (!actions_match_supported(priv, flow_action, parse_attr, flow, extack)) return -EOPNOTSUPP; return 0; } -static inline int cmp_encap_info(struct ip_tunnel_key *a, - struct ip_tunnel_key *b) -{ - return memcmp(a, b, sizeof(*a)); -} +struct encap_key { + struct ip_tunnel_key *ip_tun_key; + int tunnel_type; +}; -static inline int hash_encap_info(struct ip_tunnel_key *key) +static inline int cmp_encap_info(struct encap_key *a, + struct encap_key *b) { - return jhash(key, sizeof(*key), 0); + return memcmp(a->ip_tun_key, b->ip_tun_key, sizeof(*a->ip_tun_key)) || + a->tunnel_type != b->tunnel_type; } -static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv, - struct net_device *mirred_dev, - struct net_device **out_dev, - struct flowi4 *fl4, - struct neighbour **out_n, - u8 *out_ttl) +static inline int hash_encap_info(struct encap_key *key) { - struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - struct mlx5e_rep_priv *uplink_rpriv; - struct rtable *rt; - struct neighbour *n = NULL; - -#if IS_ENABLED(CONFIG_INET) - int ret; - - rt = ip_route_output_key(dev_net(mirred_dev), fl4); - ret = PTR_ERR_OR_ZERO(rt); - if (ret) - return ret; -#else - return -EOPNOTSUPP; -#endif - uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); - /* if the egress device isn't on the same HW e-switch, we use the uplink */ - if (!switchdev_port_same_parent_id(priv->netdev, rt->dst.dev)) - *out_dev = uplink_rpriv->netdev; 
- else - *out_dev = rt->dst.dev; - - if (!(*out_ttl)) - *out_ttl = ip4_dst_hoplimit(&rt->dst); - n = dst_neigh_lookup(&rt->dst, &fl4->daddr); - ip_rt_put(rt); - if (!n) - return -ENOMEM; - - *out_n = n; - return 0; + return jhash(key->ip_tun_key, sizeof(*key->ip_tun_key), + key->tunnel_type); } + static bool is_merged_eswitch_dev(struct mlx5e_priv *priv, struct net_device *peer_netdev) { @@ -2366,389 +2411,45 @@ static bool is_merged_eswitch_dev(struct mlx5e_priv *priv, peer_priv = netdev_priv(peer_netdev); return (MLX5_CAP_ESW(priv->mdev, merged_eswitch) && - (priv->netdev->netdev_ops == peer_netdev->netdev_ops) && - same_hw_devs(priv, peer_priv) && - MLX5_VPORT_MANAGER(peer_priv->mdev) && - (peer_priv->mdev->priv.eswitch->mode == SRIOV_OFFLOADS)); -} - -static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv, - struct net_device *mirred_dev, - struct net_device **out_dev, - struct flowi6 *fl6, - struct neighbour **out_n, - u8 *out_ttl) -{ - struct neighbour *n = NULL; - struct dst_entry *dst; - -#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6) - struct mlx5e_rep_priv *uplink_rpriv; - struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - int ret; - - ret = ipv6_stub->ipv6_dst_lookup(dev_net(mirred_dev), NULL, &dst, - fl6); - if (ret < 0) - return ret; - - if (!(*out_ttl)) - *out_ttl = ip6_dst_hoplimit(dst); - - uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); - /* if the egress device isn't on the same HW e-switch, we use the uplink */ - if (!switchdev_port_same_parent_id(priv->netdev, dst->dev)) - *out_dev = uplink_rpriv->netdev; - else - *out_dev = dst->dev; -#else - return -EOPNOTSUPP; -#endif - - n = dst_neigh_lookup(dst, &fl6->daddr); - dst_release(dst); - if (!n) - return -ENOMEM; - - *out_n = n; - return 0; -} - -static void gen_vxlan_header_ipv4(struct net_device *out_dev, - char buf[], int encap_size, - unsigned char h_dest[ETH_ALEN], - u8 tos, u8 ttl, - __be32 daddr, - __be32 saddr, - __be16 udp_dst_port, - __be32 vx_vni) -{ - struct ethhdr *eth = (struct ethhdr *)buf; - struct iphdr *ip = (struct iphdr *)((char *)eth + sizeof(struct ethhdr)); - struct udphdr *udp = (struct udphdr *)((char *)ip + sizeof(struct iphdr)); - struct vxlanhdr *vxh = (struct vxlanhdr *)((char *)udp + sizeof(struct udphdr)); - - memset(buf, 0, encap_size); - - ether_addr_copy(eth->h_dest, h_dest); - ether_addr_copy(eth->h_source, out_dev->dev_addr); - eth->h_proto = htons(ETH_P_IP); - - ip->daddr = daddr; - ip->saddr = saddr; - - ip->tos = tos; - ip->ttl = ttl; - ip->protocol = IPPROTO_UDP; - ip->version = 0x4; - ip->ihl = 0x5; - - udp->dest = udp_dst_port; - vxh->vx_flags = VXLAN_HF_VNI; - vxh->vx_vni = vxlan_vni_field(vx_vni); -} - -static void gen_vxlan_header_ipv6(struct net_device *out_dev, - char buf[], int encap_size, - unsigned char h_dest[ETH_ALEN], - u8 tos, u8 ttl, - struct in6_addr *daddr, - struct in6_addr *saddr, - __be16 udp_dst_port, - __be32 vx_vni) -{ - struct ethhdr *eth = (struct ethhdr *)buf; - struct ipv6hdr *ip6h = (struct ipv6hdr *)((char *)eth + sizeof(struct ethhdr)); - struct udphdr *udp = (struct udphdr *)((char *)ip6h + sizeof(struct ipv6hdr)); - struct vxlanhdr *vxh = (struct vxlanhdr *)((char *)udp + sizeof(struct udphdr)); - - memset(buf, 0, encap_size); - - ether_addr_copy(eth->h_dest, h_dest); - ether_addr_copy(eth->h_source, out_dev->dev_addr); - eth->h_proto = htons(ETH_P_IPV6); - - ip6_flow_hdr(ip6h, tos, 0); - /* the HW fills up ipv6 payload len */ - ip6h->nexthdr = IPPROTO_UDP; - ip6h->hop_limit = ttl; - ip6h->daddr = *daddr; - ip6h->saddr = 
*saddr; - - udp->dest = udp_dst_port; - vxh->vx_flags = VXLAN_HF_VNI; - vxh->vx_vni = vxlan_vni_field(vx_vni); -} - -static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv, - struct net_device *mirred_dev, - struct mlx5e_encap_entry *e) -{ - int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size); - int ipv4_encap_size = ETH_HLEN + sizeof(struct iphdr) + VXLAN_HLEN; - struct ip_tunnel_key *tun_key = &e->tun_info.key; - struct net_device *out_dev; - struct neighbour *n = NULL; - struct flowi4 fl4 = {}; - u8 nud_state, tos, ttl; - char *encap_header; - int err; - - if (max_encap_size < ipv4_encap_size) { - mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n", - ipv4_encap_size, max_encap_size); - return -EOPNOTSUPP; - } - - encap_header = kzalloc(ipv4_encap_size, GFP_KERNEL); - if (!encap_header) - return -ENOMEM; - - switch (e->tunnel_type) { - case MLX5_REFORMAT_TYPE_L2_TO_VXLAN: - fl4.flowi4_proto = IPPROTO_UDP; - fl4.fl4_dport = tun_key->tp_dst; - break; - default: - err = -EOPNOTSUPP; - goto free_encap; - } - - tos = tun_key->tos; - ttl = tun_key->ttl; - - fl4.flowi4_tos = tun_key->tos; - fl4.daddr = tun_key->u.ipv4.dst; - fl4.saddr = tun_key->u.ipv4.src; - - err = mlx5e_route_lookup_ipv4(priv, mirred_dev, &out_dev, - &fl4, &n, &ttl); - if (err) - goto free_encap; - - /* used by mlx5e_detach_encap to lookup a neigh hash table - * entry in the neigh hash table when a user deletes a rule - */ - e->m_neigh.dev = n->dev; - e->m_neigh.family = n->ops->family; - memcpy(&e->m_neigh.dst_ip, n->primary_key, n->tbl->key_len); - e->out_dev = out_dev; - - /* It's importent to add the neigh to the hash table before checking - * the neigh validity state. So if we'll get a notification, in case the - * neigh changes it's validity state, we would find the relevant neigh - * in the hash. 
- */ - err = mlx5e_rep_encap_entry_attach(netdev_priv(out_dev), e); - if (err) - goto free_encap; - - read_lock_bh(&n->lock); - nud_state = n->nud_state; - ether_addr_copy(e->h_dest, n->ha); - read_unlock_bh(&n->lock); - - switch (e->tunnel_type) { - case MLX5_REFORMAT_TYPE_L2_TO_VXLAN: - gen_vxlan_header_ipv4(out_dev, encap_header, - ipv4_encap_size, e->h_dest, tos, ttl, - fl4.daddr, - fl4.saddr, tun_key->tp_dst, - tunnel_id_to_key32(tun_key->tun_id)); - break; - default: - err = -EOPNOTSUPP; - goto destroy_neigh_entry; - } - e->encap_size = ipv4_encap_size; - e->encap_header = encap_header; - - if (!(nud_state & NUD_VALID)) { - neigh_event_send(n, NULL); - err = -EAGAIN; - goto out; - } - - err = mlx5_packet_reformat_alloc(priv->mdev, e->tunnel_type, - ipv4_encap_size, encap_header, - MLX5_FLOW_NAMESPACE_FDB, - &e->encap_id); - if (err) - goto destroy_neigh_entry; - - e->flags |= MLX5_ENCAP_ENTRY_VALID; - mlx5e_rep_queue_neigh_stats_work(netdev_priv(out_dev)); - neigh_release(n); - return err; - -destroy_neigh_entry: - mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e); -free_encap: - kfree(encap_header); -out: - if (n) - neigh_release(n); - return err; + mlx5e_eswitch_rep(priv->netdev) && + mlx5e_eswitch_rep(peer_netdev) && + same_hw_devs(priv, peer_priv)); } -static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv, - struct net_device *mirred_dev, - struct mlx5e_encap_entry *e) -{ - int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size); - int ipv6_encap_size = ETH_HLEN + sizeof(struct ipv6hdr) + VXLAN_HLEN; - struct ip_tunnel_key *tun_key = &e->tun_info.key; - struct net_device *out_dev; - struct neighbour *n = NULL; - struct flowi6 fl6 = {}; - u8 nud_state, tos, ttl; - char *encap_header; - int err; - - if (max_encap_size < ipv6_encap_size) { - mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n", - ipv6_encap_size, max_encap_size); - return -EOPNOTSUPP; - } - - encap_header = kzalloc(ipv6_encap_size, GFP_KERNEL); - if (!encap_header) - return -ENOMEM; - - switch (e->tunnel_type) { - case MLX5_REFORMAT_TYPE_L2_TO_VXLAN: - fl6.flowi6_proto = IPPROTO_UDP; - fl6.fl6_dport = tun_key->tp_dst; - break; - default: - err = -EOPNOTSUPP; - goto free_encap; - } - - tos = tun_key->tos; - ttl = tun_key->ttl; - - fl6.flowlabel = ip6_make_flowinfo(RT_TOS(tun_key->tos), tun_key->label); - fl6.daddr = tun_key->u.ipv6.dst; - fl6.saddr = tun_key->u.ipv6.src; - - err = mlx5e_route_lookup_ipv6(priv, mirred_dev, &out_dev, - &fl6, &n, &ttl); - if (err) - goto free_encap; - - /* used by mlx5e_detach_encap to lookup a neigh hash table - * entry in the neigh hash table when a user deletes a rule - */ - e->m_neigh.dev = n->dev; - e->m_neigh.family = n->ops->family; - memcpy(&e->m_neigh.dst_ip, n->primary_key, n->tbl->key_len); - e->out_dev = out_dev; - - /* It's importent to add the neigh to the hash table before checking - * the neigh validity state. So if we'll get a notification, in case the - * neigh changes it's validity state, we would find the relevant neigh - * in the hash. 
- */ - err = mlx5e_rep_encap_entry_attach(netdev_priv(out_dev), e); - if (err) - goto free_encap; - - read_lock_bh(&n->lock); - nud_state = n->nud_state; - ether_addr_copy(e->h_dest, n->ha); - read_unlock_bh(&n->lock); - - switch (e->tunnel_type) { - case MLX5_REFORMAT_TYPE_L2_TO_VXLAN: - gen_vxlan_header_ipv6(out_dev, encap_header, - ipv6_encap_size, e->h_dest, tos, ttl, - &fl6.daddr, - &fl6.saddr, tun_key->tp_dst, - tunnel_id_to_key32(tun_key->tun_id)); - break; - default: - err = -EOPNOTSUPP; - goto destroy_neigh_entry; - } - - e->encap_size = ipv6_encap_size; - e->encap_header = encap_header; - - if (!(nud_state & NUD_VALID)) { - neigh_event_send(n, NULL); - err = -EAGAIN; - goto out; - } - - err = mlx5_packet_reformat_alloc(priv->mdev, e->tunnel_type, - ipv6_encap_size, encap_header, - MLX5_FLOW_NAMESPACE_FDB, - &e->encap_id); - if (err) - goto destroy_neigh_entry; - e->flags |= MLX5_ENCAP_ENTRY_VALID; - mlx5e_rep_queue_neigh_stats_work(netdev_priv(out_dev)); - neigh_release(n); - return err; - -destroy_neigh_entry: - mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e); -free_encap: - kfree(encap_header); -out: - if (n) - neigh_release(n); - return err; -} static int mlx5e_attach_encap(struct mlx5e_priv *priv, - struct ip_tunnel_info *tun_info, + struct mlx5e_tc_flow *flow, struct net_device *mirred_dev, + int out_index, + struct netlink_ext_ack *extack, struct net_device **encap_dev, - struct mlx5e_tc_flow *flow, - struct netlink_ext_ack *extack) + bool *encap_valid) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - unsigned short family = ip_tunnel_info_af(tun_info); struct mlx5_esw_flow_attr *attr = flow->esw_attr; - struct ip_tunnel_key *key = &tun_info->key; + struct mlx5e_tc_flow_parse_attr *parse_attr; + struct ip_tunnel_info *tun_info; + struct encap_key key, e_key; struct mlx5e_encap_entry *e; - int tunnel_type, err = 0; + unsigned short family; uintptr_t hash_key; bool found = false; + int err = 0; - /* udp dst port must be set */ - if (!memchr_inv(&key->tp_dst, 0, sizeof(key->tp_dst))) - goto vxlan_encap_offload_err; - - /* setting udp src port isn't supported */ - if (memchr_inv(&key->tp_src, 0, sizeof(key->tp_src))) { -vxlan_encap_offload_err: - NL_SET_ERR_MSG_MOD(extack, - "must set udp dst port and not set udp src port"); - netdev_warn(priv->netdev, - "must set udp dst port and not set udp src port\n"); - return -EOPNOTSUPP; - } - - if (mlx5_vxlan_lookup_port(priv->mdev->vxlan, be16_to_cpu(key->tp_dst)) && - MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap)) { - tunnel_type = MLX5_REFORMAT_TYPE_L2_TO_VXLAN; - } else { - NL_SET_ERR_MSG_MOD(extack, - "port isn't an offloaded vxlan udp dport"); - netdev_warn(priv->netdev, - "%d isn't an offloaded vxlan udp dport\n", be16_to_cpu(key->tp_dst)); - return -EOPNOTSUPP; - } + parse_attr = attr->parse_attr; + tun_info = &parse_attr->tun_info[out_index]; + family = ip_tunnel_info_af(tun_info); + key.ip_tun_key = &tun_info->key; + key.tunnel_type = mlx5e_tc_tun_get_type(mirred_dev); - hash_key = hash_encap_info(key); + hash_key = hash_encap_info(&key); hash_for_each_possible_rcu(esw->offloads.encap_tbl, e, encap_hlist, hash_key) { - if (!cmp_encap_info(&e->tun_info.key, key)) { + e_key.ip_tun_key = &e->tun_info.key; + e_key.tunnel_type = e->tunnel_type; + if (!cmp_encap_info(&e_key, &key)) { found = true; break; } @@ -2763,26 +2464,33 @@ vxlan_encap_offload_err: return -ENOMEM; e->tun_info = *tun_info; - e->tunnel_type = tunnel_type; + err = mlx5e_tc_tun_init_encap_attr(mirred_dev, priv, e, extack); + if (err) + goto out_err; 
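+ /* No cached entry matched: initialize the new encap entry, build its tunnel header and publish it in the hash table so later flows to the same tunnel can share it. */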
+ INIT_LIST_HEAD(&e->flows); if (family == AF_INET) - err = mlx5e_create_encap_header_ipv4(priv, mirred_dev, e); + err = mlx5e_tc_tun_create_header_ipv4(priv, mirred_dev, e); else if (family == AF_INET6) - err = mlx5e_create_encap_header_ipv6(priv, mirred_dev, e); + err = mlx5e_tc_tun_create_header_ipv6(priv, mirred_dev, e); - if (err && err != -EAGAIN) + if (err) goto out_err; hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key); attach_flow: - list_add(&flow->encap, &e->flows); + list_add(&flow->encaps[out_index].list, &e->flows); + flow->encaps[out_index].index = out_index; *encap_dev = e->out_dev; - if (e->flags & MLX5_ENCAP_ENTRY_VALID) - attr->encap_id = e->encap_id; - else - err = -EAGAIN; + if (e->flags & MLX5_ENCAP_ENTRY_VALID) { + attr->dests[out_index].encap_id = e->encap_id; + attr->dests[out_index].flags |= MLX5_ESW_DEST_ENCAP_VALID; + *encap_valid = true; + } else { + *encap_valid = false; + } return err; @@ -2792,7 +2500,7 @@ out_err: } static int parse_tc_vlan_action(struct mlx5e_priv *priv, - const struct tc_action *a, + const struct flow_action_entry *act, struct mlx5_esw_flow_attr *attr, u32 *action) { @@ -2801,7 +2509,8 @@ static int parse_tc_vlan_action(struct mlx5e_priv *priv, if (vlan_idx >= MLX5_FS_VLAN_DEPTH) return -EOPNOTSUPP; - if (tcf_vlan_action(a) == TCA_VLAN_ACT_POP) { + switch (act->id) { + case FLOW_ACTION_VLAN_POP: if (vlan_idx) { if (!mlx5_eswitch_vlan_actions_supported(priv->mdev, MLX5_FS_VLAN_DEPTH)) @@ -2811,10 +2520,11 @@ static int parse_tc_vlan_action(struct mlx5e_priv *priv, } else { *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP; } - } else if (tcf_vlan_action(a) == TCA_VLAN_ACT_PUSH) { - attr->vlan_vid[vlan_idx] = tcf_vlan_push_vid(a); - attr->vlan_prio[vlan_idx] = tcf_vlan_push_prio(a); - attr->vlan_proto[vlan_idx] = tcf_vlan_push_proto(a); + break; + case FLOW_ACTION_VLAN_PUSH: + attr->vlan_vid[vlan_idx] = act->vlan.vid; + attr->vlan_prio[vlan_idx] = act->vlan.prio; + attr->vlan_proto[vlan_idx] = act->vlan.proto; if (!attr->vlan_proto[vlan_idx]) attr->vlan_proto[vlan_idx] = htons(ETH_P_8021Q); @@ -2826,13 +2536,15 @@ static int parse_tc_vlan_action(struct mlx5e_priv *priv, *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2; } else { if (!mlx5_eswitch_vlan_actions_supported(priv->mdev, 1) && - (tcf_vlan_push_proto(a) != htons(ETH_P_8021Q) || - tcf_vlan_push_prio(a))) + (act->vlan.proto != htons(ETH_P_8021Q) || + act->vlan.prio)) return -EOPNOTSUPP; *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH; } - } else { /* action is TCA_VLAN_ACT_MODIFY */ + break; + default: + /* action is FLOW_ACT_VLAN_MANGLE */ return -EOPNOTSUPP; } @@ -2841,59 +2553,63 @@ static int parse_tc_vlan_action(struct mlx5e_priv *priv, return 0; } -static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, +static int parse_tc_fdb_actions(struct mlx5e_priv *priv, + struct flow_action *flow_action, struct mlx5e_tc_flow_parse_attr *parse_attr, struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack) { + struct pedit_headers_action hdrs[2] = {}; struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5_esw_flow_attr *attr = flow->esw_attr; struct mlx5e_rep_priv *rpriv = priv->ppriv; - struct ip_tunnel_info *info = NULL; - const struct tc_action *a; - LIST_HEAD(actions); + const struct ip_tunnel_info *info = NULL; + const struct flow_action_entry *act; bool encap = false; u32 action = 0; int err, i; - if (!tcf_exts_has_actions(exts)) + if (!flow_action_has_entries(flow_action)) return -EINVAL; attr->in_rep = rpriv->rep; attr->in_mdev = 
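/* in_rep/in_mdev record the ingress vport rep and device the rule was installed on */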
priv->mdev; - tcf_exts_for_each_action(i, a, exts) { - if (is_tcf_gact_shot(a)) { + flow_action_for_each(i, act, flow_action) { + switch (act->id) { + case FLOW_ACTION_DROP: action |= MLX5_FLOW_CONTEXT_ACTION_DROP | MLX5_FLOW_CONTEXT_ACTION_COUNT; - continue; - } - - if (is_tcf_pedit(a)) { - err = parse_tc_pedit_action(priv, a, MLX5_FLOW_NAMESPACE_FDB, - parse_attr, extack); + break; + case FLOW_ACTION_MANGLE: + case FLOW_ACTION_ADD: + err = parse_tc_pedit_action(priv, act, MLX5_FLOW_NAMESPACE_FDB, + parse_attr, hdrs, extack); if (err) return err; action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; - attr->mirror_count = attr->out_count; - continue; - } - - if (is_tcf_csum(a)) { + attr->split_count = attr->out_count; + break; + case FLOW_ACTION_CSUM: if (csum_offload_supported(priv, action, - tcf_csum_update_flags(a), - extack)) - continue; + act->csum_flags, extack)) + break; return -EOPNOTSUPP; - } - - if (is_tcf_mirred_egress_redirect(a) || is_tcf_mirred_egress_mirror(a)) { + case FLOW_ACTION_REDIRECT: + case FLOW_ACTION_MIRRED: { struct mlx5e_priv *out_priv; struct net_device *out_dev; - out_dev = tcf_mirred_dev(a); + out_dev = act->dev; + if (!out_dev) { + /* out_dev is NULL when filters with + * non-existing mirred device are replayed to + * the driver. + */ + return -EINVAL; + } if (attr->out_count >= MLX5_MAX_FLOW_FWD_VPORTS) { NL_SET_ERR_MSG_MOD(extack, @@ -2903,23 +2619,47 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, return -EOPNOTSUPP; } - if (switchdev_port_same_parent_id(priv->netdev, - out_dev) || + action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | + MLX5_FLOW_CONTEXT_ACTION_COUNT; + if (netdev_port_same_parent_id(priv->netdev, + out_dev) || is_merged_eswitch_dev(priv, out_dev)) { - action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | - MLX5_FLOW_CONTEXT_ACTION_COUNT; + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct net_device *uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH); + struct net_device *uplink_upper = netdev_master_upper_dev_get(uplink_dev); + + if (uplink_upper && + netif_is_lag_master(uplink_upper) && + uplink_upper == out_dev) + out_dev = uplink_dev; + + if (!mlx5e_eswitch_rep(out_dev)) + return -EOPNOTSUPP; + out_priv = netdev_priv(out_dev); rpriv = out_priv->ppriv; - attr->out_rep[attr->out_count] = rpriv->rep; - attr->out_mdev[attr->out_count++] = out_priv->mdev; + attr->dests[attr->out_count].rep = rpriv->rep; + attr->dests[attr->out_count].mdev = out_priv->mdev; + attr->out_count++; } else if (encap) { - parse_attr->mirred_ifindex = out_dev->ifindex; - parse_attr->tun_info = *info; + parse_attr->mirred_ifindex[attr->out_count] = + out_dev->ifindex; + parse_attr->tun_info[attr->out_count] = *info; + encap = false; attr->parse_attr = parse_attr; - action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT | - MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | - MLX5_FLOW_CONTEXT_ACTION_COUNT; - /* attr->out_rep is resolved when we handle encap */ + attr->dests[attr->out_count].flags |= + MLX5_ESW_DEST_ENCAP; + attr->out_count++; + /* attr->dests[].rep is resolved when we + * handle encap + */ + } else if (parse_attr->filter_dev != priv->netdev) { + /* All mlx5 devices are called to configure + * high level device filters. 
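+ * A filter added on an unrelated device may thus be offered to this driver as well.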
Therefore, the + * *attempt* to install a filter on invalid + * eswitch should not trigger an explicit error + */ + return -EINVAL; } else { NL_SET_ERR_MSG_MOD(extack, "devices are not on same switch HW, can't offload forwarding"); @@ -2927,36 +2667,29 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, priv->netdev->name, out_dev->name); return -EINVAL; } - continue; - } - - if (is_tcf_tunnel_set(a)) { - info = tcf_tunnel_info(a); + } + break; + case FLOW_ACTION_TUNNEL_ENCAP: + info = act->tunnel; if (info) encap = true; else return -EOPNOTSUPP; - attr->mirror_count = attr->out_count; - continue; - } - - if (is_tcf_vlan(a)) { - err = parse_tc_vlan_action(priv, a, attr, &action); + break; + case FLOW_ACTION_VLAN_PUSH: + case FLOW_ACTION_VLAN_POP: + err = parse_tc_vlan_action(priv, act, attr, &action); if (err) return err; - attr->mirror_count = attr->out_count; - continue; - } - - if (is_tcf_tunnel_release(a)) { + attr->split_count = attr->out_count; + break; + case FLOW_ACTION_TUNNEL_DECAP: action |= MLX5_FLOW_CONTEXT_ACTION_DECAP; - continue; - } - - if (is_tcf_gact_goto_chain(a)) { - u32 dest_chain = tcf_gact_goto_chain_index(a); + break; + case FLOW_ACTION_GOTO: { + u32 dest_chain = act->chain_index; u32 max_chain = mlx5_eswitch_get_chain_range(esw); if (dest_chain <= attr->chain) { @@ -2969,15 +2702,23 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, } action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; attr->dest_chain = dest_chain; - - continue; + break; + } + default: + return -EINVAL; } + } - return -EINVAL; + if (hdrs[TCA_PEDIT_KEY_EX_CMD_SET].pedits || + hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits) { + err = alloc_tc_pedit_action(priv, MLX5_FLOW_NAMESPACE_FDB, + parse_attr, hdrs, extack); + if (err) + return err; } attr->action = action; - if (!actions_match_supported(priv, exts, parse_attr, flow, extack)) + if (!actions_match_supported(priv, flow_action, parse_attr, flow, extack)) return -EOPNOTSUPP; if (attr->dest_chain) { @@ -2988,7 +2729,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; } - if (attr->mirror_count > 0 && !mlx5_esw_has_fwd_fdb(priv->mdev)) { + if (attr->split_count > 0 && !mlx5_esw_has_fwd_fdb(priv->mdev)) { NL_SET_ERR_MSG_MOD(extack, "current firmware doesn't support split rule for port mirroring"); netdev_warn_once(priv->netdev, "current firmware doesn't support split rule for port mirroring\n"); @@ -3007,6 +2748,11 @@ static void get_flags(int flags, u16 *flow_flags) if (flags & MLX5E_TC_EGRESS) __flow_flags |= MLX5E_TC_FLOW_EGRESS; + if (flags & MLX5E_TC_ESW_OFFLOAD) + __flow_flags |= MLX5E_TC_FLOW_ESWITCH; + if (flags & MLX5E_TC_NIC_OFFLOAD) + __flow_flags |= MLX5E_TC_FLOW_NIC; + *flow_flags = __flow_flags; } @@ -3017,18 +2763,39 @@ static const struct rhashtable_params tc_ht_params = { .automatic_shrinking = true, }; -static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv) +static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv, int flags) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5e_rep_priv *uplink_rpriv; - if (MLX5_VPORT_MANAGER(priv->mdev) && esw->mode == SRIOV_OFFLOADS) { + if (flags & MLX5E_TC_ESW_OFFLOAD) { uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); - return &uplink_rpriv->tc_ht; - } else + return &uplink_rpriv->uplink_priv.tc_ht; + } else /* NIC offload */ return &priv->fs.tc.ht; } +static bool is_peer_flow_needed(struct mlx5e_tc_flow *flow) +{ + struct 
mlx5_esw_flow_attr *attr = flow->esw_attr; + bool is_rep_ingress = attr->in_rep->vport != MLX5_VPORT_UPLINK && + flow->flags & MLX5E_TC_FLOW_INGRESS; + bool act_is_encap = !!(attr->action & + MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT); + bool esw_paired = mlx5_devcom_is_paired(attr->in_mdev->priv.devcom, + MLX5_DEVCOM_ESW_OFFLOADS); + + if (!esw_paired) + return false; + + if ((mlx5_lag_is_sriov(attr->in_mdev) || + mlx5_lag_is_multipath(attr->in_mdev)) && + (is_rep_ingress || act_is_encap)) + return true; + + return false; +} + static int mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size, struct tc_cls_flower_offload *f, u16 flow_flags, @@ -3050,10 +2817,6 @@ mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size, flow->flags = flow_flags; flow->priv = priv; - err = parse_cls_flower(priv, flow, &parse_attr->spec, f); - if (err) - goto err_free; - *__flow = flow; *__parse_attr = parse_attr; @@ -3065,12 +2828,39 @@ err_free: return err; } -static int -mlx5e_add_fdb_flow(struct mlx5e_priv *priv, - struct tc_cls_flower_offload *f, - u16 flow_flags, - struct mlx5e_tc_flow **__flow) +static void +mlx5e_flow_esw_attr_init(struct mlx5_esw_flow_attr *esw_attr, + struct mlx5e_priv *priv, + struct mlx5e_tc_flow_parse_attr *parse_attr, + struct tc_cls_flower_offload *f, + struct mlx5_eswitch_rep *in_rep, + struct mlx5_core_dev *in_mdev) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + + esw_attr->parse_attr = parse_attr; + esw_attr->chain = f->common.chain_index; + esw_attr->prio = TC_H_MAJ(f->common.prio) >> 16; + + esw_attr->in_rep = in_rep; + esw_attr->in_mdev = in_mdev; + + if (MLX5_CAP_ESW(esw->dev, counter_eswitch_affinity) == + MLX5_COUNTER_SOURCE_ESWITCH) + esw_attr->counter_dev = in_mdev; + else + esw_attr->counter_dev = priv->mdev; +} + +static struct mlx5e_tc_flow * +__mlx5e_add_fdb_flow(struct mlx5e_priv *priv, + struct tc_cls_flower_offload *f, + u16 flow_flags, + struct net_device *filter_dev, + struct mlx5_eswitch_rep *in_rep, + struct mlx5_core_dev *in_mdev) { + struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f); struct netlink_ext_ack *extack = f->common.extack; struct mlx5e_tc_flow_parse_attr *parse_attr; struct mlx5e_tc_flow *flow; @@ -3083,28 +2873,118 @@ mlx5e_add_fdb_flow(struct mlx5e_priv *priv, if (err) goto out; - flow->esw_attr->chain = f->common.chain_index; - flow->esw_attr->prio = TC_H_MAJ(f->common.prio) >> 16; - err = parse_tc_fdb_actions(priv, f->exts, parse_attr, flow, extack); + parse_attr->filter_dev = filter_dev; + mlx5e_flow_esw_attr_init(flow->esw_attr, + priv, parse_attr, + f, in_rep, in_mdev); + + err = parse_cls_flower(flow->priv, flow, &parse_attr->spec, + f, filter_dev); if (err) goto err_free; - err = mlx5e_tc_add_fdb_flow(priv, parse_attr, flow, extack); + err = parse_tc_fdb_actions(priv, &rule->action, parse_attr, flow, extack); if (err) goto err_free; - if (!(flow->esw_attr->action & - MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT)) - kvfree(parse_attr); + err = mlx5e_tc_add_fdb_flow(priv, flow, extack); + if (err) { + if (!(err == -ENETUNREACH && mlx5_lag_is_multipath(in_mdev))) + goto err_free; - *__flow = flow; + add_unready_flow(flow); + } - return 0; + return flow; err_free: kfree(flow); kvfree(parse_attr); out: + return ERR_PTR(err); +} + +static int mlx5e_tc_add_fdb_peer_flow(struct tc_cls_flower_offload *f, + struct mlx5e_tc_flow *flow, + u16 flow_flags) +{ + struct mlx5e_priv *priv = flow->priv, *peer_priv; + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch, *peer_esw; + struct mlx5_devcom *devcom = 
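/* devcom pairs the eswitches of the two devices under LAG/multipath */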
priv->mdev->priv.devcom; + struct mlx5e_tc_flow_parse_attr *parse_attr; + struct mlx5e_rep_priv *peer_urpriv; + struct mlx5e_tc_flow *peer_flow; + struct mlx5_core_dev *in_mdev; + int err = 0; + + peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); + if (!peer_esw) + return -ENODEV; + + peer_urpriv = mlx5_eswitch_get_uplink_priv(peer_esw, REP_ETH); + peer_priv = netdev_priv(peer_urpriv->netdev); + + /* in_mdev is chosen according to where the packet originated: + * packets redirected to the uplink use the same mdev as the + * original flow, and packets redirected from the uplink use the + * peer mdev. + */ + if (flow->esw_attr->in_rep->vport == MLX5_VPORT_UPLINK) + in_mdev = peer_priv->mdev; + else + in_mdev = priv->mdev; + + parse_attr = flow->esw_attr->parse_attr; + peer_flow = __mlx5e_add_fdb_flow(peer_priv, f, flow_flags, + parse_attr->filter_dev, + flow->esw_attr->in_rep, in_mdev); + if (IS_ERR(peer_flow)) { + err = PTR_ERR(peer_flow); + goto out; + } + + flow->peer_flow = peer_flow; + flow->flags |= MLX5E_TC_FLOW_DUP; + mutex_lock(&esw->offloads.peer_mutex); + list_add_tail(&flow->peer, &esw->offloads.peer_flows); + mutex_unlock(&esw->offloads.peer_mutex); + +out: + mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); + return err; +} + +static int +mlx5e_add_fdb_flow(struct mlx5e_priv *priv, + struct tc_cls_flower_offload *f, + u16 flow_flags, + struct net_device *filter_dev, + struct mlx5e_tc_flow **__flow) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *in_rep = rpriv->rep; + struct mlx5_core_dev *in_mdev = priv->mdev; + struct mlx5e_tc_flow *flow; + int err; + + flow = __mlx5e_add_fdb_flow(priv, f, flow_flags, filter_dev, in_rep, + in_mdev); + if (IS_ERR(flow)) + return PTR_ERR(flow); + + if (is_peer_flow_needed(flow)) { + err = mlx5e_tc_add_fdb_peer_flow(f, flow, flow_flags); + if (err) { + mlx5e_tc_del_fdb_flow(priv, flow); + goto out; + } + } + + *__flow = flow; + + return 0; + +out: return err; } @@ -3112,8 +2992,10 @@ static int mlx5e_add_nic_flow(struct mlx5e_priv *priv, struct tc_cls_flower_offload *f, u16 flow_flags, + struct net_device *filter_dev, struct mlx5e_tc_flow **__flow) { + struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f); struct netlink_ext_ack *extack = f->common.extack; struct mlx5e_tc_flow_parse_attr *parse_attr; struct mlx5e_tc_flow *flow; @@ -3130,7 +3012,13 @@ mlx5e_add_nic_flow(struct mlx5e_priv *priv, if (err) goto out; - err = parse_tc_nic_actions(priv, f->exts, parse_attr, flow, extack); + parse_attr->filter_dev = filter_dev; + err = parse_cls_flower(flow->priv, flow, &parse_attr->spec, + f, filter_dev); + if (err) + goto err_free; + + err = parse_tc_nic_actions(priv, &rule->action, parse_attr, flow, extack); if (err) goto err_free; @@ -3155,6 +3043,7 @@ static int mlx5e_tc_add_flow(struct mlx5e_priv *priv, struct tc_cls_flower_offload *f, int flags, + struct net_device *filter_dev, struct mlx5e_tc_flow **flow) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; @@ -3167,18 +3056,20 @@ mlx5e_tc_add_flow(struct mlx5e_priv *priv, return -EOPNOTSUPP; if (esw && esw->mode == SRIOV_OFFLOADS) - err = mlx5e_add_fdb_flow(priv, f, flow_flags, flow); + err = mlx5e_add_fdb_flow(priv, f, flow_flags, + filter_dev, flow); else - err = mlx5e_add_nic_flow(priv, f, flow_flags, flow); + err = mlx5e_add_nic_flow(priv, f, flow_flags, + filter_dev, flow); return err; } -int mlx5e_configure_flower(struct mlx5e_priv *priv, +int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv, struct
tc_cls_flower_offload *f, int flags) { struct netlink_ext_ack *extack = f->common.extack; - struct rhashtable *tc_ht = get_tc_ht(priv); + struct rhashtable *tc_ht = get_tc_ht(priv, flags); struct mlx5e_tc_flow *flow; int err = 0; @@ -3192,7 +3083,7 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, goto out; } - err = mlx5e_tc_add_flow(priv, f, flags, &flow); + err = mlx5e_tc_add_flow(priv, f, flags, dev, &flow); if (err) goto out; @@ -3220,10 +3111,10 @@ static bool same_flow_direction(struct mlx5e_tc_flow *flow, int flags) return false; } -int mlx5e_delete_flower(struct mlx5e_priv *priv, +int mlx5e_delete_flower(struct net_device *dev, struct mlx5e_priv *priv, struct tc_cls_flower_offload *f, int flags) { - struct rhashtable *tc_ht = get_tc_ht(priv); + struct rhashtable *tc_ht = get_tc_ht(priv, flags); struct mlx5e_tc_flow *flow; flow = rhashtable_lookup_fast(tc_ht, &f->cookie, tc_ht_params); @@ -3239,30 +3130,57 @@ int mlx5e_delete_flower(struct mlx5e_priv *priv, return 0; } -int mlx5e_stats_flower(struct mlx5e_priv *priv, +int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv, struct tc_cls_flower_offload *f, int flags) { - struct rhashtable *tc_ht = get_tc_ht(priv); + struct mlx5_devcom *devcom = priv->mdev->priv.devcom; + struct rhashtable *tc_ht = get_tc_ht(priv, flags); + struct mlx5_eswitch *peer_esw; struct mlx5e_tc_flow *flow; struct mlx5_fc *counter; - u64 bytes; - u64 packets; - u64 lastuse; + u64 lastuse = 0; + u64 packets = 0; + u64 bytes = 0; flow = rhashtable_lookup_fast(tc_ht, &f->cookie, tc_ht_params); if (!flow || !same_flow_direction(flow, flags)) return -EINVAL; - if (!(flow->flags & MLX5E_TC_FLOW_OFFLOADED)) - return 0; + if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) { + counter = mlx5e_tc_get_counter(flow); + if (!counter) + return 0; - counter = mlx5e_tc_get_counter(flow); - if (!counter) - return 0; + mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse); + } + + /* Under multipath it's possible for one rule to be currently + * un-offloaded while the other rule is offloaded. 
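 + * Query the peer rule's counter as well and merge the two results below.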
+ */ + peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); + if (!peer_esw) + goto out; - mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse); + if ((flow->flags & MLX5E_TC_FLOW_DUP) && + (flow->peer_flow->flags & MLX5E_TC_FLOW_OFFLOADED)) { + u64 bytes2; + u64 packets2; + u64 lastuse2; - tcf_exts_stats_update(f->exts, bytes, packets, lastuse); + counter = mlx5e_tc_get_counter(flow->peer_flow); + if (!counter) + goto no_peer_counter; + mlx5_fc_query_cached(counter, &bytes2, &packets2, &lastuse2); + + bytes += bytes2; + packets += packets2; + lastuse = max_t(u64, lastuse, lastuse2); + } + +no_peer_counter: + mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); +out: + flow_stats_update(&f->stats, bytes, packets, lastuse); return 0; } @@ -3350,7 +3268,7 @@ void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv) if (tc->netdevice_nb.notifier_call) unregister_netdevice_notifier(&tc->netdevice_nb); - rhashtable_free_and_destroy(&tc->ht, _mlx5e_tc_del_flow, NULL); + rhashtable_destroy(&tc->ht); if (!IS_ERR_OR_NULL(tc->t)) { mlx5_destroy_flow_table(tc->t); @@ -3368,9 +3286,32 @@ void mlx5e_tc_esw_cleanup(struct rhashtable *tc_ht) rhashtable_free_and_destroy(tc_ht, _mlx5e_tc_del_flow, NULL); } -int mlx5e_tc_num_filters(struct mlx5e_priv *priv) +int mlx5e_tc_num_filters(struct mlx5e_priv *priv, int flags) { - struct rhashtable *tc_ht = get_tc_ht(priv); + struct rhashtable *tc_ht = get_tc_ht(priv, flags); return atomic_read(&tc_ht->nelems); } + +void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw) +{ + struct mlx5e_tc_flow *flow, *tmp; + + list_for_each_entry_safe(flow, tmp, &esw->offloads.peer_flows, peer) + __mlx5e_tc_del_fdb_peer_flow(flow); +} + +void mlx5e_tc_reoffload_flows_work(struct work_struct *work) +{ + struct mlx5_rep_uplink_priv *rpriv = + container_of(work, struct mlx5_rep_uplink_priv, + reoffload_flows_work); + struct mlx5e_tc_flow *flow, *tmp; + + rtnl_lock(); + list_for_each_entry_safe(flow, tmp, &rpriv->unready_flows, unready) { + if (!mlx5e_tc_add_fdb_flow(flow->priv, flow, NULL)) + remove_unready_flow(flow); + } + rtnl_unlock(); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h index 49436bf3b80a..f62e81902d27 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h @@ -42,7 +42,9 @@ enum { MLX5E_TC_INGRESS = BIT(0), MLX5E_TC_EGRESS = BIT(1), - MLX5E_TC_LAST_EXPORTED_BIT = 1, + MLX5E_TC_NIC_OFFLOAD = BIT(2), + MLX5E_TC_ESW_OFFLOAD = BIT(3), + MLX5E_TC_LAST_EXPORTED_BIT = 3, }; int mlx5e_tc_nic_init(struct mlx5e_priv *priv); @@ -51,12 +53,12 @@ void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv); int mlx5e_tc_esw_init(struct rhashtable *tc_ht); void mlx5e_tc_esw_cleanup(struct rhashtable *tc_ht); -int mlx5e_configure_flower(struct mlx5e_priv *priv, +int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv, struct tc_cls_flower_offload *f, int flags); -int mlx5e_delete_flower(struct mlx5e_priv *priv, +int mlx5e_delete_flower(struct net_device *dev, struct mlx5e_priv *priv, struct tc_cls_flower_offload *f, int flags); -int mlx5e_stats_flower(struct mlx5e_priv *priv, +int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv, struct tc_cls_flower_offload *f, int flags); struct mlx5e_encap_entry; @@ -68,12 +70,14 @@ void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv, struct mlx5e_neigh_hash_entry; void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe); -int 
mlx5e_tc_num_filters(struct mlx5e_priv *priv); +int mlx5e_tc_num_filters(struct mlx5e_priv *priv, int flags); + +void mlx5e_tc_reoffload_flows_work(struct work_struct *work); #else /* CONFIG_MLX5_ESWITCH */ static inline int mlx5e_tc_nic_init(struct mlx5e_priv *priv) { return 0; } static inline void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv) {} -static inline int mlx5e_tc_num_filters(struct mlx5e_priv *priv) { return 0; } +static inline int mlx5e_tc_num_filters(struct mlx5e_priv *priv, int flags) { return 0; } #endif #endif /* __MLX5_EN_TC_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 6dacaeba2fbf..25a8f8260c14 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -127,7 +127,7 @@ u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb, else #endif if (skb_vlan_tag_present(skb)) - up = skb->vlan_tci >> VLAN_PRIO_SHIFT; + up = skb_vlan_tag_get_prio(skb); /* channel_ix can be larger than num_channels since * dev->num_real_tx_queues = num_channels * num_tc @@ -148,12 +148,8 @@ static inline int mlx5e_skb_l2_header_offset(struct sk_buff *skb) static inline int mlx5e_skb_l3_header_offset(struct sk_buff *skb) { - struct flow_keys keys; - if (skb_transport_header_was_set(skb)) return skb_transport_offset(skb); - else if (skb_flow_dissect_flow_keys(skb, &keys, 0)) - return keys.control.thoff; else return mlx5e_skb_l2_header_offset(skb); } @@ -172,15 +168,8 @@ static inline u16 mlx5e_calc_min_inline(enum mlx5_inline_modes mode, hlen += VLAN_HLEN; break; case MLX5_INLINE_MODE_IP: - /* When transport header is set to zero, it means no transport - * header. When transport header is set to 0xff's, it means - * transport header wasn't set. 
- */ - if (skb_transport_offset(skb)) { - hlen = mlx5e_skb_l3_header_offset(skb); - break; - } - /* fall through */ + hlen = mlx5e_skb_l3_header_offset(skb); + break; case MLX5_INLINE_MODE_L2: default: hlen = mlx5e_skb_l2_header_offset(skb); @@ -387,8 +376,14 @@ netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS); contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi); if (unlikely(contig_wqebbs_room < num_wqebbs)) { +#ifdef CONFIG_MLX5_EN_IPSEC + struct mlx5_wqe_eth_seg cur_eth = wqe->eth; +#endif mlx5e_fill_sq_frag_edge(sq, wq, pi, contig_wqebbs_room); mlx5e_sq_fetch_wqe(sq, &wqe, &pi); +#ifdef CONFIG_MLX5_EN_IPSEC + wqe->eth = cur_eth; +#endif } /* fill wqe */ @@ -459,9 +454,10 @@ static void mlx5e_dump_error_cqe(struct mlx5e_txqsq *sq, u32 ci = mlx5_cqwq_get_ci(&sq->cq.wq); netdev_err(sq->channel->netdev, - "Error cqe on cqn 0x%x, ci 0x%x, sqn 0x%x, syndrome 0x%x, vendor syndrome 0x%x\n", - sq->cq.mcq.cqn, ci, sq->sqn, err_cqe->syndrome, - err_cqe->vendor_err_synd); + "Error cqe on cqn 0x%x, ci 0x%x, sqn 0x%x, opcode 0x%x, syndrome 0x%x, vendor syndrome 0x%x\n", + sq->cq.mcq.cqn, ci, sq->sqn, + get_cqe_opcode((struct mlx5_cqe64 *)err_cqe), + err_cqe->syndrome, err_cqe->vendor_err_synd); mlx5_dump_err_cqe(sq->cq.mdev, err_cqe); } @@ -507,13 +503,13 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) wqe_counter = be16_to_cpu(cqe->wqe_counter); - if (unlikely(cqe->op_own >> 4 == MLX5_CQE_REQ_ERR)) { + if (unlikely(get_cqe_opcode(cqe) == MLX5_CQE_REQ_ERR)) { if (!test_and_set_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state)) { mlx5e_dump_error_cqe(sq, (struct mlx5_err_cqe *)cqe); queue_work(cq->channel->priv->wq, - &sq->recover.recover_work); + &sq->recover_work); } stats->cqe_err++; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c index 85d517360157..b4af5e19f6ac 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c @@ -76,6 +76,7 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) struct mlx5e_channel *c = container_of(napi, struct mlx5e_channel, napi); struct mlx5e_ch_stats *ch_stats = c->stats; + struct mlx5e_rq *rq = &c->rq; bool busy = false; int work_done = 0; int i; @@ -85,17 +86,17 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) for (i = 0; i < c->num_tc; i++) busy |= mlx5e_poll_tx_cq(&c->sq[i].cq, budget); - busy |= mlx5e_poll_xdpsq_cq(&c->xdpsq.cq); + busy |= mlx5e_poll_xdpsq_cq(&c->xdpsq.cq, NULL); if (c->xdp) - busy |= mlx5e_poll_xdpsq_cq(&c->rq.xdpsq.cq); + busy |= mlx5e_poll_xdpsq_cq(&rq->xdpsq.cq, rq); if (likely(budget)) { /* budget=0 means: don't poll rx rings */ - work_done = mlx5e_poll_rx_cq(&c->rq.cq, budget); + work_done = mlx5e_poll_rx_cq(&rq->cq, budget); busy |= work_done == budget; } - busy |= c->rq.post_wqes(&c->rq); + busy |= c->rq.post_wqes(rq); if (busy) { if (likely(mlx5e_channel_no_affinity_change(c))) @@ -115,9 +116,9 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) mlx5e_cq_arm(&c->sq[i].cq); } - mlx5e_handle_rx_dim(&c->rq); + mlx5e_handle_rx_dim(rq); - mlx5e_cq_arm(&c->rq.cq); + mlx5e_cq_arm(&rq->cq); mlx5e_cq_arm(&c->icosq.cq); mlx5e_cq_arm(&c->xdpsq.cq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index c1e1a16a9b07..bb6e5b5d9681 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ 
-31,20 +31,23 @@ */ #include <linux/interrupt.h> +#include <linux/notifier.h> #include <linux/module.h> #include <linux/mlx5/driver.h> +#include <linux/mlx5/vport.h> +#include <linux/mlx5/eq.h> #include <linux/mlx5/cmd.h> #ifdef CONFIG_RFS_ACCEL #include <linux/cpu_rmap.h> #endif #include "mlx5_core.h" +#include "lib/eq.h" #include "fpga/core.h" #include "eswitch.h" #include "lib/clock.h" #include "diag/fw_tracer.h" enum { - MLX5_EQE_SIZE = sizeof(struct mlx5_eqe), MLX5_EQE_OWNER_INIT_VAL = 0x1, }; @@ -55,14 +58,32 @@ enum { }; enum { - MLX5_NUM_SPARE_EQE = 0x80, - MLX5_NUM_ASYNC_EQE = 0x1000, - MLX5_NUM_CMD_EQE = 32, - MLX5_NUM_PF_DRAIN = 64, + MLX5_EQ_DOORBEL_OFFSET = 0x40, }; -enum { - MLX5_EQ_DOORBEL_OFFSET = 0x40, +struct mlx5_irq_info { + cpumask_var_t mask; + char name[MLX5_MAX_IRQ_NAME]; + void *context; /* dev_id provided to request_irq */ +}; + +struct mlx5_eq_table { + struct list_head comp_eqs_list; + struct mlx5_eq pages_eq; + struct mlx5_eq cmd_eq; + struct mlx5_eq async_eq; + + struct atomic_notifier_head nh[MLX5_EVENT_TYPE_MAX]; + + /* Since CQ DB is stored in async_eq */ + struct mlx5_nb cq_err_nb; + + struct mutex lock; /* sync async eqs creations */ + int num_comp_vectors; + struct mlx5_irq_info *irq_info; +#ifdef CONFIG_RFS_ACCEL + struct cpu_rmap *rmap; +#endif }; #define MLX5_ASYNC_EVENT_MASK ((1ull << MLX5_EVENT_TYPE_PATH_MIG) | \ @@ -78,17 +99,6 @@ enum { (1ull << MLX5_EVENT_TYPE_SRQ_LAST_WQE) | \ (1ull << MLX5_EVENT_TYPE_SRQ_RQ_LIMIT)) -struct map_eq_in { - u64 mask; - u32 reserved; - u32 unmap_eqn; -}; - -struct cre_des_eq { - u8 reserved[15]; - u8 eqn; -}; - static int mlx5_cmd_destroy_eq(struct mlx5_core_dev *dev, u8 eqn) { u32 out[MLX5_ST_SZ_DW(destroy_eq_out)] = {0}; @@ -99,213 +109,56 @@ static int mlx5_cmd_destroy_eq(struct mlx5_core_dev *dev, u8 eqn) return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } -static struct mlx5_eqe *get_eqe(struct mlx5_eq *eq, u32 entry) -{ - return mlx5_buf_offset(&eq->buf, entry * MLX5_EQE_SIZE); -} - -static struct mlx5_eqe *next_eqe_sw(struct mlx5_eq *eq) -{ - struct mlx5_eqe *eqe = get_eqe(eq, eq->cons_index & (eq->nent - 1)); - - return ((eqe->owner & 1) ^ !!(eq->cons_index & eq->nent)) ? 
NULL : eqe; -} - -static const char *eqe_type_str(u8 type) -{ - switch (type) { - case MLX5_EVENT_TYPE_COMP: - return "MLX5_EVENT_TYPE_COMP"; - case MLX5_EVENT_TYPE_PATH_MIG: - return "MLX5_EVENT_TYPE_PATH_MIG"; - case MLX5_EVENT_TYPE_COMM_EST: - return "MLX5_EVENT_TYPE_COMM_EST"; - case MLX5_EVENT_TYPE_SQ_DRAINED: - return "MLX5_EVENT_TYPE_SQ_DRAINED"; - case MLX5_EVENT_TYPE_SRQ_LAST_WQE: - return "MLX5_EVENT_TYPE_SRQ_LAST_WQE"; - case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT: - return "MLX5_EVENT_TYPE_SRQ_RQ_LIMIT"; - case MLX5_EVENT_TYPE_CQ_ERROR: - return "MLX5_EVENT_TYPE_CQ_ERROR"; - case MLX5_EVENT_TYPE_WQ_CATAS_ERROR: - return "MLX5_EVENT_TYPE_WQ_CATAS_ERROR"; - case MLX5_EVENT_TYPE_PATH_MIG_FAILED: - return "MLX5_EVENT_TYPE_PATH_MIG_FAILED"; - case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR: - return "MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR"; - case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR: - return "MLX5_EVENT_TYPE_WQ_ACCESS_ERROR"; - case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR: - return "MLX5_EVENT_TYPE_SRQ_CATAS_ERROR"; - case MLX5_EVENT_TYPE_INTERNAL_ERROR: - return "MLX5_EVENT_TYPE_INTERNAL_ERROR"; - case MLX5_EVENT_TYPE_PORT_CHANGE: - return "MLX5_EVENT_TYPE_PORT_CHANGE"; - case MLX5_EVENT_TYPE_GPIO_EVENT: - return "MLX5_EVENT_TYPE_GPIO_EVENT"; - case MLX5_EVENT_TYPE_PORT_MODULE_EVENT: - return "MLX5_EVENT_TYPE_PORT_MODULE_EVENT"; - case MLX5_EVENT_TYPE_TEMP_WARN_EVENT: - return "MLX5_EVENT_TYPE_TEMP_WARN_EVENT"; - case MLX5_EVENT_TYPE_REMOTE_CONFIG: - return "MLX5_EVENT_TYPE_REMOTE_CONFIG"; - case MLX5_EVENT_TYPE_DB_BF_CONGESTION: - return "MLX5_EVENT_TYPE_DB_BF_CONGESTION"; - case MLX5_EVENT_TYPE_STALL_EVENT: - return "MLX5_EVENT_TYPE_STALL_EVENT"; - case MLX5_EVENT_TYPE_CMD: - return "MLX5_EVENT_TYPE_CMD"; - case MLX5_EVENT_TYPE_PAGE_REQUEST: - return "MLX5_EVENT_TYPE_PAGE_REQUEST"; - case MLX5_EVENT_TYPE_PAGE_FAULT: - return "MLX5_EVENT_TYPE_PAGE_FAULT"; - case MLX5_EVENT_TYPE_PPS_EVENT: - return "MLX5_EVENT_TYPE_PPS_EVENT"; - case MLX5_EVENT_TYPE_NIC_VPORT_CHANGE: - return "MLX5_EVENT_TYPE_NIC_VPORT_CHANGE"; - case MLX5_EVENT_TYPE_FPGA_ERROR: - return "MLX5_EVENT_TYPE_FPGA_ERROR"; - case MLX5_EVENT_TYPE_FPGA_QP_ERROR: - return "MLX5_EVENT_TYPE_FPGA_QP_ERROR"; - case MLX5_EVENT_TYPE_GENERAL_EVENT: - return "MLX5_EVENT_TYPE_GENERAL_EVENT"; - case MLX5_EVENT_TYPE_DEVICE_TRACER: - return "MLX5_EVENT_TYPE_DEVICE_TRACER"; - default: - return "Unrecognized event"; - } -} - -static enum mlx5_dev_event port_subtype_event(u8 subtype) -{ - switch (subtype) { - case MLX5_PORT_CHANGE_SUBTYPE_DOWN: - return MLX5_DEV_EVENT_PORT_DOWN; - case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE: - return MLX5_DEV_EVENT_PORT_UP; - case MLX5_PORT_CHANGE_SUBTYPE_INITIALIZED: - return MLX5_DEV_EVENT_PORT_INITIALIZED; - case MLX5_PORT_CHANGE_SUBTYPE_LID: - return MLX5_DEV_EVENT_LID_CHANGE; - case MLX5_PORT_CHANGE_SUBTYPE_PKEY: - return MLX5_DEV_EVENT_PKEY_CHANGE; - case MLX5_PORT_CHANGE_SUBTYPE_GUID: - return MLX5_DEV_EVENT_GUID_CHANGE; - case MLX5_PORT_CHANGE_SUBTYPE_CLIENT_REREG: - return MLX5_DEV_EVENT_CLIENT_REREG; - } - return -1; -} - -static void eq_update_ci(struct mlx5_eq *eq, int arm) +/* caller must eventually call mlx5_cq_put on the returned cq */ +static struct mlx5_core_cq *mlx5_eq_cq_get(struct mlx5_eq *eq, u32 cqn) { - __be32 __iomem *addr = eq->doorbell + (arm ? 
0 : 2); - u32 val = (eq->cons_index & 0xffffff) | (eq->eqn << 24); - - __raw_writel((__force u32)cpu_to_be32(val), addr); - /* We still want ordering, just not swabbing, so add a barrier */ - mb(); -} + struct mlx5_cq_table *table = &eq->cq_table; + struct mlx5_core_cq *cq = NULL; -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING -static void eqe_pf_action(struct work_struct *work) -{ - struct mlx5_pagefault *pfault = container_of(work, - struct mlx5_pagefault, - work); - struct mlx5_eq *eq = pfault->eq; + rcu_read_lock(); + cq = radix_tree_lookup(&table->tree, cqn); + if (likely(cq)) + mlx5_cq_hold(cq); + rcu_read_unlock(); - mlx5_core_page_fault(eq->dev, pfault); - mempool_free(pfault, eq->pf_ctx.pool); + return cq; } -static void eq_pf_process(struct mlx5_eq *eq) +static irqreturn_t mlx5_eq_comp_int(int irq, void *eq_ptr) { - struct mlx5_core_dev *dev = eq->dev; - struct mlx5_eqe_page_fault *pf_eqe; - struct mlx5_pagefault *pfault; + struct mlx5_eq_comp *eq_comp = eq_ptr; + struct mlx5_eq *eq = eq_ptr; struct mlx5_eqe *eqe; int set_ci = 0; + u32 cqn = -1; while ((eqe = next_eqe_sw(eq))) { - pfault = mempool_alloc(eq->pf_ctx.pool, GFP_ATOMIC); - if (!pfault) { - schedule_work(&eq->pf_ctx.work); - break; - } - + struct mlx5_core_cq *cq; + /* Make sure we read EQ entry contents after we've + * checked the ownership bit. + */ dma_rmb(); - pf_eqe = &eqe->data.page_fault; - pfault->event_subtype = eqe->sub_type; - pfault->bytes_committed = be32_to_cpu(pf_eqe->bytes_committed); - - mlx5_core_dbg(dev, - "PAGE_FAULT: subtype: 0x%02x, bytes_committed: 0x%06x\n", - eqe->sub_type, pfault->bytes_committed); - - switch (eqe->sub_type) { - case MLX5_PFAULT_SUBTYPE_RDMA: - /* RDMA based event */ - pfault->type = - be32_to_cpu(pf_eqe->rdma.pftype_token) >> 24; - pfault->token = - be32_to_cpu(pf_eqe->rdma.pftype_token) & - MLX5_24BIT_MASK; - pfault->rdma.r_key = - be32_to_cpu(pf_eqe->rdma.r_key); - pfault->rdma.packet_size = - be16_to_cpu(pf_eqe->rdma.packet_length); - pfault->rdma.rdma_op_len = - be32_to_cpu(pf_eqe->rdma.rdma_op_len); - pfault->rdma.rdma_va = - be64_to_cpu(pf_eqe->rdma.rdma_va); - mlx5_core_dbg(dev, - "PAGE_FAULT: type:0x%x, token: 0x%06x, r_key: 0x%08x\n", - pfault->type, pfault->token, - pfault->rdma.r_key); - mlx5_core_dbg(dev, - "PAGE_FAULT: rdma_op_len: 0x%08x, rdma_va: 0x%016llx\n", - pfault->rdma.rdma_op_len, - pfault->rdma.rdma_va); - break; - - case MLX5_PFAULT_SUBTYPE_WQE: - /* WQE based event */ - pfault->type = - (be32_to_cpu(pf_eqe->wqe.pftype_wq) >> 24) & 0x7; - pfault->token = - be32_to_cpu(pf_eqe->wqe.token); - pfault->wqe.wq_num = - be32_to_cpu(pf_eqe->wqe.pftype_wq) & - MLX5_24BIT_MASK; - pfault->wqe.wqe_index = - be16_to_cpu(pf_eqe->wqe.wqe_index); - pfault->wqe.packet_size = - be16_to_cpu(pf_eqe->wqe.packet_length); - mlx5_core_dbg(dev, - "PAGE_FAULT: type:0x%x, token: 0x%06x, wq_num: 0x%06x, wqe_index: 0x%04x\n", - pfault->type, pfault->token, - pfault->wqe.wq_num, - pfault->wqe.wqe_index); - break; - - default: - mlx5_core_warn(dev, - "Unsupported page fault event sub-type: 0x%02hhx\n", - eqe->sub_type); - /* Unsupported page faults should still be - * resolved by the page fault handler - */ + /* Assume (eqe->type) is always MLX5_EVENT_TYPE_COMP */ + cqn = be32_to_cpu(eqe->data.comp.cqn) & 0xffffff; + + cq = mlx5_eq_cq_get(eq, cqn); + if (likely(cq)) { + ++cq->arm_sn; + cq->comp(cq); + mlx5_cq_put(cq); + } else { + mlx5_core_warn(eq->dev, "Completion event for bogus CQ 0x%x\n", cqn); } - pfault->eq = eq; - INIT_WORK(&pfault->work, eqe_pf_action); - 
queue_work(eq->pf_ctx.wq, &pfault->work); - ++eq->cons_index; ++set_ci; + /* The HCA will think the queue has overflowed if we + * don't tell it we've been processing events. We + * create our EQs with MLX5_NUM_SPARE_EQE extra + * entries, so we must update our consumer index at + * least that often. + */ if (unlikely(set_ci >= MLX5_NUM_SPARE_EQE)) { eq_update_ci(eq, 0); set_ci = 0; @@ -313,165 +166,41 @@ static void eq_pf_process(struct mlx5_eq *eq) } eq_update_ci(eq, 1); -} - -static irqreturn_t mlx5_eq_pf_int(int irq, void *eq_ptr) -{ - struct mlx5_eq *eq = eq_ptr; - unsigned long flags; - if (spin_trylock_irqsave(&eq->pf_ctx.lock, flags)) { - eq_pf_process(eq); - spin_unlock_irqrestore(&eq->pf_ctx.lock, flags); - } else { - schedule_work(&eq->pf_ctx.work); - } + if (cqn != -1) + tasklet_schedule(&eq_comp->tasklet_ctx.task); return IRQ_HANDLED; } -/* mempool_refill() was proposed but unfortunately wasn't accepted - * http://lkml.iu.edu/hypermail/linux/kernel/1512.1/05073.html - * Chip workaround. +/* Some architectures don't latch interrupts when they are disabled, so using + * mlx5_eq_poll_irq_disabled could end up losing interrupts while trying to + * avoid losing them. It is not recommended to use it, unless this is the last + * resort. */ -static void mempool_refill(mempool_t *pool) +u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq_comp *eq) { - while (pool->curr_nr < pool->min_nr) - mempool_free(mempool_alloc(pool, GFP_KERNEL), pool); -} - -static void eq_pf_action(struct work_struct *work) -{ - struct mlx5_eq *eq = container_of(work, struct mlx5_eq, pf_ctx.work); - - mempool_refill(eq->pf_ctx.pool); - - spin_lock_irq(&eq->pf_ctx.lock); - eq_pf_process(eq); - spin_unlock_irq(&eq->pf_ctx.lock); -} - -static int init_pf_ctx(struct mlx5_eq_pagefault *pf_ctx, const char *name) -{ - spin_lock_init(&pf_ctx->lock); - INIT_WORK(&pf_ctx->work, eq_pf_action); - - pf_ctx->wq = alloc_ordered_workqueue(name, - WQ_MEM_RECLAIM); - if (!pf_ctx->wq) - return -ENOMEM; - - pf_ctx->pool = mempool_create_kmalloc_pool - (MLX5_NUM_PF_DRAIN, sizeof(struct mlx5_pagefault)); - if (!pf_ctx->pool) - goto err_wq; - - return 0; -err_wq: - destroy_workqueue(pf_ctx->wq); - return -ENOMEM; -} - -int mlx5_core_page_fault_resume(struct mlx5_core_dev *dev, u32 token, - u32 wq_num, u8 type, int error) -{ - u32 out[MLX5_ST_SZ_DW(page_fault_resume_out)] = {0}; - u32 in[MLX5_ST_SZ_DW(page_fault_resume_in)] = {0}; - - MLX5_SET(page_fault_resume_in, in, opcode, - MLX5_CMD_OP_PAGE_FAULT_RESUME); - MLX5_SET(page_fault_resume_in, in, error, !!error); - MLX5_SET(page_fault_resume_in, in, page_fault_type, type); - MLX5_SET(page_fault_resume_in, in, wq_number, wq_num); - MLX5_SET(page_fault_resume_in, in, token, token); - - return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); -} -EXPORT_SYMBOL_GPL(mlx5_core_page_fault_resume); -#endif - -static void general_event_handler(struct mlx5_core_dev *dev, - struct mlx5_eqe *eqe) -{ - switch (eqe->sub_type) { - case MLX5_GENERAL_SUBTYPE_DELAY_DROP_TIMEOUT: - if (dev->event) - dev->event(dev, MLX5_DEV_EVENT_DELAY_DROP_TIMEOUT, 0); - break; - default: - mlx5_core_dbg(dev, "General event with unrecognized subtype: sub_type %d\n", - eqe->sub_type); - } -} - -static void mlx5_temp_warning_event(struct mlx5_core_dev *dev, - struct mlx5_eqe *eqe) -{ - u64 value_lsb; - u64 value_msb; - - value_lsb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_lsb); - value_msb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_msb); - - mlx5_core_warn(dev, - "High temperature on sensors with bit set 
%llx %llx", - value_msb, value_lsb); -} - -/* caller must eventually call mlx5_cq_put on the returned cq */ -static struct mlx5_core_cq *mlx5_eq_cq_get(struct mlx5_eq *eq, u32 cqn) -{ - struct mlx5_cq_table *table = &eq->cq_table; - struct mlx5_core_cq *cq = NULL; - - spin_lock(&table->lock); - cq = radix_tree_lookup(&table->tree, cqn); - if (likely(cq)) - mlx5_cq_hold(cq); - spin_unlock(&table->lock); - - return cq; -} - -static void mlx5_eq_cq_completion(struct mlx5_eq *eq, u32 cqn) -{ - struct mlx5_core_cq *cq = mlx5_eq_cq_get(eq, cqn); - - if (unlikely(!cq)) { - mlx5_core_warn(eq->dev, "Completion event for bogus CQ 0x%x\n", cqn); - return; - } - - ++cq->arm_sn; - - cq->comp(cq); - - mlx5_cq_put(cq); -} - -static void mlx5_eq_cq_event(struct mlx5_eq *eq, u32 cqn, int event_type) -{ - struct mlx5_core_cq *cq = mlx5_eq_cq_get(eq, cqn); - - if (unlikely(!cq)) { - mlx5_core_warn(eq->dev, "Async event for bogus CQ 0x%x\n", cqn); - return; - } + u32 count_eqe; - cq->event(cq, event_type); + disable_irq(eq->core.irqn); + count_eqe = eq->core.cons_index; + mlx5_eq_comp_int(eq->core.irqn, eq); + count_eqe = eq->core.cons_index - count_eqe; + enable_irq(eq->core.irqn); - mlx5_cq_put(cq); + return count_eqe; } -static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr) +static irqreturn_t mlx5_eq_async_int(int irq, void *eq_ptr) { struct mlx5_eq *eq = eq_ptr; - struct mlx5_core_dev *dev = eq->dev; + struct mlx5_eq_table *eqt; + struct mlx5_core_dev *dev; struct mlx5_eqe *eqe; int set_ci = 0; - u32 cqn = -1; - u32 rsn; - u8 port; + + dev = eq->dev; + eqt = dev->priv.eq_table; while ((eqe = next_eqe_sw(eq))) { /* @@ -480,116 +209,12 @@ static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr) */ dma_rmb(); - mlx5_core_dbg(eq->dev, "eqn %d, eqe type %s\n", - eq->eqn, eqe_type_str(eqe->type)); - switch (eqe->type) { - case MLX5_EVENT_TYPE_COMP: - cqn = be32_to_cpu(eqe->data.comp.cqn) & 0xffffff; - mlx5_eq_cq_completion(eq, cqn); - break; - case MLX5_EVENT_TYPE_DCT_DRAINED: - rsn = be32_to_cpu(eqe->data.dct.dctn) & 0xffffff; - rsn |= (MLX5_RES_DCT << MLX5_USER_INDEX_LEN); - mlx5_rsc_event(dev, rsn, eqe->type); - break; - case MLX5_EVENT_TYPE_PATH_MIG: - case MLX5_EVENT_TYPE_COMM_EST: - case MLX5_EVENT_TYPE_SQ_DRAINED: - case MLX5_EVENT_TYPE_SRQ_LAST_WQE: - case MLX5_EVENT_TYPE_WQ_CATAS_ERROR: - case MLX5_EVENT_TYPE_PATH_MIG_FAILED: - case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR: - case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR: - rsn = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff; - rsn |= (eqe->data.qp_srq.type << MLX5_USER_INDEX_LEN); - mlx5_core_dbg(dev, "event %s(%d) arrived on resource 0x%x\n", - eqe_type_str(eqe->type), eqe->type, rsn); - mlx5_rsc_event(dev, rsn, eqe->type); - break; - - case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT: - case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR: - rsn = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff; - mlx5_core_dbg(dev, "SRQ event %s(%d): srqn 0x%x\n", - eqe_type_str(eqe->type), eqe->type, rsn); - mlx5_srq_event(dev, rsn, eqe->type); - break; - - case MLX5_EVENT_TYPE_CMD: - mlx5_cmd_comp_handler(dev, be32_to_cpu(eqe->data.cmd.vector), false); - break; - - case MLX5_EVENT_TYPE_PORT_CHANGE: - port = (eqe->data.port.port >> 4) & 0xf; - switch (eqe->sub_type) { - case MLX5_PORT_CHANGE_SUBTYPE_DOWN: - case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE: - case MLX5_PORT_CHANGE_SUBTYPE_LID: - case MLX5_PORT_CHANGE_SUBTYPE_PKEY: - case MLX5_PORT_CHANGE_SUBTYPE_GUID: - case MLX5_PORT_CHANGE_SUBTYPE_CLIENT_REREG: - case MLX5_PORT_CHANGE_SUBTYPE_INITIALIZED: - if (dev->event) - dev->event(dev, 
port_subtype_event(eqe->sub_type), - (unsigned long)port); - break; - default: - mlx5_core_warn(dev, "Port event with unrecognized subtype: port %d, sub_type %d\n", - port, eqe->sub_type); - } - break; - case MLX5_EVENT_TYPE_CQ_ERROR: - cqn = be32_to_cpu(eqe->data.cq_err.cqn) & 0xffffff; - mlx5_core_warn(dev, "CQ error on CQN 0x%x, syndrome 0x%x\n", - cqn, eqe->data.cq_err.syndrome); - mlx5_eq_cq_event(eq, cqn, eqe->type); - break; + if (likely(eqe->type < MLX5_EVENT_TYPE_MAX)) + atomic_notifier_call_chain(&eqt->nh[eqe->type], eqe->type, eqe); + else + mlx5_core_warn_once(dev, "notifier_call_chain is not setup for eqe: %d\n", eqe->type); - case MLX5_EVENT_TYPE_PAGE_REQUEST: - { - u16 func_id = be16_to_cpu(eqe->data.req_pages.func_id); - s32 npages = be32_to_cpu(eqe->data.req_pages.num_pages); - - mlx5_core_dbg(dev, "page request for func 0x%x, npages %d\n", - func_id, npages); - mlx5_core_req_pages_handler(dev, func_id, npages); - } - break; - - case MLX5_EVENT_TYPE_NIC_VPORT_CHANGE: - mlx5_eswitch_vport_event(dev->priv.eswitch, eqe); - break; - - case MLX5_EVENT_TYPE_PORT_MODULE_EVENT: - mlx5_port_module_event(dev, eqe); - break; - - case MLX5_EVENT_TYPE_PPS_EVENT: - mlx5_pps_event(dev, eqe); - break; - - case MLX5_EVENT_TYPE_FPGA_ERROR: - case MLX5_EVENT_TYPE_FPGA_QP_ERROR: - mlx5_fpga_event(dev, eqe->type, &eqe->data.raw); - break; - - case MLX5_EVENT_TYPE_TEMP_WARN_EVENT: - mlx5_temp_warning_event(dev, eqe); - break; - - case MLX5_EVENT_TYPE_GENERAL_EVENT: - general_event_handler(dev, eqe); - break; - - case MLX5_EVENT_TYPE_DEVICE_TRACER: - mlx5_fw_tracer_event(dev, eqe); - break; - - default: - mlx5_core_warn(dev, "Unhandled event 0x%x on EQ 0x%x\n", - eqe->type, eq->eqn); - break; - } + atomic_notifier_call_chain(&eqt->nh[MLX5_EVENT_TYPE_NOTIFY_ANY], eqe->type, eqe); ++eq->cons_index; ++set_ci; @@ -608,30 +233,9 @@ static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr) eq_update_ci(eq, 1); - if (cqn != -1) - tasklet_schedule(&eq->tasklet_ctx.task); - return IRQ_HANDLED; } -/* Some architectures don't latch interrupts when they are disabled, so using - * mlx5_eq_poll_irq_disabled could end up losing interrupts while trying to - * avoid losing them. It is not recommended to use it, unless this is the last - * resort. 
- */ -u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq *eq) -{ - u32 count_eqe; - - disable_irq(eq->irqn); - count_eqe = eq->cons_index; - mlx5_eq_int(eq->irqn, eq); - count_eqe = eq->cons_index - count_eqe; - enable_irq(eq->irqn); - - return count_eqe; -} - static void init_eq_buf(struct mlx5_eq *eq) { struct mlx5_eqe *eqe; @@ -643,39 +247,35 @@ static void init_eq_buf(struct mlx5_eq *eq) } } -int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, - int nent, u64 mask, const char *name, - enum mlx5_eq_type type) +static int +create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, const char *name, + struct mlx5_eq_param *param) { + struct mlx5_eq_table *eq_table = dev->priv.eq_table; struct mlx5_cq_table *cq_table = &eq->cq_table; u32 out[MLX5_ST_SZ_DW(create_eq_out)] = {0}; struct mlx5_priv *priv = &dev->priv; - irq_handler_t handler; + u8 vecidx = param->index; __be64 *pas; void *eqc; int inlen; u32 *in; int err; + if (eq_table->irq_info[vecidx].context) + return -EEXIST; + /* Init CQ table */ memset(cq_table, 0, sizeof(*cq_table)); spin_lock_init(&cq_table->lock); INIT_RADIX_TREE(&cq_table->tree, GFP_ATOMIC); - eq->type = type; - eq->nent = roundup_pow_of_two(nent + MLX5_NUM_SPARE_EQE); + eq->nent = roundup_pow_of_two(param->nent + MLX5_NUM_SPARE_EQE); eq->cons_index = 0; err = mlx5_buf_alloc(dev, eq->nent * MLX5_EQE_SIZE, &eq->buf); if (err) return err; -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - if (type == MLX5_EQ_TYPE_PF) - handler = mlx5_eq_pf_int; - else -#endif - handler = mlx5_eq_int; - init_eq_buf(eq); inlen = MLX5_ST_SZ_BYTES(create_eq_in) + @@ -691,7 +291,7 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, mlx5_fill_page_array(&eq->buf, pas); MLX5_SET(create_eq_in, in, opcode, MLX5_CMD_OP_CREATE_EQ); - MLX5_SET64(create_eq_in, in, event_bitmask, mask); + MLX5_SET64(create_eq_in, in, event_bitmask, param->mask); eqc = MLX5_ADDR_OF(create_eq_in, in, eq_context_entry); MLX5_SET(eqc, eqc, log_eq_size, ilog2(eq->nent)); @@ -704,15 +304,17 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, if (err) goto err_in; - snprintf(priv->irq_info[vecidx].name, MLX5_MAX_IRQ_NAME, "%s@pci:%s", + snprintf(eq_table->irq_info[vecidx].name, MLX5_MAX_IRQ_NAME, "%s@pci:%s", name, pci_name(dev->pdev)); + eq_table->irq_info[vecidx].context = param->context; + eq->vecidx = vecidx; eq->eqn = MLX5_GET(create_eq_out, out, eq_number); eq->irqn = pci_irq_vector(dev->pdev, vecidx); eq->dev = dev; eq->doorbell = priv->uar->map + MLX5_EQ_DOORBEL_OFFSET; - err = request_irq(eq->irqn, handler, 0, - priv->irq_info[vecidx].name, eq); + err = request_irq(eq->irqn, param->handler, 0, + eq_table->irq_info[vecidx].name, param->context); if (err) goto err_eq; @@ -720,21 +322,6 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, if (err) goto err_irq; -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - if (type == MLX5_EQ_TYPE_PF) { - err = init_pf_ctx(&eq->pf_ctx, name); - if (err) - goto err_irq; - } else -#endif - { - INIT_LIST_HEAD(&eq->tasklet_ctx.list); - INIT_LIST_HEAD(&eq->tasklet_ctx.process_list); - spin_lock_init(&eq->tasklet_ctx.lock); - tasklet_init(&eq->tasklet_ctx.task, mlx5_cq_tasklet_cb, - (unsigned long)&eq->tasklet_ctx); - } - /* EQs are created in ARMED state */ eq_update_ci(eq, 1); @@ -756,27 +343,25 @@ err_buf: return err; } -int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq) +static int destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq) { + 
struct mlx5_eq_table *eq_table = dev->priv.eq_table; + struct mlx5_irq_info *irq_info; int err; + irq_info = &eq_table->irq_info[eq->vecidx]; + mlx5_debug_eq_remove(dev, eq); - free_irq(eq->irqn, eq); + + free_irq(eq->irqn, irq_info->context); + irq_info->context = NULL; + err = mlx5_cmd_destroy_eq(dev, eq->eqn); if (err) mlx5_core_warn(dev, "failed to destroy a previously created eq: eqn %d\n", eq->eqn); synchronize_irq(eq->irqn); - if (eq->type == MLX5_EQ_TYPE_COMP) { - tasklet_disable(&eq->tasklet_ctx.task); -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - } else if (eq->type == MLX5_EQ_TYPE_PF) { - cancel_work_sync(&eq->pf_ctx.work); - destroy_workqueue(eq->pf_ctx.wq); - mempool_destroy(eq->pf_ctx.pool); -#endif - } mlx5_buf_free(dev, &eq->buf); return err; @@ -787,9 +372,9 @@ int mlx5_eq_add_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq) struct mlx5_cq_table *table = &eq->cq_table; int err; - spin_lock_irq(&table->lock); + spin_lock(&table->lock); err = radix_tree_insert(&table->tree, cq->cqn, cq); - spin_unlock_irq(&table->lock); + spin_unlock(&table->lock); return err; } @@ -799,9 +384,9 @@ int mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq) struct mlx5_cq_table *table = &eq->cq_table; struct mlx5_core_cq *tmp; - spin_lock_irq(&table->lock); + spin_lock(&table->lock); tmp = radix_tree_delete(&table->tree, cq->cqn); - spin_unlock_irq(&table->lock); + spin_unlock(&table->lock); if (!tmp) { mlx5_core_warn(eq->dev, "cq 0x%x not found in eq 0x%x tree\n", eq->eqn, cq->cqn); @@ -816,28 +401,106 @@ int mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq) return 0; } -int mlx5_eq_init(struct mlx5_core_dev *dev) +int mlx5_eq_table_init(struct mlx5_core_dev *dev) { - int err; + struct mlx5_eq_table *eq_table; + int i, err; - spin_lock_init(&dev->priv.eq_table.lock); + eq_table = kvzalloc(sizeof(*eq_table), GFP_KERNEL); + if (!eq_table) + return -ENOMEM; + + dev->priv.eq_table = eq_table; err = mlx5_eq_debugfs_init(dev); + if (err) + goto kvfree_eq_table; + mutex_init(&eq_table->lock); + for (i = 0; i < MLX5_EVENT_TYPE_MAX; i++) + ATOMIC_INIT_NOTIFIER_HEAD(&eq_table->nh[i]); + + return 0; + +kvfree_eq_table: + kvfree(eq_table); + dev->priv.eq_table = NULL; return err; } -void mlx5_eq_cleanup(struct mlx5_core_dev *dev) +void mlx5_eq_table_cleanup(struct mlx5_core_dev *dev) { mlx5_eq_debugfs_cleanup(dev); + kvfree(dev->priv.eq_table); } -int mlx5_start_eqs(struct mlx5_core_dev *dev) +/* Async EQs */ + +static int create_async_eq(struct mlx5_core_dev *dev, const char *name, + struct mlx5_eq *eq, struct mlx5_eq_param *param) { - struct mlx5_eq_table *table = &dev->priv.eq_table; - u64 async_event_mask = MLX5_ASYNC_EVENT_MASK; + struct mlx5_eq_table *eq_table = dev->priv.eq_table; + int err; + + mutex_lock(&eq_table->lock); + if (param->index >= MLX5_EQ_MAX_ASYNC_EQS) { + err = -ENOSPC; + goto unlock; + } + + err = create_map_eq(dev, eq, name, param); +unlock: + mutex_unlock(&eq_table->lock); + return err; +} + +static int destroy_async_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq) +{ + struct mlx5_eq_table *eq_table = dev->priv.eq_table; int err; + mutex_lock(&eq_table->lock); + err = destroy_unmap_eq(dev, eq); + mutex_unlock(&eq_table->lock); + return err; +} + +static int cq_err_event_notifier(struct notifier_block *nb, + unsigned long type, void *data) +{ + struct mlx5_eq_table *eqt; + struct mlx5_core_cq *cq; + struct mlx5_eqe *eqe; + struct mlx5_eq *eq; + u32 cqn; + + /* type == MLX5_EVENT_TYPE_CQ_ERROR */ + + eqt = mlx5_nb_cof(nb, struct mlx5_eq_table, cq_err_nb); + eq = 
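/* CQ error events are reported on the async EQ */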
&eqt->async_eq; + eqe = data; + + cqn = be32_to_cpu(eqe->data.cq_err.cqn) & 0xffffff; + mlx5_core_warn(eq->dev, "CQ error on CQN 0x%x, syndrome 0x%x\n", + cqn, eqe->data.cq_err.syndrome); + + cq = mlx5_eq_cq_get(eq, cqn); + if (unlikely(!cq)) { + mlx5_core_warn(eq->dev, "Async event for bogus CQ 0x%x\n", cqn); + return NOTIFY_OK; + } + + cq->event(cq, type); + + mlx5_cq_put(cq); + + return NOTIFY_OK; +} + +static u64 gather_async_events_mask(struct mlx5_core_dev *dev) +{ + u64 async_event_mask = MLX5_ASYNC_EVENT_MASK; + if (MLX5_VPORT_MANAGER(dev)) async_event_mask |= (1ull << MLX5_EVENT_TYPE_NIC_VPORT_CHANGE); @@ -865,127 +528,524 @@ int mlx5_start_eqs(struct mlx5_core_dev *dev) if (MLX5_CAP_MCAM_REG(dev, tracer_registers)) async_event_mask |= (1ull << MLX5_EVENT_TYPE_DEVICE_TRACER); - err = mlx5_create_map_eq(dev, &table->cmd_eq, MLX5_EQ_VEC_CMD, - MLX5_NUM_CMD_EQE, 1ull << MLX5_EVENT_TYPE_CMD, - "mlx5_cmd_eq", MLX5_EQ_TYPE_ASYNC); + if (MLX5_CAP_GEN(dev, max_num_of_monitor_counters)) + async_event_mask |= (1ull << MLX5_EVENT_TYPE_MONITOR_COUNTER); + + if (mlx5_core_is_ecpf_esw_manager(dev)) + async_event_mask |= (1ull << MLX5_EVENT_TYPE_HOST_PARAMS_CHANGE); + + return async_event_mask; +} + +static int create_async_eqs(struct mlx5_core_dev *dev) +{ + struct mlx5_eq_table *table = dev->priv.eq_table; + struct mlx5_eq_param param = {}; + int err; + + MLX5_NB_INIT(&table->cq_err_nb, cq_err_event_notifier, CQ_ERROR); + mlx5_eq_notifier_register(dev, &table->cq_err_nb); + + param = (struct mlx5_eq_param) { + .index = MLX5_EQ_CMD_IDX, + .mask = 1ull << MLX5_EVENT_TYPE_CMD, + .nent = MLX5_NUM_CMD_EQE, + .context = &table->cmd_eq, + .handler = mlx5_eq_async_int, + }; + err = create_async_eq(dev, "mlx5_cmd_eq", &table->cmd_eq, &param); if (err) { mlx5_core_warn(dev, "failed to create cmd EQ %d\n", err); - return err; + goto err0; } mlx5_cmd_use_events(dev); - err = mlx5_create_map_eq(dev, &table->async_eq, MLX5_EQ_VEC_ASYNC, - MLX5_NUM_ASYNC_EQE, async_event_mask, - "mlx5_async_eq", MLX5_EQ_TYPE_ASYNC); + param = (struct mlx5_eq_param) { + .index = MLX5_EQ_ASYNC_IDX, + .mask = gather_async_events_mask(dev), + .nent = MLX5_NUM_ASYNC_EQE, + .context = &table->async_eq, + .handler = mlx5_eq_async_int, + }; + err = create_async_eq(dev, "mlx5_async_eq", &table->async_eq, &param); if (err) { mlx5_core_warn(dev, "failed to create async EQ %d\n", err); goto err1; } - err = mlx5_create_map_eq(dev, &table->pages_eq, - MLX5_EQ_VEC_PAGES, - /* TODO: sriov max_vf + */ 1, - 1 << MLX5_EVENT_TYPE_PAGE_REQUEST, "mlx5_pages_eq", - MLX5_EQ_TYPE_ASYNC); + param = (struct mlx5_eq_param) { + .index = MLX5_EQ_PAGEREQ_IDX, + .mask = 1 << MLX5_EVENT_TYPE_PAGE_REQUEST, + .nent = /* TODO: sriov max_vf + */ 1, + .context = &table->pages_eq, + .handler = mlx5_eq_async_int, + }; + err = create_async_eq(dev, "mlx5_pages_eq", &table->pages_eq, &param); if (err) { mlx5_core_warn(dev, "failed to create pages EQ %d\n", err); goto err2; } -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - if (MLX5_CAP_GEN(dev, pg)) { - err = mlx5_create_map_eq(dev, &table->pfault_eq, - MLX5_EQ_VEC_PFAULT, - MLX5_NUM_ASYNC_EQE, - 1 << MLX5_EVENT_TYPE_PAGE_FAULT, - "mlx5_page_fault_eq", - MLX5_EQ_TYPE_PF); - if (err) { - mlx5_core_warn(dev, "failed to create page fault EQ %d\n", - err); - goto err3; - } - } - return err; -err3: - mlx5_destroy_unmap_eq(dev, &table->pages_eq); -#else - return err; -#endif err2: - mlx5_destroy_unmap_eq(dev, &table->async_eq); + destroy_async_eq(dev, &table->async_eq); err1: mlx5_cmd_use_polling(dev); - mlx5_destroy_unmap_eq(dev, 
&table->cmd_eq); + destroy_async_eq(dev, &table->cmd_eq); +err0: + mlx5_eq_notifier_unregister(dev, &table->cq_err_nb); return err; } -void mlx5_stop_eqs(struct mlx5_core_dev *dev) +static void destroy_async_eqs(struct mlx5_core_dev *dev) { - struct mlx5_eq_table *table = &dev->priv.eq_table; + struct mlx5_eq_table *table = dev->priv.eq_table; int err; -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - if (MLX5_CAP_GEN(dev, pg)) { - err = mlx5_destroy_unmap_eq(dev, &table->pfault_eq); - if (err) - mlx5_core_err(dev, "failed to destroy page fault eq, err(%d)\n", - err); - } -#endif - - err = mlx5_destroy_unmap_eq(dev, &table->pages_eq); + err = destroy_async_eq(dev, &table->pages_eq); if (err) mlx5_core_err(dev, "failed to destroy pages eq, err(%d)\n", err); - err = mlx5_destroy_unmap_eq(dev, &table->async_eq); + err = destroy_async_eq(dev, &table->async_eq); if (err) mlx5_core_err(dev, "failed to destroy async eq, err(%d)\n", err); + mlx5_cmd_use_polling(dev); - err = mlx5_destroy_unmap_eq(dev, &table->cmd_eq); + err = destroy_async_eq(dev, &table->cmd_eq); if (err) mlx5_core_err(dev, "failed to destroy command eq, err(%d)\n", err); + + mlx5_eq_notifier_unregister(dev, &table->cq_err_nb); +} + +struct mlx5_eq *mlx5_get_async_eq(struct mlx5_core_dev *dev) +{ + return &dev->priv.eq_table->async_eq; +} + +void mlx5_eq_synchronize_async_irq(struct mlx5_core_dev *dev) +{ + synchronize_irq(dev->priv.eq_table->async_eq.irqn); +} + +void mlx5_eq_synchronize_cmd_irq(struct mlx5_core_dev *dev) +{ + synchronize_irq(dev->priv.eq_table->cmd_eq.irqn); +} + +/* Generic EQ API for mlx5_core consumers + * Needed For RDMA ODP EQ for now + */ +struct mlx5_eq * +mlx5_eq_create_generic(struct mlx5_core_dev *dev, const char *name, + struct mlx5_eq_param *param) +{ + struct mlx5_eq *eq = kvzalloc(sizeof(*eq), GFP_KERNEL); + int err; + + if (!eq) + return ERR_PTR(-ENOMEM); + + err = create_async_eq(dev, name, eq, param); + if (err) { + kvfree(eq); + eq = ERR_PTR(err); + } + + return eq; +} +EXPORT_SYMBOL(mlx5_eq_create_generic); + +int mlx5_eq_destroy_generic(struct mlx5_core_dev *dev, struct mlx5_eq *eq) +{ + int err; + + if (IS_ERR(eq)) + return -EINVAL; + + err = destroy_async_eq(dev, eq); + if (err) + goto out; + + kvfree(eq); +out: + return err; } +EXPORT_SYMBOL(mlx5_eq_destroy_generic); -int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq, - u32 *out, int outlen) +struct mlx5_eqe *mlx5_eq_get_eqe(struct mlx5_eq *eq, u32 cc) { - u32 in[MLX5_ST_SZ_DW(query_eq_in)] = {0}; + u32 ci = eq->cons_index + cc; + struct mlx5_eqe *eqe; + + eqe = get_eqe(eq, ci & (eq->nent - 1)); + eqe = ((eqe->owner & 1) ^ !!(ci & eq->nent)) ? NULL : eqe; + /* Make sure we read EQ entry contents after we've + * checked the ownership bit. + */ + if (eqe) + dma_rmb(); - MLX5_SET(query_eq_in, in, opcode, MLX5_CMD_OP_QUERY_EQ); - MLX5_SET(query_eq_in, in, eq_number, eq->eqn); - return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); + return eqe; +} +EXPORT_SYMBOL(mlx5_eq_get_eqe); + +void mlx5_eq_update_ci(struct mlx5_eq *eq, u32 cc, bool arm) +{ + __be32 __iomem *addr = eq->doorbell + (arm ? 
0 : 2); + u32 val; + + eq->cons_index += cc; + val = (eq->cons_index & 0xffffff) | (eq->eqn << 24); + + __raw_writel((__force u32)cpu_to_be32(val), addr); + /* We still want ordering, just not swabbing, so add a barrier */ + mb(); +} +EXPORT_SYMBOL(mlx5_eq_update_ci); + +/* Completion EQs */ + +static int set_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i) +{ + struct mlx5_priv *priv = &mdev->priv; + int vecidx = MLX5_EQ_VEC_COMP_BASE + i; + int irq = pci_irq_vector(mdev->pdev, vecidx); + struct mlx5_irq_info *irq_info = &priv->eq_table->irq_info[vecidx]; + + if (!zalloc_cpumask_var(&irq_info->mask, GFP_KERNEL)) { + mlx5_core_warn(mdev, "zalloc_cpumask_var failed"); + return -ENOMEM; + } + + cpumask_set_cpu(cpumask_local_spread(i, priv->numa_node), + irq_info->mask); + + if (IS_ENABLED(CONFIG_SMP) && + irq_set_affinity_hint(irq, irq_info->mask)) + mlx5_core_warn(mdev, "irq_set_affinity_hint failed, irq 0x%.4x", irq); + + return 0; +} + +static void clear_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i) +{ + int vecidx = MLX5_EQ_VEC_COMP_BASE + i; + struct mlx5_priv *priv = &mdev->priv; + int irq = pci_irq_vector(mdev->pdev, vecidx); + struct mlx5_irq_info *irq_info = &priv->eq_table->irq_info[vecidx]; + + irq_set_affinity_hint(irq, NULL); + free_cpumask_var(irq_info->mask); +} + +static int set_comp_irq_affinity_hints(struct mlx5_core_dev *mdev) +{ + int err; + int i; + + for (i = 0; i < mdev->priv.eq_table->num_comp_vectors; i++) { + err = set_comp_irq_affinity_hint(mdev, i); + if (err) + goto err_out; + } + + return 0; + +err_out: + for (i--; i >= 0; i--) + clear_comp_irq_affinity_hint(mdev, i); + + return err; +} + +static void clear_comp_irqs_affinity_hints(struct mlx5_core_dev *mdev) +{ + int i; + + for (i = 0; i < mdev->priv.eq_table->num_comp_vectors; i++) + clear_comp_irq_affinity_hint(mdev, i); +} + +static void destroy_comp_eqs(struct mlx5_core_dev *dev) +{ + struct mlx5_eq_table *table = dev->priv.eq_table; + struct mlx5_eq_comp *eq, *n; + + clear_comp_irqs_affinity_hints(dev); + +#ifdef CONFIG_RFS_ACCEL + if (table->rmap) { + free_irq_cpu_rmap(table->rmap); + table->rmap = NULL; + } +#endif + list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) { + list_del(&eq->list); + if (destroy_unmap_eq(dev, &eq->core)) + mlx5_core_warn(dev, "failed to destroy comp EQ 0x%x\n", + eq->core.eqn); + tasklet_disable(&eq->tasklet_ctx.task); + kfree(eq); + } +} + +static int create_comp_eqs(struct mlx5_core_dev *dev) +{ + struct mlx5_eq_table *table = dev->priv.eq_table; + char name[MLX5_MAX_IRQ_NAME]; + struct mlx5_eq_comp *eq; + int ncomp_vec; + int nent; + int err; + int i; + + INIT_LIST_HEAD(&table->comp_eqs_list); + ncomp_vec = table->num_comp_vectors; + nent = MLX5_COMP_EQ_SIZE; +#ifdef CONFIG_RFS_ACCEL + table->rmap = alloc_irq_cpu_rmap(ncomp_vec); + if (!table->rmap) + return -ENOMEM; +#endif + for (i = 0; i < ncomp_vec; i++) { + int vecidx = i + MLX5_EQ_VEC_COMP_BASE; + struct mlx5_eq_param param = {}; + + eq = kzalloc(sizeof(*eq), GFP_KERNEL); + if (!eq) { + err = -ENOMEM; + goto clean; + } + + INIT_LIST_HEAD(&eq->tasklet_ctx.list); + INIT_LIST_HEAD(&eq->tasklet_ctx.process_list); + spin_lock_init(&eq->tasklet_ctx.lock); + tasklet_init(&eq->tasklet_ctx.task, mlx5_cq_tasklet_cb, + (unsigned long)&eq->tasklet_ctx); + +#ifdef CONFIG_RFS_ACCEL + irq_cpu_rmap_add(table->rmap, pci_irq_vector(dev->pdev, vecidx)); +#endif + snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", i); + param = (struct mlx5_eq_param) { + .index = vecidx, + .mask = 0, + .nent = nent, + .context 
= &eq->core, + .handler = mlx5_eq_comp_int + }; + err = create_map_eq(dev, &eq->core, name, &param); + if (err) { + kfree(eq); + goto clean; + } + mlx5_core_dbg(dev, "allocated completion EQN %d\n", eq->core.eqn); + /* add tail, to keep the list ordered, for mlx5_vector2eqn to work */ + list_add_tail(&eq->list, &table->comp_eqs_list); + } + + err = set_comp_irq_affinity_hints(dev); + if (err) { + mlx5_core_err(dev, "Failed to alloc affinity hint cpumask\n"); + goto clean; + } + + return 0; + +clean: + destroy_comp_eqs(dev); + return err; +} + +int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn, + unsigned int *irqn) +{ + struct mlx5_eq_table *table = dev->priv.eq_table; + struct mlx5_eq_comp *eq, *n; + int err = -ENOENT; + int i = 0; + + list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) { + if (i++ == vector) { + *eqn = eq->core.eqn; + *irqn = eq->core.irqn; + err = 0; + break; + } + } + + return err; +} +EXPORT_SYMBOL(mlx5_vector2eqn); + +unsigned int mlx5_comp_vectors_count(struct mlx5_core_dev *dev) +{ + return dev->priv.eq_table->num_comp_vectors; +} +EXPORT_SYMBOL(mlx5_comp_vectors_count); + +struct cpumask * +mlx5_comp_irq_get_affinity_mask(struct mlx5_core_dev *dev, int vector) +{ + /* TODO: consider irq_get_affinity_mask(irq) */ + return dev->priv.eq_table->irq_info[vector + MLX5_EQ_VEC_COMP_BASE].mask; +} +EXPORT_SYMBOL(mlx5_comp_irq_get_affinity_mask); + +struct cpu_rmap *mlx5_eq_table_get_rmap(struct mlx5_core_dev *dev) +{ +#ifdef CONFIG_RFS_ACCEL + return dev->priv.eq_table->rmap; +#else + return NULL; +#endif +} + +struct mlx5_eq_comp *mlx5_eqn2comp_eq(struct mlx5_core_dev *dev, int eqn) +{ + struct mlx5_eq_table *table = dev->priv.eq_table; + struct mlx5_eq_comp *eq; + + list_for_each_entry(eq, &table->comp_eqs_list, list) { + if (eq->core.eqn == eqn) + return eq; + } + + return ERR_PTR(-ENOENT); } /* This function should only be called after mlx5_cmd_force_teardown_hca */ void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev) { - struct mlx5_eq_table *table = &dev->priv.eq_table; - struct mlx5_eq *eq; + struct mlx5_eq_table *table = dev->priv.eq_table; + int i, max_eqs; + + clear_comp_irqs_affinity_hints(dev); #ifdef CONFIG_RFS_ACCEL - if (dev->rmap) { - free_irq_cpu_rmap(dev->rmap); - dev->rmap = NULL; + if (table->rmap) { + free_irq_cpu_rmap(table->rmap); + table->rmap = NULL; } #endif - list_for_each_entry(eq, &table->comp_eqs_list, list) - free_irq(eq->irqn, eq); - - free_irq(table->pages_eq.irqn, &table->pages_eq); - free_irq(table->async_eq.irqn, &table->async_eq); - free_irq(table->cmd_eq.irqn, &table->cmd_eq); -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - if (MLX5_CAP_GEN(dev, pg)) - free_irq(table->pfault_eq.irqn, &table->pfault_eq); -#endif + + mutex_lock(&table->lock); /* sync with create/destroy_async_eq */ + max_eqs = table->num_comp_vectors + MLX5_EQ_VEC_COMP_BASE; + for (i = max_eqs - 1; i >= 0; i--) { + if (!table->irq_info[i].context) + continue; + free_irq(pci_irq_vector(dev->pdev, i), table->irq_info[i].context); + table->irq_info[i].context = NULL; + } + mutex_unlock(&table->lock); + pci_free_irq_vectors(dev->pdev); +} + +static int alloc_irq_vectors(struct mlx5_core_dev *dev) +{ + struct mlx5_priv *priv = &dev->priv; + struct mlx5_eq_table *table = priv->eq_table; + int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ? 
+ MLX5_CAP_GEN(dev, max_num_eqs) : + 1 << MLX5_CAP_GEN(dev, log_max_eq); + int nvec; + int err; + + nvec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() + + MLX5_EQ_VEC_COMP_BASE; + nvec = min_t(int, nvec, num_eqs); + if (nvec <= MLX5_EQ_VEC_COMP_BASE) + return -ENOMEM; + + table->irq_info = kcalloc(nvec, sizeof(*table->irq_info), GFP_KERNEL); + if (!table->irq_info) + return -ENOMEM; + + nvec = pci_alloc_irq_vectors(dev->pdev, MLX5_EQ_VEC_COMP_BASE + 1, + nvec, PCI_IRQ_MSIX); + if (nvec < 0) { + err = nvec; + goto err_free_irq_info; + } + + table->num_comp_vectors = nvec - MLX5_EQ_VEC_COMP_BASE; + + return 0; + +err_free_irq_info: + kfree(table->irq_info); + return err; +} + +static void free_irq_vectors(struct mlx5_core_dev *dev) +{ + struct mlx5_priv *priv = &dev->priv; + pci_free_irq_vectors(dev->pdev); + kfree(priv->eq_table->irq_info); +} + +int mlx5_eq_table_create(struct mlx5_core_dev *dev) +{ + int err; + + err = alloc_irq_vectors(dev); + if (err) { + mlx5_core_err(dev, "alloc irq vectors failed\n"); + return err; + } + + err = create_async_eqs(dev); + if (err) { + mlx5_core_err(dev, "Failed to create async EQs\n"); + goto err_async_eqs; + } + + err = create_comp_eqs(dev); + if (err) { + mlx5_core_err(dev, "Failed to create completion EQs\n"); + goto err_comp_eqs; + } + + return 0; +err_comp_eqs: + destroy_async_eqs(dev); +err_async_eqs: + free_irq_vectors(dev); + return err; +} + +void mlx5_eq_table_destroy(struct mlx5_core_dev *dev) +{ + destroy_comp_eqs(dev); + destroy_async_eqs(dev); + free_irq_vectors(dev); +} + +int mlx5_eq_notifier_register(struct mlx5_core_dev *dev, struct mlx5_nb *nb) +{ + struct mlx5_eq_table *eqt = dev->priv.eq_table; + + if (nb->event_type >= MLX5_EVENT_TYPE_MAX) + return -EINVAL; + + return atomic_notifier_chain_register(&eqt->nh[nb->event_type], &nb->nb); +} + +int mlx5_eq_notifier_unregister(struct mlx5_core_dev *dev, struct mlx5_nb *nb) +{ + struct mlx5_eq_table *eqt = dev->priv.eq_table; + + if (nb->event_type >= MLX5_EVENT_TYPE_MAX) + return -EINVAL; + + return atomic_notifier_chain_unregister(&eqt->nh[nb->event_type], &nb->nb); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index d004957328f9..8a67fd197b79 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -36,10 +36,10 @@ #include <linux/mlx5/vport.h> #include <linux/mlx5/fs.h> #include "mlx5_core.h" +#include "lib/eq.h" #include "eswitch.h" #include "fs_core.h" - -#define UPLINK_VPORT 0xFFFF +#include "ecpf.h" enum { MLX5_ACTION_NONE = 0, @@ -51,7 +51,7 @@ enum { struct vport_addr { struct l2addr_node node; u8 action; - u32 vport; + u16 vport; struct mlx5_flow_handle *flow_rule; bool mpfs; /* UC MAC was added to MPFs */ /* A flag indicating that mac was added due to mc promiscuous vport */ @@ -64,11 +64,36 @@ enum { PROMISC_CHANGE = BIT(3), }; +static void esw_destroy_legacy_fdb_table(struct mlx5_eswitch *esw); +static void esw_cleanup_vepa_rules(struct mlx5_eswitch *esw); + /* Vport context events */ #define SRIOV_VPORT_EVENTS (UC_ADDR_CHANGE | \ MC_ADDR_CHANGE | \ PROMISC_CHANGE) +/* The vport getter/iterator are only valid after esw->total_vports + * and vport->vport are initialized in mlx5_eswitch_init. 
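+ * Editor's illustrative sketch (not part of the patch): with the macros
+ * defined just below, walking every vport after initialization reduces to
+ *
+ *	struct mlx5_vport *vport;
+ *	int i;
+ *
+ *	mlx5_esw_for_all_vports(esw, i, vport)
+ *		esw_debug(esw->dev, "vport 0x%x\n", vport->vport);
+ *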
+ */ +#define mlx5_esw_for_all_vports(esw, i, vport) \ + for ((i) = MLX5_VPORT_PF; \ + (vport) = &(esw)->vports[i], \ + (i) < (esw)->total_vports; (i)++) + +#define mlx5_esw_for_each_vf_vport(esw, i, vport, nvfs) \ + for ((i) = MLX5_VPORT_FIRST_VF; \ + (vport) = &(esw)->vports[i], \ + (i) <= (nvfs); (i)++) + +static struct mlx5_vport *mlx5_eswitch_get_vport(struct mlx5_eswitch *esw, + u16 vport_num) +{ + u16 idx = mlx5_eswitch_vport_num_to_index(esw, vport_num); + + WARN_ON(idx > esw->total_vports - 1); + return &esw->vports[idx]; +} + static int arm_vport_context_events_cmd(struct mlx5_core_dev *dev, u16 vport, u32 events_mask) { @@ -80,8 +105,7 @@ static int arm_vport_context_events_cmd(struct mlx5_core_dev *dev, u16 vport, opcode, MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT); MLX5_SET(modify_nic_vport_context_in, in, field_select.change_event, 1); MLX5_SET(modify_nic_vport_context_in, in, vport_number, vport); - if (vport) - MLX5_SET(modify_nic_vport_context_in, in, other_vport, 1); + MLX5_SET(modify_nic_vport_context_in, in, other_vport, 1); nic_vport_ctx = MLX5_ADDR_OF(modify_nic_vport_context_in, in, nic_vport_context); @@ -109,12 +133,11 @@ static int modify_esw_vport_context_cmd(struct mlx5_core_dev *dev, u16 vport, MLX5_SET(modify_esw_vport_context_in, in, opcode, MLX5_CMD_OP_MODIFY_ESW_VPORT_CONTEXT); MLX5_SET(modify_esw_vport_context_in, in, vport_number, vport); - if (vport) - MLX5_SET(modify_esw_vport_context_in, in, other_vport, 1); + MLX5_SET(modify_esw_vport_context_in, in, other_vport, 1); return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); } -static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u32 vport, +static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u16 vport, u16 vlan, u8 qos, u8 set_flags) { u32 in[MLX5_ST_SZ_DW(modify_esw_vport_context_in)] = {0}; @@ -151,7 +174,7 @@ static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u32 vport, /* E-Switch FDB */ static struct mlx5_flow_handle * -__esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u32 vport, bool rx_rule, +__esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u16 vport, bool rx_rule, u8 mac_c[ETH_ALEN], u8 mac_v[ETH_ALEN]) { int match_header = (is_zero_ether_addr(mac_c) ? 
0 : @@ -187,7 +210,7 @@ __esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u32 vport, bool rx_rule, misc_parameters); mc_misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); - MLX5_SET(fte_match_set_misc, mv_misc, source_port, UPLINK_VPORT); + MLX5_SET(fte_match_set_misc, mv_misc, source_port, MLX5_VPORT_UPLINK); MLX5_SET_TO_ONES(fte_match_set_misc, mc_misc, source_port); } @@ -214,7 +237,7 @@ __esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u32 vport, bool rx_rule, } static struct mlx5_flow_handle * -esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u8 mac[ETH_ALEN], u32 vport) +esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u8 mac[ETH_ALEN], u16 vport) { u8 mac_c[ETH_ALEN]; @@ -223,7 +246,7 @@ esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u8 mac[ETH_ALEN], u32 vport) } static struct mlx5_flow_handle * -esw_fdb_set_vport_allmulti_rule(struct mlx5_eswitch *esw, u32 vport) +esw_fdb_set_vport_allmulti_rule(struct mlx5_eswitch *esw, u16 vport) { u8 mac_c[ETH_ALEN]; u8 mac_v[ETH_ALEN]; @@ -236,7 +259,7 @@ esw_fdb_set_vport_allmulti_rule(struct mlx5_eswitch *esw, u32 vport) } static struct mlx5_flow_handle * -esw_fdb_set_vport_promisc_rule(struct mlx5_eswitch *esw, u32 vport) +esw_fdb_set_vport_promisc_rule(struct mlx5_eswitch *esw, u16 vport) { u8 mac_c[ETH_ALEN]; u8 mac_v[ETH_ALEN]; @@ -246,6 +269,37 @@ esw_fdb_set_vport_promisc_rule(struct mlx5_eswitch *esw, u32 vport) return __esw_fdb_set_vport_rule(esw, vport, true, mac_c, mac_v); } +enum { + LEGACY_VEPA_PRIO = 0, + LEGACY_FDB_PRIO, +}; + +static int esw_create_legacy_vepa_table(struct mlx5_eswitch *esw) +{ + struct mlx5_core_dev *dev = esw->dev; + struct mlx5_flow_namespace *root_ns; + struct mlx5_flow_table *fdb; + int err; + + root_ns = mlx5_get_fdb_sub_ns(dev, 0); + if (!root_ns) { + esw_warn(dev, "Failed to get FDB flow namespace\n"); + return -EOPNOTSUPP; + } + + /* num FTE 2, num FG 2 */ + fdb = mlx5_create_auto_grouped_flow_table(root_ns, LEGACY_VEPA_PRIO, + 2, 2, 0, 0); + if (IS_ERR(fdb)) { + err = PTR_ERR(fdb); + esw_warn(dev, "Failed to create VEPA FDB err %d\n", err); + return err; + } + esw->fdb_table.legacy.vepa_fdb = fdb; + + return 0; +} + static int esw_create_legacy_fdb_table(struct mlx5_eswitch *esw) { int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); @@ -274,8 +328,8 @@ static int esw_create_legacy_fdb_table(struct mlx5_eswitch *esw) return -ENOMEM; table_size = BIT(MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size)); - ft_attr.max_fte = table_size; + ft_attr.prio = LEGACY_FDB_PRIO; fdb = mlx5_create_flow_table(root_ns, &ft_attr); if (IS_ERR(fdb)) { err = PTR_ERR(fdb); @@ -334,41 +388,67 @@ static int esw_create_legacy_fdb_table(struct mlx5_eswitch *esw) esw->fdb_table.legacy.promisc_grp = g; out: - if (err) { - if (!IS_ERR_OR_NULL(esw->fdb_table.legacy.allmulti_grp)) { - mlx5_destroy_flow_group(esw->fdb_table.legacy.allmulti_grp); - esw->fdb_table.legacy.allmulti_grp = NULL; - } - if (!IS_ERR_OR_NULL(esw->fdb_table.legacy.addr_grp)) { - mlx5_destroy_flow_group(esw->fdb_table.legacy.addr_grp); - esw->fdb_table.legacy.addr_grp = NULL; - } - if (!IS_ERR_OR_NULL(esw->fdb_table.legacy.fdb)) { - mlx5_destroy_flow_table(esw->fdb_table.legacy.fdb); - esw->fdb_table.legacy.fdb = NULL; - } - } + if (err) + esw_destroy_legacy_fdb_table(esw); kvfree(flow_group_in); return err; } +static void esw_destroy_legacy_vepa_table(struct mlx5_eswitch *esw) +{ + esw_debug(esw->dev, "Destroy VEPA Table\n"); + if (!esw->fdb_table.legacy.vepa_fdb) + return; + + 
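/* Editor's note (a hedged summary, not in the original patch): the VEPA
+ * table is created at LEGACY_VEPA_PRIO (0), ahead of the legacy FDB at
+ * LEGACY_FDB_PRIO (1), so its uplink/star rules are evaluated first.
+ * esw_create_legacy_table() below builds the VEPA table before the FDB
+ * and unwinds in reverse on failure, which is why this destroy path
+ * tolerates a NULL vepa_fdb.
+ */ +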
mlx5_destroy_flow_table(esw->fdb_table.legacy.vepa_fdb); + esw->fdb_table.legacy.vepa_fdb = NULL; +} + static void esw_destroy_legacy_fdb_table(struct mlx5_eswitch *esw) { + esw_debug(esw->dev, "Destroy FDB Table\n"); if (!esw->fdb_table.legacy.fdb) return; - esw_debug(esw->dev, "Destroy FDB Table\n"); - mlx5_destroy_flow_group(esw->fdb_table.legacy.promisc_grp); - mlx5_destroy_flow_group(esw->fdb_table.legacy.allmulti_grp); - mlx5_destroy_flow_group(esw->fdb_table.legacy.addr_grp); + if (esw->fdb_table.legacy.promisc_grp) + mlx5_destroy_flow_group(esw->fdb_table.legacy.promisc_grp); + if (esw->fdb_table.legacy.allmulti_grp) + mlx5_destroy_flow_group(esw->fdb_table.legacy.allmulti_grp); + if (esw->fdb_table.legacy.addr_grp) + mlx5_destroy_flow_group(esw->fdb_table.legacy.addr_grp); mlx5_destroy_flow_table(esw->fdb_table.legacy.fdb); + esw->fdb_table.legacy.fdb = NULL; esw->fdb_table.legacy.addr_grp = NULL; esw->fdb_table.legacy.allmulti_grp = NULL; esw->fdb_table.legacy.promisc_grp = NULL; } +static int esw_create_legacy_table(struct mlx5_eswitch *esw) +{ + int err; + + memset(&esw->fdb_table.legacy, 0, sizeof(struct legacy_fdb)); + + err = esw_create_legacy_vepa_table(esw); + if (err) + return err; + + err = esw_create_legacy_fdb_table(esw); + if (err) + esw_destroy_legacy_vepa_table(esw); + + return err; +} + +static void esw_destroy_legacy_table(struct mlx5_eswitch *esw) +{ + esw_cleanup_vepa_rules(esw); + esw_destroy_legacy_fdb_table(esw); + esw_destroy_legacy_vepa_table(esw); +} + /* E-Switch vport UC/MC lists management */ typedef int (*vport_addr_action)(struct mlx5_eswitch *esw, struct vport_addr *vaddr); @@ -376,19 +456,19 @@ typedef int (*vport_addr_action)(struct mlx5_eswitch *esw, static int esw_add_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) { u8 *mac = vaddr->node.addr; - u32 vport = vaddr->vport; + u16 vport = vaddr->vport; int err; - /* Skip mlx5_mpfs_add_mac for PFs, - * it is already done by the PF netdev in mlx5e_execute_l2_action + /* Skip mlx5_mpfs_add_mac for eswitch managers, + * it is already done by its netdev in mlx5e_execute_l2_action */ - if (!vport) + if (esw->manager_vport == vport) goto fdb_add; err = mlx5_mpfs_add_mac(esw->dev, mac); if (err) { esw_warn(esw->dev, - "Failed to add L2 table mac(%pM) for vport(%d), err(%d)\n", + "Failed to add L2 table mac(%pM) for vport(0x%x), err(%d)\n", mac, vport, err); return err; } @@ -408,13 +488,13 @@ fdb_add: static int esw_del_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) { u8 *mac = vaddr->node.addr; - u32 vport = vaddr->vport; + u16 vport = vaddr->vport; int err = 0; - /* Skip mlx5_mpfs_del_mac for PFs, - * it is already done by the PF netdev in mlx5e_execute_l2_action + /* Skip mlx5_mpfs_del_mac for eswitch managers, + * it is already done by its netdev in mlx5e_execute_l2_action */ - if (!vport || !vaddr->mpfs) + if (!vaddr->mpfs || esw->manager_vport == vport) goto fdb_del; err = mlx5_mpfs_del_mac(esw->dev, mac); @@ -437,17 +517,18 @@ static void update_allmulti_vports(struct mlx5_eswitch *esw, struct esw_mc_addr *esw_mc) { u8 *mac = vaddr->node.addr; - u32 vport_idx = 0; + struct mlx5_vport *vport; + u16 i, vport_num; - for (vport_idx = 0; vport_idx < esw->total_vports; vport_idx++) { - struct mlx5_vport *vport = &esw->vports[vport_idx]; + mlx5_esw_for_all_vports(esw, i, vport) { struct hlist_head *vport_hash = vport->mc_list; struct vport_addr *iter_vaddr = l2addr_hash_find(vport_hash, mac, struct vport_addr); + vport_num = vport->vport; if (IS_ERR_OR_NULL(vport->allmulti_rule) || 
- vaddr->vport == vport_idx) + vaddr->vport == vport_num) continue; switch (vaddr->action) { case MLX5_ACTION_ADD: @@ -459,14 +540,14 @@ static void update_allmulti_vports(struct mlx5_eswitch *esw, if (!iter_vaddr) { esw_warn(esw->dev, "ALL-MULTI: Failed to add MAC(%pM) to vport[%d] DB\n", - mac, vport_idx); + mac, vport_num); continue; } - iter_vaddr->vport = vport_idx; + iter_vaddr->vport = vport_num; iter_vaddr->flow_rule = esw_fdb_set_vport_rule(esw, mac, - vport_idx); + vport_num); iter_vaddr->mc_promisc = true; break; case MLX5_ACTION_DEL: @@ -484,7 +565,7 @@ static int esw_add_mc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) struct hlist_head *hash = esw->mc_table; struct esw_mc_addr *esw_mc; u8 *mac = vaddr->node.addr; - u32 vport = vaddr->vport; + u16 vport = vaddr->vport; if (!esw->fdb_table.legacy.fdb) return 0; @@ -498,7 +579,7 @@ static int esw_add_mc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) return -ENOMEM; esw_mc->uplink_rule = /* Forward MC MAC to Uplink */ - esw_fdb_set_vport_rule(esw, mac, UPLINK_VPORT); + esw_fdb_set_vport_rule(esw, mac, MLX5_VPORT_UPLINK); /* Add this multicast mac to all the mc promiscuous vports */ update_allmulti_vports(esw, vaddr, esw_mc); @@ -524,7 +605,7 @@ static int esw_del_mc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) struct hlist_head *hash = esw->mc_table; struct esw_mc_addr *esw_mc; u8 *mac = vaddr->node.addr; - u32 vport = vaddr->vport; + u16 vport = vaddr->vport; if (!esw->fdb_table.legacy.fdb) return 0; @@ -563,9 +644,9 @@ static int esw_del_mc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) /* Apply vport UC/MC list to HW l2 table and FDB table */ static void esw_apply_vport_addr_list(struct mlx5_eswitch *esw, - u32 vport_num, int list_type) + u16 vport_num, int list_type) { - struct mlx5_vport *vport = &esw->vports[vport_num]; + struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num); bool is_uc = list_type == MLX5_NVPRT_LIST_TYPE_UC; vport_addr_action vport_addr_add; vport_addr_action vport_addr_del; @@ -598,9 +679,9 @@ static void esw_apply_vport_addr_list(struct mlx5_eswitch *esw, /* Sync vport UC/MC list from vport context */ static void esw_update_vport_addr_list(struct mlx5_eswitch *esw, - u32 vport_num, int list_type) + u16 vport_num, int list_type) { - struct mlx5_vport *vport = &esw->vports[vport_num]; + struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num); bool is_uc = list_type == MLX5_NVPRT_LIST_TYPE_UC; u8 (*mac_list)[ETH_ALEN]; struct l2addr_node *node; @@ -685,9 +766,9 @@ out: /* Sync vport UC/MC list from vport context * Must be called after esw_update_vport_addr_list */ -static void esw_update_vport_mc_promisc(struct mlx5_eswitch *esw, u32 vport_num) +static void esw_update_vport_mc_promisc(struct mlx5_eswitch *esw, u16 vport_num) { - struct mlx5_vport *vport = &esw->vports[vport_num]; + struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num); struct l2addr_node *node; struct vport_addr *addr; struct hlist_head *hash; @@ -720,11 +801,11 @@ static void esw_update_vport_mc_promisc(struct mlx5_eswitch *esw, u32 vport_num) } /* Apply vport rx mode to HW FDB table */ -static void esw_apply_vport_rx_mode(struct mlx5_eswitch *esw, u32 vport_num, +static void esw_apply_vport_rx_mode(struct mlx5_eswitch *esw, u16 vport_num, bool promisc, bool mc_promisc) { + struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num); struct esw_mc_addr *allmulti_addr = &esw->mc_promisc; - struct mlx5_vport *vport = &esw->vports[vport_num]; if 
(IS_ERR_OR_NULL(vport->allmulti_rule) != mc_promisc) goto promisc; @@ -735,7 +816,7 @@ static void esw_apply_vport_rx_mode(struct mlx5_eswitch *esw, u32 vport_num, if (!allmulti_addr->uplink_rule) allmulti_addr->uplink_rule = esw_fdb_set_vport_allmulti_rule(esw, - UPLINK_VPORT); + MLX5_VPORT_UPLINK); allmulti_addr->refcnt++; } else if (vport->allmulti_rule) { mlx5_del_flow_rules(vport->allmulti_rule); @@ -763,9 +844,9 @@ promisc: } /* Sync vport rx mode from vport context */ -static void esw_update_vport_rx_mode(struct mlx5_eswitch *esw, u32 vport_num) +static void esw_update_vport_rx_mode(struct mlx5_eswitch *esw, u16 vport_num) { - struct mlx5_vport *vport = &esw->vports[vport_num]; + struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num); int promisc_all = 0; int promisc_uc = 0; int promisc_mc = 0; @@ -1133,13 +1214,6 @@ static int esw_vport_ingress_config(struct mlx5_eswitch *esw, int err = 0; u8 *smac_v; - if (vport->info.spoofchk && !is_valid_ether_addr(vport->info.mac)) { - mlx5_core_warn(esw->dev, - "vport[%d] configure ingress rules failed, illegal mac with spoofchk\n", - vport->vport); - return -EPERM; - } - esw_vport_cleanup_ingress_rules(esw, vport); if (!vport->info.vlan && !vport->info.qos && !vport->info.spoofchk) { @@ -1349,8 +1423,8 @@ static void esw_destroy_tsar(struct mlx5_eswitch *esw) static int esw_vport_enable_qos(struct mlx5_eswitch *esw, int vport_num, u32 initial_max_rate, u32 initial_bw_share) { + struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num); u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0}; - struct mlx5_vport *vport = &esw->vports[vport_num]; struct mlx5_core_dev *dev = esw->dev; void *vport_elem; int err = 0; @@ -1389,7 +1463,7 @@ static int esw_vport_enable_qos(struct mlx5_eswitch *esw, int vport_num, static void esw_vport_disable_qos(struct mlx5_eswitch *esw, int vport_num) { - struct mlx5_vport *vport = &esw->vports[vport_num]; + struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num); int err = 0; if (!vport->qos.enabled) @@ -1408,8 +1482,8 @@ static void esw_vport_disable_qos(struct mlx5_eswitch *esw, int vport_num) static int esw_vport_qos_config(struct mlx5_eswitch *esw, int vport_num, u32 max_rate, u32 bw_share) { + struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num); u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0}; - struct mlx5_vport *vport = &esw->vports[vport_num]; struct mlx5_core_dev *dev = esw->dev; void *vport_elem; u32 bitmask = 0; @@ -1465,15 +1539,22 @@ static void esw_apply_vport_conf(struct mlx5_eswitch *esw, { int vport_num = vport->vport; - if (!vport_num) + if (esw->manager_vport == vport_num) return; mlx5_modify_vport_admin_state(esw->dev, MLX5_VPORT_STATE_OP_MOD_ESW_VPORT, - vport_num, + vport_num, 1, vport->info.link_state); - mlx5_modify_nic_vport_mac_address(esw->dev, vport_num, vport->info.mac); - mlx5_modify_nic_vport_node_guid(esw->dev, vport_num, vport->info.node_guid); + + /* Host PF has its own mac/guid. 
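+ * Editor's note: hence the vport_num test just below; vport 0 (the
+ * host PF on an ECPF device) keeps the mac/node_guid it programmed
+ * itself, while every other vport gets its saved values re-applied.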
*/ + if (vport_num) { + mlx5_modify_nic_vport_mac_address(esw->dev, vport_num, + vport->info.mac); + mlx5_modify_nic_vport_node_guid(esw->dev, vport_num, + vport->info.node_guid); + } + modify_esw_vport_cvlan(esw->dev, vport_num, vport->info.vlan, vport->info.qos, (vport->info.vlan || vport->info.qos)); @@ -1519,10 +1600,10 @@ static void esw_vport_destroy_drop_counters(struct mlx5_vport *vport) mlx5_fc_destroy(dev, vport->egress.drop_counter); } -static void esw_enable_vport(struct mlx5_eswitch *esw, int vport_num, +static void esw_enable_vport(struct mlx5_eswitch *esw, struct mlx5_vport *vport, int enable_events) { - struct mlx5_vport *vport = &esw->vports[vport_num]; + u16 vport_num = vport->vport; mutex_lock(&esw->state_lock); WARN_ON(vport->enabled); @@ -1545,8 +1626,11 @@ static void esw_enable_vport(struct mlx5_eswitch *esw, int vport_num, vport->enabled_events = enable_events; vport->enabled = true; - /* only PF is trusted by default */ - if (!vport_num) + /* Esw manager is trusted by default. Host PF (vport 0) is trusted as well + * in smartNIC as it's a vport group manager. + */ + if (esw->manager_vport == vport_num || + (!vport_num && mlx5_core_is_ecpf(esw->dev))) vport->info.trusted = true; esw_vport_change_handle_locked(vport); @@ -1556,9 +1640,10 @@ static void esw_enable_vport(struct mlx5_eswitch *esw, int vport_num, mutex_unlock(&esw->state_lock); } -static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num) +static void esw_disable_vport(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) { - struct mlx5_vport *vport = &esw->vports[vport_num]; + u16 vport_num = vport->vport; if (!vport->enabled) return; @@ -1567,7 +1652,6 @@ static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num) /* Mark this vport as disabled to discard new events */ vport->enabled = false; - synchronize_irq(pci_irq_vector(esw->dev->pdev, MLX5_EQ_VEC_ASYNC)); /* Wait for current already scheduled events to complete */ flush_workqueue(esw->work_queue); /* Disable events from this vport */ @@ -1580,10 +1664,11 @@ static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num) esw_vport_change_handle_locked(vport); vport->enabled_events = 0; esw_vport_disable_qos(esw, vport_num); - if (vport_num && esw->mode == SRIOV_LEGACY) { + if (esw->manager_vport != vport_num && + esw->mode == SRIOV_LEGACY) { mlx5_modify_vport_admin_state(esw->dev, MLX5_VPORT_STATE_OP_MOD_ESW_VPORT, - vport_num, + vport_num, 1, MLX5_VPORT_ADMIN_STATE_DOWN); esw_vport_disable_egress_acl(esw, vport); esw_vport_disable_ingress_acl(esw, vport); @@ -1593,12 +1678,29 @@ static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num) mutex_unlock(&esw->state_lock); } +static int eswitch_vport_event(struct notifier_block *nb, + unsigned long type, void *data) +{ + struct mlx5_eswitch *esw = mlx5_nb_cof(nb, struct mlx5_eswitch, nb); + struct mlx5_eqe *eqe = data; + struct mlx5_vport *vport; + u16 vport_num; + + vport_num = be16_to_cpu(eqe->data.vport_change.vport_num); + vport = mlx5_eswitch_get_vport(esw, vport_num); + if (vport->enabled) + queue_work(esw->work_queue, &vport->vport_change_handler); + + return NOTIFY_OK; +} + /* Public E-Switch API */ #define ESW_ALLOWED(esw) ((esw) && MLX5_ESWITCH_MANAGER((esw)->dev)) - int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) { + int vf_nvports = 0, total_nvports = 0; + struct mlx5_vport *vport; int err; int i, enabled_events; @@ -1615,14 +1717,31 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) 
esw_warn(esw->dev, "E-Switch engress ACL is not supported by FW\n"); esw_info(esw->dev, "E-Switch enable SRIOV: nvfs(%d) mode (%d)\n", nvfs, mode); + + if (mode == SRIOV_OFFLOADS) { + if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { + err = mlx5_query_host_params_num_vfs(esw->dev, &vf_nvports); + if (err) + return err; + total_nvports = esw->total_vports; + } else { + vf_nvports = nvfs; + total_nvports = nvfs + MLX5_SPECIAL_VPORTS(esw->dev); + } + } + esw->mode = mode; + mlx5_lag_update(esw->dev); + if (mode == SRIOV_LEGACY) { - err = esw_create_legacy_fdb_table(esw); + err = esw_create_legacy_table(esw); + if (err) + goto abort; } else { + mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_ETH); mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB); - - err = esw_offloads_init(esw, nvfs + 1); + err = esw_offloads_init(esw, vf_nvports, total_nvports); } if (err) @@ -1637,8 +1756,25 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) * 2. FDB/Eswitch is programmed by user space tools */ enabled_events = (mode == SRIOV_LEGACY) ? SRIOV_VPORT_EVENTS : 0; - for (i = 0; i <= nvfs; i++) - esw_enable_vport(esw, i, enabled_events); + + /* Enable PF vport */ + vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_PF); + esw_enable_vport(esw, vport, enabled_events); + + /* Enable ECPF vports */ + if (mlx5_ecpf_vport_exists(esw->dev)) { + vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_ECPF); + esw_enable_vport(esw, vport, enabled_events); + } + + /* Enable VF vports */ + mlx5_esw_for_each_vf_vport(esw, i, vport, nvfs) + esw_enable_vport(esw, vport, enabled_events); + + if (mode == SRIOV_LEGACY) { + MLX5_NB_INIT(&esw->nb, eswitch_vport_event, NIC_VPORT_CHANGE); + mlx5_eq_notifier_register(esw->dev, &esw->nb); + } esw_info(esw->dev, "SRIOV enabled: active vports(%d)\n", esw->enabled_vports); @@ -1647,8 +1783,10 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) abort: esw->mode = SRIOV_NONE; - if (mode == SRIOV_OFFLOADS) + if (mode == SRIOV_OFFLOADS) { mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB); + mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_ETH); + } return err; } @@ -1656,8 +1794,8 @@ abort: void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) { struct esw_mc_addr *mc_promisc; + struct mlx5_vport *vport; int old_mode; - int nvports; int i; if (!ESW_ALLOWED(esw) || esw->mode == SRIOV_NONE) @@ -1667,10 +1805,12 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) esw->enabled_vports, esw->mode); mc_promisc = &esw->mc_promisc; - nvports = esw->enabled_vports; - for (i = 0; i < esw->total_vports; i++) - esw_disable_vport(esw, i); + if (esw->mode == SRIOV_LEGACY) + mlx5_eq_notifier_unregister(esw->dev, &esw->nb); + + mlx5_esw_for_all_vports(esw, i, vport) + esw_disable_vport(esw, vport); if (mc_promisc && mc_promisc->uplink_rule) mlx5_del_flow_rules(mc_promisc->uplink_rule); @@ -1678,25 +1818,29 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) esw_destroy_tsar(esw); if (esw->mode == SRIOV_LEGACY) - esw_destroy_legacy_fdb_table(esw); + esw_destroy_legacy_table(esw); else if (esw->mode == SRIOV_OFFLOADS) - esw_offloads_cleanup(esw, nvports); + esw_offloads_cleanup(esw); old_mode = esw->mode; esw->mode = SRIOV_NONE; - if (old_mode == SRIOV_OFFLOADS) + mlx5_lag_update(esw->dev); + + if (old_mode == SRIOV_OFFLOADS) { mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB); + mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_ETH); + } } int mlx5_eswitch_init(struct mlx5_core_dev *dev) { int 
total_vports = MLX5_TOTAL_VPORTS(dev); struct mlx5_eswitch *esw; - int vport_num; - int err; + struct mlx5_vport *vport; + int err, i; - if (!MLX5_ESWITCH_MANAGER(dev)) + if (!MLX5_VPORT_MANAGER(dev)) return 0; esw_info(dev, @@ -1710,6 +1854,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) return -ENOMEM; esw->dev = dev; + esw->manager_vport = mlx5_eswitch_manager_vport(dev); esw->work_queue = create_singlethread_workqueue("mlx5_esw_wq"); if (!esw->work_queue) { @@ -1724,6 +1869,8 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) goto abort; } + esw->total_vports = total_vports; + err = esw_offloads_init_reps(esw); if (err) goto abort; @@ -1732,17 +1879,14 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) hash_init(esw->offloads.mod_hdr_tbl); mutex_init(&esw->state_lock); - for (vport_num = 0; vport_num < total_vports; vport_num++) { - struct mlx5_vport *vport = &esw->vports[vport_num]; - - vport->vport = vport_num; + mlx5_esw_for_all_vports(esw, i, vport) { + vport->vport = mlx5_eswitch_index_to_vport_num(esw, i); vport->info.link_state = MLX5_VPORT_ADMIN_STATE_AUTO; vport->dev = dev; INIT_WORK(&vport->vport_change_handler, esw_vport_change_handler); } - esw->total_vports = total_vports; esw->enabled_vports = 0; esw->mode = SRIOV_NONE; esw->offloads.inline_mode = MLX5_INLINE_MODE_NONE; @@ -1765,7 +1909,7 @@ abort: void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) { - if (!esw || !MLX5_ESWITCH_MANAGER(esw->dev)) + if (!esw || !MLX5_VPORT_MANAGER(esw->dev)) return; esw_info(esw->dev, "cleanup\n"); @@ -1777,23 +1921,6 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) kfree(esw); } -void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe) -{ - struct mlx5_eqe_vport_change *vc_eqe = &eqe->data.vport_change; - u16 vport_num = be16_to_cpu(vc_eqe->vport_num); - struct mlx5_vport *vport; - - if (!esw) { - pr_warn("MLX5 E-Switch: vport %d got an event while eswitch is not initialized\n", - vport_num); - return; - } - - vport = &esw->vports[vport_num]; - if (vport->enabled) - queue_work(esw->work_queue, &vport->vport_change_handler); -} - /* Vport Administration */ #define LEGAL_VPORT(esw, vport) (vport >= 0 && vport < esw->total_vports) @@ -1804,7 +1931,7 @@ int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, u64 node_guid; int err = 0; - if (!MLX5_CAP_GEN(esw->dev, vport_group_manager)) + if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager)) return -EPERM; if (!LEGAL_VPORT(esw, vport) || is_multicast_ether_addr(mac)) return -EINVAL; @@ -1812,13 +1939,10 @@ int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, mutex_lock(&esw->state_lock); evport = &esw->vports[vport]; - if (evport->info.spoofchk && !is_valid_ether_addr(mac)) { + if (evport->info.spoofchk && !is_valid_ether_addr(mac)) mlx5_core_warn(esw->dev, - "MAC invalidation is not allowed when spoofchk is on, vport(%d)\n", + "Set invalid MAC while spoofchk is on, vport(%d)\n", vport); - err = -EPERM; - goto unlock; - } err = mlx5_modify_nic_vport_mac_address(esw->dev, vport, mac); if (err) { @@ -1861,7 +1985,7 @@ int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw, err = mlx5_modify_vport_admin_state(esw->dev, MLX5_VPORT_STATE_OP_MOD_ESW_VPORT, - vport, link_state); + vport, 1, link_state); if (err) { mlx5_core_warn(esw->dev, "Failed to set vport %d link state, err = %d", @@ -1881,7 +2005,7 @@ int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw, { struct mlx5_vport *evport; - if (!MLX5_CAP_GEN(esw->dev, vport_group_manager)) + if (!esw || !MLX5_CAP_GEN(esw->dev, 
vport_group_manager)) return -EPERM; if (!LEGAL_VPORT(esw, vport)) return -EINVAL; @@ -1964,6 +2088,10 @@ int mlx5_eswitch_set_vport_spoofchk(struct mlx5_eswitch *esw, evport = &esw->vports[vport]; pschk = evport->info.spoofchk; evport->info.spoofchk = spoofchk; + if (pschk && !is_valid_ether_addr(evport->info.mac)) + mlx5_core_warn(esw->dev, + "Spoofchk is set while MAC is invalid, vport(%d)\n", + evport->vport); if (evport->enabled && esw->mode == SRIOV_LEGACY) err = esw_vport_ingress_config(esw, evport); if (err) @@ -1973,6 +2101,128 @@ int mlx5_eswitch_set_vport_spoofchk(struct mlx5_eswitch *esw, return err; } +static void esw_cleanup_vepa_rules(struct mlx5_eswitch *esw) +{ + if (esw->fdb_table.legacy.vepa_uplink_rule) + mlx5_del_flow_rules(esw->fdb_table.legacy.vepa_uplink_rule); + + if (esw->fdb_table.legacy.vepa_star_rule) + mlx5_del_flow_rules(esw->fdb_table.legacy.vepa_star_rule); + + esw->fdb_table.legacy.vepa_uplink_rule = NULL; + esw->fdb_table.legacy.vepa_star_rule = NULL; +} + +static int _mlx5_eswitch_set_vepa_locked(struct mlx5_eswitch *esw, + u8 setting) +{ + struct mlx5_flow_destination dest = {}; + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_handle *flow_rule; + struct mlx5_flow_spec *spec; + int err = 0; + void *misc; + + if (!setting) { + esw_cleanup_vepa_rules(esw); + return 0; + } + + if (esw->fdb_table.legacy.vepa_uplink_rule) + return 0; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return -ENOMEM; + + /* Uplink rule forward uplink traffic to FDB */ + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); + MLX5_SET(fte_match_set_misc, misc, source_port, MLX5_VPORT_UPLINK); + + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); + MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); + + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS; + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest.ft = esw->fdb_table.legacy.fdb; + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + flow_rule = mlx5_add_flow_rules(esw->fdb_table.legacy.vepa_fdb, spec, + &flow_act, &dest, 1); + if (IS_ERR(flow_rule)) { + err = PTR_ERR(flow_rule); + goto out; + } else { + esw->fdb_table.legacy.vepa_uplink_rule = flow_rule; + } + + /* Star rule to forward all traffic to uplink vport */ + memset(spec, 0, sizeof(*spec)); + memset(&dest, 0, sizeof(dest)); + dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; + dest.vport.num = MLX5_VPORT_UPLINK; + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + flow_rule = mlx5_add_flow_rules(esw->fdb_table.legacy.vepa_fdb, spec, + &flow_act, &dest, 1); + if (IS_ERR(flow_rule)) { + err = PTR_ERR(flow_rule); + goto out; + } else { + esw->fdb_table.legacy.vepa_star_rule = flow_rule; + } + +out: + kvfree(spec); + if (err) + esw_cleanup_vepa_rules(esw); + return err; +} + +int mlx5_eswitch_set_vepa(struct mlx5_eswitch *esw, u8 setting) +{ + int err = 0; + + if (!esw) + return -EOPNOTSUPP; + + if (!ESW_ALLOWED(esw)) + return -EPERM; + + mutex_lock(&esw->state_lock); + if (esw->mode != SRIOV_LEGACY) { + err = -EOPNOTSUPP; + goto out; + } + + err = _mlx5_eswitch_set_vepa_locked(esw, setting); + +out: + mutex_unlock(&esw->state_lock); + return err; +} + +int mlx5_eswitch_get_vepa(struct mlx5_eswitch *esw, u8 *setting) +{ + int err = 0; + + if (!esw) + return -EOPNOTSUPP; + + if (!ESW_ALLOWED(esw)) + return -EPERM; + + mutex_lock(&esw->state_lock); + if (esw->mode != SRIOV_LEGACY) { + err = -EOPNOTSUPP; + goto out; + } + + *setting = esw->fdb_table.legacy.vepa_uplink_rule ? 
1 : 0; + +out: + mutex_unlock(&esw->state_lock); + return err; +} + int mlx5_eswitch_set_vport_trust(struct mlx5_eswitch *esw, int vport, bool setting) { @@ -2000,8 +2250,7 @@ static u32 calculate_vports_min_rate_divider(struct mlx5_eswitch *esw) u32 max_guarantee = 0; int i; - for (i = 0; i < esw->total_vports; i++) { - evport = &esw->vports[i]; + mlx5_esw_for_all_vports(esw, i, evport) { if (!evport->enabled || evport->info.min_rate < max_guarantee) continue; max_guarantee = evport->info.min_rate; @@ -2020,8 +2269,7 @@ static int normalize_vports_min_rate(struct mlx5_eswitch *esw, u32 divider) int err; int i; - for (i = 0; i < esw->total_vports; i++) { - evport = &esw->vports[i]; + mlx5_esw_for_all_vports(esw, i, evport) { if (!evport->enabled) continue; vport_min_rate = evport->info.min_rate; @@ -2036,7 +2284,7 @@ static int normalize_vports_min_rate(struct mlx5_eswitch *esw, u32 divider) if (bw_share == evport->qos.bw_share) continue; - err = esw_vport_qos_config(esw, i, vport_max_rate, + err = esw_vport_qos_config(esw, evport->vport, vport_max_rate, bw_share); if (!err) evport->qos.bw_share = bw_share; @@ -2050,19 +2298,24 @@ static int normalize_vports_min_rate(struct mlx5_eswitch *esw, u32 divider) int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw, int vport, u32 max_rate, u32 min_rate) { - u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share); - bool min_rate_supported = MLX5_CAP_QOS(esw->dev, esw_bw_share) && - fw_max_bw_share >= MLX5_MIN_BW_SHARE; - bool max_rate_supported = MLX5_CAP_QOS(esw->dev, esw_rate_limit); struct mlx5_vport *evport; + u32 fw_max_bw_share; u32 previous_min_rate; u32 divider; + bool min_rate_supported; + bool max_rate_supported; int err = 0; if (!ESW_ALLOWED(esw)) return -EPERM; if (!LEGAL_VPORT(esw, vport)) return -EINVAL; + + fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share); + min_rate_supported = MLX5_CAP_QOS(esw->dev, esw_bw_share) && + fw_max_bw_share >= MLX5_MIN_BW_SHARE; + max_rate_supported = MLX5_CAP_QOS(esw->dev, esw_rate_limit); + if ((min_rate && !min_rate_supported) || (max_rate && !max_rate_supported)) return -EOPNOTSUPP; @@ -2119,7 +2372,7 @@ static int mlx5_eswitch_query_vport_drop_stats(struct mlx5_core_dev *dev, !MLX5_CAP_GEN(dev, transmit_discard_vport_down)) return 0; - err = mlx5_query_vport_down_stats(dev, vport_idx, + err = mlx5_query_vport_down_stats(dev, vport_idx, 1, &rx_discard_vport_down, &tx_discard_vport_down); if (err) @@ -2156,8 +2409,7 @@ int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw, MLX5_CMD_OP_QUERY_VPORT_COUNTER); MLX5_SET(query_vport_counter_in, in, op_mod, 0); MLX5_SET(query_vport_counter_in, in, vport_number, vport); - if (vport) - MLX5_SET(query_vport_counter_in, in, other_vport, 1); + MLX5_SET(query_vport_counter_in, in, other_vport, 1); memset(out, 0, outlen); err = mlx5_cmd_exec(esw->dev, in, sizeof(in), out, outlen); @@ -2219,3 +2471,21 @@ u8 mlx5_eswitch_mode(struct mlx5_eswitch *esw) return ESW_ALLOWED(esw) ? 
esw->mode : SRIOV_NONE; } EXPORT_SYMBOL_GPL(mlx5_eswitch_mode); + +bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1) +{ + if ((dev0->priv.eswitch->mode == SRIOV_NONE && + dev1->priv.eswitch->mode == SRIOV_NONE) || + (dev0->priv.eswitch->mode == SRIOV_OFFLOADS && + dev1->priv.eswitch->mode == SRIOV_OFFLOADS)) + return true; + + return false; +} + +bool mlx5_esw_multipath_prereq(struct mlx5_core_dev *dev0, + struct mlx5_core_dev *dev1) +{ + return (dev0->priv.eswitch->mode == SRIOV_OFFLOADS && + dev1->priv.eswitch->mode == SRIOV_OFFLOADS); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index aaafc9f17115..3f3cd32ae60a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -38,6 +38,7 @@ #include <net/devlink.h> #include <linux/mlx5/device.h> #include <linux/mlx5/eswitch.h> +#include <linux/mlx5/vport.h> #include <linux/mlx5/fs.h> #include "lib/mpfs.h" @@ -49,8 +50,6 @@ #define MLX5_MAX_MC_PER_VPORT(dev) \ (1 << MLX5_CAP_GEN(dev, log_max_current_mc_list)) -#define FDB_UPLINK_VPORT 0xffff - #define MLX5_MIN_BW_SHARE 1 #define MLX5_RATE_TO_BW_SHARE(rate, divider, limit) \ @@ -138,11 +137,16 @@ struct mlx5_eswitch_fdb { struct mlx5_flow_group *addr_grp; struct mlx5_flow_group *allmulti_grp; struct mlx5_flow_group *promisc_grp; + struct mlx5_flow_table *vepa_fdb; + struct mlx5_flow_handle *vepa_uplink_rule; + struct mlx5_flow_handle *vepa_star_rule; } legacy; struct offloads_fdb { struct mlx5_flow_table *slow_fdb; struct mlx5_flow_group *send_to_vport_grp; + struct mlx5_flow_group *peer_miss_grp; + struct mlx5_flow_handle **peer_miss_rules; struct mlx5_flow_group *miss_grp; struct mlx5_flow_handle *miss_rule_uni; struct mlx5_flow_handle *miss_rule_multi; @@ -165,6 +169,8 @@ struct mlx5_esw_offload { struct mlx5_flow_table *ft_offloads; struct mlx5_flow_group *vport_rx_group; struct mlx5_eswitch_rep *vport_reps; + struct list_head peer_flows; + struct mutex peer_mutex; DECLARE_HASHTABLE(encap_tbl, 8); DECLARE_HASHTABLE(mod_hdr_tbl, 8); u8 inline_mode; @@ -179,8 +185,19 @@ struct esw_mc_addr { /* SRIOV only */ u32 refcnt; }; +struct mlx5_host_work { + struct work_struct work; + struct mlx5_eswitch *esw; +}; + +struct mlx5_host_info { + struct mlx5_nb nb; + u16 num_vfs; +}; + struct mlx5_eswitch { struct mlx5_core_dev *dev; + struct mlx5_nb nb; struct mlx5_eswitch_fdb fdb_table; struct hlist_head mc_table[MLX5_L2_ADDR_HASH_SIZE]; struct workqueue_struct *work_queue; @@ -201,17 +218,19 @@ struct mlx5_eswitch { struct mlx5_esw_offload offloads; int mode; int nvports; + u16 manager_vport; + struct mlx5_host_info host_info; }; -void esw_offloads_cleanup(struct mlx5_eswitch *esw, int nvports); -int esw_offloads_init(struct mlx5_eswitch *esw, int nvports); +void esw_offloads_cleanup(struct mlx5_eswitch *esw); +int esw_offloads_init(struct mlx5_eswitch *esw, int vf_nvports, + int total_nvports); void esw_offloads_cleanup_reps(struct mlx5_eswitch *esw); int esw_offloads_init_reps(struct mlx5_eswitch *esw); /* E-Switch API */ int mlx5_eswitch_init(struct mlx5_core_dev *dev); void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw); -void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe); int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode); void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw); int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, @@ -226,6 +245,8 @@ int 
mlx5_eswitch_set_vport_trust(struct mlx5_eswitch *esw, int vport_num, bool setting); int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw, int vport, u32 max_rate, u32 min_rate); +int mlx5_eswitch_set_vepa(struct mlx5_eswitch *esw, u8 setting); +int mlx5_eswitch_get_vepa(struct mlx5_eswitch *esw, u8 *setting); int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw, int vport, struct ifla_vf_info *ivi); int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw, @@ -281,13 +302,17 @@ enum mlx5_flow_match_level { /* current maximum for flow based vport multicasting */ #define MLX5_MAX_FLOW_FWD_VPORTS 2 +enum { + MLX5_ESW_DEST_ENCAP = BIT(0), + MLX5_ESW_DEST_ENCAP_VALID = BIT(1), +}; + struct mlx5_esw_flow_attr { struct mlx5_eswitch_rep *in_rep; - struct mlx5_eswitch_rep *out_rep[MLX5_MAX_FLOW_FWD_VPORTS]; - struct mlx5_core_dev *out_mdev[MLX5_MAX_FLOW_FWD_VPORTS]; struct mlx5_core_dev *in_mdev; + struct mlx5_core_dev *counter_dev; - int mirror_count; + int split_count; int out_count; int action; @@ -296,9 +321,15 @@ struct mlx5_esw_flow_attr { u8 vlan_prio[MLX5_FS_VLAN_DEPTH]; u8 total_vlan; bool vlan_handled; - u32 encap_id; + struct { + u32 flags; + struct mlx5_eswitch_rep *rep; + struct mlx5_core_dev *mdev; + u32 encap_id; + } dests[MLX5_MAX_FLOW_FWD_VPORTS]; u32 mod_hdr_id; u8 match_level; + u8 tunnel_match_level; struct mlx5_fc *counter; u32 chain; u16 prio; @@ -338,6 +369,11 @@ static inline bool mlx5_eswitch_vlan_actions_supported(struct mlx5_core_dev *dev MLX5_CAP_ESW_FLOWTABLE_FDB(dev, push_vlan_2); } +bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, + struct mlx5_core_dev *dev1); +bool mlx5_esw_multipath_prereq(struct mlx5_core_dev *dev0, + struct mlx5_core_dev *dev1); + #define MLX5_DEBUG_ESWITCH_MASK BIT(3) #define esw_info(dev, format, ...) \ @@ -348,13 +384,60 @@ static inline bool mlx5_eswitch_vlan_actions_supported(struct mlx5_core_dev *dev #define esw_debug(dev, format, ...) \ mlx5_core_dbg_mask(dev, MLX5_DEBUG_ESWITCH_MASK, format, ##__VA_ARGS__) + +/* The returned number is valid only when the dev is eswitch manager. */ +static inline u16 mlx5_eswitch_manager_vport(struct mlx5_core_dev *dev) +{ + return mlx5_core_is_ecpf_esw_manager(dev) ? 
+ MLX5_VPORT_ECPF : MLX5_VPORT_PF; +} + +static inline int mlx5_eswitch_uplink_idx(struct mlx5_eswitch *esw) +{ + /* The uplink vport is always the last element of the array. */ + return esw->total_vports - 1; +} + +static inline int mlx5_eswitch_ecpf_idx(struct mlx5_eswitch *esw) +{ + return esw->total_vports - 2; +} + +static inline int mlx5_eswitch_vport_num_to_index(struct mlx5_eswitch *esw, + u16 vport_num) +{ + if (vport_num == MLX5_VPORT_ECPF) { + if (!mlx5_ecpf_vport_exists(esw->dev)) + esw_warn(esw->dev, "ECPF vport doesn't exist!\n"); + return mlx5_eswitch_ecpf_idx(esw); + } + + if (vport_num == MLX5_VPORT_UPLINK) + return mlx5_eswitch_uplink_idx(esw); + + return vport_num; +} + +static inline int mlx5_eswitch_index_to_vport_num(struct mlx5_eswitch *esw, + int index) +{ + if (index == mlx5_eswitch_ecpf_idx(esw) && + mlx5_ecpf_vport_exists(esw->dev)) + return MLX5_VPORT_ECPF; + + if (index == mlx5_eswitch_uplink_idx(esw)) + return MLX5_VPORT_UPLINK; + + return index; +} + #else /* CONFIG_MLX5_ESWITCH */ /* eswitch API stubs */ static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; } static inline void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) {} -static inline void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe) {} static inline int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) { return 0; } static inline void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) {} +static inline bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1) { return true; } #define FDB_MAX_CHAIN 1 #define FDB_SLOW_PATH_CHAIN (FDB_MAX_CHAIN + 1) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 9eac137790f5..9b2d78ee22b8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -39,15 +39,60 @@ #include "eswitch.h" #include "en.h" #include "fs_core.h" +#include "lib/devcom.h" +#include "ecpf.h" +#include "lib/eq.h" enum { FDB_FAST_PATH = 0, FDB_SLOW_PATH }; +/* There are two match-all miss flows, one for unicast dst mac and + * one for multicast. + */ +#define MLX5_ESW_MISS_FLOWS (2) + #define fdb_prio_table(esw, chain, prio, level) \ (esw)->fdb_table.offloads.fdb_prio[(chain)][(prio)][(level)] +#define UPLINK_REP_INDEX 0 + +/* The rep getter/iterator are only valid after esw->total_vports + * and vport->vport are initialized in mlx5_eswitch_init. 
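+ * Editor's worked example (the sizes are assumed for illustration): with
+ * two VFs and total_vports = 5, the index helpers above map
+ *
+ *	MLX5_VPORT_PF (0)  -> index 0
+ *	VF vports 1..2     -> indices 1..2
+ *	MLX5_VPORT_ECPF    -> index 3 (total_vports - 2, when it exists)
+ *	MLX5_VPORT_UPLINK  -> index 4 (total_vports - 1)
+ *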
+ */ +#define mlx5_esw_for_all_reps(esw, i, rep) \ + for ((i) = MLX5_VPORT_PF; \ + (rep) = &(esw)->offloads.vport_reps[i], \ + (i) < (esw)->total_vports; (i)++) + +#define mlx5_esw_for_each_vf_rep(esw, i, rep, nvfs) \ + for ((i) = MLX5_VPORT_FIRST_VF; \ + (rep) = &(esw)->offloads.vport_reps[i], \ + (i) <= (nvfs); (i)++) + +#define mlx5_esw_for_each_vf_rep_reverse(esw, i, rep, nvfs) \ + for ((i) = (nvfs); \ + (rep) = &(esw)->offloads.vport_reps[i], \ + (i) >= MLX5_VPORT_FIRST_VF; (i)--) + +#define mlx5_esw_for_each_vf_vport(esw, vport, nvfs) \ + for ((vport) = MLX5_VPORT_FIRST_VF; \ + (vport) <= (nvfs); (vport)++) + +#define mlx5_esw_for_each_vf_vport_reverse(esw, vport, nvfs) \ + for ((vport) = (nvfs); \ + (vport) >= MLX5_VPORT_FIRST_VF; (vport)--) + +static struct mlx5_eswitch_rep *mlx5_eswitch_get_rep(struct mlx5_eswitch *esw, + u16 vport_num) +{ + u16 idx = mlx5_eswitch_vport_num_to_index(esw, vport_num); + + WARN_ON(idx > esw->total_vports - 1); + return &esw->offloads.vport_reps[idx]; +} + static struct mlx5_flow_table * esw_get_prio_table(struct mlx5_eswitch *esw, u32 chain, u16 prio, int level); static void @@ -81,7 +126,7 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, { struct mlx5_flow_destination dest[MLX5_MAX_FLOW_FWD_VPORTS + 1] = {}; struct mlx5_flow_act flow_act = { .flags = FLOW_ACT_NO_APPEND, }; - bool mirror = !!(attr->mirror_count); + bool split = !!(attr->split_count); struct mlx5_flow_handle *rule; struct mlx5_flow_table *fdb; int j, i = 0; @@ -120,13 +165,21 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, dest[i].ft = ft; i++; } else { - for (j = attr->mirror_count; j < attr->out_count; j++) { + for (j = attr->split_count; j < attr->out_count; j++) { dest[i].type = MLX5_FLOW_DESTINATION_TYPE_VPORT; - dest[i].vport.num = attr->out_rep[j]->vport; + dest[i].vport.num = attr->dests[j].rep->vport; dest[i].vport.vhca_id = - MLX5_CAP_GEN(attr->out_mdev[j], vhca_id); - dest[i].vport.vhca_id_valid = - !!MLX5_CAP_ESW(esw->dev, merged_eswitch); + MLX5_CAP_GEN(attr->dests[j].mdev, vhca_id); + if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) + dest[i].vport.flags |= + MLX5_FLOW_DEST_VPORT_VHCA_ID; + if (attr->dests[j].flags & MLX5_ESW_DEST_ENCAP) { + flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; + flow_act.reformat_id = attr->dests[j].encap_id; + dest[i].vport.flags |= MLX5_FLOW_DEST_VPORT_REFORMAT_ID; + dest[i].vport.reformat_id = + attr->dests[j].encap_id; + } i++; } } @@ -151,22 +204,20 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_eswitch_owner_vhca_id); - if (attr->match_level == MLX5_MATCH_NONE) - spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS; - else - spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS | - MLX5_MATCH_MISC_PARAMETERS; - - if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DECAP) - spec->match_criteria_enable |= MLX5_MATCH_INNER_HEADERS; + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS; + if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DECAP) { + if (attr->tunnel_match_level != MLX5_MATCH_NONE) + spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS; + if (attr->match_level != MLX5_MATCH_NONE) + spec->match_criteria_enable |= MLX5_MATCH_INNER_HEADERS; + } else if (attr->match_level != MLX5_MATCH_NONE) { + spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS; + } if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) flow_act.modify_id = attr->mod_hdr_id; - if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT) - 
flow_act.reformat_id = attr->encap_id; - - fdb = esw_get_prio_table(esw, attr->chain, attr->prio, !!mirror); + fdb = esw_get_prio_table(esw, attr->chain, attr->prio, !!split); if (IS_ERR(fdb)) { rule = ERR_CAST(fdb); goto err_esw_get; @@ -181,7 +232,7 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, return rule; err_add_rule: - esw_put_prio_table(esw, attr->chain, attr->prio, !!mirror); + esw_put_prio_table(esw, attr->chain, attr->prio, !!split); err_esw_get: if (attr->dest_chain) esw_put_prio_table(esw, attr->dest_chain, 1, 0); @@ -215,12 +266,17 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw, } flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; - for (i = 0; i < attr->mirror_count; i++) { + for (i = 0; i < attr->split_count; i++) { dest[i].type = MLX5_FLOW_DESTINATION_TYPE_VPORT; - dest[i].vport.num = attr->out_rep[i]->vport; + dest[i].vport.num = attr->dests[i].rep->vport; dest[i].vport.vhca_id = - MLX5_CAP_GEN(attr->out_mdev[i], vhca_id); - dest[i].vport.vhca_id_valid = !!MLX5_CAP_ESW(esw->dev, merged_eswitch); + MLX5_CAP_GEN(attr->dests[i].mdev, vhca_id); + if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) + dest[i].vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID; + if (attr->dests[i].flags & MLX5_ESW_DEST_ENCAP) { + dest[i].vport.flags |= MLX5_FLOW_DEST_VPORT_REFORMAT_ID; + dest[i].vport.reformat_id = attr->dests[i].encap_id; + } } dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; dest[i].ft = fwd_fdb, @@ -268,7 +324,7 @@ __mlx5_eswitch_del_rule(struct mlx5_eswitch *esw, struct mlx5_esw_flow_attr *attr, bool fwd_rule) { - bool mirror = (attr->mirror_count > 0); + bool split = (attr->split_count > 0); mlx5_del_flow_rules(rule); esw->offloads.num_flows--; @@ -277,7 +333,7 @@ __mlx5_eswitch_del_rule(struct mlx5_eswitch *esw, esw_put_prio_table(esw, attr->chain, attr->prio, 1); esw_put_prio_table(esw, attr->chain, attr->prio, 0); } else { - esw_put_prio_table(esw, attr->chain, attr->prio, !!mirror); + esw_put_prio_table(esw, attr->chain, attr->prio, !!split); if (attr->dest_chain) esw_put_prio_table(esw, attr->dest_chain, 1, 0); } @@ -307,7 +363,7 @@ static int esw_set_global_vlan_pop(struct mlx5_eswitch *esw, u8 val) esw_debug(esw->dev, "%s applying global %s policy\n", __func__, val ? 
"pop" : "none"); for (vf_vport = 1; vf_vport < esw->enabled_vports; vf_vport++) { rep = &esw->offloads.vport_reps[vf_vport]; - if (!rep->rep_if[REP_ETH].valid) + if (rep->rep_if[REP_ETH].state != REP_LOADED) continue; err = __mlx5_eswitch_set_vport_vlan(esw, rep->vport, 0, 0, val); @@ -325,7 +381,7 @@ esw_vlan_action_get_vport(struct mlx5_esw_flow_attr *attr, bool push, bool pop) struct mlx5_eswitch_rep *in_rep, *out_rep, *vport = NULL; in_rep = attr->in_rep; - out_rep = attr->out_rep[0]; + out_rep = attr->dests[0].rep; if (push) vport = in_rep; @@ -346,17 +402,17 @@ static int esw_add_vlan_action_check(struct mlx5_esw_flow_attr *attr, goto out_notsupp; in_rep = attr->in_rep; - out_rep = attr->out_rep[0]; + out_rep = attr->dests[0].rep; - if (push && in_rep->vport == FDB_UPLINK_VPORT) + if (push && in_rep->vport == MLX5_VPORT_UPLINK) goto out_notsupp; - if (pop && out_rep->vport == FDB_UPLINK_VPORT) + if (pop && out_rep->vport == MLX5_VPORT_UPLINK) goto out_notsupp; /* vport has vlan push configured, can't offload VF --> wire rules w.o it */ if (!push && !pop && fwd) - if (in_rep->vlan && out_rep->vport == FDB_UPLINK_VPORT) + if (in_rep->vlan && out_rep->vport == MLX5_VPORT_UPLINK) goto out_notsupp; /* protects against (1) setting rules with different vlans to push and @@ -398,7 +454,7 @@ int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw, if (!push && !pop && fwd) { /* tracks VF --> wire rules without vlan push action */ - if (attr->out_rep[0]->vport == FDB_UPLINK_VPORT) { + if (attr->dests[0].rep->vport == MLX5_VPORT_UPLINK) { vport->vlan_refcount++; attr->vlan_handled = true; } @@ -458,7 +514,7 @@ int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw, if (!push && !pop && fwd) { /* tracks VF --> wire rules without vlan push action */ - if (attr->out_rep[0]->vport == FDB_UPLINK_VPORT) + if (attr->dests[0].rep->vport == MLX5_VPORT_UPLINK) vport->vlan_refcount--; return 0; @@ -505,7 +561,8 @@ mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport, u32 sqn misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); MLX5_SET(fte_match_set_misc, misc, source_sqn, sqn); - MLX5_SET(fte_match_set_misc, misc, source_port, 0x0); /* source vport is 0 */ + /* source vport is the esw manager */ + MLX5_SET(fte_match_set_misc, misc, source_port, esw->manager_vport); misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_sqn); @@ -531,6 +588,134 @@ void mlx5_eswitch_del_send_to_vport_rule(struct mlx5_flow_handle *rule) mlx5_del_flow_rules(rule); } +static void peer_miss_rules_setup(struct mlx5_core_dev *peer_dev, + struct mlx5_flow_spec *spec, + struct mlx5_flow_destination *dest) +{ + void *misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters); + + MLX5_SET(fte_match_set_misc, misc, source_eswitch_owner_vhca_id, + MLX5_CAP_GEN(peer_dev, vhca_id)); + + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS; + + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + misc_parameters); + MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); + MLX5_SET_TO_ONES(fte_match_set_misc, misc, + source_eswitch_owner_vhca_id); + + dest->type = MLX5_FLOW_DESTINATION_TYPE_VPORT; + dest->vport.num = peer_dev->priv.eswitch->manager_vport; + dest->vport.vhca_id = MLX5_CAP_GEN(peer_dev, vhca_id); + dest->vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID; +} + +static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, + struct mlx5_core_dev *peer_dev) +{ + struct 
mlx5_flow_destination dest = {}; + struct mlx5_flow_act flow_act = {0}; + struct mlx5_flow_handle **flows; + struct mlx5_flow_handle *flow; + struct mlx5_flow_spec *spec; + /* total vports is the same for both e-switches */ + int nvports = esw->total_vports; + void *misc; + int err, i; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return -ENOMEM; + + peer_miss_rules_setup(peer_dev, spec, &dest); + + flows = kvzalloc(nvports * sizeof(*flows), GFP_KERNEL); + if (!flows) { + err = -ENOMEM; + goto alloc_flows_err; + } + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters); + + if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { + MLX5_SET(fte_match_set_misc, misc, source_port, MLX5_VPORT_PF); + flow = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, + spec, &flow_act, &dest, 1); + if (IS_ERR(flow)) { + err = PTR_ERR(flow); + goto add_pf_flow_err; + } + flows[MLX5_VPORT_PF] = flow; + } + + if (mlx5_ecpf_vport_exists(esw->dev)) { + MLX5_SET(fte_match_set_misc, misc, source_port, MLX5_VPORT_ECPF); + flow = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, + spec, &flow_act, &dest, 1); + if (IS_ERR(flow)) { + err = PTR_ERR(flow); + goto add_ecpf_flow_err; + } + flows[mlx5_eswitch_ecpf_idx(esw)] = flow; + } + + mlx5_esw_for_each_vf_vport(esw, i, mlx5_core_max_vfs(esw->dev)) { + MLX5_SET(fte_match_set_misc, misc, source_port, i); + flow = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, + spec, &flow_act, &dest, 1); + if (IS_ERR(flow)) { + err = PTR_ERR(flow); + goto add_vf_flow_err; + } + flows[i] = flow; + } + + esw->fdb_table.offloads.peer_miss_rules = flows; + + kvfree(spec); + return 0; + +add_vf_flow_err: + nvports = --i; + mlx5_esw_for_each_vf_vport_reverse(esw, i, nvports) + mlx5_del_flow_rules(flows[i]); + + if (mlx5_ecpf_vport_exists(esw->dev)) + mlx5_del_flow_rules(flows[mlx5_eswitch_ecpf_idx(esw)]); +add_ecpf_flow_err: + if (mlx5_core_is_ecpf_esw_manager(esw->dev)) + mlx5_del_flow_rules(flows[MLX5_VPORT_PF]); +add_pf_flow_err: + esw_warn(esw->dev, "FDB: Failed to add peer miss flow rule err %d\n", err); + kvfree(flows); +alloc_flows_err: + kvfree(spec); + return err; +} + +static void esw_del_fdb_peer_miss_rules(struct mlx5_eswitch *esw) +{ + struct mlx5_flow_handle **flows; + int i; + + flows = esw->fdb_table.offloads.peer_miss_rules; + + mlx5_esw_for_each_vf_vport_reverse(esw, i, mlx5_core_max_vfs(esw->dev)) + mlx5_del_flow_rules(flows[i]); + + if (mlx5_ecpf_vport_exists(esw->dev)) + mlx5_del_flow_rules(flows[mlx5_eswitch_ecpf_idx(esw)]); + + if (mlx5_core_is_ecpf_esw_manager(esw->dev)) + mlx5_del_flow_rules(flows[MLX5_VPORT_PF]); + + kvfree(flows); +} + static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw) { struct mlx5_flow_act flow_act = {0}; @@ -557,7 +742,7 @@ static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw) dmac_c[0] = 0x01; dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; - dest.vport.num = 0; + dest.vport.num = esw->manager_vport; flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, spec, @@ -801,7 +986,8 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports) esw->fdb_table.offloads.fdb_left[i] = ESW_POOLS[i] <= fdb_max ? 
ESW_SIZE / ESW_POOLS[i] : 0; - table_size = nvports * MAX_SQ_NVPORTS + MAX_PF_SQ + 2; + table_size = nvports * MAX_SQ_NVPORTS + MAX_PF_SQ + + MLX5_ESW_MISS_FLOWS + esw->total_vports; /* create the slow path fdb with encap set, so further table instances * can be created at run time while VFs are probed if the FW allows that. @@ -856,6 +1042,34 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports) } esw->fdb_table.offloads.send_to_vport_grp = g; + /* create peer esw miss group */ + memset(flow_group_in, 0, inlen); + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, + MLX5_MATCH_MISC_PARAMETERS); + + match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, + match_criteria); + + MLX5_SET_TO_ONES(fte_match_param, match_criteria, + misc_parameters.source_port); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, + misc_parameters.source_eswitch_owner_vhca_id); + + MLX5_SET(create_flow_group_in, flow_group_in, + source_eswitch_owner_vhca_id_valid, 1); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ix); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, + ix + esw->total_vports - 1); + ix += esw->total_vports; + + g = mlx5_create_flow_group(fdb, flow_group_in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + esw_warn(dev, "Failed to create peer miss flow group err(%d)\n", err); + goto peer_miss_err; + } + esw->fdb_table.offloads.peer_miss_grp = g; + /* create miss group */ memset(flow_group_in, 0, inlen); MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, @@ -867,7 +1081,8 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports) dmac[0] = 0x01; MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ix); - MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ix + 2); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, + ix + MLX5_ESW_MISS_FLOWS); g = mlx5_create_flow_group(fdb, flow_group_in); if (IS_ERR(g)) { @@ -888,6 +1103,8 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports) miss_rule_err: mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp); miss_err: + mlx5_destroy_flow_group(esw->fdb_table.offloads.peer_miss_grp); +peer_miss_err: mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp); send_vport_err: esw_destroy_offloads_fast_fdb_tables(esw); @@ -907,13 +1124,14 @@ static void esw_destroy_offloads_fdb_tables(struct mlx5_eswitch *esw) mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_multi); mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_uni); mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp); + mlx5_destroy_flow_group(esw->fdb_table.offloads.peer_miss_grp); mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp); mlx5_destroy_flow_table(esw->fdb_table.offloads.slow_fdb); esw_destroy_offloads_fast_fdb_tables(esw); } -static int esw_create_offloads_table(struct mlx5_eswitch *esw) +static int esw_create_offloads_table(struct mlx5_eswitch *esw, int nvports) { struct mlx5_flow_table_attr ft_attr = {}; struct mlx5_core_dev *dev = esw->dev; @@ -927,7 +1145,7 @@ static int esw_create_offloads_table(struct mlx5_eswitch *esw) return -EOPNOTSUPP; } - ft_attr.max_fte = dev->priv.sriov.num_vfs + 2; + ft_attr.max_fte = nvports + MLX5_ESW_MISS_FLOWS; ft_offloads = mlx5_create_flow_table(ns, &ft_attr); if (IS_ERR(ft_offloads)) { @@ -947,16 +1165,15 @@ static void esw_destroy_offloads_table(struct mlx5_eswitch *esw) mlx5_destroy_flow_table(offloads->ft_offloads); } -static 
int esw_create_vport_rx_group(struct mlx5_eswitch *esw) +static int esw_create_vport_rx_group(struct mlx5_eswitch *esw, int nvports) { int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); struct mlx5_flow_group *g; - struct mlx5_priv *priv = &esw->dev->priv; u32 *flow_group_in; void *match_criteria, *misc; int err = 0; - int nvports = priv->sriov.num_vfs + 2; + nvports = nvports + MLX5_ESW_MISS_FLOWS; flow_group_in = kvzalloc(inlen, GFP_KERNEL); if (!flow_group_in) return -ENOMEM; @@ -1033,7 +1250,8 @@ static int esw_offloads_start(struct mlx5_eswitch *esw, { int err, err1, num_vfs = esw->dev->priv.sriov.num_vfs; - if (esw->mode != SRIOV_LEGACY) { + if (esw->mode != SRIOV_LEGACY && + !mlx5_core_is_ecpf_esw_manager(esw->dev)) { NL_SET_ERR_MSG_MOD(extack, "Can't set offloads mode, SRIOV legacy not enabled"); return -EINVAL; @@ -1071,9 +1289,8 @@ int esw_offloads_init_reps(struct mlx5_eswitch *esw) { int total_vfs = MLX5_TOTAL_VPORTS(esw->dev); struct mlx5_core_dev *dev = esw->dev; - struct mlx5_esw_offload *offloads; struct mlx5_eswitch_rep *rep; - u8 hw_id[ETH_ALEN]; + u8 hw_id[ETH_ALEN], rep_type; int vport; esw->offloads.vport_reps = kcalloc(total_vfs, @@ -1082,75 +1299,203 @@ int esw_offloads_init_reps(struct mlx5_eswitch *esw) if (!esw->offloads.vport_reps) return -ENOMEM; - offloads = &esw->offloads; mlx5_query_nic_vport_mac_address(dev, 0, hw_id); - for (vport = 0; vport < total_vfs; vport++) { - rep = &offloads->vport_reps[vport]; - - rep->vport = vport; + mlx5_esw_for_all_reps(esw, vport, rep) { + rep->vport = mlx5_eswitch_index_to_vport_num(esw, vport); ether_addr_copy(rep->hw_id, hw_id); - } - offloads->vport_reps[0].vport = FDB_UPLINK_VPORT; + for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) + rep->rep_if[rep_type].state = REP_UNREGISTERED; + } return 0; } -static void esw_offloads_unload_reps_type(struct mlx5_eswitch *esw, int nvports, - u8 rep_type) +static void __esw_offloads_unload_rep(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep *rep, u8 rep_type) +{ + if (rep->rep_if[rep_type].state != REP_LOADED) + return; + + rep->rep_if[rep_type].unload(rep); + rep->rep_if[rep_type].state = REP_REGISTERED; +} + +static void __unload_reps_special_vport(struct mlx5_eswitch *esw, u8 rep_type) { struct mlx5_eswitch_rep *rep; - int vport; - for (vport = nvports - 1; vport >= 0; vport--) { - rep = &esw->offloads.vport_reps[vport]; - if (!rep->rep_if[rep_type].valid) - continue; + if (mlx5_ecpf_vport_exists(esw->dev)) { + rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_ECPF); + __esw_offloads_unload_rep(esw, rep, rep_type); + } - rep->rep_if[rep_type].unload(rep); + if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { + rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_PF); + __esw_offloads_unload_rep(esw, rep, rep_type); } + + rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK); + __esw_offloads_unload_rep(esw, rep, rep_type); +} + +static void __unload_reps_vf_vport(struct mlx5_eswitch *esw, int nvports, + u8 rep_type) +{ + struct mlx5_eswitch_rep *rep; + int i; + + mlx5_esw_for_each_vf_rep_reverse(esw, i, rep, nvports) + __esw_offloads_unload_rep(esw, rep, rep_type); +} + +static void esw_offloads_unload_vf_reps(struct mlx5_eswitch *esw, int nvports) +{ + u8 rep_type = NUM_REP_TYPES; + + while (rep_type-- > 0) + __unload_reps_vf_vport(esw, nvports, rep_type); +} + +static void __unload_reps_all_vport(struct mlx5_eswitch *esw, int nvports, + u8 rep_type) +{ + __unload_reps_vf_vport(esw, nvports, rep_type); + + /* Special vports must be the last to unload. 
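+ * VF reps go first, then the ECPF, PF and finally the uplink rep.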
*/ + __unload_reps_special_vport(esw, rep_type); } -static void esw_offloads_unload_reps(struct mlx5_eswitch *esw, int nvports) +static void esw_offloads_unload_all_reps(struct mlx5_eswitch *esw, int nvports) { u8 rep_type = NUM_REP_TYPES; while (rep_type-- > 0) - esw_offloads_unload_reps_type(esw, nvports, rep_type); + __unload_reps_all_vport(esw, nvports, rep_type); } -static int esw_offloads_load_reps_type(struct mlx5_eswitch *esw, int nvports, - u8 rep_type) +static int __esw_offloads_load_rep(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep *rep, u8 rep_type) +{ + int err = 0; + + if (rep->rep_if[rep_type].state != REP_REGISTERED) + return 0; + + err = rep->rep_if[rep_type].load(esw->dev, rep); + if (err) + return err; + + rep->rep_if[rep_type].state = REP_LOADED; + + return 0; +} + +static int __load_reps_special_vport(struct mlx5_eswitch *esw, u8 rep_type) { struct mlx5_eswitch_rep *rep; - int vport; int err; - for (vport = 0; vport < nvports; vport++) { - rep = &esw->offloads.vport_reps[vport]; - if (!rep->rep_if[rep_type].valid) - continue; + rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK); + err = __esw_offloads_load_rep(esw, rep, rep_type); + if (err) + return err; - err = rep->rep_if[rep_type].load(esw->dev, rep); + if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { + rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_PF); + err = __esw_offloads_load_rep(esw, rep, rep_type); if (err) - goto err_reps; + goto err_pf; + } + + if (mlx5_ecpf_vport_exists(esw->dev)) { + rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_ECPF); + err = __esw_offloads_load_rep(esw, rep, rep_type); + if (err) + goto err_ecpf; + } + + return 0; + +err_ecpf: + if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { + rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_PF); + __esw_offloads_unload_rep(esw, rep, rep_type); + } + +err_pf: + rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK); + __esw_offloads_unload_rep(esw, rep, rep_type); + return err; +} + +static int __load_reps_vf_vport(struct mlx5_eswitch *esw, int nvports, + u8 rep_type) +{ + struct mlx5_eswitch_rep *rep; + int err, i; + + mlx5_esw_for_each_vf_rep(esw, i, rep, nvports) { + err = __esw_offloads_load_rep(esw, rep, rep_type); + if (err) + goto err_vf; } return 0; +err_vf: + __unload_reps_vf_vport(esw, --i, rep_type); + return err; +} + +static int esw_offloads_load_vf_reps(struct mlx5_eswitch *esw, int nvports) +{ + u8 rep_type = 0; + int err; + + for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) { + err = __load_reps_vf_vport(esw, nvports, rep_type); + if (err) + goto err_reps; + } + + return err; + err_reps: - esw_offloads_unload_reps_type(esw, vport, rep_type); + while (rep_type-- > 0) + __unload_reps_vf_vport(esw, nvports, rep_type); + return err; +} + +static int __load_reps_all_vport(struct mlx5_eswitch *esw, int nvports, + u8 rep_type) +{ + int err; + + /* Special vports must be loaded first. 
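+ * (uplink, then PF, then ECPF), mirroring the reverse unload order above.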
*/ + err = __load_reps_special_vport(esw, rep_type); + if (err) + return err; + + err = __load_reps_vf_vport(esw, nvports, rep_type); + if (err) + goto err_vfs; + + return 0; + +err_vfs: + __unload_reps_special_vport(esw, rep_type); return err; } -static int esw_offloads_load_reps(struct mlx5_eswitch *esw, int nvports) +static int esw_offloads_load_all_reps(struct mlx5_eswitch *esw, int nvports) { u8 rep_type = 0; int err; for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) { - err = esw_offloads_load_reps_type(esw, nvports, rep_type); + err = __load_reps_all_vport(esw, nvports, rep_type); if (err) goto err_reps; } @@ -1159,37 +1504,130 @@ static int esw_offloads_load_reps(struct mlx5_eswitch *esw, int nvports) err_reps: while (rep_type-- > 0) - esw_offloads_unload_reps_type(esw, nvports, rep_type); + __unload_reps_all_vport(esw, nvports, rep_type); return err; } -int esw_offloads_init(struct mlx5_eswitch *esw, int nvports) +#define ESW_OFFLOADS_DEVCOM_PAIR (0) +#define ESW_OFFLOADS_DEVCOM_UNPAIR (1) + +static int mlx5_esw_offloads_pair(struct mlx5_eswitch *esw, + struct mlx5_eswitch *peer_esw) { int err; + err = esw_add_fdb_peer_miss_rules(esw, peer_esw->dev); + if (err) + return err; + + return 0; +} + +void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw); + +static void mlx5_esw_offloads_unpair(struct mlx5_eswitch *esw) +{ + mlx5e_tc_clean_fdb_peer_flows(esw); + esw_del_fdb_peer_miss_rules(esw); +} + +static int mlx5_esw_offloads_devcom_event(int event, + void *my_data, + void *event_data) +{ + struct mlx5_eswitch *esw = my_data; + struct mlx5_eswitch *peer_esw = event_data; + struct mlx5_devcom *devcom = esw->dev->priv.devcom; + int err; + + switch (event) { + case ESW_OFFLOADS_DEVCOM_PAIR: + err = mlx5_esw_offloads_pair(esw, peer_esw); + if (err) + goto err_out; + + err = mlx5_esw_offloads_pair(peer_esw, esw); + if (err) + goto err_pair; + + mlx5_devcom_set_paired(devcom, MLX5_DEVCOM_ESW_OFFLOADS, true); + break; + + case ESW_OFFLOADS_DEVCOM_UNPAIR: + if (!mlx5_devcom_is_paired(devcom, MLX5_DEVCOM_ESW_OFFLOADS)) + break; + + mlx5_devcom_set_paired(devcom, MLX5_DEVCOM_ESW_OFFLOADS, false); + mlx5_esw_offloads_unpair(peer_esw); + mlx5_esw_offloads_unpair(esw); + break; + } + + return 0; + +err_pair: + mlx5_esw_offloads_unpair(esw); + +err_out: + mlx5_core_err(esw->dev, "esw offloads devcom event failure, event %u err %d", + event, err); + return err; +} + +static void esw_offloads_devcom_init(struct mlx5_eswitch *esw) +{ + struct mlx5_devcom *devcom = esw->dev->priv.devcom; + + INIT_LIST_HEAD(&esw->offloads.peer_flows); + mutex_init(&esw->offloads.peer_mutex); + + if (!MLX5_CAP_ESW(esw->dev, merged_eswitch)) + return; + + mlx5_devcom_register_component(devcom, + MLX5_DEVCOM_ESW_OFFLOADS, + mlx5_esw_offloads_devcom_event, + esw); + + mlx5_devcom_send_event(devcom, + MLX5_DEVCOM_ESW_OFFLOADS, + ESW_OFFLOADS_DEVCOM_PAIR, esw); +} + +static void esw_offloads_devcom_cleanup(struct mlx5_eswitch *esw) +{ + struct mlx5_devcom *devcom = esw->dev->priv.devcom; + + if (!MLX5_CAP_ESW(esw->dev, merged_eswitch)) + return; + + mlx5_devcom_send_event(devcom, MLX5_DEVCOM_ESW_OFFLOADS, + ESW_OFFLOADS_DEVCOM_UNPAIR, esw); + + mlx5_devcom_unregister_component(devcom, MLX5_DEVCOM_ESW_OFFLOADS); +} + +static int esw_offloads_steering_init(struct mlx5_eswitch *esw, int nvports) +{ + int err; + + memset(&esw->fdb_table.offloads, 0, sizeof(struct offloads_fdb)); mutex_init(&esw->fdb_table.offloads.fdb_prio_lock); err = esw_create_offloads_fdb_tables(esw, nvports); if (err) return err; - err = 
esw_create_offloads_table(esw); + err = esw_create_offloads_table(esw, nvports); if (err) goto create_ft_err; - err = esw_create_vport_rx_group(esw); + err = esw_create_vport_rx_group(esw, nvports); if (err) goto create_fg_err; - err = esw_offloads_load_reps(esw, nvports); - if (err) - goto err_reps; - return 0; -err_reps: - esw_destroy_vport_rx_group(esw); - create_fg_err: esw_destroy_offloads_table(esw); @@ -1199,6 +1637,95 @@ create_ft_err: return err; } +static void esw_offloads_steering_cleanup(struct mlx5_eswitch *esw) +{ + esw_destroy_vport_rx_group(esw); + esw_destroy_offloads_table(esw); + esw_destroy_offloads_fdb_tables(esw); +} + +static void esw_host_params_event_handler(struct work_struct *work) +{ + struct mlx5_host_work *host_work; + struct mlx5_eswitch *esw; + int err, num_vf = 0; + + host_work = container_of(work, struct mlx5_host_work, work); + esw = host_work->esw; + + err = mlx5_query_host_params_num_vfs(esw->dev, &num_vf); + if (err || num_vf == esw->host_info.num_vfs) + goto out; + + /* Number of VFs can only change from "0 to x" or "x to 0". */ + if (esw->host_info.num_vfs > 0) { + esw_offloads_unload_vf_reps(esw, esw->host_info.num_vfs); + } else { + err = esw_offloads_load_vf_reps(esw, num_vf); + + if (err) + goto out; + } + + esw->host_info.num_vfs = num_vf; + +out: + kfree(host_work); +} + +static int esw_host_params_event(struct notifier_block *nb, + unsigned long type, void *data) +{ + struct mlx5_host_work *host_work; + struct mlx5_host_info *host_info; + struct mlx5_eswitch *esw; + + host_work = kzalloc(sizeof(*host_work), GFP_ATOMIC); + if (!host_work) + return NOTIFY_DONE; + + host_info = mlx5_nb_cof(nb, struct mlx5_host_info, nb); + esw = container_of(host_info, struct mlx5_eswitch, host_info); + + host_work->esw = esw; + + INIT_WORK(&host_work->work, esw_host_params_event_handler); + queue_work(esw->work_queue, &host_work->work); + + return NOTIFY_OK; +} + +int esw_offloads_init(struct mlx5_eswitch *esw, int vf_nvports, + int total_nvports) +{ + int err; + + mutex_init(&esw->fdb_table.offloads.fdb_prio_lock); + + err = esw_offloads_steering_init(esw, total_nvports); + if (err) + return err; + + err = esw_offloads_load_all_reps(esw, vf_nvports); + if (err) + goto err_reps; + + esw_offloads_devcom_init(esw); + + if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { + MLX5_NB_INIT(&esw->host_info.nb, esw_host_params_event, + HOST_PARAMS_CHANGE); + mlx5_eq_notifier_register(esw->dev, &esw->host_info.nb); + esw->host_info.num_vfs = vf_nvports; + } + + return 0; + +err_reps: + esw_offloads_steering_cleanup(esw); + return err; +} + static int esw_offloads_stop(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack) { @@ -1215,18 +1742,24 @@ static int esw_offloads_stop(struct mlx5_eswitch *esw, } } - /* enable back PF RoCE */ - mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB); - return err; } -void esw_offloads_cleanup(struct mlx5_eswitch *esw, int nvports) +void esw_offloads_cleanup(struct mlx5_eswitch *esw) { - esw_offloads_unload_reps(esw, nvports); - esw_destroy_vport_rx_group(esw); - esw_destroy_offloads_table(esw); - esw_destroy_offloads_fdb_tables(esw); + u16 num_vfs; + + if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { + mlx5_eq_notifier_unregister(esw->dev, &esw->host_info.nb); + flush_workqueue(esw->work_queue); + num_vfs = esw->host_info.num_vfs; + } else { + num_vfs = esw->dev->priv.sriov.num_vfs; + } + + esw_offloads_devcom_cleanup(esw); + esw_offloads_unload_all_reps(esw, num_vfs); + esw_offloads_steering_cleanup(esw); } static int 
esw_mode_from_devlink(u16 mode, u16 *mlx5_mode) @@ -1315,7 +1848,8 @@ static int mlx5_devlink_eswitch_check(struct devlink *devlink) if(!MLX5_ESWITCH_MANAGER(dev)) return -EPERM; - if (dev->priv.eswitch->mode == SRIOV_NONE) + if (dev->priv.eswitch->mode == SRIOV_NONE && + !mlx5_core_is_ecpf_esw_manager(dev)) return -EOPNOTSUPP; return 0; @@ -1527,47 +2061,45 @@ int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, u8 *encap) return 0; } -void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw, - int vport_index, - struct mlx5_eswitch_rep_if *__rep_if, - u8 rep_type) +void mlx5_eswitch_register_vport_reps(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep_if *__rep_if, + u8 rep_type) { - struct mlx5_esw_offload *offloads = &esw->offloads; struct mlx5_eswitch_rep_if *rep_if; + struct mlx5_eswitch_rep *rep; + int i; - rep_if = &offloads->vport_reps[vport_index].rep_if[rep_type]; - - rep_if->load = __rep_if->load; - rep_if->unload = __rep_if->unload; - rep_if->get_proto_dev = __rep_if->get_proto_dev; - rep_if->priv = __rep_if->priv; + mlx5_esw_for_all_reps(esw, i, rep) { + rep_if = &rep->rep_if[rep_type]; + rep_if->load = __rep_if->load; + rep_if->unload = __rep_if->unload; + rep_if->get_proto_dev = __rep_if->get_proto_dev; + rep_if->priv = __rep_if->priv; - rep_if->valid = true; + rep_if->state = REP_REGISTERED; + } } -EXPORT_SYMBOL(mlx5_eswitch_register_vport_rep); +EXPORT_SYMBOL(mlx5_eswitch_register_vport_reps); -void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw, - int vport_index, u8 rep_type) +void mlx5_eswitch_unregister_vport_reps(struct mlx5_eswitch *esw, u8 rep_type) { - struct mlx5_esw_offload *offloads = &esw->offloads; + u16 max_vf = mlx5_core_max_vfs(esw->dev); struct mlx5_eswitch_rep *rep; + int i; - rep = &offloads->vport_reps[vport_index]; - - if (esw->mode == SRIOV_OFFLOADS && esw->vports[vport_index].enabled) - rep->rep_if[rep_type].unload(rep); + if (esw->mode == SRIOV_OFFLOADS) + __unload_reps_all_vport(esw, max_vf, rep_type); - rep->rep_if[rep_type].valid = false; + mlx5_esw_for_all_reps(esw, i, rep) + rep->rep_if[rep_type].state = REP_UNREGISTERED; } -EXPORT_SYMBOL(mlx5_eswitch_unregister_vport_rep); +EXPORT_SYMBOL(mlx5_eswitch_unregister_vport_reps); void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type) { -#define UPLINK_REP_INDEX 0 - struct mlx5_esw_offload *offloads = &esw->offloads; struct mlx5_eswitch_rep *rep; - rep = &offloads->vport_reps[UPLINK_REP_INDEX]; + rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK); return rep->rep_if[rep_type].priv; } @@ -1575,15 +2107,11 @@ void *mlx5_eswitch_get_proto_dev(struct mlx5_eswitch *esw, int vport, u8 rep_type) { - struct mlx5_esw_offload *offloads = &esw->offloads; struct mlx5_eswitch_rep *rep; - if (vport == FDB_UPLINK_VPORT) - vport = UPLINK_REP_INDEX; - - rep = &offloads->vport_reps[vport]; + rep = mlx5_eswitch_get_rep(esw, vport); - if (rep->rep_if[rep_type].valid && + if (rep->rep_if[rep_type].state == REP_LOADED && rep->rep_if[rep_type].get_proto_dev) return rep->rep_if[rep_type].get_proto_dev(rep); return NULL; @@ -1592,13 +2120,13 @@ EXPORT_SYMBOL(mlx5_eswitch_get_proto_dev); void *mlx5_eswitch_uplink_get_proto_dev(struct mlx5_eswitch *esw, u8 rep_type) { - return mlx5_eswitch_get_proto_dev(esw, UPLINK_REP_INDEX, rep_type); + return mlx5_eswitch_get_proto_dev(esw, MLX5_VPORT_UPLINK, rep_type); } EXPORT_SYMBOL(mlx5_eswitch_uplink_get_proto_dev); struct mlx5_eswitch_rep *mlx5_eswitch_vport_rep(struct mlx5_eswitch *esw, int vport) { - return 
&esw->offloads.vport_reps[vport]; + return mlx5_eswitch_get_rep(esw, vport); } EXPORT_SYMBOL(mlx5_eswitch_vport_rep); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/events.c b/drivers/net/ethernet/mellanox/mlx5/core/events.c new file mode 100644 index 000000000000..5d5864e8df3c --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/events.c @@ -0,0 +1,328 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2018 Mellanox Technologies + +#include <linux/mlx5/driver.h> + +#include "mlx5_core.h" +#include "lib/eq.h" +#include "lib/mlx5.h" + +struct mlx5_event_nb { + struct mlx5_nb nb; + void *ctx; +}; + +/* General events handlers for the low level mlx5_core driver + * + * Other Major feature specific events such as + * clock/eswitch/fpga/FW trace and many others, are handled elsewhere, with + * separate notifiers callbacks, specifically by those mlx5 components. + */ +static int any_notifier(struct notifier_block *, unsigned long, void *); +static int temp_warn(struct notifier_block *, unsigned long, void *); +static int port_module(struct notifier_block *, unsigned long, void *); + +/* handler which forwards the event to events->nh, driver notifiers */ +static int forward_event(struct notifier_block *, unsigned long, void *); + +static struct mlx5_nb events_nbs_ref[] = { + /* Events to be proccessed by mlx5_core */ + {.nb.notifier_call = any_notifier, .event_type = MLX5_EVENT_TYPE_NOTIFY_ANY }, + {.nb.notifier_call = temp_warn, .event_type = MLX5_EVENT_TYPE_TEMP_WARN_EVENT }, + {.nb.notifier_call = port_module, .event_type = MLX5_EVENT_TYPE_PORT_MODULE_EVENT }, + + /* Events to be forwarded (as is) to mlx5 core interfaces (mlx5e/mlx5_ib) */ + {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_PORT_CHANGE }, + {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_GENERAL_EVENT }, + /* QP/WQ resource events to forward */ + {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_DCT_DRAINED }, + {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_PATH_MIG }, + {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_COMM_EST }, + {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_SQ_DRAINED }, + {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_SRQ_LAST_WQE }, + {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_WQ_CATAS_ERROR }, + {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_PATH_MIG_FAILED }, + {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR }, + {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_WQ_ACCESS_ERROR }, + /* SRQ events */ + {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_SRQ_CATAS_ERROR }, + {.nb.notifier_call = forward_event, .event_type = MLX5_EVENT_TYPE_SRQ_RQ_LIMIT }, +}; + +struct mlx5_events { + struct mlx5_core_dev *dev; + struct mlx5_event_nb notifiers[ARRAY_SIZE(events_nbs_ref)]; + /* driver notifier chain */ + struct atomic_notifier_head nh; + /* port module events stats */ + struct mlx5_pme_stats pme_stats; +}; + +static const char *eqe_type_str(u8 type) +{ + switch (type) { + case MLX5_EVENT_TYPE_COMP: + return "MLX5_EVENT_TYPE_COMP"; + case MLX5_EVENT_TYPE_PATH_MIG: + return "MLX5_EVENT_TYPE_PATH_MIG"; + case MLX5_EVENT_TYPE_COMM_EST: + return "MLX5_EVENT_TYPE_COMM_EST"; + case MLX5_EVENT_TYPE_SQ_DRAINED: + return "MLX5_EVENT_TYPE_SQ_DRAINED"; + case MLX5_EVENT_TYPE_SRQ_LAST_WQE: + return "MLX5_EVENT_TYPE_SRQ_LAST_WQE"; + case 
MLX5_EVENT_TYPE_SRQ_RQ_LIMIT: + return "MLX5_EVENT_TYPE_SRQ_RQ_LIMIT"; + case MLX5_EVENT_TYPE_CQ_ERROR: + return "MLX5_EVENT_TYPE_CQ_ERROR"; + case MLX5_EVENT_TYPE_WQ_CATAS_ERROR: + return "MLX5_EVENT_TYPE_WQ_CATAS_ERROR"; + case MLX5_EVENT_TYPE_PATH_MIG_FAILED: + return "MLX5_EVENT_TYPE_PATH_MIG_FAILED"; + case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR: + return "MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR"; + case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR: + return "MLX5_EVENT_TYPE_WQ_ACCESS_ERROR"; + case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR: + return "MLX5_EVENT_TYPE_SRQ_CATAS_ERROR"; + case MLX5_EVENT_TYPE_INTERNAL_ERROR: + return "MLX5_EVENT_TYPE_INTERNAL_ERROR"; + case MLX5_EVENT_TYPE_PORT_CHANGE: + return "MLX5_EVENT_TYPE_PORT_CHANGE"; + case MLX5_EVENT_TYPE_GPIO_EVENT: + return "MLX5_EVENT_TYPE_GPIO_EVENT"; + case MLX5_EVENT_TYPE_PORT_MODULE_EVENT: + return "MLX5_EVENT_TYPE_PORT_MODULE_EVENT"; + case MLX5_EVENT_TYPE_TEMP_WARN_EVENT: + return "MLX5_EVENT_TYPE_TEMP_WARN_EVENT"; + case MLX5_EVENT_TYPE_REMOTE_CONFIG: + return "MLX5_EVENT_TYPE_REMOTE_CONFIG"; + case MLX5_EVENT_TYPE_DB_BF_CONGESTION: + return "MLX5_EVENT_TYPE_DB_BF_CONGESTION"; + case MLX5_EVENT_TYPE_STALL_EVENT: + return "MLX5_EVENT_TYPE_STALL_EVENT"; + case MLX5_EVENT_TYPE_CMD: + return "MLX5_EVENT_TYPE_CMD"; + case MLX5_EVENT_TYPE_HOST_PARAMS_CHANGE: + return "MLX5_EVENT_TYPE_HOST_PARAMS_CHANGE"; + case MLX5_EVENT_TYPE_PAGE_REQUEST: + return "MLX5_EVENT_TYPE_PAGE_REQUEST"; + case MLX5_EVENT_TYPE_PAGE_FAULT: + return "MLX5_EVENT_TYPE_PAGE_FAULT"; + case MLX5_EVENT_TYPE_PPS_EVENT: + return "MLX5_EVENT_TYPE_PPS_EVENT"; + case MLX5_EVENT_TYPE_NIC_VPORT_CHANGE: + return "MLX5_EVENT_TYPE_NIC_VPORT_CHANGE"; + case MLX5_EVENT_TYPE_FPGA_ERROR: + return "MLX5_EVENT_TYPE_FPGA_ERROR"; + case MLX5_EVENT_TYPE_FPGA_QP_ERROR: + return "MLX5_EVENT_TYPE_FPGA_QP_ERROR"; + case MLX5_EVENT_TYPE_GENERAL_EVENT: + return "MLX5_EVENT_TYPE_GENERAL_EVENT"; + case MLX5_EVENT_TYPE_MONITOR_COUNTER: + return "MLX5_EVENT_TYPE_MONITOR_COUNTER"; + case MLX5_EVENT_TYPE_DEVICE_TRACER: + return "MLX5_EVENT_TYPE_DEVICE_TRACER"; + default: + return "Unrecognized event"; + } +} + +/* handles all FW events, type == eqe->type */ +static int any_notifier(struct notifier_block *nb, + unsigned long type, void *data) +{ + struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb); + struct mlx5_events *events = event_nb->ctx; + struct mlx5_eqe *eqe = data; + + mlx5_core_dbg(events->dev, "Async eqe type %s, subtype (%d)\n", + eqe_type_str(eqe->type), eqe->sub_type); + return NOTIFY_OK; +} + +/* type == MLX5_EVENT_TYPE_TEMP_WARN_EVENT */ +static int temp_warn(struct notifier_block *nb, unsigned long type, void *data) +{ + struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb); + struct mlx5_events *events = event_nb->ctx; + struct mlx5_eqe *eqe = data; + u64 value_lsb; + u64 value_msb; + + value_lsb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_lsb); + value_msb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_msb); + + mlx5_core_warn(events->dev, + "High temperature on sensors with bit set %llx %llx", + value_msb, value_lsb); + + return NOTIFY_OK; +} + +/* MLX5_EVENT_TYPE_PORT_MODULE_EVENT */ +static const char *mlx5_pme_status_to_string(enum port_module_event_status_type status) +{ + switch (status) { + case MLX5_MODULE_STATUS_PLUGGED: + return "Cable plugged"; + case MLX5_MODULE_STATUS_UNPLUGGED: + return "Cable unplugged"; + case MLX5_MODULE_STATUS_ERROR: + return "Cable error"; + case MLX5_MODULE_STATUS_DISABLED: + return "Cable disabled"; + 
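+ /* any other status value is reported as unknown via the default below */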
default: + return "Unknown status"; + } +} + +static const char *mlx5_pme_error_to_string(enum port_module_event_error_type error) +{ + switch (error) { + case MLX5_MODULE_EVENT_ERROR_POWER_BUDGET_EXCEEDED: + return "Power budget exceeded"; + case MLX5_MODULE_EVENT_ERROR_LONG_RANGE_FOR_NON_MLNX: + return "Long Range for non MLNX cable"; + case MLX5_MODULE_EVENT_ERROR_BUS_STUCK: + return "Bus stuck (I2C or data shorted)"; + case MLX5_MODULE_EVENT_ERROR_NO_EEPROM_RETRY_TIMEOUT: + return "No EEPROM/retry timeout"; + case MLX5_MODULE_EVENT_ERROR_ENFORCE_PART_NUMBER_LIST: + return "Enforce part number list"; + case MLX5_MODULE_EVENT_ERROR_UNKNOWN_IDENTIFIER: + return "Unknown identifier"; + case MLX5_MODULE_EVENT_ERROR_HIGH_TEMPERATURE: + return "High Temperature"; + case MLX5_MODULE_EVENT_ERROR_BAD_CABLE: + return "Bad or shorted cable/module"; + case MLX5_MODULE_EVENT_ERROR_PCIE_POWER_SLOT_EXCEEDED: + return "One or more network ports have been powered down due to insufficient/unadvertised power on the PCIe slot"; + default: + return "Unknown error"; + } +} + +/* type == MLX5_EVENT_TYPE_PORT_MODULE_EVENT */ +static int port_module(struct notifier_block *nb, unsigned long type, void *data) +{ + struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb); + struct mlx5_events *events = event_nb->ctx; + struct mlx5_eqe *eqe = data; + + enum port_module_event_status_type module_status; + enum port_module_event_error_type error_type; + struct mlx5_eqe_port_module *module_event_eqe; + const char *status_str; + u8 module_num; + + module_event_eqe = &eqe->data.port_module; + module_status = module_event_eqe->module_status & + PORT_MODULE_EVENT_MODULE_STATUS_MASK; + error_type = module_event_eqe->error_type & + PORT_MODULE_EVENT_ERROR_TYPE_MASK; + + if (module_status < MLX5_MODULE_STATUS_NUM) + events->pme_stats.status_counters[module_status]++; + + if (module_status == MLX5_MODULE_STATUS_ERROR) + if (error_type < MLX5_MODULE_EVENT_ERROR_NUM) + events->pme_stats.error_counters[error_type]++; + + if (!printk_ratelimit()) + return NOTIFY_OK; + + module_num = module_event_eqe->module; + status_str = mlx5_pme_status_to_string(module_status); + if (module_status == MLX5_MODULE_STATUS_ERROR) { + const char *error_str = mlx5_pme_error_to_string(error_type); + + mlx5_core_err(events->dev, + "Port module event[error]: module %u, %s, %s\n", + module_num, status_str, error_str); + } else { + mlx5_core_info(events->dev, + "Port module event: module %u, %s\n", + module_num, status_str); + } + + return NOTIFY_OK; +} + +void mlx5_get_pme_stats(struct mlx5_core_dev *dev, struct mlx5_pme_stats *stats) +{ + *stats = dev->priv.events->pme_stats; +} + +/* forward event as is to registered interfaces (mlx5e/mlx5_ib) */ +static int forward_event(struct notifier_block *nb, unsigned long event, void *data) +{ + struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb); + struct mlx5_events *events = event_nb->ctx; + struct mlx5_eqe *eqe = data; + + mlx5_core_dbg(events->dev, "Async eqe type %s, subtype (%d) forward to interfaces\n", + eqe_type_str(eqe->type), eqe->sub_type); + atomic_notifier_call_chain(&events->nh, event, data); + return NOTIFY_OK; +} + +int mlx5_events_init(struct mlx5_core_dev *dev) +{ + struct mlx5_events *events = kzalloc(sizeof(*events), GFP_KERNEL); + + if (!events) + return -ENOMEM; + + ATOMIC_INIT_NOTIFIER_HEAD(&events->nh); + events->dev = dev; + dev->priv.events = events; + return 0; +} + +void mlx5_events_cleanup(struct mlx5_core_dev *dev) +{ + 
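+ /* kvfree() also accepts kzalloc'ed memory such as this */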
kvfree(dev->priv.events); +} + +void mlx5_events_start(struct mlx5_core_dev *dev) +{ + struct mlx5_events *events = dev->priv.events; + int i; + + for (i = 0; i < ARRAY_SIZE(events_nbs_ref); i++) { + events->notifiers[i].nb = events_nbs_ref[i]; + events->notifiers[i].ctx = events; + mlx5_eq_notifier_register(dev, &events->notifiers[i].nb); + } +} + +void mlx5_events_stop(struct mlx5_core_dev *dev) +{ + struct mlx5_events *events = dev->priv.events; + int i; + + for (i = ARRAY_SIZE(events_nbs_ref) - 1; i >= 0 ; i--) + mlx5_eq_notifier_unregister(dev, &events->notifiers[i].nb); +} + +int mlx5_notifier_register(struct mlx5_core_dev *dev, struct notifier_block *nb) +{ + struct mlx5_events *events = dev->priv.events; + + return atomic_notifier_chain_register(&events->nh, nb); +} +EXPORT_SYMBOL(mlx5_notifier_register); + +int mlx5_notifier_unregister(struct mlx5_core_dev *dev, struct notifier_block *nb) +{ + struct mlx5_events *events = dev->priv.events; + + return atomic_notifier_chain_unregister(&events->nh, nb); +} +EXPORT_SYMBOL(mlx5_notifier_unregister); + +int mlx5_notifier_call_chain(struct mlx5_events *events, unsigned int event, void *data) +{ + return atomic_notifier_call_chain(&events->nh, event, data); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c index 8ca1d1949d93..873541ef4c1b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c @@ -334,7 +334,7 @@ static void mlx5_fpga_conn_handle_cqe(struct mlx5_fpga_conn *conn, { u8 opcode, status = 0; - opcode = cqe->op_own >> 4; + opcode = get_cqe_opcode(cqe); switch (opcode) { case MLX5_CQE_REQ_ERR: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c index 436a8136f26f..d046d1ec2a86 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c @@ -36,6 +36,7 @@ #include "mlx5_core.h" #include "lib/mlx5.h" +#include "lib/eq.h" #include "fpga/core.h" #include "fpga/conn.h" @@ -145,6 +146,22 @@ static int mlx5_fpga_device_brb(struct mlx5_fpga_device *fdev) return 0; } +static int mlx5_fpga_event(struct mlx5_fpga_device *, unsigned long, void *); + +static int fpga_err_event(struct notifier_block *nb, unsigned long event, void *eqe) +{ + struct mlx5_fpga_device *fdev = mlx5_nb_cof(nb, struct mlx5_fpga_device, fpga_err_nb); + + return mlx5_fpga_event(fdev, event, eqe); +} + +static int fpga_qp_err_event(struct notifier_block *nb, unsigned long event, void *eqe) +{ + struct mlx5_fpga_device *fdev = mlx5_nb_cof(nb, struct mlx5_fpga_device, fpga_qp_err_nb); + + return mlx5_fpga_event(fdev, event, eqe); +} + int mlx5_fpga_device_start(struct mlx5_core_dev *mdev) { struct mlx5_fpga_device *fdev = mdev->fpga; @@ -185,6 +202,11 @@ int mlx5_fpga_device_start(struct mlx5_core_dev *mdev) if (err) goto out; + MLX5_NB_INIT(&fdev->fpga_err_nb, fpga_err_event, FPGA_ERROR); + MLX5_NB_INIT(&fdev->fpga_qp_err_nb, fpga_qp_err_event, FPGA_QP_ERROR); + mlx5_eq_notifier_register(fdev->mdev, &fdev->fpga_err_nb); + mlx5_eq_notifier_register(fdev->mdev, &fdev->fpga_qp_err_nb); + err = mlx5_fpga_conn_device_init(fdev); if (err) goto err_rsvd_gid; @@ -201,6 +223,8 @@ err_conn_init: mlx5_fpga_conn_device_cleanup(fdev); err_rsvd_gid: + mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_err_nb); + mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_qp_err_nb); mlx5_core_unreserve_gids(mdev, max_num_qps); 
out: spin_lock_irqsave(&fdev->state_lock, flags); @@ -256,6 +280,9 @@ void mlx5_fpga_device_stop(struct mlx5_core_dev *mdev) } mlx5_fpga_conn_device_cleanup(fdev); + mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_err_nb); + mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_qp_err_nb); + max_num_qps = MLX5_CAP_FPGA(mdev, shell_caps.max_num_qps); mlx5_core_unreserve_gids(mdev, max_num_qps); } @@ -283,13 +310,13 @@ static const char *mlx5_fpga_qp_syndrome_to_string(u8 syndrome) return "Unknown"; } -void mlx5_fpga_event(struct mlx5_core_dev *mdev, u8 event, void *data) +static int mlx5_fpga_event(struct mlx5_fpga_device *fdev, + unsigned long event, void *eqe) { - struct mlx5_fpga_device *fdev = mdev->fpga; + void *data = ((struct mlx5_eqe *)eqe)->data.raw; const char *event_name; bool teardown = false; unsigned long flags; - u32 fpga_qpn; u8 syndrome; switch (event) { @@ -300,12 +327,9 @@ void mlx5_fpga_event(struct mlx5_core_dev *mdev, u8 event, void *data) case MLX5_EVENT_TYPE_FPGA_QP_ERROR: syndrome = MLX5_GET(fpga_qp_error_event, data, syndrome); event_name = mlx5_fpga_qp_syndrome_to_string(syndrome); - fpga_qpn = MLX5_GET(fpga_qp_error_event, data, fpga_qpn); break; default: - mlx5_fpga_warn_ratelimited(fdev, "Unexpected event %u\n", - event); - return; + return NOTIFY_DONE; } spin_lock_irqsave(&fdev->state_lock, flags); @@ -326,4 +350,6 @@ void mlx5_fpga_event(struct mlx5_core_dev *mdev, u8 event, void *data) */ if (teardown) mlx5_trigger_health_work(fdev->mdev); + + return NOTIFY_OK; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h index 3e2355c8df3f..7e2e871dbf83 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h @@ -35,11 +35,16 @@ #ifdef CONFIG_MLX5_FPGA +#include <linux/mlx5/eq.h> + +#include "lib/eq.h" #include "fpga/cmd.h" /* Represents an Innova device */ struct mlx5_fpga_device { struct mlx5_core_dev *mdev; + struct mlx5_nb fpga_err_nb; + struct mlx5_nb fpga_qp_err_nb; spinlock_t state_lock; /* Protects state transitions */ enum mlx5_fpga_status state; enum mlx5_fpga_image last_admin_image; @@ -82,7 +87,6 @@ int mlx5_fpga_init(struct mlx5_core_dev *mdev); void mlx5_fpga_cleanup(struct mlx5_core_dev *mdev); int mlx5_fpga_device_start(struct mlx5_core_dev *mdev); void mlx5_fpga_device_stop(struct mlx5_core_dev *mdev); -void mlx5_fpga_event(struct mlx5_core_dev *mdev, u8 event, void *data); #else @@ -104,11 +108,6 @@ static inline void mlx5_fpga_device_stop(struct mlx5_core_dev *mdev) { } -static inline void mlx5_fpga_event(struct mlx5_core_dev *mdev, u8 event, - void *data) -{ -} - #endif #endif /* __MLX5_FPGA_CORE_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c index 5cf5f2a9d51f..22a2ef111514 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c @@ -148,14 +148,16 @@ static int mlx5_fpga_tls_alloc_swid(struct idr *idr, spinlock_t *idr_spinlock, return ret; } -static void mlx5_fpga_tls_release_swid(struct idr *idr, - spinlock_t *idr_spinlock, u32 swid) +static void *mlx5_fpga_tls_release_swid(struct idr *idr, + spinlock_t *idr_spinlock, u32 swid) { unsigned long flags; + void *ptr; spin_lock_irqsave(idr_spinlock, flags); - idr_remove(idr, swid); + ptr = idr_remove(idr, swid); spin_unlock_irqrestore(idr_spinlock, flags); + return ptr; } static void mlx_tls_kfree_complete(struct 
mlx5_fpga_conn *conn, @@ -165,20 +167,12 @@ static void mlx_tls_kfree_complete(struct mlx5_fpga_conn *conn, kfree(buf); } -struct mlx5_teardown_stream_context { - struct mlx5_fpga_tls_command_context cmd; - u32 swid; -}; - static void mlx5_fpga_tls_teardown_completion(struct mlx5_fpga_conn *conn, struct mlx5_fpga_device *fdev, struct mlx5_fpga_tls_command_context *cmd, struct mlx5_fpga_dma_buf *resp) { - struct mlx5_teardown_stream_context *ctx = - container_of(cmd, struct mlx5_teardown_stream_context, cmd); - if (resp) { u32 syndrome = MLX5_GET(tls_resp, resp->sg[0].data, syndrome); @@ -186,14 +180,6 @@ mlx5_fpga_tls_teardown_completion(struct mlx5_fpga_conn *conn, mlx5_fpga_err(fdev, "Teardown stream failed with syndrome = %d", syndrome); - else if (MLX5_GET(tls_cmd, cmd->buf.sg[0].data, direction_sx)) - mlx5_fpga_tls_release_swid(&fdev->tls->tx_idr, - &fdev->tls->tx_idr_spinlock, - ctx->swid); - else - mlx5_fpga_tls_release_swid(&fdev->tls->rx_idr, - &fdev->tls->rx_idr_spinlock, - ctx->swid); } mlx5_fpga_tls_put_command_ctx(cmd); } @@ -225,8 +211,14 @@ int mlx5_fpga_tls_resync_rx(struct mlx5_core_dev *mdev, u32 handle, u32 seq, rcu_read_lock(); flow = idr_find(&mdev->fpga->tls->rx_idr, ntohl(handle)); - rcu_read_unlock(); + if (unlikely(!flow)) { + rcu_read_unlock(); + WARN_ONCE(1, "Received NULL pointer for handle\n"); + kfree(buf); + return -EINVAL; + } mlx5_fpga_tls_flow_to_cmd(flow, cmd); + rcu_read_unlock(); MLX5_SET(tls_cmd, cmd, swid, ntohl(handle)); MLX5_SET64(tls_cmd, cmd, tls_rcd_sn, be64_to_cpu(rcd_sn)); @@ -238,6 +230,8 @@ int mlx5_fpga_tls_resync_rx(struct mlx5_core_dev *mdev, u32 handle, u32 seq, buf->complete = mlx_tls_kfree_complete; ret = mlx5_fpga_sbu_conn_sendmsg(mdev->fpga->tls->conn, buf); + if (ret < 0) + kfree(buf); return ret; } @@ -245,7 +239,7 @@ int mlx5_fpga_tls_resync_rx(struct mlx5_core_dev *mdev, u32 handle, u32 seq, static void mlx5_fpga_tls_send_teardown_cmd(struct mlx5_core_dev *mdev, void *flow, u32 swid, gfp_t flags) { - struct mlx5_teardown_stream_context *ctx; + struct mlx5_fpga_tls_command_context *ctx; struct mlx5_fpga_dma_buf *buf; void *cmd; @@ -253,7 +247,7 @@ static void mlx5_fpga_tls_send_teardown_cmd(struct mlx5_core_dev *mdev, if (!ctx) return; - buf = &ctx->cmd.buf; + buf = &ctx->buf; cmd = (ctx + 1); MLX5_SET(tls_cmd, cmd, command_type, CMD_TEARDOWN_STREAM); MLX5_SET(tls_cmd, cmd, swid, swid); @@ -264,8 +258,7 @@ static void mlx5_fpga_tls_send_teardown_cmd(struct mlx5_core_dev *mdev, buf->sg[0].data = cmd; buf->sg[0].size = MLX5_TLS_COMMAND_SIZE; - ctx->swid = swid; - mlx5_fpga_tls_cmd_send(mdev->fpga, &ctx->cmd, + mlx5_fpga_tls_cmd_send(mdev->fpga, ctx, mlx5_fpga_tls_teardown_completion); } @@ -275,13 +268,14 @@ void mlx5_fpga_tls_del_flow(struct mlx5_core_dev *mdev, u32 swid, struct mlx5_fpga_tls *tls = mdev->fpga->tls; void *flow; - rcu_read_lock(); if (direction_sx) - flow = idr_find(&tls->tx_idr, swid); + flow = mlx5_fpga_tls_release_swid(&tls->tx_idr, + &tls->tx_idr_spinlock, + swid); else - flow = idr_find(&tls->rx_idr, swid); - - rcu_read_unlock(); + flow = mlx5_fpga_tls_release_swid(&tls->rx_idr, + &tls->rx_idr_spinlock, + swid); if (!flow) { mlx5_fpga_err(mdev->fpga, "No flow information for swid %u\n", @@ -289,6 +283,7 @@ void mlx5_fpga_tls_del_flow(struct mlx5_core_dev *mdev, u32 swid, return; } + synchronize_rcu(); /* before kfree(flow) */ mlx5_fpga_tls_send_teardown_cmd(mdev, flow, swid, flags); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index 
08a891f9aade..c44ccb67c4a3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -308,22 +308,68 @@ static int mlx5_cmd_destroy_flow_group(struct mlx5_core_dev *dev, return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } +static int mlx5_set_extended_dest(struct mlx5_core_dev *dev, + struct fs_fte *fte, bool *extended_dest) +{ + int fw_log_max_fdb_encap_uplink = + MLX5_CAP_ESW(dev, log_max_fdb_encap_uplink); + int num_fwd_destinations = 0; + struct mlx5_flow_rule *dst; + int num_encap = 0; + + *extended_dest = false; + if (!(fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST)) + return 0; + + list_for_each_entry(dst, &fte->node.children, node.list) { + if (dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_COUNTER) + continue; + if (dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_VPORT && + dst->dest_attr.vport.flags & MLX5_FLOW_DEST_VPORT_REFORMAT_ID) + num_encap++; + num_fwd_destinations++; + } + if (num_fwd_destinations > 1 && num_encap > 0) + *extended_dest = true; + + if (*extended_dest && !fw_log_max_fdb_encap_uplink) { + mlx5_core_warn(dev, "FW does not support extended destination"); + return -EOPNOTSUPP; + } + if (num_encap > (1 << fw_log_max_fdb_encap_uplink)) { + mlx5_core_warn(dev, "FW does not support more than %d encaps", + 1 << fw_log_max_fdb_encap_uplink); + return -EOPNOTSUPP; + } + + return 0; +} static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, int opmod, int modify_mask, struct mlx5_flow_table *ft, unsigned group_id, struct fs_fte *fte) { - unsigned int inlen = MLX5_ST_SZ_BYTES(set_fte_in) + - fte->dests_size * MLX5_ST_SZ_BYTES(dest_format_struct); u32 out[MLX5_ST_SZ_DW(set_fte_out)] = {0}; + bool extended_dest = false; struct mlx5_flow_rule *dst; void *in_flow_context, *vlan; void *in_match_value; + unsigned int inlen; + int dst_cnt_size; void *in_dests; u32 *in; int err; + if (mlx5_set_extended_dest(dev, fte, &extended_dest)) + return -EOPNOTSUPP; + + if (!extended_dest) + dst_cnt_size = MLX5_ST_SZ_BYTES(dest_format_struct); + else + dst_cnt_size = MLX5_ST_SZ_BYTES(extended_dest_format); + + inlen = MLX5_ST_SZ_BYTES(set_fte_in) + fte->dests_size * dst_cnt_size; in = kvzalloc(inlen, GFP_KERNEL); if (!in) return -ENOMEM; @@ -343,9 +389,20 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, MLX5_SET(flow_context, in_flow_context, group_id, group_id); MLX5_SET(flow_context, in_flow_context, flow_tag, fte->action.flow_tag); - MLX5_SET(flow_context, in_flow_context, action, fte->action.action); - MLX5_SET(flow_context, in_flow_context, packet_reformat_id, - fte->action.reformat_id); + MLX5_SET(flow_context, in_flow_context, extended_destination, + extended_dest); + if (extended_dest) { + u32 action; + + action = fte->action.action & + ~MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; + MLX5_SET(flow_context, in_flow_context, action, action); + } else { + MLX5_SET(flow_context, in_flow_context, action, + fte->action.action); + MLX5_SET(flow_context, in_flow_context, packet_reformat_id, + fte->action.reformat_id); + } MLX5_SET(flow_context, in_flow_context, modify_header_id, fte->action.modify_id); @@ -387,10 +444,20 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, id = dst->dest_attr.vport.num; MLX5_SET(dest_format_struct, in_dests, destination_eswitch_owner_vhca_id_valid, - dst->dest_attr.vport.vhca_id_valid); + !!(dst->dest_attr.vport.flags & + MLX5_FLOW_DEST_VPORT_VHCA_ID)); MLX5_SET(dest_format_struct, in_dests, destination_eswitch_owner_vhca_id, dst->dest_attr.vport.vhca_id); + 
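+ /* extended destination entries carry a per-vport packet reformat id */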
if (extended_dest) { + MLX5_SET(dest_format_struct, in_dests, + packet_reformat, + !!(dst->dest_attr.vport.flags & + MLX5_FLOW_DEST_VPORT_REFORMAT_ID)); + MLX5_SET(extended_dest_format, in_dests, + packet_reformat_id, + dst->dest_attr.vport.reformat_id); + } break; default: id = dst->dest_attr.tir_num; @@ -399,7 +466,7 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, MLX5_SET(dest_format_struct, in_dests, destination_type, type); MLX5_SET(dest_format_struct, in_dests, destination_id, id); - in_dests += MLX5_ST_SZ_BYTES(dest_format_struct); + in_dests += dst_cnt_size; list_size++; } @@ -420,7 +487,7 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, MLX5_SET(flow_counter_list, in_dests, flow_counter_id, dst->dest_attr.counter_id); - in_dests += MLX5_ST_SZ_BYTES(dest_format_struct); + in_dests += dst_cnt_size; list_size++; } if (list_size > max_list_size) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 08233cf44871..0be3eb86dd84 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -32,6 +32,7 @@ #include <linux/mutex.h> #include <linux/mlx5/driver.h> +#include <linux/mlx5/vport.h> #include <linux/mlx5/eswitch.h> #include "mlx5_core.h" @@ -262,10 +263,11 @@ static void nested_down_write_ref_node(struct fs_node *node, } } -static void down_write_ref_node(struct fs_node *node) +static void down_write_ref_node(struct fs_node *node, bool locked) { if (node) { - down_write(&node->lock); + if (!locked) + down_write(&node->lock); refcount_inc(&node->refcount); } } @@ -276,13 +278,14 @@ static void up_read_ref_node(struct fs_node *node) up_read(&node->lock); } -static void up_write_ref_node(struct fs_node *node) +static void up_write_ref_node(struct fs_node *node, bool locked) { refcount_dec(&node->refcount); - up_write(&node->lock); + if (!locked) + up_write(&node->lock); } -static void tree_put_node(struct fs_node *node) +static void tree_put_node(struct fs_node *node, bool locked) { struct fs_node *parent_node = node->parent; @@ -293,27 +296,27 @@ static void tree_put_node(struct fs_node *node) /* Only root namespace doesn't have parent and we just * need to free its node. 
*/ - down_write_ref_node(parent_node); + down_write_ref_node(parent_node, locked); list_del_init(&node->list); if (node->del_sw_func) node->del_sw_func(node); - up_write_ref_node(parent_node); + up_write_ref_node(parent_node, locked); } else { kfree(node); } node = NULL; } if (!node && parent_node) - tree_put_node(parent_node); + tree_put_node(parent_node, locked); } -static int tree_remove_node(struct fs_node *node) +static int tree_remove_node(struct fs_node *node, bool locked) { if (refcount_read(&node->refcount) > 1) { refcount_dec(&node->refcount); return -EEXIST; } - tree_put_node(node); + tree_put_node(node, locked); return 0; } @@ -397,6 +400,7 @@ static void del_hw_flow_table(struct fs_node *node) fs_get_obj(ft, node); dev = get_dev(&ft->node); root = find_root(&ft->node); + trace_mlx5_fs_del_ft(ft); if (node->active) { err = root->cmds->destroy_flow_table(dev, ft); @@ -418,22 +422,34 @@ static void del_sw_flow_table(struct fs_node *node) kfree(ft); } -static void del_sw_hw_rule(struct fs_node *node) +static void modify_fte(struct fs_fte *fte) { struct mlx5_flow_root_namespace *root; - struct mlx5_flow_rule *rule; struct mlx5_flow_table *ft; struct mlx5_flow_group *fg; - struct fs_fte *fte; - int modify_mask; - struct mlx5_core_dev *dev = get_dev(node); + struct mlx5_core_dev *dev; int err; - bool update_fte = false; - fs_get_obj(rule, node); - fs_get_obj(fte, rule->node.parent); fs_get_obj(fg, fte->node.parent); fs_get_obj(ft, fg->node.parent); + dev = get_dev(&fte->node); + + root = find_root(&ft->node); + err = root->cmds->update_fte(dev, ft, fg->id, fte->modify_mask, fte); + if (err) + mlx5_core_warn(dev, + "%s can't del rule fg id=%d fte_index=%d\n", + __func__, fg->id, fte->index); + fte->modify_mask = 0; +} + +static void del_sw_hw_rule(struct fs_node *node) +{ + struct mlx5_flow_rule *rule; + struct fs_fte *fte; + + fs_get_obj(rule, node); + fs_get_obj(fte, rule->node.parent); trace_mlx5_fs_del_rule(rule); if (rule->sw_action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) { mutex_lock(&rule->dest_attr.ft->lock); @@ -443,27 +459,19 @@ static void del_sw_hw_rule(struct fs_node *node) if (rule->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_COUNTER && --fte->dests_size) { - modify_mask = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_ACTION) | - BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_FLOW_COUNTERS); + fte->modify_mask |= + BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_ACTION) | + BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_FLOW_COUNTERS); fte->action.action &= ~MLX5_FLOW_CONTEXT_ACTION_COUNT; - update_fte = true; goto out; } if ((fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) && --fte->dests_size) { - modify_mask = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST); - update_fte = true; + fte->modify_mask |= + BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST); } out: - root = find_root(&ft->node); - if (update_fte && fte->dests_size) { - err = root->cmds->update_fte(dev, ft, fg->id, modify_mask, fte); - if (err) - mlx5_core_warn(dev, - "%s can't del rule fg id=%d fte_index=%d\n", - __func__, fg->id, fte->index); - } kfree(rule); } @@ -489,6 +497,7 @@ static void del_hw_fte(struct fs_node *node) mlx5_core_warn(dev, "flow steering can't delete fte in index %d of flow group id %d\n", fte->index, fg->id); + node->active = 0; } } @@ -589,7 +598,7 @@ static struct fs_fte *alloc_fte(struct mlx5_flow_table *ft, fte->node.type = FS_TYPE_FLOW_ENTRY; fte->action = *flow_act; - tree_init_node(&fte->node, del_hw_fte, del_sw_fte); + tree_init_node(&fte->node, NULL, del_sw_fte); return fte; } @@ -618,7 +627,8 @@ 
static struct mlx5_flow_group *alloc_flow_group(struct mlx5_flow_steering *steer if (ret) { kmem_cache_free(steering->fgs_cache, fg); return ERR_PTR(ret); -} + } + ida_init(&fg->fte_allocator); fg->mask.match_criteria_enable = match_criteria_enable; memcpy(&fg->mask.match_criteria, match_criteria, @@ -855,7 +865,7 @@ static int _mlx5_modify_rule_destination(struct mlx5_flow_rule *rule, fs_get_obj(fte, rule->node.parent); if (!(fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST)) return -EINVAL; - down_write_ref_node(&fte->node); + down_write_ref_node(&fte->node, false); fs_get_obj(fg, fte->node.parent); fs_get_obj(ft, fg->node.parent); @@ -863,7 +873,7 @@ static int _mlx5_modify_rule_destination(struct mlx5_flow_rule *rule, root = find_root(&ft->node); err = root->cmds->update_fte(get_dev(&ft->node), ft, fg->id, modify_mask, fte); - up_write_ref_node(&fte->node); + up_write_ref_node(&fte->node, false); return err; } @@ -1013,12 +1023,13 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa if (err) goto destroy_ft; ft->node.active = true; - down_write_ref_node(&fs_prio->node); + down_write_ref_node(&fs_prio->node, false); tree_add_node(&ft->node, &fs_prio->node); list_add_flow_table(ft, fs_prio); fs_prio->num_ft++; - up_write_ref_node(&fs_prio->node); + up_write_ref_node(&fs_prio->node, false); mutex_unlock(&root->chain_lock); + trace_mlx5_fs_add_ft(ft); return ft; destroy_ft: root->cmds->destroy_flow_table(root->dev, ft); @@ -1110,17 +1121,17 @@ struct mlx5_flow_group *mlx5_create_flow_group(struct mlx5_flow_table *ft, if (ft->autogroup.active) return ERR_PTR(-EPERM); - down_write_ref_node(&ft->node); + down_write_ref_node(&ft->node, false); fg = alloc_insert_flow_group(ft, match_criteria_enable, match_criteria, start_index, end_index, ft->node.children.prev); - up_write_ref_node(&ft->node); + up_write_ref_node(&ft->node, false); if (IS_ERR(fg)) return fg; err = root->cmds->create_flow_group(dev, ft, fg_in, &fg->id); if (err) { - tree_put_node(&fg->node); + tree_put_node(&fg->node, false); return ERR_PTR(err); } trace_mlx5_fs_add_fg(fg); @@ -1373,7 +1384,10 @@ static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1, { if (d1->type == d2->type) { if ((d1->type == MLX5_FLOW_DESTINATION_TYPE_VPORT && - d1->vport.num == d2->vport.num) || + d1->vport.num == d2->vport.num && + d1->vport.flags == d2->vport.flags && + ((d1->vport.flags & MLX5_FLOW_DEST_VPORT_REFORMAT_ID) ? 
+ (d1->vport.reformat_id == d2->vport.reformat_id) : true)) || (d1->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE && d1->ft == d2->ft) || (d1->type == MLX5_FLOW_DESTINATION_TYPE_TIR && @@ -1514,10 +1528,10 @@ static void free_match_list(struct match_list_head *head) struct match_list *iter, *match_tmp; list_del(&head->first.list); - tree_put_node(&head->first.g->node); + tree_put_node(&head->first.g->node, false); list_for_each_entry_safe(iter, match_tmp, &head->list, list) { - tree_put_node(&iter->g->node); + tree_put_node(&iter->g->node, false); list_del(&iter->list); kfree(iter); } @@ -1594,11 +1608,16 @@ lookup_fte_locked(struct mlx5_flow_group *g, fte_tmp = NULL; goto out; } + if (!fte_tmp->node.active) { + tree_put_node(&fte_tmp->node, false); + fte_tmp = NULL; + goto out; + } nested_down_write_ref_node(&fte_tmp->node, FS_LOCK_CHILD); out: if (take_write) - up_write_ref_node(&g->node); + up_write_ref_node(&g->node, false); else up_read_ref_node(&g->node); return fte_tmp; @@ -1640,8 +1659,8 @@ search_again_locked: continue; rule = add_rule_fg(g, spec->match_value, flow_act, dest, dest_num, fte_tmp); - up_write_ref_node(&fte_tmp->node); - tree_put_node(&fte_tmp->node); + up_write_ref_node(&fte_tmp->node, false); + tree_put_node(&fte_tmp->node, false); kmem_cache_free(steering->ftes_cache, fte); return rule; } @@ -1677,7 +1696,7 @@ skip_search: err = insert_fte(g, fte); if (err) { - up_write_ref_node(&g->node); + up_write_ref_node(&g->node, false); if (err == -ENOSPC) continue; kmem_cache_free(steering->ftes_cache, fte); @@ -1685,11 +1704,11 @@ skip_search: } nested_down_write_ref_node(&fte->node, FS_LOCK_CHILD); - up_write_ref_node(&g->node); + up_write_ref_node(&g->node, false); rule = add_rule_fg(g, spec->match_value, flow_act, dest, dest_num, fte); - up_write_ref_node(&fte->node); - tree_put_node(&fte->node); + up_write_ref_node(&fte->node, false); + tree_put_node(&fte->node, false); return rule; } rule = ERR_PTR(-ENOENT); @@ -1731,7 +1750,7 @@ search_again_locked: err = build_match_list(&match_head, ft, spec); if (err) { if (take_write) - up_write_ref_node(&ft->node); + up_write_ref_node(&ft->node, false); else up_read_ref_node(&ft->node); return ERR_PTR(err); @@ -1746,7 +1765,7 @@ search_again_locked: if (!IS_ERR(rule) || (PTR_ERR(rule) != -ENOENT && PTR_ERR(rule) != -EAGAIN)) { if (take_write) - up_write_ref_node(&ft->node); + up_write_ref_node(&ft->node, false); return rule; } @@ -1762,12 +1781,12 @@ search_again_locked: g = alloc_auto_flow_group(ft, spec); if (IS_ERR(g)) { rule = ERR_CAST(g); - up_write_ref_node(&ft->node); + up_write_ref_node(&ft->node, false); return rule; } nested_down_write_ref_node(&g->node, FS_LOCK_PARENT); - up_write_ref_node(&ft->node); + up_write_ref_node(&ft->node, false); err = create_auto_flow_group(ft, g); if (err) @@ -1786,17 +1805,17 @@ search_again_locked: } nested_down_write_ref_node(&fte->node, FS_LOCK_CHILD); - up_write_ref_node(&g->node); + up_write_ref_node(&g->node, false); rule = add_rule_fg(g, spec->match_value, flow_act, dest, dest_num, fte); - up_write_ref_node(&fte->node); - tree_put_node(&fte->node); - tree_put_node(&g->node); + up_write_ref_node(&fte->node, false); + tree_put_node(&fte->node, false); + tree_put_node(&g->node, false); return rule; err_release_fg: - up_write_ref_node(&g->node); - tree_put_node(&g->node); + up_write_ref_node(&g->node, false); + tree_put_node(&g->node, false); return ERR_PTR(err); } @@ -1859,10 +1878,33 @@ EXPORT_SYMBOL(mlx5_add_flow_rules); void mlx5_del_flow_rules(struct mlx5_flow_handle *handle) { + 
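/* All rules of the handle share one parent FTE; it is write-locked once
 * below so that the per-rule del_sw_hw_rule() calls only accumulate
 * fte->modify_mask instead of issuing one FW command per rule.
 */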
struct fs_fte *fte; int i; + /* In order to consolidate the HW changes we lock the FTE for other + * changes, and increase its refcount, in order not to perform the + * "del" functions of the FTE. Will handle them here. + * The removal of the rules is done under locked FTE. + * After removing all the handle's rules, if there are remaining + * rules, it means we just need to modify the FTE in FW, and + * unlock/decrease the refcount we increased before. + * Otherwise, it means the FTE should be deleted. First delete the + * FTE in FW. Then, unlock the FTE, and proceed the tree_put_node of + * the FTE, which will handle the last decrease of the refcount, as + * well as required handling of its parent. + */ + fs_get_obj(fte, handle->rule[0]->node.parent); + down_write_ref_node(&fte->node, false); for (i = handle->num_rules - 1; i >= 0; i--) - tree_remove_node(&handle->rule[i]->node); + tree_remove_node(&handle->rule[i]->node, true); + if (fte->modify_mask && fte->dests_size) { + modify_fte(fte); + up_write_ref_node(&fte->node, false); + } else { + del_hw_fte(&fte->node); + up_write(&fte->node.lock); + tree_put_node(&fte->node, false); + } kfree(handle); } EXPORT_SYMBOL(mlx5_del_flow_rules); @@ -1965,7 +2007,7 @@ int mlx5_destroy_flow_table(struct mlx5_flow_table *ft) mutex_unlock(&root->chain_lock); return err; } - if (tree_remove_node(&ft->node)) + if (tree_remove_node(&ft->node, false)) mlx5_core_warn(get_dev(&ft->node), "Flow table %d wasn't destroyed, refcount > 1\n", ft->id); mutex_unlock(&root->chain_lock); @@ -1976,7 +2018,7 @@ EXPORT_SYMBOL(mlx5_destroy_flow_table); void mlx5_destroy_flow_group(struct mlx5_flow_group *fg) { - if (tree_remove_node(&fg->node)) + if (tree_remove_node(&fg->node, false)) mlx5_core_warn(get_dev(&fg->node), "Flow group %d wasn't destroyed, refcount > 1\n", fg->id); } @@ -2360,8 +2402,8 @@ static void clean_tree(struct fs_node *node) tree_get_node(node); list_for_each_entry_safe(iter, temp, &node->children, list) clean_tree(iter); - tree_put_node(node); - tree_remove_node(node); + tree_put_node(node, false); + tree_remove_node(node, false); } } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index b51ad217da32..87de0e4d9124 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -145,29 +145,6 @@ struct mlx5_flow_table { struct rhltable fgs_hash; }; -struct mlx5_fc_cache { - u64 packets; - u64 bytes; - u64 lastuse; -}; - -struct mlx5_fc { - struct list_head list; - struct llist_node addlist; - struct llist_node dellist; - - /* last{packets,bytes} members are used when calculating the delta since - * last reading - */ - u64 lastpackets; - u64 lastbytes; - - u32 id; - bool aging; - - struct mlx5_fc_cache cache ____cacheline_aligned_in_smp; -}; - struct mlx5_ft_underlay_qp { struct list_head list; u32 qpn; @@ -195,6 +172,7 @@ struct fs_fte { enum fs_fte_status status; struct mlx5_fc *counter; struct rhash_head hash; + int modify_mask; }; /* Type of children is mlx5_flow_table/namespace */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c index 32accd6b041b..c6c28f56aa29 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c @@ -41,6 +41,29 @@ /* Max number of counters to query in bulk read is 32K */ #define MLX5_SW_MAX_COUNTERS_BULK BIT(15) +struct mlx5_fc_cache { + u64 packets; + u64 
bytes; + u64 lastuse; +}; + +struct mlx5_fc { + struct list_head list; + struct llist_node addlist; + struct llist_node dellist; + + /* last{packets,bytes} members are used when calculating the delta since + * last reading + */ + u64 lastpackets; + u64 lastbytes; + + u32 id; + bool aging; + + struct mlx5_fc_cache cache ____cacheline_aligned_in_smp; +}; + /* locking scheme: * * It is the responsibility of the user to prevent concurrent calls or bad diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index 43118de8ee99..cb9fa3430c53 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -38,6 +38,8 @@ #include <linux/mlx5/driver.h> #include <linux/mlx5/cmd.h> #include "mlx5_core.h" +#include "lib/eq.h" +#include "lib/mlx5.h" enum { MLX5_HEALTH_POLL_INTERVAL = 2 * HZ, @@ -78,29 +80,6 @@ void mlx5_set_nic_state(struct mlx5_core_dev *dev, u8 state) &dev->iseg->cmdq_addr_l_sz); } -static void trigger_cmd_completions(struct mlx5_core_dev *dev) -{ - unsigned long flags; - u64 vector; - - /* wait for pending handlers to complete */ - synchronize_irq(pci_irq_vector(dev->pdev, MLX5_EQ_VEC_CMD)); - spin_lock_irqsave(&dev->cmd.alloc_lock, flags); - vector = ~dev->cmd.bitmask & ((1ul << (1 << dev->cmd.log_sz)) - 1); - if (!vector) - goto no_trig; - - vector |= MLX5_TRIGGERED_CMD_COMP; - spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags); - - mlx5_core_dbg(dev, "vector 0x%llx\n", vector); - mlx5_cmd_comp_handler(dev, vector, true); - return; - -no_trig: - spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags); -} - static int in_fatal(struct mlx5_core_dev *dev) { struct mlx5_core_health *health = &dev->priv.health; @@ -124,10 +103,10 @@ void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force) mlx5_core_err(dev, "start\n"); if (pci_channel_offline(dev->pdev) || in_fatal(dev) || force) { dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; - trigger_cmd_completions(dev); + mlx5_cmd_flush(dev); } - mlx5_core_event(dev, MLX5_DEV_EVENT_SYS_ERROR, 1); + mlx5_notifier_call_chain(dev->priv.events, MLX5_DEV_EVENT_SYS_ERROR, (void *)1); mlx5_core_err(dev, "end\n"); unlock: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c index 11dabd62e2c7..4eac42555c7d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@ -87,7 +87,7 @@ int mlx5i_init(struct mlx5_core_dev *mdev, mlx5_query_port_max_mtu(mdev, &max_mtu, 1); netdev->mtu = max_mtu; - mlx5e_build_nic_params(mdev, &priv->channels.params, + mlx5e_build_nic_params(mdev, &priv->rss_params, &priv->channels.params, mlx5e_get_netdev_max_channels(netdev), netdev->mtu); mlx5i_build_nic_params(mdev, &priv->channels.params); @@ -446,11 +446,11 @@ static int mlx5i_change_mtu(struct net_device *netdev, int new_mtu) new_channels.params = *params; new_channels.params.sw_mtu = new_mtu; - err = mlx5e_open_channels(priv, &new_channels); + + err = mlx5e_safe_switch_channels(priv, &new_channels, NULL); if (err) goto out; - mlx5e_switch_priv_channels(priv, &new_channels, NULL); netdev->mtu = new_channels.params.sw_mtu; out: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c index 582b2f18010a..959605559858 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c @@ -34,39 +34,9 @@ #include 
<linux/mlx5/driver.h> #include <linux/mlx5/vport.h> #include "mlx5_core.h" - -enum { - MLX5_LAG_FLAG_BONDED = 1 << 0, -}; - -struct lag_func { - struct mlx5_core_dev *dev; - struct net_device *netdev; -}; - -/* Used for collection of netdev event info. */ -struct lag_tracker { - enum netdev_lag_tx_type tx_type; - struct netdev_lag_lower_state_info netdev_state[MLX5_MAX_PORTS]; - bool is_bonded; -}; - -/* LAG data of a ConnectX card. - * It serves both its phys functions. - */ -struct mlx5_lag { - u8 flags; - u8 v2p_map[MLX5_MAX_PORTS]; - struct lag_func pf[MLX5_MAX_PORTS]; - struct lag_tracker tracker; - struct delayed_work bond_work; - struct notifier_block nb; - - /* Admin state. Allow lag only if allowed is true - * even if network conditions for lag were met - */ - bool allowed; -}; +#include "eswitch.h" +#include "lag.h" +#include "lag_mp.h" /* General purpose, use for short periods of time. * Beware of lock dependencies (preferably, no locks should be acquired @@ -148,13 +118,8 @@ static int mlx5_cmd_query_cong_counter(struct mlx5_core_dev *dev, return mlx5_cmd_exec(dev, in, sizeof(in), out, out_size); } -static struct mlx5_lag *mlx5_lag_dev_get(struct mlx5_core_dev *dev) -{ - return dev->priv.lag; -} - -static int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev, - struct net_device *ndev) +int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev, + struct net_device *ndev) { int i; @@ -165,9 +130,14 @@ static int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev, return -1; } -static bool mlx5_lag_is_bonded(struct mlx5_lag *ldev) +static bool __mlx5_lag_is_roce(struct mlx5_lag *ldev) +{ + return !!(ldev->flags & MLX5_LAG_FLAG_ROCE); +} + +static bool __mlx5_lag_is_sriov(struct mlx5_lag *ldev) { - return !!(ldev->flags & MLX5_LAG_FLAG_BONDED); + return !!(ldev->flags & MLX5_LAG_FLAG_SRIOV); } static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker, @@ -186,36 +156,131 @@ static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker, *port2 = 1; } -static void mlx5_activate_lag(struct mlx5_lag *ldev, - struct lag_tracker *tracker) +void mlx5_modify_lag(struct mlx5_lag *ldev, + struct lag_tracker *tracker) { struct mlx5_core_dev *dev0 = ldev->pf[0].dev; + u8 v2p_port1, v2p_port2; int err; - ldev->flags |= MLX5_LAG_FLAG_BONDED; + mlx5_infer_tx_affinity_mapping(tracker, &v2p_port1, + &v2p_port2); + + if (v2p_port1 != ldev->v2p_map[0] || + v2p_port2 != ldev->v2p_map[1]) { + ldev->v2p_map[0] = v2p_port1; + ldev->v2p_map[1] = v2p_port2; + + mlx5_core_info(dev0, "modify lag map port 1:%d port 2:%d", + ldev->v2p_map[0], ldev->v2p_map[1]); + + err = mlx5_cmd_modify_lag(dev0, v2p_port1, v2p_port2); + if (err) + mlx5_core_err(dev0, + "Failed to modify LAG (%d)\n", + err); + } +} + +static int mlx5_create_lag(struct mlx5_lag *ldev, + struct lag_tracker *tracker) +{ + struct mlx5_core_dev *dev0 = ldev->pf[0].dev; + int err; mlx5_infer_tx_affinity_mapping(tracker, &ldev->v2p_map[0], &ldev->v2p_map[1]); + mlx5_core_info(dev0, "lag map port 1:%d port 2:%d", + ldev->v2p_map[0], ldev->v2p_map[1]); + err = mlx5_cmd_create_lag(dev0, ldev->v2p_map[0], ldev->v2p_map[1]); if (err) mlx5_core_err(dev0, "Failed to create LAG (%d)\n", err); + return err; +} + +int mlx5_activate_lag(struct mlx5_lag *ldev, + struct lag_tracker *tracker, + u8 flags) +{ + bool roce_lag = !!(flags & MLX5_LAG_FLAG_ROCE); + struct mlx5_core_dev *dev0 = ldev->pf[0].dev; + int err; + + err = mlx5_create_lag(ldev, tracker); + if (err) { + if (roce_lag) { + mlx5_core_err(dev0, + "Failed to activate RoCE LAG\n"); + } 
else { + mlx5_core_err(dev0, + "Failed to activate VF LAG\n" + "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n"); + } + return err; + } + + ldev->flags |= flags; + return 0; } -static void mlx5_deactivate_lag(struct mlx5_lag *ldev) +static int mlx5_deactivate_lag(struct mlx5_lag *ldev) { struct mlx5_core_dev *dev0 = ldev->pf[0].dev; + bool roce_lag = __mlx5_lag_is_roce(ldev); int err; - ldev->flags &= ~MLX5_LAG_FLAG_BONDED; + ldev->flags &= ~MLX5_LAG_MODE_FLAGS; err = mlx5_cmd_destroy_lag(dev0); - if (err) - mlx5_core_err(dev0, - "Failed to destroy LAG (%d)\n", - err); + if (err) { + if (roce_lag) { + mlx5_core_err(dev0, + "Failed to deactivate RoCE LAG; driver restart required\n"); + } else { + mlx5_core_err(dev0, + "Failed to deactivate VF LAG; driver restart required\n" + "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n"); + } + } + + return err; +} + +static bool mlx5_lag_check_prereq(struct mlx5_lag *ldev) +{ + if (!ldev->pf[0].dev || !ldev->pf[1].dev) + return false; + +#ifdef CONFIG_MLX5_ESWITCH + return mlx5_esw_lag_prereq(ldev->pf[0].dev, ldev->pf[1].dev); +#else + return (!mlx5_sriov_is_enabled(ldev->pf[0].dev) && + !mlx5_sriov_is_enabled(ldev->pf[1].dev)); +#endif +} + +static void mlx5_lag_add_ib_devices(struct mlx5_lag *ldev) +{ + int i; + + for (i = 0; i < MLX5_MAX_PORTS; i++) + if (ldev->pf[i].dev) + mlx5_add_dev_by_protocol(ldev->pf[i].dev, + MLX5_INTERFACE_PROTOCOL_IB); +} + +static void mlx5_lag_remove_ib_devices(struct mlx5_lag *ldev) +{ + int i; + + for (i = 0; i < MLX5_MAX_PORTS; i++) + if (ldev->pf[i].dev) + mlx5_remove_dev_by_protocol(ldev->pf[i].dev, + MLX5_INTERFACE_PROTOCOL_IB); } static void mlx5_do_bond(struct mlx5_lag *ldev) @@ -223,9 +288,8 @@ static void mlx5_do_bond(struct mlx5_lag *ldev) struct mlx5_core_dev *dev0 = ldev->pf[0].dev; struct mlx5_core_dev *dev1 = ldev->pf[1].dev; struct lag_tracker tracker; - u8 v2p_port1, v2p_port2; - int i, err; - bool do_bond; + bool do_bond, roce_lag; + int err; if (!dev0 || !dev1) return; @@ -234,48 +298,56 @@ static void mlx5_do_bond(struct mlx5_lag *ldev) tracker = ldev->tracker; mutex_unlock(&lag_mutex); - do_bond = tracker.is_bonded && ldev->allowed; + do_bond = tracker.is_bonded && mlx5_lag_check_prereq(ldev); - if (do_bond && !mlx5_lag_is_bonded(ldev)) { - for (i = 0; i < MLX5_MAX_PORTS; i++) - mlx5_remove_dev_by_protocol(ldev->pf[i].dev, - MLX5_INTERFACE_PROTOCOL_IB); + if (do_bond && !__mlx5_lag_is_active(ldev)) { + roce_lag = !mlx5_sriov_is_enabled(dev0) && + !mlx5_sriov_is_enabled(dev1); - mlx5_activate_lag(ldev, &tracker); +#ifdef CONFIG_MLX5_ESWITCH + roce_lag &= dev0->priv.eswitch->mode == SRIOV_NONE && + dev1->priv.eswitch->mode == SRIOV_NONE; +#endif - mlx5_add_dev_by_protocol(dev0, MLX5_INTERFACE_PROTOCOL_IB); - mlx5_nic_vport_enable_roce(dev1); - } else if (do_bond && mlx5_lag_is_bonded(ldev)) { - mlx5_infer_tx_affinity_mapping(&tracker, &v2p_port1, - &v2p_port2); + if (roce_lag) + mlx5_lag_remove_ib_devices(ldev); - if ((v2p_port1 != ldev->v2p_map[0]) || - (v2p_port2 != ldev->v2p_map[1])) { - ldev->v2p_map[0] = v2p_port1; - ldev->v2p_map[1] = v2p_port2; + err = mlx5_activate_lag(ldev, &tracker, + roce_lag ? 
MLX5_LAG_FLAG_ROCE : + MLX5_LAG_FLAG_SRIOV); + if (err) { + if (roce_lag) + mlx5_lag_add_ib_devices(ldev); - err = mlx5_cmd_modify_lag(dev0, v2p_port1, v2p_port2); - if (err) - mlx5_core_err(dev0, - "Failed to modify LAG (%d)\n", - err); + return; } - } else if (!do_bond && mlx5_lag_is_bonded(ldev)) { - mlx5_remove_dev_by_protocol(dev0, MLX5_INTERFACE_PROTOCOL_IB); - mlx5_nic_vport_disable_roce(dev1); - mlx5_deactivate_lag(ldev); + if (roce_lag) { + mlx5_add_dev_by_protocol(dev0, MLX5_INTERFACE_PROTOCOL_IB); + mlx5_nic_vport_enable_roce(dev1); + } + } else if (do_bond && __mlx5_lag_is_active(ldev)) { + mlx5_modify_lag(ldev, &tracker); + } else if (!do_bond && __mlx5_lag_is_active(ldev)) { + roce_lag = __mlx5_lag_is_roce(ldev); + + if (roce_lag) { + mlx5_remove_dev_by_protocol(dev0, MLX5_INTERFACE_PROTOCOL_IB); + mlx5_nic_vport_disable_roce(dev1); + } - for (i = 0; i < MLX5_MAX_PORTS; i++) - if (ldev->pf[i].dev) - mlx5_add_dev_by_protocol(ldev->pf[i].dev, - MLX5_INTERFACE_PROTOCOL_IB); + err = mlx5_deactivate_lag(ldev); + if (err) + return; + + if (roce_lag) + mlx5_lag_add_ib_devices(ldev); } } static void mlx5_queue_bond_work(struct mlx5_lag *ldev, unsigned long delay) { - schedule_delayed_work(&ldev->bond_work, delay); + queue_delayed_work(ldev->wq, &ldev->bond_work, delay); } static void mlx5_do_bond_work(struct work_struct *work) @@ -419,15 +491,6 @@ static int mlx5_lag_netdev_event(struct notifier_block *this, return NOTIFY_DONE; } -static bool mlx5_lag_check_prereq(struct mlx5_lag *ldev) -{ - if ((ldev->pf[0].dev && mlx5_sriov_is_enabled(ldev->pf[0].dev)) || - (ldev->pf[1].dev && mlx5_sriov_is_enabled(ldev->pf[1].dev))) - return false; - else - return true; -} - static struct mlx5_lag *mlx5_lag_dev_alloc(void) { struct mlx5_lag *ldev; @@ -436,14 +499,20 @@ static struct mlx5_lag *mlx5_lag_dev_alloc(void) if (!ldev) return NULL; + ldev->wq = create_singlethread_workqueue("mlx5_lag"); + if (!ldev->wq) { + kfree(ldev); + return NULL; + } + INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work); - ldev->allowed = mlx5_lag_check_prereq(ldev); return ldev; } static void mlx5_lag_dev_free(struct mlx5_lag *ldev) { + destroy_workqueue(ldev->wq); kfree(ldev); } @@ -462,7 +531,6 @@ static void mlx5_lag_dev_add_pf(struct mlx5_lag *ldev, ldev->tracker.netdev_state[fn].link_up = 0; ldev->tracker.netdev_state[fn].tx_enabled = 0; - ldev->allowed = mlx5_lag_check_prereq(ldev); dev->priv.lag = ldev; mutex_unlock(&lag_mutex); @@ -484,7 +552,6 @@ static void mlx5_lag_dev_remove_pf(struct mlx5_lag *ldev, memset(&ldev->pf[i], 0, sizeof(*ldev->pf)); dev->priv.lag = NULL; - ldev->allowed = mlx5_lag_check_prereq(ldev); mutex_unlock(&lag_mutex); } @@ -493,6 +560,7 @@ void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev) { struct mlx5_lag *ldev = NULL; struct mlx5_core_dev *tmp_dev; + int err; if (!MLX5_CAP_GEN(dev, vport_group_manager) || !MLX5_CAP_GEN(dev, lag_master) || @@ -520,6 +588,11 @@ void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev) mlx5_core_err(dev, "Failed to register LAG netdev notifier\n"); } } + + err = mlx5_lag_mp_init(ldev); + if (err) + mlx5_core_err(dev, "Failed to init multipath lag err=%d\n", + err); } /* Must be called with intf_mutex held */ @@ -532,7 +605,7 @@ void mlx5_lag_remove(struct mlx5_core_dev *dev) if (!ldev) return; - if (mlx5_lag_is_bonded(ldev)) + if (__mlx5_lag_is_active(ldev)) mlx5_deactivate_lag(ldev); mlx5_lag_dev_remove_pf(ldev, dev); @@ -544,61 +617,67 @@ void mlx5_lag_remove(struct mlx5_core_dev *dev) if (i == MLX5_MAX_PORTS) { 
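/* No PF is left on this LAG device: drop the netdev notifier and the
 * multipath state, cancel any pending bond work, then free the LAG
 * object itself.
 */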
if (ldev->nb.notifier_call) unregister_netdevice_notifier(&ldev->nb); + mlx5_lag_mp_cleanup(ldev); cancel_delayed_work_sync(&ldev->bond_work); mlx5_lag_dev_free(ldev); } } -bool mlx5_lag_is_active(struct mlx5_core_dev *dev) +bool mlx5_lag_is_roce(struct mlx5_core_dev *dev) { struct mlx5_lag *ldev; bool res; mutex_lock(&lag_mutex); ldev = mlx5_lag_dev_get(dev); - res = ldev && mlx5_lag_is_bonded(ldev); + res = ldev && __mlx5_lag_is_roce(ldev); mutex_unlock(&lag_mutex); return res; } -EXPORT_SYMBOL(mlx5_lag_is_active); +EXPORT_SYMBOL(mlx5_lag_is_roce); -static int mlx5_lag_set_state(struct mlx5_core_dev *dev, bool allow) +bool mlx5_lag_is_active(struct mlx5_core_dev *dev) { struct mlx5_lag *ldev; - int ret = 0; - bool lag_active; - - mlx5_dev_list_lock(); + bool res; + mutex_lock(&lag_mutex); ldev = mlx5_lag_dev_get(dev); - if (!ldev) { - ret = -ENODEV; - goto unlock; - } - lag_active = mlx5_lag_is_bonded(ldev); - if (!mlx5_lag_check_prereq(ldev) && allow) { - ret = -EINVAL; - goto unlock; - } - if (ldev->allowed == allow) - goto unlock; - ldev->allowed = allow; - if ((lag_active && !allow) || allow) - mlx5_do_bond(ldev); -unlock: - mlx5_dev_list_unlock(); - return ret; + res = ldev && __mlx5_lag_is_active(ldev); + mutex_unlock(&lag_mutex); + + return res; } +EXPORT_SYMBOL(mlx5_lag_is_active); -int mlx5_lag_forbid(struct mlx5_core_dev *dev) +bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev) { - return mlx5_lag_set_state(dev, false); + struct mlx5_lag *ldev; + bool res; + + mutex_lock(&lag_mutex); + ldev = mlx5_lag_dev_get(dev); + res = ldev && __mlx5_lag_is_sriov(ldev); + mutex_unlock(&lag_mutex); + + return res; } +EXPORT_SYMBOL(mlx5_lag_is_sriov); -int mlx5_lag_allow(struct mlx5_core_dev *dev) +void mlx5_lag_update(struct mlx5_core_dev *dev) { - return mlx5_lag_set_state(dev, true); + struct mlx5_lag *ldev; + + mlx5_dev_list_lock(); + ldev = mlx5_lag_dev_get(dev); + if (!ldev) + goto unlock; + + mlx5_do_bond(ldev); + +unlock: + mlx5_dev_list_unlock(); } struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev) @@ -609,7 +688,7 @@ struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev) mutex_lock(&lag_mutex); ldev = mlx5_lag_dev_get(dev); - if (!(ldev && mlx5_lag_is_bonded(ldev))) + if (!(ldev && __mlx5_lag_is_roce(ldev))) goto unlock; if (ldev->tracker.tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) { @@ -638,7 +717,7 @@ bool mlx5_lag_intf_add(struct mlx5_interface *intf, struct mlx5_priv *priv) return true; ldev = mlx5_lag_dev_get(dev); - if (!ldev || !mlx5_lag_is_bonded(ldev) || ldev->pf[0].dev == dev) + if (!ldev || !__mlx5_lag_is_roce(ldev) || ldev->pf[0].dev == dev) return true; /* If bonded, we do not add an IB device for PF1. */ @@ -665,7 +744,7 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev, mutex_lock(&lag_mutex); ldev = mlx5_lag_dev_get(dev); - if (ldev && mlx5_lag_is_bonded(ldev)) { + if (ldev && __mlx5_lag_is_roce(ldev)) { num_ports = MLX5_MAX_PORTS; mdev[0] = ldev->pf[0].dev; mdev[1] = ldev->pf[1].dev; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.h b/drivers/net/ethernet/mellanox/mlx5/core/lag.h new file mode 100644 index 000000000000..1dea0b1c9826 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.h @@ -0,0 +1,65 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. 
*/ + +#ifndef __MLX5_LAG_H__ +#define __MLX5_LAG_H__ + +#include "mlx5_core.h" +#include "lag_mp.h" + +enum { + MLX5_LAG_FLAG_ROCE = 1 << 0, + MLX5_LAG_FLAG_SRIOV = 1 << 1, + MLX5_LAG_FLAG_MULTIPATH = 1 << 2, +}; + +#define MLX5_LAG_MODE_FLAGS (MLX5_LAG_FLAG_ROCE | MLX5_LAG_FLAG_SRIOV |\ + MLX5_LAG_FLAG_MULTIPATH) + +struct lag_func { + struct mlx5_core_dev *dev; + struct net_device *netdev; +}; + +/* Used for collection of netdev event info. */ +struct lag_tracker { + enum netdev_lag_tx_type tx_type; + struct netdev_lag_lower_state_info netdev_state[MLX5_MAX_PORTS]; + unsigned int is_bonded:1; +}; + +/* LAG data of a ConnectX card. + * It serves both its phys functions. + */ +struct mlx5_lag { + u8 flags; + u8 v2p_map[MLX5_MAX_PORTS]; + struct lag_func pf[MLX5_MAX_PORTS]; + struct lag_tracker tracker; + struct workqueue_struct *wq; + struct delayed_work bond_work; + struct notifier_block nb; + struct lag_mp lag_mp; +}; + +static inline struct mlx5_lag * +mlx5_lag_dev_get(struct mlx5_core_dev *dev) +{ + return dev->priv.lag; +} + +static inline bool +__mlx5_lag_is_active(struct mlx5_lag *ldev) +{ + return !!(ldev->flags & MLX5_LAG_MODE_FLAGS); +} + +void mlx5_modify_lag(struct mlx5_lag *ldev, + struct lag_tracker *tracker); +int mlx5_activate_lag(struct mlx5_lag *ldev, + struct lag_tracker *tracker, + u8 flags); +int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev, + struct net_device *ndev); + +#endif /* __MLX5_LAG_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c new file mode 100644 index 000000000000..5633f8572800 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c @@ -0,0 +1,315 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. */ + +#include <linux/netdevice.h> +#include "lag.h" +#include "lag_mp.h" +#include "mlx5_core.h" +#include "eswitch.h" +#include "lib/mlx5.h" + +static bool mlx5_lag_multipath_check_prereq(struct mlx5_lag *ldev) +{ + if (!ldev->pf[0].dev || !ldev->pf[1].dev) + return false; + + return mlx5_esw_multipath_prereq(ldev->pf[0].dev, ldev->pf[1].dev); +} + +static bool __mlx5_lag_is_multipath(struct mlx5_lag *ldev) +{ + return !!(ldev->flags & MLX5_LAG_FLAG_MULTIPATH); +} + +bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev) +{ + struct mlx5_lag *ldev; + bool res; + + ldev = mlx5_lag_dev_get(dev); + res = ldev && __mlx5_lag_is_multipath(ldev); + + return res; +} + +/** + * Set lag port affinity + * + * @ldev: lag device + * @port: + * 0 - set normal affinity. + * 1 - set affinity to port 1. + * 2 - set affinity to port 2. 
+ * + */ +static void mlx5_lag_set_port_affinity(struct mlx5_lag *ldev, int port) +{ + struct lag_tracker tracker; + + if (!__mlx5_lag_is_multipath(ldev)) + return; + + switch (port) { + case 0: + tracker.netdev_state[0].tx_enabled = true; + tracker.netdev_state[1].tx_enabled = true; + tracker.netdev_state[0].link_up = true; + tracker.netdev_state[1].link_up = true; + break; + case 1: + tracker.netdev_state[0].tx_enabled = true; + tracker.netdev_state[0].link_up = true; + tracker.netdev_state[1].tx_enabled = false; + tracker.netdev_state[1].link_up = false; + break; + case 2: + tracker.netdev_state[0].tx_enabled = false; + tracker.netdev_state[0].link_up = false; + tracker.netdev_state[1].tx_enabled = true; + tracker.netdev_state[1].link_up = true; + break; + default: + mlx5_core_warn(ldev->pf[0].dev, "Invalid affinity port %d", + port); + return; + } + + if (tracker.netdev_state[0].tx_enabled) + mlx5_notifier_call_chain(ldev->pf[0].dev->priv.events, + MLX5_DEV_EVENT_PORT_AFFINITY, + (void *)0); + + if (tracker.netdev_state[1].tx_enabled) + mlx5_notifier_call_chain(ldev->pf[1].dev->priv.events, + MLX5_DEV_EVENT_PORT_AFFINITY, + (void *)0); + + mlx5_modify_lag(ldev, &tracker); +} + +static void mlx5_lag_fib_event_flush(struct notifier_block *nb) +{ + struct lag_mp *mp = container_of(nb, struct lag_mp, fib_nb); + struct mlx5_lag *ldev = container_of(mp, struct mlx5_lag, lag_mp); + + flush_workqueue(ldev->wq); +} + +struct mlx5_fib_event_work { + struct work_struct work; + struct mlx5_lag *ldev; + unsigned long event; + union { + struct fib_entry_notifier_info fen_info; + struct fib_nh_notifier_info fnh_info; + }; +}; + +static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, + unsigned long event, + struct fib_info *fi) +{ + struct lag_mp *mp = &ldev->lag_mp; + + /* Handle delete event */ + if (event == FIB_EVENT_ENTRY_DEL) { + /* stop tracking */ + if (mp->mfi == fi) + mp->mfi = NULL; + return; + } + + /* Handle add/replace event */ + if (fi->fib_nhs == 1) { + if (__mlx5_lag_is_active(ldev)) { + struct net_device *nh_dev = fi->fib_nh[0].nh_dev; + int i = mlx5_lag_dev_get_netdev_idx(ldev, nh_dev); + + mlx5_lag_set_port_affinity(ldev, ++i); + } + return; + } + + if (fi->fib_nhs != 2) + return; + + /* Verify next hops are ports of the same hca */ + if (!(fi->fib_nh[0].nh_dev == ldev->pf[0].netdev && + fi->fib_nh[1].nh_dev == ldev->pf[1].netdev) && + !(fi->fib_nh[0].nh_dev == ldev->pf[1].netdev && + fi->fib_nh[1].nh_dev == ldev->pf[0].netdev)) { + mlx5_core_warn(ldev->pf[0].dev, "Multipath offload requires two ports of the same HCA\n"); + return; + } + + /* First time we see multipath route */ + if (!mp->mfi && !__mlx5_lag_is_active(ldev)) { + struct lag_tracker tracker; + + tracker = ldev->tracker; + mlx5_activate_lag(ldev, &tracker, MLX5_LAG_FLAG_MULTIPATH); + } + + mlx5_lag_set_port_affinity(ldev, 0); + mp->mfi = fi; +} + +static void mlx5_lag_fib_nexthop_event(struct mlx5_lag *ldev, + unsigned long event, + struct fib_nh *fib_nh, + struct fib_info *fi) +{ + struct lag_mp *mp = &ldev->lag_mp; + + /* Check the nh event is related to the route */ + if (!mp->mfi || mp->mfi != fi) + return; + + /* nh added/removed */ + if (event == FIB_EVENT_NH_DEL) { + int i = mlx5_lag_dev_get_netdev_idx(ldev, fib_nh->nh_dev); + + if (i >= 0) { + i = (i + 1) % 2 + 1; /* peer port */ + mlx5_lag_set_port_affinity(ldev, i); + } + } else if (event == FIB_EVENT_NH_ADD && + fi->fib_nhs == 2) { + mlx5_lag_set_port_affinity(ldev, 0); + } +} + +static void mlx5_lag_fib_update(struct work_struct *work) +{ + struct 
mlx5_fib_event_work *fib_work = + container_of(work, struct mlx5_fib_event_work, work); + struct mlx5_lag *ldev = fib_work->ldev; + struct fib_nh *fib_nh; + + /* Protect internal structures from changes */ + rtnl_lock(); + switch (fib_work->event) { + case FIB_EVENT_ENTRY_REPLACE: /* fall through */ + case FIB_EVENT_ENTRY_APPEND: /* fall through */ + case FIB_EVENT_ENTRY_ADD: /* fall through */ + case FIB_EVENT_ENTRY_DEL: + mlx5_lag_fib_route_event(ldev, fib_work->event, + fib_work->fen_info.fi); + fib_info_put(fib_work->fen_info.fi); + break; + case FIB_EVENT_NH_ADD: /* fall through */ + case FIB_EVENT_NH_DEL: + fib_nh = fib_work->fnh_info.fib_nh; + mlx5_lag_fib_nexthop_event(ldev, + fib_work->event, + fib_work->fnh_info.fib_nh, + fib_nh->nh_parent); + fib_info_put(fib_work->fnh_info.fib_nh->nh_parent); + break; + } + + rtnl_unlock(); + kfree(fib_work); +} + +static struct mlx5_fib_event_work * +mlx5_lag_init_fib_work(struct mlx5_lag *ldev, unsigned long event) +{ + struct mlx5_fib_event_work *fib_work; + + fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC); + if (WARN_ON(!fib_work)) + return NULL; + + INIT_WORK(&fib_work->work, mlx5_lag_fib_update); + fib_work->ldev = ldev; + fib_work->event = event; + + return fib_work; +} + +static int mlx5_lag_fib_event(struct notifier_block *nb, + unsigned long event, + void *ptr) +{ + struct lag_mp *mp = container_of(nb, struct lag_mp, fib_nb); + struct mlx5_lag *ldev = container_of(mp, struct mlx5_lag, lag_mp); + struct fib_notifier_info *info = ptr; + struct mlx5_fib_event_work *fib_work; + struct fib_entry_notifier_info *fen_info; + struct fib_nh_notifier_info *fnh_info; + struct fib_info *fi; + + if (info->family != AF_INET) + return NOTIFY_DONE; + + if (!mlx5_lag_multipath_check_prereq(ldev)) + return NOTIFY_DONE; + + switch (event) { + case FIB_EVENT_ENTRY_REPLACE: /* fall through */ + case FIB_EVENT_ENTRY_APPEND: /* fall through */ + case FIB_EVENT_ENTRY_ADD: /* fall through */ + case FIB_EVENT_ENTRY_DEL: + fen_info = container_of(info, struct fib_entry_notifier_info, + info); + fi = fen_info->fi; + if (fi->fib_dev != ldev->pf[0].netdev && + fi->fib_dev != ldev->pf[1].netdev) { + return NOTIFY_DONE; + } + fib_work = mlx5_lag_init_fib_work(ldev, event); + if (!fib_work) + return NOTIFY_DONE; + fib_work->fen_info = *fen_info; + /* Take reference on fib_info to prevent it from being + * freed while work is queued. Release it afterwards. 
+ */ + fib_info_hold(fib_work->fen_info.fi); + break; + case FIB_EVENT_NH_ADD: /* fall through */ + case FIB_EVENT_NH_DEL: + fnh_info = container_of(info, struct fib_nh_notifier_info, + info); + fib_work = mlx5_lag_init_fib_work(ldev, event); + if (!fib_work) + return NOTIFY_DONE; + fib_work->fnh_info = *fnh_info; + fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent); + break; + default: + return NOTIFY_DONE; + } + + queue_work(ldev->wq, &fib_work->work); + + return NOTIFY_DONE; +} + +int mlx5_lag_mp_init(struct mlx5_lag *ldev) +{ + struct lag_mp *mp = &ldev->lag_mp; + int err; + + if (mp->fib_nb.notifier_call) + return 0; + + mp->fib_nb.notifier_call = mlx5_lag_fib_event; + err = register_fib_notifier(&mp->fib_nb, + mlx5_lag_fib_event_flush); + if (err) + mp->fib_nb.notifier_call = NULL; + + return err; +} + +void mlx5_lag_mp_cleanup(struct mlx5_lag *ldev) +{ + struct lag_mp *mp = &ldev->lag_mp; + + if (!mp->fib_nb.notifier_call) + return; + + unregister_fib_notifier(&mp->fib_nb); + mp->fib_nb.notifier_call = NULL; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.h b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.h new file mode 100644 index 000000000000..6d14b1100be9 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. */ + +#ifndef __MLX5_LAG_MP_H__ +#define __MLX5_LAG_MP_H__ + +#include "lag.h" +#include "mlx5_core.h" + +struct lag_mp { + struct notifier_block fib_nb; + struct fib_info *mfi; /* used in tracking fib events */ +}; + +#ifdef CONFIG_MLX5_ESWITCH + +int mlx5_lag_mp_init(struct mlx5_lag *ldev); +void mlx5_lag_mp_cleanup(struct mlx5_lag *ldev); + +#else /* CONFIG_MLX5_ESWITCH */ + +static inline int mlx5_lag_mp_init(struct mlx5_lag *ldev) { return 0; } +static inline void mlx5_lag_mp_cleanup(struct mlx5_lag *ldev) {} + +#endif /* CONFIG_MLX5_ESWITCH */ +#endif /* __MLX5_LAG_MP_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c index 0d90b1b4a3d3..ca0ee9916e9e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c @@ -33,6 +33,7 @@ #include <linux/clocksource.h> #include <linux/highmem.h> #include <rdma/mlx5-abi.h> +#include "lib/eq.h" #include "en.h" #include "clock.h" @@ -71,7 +72,7 @@ static u64 read_internal_timer(const struct cyclecounter *cc) struct mlx5_core_dev *mdev = container_of(clock, struct mlx5_core_dev, clock); - return mlx5_read_internal_timer(mdev) & cc->mask; + return mlx5_read_internal_timer(mdev, NULL) & cc->mask; } static void mlx5_update_clock_info_page(struct mlx5_core_dev *mdev) @@ -155,15 +156,19 @@ static int mlx5_ptp_settime(struct ptp_clock_info *ptp, return 0; } -static int mlx5_ptp_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts) +static int mlx5_ptp_gettimex(struct ptp_clock_info *ptp, struct timespec64 *ts, + struct ptp_system_timestamp *sts) { struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info); - u64 ns; + struct mlx5_core_dev *mdev = container_of(clock, struct mlx5_core_dev, + clock); unsigned long flags; + u64 cycles, ns; write_seqlock_irqsave(&clock->lock, flags); - ns = timecounter_read(&clock->tc); + cycles = mlx5_read_internal_timer(mdev, sts); + ns = timecounter_cyc2time(&clock->tc, cycles); write_sequnlock_irqrestore(&clock->lock, flags); *ts = ns_to_timespec64(ns); @@ -306,7 +311,7 @@ static int 
mlx5_perout_configure(struct ptp_clock_info *ptp, ts.tv_sec = rq->perout.start.sec; ts.tv_nsec = rq->perout.start.nsec; ns = timespec64_to_ns(&ts); - cycles_now = mlx5_read_internal_timer(mdev); + cycles_now = mlx5_read_internal_timer(mdev, NULL); write_seqlock_irqsave(&clock->lock, flags); nsec_now = timecounter_cyc2time(&clock->tc, cycles_now); nsec_delta = ns - nsec_now; @@ -383,7 +388,7 @@ static const struct ptp_clock_info mlx5_ptp_clock_info = { .pps = 0, .adjfreq = mlx5_ptp_adjfreq, .adjtime = mlx5_ptp_adjtime, - .gettime64 = mlx5_ptp_gettime, + .gettimex64 = mlx5_ptp_gettimex, .settime64 = mlx5_ptp_settime, .enable = NULL, .verify = NULL, @@ -439,16 +444,17 @@ static void mlx5_get_pps_caps(struct mlx5_core_dev *mdev) clock->pps_info.pin_caps[7] = MLX5_GET(mtpps_reg, out, cap_pin_7_mode); } -void mlx5_pps_event(struct mlx5_core_dev *mdev, - struct mlx5_eqe *eqe) +static int mlx5_pps_event(struct notifier_block *nb, + unsigned long type, void *data) { - struct mlx5_clock *clock = &mdev->clock; + struct mlx5_clock *clock = mlx5_nb_cof(nb, struct mlx5_clock, pps_nb); + struct mlx5_core_dev *mdev = clock->mdev; struct ptp_clock_event ptp_event; - struct timespec64 ts; - u64 nsec_now, nsec_delta; u64 cycles_now, cycles_delta; + u64 nsec_now, nsec_delta, ns; + struct mlx5_eqe *eqe = data; int pin = eqe->data.pps.pin; - s64 ns; + struct timespec64 ts; unsigned long flags; switch (clock->ptp_info.pin_config[pin].func) { @@ -463,11 +469,12 @@ void mlx5_pps_event(struct mlx5_core_dev *mdev, } else { ptp_event.type = PTP_CLOCK_EXTTS; } + /* TODO: clock->ptp can be NULL if ptp_clock_register fails */ ptp_clock_event(clock->ptp, &ptp_event); break; case PTP_PF_PEROUT: - mlx5_ptp_gettime(&clock->ptp_info, &ts); - cycles_now = mlx5_read_internal_timer(mdev); + mlx5_ptp_gettimex(&clock->ptp_info, &ts, NULL); + cycles_now = mlx5_read_internal_timer(mdev, NULL); ts.tv_sec += 1; ts.tv_nsec = 0; ns = timespec64_to_ns(&ts); @@ -481,8 +488,11 @@ void mlx5_pps_event(struct mlx5_core_dev *mdev, write_sequnlock_irqrestore(&clock->lock, flags); break; default: - mlx5_core_err(mdev, " Unhandled event\n"); + mlx5_core_err(mdev, " Unhandled clock PPS event, func %d\n", + clock->ptp_info.pin_config[pin].func); } + + return NOTIFY_OK; } void mlx5_init_clock(struct mlx5_core_dev *mdev) @@ -511,14 +521,14 @@ void mlx5_init_clock(struct mlx5_core_dev *mdev) ktime_to_ns(ktime_get_real())); /* Calculate period in seconds to call the overflow watchdog - to make * sure counter is checked at least twice every wrap around. * The period is calculated as the minimum between max HW cycles count * (The clock source mask) and max amount of cycles that can be * multiplied by clock multiplier where the result doesn't exceed * 64bits. 
*/ overflow_cycles = div64_u64(~0ULL >> 1, clock->cycles.mult); - overflow_cycles = min(overflow_cycles, clock->cycles.mask >> 1); + overflow_cycles = min(overflow_cycles, div_u64(clock->cycles.mask, 3)); ns = cyclecounter_cyc2ns(&clock->cycles, overflow_cycles, frac, &frac); @@ -567,6 +577,9 @@ void mlx5_init_clock(struct mlx5_core_dev *mdev) PTR_ERR(clock->ptp)); clock->ptp = NULL; } + + MLX5_NB_INIT(&clock->pps_nb, mlx5_pps_event, PPS_EVENT); + mlx5_eq_notifier_register(mdev, &clock->pps_nb); } void mlx5_cleanup_clock(struct mlx5_core_dev *mdev) @@ -576,6 +589,7 @@ void mlx5_cleanup_clock(struct mlx5_core_dev *mdev) if (!MLX5_CAP_GEN(mdev, device_frequency_khz)) return; + mlx5_eq_notifier_unregister(mdev, &clock->pps_nb); if (clock->ptp) { ptp_clock_unregister(clock->ptp); clock->ptp = NULL; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h index 263cb6e2aeee..31600924bdc3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h @@ -36,7 +36,6 @@ #if IS_ENABLED(CONFIG_PTP_1588_CLOCK) void mlx5_init_clock(struct mlx5_core_dev *mdev); void mlx5_cleanup_clock(struct mlx5_core_dev *mdev); -void mlx5_pps_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe); static inline int mlx5_clock_get_ptp_index(struct mlx5_core_dev *mdev) { @@ -60,8 +59,6 @@ static inline ktime_t mlx5_timecounter_cyc2time(struct mlx5_clock *clock, #else static inline void mlx5_init_clock(struct mlx5_core_dev *mdev) {} static inline void mlx5_cleanup_clock(struct mlx5_core_dev *mdev) {} -static inline void mlx5_pps_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe) {} - static inline int mlx5_clock_get_ptp_index(struct mlx5_core_dev *mdev) { return -1; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c new file mode 100644 index 000000000000..bced2efe9bef --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c @@ -0,0 +1,255 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2018 Mellanox Technologies */ + +#include <linux/mlx5/vport.h> +#include "lib/devcom.h" + +static LIST_HEAD(devcom_list); + +#define devcom_for_each_component(priv, comp, iter) \ + for (iter = 0; \ + comp = &(priv)->components[iter], iter < MLX5_DEVCOM_NUM_COMPONENTS; \ + iter++) + +struct mlx5_devcom_component { + struct { + void *data; + } device[MLX5_MAX_PORTS]; + + mlx5_devcom_event_handler_t handler; + struct rw_semaphore sem; + bool paired; +}; + +struct mlx5_devcom_list { + struct list_head list; + + struct mlx5_devcom_component components[MLX5_DEVCOM_NUM_COMPONENTS]; + struct mlx5_core_dev *devs[MLX5_MAX_PORTS]; +}; + +struct mlx5_devcom { + struct mlx5_devcom_list *priv; + int idx; +}; + +static struct mlx5_devcom_list *mlx5_devcom_list_alloc(void) +{ + struct mlx5_devcom_component *comp; + struct mlx5_devcom_list *priv; + int i; + + priv = kzalloc(sizeof(*priv), GFP_KERNEL); + if (!priv) + return NULL; + + devcom_for_each_component(priv, comp, i) + init_rwsem(&comp->sem); + + return priv; +} + +static struct mlx5_devcom *mlx5_devcom_alloc(struct mlx5_devcom_list *priv, + u8 idx) +{ + struct mlx5_devcom *devcom; + + devcom = kzalloc(sizeof(*devcom), GFP_KERNEL); + if (!devcom) + return NULL; + + devcom->priv = priv; + devcom->idx = idx; + return devcom; +} + +/* Must be called with intf_mutex held */ +struct mlx5_devcom *mlx5_devcom_register_device(struct mlx5_core_dev *dev) +{ + struct 
mlx5_devcom_list *priv = NULL, *iter; + struct mlx5_devcom *devcom = NULL; + bool new_priv = false; + u64 sguid0, sguid1; + int idx, i; + + if (!mlx5_core_is_pf(dev)) + return NULL; + + sguid0 = mlx5_query_nic_system_image_guid(dev); + list_for_each_entry(iter, &devcom_list, list) { + struct mlx5_core_dev *tmp_dev = NULL; + + idx = -1; + for (i = 0; i < MLX5_MAX_PORTS; i++) { + if (iter->devs[i]) + tmp_dev = iter->devs[i]; + else + idx = i; + } + + if (idx == -1) + continue; + + sguid1 = mlx5_query_nic_system_image_guid(tmp_dev); + if (sguid0 != sguid1) + continue; + + priv = iter; + break; + } + + if (!priv) { + priv = mlx5_devcom_list_alloc(); + if (!priv) + return ERR_PTR(-ENOMEM); + + idx = 0; + new_priv = true; + } + + priv->devs[idx] = dev; + devcom = mlx5_devcom_alloc(priv, idx); + if (!devcom) { + kfree(priv); + return ERR_PTR(-ENOMEM); + } + + if (new_priv) + list_add(&priv->list, &devcom_list); + + return devcom; +} + +/* Must be called with intf_mutex held */ +void mlx5_devcom_unregister_device(struct mlx5_devcom *devcom) +{ + struct mlx5_devcom_list *priv; + int i; + + if (IS_ERR_OR_NULL(devcom)) + return; + + priv = devcom->priv; + priv->devs[devcom->idx] = NULL; + + kfree(devcom); + + for (i = 0; i < MLX5_MAX_PORTS; i++) + if (priv->devs[i]) + break; + + if (i != MLX5_MAX_PORTS) + return; + + list_del(&priv->list); + kfree(priv); +} + +void mlx5_devcom_register_component(struct mlx5_devcom *devcom, + enum mlx5_devcom_components id, + mlx5_devcom_event_handler_t handler, + void *data) +{ + struct mlx5_devcom_component *comp; + + if (IS_ERR_OR_NULL(devcom)) + return; + + WARN_ON(!data); + + comp = &devcom->priv->components[id]; + down_write(&comp->sem); + comp->handler = handler; + comp->device[devcom->idx].data = data; + up_write(&comp->sem); +} + +void mlx5_devcom_unregister_component(struct mlx5_devcom *devcom, + enum mlx5_devcom_components id) +{ + struct mlx5_devcom_component *comp; + + if (IS_ERR_OR_NULL(devcom)) + return; + + comp = &devcom->priv->components[id]; + down_write(&comp->sem); + comp->device[devcom->idx].data = NULL; + up_write(&comp->sem); +} + +int mlx5_devcom_send_event(struct mlx5_devcom *devcom, + enum mlx5_devcom_components id, + int event, + void *event_data) +{ + struct mlx5_devcom_component *comp; + int err = -ENODEV, i; + + if (IS_ERR_OR_NULL(devcom)) + return err; + + comp = &devcom->priv->components[id]; + down_write(&comp->sem); + for (i = 0; i < MLX5_MAX_PORTS; i++) + if (i != devcom->idx && comp->device[i].data) { + err = comp->handler(event, comp->device[i].data, + event_data); + break; + } + + up_write(&comp->sem); + return err; +} + +void mlx5_devcom_set_paired(struct mlx5_devcom *devcom, + enum mlx5_devcom_components id, + bool paired) +{ + struct mlx5_devcom_component *comp; + + comp = &devcom->priv->components[id]; + WARN_ON(!rwsem_is_locked(&comp->sem)); + + comp->paired = paired; +} + +bool mlx5_devcom_is_paired(struct mlx5_devcom *devcom, + enum mlx5_devcom_components id) +{ + if (IS_ERR_OR_NULL(devcom)) + return false; + + return devcom->priv->components[id].paired; +} + +void *mlx5_devcom_get_peer_data(struct mlx5_devcom *devcom, + enum mlx5_devcom_components id) +{ + struct mlx5_devcom_component *comp; + int i; + + if (IS_ERR_OR_NULL(devcom)) + return NULL; + + comp = &devcom->priv->components[id]; + down_read(&comp->sem); + if (!comp->paired) { + up_read(&comp->sem); + return NULL; + } + + for (i = 0; i < MLX5_MAX_PORTS; i++) + if (i != devcom->idx) + break; + + return comp->device[i].data; +} + +void 
mlx5_devcom_release_peer_data(struct mlx5_devcom *devcom, + enum mlx5_devcom_components id) +{ + struct mlx5_devcom_component *comp = &devcom->priv->components[id]; + + up_read(&comp->sem); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h new file mode 100644 index 000000000000..939d5bf1581b --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2018 Mellanox Technologies */ + +#ifndef __LIB_MLX5_DEVCOM_H__ +#define __LIB_MLX5_DEVCOM_H__ + +#include <linux/mlx5/driver.h> + +enum mlx5_devcom_components { + MLX5_DEVCOM_ESW_OFFLOADS, + + MLX5_DEVCOM_NUM_COMPONENTS, +}; + +typedef int (*mlx5_devcom_event_handler_t)(int event, + void *my_data, + void *event_data); + +struct mlx5_devcom *mlx5_devcom_register_device(struct mlx5_core_dev *dev); +void mlx5_devcom_unregister_device(struct mlx5_devcom *devcom); + +void mlx5_devcom_register_component(struct mlx5_devcom *devcom, + enum mlx5_devcom_components id, + mlx5_devcom_event_handler_t handler, + void *data); +void mlx5_devcom_unregister_component(struct mlx5_devcom *devcom, + enum mlx5_devcom_components id); + +int mlx5_devcom_send_event(struct mlx5_devcom *devcom, + enum mlx5_devcom_components id, + int event, + void *event_data); + +void mlx5_devcom_set_paired(struct mlx5_devcom *devcom, + enum mlx5_devcom_components id, + bool paired); +bool mlx5_devcom_is_paired(struct mlx5_devcom *devcom, + enum mlx5_devcom_components id); + +void *mlx5_devcom_get_peer_data(struct mlx5_devcom *devcom, + enum mlx5_devcom_components id); +void mlx5_devcom_release_peer_data(struct mlx5_devcom *devcom, + enum mlx5_devcom_components id); + +#endif + diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h new file mode 100644 index 000000000000..c0fb6d72b695 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h @@ -0,0 +1,98 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2018 Mellanox Technologies */ + +#ifndef __LIB_MLX5_EQ_H__ +#define __LIB_MLX5_EQ_H__ +#include <linux/mlx5/driver.h> +#include <linux/mlx5/eq.h> +#include <linux/mlx5/cq.h> + +#define MLX5_MAX_IRQ_NAME (32) +#define MLX5_EQE_SIZE (sizeof(struct mlx5_eqe)) + +struct mlx5_eq_tasklet { + struct list_head list; + struct list_head process_list; + struct tasklet_struct task; + spinlock_t lock; /* lock completion tasklet list */ +}; + +struct mlx5_cq_table { + spinlock_t lock; /* protect radix tree */ + struct radix_tree_root tree; +}; + +struct mlx5_eq { + struct mlx5_core_dev *dev; + struct mlx5_cq_table cq_table; + __be32 __iomem *doorbell; + u32 cons_index; + struct mlx5_frag_buf buf; + int size; + unsigned int vecidx; + unsigned int irqn; + u8 eqn; + int nent; + struct mlx5_rsc_debug *dbg; +}; + +struct mlx5_eq_comp { + struct mlx5_eq core; /* Must be first */ + struct mlx5_eq_tasklet tasklet_ctx; + struct list_head list; +}; + +static inline struct mlx5_eqe *get_eqe(struct mlx5_eq *eq, u32 entry) +{ + return mlx5_buf_offset(&eq->buf, entry * MLX5_EQE_SIZE); +} + +static inline struct mlx5_eqe *next_eqe_sw(struct mlx5_eq *eq) +{ + struct mlx5_eqe *eqe = get_eqe(eq, eq->cons_index & (eq->nent - 1)); + + return ((eqe->owner & 1) ^ !!(eq->cons_index & eq->nent)) ? NULL : eqe; +} + +static inline void eq_update_ci(struct mlx5_eq *eq, int arm) +{ + __be32 __iomem *addr = eq->doorbell + (arm ? 
0 : 2); + u32 val = (eq->cons_index & 0xffffff) | (eq->eqn << 24); + + __raw_writel((__force u32)cpu_to_be32(val), addr); + /* We still want ordering, just not swabbing, so add a barrier */ + mb(); +} + +int mlx5_eq_table_init(struct mlx5_core_dev *dev); +void mlx5_eq_table_cleanup(struct mlx5_core_dev *dev); +int mlx5_eq_table_create(struct mlx5_core_dev *dev); +void mlx5_eq_table_destroy(struct mlx5_core_dev *dev); + +int mlx5_eq_add_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq); +int mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq); +struct mlx5_eq_comp *mlx5_eqn2comp_eq(struct mlx5_core_dev *dev, int eqn); +struct mlx5_eq *mlx5_get_async_eq(struct mlx5_core_dev *dev); +void mlx5_cq_tasklet_cb(unsigned long data); +struct cpumask *mlx5_eq_comp_cpumask(struct mlx5_core_dev *dev, int ix); + +u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq_comp *eq); +void mlx5_eq_synchronize_async_irq(struct mlx5_core_dev *dev); +void mlx5_eq_synchronize_cmd_irq(struct mlx5_core_dev *dev); + +int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq); +void mlx5_debug_eq_remove(struct mlx5_core_dev *dev, struct mlx5_eq *eq); +int mlx5_eq_debugfs_init(struct mlx5_core_dev *dev); +void mlx5_eq_debugfs_cleanup(struct mlx5_core_dev *dev); + +/* This function should only be called after mlx5_cmd_force_teardown_hca */ +void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev); + +#ifdef CONFIG_RFS_ACCEL +struct cpu_rmap *mlx5_eq_table_get_rmap(struct mlx5_core_dev *dev); +#endif + +int mlx5_eq_notifier_register(struct mlx5_core_dev *dev, struct mlx5_nb *nb); +int mlx5_eq_notifier_unregister(struct mlx5_core_dev *dev, struct mlx5_nb *nb); + +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h index 7550b1cc8c6a..397a2847867a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h @@ -33,6 +33,8 @@ #ifndef __LIB_MLX5_H__ #define __LIB_MLX5_H__ +#include "mlx5_core.h" + void mlx5_init_reserved_gids(struct mlx5_core_dev *dev); void mlx5_cleanup_reserved_gids(struct mlx5_core_dev *dev); int mlx5_core_reserve_gids(struct mlx5_core_dev *dev, unsigned int count); @@ -40,4 +42,38 @@ void mlx5_core_unreserve_gids(struct mlx5_core_dev *dev, unsigned int count); int mlx5_core_reserved_gid_alloc(struct mlx5_core_dev *dev, int *gid_index); void mlx5_core_reserved_gid_free(struct mlx5_core_dev *dev, int gid_index); +/* TODO move to lib/events.h */ + +#define PORT_MODULE_EVENT_MODULE_STATUS_MASK 0xF +#define PORT_MODULE_EVENT_ERROR_TYPE_MASK 0xF + +enum port_module_event_status_type { + MLX5_MODULE_STATUS_PLUGGED = 0x1, + MLX5_MODULE_STATUS_UNPLUGGED = 0x2, + MLX5_MODULE_STATUS_ERROR = 0x3, + MLX5_MODULE_STATUS_DISABLED = 0x4, + MLX5_MODULE_STATUS_NUM, +}; + +enum port_module_event_error_type { + MLX5_MODULE_EVENT_ERROR_POWER_BUDGET_EXCEEDED = 0x0, + MLX5_MODULE_EVENT_ERROR_LONG_RANGE_FOR_NON_MLNX = 0x1, + MLX5_MODULE_EVENT_ERROR_BUS_STUCK = 0x2, + MLX5_MODULE_EVENT_ERROR_NO_EEPROM_RETRY_TIMEOUT = 0x3, + MLX5_MODULE_EVENT_ERROR_ENFORCE_PART_NUMBER_LIST = 0x4, + MLX5_MODULE_EVENT_ERROR_UNKNOWN_IDENTIFIER = 0x5, + MLX5_MODULE_EVENT_ERROR_HIGH_TEMPERATURE = 0x6, + MLX5_MODULE_EVENT_ERROR_BAD_CABLE = 0x7, + MLX5_MODULE_EVENT_ERROR_PCIE_POWER_SLOT_EXCEEDED = 0xc, + MLX5_MODULE_EVENT_ERROR_NUM, +}; + +struct mlx5_pme_stats { + u64 status_counters[MLX5_MODULE_STATUS_NUM]; + u64 error_counters[MLX5_MODULE_EVENT_ERROR_NUM]; +}; + +void mlx5_get_pme_stats(struct mlx5_core_dev *dev, 
struct mlx5_pme_stats *stats); +int mlx5_notifier_call_chain(struct mlx5_events *events, unsigned int event, void *data); + #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c index 98359559c77e..a71d5b9c7ab2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c @@ -108,8 +108,7 @@ int mlx5_mpfs_init(struct mlx5_core_dev *dev) mutex_init(&mpfs->lock); mpfs->size = l2table_size; - mpfs->bitmap = kcalloc(BITS_TO_LONGS(l2table_size), - sizeof(uintptr_t), GFP_KERNEL); + mpfs->bitmap = bitmap_zalloc(l2table_size, GFP_KERNEL); if (!mpfs->bitmap) { kfree(mpfs); return -ENOMEM; @@ -127,7 +126,7 @@ void mlx5_mpfs_cleanup(struct mlx5_core_dev *dev) return; WARN_ON(!hlist_empty(mpfs->hash)); - kfree(mpfs->bitmap); + bitmap_free(mpfs->bitmap); kfree(mpfs); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.c new file mode 100644 index 000000000000..40f4a19b1ce1 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.c @@ -0,0 +1,205 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. */ + +#include <linux/module.h> +#include <linux/mlx5/driver.h> +#include <linux/mlx5/port.h> +#include <linux/mlx5/cmd.h> +#include "mlx5_core.h" +#include "lib/port_tun.h" + +struct mlx5_port_tun_entropy_flags { + bool force_supported, force_enabled; + bool calc_supported, calc_enabled; + bool gre_calc_supported, gre_calc_enabled; +}; + +static void mlx5_query_port_tun_entropy(struct mlx5_core_dev *mdev, + struct mlx5_port_tun_entropy_flags *entropy_flags) +{ + u32 out[MLX5_ST_SZ_DW(pcmr_reg)]; + /* Default values for FW which do not support MLX5_REG_PCMR */ + entropy_flags->force_supported = false; + entropy_flags->calc_supported = false; + entropy_flags->gre_calc_supported = false; + entropy_flags->force_enabled = false; + entropy_flags->calc_enabled = true; + entropy_flags->gre_calc_enabled = true; + + if (!MLX5_CAP_GEN(mdev, ports_check)) + return; + + if (mlx5_query_ports_check(mdev, out, sizeof(out))) + return; + + entropy_flags->force_supported = !!(MLX5_GET(pcmr_reg, out, entropy_force_cap)); + entropy_flags->calc_supported = !!(MLX5_GET(pcmr_reg, out, entropy_calc_cap)); + entropy_flags->gre_calc_supported = !!(MLX5_GET(pcmr_reg, out, entropy_gre_calc_cap)); + entropy_flags->force_enabled = !!(MLX5_GET(pcmr_reg, out, entropy_force)); + entropy_flags->calc_enabled = !!(MLX5_GET(pcmr_reg, out, entropy_calc)); + entropy_flags->gre_calc_enabled = !!(MLX5_GET(pcmr_reg, out, entropy_gre_calc)); +} + +static int mlx5_set_port_tun_entropy_calc(struct mlx5_core_dev *mdev, u8 enable, + u8 force) +{ + u32 in[MLX5_ST_SZ_DW(pcmr_reg)] = {0}; + int err; + + err = mlx5_query_ports_check(mdev, in, sizeof(in)); + if (err) + return err; + MLX5_SET(pcmr_reg, in, local_port, 1); + MLX5_SET(pcmr_reg, in, entropy_force, force); + MLX5_SET(pcmr_reg, in, entropy_calc, enable); + return mlx5_set_ports_check(mdev, in, sizeof(in)); +} + +static int mlx5_set_port_gre_tun_entropy_calc(struct mlx5_core_dev *mdev, + u8 enable, u8 force) +{ + u32 in[MLX5_ST_SZ_DW(pcmr_reg)] = {0}; + int err; + + err = mlx5_query_ports_check(mdev, in, sizeof(in)); + if (err) + return err; + MLX5_SET(pcmr_reg, in, local_port, 1); + MLX5_SET(pcmr_reg, in, entropy_force, force); + MLX5_SET(pcmr_reg, in, entropy_gre_calc, enable); + return mlx5_set_ports_check(mdev, in, sizeof(in)); +} 
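Note: both setters above follow a read-modify-write discipline against the PCMR register: query the current port-check state first so unrelated bits survive, then overwrite only the entropy fields. A minimal sketch of the same pattern for the fcs_chk field, using only helpers visible in this patch (it mirrors the mlx5_set_port_fcs() fix in the port.c hunk further down):

static int pcmr_rmw_sketch(struct mlx5_core_dev *mdev, u8 enable)
{
	u32 in[MLX5_ST_SZ_DW(pcmr_reg)] = {0};
	int err;

	err = mlx5_query_ports_check(mdev, in, sizeof(in));	/* read */
	if (err)
		return err;
	MLX5_SET(pcmr_reg, in, local_port, 1);
	MLX5_SET(pcmr_reg, in, fcs_chk, enable);		/* modify */
	return mlx5_set_ports_check(mdev, in, sizeof(in));	/* write */
}

Skipping the initial query is exactly the bug that port.c hunk fixes: starting from a zeroed pcmr_reg would silently clear every other port-check bit.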
+ +void mlx5_init_port_tun_entropy(struct mlx5_tun_entropy *tun_entropy, + struct mlx5_core_dev *mdev) +{ + struct mlx5_port_tun_entropy_flags entropy_flags; + + tun_entropy->mdev = mdev; + mutex_init(&tun_entropy->lock); + mlx5_query_port_tun_entropy(mdev, &entropy_flags); + tun_entropy->num_enabling_entries = 0; + tun_entropy->num_disabling_entries = 0; + tun_entropy->enabled = entropy_flags.calc_enabled; + tun_entropy->enabled = + (entropy_flags.calc_supported) ? + entropy_flags.calc_enabled : true; +} + +static int mlx5_set_entropy(struct mlx5_tun_entropy *tun_entropy, + int reformat_type, bool enable) +{ + struct mlx5_port_tun_entropy_flags entropy_flags; + int err; + + mlx5_query_port_tun_entropy(tun_entropy->mdev, &entropy_flags); + /* Tunnel entropy calculation may be controlled either on port basis + * for all tunneling protocols or specifically for GRE protocol. + * Prioritize GRE protocol control (if capable) over global port + * configuration. + */ + if (entropy_flags.gre_calc_supported && + reformat_type == MLX5_REFORMAT_TYPE_L2_TO_NVGRE) { + /* Other applications may change the global FW entropy + * calculations settings. Check that the current entropy value + * is the negative of the updated value. + */ + if (entropy_flags.force_enabled && + enable == entropy_flags.gre_calc_enabled) { + mlx5_core_warn(tun_entropy->mdev, + "Unexpected GRE entropy calc setting - expected %d", + !entropy_flags.gre_calc_enabled); + return -EOPNOTSUPP; + } + err = mlx5_set_port_gre_tun_entropy_calc(tun_entropy->mdev, enable, + entropy_flags.force_supported); + if (err) + return err; + /* if we turn on the entropy we don't need to force it anymore */ + if (entropy_flags.force_supported && enable) { + err = mlx5_set_port_gre_tun_entropy_calc(tun_entropy->mdev, 1, 0); + if (err) + return err; + } + } else if (entropy_flags.calc_supported) { + /* Other applications may change the global FW entropy + * calculations settings. Check that the current entropy value + * is the negative of the updated value. + */ + if (entropy_flags.force_enabled && + enable == entropy_flags.calc_enabled) { + mlx5_core_warn(tun_entropy->mdev, + "Unexpected entropy calc setting - expected %d", + !entropy_flags.calc_enabled); + return -EOPNOTSUPP; + } + /* GRE requires disabling entropy calculation. if there are + * enabling entries (i.e VXLAN) we cannot turn it off for them, + * thus fail. + */ + if (tun_entropy->num_enabling_entries) + return -EOPNOTSUPP; + err = mlx5_set_port_tun_entropy_calc(tun_entropy->mdev, enable, + entropy_flags.force_supported); + if (err) + return err; + tun_entropy->enabled = enable; + /* if we turn on the entropy we don't need to force it anymore */ + if (entropy_flags.force_supported && enable) { + err = mlx5_set_port_tun_entropy_calc(tun_entropy->mdev, 1, 0); + if (err) + return err; + } + } + + return 0; +} + +/* the function manages the refcount for enabling/disabling tunnel types. + * the return value indicates if the inc is successful or not, depending on + * entropy capabilities and configuration. + */ +int mlx5_tun_entropy_refcount_inc(struct mlx5_tun_entropy *tun_entropy, + int reformat_type) +{ + /* the default is error for unknown (non VXLAN/GRE tunnel types) */ + int err = -EOPNOTSUPP; + + mutex_lock(&tun_entropy->lock); + if (reformat_type == MLX5_REFORMAT_TYPE_L2_TO_VXLAN && + tun_entropy->enabled) { + /* in case entropy calculation is enabled for all tunneling + * types, it is ok for VXLAN, so approve. + * otherwise keep the error default. 
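	 * (VXLAN derives its entropy from the outer UDP source port, so it
	 * only has to register as an enabling entry and never reprograms
	 * the device; background note, not from the patch.)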
+ */ + tun_entropy->num_enabling_entries++; + err = 0; + } else if (reformat_type == MLX5_REFORMAT_TYPE_L2_TO_NVGRE) { + /* turn off the entropy only for the first GRE rule. + * for the next rules the entropy was already disabled + * successfully. + */ + if (tun_entropy->num_disabling_entries == 0) + err = mlx5_set_entropy(tun_entropy, reformat_type, 0); + else + err = 0; + if (!err) + tun_entropy->num_disabling_entries++; + } + mutex_unlock(&tun_entropy->lock); + + return err; +} + +void mlx5_tun_entropy_refcount_dec(struct mlx5_tun_entropy *tun_entropy, + int reformat_type) +{ + mutex_lock(&tun_entropy->lock); + if (reformat_type == MLX5_REFORMAT_TYPE_L2_TO_VXLAN) + tun_entropy->num_enabling_entries--; + else if (reformat_type == MLX5_REFORMAT_TYPE_L2_TO_NVGRE && + --tun_entropy->num_disabling_entries == 0) + mlx5_set_entropy(tun_entropy, reformat_type, 1); + mutex_unlock(&tun_entropy->lock); +} + diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.h new file mode 100644 index 000000000000..54c42a88705e --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. */ + +#ifndef __MLX5_PORT_TUN_H__ +#define __MLX5_PORT_TUN_H__ + +#include <linux/mlx5/driver.h> + +struct mlx5_tun_entropy { + struct mlx5_core_dev *mdev; + u32 num_enabling_entries; + u32 num_disabling_entries; + u8 enabled; + struct mutex lock; /* lock the entropy fields */ +}; + +void mlx5_init_port_tun_entropy(struct mlx5_tun_entropy *tun_entropy, + struct mlx5_core_dev *mdev); +int mlx5_tun_entropy_refcount_inc(struct mlx5_tun_entropy *tun_entropy, + int reformat_type); +void mlx5_tun_entropy_refcount_dec(struct mlx5_tun_entropy *tun_entropy, + int reformat_type); + +#endif /* __MLX5_PORT_TUN_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mad.c b/drivers/net/ethernet/mellanox/mlx5/core/mad.c deleted file mode 100644 index 3a3b0005fd2b..000000000000 --- a/drivers/net/ethernet/mellanox/mlx5/core/mad.c +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/mlx5/driver.h> -#include <linux/mlx5/cmd.h> -#include "mlx5_core.h" - -int mlx5_core_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb, - u16 opmod, u8 port) -{ - int outlen = MLX5_ST_SZ_BYTES(mad_ifc_out); - int inlen = MLX5_ST_SZ_BYTES(mad_ifc_in); - int err = -ENOMEM; - void *data; - void *resp; - u32 *out; - u32 *in; - - in = kzalloc(inlen, GFP_KERNEL); - out = kzalloc(outlen, GFP_KERNEL); - if (!in || !out) - goto out; - - MLX5_SET(mad_ifc_in, in, opcode, MLX5_CMD_OP_MAD_IFC); - MLX5_SET(mad_ifc_in, in, op_mod, opmod); - MLX5_SET(mad_ifc_in, in, port, port); - - data = MLX5_ADDR_OF(mad_ifc_in, in, mad); - memcpy(data, inb, MLX5_FLD_SZ_BYTES(mad_ifc_in, mad)); - - err = mlx5_cmd_exec(dev, in, inlen, out, outlen); - if (err) - goto out; - - resp = MLX5_ADDR_OF(mad_ifc_out, out, response_mad_packet); - memcpy(outb, resp, - MLX5_FLD_SZ_BYTES(mad_ifc_out, response_mad_packet)); - -out: - kfree(out); - kfree(in); - return err; -} -EXPORT_SYMBOL_GPL(mlx5_core_mad_ifc); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 28132c7dc05f..76716419370d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -43,7 +43,6 @@ #include <linux/mlx5/driver.h> #include <linux/mlx5/cq.h> #include <linux/mlx5/qp.h> -#include <linux/mlx5/srq.h> #include <linux/debugfs.h> #include <linux/kmod.h> #include <linux/mlx5/mlx5_ifc.h> @@ -53,6 +52,7 @@ #endif #include <net/devlink.h> #include "mlx5_core.h" +#include "lib/eq.h" #include "fs_core.h" #include "lib/mpfs.h" #include "eswitch.h" @@ -63,7 +63,9 @@ #include "accel/tls.h" #include "lib/clock.h" #include "lib/vxlan.h" +#include "lib/devcom.h" #include "diag/fw_tracer.h" +#include "ecpf.h" MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>"); MODULE_DESCRIPTION("Mellanox 5th generation network adapters (ConnectX series) core driver"); @@ -162,26 +164,6 @@ static struct mlx5_profile profile[] = { .size = 8, .limit = 4 }, - .mr_cache[16] = { - .size = 8, - .limit = 4 - }, - .mr_cache[17] = { - .size = 8, - .limit = 4 - }, - .mr_cache[18] = { - .size = 8, - .limit = 4 - }, - .mr_cache[19] = { - .size = 4, - .limit = 2 - }, - .mr_cache[20] = { - .size = 4, - .limit = 2 - }, }, }; @@ -319,51 +301,6 @@ static void release_bar(struct pci_dev *pdev) pci_release_regions(pdev); } -static int mlx5_alloc_irq_vectors(struct mlx5_core_dev *dev) -{ - struct mlx5_priv *priv = &dev->priv; - struct mlx5_eq_table *table = &priv->eq_table; - int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ? 
- MLX5_CAP_GEN(dev, max_num_eqs) : - 1 << MLX5_CAP_GEN(dev, log_max_eq); - int nvec; - int err; - - nvec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() + - MLX5_EQ_VEC_COMP_BASE; - nvec = min_t(int, nvec, num_eqs); - if (nvec <= MLX5_EQ_VEC_COMP_BASE) - return -ENOMEM; - - priv->irq_info = kcalloc(nvec, sizeof(*priv->irq_info), GFP_KERNEL); - if (!priv->irq_info) - return -ENOMEM; - - nvec = pci_alloc_irq_vectors(dev->pdev, - MLX5_EQ_VEC_COMP_BASE + 1, nvec, - PCI_IRQ_MSIX); - if (nvec < 0) { - err = nvec; - goto err_free_irq_info; - } - - table->num_comp_vectors = nvec - MLX5_EQ_VEC_COMP_BASE; - - return 0; - -err_free_irq_info: - kfree(priv->irq_info); - return err; -} - -static void mlx5_free_irq_vectors(struct mlx5_core_dev *dev) -{ - struct mlx5_priv *priv = &dev->priv; - - pci_free_irq_vectors(dev->pdev); - kfree(priv->irq_info); -} - struct mlx5_reg_host_endianness { u8 he; u8 rsvd[15]; @@ -503,6 +440,58 @@ static int handle_hca_cap_atomic(struct mlx5_core_dev *dev) return err; } +static int handle_hca_cap_odp(struct mlx5_core_dev *dev) +{ + void *set_hca_cap; + void *set_ctx; + int set_sz; + bool do_set = false; + int err; + + if (!IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) || + !MLX5_CAP_GEN(dev, pg)) + return 0; + + err = mlx5_core_get_caps(dev, MLX5_CAP_ODP); + if (err) + return err; + + set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in); + set_ctx = kzalloc(set_sz, GFP_KERNEL); + if (!set_ctx) + return -ENOMEM; + + set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability); + memcpy(set_hca_cap, dev->caps.hca_cur[MLX5_CAP_ODP], + MLX5_ST_SZ_BYTES(odp_cap)); + +#define ODP_CAP_SET_MAX(dev, field) \ + do { \ + u32 _res = MLX5_CAP_ODP_MAX(dev, field); \ + if (_res) { \ + do_set = true; \ + MLX5_SET(odp_cap, set_hca_cap, field, _res); \ + } \ + } while (0) + + ODP_CAP_SET_MAX(dev, ud_odp_caps.srq_receive); + ODP_CAP_SET_MAX(dev, rc_odp_caps.srq_receive); + ODP_CAP_SET_MAX(dev, xrc_odp_caps.srq_receive); + ODP_CAP_SET_MAX(dev, xrc_odp_caps.send); + ODP_CAP_SET_MAX(dev, xrc_odp_caps.receive); + ODP_CAP_SET_MAX(dev, xrc_odp_caps.write); + ODP_CAP_SET_MAX(dev, xrc_odp_caps.read); + ODP_CAP_SET_MAX(dev, xrc_odp_caps.atomic); + + if (do_set) + err = set_caps(dev, set_ctx, set_sz, + MLX5_SET_HCA_CAP_OP_MOD_ODP); + + kfree(set_ctx); + + return err; +} + static int handle_hca_cap(struct mlx5_core_dev *dev) { void *set_ctx = NULL; @@ -576,6 +565,33 @@ query_ex: return err; } +static int set_hca_cap(struct mlx5_core_dev *dev) +{ + struct pci_dev *pdev = dev->pdev; + int err; + + err = handle_hca_cap(dev); + if (err) { + dev_err(&pdev->dev, "handle_hca_cap failed\n"); + goto out; + } + + err = handle_hca_cap_atomic(dev); + if (err) { + dev_err(&pdev->dev, "handle_hca_cap_atomic failed\n"); + goto out; + } + + err = handle_hca_cap_odp(dev); + if (err) { + dev_err(&pdev->dev, "handle_hca_cap_odp failed\n"); + goto out; + } + +out: + return err; +} + static int set_hca_ctrl(struct mlx5_core_dev *dev) { struct mlx5_reg_host_endianness he_in; @@ -611,6 +627,8 @@ int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id) MLX5_SET(enable_hca_in, in, opcode, MLX5_CMD_OP_ENABLE_HCA); MLX5_SET(enable_hca_in, in, function_id, func_id); + MLX5_SET(enable_hca_in, in, embedded_cpu_function, + dev->caps.embedded_cpu); return mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); } @@ -621,191 +639,29 @@ int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id) MLX5_SET(disable_hca_in, in, opcode, MLX5_CMD_OP_DISABLE_HCA); MLX5_SET(disable_hca_in, in, function_id, func_id); + 
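/* embedded_cpu_function tells FW on which side of a BlueField-style
 * device func_id lives. Reusing the enable_hca_in layout here in the
 * disable path appears intentional: the field sits at the same offset
 * in both command layouts (assumption from the mlx5_ifc definitions). */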
MLX5_SET(enable_hca_in, in, embedded_cpu_function, + dev->caps.embedded_cpu); return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } -u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev) +u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev, + struct ptp_system_timestamp *sts) { u32 timer_h, timer_h1, timer_l; timer_h = ioread32be(&dev->iseg->internal_timer_h); + ptp_read_system_prets(sts); timer_l = ioread32be(&dev->iseg->internal_timer_l); + ptp_read_system_postts(sts); timer_h1 = ioread32be(&dev->iseg->internal_timer_h); - if (timer_h != timer_h1) /* wrap around */ + if (timer_h != timer_h1) { + /* wrap around */ + ptp_read_system_prets(sts); timer_l = ioread32be(&dev->iseg->internal_timer_l); - - return (u64)timer_l | (u64)timer_h1 << 32; -} - -static int mlx5_irq_set_affinity_hint(struct mlx5_core_dev *mdev, int i) -{ - struct mlx5_priv *priv = &mdev->priv; - int irq = pci_irq_vector(mdev->pdev, MLX5_EQ_VEC_COMP_BASE + i); - - if (!zalloc_cpumask_var(&priv->irq_info[i].mask, GFP_KERNEL)) { - mlx5_core_warn(mdev, "zalloc_cpumask_var failed"); - return -ENOMEM; - } - - cpumask_set_cpu(cpumask_local_spread(i, priv->numa_node), - priv->irq_info[i].mask); - - if (IS_ENABLED(CONFIG_SMP) && - irq_set_affinity_hint(irq, priv->irq_info[i].mask)) - mlx5_core_warn(mdev, "irq_set_affinity_hint failed, irq 0x%.4x", irq); - - return 0; -} - -static void mlx5_irq_clear_affinity_hint(struct mlx5_core_dev *mdev, int i) -{ - struct mlx5_priv *priv = &mdev->priv; - int irq = pci_irq_vector(mdev->pdev, MLX5_EQ_VEC_COMP_BASE + i); - - irq_set_affinity_hint(irq, NULL); - free_cpumask_var(priv->irq_info[i].mask); -} - -static int mlx5_irq_set_affinity_hints(struct mlx5_core_dev *mdev) -{ - int err; - int i; - - for (i = 0; i < mdev->priv.eq_table.num_comp_vectors; i++) { - err = mlx5_irq_set_affinity_hint(mdev, i); - if (err) - goto err_out; + ptp_read_system_postts(sts); } - return 0; - -err_out: - for (i--; i >= 0; i--) - mlx5_irq_clear_affinity_hint(mdev, i); - - return err; -} - -static void mlx5_irq_clear_affinity_hints(struct mlx5_core_dev *mdev) -{ - int i; - - for (i = 0; i < mdev->priv.eq_table.num_comp_vectors; i++) - mlx5_irq_clear_affinity_hint(mdev, i); -} - -int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn, - unsigned int *irqn) -{ - struct mlx5_eq_table *table = &dev->priv.eq_table; - struct mlx5_eq *eq, *n; - int err = -ENOENT; - - spin_lock(&table->lock); - list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) { - if (eq->index == vector) { - *eqn = eq->eqn; - *irqn = eq->irqn; - err = 0; - break; - } - } - spin_unlock(&table->lock); - - return err; -} -EXPORT_SYMBOL(mlx5_vector2eqn); - -struct mlx5_eq *mlx5_eqn2eq(struct mlx5_core_dev *dev, int eqn) -{ - struct mlx5_eq_table *table = &dev->priv.eq_table; - struct mlx5_eq *eq; - - spin_lock(&table->lock); - list_for_each_entry(eq, &table->comp_eqs_list, list) - if (eq->eqn == eqn) { - spin_unlock(&table->lock); - return eq; - } - - spin_unlock(&table->lock); - - return ERR_PTR(-ENOENT); -} - -static void free_comp_eqs(struct mlx5_core_dev *dev) -{ - struct mlx5_eq_table *table = &dev->priv.eq_table; - struct mlx5_eq *eq, *n; - -#ifdef CONFIG_RFS_ACCEL - if (dev->rmap) { - free_irq_cpu_rmap(dev->rmap); - dev->rmap = NULL; - } -#endif - spin_lock(&table->lock); - list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) { - list_del(&eq->list); - spin_unlock(&table->lock); - if (mlx5_destroy_unmap_eq(dev, eq)) - mlx5_core_warn(dev, "failed to destroy EQ 0x%x\n", - eq->eqn); - kfree(eq); - 
spin_lock(&table->lock); - } - spin_unlock(&table->lock); -} - -static int alloc_comp_eqs(struct mlx5_core_dev *dev) -{ - struct mlx5_eq_table *table = &dev->priv.eq_table; - char name[MLX5_MAX_IRQ_NAME]; - struct mlx5_eq *eq; - int ncomp_vec; - int nent; - int err; - int i; - - INIT_LIST_HEAD(&table->comp_eqs_list); - ncomp_vec = table->num_comp_vectors; - nent = MLX5_COMP_EQ_SIZE; -#ifdef CONFIG_RFS_ACCEL - dev->rmap = alloc_irq_cpu_rmap(ncomp_vec); - if (!dev->rmap) - return -ENOMEM; -#endif - for (i = 0; i < ncomp_vec; i++) { - eq = kzalloc(sizeof(*eq), GFP_KERNEL); - if (!eq) { - err = -ENOMEM; - goto clean; - } - -#ifdef CONFIG_RFS_ACCEL - irq_cpu_rmap_add(dev->rmap, pci_irq_vector(dev->pdev, - MLX5_EQ_VEC_COMP_BASE + i)); -#endif - snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", i); - err = mlx5_create_map_eq(dev, eq, - i + MLX5_EQ_VEC_COMP_BASE, nent, 0, - name, MLX5_EQ_TYPE_COMP); - if (err) { - kfree(eq); - goto clean; - } - mlx5_core_dbg(dev, "allocated completion EQN %d\n", eq->eqn); - eq->index = i; - spin_lock(&table->lock); - list_add_tail(&eq->list, &table->comp_eqs_list); - spin_unlock(&table->lock); - } - - return 0; - -clean: - free_comp_eqs(dev); - return err; + return (u64)timer_l | (u64)timer_h1 << 32; } static int mlx5_core_set_issi(struct mlx5_core_dev *dev) @@ -877,11 +733,9 @@ static int mlx5_pci_init(struct mlx5_core_dev *dev, struct mlx5_priv *priv) priv->numa_node = dev_to_node(&dev->pdev->dev); - priv->dbg_root = debugfs_create_dir(dev_name(&pdev->dev), mlx5_debugfs_root); - if (!priv->dbg_root) { - dev_err(&pdev->dev, "Cannot create debugfs dir, aborting\n"); - return -ENOMEM; - } + if (mlx5_debugfs_root) + priv->dbg_root = + debugfs_create_dir(pci_name(pdev), mlx5_debugfs_root); err = mlx5_pci_enable_device(dev); if (err) { @@ -903,6 +757,11 @@ static int mlx5_pci_init(struct mlx5_core_dev *dev, struct mlx5_priv *priv) goto err_clr_master; } + if (pci_enable_atomic_ops_to_root(pdev, PCI_EXP_DEVCAP2_ATOMIC_COMP32) && + pci_enable_atomic_ops_to_root(pdev, PCI_EXP_DEVCAP2_ATOMIC_COMP64) && + pci_enable_atomic_ops_to_root(pdev, PCI_EXP_DEVCAP2_ATOMIC_COMP128)) + mlx5_core_dbg(dev, "Enabling pci atomics failed\n"); + dev->iseg_base = pci_resource_start(dev->pdev, 0); dev->iseg = ioremap(dev->iseg_base, sizeof(*dev->iseg)); if (!dev->iseg) { @@ -938,28 +797,37 @@ static int mlx5_init_once(struct mlx5_core_dev *dev, struct mlx5_priv *priv) struct pci_dev *pdev = dev->pdev; int err; + priv->devcom = mlx5_devcom_register_device(dev); + if (IS_ERR(priv->devcom)) + dev_err(&pdev->dev, "failed to register with devcom (0x%p)\n", + priv->devcom); + err = mlx5_query_board_id(dev); if (err) { dev_err(&pdev->dev, "query board id failed\n"); - goto out; + goto err_devcom; } - err = mlx5_eq_init(dev); + err = mlx5_eq_table_init(dev); if (err) { dev_err(&pdev->dev, "failed to initialize eq\n"); - goto out; + goto err_devcom; + } + + err = mlx5_events_init(dev); + if (err) { + dev_err(&pdev->dev, "failed to initialize events\n"); + goto err_eq_cleanup; } err = mlx5_cq_debugfs_init(dev); if (err) { dev_err(&pdev->dev, "failed to initialize cq debugfs\n"); - goto err_eq_cleanup; + goto err_events_cleanup; } mlx5_init_qp_table(dev); - mlx5_init_srq_table(dev); - mlx5_init_mkey_table(dev); mlx5_init_reserved_gids(dev); @@ -1013,14 +881,15 @@ err_rl_cleanup: err_tables_cleanup: mlx5_vxlan_destroy(dev->vxlan); mlx5_cleanup_mkey_table(dev); - mlx5_cleanup_srq_table(dev); mlx5_cleanup_qp_table(dev); mlx5_cq_debugfs_cleanup(dev); - +err_events_cleanup: + mlx5_events_cleanup(dev); 
err_eq_cleanup: - mlx5_eq_cleanup(dev); + mlx5_eq_table_cleanup(dev); +err_devcom: + mlx5_devcom_unregister_device(dev->priv.devcom); -out: return err; } @@ -1036,10 +905,11 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev) mlx5_cleanup_clock(dev); mlx5_cleanup_reserved_gids(dev); mlx5_cleanup_mkey_table(dev); - mlx5_cleanup_srq_table(dev); mlx5_cleanup_qp_table(dev); mlx5_cq_debugfs_cleanup(dev); - mlx5_eq_cleanup(dev); + mlx5_events_cleanup(dev); + mlx5_eq_table_cleanup(dev); + mlx5_devcom_unregister_device(dev->priv.devcom); } static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, @@ -1048,6 +918,7 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, struct pci_dev *pdev = dev->pdev; int err; + dev->caps.embedded_cpu = mlx5_read_embedded_cpu(dev); mutex_lock(&dev->intf_state_mutex); if (test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) { dev_warn(&dev->pdev->dev, "%s: interface is up, NOP\n", @@ -1113,15 +984,9 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, goto reclaim_boot_pages; } - err = handle_hca_cap(dev); + err = set_hca_cap(dev); if (err) { - dev_err(&pdev->dev, "handle_hca_cap failed\n"); - goto reclaim_boot_pages; - } - - err = handle_hca_cap_atomic(dev); - if (err) { - dev_err(&pdev->dev, "handle_hca_cap_atomic failed\n"); + dev_err(&pdev->dev, "set_hca_cap failed\n"); goto reclaim_boot_pages; } @@ -1131,16 +996,10 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, goto reclaim_boot_pages; } - err = mlx5_pagealloc_start(dev); - if (err) { - dev_err(&pdev->dev, "mlx5_pagealloc_start failed\n"); - goto reclaim_boot_pages; - } - err = mlx5_cmd_init_hca(dev, sw_owner_id); if (err) { dev_err(&pdev->dev, "init hca failed\n"); - goto err_pagealloc_stop; + goto reclaim_boot_pages; } mlx5_set_driver_version(dev); @@ -1161,23 +1020,20 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, } } - err = mlx5_alloc_irq_vectors(dev); - if (err) { - dev_err(&pdev->dev, "alloc irq vectors failed\n"); - goto err_cleanup_once; - } - dev->priv.uar = mlx5_get_uars_page(dev); if (IS_ERR(dev->priv.uar)) { dev_err(&pdev->dev, "Failed allocating uar, aborting\n"); err = PTR_ERR(dev->priv.uar); - goto err_disable_msix; + goto err_get_uars; } - err = mlx5_start_eqs(dev); + mlx5_events_start(dev); + mlx5_pagealloc_start(dev); + + err = mlx5_eq_table_create(dev); if (err) { - dev_err(&pdev->dev, "Failed to start pages and async EQs\n"); - goto err_put_uars; + dev_err(&pdev->dev, "Failed to create EQs\n"); + goto err_eq_table; } err = mlx5_fw_tracer_init(dev->tracer); @@ -1186,18 +1042,6 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, goto err_fw_tracer; } - err = alloc_comp_eqs(dev); - if (err) { - dev_err(&pdev->dev, "Failed to alloc completion EQs\n"); - goto err_comp_eqs; - } - - err = mlx5_irq_set_affinity_hints(dev); - if (err) { - dev_err(&pdev->dev, "Failed to alloc affinity hint cpumask\n"); - goto err_affinity_hints; - } - err = mlx5_fpga_device_start(dev); if (err) { dev_err(&pdev->dev, "fpga device start failed %d\n", err); @@ -1234,6 +1078,12 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, goto err_sriov; } + err = mlx5_ec_init(dev); + if (err) { + dev_err(&pdev->dev, "Failed to init embedded CPU\n"); + goto err_ec; + } + if (mlx5_device_registered(dev)) { mlx5_attach_device(dev); } else { @@ -1251,6 +1101,9 @@ out: return 0; err_reg_dev: + mlx5_ec_cleanup(dev); + +err_ec: 
mlx5_sriov_detach(dev); err_sriov: @@ -1266,24 +1119,17 @@ err_ipsec_start: mlx5_fpga_device_stop(dev); err_fpga_start: - mlx5_irq_clear_affinity_hints(dev); - -err_affinity_hints: - free_comp_eqs(dev); - -err_comp_eqs: mlx5_fw_tracer_cleanup(dev->tracer); err_fw_tracer: - mlx5_stop_eqs(dev); + mlx5_eq_table_destroy(dev); -err_put_uars: +err_eq_table: + mlx5_pagealloc_stop(dev); + mlx5_events_stop(dev); mlx5_put_uars_page(dev, priv->uar); -err_disable_msix: - mlx5_free_irq_vectors(dev); - -err_cleanup_once: +err_get_uars: if (boot) mlx5_cleanup_once(dev); @@ -1294,9 +1140,6 @@ err_stop_poll: goto out_err; } -err_pagealloc_stop: - mlx5_pagealloc_stop(dev); - reclaim_boot_pages: mlx5_reclaim_startup_pages(dev); @@ -1335,26 +1178,26 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, if (mlx5_device_registered(dev)) mlx5_detach_device(dev); + mlx5_ec_cleanup(dev); mlx5_sriov_detach(dev); mlx5_cleanup_fs(dev); mlx5_accel_ipsec_cleanup(dev); mlx5_accel_tls_cleanup(dev); mlx5_fpga_device_stop(dev); - mlx5_irq_clear_affinity_hints(dev); - free_comp_eqs(dev); mlx5_fw_tracer_cleanup(dev->tracer); - mlx5_stop_eqs(dev); + mlx5_eq_table_destroy(dev); + mlx5_pagealloc_stop(dev); + mlx5_events_stop(dev); mlx5_put_uars_page(dev, priv->uar); - mlx5_free_irq_vectors(dev); if (cleanup) mlx5_cleanup_once(dev); mlx5_stop_health_poll(dev, cleanup); + err = mlx5_cmd_teardown_hca(dev); if (err) { dev_err(&dev->pdev->dev, "tear_down_hca failed, skip cleanup\n"); goto out; } - mlx5_pagealloc_stop(dev); mlx5_reclaim_startup_pages(dev); mlx5_core_disable_hca(dev, 0); mlx5_cmd_cleanup(dev); @@ -1364,12 +1207,6 @@ out: return err; } -struct mlx5_core_event_handler { - void (*event)(struct mlx5_core_dev *dev, - enum mlx5_dev_event event, - void *data); -}; - static const struct devlink_ops mlx5_devlink_ops = { #ifdef CONFIG_MLX5_ESWITCH .eswitch_mode_set = mlx5_devlink_eswitch_mode_set, @@ -1403,7 +1240,6 @@ static int init_one(struct pci_dev *pdev, pci_set_drvdata(pdev, dev); dev->pdev = pdev; - dev->event = mlx5_core_event; dev->profile = &profile[prof_sel]; INIT_LIST_HEAD(&priv->ctx_list); @@ -1411,17 +1247,6 @@ static int init_one(struct pci_dev *pdev, mutex_init(&dev->pci_status_mutex); mutex_init(&dev->intf_state_mutex); - INIT_LIST_HEAD(&priv->waiting_events_list); - priv->is_accum_events = false; - -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - err = init_srcu_struct(&priv->pfault_srcu); - if (err) { - dev_err(&pdev->dev, "init_srcu_struct failed with error code %d\n", - err); - goto clean_dev; - } -#endif mutex_init(&priv->bfregs.reg_head.lock); mutex_init(&priv->bfregs.wc_head.lock); INIT_LIST_HEAD(&priv->bfregs.reg_head.list); @@ -1430,7 +1255,7 @@ static int init_one(struct pci_dev *pdev, err = mlx5_pci_init(dev, priv); if (err) { dev_err(&pdev->dev, "mlx5_pci_init failed with error code %d\n", err); - goto clean_srcu; + goto clean_dev; } err = mlx5_health_init(dev); @@ -1439,12 +1264,14 @@ static int init_one(struct pci_dev *pdev, goto close_pci; } - mlx5_pagealloc_init(dev); + err = mlx5_pagealloc_init(dev); + if (err) + goto err_pagealloc_init; err = mlx5_load_one(dev, priv, true); if (err) { dev_err(&pdev->dev, "mlx5_load_one failed with error code %d\n", err); - goto clean_health; + goto err_load_one; } request_module_nowait(MLX5_IB_MOD); @@ -1458,16 +1285,13 @@ static int init_one(struct pci_dev *pdev, clean_load: mlx5_unload_one(dev, priv, true); -clean_health: +err_load_one: mlx5_pagealloc_cleanup(dev); +err_pagealloc_init: mlx5_health_cleanup(dev); close_pci: 
mlx5_pci_close(dev, priv); -clean_srcu: -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - cleanup_srcu_struct(&priv->pfault_srcu); clean_dev: -#endif devlink_free(devlink); return err; @@ -1491,9 +1315,6 @@ static void remove_one(struct pci_dev *pdev) mlx5_pagealloc_cleanup(dev); mlx5_health_cleanup(dev); mlx5_pci_close(dev, priv); -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - cleanup_srcu_struct(&priv->pfault_srcu); -#endif devlink_free(devlink); } @@ -1637,7 +1458,6 @@ succeed: * kexec. There is no need to cleanup the mlx5_core software * contexts. */ - mlx5_irq_clear_affinity_hints(dev); mlx5_core_eq_free_irqs(dev); return 0; @@ -1669,6 +1489,8 @@ static const struct pci_device_id mlx5_core_pci_table[] = { { PCI_VDEVICE(MELLANOX, 0x101a), MLX5_PCI_DEV_IS_VF}, /* ConnectX-5 Ex VF */ { PCI_VDEVICE(MELLANOX, 0x101b) }, /* ConnectX-6 */ { PCI_VDEVICE(MELLANOX, 0x101c), MLX5_PCI_DEV_IS_VF}, /* ConnectX-6 VF */ + { PCI_VDEVICE(MELLANOX, 0x101d) }, /* ConnectX-6 Dx */ + { PCI_VDEVICE(MELLANOX, 0x101e), MLX5_PCI_DEV_IS_VF}, /* ConnectX Family mlx5Gen Virtual Function */ { PCI_VDEVICE(MELLANOX, 0xa2d2) }, /* BlueField integrated ConnectX-5 network controller */ { PCI_VDEVICE(MELLANOX, 0xa2d3), MLX5_PCI_DEV_IS_VF}, /* BlueField integrated ConnectX-5 network controller VF */ { 0, } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 0594d0961cb3..7b331674622c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -38,6 +38,7 @@ #include <linux/sched.h> #include <linux/if_link.h> #include <linux/firmware.h> +#include <linux/ptp_clock_kernel.h> #include <linux/mlx5/cq.h> #include <linux/mlx5/fs.h> @@ -78,6 +79,11 @@ do { \ __func__, __LINE__, current->pid, \ ##__VA_ARGS__) +#define mlx5_core_warn_once(__dev, format, ...) \ + dev_warn_once(&(__dev)->pdev->dev, "%s:%d:(pid %d): " format, \ + __func__, __LINE__, current->pid, \ + ##__VA_ARGS__) + #define mlx5_core_info(__dev, format, ...) 
\ dev_info(&(__dev)->pdev->dev, format, ##__VA_ARGS__) @@ -97,12 +103,6 @@ int mlx5_cmd_init_hca(struct mlx5_core_dev *dev, uint32_t *sw_owner_id); int mlx5_cmd_teardown_hca(struct mlx5_core_dev *dev); int mlx5_cmd_force_teardown_hca(struct mlx5_core_dev *dev); int mlx5_cmd_fast_teardown_hca(struct mlx5_core_dev *dev); - -void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event, - unsigned long param); -void mlx5_core_page_fault(struct mlx5_core_dev *dev, - struct mlx5_pagefault *pfault); -void mlx5_port_module_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe); void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force); void mlx5_disable_device(struct mlx5_core_dev *dev); void mlx5_recover_device(struct mlx5_core_dev *dev); @@ -121,31 +121,12 @@ int mlx5_modify_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy, u32 modify_bitmask); int mlx5_destroy_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy, u32 element_id); -int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev); -u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev); - -int mlx5_eq_init(struct mlx5_core_dev *dev); -void mlx5_eq_cleanup(struct mlx5_core_dev *dev); -int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, - int nent, u64 mask, const char *name, - enum mlx5_eq_type type); -int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq); -int mlx5_eq_add_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq); -int mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq); -int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq, - u32 *out, int outlen); -int mlx5_start_eqs(struct mlx5_core_dev *dev); -void mlx5_stop_eqs(struct mlx5_core_dev *dev); -/* This function should only be called after mlx5_cmd_force_teardown_hca */ -void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev); -struct mlx5_eq *mlx5_eqn2eq(struct mlx5_core_dev *dev, int eqn); -u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq *eq); -void mlx5_cq_tasklet_cb(unsigned long data); -void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced); -int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq); -void mlx5_debug_eq_remove(struct mlx5_core_dev *dev, struct mlx5_eq *eq); -int mlx5_eq_debugfs_init(struct mlx5_core_dev *dev); -void mlx5_eq_debugfs_cleanup(struct mlx5_core_dev *dev); +int mlx5_wait_for_pages(struct mlx5_core_dev *dev, int *pages); +u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev, + struct ptp_system_timestamp *sts); + +void mlx5_cmd_trigger_completions(struct mlx5_core_dev *dev); +void mlx5_cmd_flush(struct mlx5_core_dev *dev); int mlx5_cq_debugfs_init(struct mlx5_core_dev *dev); void mlx5_cq_debugfs_cleanup(struct mlx5_core_dev *dev); @@ -159,6 +140,11 @@ int mlx5_query_qcam_reg(struct mlx5_core_dev *mdev, u32 *qcam, void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev); void mlx5_lag_remove(struct mlx5_core_dev *dev); +int mlx5_events_init(struct mlx5_core_dev *dev); +void mlx5_events_cleanup(struct mlx5_core_dev *dev); +void mlx5_events_start(struct mlx5_core_dev *dev); +void mlx5_events_stop(struct mlx5_core_dev *dev); + void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv); void mlx5_remove_device(struct mlx5_interface *intf, struct mlx5_priv *priv); void mlx5_attach_device(struct mlx5_core_dev *dev); @@ -202,10 +188,8 @@ static inline int mlx5_lag_is_lacp_owner(struct mlx5_core_dev *dev) MLX5_CAP_GEN(dev, lag_master); } -int mlx5_lag_allow(struct mlx5_core_dev *dev); 
-int mlx5_lag_forbid(struct mlx5_core_dev *dev); - void mlx5_reload_interface(struct mlx5_core_dev *mdev, int protocol); +void mlx5_lag_update(struct mlx5_core_dev *dev); enum { MLX5_NIC_IFC_FULL = 0, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c index 0670165afd5f..ea744d8466ea 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mr.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c @@ -51,9 +51,10 @@ void mlx5_cleanup_mkey_table(struct mlx5_core_dev *dev) int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey, - u32 *in, int inlen, - u32 *out, int outlen, - mlx5_cmd_cbk_t callback, void *context) + struct mlx5_async_ctx *async_ctx, u32 *in, + int inlen, u32 *out, int outlen, + mlx5_async_cbk_t callback, + struct mlx5_async_work *context) { struct mlx5_mkey_table *table = &dev->priv.mkey_table; u32 lout[MLX5_ST_SZ_DW(create_mkey_out)] = {0}; @@ -71,7 +72,7 @@ int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev, MLX5_SET(mkc, mkc, mkey_7_0, key); if (callback) - return mlx5_cmd_exec_cb(dev, in, inlen, out, outlen, + return mlx5_cmd_exec_cb(async_ctx, in, inlen, out, outlen, callback, context); err = mlx5_cmd_exec(dev, in, inlen, lout, sizeof(lout)); @@ -105,7 +106,7 @@ int mlx5_core_create_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey, u32 *in, int inlen) { - return mlx5_core_create_mkey_cb(dev, mkey, in, inlen, + return mlx5_core_create_mkey_cb(dev, mkey, NULL, in, inlen, NULL, 0, NULL, NULL); } EXPORT_SYMBOL(mlx5_core_create_mkey); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c index e36d3e3675f9..41025387ff2c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c @@ -37,6 +37,7 @@ #include <linux/mlx5/driver.h> #include <linux/mlx5/cmd.h> #include "mlx5_core.h" +#include "lib/eq.h" enum { MLX5_PAGES_CANT_GIVE = 0, @@ -47,6 +48,7 @@ enum { struct mlx5_pages_req { struct mlx5_core_dev *dev; u16 func_id; + u8 ec_function; s32 npages; struct work_struct work; }; @@ -142,6 +144,7 @@ static int mlx5_cmd_query_pages(struct mlx5_core_dev *dev, u16 *func_id, MLX5_SET(query_pages_in, in, op_mod, boot ? 
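/* boot pages feed FW before INIT_HCA; init pages follow in the later
   load stage (ordering per mlx5_load_one(); background note) */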
MLX5_QUERY_PAGES_IN_OP_MOD_BOOT_PAGES : MLX5_QUERY_PAGES_IN_OP_MOD_INIT_PAGES); + MLX5_SET(query_pages_in, in, embedded_cpu_function, mlx5_core_is_ecpf(dev)); err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); if (err) @@ -252,7 +255,8 @@ err_mapping: return err; } -static void page_notify_fail(struct mlx5_core_dev *dev, u16 func_id) +static void page_notify_fail(struct mlx5_core_dev *dev, u16 func_id, + bool ec_function) { u32 out[MLX5_ST_SZ_DW(manage_pages_out)] = {0}; u32 in[MLX5_ST_SZ_DW(manage_pages_in)] = {0}; @@ -261,6 +265,7 @@ static void page_notify_fail(struct mlx5_core_dev *dev, u16 func_id) MLX5_SET(manage_pages_in, in, opcode, MLX5_CMD_OP_MANAGE_PAGES); MLX5_SET(manage_pages_in, in, op_mod, MLX5_PAGES_CANT_GIVE); MLX5_SET(manage_pages_in, in, function_id, func_id); + MLX5_SET(manage_pages_in, in, embedded_cpu_function, ec_function); err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); if (err) @@ -269,7 +274,7 @@ static void page_notify_fail(struct mlx5_core_dev *dev, u16 func_id) } static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages, - int notify_fail) + int notify_fail, bool ec_function) { u32 out[MLX5_ST_SZ_DW(manage_pages_out)] = {0}; int inlen = MLX5_ST_SZ_BYTES(manage_pages_in); @@ -304,6 +309,7 @@ retry: MLX5_SET(manage_pages_in, in, op_mod, MLX5_PAGES_GIVE); MLX5_SET(manage_pages_in, in, function_id, func_id); MLX5_SET(manage_pages_in, in, input_num_entries, npages); + MLX5_SET(manage_pages_in, in, embedded_cpu_function, ec_function); err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); if (err) { @@ -315,8 +321,11 @@ retry: dev->priv.fw_pages += npages; if (func_id) dev->priv.vfs_pages += npages; + else if (mlx5_core_is_ecpf(dev) && !ec_function) + dev->priv.peer_pf_pages += npages; - mlx5_core_dbg(dev, "err %d\n", err); + mlx5_core_dbg(dev, "npages %d, ec_function %d, func_id 0x%x, err %d\n", + npages, ec_function, func_id, err); kvfree(in); return 0; @@ -327,7 +336,7 @@ out_4k: out_free: kvfree(in); if (notify_fail) - page_notify_fail(dev, func_id); + page_notify_fail(dev, func_id, ec_function); return err; } @@ -363,7 +372,7 @@ static int reclaim_pages_cmd(struct mlx5_core_dev *dev, } static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages, - int *nclaimed) + int *nclaimed, bool ec_function) { int outlen = MLX5_ST_SZ_BYTES(manage_pages_out); u32 in[MLX5_ST_SZ_DW(manage_pages_in)] = {0}; @@ -384,6 +393,7 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages, MLX5_SET(manage_pages_in, in, op_mod, MLX5_PAGES_TAKE); MLX5_SET(manage_pages_in, in, function_id, func_id); MLX5_SET(manage_pages_in, in, input_num_entries, npages); + MLX5_SET(manage_pages_in, in, embedded_cpu_function, ec_function); mlx5_core_dbg(dev, "npages %d, outlen %d\n", npages, outlen); err = reclaim_pages_cmd(dev, in, sizeof(in), out, outlen); @@ -409,6 +419,8 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages, dev->priv.fw_pages -= num_claimed; if (func_id) dev->priv.vfs_pages -= num_claimed; + else if (mlx5_core_is_ecpf(dev) && !ec_function) + dev->priv.peer_pf_pages -= num_claimed; out_free: kvfree(out); @@ -422,9 +434,10 @@ static void pages_work_handler(struct work_struct *work) int err = 0; if (req->npages < 0) - err = reclaim_pages(dev, req->func_id, -1 * req->npages, NULL); + err = reclaim_pages(dev, req->func_id, -1 * req->npages, NULL, + req->ec_function); else if (req->npages > 0) - err = give_pages(dev, req->func_id, req->npages, 1); + err = give_pages(dev, req->func_id, 
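/* npages > 0 means FW asked for pages; notify_fail=1 so a failed
   allocation is reported back via MLX5_PAGES_CANT_GIVE */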
req->npages, 1, req->ec_function); if (err) mlx5_core_warn(dev, "%s fail %d\n", @@ -433,22 +446,43 @@ static void pages_work_handler(struct work_struct *work) kfree(req); } -void mlx5_core_req_pages_handler(struct mlx5_core_dev *dev, u16 func_id, - s32 npages) +enum { + EC_FUNCTION_MASK = 0x8000, +}; + +static int req_pages_handler(struct notifier_block *nb, + unsigned long type, void *data) { struct mlx5_pages_req *req; - + struct mlx5_core_dev *dev; + struct mlx5_priv *priv; + struct mlx5_eqe *eqe; + bool ec_function; + u16 func_id; + s32 npages; + + priv = mlx5_nb_cof(nb, struct mlx5_priv, pg_nb); + dev = container_of(priv, struct mlx5_core_dev, priv); + eqe = data; + + func_id = be16_to_cpu(eqe->data.req_pages.func_id); + npages = be32_to_cpu(eqe->data.req_pages.num_pages); + ec_function = be16_to_cpu(eqe->data.req_pages.ec_function) & EC_FUNCTION_MASK; + mlx5_core_dbg(dev, "page request for func 0x%x, npages %d\n", + func_id, npages); req = kzalloc(sizeof(*req), GFP_ATOMIC); if (!req) { mlx5_core_warn(dev, "failed to allocate pages request\n"); - return; + return NOTIFY_DONE; } req->dev = dev; req->func_id = func_id; req->npages = npages; + req->ec_function = ec_function; INIT_WORK(&req->work, pages_work_handler); queue_work(dev->priv.pg_wq, &req->work); + return NOTIFY_OK; } int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev, int boot) @@ -464,7 +498,7 @@ int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev, int boot) mlx5_core_dbg(dev, "requested %d %s pages for func_id 0x%x\n", npages, boot ? "boot" : "init", func_id); - return give_pages(dev, func_id, npages, 0); + return give_pages(dev, func_id, npages, 0, mlx5_core_is_ecpf(dev)); } enum { @@ -498,7 +532,7 @@ int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev) fwp = rb_entry(p, struct fw_page, rb_node); err = reclaim_pages(dev, fwp->func_id, optimal_reclaimed_pages(), - &nclaimed); + &nclaimed, mlx5_core_is_ecpf(dev)); if (err) { mlx5_core_warn(dev, "failed reclaiming pages (%d)\n", @@ -520,39 +554,45 @@ int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev) WARN(dev->priv.vfs_pages, "VFs FW pages counter is %d after reclaiming all pages\n", dev->priv.vfs_pages); + WARN(dev->priv.peer_pf_pages, + "Peer PF FW pages counter is %d after reclaiming all pages\n", + dev->priv.peer_pf_pages); return 0; } -void mlx5_pagealloc_init(struct mlx5_core_dev *dev) +int mlx5_pagealloc_init(struct mlx5_core_dev *dev) { dev->priv.page_root = RB_ROOT; INIT_LIST_HEAD(&dev->priv.free_list); + dev->priv.pg_wq = create_singlethread_workqueue("mlx5_page_allocator"); + if (!dev->priv.pg_wq) + return -ENOMEM; + + return 0; } void mlx5_pagealloc_cleanup(struct mlx5_core_dev *dev) { - /* nothing */ + destroy_workqueue(dev->priv.pg_wq); } -int mlx5_pagealloc_start(struct mlx5_core_dev *dev) +void mlx5_pagealloc_start(struct mlx5_core_dev *dev) { - dev->priv.pg_wq = create_singlethread_workqueue("mlx5_page_allocator"); - if (!dev->priv.pg_wq) - return -ENOMEM; - - return 0; + MLX5_NB_INIT(&dev->priv.pg_nb, req_pages_handler, PAGE_REQUEST); + mlx5_eq_notifier_register(dev, &dev->priv.pg_nb); } void mlx5_pagealloc_stop(struct mlx5_core_dev *dev) { - destroy_workqueue(dev->priv.pg_wq); + mlx5_eq_notifier_unregister(dev, &dev->priv.pg_nb); + flush_workqueue(dev->priv.pg_wq); } -int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev) +int mlx5_wait_for_pages(struct mlx5_core_dev *dev, int *pages) { unsigned long end = jiffies + msecs_to_jiffies(MAX_RECLAIM_VFS_PAGES_TIME_MSECS); - int prev_vfs_pages = dev->priv.vfs_pages; + int prev_pages = 
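/* snapshot so the loop below can detect reclaim progress and extend
   its timeout */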
*pages; /* In case of internal error we will free the pages manually later */ if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { @@ -560,16 +600,16 @@ int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev) return 0; } - mlx5_core_dbg(dev, "Waiting for %d pages from %s\n", prev_vfs_pages, + mlx5_core_dbg(dev, "Waiting for %d pages from %s\n", prev_pages, dev->priv.name); - while (dev->priv.vfs_pages) { + while (*pages) { if (time_after(jiffies, end)) { - mlx5_core_warn(dev, "aborting while there are %d pending pages\n", dev->priv.vfs_pages); + mlx5_core_warn(dev, "aborting while there are %d pending pages\n", *pages); return -ETIMEDOUT; } - if (dev->priv.vfs_pages < prev_vfs_pages) { + if (*pages < prev_pages) { end = jiffies + msecs_to_jiffies(MAX_RECLAIM_VFS_PAGES_TIME_MSECS); - prev_vfs_pages = dev->priv.vfs_pages; + prev_pages = *pages; } msleep(50); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c index 31a9cbd85689..361468e0435d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -30,10 +30,7 @@ * SOFTWARE. */ -#include <linux/module.h> -#include <linux/mlx5/driver.h> #include <linux/mlx5/port.h> -#include <linux/mlx5/cmd.h> #include "mlx5_core.h" int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in, @@ -157,44 +154,6 @@ int mlx5_set_port_beacon(struct mlx5_core_dev *dev, u16 beacon_duration) sizeof(out), MLX5_REG_MLCR, 0, 1); } -int mlx5_query_port_proto_cap(struct mlx5_core_dev *dev, - u32 *proto_cap, int proto_mask) -{ - u32 out[MLX5_ST_SZ_DW(ptys_reg)]; - int err; - - err = mlx5_query_port_ptys(dev, out, sizeof(out), proto_mask, 1); - if (err) - return err; - - if (proto_mask == MLX5_PTYS_EN) - *proto_cap = MLX5_GET(ptys_reg, out, eth_proto_capability); - else - *proto_cap = MLX5_GET(ptys_reg, out, ib_proto_capability); - - return 0; -} -EXPORT_SYMBOL_GPL(mlx5_query_port_proto_cap); - -int mlx5_query_port_proto_admin(struct mlx5_core_dev *dev, - u32 *proto_admin, int proto_mask) -{ - u32 out[MLX5_ST_SZ_DW(ptys_reg)]; - int err; - - err = mlx5_query_port_ptys(dev, out, sizeof(out), proto_mask, 1); - if (err) - return err; - - if (proto_mask == MLX5_PTYS_EN) - *proto_admin = MLX5_GET(ptys_reg, out, eth_proto_admin); - else - *proto_admin = MLX5_GET(ptys_reg, out, ib_proto_admin); - - return 0; -} -EXPORT_SYMBOL_GPL(mlx5_query_port_proto_admin); - int mlx5_query_port_link_width_oper(struct mlx5_core_dev *dev, u8 *link_width_oper, u8 local_port) { @@ -211,23 +170,6 @@ int mlx5_query_port_link_width_oper(struct mlx5_core_dev *dev, } EXPORT_SYMBOL_GPL(mlx5_query_port_link_width_oper); -int mlx5_query_port_eth_proto_oper(struct mlx5_core_dev *dev, - u32 *proto_oper, u8 local_port) -{ - u32 out[MLX5_ST_SZ_DW(ptys_reg)]; - int err; - - err = mlx5_query_port_ptys(dev, out, sizeof(out), MLX5_PTYS_EN, - local_port); - if (err) - return err; - - *proto_oper = MLX5_GET(ptys_reg, out, eth_proto_oper); - - return 0; -} -EXPORT_SYMBOL(mlx5_query_port_eth_proto_oper); - int mlx5_query_port_ib_proto_oper(struct mlx5_core_dev *dev, u8 *proto_oper, u8 local_port) { @@ -245,35 +187,6 @@ int mlx5_query_port_ib_proto_oper(struct mlx5_core_dev *dev, } EXPORT_SYMBOL(mlx5_query_port_ib_proto_oper); -int mlx5_set_port_ptys(struct mlx5_core_dev *dev, bool an_disable, - u32 proto_admin, int proto_mask) -{ - u32 out[MLX5_ST_SZ_DW(ptys_reg)]; - u32 in[MLX5_ST_SZ_DW(ptys_reg)]; - u8 an_disable_admin; - u8 an_disable_cap; - u8 an_status; - - mlx5_query_port_autoneg(dev, 
proto_mask, &an_status, - &an_disable_cap, &an_disable_admin); - if (!an_disable_cap && an_disable) - return -EPERM; - - memset(in, 0, sizeof(in)); - - MLX5_SET(ptys_reg, in, local_port, 1); - MLX5_SET(ptys_reg, in, an_disable_admin, an_disable); - MLX5_SET(ptys_reg, in, proto_mask, proto_mask); - if (proto_mask == MLX5_PTYS_EN) - MLX5_SET(ptys_reg, in, eth_proto_admin, proto_admin); - else - MLX5_SET(ptys_reg, in, ib_proto_admin, proto_admin); - - return mlx5_core_access_reg(dev, in, sizeof(in), out, - sizeof(out), MLX5_REG_PTYS, 0, 1); -} -EXPORT_SYMBOL_GPL(mlx5_set_port_ptys); - /* This function should be used after setting a port register only */ void mlx5_toggle_port_link(struct mlx5_core_dev *dev) { @@ -404,10 +317,6 @@ int mlx5_query_module_eeprom(struct mlx5_core_dev *dev, size -= offset + size - MLX5_EEPROM_PAGE_LENGTH; i2c_addr = MLX5_I2C_ADDR_LOW; - if (offset >= MLX5_EEPROM_PAGE_LENGTH) { - i2c_addr = MLX5_I2C_ADDR_HIGH; - offset -= MLX5_EEPROM_PAGE_LENGTH; - } MLX5_SET(mcia_reg, in, l, 0); MLX5_SET(mcia_reg, in, module, module_num); @@ -606,25 +515,6 @@ int mlx5_query_port_pfc(struct mlx5_core_dev *dev, u8 *pfc_en_tx, u8 *pfc_en_rx) } EXPORT_SYMBOL_GPL(mlx5_query_port_pfc); -void mlx5_query_port_autoneg(struct mlx5_core_dev *dev, int proto_mask, - u8 *an_status, - u8 *an_disable_cap, u8 *an_disable_admin) -{ - u32 out[MLX5_ST_SZ_DW(ptys_reg)]; - - *an_status = 0; - *an_disable_cap = 0; - *an_disable_admin = 0; - - if (mlx5_query_port_ptys(dev, out, sizeof(out), proto_mask, 1)) - return; - - *an_status = MLX5_GET(ptys_reg, out, an_status); - *an_disable_cap = MLX5_GET(ptys_reg, out, an_disable_cap); - *an_disable_admin = MLX5_GET(ptys_reg, out, an_disable_admin); -} -EXPORT_SYMBOL_GPL(mlx5_query_port_autoneg); - int mlx5_max_tc(struct mlx5_core_dev *mdev) { u8 num_tc = MLX5_CAP_GEN(mdev, max_tc) ? 
: 8; @@ -870,8 +760,7 @@ int mlx5_query_port_wol(struct mlx5_core_dev *mdev, u8 *wol_mode) } EXPORT_SYMBOL_GPL(mlx5_query_port_wol); -static int mlx5_query_ports_check(struct mlx5_core_dev *mdev, u32 *out, - int outlen) +int mlx5_query_ports_check(struct mlx5_core_dev *mdev, u32 *out, int outlen) { u32 in[MLX5_ST_SZ_DW(pcmr_reg)] = {0}; @@ -880,7 +769,7 @@ static int mlx5_query_ports_check(struct mlx5_core_dev *mdev, u32 *out, outlen, MLX5_REG_PCMR, 0, 0); } -static int mlx5_set_ports_check(struct mlx5_core_dev *mdev, u32 *in, int inlen) +int mlx5_set_ports_check(struct mlx5_core_dev *mdev, u32 *in, int inlen) { u32 out[MLX5_ST_SZ_DW(pcmr_reg)]; @@ -891,7 +780,11 @@ static int mlx5_set_ports_check(struct mlx5_core_dev *mdev, u32 *in, int inlen) int mlx5_set_port_fcs(struct mlx5_core_dev *mdev, u8 enable) { u32 in[MLX5_ST_SZ_DW(pcmr_reg)] = {0}; + int err; + err = mlx5_query_ports_check(mdev, in, sizeof(in)); + if (err) + return err; MLX5_SET(pcmr_reg, in, local_port, 1); MLX5_SET(pcmr_reg, in, fcs_chk, enable); return mlx5_set_ports_check(mdev, in, sizeof(in)); @@ -915,63 +808,6 @@ void mlx5_query_port_fcs(struct mlx5_core_dev *mdev, bool *supported, *enabled = !!(MLX5_GET(pcmr_reg, out, fcs_chk)); } -static const char *mlx5_pme_status[MLX5_MODULE_STATUS_NUM] = { - "Cable plugged", /* MLX5_MODULE_STATUS_PLUGGED = 0x1 */ - "Cable unplugged", /* MLX5_MODULE_STATUS_UNPLUGGED = 0x2 */ - "Cable error", /* MLX5_MODULE_STATUS_ERROR = 0x3 */ -}; - -static const char *mlx5_pme_error[MLX5_MODULE_EVENT_ERROR_NUM] = { - "Power budget exceeded", - "Long Range for non MLNX cable", - "Bus stuck(I2C or data shorted)", - "No EEPROM/retry timeout", - "Enforce part number list", - "Unknown identifier", - "High Temperature", - "Bad or shorted cable/module", - "Unknown status", -}; - -void mlx5_port_module_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe) -{ - enum port_module_event_status_type module_status; - enum port_module_event_error_type error_type; - struct mlx5_eqe_port_module *module_event_eqe; - struct mlx5_priv *priv = &dev->priv; - u8 module_num; - - module_event_eqe = &eqe->data.port_module; - module_num = module_event_eqe->module; - module_status = module_event_eqe->module_status & - PORT_MODULE_EVENT_MODULE_STATUS_MASK; - error_type = module_event_eqe->error_type & - PORT_MODULE_EVENT_ERROR_TYPE_MASK; - - if (module_status < MLX5_MODULE_STATUS_ERROR) { - priv->pme_stats.status_counters[module_status - 1]++; - } else if (module_status == MLX5_MODULE_STATUS_ERROR) { - if (error_type >= MLX5_MODULE_EVENT_ERROR_UNKNOWN) - /* Unknown error type */ - error_type = MLX5_MODULE_EVENT_ERROR_UNKNOWN; - priv->pme_stats.error_counters[error_type]++; - } - - if (!printk_ratelimit()) - return; - - if (module_status < MLX5_MODULE_STATUS_ERROR) - mlx5_core_info(dev, - "Port module event: module %u, %s\n", - module_num, mlx5_pme_status[module_status - 1]); - - else if (module_status == MLX5_MODULE_STATUS_ERROR) - mlx5_core_info(dev, - "Port module event[error]: module %u, %s, %s\n", - module_num, mlx5_pme_status[module_status - 1], - mlx5_pme_error[error_type]); -} - int mlx5_query_mtpps(struct mlx5_core_dev *mdev, u32 *mtpps, u32 mtpps_size) { u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c index 91b8139a388d..b8ba74de9555 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c @@ -38,26 +38,25 @@ #include <linux/mlx5/transobj.h> #include "mlx5_core.h" 
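/* presumably pulled in for the EQE plumbing used by the new
 * rsc_event_notifier() below, which decodes EQEs itself instead of
 * being handed a pre-parsed rsn by mlx5_rsc_event() */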
+#include "lib/eq.h" -static struct mlx5_core_rsc_common *mlx5_get_rsc(struct mlx5_core_dev *dev, - u32 rsn) +static int mlx5_core_drain_dct(struct mlx5_core_dev *dev, + struct mlx5_core_dct *dct); + +static struct mlx5_core_rsc_common * +mlx5_get_rsc(struct mlx5_qp_table *table, u32 rsn) { - struct mlx5_qp_table *table = &dev->priv.qp_table; struct mlx5_core_rsc_common *common; + unsigned long flags; - spin_lock(&table->lock); + spin_lock_irqsave(&table->lock, flags); common = radix_tree_lookup(&table->tree, rsn); if (common) atomic_inc(&common->refcount); - spin_unlock(&table->lock); + spin_unlock_irqrestore(&table->lock, flags); - if (!common) { - mlx5_core_warn(dev, "Async event for bogus resource 0x%x\n", - rsn); - return NULL; - } return common; } @@ -120,19 +119,57 @@ static bool is_event_type_allowed(int rsc_type, int event_type) } } -void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type) +static int rsc_event_notifier(struct notifier_block *nb, + unsigned long type, void *data) { - struct mlx5_core_rsc_common *common = mlx5_get_rsc(dev, rsn); + struct mlx5_core_rsc_common *common; + struct mlx5_qp_table *table; + struct mlx5_core_dev *dev; struct mlx5_core_dct *dct; + u8 event_type = (u8)type; struct mlx5_core_qp *qp; + struct mlx5_priv *priv; + struct mlx5_eqe *eqe; + u32 rsn; + + switch (event_type) { + case MLX5_EVENT_TYPE_DCT_DRAINED: + eqe = data; + rsn = be32_to_cpu(eqe->data.dct.dctn) & 0xffffff; + rsn |= (MLX5_RES_DCT << MLX5_USER_INDEX_LEN); + break; + case MLX5_EVENT_TYPE_PATH_MIG: + case MLX5_EVENT_TYPE_COMM_EST: + case MLX5_EVENT_TYPE_SQ_DRAINED: + case MLX5_EVENT_TYPE_SRQ_LAST_WQE: + case MLX5_EVENT_TYPE_WQ_CATAS_ERROR: + case MLX5_EVENT_TYPE_PATH_MIG_FAILED: + case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR: + case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR: + eqe = data; + rsn = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff; + rsn |= (eqe->data.qp_srq.type << MLX5_USER_INDEX_LEN); + break; + default: + return NOTIFY_DONE; + } - if (!common) - return; + table = container_of(nb, struct mlx5_qp_table, nb); + priv = container_of(table, struct mlx5_priv, qp_table); + dev = container_of(priv, struct mlx5_core_dev, priv); + + mlx5_core_dbg(dev, "event (%d) arrived on resource 0x%x\n", eqe->type, rsn); + + common = mlx5_get_rsc(table, rsn); + if (!common) { + mlx5_core_warn(dev, "Async event for bogus resource 0x%x\n", rsn); + return NOTIFY_OK; + } if (!is_event_type_allowed((rsn >> MLX5_USER_INDEX_LEN), event_type)) { mlx5_core_warn(dev, "event 0x%.2x is not allowed on resource 0x%.8x\n", event_type, rsn); - return; + goto out; } switch (common->res) { @@ -150,8 +187,10 @@ void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type) default: mlx5_core_warn(dev, "invalid resource type for 0x%x\n", rsn); } - +out: mlx5_core_put_rsc(common); + + return NOTIFY_OK; } static int create_resource_common(struct mlx5_core_dev *dev, @@ -191,20 +230,49 @@ static void destroy_resource_common(struct mlx5_core_dev *dev, wait_for_completion(&qp->common.free); } +static int _mlx5_core_destroy_dct(struct mlx5_core_dev *dev, + struct mlx5_core_dct *dct, bool need_cleanup) +{ + u32 out[MLX5_ST_SZ_DW(destroy_dct_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(destroy_dct_in)] = {0}; + struct mlx5_core_qp *qp = &dct->mqp; + int err; + + err = mlx5_core_drain_dct(dev, dct); + if (err) { + if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { + goto destroy; + } else { + mlx5_core_warn( + dev, "failed drain DCT 0x%x with error 0x%x\n", + qp->qpn, err); + return err; + } + } + 
wait_for_completion(&dct->drained); +destroy: + if (need_cleanup) + destroy_resource_common(dev, &dct->mqp); + MLX5_SET(destroy_dct_in, in, opcode, MLX5_CMD_OP_DESTROY_DCT); + MLX5_SET(destroy_dct_in, in, dctn, qp->qpn); + MLX5_SET(destroy_dct_in, in, uid, qp->uid); + err = mlx5_cmd_exec(dev, (void *)&in, sizeof(in), + (void *)&out, sizeof(out)); + return err; +} + int mlx5_core_create_dct(struct mlx5_core_dev *dev, struct mlx5_core_dct *dct, - u32 *in, int inlen) + u32 *in, int inlen, + u32 *out, int outlen) { - u32 out[MLX5_ST_SZ_DW(create_dct_out)] = {0}; - u32 din[MLX5_ST_SZ_DW(destroy_dct_in)] = {0}; - u32 dout[MLX5_ST_SZ_DW(destroy_dct_out)] = {0}; struct mlx5_core_qp *qp = &dct->mqp; int err; init_completion(&dct->drained); MLX5_SET(create_dct_in, in, opcode, MLX5_CMD_OP_CREATE_DCT); - err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out)); + err = mlx5_cmd_exec(dev, in, inlen, out, outlen); if (err) { mlx5_core_warn(dev, "create DCT failed, ret %d\n", err); return err; @@ -218,11 +286,7 @@ int mlx5_core_create_dct(struct mlx5_core_dev *dev, return 0; err_cmd: - MLX5_SET(destroy_dct_in, din, opcode, MLX5_CMD_OP_DESTROY_DCT); - MLX5_SET(destroy_dct_in, din, dctn, qp->qpn); - MLX5_SET(destroy_dct_in, din, uid, qp->uid); - mlx5_cmd_exec(dev, (void *)&in, sizeof(din), - (void *)&out, sizeof(dout)); + _mlx5_core_destroy_dct(dev, dct, false); return err; } EXPORT_SYMBOL_GPL(mlx5_core_create_dct); @@ -287,29 +351,7 @@ static int mlx5_core_drain_dct(struct mlx5_core_dev *dev, int mlx5_core_destroy_dct(struct mlx5_core_dev *dev, struct mlx5_core_dct *dct) { - u32 out[MLX5_ST_SZ_DW(destroy_dct_out)] = {0}; - u32 in[MLX5_ST_SZ_DW(destroy_dct_in)] = {0}; - struct mlx5_core_qp *qp = &dct->mqp; - int err; - - err = mlx5_core_drain_dct(dev, dct); - if (err) { - if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { - goto destroy; - } else { - mlx5_core_warn(dev, "failed drain DCT 0x%x with error 0x%x\n", qp->qpn, err); - return err; - } - } - wait_for_completion(&dct->drained); -destroy: - destroy_resource_common(dev, &dct->mqp); - MLX5_SET(destroy_dct_in, in, opcode, MLX5_CMD_OP_DESTROY_DCT); - MLX5_SET(destroy_dct_in, in, dctn, qp->qpn); - MLX5_SET(destroy_dct_in, in, uid, qp->uid); - err = mlx5_cmd_exec(dev, (void *)&in, sizeof(in), - (void *)&out, sizeof(out)); - return err; + return _mlx5_core_destroy_dct(dev, dct, true); } EXPORT_SYMBOL_GPL(mlx5_core_destroy_dct); @@ -487,10 +529,16 @@ void mlx5_init_qp_table(struct mlx5_core_dev *dev) spin_lock_init(&table->lock); INIT_RADIX_TREE(&table->tree, GFP_ATOMIC); mlx5_qp_debugfs_init(dev); + + table->nb.notifier_call = rsc_event_notifier; + mlx5_notifier_register(dev, &table->nb); } void mlx5_cleanup_qp_table(struct mlx5_core_dev *dev) { + struct mlx5_qp_table *table = &dev->priv.qp_table; + + mlx5_notifier_unregister(dev, &table->nb); mlx5_qp_debugfs_cleanup(dev); } @@ -670,3 +718,20 @@ int mlx5_core_query_q_counter(struct mlx5_core_dev *dev, u16 counter_id, return mlx5_cmd_exec(dev, in, sizeof(in), out, out_size); } EXPORT_SYMBOL_GPL(mlx5_core_query_q_counter); + +struct mlx5_core_rsc_common *mlx5_core_res_hold(struct mlx5_core_dev *dev, + int res_num, + enum mlx5_res_type res_type) +{ + u32 rsn = res_num | (res_type << MLX5_USER_INDEX_LEN); + struct mlx5_qp_table *table = &dev->priv.qp_table; + + return mlx5_get_rsc(table, rsn); +} +EXPORT_SYMBOL_GPL(mlx5_core_res_hold); + +void mlx5_core_res_put(struct mlx5_core_rsc_common *res) +{ + mlx5_core_put_rsc(res); +} +EXPORT_SYMBOL_GPL(mlx5_core_res_put); diff --git 
a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c index a0674962f02c..7b23fa8d2d60 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c @@ -147,7 +147,7 @@ out: if (MLX5_ESWITCH_MANAGER(dev)) mlx5_eswitch_disable_sriov(dev->priv.eswitch); - if (mlx5_wait_for_vf_pages(dev)) + if (mlx5_wait_for_pages(dev, &dev->priv.vfs_pages)) mlx5_core_warn(dev, "timeout reclaiming VFs pages\n"); } @@ -216,20 +216,10 @@ int mlx5_core_sriov_configure(struct pci_dev *pdev, int num_vfs) if (!mlx5_core_is_pf(dev)) return -EPERM; - if (num_vfs) { - int ret; - - ret = mlx5_lag_forbid(dev); - if (ret && (ret != -ENODEV)) - return ret; - } - - if (num_vfs) { + if (num_vfs) err = mlx5_sriov_enable(pdev, num_vfs); - } else { + else mlx5_sriov_disable(pdev); - mlx5_lag_allow(dev); - } return err ? err : num_vfs; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/srq.c b/drivers/net/ethernet/mellanox/mlx5/core/srq.c deleted file mode 100644 index 6a6fc9be01e6..000000000000 --- a/drivers/net/ethernet/mellanox/mlx5/core/srq.c +++ /dev/null @@ -1,716 +0,0 @@ -/* - * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
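In the sriov.c hunk above, mlx5_wait_for_vf_pages() becomes mlx5_wait_for_pages(dev, &dev->priv.vfs_pages): the caller now names the page counter to drain, so one helper can serve other counters too. The helper body is outside this diff; the sketch below shows the general shape such a drain-wait takes (the timeout constant and the progress-extension detail are assumptions, not taken from this patch):

/* Illustrative drain-wait: poll a page counter until it reaches zero,
 * giving up after a timeout but extending the deadline while progress
 * is being made. Constants are assumptions for the sketch. */
static int wait_for_pages_sketch(struct mlx5_core_dev *dev, int *pages)
{
	unsigned long timeout = msecs_to_jiffies(120 * 1000);
	unsigned long end = jiffies + timeout;
	int prev_pages = *pages;

	while (*pages) {
		if (time_after(jiffies, end)) {
			mlx5_core_warn(dev, "aborting: %d pages still held\n",
				       *pages);
			return -ETIMEDOUT;
		}
		if (*pages < prev_pages) {	/* progress: reset deadline */
			end = jiffies + timeout;
			prev_pages = *pages;
		}
		msleep(50);
	}
	return 0;
}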
- */ - -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/mlx5/driver.h> -#include <linux/mlx5/cmd.h> -#include <linux/mlx5/srq.h> -#include <rdma/ib_verbs.h> -#include "mlx5_core.h" -#include <linux/mlx5/transobj.h> - -void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type) -{ - struct mlx5_srq_table *table = &dev->priv.srq_table; - struct mlx5_core_srq *srq; - - spin_lock(&table->lock); - - srq = radix_tree_lookup(&table->tree, srqn); - if (srq) - atomic_inc(&srq->refcount); - - spin_unlock(&table->lock); - - if (!srq) { - mlx5_core_warn(dev, "Async event for bogus SRQ 0x%08x\n", srqn); - return; - } - - srq->event(srq, event_type); - - if (atomic_dec_and_test(&srq->refcount)) - complete(&srq->free); -} - -static int get_pas_size(struct mlx5_srq_attr *in) -{ - u32 log_page_size = in->log_page_size + 12; - u32 log_srq_size = in->log_size; - u32 log_rq_stride = in->wqe_shift; - u32 page_offset = in->page_offset; - u32 po_quanta = 1 << (log_page_size - 6); - u32 rq_sz = 1 << (log_srq_size + 4 + log_rq_stride); - u32 page_size = 1 << log_page_size; - u32 rq_sz_po = rq_sz + (page_offset * po_quanta); - u32 rq_num_pas = DIV_ROUND_UP(rq_sz_po, page_size); - - return rq_num_pas * sizeof(u64); -} - -static void set_wq(void *wq, struct mlx5_srq_attr *in) -{ - MLX5_SET(wq, wq, wq_signature, !!(in->flags - & MLX5_SRQ_FLAG_WQ_SIG)); - MLX5_SET(wq, wq, log_wq_pg_sz, in->log_page_size); - MLX5_SET(wq, wq, log_wq_stride, in->wqe_shift + 4); - MLX5_SET(wq, wq, log_wq_sz, in->log_size); - MLX5_SET(wq, wq, page_offset, in->page_offset); - MLX5_SET(wq, wq, lwm, in->lwm); - MLX5_SET(wq, wq, pd, in->pd); - MLX5_SET64(wq, wq, dbr_addr, in->db_record); -} - -static void set_srqc(void *srqc, struct mlx5_srq_attr *in) -{ - MLX5_SET(srqc, srqc, wq_signature, !!(in->flags - & MLX5_SRQ_FLAG_WQ_SIG)); - MLX5_SET(srqc, srqc, log_page_size, in->log_page_size); - MLX5_SET(srqc, srqc, log_rq_stride, in->wqe_shift); - MLX5_SET(srqc, srqc, log_srq_size, in->log_size); - MLX5_SET(srqc, srqc, page_offset, in->page_offset); - MLX5_SET(srqc, srqc, lwm, in->lwm); - MLX5_SET(srqc, srqc, pd, in->pd); - MLX5_SET64(srqc, srqc, dbr_addr, in->db_record); - MLX5_SET(srqc, srqc, xrcd, in->xrcd); - MLX5_SET(srqc, srqc, cqn, in->cqn); -} - -static void get_wq(void *wq, struct mlx5_srq_attr *in) -{ - if (MLX5_GET(wq, wq, wq_signature)) - in->flags &= MLX5_SRQ_FLAG_WQ_SIG; - in->log_page_size = MLX5_GET(wq, wq, log_wq_pg_sz); - in->wqe_shift = MLX5_GET(wq, wq, log_wq_stride) - 4; - in->log_size = MLX5_GET(wq, wq, log_wq_sz); - in->page_offset = MLX5_GET(wq, wq, page_offset); - in->lwm = MLX5_GET(wq, wq, lwm); - in->pd = MLX5_GET(wq, wq, pd); - in->db_record = MLX5_GET64(wq, wq, dbr_addr); -} - -static void get_srqc(void *srqc, struct mlx5_srq_attr *in) -{ - if (MLX5_GET(srqc, srqc, wq_signature)) - in->flags &= MLX5_SRQ_FLAG_WQ_SIG; - in->log_page_size = MLX5_GET(srqc, srqc, log_page_size); - in->wqe_shift = MLX5_GET(srqc, srqc, log_rq_stride); - in->log_size = MLX5_GET(srqc, srqc, log_srq_size); - in->page_offset = MLX5_GET(srqc, srqc, page_offset); - in->lwm = MLX5_GET(srqc, srqc, lwm); - in->pd = MLX5_GET(srqc, srqc, pd); - in->db_record = MLX5_GET64(srqc, srqc, dbr_addr); -} - -struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn) -{ - struct mlx5_srq_table *table = &dev->priv.srq_table; - struct mlx5_core_srq *srq; - - spin_lock(&table->lock); - - srq = radix_tree_lookup(&table->tree, srqn); - if (srq) - atomic_inc(&srq->refcount); - - spin_unlock(&table->lock); - - return 
srq; -} -EXPORT_SYMBOL(mlx5_core_get_srq); - -static int create_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - struct mlx5_srq_attr *in) -{ - u32 create_out[MLX5_ST_SZ_DW(create_srq_out)] = {0}; - void *create_in; - void *srqc; - void *pas; - int pas_size; - int inlen; - int err; - - pas_size = get_pas_size(in); - inlen = MLX5_ST_SZ_BYTES(create_srq_in) + pas_size; - create_in = kvzalloc(inlen, GFP_KERNEL); - if (!create_in) - return -ENOMEM; - - MLX5_SET(create_srq_in, create_in, uid, in->uid); - srqc = MLX5_ADDR_OF(create_srq_in, create_in, srq_context_entry); - pas = MLX5_ADDR_OF(create_srq_in, create_in, pas); - - set_srqc(srqc, in); - memcpy(pas, in->pas, pas_size); - - MLX5_SET(create_srq_in, create_in, opcode, - MLX5_CMD_OP_CREATE_SRQ); - - err = mlx5_cmd_exec(dev, create_in, inlen, create_out, - sizeof(create_out)); - kvfree(create_in); - if (!err) { - srq->srqn = MLX5_GET(create_srq_out, create_out, srqn); - srq->uid = in->uid; - } - - return err; -} - -static int destroy_srq_cmd(struct mlx5_core_dev *dev, - struct mlx5_core_srq *srq) -{ - u32 srq_in[MLX5_ST_SZ_DW(destroy_srq_in)] = {0}; - u32 srq_out[MLX5_ST_SZ_DW(destroy_srq_out)] = {0}; - - MLX5_SET(destroy_srq_in, srq_in, opcode, - MLX5_CMD_OP_DESTROY_SRQ); - MLX5_SET(destroy_srq_in, srq_in, srqn, srq->srqn); - MLX5_SET(destroy_srq_in, srq_in, uid, srq->uid); - - return mlx5_cmd_exec(dev, srq_in, sizeof(srq_in), - srq_out, sizeof(srq_out)); -} - -static int arm_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - u16 lwm, int is_srq) -{ - u32 srq_in[MLX5_ST_SZ_DW(arm_rq_in)] = {0}; - u32 srq_out[MLX5_ST_SZ_DW(arm_rq_out)] = {0}; - - MLX5_SET(arm_rq_in, srq_in, opcode, MLX5_CMD_OP_ARM_RQ); - MLX5_SET(arm_rq_in, srq_in, op_mod, MLX5_ARM_RQ_IN_OP_MOD_SRQ); - MLX5_SET(arm_rq_in, srq_in, srq_number, srq->srqn); - MLX5_SET(arm_rq_in, srq_in, lwm, lwm); - MLX5_SET(arm_rq_in, srq_in, uid, srq->uid); - - return mlx5_cmd_exec(dev, srq_in, sizeof(srq_in), - srq_out, sizeof(srq_out)); -} - -static int query_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - struct mlx5_srq_attr *out) -{ - u32 srq_in[MLX5_ST_SZ_DW(query_srq_in)] = {0}; - u32 *srq_out; - void *srqc; - int err; - - srq_out = kvzalloc(MLX5_ST_SZ_BYTES(query_srq_out), GFP_KERNEL); - if (!srq_out) - return -ENOMEM; - - MLX5_SET(query_srq_in, srq_in, opcode, - MLX5_CMD_OP_QUERY_SRQ); - MLX5_SET(query_srq_in, srq_in, srqn, srq->srqn); - err = mlx5_cmd_exec(dev, srq_in, sizeof(srq_in), - srq_out, MLX5_ST_SZ_BYTES(query_srq_out)); - if (err) - goto out; - - srqc = MLX5_ADDR_OF(query_srq_out, srq_out, srq_context_entry); - get_srqc(srqc, out); - if (MLX5_GET(srqc, srqc, state) != MLX5_SRQC_STATE_GOOD) - out->flags |= MLX5_SRQ_FLAG_ERR; -out: - kvfree(srq_out); - return err; -} - -static int create_xrc_srq_cmd(struct mlx5_core_dev *dev, - struct mlx5_core_srq *srq, - struct mlx5_srq_attr *in) -{ - u32 create_out[MLX5_ST_SZ_DW(create_xrc_srq_out)]; - void *create_in; - void *xrc_srqc; - void *pas; - int pas_size; - int inlen; - int err; - - pas_size = get_pas_size(in); - inlen = MLX5_ST_SZ_BYTES(create_xrc_srq_in) + pas_size; - create_in = kvzalloc(inlen, GFP_KERNEL); - if (!create_in) - return -ENOMEM; - - MLX5_SET(create_xrc_srq_in, create_in, uid, in->uid); - xrc_srqc = MLX5_ADDR_OF(create_xrc_srq_in, create_in, - xrc_srq_context_entry); - pas = MLX5_ADDR_OF(create_xrc_srq_in, create_in, pas); - - set_srqc(xrc_srqc, in); - MLX5_SET(xrc_srqc, xrc_srqc, user_index, in->user_index); - memcpy(pas, in->pas, pas_size); - 
MLX5_SET(create_xrc_srq_in, create_in, opcode, - MLX5_CMD_OP_CREATE_XRC_SRQ); - - memset(create_out, 0, sizeof(create_out)); - err = mlx5_cmd_exec(dev, create_in, inlen, create_out, - sizeof(create_out)); - if (err) - goto out; - - srq->srqn = MLX5_GET(create_xrc_srq_out, create_out, xrc_srqn); - srq->uid = in->uid; -out: - kvfree(create_in); - return err; -} - -static int destroy_xrc_srq_cmd(struct mlx5_core_dev *dev, - struct mlx5_core_srq *srq) -{ - u32 xrcsrq_in[MLX5_ST_SZ_DW(destroy_xrc_srq_in)] = {0}; - u32 xrcsrq_out[MLX5_ST_SZ_DW(destroy_xrc_srq_out)] = {0}; - - MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, opcode, - MLX5_CMD_OP_DESTROY_XRC_SRQ); - MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn); - MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, uid, srq->uid); - - return mlx5_cmd_exec(dev, xrcsrq_in, sizeof(xrcsrq_in), - xrcsrq_out, sizeof(xrcsrq_out)); -} - -static int arm_xrc_srq_cmd(struct mlx5_core_dev *dev, - struct mlx5_core_srq *srq, u16 lwm) -{ - u32 xrcsrq_in[MLX5_ST_SZ_DW(arm_xrc_srq_in)] = {0}; - u32 xrcsrq_out[MLX5_ST_SZ_DW(arm_xrc_srq_out)] = {0}; - - MLX5_SET(arm_xrc_srq_in, xrcsrq_in, opcode, MLX5_CMD_OP_ARM_XRC_SRQ); - MLX5_SET(arm_xrc_srq_in, xrcsrq_in, op_mod, MLX5_ARM_XRC_SRQ_IN_OP_MOD_XRC_SRQ); - MLX5_SET(arm_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn); - MLX5_SET(arm_xrc_srq_in, xrcsrq_in, lwm, lwm); - MLX5_SET(arm_xrc_srq_in, xrcsrq_in, uid, srq->uid); - - return mlx5_cmd_exec(dev, xrcsrq_in, sizeof(xrcsrq_in), - xrcsrq_out, sizeof(xrcsrq_out)); -} - -static int query_xrc_srq_cmd(struct mlx5_core_dev *dev, - struct mlx5_core_srq *srq, - struct mlx5_srq_attr *out) -{ - u32 xrcsrq_in[MLX5_ST_SZ_DW(query_xrc_srq_in)]; - u32 *xrcsrq_out; - void *xrc_srqc; - int err; - - xrcsrq_out = kvzalloc(MLX5_ST_SZ_BYTES(query_xrc_srq_out), GFP_KERNEL); - if (!xrcsrq_out) - return -ENOMEM; - memset(xrcsrq_in, 0, sizeof(xrcsrq_in)); - - MLX5_SET(query_xrc_srq_in, xrcsrq_in, opcode, - MLX5_CMD_OP_QUERY_XRC_SRQ); - MLX5_SET(query_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn); - - err = mlx5_cmd_exec(dev, xrcsrq_in, sizeof(xrcsrq_in), xrcsrq_out, - MLX5_ST_SZ_BYTES(query_xrc_srq_out)); - if (err) - goto out; - - xrc_srqc = MLX5_ADDR_OF(query_xrc_srq_out, xrcsrq_out, - xrc_srq_context_entry); - get_srqc(xrc_srqc, out); - if (MLX5_GET(xrc_srqc, xrc_srqc, state) != MLX5_XRC_SRQC_STATE_GOOD) - out->flags |= MLX5_SRQ_FLAG_ERR; - -out: - kvfree(xrcsrq_out); - return err; -} - -static int create_rmp_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - struct mlx5_srq_attr *in) -{ - void *create_in; - void *rmpc; - void *wq; - int pas_size; - int inlen; - int err; - - pas_size = get_pas_size(in); - inlen = MLX5_ST_SZ_BYTES(create_rmp_in) + pas_size; - create_in = kvzalloc(inlen, GFP_KERNEL); - if (!create_in) - return -ENOMEM; - - rmpc = MLX5_ADDR_OF(create_rmp_in, create_in, ctx); - wq = MLX5_ADDR_OF(rmpc, rmpc, wq); - - MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY); - MLX5_SET(create_rmp_in, create_in, uid, in->uid); - set_wq(wq, in); - memcpy(MLX5_ADDR_OF(rmpc, rmpc, wq.pas), in->pas, pas_size); - - err = mlx5_core_create_rmp(dev, create_in, inlen, &srq->srqn); - if (!err) - srq->uid = in->uid; - - kvfree(create_in); - return err; -} - -static int destroy_rmp_cmd(struct mlx5_core_dev *dev, - struct mlx5_core_srq *srq) -{ - u32 in[MLX5_ST_SZ_DW(destroy_rmp_in)] = {}; - u32 out[MLX5_ST_SZ_DW(destroy_rmp_out)] = {}; - - MLX5_SET(destroy_rmp_in, in, opcode, MLX5_CMD_OP_DESTROY_RMP); - MLX5_SET(destroy_rmp_in, in, rmpn, srq->srqn); - MLX5_SET(destroy_rmp_in, in, uid, srq->uid); - 
return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); -} - -static int arm_rmp_cmd(struct mlx5_core_dev *dev, - struct mlx5_core_srq *srq, - u16 lwm) -{ - void *in; - void *rmpc; - void *wq; - void *bitmask; - int err; - - in = kvzalloc(MLX5_ST_SZ_BYTES(modify_rmp_in), GFP_KERNEL); - if (!in) - return -ENOMEM; - - rmpc = MLX5_ADDR_OF(modify_rmp_in, in, ctx); - bitmask = MLX5_ADDR_OF(modify_rmp_in, in, bitmask); - wq = MLX5_ADDR_OF(rmpc, rmpc, wq); - - MLX5_SET(modify_rmp_in, in, rmp_state, MLX5_RMPC_STATE_RDY); - MLX5_SET(modify_rmp_in, in, rmpn, srq->srqn); - MLX5_SET(modify_rmp_in, in, uid, srq->uid); - MLX5_SET(wq, wq, lwm, lwm); - MLX5_SET(rmp_bitmask, bitmask, lwm, 1); - MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY); - - err = mlx5_core_modify_rmp(dev, in, MLX5_ST_SZ_BYTES(modify_rmp_in)); - - kvfree(in); - return err; -} - -static int query_rmp_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - struct mlx5_srq_attr *out) -{ - u32 *rmp_out; - void *rmpc; - int err; - - rmp_out = kvzalloc(MLX5_ST_SZ_BYTES(query_rmp_out), GFP_KERNEL); - if (!rmp_out) - return -ENOMEM; - - err = mlx5_core_query_rmp(dev, srq->srqn, rmp_out); - if (err) - goto out; - - rmpc = MLX5_ADDR_OF(query_rmp_out, rmp_out, rmp_context); - get_wq(MLX5_ADDR_OF(rmpc, rmpc, wq), out); - if (MLX5_GET(rmpc, rmpc, state) != MLX5_RMPC_STATE_RDY) - out->flags |= MLX5_SRQ_FLAG_ERR; - -out: - kvfree(rmp_out); - return err; -} - -static int create_xrq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - struct mlx5_srq_attr *in) -{ - u32 create_out[MLX5_ST_SZ_DW(create_xrq_out)] = {0}; - void *create_in; - void *xrqc; - void *wq; - int pas_size; - int inlen; - int err; - - pas_size = get_pas_size(in); - inlen = MLX5_ST_SZ_BYTES(create_xrq_in) + pas_size; - create_in = kvzalloc(inlen, GFP_KERNEL); - if (!create_in) - return -ENOMEM; - - xrqc = MLX5_ADDR_OF(create_xrq_in, create_in, xrq_context); - wq = MLX5_ADDR_OF(xrqc, xrqc, wq); - - set_wq(wq, in); - memcpy(MLX5_ADDR_OF(xrqc, xrqc, wq.pas), in->pas, pas_size); - - if (in->type == IB_SRQT_TM) { - MLX5_SET(xrqc, xrqc, topology, MLX5_XRQC_TOPOLOGY_TAG_MATCHING); - if (in->flags & MLX5_SRQ_FLAG_RNDV) - MLX5_SET(xrqc, xrqc, offload, MLX5_XRQC_OFFLOAD_RNDV); - MLX5_SET(xrqc, xrqc, - tag_matching_topology_context.log_matching_list_sz, - in->tm_log_list_size); - } - MLX5_SET(xrqc, xrqc, user_index, in->user_index); - MLX5_SET(xrqc, xrqc, cqn, in->cqn); - MLX5_SET(create_xrq_in, create_in, opcode, MLX5_CMD_OP_CREATE_XRQ); - MLX5_SET(create_xrq_in, create_in, uid, in->uid); - err = mlx5_cmd_exec(dev, create_in, inlen, create_out, - sizeof(create_out)); - kvfree(create_in); - if (!err) { - srq->srqn = MLX5_GET(create_xrq_out, create_out, xrqn); - srq->uid = in->uid; - } - - return err; -} - -static int destroy_xrq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq) -{ - u32 in[MLX5_ST_SZ_DW(destroy_xrq_in)] = {0}; - u32 out[MLX5_ST_SZ_DW(destroy_xrq_out)] = {0}; - - MLX5_SET(destroy_xrq_in, in, opcode, MLX5_CMD_OP_DESTROY_XRQ); - MLX5_SET(destroy_xrq_in, in, xrqn, srq->srqn); - MLX5_SET(destroy_xrq_in, in, uid, srq->uid); - - return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); -} - -static int arm_xrq_cmd(struct mlx5_core_dev *dev, - struct mlx5_core_srq *srq, - u16 lwm) -{ - u32 out[MLX5_ST_SZ_DW(arm_rq_out)] = {0}; - u32 in[MLX5_ST_SZ_DW(arm_rq_in)] = {0}; - - MLX5_SET(arm_rq_in, in, opcode, MLX5_CMD_OP_ARM_RQ); - MLX5_SET(arm_rq_in, in, op_mod, MLX5_ARM_RQ_IN_OP_MOD_XRQ); - MLX5_SET(arm_rq_in, in, srq_number, srq->srqn); - MLX5_SET(arm_rq_in, 
in, lwm, lwm); - MLX5_SET(arm_rq_in, in, uid, srq->uid); - - return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); -} - -static int query_xrq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - struct mlx5_srq_attr *out) -{ - u32 in[MLX5_ST_SZ_DW(query_xrq_in)] = {0}; - u32 *xrq_out; - int outlen = MLX5_ST_SZ_BYTES(query_xrq_out); - void *xrqc; - int err; - - xrq_out = kvzalloc(outlen, GFP_KERNEL); - if (!xrq_out) - return -ENOMEM; - - MLX5_SET(query_xrq_in, in, opcode, MLX5_CMD_OP_QUERY_XRQ); - MLX5_SET(query_xrq_in, in, xrqn, srq->srqn); - - err = mlx5_cmd_exec(dev, in, sizeof(in), xrq_out, outlen); - if (err) - goto out; - - xrqc = MLX5_ADDR_OF(query_xrq_out, xrq_out, xrq_context); - get_wq(MLX5_ADDR_OF(xrqc, xrqc, wq), out); - if (MLX5_GET(xrqc, xrqc, state) != MLX5_XRQC_STATE_GOOD) - out->flags |= MLX5_SRQ_FLAG_ERR; - out->tm_next_tag = - MLX5_GET(xrqc, xrqc, - tag_matching_topology_context.append_next_index); - out->tm_hw_phase_cnt = - MLX5_GET(xrqc, xrqc, - tag_matching_topology_context.hw_phase_cnt); - out->tm_sw_phase_cnt = - MLX5_GET(xrqc, xrqc, - tag_matching_topology_context.sw_phase_cnt); - -out: - kvfree(xrq_out); - return err; -} - -static int create_srq_split(struct mlx5_core_dev *dev, - struct mlx5_core_srq *srq, - struct mlx5_srq_attr *in) -{ - if (!dev->issi) - return create_srq_cmd(dev, srq, in); - switch (srq->common.res) { - case MLX5_RES_XSRQ: - return create_xrc_srq_cmd(dev, srq, in); - case MLX5_RES_XRQ: - return create_xrq_cmd(dev, srq, in); - default: - return create_rmp_cmd(dev, srq, in); - } -} - -static int destroy_srq_split(struct mlx5_core_dev *dev, - struct mlx5_core_srq *srq) -{ - if (!dev->issi) - return destroy_srq_cmd(dev, srq); - switch (srq->common.res) { - case MLX5_RES_XSRQ: - return destroy_xrc_srq_cmd(dev, srq); - case MLX5_RES_XRQ: - return destroy_xrq_cmd(dev, srq); - default: - return destroy_rmp_cmd(dev, srq); - } -} - -int mlx5_core_create_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - struct mlx5_srq_attr *in) -{ - int err; - struct mlx5_srq_table *table = &dev->priv.srq_table; - - switch (in->type) { - case IB_SRQT_XRC: - srq->common.res = MLX5_RES_XSRQ; - break; - case IB_SRQT_TM: - srq->common.res = MLX5_RES_XRQ; - break; - default: - srq->common.res = MLX5_RES_SRQ; - } - - err = create_srq_split(dev, srq, in); - if (err) - return err; - - atomic_set(&srq->refcount, 1); - init_completion(&srq->free); - - spin_lock_irq(&table->lock); - err = radix_tree_insert(&table->tree, srq->srqn, srq); - spin_unlock_irq(&table->lock); - if (err) { - mlx5_core_warn(dev, "err %d, srqn 0x%x\n", err, srq->srqn); - goto err_destroy_srq_split; - } - - return 0; - -err_destroy_srq_split: - destroy_srq_split(dev, srq); - - return err; -} -EXPORT_SYMBOL(mlx5_core_create_srq); - -int mlx5_core_destroy_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq) -{ - struct mlx5_srq_table *table = &dev->priv.srq_table; - struct mlx5_core_srq *tmp; - int err; - - spin_lock_irq(&table->lock); - tmp = radix_tree_delete(&table->tree, srq->srqn); - spin_unlock_irq(&table->lock); - if (!tmp) { - mlx5_core_warn(dev, "srq 0x%x not found in tree\n", srq->srqn); - return -EINVAL; - } - if (tmp != srq) { - mlx5_core_warn(dev, "corruption on srqn 0x%x\n", srq->srqn); - return -EINVAL; - } - - err = destroy_srq_split(dev, srq); - if (err) - return err; - - if (atomic_dec_and_test(&srq->refcount)) - complete(&srq->free); - wait_for_completion(&srq->free); - - return 0; -} -EXPORT_SYMBOL(mlx5_core_destroy_srq); - -int mlx5_core_query_srq(struct 
mlx5_core_dev *dev, struct mlx5_core_srq *srq, - struct mlx5_srq_attr *out) -{ - if (!dev->issi) - return query_srq_cmd(dev, srq, out); - switch (srq->common.res) { - case MLX5_RES_XSRQ: - return query_xrc_srq_cmd(dev, srq, out); - case MLX5_RES_XRQ: - return query_xrq_cmd(dev, srq, out); - default: - return query_rmp_cmd(dev, srq, out); - } -} -EXPORT_SYMBOL(mlx5_core_query_srq); - -int mlx5_core_arm_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, - u16 lwm, int is_srq) -{ - if (!dev->issi) - return arm_srq_cmd(dev, srq, lwm, is_srq); - switch (srq->common.res) { - case MLX5_RES_XSRQ: - return arm_xrc_srq_cmd(dev, srq, lwm); - case MLX5_RES_XRQ: - return arm_xrq_cmd(dev, srq, lwm); - default: - return arm_rmp_cmd(dev, srq, lwm); - } -} -EXPORT_SYMBOL(mlx5_core_arm_srq); - -void mlx5_init_srq_table(struct mlx5_core_dev *dev) -{ - struct mlx5_srq_table *table = &dev->priv.srq_table; - - memset(table, 0, sizeof(*table)); - spin_lock_init(&table->lock); - INIT_RADIX_TREE(&table->tree, GFP_ATOMIC); -} - -void mlx5_cleanup_srq_table(struct mlx5_core_dev *dev) -{ - /* nothing */ -} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c index a1ee9a8a769e..c4d4b76096dc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c @@ -258,115 +258,6 @@ void mlx5_core_destroy_tis(struct mlx5_core_dev *dev, u32 tisn) } EXPORT_SYMBOL(mlx5_core_destroy_tis); -int mlx5_core_create_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen, - u32 *rmpn) -{ - u32 out[MLX5_ST_SZ_DW(create_rmp_out)] = {0}; - int err; - - MLX5_SET(create_rmp_in, in, opcode, MLX5_CMD_OP_CREATE_RMP); - err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); - if (!err) - *rmpn = MLX5_GET(create_rmp_out, out, rmpn); - - return err; -} - -int mlx5_core_modify_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen) -{ - u32 out[MLX5_ST_SZ_DW(modify_rmp_out)] = {0}; - - MLX5_SET(modify_rmp_in, in, opcode, MLX5_CMD_OP_MODIFY_RMP); - return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); -} - -int mlx5_core_destroy_rmp(struct mlx5_core_dev *dev, u32 rmpn) -{ - u32 in[MLX5_ST_SZ_DW(destroy_rmp_in)] = {0}; - u32 out[MLX5_ST_SZ_DW(destroy_rmp_out)] = {0}; - - MLX5_SET(destroy_rmp_in, in, opcode, MLX5_CMD_OP_DESTROY_RMP); - MLX5_SET(destroy_rmp_in, in, rmpn, rmpn); - return mlx5_cmd_exec(dev, in, sizeof(in), out, - sizeof(out)); -} - -int mlx5_core_query_rmp(struct mlx5_core_dev *dev, u32 rmpn, u32 *out) -{ - u32 in[MLX5_ST_SZ_DW(query_rmp_in)] = {0}; - int outlen = MLX5_ST_SZ_BYTES(query_rmp_out); - - MLX5_SET(query_rmp_in, in, opcode, MLX5_CMD_OP_QUERY_RMP); - MLX5_SET(query_rmp_in, in, rmpn, rmpn); - return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); -} - -int mlx5_core_arm_rmp(struct mlx5_core_dev *dev, u32 rmpn, u16 lwm) -{ - void *in; - void *rmpc; - void *wq; - void *bitmask; - int err; - - in = kvzalloc(MLX5_ST_SZ_BYTES(modify_rmp_in), GFP_KERNEL); - if (!in) - return -ENOMEM; - - rmpc = MLX5_ADDR_OF(modify_rmp_in, in, ctx); - bitmask = MLX5_ADDR_OF(modify_rmp_in, in, bitmask); - wq = MLX5_ADDR_OF(rmpc, rmpc, wq); - - MLX5_SET(modify_rmp_in, in, rmp_state, MLX5_RMPC_STATE_RDY); - MLX5_SET(modify_rmp_in, in, rmpn, rmpn); - MLX5_SET(wq, wq, lwm, lwm); - MLX5_SET(rmp_bitmask, bitmask, lwm, 1); - MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY); - - err = mlx5_core_modify_rmp(dev, in, MLX5_ST_SZ_BYTES(modify_rmp_in)); - - kvfree(in); - - return err; -} - -int mlx5_core_create_xsrq(struct mlx5_core_dev *dev, u32 
*in, int inlen, - u32 *xsrqn) -{ - u32 out[MLX5_ST_SZ_DW(create_xrc_srq_out)] = {0}; - int err; - - MLX5_SET(create_xrc_srq_in, in, opcode, MLX5_CMD_OP_CREATE_XRC_SRQ); - err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); - if (!err) - *xsrqn = MLX5_GET(create_xrc_srq_out, out, xrc_srqn); - - return err; -} - -int mlx5_core_destroy_xsrq(struct mlx5_core_dev *dev, u32 xsrqn) -{ - u32 in[MLX5_ST_SZ_DW(destroy_xrc_srq_in)] = {0}; - u32 out[MLX5_ST_SZ_DW(destroy_xrc_srq_out)] = {0}; - - MLX5_SET(destroy_xrc_srq_in, in, opcode, MLX5_CMD_OP_DESTROY_XRC_SRQ); - MLX5_SET(destroy_xrc_srq_in, in, xrc_srqn, xsrqn); - return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); -} - -int mlx5_core_arm_xsrq(struct mlx5_core_dev *dev, u32 xsrqn, u16 lwm) -{ - u32 in[MLX5_ST_SZ_DW(arm_xrc_srq_in)] = {0}; - u32 out[MLX5_ST_SZ_DW(arm_xrc_srq_out)] = {0}; - - MLX5_SET(arm_xrc_srq_in, in, opcode, MLX5_CMD_OP_ARM_XRC_SRQ); - MLX5_SET(arm_xrc_srq_in, in, xrc_srqn, xsrqn); - MLX5_SET(arm_xrc_srq_in, in, lwm, lwm); - MLX5_SET(arm_xrc_srq_in, in, op_mod, - MLX5_ARM_XRC_SRQ_IN_OP_MOD_XRC_SRQ); - return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); -} - int mlx5_core_create_rqt(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *rqtn) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/uar.c b/drivers/net/ethernet/mellanox/mlx5/core/uar.c index 8b97066dd1f1..94464723ff77 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/uar.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/uar.c @@ -90,8 +90,8 @@ static void up_rel_func(struct kref *kref) iounmap(up->map); if (mlx5_cmd_free_uar(up->mdev, up->index)) mlx5_core_warn(up->mdev, "failed to free uar index %d\n", up->index); - kfree(up->reg_bitmap); - kfree(up->fp_bitmap); + bitmap_free(up->reg_bitmap); + bitmap_free(up->fp_bitmap); kfree(up); } @@ -110,11 +110,11 @@ static struct mlx5_uars_page *alloc_uars_page(struct mlx5_core_dev *mdev, return ERR_PTR(err); up->mdev = mdev; - up->reg_bitmap = kcalloc(BITS_TO_LONGS(bfregs), sizeof(unsigned long), GFP_KERNEL); + up->reg_bitmap = bitmap_zalloc(bfregs, GFP_KERNEL); if (!up->reg_bitmap) goto error1; - up->fp_bitmap = kcalloc(BITS_TO_LONGS(bfregs), sizeof(unsigned long), GFP_KERNEL); + up->fp_bitmap = bitmap_zalloc(bfregs, GFP_KERNEL); if (!up->fp_bitmap) goto error1; @@ -157,8 +157,8 @@ error2: if (mlx5_cmd_free_uar(mdev, up->index)) mlx5_core_warn(mdev, "failed to free uar index %d\n", up->index); error1: - kfree(up->fp_bitmap); - kfree(up->reg_bitmap); + bitmap_free(up->fp_bitmap); + bitmap_free(up->reg_bitmap); kfree(up); return ERR_PTR(err); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index cfbea66b4879..ef95feca9961 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -64,7 +64,7 @@ u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport) } int mlx5_modify_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod, - u16 vport, u8 state) + u16 vport, u8 other_vport, u8 state) { u32 in[MLX5_ST_SZ_DW(modify_vport_state_in)] = {0}; u32 out[MLX5_ST_SZ_DW(modify_vport_state_out)] = {0}; @@ -73,8 +73,7 @@ int mlx5_modify_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod, MLX5_CMD_OP_MODIFY_VPORT_STATE); MLX5_SET(modify_vport_state_in, in, op_mod, opmod); MLX5_SET(modify_vport_state_in, in, vport_number, vport); - if (vport) - MLX5_SET(modify_vport_state_in, in, other_vport, 1); + MLX5_SET(modify_vport_state_in, in, other_vport, other_vport); 
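The modify_vport_state change here makes other_vport an explicit argument instead of inferring it from vport != 0, so a caller can address vport 0 of another function, and addressing one's own vport no longer depends on its number. Hypothetical call sites after the change (the wrapper is illustrative; the enum names are from the mlx5 vport API, not this hunk):

static int set_vports_up_example(struct mlx5_core_dev *mdev, u16 vf_vport)
{
	/* Own vport: number 0, other_vport flag clear. */
	int err = mlx5_modify_vport_admin_state(mdev,
				MLX5_VPORT_STATE_OP_MOD_ESW_VPORT,
				0, 0, MLX5_VPORT_ADMIN_STATE_UP);
	if (err)
		return err;

	/* A VF vport addressed from the eswitch manager: flag set. */
	return mlx5_modify_vport_admin_state(mdev,
				MLX5_VPORT_STATE_OP_MOD_ESW_VPORT,
				vf_vport, 1, MLX5_VPORT_ADMIN_STATE_UP);
}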
MLX5_SET(modify_vport_state_in, in, admin_state, state); return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); @@ -255,7 +254,7 @@ int mlx5_modify_nic_vport_mtu(struct mlx5_core_dev *mdev, u16 mtu) EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_mtu); int mlx5_query_nic_vport_mac_list(struct mlx5_core_dev *dev, - u32 vport, + u16 vport, enum mlx5_list_type list_type, u8 addr_list[][ETH_ALEN], int *list_size) @@ -373,7 +372,7 @@ int mlx5_modify_nic_vport_mac_list(struct mlx5_core_dev *dev, EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_mac_list); int mlx5_query_nic_vport_vlans(struct mlx5_core_dev *dev, - u32 vport, + u16 vport, u16 vlans[], int *size) { @@ -526,7 +525,7 @@ int mlx5_query_nic_vport_node_guid(struct mlx5_core_dev *mdev, u64 *node_guid) EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_node_guid); int mlx5_modify_nic_vport_node_guid(struct mlx5_core_dev *mdev, - u32 vport, u64 node_guid) + u16 vport, u64 node_guid) { int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in); void *nic_vport_context; @@ -827,7 +826,7 @@ int mlx5_query_hca_vport_node_guid(struct mlx5_core_dev *dev, EXPORT_SYMBOL_GPL(mlx5_query_hca_vport_node_guid); int mlx5_query_nic_vport_promisc(struct mlx5_core_dev *mdev, - u32 vport, + u16 vport, int *promisc_uc, int *promisc_mc, int *promisc_all) @@ -1057,7 +1056,7 @@ free: EXPORT_SYMBOL_GPL(mlx5_core_query_vport_counter); int mlx5_query_vport_down_stats(struct mlx5_core_dev *mdev, u16 vport, - u64 *rx_discard_vport_down, + u8 other_vport, u64 *rx_discard_vport_down, u64 *tx_discard_vport_down) { u32 out[MLX5_ST_SZ_DW(query_vnic_env_out)] = {0}; @@ -1068,8 +1067,7 @@ int mlx5_query_vport_down_stats(struct mlx5_core_dev *mdev, u16 vport, MLX5_CMD_OP_QUERY_VNIC_ENV); MLX5_SET(query_vnic_env_in, in, op_mod, 0); MLX5_SET(query_vnic_env_in, in, vport_number, vport); - if (vport) - MLX5_SET(query_vnic_env_in, in, other_vport, 1); + MLX5_SET(query_vnic_env_in, in, other_vport, other_vport); err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); if (err) @@ -1204,9 +1202,19 @@ EXPORT_SYMBOL_GPL(mlx5_nic_vport_unaffiliate_multiport); u64 mlx5_query_nic_system_image_guid(struct mlx5_core_dev *mdev) { - if (!mdev->sys_image_guid) - mlx5_query_nic_vport_system_image_guid(mdev, &mdev->sys_image_guid); + int port_type_cap = MLX5_CAP_GEN(mdev, port_type); + u64 tmp = 0; + + if (mdev->sys_image_guid) + return mdev->sys_image_guid; + + if (port_type_cap == MLX5_CAP_PORT_TYPE_ETH) + mlx5_query_nic_vport_system_image_guid(mdev, &tmp); + else + mlx5_query_hca_vport_system_image_guid(mdev, &tmp); + + mdev->sys_image_guid = tmp; - return mdev->sys_image_guid; + return tmp; } EXPORT_SYMBOL_GPL(mlx5_query_nic_system_image_guid); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.c b/drivers/net/ethernet/mellanox/mlx5/core/wq.c index 2dcbf1ebfd6a..953cc8efba69 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/wq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.c @@ -155,7 +155,8 @@ int mlx5_cqwq_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, void *cqc, struct mlx5_cqwq *wq, struct mlx5_wq_ctrl *wq_ctrl) { - u8 log_wq_stride = MLX5_GET(cqc, cqc, cqe_sz) + 6; + /* CQE_STRIDE_128 and CQE_STRIDE_128_PAD both mean 128B stride */ + u8 log_wq_stride = MLX5_GET(cqc, cqc, cqe_sz) == CQE_STRIDE_64 ? 
6 : 7; u8 log_wq_sz = MLX5_GET(cqc, cqc, log_cq_size); int err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.h b/drivers/net/ethernet/mellanox/mlx5/core/wq.h index b1293d153a58..ea934a48c90a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/wq.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.h @@ -177,9 +177,14 @@ static inline u32 mlx5_cqwq_get_ci(struct mlx5_cqwq *wq) return mlx5_cqwq_ctr2ix(wq, wq->cc); } -static inline void *mlx5_cqwq_get_wqe(struct mlx5_cqwq *wq, u32 ix) +static inline struct mlx5_cqe64 *mlx5_cqwq_get_wqe(struct mlx5_cqwq *wq, u32 ix) { - return mlx5_frag_buf_get_wqe(&wq->fbc, ix); + struct mlx5_cqe64 *cqe = mlx5_frag_buf_get_wqe(&wq->fbc, ix); + + /* For 128B CQEs the data is in the last 64B */ + cqe += wq->fbc.log_stride == 7; + + return cqe; } static inline u32 mlx5_cqwq_get_ctr_wrap_cnt(struct mlx5_cqwq *wq, u32 ctr) diff --git a/drivers/net/ethernet/mellanox/mlxfw/mlxfw.h b/drivers/net/ethernet/mellanox/mlxfw/mlxfw.h index 7a712b6b09ec..14c0c62f8e73 100644 --- a/drivers/net/ethernet/mellanox/mlxfw/mlxfw.h +++ b/drivers/net/ethernet/mellanox/mlxfw/mlxfw.h @@ -1,36 +1,5 @@ -/* - * drivers/net/ethernet/mellanox/mlxfw/mlxfw.h - * Copyright (c) 2017 Mellanox Technologies. All rights reserved. - * Copyright (c) 2017 Yotam Gigi <yotamg@mellanox.com> - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the names of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * Alternatively, this software may be distributed under the terms of the - * GNU General Public License ("GPL") version 2 as published by the Free - * Software Foundation. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ +/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */ +/* Copyright (c) 2017-2019 Mellanox Technologies. All rights reserved */ #ifndef _MLXFW_H #define _MLXFW_H diff --git a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_fsm.c b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_fsm.c index 2cf89126fb23..240c027e5f07 100644 --- a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_fsm.c +++ b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_fsm.c @@ -1,36 +1,5 @@ -/* - * drivers/net/ethernet/mellanox/mlxfw/mlxfw.c - * Copyright (c) 2017 Mellanox Technologies. 
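The wq.c and wq.h hunks a little above this point handle 128 B CQEs: any cqe_sz other than CQE_STRIDE_64 yields a log stride of 7, and because the hardware writes the meaningful 64 B into the end of a 128 B entry, the lookup helper advances one struct mlx5_cqe64 when the stride is 128 B. Restated as a sketch:

/* Restating the mlx5_cqwq_get_wqe() change: find the valid mlx5_cqe64
 * inside a CQ entry, skipping the unused first half of 128 B CQEs. */
static struct mlx5_cqe64 *get_cqe64_sketch(struct mlx5_cqwq *wq, u32 ix)
{
	struct mlx5_cqe64 *cqe = mlx5_frag_buf_get_wqe(&wq->fbc, ix);

	/* log_stride == 6: 64 B CQE, use as-is.
	 * log_stride == 7: 128 B CQE, data sits in the last 64 B. */
	cqe += wq->fbc.log_stride == 7;

	return cqe;
}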
All rights reserved. - * Copyright (c) 2017 Yotam Gigi <yotamg@mellanox.com> - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the names of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * Alternatively, this software may be distributed under the terms of the - * GNU General Public License ("GPL") version 2 as published by the Free - * Software Foundation. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ +// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 +/* Copyright (c) 2017-2019 Mellanox Technologies. All rights reserved */ #define pr_fmt(fmt) "mlxfw: " fmt diff --git a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.c b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.c index 993cb5ba934e..544344ac4894 100644 --- a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.c +++ b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.c @@ -1,36 +1,5 @@ -/* - * drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.c - * Copyright (c) 2017 Mellanox Technologies. All rights reserved. - * Copyright (c) 2017 Yotam Gigi <yotamg@mellanox.com> - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the names of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * Alternatively, this software may be distributed under the terms of the - * GNU General Public License ("GPL") version 2 as published by the Free - * Software Foundation. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ +// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 +/* Copyright (c) 2017-2019 Mellanox Technologies. All rights reserved */ #define pr_fmt(fmt) "mlxfw_mfa2: " fmt diff --git a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.h b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.h index 20472aa139cd..5bba6ad79d34 100644 --- a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.h +++ b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.h @@ -1,36 +1,5 @@ -/* - * drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.h - * Copyright (c) 2017 Mellanox Technologies. All rights reserved. - * Copyright (c) 2017 Yotam Gigi <yotamg@mellanox.com> - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the names of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * Alternatively, this software may be distributed under the terms of the - * GNU General Public License ("GPL") version 2 as published by the Free - * Software Foundation. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ +/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */ +/* Copyright (c) 2017-2019 Mellanox Technologies. 
All rights reserved */ #ifndef _MLXFW_MFA2_H #define _MLXFW_MFA2_H diff --git a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_file.h b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_file.h index f667942b1ea3..874c0a2474ae 100644 --- a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_file.h +++ b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_file.h @@ -1,36 +1,5 @@ -/* - * drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_file.h - * Copyright (c) 2017 Mellanox Technologies. All rights reserved. - * Copyright (c) 2017 Yotam Gigi <yotamg@mellanox.com> - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the names of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * Alternatively, this software may be distributed under the terms of the - * GNU General Public License ("GPL") version 2 as published by the Free - * Software Foundation. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ +/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */ +/* Copyright (c) 2017-2019 Mellanox Technologies. All rights reserved */ #ifndef _MLXFW_MFA2_FILE_H #define _MLXFW_MFA2_FILE_H diff --git a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_format.h b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_format.h index dd66737c033d..b001e5258091 100644 --- a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_format.h +++ b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_format.h @@ -1,36 +1,6 @@ -/* - * drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_format.h - * Copyright (c) 2017 Mellanox Technologies. All rights reserved. - * Copyright (c) 2017 Yotam Gigi <yotamg@mellanox.com> - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. 
Neither the names of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * Alternatively, this software may be distributed under the terms of the - * GNU General Public License ("GPL") version 2 as published by the Free - * Software Foundation. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ +/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */ +/* Copyright (c) 2017-2019 Mellanox Technologies. All rights reserved */ + #ifndef _MLXFW_MFA2_FORMAT_H #define _MLXFW_MFA2_FORMAT_H diff --git a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv.h b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv.h index cc013e77b326..33c971190bba 100644 --- a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv.h +++ b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv.h @@ -1,36 +1,5 @@ -/* - * drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv.h - * Copyright (c) 2017 Mellanox Technologies. All rights reserved. - * Copyright (c) 2017 Yotam Gigi <yotamg@mellanox.com> - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the names of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * Alternatively, this software may be distributed under the terms of the - * GNU General Public License ("GPL") version 2 as published by the Free - * Software Foundation. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
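Every mlxfw license-header replacement in this range follows the kernel's SPDX placement convention: .c files carry the identifier in a C++-style // comment, while headers keep a /* */ block comment, since headers can be pulled into contexts (such as assembly) where // is not valid. The two first-line forms used throughout these hunks:

/* First line of a .c file: */
// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0

/* First line of a .h file: */
/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */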
- */ +/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */ +/* Copyright (c) 2017-2019 Mellanox Technologies. All rights reserved */ #ifndef _MLXFW_MFA2_TLV_H #define _MLXFW_MFA2_TLV_H diff --git a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv_multi.c b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv_multi.c index 0094b92a233b..017d68f1e123 100644 --- a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv_multi.c +++ b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv_multi.c @@ -1,36 +1,5 @@ -/* - * drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv_multi.c - * Copyright (c) 2017 Mellanox Technologies. All rights reserved. - * Copyright (c) 2017 Yotam Gigi <yotamg@mellanox.com> - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the names of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * Alternatively, this software may be distributed under the terms of the - * GNU General Public License ("GPL") version 2 as published by the Free - * Software Foundation. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ +// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 +/* Copyright (c) 2017-2019 Mellanox Technologies. All rights reserved */ #define pr_fmt(fmt) "MFA2: " fmt diff --git a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv_multi.h b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv_multi.h index 2c667894f3a2..633284eeded7 100644 --- a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv_multi.h +++ b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv_multi.h @@ -1,36 +1,6 @@ -/* - * drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv_multi.h - * Copyright (c) 2017 Mellanox Technologies. All rights reserved. - * Copyright (c) 2017 Yotam Gigi <yotamg@mellanox.com> - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. 
Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the names of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * Alternatively, this software may be distributed under the terms of the - * GNU General Public License ("GPL") version 2 as published by the Free - * Software Foundation. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ +/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */ +/* Copyright (c) 2017-2019 Mellanox Technologies. All rights reserved */ + #ifndef _MLXFW_MFA2_TLV_MULTI_H #define _MLXFW_MFA2_TLV_MULTI_H diff --git a/drivers/net/ethernet/mellanox/mlxsw/Kconfig b/drivers/net/ethernet/mellanox/mlxsw/Kconfig index 8a291eb36c64..9c195dfed031 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/Kconfig +++ b/drivers/net/ethernet/mellanox/mlxsw/Kconfig @@ -4,7 +4,6 @@ config MLXSW_CORE tristate "Mellanox Technologies Switch ASICs support" - depends on MAY_USE_DEVLINK ---help--- This driver supports Mellanox Technologies Switch ASICs family. 
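Editor's note: the core.c hunks below add two optional driver callbacks, params_register() and params_unregister(), invoke them around devlink registration, and publish the parameters once probing succeeds. A minimal driver-side sketch (hypothetical mlxsw_foo_* names; assumes the devlink_params_register()/devlink_params_unregister() API of this kernel generation):

static const struct devlink_param mlxsw_foo_devlink_params[] = {
	DEVLINK_PARAM_GENERIC(FW_LOAD_POLICY,
			      BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
			      NULL, NULL, NULL),
};

static int mlxsw_foo_params_register(struct mlxsw_core *mlxsw_core)
{
	struct devlink *devlink = priv_to_devlink(mlxsw_core);

	return devlink_params_register(devlink, mlxsw_foo_devlink_params,
				       ARRAY_SIZE(mlxsw_foo_devlink_params));
}

static void mlxsw_foo_params_unregister(struct mlxsw_core *mlxsw_core)
{
	devlink_params_unregister(priv_to_devlink(mlxsw_core),
				  mlxsw_foo_devlink_params,
				  ARRAY_SIZE(mlxsw_foo_devlink_params));
}

The core calls these around devlink_register()/devlink_unregister() and handles devlink_params_publish()/devlink_params_unpublish() itself, so a driver only has to supply the parameter table.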
@@ -78,8 +77,10 @@ config MLXSW_SPECTRUM depends on IPV6 || IPV6=n depends on NET_IPGRE || NET_IPGRE=n depends on IPV6_GRE || IPV6_GRE=n + depends on VXLAN || VXLAN=n select GENERIC_ALLOCATOR select PARMAN + select OBJAGG select MLXFW default m ---help--- diff --git a/drivers/net/ethernet/mellanox/mlxsw/Makefile b/drivers/net/ethernet/mellanox/mlxsw/Makefile index 1f77e97e2d7a..a01d15546e37 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/Makefile +++ b/drivers/net/ethernet/mellanox/mlxsw/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_MLXSW_CORE) += mlxsw_core.o mlxsw_core-objs := core.o core_acl_flex_keys.o \ - core_acl_flex_actions.o + core_acl_flex_actions.o core_env.o mlxsw_core-$(CONFIG_MLXSW_CORE_HWMON) += core_hwmon.o mlxsw_core-$(CONFIG_MLXSW_CORE_THERMAL) += core_thermal.o obj-$(CONFIG_MLXSW_PCI) += mlxsw_pci.o @@ -20,7 +20,7 @@ mlxsw_spectrum-objs := spectrum.o spectrum_buffers.o \ spectrum_acl_tcam.o spectrum_acl_ctcam.o \ spectrum_acl_atcam.o spectrum_acl_erp.o \ spectrum1_acl_tcam.o spectrum2_acl_tcam.o \ - spectrum_acl.o \ + spectrum_acl_bloom_filter.o spectrum_acl.o \ spectrum_flower.o spectrum_cnt.o \ spectrum_fid.o spectrum_ipip.o \ spectrum_acl_flex_actions.o \ diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index f7154f358f27..f26a4ca29363 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -568,7 +568,7 @@ static int mlxsw_emad_init(struct mlxsw_core *mlxsw_core) if (!(mlxsw_core->bus->features & MLXSW_BUS_F_TXRX)) return 0; - emad_wq = alloc_workqueue("mlxsw_core_emad", WQ_MEM_RECLAIM, 0); + emad_wq = alloc_workqueue("mlxsw_core_emad", 0, 0); if (!emad_wq) return -ENOMEM; mlxsw_core->emad_wq = emad_wq; @@ -970,10 +970,11 @@ static const struct devlink_ops mlxsw_devlink_ops = { .sb_occ_tc_port_bind_get = mlxsw_devlink_sb_occ_tc_port_bind_get, }; -int mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, - const struct mlxsw_bus *mlxsw_bus, - void *bus_priv, bool reload, - struct devlink *devlink) +static int +__mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, + const struct mlxsw_bus *mlxsw_bus, + void *bus_priv, bool reload, + struct devlink *devlink) { const char *device_kind = mlxsw_bus_info->device_kind; struct mlxsw_core *mlxsw_core; @@ -1040,6 +1041,12 @@ int mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, goto err_devlink_register; } + if (mlxsw_driver->params_register && !reload) { + err = mlxsw_driver->params_register(mlxsw_core); + if (err) + goto err_register_params; + } + err = mlxsw_hwmon_init(mlxsw_core, mlxsw_bus_info, &mlxsw_core->hwmon); if (err) goto err_hwmon_init; @@ -1055,6 +1062,9 @@ int mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, goto err_driver_init; } + if (mlxsw_driver->params_register && !reload) + devlink_params_publish(devlink); + return 0; err_driver_init: @@ -1062,6 +1072,9 @@ err_driver_init: err_thermal_init: mlxsw_hwmon_fini(mlxsw_core->hwmon); err_hwmon_init: + if (mlxsw_driver->params_unregister && !reload) + mlxsw_driver->params_unregister(mlxsw_core); +err_register_params: if (!reload) devlink_unregister(devlink); err_devlink_register: @@ -1081,6 +1094,29 @@ err_bus_init: err_devlink_alloc: return err; } + +int mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, + const struct mlxsw_bus *mlxsw_bus, + void *bus_priv, bool reload, + struct devlink *devlink) +{ + bool 
called_again = false; + int err; + +again: + err = __mlxsw_core_bus_device_register(mlxsw_bus_info, mlxsw_bus, + bus_priv, reload, devlink); + /* -EAGAIN is returned in case the FW was updated. FW needs + * a reset, so let's try to call __mlxsw_core_bus_device_register() + * again. + */ + if (err == -EAGAIN && !called_again) { + called_again = true; + goto again; + } + + return err; +} EXPORT_SYMBOL(mlxsw_core_bus_device_register); void mlxsw_core_bus_device_unregister(struct mlxsw_core *mlxsw_core, @@ -1098,10 +1134,14 @@ void mlxsw_core_bus_device_unregister(struct mlxsw_core *mlxsw_core, return; } + if (mlxsw_core->driver->params_unregister && !reload) + devlink_params_unpublish(devlink); if (mlxsw_core->driver->fini) mlxsw_core->driver->fini(mlxsw_core); mlxsw_thermal_fini(mlxsw_core->thermal); mlxsw_hwmon_fini(mlxsw_core->hwmon); + if (mlxsw_core->driver->params_unregister && !reload) + mlxsw_core->driver->params_unregister(mlxsw_core); if (!reload) devlink_unregister(devlink); mlxsw_emad_fini(mlxsw_core); @@ -1114,6 +1154,8 @@ void mlxsw_core_bus_device_unregister(struct mlxsw_core *mlxsw_core, return; reload_fail_deinit: + if (mlxsw_core->driver->params_unregister) + mlxsw_core->driver->params_unregister(mlxsw_core); devlink_unregister(devlink); devlink_resources_unregister(devlink, NULL); devlink_free(devlink); @@ -1423,13 +1465,17 @@ static int mlxsw_reg_trans_wait(struct mlxsw_reg_trans *trans) if (trans->retries) dev_warn(mlxsw_core->bus_info->dev, "EMAD retries (%d/%d) (tid=%llx)\n", trans->retries, MLXSW_EMAD_MAX_RETRY, trans->tid); - if (err) + if (err) { dev_err(mlxsw_core->bus_info->dev, "EMAD reg access failed (tid=%llx,reg_id=%x(%s),type=%s,status=%x(%s))\n", trans->tid, trans->reg->id, mlxsw_reg_id_str(trans->reg->id), mlxsw_core_reg_access_type_str(trans->type), trans->emad_status, mlxsw_emad_op_tlv_status_str(trans->emad_status)); + trace_devlink_hwerr(priv_to_devlink(mlxsw_core), + trans->emad_status, + mlxsw_emad_op_tlv_status_str(trans->emad_status)); + } list_del(&trans->bulk_list); kfree_rcu(trans, rcu); @@ -1871,14 +1917,51 @@ void mlxsw_core_fw_flash_end(struct mlxsw_core *mlxsw_core) } EXPORT_SYMBOL(mlxsw_core_fw_flash_end); +int mlxsw_core_resources_query(struct mlxsw_core *mlxsw_core, char *mbox, + struct mlxsw_res *res) +{ + int index, i; + u64 data; + u16 id; + int err; + + if (!res) + return 0; + + mlxsw_cmd_mbox_zero(mbox); + + for (index = 0; index < MLXSW_CMD_QUERY_RESOURCES_MAX_QUERIES; + index++) { + err = mlxsw_cmd_query_resources(mlxsw_core, mbox, index); + if (err) + return err; + + for (i = 0; i < MLXSW_CMD_QUERY_RESOURCES_PER_QUERY; i++) { + id = mlxsw_cmd_mbox_query_resource_id_get(mbox, i); + data = mlxsw_cmd_mbox_query_resource_data_get(mbox, i); + + if (id == MLXSW_CMD_QUERY_RESOURCES_TABLE_END_ID) + return 0; + + mlxsw_res_parse(res, id, data); + } + } + + /* If after MLXSW_CMD_QUERY_RESOURCES_MAX_QUERIES we still didn't get + * MLXSW_CMD_QUERY_RESOURCES_TABLE_END_ID, something went wrong in the FW.
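The new mlxsw_core_resources_query() above walks the firmware resource table one query at a time until the TABLE_END sentinel appears. Bus drivers call it from their init path with a scratch command mailbox; a sketch of such a caller (hypothetical mlxsw_foo_bus_init(), modeled on the mlxsw_i2c_init() change later in this patch):

static int mlxsw_foo_bus_init(void *bus_priv, struct mlxsw_core *mlxsw_core,
			      const struct mlxsw_config_profile *profile,
			      struct mlxsw_res *res)
{
	char *mbox;
	int err;

	mbox = mlxsw_cmd_mbox_alloc();
	if (!mbox)
		return -ENOMEM;

	/* Fills *res from the FW resource table; no-op when res is NULL. */
	err = mlxsw_core_resources_query(mlxsw_core, mbox, res);

	mlxsw_cmd_mbox_free(mbox);
	return err;
}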
+ */ + return -EIO; +} +EXPORT_SYMBOL(mlxsw_core_resources_query); + static int __init mlxsw_core_module_init(void) { int err; - mlxsw_wq = alloc_workqueue(mlxsw_core_driver_name, WQ_MEM_RECLAIM, 0); + mlxsw_wq = alloc_workqueue(mlxsw_core_driver_name, 0, 0); if (!mlxsw_wq) return -ENOMEM; - mlxsw_owq = alloc_ordered_workqueue("%s_ordered", WQ_MEM_RECLAIM, + mlxsw_owq = alloc_ordered_workqueue("%s_ordered", 0, mlxsw_core_driver_name); if (!mlxsw_owq) { err = -ENOMEM; diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h index c4e4971764e5..8ec53f027575 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core.h @@ -182,6 +182,8 @@ int mlxsw_core_port_get_phys_port_name(struct mlxsw_core *mlxsw_core, int mlxsw_core_schedule_dw(struct delayed_work *dwork, unsigned long delay); bool mlxsw_core_schedule_work(struct work_struct *work); void mlxsw_core_flush_owq(void); +int mlxsw_core_resources_query(struct mlxsw_core *mlxsw_core, char *mbox, + struct mlxsw_res *res); #define MLXSW_CONFIG_PROFILE_SWID_COUNT 8 @@ -282,6 +284,8 @@ struct mlxsw_driver { const struct mlxsw_config_profile *profile, u64 *p_single_size, u64 *p_double_size, u64 *p_linear_size); + int (*params_register)(struct mlxsw_core *mlxsw_core); + void (*params_unregister)(struct mlxsw_core *mlxsw_core); u8 txhdr_len; const struct mlxsw_config_profile *profile; bool res_query_enabled; @@ -342,6 +346,7 @@ struct mlxsw_bus_info { struct mlxsw_fw_rev fw_rev; u8 vsd[MLXSW_CMD_BOARDINFO_VSD_LEN]; u8 psid[MLXSW_CMD_BOARDINFO_PSID_LEN]; + u8 low_frequency; }; struct mlxsw_hwmon; @@ -392,4 +397,9 @@ static inline void mlxsw_thermal_fini(struct mlxsw_thermal *thermal) #endif +enum mlxsw_devlink_param_id { + MLXSW_DEVLINK_PARAM_ID_BASE = DEVLINK_PARAM_GENERIC_ID_MAX, + MLXSW_DEVLINK_PARAM_ID_ACL_REGION_REHASH_INTERVAL, +}; + #endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c index 785bf01fe2be..cb3e663b1d37 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c @@ -236,12 +236,10 @@ mlxsw_afk_key_info_create(struct mlxsw_afk *mlxsw_afk, struct mlxsw_afk_element_usage *elusage) { struct mlxsw_afk_key_info *key_info; - size_t alloc_size; int err; - alloc_size = sizeof(*key_info) + - sizeof(key_info->blocks[0]) * mlxsw_afk->max_blocks; - key_info = kzalloc(alloc_size, GFP_KERNEL); + key_info = kzalloc(struct_size(key_info, blocks, mlxsw_afk->max_blocks), + GFP_KERNEL); if (!key_info) return ERR_PTR(-ENOMEM); err = mlxsw_afk_picker(mlxsw_afk, key_info, elusage); @@ -426,15 +424,17 @@ mlxsw_sp_afk_encode_one(const struct mlxsw_afk_element_inst *elinst, void mlxsw_afk_encode(struct mlxsw_afk *mlxsw_afk, struct mlxsw_afk_key_info *key_info, struct mlxsw_afk_element_values *values, - char *key, char *mask, int block_start, int block_end) + char *key, char *mask) { + unsigned int blocks_count = + mlxsw_afk_key_info_blocks_count_get(key_info); char block_mask[MLXSW_SP_AFK_KEY_BLOCK_MAX_SIZE]; char block_key[MLXSW_SP_AFK_KEY_BLOCK_MAX_SIZE]; const struct mlxsw_afk_element_inst *elinst; enum mlxsw_afk_element element; int block_index, i; - for (i = block_start; i <= block_end; i++) { + for (i = 0; i < blocks_count; i++) { memset(block_key, 0, MLXSW_SP_AFK_KEY_BLOCK_MAX_SIZE); memset(block_mask, 0, MLXSW_SP_AFK_KEY_BLOCK_MAX_SIZE); @@ -451,10 +451,18 @@ void mlxsw_afk_encode(struct mlxsw_afk *mlxsw_afk, 
values->storage.mask); } - if (key) - mlxsw_afk->ops->encode_block(block_key, i, key); - if (mask) - mlxsw_afk->ops->encode_block(block_mask, i, mask); + mlxsw_afk->ops->encode_block(key, i, block_key); + mlxsw_afk->ops->encode_block(mask, i, block_mask); } } EXPORT_SYMBOL(mlxsw_afk_encode); + +void mlxsw_afk_clear(struct mlxsw_afk *mlxsw_afk, char *key, + int block_start, int block_end) +{ + int i; + + for (i = block_start; i <= block_end; i++) + mlxsw_afk->ops->clear_block(key, i); +} +EXPORT_SYMBOL(mlxsw_afk_clear); diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h index c29c045d826d..4a625cdf3e7c 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h @@ -33,6 +33,8 @@ enum mlxsw_afk_element { MLXSW_AFK_ELEMENT_IP_TTL_, MLXSW_AFK_ELEMENT_IP_ECN, MLXSW_AFK_ELEMENT_IP_DSCP, + MLXSW_AFK_ELEMENT_VIRT_ROUTER_8_10, + MLXSW_AFK_ELEMENT_VIRT_ROUTER_0_7, MLXSW_AFK_ELEMENT_MAX, }; @@ -87,6 +89,8 @@ static const struct mlxsw_afk_element_info mlxsw_afk_element_infos[] = { MLXSW_AFK_ELEMENT_INFO_U32(IP_TTL_, 0x18, 0, 8), MLXSW_AFK_ELEMENT_INFO_U32(IP_ECN, 0x18, 9, 2), MLXSW_AFK_ELEMENT_INFO_U32(IP_DSCP, 0x18, 11, 6), + MLXSW_AFK_ELEMENT_INFO_U32(VIRT_ROUTER_8_10, 0x18, 17, 3), + MLXSW_AFK_ELEMENT_INFO_U32(VIRT_ROUTER_0_7, 0x18, 20, 8), MLXSW_AFK_ELEMENT_INFO_BUF(SRC_IP_96_127, 0x20, 4), MLXSW_AFK_ELEMENT_INFO_BUF(SRC_IP_64_95, 0x24, 4), MLXSW_AFK_ELEMENT_INFO_BUF(SRC_IP_32_63, 0x28, 4), @@ -188,7 +192,8 @@ struct mlxsw_afk; struct mlxsw_afk_ops { const struct mlxsw_afk_block *blocks; unsigned int blocks_count; - void (*encode_block)(char *block, int block_index, char *output); + void (*encode_block)(char *output, int block_index, char *block); + void (*clear_block)(char *output, int block_index); }; struct mlxsw_afk *mlxsw_afk_create(unsigned int max_blocks, @@ -228,6 +233,8 @@ void mlxsw_afk_values_add_buf(struct mlxsw_afk_element_values *values, void mlxsw_afk_encode(struct mlxsw_afk *mlxsw_afk, struct mlxsw_afk_key_info *key_info, struct mlxsw_afk_element_values *values, - char *key, char *mask, int block_start, int block_end); + char *key, char *mask); +void mlxsw_afk_clear(struct mlxsw_afk *mlxsw_afk, char *key, + int block_start, int block_end); #endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_env.c b/drivers/net/ethernet/mellanox/mlxsw/core_env.c new file mode 100644 index 000000000000..c1c1965d7acc --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/core_env.c @@ -0,0 +1,238 @@ +// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 +/* Copyright (c) 2018 Mellanox Technologies. 
All rights reserved */ + +#include <linux/kernel.h> +#include <linux/err.h> + +#include "core.h" +#include "core_env.h" +#include "item.h" +#include "reg.h" + +static int mlxsw_env_validate_cable_ident(struct mlxsw_core *core, int id, + bool *qsfp) +{ + char eeprom_tmp[MLXSW_REG_MCIA_EEPROM_SIZE]; + char mcia_pl[MLXSW_REG_MCIA_LEN]; + u8 ident; + int err; + + mlxsw_reg_mcia_pack(mcia_pl, id, 0, MLXSW_REG_MCIA_PAGE0_LO_OFF, 0, 1, + MLXSW_REG_MCIA_I2C_ADDR_LOW); + err = mlxsw_reg_query(core, MLXSW_REG(mcia), mcia_pl); + if (err) + return err; + mlxsw_reg_mcia_eeprom_memcpy_from(mcia_pl, eeprom_tmp); + ident = eeprom_tmp[0]; + switch (ident) { + case MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_SFP: + *qsfp = false; + break; + case MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP: /* fall-through */ + case MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP_PLUS: /* fall-through */ + case MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP28: /* fall-through */ + case MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP_DD: + *qsfp = true; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int +mlxsw_env_query_module_eeprom(struct mlxsw_core *mlxsw_core, int module, + u16 offset, u16 size, void *data, + unsigned int *p_read_size) +{ + char eeprom_tmp[MLXSW_REG_MCIA_EEPROM_SIZE]; + char mcia_pl[MLXSW_REG_MCIA_LEN]; + u16 i2c_addr; + int status; + int err; + + size = min_t(u16, size, MLXSW_REG_MCIA_EEPROM_SIZE); + + if (offset < MLXSW_REG_MCIA_EEPROM_PAGE_LENGTH && + offset + size > MLXSW_REG_MCIA_EEPROM_PAGE_LENGTH) + /* Cross pages read, read until offset 256 in low page */ + size = MLXSW_REG_MCIA_EEPROM_PAGE_LENGTH - offset; + + i2c_addr = MLXSW_REG_MCIA_I2C_ADDR_LOW; + if (offset >= MLXSW_REG_MCIA_EEPROM_PAGE_LENGTH) { + i2c_addr = MLXSW_REG_MCIA_I2C_ADDR_HIGH; + offset -= MLXSW_REG_MCIA_EEPROM_PAGE_LENGTH; + } + + mlxsw_reg_mcia_pack(mcia_pl, module, 0, 0, offset, size, i2c_addr); + + err = mlxsw_reg_query(mlxsw_core, MLXSW_REG(mcia), mcia_pl); + if (err) + return err; + + status = mlxsw_reg_mcia_status_get(mcia_pl); + if (status) + return -EIO; + + mlxsw_reg_mcia_eeprom_memcpy_from(mcia_pl, eeprom_tmp); + memcpy(data, eeprom_tmp, size); + *p_read_size = size; + + return 0; +} + +int mlxsw_env_module_temp_thresholds_get(struct mlxsw_core *core, int module, + int off, int *temp) +{ + char eeprom_tmp[MLXSW_REG_MCIA_EEPROM_SIZE]; + union { + u8 buf[MLXSW_REG_MCIA_TH_ITEM_SIZE]; + u16 temp; + } temp_thresh; + char mcia_pl[MLXSW_REG_MCIA_LEN] = {0}; + char mtbr_pl[MLXSW_REG_MTBR_LEN] = {0}; + u16 module_temp; + bool qsfp; + int err; + + mlxsw_reg_mtbr_pack(mtbr_pl, MLXSW_REG_MTBR_BASE_MODULE_INDEX + module, + 1); + err = mlxsw_reg_query(core, MLXSW_REG(mtbr), mtbr_pl); + if (err) + return err; + + /* Don't read temperature thresholds for module with no valid info. */ + mlxsw_reg_mtbr_temp_unpack(mtbr_pl, 0, &module_temp, NULL); + switch (module_temp) { + case MLXSW_REG_MTBR_BAD_SENS_INFO: /* fall-through */ + case MLXSW_REG_MTBR_NO_CONN: /* fall-through */ + case MLXSW_REG_MTBR_NO_TEMP_SENS: /* fall-through */ + case MLXSW_REG_MTBR_INDEX_NA: + *temp = 0; + return 0; + default: + /* Do not consider thresholds for zero temperature. */ + if (MLXSW_REG_MTMP_TEMP_TO_MC(module_temp) == 0) { + *temp = 0; + return 0; + } + break; + } + + /* Read Free Side Device Temperature Thresholds from page 03h + * (MSB at lower byte address). 
+ * Bytes: + * 128-129 - Temp High Alarm (SFP_TEMP_HIGH_ALARM); + * 130-131 - Temp Low Alarm (SFP_TEMP_LOW_ALARM); + * 132-133 - Temp High Warning (SFP_TEMP_HIGH_WARN); + * 134-135 - Temp Low Warning (SFP_TEMP_LOW_WARN); + */ + + /* Validate module identifier value. */ + err = mlxsw_env_validate_cable_ident(core, module, &qsfp); + if (err) + return err; + + if (qsfp) + mlxsw_reg_mcia_pack(mcia_pl, module, 0, + MLXSW_REG_MCIA_TH_PAGE_NUM, + MLXSW_REG_MCIA_TH_PAGE_OFF + off, + MLXSW_REG_MCIA_TH_ITEM_SIZE, + MLXSW_REG_MCIA_I2C_ADDR_LOW); + else + mlxsw_reg_mcia_pack(mcia_pl, module, 0, + MLXSW_REG_MCIA_PAGE0_LO, + off, MLXSW_REG_MCIA_TH_ITEM_SIZE, + MLXSW_REG_MCIA_I2C_ADDR_HIGH); + + err = mlxsw_reg_query(core, MLXSW_REG(mcia), mcia_pl); + if (err) + return err; + + mlxsw_reg_mcia_eeprom_memcpy_from(mcia_pl, eeprom_tmp); + memcpy(temp_thresh.buf, eeprom_tmp, MLXSW_REG_MCIA_TH_ITEM_SIZE); + *temp = temp_thresh.temp * 1000; + + return 0; +} + +int mlxsw_env_get_module_info(struct mlxsw_core *mlxsw_core, int module, + struct ethtool_modinfo *modinfo) +{ + u8 module_info[MLXSW_REG_MCIA_EEPROM_MODULE_INFO_SIZE]; + u16 offset = MLXSW_REG_MCIA_EEPROM_MODULE_INFO_SIZE; + u8 module_rev_id, module_id; + unsigned int read_size; + int err; + + err = mlxsw_env_query_module_eeprom(mlxsw_core, module, 0, offset, + module_info, &read_size); + if (err) + return err; + + if (read_size < offset) + return -EIO; + + module_rev_id = module_info[MLXSW_REG_MCIA_EEPROM_MODULE_INFO_REV_ID]; + module_id = module_info[MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID]; + + switch (module_id) { + case MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP: + modinfo->type = ETH_MODULE_SFF_8436; + modinfo->eeprom_len = ETH_MODULE_SFF_8436_LEN; + break; + case MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP_PLUS: /* fall-through */ + case MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP28: + if (module_id == MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP28 || + module_rev_id >= + MLXSW_REG_MCIA_EEPROM_MODULE_INFO_REV_ID_8636) { + modinfo->type = ETH_MODULE_SFF_8636; + modinfo->eeprom_len = ETH_MODULE_SFF_8636_LEN; + } else { + modinfo->type = ETH_MODULE_SFF_8436; + modinfo->eeprom_len = ETH_MODULE_SFF_8436_LEN; + } + break; + case MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_SFP: + modinfo->type = ETH_MODULE_SFF_8472; + modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN; + break; + default: + return -EINVAL; + } + + return 0; +} +EXPORT_SYMBOL(mlxsw_env_get_module_info); + +int mlxsw_env_get_module_eeprom(struct net_device *netdev, + struct mlxsw_core *mlxsw_core, int module, + struct ethtool_eeprom *ee, u8 *data) +{ + int offset = ee->offset; + unsigned int read_size; + int i = 0; + int err; + + if (!ee->len) + return -EINVAL; + + memset(data, 0, ee->len); + + while (i < ee->len) { + err = mlxsw_env_query_module_eeprom(mlxsw_core, module, offset, + ee->len - i, data + i, + &read_size); + if (err) { + netdev_err(netdev, "Eeprom query failed\n"); + return err; + } + + i += read_size; + offset += read_size; + } + + return 0; +} +EXPORT_SYMBOL(mlxsw_env_get_module_eeprom); diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_env.h b/drivers/net/ethernet/mellanox/mlxsw/core_env.h new file mode 100644 index 000000000000..064d0e770c01 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/core_env.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */ +/* Copyright (c) 2018 Mellanox Technologies. 
All rights reserved */ + +#ifndef _MLXSW_CORE_ENV_H +#define _MLXSW_CORE_ENV_H + +int mlxsw_env_module_temp_thresholds_get(struct mlxsw_core *core, int module, + int off, int *temp); + +int mlxsw_env_get_module_info(struct mlxsw_core *mlxsw_core, int module, + struct ethtool_modinfo *modinfo); + +int mlxsw_env_get_module_eeprom(struct net_device *netdev, + struct mlxsw_core *mlxsw_core, int module, + struct ethtool_eeprom *ee, u8 *data); + +#endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c index e04e8162aa14..6956bbebe2f1 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c @@ -7,8 +7,10 @@ #include <linux/sysfs.h> #include <linux/hwmon.h> #include <linux/err.h> +#include <linux/sfp.h> #include "core.h" +#include "core_env.h" #define MLXSW_HWMON_TEMP_SENSOR_MAX_COUNT 127 #define MLXSW_HWMON_ATTR_COUNT (MLXSW_HWMON_TEMP_SENSOR_MAX_COUNT * 4 + \ @@ -30,6 +32,7 @@ struct mlxsw_hwmon { struct attribute *attrs[MLXSW_HWMON_ATTR_COUNT + 1]; struct mlxsw_hwmon_attr hwmon_attrs[MLXSW_HWMON_ATTR_COUNT]; unsigned int attrs_count; + u8 sensor_count; }; static ssize_t mlxsw_hwmon_temp_show(struct device *dev, @@ -121,6 +124,27 @@ static ssize_t mlxsw_hwmon_fan_rpm_show(struct device *dev, return sprintf(buf, "%u\n", mlxsw_reg_mfsm_rpm_get(mfsm_pl)); } +static ssize_t mlxsw_hwmon_fan_fault_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct mlxsw_hwmon_attr *mlwsw_hwmon_attr = + container_of(attr, struct mlxsw_hwmon_attr, dev_attr); + struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon; + char fore_pl[MLXSW_REG_FORE_LEN]; + bool fault; + int err; + + err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(fore), fore_pl); + if (err) { + dev_err(mlxsw_hwmon->bus_info->dev, "Failed to query fan\n"); + return err; + } + mlxsw_reg_fore_unpack(fore_pl, mlwsw_hwmon_attr->type_index, &fault); + + return sprintf(buf, "%u\n", fault); +} + static ssize_t mlxsw_hwmon_pwm_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -167,12 +191,160 @@ static ssize_t mlxsw_hwmon_pwm_store(struct device *dev, return len; } +static ssize_t mlxsw_hwmon_module_temp_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct mlxsw_hwmon_attr *mlwsw_hwmon_attr = + container_of(attr, struct mlxsw_hwmon_attr, dev_attr); + struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon; + char mtbr_pl[MLXSW_REG_MTBR_LEN] = {0}; + u16 temp; + u8 module; + int err; + + module = mlwsw_hwmon_attr->type_index - mlxsw_hwmon->sensor_count; + mlxsw_reg_mtbr_pack(mtbr_pl, MLXSW_REG_MTBR_BASE_MODULE_INDEX + module, + 1); + err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtbr), mtbr_pl); + if (err) { + dev_err(dev, "Failed to query module temperature sensor\n"); + return err; + } + + mlxsw_reg_mtbr_temp_unpack(mtbr_pl, 0, &temp, NULL); + /* Update status and temperature cache. */ + switch (temp) { + case MLXSW_REG_MTBR_NO_CONN: /* fall-through */ + case MLXSW_REG_MTBR_NO_TEMP_SENS: /* fall-through */ + case MLXSW_REG_MTBR_INDEX_NA: + temp = 0; + break; + case MLXSW_REG_MTBR_BAD_SENS_INFO: + /* Untrusted cable is connected. Reading temperature from its + * sensor is faulty. 
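The sensor paths here and in core_env.c scale raw MTBR/MTMP readings with MLXSW_REG_MTMP_TEMP_TO_MC() into the millidegree units hwmon and thermal expect. A sketch of the assumed conversion (the hardware is assumed to report signed temperature in 0.125 degree Celsius steps, i.e. 125 millidegrees per step; illustrative, not the reg.h definition):

/* A raw reading of 560 (0x230) maps to 560 * 125 = 70000, i.e. 70.0 C. */
static inline int foo_temp_to_mcelsius(s16 raw)
{
	return raw * 125;
}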
+ */ + temp = 0; + break; + default: + temp = MLXSW_REG_MTMP_TEMP_TO_MC(temp); + break; + } + + return sprintf(buf, "%u\n", temp); +} + +static ssize_t mlxsw_hwmon_module_temp_fault_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct mlxsw_hwmon_attr *mlwsw_hwmon_attr = + container_of(attr, struct mlxsw_hwmon_attr, dev_attr); + struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon; + char mtbr_pl[MLXSW_REG_MTBR_LEN] = {0}; + u8 module, fault; + u16 temp; + int err; + + module = mlwsw_hwmon_attr->type_index - mlxsw_hwmon->sensor_count; + mlxsw_reg_mtbr_pack(mtbr_pl, MLXSW_REG_MTBR_BASE_MODULE_INDEX + module, + 1); + err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtbr), mtbr_pl); + if (err) { + dev_err(dev, "Failed to query module temperature sensor\n"); + return err; + } + + mlxsw_reg_mtbr_temp_unpack(mtbr_pl, 0, &temp, NULL); + + /* Update status and temperature cache. */ + switch (temp) { + case MLXSW_REG_MTBR_BAD_SENS_INFO: + /* Untrusted cable is connected. Reading temperature from its + * sensor is faulty. + */ + fault = 1; + break; + case MLXSW_REG_MTBR_NO_CONN: /* fall-through */ + case MLXSW_REG_MTBR_NO_TEMP_SENS: /* fall-through */ + case MLXSW_REG_MTBR_INDEX_NA: + default: + fault = 0; + break; + } + + return sprintf(buf, "%u\n", fault); +} + +static ssize_t +mlxsw_hwmon_module_temp_critical_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct mlxsw_hwmon_attr *mlwsw_hwmon_attr = + container_of(attr, struct mlxsw_hwmon_attr, dev_attr); + struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon; + int temp; + u8 module; + int err; + + module = mlwsw_hwmon_attr->type_index - mlxsw_hwmon->sensor_count; + err = mlxsw_env_module_temp_thresholds_get(mlxsw_hwmon->core, module, + SFP_TEMP_HIGH_WARN, &temp); + if (err) { + dev_err(dev, "Failed to query module temperature thresholds\n"); + return err; + } + + return sprintf(buf, "%u\n", temp); +} + +static ssize_t +mlxsw_hwmon_module_temp_emergency_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct mlxsw_hwmon_attr *mlwsw_hwmon_attr = + container_of(attr, struct mlxsw_hwmon_attr, dev_attr); + struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon; + u8 module; + int temp; + int err; + + module = mlwsw_hwmon_attr->type_index - mlxsw_hwmon->sensor_count; + err = mlxsw_env_module_temp_thresholds_get(mlxsw_hwmon->core, module, + SFP_TEMP_HIGH_ALARM, &temp); + if (err) { + dev_err(dev, "Failed to query module temperature thresholds\n"); + return err; + } + + return sprintf(buf, "%u\n", temp); +} + +static ssize_t +mlxsw_hwmon_module_temp_label_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct mlxsw_hwmon_attr *mlwsw_hwmon_attr = + container_of(attr, struct mlxsw_hwmon_attr, dev_attr); + + return sprintf(buf, "front panel %03u\n", + mlwsw_hwmon_attr->type_index); +} + enum mlxsw_hwmon_attr_type { MLXSW_HWMON_ATTR_TYPE_TEMP, MLXSW_HWMON_ATTR_TYPE_TEMP_MAX, MLXSW_HWMON_ATTR_TYPE_TEMP_RST, MLXSW_HWMON_ATTR_TYPE_FAN_RPM, + MLXSW_HWMON_ATTR_TYPE_FAN_FAULT, MLXSW_HWMON_ATTR_TYPE_PWM, + MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE, + MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_FAULT, + MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_CRIT, + MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_EMERG, + MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_LABEL, }; static void mlxsw_hwmon_attr_add(struct mlxsw_hwmon *mlxsw_hwmon, @@ -209,6 +381,12 @@ static void mlxsw_hwmon_attr_add(struct mlxsw_hwmon *mlxsw_hwmon, snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name), "fan%u_input", 
num + 1); break; + case MLXSW_HWMON_ATTR_TYPE_FAN_FAULT: + mlxsw_hwmon_attr->dev_attr.show = mlxsw_hwmon_fan_fault_show; + mlxsw_hwmon_attr->dev_attr.attr.mode = 0444; + snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name), + "fan%u_fault", num + 1); + break; case MLXSW_HWMON_ATTR_TYPE_PWM: mlxsw_hwmon_attr->dev_attr.show = mlxsw_hwmon_pwm_show; mlxsw_hwmon_attr->dev_attr.store = mlxsw_hwmon_pwm_store; @@ -216,6 +394,40 @@ static void mlxsw_hwmon_attr_add(struct mlxsw_hwmon *mlxsw_hwmon, snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name), "pwm%u", num + 1); break; + case MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE: + mlxsw_hwmon_attr->dev_attr.show = mlxsw_hwmon_module_temp_show; + mlxsw_hwmon_attr->dev_attr.attr.mode = 0444; + snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name), + "temp%u_input", num + 1); + break; + case MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_FAULT: + mlxsw_hwmon_attr->dev_attr.show = + mlxsw_hwmon_module_temp_fault_show; + mlxsw_hwmon_attr->dev_attr.attr.mode = 0444; + snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name), + "temp%u_fault", num + 1); + break; + case MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_CRIT: + mlxsw_hwmon_attr->dev_attr.show = + mlxsw_hwmon_module_temp_critical_show; + mlxsw_hwmon_attr->dev_attr.attr.mode = 0444; + snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name), + "temp%u_crit", num + 1); + break; + case MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_EMERG: + mlxsw_hwmon_attr->dev_attr.show = + mlxsw_hwmon_module_temp_emergency_show; + mlxsw_hwmon_attr->dev_attr.attr.mode = 0444; + snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name), + "temp%u_emergency", num + 1); + break; + case MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_LABEL: + mlxsw_hwmon_attr->dev_attr.show = + mlxsw_hwmon_module_temp_label_show; + mlxsw_hwmon_attr->dev_attr.attr.mode = 0444; + snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name), + "temp%u_label", num + 1); + break; default: WARN_ON(1); } @@ -233,7 +445,6 @@ static int mlxsw_hwmon_temp_init(struct mlxsw_hwmon *mlxsw_hwmon) { char mtcap_pl[MLXSW_REG_MTCAP_LEN] = {0}; char mtmp_pl[MLXSW_REG_MTMP_LEN]; - u8 sensor_count; int i; int err; @@ -242,8 +453,8 @@ static int mlxsw_hwmon_temp_init(struct mlxsw_hwmon *mlxsw_hwmon) dev_err(mlxsw_hwmon->bus_info->dev, "Failed to get number of temp sensors\n"); return err; } - sensor_count = mlxsw_reg_mtcap_sensor_count_get(mtcap_pl); - for (i = 0; i < sensor_count; i++) { + mlxsw_hwmon->sensor_count = mlxsw_reg_mtcap_sensor_count_get(mtcap_pl); + for (i = 0; i < mlxsw_hwmon->sensor_count; i++) { mlxsw_reg_mtmp_pack(mtmp_pl, i, true, true); err = mlxsw_reg_write(mlxsw_hwmon->core, MLXSW_REG(mtmp), mtmp_pl); @@ -280,10 +491,14 @@ static int mlxsw_hwmon_fans_init(struct mlxsw_hwmon *mlxsw_hwmon) mlxsw_reg_mfcr_unpack(mfcr_pl, &freq, &tacho_active, &pwm_active); num = 0; for (type_index = 0; type_index < MLXSW_MFCR_TACHOS_MAX; type_index++) { - if (tacho_active & BIT(type_index)) + if (tacho_active & BIT(type_index)) { mlxsw_hwmon_attr_add(mlxsw_hwmon, MLXSW_HWMON_ATTR_TYPE_FAN_RPM, + type_index, num); + mlxsw_hwmon_attr_add(mlxsw_hwmon, + MLXSW_HWMON_ATTR_TYPE_FAN_FAULT, type_index, num++); + } } num = 0; for (type_index = 0; type_index < MLXSW_MFCR_PWMS_MAX; type_index++) { @@ -295,6 +510,53 @@ static int mlxsw_hwmon_fans_init(struct mlxsw_hwmon *mlxsw_hwmon) return 0; } +static int mlxsw_hwmon_module_init(struct mlxsw_hwmon *mlxsw_hwmon) +{ + unsigned int module_count = mlxsw_core_max_ports(mlxsw_hwmon->core); + char pmlp_pl[MLXSW_REG_PMLP_LEN] = 
{0}; + int i, index; + u8 width; + int err; + + /* Add extra attributes for module temperature. Sensor index is + * assigned to sensor_count value, while all indexes before + * sensor_count are already utilized by the sensors connected through + * mtmp register by mlxsw_hwmon_temp_init(). + */ + index = mlxsw_hwmon->sensor_count; + for (i = 1; i < module_count; i++) { + mlxsw_reg_pmlp_pack(pmlp_pl, i); + err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(pmlp), + pmlp_pl); + if (err) { + dev_err(mlxsw_hwmon->bus_info->dev, "Failed to read module index %d\n", + i); + return err; + } + width = mlxsw_reg_pmlp_width_get(pmlp_pl); + if (!width) + continue; + mlxsw_hwmon_attr_add(mlxsw_hwmon, + MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE, index, + index); + mlxsw_hwmon_attr_add(mlxsw_hwmon, + MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_FAULT, + index, index); + mlxsw_hwmon_attr_add(mlxsw_hwmon, + MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_CRIT, + index, index); + mlxsw_hwmon_attr_add(mlxsw_hwmon, + MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_EMERG, + index, index); + mlxsw_hwmon_attr_add(mlxsw_hwmon, + MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_LABEL, + index, index); + index++; + } + + return 0; +} + int mlxsw_hwmon_init(struct mlxsw_core *mlxsw_core, const struct mlxsw_bus_info *mlxsw_bus_info, struct mlxsw_hwmon **p_hwmon) @@ -317,6 +579,10 @@ int mlxsw_hwmon_init(struct mlxsw_core *mlxsw_core, if (err) goto err_fans_init; + err = mlxsw_hwmon_module_init(mlxsw_hwmon); + if (err) + goto err_temp_module_init; + mlxsw_hwmon->groups[0] = &mlxsw_hwmon->group; mlxsw_hwmon->group.attrs = mlxsw_hwmon->attrs; @@ -333,6 +599,7 @@ int mlxsw_hwmon_init(struct mlxsw_core *mlxsw_core, return 0; err_hwmon_register: +err_temp_module_init: err_fans_init: err_temp_init: kfree(mlxsw_hwmon); diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c index 6d29dc428608..472f63f9fac5 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c @@ -9,17 +9,48 @@ #include <linux/sysfs.h> #include <linux/thermal.h> #include <linux/err.h> +#include <linux/sfp.h> #include "core.h" +#include "core_env.h" #define MLXSW_THERMAL_POLL_INT 1000 /* ms */ -#define MLXSW_THERMAL_MAX_TEMP 110000 /* 110C */ +#define MLXSW_THERMAL_SLOW_POLL_INT 20000 /* ms */ +#define MLXSW_THERMAL_ASIC_TEMP_NORM 75000 /* 75C */ +#define MLXSW_THERMAL_ASIC_TEMP_HIGH 85000 /* 85C */ +#define MLXSW_THERMAL_ASIC_TEMP_HOT 105000 /* 105C */ +#define MLXSW_THERMAL_ASIC_TEMP_CRIT 110000 /* 110C */ +#define MLXSW_THERMAL_HYSTERESIS_TEMP 5000 /* 5C */ +#define MLXSW_THERMAL_MODULE_TEMP_SHIFT (MLXSW_THERMAL_HYSTERESIS_TEMP * 2) +#define MLXSW_THERMAL_ZONE_MAX_NAME 16 #define MLXSW_THERMAL_MAX_STATE 10 #define MLXSW_THERMAL_MAX_DUTY 255 +/* Minimum and maximum allowed fan speed in percent: from 20% to 100%. Values + * MLXSW_THERMAL_MAX_STATE + x, where x is between 2 and 10, are used for + * setting fan speed dynamic minimum. For example, if value is set to 14 (40%), + * cooling levels vector will be set to 4, 4, 4, 4, 4, 5, 6, 7, 8, 9, 10 to + * introduce PWM speed in percent: 40, 40, 40, 40, 40, 50, 60, 70, 80, 90, 100. + */ +#define MLXSW_THERMAL_SPEED_MIN (MLXSW_THERMAL_MAX_STATE + 2) +#define MLXSW_THERMAL_SPEED_MAX (MLXSW_THERMAL_MAX_STATE * 2) +#define MLXSW_THERMAL_SPEED_MIN_LEVEL 2 /* 20% */ + +/* External cooling devices, allowed for binding to mlxsw thermal zones. 
*/ +static char * const mlxsw_thermal_external_allowed_cdev[] = { + "mlxreg_fan", +}; + +enum mlxsw_thermal_trips { + MLXSW_THERMAL_TEMP_TRIP_NORM, + MLXSW_THERMAL_TEMP_TRIP_HIGH, + MLXSW_THERMAL_TEMP_TRIP_HOT, + MLXSW_THERMAL_TEMP_TRIP_CRIT, +}; struct mlxsw_thermal_trip { int type; int temp; + int hyst; int min_state; int max_state; }; @@ -27,32 +58,29 @@ struct mlxsw_thermal_trip { static const struct mlxsw_thermal_trip default_thermal_trips[] = { { /* In range - 0-40% PWM */ .type = THERMAL_TRIP_ACTIVE, - .temp = 75000, + .temp = MLXSW_THERMAL_ASIC_TEMP_NORM, + .hyst = MLXSW_THERMAL_HYSTERESIS_TEMP, .min_state = 0, .max_state = (4 * MLXSW_THERMAL_MAX_STATE) / 10, }, - { /* High - 40-100% PWM */ - .type = THERMAL_TRIP_ACTIVE, - .temp = 80000, - .min_state = (4 * MLXSW_THERMAL_MAX_STATE) / 10, - .max_state = MLXSW_THERMAL_MAX_STATE, - }, { - /* Very high - 100% PWM */ + /* In range - 40-100% PWM */ .type = THERMAL_TRIP_ACTIVE, - .temp = 85000, - .min_state = MLXSW_THERMAL_MAX_STATE, + .temp = MLXSW_THERMAL_ASIC_TEMP_HIGH, + .hyst = MLXSW_THERMAL_HYSTERESIS_TEMP, + .min_state = (4 * MLXSW_THERMAL_MAX_STATE) / 10, .max_state = MLXSW_THERMAL_MAX_STATE, }, { /* Warning */ .type = THERMAL_TRIP_HOT, - .temp = 105000, + .temp = MLXSW_THERMAL_ASIC_TEMP_HOT, + .hyst = MLXSW_THERMAL_HYSTERESIS_TEMP, .min_state = MLXSW_THERMAL_MAX_STATE, .max_state = MLXSW_THERMAL_MAX_STATE, }, { /* Critical - soft poweroff */ .type = THERMAL_TRIP_CRITICAL, - .temp = MLXSW_THERMAL_MAX_TEMP, + .temp = MLXSW_THERMAL_ASIC_TEMP_CRIT, .min_state = MLXSW_THERMAL_MAX_STATE, .max_state = MLXSW_THERMAL_MAX_STATE, } @@ -63,13 +91,26 @@ static const struct mlxsw_thermal_trip default_thermal_trips[] = { /* Make sure all trips are writable */ #define MLXSW_THERMAL_TRIP_MASK (BIT(MLXSW_THERMAL_NUM_TRIPS) - 1) +struct mlxsw_thermal; + +struct mlxsw_thermal_module { + struct mlxsw_thermal *parent; + struct thermal_zone_device *tzdev; + struct mlxsw_thermal_trip trips[MLXSW_THERMAL_NUM_TRIPS]; + enum thermal_device_mode mode; + int module; +}; + struct mlxsw_thermal { struct mlxsw_core *core; const struct mlxsw_bus_info *bus_info; struct thermal_zone_device *tzdev; + int polling_delay; struct thermal_cooling_device *cdevs[MLXSW_MFCR_PWMS_MAX]; + u8 cooling_levels[MLXSW_THERMAL_MAX_STATE + 1]; struct mlxsw_thermal_trip trips[MLXSW_THERMAL_NUM_TRIPS]; enum thermal_device_mode mode; + struct mlxsw_thermal_module *tz_module_arr; }; static inline u8 mlxsw_state_to_duty(int state) @@ -93,9 +134,67 @@ static int mlxsw_get_cooling_device_idx(struct mlxsw_thermal *thermal, if (thermal->cdevs[i] == cdev) return i; + /* Allow mlxsw thermal zone binding to an external cooling device */ + for (i = 0; i < ARRAY_SIZE(mlxsw_thermal_external_allowed_cdev); i++) { + if (strnstr(cdev->type, mlxsw_thermal_external_allowed_cdev[i], + sizeof(cdev->type))) + return 0; + } + return -ENODEV; } +static void +mlxsw_thermal_module_trips_reset(struct mlxsw_thermal_module *tz) +{ + tz->trips[MLXSW_THERMAL_TEMP_TRIP_NORM].temp = 0; + tz->trips[MLXSW_THERMAL_TEMP_TRIP_HIGH].temp = 0; + tz->trips[MLXSW_THERMAL_TEMP_TRIP_HOT].temp = 0; + tz->trips[MLXSW_THERMAL_TEMP_TRIP_CRIT].temp = 0; +} + +static int +mlxsw_thermal_module_trips_update(struct device *dev, struct mlxsw_core *core, + struct mlxsw_thermal_module *tz) +{ + int crit_temp, emerg_temp; + int err; + + err = mlxsw_env_module_temp_thresholds_get(core, tz->module, + SFP_TEMP_HIGH_WARN, + &crit_temp); + if (err) + return err; + + err = mlxsw_env_module_temp_thresholds_get(core, tz->module, + 
SFP_TEMP_HIGH_ALARM, + &emerg_temp); + if (err) + return err; + + /* According to the system thermal requirements, the thermal zones are + * defined with four trip points. The critical and emergency + * temperature thresholds, provided by QSFP module are set as "active" + * and "hot" trip points, "normal" and "critical" trip points are + * derived from "active" and "hot" by subtracting or adding double + * hysteresis value. + */ + if (crit_temp >= MLXSW_THERMAL_MODULE_TEMP_SHIFT) + tz->trips[MLXSW_THERMAL_TEMP_TRIP_NORM].temp = crit_temp - + MLXSW_THERMAL_MODULE_TEMP_SHIFT; + else + tz->trips[MLXSW_THERMAL_TEMP_TRIP_NORM].temp = crit_temp; + tz->trips[MLXSW_THERMAL_TEMP_TRIP_HIGH].temp = crit_temp; + tz->trips[MLXSW_THERMAL_TEMP_TRIP_HOT].temp = emerg_temp; + if (emerg_temp > crit_temp) + tz->trips[MLXSW_THERMAL_TEMP_TRIP_CRIT].temp = emerg_temp + + MLXSW_THERMAL_MODULE_TEMP_SHIFT; + else + tz->trips[MLXSW_THERMAL_TEMP_TRIP_CRIT].temp = emerg_temp; + + return 0; +} + static int mlxsw_thermal_bind(struct thermal_zone_device *tzdev, struct thermal_cooling_device *cdev) { @@ -162,7 +261,7 @@ static int mlxsw_thermal_set_mode(struct thermal_zone_device *tzdev, mutex_lock(&tzdev->lock); if (mode == THERMAL_DEVICE_ENABLED) - tzdev->polling_delay = MLXSW_THERMAL_POLL_INT; + tzdev->polling_delay = thermal->polling_delay; else tzdev->polling_delay = 0; @@ -227,13 +326,31 @@ static int mlxsw_thermal_set_trip_temp(struct thermal_zone_device *tzdev, struct mlxsw_thermal *thermal = tzdev->devdata; if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS || - temp > MLXSW_THERMAL_MAX_TEMP) + temp > MLXSW_THERMAL_ASIC_TEMP_CRIT) return -EINVAL; thermal->trips[trip].temp = temp; return 0; } +static int mlxsw_thermal_get_trip_hyst(struct thermal_zone_device *tzdev, + int trip, int *p_hyst) +{ + struct mlxsw_thermal *thermal = tzdev->devdata; + + *p_hyst = thermal->trips[trip].hyst; + return 0; +} + +static int mlxsw_thermal_set_trip_hyst(struct thermal_zone_device *tzdev, + int trip, int hyst) +{ + struct mlxsw_thermal *thermal = tzdev->devdata; + + thermal->trips[trip].hyst = hyst; + return 0; +} + static struct thermal_zone_device_ops mlxsw_thermal_ops = { .bind = mlxsw_thermal_bind, .unbind = mlxsw_thermal_unbind, @@ -243,6 +360,206 @@ static struct thermal_zone_device_ops mlxsw_thermal_ops = { .get_trip_type = mlxsw_thermal_get_trip_type, .get_trip_temp = mlxsw_thermal_get_trip_temp, .set_trip_temp = mlxsw_thermal_set_trip_temp, + .get_trip_hyst = mlxsw_thermal_get_trip_hyst, + .set_trip_hyst = mlxsw_thermal_set_trip_hyst, +}; + +static int mlxsw_thermal_module_bind(struct thermal_zone_device *tzdev, + struct thermal_cooling_device *cdev) +{ + struct mlxsw_thermal_module *tz = tzdev->devdata; + struct mlxsw_thermal *thermal = tz->parent; + int i, j, err; + + /* If the cooling device is one of ours bind it */ + if (mlxsw_get_cooling_device_idx(thermal, cdev) < 0) + return 0; + + for (i = 0; i < MLXSW_THERMAL_NUM_TRIPS; i++) { + const struct mlxsw_thermal_trip *trip = &tz->trips[i]; + + err = thermal_zone_bind_cooling_device(tzdev, i, cdev, + trip->max_state, + trip->min_state, + THERMAL_WEIGHT_DEFAULT); + if (err < 0) + goto err_bind_cooling_device; + } + return 0; + +err_bind_cooling_device: + for (j = i - 1; j >= 0; j--) + thermal_zone_unbind_cooling_device(tzdev, j, cdev); + return err; +} + +static int mlxsw_thermal_module_unbind(struct thermal_zone_device *tzdev, + struct thermal_cooling_device *cdev) +{ + struct mlxsw_thermal_module *tz = tzdev->devdata; + struct mlxsw_thermal *thermal = tz->parent; + int 
i; + int err; + + /* If the cooling device is one of ours unbind it */ + if (mlxsw_get_cooling_device_idx(thermal, cdev) < 0) + return 0; + + for (i = 0; i < MLXSW_THERMAL_NUM_TRIPS; i++) { + err = thermal_zone_unbind_cooling_device(tzdev, i, cdev); + WARN_ON(err); + } + return err; +} + +static int mlxsw_thermal_module_mode_get(struct thermal_zone_device *tzdev, + enum thermal_device_mode *mode) +{ + struct mlxsw_thermal_module *tz = tzdev->devdata; + + *mode = tz->mode; + + return 0; +} + +static int mlxsw_thermal_module_mode_set(struct thermal_zone_device *tzdev, + enum thermal_device_mode mode) +{ + struct mlxsw_thermal_module *tz = tzdev->devdata; + struct mlxsw_thermal *thermal = tz->parent; + + mutex_lock(&tzdev->lock); + + if (mode == THERMAL_DEVICE_ENABLED) + tzdev->polling_delay = thermal->polling_delay; + else + tzdev->polling_delay = 0; + + mutex_unlock(&tzdev->lock); + + tz->mode = mode; + thermal_zone_device_update(tzdev, THERMAL_EVENT_UNSPECIFIED); + + return 0; +} + +static int mlxsw_thermal_module_temp_get(struct thermal_zone_device *tzdev, + int *p_temp) +{ + struct mlxsw_thermal_module *tz = tzdev->devdata; + struct mlxsw_thermal *thermal = tz->parent; + struct device *dev = thermal->bus_info->dev; + char mtbr_pl[MLXSW_REG_MTBR_LEN]; + u16 temp; + int err; + + /* Read module temperature. */ + mlxsw_reg_mtbr_pack(mtbr_pl, MLXSW_REG_MTBR_BASE_MODULE_INDEX + + tz->module, 1); + err = mlxsw_reg_query(thermal->core, MLXSW_REG(mtbr), mtbr_pl); + if (err) + return err; + + mlxsw_reg_mtbr_temp_unpack(mtbr_pl, 0, &temp, NULL); + /* Update temperature. */ + switch (temp) { + case MLXSW_REG_MTBR_NO_CONN: /* fall-through */ + case MLXSW_REG_MTBR_NO_TEMP_SENS: /* fall-through */ + case MLXSW_REG_MTBR_INDEX_NA: /* fall-through */ + case MLXSW_REG_MTBR_BAD_SENS_INFO: + temp = 0; + break; + default: + temp = MLXSW_REG_MTMP_TEMP_TO_MC(temp); + /* Reset all trip points. */ + mlxsw_thermal_module_trips_reset(tz); + /* Update trip points. 
*/ + err = mlxsw_thermal_module_trips_update(dev, thermal->core, + tz); + if (err) + return err; + break; + } + + *p_temp = (int) temp; + return 0; +} + +static int +mlxsw_thermal_module_trip_type_get(struct thermal_zone_device *tzdev, int trip, + enum thermal_trip_type *p_type) +{ + struct mlxsw_thermal_module *tz = tzdev->devdata; + + if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS) + return -EINVAL; + + *p_type = tz->trips[trip].type; + return 0; +} + +static int +mlxsw_thermal_module_trip_temp_get(struct thermal_zone_device *tzdev, + int trip, int *p_temp) +{ + struct mlxsw_thermal_module *tz = tzdev->devdata; + + if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS) + return -EINVAL; + + *p_temp = tz->trips[trip].temp; + return 0; +} + +static int +mlxsw_thermal_module_trip_temp_set(struct thermal_zone_device *tzdev, + int trip, int temp) +{ + struct mlxsw_thermal_module *tz = tzdev->devdata; + + if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS || + temp > tz->trips[MLXSW_THERMAL_TEMP_TRIP_CRIT].temp) + return -EINVAL; + + tz->trips[trip].temp = temp; + return 0; +} + +static int +mlxsw_thermal_module_trip_hyst_get(struct thermal_zone_device *tzdev, int trip, + int *p_hyst) +{ + struct mlxsw_thermal_module *tz = tzdev->devdata; + + *p_hyst = tz->trips[trip].hyst; + return 0; +} + +static int +mlxsw_thermal_module_trip_hyst_set(struct thermal_zone_device *tzdev, int trip, + int hyst) +{ + struct mlxsw_thermal_module *tz = tzdev->devdata; + + tz->trips[trip].hyst = hyst; + return 0; +} + +static struct thermal_zone_params mlxsw_thermal_module_params = { + .governor_name = "user_space", +}; + +static struct thermal_zone_device_ops mlxsw_thermal_module_ops = { + .bind = mlxsw_thermal_module_bind, + .unbind = mlxsw_thermal_module_unbind, + .get_mode = mlxsw_thermal_module_mode_get, + .set_mode = mlxsw_thermal_module_mode_set, + .get_temp = mlxsw_thermal_module_temp_get, + .get_trip_type = mlxsw_thermal_module_trip_type_get, + .get_trip_temp = mlxsw_thermal_module_trip_temp_get, + .set_trip_temp = mlxsw_thermal_module_trip_temp_set, + .get_trip_hyst = mlxsw_thermal_module_trip_hyst_get, + .set_trip_hyst = mlxsw_thermal_module_trip_hyst_set, }; static int mlxsw_thermal_get_max_state(struct thermal_cooling_device *cdev, @@ -285,12 +602,51 @@ static int mlxsw_thermal_set_cur_state(struct thermal_cooling_device *cdev, struct mlxsw_thermal *thermal = cdev->devdata; struct device *dev = thermal->bus_info->dev; char mfsc_pl[MLXSW_REG_MFSC_LEN]; - int err, idx; + unsigned long cur_state, i; + int idx; + u8 duty; + int err; idx = mlxsw_get_cooling_device_idx(thermal, cdev); if (idx < 0) return idx; + /* Verify if this request is for changing allowed fan dynamical + * minimum. If it is - update cooling levels accordingly and update + * state, if current state is below the newly requested minimum state. + * For example, if current state is 5, and minimal state is to be + * changed from 4 to 6, thermal->cooling_levels[0 to 5] will be changed + * all from 4 to 6. And state 5 (thermal->cooling_levels[4]) should be + * overwritten. 
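A worked example of the dynamic-minimum encoding described above, using the MLXSW_THERMAL_SPEED_* definitions from earlier in this patch: a request of state 14 encodes minimum level 14 - MLXSW_THERMAL_MAX_STATE = 4, so the vector becomes cooling_levels[i] = max(4, i), i.e. {4, 4, 4, 4, 4, 5, 6, 7, 8, 9, 10}, and any later state s is normalized through cooling_levels[s], keeping the fan at 40% duty or above. A sketch of just that clamp (hypothetical helper name):

static void foo_set_dynamic_min(u8 *cooling_levels, unsigned long min_state)
{
	unsigned long i;

	/* States below the minimum are clamped up; higher states pass
	 * through unchanged. */
	for (i = 0; i <= MLXSW_THERMAL_MAX_STATE; i++)
		cooling_levels[i] = max(min_state, i);
}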
+ */ + if (state >= MLXSW_THERMAL_SPEED_MIN && + state <= MLXSW_THERMAL_SPEED_MAX) { + state -= MLXSW_THERMAL_MAX_STATE; + for (i = 0; i <= MLXSW_THERMAL_MAX_STATE; i++) + thermal->cooling_levels[i] = max(state, i); + + mlxsw_reg_mfsc_pack(mfsc_pl, idx, 0); + err = mlxsw_reg_query(thermal->core, MLXSW_REG(mfsc), mfsc_pl); + if (err) + return err; + + duty = mlxsw_reg_mfsc_pwm_duty_cycle_get(mfsc_pl); + cur_state = mlxsw_duty_to_state(duty); + + /* If current fan state is lower than requested dynamical + * minimum, increase fan speed up to dynamical minimum. + */ + if (state < cur_state) + return 0; + + state = cur_state; + } + + if (state > MLXSW_THERMAL_MAX_STATE) + return -EINVAL; + + /* Normalize the state to the valid speed range. */ + state = thermal->cooling_levels[state]; mlxsw_reg_mfsc_pack(mfsc_pl, idx, mlxsw_state_to_duty(state)); err = mlxsw_reg_write(thermal->core, MLXSW_REG(mfsc), mfsc_pl); if (err) { @@ -306,6 +662,125 @@ static const struct thermal_cooling_device_ops mlxsw_cooling_ops = { .set_cur_state = mlxsw_thermal_set_cur_state, }; +static int +mlxsw_thermal_module_tz_init(struct mlxsw_thermal_module *module_tz) +{ + char tz_name[MLXSW_THERMAL_ZONE_MAX_NAME]; + int err; + + snprintf(tz_name, sizeof(tz_name), "mlxsw-module%d", + module_tz->module + 1); + module_tz->tzdev = thermal_zone_device_register(tz_name, + MLXSW_THERMAL_NUM_TRIPS, + MLXSW_THERMAL_TRIP_MASK, + module_tz, + &mlxsw_thermal_module_ops, + &mlxsw_thermal_module_params, + 0, 0); + if (IS_ERR(module_tz->tzdev)) { + err = PTR_ERR(module_tz->tzdev); + return err; + } + + return 0; +} + +static void mlxsw_thermal_module_tz_fini(struct thermal_zone_device *tzdev) +{ + thermal_zone_device_unregister(tzdev); +} + +static int +mlxsw_thermal_module_init(struct device *dev, struct mlxsw_core *core, + struct mlxsw_thermal *thermal, u8 local_port) +{ + struct mlxsw_thermal_module *module_tz; + char pmlp_pl[MLXSW_REG_PMLP_LEN]; + u8 width, module; + int err; + + mlxsw_reg_pmlp_pack(pmlp_pl, local_port); + err = mlxsw_reg_query(core, MLXSW_REG(pmlp), pmlp_pl); + if (err) + return err; + + width = mlxsw_reg_pmlp_width_get(pmlp_pl); + if (!width) + return 0; + + module = mlxsw_reg_pmlp_module_get(pmlp_pl, 0); + module_tz = &thermal->tz_module_arr[module]; + /* Skip if parent is already set (case of port split). */ + if (module_tz->parent) + return 0; + module_tz->module = module; + module_tz->parent = thermal; + memcpy(module_tz->trips, default_thermal_trips, + sizeof(thermal->trips)); + /* Initialize all trip points. */ + mlxsw_thermal_module_trips_reset(module_tz); + /* Update trip points according to the module data. 
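To make the trip derivation concrete: assuming a QSFP module that reports a 70 C high-warning and an 80 C high-alarm threshold, and with MLXSW_THERMAL_MODULE_TEMP_SHIFT equal to 10 C, mlxsw_thermal_module_trips_update() above produces the following trip temperatures (millidegrees):

/* crit_temp (SFP_TEMP_HIGH_WARN) = 70000, emerg_temp (SFP_TEMP_HIGH_ALARM) = 80000:
 *
 *   TRIP_NORM = crit_temp - MLXSW_THERMAL_MODULE_TEMP_SHIFT  = 60000
 *   TRIP_HIGH = crit_temp                                    = 70000
 *   TRIP_HOT  = emerg_temp                                   = 80000
 *   TRIP_CRIT = emerg_temp + MLXSW_THERMAL_MODULE_TEMP_SHIFT = 90000
 */

The two conditionals in that function guard against degenerate module data: if the warning threshold is below the shift, TRIP_NORM falls back to the warning value, and if the alarm does not exceed the warning, TRIP_CRIT falls back to the alarm value.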
*/ + return mlxsw_thermal_module_trips_update(dev, core, module_tz); +} + +static void mlxsw_thermal_module_fini(struct mlxsw_thermal_module *module_tz) +{ + if (module_tz && module_tz->tzdev) { + mlxsw_thermal_module_tz_fini(module_tz->tzdev); + module_tz->tzdev = NULL; + module_tz->parent = NULL; + } +} + +static int +mlxsw_thermal_modules_init(struct device *dev, struct mlxsw_core *core, + struct mlxsw_thermal *thermal) +{ + unsigned int module_count = mlxsw_core_max_ports(core); + struct mlxsw_thermal_module *module_tz; + int i, err; + + thermal->tz_module_arr = kcalloc(module_count, + sizeof(*thermal->tz_module_arr), + GFP_KERNEL); + if (!thermal->tz_module_arr) + return -ENOMEM; + + for (i = 1; i < module_count; i++) { + err = mlxsw_thermal_module_init(dev, core, thermal, i); + if (err) + goto err_unreg_tz_module_arr; + } + + for (i = 0; i < module_count - 1; i++) { + module_tz = &thermal->tz_module_arr[i]; + if (!module_tz->parent) + continue; + err = mlxsw_thermal_module_tz_init(module_tz); + if (err) + goto err_unreg_tz_module_arr; + } + + return 0; + +err_unreg_tz_module_arr: + for (i = module_count - 1; i >= 0; i--) + mlxsw_thermal_module_fini(&thermal->tz_module_arr[i]); + kfree(thermal->tz_module_arr); + return err; +} + +static void +mlxsw_thermal_modules_fini(struct mlxsw_thermal *thermal) +{ + unsigned int module_count = mlxsw_core_max_ports(thermal->core); + int i; + + for (i = module_count - 1; i >= 0; i--) + mlxsw_thermal_module_fini(&thermal->tz_module_arr[i]); + kfree(thermal->tz_module_arr); +} + int mlxsw_thermal_init(struct mlxsw_core *core, const struct mlxsw_bus_info *bus_info, struct mlxsw_thermal **p_thermal) @@ -358,8 +833,9 @@ int mlxsw_thermal_init(struct mlxsw_core *core, if (pwm_active & BIT(i)) { struct thermal_cooling_device *cdev; - cdev = thermal_cooling_device_register("Fan", thermal, - &mlxsw_cooling_ops); + cdev = thermal_cooling_device_register("mlxsw_fan", + thermal, + &mlxsw_cooling_ops); if (IS_ERR(cdev)) { err = PTR_ERR(cdev); dev_err(dev, "Failed to register cooling device\n"); @@ -369,22 +845,41 @@ int mlxsw_thermal_init(struct mlxsw_core *core, } } + /* Initialize cooling levels per PWM state. */ + for (i = 0; i < MLXSW_THERMAL_MAX_STATE; i++) + thermal->cooling_levels[i] = max(MLXSW_THERMAL_SPEED_MIN_LEVEL, + i); + + thermal->polling_delay = bus_info->low_frequency ? 
+ MLXSW_THERMAL_SLOW_POLL_INT : + MLXSW_THERMAL_POLL_INT; + thermal->tzdev = thermal_zone_device_register("mlxsw", MLXSW_THERMAL_NUM_TRIPS, MLXSW_THERMAL_TRIP_MASK, thermal, &mlxsw_thermal_ops, NULL, 0, - MLXSW_THERMAL_POLL_INT); + thermal->polling_delay); if (IS_ERR(thermal->tzdev)) { err = PTR_ERR(thermal->tzdev); dev_err(dev, "Failed to register thermal zone\n"); goto err_unreg_cdevs; } + err = mlxsw_thermal_modules_init(dev, core, thermal); + if (err) + goto err_unreg_tzdev; + thermal->mode = THERMAL_DEVICE_ENABLED; *p_thermal = thermal; return 0; + +err_unreg_tzdev: + if (thermal->tzdev) { + thermal_zone_device_unregister(thermal->tzdev); + thermal->tzdev = NULL; + } err_unreg_cdevs: for (i = 0; i < MLXSW_MFCR_PWMS_MAX; i++) if (thermal->cdevs[i]) @@ -398,6 +893,7 @@ void mlxsw_thermal_fini(struct mlxsw_thermal *thermal) { int i; + mlxsw_thermal_modules_fini(thermal); if (thermal->tzdev) { thermal_zone_device_unregister(thermal->tzdev); thermal->tzdev = NULL; diff --git a/drivers/net/ethernet/mellanox/mlxsw/i2c.c b/drivers/net/ethernet/mellanox/mlxsw/i2c.c index 798bd5aca384..06aea1999518 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/i2c.c +++ b/drivers/net/ethernet/mellanox/mlxsw/i2c.c @@ -14,14 +14,17 @@ #include "cmd.h" #include "core.h" #include "i2c.h" +#include "resources.h" #define MLXSW_I2C_CIR2_BASE 0x72000 #define MLXSW_I2C_CIR_STATUS_OFF 0x18 #define MLXSW_I2C_CIR2_OFF_STATUS (MLXSW_I2C_CIR2_BASE + \ MLXSW_I2C_CIR_STATUS_OFF) #define MLXSW_I2C_OPMOD_SHIFT 12 +#define MLXSW_I2C_EVENT_BIT_SHIFT 22 #define MLXSW_I2C_GO_BIT_SHIFT 23 #define MLXSW_I2C_CIR_CTRL_STATUS_SHIFT 24 +#define MLXSW_I2C_EVENT_BIT BIT(MLXSW_I2C_EVENT_BIT_SHIFT) #define MLXSW_I2C_GO_BIT BIT(MLXSW_I2C_GO_BIT_SHIFT) #define MLXSW_I2C_GO_OPMODE BIT(MLXSW_I2C_OPMOD_SHIFT) #define MLXSW_I2C_SET_IMM_CMD (MLXSW_I2C_GO_OPMODE | \ @@ -33,6 +36,9 @@ #define MLXSW_I2C_TLV_HDR_SIZE 0x10 #define MLXSW_I2C_ADDR_WIDTH 4 #define MLXSW_I2C_PUSH_CMD_SIZE (MLXSW_I2C_ADDR_WIDTH + 4) +#define MLXSW_I2C_SET_EVENT_CMD (MLXSW_I2C_EVENT_BIT) +#define MLXSW_I2C_PUSH_EVENT_CMD (MLXSW_I2C_GO_BIT | \ + MLXSW_I2C_SET_EVENT_CMD) #define MLXSW_I2C_READ_SEMA_SIZE 4 #define MLXSW_I2C_PREP_SIZE (MLXSW_I2C_ADDR_WIDTH + 28) #define MLXSW_I2C_MBOX_SIZE 20 @@ -44,6 +50,7 @@ #define MLXSW_I2C_BLK_MAX 32 #define MLXSW_I2C_RETRY 5 #define MLXSW_I2C_TIMEOUT_MSECS 5000 +#define MLXSW_I2C_MAX_DATA_SIZE 256 /** * struct mlxsw_i2c - device private data: @@ -167,7 +174,7 @@ static int mlxsw_i2c_wait_go_bit(struct i2c_client *client, return err > 0 ? 0 : err; } -/* Routine posts a command to ASIC though mail box. */ +/* Routine posts a command to ASIC through mail box. */ static int mlxsw_i2c_write_cmd(struct i2c_client *client, struct mlxsw_i2c *mlxsw_i2c, int immediate) @@ -213,6 +220,66 @@ static int mlxsw_i2c_write_cmd(struct i2c_client *client, return 0; } +/* Routine posts initialization command to ASIC through mail box. 
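The initialization-command path below drives the Command Interface Register (CIR) with two i2c transfers and then polls for completion. Condensed control flow, with hypothetical foo_* helpers standing in for the i2c_transfer() sequences that follow:

static int foo_cir_exec(struct i2c_client *client, u16 opcode, u32 in_mod)
{
	u8 status;
	int err;

	err = foo_cir_prep(client, opcode, in_mod);	/* stage opcode/in_mod in CIR2 */
	if (err)
		return err;
	err = foo_cir_push_go(client);			/* set the GO bit */
	if (err)
		return err;
	err = foo_cir_wait_go_clear(client, &status);	/* poll until HW clears GO */
	if (err)
		return err;
	return status ? -EIO : 0;	/* non-zero status: failed transaction */
}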
*/ +static int +mlxsw_i2c_write_init_cmd(struct i2c_client *client, + struct mlxsw_i2c *mlxsw_i2c, u16 opcode, u32 in_mod) +{ + __be32 push_cmd_buf[MLXSW_I2C_PUSH_CMD_SIZE / 4] = { + 0, cpu_to_be32(MLXSW_I2C_PUSH_EVENT_CMD) + }; + __be32 prep_cmd_buf[MLXSW_I2C_PREP_SIZE / 4] = { + 0, 0, 0, 0, 0, 0, + cpu_to_be32(client->adapter->nr & 0xffff), + cpu_to_be32(MLXSW_I2C_SET_EVENT_CMD) + }; + struct i2c_msg push_cmd = + MLXSW_I2C_WRITE_MSG(client, push_cmd_buf, + MLXSW_I2C_PUSH_CMD_SIZE); + struct i2c_msg prep_cmd = + MLXSW_I2C_WRITE_MSG(client, prep_cmd_buf, MLXSW_I2C_PREP_SIZE); + u8 status; + int err; + + push_cmd_buf[1] = cpu_to_be32(MLXSW_I2C_PUSH_EVENT_CMD | opcode); + prep_cmd_buf[3] = cpu_to_be32(in_mod); + prep_cmd_buf[7] = cpu_to_be32(MLXSW_I2C_GO_BIT | opcode); + mlxsw_i2c_set_slave_addr((u8 *)prep_cmd_buf, + MLXSW_I2C_CIR2_BASE); + mlxsw_i2c_set_slave_addr((u8 *)push_cmd_buf, + MLXSW_I2C_CIR2_OFF_STATUS); + + /* Prepare Command Interface Register for transaction */ + err = i2c_transfer(client->adapter, &prep_cmd, 1); + if (err < 0) + return err; + else if (err != 1) + return -EIO; + + /* Write out Command Interface Register GO bit to push transaction */ + err = i2c_transfer(client->adapter, &push_cmd, 1); + if (err < 0) + return err; + else if (err != 1) + return -EIO; + + /* Wait until go bit is cleared. */ + err = mlxsw_i2c_wait_go_bit(client, mlxsw_i2c, &status); + if (err) { + dev_err(&client->dev, "HW semaphore is not released"); + return err; + } + + /* Validate transaction completion status. */ + if (status) { + dev_err(&client->dev, "Bad transaction completion status %x\n", + status); + return -EIO; + } + + return 0; +} + /* Routine obtains mail box offsets from ASIC register space. */ static int mlxsw_i2c_get_mbox(struct i2c_client *client, struct mlxsw_i2c *mlxsw_i2c) @@ -310,8 +377,8 @@ mlxsw_i2c_write(struct device *dev, size_t in_mbox_size, u8 *in_mbox, int num, /* Routine executes I2C command. */ static int -mlxsw_i2c_cmd(struct device *dev, size_t in_mbox_size, u8 *in_mbox, - size_t out_mbox_size, u8 *out_mbox, u8 *status) +mlxsw_i2c_cmd(struct device *dev, u16 opcode, u32 in_mod, size_t in_mbox_size, + u8 *in_mbox, size_t out_mbox_size, u8 *out_mbox, u8 *status) { struct i2c_client *client = to_i2c_client(dev); struct mlxsw_i2c *mlxsw_i2c = i2c_get_clientdata(client); @@ -326,24 +393,40 @@ mlxsw_i2c_cmd(struct device *dev, size_t in_mbox_size, u8 *in_mbox, WARN_ON(in_mbox_size % sizeof(u32) || out_mbox_size % sizeof(u32)); - reg_size = mlxsw_i2c_get_reg_size(in_mbox); - num = reg_size / MLXSW_I2C_BLK_MAX; - if (reg_size % MLXSW_I2C_BLK_MAX) - num++; + if (in_mbox) { + reg_size = mlxsw_i2c_get_reg_size(in_mbox); + num = reg_size / MLXSW_I2C_BLK_MAX; + if (reg_size % MLXSW_I2C_BLK_MAX) + num++; - if (mutex_lock_interruptible(&mlxsw_i2c->cmd.lock) < 0) { - dev_err(&client->dev, "Could not acquire lock"); - return -EINVAL; - } + if (mutex_lock_interruptible(&mlxsw_i2c->cmd.lock) < 0) { + dev_err(&client->dev, "Could not acquire lock"); + return -EINVAL; + } + + err = mlxsw_i2c_write(dev, reg_size, in_mbox, num, status); + if (err) + goto cmd_fail; + + /* No out mailbox is case of write transaction. */ + if (!out_mbox) { + mutex_unlock(&mlxsw_i2c->cmd.lock); + return 0; + } + } else { + /* No input mailbox is case of initialization query command. 
*/ + reg_size = MLXSW_I2C_MAX_DATA_SIZE; + num = reg_size / MLXSW_I2C_BLK_MAX; - err = mlxsw_i2c_write(dev, reg_size, in_mbox, num, status); - if (err) - goto cmd_fail; + if (mutex_lock_interruptible(&mlxsw_i2c->cmd.lock) < 0) { + dev_err(&client->dev, "Could not acquire lock"); + return -EINVAL; + } - /* No out mailbox is case of write transaction. */ - if (!out_mbox) { - mutex_unlock(&mlxsw_i2c->cmd.lock); - return 0; + err = mlxsw_i2c_write_init_cmd(client, mlxsw_i2c, opcode, + in_mod); + if (err) + goto cmd_fail; } /* Send read transaction to get output mailbox content. */ @@ -395,8 +478,8 @@ static int mlxsw_i2c_cmd_exec(void *bus_priv, u16 opcode, u8 opcode_mod, { struct mlxsw_i2c *mlxsw_i2c = bus_priv; - return mlxsw_i2c_cmd(mlxsw_i2c->dev, in_mbox_size, in_mbox, - out_mbox_size, out_mbox, status); + return mlxsw_i2c_cmd(mlxsw_i2c->dev, opcode, in_mod, in_mbox_size, + in_mbox, out_mbox_size, out_mbox, status); } static bool mlxsw_i2c_skb_transmit_busy(void *bus_priv, @@ -414,13 +497,22 @@ static int mlxsw_i2c_skb_transmit(void *bus_priv, struct sk_buff *skb, static int mlxsw_i2c_init(void *bus_priv, struct mlxsw_core *mlxsw_core, const struct mlxsw_config_profile *profile, - struct mlxsw_res *resources) + struct mlxsw_res *res) { struct mlxsw_i2c *mlxsw_i2c = bus_priv; + char *mbox; + int err; mlxsw_i2c->core = mlxsw_core; - return 0; + mbox = mlxsw_cmd_mbox_alloc(); + if (!mbox) + return -ENOMEM; + + err = mlxsw_core_resources_query(mlxsw_core, mbox, res); + + mlxsw_cmd_mbox_free(mbox); + return err; } static void mlxsw_i2c_fini(void *bus_priv) @@ -503,6 +595,7 @@ static int mlxsw_i2c_probe(struct i2c_client *client, mlxsw_i2c->bus_info.device_kind = id->name; mlxsw_i2c->bus_info.device_name = client->name; mlxsw_i2c->bus_info.dev = &client->dev; + mlxsw_i2c->bus_info.low_frequency = true; mlxsw_i2c->dev = &client->dev; err = mlxsw_core_bus_device_register(&mlxsw_i2c->bus_info, diff --git a/drivers/net/ethernet/mellanox/mlxsw/minimal.c b/drivers/net/ethernet/mellanox/mlxsw/minimal.c index 5a6c4457fb55..00c390024350 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/minimal.c +++ b/drivers/net/ethernet/mellanox/mlxsw/minimal.c @@ -1,6 +1,9 @@ // SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 -/* Copyright (c) 2016-2018 Mellanox Technologies. All rights reserved */ +/* Copyright (c) 2016-2019 Mellanox Technologies. 
All rights reserved */ +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/ethtool.h> #include <linux/i2c.h> #include <linux/kernel.h> #include <linux/module.h> @@ -8,59 +11,395 @@ #include <linux/types.h> #include "core.h" +#include "core_env.h" #include "i2c.h" -static const char mlxsw_minimal_driver_name[] = "mlxsw_minimal"; +static const char mlxsw_m_driver_name[] = "mlxsw_minimal"; -static const struct mlxsw_config_profile mlxsw_minimal_config_profile; +struct mlxsw_m_port; -static struct mlxsw_driver mlxsw_minimal_driver = { - .kind = mlxsw_minimal_driver_name, - .priv_size = 1, - .profile = &mlxsw_minimal_config_profile, +struct mlxsw_m { + struct mlxsw_m_port **ports; + int *module_to_port; + struct mlxsw_core *core; + const struct mlxsw_bus_info *bus_info; + u8 base_mac[ETH_ALEN]; + u8 max_ports; }; -static const struct i2c_device_id mlxsw_minimal_i2c_id[] = { +struct mlxsw_m_port { + struct net_device *dev; + struct mlxsw_m *mlxsw_m; + u8 local_port; + u8 module; +}; + +static int mlxsw_m_base_mac_get(struct mlxsw_m *mlxsw_m) +{ + char spad_pl[MLXSW_REG_SPAD_LEN] = {0}; + int err; + + err = mlxsw_reg_query(mlxsw_m->core, MLXSW_REG(spad), spad_pl); + if (err) + return err; + mlxsw_reg_spad_base_mac_memcpy_from(spad_pl, mlxsw_m->base_mac); + return 0; +} + +static int mlxsw_m_port_dummy_open_stop(struct net_device *dev) +{ + return 0; +} + +static int +mlxsw_m_port_get_phys_port_name(struct net_device *dev, char *name, size_t len) +{ + struct mlxsw_m_port *mlxsw_m_port = netdev_priv(dev); + struct mlxsw_core *core = mlxsw_m_port->mlxsw_m->core; + u8 local_port = mlxsw_m_port->local_port; + + return mlxsw_core_port_get_phys_port_name(core, local_port, name, len); +} + +static int mlxsw_m_port_get_port_parent_id(struct net_device *dev, + struct netdev_phys_item_id *ppid) +{ + struct mlxsw_m_port *mlxsw_m_port = netdev_priv(dev); + struct mlxsw_m *mlxsw_m = mlxsw_m_port->mlxsw_m; + + ppid->id_len = sizeof(mlxsw_m->base_mac); + memcpy(&ppid->id, &mlxsw_m->base_mac, ppid->id_len); + + return 0; +} + +static const struct net_device_ops mlxsw_m_port_netdev_ops = { + .ndo_open = mlxsw_m_port_dummy_open_stop, + .ndo_stop = mlxsw_m_port_dummy_open_stop, + .ndo_get_phys_port_name = mlxsw_m_port_get_phys_port_name, + .ndo_get_port_parent_id = mlxsw_m_port_get_port_parent_id, +}; + +static int mlxsw_m_get_module_info(struct net_device *netdev, + struct ethtool_modinfo *modinfo) +{ + struct mlxsw_m_port *mlxsw_m_port = netdev_priv(netdev); + struct mlxsw_core *core = mlxsw_m_port->mlxsw_m->core; + + return mlxsw_env_get_module_info(core, mlxsw_m_port->module, modinfo); +} + +static int +mlxsw_m_get_module_eeprom(struct net_device *netdev, struct ethtool_eeprom *ee, + u8 *data) +{ + struct mlxsw_m_port *mlxsw_m_port = netdev_priv(netdev); + struct mlxsw_core *core = mlxsw_m_port->mlxsw_m->core; + + return mlxsw_env_get_module_eeprom(netdev, core, mlxsw_m_port->module, + ee, data); +} + +static const struct ethtool_ops mlxsw_m_port_ethtool_ops = { + .get_module_info = mlxsw_m_get_module_info, + .get_module_eeprom = mlxsw_m_get_module_eeprom, +}; + +static int +mlxsw_m_port_module_info_get(struct mlxsw_m *mlxsw_m, u8 local_port, + u8 *p_module, u8 *p_width) +{ + char pmlp_pl[MLXSW_REG_PMLP_LEN]; + int err; + + mlxsw_reg_pmlp_pack(pmlp_pl, local_port); + err = mlxsw_reg_query(mlxsw_m->core, MLXSW_REG(pmlp), pmlp_pl); + if (err) + return err; + *p_module = mlxsw_reg_pmlp_module_get(pmlp_pl, 0); + *p_width = mlxsw_reg_pmlp_width_get(pmlp_pl); + + return 0; +} + +static int 
+mlxsw_m_port_dev_addr_get(struct mlxsw_m_port *mlxsw_m_port) +{ + struct mlxsw_m *mlxsw_m = mlxsw_m_port->mlxsw_m; + struct net_device *dev = mlxsw_m_port->dev; + char ppad_pl[MLXSW_REG_PPAD_LEN]; + int err; + + mlxsw_reg_ppad_pack(ppad_pl, false, 0); + err = mlxsw_reg_query(mlxsw_m->core, MLXSW_REG(ppad), ppad_pl); + if (err) + return err; + mlxsw_reg_ppad_mac_memcpy_from(ppad_pl, dev->dev_addr); + /* The last byte value in base mac address is guaranteed + * to be such that it does not overflow when adding the local_port + * value. + */ + dev->dev_addr[ETH_ALEN - 1] += mlxsw_m_port->module + 1; + return 0; +} + +static int +mlxsw_m_port_create(struct mlxsw_m *mlxsw_m, u8 local_port, u8 module) +{ + struct mlxsw_m_port *mlxsw_m_port; + struct net_device *dev; + int err; + + err = mlxsw_core_port_init(mlxsw_m->core, local_port); + if (err) { + dev_err(mlxsw_m->bus_info->dev, "Port %d: Failed to init core port\n", + local_port); + return err; + } + + dev = alloc_etherdev(sizeof(struct mlxsw_m_port)); + if (!dev) { + err = -ENOMEM; + goto err_alloc_etherdev; + } + + SET_NETDEV_DEV(dev, mlxsw_m->bus_info->dev); + mlxsw_m_port = netdev_priv(dev); + mlxsw_m_port->dev = dev; + mlxsw_m_port->mlxsw_m = mlxsw_m; + mlxsw_m_port->local_port = local_port; + mlxsw_m_port->module = module; + + dev->netdev_ops = &mlxsw_m_port_netdev_ops; + dev->ethtool_ops = &mlxsw_m_port_ethtool_ops; + + err = mlxsw_m_port_dev_addr_get(mlxsw_m_port); + if (err) { + dev_err(mlxsw_m->bus_info->dev, "Port %d: Unable to get port mac address\n", + mlxsw_m_port->local_port); + goto err_dev_addr_get; + } + + netif_carrier_off(dev); + mlxsw_m->ports[local_port] = mlxsw_m_port; + err = register_netdev(dev); + if (err) { + dev_err(mlxsw_m->bus_info->dev, "Port %d: Failed to register netdev\n", + mlxsw_m_port->local_port); + goto err_register_netdev; + } + + mlxsw_core_port_eth_set(mlxsw_m->core, mlxsw_m_port->local_port, + mlxsw_m_port, dev, module + 1, false, 0); + + return 0; + +err_register_netdev: + mlxsw_m->ports[local_port] = NULL; + free_netdev(dev); +err_dev_addr_get: +err_alloc_etherdev: + mlxsw_core_port_fini(mlxsw_m->core, local_port); + return err; +} + +static void mlxsw_m_port_remove(struct mlxsw_m *mlxsw_m, u8 local_port) +{ + struct mlxsw_m_port *mlxsw_m_port = mlxsw_m->ports[local_port]; + + mlxsw_core_port_clear(mlxsw_m->core, local_port, mlxsw_m); + unregister_netdev(mlxsw_m_port->dev); /* This calls ndo_stop */ + mlxsw_m->ports[local_port] = NULL; + free_netdev(mlxsw_m_port->dev); + mlxsw_core_port_fini(mlxsw_m->core, local_port); +} + +static int mlxsw_m_port_module_map(struct mlxsw_m *mlxsw_m, u8 local_port, + u8 *last_module) +{ + u8 module, width; + int err; + + /* Fill out module to local port mapping array */ + err = mlxsw_m_port_module_info_get(mlxsw_m, local_port, &module, + &width); + if (err) + return err; + + if (!width) + return 0; + /* Skip if the port belongs to the cluster */ + if (module == *last_module) + return 0; + *last_module = module; + mlxsw_m->module_to_port[module] = ++mlxsw_m->max_ports; + + return 0; +} + +static void mlxsw_m_port_module_unmap(struct mlxsw_m *mlxsw_m, u8 module) +{ + mlxsw_m->module_to_port[module] = -1; +} + +static int mlxsw_m_ports_create(struct mlxsw_m *mlxsw_m) +{ + unsigned int max_ports = mlxsw_core_max_ports(mlxsw_m->core); + u8 last_module = max_ports; + int i; + int err; + + mlxsw_m->ports = kcalloc(max_ports, sizeof(*mlxsw_m->ports), + GFP_KERNEL); + if (!mlxsw_m->ports) + return -ENOMEM; + + mlxsw_m->module_to_port = kmalloc_array(max_ports, sizeof(int), + GFP_KERNEL); + if 
(!mlxsw_m->module_to_port) { + err = -ENOMEM; + goto err_module_to_port_alloc; + } + + /* Invalidate the entries of module to local port mapping array */ + for (i = 0; i < max_ports; i++) + mlxsw_m->module_to_port[i] = -1; + + /* Fill out module to local port mapping array */ + for (i = 1; i < max_ports; i++) { + err = mlxsw_m_port_module_map(mlxsw_m, i, &last_module); + if (err) + goto err_module_to_port_map; + } + + /* Create port objects for each valid entry */ + for (i = 0; i < mlxsw_m->max_ports; i++) { + if (mlxsw_m->module_to_port[i] > 0) { + err = mlxsw_m_port_create(mlxsw_m, + mlxsw_m->module_to_port[i], + i); + if (err) + goto err_module_to_port_create; + } + } + + return 0; + +err_module_to_port_create: + for (i--; i >= 0; i--) { + if (mlxsw_m->module_to_port[i] > 0) + mlxsw_m_port_remove(mlxsw_m, + mlxsw_m->module_to_port[i]); + } + i = max_ports; +err_module_to_port_map: + for (i--; i > 0; i--) + mlxsw_m_port_module_unmap(mlxsw_m, i); + kfree(mlxsw_m->module_to_port); +err_module_to_port_alloc: + kfree(mlxsw_m->ports); + return err; +} + +static void mlxsw_m_ports_remove(struct mlxsw_m *mlxsw_m) +{ + int i; + + for (i = 0; i < mlxsw_m->max_ports; i++) { + if (mlxsw_m->module_to_port[i] > 0) { + mlxsw_m_port_remove(mlxsw_m, + mlxsw_m->module_to_port[i]); + mlxsw_m_port_module_unmap(mlxsw_m, i); + } + } + + kfree(mlxsw_m->module_to_port); + kfree(mlxsw_m->ports); +} + +static int mlxsw_m_init(struct mlxsw_core *mlxsw_core, + const struct mlxsw_bus_info *mlxsw_bus_info) +{ + struct mlxsw_m *mlxsw_m = mlxsw_core_driver_priv(mlxsw_core); + int err; + + mlxsw_m->core = mlxsw_core; + mlxsw_m->bus_info = mlxsw_bus_info; + + err = mlxsw_m_base_mac_get(mlxsw_m); + if (err) { + dev_err(mlxsw_m->bus_info->dev, "Failed to get base mac\n"); + return err; + } + + err = mlxsw_m_ports_create(mlxsw_m); + if (err) { + dev_err(mlxsw_m->bus_info->dev, "Failed to create ports\n"); + return err; + } + + return 0; +} + +static void mlxsw_m_fini(struct mlxsw_core *mlxsw_core) +{ + struct mlxsw_m *mlxsw_m = mlxsw_core_driver_priv(mlxsw_core); + + mlxsw_m_ports_remove(mlxsw_m); +} + +static const struct mlxsw_config_profile mlxsw_m_config_profile; + +static struct mlxsw_driver mlxsw_m_driver = { + .kind = mlxsw_m_driver_name, + .priv_size = sizeof(struct mlxsw_m), + .init = mlxsw_m_init, + .fini = mlxsw_m_fini, + .profile = &mlxsw_m_config_profile, + .res_query_enabled = true, +}; + +static const struct i2c_device_id mlxsw_m_i2c_id[] = { { "mlxsw_minimal", 0}, { }, }; -static struct i2c_driver mlxsw_minimal_i2c_driver = { +static struct i2c_driver mlxsw_m_i2c_driver = { .driver.name = "mlxsw_minimal", .class = I2C_CLASS_HWMON, - .id_table = mlxsw_minimal_i2c_id, + .id_table = mlxsw_m_i2c_id, }; -static int __init mlxsw_minimal_module_init(void) +static int __init mlxsw_m_module_init(void) { int err; - err = mlxsw_core_driver_register(&mlxsw_minimal_driver); + err = mlxsw_core_driver_register(&mlxsw_m_driver); if (err) return err; - err = mlxsw_i2c_driver_register(&mlxsw_minimal_i2c_driver); + err = mlxsw_i2c_driver_register(&mlxsw_m_i2c_driver); if (err) goto err_i2c_driver_register; return 0; err_i2c_driver_register: - mlxsw_core_driver_unregister(&mlxsw_minimal_driver); + mlxsw_core_driver_unregister(&mlxsw_m_driver); return err; } -static void __exit mlxsw_minimal_module_exit(void) +static void __exit mlxsw_m_module_exit(void) { - mlxsw_i2c_driver_unregister(&mlxsw_minimal_i2c_driver); - mlxsw_core_driver_unregister(&mlxsw_minimal_driver); + mlxsw_i2c_driver_unregister(&mlxsw_m_i2c_driver); + 
mlxsw_core_driver_unregister(&mlxsw_m_driver); } -module_init(mlxsw_minimal_module_init); -module_exit(mlxsw_minimal_module_exit); +module_init(mlxsw_m_module_init); +module_exit(mlxsw_m_module_exit); MODULE_LICENSE("Dual BSD/GPL"); MODULE_AUTHOR("Vadim Pasternak <vadimp@mellanox.com>"); MODULE_DESCRIPTION("Mellanox minimal driver"); -MODULE_DEVICE_TABLE(i2c, mlxsw_minimal_i2c_id); +MODULE_DEVICE_TABLE(i2c, mlxsw_m_i2c_id); diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index 5890fdfd62c3..b40455f8293d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -604,29 +604,31 @@ static void mlxsw_pci_cq_tasklet(unsigned long data) u16 wqe_counter = mlxsw_pci_cqe_wqe_counter_get(cqe); u8 sendq = mlxsw_pci_cqe_sr_get(q->u.cq.v, cqe); u8 dqn = mlxsw_pci_cqe_dqn_get(q->u.cq.v, cqe); + char ncqe[MLXSW_PCI_CQE_SIZE_MAX]; + + memcpy(ncqe, cqe, q->elem_size); + mlxsw_pci_queue_doorbell_consumer_ring(mlxsw_pci, q); if (sendq) { struct mlxsw_pci_queue *sdq; sdq = mlxsw_pci_sdq_get(mlxsw_pci, dqn); mlxsw_pci_cqe_sdq_handle(mlxsw_pci, sdq, - wqe_counter, cqe); + wqe_counter, ncqe); q->u.cq.comp_sdq_count++; } else { struct mlxsw_pci_queue *rdq; rdq = mlxsw_pci_rdq_get(mlxsw_pci, dqn); mlxsw_pci_cqe_rdq_handle(mlxsw_pci, rdq, - wqe_counter, q->u.cq.v, cqe); + wqe_counter, q->u.cq.v, ncqe); q->u.cq.comp_rdq_count++; } if (++items == credits) break; } - if (items) { - mlxsw_pci_queue_doorbell_consumer_ring(mlxsw_pci, q); + if (items) mlxsw_pci_queue_doorbell_arm_consumer_ring(mlxsw_pci, q); - } } static u16 mlxsw_pci_cq_elem_count(const struct mlxsw_pci_queue *q) @@ -1037,42 +1039,6 @@ mlxsw_pci_config_profile_swid_config(struct mlxsw_pci *mlxsw_pci, mlxsw_cmd_mbox_config_profile_swid_config_mask_set(mbox, index, mask); } -static int mlxsw_pci_resources_query(struct mlxsw_pci *mlxsw_pci, char *mbox, - struct mlxsw_res *res) -{ - int index, i; - u64 data; - u16 id; - int err; - - if (!res) - return 0; - - mlxsw_cmd_mbox_zero(mbox); - - for (index = 0; index < MLXSW_CMD_QUERY_RESOURCES_MAX_QUERIES; - index++) { - err = mlxsw_cmd_query_resources(mlxsw_pci->core, mbox, index); - if (err) - return err; - - for (i = 0; i < MLXSW_CMD_QUERY_RESOURCES_PER_QUERY; i++) { - id = mlxsw_cmd_mbox_query_resource_id_get(mbox, i); - data = mlxsw_cmd_mbox_query_resource_data_get(mbox, i); - - if (id == MLXSW_CMD_QUERY_RESOURCES_TABLE_END_ID) - return 0; - - mlxsw_res_parse(res, id, data); - } - } - - /* If after MLXSW_RESOURCES_QUERY_MAX_QUERIES we still didn't get - * MLXSW_RESOURCES_TABLE_END_ID, something went bad in the FW. 
- */ - return -EIO; -} - static int mlxsw_pci_profile_get_kvd_sizes(const struct mlxsw_pci *mlxsw_pci, const struct mlxsw_config_profile *profile, @@ -1365,10 +1331,10 @@ static int mlxsw_pci_sw_reset(struct mlxsw_pci *mlxsw_pci, u32 val = mlxsw_pci_read32(mlxsw_pci, FW_READY); if ((val & MLXSW_PCI_FW_READY_MASK) == MLXSW_PCI_FW_READY_MAGIC) - break; + return 0; cond_resched(); } while (time_before(jiffies, end)); - return 0; + return -EBUSY; } static int mlxsw_pci_alloc_irq_vectors(struct mlxsw_pci *mlxsw_pci) @@ -1457,7 +1423,7 @@ static int mlxsw_pci_init(void *bus_priv, struct mlxsw_core *mlxsw_core, if (err) goto err_boardinfo; - err = mlxsw_pci_resources_query(mlxsw_pci, mbox, res); + err = mlxsw_core_resources_query(mlxsw_core, mbox, res); if (err) goto err_query_resources; @@ -1720,7 +1686,6 @@ static int mlxsw_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) { const char *driver_name = pdev->driver->name; struct mlxsw_pci *mlxsw_pci; - bool called_again = false; int err; mlxsw_pci = kzalloc(sizeof(*mlxsw_pci), GFP_KERNEL); @@ -1777,18 +1742,10 @@ static int mlxsw_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) mlxsw_pci->bus_info.dev = &pdev->dev; mlxsw_pci->id = id; -again: err = mlxsw_core_bus_device_register(&mlxsw_pci->bus_info, &mlxsw_pci_bus, mlxsw_pci, false, NULL); - /* -EAGAIN is returned in case the FW was updated. FW needs - * a reset, so lets try to call mlxsw_core_bus_device_register() - * again. - */ - if (err == -EAGAIN && !called_again) { - called_again = true; - goto again; - } else if (err) { + if (err) { dev_err(&pdev->dev, "cannot register bus device\n"); goto err_bus_device_register; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h index bb99f6d41fe0..8648ca171254 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h +++ b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h @@ -27,7 +27,7 @@ #define MLXSW_PCI_SW_RESET 0xF0010 #define MLXSW_PCI_SW_RESET_RST_BIT BIT(0) -#define MLXSW_PCI_SW_RESET_TIMEOUT_MSECS 5000 +#define MLXSW_PCI_SW_RESET_TIMEOUT_MSECS 20000 #define MLXSW_PCI_SW_RESET_WAIT_MSECS 100 #define MLXSW_PCI_FW_READY 0xA1844 #define MLXSW_PCI_FW_READY_MASK 0xFFFF @@ -53,6 +53,7 @@ #define MLXSW_PCI_WQE_SIZE 32 /* 32 bytes per element */ #define MLXSW_PCI_CQE01_SIZE 16 /* 16 bytes per element */ #define MLXSW_PCI_CQE2_SIZE 32 /* 32 bytes per element */ +#define MLXSW_PCI_CQE_SIZE_MAX MLXSW_PCI_CQE2_SIZE #define MLXSW_PCI_EQE_SIZE 16 /* 16 bytes per element */ #define MLXSW_PCI_WQE_COUNT (MLXSW_PCI_AQ_SIZE / MLXSW_PCI_WQE_SIZE) #define MLXSW_PCI_CQE01_COUNT (MLXSW_PCI_AQ_SIZE / MLXSW_PCI_CQE01_SIZE) diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index db3d2790aeec..eb4c5e8964cd 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -641,6 +641,10 @@ enum mlxsw_reg_sfn_rec_type { MLXSW_REG_SFN_REC_TYPE_AGED_OUT_MAC = 0x7, /* Aged-out MAC address on a LAG port. */ MLXSW_REG_SFN_REC_TYPE_AGED_OUT_MAC_LAG = 0x8, + /* Learned unicast tunnel record. */ + MLXSW_REG_SFN_REC_TYPE_LEARNED_UNICAST_TUNNEL = 0xD, + /* Aged-out unicast tunnel record. 
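+ * Unpacked, like the learned record, by mlxsw_reg_sfn_uc_tunnel_unpack() below.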
*/ + MLXSW_REG_SFN_REC_TYPE_AGED_OUT_UNICAST_TUNNEL = 0xE, }; /* reg_sfn_rec_type @@ -704,6 +708,66 @@ static inline void mlxsw_reg_sfn_mac_lag_unpack(char *payload, int rec_index, *p_lag_id = mlxsw_reg_sfn_mac_lag_lag_id_get(payload, rec_index); } +/* reg_sfn_uc_tunnel_uip_msb + * When protocol is IPv4, the most significant byte of the underlay IPv4 + * address of the remote VTEP. + * When protocol is IPv6, reserved. + * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, sfn, uc_tunnel_uip_msb, MLXSW_REG_SFN_BASE_LEN, 24, + 8, MLXSW_REG_SFN_REC_LEN, 0x08, false); + +enum mlxsw_reg_sfn_uc_tunnel_protocol { + MLXSW_REG_SFN_UC_TUNNEL_PROTOCOL_IPV4, + MLXSW_REG_SFN_UC_TUNNEL_PROTOCOL_IPV6, +}; + +/* reg_sfn_uc_tunnel_protocol + * IP protocol. + * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, sfn, uc_tunnel_protocol, MLXSW_REG_SFN_BASE_LEN, 27, + 1, MLXSW_REG_SFN_REC_LEN, 0x0C, false); + +/* reg_sfn_uc_tunnel_uip_lsb + * When protocol is IPv4, the least significant bytes of the underlay + * IPv4 address of the remote VTEP. + * When protocol is IPv6, ipv6_id to be queried from TNIPSD. + * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, sfn, uc_tunnel_uip_lsb, MLXSW_REG_SFN_BASE_LEN, 0, + 24, MLXSW_REG_SFN_REC_LEN, 0x0C, false); + +enum mlxsw_reg_sfn_tunnel_port { + MLXSW_REG_SFN_TUNNEL_PORT_NVE, + MLXSW_REG_SFN_TUNNEL_PORT_VPLS, + MLXSW_REG_SFN_TUNNEL_FLEX_TUNNEL0, + MLXSW_REG_SFN_TUNNEL_FLEX_TUNNEL1, +}; + +/* reg_sfn_uc_tunnel_port + * Tunnel port. + * Reserved on Spectrum. + * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, sfn, tunnel_port, MLXSW_REG_SFN_BASE_LEN, 0, 4, + MLXSW_REG_SFN_REC_LEN, 0x10, false); + +static inline void +mlxsw_reg_sfn_uc_tunnel_unpack(char *payload, int rec_index, char *mac, + u16 *p_fid, u32 *p_uip, + enum mlxsw_reg_sfn_uc_tunnel_protocol *p_proto) +{ + u32 uip_msb, uip_lsb; + + mlxsw_reg_sfn_rec_mac_memcpy_from(payload, rec_index, mac); + *p_fid = mlxsw_reg_sfn_mac_fid_get(payload, rec_index); + uip_msb = mlxsw_reg_sfn_uc_tunnel_uip_msb_get(payload, rec_index); + uip_lsb = mlxsw_reg_sfn_uc_tunnel_uip_lsb_get(payload, rec_index); + *p_uip = uip_msb << 24 | uip_lsb; + *p_proto = mlxsw_reg_sfn_uc_tunnel_protocol_get(payload, rec_index); +} + /* SPMS - Switch Port MSTP/RSTP State Register * ------------------------------------------- * Configures the spanning tree state of a physical port. @@ -2135,6 +2199,14 @@ MLXSW_ITEM32(reg, pagt, size, 0x00, 0, 8); */ MLXSW_ITEM32(reg, pagt, acl_group_id, 0x08, 0, 16); +/* reg_pagt_multi + * Multi-ACL + * 0 - This ACL is the last ACL in the multi-ACL + * 1 - This ACL is part of a multi-ACL + * Access: RW + */ +MLXSW_ITEM32_INDEXED(reg, pagt, multi, 0x30, 31, 1, 0x04, 0x00, false); + /* reg_pagt_acl_id * ACL identifier * Access: RW @@ -2148,12 +2220,13 @@ static inline void mlxsw_reg_pagt_pack(char *payload, u16 acl_group_id) } static inline void mlxsw_reg_pagt_acl_id_pack(char *payload, int index, - u16 acl_id) + u16 acl_id, bool multi) { u8 size = mlxsw_reg_pagt_size_get(payload); if (index >= size) mlxsw_reg_pagt_size_set(payload, index + 1); + mlxsw_reg_pagt_multi_set(payload, index, multi); mlxsw_reg_pagt_acl_id_set(payload, index, acl_id); } @@ -2431,6 +2504,43 @@ static inline void mlxsw_reg_pefa_unpack(char *payload, bool *p_a) *p_a = mlxsw_reg_pefa_a_get(payload); } +/* PEMRBT - Policy-Engine Multicast Router Binding Table Register + * -------------------------------------------------------------- + * This register is used for binding Multicast router to an ACL group + * that serves the MC router. 
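+ * A binding exists per IP protocol version (see the protocol field below).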
+ * This register is not supported by SwitchX/-2 and Spectrum. + */ +#define MLXSW_REG_PEMRBT_ID 0x3014 +#define MLXSW_REG_PEMRBT_LEN 0x14 + +MLXSW_REG_DEFINE(pemrbt, MLXSW_REG_PEMRBT_ID, MLXSW_REG_PEMRBT_LEN); + +enum mlxsw_reg_pemrbt_protocol { + MLXSW_REG_PEMRBT_PROTO_IPV4, + MLXSW_REG_PEMRBT_PROTO_IPV6, +}; + +/* reg_pemrbt_protocol + * Access: Index + */ +MLXSW_ITEM32(reg, pemrbt, protocol, 0x00, 0, 1); + +/* reg_pemrbt_group_id + * ACL group identifier. + * Range 0..cap_max_acl_groups-1 + * Access: RW + */ +MLXSW_ITEM32(reg, pemrbt, group_id, 0x10, 0, 16); + +static inline void +mlxsw_reg_pemrbt_pack(char *payload, enum mlxsw_reg_pemrbt_protocol protocol, + u16 group_id) +{ + MLXSW_REG_ZERO(pemrbt, payload); + mlxsw_reg_pemrbt_protocol_set(payload, protocol); + mlxsw_reg_pemrbt_group_id_set(payload, group_id); +} + /* PTCE-V2 - Policy-Engine TCAM Entry Register Version 2 * ----------------------------------------------------- * This register is used for accessing rules within a TCAM region. @@ -2642,7 +2752,7 @@ mlxsw_reg_perpt_pack(char *payload, u8 erpt_bank, u8 erpt_index, mlxsw_reg_perpt_erpt_bank_set(payload, erpt_bank); mlxsw_reg_perpt_erpt_index_set(payload, erpt_index); mlxsw_reg_perpt_key_size_set(payload, key_size); - mlxsw_reg_perpt_bf_bypass_set(payload, true); + mlxsw_reg_perpt_bf_bypass_set(payload, false); mlxsw_reg_perpt_erp_id_set(payload, erp_id); mlxsw_reg_perpt_erpt_base_bank_set(payload, erpt_base_bank); mlxsw_reg_perpt_erpt_base_index_set(payload, erpt_base_index); @@ -2834,8 +2944,9 @@ static inline void mlxsw_reg_ptce3_pack(char *payload, bool valid, u32 priority, const char *tcam_region_info, const char *key, u8 erp_id, - bool large_exists, u32 lkey_id, - u32 action_pointer) + u16 delta_start, u8 delta_mask, + u8 delta_value, bool large_exists, + u32 lkey_id, u32 action_pointer) { MLXSW_REG_ZERO(ptce3, payload); mlxsw_reg_ptce3_v_set(payload, valid); @@ -2844,6 +2955,9 @@ static inline void mlxsw_reg_ptce3_pack(char *payload, bool valid, mlxsw_reg_ptce3_tcam_region_info_memcpy_to(payload, tcam_region_info); mlxsw_reg_ptce3_flex2_key_blocks_memcpy_to(payload, key); mlxsw_reg_ptce3_erp_id_set(payload, erp_id); + mlxsw_reg_ptce3_delta_start_set(payload, delta_start); + mlxsw_reg_ptce3_delta_mask_set(payload, delta_mask); + mlxsw_reg_ptce3_delta_value_set(payload, delta_value); mlxsw_reg_ptce3_large_exists_set(payload, large_exists); mlxsw_reg_ptce3_large_entry_key_id_set(payload, lkey_id); mlxsw_reg_ptce3_action_pointer_set(payload, action_pointer); @@ -2901,7 +3015,7 @@ static inline void mlxsw_reg_percr_pack(char *payload, u16 region_id) mlxsw_reg_percr_region_id_set(payload, region_id); mlxsw_reg_percr_atcam_ignore_prune_set(payload, false); mlxsw_reg_percr_ctcam_ignore_prune_set(payload, false); - mlxsw_reg_percr_bf_bypass_set(payload, true); + mlxsw_reg_percr_bf_bypass_set(payload, false); } /* PERERP - Policy-Engine Region eRP Register @@ -2990,6 +3104,72 @@ static inline void mlxsw_reg_pererp_pack(char *payload, u16 region_id, mlxsw_reg_pererp_master_rp_id_set(payload, master_rp_id); } +/* PEABFE - Policy-Engine Algorithmic Bloom Filter Entries Register + * ---------------------------------------------------------------- + * This register configures the Bloom filter entries. 
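+ * Up to MLXSW_REG_PEABFE_BF_REC_MAX_COUNT (256) entries can be set or cleared in a single transaction.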
+ */ +#define MLXSW_REG_PEABFE_ID 0x3022 +#define MLXSW_REG_PEABFE_BASE_LEN 0x10 +#define MLXSW_REG_PEABFE_BF_REC_LEN 0x4 +#define MLXSW_REG_PEABFE_BF_REC_MAX_COUNT 256 +#define MLXSW_REG_PEABFE_LEN (MLXSW_REG_PEABFE_BASE_LEN + \ + MLXSW_REG_PEABFE_BF_REC_LEN * \ + MLXSW_REG_PEABFE_BF_REC_MAX_COUNT) + +MLXSW_REG_DEFINE(peabfe, MLXSW_REG_PEABFE_ID, MLXSW_REG_PEABFE_LEN); + +/* reg_peabfe_size + * Number of BF entries to be updated. + * Range 1..256 + * Access: Op + */ +MLXSW_ITEM32(reg, peabfe, size, 0x00, 0, 9); + +/* reg_peabfe_bf_entry_state + * Bloom filter state + * 0 - Clear + * 1 - Set + * Access: RW + */ +MLXSW_ITEM32_INDEXED(reg, peabfe, bf_entry_state, + MLXSW_REG_PEABFE_BASE_LEN, 31, 1, + MLXSW_REG_PEABFE_BF_REC_LEN, 0x00, false); + +/* reg_peabfe_bf_entry_bank + * Bloom filter bank ID + * Range 0..cap_max_erp_table_banks-1 + * Access: Index + */ +MLXSW_ITEM32_INDEXED(reg, peabfe, bf_entry_bank, + MLXSW_REG_PEABFE_BASE_LEN, 24, 4, + MLXSW_REG_PEABFE_BF_REC_LEN, 0x00, false); + +/* reg_peabfe_bf_entry_index + * Bloom filter entry index + * Range 0..2^cap_max_bf_log-1 + * Access: Index + */ +MLXSW_ITEM32_INDEXED(reg, peabfe, bf_entry_index, + MLXSW_REG_PEABFE_BASE_LEN, 0, 24, + MLXSW_REG_PEABFE_BF_REC_LEN, 0x00, false); + +static inline void mlxsw_reg_peabfe_pack(char *payload) +{ + MLXSW_REG_ZERO(peabfe, payload); +} + +static inline void mlxsw_reg_peabfe_rec_pack(char *payload, int rec_index, + u8 state, u8 bank, u32 bf_index) +{ + u8 num_rec = mlxsw_reg_peabfe_size_get(payload); + + if (rec_index >= num_rec) + mlxsw_reg_peabfe_size_set(payload, rec_index + 1); + mlxsw_reg_peabfe_bf_entry_state_set(payload, rec_index, state); + mlxsw_reg_peabfe_bf_entry_bank_set(payload, rec_index, bank); + mlxsw_reg_peabfe_bf_entry_index_set(payload, rec_index, bf_index); +} + /* IEDR - Infrastructure Entry Delete Register * ---------------------------------------------------- * This register is used for deleting entries from the entry tables. @@ -3791,6 +3971,25 @@ enum { */ MLXSW_ITEM32(reg, ptys, an_status, 0x04, 28, 4); +#define MLXSW_REG_PTYS_EXT_ETH_SPEED_SGMII_100M BIT(0) +#define MLXSW_REG_PTYS_EXT_ETH_SPEED_1000BASE_X_SGMII BIT(1) +#define MLXSW_REG_PTYS_EXT_ETH_SPEED_2_5GBASE_X_2_5GMII BIT(2) +#define MLXSW_REG_PTYS_EXT_ETH_SPEED_5GBASE_R BIT(3) +#define MLXSW_REG_PTYS_EXT_ETH_SPEED_XFI_XAUI_1_10G BIT(4) +#define MLXSW_REG_PTYS_EXT_ETH_SPEED_XLAUI_4_XLPPI_4_40G BIT(5) +#define MLXSW_REG_PTYS_EXT_ETH_SPEED_25GAUI_1_25GBASE_CR_KR BIT(6) +#define MLXSW_REG_PTYS_EXT_ETH_SPEED_50GAUI_2_LAUI_2_50GBASE_CR2_KR2 BIT(7) +#define MLXSW_REG_PTYS_EXT_ETH_SPEED_50GAUI_1_LAUI_1_50GBASE_CR_KR BIT(8) +#define MLXSW_REG_PTYS_EXT_ETH_SPEED_CAUI_4_100GBASE_CR4_KR4 BIT(9) +#define MLXSW_REG_PTYS_EXT_ETH_SPEED_100GAUI_2_100GBASE_CR2_KR2 BIT(10) +#define MLXSW_REG_PTYS_EXT_ETH_SPEED_200GAUI_4_200GBASE_CR4_KR4 BIT(12) + +/* reg_ptys_ext_eth_proto_cap + * Extended Ethernet port supported speeds and protocols. + * Access: RO + */ +MLXSW_ITEM32(reg, ptys, ext_eth_proto_cap, 0x08, 0, 32); + #define MLXSW_REG_PTYS_ETH_SPEED_SGMII BIT(0) #define MLXSW_REG_PTYS_ETH_SPEED_1000BASE_KX BIT(1) #define MLXSW_REG_PTYS_ETH_SPEED_10GBASE_CX4 BIT(2) @@ -3845,6 +4044,12 @@ MLXSW_ITEM32(reg, ptys, ib_link_width_cap, 0x10, 16, 16); */ MLXSW_ITEM32(reg, ptys, ib_proto_cap, 0x10, 0, 16); +/* reg_ptys_ext_eth_proto_admin + * Extended speed and protocol to set port to. + * Access: RW + */ +MLXSW_ITEM32(reg, ptys, ext_eth_proto_admin, 0x14, 0, 32); + /* reg_ptys_eth_proto_admin * Speed and protocol to set port to. 
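 * (ext_eth_proto_admin above is the extended counterpart of this field.)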
* Access: RW @@ -3863,6 +4068,12 @@ MLXSW_ITEM32(reg, ptys, ib_link_width_admin, 0x1C, 16, 16); */ MLXSW_ITEM32(reg, ptys, ib_proto_admin, 0x1C, 0, 16); +/* reg_ptys_ext_eth_proto_oper + * The extended current speed and protocol configured for the port. + * Access: RO + */ +MLXSW_ITEM32(reg, ptys, ext_eth_proto_oper, 0x20, 0, 32); + /* reg_ptys_eth_proto_oper * The current speed and protocol configured for the port. * Access: RO @@ -3881,12 +4092,23 @@ MLXSW_ITEM32(reg, ptys, ib_link_width_oper, 0x28, 16, 16); */ MLXSW_ITEM32(reg, ptys, ib_proto_oper, 0x28, 0, 16); -/* reg_ptys_eth_proto_lp_advertise - * The protocols that were advertised by the link partner during - * autonegotiation. +enum mlxsw_reg_ptys_connector_type { + MLXSW_REG_PTYS_CONNECTOR_TYPE_UNKNOWN_OR_NO_CONNECTOR, + MLXSW_REG_PTYS_CONNECTOR_TYPE_PORT_NONE, + MLXSW_REG_PTYS_CONNECTOR_TYPE_PORT_TP, + MLXSW_REG_PTYS_CONNECTOR_TYPE_PORT_AUI, + MLXSW_REG_PTYS_CONNECTOR_TYPE_PORT_BNC, + MLXSW_REG_PTYS_CONNECTOR_TYPE_PORT_MII, + MLXSW_REG_PTYS_CONNECTOR_TYPE_PORT_FIBRE, + MLXSW_REG_PTYS_CONNECTOR_TYPE_PORT_DA, + MLXSW_REG_PTYS_CONNECTOR_TYPE_PORT_OTHER, +}; + +/* reg_ptys_connector_type + * Connector type indication. * Access: RO */ -MLXSW_ITEM32(reg, ptys, eth_proto_lp_advertise, 0x30, 0, 32); +MLXSW_ITEM32(reg, ptys, connector_type, 0x2C, 0, 4); static inline void mlxsw_reg_ptys_eth_pack(char *payload, u8 local_port, u32 proto_admin, bool autoneg) @@ -3898,17 +4120,46 @@ static inline void mlxsw_reg_ptys_eth_pack(char *payload, u8 local_port, mlxsw_reg_ptys_an_disable_admin_set(payload, !autoneg); } +static inline void mlxsw_reg_ptys_ext_eth_pack(char *payload, u8 local_port, + u32 proto_admin, bool autoneg) +{ + MLXSW_REG_ZERO(ptys, payload); + mlxsw_reg_ptys_local_port_set(payload, local_port); + mlxsw_reg_ptys_proto_mask_set(payload, MLXSW_REG_PTYS_PROTO_MASK_ETH); + mlxsw_reg_ptys_ext_eth_proto_admin_set(payload, proto_admin); + mlxsw_reg_ptys_an_disable_admin_set(payload, !autoneg); +} + static inline void mlxsw_reg_ptys_eth_unpack(char *payload, u32 *p_eth_proto_cap, - u32 *p_eth_proto_adm, + u32 *p_eth_proto_admin, u32 *p_eth_proto_oper) { if (p_eth_proto_cap) - *p_eth_proto_cap = mlxsw_reg_ptys_eth_proto_cap_get(payload); - if (p_eth_proto_adm) - *p_eth_proto_adm = mlxsw_reg_ptys_eth_proto_admin_get(payload); + *p_eth_proto_cap = + mlxsw_reg_ptys_eth_proto_cap_get(payload); + if (p_eth_proto_admin) + *p_eth_proto_admin = + mlxsw_reg_ptys_eth_proto_admin_get(payload); if (p_eth_proto_oper) - *p_eth_proto_oper = mlxsw_reg_ptys_eth_proto_oper_get(payload); + *p_eth_proto_oper = + mlxsw_reg_ptys_eth_proto_oper_get(payload); +} + +static inline void mlxsw_reg_ptys_ext_eth_unpack(char *payload, + u32 *p_eth_proto_cap, + u32 *p_eth_proto_admin, + u32 *p_eth_proto_oper) +{ + if (p_eth_proto_cap) + *p_eth_proto_cap = + mlxsw_reg_ptys_ext_eth_proto_cap_get(payload); + if (p_eth_proto_admin) + *p_eth_proto_admin = + mlxsw_reg_ptys_ext_eth_proto_admin_get(payload); + if (p_eth_proto_oper) + *p_eth_proto_oper = + mlxsw_reg_ptys_ext_eth_proto_oper_get(payload); } static inline void mlxsw_reg_ptys_ib_pack(char *payload, u8 local_port, @@ -4231,8 +4482,11 @@ MLXSW_ITEM32(reg, ppcnt, pnat, 0x00, 14, 2); enum mlxsw_reg_ppcnt_grp { MLXSW_REG_PPCNT_IEEE_8023_CNT = 0x0, + MLXSW_REG_PPCNT_RFC_2863_CNT = 0x1, MLXSW_REG_PPCNT_RFC_2819_CNT = 0x2, + MLXSW_REG_PPCNT_RFC_3635_CNT = 0x3, MLXSW_REG_PPCNT_EXT_CNT = 0x5, + MLXSW_REG_PPCNT_DISCARD_CNT = 0x6, MLXSW_REG_PPCNT_PRIO_CNT = 0x10, MLXSW_REG_PPCNT_TC_CNT = 0x11, MLXSW_REG_PPCNT_TC_CONG_TC = 
0x13, @@ -4247,6 +4501,7 @@ enum mlxsw_reg_ppcnt_grp { * 0x2: RFC 2819 Counters * 0x3: RFC 3635 Counters * 0x5: Ethernet Extended Counters + * 0x6: Ethernet Discard Counters * 0x8: Link Level Retransmission Counters * 0x10: Per Priority Counters * 0x11: Per Traffic Class Counters @@ -4390,8 +4645,46 @@ MLXSW_ITEM64(reg, ppcnt, a_pause_mac_ctrl_frames_received, MLXSW_ITEM64(reg, ppcnt, a_pause_mac_ctrl_frames_transmitted, MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x90, 0, 64); +/* Ethernet RFC 2863 Counter Group */ + +/* reg_ppcnt_if_in_discards + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, if_in_discards, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x10, 0, 64); + +/* reg_ppcnt_if_out_discards + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, if_out_discards, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x38, 0, 64); + +/* reg_ppcnt_if_out_errors + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, if_out_errors, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x40, 0, 64); + /* Ethernet RFC 2819 Counter Group */ +/* reg_ppcnt_ether_stats_undersize_pkts + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, ether_stats_undersize_pkts, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x30, 0, 64); + +/* reg_ppcnt_ether_stats_oversize_pkts + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, ether_stats_oversize_pkts, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x38, 0, 64); + +/* reg_ppcnt_ether_stats_fragments + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, ether_stats_fragments, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x40, 0, 64); + /* reg_ppcnt_ether_stats_pkts64octets * Access: RO */ @@ -4452,6 +4745,32 @@ MLXSW_ITEM64(reg, ppcnt, ether_stats_pkts4096to8191octets, MLXSW_ITEM64(reg, ppcnt, ether_stats_pkts8192to10239octets, MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0xA0, 0, 64); +/* Ethernet RFC 3635 Counter Group */ + +/* reg_ppcnt_dot3stats_fcs_errors + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, dot3stats_fcs_errors, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x08, 0, 64); + +/* reg_ppcnt_dot3stats_symbol_errors + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, dot3stats_symbol_errors, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x60, 0, 64); + +/* reg_ppcnt_dot3control_in_unknown_opcodes + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, dot3control_in_unknown_opcodes, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x68, 0, 64); + +/* reg_ppcnt_dot3in_pause_frames + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, dot3in_pause_frames, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x70, 0, 64); + /* Ethernet Extended Counter Group Counters */ /* reg_ppcnt_ecn_marked @@ -4460,6 +4779,80 @@ MLXSW_ITEM64(reg, ppcnt, ether_stats_pkts8192to10239octets, MLXSW_ITEM64(reg, ppcnt, ecn_marked, MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x08, 0, 64); +/* Ethernet Discard Counter Group Counters */ + +/* reg_ppcnt_ingress_general + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, ingress_general, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x00, 0, 64); + +/* reg_ppcnt_ingress_policy_engine + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, ingress_policy_engine, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x08, 0, 64); + +/* reg_ppcnt_ingress_vlan_membership + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, ingress_vlan_membership, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x10, 0, 64); + +/* reg_ppcnt_ingress_tag_frame_type + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, ingress_tag_frame_type, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x18, 0, 64); + +/* reg_ppcnt_egress_vlan_membership + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, egress_vlan_membership, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x20, 0, 64); + +/* reg_ppcnt_loopback_filter + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, 
loopback_filter, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x28, 0, 64); + +/* reg_ppcnt_egress_general + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, egress_general, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x30, 0, 64); + +/* reg_ppcnt_egress_hoq + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, egress_hoq, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x40, 0, 64); + +/* reg_ppcnt_egress_policy_engine + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, egress_policy_engine, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x50, 0, 64); + +/* reg_ppcnt_ingress_tx_link_down + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, ingress_tx_link_down, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x58, 0, 64); + +/* reg_ppcnt_egress_stp_filter + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, egress_stp_filter, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x60, 0, 64); + +/* reg_ppcnt_egress_sll + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, egress_sll, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x70, 0, 64); + /* Ethernet Per Priority Group Counters */ /* reg_ppcnt_rx_octets @@ -4862,6 +5255,7 @@ enum mlxsw_reg_htgt_trap_group { MLXSW_REG_HTGT_TRAP_GROUP_SP_EVENT, MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_MLD, MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_ND, + MLXSW_REG_HTGT_TRAP_GROUP_SP_LBERROR, }; /* reg_htgt_trap_group @@ -5352,6 +5746,8 @@ enum mlxsw_reg_ritr_loopback_protocol { MLXSW_REG_RITR_LOOPBACK_PROTOCOL_IPIP_IPV4, /* IPinIP IPv6 underlay Unicast */ MLXSW_REG_RITR_LOOPBACK_PROTOCOL_IPIP_IPV6, + /* IPinIP generic - used for Spectrum-2 underlay RIF */ + MLXSW_REG_RITR_LOOPBACK_GENERIC, }; /* reg_ritr_loopback_protocol @@ -5392,6 +5788,13 @@ MLXSW_ITEM32(reg, ritr, loopback_ipip_options, 0x10, 20, 4); */ MLXSW_ITEM32(reg, ritr, loopback_ipip_uvr, 0x10, 0, 16); +/* reg_ritr_loopback_ipip_underlay_rif + * Underlay ingress router interface. + * Reserved for Spectrum. + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, loopback_ipip_underlay_rif, 0x14, 0, 16); + /* reg_ritr_loopback_ipip_usip* * Encapsulation Underlay source IP. * Access: RW @@ -5507,11 +5910,12 @@ static inline void mlxsw_reg_ritr_loopback_ipip_common_pack(char *payload, enum mlxsw_reg_ritr_loopback_ipip_type ipip_type, enum mlxsw_reg_ritr_loopback_ipip_options options, - u16 uvr_id, u32 gre_key) + u16 uvr_id, u16 underlay_rif, u32 gre_key) { mlxsw_reg_ritr_loopback_ipip_type_set(payload, ipip_type); mlxsw_reg_ritr_loopback_ipip_options_set(payload, options); mlxsw_reg_ritr_loopback_ipip_uvr_set(payload, uvr_id); + mlxsw_reg_ritr_loopback_ipip_underlay_rif_set(payload, underlay_rif); mlxsw_reg_ritr_loopback_ipip_gre_key_set(payload, gre_key); } @@ -5519,12 +5923,12 @@ static inline void mlxsw_reg_ritr_loopback_ipip4_pack(char *payload, enum mlxsw_reg_ritr_loopback_ipip_type ipip_type, enum mlxsw_reg_ritr_loopback_ipip_options options, - u16 uvr_id, u32 usip, u32 gre_key) + u16 uvr_id, u16 underlay_rif, u32 usip, u32 gre_key) { mlxsw_reg_ritr_loopback_protocol_set(payload, MLXSW_REG_RITR_LOOPBACK_PROTOCOL_IPIP_IPV4); mlxsw_reg_ritr_loopback_ipip_common_pack(payload, ipip_type, options, - uvr_id, gre_key); + uvr_id, underlay_rif, gre_key); mlxsw_reg_ritr_loopback_ipip_usip4_set(payload, usip); } @@ -6886,6 +7290,13 @@ MLXSW_ITEM32(reg, rtdp, type, 0x00, 28, 4); */ MLXSW_ITEM32(reg, rtdp, tunnel_index, 0x00, 0, 24); +/* reg_rtdp_egress_router_interface + * Underlay egress router interface. 
+ * Valid range is from 0 to cap_max_router_interfaces - 1 + * Access: RW + */ +MLXSW_ITEM32(reg, rtdp, egress_router_interface, 0x40, 0, 16); + /* IPinIP */ /* reg_rtdp_ipip_irif @@ -7535,6 +7946,35 @@ static inline void mlxsw_reg_mfsl_unpack(char *payload, u8 tacho, *p_tach_max = mlxsw_reg_mfsl_tach_max_get(payload); } +/* FORE - Fan Out of Range Event Register + * -------------------------------------- + * This register reports the status of the controlled fans compared to the + * range defined by the MFSL register. + */ +#define MLXSW_REG_FORE_ID 0x9007 +#define MLXSW_REG_FORE_LEN 0x0C + +MLXSW_REG_DEFINE(fore, MLXSW_REG_FORE_ID, MLXSW_REG_FORE_LEN); + +/* reg_fore_fan_under_limit + * Fan speed is below the low limit defined in the MFSL register. Each bit + * relates to a single tachometer and indicates that the specific tachometer + * reading is below the threshold. + * Access: RO + */ +MLXSW_ITEM32(reg, fore, fan_under_limit, 0x00, 16, 10); + +static inline void mlxsw_reg_fore_unpack(char *payload, u8 tacho, + bool *fault) +{ + u16 limit; + + if (fault) { + limit = mlxsw_reg_fore_fan_under_limit_get(payload); + *fault = limit & BIT(tacho); + } +} + /* MTCAP - Management Temperature Capabilities * ------------------------------------------- * This register exposes the capabilities of the device and @@ -7661,6 +8101,80 @@ static inline void mlxsw_reg_mtmp_unpack(char *payload, unsigned int *p_temp, mlxsw_reg_mtmp_sensor_name_memcpy_from(payload, sensor_name); } +/* MTBR - Management Temperature Bulk Register + * ------------------------------------------- + * This register is used for bulk temperature reading. + */ +#define MLXSW_REG_MTBR_ID 0x900F +#define MLXSW_REG_MTBR_BASE_LEN 0x10 /* base length, without records */ +#define MLXSW_REG_MTBR_REC_LEN 0x04 /* record length */ +#define MLXSW_REG_MTBR_REC_MAX_COUNT 47 /* firmware limitation */ +#define MLXSW_REG_MTBR_LEN (MLXSW_REG_MTBR_BASE_LEN + \ + MLXSW_REG_MTBR_REC_LEN * \ + MLXSW_REG_MTBR_REC_MAX_COUNT) + +MLXSW_REG_DEFINE(mtbr, MLXSW_REG_MTBR_ID, MLXSW_REG_MTBR_LEN); + +/* reg_mtbr_base_sensor_index + * Base sensor index to access (0 - ASIC sensor, 1-63 - ambient sensors, + * 64-127 are mapped to the SFP+/QSFP modules sequentially). + * Access: Index + */ +MLXSW_ITEM32(reg, mtbr, base_sensor_index, 0x00, 0, 7); + +/* reg_mtbr_num_rec + * Request: Number of records to read + * Response: Number of records read + * See above description for more details. + * Range 1..255 + * Access: RW + */ +MLXSW_ITEM32(reg, mtbr, num_rec, 0x04, 0, 8); + +/* reg_mtbr_rec_max_temp + * The highest measured temperature from the sensor. + * When the bit mte is cleared, the field max_temperature is reserved. + * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, mtbr, rec_max_temp, MLXSW_REG_MTBR_BASE_LEN, 16, + 16, MLXSW_REG_MTBR_REC_LEN, 0x00, false); + +/* reg_mtbr_rec_temp + * Temperature reading from the sensor. Reading is in 0.125 Celsius + * degrees units. 
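+ * Special values starting at 0x8000 encode the error statuses listed in mlxsw_reg_mtbr_temp_status below.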
+ * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, mtbr, rec_temp, MLXSW_REG_MTBR_BASE_LEN, 0, 16, + MLXSW_REG_MTBR_REC_LEN, 0x00, false); + +static inline void mlxsw_reg_mtbr_pack(char *payload, u8 base_sensor_index, + u8 num_rec) +{ + MLXSW_REG_ZERO(mtbr, payload); + mlxsw_reg_mtbr_base_sensor_index_set(payload, base_sensor_index); + mlxsw_reg_mtbr_num_rec_set(payload, num_rec); +} + +/* Error codes from temperature reading */ +enum mlxsw_reg_mtbr_temp_status { + MLXSW_REG_MTBR_NO_CONN = 0x8000, + MLXSW_REG_MTBR_NO_TEMP_SENS = 0x8001, + MLXSW_REG_MTBR_INDEX_NA = 0x8002, + MLXSW_REG_MTBR_BAD_SENS_INFO = 0x8003, +}; + +/* Base index for reading module temperature */ +#define MLXSW_REG_MTBR_BASE_MODULE_INDEX 64 + +static inline void mlxsw_reg_mtbr_temp_unpack(char *payload, int rec_ind, + u16 *p_temp, u16 *p_max_temp) +{ + if (p_temp) + *p_temp = mlxsw_reg_mtbr_rec_temp_get(payload, rec_ind); + if (p_max_temp) + *p_max_temp = mlxsw_reg_mtbr_rec_max_temp_get(payload, rec_ind); +} + /* MCIA - Management Cable Info Access * ----------------------------------- * MCIA register is used to access the SFP+ and QSFP connector's EPROM. @@ -7715,13 +8229,41 @@ MLXSW_ITEM32(reg, mcia, device_address, 0x04, 0, 16); */ MLXSW_ITEM32(reg, mcia, size, 0x08, 0, 16); -#define MLXSW_SP_REG_MCIA_EEPROM_SIZE 48 +#define MLXSW_REG_MCIA_EEPROM_PAGE_LENGTH 256 +#define MLXSW_REG_MCIA_EEPROM_SIZE 48 +#define MLXSW_REG_MCIA_I2C_ADDR_LOW 0x50 +#define MLXSW_REG_MCIA_I2C_ADDR_HIGH 0x51 +#define MLXSW_REG_MCIA_PAGE0_LO_OFF 0xa0 +#define MLXSW_REG_MCIA_TH_ITEM_SIZE 2 +#define MLXSW_REG_MCIA_TH_PAGE_NUM 3 +#define MLXSW_REG_MCIA_PAGE0_LO 0 +#define MLXSW_REG_MCIA_TH_PAGE_OFF 0x80 + +enum mlxsw_reg_mcia_eeprom_module_info_rev_id { + MLXSW_REG_MCIA_EEPROM_MODULE_INFO_REV_ID_UNSPC = 0x00, + MLXSW_REG_MCIA_EEPROM_MODULE_INFO_REV_ID_8436 = 0x01, + MLXSW_REG_MCIA_EEPROM_MODULE_INFO_REV_ID_8636 = 0x03, +}; + +enum mlxsw_reg_mcia_eeprom_module_info_id { + MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_SFP = 0x03, + MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP = 0x0C, + MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP_PLUS = 0x0D, + MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP28 = 0x11, + MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP_DD = 0x18, +}; + +enum mlxsw_reg_mcia_eeprom_module_info { + MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID, + MLXSW_REG_MCIA_EEPROM_MODULE_INFO_REV_ID, + MLXSW_REG_MCIA_EEPROM_MODULE_INFO_SIZE, +}; /* reg_mcia_eeprom * Bytes to read/write. 
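 * Up to MLXSW_REG_MCIA_EEPROM_SIZE (48) bytes per transaction.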
* Access: RW */ -MLXSW_ITEM_BUF(reg, mcia, eeprom, 0x10, MLXSW_SP_REG_MCIA_EEPROM_SIZE); +MLXSW_ITEM_BUF(reg, mcia, eeprom, 0x10, MLXSW_REG_MCIA_EEPROM_SIZE); static inline void mlxsw_reg_mcia_pack(char *payload, u8 module, u8 lock, u8 page_number, u16 device_addr, @@ -9357,8 +9899,10 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = { MLXSW_REG(ppbs), MLXSW_REG(prcr), MLXSW_REG(pefa), + MLXSW_REG(pemrbt), MLXSW_REG(ptce2), MLXSW_REG(perpt), + MLXSW_REG(peabfe), MLXSW_REG(perar), MLXSW_REG(ptce3), MLXSW_REG(percr), @@ -9407,8 +9951,10 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = { MLXSW_REG(mfsc), MLXSW_REG(mfsm), MLXSW_REG(mfsl), + MLXSW_REG(fore), MLXSW_REG(mtcap), MLXSW_REG(mtmp), + MLXSW_REG(mtbr), MLXSW_REG(mcia), MLXSW_REG(mpat), MLXSW_REG(mpar), diff --git a/drivers/net/ethernet/mellanox/mlxsw/resources.h b/drivers/net/ethernet/mellanox/mlxsw/resources.h index 99b341539870..773ef7fdb285 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/resources.h +++ b/drivers/net/ethernet/mellanox/mlxsw/resources.h @@ -26,6 +26,7 @@ enum mlxsw_res_id { MLXSW_RES_ID_MAX_LAG_MEMBERS, MLXSW_RES_ID_MAX_BUFFER_SIZE, MLXSW_RES_ID_CELL_SIZE, + MLXSW_RES_ID_MAX_HEADROOM_SIZE, MLXSW_RES_ID_ACL_MAX_TCAM_REGIONS, MLXSW_RES_ID_ACL_MAX_TCAM_RULES, MLXSW_RES_ID_ACL_MAX_REGIONS, @@ -41,6 +42,7 @@ enum mlxsw_res_id { MLXSW_RES_ID_ACL_ERPT_ENTRIES_4KB, MLXSW_RES_ID_ACL_ERPT_ENTRIES_8KB, MLXSW_RES_ID_ACL_ERPT_ENTRIES_12KB, + MLXSW_RES_ID_ACL_MAX_BF_LOG, MLXSW_RES_ID_MAX_CPU_POLICERS, MLXSW_RES_ID_MAX_VRS, MLXSW_RES_ID_MAX_RIFS, @@ -78,6 +80,7 @@ static u16 mlxsw_res_ids[] = { [MLXSW_RES_ID_MAX_LAG_MEMBERS] = 0x2521, [MLXSW_RES_ID_MAX_BUFFER_SIZE] = 0x2802, /* Bytes */ [MLXSW_RES_ID_CELL_SIZE] = 0x2803, /* Bytes */ + [MLXSW_RES_ID_MAX_HEADROOM_SIZE] = 0x2811, /* Bytes */ [MLXSW_RES_ID_ACL_MAX_TCAM_REGIONS] = 0x2901, [MLXSW_RES_ID_ACL_MAX_TCAM_RULES] = 0x2902, [MLXSW_RES_ID_ACL_MAX_REGIONS] = 0x2903, @@ -93,6 +96,7 @@ static u16 mlxsw_res_ids[] = { [MLXSW_RES_ID_ACL_ERPT_ENTRIES_4KB] = 0x2951, [MLXSW_RES_ID_ACL_ERPT_ENTRIES_8KB] = 0x2952, [MLXSW_RES_ID_ACL_ERPT_ENTRIES_12KB] = 0x2953, + [MLXSW_RES_ID_ACL_MAX_BF_LOG] = 0x2960, [MLXSW_RES_ID_MAX_CPU_POLICERS] = 0x2A13, [MLXSW_RES_ID_MAX_VRS] = 0x2C01, [MLXSW_RES_ID_MAX_RIFS] = 0x2C02, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index f84b9c02fcc5..6b8aa3761899 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -32,6 +32,7 @@ #include "spectrum.h" #include "pci.h" #include "core.h" +#include "core_env.h" #include "reg.h" #include "port.h" #include "trap.h" @@ -45,8 +46,8 @@ #define MLXSW_SP_FWREV_MINOR_TO_BRANCH(minor) ((minor) / 100) #define MLXSW_SP1_FWREV_MAJOR 13 -#define MLXSW_SP1_FWREV_MINOR 1703 -#define MLXSW_SP1_FWREV_SUBMINOR 4 +#define MLXSW_SP1_FWREV_MINOR 1910 +#define MLXSW_SP1_FWREV_SUBMINOR 622 #define MLXSW_SP1_FWREV_CAN_RESET_MINOR 1702 static const struct mlxsw_fw_rev mlxsw_sp1_fw_rev = { @@ -65,6 +66,13 @@ static const char mlxsw_sp1_driver_name[] = "mlxsw_spectrum"; static const char mlxsw_sp2_driver_name[] = "mlxsw_spectrum2"; static const char mlxsw_sp_driver_version[] = "1.0"; +static const unsigned char mlxsw_sp1_mac_mask[ETH_ALEN] = { + 0xff, 0xff, 0xff, 0xff, 0xfc, 0x00 +}; +static const unsigned char mlxsw_sp2_mac_mask[ETH_ALEN] = { + 0xff, 0xff, 0xff, 0xff, 0xf0, 0x00 +}; + /* tx_hdr_version * Tx header version. * Must be set to 1. 
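The constants above bump the required Spectrum-1 firmware from 13.1703.4 to 13.1910.622. Going by the MLXSW_SP_FWREV_MINOR_TO_BRANCH() macro in the surrounding context, the minor number encodes a firmware branch, so the bump also moves the driver's requirement from branch 17 to branch 19. A minimal standalone sketch of that arithmetic, offered as a hypothetical illustration rather than as part of the patch:

	#include <stdio.h>

	/* Same macro as in the spectrum.c context above. */
	#define MLXSW_SP_FWREV_MINOR_TO_BRANCH(minor) ((minor) / 100)

	int main(void)
	{
		/* Old required firmware 13.1703.4 vs. new 13.1910.622. */
		printf("old branch: %d\n", MLXSW_SP_FWREV_MINOR_TO_BRANCH(1703)); /* 17 */
		printf("new branch: %d\n", MLXSW_SP_FWREV_MINOR_TO_BRANCH(1910)); /* 19 */
		return 0;
	}
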
@@ -323,6 +331,7 @@ static int mlxsw_sp_fw_rev_validate(struct mlxsw_sp *mlxsw_sp) const struct mlxsw_fw_rev *rev = &mlxsw_sp->bus_info->fw_rev; const struct mlxsw_fw_rev *req_rev = mlxsw_sp->req_rev; const char *fw_filename = mlxsw_sp->fw_filename; + union devlink_param_value value; const struct firmware *firmware; int err; @@ -330,6 +339,15 @@ static int mlxsw_sp_fw_rev_validate(struct mlxsw_sp *mlxsw_sp) if (!req_rev || !fw_filename) return 0; + /* Don't check if devlink 'fw_load_policy' param is 'flash' */ + err = devlink_param_driverinit_value_get(priv_to_devlink(mlxsw_sp->core), + DEVLINK_PARAM_GENERIC_ID_FW_LOAD_POLICY, + &value); + if (err) + return err; + if (value.vu8 == DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_FLASH) + return 0; + /* Validate driver & FW are compatible */ if (rev->major != req_rev->major) { WARN(1, "Mismatch in major FW version [%d:%d] is never expected; Please contact support\n", @@ -835,8 +853,12 @@ int __mlxsw_sp_port_headroom_set(struct mlxsw_sp_port *mlxsw_sp_port, int mtu, u8 pfc_en = !!my_pfc ? my_pfc->pfc_en : 0; u16 delay = !!my_pfc ? my_pfc->delay : 0; char pbmc_pl[MLXSW_REG_PBMC_LEN]; + u32 taken_headroom_cells = 0; + u32 max_headroom_cells; int i, j, err; + max_headroom_cells = mlxsw_sp_sb_max_headroom_cells(mlxsw_sp); + mlxsw_reg_pbmc_pack(pbmc_pl, mlxsw_sp_port->local_port, 0, 0); err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(pbmc), pbmc_pl); if (err) @@ -845,8 +867,10 @@ int __mlxsw_sp_port_headroom_set(struct mlxsw_sp_port *mlxsw_sp_port, int mtu, for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { bool configure = false; bool pfc = false; + u16 thres_cells; + u16 delay_cells; + u16 total_cells; bool lossy; - u16 thres; for (j = 0; j < IEEE_8021QAZ_MAX_TCS; j++) { if (prio_tc[j] == i) { @@ -860,10 +884,17 @@ int __mlxsw_sp_port_headroom_set(struct mlxsw_sp_port *mlxsw_sp_port, int mtu, continue; lossy = !(pfc || pause_en); - thres = mlxsw_sp_pg_buf_threshold_get(mlxsw_sp, mtu); - delay = mlxsw_sp_pg_buf_delay_get(mlxsw_sp, mtu, delay, pfc, - pause_en); - mlxsw_sp_pg_buf_pack(pbmc_pl, i, thres + delay, thres, lossy); + thres_cells = mlxsw_sp_pg_buf_threshold_get(mlxsw_sp, mtu); + delay_cells = mlxsw_sp_pg_buf_delay_get(mlxsw_sp, mtu, delay, + pfc, pause_en); + total_cells = thres_cells + delay_cells; + + taken_headroom_cells += total_cells; + if (taken_headroom_cells > max_headroom_cells) + return -ENOBUFS; + + mlxsw_sp_pg_buf_pack(pbmc_pl, i, total_cells, + thres_cells, lossy); } return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pbmc), pbmc_pl); @@ -1123,22 +1154,40 @@ int mlxsw_sp_port_vlan_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid_begin, return 0; } -static void mlxsw_sp_port_vlan_flush(struct mlxsw_sp_port *mlxsw_sp_port) +static void mlxsw_sp_port_vlan_flush(struct mlxsw_sp_port *mlxsw_sp_port, + bool flush_default) { struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan, *tmp; list_for_each_entry_safe(mlxsw_sp_port_vlan, tmp, - &mlxsw_sp_port->vlans_list, list) - mlxsw_sp_port_vlan_put(mlxsw_sp_port_vlan); + &mlxsw_sp_port->vlans_list, list) { + if (!flush_default && + mlxsw_sp_port_vlan->vid == MLXSW_SP_DEFAULT_VID) + continue; + mlxsw_sp_port_vlan_destroy(mlxsw_sp_port_vlan); + } +} + +static void +mlxsw_sp_port_vlan_cleanup(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan) +{ + if (mlxsw_sp_port_vlan->bridge_port) + mlxsw_sp_port_vlan_bridge_leave(mlxsw_sp_port_vlan); + else if (mlxsw_sp_port_vlan->fid) + mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port_vlan); } -static struct mlxsw_sp_port_vlan * +struct mlxsw_sp_port_vlan * 
mlxsw_sp_port_vlan_create(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid) { struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan; - bool untagged = vid == 1; + bool untagged = vid == MLXSW_SP_DEFAULT_VID; int err; + mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid); + if (mlxsw_sp_port_vlan) + return ERR_PTR(-EEXIST); + err = mlxsw_sp_port_vlan_set(mlxsw_sp_port, vid, vid, true, untagged); if (err) return ERR_PTR(err); @@ -1150,7 +1199,6 @@ mlxsw_sp_port_vlan_create(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid) } mlxsw_sp_port_vlan->mlxsw_sp_port = mlxsw_sp_port; - mlxsw_sp_port_vlan->ref_count = 1; mlxsw_sp_port_vlan->vid = vid; list_add(&mlxsw_sp_port_vlan->list, &mlxsw_sp_port->vlans_list); @@ -1161,46 +1209,17 @@ err_port_vlan_alloc: return ERR_PTR(err); } -static void -mlxsw_sp_port_vlan_destroy(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan) +void mlxsw_sp_port_vlan_destroy(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan) { struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port; u16 vid = mlxsw_sp_port_vlan->vid; + mlxsw_sp_port_vlan_cleanup(mlxsw_sp_port_vlan); list_del(&mlxsw_sp_port_vlan->list); kfree(mlxsw_sp_port_vlan); mlxsw_sp_port_vlan_set(mlxsw_sp_port, vid, vid, false, false); } -struct mlxsw_sp_port_vlan * -mlxsw_sp_port_vlan_get(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid) -{ - struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan; - - mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid); - if (mlxsw_sp_port_vlan) { - mlxsw_sp_port_vlan->ref_count++; - return mlxsw_sp_port_vlan; - } - - return mlxsw_sp_port_vlan_create(mlxsw_sp_port, vid); -} - -void mlxsw_sp_port_vlan_put(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan) -{ - struct mlxsw_sp_fid *fid = mlxsw_sp_port_vlan->fid; - - if (--mlxsw_sp_port_vlan->ref_count != 0) - return; - - if (mlxsw_sp_port_vlan->bridge_port) - mlxsw_sp_port_vlan_bridge_leave(mlxsw_sp_port_vlan); - else if (fid) - mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port_vlan); - - mlxsw_sp_port_vlan_destroy(mlxsw_sp_port_vlan); -} - static int mlxsw_sp_port_add_vid(struct net_device *dev, __be16 __always_unused proto, u16 vid) { @@ -1212,7 +1231,7 @@ static int mlxsw_sp_port_add_vid(struct net_device *dev, if (!vid) return 0; - return PTR_ERR_OR_ZERO(mlxsw_sp_port_vlan_get(mlxsw_sp_port, vid)); + return PTR_ERR_OR_ZERO(mlxsw_sp_port_vlan_create(mlxsw_sp_port, vid)); } static int mlxsw_sp_port_kill_vid(struct net_device *dev, @@ -1230,7 +1249,7 @@ static int mlxsw_sp_port_kill_vid(struct net_device *dev, mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid); if (!mlxsw_sp_port_vlan) return 0; - mlxsw_sp_port_vlan_put(mlxsw_sp_port_vlan); + mlxsw_sp_port_vlan_destroy(mlxsw_sp_port_vlan); return 0; } @@ -1342,7 +1361,6 @@ static int mlxsw_sp_port_add_cls_matchall(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_port_mall_tc_entry *mall_tc_entry; __be16 protocol = f->common.protocol; const struct tc_action *a; - LIST_HEAD(actions); int err; if (!tcf_exts_has_one_action(f->exts)) { @@ -1696,6 +1714,18 @@ static int mlxsw_sp_set_features(struct net_device *dev, mlxsw_sp_feature_hw_tc); } +static int mlxsw_sp_port_get_port_parent_id(struct net_device *dev, + struct netdev_phys_item_id *ppid) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + + ppid->id_len = sizeof(mlxsw_sp->base_mac); + memcpy(&ppid->id, &mlxsw_sp->base_mac, ppid->id_len); + + return 0; +} + static const struct net_device_ops mlxsw_sp_port_netdev_ops = { .ndo_open = 
mlxsw_sp_port_open, .ndo_stop = mlxsw_sp_port_stop, @@ -1711,6 +1741,7 @@ static const struct net_device_ops mlxsw_sp_port_netdev_ops = { .ndo_vlan_rx_kill_vid = mlxsw_sp_port_kill_vid, .ndo_get_phys_port_name = mlxsw_sp_port_get_phys_port_name, .ndo_set_features = mlxsw_sp_set_features, + .ndo_get_port_parent_id = mlxsw_sp_port_get_port_parent_id, }; static void mlxsw_sp_port_get_drvinfo(struct net_device *dev, @@ -1881,8 +1912,38 @@ static struct mlxsw_sp_port_hw_stats mlxsw_sp_port_hw_stats[] = { #define MLXSW_SP_PORT_HW_STATS_LEN ARRAY_SIZE(mlxsw_sp_port_hw_stats) +static struct mlxsw_sp_port_hw_stats mlxsw_sp_port_hw_rfc_2863_stats[] = { + { + .str = "if_in_discards", + .getter = mlxsw_reg_ppcnt_if_in_discards_get, + }, + { + .str = "if_out_discards", + .getter = mlxsw_reg_ppcnt_if_out_discards_get, + }, + { + .str = "if_out_errors", + .getter = mlxsw_reg_ppcnt_if_out_errors_get, + }, +}; + +#define MLXSW_SP_PORT_HW_RFC_2863_STATS_LEN \ + ARRAY_SIZE(mlxsw_sp_port_hw_rfc_2863_stats) + static struct mlxsw_sp_port_hw_stats mlxsw_sp_port_hw_rfc_2819_stats[] = { { + .str = "ether_stats_undersize_pkts", + .getter = mlxsw_reg_ppcnt_ether_stats_undersize_pkts_get, + }, + { + .str = "ether_stats_oversize_pkts", + .getter = mlxsw_reg_ppcnt_ether_stats_oversize_pkts_get, + }, + { + .str = "ether_stats_fragments", + .getter = mlxsw_reg_ppcnt_ether_stats_fragments_get, + }, + { .str = "ether_pkts64octets", .getter = mlxsw_reg_ppcnt_ether_stats_pkts64octets_get, }, @@ -1927,6 +1988,82 @@ static struct mlxsw_sp_port_hw_stats mlxsw_sp_port_hw_rfc_2819_stats[] = { #define MLXSW_SP_PORT_HW_RFC_2819_STATS_LEN \ ARRAY_SIZE(mlxsw_sp_port_hw_rfc_2819_stats) +static struct mlxsw_sp_port_hw_stats mlxsw_sp_port_hw_rfc_3635_stats[] = { + { + .str = "dot3stats_fcs_errors", + .getter = mlxsw_reg_ppcnt_dot3stats_fcs_errors_get, + }, + { + .str = "dot3stats_symbol_errors", + .getter = mlxsw_reg_ppcnt_dot3stats_symbol_errors_get, + }, + { + .str = "dot3control_in_unknown_opcodes", + .getter = mlxsw_reg_ppcnt_dot3control_in_unknown_opcodes_get, + }, + { + .str = "dot3in_pause_frames", + .getter = mlxsw_reg_ppcnt_dot3in_pause_frames_get, + }, +}; + +#define MLXSW_SP_PORT_HW_RFC_3635_STATS_LEN \ + ARRAY_SIZE(mlxsw_sp_port_hw_rfc_3635_stats) + +static struct mlxsw_sp_port_hw_stats mlxsw_sp_port_hw_discard_stats[] = { + { + .str = "discard_ingress_general", + .getter = mlxsw_reg_ppcnt_ingress_general_get, + }, + { + .str = "discard_ingress_policy_engine", + .getter = mlxsw_reg_ppcnt_ingress_policy_engine_get, + }, + { + .str = "discard_ingress_vlan_membership", + .getter = mlxsw_reg_ppcnt_ingress_vlan_membership_get, + }, + { + .str = "discard_ingress_tag_frame_type", + .getter = mlxsw_reg_ppcnt_ingress_tag_frame_type_get, + }, + { + .str = "discard_egress_vlan_membership", + .getter = mlxsw_reg_ppcnt_egress_vlan_membership_get, + }, + { + .str = "discard_loopback_filter", + .getter = mlxsw_reg_ppcnt_loopback_filter_get, + }, + { + .str = "discard_egress_general", + .getter = mlxsw_reg_ppcnt_egress_general_get, + }, + { + .str = "discard_egress_hoq", + .getter = mlxsw_reg_ppcnt_egress_hoq_get, + }, + { + .str = "discard_egress_policy_engine", + .getter = mlxsw_reg_ppcnt_egress_policy_engine_get, + }, + { + .str = "discard_ingress_tx_link_down", + .getter = mlxsw_reg_ppcnt_ingress_tx_link_down_get, + }, + { + .str = "discard_egress_stp_filter", + .getter = mlxsw_reg_ppcnt_egress_stp_filter_get, + }, + { + .str = "discard_egress_sll", + .getter = mlxsw_reg_ppcnt_egress_sll_get, + }, +}; + +#define 
MLXSW_SP_PORT_HW_DISCARD_STATS_LEN \ + ARRAY_SIZE(mlxsw_sp_port_hw_discard_stats) + static struct mlxsw_sp_port_hw_stats mlxsw_sp_port_hw_prio_stats[] = { { .str = "rx_octets_prio", @@ -1979,7 +2116,10 @@ static struct mlxsw_sp_port_hw_stats mlxsw_sp_port_hw_tc_stats[] = { #define MLXSW_SP_PORT_HW_TC_STATS_LEN ARRAY_SIZE(mlxsw_sp_port_hw_tc_stats) #define MLXSW_SP_PORT_ETHTOOL_STATS_LEN (MLXSW_SP_PORT_HW_STATS_LEN + \ + MLXSW_SP_PORT_HW_RFC_2863_STATS_LEN + \ MLXSW_SP_PORT_HW_RFC_2819_STATS_LEN + \ + MLXSW_SP_PORT_HW_RFC_3635_STATS_LEN + \ + MLXSW_SP_PORT_HW_DISCARD_STATS_LEN + \ (MLXSW_SP_PORT_HW_PRIO_STATS_LEN * \ IEEE_8021QAZ_MAX_TCS) + \ (MLXSW_SP_PORT_HW_TC_STATS_LEN * \ @@ -1990,7 +2130,7 @@ static void mlxsw_sp_port_get_prio_strings(u8 **p, int prio) int i; for (i = 0; i < MLXSW_SP_PORT_HW_PRIO_STATS_LEN; i++) { - snprintf(*p, ETH_GSTRING_LEN, "%s_%d", + snprintf(*p, ETH_GSTRING_LEN, "%.29s_%.1d", mlxsw_sp_port_hw_prio_stats[i].str, prio); *p += ETH_GSTRING_LEN; } @@ -2001,7 +2141,7 @@ static void mlxsw_sp_port_get_tc_strings(u8 **p, int tc) int i; for (i = 0; i < MLXSW_SP_PORT_HW_TC_STATS_LEN; i++) { - snprintf(*p, ETH_GSTRING_LEN, "%s_%d", + snprintf(*p, ETH_GSTRING_LEN, "%.29s_%.1d", mlxsw_sp_port_hw_tc_stats[i].str, tc); *p += ETH_GSTRING_LEN; } @@ -2020,12 +2160,31 @@ static void mlxsw_sp_port_get_strings(struct net_device *dev, ETH_GSTRING_LEN); p += ETH_GSTRING_LEN; } + + for (i = 0; i < MLXSW_SP_PORT_HW_RFC_2863_STATS_LEN; i++) { + memcpy(p, mlxsw_sp_port_hw_rfc_2863_stats[i].str, + ETH_GSTRING_LEN); + p += ETH_GSTRING_LEN; + } + for (i = 0; i < MLXSW_SP_PORT_HW_RFC_2819_STATS_LEN; i++) { memcpy(p, mlxsw_sp_port_hw_rfc_2819_stats[i].str, ETH_GSTRING_LEN); p += ETH_GSTRING_LEN; } + for (i = 0; i < MLXSW_SP_PORT_HW_RFC_3635_STATS_LEN; i++) { + memcpy(p, mlxsw_sp_port_hw_rfc_3635_stats[i].str, + ETH_GSTRING_LEN); + p += ETH_GSTRING_LEN; + } + + for (i = 0; i < MLXSW_SP_PORT_HW_DISCARD_STATS_LEN; i++) { + memcpy(p, mlxsw_sp_port_hw_discard_stats[i].str, + ETH_GSTRING_LEN); + p += ETH_GSTRING_LEN; + } + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) mlxsw_sp_port_get_prio_strings(&p, i); @@ -2068,10 +2227,22 @@ mlxsw_sp_get_hw_stats_by_group(struct mlxsw_sp_port_hw_stats **p_hw_stats, *p_hw_stats = mlxsw_sp_port_hw_stats; *p_len = MLXSW_SP_PORT_HW_STATS_LEN; break; + case MLXSW_REG_PPCNT_RFC_2863_CNT: + *p_hw_stats = mlxsw_sp_port_hw_rfc_2863_stats; + *p_len = MLXSW_SP_PORT_HW_RFC_2863_STATS_LEN; + break; case MLXSW_REG_PPCNT_RFC_2819_CNT: *p_hw_stats = mlxsw_sp_port_hw_rfc_2819_stats; *p_len = MLXSW_SP_PORT_HW_RFC_2819_STATS_LEN; break; + case MLXSW_REG_PPCNT_RFC_3635_CNT: + *p_hw_stats = mlxsw_sp_port_hw_rfc_3635_stats; + *p_len = MLXSW_SP_PORT_HW_RFC_3635_STATS_LEN; + break; + case MLXSW_REG_PPCNT_DISCARD_CNT: + *p_hw_stats = mlxsw_sp_port_hw_discard_stats; + *p_len = MLXSW_SP_PORT_HW_DISCARD_STATS_LEN; + break; case MLXSW_REG_PPCNT_PRIO_CNT: *p_hw_stats = mlxsw_sp_port_hw_prio_stats; *p_len = MLXSW_SP_PORT_HW_PRIO_STATS_LEN; @@ -2121,11 +2292,26 @@ static void mlxsw_sp_port_get_stats(struct net_device *dev, data, data_index); data_index = MLXSW_SP_PORT_HW_STATS_LEN; + /* RFC 2863 Counters */ + __mlxsw_sp_port_get_stats(dev, MLXSW_REG_PPCNT_RFC_2863_CNT, 0, + data, data_index); + data_index += MLXSW_SP_PORT_HW_RFC_2863_STATS_LEN; + /* RFC 2819 Counters */ __mlxsw_sp_port_get_stats(dev, MLXSW_REG_PPCNT_RFC_2819_CNT, 0, data, data_index); data_index += MLXSW_SP_PORT_HW_RFC_2819_STATS_LEN; + /* RFC 3635 Counters */ + __mlxsw_sp_port_get_stats(dev, MLXSW_REG_PPCNT_RFC_3635_CNT, 0, + 
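+	/* Worked bound for the "%.29s_%.1d" formats above (illustrative):
+	 * ETH_GSTRING_LEN is 32 and prio/tc never exceed 7 (one digit), so
+	 * the worst case is 29 (capped prefix) + 1 ('_') + 1 (digit) +
+	 * 1 ('\0') = 32 bytes, which always fits the string buffer.
+	 */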
data, data_index); + data_index += MLXSW_SP_PORT_HW_RFC_3635_STATS_LEN; + + /* Discard Counters */ + __mlxsw_sp_port_get_stats(dev, MLXSW_REG_PPCNT_DISCARD_CNT, 0, + data, data_index); + data_index += MLXSW_SP_PORT_HW_DISCARD_STATS_LEN; + /* Per-Priority Counters */ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { __mlxsw_sp_port_get_stats(dev, MLXSW_REG_PPCNT_PRIO_CNT, i, @@ -2151,13 +2337,13 @@ static int mlxsw_sp_port_get_sset_count(struct net_device *dev, int sset) } } -struct mlxsw_sp_port_link_mode { +struct mlxsw_sp1_port_link_mode { enum ethtool_link_mode_bit_indices mask_ethtool; u32 mask; u32 speed; }; -static const struct mlxsw_sp_port_link_mode mlxsw_sp_port_link_mode[] = { +static const struct mlxsw_sp1_port_link_mode mlxsw_sp1_port_link_mode[] = { { .mask = MLXSW_REG_PTYS_ETH_SPEED_100BASE_T, .mask_ethtool = ETHTOOL_LINK_MODE_100baseT_Full_BIT, @@ -2229,11 +2415,6 @@ static const struct mlxsw_sp_port_link_mode mlxsw_sp_port_link_mode[] = { .speed = SPEED_25000, }, { - .mask = MLXSW_REG_PTYS_ETH_SPEED_25GBASE_SR, - .mask_ethtool = ETHTOOL_LINK_MODE_25000baseSR_Full_BIT, - .speed = SPEED_25000, - }, - { .mask = MLXSW_REG_PTYS_ETH_SPEED_50GBASE_CR2, .mask_ethtool = ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT, .speed = SPEED_50000, @@ -2290,11 +2471,12 @@ static const struct mlxsw_sp_port_link_mode mlxsw_sp_port_link_mode[] = { }, }; -#define MLXSW_SP_PORT_LINK_MODE_LEN ARRAY_SIZE(mlxsw_sp_port_link_mode) +#define MLXSW_SP1_PORT_LINK_MODE_LEN ARRAY_SIZE(mlxsw_sp1_port_link_mode) static void -mlxsw_sp_from_ptys_supported_port(u32 ptys_eth_proto, - struct ethtool_link_ksettings *cmd) +mlxsw_sp1_from_ptys_supported_port(struct mlxsw_sp *mlxsw_sp, + u32 ptys_eth_proto, + struct ethtool_link_ksettings *cmd) { if (ptys_eth_proto & (MLXSW_REG_PTYS_ETH_SPEED_10GBASE_CR | MLXSW_REG_PTYS_ETH_SPEED_10GBASE_SR | @@ -2312,19 +2494,23 @@ mlxsw_sp_from_ptys_supported_port(u32 ptys_eth_proto, ethtool_link_ksettings_add_link_mode(cmd, supported, Backplane); } -static void mlxsw_sp_from_ptys_link(u32 ptys_eth_proto, unsigned long *mode) +static void +mlxsw_sp1_from_ptys_link(struct mlxsw_sp *mlxsw_sp, u32 ptys_eth_proto, + unsigned long *mode) { int i; - for (i = 0; i < MLXSW_SP_PORT_LINK_MODE_LEN; i++) { - if (ptys_eth_proto & mlxsw_sp_port_link_mode[i].mask) - __set_bit(mlxsw_sp_port_link_mode[i].mask_ethtool, + for (i = 0; i < MLXSW_SP1_PORT_LINK_MODE_LEN; i++) { + if (ptys_eth_proto & mlxsw_sp1_port_link_mode[i].mask) + __set_bit(mlxsw_sp1_port_link_mode[i].mask_ethtool, mode); } } -static void mlxsw_sp_from_ptys_speed_duplex(bool carrier_ok, u32 ptys_eth_proto, - struct ethtool_link_ksettings *cmd) +static void +mlxsw_sp1_from_ptys_speed_duplex(struct mlxsw_sp *mlxsw_sp, bool carrier_ok, + u32 ptys_eth_proto, + struct ethtool_link_ksettings *cmd) { u32 speed = SPEED_UNKNOWN; u8 duplex = DUPLEX_UNKNOWN; @@ -2333,9 +2519,9 @@ static void mlxsw_sp_from_ptys_speed_duplex(bool carrier_ok, u32 ptys_eth_proto, if (!carrier_ok) goto out; - for (i = 0; i < MLXSW_SP_PORT_LINK_MODE_LEN; i++) { - if (ptys_eth_proto & mlxsw_sp_port_link_mode[i].mask) { - speed = mlxsw_sp_port_link_mode[i].speed; + for (i = 0; i < MLXSW_SP1_PORT_LINK_MODE_LEN; i++) { + if (ptys_eth_proto & mlxsw_sp1_port_link_mode[i].mask) { + speed = mlxsw_sp1_port_link_mode[i].speed; duplex = DUPLEX_FULL; break; } @@ -2345,129 +2531,559 @@ out: cmd->base.duplex = duplex; } -static u8 mlxsw_sp_port_connector_port(u32 ptys_eth_proto) +static u32 +mlxsw_sp1_to_ptys_advert_link(struct mlxsw_sp *mlxsw_sp, + const struct ethtool_link_ksettings *cmd) { 
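+	/* A minimal sketch of the translation below: each row of
+	 * mlxsw_sp1_port_link_mode whose ethtool bit is set in the
+	 * advertising bitmap contributes its PTYS mask, so the result is
+	 * the OR of every firmware speed bit the user advertised.
+	 */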
- if (ptys_eth_proto & (MLXSW_REG_PTYS_ETH_SPEED_10GBASE_SR | - MLXSW_REG_PTYS_ETH_SPEED_40GBASE_SR4 | - MLXSW_REG_PTYS_ETH_SPEED_100GBASE_SR4 | - MLXSW_REG_PTYS_ETH_SPEED_SGMII)) - return PORT_FIBRE; + u32 ptys_proto = 0; + int i; - if (ptys_eth_proto & (MLXSW_REG_PTYS_ETH_SPEED_10GBASE_CR | - MLXSW_REG_PTYS_ETH_SPEED_40GBASE_CR4 | - MLXSW_REG_PTYS_ETH_SPEED_100GBASE_CR4)) - return PORT_DA; + for (i = 0; i < MLXSW_SP1_PORT_LINK_MODE_LEN; i++) { + if (test_bit(mlxsw_sp1_port_link_mode[i].mask_ethtool, + cmd->link_modes.advertising)) + ptys_proto |= mlxsw_sp1_port_link_mode[i].mask; + } + return ptys_proto; +} - if (ptys_eth_proto & (MLXSW_REG_PTYS_ETH_SPEED_10GBASE_KR | - MLXSW_REG_PTYS_ETH_SPEED_10GBASE_KX4 | - MLXSW_REG_PTYS_ETH_SPEED_40GBASE_KR4 | - MLXSW_REG_PTYS_ETH_SPEED_100GBASE_KR4)) - return PORT_NONE; +static u32 mlxsw_sp1_to_ptys_speed(struct mlxsw_sp *mlxsw_sp, u32 speed) +{ + u32 ptys_proto = 0; + int i; - return PORT_OTHER; + for (i = 0; i < MLXSW_SP1_PORT_LINK_MODE_LEN; i++) { + if (speed == mlxsw_sp1_port_link_mode[i].speed) + ptys_proto |= mlxsw_sp1_port_link_mode[i].mask; + } + return ptys_proto; } static u32 -mlxsw_sp_to_ptys_advert_link(const struct ethtool_link_ksettings *cmd) +mlxsw_sp1_to_ptys_upper_speed(struct mlxsw_sp *mlxsw_sp, u32 upper_speed) { u32 ptys_proto = 0; int i; - for (i = 0; i < MLXSW_SP_PORT_LINK_MODE_LEN; i++) { - if (test_bit(mlxsw_sp_port_link_mode[i].mask_ethtool, - cmd->link_modes.advertising)) - ptys_proto |= mlxsw_sp_port_link_mode[i].mask; + for (i = 0; i < MLXSW_SP1_PORT_LINK_MODE_LEN; i++) { + if (mlxsw_sp1_port_link_mode[i].speed <= upper_speed) + ptys_proto |= mlxsw_sp1_port_link_mode[i].mask; + } + return ptys_proto; +} + +static int +mlxsw_sp1_port_speed_base(struct mlxsw_sp *mlxsw_sp, u8 local_port, + u32 *base_speed) +{ + *base_speed = MLXSW_SP_PORT_BASE_SPEED_25G; + return 0; +} + +static void +mlxsw_sp1_reg_ptys_eth_pack(struct mlxsw_sp *mlxsw_sp, char *payload, + u8 local_port, u32 proto_admin, bool autoneg) +{ + mlxsw_reg_ptys_eth_pack(payload, local_port, proto_admin, autoneg); +} + +static void +mlxsw_sp1_reg_ptys_eth_unpack(struct mlxsw_sp *mlxsw_sp, char *payload, + u32 *p_eth_proto_cap, u32 *p_eth_proto_admin, + u32 *p_eth_proto_oper) +{ + mlxsw_reg_ptys_eth_unpack(payload, p_eth_proto_cap, p_eth_proto_admin, + p_eth_proto_oper); +} + +static const struct mlxsw_sp_port_type_speed_ops +mlxsw_sp1_port_type_speed_ops = { + .from_ptys_supported_port = mlxsw_sp1_from_ptys_supported_port, + .from_ptys_link = mlxsw_sp1_from_ptys_link, + .from_ptys_speed_duplex = mlxsw_sp1_from_ptys_speed_duplex, + .to_ptys_advert_link = mlxsw_sp1_to_ptys_advert_link, + .to_ptys_speed = mlxsw_sp1_to_ptys_speed, + .to_ptys_upper_speed = mlxsw_sp1_to_ptys_upper_speed, + .port_speed_base = mlxsw_sp1_port_speed_base, + .reg_ptys_eth_pack = mlxsw_sp1_reg_ptys_eth_pack, + .reg_ptys_eth_unpack = mlxsw_sp1_reg_ptys_eth_unpack, +}; + +static const enum ethtool_link_mode_bit_indices +mlxsw_sp2_mask_ethtool_sgmii_100m[] = { + ETHTOOL_LINK_MODE_100baseT_Full_BIT, +}; + +#define MLXSW_SP2_MASK_ETHTOOL_SGMII_100M_LEN \ + ARRAY_SIZE(mlxsw_sp2_mask_ethtool_sgmii_100m) + +static const enum ethtool_link_mode_bit_indices +mlxsw_sp2_mask_ethtool_1000base_x_sgmii[] = { + ETHTOOL_LINK_MODE_1000baseT_Full_BIT, + ETHTOOL_LINK_MODE_1000baseKX_Full_BIT, +}; + +#define MLXSW_SP2_MASK_ETHTOOL_1000BASE_X_SGMII_LEN \ + ARRAY_SIZE(mlxsw_sp2_mask_ethtool_1000base_x_sgmii) + +static const enum ethtool_link_mode_bit_indices +mlxsw_sp2_mask_ethtool_2_5gbase_x_2_5gmii[] = { + 
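+/* Unlike Spectrum-1's one-to-one link-mode table, a single Spectrum-2
+ * PTYS_EXT speed bit fans out to a whole family of ethtool modes (e.g.
+ * seven modes for the one 10G bit), hence these per-speed arrays.
+ */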
ETHTOOL_LINK_MODE_2500baseX_Full_BIT, +}; + +#define MLXSW_SP2_MASK_ETHTOOL_2_5GBASE_X_2_5GMII_LEN \ + ARRAY_SIZE(mlxsw_sp2_mask_ethtool_2_5gbase_x_2_5gmii) + +static const enum ethtool_link_mode_bit_indices +mlxsw_sp2_mask_ethtool_5gbase_r[] = { + ETHTOOL_LINK_MODE_5000baseT_Full_BIT, +}; + +#define MLXSW_SP2_MASK_ETHTOOL_5GBASE_R_LEN \ + ARRAY_SIZE(mlxsw_sp2_mask_ethtool_5gbase_r) + +static const enum ethtool_link_mode_bit_indices +mlxsw_sp2_mask_ethtool_xfi_xaui_1_10g[] = { + ETHTOOL_LINK_MODE_10000baseT_Full_BIT, + ETHTOOL_LINK_MODE_10000baseKR_Full_BIT, + ETHTOOL_LINK_MODE_10000baseR_FEC_BIT, + ETHTOOL_LINK_MODE_10000baseCR_Full_BIT, + ETHTOOL_LINK_MODE_10000baseSR_Full_BIT, + ETHTOOL_LINK_MODE_10000baseLR_Full_BIT, + ETHTOOL_LINK_MODE_10000baseER_Full_BIT, +}; + +#define MLXSW_SP2_MASK_ETHTOOL_XFI_XAUI_1_10G_LEN \ + ARRAY_SIZE(mlxsw_sp2_mask_ethtool_xfi_xaui_1_10g) + +static const enum ethtool_link_mode_bit_indices +mlxsw_sp2_mask_ethtool_xlaui_4_xlppi_4_40g[] = { + ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT, + ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT, + ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT, + ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT, +}; + +#define MLXSW_SP2_MASK_ETHTOOL_XLAUI_4_XLPPI_4_40G_LEN \ + ARRAY_SIZE(mlxsw_sp2_mask_ethtool_xlaui_4_xlppi_4_40g) + +static const enum ethtool_link_mode_bit_indices +mlxsw_sp2_mask_ethtool_25gaui_1_25gbase_cr_kr[] = { + ETHTOOL_LINK_MODE_25000baseCR_Full_BIT, + ETHTOOL_LINK_MODE_25000baseKR_Full_BIT, + ETHTOOL_LINK_MODE_25000baseSR_Full_BIT, +}; + +#define MLXSW_SP2_MASK_ETHTOOL_25GAUI_1_25GBASE_CR_KR_LEN \ + ARRAY_SIZE(mlxsw_sp2_mask_ethtool_25gaui_1_25gbase_cr_kr) + +static const enum ethtool_link_mode_bit_indices +mlxsw_sp2_mask_ethtool_50gaui_2_laui_2_50gbase_cr2_kr2[] = { + ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT, + ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT, + ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT, +}; + +#define MLXSW_SP2_MASK_ETHTOOL_50GAUI_2_LAUI_2_50GBASE_CR2_KR2_LEN \ + ARRAY_SIZE(mlxsw_sp2_mask_ethtool_50gaui_2_laui_2_50gbase_cr2_kr2) + +static const enum ethtool_link_mode_bit_indices +mlxsw_sp2_mask_ethtool_50gaui_1_laui_1_50gbase_cr_kr[] = { + ETHTOOL_LINK_MODE_50000baseKR_Full_BIT, + ETHTOOL_LINK_MODE_50000baseSR_Full_BIT, + ETHTOOL_LINK_MODE_50000baseCR_Full_BIT, + ETHTOOL_LINK_MODE_50000baseLR_ER_FR_Full_BIT, + ETHTOOL_LINK_MODE_50000baseDR_Full_BIT, +}; + +#define MLXSW_SP2_MASK_ETHTOOL_50GAUI_1_LAUI_1_50GBASE_CR_KR_LEN \ + ARRAY_SIZE(mlxsw_sp2_mask_ethtool_50gaui_1_laui_1_50gbase_cr_kr) + +static const enum ethtool_link_mode_bit_indices +mlxsw_sp2_mask_ethtool_caui_4_100gbase_cr4_kr4[] = { + ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT, + ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT, + ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT, + ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT, +}; + +#define MLXSW_SP2_MASK_ETHTOOL_CAUI_4_100GBASE_CR4_KR4_LEN \ + ARRAY_SIZE(mlxsw_sp2_mask_ethtool_caui_4_100gbase_cr4_kr4) + +static const enum ethtool_link_mode_bit_indices +mlxsw_sp2_mask_ethtool_100gaui_2_100gbase_cr2_kr2[] = { + ETHTOOL_LINK_MODE_100000baseKR2_Full_BIT, + ETHTOOL_LINK_MODE_100000baseSR2_Full_BIT, + ETHTOOL_LINK_MODE_100000baseCR2_Full_BIT, + ETHTOOL_LINK_MODE_100000baseLR2_ER2_FR2_Full_BIT, + ETHTOOL_LINK_MODE_100000baseDR2_Full_BIT, +}; + +#define MLXSW_SP2_MASK_ETHTOOL_100GAUI_2_100GBASE_CR2_KR2_LEN \ + ARRAY_SIZE(mlxsw_sp2_mask_ethtool_100gaui_2_100gbase_cr2_kr2) + +static const enum ethtool_link_mode_bit_indices +mlxsw_sp2_mask_ethtool_200gaui_4_200gbase_cr4_kr4[] = { + ETHTOOL_LINK_MODE_200000baseKR4_Full_BIT, + 
ETHTOOL_LINK_MODE_200000baseSR4_Full_BIT, + ETHTOOL_LINK_MODE_200000baseLR4_ER4_FR4_Full_BIT, + ETHTOOL_LINK_MODE_200000baseDR4_Full_BIT, + ETHTOOL_LINK_MODE_200000baseCR4_Full_BIT, +}; + +#define MLXSW_SP2_MASK_ETHTOOL_200GAUI_4_200GBASE_CR4_KR4_LEN \ + ARRAY_SIZE(mlxsw_sp2_mask_ethtool_200gaui_4_200gbase_cr4_kr4) + +struct mlxsw_sp2_port_link_mode { + const enum ethtool_link_mode_bit_indices *mask_ethtool; + int m_ethtool_len; + u32 mask; + u32 speed; +}; + +static const struct mlxsw_sp2_port_link_mode mlxsw_sp2_port_link_mode[] = { + { + .mask = MLXSW_REG_PTYS_EXT_ETH_SPEED_SGMII_100M, + .mask_ethtool = mlxsw_sp2_mask_ethtool_sgmii_100m, + .m_ethtool_len = MLXSW_SP2_MASK_ETHTOOL_SGMII_100M_LEN, + .speed = SPEED_100, + }, + { + .mask = MLXSW_REG_PTYS_EXT_ETH_SPEED_1000BASE_X_SGMII, + .mask_ethtool = mlxsw_sp2_mask_ethtool_1000base_x_sgmii, + .m_ethtool_len = MLXSW_SP2_MASK_ETHTOOL_1000BASE_X_SGMII_LEN, + .speed = SPEED_1000, + }, + { + .mask = MLXSW_REG_PTYS_EXT_ETH_SPEED_2_5GBASE_X_2_5GMII, + .mask_ethtool = mlxsw_sp2_mask_ethtool_2_5gbase_x_2_5gmii, + .m_ethtool_len = MLXSW_SP2_MASK_ETHTOOL_2_5GBASE_X_2_5GMII_LEN, + .speed = SPEED_2500, + }, + { + .mask = MLXSW_REG_PTYS_EXT_ETH_SPEED_5GBASE_R, + .mask_ethtool = mlxsw_sp2_mask_ethtool_5gbase_r, + .m_ethtool_len = MLXSW_SP2_MASK_ETHTOOL_5GBASE_R_LEN, + .speed = SPEED_5000, + }, + { + .mask = MLXSW_REG_PTYS_EXT_ETH_SPEED_XFI_XAUI_1_10G, + .mask_ethtool = mlxsw_sp2_mask_ethtool_xfi_xaui_1_10g, + .m_ethtool_len = MLXSW_SP2_MASK_ETHTOOL_XFI_XAUI_1_10G_LEN, + .speed = SPEED_10000, + }, + { + .mask = MLXSW_REG_PTYS_EXT_ETH_SPEED_XLAUI_4_XLPPI_4_40G, + .mask_ethtool = mlxsw_sp2_mask_ethtool_xlaui_4_xlppi_4_40g, + .m_ethtool_len = MLXSW_SP2_MASK_ETHTOOL_XLAUI_4_XLPPI_4_40G_LEN, + .speed = SPEED_40000, + }, + { + .mask = MLXSW_REG_PTYS_EXT_ETH_SPEED_25GAUI_1_25GBASE_CR_KR, + .mask_ethtool = mlxsw_sp2_mask_ethtool_25gaui_1_25gbase_cr_kr, + .m_ethtool_len = MLXSW_SP2_MASK_ETHTOOL_25GAUI_1_25GBASE_CR_KR_LEN, + .speed = SPEED_25000, + }, + { + .mask = MLXSW_REG_PTYS_EXT_ETH_SPEED_50GAUI_2_LAUI_2_50GBASE_CR2_KR2, + .mask_ethtool = mlxsw_sp2_mask_ethtool_50gaui_2_laui_2_50gbase_cr2_kr2, + .m_ethtool_len = MLXSW_SP2_MASK_ETHTOOL_50GAUI_2_LAUI_2_50GBASE_CR2_KR2_LEN, + .speed = SPEED_50000, + }, + { + .mask = MLXSW_REG_PTYS_EXT_ETH_SPEED_50GAUI_1_LAUI_1_50GBASE_CR_KR, + .mask_ethtool = mlxsw_sp2_mask_ethtool_50gaui_1_laui_1_50gbase_cr_kr, + .m_ethtool_len = MLXSW_SP2_MASK_ETHTOOL_50GAUI_1_LAUI_1_50GBASE_CR_KR_LEN, + .speed = SPEED_50000, + }, + { + .mask = MLXSW_REG_PTYS_EXT_ETH_SPEED_CAUI_4_100GBASE_CR4_KR4, + .mask_ethtool = mlxsw_sp2_mask_ethtool_caui_4_100gbase_cr4_kr4, + .m_ethtool_len = MLXSW_SP2_MASK_ETHTOOL_CAUI_4_100GBASE_CR4_KR4_LEN, + .speed = SPEED_100000, + }, + { + .mask = MLXSW_REG_PTYS_EXT_ETH_SPEED_100GAUI_2_100GBASE_CR2_KR2, + .mask_ethtool = mlxsw_sp2_mask_ethtool_100gaui_2_100gbase_cr2_kr2, + .m_ethtool_len = MLXSW_SP2_MASK_ETHTOOL_100GAUI_2_100GBASE_CR2_KR2_LEN, + .speed = SPEED_100000, + }, + { + .mask = MLXSW_REG_PTYS_EXT_ETH_SPEED_200GAUI_4_200GBASE_CR4_KR4, + .mask_ethtool = mlxsw_sp2_mask_ethtool_200gaui_4_200gbase_cr4_kr4, + .m_ethtool_len = MLXSW_SP2_MASK_ETHTOOL_200GAUI_4_200GBASE_CR4_KR4_LEN, + .speed = SPEED_200000, + }, +}; + +#define MLXSW_SP2_PORT_LINK_MODE_LEN ARRAY_SIZE(mlxsw_sp2_port_link_mode) + +static void +mlxsw_sp2_from_ptys_supported_port(struct mlxsw_sp *mlxsw_sp, + u32 ptys_eth_proto, + struct ethtool_link_ksettings *cmd) +{ + ethtool_link_ksettings_add_link_mode(cmd, supported, FIBRE); + 
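+	/* Spectrum-2 does not derive supported port types from the extended
+	 * protocol mask, so both FIBRE and Backplane are always reported
+	 * here regardless of ptys_eth_proto.
+	 */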
ethtool_link_ksettings_add_link_mode(cmd, supported, Backplane); +} + +static void +mlxsw_sp2_set_bit_ethtool(const struct mlxsw_sp2_port_link_mode *link_mode, + unsigned long *mode) +{ + int i; + + for (i = 0; i < link_mode->m_ethtool_len; i++) + __set_bit(link_mode->mask_ethtool[i], mode); +} + +static void +mlxsw_sp2_from_ptys_link(struct mlxsw_sp *mlxsw_sp, u32 ptys_eth_proto, + unsigned long *mode) +{ + int i; + + for (i = 0; i < MLXSW_SP2_PORT_LINK_MODE_LEN; i++) { + if (ptys_eth_proto & mlxsw_sp2_port_link_mode[i].mask) + mlxsw_sp2_set_bit_ethtool(&mlxsw_sp2_port_link_mode[i], + mode); + } +} + +static void +mlxsw_sp2_from_ptys_speed_duplex(struct mlxsw_sp *mlxsw_sp, bool carrier_ok, + u32 ptys_eth_proto, + struct ethtool_link_ksettings *cmd) +{ + u32 speed = SPEED_UNKNOWN; + u8 duplex = DUPLEX_UNKNOWN; + int i; + + if (!carrier_ok) + goto out; + + for (i = 0; i < MLXSW_SP2_PORT_LINK_MODE_LEN; i++) { + if (ptys_eth_proto & mlxsw_sp2_port_link_mode[i].mask) { + speed = mlxsw_sp2_port_link_mode[i].speed; + duplex = DUPLEX_FULL; + break; + } + } +out: + cmd->base.speed = speed; + cmd->base.duplex = duplex; +} + +static bool +mlxsw_sp2_test_bit_ethtool(const struct mlxsw_sp2_port_link_mode *link_mode, + const unsigned long *mode) +{ + int cnt = 0; + int i; + + for (i = 0; i < link_mode->m_ethtool_len; i++) { + if (test_bit(link_mode->mask_ethtool[i], mode)) + cnt++; + } + + return cnt == link_mode->m_ethtool_len; +} + +static u32 +mlxsw_sp2_to_ptys_advert_link(struct mlxsw_sp *mlxsw_sp, + const struct ethtool_link_ksettings *cmd) +{ + u32 ptys_proto = 0; + int i; + + for (i = 0; i < MLXSW_SP2_PORT_LINK_MODE_LEN; i++) { + if (mlxsw_sp2_test_bit_ethtool(&mlxsw_sp2_port_link_mode[i], + cmd->link_modes.advertising)) + ptys_proto |= mlxsw_sp2_port_link_mode[i].mask; } return ptys_proto; } -static u32 mlxsw_sp_to_ptys_speed(u32 speed) +static u32 mlxsw_sp2_to_ptys_speed(struct mlxsw_sp *mlxsw_sp, u32 speed) { u32 ptys_proto = 0; int i; - for (i = 0; i < MLXSW_SP_PORT_LINK_MODE_LEN; i++) { - if (speed == mlxsw_sp_port_link_mode[i].speed) - ptys_proto |= mlxsw_sp_port_link_mode[i].mask; + for (i = 0; i < MLXSW_SP2_PORT_LINK_MODE_LEN; i++) { + if (speed == mlxsw_sp2_port_link_mode[i].speed) + ptys_proto |= mlxsw_sp2_port_link_mode[i].mask; } return ptys_proto; } -static u32 mlxsw_sp_to_ptys_upper_speed(u32 upper_speed) +static u32 +mlxsw_sp2_to_ptys_upper_speed(struct mlxsw_sp *mlxsw_sp, u32 upper_speed) { u32 ptys_proto = 0; int i; - for (i = 0; i < MLXSW_SP_PORT_LINK_MODE_LEN; i++) { - if (mlxsw_sp_port_link_mode[i].speed <= upper_speed) - ptys_proto |= mlxsw_sp_port_link_mode[i].mask; + for (i = 0; i < MLXSW_SP2_PORT_LINK_MODE_LEN; i++) { + if (mlxsw_sp2_port_link_mode[i].speed <= upper_speed) + ptys_proto |= mlxsw_sp2_port_link_mode[i].mask; } return ptys_proto; } -static void mlxsw_sp_port_get_link_supported(u32 eth_proto_cap, - struct ethtool_link_ksettings *cmd) +static int +mlxsw_sp2_port_speed_base(struct mlxsw_sp *mlxsw_sp, u8 local_port, + u32 *base_speed) +{ + char ptys_pl[MLXSW_REG_PTYS_LEN]; + u32 eth_proto_cap; + int err; + + /* In Spectrum-2, the speed of 1x can change from port to port, so query + * it from firmware. 
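+ * (Illustrative numbers: a port whose lanes support 50GAUI_1 reports a
+ * 50G base speed, so a 4-lane port is programmed for 4 * 50G = 200G in
+ * mlxsw_sp_port_speed_by_width_set(); a 25G base similarly yields 100G.)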
+ */ + mlxsw_reg_ptys_ext_eth_pack(ptys_pl, local_port, 0, false); + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl); + if (err) + return err; + mlxsw_reg_ptys_ext_eth_unpack(ptys_pl, &eth_proto_cap, NULL, NULL); + + if (eth_proto_cap & + MLXSW_REG_PTYS_EXT_ETH_SPEED_50GAUI_1_LAUI_1_50GBASE_CR_KR) { + *base_speed = MLXSW_SP_PORT_BASE_SPEED_50G; + return 0; + } + + if (eth_proto_cap & + MLXSW_REG_PTYS_EXT_ETH_SPEED_25GAUI_1_25GBASE_CR_KR) { + *base_speed = MLXSW_SP_PORT_BASE_SPEED_25G; + return 0; + } + + return -EIO; +} + +static void +mlxsw_sp2_reg_ptys_eth_pack(struct mlxsw_sp *mlxsw_sp, char *payload, + u8 local_port, u32 proto_admin, + bool autoneg) +{ + mlxsw_reg_ptys_ext_eth_pack(payload, local_port, proto_admin, autoneg); +} + +static void +mlxsw_sp2_reg_ptys_eth_unpack(struct mlxsw_sp *mlxsw_sp, char *payload, + u32 *p_eth_proto_cap, u32 *p_eth_proto_admin, + u32 *p_eth_proto_oper) +{ + mlxsw_reg_ptys_ext_eth_unpack(payload, p_eth_proto_cap, + p_eth_proto_admin, p_eth_proto_oper); +} + +static const struct mlxsw_sp_port_type_speed_ops +mlxsw_sp2_port_type_speed_ops = { + .from_ptys_supported_port = mlxsw_sp2_from_ptys_supported_port, + .from_ptys_link = mlxsw_sp2_from_ptys_link, + .from_ptys_speed_duplex = mlxsw_sp2_from_ptys_speed_duplex, + .to_ptys_advert_link = mlxsw_sp2_to_ptys_advert_link, + .to_ptys_speed = mlxsw_sp2_to_ptys_speed, + .to_ptys_upper_speed = mlxsw_sp2_to_ptys_upper_speed, + .port_speed_base = mlxsw_sp2_port_speed_base, + .reg_ptys_eth_pack = mlxsw_sp2_reg_ptys_eth_pack, + .reg_ptys_eth_unpack = mlxsw_sp2_reg_ptys_eth_unpack, +}; + +static void +mlxsw_sp_port_get_link_supported(struct mlxsw_sp *mlxsw_sp, u32 eth_proto_cap, + struct ethtool_link_ksettings *cmd) { + const struct mlxsw_sp_port_type_speed_ops *ops; + + ops = mlxsw_sp->port_type_speed_ops; + ethtool_link_ksettings_add_link_mode(cmd, supported, Asym_Pause); ethtool_link_ksettings_add_link_mode(cmd, supported, Autoneg); ethtool_link_ksettings_add_link_mode(cmd, supported, Pause); - mlxsw_sp_from_ptys_supported_port(eth_proto_cap, cmd); - mlxsw_sp_from_ptys_link(eth_proto_cap, cmd->link_modes.supported); + ops->from_ptys_supported_port(mlxsw_sp, eth_proto_cap, cmd); + ops->from_ptys_link(mlxsw_sp, eth_proto_cap, cmd->link_modes.supported); } -static void mlxsw_sp_port_get_link_advertise(u32 eth_proto_admin, bool autoneg, - struct ethtool_link_ksettings *cmd) +static void +mlxsw_sp_port_get_link_advertise(struct mlxsw_sp *mlxsw_sp, + u32 eth_proto_admin, bool autoneg, + struct ethtool_link_ksettings *cmd) { + const struct mlxsw_sp_port_type_speed_ops *ops; + + ops = mlxsw_sp->port_type_speed_ops; + if (!autoneg) return; ethtool_link_ksettings_add_link_mode(cmd, advertising, Autoneg); - mlxsw_sp_from_ptys_link(eth_proto_admin, cmd->link_modes.advertising); + ops->from_ptys_link(mlxsw_sp, eth_proto_admin, + cmd->link_modes.advertising); } -static void -mlxsw_sp_port_get_link_lp_advertise(u32 eth_proto_lp, u8 autoneg_status, - struct ethtool_link_ksettings *cmd) +static u8 +mlxsw_sp_port_connector_port(enum mlxsw_reg_ptys_connector_type connector_type) { - if (autoneg_status != MLXSW_REG_PTYS_AN_STATUS_OK || !eth_proto_lp) - return; - - ethtool_link_ksettings_add_link_mode(cmd, lp_advertising, Autoneg); - mlxsw_sp_from_ptys_link(eth_proto_lp, cmd->link_modes.lp_advertising); + switch (connector_type) { + case MLXSW_REG_PTYS_CONNECTOR_TYPE_UNKNOWN_OR_NO_CONNECTOR: + return PORT_OTHER; + case MLXSW_REG_PTYS_CONNECTOR_TYPE_PORT_NONE: + return PORT_NONE; + case 
MLXSW_REG_PTYS_CONNECTOR_TYPE_PORT_TP: + return PORT_TP; + case MLXSW_REG_PTYS_CONNECTOR_TYPE_PORT_AUI: + return PORT_AUI; + case MLXSW_REG_PTYS_CONNECTOR_TYPE_PORT_BNC: + return PORT_BNC; + case MLXSW_REG_PTYS_CONNECTOR_TYPE_PORT_MII: + return PORT_MII; + case MLXSW_REG_PTYS_CONNECTOR_TYPE_PORT_FIBRE: + return PORT_FIBRE; + case MLXSW_REG_PTYS_CONNECTOR_TYPE_PORT_DA: + return PORT_DA; + case MLXSW_REG_PTYS_CONNECTOR_TYPE_PORT_OTHER: + return PORT_OTHER; + default: + WARN_ON_ONCE(1); + return PORT_OTHER; + } } static int mlxsw_sp_port_get_link_ksettings(struct net_device *dev, struct ethtool_link_ksettings *cmd) { - u32 eth_proto_cap, eth_proto_admin, eth_proto_oper, eth_proto_lp; + u32 eth_proto_cap, eth_proto_admin, eth_proto_oper; struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + const struct mlxsw_sp_port_type_speed_ops *ops; char ptys_pl[MLXSW_REG_PTYS_LEN]; - u8 autoneg_status; + u8 connector_type; bool autoneg; int err; + ops = mlxsw_sp->port_type_speed_ops; + autoneg = mlxsw_sp_port->link.autoneg; - mlxsw_reg_ptys_eth_pack(ptys_pl, mlxsw_sp_port->local_port, 0, false); + ops->reg_ptys_eth_pack(mlxsw_sp, ptys_pl, mlxsw_sp_port->local_port, + 0, false); err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl); if (err) return err; - mlxsw_reg_ptys_eth_unpack(ptys_pl, &eth_proto_cap, &eth_proto_admin, - &eth_proto_oper); + ops->reg_ptys_eth_unpack(mlxsw_sp, ptys_pl, &eth_proto_cap, + &eth_proto_admin, &eth_proto_oper); - mlxsw_sp_port_get_link_supported(eth_proto_cap, cmd); + mlxsw_sp_port_get_link_supported(mlxsw_sp, eth_proto_cap, cmd); - mlxsw_sp_port_get_link_advertise(eth_proto_admin, autoneg, cmd); - - eth_proto_lp = mlxsw_reg_ptys_eth_proto_lp_advertise_get(ptys_pl); - autoneg_status = mlxsw_reg_ptys_an_status_get(ptys_pl); - mlxsw_sp_port_get_link_lp_advertise(eth_proto_lp, autoneg_status, cmd); + mlxsw_sp_port_get_link_advertise(mlxsw_sp, eth_proto_admin, autoneg, + cmd); cmd->base.autoneg = autoneg ? AUTONEG_ENABLE : AUTONEG_DISABLE; - cmd->base.port = mlxsw_sp_port_connector_port(eth_proto_oper); - mlxsw_sp_from_ptys_speed_duplex(netif_carrier_ok(dev), eth_proto_oper, - cmd); + connector_type = mlxsw_reg_ptys_connector_type_get(ptys_pl); + cmd->base.port = mlxsw_sp_port_connector_port(connector_type); + ops->from_ptys_speed_duplex(mlxsw_sp, netif_carrier_ok(dev), + eth_proto_oper, cmd); return 0; } @@ -2478,21 +3094,25 @@ mlxsw_sp_port_set_link_ksettings(struct net_device *dev, { struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + const struct mlxsw_sp_port_type_speed_ops *ops; char ptys_pl[MLXSW_REG_PTYS_LEN]; u32 eth_proto_cap, eth_proto_new; bool autoneg; int err; - mlxsw_reg_ptys_eth_pack(ptys_pl, mlxsw_sp_port->local_port, 0, false); + ops = mlxsw_sp->port_type_speed_ops; + + ops->reg_ptys_eth_pack(mlxsw_sp, ptys_pl, mlxsw_sp_port->local_port, + 0, false); err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl); if (err) return err; - mlxsw_reg_ptys_eth_unpack(ptys_pl, &eth_proto_cap, NULL, NULL); + ops->reg_ptys_eth_unpack(mlxsw_sp, ptys_pl, &eth_proto_cap, NULL, NULL); autoneg = cmd->base.autoneg == AUTONEG_ENABLE; eth_proto_new = autoneg ? 
- mlxsw_sp_to_ptys_advert_link(cmd) : - mlxsw_sp_to_ptys_speed(cmd->base.speed); + ops->to_ptys_advert_link(mlxsw_sp, cmd) : + ops->to_ptys_speed(mlxsw_sp, cmd->base.speed); eth_proto_new = eth_proto_new & eth_proto_cap; if (!eth_proto_new) { @@ -2500,17 +3120,17 @@ mlxsw_sp_port_set_link_ksettings(struct net_device *dev, return -EINVAL; } - mlxsw_reg_ptys_eth_pack(ptys_pl, mlxsw_sp_port->local_port, - eth_proto_new, autoneg); + ops->reg_ptys_eth_pack(mlxsw_sp, ptys_pl, mlxsw_sp_port->local_port, + eth_proto_new, autoneg); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl); if (err) return err; + mlxsw_sp_port->link.autoneg = autoneg; + if (!netif_running(dev)) return 0; - mlxsw_sp_port->link.autoneg = autoneg; - mlxsw_sp_port_admin_status_set(mlxsw_sp_port, false); mlxsw_sp_port_admin_status_set(mlxsw_sp_port, true); @@ -2542,117 +3162,18 @@ out: return err; } -#define MLXSW_SP_I2C_ADDR_LOW 0x50 -#define MLXSW_SP_I2C_ADDR_HIGH 0x51 -#define MLXSW_SP_EEPROM_PAGE_LENGTH 256 - -static int mlxsw_sp_query_module_eeprom(struct mlxsw_sp_port *mlxsw_sp_port, - u16 offset, u16 size, void *data, - unsigned int *p_read_size) -{ - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; - char eeprom_tmp[MLXSW_SP_REG_MCIA_EEPROM_SIZE]; - char mcia_pl[MLXSW_REG_MCIA_LEN]; - u16 i2c_addr; - int status; - int err; - - size = min_t(u16, size, MLXSW_SP_REG_MCIA_EEPROM_SIZE); - - if (offset < MLXSW_SP_EEPROM_PAGE_LENGTH && - offset + size > MLXSW_SP_EEPROM_PAGE_LENGTH) - /* Cross pages read, read until offset 256 in low page */ - size = MLXSW_SP_EEPROM_PAGE_LENGTH - offset; - - i2c_addr = MLXSW_SP_I2C_ADDR_LOW; - if (offset >= MLXSW_SP_EEPROM_PAGE_LENGTH) { - i2c_addr = MLXSW_SP_I2C_ADDR_HIGH; - offset -= MLXSW_SP_EEPROM_PAGE_LENGTH; - } - - mlxsw_reg_mcia_pack(mcia_pl, mlxsw_sp_port->mapping.module, - 0, 0, offset, size, i2c_addr); - - err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(mcia), mcia_pl); - if (err) - return err; - - status = mlxsw_reg_mcia_status_get(mcia_pl); - if (status) - return -EIO; - - mlxsw_reg_mcia_eeprom_memcpy_from(mcia_pl, eeprom_tmp); - memcpy(data, eeprom_tmp, size); - *p_read_size = size; - - return 0; -} - -enum mlxsw_sp_eeprom_module_info_rev_id { - MLXSW_SP_EEPROM_MODULE_INFO_REV_ID_UNSPC = 0x00, - MLXSW_SP_EEPROM_MODULE_INFO_REV_ID_8436 = 0x01, - MLXSW_SP_EEPROM_MODULE_INFO_REV_ID_8636 = 0x03, -}; - -enum mlxsw_sp_eeprom_module_info_id { - MLXSW_SP_EEPROM_MODULE_INFO_ID_SFP = 0x03, - MLXSW_SP_EEPROM_MODULE_INFO_ID_QSFP = 0x0C, - MLXSW_SP_EEPROM_MODULE_INFO_ID_QSFP_PLUS = 0x0D, - MLXSW_SP_EEPROM_MODULE_INFO_ID_QSFP28 = 0x11, -}; - -enum mlxsw_sp_eeprom_module_info { - MLXSW_SP_EEPROM_MODULE_INFO_ID, - MLXSW_SP_EEPROM_MODULE_INFO_REV_ID, - MLXSW_SP_EEPROM_MODULE_INFO_SIZE, -}; - static int mlxsw_sp_get_module_info(struct net_device *netdev, struct ethtool_modinfo *modinfo) { struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(netdev); - u8 module_info[MLXSW_SP_EEPROM_MODULE_INFO_SIZE]; - u8 module_rev_id, module_id; - unsigned int read_size; + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; int err; - err = mlxsw_sp_query_module_eeprom(mlxsw_sp_port, 0, - MLXSW_SP_EEPROM_MODULE_INFO_SIZE, - module_info, &read_size); - if (err) - return err; + err = mlxsw_env_get_module_info(mlxsw_sp->core, + mlxsw_sp_port->mapping.module, + modinfo); - if (read_size < MLXSW_SP_EEPROM_MODULE_INFO_SIZE) - return -EIO; - - module_rev_id = module_info[MLXSW_SP_EEPROM_MODULE_INFO_REV_ID]; - module_id = module_info[MLXSW_SP_EEPROM_MODULE_INFO_ID]; - - switch (module_id) { - case 
MLXSW_SP_EEPROM_MODULE_INFO_ID_QSFP: - modinfo->type = ETH_MODULE_SFF_8436; - modinfo->eeprom_len = ETH_MODULE_SFF_8436_LEN; - break; - case MLXSW_SP_EEPROM_MODULE_INFO_ID_QSFP_PLUS: - case MLXSW_SP_EEPROM_MODULE_INFO_ID_QSFP28: - if (module_id == MLXSW_SP_EEPROM_MODULE_INFO_ID_QSFP28 || - module_rev_id >= MLXSW_SP_EEPROM_MODULE_INFO_REV_ID_8636) { - modinfo->type = ETH_MODULE_SFF_8636; - modinfo->eeprom_len = ETH_MODULE_SFF_8636_LEN; - } else { - modinfo->type = ETH_MODULE_SFF_8436; - modinfo->eeprom_len = ETH_MODULE_SFF_8436_LEN; - } - break; - case MLXSW_SP_EEPROM_MODULE_INFO_ID_SFP: - modinfo->type = ETH_MODULE_SFF_8472; - modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN; - break; - default: - return -EINVAL; - } - - return 0; + return err; } static int mlxsw_sp_get_module_eeprom(struct net_device *netdev, @@ -2660,30 +3181,14 @@ static int mlxsw_sp_get_module_eeprom(struct net_device *netdev, u8 *data) { struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(netdev); - int offset = ee->offset; - unsigned int read_size; - int i = 0; + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; int err; - if (!ee->len) - return -EINVAL; - - memset(data, 0, ee->len); + err = mlxsw_env_get_module_eeprom(netdev, mlxsw_sp->core, + mlxsw_sp_port->mapping.module, ee, + data); - while (i < ee->len) { - err = mlxsw_sp_query_module_eeprom(mlxsw_sp_port, offset, - ee->len - i, data + i, - &read_size); - if (err) { - netdev_err(mlxsw_sp_port->dev, "Eeprom query failed\n"); - return err; - } - - i += read_size; - offset += read_size; - } - - return 0; + return err; } static const struct ethtool_ops mlxsw_sp_port_ethtool_ops = { @@ -2706,13 +3211,24 @@ static int mlxsw_sp_port_speed_by_width_set(struct mlxsw_sp_port *mlxsw_sp_port, u8 width) { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; - u32 upper_speed = MLXSW_SP_PORT_BASE_SPEED * width; + const struct mlxsw_sp_port_type_speed_ops *ops; char ptys_pl[MLXSW_REG_PTYS_LEN]; u32 eth_proto_admin; + u32 upper_speed; + u32 base_speed; + int err; + + ops = mlxsw_sp->port_type_speed_ops; + + err = ops->port_speed_base(mlxsw_sp, mlxsw_sp_port->local_port, + &base_speed); + if (err) + return err; + upper_speed = base_speed * width; - eth_proto_admin = mlxsw_sp_to_ptys_upper_speed(upper_speed); - mlxsw_reg_ptys_eth_pack(ptys_pl, mlxsw_sp_port->local_port, - eth_proto_admin, mlxsw_sp_port->link.autoneg); + eth_proto_admin = ops->to_ptys_upper_speed(mlxsw_sp, upper_speed); + ops->reg_ptys_eth_pack(mlxsw_sp, ptys_pl, mlxsw_sp_port->local_port, + eth_proto_admin, mlxsw_sp_port->link.autoneg); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl); } @@ -2800,7 +3316,7 @@ static int mlxsw_sp_port_ets_init(struct mlxsw_sp_port *mlxsw_sp_port) err = mlxsw_sp_port_ets_set(mlxsw_sp_port, MLXSW_REG_QEEC_HIERARCY_TC, i + 8, i, - false, 0); + true, 100); if (err) return err; } @@ -2892,7 +3408,7 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, mlxsw_sp_port->dev = dev; mlxsw_sp_port->mlxsw_sp = mlxsw_sp; mlxsw_sp_port->local_port = local_port; - mlxsw_sp_port->pvid = 1; + mlxsw_sp_port->pvid = MLXSW_SP_DEFAULT_VID; mlxsw_sp_port->split = split; mlxsw_sp_port->mapping.module = module; mlxsw_sp_port->mapping.width = width; @@ -3031,15 +3547,23 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, goto err_port_nve_init; } - mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_get(mlxsw_sp_port, 1); + err = mlxsw_sp_port_pvid_set(mlxsw_sp_port, MLXSW_SP_DEFAULT_VID); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to 
set PVID\n", + mlxsw_sp_port->local_port); + goto err_port_pvid_set; + } + + mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_create(mlxsw_sp_port, + MLXSW_SP_DEFAULT_VID); if (IS_ERR(mlxsw_sp_port_vlan)) { dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to create VID 1\n", mlxsw_sp_port->local_port); err = PTR_ERR(mlxsw_sp_port_vlan); - goto err_port_vlan_get; + goto err_port_vlan_create; } + mlxsw_sp_port->default_vlan = mlxsw_sp_port_vlan; - mlxsw_sp_port_switchdev_init(mlxsw_sp_port); mlxsw_sp->ports[local_port] = mlxsw_sp_port; err = register_netdev(dev); if (err) { @@ -3056,9 +3580,9 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, err_register_netdev: mlxsw_sp->ports[local_port] = NULL; - mlxsw_sp_port_switchdev_fini(mlxsw_sp_port); - mlxsw_sp_port_vlan_put(mlxsw_sp_port_vlan); -err_port_vlan_get: + mlxsw_sp_port_vlan_destroy(mlxsw_sp_port_vlan); +err_port_vlan_create: +err_port_pvid_set: mlxsw_sp_port_nve_fini(mlxsw_sp_port); err_port_nve_init: mlxsw_sp_tc_qdisc_fini(mlxsw_sp_port); @@ -3098,8 +3622,7 @@ static void mlxsw_sp_port_remove(struct mlxsw_sp *mlxsw_sp, u8 local_port) mlxsw_core_port_clear(mlxsw_sp->core, local_port, mlxsw_sp); unregister_netdev(mlxsw_sp_port->dev); /* This calls ndo_stop */ mlxsw_sp->ports[local_port] = NULL; - mlxsw_sp_port_switchdev_fini(mlxsw_sp_port); - mlxsw_sp_port_vlan_flush(mlxsw_sp_port); + mlxsw_sp_port_vlan_flush(mlxsw_sp_port, true); mlxsw_sp_port_nve_fini(mlxsw_sp_port); mlxsw_sp_tc_qdisc_fini(mlxsw_sp_port); mlxsw_sp_port_fids_fini(mlxsw_sp_port); @@ -3394,10 +3917,10 @@ static void mlxsw_sp_rx_listener_mark_func(struct sk_buff *skb, u8 local_port, return mlxsw_sp_rx_listener_no_mark_func(skb, local_port, priv); } -static void mlxsw_sp_rx_listener_mr_mark_func(struct sk_buff *skb, +static void mlxsw_sp_rx_listener_l3_mark_func(struct sk_buff *skb, u8 local_port, void *priv) { - skb->offload_mr_fwd_mark = 1; + skb->offload_l3_fwd_mark = 1; skb->offload_fwd_mark = 1; return mlxsw_sp_rx_listener_no_mark_func(skb, local_port, priv); } @@ -3445,8 +3968,8 @@ out: MLXSW_RXL(mlxsw_sp_rx_listener_mark_func, _trap_id, _action, \ _is_ctrl, SP_##_trap_group, DISCARD) -#define MLXSW_SP_RXL_MR_MARK(_trap_id, _action, _trap_group, _is_ctrl) \ - MLXSW_RXL(mlxsw_sp_rx_listener_mr_mark_func, _trap_id, _action, \ +#define MLXSW_SP_RXL_L3_MARK(_trap_id, _action, _trap_group, _is_ctrl) \ + MLXSW_RXL(mlxsw_sp_rx_listener_l3_mark_func, _trap_id, _action, \ _is_ctrl, SP_##_trap_group, DISCARD) #define MLXSW_SP_EVENTL(_func, _trap_id) \ @@ -3479,7 +4002,7 @@ static const struct mlxsw_listener mlxsw_sp_listener[] = { /* L3 traps */ MLXSW_SP_RXL_MARK(MTUERROR, TRAP_TO_CPU, ROUTER_EXP, false), MLXSW_SP_RXL_MARK(TTLERROR, TRAP_TO_CPU, ROUTER_EXP, false), - MLXSW_SP_RXL_MARK(LBERROR, TRAP_TO_CPU, ROUTER_EXP, false), + MLXSW_SP_RXL_L3_MARK(LBERROR, MIRROR_TO_CPU, LBERROR, false), MLXSW_SP_RXL_MARK(IP2ME, TRAP_TO_CPU, IP2ME, false), MLXSW_SP_RXL_MARK(IPV6_UNSPECIFIED_ADDRESS, TRAP_TO_CPU, ROUTER_EXP, false), @@ -3523,7 +4046,7 @@ static const struct mlxsw_listener mlxsw_sp_listener[] = { MLXSW_SP_RXL_MARK(IPV6_PIM, TRAP_TO_CPU, PIM, false), MLXSW_SP_RXL_MARK(RPF, TRAP_TO_CPU, RPF, false), MLXSW_SP_RXL_MARK(ACL1, TRAP_TO_CPU, MULTICAST, false), - MLXSW_SP_RXL_MR_MARK(ACL2, TRAP_TO_CPU, MULTICAST, false), + MLXSW_SP_RXL_L3_MARK(ACL2, TRAP_TO_CPU, MULTICAST, false), /* NVE traps */ MLXSW_SP_RXL_MARK(NVE_ENCAP_ARP, TRAP_TO_CPU, ARP, false), MLXSW_SP_RXL_NO_MARK(NVE_DECAP_ARP, TRAP_TO_CPU, ARP, false), @@ -3554,6 +4077,7 @@ static int 
mlxsw_sp_cpu_policers_set(struct mlxsw_core *mlxsw_core) case MLXSW_REG_HTGT_TRAP_GROUP_SP_OSPF: case MLXSW_REG_HTGT_TRAP_GROUP_SP_PIM: case MLXSW_REG_HTGT_TRAP_GROUP_SP_RPF: + case MLXSW_REG_HTGT_TRAP_GROUP_SP_LBERROR: rate = 128; burst_size = 7; break; @@ -3574,8 +4098,8 @@ static int mlxsw_sp_cpu_policers_set(struct mlxsw_core *mlxsw_core) burst_size = 7; break; case MLXSW_REG_HTGT_TRAP_GROUP_SP_IP2ME: - rate = 4 * 1024; - burst_size = 4; + rate = 1024; + burst_size = 7; break; default: continue; @@ -3639,6 +4163,7 @@ static int mlxsw_sp_trap_groups_set(struct mlxsw_core *mlxsw_core) case MLXSW_REG_HTGT_TRAP_GROUP_SP_ROUTER_EXP: case MLXSW_REG_HTGT_TRAP_GROUP_SP_REMOTE_ROUTE: case MLXSW_REG_HTGT_TRAP_GROUP_SP_MULTICAST: + case MLXSW_REG_HTGT_TRAP_GROUP_SP_LBERROR: priority = 1; tc = 1; break; @@ -3841,6 +4366,12 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core, goto err_nve_init; } + err = mlxsw_sp_acl_init(mlxsw_sp); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize ACL\n"); + goto err_acl_init; + } + err = mlxsw_sp_router_init(mlxsw_sp); if (err) { dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize router\n"); @@ -3858,12 +4389,6 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core, goto err_netdev_notifier; } - err = mlxsw_sp_acl_init(mlxsw_sp); - if (err) { - dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize ACL\n"); - goto err_acl_init; - } - err = mlxsw_sp_dpipe_init(mlxsw_sp); if (err) { dev_err(mlxsw_sp->bus_info->dev, "Failed to init pipeline debug\n"); @@ -3881,12 +4406,12 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core, err_ports_create: mlxsw_sp_dpipe_fini(mlxsw_sp); err_dpipe_init: - mlxsw_sp_acl_fini(mlxsw_sp); -err_acl_init: unregister_netdevice_notifier(&mlxsw_sp->netdevice_nb); err_netdev_notifier: mlxsw_sp_router_fini(mlxsw_sp); err_router_init: + mlxsw_sp_acl_fini(mlxsw_sp); +err_acl_init: mlxsw_sp_nve_fini(mlxsw_sp); err_nve_init: mlxsw_sp_afa_fini(mlxsw_sp); @@ -3922,6 +4447,10 @@ static int mlxsw_sp1_init(struct mlxsw_core *mlxsw_core, mlxsw_sp->mr_tcam_ops = &mlxsw_sp1_mr_tcam_ops; mlxsw_sp->acl_tcam_ops = &mlxsw_sp1_acl_tcam_ops; mlxsw_sp->nve_ops_arr = mlxsw_sp1_nve_ops_arr; + mlxsw_sp->mac_mask = mlxsw_sp1_mac_mask; + mlxsw_sp->rif_ops_arr = mlxsw_sp1_rif_ops_arr; + mlxsw_sp->sb_vals = &mlxsw_sp1_sb_vals; + mlxsw_sp->port_type_speed_ops = &mlxsw_sp1_port_type_speed_ops; return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info); } @@ -3937,6 +4466,10 @@ static int mlxsw_sp2_init(struct mlxsw_core *mlxsw_core, mlxsw_sp->mr_tcam_ops = &mlxsw_sp2_mr_tcam_ops; mlxsw_sp->acl_tcam_ops = &mlxsw_sp2_acl_tcam_ops; mlxsw_sp->nve_ops_arr = mlxsw_sp2_nve_ops_arr; + mlxsw_sp->mac_mask = mlxsw_sp2_mac_mask; + mlxsw_sp->rif_ops_arr = mlxsw_sp2_rif_ops_arr; + mlxsw_sp->sb_vals = &mlxsw_sp2_sb_vals; + mlxsw_sp->port_type_speed_ops = &mlxsw_sp2_port_type_speed_ops; return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info); } @@ -3947,9 +4480,9 @@ static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core) mlxsw_sp_ports_remove(mlxsw_sp); mlxsw_sp_dpipe_fini(mlxsw_sp); - mlxsw_sp_acl_fini(mlxsw_sp); unregister_netdevice_notifier(&mlxsw_sp->netdevice_nb); mlxsw_sp_router_fini(mlxsw_sp); + mlxsw_sp_acl_fini(mlxsw_sp); mlxsw_sp_nve_fini(mlxsw_sp); mlxsw_sp_afa_fini(mlxsw_sp); mlxsw_sp_counter_pool_fini(mlxsw_sp); @@ -3962,16 +4495,20 @@ static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core) mlxsw_sp_kvdl_fini(mlxsw_sp); } +/* Per-FID flood tables are used for both "true" 802.1D FIDs and emulated + * 802.1Q FIDs + */ +#define 
MLXSW_SP_FID_FLOOD_TABLE_SIZE (MLXSW_SP_FID_8021D_MAX + \ + VLAN_VID_MASK - 1) + static const struct mlxsw_config_profile mlxsw_sp1_config_profile = { .used_max_mid = 1, .max_mid = MLXSW_SP_MID_MAX, .used_flood_tables = 1, .used_flood_mode = 1, .flood_mode = 3, - .max_fid_offset_flood_tables = 3, - .fid_offset_flood_table_size = VLAN_N_VID - 1, .max_fid_flood_tables = 3, - .fid_flood_table_size = MLXSW_SP_FID_8021D_MAX, + .fid_flood_table_size = MLXSW_SP_FID_FLOOD_TABLE_SIZE, .used_max_ib_mc = 1, .max_ib_mc = 0, .used_max_pkey = 1, @@ -3994,10 +4531,8 @@ static const struct mlxsw_config_profile mlxsw_sp2_config_profile = { .used_flood_tables = 1, .used_flood_mode = 1, .flood_mode = 3, - .max_fid_offset_flood_tables = 3, - .fid_offset_flood_table_size = VLAN_N_VID - 1, .max_fid_flood_tables = 3, - .fid_flood_table_size = MLXSW_SP_FID_8021D_MAX, + .fid_flood_table_size = MLXSW_SP_FID_FLOOD_TABLE_SIZE, .used_max_ib_mc = 1, .max_ib_mc = 0, .used_max_pkey = 1, @@ -4177,6 +4712,117 @@ static int mlxsw_sp_kvd_sizes_get(struct mlxsw_core *mlxsw_core, return 0; } +static int +mlxsw_sp_devlink_param_fw_load_policy_validate(struct devlink *devlink, u32 id, + union devlink_param_value val, + struct netlink_ext_ack *extack) +{ + if ((val.vu8 != DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_DRIVER) && + (val.vu8 != DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_FLASH)) { + NL_SET_ERR_MSG_MOD(extack, "'fw_load_policy' must be 'driver' or 'flash'"); + return -EINVAL; + } + + return 0; +} + +static const struct devlink_param mlxsw_sp_devlink_params[] = { + DEVLINK_PARAM_GENERIC(FW_LOAD_POLICY, + BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), + NULL, NULL, + mlxsw_sp_devlink_param_fw_load_policy_validate), +}; + +static int mlxsw_sp_params_register(struct mlxsw_core *mlxsw_core) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_core); + union devlink_param_value value; + int err; + + err = devlink_params_register(devlink, mlxsw_sp_devlink_params, + ARRAY_SIZE(mlxsw_sp_devlink_params)); + if (err) + return err; + + value.vu8 = DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_DRIVER; + devlink_param_driverinit_value_set(devlink, + DEVLINK_PARAM_GENERIC_ID_FW_LOAD_POLICY, + value); + return 0; +} + +static void mlxsw_sp_params_unregister(struct mlxsw_core *mlxsw_core) +{ + devlink_params_unregister(priv_to_devlink(mlxsw_core), + mlxsw_sp_devlink_params, + ARRAY_SIZE(mlxsw_sp_devlink_params)); +} + +static int +mlxsw_sp_params_acl_region_rehash_intrvl_get(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + struct mlxsw_core *mlxsw_core = devlink_priv(devlink); + struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); + + ctx->val.vu32 = mlxsw_sp_acl_region_rehash_intrvl_get(mlxsw_sp); + return 0; +} + +static int +mlxsw_sp_params_acl_region_rehash_intrvl_set(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + struct mlxsw_core *mlxsw_core = devlink_priv(devlink); + struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); + + return mlxsw_sp_acl_region_rehash_intrvl_set(mlxsw_sp, ctx->val.vu32); +} + +static const struct devlink_param mlxsw_sp2_devlink_params[] = { + DEVLINK_PARAM_DRIVER(MLXSW_DEVLINK_PARAM_ID_ACL_REGION_REHASH_INTERVAL, + "acl_region_rehash_interval", + DEVLINK_PARAM_TYPE_U32, + BIT(DEVLINK_PARAM_CMODE_RUNTIME), + mlxsw_sp_params_acl_region_rehash_intrvl_get, + mlxsw_sp_params_acl_region_rehash_intrvl_set, + NULL), +}; + +static int mlxsw_sp2_params_register(struct mlxsw_core *mlxsw_core) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_core); + union devlink_param_value 
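+	/* A likely usage sketch for the runtime knob registered below
+	 * (device name hypothetical):
+	 *   devlink dev param set pci/0000:03:00.0 \
+	 *           name acl_region_rehash_interval value 3000 cmode runtime
+	 */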
value; + int err; + + err = mlxsw_sp_params_register(mlxsw_core); + if (err) + return err; + + err = devlink_params_register(devlink, mlxsw_sp2_devlink_params, + ARRAY_SIZE(mlxsw_sp2_devlink_params)); + if (err) + goto err_devlink_params_register; + + value.vu32 = 0; + devlink_param_driverinit_value_set(devlink, + MLXSW_DEVLINK_PARAM_ID_ACL_REGION_REHASH_INTERVAL, + value); + return 0; + +err_devlink_params_register: + mlxsw_sp_params_unregister(mlxsw_core); + return err; +} + +static void mlxsw_sp2_params_unregister(struct mlxsw_core *mlxsw_core) +{ + devlink_params_unregister(priv_to_devlink(mlxsw_core), + mlxsw_sp2_devlink_params, + ARRAY_SIZE(mlxsw_sp2_devlink_params)); + mlxsw_sp_params_unregister(mlxsw_core); +} + static struct mlxsw_driver mlxsw_sp1_driver = { .kind = mlxsw_sp1_driver_name, .priv_size = sizeof(struct mlxsw_sp), @@ -4198,6 +4844,8 @@ static struct mlxsw_driver mlxsw_sp1_driver = { .txhdr_construct = mlxsw_sp_txhdr_construct, .resources_register = mlxsw_sp1_resources_register, .kvd_sizes_get = mlxsw_sp_kvd_sizes_get, + .params_register = mlxsw_sp_params_register, + .params_unregister = mlxsw_sp_params_unregister, .txhdr_len = MLXSW_TXHDR_LEN, .profile = &mlxsw_sp1_config_profile, .res_query_enabled = true, @@ -4223,6 +4871,8 @@ static struct mlxsw_driver mlxsw_sp2_driver = { .sb_occ_tc_port_bind_get = mlxsw_sp_sb_occ_tc_port_bind_get, .txhdr_construct = mlxsw_sp_txhdr_construct, .resources_register = mlxsw_sp2_resources_register, + .params_register = mlxsw_sp2_params_register, + .params_unregister = mlxsw_sp2_params_unregister, .txhdr_len = MLXSW_TXHDR_LEN, .profile = &mlxsw_sp2_config_profile, .res_query_enabled = true, @@ -4298,6 +4948,25 @@ void mlxsw_sp_port_dev_put(struct mlxsw_sp_port *mlxsw_sp_port) dev_put(mlxsw_sp_port->dev); } +static void +mlxsw_sp_port_lag_uppers_cleanup(struct mlxsw_sp_port *mlxsw_sp_port, + struct net_device *lag_dev) +{ + struct net_device *br_dev = netdev_master_upper_dev_get(lag_dev); + struct net_device *upper_dev; + struct list_head *iter; + + if (netif_is_bridge_port(lag_dev)) + mlxsw_sp_port_bridge_leave(mlxsw_sp_port, lag_dev, br_dev); + + netdev_for_each_upper_dev_rcu(lag_dev, upper_dev, iter) { + if (!netif_is_bridge_port(upper_dev)) + continue; + br_dev = netdev_master_upper_dev_get(upper_dev); + mlxsw_sp_port_bridge_leave(mlxsw_sp_port, upper_dev, br_dev); + } +} + static int mlxsw_sp_lag_create(struct mlxsw_sp *mlxsw_sp, u16 lag_id) { char sldr_pl[MLXSW_REG_SLDR_LEN]; @@ -4425,7 +5094,6 @@ static int mlxsw_sp_port_lag_join(struct mlxsw_sp_port *mlxsw_sp_port, struct net_device *lag_dev) { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; - struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan; struct mlxsw_sp_upper *lag; u16 lag_id; u8 port_index; @@ -4448,9 +5116,6 @@ static int mlxsw_sp_port_lag_join(struct mlxsw_sp_port *mlxsw_sp_port, err = mlxsw_sp_lag_col_port_add(mlxsw_sp_port, lag_id, port_index); if (err) goto err_col_port_add; - err = mlxsw_sp_lag_col_port_enable(mlxsw_sp_port, lag_id); - if (err) - goto err_col_port_enable; mlxsw_core_lag_mapping_set(mlxsw_sp->core, lag_id, port_index, mlxsw_sp_port->local_port); @@ -4459,14 +5124,11 @@ static int mlxsw_sp_port_lag_join(struct mlxsw_sp_port *mlxsw_sp_port, lag->ref_count++; /* Port is no longer usable as a router interface */ - mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, 1); - if (mlxsw_sp_port_vlan->fid) - mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port_vlan); + if (mlxsw_sp_port->default_vlan->fid) + 
mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port->default_vlan); return 0; -err_col_port_enable: - mlxsw_sp_lag_col_port_remove(mlxsw_sp_port, lag_id); err_col_port_add: if (!lag->ref_count) mlxsw_sp_lag_destroy(mlxsw_sp, lag_id); @@ -4485,11 +5147,15 @@ static void mlxsw_sp_port_lag_leave(struct mlxsw_sp_port *mlxsw_sp_port, lag = mlxsw_sp_lag_get(mlxsw_sp, lag_id); WARN_ON(lag->ref_count == 0); - mlxsw_sp_lag_col_port_disable(mlxsw_sp_port, lag_id); mlxsw_sp_lag_col_port_remove(mlxsw_sp_port, lag_id); /* Any VLANs configured on the port are no longer valid */ - mlxsw_sp_port_vlan_flush(mlxsw_sp_port); + mlxsw_sp_port_vlan_flush(mlxsw_sp_port, false); + mlxsw_sp_port_vlan_cleanup(mlxsw_sp_port->default_vlan); + /* Make the LAG and its directly linked uppers leave bridges they + * are members of + */ + mlxsw_sp_port_lag_uppers_cleanup(mlxsw_sp_port, lag_dev); if (lag->ref_count == 1) mlxsw_sp_lag_destroy(mlxsw_sp, lag_id); @@ -4499,9 +5165,8 @@ static void mlxsw_sp_port_lag_leave(struct mlxsw_sp_port *mlxsw_sp_port, mlxsw_sp_port->lagged = 0; lag->ref_count--; - mlxsw_sp_port_vlan_get(mlxsw_sp_port, 1); /* Make sure untagged frames are allowed to ingress */ - mlxsw_sp_port_pvid_set(mlxsw_sp_port, 1); + mlxsw_sp_port_pvid_set(mlxsw_sp_port, MLXSW_SP_DEFAULT_VID); } static int mlxsw_sp_lag_dist_port_add(struct mlxsw_sp_port *mlxsw_sp_port, @@ -4526,21 +5191,56 @@ static int mlxsw_sp_lag_dist_port_remove(struct mlxsw_sp_port *mlxsw_sp_port, return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sldr), sldr_pl); } -static int mlxsw_sp_port_lag_tx_en_set(struct mlxsw_sp_port *mlxsw_sp_port, - bool lag_tx_enabled) +static int +mlxsw_sp_port_lag_col_dist_enable(struct mlxsw_sp_port *mlxsw_sp_port) { - if (lag_tx_enabled) - return mlxsw_sp_lag_dist_port_add(mlxsw_sp_port, - mlxsw_sp_port->lag_id); - else - return mlxsw_sp_lag_dist_port_remove(mlxsw_sp_port, - mlxsw_sp_port->lag_id); + int err; + + err = mlxsw_sp_lag_col_port_enable(mlxsw_sp_port, + mlxsw_sp_port->lag_id); + if (err) + return err; + + err = mlxsw_sp_lag_dist_port_add(mlxsw_sp_port, mlxsw_sp_port->lag_id); + if (err) + goto err_dist_port_add; + + return 0; + +err_dist_port_add: + mlxsw_sp_lag_col_port_disable(mlxsw_sp_port, mlxsw_sp_port->lag_id); + return err; +} + +static int +mlxsw_sp_port_lag_col_dist_disable(struct mlxsw_sp_port *mlxsw_sp_port) +{ + int err; + + err = mlxsw_sp_lag_dist_port_remove(mlxsw_sp_port, + mlxsw_sp_port->lag_id); + if (err) + return err; + + err = mlxsw_sp_lag_col_port_disable(mlxsw_sp_port, + mlxsw_sp_port->lag_id); + if (err) + goto err_col_port_disable; + + return 0; + +err_col_port_disable: + mlxsw_sp_lag_dist_port_add(mlxsw_sp_port, mlxsw_sp_port->lag_id); + return err; } static int mlxsw_sp_port_lag_changed(struct mlxsw_sp_port *mlxsw_sp_port, struct netdev_lag_lower_state_info *info) { - return mlxsw_sp_port_lag_tx_en_set(mlxsw_sp_port, info->tx_enabled); + if (info->tx_enabled) + return mlxsw_sp_port_lag_col_dist_enable(mlxsw_sp_port); + else + return mlxsw_sp_port_lag_col_dist_disable(mlxsw_sp_port); } static int mlxsw_sp_port_stp_set(struct mlxsw_sp_port *mlxsw_sp_port, @@ -4579,7 +5279,7 @@ static int mlxsw_sp_port_ovs_join(struct mlxsw_sp_port *mlxsw_sp_port) err = mlxsw_sp_port_stp_set(mlxsw_sp_port, true); if (err) goto err_port_stp_set; - err = mlxsw_sp_port_vlan_set(mlxsw_sp_port, 2, VLAN_N_VID - 1, + err = mlxsw_sp_port_vlan_set(mlxsw_sp_port, 1, VLAN_N_VID - 2, true, false); if (err) goto err_port_vlan_set; @@ -4611,7 +5311,7 @@ static void mlxsw_sp_port_ovs_leave(struct mlxsw_sp_port 
*mlxsw_sp_port) mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true); - mlxsw_sp_port_vlan_set(mlxsw_sp_port, 2, VLAN_N_VID - 1, + mlxsw_sp_port_vlan_set(mlxsw_sp_port, 1, VLAN_N_VID - 2, false, false); mlxsw_sp_port_stp_set(mlxsw_sp_port, false); mlxsw_sp_port_vp_mode_set(mlxsw_sp_port, false); @@ -4631,6 +5331,30 @@ static bool mlxsw_sp_bridge_has_multiple_vxlans(struct net_device *br_dev) return num_vxlans > 1; } +static bool mlxsw_sp_bridge_vxlan_vlan_is_valid(struct net_device *br_dev) +{ + DECLARE_BITMAP(vlans, VLAN_N_VID) = {0}; + struct net_device *dev; + struct list_head *iter; + + netdev_for_each_lower_dev(br_dev, dev, iter) { + u16 pvid; + int err; + + if (!netif_is_vxlan(dev)) + continue; + + err = mlxsw_sp_vxlan_mapped_vid(dev, &pvid); + if (err || !pvid) + continue; + + if (test_and_set_bit(pvid, vlans)) + return false; + } + + return true; +} + static bool mlxsw_sp_bridge_vxlan_is_valid(struct net_device *br_dev, struct netlink_ext_ack *extack) { @@ -4639,13 +5363,15 @@ static bool mlxsw_sp_bridge_vxlan_is_valid(struct net_device *br_dev, return false; } - if (br_vlan_enabled(br_dev)) { - NL_SET_ERR_MSG_MOD(extack, "VLAN filtering can not be enabled on a bridge with a VxLAN device"); + if (!br_vlan_enabled(br_dev) && + mlxsw_sp_bridge_has_multiple_vxlans(br_dev)) { + NL_SET_ERR_MSG_MOD(extack, "Multiple VxLAN devices are not supported in a VLAN-unaware bridge"); return false; } - if (mlxsw_sp_bridge_has_multiple_vxlans(br_dev)) { - NL_SET_ERR_MSG_MOD(extack, "Multiple VxLAN devices are not supported in a VLAN-unaware bridge"); + if (br_vlan_enabled(br_dev) && + !mlxsw_sp_bridge_vxlan_vlan_is_valid(br_dev)) { + NL_SET_ERR_MSG_MOD(extack, "Multiple VxLAN devices cannot have the same VLAN as PVID and egress untagged"); return false; } @@ -4719,11 +5445,6 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev, NL_SET_ERR_MSG_MOD(extack, "Can not put a VLAN on an OVS port"); return -EINVAL; } - if (is_vlan_dev(upper_dev) && - vlan_dev_vlan_id(upper_dev) == 1) { - NL_SET_ERR_MSG_MOD(extack, "Creating a VLAN device with VID 1 is unsupported: VLAN 1 carries untagged traffic"); - return -EINVAL; - } break; case NETDEV_CHANGEUPPER: upper_dev = info->upper_dev; @@ -4738,12 +5459,14 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev, lower_dev, upper_dev); } else if (netif_is_lag_master(upper_dev)) { - if (info->linking) + if (info->linking) { err = mlxsw_sp_port_lag_join(mlxsw_sp_port, upper_dev); - else + } else { + mlxsw_sp_port_lag_col_dist_disable(mlxsw_sp_port); mlxsw_sp_port_lag_leave(mlxsw_sp_port, upper_dev); + } } else if (netif_is_ovs_master(upper_dev)) { if (info->linking) err = mlxsw_sp_port_ovs_join(mlxsw_sp_port); @@ -4752,6 +5475,16 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev, } else if (netif_is_macvlan(upper_dev)) { if (!info->linking) mlxsw_sp_rif_macvlan_del(mlxsw_sp, upper_dev); + } else if (is_vlan_dev(upper_dev)) { + struct net_device *br_dev; + + if (!netif_is_bridge_port(upper_dev)) + break; + if (info->linking) + break; + br_dev = netdev_master_upper_dev_get(upper_dev); + mlxsw_sp_port_bridge_leave(mlxsw_sp_port, upper_dev, + br_dev); } break; } @@ -4908,6 +5641,48 @@ static int mlxsw_sp_netdevice_lag_port_vlan_event(struct net_device *vlan_dev, return 0; } +static int mlxsw_sp_netdevice_bridge_vlan_event(struct net_device *vlan_dev, + struct net_device *br_dev, + unsigned long event, void *ptr, + u16 vid) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(vlan_dev); + 
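+	/* Sketch of the policy enforced below: a VLAN device on top of a
+	 * bridge is only consumed as a router interface, so macvlan is the
+	 * sole upper accepted here, and only once a matching RIF exists.
+	 */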
struct netdev_notifier_changeupper_info *info = ptr; + struct netlink_ext_ack *extack; + struct net_device *upper_dev; + + if (!mlxsw_sp) + return 0; + + extack = netdev_notifier_info_to_extack(&info->info); + + switch (event) { + case NETDEV_PRECHANGEUPPER: + upper_dev = info->upper_dev; + if (!netif_is_macvlan(upper_dev)) { + NL_SET_ERR_MSG_MOD(extack, "Unknown upper device type"); + return -EOPNOTSUPP; + } + if (!info->linking) + break; + if (netif_is_macvlan(upper_dev) && + !mlxsw_sp_rif_find_by_dev(mlxsw_sp, vlan_dev)) { + NL_SET_ERR_MSG_MOD(extack, "macvlan is only supported on top of router interfaces"); + return -EOPNOTSUPP; + } + break; + case NETDEV_CHANGEUPPER: + upper_dev = info->upper_dev; + if (info->linking) + break; + if (netif_is_macvlan(upper_dev)) + mlxsw_sp_rif_macvlan_del(mlxsw_sp, upper_dev); + break; + } + + return 0; +} + static int mlxsw_sp_netdevice_vlan_event(struct net_device *vlan_dev, unsigned long event, void *ptr) { @@ -4921,6 +5696,9 @@ static int mlxsw_sp_netdevice_vlan_event(struct net_device *vlan_dev, return mlxsw_sp_netdevice_lag_port_vlan_event(vlan_dev, real_dev, event, ptr, vid); + else if (netif_is_bridge_master(real_dev)) + return mlxsw_sp_netdevice_bridge_vlan_event(vlan_dev, real_dev, + event, ptr, vid); return 0; } @@ -5020,10 +5798,21 @@ static int mlxsw_sp_netdevice_vxlan_event(struct mlxsw_sp *mlxsw_sp, if (cu_info->linking) { if (!netif_running(dev)) return 0; + /* When the bridge is VLAN-aware, the VNI of the VxLAN + * device needs to be mapped to a VLAN, but at this + * point no VLANs are configured on the VxLAN device + */ + if (br_vlan_enabled(upper_dev)) + return 0; return mlxsw_sp_bridge_vxlan_join(mlxsw_sp, upper_dev, - dev, extack); + dev, 0, extack); } else { - mlxsw_sp_bridge_vxlan_leave(mlxsw_sp, upper_dev, dev); + /* VLANs were already flushed, which triggered the + * necessary cleanup + */ + if (br_vlan_enabled(upper_dev)) + return 0; + mlxsw_sp_bridge_vxlan_leave(mlxsw_sp, dev); } break; case NETDEV_PRE_UP: @@ -5034,7 +5823,7 @@ static int mlxsw_sp_netdevice_vxlan_event(struct mlxsw_sp *mlxsw_sp, return 0; if (!mlxsw_sp_lower_get(upper_dev)) return 0; - return mlxsw_sp_bridge_vxlan_join(mlxsw_sp, upper_dev, dev, + return mlxsw_sp_bridge_vxlan_join(mlxsw_sp, upper_dev, dev, 0, extack); case NETDEV_DOWN: upper_dev = netdev_master_upper_dev_get(dev); @@ -5044,7 +5833,7 @@ static int mlxsw_sp_netdevice_vxlan_event(struct mlxsw_sp *mlxsw_sp, return 0; if (!mlxsw_sp_lower_get(upper_dev)) return 0; - mlxsw_sp_bridge_vxlan_leave(mlxsw_sp, upper_dev, dev); + mlxsw_sp_bridge_vxlan_leave(mlxsw_sp, dev); break; } @@ -5075,8 +5864,10 @@ static int mlxsw_sp_netdevice_event(struct notifier_block *nb, else if (mlxsw_sp_netdev_is_ipip_ul(mlxsw_sp, dev)) err = mlxsw_sp_netdevice_ipip_ul_event(mlxsw_sp, dev, event, ptr); - else if (event == NETDEV_CHANGEADDR || event == NETDEV_CHANGEMTU) - err = mlxsw_sp_netdevice_router_port_event(dev); + else if (event == NETDEV_PRE_CHANGEADDR || + event == NETDEV_CHANGEADDR || + event == NETDEV_CHANGEMTU) + err = mlxsw_sp_netdevice_router_port_event(dev, event, ptr); else if (mlxsw_sp_is_vrf_event(event, ptr)) err = mlxsw_sp_netdevice_vrf_event(dev, event, ptr); else if (mlxsw_sp_port_dev_check(dev)) @@ -5097,18 +5888,10 @@ static struct notifier_block mlxsw_sp_inetaddr_valid_nb __read_mostly = { .notifier_call = mlxsw_sp_inetaddr_valid_event, }; -static struct notifier_block mlxsw_sp_inetaddr_nb __read_mostly = { - .notifier_call = mlxsw_sp_inetaddr_event, -}; - static struct notifier_block 
mlxsw_sp_inet6addr_valid_nb __read_mostly = { .notifier_call = mlxsw_sp_inet6addr_valid_event, }; -static struct notifier_block mlxsw_sp_inet6addr_nb __read_mostly = { - .notifier_call = mlxsw_sp_inet6addr_event, -}; - static const struct pci_device_id mlxsw_sp1_pci_id_table[] = { {PCI_VDEVICE(MELLANOX, PCI_DEVICE_ID_MELLANOX_SPECTRUM), 0}, {0, }, @@ -5134,9 +5917,7 @@ static int __init mlxsw_sp_module_init(void) int err; register_inetaddr_validator_notifier(&mlxsw_sp_inetaddr_valid_nb); - register_inetaddr_notifier(&mlxsw_sp_inetaddr_nb); register_inet6addr_validator_notifier(&mlxsw_sp_inet6addr_valid_nb); - register_inet6addr_notifier(&mlxsw_sp_inet6addr_nb); err = mlxsw_core_driver_register(&mlxsw_sp1_driver); if (err) @@ -5163,9 +5944,7 @@ err_sp1_pci_driver_register: err_sp2_core_driver_register: mlxsw_core_driver_unregister(&mlxsw_sp1_driver); err_sp1_core_driver_register: - unregister_inet6addr_notifier(&mlxsw_sp_inet6addr_nb); unregister_inet6addr_validator_notifier(&mlxsw_sp_inet6addr_valid_nb); - unregister_inetaddr_notifier(&mlxsw_sp_inetaddr_nb); unregister_inetaddr_validator_notifier(&mlxsw_sp_inetaddr_valid_nb); return err; } @@ -5176,9 +5955,7 @@ static void __exit mlxsw_sp_module_exit(void) mlxsw_pci_driver_unregister(&mlxsw_sp1_pci_driver); mlxsw_core_driver_unregister(&mlxsw_sp2_driver); mlxsw_core_driver_unregister(&mlxsw_sp1_driver); - unregister_inet6addr_notifier(&mlxsw_sp_inet6addr_nb); unregister_inet6addr_validator_notifier(&mlxsw_sp_inet6addr_valid_nb); - unregister_inetaddr_notifier(&mlxsw_sp_inetaddr_nb); unregister_inetaddr_validator_notifier(&mlxsw_sp_inetaddr_valid_nb); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 0875a79cbe7b..da6278b0caa4 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -8,6 +8,7 @@ #include <linux/netdevice.h> #include <linux/rhashtable.h> #include <linux/bitops.h> +#include <linux/if_bridge.h> #include <linux/if_vlan.h> #include <linux/list.h> #include <linux/dcbnl.h> @@ -24,13 +25,16 @@ #include "core_acl_flex_actions.h" #include "reg.h" +#define MLXSW_SP_DEFAULT_VID (VLAN_N_VID - 1) + #define MLXSW_SP_FID_8021D_MAX 1024 #define MLXSW_SP_MID_MAX 7000 #define MLXSW_SP_PORTS_PER_CLUSTER_MAX 4 -#define MLXSW_SP_PORT_BASE_SPEED 25000 /* Mb/s */ +#define MLXSW_SP_PORT_BASE_SPEED_25G 25000 /* Mb/s */ +#define MLXSW_SP_PORT_BASE_SPEED_50G 50000 /* Mb/s */ #define MLXSW_SP_KVD_LINEAR_SIZE 98304 /* entries */ #define MLXSW_SP_KVD_GRANULARITY 128 @@ -72,6 +76,11 @@ enum mlxsw_sp_rif_type { MLXSW_SP_RIF_TYPE_MAX, }; +struct mlxsw_sp_rif_ops; + +extern const struct mlxsw_sp_rif_ops *mlxsw_sp1_rif_ops_arr[]; +extern const struct mlxsw_sp_rif_ops *mlxsw_sp2_rif_ops_arr[]; + enum mlxsw_sp_fid_type { MLXSW_SP_FID_TYPE_8021Q, MLXSW_SP_FID_TYPE_8021D, @@ -80,6 +89,10 @@ enum mlxsw_sp_fid_type { MLXSW_SP_FID_TYPE_MAX, }; +enum mlxsw_sp_nve_type { + MLXSW_SP_NVE_TYPE_VXLAN, +}; + struct mlxsw_sp_mid { struct list_head list; unsigned char addr[ETH_ALEN]; @@ -121,12 +134,15 @@ struct mlxsw_sp_kvdl_ops; struct mlxsw_sp_mr_tcam_ops; struct mlxsw_sp_acl_tcam_ops; struct mlxsw_sp_nve_ops; +struct mlxsw_sp_sb_vals; +struct mlxsw_sp_port_type_speed_ops; struct mlxsw_sp { struct mlxsw_sp_port **ports; struct mlxsw_core *core; const struct mlxsw_bus_info *bus_info; unsigned char base_mac[ETH_ALEN]; + const unsigned char *mac_mask; struct mlxsw_sp_upper *lags; int *port_to_module; struct mlxsw_sp_sb *sb; @@ -153,6 +169,9 @@ struct 
mlxsw_sp { const struct mlxsw_sp_mr_tcam_ops *mr_tcam_ops; const struct mlxsw_sp_acl_tcam_ops *acl_tcam_ops; const struct mlxsw_sp_nve_ops **nve_ops_arr; + const struct mlxsw_sp_rif_ops **rif_ops_arr; + const struct mlxsw_sp_sb_vals *sb_vals; + const struct mlxsw_sp_port_type_speed_ops *port_type_speed_ops; }; static inline struct mlxsw_sp_upper * @@ -184,7 +203,6 @@ struct mlxsw_sp_port_vlan { struct list_head list; struct mlxsw_sp_port *mlxsw_sp_port; struct mlxsw_sp_fid *fid; - unsigned int ref_count; u16 vid; struct mlxsw_sp_bridge_port *bridge_port; struct list_head bridge_vlan_node; @@ -235,6 +253,7 @@ struct mlxsw_sp_port { } periodic_hw_stats; struct mlxsw_sp_port_sample *sample; struct list_head vlans_list; + struct mlxsw_sp_port_vlan *default_vlan; struct mlxsw_sp_qdisc *root_qdisc; struct mlxsw_sp_qdisc *tclass_qdiscs; unsigned acl_rule_count; @@ -242,6 +261,29 @@ struct mlxsw_sp_port { struct mlxsw_sp_acl_block *eg_acl_block; }; +struct mlxsw_sp_port_type_speed_ops { + void (*from_ptys_supported_port)(struct mlxsw_sp *mlxsw_sp, + u32 ptys_eth_proto, + struct ethtool_link_ksettings *cmd); + void (*from_ptys_link)(struct mlxsw_sp *mlxsw_sp, u32 ptys_eth_proto, + unsigned long *mode); + void (*from_ptys_speed_duplex)(struct mlxsw_sp *mlxsw_sp, + bool carrier_ok, u32 ptys_eth_proto, + struct ethtool_link_ksettings *cmd); + u32 (*to_ptys_advert_link)(struct mlxsw_sp *mlxsw_sp, + const struct ethtool_link_ksettings *cmd); + u32 (*to_ptys_speed)(struct mlxsw_sp *mlxsw_sp, u32 speed); + u32 (*to_ptys_upper_speed)(struct mlxsw_sp *mlxsw_sp, u32 upper_speed); + int (*port_speed_base)(struct mlxsw_sp *mlxsw_sp, u8 local_port, + u32 *base_speed); + void (*reg_ptys_eth_pack)(struct mlxsw_sp *mlxsw_sp, char *payload, + u8 local_port, u32 proto_admin, bool autoneg); + void (*reg_ptys_eth_unpack)(struct mlxsw_sp *mlxsw_sp, char *payload, + u32 *p_eth_proto_cap, + u32 *p_eth_proto_admin, + u32 *p_eth_proto_oper); +}; + static inline struct net_device * mlxsw_sp_bridge_vxlan_dev_find(struct net_device *br_dev) { @@ -261,6 +303,26 @@ static inline bool mlxsw_sp_bridge_has_vxlan(struct net_device *br_dev) return !!mlxsw_sp_bridge_vxlan_dev_find(br_dev); } +static inline int +mlxsw_sp_vxlan_mapped_vid(const struct net_device *vxlan_dev, u16 *p_vid) +{ + struct bridge_vlan_info vinfo; + u16 vid = 0; + int err; + + err = br_vlan_get_pvid(vxlan_dev, &vid); + if (err || !vid) + goto out; + + err = br_vlan_get_info(vxlan_dev, vid, &vinfo); + if (err || !(vinfo.flags & BRIDGE_VLAN_INFO_UNTAGGED)) + vid = 0; + +out: + *p_vid = vid; + return err; +} + static inline bool mlxsw_sp_port_is_pause_en(const struct mlxsw_sp_port *mlxsw_sp_port) { @@ -337,12 +399,14 @@ int mlxsw_sp_sb_occ_tc_port_bind_get(struct mlxsw_core_port *mlxsw_core_port, u32 *p_cur, u32 *p_max); u32 mlxsw_sp_cells_bytes(const struct mlxsw_sp *mlxsw_sp, u32 cells); u32 mlxsw_sp_bytes_cells(const struct mlxsw_sp *mlxsw_sp, u32 bytes); +u32 mlxsw_sp_sb_max_headroom_cells(const struct mlxsw_sp *mlxsw_sp); + +extern const struct mlxsw_sp_sb_vals mlxsw_sp1_sb_vals; +extern const struct mlxsw_sp_sb_vals mlxsw_sp2_sb_vals; /* spectrum_switchdev.c */ int mlxsw_sp_switchdev_init(struct mlxsw_sp *mlxsw_sp); void mlxsw_sp_switchdev_fini(struct mlxsw_sp *mlxsw_sp); -void mlxsw_sp_port_switchdev_init(struct mlxsw_sp_port *mlxsw_sp_port); -void mlxsw_sp_port_switchdev_fini(struct mlxsw_sp_port *mlxsw_sp_port); int mlxsw_sp_rif_fdb_op(struct mlxsw_sp *mlxsw_sp, const char *mac, u16 fid, bool adding); void @@ -358,11 +422,15 @@ bool 
mlxsw_sp_bridge_device_is_offloaded(const struct mlxsw_sp *mlxsw_sp, const struct net_device *br_dev); int mlxsw_sp_bridge_vxlan_join(struct mlxsw_sp *mlxsw_sp, const struct net_device *br_dev, - const struct net_device *vxlan_dev, + const struct net_device *vxlan_dev, u16 vid, struct netlink_ext_ack *extack); void mlxsw_sp_bridge_vxlan_leave(struct mlxsw_sp *mlxsw_sp, - const struct net_device *br_dev, const struct net_device *vxlan_dev); +struct mlxsw_sp_fid *mlxsw_sp_bridge_fid_get(struct mlxsw_sp *mlxsw_sp, + const struct net_device *br_dev, + u16 vid, + struct netlink_ext_ack *extack); +extern struct notifier_block mlxsw_sp_switchdev_notifier; /* spectrum.c */ int mlxsw_sp_port_ets_set(struct mlxsw_sp_port *mlxsw_sp_port, @@ -384,8 +452,8 @@ int mlxsw_sp_port_vid_learning_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid, bool learn_enable); int mlxsw_sp_port_pvid_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid); struct mlxsw_sp_port_vlan * -mlxsw_sp_port_vlan_get(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid); -void mlxsw_sp_port_vlan_put(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan); +mlxsw_sp_port_vlan_create(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid); +void mlxsw_sp_port_vlan_destroy(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan); int mlxsw_sp_port_vlan_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid_begin, u16 vid_end, bool is_member, bool untagged); int mlxsw_sp_flow_counter_get(struct mlxsw_sp *mlxsw_sp, @@ -429,15 +497,12 @@ union mlxsw_sp_l3addr { int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp); void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp); -int mlxsw_sp_netdevice_router_port_event(struct net_device *dev); +int mlxsw_sp_netdevice_router_port_event(struct net_device *dev, + unsigned long event, void *ptr); void mlxsw_sp_rif_macvlan_del(struct mlxsw_sp *mlxsw_sp, const struct net_device *macvlan_dev); -int mlxsw_sp_inetaddr_event(struct notifier_block *unused, - unsigned long event, void *ptr); int mlxsw_sp_inetaddr_valid_event(struct notifier_block *unused, unsigned long event, void *ptr); -int mlxsw_sp_inet6addr_event(struct notifier_block *unused, - unsigned long event, void *ptr); int mlxsw_sp_inet6addr_valid_event(struct notifier_block *unused, unsigned long event, void *ptr); int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event, @@ -457,7 +522,6 @@ mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp, struct netdev_notifier_info *info); void mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan); -void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif); void mlxsw_sp_rif_destroy_by_dev(struct mlxsw_sp *mlxsw_sp, struct net_device *dev); struct mlxsw_sp_rif *mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp, @@ -473,6 +537,9 @@ void mlxsw_sp_router_nve_demote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id, const union mlxsw_sp_l3addr *ul_sip); int mlxsw_sp_router_tb_id_vr_id(struct mlxsw_sp *mlxsw_sp, u32 tb_id, u16 *vr_id); +int mlxsw_sp_router_ul_rif_get(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id, + u16 *ul_rif_index); +void mlxsw_sp_router_ul_rif_put(struct mlxsw_sp *mlxsw_sp, u16 ul_rif_index); /* spectrum_kvdl.c */ enum mlxsw_sp_kvdl_entry_type { @@ -538,6 +605,7 @@ struct mlxsw_sp_acl_rule_info { unsigned int priority; struct mlxsw_afk_element_values values; struct mlxsw_afa_block *act_block; + u8 action_created:1; unsigned int counter_index; }; @@ -547,6 +615,7 @@ struct mlxsw_sp_acl_ruleset; /* spectrum_acl.c */ enum mlxsw_sp_acl_profile { MLXSW_SP_ACL_PROFILE_FLOWER, + MLXSW_SP_ACL_PROFILE_MR, 
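+	/* MLXSW_SP_ACL_PROFILE_MR is used by the Spectrum-2 multicast
+	 * router TCAM (spectrum2_mr_tcam.c) for its IPv4/IPv6 rulesets.
+	 */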
}; struct mlxsw_afk *mlxsw_sp_acl_afk(struct mlxsw_sp_acl *acl); @@ -581,7 +650,8 @@ void mlxsw_sp_acl_ruleset_put(struct mlxsw_sp *mlxsw_sp, u16 mlxsw_sp_acl_ruleset_group_id(struct mlxsw_sp_acl_ruleset *ruleset); struct mlxsw_sp_acl_rule_info * -mlxsw_sp_acl_rulei_create(struct mlxsw_sp_acl *acl); +mlxsw_sp_acl_rulei_create(struct mlxsw_sp_acl *acl, + struct mlxsw_afa_block *afa_block); void mlxsw_sp_acl_rulei_destroy(struct mlxsw_sp_acl_rule_info *rulei); int mlxsw_sp_acl_rulei_commit(struct mlxsw_sp_acl_rule_info *rulei); void mlxsw_sp_acl_rulei_priority(struct mlxsw_sp_acl_rule_info *rulei, @@ -625,6 +695,7 @@ struct mlxsw_sp_acl_rule * mlxsw_sp_acl_rule_create(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_ruleset *ruleset, unsigned long cookie, + struct mlxsw_afa_block *afa_block, struct netlink_ext_ack *extack); void mlxsw_sp_acl_rule_destroy(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_rule *rule); @@ -632,6 +703,9 @@ int mlxsw_sp_acl_rule_add(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_rule *rule); void mlxsw_sp_acl_rule_del(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_rule *rule); +int mlxsw_sp_acl_rule_action_replace(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule *rule, + struct mlxsw_afa_block *afa_block); struct mlxsw_sp_acl_rule * mlxsw_sp_acl_rule_lookup(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_ruleset *ruleset, @@ -646,6 +720,8 @@ struct mlxsw_sp_fid *mlxsw_sp_acl_dummy_fid(struct mlxsw_sp *mlxsw_sp); int mlxsw_sp_acl_init(struct mlxsw_sp *mlxsw_sp); void mlxsw_sp_acl_fini(struct mlxsw_sp *mlxsw_sp); +u32 mlxsw_sp_acl_region_rehash_intrvl_get(struct mlxsw_sp *mlxsw_sp); +int mlxsw_sp_acl_region_rehash_intrvl_set(struct mlxsw_sp *mlxsw_sp, u32 val); /* spectrum_acl_tcam.c */ struct mlxsw_sp_acl_tcam; @@ -660,10 +736,13 @@ struct mlxsw_sp_acl_tcam_ops { size_t region_priv_size; int (*region_init)(struct mlxsw_sp *mlxsw_sp, void *region_priv, void *tcam_priv, - struct mlxsw_sp_acl_tcam_region *region); + struct mlxsw_sp_acl_tcam_region *region, + void *hints_priv); void (*region_fini)(struct mlxsw_sp *mlxsw_sp, void *region_priv); int (*region_associate)(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_tcam_region *region); + void * (*region_rehash_hints_get)(void *region_priv); + void (*region_rehash_hints_put)(void *hints_priv); size_t chunk_priv_size; void (*chunk_init)(void *region_priv, void *chunk_priv, unsigned int priority); @@ -676,6 +755,9 @@ struct mlxsw_sp_acl_tcam_ops { void (*entry_del)(struct mlxsw_sp *mlxsw_sp, void *region_priv, void *chunk_priv, void *entry_priv); + int (*entry_action_replace)(struct mlxsw_sp *mlxsw_sp, + void *region_priv, void *entry_priv, + struct mlxsw_sp_acl_rule_info *rulei); int (*entry_activity_get)(struct mlxsw_sp *mlxsw_sp, void *region_priv, void *entry_priv, bool *activity); @@ -721,6 +803,12 @@ int mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port, struct tc_prio_qopt_offload *p); /* spectrum_fid.c */ +bool mlxsw_sp_fid_lag_vid_valid(const struct mlxsw_sp_fid *fid); +struct mlxsw_sp_fid *mlxsw_sp_fid_lookup_by_index(struct mlxsw_sp *mlxsw_sp, + u16 fid_index); +int mlxsw_sp_fid_nve_ifindex(const struct mlxsw_sp_fid *fid, int *nve_ifindex); +int mlxsw_sp_fid_nve_type(const struct mlxsw_sp_fid *fid, + enum mlxsw_sp_nve_type *p_type); struct mlxsw_sp_fid *mlxsw_sp_fid_lookup_by_vni(struct mlxsw_sp *mlxsw_sp, __be32 vni); int mlxsw_sp_fid_vni(const struct mlxsw_sp_fid *fid, __be32 *vni); @@ -728,9 +816,12 @@ int mlxsw_sp_fid_nve_flood_index_set(struct mlxsw_sp_fid *fid, u32 nve_flood_index); void 
mlxsw_sp_fid_nve_flood_index_clear(struct mlxsw_sp_fid *fid); bool mlxsw_sp_fid_nve_flood_index_is_set(const struct mlxsw_sp_fid *fid); -int mlxsw_sp_fid_vni_set(struct mlxsw_sp_fid *fid, __be32 vni); +int mlxsw_sp_fid_vni_set(struct mlxsw_sp_fid *fid, enum mlxsw_sp_nve_type type, + __be32 vni, int nve_ifindex); void mlxsw_sp_fid_vni_clear(struct mlxsw_sp_fid *fid); bool mlxsw_sp_fid_vni_is_set(const struct mlxsw_sp_fid *fid); +void mlxsw_sp_fid_fdb_clear_offload(const struct mlxsw_sp_fid *fid, + const struct net_device *nve_dev); int mlxsw_sp_fid_flood_set(struct mlxsw_sp_fid *fid, enum mlxsw_sp_flood_type packet_type, u8 local_port, bool member); @@ -738,10 +829,10 @@ int mlxsw_sp_fid_port_vid_map(struct mlxsw_sp_fid *fid, struct mlxsw_sp_port *mlxsw_sp_port, u16 vid); void mlxsw_sp_fid_port_vid_unmap(struct mlxsw_sp_fid *fid, struct mlxsw_sp_port *mlxsw_sp_port, u16 vid); -enum mlxsw_sp_rif_type mlxsw_sp_fid_rif_type(const struct mlxsw_sp_fid *fid); u16 mlxsw_sp_fid_index(const struct mlxsw_sp_fid *fid); enum mlxsw_sp_fid_type mlxsw_sp_fid_type(const struct mlxsw_sp_fid *fid); void mlxsw_sp_fid_rif_set(struct mlxsw_sp_fid *fid, struct mlxsw_sp_rif *rif); +struct mlxsw_sp_rif *mlxsw_sp_fid_rif(const struct mlxsw_sp_fid *fid); enum mlxsw_sp_rif_type mlxsw_sp_fid_type_rif_type(const struct mlxsw_sp *mlxsw_sp, enum mlxsw_sp_fid_type type); @@ -749,6 +840,8 @@ u16 mlxsw_sp_fid_8021q_vid(const struct mlxsw_sp_fid *fid); struct mlxsw_sp_fid *mlxsw_sp_fid_8021q_get(struct mlxsw_sp *mlxsw_sp, u16 vid); struct mlxsw_sp_fid *mlxsw_sp_fid_8021d_get(struct mlxsw_sp *mlxsw_sp, int br_ifindex); +struct mlxsw_sp_fid *mlxsw_sp_fid_8021q_lookup(struct mlxsw_sp *mlxsw_sp, + u16 vid); struct mlxsw_sp_fid *mlxsw_sp_fid_8021d_lookup(struct mlxsw_sp *mlxsw_sp, int br_ifindex); struct mlxsw_sp_fid *mlxsw_sp_fid_rfid_get(struct mlxsw_sp *mlxsw_sp, @@ -797,10 +890,6 @@ extern const struct mlxsw_sp_mr_tcam_ops mlxsw_sp1_mr_tcam_ops; extern const struct mlxsw_sp_mr_tcam_ops mlxsw_sp2_mr_tcam_ops; /* spectrum_nve.c */ -enum mlxsw_sp_nve_type { - MLXSW_SP_NVE_TYPE_VXLAN, -}; - struct mlxsw_sp_nve_params { enum mlxsw_sp_nve_type type; __be32 vni; @@ -810,6 +899,9 @@ struct mlxsw_sp_nve_params { extern const struct mlxsw_sp_nve_ops *mlxsw_sp1_nve_ops_arr[]; extern const struct mlxsw_sp_nve_ops *mlxsw_sp2_nve_ops_arr[]; +int mlxsw_sp_nve_learned_ip_resolve(struct mlxsw_sp *mlxsw_sp, u32 uip, + enum mlxsw_sp_l3proto proto, + union mlxsw_sp_l3addr *addr); int mlxsw_sp_nve_flood_ip_add(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fid *fid, enum mlxsw_sp_l3proto proto, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum1_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum1_acl_tcam.c index 2a9eac90002e..3a636f753607 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum1_acl_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum1_acl_tcam.c @@ -67,7 +67,7 @@ mlxsw_sp1_acl_ctcam_region_catchall_add(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_acl_ctcam_chunk_init(®ion->cregion, ®ion->catchall.cchunk, MLXSW_SP_ACL_TCAM_CATCHALL_PRIO); - rulei = mlxsw_sp_acl_rulei_create(mlxsw_sp->acl); + rulei = mlxsw_sp_acl_rulei_create(mlxsw_sp->acl, NULL); if (IS_ERR(rulei)) { err = PTR_ERR(rulei); goto err_rulei_create; @@ -112,7 +112,8 @@ mlxsw_sp1_acl_ctcam_region_catchall_del(struct mlxsw_sp *mlxsw_sp, static int mlxsw_sp1_acl_tcam_region_init(struct mlxsw_sp *mlxsw_sp, void *region_priv, void *tcam_priv, - struct mlxsw_sp_acl_tcam_region *_region) + struct mlxsw_sp_acl_tcam_region *_region, + void *hints_priv) { struct 
mlxsw_sp1_acl_tcam_region *region = region_priv; int err; @@ -193,6 +194,14 @@ static void mlxsw_sp1_acl_tcam_entry_del(struct mlxsw_sp *mlxsw_sp, } static int +mlxsw_sp1_acl_tcam_entry_action_replace(struct mlxsw_sp *mlxsw_sp, + void *region_priv, void *entry_priv, + struct mlxsw_sp_acl_rule_info *rulei) +{ + return -EOPNOTSUPP; +} + +static int mlxsw_sp1_acl_tcam_region_entry_activity_get(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_tcam_region *_region, unsigned int offset, @@ -240,5 +249,6 @@ const struct mlxsw_sp_acl_tcam_ops mlxsw_sp1_acl_tcam_ops = { .entry_priv_size = sizeof(struct mlxsw_sp1_acl_tcam_entry), .entry_add = mlxsw_sp1_acl_tcam_entry_add, .entry_del = mlxsw_sp1_acl_tcam_entry_del, + .entry_action_replace = mlxsw_sp1_acl_tcam_entry_action_replace, .entry_activity_get = mlxsw_sp1_acl_tcam_entry_activity_get, }; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum2_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum2_acl_tcam.c index 8ca77f3e8f27..6c66a0f1b79e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum2_acl_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum2_acl_tcam.c @@ -34,15 +34,15 @@ mlxsw_sp2_acl_ctcam_region_entry_insert(struct mlxsw_sp_acl_ctcam_region *cregio { struct mlxsw_sp_acl_atcam_region *aregion; struct mlxsw_sp_acl_atcam_entry *aentry; - struct mlxsw_sp_acl_erp *erp; + struct mlxsw_sp_acl_erp_mask *erp_mask; aregion = mlxsw_sp_acl_tcam_cregion_aregion(cregion); aentry = mlxsw_sp_acl_tcam_centry_aentry(centry); - erp = mlxsw_sp_acl_erp_get(aregion, mask, true); - if (IS_ERR(erp)) - return PTR_ERR(erp); - aentry->erp = erp; + erp_mask = mlxsw_sp_acl_erp_mask_get(aregion, mask, true); + if (IS_ERR(erp_mask)) + return PTR_ERR(erp_mask); + aentry->erp_mask = erp_mask; return 0; } @@ -57,7 +57,7 @@ mlxsw_sp2_acl_ctcam_region_entry_remove(struct mlxsw_sp_acl_ctcam_region *cregio aregion = mlxsw_sp_acl_tcam_cregion_aregion(cregion); aentry = mlxsw_sp_acl_tcam_centry_aentry(centry); - mlxsw_sp_acl_erp_put(aregion, aentry->erp); + mlxsw_sp_acl_erp_mask_put(aregion, aentry->erp_mask); } static const struct mlxsw_sp_acl_ctcam_region_ops @@ -139,7 +139,8 @@ static void mlxsw_sp2_acl_tcam_fini(struct mlxsw_sp *mlxsw_sp, void *priv) static int mlxsw_sp2_acl_tcam_region_init(struct mlxsw_sp *mlxsw_sp, void *region_priv, void *tcam_priv, - struct mlxsw_sp_acl_tcam_region *_region) + struct mlxsw_sp_acl_tcam_region *_region, + void *hints_priv) { struct mlxsw_sp2_acl_tcam_region *region = region_priv; struct mlxsw_sp2_acl_tcam *tcam = tcam_priv; @@ -147,7 +148,8 @@ mlxsw_sp2_acl_tcam_region_init(struct mlxsw_sp *mlxsw_sp, void *region_priv, region->region = _region; return mlxsw_sp_acl_atcam_region_init(mlxsw_sp, &tcam->atcam, - ®ion->aregion, _region, + ®ion->aregion, + _region, hints_priv, &mlxsw_sp2_acl_ctcam_region_ops); } @@ -166,6 +168,18 @@ mlxsw_sp2_acl_tcam_region_associate(struct mlxsw_sp *mlxsw_sp, return mlxsw_sp_acl_atcam_region_associate(mlxsw_sp, region->id); } +static void *mlxsw_sp2_acl_tcam_region_rehash_hints_get(void *region_priv) +{ + struct mlxsw_sp2_acl_tcam_region *region = region_priv; + + return mlxsw_sp_acl_atcam_rehash_hints_get(®ion->aregion); +} + +static void mlxsw_sp2_acl_tcam_region_rehash_hints_put(void *hints_priv) +{ + mlxsw_sp_acl_atcam_rehash_hints_put(hints_priv); +} + static void mlxsw_sp2_acl_tcam_chunk_init(void *region_priv, void *chunk_priv, unsigned int priority) { @@ -211,6 +225,20 @@ static void mlxsw_sp2_acl_tcam_entry_del(struct mlxsw_sp *mlxsw_sp, } static int 
+mlxsw_sp2_acl_tcam_entry_action_replace(struct mlxsw_sp *mlxsw_sp, + void *region_priv, void *entry_priv, + struct mlxsw_sp_acl_rule_info *rulei) +{ + struct mlxsw_sp2_acl_tcam_region *region = region_priv; + struct mlxsw_sp2_acl_tcam_entry *entry = entry_priv; + + entry->act_block = rulei->act_block; + return mlxsw_sp_acl_atcam_entry_action_replace(mlxsw_sp, + ®ion->aregion, + &entry->aentry, rulei); +} + +static int mlxsw_sp2_acl_tcam_entry_activity_get(struct mlxsw_sp *mlxsw_sp, void *region_priv, void *entry_priv, bool *activity) @@ -229,11 +257,14 @@ const struct mlxsw_sp_acl_tcam_ops mlxsw_sp2_acl_tcam_ops = { .region_init = mlxsw_sp2_acl_tcam_region_init, .region_fini = mlxsw_sp2_acl_tcam_region_fini, .region_associate = mlxsw_sp2_acl_tcam_region_associate, + .region_rehash_hints_get = mlxsw_sp2_acl_tcam_region_rehash_hints_get, + .region_rehash_hints_put = mlxsw_sp2_acl_tcam_region_rehash_hints_put, .chunk_priv_size = sizeof(struct mlxsw_sp2_acl_tcam_chunk), .chunk_init = mlxsw_sp2_acl_tcam_chunk_init, .chunk_fini = mlxsw_sp2_acl_tcam_chunk_fini, .entry_priv_size = sizeof(struct mlxsw_sp2_acl_tcam_entry), .entry_add = mlxsw_sp2_acl_tcam_entry_add, .entry_del = mlxsw_sp2_acl_tcam_entry_del, + .entry_action_replace = mlxsw_sp2_acl_tcam_entry_action_replace, .entry_activity_get = mlxsw_sp2_acl_tcam_entry_activity_get, }; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum2_mr_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum2_mr_tcam.c index 4dd62478162e..e31ec75ac035 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum2_mr_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum2_mr_tcam.c @@ -7,6 +7,201 @@ #include "spectrum.h" #include "spectrum_mr.h" +struct mlxsw_sp2_mr_tcam { + struct mlxsw_sp *mlxsw_sp; + struct mlxsw_sp_acl_block *acl_block; + struct mlxsw_sp_acl_ruleset *ruleset4; + struct mlxsw_sp_acl_ruleset *ruleset6; +}; + +struct mlxsw_sp2_mr_route { + struct mlxsw_sp2_mr_tcam *mr_tcam; +}; + +static struct mlxsw_sp_acl_ruleset * +mlxsw_sp2_mr_tcam_proto_ruleset(struct mlxsw_sp2_mr_tcam *mr_tcam, + enum mlxsw_sp_l3proto proto) +{ + switch (proto) { + case MLXSW_SP_L3_PROTO_IPV4: + return mr_tcam->ruleset4; + case MLXSW_SP_L3_PROTO_IPV6: + return mr_tcam->ruleset6; + } + return NULL; +} + +static int mlxsw_sp2_mr_tcam_bind_group(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_reg_pemrbt_protocol protocol, + struct mlxsw_sp_acl_ruleset *ruleset) +{ + char pemrbt_pl[MLXSW_REG_PEMRBT_LEN]; + u16 group_id; + + group_id = mlxsw_sp_acl_ruleset_group_id(ruleset); + + mlxsw_reg_pemrbt_pack(pemrbt_pl, protocol, group_id); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pemrbt), pemrbt_pl); +} + +static const enum mlxsw_afk_element mlxsw_sp2_mr_tcam_usage_ipv4[] = { + MLXSW_AFK_ELEMENT_VIRT_ROUTER_8_10, + MLXSW_AFK_ELEMENT_VIRT_ROUTER_0_7, + MLXSW_AFK_ELEMENT_SRC_IP_0_31, + MLXSW_AFK_ELEMENT_DST_IP_0_31, +}; + +static int mlxsw_sp2_mr_tcam_ipv4_init(struct mlxsw_sp2_mr_tcam *mr_tcam) +{ + struct mlxsw_afk_element_usage elusage; + int err; + + /* Initialize IPv4 ACL group. */ + mlxsw_afk_element_usage_fill(&elusage, + mlxsw_sp2_mr_tcam_usage_ipv4, + ARRAY_SIZE(mlxsw_sp2_mr_tcam_usage_ipv4)); + mr_tcam->ruleset4 = mlxsw_sp_acl_ruleset_get(mr_tcam->mlxsw_sp, + mr_tcam->acl_block, + MLXSW_SP_L3_PROTO_IPV4, + MLXSW_SP_ACL_PROFILE_MR, + &elusage); + + if (IS_ERR(mr_tcam->ruleset4)) + return PTR_ERR(mr_tcam->ruleset4); + + /* MC Router groups should be bound before routes are inserted. 
*/ + err = mlxsw_sp2_mr_tcam_bind_group(mr_tcam->mlxsw_sp, + MLXSW_REG_PEMRBT_PROTO_IPV4, + mr_tcam->ruleset4); + if (err) + goto err_bind_group; + + return 0; + +err_bind_group: + mlxsw_sp_acl_ruleset_put(mr_tcam->mlxsw_sp, mr_tcam->ruleset4); + return err; +} + +static void mlxsw_sp2_mr_tcam_ipv4_fini(struct mlxsw_sp2_mr_tcam *mr_tcam) +{ + mlxsw_sp_acl_ruleset_put(mr_tcam->mlxsw_sp, mr_tcam->ruleset4); +} + +static const enum mlxsw_afk_element mlxsw_sp2_mr_tcam_usage_ipv6[] = { + MLXSW_AFK_ELEMENT_VIRT_ROUTER_8_10, + MLXSW_AFK_ELEMENT_VIRT_ROUTER_0_7, + MLXSW_AFK_ELEMENT_SRC_IP_96_127, + MLXSW_AFK_ELEMENT_SRC_IP_64_95, + MLXSW_AFK_ELEMENT_SRC_IP_32_63, + MLXSW_AFK_ELEMENT_SRC_IP_0_31, + MLXSW_AFK_ELEMENT_DST_IP_96_127, + MLXSW_AFK_ELEMENT_DST_IP_64_95, + MLXSW_AFK_ELEMENT_DST_IP_32_63, + MLXSW_AFK_ELEMENT_DST_IP_0_31, +}; + +static int mlxsw_sp2_mr_tcam_ipv6_init(struct mlxsw_sp2_mr_tcam *mr_tcam) +{ + struct mlxsw_afk_element_usage elusage; + int err; + + /* Initialize IPv6 ACL group */ + mlxsw_afk_element_usage_fill(&elusage, + mlxsw_sp2_mr_tcam_usage_ipv6, + ARRAY_SIZE(mlxsw_sp2_mr_tcam_usage_ipv6)); + mr_tcam->ruleset6 = mlxsw_sp_acl_ruleset_get(mr_tcam->mlxsw_sp, + mr_tcam->acl_block, + MLXSW_SP_L3_PROTO_IPV6, + MLXSW_SP_ACL_PROFILE_MR, + &elusage); + + if (IS_ERR(mr_tcam->ruleset6)) + return PTR_ERR(mr_tcam->ruleset6); + + /* MC Router groups should be bound before routes are inserted. */ + err = mlxsw_sp2_mr_tcam_bind_group(mr_tcam->mlxsw_sp, + MLXSW_REG_PEMRBT_PROTO_IPV6, + mr_tcam->ruleset6); + if (err) + goto err_bind_group; + + return 0; + +err_bind_group: + mlxsw_sp_acl_ruleset_put(mr_tcam->mlxsw_sp, mr_tcam->ruleset6); + return err; +} + +static void mlxsw_sp2_mr_tcam_ipv6_fini(struct mlxsw_sp2_mr_tcam *mr_tcam) +{ + mlxsw_sp_acl_ruleset_put(mr_tcam->mlxsw_sp, mr_tcam->ruleset6); +} + +static void +mlxsw_sp2_mr_tcam_rule_parse4(struct mlxsw_sp_acl_rule_info *rulei, + struct mlxsw_sp_mr_route_key *key) +{ + mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_SRC_IP_0_31, + (char *) &key->source.addr4, + (char *) &key->source_mask.addr4, 4); + mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_DST_IP_0_31, + (char *) &key->group.addr4, + (char *) &key->group_mask.addr4, 4); +} + +static void +mlxsw_sp2_mr_tcam_rule_parse6(struct mlxsw_sp_acl_rule_info *rulei, + struct mlxsw_sp_mr_route_key *key) +{ + mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_SRC_IP_96_127, + &key->source.addr6.s6_addr[0x0], + &key->source_mask.addr6.s6_addr[0x0], 4); + mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_SRC_IP_64_95, + &key->source.addr6.s6_addr[0x4], + &key->source_mask.addr6.s6_addr[0x4], 4); + mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_SRC_IP_32_63, + &key->source.addr6.s6_addr[0x8], + &key->source_mask.addr6.s6_addr[0x8], 4); + mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_SRC_IP_0_31, + &key->source.addr6.s6_addr[0xc], + &key->source_mask.addr6.s6_addr[0xc], 4); + mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_DST_IP_96_127, + &key->group.addr6.s6_addr[0x0], + &key->group_mask.addr6.s6_addr[0x0], 4); + mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_DST_IP_64_95, + &key->group.addr6.s6_addr[0x4], + &key->group_mask.addr6.s6_addr[0x4], 4); + mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_DST_IP_32_63, + &key->group.addr6.s6_addr[0x8], + &key->group_mask.addr6.s6_addr[0x8], 4); + mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_DST_IP_0_31, + &key->group.addr6.s6_addr[0xc], + 
&key->group_mask.addr6.s6_addr[0xc], 4); +} + +static void +mlxsw_sp2_mr_tcam_rule_parse(struct mlxsw_sp_acl_rule *rule, + struct mlxsw_sp_mr_route_key *key, + unsigned int priority) +{ + struct mlxsw_sp_acl_rule_info *rulei; + + rulei = mlxsw_sp_acl_rule_rulei(rule); + rulei->priority = priority; + mlxsw_sp_acl_rulei_keymask_u32(rulei, MLXSW_AFK_ELEMENT_VIRT_ROUTER_0_7, + key->vrid, GENMASK(7, 0)); + mlxsw_sp_acl_rulei_keymask_u32(rulei, + MLXSW_AFK_ELEMENT_VIRT_ROUTER_8_10, + key->vrid >> 8, GENMASK(2, 0)); + switch (key->proto) { + case MLXSW_SP_L3_PROTO_IPV4: + return mlxsw_sp2_mr_tcam_rule_parse4(rulei, key); + case MLXSW_SP_L3_PROTO_IPV6: + return mlxsw_sp2_mr_tcam_rule_parse6(rulei, key); + } +} + static int mlxsw_sp2_mr_tcam_route_create(struct mlxsw_sp *mlxsw_sp, void *priv, void *route_priv, @@ -14,7 +209,33 @@ mlxsw_sp2_mr_tcam_route_create(struct mlxsw_sp *mlxsw_sp, void *priv, struct mlxsw_afa_block *afa_block, enum mlxsw_sp_mr_route_prio prio) { + struct mlxsw_sp2_mr_route *mr_route = route_priv; + struct mlxsw_sp2_mr_tcam *mr_tcam = priv; + struct mlxsw_sp_acl_ruleset *ruleset; + struct mlxsw_sp_acl_rule *rule; + int err; + + mr_route->mr_tcam = mr_tcam; + ruleset = mlxsw_sp2_mr_tcam_proto_ruleset(mr_tcam, key->proto); + if (WARN_ON(!ruleset)) + return -EINVAL; + + rule = mlxsw_sp_acl_rule_create(mlxsw_sp, ruleset, + (unsigned long) route_priv, afa_block, + NULL); + if (IS_ERR(rule)) + return PTR_ERR(rule); + + mlxsw_sp2_mr_tcam_rule_parse(rule, key, prio); + err = mlxsw_sp_acl_rule_add(mlxsw_sp, rule); + if (err) + goto err_rule_add; + return 0; + +err_rule_add: + mlxsw_sp_acl_rule_destroy(mlxsw_sp, rule); + return err; } static void @@ -22,6 +243,21 @@ mlxsw_sp2_mr_tcam_route_destroy(struct mlxsw_sp *mlxsw_sp, void *priv, void *route_priv, struct mlxsw_sp_mr_route_key *key) { + struct mlxsw_sp2_mr_tcam *mr_tcam = priv; + struct mlxsw_sp_acl_ruleset *ruleset; + struct mlxsw_sp_acl_rule *rule; + + ruleset = mlxsw_sp2_mr_tcam_proto_ruleset(mr_tcam, key->proto); + if (WARN_ON(!ruleset)) + return; + + rule = mlxsw_sp_acl_rule_lookup(mlxsw_sp, ruleset, + (unsigned long) route_priv); + if (WARN_ON(!rule)) + return; + + mlxsw_sp_acl_rule_del(mlxsw_sp, rule); + mlxsw_sp_acl_rule_destroy(mlxsw_sp, rule); } static int @@ -30,21 +266,64 @@ mlxsw_sp2_mr_tcam_route_update(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_mr_route_key *key, struct mlxsw_afa_block *afa_block) { - return 0; + struct mlxsw_sp2_mr_route *mr_route = route_priv; + struct mlxsw_sp2_mr_tcam *mr_tcam = mr_route->mr_tcam; + struct mlxsw_sp_acl_ruleset *ruleset; + struct mlxsw_sp_acl_rule *rule; + + ruleset = mlxsw_sp2_mr_tcam_proto_ruleset(mr_tcam, key->proto); + if (WARN_ON(!ruleset)) + return -EINVAL; + + rule = mlxsw_sp_acl_rule_lookup(mlxsw_sp, ruleset, + (unsigned long) route_priv); + if (WARN_ON(!rule)) + return -EINVAL; + + return mlxsw_sp_acl_rule_action_replace(mlxsw_sp, rule, afa_block); } static int mlxsw_sp2_mr_tcam_init(struct mlxsw_sp *mlxsw_sp, void *priv) { + struct mlxsw_sp2_mr_tcam *mr_tcam = priv; + int err; + + mr_tcam->mlxsw_sp = mlxsw_sp; + mr_tcam->acl_block = mlxsw_sp_acl_block_create(mlxsw_sp, NULL); + if (!mr_tcam->acl_block) + return -ENOMEM; + + err = mlxsw_sp2_mr_tcam_ipv4_init(mr_tcam); + if (err) + goto err_ipv4_init; + + err = mlxsw_sp2_mr_tcam_ipv6_init(mr_tcam); + if (err) + goto err_ipv6_init; + return 0; + +err_ipv6_init: + mlxsw_sp2_mr_tcam_ipv4_fini(mr_tcam); +err_ipv4_init: + mlxsw_sp_acl_block_destroy(mr_tcam->acl_block); + return err; } static void mlxsw_sp2_mr_tcam_fini(void 
*priv) { + struct mlxsw_sp2_mr_tcam *mr_tcam = priv; + + mlxsw_sp2_mr_tcam_ipv6_fini(mr_tcam); + mlxsw_sp2_mr_tcam_ipv4_fini(mr_tcam); + mlxsw_sp_acl_block_destroy(mr_tcam->acl_block); } const struct mlxsw_sp_mr_tcam_ops mlxsw_sp2_mr_tcam_ops = { + .priv_size = sizeof(struct mlxsw_sp2_mr_tcam), .init = mlxsw_sp2_mr_tcam_init, .fini = mlxsw_sp2_mr_tcam_fini, + .route_priv_size = sizeof(struct mlxsw_sp2_mr_route), .route_create = mlxsw_sp2_mr_tcam_route_create, .route_destroy = mlxsw_sp2_mr_tcam_route_destroy, .route_update = mlxsw_sp2_mr_tcam_route_update, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c index c4f9238591e6..a146a44634e9 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c @@ -435,7 +435,8 @@ u16 mlxsw_sp_acl_ruleset_group_id(struct mlxsw_sp_acl_ruleset *ruleset) } struct mlxsw_sp_acl_rule_info * -mlxsw_sp_acl_rulei_create(struct mlxsw_sp_acl *acl) +mlxsw_sp_acl_rulei_create(struct mlxsw_sp_acl *acl, + struct mlxsw_afa_block *afa_block) { struct mlxsw_sp_acl_rule_info *rulei; int err; @@ -443,11 +444,18 @@ mlxsw_sp_acl_rulei_create(struct mlxsw_sp_acl *acl) rulei = kzalloc(sizeof(*rulei), GFP_KERNEL); if (!rulei) return NULL; + + if (afa_block) { + rulei->act_block = afa_block; + return rulei; + } + rulei->act_block = mlxsw_afa_block_create(acl->mlxsw_sp->afa); if (IS_ERR(rulei->act_block)) { err = PTR_ERR(rulei->act_block); goto err_afa_block_create; } + rulei->action_created = 1; return rulei; err_afa_block_create: @@ -457,7 +465,8 @@ err_afa_block_create: void mlxsw_sp_acl_rulei_destroy(struct mlxsw_sp_acl_rule_info *rulei) { - mlxsw_afa_block_destroy(rulei->act_block); + if (rulei->action_created) + mlxsw_afa_block_destroy(rulei->act_block); kfree(rulei); } @@ -579,7 +588,7 @@ int mlxsw_sp_acl_rulei_act_vlan(struct mlxsw_sp *mlxsw_sp, { u8 ethertype; - if (action == TCA_VLAN_ACT_MODIFY) { + if (action == FLOW_ACTION_VLAN_MANGLE) { switch (proto) { case ETH_P_8021Q: ethertype = 0; @@ -623,6 +632,7 @@ struct mlxsw_sp_acl_rule * mlxsw_sp_acl_rule_create(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_ruleset *ruleset, unsigned long cookie, + struct mlxsw_afa_block *afa_block, struct netlink_ext_ack *extack) { const struct mlxsw_sp_acl_profile_ops *ops = ruleset->ht_key.ops; @@ -630,7 +640,7 @@ mlxsw_sp_acl_rule_create(struct mlxsw_sp *mlxsw_sp, int err; mlxsw_sp_acl_ruleset_ref_inc(ruleset); - rule = kzalloc(sizeof(*rule) + ops->rule_priv_size(mlxsw_sp), + rule = kzalloc(sizeof(*rule) + ops->rule_priv_size, GFP_KERNEL); if (!rule) { err = -ENOMEM; @@ -639,7 +649,7 @@ mlxsw_sp_acl_rule_create(struct mlxsw_sp *mlxsw_sp, rule->cookie = cookie; rule->ruleset = ruleset; - rule->rulei = mlxsw_sp_acl_rulei_create(mlxsw_sp->acl); + rule->rulei = mlxsw_sp_acl_rulei_create(mlxsw_sp->acl, afa_block); if (IS_ERR(rule->rulei)) { err = PTR_ERR(rule->rulei); goto err_rulei_create; @@ -721,6 +731,20 @@ void mlxsw_sp_acl_rule_del(struct mlxsw_sp *mlxsw_sp, ops->rule_del(mlxsw_sp, rule->priv); } +int mlxsw_sp_acl_rule_action_replace(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule *rule, + struct mlxsw_afa_block *afa_block) +{ + struct mlxsw_sp_acl_ruleset *ruleset = rule->ruleset; + const struct mlxsw_sp_acl_profile_ops *ops = ruleset->ht_key.ops; + struct mlxsw_sp_acl_rule_info *rulei; + + rulei = mlxsw_sp_acl_rule_rulei(rule); + rulei->act_block = afa_block; + + return ops->rule_action_replace(mlxsw_sp, rule->priv, rule->rulei); +} + struct 
mlxsw_sp_acl_rule * mlxsw_sp_acl_rule_lookup(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_ruleset *ruleset, @@ -781,7 +805,7 @@ static void mlxsw_sp_acl_rule_activity_work_schedule(struct mlxsw_sp_acl *acl) msecs_to_jiffies(interval)); } -static void mlxsw_sp_acl_rul_activity_update_work(struct work_struct *work) +static void mlxsw_sp_acl_rule_activity_update_work(struct work_struct *work) { struct mlxsw_sp_acl *acl = container_of(work, struct mlxsw_sp_acl, rule_activity_update.dw.work); @@ -860,7 +884,7 @@ int mlxsw_sp_acl_init(struct mlxsw_sp *mlxsw_sp) /* Create the delayed work for the rule activity_update */ INIT_DELAYED_WORK(&acl->rule_activity_update.dw, - mlxsw_sp_acl_rul_activity_update_work); + mlxsw_sp_acl_rule_activity_update_work); acl->rule_activity_update.interval = MLXSW_SP_ACL_RULE_ACTIVITY_UPDATE_PERIOD_MS; mlxsw_core_schedule_dw(&acl->rule_activity_update.dw, 0); return 0; @@ -888,3 +912,19 @@ void mlxsw_sp_acl_fini(struct mlxsw_sp *mlxsw_sp) mlxsw_afk_destroy(acl->afk); kfree(acl); } + +u32 mlxsw_sp_acl_region_rehash_intrvl_get(struct mlxsw_sp *mlxsw_sp) +{ + struct mlxsw_sp_acl *acl = mlxsw_sp->acl; + + return mlxsw_sp_acl_tcam_vregion_rehash_intrvl_get(mlxsw_sp, + &acl->tcam); +} + +int mlxsw_sp_acl_region_rehash_intrvl_set(struct mlxsw_sp *mlxsw_sp, u32 val) +{ + struct mlxsw_sp_acl *acl = mlxsw_sp->acl; + + return mlxsw_sp_acl_tcam_vregion_rehash_intrvl_set(mlxsw_sp, + &acl->tcam, val); +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_atcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_atcam.c index 2dda028f94db..ded4cf658680 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_atcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_atcam.c @@ -7,6 +7,8 @@ #include <linux/gfp.h> #include <linux/refcount.h> #include <linux/rhashtable.h> +#define CREATE_TRACE_POINTS +#include <trace/events/mlxsw.h> #include "reg.h" #include "core.h" @@ -14,8 +16,8 @@ #include "spectrum_acl_tcam.h" #include "core_acl_flex_keys.h" -#define MLXSW_SP_ACL_ATCAM_LKEY_ID_BLOCK_START 6 -#define MLXSW_SP_ACL_ATCAM_LKEY_ID_BLOCK_END 11 +#define MLXSW_SP_ACL_ATCAM_LKEY_ID_BLOCK_CLEAR_START 0 +#define MLXSW_SP_ACL_ATCAM_LKEY_ID_BLOCK_CLEAR_END 5 struct mlxsw_sp_acl_atcam_lkey_id_ht_key { char enc_key[MLXSW_REG_PTCEX_FLEX_KEY_BLOCKS_LEN]; /* MSB blocks */ @@ -34,7 +36,7 @@ struct mlxsw_sp_acl_atcam_region_ops { void (*fini)(struct mlxsw_sp_acl_atcam_region *aregion); struct mlxsw_sp_acl_atcam_lkey_id * (*lkey_id_get)(struct mlxsw_sp_acl_atcam_region *aregion, - struct mlxsw_sp_acl_rule_info *rulei, u8 erp_id); + char *enc_key, u8 erp_id); void (*lkey_id_put)(struct mlxsw_sp_acl_atcam_region *aregion, struct mlxsw_sp_acl_atcam_lkey_id *lkey_id); }; @@ -64,7 +66,7 @@ static const struct rhashtable_params mlxsw_sp_acl_atcam_entries_ht_params = { static bool mlxsw_sp_acl_atcam_is_centry(const struct mlxsw_sp_acl_atcam_entry *aentry) { - return mlxsw_sp_acl_erp_is_ctcam_erp(aentry->erp); + return mlxsw_sp_acl_erp_mask_is_ctcam(aentry->erp_mask); } static int @@ -90,8 +92,7 @@ mlxsw_sp_acl_atcam_region_generic_fini(struct mlxsw_sp_acl_atcam_region *aregion static struct mlxsw_sp_acl_atcam_lkey_id * mlxsw_sp_acl_atcam_generic_lkey_id_get(struct mlxsw_sp_acl_atcam_region *aregion, - struct mlxsw_sp_acl_rule_info *rulei, - u8 erp_id) + char *enc_key, u8 erp_id) { struct mlxsw_sp_acl_atcam_region_generic *region_generic; @@ -220,8 +221,7 @@ mlxsw_sp_acl_atcam_lkey_id_destroy(struct mlxsw_sp_acl_atcam_region *aregion, static struct mlxsw_sp_acl_atcam_lkey_id * 
mlxsw_sp_acl_atcam_12kb_lkey_id_get(struct mlxsw_sp_acl_atcam_region *aregion, - struct mlxsw_sp_acl_rule_info *rulei, - u8 erp_id) + char *enc_key, u8 erp_id) { struct mlxsw_sp_acl_atcam_region_12kb *region_12kb = aregion->priv; struct mlxsw_sp_acl_tcam_region *region = aregion->region; @@ -230,9 +230,10 @@ mlxsw_sp_acl_atcam_12kb_lkey_id_get(struct mlxsw_sp_acl_atcam_region *aregion, struct mlxsw_afk *afk = mlxsw_sp_acl_afk(mlxsw_sp->acl); struct mlxsw_sp_acl_atcam_lkey_id *lkey_id; - mlxsw_afk_encode(afk, region->key_info, &rulei->values, ht_key.enc_key, - NULL, MLXSW_SP_ACL_ATCAM_LKEY_ID_BLOCK_START, - MLXSW_SP_ACL_ATCAM_LKEY_ID_BLOCK_END); + memcpy(ht_key.enc_key, enc_key, sizeof(ht_key.enc_key)); + mlxsw_afk_clear(afk, ht_key.enc_key, + MLXSW_SP_ACL_ATCAM_LKEY_ID_BLOCK_CLEAR_START, + MLXSW_SP_ACL_ATCAM_LKEY_ID_BLOCK_CLEAR_END); ht_key.erp_id = erp_id; lkey_id = rhashtable_lookup_fast(®ion_12kb->lkey_ht, &ht_key, mlxsw_sp_acl_atcam_lkey_id_ht_params); @@ -317,6 +318,7 @@ mlxsw_sp_acl_atcam_region_init(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_atcam *atcam, struct mlxsw_sp_acl_atcam_region *aregion, struct mlxsw_sp_acl_tcam_region *region, + void *hints_priv, const struct mlxsw_sp_acl_ctcam_region_ops *ops) { int err; @@ -324,6 +326,7 @@ mlxsw_sp_acl_atcam_region_init(struct mlxsw_sp *mlxsw_sp, aregion->region = region; aregion->atcam = atcam; mlxsw_sp_acl_atcam_region_type_init(aregion); + INIT_LIST_HEAD(&aregion->entries_list); err = rhashtable_init(&aregion->entries_ht, &mlxsw_sp_acl_atcam_entries_ht_params); @@ -332,7 +335,7 @@ mlxsw_sp_acl_atcam_region_init(struct mlxsw_sp *mlxsw_sp, err = aregion->ops->init(aregion); if (err) goto err_ops_init; - err = mlxsw_sp_acl_erp_region_init(aregion); + err = mlxsw_sp_acl_erp_region_init(aregion, hints_priv); if (err) goto err_erp_region_init; err = mlxsw_sp_acl_ctcam_region_init(mlxsw_sp, &aregion->cregion, @@ -357,6 +360,7 @@ void mlxsw_sp_acl_atcam_region_fini(struct mlxsw_sp_acl_atcam_region *aregion) mlxsw_sp_acl_erp_region_fini(aregion); aregion->ops->fini(aregion); rhashtable_destroy(&aregion->entries_ht); + WARN_ON(!list_empty(&aregion->entries_list)); } void mlxsw_sp_acl_atcam_chunk_init(struct mlxsw_sp_acl_atcam_region *aregion, @@ -379,7 +383,7 @@ mlxsw_sp_acl_atcam_region_entry_insert(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_rule_info *rulei) { struct mlxsw_sp_acl_tcam_region *region = aregion->region; - u8 erp_id = mlxsw_sp_acl_erp_id(aentry->erp); + u8 erp_id = mlxsw_sp_acl_erp_mask_erp_id(aentry->erp_mask); struct mlxsw_sp_acl_atcam_lkey_id *lkey_id; char ptce3_pl[MLXSW_REG_PTCE3_LEN]; u32 kvdl_index, priority; @@ -389,7 +393,7 @@ mlxsw_sp_acl_atcam_region_entry_insert(struct mlxsw_sp *mlxsw_sp, if (err) return err; - lkey_id = aregion->ops->lkey_id_get(aregion, rulei, erp_id); + lkey_id = aregion->ops->lkey_id_get(aregion, aentry->enc_key, erp_id); if (IS_ERR(lkey_id)) return PTR_ERR(lkey_id); aentry->lkey_id = lkey_id; @@ -397,7 +401,10 @@ mlxsw_sp_acl_atcam_region_entry_insert(struct mlxsw_sp *mlxsw_sp, kvdl_index = mlxsw_afa_block_first_kvdl_index(rulei->act_block); mlxsw_reg_ptce3_pack(ptce3_pl, true, MLXSW_REG_PTCE3_OP_WRITE_WRITE, priority, region->tcam_region_info, - aentry->ht_key.enc_key, erp_id, + aentry->enc_key, erp_id, + aentry->delta_info.start, + aentry->delta_info.mask, + aentry->delta_info.value, refcount_read(&lkey_id->refcnt) != 1, lkey_id->id, kvdl_index); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptce3), ptce3_pl); @@ -418,18 +425,50 @@ mlxsw_sp_acl_atcam_region_entry_remove(struct 
mlxsw_sp *mlxsw_sp, { struct mlxsw_sp_acl_atcam_lkey_id *lkey_id = aentry->lkey_id; struct mlxsw_sp_acl_tcam_region *region = aregion->region; - u8 erp_id = mlxsw_sp_acl_erp_id(aentry->erp); + u8 erp_id = mlxsw_sp_acl_erp_mask_erp_id(aentry->erp_mask); char ptce3_pl[MLXSW_REG_PTCE3_LEN]; mlxsw_reg_ptce3_pack(ptce3_pl, false, MLXSW_REG_PTCE3_OP_WRITE_WRITE, 0, - region->tcam_region_info, aentry->ht_key.enc_key, - erp_id, refcount_read(&lkey_id->refcnt) != 1, + region->tcam_region_info, + aentry->enc_key, erp_id, + aentry->delta_info.start, + aentry->delta_info.mask, + aentry->delta_info.value, + refcount_read(&lkey_id->refcnt) != 1, lkey_id->id, 0); mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptce3), ptce3_pl); aregion->ops->lkey_id_put(aregion, lkey_id); } static int +mlxsw_sp_acl_atcam_region_entry_action_replace(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_atcam_region *aregion, + struct mlxsw_sp_acl_atcam_entry *aentry, + struct mlxsw_sp_acl_rule_info *rulei) +{ + struct mlxsw_sp_acl_atcam_lkey_id *lkey_id = aentry->lkey_id; + u8 erp_id = mlxsw_sp_acl_erp_mask_erp_id(aentry->erp_mask); + struct mlxsw_sp_acl_tcam_region *region = aregion->region; + char ptce3_pl[MLXSW_REG_PTCE3_LEN]; + u32 kvdl_index, priority; + int err; + + err = mlxsw_sp_acl_tcam_priority_get(mlxsw_sp, rulei, &priority, true); + if (err) + return err; + kvdl_index = mlxsw_afa_block_first_kvdl_index(rulei->act_block); + mlxsw_reg_ptce3_pack(ptce3_pl, true, MLXSW_REG_PTCE3_OP_WRITE_UPDATE, + priority, region->tcam_region_info, + aentry->enc_key, erp_id, + aentry->delta_info.start, + aentry->delta_info.mask, + aentry->delta_info.value, + refcount_read(&lkey_id->refcnt) != 1, lkey_id->id, + kvdl_index); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptce3), ptce3_pl); +} + +static int __mlxsw_sp_acl_atcam_entry_add(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_atcam_region *aregion, struct mlxsw_sp_acl_atcam_entry *aentry, @@ -438,19 +477,37 @@ __mlxsw_sp_acl_atcam_entry_add(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_tcam_region *region = aregion->region; char mask[MLXSW_REG_PTCEX_FLEX_KEY_BLOCKS_LEN] = { 0 }; struct mlxsw_afk *afk = mlxsw_sp_acl_afk(mlxsw_sp->acl); - struct mlxsw_sp_acl_erp *erp; - unsigned int blocks_count; + const struct mlxsw_sp_acl_erp_delta *delta; + struct mlxsw_sp_acl_erp_mask *erp_mask; int err; - blocks_count = mlxsw_afk_key_info_blocks_count_get(region->key_info); mlxsw_afk_encode(afk, region->key_info, &rulei->values, - aentry->ht_key.enc_key, mask, 0, blocks_count - 1); - - erp = mlxsw_sp_acl_erp_get(aregion, mask, false); - if (IS_ERR(erp)) - return PTR_ERR(erp); - aentry->erp = erp; - aentry->ht_key.erp_id = mlxsw_sp_acl_erp_id(erp); + aentry->ht_key.full_enc_key, mask); + + erp_mask = mlxsw_sp_acl_erp_mask_get(aregion, mask, false); + if (IS_ERR(erp_mask)) + return PTR_ERR(erp_mask); + aentry->erp_mask = erp_mask; + aentry->ht_key.erp_id = mlxsw_sp_acl_erp_mask_erp_id(erp_mask); + memcpy(aentry->enc_key, aentry->ht_key.full_enc_key, + sizeof(aentry->enc_key)); + + /* Compute all needed delta information and clear the delta bits + * from the encrypted key. 
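+	 * The cleared bits are instead carried by the start/mask/value
+	 * triplet saved in delta_info below and passed along in the
+	 * PTCE-3 writes.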
+ */ + delta = mlxsw_sp_acl_erp_delta(aentry->erp_mask); + aentry->delta_info.start = mlxsw_sp_acl_erp_delta_start(delta); + aentry->delta_info.mask = mlxsw_sp_acl_erp_delta_mask(delta); + aentry->delta_info.value = + mlxsw_sp_acl_erp_delta_value(delta, + aentry->ht_key.full_enc_key); + mlxsw_sp_acl_erp_delta_clear(delta, aentry->enc_key); + + /* Add rule to the list of A-TCAM rules, assuming this + * rule is intended for the A-TCAM. In case this rule does + * not fit into the A-TCAM, it will be removed from the list. + */ + list_add(&aentry->list, &aregion->entries_list); /* We can't insert identical rules into the A-TCAM, so fail and * let the rule spill into C-TCAM
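	 * (A spilled rule lands in the region's C-TCAM; the trace point
	 * added below makes such spills observable.)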
*/ + trace_mlxsw_sp_acl_atcam_entry_add_ctcam_spill(mlxsw_sp, aregion); err = mlxsw_sp_acl_ctcam_entry_add(mlxsw_sp, &aregion->cregion, &achunk->cchunk, &aentry->centry, rulei, true); @@ -523,6 +603,27 @@ void mlxsw_sp_acl_atcam_entry_del(struct mlxsw_sp *mlxsw_sp, __mlxsw_sp_acl_atcam_entry_del(mlxsw_sp, aregion, aentry); } +int +mlxsw_sp_acl_atcam_entry_action_replace(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_atcam_region *aregion, + struct mlxsw_sp_acl_atcam_entry *aentry, + struct mlxsw_sp_acl_rule_info *rulei) +{ + int err; + + if (mlxsw_sp_acl_atcam_is_centry(aentry)) + err = mlxsw_sp_acl_ctcam_entry_action_replace(mlxsw_sp, + &aregion->cregion, + &aentry->centry, + rulei); + else + err = __mlxsw_sp_acl_atcam_entry_action_replace(mlxsw_sp, + aregion, aentry, + rulei); + + return err; +} + int mlxsw_sp_acl_atcam_init(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_atcam *atcam) { @@ -534,3 +635,14 @@ void mlxsw_sp_acl_atcam_fini(struct mlxsw_sp *mlxsw_sp, { mlxsw_sp_acl_erps_fini(mlxsw_sp, atcam); } + +void * +mlxsw_sp_acl_atcam_rehash_hints_get(struct mlxsw_sp_acl_atcam_region *aregion) +{ + return mlxsw_sp_acl_erp_rehash_hints_get(aregion); +} + +void mlxsw_sp_acl_atcam_rehash_hints_put(void *hints_priv) +{ + mlxsw_sp_acl_erp_rehash_hints_put(hints_priv); +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_bloom_filter.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_bloom_filter.c new file mode 100644 index 000000000000..3a2de13fcb68 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_bloom_filter.c @@ -0,0 +1,269 @@ +// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 +/* Copyright (c) 2018 Mellanox Technologies. All rights reserved */ + +#include <linux/errno.h> +#include <linux/gfp.h> +#include <linux/kernel.h> +#include <linux/refcount.h> +#include <linux/mutex.h> + +#include "spectrum.h" +#include "spectrum_acl_tcam.h" + +struct mlxsw_sp_acl_bf { + struct mutex lock; /* Protects Bloom Filter updates. */ + unsigned int bank_size; + refcount_t refcnt[0]; +}; + +/* Bloom filter uses a crc-16 hash over chunks of data which contain 4 key + * blocks, eRP ID and region ID. In Spectrum-2, region key is combined of up to + * 12 key blocks, so there can be up to 3 chunks in the Bloom filter key, + * depending on the actual number of key blocks used in the region. + * The layout of the Bloom filter key is as follows: + * + * +-------------------------+------------------------+------------------------+ + * | Chunk 2 Key blocks 11-8 | Chunk 1 Key blocks 7-4 | Chunk 0 Key blocks 3-0 | + * +-------------------------+------------------------+------------------------+ + */ +#define MLXSW_BLOOM_KEY_CHUNKS 3 +#define MLXSW_BLOOM_KEY_LEN 69 + +/* Each chunk size is 23 bytes. 18 bytes of it contain 4 key blocks, each is + * 36 bits, 2 bytes which hold eRP ID and region ID, and 3 bytes of zero + * padding. 
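+ * (4 key blocks * 36 bits = 144 bits = 18 bytes; 18 + 2 + 3 = 23
+ * bytes per chunk, and 3 chunks * 23 bytes = 69 bytes, matching the
+ * defines below.)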
+ * The layout of each chunk is as follows: + * + * +---------+----------------------+-----------------------------------+ + * | 3 bytes | 2 bytes | 18 bytes | + * +---------+-----------+----------+-----------------------------------+ + * | 183:158 | 157:148 | 147:144 | 143:0 | + * +---------+-----------+----------+-----------------------------------+ + * | 0 | region ID | eRP ID | 4 Key blocks (18 Bytes) | + * +---------+-----------+----------+-----------------------------------+ + */ +#define MLXSW_BLOOM_CHUNK_PAD_BYTES 3 +#define MLXSW_BLOOM_CHUNK_KEY_BYTES 18 +#define MLXSW_BLOOM_KEY_CHUNK_BYTES 23 + +/* The offset of the key block within a chunk is 5 bytes as it comes after + * 3 bytes of zero padding and 16 bits of region ID and eRP ID. + */ +#define MLXSW_BLOOM_CHUNK_KEY_OFFSET 5 + +/* Each chunk contains 4 key blocks. Chunk 2 uses key blocks 11-8, + * and we need to populate it with 4 key blocks copied from the entry encoded + * key. Since the encoded key contains a padding, key block 11 starts at offset + * 2. block 7 that is used in chunk 1 starts at offset 20 as 4 key blocks take + * 18 bytes. + * This array defines key offsets for easy access when copying key blocks from + * entry key to Bloom filter chunk. + */ +static const u8 chunk_key_offsets[MLXSW_BLOOM_KEY_CHUNKS] = {2, 20, 38}; + +/* This table is just the CRC of each possible byte. It is + * computed, Msbit first, for the Bloom filter polynomial + * which is 0x8529 (1 + x^3 + x^5 + x^8 + x^10 + x^15 and + * the implicit x^16). + */ +static const u16 mlxsw_sp_acl_bf_crc_tab[256] = { +0x0000, 0x8529, 0x8f7b, 0x0a52, 0x9bdf, 0x1ef6, 0x14a4, 0x918d, +0xb297, 0x37be, 0x3dec, 0xb8c5, 0x2948, 0xac61, 0xa633, 0x231a, +0xe007, 0x652e, 0x6f7c, 0xea55, 0x7bd8, 0xfef1, 0xf4a3, 0x718a, +0x5290, 0xd7b9, 0xddeb, 0x58c2, 0xc94f, 0x4c66, 0x4634, 0xc31d, +0x4527, 0xc00e, 0xca5c, 0x4f75, 0xdef8, 0x5bd1, 0x5183, 0xd4aa, +0xf7b0, 0x7299, 0x78cb, 0xfde2, 0x6c6f, 0xe946, 0xe314, 0x663d, +0xa520, 0x2009, 0x2a5b, 0xaf72, 0x3eff, 0xbbd6, 0xb184, 0x34ad, +0x17b7, 0x929e, 0x98cc, 0x1de5, 0x8c68, 0x0941, 0x0313, 0x863a, +0x8a4e, 0x0f67, 0x0535, 0x801c, 0x1191, 0x94b8, 0x9eea, 0x1bc3, +0x38d9, 0xbdf0, 0xb7a2, 0x328b, 0xa306, 0x262f, 0x2c7d, 0xa954, +0x6a49, 0xef60, 0xe532, 0x601b, 0xf196, 0x74bf, 0x7eed, 0xfbc4, +0xd8de, 0x5df7, 0x57a5, 0xd28c, 0x4301, 0xc628, 0xcc7a, 0x4953, +0xcf69, 0x4a40, 0x4012, 0xc53b, 0x54b6, 0xd19f, 0xdbcd, 0x5ee4, +0x7dfe, 0xf8d7, 0xf285, 0x77ac, 0xe621, 0x6308, 0x695a, 0xec73, +0x2f6e, 0xaa47, 0xa015, 0x253c, 0xb4b1, 0x3198, 0x3bca, 0xbee3, +0x9df9, 0x18d0, 0x1282, 0x97ab, 0x0626, 0x830f, 0x895d, 0x0c74, +0x91b5, 0x149c, 0x1ece, 0x9be7, 0x0a6a, 0x8f43, 0x8511, 0x0038, +0x2322, 0xa60b, 0xac59, 0x2970, 0xb8fd, 0x3dd4, 0x3786, 0xb2af, +0x71b2, 0xf49b, 0xfec9, 0x7be0, 0xea6d, 0x6f44, 0x6516, 0xe03f, +0xc325, 0x460c, 0x4c5e, 0xc977, 0x58fa, 0xddd3, 0xd781, 0x52a8, +0xd492, 0x51bb, 0x5be9, 0xdec0, 0x4f4d, 0xca64, 0xc036, 0x451f, +0x6605, 0xe32c, 0xe97e, 0x6c57, 0xfdda, 0x78f3, 0x72a1, 0xf788, +0x3495, 0xb1bc, 0xbbee, 0x3ec7, 0xaf4a, 0x2a63, 0x2031, 0xa518, +0x8602, 0x032b, 0x0979, 0x8c50, 0x1ddd, 0x98f4, 0x92a6, 0x178f, +0x1bfb, 0x9ed2, 0x9480, 0x11a9, 0x8024, 0x050d, 0x0f5f, 0x8a76, +0xa96c, 0x2c45, 0x2617, 0xa33e, 0x32b3, 0xb79a, 0xbdc8, 0x38e1, +0xfbfc, 0x7ed5, 0x7487, 0xf1ae, 0x6023, 0xe50a, 0xef58, 0x6a71, +0x496b, 0xcc42, 0xc610, 0x4339, 0xd2b4, 0x579d, 0x5dcf, 0xd8e6, +0x5edc, 0xdbf5, 0xd1a7, 0x548e, 0xc503, 0x402a, 0x4a78, 0xcf51, +0xec4b, 0x6962, 0x6330, 0xe619, 0x7794, 0xf2bd, 0xf8ef, 0x7dc6, +0xbedb, 0x3bf2, 0x31a0, 
0xb489, 0x2504, 0xa02d, 0xaa7f, 0x2f56, +0x0c4c, 0x8965, 0x8337, 0x061e, 0x9793, 0x12ba, 0x18e8, 0x9dc1, +}; + +static u16 mlxsw_sp_acl_bf_crc_byte(u16 crc, u8 c) +{ + return (crc << 8) ^ mlxsw_sp_acl_bf_crc_tab[(crc >> 8) ^ c]; +} + +static u16 mlxsw_sp_acl_bf_crc(const u8 *buffer, size_t len) +{ + u16 crc = 0; + + while (len--) + crc = mlxsw_sp_acl_bf_crc_byte(crc, *buffer++); + return crc; +} + +static void +mlxsw_sp_acl_bf_key_encode(struct mlxsw_sp_acl_atcam_region *aregion, + struct mlxsw_sp_acl_atcam_entry *aentry, + char *output, u8 *len) +{ + struct mlxsw_afk_key_info *key_info = aregion->region->key_info; + u8 chunk_index, chunk_count, block_count; + char *chunk = output; + __be16 erp_region_id; + + block_count = mlxsw_afk_key_info_blocks_count_get(key_info); + chunk_count = 1 + ((block_count - 1) >> 2); + erp_region_id = cpu_to_be16(aentry->ht_key.erp_id | + (aregion->region->id << 4)); + for (chunk_index = MLXSW_BLOOM_KEY_CHUNKS - chunk_count; + chunk_index < MLXSW_BLOOM_KEY_CHUNKS; chunk_index++) { + memset(chunk, 0, MLXSW_BLOOM_CHUNK_PAD_BYTES); + memcpy(chunk + MLXSW_BLOOM_CHUNK_PAD_BYTES, &erp_region_id, + sizeof(erp_region_id)); + memcpy(chunk + MLXSW_BLOOM_CHUNK_KEY_OFFSET, + &aentry->enc_key[chunk_key_offsets[chunk_index]], + MLXSW_BLOOM_CHUNK_KEY_BYTES); + chunk += MLXSW_BLOOM_KEY_CHUNK_BYTES; + } + *len = chunk_count * MLXSW_BLOOM_KEY_CHUNK_BYTES; +} + +static unsigned int +mlxsw_sp_acl_bf_rule_count_index_get(struct mlxsw_sp_acl_bf *bf, + unsigned int erp_bank, + unsigned int bf_index) +{ + return erp_bank * bf->bank_size + bf_index; +} + +static unsigned int +mlxsw_sp_acl_bf_index_get(struct mlxsw_sp_acl_bf *bf, + struct mlxsw_sp_acl_atcam_region *aregion, + struct mlxsw_sp_acl_atcam_entry *aentry) +{ + char bf_key[MLXSW_BLOOM_KEY_LEN]; + u8 bf_size; + + mlxsw_sp_acl_bf_key_encode(aregion, aentry, bf_key, &bf_size); + return mlxsw_sp_acl_bf_crc(bf_key, bf_size); +} + +int +mlxsw_sp_acl_bf_entry_add(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_bf *bf, + struct mlxsw_sp_acl_atcam_region *aregion, + unsigned int erp_bank, + struct mlxsw_sp_acl_atcam_entry *aentry) +{ + unsigned int rule_index; + char *peabfe_pl; + u16 bf_index; + int err; + + mutex_lock(&bf->lock); + + bf_index = mlxsw_sp_acl_bf_index_get(bf, aregion, aentry); + rule_index = mlxsw_sp_acl_bf_rule_count_index_get(bf, erp_bank, + bf_index); + + if (refcount_inc_not_zero(&bf->refcnt[rule_index])) { + err = 0; + goto unlock; + } + + peabfe_pl = kmalloc(MLXSW_REG_PEABFE_LEN, GFP_KERNEL); + if (!peabfe_pl) { + err = -ENOMEM; + goto unlock; + } + + mlxsw_reg_peabfe_pack(peabfe_pl); + mlxsw_reg_peabfe_rec_pack(peabfe_pl, 0, 1, erp_bank, bf_index); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(peabfe), peabfe_pl); + kfree(peabfe_pl); + if (err) + goto unlock; + + refcount_set(&bf->refcnt[rule_index], 1); + err = 0; + +unlock: + mutex_unlock(&bf->lock); + return err; +} + +void +mlxsw_sp_acl_bf_entry_del(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_bf *bf, + struct mlxsw_sp_acl_atcam_region *aregion, + unsigned int erp_bank, + struct mlxsw_sp_acl_atcam_entry *aentry) +{ + unsigned int rule_index; + char *peabfe_pl; + u16 bf_index; + + mutex_lock(&bf->lock); + + bf_index = mlxsw_sp_acl_bf_index_get(bf, aregion, aentry); + rule_index = mlxsw_sp_acl_bf_rule_count_index_get(bf, erp_bank, + bf_index); + + if (refcount_dec_and_test(&bf->refcnt[rule_index])) { + peabfe_pl = kmalloc(MLXSW_REG_PEABFE_LEN, GFP_KERNEL); + if (!peabfe_pl) + goto unlock; + + mlxsw_reg_peabfe_pack(peabfe_pl); + 
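/* The 0 below clears the Bloom filter entry; the add path
+		 * above passes 1 to set it.
+		 */
+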
mlxsw_reg_peabfe_rec_pack(peabfe_pl, 0, 0, erp_bank, bf_index); + mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(peabfe), peabfe_pl); + kfree(peabfe_pl); + } + +unlock: + mutex_unlock(&bf->lock); +} + +struct mlxsw_sp_acl_bf * +mlxsw_sp_acl_bf_init(struct mlxsw_sp *mlxsw_sp, unsigned int num_erp_banks) +{ + struct mlxsw_sp_acl_bf *bf; + unsigned int bf_bank_size; + + if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, ACL_MAX_BF_LOG)) + return ERR_PTR(-EIO); + + /* Bloom filter size per erp_table_bank + * is 2^ACL_MAX_BF_LOG + */ + bf_bank_size = 1 << MLXSW_CORE_RES_GET(mlxsw_sp->core, ACL_MAX_BF_LOG); + bf = kzalloc(struct_size(bf, refcnt, bf_bank_size * num_erp_banks), + GFP_KERNEL); + if (!bf) + return ERR_PTR(-ENOMEM); + + bf->bank_size = bf_bank_size; + mutex_init(&bf->lock); + + return bf; +} + +void mlxsw_sp_acl_bf_fini(struct mlxsw_sp_acl_bf *bf) +{ + mutex_destroy(&bf->lock); + kfree(bf); +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_ctcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_ctcam.c index e3c6fe8b1d40..05680a7e6c56 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_ctcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_ctcam.c @@ -46,7 +46,6 @@ mlxsw_sp_acl_ctcam_region_entry_insert(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_tcam_region *region = cregion->region; struct mlxsw_afk *afk = mlxsw_sp_acl_afk(mlxsw_sp->acl); char ptce2_pl[MLXSW_REG_PTCE2_LEN]; - unsigned int blocks_count; char *act_set; u32 priority; char *mask; @@ -63,9 +62,7 @@ mlxsw_sp_acl_ctcam_region_entry_insert(struct mlxsw_sp *mlxsw_sp, centry->parman_item.index, priority); key = mlxsw_reg_ptce2_flex_key_blocks_data(ptce2_pl); mask = mlxsw_reg_ptce2_mask_data(ptce2_pl); - blocks_count = mlxsw_afk_key_info_blocks_count_get(region->key_info); - mlxsw_afk_encode(afk, region->key_info, &rulei->values, key, mask, 0, - blocks_count - 1); + mlxsw_afk_encode(afk, region->key_info, &rulei->values, key, mask); err = cregion->ops->entry_insert(cregion, centry, mask); if (err) @@ -75,7 +72,15 @@ mlxsw_sp_acl_ctcam_region_entry_insert(struct mlxsw_sp *mlxsw_sp, act_set = mlxsw_afa_block_first_set(rulei->act_block); mlxsw_reg_ptce2_flex_action_set_memcpy_to(ptce2_pl, act_set); - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptce2), ptce2_pl); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptce2), ptce2_pl); + if (err) + goto err_ptce2_write; + + return 0; + +err_ptce2_write: + cregion->ops->entry_remove(cregion, centry); + return err; } static void @@ -92,6 +97,27 @@ mlxsw_sp_acl_ctcam_region_entry_remove(struct mlxsw_sp *mlxsw_sp, cregion->ops->entry_remove(cregion, centry); } +static int +mlxsw_sp_acl_ctcam_region_entry_action_replace(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_ctcam_region *cregion, + struct mlxsw_sp_acl_ctcam_entry *centry, + struct mlxsw_afa_block *afa_block, + unsigned int priority) +{ + char ptce2_pl[MLXSW_REG_PTCE2_LEN]; + char *act_set; + + mlxsw_reg_ptce2_pack(ptce2_pl, true, MLXSW_REG_PTCE2_OP_WRITE_UPDATE, + cregion->region->tcam_region_info, + centry->parman_item.index, priority); + + act_set = mlxsw_afa_block_first_set(afa_block); + mlxsw_reg_ptce2_flex_action_set_memcpy_to(ptce2_pl, act_set); + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptce2), ptce2_pl); +} + + static int mlxsw_sp_acl_ctcam_region_parman_resize(void *priv, unsigned long new_count) { @@ -194,3 +220,14 @@ void mlxsw_sp_acl_ctcam_entry_del(struct mlxsw_sp *mlxsw_sp, parman_item_remove(cregion->parman, &cchunk->parman_prio, ¢ry->parman_item); } + +int 
mlxsw_sp_acl_ctcam_entry_action_replace(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_ctcam_region *cregion, + struct mlxsw_sp_acl_ctcam_entry *centry, + struct mlxsw_sp_acl_rule_info *rulei) +{ + return mlxsw_sp_acl_ctcam_region_entry_action_replace(mlxsw_sp, cregion, + centry, + rulei->act_block, + rulei->priority); +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_erp.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_erp.c index 0a4fd3c8662a..c1a9cc9a3292 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_erp.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_erp.c @@ -7,7 +7,8 @@ #include <linux/gfp.h> #include <linux/kernel.h> #include <linux/list.h> -#include <linux/rhashtable.h> +#include <linux/mutex.h> +#include <linux/objagg.h> #include <linux/rtnetlink.h> #include <linux/slab.h> @@ -24,11 +25,14 @@ struct mlxsw_sp_acl_erp_core { unsigned int erpt_entries_size[MLXSW_SP_ACL_ATCAM_REGION_TYPE_MAX + 1]; struct gen_pool *erp_tables; struct mlxsw_sp *mlxsw_sp; + struct mlxsw_sp_acl_bf *bf; unsigned int num_erp_banks; }; struct mlxsw_sp_acl_erp_key { char mask[MLXSW_REG_PTCEX_FLEX_KEY_BLOCKS_LEN]; +#define __MASK_LEN 0x38 +#define __MASK_IDX(i) (__MASK_LEN - (i) - 1) bool ctcam; }; @@ -36,10 +40,8 @@ struct mlxsw_sp_acl_erp { struct mlxsw_sp_acl_erp_key key; u8 id; u8 index; - refcount_t refcnt; DECLARE_BITMAP(mask_bitmap, MLXSW_SP_ACL_TCAM_MASK_LEN); struct list_head list; - struct rhash_head ht_node; struct mlxsw_sp_acl_erp_table *erp_table; }; @@ -53,7 +55,6 @@ struct mlxsw_sp_acl_erp_table { DECLARE_BITMAP(erp_id_bitmap, MLXSW_SP_ACL_ERP_MAX_PER_REGION); DECLARE_BITMAP(erp_index_bitmap, MLXSW_SP_ACL_ERP_MAX_PER_REGION); struct list_head atcam_erps_list; - struct rhashtable erp_ht; struct mlxsw_sp_acl_erp_core *erp_core; struct mlxsw_sp_acl_atcam_region *aregion; const struct mlxsw_sp_acl_erp_table_ops *ops; @@ -61,12 +62,9 @@ struct mlxsw_sp_acl_erp_table { unsigned int num_atcam_erps; unsigned int num_max_atcam_erps; unsigned int num_ctcam_erps; -}; - -static const struct rhashtable_params mlxsw_sp_acl_erp_ht_params = { - .key_len = sizeof(struct mlxsw_sp_acl_erp_key), - .key_offset = offsetof(struct mlxsw_sp_acl_erp, key), - .head_offset = offsetof(struct mlxsw_sp_acl_erp, ht_node), + unsigned int num_deltas; + struct objagg *objagg; + struct mutex objagg_lock; /* guards objagg manipulation */ }; struct mlxsw_sp_acl_erp_table_ops { @@ -119,14 +117,17 @@ static const struct mlxsw_sp_acl_erp_table_ops erp_no_mask_ops = { .erp_destroy = mlxsw_sp_acl_erp_no_mask_destroy, }; -bool mlxsw_sp_acl_erp_is_ctcam_erp(const struct mlxsw_sp_acl_erp *erp) +static bool +mlxsw_sp_acl_erp_table_is_used(const struct mlxsw_sp_acl_erp_table *erp_table) { - return erp->key.ctcam; + return erp_table->ops != &erp_single_mask_ops && + erp_table->ops != &erp_no_mask_ops; } -u8 mlxsw_sp_acl_erp_id(const struct mlxsw_sp_acl_erp *erp) +static unsigned int +mlxsw_sp_acl_erp_bank_get(const struct mlxsw_sp_acl_erp *erp) { - return erp->id; + return erp->index % erp->erp_table->erp_core->num_erp_banks; } static unsigned int @@ -194,12 +195,15 @@ mlxsw_sp_acl_erp_master_mask_update(struct mlxsw_sp_acl_erp_table *erp_table) static int mlxsw_sp_acl_erp_master_mask_set(struct mlxsw_sp_acl_erp_table *erp_table, - const struct mlxsw_sp_acl_erp *erp) + struct mlxsw_sp_acl_erp_key *key) { + DECLARE_BITMAP(mask_bitmap, MLXSW_SP_ACL_TCAM_MASK_LEN); unsigned long bit; int err; - for_each_set_bit(bit, erp->mask_bitmap, MLXSW_SP_ACL_TCAM_MASK_LEN) + bitmap_from_arr32(mask_bitmap, (u32 *) 
key->mask, + MLXSW_SP_ACL_TCAM_MASK_LEN); + for_each_set_bit(bit, mask_bitmap, MLXSW_SP_ACL_TCAM_MASK_LEN) mlxsw_sp_acl_erp_master_mask_bit_set(bit, &erp_table->master_mask); @@ -210,7 +214,7 @@ mlxsw_sp_acl_erp_master_mask_set(struct mlxsw_sp_acl_erp_table *erp_table, return 0; err_master_mask_update: - for_each_set_bit(bit, erp->mask_bitmap, MLXSW_SP_ACL_TCAM_MASK_LEN) + for_each_set_bit(bit, mask_bitmap, MLXSW_SP_ACL_TCAM_MASK_LEN) mlxsw_sp_acl_erp_master_mask_bit_clear(bit, &erp_table->master_mask); return err; @@ -218,12 +222,15 @@ err_master_mask_update: static int mlxsw_sp_acl_erp_master_mask_clear(struct mlxsw_sp_acl_erp_table *erp_table, - const struct mlxsw_sp_acl_erp *erp) + struct mlxsw_sp_acl_erp_key *key) { + DECLARE_BITMAP(mask_bitmap, MLXSW_SP_ACL_TCAM_MASK_LEN); unsigned long bit; int err; - for_each_set_bit(bit, erp->mask_bitmap, MLXSW_SP_ACL_TCAM_MASK_LEN) + bitmap_from_arr32(mask_bitmap, (u32 *) key->mask, + MLXSW_SP_ACL_TCAM_MASK_LEN); + for_each_set_bit(bit, mask_bitmap, MLXSW_SP_ACL_TCAM_MASK_LEN) mlxsw_sp_acl_erp_master_mask_bit_clear(bit, &erp_table->master_mask); @@ -234,7 +241,7 @@ mlxsw_sp_acl_erp_master_mask_clear(struct mlxsw_sp_acl_erp_table *erp_table, return 0; err_master_mask_update: - for_each_set_bit(bit, erp->mask_bitmap, MLXSW_SP_ACL_TCAM_MASK_LEN) + for_each_set_bit(bit, mask_bitmap, MLXSW_SP_ACL_TCAM_MASK_LEN) mlxsw_sp_acl_erp_master_mask_bit_set(bit, &erp_table->master_mask); return err; @@ -256,26 +263,16 @@ mlxsw_sp_acl_erp_generic_create(struct mlxsw_sp_acl_erp_table *erp_table, goto err_erp_id_get; memcpy(&erp->key, key, sizeof(*key)); - bitmap_from_arr32(erp->mask_bitmap, (u32 *) key->mask, - MLXSW_SP_ACL_TCAM_MASK_LEN); list_add(&erp->list, &erp_table->atcam_erps_list); - refcount_set(&erp->refcnt, 1); erp_table->num_atcam_erps++; erp->erp_table = erp_table; - err = mlxsw_sp_acl_erp_master_mask_set(erp_table, erp); + err = mlxsw_sp_acl_erp_master_mask_set(erp_table, &erp->key); if (err) goto err_master_mask_set; - err = rhashtable_insert_fast(&erp_table->erp_ht, &erp->ht_node, - mlxsw_sp_acl_erp_ht_params); - if (err) - goto err_rhashtable_insert; - return erp; -err_rhashtable_insert: - mlxsw_sp_acl_erp_master_mask_clear(erp_table, erp); err_master_mask_set: erp_table->num_atcam_erps--; list_del(&erp->list); @@ -290,9 +287,7 @@ mlxsw_sp_acl_erp_generic_destroy(struct mlxsw_sp_acl_erp *erp) { struct mlxsw_sp_acl_erp_table *erp_table = erp->erp_table; - rhashtable_remove_fast(&erp_table->erp_ht, &erp->ht_node, - mlxsw_sp_acl_erp_ht_params); - mlxsw_sp_acl_erp_master_mask_clear(erp_table, erp); + mlxsw_sp_acl_erp_master_mask_clear(erp_table, &erp->key); erp_table->num_atcam_erps--; list_del(&erp->list); mlxsw_sp_acl_erp_id_put(erp_table, erp->id); @@ -525,6 +520,48 @@ err_table_relocate: } static int +mlxsw_acl_erp_table_bf_add(struct mlxsw_sp_acl_erp_table *erp_table, + struct mlxsw_sp_acl_erp *erp) +{ + struct mlxsw_sp_acl_atcam_region *aregion = erp_table->aregion; + unsigned int erp_bank = mlxsw_sp_acl_erp_bank_get(erp); + struct mlxsw_sp_acl_atcam_entry *aentry; + int err; + + list_for_each_entry(aentry, &aregion->entries_list, list) { + err = mlxsw_sp_acl_bf_entry_add(aregion->region->mlxsw_sp, + erp_table->erp_core->bf, + aregion, erp_bank, aentry); + if (err) + goto bf_entry_add_err; + } + + return 0; + +bf_entry_add_err: + list_for_each_entry_continue_reverse(aentry, &aregion->entries_list, + list) + mlxsw_sp_acl_bf_entry_del(aregion->region->mlxsw_sp, + erp_table->erp_core->bf, + aregion, erp_bank, aentry); + return err; +} + +static 
void +mlxsw_acl_erp_table_bf_del(struct mlxsw_sp_acl_erp_table *erp_table, + struct mlxsw_sp_acl_erp *erp) +{ + struct mlxsw_sp_acl_atcam_region *aregion = erp_table->aregion; + unsigned int erp_bank = mlxsw_sp_acl_erp_bank_get(erp); + struct mlxsw_sp_acl_atcam_entry *aentry; + + list_for_each_entry_reverse(aentry, &aregion->entries_list, list) + mlxsw_sp_acl_bf_entry_del(aregion->region->mlxsw_sp, + erp_table->erp_core->bf, + aregion, erp_bank, aentry); +} + +static int mlxsw_sp_acl_erp_region_table_trans(struct mlxsw_sp_acl_erp_table *erp_table) { struct mlxsw_sp_acl_erp_core *erp_core = erp_table->erp_core; @@ -548,16 +585,24 @@ mlxsw_sp_acl_erp_region_table_trans(struct mlxsw_sp_acl_erp_table *erp_table) goto err_table_master_rp; } - /* Maintain the same eRP bank for the master RP, so that we - * wouldn't need to update the bloom filter + /* Make sure the master RP is using a valid index, as + * only a single eRP row is currently allocated. */ - master_rp->index = master_rp->index % erp_core->num_erp_banks; + master_rp->index = 0; __set_bit(master_rp->index, erp_table->erp_index_bitmap); err = mlxsw_sp_acl_erp_table_erp_add(erp_table, master_rp); if (err) goto err_table_master_rp_add; + /* Update Bloom filter before enabling eRP table, as rules + * on the master RP were not set to Bloom filter up to this + * point. + */ + err = mlxsw_acl_erp_table_bf_add(erp_table, master_rp); + if (err) + goto err_table_bf_add; + err = mlxsw_sp_acl_erp_table_enable(erp_table, false); if (err) goto err_table_enable; @@ -565,6 +610,8 @@ mlxsw_sp_acl_erp_region_table_trans(struct mlxsw_sp_acl_erp_table *erp_table) return 0; err_table_enable: + mlxsw_acl_erp_table_bf_del(erp_table, master_rp); +err_table_bf_add: mlxsw_sp_acl_erp_table_erp_del(master_rp); err_table_master_rp_add: __clear_bit(master_rp->index, erp_table->erp_index_bitmap); @@ -585,6 +632,7 @@ mlxsw_sp_acl_erp_region_master_mask_trans(struct mlxsw_sp_acl_erp_table *erp_tab master_rp = mlxsw_sp_acl_erp_table_master_rp(erp_table); if (!master_rp) return; + mlxsw_acl_erp_table_bf_del(erp_table, master_rp); mlxsw_sp_acl_erp_table_erp_del(master_rp); __clear_bit(master_rp->index, erp_table->erp_index_bitmap); mlxsw_sp_acl_erp_table_free(erp_core, erp_table->num_max_atcam_erps, @@ -647,9 +695,55 @@ mlxsw_sp_acl_erp_region_ctcam_disable(struct mlxsw_sp_acl_erp_table *erp_table) mlxsw_sp_acl_erp_table_enable(erp_table, false); } +static int +__mlxsw_sp_acl_erp_table_other_inc(struct mlxsw_sp_acl_erp_table *erp_table, + unsigned int *inc_num) +{ + int err; + + /* If there are C-TCAM eRP or deltas in use we need to transition + * the region to use eRP table, if it is not already done + */ + if (!mlxsw_sp_acl_erp_table_is_used(erp_table)) { + err = mlxsw_sp_acl_erp_region_table_trans(erp_table); + if (err) + return err; + } + + /* When C-TCAM or deltas are used, the eRP table must be used */ + if (erp_table->ops != &erp_multiple_masks_ops) + erp_table->ops = &erp_multiple_masks_ops; + + (*inc_num)++; + + return 0; +} + +static int mlxsw_sp_acl_erp_ctcam_inc(struct mlxsw_sp_acl_erp_table *erp_table) +{ + return __mlxsw_sp_acl_erp_table_other_inc(erp_table, + &erp_table->num_ctcam_erps); +} + +static int mlxsw_sp_acl_erp_delta_inc(struct mlxsw_sp_acl_erp_table *erp_table) +{ + return __mlxsw_sp_acl_erp_table_other_inc(erp_table, + &erp_table->num_deltas); +} + static void -mlxsw_sp_acl_erp_ctcam_table_ops_set(struct mlxsw_sp_acl_erp_table *erp_table) +__mlxsw_sp_acl_erp_table_other_dec(struct mlxsw_sp_acl_erp_table *erp_table, + unsigned int *dec_num) { 
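+	/* Shared by mlxsw_sp_acl_erp_ctcam_dec() and
+	 * mlxsw_sp_acl_erp_delta_dec(); dec_num points at the counter
+	 * being decremented, num_ctcam_erps or num_deltas.
+	 */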
+ (*dec_num)--; + + /* If there are no C-TCAM eRP or deltas in use, the state we + * transition to depends on the number of A-TCAM eRPs currently + * in use. + */ + if (erp_table->num_ctcam_erps > 0 || erp_table->num_deltas > 0) + return; + switch (erp_table->num_atcam_erps) { case 2: /* Keep using the eRP table, but correctly set the @@ -683,9 +777,21 @@ mlxsw_sp_acl_erp_ctcam_table_ops_set(struct mlxsw_sp_acl_erp_table *erp_table) } } +static void mlxsw_sp_acl_erp_ctcam_dec(struct mlxsw_sp_acl_erp_table *erp_table) +{ + __mlxsw_sp_acl_erp_table_other_dec(erp_table, + &erp_table->num_ctcam_erps); +} + +static void mlxsw_sp_acl_erp_delta_dec(struct mlxsw_sp_acl_erp_table *erp_table) +{ + __mlxsw_sp_acl_erp_table_other_dec(erp_table, + &erp_table->num_deltas); +} + static struct mlxsw_sp_acl_erp * -__mlxsw_sp_acl_erp_ctcam_mask_create(struct mlxsw_sp_acl_erp_table *erp_table, - struct mlxsw_sp_acl_erp_key *key) +mlxsw_sp_acl_erp_ctcam_mask_create(struct mlxsw_sp_acl_erp_table *erp_table, + struct mlxsw_sp_acl_erp_key *key) { struct mlxsw_sp_acl_erp *erp; int err; @@ -697,89 +803,41 @@ __mlxsw_sp_acl_erp_ctcam_mask_create(struct mlxsw_sp_acl_erp_table *erp_table, memcpy(&erp->key, key, sizeof(*key)); bitmap_from_arr32(erp->mask_bitmap, (u32 *) key->mask, MLXSW_SP_ACL_TCAM_MASK_LEN); - refcount_set(&erp->refcnt, 1); - erp_table->num_ctcam_erps++; - erp->erp_table = erp_table; - err = mlxsw_sp_acl_erp_master_mask_set(erp_table, erp); + err = mlxsw_sp_acl_erp_ctcam_inc(erp_table); if (err) - goto err_master_mask_set; + goto err_erp_ctcam_inc; - err = rhashtable_insert_fast(&erp_table->erp_ht, &erp->ht_node, - mlxsw_sp_acl_erp_ht_params); + erp->erp_table = erp_table; + + err = mlxsw_sp_acl_erp_master_mask_set(erp_table, &erp->key); if (err) - goto err_rhashtable_insert; + goto err_master_mask_set; err = mlxsw_sp_acl_erp_region_ctcam_enable(erp_table); if (err) goto err_erp_region_ctcam_enable; - /* When C-TCAM is used, the eRP table must be used */ - erp_table->ops = &erp_multiple_masks_ops; - return erp; err_erp_region_ctcam_enable: - rhashtable_remove_fast(&erp_table->erp_ht, &erp->ht_node, - mlxsw_sp_acl_erp_ht_params); -err_rhashtable_insert: - mlxsw_sp_acl_erp_master_mask_clear(erp_table, erp); + mlxsw_sp_acl_erp_master_mask_clear(erp_table, &erp->key); err_master_mask_set: - erp_table->num_ctcam_erps--; + mlxsw_sp_acl_erp_ctcam_dec(erp_table); +err_erp_ctcam_inc: kfree(erp); return ERR_PTR(err); } -static struct mlxsw_sp_acl_erp * -mlxsw_sp_acl_erp_ctcam_mask_create(struct mlxsw_sp_acl_erp_table *erp_table, - struct mlxsw_sp_acl_erp_key *key) -{ - struct mlxsw_sp_acl_erp *erp; - int err; - - /* There is a special situation where we need to spill rules - * into the C-TCAM, yet the region is still using a master - * mask and thus not performing a lookup in the C-TCAM. This - * can happen when two rules that only differ in priority - and - * thus sharing the same key - are programmed. 
In this case - * we transition the region to use an eRP table - */ - err = mlxsw_sp_acl_erp_region_table_trans(erp_table); - if (err) - return ERR_PTR(err); - - erp = __mlxsw_sp_acl_erp_ctcam_mask_create(erp_table, key); - if (IS_ERR(erp)) { - err = PTR_ERR(erp); - goto err_erp_create; - } - - return erp; - -err_erp_create: - mlxsw_sp_acl_erp_region_master_mask_trans(erp_table); - return ERR_PTR(err); -} - static void mlxsw_sp_acl_erp_ctcam_mask_destroy(struct mlxsw_sp_acl_erp *erp) { struct mlxsw_sp_acl_erp_table *erp_table = erp->erp_table; mlxsw_sp_acl_erp_region_ctcam_disable(erp_table); - rhashtable_remove_fast(&erp_table->erp_ht, &erp->ht_node, - mlxsw_sp_acl_erp_ht_params); - mlxsw_sp_acl_erp_master_mask_clear(erp_table, erp); - erp_table->num_ctcam_erps--; + mlxsw_sp_acl_erp_master_mask_clear(erp_table, &erp->key); + mlxsw_sp_acl_erp_ctcam_dec(erp_table); kfree(erp); - - /* Once the last C-TCAM eRP was destroyed, the state we - * transition to depends on the number of A-TCAM eRPs currently - * in use - */ - if (erp_table->num_ctcam_erps > 0) - return; - mlxsw_sp_acl_erp_ctcam_table_ops_set(erp_table); } static struct mlxsw_sp_acl_erp * @@ -790,7 +848,7 @@ mlxsw_sp_acl_erp_mask_create(struct mlxsw_sp_acl_erp_table *erp_table, int err; if (key->ctcam) - return __mlxsw_sp_acl_erp_ctcam_mask_create(erp_table, key); + return mlxsw_sp_acl_erp_ctcam_mask_create(erp_table, key); /* Expand the eRP table for the new eRP, if needed */ err = mlxsw_sp_acl_erp_table_expand(erp_table); @@ -838,7 +896,8 @@ mlxsw_sp_acl_erp_mask_destroy(struct mlxsw_sp_acl_erp_table *erp_table, mlxsw_sp_acl_erp_index_put(erp_table, erp->index); mlxsw_sp_acl_erp_generic_destroy(erp); - if (erp_table->num_atcam_erps == 2 && erp_table->num_ctcam_erps == 0) + if (erp_table->num_atcam_erps == 2 && erp_table->num_ctcam_erps == 0 && + erp_table->num_deltas == 0) erp_table->ops = &erp_two_masks_ops; } @@ -940,46 +999,324 @@ mlxsw_sp_acl_erp_no_mask_destroy(struct mlxsw_sp_acl_erp_table *erp_table, WARN_ON(1); } -struct mlxsw_sp_acl_erp * -mlxsw_sp_acl_erp_get(struct mlxsw_sp_acl_atcam_region *aregion, - const char *mask, bool ctcam) +struct mlxsw_sp_acl_erp_mask * +mlxsw_sp_acl_erp_mask_get(struct mlxsw_sp_acl_atcam_region *aregion, + const char *mask, bool ctcam) { struct mlxsw_sp_acl_erp_table *erp_table = aregion->erp_table; struct mlxsw_sp_acl_erp_key key; - struct mlxsw_sp_acl_erp *erp; - - /* eRPs are allocated from a shared resource, but currently all - * allocations are done under RTNL. 
- */ - ASSERT_RTNL(); + struct objagg_obj *objagg_obj; memcpy(key.mask, mask, MLXSW_REG_PTCEX_FLEX_KEY_BLOCKS_LEN); key.ctcam = ctcam; - erp = rhashtable_lookup_fast(&erp_table->erp_ht, &key, - mlxsw_sp_acl_erp_ht_params); - if (erp) { - refcount_inc(&erp->refcnt); - return erp; + mutex_lock(&erp_table->objagg_lock); + objagg_obj = objagg_obj_get(erp_table->objagg, &key); + mutex_unlock(&erp_table->objagg_lock); + if (IS_ERR(objagg_obj)) + return ERR_CAST(objagg_obj); + return (struct mlxsw_sp_acl_erp_mask *) objagg_obj; +} + +void mlxsw_sp_acl_erp_mask_put(struct mlxsw_sp_acl_atcam_region *aregion, + struct mlxsw_sp_acl_erp_mask *erp_mask) +{ + struct objagg_obj *objagg_obj = (struct objagg_obj *) erp_mask; + struct mlxsw_sp_acl_erp_table *erp_table = aregion->erp_table; + + mutex_lock(&erp_table->objagg_lock); + objagg_obj_put(erp_table->objagg, objagg_obj); + mutex_unlock(&erp_table->objagg_lock); +} + +int mlxsw_sp_acl_erp_bf_insert(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_atcam_region *aregion, + struct mlxsw_sp_acl_erp_mask *erp_mask, + struct mlxsw_sp_acl_atcam_entry *aentry) +{ + struct objagg_obj *objagg_obj = (struct objagg_obj *) erp_mask; + const struct mlxsw_sp_acl_erp *erp = objagg_obj_root_priv(objagg_obj); + unsigned int erp_bank; + + if (!mlxsw_sp_acl_erp_table_is_used(erp->erp_table)) + return 0; + + erp_bank = mlxsw_sp_acl_erp_bank_get(erp); + return mlxsw_sp_acl_bf_entry_add(mlxsw_sp, + erp->erp_table->erp_core->bf, + aregion, erp_bank, aentry); +} + +void mlxsw_sp_acl_erp_bf_remove(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_atcam_region *aregion, + struct mlxsw_sp_acl_erp_mask *erp_mask, + struct mlxsw_sp_acl_atcam_entry *aentry) +{ + struct objagg_obj *objagg_obj = (struct objagg_obj *) erp_mask; + const struct mlxsw_sp_acl_erp *erp = objagg_obj_root_priv(objagg_obj); + unsigned int erp_bank; + + if (!mlxsw_sp_acl_erp_table_is_used(erp->erp_table)) + return; + + erp_bank = mlxsw_sp_acl_erp_bank_get(erp); + mlxsw_sp_acl_bf_entry_del(mlxsw_sp, + erp->erp_table->erp_core->bf, + aregion, erp_bank, aentry); +} + +bool +mlxsw_sp_acl_erp_mask_is_ctcam(const struct mlxsw_sp_acl_erp_mask *erp_mask) +{ + struct objagg_obj *objagg_obj = (struct objagg_obj *) erp_mask; + const struct mlxsw_sp_acl_erp_key *key = objagg_obj_raw(objagg_obj); + + return key->ctcam; +} + +u8 mlxsw_sp_acl_erp_mask_erp_id(const struct mlxsw_sp_acl_erp_mask *erp_mask) +{ + struct objagg_obj *objagg_obj = (struct objagg_obj *) erp_mask; + const struct mlxsw_sp_acl_erp *erp = objagg_obj_root_priv(objagg_obj); + + return erp->id; +} + +struct mlxsw_sp_acl_erp_delta { + struct mlxsw_sp_acl_erp_key key; + u16 start; + u8 mask; +}; + +u16 mlxsw_sp_acl_erp_delta_start(const struct mlxsw_sp_acl_erp_delta *delta) +{ + return delta->start; +} + +u8 mlxsw_sp_acl_erp_delta_mask(const struct mlxsw_sp_acl_erp_delta *delta) +{ + return delta->mask; +} + +u8 mlxsw_sp_acl_erp_delta_value(const struct mlxsw_sp_acl_erp_delta *delta, + const char *enc_key) +{ + u16 start = delta->start; + u8 mask = delta->mask; + u16 tmp; + + if (!mask) + return 0; + + tmp = (unsigned char) enc_key[__MASK_IDX(start / 8)]; + if (start / 8 + 1 < __MASK_LEN) + tmp |= (unsigned char) enc_key[__MASK_IDX(start / 8 + 1)] << 8; + tmp >>= start % 8; + tmp &= mask; + return tmp; +} + +void mlxsw_sp_acl_erp_delta_clear(const struct mlxsw_sp_acl_erp_delta *delta, + const char *enc_key) +{ + u16 start = delta->start; + u8 mask = delta->mask; + unsigned char *byte; + u16 tmp; + + tmp = mask; + tmp <<= start % 8; + tmp = ~tmp; + + byte = 
(unsigned char *) &enc_key[__MASK_IDX(start / 8)]; + *byte &= tmp & 0xff; + if (start / 8 + 1 < __MASK_LEN) { + byte = (unsigned char *) &enc_key[__MASK_IDX(start / 8 + 1)]; + *byte &= (tmp >> 8) & 0xff; + } +} + +static const struct mlxsw_sp_acl_erp_delta +mlxsw_sp_acl_erp_delta_default = {}; + +const struct mlxsw_sp_acl_erp_delta * +mlxsw_sp_acl_erp_delta(const struct mlxsw_sp_acl_erp_mask *erp_mask) +{ + struct objagg_obj *objagg_obj = (struct objagg_obj *) erp_mask; + const struct mlxsw_sp_acl_erp_delta *delta; + + delta = objagg_obj_delta_priv(objagg_obj); + if (!delta) + delta = &mlxsw_sp_acl_erp_delta_default; + return delta; +} + +static int +mlxsw_sp_acl_erp_delta_fill(const struct mlxsw_sp_acl_erp_key *parent_key, + const struct mlxsw_sp_acl_erp_key *key, + u16 *delta_start, u8 *delta_mask) +{ + int offset = 0; + int si = -1; + u16 pmask; + u16 mask; + int i; + + /* The difference between 2 masks can be up to 8 consecutive bits. */ + for (i = 0; i < __MASK_LEN; i++) { + if (parent_key->mask[__MASK_IDX(i)] == key->mask[__MASK_IDX(i)]) + continue; + if (si == -1) + si = i; + else if (si != i - 1) + return -EINVAL; + } + if (si == -1) { + /* The masks are the same, this cannot happen. + * That means the caller is broken. + */ + WARN_ON(1); + *delta_start = 0; + *delta_mask = 0; + return 0; } + pmask = (unsigned char) parent_key->mask[__MASK_IDX(si)]; + mask = (unsigned char) key->mask[__MASK_IDX(si)]; + if (si + 1 < __MASK_LEN) { + pmask |= (unsigned char) parent_key->mask[__MASK_IDX(si + 1)] << 8; + mask |= (unsigned char) key->mask[__MASK_IDX(si + 1)] << 8; + } + + if ((pmask ^ mask) & pmask) + return -EINVAL; + mask &= ~pmask; + while (!(mask & (1 << offset))) + offset++; + while (!(mask & 1)) + mask >>= 1; + if (mask & 0xff00) + return -EINVAL; - return erp_table->ops->erp_create(erp_table, &key); + *delta_start = si * 8 + offset; + *delta_mask = mask; + + return 0; } -void mlxsw_sp_acl_erp_put(struct mlxsw_sp_acl_atcam_region *aregion, - struct mlxsw_sp_acl_erp *erp) +static bool mlxsw_sp_acl_erp_delta_check(void *priv, const void *parent_obj, + const void *obj) { + const struct mlxsw_sp_acl_erp_key *parent_key = parent_obj; + const struct mlxsw_sp_acl_erp_key *key = obj; + u16 delta_start; + u8 delta_mask; + int err; + + err = mlxsw_sp_acl_erp_delta_fill(parent_key, key, + &delta_start, &delta_mask); + return err ? false : true; +} + +static int mlxsw_sp_acl_erp_hints_obj_cmp(const void *obj1, const void *obj2) +{ + const struct mlxsw_sp_acl_erp_key *key1 = obj1; + const struct mlxsw_sp_acl_erp_key *key2 = obj2; + + /* For hints purposes, two objects are considered equal + * in case the masks are the same. Does not matter what + * the "ctcam" value is. 
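+	 * A mask and its C-TCAM variant only differ in that flag and
+	 * should be treated as a single object here.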
+ */ + return memcmp(key1->mask, key2->mask, sizeof(key1->mask)); +} + +static void *mlxsw_sp_acl_erp_delta_create(void *priv, void *parent_obj, + void *obj) +{ + struct mlxsw_sp_acl_erp_key *parent_key = parent_obj; + struct mlxsw_sp_acl_atcam_region *aregion = priv; struct mlxsw_sp_acl_erp_table *erp_table = aregion->erp_table; + struct mlxsw_sp_acl_erp_key *key = obj; + struct mlxsw_sp_acl_erp_delta *delta; + u16 delta_start; + u8 delta_mask; + int err; - ASSERT_RTNL(); + if (parent_key->ctcam || key->ctcam) + return ERR_PTR(-EINVAL); + err = mlxsw_sp_acl_erp_delta_fill(parent_key, key, + &delta_start, &delta_mask); + if (err) + return ERR_PTR(-EINVAL); - if (!refcount_dec_and_test(&erp->refcnt)) - return; + delta = kzalloc(sizeof(*delta), GFP_KERNEL); + if (!delta) + return ERR_PTR(-ENOMEM); + delta->start = delta_start; + delta->mask = delta_mask; + + err = mlxsw_sp_acl_erp_delta_inc(erp_table); + if (err) + goto err_erp_delta_inc; + + memcpy(&delta->key, key, sizeof(*key)); + err = mlxsw_sp_acl_erp_master_mask_set(erp_table, &delta->key); + if (err) + goto err_master_mask_set; + + return delta; + +err_master_mask_set: + mlxsw_sp_acl_erp_delta_dec(erp_table); +err_erp_delta_inc: + kfree(delta); + return ERR_PTR(err); +} + +static void mlxsw_sp_acl_erp_delta_destroy(void *priv, void *delta_priv) +{ + struct mlxsw_sp_acl_erp_delta *delta = delta_priv; + struct mlxsw_sp_acl_atcam_region *aregion = priv; + struct mlxsw_sp_acl_erp_table *erp_table = aregion->erp_table; - erp_table->ops->erp_destroy(erp_table, erp); + mlxsw_sp_acl_erp_master_mask_clear(erp_table, &delta->key); + mlxsw_sp_acl_erp_delta_dec(erp_table); + kfree(delta); } +static void *mlxsw_sp_acl_erp_root_create(void *priv, void *obj, + unsigned int root_id) +{ + struct mlxsw_sp_acl_atcam_region *aregion = priv; + struct mlxsw_sp_acl_erp_table *erp_table = aregion->erp_table; + struct mlxsw_sp_acl_erp_key *key = obj; + + if (!key->ctcam && + root_id != OBJAGG_OBJ_ROOT_ID_INVALID && + root_id >= MLXSW_SP_ACL_ERP_MAX_PER_REGION) + return ERR_PTR(-ENOBUFS); + return erp_table->ops->erp_create(erp_table, key); +} + +static void mlxsw_sp_acl_erp_root_destroy(void *priv, void *root_priv) +{ + struct mlxsw_sp_acl_atcam_region *aregion = priv; + struct mlxsw_sp_acl_erp_table *erp_table = aregion->erp_table; + + erp_table->ops->erp_destroy(erp_table, root_priv); +} + +static const struct objagg_ops mlxsw_sp_acl_erp_objagg_ops = { + .obj_size = sizeof(struct mlxsw_sp_acl_erp_key), + .delta_check = mlxsw_sp_acl_erp_delta_check, + .hints_obj_cmp = mlxsw_sp_acl_erp_hints_obj_cmp, + .delta_create = mlxsw_sp_acl_erp_delta_create, + .delta_destroy = mlxsw_sp_acl_erp_delta_destroy, + .root_create = mlxsw_sp_acl_erp_root_create, + .root_destroy = mlxsw_sp_acl_erp_root_destroy, +}; + static struct mlxsw_sp_acl_erp_table * -mlxsw_sp_acl_erp_table_create(struct mlxsw_sp_acl_atcam_region *aregion) +mlxsw_sp_acl_erp_table_create(struct mlxsw_sp_acl_atcam_region *aregion, + struct objagg_hints *hints) { struct mlxsw_sp_acl_erp_table *erp_table; int err; @@ -988,18 +1325,22 @@ mlxsw_sp_acl_erp_table_create(struct mlxsw_sp_acl_atcam_region *aregion) if (!erp_table) return ERR_PTR(-ENOMEM); - err = rhashtable_init(&erp_table->erp_ht, &mlxsw_sp_acl_erp_ht_params); - if (err) - goto err_rhashtable_init; + erp_table->objagg = objagg_create(&mlxsw_sp_acl_erp_objagg_ops, + hints, aregion); + if (IS_ERR(erp_table->objagg)) { + err = PTR_ERR(erp_table->objagg); + goto err_objagg_create; + } erp_table->erp_core = aregion->atcam->erp_core; erp_table->ops = 
&erp_no_mask_ops; INIT_LIST_HEAD(&erp_table->atcam_erps_list); erp_table->aregion = aregion; + mutex_init(&erp_table->objagg_lock); return erp_table; -err_rhashtable_init: +err_objagg_create: kfree(erp_table); return ERR_PTR(err); } @@ -1008,7 +1349,8 @@ static void mlxsw_sp_acl_erp_table_destroy(struct mlxsw_sp_acl_erp_table *erp_table) { WARN_ON(!list_empty(&erp_table->atcam_erps_list)); - rhashtable_destroy(&erp_table->erp_ht); + mutex_destroy(&erp_table->objagg_lock); + objagg_destroy(erp_table->objagg); kfree(erp_table); } @@ -1033,12 +1375,93 @@ mlxsw_sp_acl_erp_region_param_init(struct mlxsw_sp_acl_atcam_region *aregion) return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pererp), pererp_pl); } -int mlxsw_sp_acl_erp_region_init(struct mlxsw_sp_acl_atcam_region *aregion) +static int +mlxsw_sp_acl_erp_hints_check(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_atcam_region *aregion, + struct objagg_hints *hints, bool *p_rehash_needed) +{ + struct mlxsw_sp_acl_erp_table *erp_table = aregion->erp_table; + const struct objagg_stats *ostats; + const struct objagg_stats *hstats; + int err; + + *p_rehash_needed = false; + + mutex_lock(&erp_table->objagg_lock); + ostats = objagg_stats_get(erp_table->objagg); + mutex_unlock(&erp_table->objagg_lock); + if (IS_ERR(ostats)) { + dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to get ERP stats\n"); + return PTR_ERR(ostats); + } + + hstats = objagg_hints_stats_get(hints); + if (IS_ERR(hstats)) { + dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to get ERP hints stats\n"); + err = PTR_ERR(hstats); + goto err_hints_stats_get; + } + + /* Very basic criterion for now. */ + if (hstats->root_count < ostats->root_count) + *p_rehash_needed = true; + + err = 0; + + objagg_stats_put(hstats); +err_hints_stats_get: + objagg_stats_put(ostats); + return err; +} + +void * +mlxsw_sp_acl_erp_rehash_hints_get(struct mlxsw_sp_acl_atcam_region *aregion) +{ + struct mlxsw_sp_acl_erp_table *erp_table = aregion->erp_table; + struct mlxsw_sp *mlxsw_sp = aregion->region->mlxsw_sp; + struct objagg_hints *hints; + bool rehash_needed; + int err; + + mutex_lock(&erp_table->objagg_lock); + hints = objagg_hints_get(erp_table->objagg, + OBJAGG_OPT_ALGO_SIMPLE_GREEDY); + mutex_unlock(&erp_table->objagg_lock); + if (IS_ERR(hints)) { + dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to create ERP hints\n"); + return ERR_CAST(hints); + } + err = mlxsw_sp_acl_erp_hints_check(mlxsw_sp, aregion, hints, + &rehash_needed); + if (err) + goto errout; + + if (!rehash_needed) { + err = -EAGAIN; + goto errout; + } + return hints; + +errout: + objagg_hints_put(hints); + return ERR_PTR(err); +} + +void mlxsw_sp_acl_erp_rehash_hints_put(void *hints_priv) +{ + struct objagg_hints *hints = hints_priv; + + objagg_hints_put(hints); +} + +int mlxsw_sp_acl_erp_region_init(struct mlxsw_sp_acl_atcam_region *aregion, + void *hints_priv) { struct mlxsw_sp_acl_erp_table *erp_table; + struct objagg_hints *hints = hints_priv; int err; - erp_table = mlxsw_sp_acl_erp_table_create(aregion); + erp_table = mlxsw_sp_acl_erp_table_create(aregion, hints); if (IS_ERR(erp_table)) return PTR_ERR(erp_table); aregion->erp_table = erp_table; @@ -1118,6 +1541,12 @@ static int mlxsw_sp_acl_erp_tables_init(struct mlxsw_sp *mlxsw_sp, if (err) goto err_gen_pool_add; + erp_core->bf = mlxsw_sp_acl_bf_init(mlxsw_sp, erp_core->num_erp_banks); + if (IS_ERR(erp_core->bf)) { + err = PTR_ERR(erp_core->bf); + goto err_bf_init; + } + /* Different regions require masks of different sizes */ err = 
mlxsw_sp_acl_erp_tables_sizes_query(mlxsw_sp, erp_core); if (err) @@ -1126,6 +1555,8 @@ static int mlxsw_sp_acl_erp_tables_init(struct mlxsw_sp *mlxsw_sp, return 0; err_erp_tables_sizes_query: + mlxsw_sp_acl_bf_fini(erp_core->bf); +err_bf_init: err_gen_pool_add: gen_pool_destroy(erp_core->erp_tables); return err; @@ -1134,6 +1565,7 @@ err_gen_pool_add: static void mlxsw_sp_acl_erp_tables_fini(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_erp_core *erp_core) { + mlxsw_sp_acl_bf_fini(erp_core->bf); gen_pool_destroy(erp_core->erp_tables); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c index d409b09ba8df..2a998dea4f39 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c @@ -98,8 +98,8 @@ static const struct mlxsw_afk_block mlxsw_sp1_afk_blocks[] = { #define MLXSW_SP1_AFK_KEY_BLOCK_SIZE 16 -static void mlxsw_sp1_afk_encode_block(char *block, int block_index, - char *output) +static void mlxsw_sp1_afk_encode_block(char *output, int block_index, + char *block) { unsigned int offset = block_index * MLXSW_SP1_AFK_KEY_BLOCK_SIZE; char *output_indexed = output + offset; @@ -107,10 +107,19 @@ static void mlxsw_sp1_afk_encode_block(char *block, int block_index, memcpy(output_indexed, block, MLXSW_SP1_AFK_KEY_BLOCK_SIZE); } +static void mlxsw_sp1_afk_clear_block(char *output, int block_index) +{ + unsigned int offset = block_index * MLXSW_SP1_AFK_KEY_BLOCK_SIZE; + char *output_indexed = output + offset; + + memset(output_indexed, 0, MLXSW_SP1_AFK_KEY_BLOCK_SIZE); +} + const struct mlxsw_afk_ops mlxsw_sp1_afk_ops = { .blocks = mlxsw_sp1_afk_blocks, .blocks_count = ARRAY_SIZE(mlxsw_sp1_afk_blocks), .encode_block = mlxsw_sp1_afk_encode_block, + .clear_block = mlxsw_sp1_afk_clear_block, }; static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_mac_0[] = { @@ -158,6 +167,11 @@ static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4_2[] = { MLXSW_AFK_ELEMENT_INST_U32(IP_PROTO, 0x04, 16, 8), }; +static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4_4[] = { + MLXSW_AFK_ELEMENT_INST_U32(VIRT_ROUTER_0_7, 0x04, 24, 8), + MLXSW_AFK_ELEMENT_INST_U32(VIRT_ROUTER_8_10, 0x00, 0, 3), +}; + static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv6_0[] = { MLXSW_AFK_ELEMENT_INST_BUF(DST_IP_32_63, 0x04, 4), }; @@ -201,6 +215,7 @@ static const struct mlxsw_afk_block mlxsw_sp2_afk_blocks[] = { MLXSW_AFK_BLOCK(0x38, mlxsw_sp_afk_element_info_ipv4_0), MLXSW_AFK_BLOCK(0x39, mlxsw_sp_afk_element_info_ipv4_1), MLXSW_AFK_BLOCK(0x3A, mlxsw_sp_afk_element_info_ipv4_2), + MLXSW_AFK_BLOCK(0x3C, mlxsw_sp_afk_element_info_ipv4_4), MLXSW_AFK_BLOCK(0x40, mlxsw_sp_afk_element_info_ipv6_0), MLXSW_AFK_BLOCK(0x41, mlxsw_sp_afk_element_info_ipv6_1), MLXSW_AFK_BLOCK(0x42, mlxsw_sp_afk_element_info_ipv6_2), @@ -263,10 +278,9 @@ static const struct mlxsw_sp2_afk_block_layout mlxsw_sp2_afk_blocks_layout[] = { MLXSW_SP2_AFK_BLOCK_LAYOUT(block11, 0x00, 12), }; -static void mlxsw_sp2_afk_encode_block(char *block, int block_index, - char *output) +static void __mlxsw_sp2_afk_block_value_set(char *output, int block_index, + u64 block_value) { - u64 block_value = mlxsw_sp2_afk_block_value_get(block); const struct mlxsw_sp2_afk_block_layout *block_layout; if (WARN_ON(block_index < 0 || @@ -278,8 +292,22 @@ static void mlxsw_sp2_afk_encode_block(char *block, int block_index, &block_layout->item, 0, block_value); } +static void 
mlxsw_sp2_afk_encode_block(char *output, int block_index, + char *block) +{ + u64 block_value = mlxsw_sp2_afk_block_value_get(block); + + __mlxsw_sp2_afk_block_value_set(output, block_index, block_value); +} + +static void mlxsw_sp2_afk_clear_block(char *output, int block_index) +{ + __mlxsw_sp2_afk_block_value_set(output, block_index, 0); +} + const struct mlxsw_afk_ops mlxsw_sp2_afk_ops = { .blocks = mlxsw_sp2_afk_blocks, .blocks_count = ARRAY_SIZE(mlxsw_sp2_afk_blocks), .encode_block = mlxsw_sp2_afk_encode_block, + .clear_block = mlxsw_sp2_afk_clear_block, }; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c index e171513bb32a..8811f6513e36 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c @@ -8,6 +8,8 @@ #include <linux/list.h> #include <linux/rhashtable.h> #include <linux/netdevice.h> +#include <linux/mutex.h> +#include <trace/events/mlxsw.h> #include "reg.h" #include "core.h" @@ -23,6 +25,10 @@ size_t mlxsw_sp_acl_tcam_priv_size(struct mlxsw_sp *mlxsw_sp) return ops->priv_size; } +#define MLXSW_SP_ACL_TCAM_VREGION_REHASH_INTRVL_DFLT 5000 /* ms */ +#define MLXSW_SP_ACL_TCAM_VREGION_REHASH_INTRVL_MIN 3000 /* ms */ +#define MLXSW_SP_ACL_TCAM_VREGION_REHASH_CREDITS 100 /* number of entries */ + int mlxsw_sp_acl_tcam_init(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_tcam *tcam) { @@ -33,6 +39,11 @@ int mlxsw_sp_acl_tcam_init(struct mlxsw_sp *mlxsw_sp, size_t alloc_size; int err; + mutex_init(&tcam->lock); + tcam->vregion_rehash_intrvl = + MLXSW_SP_ACL_TCAM_VREGION_REHASH_INTRVL_DFLT; + INIT_LIST_HEAD(&tcam->vregion_list); + max_tcam_regions = MLXSW_CORE_RES_GET(mlxsw_sp->core, ACL_MAX_TCAM_REGIONS); max_regions = MLXSW_CORE_RES_GET(mlxsw_sp->core, ACL_MAX_REGIONS); @@ -76,6 +87,7 @@ void mlxsw_sp_acl_tcam_fini(struct mlxsw_sp *mlxsw_sp, { const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops; + mutex_destroy(&tcam->lock); ops->fini(mlxsw_sp, tcam->priv); kfree(tcam->used_groups); kfree(tcam->used_regions); @@ -95,8 +107,9 @@ int mlxsw_sp_acl_tcam_priority_get(struct mlxsw_sp *mlxsw_sp, if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, KVD_SIZE)) return -EIO; - max_priority = MLXSW_CORE_RES_GET(mlxsw_sp->core, KVD_SIZE); - if (rulei->priority > max_priority) + /* Priority range is 1..cap_kvd_size-1. */ + max_priority = MLXSW_CORE_RES_GET(mlxsw_sp->core, KVD_SIZE) - 1; + if (rulei->priority >= max_priority) return -EINVAL; /* Unlike in TC, in HW, higher number means higher priority. 
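+	 * The TC priority is therefore inverted before it is programmed
+	 * into the device.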
*/ @@ -152,37 +165,100 @@ struct mlxsw_sp_acl_tcam_pattern { struct mlxsw_sp_acl_tcam_group { struct mlxsw_sp_acl_tcam *tcam; u16 id; + struct mutex lock; /* guards region list updates */ struct list_head region_list; unsigned int region_count; - struct rhashtable chunk_ht; - struct mlxsw_sp_acl_tcam_group_ops *ops; +}; + +struct mlxsw_sp_acl_tcam_vgroup { + struct mlxsw_sp_acl_tcam_group group; + struct list_head vregion_list; + struct rhashtable vchunk_ht; const struct mlxsw_sp_acl_tcam_pattern *patterns; unsigned int patterns_count; bool tmplt_elusage_set; struct mlxsw_afk_element_usage tmplt_elusage; + bool vregion_rehash_enabled; }; -struct mlxsw_sp_acl_tcam_chunk { - struct list_head list; /* Member of a TCAM region */ - struct rhash_head ht_node; /* Member of a chunk HT */ - unsigned int priority; /* Priority within the region and group */ - struct mlxsw_sp_acl_tcam_group *group; +struct mlxsw_sp_acl_tcam_rehash_ctx { + void *hints_priv; + bool this_is_rollback; + struct mlxsw_sp_acl_tcam_vchunk *current_vchunk; /* vchunk being + * currently migrated. + */ + struct mlxsw_sp_acl_tcam_ventry *start_ventry; /* ventry to start + * migration from in + * a vchunk being + * currently migrated. + */ + struct mlxsw_sp_acl_tcam_ventry *stop_ventry; /* ventry to stop + * migration at + * a vchunk being + * currently migrated. + */ +}; + +struct mlxsw_sp_acl_tcam_vregion { + struct mutex lock; /* Protects consistency of region, region2 pointers + * and vchunk_list. + */ struct mlxsw_sp_acl_tcam_region *region; + struct mlxsw_sp_acl_tcam_region *region2; /* Used during migration */ + struct list_head list; /* Member of a TCAM group */ + struct list_head tlist; /* Member of a TCAM */ + struct list_head vchunk_list; /* List of vchunks under this vregion */ + struct mlxsw_afk_key_info *key_info; + struct mlxsw_sp_acl_tcam *tcam; + struct mlxsw_sp_acl_tcam_vgroup *vgroup; + struct { + struct delayed_work dw; + struct mlxsw_sp_acl_tcam_rehash_ctx ctx; + } rehash; + struct mlxsw_sp *mlxsw_sp; + bool failed_rollback; /* Indicates failed rollback during migration */ unsigned int ref_count; +}; + +struct mlxsw_sp_acl_tcam_vchunk; + +struct mlxsw_sp_acl_tcam_chunk { + struct mlxsw_sp_acl_tcam_vchunk *vchunk; + struct mlxsw_sp_acl_tcam_region *region; unsigned long priv[0]; /* priv has to be always the last item */ }; +struct mlxsw_sp_acl_tcam_vchunk { + struct mlxsw_sp_acl_tcam_chunk *chunk; + struct mlxsw_sp_acl_tcam_chunk *chunk2; /* Used during migration */ + struct list_head list; /* Member of a TCAM vregion */ + struct rhash_head ht_node; /* Member of a chunk HT */ + struct list_head ventry_list; + unsigned int priority; /* Priority within the vregion and group */ + struct mlxsw_sp_acl_tcam_vgroup *vgroup; + struct mlxsw_sp_acl_tcam_vregion *vregion; + unsigned int ref_count; +}; + struct mlxsw_sp_acl_tcam_entry { + struct mlxsw_sp_acl_tcam_ventry *ventry; struct mlxsw_sp_acl_tcam_chunk *chunk; unsigned long priv[0]; /* priv has to be always the last item */ }; -static const struct rhashtable_params mlxsw_sp_acl_tcam_chunk_ht_params = { +struct mlxsw_sp_acl_tcam_ventry { + struct mlxsw_sp_acl_tcam_entry *entry; + struct list_head list; /* Member of a TCAM vchunk */ + struct mlxsw_sp_acl_tcam_vchunk *vchunk; + struct mlxsw_sp_acl_rule_info *rulei; +}; + +static const struct rhashtable_params mlxsw_sp_acl_tcam_vchunk_ht_params = { .key_len = sizeof(unsigned int), - .key_offset = offsetof(struct mlxsw_sp_acl_tcam_chunk, priority), - .head_offset = offsetof(struct mlxsw_sp_acl_tcam_chunk, ht_node), + 
.key_offset = offsetof(struct mlxsw_sp_acl_tcam_vchunk, priority), + .head_offset = offsetof(struct mlxsw_sp_acl_tcam_vchunk, ht_node), .automatic_shrinking = true, }; @@ -194,55 +270,90 @@ static int mlxsw_sp_acl_tcam_group_update(struct mlxsw_sp *mlxsw_sp, int acl_index = 0; mlxsw_reg_pagt_pack(pagt_pl, group->id); - list_for_each_entry(region, &group->region_list, list) - mlxsw_reg_pagt_acl_id_pack(pagt_pl, acl_index++, region->id); + list_for_each_entry(region, &group->region_list, list) { + bool multi = false; + + /* Check if the next entry in the list has the same vregion. */ + if (region->list.next != &group->region_list && + list_next_entry(region, list)->vregion == region->vregion) + multi = true; + mlxsw_reg_pagt_acl_id_pack(pagt_pl, acl_index++, + region->id, multi); + } mlxsw_reg_pagt_size_set(pagt_pl, acl_index); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pagt), pagt_pl); } static int -mlxsw_sp_acl_tcam_group_add(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_acl_tcam *tcam, - struct mlxsw_sp_acl_tcam_group *group, - const struct mlxsw_sp_acl_tcam_pattern *patterns, - unsigned int patterns_count, - struct mlxsw_afk_element_usage *tmplt_elusage) +mlxsw_sp_acl_tcam_group_add(struct mlxsw_sp_acl_tcam *tcam, + struct mlxsw_sp_acl_tcam_group *group) { int err; group->tcam = tcam; - group->patterns = patterns; - group->patterns_count = patterns_count; - if (tmplt_elusage) { - group->tmplt_elusage_set = true; - memcpy(&group->tmplt_elusage, tmplt_elusage, - sizeof(group->tmplt_elusage)); - } + mutex_init(&group->lock); INIT_LIST_HEAD(&group->region_list); + err = mlxsw_sp_acl_tcam_group_id_get(tcam, &group->id); if (err) return err; - err = rhashtable_init(&group->chunk_ht, - &mlxsw_sp_acl_tcam_chunk_ht_params); + return 0; +} + +static void mlxsw_sp_acl_tcam_group_del(struct mlxsw_sp_acl_tcam_group *group) +{ + struct mlxsw_sp_acl_tcam *tcam = group->tcam; + + mutex_destroy(&group->lock); + mlxsw_sp_acl_tcam_group_id_put(tcam, group->id); + WARN_ON(!list_empty(&group->region_list)); +} + +static int +mlxsw_sp_acl_tcam_vgroup_add(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam *tcam, + struct mlxsw_sp_acl_tcam_vgroup *vgroup, + const struct mlxsw_sp_acl_tcam_pattern *patterns, + unsigned int patterns_count, + struct mlxsw_afk_element_usage *tmplt_elusage, + bool vregion_rehash_enabled) +{ + int err; + + vgroup->patterns = patterns; + vgroup->patterns_count = patterns_count; + vgroup->vregion_rehash_enabled = vregion_rehash_enabled; + + if (tmplt_elusage) { + vgroup->tmplt_elusage_set = true; + memcpy(&vgroup->tmplt_elusage, tmplt_elusage, + sizeof(vgroup->tmplt_elusage)); + } + INIT_LIST_HEAD(&vgroup->vregion_list); + + err = mlxsw_sp_acl_tcam_group_add(tcam, &vgroup->group); + if (err) + return err; + + err = rhashtable_init(&vgroup->vchunk_ht, + &mlxsw_sp_acl_tcam_vchunk_ht_params); if (err) goto err_rhashtable_init; return 0; err_rhashtable_init: - mlxsw_sp_acl_tcam_group_id_put(tcam, group->id); + mlxsw_sp_acl_tcam_group_del(&vgroup->group); return err; } -static void mlxsw_sp_acl_tcam_group_del(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_acl_tcam_group *group) +static void +mlxsw_sp_acl_tcam_vgroup_del(struct mlxsw_sp_acl_tcam_vgroup *vgroup) { - struct mlxsw_sp_acl_tcam *tcam = group->tcam; - - rhashtable_destroy(&group->chunk_ht); - mlxsw_sp_acl_tcam_group_id_put(tcam, group->id); - WARN_ON(!list_empty(&group->region_list)); + rhashtable_destroy(&vgroup->vchunk_ht); + mlxsw_sp_acl_tcam_group_del(&vgroup->group); + WARN_ON(!list_empty(&vgroup->vregion_list)); } 
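
To make the PAGT update above concrete: a region is reported to the device as "multi" whenever the region that follows it in the group list backs the same vregion, so both halves of a migrating vregion act as one lookup. Below is a minimal userspace sketch of that look-ahead; struct region here is a toy stand-in with illustrative fields, not the kernel's types:

```c
#include <stdbool.h>
#include <stdio.h>

/* Toy stand-in for mlxsw_sp_acl_tcam_region; only the fields needed
 * to demonstrate the "multi" look-ahead are kept.
 */
struct region {
	int id;
	int vregion;		/* aggregation key, as region->vregion */
	struct region *next;	/* as the group's region_list ordering */
};

int main(void)
{
	struct region r2 = { .id = 7, .vregion = 2, .next = NULL };
	struct region r1 = { .id = 5, .vregion = 1, .next = &r2 };
	struct region r0 = { .id = 4, .vregion = 1, .next = &r1 };

	for (struct region *r = &r0; r; r = r->next) {
		/* "multi" iff the next region backs the same vregion,
		 * mirroring the check in mlxsw_sp_acl_tcam_group_update().
		 */
		bool multi = r->next && r->next->vregion == r->vregion;

		printf("region %d multi=%d\n", r->id, multi);
	}
	return 0;	/* prints multi=1 only for region 4 */
}
```
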
static int @@ -282,146 +393,194 @@ mlxsw_sp_acl_tcam_group_id(struct mlxsw_sp_acl_tcam_group *group) } static unsigned int -mlxsw_sp_acl_tcam_region_prio(struct mlxsw_sp_acl_tcam_region *region) +mlxsw_sp_acl_tcam_vregion_prio(struct mlxsw_sp_acl_tcam_vregion *vregion) { - struct mlxsw_sp_acl_tcam_chunk *chunk; + struct mlxsw_sp_acl_tcam_vchunk *vchunk; - if (list_empty(®ion->chunk_list)) + if (list_empty(&vregion->vchunk_list)) return 0; - /* As a priority of a region, return priority of the first chunk */ - chunk = list_first_entry(®ion->chunk_list, typeof(*chunk), list); - return chunk->priority; + /* As a priority of a vregion, return priority of the first vchunk */ + vchunk = list_first_entry(&vregion->vchunk_list, + typeof(*vchunk), list); + return vchunk->priority; } static unsigned int -mlxsw_sp_acl_tcam_region_max_prio(struct mlxsw_sp_acl_tcam_region *region) +mlxsw_sp_acl_tcam_vregion_max_prio(struct mlxsw_sp_acl_tcam_vregion *vregion) { - struct mlxsw_sp_acl_tcam_chunk *chunk; + struct mlxsw_sp_acl_tcam_vchunk *vchunk; - if (list_empty(®ion->chunk_list)) + if (list_empty(&vregion->vchunk_list)) return 0; - chunk = list_last_entry(®ion->chunk_list, typeof(*chunk), list); - return chunk->priority; + vchunk = list_last_entry(&vregion->vchunk_list, + typeof(*vchunk), list); + return vchunk->priority; } -static void -mlxsw_sp_acl_tcam_group_list_add(struct mlxsw_sp_acl_tcam_group *group, - struct mlxsw_sp_acl_tcam_region *region) +static int +mlxsw_sp_acl_tcam_group_region_attach(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_group *group, + struct mlxsw_sp_acl_tcam_region *region, + unsigned int priority, + struct mlxsw_sp_acl_tcam_region *next_region) { struct mlxsw_sp_acl_tcam_region *region2; struct list_head *pos; + int err; - /* Position the region inside the list according to priority */ - list_for_each(pos, &group->region_list) { - region2 = list_entry(pos, typeof(*region2), list); - if (mlxsw_sp_acl_tcam_region_prio(region2) > - mlxsw_sp_acl_tcam_region_prio(region)) - break; + mutex_lock(&group->lock); + if (group->region_count == group->tcam->max_group_size) { + err = -ENOBUFS; + goto err_region_count_check; + } + + if (next_region) { + /* If the next region is defined, place the new one + * before it. The next one is a sibling. 
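+		 * Siblings are two regions that back the same vregion,
+		 * e.g. region and region2 while the vregion is being
+		 * migrated during rehash.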
+ */ + pos = &next_region->list; + } else { + /* Position the region inside the list according to priority */ + list_for_each(pos, &group->region_list) { + region2 = list_entry(pos, typeof(*region2), list); + if (mlxsw_sp_acl_tcam_vregion_prio(region2->vregion) > + priority) + break; + } } list_add_tail(®ion->list, pos); + region->group = group; + + err = mlxsw_sp_acl_tcam_group_update(mlxsw_sp, group); + if (err) + goto err_group_update; + group->region_count++; + mutex_unlock(&group->lock); + return 0; + +err_group_update: + list_del(®ion->list); +err_region_count_check: + mutex_unlock(&group->lock); + return err; } static void -mlxsw_sp_acl_tcam_group_list_del(struct mlxsw_sp_acl_tcam_group *group, - struct mlxsw_sp_acl_tcam_region *region) +mlxsw_sp_acl_tcam_group_region_detach(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_region *region) { - group->region_count--; + struct mlxsw_sp_acl_tcam_group *group = region->group; + + mutex_lock(&group->lock); list_del(®ion->list); + group->region_count--; + mlxsw_sp_acl_tcam_group_update(mlxsw_sp, group); + mutex_unlock(&group->lock); } static int -mlxsw_sp_acl_tcam_group_region_attach(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_acl_tcam_group *group, - struct mlxsw_sp_acl_tcam_region *region) +mlxsw_sp_acl_tcam_vgroup_vregion_attach(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_vgroup *vgroup, + struct mlxsw_sp_acl_tcam_vregion *vregion, + unsigned int priority) { + struct mlxsw_sp_acl_tcam_vregion *vregion2; + struct list_head *pos; int err; - if (group->region_count == group->tcam->max_group_size) - return -ENOBUFS; - - mlxsw_sp_acl_tcam_group_list_add(group, region); + /* Position the vregion inside the list according to priority */ + list_for_each(pos, &vgroup->vregion_list) { + vregion2 = list_entry(pos, typeof(*vregion2), list); + if (mlxsw_sp_acl_tcam_vregion_prio(vregion2) > priority) + break; + } + list_add_tail(&vregion->list, pos); - err = mlxsw_sp_acl_tcam_group_update(mlxsw_sp, group); + err = mlxsw_sp_acl_tcam_group_region_attach(mlxsw_sp, &vgroup->group, + vregion->region, + priority, NULL); if (err) - goto err_group_update; - region->group = group; + goto err_region_attach; return 0; -err_group_update: - mlxsw_sp_acl_tcam_group_list_del(group, region); - mlxsw_sp_acl_tcam_group_update(mlxsw_sp, group); +err_region_attach: + list_del(&vregion->list); return err; } static void -mlxsw_sp_acl_tcam_group_region_detach(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_acl_tcam_region *region) +mlxsw_sp_acl_tcam_vgroup_vregion_detach(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_vregion *vregion) { - struct mlxsw_sp_acl_tcam_group *group = region->group; - - mlxsw_sp_acl_tcam_group_list_del(group, region); - mlxsw_sp_acl_tcam_group_update(mlxsw_sp, group); + list_del(&vregion->list); + if (vregion->region2) + mlxsw_sp_acl_tcam_group_region_detach(mlxsw_sp, + vregion->region2); + mlxsw_sp_acl_tcam_group_region_detach(mlxsw_sp, vregion->region); } -static struct mlxsw_sp_acl_tcam_region * -mlxsw_sp_acl_tcam_group_region_find(struct mlxsw_sp_acl_tcam_group *group, - unsigned int priority, - struct mlxsw_afk_element_usage *elusage, - bool *p_need_split) +static struct mlxsw_sp_acl_tcam_vregion * +mlxsw_sp_acl_tcam_vgroup_vregion_find(struct mlxsw_sp_acl_tcam_vgroup *vgroup, + unsigned int priority, + struct mlxsw_afk_element_usage *elusage, + bool *p_need_split) { - struct mlxsw_sp_acl_tcam_region *region, *region2; + struct mlxsw_sp_acl_tcam_vregion *vregion, *vregion2; struct list_head *pos; bool issubset; - 
list_for_each(pos, &group->region_list) { - region = list_entry(pos, typeof(*region), list); + list_for_each(pos, &vgroup->vregion_list) { + vregion = list_entry(pos, typeof(*vregion), list); /* First, check if the requested priority does not rather belong - * under some of the next regions. + * under some of the next vregions. */ - if (pos->next != &group->region_list) { /* not last */ - region2 = list_entry(pos->next, typeof(*region2), list); - if (priority >= mlxsw_sp_acl_tcam_region_prio(region2)) + if (pos->next != &vgroup->vregion_list) { /* not last */ + vregion2 = list_entry(pos->next, typeof(*vregion2), + list); + if (priority >= + mlxsw_sp_acl_tcam_vregion_prio(vregion2)) continue; } - issubset = mlxsw_afk_key_info_subset(region->key_info, elusage); + issubset = mlxsw_afk_key_info_subset(vregion->key_info, + elusage); /* If requested element usage would not fit and the priority - * is lower than the currently inspected region we cannot - * use this region, so return NULL to indicate new region has + * is lower than the currently inspected vregion we cannot + * use this region, so return NULL to indicate new vregion has * to be created. */ if (!issubset && - priority < mlxsw_sp_acl_tcam_region_prio(region)) + priority < mlxsw_sp_acl_tcam_vregion_prio(vregion)) return NULL; /* If requested element usage would not fit and the priority - * is higher than the currently inspected region we cannot - * use this region. There is still some hope that the next - * region would be the fit. So let it be processed and + * is higher than the currently inspected vregion we cannot + * use this vregion. There is still some hope that the next + * vregion would be the fit. So let it be processed and * eventually break at the check right above this. */ if (!issubset && - priority > mlxsw_sp_acl_tcam_region_max_prio(region)) + priority > mlxsw_sp_acl_tcam_vregion_max_prio(vregion)) continue; - /* Indicate if the region needs to be split in order to add + /* Indicate if the vregion needs to be split in order to add * the requested priority. Split is needed when requested - * element usage won't fit into the found region. + * element usage won't fit into the found vregion. */ *p_need_split = !issubset; - return region; + return vregion; } - return NULL; /* New region has to be created. */ + return NULL; /* New vregion has to be created. */ } static void -mlxsw_sp_acl_tcam_group_use_patterns(struct mlxsw_sp_acl_tcam_group *group, - struct mlxsw_afk_element_usage *elusage, - struct mlxsw_afk_element_usage *out) +mlxsw_sp_acl_tcam_vgroup_use_patterns(struct mlxsw_sp_acl_tcam_vgroup *vgroup, + struct mlxsw_afk_element_usage *elusage, + struct mlxsw_afk_element_usage *out) { const struct mlxsw_sp_acl_tcam_pattern *pattern; int i; @@ -429,14 +588,14 @@ mlxsw_sp_acl_tcam_group_use_patterns(struct mlxsw_sp_acl_tcam_group *group, /* In case the template is set, we don't have to look up the pattern * and just use the template. 
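+	 * The WARN_ON below only verifies that the requested element
+	 * usage is indeed a subset of the template.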
*/ - if (group->tmplt_elusage_set) { - memcpy(out, &group->tmplt_elusage, sizeof(*out)); + if (vgroup->tmplt_elusage_set) { + memcpy(out, &vgroup->tmplt_elusage, sizeof(*out)); WARN_ON(!mlxsw_afk_element_usage_subset(elusage, out)); return; } - for (i = 0; i < group->patterns_count; i++) { - pattern = &group->patterns[i]; + for (i = 0; i < vgroup->patterns_count; i++) { + pattern = &vgroup->patterns[i]; mlxsw_afk_element_usage_fill(out, pattern->elements, pattern->elements_count); if (mlxsw_afk_element_usage_subset(elusage, out)) @@ -510,24 +669,19 @@ mlxsw_sp_acl_tcam_region_disable(struct mlxsw_sp *mlxsw_sp, static struct mlxsw_sp_acl_tcam_region * mlxsw_sp_acl_tcam_region_create(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_tcam *tcam, - struct mlxsw_afk_element_usage *elusage) + struct mlxsw_sp_acl_tcam_vregion *vregion, + void *hints_priv) { const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops; - struct mlxsw_afk *afk = mlxsw_sp_acl_afk(mlxsw_sp->acl); struct mlxsw_sp_acl_tcam_region *region; int err; region = kzalloc(sizeof(*region) + ops->region_priv_size, GFP_KERNEL); if (!region) return ERR_PTR(-ENOMEM); - INIT_LIST_HEAD(®ion->chunk_list); region->mlxsw_sp = mlxsw_sp; - - region->key_info = mlxsw_afk_key_info_get(afk, elusage); - if (IS_ERR(region->key_info)) { - err = PTR_ERR(region->key_info); - goto err_key_info_get; - } + region->vregion = vregion; + region->key_info = vregion->key_info; err = mlxsw_sp_acl_tcam_region_id_get(tcam, ®ion->id); if (err) @@ -546,7 +700,8 @@ mlxsw_sp_acl_tcam_region_create(struct mlxsw_sp *mlxsw_sp, if (err) goto err_tcam_region_enable; - err = ops->region_init(mlxsw_sp, region->priv, tcam->priv, region); + err = ops->region_init(mlxsw_sp, region->priv, tcam->priv, + region, hints_priv); if (err) goto err_tcam_region_init; @@ -560,8 +715,6 @@ err_tcam_region_alloc: err_tcam_region_associate: mlxsw_sp_acl_tcam_region_id_put(tcam, region->id); err_region_id_get: - mlxsw_afk_key_info_put(region->key_info); -err_key_info_get: kfree(region); return ERR_PTR(err); } @@ -575,220 +728,793 @@ mlxsw_sp_acl_tcam_region_destroy(struct mlxsw_sp *mlxsw_sp, ops->region_fini(mlxsw_sp, region->priv); mlxsw_sp_acl_tcam_region_disable(mlxsw_sp, region); mlxsw_sp_acl_tcam_region_free(mlxsw_sp, region); - mlxsw_sp_acl_tcam_region_id_put(region->group->tcam, region->id); - mlxsw_afk_key_info_put(region->key_info); + mlxsw_sp_acl_tcam_region_id_put(region->group->tcam, + region->id); kfree(region); } -static int -mlxsw_sp_acl_tcam_chunk_assoc(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_acl_tcam_group *group, - unsigned int priority, - struct mlxsw_afk_element_usage *elusage, - struct mlxsw_sp_acl_tcam_chunk *chunk) +static void +mlxsw_sp_acl_tcam_vregion_rehash_work_schedule(struct mlxsw_sp_acl_tcam_vregion *vregion) { - struct mlxsw_sp_acl_tcam_region *region; - bool region_created = false; - bool need_split; - int err; + unsigned long interval = vregion->tcam->vregion_rehash_intrvl; + + if (!interval) + return; + mlxsw_core_schedule_dw(&vregion->rehash.dw, + msecs_to_jiffies(interval)); +} + +static void +mlxsw_sp_acl_tcam_vregion_rehash(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_vregion *vregion, + int *credits); - region = mlxsw_sp_acl_tcam_group_region_find(group, priority, elusage, - &need_split); - if (region && need_split) { - /* According to priority, the chunk should belong to an - * existing region. However, this chunk needs elements - * that region does not contain. 
We need to split the existing - * region into two and create a new region for this chunk - * in between. This is not supported now. +static void mlxsw_sp_acl_tcam_vregion_rehash_work(struct work_struct *work) +{ + struct mlxsw_sp_acl_tcam_vregion *vregion = + container_of(work, struct mlxsw_sp_acl_tcam_vregion, + rehash.dw.work); + int credits = MLXSW_SP_ACL_TCAM_VREGION_REHASH_CREDITS; + + mlxsw_sp_acl_tcam_vregion_rehash(vregion->mlxsw_sp, vregion, &credits); + if (credits < 0) + /* Rehash gone out of credits so it was interrupted. + * Schedule the work as soon as possible to continue. */ - return -EOPNOTSUPP; - } - if (!region) { - struct mlxsw_afk_element_usage region_elusage; + mlxsw_core_schedule_dw(&vregion->rehash.dw, 0); + else + mlxsw_sp_acl_tcam_vregion_rehash_work_schedule(vregion); +} - mlxsw_sp_acl_tcam_group_use_patterns(group, elusage, - ®ion_elusage); - region = mlxsw_sp_acl_tcam_region_create(mlxsw_sp, group->tcam, - ®ion_elusage); - if (IS_ERR(region)) - return PTR_ERR(region); - region_created = true; +static void +mlxsw_sp_acl_tcam_rehash_ctx_vchunk_changed(struct mlxsw_sp_acl_tcam_vchunk *vchunk) +{ + struct mlxsw_sp_acl_tcam_vregion *vregion = vchunk->vregion; + + /* If a rule was added or deleted from vchunk which is currently + * under rehash migration, we have to reset the ventry pointers + * to make sure all rules are properly migrated. + */ + if (vregion->rehash.ctx.current_vchunk == vchunk) { + vregion->rehash.ctx.start_ventry = NULL; + vregion->rehash.ctx.stop_ventry = NULL; } +} - chunk->region = region; - list_add_tail(&chunk->list, ®ion->chunk_list); +static void +mlxsw_sp_acl_tcam_rehash_ctx_vregion_changed(struct mlxsw_sp_acl_tcam_vregion *vregion) +{ + /* If a chunk was added or deleted from vregion we have to reset + * the current chunk pointer to make sure all chunks + * are properly migrated. 
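
The rehash worker above is budgeted: each invocation starts with a fixed number of credits, and going negative means the work reschedules itself with zero delay instead of waiting for the next interval. A self-contained model of that scheduling decision, with illustrative names and a made-up budget:

    #include <stdio.h>

    #define REHASH_CREDITS 100 /* stand-in for the driver's rehash credit budget */

    /* Hypothetical unit of migration work: move entries until done or broke. */
    static void migrate_some(int *entries_left, int *credits)
    {
            while (*entries_left > 0 && --(*credits) >= 0)
                    (*entries_left)--;
    }

    int main(void)
    {
            int entries = 250; /* pretend vregion size */
            int passes = 0;

            /* Each work item gets a fresh budget; a negative balance means
             * "reschedule immediately", mirroring the credits < 0 branch in
             * mlxsw_sp_acl_tcam_vregion_rehash_work(). */
            while (entries) {
                    int credits = REHASH_CREDITS;

                    migrate_some(&entries, &credits);
                    passes++;
                    if (credits < 0)
                            printf("pass %d: out of credits, reschedule now\n", passes);
                    else
                            printf("pass %d: done, schedule after interval\n", passes);
            }
            return 0;
    }
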
+ */ + vregion->rehash.ctx.current_vchunk = NULL; +} - if (!region_created) - return 0; +static struct mlxsw_sp_acl_tcam_vregion * +mlxsw_sp_acl_tcam_vregion_create(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_vgroup *vgroup, + unsigned int priority, + struct mlxsw_afk_element_usage *elusage) +{ + const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops; + struct mlxsw_afk *afk = mlxsw_sp_acl_afk(mlxsw_sp->acl); + struct mlxsw_sp_acl_tcam *tcam = vgroup->group.tcam; + struct mlxsw_sp_acl_tcam_vregion *vregion; + int err; + + vregion = kzalloc(sizeof(*vregion), GFP_KERNEL); + if (!vregion) + return ERR_PTR(-ENOMEM); + INIT_LIST_HEAD(&vregion->vchunk_list); + mutex_init(&vregion->lock); + vregion->tcam = tcam; + vregion->mlxsw_sp = mlxsw_sp; + vregion->vgroup = vgroup; + vregion->ref_count = 1; + + vregion->key_info = mlxsw_afk_key_info_get(afk, elusage); + if (IS_ERR(vregion->key_info)) { + err = PTR_ERR(vregion->key_info); + goto err_key_info_get; + } + + vregion->region = mlxsw_sp_acl_tcam_region_create(mlxsw_sp, tcam, + vregion, NULL); + if (IS_ERR(vregion->region)) { + err = PTR_ERR(vregion->region); + goto err_region_create; + } - err = mlxsw_sp_acl_tcam_group_region_attach(mlxsw_sp, group, region); + err = mlxsw_sp_acl_tcam_vgroup_vregion_attach(mlxsw_sp, vgroup, vregion, + priority); if (err) - goto err_group_region_attach; + goto err_vgroup_vregion_attach; + + if (vgroup->vregion_rehash_enabled && ops->region_rehash_hints_get) { + /* Create the delayed work for vregion periodic rehash */ + INIT_DELAYED_WORK(&vregion->rehash.dw, + mlxsw_sp_acl_tcam_vregion_rehash_work); + mlxsw_sp_acl_tcam_vregion_rehash_work_schedule(vregion); + mutex_lock(&tcam->lock); + list_add_tail(&vregion->tlist, &tcam->vregion_list); + mutex_unlock(&tcam->lock); + } - return 0; + return vregion; -err_group_region_attach: - mlxsw_sp_acl_tcam_region_destroy(mlxsw_sp, region); - return err; +err_vgroup_vregion_attach: + mlxsw_sp_acl_tcam_region_destroy(mlxsw_sp, vregion->region); +err_region_create: + mlxsw_afk_key_info_put(vregion->key_info); +err_key_info_get: + kfree(vregion); + return ERR_PTR(err); } static void -mlxsw_sp_acl_tcam_chunk_deassoc(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_acl_tcam_chunk *chunk) +mlxsw_sp_acl_tcam_vregion_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_vregion *vregion) +{ + const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops; + struct mlxsw_sp_acl_tcam_vgroup *vgroup = vregion->vgroup; + struct mlxsw_sp_acl_tcam *tcam = vregion->tcam; + + if (vgroup->vregion_rehash_enabled && ops->region_rehash_hints_get) { + mutex_lock(&tcam->lock); + list_del(&vregion->tlist); + mutex_unlock(&tcam->lock); + cancel_delayed_work_sync(&vregion->rehash.dw); + } + mlxsw_sp_acl_tcam_vgroup_vregion_detach(mlxsw_sp, vregion); + if (vregion->region2) + mlxsw_sp_acl_tcam_region_destroy(mlxsw_sp, vregion->region2); + mlxsw_sp_acl_tcam_region_destroy(mlxsw_sp, vregion->region); + mlxsw_afk_key_info_put(vregion->key_info); + mutex_destroy(&vregion->lock); + kfree(vregion); +} + +u32 mlxsw_sp_acl_tcam_vregion_rehash_intrvl_get(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam *tcam) +{ + const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops; + u32 vregion_rehash_intrvl; + + if (WARN_ON(!ops->region_rehash_hints_get)) + return 0; + vregion_rehash_intrvl = tcam->vregion_rehash_intrvl; + return vregion_rehash_intrvl; +} + +int mlxsw_sp_acl_tcam_vregion_rehash_intrvl_set(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam *tcam, + u32 val) +{ + 
const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops; + struct mlxsw_sp_acl_tcam_vregion *vregion; + + if (val < MLXSW_SP_ACL_TCAM_VREGION_REHASH_INTRVL_MIN && val) + return -EINVAL; + if (WARN_ON(!ops->region_rehash_hints_get)) + return -EOPNOTSUPP; + tcam->vregion_rehash_intrvl = val; + mutex_lock(&tcam->lock); + list_for_each_entry(vregion, &tcam->vregion_list, tlist) { + if (val) + mlxsw_core_schedule_dw(&vregion->rehash.dw, 0); + else + cancel_delayed_work_sync(&vregion->rehash.dw); + } + mutex_unlock(&tcam->lock); + return 0; +} + +static struct mlxsw_sp_acl_tcam_vregion * +mlxsw_sp_acl_tcam_vregion_get(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_vgroup *vgroup, + unsigned int priority, + struct mlxsw_afk_element_usage *elusage) { - struct mlxsw_sp_acl_tcam_region *region = chunk->region; + struct mlxsw_afk_element_usage vregion_elusage; + struct mlxsw_sp_acl_tcam_vregion *vregion; + bool need_split; - list_del(&chunk->list); - if (list_empty(®ion->chunk_list)) { - mlxsw_sp_acl_tcam_group_region_detach(mlxsw_sp, region); - mlxsw_sp_acl_tcam_region_destroy(mlxsw_sp, region); + vregion = mlxsw_sp_acl_tcam_vgroup_vregion_find(vgroup, priority, + elusage, &need_split); + if (vregion) { + if (need_split) { + /* According to priority, new vchunk should belong to + * an existing vregion. However, this vchunk needs + * elements that vregion does not contain. We need + * to split the existing vregion into two and create + * a new vregion for the new vchunk in between. + * This is not supported now. + */ + return ERR_PTR(-EOPNOTSUPP); + } + vregion->ref_count++; + return vregion; } + + mlxsw_sp_acl_tcam_vgroup_use_patterns(vgroup, elusage, + &vregion_elusage); + + return mlxsw_sp_acl_tcam_vregion_create(mlxsw_sp, vgroup, priority, + &vregion_elusage); +} + +static void +mlxsw_sp_acl_tcam_vregion_put(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_vregion *vregion) +{ + if (--vregion->ref_count) + return; + mlxsw_sp_acl_tcam_vregion_destroy(mlxsw_sp, vregion); } static struct mlxsw_sp_acl_tcam_chunk * mlxsw_sp_acl_tcam_chunk_create(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_acl_tcam_group *group, - unsigned int priority, - struct mlxsw_afk_element_usage *elusage) + struct mlxsw_sp_acl_tcam_vchunk *vchunk, + struct mlxsw_sp_acl_tcam_region *region) { const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops; struct mlxsw_sp_acl_tcam_chunk *chunk; + + chunk = kzalloc(sizeof(*chunk) + ops->chunk_priv_size, GFP_KERNEL); + if (!chunk) + return ERR_PTR(-ENOMEM); + chunk->vchunk = vchunk; + chunk->region = region; + + ops->chunk_init(region->priv, chunk->priv, vchunk->priority); + return chunk; +} + +static void +mlxsw_sp_acl_tcam_chunk_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_chunk *chunk) +{ + const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops; + + ops->chunk_fini(chunk->priv); + kfree(chunk); +} + +static struct mlxsw_sp_acl_tcam_vchunk * +mlxsw_sp_acl_tcam_vchunk_create(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_vgroup *vgroup, + unsigned int priority, + struct mlxsw_afk_element_usage *elusage) +{ + struct mlxsw_sp_acl_tcam_vregion *vregion; + struct mlxsw_sp_acl_tcam_vchunk *vchunk; int err; if (priority == MLXSW_SP_ACL_TCAM_CATCHALL_PRIO) return ERR_PTR(-EINVAL); - chunk = kzalloc(sizeof(*chunk) + ops->chunk_priv_size, GFP_KERNEL); - if (!chunk) + vchunk = kzalloc(sizeof(*vchunk), GFP_KERNEL); + if (!vchunk) return ERR_PTR(-ENOMEM); - chunk->priority = priority; - chunk->group = group; - chunk->ref_count = 1; - - 
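
mlxsw_sp_acl_tcam_vregion_get()/_put() above follow the usual lookup-or-create reference-count pattern: a compatible vregion is reused with ref_count bumped, a new one starts life at 1, and the last put destroys it. A toy single-slot version of the same pattern (all names illustrative, not the driver's API):

    #include <stdlib.h>

    struct vthing {
            unsigned int key;
            unsigned int ref_count;
    };

    static struct vthing *cache; /* pretend single-slot lookup table */

    static struct vthing *vthing_get(unsigned int key)
    {
            struct vthing *v = (cache && cache->key == key) ? cache : NULL;

            if (v) {
                    v->ref_count++; /* reuse the compatible object */
                    return v;
            }
            v = calloc(1, sizeof(*v));
            if (!v)
                    return NULL;
            v->key = key;
            v->ref_count = 1;
            cache = v;
            return v;
    }

    static void vthing_put(struct vthing *v)
    {
            if (--v->ref_count)
                    return;
            if (cache == v)
                    cache = NULL;
            free(v); /* last reference gone, destroy */
    }

    int main(void)
    {
            struct vthing *a = vthing_get(7);
            struct vthing *b = vthing_get(7); /* same object, ref_count == 2 */

            vthing_put(b);
            vthing_put(a); /* ref_count hits 0, freed */
            return 0;
    }
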
err = mlxsw_sp_acl_tcam_chunk_assoc(mlxsw_sp, group, priority, - elusage, chunk); - if (err) - goto err_chunk_assoc; + INIT_LIST_HEAD(&vchunk->ventry_list); + vchunk->priority = priority; + vchunk->vgroup = vgroup; + vchunk->ref_count = 1; + + vregion = mlxsw_sp_acl_tcam_vregion_get(mlxsw_sp, vgroup, + priority, elusage); + if (IS_ERR(vregion)) { + err = PTR_ERR(vregion); + goto err_vregion_get; + } - ops->chunk_init(chunk->region->priv, chunk->priv, priority); + vchunk->vregion = vregion; - err = rhashtable_insert_fast(&group->chunk_ht, &chunk->ht_node, - mlxsw_sp_acl_tcam_chunk_ht_params); + err = rhashtable_insert_fast(&vgroup->vchunk_ht, &vchunk->ht_node, + mlxsw_sp_acl_tcam_vchunk_ht_params); if (err) goto err_rhashtable_insert; - return chunk; + mutex_lock(&vregion->lock); + vchunk->chunk = mlxsw_sp_acl_tcam_chunk_create(mlxsw_sp, vchunk, + vchunk->vregion->region); + if (IS_ERR(vchunk->chunk)) { + mutex_unlock(&vregion->lock); + err = PTR_ERR(vchunk->chunk); + goto err_chunk_create; + } + + mlxsw_sp_acl_tcam_rehash_ctx_vregion_changed(vregion); + list_add_tail(&vchunk->list, &vregion->vchunk_list); + mutex_unlock(&vregion->lock); + return vchunk; + +err_chunk_create: + rhashtable_remove_fast(&vgroup->vchunk_ht, &vchunk->ht_node, + mlxsw_sp_acl_tcam_vchunk_ht_params); err_rhashtable_insert: - ops->chunk_fini(chunk->priv); - mlxsw_sp_acl_tcam_chunk_deassoc(mlxsw_sp, chunk); -err_chunk_assoc: - kfree(chunk); + mlxsw_sp_acl_tcam_vregion_put(mlxsw_sp, vregion); +err_vregion_get: + kfree(vchunk); return ERR_PTR(err); } static void -mlxsw_sp_acl_tcam_chunk_destroy(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_acl_tcam_chunk *chunk) +mlxsw_sp_acl_tcam_vchunk_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_vchunk *vchunk) { - const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops; - struct mlxsw_sp_acl_tcam_group *group = chunk->group; - - rhashtable_remove_fast(&group->chunk_ht, &chunk->ht_node, - mlxsw_sp_acl_tcam_chunk_ht_params); - ops->chunk_fini(chunk->priv); - mlxsw_sp_acl_tcam_chunk_deassoc(mlxsw_sp, chunk); - kfree(chunk); + struct mlxsw_sp_acl_tcam_vregion *vregion = vchunk->vregion; + struct mlxsw_sp_acl_tcam_vgroup *vgroup = vchunk->vgroup; + + mutex_lock(&vregion->lock); + mlxsw_sp_acl_tcam_rehash_ctx_vregion_changed(vregion); + list_del(&vchunk->list); + if (vchunk->chunk2) + mlxsw_sp_acl_tcam_chunk_destroy(mlxsw_sp, vchunk->chunk2); + mlxsw_sp_acl_tcam_chunk_destroy(mlxsw_sp, vchunk->chunk); + mutex_unlock(&vregion->lock); + rhashtable_remove_fast(&vgroup->vchunk_ht, &vchunk->ht_node, + mlxsw_sp_acl_tcam_vchunk_ht_params); + mlxsw_sp_acl_tcam_vregion_put(mlxsw_sp, vchunk->vregion); + kfree(vchunk); } -static struct mlxsw_sp_acl_tcam_chunk * -mlxsw_sp_acl_tcam_chunk_get(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_acl_tcam_group *group, - unsigned int priority, - struct mlxsw_afk_element_usage *elusage) +static struct mlxsw_sp_acl_tcam_vchunk * +mlxsw_sp_acl_tcam_vchunk_get(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_vgroup *vgroup, + unsigned int priority, + struct mlxsw_afk_element_usage *elusage) { - struct mlxsw_sp_acl_tcam_chunk *chunk; + struct mlxsw_sp_acl_tcam_vchunk *vchunk; - chunk = rhashtable_lookup_fast(&group->chunk_ht, &priority, - mlxsw_sp_acl_tcam_chunk_ht_params); - if (chunk) { - if (WARN_ON(!mlxsw_afk_key_info_subset(chunk->region->key_info, + vchunk = rhashtable_lookup_fast(&vgroup->vchunk_ht, &priority, + mlxsw_sp_acl_tcam_vchunk_ht_params); + if (vchunk) { + if (WARN_ON(!mlxsw_afk_key_info_subset(vchunk->vregion->key_info, 
elusage))) return ERR_PTR(-EINVAL); - chunk->ref_count++; - return chunk; + vchunk->ref_count++; + return vchunk; } - return mlxsw_sp_acl_tcam_chunk_create(mlxsw_sp, group, - priority, elusage); + return mlxsw_sp_acl_tcam_vchunk_create(mlxsw_sp, vgroup, + priority, elusage); } -static void mlxsw_sp_acl_tcam_chunk_put(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_acl_tcam_chunk *chunk) +static void +mlxsw_sp_acl_tcam_vchunk_put(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_vchunk *vchunk) { - if (--chunk->ref_count) + if (--vchunk->ref_count) return; - mlxsw_sp_acl_tcam_chunk_destroy(mlxsw_sp, chunk); + mlxsw_sp_acl_tcam_vchunk_destroy(mlxsw_sp, vchunk); +} + +static struct mlxsw_sp_acl_tcam_entry * +mlxsw_sp_acl_tcam_entry_create(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_ventry *ventry, + struct mlxsw_sp_acl_tcam_chunk *chunk) +{ + const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops; + struct mlxsw_sp_acl_tcam_entry *entry; + int err; + + entry = kzalloc(sizeof(*entry) + ops->entry_priv_size, GFP_KERNEL); + if (!entry) + return ERR_PTR(-ENOMEM); + entry->ventry = ventry; + entry->chunk = chunk; + + err = ops->entry_add(mlxsw_sp, chunk->region->priv, chunk->priv, + entry->priv, ventry->rulei); + if (err) + goto err_entry_add; + + return entry; + +err_entry_add: + kfree(entry); + return ERR_PTR(err); } -static size_t mlxsw_sp_acl_tcam_entry_priv_size(struct mlxsw_sp *mlxsw_sp) +static void mlxsw_sp_acl_tcam_entry_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_entry *entry) { const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops; - return ops->entry_priv_size; + ops->entry_del(mlxsw_sp, entry->chunk->region->priv, + entry->chunk->priv, entry->priv); + kfree(entry); } -static int mlxsw_sp_acl_tcam_entry_add(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_acl_tcam_group *group, +static int +mlxsw_sp_acl_tcam_entry_action_replace(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_region *region, struct mlxsw_sp_acl_tcam_entry *entry, struct mlxsw_sp_acl_rule_info *rulei) { const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops; - struct mlxsw_sp_acl_tcam_chunk *chunk; - struct mlxsw_sp_acl_tcam_region *region; + + return ops->entry_action_replace(mlxsw_sp, region->priv, + entry->priv, rulei); +} + +static int +mlxsw_sp_acl_tcam_entry_activity_get(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_entry *entry, + bool *activity) +{ + const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops; + + return ops->entry_activity_get(mlxsw_sp, entry->chunk->region->priv, + entry->priv, activity); +} + +static int mlxsw_sp_acl_tcam_ventry_add(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_vgroup *vgroup, + struct mlxsw_sp_acl_tcam_ventry *ventry, + struct mlxsw_sp_acl_rule_info *rulei) +{ + struct mlxsw_sp_acl_tcam_vregion *vregion; + struct mlxsw_sp_acl_tcam_vchunk *vchunk; int err; - chunk = mlxsw_sp_acl_tcam_chunk_get(mlxsw_sp, group, rulei->priority, - &rulei->values.elusage); - if (IS_ERR(chunk)) - return PTR_ERR(chunk); + vchunk = mlxsw_sp_acl_tcam_vchunk_get(mlxsw_sp, vgroup, rulei->priority, + &rulei->values.elusage); + if (IS_ERR(vchunk)) + return PTR_ERR(vchunk); + + ventry->vchunk = vchunk; + ventry->rulei = rulei; + vregion = vchunk->vregion; + + mutex_lock(&vregion->lock); + ventry->entry = mlxsw_sp_acl_tcam_entry_create(mlxsw_sp, ventry, + vchunk->chunk); + if (IS_ERR(ventry->entry)) { + mutex_unlock(&vregion->lock); + err = PTR_ERR(ventry->entry); + goto err_entry_create; + } - region = chunk->region; + 
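
entry_create()/entry_destroy() above enable a make-before-break move during migration: the entry is first written to the destination chunk and only on success removed from the source, so a failure leaves the original rule in place. A sketch of that ordering with illustrative types:

    #include <stdlib.h>

    struct entry { int slot; };

    static struct entry *entry_create(int slot)
    {
            struct entry *e = malloc(sizeof(*e));

            if (e)
                    e->slot = slot;
            return e;
    }

    static void entry_destroy(struct entry *e)
    {
            free(e);
    }

    static int entry_move(struct entry **ep, int new_slot)
    {
            struct entry *new_entry = entry_create(new_slot);

            if (!new_entry)
                    return -1; /* old entry stays valid on failure */
            entry_destroy(*ep); /* only now retire the old copy */
            *ep = new_entry;
            return 0;
    }

    int main(void)
    {
            struct entry *e = entry_create(1);

            if (!e || entry_move(&e, 2))
                    return 1;
            entry_destroy(e);
            return 0;
    }
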
list_add_tail(&ventry->list, &vchunk->ventry_list); + mlxsw_sp_acl_tcam_rehash_ctx_vchunk_changed(vchunk); + mutex_unlock(&vregion->lock); - err = ops->entry_add(mlxsw_sp, region->priv, chunk->priv, - entry->priv, rulei); - if (err) - goto err_entry_add; - entry->chunk = chunk; + return 0; + +err_entry_create: + mlxsw_sp_acl_tcam_vchunk_put(mlxsw_sp, vchunk); + return err; +} + +static void mlxsw_sp_acl_tcam_ventry_del(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_ventry *ventry) +{ + struct mlxsw_sp_acl_tcam_vchunk *vchunk = ventry->vchunk; + struct mlxsw_sp_acl_tcam_vregion *vregion = vchunk->vregion; + + mutex_lock(&vregion->lock); + mlxsw_sp_acl_tcam_rehash_ctx_vchunk_changed(vchunk); + list_del(&ventry->list); + mlxsw_sp_acl_tcam_entry_destroy(mlxsw_sp, ventry->entry); + mutex_unlock(&vregion->lock); + mlxsw_sp_acl_tcam_vchunk_put(mlxsw_sp, vchunk); +} + +static int +mlxsw_sp_acl_tcam_ventry_action_replace(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_ventry *ventry, + struct mlxsw_sp_acl_rule_info *rulei) +{ + struct mlxsw_sp_acl_tcam_vchunk *vchunk = ventry->vchunk; + + return mlxsw_sp_acl_tcam_entry_action_replace(mlxsw_sp, + vchunk->vregion->region, + ventry->entry, rulei); +} + +static int +mlxsw_sp_acl_tcam_ventry_activity_get(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_ventry *ventry, + bool *activity) +{ + return mlxsw_sp_acl_tcam_entry_activity_get(mlxsw_sp, + ventry->entry, activity); +} + +static int +mlxsw_sp_acl_tcam_ventry_migrate(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_ventry *ventry, + struct mlxsw_sp_acl_tcam_chunk *chunk, + int *credits) +{ + struct mlxsw_sp_acl_tcam_entry *new_entry; + + /* First check if the entry is not already where we want it to be. */ + if (ventry->entry->chunk == chunk) + return 0; + + if (--(*credits) < 0) + return 0; + new_entry = mlxsw_sp_acl_tcam_entry_create(mlxsw_sp, ventry, chunk); + if (IS_ERR(new_entry)) + return PTR_ERR(new_entry); + mlxsw_sp_acl_tcam_entry_destroy(mlxsw_sp, ventry->entry); + ventry->entry = new_entry; return 0; +} -err_entry_add: - mlxsw_sp_acl_tcam_chunk_put(mlxsw_sp, chunk); +static int +mlxsw_sp_acl_tcam_vchunk_migrate_start(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_vchunk *vchunk, + struct mlxsw_sp_acl_tcam_region *region, + struct mlxsw_sp_acl_tcam_rehash_ctx *ctx) +{ + struct mlxsw_sp_acl_tcam_chunk *new_chunk; + + new_chunk = mlxsw_sp_acl_tcam_chunk_create(mlxsw_sp, vchunk, region); + if (IS_ERR(new_chunk)) { + if (ctx->this_is_rollback) + vchunk->vregion->failed_rollback = true; + return PTR_ERR(new_chunk); + } + vchunk->chunk2 = vchunk->chunk; + vchunk->chunk = new_chunk; + ctx->current_vchunk = vchunk; + ctx->start_ventry = NULL; + ctx->stop_ventry = NULL; + return 0; +} + +static void +mlxsw_sp_acl_tcam_vchunk_migrate_end(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_vchunk *vchunk, + struct mlxsw_sp_acl_tcam_rehash_ctx *ctx) +{ + mlxsw_sp_acl_tcam_chunk_destroy(mlxsw_sp, vchunk->chunk2); + vchunk->chunk2 = NULL; + ctx->current_vchunk = NULL; +} + +static int +mlxsw_sp_acl_tcam_vchunk_migrate_one(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_vchunk *vchunk, + struct mlxsw_sp_acl_tcam_region *region, + struct mlxsw_sp_acl_tcam_rehash_ctx *ctx, + int *credits) +{ + struct mlxsw_sp_acl_tcam_ventry *ventry; + int err; + + if (vchunk->chunk->region != region) { + err = mlxsw_sp_acl_tcam_vchunk_migrate_start(mlxsw_sp, vchunk, + region, ctx); + if (err) + return err; + } else if (!vchunk->chunk2) { + /* The chunk is already as it should be, 
nothing to do. */ + return 0; + } + + /* If the migration got interrupted, we have the ventry to start from + * stored in context. + */ + if (ctx->start_ventry) + ventry = ctx->start_ventry; + else + ventry = list_first_entry(&vchunk->ventry_list, + typeof(*ventry), list); + + list_for_each_entry_from(ventry, &vchunk->ventry_list, list) { + /* During rollback, once we reach the ventry that failed + * to migrate, we are done. + */ + if (ventry == ctx->stop_ventry) + break; + + err = mlxsw_sp_acl_tcam_ventry_migrate(mlxsw_sp, ventry, + vchunk->chunk, credits); + if (err) { + if (ctx->this_is_rollback) + return err; + /* Swap the chunk and chunk2 pointers so the follow-up + * rollback call will see the original chunk pointer + * in vchunk->chunk. + */ + swap(vchunk->chunk, vchunk->chunk2); + /* The rollback has to be done from beginning of the + * chunk, that is why we have to null the start_ventry. + * However, we know where to stop the rollback, + * at the current ventry. + */ + ctx->start_ventry = NULL; + ctx->stop_ventry = ventry; + return err; + } else if (*credits < 0) { + /* We are out of credits, the rest of the ventries + * will be migrated later. Save the ventry + * which we ended with. + */ + ctx->start_ventry = ventry; + return 0; + } + } + + mlxsw_sp_acl_tcam_vchunk_migrate_end(mlxsw_sp, vchunk, ctx); + return 0; +} + +static int +mlxsw_sp_acl_tcam_vchunk_migrate_all(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_vregion *vregion, + struct mlxsw_sp_acl_tcam_rehash_ctx *ctx, + int *credits) +{ + struct mlxsw_sp_acl_tcam_vchunk *vchunk; + int err; + + /* If the migration got interrupted, we have the vchunk + * we are working on stored in context. + */ + if (ctx->current_vchunk) + vchunk = ctx->current_vchunk; + else + vchunk = list_first_entry(&vregion->vchunk_list, + typeof(*vchunk), list); + + list_for_each_entry_from(vchunk, &vregion->vchunk_list, list) { + err = mlxsw_sp_acl_tcam_vchunk_migrate_one(mlxsw_sp, vchunk, + vregion->region, + ctx, credits); + if (err || *credits < 0) + return err; + } + return 0; +} + +static int +mlxsw_sp_acl_tcam_vregion_migrate(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_vregion *vregion, + struct mlxsw_sp_acl_tcam_rehash_ctx *ctx, + int *credits) +{ + int err, err2; + + trace_mlxsw_sp_acl_tcam_vregion_migrate(mlxsw_sp, vregion); + mutex_lock(&vregion->lock); + err = mlxsw_sp_acl_tcam_vchunk_migrate_all(mlxsw_sp, vregion, + ctx, credits); + if (err) { + /* In case migration was not successful, we need to swap + * so the original region pointer is assigned again + * to vregion->region. 
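
The rollback path above leans on three pieces of context: start_ventry (where to resume when credits run out), stop_ventry (where a rollback must stop, namely the entry that failed), and a rollback flag. A condensed, purely illustrative model of the same bookkeeping over an int array:

    struct migration_ctx {
            int start_idx; /* resume point when out of credits */
            int stop_idx;  /* during rollback: first entry NOT to touch */
            int rollback;
    };

    /* entries[i]: 1 = migrated to the new place, 0 = back in the old one. */
    static int migrate_range(int *entries, int n, int fail_at,
                             struct migration_ctx *ctx)
    {
            for (int i = ctx->start_idx; i < n; i++) {
                    if (ctx->rollback && i == ctx->stop_idx)
                            break; /* rolled back far enough */
                    if (!ctx->rollback && i == fail_at) {
                            ctx->start_idx = 0; /* rollback from the start */
                            ctx->stop_idx = i;  /* ...up to the failure */
                            ctx->rollback = 1;
                            return -1;
                    }
                    entries[i] = ctx->rollback ? 0 : 1;
            }
            return 0;
    }

    int main(void)
    {
            int entries[5] = {0};
            struct migration_ctx ctx = {0};

            if (migrate_range(entries, 5, 3, &ctx))   /* fails at index 3 */
                    migrate_range(entries, 5, 3, &ctx); /* rollback pass */
            return 0;
    }
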
+ */ + swap(vregion->region, vregion->region2); + ctx->current_vchunk = NULL; + ctx->this_is_rollback = true; + err2 = mlxsw_sp_acl_tcam_vchunk_migrate_all(mlxsw_sp, vregion, + ctx, credits); + if (err2) + vregion->failed_rollback = true; + } + mutex_unlock(&vregion->lock); + trace_mlxsw_sp_acl_tcam_vregion_migrate_end(mlxsw_sp, vregion); return err; } -static void mlxsw_sp_acl_tcam_entry_del(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_acl_tcam_entry *entry) +static bool +mlxsw_sp_acl_tcam_vregion_rehash_in_progress(const struct mlxsw_sp_acl_tcam_rehash_ctx *ctx) +{ + return ctx->hints_priv; +} + +static int +mlxsw_sp_acl_tcam_vregion_rehash_start(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_vregion *vregion, + struct mlxsw_sp_acl_tcam_rehash_ctx *ctx) { const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops; - struct mlxsw_sp_acl_tcam_chunk *chunk = entry->chunk; - struct mlxsw_sp_acl_tcam_region *region = chunk->region; + unsigned int priority = mlxsw_sp_acl_tcam_vregion_prio(vregion); + struct mlxsw_sp_acl_tcam_region *new_region; + void *hints_priv; + int err; + + trace_mlxsw_sp_acl_tcam_vregion_rehash(mlxsw_sp, vregion); + if (vregion->failed_rollback) + return -EBUSY; + + hints_priv = ops->region_rehash_hints_get(vregion->region->priv); + if (IS_ERR(hints_priv)) + return PTR_ERR(hints_priv); + + new_region = mlxsw_sp_acl_tcam_region_create(mlxsw_sp, vregion->tcam, + vregion, hints_priv); + if (IS_ERR(new_region)) { + err = PTR_ERR(new_region); + goto err_region_create; + } + + /* vregion->region contains the pointer to the new region + * we are going to migrate to. + */ + vregion->region2 = vregion->region; + vregion->region = new_region; + err = mlxsw_sp_acl_tcam_group_region_attach(mlxsw_sp, + vregion->region2->group, + new_region, priority, + vregion->region2); + if (err) + goto err_group_region_attach; + + ctx->hints_priv = hints_priv; + ctx->this_is_rollback = false; + + return 0; - ops->entry_del(mlxsw_sp, region->priv, chunk->priv, entry->priv); - mlxsw_sp_acl_tcam_chunk_put(mlxsw_sp, chunk); +err_group_region_attach: + vregion->region = vregion->region2; + vregion->region2 = NULL; + mlxsw_sp_acl_tcam_region_destroy(mlxsw_sp, new_region); +err_region_create: + ops->region_rehash_hints_put(hints_priv); + return err; } -static int -mlxsw_sp_acl_tcam_entry_activity_get(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_acl_tcam_entry *entry, - bool *activity) +static void +mlxsw_sp_acl_tcam_vregion_rehash_end(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_vregion *vregion, + struct mlxsw_sp_acl_tcam_rehash_ctx *ctx) { + struct mlxsw_sp_acl_tcam_region *unused_region = vregion->region2; const struct mlxsw_sp_acl_tcam_ops *ops = mlxsw_sp->acl_tcam_ops; - struct mlxsw_sp_acl_tcam_chunk *chunk = entry->chunk; - struct mlxsw_sp_acl_tcam_region *region = chunk->region; - return ops->entry_activity_get(mlxsw_sp, region->priv, - entry->priv, activity); + if (!vregion->failed_rollback) { + vregion->region2 = NULL; + mlxsw_sp_acl_tcam_group_region_detach(mlxsw_sp, unused_region); + mlxsw_sp_acl_tcam_region_destroy(mlxsw_sp, unused_region); + } + ops->region_rehash_hints_put(ctx->hints_priv); + ctx->hints_priv = NULL; +} + +static void +mlxsw_sp_acl_tcam_vregion_rehash(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_vregion *vregion, + int *credits) +{ + struct mlxsw_sp_acl_tcam_rehash_ctx *ctx = &vregion->rehash.ctx; + int err; + + /* Check if the previous rehash work was interrupted + * which means we have to continue it now. + * If not, start a new rehash. 
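
rehash_start()/rehash_end() above form a two-phase, hitless replace: the new region is created and attached while the old one (parked in region2) stays live until migration completes, and only rehash_end() retires it. A minimal shape of that lifecycle, with hypothetical names:

    #include <stdlib.h>

    struct region { int id; };

    struct swap_ctx {
            struct region *old_region; /* kept until migration finishes */
    };

    static int replace_start(struct region **live, struct swap_ctx *ctx)
    {
            struct region *new_region = malloc(sizeof(*new_region));

            if (!new_region)
                    return -1;
            new_region->id = (*live)->id + 1;
            ctx->old_region = *live; /* old stays reachable for rollback */
            *live = new_region;
            return 0;
    }

    static void replace_end(struct swap_ctx *ctx)
    {
            free(ctx->old_region); /* migration done; retire the old one */
            ctx->old_region = NULL;
    }

    int main(void)
    {
            struct region *live = malloc(sizeof(*live));
            struct swap_ctx ctx = {0};

            if (!live)
                    return 1;
            live->id = 1;
            if (replace_start(&live, &ctx))
                    return 1;
            /* ... migrate entries from ctx.old_region into live ... */
            replace_end(&ctx);
            free(live);
            return 0;
    }
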
+	 */
+	if (!mlxsw_sp_acl_tcam_vregion_rehash_in_progress(ctx)) {
+		err = mlxsw_sp_acl_tcam_vregion_rehash_start(mlxsw_sp,
+							     vregion, ctx);
+		if (err) {
+			if (err != -EAGAIN)
+				dev_err(mlxsw_sp->bus_info->dev, "Failed to get rehash hints\n");
+			return;
+		}
+	}
+
+	err = mlxsw_sp_acl_tcam_vregion_migrate(mlxsw_sp, vregion,
+						ctx, credits);
+	if (err) {
+		dev_err(mlxsw_sp->bus_info->dev, "Failed to migrate vregion\n");
+		if (vregion->failed_rollback) {
+			trace_mlxsw_sp_acl_tcam_vregion_rehash_dis(mlxsw_sp,
+								   vregion);
+			dev_err(mlxsw_sp->bus_info->dev, "Failed to rollback during vregion migration failure\n");
+		}
+	}
+
+	if (*credits >= 0)
+		mlxsw_sp_acl_tcam_vregion_rehash_end(mlxsw_sp, vregion, ctx);
 }
 
 static const enum mlxsw_afk_element mlxsw_sp_acl_tcam_pattern_ipv4[] = {
@@ -841,11 +1567,11 @@ static const struct mlxsw_sp_acl_tcam_pattern mlxsw_sp_acl_tcam_patterns[] = {
 	ARRAY_SIZE(mlxsw_sp_acl_tcam_patterns)
 
 struct mlxsw_sp_acl_tcam_flower_ruleset {
-	struct mlxsw_sp_acl_tcam_group group;
+	struct mlxsw_sp_acl_tcam_vgroup vgroup;
 };
 
 struct mlxsw_sp_acl_tcam_flower_rule {
-	struct mlxsw_sp_acl_tcam_entry entry;
+	struct mlxsw_sp_acl_tcam_ventry ventry;
 };
 
 static int
@@ -856,10 +1582,10 @@ mlxsw_sp_acl_tcam_flower_ruleset_add(struct mlxsw_sp *mlxsw_sp,
 {
 	struct mlxsw_sp_acl_tcam_flower_ruleset *ruleset = ruleset_priv;
 
-	return mlxsw_sp_acl_tcam_group_add(mlxsw_sp, tcam, &ruleset->group,
-					   mlxsw_sp_acl_tcam_patterns,
-					   MLXSW_SP_ACL_TCAM_PATTERNS_COUNT,
-					   tmplt_elusage);
+	return mlxsw_sp_acl_tcam_vgroup_add(mlxsw_sp, tcam, &ruleset->vgroup,
+					    mlxsw_sp_acl_tcam_patterns,
+					    MLXSW_SP_ACL_TCAM_PATTERNS_COUNT,
+					    tmplt_elusage, true);
 }
 
 static void
@@ -868,7 +1594,7 @@ mlxsw_sp_acl_tcam_flower_ruleset_del(struct mlxsw_sp *mlxsw_sp,
 {
 	struct mlxsw_sp_acl_tcam_flower_ruleset *ruleset = ruleset_priv;
 
-	mlxsw_sp_acl_tcam_group_del(mlxsw_sp, &ruleset->group);
+	mlxsw_sp_acl_tcam_vgroup_del(&ruleset->vgroup);
 }
 
 static int
@@ -879,7 +1605,7 @@ mlxsw_sp_acl_tcam_flower_ruleset_bind(struct mlxsw_sp *mlxsw_sp,
 {
 	struct mlxsw_sp_acl_tcam_flower_ruleset *ruleset = ruleset_priv;
 
-	return mlxsw_sp_acl_tcam_group_bind(mlxsw_sp, &ruleset->group,
+	return mlxsw_sp_acl_tcam_group_bind(mlxsw_sp, &ruleset->vgroup.group,
 					    mlxsw_sp_port, ingress);
 }
 
 static void
@@ -891,7 +1617,7 @@ mlxsw_sp_acl_tcam_flower_ruleset_unbind(struct mlxsw_sp *mlxsw_sp,
 {
 	struct mlxsw_sp_acl_tcam_flower_ruleset *ruleset = ruleset_priv;
 
-	mlxsw_sp_acl_tcam_group_unbind(mlxsw_sp, &ruleset->group,
+	mlxsw_sp_acl_tcam_group_unbind(mlxsw_sp, &ruleset->vgroup.group,
 				       mlxsw_sp_port, ingress);
 }
 
@@ -900,13 +1626,7 @@ mlxsw_sp_acl_tcam_flower_ruleset_group_id(void *ruleset_priv)
 {
 	struct mlxsw_sp_acl_tcam_flower_ruleset *ruleset = ruleset_priv;
 
-	return mlxsw_sp_acl_tcam_group_id(&ruleset->group);
-}
-
-static size_t mlxsw_sp_acl_tcam_flower_rule_priv_size(struct mlxsw_sp *mlxsw_sp)
-{
-	return sizeof(struct mlxsw_sp_acl_tcam_flower_rule) +
-	       mlxsw_sp_acl_tcam_entry_priv_size(mlxsw_sp);
+	return mlxsw_sp_acl_tcam_group_id(&ruleset->vgroup.group);
 }
 
 static int
@@ -917,8 +1637,8 @@ mlxsw_sp_acl_tcam_flower_rule_add(struct mlxsw_sp *mlxsw_sp,
 	struct mlxsw_sp_acl_tcam_flower_ruleset *ruleset = ruleset_priv;
 	struct mlxsw_sp_acl_tcam_flower_rule *rule = rule_priv;
 
-	return mlxsw_sp_acl_tcam_entry_add(mlxsw_sp, &ruleset->group,
-					   &rule->entry, rulei);
+	return mlxsw_sp_acl_tcam_ventry_add(mlxsw_sp, &ruleset->vgroup,
+					    &rule->ventry, rulei);
 }
 
 static void
@@ -926,7 +1646,15 @@ mlxsw_sp_acl_tcam_flower_rule_del(struct mlxsw_sp *mlxsw_sp, void *rule_priv)
 {
 	struct
mlxsw_sp_acl_tcam_flower_rule *rule = rule_priv; - mlxsw_sp_acl_tcam_entry_del(mlxsw_sp, &rule->entry); + mlxsw_sp_acl_tcam_ventry_del(mlxsw_sp, &rule->ventry); +} + +static int +mlxsw_sp_acl_tcam_flower_rule_action_replace(struct mlxsw_sp *mlxsw_sp, + void *rule_priv, + struct mlxsw_sp_acl_rule_info *rulei) +{ + return -EOPNOTSUPP; } static int @@ -935,8 +1663,8 @@ mlxsw_sp_acl_tcam_flower_rule_activity_get(struct mlxsw_sp *mlxsw_sp, { struct mlxsw_sp_acl_tcam_flower_rule *rule = rule_priv; - return mlxsw_sp_acl_tcam_entry_activity_get(mlxsw_sp, &rule->entry, - activity); + return mlxsw_sp_acl_tcam_ventry_activity_get(mlxsw_sp, &rule->ventry, + activity); } static const struct mlxsw_sp_acl_profile_ops mlxsw_sp_acl_tcam_flower_ops = { @@ -946,15 +1674,152 @@ static const struct mlxsw_sp_acl_profile_ops mlxsw_sp_acl_tcam_flower_ops = { .ruleset_bind = mlxsw_sp_acl_tcam_flower_ruleset_bind, .ruleset_unbind = mlxsw_sp_acl_tcam_flower_ruleset_unbind, .ruleset_group_id = mlxsw_sp_acl_tcam_flower_ruleset_group_id, - .rule_priv_size = mlxsw_sp_acl_tcam_flower_rule_priv_size, + .rule_priv_size = sizeof(struct mlxsw_sp_acl_tcam_flower_rule), .rule_add = mlxsw_sp_acl_tcam_flower_rule_add, .rule_del = mlxsw_sp_acl_tcam_flower_rule_del, + .rule_action_replace = mlxsw_sp_acl_tcam_flower_rule_action_replace, .rule_activity_get = mlxsw_sp_acl_tcam_flower_rule_activity_get, }; +struct mlxsw_sp_acl_tcam_mr_ruleset { + struct mlxsw_sp_acl_tcam_vchunk *vchunk; + struct mlxsw_sp_acl_tcam_vgroup vgroup; +}; + +struct mlxsw_sp_acl_tcam_mr_rule { + struct mlxsw_sp_acl_tcam_ventry ventry; +}; + +static int +mlxsw_sp_acl_tcam_mr_ruleset_add(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam *tcam, + void *ruleset_priv, + struct mlxsw_afk_element_usage *tmplt_elusage) +{ + struct mlxsw_sp_acl_tcam_mr_ruleset *ruleset = ruleset_priv; + int err; + + err = mlxsw_sp_acl_tcam_vgroup_add(mlxsw_sp, tcam, &ruleset->vgroup, + mlxsw_sp_acl_tcam_patterns, + MLXSW_SP_ACL_TCAM_PATTERNS_COUNT, + tmplt_elusage, false); + if (err) + return err; + + /* For most of the TCAM clients it would make sense to take a tcam chunk + * only when the first rule is written. This is not the case for + * multicast router as it is required to bind the multicast router to a + * specific ACL Group ID which must exist in HW before multicast router + * is initialized. 
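
The flower and multicast-router (MR) profiles register through the same mlxsw_sp_acl_profile_ops table, dispatched by profile index (see the ops array at the end of this hunk). A freestanding sketch of that vtable pattern; handlers and names here are placeholders:

    #include <stdio.h>

    struct profile_ops {
            const char *name;
            int (*rule_add)(void *ruleset, void *rule);
    };

    static int flower_rule_add(void *ruleset, void *rule)
    {
            (void)ruleset; (void)rule;
            return 0;
    }

    static int mr_rule_add(void *ruleset, void *rule)
    {
            (void)ruleset; (void)rule;
            return 0;
    }

    /* Indexed by profile, like mlxsw_sp_acl_tcam_profile_ops_arr[]. */
    static const struct profile_ops profiles[] = {
            { .name = "flower", .rule_add = flower_rule_add },
            { .name = "mr",     .rule_add = mr_rule_add },
    };

    int main(void)
    {
            for (unsigned int i = 0; i < sizeof(profiles) / sizeof(profiles[0]); i++)
                    printf("%s: %d\n", profiles[i].name,
                           profiles[i].rule_add(NULL, NULL));
            return 0;
    }
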
+ */ + ruleset->vchunk = mlxsw_sp_acl_tcam_vchunk_get(mlxsw_sp, + &ruleset->vgroup, 1, + tmplt_elusage); + if (IS_ERR(ruleset->vchunk)) { + err = PTR_ERR(ruleset->vchunk); + goto err_chunk_get; + } + + return 0; + +err_chunk_get: + mlxsw_sp_acl_tcam_vgroup_del(&ruleset->vgroup); + return err; +} + +static void +mlxsw_sp_acl_tcam_mr_ruleset_del(struct mlxsw_sp *mlxsw_sp, void *ruleset_priv) +{ + struct mlxsw_sp_acl_tcam_mr_ruleset *ruleset = ruleset_priv; + + mlxsw_sp_acl_tcam_vchunk_put(mlxsw_sp, ruleset->vchunk); + mlxsw_sp_acl_tcam_vgroup_del(&ruleset->vgroup); +} + +static int +mlxsw_sp_acl_tcam_mr_ruleset_bind(struct mlxsw_sp *mlxsw_sp, void *ruleset_priv, + struct mlxsw_sp_port *mlxsw_sp_port, + bool ingress) +{ + /* Binding is done when initializing multicast router */ + return 0; +} + +static void +mlxsw_sp_acl_tcam_mr_ruleset_unbind(struct mlxsw_sp *mlxsw_sp, + void *ruleset_priv, + struct mlxsw_sp_port *mlxsw_sp_port, + bool ingress) +{ +} + +static u16 +mlxsw_sp_acl_tcam_mr_ruleset_group_id(void *ruleset_priv) +{ + struct mlxsw_sp_acl_tcam_mr_ruleset *ruleset = ruleset_priv; + + return mlxsw_sp_acl_tcam_group_id(&ruleset->vgroup.group); +} + +static int +mlxsw_sp_acl_tcam_mr_rule_add(struct mlxsw_sp *mlxsw_sp, void *ruleset_priv, + void *rule_priv, + struct mlxsw_sp_acl_rule_info *rulei) +{ + struct mlxsw_sp_acl_tcam_mr_ruleset *ruleset = ruleset_priv; + struct mlxsw_sp_acl_tcam_mr_rule *rule = rule_priv; + + return mlxsw_sp_acl_tcam_ventry_add(mlxsw_sp, &ruleset->vgroup, + &rule->ventry, rulei); +} + +static void +mlxsw_sp_acl_tcam_mr_rule_del(struct mlxsw_sp *mlxsw_sp, void *rule_priv) +{ + struct mlxsw_sp_acl_tcam_mr_rule *rule = rule_priv; + + mlxsw_sp_acl_tcam_ventry_del(mlxsw_sp, &rule->ventry); +} + +static int +mlxsw_sp_acl_tcam_mr_rule_action_replace(struct mlxsw_sp *mlxsw_sp, + void *rule_priv, + struct mlxsw_sp_acl_rule_info *rulei) +{ + struct mlxsw_sp_acl_tcam_mr_rule *rule = rule_priv; + + return mlxsw_sp_acl_tcam_ventry_action_replace(mlxsw_sp, &rule->ventry, + rulei); +} + +static int +mlxsw_sp_acl_tcam_mr_rule_activity_get(struct mlxsw_sp *mlxsw_sp, + void *rule_priv, bool *activity) +{ + struct mlxsw_sp_acl_tcam_mr_rule *rule = rule_priv; + + return mlxsw_sp_acl_tcam_ventry_activity_get(mlxsw_sp, &rule->ventry, + activity); +} + +static const struct mlxsw_sp_acl_profile_ops mlxsw_sp_acl_tcam_mr_ops = { + .ruleset_priv_size = sizeof(struct mlxsw_sp_acl_tcam_mr_ruleset), + .ruleset_add = mlxsw_sp_acl_tcam_mr_ruleset_add, + .ruleset_del = mlxsw_sp_acl_tcam_mr_ruleset_del, + .ruleset_bind = mlxsw_sp_acl_tcam_mr_ruleset_bind, + .ruleset_unbind = mlxsw_sp_acl_tcam_mr_ruleset_unbind, + .ruleset_group_id = mlxsw_sp_acl_tcam_mr_ruleset_group_id, + .rule_priv_size = sizeof(struct mlxsw_sp_acl_tcam_mr_rule), + .rule_add = mlxsw_sp_acl_tcam_mr_rule_add, + .rule_del = mlxsw_sp_acl_tcam_mr_rule_del, + .rule_action_replace = mlxsw_sp_acl_tcam_mr_rule_action_replace, + .rule_activity_get = mlxsw_sp_acl_tcam_mr_rule_activity_get, +}; + static const struct mlxsw_sp_acl_profile_ops * mlxsw_sp_acl_tcam_profile_ops_arr[] = { [MLXSW_SP_ACL_PROFILE_FLOWER] = &mlxsw_sp_acl_tcam_flower_ops, + [MLXSW_SP_ACL_PROFILE_MR] = &mlxsw_sp_acl_tcam_mr_ops, }; const struct mlxsw_sp_acl_profile_ops * diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h index 219a4e26c332..5965913565a5 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h @@ -17,6 
+17,9 @@ struct mlxsw_sp_acl_tcam { unsigned long *used_groups; /* bit array */ unsigned int max_groups; unsigned int max_group_size; + struct mutex lock; /* guards vregion list */ + struct list_head vregion_list; + u32 vregion_rehash_intrvl; /* ms */ unsigned long priv[0]; /* priv has to be always the last item */ }; @@ -26,6 +29,11 @@ int mlxsw_sp_acl_tcam_init(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_tcam *tcam); void mlxsw_sp_acl_tcam_fini(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_tcam *tcam); +u32 mlxsw_sp_acl_tcam_vregion_rehash_intrvl_get(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam *tcam); +int mlxsw_sp_acl_tcam_vregion_rehash_intrvl_set(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam *tcam, + u32 val); int mlxsw_sp_acl_tcam_priority_get(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_rule_info *rulei, u32 *priority, bool fillup_priority); @@ -43,11 +51,13 @@ struct mlxsw_sp_acl_profile_ops { struct mlxsw_sp_port *mlxsw_sp_port, bool ingress); u16 (*ruleset_group_id)(void *ruleset_priv); - size_t (*rule_priv_size)(struct mlxsw_sp *mlxsw_sp); + size_t rule_priv_size; int (*rule_add)(struct mlxsw_sp *mlxsw_sp, void *ruleset_priv, void *rule_priv, struct mlxsw_sp_acl_rule_info *rulei); void (*rule_del)(struct mlxsw_sp *mlxsw_sp, void *rule_priv); + int (*rule_action_replace)(struct mlxsw_sp *mlxsw_sp, void *rule_priv, + struct mlxsw_sp_acl_rule_info *rulei); int (*rule_activity_get)(struct mlxsw_sp *mlxsw_sp, void *rule_priv, bool *activity); }; @@ -65,11 +75,12 @@ mlxsw_sp_acl_tcam_profile_ops(struct mlxsw_sp *mlxsw_sp, (MLXSW_REG_PTCEX_FLEX_KEY_BLOCKS_LEN * BITS_PER_BYTE) struct mlxsw_sp_acl_tcam_group; +struct mlxsw_sp_acl_tcam_vregion; struct mlxsw_sp_acl_tcam_region { - struct list_head list; /* Member of a TCAM group */ - struct list_head chunk_list; /* List of chunks under this region */ + struct mlxsw_sp_acl_tcam_vregion *vregion; struct mlxsw_sp_acl_tcam_group *group; + struct list_head list; /* Member of a TCAM group */ enum mlxsw_reg_ptar_key_type key_type; u16 id; /* ACL ID and region ID - they are same */ char tcam_region_info[MLXSW_REG_PXXX_TCAM_REGION_INFO_LEN]; @@ -121,6 +132,10 @@ void mlxsw_sp_acl_ctcam_entry_del(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_ctcam_region *cregion, struct mlxsw_sp_acl_ctcam_chunk *cchunk, struct mlxsw_sp_acl_ctcam_entry *centry); +int mlxsw_sp_acl_ctcam_entry_action_replace(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_ctcam_region *cregion, + struct mlxsw_sp_acl_ctcam_entry *centry, + struct mlxsw_sp_acl_rule_info *rulei); static inline unsigned int mlxsw_sp_acl_ctcam_entry_offset(struct mlxsw_sp_acl_ctcam_entry *centry) { @@ -144,6 +159,7 @@ struct mlxsw_sp_acl_atcam { struct mlxsw_sp_acl_atcam_region { struct rhashtable entries_ht; /* A-TCAM only */ + struct list_head entries_list; /* A-TCAM only */ struct mlxsw_sp_acl_ctcam_region cregion; const struct mlxsw_sp_acl_atcam_region_ops *ops; struct mlxsw_sp_acl_tcam_region *region; @@ -154,7 +170,9 @@ struct mlxsw_sp_acl_atcam_region { }; struct mlxsw_sp_acl_atcam_entry_ht_key { - char enc_key[MLXSW_REG_PTCEX_FLEX_KEY_BLOCKS_LEN]; /* Encoded key */ + char full_enc_key[MLXSW_REG_PTCEX_FLEX_KEY_BLOCKS_LEN]; /* Encoded + * key. + */ u8 erp_id; }; @@ -164,10 +182,19 @@ struct mlxsw_sp_acl_atcam_chunk { struct mlxsw_sp_acl_atcam_entry { struct rhash_head ht_node; + struct list_head list; /* Member in entries_list */ struct mlxsw_sp_acl_atcam_entry_ht_key ht_key; + char enc_key[MLXSW_REG_PTCEX_FLEX_KEY_BLOCKS_LEN]; /* Encoded key, + * minus delta bits. 
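
The new enc_key/delta_info pair stores the encoded key with the delta bits cleared, plus where those bits live (start/mask/value). A simplified, byte-granular illustration of extracting and clearing such a delta field; the real encoding can straddle bytes and goes through the ERP delta helpers declared below:

    #include <stdint.h>
    #include <stddef.h>

    struct delta_info {
            uint16_t start; /* byte offset of the delta field */
            uint8_t mask;   /* which bits within that byte are delta */
            uint8_t value;  /* the delta bits taken out of the key */
    };

    static void key_split_delta(uint8_t *enc_key, size_t len,
                                struct delta_info *delta)
    {
            if (delta->start >= len)
                    return;
            delta->value = enc_key[delta->start] & delta->mask;
            enc_key[delta->start] &= ~delta->mask; /* key "minus delta bits" */
    }

    int main(void)
    {
            uint8_t key[4] = { 0xab, 0xcd, 0xef, 0x01 };
            struct delta_info d = { .start = 2, .mask = 0x0f };

            key_split_delta(key, sizeof(key), &d);
            return (d.value == 0x0f && key[2] == 0xe0) ? 0 : 1;
    }
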
+ */ + struct { + u16 start; + u8 mask; + u8 value; + } delta_info; struct mlxsw_sp_acl_ctcam_entry centry; struct mlxsw_sp_acl_atcam_lkey_id *lkey_id; - struct mlxsw_sp_acl_erp *erp; + struct mlxsw_sp_acl_erp_mask *erp_mask; }; static inline struct mlxsw_sp_acl_atcam_region * @@ -189,6 +216,7 @@ mlxsw_sp_acl_atcam_region_init(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_atcam *atcam, struct mlxsw_sp_acl_atcam_region *aregion, struct mlxsw_sp_acl_tcam_region *region, + void *hints_priv, const struct mlxsw_sp_acl_ctcam_region_ops *ops); void mlxsw_sp_acl_atcam_region_fini(struct mlxsw_sp_acl_atcam_region *aregion); void mlxsw_sp_acl_atcam_chunk_init(struct mlxsw_sp_acl_atcam_region *aregion, @@ -204,25 +232,74 @@ void mlxsw_sp_acl_atcam_entry_del(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_atcam_region *aregion, struct mlxsw_sp_acl_atcam_chunk *achunk, struct mlxsw_sp_acl_atcam_entry *aentry); +int mlxsw_sp_acl_atcam_entry_action_replace(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_atcam_region *aregion, + struct mlxsw_sp_acl_atcam_entry *aentry, + struct mlxsw_sp_acl_rule_info *rulei); int mlxsw_sp_acl_atcam_init(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_atcam *atcam); void mlxsw_sp_acl_atcam_fini(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_atcam *atcam); +void * +mlxsw_sp_acl_atcam_rehash_hints_get(struct mlxsw_sp_acl_atcam_region *aregion); +void mlxsw_sp_acl_atcam_rehash_hints_put(void *hints_priv); + +struct mlxsw_sp_acl_erp_delta; + +u16 mlxsw_sp_acl_erp_delta_start(const struct mlxsw_sp_acl_erp_delta *delta); +u8 mlxsw_sp_acl_erp_delta_mask(const struct mlxsw_sp_acl_erp_delta *delta); +u8 mlxsw_sp_acl_erp_delta_value(const struct mlxsw_sp_acl_erp_delta *delta, + const char *enc_key); +void mlxsw_sp_acl_erp_delta_clear(const struct mlxsw_sp_acl_erp_delta *delta, + const char *enc_key); -struct mlxsw_sp_acl_erp; +struct mlxsw_sp_acl_erp_mask; -bool mlxsw_sp_acl_erp_is_ctcam_erp(const struct mlxsw_sp_acl_erp *erp); -u8 mlxsw_sp_acl_erp_id(const struct mlxsw_sp_acl_erp *erp); -struct mlxsw_sp_acl_erp * -mlxsw_sp_acl_erp_get(struct mlxsw_sp_acl_atcam_region *aregion, - const char *mask, bool ctcam); -void mlxsw_sp_acl_erp_put(struct mlxsw_sp_acl_atcam_region *aregion, - struct mlxsw_sp_acl_erp *erp); -int mlxsw_sp_acl_erp_region_init(struct mlxsw_sp_acl_atcam_region *aregion); +bool +mlxsw_sp_acl_erp_mask_is_ctcam(const struct mlxsw_sp_acl_erp_mask *erp_mask); +u8 mlxsw_sp_acl_erp_mask_erp_id(const struct mlxsw_sp_acl_erp_mask *erp_mask); +const struct mlxsw_sp_acl_erp_delta * +mlxsw_sp_acl_erp_delta(const struct mlxsw_sp_acl_erp_mask *erp_mask); +struct mlxsw_sp_acl_erp_mask * +mlxsw_sp_acl_erp_mask_get(struct mlxsw_sp_acl_atcam_region *aregion, + const char *mask, bool ctcam); +void mlxsw_sp_acl_erp_mask_put(struct mlxsw_sp_acl_atcam_region *aregion, + struct mlxsw_sp_acl_erp_mask *erp_mask); +int mlxsw_sp_acl_erp_bf_insert(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_atcam_region *aregion, + struct mlxsw_sp_acl_erp_mask *erp_mask, + struct mlxsw_sp_acl_atcam_entry *aentry); +void mlxsw_sp_acl_erp_bf_remove(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_atcam_region *aregion, + struct mlxsw_sp_acl_erp_mask *erp_mask, + struct mlxsw_sp_acl_atcam_entry *aentry); +void * +mlxsw_sp_acl_erp_rehash_hints_get(struct mlxsw_sp_acl_atcam_region *aregion); +void mlxsw_sp_acl_erp_rehash_hints_put(void *hints_priv); +int mlxsw_sp_acl_erp_region_init(struct mlxsw_sp_acl_atcam_region *aregion, + void *hints_priv); void mlxsw_sp_acl_erp_region_fini(struct 
mlxsw_sp_acl_atcam_region *aregion); int mlxsw_sp_acl_erps_init(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_atcam *atcam); void mlxsw_sp_acl_erps_fini(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_atcam *atcam); +struct mlxsw_sp_acl_bf; + +int +mlxsw_sp_acl_bf_entry_add(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_bf *bf, + struct mlxsw_sp_acl_atcam_region *aregion, + unsigned int erp_bank, + struct mlxsw_sp_acl_atcam_entry *aentry); +void +mlxsw_sp_acl_bf_entry_del(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_bf *bf, + struct mlxsw_sp_acl_atcam_region *aregion, + unsigned int erp_bank, + struct mlxsw_sp_acl_atcam_entry *aentry); +struct mlxsw_sp_acl_bf * +mlxsw_sp_acl_bf_init(struct mlxsw_sp *mlxsw_sp, unsigned int num_erp_banks); +void mlxsw_sp_acl_bf_fini(struct mlxsw_sp_acl_bf *bf); + #endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c index 12c61e0cc570..d633bef5f105 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c @@ -37,13 +37,19 @@ struct mlxsw_sp_sb_pm { struct mlxsw_cp_sb_occ occ; }; +struct mlxsw_sp_sb_mm { + u32 min_buff; + u32 max_buff; + u16 pool_index; +}; + struct mlxsw_sp_sb_pool_des { enum mlxsw_reg_sbxx_dir dir; u8 pool; }; /* Order ingress pools before egress pools. */ -static const struct mlxsw_sp_sb_pool_des mlxsw_sp_sb_pool_dess[] = { +static const struct mlxsw_sp_sb_pool_des mlxsw_sp1_sb_pool_dess[] = { {MLXSW_REG_SBXX_DIR_INGRESS, 0}, {MLXSW_REG_SBXX_DIR_INGRESS, 1}, {MLXSW_REG_SBXX_DIR_INGRESS, 2}, @@ -55,7 +61,17 @@ static const struct mlxsw_sp_sb_pool_des mlxsw_sp_sb_pool_dess[] = { {MLXSW_REG_SBXX_DIR_EGRESS, 15}, }; -#define MLXSW_SP_SB_POOL_DESS_LEN ARRAY_SIZE(mlxsw_sp_sb_pool_dess) +static const struct mlxsw_sp_sb_pool_des mlxsw_sp2_sb_pool_dess[] = { + {MLXSW_REG_SBXX_DIR_INGRESS, 0}, + {MLXSW_REG_SBXX_DIR_INGRESS, 1}, + {MLXSW_REG_SBXX_DIR_INGRESS, 2}, + {MLXSW_REG_SBXX_DIR_INGRESS, 3}, + {MLXSW_REG_SBXX_DIR_EGRESS, 0}, + {MLXSW_REG_SBXX_DIR_EGRESS, 1}, + {MLXSW_REG_SBXX_DIR_EGRESS, 2}, + {MLXSW_REG_SBXX_DIR_EGRESS, 3}, + {MLXSW_REG_SBXX_DIR_EGRESS, 15}, +}; #define MLXSW_SP_SB_ING_TC_COUNT 8 #define MLXSW_SP_SB_EG_TC_COUNT 16 @@ -63,16 +79,32 @@ static const struct mlxsw_sp_sb_pool_des mlxsw_sp_sb_pool_dess[] = { struct mlxsw_sp_sb_port { struct mlxsw_sp_sb_cm ing_cms[MLXSW_SP_SB_ING_TC_COUNT]; struct mlxsw_sp_sb_cm eg_cms[MLXSW_SP_SB_EG_TC_COUNT]; - struct mlxsw_sp_sb_pm pms[MLXSW_SP_SB_POOL_DESS_LEN]; + struct mlxsw_sp_sb_pm *pms; }; struct mlxsw_sp_sb { - struct mlxsw_sp_sb_pr prs[MLXSW_SP_SB_POOL_DESS_LEN]; + struct mlxsw_sp_sb_pr *prs; struct mlxsw_sp_sb_port *ports; u32 cell_size; + u32 max_headroom_cells; u64 sb_size; }; +struct mlxsw_sp_sb_vals { + unsigned int pool_count; + const struct mlxsw_sp_sb_pool_des *pool_dess; + const struct mlxsw_sp_sb_pm *pms; + const struct mlxsw_sp_sb_pr *prs; + const struct mlxsw_sp_sb_mm *mms; + const struct mlxsw_sp_sb_cm *cms_ingress; + const struct mlxsw_sp_sb_cm *cms_egress; + const struct mlxsw_sp_sb_cm *cms_cpu; + unsigned int mms_count; + unsigned int cms_ingress_count; + unsigned int cms_egress_count; + unsigned int cms_cpu_count; +}; + u32 mlxsw_sp_cells_bytes(const struct mlxsw_sp *mlxsw_sp, u32 cells) { return mlxsw_sp->sb->cell_size * cells; @@ -83,6 +115,11 @@ u32 mlxsw_sp_bytes_cells(const struct mlxsw_sp *mlxsw_sp, u32 bytes) return DIV_ROUND_UP(bytes, mlxsw_sp->sb->cell_size); } +u32 mlxsw_sp_sb_max_headroom_cells(const struct 
mlxsw_sp *mlxsw_sp) +{ + return mlxsw_sp->sb->max_headroom_cells; +} + static struct mlxsw_sp_sb_pr *mlxsw_sp_sb_pr_get(struct mlxsw_sp *mlxsw_sp, u16 pool_index) { @@ -121,7 +158,7 @@ static int mlxsw_sp_sb_pr_write(struct mlxsw_sp *mlxsw_sp, u16 pool_index, u32 size, bool infi_size) { const struct mlxsw_sp_sb_pool_des *des = - &mlxsw_sp_sb_pool_dess[pool_index]; + &mlxsw_sp->sb_vals->pool_dess[pool_index]; char sbpr_pl[MLXSW_REG_SBPR_LEN]; struct mlxsw_sp_sb_pr *pr; int err; @@ -145,7 +182,7 @@ static int mlxsw_sp_sb_cm_write(struct mlxsw_sp *mlxsw_sp, u8 local_port, bool infi_max, u16 pool_index) { const struct mlxsw_sp_sb_pool_des *des = - &mlxsw_sp_sb_pool_dess[pool_index]; + &mlxsw_sp->sb_vals->pool_dess[pool_index]; char sbcm_pl[MLXSW_REG_SBCM_LEN]; struct mlxsw_sp_sb_cm *cm; int err; @@ -174,7 +211,7 @@ static int mlxsw_sp_sb_pm_write(struct mlxsw_sp *mlxsw_sp, u8 local_port, u16 pool_index, u32 min_buff, u32 max_buff) { const struct mlxsw_sp_sb_pool_des *des = - &mlxsw_sp_sb_pool_dess[pool_index]; + &mlxsw_sp->sb_vals->pool_dess[pool_index]; char sbpm_pl[MLXSW_REG_SBPM_LEN]; struct mlxsw_sp_sb_pm *pm; int err; @@ -195,7 +232,7 @@ static int mlxsw_sp_sb_pm_occ_clear(struct mlxsw_sp *mlxsw_sp, u8 local_port, u16 pool_index, struct list_head *bulk_list) { const struct mlxsw_sp_sb_pool_des *des = - &mlxsw_sp_sb_pool_dess[pool_index]; + &mlxsw_sp->sb_vals->pool_dess[pool_index]; char sbpm_pl[MLXSW_REG_SBPM_LEN]; mlxsw_reg_sbpm_pack(sbpm_pl, local_port, des->pool, des->dir, @@ -217,7 +254,7 @@ static int mlxsw_sp_sb_pm_occ_query(struct mlxsw_sp *mlxsw_sp, u8 local_port, u16 pool_index, struct list_head *bulk_list) { const struct mlxsw_sp_sb_pool_des *des = - &mlxsw_sp_sb_pool_dess[pool_index]; + &mlxsw_sp->sb_vals->pool_dess[pool_index]; char sbpm_pl[MLXSW_REG_SBPM_LEN]; struct mlxsw_sp_sb_pm *pm; @@ -230,24 +267,24 @@ static int mlxsw_sp_sb_pm_occ_query(struct mlxsw_sp *mlxsw_sp, u8 local_port, (unsigned long) pm); } -static const u16 mlxsw_sp_pbs[] = { - [0] = 2 * ETH_FRAME_LEN, - [9] = 2 * MLXSW_PORT_MAX_MTU, -}; - -#define MLXSW_SP_PBS_LEN ARRAY_SIZE(mlxsw_sp_pbs) +/* 1/4 of a headroom necessary for 100Gbps port and 100m cable. 
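
Port headroom is now scaled by port width: MLXSW_SP_PB_HEADROOM bytes per lane, converted to cells with a round-up division. A small sketch of the arithmetic; the cell size below is an assumed constant, whereas the driver reads the real one from the device:

    #include <stdio.h>

    #define CELL_SIZE   96    /* illustrative; the driver queries HW for this */
    #define PB_HEADROOM 25632 /* bytes per unit of port width, as in the hunk */

    static unsigned int bytes_to_cells(unsigned int bytes)
    {
            return (bytes + CELL_SIZE - 1) / CELL_SIZE; /* DIV_ROUND_UP */
    }

    int main(void)
    {
            /* A 4-lane port gets 4x the single-lane headroom quota. */
            unsigned int width = 4;

            printf("headroom: %u cells\n", bytes_to_cells(PB_HEADROOM * width));
            return 0;
    }
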
*/ +#define MLXSW_SP_PB_HEADROOM 25632 #define MLXSW_SP_PB_UNUSED 8 static int mlxsw_sp_port_pb_init(struct mlxsw_sp_port *mlxsw_sp_port) { + const u32 pbs[] = { + [0] = MLXSW_SP_PB_HEADROOM * mlxsw_sp_port->mapping.width, + [9] = 2 * MLXSW_PORT_MAX_MTU, + }; struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; char pbmc_pl[MLXSW_REG_PBMC_LEN]; int i; mlxsw_reg_pbmc_pack(pbmc_pl, mlxsw_sp_port->local_port, 0xffff, 0xffff / 2); - for (i = 0; i < MLXSW_SP_PBS_LEN; i++) { - u16 size = mlxsw_sp_bytes_cells(mlxsw_sp, mlxsw_sp_pbs[i]); + for (i = 0; i < ARRAY_SIZE(pbs); i++) { + u16 size = mlxsw_sp_bytes_cells(mlxsw_sp, pbs[i]); if (i == MLXSW_SP_PB_UNUSED) continue; @@ -280,50 +317,120 @@ static int mlxsw_sp_port_headroom_init(struct mlxsw_sp_port *mlxsw_sp_port) return mlxsw_sp_port_pb_prio_init(mlxsw_sp_port); } +static int mlxsw_sp_sb_port_init(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_sb_port *sb_port) +{ + struct mlxsw_sp_sb_pm *pms; + + pms = kcalloc(mlxsw_sp->sb_vals->pool_count, sizeof(*pms), + GFP_KERNEL); + if (!pms) + return -ENOMEM; + sb_port->pms = pms; + return 0; +} + +static void mlxsw_sp_sb_port_fini(struct mlxsw_sp_sb_port *sb_port) +{ + kfree(sb_port->pms); +} + static int mlxsw_sp_sb_ports_init(struct mlxsw_sp *mlxsw_sp) { unsigned int max_ports = mlxsw_core_max_ports(mlxsw_sp->core); + struct mlxsw_sp_sb_pr *prs; + int i; + int err; mlxsw_sp->sb->ports = kcalloc(max_ports, sizeof(struct mlxsw_sp_sb_port), GFP_KERNEL); if (!mlxsw_sp->sb->ports) return -ENOMEM; + + prs = kcalloc(mlxsw_sp->sb_vals->pool_count, sizeof(*prs), + GFP_KERNEL); + if (!prs) { + err = -ENOMEM; + goto err_alloc_prs; + } + mlxsw_sp->sb->prs = prs; + + for (i = 0; i < max_ports; i++) { + err = mlxsw_sp_sb_port_init(mlxsw_sp, &mlxsw_sp->sb->ports[i]); + if (err) + goto err_sb_port_init; + } + return 0; + +err_sb_port_init: + for (i--; i >= 0; i--) + mlxsw_sp_sb_port_fini(&mlxsw_sp->sb->ports[i]); + kfree(mlxsw_sp->sb->prs); +err_alloc_prs: + kfree(mlxsw_sp->sb->ports); + return err; } static void mlxsw_sp_sb_ports_fini(struct mlxsw_sp *mlxsw_sp) { + int max_ports = mlxsw_core_max_ports(mlxsw_sp->core); + int i; + + for (i = max_ports - 1; i >= 0; i--) + mlxsw_sp_sb_port_fini(&mlxsw_sp->sb->ports[i]); + kfree(mlxsw_sp->sb->prs); kfree(mlxsw_sp->sb->ports); } -#define MLXSW_SP_SB_PR_INGRESS_SIZE 12440000 -#define MLXSW_SP_SB_PR_INGRESS_MNG_SIZE (200 * 1000) -#define MLXSW_SP_SB_PR_EGRESS_SIZE 13232000 - #define MLXSW_SP_SB_PR(_mode, _size) \ { \ .mode = _mode, \ .size = _size, \ } -static const struct mlxsw_sp_sb_pr mlxsw_sp_sb_prs[] = { +#define MLXSW_SP1_SB_PR_INGRESS_SIZE 12440000 +#define MLXSW_SP1_SB_PR_INGRESS_MNG_SIZE (200 * 1000) +#define MLXSW_SP1_SB_PR_EGRESS_SIZE 13232000 + +static const struct mlxsw_sp_sb_pr mlxsw_sp1_sb_prs[] = { /* Ingress pools. */ MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, - MLXSW_SP_SB_PR_INGRESS_SIZE), + MLXSW_SP1_SB_PR_INGRESS_SIZE), MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0), MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0), MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, - MLXSW_SP_SB_PR_INGRESS_MNG_SIZE), + MLXSW_SP1_SB_PR_INGRESS_MNG_SIZE), /* Egress pools. 
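
Rather than one compiled-in table, the shared-buffer code now selects a per-ASIC set of values at probe time; the Spectrum-1 and Spectrum-2 pool sizes in these hunks differ (12440000/13232000 vs 40960000 bytes). A condensed model of that table-selection pattern, with the mlxsw_sp_sb_vals struct reduced to two fields:

    #include <stdio.h>

    struct sb_vals {
            unsigned int pool_count;
            const unsigned int *pool_sizes;
    };

    static const unsigned int sp1_pool_sizes[] = { 12440000, 13232000 };
    static const unsigned int sp2_pool_sizes[] = { 40960000, 40960000 };

    static const struct sb_vals sp1_vals = {
            .pool_count = sizeof(sp1_pool_sizes) / sizeof(sp1_pool_sizes[0]),
            .pool_sizes = sp1_pool_sizes,
    };

    static const struct sb_vals sp2_vals = {
            .pool_count = sizeof(sp2_pool_sizes) / sizeof(sp2_pool_sizes[0]),
            .pool_sizes = sp2_pool_sizes,
    };

    int main(void)
    {
            const struct sb_vals *vals = &sp2_vals; /* chosen per ASIC at probe */

            for (unsigned int i = 0; i < vals->pool_count; i++)
                    printf("pool %u: %u bytes\n", i, vals->pool_sizes[i]);
            return 0;
    }
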
*/ - MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, MLXSW_SP_SB_PR_EGRESS_SIZE), + MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, + MLXSW_SP1_SB_PR_EGRESS_SIZE), MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0), MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0), MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0), MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_STATIC, MLXSW_SP_SB_INFI), }; -#define MLXSW_SP_SB_PRS_LEN ARRAY_SIZE(mlxsw_sp_sb_prs) +#define MLXSW_SP2_SB_PR_INGRESS_SIZE 40960000 +#define MLXSW_SP2_SB_PR_INGRESS_MNG_SIZE (200 * 1000) +#define MLXSW_SP2_SB_PR_EGRESS_SIZE 40960000 + +static const struct mlxsw_sp_sb_pr mlxsw_sp2_sb_prs[] = { + /* Ingress pools. */ + MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, + MLXSW_SP2_SB_PR_INGRESS_SIZE), + MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_STATIC, 0), + MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_STATIC, 0), + MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, + MLXSW_SP2_SB_PR_INGRESS_MNG_SIZE), + /* Egress pools. */ + MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, + MLXSW_SP2_SB_PR_EGRESS_SIZE), + MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_STATIC, 0), + MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_STATIC, 0), + MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_STATIC, 0), + MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_STATIC, MLXSW_SP_SB_INFI), +}; static int mlxsw_sp_sb_prs_init(struct mlxsw_sp *mlxsw_sp, const struct mlxsw_sp_sb_pr *prs, @@ -357,7 +464,7 @@ static int mlxsw_sp_sb_prs_init(struct mlxsw_sp *mlxsw_sp, .pool_index = _pool, \ } -static const struct mlxsw_sp_sb_cm mlxsw_sp_sb_cms_ingress[] = { +static const struct mlxsw_sp_sb_cm mlxsw_sp1_sb_cms_ingress[] = { MLXSW_SP_SB_CM(10000, 8, 0), MLXSW_SP_SB_CM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN, 0), MLXSW_SP_SB_CM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN, 0), @@ -370,9 +477,20 @@ static const struct mlxsw_sp_sb_cm mlxsw_sp_sb_cms_ingress[] = { MLXSW_SP_SB_CM(20000, 1, 3), }; -#define MLXSW_SP_SB_CMS_INGRESS_LEN ARRAY_SIZE(mlxsw_sp_sb_cms_ingress) +static const struct mlxsw_sp_sb_cm mlxsw_sp2_sb_cms_ingress[] = { + MLXSW_SP_SB_CM(0, 7, 0), + MLXSW_SP_SB_CM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN, 0), + MLXSW_SP_SB_CM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN, 0), + MLXSW_SP_SB_CM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN, 0), + MLXSW_SP_SB_CM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN, 0), + MLXSW_SP_SB_CM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN, 0), + MLXSW_SP_SB_CM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN, 0), + MLXSW_SP_SB_CM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN, 0), + MLXSW_SP_SB_CM(0, 0, 0), /* dummy, this PG does not exist */ + MLXSW_SP_SB_CM(20000, 1, 3), +}; -static const struct mlxsw_sp_sb_cm mlxsw_sp_sb_cms_egress[] = { +static const struct mlxsw_sp_sb_cm mlxsw_sp1_sb_cms_egress[] = { MLXSW_SP_SB_CM(1500, 9, 4), MLXSW_SP_SB_CM(1500, 9, 4), MLXSW_SP_SB_CM(1500, 9, 4), @@ -392,7 +510,25 @@ static const struct mlxsw_sp_sb_cm mlxsw_sp_sb_cms_egress[] = { MLXSW_SP_SB_CM(1, 0xff, 4), }; -#define MLXSW_SP_SB_CMS_EGRESS_LEN ARRAY_SIZE(mlxsw_sp_sb_cms_egress) +static const struct mlxsw_sp_sb_cm mlxsw_sp2_sb_cms_egress[] = { + MLXSW_SP_SB_CM(0, 7, 4), + MLXSW_SP_SB_CM(0, 7, 4), + MLXSW_SP_SB_CM(0, 7, 4), + MLXSW_SP_SB_CM(0, 7, 4), + MLXSW_SP_SB_CM(0, 7, 4), + MLXSW_SP_SB_CM(0, 7, 4), + MLXSW_SP_SB_CM(0, 7, 4), + MLXSW_SP_SB_CM(0, 7, 4), + MLXSW_SP_SB_CM(0, MLXSW_SP_SB_INFI, 8), + MLXSW_SP_SB_CM(0, MLXSW_SP_SB_INFI, 8), + MLXSW_SP_SB_CM(0, MLXSW_SP_SB_INFI, 8), + MLXSW_SP_SB_CM(0, MLXSW_SP_SB_INFI, 8), + MLXSW_SP_SB_CM(0, MLXSW_SP_SB_INFI, 8), + MLXSW_SP_SB_CM(0, MLXSW_SP_SB_INFI, 8), + MLXSW_SP_SB_CM(0, MLXSW_SP_SB_INFI, 8), + MLXSW_SP_SB_CM(0, MLXSW_SP_SB_INFI, 8), + MLXSW_SP_SB_CM(1, 0xff, 4), +}; #define 
MLXSW_SP_CPU_PORT_SB_CM MLXSW_SP_SB_CM(0, 0, 4) @@ -431,9 +567,6 @@ static const struct mlxsw_sp_sb_cm mlxsw_sp_cpu_port_sb_cms[] = { MLXSW_SP_CPU_PORT_SB_CM, }; -#define MLXSW_SP_CPU_PORT_SB_MCS_LEN \ - ARRAY_SIZE(mlxsw_sp_cpu_port_sb_cms) - static bool mlxsw_sp_sb_pool_is_static(struct mlxsw_sp *mlxsw_sp, u16 pool_index) { @@ -447,6 +580,7 @@ static int __mlxsw_sp_sb_cms_init(struct mlxsw_sp *mlxsw_sp, u8 local_port, const struct mlxsw_sp_sb_cm *cms, size_t cms_len) { + const struct mlxsw_sp_sb_vals *sb_vals = mlxsw_sp->sb_vals; int i; int err; @@ -458,7 +592,7 @@ static int __mlxsw_sp_sb_cms_init(struct mlxsw_sp *mlxsw_sp, u8 local_port, if (i == 8 && dir == MLXSW_REG_SBXX_DIR_INGRESS) continue; /* PG number 8 does not exist, skip it */ cm = &cms[i]; - if (WARN_ON(mlxsw_sp_sb_pool_dess[cm->pool_index].dir != dir)) + if (WARN_ON(sb_vals->pool_dess[cm->pool_index].dir != dir)) continue; min_buff = mlxsw_sp_bytes_cells(mlxsw_sp, cm->min_buff); @@ -484,27 +618,28 @@ static int __mlxsw_sp_sb_cms_init(struct mlxsw_sp *mlxsw_sp, u8 local_port, static int mlxsw_sp_port_sb_cms_init(struct mlxsw_sp_port *mlxsw_sp_port) { + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; int err; - err = __mlxsw_sp_sb_cms_init(mlxsw_sp_port->mlxsw_sp, + err = __mlxsw_sp_sb_cms_init(mlxsw_sp, mlxsw_sp_port->local_port, MLXSW_REG_SBXX_DIR_INGRESS, - mlxsw_sp_sb_cms_ingress, - MLXSW_SP_SB_CMS_INGRESS_LEN); + mlxsw_sp->sb_vals->cms_ingress, + mlxsw_sp->sb_vals->cms_ingress_count); if (err) return err; return __mlxsw_sp_sb_cms_init(mlxsw_sp_port->mlxsw_sp, mlxsw_sp_port->local_port, MLXSW_REG_SBXX_DIR_EGRESS, - mlxsw_sp_sb_cms_egress, - MLXSW_SP_SB_CMS_EGRESS_LEN); + mlxsw_sp->sb_vals->cms_egress, + mlxsw_sp->sb_vals->cms_egress_count); } static int mlxsw_sp_cpu_port_sb_cms_init(struct mlxsw_sp *mlxsw_sp) { return __mlxsw_sp_sb_cms_init(mlxsw_sp, 0, MLXSW_REG_SBXX_DIR_EGRESS, - mlxsw_sp_cpu_port_sb_cms, - MLXSW_SP_CPU_PORT_SB_MCS_LEN); + mlxsw_sp->sb_vals->cms_cpu, + mlxsw_sp->sb_vals->cms_cpu_count); } #define MLXSW_SP_SB_PM(_min_buff, _max_buff) \ @@ -513,7 +648,7 @@ static int mlxsw_sp_cpu_port_sb_cms_init(struct mlxsw_sp *mlxsw_sp) .max_buff = _max_buff, \ } -static const struct mlxsw_sp_sb_pm mlxsw_sp_sb_pms[] = { +static const struct mlxsw_sp_sb_pm mlxsw_sp1_sb_pms[] = { /* Ingress pools. */ MLXSW_SP_SB_PM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MAX), MLXSW_SP_SB_PM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN), @@ -527,7 +662,19 @@ static const struct mlxsw_sp_sb_pm mlxsw_sp_sb_pms[] = { MLXSW_SP_SB_PM(10000, 90000), }; -#define MLXSW_SP_SB_PMS_LEN ARRAY_SIZE(mlxsw_sp_sb_pms) +static const struct mlxsw_sp_sb_pm mlxsw_sp2_sb_pms[] = { + /* Ingress pools. */ + MLXSW_SP_SB_PM(0, 7), + MLXSW_SP_SB_PM(0, 0), + MLXSW_SP_SB_PM(0, 0), + MLXSW_SP_SB_PM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MAX), + /* Egress pools. 
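
__mlxsw_sp_sb_cms_init() above walks the quota tables but skips ingress PG 8, which does not exist in hardware; the tables keep a dummy slot so indices still line up. The skip logic in isolation, with made-up quota values:

    #include <stdio.h>

    static void program_quotas(const int *quotas, int count, int hole)
    {
            for (int i = 0; i < count; i++) {
                    if (i == hole)
                            continue; /* PG number 8 does not exist, skip it */
                    printf("PG %d -> quota %d\n", i, quotas[i]);
            }
    }

    int main(void)
    {
            /* Index 8 is a placeholder, exactly so the array stays dense. */
            const int quotas[] = { 10000, 0, 0, 0, 0, 0, 0, 0, -1, 20000 };

            program_quotas(quotas, 10, 8);
            return 0;
    }
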
*/ + MLXSW_SP_SB_PM(0, 7), + MLXSW_SP_SB_PM(0, 0), + MLXSW_SP_SB_PM(0, 0), + MLXSW_SP_SB_PM(0, 0), + MLXSW_SP_SB_PM(10000, 90000), +}; static int mlxsw_sp_port_sb_pms_init(struct mlxsw_sp_port *mlxsw_sp_port) { @@ -535,8 +682,8 @@ static int mlxsw_sp_port_sb_pms_init(struct mlxsw_sp_port *mlxsw_sp_port) int i; int err; - for (i = 0; i < MLXSW_SP_SB_PMS_LEN; i++) { - const struct mlxsw_sp_sb_pm *pm = &mlxsw_sp_sb_pms[i]; + for (i = 0; i < mlxsw_sp->sb_vals->pool_count; i++) { + const struct mlxsw_sp_sb_pm *pm = &mlxsw_sp->sb_vals->pms[i]; u32 max_buff; u32 min_buff; @@ -552,12 +699,6 @@ static int mlxsw_sp_port_sb_pms_init(struct mlxsw_sp_port *mlxsw_sp_port) return 0; } -struct mlxsw_sp_sb_mm { - u32 min_buff; - u32 max_buff; - u16 pool_index; -}; - #define MLXSW_SP_SB_MM(_min_buff, _max_buff, _pool) \ { \ .min_buff = _min_buff, \ @@ -583,21 +724,19 @@ static const struct mlxsw_sp_sb_mm mlxsw_sp_sb_mms[] = { MLXSW_SP_SB_MM(0, 6, 4), }; -#define MLXSW_SP_SB_MMS_LEN ARRAY_SIZE(mlxsw_sp_sb_mms) - static int mlxsw_sp_sb_mms_init(struct mlxsw_sp *mlxsw_sp) { char sbmm_pl[MLXSW_REG_SBMM_LEN]; int i; int err; - for (i = 0; i < MLXSW_SP_SB_MMS_LEN; i++) { + for (i = 0; i < mlxsw_sp->sb_vals->mms_count; i++) { const struct mlxsw_sp_sb_pool_des *des; const struct mlxsw_sp_sb_mm *mc; u32 min_buff; - mc = &mlxsw_sp_sb_mms[i]; - des = &mlxsw_sp_sb_pool_dess[mc->pool_index]; + mc = &mlxsw_sp->sb_vals->mms[i]; + des = &mlxsw_sp->sb_vals->pool_dess[mc->pool_index]; /* All pools used by sb_mm's are initialized using dynamic * thresholds, therefore 'max_buff' isn't specified in cells. */ @@ -611,22 +750,55 @@ static int mlxsw_sp_sb_mms_init(struct mlxsw_sp *mlxsw_sp) return 0; } -static void mlxsw_sp_pool_count(u16 *p_ingress_len, u16 *p_egress_len) +static void mlxsw_sp_pool_count(struct mlxsw_sp *mlxsw_sp, + u16 *p_ingress_len, u16 *p_egress_len) { int i; - for (i = 0; i < MLXSW_SP_SB_POOL_DESS_LEN; ++i) - if (mlxsw_sp_sb_pool_dess[i].dir == MLXSW_REG_SBXX_DIR_EGRESS) + for (i = 0; i < mlxsw_sp->sb_vals->pool_count; ++i) + if (mlxsw_sp->sb_vals->pool_dess[i].dir == + MLXSW_REG_SBXX_DIR_EGRESS) goto out; WARN(1, "No egress pools\n"); out: *p_ingress_len = i; - *p_egress_len = MLXSW_SP_SB_POOL_DESS_LEN - i; + *p_egress_len = mlxsw_sp->sb_vals->pool_count - i; } +const struct mlxsw_sp_sb_vals mlxsw_sp1_sb_vals = { + .pool_count = ARRAY_SIZE(mlxsw_sp1_sb_pool_dess), + .pool_dess = mlxsw_sp1_sb_pool_dess, + .pms = mlxsw_sp1_sb_pms, + .prs = mlxsw_sp1_sb_prs, + .mms = mlxsw_sp_sb_mms, + .cms_ingress = mlxsw_sp1_sb_cms_ingress, + .cms_egress = mlxsw_sp1_sb_cms_egress, + .cms_cpu = mlxsw_sp_cpu_port_sb_cms, + .mms_count = ARRAY_SIZE(mlxsw_sp_sb_mms), + .cms_ingress_count = ARRAY_SIZE(mlxsw_sp1_sb_cms_ingress), + .cms_egress_count = ARRAY_SIZE(mlxsw_sp1_sb_cms_egress), + .cms_cpu_count = ARRAY_SIZE(mlxsw_sp_cpu_port_sb_cms), +}; + +const struct mlxsw_sp_sb_vals mlxsw_sp2_sb_vals = { + .pool_count = ARRAY_SIZE(mlxsw_sp2_sb_pool_dess), + .pool_dess = mlxsw_sp2_sb_pool_dess, + .pms = mlxsw_sp2_sb_pms, + .prs = mlxsw_sp2_sb_prs, + .mms = mlxsw_sp_sb_mms, + .cms_ingress = mlxsw_sp2_sb_cms_ingress, + .cms_egress = mlxsw_sp2_sb_cms_egress, + .cms_cpu = mlxsw_sp_cpu_port_sb_cms, + .mms_count = ARRAY_SIZE(mlxsw_sp_sb_mms), + .cms_ingress_count = ARRAY_SIZE(mlxsw_sp2_sb_cms_ingress), + .cms_egress_count = ARRAY_SIZE(mlxsw_sp2_sb_cms_egress), + .cms_cpu_count = ARRAY_SIZE(mlxsw_sp_cpu_port_sb_cms), +}; + int mlxsw_sp_buffers_init(struct mlxsw_sp *mlxsw_sp) { + u32 max_headroom_size; u16 ing_pool_count; u16 eg_pool_count; int 
err; @@ -637,18 +809,26 @@ int mlxsw_sp_buffers_init(struct mlxsw_sp *mlxsw_sp) if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_BUFFER_SIZE)) return -EIO; + if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_HEADROOM_SIZE)) + return -EIO; + mlxsw_sp->sb = kzalloc(sizeof(*mlxsw_sp->sb), GFP_KERNEL); if (!mlxsw_sp->sb) return -ENOMEM; mlxsw_sp->sb->cell_size = MLXSW_CORE_RES_GET(mlxsw_sp->core, CELL_SIZE); mlxsw_sp->sb->sb_size = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_BUFFER_SIZE); + max_headroom_size = MLXSW_CORE_RES_GET(mlxsw_sp->core, + MAX_HEADROOM_SIZE); + /* Round down, because this limit must not be overstepped. */ + mlxsw_sp->sb->max_headroom_cells = max_headroom_size / + mlxsw_sp->sb->cell_size; err = mlxsw_sp_sb_ports_init(mlxsw_sp); if (err) goto err_sb_ports_init; - err = mlxsw_sp_sb_prs_init(mlxsw_sp, mlxsw_sp_sb_prs, - MLXSW_SP_SB_PRS_LEN); + err = mlxsw_sp_sb_prs_init(mlxsw_sp, mlxsw_sp->sb_vals->prs, + mlxsw_sp->sb_vals->pool_count); if (err) goto err_sb_prs_init; err = mlxsw_sp_cpu_port_sb_cms_init(mlxsw_sp); @@ -657,7 +837,7 @@ int mlxsw_sp_buffers_init(struct mlxsw_sp *mlxsw_sp) err = mlxsw_sp_sb_mms_init(mlxsw_sp); if (err) goto err_sb_mms_init; - mlxsw_sp_pool_count(&ing_pool_count, &eg_pool_count); + mlxsw_sp_pool_count(mlxsw_sp, &ing_pool_count, &eg_pool_count); err = devlink_sb_register(priv_to_devlink(mlxsw_sp->core), 0, mlxsw_sp->sb->sb_size, ing_pool_count, @@ -705,14 +885,16 @@ int mlxsw_sp_sb_pool_get(struct mlxsw_core *mlxsw_core, unsigned int sb_index, u16 pool_index, struct devlink_sb_pool_info *pool_info) { - enum mlxsw_reg_sbxx_dir dir = mlxsw_sp_sb_pool_dess[pool_index].dir; struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); + enum mlxsw_reg_sbxx_dir dir; struct mlxsw_sp_sb_pr *pr; + dir = mlxsw_sp->sb_vals->pool_dess[pool_index].dir; pr = mlxsw_sp_sb_pr_get(mlxsw_sp, pool_index); pool_info->pool_type = (enum devlink_sb_pool_type) dir; pool_info->size = mlxsw_sp_cells_bytes(mlxsw_sp, pr->size); pool_info->threshold_type = (enum devlink_sb_threshold_type) pr->mode; + pool_info->cell_size = mlxsw_sp->sb->cell_size; return 0; } @@ -833,7 +1015,7 @@ int mlxsw_sp_sb_tc_pool_bind_set(struct mlxsw_core_port *mlxsw_core_port, u32 max_buff; int err; - if (dir != mlxsw_sp_sb_pool_dess[pool_index].dir) + if (dir != mlxsw_sp->sb_vals->pool_dess[pool_index].dir) return -EINVAL; err = mlxsw_sp_sb_threshold_in(mlxsw_sp, pool_index, @@ -931,7 +1113,7 @@ next_batch: continue; mlxsw_reg_sbsr_ingress_port_mask_set(sbsr_pl, local_port, 1); mlxsw_reg_sbsr_egress_port_mask_set(sbsr_pl, local_port, 1); - for (i = 0; i < MLXSW_SP_SB_POOL_DESS_LEN; i++) { + for (i = 0; i < mlxsw_sp->sb_vals->pool_count; i++) { err = mlxsw_sp_sb_pm_occ_query(mlxsw_sp, local_port, i, &bulk_list); if (err) @@ -990,7 +1172,7 @@ next_batch: continue; mlxsw_reg_sbsr_ingress_port_mask_set(sbsr_pl, local_port, 1); mlxsw_reg_sbsr_egress_port_mask_set(sbsr_pl, local_port, 1); - for (i = 0; i < MLXSW_SP_SB_POOL_DESS_LEN; i++) { + for (i = 0; i < mlxsw_sp->sb_vals->pool_count; i++) { err = mlxsw_sp_sb_pm_occ_clear(mlxsw_sp, local_port, i, &bulk_list); if (err) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c index 41e607a14846..49933818c6f5 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c @@ -220,7 +220,7 @@ start_again: for (; i < rif_count; i++) { struct mlxsw_sp_rif *rif = mlxsw_sp_rif_by_index(mlxsw_sp, i); - if (!rif) + if (!rif || 
!mlxsw_sp_rif_dev(rif)) continue; err = mlxsw_sp_erif_entry_get(mlxsw_sp, &entry, rif, counters_enabled); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c index a3db033d7399..46baf3b44309 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c @@ -15,6 +15,7 @@ struct mlxsw_sp_fid_family; struct mlxsw_sp_fid_core { + struct rhashtable fid_ht; struct rhashtable vni_ht; struct mlxsw_sp_fid_family *fid_family_arr[MLXSW_SP_FID_TYPE_MAX]; unsigned int *port_fid_mappings; @@ -26,10 +27,13 @@ struct mlxsw_sp_fid { unsigned int ref_count; u16 fid_index; struct mlxsw_sp_fid_family *fid_family; + struct rhash_head ht_node; struct rhash_head vni_ht_node; + enum mlxsw_sp_nve_type nve_type; __be32 vni; u32 nve_flood_index; + int nve_ifindex; u8 vni_valid:1, nve_flood_index_valid:1; }; @@ -44,6 +48,12 @@ struct mlxsw_sp_fid_8021d { int br_ifindex; }; +static const struct rhashtable_params mlxsw_sp_fid_ht_params = { + .key_len = sizeof_field(struct mlxsw_sp_fid, fid_index), + .key_offset = offsetof(struct mlxsw_sp_fid, fid_index), + .head_offset = offsetof(struct mlxsw_sp_fid, ht_node), +}; + static const struct rhashtable_params mlxsw_sp_fid_vni_ht_params = { .key_len = sizeof_field(struct mlxsw_sp_fid, vni), .key_offset = offsetof(struct mlxsw_sp_fid, vni), @@ -75,6 +85,8 @@ struct mlxsw_sp_fid_ops { int (*nve_flood_index_set)(struct mlxsw_sp_fid *fid, u32 nve_flood_index); void (*nve_flood_index_clear)(struct mlxsw_sp_fid *fid); + void (*fdb_clear_offload)(const struct mlxsw_sp_fid *fid, + const struct net_device *nve_dev); }; struct mlxsw_sp_fid_family { @@ -89,6 +101,7 @@ struct mlxsw_sp_fid_family { enum mlxsw_sp_rif_type rif_type; const struct mlxsw_sp_fid_ops *ops; struct mlxsw_sp *mlxsw_sp; + u8 lag_vid_valid:1; }; static const int mlxsw_sp_sfgc_uc_packet_types[MLXSW_REG_SFGC_TYPE_MAX] = { @@ -113,6 +126,45 @@ static const int *mlxsw_sp_packet_type_sfgc_types[] = { [MLXSW_SP_FLOOD_TYPE_MC] = mlxsw_sp_sfgc_mc_packet_types, }; +bool mlxsw_sp_fid_lag_vid_valid(const struct mlxsw_sp_fid *fid) +{ + return fid->fid_family->lag_vid_valid; +} + +struct mlxsw_sp_fid *mlxsw_sp_fid_lookup_by_index(struct mlxsw_sp *mlxsw_sp, + u16 fid_index) +{ + struct mlxsw_sp_fid *fid; + + fid = rhashtable_lookup_fast(&mlxsw_sp->fid_core->fid_ht, &fid_index, + mlxsw_sp_fid_ht_params); + if (fid) + fid->ref_count++; + + return fid; +} + +int mlxsw_sp_fid_nve_ifindex(const struct mlxsw_sp_fid *fid, int *nve_ifindex) +{ + if (!fid->vni_valid) + return -EINVAL; + + *nve_ifindex = fid->nve_ifindex; + + return 0; +} + +int mlxsw_sp_fid_nve_type(const struct mlxsw_sp_fid *fid, + enum mlxsw_sp_nve_type *p_type) +{ + if (!fid->vni_valid) + return -EINVAL; + + *p_type = fid->nve_type; + + return 0; +} + struct mlxsw_sp_fid *mlxsw_sp_fid_lookup_by_vni(struct mlxsw_sp *mlxsw_sp, __be32 vni) { @@ -173,7 +225,8 @@ bool mlxsw_sp_fid_nve_flood_index_is_set(const struct mlxsw_sp_fid *fid) return fid->nve_flood_index_valid; } -int mlxsw_sp_fid_vni_set(struct mlxsw_sp_fid *fid, __be32 vni) +int mlxsw_sp_fid_vni_set(struct mlxsw_sp_fid *fid, enum mlxsw_sp_nve_type type, + __be32 vni, int nve_ifindex) { struct mlxsw_sp_fid_family *fid_family = fid->fid_family; const struct mlxsw_sp_fid_ops *ops = fid_family->ops; @@ -183,6 +236,8 @@ int mlxsw_sp_fid_vni_set(struct mlxsw_sp_fid *fid, __be32 vni) if (WARN_ON(!ops->vni_set || fid->vni_valid)) return -EINVAL; + fid->nve_type = type; + fid->nve_ifindex = 
nve_ifindex; fid->vni = vni; err = rhashtable_lookup_insert_fast(&mlxsw_sp->fid_core->vni_ht, &fid->vni_ht_node, @@ -224,6 +279,16 @@ bool mlxsw_sp_fid_vni_is_set(const struct mlxsw_sp_fid *fid) return fid->vni_valid; } +void mlxsw_sp_fid_fdb_clear_offload(const struct mlxsw_sp_fid *fid, + const struct net_device *nve_dev) +{ + struct mlxsw_sp_fid_family *fid_family = fid->fid_family; + const struct mlxsw_sp_fid_ops *ops = fid_family->ops; + + if (ops->fdb_clear_offload) + ops->fdb_clear_offload(fid, nve_dev); +} + static const struct mlxsw_sp_flood_table * mlxsw_sp_fid_flood_table_lookup(const struct mlxsw_sp_fid *fid, enum mlxsw_sp_flood_type packet_type) @@ -284,11 +349,6 @@ void mlxsw_sp_fid_port_vid_unmap(struct mlxsw_sp_fid *fid, fid->fid_family->ops->port_vid_unmap(fid, mlxsw_sp_port, vid); } -enum mlxsw_sp_rif_type mlxsw_sp_fid_rif_type(const struct mlxsw_sp_fid *fid) -{ - return fid->fid_family->rif_type; -} - u16 mlxsw_sp_fid_index(const struct mlxsw_sp_fid *fid) { return fid->fid_index; @@ -304,6 +364,11 @@ void mlxsw_sp_fid_rif_set(struct mlxsw_sp_fid *fid, struct mlxsw_sp_rif *rif) fid->rif = rif; } +struct mlxsw_sp_rif *mlxsw_sp_fid_rif(const struct mlxsw_sp_fid *fid) +{ + return fid->rif; +} + enum mlxsw_sp_rif_type mlxsw_sp_fid_type_rif_type(const struct mlxsw_sp *mlxsw_sp, enum mlxsw_sp_fid_type type) @@ -568,7 +633,7 @@ mlxsw_sp_fid_8021d_compare(const struct mlxsw_sp_fid *fid, const void *arg) static u16 mlxsw_sp_fid_8021d_flood_index(const struct mlxsw_sp_fid *fid) { - return fid->fid_index - fid->fid_family->start_index; + return fid->fid_index - VLAN_N_VID; } static int mlxsw_sp_port_vp_mode_trans(struct mlxsw_sp_port *mlxsw_sp_port) @@ -713,6 +778,13 @@ static void mlxsw_sp_fid_8021d_nve_flood_index_clear(struct mlxsw_sp_fid *fid) fid->vni_valid, 0, false); } +static void +mlxsw_sp_fid_8021d_fdb_clear_offload(const struct mlxsw_sp_fid *fid, + const struct net_device *nve_dev) +{ + br_fdb_clear_offload(nve_dev, 0); +} + static const struct mlxsw_sp_fid_ops mlxsw_sp_fid_8021d_ops = { .setup = mlxsw_sp_fid_8021d_setup, .configure = mlxsw_sp_fid_8021d_configure, @@ -726,6 +798,7 @@ static const struct mlxsw_sp_fid_ops mlxsw_sp_fid_8021d_ops = { .vni_clear = mlxsw_sp_fid_8021d_vni_clear, .nve_flood_index_set = mlxsw_sp_fid_8021d_nve_flood_index_set, .nve_flood_index_clear = mlxsw_sp_fid_8021d_nve_flood_index_clear, + .fdb_clear_offload = mlxsw_sp_fid_8021d_fdb_clear_offload, }; static const struct mlxsw_sp_flood_table mlxsw_sp_fid_8021d_flood_tables[] = { @@ -759,6 +832,48 @@ static const struct mlxsw_sp_fid_family mlxsw_sp_fid_8021d_family = { .nr_flood_tables = ARRAY_SIZE(mlxsw_sp_fid_8021d_flood_tables), .rif_type = MLXSW_SP_RIF_TYPE_FID, .ops = &mlxsw_sp_fid_8021d_ops, + .lag_vid_valid = 1, +}; + +static void +mlxsw_sp_fid_8021q_fdb_clear_offload(const struct mlxsw_sp_fid *fid, + const struct net_device *nve_dev) +{ + br_fdb_clear_offload(nve_dev, mlxsw_sp_fid_8021q_vid(fid)); +} + +static const struct mlxsw_sp_fid_ops mlxsw_sp_fid_8021q_emu_ops = { + .setup = mlxsw_sp_fid_8021q_setup, + .configure = mlxsw_sp_fid_8021d_configure, + .deconfigure = mlxsw_sp_fid_8021d_deconfigure, + .index_alloc = mlxsw_sp_fid_8021d_index_alloc, + .compare = mlxsw_sp_fid_8021q_compare, + .flood_index = mlxsw_sp_fid_8021d_flood_index, + .port_vid_map = mlxsw_sp_fid_8021d_port_vid_map, + .port_vid_unmap = mlxsw_sp_fid_8021d_port_vid_unmap, + .vni_set = mlxsw_sp_fid_8021d_vni_set, + .vni_clear = mlxsw_sp_fid_8021d_vni_clear, + .nve_flood_index_set = mlxsw_sp_fid_8021d_nve_flood_index_set, + 
.nve_flood_index_clear = mlxsw_sp_fid_8021d_nve_flood_index_clear, + .fdb_clear_offload = mlxsw_sp_fid_8021q_fdb_clear_offload, +}; + +/* There are 4K-2 emulated 802.1Q FIDs, starting right after the 802.1D FIDs */ +#define MLXSW_SP_FID_8021Q_EMU_START (VLAN_N_VID + MLXSW_SP_FID_8021D_MAX) +#define MLXSW_SP_FID_8021Q_EMU_END (MLXSW_SP_FID_8021Q_EMU_START + \ + VLAN_VID_MASK - 2) + +/* Range and flood configuration must match mlxsw_config_profile */ +static const struct mlxsw_sp_fid_family mlxsw_sp_fid_8021q_emu_family = { + .type = MLXSW_SP_FID_TYPE_8021Q, + .fid_size = sizeof(struct mlxsw_sp_fid_8021q), + .start_index = MLXSW_SP_FID_8021Q_EMU_START, + .end_index = MLXSW_SP_FID_8021Q_EMU_END, + .flood_tables = mlxsw_sp_fid_8021d_flood_tables, + .nr_flood_tables = ARRAY_SIZE(mlxsw_sp_fid_8021d_flood_tables), + .rif_type = MLXSW_SP_RIF_TYPE_VLAN, + .ops = &mlxsw_sp_fid_8021q_emu_ops, + .lag_vid_valid = 1, }; static int mlxsw_sp_fid_rfid_configure(struct mlxsw_sp_fid *fid) @@ -882,13 +997,13 @@ static const struct mlxsw_sp_fid_ops mlxsw_sp_fid_dummy_ops = { static const struct mlxsw_sp_fid_family mlxsw_sp_fid_dummy_family = { .type = MLXSW_SP_FID_TYPE_DUMMY, .fid_size = sizeof(struct mlxsw_sp_fid), - .start_index = MLXSW_SP_RFID_BASE - 1, - .end_index = MLXSW_SP_RFID_BASE - 1, + .start_index = VLAN_N_VID - 1, + .end_index = VLAN_N_VID - 1, .ops = &mlxsw_sp_fid_dummy_ops, }; static const struct mlxsw_sp_fid_family *mlxsw_sp_fid_family_arr[] = { - [MLXSW_SP_FID_TYPE_8021Q] = &mlxsw_sp_fid_8021q_family, + [MLXSW_SP_FID_TYPE_8021Q] = &mlxsw_sp_fid_8021q_emu_family, [MLXSW_SP_FID_TYPE_8021D] = &mlxsw_sp_fid_8021d_family, [MLXSW_SP_FID_TYPE_RFID] = &mlxsw_sp_fid_rfid_family, [MLXSW_SP_FID_TYPE_DUMMY] = &mlxsw_sp_fid_dummy_family, @@ -944,10 +1059,17 @@ static struct mlxsw_sp_fid *mlxsw_sp_fid_get(struct mlxsw_sp *mlxsw_sp, if (err) goto err_configure; + err = rhashtable_insert_fast(&mlxsw_sp->fid_core->fid_ht, &fid->ht_node, + mlxsw_sp_fid_ht_params); + if (err) + goto err_rhashtable_insert; + list_add(&fid->list, &fid_family->fids_list); fid->ref_count++; return fid; +err_rhashtable_insert: + fid->fid_family->ops->deconfigure(fid); err_configure: __clear_bit(fid_index - fid_family->start_index, fid_family->fids_bitmap); @@ -959,19 +1081,18 @@ err_index_alloc: void mlxsw_sp_fid_put(struct mlxsw_sp_fid *fid) { struct mlxsw_sp_fid_family *fid_family = fid->fid_family; + struct mlxsw_sp *mlxsw_sp = fid_family->mlxsw_sp; - if (--fid->ref_count == 1 && fid->rif) { - /* Destroy the associated RIF and let it drop the last - * reference on the FID. 
- */ - return mlxsw_sp_rif_destroy(fid->rif); - } else if (fid->ref_count == 0) { - list_del(&fid->list); - fid->fid_family->ops->deconfigure(fid); - __clear_bit(fid->fid_index - fid_family->start_index, - fid_family->fids_bitmap); - kfree(fid); - } + if (--fid->ref_count != 0) + return; + + list_del(&fid->list); + rhashtable_remove_fast(&mlxsw_sp->fid_core->fid_ht, + &fid->ht_node, mlxsw_sp_fid_ht_params); + fid->fid_family->ops->deconfigure(fid); + __clear_bit(fid->fid_index - fid_family->start_index, + fid_family->fids_bitmap); + kfree(fid); } struct mlxsw_sp_fid *mlxsw_sp_fid_8021q_get(struct mlxsw_sp *mlxsw_sp, u16 vid) @@ -985,6 +1106,12 @@ struct mlxsw_sp_fid *mlxsw_sp_fid_8021d_get(struct mlxsw_sp *mlxsw_sp, return mlxsw_sp_fid_get(mlxsw_sp, MLXSW_SP_FID_TYPE_8021D, &br_ifindex); } +struct mlxsw_sp_fid *mlxsw_sp_fid_8021q_lookup(struct mlxsw_sp *mlxsw_sp, + u16 vid) +{ + return mlxsw_sp_fid_lookup(mlxsw_sp, MLXSW_SP_FID_TYPE_8021Q, &vid); +} + struct mlxsw_sp_fid *mlxsw_sp_fid_8021d_lookup(struct mlxsw_sp *mlxsw_sp, int br_ifindex) { @@ -1061,8 +1188,7 @@ static int mlxsw_sp_fid_family_register(struct mlxsw_sp *mlxsw_sp, fid_family->mlxsw_sp = mlxsw_sp; INIT_LIST_HEAD(&fid_family->fids_list); - fid_family->fids_bitmap = kcalloc(BITS_TO_LONGS(nr_fids), - sizeof(unsigned long), GFP_KERNEL); + fid_family->fids_bitmap = bitmap_zalloc(nr_fids, GFP_KERNEL); if (!fid_family->fids_bitmap) { err = -ENOMEM; goto err_alloc_fids_bitmap; @@ -1079,7 +1205,7 @@ static int mlxsw_sp_fid_family_register(struct mlxsw_sp *mlxsw_sp, return 0; err_fid_flood_tables_init: - kfree(fid_family->fids_bitmap); + bitmap_free(fid_family->fids_bitmap); err_alloc_fids_bitmap: kfree(fid_family); return err; @@ -1090,7 +1216,7 @@ mlxsw_sp_fid_family_unregister(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fid_family *fid_family) { mlxsw_sp->fid_core->fid_family_arr[fid_family->type] = NULL; - kfree(fid_family->fids_bitmap); + bitmap_free(fid_family->fids_bitmap); WARN_ON_ONCE(!list_empty(&fid_family->fids_list)); kfree(fid_family); } @@ -1126,9 +1252,13 @@ int mlxsw_sp_fids_init(struct mlxsw_sp *mlxsw_sp) return -ENOMEM; mlxsw_sp->fid_core = fid_core; + err = rhashtable_init(&fid_core->fid_ht, &mlxsw_sp_fid_ht_params); + if (err) + goto err_rhashtable_fid_init; + err = rhashtable_init(&fid_core->vni_ht, &mlxsw_sp_fid_vni_ht_params); if (err) - goto err_rhashtable_init; + goto err_rhashtable_vni_init; fid_core->port_fid_mappings = kcalloc(max_ports, sizeof(unsigned int), GFP_KERNEL); @@ -1157,7 +1287,9 @@ err_fid_ops_register: kfree(fid_core->port_fid_mappings); err_alloc_port_fid_mappings: rhashtable_destroy(&fid_core->vni_ht); -err_rhashtable_init: +err_rhashtable_vni_init: + rhashtable_destroy(&fid_core->fid_ht); +err_rhashtable_fid_init: kfree(fid_core); return err; } @@ -1172,5 +1304,6 @@ void mlxsw_sp_fids_fini(struct mlxsw_sp *mlxsw_sp) fid_core->fid_family_arr[i]); kfree(fid_core->port_fid_mappings); rhashtable_destroy(&fid_core->vni_ht); + rhashtable_destroy(&fid_core->fid_ht); kfree(fid_core); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c index 8d211972c5e9..15f804453cd6 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c @@ -17,13 +17,13 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_block *block, struct mlxsw_sp_acl_rule_info *rulei, - struct tcf_exts *exts, + struct flow_action *flow_action, struct 
netlink_ext_ack *extack) { - const struct tc_action *a; + const struct flow_action_entry *act; int err, i; - if (!tcf_exts_has_actions(exts)) + if (!flow_action_has_entries(flow_action)) return 0; /* Count action is inserted first */ @@ -31,27 +31,31 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp, if (err) return err; - tcf_exts_for_each_action(i, a, exts) { - if (is_tcf_gact_ok(a)) { + flow_action_for_each(i, act, flow_action) { + switch (act->id) { + case FLOW_ACTION_ACCEPT: err = mlxsw_sp_acl_rulei_act_terminate(rulei); if (err) { NL_SET_ERR_MSG_MOD(extack, "Cannot append terminate action"); return err; } - } else if (is_tcf_gact_shot(a)) { + break; + case FLOW_ACTION_DROP: err = mlxsw_sp_acl_rulei_act_drop(rulei); if (err) { NL_SET_ERR_MSG_MOD(extack, "Cannot append drop action"); return err; } - } else if (is_tcf_gact_trap(a)) { + break; + case FLOW_ACTION_TRAP: err = mlxsw_sp_acl_rulei_act_trap(rulei); if (err) { NL_SET_ERR_MSG_MOD(extack, "Cannot append trap action"); return err; } - } else if (is_tcf_gact_goto_chain(a)) { - u32 chain_index = tcf_gact_goto_chain_index(a); + break; + case FLOW_ACTION_GOTO: { + u32 chain_index = act->chain_index; struct mlxsw_sp_acl_ruleset *ruleset; u16 group_id; @@ -67,7 +71,9 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp, NL_SET_ERR_MSG_MOD(extack, "Cannot append jump action"); return err; } - } else if (is_tcf_mirred_egress_redirect(a)) { + } + break; + case FLOW_ACTION_REDIRECT: { struct net_device *out_dev; struct mlxsw_sp_fid *fid; u16 fid_index; @@ -79,29 +85,33 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp, if (err) return err; - out_dev = tcf_mirred_dev(a); + out_dev = act->dev; err = mlxsw_sp_acl_rulei_act_fwd(mlxsw_sp, rulei, out_dev, extack); if (err) return err; - } else if (is_tcf_mirred_egress_mirror(a)) { - struct net_device *out_dev = tcf_mirred_dev(a); + } + break; + case FLOW_ACTION_MIRRED: { + struct net_device *out_dev = act->dev; err = mlxsw_sp_acl_rulei_act_mirror(mlxsw_sp, rulei, block, out_dev, extack); if (err) return err; - } else if (is_tcf_vlan(a)) { - u16 proto = be16_to_cpu(tcf_vlan_push_proto(a)); - u32 action = tcf_vlan_action(a); - u8 prio = tcf_vlan_push_prio(a); - u16 vid = tcf_vlan_push_vid(a); + } + break; + case FLOW_ACTION_VLAN_MANGLE: { + u16 proto = be16_to_cpu(act->vlan.proto); + u8 prio = act->vlan.prio; + u16 vid = act->vlan.vid; return mlxsw_sp_acl_rulei_act_vlan(mlxsw_sp, rulei, - action, vid, + act->id, vid, proto, prio, extack); - } else { + } + default: NL_SET_ERR_MSG_MOD(extack, "Unsupported action"); dev_err(mlxsw_sp->bus_info->dev, "Unsupported action\n"); return -EOPNOTSUPP; @@ -113,59 +123,49 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp, static void mlxsw_sp_flower_parse_ipv4(struct mlxsw_sp_acl_rule_info *rulei, struct tc_cls_flower_offload *f) { - struct flow_dissector_key_ipv4_addrs *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_IPV4_ADDRS, - f->key); - struct flow_dissector_key_ipv4_addrs *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_IPV4_ADDRS, - f->mask); + struct flow_match_ipv4_addrs match; + + flow_rule_match_ipv4_addrs(f->rule, &match); mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_SRC_IP_0_31, - (char *) &key->src, - (char *) &mask->src, 4); + (char *) &match.key->src, + (char *) &match.mask->src, 4); mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_DST_IP_0_31, - (char *) &key->dst, - (char *) &mask->dst, 4); + (char *) 
&match.key->dst, + (char *) &match.mask->dst, 4); } static void mlxsw_sp_flower_parse_ipv6(struct mlxsw_sp_acl_rule_info *rulei, struct tc_cls_flower_offload *f) { - struct flow_dissector_key_ipv6_addrs *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_IPV6_ADDRS, - f->key); - struct flow_dissector_key_ipv6_addrs *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_IPV6_ADDRS, - f->mask); + struct flow_match_ipv6_addrs match; + + flow_rule_match_ipv6_addrs(f->rule, &match); mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_SRC_IP_96_127, - &key->src.s6_addr[0x0], - &mask->src.s6_addr[0x0], 4); + &match.key->src.s6_addr[0x0], + &match.mask->src.s6_addr[0x0], 4); mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_SRC_IP_64_95, - &key->src.s6_addr[0x4], - &mask->src.s6_addr[0x4], 4); + &match.key->src.s6_addr[0x4], + &match.mask->src.s6_addr[0x4], 4); mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_SRC_IP_32_63, - &key->src.s6_addr[0x8], - &mask->src.s6_addr[0x8], 4); + &match.key->src.s6_addr[0x8], + &match.mask->src.s6_addr[0x8], 4); mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_SRC_IP_0_31, - &key->src.s6_addr[0xC], - &mask->src.s6_addr[0xC], 4); + &match.key->src.s6_addr[0xC], + &match.mask->src.s6_addr[0xC], 4); mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_DST_IP_96_127, - &key->dst.s6_addr[0x0], - &mask->dst.s6_addr[0x0], 4); + &match.key->dst.s6_addr[0x0], + &match.mask->dst.s6_addr[0x0], 4); mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_DST_IP_64_95, - &key->dst.s6_addr[0x4], - &mask->dst.s6_addr[0x4], 4); + &match.key->dst.s6_addr[0x4], + &match.mask->dst.s6_addr[0x4], 4); mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_DST_IP_32_63, - &key->dst.s6_addr[0x8], - &mask->dst.s6_addr[0x8], 4); + &match.key->dst.s6_addr[0x8], + &match.mask->dst.s6_addr[0x8], 4); mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_DST_IP_0_31, - &key->dst.s6_addr[0xC], - &mask->dst.s6_addr[0xC], 4); + &match.key->dst.s6_addr[0xC], + &match.mask->dst.s6_addr[0xC], 4); } static int mlxsw_sp_flower_parse_ports(struct mlxsw_sp *mlxsw_sp, @@ -173,9 +173,10 @@ static int mlxsw_sp_flower_parse_ports(struct mlxsw_sp *mlxsw_sp, struct tc_cls_flower_offload *f, u8 ip_proto) { - struct flow_dissector_key_ports *key, *mask; + const struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f); + struct flow_match_ports match; - if (!dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_PORTS)) + if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) return 0; if (ip_proto != IPPROTO_TCP && ip_proto != IPPROTO_UDP) { @@ -184,16 +185,13 @@ static int mlxsw_sp_flower_parse_ports(struct mlxsw_sp *mlxsw_sp, return -EINVAL; } - key = skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_PORTS, - f->key); - mask = skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_PORTS, - f->mask); + flow_rule_match_ports(rule, &match); mlxsw_sp_acl_rulei_keymask_u32(rulei, MLXSW_AFK_ELEMENT_DST_L4_PORT, - ntohs(key->dst), ntohs(mask->dst)); + ntohs(match.key->dst), + ntohs(match.mask->dst)); mlxsw_sp_acl_rulei_keymask_u32(rulei, MLXSW_AFK_ELEMENT_SRC_L4_PORT, - ntohs(key->src), ntohs(mask->src)); + ntohs(match.key->src), + ntohs(match.mask->src)); return 0; } @@ -202,9 +200,10 @@ static int mlxsw_sp_flower_parse_tcp(struct mlxsw_sp *mlxsw_sp, struct tc_cls_flower_offload *f, u8 ip_proto) { - struct flow_dissector_key_tcp *key, *mask; + const struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f); + struct flow_match_tcp 
match; - if (!dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_TCP)) + if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) return 0; if (ip_proto != IPPROTO_TCP) { @@ -213,14 +212,11 @@ static int mlxsw_sp_flower_parse_tcp(struct mlxsw_sp *mlxsw_sp, return -EINVAL; } - key = skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_TCP, - f->key); - mask = skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_TCP, - f->mask); + flow_rule_match_tcp(rule, &match); + mlxsw_sp_acl_rulei_keymask_u32(rulei, MLXSW_AFK_ELEMENT_TCP_FLAGS, - ntohs(key->flags), ntohs(mask->flags)); + ntohs(match.key->flags), + ntohs(match.mask->flags)); return 0; } @@ -229,9 +225,10 @@ static int mlxsw_sp_flower_parse_ip(struct mlxsw_sp *mlxsw_sp, struct tc_cls_flower_offload *f, u16 n_proto) { - struct flow_dissector_key_ip *key, *mask; + const struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f); + struct flow_match_ip match; - if (!dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_IP)) + if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) return 0; if (n_proto != ETH_P_IP && n_proto != ETH_P_IPV6) { @@ -240,20 +237,18 @@ static int mlxsw_sp_flower_parse_ip(struct mlxsw_sp *mlxsw_sp, return -EINVAL; } - key = skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_IP, - f->key); - mask = skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_IP, - f->mask); + flow_rule_match_ip(rule, &match); + mlxsw_sp_acl_rulei_keymask_u32(rulei, MLXSW_AFK_ELEMENT_IP_TTL_, - key->ttl, mask->ttl); + match.key->ttl, match.mask->ttl); mlxsw_sp_acl_rulei_keymask_u32(rulei, MLXSW_AFK_ELEMENT_IP_ECN, - key->tos & 0x3, mask->tos & 0x3); + match.key->tos & 0x3, + match.mask->tos & 0x3); mlxsw_sp_acl_rulei_keymask_u32(rulei, MLXSW_AFK_ELEMENT_IP_DSCP, - key->tos >> 6, mask->tos >> 6); + match.key->tos >> 6, + match.mask->tos >> 6); return 0; } @@ -263,13 +258,15 @@ static int mlxsw_sp_flower_parse(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_rule_info *rulei, struct tc_cls_flower_offload *f) { + struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f); + struct flow_dissector *dissector = rule->match.dissector; u16 n_proto_mask = 0; u16 n_proto_key = 0; u16 addr_type = 0; u8 ip_proto = 0; int err; - if (f->dissector->used_keys & + if (dissector->used_keys & ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) | BIT(FLOW_DISSECTOR_KEY_BASIC) | BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) | @@ -286,25 +283,19 @@ static int mlxsw_sp_flower_parse(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_acl_rulei_priority(rulei, f->common.prio); - if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CONTROL)) { - struct flow_dissector_key_control *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_CONTROL, - f->key); - addr_type = key->addr_type; + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) { + struct flow_match_control match; + + flow_rule_match_control(rule, &match); + addr_type = match.key->addr_type; } - if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_BASIC)) { - struct flow_dissector_key_basic *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_BASIC, - f->key); - struct flow_dissector_key_basic *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_BASIC, - f->mask); - n_proto_key = ntohs(key->n_proto); - n_proto_mask = ntohs(mask->n_proto); + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) { + struct flow_match_basic match; + + flow_rule_match_basic(rule, &match); + n_proto_key = ntohs(match.key->n_proto); + n_proto_mask = ntohs(match.mask->n_proto); if 
(n_proto_key == ETH_P_ALL) { n_proto_key = 0; @@ -314,60 +305,53 @@ static int mlxsw_sp_flower_parse(struct mlxsw_sp *mlxsw_sp, MLXSW_AFK_ELEMENT_ETHERTYPE, n_proto_key, n_proto_mask); - ip_proto = key->ip_proto; + ip_proto = match.key->ip_proto; mlxsw_sp_acl_rulei_keymask_u32(rulei, MLXSW_AFK_ELEMENT_IP_PROTO, - key->ip_proto, mask->ip_proto); + match.key->ip_proto, + match.mask->ip_proto); } - if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ETH_ADDRS)) { - struct flow_dissector_key_eth_addrs *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_ETH_ADDRS, - f->key); - struct flow_dissector_key_eth_addrs *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_ETH_ADDRS, - f->mask); + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) { + struct flow_match_eth_addrs match; + flow_rule_match_eth_addrs(rule, &match); mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_DMAC_32_47, - key->dst, mask->dst, 2); + match.key->dst, + match.mask->dst, 2); mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_DMAC_0_31, - key->dst + 2, mask->dst + 2, 4); + match.key->dst + 2, + match.mask->dst + 2, 4); mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_SMAC_32_47, - key->src, mask->src, 2); + match.key->src, + match.mask->src, 2); mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_SMAC_0_31, - key->src + 2, mask->src + 2, 4); + match.key->src + 2, + match.mask->src + 2, 4); } - if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_VLAN)) { - struct flow_dissector_key_vlan *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_VLAN, - f->key); - struct flow_dissector_key_vlan *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_VLAN, - f->mask); + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) { + struct flow_match_vlan match; + flow_rule_match_vlan(rule, &match); if (mlxsw_sp_acl_block_is_egress_bound(block)) { NL_SET_ERR_MSG_MOD(f->common.extack, "vlan_id key is not supported on egress"); return -EOPNOTSUPP; } - if (mask->vlan_id != 0) + if (match.mask->vlan_id != 0) mlxsw_sp_acl_rulei_keymask_u32(rulei, MLXSW_AFK_ELEMENT_VID, - key->vlan_id, - mask->vlan_id); - if (mask->vlan_priority != 0) + match.key->vlan_id, + match.mask->vlan_id); + if (match.mask->vlan_priority != 0) mlxsw_sp_acl_rulei_keymask_u32(rulei, MLXSW_AFK_ELEMENT_PCP, - key->vlan_priority, - mask->vlan_priority); + match.key->vlan_priority, + match.mask->vlan_priority); } if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) @@ -387,7 +371,8 @@ static int mlxsw_sp_flower_parse(struct mlxsw_sp *mlxsw_sp, if (err) return err; - return mlxsw_sp_flower_parse_actions(mlxsw_sp, block, rulei, f->exts, + return mlxsw_sp_flower_parse_actions(mlxsw_sp, block, rulei, + &f->rule->action, f->common.extack); } @@ -406,7 +391,7 @@ int mlxsw_sp_flower_replace(struct mlxsw_sp *mlxsw_sp, if (IS_ERR(ruleset)) return PTR_ERR(ruleset); - rule = mlxsw_sp_acl_rule_create(mlxsw_sp, ruleset, f->cookie, + rule = mlxsw_sp_acl_rule_create(mlxsw_sp, ruleset, f->cookie, NULL, f->common.extack); if (IS_ERR(rule)) { err = PTR_ERR(rule); @@ -486,7 +471,7 @@ int mlxsw_sp_flower_stats(struct mlxsw_sp *mlxsw_sp, if (err) goto err_rule_get_stats; - tcf_exts_stats_update(f->exts, bytes, packets, lastuse); + flow_stats_update(&f->stats, bytes, packets, lastuse); mlxsw_sp_acl_ruleset_put(mlxsw_sp, ruleset); return 0; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c index 00db26c96bf5..6400cd644b7a 100644 
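The hunks above convert mlxsw_sp_flower_parse_actions() from walking tcf_exts with is_tcf_*() helpers to a single switch over generic flow_action entries. A minimal, self-contained sketch of that dispatch shape, assuming pared-down stand-ins for the kernel's struct flow_action and flow_action_for_each() (the real definitions live in include/net/flow_offload.h; the names and sizes below are illustrative only):

#include <stdio.h>

/* Hypothetical, simplified mirrors of the kernel types. */
enum flow_action_id {
	FLOW_ACTION_ACCEPT,
	FLOW_ACTION_DROP,
	FLOW_ACTION_TRAP,
};

struct flow_action_entry {
	enum flow_action_id id;
};

struct flow_action {
	unsigned int num_entries;
	struct flow_action_entry entries[8];
};

#define flow_action_for_each(i, act, actions)			\
	for (i = 0, act = &(actions)->entries[0];		\
	     i < (actions)->num_entries;			\
	     act = &(actions)->entries[++i])

/* One switch on act->id replaces the old chain of is_tcf_*() tests. */
static int parse_actions(const struct flow_action *flow_action)
{
	const struct flow_action_entry *act;
	unsigned int i;

	flow_action_for_each(i, act, flow_action) {
		switch (act->id) {
		case FLOW_ACTION_ACCEPT:
			printf("append terminate action\n");
			break;
		case FLOW_ACTION_DROP:
			printf("append drop action\n");
			break;
		case FLOW_ACTION_TRAP:
			printf("append trap action\n");
			break;
		default:
			fprintf(stderr, "unsupported action\n");
			return -1; /* the driver returns -EOPNOTSUPP here */
		}
	}
	return 0;
}

int main(void)
{
	struct flow_action a = {
		.num_entries = 2,
		.entries = { { FLOW_ACTION_ACCEPT }, { FLOW_ACTION_DROP } },
	};

	return parse_actions(&a) ? 1 : 0;
}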
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c @@ -145,6 +145,7 @@ mlxsw_sp_ipip_fib_entry_op_gre4_rtdp(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_ipip_entry *ipip_entry) { u16 rif_index = mlxsw_sp_ipip_lb_rif_index(ipip_entry->ol_lb); + u16 ul_rif_id = mlxsw_sp_ipip_lb_ul_rif_id(ipip_entry->ol_lb); char rtdp_pl[MLXSW_REG_RTDP_LEN]; struct ip_tunnel_parm parms; unsigned int type_check; @@ -157,6 +158,7 @@ mlxsw_sp_ipip_fib_entry_op_gre4_rtdp(struct mlxsw_sp *mlxsw_sp, ikey = mlxsw_sp_ipip_parms4_ikey(parms); mlxsw_reg_rtdp_pack(rtdp_pl, MLXSW_REG_RTDP_TYPE_IPIP, tunnel_index); + mlxsw_reg_rtdp_egress_router_interface_set(rtdp_pl, ul_rif_id); type_check = has_ikey ? MLXSW_REG_RTDP_IPIP_TYPE_CHECK_ALLOW_GRE_KEY : diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c index b5b54b41349a..1df164a4b06d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c @@ -174,6 +174,20 @@ mlxsw_sp_nve_mc_record_ops_arr[] = { [MLXSW_SP_L3_PROTO_IPV6] = &mlxsw_sp_nve_mc_record_ipv6_ops, }; +int mlxsw_sp_nve_learned_ip_resolve(struct mlxsw_sp *mlxsw_sp, u32 uip, + enum mlxsw_sp_l3proto proto, + union mlxsw_sp_l3addr *addr) +{ + switch (proto) { + case MLXSW_SP_L3_PROTO_IPV4: + addr->addr4 = cpu_to_be32(uip); + return 0; + default: + WARN_ON(1); + return -EINVAL; + } +} + static struct mlxsw_sp_nve_mc_list * mlxsw_sp_nve_mc_list_find(struct mlxsw_sp *mlxsw_sp, const struct mlxsw_sp_nve_mc_list_key *key) @@ -253,8 +267,8 @@ mlxsw_sp_nve_mc_record_create(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nve_mc_record *mc_record; int err; - mc_record = kzalloc(sizeof(*mc_record) + num_max_entries * - sizeof(struct mlxsw_sp_nve_mc_entry), GFP_KERNEL); + mc_record = kzalloc(struct_size(mc_record, entries, num_max_entries), + GFP_KERNEL); if (!mc_record) return ERR_PTR(-ENOMEM); @@ -775,6 +789,21 @@ static void mlxsw_sp_nve_fdb_flush_by_fid(struct mlxsw_sp *mlxsw_sp, mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfdf), sfdf_pl); } +static void mlxsw_sp_nve_fdb_clear_offload(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_fid *fid, + const struct net_device *nve_dev, + __be32 vni) +{ + const struct mlxsw_sp_nve_ops *ops; + enum mlxsw_sp_nve_type type; + + if (WARN_ON(mlxsw_sp_fid_nve_type(fid, &type))) + return; + + ops = mlxsw_sp->nve->nve_ops_arr[type]; + ops->fdb_clear_offload(nve_dev, vni); +} + int mlxsw_sp_nve_fid_enable(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fid *fid, struct mlxsw_sp_nve_params *params, struct netlink_ext_ack *extack) @@ -787,14 +816,14 @@ int mlxsw_sp_nve_fid_enable(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fid *fid, ops = nve->nve_ops_arr[params->type]; if (!ops->can_offload(nve, params->dev, extack)) - return -EOPNOTSUPP; + return -EINVAL; memset(&config, 0, sizeof(config)); ops->nve_config(nve, params->dev, &config); if (nve->num_nve_tunnels && memcmp(&config, &nve->config, sizeof(config))) { NL_SET_ERR_MSG_MOD(extack, "Conflicting NVE tunnels configuration"); - return -EOPNOTSUPP; + return -EINVAL; } err = mlxsw_sp_nve_tunnel_init(mlxsw_sp, &config); @@ -803,7 +832,8 @@ int mlxsw_sp_nve_fid_enable(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fid *fid, return err; } - err = mlxsw_sp_fid_vni_set(fid, params->vni); + err = mlxsw_sp_fid_vni_set(fid, params->type, params->vni, + params->dev->ifindex); if (err) { NL_SET_ERR_MSG_MOD(extack, "Failed to set VNI on FID"); goto err_fid_vni_set; @@ -811,8 
+841,14 @@ int mlxsw_sp_nve_fid_enable(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fid *fid, nve->config = config; + err = ops->fdb_replay(params->dev, params->vni, extack); + if (err) + goto err_fdb_replay; + return 0; +err_fdb_replay: + mlxsw_sp_fid_vni_clear(fid); err_fid_vni_set: mlxsw_sp_nve_tunnel_fini(mlxsw_sp); return err; @@ -822,9 +858,27 @@ void mlxsw_sp_nve_fid_disable(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fid *fid) { u16 fid_index = mlxsw_sp_fid_index(fid); + struct net_device *nve_dev; + int nve_ifindex; + __be32 vni; mlxsw_sp_nve_flood_ip_flush(mlxsw_sp, fid); mlxsw_sp_nve_fdb_flush_by_fid(mlxsw_sp, fid_index); + + if (WARN_ON(mlxsw_sp_fid_nve_ifindex(fid, &nve_ifindex) || + mlxsw_sp_fid_vni(fid, &vni))) + goto out; + + nve_dev = dev_get_by_index(&init_net, nve_ifindex); + if (!nve_dev) + goto out; + + mlxsw_sp_nve_fdb_clear_offload(mlxsw_sp, fid, nve_dev, vni); + mlxsw_sp_fid_fdb_clear_offload(fid, nve_dev); + + dev_put(nve_dev); + +out: mlxsw_sp_fid_vni_clear(fid); mlxsw_sp_nve_tunnel_fini(mlxsw_sp); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.h index 4cc3297e13d6..0035640156a1 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.h @@ -28,6 +28,7 @@ struct mlxsw_sp_nve { unsigned int num_nve_tunnels; /* Protected by RTNL */ unsigned int num_max_mc_entries[MLXSW_SP_L3_PROTO_MAX]; u32 tunnel_index; + u16 ul_rif_index; /* Reserved for Spectrum */ }; struct mlxsw_sp_nve_ops { @@ -41,6 +42,9 @@ struct mlxsw_sp_nve_ops { int (*init)(struct mlxsw_sp_nve *nve, const struct mlxsw_sp_nve_config *config); void (*fini)(struct mlxsw_sp_nve *nve); + int (*fdb_replay)(const struct net_device *nve_dev, __be32 vni, + struct netlink_ext_ack *extack); + void (*fdb_clear_offload)(const struct net_device *nve_dev, __be32 vni); }; extern const struct mlxsw_sp_nve_ops mlxsw_sp1_nve_vxlan_ops; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve_vxlan.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve_vxlan.c index d21c7be5b1c9..93ccd9fc2266 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve_vxlan.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve_vxlan.c @@ -7,6 +7,7 @@ #include <net/vxlan.h> #include "reg.h" +#include "spectrum.h" #include "spectrum_nve.h" /* Eth (18B) | IPv6 (40B) | UDP (8B) | VxLAN (8B) | Eth (14B) | IPv6 (40B) @@ -17,11 +18,12 @@ #define MLXSW_SP_NVE_VXLAN_PARSING_DEPTH 128 #define MLXSW_SP_NVE_DEFAULT_PARSING_DEPTH 96 -#define MLXSW_SP_NVE_VXLAN_SUPPORTED_FLAGS VXLAN_F_UDP_ZERO_CSUM_TX +#define MLXSW_SP_NVE_VXLAN_SUPPORTED_FLAGS (VXLAN_F_UDP_ZERO_CSUM_TX | \ + VXLAN_F_LEARN) -static bool mlxsw_sp1_nve_vxlan_can_offload(const struct mlxsw_sp_nve *nve, - const struct net_device *dev, - struct netlink_ext_ack *extack) +static bool mlxsw_sp_nve_vxlan_can_offload(const struct mlxsw_sp_nve *nve, + const struct net_device *dev, + struct netlink_ext_ack *extack) { struct vxlan_dev *vxlan = netdev_priv(dev); struct vxlan_config *cfg = &vxlan->cfg; @@ -61,11 +63,6 @@ static bool mlxsw_sp1_nve_vxlan_can_offload(const struct mlxsw_sp_nve *nve, return false; } - if (cfg->flags & VXLAN_F_LEARN) { - NL_SET_ERR_MSG_MOD(extack, "VxLAN: Learning is not supported"); - return false; - } - if (!(cfg->flags & VXLAN_F_UDP_ZERO_CSUM_TX)) { NL_SET_ERR_MSG_MOD(extack, "VxLAN: UDP checksum is not supported"); return false; @@ -116,13 +113,30 @@ static int mlxsw_sp_nve_parsing_set(struct mlxsw_sp *mlxsw_sp, return 
mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mprs), mprs_pl); } +static void +mlxsw_sp_nve_vxlan_config_prepare(char *tngcr_pl, + const struct mlxsw_sp_nve_config *config) +{ + u8 udp_sport; + + mlxsw_reg_tngcr_pack(tngcr_pl, MLXSW_REG_TNGCR_TYPE_VXLAN, true, + config->ttl); + /* VxLAN driver's default UDP source port range is 32768 (0x8000) + * to 60999 (0xee47). Set the upper 8 bits of the UDP source port + * to a random number between 0x80 and 0xee + */ + get_random_bytes(&udp_sport, sizeof(udp_sport)); + udp_sport = (udp_sport % (0xee - 0x80 + 1)) + 0x80; + mlxsw_reg_tngcr_nve_udp_sport_prefix_set(tngcr_pl, udp_sport); + mlxsw_reg_tngcr_usipv4_set(tngcr_pl, be32_to_cpu(config->ul_sip.addr4)); +} + static int mlxsw_sp1_nve_vxlan_config_set(struct mlxsw_sp *mlxsw_sp, const struct mlxsw_sp_nve_config *config) { char tngcr_pl[MLXSW_REG_TNGCR_LEN]; u16 ul_vr_id; - u8 udp_sport; int err; err = mlxsw_sp_router_tb_id_vr_id(mlxsw_sp, config->ul_tb_id, @@ -130,18 +144,9 @@ mlxsw_sp1_nve_vxlan_config_set(struct mlxsw_sp *mlxsw_sp, if (err) return err; - mlxsw_reg_tngcr_pack(tngcr_pl, MLXSW_REG_TNGCR_TYPE_VXLAN, true, - config->ttl); - /* VxLAN driver's default UDP source port range is 32768 (0x8000) - * to 60999 (0xee47). Set the upper 8 bits of the UDP source port - * to a random number between 0x80 and 0xee - */ - get_random_bytes(&udp_sport, sizeof(udp_sport)); - udp_sport = (udp_sport % (0xee - 0x80 + 1)) + 0x80; - mlxsw_reg_tngcr_nve_udp_sport_prefix_set(tngcr_pl, udp_sport); + mlxsw_sp_nve_vxlan_config_prepare(tngcr_pl, config); mlxsw_reg_tngcr_learn_enable_set(tngcr_pl, config->learning_en); mlxsw_reg_tngcr_underlay_virtual_router_set(tngcr_pl, ul_vr_id); - mlxsw_reg_tngcr_usipv4_set(tngcr_pl, be32_to_cpu(config->ul_sip.addr4)); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tngcr), tngcr_pl); } @@ -215,35 +220,157 @@ static void mlxsw_sp1_nve_vxlan_fini(struct mlxsw_sp_nve *nve) config->udp_dport); } +static int +mlxsw_sp_nve_vxlan_fdb_replay(const struct net_device *nve_dev, __be32 vni, + struct netlink_ext_ack *extack) +{ + if (WARN_ON(!netif_is_vxlan(nve_dev))) + return -EINVAL; + return vxlan_fdb_replay(nve_dev, vni, &mlxsw_sp_switchdev_notifier, + extack); +} + +static void +mlxsw_sp_nve_vxlan_clear_offload(const struct net_device *nve_dev, __be32 vni) +{ + if (WARN_ON(!netif_is_vxlan(nve_dev))) + return; + vxlan_fdb_clear_offload(nve_dev, vni); +} + const struct mlxsw_sp_nve_ops mlxsw_sp1_nve_vxlan_ops = { .type = MLXSW_SP_NVE_TYPE_VXLAN, - .can_offload = mlxsw_sp1_nve_vxlan_can_offload, + .can_offload = mlxsw_sp_nve_vxlan_can_offload, .nve_config = mlxsw_sp_nve_vxlan_config, .init = mlxsw_sp1_nve_vxlan_init, .fini = mlxsw_sp1_nve_vxlan_fini, + .fdb_replay = mlxsw_sp_nve_vxlan_fdb_replay, + .fdb_clear_offload = mlxsw_sp_nve_vxlan_clear_offload, }; -static bool mlxsw_sp2_nve_vxlan_can_offload(const struct mlxsw_sp_nve *nve, - const struct net_device *dev, - struct netlink_ext_ack *extack) +static int mlxsw_sp2_nve_vxlan_learning_set(struct mlxsw_sp *mlxsw_sp, + bool learning_en) +{ + char tnpc_pl[MLXSW_REG_TNPC_LEN]; + + mlxsw_reg_tnpc_pack(tnpc_pl, MLXSW_REG_TNPC_TUNNEL_PORT_NVE, + learning_en); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tnpc), tnpc_pl); +} + +static int +mlxsw_sp2_nve_vxlan_config_set(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_nve_config *config) +{ + char tngcr_pl[MLXSW_REG_TNGCR_LEN]; + u16 ul_rif_index; + int err; + + err = mlxsw_sp_router_ul_rif_get(mlxsw_sp, config->ul_tb_id, + &ul_rif_index); + if (err) + return err; + 
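/* A minimal sketch of the source-port arithmetic used by
 * mlxsw_sp_nve_vxlan_config_prepare() above, assuming rnd is one random
 * byte (0x00..0xff):
 *
 *	prefix = (rnd % (0xee - 0x80 + 1)) + 0x80;
 *
 * 0xee - 0x80 + 1 is 111, so rnd % 111 lies in 0..110 and prefix lies in
 * 0x80..0xee inclusive. Assuming the device fills in the low 8 bits of
 * the port, the resulting UDP source ports fall in 0x8000..0xeeff,
 * approximating the VxLAN driver's default software range of
 * 0x8000..0xee47 (32768..60999).
 */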
mlxsw_sp->nve->ul_rif_index = ul_rif_index; + + err = mlxsw_sp2_nve_vxlan_learning_set(mlxsw_sp, config->learning_en); + if (err) + goto err_vxlan_learning_set; + + mlxsw_sp_nve_vxlan_config_prepare(tngcr_pl, config); + mlxsw_reg_tngcr_underlay_rif_set(tngcr_pl, ul_rif_index); + + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tngcr), tngcr_pl); + if (err) + goto err_tngcr_write; + + return 0; + +err_tngcr_write: + mlxsw_sp2_nve_vxlan_learning_set(mlxsw_sp, false); +err_vxlan_learning_set: + mlxsw_sp_router_ul_rif_put(mlxsw_sp, ul_rif_index); + return err; +} + +static void mlxsw_sp2_nve_vxlan_config_clear(struct mlxsw_sp *mlxsw_sp) { - return false; + char tngcr_pl[MLXSW_REG_TNGCR_LEN]; + + mlxsw_reg_tngcr_pack(tngcr_pl, MLXSW_REG_TNGCR_TYPE_VXLAN, false, 0); + mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tngcr), tngcr_pl); + mlxsw_sp2_nve_vxlan_learning_set(mlxsw_sp, false); + mlxsw_sp_router_ul_rif_put(mlxsw_sp, mlxsw_sp->nve->ul_rif_index); +} + +static int mlxsw_sp2_nve_vxlan_rtdp_set(struct mlxsw_sp *mlxsw_sp, + unsigned int tunnel_index, + u16 ul_rif_index) +{ + char rtdp_pl[MLXSW_REG_RTDP_LEN]; + + mlxsw_reg_rtdp_pack(rtdp_pl, MLXSW_REG_RTDP_TYPE_NVE, tunnel_index); + mlxsw_reg_rtdp_egress_router_interface_set(rtdp_pl, ul_rif_index); + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rtdp), rtdp_pl); } static int mlxsw_sp2_nve_vxlan_init(struct mlxsw_sp_nve *nve, const struct mlxsw_sp_nve_config *config) { - return -EOPNOTSUPP; + struct mlxsw_sp *mlxsw_sp = nve->mlxsw_sp; + int err; + + err = mlxsw_sp_nve_parsing_set(mlxsw_sp, + MLXSW_SP_NVE_VXLAN_PARSING_DEPTH, + config->udp_dport); + if (err) + return err; + + err = mlxsw_sp2_nve_vxlan_config_set(mlxsw_sp, config); + if (err) + goto err_config_set; + + err = mlxsw_sp2_nve_vxlan_rtdp_set(mlxsw_sp, nve->tunnel_index, + nve->ul_rif_index); + if (err) + goto err_rtdp_set; + + err = mlxsw_sp_router_nve_promote_decap(mlxsw_sp, config->ul_tb_id, + config->ul_proto, + &config->ul_sip, + nve->tunnel_index); + if (err) + goto err_promote_decap; + + return 0; + +err_promote_decap: +err_rtdp_set: + mlxsw_sp2_nve_vxlan_config_clear(mlxsw_sp); +err_config_set: + mlxsw_sp_nve_parsing_set(mlxsw_sp, MLXSW_SP_NVE_DEFAULT_PARSING_DEPTH, + config->udp_dport); + return err; } static void mlxsw_sp2_nve_vxlan_fini(struct mlxsw_sp_nve *nve) { + struct mlxsw_sp_nve_config *config = &nve->config; + struct mlxsw_sp *mlxsw_sp = nve->mlxsw_sp; + + mlxsw_sp_router_nve_demote_decap(mlxsw_sp, config->ul_tb_id, + config->ul_proto, &config->ul_sip); + mlxsw_sp2_nve_vxlan_config_clear(mlxsw_sp); + mlxsw_sp_nve_parsing_set(mlxsw_sp, MLXSW_SP_NVE_DEFAULT_PARSING_DEPTH, + config->udp_dport); } const struct mlxsw_sp_nve_ops mlxsw_sp2_nve_vxlan_ops = { .type = MLXSW_SP_NVE_TYPE_VXLAN, - .can_offload = mlxsw_sp2_nve_vxlan_can_offload, + .can_offload = mlxsw_sp_nve_vxlan_can_offload, .nve_config = mlxsw_sp_nve_vxlan_config, .init = mlxsw_sp2_nve_vxlan_init, .fini = mlxsw_sp2_nve_vxlan_fini, + .fdb_replay = mlxsw_sp_nve_vxlan_fdb_replay, + .fdb_clear_offload = mlxsw_sp_nve_vxlan_clear_offload, }; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 6ebf99cc3154..902e766a8ed3 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -15,6 +15,7 @@ #include <linux/gcd.h> #include <linux/random.h> #include <linux/if_macvlan.h> +#include <linux/refcount.h> #include <net/netevent.h> #include <net/neighbour.h> #include <net/arp.h> 
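mlxsw_sp2_nve_vxlan_init() earlier in this patch follows the usual kernel unwind idiom: acquire resources in order (parsing depth, tunnel config, RTDP, decap route) and, on failure, release only what was already acquired, in reverse order, through a ladder of goto labels. A minimal, self-contained sketch of the idiom, with hypothetical stand-in steps in place of the driver calls:

#include <stdio.h>

static int parsing_set(void)	{ printf("parsing depth set\n"); return 0; }
static void parsing_restore(void) { printf("parsing depth restored\n"); }
static int config_set(void)	{ printf("tunnel configured\n"); return 0; }
static void config_clear(void)	{ printf("tunnel config cleared\n"); }
static int decap_promote(void)	{ printf("decap promotion failed\n"); return -1; }

/* Acquire in order; on error, release what was acquired, in reverse. */
static int tunnel_init(void)
{
	int err;

	err = parsing_set();
	if (err)
		return err;

	err = config_set();
	if (err)
		goto err_config_set;

	err = decap_promote();	/* fails here, for demonstration */
	if (err)
		goto err_promote_decap;

	return 0;

err_promote_decap:
	config_clear();
err_config_set:
	parsing_restore();
	return err;
}

int main(void)
{
	return tunnel_init() ? 1 : 0;
}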
@@ -70,6 +71,8 @@ struct mlxsw_sp_router { bool aborted; struct notifier_block fib_nb; struct notifier_block netevent_nb; + struct notifier_block inetaddr_nb; + struct notifier_block inet6addr_nb; const struct mlxsw_sp_rif_ops **rif_ops_arr; const struct mlxsw_sp_ipip_ops **ipip_ops_arr; }; @@ -77,7 +80,7 @@ struct mlxsw_sp_router { struct mlxsw_sp_rif { struct list_head nexthop_list; struct list_head neigh_list; - struct net_device *dev; + struct net_device *dev; /* NULL for underlay RIF */ struct mlxsw_sp_fid *fid; unsigned char addr[ETH_ALEN]; int mtu; @@ -104,6 +107,7 @@ struct mlxsw_sp_rif_params { struct mlxsw_sp_rif_subport { struct mlxsw_sp_rif common; + refcount_t ref_count; union { u16 system_port; u16 lag_id; @@ -116,6 +120,7 @@ struct mlxsw_sp_rif_ipip_lb { struct mlxsw_sp_rif common; struct mlxsw_sp_rif_ipip_lb_config lb_config; u16 ul_vr_id; /* Reserved for Spectrum-2. */ + u16 ul_rif_id; /* Reserved for Spectrum. */ }; struct mlxsw_sp_rif_params_ipip_lb { @@ -136,6 +141,7 @@ struct mlxsw_sp_rif_ops { void (*fdb_del)(struct mlxsw_sp_rif *rif, const char *mac); }; +static void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif); static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree); static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_lpm_tree *lpm_tree); @@ -358,6 +364,7 @@ enum mlxsw_sp_fib_entry_type { MLXSW_SP_FIB_ENTRY_TYPE_REMOTE, MLXSW_SP_FIB_ENTRY_TYPE_LOCAL, MLXSW_SP_FIB_ENTRY_TYPE_TRAP, + MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE, /* This is a special case of local delivery, where a packet should be * decapsulated on reception. Note that there is no corresponding ENCAP, @@ -435,6 +442,8 @@ struct mlxsw_sp_vr { struct mlxsw_sp_fib *fib4; struct mlxsw_sp_fib *fib6; struct mlxsw_sp_mr_table *mr_table[MLXSW_SP_L3_PROTO_MAX]; + struct mlxsw_sp_rif *ul_rif; + refcount_t ul_rif_refcnt; }; static const struct rhashtable_params mlxsw_sp_fib_ht_params; @@ -1432,8 +1441,8 @@ mlxsw_sp_ipip_entry_ol_up_event(struct mlxsw_sp *mlxsw_sp, } static int -mlxsw_sp_rif_ipip_lb_op(struct mlxsw_sp_rif_ipip_lb *lb_rif, - struct mlxsw_sp_vr *ul_vr, bool enable) +mlxsw_sp_rif_ipip_lb_op(struct mlxsw_sp_rif_ipip_lb *lb_rif, u16 ul_vr_id, + u16 ul_rif_id, bool enable) { struct mlxsw_sp_rif_ipip_lb_config lb_cf = lb_rif->lb_config; struct mlxsw_sp_rif *rif = &lb_rif->common; @@ -1448,7 +1457,7 @@ mlxsw_sp_rif_ipip_lb_op(struct mlxsw_sp_rif_ipip_lb *lb_rif, rif->rif_index, rif->vr_id, rif->dev->mtu); mlxsw_reg_ritr_loopback_ipip4_pack(ritr_pl, lb_cf.lb_ipipt, MLXSW_REG_RITR_LOOPBACK_IPIP_OPTIONS_GRE_KEY_PRESET, - ul_vr->id, saddr4, lb_cf.okey); + ul_vr_id, ul_rif_id, saddr4, lb_cf.okey); break; case MLXSW_SP_L3_PROTO_IPV6: @@ -1463,14 +1472,13 @@ static int mlxsw_sp_netdevice_ipip_ol_update_mtu(struct mlxsw_sp *mlxsw_sp, { struct mlxsw_sp_ipip_entry *ipip_entry; struct mlxsw_sp_rif_ipip_lb *lb_rif; - struct mlxsw_sp_vr *ul_vr; int err = 0; ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev); if (ipip_entry) { lb_rif = ipip_entry->ol_lb; - ul_vr = &mlxsw_sp->router->vrs[lb_rif->ul_vr_id]; - err = mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr, true); + err = mlxsw_sp_rif_ipip_lb_op(lb_rif, lb_rif->ul_vr_id, + lb_rif->ul_rif_id, true); if (err) goto out; lb_rif->common.mtu = ol_dev->mtu; @@ -3806,13 +3814,11 @@ mlxsw_sp_nexthop4_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi) struct mlxsw_sp_nexthop_group *nh_grp; struct mlxsw_sp_nexthop *nh; struct fib_nh *fib_nh; - size_t alloc_size; int i; int err; - alloc_size = sizeof(*nh_grp) + - fi->fib_nhs * 
sizeof(struct mlxsw_sp_nexthop); - nh_grp = kzalloc(alloc_size, GFP_KERNEL); + nh_grp = kzalloc(struct_size(nh_grp, nexthops, fi->fib_nhs), + GFP_KERNEL); if (!nh_grp) return ERR_PTR(-ENOMEM); nh_grp->priv = fi; @@ -3921,6 +3927,7 @@ mlxsw_sp_fib_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry) return !!nh_group->adj_index_valid; case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL: return !!nh_group->nh_rif; + case MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE: case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP: case MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP: return true; @@ -3956,6 +3963,7 @@ mlxsw_sp_fib4_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry) int i; if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL || + fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE || fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP || fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP) { nh_grp->nexthops->key.fib_nh->nh_flags |= RTNH_F_OFFLOAD; @@ -3997,7 +4005,8 @@ mlxsw_sp_fib6_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry) fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry, common); - if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL) { + if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL || + fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE) { list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6, list)->rt->fib6_nh.nh_flags |= RTNH_F_OFFLOAD; return; @@ -4165,6 +4174,19 @@ static int mlxsw_sp_fib_entry_op_trap(struct mlxsw_sp *mlxsw_sp, return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl); } +static int mlxsw_sp_fib_entry_op_blackhole(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry, + enum mlxsw_reg_ralue_op op) +{ + enum mlxsw_reg_ralue_trap_action trap_action; + char ralue_pl[MLXSW_REG_RALUE_LEN]; + + trap_action = MLXSW_REG_RALUE_TRAP_ACTION_DISCARD_ERROR; + mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op); + mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, 0, 0); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl); +} + static int mlxsw_sp_fib_entry_op_ipip_decap(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_entry *fib_entry, @@ -4204,6 +4226,8 @@ static int __mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp, return mlxsw_sp_fib_entry_op_local(mlxsw_sp, fib_entry, op); case MLXSW_SP_FIB_ENTRY_TYPE_TRAP: return mlxsw_sp_fib_entry_op_trap(mlxsw_sp, fib_entry, op); + case MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE: + return mlxsw_sp_fib_entry_op_blackhole(mlxsw_sp, fib_entry, op); case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP: return mlxsw_sp_fib_entry_op_ipip_decap(mlxsw_sp, fib_entry, op); @@ -4272,8 +4296,10 @@ mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp, case RTN_BROADCAST: fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP; return 0; + case RTN_BLACKHOLE: + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE; + return 0; case RTN_UNREACHABLE: /* fall through */ - case RTN_BLACKHOLE: /* fall through */ case RTN_PROHIBIT: /* Packets hitting these routes need to be trapped, but * can do so with a lower priority than packets directed @@ -5038,13 +5064,11 @@ mlxsw_sp_nexthop6_group_create(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop_group *nh_grp; struct mlxsw_sp_rt6 *mlxsw_sp_rt6; struct mlxsw_sp_nexthop *nh; - size_t alloc_size; int i = 0; int err; - alloc_size = sizeof(*nh_grp) + - fib6_entry->nrt6 * sizeof(struct mlxsw_sp_nexthop); - nh_grp = kzalloc(alloc_size, GFP_KERNEL); + nh_grp = kzalloc(struct_size(nh_grp, nexthops, fib6_entry->nrt6), + GFP_KERNEL); if (!nh_grp) return ERR_PTR(-ENOMEM); 
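/* The kzalloc() conversions above and below use struct_size() from
 * <linux/overflow.h>: for a struct whose last member is a trailing
 * flexible array, e.g.
 *
 *	struct mlxsw_sp_nexthop_group {
 *		...
 *		struct mlxsw_sp_nexthop nexthops[];
 *	};
 *
 * struct_size(nh_grp, nexthops, n) evaluates to
 *
 *	sizeof(*nh_grp) + n * sizeof(nh_grp->nexthops[0])
 *
 * like the removed open-coded sums, but with the multiply-add done in
 * overflow-checked arithmetic that saturates to SIZE_MAX, so an oversized
 * count makes the allocation fail instead of wrapping into a short buffer.
 */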
INIT_LIST_HEAD(&nh_grp->fib_list); @@ -5222,6 +5246,8 @@ static void mlxsw_sp_fib6_entry_type_set(struct mlxsw_sp *mlxsw_sp, */ if (rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP; + else if (rt->fib6_type == RTN_BLACKHOLE) + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE; else if (rt->fib6_flags & RTF_REJECT) fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL; else if (mlxsw_sp_rt6_is_gateway(mlxsw_sp, rt)) @@ -6116,7 +6142,7 @@ static int mlxsw_sp_router_rif_disable(struct mlxsw_sp *mlxsw_sp, u16 rif) mlxsw_reg_ritr_rif_pack(ritr_pl, rif); err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl); - if (WARN_ON_ONCE(err)) + if (err) return err; mlxsw_reg_ritr_enable_set(ritr_pl, false); @@ -6219,10 +6245,12 @@ static struct mlxsw_sp_rif *mlxsw_sp_rif_alloc(size_t rif_size, u16 rif_index, INIT_LIST_HEAD(&rif->nexthop_list); INIT_LIST_HEAD(&rif->neigh_list); - ether_addr_copy(rif->addr, l3_dev->dev_addr); - rif->mtu = l3_dev->mtu; + if (l3_dev) { + ether_addr_copy(rif->addr, l3_dev->dev_addr); + rif->mtu = l3_dev->mtu; + rif->dev = l3_dev; + } rif->vr_id = vr_id; - rif->dev = l3_dev; rif->rif_index = rif_index; return rif; @@ -6246,7 +6274,19 @@ u16 mlxsw_sp_ipip_lb_rif_index(const struct mlxsw_sp_rif_ipip_lb *lb_rif) u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif) { - return lb_rif->ul_vr_id; + u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(lb_rif->common.dev); + struct mlxsw_sp_vr *ul_vr; + + ul_vr = mlxsw_sp_vr_get(lb_rif->common.mlxsw_sp, ul_tb_id, NULL); + if (WARN_ON(IS_ERR(ul_vr))) + return 0; + + return ul_vr->id; +} + +u16 mlxsw_sp_ipip_lb_ul_rif_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif) +{ + return lb_rif->ul_rif_id; } int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif) @@ -6279,7 +6319,7 @@ mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp, int i, err; type = mlxsw_sp_dev_rif_type(mlxsw_sp, params->dev); - ops = mlxsw_sp->router->rif_ops_arr[type]; + ops = mlxsw_sp->rif_ops_arr[type]; vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id ? : RT_TABLE_MAIN, extack); if (IS_ERR(vr)) @@ -6297,6 +6337,8 @@ mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp, err = -ENOMEM; goto err_rif_alloc; } + dev_hold(rif->dev); + mlxsw_sp->router->rifs[rif_index] = rif; rif->mlxsw_sp = mlxsw_sp; rif->ops = ops; @@ -6323,7 +6365,6 @@ mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp, } mlxsw_sp_rif_counters_alloc(rif); - mlxsw_sp->router->rifs[rif_index] = rif; return rif; @@ -6335,6 +6376,8 @@ err_configure: if (fid) mlxsw_sp_fid_put(fid); err_fid_get: + mlxsw_sp->router->rifs[rif_index] = NULL; + dev_put(rif->dev); kfree(rif); err_rif_alloc: err_rif_index_alloc: @@ -6343,7 +6386,7 @@ err_rif_index_alloc: return ERR_PTR(err); } -void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif) +static void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif) { const struct mlxsw_sp_rif_ops *ops = rif->ops; struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; @@ -6354,7 +6397,6 @@ void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif) mlxsw_sp_router_rif_gone_sync(mlxsw_sp, rif); vr = &mlxsw_sp->router->vrs[rif->vr_id]; - mlxsw_sp->router->rifs[rif->rif_index] = NULL; mlxsw_sp_rif_counters_free(rif); for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++) mlxsw_sp_mr_rif_del(vr->mr_table[i], rif); @@ -6362,6 +6404,8 @@ void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif) if (fid) /* Loopback RIFs are not associated with a FID. 
*/ mlxsw_sp_fid_put(fid); + mlxsw_sp->router->rifs[rif->rif_index] = NULL; + dev_put(rif->dev); kfree(rif); vr->rif_count--; mlxsw_sp_vr_put(mlxsw_sp, vr); @@ -6392,6 +6436,40 @@ mlxsw_sp_rif_subport_params_init(struct mlxsw_sp_rif_params *params, params->system_port = mlxsw_sp_port->local_port; } +static struct mlxsw_sp_rif_subport * +mlxsw_sp_rif_subport_rif(const struct mlxsw_sp_rif *rif) +{ + return container_of(rif, struct mlxsw_sp_rif_subport, common); +} + +static struct mlxsw_sp_rif * +mlxsw_sp_rif_subport_get(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_rif_params *params, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp_rif_subport *rif_subport; + struct mlxsw_sp_rif *rif; + + rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, params->dev); + if (!rif) + return mlxsw_sp_rif_create(mlxsw_sp, params, extack); + + rif_subport = mlxsw_sp_rif_subport_rif(rif); + refcount_inc(&rif_subport->ref_count); + return rif; +} + +static void mlxsw_sp_rif_subport_put(struct mlxsw_sp_rif *rif) +{ + struct mlxsw_sp_rif_subport *rif_subport; + + rif_subport = mlxsw_sp_rif_subport_rif(rif); + if (!refcount_dec_and_test(&rif_subport->ref_count)) + return; + + mlxsw_sp_rif_destroy(rif); +} + static int mlxsw_sp_port_vlan_router_join(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan, struct net_device *l3_dev, @@ -6399,22 +6477,18 @@ mlxsw_sp_port_vlan_router_join(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan, { struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port; struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct mlxsw_sp_rif_params params = { + .dev = l3_dev, + }; u16 vid = mlxsw_sp_port_vlan->vid; struct mlxsw_sp_rif *rif; struct mlxsw_sp_fid *fid; int err; - rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev); - if (!rif) { - struct mlxsw_sp_rif_params params = { - .dev = l3_dev, - }; - - mlxsw_sp_rif_subport_params_init(¶ms, mlxsw_sp_port_vlan); - rif = mlxsw_sp_rif_create(mlxsw_sp, ¶ms, extack); - if (IS_ERR(rif)) - return PTR_ERR(rif); - } + mlxsw_sp_rif_subport_params_init(¶ms, mlxsw_sp_port_vlan); + rif = mlxsw_sp_rif_subport_get(mlxsw_sp, ¶ms, extack); + if (IS_ERR(rif)) + return PTR_ERR(rif); /* FID was already created, just take a reference */ fid = rif->ops->fid_get(rif, extack); @@ -6441,6 +6515,7 @@ err_port_vid_learning_set: mlxsw_sp_fid_port_vid_unmap(fid, mlxsw_sp_port, vid); err_fid_port_vid_map: mlxsw_sp_fid_put(fid); + mlxsw_sp_rif_subport_put(rif); return err; } @@ -6449,6 +6524,7 @@ mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan) { struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port; struct mlxsw_sp_fid *fid = mlxsw_sp_port_vlan->fid; + struct mlxsw_sp_rif *rif = mlxsw_sp_fid_rif(fid); u16 vid = mlxsw_sp_port_vlan->vid; if (WARN_ON(mlxsw_sp_fid_type(fid) != MLXSW_SP_FID_TYPE_RFID)) @@ -6458,10 +6534,8 @@ mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan) mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid, BR_STATE_BLOCKING); mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true); mlxsw_sp_fid_port_vid_unmap(fid, mlxsw_sp_port, vid); - /* If router port holds the last reference on the rFID, then the - * associated Sub-port RIF will be destroyed. 
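The new mlxsw_sp_rif_subport_get()/mlxsw_sp_rif_subport_put() pair makes sub-port RIF lifetime explicit: get either creates the RIF or bumps its reference count, and put destroys it only when the last reference drops. A single-threaded userspace sketch of the get/put idiom, using C11 atomics in place of the kernel's refcount_t (which additionally traps on overflow and underflow):

    #include <stdatomic.h>
    #include <stdlib.h>

    struct subport_demo {
            atomic_uint ref_count;
    };

    static struct subport_demo *subport_cache; /* stands in for lookup by dev */

    static struct subport_demo *subport_get(void)
    {
            if (subport_cache) {
                    /* like refcount_inc(&rif_subport->ref_count) */
                    atomic_fetch_add(&subport_cache->ref_count, 1);
                    return subport_cache;
            }
            subport_cache = calloc(1, sizeof(*subport_cache));
            if (subport_cache)
                    atomic_init(&subport_cache->ref_count, 1);
            return subport_cache;
    }

    static void subport_put(struct subport_demo *s)
    {
            /* like refcount_dec_and_test(): destroy on the last drop */
            if (atomic_fetch_sub(&s->ref_count, 1) == 1) {
                    subport_cache = NULL;
                    free(s);
            }
    }

    int main(void)
    {
            struct subport_demo *a = subport_get();
            struct subport_demo *b = subport_get(); /* same object, count 2 */

            subport_put(b);
            subport_put(a); /* freed here */
            return 0;
    }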
- */ mlxsw_sp_fid_put(fid); + mlxsw_sp_rif_subport_put(rif); } static int mlxsw_sp_inetaddr_port_vlan_event(struct net_device *l3_dev, @@ -6497,8 +6571,8 @@ static int mlxsw_sp_inetaddr_port_event(struct net_device *port_dev, netif_is_ovs_port(port_dev)) return 0; - return mlxsw_sp_inetaddr_port_vlan_event(port_dev, port_dev, event, 1, - extack); + return mlxsw_sp_inetaddr_port_vlan_event(port_dev, port_dev, event, + MLXSW_SP_DEFAULT_VID, extack); } static int __mlxsw_sp_inetaddr_lag_event(struct net_device *l3_dev, @@ -6531,15 +6605,15 @@ static int mlxsw_sp_inetaddr_lag_event(struct net_device *lag_dev, if (netif_is_bridge_port(lag_dev)) return 0; - return __mlxsw_sp_inetaddr_lag_event(lag_dev, lag_dev, event, 1, - extack); + return __mlxsw_sp_inetaddr_lag_event(lag_dev, lag_dev, event, + MLXSW_SP_DEFAULT_VID, extack); } -static int mlxsw_sp_inetaddr_bridge_event(struct net_device *l3_dev, +static int mlxsw_sp_inetaddr_bridge_event(struct mlxsw_sp *mlxsw_sp, + struct net_device *l3_dev, unsigned long event, struct netlink_ext_ack *extack) { - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(l3_dev); struct mlxsw_sp_rif_params params = { .dev = l3_dev, }; @@ -6560,7 +6634,8 @@ static int mlxsw_sp_inetaddr_bridge_event(struct net_device *l3_dev, return 0; } -static int mlxsw_sp_inetaddr_vlan_event(struct net_device *vlan_dev, +static int mlxsw_sp_inetaddr_vlan_event(struct mlxsw_sp *mlxsw_sp, + struct net_device *vlan_dev, unsigned long event, struct netlink_ext_ack *extack) { @@ -6577,7 +6652,8 @@ static int mlxsw_sp_inetaddr_vlan_event(struct net_device *vlan_dev, return __mlxsw_sp_inetaddr_lag_event(vlan_dev, real_dev, event, vid, extack); else if (netif_is_bridge_master(real_dev) && br_vlan_enabled(real_dev)) - return mlxsw_sp_inetaddr_bridge_event(vlan_dev, event, extack); + return mlxsw_sp_inetaddr_bridge_event(mlxsw_sp, vlan_dev, event, + extack); return 0; } @@ -6678,16 +6754,11 @@ void mlxsw_sp_rif_macvlan_del(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_fid_index(rif->fid), false); } -static int mlxsw_sp_inetaddr_macvlan_event(struct net_device *macvlan_dev, +static int mlxsw_sp_inetaddr_macvlan_event(struct mlxsw_sp *mlxsw_sp, + struct net_device *macvlan_dev, unsigned long event, struct netlink_ext_ack *extack) { - struct mlxsw_sp *mlxsw_sp; - - mlxsw_sp = mlxsw_sp_lower_get(macvlan_dev); - if (!mlxsw_sp) - return 0; - switch (event) { case NETDEV_UP: return mlxsw_sp_rif_macvlan_add(mlxsw_sp, macvlan_dev, extack); @@ -6699,7 +6770,35 @@ static int mlxsw_sp_inetaddr_macvlan_event(struct net_device *macvlan_dev, return 0; } -static int __mlxsw_sp_inetaddr_event(struct net_device *dev, +static int mlxsw_sp_router_port_check_rif_addr(struct mlxsw_sp *mlxsw_sp, + struct net_device *dev, + const unsigned char *dev_addr, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp_rif *rif; + int i; + + /* A RIF is not created for macvlan netdevs. 
Their MAC is used to + * populate the FDB + */ + if (netif_is_macvlan(dev) || netif_is_l3_master(dev)) + return 0; + + for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) { + rif = mlxsw_sp->router->rifs[i]; + if (rif && rif->dev && rif->dev != dev && + !ether_addr_equal_masked(rif->dev->dev_addr, dev_addr, + mlxsw_sp->mac_mask)) { + NL_SET_ERR_MSG_MOD(extack, "All router interface MAC addresses must have the same prefix"); + return -EINVAL; + } + } + + return 0; +} + +static int __mlxsw_sp_inetaddr_event(struct mlxsw_sp *mlxsw_sp, + struct net_device *dev, unsigned long event, struct netlink_ext_ack *extack) { @@ -6708,21 +6807,24 @@ static int __mlxsw_sp_inetaddr_event(struct net_device *dev, else if (netif_is_lag_master(dev)) return mlxsw_sp_inetaddr_lag_event(dev, event, extack); else if (netif_is_bridge_master(dev)) - return mlxsw_sp_inetaddr_bridge_event(dev, event, extack); + return mlxsw_sp_inetaddr_bridge_event(mlxsw_sp, dev, event, + extack); else if (is_vlan_dev(dev)) - return mlxsw_sp_inetaddr_vlan_event(dev, event, extack); + return mlxsw_sp_inetaddr_vlan_event(mlxsw_sp, dev, event, + extack); else if (netif_is_macvlan(dev)) - return mlxsw_sp_inetaddr_macvlan_event(dev, event, extack); + return mlxsw_sp_inetaddr_macvlan_event(mlxsw_sp, dev, event, + extack); else return 0; } -int mlxsw_sp_inetaddr_event(struct notifier_block *unused, - unsigned long event, void *ptr) +static int mlxsw_sp_inetaddr_event(struct notifier_block *nb, + unsigned long event, void *ptr) { struct in_ifaddr *ifa = (struct in_ifaddr *) ptr; struct net_device *dev = ifa->ifa_dev->dev; - struct mlxsw_sp *mlxsw_sp; + struct mlxsw_sp_router *router; struct mlxsw_sp_rif *rif; int err = 0; @@ -6730,15 +6832,12 @@ int mlxsw_sp_inetaddr_event(struct notifier_block *unused, if (event == NETDEV_UP) goto out; - mlxsw_sp = mlxsw_sp_lower_get(dev); - if (!mlxsw_sp) - goto out; - - rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); + router = container_of(nb, struct mlxsw_sp_router, inetaddr_nb); + rif = mlxsw_sp_rif_find_by_dev(router->mlxsw_sp, dev); if (!mlxsw_sp_rif_should_config(rif, dev, event)) goto out; - err = __mlxsw_sp_inetaddr_event(dev, event, NULL); + err = __mlxsw_sp_inetaddr_event(router->mlxsw_sp, dev, event, NULL); out: return notifier_from_errno(err); } @@ -6760,13 +6859,19 @@ int mlxsw_sp_inetaddr_valid_event(struct notifier_block *unused, if (!mlxsw_sp_rif_should_config(rif, dev, event)) goto out; - err = __mlxsw_sp_inetaddr_event(dev, event, ivi->extack); + err = mlxsw_sp_router_port_check_rif_addr(mlxsw_sp, dev, dev->dev_addr, + ivi->extack); + if (err) + goto out; + + err = __mlxsw_sp_inetaddr_event(mlxsw_sp, dev, event, ivi->extack); out: return notifier_from_errno(err); } struct mlxsw_sp_inet6addr_event_work { struct work_struct work; + struct mlxsw_sp *mlxsw_sp; struct net_device *dev; unsigned long event; }; @@ -6775,21 +6880,18 @@ static void mlxsw_sp_inet6addr_event_work(struct work_struct *work) { struct mlxsw_sp_inet6addr_event_work *inet6addr_work = container_of(work, struct mlxsw_sp_inet6addr_event_work, work); + struct mlxsw_sp *mlxsw_sp = inet6addr_work->mlxsw_sp; struct net_device *dev = inet6addr_work->dev; unsigned long event = inet6addr_work->event; - struct mlxsw_sp *mlxsw_sp; struct mlxsw_sp_rif *rif; rtnl_lock(); - mlxsw_sp = mlxsw_sp_lower_get(dev); - if (!mlxsw_sp) - goto out; rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); if (!mlxsw_sp_rif_should_config(rif, dev, event)) goto out; - __mlxsw_sp_inetaddr_event(dev, event, NULL); + 
__mlxsw_sp_inetaddr_event(mlxsw_sp, dev, event, NULL); out: rtnl_unlock(); dev_put(dev); @@ -6797,25 +6899,25 @@ out: } /* Called with rcu_read_lock() */ -int mlxsw_sp_inet6addr_event(struct notifier_block *unused, - unsigned long event, void *ptr) +static int mlxsw_sp_inet6addr_event(struct notifier_block *nb, + unsigned long event, void *ptr) { struct inet6_ifaddr *if6 = (struct inet6_ifaddr *) ptr; struct mlxsw_sp_inet6addr_event_work *inet6addr_work; struct net_device *dev = if6->idev->dev; + struct mlxsw_sp_router *router; /* NETDEV_UP event is handled by mlxsw_sp_inet6addr_valid_event */ if (event == NETDEV_UP) return NOTIFY_DONE; - if (!mlxsw_sp_port_dev_lower_find_rcu(dev)) - return NOTIFY_DONE; - inet6addr_work = kzalloc(sizeof(*inet6addr_work), GFP_ATOMIC); if (!inet6addr_work) return NOTIFY_BAD; + router = container_of(nb, struct mlxsw_sp_router, inet6addr_nb); INIT_WORK(&inet6addr_work->work, mlxsw_sp_inet6addr_event_work); + inet6addr_work->mlxsw_sp = router->mlxsw_sp; inet6addr_work->dev = dev; inet6addr_work->event = event; dev_hold(dev); @@ -6841,7 +6943,12 @@ int mlxsw_sp_inet6addr_valid_event(struct notifier_block *unused, if (!mlxsw_sp_rif_should_config(rif, dev, event)) goto out; - err = __mlxsw_sp_inetaddr_event(dev, event, i6vi->extack); + err = mlxsw_sp_router_port_check_rif_addr(mlxsw_sp, dev, dev->dev_addr, + i6vi->extack); + if (err) + goto out; + + err = __mlxsw_sp_inetaddr_event(mlxsw_sp, dev, event, i6vi->extack); out: return notifier_from_errno(err); } @@ -6863,20 +6970,14 @@ static int mlxsw_sp_rif_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index, return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl); } -int mlxsw_sp_netdevice_router_port_event(struct net_device *dev) +static int +mlxsw_sp_router_port_change_event(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_rif *rif) { - struct mlxsw_sp *mlxsw_sp; - struct mlxsw_sp_rif *rif; + struct net_device *dev = rif->dev; u16 fid_index; int err; - mlxsw_sp = mlxsw_sp_lower_get(dev); - if (!mlxsw_sp) - return 0; - - rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); - if (!rif) - return 0; fid_index = mlxsw_sp_fid_index(rif->fid); err = mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, fid_index, false); @@ -6920,6 +7021,41 @@ err_rif_edit: return err; } +static int mlxsw_sp_router_port_pre_changeaddr_event(struct mlxsw_sp_rif *rif, + struct netdev_notifier_pre_changeaddr_info *info) +{ + struct netlink_ext_ack *extack; + + extack = netdev_notifier_info_to_extack(&info->info); + return mlxsw_sp_router_port_check_rif_addr(rif->mlxsw_sp, rif->dev, + info->dev_addr, extack); +} + +int mlxsw_sp_netdevice_router_port_event(struct net_device *dev, + unsigned long event, void *ptr) +{ + struct mlxsw_sp *mlxsw_sp; + struct mlxsw_sp_rif *rif; + + mlxsw_sp = mlxsw_sp_lower_get(dev); + if (!mlxsw_sp) + return 0; + + rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); + if (!rif) + return 0; + + switch (event) { + case NETDEV_CHANGEMTU: /* fall through */ + case NETDEV_CHANGEADDR: + return mlxsw_sp_router_port_change_event(mlxsw_sp, rif); + case NETDEV_PRE_CHANGEADDR: + return mlxsw_sp_router_port_pre_changeaddr_event(rif, ptr); + } + + return 0; +} + static int mlxsw_sp_port_vrf_join(struct mlxsw_sp *mlxsw_sp, struct net_device *l3_dev, struct netlink_ext_ack *extack) @@ -6931,9 +7067,10 @@ static int mlxsw_sp_port_vrf_join(struct mlxsw_sp *mlxsw_sp, */ rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev); if (rif) - __mlxsw_sp_inetaddr_event(l3_dev, NETDEV_DOWN, extack); + __mlxsw_sp_inetaddr_event(mlxsw_sp, l3_dev, NETDEV_DOWN, + extack); - 
return __mlxsw_sp_inetaddr_event(l3_dev, NETDEV_UP, extack); + return __mlxsw_sp_inetaddr_event(mlxsw_sp, l3_dev, NETDEV_UP, extack); } static void mlxsw_sp_port_vrf_leave(struct mlxsw_sp *mlxsw_sp, @@ -6944,7 +7081,7 @@ static void mlxsw_sp_port_vrf_leave(struct mlxsw_sp *mlxsw_sp, rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev); if (!rif) return; - __mlxsw_sp_inetaddr_event(l3_dev, NETDEV_DOWN, NULL); + __mlxsw_sp_inetaddr_event(mlxsw_sp, l3_dev, NETDEV_DOWN, NULL); } int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event, @@ -6998,18 +7135,13 @@ static int mlxsw_sp_rif_macvlan_flush(struct mlxsw_sp_rif *rif) __mlxsw_sp_rif_macvlan_flush, rif); } -static struct mlxsw_sp_rif_subport * -mlxsw_sp_rif_subport_rif(const struct mlxsw_sp_rif *rif) -{ - return container_of(rif, struct mlxsw_sp_rif_subport, common); -} - static void mlxsw_sp_rif_subport_setup(struct mlxsw_sp_rif *rif, const struct mlxsw_sp_rif_params *params) { struct mlxsw_sp_rif_subport *rif_subport; rif_subport = mlxsw_sp_rif_subport_rif(rif); + refcount_set(&rif_subport->ref_count, 1); rif_subport->vid = params->vid; rif_subport->lag = params->lag; if (params->lag) @@ -7164,11 +7296,15 @@ static struct mlxsw_sp_fid * mlxsw_sp_rif_vlan_fid_get(struct mlxsw_sp_rif *rif, struct netlink_ext_ack *extack) { + struct net_device *br_dev = rif->dev; u16 vid; int err; if (is_vlan_dev(rif->dev)) { vid = vlan_dev_vlan_id(rif->dev); + br_dev = vlan_dev_real_dev(rif->dev); + if (WARN_ON(!netif_is_bridge_master(br_dev))) + return ERR_PTR(-EINVAL); } else { err = br_vlan_get_pvid(rif->dev, &vid); if (err < 0 || !vid) { @@ -7177,7 +7313,7 @@ mlxsw_sp_rif_vlan_fid_get(struct mlxsw_sp_rif *rif, } } - return mlxsw_sp_fid_8021q_get(rif->mlxsw_sp, vid); + return mlxsw_sp_bridge_fid_get(rif->mlxsw_sp, br_dev, vid, extack); } static void mlxsw_sp_rif_vlan_fdb_del(struct mlxsw_sp_rif *rif, const char *mac) @@ -7194,7 +7330,8 @@ static void mlxsw_sp_rif_vlan_fdb_del(struct mlxsw_sp_rif *rif, const char *mac) info.addr = mac; info.vid = vid; - call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE, dev, &info.info); + call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE, dev, &info.info, + NULL); } static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_vlan_ops = { @@ -7267,7 +7404,7 @@ static struct mlxsw_sp_fid * mlxsw_sp_rif_fid_fid_get(struct mlxsw_sp_rif *rif, struct netlink_ext_ack *extack) { - return mlxsw_sp_fid_8021d_get(rif->mlxsw_sp, rif->dev->ifindex); + return mlxsw_sp_bridge_fid_get(rif->mlxsw_sp, rif->dev, 0, extack); } static void mlxsw_sp_rif_fid_fdb_del(struct mlxsw_sp_rif *rif, const char *mac) @@ -7281,7 +7418,8 @@ static void mlxsw_sp_rif_fid_fdb_del(struct mlxsw_sp_rif *rif, const char *mac) info.addr = mac; info.vid = 0; - call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE, dev, &info.info); + call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE, dev, &info.info, + NULL); } static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_fid_ops = { @@ -7293,6 +7431,15 @@ static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_fid_ops = { .fdb_del = mlxsw_sp_rif_fid_fdb_del, }; +static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_vlan_emu_ops = { + .type = MLXSW_SP_RIF_TYPE_VLAN, + .rif_size = sizeof(struct mlxsw_sp_rif), + .configure = mlxsw_sp_rif_fid_configure, + .deconfigure = mlxsw_sp_rif_fid_deconfigure, + .fid_get = mlxsw_sp_rif_vlan_fid_get, + .fdb_del = mlxsw_sp_rif_vlan_fdb_del, +}; + static struct mlxsw_sp_rif_ipip_lb * mlxsw_sp_rif_ipip_lb_rif(struct mlxsw_sp_rif *rif) { @@ -7313,7 +7460,7 @@ 
mlxsw_sp_rif_ipip_lb_setup(struct mlxsw_sp_rif *rif, } static int -mlxsw_sp_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif) +mlxsw_sp1_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif) { struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif); u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(rif->dev); @@ -7325,11 +7472,12 @@ mlxsw_sp_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif) if (IS_ERR(ul_vr)) return PTR_ERR(ul_vr); - err = mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr, true); + err = mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr->id, 0, true); if (err) goto err_loopback_op; lb_rif->ul_vr_id = ul_vr->id; + lb_rif->ul_rif_id = 0; ++ul_vr->rif_count; return 0; @@ -7338,32 +7486,213 @@ err_loopback_op: return err; } -static void mlxsw_sp_rif_ipip_lb_deconfigure(struct mlxsw_sp_rif *rif) +static void mlxsw_sp1_rif_ipip_lb_deconfigure(struct mlxsw_sp_rif *rif) { struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif); struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; struct mlxsw_sp_vr *ul_vr; ul_vr = &mlxsw_sp->router->vrs[lb_rif->ul_vr_id]; - mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr, false); + mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr->id, 0, false); --ul_vr->rif_count; mlxsw_sp_vr_put(mlxsw_sp, ul_vr); } -static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_ipip_lb_ops = { +static const struct mlxsw_sp_rif_ops mlxsw_sp1_rif_ipip_lb_ops = { + .type = MLXSW_SP_RIF_TYPE_IPIP_LB, + .rif_size = sizeof(struct mlxsw_sp_rif_ipip_lb), + .setup = mlxsw_sp_rif_ipip_lb_setup, + .configure = mlxsw_sp1_rif_ipip_lb_configure, + .deconfigure = mlxsw_sp1_rif_ipip_lb_deconfigure, +}; + +const struct mlxsw_sp_rif_ops *mlxsw_sp1_rif_ops_arr[] = { + [MLXSW_SP_RIF_TYPE_SUBPORT] = &mlxsw_sp_rif_subport_ops, + [MLXSW_SP_RIF_TYPE_VLAN] = &mlxsw_sp_rif_vlan_emu_ops, + [MLXSW_SP_RIF_TYPE_FID] = &mlxsw_sp_rif_fid_ops, + [MLXSW_SP_RIF_TYPE_IPIP_LB] = &mlxsw_sp1_rif_ipip_lb_ops, +}; + +static int +mlxsw_sp_rif_ipip_lb_ul_rif_op(struct mlxsw_sp_rif *ul_rif, bool enable) +{ + struct mlxsw_sp *mlxsw_sp = ul_rif->mlxsw_sp; + char ritr_pl[MLXSW_REG_RITR_LEN]; + + mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_LOOPBACK_IF, + ul_rif->rif_index, ul_rif->vr_id, IP_MAX_MTU); + mlxsw_reg_ritr_loopback_protocol_set(ritr_pl, + MLXSW_REG_RITR_LOOPBACK_GENERIC); + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl); +} + +static struct mlxsw_sp_rif * +mlxsw_sp_ul_rif_create(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp_rif *ul_rif; + u16 rif_index; + int err; + + err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported router interfaces"); + return ERR_PTR(err); + } + + ul_rif = mlxsw_sp_rif_alloc(sizeof(*ul_rif), rif_index, vr->id, NULL); + if (!ul_rif) + return ERR_PTR(-ENOMEM); + + mlxsw_sp->router->rifs[rif_index] = ul_rif; + ul_rif->mlxsw_sp = mlxsw_sp; + err = mlxsw_sp_rif_ipip_lb_ul_rif_op(ul_rif, true); + if (err) + goto ul_rif_op_err; + + return ul_rif; + +ul_rif_op_err: + mlxsw_sp->router->rifs[rif_index] = NULL; + kfree(ul_rif); + return ERR_PTR(err); +} + +static void mlxsw_sp_ul_rif_destroy(struct mlxsw_sp_rif *ul_rif) +{ + struct mlxsw_sp *mlxsw_sp = ul_rif->mlxsw_sp; + + mlxsw_sp_rif_ipip_lb_ul_rif_op(ul_rif, false); + mlxsw_sp->router->rifs[ul_rif->rif_index] = NULL; + kfree(ul_rif); +} + +static struct mlxsw_sp_rif * +mlxsw_sp_ul_rif_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp_vr *vr; + int err; + + vr = mlxsw_sp_vr_get(mlxsw_sp, 
tb_id, extack); + if (IS_ERR(vr)) + return ERR_CAST(vr); + + if (refcount_inc_not_zero(&vr->ul_rif_refcnt)) + return vr->ul_rif; + + vr->ul_rif = mlxsw_sp_ul_rif_create(mlxsw_sp, vr, extack); + if (IS_ERR(vr->ul_rif)) { + err = PTR_ERR(vr->ul_rif); + goto err_ul_rif_create; + } + + vr->rif_count++; + refcount_set(&vr->ul_rif_refcnt, 1); + + return vr->ul_rif; + +err_ul_rif_create: + mlxsw_sp_vr_put(mlxsw_sp, vr); + return ERR_PTR(err); +} + +static void mlxsw_sp_ul_rif_put(struct mlxsw_sp_rif *ul_rif) +{ + struct mlxsw_sp *mlxsw_sp = ul_rif->mlxsw_sp; + struct mlxsw_sp_vr *vr; + + vr = &mlxsw_sp->router->vrs[ul_rif->vr_id]; + + if (!refcount_dec_and_test(&vr->ul_rif_refcnt)) + return; + + vr->rif_count--; + mlxsw_sp_ul_rif_destroy(ul_rif); + mlxsw_sp_vr_put(mlxsw_sp, vr); +} + +int mlxsw_sp_router_ul_rif_get(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id, + u16 *ul_rif_index) +{ + struct mlxsw_sp_rif *ul_rif; + + ASSERT_RTNL(); + + ul_rif = mlxsw_sp_ul_rif_get(mlxsw_sp, ul_tb_id, NULL); + if (IS_ERR(ul_rif)) + return PTR_ERR(ul_rif); + *ul_rif_index = ul_rif->rif_index; + + return 0; +} + +void mlxsw_sp_router_ul_rif_put(struct mlxsw_sp *mlxsw_sp, u16 ul_rif_index) +{ + struct mlxsw_sp_rif *ul_rif; + + ASSERT_RTNL(); + + ul_rif = mlxsw_sp->router->rifs[ul_rif_index]; + if (WARN_ON(!ul_rif)) + return; + + mlxsw_sp_ul_rif_put(ul_rif); +} + +static int +mlxsw_sp2_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif) +{ + struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif); + u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(rif->dev); + struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; + struct mlxsw_sp_rif *ul_rif; + int err; + + ul_rif = mlxsw_sp_ul_rif_get(mlxsw_sp, ul_tb_id, NULL); + if (IS_ERR(ul_rif)) + return PTR_ERR(ul_rif); + + err = mlxsw_sp_rif_ipip_lb_op(lb_rif, 0, ul_rif->rif_index, true); + if (err) + goto err_loopback_op; + + lb_rif->ul_vr_id = 0; + lb_rif->ul_rif_id = ul_rif->rif_index; + + return 0; + +err_loopback_op: + mlxsw_sp_ul_rif_put(ul_rif); + return err; +} + +static void mlxsw_sp2_rif_ipip_lb_deconfigure(struct mlxsw_sp_rif *rif) +{ + struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif); + struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; + struct mlxsw_sp_rif *ul_rif; + + ul_rif = mlxsw_sp_rif_by_index(mlxsw_sp, lb_rif->ul_rif_id); + mlxsw_sp_rif_ipip_lb_op(lb_rif, 0, lb_rif->ul_rif_id, false); + mlxsw_sp_ul_rif_put(ul_rif); +} + +static const struct mlxsw_sp_rif_ops mlxsw_sp2_rif_ipip_lb_ops = { .type = MLXSW_SP_RIF_TYPE_IPIP_LB, .rif_size = sizeof(struct mlxsw_sp_rif_ipip_lb), .setup = mlxsw_sp_rif_ipip_lb_setup, - .configure = mlxsw_sp_rif_ipip_lb_configure, - .deconfigure = mlxsw_sp_rif_ipip_lb_deconfigure, + .configure = mlxsw_sp2_rif_ipip_lb_configure, + .deconfigure = mlxsw_sp2_rif_ipip_lb_deconfigure, }; -static const struct mlxsw_sp_rif_ops *mlxsw_sp_rif_ops_arr[] = { +const struct mlxsw_sp_rif_ops *mlxsw_sp2_rif_ops_arr[] = { [MLXSW_SP_RIF_TYPE_SUBPORT] = &mlxsw_sp_rif_subport_ops, - [MLXSW_SP_RIF_TYPE_VLAN] = &mlxsw_sp_rif_vlan_ops, + [MLXSW_SP_RIF_TYPE_VLAN] = &mlxsw_sp_rif_vlan_emu_ops, [MLXSW_SP_RIF_TYPE_FID] = &mlxsw_sp_rif_fid_ops, - [MLXSW_SP_RIF_TYPE_IPIP_LB] = &mlxsw_sp_rif_ipip_lb_ops, + [MLXSW_SP_RIF_TYPE_IPIP_LB] = &mlxsw_sp2_rif_ipip_lb_ops, }; static int mlxsw_sp_rifs_init(struct mlxsw_sp *mlxsw_sp) @@ -7376,8 +7705,6 @@ static int mlxsw_sp_rifs_init(struct mlxsw_sp *mlxsw_sp) if (!mlxsw_sp->router->rifs) return -ENOMEM; - mlxsw_sp->router->rif_ops_arr = mlxsw_sp_rif_ops_arr; - return 0; } @@ -7552,6 +7879,16 @@ int mlxsw_sp_router_init(struct 
mlxsw_sp *mlxsw_sp) mlxsw_sp->router = router; router->mlxsw_sp = mlxsw_sp; + router->inetaddr_nb.notifier_call = mlxsw_sp_inetaddr_event; + err = register_inetaddr_notifier(&router->inetaddr_nb); + if (err) + goto err_register_inetaddr_notifier; + + router->inet6addr_nb.notifier_call = mlxsw_sp_inet6addr_event; + err = register_inet6addr_notifier(&router->inet6addr_nb); + if (err) + goto err_register_inet6addr_notifier; + INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_neighs_list); err = __mlxsw_sp_router_init(mlxsw_sp); if (err) @@ -7637,6 +7974,10 @@ err_ipips_init: err_rifs_init: __mlxsw_sp_router_fini(mlxsw_sp); err_router_init: + unregister_inet6addr_notifier(&router->inet6addr_nb); +err_register_inet6addr_notifier: + unregister_inetaddr_notifier(&router->inetaddr_nb); +err_register_inetaddr_notifier: kfree(mlxsw_sp->router); return err; } @@ -7654,5 +7995,7 @@ void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp) mlxsw_sp_ipips_fini(mlxsw_sp); mlxsw_sp_rifs_fini(mlxsw_sp); __mlxsw_sp_router_fini(mlxsw_sp); + unregister_inet6addr_notifier(&mlxsw_sp->router->inet6addr_nb); + unregister_inetaddr_notifier(&mlxsw_sp->router->inetaddr_nb); kfree(mlxsw_sp->router); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h index 3dbafdeaab2b..cc1de91e8217 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h @@ -29,6 +29,7 @@ struct mlxsw_sp_rif *mlxsw_sp_rif_by_index(const struct mlxsw_sp *mlxsw_sp, u16 mlxsw_sp_rif_index(const struct mlxsw_sp_rif *rif); u16 mlxsw_sp_ipip_lb_rif_index(const struct mlxsw_sp_rif_ipip_lb *rif); u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *rif); +u16 mlxsw_sp_ipip_lb_ul_rif_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif); u32 mlxsw_sp_ipip_dev_ul_tb_id(const struct net_device *ol_dev); int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif); const struct net_device *mlxsw_sp_rif_dev(const struct mlxsw_sp_rif *rif); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c index d965fd275c90..536c23c578c3 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c @@ -305,7 +305,7 @@ mlxsw_sp_span_gretap4_route(const struct net_device *to_dev, parms = mlxsw_sp_ipip_netdev_parms4(to_dev); ip_tunnel_init_flow(&fl4, parms.iph.protocol, *daddrp, *saddrp, - 0, 0, parms.link, tun->fwmark); + 0, 0, parms.link, tun->fwmark, 0); rt = ip_route_output_key(tun->net, &fl4); if (IS_ERR(rt)) @@ -383,7 +383,7 @@ mlxsw_sp_span_entry_gretap4_deconfigure(struct mlxsw_sp_span_entry *span_entry) } static const struct mlxsw_sp_span_entry_ops mlxsw_sp_span_entry_ops_gretap4 = { - .can_handle = is_gretap_dev, + .can_handle = netif_is_gretap, .parms = mlxsw_sp_span_entry_gretap4_parms, .configure = mlxsw_sp_span_entry_gretap4_configure, .deconfigure = mlxsw_sp_span_entry_gretap4_deconfigure, @@ -484,7 +484,7 @@ mlxsw_sp_span_entry_gretap6_deconfigure(struct mlxsw_sp_span_entry *span_entry) static const struct mlxsw_sp_span_entry_ops mlxsw_sp_span_entry_ops_gretap6 = { - .can_handle = is_ip6gretap_dev, + .can_handle = netif_is_ip6gretap, .parms = mlxsw_sp_span_entry_gretap6_parms, .configure = mlxsw_sp_span_entry_gretap6_configure, .deconfigure = mlxsw_sp_span_entry_gretap6_deconfigure, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index 50080c60a279..50111f228d77 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -85,13 +85,11 @@ struct mlxsw_sp_bridge_ops { struct mlxsw_sp_bridge_port *bridge_port, struct mlxsw_sp_port *mlxsw_sp_port); int (*vxlan_join)(struct mlxsw_sp_bridge_device *bridge_device, - const struct net_device *vxlan_dev, + const struct net_device *vxlan_dev, u16 vid, struct netlink_ext_ack *extack); - void (*vxlan_leave)(struct mlxsw_sp_bridge_device *bridge_device, - const struct net_device *vxlan_dev); struct mlxsw_sp_fid * (*fid_get)(struct mlxsw_sp_bridge_device *bridge_device, - u16 vid); + u16 vid, struct netlink_ext_ack *extack); struct mlxsw_sp_fid * (*fid_lookup)(struct mlxsw_sp_bridge_device *bridge_device, u16 vid); @@ -292,30 +290,6 @@ mlxsw_sp_bridge_port_destroy(struct mlxsw_sp_bridge_port *bridge_port) kfree(bridge_port); } -static bool -mlxsw_sp_bridge_port_should_destroy(const struct mlxsw_sp_bridge_port * - bridge_port) -{ - struct net_device *dev = bridge_port->dev; - struct mlxsw_sp *mlxsw_sp; - - if (is_vlan_dev(dev)) - mlxsw_sp = mlxsw_sp_lower_get(vlan_dev_real_dev(dev)); - else - mlxsw_sp = mlxsw_sp_lower_get(dev); - - /* In case ports were pulled from out of a bridged LAG, then - * it's possible the reference count isn't zero, yet the bridge - * port should be destroyed, as it's no longer an upper of ours. - */ - if (!mlxsw_sp && list_empty(&bridge_port->vlans_list)) - return true; - else if (bridge_port->ref_count == 0) - return true; - else - return false; -} - static struct mlxsw_sp_bridge_port * mlxsw_sp_bridge_port_get(struct mlxsw_sp_bridge *bridge, struct net_device *brport_dev) @@ -353,8 +327,7 @@ static void mlxsw_sp_bridge_port_put(struct mlxsw_sp_bridge *bridge, { struct mlxsw_sp_bridge_device *bridge_device; - bridge_port->ref_count--; - if (!mlxsw_sp_bridge_port_should_destroy(bridge_port)) + if (--bridge_port->ref_count != 0) return; bridge_device = bridge_port->bridge_device; mlxsw_sp_bridge_port_destroy(bridge_port); @@ -458,46 +431,6 @@ static void mlxsw_sp_bridge_vlan_put(struct mlxsw_sp_bridge_vlan *bridge_vlan) mlxsw_sp_bridge_vlan_destroy(bridge_vlan); } -static void mlxsw_sp_port_bridge_flags_get(struct mlxsw_sp_bridge *bridge, - struct net_device *dev, - unsigned long *brport_flags) -{ - struct mlxsw_sp_bridge_port *bridge_port; - - bridge_port = mlxsw_sp_bridge_port_find(bridge, dev); - if (WARN_ON(!bridge_port)) - return; - - memcpy(brport_flags, &bridge_port->flags, sizeof(*brport_flags)); -} - -static int mlxsw_sp_port_attr_get(struct net_device *dev, - struct switchdev_attr *attr) -{ - struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; - - switch (attr->id) { - case SWITCHDEV_ATTR_ID_PORT_PARENT_ID: - attr->u.ppid.id_len = sizeof(mlxsw_sp->base_mac); - memcpy(&attr->u.ppid.id, &mlxsw_sp->base_mac, - attr->u.ppid.id_len); - break; - case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS: - mlxsw_sp_port_bridge_flags_get(mlxsw_sp->bridge, attr->orig_dev, - &attr->u.brport_flags); - break; - case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS_SUPPORT: - attr->u.brport_flags_support = BR_LEARNING | BR_FLOOD | - BR_MCAST_FLOOD; - break; - default: - return -EOPNOTSUPP; - } - - return 0; -} - static int mlxsw_sp_port_bridge_vlan_stp_set(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_bridge_vlan *bridge_vlan, @@ -647,6 +580,17 @@ err_port_bridge_vlan_learning_set: return 
err; } +static int mlxsw_sp_port_attr_br_pre_flags_set(struct mlxsw_sp_port + *mlxsw_sp_port, + struct switchdev_trans *trans, + unsigned long brport_flags) +{ + if (brport_flags & ~(BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD)) + return -EINVAL; + + return 0; +} + static int mlxsw_sp_port_attr_br_flags_set(struct mlxsw_sp_port *mlxsw_sp_port, struct switchdev_trans *trans, struct net_device *orig_dev, @@ -893,6 +837,11 @@ static int mlxsw_sp_port_attr_set(struct net_device *dev, attr->orig_dev, attr->u.stp_state); break; + case SWITCHDEV_ATTR_ID_PORT_PRE_BRIDGE_FLAGS: + err = mlxsw_sp_port_attr_br_pre_flags_set(mlxsw_sp_port, + trans, + attr->u.brport_flags); + break; case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS: err = mlxsw_sp_port_attr_br_flags_set(mlxsw_sp_port, trans, attr->orig_dev, @@ -935,7 +884,8 @@ static int mlxsw_sp_port_attr_set(struct net_device *dev, static int mlxsw_sp_port_vlan_fid_join(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan, - struct mlxsw_sp_bridge_port *bridge_port) + struct mlxsw_sp_bridge_port *bridge_port, + struct netlink_ext_ack *extack) { struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port; struct mlxsw_sp_bridge_device *bridge_device; @@ -945,7 +895,7 @@ mlxsw_sp_port_vlan_fid_join(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan, int err; bridge_device = bridge_port->bridge_device; - fid = bridge_device->ops->fid_get(bridge_device, vid); + fid = bridge_device->ops->fid_get(bridge_device, vid, extack); if (IS_ERR(fid)) return PTR_ERR(fid); @@ -1013,7 +963,8 @@ mlxsw_sp_port_pvid_determine(const struct mlxsw_sp_port *mlxsw_sp_port, static int mlxsw_sp_port_vlan_bridge_join(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan, - struct mlxsw_sp_bridge_port *bridge_port) + struct mlxsw_sp_bridge_port *bridge_port, + struct netlink_ext_ack *extack) { struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port; struct mlxsw_sp_bridge_vlan *bridge_vlan; @@ -1021,12 +972,11 @@ mlxsw_sp_port_vlan_bridge_join(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan, int err; /* No need to continue if only VLAN flags were changed */ - if (mlxsw_sp_port_vlan->bridge_port) { - mlxsw_sp_port_vlan_put(mlxsw_sp_port_vlan); + if (mlxsw_sp_port_vlan->bridge_port) return 0; - } - err = mlxsw_sp_port_vlan_fid_join(mlxsw_sp_port_vlan, bridge_port); + err = mlxsw_sp_port_vlan_fid_join(mlxsw_sp_port_vlan, bridge_port, + extack); if (err) return err; @@ -1103,16 +1053,29 @@ mlxsw_sp_port_vlan_bridge_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan) static int mlxsw_sp_bridge_port_vlan_add(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_bridge_port *bridge_port, - u16 vid, bool is_untagged, bool is_pvid) + u16 vid, bool is_untagged, bool is_pvid, + struct netlink_ext_ack *extack) { u16 pvid = mlxsw_sp_port_pvid_determine(mlxsw_sp_port, vid, is_pvid); struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan; u16 old_pvid = mlxsw_sp_port->pvid; int err; - mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_get(mlxsw_sp_port, vid); - if (IS_ERR(mlxsw_sp_port_vlan)) - return PTR_ERR(mlxsw_sp_port_vlan); + /* The only valid scenario in which a port-vlan already exists, is if + * the VLAN flags were changed and the port-vlan is associated with the + * correct bridge port + */ + mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid); + if (mlxsw_sp_port_vlan && + mlxsw_sp_port_vlan->bridge_port != bridge_port) + return -EEXIST; + + if (!mlxsw_sp_port_vlan) { + mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_create(mlxsw_sp_port, + vid); + if (IS_ERR(mlxsw_sp_port_vlan)) + return 
PTR_ERR(mlxsw_sp_port_vlan); + } err = mlxsw_sp_port_vlan_set(mlxsw_sp_port, vid, vid, true, is_untagged); @@ -1123,7 +1086,8 @@ mlxsw_sp_bridge_port_vlan_add(struct mlxsw_sp_port *mlxsw_sp_port, if (err) goto err_port_pvid_set; - err = mlxsw_sp_port_vlan_bridge_join(mlxsw_sp_port_vlan, bridge_port); + err = mlxsw_sp_port_vlan_bridge_join(mlxsw_sp_port_vlan, bridge_port, + extack); if (err) goto err_port_vlan_bridge_join; @@ -1134,7 +1098,7 @@ err_port_vlan_bridge_join: err_port_pvid_set: mlxsw_sp_port_vlan_set(mlxsw_sp_port, vid, vid, false, false); err_port_vlan_set: - mlxsw_sp_port_vlan_put(mlxsw_sp_port_vlan); + mlxsw_sp_port_vlan_destroy(mlxsw_sp_port_vlan); return err; } @@ -1173,7 +1137,8 @@ mlxsw_sp_br_ban_rif_pvid_change(struct mlxsw_sp *mlxsw_sp, static int mlxsw_sp_port_vlans_add(struct mlxsw_sp_port *mlxsw_sp_port, const struct switchdev_obj_port_vlan *vlan, - struct switchdev_trans *trans) + struct switchdev_trans *trans, + struct netlink_ext_ack *extack) { bool flag_untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED; bool flag_pvid = vlan->flags & BRIDGE_VLAN_INFO_PVID; @@ -1195,7 +1160,7 @@ static int mlxsw_sp_port_vlans_add(struct mlxsw_sp_port *mlxsw_sp_port, return err; } - if (switchdev_trans_ph_prepare(trans)) + if (switchdev_trans_ph_commit(trans)) return 0; bridge_port = mlxsw_sp_bridge_port_find(mlxsw_sp->bridge, orig_dev); @@ -1210,7 +1175,7 @@ static int mlxsw_sp_port_vlans_add(struct mlxsw_sp_port *mlxsw_sp_port, err = mlxsw_sp_bridge_port_vlan_add(mlxsw_sp_port, bridge_port, vid, flag_untagged, - flag_pvid); + flag_pvid, extack); if (err) return err; } @@ -1244,7 +1209,7 @@ mlxsw_sp_bridge_port_fdb_flush(struct mlxsw_sp *mlxsw_sp, static enum mlxsw_reg_sfd_rec_policy mlxsw_sp_sfd_rec_policy(bool dynamic) { return dynamic ? 
MLXSW_REG_SFD_REC_POLICY_DYNAMIC_ENTRY_INGRESS : - MLXSW_REG_SFD_REC_POLICY_STATIC_ENTRY; + MLXSW_REG_SFD_REC_POLICY_DYNAMIC_ENTRY_MLAG; } static enum mlxsw_reg_sfd_op mlxsw_sp_sfd_op(bool adding) @@ -1301,7 +1266,7 @@ out: static int __mlxsw_sp_port_fdb_uc_op(struct mlxsw_sp *mlxsw_sp, u8 local_port, const char *mac, u16 fid, bool adding, enum mlxsw_reg_sfd_rec_action action, - bool dynamic) + enum mlxsw_reg_sfd_rec_policy policy) { char *sfd_pl; u8 num_rec; @@ -1312,8 +1277,7 @@ static int __mlxsw_sp_port_fdb_uc_op(struct mlxsw_sp *mlxsw_sp, u8 local_port, return -ENOMEM; mlxsw_reg_sfd_pack(sfd_pl, mlxsw_sp_sfd_op(adding), 0); - mlxsw_reg_sfd_uc_pack(sfd_pl, 0, mlxsw_sp_sfd_rec_policy(dynamic), - mac, fid, action, local_port); + mlxsw_reg_sfd_uc_pack(sfd_pl, 0, policy, mac, fid, action, local_port); num_rec = mlxsw_reg_sfd_num_rec_get(sfd_pl); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfd), sfd_pl); if (err) @@ -1332,7 +1296,8 @@ static int mlxsw_sp_port_fdb_uc_op(struct mlxsw_sp *mlxsw_sp, u8 local_port, bool dynamic) { return __mlxsw_sp_port_fdb_uc_op(mlxsw_sp, local_port, mac, fid, adding, - MLXSW_REG_SFD_REC_ACTION_NOP, dynamic); + MLXSW_REG_SFD_REC_ACTION_NOP, + mlxsw_sp_sfd_rec_policy(dynamic)); } int mlxsw_sp_rif_fdb_op(struct mlxsw_sp *mlxsw_sp, const char *mac, u16 fid, @@ -1340,7 +1305,7 @@ int mlxsw_sp_rif_fdb_op(struct mlxsw_sp *mlxsw_sp, const char *mac, u16 fid, { return __mlxsw_sp_port_fdb_uc_op(mlxsw_sp, 0, mac, fid, adding, MLXSW_REG_SFD_REC_ACTION_FORWARD_IP_ROUTER, - false); + MLXSW_REG_SFD_REC_POLICY_STATIC_ENTRY); } static int mlxsw_sp_port_fdb_uc_lag_op(struct mlxsw_sp *mlxsw_sp, u16 lag_id, @@ -1665,7 +1630,7 @@ static int mlxsw_sp_port_mdb_add(struct mlxsw_sp_port *mlxsw_sp_port, u16 fid_index; int err = 0; - if (switchdev_trans_ph_prepare(trans)) + if (switchdev_trans_ph_commit(trans)) return 0; bridge_port = mlxsw_sp_bridge_port_find(mlxsw_sp->bridge, orig_dev); @@ -1779,7 +1744,8 @@ static void mlxsw_sp_span_respin_schedule(struct mlxsw_sp *mlxsw_sp) static int mlxsw_sp_port_obj_add(struct net_device *dev, const struct switchdev_obj *obj, - struct switchdev_trans *trans) + struct switchdev_trans *trans, + struct netlink_ext_ack *extack) { struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); const struct switchdev_obj_port_vlan *vlan; @@ -1788,7 +1754,8 @@ static int mlxsw_sp_port_obj_add(struct net_device *dev, switch (obj->id) { case SWITCHDEV_OBJ_ID_PORT_VLAN: vlan = SWITCHDEV_OBJ_PORT_VLAN(obj); - err = mlxsw_sp_port_vlans_add(mlxsw_sp_port, vlan, trans); + err = mlxsw_sp_port_vlans_add(mlxsw_sp_port, vlan, trans, + extack); if (switchdev_trans_ph_prepare(trans)) { /* The event is emitted before the changes are actually @@ -1816,7 +1783,7 @@ static void mlxsw_sp_bridge_port_vlan_del(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_bridge_port *bridge_port, u16 vid) { - u16 pvid = mlxsw_sp_port->pvid == vid ? 0 : vid; + u16 pvid = mlxsw_sp_port->pvid == vid ? 
0 : mlxsw_sp_port->pvid; struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan; mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid); @@ -1826,7 +1793,7 @@ mlxsw_sp_bridge_port_vlan_del(struct mlxsw_sp_port *mlxsw_sp_port, mlxsw_sp_port_vlan_bridge_leave(mlxsw_sp_port_vlan); mlxsw_sp_port_pvid_set(mlxsw_sp_port, pvid); mlxsw_sp_port_vlan_set(mlxsw_sp_port, vid, vid, false, false); - mlxsw_sp_port_vlan_put(mlxsw_sp_port_vlan); + mlxsw_sp_port_vlan_destroy(mlxsw_sp_port_vlan); } static int mlxsw_sp_port_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port, @@ -1971,32 +1938,20 @@ static struct mlxsw_sp_port *mlxsw_sp_lag_rep_port(struct mlxsw_sp *mlxsw_sp, return NULL; } -static const struct switchdev_ops mlxsw_sp_port_switchdev_ops = { - .switchdev_port_attr_get = mlxsw_sp_port_attr_get, - .switchdev_port_attr_set = mlxsw_sp_port_attr_set, - .switchdev_port_obj_add = mlxsw_sp_port_obj_add, - .switchdev_port_obj_del = mlxsw_sp_port_obj_del, -}; - static int mlxsw_sp_bridge_8021q_port_join(struct mlxsw_sp_bridge_device *bridge_device, struct mlxsw_sp_bridge_port *bridge_port, struct mlxsw_sp_port *mlxsw_sp_port, struct netlink_ext_ack *extack) { - struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan; - if (is_vlan_dev(bridge_port->dev)) { NL_SET_ERR_MSG_MOD(extack, "Can not enslave a VLAN device to a VLAN-aware bridge"); return -EINVAL; } - mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, 1); - if (WARN_ON(!mlxsw_sp_port_vlan)) - return -EINVAL; - - /* Let VLAN-aware bridge take care of its own VLANs */ - mlxsw_sp_port_vlan_put(mlxsw_sp_port_vlan); + /* Port is no longer usable as a router interface */ + if (mlxsw_sp_port->default_vlan->fid) + mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port->default_vlan); return 0; } @@ -2006,41 +1961,134 @@ mlxsw_sp_bridge_8021q_port_leave(struct mlxsw_sp_bridge_device *bridge_device, struct mlxsw_sp_bridge_port *bridge_port, struct mlxsw_sp_port *mlxsw_sp_port) { - mlxsw_sp_port_vlan_get(mlxsw_sp_port, 1); /* Make sure untagged frames are allowed to ingress */ - mlxsw_sp_port_pvid_set(mlxsw_sp_port, 1); + mlxsw_sp_port_pvid_set(mlxsw_sp_port, MLXSW_SP_DEFAULT_VID); } static int mlxsw_sp_bridge_8021q_vxlan_join(struct mlxsw_sp_bridge_device *bridge_device, - const struct net_device *vxlan_dev, + const struct net_device *vxlan_dev, u16 vid, struct netlink_ext_ack *extack) { - WARN_ON(1); - return -EINVAL; + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(bridge_device->dev); + struct vxlan_dev *vxlan = netdev_priv(vxlan_dev); + struct mlxsw_sp_nve_params params = { + .type = MLXSW_SP_NVE_TYPE_VXLAN, + .vni = vxlan->cfg.vni, + .dev = vxlan_dev, + }; + struct mlxsw_sp_fid *fid; + int err; + + /* If the VLAN is 0, we need to find the VLAN that is configured as + * PVID and egress untagged on the bridge port of the VxLAN device. + * It is possible no such VLAN exists + */ + if (!vid) { + err = mlxsw_sp_vxlan_mapped_vid(vxlan_dev, &vid); + if (err || !vid) + return err; + } + + /* If no other port is member in the VLAN, then the FID does not exist. + * NVE will be enabled on the FID once a port joins the VLAN + */ + fid = mlxsw_sp_fid_8021q_lookup(mlxsw_sp, vid); + if (!fid) + return 0; + + if (mlxsw_sp_fid_vni_is_set(fid)) { + NL_SET_ERR_MSG_MOD(extack, "VNI is already set on FID"); + err = -EINVAL; + goto err_vni_exists; + } + + err = mlxsw_sp_nve_fid_enable(mlxsw_sp, fid, ¶ms, extack); + if (err) + goto err_nve_fid_enable; + + /* The tunnel port does not hold a reference on the FID. 
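The 802.1Q vxlan_join path above now attaches a human-readable reason to each failure via NL_SET_ERR_MSG_MOD() instead of returning a bare error code. A userspace sketch of the pattern, modelling extack as a plain out-parameter for the message (function name and inputs are hypothetical):

    #include <stdio.h>

    /* Each validation failure both sets a message and returns an
     * errno-style code, mirroring NL_SET_ERR_MSG_MOD() plus -EINVAL. */
    static int vxlan_join_demo(int fid_exists, int vni_already_set,
                               const char **extack)
    {
            if (!fid_exists)
                    return 0;       /* no FID yet; nothing to do, not an error */
            if (vni_already_set) {
                    *extack = "VNI is already set on FID";
                    return -22;     /* -EINVAL */
            }
            return 0;               /* would enable NVE on the FID here */
    }

    int main(void)
    {
            const char *msg = NULL;
            int err = vxlan_join_demo(1, 1, &msg);

            if (err)
                    fprintf(stderr, "join failed (%d): %s\n", err, msg);
            return 0;
    }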
Only + * local ports and the router port + */ + mlxsw_sp_fid_put(fid); + + return 0; + +err_nve_fid_enable: +err_vni_exists: + mlxsw_sp_fid_put(fid); + return err; } -static void -mlxsw_sp_bridge_8021q_vxlan_leave(struct mlxsw_sp_bridge_device *bridge_device, - const struct net_device *vxlan_dev) +static struct net_device * +mlxsw_sp_bridge_8021q_vxlan_dev_find(struct net_device *br_dev, u16 vid) { + struct net_device *dev; + struct list_head *iter; + + netdev_for_each_lower_dev(br_dev, dev, iter) { + u16 pvid; + int err; + + if (!netif_is_vxlan(dev)) + continue; + + err = mlxsw_sp_vxlan_mapped_vid(dev, &pvid); + if (err || pvid != vid) + continue; + + return dev; + } + + return NULL; } static struct mlxsw_sp_fid * mlxsw_sp_bridge_8021q_fid_get(struct mlxsw_sp_bridge_device *bridge_device, - u16 vid) + u16 vid, struct netlink_ext_ack *extack) { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(bridge_device->dev); + struct net_device *vxlan_dev; + struct mlxsw_sp_fid *fid; + int err; + + fid = mlxsw_sp_fid_8021q_get(mlxsw_sp, vid); + if (IS_ERR(fid)) + return fid; + + if (mlxsw_sp_fid_vni_is_set(fid)) + return fid; + + /* Find the VxLAN device that has the specified VLAN configured as + * PVID and egress untagged. There can be at most one such device + */ + vxlan_dev = mlxsw_sp_bridge_8021q_vxlan_dev_find(bridge_device->dev, + vid); + if (!vxlan_dev) + return fid; + + if (!netif_running(vxlan_dev)) + return fid; + + err = mlxsw_sp_bridge_8021q_vxlan_join(bridge_device, vxlan_dev, vid, + extack); + if (err) + goto err_vxlan_join; - return mlxsw_sp_fid_8021q_get(mlxsw_sp, vid); + return fid; + +err_vxlan_join: + mlxsw_sp_fid_put(fid); + return ERR_PTR(err); } static struct mlxsw_sp_fid * mlxsw_sp_bridge_8021q_fid_lookup(struct mlxsw_sp_bridge_device *bridge_device, u16 vid) { - WARN_ON(1); - return NULL; + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(bridge_device->dev); + + return mlxsw_sp_fid_8021q_lookup(mlxsw_sp, vid); } static u16 @@ -2054,7 +2102,6 @@ static const struct mlxsw_sp_bridge_ops mlxsw_sp_bridge_8021q_ops = { .port_join = mlxsw_sp_bridge_8021q_port_join, .port_leave = mlxsw_sp_bridge_8021q_port_leave, .vxlan_join = mlxsw_sp_bridge_8021q_vxlan_join, - .vxlan_leave = mlxsw_sp_bridge_8021q_vxlan_leave, .fid_get = mlxsw_sp_bridge_8021q_fid_get, .fid_lookup = mlxsw_sp_bridge_8021q_fid_lookup, .fid_vid = mlxsw_sp_bridge_8021q_fid_vid, @@ -2087,7 +2134,7 @@ mlxsw_sp_bridge_8021d_port_join(struct mlxsw_sp_bridge_device *bridge_device, struct net_device *dev = bridge_port->dev; u16 vid; - vid = is_vlan_dev(dev) ? vlan_dev_vlan_id(dev) : 1; + vid = is_vlan_dev(dev) ? vlan_dev_vlan_id(dev) : MLXSW_SP_DEFAULT_VID; mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid); if (WARN_ON(!mlxsw_sp_port_vlan)) return -EINVAL; @@ -2101,7 +2148,8 @@ mlxsw_sp_bridge_8021d_port_join(struct mlxsw_sp_bridge_device *bridge_device, if (mlxsw_sp_port_vlan->fid) mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port_vlan); - return mlxsw_sp_port_vlan_bridge_join(mlxsw_sp_port_vlan, bridge_port); + return mlxsw_sp_port_vlan_bridge_join(mlxsw_sp_port_vlan, bridge_port, + extack); } static void @@ -2113,9 +2161,9 @@ mlxsw_sp_bridge_8021d_port_leave(struct mlxsw_sp_bridge_device *bridge_device, struct net_device *dev = bridge_port->dev; u16 vid; - vid = is_vlan_dev(dev) ? vlan_dev_vlan_id(dev) : 1; + vid = is_vlan_dev(dev) ? 
vlan_dev_vlan_id(dev) : MLXSW_SP_DEFAULT_VID; mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid); - if (!mlxsw_sp_port_vlan) + if (!mlxsw_sp_port_vlan || !mlxsw_sp_port_vlan->bridge_port) return; mlxsw_sp_port_vlan_bridge_leave(mlxsw_sp_port_vlan); @@ -2123,7 +2171,7 @@ mlxsw_sp_bridge_8021d_port_leave(struct mlxsw_sp_bridge_device *bridge_device, static int mlxsw_sp_bridge_8021d_vxlan_join(struct mlxsw_sp_bridge_device *bridge_device, - const struct net_device *vxlan_dev, + const struct net_device *vxlan_dev, u16 vid, struct netlink_ext_ack *extack) { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(bridge_device->dev); @@ -2137,10 +2185,13 @@ mlxsw_sp_bridge_8021d_vxlan_join(struct mlxsw_sp_bridge_device *bridge_device, int err; fid = mlxsw_sp_fid_8021d_lookup(mlxsw_sp, bridge_device->dev->ifindex); - if (!fid) + if (!fid) { + NL_SET_ERR_MSG_MOD(extack, "Did not find a corresponding FID"); return -EINVAL; + } if (mlxsw_sp_fid_vni_is_set(fid)) { + NL_SET_ERR_MSG_MOD(extack, "VNI is already set on FID"); err = -EINVAL; goto err_vni_exists; } @@ -2162,29 +2213,9 @@ err_vni_exists: return err; } -static void -mlxsw_sp_bridge_8021d_vxlan_leave(struct mlxsw_sp_bridge_device *bridge_device, - const struct net_device *vxlan_dev) -{ - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(bridge_device->dev); - struct mlxsw_sp_fid *fid; - - fid = mlxsw_sp_fid_8021d_lookup(mlxsw_sp, bridge_device->dev->ifindex); - if (WARN_ON(!fid)) - return; - - /* If the VxLAN device is down, then the FID does not have a VNI */ - if (!mlxsw_sp_fid_vni_is_set(fid)) - goto out; - - mlxsw_sp_nve_fid_disable(mlxsw_sp, fid); -out: - mlxsw_sp_fid_put(fid); -} - static struct mlxsw_sp_fid * mlxsw_sp_bridge_8021d_fid_get(struct mlxsw_sp_bridge_device *bridge_device, - u16 vid) + u16 vid, struct netlink_ext_ack *extack) { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(bridge_device->dev); struct net_device *vxlan_dev; @@ -2205,7 +2236,8 @@ mlxsw_sp_bridge_8021d_fid_get(struct mlxsw_sp_bridge_device *bridge_device, if (!netif_running(vxlan_dev)) return fid; - err = mlxsw_sp_bridge_8021d_vxlan_join(bridge_device, vxlan_dev, NULL); + err = mlxsw_sp_bridge_8021d_vxlan_join(bridge_device, vxlan_dev, 0, + extack); if (err) goto err_vxlan_join; @@ -2240,7 +2272,6 @@ static const struct mlxsw_sp_bridge_ops mlxsw_sp_bridge_8021d_ops = { .port_join = mlxsw_sp_bridge_8021d_port_join, .port_leave = mlxsw_sp_bridge_8021d_port_leave, .vxlan_join = mlxsw_sp_bridge_8021d_vxlan_join, - .vxlan_leave = mlxsw_sp_bridge_8021d_vxlan_leave, .fid_get = mlxsw_sp_bridge_8021d_fid_get, .fid_lookup = mlxsw_sp_bridge_8021d_fid_lookup, .fid_vid = mlxsw_sp_bridge_8021d_fid_vid, @@ -2295,7 +2326,7 @@ void mlxsw_sp_port_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_port, int mlxsw_sp_bridge_vxlan_join(struct mlxsw_sp *mlxsw_sp, const struct net_device *br_dev, - const struct net_device *vxlan_dev, + const struct net_device *vxlan_dev, u16 vid, struct netlink_ext_ack *extack) { struct mlxsw_sp_bridge_device *bridge_device; @@ -2304,20 +2335,102 @@ int mlxsw_sp_bridge_vxlan_join(struct mlxsw_sp *mlxsw_sp, if (WARN_ON(!bridge_device)) return -EINVAL; - return bridge_device->ops->vxlan_join(bridge_device, vxlan_dev, extack); + return bridge_device->ops->vxlan_join(bridge_device, vxlan_dev, vid, + extack); } void mlxsw_sp_bridge_vxlan_leave(struct mlxsw_sp *mlxsw_sp, - const struct net_device *br_dev, const struct net_device *vxlan_dev) { + struct vxlan_dev *vxlan = netdev_priv(vxlan_dev); + struct mlxsw_sp_fid *fid; + + /* If the VxLAN device is 
down, then the FID does not have a VNI */ + fid = mlxsw_sp_fid_lookup_by_vni(mlxsw_sp, vxlan->cfg.vni); + if (!fid) + return; + + mlxsw_sp_nve_fid_disable(mlxsw_sp, fid); + mlxsw_sp_fid_put(fid); +} + +struct mlxsw_sp_fid *mlxsw_sp_bridge_fid_get(struct mlxsw_sp *mlxsw_sp, + const struct net_device *br_dev, + u16 vid, + struct netlink_ext_ack *extack) +{ struct mlxsw_sp_bridge_device *bridge_device; bridge_device = mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, br_dev); if (WARN_ON(!bridge_device)) - return; + return ERR_PTR(-EINVAL); + + return bridge_device->ops->fid_get(bridge_device, vid, extack); +} + +static void +mlxsw_sp_switchdev_vxlan_addr_convert(const union vxlan_addr *vxlan_addr, + enum mlxsw_sp_l3proto *proto, + union mlxsw_sp_l3addr *addr) +{ + if (vxlan_addr->sa.sa_family == AF_INET) { + addr->addr4 = vxlan_addr->sin.sin_addr.s_addr; + *proto = MLXSW_SP_L3_PROTO_IPV4; + } else { + addr->addr6 = vxlan_addr->sin6.sin6_addr; + *proto = MLXSW_SP_L3_PROTO_IPV6; + } +} + +static void +mlxsw_sp_switchdev_addr_vxlan_convert(enum mlxsw_sp_l3proto proto, + const union mlxsw_sp_l3addr *addr, + union vxlan_addr *vxlan_addr) +{ + switch (proto) { + case MLXSW_SP_L3_PROTO_IPV4: + vxlan_addr->sa.sa_family = AF_INET; + vxlan_addr->sin.sin_addr.s_addr = addr->addr4; + break; + case MLXSW_SP_L3_PROTO_IPV6: + vxlan_addr->sa.sa_family = AF_INET6; + vxlan_addr->sin6.sin6_addr = addr->addr6; + break; + } +} + +static void mlxsw_sp_fdb_vxlan_call_notifiers(struct net_device *dev, + const char *mac, + enum mlxsw_sp_l3proto proto, + union mlxsw_sp_l3addr *addr, + __be32 vni, bool adding) +{ + struct switchdev_notifier_vxlan_fdb_info info; + struct vxlan_dev *vxlan = netdev_priv(dev); + enum switchdev_notifier_type type; - bridge_device->ops->vxlan_leave(bridge_device, vxlan_dev); + type = adding ? SWITCHDEV_VXLAN_FDB_ADD_TO_BRIDGE : + SWITCHDEV_VXLAN_FDB_DEL_TO_BRIDGE; + mlxsw_sp_switchdev_addr_vxlan_convert(proto, addr, &info.remote_ip); + info.remote_port = vxlan->cfg.dst_port; + info.remote_vni = vni; + info.remote_ifindex = 0; + ether_addr_copy(info.eth_addr, mac); + info.vni = vni; + info.offloaded = adding; + call_switchdev_notifiers(type, dev, &info.info, NULL); +} + +static void mlxsw_sp_fdb_nve_call_notifiers(struct net_device *dev, + const char *mac, + enum mlxsw_sp_l3proto proto, + union mlxsw_sp_l3addr *addr, + __be32 vni, + bool adding) +{ + if (netif_is_vxlan(dev)) + mlxsw_sp_fdb_vxlan_call_notifiers(dev, mac, proto, addr, vni, + adding); } static void @@ -2330,7 +2443,7 @@ mlxsw_sp_fdb_call_notifiers(enum switchdev_notifier_type type, info.addr = mac; info.vid = vid; info.offloaded = offloaded; - call_switchdev_notifiers(type, dev, &info.info); + call_switchdev_notifiers(type, dev, &info.info, NULL); } static void mlxsw_sp_fdb_notify_mac_process(struct mlxsw_sp *mlxsw_sp, @@ -2428,7 +2541,8 @@ static void mlxsw_sp_fdb_notify_mac_lag_process(struct mlxsw_sp *mlxsw_sp, bridge_device = bridge_port->bridge_device; vid = bridge_device->vlan_enabled ? mlxsw_sp_port_vlan->vid : 0; - lag_vid = mlxsw_sp_port_vlan->vid; + lag_vid = mlxsw_sp_fid_lag_vid_valid(mlxsw_sp_port_vlan->fid) ? 
+ mlxsw_sp_port_vlan->vid : 0; do_fdb_op: err = mlxsw_sp_port_fdb_uc_lag_op(mlxsw_sp, lag_id, mac, fid, lag_vid, @@ -2451,6 +2565,122 @@ just_remove: goto do_fdb_op; } +static int +__mlxsw_sp_fdb_notify_mac_uc_tunnel_process(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_fid *fid, + bool adding, + struct net_device **nve_dev, + u16 *p_vid, __be32 *p_vni) +{ + struct mlxsw_sp_bridge_device *bridge_device; + struct net_device *br_dev, *dev; + int nve_ifindex; + int err; + + err = mlxsw_sp_fid_nve_ifindex(fid, &nve_ifindex); + if (err) + return err; + + err = mlxsw_sp_fid_vni(fid, p_vni); + if (err) + return err; + + dev = __dev_get_by_index(&init_net, nve_ifindex); + if (!dev) + return -EINVAL; + *nve_dev = dev; + + if (!netif_running(dev)) + return -EINVAL; + + if (adding && !br_port_flag_is_set(dev, BR_LEARNING)) + return -EINVAL; + + if (adding && netif_is_vxlan(dev)) { + struct vxlan_dev *vxlan = netdev_priv(dev); + + if (!(vxlan->cfg.flags & VXLAN_F_LEARN)) + return -EINVAL; + } + + br_dev = netdev_master_upper_dev_get(dev); + if (!br_dev) + return -EINVAL; + + bridge_device = mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, br_dev); + if (!bridge_device) + return -EINVAL; + + *p_vid = bridge_device->ops->fid_vid(bridge_device, fid); + + return 0; +} + +static void mlxsw_sp_fdb_notify_mac_uc_tunnel_process(struct mlxsw_sp *mlxsw_sp, + char *sfn_pl, + int rec_index, + bool adding) +{ + enum mlxsw_reg_sfn_uc_tunnel_protocol sfn_proto; + enum switchdev_notifier_type type; + struct net_device *nve_dev; + union mlxsw_sp_l3addr addr; + struct mlxsw_sp_fid *fid; + char mac[ETH_ALEN]; + u16 fid_index, vid; + __be32 vni; + u32 uip; + int err; + + mlxsw_reg_sfn_uc_tunnel_unpack(sfn_pl, rec_index, mac, &fid_index, + &uip, &sfn_proto); + + fid = mlxsw_sp_fid_lookup_by_index(mlxsw_sp, fid_index); + if (!fid) + goto err_fid_lookup; + + err = mlxsw_sp_nve_learned_ip_resolve(mlxsw_sp, uip, + (enum mlxsw_sp_l3proto) sfn_proto, + &addr); + if (err) + goto err_ip_resolve; + + err = __mlxsw_sp_fdb_notify_mac_uc_tunnel_process(mlxsw_sp, fid, adding, + &nve_dev, &vid, &vni); + if (err) + goto err_fdb_process; + + err = mlxsw_sp_port_fdb_tunnel_uc_op(mlxsw_sp, mac, fid_index, + (enum mlxsw_sp_l3proto) sfn_proto, + &addr, adding, true); + if (err) + goto err_fdb_op; + + mlxsw_sp_fdb_nve_call_notifiers(nve_dev, mac, + (enum mlxsw_sp_l3proto) sfn_proto, + &addr, vni, adding); + + type = adding ? SWITCHDEV_FDB_ADD_TO_BRIDGE : + SWITCHDEV_FDB_DEL_TO_BRIDGE; + mlxsw_sp_fdb_call_notifiers(type, mac, vid, nve_dev, adding); + + mlxsw_sp_fid_put(fid); + + return; + +err_fdb_op: +err_fdb_process: +err_ip_resolve: + mlxsw_sp_fid_put(fid); +err_fid_lookup: + /* Remove an FDB entry in case we cannot process it. Otherwise the + * device will keep sending the same notification over and over again. 
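The error path here is worth noting: when a learned unicast tunnel record cannot be processed, the entry is still removed from the device, as the comment above and the call just below spell out, so the hardware does not replay the same notification indefinitely. A small sketch of this consume-even-on-failure shape (all helpers hypothetical):

    /* On any processing failure, still clear the offending hardware
     * entry so the event is not re-generated forever. */
    static int resolve_entry(int rec) { return rec < 0 ? -1 : 0; }
    static int apply_entry(int rec)   { (void)rec; return 0; }
    static void remove_entry(int rec) { (void)rec; }

    static void process_notification(int rec)
    {
            if (resolve_entry(rec))
                    goto err;
            if (apply_entry(rec))
                    goto err;
            return;
    err:
            remove_entry(rec); /* consume the event even on failure */
    }

    int main(void)
    {
            process_notification(-1); /* failure path still consumes */
            return 0;
    }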
+ */ + mlxsw_sp_port_fdb_tunnel_uc_op(mlxsw_sp, mac, fid_index, + (enum mlxsw_sp_l3proto) sfn_proto, &addr, + false, true); +} + static void mlxsw_sp_fdb_notify_rec_process(struct mlxsw_sp *mlxsw_sp, char *sfn_pl, int rec_index) { @@ -2471,6 +2701,14 @@ static void mlxsw_sp_fdb_notify_rec_process(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_fdb_notify_mac_lag_process(mlxsw_sp, sfn_pl, rec_index, false); break; + case MLXSW_REG_SFN_REC_TYPE_LEARNED_UNICAST_TUNNEL: + mlxsw_sp_fdb_notify_mac_uc_tunnel_process(mlxsw_sp, sfn_pl, + rec_index, true); + break; + case MLXSW_REG_SFN_REC_TYPE_AGED_OUT_UNICAST_TUNNEL: + mlxsw_sp_fdb_notify_mac_uc_tunnel_process(mlxsw_sp, sfn_pl, + rec_index, false); + break; } } @@ -2526,20 +2764,6 @@ struct mlxsw_sp_switchdev_event_work { }; static void -mlxsw_sp_switchdev_vxlan_addr_convert(const union vxlan_addr *vxlan_addr, - enum mlxsw_sp_l3proto *proto, - union mlxsw_sp_l3addr *addr) -{ - if (vxlan_addr->sa.sa_family == AF_INET) { - addr->addr4 = vxlan_addr->sin.sin_addr.s_addr; - *proto = MLXSW_SP_L3_PROTO_IPV4; - } else { - addr->addr6 = vxlan_addr->sin6.sin6_addr; - *proto = MLXSW_SP_L3_PROTO_IPV6; - } -} - -static void mlxsw_sp_switchdev_bridge_vxlan_fdb_event(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_switchdev_event_work * switchdev_work, @@ -2570,7 +2794,7 @@ mlxsw_sp_switchdev_bridge_vxlan_fdb_event(struct mlxsw_sp *mlxsw_sp, return; vxlan_fdb_info.offloaded = true; call_switchdev_notifiers(SWITCHDEV_VXLAN_FDB_OFFLOADED, dev, - &vxlan_fdb_info.info); + &vxlan_fdb_info.info, NULL); mlxsw_sp_fdb_call_notifiers(SWITCHDEV_FDB_OFFLOADED, vxlan_fdb_info.eth_addr, fdb_info->vid, dev, true); @@ -2583,7 +2807,7 @@ mlxsw_sp_switchdev_bridge_vxlan_fdb_event(struct mlxsw_sp *mlxsw_sp, false); vxlan_fdb_info.offloaded = false; call_switchdev_notifiers(SWITCHDEV_VXLAN_FDB_OFFLOADED, dev, - &vxlan_fdb_info.info); + &vxlan_fdb_info.info, NULL); break; } } @@ -2604,7 +2828,8 @@ mlxsw_sp_switchdev_bridge_nve_fdb_event(struct mlxsw_sp_switchdev_event_work * switchdev_work->event != SWITCHDEV_FDB_DEL_TO_DEVICE) return; - if (!switchdev_work->fdb_info.added_by_user) + if (switchdev_work->event == SWITCHDEV_FDB_ADD_TO_DEVICE && + !switchdev_work->fdb_info.added_by_user) return; if (!netif_running(dev)) @@ -2727,7 +2952,7 @@ mlxsw_sp_switchdev_vxlan_fdb_add(struct mlxsw_sp *mlxsw_sp, } vxlan_fdb_info->offloaded = true; call_switchdev_notifiers(SWITCHDEV_VXLAN_FDB_OFFLOADED, dev, - &vxlan_fdb_info->info); + &vxlan_fdb_info->info, NULL); mlxsw_sp_fid_put(fid); return; } @@ -2748,7 +2973,7 @@ mlxsw_sp_switchdev_vxlan_fdb_add(struct mlxsw_sp *mlxsw_sp, goto err_fdb_tunnel_uc_op; vxlan_fdb_info->offloaded = true; call_switchdev_notifiers(SWITCHDEV_VXLAN_FDB_OFFLOADED, dev, - &vxlan_fdb_info->info); + &vxlan_fdb_info->info, NULL); mlxsw_sp_fdb_call_notifiers(SWITCHDEV_FDB_OFFLOADED, vxlan_fdb_info->eth_addr, vid, dev, true); @@ -2849,23 +3074,34 @@ mlxsw_sp_switchdev_vxlan_work_prepare(struct mlxsw_sp_switchdev_event_work * struct vxlan_dev *vxlan = netdev_priv(switchdev_work->dev); struct switchdev_notifier_vxlan_fdb_info *vxlan_fdb_info; struct vxlan_config *cfg = &vxlan->cfg; + struct netlink_ext_ack *extack; + extack = switchdev_notifier_info_to_extack(info); vxlan_fdb_info = container_of(info, struct switchdev_notifier_vxlan_fdb_info, info); - if (vxlan_fdb_info->remote_port != cfg->dst_port) - return -EOPNOTSUPP; - if (vxlan_fdb_info->remote_vni != cfg->vni) + if (vxlan_fdb_info->remote_port != cfg->dst_port) { + NL_SET_ERR_MSG_MOD(extack, "VxLAN: FDB: Non-default remote port is not 
supported"); return -EOPNOTSUPP; - if (vxlan_fdb_info->vni != cfg->vni) + } + if (vxlan_fdb_info->remote_vni != cfg->vni || + vxlan_fdb_info->vni != cfg->vni) { + NL_SET_ERR_MSG_MOD(extack, "VxLAN: FDB: Non-default VNI is not supported"); return -EOPNOTSUPP; - if (vxlan_fdb_info->remote_ifindex) + } + if (vxlan_fdb_info->remote_ifindex) { + NL_SET_ERR_MSG_MOD(extack, "VxLAN: FDB: Local interface is not supported"); return -EOPNOTSUPP; - if (is_multicast_ether_addr(vxlan_fdb_info->eth_addr)) + } + if (is_multicast_ether_addr(vxlan_fdb_info->eth_addr)) { + NL_SET_ERR_MSG_MOD(extack, "VxLAN: FDB: Multicast MAC addresses not supported"); return -EOPNOTSUPP; - if (vxlan_addr_multicast(&vxlan_fdb_info->remote_ip)) + } + if (vxlan_addr_multicast(&vxlan_fdb_info->remote_ip)) { + NL_SET_ERR_MSG_MOD(extack, "VxLAN: FDB: Multicast destination IP is not supported"); return -EOPNOTSUPP; + } switchdev_work->vxlan_fdb_info = *vxlan_fdb_info; @@ -2883,6 +3119,13 @@ static int mlxsw_sp_switchdev_event(struct notifier_block *unused, struct net_device *br_dev; int err; + if (event == SWITCHDEV_PORT_ATTR_SET) { + err = switchdev_handle_port_attr_set(dev, ptr, + mlxsw_sp_port_dev_check, + mlxsw_sp_port_attr_set); + return notifier_from_errno(err); + } + /* Tunnel devices are not our uppers, so check their master instead */ br_dev = netdev_master_upper_dev_get_rcu(dev); if (!br_dev) @@ -2947,10 +3190,279 @@ err_addr_alloc: return NOTIFY_BAD; } -static struct notifier_block mlxsw_sp_switchdev_notifier = { +struct notifier_block mlxsw_sp_switchdev_notifier = { .notifier_call = mlxsw_sp_switchdev_event, }; +static int +mlxsw_sp_switchdev_vxlan_vlan_add(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_bridge_device *bridge_device, + const struct net_device *vxlan_dev, u16 vid, + bool flag_untagged, bool flag_pvid, + struct netlink_ext_ack *extack) +{ + struct vxlan_dev *vxlan = netdev_priv(vxlan_dev); + __be32 vni = vxlan->cfg.vni; + struct mlxsw_sp_fid *fid; + u16 old_vid; + int err; + + /* We cannot have the same VLAN as PVID and egress untagged on multiple + * VxLAN devices. Note that we get this notification before the VLAN is + * actually added to the bridge's database, so it is not possible for + * the lookup function to return 'vxlan_dev' + */ + if (flag_untagged && flag_pvid && + mlxsw_sp_bridge_8021q_vxlan_dev_find(bridge_device->dev, vid)) { + NL_SET_ERR_MSG_MOD(extack, "VLAN already mapped to a different VNI"); + return -EINVAL; + } + + if (!netif_running(vxlan_dev)) + return 0; + + /* First case: FID is not associated with this VNI, but the new VLAN + * is both PVID and egress untagged. Need to enable NVE on the FID, if + * it exists + */ + fid = mlxsw_sp_fid_lookup_by_vni(mlxsw_sp, vni); + if (!fid) { + if (!flag_untagged || !flag_pvid) + return 0; + return mlxsw_sp_bridge_8021q_vxlan_join(bridge_device, + vxlan_dev, vid, extack); + } + + /* Second case: FID is associated with the VNI and the VLAN associated + * with the FID is the same as the notified VLAN. This means the flags + * (PVID / egress untagged) were toggled and that NVE should be + * disabled on the FID + */ + old_vid = mlxsw_sp_fid_8021q_vid(fid); + if (vid == old_vid) { + if (WARN_ON(flag_untagged && flag_pvid)) { + mlxsw_sp_fid_put(fid); + return -EINVAL; + } + mlxsw_sp_bridge_vxlan_leave(mlxsw_sp, vxlan_dev); + mlxsw_sp_fid_put(fid); + return 0; + } + + /* Third case: A new VLAN was configured on the VxLAN device, but this + * VLAN is not PVID, so there is nothing to do. 
+static void
+mlxsw_sp_switchdev_vxlan_vlan_del(struct mlxsw_sp *mlxsw_sp,
+				  struct mlxsw_sp_bridge_device *bridge_device,
+				  const struct net_device *vxlan_dev, u16 vid)
+{
+	struct vxlan_dev *vxlan = netdev_priv(vxlan_dev);
+	__be32 vni = vxlan->cfg.vni;
+	struct mlxsw_sp_fid *fid;
+
+	if (!netif_running(vxlan_dev))
+		return;
+
+	fid = mlxsw_sp_fid_lookup_by_vni(mlxsw_sp, vni);
+	if (!fid)
+		return;
+
+	/* A different VLAN than the one mapped to the VNI is deleted */
+	if (mlxsw_sp_fid_8021q_vid(fid) != vid)
+		goto out;
+
+	mlxsw_sp_bridge_vxlan_leave(mlxsw_sp, vxlan_dev);
+
+out:
+	mlxsw_sp_fid_put(fid);
+}
+
+static int
+mlxsw_sp_switchdev_vxlan_vlans_add(struct net_device *vxlan_dev,
+				   struct switchdev_notifier_port_obj_info *
+				   port_obj_info)
+{
+	struct switchdev_obj_port_vlan *vlan =
+		SWITCHDEV_OBJ_PORT_VLAN(port_obj_info->obj);
+	bool flag_untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED;
+	bool flag_pvid = vlan->flags & BRIDGE_VLAN_INFO_PVID;
+	struct switchdev_trans *trans = port_obj_info->trans;
+	struct mlxsw_sp_bridge_device *bridge_device;
+	struct netlink_ext_ack *extack;
+	struct mlxsw_sp *mlxsw_sp;
+	struct net_device *br_dev;
+	u16 vid;
+
+	extack = switchdev_notifier_info_to_extack(&port_obj_info->info);
+	br_dev = netdev_master_upper_dev_get(vxlan_dev);
+	if (!br_dev)
+		return 0;
+
+	mlxsw_sp = mlxsw_sp_lower_get(br_dev);
+	if (!mlxsw_sp)
+		return 0;
+
+	port_obj_info->handled = true;
+
+	if (switchdev_trans_ph_commit(trans))
+		return 0;
+
+	bridge_device = mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, br_dev);
+	if (!bridge_device)
+		return -EINVAL;
+
+	if (!bridge_device->vlan_enabled)
+		return 0;
+
+	for (vid = vlan->vid_begin; vid <= vlan->vid_end; vid++) {
+		int err;
+
+		err = mlxsw_sp_switchdev_vxlan_vlan_add(mlxsw_sp, bridge_device,
+							vxlan_dev, vid,
+							flag_untagged,
+							flag_pvid, extack);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static void
+mlxsw_sp_switchdev_vxlan_vlans_del(struct net_device *vxlan_dev,
+				   struct switchdev_notifier_port_obj_info *
+				   port_obj_info)
+{
+	struct switchdev_obj_port_vlan *vlan =
+		SWITCHDEV_OBJ_PORT_VLAN(port_obj_info->obj);
+	struct mlxsw_sp_bridge_device *bridge_device;
+	struct mlxsw_sp *mlxsw_sp;
+	struct net_device *br_dev;
+	u16 vid;
+
+	br_dev = netdev_master_upper_dev_get(vxlan_dev);
+	if (!br_dev)
+		return;
+
+	mlxsw_sp = mlxsw_sp_lower_get(br_dev);
+	if (!mlxsw_sp)
+		return;
+
+	port_obj_info->handled = true;
+
+	bridge_device = mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, br_dev);
+	if (!bridge_device)
+		return;
+
+	if (!bridge_device->vlan_enabled)
+		return;
+
+	for (vid = vlan->vid_begin; vid <= vlan->vid_end; vid++)
+		mlxsw_sp_switchdev_vxlan_vlan_del(mlxsw_sp, bridge_device,
+						  vxlan_dev, vid);
+}
+
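For illustration, a standalone sketch of the two-phase (prepare/commit) convention assumed by the vlans_add handler above: validation may only fail in the prepare phase, the commit phase must not fail, and a 'handled' flag tells the dispatcher to stop propagating the object. All names are invented.

#include <stdbool.h>
#include <stdio.h>

struct trans { bool ph_commit; };

static int apply_vlan(struct trans *trans, int vid, bool *handled)
{
	*handled = true;	/* tell the caller not to propagate further */

	if (trans->ph_commit)	/* work was already done during prepare */
		return 0;

	if (vid <= 0 || vid >= 4095) {
		printf("vid %d rejected in prepare phase\n", vid);
		return -1;
	}
	printf("vid %d programmed\n", vid);
	return 0;
}

int main(void)
{
	struct trans prepare = { .ph_commit = false };
	struct trans commit = { .ph_commit = true };
	bool handled = false;

	apply_vlan(&prepare, 10, &handled);
	apply_vlan(&commit, 10, &handled);
	return 0;
}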
+static int
+mlxsw_sp_switchdev_handle_vxlan_obj_add(struct net_device *vxlan_dev,
+					struct switchdev_notifier_port_obj_info *
+					port_obj_info)
+{
+	int err = 0;
+
+	switch (port_obj_info->obj->id) {
+	case SWITCHDEV_OBJ_ID_PORT_VLAN:
+		err = mlxsw_sp_switchdev_vxlan_vlans_add(vxlan_dev,
+							 port_obj_info);
+		break;
+	default:
+		break;
+	}
+
+	return err;
+}
+
+static void
+mlxsw_sp_switchdev_handle_vxlan_obj_del(struct net_device *vxlan_dev,
+					struct switchdev_notifier_port_obj_info *
+					port_obj_info)
+{
+	switch (port_obj_info->obj->id) {
+	case SWITCHDEV_OBJ_ID_PORT_VLAN:
+		mlxsw_sp_switchdev_vxlan_vlans_del(vxlan_dev, port_obj_info);
+		break;
+	default:
+		break;
+	}
+}
+
+static int mlxsw_sp_switchdev_blocking_event(struct notifier_block *unused,
+					     unsigned long event, void *ptr)
+{
+	struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
+	int err = 0;
+
+	switch (event) {
+	case SWITCHDEV_PORT_OBJ_ADD:
+		if (netif_is_vxlan(dev))
+			err = mlxsw_sp_switchdev_handle_vxlan_obj_add(dev, ptr);
+		else
+			err = switchdev_handle_port_obj_add(dev, ptr,
+							    mlxsw_sp_port_dev_check,
+							    mlxsw_sp_port_obj_add);
+		return notifier_from_errno(err);
+	case SWITCHDEV_PORT_OBJ_DEL:
+		if (netif_is_vxlan(dev))
+			mlxsw_sp_switchdev_handle_vxlan_obj_del(dev, ptr);
+		else
+			err = switchdev_handle_port_obj_del(dev, ptr,
+							    mlxsw_sp_port_dev_check,
+							    mlxsw_sp_port_obj_del);
+		return notifier_from_errno(err);
+	case SWITCHDEV_PORT_ATTR_SET:
+		err = switchdev_handle_port_attr_set(dev, ptr,
+						     mlxsw_sp_port_dev_check,
+						     mlxsw_sp_port_attr_set);
+		return notifier_from_errno(err);
+	}
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block mlxsw_sp_switchdev_blocking_notifier = {
+	.notifier_call = mlxsw_sp_switchdev_blocking_event,
+};
+
 u8
 mlxsw_sp_bridge_port_stp_state(struct mlxsw_sp_bridge_port *bridge_port)
 {
@@ -2960,6 +3472,7 @@ mlxsw_sp_bridge_port_stp_state(struct mlxsw_sp_bridge_port *bridge_port)
 static int mlxsw_sp_fdb_init(struct mlxsw_sp *mlxsw_sp)
 {
 	struct mlxsw_sp_bridge *bridge = mlxsw_sp->bridge;
+	struct notifier_block *nb;
 	int err;
 
 	err = mlxsw_sp_ageing_set(mlxsw_sp, MLXSW_SP_DEFAULT_AGEING_TIME);
@@ -2974,17 +3487,33 @@ static int mlxsw_sp_fdb_init(struct mlxsw_sp *mlxsw_sp)
 		return err;
 	}
 
+	nb = &mlxsw_sp_switchdev_blocking_notifier;
+	err = register_switchdev_blocking_notifier(nb);
+	if (err) {
+		dev_err(mlxsw_sp->bus_info->dev, "Failed to register switchdev blocking notifier\n");
+		goto err_register_switchdev_blocking_notifier;
+	}
+
 	INIT_DELAYED_WORK(&bridge->fdb_notify.dw, mlxsw_sp_fdb_notify_work);
 	bridge->fdb_notify.interval = MLXSW_SP_DEFAULT_LEARNING_INTERVAL;
 	mlxsw_sp_fdb_notify_work_schedule(mlxsw_sp);
 	return 0;
+
+err_register_switchdev_blocking_notifier:
+	unregister_switchdev_notifier(&mlxsw_sp_switchdev_notifier);
+	return err;
 }
 
 static void mlxsw_sp_fdb_fini(struct mlxsw_sp *mlxsw_sp)
 {
+	struct notifier_block *nb;
+
 	cancel_delayed_work_sync(&mlxsw_sp->bridge->fdb_notify.dw);
-	unregister_switchdev_notifier(&mlxsw_sp_switchdev_notifier);
+
+	nb = &mlxsw_sp_switchdev_blocking_notifier;
+	unregister_switchdev_blocking_notifier(nb);
+
+	unregister_switchdev_notifier(&mlxsw_sp_switchdev_notifier);
 }
 
 int mlxsw_sp_switchdev_init(struct mlxsw_sp *mlxsw_sp)
@@ -3012,11 +3541,3 @@ void mlxsw_sp_switchdev_fini(struct mlxsw_sp *mlxsw_sp)
 	kfree(mlxsw_sp->bridge);
 }
 
-void mlxsw_sp_port_switchdev_init(struct mlxsw_sp_port *mlxsw_sp_port)
-{
-	mlxsw_sp_port->dev->switchdev_ops = &mlxsw_sp_port_switchdev_ops;
-}
-
-void mlxsw_sp_port_switchdev_fini(struct mlxsw_sp_port *mlxsw_sp_port)
-{
-}
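For illustration, a standalone sketch of the init/fini pairing above: the second registration's error path unwinds the first, and teardown releases in reverse order. The register/unregister functions are mocks, not the switchdev API.

#include <stdio.h>

static int register_atomic(void)      { puts("atomic notifier registered"); return 0; }
static void unregister_atomic(void)   { puts("atomic notifier unregistered"); }
static int register_blocking(void)    { puts("blocking notifier registered"); return 0; }
static void unregister_blocking(void) { puts("blocking notifier unregistered"); }

static int fdb_init(void)
{
	int err;

	err = register_atomic();
	if (err)
		return err;

	err = register_blocking();
	if (err)
		goto err_register_blocking;

	return 0;

err_register_blocking:
	unregister_atomic();	/* unwind what already succeeded */
	return err;
}

static void fdb_fini(void)
{
	/* reverse order of registration */
	unregister_blocking();
	unregister_atomic();
}

int main(void)
{
	if (!fdb_init())
		fdb_fini();
	return 0;
}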
diff --git a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c
index 2d4f213e154d..533fe6235b7c 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c
@@ -11,7 +11,6 @@
 #include <linux/device.h>
 #include <linux/skbuff.h>
 #include <linux/if_vlan.h>
-#include <net/switchdev.h>
 
 #include "pci.h"
 #include "core.h"
@@ -390,6 +389,18 @@ static int mlxsw_sx_port_get_phys_port_name(struct net_device *dev, char *name,
 					      name, len);
 }
 
+static int mlxsw_sx_port_get_port_parent_id(struct net_device *dev,
+					    struct netdev_phys_item_id *ppid)
+{
+	struct mlxsw_sx_port *mlxsw_sx_port = netdev_priv(dev);
+	struct mlxsw_sx *mlxsw_sx = mlxsw_sx_port->mlxsw_sx;
+
+	ppid->id_len = sizeof(mlxsw_sx->hw_id);
+	memcpy(&ppid->id, &mlxsw_sx->hw_id, ppid->id_len);
+
+	return 0;
+}
+
 static const struct net_device_ops mlxsw_sx_port_netdev_ops = {
 	.ndo_open		= mlxsw_sx_port_open,
 	.ndo_stop		= mlxsw_sx_port_stop,
@@ -397,6 +408,7 @@ static const struct net_device_ops mlxsw_sx_port_netdev_ops = {
 	.ndo_change_mtu		= mlxsw_sx_port_change_mtu,
 	.ndo_get_stats64	= mlxsw_sx_port_get_stats64,
 	.ndo_get_phys_port_name = mlxsw_sx_port_get_phys_port_name,
+	.ndo_get_port_parent_id	= mlxsw_sx_port_get_port_parent_id,
 };
 
 static void mlxsw_sx_port_get_drvinfo(struct net_device *dev,
@@ -901,28 +913,6 @@ static const struct ethtool_ops mlxsw_sx_port_ethtool_ops = {
 	.set_link_ksettings = mlxsw_sx_port_set_link_ksettings,
 };
 
-static int mlxsw_sx_port_attr_get(struct net_device *dev,
-				  struct switchdev_attr *attr)
-{
-	struct mlxsw_sx_port *mlxsw_sx_port = netdev_priv(dev);
-	struct mlxsw_sx *mlxsw_sx = mlxsw_sx_port->mlxsw_sx;
-
-	switch (attr->id) {
-	case SWITCHDEV_ATTR_ID_PORT_PARENT_ID:
-		attr->u.ppid.id_len = sizeof(mlxsw_sx->hw_id);
-		memcpy(&attr->u.ppid.id, &mlxsw_sx->hw_id, attr->u.ppid.id_len);
-		break;
-	default:
-		return -EOPNOTSUPP;
-	}
-
-	return 0;
-}
-
-static const struct switchdev_ops mlxsw_sx_port_switchdev_ops = {
-	.switchdev_port_attr_get = mlxsw_sx_port_attr_get,
-};
-
 static int mlxsw_sx_hw_id_get(struct mlxsw_sx *mlxsw_sx)
 {
 	char spad_pl[MLXSW_REG_SPAD_LEN] = {0};
@@ -1034,7 +1024,6 @@ static int __mlxsw_sx_port_eth_create(struct mlxsw_sx *mlxsw_sx, u8 local_port,
 
 	dev->netdev_ops = &mlxsw_sx_port_netdev_ops;
 	dev->ethtool_ops = &mlxsw_sx_port_ethtool_ops;
-	dev->switchdev_ops = &mlxsw_sx_port_switchdev_ops;
 
 	err = mlxsw_sx_port_dev_addr_get(mlxsw_sx_port);
 	if (err) {
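For illustration, a standalone sketch of the ndo_get_port_parent_id() contract that replaces the removed switchdev attr_get callback above: copy a device-wide ID into a fixed-size buffer and report its length, so the stack can group ports belonging to the same switch. The types are simplified stand-ins for the kernel's struct netdev_phys_item_id.

#include <stdio.h>
#include <string.h>

#define MAX_PHYS_ITEM_ID_LEN 32

struct phys_item_id {
	unsigned char id[MAX_PHYS_ITEM_ID_LEN];
	unsigned char id_len;
};

struct sx_device {
	unsigned char hw_id[6];	/* e.g. the switch's base MAC address */
};

static int get_port_parent_id(const struct sx_device *sx,
			      struct phys_item_id *ppid)
{
	ppid->id_len = sizeof(sx->hw_id);
	memcpy(ppid->id, sx->hw_id, ppid->id_len);
	return 0;
}

int main(void)
{
	struct sx_device sx = { .hw_id = { 0x7c, 0xfe, 0x90, 0x12, 0x34, 0x56 } };
	struct phys_item_id ppid;
	int i;

	get_port_parent_id(&sx, &ppid);
	for (i = 0; i < ppid.id_len; i++)
		printf("%02x%s", ppid.id[i], i + 1 < ppid.id_len ? ":" : "\n");
	return 0;
}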