diff options
Diffstat (limited to 'drivers/net/ethernet')
42 files changed, 6880 insertions, 455 deletions
diff --git a/drivers/net/ethernet/amazon/Kconfig b/drivers/net/ethernet/amazon/Kconfig index 99b30353541a..9e87d7b8360f 100644 --- a/drivers/net/ethernet/amazon/Kconfig +++ b/drivers/net/ethernet/amazon/Kconfig @@ -17,7 +17,7 @@ if NET_VENDOR_AMAZON config ENA_ETHERNET tristate "Elastic Network Adapter (ENA) support" - depends on (PCI_MSI && X86) + depends on PCI_MSI && !CPU_BIG_ENDIAN ---help--- This driver supports Elastic Network Adapter (ENA)" diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 284a0a612131..18956e7604a3 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -3022,20 +3022,10 @@ static int ena_calc_io_queue_num(struct pci_dev *pdev, int io_sq_num, io_queue_num; /* In case of LLQ use the llq number in the get feature cmd */ - if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) { - io_sq_num = get_feat_ctx->max_queues.max_legacy_llq_num; - - if (io_sq_num == 0) { - dev_err(&pdev->dev, - "Trying to use LLQ but llq_num is 0. Fall back into regular queues\n"); - - ena_dev->tx_mem_queue_type = - ENA_ADMIN_PLACEMENT_POLICY_HOST; - io_sq_num = get_feat_ctx->max_queues.max_sq_num; - } - } else { + if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) + io_sq_num = get_feat_ctx->llq.max_llq_num; + else io_sq_num = get_feat_ctx->max_queues.max_sq_num; - } io_queue_num = min_t(int, num_online_cpus(), ENA_MAX_NUM_IO_QUEUES); io_queue_num = min_t(int, io_queue_num, io_sq_num); @@ -3238,7 +3228,7 @@ static int ena_calc_queue_size(struct pci_dev *pdev, if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) queue_size = min_t(u32, queue_size, - get_feat_ctx->max_queues.max_legacy_llq_depth); + get_feat_ctx->llq.max_llq_depth); queue_size = rounddown_pow_of_two(queue_size); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/Makefile b/drivers/net/ethernet/marvell/octeontx2/af/Makefile index eaac26424486..45b108f8f955 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/Makefile +++ b/drivers/net/ethernet/marvell/octeontx2/af/Makefile @@ -7,4 +7,4 @@ obj-$(CONFIG_OCTEONTX2_MBOX) += octeontx2_mbox.o obj-$(CONFIG_OCTEONTX2_AF) += octeontx2_af.o octeontx2_mbox-y := mbox.o -octeontx2_af-y := cgx.o rvu.o rvu_cgx.o +octeontx2_af-y := cgx.o rvu.o rvu_cgx.o rvu_npa.o rvu_nix.o diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c index 5328ecc8cea2..352501b54aee 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c @@ -29,6 +29,7 @@ * @wq_cmd_cmplt: waitq to keep the process blocked until cmd completion * @cmd_lock: Lock to serialize the command interface * @resp: command response + * @link_info: link related information * @event_cb: callback for linkchange events * @cmd_pend: flag set before new command is started * flag cleared after command response is received @@ -40,6 +41,7 @@ struct lmac { wait_queue_head_t wq_cmd_cmplt; struct mutex cmd_lock; u64 resp; + struct cgx_link_user_info link_info; struct cgx_event_cb event_cb; bool cmd_pend; struct cgx *cgx; @@ -58,6 +60,12 @@ struct cgx { static LIST_HEAD(cgx_list); +/* Convert firmware speed encoding to user format(Mbps) */ +static u32 cgx_speed_mbps[CGX_LINK_SPEED_MAX]; + +/* Convert firmware lmac type encoding to string */ +static char *cgx_lmactype_string[LMAC_MODE_MAX]; + /* Supported devices */ static const struct pci_device_id cgx_id_table[] = { { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVID_OCTEONTX2_CGX) }, @@ -119,6 +127,177 @@ void *cgx_get_pdata(int cgx_id) } EXPORT_SYMBOL(cgx_get_pdata); +/* Ensure the required lock for event queue(where asynchronous events are + * posted) is acquired before calling this API. Else an asynchronous event(with + * latest link status) can reach the destination before this function returns + * and could make the link status appear wrong. + */ +int cgx_get_link_info(void *cgxd, int lmac_id, + struct cgx_link_user_info *linfo) +{ + struct lmac *lmac = lmac_pdata(lmac_id, cgxd); + + if (!lmac) + return -ENODEV; + + *linfo = lmac->link_info; + return 0; +} +EXPORT_SYMBOL(cgx_get_link_info); + +static u64 mac2u64 (u8 *mac_addr) +{ + u64 mac = 0; + int index; + + for (index = ETH_ALEN - 1; index >= 0; index--) + mac |= ((u64)*mac_addr++) << (8 * index); + return mac; +} + +int cgx_lmac_addr_set(u8 cgx_id, u8 lmac_id, u8 *mac_addr) +{ + struct cgx *cgx_dev = cgx_get_pdata(cgx_id); + u64 cfg; + + /* copy 6bytes from macaddr */ + /* memcpy(&cfg, mac_addr, 6); */ + + cfg = mac2u64 (mac_addr); + + cgx_write(cgx_dev, 0, (CGXX_CMRX_RX_DMAC_CAM0 + (lmac_id * 0x8)), + cfg | CGX_DMAC_CAM_ADDR_ENABLE | ((u64)lmac_id << 49)); + + cfg = cgx_read(cgx_dev, lmac_id, CGXX_CMRX_RX_DMAC_CTL0); + cfg |= CGX_DMAC_CTL0_CAM_ENABLE; + cgx_write(cgx_dev, lmac_id, CGXX_CMRX_RX_DMAC_CTL0, cfg); + + return 0; +} +EXPORT_SYMBOL(cgx_lmac_addr_set); + +u64 cgx_lmac_addr_get(u8 cgx_id, u8 lmac_id) +{ + struct cgx *cgx_dev = cgx_get_pdata(cgx_id); + u64 cfg; + + cfg = cgx_read(cgx_dev, 0, CGXX_CMRX_RX_DMAC_CAM0 + lmac_id * 0x8); + return cfg & CGX_RX_DMAC_ADR_MASK; +} +EXPORT_SYMBOL(cgx_lmac_addr_get); + +static inline u8 cgx_get_lmac_type(struct cgx *cgx, int lmac_id) +{ + u64 cfg; + + cfg = cgx_read(cgx, lmac_id, CGXX_CMRX_CFG); + return (cfg >> CGX_LMAC_TYPE_SHIFT) & CGX_LMAC_TYPE_MASK; +} + +/* Configure CGX LMAC in internal loopback mode */ +int cgx_lmac_internal_loopback(void *cgxd, int lmac_id, bool enable) +{ + struct cgx *cgx = cgxd; + u8 lmac_type; + u64 cfg; + + if (!cgx || lmac_id >= cgx->lmac_count) + return -ENODEV; + + lmac_type = cgx_get_lmac_type(cgx, lmac_id); + if (lmac_type == LMAC_MODE_SGMII || lmac_type == LMAC_MODE_QSGMII) { + cfg = cgx_read(cgx, lmac_id, CGXX_GMP_PCS_MRX_CTL); + if (enable) + cfg |= CGXX_GMP_PCS_MRX_CTL_LBK; + else + cfg &= ~CGXX_GMP_PCS_MRX_CTL_LBK; + cgx_write(cgx, lmac_id, CGXX_GMP_PCS_MRX_CTL, cfg); + } else { + cfg = cgx_read(cgx, lmac_id, CGXX_SPUX_CONTROL1); + if (enable) + cfg |= CGXX_SPUX_CONTROL1_LBK; + else + cfg &= ~CGXX_SPUX_CONTROL1_LBK; + cgx_write(cgx, lmac_id, CGXX_SPUX_CONTROL1, cfg); + } + return 0; +} +EXPORT_SYMBOL(cgx_lmac_internal_loopback); + +void cgx_lmac_promisc_config(int cgx_id, int lmac_id, bool enable) +{ + struct cgx *cgx = cgx_get_pdata(cgx_id); + u64 cfg = 0; + + if (!cgx) + return; + + if (enable) { + /* Enable promiscuous mode on LMAC */ + cfg = cgx_read(cgx, lmac_id, CGXX_CMRX_RX_DMAC_CTL0); + cfg &= ~(CGX_DMAC_CAM_ACCEPT | CGX_DMAC_MCAST_MODE); + cfg |= CGX_DMAC_BCAST_MODE; + cgx_write(cgx, lmac_id, CGXX_CMRX_RX_DMAC_CTL0, cfg); + + cfg = cgx_read(cgx, 0, + (CGXX_CMRX_RX_DMAC_CAM0 + lmac_id * 0x8)); + cfg &= ~CGX_DMAC_CAM_ADDR_ENABLE; + cgx_write(cgx, 0, + (CGXX_CMRX_RX_DMAC_CAM0 + lmac_id * 0x8), cfg); + } else { + /* Disable promiscuous mode */ + cfg = cgx_read(cgx, lmac_id, CGXX_CMRX_RX_DMAC_CTL0); + cfg |= CGX_DMAC_CAM_ACCEPT | CGX_DMAC_MCAST_MODE; + cgx_write(cgx, lmac_id, CGXX_CMRX_RX_DMAC_CTL0, cfg); + cfg = cgx_read(cgx, 0, + (CGXX_CMRX_RX_DMAC_CAM0 + lmac_id * 0x8)); + cfg |= CGX_DMAC_CAM_ADDR_ENABLE; + cgx_write(cgx, 0, + (CGXX_CMRX_RX_DMAC_CAM0 + lmac_id * 0x8), cfg); + } +} +EXPORT_SYMBOL(cgx_lmac_promisc_config); + +int cgx_get_rx_stats(void *cgxd, int lmac_id, int idx, u64 *rx_stat) +{ + struct cgx *cgx = cgxd; + + if (!cgx || lmac_id >= cgx->lmac_count) + return -ENODEV; + *rx_stat = cgx_read(cgx, lmac_id, CGXX_CMRX_RX_STAT0 + (idx * 8)); + return 0; +} +EXPORT_SYMBOL(cgx_get_rx_stats); + +int cgx_get_tx_stats(void *cgxd, int lmac_id, int idx, u64 *tx_stat) +{ + struct cgx *cgx = cgxd; + + if (!cgx || lmac_id >= cgx->lmac_count) + return -ENODEV; + *tx_stat = cgx_read(cgx, lmac_id, CGXX_CMRX_TX_STAT0 + (idx * 8)); + return 0; +} +EXPORT_SYMBOL(cgx_get_tx_stats); + +int cgx_lmac_rx_tx_enable(void *cgxd, int lmac_id, bool enable) +{ + struct cgx *cgx = cgxd; + u64 cfg; + + if (!cgx || lmac_id >= cgx->lmac_count) + return -ENODEV; + + cfg = cgx_read(cgx, lmac_id, CGXX_CMRX_CFG); + if (enable) + cfg |= CMR_EN | DATA_PKT_RX_EN | DATA_PKT_TX_EN; + else + cfg &= ~(CMR_EN | DATA_PKT_RX_EN | DATA_PKT_TX_EN); + cgx_write(cgx, lmac_id, CGXX_CMRX_CFG, cfg); + return 0; +} +EXPORT_SYMBOL(cgx_lmac_rx_tx_enable); + /* CGX Firmware interface low level support */ static int cgx_fwi_cmd_send(u64 req, u64 *resp, struct lmac *lmac) { @@ -191,36 +370,79 @@ static inline int cgx_fwi_cmd_generic(u64 req, u64 *resp, return err; } +static inline void cgx_link_usertable_init(void) +{ + cgx_speed_mbps[CGX_LINK_NONE] = 0; + cgx_speed_mbps[CGX_LINK_10M] = 10; + cgx_speed_mbps[CGX_LINK_100M] = 100; + cgx_speed_mbps[CGX_LINK_1G] = 1000; + cgx_speed_mbps[CGX_LINK_2HG] = 2500; + cgx_speed_mbps[CGX_LINK_5G] = 5000; + cgx_speed_mbps[CGX_LINK_10G] = 10000; + cgx_speed_mbps[CGX_LINK_20G] = 20000; + cgx_speed_mbps[CGX_LINK_25G] = 25000; + cgx_speed_mbps[CGX_LINK_40G] = 40000; + cgx_speed_mbps[CGX_LINK_50G] = 50000; + cgx_speed_mbps[CGX_LINK_100G] = 100000; + + cgx_lmactype_string[LMAC_MODE_SGMII] = "SGMII"; + cgx_lmactype_string[LMAC_MODE_XAUI] = "XAUI"; + cgx_lmactype_string[LMAC_MODE_RXAUI] = "RXAUI"; + cgx_lmactype_string[LMAC_MODE_10G_R] = "10G_R"; + cgx_lmactype_string[LMAC_MODE_40G_R] = "40G_R"; + cgx_lmactype_string[LMAC_MODE_QSGMII] = "QSGMII"; + cgx_lmactype_string[LMAC_MODE_25G_R] = "25G_R"; + cgx_lmactype_string[LMAC_MODE_50G_R] = "50G_R"; + cgx_lmactype_string[LMAC_MODE_100G_R] = "100G_R"; + cgx_lmactype_string[LMAC_MODE_USXGMII] = "USXGMII"; +} + +static inline void link_status_user_format(u64 lstat, + struct cgx_link_user_info *linfo, + struct cgx *cgx, u8 lmac_id) +{ + char *lmac_string; + + linfo->link_up = FIELD_GET(RESP_LINKSTAT_UP, lstat); + linfo->full_duplex = FIELD_GET(RESP_LINKSTAT_FDUPLEX, lstat); + linfo->speed = cgx_speed_mbps[FIELD_GET(RESP_LINKSTAT_SPEED, lstat)]; + linfo->lmac_type_id = cgx_get_lmac_type(cgx, lmac_id); + lmac_string = cgx_lmactype_string[linfo->lmac_type_id]; + strncpy(linfo->lmac_type, lmac_string, LMACTYPE_STR_LEN - 1); +} + /* Hardware event handlers */ static inline void cgx_link_change_handler(u64 lstat, struct lmac *lmac) { + struct cgx_link_user_info *linfo; struct cgx *cgx = lmac->cgx; struct cgx_link_event event; struct device *dev; + int err_type; dev = &cgx->pdev->dev; - event.lstat.link_up = FIELD_GET(RESP_LINKSTAT_UP, lstat); - event.lstat.full_duplex = FIELD_GET(RESP_LINKSTAT_FDUPLEX, lstat); - event.lstat.speed = FIELD_GET(RESP_LINKSTAT_SPEED, lstat); - event.lstat.err_type = FIELD_GET(RESP_LINKSTAT_ERRTYPE, lstat); + link_status_user_format(lstat, &event.link_uinfo, cgx, lmac->lmac_id); + err_type = FIELD_GET(RESP_LINKSTAT_ERRTYPE, lstat); event.cgx_id = cgx->cgx_id; event.lmac_id = lmac->lmac_id; + /* update the local copy of link status */ + lmac->link_info = event.link_uinfo; + linfo = &lmac->link_info; + if (!lmac->event_cb.notify_link_chg) { dev_dbg(dev, "cgx port %d:%d Link change handler null", cgx->cgx_id, lmac->lmac_id); - if (event.lstat.err_type != CGX_ERR_NONE) { + if (err_type != CGX_ERR_NONE) { dev_err(dev, "cgx port %d:%d Link error %d\n", - cgx->cgx_id, lmac->lmac_id, - event.lstat.err_type); + cgx->cgx_id, lmac->lmac_id, err_type); } - dev_info(dev, "cgx port %d:%d Link status %s, speed %x\n", + dev_info(dev, "cgx port %d:%d Link is %s %d Mbps\n", cgx->cgx_id, lmac->lmac_id, - event.lstat.link_up ? "UP" : "DOWN", - event.lstat.speed); + linfo->link_up ? "UP" : "DOWN", linfo->speed); return; } @@ -448,6 +670,8 @@ static int cgx_probe(struct pci_dev *pdev, const struct pci_device_id *id) list_add(&cgx->cgx_list, &cgx_list); cgx->cgx_id = cgx_get_cgx_cnt() - 1; + cgx_link_usertable_init(); + err = cgx_lmac_init(cgx); if (err) goto err_release_lmac; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.h b/drivers/net/ethernet/marvell/octeontx2/af/cgx.h index a2a7a6d72d32..ada25ed0765b 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.h @@ -11,6 +11,7 @@ #ifndef CGX_H #define CGX_H +#include "mbox.h" #include "cgx_fw_if.h" /* PCI device IDs */ @@ -24,14 +25,35 @@ #define CGX_OFFSET(x) ((x) * MAX_LMAC_PER_CGX) /* Registers */ +#define CGXX_CMRX_CFG 0x00 +#define CMR_EN BIT_ULL(55) +#define DATA_PKT_TX_EN BIT_ULL(53) +#define DATA_PKT_RX_EN BIT_ULL(54) +#define CGX_LMAC_TYPE_SHIFT 40 +#define CGX_LMAC_TYPE_MASK 0xF #define CGXX_CMRX_INT 0x040 #define FW_CGX_INT BIT_ULL(1) #define CGXX_CMRX_INT_ENA_W1S 0x058 #define CGXX_CMRX_RX_ID_MAP 0x060 +#define CGXX_CMRX_RX_STAT0 0x070 #define CGXX_CMRX_RX_LMACS 0x128 +#define CGXX_CMRX_RX_DMAC_CTL0 0x1F8 +#define CGX_DMAC_CTL0_CAM_ENABLE BIT_ULL(3) +#define CGX_DMAC_CAM_ACCEPT BIT_ULL(3) +#define CGX_DMAC_MCAST_MODE BIT_ULL(1) +#define CGX_DMAC_BCAST_MODE BIT_ULL(0) +#define CGXX_CMRX_RX_DMAC_CAM0 0x200 +#define CGX_DMAC_CAM_ADDR_ENABLE BIT_ULL(48) +#define CGXX_CMRX_RX_DMAC_CAM1 0x400 +#define CGX_RX_DMAC_ADR_MASK GENMASK_ULL(47, 0) +#define CGXX_CMRX_TX_STAT0 0x700 #define CGXX_SCRATCH0_REG 0x1050 #define CGXX_SCRATCH1_REG 0x1058 #define CGX_CONST 0x2000 +#define CGXX_SPUX_CONTROL1 0x10000 +#define CGXX_SPUX_CONTROL1_LBK BIT_ULL(14) +#define CGXX_GMP_PCS_MRX_CTL 0x30000 +#define CGXX_GMP_PCS_MRX_CTL_LBK BIT_ULL(14) #define CGX_COMMAND_REG CGXX_SCRATCH1_REG #define CGX_EVENT_REG CGXX_SCRATCH0_REG @@ -40,8 +62,22 @@ #define CGX_NVEC 37 #define CGX_LMAC_FWI 0 +enum LMAC_TYPE { + LMAC_MODE_SGMII = 0, + LMAC_MODE_XAUI = 1, + LMAC_MODE_RXAUI = 2, + LMAC_MODE_10G_R = 3, + LMAC_MODE_40G_R = 4, + LMAC_MODE_QSGMII = 6, + LMAC_MODE_25G_R = 7, + LMAC_MODE_50G_R = 8, + LMAC_MODE_100G_R = 9, + LMAC_MODE_USXGMII = 10, + LMAC_MODE_MAX, +}; + struct cgx_link_event { - struct cgx_lnk_sts lstat; + struct cgx_link_user_info link_uinfo; u8 cgx_id; u8 lmac_id; }; @@ -62,4 +98,13 @@ int cgx_get_cgx_cnt(void); int cgx_get_lmac_cnt(void *cgxd); void *cgx_get_pdata(int cgx_id); int cgx_lmac_evh_register(struct cgx_event_cb *cb, void *cgxd, int lmac_id); +int cgx_get_tx_stats(void *cgxd, int lmac_id, int idx, u64 *tx_stat); +int cgx_get_rx_stats(void *cgxd, int lmac_id, int idx, u64 *rx_stat); +int cgx_lmac_rx_tx_enable(void *cgxd, int lmac_id, bool enable); +int cgx_lmac_addr_set(u8 cgx_id, u8 lmac_id, u8 *mac_addr); +u64 cgx_lmac_addr_get(u8 cgx_id, u8 lmac_id); +void cgx_lmac_promisc_config(int cgx_id, int lmac_id, bool enable); +int cgx_lmac_internal_loopback(void *cgxd, int lmac_id, bool enable); +int cgx_get_link_info(void *cgxd, int lmac_id, + struct cgx_link_user_info *linfo); #endif /* CGX_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/common.h b/drivers/net/ethernet/marvell/octeontx2/af/common.h new file mode 100644 index 000000000000..28eb691185f4 --- /dev/null +++ b/drivers/net/ethernet/marvell/octeontx2/af/common.h @@ -0,0 +1,161 @@ +/* SPDX-License-Identifier: GPL-2.0 + * Marvell OcteonTx2 RVU Admin Function driver + * + * Copyright (C) 2018 Marvell International Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef COMMON_H +#define COMMON_H + +#include "rvu_struct.h" + +#define OTX2_ALIGN 128 /* Align to cacheline */ + +#define Q_SIZE_16 0ULL /* 16 entries */ +#define Q_SIZE_64 1ULL /* 64 entries */ +#define Q_SIZE_256 2ULL +#define Q_SIZE_1K 3ULL +#define Q_SIZE_4K 4ULL +#define Q_SIZE_16K 5ULL +#define Q_SIZE_64K 6ULL +#define Q_SIZE_256K 7ULL +#define Q_SIZE_1M 8ULL /* Million entries */ +#define Q_SIZE_MIN Q_SIZE_16 +#define Q_SIZE_MAX Q_SIZE_1M + +#define Q_COUNT(x) (16ULL << (2 * x)) +#define Q_SIZE(x, n) ((ilog2(x) - (n)) / 2) + +/* Admin queue info */ + +/* Since we intend to add only one instruction at a time, + * keep queue size to it's minimum. + */ +#define AQ_SIZE Q_SIZE_16 +/* HW head & tail pointer mask */ +#define AQ_PTR_MASK 0xFFFFF + +struct qmem { + void *base; + dma_addr_t iova; + int alloc_sz; + u8 entry_sz; + u8 align; + u32 qsize; +}; + +static inline int qmem_alloc(struct device *dev, struct qmem **q, + int qsize, int entry_sz) +{ + struct qmem *qmem; + int aligned_addr; + + if (!qsize) + return -EINVAL; + + *q = devm_kzalloc(dev, sizeof(*qmem), GFP_KERNEL); + if (!*q) + return -ENOMEM; + qmem = *q; + + qmem->entry_sz = entry_sz; + qmem->alloc_sz = (qsize * entry_sz) + OTX2_ALIGN; + qmem->base = dma_zalloc_coherent(dev, qmem->alloc_sz, + &qmem->iova, GFP_KERNEL); + if (!qmem->base) + return -ENOMEM; + + qmem->qsize = qsize; + + aligned_addr = ALIGN((u64)qmem->iova, OTX2_ALIGN); + qmem->align = (aligned_addr - qmem->iova); + qmem->base += qmem->align; + qmem->iova += qmem->align; + return 0; +} + +static inline void qmem_free(struct device *dev, struct qmem *qmem) +{ + if (!qmem) + return; + + if (qmem->base) + dma_free_coherent(dev, qmem->alloc_sz, + qmem->base - qmem->align, + qmem->iova - qmem->align); + devm_kfree(dev, qmem); +} + +struct admin_queue { + struct qmem *inst; + struct qmem *res; + spinlock_t lock; /* Serialize inst enqueue from PFs */ +}; + +/* NPA aura count */ +enum npa_aura_sz { + NPA_AURA_SZ_0, + NPA_AURA_SZ_128, + NPA_AURA_SZ_256, + NPA_AURA_SZ_512, + NPA_AURA_SZ_1K, + NPA_AURA_SZ_2K, + NPA_AURA_SZ_4K, + NPA_AURA_SZ_8K, + NPA_AURA_SZ_16K, + NPA_AURA_SZ_32K, + NPA_AURA_SZ_64K, + NPA_AURA_SZ_128K, + NPA_AURA_SZ_256K, + NPA_AURA_SZ_512K, + NPA_AURA_SZ_1M, + NPA_AURA_SZ_MAX, +}; + +#define NPA_AURA_COUNT(x) (1ULL << ((x) + 6)) + +/* NPA AQ result structure for init/read/write of aura HW contexts */ +struct npa_aq_aura_res { + struct npa_aq_res_s res; + struct npa_aura_s aura_ctx; + struct npa_aura_s ctx_mask; +}; + +/* NPA AQ result structure for init/read/write of pool HW contexts */ +struct npa_aq_pool_res { + struct npa_aq_res_s res; + struct npa_pool_s pool_ctx; + struct npa_pool_s ctx_mask; +}; + +/* NIX Transmit schedulers */ +enum nix_scheduler { + NIX_TXSCH_LVL_SMQ = 0x0, + NIX_TXSCH_LVL_MDQ = 0x0, + NIX_TXSCH_LVL_TL4 = 0x1, + NIX_TXSCH_LVL_TL3 = 0x2, + NIX_TXSCH_LVL_TL2 = 0x3, + NIX_TXSCH_LVL_TL1 = 0x4, + NIX_TXSCH_LVL_CNT = 0x5, +}; + +/* NIX LSO format indices. + * As of now TSO is the only one using, so statically assigning indices. + */ +#define NIX_LSO_FORMAT_IDX_TSOV4 0 +#define NIX_LSO_FORMAT_IDX_TSOV6 1 + +/* RSS info */ +#define MAX_RSS_GROUPS 8 +/* Group 0 has to be used in default pkt forwarding MCAM entries + * reserved for NIXLFs. Groups 1-7 can be used for RSS for ntuple + * filters. + */ +#define DEFAULT_RSS_CONTEXT_GROUP 0 +#define MAX_RSS_INDIR_TBL_SIZE 256 /* 1 << Max adder bits */ + +#endif /* COMMON_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h index bedf0ee62450..c339024d04e0 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h @@ -124,21 +124,50 @@ M(ATTACH_RESOURCES, 0x002, rsrc_attach, msg_rsp) \ M(DETACH_RESOURCES, 0x003, rsrc_detach, msg_rsp) \ M(MSIX_OFFSET, 0x004, msg_req, msix_offset_rsp) \ /* CGX mbox IDs (range 0x200 - 0x3FF) */ \ +M(CGX_START_RXTX, 0x200, msg_req, msg_rsp) \ +M(CGX_STOP_RXTX, 0x201, msg_req, msg_rsp) \ +M(CGX_STATS, 0x202, msg_req, cgx_stats_rsp) \ +M(CGX_MAC_ADDR_SET, 0x203, cgx_mac_addr_set_or_get, \ + cgx_mac_addr_set_or_get) \ +M(CGX_MAC_ADDR_GET, 0x204, cgx_mac_addr_set_or_get, \ + cgx_mac_addr_set_or_get) \ +M(CGX_PROMISC_ENABLE, 0x205, msg_req, msg_rsp) \ +M(CGX_PROMISC_DISABLE, 0x206, msg_req, msg_rsp) \ +M(CGX_START_LINKEVENTS, 0x207, msg_req, msg_rsp) \ +M(CGX_STOP_LINKEVENTS, 0x208, msg_req, msg_rsp) \ +M(CGX_GET_LINKINFO, 0x209, msg_req, cgx_link_info_msg) \ +M(CGX_INTLBK_ENABLE, 0x20A, msg_req, msg_rsp) \ +M(CGX_INTLBK_DISABLE, 0x20B, msg_req, msg_rsp) \ /* NPA mbox IDs (range 0x400 - 0x5FF) */ \ +M(NPA_LF_ALLOC, 0x400, npa_lf_alloc_req, npa_lf_alloc_rsp) \ +M(NPA_LF_FREE, 0x401, msg_req, msg_rsp) \ +M(NPA_AQ_ENQ, 0x402, npa_aq_enq_req, npa_aq_enq_rsp) \ +M(NPA_HWCTX_DISABLE, 0x403, hwctx_disable_req, msg_rsp) \ /* SSO/SSOW mbox IDs (range 0x600 - 0x7FF) */ \ /* TIM mbox IDs (range 0x800 - 0x9FF) */ \ /* CPT mbox IDs (range 0xA00 - 0xBFF) */ \ /* NPC mbox IDs (range 0x6000 - 0x7FFF) */ \ /* NIX mbox IDs (range 0x8000 - 0xFFFF) */ \ +M(NIX_LF_ALLOC, 0x8000, nix_lf_alloc_req, nix_lf_alloc_rsp) \ +M(NIX_LF_FREE, 0x8001, msg_req, msg_rsp) \ +M(NIX_AQ_ENQ, 0x8002, nix_aq_enq_req, nix_aq_enq_rsp) \ +M(NIX_HWCTX_DISABLE, 0x8003, hwctx_disable_req, msg_rsp) + +/* Messages initiated by AF (range 0xC00 - 0xDFF) */ +#define MBOX_UP_CGX_MESSAGES \ +M(CGX_LINK_EVENT, 0xC00, cgx_link_info_msg, msg_rsp) enum { #define M(_name, _id, _1, _2) MBOX_MSG_ ## _name = _id, MBOX_MESSAGES +MBOX_UP_CGX_MESSAGES #undef M }; /* Mailbox message formats */ +#define RVU_DEFAULT_PF_FUNC 0xFFFF + /* Generic request msg used for those mbox messages which * don't send any data in the request. */ @@ -208,4 +237,181 @@ struct msix_offset_rsp { u16 cptlf_msixoff[MAX_RVU_BLKLF_CNT]; }; +/* CGX mbox message formats */ + +struct cgx_stats_rsp { + struct mbox_msghdr hdr; +#define CGX_RX_STATS_COUNT 13 +#define CGX_TX_STATS_COUNT 18 + u64 rx_stats[CGX_RX_STATS_COUNT]; + u64 tx_stats[CGX_TX_STATS_COUNT]; +}; + +/* Structure for requesting the operation for + * setting/getting mac address in the CGX interface + */ +struct cgx_mac_addr_set_or_get { + struct mbox_msghdr hdr; + u8 mac_addr[ETH_ALEN]; +}; + +struct cgx_link_user_info { + uint64_t link_up:1; + uint64_t full_duplex:1; + uint64_t lmac_type_id:4; + uint64_t speed:20; /* speed in Mbps */ +#define LMACTYPE_STR_LEN 16 + char lmac_type[LMACTYPE_STR_LEN]; +}; + +struct cgx_link_info_msg { + struct mbox_msghdr hdr; + struct cgx_link_user_info link_info; +}; + +/* NPA mbox message formats */ + +/* NPA mailbox error codes + * Range 301 - 400. + */ +enum npa_af_status { + NPA_AF_ERR_PARAM = -301, + NPA_AF_ERR_AQ_FULL = -302, + NPA_AF_ERR_AQ_ENQUEUE = -303, + NPA_AF_ERR_AF_LF_INVALID = -304, + NPA_AF_ERR_AF_LF_ALLOC = -305, + NPA_AF_ERR_LF_RESET = -306, +}; + +/* For NPA LF context alloc and init */ +struct npa_lf_alloc_req { + struct mbox_msghdr hdr; + int node; + int aura_sz; /* No of auras */ + u32 nr_pools; /* No of pools */ +}; + +struct npa_lf_alloc_rsp { + struct mbox_msghdr hdr; + u32 stack_pg_ptrs; /* No of ptrs per stack page */ + u32 stack_pg_bytes; /* Size of stack page */ + u16 qints; /* NPA_AF_CONST::QINTS */ +}; + +/* NPA AQ enqueue msg */ +struct npa_aq_enq_req { + struct mbox_msghdr hdr; + u32 aura_id; + u8 ctype; + u8 op; + union { + /* Valid when op == WRITE/INIT and ctype == AURA. + * LF fills the pool_id in aura.pool_addr. AF will translate + * the pool_id to pool context pointer. + */ + struct npa_aura_s aura; + /* Valid when op == WRITE/INIT and ctype == POOL */ + struct npa_pool_s pool; + }; + /* Mask data when op == WRITE (1=write, 0=don't write) */ + union { + /* Valid when op == WRITE and ctype == AURA */ + struct npa_aura_s aura_mask; + /* Valid when op == WRITE and ctype == POOL */ + struct npa_pool_s pool_mask; + }; +}; + +struct npa_aq_enq_rsp { + struct mbox_msghdr hdr; + union { + /* Valid when op == READ and ctype == AURA */ + struct npa_aura_s aura; + /* Valid when op == READ and ctype == POOL */ + struct npa_pool_s pool; + }; +}; + +/* Disable all contexts of type 'ctype' */ +struct hwctx_disable_req { + struct mbox_msghdr hdr; + u8 ctype; +}; + +/* NIX mailbox error codes + * Range 401 - 500. + */ +enum nix_af_status { + NIX_AF_ERR_PARAM = -401, + NIX_AF_ERR_AQ_FULL = -402, + NIX_AF_ERR_AQ_ENQUEUE = -403, + NIX_AF_ERR_AF_LF_INVALID = -404, + NIX_AF_ERR_AF_LF_ALLOC = -405, + NIX_AF_ERR_TLX_ALLOC_FAIL = -406, + NIX_AF_ERR_TLX_INVALID = -407, + NIX_AF_ERR_RSS_SIZE_INVALID = -408, + NIX_AF_ERR_RSS_GRPS_INVALID = -409, + NIX_AF_ERR_FRS_INVALID = -410, + NIX_AF_ERR_RX_LINK_INVALID = -411, + NIX_AF_INVAL_TXSCHQ_CFG = -412, + NIX_AF_SMQ_FLUSH_FAILED = -413, + NIX_AF_ERR_LF_RESET = -414, +}; + +/* For NIX LF context alloc and init */ +struct nix_lf_alloc_req { + struct mbox_msghdr hdr; + int node; + u32 rq_cnt; /* No of receive queues */ + u32 sq_cnt; /* No of send queues */ + u32 cq_cnt; /* No of completion queues */ + u8 xqe_sz; + u16 rss_sz; + u8 rss_grps; + u16 npa_func; + u16 sso_func; + u64 rx_cfg; /* See NIX_AF_LF(0..127)_RX_CFG */ +}; + +struct nix_lf_alloc_rsp { + struct mbox_msghdr hdr; + u16 sqb_size; + u8 lso_tsov4_idx; + u8 lso_tsov6_idx; + u8 mac_addr[ETH_ALEN]; +}; + +/* NIX AQ enqueue msg */ +struct nix_aq_enq_req { + struct mbox_msghdr hdr; + u32 qidx; + u8 ctype; + u8 op; + union { + struct nix_rq_ctx_s rq; + struct nix_sq_ctx_s sq; + struct nix_cq_ctx_s cq; + struct nix_rsse_s rss; + struct nix_rx_mce_s mce; + }; + union { + struct nix_rq_ctx_s rq_mask; + struct nix_sq_ctx_s sq_mask; + struct nix_cq_ctx_s cq_mask; + struct nix_rsse_s rss_mask; + struct nix_rx_mce_s mce_mask; + }; +}; + +struct nix_aq_enq_rsp { + struct mbox_msghdr hdr; + union { + struct nix_rq_ctx_s rq; + struct nix_sq_ctx_s sq; + struct nix_cq_ctx_s cq; + struct nix_rsse_s rss; + struct nix_rx_mce_s mce; + }; +}; + #endif /* MBOX_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c index 2033f42c3226..c06cca9c9e1a 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c @@ -47,18 +47,18 @@ MODULE_DEVICE_TABLE(pci, rvu_id_table); */ int rvu_poll_reg(struct rvu *rvu, u64 block, u64 offset, u64 mask, bool zero) { + unsigned long timeout = jiffies + usecs_to_jiffies(100); void __iomem *reg; - int timeout = 100; u64 reg_val; reg = rvu->afreg_base + ((block << 28) | offset); - while (timeout) { + while (time_before(jiffies, timeout)) { reg_val = readq(reg); if (zero && !(reg_val & mask)) return 0; if (!zero && (reg_val & mask)) return 0; - usleep_range(1, 2); + usleep_range(1, 5); timeout--; } return -EBUSY; @@ -361,6 +361,19 @@ static void rvu_check_block_implemented(struct rvu *rvu) } } +int rvu_lf_reset(struct rvu *rvu, struct rvu_block *block, int lf) +{ + int err; + + if (!block->implemented) + return 0; + + rvu_write64(rvu, block->addr, block->lfreset_reg, lf | BIT_ULL(12)); + err = rvu_poll_reg(rvu, block->addr, block->lfreset_reg, BIT_ULL(12), + true); + return err; +} + static void rvu_block_reset(struct rvu *rvu, int blkaddr, u64 rst_reg) { struct rvu_block *block = &rvu->hw->block[blkaddr]; @@ -552,6 +565,9 @@ static void rvu_free_hw_resources(struct rvu *rvu) int id, max_msix; u64 cfg; + rvu_npa_freemem(rvu); + rvu_nix_freemem(rvu); + /* Free block LF bitmaps */ for (id = 0; id < BLK_COUNT; id++) { block = &hw->block[id]; @@ -755,6 +771,54 @@ init: rvu_scan_block(rvu, block); } + err = rvu_npa_init(rvu); + if (err) + return err; + + err = rvu_nix_init(rvu); + if (err) + return err; + + return 0; +} + +/* NPA and NIX admin queue APIs */ +void rvu_aq_free(struct rvu *rvu, struct admin_queue *aq) +{ + if (!aq) + return; + + qmem_free(rvu->dev, aq->inst); + qmem_free(rvu->dev, aq->res); + devm_kfree(rvu->dev, aq); +} + +int rvu_aq_alloc(struct rvu *rvu, struct admin_queue **ad_queue, + int qsize, int inst_size, int res_size) +{ + struct admin_queue *aq; + int err; + + *ad_queue = devm_kzalloc(rvu->dev, sizeof(*aq), GFP_KERNEL); + if (!*ad_queue) + return -ENOMEM; + aq = *ad_queue; + + /* Alloc memory for instructions i.e AQ */ + err = qmem_alloc(rvu->dev, &aq->inst, qsize, inst_size); + if (err) { + devm_kfree(rvu->dev, aq); + return err; + } + + /* Alloc memory for results */ + err = qmem_alloc(rvu->dev, &aq->res, qsize, res_size); + if (err) { + rvu_aq_free(rvu, aq); + return err; + } + + spin_lock_init(&aq->lock); return 0; } @@ -1316,6 +1380,63 @@ static void rvu_mbox_handler(struct work_struct *work) otx2_mbox_msg_send(mbox, pf); } +static void rvu_mbox_up_handler(struct work_struct *work) +{ + struct rvu_work *mwork = container_of(work, struct rvu_work, work); + struct rvu *rvu = mwork->rvu; + struct otx2_mbox_dev *mdev; + struct mbox_hdr *rsp_hdr; + struct mbox_msghdr *msg; + struct otx2_mbox *mbox; + int offset, id; + u16 pf; + + mbox = &rvu->mbox_up; + pf = mwork - rvu->mbox_wrk_up; + mdev = &mbox->dev[pf]; + + rsp_hdr = mdev->mbase + mbox->rx_start; + if (rsp_hdr->num_msgs == 0) { + dev_warn(rvu->dev, "mbox up handler: num_msgs = 0\n"); + return; + } + + offset = mbox->rx_start + ALIGN(sizeof(*rsp_hdr), MBOX_MSG_ALIGN); + + for (id = 0; id < rsp_hdr->num_msgs; id++) { + msg = mdev->mbase + offset; + + if (msg->id >= MBOX_MSG_MAX) { + dev_err(rvu->dev, + "Mbox msg with unknown ID 0x%x\n", msg->id); + goto end; + } + + if (msg->sig != OTX2_MBOX_RSP_SIG) { + dev_err(rvu->dev, + "Mbox msg with wrong signature %x, ID 0x%x\n", + msg->sig, msg->id); + goto end; + } + + switch (msg->id) { + case MBOX_MSG_CGX_LINK_EVENT: + break; + default: + if (msg->rc) + dev_err(rvu->dev, + "Mbox msg response has err %d, ID 0x%x\n", + msg->rc, msg->id); + break; + } +end: + offset = mbox->rx_start + msg->next_msgoff; + mdev->msgs_acked++; + } + + otx2_mbox_reset(mbox, 0); +} + static int rvu_mbox_init(struct rvu *rvu) { struct rvu_hwinfo *hw = rvu->hw; @@ -1337,6 +1458,13 @@ static int rvu_mbox_init(struct rvu *rvu) goto exit; } + rvu->mbox_wrk_up = devm_kcalloc(rvu->dev, hw->total_pfs, + sizeof(struct rvu_work), GFP_KERNEL); + if (!rvu->mbox_wrk_up) { + err = -ENOMEM; + goto exit; + } + /* Map mbox region shared with PFs */ bar4_addr = rvu_read64(rvu, BLKADDR_RVUM, RVU_AF_PF_BAR4_ADDR); /* Mailbox is a reserved memory (in RAM) region shared between @@ -1355,12 +1483,23 @@ static int rvu_mbox_init(struct rvu *rvu) if (err) goto exit; + err = otx2_mbox_init(&rvu->mbox_up, hwbase, rvu->pdev, rvu->afreg_base, + MBOX_DIR_AFPF_UP, hw->total_pfs); + if (err) + goto exit; + for (pf = 0; pf < hw->total_pfs; pf++) { mwork = &rvu->mbox_wrk[pf]; mwork->rvu = rvu; INIT_WORK(&mwork->work, rvu_mbox_handler); } + for (pf = 0; pf < hw->total_pfs; pf++) { + mwork = &rvu->mbox_wrk_up[pf]; + mwork->rvu = rvu; + INIT_WORK(&mwork->work, rvu_mbox_up_handler); + } + return 0; exit: if (hwbase) @@ -1381,6 +1520,7 @@ static void rvu_mbox_destroy(struct rvu *rvu) iounmap((void __iomem *)rvu->mbox.hwbase); otx2_mbox_destroy(&rvu->mbox); + otx2_mbox_destroy(&rvu->mbox_up); } static irqreturn_t rvu_mbox_intr_handler(int irq, void *rvu_irq) @@ -1407,6 +1547,12 @@ static irqreturn_t rvu_mbox_intr_handler(int irq, void *rvu_irq) if (hdr->num_msgs) queue_work(rvu->mbox_wq, &rvu->mbox_wrk[pf].work); + mbox = &rvu->mbox_up; + mdev = &mbox->dev[pf]; + hdr = mdev->mbase + mbox->rx_start; + if (hdr->num_msgs) + queue_work(rvu->mbox_wq, + &rvu->mbox_wrk_up[pf].work); } } diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h index d169fa9eb45e..b48b5af83f1d 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h @@ -12,6 +12,7 @@ #define RVU_H #include "rvu_struct.h" +#include "common.h" #include "mbox.h" /* PCI device IDs */ @@ -41,7 +42,8 @@ struct rsrc_bmap { }; struct rvu_block { - struct rsrc_bmap lf; + struct rsrc_bmap lf; + struct admin_queue *aq; /* NIX/NPA AQ */ u16 *fn_map; /* LF to pcifunc mapping */ bool multislot; bool implemented; @@ -70,14 +72,50 @@ struct rvu_pfvf { struct rsrc_bmap msix; /* Bitmap for MSIX vector alloc */ #define MSIX_BLKLF(blkaddr, lf) (((blkaddr) << 8) | ((lf) & 0xFF)) u16 *msix_lfmap; /* Vector to block LF mapping */ + + /* NPA contexts */ + struct qmem *aura_ctx; + struct qmem *pool_ctx; + struct qmem *npa_qints_ctx; + unsigned long *aura_bmap; + unsigned long *pool_bmap; + + /* NIX contexts */ + struct qmem *rq_ctx; + struct qmem *sq_ctx; + struct qmem *cq_ctx; + struct qmem *rss_ctx; + struct qmem *cq_ints_ctx; + struct qmem *nix_qints_ctx; + unsigned long *sq_bmap; + unsigned long *rq_bmap; + unsigned long *cq_bmap; + + u8 mac_addr[ETH_ALEN]; /* MAC address of this PF/VF */ +}; + +struct nix_txsch { + struct rsrc_bmap schq; + u8 lvl; + u16 *pfvf_map; +}; + +struct nix_hw { + struct nix_txsch txsch[NIX_TXSCH_LVL_CNT]; /* Tx schedulers */ }; struct rvu_hwinfo { u8 total_pfs; /* MAX RVU PFs HW supports */ u16 total_vfs; /* Max RVU VFs HW supports */ u16 max_vfs_per_pf; /* Max VFs that can be attached to a PF */ + u8 cgx; + u8 lmac_per_cgx; + u8 cgx_links; + u8 lbk_links; + u8 sdp_links; struct rvu_block block[BLK_COUNT]; /* Block info */ + struct nix_hw *nix0; }; struct rvu { @@ -93,6 +131,8 @@ struct rvu { /* Mbox */ struct otx2_mbox mbox; struct rvu_work *mbox_wrk; + struct otx2_mbox mbox_up; + struct rvu_work *mbox_wrk_up; struct workqueue_struct *mbox_wq; /* MSI-X */ @@ -109,6 +149,7 @@ struct rvu { u16 *cgxlmac2pf_map; /* bitmap of mapped pfs for * every cgx lmac port */ + unsigned long pf_notify_bmap; /* Flags for PF notification */ void **cgx_idmap; /* cgx id to cgx data map table */ struct work_struct cgx_evh_work; struct workqueue_struct *cgx_evh_wq; @@ -149,10 +190,84 @@ struct rvu_pfvf *rvu_get_pfvf(struct rvu *rvu, int pcifunc); void rvu_get_pf_numvfs(struct rvu *rvu, int pf, int *numvfs, int *hwvf); bool is_block_implemented(struct rvu_hwinfo *hw, int blkaddr); int rvu_get_lf(struct rvu *rvu, struct rvu_block *block, u16 pcifunc, u16 slot); +int rvu_lf_reset(struct rvu *rvu, struct rvu_block *block, int lf); int rvu_get_blkaddr(struct rvu *rvu, int blktype, u16 pcifunc); int rvu_poll_reg(struct rvu *rvu, u64 block, u64 offset, u64 mask, bool zero); +/* NPA/NIX AQ APIs */ +int rvu_aq_alloc(struct rvu *rvu, struct admin_queue **ad_queue, + int qsize, int inst_size, int res_size); +void rvu_aq_free(struct rvu *rvu, struct admin_queue *aq); + /* CGX APIs */ +static inline bool is_pf_cgxmapped(struct rvu *rvu, u8 pf) +{ + return (pf >= PF_CGXMAP_BASE && pf <= rvu->cgx_mapped_pfs); +} + +static inline void rvu_get_cgx_lmac_id(u8 map, u8 *cgx_id, u8 *lmac_id) +{ + *cgx_id = (map >> 4) & 0xF; + *lmac_id = (map & 0xF); +} + int rvu_cgx_probe(struct rvu *rvu); void rvu_cgx_wq_destroy(struct rvu *rvu); +int rvu_cgx_config_rxtx(struct rvu *rvu, u16 pcifunc, bool start); +int rvu_mbox_handler_CGX_START_RXTX(struct rvu *rvu, struct msg_req *req, + struct msg_rsp *rsp); +int rvu_mbox_handler_CGX_STOP_RXTX(struct rvu *rvu, struct msg_req *req, + struct msg_rsp *rsp); +int rvu_mbox_handler_CGX_STATS(struct rvu *rvu, struct msg_req *req, + struct cgx_stats_rsp *rsp); +int rvu_mbox_handler_CGX_MAC_ADDR_SET(struct rvu *rvu, + struct cgx_mac_addr_set_or_get *req, + struct cgx_mac_addr_set_or_get *rsp); +int rvu_mbox_handler_CGX_MAC_ADDR_GET(struct rvu *rvu, + struct cgx_mac_addr_set_or_get *req, + struct cgx_mac_addr_set_or_get *rsp); +int rvu_mbox_handler_CGX_PROMISC_ENABLE(struct rvu *rvu, struct msg_req *req, + struct msg_rsp *rsp); +int rvu_mbox_handler_CGX_PROMISC_DISABLE(struct rvu *rvu, struct msg_req *req, + struct msg_rsp *rsp); +int rvu_mbox_handler_CGX_START_LINKEVENTS(struct rvu *rvu, struct msg_req *req, + struct msg_rsp *rsp); +int rvu_mbox_handler_CGX_STOP_LINKEVENTS(struct rvu *rvu, struct msg_req *req, + struct msg_rsp *rsp); +int rvu_mbox_handler_CGX_GET_LINKINFO(struct rvu *rvu, struct msg_req *req, + struct cgx_link_info_msg *rsp); +int rvu_mbox_handler_CGX_INTLBK_ENABLE(struct rvu *rvu, struct msg_req *req, + struct msg_rsp *rsp); +int rvu_mbox_handler_CGX_INTLBK_DISABLE(struct rvu *rvu, struct msg_req *req, + struct msg_rsp *rsp); + +/* NPA APIs */ +int rvu_npa_init(struct rvu *rvu); +void rvu_npa_freemem(struct rvu *rvu); +int rvu_mbox_handler_NPA_AQ_ENQ(struct rvu *rvu, + struct npa_aq_enq_req *req, + struct npa_aq_enq_rsp *rsp); +int rvu_mbox_handler_NPA_HWCTX_DISABLE(struct rvu *rvu, + struct hwctx_disable_req *req, + struct msg_rsp *rsp); +int rvu_mbox_handler_NPA_LF_ALLOC(struct rvu *rvu, + struct npa_lf_alloc_req *req, + struct npa_lf_alloc_rsp *rsp); +int rvu_mbox_handler_NPA_LF_FREE(struct rvu *rvu, struct msg_req *req, + struct msg_rsp *rsp); + +/* NIX APIs */ +int rvu_nix_init(struct rvu *rvu); +void rvu_nix_freemem(struct rvu *rvu); +int rvu_mbox_handler_NIX_LF_ALLOC(struct rvu *rvu, + struct nix_lf_alloc_req *req, + struct nix_lf_alloc_rsp *rsp); +int rvu_mbox_handler_NIX_LF_FREE(struct rvu *rvu, struct msg_req *req, + struct msg_rsp *rsp); +int rvu_mbox_handler_NIX_AQ_ENQ(struct rvu *rvu, + struct nix_aq_enq_req *req, + struct nix_aq_enq_rsp *rsp); +int rvu_mbox_handler_NIX_HWCTX_DISABLE(struct rvu *rvu, + struct hwctx_disable_req *req, + struct msg_rsp *rsp); #endif /* RVU_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c index 5ecc22308b22..e0aee2176637 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c @@ -20,6 +20,31 @@ struct cgx_evq_entry { struct cgx_link_event link_event; }; +#define M(_name, _id, _req_type, _rsp_type) \ +static struct _req_type __maybe_unused \ +*otx2_mbox_alloc_msg_ ## _name(struct rvu *rvu, int devid) \ +{ \ + struct _req_type *req; \ + \ + req = (struct _req_type *)otx2_mbox_alloc_msg_rsp( \ + &rvu->mbox_up, devid, sizeof(struct _req_type), \ + sizeof(struct _rsp_type)); \ + if (!req) \ + return NULL; \ + req->hdr.sig = OTX2_MBOX_REQ_SIG; \ + req->hdr.id = _id; \ + return req; \ +} + +MBOX_UP_CGX_MESSAGES +#undef M + +/* Returns bitmap of mapped PFs */ +static inline u16 cgxlmac_to_pfmap(struct rvu *rvu, u8 cgx_id, u8 lmac_id) +{ + return rvu->cgxlmac2pf_map[CGX_OFFSET(cgx_id) + lmac_id]; +} + static inline u8 cgxlmac_id_to_bmap(u8 cgx_id, u8 lmac_id) { return ((cgx_id & 0xF) << 4) | (lmac_id & 0xF); @@ -77,6 +102,34 @@ static int rvu_map_cgx_lmac_pf(struct rvu *rvu) return 0; } +static int rvu_cgx_send_link_info(int cgx_id, int lmac_id, struct rvu *rvu) +{ + struct cgx_evq_entry *qentry; + unsigned long flags; + int err; + + qentry = kmalloc(sizeof(*qentry), GFP_KERNEL); + if (!qentry) + return -ENOMEM; + + /* Lock the event queue before we read the local link status */ + spin_lock_irqsave(&rvu->cgx_evq_lock, flags); + err = cgx_get_link_info(rvu_cgx_pdata(cgx_id, rvu), lmac_id, + &qentry->link_event.link_uinfo); + qentry->link_event.cgx_id = cgx_id; + qentry->link_event.lmac_id = lmac_id; + if (err) + goto skip_add; + list_add_tail(&qentry->evq_node, &rvu->cgx_evq_head); +skip_add: + spin_unlock_irqrestore(&rvu->cgx_evq_lock, flags); + + /* start worker to process the events */ + queue_work(rvu->cgx_evh_wq, &rvu->cgx_evh_work); + + return 0; +} + /* This is called from interrupt context and is expected to be atomic */ static int cgx_lmac_postevent(struct cgx_link_event *event, void *data) { @@ -98,6 +151,41 @@ static int cgx_lmac_postevent(struct cgx_link_event *event, void *data) return 0; } +static void cgx_notify_pfs(struct cgx_link_event *event, struct rvu *rvu) +{ + struct cgx_link_user_info *linfo; + struct cgx_link_info_msg *msg; + unsigned long pfmap; + int err, pfid; + + linfo = &event->link_uinfo; + pfmap = cgxlmac_to_pfmap(rvu, event->cgx_id, event->lmac_id); + + do { + pfid = find_first_bit(&pfmap, 16); + clear_bit(pfid, &pfmap); + + /* check if notification is enabled */ + if (!test_bit(pfid, &rvu->pf_notify_bmap)) { + dev_info(rvu->dev, "cgx %d: lmac %d Link status %s\n", + event->cgx_id, event->lmac_id, + linfo->link_up ? "UP" : "DOWN"); + continue; + } + + /* Send mbox message to PF */ + msg = otx2_mbox_alloc_msg_CGX_LINK_EVENT(rvu, pfid); + if (!msg) + continue; + msg->link_info = *linfo; + otx2_mbox_msg_send(&rvu->mbox_up, pfid); + err = otx2_mbox_wait_for_rsp(&rvu->mbox_up, pfid); + if (err) + dev_warn(rvu->dev, "notification to pf %d failed\n", + pfid); + } while (pfmap); +} + static void cgx_evhandler_task(struct work_struct *work) { struct rvu *rvu = container_of(work, struct rvu, cgx_evh_work); @@ -119,7 +207,8 @@ static void cgx_evhandler_task(struct work_struct *work) event = &qentry->link_event; - /* Do nothing for now */ + /* process event */ + cgx_notify_pfs(event, rvu); kfree(qentry); } while (1); } @@ -192,3 +281,232 @@ int rvu_cgx_probe(struct rvu *rvu) cgx_lmac_event_handler_init(rvu); return 0; } + +int rvu_cgx_config_rxtx(struct rvu *rvu, u16 pcifunc, bool start) +{ + int pf = rvu_get_pf(pcifunc); + u8 cgx_id, lmac_id; + + /* This msg is expected only from PFs that are mapped to CGX LMACs, + * if received from other PF/VF simply ACK, nothing to do. + */ + if ((pcifunc & RVU_PFVF_FUNC_MASK) || !is_pf_cgxmapped(rvu, pf)) + return -ENODEV; + + rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id); + + cgx_lmac_rx_tx_enable(rvu_cgx_pdata(cgx_id, rvu), lmac_id, start); + + return 0; +} + +int rvu_mbox_handler_CGX_START_RXTX(struct rvu *rvu, struct msg_req *req, + struct msg_rsp *rsp) +{ + rvu_cgx_config_rxtx(rvu, req->hdr.pcifunc, true); + return 0; +} + +int rvu_mbox_handler_CGX_STOP_RXTX(struct rvu *rvu, struct msg_req *req, + struct msg_rsp *rsp) +{ + rvu_cgx_config_rxtx(rvu, req->hdr.pcifunc, false); + return 0; +} + +int rvu_mbox_handler_CGX_STATS(struct rvu *rvu, struct msg_req *req, + struct cgx_stats_rsp *rsp) +{ + int pf = rvu_get_pf(req->hdr.pcifunc); + int stat = 0, err = 0; + u64 tx_stat, rx_stat; + u8 cgx_idx, lmac; + void *cgxd; + + if ((req->hdr.pcifunc & RVU_PFVF_FUNC_MASK) || + !is_pf_cgxmapped(rvu, pf)) + return -ENODEV; + + rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_idx, &lmac); + cgxd = rvu_cgx_pdata(cgx_idx, rvu); + + /* Rx stats */ + while (stat < CGX_RX_STATS_COUNT) { + err = cgx_get_rx_stats(cgxd, lmac, stat, &rx_stat); + if (err) + return err; + rsp->rx_stats[stat] = rx_stat; + stat++; + } + + /* Tx stats */ + stat = 0; + while (stat < CGX_TX_STATS_COUNT) { + err = cgx_get_tx_stats(cgxd, lmac, stat, &tx_stat); + if (err) + return err; + rsp->tx_stats[stat] = tx_stat; + stat++; + } + return 0; +} + +int rvu_mbox_handler_CGX_MAC_ADDR_SET(struct rvu *rvu, + struct cgx_mac_addr_set_or_get *req, + struct cgx_mac_addr_set_or_get *rsp) +{ + int pf = rvu_get_pf(req->hdr.pcifunc); + u8 cgx_id, lmac_id; + + rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id); + + cgx_lmac_addr_set(cgx_id, lmac_id, req->mac_addr); + + return 0; +} + +int rvu_mbox_handler_CGX_MAC_ADDR_GET(struct rvu *rvu, + struct cgx_mac_addr_set_or_get *req, + struct cgx_mac_addr_set_or_get *rsp) +{ + int pf = rvu_get_pf(req->hdr.pcifunc); + u8 cgx_id, lmac_id; + int rc = 0, i; + u64 cfg; + + rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id); + + rsp->hdr.rc = rc; + cfg = cgx_lmac_addr_get(cgx_id, lmac_id); + /* copy 48 bit mac address to req->mac_addr */ + for (i = 0; i < ETH_ALEN; i++) + rsp->mac_addr[i] = cfg >> (ETH_ALEN - 1 - i) * 8; + return 0; +} + +int rvu_mbox_handler_CGX_PROMISC_ENABLE(struct rvu *rvu, struct msg_req *req, + struct msg_rsp *rsp) +{ + u16 pcifunc = req->hdr.pcifunc; + int pf = rvu_get_pf(pcifunc); + u8 cgx_id, lmac_id; + + /* This msg is expected only from PFs that are mapped to CGX LMACs, + * if received from other PF/VF simply ACK, nothing to do. + */ + if ((req->hdr.pcifunc & RVU_PFVF_FUNC_MASK) || + !is_pf_cgxmapped(rvu, pf)) + return -ENODEV; + + rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id); + + cgx_lmac_promisc_config(cgx_id, lmac_id, true); + return 0; +} + +int rvu_mbox_handler_CGX_PROMISC_DISABLE(struct rvu *rvu, struct msg_req *req, + struct msg_rsp *rsp) +{ + u16 pcifunc = req->hdr.pcifunc; + int pf = rvu_get_pf(pcifunc); + u8 cgx_id, lmac_id; + + /* This msg is expected only from PFs that are mapped to CGX LMACs, + * if received from other PF/VF simply ACK, nothing to do. + */ + if ((req->hdr.pcifunc & RVU_PFVF_FUNC_MASK) || + !is_pf_cgxmapped(rvu, pf)) + return -ENODEV; + + rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id); + + cgx_lmac_promisc_config(cgx_id, lmac_id, false); + return 0; +} + +static int rvu_cgx_config_linkevents(struct rvu *rvu, u16 pcifunc, bool en) +{ + int pf = rvu_get_pf(pcifunc); + u8 cgx_id, lmac_id; + + /* This msg is expected only from PFs that are mapped to CGX LMACs, + * if received from other PF/VF simply ACK, nothing to do. + */ + if ((pcifunc & RVU_PFVF_FUNC_MASK) || !is_pf_cgxmapped(rvu, pf)) + return -ENODEV; + + rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id); + + if (en) { + set_bit(pf, &rvu->pf_notify_bmap); + /* Send the current link status to PF */ + rvu_cgx_send_link_info(cgx_id, lmac_id, rvu); + } else { + clear_bit(pf, &rvu->pf_notify_bmap); + } + + return 0; +} + +int rvu_mbox_handler_CGX_START_LINKEVENTS(struct rvu *rvu, struct msg_req *req, + struct msg_rsp *rsp) +{ + rvu_cgx_config_linkevents(rvu, req->hdr.pcifunc, true); + return 0; +} + +int rvu_mbox_handler_CGX_STOP_LINKEVENTS(struct rvu *rvu, struct msg_req *req, + struct msg_rsp *rsp) +{ + rvu_cgx_config_linkevents(rvu, req->hdr.pcifunc, false); + return 0; +} + +int rvu_mbox_handler_CGX_GET_LINKINFO(struct rvu *rvu, struct msg_req *req, + struct cgx_link_info_msg *rsp) +{ + u8 cgx_id, lmac_id; + int pf, err; + + pf = rvu_get_pf(req->hdr.pcifunc); + + if (!is_pf_cgxmapped(rvu, pf)) + return -ENODEV; + + rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id); + + err = cgx_get_link_info(rvu_cgx_pdata(cgx_id, rvu), lmac_id, + &rsp->link_info); + return err; +} + +static int rvu_cgx_config_intlbk(struct rvu *rvu, u16 pcifunc, bool en) +{ + int pf = rvu_get_pf(pcifunc); + u8 cgx_id, lmac_id; + + /* This msg is expected only from PFs that are mapped to CGX LMACs, + * if received from other PF/VF simply ACK, nothing to do. + */ + if ((pcifunc & RVU_PFVF_FUNC_MASK) || !is_pf_cgxmapped(rvu, pf)) + return -ENODEV; + + rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id); + + return cgx_lmac_internal_loopback(rvu_cgx_pdata(cgx_id, rvu), + lmac_id, en); +} + +int rvu_mbox_handler_CGX_INTLBK_ENABLE(struct rvu *rvu, struct msg_req *req, + struct msg_rsp *rsp) +{ + rvu_cgx_config_intlbk(rvu, req->hdr.pcifunc, true); + return 0; +} + +int rvu_mbox_handler_CGX_INTLBK_DISABLE(struct rvu *rvu, struct msg_req *req, + struct msg_rsp *rsp) +{ + rvu_cgx_config_intlbk(rvu, req->hdr.pcifunc, false); + return 0; +} diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c new file mode 100644 index 000000000000..214ca2c26ab4 --- /dev/null +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c @@ -0,0 +1,892 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Marvell OcteonTx2 RVU Admin Function driver + * + * Copyright (C) 2018 Marvell International Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/module.h> +#include <linux/pci.h> + +#include "rvu_struct.h" +#include "rvu_reg.h" +#include "rvu.h" +#include "cgx.h" + +static inline struct nix_hw *get_nix_hw(struct rvu_hwinfo *hw, int blkaddr) +{ + if (blkaddr == BLKADDR_NIX0 && hw->nix0) + return hw->nix0; + + return NULL; +} + +static bool is_valid_txschq(struct rvu *rvu, int blkaddr, + int lvl, u16 pcifunc, u16 schq) +{ + struct nix_txsch *txsch; + struct nix_hw *nix_hw; + + nix_hw = get_nix_hw(rvu->hw, blkaddr); + if (!nix_hw) + return false; + + txsch = &nix_hw->txsch[lvl]; + /* Check out of bounds */ + if (schq >= txsch->schq.max) + return false; + + spin_lock(&rvu->rsrc_lock); + if (txsch->pfvf_map[schq] != pcifunc) { + spin_unlock(&rvu->rsrc_lock); + return false; + } + spin_unlock(&rvu->rsrc_lock); + return true; +} + +static void nix_setup_lso_tso_l3(struct rvu *rvu, int blkaddr, + u64 format, bool v4, u64 *fidx) +{ + struct nix_lso_format field = {0}; + + /* IP's Length field */ + field.layer = NIX_TXLAYER_OL3; + /* In ipv4, length field is at offset 2 bytes, for ipv6 it's 4 */ + field.offset = v4 ? 2 : 4; + field.sizem1 = 1; /* i.e 2 bytes */ + field.alg = NIX_LSOALG_ADD_PAYLEN; + rvu_write64(rvu, blkaddr, + NIX_AF_LSO_FORMATX_FIELDX(format, (*fidx)++), + *(u64 *)&field); + + /* No ID field in IPv6 header */ + if (!v4) + return; + + /* IP's ID field */ + field.layer = NIX_TXLAYER_OL3; + field.offset = 4; + field.sizem1 = 1; /* i.e 2 bytes */ + field.alg = NIX_LSOALG_ADD_SEGNUM; + rvu_write64(rvu, blkaddr, + NIX_AF_LSO_FORMATX_FIELDX(format, (*fidx)++), + *(u64 *)&field); +} + +static void nix_setup_lso_tso_l4(struct rvu *rvu, int blkaddr, + u64 format, u64 *fidx) +{ + struct nix_lso_format field = {0}; + + /* TCP's sequence number field */ + field.layer = NIX_TXLAYER_OL4; + field.offset = 4; + field.sizem1 = 3; /* i.e 4 bytes */ + field.alg = NIX_LSOALG_ADD_OFFSET; + rvu_write64(rvu, blkaddr, + NIX_AF_LSO_FORMATX_FIELDX(format, (*fidx)++), + *(u64 *)&field); + + /* TCP's flags field */ + field.layer = NIX_TXLAYER_OL4; + field.offset = 12; + field.sizem1 = 0; /* not needed */ + field.alg = NIX_LSOALG_TCP_FLAGS; + rvu_write64(rvu, blkaddr, + NIX_AF_LSO_FORMATX_FIELDX(format, (*fidx)++), + *(u64 *)&field); +} + +static void nix_setup_lso(struct rvu *rvu, int blkaddr) +{ + u64 cfg, idx, fidx = 0; + + /* Enable LSO */ + cfg = rvu_read64(rvu, blkaddr, NIX_AF_LSO_CFG); + /* For TSO, set first and middle segment flags to + * mask out PSH, RST & FIN flags in TCP packet + */ + cfg &= ~((0xFFFFULL << 32) | (0xFFFFULL << 16)); + cfg |= (0xFFF2ULL << 32) | (0xFFF2ULL << 16); + rvu_write64(rvu, blkaddr, NIX_AF_LSO_CFG, cfg | BIT_ULL(63)); + + /* Configure format fields for TCPv4 segmentation offload */ + idx = NIX_LSO_FORMAT_IDX_TSOV4; + nix_setup_lso_tso_l3(rvu, blkaddr, idx, true, &fidx); + nix_setup_lso_tso_l4(rvu, blkaddr, idx, &fidx); + + /* Set rest of the fields to NOP */ + for (; fidx < 8; fidx++) { + rvu_write64(rvu, blkaddr, + NIX_AF_LSO_FORMATX_FIELDX(idx, fidx), 0x0ULL); + } + + /* Configure format fields for TCPv6 segmentation offload */ + idx = NIX_LSO_FORMAT_IDX_TSOV6; + fidx = 0; + nix_setup_lso_tso_l3(rvu, blkaddr, idx, false, &fidx); + nix_setup_lso_tso_l4(rvu, blkaddr, idx, &fidx); + + /* Set rest of the fields to NOP */ + for (; fidx < 8; fidx++) { + rvu_write64(rvu, blkaddr, + NIX_AF_LSO_FORMATX_FIELDX(idx, fidx), 0x0ULL); + } +} + +static void nix_ctx_free(struct rvu *rvu, struct rvu_pfvf *pfvf) +{ + kfree(pfvf->rq_bmap); + kfree(pfvf->sq_bmap); + kfree(pfvf->cq_bmap); + if (pfvf->rq_ctx) + qmem_free(rvu->dev, pfvf->rq_ctx); + if (pfvf->sq_ctx) + qmem_free(rvu->dev, pfvf->sq_ctx); + if (pfvf->cq_ctx) + qmem_free(rvu->dev, pfvf->cq_ctx); + if (pfvf->rss_ctx) + qmem_free(rvu->dev, pfvf->rss_ctx); + if (pfvf->nix_qints_ctx) + qmem_free(rvu->dev, pfvf->nix_qints_ctx); + if (pfvf->cq_ints_ctx) + qmem_free(rvu->dev, pfvf->cq_ints_ctx); + + pfvf->rq_bmap = NULL; + pfvf->cq_bmap = NULL; + pfvf->sq_bmap = NULL; + pfvf->rq_ctx = NULL; + pfvf->sq_ctx = NULL; + pfvf->cq_ctx = NULL; + pfvf->rss_ctx = NULL; + pfvf->nix_qints_ctx = NULL; + pfvf->cq_ints_ctx = NULL; +} + +static int nixlf_rss_ctx_init(struct rvu *rvu, int blkaddr, + struct rvu_pfvf *pfvf, int nixlf, + int rss_sz, int rss_grps, int hwctx_size) +{ + int err, grp, num_indices; + + /* RSS is not requested for this NIXLF */ + if (!rss_sz) + return 0; + num_indices = rss_sz * rss_grps; + + /* Alloc NIX RSS HW context memory and config the base */ + err = qmem_alloc(rvu->dev, &pfvf->rss_ctx, num_indices, hwctx_size); + if (err) + return err; + + rvu_write64(rvu, blkaddr, NIX_AF_LFX_RSS_BASE(nixlf), + (u64)pfvf->rss_ctx->iova); + + /* Config full RSS table size, enable RSS and caching */ + rvu_write64(rvu, blkaddr, NIX_AF_LFX_RSS_CFG(nixlf), + BIT_ULL(36) | BIT_ULL(4) | + ilog2(num_indices / MAX_RSS_INDIR_TBL_SIZE)); + /* Config RSS group offset and sizes */ + for (grp = 0; grp < rss_grps; grp++) + rvu_write64(rvu, blkaddr, NIX_AF_LFX_RSS_GRPX(nixlf, grp), + ((ilog2(rss_sz) - 1) << 16) | (rss_sz * grp)); + return 0; +} + +static int nix_aq_enqueue_wait(struct rvu *rvu, struct rvu_block *block, + struct nix_aq_inst_s *inst) +{ + struct admin_queue *aq = block->aq; + struct nix_aq_res_s *result; + int timeout = 1000; + u64 reg, head; + + result = (struct nix_aq_res_s *)aq->res->base; + + /* Get current head pointer where to append this instruction */ + reg = rvu_read64(rvu, block->addr, NIX_AF_AQ_STATUS); + head = (reg >> 4) & AQ_PTR_MASK; + + memcpy((void *)(aq->inst->base + (head * aq->inst->entry_sz)), + (void *)inst, aq->inst->entry_sz); + memset(result, 0, sizeof(*result)); + /* sync into memory */ + wmb(); + + /* Ring the doorbell and wait for result */ + rvu_write64(rvu, block->addr, NIX_AF_AQ_DOOR, 1); + while (result->compcode == NIX_AQ_COMP_NOTDONE) { + cpu_relax(); + udelay(1); + timeout--; + if (!timeout) + return -EBUSY; + } + + if (result->compcode != NIX_AQ_COMP_GOOD) + /* TODO: Replace this with some error code */ + return -EBUSY; + + return 0; +} + +static int rvu_nix_aq_enq_inst(struct rvu *rvu, struct nix_aq_enq_req *req, + struct nix_aq_enq_rsp *rsp) +{ + struct rvu_hwinfo *hw = rvu->hw; + u16 pcifunc = req->hdr.pcifunc; + int nixlf, blkaddr, rc = 0; + struct nix_aq_inst_s inst; + struct rvu_block *block; + struct admin_queue *aq; + struct rvu_pfvf *pfvf; + void *ctx, *mask; + bool ena; + u64 cfg; + + pfvf = rvu_get_pfvf(rvu, pcifunc); + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, pcifunc); + if (!pfvf->nixlf || blkaddr < 0) + return NIX_AF_ERR_AF_LF_INVALID; + + block = &hw->block[blkaddr]; + aq = block->aq; + if (!aq) { + dev_warn(rvu->dev, "%s: NIX AQ not initialized\n", __func__); + return NIX_AF_ERR_AQ_ENQUEUE; + } + + nixlf = rvu_get_lf(rvu, block, pcifunc, 0); + if (nixlf < 0) + return NIX_AF_ERR_AF_LF_INVALID; + + switch (req->ctype) { + case NIX_AQ_CTYPE_RQ: + /* Check if index exceeds max no of queues */ + if (!pfvf->rq_ctx || req->qidx >= pfvf->rq_ctx->qsize) + rc = NIX_AF_ERR_AQ_ENQUEUE; + break; + case NIX_AQ_CTYPE_SQ: + if (!pfvf->sq_ctx || req->qidx >= pfvf->sq_ctx->qsize) + rc = NIX_AF_ERR_AQ_ENQUEUE; + break; + case NIX_AQ_CTYPE_CQ: + if (!pfvf->cq_ctx || req->qidx >= pfvf->cq_ctx->qsize) + rc = NIX_AF_ERR_AQ_ENQUEUE; + break; + case NIX_AQ_CTYPE_RSS: + /* Check if RSS is enabled and qidx is within range */ + cfg = rvu_read64(rvu, blkaddr, NIX_AF_LFX_RSS_CFG(nixlf)); + if (!(cfg & BIT_ULL(4)) || !pfvf->rss_ctx || + (req->qidx >= (256UL << (cfg & 0xF)))) + rc = NIX_AF_ERR_AQ_ENQUEUE; + break; + default: + rc = NIX_AF_ERR_AQ_ENQUEUE; + } + + if (rc) + return rc; + + /* Check if SQ pointed SMQ belongs to this PF/VF or not */ + if (req->ctype == NIX_AQ_CTYPE_SQ && + req->op != NIX_AQ_INSTOP_WRITE) { + if (!is_valid_txschq(rvu, blkaddr, NIX_TXSCH_LVL_SMQ, + pcifunc, req->sq.smq)) + return NIX_AF_ERR_AQ_ENQUEUE; + } + + memset(&inst, 0, sizeof(struct nix_aq_inst_s)); + inst.lf = nixlf; + inst.cindex = req->qidx; + inst.ctype = req->ctype; + inst.op = req->op; + /* Currently we are not supporting enqueuing multiple instructions, + * so always choose first entry in result memory. + */ + inst.res_addr = (u64)aq->res->iova; + + /* Clean result + context memory */ + memset(aq->res->base, 0, aq->res->entry_sz); + /* Context needs to be written at RES_ADDR + 128 */ + ctx = aq->res->base + 128; + /* Mask needs to be written at RES_ADDR + 256 */ + mask = aq->res->base + 256; + + switch (req->op) { + case NIX_AQ_INSTOP_WRITE: + if (req->ctype == NIX_AQ_CTYPE_RQ) + memcpy(mask, &req->rq_mask, + sizeof(struct nix_rq_ctx_s)); + else if (req->ctype == NIX_AQ_CTYPE_SQ) + memcpy(mask, &req->sq_mask, + sizeof(struct nix_sq_ctx_s)); + else if (req->ctype == NIX_AQ_CTYPE_CQ) + memcpy(mask, &req->cq_mask, + sizeof(struct nix_cq_ctx_s)); + else if (req->ctype == NIX_AQ_CTYPE_RSS) + memcpy(mask, &req->rss_mask, + sizeof(struct nix_rsse_s)); + /* Fall through */ + case NIX_AQ_INSTOP_INIT: + if (req->ctype == NIX_AQ_CTYPE_RQ) + memcpy(ctx, &req->rq, sizeof(struct nix_rq_ctx_s)); + else if (req->ctype == NIX_AQ_CTYPE_SQ) + memcpy(ctx, &req->sq, sizeof(struct nix_sq_ctx_s)); + else if (req->ctype == NIX_AQ_CTYPE_CQ) + memcpy(ctx, &req->cq, sizeof(struct nix_cq_ctx_s)); + else if (req->ctype == NIX_AQ_CTYPE_RSS) + memcpy(ctx, &req->rss, sizeof(struct nix_rsse_s)); + break; + case NIX_AQ_INSTOP_NOP: + case NIX_AQ_INSTOP_READ: + case NIX_AQ_INSTOP_LOCK: + case NIX_AQ_INSTOP_UNLOCK: + break; + default: + rc = NIX_AF_ERR_AQ_ENQUEUE; + return rc; + } + + spin_lock(&aq->lock); + + /* Submit the instruction to AQ */ + rc = nix_aq_enqueue_wait(rvu, block, &inst); + if (rc) { + spin_unlock(&aq->lock); + return rc; + } + + /* Set RQ/SQ/CQ bitmap if respective queue hw context is enabled */ + if (req->op == NIX_AQ_INSTOP_INIT) { + if (req->ctype == NIX_AQ_CTYPE_RQ && req->rq.ena) + __set_bit(req->qidx, pfvf->rq_bmap); + if (req->ctype == NIX_AQ_CTYPE_SQ && req->sq.ena) + __set_bit(req->qidx, pfvf->sq_bmap); + if (req->ctype == NIX_AQ_CTYPE_CQ && req->cq.ena) + __set_bit(req->qidx, pfvf->cq_bmap); + } + + if (req->op == NIX_AQ_INSTOP_WRITE) { + if (req->ctype == NIX_AQ_CTYPE_RQ) { + ena = (req->rq.ena & req->rq_mask.ena) | + (test_bit(req->qidx, pfvf->rq_bmap) & + ~req->rq_mask.ena); + if (ena) + __set_bit(req->qidx, pfvf->rq_bmap); + else + __clear_bit(req->qidx, pfvf->rq_bmap); + } + if (req->ctype == NIX_AQ_CTYPE_SQ) { + ena = (req->rq.ena & req->sq_mask.ena) | + (test_bit(req->qidx, pfvf->sq_bmap) & + ~req->sq_mask.ena); + if (ena) + __set_bit(req->qidx, pfvf->sq_bmap); + else + __clear_bit(req->qidx, pfvf->sq_bmap); + } + if (req->ctype == NIX_AQ_CTYPE_CQ) { + ena = (req->rq.ena & req->cq_mask.ena) | + (test_bit(req->qidx, pfvf->cq_bmap) & + ~req->cq_mask.ena); + if (ena) + __set_bit(req->qidx, pfvf->cq_bmap); + else + __clear_bit(req->qidx, pfvf->cq_bmap); + } + } + + if (rsp) { + /* Copy read context into mailbox */ + if (req->op == NIX_AQ_INSTOP_READ) { + if (req->ctype == NIX_AQ_CTYPE_RQ) + memcpy(&rsp->rq, ctx, + sizeof(struct nix_rq_ctx_s)); + else if (req->ctype == NIX_AQ_CTYPE_SQ) + memcpy(&rsp->sq, ctx, + sizeof(struct nix_sq_ctx_s)); + else if (req->ctype == NIX_AQ_CTYPE_CQ) + memcpy(&rsp->cq, ctx, + sizeof(struct nix_cq_ctx_s)); + else if (req->ctype == NIX_AQ_CTYPE_RSS) + memcpy(&rsp->rss, ctx, + sizeof(struct nix_cq_ctx_s)); + } + } + + spin_unlock(&aq->lock); + return 0; +} + +static int nix_lf_hwctx_disable(struct rvu *rvu, struct hwctx_disable_req *req) +{ + struct rvu_pfvf *pfvf = rvu_get_pfvf(rvu, req->hdr.pcifunc); + struct nix_aq_enq_req aq_req; + unsigned long *bmap; + int qidx, q_cnt = 0; + int err = 0, rc; + + if (!pfvf->cq_ctx || !pfvf->sq_ctx || !pfvf->rq_ctx) + return NIX_AF_ERR_AQ_ENQUEUE; + + memset(&aq_req, 0, sizeof(struct nix_aq_enq_req)); + aq_req.hdr.pcifunc = req->hdr.pcifunc; + + if (req->ctype == NIX_AQ_CTYPE_CQ) { + aq_req.cq.ena = 0; + aq_req.cq_mask.ena = 1; + q_cnt = pfvf->cq_ctx->qsize; + bmap = pfvf->cq_bmap; + } + if (req->ctype == NIX_AQ_CTYPE_SQ) { + aq_req.sq.ena = 0; + aq_req.sq_mask.ena = 1; + q_cnt = pfvf->sq_ctx->qsize; + bmap = pfvf->sq_bmap; + } + if (req->ctype == NIX_AQ_CTYPE_RQ) { + aq_req.rq.ena = 0; + aq_req.rq_mask.ena = 1; + q_cnt = pfvf->rq_ctx->qsize; + bmap = pfvf->rq_bmap; + } + + aq_req.ctype = req->ctype; + aq_req.op = NIX_AQ_INSTOP_WRITE; + + for (qidx = 0; qidx < q_cnt; qidx++) { + if (!test_bit(qidx, bmap)) + continue; + aq_req.qidx = qidx; + rc = rvu_nix_aq_enq_inst(rvu, &aq_req, NULL); + if (rc) { + err = rc; + dev_err(rvu->dev, "Failed to disable %s:%d context\n", + (req->ctype == NIX_AQ_CTYPE_CQ) ? + "CQ" : ((req->ctype == NIX_AQ_CTYPE_RQ) ? + "RQ" : "SQ"), qidx); + } + } + + return err; +} + +int rvu_mbox_handler_NIX_AQ_ENQ(struct rvu *rvu, + struct nix_aq_enq_req *req, + struct nix_aq_enq_rsp *rsp) +{ + return rvu_nix_aq_enq_inst(rvu, req, rsp); +} + +int rvu_mbox_handler_NIX_HWCTX_DISABLE(struct rvu *rvu, + struct hwctx_disable_req *req, + struct msg_rsp *rsp) +{ + return nix_lf_hwctx_disable(rvu, req); +} + +int rvu_mbox_handler_NIX_LF_ALLOC(struct rvu *rvu, + struct nix_lf_alloc_req *req, + struct nix_lf_alloc_rsp *rsp) +{ + int nixlf, qints, hwctx_size, err, rc = 0; + struct rvu_hwinfo *hw = rvu->hw; + u16 pcifunc = req->hdr.pcifunc; + struct rvu_block *block; + struct rvu_pfvf *pfvf; + u64 cfg, ctx_cfg; + int blkaddr; + + if (!req->rq_cnt || !req->sq_cnt || !req->cq_cnt) + return NIX_AF_ERR_PARAM; + + pfvf = rvu_get_pfvf(rvu, pcifunc); + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, pcifunc); + if (!pfvf->nixlf || blkaddr < 0) + return NIX_AF_ERR_AF_LF_INVALID; + + block = &hw->block[blkaddr]; + nixlf = rvu_get_lf(rvu, block, pcifunc, 0); + if (nixlf < 0) + return NIX_AF_ERR_AF_LF_INVALID; + + /* If RSS is being enabled, check if requested config is valid. + * RSS table size should be power of two, otherwise + * RSS_GRP::OFFSET + adder might go beyond that group or + * won't be able to use entire table. + */ + if (req->rss_sz && (req->rss_sz > MAX_RSS_INDIR_TBL_SIZE || + !is_power_of_2(req->rss_sz))) + return NIX_AF_ERR_RSS_SIZE_INVALID; + + if (req->rss_sz && + (!req->rss_grps || req->rss_grps > MAX_RSS_GROUPS)) + return NIX_AF_ERR_RSS_GRPS_INVALID; + + /* Reset this NIX LF */ + err = rvu_lf_reset(rvu, block, nixlf); + if (err) { + dev_err(rvu->dev, "Failed to reset NIX%d LF%d\n", + block->addr - BLKADDR_NIX0, nixlf); + return NIX_AF_ERR_LF_RESET; + } + + ctx_cfg = rvu_read64(rvu, blkaddr, NIX_AF_CONST3); + + /* Alloc NIX RQ HW context memory and config the base */ + hwctx_size = 1UL << ((ctx_cfg >> 4) & 0xF); + err = qmem_alloc(rvu->dev, &pfvf->rq_ctx, req->rq_cnt, hwctx_size); + if (err) + goto free_mem; + + pfvf->rq_bmap = kcalloc(req->rq_cnt, sizeof(long), GFP_KERNEL); + if (!pfvf->rq_bmap) + goto free_mem; + + rvu_write64(rvu, blkaddr, NIX_AF_LFX_RQS_BASE(nixlf), + (u64)pfvf->rq_ctx->iova); + + /* Set caching and queue count in HW */ + cfg = BIT_ULL(36) | (req->rq_cnt - 1); + rvu_write64(rvu, blkaddr, NIX_AF_LFX_RQS_CFG(nixlf), cfg); + + /* Alloc NIX SQ HW context memory and config the base */ + hwctx_size = 1UL << (ctx_cfg & 0xF); + err = qmem_alloc(rvu->dev, &pfvf->sq_ctx, req->sq_cnt, hwctx_size); + if (err) + goto free_mem; + + pfvf->sq_bmap = kcalloc(req->sq_cnt, sizeof(long), GFP_KERNEL); + if (!pfvf->sq_bmap) + goto free_mem; + + rvu_write64(rvu, blkaddr, NIX_AF_LFX_SQS_BASE(nixlf), + (u64)pfvf->sq_ctx->iova); + cfg = BIT_ULL(36) | (req->sq_cnt - 1); + rvu_write64(rvu, blkaddr, NIX_AF_LFX_SQS_CFG(nixlf), cfg); + + /* Alloc NIX CQ HW context memory and config the base */ + hwctx_size = 1UL << ((ctx_cfg >> 8) & 0xF); + err = qmem_alloc(rvu->dev, &pfvf->cq_ctx, req->cq_cnt, hwctx_size); + if (err) + goto free_mem; + + pfvf->cq_bmap = kcalloc(req->cq_cnt, sizeof(long), GFP_KERNEL); + if (!pfvf->cq_bmap) + goto free_mem; + + rvu_write64(rvu, blkaddr, NIX_AF_LFX_CQS_BASE(nixlf), + (u64)pfvf->cq_ctx->iova); + cfg = BIT_ULL(36) | (req->cq_cnt - 1); + rvu_write64(rvu, blkaddr, NIX_AF_LFX_CQS_CFG(nixlf), cfg); + + /* Initialize receive side scaling (RSS) */ + hwctx_size = 1UL << ((ctx_cfg >> 12) & 0xF); + err = nixlf_rss_ctx_init(rvu, blkaddr, pfvf, nixlf, + req->rss_sz, req->rss_grps, hwctx_size); + if (err) + goto free_mem; + + /* Alloc memory for CQINT's HW contexts */ + cfg = rvu_read64(rvu, blkaddr, NIX_AF_CONST2); + qints = (cfg >> 24) & 0xFFF; + hwctx_size = 1UL << ((ctx_cfg >> 24) & 0xF); + err = qmem_alloc(rvu->dev, &pfvf->cq_ints_ctx, qints, hwctx_size); + if (err) + goto free_mem; + + rvu_write64(rvu, blkaddr, NIX_AF_LFX_CINTS_BASE(nixlf), + (u64)pfvf->cq_ints_ctx->iova); + rvu_write64(rvu, blkaddr, NIX_AF_LFX_CINTS_CFG(nixlf), BIT_ULL(36)); + + /* Alloc memory for QINT's HW contexts */ + cfg = rvu_read64(rvu, blkaddr, NIX_AF_CONST2); + qints = (cfg >> 12) & 0xFFF; + hwctx_size = 1UL << ((ctx_cfg >> 20) & 0xF); + err = qmem_alloc(rvu->dev, &pfvf->nix_qints_ctx, qints, hwctx_size); + if (err) + goto free_mem; + + rvu_write64(rvu, blkaddr, NIX_AF_LFX_QINTS_BASE(nixlf), + (u64)pfvf->nix_qints_ctx->iova); + rvu_write64(rvu, blkaddr, NIX_AF_LFX_QINTS_CFG(nixlf), BIT_ULL(36)); + + /* Enable LMTST for this NIX LF */ + rvu_write64(rvu, blkaddr, NIX_AF_LFX_TX_CFG2(nixlf), BIT_ULL(0)); + + /* Set CQE/WQE size, NPA_PF_FUNC for SQBs and also SSO_PF_FUNC + * If requester has sent a 'RVU_DEFAULT_PF_FUNC' use this NIX LF's + * PCIFUNC itself. + */ + if (req->npa_func == RVU_DEFAULT_PF_FUNC) + cfg = pcifunc; + else + cfg = req->npa_func; + + if (req->sso_func == RVU_DEFAULT_PF_FUNC) + cfg |= (u64)pcifunc << 16; + else + cfg |= (u64)req->sso_func << 16; + + cfg |= (u64)req->xqe_sz << 33; + rvu_write64(rvu, blkaddr, NIX_AF_LFX_CFG(nixlf), cfg); + + /* Config Rx pkt length, csum checks and apad enable / disable */ + rvu_write64(rvu, blkaddr, NIX_AF_LFX_RX_CFG(nixlf), req->rx_cfg); + + goto exit; + +free_mem: + nix_ctx_free(rvu, pfvf); + rc = -ENOMEM; + +exit: + /* Set macaddr of this PF/VF */ + ether_addr_copy(rsp->mac_addr, pfvf->mac_addr); + + /* set SQB size info */ + cfg = rvu_read64(rvu, blkaddr, NIX_AF_SQ_CONST); + rsp->sqb_size = (cfg >> 34) & 0xFFFF; + rsp->lso_tsov4_idx = NIX_LSO_FORMAT_IDX_TSOV4; + rsp->lso_tsov6_idx = NIX_LSO_FORMAT_IDX_TSOV6; + return rc; +} + +int rvu_mbox_handler_NIX_LF_FREE(struct rvu *rvu, struct msg_req *req, + struct msg_rsp *rsp) +{ + struct rvu_hwinfo *hw = rvu->hw; + u16 pcifunc = req->hdr.pcifunc; + struct rvu_block *block; + int blkaddr, nixlf, err; + struct rvu_pfvf *pfvf; + + pfvf = rvu_get_pfvf(rvu, pcifunc); + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, pcifunc); + if (!pfvf->nixlf || blkaddr < 0) + return NIX_AF_ERR_AF_LF_INVALID; + + block = &hw->block[blkaddr]; + nixlf = rvu_get_lf(rvu, block, pcifunc, 0); + if (nixlf < 0) + return NIX_AF_ERR_AF_LF_INVALID; + + /* Reset this NIX LF */ + err = rvu_lf_reset(rvu, block, nixlf); + if (err) { + dev_err(rvu->dev, "Failed to reset NIX%d LF%d\n", + block->addr - BLKADDR_NIX0, nixlf); + return NIX_AF_ERR_LF_RESET; + } + + nix_ctx_free(rvu, pfvf); + + return 0; +} + +static int nix_setup_txschq(struct rvu *rvu, struct nix_hw *nix_hw, int blkaddr) +{ + struct nix_txsch *txsch; + u64 cfg, reg; + int err, lvl; + + /* Get scheduler queue count of each type and alloc + * bitmap for each for alloc/free/attach operations. + */ + for (lvl = 0; lvl < NIX_TXSCH_LVL_CNT; lvl++) { + txsch = &nix_hw->txsch[lvl]; + txsch->lvl = lvl; + switch (lvl) { + case NIX_TXSCH_LVL_SMQ: + reg = NIX_AF_MDQ_CONST; + break; + case NIX_TXSCH_LVL_TL4: + reg = NIX_AF_TL4_CONST; + break; + case NIX_TXSCH_LVL_TL3: + reg = NIX_AF_TL3_CONST; + break; + case NIX_TXSCH_LVL_TL2: + reg = NIX_AF_TL2_CONST; + break; + case NIX_TXSCH_LVL_TL1: + reg = NIX_AF_TL1_CONST; + break; + } + cfg = rvu_read64(rvu, blkaddr, reg); + txsch->schq.max = cfg & 0xFFFF; + err = rvu_alloc_bitmap(&txsch->schq); + if (err) + return err; + + /* Allocate memory for scheduler queues to + * PF/VF pcifunc mapping info. + */ + txsch->pfvf_map = devm_kcalloc(rvu->dev, txsch->schq.max, + sizeof(u16), GFP_KERNEL); + if (!txsch->pfvf_map) + return -ENOMEM; + } + return 0; +} + +static int nix_calibrate_x2p(struct rvu *rvu, int blkaddr) +{ + int idx, err; + u64 status; + + /* Start X2P bus calibration */ + rvu_write64(rvu, blkaddr, NIX_AF_CFG, + rvu_read64(rvu, blkaddr, NIX_AF_CFG) | BIT_ULL(9)); + /* Wait for calibration to complete */ + err = rvu_poll_reg(rvu, blkaddr, + NIX_AF_STATUS, BIT_ULL(10), false); + if (err) { + dev_err(rvu->dev, "NIX X2P bus calibration failed\n"); + return err; + } + + status = rvu_read64(rvu, blkaddr, NIX_AF_STATUS); + /* Check if CGX devices are ready */ + for (idx = 0; idx < cgx_get_cgx_cnt(); idx++) { + if (status & (BIT_ULL(16 + idx))) + continue; + dev_err(rvu->dev, + "CGX%d didn't respond to NIX X2P calibration\n", idx); + err = -EBUSY; + } + + /* Check if LBK is ready */ + if (!(status & BIT_ULL(19))) { + dev_err(rvu->dev, + "LBK didn't respond to NIX X2P calibration\n"); + err = -EBUSY; + } + + /* Clear 'calibrate_x2p' bit */ + rvu_write64(rvu, blkaddr, NIX_AF_CFG, + rvu_read64(rvu, blkaddr, NIX_AF_CFG) & ~BIT_ULL(9)); + if (err || (status & 0x3FFULL)) + dev_err(rvu->dev, + "NIX X2P calibration failed, status 0x%llx\n", status); + if (err) + return err; + return 0; +} + +static int nix_aq_init(struct rvu *rvu, struct rvu_block *block) +{ + u64 cfg; + int err; + + /* Set admin queue endianness */ + cfg = rvu_read64(rvu, block->addr, NIX_AF_CFG); +#ifdef __BIG_ENDIAN + cfg |= BIT_ULL(1); + rvu_write64(rvu, block->addr, NIX_AF_CFG, cfg); +#else + cfg &= ~BIT_ULL(1); + rvu_write64(rvu, block->addr, NIX_AF_CFG, cfg); +#endif + + /* Do not bypass NDC cache */ + cfg = rvu_read64(rvu, block->addr, NIX_AF_NDC_CFG); + cfg &= ~0x3FFEULL; + rvu_write64(rvu, block->addr, NIX_AF_NDC_CFG, cfg); + + /* Result structure can be followed by RQ/SQ/CQ context at + * RES + 128bytes and a write mask at RES + 256 bytes, depending on + * operation type. Alloc sufficient result memory for all operations. + */ + err = rvu_aq_alloc(rvu, &block->aq, + Q_COUNT(AQ_SIZE), sizeof(struct nix_aq_inst_s), + ALIGN(sizeof(struct nix_aq_res_s), 128) + 256); + if (err) + return err; + + rvu_write64(rvu, block->addr, NIX_AF_AQ_CFG, AQ_SIZE); + rvu_write64(rvu, block->addr, + NIX_AF_AQ_BASE, (u64)block->aq->inst->iova); + return 0; +} + +int rvu_nix_init(struct rvu *rvu) +{ + struct rvu_hwinfo *hw = rvu->hw; + struct rvu_block *block; + int blkaddr, err; + u64 cfg; + + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, 0); + if (blkaddr < 0) + return 0; + block = &hw->block[blkaddr]; + + /* Calibrate X2P bus to check if CGX/LBK links are fine */ + err = nix_calibrate_x2p(rvu, blkaddr); + if (err) + return err; + + /* Set num of links of each type */ + cfg = rvu_read64(rvu, blkaddr, NIX_AF_CONST); + hw->cgx = (cfg >> 12) & 0xF; + hw->lmac_per_cgx = (cfg >> 8) & 0xF; + hw->cgx_links = hw->cgx * hw->lmac_per_cgx; + hw->lbk_links = 1; + hw->sdp_links = 1; + + /* Initialize admin queue */ + err = nix_aq_init(rvu, block); + if (err) + return err; + + /* Restore CINT timer delay to HW reset values */ + rvu_write64(rvu, blkaddr, NIX_AF_CINT_DELAY, 0x0ULL); + + /* Configure segmentation offload formats */ + nix_setup_lso(rvu, blkaddr); + + if (blkaddr == BLKADDR_NIX0) { + hw->nix0 = devm_kzalloc(rvu->dev, + sizeof(struct nix_hw), GFP_KERNEL); + if (!hw->nix0) + return -ENOMEM; + + err = nix_setup_txschq(rvu, hw->nix0, blkaddr); + if (err) + return err; + } + return 0; +} + +void rvu_nix_freemem(struct rvu *rvu) +{ + struct rvu_hwinfo *hw = rvu->hw; + struct rvu_block *block; + struct nix_txsch *txsch; + struct nix_hw *nix_hw; + int blkaddr, lvl; + + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, 0); + if (blkaddr < 0) + return; + + block = &hw->block[blkaddr]; + rvu_aq_free(rvu, block->aq); + + if (blkaddr == BLKADDR_NIX0) { + nix_hw = get_nix_hw(rvu->hw, blkaddr); + if (!nix_hw) + return; + + for (lvl = 0; lvl < NIX_TXSCH_LVL_CNT; lvl++) { + txsch = &nix_hw->txsch[lvl]; + kfree(txsch->schq.bmap); + } + } +} diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npa.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npa.c new file mode 100644 index 000000000000..0e43a693d119 --- /dev/null +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npa.c @@ -0,0 +1,475 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Marvell OcteonTx2 RVU Admin Function driver + * + * Copyright (C) 2018 Marvell International Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/module.h> +#include <linux/pci.h> + +#include "rvu_struct.h" +#include "rvu_reg.h" +#include "rvu.h" + +static int npa_aq_enqueue_wait(struct rvu *rvu, struct rvu_block *block, + struct npa_aq_inst_s *inst) +{ + struct admin_queue *aq = block->aq; + struct npa_aq_res_s *result; + int timeout = 1000; + u64 reg, head; + + result = (struct npa_aq_res_s *)aq->res->base; + + /* Get current head pointer where to append this instruction */ + reg = rvu_read64(rvu, block->addr, NPA_AF_AQ_STATUS); + head = (reg >> 4) & AQ_PTR_MASK; + + memcpy((void *)(aq->inst->base + (head * aq->inst->entry_sz)), + (void *)inst, aq->inst->entry_sz); + memset(result, 0, sizeof(*result)); + /* sync into memory */ + wmb(); + + /* Ring the doorbell and wait for result */ + rvu_write64(rvu, block->addr, NPA_AF_AQ_DOOR, 1); + while (result->compcode == NPA_AQ_COMP_NOTDONE) { + cpu_relax(); + udelay(1); + timeout--; + if (!timeout) + return -EBUSY; + } + + if (result->compcode != NPA_AQ_COMP_GOOD) + /* TODO: Replace this with some error code */ + return -EBUSY; + + return 0; +} + +static int rvu_npa_aq_enq_inst(struct rvu *rvu, struct npa_aq_enq_req *req, + struct npa_aq_enq_rsp *rsp) +{ + struct rvu_hwinfo *hw = rvu->hw; + u16 pcifunc = req->hdr.pcifunc; + int blkaddr, npalf, rc = 0; + struct npa_aq_inst_s inst; + struct rvu_block *block; + struct admin_queue *aq; + struct rvu_pfvf *pfvf; + void *ctx, *mask; + bool ena; + + pfvf = rvu_get_pfvf(rvu, pcifunc); + if (!pfvf->aura_ctx || req->aura_id >= pfvf->aura_ctx->qsize) + return NPA_AF_ERR_AQ_ENQUEUE; + + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPA, pcifunc); + if (!pfvf->npalf || blkaddr < 0) + return NPA_AF_ERR_AF_LF_INVALID; + + block = &hw->block[blkaddr]; + aq = block->aq; + if (!aq) { + dev_warn(rvu->dev, "%s: NPA AQ not initialized\n", __func__); + return NPA_AF_ERR_AQ_ENQUEUE; + } + + npalf = rvu_get_lf(rvu, block, pcifunc, 0); + if (npalf < 0) + return NPA_AF_ERR_AF_LF_INVALID; + + memset(&inst, 0, sizeof(struct npa_aq_inst_s)); + inst.cindex = req->aura_id; + inst.lf = npalf; + inst.ctype = req->ctype; + inst.op = req->op; + /* Currently we are not supporting enqueuing multiple instructions, + * so always choose first entry in result memory. + */ + inst.res_addr = (u64)aq->res->iova; + + /* Clean result + context memory */ + memset(aq->res->base, 0, aq->res->entry_sz); + /* Context needs to be written at RES_ADDR + 128 */ + ctx = aq->res->base + 128; + /* Mask needs to be written at RES_ADDR + 256 */ + mask = aq->res->base + 256; + + switch (req->op) { + case NPA_AQ_INSTOP_WRITE: + /* Copy context and write mask */ + if (req->ctype == NPA_AQ_CTYPE_AURA) { + memcpy(mask, &req->aura_mask, + sizeof(struct npa_aura_s)); + memcpy(ctx, &req->aura, sizeof(struct npa_aura_s)); + } else { + memcpy(mask, &req->pool_mask, + sizeof(struct npa_pool_s)); + memcpy(ctx, &req->pool, sizeof(struct npa_pool_s)); + } + break; + case NPA_AQ_INSTOP_INIT: + if (req->ctype == NPA_AQ_CTYPE_AURA) { + if (req->aura.pool_addr >= pfvf->pool_ctx->qsize) { + rc = NPA_AF_ERR_AQ_FULL; + break; + } + /* Set pool's context address */ + req->aura.pool_addr = pfvf->pool_ctx->iova + + (req->aura.pool_addr * pfvf->pool_ctx->entry_sz); + memcpy(ctx, &req->aura, sizeof(struct npa_aura_s)); + } else { /* POOL's context */ + memcpy(ctx, &req->pool, sizeof(struct npa_pool_s)); + } + break; + case NPA_AQ_INSTOP_NOP: + case NPA_AQ_INSTOP_READ: + case NPA_AQ_INSTOP_LOCK: + case NPA_AQ_INSTOP_UNLOCK: + break; + default: + rc = NPA_AF_ERR_AQ_FULL; + break; + } + + if (rc) + return rc; + + spin_lock(&aq->lock); + + /* Submit the instruction to AQ */ + rc = npa_aq_enqueue_wait(rvu, block, &inst); + if (rc) { + spin_unlock(&aq->lock); + return rc; + } + + /* Set aura bitmap if aura hw context is enabled */ + if (req->ctype == NPA_AQ_CTYPE_AURA) { + if (req->op == NPA_AQ_INSTOP_INIT && req->aura.ena) + __set_bit(req->aura_id, pfvf->aura_bmap); + if (req->op == NPA_AQ_INSTOP_WRITE) { + ena = (req->aura.ena & req->aura_mask.ena) | + (test_bit(req->aura_id, pfvf->aura_bmap) & + ~req->aura_mask.ena); + if (ena) + __set_bit(req->aura_id, pfvf->aura_bmap); + else + __clear_bit(req->aura_id, pfvf->aura_bmap); + } + } + + /* Set pool bitmap if pool hw context is enabled */ + if (req->ctype == NPA_AQ_CTYPE_POOL) { + if (req->op == NPA_AQ_INSTOP_INIT && req->pool.ena) + __set_bit(req->aura_id, pfvf->pool_bmap); + if (req->op == NPA_AQ_INSTOP_WRITE) { + ena = (req->pool.ena & req->pool_mask.ena) | + (test_bit(req->aura_id, pfvf->pool_bmap) & + ~req->pool_mask.ena); + if (ena) + __set_bit(req->aura_id, pfvf->pool_bmap); + else + __clear_bit(req->aura_id, pfvf->pool_bmap); + } + } + spin_unlock(&aq->lock); + + if (rsp) { + /* Copy read context into mailbox */ + if (req->op == NPA_AQ_INSTOP_READ) { + if (req->ctype == NPA_AQ_CTYPE_AURA) + memcpy(&rsp->aura, ctx, + sizeof(struct npa_aura_s)); + else + memcpy(&rsp->pool, ctx, + sizeof(struct npa_pool_s)); + } + } + + return 0; +} + +static int npa_lf_hwctx_disable(struct rvu *rvu, struct hwctx_disable_req *req) +{ + struct rvu_pfvf *pfvf = rvu_get_pfvf(rvu, req->hdr.pcifunc); + struct npa_aq_enq_req aq_req; + unsigned long *bmap; + int id, cnt = 0; + int err = 0, rc; + + if (!pfvf->pool_ctx || !pfvf->aura_ctx) + return NPA_AF_ERR_AQ_ENQUEUE; + + memset(&aq_req, 0, sizeof(struct npa_aq_enq_req)); + aq_req.hdr.pcifunc = req->hdr.pcifunc; + + if (req->ctype == NPA_AQ_CTYPE_POOL) { + aq_req.pool.ena = 0; + aq_req.pool_mask.ena = 1; + cnt = pfvf->pool_ctx->qsize; + bmap = pfvf->pool_bmap; + } else if (req->ctype == NPA_AQ_CTYPE_AURA) { + aq_req.aura.ena = 0; + aq_req.aura_mask.ena = 1; + cnt = pfvf->aura_ctx->qsize; + bmap = pfvf->aura_bmap; + } + + aq_req.ctype = req->ctype; + aq_req.op = NPA_AQ_INSTOP_WRITE; + + for (id = 0; id < cnt; id++) { + if (!test_bit(id, bmap)) + continue; + aq_req.aura_id = id; + rc = rvu_npa_aq_enq_inst(rvu, &aq_req, NULL); + if (rc) { + err = rc; + dev_err(rvu->dev, "Failed to disable %s:%d context\n", + (req->ctype == NPA_AQ_CTYPE_AURA) ? + "Aura" : "Pool", id); + } + } + + return err; +} + +int rvu_mbox_handler_NPA_AQ_ENQ(struct rvu *rvu, + struct npa_aq_enq_req *req, + struct npa_aq_enq_rsp *rsp) +{ + return rvu_npa_aq_enq_inst(rvu, req, rsp); +} + +int rvu_mbox_handler_NPA_HWCTX_DISABLE(struct rvu *rvu, + struct hwctx_disable_req *req, + struct msg_rsp *rsp) +{ + return npa_lf_hwctx_disable(rvu, req); +} + +static void npa_ctx_free(struct rvu *rvu, struct rvu_pfvf *pfvf) +{ + kfree(pfvf->aura_bmap); + pfvf->aura_bmap = NULL; + + qmem_free(rvu->dev, pfvf->aura_ctx); + pfvf->aura_ctx = NULL; + + kfree(pfvf->pool_bmap); + pfvf->pool_bmap = NULL; + + qmem_free(rvu->dev, pfvf->pool_ctx); + pfvf->pool_ctx = NULL; + + qmem_free(rvu->dev, pfvf->npa_qints_ctx); + pfvf->npa_qints_ctx = NULL; +} + +int rvu_mbox_handler_NPA_LF_ALLOC(struct rvu *rvu, + struct npa_lf_alloc_req *req, + struct npa_lf_alloc_rsp *rsp) +{ + int npalf, qints, hwctx_size, err, rc = 0; + struct rvu_hwinfo *hw = rvu->hw; + u16 pcifunc = req->hdr.pcifunc; + struct rvu_block *block; + struct rvu_pfvf *pfvf; + u64 cfg, ctx_cfg; + int blkaddr; + + if (req->aura_sz > NPA_AURA_SZ_MAX || + req->aura_sz == NPA_AURA_SZ_0 || !req->nr_pools) + return NPA_AF_ERR_PARAM; + + pfvf = rvu_get_pfvf(rvu, pcifunc); + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPA, pcifunc); + if (!pfvf->npalf || blkaddr < 0) + return NPA_AF_ERR_AF_LF_INVALID; + + block = &hw->block[blkaddr]; + npalf = rvu_get_lf(rvu, block, pcifunc, 0); + if (npalf < 0) + return NPA_AF_ERR_AF_LF_INVALID; + + /* Reset this NPA LF */ + err = rvu_lf_reset(rvu, block, npalf); + if (err) { + dev_err(rvu->dev, "Failed to reset NPALF%d\n", npalf); + return NPA_AF_ERR_LF_RESET; + } + + ctx_cfg = rvu_read64(rvu, blkaddr, NPA_AF_CONST1); + + /* Alloc memory for aura HW contexts */ + hwctx_size = 1UL << (ctx_cfg & 0xF); + err = qmem_alloc(rvu->dev, &pfvf->aura_ctx, + NPA_AURA_COUNT(req->aura_sz), hwctx_size); + if (err) + goto free_mem; + + pfvf->aura_bmap = kcalloc(NPA_AURA_COUNT(req->aura_sz), sizeof(long), + GFP_KERNEL); + if (!pfvf->aura_bmap) + goto free_mem; + + /* Alloc memory for pool HW contexts */ + hwctx_size = 1UL << ((ctx_cfg >> 4) & 0xF); + err = qmem_alloc(rvu->dev, &pfvf->pool_ctx, req->nr_pools, hwctx_size); + if (err) + goto free_mem; + + pfvf->pool_bmap = kcalloc(NPA_AURA_COUNT(req->aura_sz), sizeof(long), + GFP_KERNEL); + if (!pfvf->pool_bmap) + goto free_mem; + + /* Get no of queue interrupts supported */ + cfg = rvu_read64(rvu, blkaddr, NPA_AF_CONST); + qints = (cfg >> 28) & 0xFFF; + + /* Alloc memory for Qints HW contexts */ + hwctx_size = 1UL << ((ctx_cfg >> 8) & 0xF); + err = qmem_alloc(rvu->dev, &pfvf->npa_qints_ctx, qints, hwctx_size); + if (err) + goto free_mem; + + cfg = rvu_read64(rvu, blkaddr, NPA_AF_LFX_AURAS_CFG(npalf)); + /* Clear way partition mask and set aura offset to '0' */ + cfg &= ~(BIT_ULL(34) - 1); + /* Set aura size & enable caching of contexts */ + cfg |= (req->aura_sz << 16) | BIT_ULL(34); + rvu_write64(rvu, blkaddr, NPA_AF_LFX_AURAS_CFG(npalf), cfg); + + /* Configure aura HW context's base */ + rvu_write64(rvu, blkaddr, NPA_AF_LFX_LOC_AURAS_BASE(npalf), + (u64)pfvf->aura_ctx->iova); + + /* Enable caching of qints hw context */ + rvu_write64(rvu, blkaddr, NPA_AF_LFX_QINTS_CFG(npalf), BIT_ULL(36)); + rvu_write64(rvu, blkaddr, NPA_AF_LFX_QINTS_BASE(npalf), + (u64)pfvf->npa_qints_ctx->iova); + + goto exit; + +free_mem: + npa_ctx_free(rvu, pfvf); + rc = -ENOMEM; + +exit: + /* set stack page info */ + cfg = rvu_read64(rvu, blkaddr, NPA_AF_CONST); + rsp->stack_pg_ptrs = (cfg >> 8) & 0xFF; + rsp->stack_pg_bytes = cfg & 0xFF; + rsp->qints = (cfg >> 28) & 0xFFF; + return rc; +} + +int rvu_mbox_handler_NPA_LF_FREE(struct rvu *rvu, struct msg_req *req, + struct msg_rsp *rsp) +{ + struct rvu_hwinfo *hw = rvu->hw; + u16 pcifunc = req->hdr.pcifunc; + struct rvu_block *block; + struct rvu_pfvf *pfvf; + int npalf, err; + int blkaddr; + + pfvf = rvu_get_pfvf(rvu, pcifunc); + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPA, pcifunc); + if (!pfvf->npalf || blkaddr < 0) + return NPA_AF_ERR_AF_LF_INVALID; + + block = &hw->block[blkaddr]; + npalf = rvu_get_lf(rvu, block, pcifunc, 0); + if (npalf < 0) + return NPA_AF_ERR_AF_LF_INVALID; + + /* Reset this NPA LF */ + err = rvu_lf_reset(rvu, block, npalf); + if (err) { + dev_err(rvu->dev, "Failed to reset NPALF%d\n", npalf); + return NPA_AF_ERR_LF_RESET; + } + + npa_ctx_free(rvu, pfvf); + + return 0; +} + +static int npa_aq_init(struct rvu *rvu, struct rvu_block *block) +{ + u64 cfg; + int err; + + /* Set admin queue endianness */ + cfg = rvu_read64(rvu, block->addr, NPA_AF_GEN_CFG); +#ifdef __BIG_ENDIAN + cfg |= BIT_ULL(1); + rvu_write64(rvu, block->addr, NPA_AF_GEN_CFG, cfg); +#else + cfg &= ~BIT_ULL(1); + rvu_write64(rvu, block->addr, NPA_AF_GEN_CFG, cfg); +#endif + + /* Do not bypass NDC cache */ + cfg = rvu_read64(rvu, block->addr, NPA_AF_NDC_CFG); + cfg &= ~0x03DULL; + rvu_write64(rvu, block->addr, NPA_AF_NDC_CFG, cfg); + + /* Result structure can be followed by Aura/Pool context at + * RES + 128bytes and a write mask at RES + 256 bytes, depending on + * operation type. Alloc sufficient result memory for all operations. + */ + err = rvu_aq_alloc(rvu, &block->aq, + Q_COUNT(AQ_SIZE), sizeof(struct npa_aq_inst_s), + ALIGN(sizeof(struct npa_aq_res_s), 128) + 256); + if (err) + return err; + + rvu_write64(rvu, block->addr, NPA_AF_AQ_CFG, AQ_SIZE); + rvu_write64(rvu, block->addr, + NPA_AF_AQ_BASE, (u64)block->aq->inst->iova); + return 0; +} + +int rvu_npa_init(struct rvu *rvu) +{ + struct rvu_hwinfo *hw = rvu->hw; + struct rvu_block *block; + int blkaddr, err; + + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPA, 0); + if (blkaddr < 0) + return 0; + + block = &hw->block[blkaddr]; + + /* Initialize admin queue */ + err = npa_aq_init(rvu, &hw->block[blkaddr]); + if (err) + return err; + + return 0; +} + +void rvu_npa_freemem(struct rvu *rvu) +{ + struct rvu_hwinfo *hw = rvu->hw; + struct rvu_block *block; + int blkaddr; + + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPA, 0); + if (blkaddr < 0) + return; + + block = &hw->block[blkaddr]; + rvu_aq_free(rvu, block->aq); +} diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h index 92e05817ffe7..c331b237a26f 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h @@ -71,4 +71,812 @@ enum rvu_pf_int_vec_e { RVU_PF_INT_VEC_CNT = 0x7, }; +/* NPA admin queue completion enumeration */ +enum npa_aq_comp { + NPA_AQ_COMP_NOTDONE = 0x0, + NPA_AQ_COMP_GOOD = 0x1, + NPA_AQ_COMP_SWERR = 0x2, + NPA_AQ_COMP_CTX_POISON = 0x3, + NPA_AQ_COMP_CTX_FAULT = 0x4, + NPA_AQ_COMP_LOCKERR = 0x5, +}; + +/* NPA admin queue context types */ +enum npa_aq_ctype { + NPA_AQ_CTYPE_AURA = 0x0, + NPA_AQ_CTYPE_POOL = 0x1, +}; + +/* NPA admin queue instruction opcodes */ +enum npa_aq_instop { + NPA_AQ_INSTOP_NOP = 0x0, + NPA_AQ_INSTOP_INIT = 0x1, + NPA_AQ_INSTOP_WRITE = 0x2, + NPA_AQ_INSTOP_READ = 0x3, + NPA_AQ_INSTOP_LOCK = 0x4, + NPA_AQ_INSTOP_UNLOCK = 0x5, +}; + +/* NPA admin queue instruction structure */ +struct npa_aq_inst_s { +#if defined(__BIG_ENDIAN_BITFIELD) + u64 doneint : 1; /* W0 */ + u64 reserved_44_62 : 19; + u64 cindex : 20; + u64 reserved_17_23 : 7; + u64 lf : 9; + u64 ctype : 4; + u64 op : 4; +#else + u64 op : 4; + u64 ctype : 4; + u64 lf : 9; + u64 reserved_17_23 : 7; + u64 cindex : 20; + u64 reserved_44_62 : 19; + u64 doneint : 1; +#endif + u64 res_addr; /* W1 */ +}; + +/* NPA admin queue result structure */ +struct npa_aq_res_s { +#if defined(__BIG_ENDIAN_BITFIELD) + u64 reserved_17_63 : 47; /* W0 */ + u64 doneint : 1; + u64 compcode : 8; + u64 ctype : 4; + u64 op : 4; +#else + u64 op : 4; + u64 ctype : 4; + u64 compcode : 8; + u64 doneint : 1; + u64 reserved_17_63 : 47; +#endif + u64 reserved_64_127; /* W1 */ +}; + +struct npa_aura_s { + u64 pool_addr; /* W0 */ +#if defined(__BIG_ENDIAN_BITFIELD) /* W1 */ + u64 avg_level : 8; + u64 reserved_118_119 : 2; + u64 shift : 6; + u64 aura_drop : 8; + u64 reserved_98_103 : 6; + u64 bp_ena : 2; + u64 aura_drop_ena : 1; + u64 pool_drop_ena : 1; + u64 reserved_93 : 1; + u64 avg_con : 9; + u64 pool_way_mask : 16; + u64 pool_caching : 1; + u64 reserved_65 : 2; + u64 ena : 1; +#else + u64 ena : 1; + u64 reserved_65 : 2; + u64 pool_caching : 1; + u64 pool_way_mask : 16; + u64 avg_con : 9; + u64 reserved_93 : 1; + u64 pool_drop_ena : 1; + u64 aura_drop_ena : 1; + u64 bp_ena : 2; + u64 reserved_98_103 : 6; + u64 aura_drop : 8; + u64 shift : 6; + u64 reserved_118_119 : 2; + u64 avg_level : 8; +#endif +#if defined(__BIG_ENDIAN_BITFIELD) /* W2 */ + u64 reserved_189_191 : 3; + u64 nix1_bpid : 9; + u64 reserved_177_179 : 3; + u64 nix0_bpid : 9; + u64 reserved_164_167 : 4; + u64 count : 36; +#else + u64 count : 36; + u64 reserved_164_167 : 4; + u64 nix0_bpid : 9; + u64 reserved_177_179 : 3; + u64 nix1_bpid : 9; + u64 reserved_189_191 : 3; +#endif +#if defined(__BIG_ENDIAN_BITFIELD) /* W3 */ + u64 reserved_252_255 : 4; + u64 fc_hyst_bits : 4; + u64 fc_stype : 2; + u64 fc_up_crossing : 1; + u64 fc_ena : 1; + u64 reserved_240_243 : 4; + u64 bp : 8; + u64 reserved_228_231 : 4; + u64 limit : 36; +#else + u64 limit : 36; + u64 reserved_228_231 : 4; + u64 bp : 8; + u64 reserved_240_243 : 4; + u64 fc_ena : 1; + u64 fc_up_crossing : 1; + u64 fc_stype : 2; + u64 fc_hyst_bits : 4; + u64 reserved_252_255 : 4; +#endif + u64 fc_addr; /* W4 */ +#if defined(__BIG_ENDIAN_BITFIELD) /* W5 */ + u64 reserved_379_383 : 5; + u64 err_qint_idx : 7; + u64 reserved_371 : 1; + u64 thresh_qint_idx : 7; + u64 reserved_363 : 1; + u64 thresh_up : 1; + u64 thresh_int_ena : 1; + u64 thresh_int : 1; + u64 err_int_ena : 8; + u64 err_int : 8; + u64 update_time : 16; + u64 pool_drop : 8; +#else + u64 pool_drop : 8; + u64 update_time : 16; + u64 err_int : 8; + u64 err_int_ena : 8; + u64 thresh_int : 1; + u64 thresh_int_ena : 1; + u64 thresh_up : 1; + u64 reserved_363 : 1; + u64 thresh_qint_idx : 7; + u64 reserved_371 : 1; + u64 err_qint_idx : 7; + u64 reserved_379_383 : 5; +#endif +#if defined(__BIG_ENDIAN_BITFIELD) /* W6 */ + u64 reserved_420_447 : 28; + u64 thresh : 36; +#else + u64 thresh : 36; + u64 reserved_420_447 : 28; +#endif + u64 reserved_448_511; /* W7 */ +}; + +struct npa_pool_s { + u64 stack_base; /* W0 */ +#if defined(__BIG_ENDIAN_BITFIELD) /* W1 */ + u64 reserved_115_127 : 13; + u64 buf_size : 11; + u64 reserved_100_103 : 4; + u64 buf_offset : 12; + u64 stack_way_mask : 16; + u64 reserved_70_71 : 3; + u64 stack_caching : 1; + u64 reserved_66_67 : 2; + u64 nat_align : 1; + u64 ena : 1; +#else + u64 ena : 1; + u64 nat_align : 1; + u64 reserved_66_67 : 2; + u64 stack_caching : 1; + u64 reserved_70_71 : 3; + u64 stack_way_mask : 16; + u64 buf_offset : 12; + u64 reserved_100_103 : 4; + u64 buf_size : 11; + u64 reserved_115_127 : 13; +#endif +#if defined(__BIG_ENDIAN_BITFIELD) /* W2 */ + u64 stack_pages : 32; + u64 stack_max_pages : 32; +#else + u64 stack_max_pages : 32; + u64 stack_pages : 32; +#endif +#if defined(__BIG_ENDIAN_BITFIELD) /* W3 */ + u64 reserved_240_255 : 16; + u64 op_pc : 48; +#else + u64 op_pc : 48; + u64 reserved_240_255 : 16; +#endif +#if defined(__BIG_ENDIAN_BITFIELD) /* W4 */ + u64 reserved_316_319 : 4; + u64 update_time : 16; + u64 reserved_297_299 : 3; + u64 fc_up_crossing : 1; + u64 fc_hyst_bits : 4; + u64 fc_stype : 2; + u64 fc_ena : 1; + u64 avg_con : 9; + u64 avg_level : 8; + u64 reserved_270_271 : 2; + u64 shift : 6; + u64 reserved_260_263 : 4; + u64 stack_offset : 4; +#else + u64 stack_offset : 4; + u64 reserved_260_263 : 4; + u64 shift : 6; + u64 reserved_270_271 : 2; + u64 avg_level : 8; + u64 avg_con : 9; + u64 fc_ena : 1; + u64 fc_stype : 2; + u64 fc_hyst_bits : 4; + u64 fc_up_crossing : 1; + u64 reserved_297_299 : 3; + u64 update_time : 16; + u64 reserved_316_319 : 4; +#endif + u64 fc_addr; /* W5 */ + u64 ptr_start; /* W6 */ + u64 ptr_end; /* W7 */ +#if defined(__BIG_ENDIAN_BITFIELD) /* W8 */ + u64 reserved_571_575 : 5; + u64 err_qint_idx : 7; + u64 reserved_563 : 1; + u64 thresh_qint_idx : 7; + u64 reserved_555 : 1; + u64 thresh_up : 1; + u64 thresh_int_ena : 1; + u64 thresh_int : 1; + u64 err_int_ena : 8; + u64 err_int : 8; + u64 reserved_512_535 : 24; +#else + u64 reserved_512_535 : 24; + u64 err_int : 8; + u64 err_int_ena : 8; + u64 thresh_int : 1; + u64 thresh_int_ena : 1; + u64 thresh_up : 1; + u64 reserved_555 : 1; + u64 thresh_qint_idx : 7; + u64 reserved_563 : 1; + u64 err_qint_idx : 7; + u64 reserved_571_575 : 5; +#endif +#if defined(__BIG_ENDIAN_BITFIELD) /* W9 */ + u64 reserved_612_639 : 28; + u64 thresh : 36; +#else + u64 thresh : 36; + u64 reserved_612_639 : 28; +#endif + u64 reserved_640_703; /* W10 */ + u64 reserved_704_767; /* W11 */ + u64 reserved_768_831; /* W12 */ + u64 reserved_832_895; /* W13 */ + u64 reserved_896_959; /* W14 */ + u64 reserved_960_1023; /* W15 */ +}; + +/* NIX admin queue completion status */ +enum nix_aq_comp { + NIX_AQ_COMP_NOTDONE = 0x0, + NIX_AQ_COMP_GOOD = 0x1, + NIX_AQ_COMP_SWERR = 0x2, + NIX_AQ_COMP_CTX_POISON = 0x3, + NIX_AQ_COMP_CTX_FAULT = 0x4, + NIX_AQ_COMP_LOCKERR = 0x5, + NIX_AQ_COMP_SQB_ALLOC_FAIL = 0x6, +}; + +/* NIX admin queue context types */ +enum nix_aq_ctype { + NIX_AQ_CTYPE_RQ = 0x0, + NIX_AQ_CTYPE_SQ = 0x1, + NIX_AQ_CTYPE_CQ = 0x2, + NIX_AQ_CTYPE_MCE = 0x3, + NIX_AQ_CTYPE_RSS = 0x4, + NIX_AQ_CTYPE_DYNO = 0x5, +}; + +/* NIX admin queue instruction opcodes */ +enum nix_aq_instop { + NIX_AQ_INSTOP_NOP = 0x0, + NIX_AQ_INSTOP_INIT = 0x1, + NIX_AQ_INSTOP_WRITE = 0x2, + NIX_AQ_INSTOP_READ = 0x3, + NIX_AQ_INSTOP_LOCK = 0x4, + NIX_AQ_INSTOP_UNLOCK = 0x5, +}; + +/* NIX admin queue instruction structure */ +struct nix_aq_inst_s { +#if defined(__BIG_ENDIAN_BITFIELD) + u64 doneint : 1; /* W0 */ + u64 reserved_44_62 : 19; + u64 cindex : 20; + u64 reserved_15_23 : 9; + u64 lf : 7; + u64 ctype : 4; + u64 op : 4; +#else + u64 op : 4; + u64 ctype : 4; + u64 lf : 7; + u64 reserved_15_23 : 9; + u64 cindex : 20; + u64 reserved_44_62 : 19; + u64 doneint : 1; +#endif + u64 res_addr; /* W1 */ +}; + +/* NIX admin queue result structure */ +struct nix_aq_res_s { +#if defined(__BIG_ENDIAN_BITFIELD) + u64 reserved_17_63 : 47; /* W0 */ + u64 doneint : 1; + u64 compcode : 8; + u64 ctype : 4; + u64 op : 4; +#else + u64 op : 4; + u64 ctype : 4; + u64 compcode : 8; + u64 doneint : 1; + u64 reserved_17_63 : 47; +#endif + u64 reserved_64_127; /* W1 */ +}; + +/* NIX Completion queue context structure */ +struct nix_cq_ctx_s { + u64 base; +#if defined(__BIG_ENDIAN_BITFIELD) /* W1 */ + u64 wrptr : 20; + u64 avg_con : 9; + u64 cint_idx : 7; + u64 cq_err : 1; + u64 qint_idx : 7; + u64 rsvd_81_83 : 3; + u64 bpid : 9; + u64 rsvd_69_71 : 3; + u64 bp_ena : 1; + u64 rsvd_64_67 : 4; +#else + u64 rsvd_64_67 : 4; + u64 bp_ena : 1; + u64 rsvd_69_71 : 3; + u64 bpid : 9; + u64 rsvd_81_83 : 3; + u64 qint_idx : 7; + u64 cq_err : 1; + u64 cint_idx : 7; + u64 avg_con : 9; + u64 wrptr : 20; +#endif +#if defined(__BIG_ENDIAN_BITFIELD) /* W2 */ + u64 update_time : 16; + u64 avg_level : 8; + u64 head : 20; + u64 tail : 20; +#else + u64 tail : 20; + u64 head : 20; + u64 avg_level : 8; + u64 update_time : 16; +#endif +#if defined(__BIG_ENDIAN_BITFIELD) /* W3 */ + u64 cq_err_int_ena : 8; + u64 cq_err_int : 8; + u64 qsize : 4; + u64 rsvd_233_235 : 3; + u64 caching : 1; + u64 substream : 20; + u64 rsvd_210_211 : 2; + u64 ena : 1; + u64 drop_ena : 1; + u64 drop : 8; + u64 dp : 8; +#else + u64 dp : 8; + u64 drop : 8; + u64 drop_ena : 1; + u64 ena : 1; + u64 rsvd_210_211 : 2; + u64 substream : 20; + u64 caching : 1; + u64 rsvd_233_235 : 3; + u64 qsize : 4; + u64 cq_err_int : 8; + u64 cq_err_int_ena : 8; +#endif +}; + +/* NIX Receive queue context structure */ +struct nix_rq_ctx_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* W0 */ + u64 wqe_aura : 20; + u64 substream : 20; + u64 cq : 20; + u64 ena_wqwd : 1; + u64 ipsech_ena : 1; + u64 sso_ena : 1; + u64 ena : 1; +#else + u64 ena : 1; + u64 sso_ena : 1; + u64 ipsech_ena : 1; + u64 ena_wqwd : 1; + u64 cq : 20; + u64 substream : 20; + u64 wqe_aura : 20; +#endif +#if defined(__BIG_ENDIAN_BITFIELD) /* W1 */ + u64 rsvd_127_122 : 6; + u64 lpb_drop_ena : 1; + u64 spb_drop_ena : 1; + u64 xqe_drop_ena : 1; + u64 wqe_caching : 1; + u64 pb_caching : 2; + u64 sso_tt : 2; + u64 sso_grp : 10; + u64 lpb_aura : 20; + u64 spb_aura : 20; +#else + u64 spb_aura : 20; + u64 lpb_aura : 20; + u64 sso_grp : 10; + u64 sso_tt : 2; + u64 pb_caching : 2; + u64 wqe_caching : 1; + u64 xqe_drop_ena : 1; + u64 spb_drop_ena : 1; + u64 lpb_drop_ena : 1; + u64 rsvd_127_122 : 6; +#endif +#if defined(__BIG_ENDIAN_BITFIELD) /* W2 */ + u64 xqe_hdr_split : 1; + u64 xqe_imm_copy : 1; + u64 rsvd_189_184 : 6; + u64 xqe_imm_size : 6; + u64 later_skip : 6; + u64 rsvd_171 : 1; + u64 first_skip : 7; + u64 lpb_sizem1 : 12; + u64 spb_ena : 1; + u64 rsvd_150_148 : 3; + u64 wqe_skip : 2; + u64 spb_sizem1 : 6; + u64 rsvd_139_128 : 12; +#else + u64 rsvd_139_128 : 12; + u64 spb_sizem1 : 6; + u64 wqe_skip : 2; + u64 rsvd_150_148 : 3; + u64 spb_ena : 1; + u64 lpb_sizem1 : 12; + u64 first_skip : 7; + u64 rsvd_171 : 1; + u64 later_skip : 6; + u64 xqe_imm_size : 6; + u64 rsvd_189_184 : 6; + u64 xqe_imm_copy : 1; + u64 xqe_hdr_split : 1; +#endif +#if defined(__BIG_ENDIAN_BITFIELD) /* W3 */ + u64 spb_pool_pass : 8; + u64 spb_pool_drop : 8; + u64 spb_aura_pass : 8; + u64 spb_aura_drop : 8; + u64 wqe_pool_pass : 8; + u64 wqe_pool_drop : 8; + u64 xqe_pass : 8; + u64 xqe_drop : 8; +#else + u64 xqe_drop : 8; + u64 xqe_pass : 8; + u64 wqe_pool_drop : 8; + u64 wqe_pool_pass : 8; + u64 spb_aura_drop : 8; + u64 spb_aura_pass : 8; + u64 spb_pool_drop : 8; + u64 spb_pool_pass : 8; +#endif +#if defined(__BIG_ENDIAN_BITFIELD) /* W4 */ + u64 rsvd_319_315 : 5; + u64 qint_idx : 7; + u64 rq_int_ena : 8; + u64 rq_int : 8; + u64 rsvd_291_288 : 4; + u64 lpb_pool_pass : 8; + u64 lpb_pool_drop : 8; + u64 lpb_aura_pass : 8; + u64 lpb_aura_drop : 8; +#else + u64 lpb_aura_drop : 8; + u64 lpb_aura_pass : 8; + u64 lpb_pool_drop : 8; + u64 lpb_pool_pass : 8; + u64 rsvd_291_288 : 4; + u64 rq_int : 8; + u64 rq_int_ena : 8; + u64 qint_idx : 7; + u64 rsvd_319_315 : 5; +#endif +#if defined(__BIG_ENDIAN_BITFIELD) /* W5 */ + u64 rsvd_383_366 : 18; + u64 flow_tagw : 6; + u64 bad_utag : 8; + u64 good_utag : 8; + u64 ltag : 24; +#else + u64 ltag : 24; + u64 good_utag : 8; + u64 bad_utag : 8; + u64 flow_tagw : 6; + u64 rsvd_383_366 : 18; +#endif +#if defined(__BIG_ENDIAN_BITFIELD) /* W6 */ + u64 rsvd_447_432 : 16; + u64 octs : 48; +#else + u64 octs : 48; + u64 rsvd_447_432 : 16; +#endif +#if defined(__BIG_ENDIAN_BITFIELD) /* W7 */ + u64 rsvd_511_496 : 16; + u64 pkts : 48; +#else + u64 pkts : 48; + u64 rsvd_511_496 : 16; +#endif +#if defined(__BIG_ENDIAN_BITFIELD) /* W8 */ + u64 rsvd_575_560 : 16; + u64 drop_octs : 48; +#else + u64 drop_octs : 48; + u64 rsvd_575_560 : 16; +#endif +#if defined(__BIG_ENDIAN_BITFIELD) /* W9 */ + u64 rsvd_639_624 : 16; + u64 drop_pkts : 48; +#else + u64 drop_pkts : 48; + u64 rsvd_639_624 : 16; +#endif +#if defined(__BIG_ENDIAN_BITFIELD) /* W10 */ + u64 rsvd_703_688 : 16; + u64 re_pkts : 48; +#else + u64 re_pkts : 48; + u64 rsvd_703_688 : 16; +#endif + u64 rsvd_767_704; /* W11 */ + u64 rsvd_831_768; /* W12 */ + u64 rsvd_895_832; /* W13 */ + u64 rsvd_959_896; /* W14 */ + u64 rsvd_1023_960; /* W15 */ +}; + +/* NIX sqe sizes */ +enum nix_maxsqesz { + NIX_MAXSQESZ_W16 = 0x0, + NIX_MAXSQESZ_W8 = 0x1, +}; + +/* NIX SQB caching type */ +enum nix_stype { + NIX_STYPE_STF = 0x0, + NIX_STYPE_STT = 0x1, + NIX_STYPE_STP = 0x2, +}; + +/* NIX Send queue context structure */ +struct nix_sq_ctx_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* W0 */ + u64 sqe_way_mask : 16; + u64 cq : 20; + u64 sdp_mcast : 1; + u64 substream : 20; + u64 qint_idx : 6; + u64 ena : 1; +#else + u64 ena : 1; + u64 qint_idx : 6; + u64 substream : 20; + u64 sdp_mcast : 1; + u64 cq : 20; + u64 sqe_way_mask : 16; +#endif +#if defined(__BIG_ENDIAN_BITFIELD) /* W1 */ + u64 sqb_count : 16; + u64 default_chan : 12; + u64 smq_rr_quantum : 24; + u64 sso_ena : 1; + u64 xoff : 1; + u64 cq_ena : 1; + u64 smq : 9; +#else + u64 smq : 9; + u64 cq_ena : 1; + u64 xoff : 1; + u64 sso_ena : 1; + u64 smq_rr_quantum : 24; + u64 default_chan : 12; + u64 sqb_count : 16; +#endif +#if defined(__BIG_ENDIAN_BITFIELD) /* W2 */ + u64 rsvd_191 : 1; + u64 sqe_stype : 2; + u64 sq_int_ena : 8; + u64 sq_int : 8; + u64 sqb_aura : 20; + u64 smq_rr_count : 25; +#else + u64 smq_rr_count : 25; + u64 sqb_aura : 20; + u64 sq_int : 8; + u64 sq_int_ena : 8; + u64 sqe_stype : 2; + u64 rsvd_191 : 1; +#endif +#if defined(__BIG_ENDIAN_BITFIELD) /* W3 */ + u64 rsvd_255_253 : 3; + u64 smq_next_sq_vld : 1; + u64 smq_pend : 1; + u64 smenq_next_sqb_vld : 1; + u64 head_offset : 6; + u64 smenq_offset : 6; + u64 tail_offset : 6; + u64 smq_lso_segnum : 8; + u64 smq_next_sq : 20; + u64 mnq_dis : 1; + u64 lmt_dis : 1; + u64 cq_limit : 8; + u64 max_sqe_size : 2; +#else + u64 max_sqe_size : 2; + u64 cq_limit : 8; + u64 lmt_dis : 1; + u64 mnq_dis : 1; + u64 smq_next_sq : 20; + u64 smq_lso_segnum : 8; + u64 tail_offset : 6; + u64 smenq_offset : 6; + u64 head_offset : 6; + u64 smenq_next_sqb_vld : 1; + u64 smq_pend : 1; + u64 smq_next_sq_vld : 1; + u64 rsvd_255_253 : 3; +#endif + u64 next_sqb : 64;/* W4 */ + u64 tail_sqb : 64;/* W5 */ + u64 smenq_sqb : 64;/* W6 */ + u64 smenq_next_sqb : 64;/* W7 */ + u64 head_sqb : 64;/* W8 */ +#if defined(__BIG_ENDIAN_BITFIELD) /* W9 */ + u64 rsvd_639_630 : 10; + u64 vfi_lso_vld : 1; + u64 vfi_lso_vlan1_ins_ena : 1; + u64 vfi_lso_vlan0_ins_ena : 1; + u64 vfi_lso_mps : 14; + u64 vfi_lso_sb : 8; + u64 vfi_lso_sizem1 : 3; + u64 vfi_lso_total : 18; + u64 rsvd_583_576 : 8; +#else + u64 rsvd_583_576 : 8; + u64 vfi_lso_total : 18; + u64 vfi_lso_sizem1 : 3; + u64 vfi_lso_sb : 8; + u64 vfi_lso_mps : 14; + u64 vfi_lso_vlan0_ins_ena : 1; + u64 vfi_lso_vlan1_ins_ena : 1; + u64 vfi_lso_vld : 1; + u64 rsvd_639_630 : 10; +#endif +#if defined(__BIG_ENDIAN_BITFIELD) /* W10 */ + u64 rsvd_703_658 : 46; + u64 scm_lso_rem : 18; +#else + u64 scm_lso_rem : 18; + u64 rsvd_703_658 : 46; +#endif +#if defined(__BIG_ENDIAN_BITFIELD) /* W11 */ + u64 rsvd_767_752 : 16; + u64 octs : 48; +#else + u64 octs : 48; + u64 rsvd_767_752 : 16; +#endif +#if defined(__BIG_ENDIAN_BITFIELD) /* W12 */ + u64 rsvd_831_816 : 16; + u64 pkts : 48; +#else + u64 pkts : 48; + u64 rsvd_831_816 : 16; +#endif + u64 rsvd_895_832 : 64;/* W13 */ +#if defined(__BIG_ENDIAN_BITFIELD) /* W14 */ + u64 rsvd_959_944 : 16; + u64 dropped_octs : 48; +#else + u64 dropped_octs : 48; + u64 rsvd_959_944 : 16; +#endif +#if defined(__BIG_ENDIAN_BITFIELD) /* W15 */ + u64 rsvd_1023_1008 : 16; + u64 dropped_pkts : 48; +#else + u64 dropped_pkts : 48; + u64 rsvd_1023_1008 : 16; +#endif +}; + +/* NIX Receive side scaling entry structure*/ +struct nix_rsse_s { +#if defined(__BIG_ENDIAN_BITFIELD) + uint32_t reserved_20_31 : 12; + uint32_t rq : 20; +#else + uint32_t rq : 20; + uint32_t reserved_20_31 : 12; + +#endif +}; + +/* NIX receive multicast/mirror entry structure */ +struct nix_rx_mce_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* W0 */ + uint64_t next : 16; + uint64_t pf_func : 16; + uint64_t rsvd_31_24 : 8; + uint64_t index : 20; + uint64_t eol : 1; + uint64_t rsvd_2 : 1; + uint64_t op : 2; +#else + uint64_t op : 2; + uint64_t rsvd_2 : 1; + uint64_t eol : 1; + uint64_t index : 20; + uint64_t rsvd_31_24 : 8; + uint64_t pf_func : 16; + uint64_t next : 16; +#endif +}; + +enum nix_lsoalg { + NIX_LSOALG_NOP, + NIX_LSOALG_ADD_SEGNUM, + NIX_LSOALG_ADD_PAYLEN, + NIX_LSOALG_ADD_OFFSET, + NIX_LSOALG_TCP_FLAGS, +}; + +enum nix_txlayer { + NIX_TXLAYER_OL3, + NIX_TXLAYER_OL4, + NIX_TXLAYER_IL3, + NIX_TXLAYER_IL4, +}; + +struct nix_lso_format { +#if defined(__BIG_ENDIAN_BITFIELD) + u64 rsvd_19_63 : 45; + u64 alg : 3; + u64 rsvd_14_15 : 2; + u64 sizem1 : 2; + u64 rsvd_10_11 : 2; + u64 layer : 2; + u64 offset : 8; +#else + u64 offset : 8; + u64 layer : 2; + u64 rsvd_10_11 : 2; + u64 sizem1 : 2; + u64 rsvd_14_15 : 2; + u64 alg : 3; + u64 rsvd_19_63 : 45; +#endif +}; + #endif /* RVU_STRUCT_H */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index a53736c26c0c..a5a0823e5ada 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -308,10 +308,11 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_MODIFY_FLOW_TABLE: case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY: case MLX5_CMD_OP_SET_FLOW_TABLE_ROOT: - case MLX5_CMD_OP_DEALLOC_ENCAP_HEADER: + case MLX5_CMD_OP_DEALLOC_PACKET_REFORMAT_CONTEXT: case MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT: case MLX5_CMD_OP_FPGA_DESTROY_QP: case MLX5_CMD_OP_DESTROY_GENERAL_OBJECT: + case MLX5_CMD_OP_DEALLOC_MEMIC: return MLX5_CMD_STAT_OK; case MLX5_CMD_OP_QUERY_HCA_CAP: @@ -426,7 +427,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY: case MLX5_CMD_OP_ALLOC_FLOW_COUNTER: case MLX5_CMD_OP_QUERY_FLOW_COUNTER: - case MLX5_CMD_OP_ALLOC_ENCAP_HEADER: + case MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT: case MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT: case MLX5_CMD_OP_FPGA_CREATE_QP: case MLX5_CMD_OP_FPGA_MODIFY_QP: @@ -435,6 +436,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_CREATE_GENERAL_OBJECT: case MLX5_CMD_OP_MODIFY_GENERAL_OBJECT: case MLX5_CMD_OP_QUERY_GENERAL_OBJECT: + case MLX5_CMD_OP_ALLOC_MEMIC: *status = MLX5_DRIVER_STATUS_ABORTED; *synd = MLX5_DRIVER_SYND; return -EIO; @@ -599,8 +601,8 @@ const char *mlx5_command_str(int command) MLX5_COMMAND_STR_CASE(DEALLOC_FLOW_COUNTER); MLX5_COMMAND_STR_CASE(QUERY_FLOW_COUNTER); MLX5_COMMAND_STR_CASE(MODIFY_FLOW_TABLE); - MLX5_COMMAND_STR_CASE(ALLOC_ENCAP_HEADER); - MLX5_COMMAND_STR_CASE(DEALLOC_ENCAP_HEADER); + MLX5_COMMAND_STR_CASE(ALLOC_PACKET_REFORMAT_CONTEXT); + MLX5_COMMAND_STR_CASE(DEALLOC_PACKET_REFORMAT_CONTEXT); MLX5_COMMAND_STR_CASE(ALLOC_MODIFY_HEADER_CONTEXT); MLX5_COMMAND_STR_CASE(DEALLOC_MODIFY_HEADER_CONTEXT); MLX5_COMMAND_STR_CASE(FPGA_CREATE_QP); @@ -617,6 +619,8 @@ const char *mlx5_command_str(int command) MLX5_COMMAND_STR_CASE(MODIFY_GENERAL_OBJECT); MLX5_COMMAND_STR_CASE(QUERY_GENERAL_OBJECT); MLX5_COMMAND_STR_CASE(QUERY_MODIFY_HEADER_CONTEXT); + MLX5_COMMAND_STR_CASE(ALLOC_MEMIC); + MLX5_COMMAND_STR_CASE(DEALLOC_MEMIC); default: return "unknown command opcode"; } } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cq.c b/drivers/net/ethernet/mellanox/mlx5/core/cq.c index a4179122a279..4b85abb5c9f7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cq.c @@ -109,6 +109,7 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, cq->cons_index = 0; cq->arm_sn = 0; cq->eq = eq; + cq->uid = MLX5_GET(create_cq_in, in, uid); refcount_set(&cq->refcount, 1); init_completion(&cq->free); if (!cq->comp) @@ -144,6 +145,7 @@ err_cmd: memset(dout, 0, sizeof(dout)); MLX5_SET(destroy_cq_in, din, opcode, MLX5_CMD_OP_DESTROY_CQ); MLX5_SET(destroy_cq_in, din, cqn, cq->cqn); + MLX5_SET(destroy_cq_in, din, uid, cq->uid); mlx5_cmd_exec(dev, din, sizeof(din), dout, sizeof(dout)); return err; } @@ -165,6 +167,7 @@ int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq) MLX5_SET(destroy_cq_in, in, opcode, MLX5_CMD_OP_DESTROY_CQ); MLX5_SET(destroy_cq_in, in, cqn, cq->cqn); + MLX5_SET(destroy_cq_in, in, uid, cq->uid); err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); if (err) return err; @@ -196,6 +199,7 @@ int mlx5_core_modify_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, u32 out[MLX5_ST_SZ_DW(modify_cq_out)] = {0}; MLX5_SET(modify_cq_in, in, opcode, MLX5_CMD_OP_MODIFY_CQ); + MLX5_SET(modify_cq_in, in, uid, cq->uid); return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); } EXPORT_SYMBOL(mlx5_core_modify_cq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h index 0240aee9189e..d027ce00c8ce 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h @@ -133,7 +133,7 @@ TRACE_EVENT(mlx5_fs_del_fg, {MLX5_FLOW_CONTEXT_ACTION_DROP, "DROP"},\ {MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, "FWD"},\ {MLX5_FLOW_CONTEXT_ACTION_COUNT, "CNT"},\ - {MLX5_FLOW_CONTEXT_ACTION_ENCAP, "ENCAP"},\ + {MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT, "REFORMAT"},\ {MLX5_FLOW_CONTEXT_ACTION_DECAP, "DECAP"},\ {MLX5_FLOW_CONTEXT_ACTION_MOD_HDR, "MOD_HDR"},\ {MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH, "VLAN_PUSH"},\ @@ -252,10 +252,10 @@ TRACE_EVENT(mlx5_fs_add_rule, memcpy(__entry->destination, &rule->dest_attr, sizeof(__entry->destination)); - if (rule->dest_attr.type & MLX5_FLOW_DESTINATION_TYPE_COUNTER && - rule->dest_attr.counter) + if (rule->dest_attr.type & + MLX5_FLOW_DESTINATION_TYPE_COUNTER) __entry->counter_id = - rule->dest_attr.counter->id; + rule->dest_attr.counter_id; ), TP_printk("rule=%p fte=%p index=%u sw_action=<%s> [dst] %s\n", __entry->rule, __entry->fte, __entry->index, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c index db3278cc052b..3078491cc0d0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c @@ -153,7 +153,7 @@ int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb) if (enable_uc_lb) MLX5_SET(modify_tir_in, in, ctx.self_lb_block, - MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST_); + MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST); MLX5_SET(modify_tir_in, in, bitmask.self_lb_en, 1); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index c9848e333450..1243edbedc9e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3392,9 +3392,6 @@ static int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data, { struct mlx5e_priv *priv = cb_priv; - if (!tc_cls_can_offload_and_chain0(priv->netdev, type_data)) - return -EOPNOTSUPP; - switch (type) { case TC_SETUP_CLSFLOWER: return mlx5e_setup_tc_cls_flower(priv, type_data, MLX5E_TC_INGRESS); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 64c2b9ea8b1e..c3c657548824 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -853,9 +853,6 @@ static int mlx5e_rep_setup_tc_cb_egdev(enum tc_setup_type type, void *type_data, { struct mlx5e_priv *priv = cb_priv; - if (!tc_cls_can_offload_and_chain0(priv->netdev, type_data)) - return -EOPNOTSUPP; - switch (type) { case TC_SETUP_CLSFLOWER: return mlx5e_rep_setup_tc_cls_flower(priv, type_data, MLX5E_TC_EGRESS); @@ -869,9 +866,6 @@ static int mlx5e_rep_setup_tc_cb(enum tc_setup_type type, void *type_data, { struct mlx5e_priv *priv = cb_priv; - if (!tc_cls_can_offload_and_chain0(priv->netdev, type_data)) - return -EOPNOTSUPP; - switch (type) { case TC_SETUP_CLSFLOWER: return mlx5e_rep_setup_tc_cls_flower(priv, type_data, MLX5E_TC_INGRESS); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 6de21d9f4fad..608025ca5c04 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -61,6 +61,7 @@ struct mlx5_nic_flow_attr { u32 hairpin_tirn; u8 match_level; struct mlx5_flow_table *hairpin_ft; + struct mlx5_fc *counter; }; #define MLX5E_TC_FLOW_BASE (MLX5E_TC_LAST_EXPORTED_BIT + 1) @@ -73,6 +74,7 @@ enum { MLX5E_TC_FLOW_OFFLOADED = BIT(MLX5E_TC_FLOW_BASE + 2), MLX5E_TC_FLOW_HAIRPIN = BIT(MLX5E_TC_FLOW_BASE + 3), MLX5E_TC_FLOW_HAIRPIN_RSS = BIT(MLX5E_TC_FLOW_BASE + 4), + MLX5E_TC_FLOW_SLOW = BIT(MLX5E_TC_FLOW_BASE + 5), }; #define MLX5E_TC_MAX_SPLITS 1 @@ -81,7 +83,7 @@ struct mlx5e_tc_flow { struct rhash_head node; struct mlx5e_priv *priv; u64 cookie; - u8 flags; + u16 flags; struct mlx5_flow_handle *rule[MLX5E_TC_MAX_SPLITS + 1]; struct list_head encap; /* flows sharing the same encap ID */ struct list_head mod_hdr; /* flows sharing the same mod hdr ID */ @@ -100,11 +102,6 @@ struct mlx5e_tc_flow_parse_attr { int mirred_ifindex; }; -enum { - MLX5_HEADER_TYPE_VXLAN = 0x0, - MLX5_HEADER_TYPE_NVGRE = 0x1, -}; - #define MLX5E_TC_TABLE_NUM_GROUPS 4 #define MLX5E_TC_TABLE_MAX_GROUP_SIZE BIT(16) @@ -677,7 +674,7 @@ static void mlx5e_hairpin_flow_del(struct mlx5e_priv *priv, } } -static struct mlx5_flow_handle * +static int mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow_parse_attr *parse_attr, struct mlx5e_tc_flow *flow, @@ -688,19 +685,17 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, struct mlx5_flow_destination dest[2] = {}; struct mlx5_flow_act flow_act = { .action = attr->action, - .has_flow_tag = true, .flow_tag = attr->flow_tag, - .encap_id = 0, + .reformat_id = 0, + .flags = FLOW_ACT_HAS_TAG | FLOW_ACT_NO_APPEND, }; struct mlx5_fc *counter = NULL; - struct mlx5_flow_handle *rule; bool table_created = false; int err, dest_ix = 0; if (flow->flags & MLX5E_TC_FLOW_HAIRPIN) { err = mlx5e_hairpin_flow_add(priv, flow, parse_attr, extack); if (err) { - rule = ERR_PTR(err); goto err_add_hairpin_flow; } if (flow->flags & MLX5E_TC_FLOW_HAIRPIN_RSS) { @@ -720,22 +715,21 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { counter = mlx5_fc_create(dev, true); if (IS_ERR(counter)) { - rule = ERR_CAST(counter); + err = PTR_ERR(counter); goto err_fc_create; } dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; - dest[dest_ix].counter = counter; + dest[dest_ix].counter_id = mlx5_fc_id(counter); dest_ix++; + attr->counter = counter; } if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { err = mlx5e_attach_mod_hdr(priv, flow, parse_attr); flow_act.modify_id = attr->mod_hdr_id; kfree(parse_attr->mod_hdr_actions); - if (err) { - rule = ERR_PTR(err); + if (err) goto err_create_mod_hdr_id; - } } if (IS_ERR_OR_NULL(priv->fs.tc.t)) { @@ -761,7 +755,7 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, "Failed to create tc offload table\n"); netdev_err(priv->netdev, "Failed to create tc offload table\n"); - rule = ERR_CAST(priv->fs.tc.t); + err = PTR_ERR(priv->fs.tc.t); goto err_create_ft; } @@ -771,13 +765,15 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, if (attr->match_level != MLX5_MATCH_NONE) parse_attr->spec.match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - rule = mlx5_add_flow_rules(priv->fs.tc.t, &parse_attr->spec, - &flow_act, dest, dest_ix); + flow->rule[0] = mlx5_add_flow_rules(priv->fs.tc.t, &parse_attr->spec, + &flow_act, dest, dest_ix); - if (IS_ERR(rule)) + if (IS_ERR(flow->rule[0])) { + err = PTR_ERR(flow->rule[0]); goto err_add_rule; + } - return rule; + return 0; err_add_rule: if (table_created) { @@ -793,7 +789,7 @@ err_fc_create: if (flow->flags & MLX5E_TC_FLOW_HAIRPIN) mlx5e_hairpin_flow_del(priv, flow); err_add_hairpin_flow: - return rule; + return err; } static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv, @@ -802,7 +798,7 @@ static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv, struct mlx5_nic_flow_attr *attr = flow->nic_attr; struct mlx5_fc *counter = NULL; - counter = mlx5_flow_rule_counter(flow->rule[0]); + counter = attr->counter; mlx5_del_flow_rules(flow->rule[0]); mlx5_fc_destroy(priv->mdev, counter); @@ -829,28 +825,115 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv, struct netlink_ext_ack *extack); static struct mlx5_flow_handle * +mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_spec *spec, + struct mlx5_esw_flow_attr *attr) +{ + struct mlx5_flow_handle *rule; + + rule = mlx5_eswitch_add_offloaded_rule(esw, spec, attr); + if (IS_ERR(rule)) + return rule; + + if (attr->mirror_count) { + flow->rule[1] = mlx5_eswitch_add_fwd_rule(esw, spec, attr); + if (IS_ERR(flow->rule[1])) { + mlx5_eswitch_del_offloaded_rule(esw, rule, attr); + return flow->rule[1]; + } + } + + flow->flags |= MLX5E_TC_FLOW_OFFLOADED; + return rule; +} + +static void +mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw, + struct mlx5e_tc_flow *flow, + struct mlx5_esw_flow_attr *attr) +{ + flow->flags &= ~MLX5E_TC_FLOW_OFFLOADED; + + if (attr->mirror_count) + mlx5_eswitch_del_fwd_rule(esw, flow->rule[1], attr); + + mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr); +} + +static struct mlx5_flow_handle * +mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch *esw, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_spec *spec, + struct mlx5_esw_flow_attr *slow_attr) +{ + struct mlx5_flow_handle *rule; + + memcpy(slow_attr, flow->esw_attr, sizeof(*slow_attr)); + slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + slow_attr->mirror_count = 0, + slow_attr->dest_chain = FDB_SLOW_PATH_CHAIN, + + rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, slow_attr); + if (!IS_ERR(rule)) + flow->flags |= MLX5E_TC_FLOW_SLOW; + + return rule; +} + +static void +mlx5e_tc_unoffload_from_slow_path(struct mlx5_eswitch *esw, + struct mlx5e_tc_flow *flow, + struct mlx5_esw_flow_attr *slow_attr) +{ + memcpy(slow_attr, flow->esw_attr, sizeof(*slow_attr)); + mlx5e_tc_unoffload_fdb_rules(esw, flow, slow_attr); + flow->flags &= ~MLX5E_TC_FLOW_SLOW; +} + +static int mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow_parse_attr *parse_attr, struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + u32 max_chain = mlx5_eswitch_get_chain_range(esw); struct mlx5_esw_flow_attr *attr = flow->esw_attr; + u16 max_prio = mlx5_eswitch_get_prio_range(esw); struct net_device *out_dev, *encap_dev = NULL; - struct mlx5_flow_handle *rule = NULL; + struct mlx5_fc *counter = NULL; struct mlx5e_rep_priv *rpriv; struct mlx5e_priv *out_priv; - int err; + int err = 0, encap_err = 0; + + /* if prios are not supported, keep the old behaviour of using same prio + * for all offloaded rules. + */ + if (!mlx5_eswitch_prios_supported(esw)) + attr->prio = 1; - if (attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP) { + if (attr->chain > max_chain) { + NL_SET_ERR_MSG(extack, "Requested chain is out of supported range"); + err = -EOPNOTSUPP; + goto err_max_prio_chain; + } + + if (attr->prio > max_prio) { + NL_SET_ERR_MSG(extack, "Requested priority is out of supported range"); + err = -EOPNOTSUPP; + goto err_max_prio_chain; + } + + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT) { out_dev = __dev_get_by_index(dev_net(priv->netdev), attr->parse_attr->mirred_ifindex); - err = mlx5e_attach_encap(priv, &parse_attr->tun_info, - out_dev, &encap_dev, flow, extack); - if (err) { - rule = ERR_PTR(err); - if (err != -EAGAIN) - goto err_attach_encap; + encap_err = mlx5e_attach_encap(priv, &parse_attr->tun_info, + out_dev, &encap_dev, flow, + extack); + if (encap_err && encap_err != -EAGAIN) { + err = encap_err; + goto err_attach_encap; } out_priv = netdev_priv(encap_dev); rpriv = out_priv->ppriv; @@ -859,49 +942,58 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, } err = mlx5_eswitch_add_vlan_action(esw, attr); - if (err) { - rule = ERR_PTR(err); + if (err) goto err_add_vlan; - } if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { err = mlx5e_attach_mod_hdr(priv, flow, parse_attr); kfree(parse_attr->mod_hdr_actions); - if (err) { - rule = ERR_PTR(err); + if (err) goto err_mod_hdr; + } + + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { + counter = mlx5_fc_create(esw->dev, true); + if (IS_ERR(counter)) { + err = PTR_ERR(counter); + goto err_create_counter; } + + attr->counter = counter; } - /* we get here if (1) there's no error (rule being null) or when + /* we get here if (1) there's no error or when * (2) there's an encap action and we're on -EAGAIN (no valid neigh) */ - if (rule != ERR_PTR(-EAGAIN)) { - rule = mlx5_eswitch_add_offloaded_rule(esw, &parse_attr->spec, attr); - if (IS_ERR(rule)) - goto err_add_rule; - - if (attr->mirror_count) { - flow->rule[1] = mlx5_eswitch_add_fwd_rule(esw, &parse_attr->spec, attr); - if (IS_ERR(flow->rule[1])) - goto err_fwd_rule; - } + if (encap_err == -EAGAIN) { + /* continue with goto slow path rule instead */ + struct mlx5_esw_flow_attr slow_attr; + + flow->rule[0] = mlx5e_tc_offload_to_slow_path(esw, flow, &parse_attr->spec, &slow_attr); + } else { + flow->rule[0] = mlx5e_tc_offload_fdb_rules(esw, flow, &parse_attr->spec, attr); } - return rule; -err_fwd_rule: - mlx5_eswitch_del_offloaded_rule(esw, rule, attr); - rule = flow->rule[1]; + if (IS_ERR(flow->rule[0])) { + err = PTR_ERR(flow->rule[0]); + goto err_add_rule; + } + + return 0; + err_add_rule: + mlx5_fc_destroy(esw->dev, counter); +err_create_counter: if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) mlx5e_detach_mod_hdr(priv, flow); err_mod_hdr: mlx5_eswitch_del_vlan_action(esw, attr); err_add_vlan: - if (attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP) + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT) mlx5e_detach_encap(priv, flow); err_attach_encap: - return rule; +err_max_prio_chain: + return err; } static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, @@ -909,36 +1001,43 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5_esw_flow_attr *attr = flow->esw_attr; + struct mlx5_esw_flow_attr slow_attr; if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) { - flow->flags &= ~MLX5E_TC_FLOW_OFFLOADED; - if (attr->mirror_count) - mlx5_eswitch_del_offloaded_rule(esw, flow->rule[1], attr); - mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr); + if (flow->flags & MLX5E_TC_FLOW_SLOW) + mlx5e_tc_unoffload_from_slow_path(esw, flow, &slow_attr); + else + mlx5e_tc_unoffload_fdb_rules(esw, flow, attr); } mlx5_eswitch_del_vlan_action(esw, attr); - if (attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP) { + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT) { mlx5e_detach_encap(priv, flow); kvfree(attr->parse_attr); } if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) mlx5e_detach_mod_hdr(priv, flow); + + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) + mlx5_fc_destroy(esw->dev, attr->counter); } void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - struct mlx5_esw_flow_attr *esw_attr; + struct mlx5_esw_flow_attr slow_attr, *esw_attr; + struct mlx5_flow_handle *rule; + struct mlx5_flow_spec *spec; struct mlx5e_tc_flow *flow; int err; - err = mlx5_encap_alloc(priv->mdev, e->tunnel_type, - e->encap_size, e->encap_header, - &e->encap_id); + err = mlx5_packet_reformat_alloc(priv->mdev, e->tunnel_type, + e->encap_size, e->encap_header, + MLX5_FLOW_NAMESPACE_FDB, + &e->encap_id); if (err) { mlx5_core_warn(priv->mdev, "Failed to offload cached encapsulation header, %d\n", err); @@ -950,26 +1049,20 @@ void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv, list_for_each_entry(flow, &e->flows, encap) { esw_attr = flow->esw_attr; esw_attr->encap_id = e->encap_id; - flow->rule[0] = mlx5_eswitch_add_offloaded_rule(esw, &esw_attr->parse_attr->spec, esw_attr); - if (IS_ERR(flow->rule[0])) { - err = PTR_ERR(flow->rule[0]); + spec = &esw_attr->parse_attr->spec; + + /* update from slow path rule to encap rule */ + rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, esw_attr); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n", err); continue; } - if (esw_attr->mirror_count) { - flow->rule[1] = mlx5_eswitch_add_fwd_rule(esw, &esw_attr->parse_attr->spec, esw_attr); - if (IS_ERR(flow->rule[1])) { - mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], esw_attr); - err = PTR_ERR(flow->rule[1]); - mlx5_core_warn(priv->mdev, "Failed to update cached mirror flow, %d\n", - err); - continue; - } - } - - flow->flags |= MLX5E_TC_FLOW_OFFLOADED; + mlx5e_tc_unoffload_from_slow_path(esw, flow, &slow_attr); + flow->flags |= MLX5E_TC_FLOW_OFFLOADED; /* was unset when slow path rule removed */ + flow->rule[0] = rule; } } @@ -977,25 +1070,44 @@ void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_esw_flow_attr slow_attr; + struct mlx5_flow_handle *rule; + struct mlx5_flow_spec *spec; struct mlx5e_tc_flow *flow; + int err; list_for_each_entry(flow, &e->flows, encap) { - if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) { - struct mlx5_esw_flow_attr *attr = flow->esw_attr; + spec = &flow->esw_attr->parse_attr->spec; + + /* update from encap rule to slow path rule */ + rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec, &slow_attr); - flow->flags &= ~MLX5E_TC_FLOW_OFFLOADED; - if (attr->mirror_count) - mlx5_eswitch_del_offloaded_rule(esw, flow->rule[1], attr); - mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n", + err); + continue; } + + mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->esw_attr); + flow->flags |= MLX5E_TC_FLOW_OFFLOADED; /* was unset when fast path rule removed */ + flow->rule[0] = rule; } if (e->flags & MLX5_ENCAP_ENTRY_VALID) { e->flags &= ~MLX5_ENCAP_ENTRY_VALID; - mlx5_encap_dealloc(priv->mdev, e->encap_id); + mlx5_packet_reformat_dealloc(priv->mdev, e->encap_id); } } +static struct mlx5_fc *mlx5e_tc_get_counter(struct mlx5e_tc_flow *flow) +{ + if (flow->flags & MLX5E_TC_FLOW_ESWITCH) + return flow->esw_attr->counter; + else + return flow->nic_attr->counter; +} + void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe) { struct mlx5e_neigh *m_neigh = &nhe->m_neigh; @@ -1021,7 +1133,7 @@ void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe) continue; list_for_each_entry(flow, &e->flows, encap) { if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) { - counter = mlx5_flow_rule_counter(flow->rule[0]); + counter = mlx5e_tc_get_counter(flow); mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse); if (time_after((unsigned long)lastuse, nhe->reported_lastuse)) { neigh_used = true; @@ -1061,7 +1173,7 @@ static void mlx5e_detach_encap(struct mlx5e_priv *priv, mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e); if (e->flags & MLX5_ENCAP_ENTRY_VALID) - mlx5_encap_dealloc(priv->mdev, e->encap_id); + mlx5_packet_reformat_dealloc(priv->mdev, e->encap_id); hash_del_rcu(&e->encap_hlist); kfree(e->encap_header); @@ -2391,7 +2503,7 @@ static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv, return -ENOMEM; switch (e->tunnel_type) { - case MLX5_HEADER_TYPE_VXLAN: + case MLX5_REFORMAT_TYPE_L2_TO_VXLAN: fl4.flowi4_proto = IPPROTO_UDP; fl4.fl4_dport = tun_key->tp_dst; break; @@ -2435,7 +2547,7 @@ static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv, read_unlock_bh(&n->lock); switch (e->tunnel_type) { - case MLX5_HEADER_TYPE_VXLAN: + case MLX5_REFORMAT_TYPE_L2_TO_VXLAN: gen_vxlan_header_ipv4(out_dev, encap_header, ipv4_encap_size, e->h_dest, tos, ttl, fl4.daddr, @@ -2455,8 +2567,10 @@ static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv, goto out; } - err = mlx5_encap_alloc(priv->mdev, e->tunnel_type, - ipv4_encap_size, encap_header, &e->encap_id); + err = mlx5_packet_reformat_alloc(priv->mdev, e->tunnel_type, + ipv4_encap_size, encap_header, + MLX5_FLOW_NAMESPACE_FDB, + &e->encap_id); if (err) goto destroy_neigh_entry; @@ -2500,7 +2614,7 @@ static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv, return -ENOMEM; switch (e->tunnel_type) { - case MLX5_HEADER_TYPE_VXLAN: + case MLX5_REFORMAT_TYPE_L2_TO_VXLAN: fl6.flowi6_proto = IPPROTO_UDP; fl6.fl6_dport = tun_key->tp_dst; break; @@ -2544,7 +2658,7 @@ static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv, read_unlock_bh(&n->lock); switch (e->tunnel_type) { - case MLX5_HEADER_TYPE_VXLAN: + case MLX5_REFORMAT_TYPE_L2_TO_VXLAN: gen_vxlan_header_ipv6(out_dev, encap_header, ipv6_encap_size, e->h_dest, tos, ttl, &fl6.daddr, @@ -2565,8 +2679,10 @@ static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv, goto out; } - err = mlx5_encap_alloc(priv->mdev, e->tunnel_type, - ipv6_encap_size, encap_header, &e->encap_id); + err = mlx5_packet_reformat_alloc(priv->mdev, e->tunnel_type, + ipv6_encap_size, encap_header, + MLX5_FLOW_NAMESPACE_FDB, + &e->encap_id); if (err) goto destroy_neigh_entry; @@ -2617,7 +2733,7 @@ vxlan_encap_offload_err: if (mlx5_vxlan_lookup_port(priv->mdev->vxlan, be16_to_cpu(key->tp_dst)) && MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap)) { - tunnel_type = MLX5_HEADER_TYPE_VXLAN; + tunnel_type = MLX5_REFORMAT_TYPE_L2_TO_VXLAN; } else { NL_SET_ERR_MSG_MOD(extack, "port isn't an offloaded vxlan udp dport"); @@ -2728,6 +2844,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack) { + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5_esw_flow_attr *attr = flow->esw_attr; struct mlx5e_rep_priv *rpriv = priv->ppriv; struct ip_tunnel_info *info = NULL; @@ -2797,7 +2914,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, parse_attr->mirred_ifindex = out_dev->ifindex; parse_attr->tun_info = *info; attr->parse_attr = parse_attr; - action |= MLX5_FLOW_CONTEXT_ACTION_ENCAP | + action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT | MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_COUNT; /* attr->out_rep is resolved when we handle encap */ @@ -2836,6 +2953,25 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, continue; } + if (is_tcf_gact_goto_chain(a)) { + u32 dest_chain = tcf_gact_goto_chain_index(a); + u32 max_chain = mlx5_eswitch_get_chain_range(esw); + + if (dest_chain <= attr->chain) { + NL_SET_ERR_MSG(extack, "Goto earlier chain isn't supported"); + return -EOPNOTSUPP; + } + if (dest_chain > max_chain) { + NL_SET_ERR_MSG(extack, "Requested destination chain is out of supported range"); + return -EOPNOTSUPP; + } + action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | + MLX5_FLOW_CONTEXT_ACTION_COUNT; + attr->dest_chain = dest_chain; + + continue; + } + return -EINVAL; } @@ -2853,9 +2989,9 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, return 0; } -static void get_flags(int flags, u8 *flow_flags) +static void get_flags(int flags, u16 *flow_flags) { - u8 __flow_flags = 0; + u16 __flow_flags = 0; if (flags & MLX5E_TC_INGRESS) __flow_flags |= MLX5E_TC_FLOW_INGRESS; @@ -2884,34 +3020,15 @@ static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv) return &priv->fs.tc.ht; } -int mlx5e_configure_flower(struct mlx5e_priv *priv, - struct tc_cls_flower_offload *f, int flags) +static int +mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size, + struct tc_cls_flower_offload *f, u16 flow_flags, + struct mlx5e_tc_flow_parse_attr **__parse_attr, + struct mlx5e_tc_flow **__flow) { - struct netlink_ext_ack *extack = f->common.extack; - struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5e_tc_flow_parse_attr *parse_attr; - struct rhashtable *tc_ht = get_tc_ht(priv); struct mlx5e_tc_flow *flow; - int attr_size, err = 0; - u8 flow_flags = 0; - - get_flags(flags, &flow_flags); - - flow = rhashtable_lookup_fast(tc_ht, &f->cookie, tc_ht_params); - if (flow) { - NL_SET_ERR_MSG_MOD(extack, - "flow cookie already exists, ignoring"); - netdev_warn_once(priv->netdev, "flow cookie %lx already exists, ignoring\n", f->cookie); - return 0; - } - - if (esw && esw->mode == SRIOV_OFFLOADS) { - flow_flags |= MLX5E_TC_FLOW_ESWITCH; - attr_size = sizeof(struct mlx5_esw_flow_attr); - } else { - flow_flags |= MLX5E_TC_FLOW_NIC; - attr_size = sizeof(struct mlx5_nic_flow_attr); - } + int err; flow = kzalloc(sizeof(*flow) + attr_size, GFP_KERNEL); parse_attr = kvzalloc(sizeof(*parse_attr), GFP_KERNEL); @@ -2925,49 +3042,161 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, flow->priv = priv; err = parse_cls_flower(priv, flow, &parse_attr->spec, f); - if (err < 0) + if (err) goto err_free; - if (flow->flags & MLX5E_TC_FLOW_ESWITCH) { - err = parse_tc_fdb_actions(priv, f->exts, parse_attr, flow, - extack); - if (err < 0) - goto err_free; - flow->rule[0] = mlx5e_tc_add_fdb_flow(priv, parse_attr, flow, - extack); - } else { - err = parse_tc_nic_actions(priv, f->exts, parse_attr, flow, - extack); - if (err < 0) - goto err_free; - flow->rule[0] = mlx5e_tc_add_nic_flow(priv, parse_attr, flow, - extack); - } + *__flow = flow; + *__parse_attr = parse_attr; - if (IS_ERR(flow->rule[0])) { - err = PTR_ERR(flow->rule[0]); - if (err != -EAGAIN) - goto err_free; - } + return 0; + +err_free: + kfree(flow); + kvfree(parse_attr); + return err; +} + +static int +mlx5e_add_fdb_flow(struct mlx5e_priv *priv, + struct tc_cls_flower_offload *f, + u16 flow_flags, + struct mlx5e_tc_flow **__flow) +{ + struct netlink_ext_ack *extack = f->common.extack; + struct mlx5e_tc_flow_parse_attr *parse_attr; + struct mlx5e_tc_flow *flow; + int attr_size, err; - if (err != -EAGAIN) - flow->flags |= MLX5E_TC_FLOW_OFFLOADED; + flow_flags |= MLX5E_TC_FLOW_ESWITCH; + attr_size = sizeof(struct mlx5_esw_flow_attr); + err = mlx5e_alloc_flow(priv, attr_size, f, flow_flags, + &parse_attr, &flow); + if (err) + goto out; - if (!(flow->flags & MLX5E_TC_FLOW_ESWITCH) || - !(flow->esw_attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP)) + flow->esw_attr->chain = f->common.chain_index; + flow->esw_attr->prio = TC_H_MAJ(f->common.prio) >> 16; + err = parse_tc_fdb_actions(priv, f->exts, parse_attr, flow, extack); + if (err) + goto err_free; + + err = mlx5e_tc_add_fdb_flow(priv, parse_attr, flow, extack); + if (err) + goto err_free; + + if (!(flow->esw_attr->action & + MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT)) kvfree(parse_attr); - err = rhashtable_insert_fast(tc_ht, &flow->node, tc_ht_params); - if (err) { - mlx5e_tc_del_flow(priv, flow); - kfree(flow); - } + *__flow = flow; + return 0; + +err_free: + kfree(flow); + kvfree(parse_attr); +out: return err; +} + +static int +mlx5e_add_nic_flow(struct mlx5e_priv *priv, + struct tc_cls_flower_offload *f, + u16 flow_flags, + struct mlx5e_tc_flow **__flow) +{ + struct netlink_ext_ack *extack = f->common.extack; + struct mlx5e_tc_flow_parse_attr *parse_attr; + struct mlx5e_tc_flow *flow; + int attr_size, err; + + /* multi-chain not supported for NIC rules */ + if (!tc_cls_can_offload_and_chain0(priv->netdev, &f->common)) + return -EOPNOTSUPP; + + flow_flags |= MLX5E_TC_FLOW_NIC; + attr_size = sizeof(struct mlx5_nic_flow_attr); + err = mlx5e_alloc_flow(priv, attr_size, f, flow_flags, + &parse_attr, &flow); + if (err) + goto out; + + err = parse_tc_nic_actions(priv, f->exts, parse_attr, flow, extack); + if (err) + goto err_free; + + err = mlx5e_tc_add_nic_flow(priv, parse_attr, flow, extack); + if (err) + goto err_free; + + flow->flags |= MLX5E_TC_FLOW_OFFLOADED; + kvfree(parse_attr); + *__flow = flow; + + return 0; err_free: + kfree(flow); kvfree(parse_attr); +out: + return err; +} + +static int +mlx5e_tc_add_flow(struct mlx5e_priv *priv, + struct tc_cls_flower_offload *f, + int flags, + struct mlx5e_tc_flow **flow) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + u16 flow_flags; + int err; + + get_flags(flags, &flow_flags); + + if (!tc_can_offload_extack(priv->netdev, f->common.extack)) + return -EOPNOTSUPP; + + if (esw && esw->mode == SRIOV_OFFLOADS) + err = mlx5e_add_fdb_flow(priv, f, flow_flags, flow); + else + err = mlx5e_add_nic_flow(priv, f, flow_flags, flow); + + return err; +} + +int mlx5e_configure_flower(struct mlx5e_priv *priv, + struct tc_cls_flower_offload *f, int flags) +{ + struct netlink_ext_ack *extack = f->common.extack; + struct rhashtable *tc_ht = get_tc_ht(priv); + struct mlx5e_tc_flow *flow; + int err = 0; + + flow = rhashtable_lookup_fast(tc_ht, &f->cookie, tc_ht_params); + if (flow) { + NL_SET_ERR_MSG_MOD(extack, + "flow cookie already exists, ignoring"); + netdev_warn_once(priv->netdev, + "flow cookie %lx already exists, ignoring\n", + f->cookie); + goto out; + } + + err = mlx5e_tc_add_flow(priv, f, flags, &flow); + if (err) + goto out; + + err = rhashtable_insert_fast(tc_ht, &flow->node, tc_ht_params); + if (err) + goto err_free; + + return 0; + +err_free: + mlx5e_tc_del_flow(priv, flow); kfree(flow); +out: return err; } @@ -3018,7 +3247,7 @@ int mlx5e_stats_flower(struct mlx5e_priv *priv, if (!(flow->flags & MLX5E_TC_FLOW_OFFLOADED)) return 0; - counter = mlx5_flow_rule_counter(flow->rule[0]); + counter = mlx5e_tc_get_counter(flow); if (!counter) return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index ea7dedc2d5ad..d004957328f9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -263,7 +263,7 @@ static int esw_create_legacy_fdb_table(struct mlx5_eswitch *esw) esw_debug(dev, "Create FDB log_max_size(%d)\n", MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size)); - root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB); + root_ns = mlx5_get_fdb_sub_ns(dev, 0); if (!root_ns) { esw_warn(dev, "Failed to get FDB flow namespace\n"); return -EOPNOTSUPP; @@ -1198,7 +1198,7 @@ static int esw_vport_ingress_config(struct mlx5_eswitch *esw, if (counter) { flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; drop_ctr_dst.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; - drop_ctr_dst.counter = counter; + drop_ctr_dst.counter_id = mlx5_fc_id(counter); dst = &drop_ctr_dst; dest_num++; } @@ -1285,7 +1285,7 @@ static int esw_vport_egress_config(struct mlx5_eswitch *esw, if (counter) { flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; drop_ctr_dst.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; - drop_ctr_dst.counter = counter; + drop_ctr_dst.counter_id = mlx5_fc_id(counter); dst = &drop_ctr_dst; dest_num++; } @@ -1746,7 +1746,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) esw->enabled_vports = 0; esw->mode = SRIOV_NONE; esw->offloads.inline_mode = MLX5_INLINE_MODE_NONE; - if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, encap) && + if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, reformat) && MLX5_CAP_ESW_FLOWTABLE_FDB(dev, decap)) esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_BASIC; else diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index dfc642de4e6d..aaafc9f17115 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -59,6 +59,10 @@ #define mlx5_esw_has_fwd_fdb(dev) \ MLX5_CAP_ESW_FLOWTABLE(dev, fdb_multi_path_to_table) +#define FDB_MAX_CHAIN 3 +#define FDB_SLOW_PATH_CHAIN (FDB_MAX_CHAIN + 1) +#define FDB_MAX_PRIO 16 + struct vport_ingress { struct mlx5_flow_table *acl; struct mlx5_flow_group *allow_untagged_spoofchk_grp; @@ -120,6 +124,13 @@ struct mlx5_vport { u16 enabled_events; }; +enum offloads_fdb_flags { + ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED = BIT(0), +}; + +extern const unsigned int ESW_POOLS[4]; + +#define PRIO_LEVELS 2 struct mlx5_eswitch_fdb { union { struct legacy_fdb { @@ -130,16 +141,24 @@ struct mlx5_eswitch_fdb { } legacy; struct offloads_fdb { - struct mlx5_flow_table *fast_fdb; - struct mlx5_flow_table *fwd_fdb; struct mlx5_flow_table *slow_fdb; struct mlx5_flow_group *send_to_vport_grp; struct mlx5_flow_group *miss_grp; struct mlx5_flow_handle *miss_rule_uni; struct mlx5_flow_handle *miss_rule_multi; int vlan_push_pop_refcount; + + struct { + struct mlx5_flow_table *fdb; + u32 num_rules; + } fdb_prio[FDB_MAX_CHAIN + 1][FDB_MAX_PRIO + 1][PRIO_LEVELS]; + /* Protects fdb_prio table */ + struct mutex fdb_prio_lock; + + int fdb_left[ARRAY_SIZE(ESW_POOLS)]; } offloads; }; + u32 flags; }; struct mlx5_esw_offload { @@ -181,6 +200,7 @@ struct mlx5_eswitch { struct mlx5_esw_offload offloads; int mode; + int nvports; }; void esw_offloads_cleanup(struct mlx5_eswitch *esw, int nvports); @@ -228,6 +248,19 @@ void mlx5_eswitch_del_offloaded_rule(struct mlx5_eswitch *esw, struct mlx5_flow_handle *rule, struct mlx5_esw_flow_attr *attr); +void +mlx5_eswitch_del_fwd_rule(struct mlx5_eswitch *esw, + struct mlx5_flow_handle *rule, + struct mlx5_esw_flow_attr *attr); + +bool +mlx5_eswitch_prios_supported(struct mlx5_eswitch *esw); + +u16 +mlx5_eswitch_get_prio_range(struct mlx5_eswitch *esw); + +u32 +mlx5_eswitch_get_chain_range(struct mlx5_eswitch *esw); struct mlx5_flow_handle * mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, @@ -266,6 +299,10 @@ struct mlx5_esw_flow_attr { u32 encap_id; u32 mod_hdr_id; u8 match_level; + struct mlx5_fc *counter; + u32 chain; + u16 prio; + u32 dest_chain; struct mlx5e_tc_flow_parse_attr *parse_attr; }; @@ -318,6 +355,11 @@ static inline void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) {} static inline void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe) {} static inline int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) { return 0; } static inline void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) {} + +#define FDB_MAX_CHAIN 1 +#define FDB_SLOW_PATH_CHAIN (FDB_MAX_CHAIN + 1) +#define FDB_MAX_PRIO 1 + #endif /* CONFIG_MLX5_ESWITCH */ #endif /* __MLX5_ESWITCH_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index a35a2310f871..9eac137790f5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -37,33 +37,59 @@ #include <linux/mlx5/fs.h> #include "mlx5_core.h" #include "eswitch.h" +#include "en.h" +#include "fs_core.h" enum { FDB_FAST_PATH = 0, FDB_SLOW_PATH }; +#define fdb_prio_table(esw, chain, prio, level) \ + (esw)->fdb_table.offloads.fdb_prio[(chain)][(prio)][(level)] + +static struct mlx5_flow_table * +esw_get_prio_table(struct mlx5_eswitch *esw, u32 chain, u16 prio, int level); +static void +esw_put_prio_table(struct mlx5_eswitch *esw, u32 chain, u16 prio, int level); + +bool mlx5_eswitch_prios_supported(struct mlx5_eswitch *esw) +{ + return (!!(esw->fdb_table.flags & ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED)); +} + +u32 mlx5_eswitch_get_chain_range(struct mlx5_eswitch *esw) +{ + if (esw->fdb_table.flags & ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED) + return FDB_MAX_CHAIN; + + return 0; +} + +u16 mlx5_eswitch_get_prio_range(struct mlx5_eswitch *esw) +{ + if (esw->fdb_table.flags & ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED) + return FDB_MAX_PRIO; + + return 1; +} + struct mlx5_flow_handle * mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, struct mlx5_flow_spec *spec, struct mlx5_esw_flow_attr *attr) { struct mlx5_flow_destination dest[MLX5_MAX_FLOW_FWD_VPORTS + 1] = {}; - struct mlx5_flow_act flow_act = {0}; - struct mlx5_flow_table *ft = NULL; - struct mlx5_fc *counter = NULL; + struct mlx5_flow_act flow_act = { .flags = FLOW_ACT_NO_APPEND, }; + bool mirror = !!(attr->mirror_count); struct mlx5_flow_handle *rule; + struct mlx5_flow_table *fdb; int j, i = 0; void *misc; if (esw->mode != SRIOV_OFFLOADS) return ERR_PTR(-EOPNOTSUPP); - if (attr->mirror_count) - ft = esw->fdb_table.offloads.fwd_fdb; - else - ft = esw->fdb_table.offloads.fast_fdb; - flow_act.action = attr->action; /* if per flow vlan pop/push is emulated, don't set that into the firmware */ if (!mlx5_eswitch_vlan_actions_supported(esw->dev, 1)) @@ -81,23 +107,33 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, } if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { - for (j = attr->mirror_count; j < attr->out_count; j++) { - dest[i].type = MLX5_FLOW_DESTINATION_TYPE_VPORT; - dest[i].vport.num = attr->out_rep[j]->vport; - dest[i].vport.vhca_id = - MLX5_CAP_GEN(attr->out_mdev[j], vhca_id); - dest[i].vport.vhca_id_valid = !!MLX5_CAP_ESW(esw->dev, merged_eswitch); + if (attr->dest_chain) { + struct mlx5_flow_table *ft; + + ft = esw_get_prio_table(esw, attr->dest_chain, 1, 0); + if (IS_ERR(ft)) { + rule = ERR_CAST(ft); + goto err_create_goto_table; + } + + dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest[i].ft = ft; i++; + } else { + for (j = attr->mirror_count; j < attr->out_count; j++) { + dest[i].type = MLX5_FLOW_DESTINATION_TYPE_VPORT; + dest[i].vport.num = attr->out_rep[j]->vport; + dest[i].vport.vhca_id = + MLX5_CAP_GEN(attr->out_mdev[j], vhca_id); + dest[i].vport.vhca_id_valid = + !!MLX5_CAP_ESW(esw->dev, merged_eswitch); + i++; + } } } if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { - counter = mlx5_fc_create(esw->dev, true); - if (IS_ERR(counter)) { - rule = ERR_CAST(counter); - goto err_counter_alloc; - } dest[i].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; - dest[i].counter = counter; + dest[i].counter_id = mlx5_fc_id(attr->counter); i++; } @@ -127,10 +163,16 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) flow_act.modify_id = attr->mod_hdr_id; - if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_ENCAP) - flow_act.encap_id = attr->encap_id; + if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT) + flow_act.reformat_id = attr->encap_id; - rule = mlx5_add_flow_rules(ft, spec, &flow_act, dest, i); + fdb = esw_get_prio_table(esw, attr->chain, attr->prio, !!mirror); + if (IS_ERR(fdb)) { + rule = ERR_CAST(fdb); + goto err_esw_get; + } + + rule = mlx5_add_flow_rules(fdb, spec, &flow_act, dest, i); if (IS_ERR(rule)) goto err_add_rule; else @@ -139,8 +181,11 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, return rule; err_add_rule: - mlx5_fc_destroy(esw->dev, counter); -err_counter_alloc: + esw_put_prio_table(esw, attr->chain, attr->prio, !!mirror); +err_esw_get: + if (attr->dest_chain) + esw_put_prio_table(esw, attr->dest_chain, 1, 0); +err_create_goto_table: return rule; } @@ -150,11 +195,25 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw, struct mlx5_esw_flow_attr *attr) { struct mlx5_flow_destination dest[MLX5_MAX_FLOW_FWD_VPORTS + 1] = {}; - struct mlx5_flow_act flow_act = {0}; + struct mlx5_flow_act flow_act = { .flags = FLOW_ACT_NO_APPEND, }; + struct mlx5_flow_table *fast_fdb; + struct mlx5_flow_table *fwd_fdb; struct mlx5_flow_handle *rule; void *misc; int i; + fast_fdb = esw_get_prio_table(esw, attr->chain, attr->prio, 0); + if (IS_ERR(fast_fdb)) { + rule = ERR_CAST(fast_fdb); + goto err_get_fast; + } + + fwd_fdb = esw_get_prio_table(esw, attr->chain, attr->prio, 1); + if (IS_ERR(fwd_fdb)) { + rule = ERR_CAST(fwd_fdb); + goto err_get_fwd; + } + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; for (i = 0; i < attr->mirror_count; i++) { dest[i].type = MLX5_FLOW_DESTINATION_TYPE_VPORT; @@ -164,7 +223,7 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw, dest[i].vport.vhca_id_valid = !!MLX5_CAP_ESW(esw->dev, merged_eswitch); } dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; - dest[i].ft = esw->fdb_table.offloads.fwd_fdb, + dest[i].ft = fwd_fdb, i++; misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); @@ -187,12 +246,41 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw, spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS | MLX5_MATCH_MISC_PARAMETERS; - rule = mlx5_add_flow_rules(esw->fdb_table.offloads.fast_fdb, spec, &flow_act, dest, i); + rule = mlx5_add_flow_rules(fast_fdb, spec, &flow_act, dest, i); - if (!IS_ERR(rule)) - esw->offloads.num_flows++; + if (IS_ERR(rule)) + goto add_err; + + esw->offloads.num_flows++; return rule; +add_err: + esw_put_prio_table(esw, attr->chain, attr->prio, 1); +err_get_fwd: + esw_put_prio_table(esw, attr->chain, attr->prio, 0); +err_get_fast: + return rule; +} + +static void +__mlx5_eswitch_del_rule(struct mlx5_eswitch *esw, + struct mlx5_flow_handle *rule, + struct mlx5_esw_flow_attr *attr, + bool fwd_rule) +{ + bool mirror = (attr->mirror_count > 0); + + mlx5_del_flow_rules(rule); + esw->offloads.num_flows--; + + if (fwd_rule) { + esw_put_prio_table(esw, attr->chain, attr->prio, 1); + esw_put_prio_table(esw, attr->chain, attr->prio, 0); + } else { + esw_put_prio_table(esw, attr->chain, attr->prio, !!mirror); + if (attr->dest_chain) + esw_put_prio_table(esw, attr->dest_chain, 1, 0); + } } void @@ -200,12 +288,15 @@ mlx5_eswitch_del_offloaded_rule(struct mlx5_eswitch *esw, struct mlx5_flow_handle *rule, struct mlx5_esw_flow_attr *attr) { - struct mlx5_fc *counter = NULL; + __mlx5_eswitch_del_rule(esw, rule, attr, false); +} - counter = mlx5_flow_rule_counter(rule); - mlx5_del_flow_rules(rule); - mlx5_fc_destroy(esw->dev, counter); - esw->offloads.num_flows--; +void +mlx5_eswitch_del_fwd_rule(struct mlx5_eswitch *esw, + struct mlx5_flow_handle *rule, + struct mlx5_esw_flow_attr *attr) +{ + __mlx5_eswitch_del_rule(esw, rule, attr, true); } static int esw_set_global_vlan_pop(struct mlx5_eswitch *esw, u8 val) @@ -294,7 +385,8 @@ int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw, push = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH); pop = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP); - fwd = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST); + fwd = !!((attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) && + !attr->dest_chain); err = esw_add_vlan_action_check(attr, push, pop, fwd); if (err) @@ -501,74 +593,170 @@ out: #define ESW_OFFLOADS_NUM_GROUPS 4 -static int esw_create_offloads_fast_fdb_table(struct mlx5_eswitch *esw) +/* Firmware currently has 4 pool of 4 sizes that it supports (ESW_POOLS), + * and a virtual memory region of 16M (ESW_SIZE), this region is duplicated + * for each flow table pool. We can allocate up to 16M of each pool, + * and we keep track of how much we used via put/get_sz_to_pool. + * Firmware doesn't report any of this for now. + * ESW_POOL is expected to be sorted from large to small + */ +#define ESW_SIZE (16 * 1024 * 1024) +const unsigned int ESW_POOLS[4] = { 4 * 1024 * 1024, 1 * 1024 * 1024, + 64 * 1024, 4 * 1024 }; + +static int +get_sz_from_pool(struct mlx5_eswitch *esw) +{ + int sz = 0, i; + + for (i = 0; i < ARRAY_SIZE(ESW_POOLS); i++) { + if (esw->fdb_table.offloads.fdb_left[i]) { + --esw->fdb_table.offloads.fdb_left[i]; + sz = ESW_POOLS[i]; + break; + } + } + + return sz; +} + +static void +put_sz_to_pool(struct mlx5_eswitch *esw, int sz) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(ESW_POOLS); i++) { + if (sz >= ESW_POOLS[i]) { + ++esw->fdb_table.offloads.fdb_left[i]; + break; + } + } +} + +static struct mlx5_flow_table * +create_next_size_table(struct mlx5_eswitch *esw, + struct mlx5_flow_namespace *ns, + u16 table_prio, + int level, + u32 flags) +{ + struct mlx5_flow_table *fdb; + int sz; + + sz = get_sz_from_pool(esw); + if (!sz) + return ERR_PTR(-ENOSPC); + + fdb = mlx5_create_auto_grouped_flow_table(ns, + table_prio, + sz, + ESW_OFFLOADS_NUM_GROUPS, + level, + flags); + if (IS_ERR(fdb)) { + esw_warn(esw->dev, "Failed to create FDB Table err %d (table prio: %d, level: %d, size: %d)\n", + (int)PTR_ERR(fdb), table_prio, level, sz); + put_sz_to_pool(esw, sz); + } + + return fdb; +} + +static struct mlx5_flow_table * +esw_get_prio_table(struct mlx5_eswitch *esw, u32 chain, u16 prio, int level) { struct mlx5_core_dev *dev = esw->dev; - struct mlx5_flow_namespace *root_ns; struct mlx5_flow_table *fdb = NULL; - int esw_size, err = 0; + struct mlx5_flow_namespace *ns; + int table_prio, l = 0; u32 flags = 0; - u32 max_flow_counter = (MLX5_CAP_GEN(dev, max_flow_counter_31_16) << 16) | - MLX5_CAP_GEN(dev, max_flow_counter_15_0); - root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB); - if (!root_ns) { - esw_warn(dev, "Failed to get FDB flow namespace\n"); - err = -EOPNOTSUPP; - goto out_namespace; - } + if (chain == FDB_SLOW_PATH_CHAIN) + return esw->fdb_table.offloads.slow_fdb; - esw_debug(dev, "Create offloads FDB table, min (max esw size(2^%d), max counters(%d)*groups(%d))\n", - MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size), - max_flow_counter, ESW_OFFLOADS_NUM_GROUPS); + mutex_lock(&esw->fdb_table.offloads.fdb_prio_lock); - esw_size = min_t(int, max_flow_counter * ESW_OFFLOADS_NUM_GROUPS, - 1 << MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size)); + fdb = fdb_prio_table(esw, chain, prio, level).fdb; + if (fdb) { + /* take ref on earlier levels as well */ + while (level >= 0) + fdb_prio_table(esw, chain, prio, level--).num_rules++; + mutex_unlock(&esw->fdb_table.offloads.fdb_prio_lock); + return fdb; + } - if (mlx5_esw_has_fwd_fdb(dev)) - esw_size >>= 1; + ns = mlx5_get_fdb_sub_ns(dev, chain); + if (!ns) { + esw_warn(dev, "Failed to get FDB sub namespace\n"); + mutex_unlock(&esw->fdb_table.offloads.fdb_prio_lock); + return ERR_PTR(-EOPNOTSUPP); + } if (esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE) - flags |= MLX5_FLOW_TABLE_TUNNEL_EN; + flags |= (MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT | + MLX5_FLOW_TABLE_TUNNEL_EN_DECAP); - fdb = mlx5_create_auto_grouped_flow_table(root_ns, FDB_FAST_PATH, - esw_size, - ESW_OFFLOADS_NUM_GROUPS, 0, - flags); - if (IS_ERR(fdb)) { - err = PTR_ERR(fdb); - esw_warn(dev, "Failed to create Fast path FDB Table err %d\n", err); - goto out_namespace; - } - esw->fdb_table.offloads.fast_fdb = fdb; + table_prio = (chain * FDB_MAX_PRIO) + prio - 1; - if (!mlx5_esw_has_fwd_fdb(dev)) - goto out_namespace; + /* create earlier levels for correct fs_core lookup when + * connecting tables + */ + for (l = 0; l <= level; l++) { + if (fdb_prio_table(esw, chain, prio, l).fdb) { + fdb_prio_table(esw, chain, prio, l).num_rules++; + continue; + } - fdb = mlx5_create_auto_grouped_flow_table(root_ns, FDB_FAST_PATH, - esw_size, - ESW_OFFLOADS_NUM_GROUPS, 1, - flags); - if (IS_ERR(fdb)) { - err = PTR_ERR(fdb); - esw_warn(dev, "Failed to create fwd table err %d\n", err); - goto out_ft; + fdb = create_next_size_table(esw, ns, table_prio, l, flags); + if (IS_ERR(fdb)) { + l--; + goto err_create_fdb; + } + + fdb_prio_table(esw, chain, prio, l).fdb = fdb; + fdb_prio_table(esw, chain, prio, l).num_rules = 1; } - esw->fdb_table.offloads.fwd_fdb = fdb; - return err; + mutex_unlock(&esw->fdb_table.offloads.fdb_prio_lock); + return fdb; -out_ft: - mlx5_destroy_flow_table(esw->fdb_table.offloads.fast_fdb); -out_namespace: - return err; +err_create_fdb: + mutex_unlock(&esw->fdb_table.offloads.fdb_prio_lock); + if (l >= 0) + esw_put_prio_table(esw, chain, prio, l); + + return fdb; +} + +static void +esw_put_prio_table(struct mlx5_eswitch *esw, u32 chain, u16 prio, int level) +{ + int l; + + if (chain == FDB_SLOW_PATH_CHAIN) + return; + + mutex_lock(&esw->fdb_table.offloads.fdb_prio_lock); + + for (l = level; l >= 0; l--) { + if (--(fdb_prio_table(esw, chain, prio, l).num_rules) > 0) + continue; + + put_sz_to_pool(esw, fdb_prio_table(esw, chain, prio, l).fdb->max_fte); + mlx5_destroy_flow_table(fdb_prio_table(esw, chain, prio, l).fdb); + fdb_prio_table(esw, chain, prio, l).fdb = NULL; + } + + mutex_unlock(&esw->fdb_table.offloads.fdb_prio_lock); } -static void esw_destroy_offloads_fast_fdb_table(struct mlx5_eswitch *esw) +static void esw_destroy_offloads_fast_fdb_tables(struct mlx5_eswitch *esw) { - if (mlx5_esw_has_fwd_fdb(esw->dev)) - mlx5_destroy_flow_table(esw->fdb_table.offloads.fwd_fdb); - mlx5_destroy_flow_table(esw->fdb_table.offloads.fast_fdb); + /* If lazy creation isn't supported, deref the fast path tables */ + if (!(esw->fdb_table.flags & ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED)) { + esw_put_prio_table(esw, 0, 1, 1); + esw_put_prio_table(esw, 0, 1, 0); + } } #define MAX_PF_SQ 256 @@ -579,12 +767,13 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports) int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); struct mlx5_flow_table_attr ft_attr = {}; struct mlx5_core_dev *dev = esw->dev; + u32 *flow_group_in, max_flow_counter; struct mlx5_flow_namespace *root_ns; struct mlx5_flow_table *fdb = NULL; - int table_size, ix, err = 0; + int table_size, ix, err = 0, i; struct mlx5_flow_group *g; + u32 flags = 0, fdb_max; void *match_criteria; - u32 *flow_group_in; u8 *dmac; esw_debug(esw->dev, "Create offloads FDB Tables\n"); @@ -599,12 +788,29 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports) goto ns_err; } - err = esw_create_offloads_fast_fdb_table(esw); - if (err) - goto fast_fdb_err; + max_flow_counter = (MLX5_CAP_GEN(dev, max_flow_counter_31_16) << 16) | + MLX5_CAP_GEN(dev, max_flow_counter_15_0); + fdb_max = 1 << MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size); + + esw_debug(dev, "Create offloads FDB table, min (max esw size(2^%d), max counters(%d), groups(%d), max flow table size(2^%d))\n", + MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size), + max_flow_counter, ESW_OFFLOADS_NUM_GROUPS, + fdb_max); + + for (i = 0; i < ARRAY_SIZE(ESW_POOLS); i++) + esw->fdb_table.offloads.fdb_left[i] = + ESW_POOLS[i] <= fdb_max ? ESW_SIZE / ESW_POOLS[i] : 0; table_size = nvports * MAX_SQ_NVPORTS + MAX_PF_SQ + 2; + /* create the slow path fdb with encap set, so further table instances + * can be created at run time while VFs are probed if the FW allows that. + */ + if (esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE) + flags |= (MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT | + MLX5_FLOW_TABLE_TUNNEL_EN_DECAP); + + ft_attr.flags = flags; ft_attr.max_fte = table_size; ft_attr.prio = FDB_SLOW_PATH; @@ -616,6 +822,18 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports) } esw->fdb_table.offloads.slow_fdb = fdb; + /* If lazy creation isn't supported, open the fast path tables now */ + if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev, multi_fdb_encap) && + esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE) { + esw->fdb_table.flags &= ~ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED; + esw_warn(dev, "Lazy creation of flow tables isn't supported, ignoring priorities\n"); + esw_get_prio_table(esw, 0, 1, 0); + esw_get_prio_table(esw, 0, 1, 1); + } else { + esw_debug(dev, "Lazy creation of flow tables supported, deferring table opening\n"); + esw->fdb_table.flags |= ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED; + } + /* create send-to-vport group */ memset(flow_group_in, 0, inlen); MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, @@ -663,6 +881,7 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports) if (err) goto miss_rule_err; + esw->nvports = nvports; kvfree(flow_group_in); return 0; @@ -671,10 +890,9 @@ miss_rule_err: miss_err: mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp); send_vport_err: + esw_destroy_offloads_fast_fdb_tables(esw); mlx5_destroy_flow_table(esw->fdb_table.offloads.slow_fdb); slow_fdb_err: - esw_destroy_offloads_fast_fdb_table(esw); -fast_fdb_err: ns_err: kvfree(flow_group_in); return err; @@ -682,7 +900,7 @@ ns_err: static void esw_destroy_offloads_fdb_tables(struct mlx5_eswitch *esw) { - if (!esw->fdb_table.offloads.fast_fdb) + if (!esw->fdb_table.offloads.slow_fdb) return; esw_debug(esw->dev, "Destroy offloads FDB Tables\n"); @@ -692,7 +910,7 @@ static void esw_destroy_offloads_fdb_tables(struct mlx5_eswitch *esw) mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp); mlx5_destroy_flow_table(esw->fdb_table.offloads.slow_fdb); - esw_destroy_offloads_fast_fdb_table(esw); + esw_destroy_offloads_fast_fdb_tables(esw); } static int esw_create_offloads_table(struct mlx5_eswitch *esw) @@ -949,6 +1167,8 @@ int esw_offloads_init(struct mlx5_eswitch *esw, int nvports) { int err; + mutex_init(&esw->fdb_table.offloads.fdb_prio_lock); + err = esw_create_offloads_fdb_tables(esw, nvports); if (err) return err; @@ -1256,7 +1476,7 @@ int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap, return err; if (encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE && - (!MLX5_CAP_ESW_FLOWTABLE_FDB(dev, encap) || + (!MLX5_CAP_ESW_FLOWTABLE_FDB(dev, reformat) || !MLX5_CAP_ESW_FLOWTABLE_FDB(dev, decap))) return -EOPNOTSUPP; @@ -1277,16 +1497,19 @@ int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap, return -EOPNOTSUPP; } - esw_destroy_offloads_fast_fdb_table(esw); + esw_destroy_offloads_fdb_tables(esw); esw->offloads.encap = encap; - err = esw_create_offloads_fast_fdb_table(esw); + + err = esw_create_offloads_fdb_tables(esw, esw->nvports); + if (err) { NL_SET_ERR_MSG_MOD(extack, "Failed re-creating fast FDB table"); esw->offloads.encap = !encap; - (void)esw_create_offloads_fast_fdb_table(esw); + (void)esw_create_offloads_fdb_tables(esw, esw->nvports); } + return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c index 5645a4facad2..28aa8c968a80 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c @@ -650,7 +650,7 @@ static bool mlx5_is_fpga_egress_ipsec_rule(struct mlx5_core_dev *dev, (match_criteria_enable & ~(MLX5_MATCH_OUTER_HEADERS | MLX5_MATCH_MISC_PARAMETERS)) || (flow_act->action & ~(MLX5_FLOW_CONTEXT_ACTION_ENCRYPT | MLX5_FLOW_CONTEXT_ACTION_ALLOW)) || - flow_act->has_flow_tag) + (flow_act->flags & FLOW_ACT_HAS_TAG)) return false; return true; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index 8e01f818021b..08a891f9aade 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -152,7 +152,8 @@ static int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev, struct mlx5_flow_table *next_ft, unsigned int *table_id, u32 flags) { - int en_encap_decap = !!(flags & MLX5_FLOW_TABLE_TUNNEL_EN); + int en_encap = !!(flags & MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT); + int en_decap = !!(flags & MLX5_FLOW_TABLE_TUNNEL_EN_DECAP); u32 out[MLX5_ST_SZ_DW(create_flow_table_out)] = {0}; u32 in[MLX5_ST_SZ_DW(create_flow_table_in)] = {0}; int err; @@ -169,9 +170,9 @@ static int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev, } MLX5_SET(create_flow_table_in, in, flow_table_context.decap_en, - en_encap_decap); - MLX5_SET(create_flow_table_in, in, flow_table_context.encap_en, - en_encap_decap); + en_decap); + MLX5_SET(create_flow_table_in, in, flow_table_context.reformat_en, + en_encap); switch (op_mod) { case FS_FT_OP_MOD_NORMAL: @@ -343,7 +344,8 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, MLX5_SET(flow_context, in_flow_context, flow_tag, fte->action.flow_tag); MLX5_SET(flow_context, in_flow_context, action, fte->action.action); - MLX5_SET(flow_context, in_flow_context, encap_id, fte->action.encap_id); + MLX5_SET(flow_context, in_flow_context, packet_reformat_id, + fte->action.reformat_id); MLX5_SET(flow_context, in_flow_context, modify_header_id, fte->action.modify_id); @@ -417,7 +419,7 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, continue; MLX5_SET(flow_counter_list, in_dests, flow_counter_id, - dst->dest_attr.counter->id); + dst->dest_attr.counter_id); in_dests += MLX5_ST_SZ_BYTES(dest_format_struct); list_size++; } @@ -594,62 +596,78 @@ void mlx5_cmd_fc_bulk_get(struct mlx5_core_dev *dev, *bytes = MLX5_GET64(traffic_counter, stats, octets); } -int mlx5_encap_alloc(struct mlx5_core_dev *dev, - int header_type, - size_t size, - void *encap_header, - u32 *encap_id) +int mlx5_packet_reformat_alloc(struct mlx5_core_dev *dev, + int reformat_type, + size_t size, + void *reformat_data, + enum mlx5_flow_namespace_type namespace, + u32 *packet_reformat_id) { - int max_encap_size = MLX5_CAP_ESW(dev, max_encap_header_size); - u32 out[MLX5_ST_SZ_DW(alloc_encap_header_out)]; - void *encap_header_in; - void *header; + u32 out[MLX5_ST_SZ_DW(alloc_packet_reformat_context_out)]; + void *packet_reformat_context_in; + int max_encap_size; + void *reformat; int inlen; int err; u32 *in; + if (namespace == MLX5_FLOW_NAMESPACE_FDB) + max_encap_size = MLX5_CAP_ESW(dev, max_encap_header_size); + else + max_encap_size = MLX5_CAP_FLOWTABLE(dev, max_encap_header_size); + if (size > max_encap_size) { mlx5_core_warn(dev, "encap size %zd too big, max supported is %d\n", size, max_encap_size); return -EINVAL; } - in = kzalloc(MLX5_ST_SZ_BYTES(alloc_encap_header_in) + size, + in = kzalloc(MLX5_ST_SZ_BYTES(alloc_packet_reformat_context_in) + size, GFP_KERNEL); if (!in) return -ENOMEM; - encap_header_in = MLX5_ADDR_OF(alloc_encap_header_in, in, encap_header); - header = MLX5_ADDR_OF(encap_header_in, encap_header_in, encap_header); - inlen = header - (void *)in + size; + packet_reformat_context_in = MLX5_ADDR_OF(alloc_packet_reformat_context_in, + in, packet_reformat_context); + reformat = MLX5_ADDR_OF(packet_reformat_context_in, + packet_reformat_context_in, + reformat_data); + inlen = reformat - (void *)in + size; memset(in, 0, inlen); - MLX5_SET(alloc_encap_header_in, in, opcode, - MLX5_CMD_OP_ALLOC_ENCAP_HEADER); - MLX5_SET(encap_header_in, encap_header_in, encap_header_size, size); - MLX5_SET(encap_header_in, encap_header_in, header_type, header_type); - memcpy(header, encap_header, size); + MLX5_SET(alloc_packet_reformat_context_in, in, opcode, + MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT); + MLX5_SET(packet_reformat_context_in, packet_reformat_context_in, + reformat_data_size, size); + MLX5_SET(packet_reformat_context_in, packet_reformat_context_in, + reformat_type, reformat_type); + memcpy(reformat, reformat_data, size); memset(out, 0, sizeof(out)); err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); - *encap_id = MLX5_GET(alloc_encap_header_out, out, encap_id); + *packet_reformat_id = MLX5_GET(alloc_packet_reformat_context_out, + out, packet_reformat_id); kfree(in); return err; } +EXPORT_SYMBOL(mlx5_packet_reformat_alloc); -void mlx5_encap_dealloc(struct mlx5_core_dev *dev, u32 encap_id) +void mlx5_packet_reformat_dealloc(struct mlx5_core_dev *dev, + u32 packet_reformat_id) { - u32 in[MLX5_ST_SZ_DW(dealloc_encap_header_in)]; - u32 out[MLX5_ST_SZ_DW(dealloc_encap_header_out)]; + u32 in[MLX5_ST_SZ_DW(dealloc_packet_reformat_context_in)]; + u32 out[MLX5_ST_SZ_DW(dealloc_packet_reformat_context_out)]; memset(in, 0, sizeof(in)); - MLX5_SET(dealloc_encap_header_in, in, opcode, - MLX5_CMD_OP_DEALLOC_ENCAP_HEADER); - MLX5_SET(dealloc_encap_header_in, in, encap_id, encap_id); + MLX5_SET(dealloc_packet_reformat_context_in, in, opcode, + MLX5_CMD_OP_DEALLOC_PACKET_REFORMAT_CONTEXT); + MLX5_SET(dealloc_packet_reformat_context_in, in, packet_reformat_id, + packet_reformat_id); mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } +EXPORT_SYMBOL(mlx5_packet_reformat_dealloc); int mlx5_modify_header_alloc(struct mlx5_core_dev *dev, u8 namespace, u8 num_actions, @@ -667,9 +685,14 @@ int mlx5_modify_header_alloc(struct mlx5_core_dev *dev, table_type = FS_FT_FDB; break; case MLX5_FLOW_NAMESPACE_KERNEL: + case MLX5_FLOW_NAMESPACE_BYPASS: max_actions = MLX5_CAP_FLOWTABLE_NIC_RX(dev, max_modify_header_actions); table_type = FS_FT_NIC_RX; break; + case MLX5_FLOW_NAMESPACE_EGRESS: + max_actions = MLX5_CAP_FLOWTABLE_NIC_TX(dev, max_modify_header_actions); + table_type = FS_FT_NIC_TX; + break; default: return -EOPNOTSUPP; } @@ -702,6 +725,7 @@ int mlx5_modify_header_alloc(struct mlx5_core_dev *dev, kfree(in); return err; } +EXPORT_SYMBOL(mlx5_modify_header_alloc); void mlx5_modify_header_dealloc(struct mlx5_core_dev *dev, u32 modify_header_id) { @@ -716,6 +740,7 @@ void mlx5_modify_header_dealloc(struct mlx5_core_dev *dev, u32 modify_header_id) mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } +EXPORT_SYMBOL(mlx5_modify_header_dealloc); static const struct mlx5_flow_cmds mlx5_flow_cmds = { .create_flow_table = mlx5_cmd_create_flow_table, @@ -760,8 +785,8 @@ const struct mlx5_flow_cmds *mlx5_fs_cmd_get_default(enum fs_flow_table_type typ case FS_FT_FDB: case FS_FT_SNIFFER_RX: case FS_FT_SNIFFER_TX: - return mlx5_fs_cmd_get_fw_cmds(); case FS_FT_NIC_TX: + return mlx5_fs_cmd_get_fw_cmds(); default: return mlx5_fs_cmd_get_stub_cmds(); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 37d114c668b7..67ba4c975d81 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -40,6 +40,7 @@ #include "diag/fs_tracepoint.h" #include "accel/ipsec.h" #include "fpga/ipsec.h" +#include "eswitch.h" #define INIT_TREE_NODE_ARRAY_SIZE(...) (sizeof((struct init_tree_node[]){__VA_ARGS__}) /\ sizeof(struct init_tree_node)) @@ -76,6 +77,14 @@ FS_CAP(flow_table_properties_nic_receive.identified_miss_table_mode), \ FS_CAP(flow_table_properties_nic_receive.flow_table_modify)) +#define FS_CHAINING_CAPS_EGRESS \ + FS_REQUIRED_CAPS( \ + FS_CAP(flow_table_properties_nic_transmit.flow_modify_en), \ + FS_CAP(flow_table_properties_nic_transmit.modify_root), \ + FS_CAP(flow_table_properties_nic_transmit \ + .identified_miss_table_mode), \ + FS_CAP(flow_table_properties_nic_transmit.flow_table_modify)) + #define LEFTOVERS_NUM_LEVELS 1 #define LEFTOVERS_NUM_PRIOS 1 @@ -151,6 +160,17 @@ static struct init_tree_node { } }; +static struct init_tree_node egress_root_fs = { + .type = FS_TYPE_NAMESPACE, + .ar_size = 1, + .children = (struct init_tree_node[]) { + ADD_PRIO(0, MLX5_BY_PASS_NUM_PRIOS, 0, + FS_CHAINING_CAPS_EGRESS, + ADD_NS(ADD_MULTIPLE_PRIO(MLX5_BY_PASS_NUM_PRIOS, + BY_PASS_PRIO_NUM_LEVELS))), + } +}; + enum fs_i_lock_class { FS_LOCK_GRANDPARENT, FS_LOCK_PARENT, @@ -694,7 +714,7 @@ static struct mlx5_flow_table *find_closest_ft_recursive(struct fs_node *root, struct fs_node *iter = list_entry(start, struct fs_node, list); struct mlx5_flow_table *ft = NULL; - if (!root) + if (!root || root->type == FS_TYPE_PRIO_CHAINS) return NULL; list_for_each_advance_continue(iter, &root->children, reverse) { @@ -1388,7 +1408,7 @@ static bool check_conflicting_actions(u32 action1, u32 action2) return false; if (xored_actions & (MLX5_FLOW_CONTEXT_ACTION_DROP | - MLX5_FLOW_CONTEXT_ACTION_ENCAP | + MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT | MLX5_FLOW_CONTEXT_ACTION_DECAP | MLX5_FLOW_CONTEXT_ACTION_MOD_HDR | MLX5_FLOW_CONTEXT_ACTION_VLAN_POP | @@ -1408,7 +1428,7 @@ static int check_conflicting_ftes(struct fs_fte *fte, const struct mlx5_flow_act return -EEXIST; } - if (flow_act->has_flow_tag && + if ((flow_act->flags & FLOW_ACT_HAS_TAG) && fte->action.flow_tag != flow_act->flow_tag) { mlx5_core_warn(get_dev(&fte->node), "FTE flow tag %u already exists with different flow tag %u\n", @@ -1455,29 +1475,8 @@ static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg, return handle; } -struct mlx5_fc *mlx5_flow_rule_counter(struct mlx5_flow_handle *handle) +static bool counter_is_valid(u32 action) { - struct mlx5_flow_rule *dst; - struct fs_fte *fte; - - fs_get_obj(fte, handle->rule[0]->node.parent); - - fs_for_each_dst(dst, fte) { - if (dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_COUNTER) - return dst->dest_attr.counter; - } - - return NULL; -} - -static bool counter_is_valid(struct mlx5_fc *counter, u32 action) -{ - if (!(action & MLX5_FLOW_CONTEXT_ACTION_COUNT)) - return !counter; - - if (!counter) - return false; - return (action & (MLX5_FLOW_CONTEXT_ACTION_DROP | MLX5_FLOW_CONTEXT_ACTION_FWD_DEST)); } @@ -1487,7 +1486,7 @@ static bool dest_is_valid(struct mlx5_flow_destination *dest, struct mlx5_flow_table *ft) { if (dest && (dest->type == MLX5_FLOW_DESTINATION_TYPE_COUNTER)) - return counter_is_valid(dest->counter, action); + return counter_is_valid(action); if (!(action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST)) return true; @@ -1629,6 +1628,8 @@ try_add_to_existing_fg(struct mlx5_flow_table *ft, search_again_locked: version = matched_fgs_get_version(match_head); + if (flow_act->flags & FLOW_ACT_NO_APPEND) + goto skip_search; /* Try to find a fg that already contains a matching fte */ list_for_each_entry(iter, match_head, list) { struct fs_fte *fte_tmp; @@ -1645,6 +1646,11 @@ search_again_locked: return rule; } +skip_search: + /* No group with matching fte found, or we skipped the search. + * Try to add a new fte to any matching fg. + */ + /* Check the ft version, for case that new flow group * was added while the fgs weren't locked */ @@ -1975,12 +1981,24 @@ void mlx5_destroy_flow_group(struct mlx5_flow_group *fg) fg->id); } +struct mlx5_flow_namespace *mlx5_get_fdb_sub_ns(struct mlx5_core_dev *dev, + int n) +{ + struct mlx5_flow_steering *steering = dev->priv.steering; + + if (!steering || !steering->fdb_sub_ns) + return NULL; + + return steering->fdb_sub_ns[n]; +} +EXPORT_SYMBOL(mlx5_get_fdb_sub_ns); + struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev, enum mlx5_flow_namespace_type type) { struct mlx5_flow_steering *steering = dev->priv.steering; struct mlx5_flow_root_namespace *root_ns; - int prio; + int prio = 0; struct fs_prio *fs_prio; struct mlx5_flow_namespace *ns; @@ -1988,40 +2006,29 @@ struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev, return NULL; switch (type) { - case MLX5_FLOW_NAMESPACE_BYPASS: - case MLX5_FLOW_NAMESPACE_LAG: - case MLX5_FLOW_NAMESPACE_OFFLOADS: - case MLX5_FLOW_NAMESPACE_ETHTOOL: - case MLX5_FLOW_NAMESPACE_KERNEL: - case MLX5_FLOW_NAMESPACE_LEFTOVERS: - case MLX5_FLOW_NAMESPACE_ANCHOR: - prio = type; - break; case MLX5_FLOW_NAMESPACE_FDB: if (steering->fdb_root_ns) return &steering->fdb_root_ns->ns; - else - return NULL; + return NULL; case MLX5_FLOW_NAMESPACE_SNIFFER_RX: if (steering->sniffer_rx_root_ns) return &steering->sniffer_rx_root_ns->ns; - else - return NULL; + return NULL; case MLX5_FLOW_NAMESPACE_SNIFFER_TX: if (steering->sniffer_tx_root_ns) return &steering->sniffer_tx_root_ns->ns; - else - return NULL; - case MLX5_FLOW_NAMESPACE_EGRESS: - if (steering->egress_root_ns) - return &steering->egress_root_ns->ns; - else - return NULL; - default: return NULL; + default: + break; + } + + if (type == MLX5_FLOW_NAMESPACE_EGRESS) { + root_ns = steering->egress_root_ns; + } else { /* Must be NIC RX */ + root_ns = steering->root_ns; + prio = type; } - root_ns = steering->root_ns; if (!root_ns) return NULL; @@ -2064,8 +2071,10 @@ struct mlx5_flow_namespace *mlx5_get_flow_vport_acl_namespace(struct mlx5_core_d } } -static struct fs_prio *fs_create_prio(struct mlx5_flow_namespace *ns, - unsigned int prio, int num_levels) +static struct fs_prio *_fs_create_prio(struct mlx5_flow_namespace *ns, + unsigned int prio, + int num_levels, + enum fs_node_type type) { struct fs_prio *fs_prio; @@ -2073,7 +2082,7 @@ static struct fs_prio *fs_create_prio(struct mlx5_flow_namespace *ns, if (!fs_prio) return ERR_PTR(-ENOMEM); - fs_prio->node.type = FS_TYPE_PRIO; + fs_prio->node.type = type; tree_init_node(&fs_prio->node, NULL, del_sw_prio); tree_add_node(&fs_prio->node, &ns->node); fs_prio->num_levels = num_levels; @@ -2083,6 +2092,19 @@ static struct fs_prio *fs_create_prio(struct mlx5_flow_namespace *ns, return fs_prio; } +static struct fs_prio *fs_create_prio_chained(struct mlx5_flow_namespace *ns, + unsigned int prio, + int num_levels) +{ + return _fs_create_prio(ns, prio, num_levels, FS_TYPE_PRIO_CHAINS); +} + +static struct fs_prio *fs_create_prio(struct mlx5_flow_namespace *ns, + unsigned int prio, int num_levels) +{ + return _fs_create_prio(ns, prio, num_levels, FS_TYPE_PRIO); +} + static struct mlx5_flow_namespace *fs_init_namespace(struct mlx5_flow_namespace *ns) { @@ -2387,6 +2409,9 @@ void mlx5_cleanup_fs(struct mlx5_core_dev *dev) cleanup_egress_acls_root_ns(dev); cleanup_ingress_acls_root_ns(dev); cleanup_root_ns(steering->fdb_root_ns); + steering->fdb_root_ns = NULL; + kfree(steering->fdb_sub_ns); + steering->fdb_sub_ns = NULL; cleanup_root_ns(steering->sniffer_rx_root_ns); cleanup_root_ns(steering->sniffer_tx_root_ns); cleanup_root_ns(steering->egress_root_ns); @@ -2432,27 +2457,64 @@ static int init_sniffer_rx_root_ns(struct mlx5_flow_steering *steering) static int init_fdb_root_ns(struct mlx5_flow_steering *steering) { - struct fs_prio *prio; + struct mlx5_flow_namespace *ns; + struct fs_prio *maj_prio; + struct fs_prio *min_prio; + int levels; + int chain; + int prio; + int err; steering->fdb_root_ns = create_root_ns(steering, FS_FT_FDB); if (!steering->fdb_root_ns) return -ENOMEM; - prio = fs_create_prio(&steering->fdb_root_ns->ns, 0, 2); - if (IS_ERR(prio)) + steering->fdb_sub_ns = kzalloc(sizeof(steering->fdb_sub_ns) * + FDB_MAX_CHAIN + 1, GFP_KERNEL); + if (!steering->fdb_sub_ns) + return -ENOMEM; + + levels = 2 * FDB_MAX_PRIO * (FDB_MAX_CHAIN + 1); + maj_prio = fs_create_prio_chained(&steering->fdb_root_ns->ns, 0, + levels); + if (IS_ERR(maj_prio)) { + err = PTR_ERR(maj_prio); goto out_err; + } + + for (chain = 0; chain <= FDB_MAX_CHAIN; chain++) { + ns = fs_create_namespace(maj_prio); + if (IS_ERR(ns)) { + err = PTR_ERR(ns); + goto out_err; + } + + for (prio = 0; prio < FDB_MAX_PRIO * (chain + 1); prio++) { + min_prio = fs_create_prio(ns, prio, 2); + if (IS_ERR(min_prio)) { + err = PTR_ERR(min_prio); + goto out_err; + } + } + + steering->fdb_sub_ns[chain] = ns; + } - prio = fs_create_prio(&steering->fdb_root_ns->ns, 1, 1); - if (IS_ERR(prio)) + maj_prio = fs_create_prio(&steering->fdb_root_ns->ns, 1, 1); + if (IS_ERR(maj_prio)) { + err = PTR_ERR(maj_prio); goto out_err; + } set_prio_attrs(steering->fdb_root_ns); return 0; out_err: cleanup_root_ns(steering->fdb_root_ns); + kfree(steering->fdb_sub_ns); + steering->fdb_sub_ns = NULL; steering->fdb_root_ns = NULL; - return PTR_ERR(prio); + return err; } static int init_egress_acl_root_ns(struct mlx5_flow_steering *steering, int vport) @@ -2537,16 +2599,23 @@ cleanup_root_ns: static int init_egress_root_ns(struct mlx5_flow_steering *steering) { - struct fs_prio *prio; + int err; steering->egress_root_ns = create_root_ns(steering, FS_FT_NIC_TX); if (!steering->egress_root_ns) return -ENOMEM; - /* create 1 prio*/ - prio = fs_create_prio(&steering->egress_root_ns->ns, 0, 1); - return PTR_ERR_OR_ZERO(prio); + err = init_root_tree(steering, &egress_root_fs, + &steering->egress_root_ns->ns.node); + if (err) + goto cleanup; + set_prio_attrs(steering->egress_root_ns); + return 0; +cleanup: + cleanup_root_ns(steering->egress_root_ns); + steering->egress_root_ns = NULL; + return err; } int mlx5_init_fs(struct mlx5_core_dev *dev) @@ -2614,7 +2683,7 @@ int mlx5_init_fs(struct mlx5_core_dev *dev) goto err; } - if (MLX5_IPSEC_DEV(dev)) { + if (MLX5_IPSEC_DEV(dev) || MLX5_CAP_FLOWTABLE_NIC_TX(dev, ft_support)) { err = init_egress_root_ns(steering); if (err) goto err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index a06f83c0c2b6..b51ad217da32 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -38,9 +38,21 @@ #include <linux/rhashtable.h> #include <linux/llist.h> +/* FS_TYPE_PRIO_CHAINS is a PRIO that will have namespaces only, + * and those are in parallel to one another when going over them to connect + * a new flow table. Meaning the last flow table in a TYPE_PRIO prio in one + * parallel namespace will not automatically connect to the first flow table + * found in any prio in any next namespace, but skip the entire containing + * TYPE_PRIO_CHAINS prio. + * + * This is used to implement tc chains, each chain of prios is a different + * namespace inside a containing TYPE_PRIO_CHAINS prio. + */ + enum fs_node_type { FS_TYPE_NAMESPACE, FS_TYPE_PRIO, + FS_TYPE_PRIO_CHAINS, FS_TYPE_FLOW_TABLE, FS_TYPE_FLOW_GROUP, FS_TYPE_FLOW_ENTRY, @@ -73,6 +85,7 @@ struct mlx5_flow_steering { struct kmem_cache *ftes_cache; struct mlx5_flow_root_namespace *root_ns; struct mlx5_flow_root_namespace *fdb_root_ns; + struct mlx5_flow_namespace **fdb_sub_ns; struct mlx5_flow_root_namespace **esw_egress_root_ns; struct mlx5_flow_root_namespace **esw_ingress_root_ns; struct mlx5_flow_root_namespace *sniffer_tx_root_ns; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c index 09206c4acd9a..1329bc5b7969 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c @@ -258,6 +258,12 @@ err_out: } EXPORT_SYMBOL(mlx5_fc_create); +u32 mlx5_fc_id(struct mlx5_fc *counter) +{ + return counter->id; +} +EXPORT_SYMBOL(mlx5_fc_id); + void mlx5_fc_destroy(struct mlx5_core_dev *dev, struct mlx5_fc *counter) { struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index cc298527baf1..0594d0961cb3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -39,6 +39,7 @@ #include <linux/if_link.h> #include <linux/firmware.h> #include <linux/mlx5/cq.h> +#include <linux/mlx5/fs.h> #define DRIVER_NAME "mlx5_core" #define DRIVER_VERSION "5.0-0" @@ -171,17 +172,6 @@ struct mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev); void mlx5_dev_list_lock(void); void mlx5_dev_list_unlock(void); int mlx5_dev_list_trylock(void); -int mlx5_encap_alloc(struct mlx5_core_dev *dev, - int header_type, - size_t size, - void *encap_header, - u32 *encap_id); -void mlx5_encap_dealloc(struct mlx5_core_dev *dev, u32 encap_id); - -int mlx5_modify_header_alloc(struct mlx5_core_dev *dev, - u8 namespace, u8 num_actions, - void *modify_actions, u32 *modify_header_id); -void mlx5_modify_header_dealloc(struct mlx5_core_dev *dev, u32 modify_header_id); bool mlx5_lag_intf_add(struct mlx5_interface *intf, struct mlx5_priv *priv); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c index 4ca07bfb6b14..91b8139a388d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c @@ -211,6 +211,7 @@ int mlx5_core_create_dct(struct mlx5_core_dev *dev, } qp->qpn = MLX5_GET(create_dct_out, out, dctn); + qp->uid = MLX5_GET(create_dct_in, in, uid); err = create_resource_common(dev, qp, MLX5_RES_DCT); if (err) goto err_cmd; @@ -219,6 +220,7 @@ int mlx5_core_create_dct(struct mlx5_core_dev *dev, err_cmd: MLX5_SET(destroy_dct_in, din, opcode, MLX5_CMD_OP_DESTROY_DCT); MLX5_SET(destroy_dct_in, din, dctn, qp->qpn); + MLX5_SET(destroy_dct_in, din, uid, qp->uid); mlx5_cmd_exec(dev, (void *)&in, sizeof(din), (void *)&out, sizeof(dout)); return err; @@ -240,6 +242,7 @@ int mlx5_core_create_qp(struct mlx5_core_dev *dev, if (err) return err; + qp->uid = MLX5_GET(create_qp_in, in, uid); qp->qpn = MLX5_GET(create_qp_out, out, qpn); mlx5_core_dbg(dev, "qpn = 0x%x\n", qp->qpn); @@ -261,6 +264,7 @@ err_cmd: memset(dout, 0, sizeof(dout)); MLX5_SET(destroy_qp_in, din, opcode, MLX5_CMD_OP_DESTROY_QP); MLX5_SET(destroy_qp_in, din, qpn, qp->qpn); + MLX5_SET(destroy_qp_in, din, uid, qp->uid); mlx5_cmd_exec(dev, din, sizeof(din), dout, sizeof(dout)); return err; } @@ -275,6 +279,7 @@ static int mlx5_core_drain_dct(struct mlx5_core_dev *dev, MLX5_SET(drain_dct_in, in, opcode, MLX5_CMD_OP_DRAIN_DCT); MLX5_SET(drain_dct_in, in, dctn, qp->qpn); + MLX5_SET(drain_dct_in, in, uid, qp->uid); return mlx5_cmd_exec(dev, (void *)&in, sizeof(in), (void *)&out, sizeof(out)); } @@ -301,6 +306,7 @@ destroy: destroy_resource_common(dev, &dct->mqp); MLX5_SET(destroy_dct_in, in, opcode, MLX5_CMD_OP_DESTROY_DCT); MLX5_SET(destroy_dct_in, in, dctn, qp->qpn); + MLX5_SET(destroy_dct_in, in, uid, qp->uid); err = mlx5_cmd_exec(dev, (void *)&in, sizeof(in), (void *)&out, sizeof(out)); return err; @@ -320,6 +326,7 @@ int mlx5_core_destroy_qp(struct mlx5_core_dev *dev, MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP); MLX5_SET(destroy_qp_in, in, qpn, qp->qpn); + MLX5_SET(destroy_qp_in, in, uid, qp->uid); err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); if (err) return err; @@ -373,7 +380,7 @@ static void mbox_free(struct mbox_info *mbox) static int modify_qp_mbox_alloc(struct mlx5_core_dev *dev, u16 opcode, int qpn, u32 opt_param_mask, void *qpc, - struct mbox_info *mbox) + struct mbox_info *mbox, u16 uid) { mbox->out = NULL; mbox->in = NULL; @@ -381,26 +388,32 @@ static int modify_qp_mbox_alloc(struct mlx5_core_dev *dev, u16 opcode, int qpn, #define MBOX_ALLOC(mbox, typ) \ mbox_alloc(mbox, MLX5_ST_SZ_BYTES(typ##_in), MLX5_ST_SZ_BYTES(typ##_out)) -#define MOD_QP_IN_SET(typ, in, _opcode, _qpn) \ - MLX5_SET(typ##_in, in, opcode, _opcode); \ - MLX5_SET(typ##_in, in, qpn, _qpn) - -#define MOD_QP_IN_SET_QPC(typ, in, _opcode, _qpn, _opt_p, _qpc) \ - MOD_QP_IN_SET(typ, in, _opcode, _qpn); \ - MLX5_SET(typ##_in, in, opt_param_mask, _opt_p); \ - memcpy(MLX5_ADDR_OF(typ##_in, in, qpc), _qpc, MLX5_ST_SZ_BYTES(qpc)) +#define MOD_QP_IN_SET(typ, in, _opcode, _qpn, _uid) \ + do { \ + MLX5_SET(typ##_in, in, opcode, _opcode); \ + MLX5_SET(typ##_in, in, qpn, _qpn); \ + MLX5_SET(typ##_in, in, uid, _uid); \ + } while (0) + +#define MOD_QP_IN_SET_QPC(typ, in, _opcode, _qpn, _opt_p, _qpc, _uid) \ + do { \ + MOD_QP_IN_SET(typ, in, _opcode, _qpn, _uid); \ + MLX5_SET(typ##_in, in, opt_param_mask, _opt_p); \ + memcpy(MLX5_ADDR_OF(typ##_in, in, qpc), _qpc, \ + MLX5_ST_SZ_BYTES(qpc)); \ + } while (0) switch (opcode) { /* 2RST & 2ERR */ case MLX5_CMD_OP_2RST_QP: if (MBOX_ALLOC(mbox, qp_2rst)) return -ENOMEM; - MOD_QP_IN_SET(qp_2rst, mbox->in, opcode, qpn); + MOD_QP_IN_SET(qp_2rst, mbox->in, opcode, qpn, uid); break; case MLX5_CMD_OP_2ERR_QP: if (MBOX_ALLOC(mbox, qp_2err)) return -ENOMEM; - MOD_QP_IN_SET(qp_2err, mbox->in, opcode, qpn); + MOD_QP_IN_SET(qp_2err, mbox->in, opcode, qpn, uid); break; /* MODIFY with QPC */ @@ -408,37 +421,37 @@ static int modify_qp_mbox_alloc(struct mlx5_core_dev *dev, u16 opcode, int qpn, if (MBOX_ALLOC(mbox, rst2init_qp)) return -ENOMEM; MOD_QP_IN_SET_QPC(rst2init_qp, mbox->in, opcode, qpn, - opt_param_mask, qpc); + opt_param_mask, qpc, uid); break; case MLX5_CMD_OP_INIT2RTR_QP: if (MBOX_ALLOC(mbox, init2rtr_qp)) return -ENOMEM; MOD_QP_IN_SET_QPC(init2rtr_qp, mbox->in, opcode, qpn, - opt_param_mask, qpc); + opt_param_mask, qpc, uid); break; case MLX5_CMD_OP_RTR2RTS_QP: if (MBOX_ALLOC(mbox, rtr2rts_qp)) return -ENOMEM; MOD_QP_IN_SET_QPC(rtr2rts_qp, mbox->in, opcode, qpn, - opt_param_mask, qpc); + opt_param_mask, qpc, uid); break; case MLX5_CMD_OP_RTS2RTS_QP: if (MBOX_ALLOC(mbox, rts2rts_qp)) return -ENOMEM; MOD_QP_IN_SET_QPC(rts2rts_qp, mbox->in, opcode, qpn, - opt_param_mask, qpc); + opt_param_mask, qpc, uid); break; case MLX5_CMD_OP_SQERR2RTS_QP: if (MBOX_ALLOC(mbox, sqerr2rts_qp)) return -ENOMEM; MOD_QP_IN_SET_QPC(sqerr2rts_qp, mbox->in, opcode, qpn, - opt_param_mask, qpc); + opt_param_mask, qpc, uid); break; case MLX5_CMD_OP_INIT2INIT_QP: if (MBOX_ALLOC(mbox, init2init_qp)) return -ENOMEM; MOD_QP_IN_SET_QPC(init2init_qp, mbox->in, opcode, qpn, - opt_param_mask, qpc); + opt_param_mask, qpc, uid); break; default: mlx5_core_err(dev, "Unknown transition for modify QP: OP(0x%x) QPN(0x%x)\n", @@ -456,7 +469,7 @@ int mlx5_core_qp_modify(struct mlx5_core_dev *dev, u16 opcode, int err; err = modify_qp_mbox_alloc(dev, opcode, qp->qpn, - opt_param_mask, qpc, &mbox); + opt_param_mask, qpc, &mbox, qp->uid); if (err) return err; @@ -531,6 +544,17 @@ int mlx5_core_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn) } EXPORT_SYMBOL_GPL(mlx5_core_xrcd_dealloc); +static void destroy_rq_tracked(struct mlx5_core_dev *dev, u32 rqn, u16 uid) +{ + u32 in[MLX5_ST_SZ_DW(destroy_rq_in)] = {}; + u32 out[MLX5_ST_SZ_DW(destroy_rq_out)] = {}; + + MLX5_SET(destroy_rq_in, in, opcode, MLX5_CMD_OP_DESTROY_RQ); + MLX5_SET(destroy_rq_in, in, rqn, rqn); + MLX5_SET(destroy_rq_in, in, uid, uid); + mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} + int mlx5_core_create_rq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen, struct mlx5_core_qp *rq) { @@ -541,6 +565,7 @@ int mlx5_core_create_rq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen, if (err) return err; + rq->uid = MLX5_GET(create_rq_in, in, uid); rq->qpn = rqn; err = create_resource_common(dev, rq, MLX5_RES_RQ); if (err) @@ -549,7 +574,7 @@ int mlx5_core_create_rq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen, return 0; err_destroy_rq: - mlx5_core_destroy_rq(dev, rq->qpn); + destroy_rq_tracked(dev, rq->qpn, rq->uid); return err; } @@ -559,10 +584,21 @@ void mlx5_core_destroy_rq_tracked(struct mlx5_core_dev *dev, struct mlx5_core_qp *rq) { destroy_resource_common(dev, rq); - mlx5_core_destroy_rq(dev, rq->qpn); + destroy_rq_tracked(dev, rq->qpn, rq->uid); } EXPORT_SYMBOL(mlx5_core_destroy_rq_tracked); +static void destroy_sq_tracked(struct mlx5_core_dev *dev, u32 sqn, u16 uid) +{ + u32 in[MLX5_ST_SZ_DW(destroy_sq_in)] = {}; + u32 out[MLX5_ST_SZ_DW(destroy_sq_out)] = {}; + + MLX5_SET(destroy_sq_in, in, opcode, MLX5_CMD_OP_DESTROY_SQ); + MLX5_SET(destroy_sq_in, in, sqn, sqn); + MLX5_SET(destroy_sq_in, in, uid, uid); + mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} + int mlx5_core_create_sq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen, struct mlx5_core_qp *sq) { @@ -573,6 +609,7 @@ int mlx5_core_create_sq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen, if (err) return err; + sq->uid = MLX5_GET(create_sq_in, in, uid); sq->qpn = sqn; err = create_resource_common(dev, sq, MLX5_RES_SQ); if (err) @@ -581,7 +618,7 @@ int mlx5_core_create_sq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen, return 0; err_destroy_sq: - mlx5_core_destroy_sq(dev, sq->qpn); + destroy_sq_tracked(dev, sq->qpn, sq->uid); return err; } @@ -591,7 +628,7 @@ void mlx5_core_destroy_sq_tracked(struct mlx5_core_dev *dev, struct mlx5_core_qp *sq) { destroy_resource_common(dev, sq); - mlx5_core_destroy_sq(dev, sq->qpn); + destroy_sq_tracked(dev, sq->qpn, sq->uid); } EXPORT_SYMBOL(mlx5_core_destroy_sq_tracked); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/srq.c b/drivers/net/ethernet/mellanox/mlx5/core/srq.c index 7e20666ce5ae..6a6fc9be01e6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/srq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/srq.c @@ -166,6 +166,7 @@ static int create_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, if (!create_in) return -ENOMEM; + MLX5_SET(create_srq_in, create_in, uid, in->uid); srqc = MLX5_ADDR_OF(create_srq_in, create_in, srq_context_entry); pas = MLX5_ADDR_OF(create_srq_in, create_in, pas); @@ -178,8 +179,10 @@ static int create_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, err = mlx5_cmd_exec(dev, create_in, inlen, create_out, sizeof(create_out)); kvfree(create_in); - if (!err) + if (!err) { srq->srqn = MLX5_GET(create_srq_out, create_out, srqn); + srq->uid = in->uid; + } return err; } @@ -193,6 +196,7 @@ static int destroy_srq_cmd(struct mlx5_core_dev *dev, MLX5_SET(destroy_srq_in, srq_in, opcode, MLX5_CMD_OP_DESTROY_SRQ); MLX5_SET(destroy_srq_in, srq_in, srqn, srq->srqn); + MLX5_SET(destroy_srq_in, srq_in, uid, srq->uid); return mlx5_cmd_exec(dev, srq_in, sizeof(srq_in), srq_out, sizeof(srq_out)); @@ -208,6 +212,7 @@ static int arm_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, MLX5_SET(arm_rq_in, srq_in, op_mod, MLX5_ARM_RQ_IN_OP_MOD_SRQ); MLX5_SET(arm_rq_in, srq_in, srq_number, srq->srqn); MLX5_SET(arm_rq_in, srq_in, lwm, lwm); + MLX5_SET(arm_rq_in, srq_in, uid, srq->uid); return mlx5_cmd_exec(dev, srq_in, sizeof(srq_in), srq_out, sizeof(srq_out)); @@ -260,6 +265,7 @@ static int create_xrc_srq_cmd(struct mlx5_core_dev *dev, if (!create_in) return -ENOMEM; + MLX5_SET(create_xrc_srq_in, create_in, uid, in->uid); xrc_srqc = MLX5_ADDR_OF(create_xrc_srq_in, create_in, xrc_srq_context_entry); pas = MLX5_ADDR_OF(create_xrc_srq_in, create_in, pas); @@ -277,6 +283,7 @@ static int create_xrc_srq_cmd(struct mlx5_core_dev *dev, goto out; srq->srqn = MLX5_GET(create_xrc_srq_out, create_out, xrc_srqn); + srq->uid = in->uid; out: kvfree(create_in); return err; @@ -291,6 +298,7 @@ static int destroy_xrc_srq_cmd(struct mlx5_core_dev *dev, MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, opcode, MLX5_CMD_OP_DESTROY_XRC_SRQ); MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn); + MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, uid, srq->uid); return mlx5_cmd_exec(dev, xrcsrq_in, sizeof(xrcsrq_in), xrcsrq_out, sizeof(xrcsrq_out)); @@ -306,6 +314,7 @@ static int arm_xrc_srq_cmd(struct mlx5_core_dev *dev, MLX5_SET(arm_xrc_srq_in, xrcsrq_in, op_mod, MLX5_ARM_XRC_SRQ_IN_OP_MOD_XRC_SRQ); MLX5_SET(arm_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn); MLX5_SET(arm_xrc_srq_in, xrcsrq_in, lwm, lwm); + MLX5_SET(arm_xrc_srq_in, xrcsrq_in, uid, srq->uid); return mlx5_cmd_exec(dev, xrcsrq_in, sizeof(xrcsrq_in), xrcsrq_out, sizeof(xrcsrq_out)); @@ -365,10 +374,13 @@ static int create_rmp_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, wq = MLX5_ADDR_OF(rmpc, rmpc, wq); MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY); + MLX5_SET(create_rmp_in, create_in, uid, in->uid); set_wq(wq, in); memcpy(MLX5_ADDR_OF(rmpc, rmpc, wq.pas), in->pas, pas_size); err = mlx5_core_create_rmp(dev, create_in, inlen, &srq->srqn); + if (!err) + srq->uid = in->uid; kvfree(create_in); return err; @@ -377,7 +389,13 @@ static int create_rmp_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, static int destroy_rmp_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq) { - return mlx5_core_destroy_rmp(dev, srq->srqn); + u32 in[MLX5_ST_SZ_DW(destroy_rmp_in)] = {}; + u32 out[MLX5_ST_SZ_DW(destroy_rmp_out)] = {}; + + MLX5_SET(destroy_rmp_in, in, opcode, MLX5_CMD_OP_DESTROY_RMP); + MLX5_SET(destroy_rmp_in, in, rmpn, srq->srqn); + MLX5_SET(destroy_rmp_in, in, uid, srq->uid); + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } static int arm_rmp_cmd(struct mlx5_core_dev *dev, @@ -400,6 +418,7 @@ static int arm_rmp_cmd(struct mlx5_core_dev *dev, MLX5_SET(modify_rmp_in, in, rmp_state, MLX5_RMPC_STATE_RDY); MLX5_SET(modify_rmp_in, in, rmpn, srq->srqn); + MLX5_SET(modify_rmp_in, in, uid, srq->uid); MLX5_SET(wq, wq, lwm, lwm); MLX5_SET(rmp_bitmask, bitmask, lwm, 1); MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY); @@ -469,11 +488,14 @@ static int create_xrq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, MLX5_SET(xrqc, xrqc, user_index, in->user_index); MLX5_SET(xrqc, xrqc, cqn, in->cqn); MLX5_SET(create_xrq_in, create_in, opcode, MLX5_CMD_OP_CREATE_XRQ); + MLX5_SET(create_xrq_in, create_in, uid, in->uid); err = mlx5_cmd_exec(dev, create_in, inlen, create_out, sizeof(create_out)); kvfree(create_in); - if (!err) + if (!err) { srq->srqn = MLX5_GET(create_xrq_out, create_out, xrqn); + srq->uid = in->uid; + } return err; } @@ -485,6 +507,7 @@ static int destroy_xrq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq) MLX5_SET(destroy_xrq_in, in, opcode, MLX5_CMD_OP_DESTROY_XRQ); MLX5_SET(destroy_xrq_in, in, xrqn, srq->srqn); + MLX5_SET(destroy_xrq_in, in, uid, srq->uid); return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } @@ -500,6 +523,7 @@ static int arm_xrq_cmd(struct mlx5_core_dev *dev, MLX5_SET(arm_rq_in, in, op_mod, MLX5_ARM_RQ_IN_OP_MOD_XRQ); MLX5_SET(arm_rq_in, in, srq_number, srq->srqn); MLX5_SET(arm_rq_in, in, lwm, lwm); + MLX5_SET(arm_rq_in, in, uid, srq->uid); return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/Makefile b/drivers/net/ethernet/mellanox/mlxsw/Makefile index 68fa44a41485..1f77e97e2d7a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/Makefile +++ b/drivers/net/ethernet/mellanox/mlxsw/Makefile @@ -27,7 +27,8 @@ mlxsw_spectrum-objs := spectrum.o spectrum_buffers.o \ spectrum_acl_flex_keys.o \ spectrum1_mr_tcam.o spectrum2_mr_tcam.o \ spectrum_mr_tcam.o spectrum_mr.o \ - spectrum_qdisc.o spectrum_span.o + spectrum_qdisc.o spectrum_span.o \ + spectrum_nve.o spectrum_nve_vxlan.o mlxsw_spectrum-$(CONFIG_MLXSW_SPECTRUM_DCB) += spectrum_dcb.o mlxsw_spectrum-$(CONFIG_NET_DEVLINK) += spectrum_dpipe.o obj-$(CONFIG_MLXSW_MINIMAL) += mlxsw_minimal.o diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index ed7e4c4e7403..8a4983adae94 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -2994,6 +2994,13 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, goto err_port_qdiscs_init; } + err = mlxsw_sp_port_nve_init(mlxsw_sp_port); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to initialize NVE\n", + mlxsw_sp_port->local_port); + goto err_port_nve_init; + } + mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_get(mlxsw_sp_port, 1); if (IS_ERR(mlxsw_sp_port_vlan)) { dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to create VID 1\n", @@ -3022,6 +3029,8 @@ err_register_netdev: mlxsw_sp_port_switchdev_fini(mlxsw_sp_port); mlxsw_sp_port_vlan_put(mlxsw_sp_port_vlan); err_port_vlan_get: + mlxsw_sp_port_nve_fini(mlxsw_sp_port); +err_port_nve_init: mlxsw_sp_tc_qdisc_fini(mlxsw_sp_port); err_port_qdiscs_init: mlxsw_sp_port_fids_fini(mlxsw_sp_port); @@ -3061,6 +3070,7 @@ static void mlxsw_sp_port_remove(struct mlxsw_sp *mlxsw_sp, u8 local_port) mlxsw_sp->ports[local_port] = NULL; mlxsw_sp_port_switchdev_fini(mlxsw_sp_port); mlxsw_sp_port_vlan_flush(mlxsw_sp_port); + mlxsw_sp_port_nve_fini(mlxsw_sp_port); mlxsw_sp_tc_qdisc_fini(mlxsw_sp_port); mlxsw_sp_port_fids_fini(mlxsw_sp_port); mlxsw_sp_port_dcb_fini(mlxsw_sp_port); @@ -3795,6 +3805,12 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core, goto err_afa_init; } + err = mlxsw_sp_nve_init(mlxsw_sp); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize NVE\n"); + goto err_nve_init; + } + err = mlxsw_sp_router_init(mlxsw_sp); if (err) { dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize router\n"); @@ -3841,6 +3857,8 @@ err_acl_init: err_netdev_notifier: mlxsw_sp_router_fini(mlxsw_sp); err_router_init: + mlxsw_sp_nve_fini(mlxsw_sp); +err_nve_init: mlxsw_sp_afa_fini(mlxsw_sp); err_afa_init: mlxsw_sp_counter_pool_fini(mlxsw_sp); @@ -3873,6 +3891,7 @@ static int mlxsw_sp1_init(struct mlxsw_core *mlxsw_core, mlxsw_sp->afk_ops = &mlxsw_sp1_afk_ops; mlxsw_sp->mr_tcam_ops = &mlxsw_sp1_mr_tcam_ops; mlxsw_sp->acl_tcam_ops = &mlxsw_sp1_acl_tcam_ops; + mlxsw_sp->nve_ops_arr = mlxsw_sp1_nve_ops_arr; return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info); } @@ -3887,6 +3906,7 @@ static int mlxsw_sp2_init(struct mlxsw_core *mlxsw_core, mlxsw_sp->afk_ops = &mlxsw_sp2_afk_ops; mlxsw_sp->mr_tcam_ops = &mlxsw_sp2_mr_tcam_ops; mlxsw_sp->acl_tcam_ops = &mlxsw_sp2_acl_tcam_ops; + mlxsw_sp->nve_ops_arr = mlxsw_sp2_nve_ops_arr; return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info); } @@ -3900,6 +3920,7 @@ static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core) mlxsw_sp_acl_fini(mlxsw_sp); unregister_netdevice_notifier(&mlxsw_sp->netdevice_nb); mlxsw_sp_router_fini(mlxsw_sp); + mlxsw_sp_nve_fini(mlxsw_sp); mlxsw_sp_afa_fini(mlxsw_sp); mlxsw_sp_counter_pool_fini(mlxsw_sp); mlxsw_sp_switchdev_fini(mlxsw_sp); @@ -4566,6 +4587,41 @@ static void mlxsw_sp_port_ovs_leave(struct mlxsw_sp_port *mlxsw_sp_port) mlxsw_sp_port_vp_mode_set(mlxsw_sp_port, false); } +static bool mlxsw_sp_bridge_has_multiple_vxlans(struct net_device *br_dev) +{ + unsigned int num_vxlans = 0; + struct net_device *dev; + struct list_head *iter; + + netdev_for_each_lower_dev(br_dev, dev, iter) { + if (netif_is_vxlan(dev)) + num_vxlans++; + } + + return num_vxlans > 1; +} + +static bool mlxsw_sp_bridge_vxlan_is_valid(struct net_device *br_dev, + struct netlink_ext_ack *extack) +{ + if (br_multicast_enabled(br_dev)) { + NL_SET_ERR_MSG_MOD(extack, "Multicast can not be enabled on a bridge with a VxLAN device"); + return false; + } + + if (br_vlan_enabled(br_dev)) { + NL_SET_ERR_MSG_MOD(extack, "VLAN filtering can not be enabled on a bridge with a VxLAN device"); + return false; + } + + if (mlxsw_sp_bridge_has_multiple_vxlans(br_dev)) { + NL_SET_ERR_MSG_MOD(extack, "Multiple VxLAN devices are not supported in a VLAN-unaware bridge"); + return false; + } + + return true; +} + static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev, struct net_device *dev, unsigned long event, void *ptr) @@ -4595,6 +4651,11 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev, } if (!info->linking) break; + if (netif_is_bridge_master(upper_dev) && + !mlxsw_sp_bridge_device_is_offloaded(mlxsw_sp, upper_dev) && + mlxsw_sp_bridge_has_vxlan(upper_dev) && + !mlxsw_sp_bridge_vxlan_is_valid(upper_dev, extack)) + return -EOPNOTSUPP; if (netdev_has_any_upper_dev(upper_dev) && (!netif_is_bridge_master(upper_dev) || !mlxsw_sp_bridge_device_is_offloaded(mlxsw_sp, @@ -4752,6 +4813,11 @@ static int mlxsw_sp_netdevice_port_vlan_event(struct net_device *vlan_dev, } if (!info->linking) break; + if (netif_is_bridge_master(upper_dev) && + !mlxsw_sp_bridge_device_is_offloaded(mlxsw_sp, upper_dev) && + mlxsw_sp_bridge_has_vxlan(upper_dev) && + !mlxsw_sp_bridge_vxlan_is_valid(upper_dev, extack)) + return -EOPNOTSUPP; if (netdev_has_any_upper_dev(upper_dev) && (!netif_is_bridge_master(upper_dev) || !mlxsw_sp_bridge_device_is_offloaded(mlxsw_sp, @@ -4898,6 +4964,63 @@ static bool mlxsw_sp_is_vrf_event(unsigned long event, void *ptr) return netif_is_l3_master(info->upper_dev); } +static int mlxsw_sp_netdevice_vxlan_event(struct mlxsw_sp *mlxsw_sp, + struct net_device *dev, + unsigned long event, void *ptr) +{ + struct netdev_notifier_changeupper_info *cu_info; + struct netdev_notifier_info *info = ptr; + struct netlink_ext_ack *extack; + struct net_device *upper_dev; + + extack = netdev_notifier_info_to_extack(info); + + switch (event) { + case NETDEV_CHANGEUPPER: + cu_info = container_of(info, + struct netdev_notifier_changeupper_info, + info); + upper_dev = cu_info->upper_dev; + if (!netif_is_bridge_master(upper_dev)) + return 0; + if (!mlxsw_sp_lower_get(upper_dev)) + return 0; + if (!mlxsw_sp_bridge_vxlan_is_valid(upper_dev, extack)) + return -EOPNOTSUPP; + if (cu_info->linking) { + if (!netif_running(dev)) + return 0; + return mlxsw_sp_bridge_vxlan_join(mlxsw_sp, upper_dev, + dev, extack); + } else { + mlxsw_sp_bridge_vxlan_leave(mlxsw_sp, upper_dev, dev); + } + break; + case NETDEV_PRE_UP: + upper_dev = netdev_master_upper_dev_get(dev); + if (!upper_dev) + return 0; + if (!netif_is_bridge_master(upper_dev)) + return 0; + if (!mlxsw_sp_lower_get(upper_dev)) + return 0; + return mlxsw_sp_bridge_vxlan_join(mlxsw_sp, upper_dev, dev, + extack); + case NETDEV_DOWN: + upper_dev = netdev_master_upper_dev_get(dev); + if (!upper_dev) + return 0; + if (!netif_is_bridge_master(upper_dev)) + return 0; + if (!mlxsw_sp_lower_get(upper_dev)) + return 0; + mlxsw_sp_bridge_vxlan_leave(mlxsw_sp, upper_dev, dev); + break; + } + + return 0; +} + static int mlxsw_sp_netdevice_event(struct notifier_block *nb, unsigned long event, void *ptr) { @@ -4914,6 +5037,8 @@ static int mlxsw_sp_netdevice_event(struct notifier_block *nb, } mlxsw_sp_span_respin(mlxsw_sp); + if (netif_is_vxlan(dev)) + err = mlxsw_sp_netdevice_vxlan_event(mlxsw_sp, dev, event, ptr); if (mlxsw_sp_netdev_is_ipip_ol(mlxsw_sp, dev)) err = mlxsw_sp_netdevice_ipip_ol_event(mlxsw_sp, dev, event, ptr); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 1f68ac2a20f4..0875a79cbe7b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -16,6 +16,7 @@ #include <net/psample.h> #include <net/pkt_cls.h> #include <net/red.h> +#include <net/vxlan.h> #include "port.h" #include "core.h" @@ -55,6 +56,8 @@ enum mlxsw_sp_resource_id { struct mlxsw_sp_port; struct mlxsw_sp_rif; struct mlxsw_sp_span_entry; +enum mlxsw_sp_l3proto; +union mlxsw_sp_l3addr; struct mlxsw_sp_upper { struct net_device *dev; @@ -113,9 +116,11 @@ struct mlxsw_sp_acl; struct mlxsw_sp_counter_pool; struct mlxsw_sp_fid_core; struct mlxsw_sp_kvdl; +struct mlxsw_sp_nve; struct mlxsw_sp_kvdl_ops; struct mlxsw_sp_mr_tcam_ops; struct mlxsw_sp_acl_tcam_ops; +struct mlxsw_sp_nve_ops; struct mlxsw_sp { struct mlxsw_sp_port **ports; @@ -132,6 +137,7 @@ struct mlxsw_sp { struct mlxsw_sp_acl *acl; struct mlxsw_sp_fid_core *fid_core; struct mlxsw_sp_kvdl *kvdl; + struct mlxsw_sp_nve *nve; struct notifier_block netdevice_nb; struct mlxsw_sp_counter_pool *counter_pool; @@ -146,6 +152,7 @@ struct mlxsw_sp { const struct mlxsw_afk_ops *afk_ops; const struct mlxsw_sp_mr_tcam_ops *mr_tcam_ops; const struct mlxsw_sp_acl_tcam_ops *acl_tcam_ops; + const struct mlxsw_sp_nve_ops **nve_ops_arr; }; static inline struct mlxsw_sp_upper * @@ -235,6 +242,25 @@ struct mlxsw_sp_port { struct mlxsw_sp_acl_block *eg_acl_block; }; +static inline struct net_device * +mlxsw_sp_bridge_vxlan_dev_find(struct net_device *br_dev) +{ + struct net_device *dev; + struct list_head *iter; + + netdev_for_each_lower_dev(br_dev, dev, iter) { + if (netif_is_vxlan(dev)) + return dev; + } + + return NULL; +} + +static inline bool mlxsw_sp_bridge_has_vxlan(struct net_device *br_dev) +{ + return !!mlxsw_sp_bridge_vxlan_dev_find(br_dev); +} + static inline bool mlxsw_sp_port_is_pause_en(const struct mlxsw_sp_port *mlxsw_sp_port) { @@ -330,6 +356,13 @@ void mlxsw_sp_port_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_port, struct net_device *br_dev); bool mlxsw_sp_bridge_device_is_offloaded(const struct mlxsw_sp *mlxsw_sp, const struct net_device *br_dev); +int mlxsw_sp_bridge_vxlan_join(struct mlxsw_sp *mlxsw_sp, + const struct net_device *br_dev, + const struct net_device *vxlan_dev, + struct netlink_ext_ack *extack); +void mlxsw_sp_bridge_vxlan_leave(struct mlxsw_sp *mlxsw_sp, + const struct net_device *br_dev, + const struct net_device *vxlan_dev); /* spectrum.c */ int mlxsw_sp_port_ets_set(struct mlxsw_sp_port *mlxsw_sp_port, @@ -431,6 +464,15 @@ struct mlxsw_sp_rif *mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp, const struct net_device *dev); u8 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp); struct mlxsw_sp_fid *mlxsw_sp_rif_fid(const struct mlxsw_sp_rif *rif); +int mlxsw_sp_router_nve_promote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id, + enum mlxsw_sp_l3proto ul_proto, + const union mlxsw_sp_l3addr *ul_sip, + u32 tunnel_index); +void mlxsw_sp_router_nve_demote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id, + enum mlxsw_sp_l3proto ul_proto, + const union mlxsw_sp_l3addr *ul_sip); +int mlxsw_sp_router_tb_id_vr_id(struct mlxsw_sp *mlxsw_sp, u32 tb_id, + u16 *vr_id); /* spectrum_kvdl.c */ enum mlxsw_sp_kvdl_entry_type { @@ -679,6 +721,16 @@ int mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port, struct tc_prio_qopt_offload *p); /* spectrum_fid.c */ +struct mlxsw_sp_fid *mlxsw_sp_fid_lookup_by_vni(struct mlxsw_sp *mlxsw_sp, + __be32 vni); +int mlxsw_sp_fid_vni(const struct mlxsw_sp_fid *fid, __be32 *vni); +int mlxsw_sp_fid_nve_flood_index_set(struct mlxsw_sp_fid *fid, + u32 nve_flood_index); +void mlxsw_sp_fid_nve_flood_index_clear(struct mlxsw_sp_fid *fid); +bool mlxsw_sp_fid_nve_flood_index_is_set(const struct mlxsw_sp_fid *fid); +int mlxsw_sp_fid_vni_set(struct mlxsw_sp_fid *fid, __be32 vni); +void mlxsw_sp_fid_vni_clear(struct mlxsw_sp_fid *fid); +bool mlxsw_sp_fid_vni_is_set(const struct mlxsw_sp_fid *fid); int mlxsw_sp_fid_flood_set(struct mlxsw_sp_fid *fid, enum mlxsw_sp_flood_type packet_type, u8 local_port, bool member); @@ -697,6 +749,8 @@ u16 mlxsw_sp_fid_8021q_vid(const struct mlxsw_sp_fid *fid); struct mlxsw_sp_fid *mlxsw_sp_fid_8021q_get(struct mlxsw_sp *mlxsw_sp, u16 vid); struct mlxsw_sp_fid *mlxsw_sp_fid_8021d_get(struct mlxsw_sp *mlxsw_sp, int br_ifindex); +struct mlxsw_sp_fid *mlxsw_sp_fid_8021d_lookup(struct mlxsw_sp *mlxsw_sp, + int br_ifindex); struct mlxsw_sp_fid *mlxsw_sp_fid_rfid_get(struct mlxsw_sp *mlxsw_sp, u16 rif_index); struct mlxsw_sp_fid *mlxsw_sp_fid_dummy_get(struct mlxsw_sp *mlxsw_sp); @@ -742,4 +796,39 @@ extern const struct mlxsw_sp_mr_tcam_ops mlxsw_sp1_mr_tcam_ops; /* spectrum2_mr_tcam.c */ extern const struct mlxsw_sp_mr_tcam_ops mlxsw_sp2_mr_tcam_ops; +/* spectrum_nve.c */ +enum mlxsw_sp_nve_type { + MLXSW_SP_NVE_TYPE_VXLAN, +}; + +struct mlxsw_sp_nve_params { + enum mlxsw_sp_nve_type type; + __be32 vni; + const struct net_device *dev; +}; + +extern const struct mlxsw_sp_nve_ops *mlxsw_sp1_nve_ops_arr[]; +extern const struct mlxsw_sp_nve_ops *mlxsw_sp2_nve_ops_arr[]; + +int mlxsw_sp_nve_flood_ip_add(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fid *fid, + enum mlxsw_sp_l3proto proto, + union mlxsw_sp_l3addr *addr); +void mlxsw_sp_nve_flood_ip_del(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fid *fid, + enum mlxsw_sp_l3proto proto, + union mlxsw_sp_l3addr *addr); +u32 mlxsw_sp_nve_decap_tunnel_index_get(const struct mlxsw_sp *mlxsw_sp); +bool mlxsw_sp_nve_ipv4_route_is_decap(const struct mlxsw_sp *mlxsw_sp, + u32 tb_id, __be32 addr); +int mlxsw_sp_nve_fid_enable(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fid *fid, + struct mlxsw_sp_nve_params *params, + struct netlink_ext_ack *extack); +void mlxsw_sp_nve_fid_disable(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fid *fid); +int mlxsw_sp_port_nve_init(struct mlxsw_sp_port *mlxsw_sp_port); +void mlxsw_sp_port_nve_fini(struct mlxsw_sp_port *mlxsw_sp_port); +int mlxsw_sp_nve_init(struct mlxsw_sp *mlxsw_sp); +void mlxsw_sp_nve_fini(struct mlxsw_sp *mlxsw_sp); + #endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c index 715d24ff937e..a3db033d7399 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c @@ -6,6 +6,7 @@ #include <linux/if_vlan.h> #include <linux/if_bridge.h> #include <linux/netdevice.h> +#include <linux/rhashtable.h> #include <linux/rtnetlink.h> #include "spectrum.h" @@ -14,6 +15,7 @@ struct mlxsw_sp_fid_family; struct mlxsw_sp_fid_core { + struct rhashtable vni_ht; struct mlxsw_sp_fid_family *fid_family_arr[MLXSW_SP_FID_TYPE_MAX]; unsigned int *port_fid_mappings; }; @@ -24,6 +26,12 @@ struct mlxsw_sp_fid { unsigned int ref_count; u16 fid_index; struct mlxsw_sp_fid_family *fid_family; + + struct rhash_head vni_ht_node; + __be32 vni; + u32 nve_flood_index; + u8 vni_valid:1, + nve_flood_index_valid:1; }; struct mlxsw_sp_fid_8021q { @@ -36,6 +44,12 @@ struct mlxsw_sp_fid_8021d { int br_ifindex; }; +static const struct rhashtable_params mlxsw_sp_fid_vni_ht_params = { + .key_len = sizeof_field(struct mlxsw_sp_fid, vni), + .key_offset = offsetof(struct mlxsw_sp_fid, vni), + .head_offset = offsetof(struct mlxsw_sp_fid, vni_ht_node), +}; + struct mlxsw_sp_flood_table { enum mlxsw_sp_flood_type packet_type; enum mlxsw_reg_sfgc_bridge_type bridge_type; @@ -56,6 +70,11 @@ struct mlxsw_sp_fid_ops { struct mlxsw_sp_port *port, u16 vid); void (*port_vid_unmap)(struct mlxsw_sp_fid *fid, struct mlxsw_sp_port *port, u16 vid); + int (*vni_set)(struct mlxsw_sp_fid *fid, __be32 vni); + void (*vni_clear)(struct mlxsw_sp_fid *fid); + int (*nve_flood_index_set)(struct mlxsw_sp_fid *fid, + u32 nve_flood_index); + void (*nve_flood_index_clear)(struct mlxsw_sp_fid *fid); }; struct mlxsw_sp_fid_family { @@ -94,6 +113,117 @@ static const int *mlxsw_sp_packet_type_sfgc_types[] = { [MLXSW_SP_FLOOD_TYPE_MC] = mlxsw_sp_sfgc_mc_packet_types, }; +struct mlxsw_sp_fid *mlxsw_sp_fid_lookup_by_vni(struct mlxsw_sp *mlxsw_sp, + __be32 vni) +{ + struct mlxsw_sp_fid *fid; + + fid = rhashtable_lookup_fast(&mlxsw_sp->fid_core->vni_ht, &vni, + mlxsw_sp_fid_vni_ht_params); + if (fid) + fid->ref_count++; + + return fid; +} + +int mlxsw_sp_fid_vni(const struct mlxsw_sp_fid *fid, __be32 *vni) +{ + if (!fid->vni_valid) + return -EINVAL; + + *vni = fid->vni; + + return 0; +} + +int mlxsw_sp_fid_nve_flood_index_set(struct mlxsw_sp_fid *fid, + u32 nve_flood_index) +{ + struct mlxsw_sp_fid_family *fid_family = fid->fid_family; + const struct mlxsw_sp_fid_ops *ops = fid_family->ops; + int err; + + if (WARN_ON(!ops->nve_flood_index_set || fid->nve_flood_index_valid)) + return -EINVAL; + + err = ops->nve_flood_index_set(fid, nve_flood_index); + if (err) + return err; + + fid->nve_flood_index = nve_flood_index; + fid->nve_flood_index_valid = true; + + return 0; +} + +void mlxsw_sp_fid_nve_flood_index_clear(struct mlxsw_sp_fid *fid) +{ + struct mlxsw_sp_fid_family *fid_family = fid->fid_family; + const struct mlxsw_sp_fid_ops *ops = fid_family->ops; + + if (WARN_ON(!ops->nve_flood_index_clear || !fid->nve_flood_index_valid)) + return; + + fid->nve_flood_index_valid = false; + ops->nve_flood_index_clear(fid); +} + +bool mlxsw_sp_fid_nve_flood_index_is_set(const struct mlxsw_sp_fid *fid) +{ + return fid->nve_flood_index_valid; +} + +int mlxsw_sp_fid_vni_set(struct mlxsw_sp_fid *fid, __be32 vni) +{ + struct mlxsw_sp_fid_family *fid_family = fid->fid_family; + const struct mlxsw_sp_fid_ops *ops = fid_family->ops; + struct mlxsw_sp *mlxsw_sp = fid_family->mlxsw_sp; + int err; + + if (WARN_ON(!ops->vni_set || fid->vni_valid)) + return -EINVAL; + + fid->vni = vni; + err = rhashtable_lookup_insert_fast(&mlxsw_sp->fid_core->vni_ht, + &fid->vni_ht_node, + mlxsw_sp_fid_vni_ht_params); + if (err) + return err; + + err = ops->vni_set(fid, vni); + if (err) + goto err_vni_set; + + fid->vni_valid = true; + + return 0; + +err_vni_set: + rhashtable_remove_fast(&mlxsw_sp->fid_core->vni_ht, &fid->vni_ht_node, + mlxsw_sp_fid_vni_ht_params); + return err; +} + +void mlxsw_sp_fid_vni_clear(struct mlxsw_sp_fid *fid) +{ + struct mlxsw_sp_fid_family *fid_family = fid->fid_family; + const struct mlxsw_sp_fid_ops *ops = fid_family->ops; + struct mlxsw_sp *mlxsw_sp = fid_family->mlxsw_sp; + + if (WARN_ON(!ops->vni_clear || !fid->vni_valid)) + return; + + fid->vni_valid = false; + ops->vni_clear(fid); + rhashtable_remove_fast(&mlxsw_sp->fid_core->vni_ht, &fid->vni_ht_node, + mlxsw_sp_fid_vni_ht_params); +} + +bool mlxsw_sp_fid_vni_is_set(const struct mlxsw_sp_fid *fid) +{ + return fid->vni_valid; +} + static const struct mlxsw_sp_flood_table * mlxsw_sp_fid_flood_table_lookup(const struct mlxsw_sp_fid *fid, enum mlxsw_sp_flood_type packet_type) @@ -217,6 +347,21 @@ static int mlxsw_sp_fid_op(struct mlxsw_sp *mlxsw_sp, u16 fid_index, return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfmr), sfmr_pl); } +static int mlxsw_sp_fid_vni_op(struct mlxsw_sp *mlxsw_sp, u16 fid_index, + __be32 vni, bool vni_valid, u32 nve_flood_index, + bool nve_flood_index_valid) +{ + char sfmr_pl[MLXSW_REG_SFMR_LEN]; + + mlxsw_reg_sfmr_pack(sfmr_pl, MLXSW_REG_SFMR_OP_CREATE_FID, fid_index, + 0); + mlxsw_reg_sfmr_vv_set(sfmr_pl, vni_valid); + mlxsw_reg_sfmr_vni_set(sfmr_pl, be32_to_cpu(vni)); + mlxsw_reg_sfmr_vtfp_set(sfmr_pl, nve_flood_index_valid); + mlxsw_reg_sfmr_nve_tunnel_flood_ptr_set(sfmr_pl, nve_flood_index); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfmr), sfmr_pl); +} + static int mlxsw_sp_fid_vid_map(struct mlxsw_sp *mlxsw_sp, u16 fid_index, u16 vid, bool valid) { @@ -393,6 +538,8 @@ static int mlxsw_sp_fid_8021d_configure(struct mlxsw_sp_fid *fid) static void mlxsw_sp_fid_8021d_deconfigure(struct mlxsw_sp_fid *fid) { + if (fid->vni_valid) + mlxsw_sp_nve_fid_disable(fid->fid_family->mlxsw_sp, fid); mlxsw_sp_fid_op(fid->fid_family->mlxsw_sp, fid->fid_index, 0, false); } @@ -531,6 +678,41 @@ mlxsw_sp_fid_8021d_port_vid_unmap(struct mlxsw_sp_fid *fid, mlxsw_sp_port->local_port, vid, false); } +static int mlxsw_sp_fid_8021d_vni_set(struct mlxsw_sp_fid *fid, __be32 vni) +{ + struct mlxsw_sp_fid_family *fid_family = fid->fid_family; + + return mlxsw_sp_fid_vni_op(fid_family->mlxsw_sp, fid->fid_index, vni, + true, fid->nve_flood_index, + fid->nve_flood_index_valid); +} + +static void mlxsw_sp_fid_8021d_vni_clear(struct mlxsw_sp_fid *fid) +{ + struct mlxsw_sp_fid_family *fid_family = fid->fid_family; + + mlxsw_sp_fid_vni_op(fid_family->mlxsw_sp, fid->fid_index, 0, false, + fid->nve_flood_index, fid->nve_flood_index_valid); +} + +static int mlxsw_sp_fid_8021d_nve_flood_index_set(struct mlxsw_sp_fid *fid, + u32 nve_flood_index) +{ + struct mlxsw_sp_fid_family *fid_family = fid->fid_family; + + return mlxsw_sp_fid_vni_op(fid_family->mlxsw_sp, fid->fid_index, + fid->vni, fid->vni_valid, nve_flood_index, + true); +} + +static void mlxsw_sp_fid_8021d_nve_flood_index_clear(struct mlxsw_sp_fid *fid) +{ + struct mlxsw_sp_fid_family *fid_family = fid->fid_family; + + mlxsw_sp_fid_vni_op(fid_family->mlxsw_sp, fid->fid_index, fid->vni, + fid->vni_valid, 0, false); +} + static const struct mlxsw_sp_fid_ops mlxsw_sp_fid_8021d_ops = { .setup = mlxsw_sp_fid_8021d_setup, .configure = mlxsw_sp_fid_8021d_configure, @@ -540,6 +722,10 @@ static const struct mlxsw_sp_fid_ops mlxsw_sp_fid_8021d_ops = { .flood_index = mlxsw_sp_fid_8021d_flood_index, .port_vid_map = mlxsw_sp_fid_8021d_port_vid_map, .port_vid_unmap = mlxsw_sp_fid_8021d_port_vid_unmap, + .vni_set = mlxsw_sp_fid_8021d_vni_set, + .vni_clear = mlxsw_sp_fid_8021d_vni_clear, + .nve_flood_index_set = mlxsw_sp_fid_8021d_nve_flood_index_set, + .nve_flood_index_clear = mlxsw_sp_fid_8021d_nve_flood_index_clear, }; static const struct mlxsw_sp_flood_table mlxsw_sp_fid_8021d_flood_tables[] = { @@ -708,14 +894,12 @@ static const struct mlxsw_sp_fid_family *mlxsw_sp_fid_family_arr[] = { [MLXSW_SP_FID_TYPE_DUMMY] = &mlxsw_sp_fid_dummy_family, }; -static struct mlxsw_sp_fid *mlxsw_sp_fid_get(struct mlxsw_sp *mlxsw_sp, - enum mlxsw_sp_fid_type type, - const void *arg) +static struct mlxsw_sp_fid *mlxsw_sp_fid_lookup(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_sp_fid_type type, + const void *arg) { struct mlxsw_sp_fid_family *fid_family; struct mlxsw_sp_fid *fid; - u16 fid_index; - int err; fid_family = mlxsw_sp->fid_core->fid_family_arr[type]; list_for_each_entry(fid, &fid_family->fids_list, list) { @@ -725,6 +909,23 @@ static struct mlxsw_sp_fid *mlxsw_sp_fid_get(struct mlxsw_sp *mlxsw_sp, return fid; } + return NULL; +} + +static struct mlxsw_sp_fid *mlxsw_sp_fid_get(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_sp_fid_type type, + const void *arg) +{ + struct mlxsw_sp_fid_family *fid_family; + struct mlxsw_sp_fid *fid; + u16 fid_index; + int err; + + fid = mlxsw_sp_fid_lookup(mlxsw_sp, type, arg); + if (fid) + return fid; + + fid_family = mlxsw_sp->fid_core->fid_family_arr[type]; fid = kzalloc(fid_family->fid_size, GFP_KERNEL); if (!fid) return ERR_PTR(-ENOMEM); @@ -784,6 +985,13 @@ struct mlxsw_sp_fid *mlxsw_sp_fid_8021d_get(struct mlxsw_sp *mlxsw_sp, return mlxsw_sp_fid_get(mlxsw_sp, MLXSW_SP_FID_TYPE_8021D, &br_ifindex); } +struct mlxsw_sp_fid *mlxsw_sp_fid_8021d_lookup(struct mlxsw_sp *mlxsw_sp, + int br_ifindex) +{ + return mlxsw_sp_fid_lookup(mlxsw_sp, MLXSW_SP_FID_TYPE_8021D, + &br_ifindex); +} + struct mlxsw_sp_fid *mlxsw_sp_fid_rfid_get(struct mlxsw_sp *mlxsw_sp, u16 rif_index) { @@ -918,6 +1126,10 @@ int mlxsw_sp_fids_init(struct mlxsw_sp *mlxsw_sp) return -ENOMEM; mlxsw_sp->fid_core = fid_core; + err = rhashtable_init(&fid_core->vni_ht, &mlxsw_sp_fid_vni_ht_params); + if (err) + goto err_rhashtable_init; + fid_core->port_fid_mappings = kcalloc(max_ports, sizeof(unsigned int), GFP_KERNEL); if (!fid_core->port_fid_mappings) { @@ -944,6 +1156,8 @@ err_fid_ops_register: } kfree(fid_core->port_fid_mappings); err_alloc_port_fid_mappings: + rhashtable_destroy(&fid_core->vni_ht); +err_rhashtable_init: kfree(fid_core); return err; } @@ -957,5 +1171,6 @@ void mlxsw_sp_fids_fini(struct mlxsw_sp *mlxsw_sp) mlxsw_sp_fid_family_unregister(mlxsw_sp, fid_core->fid_family_arr[i]); kfree(fid_core->port_fid_mappings); + rhashtable_destroy(&fid_core->vni_ht); kfree(fid_core); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c new file mode 100644 index 000000000000..ad06d9969bc1 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c @@ -0,0 +1,982 @@ +// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 +/* Copyright (c) 2018 Mellanox Technologies. All rights reserved */ + +#include <linux/err.h> +#include <linux/gfp.h> +#include <linux/kernel.h> +#include <linux/list.h> +#include <linux/netlink.h> +#include <linux/rtnetlink.h> +#include <linux/slab.h> +#include <net/inet_ecn.h> +#include <net/ipv6.h> + +#include "reg.h" +#include "spectrum.h" +#include "spectrum_nve.h" + +const struct mlxsw_sp_nve_ops *mlxsw_sp1_nve_ops_arr[] = { + [MLXSW_SP_NVE_TYPE_VXLAN] = &mlxsw_sp1_nve_vxlan_ops, +}; + +const struct mlxsw_sp_nve_ops *mlxsw_sp2_nve_ops_arr[] = { + [MLXSW_SP_NVE_TYPE_VXLAN] = &mlxsw_sp2_nve_vxlan_ops, +}; + +struct mlxsw_sp_nve_mc_entry; +struct mlxsw_sp_nve_mc_record; +struct mlxsw_sp_nve_mc_list; + +struct mlxsw_sp_nve_mc_record_ops { + enum mlxsw_reg_tnumt_record_type type; + int (*entry_add)(struct mlxsw_sp_nve_mc_record *mc_record, + struct mlxsw_sp_nve_mc_entry *mc_entry, + const union mlxsw_sp_l3addr *addr); + void (*entry_del)(const struct mlxsw_sp_nve_mc_record *mc_record, + const struct mlxsw_sp_nve_mc_entry *mc_entry); + void (*entry_set)(const struct mlxsw_sp_nve_mc_record *mc_record, + const struct mlxsw_sp_nve_mc_entry *mc_entry, + char *tnumt_pl, unsigned int entry_index); + bool (*entry_compare)(const struct mlxsw_sp_nve_mc_record *mc_record, + const struct mlxsw_sp_nve_mc_entry *mc_entry, + const union mlxsw_sp_l3addr *addr); +}; + +struct mlxsw_sp_nve_mc_list_key { + u16 fid_index; +}; + +struct mlxsw_sp_nve_mc_ipv6_entry { + struct in6_addr addr6; + u32 addr6_kvdl_index; +}; + +struct mlxsw_sp_nve_mc_entry { + union { + __be32 addr4; + struct mlxsw_sp_nve_mc_ipv6_entry ipv6_entry; + }; + u8 valid:1; +}; + +struct mlxsw_sp_nve_mc_record { + struct list_head list; + enum mlxsw_sp_l3proto proto; + unsigned int num_entries; + struct mlxsw_sp *mlxsw_sp; + struct mlxsw_sp_nve_mc_list *mc_list; + const struct mlxsw_sp_nve_mc_record_ops *ops; + u32 kvdl_index; + struct mlxsw_sp_nve_mc_entry entries[0]; +}; + +struct mlxsw_sp_nve_mc_list { + struct list_head records_list; + struct rhash_head ht_node; + struct mlxsw_sp_nve_mc_list_key key; +}; + +static const struct rhashtable_params mlxsw_sp_nve_mc_list_ht_params = { + .key_len = sizeof(struct mlxsw_sp_nve_mc_list_key), + .key_offset = offsetof(struct mlxsw_sp_nve_mc_list, key), + .head_offset = offsetof(struct mlxsw_sp_nve_mc_list, ht_node), +}; + +static int +mlxsw_sp_nve_mc_record_ipv4_entry_add(struct mlxsw_sp_nve_mc_record *mc_record, + struct mlxsw_sp_nve_mc_entry *mc_entry, + const union mlxsw_sp_l3addr *addr) +{ + mc_entry->addr4 = addr->addr4; + + return 0; +} + +static void +mlxsw_sp_nve_mc_record_ipv4_entry_del(const struct mlxsw_sp_nve_mc_record *mc_record, + const struct mlxsw_sp_nve_mc_entry *mc_entry) +{ +} + +static void +mlxsw_sp_nve_mc_record_ipv4_entry_set(const struct mlxsw_sp_nve_mc_record *mc_record, + const struct mlxsw_sp_nve_mc_entry *mc_entry, + char *tnumt_pl, unsigned int entry_index) +{ + u32 udip = be32_to_cpu(mc_entry->addr4); + + mlxsw_reg_tnumt_udip_set(tnumt_pl, entry_index, udip); +} + +static bool +mlxsw_sp_nve_mc_record_ipv4_entry_compare(const struct mlxsw_sp_nve_mc_record *mc_record, + const struct mlxsw_sp_nve_mc_entry *mc_entry, + const union mlxsw_sp_l3addr *addr) +{ + return mc_entry->addr4 == addr->addr4; +} + +static const struct mlxsw_sp_nve_mc_record_ops +mlxsw_sp_nve_mc_record_ipv4_ops = { + .type = MLXSW_REG_TNUMT_RECORD_TYPE_IPV4, + .entry_add = &mlxsw_sp_nve_mc_record_ipv4_entry_add, + .entry_del = &mlxsw_sp_nve_mc_record_ipv4_entry_del, + .entry_set = &mlxsw_sp_nve_mc_record_ipv4_entry_set, + .entry_compare = &mlxsw_sp_nve_mc_record_ipv4_entry_compare, +}; + +static int +mlxsw_sp_nve_mc_record_ipv6_entry_add(struct mlxsw_sp_nve_mc_record *mc_record, + struct mlxsw_sp_nve_mc_entry *mc_entry, + const union mlxsw_sp_l3addr *addr) +{ + WARN_ON(1); + + return -EINVAL; +} + +static void +mlxsw_sp_nve_mc_record_ipv6_entry_del(const struct mlxsw_sp_nve_mc_record *mc_record, + const struct mlxsw_sp_nve_mc_entry *mc_entry) +{ +} + +static void +mlxsw_sp_nve_mc_record_ipv6_entry_set(const struct mlxsw_sp_nve_mc_record *mc_record, + const struct mlxsw_sp_nve_mc_entry *mc_entry, + char *tnumt_pl, unsigned int entry_index) +{ + u32 udip_ptr = mc_entry->ipv6_entry.addr6_kvdl_index; + + mlxsw_reg_tnumt_udip_ptr_set(tnumt_pl, entry_index, udip_ptr); +} + +static bool +mlxsw_sp_nve_mc_record_ipv6_entry_compare(const struct mlxsw_sp_nve_mc_record *mc_record, + const struct mlxsw_sp_nve_mc_entry *mc_entry, + const union mlxsw_sp_l3addr *addr) +{ + return ipv6_addr_equal(&mc_entry->ipv6_entry.addr6, &addr->addr6); +} + +static const struct mlxsw_sp_nve_mc_record_ops +mlxsw_sp_nve_mc_record_ipv6_ops = { + .type = MLXSW_REG_TNUMT_RECORD_TYPE_IPV6, + .entry_add = &mlxsw_sp_nve_mc_record_ipv6_entry_add, + .entry_del = &mlxsw_sp_nve_mc_record_ipv6_entry_del, + .entry_set = &mlxsw_sp_nve_mc_record_ipv6_entry_set, + .entry_compare = &mlxsw_sp_nve_mc_record_ipv6_entry_compare, +}; + +static const struct mlxsw_sp_nve_mc_record_ops * +mlxsw_sp_nve_mc_record_ops_arr[] = { + [MLXSW_SP_L3_PROTO_IPV4] = &mlxsw_sp_nve_mc_record_ipv4_ops, + [MLXSW_SP_L3_PROTO_IPV6] = &mlxsw_sp_nve_mc_record_ipv6_ops, +}; + +static struct mlxsw_sp_nve_mc_list * +mlxsw_sp_nve_mc_list_find(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_nve_mc_list_key *key) +{ + struct mlxsw_sp_nve *nve = mlxsw_sp->nve; + + return rhashtable_lookup_fast(&nve->mc_list_ht, key, + mlxsw_sp_nve_mc_list_ht_params); +} + +static struct mlxsw_sp_nve_mc_list * +mlxsw_sp_nve_mc_list_create(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_nve_mc_list_key *key) +{ + struct mlxsw_sp_nve *nve = mlxsw_sp->nve; + struct mlxsw_sp_nve_mc_list *mc_list; + int err; + + mc_list = kmalloc(sizeof(*mc_list), GFP_KERNEL); + if (!mc_list) + return ERR_PTR(-ENOMEM); + + INIT_LIST_HEAD(&mc_list->records_list); + mc_list->key = *key; + + err = rhashtable_insert_fast(&nve->mc_list_ht, &mc_list->ht_node, + mlxsw_sp_nve_mc_list_ht_params); + if (err) + goto err_rhashtable_insert; + + return mc_list; + +err_rhashtable_insert: + kfree(mc_list); + return ERR_PTR(err); +} + +static void mlxsw_sp_nve_mc_list_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nve_mc_list *mc_list) +{ + struct mlxsw_sp_nve *nve = mlxsw_sp->nve; + + rhashtable_remove_fast(&nve->mc_list_ht, &mc_list->ht_node, + mlxsw_sp_nve_mc_list_ht_params); + WARN_ON(!list_empty(&mc_list->records_list)); + kfree(mc_list); +} + +static struct mlxsw_sp_nve_mc_list * +mlxsw_sp_nve_mc_list_get(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_nve_mc_list_key *key) +{ + struct mlxsw_sp_nve_mc_list *mc_list; + + mc_list = mlxsw_sp_nve_mc_list_find(mlxsw_sp, key); + if (mc_list) + return mc_list; + + return mlxsw_sp_nve_mc_list_create(mlxsw_sp, key); +} + +static void +mlxsw_sp_nve_mc_list_put(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nve_mc_list *mc_list) +{ + if (!list_empty(&mc_list->records_list)) + return; + mlxsw_sp_nve_mc_list_destroy(mlxsw_sp, mc_list); +} + +static struct mlxsw_sp_nve_mc_record * +mlxsw_sp_nve_mc_record_create(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nve_mc_list *mc_list, + enum mlxsw_sp_l3proto proto) +{ + unsigned int num_max_entries = mlxsw_sp->nve->num_max_mc_entries[proto]; + struct mlxsw_sp_nve_mc_record *mc_record; + int err; + + mc_record = kzalloc(sizeof(*mc_record) + num_max_entries * + sizeof(struct mlxsw_sp_nve_mc_entry), GFP_KERNEL); + if (!mc_record) + return ERR_PTR(-ENOMEM); + + err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_TNUMT, 1, + &mc_record->kvdl_index); + if (err) + goto err_kvdl_alloc; + + mc_record->ops = mlxsw_sp_nve_mc_record_ops_arr[proto]; + mc_record->mlxsw_sp = mlxsw_sp; + mc_record->mc_list = mc_list; + mc_record->proto = proto; + list_add_tail(&mc_record->list, &mc_list->records_list); + + return mc_record; + +err_kvdl_alloc: + kfree(mc_record); + return ERR_PTR(err); +} + +static void +mlxsw_sp_nve_mc_record_destroy(struct mlxsw_sp_nve_mc_record *mc_record) +{ + struct mlxsw_sp *mlxsw_sp = mc_record->mlxsw_sp; + + list_del(&mc_record->list); + mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_TNUMT, 1, + mc_record->kvdl_index); + WARN_ON(mc_record->num_entries); + kfree(mc_record); +} + +static struct mlxsw_sp_nve_mc_record * +mlxsw_sp_nve_mc_record_get(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nve_mc_list *mc_list, + enum mlxsw_sp_l3proto proto) +{ + struct mlxsw_sp_nve_mc_record *mc_record; + + list_for_each_entry_reverse(mc_record, &mc_list->records_list, list) { + unsigned int num_entries = mc_record->num_entries; + struct mlxsw_sp_nve *nve = mlxsw_sp->nve; + + if (mc_record->proto == proto && + num_entries < nve->num_max_mc_entries[proto]) + return mc_record; + } + + return mlxsw_sp_nve_mc_record_create(mlxsw_sp, mc_list, proto); +} + +static void +mlxsw_sp_nve_mc_record_put(struct mlxsw_sp_nve_mc_record *mc_record) +{ + if (mc_record->num_entries != 0) + return; + + mlxsw_sp_nve_mc_record_destroy(mc_record); +} + +static struct mlxsw_sp_nve_mc_entry * +mlxsw_sp_nve_mc_free_entry_find(struct mlxsw_sp_nve_mc_record *mc_record) +{ + struct mlxsw_sp_nve *nve = mc_record->mlxsw_sp->nve; + unsigned int num_max_entries; + int i; + + num_max_entries = nve->num_max_mc_entries[mc_record->proto]; + for (i = 0; i < num_max_entries; i++) { + if (mc_record->entries[i].valid) + continue; + return &mc_record->entries[i]; + } + + return NULL; +} + +static int +mlxsw_sp_nve_mc_record_refresh(struct mlxsw_sp_nve_mc_record *mc_record) +{ + enum mlxsw_reg_tnumt_record_type type = mc_record->ops->type; + struct mlxsw_sp_nve_mc_list *mc_list = mc_record->mc_list; + struct mlxsw_sp *mlxsw_sp = mc_record->mlxsw_sp; + char tnumt_pl[MLXSW_REG_TNUMT_LEN]; + unsigned int num_max_entries; + unsigned int num_entries = 0; + u32 next_kvdl_index = 0; + bool next_valid = false; + int i; + + if (!list_is_last(&mc_record->list, &mc_list->records_list)) { + struct mlxsw_sp_nve_mc_record *next_record; + + next_record = list_next_entry(mc_record, list); + next_kvdl_index = next_record->kvdl_index; + next_valid = true; + } + + mlxsw_reg_tnumt_pack(tnumt_pl, type, MLXSW_REG_TNUMT_TUNNEL_PORT_NVE, + mc_record->kvdl_index, next_valid, + next_kvdl_index, mc_record->num_entries); + + num_max_entries = mlxsw_sp->nve->num_max_mc_entries[mc_record->proto]; + for (i = 0; i < num_max_entries; i++) { + struct mlxsw_sp_nve_mc_entry *mc_entry; + + mc_entry = &mc_record->entries[i]; + if (!mc_entry->valid) + continue; + mc_record->ops->entry_set(mc_record, mc_entry, tnumt_pl, + num_entries++); + } + + WARN_ON(num_entries != mc_record->num_entries); + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tnumt), tnumt_pl); +} + +static bool +mlxsw_sp_nve_mc_record_is_first(struct mlxsw_sp_nve_mc_record *mc_record) +{ + struct mlxsw_sp_nve_mc_list *mc_list = mc_record->mc_list; + struct mlxsw_sp_nve_mc_record *first_record; + + first_record = list_first_entry(&mc_list->records_list, + struct mlxsw_sp_nve_mc_record, list); + + return mc_record == first_record; +} + +static struct mlxsw_sp_nve_mc_entry * +mlxsw_sp_nve_mc_entry_find(struct mlxsw_sp_nve_mc_record *mc_record, + union mlxsw_sp_l3addr *addr) +{ + struct mlxsw_sp_nve *nve = mc_record->mlxsw_sp->nve; + unsigned int num_max_entries; + int i; + + num_max_entries = nve->num_max_mc_entries[mc_record->proto]; + for (i = 0; i < num_max_entries; i++) { + struct mlxsw_sp_nve_mc_entry *mc_entry; + + mc_entry = &mc_record->entries[i]; + if (!mc_entry->valid) + continue; + if (mc_record->ops->entry_compare(mc_record, mc_entry, addr)) + return mc_entry; + } + + return NULL; +} + +static int +mlxsw_sp_nve_mc_record_ip_add(struct mlxsw_sp_nve_mc_record *mc_record, + union mlxsw_sp_l3addr *addr) +{ + struct mlxsw_sp_nve_mc_entry *mc_entry = NULL; + int err; + + mc_entry = mlxsw_sp_nve_mc_free_entry_find(mc_record); + if (WARN_ON(!mc_entry)) + return -EINVAL; + + err = mc_record->ops->entry_add(mc_record, mc_entry, addr); + if (err) + return err; + mc_record->num_entries++; + mc_entry->valid = true; + + err = mlxsw_sp_nve_mc_record_refresh(mc_record); + if (err) + goto err_record_refresh; + + /* If this is a new record and not the first one, then we need to + * update the next pointer of the previous entry + */ + if (mc_record->num_entries != 1 || + mlxsw_sp_nve_mc_record_is_first(mc_record)) + return 0; + + err = mlxsw_sp_nve_mc_record_refresh(list_prev_entry(mc_record, list)); + if (err) + goto err_prev_record_refresh; + + return 0; + +err_prev_record_refresh: +err_record_refresh: + mc_entry->valid = false; + mc_record->num_entries--; + mc_record->ops->entry_del(mc_record, mc_entry); + return err; +} + +static void +mlxsw_sp_nve_mc_record_entry_del(struct mlxsw_sp_nve_mc_record *mc_record, + struct mlxsw_sp_nve_mc_entry *mc_entry) +{ + struct mlxsw_sp_nve_mc_list *mc_list = mc_record->mc_list; + + mc_entry->valid = false; + mc_record->num_entries--; + + /* When the record continues to exist we only need to invalidate + * the requested entry + */ + if (mc_record->num_entries != 0) { + mlxsw_sp_nve_mc_record_refresh(mc_record); + mc_record->ops->entry_del(mc_record, mc_entry); + return; + } + + /* If the record needs to be deleted, but it is not the first, + * then we need to make sure that the previous record no longer + * points to it. Remove deleted record from the list to reflect + * that and then re-add it at the end, so that it could be + * properly removed by the record destruction code + */ + if (!mlxsw_sp_nve_mc_record_is_first(mc_record)) { + struct mlxsw_sp_nve_mc_record *prev_record; + + prev_record = list_prev_entry(mc_record, list); + list_del(&mc_record->list); + mlxsw_sp_nve_mc_record_refresh(prev_record); + list_add_tail(&mc_record->list, &mc_list->records_list); + mc_record->ops->entry_del(mc_record, mc_entry); + return; + } + + /* If the first record needs to be deleted, but the list is not + * singular, then the second record needs to be written in the + * first record's address, as this address is stored as a property + * of the FID + */ + if (mlxsw_sp_nve_mc_record_is_first(mc_record) && + !list_is_singular(&mc_list->records_list)) { + struct mlxsw_sp_nve_mc_record *next_record; + + next_record = list_next_entry(mc_record, list); + swap(mc_record->kvdl_index, next_record->kvdl_index); + mlxsw_sp_nve_mc_record_refresh(next_record); + mc_record->ops->entry_del(mc_record, mc_entry); + return; + } + + /* This is the last case where the last remaining record needs to + * be deleted. Simply delete the entry + */ + mc_record->ops->entry_del(mc_record, mc_entry); +} + +static struct mlxsw_sp_nve_mc_record * +mlxsw_sp_nve_mc_record_find(struct mlxsw_sp_nve_mc_list *mc_list, + enum mlxsw_sp_l3proto proto, + union mlxsw_sp_l3addr *addr, + struct mlxsw_sp_nve_mc_entry **mc_entry) +{ + struct mlxsw_sp_nve_mc_record *mc_record; + + list_for_each_entry(mc_record, &mc_list->records_list, list) { + if (mc_record->proto != proto) + continue; + + *mc_entry = mlxsw_sp_nve_mc_entry_find(mc_record, addr); + if (*mc_entry) + return mc_record; + } + + return NULL; +} + +static int mlxsw_sp_nve_mc_list_ip_add(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nve_mc_list *mc_list, + enum mlxsw_sp_l3proto proto, + union mlxsw_sp_l3addr *addr) +{ + struct mlxsw_sp_nve_mc_record *mc_record; + int err; + + mc_record = mlxsw_sp_nve_mc_record_get(mlxsw_sp, mc_list, proto); + if (IS_ERR(mc_record)) + return PTR_ERR(mc_record); + + err = mlxsw_sp_nve_mc_record_ip_add(mc_record, addr); + if (err) + goto err_ip_add; + + return 0; + +err_ip_add: + mlxsw_sp_nve_mc_record_put(mc_record); + return err; +} + +static void mlxsw_sp_nve_mc_list_ip_del(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nve_mc_list *mc_list, + enum mlxsw_sp_l3proto proto, + union mlxsw_sp_l3addr *addr) +{ + struct mlxsw_sp_nve_mc_record *mc_record; + struct mlxsw_sp_nve_mc_entry *mc_entry; + + mc_record = mlxsw_sp_nve_mc_record_find(mc_list, proto, addr, + &mc_entry); + if (WARN_ON(!mc_record)) + return; + + mlxsw_sp_nve_mc_record_entry_del(mc_record, mc_entry); + mlxsw_sp_nve_mc_record_put(mc_record); +} + +static int +mlxsw_sp_nve_fid_flood_index_set(struct mlxsw_sp_fid *fid, + struct mlxsw_sp_nve_mc_list *mc_list) +{ + struct mlxsw_sp_nve_mc_record *mc_record; + + /* The address of the first record in the list is a property of + * the FID and we never change it. It only needs to be set when + * a new list is created + */ + if (mlxsw_sp_fid_nve_flood_index_is_set(fid)) + return 0; + + mc_record = list_first_entry(&mc_list->records_list, + struct mlxsw_sp_nve_mc_record, list); + + return mlxsw_sp_fid_nve_flood_index_set(fid, mc_record->kvdl_index); +} + +static void +mlxsw_sp_nve_fid_flood_index_clear(struct mlxsw_sp_fid *fid, + struct mlxsw_sp_nve_mc_list *mc_list) +{ + struct mlxsw_sp_nve_mc_record *mc_record; + + /* The address of the first record needs to be invalidated only when + * the last record is about to be removed + */ + if (!list_is_singular(&mc_list->records_list)) + return; + + mc_record = list_first_entry(&mc_list->records_list, + struct mlxsw_sp_nve_mc_record, list); + if (mc_record->num_entries != 1) + return; + + return mlxsw_sp_fid_nve_flood_index_clear(fid); +} + +int mlxsw_sp_nve_flood_ip_add(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fid *fid, + enum mlxsw_sp_l3proto proto, + union mlxsw_sp_l3addr *addr) +{ + struct mlxsw_sp_nve_mc_list_key key = { 0 }; + struct mlxsw_sp_nve_mc_list *mc_list; + int err; + + key.fid_index = mlxsw_sp_fid_index(fid); + mc_list = mlxsw_sp_nve_mc_list_get(mlxsw_sp, &key); + if (IS_ERR(mc_list)) + return PTR_ERR(mc_list); + + err = mlxsw_sp_nve_mc_list_ip_add(mlxsw_sp, mc_list, proto, addr); + if (err) + goto err_add_ip; + + err = mlxsw_sp_nve_fid_flood_index_set(fid, mc_list); + if (err) + goto err_fid_flood_index_set; + + return 0; + +err_fid_flood_index_set: + mlxsw_sp_nve_mc_list_ip_del(mlxsw_sp, mc_list, proto, addr); +err_add_ip: + mlxsw_sp_nve_mc_list_put(mlxsw_sp, mc_list); + return err; +} + +void mlxsw_sp_nve_flood_ip_del(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fid *fid, + enum mlxsw_sp_l3proto proto, + union mlxsw_sp_l3addr *addr) +{ + struct mlxsw_sp_nve_mc_list_key key = { 0 }; + struct mlxsw_sp_nve_mc_list *mc_list; + + key.fid_index = mlxsw_sp_fid_index(fid); + mc_list = mlxsw_sp_nve_mc_list_find(mlxsw_sp, &key); + if (WARN_ON(!mc_list)) + return; + + mlxsw_sp_nve_fid_flood_index_clear(fid, mc_list); + mlxsw_sp_nve_mc_list_ip_del(mlxsw_sp, mc_list, proto, addr); + mlxsw_sp_nve_mc_list_put(mlxsw_sp, mc_list); +} + +static void +mlxsw_sp_nve_mc_record_delete(struct mlxsw_sp_nve_mc_record *mc_record) +{ + struct mlxsw_sp_nve *nve = mc_record->mlxsw_sp->nve; + unsigned int num_max_entries; + int i; + + num_max_entries = nve->num_max_mc_entries[mc_record->proto]; + for (i = 0; i < num_max_entries; i++) { + struct mlxsw_sp_nve_mc_entry *mc_entry = &mc_record->entries[i]; + + if (!mc_entry->valid) + continue; + mlxsw_sp_nve_mc_record_entry_del(mc_record, mc_entry); + } + + WARN_ON(mc_record->num_entries); + mlxsw_sp_nve_mc_record_put(mc_record); +} + +static void mlxsw_sp_nve_flood_ip_flush(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fid *fid) +{ + struct mlxsw_sp_nve_mc_record *mc_record, *tmp; + struct mlxsw_sp_nve_mc_list_key key = { 0 }; + struct mlxsw_sp_nve_mc_list *mc_list; + + if (!mlxsw_sp_fid_nve_flood_index_is_set(fid)) + return; + + mlxsw_sp_fid_nve_flood_index_clear(fid); + + key.fid_index = mlxsw_sp_fid_index(fid); + mc_list = mlxsw_sp_nve_mc_list_find(mlxsw_sp, &key); + if (WARN_ON(!mc_list)) + return; + + list_for_each_entry_safe(mc_record, tmp, &mc_list->records_list, list) + mlxsw_sp_nve_mc_record_delete(mc_record); + + WARN_ON(!list_empty(&mc_list->records_list)); + mlxsw_sp_nve_mc_list_put(mlxsw_sp, mc_list); +} + +u32 mlxsw_sp_nve_decap_tunnel_index_get(const struct mlxsw_sp *mlxsw_sp) +{ + WARN_ON(mlxsw_sp->nve->num_nve_tunnels == 0); + + return mlxsw_sp->nve->tunnel_index; +} + +bool mlxsw_sp_nve_ipv4_route_is_decap(const struct mlxsw_sp *mlxsw_sp, + u32 tb_id, __be32 addr) +{ + struct mlxsw_sp_nve *nve = mlxsw_sp->nve; + struct mlxsw_sp_nve_config *config = &nve->config; + + if (nve->num_nve_tunnels && + config->ul_proto == MLXSW_SP_L3_PROTO_IPV4 && + config->ul_sip.addr4 == addr && config->ul_tb_id == tb_id) + return true; + + return false; +} + +static int mlxsw_sp_nve_tunnel_init(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nve_config *config) +{ + struct mlxsw_sp_nve *nve = mlxsw_sp->nve; + const struct mlxsw_sp_nve_ops *ops; + int err; + + if (nve->num_nve_tunnels++ != 0) + return 0; + + err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, 1, + &nve->tunnel_index); + if (err) + goto err_kvdl_alloc; + + ops = nve->nve_ops_arr[config->type]; + err = ops->init(nve, config); + if (err) + goto err_ops_init; + + return 0; + +err_ops_init: + mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, 1, + nve->tunnel_index); +err_kvdl_alloc: + nve->num_nve_tunnels--; + return err; +} + +static void mlxsw_sp_nve_tunnel_fini(struct mlxsw_sp *mlxsw_sp) +{ + struct mlxsw_sp_nve *nve = mlxsw_sp->nve; + const struct mlxsw_sp_nve_ops *ops; + + ops = nve->nve_ops_arr[nve->config.type]; + + if (mlxsw_sp->nve->num_nve_tunnels == 1) { + ops->fini(nve); + mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, 1, + nve->tunnel_index); + } + nve->num_nve_tunnels--; +} + +static void mlxsw_sp_nve_fdb_flush_by_fid(struct mlxsw_sp *mlxsw_sp, + u16 fid_index) +{ + char sfdf_pl[MLXSW_REG_SFDF_LEN]; + + mlxsw_reg_sfdf_pack(sfdf_pl, MLXSW_REG_SFDF_FLUSH_PER_NVE_AND_FID); + mlxsw_reg_sfdf_fid_set(sfdf_pl, fid_index); + mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfdf), sfdf_pl); +} + +int mlxsw_sp_nve_fid_enable(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fid *fid, + struct mlxsw_sp_nve_params *params, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp_nve *nve = mlxsw_sp->nve; + const struct mlxsw_sp_nve_ops *ops; + struct mlxsw_sp_nve_config config; + int err; + + ops = nve->nve_ops_arr[params->type]; + + if (!ops->can_offload(nve, params->dev, extack)) + return -EOPNOTSUPP; + + memset(&config, 0, sizeof(config)); + ops->nve_config(nve, params->dev, &config); + if (nve->num_nve_tunnels && + memcmp(&config, &nve->config, sizeof(config))) { + NL_SET_ERR_MSG_MOD(extack, "Conflicting NVE tunnels configuration"); + return -EOPNOTSUPP; + } + + err = mlxsw_sp_nve_tunnel_init(mlxsw_sp, &config); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Failed to initialize NVE tunnel"); + return err; + } + + err = mlxsw_sp_fid_vni_set(fid, params->vni); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Failed to set VNI on FID"); + goto err_fid_vni_set; + } + + nve->config = config; + + return 0; + +err_fid_vni_set: + mlxsw_sp_nve_tunnel_fini(mlxsw_sp); + return err; +} + +void mlxsw_sp_nve_fid_disable(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fid *fid) +{ + u16 fid_index = mlxsw_sp_fid_index(fid); + + mlxsw_sp_nve_flood_ip_flush(mlxsw_sp, fid); + mlxsw_sp_nve_fdb_flush_by_fid(mlxsw_sp, fid_index); + mlxsw_sp_fid_vni_clear(fid); + mlxsw_sp_nve_tunnel_fini(mlxsw_sp); +} + +int mlxsw_sp_port_nve_init(struct mlxsw_sp_port *mlxsw_sp_port) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char tnqdr_pl[MLXSW_REG_TNQDR_LEN]; + + mlxsw_reg_tnqdr_pack(tnqdr_pl, mlxsw_sp_port->local_port); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tnqdr), tnqdr_pl); +} + +void mlxsw_sp_port_nve_fini(struct mlxsw_sp_port *mlxsw_sp_port) +{ +} + +static int mlxsw_sp_nve_qos_init(struct mlxsw_sp *mlxsw_sp) +{ + char tnqcr_pl[MLXSW_REG_TNQCR_LEN]; + + mlxsw_reg_tnqcr_pack(tnqcr_pl); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tnqcr), tnqcr_pl); +} + +static int mlxsw_sp_nve_ecn_encap_init(struct mlxsw_sp *mlxsw_sp) +{ + int i; + + /* Iterate over inner ECN values */ + for (i = INET_ECN_NOT_ECT; i <= INET_ECN_CE; i++) { + u8 outer_ecn = INET_ECN_encapsulate(0, i); + char tneem_pl[MLXSW_REG_TNEEM_LEN]; + int err; + + mlxsw_reg_tneem_pack(tneem_pl, i, outer_ecn); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tneem), + tneem_pl); + if (err) + return err; + } + + return 0; +} + +static int __mlxsw_sp_nve_ecn_decap_init(struct mlxsw_sp *mlxsw_sp, + u8 inner_ecn, u8 outer_ecn) +{ + char tndem_pl[MLXSW_REG_TNDEM_LEN]; + bool trap_en, set_ce = false; + u8 new_inner_ecn; + + trap_en = !!__INET_ECN_decapsulate(outer_ecn, inner_ecn, &set_ce); + new_inner_ecn = set_ce ? INET_ECN_CE : inner_ecn; + + mlxsw_reg_tndem_pack(tndem_pl, outer_ecn, inner_ecn, new_inner_ecn, + trap_en, trap_en ? MLXSW_TRAP_ID_DECAP_ECN0 : 0); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tndem), tndem_pl); +} + +static int mlxsw_sp_nve_ecn_decap_init(struct mlxsw_sp *mlxsw_sp) +{ + int i; + + /* Iterate over inner ECN values */ + for (i = INET_ECN_NOT_ECT; i <= INET_ECN_CE; i++) { + int j; + + /* Iterate over outer ECN values */ + for (j = INET_ECN_NOT_ECT; j <= INET_ECN_CE; j++) { + int err; + + err = __mlxsw_sp_nve_ecn_decap_init(mlxsw_sp, i, j); + if (err) + return err; + } + } + + return 0; +} + +static int mlxsw_sp_nve_ecn_init(struct mlxsw_sp *mlxsw_sp) +{ + int err; + + err = mlxsw_sp_nve_ecn_encap_init(mlxsw_sp); + if (err) + return err; + + return mlxsw_sp_nve_ecn_decap_init(mlxsw_sp); +} + +static int mlxsw_sp_nve_resources_query(struct mlxsw_sp *mlxsw_sp) +{ + unsigned int max; + + if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_NVE_MC_ENTRIES_IPV4) || + !MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_NVE_MC_ENTRIES_IPV6)) + return -EIO; + max = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_NVE_MC_ENTRIES_IPV4); + mlxsw_sp->nve->num_max_mc_entries[MLXSW_SP_L3_PROTO_IPV4] = max; + max = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_NVE_MC_ENTRIES_IPV6); + mlxsw_sp->nve->num_max_mc_entries[MLXSW_SP_L3_PROTO_IPV6] = max; + + return 0; +} + +int mlxsw_sp_nve_init(struct mlxsw_sp *mlxsw_sp) +{ + struct mlxsw_sp_nve *nve; + int err; + + nve = kzalloc(sizeof(*mlxsw_sp->nve), GFP_KERNEL); + if (!nve) + return -ENOMEM; + mlxsw_sp->nve = nve; + nve->mlxsw_sp = mlxsw_sp; + nve->nve_ops_arr = mlxsw_sp->nve_ops_arr; + + err = rhashtable_init(&nve->mc_list_ht, + &mlxsw_sp_nve_mc_list_ht_params); + if (err) + goto err_rhashtable_init; + + err = mlxsw_sp_nve_qos_init(mlxsw_sp); + if (err) + goto err_nve_qos_init; + + err = mlxsw_sp_nve_ecn_init(mlxsw_sp); + if (err) + goto err_nve_ecn_init; + + err = mlxsw_sp_nve_resources_query(mlxsw_sp); + if (err) + goto err_nve_resources_query; + + return 0; + +err_nve_resources_query: +err_nve_ecn_init: +err_nve_qos_init: + rhashtable_destroy(&nve->mc_list_ht); +err_rhashtable_init: + mlxsw_sp->nve = NULL; + kfree(nve); + return err; +} + +void mlxsw_sp_nve_fini(struct mlxsw_sp *mlxsw_sp) +{ + WARN_ON(mlxsw_sp->nve->num_nve_tunnels); + rhashtable_destroy(&mlxsw_sp->nve->mc_list_ht); + mlxsw_sp->nve = NULL; + kfree(mlxsw_sp->nve); +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.h new file mode 100644 index 000000000000..4cc3297e13d6 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.h @@ -0,0 +1,49 @@ +/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */ +/* Copyright (c) 2018 Mellanox Technologies. All rights reserved */ + +#ifndef _MLXSW_SPECTRUM_NVE_H +#define _MLXSW_SPECTRUM_NVE_H + +#include <linux/netlink.h> +#include <linux/rhashtable.h> + +#include "spectrum.h" + +struct mlxsw_sp_nve_config { + enum mlxsw_sp_nve_type type; + u8 ttl; + u8 learning_en:1; + __be16 udp_dport; + __be32 flowlabel; + u32 ul_tb_id; + enum mlxsw_sp_l3proto ul_proto; + union mlxsw_sp_l3addr ul_sip; +}; + +struct mlxsw_sp_nve { + struct mlxsw_sp_nve_config config; + struct rhashtable mc_list_ht; + struct mlxsw_sp *mlxsw_sp; + const struct mlxsw_sp_nve_ops **nve_ops_arr; + unsigned int num_nve_tunnels; /* Protected by RTNL */ + unsigned int num_max_mc_entries[MLXSW_SP_L3_PROTO_MAX]; + u32 tunnel_index; +}; + +struct mlxsw_sp_nve_ops { + enum mlxsw_sp_nve_type type; + bool (*can_offload)(const struct mlxsw_sp_nve *nve, + const struct net_device *dev, + struct netlink_ext_ack *extack); + void (*nve_config)(const struct mlxsw_sp_nve *nve, + const struct net_device *dev, + struct mlxsw_sp_nve_config *config); + int (*init)(struct mlxsw_sp_nve *nve, + const struct mlxsw_sp_nve_config *config); + void (*fini)(struct mlxsw_sp_nve *nve); +}; + +extern const struct mlxsw_sp_nve_ops mlxsw_sp1_nve_vxlan_ops; +extern const struct mlxsw_sp_nve_ops mlxsw_sp2_nve_vxlan_ops; + +#endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve_vxlan.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve_vxlan.c new file mode 100644 index 000000000000..d21c7be5b1c9 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve_vxlan.c @@ -0,0 +1,249 @@ +// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 +/* Copyright (c) 2018 Mellanox Technologies. All rights reserved */ + +#include <linux/netdevice.h> +#include <linux/netlink.h> +#include <linux/random.h> +#include <net/vxlan.h> + +#include "reg.h" +#include "spectrum_nve.h" + +/* Eth (18B) | IPv6 (40B) | UDP (8B) | VxLAN (8B) | Eth (14B) | IPv6 (40B) + * + * In the worst case - where we have a VLAN tag on the outer Ethernet + * header and IPv6 in overlay and underlay - we need to parse 128 bytes + */ +#define MLXSW_SP_NVE_VXLAN_PARSING_DEPTH 128 +#define MLXSW_SP_NVE_DEFAULT_PARSING_DEPTH 96 + +#define MLXSW_SP_NVE_VXLAN_SUPPORTED_FLAGS VXLAN_F_UDP_ZERO_CSUM_TX + +static bool mlxsw_sp1_nve_vxlan_can_offload(const struct mlxsw_sp_nve *nve, + const struct net_device *dev, + struct netlink_ext_ack *extack) +{ + struct vxlan_dev *vxlan = netdev_priv(dev); + struct vxlan_config *cfg = &vxlan->cfg; + + if (cfg->saddr.sa.sa_family != AF_INET) { + NL_SET_ERR_MSG_MOD(extack, "VxLAN: Only IPv4 underlay is supported"); + return false; + } + + if (vxlan_addr_multicast(&cfg->remote_ip)) { + NL_SET_ERR_MSG_MOD(extack, "VxLAN: Multicast destination IP is not supported"); + return false; + } + + if (vxlan_addr_any(&cfg->saddr)) { + NL_SET_ERR_MSG_MOD(extack, "VxLAN: Source address must be specified"); + return false; + } + + if (cfg->remote_ifindex) { + NL_SET_ERR_MSG_MOD(extack, "VxLAN: Local interface is not supported"); + return false; + } + + if (cfg->port_min || cfg->port_max) { + NL_SET_ERR_MSG_MOD(extack, "VxLAN: Only default UDP source port range is supported"); + return false; + } + + if (cfg->tos != 1) { + NL_SET_ERR_MSG_MOD(extack, "VxLAN: TOS must be configured to inherit"); + return false; + } + + if (cfg->flags & VXLAN_F_TTL_INHERIT) { + NL_SET_ERR_MSG_MOD(extack, "VxLAN: TTL must not be configured to inherit"); + return false; + } + + if (cfg->flags & VXLAN_F_LEARN) { + NL_SET_ERR_MSG_MOD(extack, "VxLAN: Learning is not supported"); + return false; + } + + if (!(cfg->flags & VXLAN_F_UDP_ZERO_CSUM_TX)) { + NL_SET_ERR_MSG_MOD(extack, "VxLAN: UDP checksum is not supported"); + return false; + } + + if (cfg->flags & ~MLXSW_SP_NVE_VXLAN_SUPPORTED_FLAGS) { + NL_SET_ERR_MSG_MOD(extack, "VxLAN: Unsupported flag"); + return false; + } + + if (cfg->ttl == 0) { + NL_SET_ERR_MSG_MOD(extack, "VxLAN: TTL must not be configured to 0"); + return false; + } + + if (cfg->label != 0) { + NL_SET_ERR_MSG_MOD(extack, "VxLAN: Flow label must be configured to 0"); + return false; + } + + return true; +} + +static void mlxsw_sp_nve_vxlan_config(const struct mlxsw_sp_nve *nve, + const struct net_device *dev, + struct mlxsw_sp_nve_config *config) +{ + struct vxlan_dev *vxlan = netdev_priv(dev); + struct vxlan_config *cfg = &vxlan->cfg; + + config->type = MLXSW_SP_NVE_TYPE_VXLAN; + config->ttl = cfg->ttl; + config->flowlabel = cfg->label; + config->learning_en = cfg->flags & VXLAN_F_LEARN ? 1 : 0; + config->ul_tb_id = RT_TABLE_MAIN; + config->ul_proto = MLXSW_SP_L3_PROTO_IPV4; + config->ul_sip.addr4 = cfg->saddr.sin.sin_addr.s_addr; + config->udp_dport = cfg->dst_port; +} + +static int mlxsw_sp_nve_parsing_set(struct mlxsw_sp *mlxsw_sp, + unsigned int parsing_depth, + __be16 udp_dport) +{ + char mprs_pl[MLXSW_REG_MPRS_LEN]; + + mlxsw_reg_mprs_pack(mprs_pl, parsing_depth, be16_to_cpu(udp_dport)); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mprs), mprs_pl); +} + +static int +mlxsw_sp1_nve_vxlan_config_set(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_nve_config *config) +{ + char tngcr_pl[MLXSW_REG_TNGCR_LEN]; + u16 ul_vr_id; + u8 udp_sport; + int err; + + err = mlxsw_sp_router_tb_id_vr_id(mlxsw_sp, config->ul_tb_id, + &ul_vr_id); + if (err) + return err; + + mlxsw_reg_tngcr_pack(tngcr_pl, MLXSW_REG_TNGCR_TYPE_VXLAN, true, + config->ttl); + /* VxLAN driver's default UDP source port range is 32768 (0x8000) + * to 60999 (0xee47). Set the upper 8 bits of the UDP source port + * to a random number between 0x80 and 0xee + */ + get_random_bytes(&udp_sport, sizeof(udp_sport)); + udp_sport = (udp_sport % (0xee - 0x80 + 1)) + 0x80; + mlxsw_reg_tngcr_nve_udp_sport_prefix_set(tngcr_pl, udp_sport); + mlxsw_reg_tngcr_learn_enable_set(tngcr_pl, config->learning_en); + mlxsw_reg_tngcr_underlay_virtual_router_set(tngcr_pl, ul_vr_id); + mlxsw_reg_tngcr_usipv4_set(tngcr_pl, be32_to_cpu(config->ul_sip.addr4)); + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tngcr), tngcr_pl); +} + +static void mlxsw_sp1_nve_vxlan_config_clear(struct mlxsw_sp *mlxsw_sp) +{ + char tngcr_pl[MLXSW_REG_TNGCR_LEN]; + + mlxsw_reg_tngcr_pack(tngcr_pl, MLXSW_REG_TNGCR_TYPE_VXLAN, false, 0); + + mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tngcr), tngcr_pl); +} + +static int mlxsw_sp1_nve_vxlan_rtdp_set(struct mlxsw_sp *mlxsw_sp, + unsigned int tunnel_index) +{ + char rtdp_pl[MLXSW_REG_RTDP_LEN]; + + mlxsw_reg_rtdp_pack(rtdp_pl, MLXSW_REG_RTDP_TYPE_NVE, tunnel_index); + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rtdp), rtdp_pl); +} + +static int mlxsw_sp1_nve_vxlan_init(struct mlxsw_sp_nve *nve, + const struct mlxsw_sp_nve_config *config) +{ + struct mlxsw_sp *mlxsw_sp = nve->mlxsw_sp; + int err; + + err = mlxsw_sp_nve_parsing_set(mlxsw_sp, + MLXSW_SP_NVE_VXLAN_PARSING_DEPTH, + config->udp_dport); + if (err) + return err; + + err = mlxsw_sp1_nve_vxlan_config_set(mlxsw_sp, config); + if (err) + goto err_config_set; + + err = mlxsw_sp1_nve_vxlan_rtdp_set(mlxsw_sp, nve->tunnel_index); + if (err) + goto err_rtdp_set; + + err = mlxsw_sp_router_nve_promote_decap(mlxsw_sp, config->ul_tb_id, + config->ul_proto, + &config->ul_sip, + nve->tunnel_index); + if (err) + goto err_promote_decap; + + return 0; + +err_promote_decap: +err_rtdp_set: + mlxsw_sp1_nve_vxlan_config_clear(mlxsw_sp); +err_config_set: + mlxsw_sp_nve_parsing_set(mlxsw_sp, MLXSW_SP_NVE_DEFAULT_PARSING_DEPTH, + config->udp_dport); + return err; +} + +static void mlxsw_sp1_nve_vxlan_fini(struct mlxsw_sp_nve *nve) +{ + struct mlxsw_sp_nve_config *config = &nve->config; + struct mlxsw_sp *mlxsw_sp = nve->mlxsw_sp; + + mlxsw_sp_router_nve_demote_decap(mlxsw_sp, config->ul_tb_id, + config->ul_proto, &config->ul_sip); + mlxsw_sp1_nve_vxlan_config_clear(mlxsw_sp); + mlxsw_sp_nve_parsing_set(mlxsw_sp, MLXSW_SP_NVE_DEFAULT_PARSING_DEPTH, + config->udp_dport); +} + +const struct mlxsw_sp_nve_ops mlxsw_sp1_nve_vxlan_ops = { + .type = MLXSW_SP_NVE_TYPE_VXLAN, + .can_offload = mlxsw_sp1_nve_vxlan_can_offload, + .nve_config = mlxsw_sp_nve_vxlan_config, + .init = mlxsw_sp1_nve_vxlan_init, + .fini = mlxsw_sp1_nve_vxlan_fini, +}; + +static bool mlxsw_sp2_nve_vxlan_can_offload(const struct mlxsw_sp_nve *nve, + const struct net_device *dev, + struct netlink_ext_ack *extack) +{ + return false; +} + +static int mlxsw_sp2_nve_vxlan_init(struct mlxsw_sp_nve *nve, + const struct mlxsw_sp_nve_config *config) +{ + return -EOPNOTSUPP; +} + +static void mlxsw_sp2_nve_vxlan_fini(struct mlxsw_sp_nve *nve) +{ +} + +const struct mlxsw_sp_nve_ops mlxsw_sp2_nve_vxlan_ops = { + .type = MLXSW_SP_NVE_TYPE_VXLAN, + .can_offload = mlxsw_sp2_nve_vxlan_can_offload, + .nve_config = mlxsw_sp_nve_vxlan_config, + .init = mlxsw_sp2_nve_vxlan_init, + .fini = mlxsw_sp2_nve_vxlan_fini, +}; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 2ab9cf25a08a..9e9bb57134f2 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -366,6 +366,7 @@ enum mlxsw_sp_fib_entry_type { * encapsulating entries.) */ MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP, + MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP, }; struct mlxsw_sp_nexthop_group; @@ -741,6 +742,19 @@ static struct mlxsw_sp_vr *mlxsw_sp_vr_find(struct mlxsw_sp *mlxsw_sp, return NULL; } +int mlxsw_sp_router_tb_id_vr_id(struct mlxsw_sp *mlxsw_sp, u32 tb_id, + u16 *vr_id) +{ + struct mlxsw_sp_vr *vr; + + vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id); + if (!vr) + return -ESRCH; + *vr_id = vr->id; + + return 0; +} + static struct mlxsw_sp_fib *mlxsw_sp_vr_fib(const struct mlxsw_sp_vr *vr, enum mlxsw_sp_l3proto proto) { @@ -1128,6 +1142,52 @@ mlxsw_sp_ipip_entry_promote_decap(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry); } +static struct mlxsw_sp_fib_entry * +mlxsw_sp_router_ip2me_fib_entry_find(struct mlxsw_sp *mlxsw_sp, u32 tb_id, + enum mlxsw_sp_l3proto proto, + const union mlxsw_sp_l3addr *addr, + enum mlxsw_sp_fib_entry_type type) +{ + struct mlxsw_sp_fib_entry *fib_entry; + struct mlxsw_sp_fib_node *fib_node; + unsigned char addr_prefix_len; + struct mlxsw_sp_fib *fib; + struct mlxsw_sp_vr *vr; + const void *addrp; + size_t addr_len; + u32 addr4; + + vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id); + if (!vr) + return NULL; + fib = mlxsw_sp_vr_fib(vr, proto); + + switch (proto) { + case MLXSW_SP_L3_PROTO_IPV4: + addr4 = be32_to_cpu(addr->addr4); + addrp = &addr4; + addr_len = 4; + addr_prefix_len = 32; + break; + case MLXSW_SP_L3_PROTO_IPV6: /* fall through */ + default: + WARN_ON(1); + return NULL; + } + + fib_node = mlxsw_sp_fib_node_lookup(fib, addrp, addr_len, + addr_prefix_len); + if (!fib_node || list_empty(&fib_node->entry_list)) + return NULL; + + fib_entry = list_first_entry(&fib_node->entry_list, + struct mlxsw_sp_fib_entry, list); + if (fib_entry->type != type) + return NULL; + + return fib_entry; +} + /* Given an IPIP entry, find the corresponding decap route. */ static struct mlxsw_sp_fib_entry * mlxsw_sp_ipip_entry_find_decap(struct mlxsw_sp *mlxsw_sp, @@ -1765,6 +1825,56 @@ mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp, return 0; } +int mlxsw_sp_router_nve_promote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id, + enum mlxsw_sp_l3proto ul_proto, + const union mlxsw_sp_l3addr *ul_sip, + u32 tunnel_index) +{ + enum mlxsw_sp_fib_entry_type type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP; + struct mlxsw_sp_fib_entry *fib_entry; + int err; + + /* It is valid to create a tunnel with a local IP and only later + * assign this IP address to a local interface + */ + fib_entry = mlxsw_sp_router_ip2me_fib_entry_find(mlxsw_sp, ul_tb_id, + ul_proto, ul_sip, + type); + if (!fib_entry) + return 0; + + fib_entry->decap.tunnel_index = tunnel_index; + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP; + + err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry); + if (err) + goto err_fib_entry_update; + + return 0; + +err_fib_entry_update: + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP; + mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry); + return err; +} + +void mlxsw_sp_router_nve_demote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id, + enum mlxsw_sp_l3proto ul_proto, + const union mlxsw_sp_l3addr *ul_sip) +{ + enum mlxsw_sp_fib_entry_type type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP; + struct mlxsw_sp_fib_entry *fib_entry; + + fib_entry = mlxsw_sp_router_ip2me_fib_entry_find(mlxsw_sp, ul_tb_id, + ul_proto, ul_sip, + type); + if (!fib_entry) + return; + + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP; + mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry); +} + struct mlxsw_sp_neigh_key { struct neighbour *n; }; @@ -3815,6 +3925,7 @@ mlxsw_sp_fib_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry) case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL: return !!nh_group->nh_rif; case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP: + case MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP: return true; default: return false; @@ -3848,7 +3959,8 @@ mlxsw_sp_fib4_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry) int i; if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL || - fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP) { + fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP || + fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP) { nh_grp->nexthops->key.fib_nh->nh_flags |= RTNH_F_OFFLOAD; return; } @@ -4072,6 +4184,18 @@ mlxsw_sp_fib_entry_op_ipip_decap(struct mlxsw_sp *mlxsw_sp, fib_entry->decap.tunnel_index); } +static int mlxsw_sp_fib_entry_op_nve_decap(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry, + enum mlxsw_reg_ralue_op op) +{ + char ralue_pl[MLXSW_REG_RALUE_LEN]; + + mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op); + mlxsw_reg_ralue_act_ip2me_tun_pack(ralue_pl, + fib_entry->decap.tunnel_index); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl); +} + static int __mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_entry *fib_entry, enum mlxsw_reg_ralue_op op) @@ -4086,6 +4210,8 @@ static int __mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp, case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP: return mlxsw_sp_fib_entry_op_ipip_decap(mlxsw_sp, fib_entry, op); + case MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP: + return mlxsw_sp_fib_entry_op_nve_decap(mlxsw_sp, fib_entry, op); } return -EINVAL; } @@ -4121,6 +4247,7 @@ mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_entry *fib_entry) { union mlxsw_sp_l3addr dip = { .addr4 = htonl(fen_info->dst) }; + u32 tb_id = mlxsw_sp_fix_tb_id(fen_info->tb_id); struct net_device *dev = fen_info->fi->fib_dev; struct mlxsw_sp_ipip_entry *ipip_entry; struct fib_info *fi = fen_info->fi; @@ -4135,6 +4262,15 @@ mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp, fib_entry, ipip_entry); } + if (mlxsw_sp_nve_ipv4_route_is_decap(mlxsw_sp, tb_id, + dip.addr4)) { + u32 t_index; + + t_index = mlxsw_sp_nve_decap_tunnel_index_get(mlxsw_sp); + fib_entry->decap.tunnel_index = t_index; + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP; + return 0; + } /* fall through */ case RTN_BROADCAST: fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index fa16ad2c6a50..bc60d7a8b49d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -15,6 +15,7 @@ #include <linux/rtnetlink.h> #include <linux/netlink.h> #include <net/switchdev.h> +#include <net/vxlan.h> #include "spectrum_span.h" #include "spectrum_switchdev.h" @@ -83,9 +84,19 @@ struct mlxsw_sp_bridge_ops { void (*port_leave)(struct mlxsw_sp_bridge_device *bridge_device, struct mlxsw_sp_bridge_port *bridge_port, struct mlxsw_sp_port *mlxsw_sp_port); + int (*vxlan_join)(struct mlxsw_sp_bridge_device *bridge_device, + const struct net_device *vxlan_dev, + struct netlink_ext_ack *extack); + void (*vxlan_leave)(struct mlxsw_sp_bridge_device *bridge_device, + const struct net_device *vxlan_dev); struct mlxsw_sp_fid * (*fid_get)(struct mlxsw_sp_bridge_device *bridge_device, u16 vid); + struct mlxsw_sp_fid * + (*fid_lookup)(struct mlxsw_sp_bridge_device *bridge_device, + u16 vid); + u16 (*fid_vid)(struct mlxsw_sp_bridge_device *bridge_device, + const struct mlxsw_sp_fid *fid); }; static int @@ -1236,6 +1247,51 @@ static enum mlxsw_reg_sfd_op mlxsw_sp_sfd_op(bool adding) MLXSW_REG_SFD_OP_WRITE_REMOVE; } +static int mlxsw_sp_port_fdb_tunnel_uc_op(struct mlxsw_sp *mlxsw_sp, + const char *mac, u16 fid, + enum mlxsw_sp_l3proto proto, + const union mlxsw_sp_l3addr *addr, + bool adding, bool dynamic) +{ + enum mlxsw_reg_sfd_uc_tunnel_protocol sfd_proto; + char *sfd_pl; + u8 num_rec; + u32 uip; + int err; + + switch (proto) { + case MLXSW_SP_L3_PROTO_IPV4: + uip = be32_to_cpu(addr->addr4); + sfd_proto = MLXSW_REG_SFD_UC_TUNNEL_PROTOCOL_IPV4; + break; + case MLXSW_SP_L3_PROTO_IPV6: /* fall through */ + default: + WARN_ON(1); + return -EOPNOTSUPP; + } + + sfd_pl = kmalloc(MLXSW_REG_SFD_LEN, GFP_KERNEL); + if (!sfd_pl) + return -ENOMEM; + + mlxsw_reg_sfd_pack(sfd_pl, mlxsw_sp_sfd_op(adding), 0); + mlxsw_reg_sfd_uc_tunnel_pack(sfd_pl, 0, + mlxsw_sp_sfd_rec_policy(dynamic), mac, fid, + MLXSW_REG_SFD_REC_ACTION_NOP, uip, + sfd_proto); + num_rec = mlxsw_reg_sfd_num_rec_get(sfd_pl); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfd), sfd_pl); + if (err) + goto out; + + if (num_rec != mlxsw_reg_sfd_num_rec_get(sfd_pl)) + err = -EBUSY; + +out: + kfree(sfd_pl); + return err; +} + static int __mlxsw_sp_port_fdb_uc_op(struct mlxsw_sp *mlxsw_sp, u8 local_port, const char *mac, u16 fid, bool adding, enum mlxsw_reg_sfd_rec_action action, @@ -1949,6 +2005,21 @@ mlxsw_sp_bridge_8021q_port_leave(struct mlxsw_sp_bridge_device *bridge_device, mlxsw_sp_port_pvid_set(mlxsw_sp_port, 1); } +static int +mlxsw_sp_bridge_8021q_vxlan_join(struct mlxsw_sp_bridge_device *bridge_device, + const struct net_device *vxlan_dev, + struct netlink_ext_ack *extack) +{ + WARN_ON(1); + return -EINVAL; +} + +static void +mlxsw_sp_bridge_8021q_vxlan_leave(struct mlxsw_sp_bridge_device *bridge_device, + const struct net_device *vxlan_dev) +{ +} + static struct mlxsw_sp_fid * mlxsw_sp_bridge_8021q_fid_get(struct mlxsw_sp_bridge_device *bridge_device, u16 vid) @@ -1958,10 +2029,29 @@ mlxsw_sp_bridge_8021q_fid_get(struct mlxsw_sp_bridge_device *bridge_device, return mlxsw_sp_fid_8021q_get(mlxsw_sp, vid); } +static struct mlxsw_sp_fid * +mlxsw_sp_bridge_8021q_fid_lookup(struct mlxsw_sp_bridge_device *bridge_device, + u16 vid) +{ + WARN_ON(1); + return NULL; +} + +static u16 +mlxsw_sp_bridge_8021q_fid_vid(struct mlxsw_sp_bridge_device *bridge_device, + const struct mlxsw_sp_fid *fid) +{ + return mlxsw_sp_fid_8021q_vid(fid); +} + static const struct mlxsw_sp_bridge_ops mlxsw_sp_bridge_8021q_ops = { .port_join = mlxsw_sp_bridge_8021q_port_join, .port_leave = mlxsw_sp_bridge_8021q_port_leave, + .vxlan_join = mlxsw_sp_bridge_8021q_vxlan_join, + .vxlan_leave = mlxsw_sp_bridge_8021q_vxlan_leave, .fid_get = mlxsw_sp_bridge_8021q_fid_get, + .fid_lookup = mlxsw_sp_bridge_8021q_fid_lookup, + .fid_vid = mlxsw_sp_bridge_8021q_fid_vid, }; static bool @@ -2025,19 +2115,126 @@ mlxsw_sp_bridge_8021d_port_leave(struct mlxsw_sp_bridge_device *bridge_device, mlxsw_sp_port_vlan_bridge_leave(mlxsw_sp_port_vlan); } +static int +mlxsw_sp_bridge_8021d_vxlan_join(struct mlxsw_sp_bridge_device *bridge_device, + const struct net_device *vxlan_dev, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(bridge_device->dev); + struct vxlan_dev *vxlan = netdev_priv(vxlan_dev); + struct mlxsw_sp_nve_params params = { + .type = MLXSW_SP_NVE_TYPE_VXLAN, + .vni = vxlan->cfg.vni, + .dev = vxlan_dev, + }; + struct mlxsw_sp_fid *fid; + int err; + + fid = mlxsw_sp_fid_8021d_lookup(mlxsw_sp, bridge_device->dev->ifindex); + if (!fid) + return -EINVAL; + + if (mlxsw_sp_fid_vni_is_set(fid)) + return -EINVAL; + + err = mlxsw_sp_nve_fid_enable(mlxsw_sp, fid, ¶ms, extack); + if (err) + goto err_nve_fid_enable; + + /* The tunnel port does not hold a reference on the FID. Only + * local ports and the router port + */ + mlxsw_sp_fid_put(fid); + + return 0; + +err_nve_fid_enable: + mlxsw_sp_fid_put(fid); + return err; +} + +static void +mlxsw_sp_bridge_8021d_vxlan_leave(struct mlxsw_sp_bridge_device *bridge_device, + const struct net_device *vxlan_dev) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(bridge_device->dev); + struct mlxsw_sp_fid *fid; + + fid = mlxsw_sp_fid_8021d_lookup(mlxsw_sp, bridge_device->dev->ifindex); + if (WARN_ON(!fid)) + return; + + /* If the VxLAN device is down, then the FID does not have a VNI */ + if (!mlxsw_sp_fid_vni_is_set(fid)) + goto out; + + mlxsw_sp_nve_fid_disable(mlxsw_sp, fid); +out: + mlxsw_sp_fid_put(fid); +} + static struct mlxsw_sp_fid * mlxsw_sp_bridge_8021d_fid_get(struct mlxsw_sp_bridge_device *bridge_device, u16 vid) { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(bridge_device->dev); + struct net_device *vxlan_dev; + struct mlxsw_sp_fid *fid; + int err; + + fid = mlxsw_sp_fid_8021d_get(mlxsw_sp, bridge_device->dev->ifindex); + if (IS_ERR(fid)) + return fid; + + if (mlxsw_sp_fid_vni_is_set(fid)) + return fid; + + vxlan_dev = mlxsw_sp_bridge_vxlan_dev_find(bridge_device->dev); + if (!vxlan_dev) + return fid; + + if (!netif_running(vxlan_dev)) + return fid; + + err = mlxsw_sp_bridge_8021d_vxlan_join(bridge_device, vxlan_dev, NULL); + if (err) + goto err_vxlan_join; + + return fid; + +err_vxlan_join: + mlxsw_sp_fid_put(fid); + return ERR_PTR(err); +} + +static struct mlxsw_sp_fid * +mlxsw_sp_bridge_8021d_fid_lookup(struct mlxsw_sp_bridge_device *bridge_device, + u16 vid) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(bridge_device->dev); - return mlxsw_sp_fid_8021d_get(mlxsw_sp, bridge_device->dev->ifindex); + /* The only valid VLAN for a VLAN-unaware bridge is 0 */ + if (vid) + return NULL; + + return mlxsw_sp_fid_8021d_lookup(mlxsw_sp, bridge_device->dev->ifindex); +} + +static u16 +mlxsw_sp_bridge_8021d_fid_vid(struct mlxsw_sp_bridge_device *bridge_device, + const struct mlxsw_sp_fid *fid) +{ + return 0; } static const struct mlxsw_sp_bridge_ops mlxsw_sp_bridge_8021d_ops = { .port_join = mlxsw_sp_bridge_8021d_port_join, .port_leave = mlxsw_sp_bridge_8021d_port_leave, + .vxlan_join = mlxsw_sp_bridge_8021d_vxlan_join, + .vxlan_leave = mlxsw_sp_bridge_8021d_vxlan_leave, .fid_get = mlxsw_sp_bridge_8021d_fid_get, + .fid_lookup = mlxsw_sp_bridge_8021d_fid_lookup, + .fid_vid = mlxsw_sp_bridge_8021d_fid_vid, }; int mlxsw_sp_port_bridge_join(struct mlxsw_sp_port *mlxsw_sp_port, @@ -2087,15 +2284,43 @@ void mlxsw_sp_port_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_port, mlxsw_sp_bridge_port_put(mlxsw_sp->bridge, bridge_port); } +int mlxsw_sp_bridge_vxlan_join(struct mlxsw_sp *mlxsw_sp, + const struct net_device *br_dev, + const struct net_device *vxlan_dev, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp_bridge_device *bridge_device; + + bridge_device = mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, br_dev); + if (WARN_ON(!bridge_device)) + return -EINVAL; + + return bridge_device->ops->vxlan_join(bridge_device, vxlan_dev, extack); +} + +void mlxsw_sp_bridge_vxlan_leave(struct mlxsw_sp *mlxsw_sp, + const struct net_device *br_dev, + const struct net_device *vxlan_dev) +{ + struct mlxsw_sp_bridge_device *bridge_device; + + bridge_device = mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, br_dev); + if (WARN_ON(!bridge_device)) + return; + + bridge_device->ops->vxlan_leave(bridge_device, vxlan_dev); +} + static void mlxsw_sp_fdb_call_notifiers(enum switchdev_notifier_type type, const char *mac, u16 vid, - struct net_device *dev) + struct net_device *dev, bool offloaded) { struct switchdev_notifier_fdb_info info; info.addr = mac; info.vid = vid; + info.offloaded = offloaded; call_switchdev_notifiers(type, dev, &info.info); } @@ -2147,7 +2372,7 @@ do_fdb_op: if (!do_notification) return; type = adding ? SWITCHDEV_FDB_ADD_TO_BRIDGE : SWITCHDEV_FDB_DEL_TO_BRIDGE; - mlxsw_sp_fdb_call_notifiers(type, mac, vid, bridge_port->dev); + mlxsw_sp_fdb_call_notifiers(type, mac, vid, bridge_port->dev, adding); return; @@ -2207,7 +2432,7 @@ do_fdb_op: if (!do_notification) return; type = adding ? SWITCHDEV_FDB_ADD_TO_BRIDGE : SWITCHDEV_FDB_DEL_TO_BRIDGE; - mlxsw_sp_fdb_call_notifiers(type, mac, vid, bridge_port->dev); + mlxsw_sp_fdb_call_notifiers(type, mac, vid, bridge_port->dev, adding); return; @@ -2283,11 +2508,126 @@ out: struct mlxsw_sp_switchdev_event_work { struct work_struct work; - struct switchdev_notifier_fdb_info fdb_info; + union { + struct switchdev_notifier_fdb_info fdb_info; + struct switchdev_notifier_vxlan_fdb_info vxlan_fdb_info; + }; struct net_device *dev; unsigned long event; }; +static void +mlxsw_sp_switchdev_vxlan_addr_convert(const union vxlan_addr *vxlan_addr, + enum mlxsw_sp_l3proto *proto, + union mlxsw_sp_l3addr *addr) +{ + if (vxlan_addr->sa.sa_family == AF_INET) { + addr->addr4 = vxlan_addr->sin.sin_addr.s_addr; + *proto = MLXSW_SP_L3_PROTO_IPV4; + } else { + addr->addr6 = vxlan_addr->sin6.sin6_addr; + *proto = MLXSW_SP_L3_PROTO_IPV6; + } +} + +static void +mlxsw_sp_switchdev_bridge_vxlan_fdb_event(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_switchdev_event_work * + switchdev_work, + struct mlxsw_sp_fid *fid, __be32 vni) +{ + struct switchdev_notifier_vxlan_fdb_info vxlan_fdb_info; + struct switchdev_notifier_fdb_info *fdb_info; + struct net_device *dev = switchdev_work->dev; + enum mlxsw_sp_l3proto proto; + union mlxsw_sp_l3addr addr; + int err; + + fdb_info = &switchdev_work->fdb_info; + err = vxlan_fdb_find_uc(dev, fdb_info->addr, vni, &vxlan_fdb_info); + if (err) + return; + + mlxsw_sp_switchdev_vxlan_addr_convert(&vxlan_fdb_info.remote_ip, + &proto, &addr); + + switch (switchdev_work->event) { + case SWITCHDEV_FDB_ADD_TO_DEVICE: + err = mlxsw_sp_port_fdb_tunnel_uc_op(mlxsw_sp, + vxlan_fdb_info.eth_addr, + mlxsw_sp_fid_index(fid), + proto, &addr, true, false); + if (err) + return; + vxlan_fdb_info.offloaded = true; + call_switchdev_notifiers(SWITCHDEV_VXLAN_FDB_OFFLOADED, dev, + &vxlan_fdb_info.info); + mlxsw_sp_fdb_call_notifiers(SWITCHDEV_FDB_OFFLOADED, + vxlan_fdb_info.eth_addr, + fdb_info->vid, dev, true); + break; + case SWITCHDEV_FDB_DEL_TO_DEVICE: + err = mlxsw_sp_port_fdb_tunnel_uc_op(mlxsw_sp, + vxlan_fdb_info.eth_addr, + mlxsw_sp_fid_index(fid), + proto, &addr, false, + false); + vxlan_fdb_info.offloaded = false; + call_switchdev_notifiers(SWITCHDEV_VXLAN_FDB_OFFLOADED, dev, + &vxlan_fdb_info.info); + break; + } +} + +static void +mlxsw_sp_switchdev_bridge_nve_fdb_event(struct mlxsw_sp_switchdev_event_work * + switchdev_work) +{ + struct mlxsw_sp_bridge_device *bridge_device; + struct net_device *dev = switchdev_work->dev; + struct net_device *br_dev; + struct mlxsw_sp *mlxsw_sp; + struct mlxsw_sp_fid *fid; + __be32 vni; + int err; + + if (switchdev_work->event != SWITCHDEV_FDB_ADD_TO_DEVICE && + switchdev_work->event != SWITCHDEV_FDB_DEL_TO_DEVICE) + return; + + if (!switchdev_work->fdb_info.added_by_user) + return; + + if (!netif_running(dev)) + return; + br_dev = netdev_master_upper_dev_get(dev); + if (!br_dev) + return; + if (!netif_is_bridge_master(br_dev)) + return; + mlxsw_sp = mlxsw_sp_lower_get(br_dev); + if (!mlxsw_sp) + return; + bridge_device = mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, br_dev); + if (!bridge_device) + return; + + fid = bridge_device->ops->fid_lookup(bridge_device, + switchdev_work->fdb_info.vid); + if (!fid) + return; + + err = mlxsw_sp_fid_vni(fid, &vni); + if (err) + goto out; + + mlxsw_sp_switchdev_bridge_vxlan_fdb_event(mlxsw_sp, switchdev_work, fid, + vni); + +out: + mlxsw_sp_fid_put(fid); +} + static void mlxsw_sp_switchdev_bridge_fdb_event_work(struct work_struct *work) { struct mlxsw_sp_switchdev_event_work *switchdev_work = @@ -2298,6 +2638,11 @@ static void mlxsw_sp_switchdev_bridge_fdb_event_work(struct work_struct *work) int err; rtnl_lock(); + if (netif_is_vxlan(dev)) { + mlxsw_sp_switchdev_bridge_nve_fdb_event(switchdev_work); + goto out; + } + mlxsw_sp_port = mlxsw_sp_port_dev_lower_find(dev); if (!mlxsw_sp_port) goto out; @@ -2312,7 +2657,7 @@ static void mlxsw_sp_switchdev_bridge_fdb_event_work(struct work_struct *work) break; mlxsw_sp_fdb_call_notifiers(SWITCHDEV_FDB_OFFLOADED, fdb_info->addr, - fdb_info->vid, dev); + fdb_info->vid, dev, true); break; case SWITCHDEV_FDB_DEL_TO_DEVICE: fdb_info = &switchdev_work->fdb_info; @@ -2337,6 +2682,189 @@ out: dev_put(dev); } +static void +mlxsw_sp_switchdev_vxlan_fdb_add(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_switchdev_event_work * + switchdev_work) +{ + struct switchdev_notifier_vxlan_fdb_info *vxlan_fdb_info; + struct mlxsw_sp_bridge_device *bridge_device; + struct net_device *dev = switchdev_work->dev; + u8 all_zeros_mac[ETH_ALEN] = { 0 }; + enum mlxsw_sp_l3proto proto; + union mlxsw_sp_l3addr addr; + struct net_device *br_dev; + struct mlxsw_sp_fid *fid; + u16 vid; + int err; + + vxlan_fdb_info = &switchdev_work->vxlan_fdb_info; + br_dev = netdev_master_upper_dev_get(dev); + + bridge_device = mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, br_dev); + if (!bridge_device) + return; + + fid = mlxsw_sp_fid_lookup_by_vni(mlxsw_sp, vxlan_fdb_info->vni); + if (!fid) + return; + + mlxsw_sp_switchdev_vxlan_addr_convert(&vxlan_fdb_info->remote_ip, + &proto, &addr); + + if (ether_addr_equal(vxlan_fdb_info->eth_addr, all_zeros_mac)) { + err = mlxsw_sp_nve_flood_ip_add(mlxsw_sp, fid, proto, &addr); + if (err) { + mlxsw_sp_fid_put(fid); + return; + } + vxlan_fdb_info->offloaded = true; + call_switchdev_notifiers(SWITCHDEV_VXLAN_FDB_OFFLOADED, dev, + &vxlan_fdb_info->info); + mlxsw_sp_fid_put(fid); + return; + } + + /* The device has a single FDB table, whereas Linux has two - one + * in the bridge driver and another in the VxLAN driver. We only + * program an entry to the device if the MAC points to the VxLAN + * device in the bridge's FDB table + */ + vid = bridge_device->ops->fid_vid(bridge_device, fid); + if (br_fdb_find_port(br_dev, vxlan_fdb_info->eth_addr, vid) != dev) + goto err_br_fdb_find; + + err = mlxsw_sp_port_fdb_tunnel_uc_op(mlxsw_sp, vxlan_fdb_info->eth_addr, + mlxsw_sp_fid_index(fid), proto, + &addr, true, false); + if (err) + goto err_fdb_tunnel_uc_op; + vxlan_fdb_info->offloaded = true; + call_switchdev_notifiers(SWITCHDEV_VXLAN_FDB_OFFLOADED, dev, + &vxlan_fdb_info->info); + mlxsw_sp_fdb_call_notifiers(SWITCHDEV_FDB_OFFLOADED, + vxlan_fdb_info->eth_addr, vid, dev, true); + + mlxsw_sp_fid_put(fid); + + return; + +err_fdb_tunnel_uc_op: +err_br_fdb_find: + mlxsw_sp_fid_put(fid); +} + +static void +mlxsw_sp_switchdev_vxlan_fdb_del(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_switchdev_event_work * + switchdev_work) +{ + struct switchdev_notifier_vxlan_fdb_info *vxlan_fdb_info; + struct mlxsw_sp_bridge_device *bridge_device; + struct net_device *dev = switchdev_work->dev; + struct net_device *br_dev = netdev_master_upper_dev_get(dev); + u8 all_zeros_mac[ETH_ALEN] = { 0 }; + enum mlxsw_sp_l3proto proto; + union mlxsw_sp_l3addr addr; + struct mlxsw_sp_fid *fid; + u16 vid; + + vxlan_fdb_info = &switchdev_work->vxlan_fdb_info; + + bridge_device = mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, br_dev); + if (!bridge_device) + return; + + fid = mlxsw_sp_fid_lookup_by_vni(mlxsw_sp, vxlan_fdb_info->vni); + if (!fid) + return; + + mlxsw_sp_switchdev_vxlan_addr_convert(&vxlan_fdb_info->remote_ip, + &proto, &addr); + + if (ether_addr_equal(vxlan_fdb_info->eth_addr, all_zeros_mac)) { + mlxsw_sp_nve_flood_ip_del(mlxsw_sp, fid, proto, &addr); + mlxsw_sp_fid_put(fid); + return; + } + + mlxsw_sp_port_fdb_tunnel_uc_op(mlxsw_sp, vxlan_fdb_info->eth_addr, + mlxsw_sp_fid_index(fid), proto, &addr, + false, false); + vid = bridge_device->ops->fid_vid(bridge_device, fid); + mlxsw_sp_fdb_call_notifiers(SWITCHDEV_FDB_OFFLOADED, + vxlan_fdb_info->eth_addr, vid, dev, false); + + mlxsw_sp_fid_put(fid); +} + +static void mlxsw_sp_switchdev_vxlan_fdb_event_work(struct work_struct *work) +{ + struct mlxsw_sp_switchdev_event_work *switchdev_work = + container_of(work, struct mlxsw_sp_switchdev_event_work, work); + struct net_device *dev = switchdev_work->dev; + struct mlxsw_sp *mlxsw_sp; + struct net_device *br_dev; + + rtnl_lock(); + + if (!netif_running(dev)) + goto out; + br_dev = netdev_master_upper_dev_get(dev); + if (!br_dev) + goto out; + if (!netif_is_bridge_master(br_dev)) + goto out; + mlxsw_sp = mlxsw_sp_lower_get(br_dev); + if (!mlxsw_sp) + goto out; + + switch (switchdev_work->event) { + case SWITCHDEV_VXLAN_FDB_ADD_TO_DEVICE: + mlxsw_sp_switchdev_vxlan_fdb_add(mlxsw_sp, switchdev_work); + break; + case SWITCHDEV_VXLAN_FDB_DEL_TO_DEVICE: + mlxsw_sp_switchdev_vxlan_fdb_del(mlxsw_sp, switchdev_work); + break; + } + +out: + rtnl_unlock(); + kfree(switchdev_work); + dev_put(dev); +} + +static int +mlxsw_sp_switchdev_vxlan_work_prepare(struct mlxsw_sp_switchdev_event_work * + switchdev_work, + struct switchdev_notifier_info *info) +{ + struct vxlan_dev *vxlan = netdev_priv(switchdev_work->dev); + struct switchdev_notifier_vxlan_fdb_info *vxlan_fdb_info; + struct vxlan_config *cfg = &vxlan->cfg; + + vxlan_fdb_info = container_of(info, + struct switchdev_notifier_vxlan_fdb_info, + info); + + if (vxlan_fdb_info->remote_port != cfg->dst_port) + return -EOPNOTSUPP; + if (vxlan_fdb_info->remote_vni != cfg->vni) + return -EOPNOTSUPP; + if (vxlan_fdb_info->vni != cfg->vni) + return -EOPNOTSUPP; + if (vxlan_fdb_info->remote_ifindex) + return -EOPNOTSUPP; + if (is_multicast_ether_addr(vxlan_fdb_info->eth_addr)) + return -EOPNOTSUPP; + if (vxlan_addr_multicast(&vxlan_fdb_info->remote_ip)) + return -EOPNOTSUPP; + + switchdev_work->vxlan_fdb_info = *vxlan_fdb_info; + + return 0; +} + /* Called under rcu_read_lock() */ static int mlxsw_sp_switchdev_event(struct notifier_block *unused, unsigned long event, void *ptr) @@ -2346,6 +2874,7 @@ static int mlxsw_sp_switchdev_event(struct notifier_block *unused, struct switchdev_notifier_fdb_info *fdb_info; struct switchdev_notifier_info *info = ptr; struct net_device *br_dev; + int err; /* Tunnel devices are not our uppers, so check their master instead */ br_dev = netdev_master_upper_dev_get_rcu(dev); @@ -2386,6 +2915,16 @@ static int mlxsw_sp_switchdev_event(struct notifier_block *unused, */ dev_hold(dev); break; + case SWITCHDEV_VXLAN_FDB_ADD_TO_DEVICE: /* fall through */ + case SWITCHDEV_VXLAN_FDB_DEL_TO_DEVICE: + INIT_WORK(&switchdev_work->work, + mlxsw_sp_switchdev_vxlan_fdb_event_work); + err = mlxsw_sp_switchdev_vxlan_work_prepare(switchdev_work, + info); + if (err) + goto err_vxlan_work_prepare; + dev_hold(dev); + break; default: kfree(switchdev_work); return NOTIFY_DONE; @@ -2395,6 +2934,7 @@ static int mlxsw_sp_switchdev_event(struct notifier_block *unused, return NOTIFY_DONE; +err_vxlan_work_prepare: err_addr_alloc: kfree(switchdev_work); return NOTIFY_BAD; diff --git a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c index 30c926c4bc47..8e5bec04d1f9 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c +++ b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c @@ -4,6 +4,7 @@ #include <linux/etherdevice.h> #include <linux/inetdevice.h> #include <net/netevent.h> +#include <net/vxlan.h> #include <linux/idr.h> #include <net/dst_metadata.h> #include <net/arp.h> @@ -187,7 +188,7 @@ static bool nfp_tun_is_netdev_to_offload(struct net_device *netdev) return false; if (!strcmp(netdev->rtnl_link_ops->kind, "openvswitch")) return true; - if (!strcmp(netdev->rtnl_link_ops->kind, "vxlan")) + if (netif_is_vxlan(netdev)) return true; return false; diff --git a/drivers/net/ethernet/rocker/rocker_main.c b/drivers/net/ethernet/rocker/rocker_main.c index aeafdb9ac015..8721c0506af3 100644 --- a/drivers/net/ethernet/rocker/rocker_main.c +++ b/drivers/net/ethernet/rocker/rocker_main.c @@ -2728,6 +2728,7 @@ rocker_fdb_offload_notify(struct rocker_port *rocker_port, info.addr = recv_info->addr; info.vid = recv_info->vid; + info.offloaded = true; call_switchdev_notifiers(SWITCHDEV_FDB_OFFLOADED, rocker_port->dev, &info.info); } |