Diffstat (limited to 'drivers/net/ethernet/sfc')
-rw-r--r--  drivers/net/ethernet/sfc/Kconfig             |    2
-rw-r--r--  drivers/net/ethernet/sfc/Makefile            |    9
-rw-r--r--  drivers/net/ethernet/sfc/ef10.c              | 2820
-rw-r--r--  drivers/net/ethernet/sfc/ef10_sriov.c        |    5
-rw-r--r--  drivers/net/ethernet/sfc/efx.c               | 2501
-rw-r--r--  drivers/net/ethernet/sfc/efx.h               |   65
-rw-r--r--  drivers/net/ethernet/sfc/efx_channels.c      | 1234
-rw-r--r--  drivers/net/ethernet/sfc/efx_channels.h      |   55
-rw-r--r--  drivers/net/ethernet/sfc/efx_common.c        | 1102
-rw-r--r--  drivers/net/ethernet/sfc/efx_common.h        |   73
-rw-r--r--  drivers/net/ethernet/sfc/ethtool.c           |  446
-rw-r--r--  drivers/net/ethernet/sfc/ethtool_common.c    |  457
-rw-r--r--  drivers/net/ethernet/sfc/ethtool_common.h    |   30
-rw-r--r--  drivers/net/ethernet/sfc/falcon/efx.c        |    2
-rw-r--r--  drivers/net/ethernet/sfc/farch.c             |    1
-rw-r--r--  drivers/net/ethernet/sfc/mcdi.h              |    3
-rw-r--r--  drivers/net/ethernet/sfc/mcdi_filters.c      | 2270
-rw-r--r--  drivers/net/ethernet/sfc/mcdi_filters.h      |  159
-rw-r--r--  drivers/net/ethernet/sfc/mcdi_functions.c    |  386
-rw-r--r--  drivers/net/ethernet/sfc/mcdi_functions.h    |   32
-rw-r--r--  drivers/net/ethernet/sfc/mcdi_port.c         |  558
-rw-r--r--  drivers/net/ethernet/sfc/mcdi_port_common.c  |  568
-rw-r--r--  drivers/net/ethernet/sfc/mcdi_port_common.h  |   57
-rw-r--r--  drivers/net/ethernet/sfc/net_driver.h        |   20
-rw-r--r--  drivers/net/ethernet/sfc/nic.h               |    7
-rw-r--r--  drivers/net/ethernet/sfc/rx.c                |  592
-rw-r--r--  drivers/net/ethernet/sfc/rx_common.c         |  851
-rw-r--r--  drivers/net/ethernet/sfc/rx_common.h         |   97
-rw-r--r--  drivers/net/ethernet/sfc/selftest.c          |    9
-rw-r--r--  drivers/net/ethernet/sfc/selftest.h          |    2
-rw-r--r--  drivers/net/ethernet/sfc/siena.c             |    2
-rw-r--r--  drivers/net/ethernet/sfc/siena_sriov.c       |    1
-rw-r--r--  drivers/net/ethernet/sfc/tx.c                |  398
-rw-r--r--  drivers/net/ethernet/sfc/tx_common.c         |  404
-rw-r--r--  drivers/net/ethernet/sfc/tx_common.h         |   36
35 files changed, 7972 insertions, 7282 deletions
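The hunks below follow one pattern: ef10.c loses its open-coded MCDI sequences (VI allocation, TX/RX/event-queue init and teardown, RSS contexts, the filter table) and instead calls shared helpers added in mcdi_functions.c, mcdi_filters.c, mcdi_port_common.c, rx_common.c and friends. As a minimal sketch of that pattern, the removed efx_ef10_free_vis() presumably re-appears in mcdi_functions.c roughly as follows; the new file is not part of this excerpt, so the exact definition is an assumption:

/* Sketch only, assuming mcdi_functions.c re-homes the body removed from
 * ef10.c below; built against the driver's existing net_driver.h/mcdi.h.
 */
int efx_mcdi_free_vis(struct efx_nic *efx)
{
	MCDI_DECLARE_BUF_ERR(outbuf);
	size_t outlen;
	int rc = efx_mcdi_rpc_quiet(efx, MC_CMD_FREE_VIS, NULL, 0,
				    outbuf, sizeof(outbuf), &outlen);

	/* -EALREADY means nothing to free, so ignore */
	if (rc == -EALREADY)
		rc = 0;
	if (rc)
		efx_mcdi_display_error(efx, MC_CMD_FREE_VIS, 0, outbuf,
				       outlen, rc);
	return rc;
}

ef10.c keeps only thin call sites (see the efx_ef10_remove() and efx_ef10_dimension_resources() hunks below), so the EF10-specific file shrinks while the MCDI mechanics become reusable by other users of the common code.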
diff --git a/drivers/net/ethernet/sfc/Kconfig b/drivers/net/ethernet/sfc/Kconfig
index 5f36774bf4b8..ea5a9220196c 100644
--- a/drivers/net/ethernet/sfc/Kconfig
+++ b/drivers/net/ethernet/sfc/Kconfig
@@ -21,8 +21,6 @@ config SFC
depends on PCI
select MDIO
select CRC32
- select I2C
- select I2C_ALGOBIT
imply PTP_1588_CLOCK
---help---
This driver supports 10/40-gigabit Ethernet cards based on
diff --git a/drivers/net/ethernet/sfc/Makefile b/drivers/net/ethernet/sfc/Makefile
index c5c297e78d06..87d093da22ca 100644
--- a/drivers/net/ethernet/sfc/Makefile
+++ b/drivers/net/ethernet/sfc/Makefile
@@ -1,7 +1,10 @@
# SPDX-License-Identifier: GPL-2.0
-sfc-y += efx.o nic.o farch.o siena.o ef10.o tx.o rx.o \
- selftest.o ethtool.o ptp.o tx_tso.o \
- mcdi.o mcdi_port.o mcdi_mon.o
+sfc-y += efx.o efx_common.o efx_channels.o nic.o \
+ farch.o siena.o ef10.o \
+ tx.o tx_common.o tx_tso.o rx.o rx_common.o \
+ selftest.o ethtool.o ethtool_common.o ptp.o \
+ mcdi.o mcdi_port.o mcdi_port_common.o \
+ mcdi_functions.o mcdi_filters.o mcdi_mon.o
sfc-$(CONFIG_SFC_MTD) += mtd.o
sfc-$(CONFIG_SFC_SRIOV) += sriov.o siena_sriov.o ef10_sriov.o
diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c
index a6ae2cdc1986..52113b7529d6 100644
--- a/drivers/net/ethernet/sfc/ef10.c
+++ b/drivers/net/ethernet/sfc/ef10.c
@@ -5,11 +5,15 @@
*/
#include "net_driver.h"
+#include "rx_common.h"
#include "ef10_regs.h"
#include "io.h"
#include "mcdi.h"
#include "mcdi_pcol.h"
+#include "mcdi_port_common.h"
+#include "mcdi_functions.h"
#include "nic.h"
+#include "mcdi_filters.h"
#include "workarounds.h"
#include "selftest.h"
#include "ef10_sriov.h"
@@ -25,28 +29,6 @@ enum {
EFX_EF10_TEST = 1,
EFX_EF10_REFILL,
};
-/* The maximum size of a shared RSS context */
-/* TODO: this should really be from the mcdi protocol export */
-#define EFX_EF10_MAX_SHARED_RSS_CONTEXT_SIZE 64UL
-
-/* The filter table(s) are managed by firmware and we have write-only
- * access. When removing filters we must identify them to the
- * firmware by a 64-bit handle, but this is too wide for Linux kernel
- * interfaces (32-bit for RX NFC, 16-bit for RFS). Also, we need to
- * be able to tell in advance whether a requested insertion will
- * replace an existing filter. Therefore we maintain a software hash
- * table, which should be at least as large as the hardware hash
- * table.
- *
- * Huntington has a single 8K filter table shared between all filter
- * types and both ports.
- */
-#define HUNT_FILTER_TBL_ROWS 8192
-
-#define EFX_EF10_FILTER_ID_INVALID 0xffff
-
-#define EFX_EF10_FILTER_DEV_UC_MAX 32
-#define EFX_EF10_FILTER_DEV_MC_MAX 256
/* VLAN list entry */
struct efx_ef10_vlan {
@@ -54,95 +36,8 @@ struct efx_ef10_vlan {
u16 vid;
};
-enum efx_ef10_default_filters {
- EFX_EF10_BCAST,
- EFX_EF10_UCDEF,
- EFX_EF10_MCDEF,
- EFX_EF10_VXLAN4_UCDEF,
- EFX_EF10_VXLAN4_MCDEF,
- EFX_EF10_VXLAN6_UCDEF,
- EFX_EF10_VXLAN6_MCDEF,
- EFX_EF10_NVGRE4_UCDEF,
- EFX_EF10_NVGRE4_MCDEF,
- EFX_EF10_NVGRE6_UCDEF,
- EFX_EF10_NVGRE6_MCDEF,
- EFX_EF10_GENEVE4_UCDEF,
- EFX_EF10_GENEVE4_MCDEF,
- EFX_EF10_GENEVE6_UCDEF,
- EFX_EF10_GENEVE6_MCDEF,
-
- EFX_EF10_NUM_DEFAULT_FILTERS
-};
-
-/* Per-VLAN filters information */
-struct efx_ef10_filter_vlan {
- struct list_head list;
- u16 vid;
- u16 uc[EFX_EF10_FILTER_DEV_UC_MAX];
- u16 mc[EFX_EF10_FILTER_DEV_MC_MAX];
- u16 default_filters[EFX_EF10_NUM_DEFAULT_FILTERS];
-};
-
-struct efx_ef10_dev_addr {
- u8 addr[ETH_ALEN];
-};
-
-struct efx_ef10_filter_table {
-/* The MCDI match masks supported by this fw & hw, in order of priority */
- u32 rx_match_mcdi_flags[
- MC_CMD_GET_PARSER_DISP_INFO_OUT_SUPPORTED_MATCHES_MAXNUM * 2];
- unsigned int rx_match_count;
-
- struct rw_semaphore lock; /* Protects entries */
- struct {
- unsigned long spec; /* pointer to spec plus flag bits */
-/* AUTO_OLD is used to mark and sweep MAC filters for the device address lists. */
-/* unused flag 1UL */
-#define EFX_EF10_FILTER_FLAG_AUTO_OLD 2UL
-#define EFX_EF10_FILTER_FLAGS 3UL
- u64 handle; /* firmware handle */
- } *entry;
-/* Shadow of net_device address lists, guarded by mac_lock */
- struct efx_ef10_dev_addr dev_uc_list[EFX_EF10_FILTER_DEV_UC_MAX];
- struct efx_ef10_dev_addr dev_mc_list[EFX_EF10_FILTER_DEV_MC_MAX];
- int dev_uc_count;
- int dev_mc_count;
- bool uc_promisc;
- bool mc_promisc;
-/* Whether in multicast promiscuous mode when last changed */
- bool mc_promisc_last;
- bool mc_overflow; /* Too many MC addrs; should always imply mc_promisc */
- bool vlan_filter;
- struct list_head vlan_list;
-};
-
-/* An arbitrary search limit for the software hash table */
-#define EFX_EF10_FILTER_SEARCH_LIMIT 200
-
-static void efx_ef10_rx_free_indir_table(struct efx_nic *efx);
-static void efx_ef10_filter_table_remove(struct efx_nic *efx);
-static int efx_ef10_filter_add_vlan(struct efx_nic *efx, u16 vid);
-static void efx_ef10_filter_del_vlan_internal(struct efx_nic *efx,
- struct efx_ef10_filter_vlan *vlan);
-static void efx_ef10_filter_del_vlan(struct efx_nic *efx, u16 vid);
static int efx_ef10_set_udp_tnl_ports(struct efx_nic *efx, bool unloading);
-static u32 efx_ef10_filter_get_unsafe_id(u32 filter_id)
-{
- WARN_ON_ONCE(filter_id == EFX_EF10_FILTER_ID_INVALID);
- return filter_id & (HUNT_FILTER_TBL_ROWS - 1);
-}
-
-static unsigned int efx_ef10_filter_get_unsafe_pri(u32 filter_id)
-{
- return filter_id / (HUNT_FILTER_TBL_ROWS * 2);
-}
-
-static u32 efx_ef10_make_filter_id(unsigned int pri, u16 idx)
-{
- return pri * HUNT_FILTER_TBL_ROWS * 2 + idx;
-}
-
static int efx_ef10_get_warm_boot_count(struct efx_nic *efx)
{
efx_dword_t reg;
@@ -185,24 +80,6 @@ static bool efx_ef10_is_vf(struct efx_nic *efx)
return efx->type->is_vf;
}
-static int efx_ef10_get_pf_index(struct efx_nic *efx)
-{
- MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_FUNCTION_INFO_OUT_LEN);
- struct efx_ef10_nic_data *nic_data = efx->nic_data;
- size_t outlen;
- int rc;
-
- rc = efx_mcdi_rpc(efx, MC_CMD_GET_FUNCTION_INFO, NULL, 0, outbuf,
- sizeof(outbuf), &outlen);
- if (rc)
- return rc;
- if (outlen < sizeof(outbuf))
- return -EIO;
-
- nic_data->pf_index = MCDI_DWORD(outbuf, GET_FUNCTION_INFO_OUT_PF);
- return 0;
-}
-
#ifdef CONFIG_SFC_SRIOV
static int efx_ef10_get_vf_index(struct efx_nic *efx)
{
@@ -273,24 +150,9 @@ static int efx_ef10_init_datapath_caps(struct efx_nic *efx)
u8 vi_window_mode = MCDI_BYTE(outbuf,
GET_CAPABILITIES_V3_OUT_VI_WINDOW_MODE);
- switch (vi_window_mode) {
- case MC_CMD_GET_CAPABILITIES_V3_OUT_VI_WINDOW_MODE_8K:
- efx->vi_stride = 8192;
- break;
- case MC_CMD_GET_CAPABILITIES_V3_OUT_VI_WINDOW_MODE_16K:
- efx->vi_stride = 16384;
- break;
- case MC_CMD_GET_CAPABILITIES_V3_OUT_VI_WINDOW_MODE_64K:
- efx->vi_stride = 65536;
- break;
- default:
- netif_err(efx, probe, efx->net_dev,
- "Unrecognised VI window mode %d\n",
- vi_window_mode);
- return -EIO;
- }
- netif_dbg(efx, probe, efx->net_dev, "vi_stride = %u\n",
- efx->vi_stride);
+ rc = efx_mcdi_window_mode_to_stride(efx, vi_window_mode);
+ if (rc)
+ return rc;
} else {
/* keep default VI stride */
netif_dbg(efx, probe, efx->net_dev,
@@ -576,7 +438,7 @@ static int efx_ef10_add_vlan(struct efx_nic *efx, u16 vid)
if (efx->filter_state) {
mutex_lock(&efx->mac_lock);
down_write(&efx->filter_sem);
- rc = efx_ef10_filter_add_vlan(efx, vlan->vid);
+ rc = efx_mcdi_filter_add_vlan(efx, vlan->vid);
up_write(&efx->filter_sem);
mutex_unlock(&efx->mac_lock);
if (rc)
@@ -605,7 +467,7 @@ static void efx_ef10_del_vlan_internal(struct efx_nic *efx,
if (efx->filter_state) {
down_write(&efx->filter_sem);
- efx_ef10_filter_del_vlan(efx, vlan->vid);
+ efx_mcdi_filter_del_vlan(efx, vlan->vid);
up_write(&efx->filter_sem);
}
@@ -689,7 +551,7 @@ static int efx_ef10_probe(struct efx_nic *efx)
}
nic_data->warm_boot_count = rc;
- efx->rss_context.context_id = EFX_EF10_RSS_CONTEXT_INVALID;
+ efx->rss_context.context_id = EFX_MCDI_RSS_CONTEXT_INVALID;
nic_data->vport_id = EVB_PORT_ID_ASSIGNED;
@@ -725,7 +587,7 @@ static int efx_ef10_probe(struct efx_nic *efx)
if (rc)
goto fail4;
- rc = efx_ef10_get_pf_index(efx);
+ rc = efx_get_pf_index(efx, &nic_data->pf_index);
if (rc)
goto fail5;
@@ -831,22 +693,6 @@ fail1:
return rc;
}
-static int efx_ef10_free_vis(struct efx_nic *efx)
-{
- MCDI_DECLARE_BUF_ERR(outbuf);
- size_t outlen;
- int rc = efx_mcdi_rpc_quiet(efx, MC_CMD_FREE_VIS, NULL, 0,
- outbuf, sizeof(outbuf), &outlen);
-
- /* -EALREADY means nothing to free, so ignore */
- if (rc == -EALREADY)
- rc = 0;
- if (rc)
- efx_mcdi_display_error(efx, MC_CMD_FREE_VIS, 0, outbuf, outlen,
- rc);
- return rc;
-}
-
#ifdef EFX_USE_PIO
static void efx_ef10_free_piobufs(struct efx_nic *efx)
@@ -1084,12 +930,12 @@ static void efx_ef10_remove(struct efx_nic *efx)
efx_mcdi_mon_remove(efx);
- efx_ef10_rx_free_indir_table(efx);
+ efx_mcdi_rx_free_indir_table(efx);
if (nic_data->wc_membase)
iounmap(nic_data->wc_membase);
- rc = efx_ef10_free_vis(efx);
+ rc = efx_mcdi_free_vis(efx);
WARN_ON(rc != 0);
if (!nic_data->must_restore_piobufs)
@@ -1260,28 +1106,10 @@ static int efx_ef10_probe_vf(struct efx_nic *efx __attribute__ ((unused)))
static int efx_ef10_alloc_vis(struct efx_nic *efx,
unsigned int min_vis, unsigned int max_vis)
{
- MCDI_DECLARE_BUF(inbuf, MC_CMD_ALLOC_VIS_IN_LEN);
- MCDI_DECLARE_BUF(outbuf, MC_CMD_ALLOC_VIS_OUT_LEN);
struct efx_ef10_nic_data *nic_data = efx->nic_data;
- size_t outlen;
- int rc;
-
- MCDI_SET_DWORD(inbuf, ALLOC_VIS_IN_MIN_VI_COUNT, min_vis);
- MCDI_SET_DWORD(inbuf, ALLOC_VIS_IN_MAX_VI_COUNT, max_vis);
- rc = efx_mcdi_rpc(efx, MC_CMD_ALLOC_VIS, inbuf, sizeof(inbuf),
- outbuf, sizeof(outbuf), &outlen);
- if (rc != 0)
- return rc;
-
- if (outlen < MC_CMD_ALLOC_VIS_OUT_LEN)
- return -EIO;
- netif_dbg(efx, drv, efx->net_dev, "base VI is A0x%03x\n",
- MCDI_DWORD(outbuf, ALLOC_VIS_OUT_VI_BASE));
-
- nic_data->vi_base = MCDI_DWORD(outbuf, ALLOC_VIS_OUT_VI_BASE);
- nic_data->n_allocated_vis = MCDI_DWORD(outbuf, ALLOC_VIS_OUT_VI_COUNT);
- return 0;
+ return efx_mcdi_alloc_vis(efx, min_vis, max_vis, &nic_data->vi_base,
+ &nic_data->n_allocated_vis);
}
/* Note that the failure path of this function does not free
@@ -1363,7 +1191,7 @@ static int efx_ef10_dimension_resources(struct efx_nic *efx)
}
/* In case the last attached driver failed to free VIs, do it now */
- rc = efx_ef10_free_vis(efx);
+ rc = efx_mcdi_free_vis(efx);
if (rc != 0)
return rc;
@@ -1384,7 +1212,7 @@ static int efx_ef10_dimension_resources(struct efx_nic *efx)
efx->max_tx_channels =
nic_data->n_allocated_vis / EFX_TXQ_TYPES;
- efx_ef10_free_vis(efx);
+ efx_mcdi_free_vis(efx);
return -EAGAIN;
}
@@ -1490,7 +1318,7 @@ static int efx_ef10_init_nic(struct efx_nic *efx)
return 0;
}
-static void efx_ef10_reset_mc_allocations(struct efx_nic *efx)
+static void efx_ef10_table_reset_mc_allocations(struct efx_nic *efx)
{
struct efx_ef10_nic_data *nic_data = efx->nic_data;
#ifdef CONFIG_SFC_SRIOV
@@ -1503,7 +1331,7 @@ static void efx_ef10_reset_mc_allocations(struct efx_nic *efx)
nic_data->must_restore_filters = true;
nic_data->must_restore_piobufs = true;
efx_ef10_forget_old_piobufs(efx);
- efx->rss_context.context_id = EFX_EF10_RSS_CONTEXT_INVALID;
+ efx->rss_context.context_id = EFX_MCDI_RSS_CONTEXT_INVALID;
/* Driver-created vswitches and vports must be re-created */
nic_data->must_probe_vswitching = true;
@@ -1571,7 +1399,7 @@ static int efx_ef10_reset(struct efx_nic *efx, enum reset_type reset_type)
*/
if ((reset_type == RESET_TYPE_ALL ||
reset_type == RESET_TYPE_MCDI_TIMEOUT) && !rc)
- efx_ef10_reset_mc_allocations(efx);
+ efx_ef10_table_reset_mc_allocations(efx);
return rc;
}
@@ -2187,7 +2015,7 @@ static void efx_ef10_mcdi_reboot_detected(struct efx_nic *efx)
struct efx_ef10_nic_data *nic_data = efx->nic_data;
/* All our allocations have been reset */
- efx_ef10_reset_mc_allocations(efx);
+ efx_ef10_table_reset_mc_allocations(efx);
/* The datapath firmware might have been changed */
nic_data->must_check_datapath_caps = true;
@@ -2408,20 +2236,15 @@ static u32 efx_ef10_tso_versions(struct efx_nic *efx)
static void efx_ef10_tx_init(struct efx_tx_queue *tx_queue)
{
- MCDI_DECLARE_BUF(inbuf, MC_CMD_INIT_TXQ_IN_LEN(EFX_MAX_DMAQ_SIZE * 8 /
- EFX_BUF_SIZE));
bool csum_offload = tx_queue->queue & EFX_TXQ_TYPE_OFFLOAD;
- size_t entries = tx_queue->txd.buf.len / EFX_BUF_SIZE;
struct efx_channel *channel = tx_queue->channel;
struct efx_nic *efx = tx_queue->efx;
- struct efx_ef10_nic_data *nic_data = efx->nic_data;
+ struct efx_ef10_nic_data *nic_data;
bool tso_v2 = false;
- size_t inlen;
- dma_addr_t dma_addr;
efx_qword_t *txd;
int rc;
- int i;
- BUILD_BUG_ON(MC_CMD_INIT_TXQ_OUT_LEN != 0);
+
+ nic_data = efx->nic_data;
/* Only attempt to enable TX timestamping if we have the license for it,
* otherwise TXQ init will fail
@@ -2448,51 +2271,9 @@ static void efx_ef10_tx_init(struct efx_tx_queue *tx_queue)
channel->channel);
}
- MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_SIZE, tx_queue->ptr_mask + 1);
- MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_TARGET_EVQ, channel->channel);
- MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_LABEL, tx_queue->queue);
- MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_INSTANCE, tx_queue->queue);
- MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_OWNER_ID, 0);
- MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_PORT_ID, nic_data->vport_id);
-
- dma_addr = tx_queue->txd.buf.dma_addr;
-
- netif_dbg(efx, hw, efx->net_dev, "pushing TXQ %d. %zu entries (%llx)\n",
- tx_queue->queue, entries, (u64)dma_addr);
-
- for (i = 0; i < entries; ++i) {
- MCDI_SET_ARRAY_QWORD(inbuf, INIT_TXQ_IN_DMA_ADDR, i, dma_addr);
- dma_addr += EFX_BUF_SIZE;
- }
-
- inlen = MC_CMD_INIT_TXQ_IN_LEN(entries);
-
- do {
- MCDI_POPULATE_DWORD_4(inbuf, INIT_TXQ_IN_FLAGS,
- /* This flag was removed from mcdi_pcol.h for
- * the non-_EXT version of INIT_TXQ. However,
- * firmware still honours it.
- */
- INIT_TXQ_EXT_IN_FLAG_TSOV2_EN, tso_v2,
- INIT_TXQ_IN_FLAG_IP_CSUM_DIS, !csum_offload,
- INIT_TXQ_IN_FLAG_TCP_CSUM_DIS, !csum_offload,
- INIT_TXQ_EXT_IN_FLAG_TIMESTAMP,
- tx_queue->timestamping);
-
- rc = efx_mcdi_rpc_quiet(efx, MC_CMD_INIT_TXQ, inbuf, inlen,
- NULL, 0, NULL);
- if (rc == -ENOSPC && tso_v2) {
- /* Retry without TSOv2 if we're short on contexts. */
- tso_v2 = false;
- netif_warn(efx, probe, efx->net_dev,
- "TSOv2 context not available to segment in hardware. TCP performance may be reduced.\n");
- } else if (rc) {
- efx_mcdi_display_error(efx, MC_CMD_INIT_TXQ,
- MC_CMD_INIT_TXQ_EXT_IN_LEN,
- NULL, 0, rc);
- goto fail;
- }
- } while (rc);
+ rc = efx_mcdi_tx_init(tx_queue, tso_v2);
+ if (rc)
+ goto fail;
/* A previous user of this TX queue might have set us up the
* bomb by writing a descriptor to the TX push collector but
@@ -2530,35 +2311,6 @@ fail:
tx_queue->queue);
}
-static void efx_ef10_tx_fini(struct efx_tx_queue *tx_queue)
-{
- MCDI_DECLARE_BUF(inbuf, MC_CMD_FINI_TXQ_IN_LEN);
- MCDI_DECLARE_BUF_ERR(outbuf);
- struct efx_nic *efx = tx_queue->efx;
- size_t outlen;
- int rc;
-
- MCDI_SET_DWORD(inbuf, FINI_TXQ_IN_INSTANCE,
- tx_queue->queue);
-
- rc = efx_mcdi_rpc_quiet(efx, MC_CMD_FINI_TXQ, inbuf, sizeof(inbuf),
- outbuf, sizeof(outbuf), &outlen);
-
- if (rc && rc != -EALREADY)
- goto fail;
-
- return;
-
-fail:
- efx_mcdi_display_error(efx, MC_CMD_FINI_TXQ, MC_CMD_FINI_TXQ_IN_LEN,
- outbuf, outlen, rc);
-}
-
-static void efx_ef10_tx_remove(struct efx_tx_queue *tx_queue)
-{
- efx_nic_free_buffer(tx_queue->efx, &tx_queue->txd.buf);
-}
-
/* This writes to the TX_DESC_WPTR; write pointer for TX descriptor ring */
static inline void efx_ef10_notify_tx_desc(struct efx_tx_queue *tx_queue)
{
@@ -2637,527 +2389,6 @@ static void efx_ef10_tx_write(struct efx_tx_queue *tx_queue)
}
}
-#define RSS_MODE_HASH_ADDRS (1 << RSS_MODE_HASH_SRC_ADDR_LBN |\
- 1 << RSS_MODE_HASH_DST_ADDR_LBN)
-#define RSS_MODE_HASH_PORTS (1 << RSS_MODE_HASH_SRC_PORT_LBN |\
- 1 << RSS_MODE_HASH_DST_PORT_LBN)
-#define RSS_CONTEXT_FLAGS_DEFAULT (1 << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_TOEPLITZ_IPV4_EN_LBN |\
- 1 << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_TOEPLITZ_TCPV4_EN_LBN |\
- 1 << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_TOEPLITZ_IPV6_EN_LBN |\
- 1 << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_TOEPLITZ_TCPV6_EN_LBN |\
- (RSS_MODE_HASH_ADDRS | RSS_MODE_HASH_PORTS) << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_TCP_IPV4_RSS_MODE_LBN |\
- RSS_MODE_HASH_ADDRS << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_UDP_IPV4_RSS_MODE_LBN |\
- RSS_MODE_HASH_ADDRS << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_OTHER_IPV4_RSS_MODE_LBN |\
- (RSS_MODE_HASH_ADDRS | RSS_MODE_HASH_PORTS) << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_TCP_IPV6_RSS_MODE_LBN |\
- RSS_MODE_HASH_ADDRS << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_UDP_IPV6_RSS_MODE_LBN |\
- RSS_MODE_HASH_ADDRS << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_OTHER_IPV6_RSS_MODE_LBN)
-
-static int efx_ef10_get_rss_flags(struct efx_nic *efx, u32 context, u32 *flags)
-{
- /* Firmware had a bug (sfc bug 61952) where it would not actually
- * fill in the flags field in the response to MC_CMD_RSS_CONTEXT_GET_FLAGS.
- * This meant that it would always contain whatever was previously
- * in the MCDI buffer. Fortunately, all firmware versions with
- * this bug have the same default flags value for a newly-allocated
- * RSS context, and the only time we want to get the flags is just
- * after allocating. Moreover, the response has a 32-bit hole
- * where the context ID would be in the request, so we can use an
- * overlength buffer in the request and pre-fill the flags field
- * with what we believe the default to be. Thus if the firmware
- * has the bug, it will leave our pre-filled value in the flags
- * field of the response, and we will get the right answer.
- *
- * However, this does mean that this function should NOT be used if
- * the RSS context flags might not be their defaults - it is ONLY
- * reliably correct for a newly-allocated RSS context.
- */
- MCDI_DECLARE_BUF(inbuf, MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_LEN);
- MCDI_DECLARE_BUF(outbuf, MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_LEN);
- size_t outlen;
- int rc;
-
- /* Check we have a hole for the context ID */
- BUILD_BUG_ON(MC_CMD_RSS_CONTEXT_GET_FLAGS_IN_LEN != MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_FLAGS_OFST);
- MCDI_SET_DWORD(inbuf, RSS_CONTEXT_GET_FLAGS_IN_RSS_CONTEXT_ID, context);
- MCDI_SET_DWORD(inbuf, RSS_CONTEXT_GET_FLAGS_OUT_FLAGS,
- RSS_CONTEXT_FLAGS_DEFAULT);
- rc = efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_GET_FLAGS, inbuf,
- sizeof(inbuf), outbuf, sizeof(outbuf), &outlen);
- if (rc == 0) {
- if (outlen < MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_LEN)
- rc = -EIO;
- else
- *flags = MCDI_DWORD(outbuf, RSS_CONTEXT_GET_FLAGS_OUT_FLAGS);
- }
- return rc;
-}
-
-/* Attempt to enable 4-tuple UDP hashing on the specified RSS context.
- * If we fail, we just leave the RSS context at its default hash settings,
- * which is safe but may slightly reduce performance.
- * Defaults are 4-tuple for TCP and 2-tuple for UDP and other-IP, so we
- * just need to set the UDP ports flags (for both IP versions).
- */
-static void efx_ef10_set_rss_flags(struct efx_nic *efx,
- struct efx_rss_context *ctx)
-{
- MCDI_DECLARE_BUF(inbuf, MC_CMD_RSS_CONTEXT_SET_FLAGS_IN_LEN);
- u32 flags;
-
- BUILD_BUG_ON(MC_CMD_RSS_CONTEXT_SET_FLAGS_OUT_LEN != 0);
-
- if (efx_ef10_get_rss_flags(efx, ctx->context_id, &flags) != 0)
- return;
- MCDI_SET_DWORD(inbuf, RSS_CONTEXT_SET_FLAGS_IN_RSS_CONTEXT_ID,
- ctx->context_id);
- flags |= RSS_MODE_HASH_PORTS << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_UDP_IPV4_RSS_MODE_LBN;
- flags |= RSS_MODE_HASH_PORTS << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_UDP_IPV6_RSS_MODE_LBN;
- MCDI_SET_DWORD(inbuf, RSS_CONTEXT_SET_FLAGS_IN_FLAGS, flags);
- if (!efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_SET_FLAGS, inbuf, sizeof(inbuf),
- NULL, 0, NULL))
- /* Succeeded, so UDP 4-tuple is now enabled */
- ctx->rx_hash_udp_4tuple = true;
-}
-
-static int efx_ef10_alloc_rss_context(struct efx_nic *efx, bool exclusive,
- struct efx_rss_context *ctx,
- unsigned *context_size)
-{
- MCDI_DECLARE_BUF(inbuf, MC_CMD_RSS_CONTEXT_ALLOC_IN_LEN);
- MCDI_DECLARE_BUF(outbuf, MC_CMD_RSS_CONTEXT_ALLOC_OUT_LEN);
- struct efx_ef10_nic_data *nic_data = efx->nic_data;
- size_t outlen;
- int rc;
- u32 alloc_type = exclusive ?
- MC_CMD_RSS_CONTEXT_ALLOC_IN_TYPE_EXCLUSIVE :
- MC_CMD_RSS_CONTEXT_ALLOC_IN_TYPE_SHARED;
- unsigned rss_spread = exclusive ?
- efx->rss_spread :
- min(rounddown_pow_of_two(efx->rss_spread),
- EFX_EF10_MAX_SHARED_RSS_CONTEXT_SIZE);
-
- if (!exclusive && rss_spread == 1) {
- ctx->context_id = EFX_EF10_RSS_CONTEXT_INVALID;
- if (context_size)
- *context_size = 1;
- return 0;
- }
-
- if (nic_data->datapath_caps &
- 1 << MC_CMD_GET_CAPABILITIES_OUT_RX_RSS_LIMITED_LBN)
- return -EOPNOTSUPP;
-
- MCDI_SET_DWORD(inbuf, RSS_CONTEXT_ALLOC_IN_UPSTREAM_PORT_ID,
- nic_data->vport_id);
- MCDI_SET_DWORD(inbuf, RSS_CONTEXT_ALLOC_IN_TYPE, alloc_type);
- MCDI_SET_DWORD(inbuf, RSS_CONTEXT_ALLOC_IN_NUM_QUEUES, rss_spread);
-
- rc = efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_ALLOC, inbuf, sizeof(inbuf),
- outbuf, sizeof(outbuf), &outlen);
- if (rc != 0)
- return rc;
-
- if (outlen < MC_CMD_RSS_CONTEXT_ALLOC_OUT_LEN)
- return -EIO;
-
- ctx->context_id = MCDI_DWORD(outbuf, RSS_CONTEXT_ALLOC_OUT_RSS_CONTEXT_ID);
-
- if (context_size)
- *context_size = rss_spread;
-
- if (nic_data->datapath_caps &
- 1 << MC_CMD_GET_CAPABILITIES_OUT_ADDITIONAL_RSS_MODES_LBN)
- efx_ef10_set_rss_flags(efx, ctx);
-
- return 0;
-}
-
-static int efx_ef10_free_rss_context(struct efx_nic *efx, u32 context)
-{
- MCDI_DECLARE_BUF(inbuf, MC_CMD_RSS_CONTEXT_FREE_IN_LEN);
-
- MCDI_SET_DWORD(inbuf, RSS_CONTEXT_FREE_IN_RSS_CONTEXT_ID,
- context);
- return efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_FREE, inbuf, sizeof(inbuf),
- NULL, 0, NULL);
-}
-
-static int efx_ef10_populate_rss_table(struct efx_nic *efx, u32 context,
- const u32 *rx_indir_table, const u8 *key)
-{
- MCDI_DECLARE_BUF(tablebuf, MC_CMD_RSS_CONTEXT_SET_TABLE_IN_LEN);
- MCDI_DECLARE_BUF(keybuf, MC_CMD_RSS_CONTEXT_SET_KEY_IN_LEN);
- int i, rc;
-
- MCDI_SET_DWORD(tablebuf, RSS_CONTEXT_SET_TABLE_IN_RSS_CONTEXT_ID,
- context);
- BUILD_BUG_ON(ARRAY_SIZE(efx->rss_context.rx_indir_table) !=
- MC_CMD_RSS_CONTEXT_SET_TABLE_IN_INDIRECTION_TABLE_LEN);
-
- /* This iterates over the length of efx->rss_context.rx_indir_table, but
- * copies bytes from rx_indir_table. That's because the latter is a
- * pointer rather than an array, but should have the same length.
- * The efx->rss_context.rx_hash_key loop below is similar.
- */
- for (i = 0; i < ARRAY_SIZE(efx->rss_context.rx_indir_table); ++i)
- MCDI_PTR(tablebuf,
- RSS_CONTEXT_SET_TABLE_IN_INDIRECTION_TABLE)[i] =
- (u8) rx_indir_table[i];
-
- rc = efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_SET_TABLE, tablebuf,
- sizeof(tablebuf), NULL, 0, NULL);
- if (rc != 0)
- return rc;
-
- MCDI_SET_DWORD(keybuf, RSS_CONTEXT_SET_KEY_IN_RSS_CONTEXT_ID,
- context);
- BUILD_BUG_ON(ARRAY_SIZE(efx->rss_context.rx_hash_key) !=
- MC_CMD_RSS_CONTEXT_SET_KEY_IN_TOEPLITZ_KEY_LEN);
- for (i = 0; i < ARRAY_SIZE(efx->rss_context.rx_hash_key); ++i)
- MCDI_PTR(keybuf, RSS_CONTEXT_SET_KEY_IN_TOEPLITZ_KEY)[i] = key[i];
-
- return efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_SET_KEY, keybuf,
- sizeof(keybuf), NULL, 0, NULL);
-}
-
-static void efx_ef10_rx_free_indir_table(struct efx_nic *efx)
-{
- int rc;
-
- if (efx->rss_context.context_id != EFX_EF10_RSS_CONTEXT_INVALID) {
- rc = efx_ef10_free_rss_context(efx, efx->rss_context.context_id);
- WARN_ON(rc != 0);
- }
- efx->rss_context.context_id = EFX_EF10_RSS_CONTEXT_INVALID;
-}
-
-static int efx_ef10_rx_push_shared_rss_config(struct efx_nic *efx,
- unsigned *context_size)
-{
- struct efx_ef10_nic_data *nic_data = efx->nic_data;
- int rc = efx_ef10_alloc_rss_context(efx, false, &efx->rss_context,
- context_size);
-
- if (rc != 0)
- return rc;
-
- nic_data->rx_rss_context_exclusive = false;
- efx_set_default_rx_indir_table(efx, &efx->rss_context);
- return 0;
-}
-
-static int efx_ef10_rx_push_exclusive_rss_config(struct efx_nic *efx,
- const u32 *rx_indir_table,
- const u8 *key)
-{
- u32 old_rx_rss_context = efx->rss_context.context_id;
- struct efx_ef10_nic_data *nic_data = efx->nic_data;
- int rc;
-
- if (efx->rss_context.context_id == EFX_EF10_RSS_CONTEXT_INVALID ||
- !nic_data->rx_rss_context_exclusive) {
- rc = efx_ef10_alloc_rss_context(efx, true, &efx->rss_context,
- NULL);
- if (rc == -EOPNOTSUPP)
- return rc;
- else if (rc != 0)
- goto fail1;
- }
-
- rc = efx_ef10_populate_rss_table(efx, efx->rss_context.context_id,
- rx_indir_table, key);
- if (rc != 0)
- goto fail2;
-
- if (efx->rss_context.context_id != old_rx_rss_context &&
- old_rx_rss_context != EFX_EF10_RSS_CONTEXT_INVALID)
- WARN_ON(efx_ef10_free_rss_context(efx, old_rx_rss_context) != 0);
- nic_data->rx_rss_context_exclusive = true;
- if (rx_indir_table != efx->rss_context.rx_indir_table)
- memcpy(efx->rss_context.rx_indir_table, rx_indir_table,
- sizeof(efx->rss_context.rx_indir_table));
- if (key != efx->rss_context.rx_hash_key)
- memcpy(efx->rss_context.rx_hash_key, key,
- efx->type->rx_hash_key_size);
-
- return 0;
-
-fail2:
- if (old_rx_rss_context != efx->rss_context.context_id) {
- WARN_ON(efx_ef10_free_rss_context(efx, efx->rss_context.context_id) != 0);
- efx->rss_context.context_id = old_rx_rss_context;
- }
-fail1:
- netif_err(efx, hw, efx->net_dev, "%s: failed rc=%d\n", __func__, rc);
- return rc;
-}
-
-static int efx_ef10_rx_push_rss_context_config(struct efx_nic *efx,
- struct efx_rss_context *ctx,
- const u32 *rx_indir_table,
- const u8 *key)
-{
- int rc;
-
- WARN_ON(!mutex_is_locked(&efx->rss_lock));
-
- if (ctx->context_id == EFX_EF10_RSS_CONTEXT_INVALID) {
- rc = efx_ef10_alloc_rss_context(efx, true, ctx, NULL);
- if (rc)
- return rc;
- }
-
- if (!rx_indir_table) /* Delete this context */
- return efx_ef10_free_rss_context(efx, ctx->context_id);
-
- rc = efx_ef10_populate_rss_table(efx, ctx->context_id,
- rx_indir_table, key);
- if (rc)
- return rc;
-
- memcpy(ctx->rx_indir_table, rx_indir_table,
- sizeof(efx->rss_context.rx_indir_table));
- memcpy(ctx->rx_hash_key, key, efx->type->rx_hash_key_size);
-
- return 0;
-}
-
-static int efx_ef10_rx_pull_rss_context_config(struct efx_nic *efx,
- struct efx_rss_context *ctx)
-{
- MCDI_DECLARE_BUF(inbuf, MC_CMD_RSS_CONTEXT_GET_TABLE_IN_LEN);
- MCDI_DECLARE_BUF(tablebuf, MC_CMD_RSS_CONTEXT_GET_TABLE_OUT_LEN);
- MCDI_DECLARE_BUF(keybuf, MC_CMD_RSS_CONTEXT_GET_KEY_OUT_LEN);
- size_t outlen;
- int rc, i;
-
- WARN_ON(!mutex_is_locked(&efx->rss_lock));
-
- BUILD_BUG_ON(MC_CMD_RSS_CONTEXT_GET_TABLE_IN_LEN !=
- MC_CMD_RSS_CONTEXT_GET_KEY_IN_LEN);
-
- if (ctx->context_id == EFX_EF10_RSS_CONTEXT_INVALID)
- return -ENOENT;
-
- MCDI_SET_DWORD(inbuf, RSS_CONTEXT_GET_TABLE_IN_RSS_CONTEXT_ID,
- ctx->context_id);
- BUILD_BUG_ON(ARRAY_SIZE(ctx->rx_indir_table) !=
- MC_CMD_RSS_CONTEXT_GET_TABLE_OUT_INDIRECTION_TABLE_LEN);
- rc = efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_GET_TABLE, inbuf, sizeof(inbuf),
- tablebuf, sizeof(tablebuf), &outlen);
- if (rc != 0)
- return rc;
-
- if (WARN_ON(outlen != MC_CMD_RSS_CONTEXT_GET_TABLE_OUT_LEN))
- return -EIO;
-
- for (i = 0; i < ARRAY_SIZE(ctx->rx_indir_table); i++)
- ctx->rx_indir_table[i] = MCDI_PTR(tablebuf,
- RSS_CONTEXT_GET_TABLE_OUT_INDIRECTION_TABLE)[i];
-
- MCDI_SET_DWORD(inbuf, RSS_CONTEXT_GET_KEY_IN_RSS_CONTEXT_ID,
- ctx->context_id);
- BUILD_BUG_ON(ARRAY_SIZE(ctx->rx_hash_key) !=
- MC_CMD_RSS_CONTEXT_SET_KEY_IN_TOEPLITZ_KEY_LEN);
- rc = efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_GET_KEY, inbuf, sizeof(inbuf),
- keybuf, sizeof(keybuf), &outlen);
- if (rc != 0)
- return rc;
-
- if (WARN_ON(outlen != MC_CMD_RSS_CONTEXT_GET_KEY_OUT_LEN))
- return -EIO;
-
- for (i = 0; i < ARRAY_SIZE(ctx->rx_hash_key); ++i)
- ctx->rx_hash_key[i] = MCDI_PTR(
- keybuf, RSS_CONTEXT_GET_KEY_OUT_TOEPLITZ_KEY)[i];
-
- return 0;
-}
-
-static int efx_ef10_rx_pull_rss_config(struct efx_nic *efx)
-{
- int rc;
-
- mutex_lock(&efx->rss_lock);
- rc = efx_ef10_rx_pull_rss_context_config(efx, &efx->rss_context);
- mutex_unlock(&efx->rss_lock);
- return rc;
-}
-
-static void efx_ef10_rx_restore_rss_contexts(struct efx_nic *efx)
-{
- struct efx_ef10_nic_data *nic_data = efx->nic_data;
- struct efx_rss_context *ctx;
- int rc;
-
- WARN_ON(!mutex_is_locked(&efx->rss_lock));
-
- if (!nic_data->must_restore_rss_contexts)
- return;
-
- list_for_each_entry(ctx, &efx->rss_context.list, list) {
- /* previous NIC RSS context is gone */
- ctx->context_id = EFX_EF10_RSS_CONTEXT_INVALID;
- /* so try to allocate a new one */
- rc = efx_ef10_rx_push_rss_context_config(efx, ctx,
- ctx->rx_indir_table,
- ctx->rx_hash_key);
- if (rc)
- netif_warn(efx, probe, efx->net_dev,
- "failed to restore RSS context %u, rc=%d"
- "; RSS filters may fail to be applied\n",
- ctx->user_id, rc);
- }
- nic_data->must_restore_rss_contexts = false;
-}
-
-static int efx_ef10_pf_rx_push_rss_config(struct efx_nic *efx, bool user,
- const u32 *rx_indir_table,
- const u8 *key)
-{
- int rc;
-
- if (efx->rss_spread == 1)
- return 0;
-
- if (!key)
- key = efx->rss_context.rx_hash_key;
-
- rc = efx_ef10_rx_push_exclusive_rss_config(efx, rx_indir_table, key);
-
- if (rc == -ENOBUFS && !user) {
- unsigned context_size;
- bool mismatch = false;
- size_t i;
-
- for (i = 0;
- i < ARRAY_SIZE(efx->rss_context.rx_indir_table) && !mismatch;
- i++)
- mismatch = rx_indir_table[i] !=
- ethtool_rxfh_indir_default(i, efx->rss_spread);
-
- rc = efx_ef10_rx_push_shared_rss_config(efx, &context_size);
- if (rc == 0) {
- if (context_size != efx->rss_spread)
- netif_warn(efx, probe, efx->net_dev,
- "Could not allocate an exclusive RSS"
- " context; allocated a shared one of"
- " different size."
- " Wanted %u, got %u.\n",
- efx->rss_spread, context_size);
- else if (mismatch)
- netif_warn(efx, probe, efx->net_dev,
- "Could not allocate an exclusive RSS"
- " context; allocated a shared one but"
- " could not apply custom"
- " indirection.\n");
- else
- netif_info(efx, probe, efx->net_dev,
- "Could not allocate an exclusive RSS"
- " context; allocated a shared one.\n");
- }
- }
- return rc;
-}
-
-static int efx_ef10_vf_rx_push_rss_config(struct efx_nic *efx, bool user,
- const u32 *rx_indir_table
- __attribute__ ((unused)),
- const u8 *key
- __attribute__ ((unused)))
-{
- if (user)
- return -EOPNOTSUPP;
- if (efx->rss_context.context_id != EFX_EF10_RSS_CONTEXT_INVALID)
- return 0;
- return efx_ef10_rx_push_shared_rss_config(efx, NULL);
-}
-
-static int efx_ef10_rx_probe(struct efx_rx_queue *rx_queue)
-{
- return efx_nic_alloc_buffer(rx_queue->efx, &rx_queue->rxd.buf,
- (rx_queue->ptr_mask + 1) *
- sizeof(efx_qword_t),
- GFP_KERNEL);
-}
-
-static void efx_ef10_rx_init(struct efx_rx_queue *rx_queue)
-{
- MCDI_DECLARE_BUF(inbuf,
- MC_CMD_INIT_RXQ_IN_LEN(EFX_MAX_DMAQ_SIZE * 8 /
- EFX_BUF_SIZE));
- struct efx_channel *channel = efx_rx_queue_channel(rx_queue);
- size_t entries = rx_queue->rxd.buf.len / EFX_BUF_SIZE;
- struct efx_nic *efx = rx_queue->efx;
- struct efx_ef10_nic_data *nic_data = efx->nic_data;
- size_t inlen;
- dma_addr_t dma_addr;
- int rc;
- int i;
- BUILD_BUG_ON(MC_CMD_INIT_RXQ_OUT_LEN != 0);
-
- rx_queue->scatter_n = 0;
- rx_queue->scatter_len = 0;
-
- MCDI_SET_DWORD(inbuf, INIT_RXQ_IN_SIZE, rx_queue->ptr_mask + 1);
- MCDI_SET_DWORD(inbuf, INIT_RXQ_IN_TARGET_EVQ, channel->channel);
- MCDI_SET_DWORD(inbuf, INIT_RXQ_IN_LABEL, efx_rx_queue_index(rx_queue));
- MCDI_SET_DWORD(inbuf, INIT_RXQ_IN_INSTANCE,
- efx_rx_queue_index(rx_queue));
- MCDI_POPULATE_DWORD_2(inbuf, INIT_RXQ_IN_FLAGS,
- INIT_RXQ_IN_FLAG_PREFIX, 1,
- INIT_RXQ_IN_FLAG_TIMESTAMP, 1);
- MCDI_SET_DWORD(inbuf, INIT_RXQ_IN_OWNER_ID, 0);
- MCDI_SET_DWORD(inbuf, INIT_RXQ_IN_PORT_ID, nic_data->vport_id);
-
- dma_addr = rx_queue->rxd.buf.dma_addr;
-
- netif_dbg(efx, hw, efx->net_dev, "pushing RXQ %d. %zu entries (%llx)\n",
- efx_rx_queue_index(rx_queue), entries, (u64)dma_addr);
-
- for (i = 0; i < entries; ++i) {
- MCDI_SET_ARRAY_QWORD(inbuf, INIT_RXQ_IN_DMA_ADDR, i, dma_addr);
- dma_addr += EFX_BUF_SIZE;
- }
-
- inlen = MC_CMD_INIT_RXQ_IN_LEN(entries);
-
- rc = efx_mcdi_rpc(efx, MC_CMD_INIT_RXQ, inbuf, inlen,
- NULL, 0, NULL);
- if (rc)
- netdev_WARN(efx->net_dev, "failed to initialise RXQ %d\n",
- efx_rx_queue_index(rx_queue));
-}
-
-static void efx_ef10_rx_fini(struct efx_rx_queue *rx_queue)
-{
- MCDI_DECLARE_BUF(inbuf, MC_CMD_FINI_RXQ_IN_LEN);
- MCDI_DECLARE_BUF_ERR(outbuf);
- struct efx_nic *efx = rx_queue->efx;
- size_t outlen;
- int rc;
-
- MCDI_SET_DWORD(inbuf, FINI_RXQ_IN_INSTANCE,
- efx_rx_queue_index(rx_queue));
-
- rc = efx_mcdi_rpc_quiet(efx, MC_CMD_FINI_RXQ, inbuf, sizeof(inbuf),
- outbuf, sizeof(outbuf), &outlen);
-
- if (rc && rc != -EALREADY)
- goto fail;
-
- return;
-
-fail:
- efx_mcdi_display_error(efx, MC_CMD_FINI_RXQ, MC_CMD_FINI_RXQ_IN_LEN,
- outbuf, outlen, rc);
-}
-
-static void efx_ef10_rx_remove(struct efx_rx_queue *rx_queue)
-{
- efx_nic_free_buffer(rx_queue->efx, &rx_queue->rxd.buf);
-}
-
/* This creates an entry in the RX descriptor queue */
static inline void
efx_ef10_build_rx_desc(struct efx_rx_queue *rx_queue, unsigned int index)
@@ -3229,106 +2460,20 @@ efx_ef10_rx_defer_refill_complete(struct efx_nic *efx, unsigned long cookie,
/* nothing to do */
}
-static int efx_ef10_ev_probe(struct efx_channel *channel)
-{
- return efx_nic_alloc_buffer(channel->efx, &channel->eventq.buf,
- (channel->eventq_mask + 1) *
- sizeof(efx_qword_t),
- GFP_KERNEL);
-}
-
-static void efx_ef10_ev_fini(struct efx_channel *channel)
-{
- MCDI_DECLARE_BUF(inbuf, MC_CMD_FINI_EVQ_IN_LEN);
- MCDI_DECLARE_BUF_ERR(outbuf);
- struct efx_nic *efx = channel->efx;
- size_t outlen;
- int rc;
-
- MCDI_SET_DWORD(inbuf, FINI_EVQ_IN_INSTANCE, channel->channel);
-
- rc = efx_mcdi_rpc_quiet(efx, MC_CMD_FINI_EVQ, inbuf, sizeof(inbuf),
- outbuf, sizeof(outbuf), &outlen);
-
- if (rc && rc != -EALREADY)
- goto fail;
-
- return;
-
-fail:
- efx_mcdi_display_error(efx, MC_CMD_FINI_EVQ, MC_CMD_FINI_EVQ_IN_LEN,
- outbuf, outlen, rc);
-}
-
static int efx_ef10_ev_init(struct efx_channel *channel)
{
- MCDI_DECLARE_BUF(inbuf,
- MC_CMD_INIT_EVQ_V2_IN_LEN(EFX_MAX_EVQ_SIZE * 8 /
- EFX_BUF_SIZE));
- MCDI_DECLARE_BUF(outbuf, MC_CMD_INIT_EVQ_V2_OUT_LEN);
- size_t entries = channel->eventq.buf.len / EFX_BUF_SIZE;
struct efx_nic *efx = channel->efx;
struct efx_ef10_nic_data *nic_data;
- size_t inlen, outlen;
unsigned int enabled, implemented;
- dma_addr_t dma_addr;
+ bool use_v2, cut_thru;
int rc;
- int i;
nic_data = efx->nic_data;
-
- /* Fill event queue with all ones (i.e. empty events) */
- memset(channel->eventq.buf.addr, 0xff, channel->eventq.buf.len);
-
- MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_SIZE, channel->eventq_mask + 1);
- MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_INSTANCE, channel->channel);
- /* INIT_EVQ expects index in vector table, not absolute */
- MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_IRQ_NUM, channel->channel);
- MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_TMR_MODE,
- MC_CMD_INIT_EVQ_IN_TMR_MODE_DIS);
- MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_TMR_LOAD, 0);
- MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_TMR_RELOAD, 0);
- MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_COUNT_MODE,
- MC_CMD_INIT_EVQ_IN_COUNT_MODE_DIS);
- MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_COUNT_THRSHLD, 0);
-
- if (nic_data->datapath_caps2 &
- 1 << MC_CMD_GET_CAPABILITIES_V2_OUT_INIT_EVQ_V2_LBN) {
- /* Use the new generic approach to specifying event queue
- * configuration, requesting lower latency or higher throughput.
- * The options that actually get used appear in the output.
- */
- MCDI_POPULATE_DWORD_2(inbuf, INIT_EVQ_V2_IN_FLAGS,
- INIT_EVQ_V2_IN_FLAG_INTERRUPTING, 1,
- INIT_EVQ_V2_IN_FLAG_TYPE,
- MC_CMD_INIT_EVQ_V2_IN_FLAG_TYPE_AUTO);
- } else {
- bool cut_thru = !(nic_data->datapath_caps &
- 1 << MC_CMD_GET_CAPABILITIES_OUT_RX_BATCHING_LBN);
-
- MCDI_POPULATE_DWORD_4(inbuf, INIT_EVQ_IN_FLAGS,
- INIT_EVQ_IN_FLAG_INTERRUPTING, 1,
- INIT_EVQ_IN_FLAG_RX_MERGE, 1,
- INIT_EVQ_IN_FLAG_TX_MERGE, 1,
- INIT_EVQ_IN_FLAG_CUT_THRU, cut_thru);
- }
-
- dma_addr = channel->eventq.buf.dma_addr;
- for (i = 0; i < entries; ++i) {
- MCDI_SET_ARRAY_QWORD(inbuf, INIT_EVQ_IN_DMA_ADDR, i, dma_addr);
- dma_addr += EFX_BUF_SIZE;
- }
-
- inlen = MC_CMD_INIT_EVQ_IN_LEN(entries);
-
- rc = efx_mcdi_rpc(efx, MC_CMD_INIT_EVQ, inbuf, inlen,
- outbuf, sizeof(outbuf), &outlen);
-
- if (outlen >= MC_CMD_INIT_EVQ_V2_OUT_LEN)
- netif_dbg(efx, drv, efx->net_dev,
- "Channel %d using event queue flags %08x\n",
- channel->channel,
- MCDI_DWORD(outbuf, INIT_EVQ_V2_OUT_FLAGS));
+ use_v2 = nic_data->datapath_caps2 &
+ 1 << MC_CMD_GET_CAPABILITIES_V2_OUT_INIT_EVQ_V2_LBN;
+ cut_thru = !(nic_data->datapath_caps &
+ 1 << MC_CMD_GET_CAPABILITIES_OUT_RX_BATCHING_LBN);
+ rc = efx_mcdi_ev_init(channel, cut_thru, use_v2);
/* IRQ return is ignored */
if (channel->channel || rc)
@@ -3386,15 +2531,10 @@ static int efx_ef10_ev_init(struct efx_channel *channel)
return 0;
fail:
- efx_ef10_ev_fini(channel);
+ efx_mcdi_ev_fini(channel);
return rc;
}
-static void efx_ef10_ev_remove(struct efx_channel *channel)
-{
- efx_nic_free_buffer(channel->efx, &channel->eventq.buf);
-}
-
static void efx_ef10_handle_rx_wrong_queue(struct efx_rx_queue *rx_queue,
unsigned int rx_queue_label)
{
@@ -3976,9 +3116,9 @@ static int efx_ef10_fini_dmaq(struct efx_nic *efx)
if (efx->state != STATE_RECOVERY) {
efx_for_each_channel(channel, efx) {
efx_for_each_channel_rx_queue(rx_queue, channel)
- efx_ef10_rx_fini(rx_queue);
+ efx_mcdi_rx_fini(rx_queue);
efx_for_each_channel_tx_queue(tx_queue, channel)
- efx_ef10_tx_fini(tx_queue);
+ efx_mcdi_tx_fini(tx_queue);
}
wait_event_timeout(efx->flush_wq,
@@ -4000,1538 +3140,6 @@ static void efx_ef10_prepare_flr(struct efx_nic *efx)
atomic_set(&efx->active_queues, 0);
}
-/* Decide whether a filter should be exclusive or else should allow
- * delivery to additional recipients. Currently we decide that
- * filters for specific local unicast MAC and IP addresses are
- * exclusive.
- */
-static bool efx_ef10_filter_is_exclusive(const struct efx_filter_spec *spec)
-{
- if (spec->match_flags & EFX_FILTER_MATCH_LOC_MAC &&
- !is_multicast_ether_addr(spec->loc_mac))
- return true;
-
- if ((spec->match_flags &
- (EFX_FILTER_MATCH_ETHER_TYPE | EFX_FILTER_MATCH_LOC_HOST)) ==
- (EFX_FILTER_MATCH_ETHER_TYPE | EFX_FILTER_MATCH_LOC_HOST)) {
- if (spec->ether_type == htons(ETH_P_IP) &&
- !ipv4_is_multicast(spec->loc_host[0]))
- return true;
- if (spec->ether_type == htons(ETH_P_IPV6) &&
- ((const u8 *)spec->loc_host)[0] != 0xff)
- return true;
- }
-
- return false;
-}
-
-static struct efx_filter_spec *
-efx_ef10_filter_entry_spec(const struct efx_ef10_filter_table *table,
- unsigned int filter_idx)
-{
- return (struct efx_filter_spec *)(table->entry[filter_idx].spec &
- ~EFX_EF10_FILTER_FLAGS);
-}
-
-static unsigned int
-efx_ef10_filter_entry_flags(const struct efx_ef10_filter_table *table,
- unsigned int filter_idx)
-{
- return table->entry[filter_idx].spec & EFX_EF10_FILTER_FLAGS;
-}
-
-static void
-efx_ef10_filter_set_entry(struct efx_ef10_filter_table *table,
- unsigned int filter_idx,
- const struct efx_filter_spec *spec,
- unsigned int flags)
-{
- table->entry[filter_idx].spec = (unsigned long)spec | flags;
-}
-
-static void
-efx_ef10_filter_push_prep_set_match_fields(struct efx_nic *efx,
- const struct efx_filter_spec *spec,
- efx_dword_t *inbuf)
-{
- enum efx_encap_type encap_type = efx_filter_get_encap_type(spec);
- u32 match_fields = 0, uc_match, mc_match;
-
- MCDI_SET_DWORD(inbuf, FILTER_OP_IN_OP,
- efx_ef10_filter_is_exclusive(spec) ?
- MC_CMD_FILTER_OP_IN_OP_INSERT :
- MC_CMD_FILTER_OP_IN_OP_SUBSCRIBE);
-
- /* Convert match flags and values. Unlike almost
- * everything else in MCDI, these fields are in
- * network byte order.
- */
-#define COPY_VALUE(value, mcdi_field) \
- do { \
- match_fields |= \
- 1 << MC_CMD_FILTER_OP_IN_MATCH_ ## \
- mcdi_field ## _LBN; \
- BUILD_BUG_ON( \
- MC_CMD_FILTER_OP_IN_ ## mcdi_field ## _LEN < \
- sizeof(value)); \
- memcpy(MCDI_PTR(inbuf, FILTER_OP_IN_ ## mcdi_field), \
- &value, sizeof(value)); \
- } while (0)
-#define COPY_FIELD(gen_flag, gen_field, mcdi_field) \
- if (spec->match_flags & EFX_FILTER_MATCH_ ## gen_flag) { \
- COPY_VALUE(spec->gen_field, mcdi_field); \
- }
- /* Handle encap filters first. They will always be mismatch
- * (unknown UC or MC) filters
- */
- if (encap_type) {
- /* ether_type and outer_ip_proto need to be variables
- * because COPY_VALUE wants to memcpy them
- */
- __be16 ether_type =
- htons(encap_type & EFX_ENCAP_FLAG_IPV6 ?
- ETH_P_IPV6 : ETH_P_IP);
- u8 vni_type = MC_CMD_FILTER_OP_EXT_IN_VNI_TYPE_GENEVE;
- u8 outer_ip_proto;
-
- switch (encap_type & EFX_ENCAP_TYPES_MASK) {
- case EFX_ENCAP_TYPE_VXLAN:
- vni_type = MC_CMD_FILTER_OP_EXT_IN_VNI_TYPE_VXLAN;
- /* fallthrough */
- case EFX_ENCAP_TYPE_GENEVE:
- COPY_VALUE(ether_type, ETHER_TYPE);
- outer_ip_proto = IPPROTO_UDP;
- COPY_VALUE(outer_ip_proto, IP_PROTO);
- /* We always need to set the type field, even
- * though we're not matching on the TNI.
- */
- MCDI_POPULATE_DWORD_1(inbuf,
- FILTER_OP_EXT_IN_VNI_OR_VSID,
- FILTER_OP_EXT_IN_VNI_TYPE,
- vni_type);
- break;
- case EFX_ENCAP_TYPE_NVGRE:
- COPY_VALUE(ether_type, ETHER_TYPE);
- outer_ip_proto = IPPROTO_GRE;
- COPY_VALUE(outer_ip_proto, IP_PROTO);
- break;
- default:
- WARN_ON(1);
- }
-
- uc_match = MC_CMD_FILTER_OP_EXT_IN_MATCH_IFRM_UNKNOWN_UCAST_DST_LBN;
- mc_match = MC_CMD_FILTER_OP_EXT_IN_MATCH_IFRM_UNKNOWN_MCAST_DST_LBN;
- } else {
- uc_match = MC_CMD_FILTER_OP_EXT_IN_MATCH_UNKNOWN_UCAST_DST_LBN;
- mc_match = MC_CMD_FILTER_OP_EXT_IN_MATCH_UNKNOWN_MCAST_DST_LBN;
- }
-
- if (spec->match_flags & EFX_FILTER_MATCH_LOC_MAC_IG)
- match_fields |=
- is_multicast_ether_addr(spec->loc_mac) ?
- 1 << mc_match :
- 1 << uc_match;
- COPY_FIELD(REM_HOST, rem_host, SRC_IP);
- COPY_FIELD(LOC_HOST, loc_host, DST_IP);
- COPY_FIELD(REM_MAC, rem_mac, SRC_MAC);
- COPY_FIELD(REM_PORT, rem_port, SRC_PORT);
- COPY_FIELD(LOC_MAC, loc_mac, DST_MAC);
- COPY_FIELD(LOC_PORT, loc_port, DST_PORT);
- COPY_FIELD(ETHER_TYPE, ether_type, ETHER_TYPE);
- COPY_FIELD(INNER_VID, inner_vid, INNER_VLAN);
- COPY_FIELD(OUTER_VID, outer_vid, OUTER_VLAN);
- COPY_FIELD(IP_PROTO, ip_proto, IP_PROTO);
-#undef COPY_FIELD
-#undef COPY_VALUE
- MCDI_SET_DWORD(inbuf, FILTER_OP_IN_MATCH_FIELDS,
- match_fields);
-}
-
-static void efx_ef10_filter_push_prep(struct efx_nic *efx,
- const struct efx_filter_spec *spec,
- efx_dword_t *inbuf, u64 handle,
- struct efx_rss_context *ctx,
- bool replacing)
-{
- struct efx_ef10_nic_data *nic_data = efx->nic_data;
- u32 flags = spec->flags;
-
- memset(inbuf, 0, MC_CMD_FILTER_OP_EXT_IN_LEN);
-
- /* If RSS filter, caller better have given us an RSS context */
- if (flags & EFX_FILTER_FLAG_RX_RSS) {
- /* We don't have the ability to return an error, so we'll just
- * log a warning and disable RSS for the filter.
- */
- if (WARN_ON_ONCE(!ctx))
- flags &= ~EFX_FILTER_FLAG_RX_RSS;
- else if (WARN_ON_ONCE(ctx->context_id == EFX_EF10_RSS_CONTEXT_INVALID))
- flags &= ~EFX_FILTER_FLAG_RX_RSS;
- }
-
- if (replacing) {
- MCDI_SET_DWORD(inbuf, FILTER_OP_IN_OP,
- MC_CMD_FILTER_OP_IN_OP_REPLACE);
- MCDI_SET_QWORD(inbuf, FILTER_OP_IN_HANDLE, handle);
- } else {
- efx_ef10_filter_push_prep_set_match_fields(efx, spec, inbuf);
- }
-
- MCDI_SET_DWORD(inbuf, FILTER_OP_IN_PORT_ID, nic_data->vport_id);
- MCDI_SET_DWORD(inbuf, FILTER_OP_IN_RX_DEST,
- spec->dmaq_id == EFX_FILTER_RX_DMAQ_ID_DROP ?
- MC_CMD_FILTER_OP_IN_RX_DEST_DROP :
- MC_CMD_FILTER_OP_IN_RX_DEST_HOST);
- MCDI_SET_DWORD(inbuf, FILTER_OP_IN_TX_DOMAIN, 0);
- MCDI_SET_DWORD(inbuf, FILTER_OP_IN_TX_DEST,
- MC_CMD_FILTER_OP_IN_TX_DEST_DEFAULT);
- MCDI_SET_DWORD(inbuf, FILTER_OP_IN_RX_QUEUE,
- spec->dmaq_id == EFX_FILTER_RX_DMAQ_ID_DROP ?
- 0 : spec->dmaq_id);
- MCDI_SET_DWORD(inbuf, FILTER_OP_IN_RX_MODE,
- (flags & EFX_FILTER_FLAG_RX_RSS) ?
- MC_CMD_FILTER_OP_IN_RX_MODE_RSS :
- MC_CMD_FILTER_OP_IN_RX_MODE_SIMPLE);
- if (flags & EFX_FILTER_FLAG_RX_RSS)
- MCDI_SET_DWORD(inbuf, FILTER_OP_IN_RX_CONTEXT, ctx->context_id);
-}
-
-static int efx_ef10_filter_push(struct efx_nic *efx,
- const struct efx_filter_spec *spec, u64 *handle,
- struct efx_rss_context *ctx, bool replacing)
-{
- MCDI_DECLARE_BUF(inbuf, MC_CMD_FILTER_OP_EXT_IN_LEN);
- MCDI_DECLARE_BUF(outbuf, MC_CMD_FILTER_OP_EXT_OUT_LEN);
- size_t outlen;
- int rc;
-
- efx_ef10_filter_push_prep(efx, spec, inbuf, *handle, ctx, replacing);
- rc = efx_mcdi_rpc_quiet(efx, MC_CMD_FILTER_OP, inbuf, sizeof(inbuf),
- outbuf, sizeof(outbuf), &outlen);
- if (rc && spec->priority != EFX_FILTER_PRI_HINT)
- efx_mcdi_display_error(efx, MC_CMD_FILTER_OP, sizeof(inbuf),
- outbuf, outlen, rc);
- if (rc == 0)
- *handle = MCDI_QWORD(outbuf, FILTER_OP_OUT_HANDLE);
- if (rc == -ENOSPC)
- rc = -EBUSY; /* to match efx_farch_filter_insert() */
- return rc;
-}
-
-static u32 efx_ef10_filter_mcdi_flags_from_spec(const struct efx_filter_spec *spec)
-{
- enum efx_encap_type encap_type = efx_filter_get_encap_type(spec);
- unsigned int match_flags = spec->match_flags;
- unsigned int uc_match, mc_match;
- u32 mcdi_flags = 0;
-
-#define MAP_FILTER_TO_MCDI_FLAG(gen_flag, mcdi_field, encap) { \
- unsigned int old_match_flags = match_flags; \
- match_flags &= ~EFX_FILTER_MATCH_ ## gen_flag; \
- if (match_flags != old_match_flags) \
- mcdi_flags |= \
- (1 << ((encap) ? \
- MC_CMD_FILTER_OP_EXT_IN_MATCH_IFRM_ ## \
- mcdi_field ## _LBN : \
- MC_CMD_FILTER_OP_EXT_IN_MATCH_ ##\
- mcdi_field ## _LBN)); \
- }
- /* inner or outer based on encap type */
- MAP_FILTER_TO_MCDI_FLAG(REM_HOST, SRC_IP, encap_type);
- MAP_FILTER_TO_MCDI_FLAG(LOC_HOST, DST_IP, encap_type);
- MAP_FILTER_TO_MCDI_FLAG(REM_MAC, SRC_MAC, encap_type);
- MAP_FILTER_TO_MCDI_FLAG(REM_PORT, SRC_PORT, encap_type);
- MAP_FILTER_TO_MCDI_FLAG(LOC_MAC, DST_MAC, encap_type);
- MAP_FILTER_TO_MCDI_FLAG(LOC_PORT, DST_PORT, encap_type);
- MAP_FILTER_TO_MCDI_FLAG(ETHER_TYPE, ETHER_TYPE, encap_type);
- MAP_FILTER_TO_MCDI_FLAG(IP_PROTO, IP_PROTO, encap_type);
- /* always outer */
- MAP_FILTER_TO_MCDI_FLAG(INNER_VID, INNER_VLAN, false);
- MAP_FILTER_TO_MCDI_FLAG(OUTER_VID, OUTER_VLAN, false);
-#undef MAP_FILTER_TO_MCDI_FLAG
-
- /* special handling for encap type, and mismatch */
- if (encap_type) {
- match_flags &= ~EFX_FILTER_MATCH_ENCAP_TYPE;
- mcdi_flags |=
- (1 << MC_CMD_FILTER_OP_EXT_IN_MATCH_ETHER_TYPE_LBN);
- mcdi_flags |= (1 << MC_CMD_FILTER_OP_EXT_IN_MATCH_IP_PROTO_LBN);
-
- uc_match = MC_CMD_FILTER_OP_EXT_IN_MATCH_IFRM_UNKNOWN_UCAST_DST_LBN;
- mc_match = MC_CMD_FILTER_OP_EXT_IN_MATCH_IFRM_UNKNOWN_MCAST_DST_LBN;
- } else {
- uc_match = MC_CMD_FILTER_OP_EXT_IN_MATCH_UNKNOWN_UCAST_DST_LBN;
- mc_match = MC_CMD_FILTER_OP_EXT_IN_MATCH_UNKNOWN_MCAST_DST_LBN;
- }
-
- if (match_flags & EFX_FILTER_MATCH_LOC_MAC_IG) {
- match_flags &= ~EFX_FILTER_MATCH_LOC_MAC_IG;
- mcdi_flags |=
- is_multicast_ether_addr(spec->loc_mac) ?
- 1 << mc_match :
- 1 << uc_match;
- }
-
- /* Did we map them all? */
- WARN_ON_ONCE(match_flags);
-
- return mcdi_flags;
-}
-
-static int efx_ef10_filter_pri(struct efx_ef10_filter_table *table,
- const struct efx_filter_spec *spec)
-{
- u32 mcdi_flags = efx_ef10_filter_mcdi_flags_from_spec(spec);
- unsigned int match_pri;
-
- for (match_pri = 0;
- match_pri < table->rx_match_count;
- match_pri++)
- if (table->rx_match_mcdi_flags[match_pri] == mcdi_flags)
- return match_pri;
-
- return -EPROTONOSUPPORT;
-}
-
-static s32 efx_ef10_filter_insert_locked(struct efx_nic *efx,
- struct efx_filter_spec *spec,
- bool replace_equal)
-{
- DECLARE_BITMAP(mc_rem_map, EFX_EF10_FILTER_SEARCH_LIMIT);
- struct efx_ef10_nic_data *nic_data = efx->nic_data;
- struct efx_ef10_filter_table *table;
- struct efx_filter_spec *saved_spec;
- struct efx_rss_context *ctx = NULL;
- unsigned int match_pri, hash;
- unsigned int priv_flags;
- bool rss_locked = false;
- bool replacing = false;
- unsigned int depth, i;
- int ins_index = -1;
- DEFINE_WAIT(wait);
- bool is_mc_recip;
- s32 rc;
-
- WARN_ON(!rwsem_is_locked(&efx->filter_sem));
- table = efx->filter_state;
- down_write(&table->lock);
-
- /* For now, only support RX filters */
- if ((spec->flags & (EFX_FILTER_FLAG_RX | EFX_FILTER_FLAG_TX)) !=
- EFX_FILTER_FLAG_RX) {
- rc = -EINVAL;
- goto out_unlock;
- }
-
- rc = efx_ef10_filter_pri(table, spec);
- if (rc < 0)
- goto out_unlock;
- match_pri = rc;
-
- hash = efx_filter_spec_hash(spec);
- is_mc_recip = efx_filter_is_mc_recipient(spec);
- if (is_mc_recip)
- bitmap_zero(mc_rem_map, EFX_EF10_FILTER_SEARCH_LIMIT);
-
- if (spec->flags & EFX_FILTER_FLAG_RX_RSS) {
- mutex_lock(&efx->rss_lock);
- rss_locked = true;
- if (spec->rss_context)
- ctx = efx_find_rss_context_entry(efx, spec->rss_context);
- else
- ctx = &efx->rss_context;
- if (!ctx) {
- rc = -ENOENT;
- goto out_unlock;
- }
- if (ctx->context_id == EFX_EF10_RSS_CONTEXT_INVALID) {
- rc = -EOPNOTSUPP;
- goto out_unlock;
- }
- }
-
- /* Find any existing filters with the same match tuple or
- * else a free slot to insert at.
- */
- for (depth = 1; depth < EFX_EF10_FILTER_SEARCH_LIMIT; depth++) {
- i = (hash + depth) & (HUNT_FILTER_TBL_ROWS - 1);
- saved_spec = efx_ef10_filter_entry_spec(table, i);
-
- if (!saved_spec) {
- if (ins_index < 0)
- ins_index = i;
- } else if (efx_filter_spec_equal(spec, saved_spec)) {
- if (spec->priority < saved_spec->priority &&
- spec->priority != EFX_FILTER_PRI_AUTO) {
- rc = -EPERM;
- goto out_unlock;
- }
- if (!is_mc_recip) {
- /* This is the only one */
- if (spec->priority ==
- saved_spec->priority &&
- !replace_equal) {
- rc = -EEXIST;
- goto out_unlock;
- }
- ins_index = i;
- break;
- } else if (spec->priority >
- saved_spec->priority ||
- (spec->priority ==
- saved_spec->priority &&
- replace_equal)) {
- if (ins_index < 0)
- ins_index = i;
- else
- __set_bit(depth, mc_rem_map);
- }
- }
- }
-
- /* Once we reach the maximum search depth, use the first suitable
- * slot, or return -EBUSY if there was none
- */
- if (ins_index < 0) {
- rc = -EBUSY;
- goto out_unlock;
- }
-
- /* Create a software table entry if necessary. */
- saved_spec = efx_ef10_filter_entry_spec(table, ins_index);
- if (saved_spec) {
- if (spec->priority == EFX_FILTER_PRI_AUTO &&
- saved_spec->priority >= EFX_FILTER_PRI_AUTO) {
- /* Just make sure it won't be removed */
- if (saved_spec->priority > EFX_FILTER_PRI_AUTO)
- saved_spec->flags |= EFX_FILTER_FLAG_RX_OVER_AUTO;
- table->entry[ins_index].spec &=
- ~EFX_EF10_FILTER_FLAG_AUTO_OLD;
- rc = ins_index;
- goto out_unlock;
- }
- replacing = true;
- priv_flags = efx_ef10_filter_entry_flags(table, ins_index);
- } else {
- saved_spec = kmalloc(sizeof(*spec), GFP_ATOMIC);
- if (!saved_spec) {
- rc = -ENOMEM;
- goto out_unlock;
- }
- *saved_spec = *spec;
- priv_flags = 0;
- }
- efx_ef10_filter_set_entry(table, ins_index, saved_spec, priv_flags);
-
- /* Actually insert the filter on the HW */
- rc = efx_ef10_filter_push(efx, spec, &table->entry[ins_index].handle,
- ctx, replacing);
-
- if (rc == -EINVAL && nic_data->must_realloc_vis)
- /* The MC rebooted under us, causing it to reject our filter
- * insertion as pointing to an invalid VI (spec->dmaq_id).
- */
- rc = -EAGAIN;
-
- /* Finalise the software table entry */
- if (rc == 0) {
- if (replacing) {
- /* Update the fields that may differ */
- if (saved_spec->priority == EFX_FILTER_PRI_AUTO)
- saved_spec->flags |=
- EFX_FILTER_FLAG_RX_OVER_AUTO;
- saved_spec->priority = spec->priority;
- saved_spec->flags &= EFX_FILTER_FLAG_RX_OVER_AUTO;
- saved_spec->flags |= spec->flags;
- saved_spec->rss_context = spec->rss_context;
- saved_spec->dmaq_id = spec->dmaq_id;
- }
- } else if (!replacing) {
- kfree(saved_spec);
- saved_spec = NULL;
- } else {
- /* We failed to replace, so the old filter is still present.
- * Roll back the software table to reflect this. In fact the
- * efx_ef10_filter_set_entry() call below will do the right
- * thing, so nothing extra is needed here.
- */
- }
- efx_ef10_filter_set_entry(table, ins_index, saved_spec, priv_flags);
-
- /* Remove and finalise entries for lower-priority multicast
- * recipients
- */
- if (is_mc_recip) {
- MCDI_DECLARE_BUF(inbuf, MC_CMD_FILTER_OP_EXT_IN_LEN);
- unsigned int depth, i;
-
- memset(inbuf, 0, sizeof(inbuf));
-
- for (depth = 0; depth < EFX_EF10_FILTER_SEARCH_LIMIT; depth++) {
- if (!test_bit(depth, mc_rem_map))
- continue;
-
- i = (hash + depth) & (HUNT_FILTER_TBL_ROWS - 1);
- saved_spec = efx_ef10_filter_entry_spec(table, i);
- priv_flags = efx_ef10_filter_entry_flags(table, i);
-
- if (rc == 0) {
- MCDI_SET_DWORD(inbuf, FILTER_OP_IN_OP,
- MC_CMD_FILTER_OP_IN_OP_UNSUBSCRIBE);
- MCDI_SET_QWORD(inbuf, FILTER_OP_IN_HANDLE,
- table->entry[i].handle);
- rc = efx_mcdi_rpc(efx, MC_CMD_FILTER_OP,
- inbuf, sizeof(inbuf),
- NULL, 0, NULL);
- }
-
- if (rc == 0) {
- kfree(saved_spec);
- saved_spec = NULL;
- priv_flags = 0;
- }
- efx_ef10_filter_set_entry(table, i, saved_spec,
- priv_flags);
- }
- }
-
- /* If successful, return the inserted filter ID */
- if (rc == 0)
- rc = efx_ef10_make_filter_id(match_pri, ins_index);
-
-out_unlock:
- if (rss_locked)
- mutex_unlock(&efx->rss_lock);
- up_write(&table->lock);
- return rc;
-}
-
-static s32 efx_ef10_filter_insert(struct efx_nic *efx,
- struct efx_filter_spec *spec,
- bool replace_equal)
-{
- s32 ret;
-
- down_read(&efx->filter_sem);
- ret = efx_ef10_filter_insert_locked(efx, spec, replace_equal);
- up_read(&efx->filter_sem);
-
- return ret;
-}
-
-static void efx_ef10_filter_update_rx_scatter(struct efx_nic *efx)
-{
- /* no need to do anything here on EF10 */
-}
-
-/* Remove a filter.
- * If !by_index, remove by ID
- * If by_index, remove by index
- * Filter ID may come from userland and must be range-checked.
- * Caller must hold efx->filter_sem for read, and efx->filter_state->lock
- * for write.
- */
-static int efx_ef10_filter_remove_internal(struct efx_nic *efx,
- unsigned int priority_mask,
- u32 filter_id, bool by_index)
-{
- unsigned int filter_idx = efx_ef10_filter_get_unsafe_id(filter_id);
- struct efx_ef10_filter_table *table = efx->filter_state;
- MCDI_DECLARE_BUF(inbuf,
- MC_CMD_FILTER_OP_IN_HANDLE_OFST +
- MC_CMD_FILTER_OP_IN_HANDLE_LEN);
- struct efx_filter_spec *spec;
- DEFINE_WAIT(wait);
- int rc;
-
- spec = efx_ef10_filter_entry_spec(table, filter_idx);
- if (!spec ||
- (!by_index &&
- efx_ef10_filter_pri(table, spec) !=
- efx_ef10_filter_get_unsafe_pri(filter_id)))
- return -ENOENT;
-
- if (spec->flags & EFX_FILTER_FLAG_RX_OVER_AUTO &&
- priority_mask == (1U << EFX_FILTER_PRI_AUTO)) {
- /* Just remove flags */
- spec->flags &= ~EFX_FILTER_FLAG_RX_OVER_AUTO;
- table->entry[filter_idx].spec &= ~EFX_EF10_FILTER_FLAG_AUTO_OLD;
- return 0;
- }
-
- if (!(priority_mask & (1U << spec->priority)))
- return -ENOENT;
-
- if (spec->flags & EFX_FILTER_FLAG_RX_OVER_AUTO) {
- /* Reset to an automatic filter */
-
- struct efx_filter_spec new_spec = *spec;
-
- new_spec.priority = EFX_FILTER_PRI_AUTO;
- new_spec.flags = (EFX_FILTER_FLAG_RX |
- (efx_rss_active(&efx->rss_context) ?
- EFX_FILTER_FLAG_RX_RSS : 0));
- new_spec.dmaq_id = 0;
- new_spec.rss_context = 0;
- rc = efx_ef10_filter_push(efx, &new_spec,
- &table->entry[filter_idx].handle,
- &efx->rss_context,
- true);
-
- if (rc == 0)
- *spec = new_spec;
- } else {
- /* Really remove the filter */
-
- MCDI_SET_DWORD(inbuf, FILTER_OP_IN_OP,
- efx_ef10_filter_is_exclusive(spec) ?
- MC_CMD_FILTER_OP_IN_OP_REMOVE :
- MC_CMD_FILTER_OP_IN_OP_UNSUBSCRIBE);
- MCDI_SET_QWORD(inbuf, FILTER_OP_IN_HANDLE,
- table->entry[filter_idx].handle);
- rc = efx_mcdi_rpc_quiet(efx, MC_CMD_FILTER_OP,
- inbuf, sizeof(inbuf), NULL, 0, NULL);
-
- if ((rc == 0) || (rc == -ENOENT)) {
- /* Filter removed OK or didn't actually exist */
- kfree(spec);
- efx_ef10_filter_set_entry(table, filter_idx, NULL, 0);
- } else {
- efx_mcdi_display_error(efx, MC_CMD_FILTER_OP,
- MC_CMD_FILTER_OP_EXT_IN_LEN,
- NULL, 0, rc);
- }
- }
-
- return rc;
-}
-
-static int efx_ef10_filter_remove_safe(struct efx_nic *efx,
- enum efx_filter_priority priority,
- u32 filter_id)
-{
- struct efx_ef10_filter_table *table;
- int rc;
-
- down_read(&efx->filter_sem);
- table = efx->filter_state;
- down_write(&table->lock);
- rc = efx_ef10_filter_remove_internal(efx, 1U << priority, filter_id,
- false);
- up_write(&table->lock);
- up_read(&efx->filter_sem);
- return rc;
-}
-
-/* Caller must hold efx->filter_sem for read */
-static void efx_ef10_filter_remove_unsafe(struct efx_nic *efx,
- enum efx_filter_priority priority,
- u32 filter_id)
-{
- struct efx_ef10_filter_table *table = efx->filter_state;
-
- if (filter_id == EFX_EF10_FILTER_ID_INVALID)
- return;
-
- down_write(&table->lock);
- efx_ef10_filter_remove_internal(efx, 1U << priority, filter_id,
- true);
- up_write(&table->lock);
-}
-
-static int efx_ef10_filter_get_safe(struct efx_nic *efx,
- enum efx_filter_priority priority,
- u32 filter_id, struct efx_filter_spec *spec)
-{
- unsigned int filter_idx = efx_ef10_filter_get_unsafe_id(filter_id);
- const struct efx_filter_spec *saved_spec;
- struct efx_ef10_filter_table *table;
- int rc;
-
- down_read(&efx->filter_sem);
- table = efx->filter_state;
- down_read(&table->lock);
- saved_spec = efx_ef10_filter_entry_spec(table, filter_idx);
- if (saved_spec && saved_spec->priority == priority &&
- efx_ef10_filter_pri(table, saved_spec) ==
- efx_ef10_filter_get_unsafe_pri(filter_id)) {
- *spec = *saved_spec;
- rc = 0;
- } else {
- rc = -ENOENT;
- }
- up_read(&table->lock);
- up_read(&efx->filter_sem);
- return rc;
-}
-
-static int efx_ef10_filter_clear_rx(struct efx_nic *efx,
- enum efx_filter_priority priority)
-{
- struct efx_ef10_filter_table *table;
- unsigned int priority_mask;
- unsigned int i;
- int rc;
-
- priority_mask = (((1U << (priority + 1)) - 1) &
- ~(1U << EFX_FILTER_PRI_AUTO));
-
- down_read(&efx->filter_sem);
- table = efx->filter_state;
- down_write(&table->lock);
- for (i = 0; i < HUNT_FILTER_TBL_ROWS; i++) {
- rc = efx_ef10_filter_remove_internal(efx, priority_mask,
- i, true);
- if (rc && rc != -ENOENT)
- break;
- rc = 0;
- }
-
- up_write(&table->lock);
- up_read(&efx->filter_sem);
- return rc;
-}
-
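
The priority_mask built in efx_ef10_filter_clear_rx() above covers every priority up to and including the one requested while always sparing AUTO filters, so user-installed filters can be cleared without disturbing the driver's automatic ones. A stand-alone sketch of that computation, assuming the usual small consecutive priority values (HINT = 0, AUTO = 1, ...):

    #include <stdio.h>

    /* Hypothetical stand-ins for the driver's priority enum (layout assumed). */
    enum { PRI_HINT = 0, PRI_AUTO = 1, PRI_MANUAL = 2, PRI_REQUIRED = 3 };

    int main(void)
    {
        unsigned int priority = PRI_MANUAL;
        /* Every priority <= 'priority', minus the AUTO bit. */
        unsigned int mask = ((1U << (priority + 1)) - 1) & ~(1U << PRI_AUTO);

        /* For PRI_MANUAL: 0b111 & ~0b010 = 0b101, so HINT and MANUAL
         * filters are removed while AUTO filters survive.
         */
        printf("priority mask %#x\n", mask);
        return 0;
    }
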
-static u32 efx_ef10_filter_count_rx_used(struct efx_nic *efx,
- enum efx_filter_priority priority)
-{
- struct efx_ef10_filter_table *table;
- unsigned int filter_idx;
- s32 count = 0;
-
- down_read(&efx->filter_sem);
- table = efx->filter_state;
- down_read(&table->lock);
- for (filter_idx = 0; filter_idx < HUNT_FILTER_TBL_ROWS; filter_idx++) {
- if (table->entry[filter_idx].spec &&
- efx_ef10_filter_entry_spec(table, filter_idx)->priority ==
- priority)
- ++count;
- }
- up_read(&table->lock);
- up_read(&efx->filter_sem);
- return count;
-}
-
-static u32 efx_ef10_filter_get_rx_id_limit(struct efx_nic *efx)
-{
- struct efx_ef10_filter_table *table = efx->filter_state;
-
- return table->rx_match_count * HUNT_FILTER_TBL_ROWS * 2;
-}
-
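
The efx_ef10_make_filter_id() and efx_ef10_filter_get_unsafe_id()/..._pri() helpers used above are defined outside this hunk; the "* 2" in the ID limit suggests the user-visible filter ID packs the match priority above the table index with a factor-of-two headroom. A sketch of such a packing, under that assumption, with illustrative names and an assumed 8192-row table:

    #include <stdint.h>
    #include <assert.h>

    #define TBL_ROWS 8192u   /* assumed stand-in for HUNT_FILTER_TBL_ROWS (power of two) */

    /* Pack a match priority and a table index into one user-visible filter ID. */
    static inline uint32_t make_filter_id(unsigned int pri, unsigned int idx)
    {
        return pri * TBL_ROWS * 2 + idx;
    }

    /* Recover the table index; only safe once the ID has been range-checked. */
    static inline unsigned int filter_id_to_index(uint32_t id)
    {
        return id & (TBL_ROWS - 1);
    }

    /* Recover the match priority. */
    static inline unsigned int filter_id_to_pri(uint32_t id)
    {
        return id / (TBL_ROWS * 2);
    }

    int main(void)
    {
        uint32_t id = make_filter_id(3, 42);

        assert(filter_id_to_index(id) == 42);
        assert(filter_id_to_pri(id) == 3);
        return 0;
    }
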
-static s32 efx_ef10_filter_get_rx_ids(struct efx_nic *efx,
- enum efx_filter_priority priority,
- u32 *buf, u32 size)
-{
- struct efx_ef10_filter_table *table;
- struct efx_filter_spec *spec;
- unsigned int filter_idx;
- s32 count = 0;
-
- down_read(&efx->filter_sem);
- table = efx->filter_state;
- down_read(&table->lock);
-
- for (filter_idx = 0; filter_idx < HUNT_FILTER_TBL_ROWS; filter_idx++) {
- spec = efx_ef10_filter_entry_spec(table, filter_idx);
- if (spec && spec->priority == priority) {
- if (count == size) {
- count = -EMSGSIZE;
- break;
- }
- buf[count++] =
- efx_ef10_make_filter_id(
- efx_ef10_filter_pri(table, spec),
- filter_idx);
- }
- }
- up_read(&table->lock);
- up_read(&efx->filter_sem);
- return count;
-}
-
-#ifdef CONFIG_RFS_ACCEL
-
-static bool efx_ef10_filter_rfs_expire_one(struct efx_nic *efx, u32 flow_id,
- unsigned int filter_idx)
-{
- struct efx_filter_spec *spec, saved_spec;
- struct efx_ef10_filter_table *table;
- struct efx_arfs_rule *rule = NULL;
- bool ret = true, force = false;
- u16 arfs_id;
-
- down_read(&efx->filter_sem);
- table = efx->filter_state;
- down_write(&table->lock);
- spec = efx_ef10_filter_entry_spec(table, filter_idx);
-
- if (!spec || spec->priority != EFX_FILTER_PRI_HINT)
- goto out_unlock;
-
- spin_lock_bh(&efx->rps_hash_lock);
- if (!efx->rps_hash_table) {
- /* In the absence of the table, we always return 0 to ARFS. */
- arfs_id = 0;
- } else {
- rule = efx_rps_hash_find(efx, spec);
- if (!rule)
- /* ARFS table doesn't know of this filter, so remove it */
- goto expire;
- arfs_id = rule->arfs_id;
- ret = efx_rps_check_rule(rule, filter_idx, &force);
- if (force)
- goto expire;
- if (!ret) {
- spin_unlock_bh(&efx->rps_hash_lock);
- goto out_unlock;
- }
- }
- if (!rps_may_expire_flow(efx->net_dev, spec->dmaq_id, flow_id, arfs_id))
- ret = false;
- else if (rule)
- rule->filter_id = EFX_ARFS_FILTER_ID_REMOVING;
-expire:
- saved_spec = *spec; /* remove operation will kfree spec */
- spin_unlock_bh(&efx->rps_hash_lock);
- /* At this point (since we dropped the lock), another thread might queue
- * up a fresh insertion request (but the actual insertion will be held
- * up by our possession of the filter table lock). In that case, it
- * will set rule->filter_id to EFX_ARFS_FILTER_ID_PENDING, meaning that
- * the rule is not removed by efx_rps_hash_del() below.
- */
- if (ret)
- ret = efx_ef10_filter_remove_internal(efx, 1U << spec->priority,
- filter_idx, true) == 0;
- /* While we can't safely dereference rule (we dropped the lock), we can
- * still test it for NULL.
- */
- if (ret && rule) {
- /* Expiring, so remove entry from ARFS table */
- spin_lock_bh(&efx->rps_hash_lock);
- efx_rps_hash_del(efx, &saved_spec);
- spin_unlock_bh(&efx->rps_hash_lock);
- }
-out_unlock:
- up_write(&table->lock);
- up_read(&efx->filter_sem);
- return ret;
-}
-
-#endif /* CONFIG_RFS_ACCEL */
-
-static int efx_ef10_filter_match_flags_from_mcdi(bool encap, u32 mcdi_flags)
-{
- int match_flags = 0;
-
-#define MAP_FLAG(gen_flag, mcdi_field) do { \
- u32 old_mcdi_flags = mcdi_flags; \
- mcdi_flags &= ~(1 << MC_CMD_FILTER_OP_EXT_IN_MATCH_ ## \
- mcdi_field ## _LBN); \
- if (mcdi_flags != old_mcdi_flags) \
- match_flags |= EFX_FILTER_MATCH_ ## gen_flag; \
- } while (0)
-
- if (encap) {
- /* encap filters must specify encap type */
- match_flags |= EFX_FILTER_MATCH_ENCAP_TYPE;
- /* and imply ethertype and ip proto */
- mcdi_flags &=
- ~(1 << MC_CMD_FILTER_OP_EXT_IN_MATCH_IP_PROTO_LBN);
- mcdi_flags &=
- ~(1 << MC_CMD_FILTER_OP_EXT_IN_MATCH_ETHER_TYPE_LBN);
- /* VLAN tags refer to the outer packet */
- MAP_FLAG(INNER_VID, INNER_VLAN);
- MAP_FLAG(OUTER_VID, OUTER_VLAN);
- /* everything else refers to the inner packet */
- MAP_FLAG(LOC_MAC_IG, IFRM_UNKNOWN_UCAST_DST);
- MAP_FLAG(LOC_MAC_IG, IFRM_UNKNOWN_MCAST_DST);
- MAP_FLAG(REM_HOST, IFRM_SRC_IP);
- MAP_FLAG(LOC_HOST, IFRM_DST_IP);
- MAP_FLAG(REM_MAC, IFRM_SRC_MAC);
- MAP_FLAG(REM_PORT, IFRM_SRC_PORT);
- MAP_FLAG(LOC_MAC, IFRM_DST_MAC);
- MAP_FLAG(LOC_PORT, IFRM_DST_PORT);
- MAP_FLAG(ETHER_TYPE, IFRM_ETHER_TYPE);
- MAP_FLAG(IP_PROTO, IFRM_IP_PROTO);
- } else {
- MAP_FLAG(LOC_MAC_IG, UNKNOWN_UCAST_DST);
- MAP_FLAG(LOC_MAC_IG, UNKNOWN_MCAST_DST);
- MAP_FLAG(REM_HOST, SRC_IP);
- MAP_FLAG(LOC_HOST, DST_IP);
- MAP_FLAG(REM_MAC, SRC_MAC);
- MAP_FLAG(REM_PORT, SRC_PORT);
- MAP_FLAG(LOC_MAC, DST_MAC);
- MAP_FLAG(LOC_PORT, DST_PORT);
- MAP_FLAG(ETHER_TYPE, ETHER_TYPE);
- MAP_FLAG(INNER_VID, INNER_VLAN);
- MAP_FLAG(OUTER_VID, OUTER_VLAN);
- MAP_FLAG(IP_PROTO, IP_PROTO);
- }
-#undef MAP_FLAG
-
- /* Did we map them all? */
- if (mcdi_flags)
- return -EINVAL;
-
- return match_flags;
-}
-
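
Each MAP_FLAG() invocation above clears one MCDI match bit and, if the value actually changed, sets the corresponding generic match flag; any bit still set after all mappings means the firmware reported a match type the driver does not model, hence the -EINVAL. The clear-and-compare idiom in isolation (bit positions and flag values are hypothetical):

    #include <stdio.h>

    #define MCDI_SRC_IP_BIT    (1u << 4)   /* hypothetical firmware bit position */
    #define DRV_MATCH_REM_HOST (1u << 0)   /* hypothetical driver match flag */

    int main(void)
    {
        unsigned int mcdi_flags = MCDI_SRC_IP_BIT | (1u << 9); /* one known, one unknown bit */
        int match_flags = 0;

        unsigned int old = mcdi_flags;
        mcdi_flags &= ~MCDI_SRC_IP_BIT;        /* consume the bit */
        if (mcdi_flags != old)                 /* it was present */
            match_flags |= DRV_MATCH_REM_HOST;

        if (mcdi_flags)                        /* bit 9 was never consumed */
            printf("unsupported firmware match bits: %#x\n", mcdi_flags);
        return 0;
    }
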
-static void efx_ef10_filter_cleanup_vlans(struct efx_nic *efx)
-{
- struct efx_ef10_filter_table *table = efx->filter_state;
- struct efx_ef10_filter_vlan *vlan, *next_vlan;
-
- /* See comment in efx_ef10_filter_table_remove() */
- if (!efx_rwsem_assert_write_locked(&efx->filter_sem))
- return;
-
- if (!table)
- return;
-
- list_for_each_entry_safe(vlan, next_vlan, &table->vlan_list, list)
- efx_ef10_filter_del_vlan_internal(efx, vlan);
-}
-
-static bool efx_ef10_filter_match_supported(struct efx_ef10_filter_table *table,
- bool encap,
- enum efx_filter_match_flags match_flags)
-{
- unsigned int match_pri;
- int mf;
-
- for (match_pri = 0;
- match_pri < table->rx_match_count;
- match_pri++) {
- mf = efx_ef10_filter_match_flags_from_mcdi(encap,
- table->rx_match_mcdi_flags[match_pri]);
- if (mf == match_flags)
- return true;
- }
-
- return false;
-}
-
-static int
-efx_ef10_filter_table_probe_matches(struct efx_nic *efx,
- struct efx_ef10_filter_table *table,
- bool encap)
-{
- MCDI_DECLARE_BUF(inbuf, MC_CMD_GET_PARSER_DISP_INFO_IN_LEN);
- MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_PARSER_DISP_INFO_OUT_LENMAX);
- unsigned int pd_match_pri, pd_match_count;
- size_t outlen;
- int rc;
-
- /* Find out which RX filter types are supported, and their priorities */
- MCDI_SET_DWORD(inbuf, GET_PARSER_DISP_INFO_IN_OP,
- encap ?
- MC_CMD_GET_PARSER_DISP_INFO_IN_OP_GET_SUPPORTED_ENCAP_RX_MATCHES :
- MC_CMD_GET_PARSER_DISP_INFO_IN_OP_GET_SUPPORTED_RX_MATCHES);
- rc = efx_mcdi_rpc(efx, MC_CMD_GET_PARSER_DISP_INFO,
- inbuf, sizeof(inbuf), outbuf, sizeof(outbuf),
- &outlen);
- if (rc)
- return rc;
-
- pd_match_count = MCDI_VAR_ARRAY_LEN(
- outlen, GET_PARSER_DISP_INFO_OUT_SUPPORTED_MATCHES);
-
- for (pd_match_pri = 0; pd_match_pri < pd_match_count; pd_match_pri++) {
- u32 mcdi_flags =
- MCDI_ARRAY_DWORD(
- outbuf,
- GET_PARSER_DISP_INFO_OUT_SUPPORTED_MATCHES,
- pd_match_pri);
- rc = efx_ef10_filter_match_flags_from_mcdi(encap, mcdi_flags);
- if (rc < 0) {
- netif_dbg(efx, probe, efx->net_dev,
- "%s: fw flags %#x pri %u not supported in driver\n",
- __func__, mcdi_flags, pd_match_pri);
- } else {
- netif_dbg(efx, probe, efx->net_dev,
- "%s: fw flags %#x pri %u supported as driver flags %#x pri %u\n",
- __func__, mcdi_flags, pd_match_pri,
- rc, table->rx_match_count);
- table->rx_match_mcdi_flags[table->rx_match_count] = mcdi_flags;
- table->rx_match_count++;
- }
- }
-
- return 0;
-}
-
-static int efx_ef10_filter_table_probe(struct efx_nic *efx)
-{
- struct efx_ef10_nic_data *nic_data = efx->nic_data;
- struct net_device *net_dev = efx->net_dev;
- struct efx_ef10_filter_table *table;
- struct efx_ef10_vlan *vlan;
- int rc;
-
- if (!efx_rwsem_assert_write_locked(&efx->filter_sem))
- return -EINVAL;
-
- if (efx->filter_state) /* already probed */
- return 0;
-
- table = kzalloc(sizeof(*table), GFP_KERNEL);
- if (!table)
- return -ENOMEM;
-
- table->rx_match_count = 0;
- rc = efx_ef10_filter_table_probe_matches(efx, table, false);
- if (rc)
- goto fail;
- if (nic_data->datapath_caps &
- (1 << MC_CMD_GET_CAPABILITIES_OUT_VXLAN_NVGRE_LBN))
- rc = efx_ef10_filter_table_probe_matches(efx, table, true);
- if (rc)
- goto fail;
- if ((efx_supported_features(efx) & NETIF_F_HW_VLAN_CTAG_FILTER) &&
- !(efx_ef10_filter_match_supported(table, false,
- (EFX_FILTER_MATCH_OUTER_VID | EFX_FILTER_MATCH_LOC_MAC)) &&
- efx_ef10_filter_match_supported(table, false,
- (EFX_FILTER_MATCH_OUTER_VID | EFX_FILTER_MATCH_LOC_MAC_IG)))) {
- netif_info(efx, probe, net_dev,
- "VLAN filters are not supported in this firmware variant\n");
- net_dev->features &= ~NETIF_F_HW_VLAN_CTAG_FILTER;
- efx->fixed_features &= ~NETIF_F_HW_VLAN_CTAG_FILTER;
- net_dev->hw_features &= ~NETIF_F_HW_VLAN_CTAG_FILTER;
- }
-
- table->entry = vzalloc(array_size(HUNT_FILTER_TBL_ROWS,
- sizeof(*table->entry)));
- if (!table->entry) {
- rc = -ENOMEM;
- goto fail;
- }
-
- table->mc_promisc_last = false;
- table->vlan_filter =
- !!(efx->net_dev->features & NETIF_F_HW_VLAN_CTAG_FILTER);
- INIT_LIST_HEAD(&table->vlan_list);
- init_rwsem(&table->lock);
-
- efx->filter_state = table;
-
- list_for_each_entry(vlan, &nic_data->vlan_list, list) {
- rc = efx_ef10_filter_add_vlan(efx, vlan->vid);
- if (rc)
- goto fail_add_vlan;
- }
-
- return 0;
-
-fail_add_vlan:
- efx_ef10_filter_cleanup_vlans(efx);
- efx->filter_state = NULL;
-fail:
- kfree(table);
- return rc;
-}
-
-/* Caller must hold efx->filter_sem for read if race against
- * efx_ef10_filter_table_remove() is possible
- */
-static void efx_ef10_filter_table_restore(struct efx_nic *efx)
-{
- struct efx_ef10_filter_table *table = efx->filter_state;
- struct efx_ef10_nic_data *nic_data = efx->nic_data;
- unsigned int invalid_filters = 0, failed = 0;
- struct efx_ef10_filter_vlan *vlan;
- struct efx_filter_spec *spec;
- struct efx_rss_context *ctx;
- unsigned int filter_idx;
- u32 mcdi_flags;
- int match_pri;
- int rc, i;
-
- WARN_ON(!rwsem_is_locked(&efx->filter_sem));
-
- if (!nic_data->must_restore_filters)
- return;
-
- if (!table)
- return;
-
- down_write(&table->lock);
- mutex_lock(&efx->rss_lock);
-
- for (filter_idx = 0; filter_idx < HUNT_FILTER_TBL_ROWS; filter_idx++) {
- spec = efx_ef10_filter_entry_spec(table, filter_idx);
- if (!spec)
- continue;
-
- mcdi_flags = efx_ef10_filter_mcdi_flags_from_spec(spec);
- match_pri = 0;
- while (match_pri < table->rx_match_count &&
- table->rx_match_mcdi_flags[match_pri] != mcdi_flags)
- ++match_pri;
- if (match_pri >= table->rx_match_count) {
- invalid_filters++;
- goto not_restored;
- }
- if (spec->rss_context)
- ctx = efx_find_rss_context_entry(efx, spec->rss_context);
- else
- ctx = &efx->rss_context;
- if (spec->flags & EFX_FILTER_FLAG_RX_RSS) {
- if (!ctx) {
- netif_warn(efx, drv, efx->net_dev,
- "Warning: unable to restore a filter with nonexistent RSS context %u.\n",
- spec->rss_context);
- invalid_filters++;
- goto not_restored;
- }
- if (ctx->context_id == EFX_EF10_RSS_CONTEXT_INVALID) {
- netif_warn(efx, drv, efx->net_dev,
- "Warning: unable to restore a filter with RSS context %u as it was not created.\n",
- spec->rss_context);
- invalid_filters++;
- goto not_restored;
- }
- }
-
- rc = efx_ef10_filter_push(efx, spec,
- &table->entry[filter_idx].handle,
- ctx, false);
- if (rc)
- failed++;
-
- if (rc) {
-not_restored:
- list_for_each_entry(vlan, &table->vlan_list, list)
- for (i = 0; i < EFX_EF10_NUM_DEFAULT_FILTERS; ++i)
- if (vlan->default_filters[i] == filter_idx)
- vlan->default_filters[i] =
- EFX_EF10_FILTER_ID_INVALID;
-
- kfree(spec);
- efx_ef10_filter_set_entry(table, filter_idx, NULL, 0);
- }
- }
-
- mutex_unlock(&efx->rss_lock);
- up_write(&table->lock);
-
- /* This can happen validly if the MC's capabilities have changed, so
- * is not an error.
- */
- if (invalid_filters)
- netif_dbg(efx, drv, efx->net_dev,
- "Did not restore %u filters that are now unsupported.\n",
- invalid_filters);
-
- if (failed)
- netif_err(efx, hw, efx->net_dev,
- "unable to restore %u filters\n", failed);
- else
- nic_data->must_restore_filters = false;
-}
-
-static void efx_ef10_filter_table_remove(struct efx_nic *efx)
-{
- struct efx_ef10_filter_table *table = efx->filter_state;
- MCDI_DECLARE_BUF(inbuf, MC_CMD_FILTER_OP_EXT_IN_LEN);
- struct efx_filter_spec *spec;
- unsigned int filter_idx;
- int rc;
-
- efx_ef10_filter_cleanup_vlans(efx);
- efx->filter_state = NULL;
- /* If we were called without locking, then it's not safe to free
- * the table as others might be using it. So we just WARN, leak
- * the memory, and potentially get an inconsistent filter table
- * state.
- * This should never actually happen.
- */
- if (!efx_rwsem_assert_write_locked(&efx->filter_sem))
- return;
-
- if (!table)
- return;
-
- for (filter_idx = 0; filter_idx < HUNT_FILTER_TBL_ROWS; filter_idx++) {
- spec = efx_ef10_filter_entry_spec(table, filter_idx);
- if (!spec)
- continue;
-
- MCDI_SET_DWORD(inbuf, FILTER_OP_IN_OP,
- efx_ef10_filter_is_exclusive(spec) ?
- MC_CMD_FILTER_OP_IN_OP_REMOVE :
- MC_CMD_FILTER_OP_IN_OP_UNSUBSCRIBE);
- MCDI_SET_QWORD(inbuf, FILTER_OP_IN_HANDLE,
- table->entry[filter_idx].handle);
- rc = efx_mcdi_rpc_quiet(efx, MC_CMD_FILTER_OP, inbuf,
- sizeof(inbuf), NULL, 0, NULL);
- if (rc)
- netif_info(efx, drv, efx->net_dev,
- "%s: filter %04x remove failed\n",
- __func__, filter_idx);
- kfree(spec);
- }
-
- vfree(table->entry);
- kfree(table);
-}
-
-static void efx_ef10_filter_mark_one_old(struct efx_nic *efx, uint16_t *id)
-{
- struct efx_ef10_filter_table *table = efx->filter_state;
- unsigned int filter_idx;
-
- efx_rwsem_assert_write_locked(&table->lock);
-
- if (*id != EFX_EF10_FILTER_ID_INVALID) {
- filter_idx = efx_ef10_filter_get_unsafe_id(*id);
- if (!table->entry[filter_idx].spec)
- netif_dbg(efx, drv, efx->net_dev,
- "marked null spec old %04x:%04x\n", *id,
- filter_idx);
- table->entry[filter_idx].spec |= EFX_EF10_FILTER_FLAG_AUTO_OLD;
- *id = EFX_EF10_FILTER_ID_INVALID;
- }
-}
-
-/* Mark old per-VLAN filters that may need to be removed */
-static void _efx_ef10_filter_vlan_mark_old(struct efx_nic *efx,
- struct efx_ef10_filter_vlan *vlan)
-{
- struct efx_ef10_filter_table *table = efx->filter_state;
- unsigned int i;
-
- for (i = 0; i < table->dev_uc_count; i++)
- efx_ef10_filter_mark_one_old(efx, &vlan->uc[i]);
- for (i = 0; i < table->dev_mc_count; i++)
- efx_ef10_filter_mark_one_old(efx, &vlan->mc[i]);
- for (i = 0; i < EFX_EF10_NUM_DEFAULT_FILTERS; i++)
- efx_ef10_filter_mark_one_old(efx, &vlan->default_filters[i]);
-}
-
-/* Mark old filters that may need to be removed.
- * Caller must hold efx->filter_sem for read if race against
- * efx_ef10_filter_table_remove() is possible
- */
-static void efx_ef10_filter_mark_old(struct efx_nic *efx)
-{
- struct efx_ef10_filter_table *table = efx->filter_state;
- struct efx_ef10_filter_vlan *vlan;
-
- down_write(&table->lock);
- list_for_each_entry(vlan, &table->vlan_list, list)
- _efx_ef10_filter_vlan_mark_old(efx, vlan);
- up_write(&table->lock);
-}
-
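
efx_ef10_filter_mark_old() above and efx_ef10_filter_remove_old() further down form a mark-and-sweep refresh of the automatic filters: the mark phase flags every current entry AUTO_OLD, re-inserting a still-wanted address clears the flag, and the sweep deletes whatever stayed flagged. A minimal sketch of the pattern, independent of the driver structures:

    #include <stdbool.h>
    #include <stdio.h>

    #define N_SLOTS 8

    struct slot {
        bool in_use;
        bool auto_old;   /* set by the mark phase, cleared when renewed */
    };

    static struct slot table[N_SLOTS];

    static void mark_old(void)
    {
        for (int i = 0; i < N_SLOTS; i++)
            if (table[i].in_use)
                table[i].auto_old = true;
    }

    static void renew(int i)          /* address still wanted: keep its filter */
    {
        table[i].auto_old = false;
    }

    static void remove_old(void)      /* sweep: drop everything not renewed */
    {
        for (int i = 0; i < N_SLOTS; i++)
            if (table[i].in_use && table[i].auto_old) {
                table[i].in_use = false;
                printf("removed stale filter %d\n", i);
            }
    }

    int main(void)
    {
        table[1].in_use = table[3].in_use = true;
        mark_old();
        renew(1);          /* slot 3 is not renewed... */
        remove_old();      /* ...so only slot 3 is removed */
        return 0;
    }
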
-static void efx_ef10_filter_uc_addr_list(struct efx_nic *efx)
-{
- struct efx_ef10_filter_table *table = efx->filter_state;
- struct net_device *net_dev = efx->net_dev;
- struct netdev_hw_addr *uc;
- unsigned int i;
-
- table->uc_promisc = !!(net_dev->flags & IFF_PROMISC);
- ether_addr_copy(table->dev_uc_list[0].addr, net_dev->dev_addr);
- i = 1;
- netdev_for_each_uc_addr(uc, net_dev) {
- if (i >= EFX_EF10_FILTER_DEV_UC_MAX) {
- table->uc_promisc = true;
- break;
- }
- ether_addr_copy(table->dev_uc_list[i].addr, uc->addr);
- i++;
- }
-
- table->dev_uc_count = i;
-}
-
-static void efx_ef10_filter_mc_addr_list(struct efx_nic *efx)
-{
- struct efx_ef10_filter_table *table = efx->filter_state;
- struct net_device *net_dev = efx->net_dev;
- struct netdev_hw_addr *mc;
- unsigned int i;
-
- table->mc_overflow = false;
- table->mc_promisc = !!(net_dev->flags & (IFF_PROMISC | IFF_ALLMULTI));
-
- i = 0;
- netdev_for_each_mc_addr(mc, net_dev) {
- if (i >= EFX_EF10_FILTER_DEV_MC_MAX) {
- table->mc_promisc = true;
- table->mc_overflow = true;
- break;
- }
- ether_addr_copy(table->dev_mc_list[i].addr, mc->addr);
- i++;
- }
-
- table->dev_mc_count = i;
-}
-
-static int efx_ef10_filter_insert_addr_list(struct efx_nic *efx,
- struct efx_ef10_filter_vlan *vlan,
- bool multicast, bool rollback)
-{
- struct efx_ef10_filter_table *table = efx->filter_state;
- struct efx_ef10_dev_addr *addr_list;
- enum efx_filter_flags filter_flags;
- struct efx_filter_spec spec;
- u8 baddr[ETH_ALEN];
- unsigned int i, j;
- int addr_count;
- u16 *ids;
- int rc;
-
- if (multicast) {
- addr_list = table->dev_mc_list;
- addr_count = table->dev_mc_count;
- ids = vlan->mc;
- } else {
- addr_list = table->dev_uc_list;
- addr_count = table->dev_uc_count;
- ids = vlan->uc;
- }
-
- filter_flags = efx_rss_active(&efx->rss_context) ? EFX_FILTER_FLAG_RX_RSS : 0;
-
- /* Insert/renew filters */
- for (i = 0; i < addr_count; i++) {
- EFX_WARN_ON_PARANOID(ids[i] != EFX_EF10_FILTER_ID_INVALID);
- efx_filter_init_rx(&spec, EFX_FILTER_PRI_AUTO, filter_flags, 0);
- efx_filter_set_eth_local(&spec, vlan->vid, addr_list[i].addr);
- rc = efx_ef10_filter_insert_locked(efx, &spec, true);
- if (rc < 0) {
- if (rollback) {
- netif_info(efx, drv, efx->net_dev,
- "efx_ef10_filter_insert failed rc=%d\n",
- rc);
- /* Fall back to promiscuous */
- for (j = 0; j < i; j++) {
- efx_ef10_filter_remove_unsafe(
- efx, EFX_FILTER_PRI_AUTO,
- ids[j]);
- ids[j] = EFX_EF10_FILTER_ID_INVALID;
- }
- return rc;
- } else {
- /* keep invalid ID, and carry on */
- }
- } else {
- ids[i] = efx_ef10_filter_get_unsafe_id(rc);
- }
- }
-
- if (multicast && rollback) {
- /* Also need an Ethernet broadcast filter */
- EFX_WARN_ON_PARANOID(vlan->default_filters[EFX_EF10_BCAST] !=
- EFX_EF10_FILTER_ID_INVALID);
- efx_filter_init_rx(&spec, EFX_FILTER_PRI_AUTO, filter_flags, 0);
- eth_broadcast_addr(baddr);
- efx_filter_set_eth_local(&spec, vlan->vid, baddr);
- rc = efx_ef10_filter_insert_locked(efx, &spec, true);
- if (rc < 0) {
- netif_warn(efx, drv, efx->net_dev,
- "Broadcast filter insert failed rc=%d\n", rc);
- /* Fall back to promiscuous */
- for (j = 0; j < i; j++) {
- efx_ef10_filter_remove_unsafe(
- efx, EFX_FILTER_PRI_AUTO,
- ids[j]);
- ids[j] = EFX_EF10_FILTER_ID_INVALID;
- }
- return rc;
- } else {
- vlan->default_filters[EFX_EF10_BCAST] =
- efx_ef10_filter_get_unsafe_id(rc);
- }
- }
-
- return 0;
-}
-
-static int efx_ef10_filter_insert_def(struct efx_nic *efx,
- struct efx_ef10_filter_vlan *vlan,
- enum efx_encap_type encap_type,
- bool multicast, bool rollback)
-{
- struct efx_ef10_nic_data *nic_data = efx->nic_data;
- enum efx_filter_flags filter_flags;
- struct efx_filter_spec spec;
- u8 baddr[ETH_ALEN];
- int rc;
- u16 *id;
-
- filter_flags = efx_rss_active(&efx->rss_context) ? EFX_FILTER_FLAG_RX_RSS : 0;
-
- efx_filter_init_rx(&spec, EFX_FILTER_PRI_AUTO, filter_flags, 0);
-
- if (multicast)
- efx_filter_set_mc_def(&spec);
- else
- efx_filter_set_uc_def(&spec);
-
- if (encap_type) {
- if (nic_data->datapath_caps &
- (1 << MC_CMD_GET_CAPABILITIES_OUT_VXLAN_NVGRE_LBN))
- efx_filter_set_encap_type(&spec, encap_type);
- else
- /* don't insert encap filters on non-supporting
- * platforms. ID will be left as INVALID.
- */
- return 0;
- }
-
- if (vlan->vid != EFX_FILTER_VID_UNSPEC)
- efx_filter_set_eth_local(&spec, vlan->vid, NULL);
-
- rc = efx_ef10_filter_insert_locked(efx, &spec, true);
- if (rc < 0) {
- const char *um = multicast ? "Multicast" : "Unicast";
- const char *encap_name = "";
- const char *encap_ipv = "";
-
- if ((encap_type & EFX_ENCAP_TYPES_MASK) ==
- EFX_ENCAP_TYPE_VXLAN)
- encap_name = "VXLAN ";
- else if ((encap_type & EFX_ENCAP_TYPES_MASK) ==
- EFX_ENCAP_TYPE_NVGRE)
- encap_name = "NVGRE ";
- else if ((encap_type & EFX_ENCAP_TYPES_MASK) ==
- EFX_ENCAP_TYPE_GENEVE)
- encap_name = "GENEVE ";
- if (encap_type & EFX_ENCAP_FLAG_IPV6)
- encap_ipv = "IPv6 ";
- else if (encap_type)
- encap_ipv = "IPv4 ";
-
- /* unprivileged functions can't insert mismatch filters
- * for encapsulated or unicast traffic, so downgrade
- * those warnings to debug.
- */
- netif_cond_dbg(efx, drv, efx->net_dev,
- rc == -EPERM && (encap_type || !multicast), warn,
- "%s%s%s mismatch filter insert failed rc=%d\n",
- encap_name, encap_ipv, um, rc);
- } else if (multicast) {
- /* mapping from encap types to default filter IDs (multicast) */
- static enum efx_ef10_default_filters map[] = {
- [EFX_ENCAP_TYPE_NONE] = EFX_EF10_MCDEF,
- [EFX_ENCAP_TYPE_VXLAN] = EFX_EF10_VXLAN4_MCDEF,
- [EFX_ENCAP_TYPE_NVGRE] = EFX_EF10_NVGRE4_MCDEF,
- [EFX_ENCAP_TYPE_GENEVE] = EFX_EF10_GENEVE4_MCDEF,
- [EFX_ENCAP_TYPE_VXLAN | EFX_ENCAP_FLAG_IPV6] =
- EFX_EF10_VXLAN6_MCDEF,
- [EFX_ENCAP_TYPE_NVGRE | EFX_ENCAP_FLAG_IPV6] =
- EFX_EF10_NVGRE6_MCDEF,
- [EFX_ENCAP_TYPE_GENEVE | EFX_ENCAP_FLAG_IPV6] =
- EFX_EF10_GENEVE6_MCDEF,
- };
-
- /* quick bounds check (BCAST result impossible) */
- BUILD_BUG_ON(EFX_EF10_BCAST != 0);
- if (encap_type >= ARRAY_SIZE(map) || map[encap_type] == 0) {
- WARN_ON(1);
- return -EINVAL;
- }
- /* then follow map */
- id = &vlan->default_filters[map[encap_type]];
-
- EFX_WARN_ON_PARANOID(*id != EFX_EF10_FILTER_ID_INVALID);
- *id = efx_ef10_filter_get_unsafe_id(rc);
- if (!nic_data->workaround_26807 && !encap_type) {
- /* Also need an Ethernet broadcast filter */
- efx_filter_init_rx(&spec, EFX_FILTER_PRI_AUTO,
- filter_flags, 0);
- eth_broadcast_addr(baddr);
- efx_filter_set_eth_local(&spec, vlan->vid, baddr);
- rc = efx_ef10_filter_insert_locked(efx, &spec, true);
- if (rc < 0) {
- netif_warn(efx, drv, efx->net_dev,
- "Broadcast filter insert failed rc=%d\n",
- rc);
- if (rollback) {
- /* Roll back the mc_def filter */
- efx_ef10_filter_remove_unsafe(
- efx, EFX_FILTER_PRI_AUTO,
- *id);
- *id = EFX_EF10_FILTER_ID_INVALID;
- return rc;
- }
- } else {
- EFX_WARN_ON_PARANOID(
- vlan->default_filters[EFX_EF10_BCAST] !=
- EFX_EF10_FILTER_ID_INVALID);
- vlan->default_filters[EFX_EF10_BCAST] =
- efx_ef10_filter_get_unsafe_id(rc);
- }
- }
- rc = 0;
- } else {
- /* mapping from encap types to default filter IDs (unicast) */
- static enum efx_ef10_default_filters map[] = {
- [EFX_ENCAP_TYPE_NONE] = EFX_EF10_UCDEF,
- [EFX_ENCAP_TYPE_VXLAN] = EFX_EF10_VXLAN4_UCDEF,
- [EFX_ENCAP_TYPE_NVGRE] = EFX_EF10_NVGRE4_UCDEF,
- [EFX_ENCAP_TYPE_GENEVE] = EFX_EF10_GENEVE4_UCDEF,
- [EFX_ENCAP_TYPE_VXLAN | EFX_ENCAP_FLAG_IPV6] =
- EFX_EF10_VXLAN6_UCDEF,
- [EFX_ENCAP_TYPE_NVGRE | EFX_ENCAP_FLAG_IPV6] =
- EFX_EF10_NVGRE6_UCDEF,
- [EFX_ENCAP_TYPE_GENEVE | EFX_ENCAP_FLAG_IPV6] =
- EFX_EF10_GENEVE6_UCDEF,
- };
-
- /* quick bounds check (BCAST result impossible) */
- BUILD_BUG_ON(EFX_EF10_BCAST != 0);
- if (encap_type >= ARRAY_SIZE(map) || map[encap_type] == 0) {
- WARN_ON(1);
- return -EINVAL;
- }
- /* then follow map */
- id = &vlan->default_filters[map[encap_type]];
- EFX_WARN_ON_PARANOID(*id != EFX_EF10_FILTER_ID_INVALID);
- *id = rc;
- rc = 0;
- }
- return rc;
-}
-
-/* Remove filters that weren't renewed. */
-static void efx_ef10_filter_remove_old(struct efx_nic *efx)
-{
- struct efx_ef10_filter_table *table = efx->filter_state;
- int remove_failed = 0;
- int remove_noent = 0;
- int rc;
- int i;
-
- down_write(&table->lock);
- for (i = 0; i < HUNT_FILTER_TBL_ROWS; i++) {
- if (READ_ONCE(table->entry[i].spec) &
- EFX_EF10_FILTER_FLAG_AUTO_OLD) {
- rc = efx_ef10_filter_remove_internal(efx,
- 1U << EFX_FILTER_PRI_AUTO, i, true);
- if (rc == -ENOENT)
- remove_noent++;
- else if (rc)
- remove_failed++;
- }
- }
- up_write(&table->lock);
-
- if (remove_failed)
- netif_info(efx, drv, efx->net_dev,
- "%s: failed to remove %d filters\n",
- __func__, remove_failed);
- if (remove_noent)
- netif_info(efx, drv, efx->net_dev,
- "%s: failed to remove %d non-existent filters\n",
- __func__, remove_noent);
-}
-
static int efx_ef10_vport_set_mac_address(struct efx_nic *efx)
{
struct efx_ef10_nic_data *nic_data = efx->nic_data;
@@ -5545,7 +3153,7 @@ static int efx_ef10_vport_set_mac_address(struct efx_nic *efx)
efx_device_detach_sync(efx);
efx_net_stop(efx->net_dev);
down_write(&efx->filter_sem);
- efx_ef10_filter_table_remove(efx);
+ efx_mcdi_filter_table_remove(efx);
up_write(&efx->filter_sem);
rc = efx_ef10_vadaptor_free(efx, nic_data->vport_id);
@@ -5577,7 +3185,7 @@ restore_vadaptor:
goto reset_nic;
restore_filters:
down_write(&efx->filter_sem);
- rc2 = efx_ef10_filter_table_probe(efx);
+ rc2 = efx_mcdi_filter_table_probe(efx);
up_write(&efx->filter_sem);
if (rc2)
goto reset_nic;
@@ -5598,256 +3206,6 @@ reset_nic:
return rc ? rc : rc2;
}
-/* Caller must hold efx->filter_sem for read if race against
- * efx_ef10_filter_table_remove() is possible
- */
-static void efx_ef10_filter_vlan_sync_rx_mode(struct efx_nic *efx,
- struct efx_ef10_filter_vlan *vlan)
-{
- struct efx_ef10_filter_table *table = efx->filter_state;
- struct efx_ef10_nic_data *nic_data = efx->nic_data;
-
- /* Do not install unspecified VID if VLAN filtering is enabled.
- * Do not install all specified VIDs if VLAN filtering is disabled.
- */
- if ((vlan->vid == EFX_FILTER_VID_UNSPEC) == table->vlan_filter)
- return;
-
- /* Insert/renew unicast filters */
- if (table->uc_promisc) {
- efx_ef10_filter_insert_def(efx, vlan, EFX_ENCAP_TYPE_NONE,
- false, false);
- efx_ef10_filter_insert_addr_list(efx, vlan, false, false);
- } else {
- /* If any of the filters failed to insert, fall back to
- * promiscuous mode - add in the uc_def filter. But keep
- * our individual unicast filters.
- */
- if (efx_ef10_filter_insert_addr_list(efx, vlan, false, false))
- efx_ef10_filter_insert_def(efx, vlan,
- EFX_ENCAP_TYPE_NONE,
- false, false);
- }
- efx_ef10_filter_insert_def(efx, vlan, EFX_ENCAP_TYPE_VXLAN,
- false, false);
- efx_ef10_filter_insert_def(efx, vlan, EFX_ENCAP_TYPE_VXLAN |
- EFX_ENCAP_FLAG_IPV6,
- false, false);
- efx_ef10_filter_insert_def(efx, vlan, EFX_ENCAP_TYPE_NVGRE,
- false, false);
- efx_ef10_filter_insert_def(efx, vlan, EFX_ENCAP_TYPE_NVGRE |
- EFX_ENCAP_FLAG_IPV6,
- false, false);
- efx_ef10_filter_insert_def(efx, vlan, EFX_ENCAP_TYPE_GENEVE,
- false, false);
- efx_ef10_filter_insert_def(efx, vlan, EFX_ENCAP_TYPE_GENEVE |
- EFX_ENCAP_FLAG_IPV6,
- false, false);
-
- /* Insert/renew multicast filters */
- /* If changing promiscuous state with cascaded multicast filters, remove
- * old filters first, so that packets are dropped rather than duplicated
- */
- if (nic_data->workaround_26807 &&
- table->mc_promisc_last != table->mc_promisc)
- efx_ef10_filter_remove_old(efx);
- if (table->mc_promisc) {
- if (nic_data->workaround_26807) {
- /* If we failed to insert promiscuous filters, rollback
- * and fall back to individual multicast filters
- */
- if (efx_ef10_filter_insert_def(efx, vlan,
- EFX_ENCAP_TYPE_NONE,
- true, true)) {
- /* Changing promisc state, so remove old filters */
- efx_ef10_filter_remove_old(efx);
- efx_ef10_filter_insert_addr_list(efx, vlan,
- true, false);
- }
- } else {
- /* If we failed to insert promiscuous filters, don't
- * rollback. Regardless, also insert the mc_list,
- * unless it's incomplete due to overflow
- */
- efx_ef10_filter_insert_def(efx, vlan,
- EFX_ENCAP_TYPE_NONE,
- true, false);
- if (!table->mc_overflow)
- efx_ef10_filter_insert_addr_list(efx, vlan,
- true, false);
- }
- } else {
- /* If any filters failed to insert, rollback and fall back to
- * promiscuous mode - mc_def filter and maybe broadcast. If
- * that fails, roll back again and insert as many of our
- * individual multicast filters as we can.
- */
- if (efx_ef10_filter_insert_addr_list(efx, vlan, true, true)) {
- /* Changing promisc state, so remove old filters */
- if (nic_data->workaround_26807)
- efx_ef10_filter_remove_old(efx);
- if (efx_ef10_filter_insert_def(efx, vlan,
- EFX_ENCAP_TYPE_NONE,
- true, true))
- efx_ef10_filter_insert_addr_list(efx, vlan,
- true, false);
- }
- }
- efx_ef10_filter_insert_def(efx, vlan, EFX_ENCAP_TYPE_VXLAN,
- true, false);
- efx_ef10_filter_insert_def(efx, vlan, EFX_ENCAP_TYPE_VXLAN |
- EFX_ENCAP_FLAG_IPV6,
- true, false);
- efx_ef10_filter_insert_def(efx, vlan, EFX_ENCAP_TYPE_NVGRE,
- true, false);
- efx_ef10_filter_insert_def(efx, vlan, EFX_ENCAP_TYPE_NVGRE |
- EFX_ENCAP_FLAG_IPV6,
- true, false);
- efx_ef10_filter_insert_def(efx, vlan, EFX_ENCAP_TYPE_GENEVE,
- true, false);
- efx_ef10_filter_insert_def(efx, vlan, EFX_ENCAP_TYPE_GENEVE |
- EFX_ENCAP_FLAG_IPV6,
- true, false);
-}
-
-/* Caller must hold efx->filter_sem for read if race against
- * efx_ef10_filter_table_remove() is possible
- */
-static void efx_ef10_filter_sync_rx_mode(struct efx_nic *efx)
-{
- struct efx_ef10_filter_table *table = efx->filter_state;
- struct net_device *net_dev = efx->net_dev;
- struct efx_ef10_filter_vlan *vlan;
- bool vlan_filter;
-
- if (!efx_dev_registered(efx))
- return;
-
- if (!table)
- return;
-
- efx_ef10_filter_mark_old(efx);
-
- /* Copy/convert the address lists; add the primary station
- * address and broadcast address
- */
- netif_addr_lock_bh(net_dev);
- efx_ef10_filter_uc_addr_list(efx);
- efx_ef10_filter_mc_addr_list(efx);
- netif_addr_unlock_bh(net_dev);
-
- /* If VLAN filtering changes, all old filters are finally removed.
- * Do it in advance to avoid conflicts for unicast untagged and
- * VLAN 0 tagged filters.
- */
- vlan_filter = !!(net_dev->features & NETIF_F_HW_VLAN_CTAG_FILTER);
- if (table->vlan_filter != vlan_filter) {
- table->vlan_filter = vlan_filter;
- efx_ef10_filter_remove_old(efx);
- }
-
- list_for_each_entry(vlan, &table->vlan_list, list)
- efx_ef10_filter_vlan_sync_rx_mode(efx, vlan);
-
- efx_ef10_filter_remove_old(efx);
- table->mc_promisc_last = table->mc_promisc;
-}
-
-static struct efx_ef10_filter_vlan *efx_ef10_filter_find_vlan(struct efx_nic *efx, u16 vid)
-{
- struct efx_ef10_filter_table *table = efx->filter_state;
- struct efx_ef10_filter_vlan *vlan;
-
- WARN_ON(!rwsem_is_locked(&efx->filter_sem));
-
- list_for_each_entry(vlan, &table->vlan_list, list) {
- if (vlan->vid == vid)
- return vlan;
- }
-
- return NULL;
-}
-
-static int efx_ef10_filter_add_vlan(struct efx_nic *efx, u16 vid)
-{
- struct efx_ef10_filter_table *table = efx->filter_state;
- struct efx_ef10_filter_vlan *vlan;
- unsigned int i;
-
- if (!efx_rwsem_assert_write_locked(&efx->filter_sem))
- return -EINVAL;
-
- vlan = efx_ef10_filter_find_vlan(efx, vid);
- if (WARN_ON(vlan)) {
- netif_err(efx, drv, efx->net_dev,
- "VLAN %u already added\n", vid);
- return -EALREADY;
- }
-
- vlan = kzalloc(sizeof(*vlan), GFP_KERNEL);
- if (!vlan)
- return -ENOMEM;
-
- vlan->vid = vid;
-
- for (i = 0; i < ARRAY_SIZE(vlan->uc); i++)
- vlan->uc[i] = EFX_EF10_FILTER_ID_INVALID;
- for (i = 0; i < ARRAY_SIZE(vlan->mc); i++)
- vlan->mc[i] = EFX_EF10_FILTER_ID_INVALID;
- for (i = 0; i < EFX_EF10_NUM_DEFAULT_FILTERS; i++)
- vlan->default_filters[i] = EFX_EF10_FILTER_ID_INVALID;
-
- list_add_tail(&vlan->list, &table->vlan_list);
-
- if (efx_dev_registered(efx))
- efx_ef10_filter_vlan_sync_rx_mode(efx, vlan);
-
- return 0;
-}
-
-static void efx_ef10_filter_del_vlan_internal(struct efx_nic *efx,
- struct efx_ef10_filter_vlan *vlan)
-{
- unsigned int i;
-
- /* See comment in efx_ef10_filter_table_remove() */
- if (!efx_rwsem_assert_write_locked(&efx->filter_sem))
- return;
-
- list_del(&vlan->list);
-
- for (i = 0; i < ARRAY_SIZE(vlan->uc); i++)
- efx_ef10_filter_remove_unsafe(efx, EFX_FILTER_PRI_AUTO,
- vlan->uc[i]);
- for (i = 0; i < ARRAY_SIZE(vlan->mc); i++)
- efx_ef10_filter_remove_unsafe(efx, EFX_FILTER_PRI_AUTO,
- vlan->mc[i]);
- for (i = 0; i < EFX_EF10_NUM_DEFAULT_FILTERS; i++)
- if (vlan->default_filters[i] != EFX_EF10_FILTER_ID_INVALID)
- efx_ef10_filter_remove_unsafe(efx, EFX_FILTER_PRI_AUTO,
- vlan->default_filters[i]);
-
- kfree(vlan);
-}
-
-static void efx_ef10_filter_del_vlan(struct efx_nic *efx, u16 vid)
-{
- struct efx_ef10_filter_vlan *vlan;
-
- /* See comment in efx_ef10_filter_table_remove() */
- if (!efx_rwsem_assert_write_locked(&efx->filter_sem))
- return;
-
- vlan = efx_ef10_filter_find_vlan(efx, vid);
- if (!vlan) {
- netif_err(efx, drv, efx->net_dev,
- "VLAN %u not found in filter state\n", vid);
- return;
- }
-
- efx_ef10_filter_del_vlan_internal(efx, vlan);
-}
-
static int efx_ef10_set_mac_address(struct efx_nic *efx)
{
MCDI_DECLARE_BUF(inbuf, MC_CMD_VADAPTOR_SET_MAC_IN_LEN);
@@ -5860,7 +3218,7 @@ static int efx_ef10_set_mac_address(struct efx_nic *efx)
mutex_lock(&efx->mac_lock);
down_write(&efx->filter_sem);
- efx_ef10_filter_table_remove(efx);
+ efx_mcdi_filter_table_remove(efx);
ether_addr_copy(MCDI_PTR(inbuf, VADAPTOR_SET_MAC_IN_MACADDR),
efx->net_dev->dev_addr);
@@ -5869,7 +3227,7 @@ static int efx_ef10_set_mac_address(struct efx_nic *efx)
rc = efx_mcdi_rpc_quiet(efx, MC_CMD_VADAPTOR_SET_MAC, inbuf,
sizeof(inbuf), NULL, 0, NULL);
- efx_ef10_filter_table_probe(efx);
+ efx_mcdi_filter_table_probe(efx);
up_write(&efx->filter_sem);
mutex_unlock(&efx->mac_lock);
@@ -5931,14 +3289,14 @@ static int efx_ef10_set_mac_address(struct efx_nic *efx)
static int efx_ef10_mac_reconfigure(struct efx_nic *efx)
{
- efx_ef10_filter_sync_rx_mode(efx);
+ efx_mcdi_filter_sync_rx_mode(efx);
return efx_mcdi_set_mac(efx);
}
static int efx_ef10_mac_reconfigure_vf(struct efx_nic *efx)
{
- efx_ef10_filter_sync_rx_mode(efx);
+ efx_mcdi_filter_sync_rx_mode(efx);
return 0;
}
@@ -6650,36 +4008,36 @@ const struct efx_nic_type efx_hunt_a0_vf_nic_type = {
.irq_handle_legacy = efx_ef10_legacy_interrupt,
.tx_probe = efx_ef10_tx_probe,
.tx_init = efx_ef10_tx_init,
- .tx_remove = efx_ef10_tx_remove,
+ .tx_remove = efx_mcdi_tx_remove,
.tx_write = efx_ef10_tx_write,
.tx_limit_len = efx_ef10_tx_limit_len,
- .rx_push_rss_config = efx_ef10_vf_rx_push_rss_config,
- .rx_pull_rss_config = efx_ef10_rx_pull_rss_config,
- .rx_probe = efx_ef10_rx_probe,
- .rx_init = efx_ef10_rx_init,
- .rx_remove = efx_ef10_rx_remove,
+ .rx_push_rss_config = efx_mcdi_vf_rx_push_rss_config,
+ .rx_pull_rss_config = efx_mcdi_rx_pull_rss_config,
+ .rx_probe = efx_mcdi_rx_probe,
+ .rx_init = efx_mcdi_rx_init,
+ .rx_remove = efx_mcdi_rx_remove,
.rx_write = efx_ef10_rx_write,
.rx_defer_refill = efx_ef10_rx_defer_refill,
- .ev_probe = efx_ef10_ev_probe,
+ .ev_probe = efx_mcdi_ev_probe,
.ev_init = efx_ef10_ev_init,
- .ev_fini = efx_ef10_ev_fini,
- .ev_remove = efx_ef10_ev_remove,
+ .ev_fini = efx_mcdi_ev_fini,
+ .ev_remove = efx_mcdi_ev_remove,
.ev_process = efx_ef10_ev_process,
.ev_read_ack = efx_ef10_ev_read_ack,
.ev_test_generate = efx_ef10_ev_test_generate,
- .filter_table_probe = efx_ef10_filter_table_probe,
- .filter_table_restore = efx_ef10_filter_table_restore,
- .filter_table_remove = efx_ef10_filter_table_remove,
- .filter_update_rx_scatter = efx_ef10_filter_update_rx_scatter,
- .filter_insert = efx_ef10_filter_insert,
- .filter_remove_safe = efx_ef10_filter_remove_safe,
- .filter_get_safe = efx_ef10_filter_get_safe,
- .filter_clear_rx = efx_ef10_filter_clear_rx,
- .filter_count_rx_used = efx_ef10_filter_count_rx_used,
- .filter_get_rx_id_limit = efx_ef10_filter_get_rx_id_limit,
- .filter_get_rx_ids = efx_ef10_filter_get_rx_ids,
+ .filter_table_probe = efx_mcdi_filter_table_probe,
+ .filter_table_restore = efx_mcdi_filter_table_restore,
+ .filter_table_remove = efx_mcdi_filter_table_remove,
+ .filter_update_rx_scatter = efx_mcdi_update_rx_scatter,
+ .filter_insert = efx_mcdi_filter_insert,
+ .filter_remove_safe = efx_mcdi_filter_remove_safe,
+ .filter_get_safe = efx_mcdi_filter_get_safe,
+ .filter_clear_rx = efx_mcdi_filter_clear_rx,
+ .filter_count_rx_used = efx_mcdi_filter_count_rx_used,
+ .filter_get_rx_id_limit = efx_mcdi_filter_get_rx_id_limit,
+ .filter_get_rx_ids = efx_mcdi_filter_get_rx_ids,
#ifdef CONFIG_RFS_ACCEL
- .filter_rfs_expire_one = efx_ef10_filter_rfs_expire_one,
+ .filter_rfs_expire_one = efx_mcdi_filter_rfs_expire_one,
#endif
#ifdef CONFIG_SFC_MTD
.mtd_probe = efx_port_dummy_op_int,
@@ -6709,7 +4067,7 @@ const struct efx_nic_type efx_hunt_a0_vf_nic_type = {
.timer_period_max = 1 << ERF_DD_EVQ_IND_TIMER_VAL_WIDTH,
.offload_features = EF10_OFFLOAD_FEATURES,
.mcdi_max_ver = 2,
- .max_rx_ip_filters = HUNT_FILTER_TBL_ROWS,
+ .max_rx_ip_filters = EFX_MCDI_FILTER_TBL_ROWS,
.hwtstamp_filters = 1 << HWTSTAMP_FILTER_NONE |
1 << HWTSTAMP_FILTER_ALL,
.rx_hash_key_size = 40,
@@ -6759,39 +4117,39 @@ const struct efx_nic_type efx_hunt_a0_nic_type = {
.irq_handle_legacy = efx_ef10_legacy_interrupt,
.tx_probe = efx_ef10_tx_probe,
.tx_init = efx_ef10_tx_init,
- .tx_remove = efx_ef10_tx_remove,
+ .tx_remove = efx_mcdi_tx_remove,
.tx_write = efx_ef10_tx_write,
.tx_limit_len = efx_ef10_tx_limit_len,
- .rx_push_rss_config = efx_ef10_pf_rx_push_rss_config,
- .rx_pull_rss_config = efx_ef10_rx_pull_rss_config,
- .rx_push_rss_context_config = efx_ef10_rx_push_rss_context_config,
- .rx_pull_rss_context_config = efx_ef10_rx_pull_rss_context_config,
- .rx_restore_rss_contexts = efx_ef10_rx_restore_rss_contexts,
- .rx_probe = efx_ef10_rx_probe,
- .rx_init = efx_ef10_rx_init,
- .rx_remove = efx_ef10_rx_remove,
+ .rx_push_rss_config = efx_mcdi_pf_rx_push_rss_config,
+ .rx_pull_rss_config = efx_mcdi_rx_pull_rss_config,
+ .rx_push_rss_context_config = efx_mcdi_rx_push_rss_context_config,
+ .rx_pull_rss_context_config = efx_mcdi_rx_pull_rss_context_config,
+ .rx_restore_rss_contexts = efx_mcdi_rx_restore_rss_contexts,
+ .rx_probe = efx_mcdi_rx_probe,
+ .rx_init = efx_mcdi_rx_init,
+ .rx_remove = efx_mcdi_rx_remove,
.rx_write = efx_ef10_rx_write,
.rx_defer_refill = efx_ef10_rx_defer_refill,
- .ev_probe = efx_ef10_ev_probe,
+ .ev_probe = efx_mcdi_ev_probe,
.ev_init = efx_ef10_ev_init,
- .ev_fini = efx_ef10_ev_fini,
- .ev_remove = efx_ef10_ev_remove,
+ .ev_fini = efx_mcdi_ev_fini,
+ .ev_remove = efx_mcdi_ev_remove,
.ev_process = efx_ef10_ev_process,
.ev_read_ack = efx_ef10_ev_read_ack,
.ev_test_generate = efx_ef10_ev_test_generate,
- .filter_table_probe = efx_ef10_filter_table_probe,
- .filter_table_restore = efx_ef10_filter_table_restore,
- .filter_table_remove = efx_ef10_filter_table_remove,
- .filter_update_rx_scatter = efx_ef10_filter_update_rx_scatter,
- .filter_insert = efx_ef10_filter_insert,
- .filter_remove_safe = efx_ef10_filter_remove_safe,
- .filter_get_safe = efx_ef10_filter_get_safe,
- .filter_clear_rx = efx_ef10_filter_clear_rx,
- .filter_count_rx_used = efx_ef10_filter_count_rx_used,
- .filter_get_rx_id_limit = efx_ef10_filter_get_rx_id_limit,
- .filter_get_rx_ids = efx_ef10_filter_get_rx_ids,
+ .filter_table_probe = efx_mcdi_filter_table_probe,
+ .filter_table_restore = efx_mcdi_filter_table_restore,
+ .filter_table_remove = efx_mcdi_filter_table_remove,
+ .filter_update_rx_scatter = efx_mcdi_update_rx_scatter,
+ .filter_insert = efx_mcdi_filter_insert,
+ .filter_remove_safe = efx_mcdi_filter_remove_safe,
+ .filter_get_safe = efx_mcdi_filter_get_safe,
+ .filter_clear_rx = efx_mcdi_filter_clear_rx,
+ .filter_count_rx_used = efx_mcdi_filter_count_rx_used,
+ .filter_get_rx_id_limit = efx_mcdi_filter_get_rx_id_limit,
+ .filter_get_rx_ids = efx_mcdi_filter_get_rx_ids,
#ifdef CONFIG_RFS_ACCEL
- .filter_rfs_expire_one = efx_ef10_filter_rfs_expire_one,
+ .filter_rfs_expire_one = efx_mcdi_filter_rfs_expire_one,
#endif
#ifdef CONFIG_SFC_MTD
.mtd_probe = efx_ef10_mtd_probe,
@@ -6844,7 +4202,7 @@ const struct efx_nic_type efx_hunt_a0_nic_type = {
.timer_period_max = 1 << ERF_DD_EVQ_IND_TIMER_VAL_WIDTH,
.offload_features = EF10_OFFLOAD_FEATURES,
.mcdi_max_ver = 2,
- .max_rx_ip_filters = HUNT_FILTER_TBL_ROWS,
+ .max_rx_ip_filters = EFX_MCDI_FILTER_TBL_ROWS,
.hwtstamp_filters = 1 << HWTSTAMP_FILTER_NONE |
1 << HWTSTAMP_FILTER_ALL,
.rx_hash_key_size = 40,
diff --git a/drivers/net/ethernet/sfc/ef10_sriov.c b/drivers/net/ethernet/sfc/ef10_sriov.c
index 52bd43f45761..14393767ef9f 100644
--- a/drivers/net/ethernet/sfc/ef10_sriov.c
+++ b/drivers/net/ethernet/sfc/ef10_sriov.c
@@ -522,10 +522,9 @@ int efx_ef10_sriov_set_vf_mac(struct efx_nic *efx, int vf_i, u8 *mac)
if (!is_zero_ether_addr(mac)) {
rc = efx_ef10_vport_add_mac(efx, vf->vport_id, mac);
- if (rc) {
- eth_zero_addr(vf->mac);
+ if (rc)
goto fail;
- }
+
if (vf->efx)
ether_addr_copy(vf->efx->net_dev->dev_addr, mac);
}
diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c
index 6891df471538..4481f21a1f43 100644
--- a/drivers/net/ethernet/sfc/efx.c
+++ b/drivers/net/ethernet/sfc/efx.c
@@ -23,6 +23,10 @@
#include <net/gre.h>
#include <net/udp_tunnel.h>
#include "efx.h"
+#include "efx_common.h"
+#include "efx_channels.h"
+#include "rx_common.h"
+#include "tx_common.h"
#include "nic.h"
#include "io.h"
#include "selftest.h"
@@ -39,56 +43,6 @@
**************************************************************************
*/
-/* Loopback mode names (see LOOPBACK_MODE()) */
-const unsigned int efx_loopback_mode_max = LOOPBACK_MAX;
-const char *const efx_loopback_mode_names[] = {
- [LOOPBACK_NONE] = "NONE",
- [LOOPBACK_DATA] = "DATAPATH",
- [LOOPBACK_GMAC] = "GMAC",
- [LOOPBACK_XGMII] = "XGMII",
- [LOOPBACK_XGXS] = "XGXS",
- [LOOPBACK_XAUI] = "XAUI",
- [LOOPBACK_GMII] = "GMII",
- [LOOPBACK_SGMII] = "SGMII",
- [LOOPBACK_XGBR] = "XGBR",
- [LOOPBACK_XFI] = "XFI",
- [LOOPBACK_XAUI_FAR] = "XAUI_FAR",
- [LOOPBACK_GMII_FAR] = "GMII_FAR",
- [LOOPBACK_SGMII_FAR] = "SGMII_FAR",
- [LOOPBACK_XFI_FAR] = "XFI_FAR",
- [LOOPBACK_GPHY] = "GPHY",
- [LOOPBACK_PHYXS] = "PHYXS",
- [LOOPBACK_PCS] = "PCS",
- [LOOPBACK_PMAPMD] = "PMA/PMD",
- [LOOPBACK_XPORT] = "XPORT",
- [LOOPBACK_XGMII_WS] = "XGMII_WS",
- [LOOPBACK_XAUI_WS] = "XAUI_WS",
- [LOOPBACK_XAUI_WS_FAR] = "XAUI_WS_FAR",
- [LOOPBACK_XAUI_WS_NEAR] = "XAUI_WS_NEAR",
- [LOOPBACK_GMII_WS] = "GMII_WS",
- [LOOPBACK_XFI_WS] = "XFI_WS",
- [LOOPBACK_XFI_WS_FAR] = "XFI_WS_FAR",
- [LOOPBACK_PHYXS_WS] = "PHYXS_WS",
-};
-
-const unsigned int efx_reset_type_max = RESET_TYPE_MAX;
-const char *const efx_reset_type_names[] = {
- [RESET_TYPE_INVISIBLE] = "INVISIBLE",
- [RESET_TYPE_ALL] = "ALL",
- [RESET_TYPE_RECOVER_OR_ALL] = "RECOVER_OR_ALL",
- [RESET_TYPE_WORLD] = "WORLD",
- [RESET_TYPE_RECOVER_OR_DISABLE] = "RECOVER_OR_DISABLE",
- [RESET_TYPE_DATAPATH] = "DATAPATH",
- [RESET_TYPE_MC_BIST] = "MC_BIST",
- [RESET_TYPE_DISABLE] = "DISABLE",
- [RESET_TYPE_TX_WATCHDOG] = "TX_WATCHDOG",
- [RESET_TYPE_INT_ERROR] = "INT_ERROR",
- [RESET_TYPE_DMA_ERROR] = "DMA_ERROR",
- [RESET_TYPE_TX_SKIP] = "TX_SKIP",
- [RESET_TYPE_MC_FAILURE] = "MC_FAILURE",
- [RESET_TYPE_MCDI_TIMEOUT] = "MCDI_TIMEOUT (FLR)",
-};
-
/* UDP tunnel type names */
static const char *const efx_udp_tunnel_type_names[] = {
[TUNNEL_ENCAP_UDP_PORT_ENTRY_VXLAN] = "vxlan",
@@ -104,18 +58,6 @@ void efx_get_udp_tunnel_type_name(u16 type, char *buf, size_t buflen)
snprintf(buf, buflen, "type %d", type);
}
-/* Reset workqueue. If any NIC has a hardware failure then a reset will be
- * queued onto this work queue. This is not a per-nic work queue, because
- * efx_reset_work() acquires the rtnl lock, so resets are naturally serialised.
- */
-static struct workqueue_struct *reset_workqueue;
-
-/* How often and how many times to poll for a reset while waiting for a
- * BIST that another function started to complete.
- */
-#define BIST_WAIT_DELAY_MS 100
-#define BIST_WAIT_DELAY_COUNT 100
-
/**************************************************************************
*
* Configurable values
@@ -135,21 +77,6 @@ module_param(efx_separate_tx_channels, bool, 0444);
MODULE_PARM_DESC(efx_separate_tx_channels,
"Use separate channels for TX and RX");
-/* This is the weight assigned to each of the (per-channel) virtual
- * NAPI devices.
- */
-static int napi_weight = 64;
-
-/* This is the time (in jiffies) between invocations of the hardware
- * monitor.
- * On Falcon-based NICs, this will:
- * - Check the on-board hardware monitor;
- * - Poll the link state and reconfigure the hardware as necessary.
- * On Siena-based NICs for power systems with EEH support, this will give EEH a
- * chance to start.
- */
-static unsigned int efx_monitor_interval = 1 * HZ;
-
/* Initial interrupt moderation settings. They can be modified after
* module load with ethtool.
*
@@ -169,38 +96,10 @@ static unsigned int rx_irq_mod_usec = 60;
*/
static unsigned int tx_irq_mod_usec = 150;
-/* This is the first interrupt mode to try out of:
- * 0 => MSI-X
- * 1 => MSI
- * 2 => legacy
- */
-static unsigned int interrupt_mode;
-
-/* This is the requested number of CPUs to use for Receive-Side Scaling (RSS),
- * i.e. the number of CPUs among which we may distribute simultaneous
- * interrupt handling.
- *
- * Cards without MSI-X will only target one CPU via legacy or MSI interrupt.
- * The default (0) means to assign an interrupt to each core.
- */
-static unsigned int rss_cpus;
-module_param(rss_cpus, uint, 0444);
-MODULE_PARM_DESC(rss_cpus, "Number of CPUs to use for Receive-Side Scaling");
-
static bool phy_flash_cfg;
module_param(phy_flash_cfg, bool, 0644);
MODULE_PARM_DESC(phy_flash_cfg, "Set PHYs into reflash mode initially");
-static unsigned irq_adapt_low_thresh = 8000;
-module_param(irq_adapt_low_thresh, uint, 0644);
-MODULE_PARM_DESC(irq_adapt_low_thresh,
- "Threshold score for reducing IRQ moderation");
-
-static unsigned irq_adapt_high_thresh = 16000;
-module_param(irq_adapt_high_thresh, uint, 0644);
-MODULE_PARM_DESC(irq_adapt_high_thresh,
- "Threshold score for increasing IRQ moderation");
-
static unsigned debug = (NETIF_MSG_DRV | NETIF_MSG_PROBE |
NETIF_MSG_LINK | NETIF_MSG_IFDOWN |
NETIF_MSG_IFUP | NETIF_MSG_RX_ERR |
@@ -214,18 +113,8 @@ MODULE_PARM_DESC(debug, "Bitmapped debugging message enable value");
*
*************************************************************************/
-static int efx_soft_enable_interrupts(struct efx_nic *efx);
-static void efx_soft_disable_interrupts(struct efx_nic *efx);
-static void efx_remove_channel(struct efx_channel *channel);
-static void efx_remove_channels(struct efx_nic *efx);
static const struct efx_channel_type efx_default_channel_type;
static void efx_remove_port(struct efx_nic *efx);
-static void efx_init_napi_channel(struct efx_channel *channel);
-static void efx_fini_napi(struct efx_nic *efx);
-static void efx_fini_napi_channel(struct efx_channel *channel);
-static void efx_fini_struct(struct efx_nic *efx);
-static void efx_start_all(struct efx_nic *efx);
-static void efx_stop_all(struct efx_nic *efx);
static int efx_xdp_setup_prog(struct efx_nic *efx, struct bpf_prog *prog);
static int efx_xdp(struct net_device *dev, struct netdev_bpf *xdp);
static int efx_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **xdpfs,
@@ -239,776 +128,12 @@ static int efx_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **xdpfs,
ASSERT_RTNL(); \
} while (0)
-static int efx_check_disabled(struct efx_nic *efx)
-{
- if (efx->state == STATE_DISABLED || efx->state == STATE_RECOVERY) {
- netif_err(efx, drv, efx->net_dev,
- "device is disabled due to earlier errors\n");
- return -EIO;
- }
- return 0;
-}
-
-/**************************************************************************
- *
- * Event queue processing
- *
- *************************************************************************/
-
-/* Process channel's event queue
- *
- * This function is responsible for processing the event queue of a
- * single channel. The caller must guarantee that this function will
- * never be concurrently called more than once on the same channel,
- * though different channels may be being processed concurrently.
- */
-static int efx_process_channel(struct efx_channel *channel, int budget)
-{
- struct efx_tx_queue *tx_queue;
- struct list_head rx_list;
- int spent;
-
- if (unlikely(!channel->enabled))
- return 0;
-
- /* Prepare the batch receive list */
- EFX_WARN_ON_PARANOID(channel->rx_list != NULL);
- INIT_LIST_HEAD(&rx_list);
- channel->rx_list = &rx_list;
-
- efx_for_each_channel_tx_queue(tx_queue, channel) {
- tx_queue->pkts_compl = 0;
- tx_queue->bytes_compl = 0;
- }
-
- spent = efx_nic_process_eventq(channel, budget);
- if (spent && efx_channel_has_rx_queue(channel)) {
- struct efx_rx_queue *rx_queue =
- efx_channel_get_rx_queue(channel);
-
- efx_rx_flush_packet(channel);
- efx_fast_push_rx_descriptors(rx_queue, true);
- }
-
- /* Update BQL */
- efx_for_each_channel_tx_queue(tx_queue, channel) {
- if (tx_queue->bytes_compl) {
- netdev_tx_completed_queue(tx_queue->core_txq,
- tx_queue->pkts_compl, tx_queue->bytes_compl);
- }
- }
-
- /* Receive any packets we queued up */
- netif_receive_skb_list(channel->rx_list);
- channel->rx_list = NULL;
-
- return spent;
-}
-
-/* NAPI poll handler
- *
- * NAPI guarantees serialisation of polls of the same device, which
- * provides the guarantee required by efx_process_channel().
- */
-static void efx_update_irq_mod(struct efx_nic *efx, struct efx_channel *channel)
-{
- int step = efx->irq_mod_step_us;
-
- if (channel->irq_mod_score < irq_adapt_low_thresh) {
- if (channel->irq_moderation_us > step) {
- channel->irq_moderation_us -= step;
- efx->type->push_irq_moderation(channel);
- }
- } else if (channel->irq_mod_score > irq_adapt_high_thresh) {
- if (channel->irq_moderation_us <
- efx->irq_rx_moderation_us) {
- channel->irq_moderation_us += step;
- efx->type->push_irq_moderation(channel);
- }
- }
-
- channel->irq_count = 0;
- channel->irq_mod_score = 0;
-}
-
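
efx_update_irq_mod() above is the adaptive interrupt moderation governed by the irq_adapt_low_thresh/irq_adapt_high_thresh parameters being moved out of this file: a low event score shortens the moderation interval (interrupt sooner), a high score lengthens it up to the configured RX maximum. The control loop in isolation, with the step and maximum chosen only for illustration:

    #include <stdio.h>

    static unsigned int low_thresh = 8000;    /* mirrors the removed module parameter defaults */
    static unsigned int high_thresh = 16000;

    struct chan {
        unsigned int irq_mod_score;
        unsigned int irq_moderation_us;
    };

    static void update_irq_mod(struct chan *ch, unsigned int step_us, unsigned int max_us)
    {
        if (ch->irq_mod_score < low_thresh) {
            if (ch->irq_moderation_us > step_us)
                ch->irq_moderation_us -= step_us;   /* interrupt sooner */
        } else if (ch->irq_mod_score > high_thresh) {
            if (ch->irq_moderation_us < max_us)
                ch->irq_moderation_us += step_us;   /* batch more work per IRQ */
        }
        ch->irq_mod_score = 0;   /* scores are per sampling window */
    }

    int main(void)
    {
        struct chan ch = { .irq_mod_score = 20000, .irq_moderation_us = 60 };

        update_irq_mod(&ch, 10, 150);
        printf("moderation now %u us\n", ch.irq_moderation_us);   /* prints 70 */
        return 0;
    }
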
-static int efx_poll(struct napi_struct *napi, int budget)
-{
- struct efx_channel *channel =
- container_of(napi, struct efx_channel, napi_str);
- struct efx_nic *efx = channel->efx;
- int spent;
-
- netif_vdbg(efx, intr, efx->net_dev,
- "channel %d NAPI poll executing on CPU %d\n",
- channel->channel, raw_smp_processor_id());
-
- spent = efx_process_channel(channel, budget);
-
- xdp_do_flush_map();
-
- if (spent < budget) {
- if (efx_channel_has_rx_queue(channel) &&
- efx->irq_rx_adaptive &&
- unlikely(++channel->irq_count == 1000)) {
- efx_update_irq_mod(efx, channel);
- }
-
-#ifdef CONFIG_RFS_ACCEL
- /* Perhaps expire some ARFS filters */
- mod_delayed_work(system_wq, &channel->filter_work, 0);
-#endif
-
- /* There is no race here; although napi_disable() will
- * only wait for napi_complete(), this isn't a problem
- * since efx_nic_eventq_read_ack() will have no effect if
- * interrupts have already been disabled.
- */
- if (napi_complete_done(napi, spent))
- efx_nic_eventq_read_ack(channel);
- }
-
- return spent;
-}
-
-/* Create event queue
- * Event queue memory allocations are done only once. If the channel
- * is reset, the memory buffer will be reused; this guards against
- * errors during channel reset and also simplifies interrupt handling.
- */
-static int efx_probe_eventq(struct efx_channel *channel)
-{
- struct efx_nic *efx = channel->efx;
- unsigned long entries;
-
- netif_dbg(efx, probe, efx->net_dev,
- "chan %d create event queue\n", channel->channel);
-
- /* Build an event queue with room for one event per tx and rx buffer,
- * plus some extra for link state events and MCDI completions. */
- entries = roundup_pow_of_two(efx->rxq_entries + efx->txq_entries + 128);
- EFX_WARN_ON_PARANOID(entries > EFX_MAX_EVQ_SIZE);
- channel->eventq_mask = max(entries, EFX_MIN_EVQ_SIZE) - 1;
-
- return efx_nic_probe_eventq(channel);
-}
-
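
efx_probe_eventq() above sizes the event ring for one event per RX and TX descriptor plus 128 slack entries for link-state and MCDI events, rounds up to a power of two so the queue can wrap with a simple mask, and stores that mask. The same arithmetic stand-alone (512 is an assumed stand-in for EFX_MIN_EVQ_SIZE):

    #include <stdio.h>

    /* Round v up to the next power of two (v > 0). */
    static unsigned long roundup_pow2(unsigned long v)
    {
        unsigned long r = 1;

        while (r < v)
            r <<= 1;
        return r;
    }

    int main(void)
    {
        unsigned long rxq_entries = 1024, txq_entries = 1024;
        unsigned long min_evq = 512;   /* assumed minimum event queue size */

        /* One event per descriptor plus slack for link/MCDI events. */
        unsigned long entries = roundup_pow2(rxq_entries + txq_entries + 128);
        unsigned long mask = (entries > min_evq ? entries : min_evq) - 1;

        printf("evq entries %lu, ring mask %#lx\n", entries, mask);   /* 4096, 0xfff */
        return 0;
    }
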
-/* Prepare channel's event queue */
-static int efx_init_eventq(struct efx_channel *channel)
-{
- struct efx_nic *efx = channel->efx;
- int rc;
-
- EFX_WARN_ON_PARANOID(channel->eventq_init);
-
- netif_dbg(efx, drv, efx->net_dev,
- "chan %d init event queue\n", channel->channel);
-
- rc = efx_nic_init_eventq(channel);
- if (rc == 0) {
- efx->type->push_irq_moderation(channel);
- channel->eventq_read_ptr = 0;
- channel->eventq_init = true;
- }
- return rc;
-}
-
-/* Enable event queue processing and NAPI */
-void efx_start_eventq(struct efx_channel *channel)
-{
- netif_dbg(channel->efx, ifup, channel->efx->net_dev,
- "chan %d start event queue\n", channel->channel);
-
- /* Make sure the NAPI handler sees the enabled flag set */
- channel->enabled = true;
- smp_wmb();
-
- napi_enable(&channel->napi_str);
- efx_nic_eventq_read_ack(channel);
-}
-
-/* Disable event queue processing and NAPI */
-void efx_stop_eventq(struct efx_channel *channel)
-{
- if (!channel->enabled)
- return;
-
- napi_disable(&channel->napi_str);
- channel->enabled = false;
-}
-
-static void efx_fini_eventq(struct efx_channel *channel)
-{
- if (!channel->eventq_init)
- return;
-
- netif_dbg(channel->efx, drv, channel->efx->net_dev,
- "chan %d fini event queue\n", channel->channel);
-
- efx_nic_fini_eventq(channel);
- channel->eventq_init = false;
-}
-
-static void efx_remove_eventq(struct efx_channel *channel)
-{
- netif_dbg(channel->efx, drv, channel->efx->net_dev,
- "chan %d remove event queue\n", channel->channel);
-
- efx_nic_remove_eventq(channel);
-}
-
-/**************************************************************************
- *
- * Channel handling
- *
- *************************************************************************/
-
-/* Allocate and initialise a channel structure. */
-static struct efx_channel *
-efx_alloc_channel(struct efx_nic *efx, int i, struct efx_channel *old_channel)
-{
- struct efx_channel *channel;
- struct efx_rx_queue *rx_queue;
- struct efx_tx_queue *tx_queue;
- int j;
-
- channel = kzalloc(sizeof(*channel), GFP_KERNEL);
- if (!channel)
- return NULL;
-
- channel->efx = efx;
- channel->channel = i;
- channel->type = &efx_default_channel_type;
-
- for (j = 0; j < EFX_TXQ_TYPES; j++) {
- tx_queue = &channel->tx_queue[j];
- tx_queue->efx = efx;
- tx_queue->queue = i * EFX_TXQ_TYPES + j;
- tx_queue->channel = channel;
- }
-
-#ifdef CONFIG_RFS_ACCEL
- INIT_DELAYED_WORK(&channel->filter_work, efx_filter_rfs_expire);
-#endif
-
- rx_queue = &channel->rx_queue;
- rx_queue->efx = efx;
- timer_setup(&rx_queue->slow_fill, efx_rx_slow_fill, 0);
-
- return channel;
-}
-
-/* Allocate and initialise a channel structure, copying parameters
- * (but not resources) from an old channel structure.
- */
-static struct efx_channel *
-efx_copy_channel(const struct efx_channel *old_channel)
-{
- struct efx_channel *channel;
- struct efx_rx_queue *rx_queue;
- struct efx_tx_queue *tx_queue;
- int j;
-
- channel = kmalloc(sizeof(*channel), GFP_KERNEL);
- if (!channel)
- return NULL;
-
- *channel = *old_channel;
-
- channel->napi_dev = NULL;
- INIT_HLIST_NODE(&channel->napi_str.napi_hash_node);
- channel->napi_str.napi_id = 0;
- channel->napi_str.state = 0;
- memset(&channel->eventq, 0, sizeof(channel->eventq));
-
- for (j = 0; j < EFX_TXQ_TYPES; j++) {
- tx_queue = &channel->tx_queue[j];
- if (tx_queue->channel)
- tx_queue->channel = channel;
- tx_queue->buffer = NULL;
- memset(&tx_queue->txd, 0, sizeof(tx_queue->txd));
- }
-
- rx_queue = &channel->rx_queue;
- rx_queue->buffer = NULL;
- memset(&rx_queue->rxd, 0, sizeof(rx_queue->rxd));
- timer_setup(&rx_queue->slow_fill, efx_rx_slow_fill, 0);
-#ifdef CONFIG_RFS_ACCEL
- INIT_DELAYED_WORK(&channel->filter_work, efx_filter_rfs_expire);
-#endif
-
- return channel;
-}
-
-static int efx_probe_channel(struct efx_channel *channel)
-{
- struct efx_tx_queue *tx_queue;
- struct efx_rx_queue *rx_queue;
- int rc;
-
- netif_dbg(channel->efx, probe, channel->efx->net_dev,
- "creating channel %d\n", channel->channel);
-
- rc = channel->type->pre_probe(channel);
- if (rc)
- goto fail;
-
- rc = efx_probe_eventq(channel);
- if (rc)
- goto fail;
-
- efx_for_each_channel_tx_queue(tx_queue, channel) {
- rc = efx_probe_tx_queue(tx_queue);
- if (rc)
- goto fail;
- }
-
- efx_for_each_channel_rx_queue(rx_queue, channel) {
- rc = efx_probe_rx_queue(rx_queue);
- if (rc)
- goto fail;
- }
-
- channel->rx_list = NULL;
-
- return 0;
-
-fail:
- efx_remove_channel(channel);
- return rc;
-}
-
-static void
-efx_get_channel_name(struct efx_channel *channel, char *buf, size_t len)
-{
- struct efx_nic *efx = channel->efx;
- const char *type;
- int number;
-
- number = channel->channel;
-
- if (number >= efx->xdp_channel_offset &&
- !WARN_ON_ONCE(!efx->n_xdp_channels)) {
- type = "-xdp";
- number -= efx->xdp_channel_offset;
- } else if (efx->tx_channel_offset == 0) {
- type = "";
- } else if (number < efx->tx_channel_offset) {
- type = "-rx";
- } else {
- type = "-tx";
- number -= efx->tx_channel_offset;
- }
- snprintf(buf, len, "%s%s-%d", efx->name, type, number);
-}
-
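For illustration, the naming scheme above yields names like the following for a hypothetical layout (interface "eth0", 4 RX-only channels, TX channels from offset 4, XDP channels from offset 8); a sketch, not driver output.

#include <stdio.h>

int main(void)
{
	int tx_offset = 4, xdp_offset = 8, n_channels = 10;

	for (int ch = 0; ch < n_channels; ch++) {
		const char *type;
		int number = ch;

		if (ch >= xdp_offset) {
			type = "-xdp";
			number -= xdp_offset;
		} else if (ch < tx_offset) {
			type = "-rx";
		} else {
			type = "-tx";
			number -= tx_offset;
		}
		printf("eth0%s-%d\n", type, number);
	}
	return 0;
}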
-static void efx_set_channel_names(struct efx_nic *efx)
-{
- struct efx_channel *channel;
-
- efx_for_each_channel(channel, efx)
- channel->type->get_name(channel,
- efx->msi_context[channel->channel].name,
- sizeof(efx->msi_context[0].name));
-}
-
-static int efx_probe_channels(struct efx_nic *efx)
-{
- struct efx_channel *channel;
- int rc;
-
- /* Restart special buffer allocation */
- efx->next_buffer_table = 0;
-
- /* Probe channels in reverse, so that any 'extra' channels
- * use the start of the buffer table. This allows the traffic
- * channels to be resized without moving them or wasting the
- * entries before them.
- */
- efx_for_each_channel_rev(channel, efx) {
- rc = efx_probe_channel(channel);
- if (rc) {
- netif_err(efx, probe, efx->net_dev,
- "failed to create channel %d\n",
- channel->channel);
- goto fail;
- }
- }
- efx_set_channel_names(efx);
-
- return 0;
-
-fail:
- efx_remove_channels(efx);
- return rc;
-}
-
-/* Channels are shutdown and reinitialised whilst the NIC is running
- * to propagate configuration changes (mtu, checksum offload), or
- * to clear hardware error conditions
- */
-static void efx_start_datapath(struct efx_nic *efx)
-{
- netdev_features_t old_features = efx->net_dev->features;
- bool old_rx_scatter = efx->rx_scatter;
- struct efx_tx_queue *tx_queue;
- struct efx_rx_queue *rx_queue;
- struct efx_channel *channel;
- size_t rx_buf_len;
-
- /* Calculate the rx buffer allocation parameters required to
- * support the current MTU, including padding for header
- * alignment and overruns.
- */
- efx->rx_dma_len = (efx->rx_prefix_size +
- EFX_MAX_FRAME_LEN(efx->net_dev->mtu) +
- efx->type->rx_buffer_padding);
- rx_buf_len = (sizeof(struct efx_rx_page_state) + XDP_PACKET_HEADROOM +
- efx->rx_ip_align + efx->rx_dma_len);
- if (rx_buf_len <= PAGE_SIZE) {
- efx->rx_scatter = efx->type->always_rx_scatter;
- efx->rx_buffer_order = 0;
- } else if (efx->type->can_rx_scatter) {
- BUILD_BUG_ON(EFX_RX_USR_BUF_SIZE % L1_CACHE_BYTES);
- BUILD_BUG_ON(sizeof(struct efx_rx_page_state) +
- 2 * ALIGN(NET_IP_ALIGN + EFX_RX_USR_BUF_SIZE,
- EFX_RX_BUF_ALIGNMENT) >
- PAGE_SIZE);
- efx->rx_scatter = true;
- efx->rx_dma_len = EFX_RX_USR_BUF_SIZE;
- efx->rx_buffer_order = 0;
- } else {
- efx->rx_scatter = false;
- efx->rx_buffer_order = get_order(rx_buf_len);
- }
-
- efx_rx_config_page_split(efx);
- if (efx->rx_buffer_order)
- netif_dbg(efx, drv, efx->net_dev,
- "RX buf len=%u; page order=%u batch=%u\n",
- efx->rx_dma_len, efx->rx_buffer_order,
- efx->rx_pages_per_batch);
- else
- netif_dbg(efx, drv, efx->net_dev,
- "RX buf len=%u step=%u bpp=%u; page batch=%u\n",
- efx->rx_dma_len, efx->rx_page_buf_step,
- efx->rx_bufs_per_page, efx->rx_pages_per_batch);
-
- /* Restore previously fixed features in hw_features and remove
- * features which are fixed now
- */
- efx->net_dev->hw_features |= efx->net_dev->features;
- efx->net_dev->hw_features &= ~efx->fixed_features;
- efx->net_dev->features |= efx->fixed_features;
- if (efx->net_dev->features != old_features)
- netdev_features_change(efx->net_dev);
-
- /* RX filters may also have scatter-enabled flags */
- if (efx->rx_scatter != old_rx_scatter)
- efx->type->filter_update_rx_scatter(efx);
-
- /* We must keep at least one descriptor in a TX ring empty.
- * We could avoid this when the queue size does not exactly
- * match the hardware ring size, but it's not that important.
- * Therefore we stop the queue when one more skb might fill
- * the ring completely. We wake it when half way back to
- * empty.
- */
- efx->txq_stop_thresh = efx->txq_entries - efx_tx_max_skb_descs(efx);
- efx->txq_wake_thresh = efx->txq_stop_thresh / 2;
-
- /* Initialise the channels */
- efx_for_each_channel(channel, efx) {
- efx_for_each_channel_tx_queue(tx_queue, channel) {
- efx_init_tx_queue(tx_queue);
- atomic_inc(&efx->active_queues);
- }
-
- efx_for_each_channel_rx_queue(rx_queue, channel) {
- efx_init_rx_queue(rx_queue);
- atomic_inc(&efx->active_queues);
- efx_stop_eventq(channel);
- efx_fast_push_rx_descriptors(rx_queue, false);
- efx_start_eventq(channel);
- }
-
- WARN_ON(channel->rx_pkt_n_frags);
- }
-
- efx_ptp_start_datapath(efx);
-
- if (netif_device_present(efx->net_dev))
- netif_tx_wake_all_queues(efx->net_dev);
-}
-
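The TX stop/wake thresholds computed above can be shown numerically; a sketch assuming a 1024-entry ring and a worst case of 18 descriptors per skb (both example values, not hardware figures).

#include <stdio.h>

/* Mirrors the txq_stop_thresh/txq_wake_thresh arithmetic above. */
int main(void)
{
	unsigned int txq_entries = 1024;
	unsigned int max_skb_descs = 18;	/* stand-in for efx_tx_max_skb_descs() */
	unsigned int stop = txq_entries - max_skb_descs;
	unsigned int wake = stop / 2;

	printf("stop the queue at %u used descriptors, wake at %u\n",
	       stop, wake);
	return 0;
}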
-static void efx_stop_datapath(struct efx_nic *efx)
-{
- struct efx_channel *channel;
- struct efx_tx_queue *tx_queue;
- struct efx_rx_queue *rx_queue;
- int rc;
-
- EFX_ASSERT_RESET_SERIALISED(efx);
- BUG_ON(efx->port_enabled);
-
- efx_ptp_stop_datapath(efx);
-
- /* Stop RX refill */
- efx_for_each_channel(channel, efx) {
- efx_for_each_channel_rx_queue(rx_queue, channel)
- rx_queue->refill_enabled = false;
- }
-
- efx_for_each_channel(channel, efx) {
- /* RX packet processing is pipelined, so wait for the
- * NAPI handler to complete. At least event queue 0
- * might be kept active by non-data events, so don't
- * use napi_synchronize() but actually disable NAPI
- * temporarily.
- */
- if (efx_channel_has_rx_queue(channel)) {
- efx_stop_eventq(channel);
- efx_start_eventq(channel);
- }
- }
-
- rc = efx->type->fini_dmaq(efx);
- if (rc) {
- netif_err(efx, drv, efx->net_dev, "failed to flush queues\n");
- } else {
- netif_dbg(efx, drv, efx->net_dev,
- "successfully flushed all queues\n");
- }
-
- efx_for_each_channel(channel, efx) {
- efx_for_each_channel_rx_queue(rx_queue, channel)
- efx_fini_rx_queue(rx_queue);
- efx_for_each_possible_channel_tx_queue(tx_queue, channel)
- efx_fini_tx_queue(tx_queue);
- }
- efx->xdp_rxq_info_failed = false;
-}
-
-static void efx_remove_channel(struct efx_channel *channel)
-{
- struct efx_tx_queue *tx_queue;
- struct efx_rx_queue *rx_queue;
-
- netif_dbg(channel->efx, drv, channel->efx->net_dev,
- "destroy chan %d\n", channel->channel);
-
- efx_for_each_channel_rx_queue(rx_queue, channel)
- efx_remove_rx_queue(rx_queue);
- efx_for_each_possible_channel_tx_queue(tx_queue, channel)
- efx_remove_tx_queue(tx_queue);
- efx_remove_eventq(channel);
- channel->type->post_remove(channel);
-}
-
-static void efx_remove_channels(struct efx_nic *efx)
-{
- struct efx_channel *channel;
-
- efx_for_each_channel(channel, efx)
- efx_remove_channel(channel);
-
- kfree(efx->xdp_tx_queues);
-}
-
-int
-efx_realloc_channels(struct efx_nic *efx, u32 rxq_entries, u32 txq_entries)
-{
- struct efx_channel *other_channel[EFX_MAX_CHANNELS], *channel;
- u32 old_rxq_entries, old_txq_entries;
- unsigned i, next_buffer_table = 0;
- int rc, rc2;
-
- rc = efx_check_disabled(efx);
- if (rc)
- return rc;
-
- /* Not all channels should be reallocated. We must avoid
- * reallocating their buffer table entries.
- */
- efx_for_each_channel(channel, efx) {
- struct efx_rx_queue *rx_queue;
- struct efx_tx_queue *tx_queue;
-
- if (channel->type->copy)
- continue;
- next_buffer_table = max(next_buffer_table,
- channel->eventq.index +
- channel->eventq.entries);
- efx_for_each_channel_rx_queue(rx_queue, channel)
- next_buffer_table = max(next_buffer_table,
- rx_queue->rxd.index +
- rx_queue->rxd.entries);
- efx_for_each_channel_tx_queue(tx_queue, channel)
- next_buffer_table = max(next_buffer_table,
- tx_queue->txd.index +
- tx_queue->txd.entries);
- }
-
- efx_device_detach_sync(efx);
- efx_stop_all(efx);
- efx_soft_disable_interrupts(efx);
-
- /* Clone channels (where possible) */
- memset(other_channel, 0, sizeof(other_channel));
- for (i = 0; i < efx->n_channels; i++) {
- channel = efx->channel[i];
- if (channel->type->copy)
- channel = channel->type->copy(channel);
- if (!channel) {
- rc = -ENOMEM;
- goto out;
- }
- other_channel[i] = channel;
- }
-
- /* Swap entry counts and channel pointers */
- old_rxq_entries = efx->rxq_entries;
- old_txq_entries = efx->txq_entries;
- efx->rxq_entries = rxq_entries;
- efx->txq_entries = txq_entries;
- for (i = 0; i < efx->n_channels; i++) {
- channel = efx->channel[i];
- efx->channel[i] = other_channel[i];
- other_channel[i] = channel;
- }
-
- /* Restart buffer table allocation */
- efx->next_buffer_table = next_buffer_table;
-
- for (i = 0; i < efx->n_channels; i++) {
- channel = efx->channel[i];
- if (!channel->type->copy)
- continue;
- rc = efx_probe_channel(channel);
- if (rc)
- goto rollback;
- efx_init_napi_channel(efx->channel[i]);
- }
-
-out:
- /* Destroy unused channel structures */
- for (i = 0; i < efx->n_channels; i++) {
- channel = other_channel[i];
- if (channel && channel->type->copy) {
- efx_fini_napi_channel(channel);
- efx_remove_channel(channel);
- kfree(channel);
- }
- }
-
- rc2 = efx_soft_enable_interrupts(efx);
- if (rc2) {
- rc = rc ? rc : rc2;
- netif_err(efx, drv, efx->net_dev,
- "unable to restart interrupts on channel reallocation\n");
- efx_schedule_reset(efx, RESET_TYPE_DISABLE);
- } else {
- efx_start_all(efx);
- efx_device_attach_if_not_resetting(efx);
- }
- return rc;
-
-rollback:
- /* Swap back */
- efx->rxq_entries = old_rxq_entries;
- efx->txq_entries = old_txq_entries;
- for (i = 0; i < efx->n_channels; i++) {
- channel = efx->channel[i];
- efx->channel[i] = other_channel[i];
- other_channel[i] = channel;
- }
- goto out;
-}
-
-void efx_schedule_slow_fill(struct efx_rx_queue *rx_queue)
-{
- mod_timer(&rx_queue->slow_fill, jiffies + msecs_to_jiffies(10));
-}
-
-static bool efx_default_channel_want_txqs(struct efx_channel *channel)
-{
- return channel->channel - channel->efx->tx_channel_offset <
- channel->efx->n_tx_channels;
-}
-
-static const struct efx_channel_type efx_default_channel_type = {
- .pre_probe = efx_channel_dummy_op_int,
- .post_remove = efx_channel_dummy_op_void,
- .get_name = efx_get_channel_name,
- .copy = efx_copy_channel,
- .want_txqs = efx_default_channel_want_txqs,
- .keep_eventq = false,
- .want_pio = true,
-};
-
-int efx_channel_dummy_op_int(struct efx_channel *channel)
-{
- return 0;
-}
-
-void efx_channel_dummy_op_void(struct efx_channel *channel)
-{
-}
-
/**************************************************************************
*
* Port handling
*
**************************************************************************/
-/* This ensures that the kernel is kept informed (via
- * netif_carrier_on/off) of the link status, and also keeps the port's
- * TX queue stopped or running to match the link state.
- */
-void efx_link_status_changed(struct efx_nic *efx)
-{
- struct efx_link_state *link_state = &efx->link_state;
-
- /* SFC Bug 5356: A net_dev notifier is registered, so we must ensure
- * that no events are triggered between unregister_netdev() and the
- * driver unloading. A more general condition is that NETDEV_CHANGE
- * can only be generated between NETDEV_UP and NETDEV_DOWN */
- if (!netif_running(efx->net_dev))
- return;
-
- if (link_state->up != netif_carrier_ok(efx->net_dev)) {
- efx->n_link_state_changes++;
-
- if (link_state->up)
- netif_carrier_on(efx->net_dev);
- else
- netif_carrier_off(efx->net_dev);
- }
-
- /* Status message for kernel log */
- if (link_state->up)
- netif_info(efx, link, efx->net_dev,
- "link up at %uMbps %s-duplex (MTU %d)\n",
- link_state->speed, link_state->fd ? "full" : "half",
- efx->net_dev->mtu);
- else
- netif_info(efx, link, efx->net_dev, "link down\n");
-}
-
-void efx_link_set_advertising(struct efx_nic *efx,
- const unsigned long *advertising)
-{
- memcpy(efx->link_advertising, advertising,
- sizeof(__ETHTOOL_DECLARE_LINK_MODE_MASK()));
-
- efx->link_advertising[0] |= ADVERTISED_Autoneg;
- if (advertising[0] & ADVERTISED_Pause)
- efx->wanted_fc |= (EFX_FC_TX | EFX_FC_RX);
- else
- efx->wanted_fc &= ~(EFX_FC_TX | EFX_FC_RX);
- if (advertising[0] & ADVERTISED_Asym_Pause)
- efx->wanted_fc ^= EFX_FC_TX;
-}
-
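A small sketch of the Pause/Asym_Pause handling in efx_link_set_advertising() above, enumerating all four advertising combinations (the flag values are local to the sketch).

#include <stdio.h>

#define FC_TX 1	/* local stand-ins for EFX_FC_TX / EFX_FC_RX */
#define FC_RX 2

int main(void)
{
	for (int pause = 0; pause <= 1; pause++)
		for (int asym = 0; asym <= 1; asym++) {
			int fc = 0;

			if (pause)
				fc |= FC_TX | FC_RX;
			if (asym)
				fc ^= FC_TX;
			printf("Pause=%d Asym=%d -> tx_pause=%d rx_pause=%d\n",
			       pause, asym, !!(fc & FC_TX), !!(fc & FC_RX));
		}
	return 0;
}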
/* Equivalent to efx_link_set_advertising with all-zeroes, except does not
* force the Autoneg bit on.
*/
@@ -1035,73 +160,6 @@ void efx_link_set_wanted_fc(struct efx_nic *efx, u8 wanted_fc)
static void efx_fini_port(struct efx_nic *efx);
-/* We assume that efx->type->reconfigure_mac will always try to sync RX
- * filters and therefore needs to read-lock the filter table against freeing
- */
-void efx_mac_reconfigure(struct efx_nic *efx)
-{
- down_read(&efx->filter_sem);
- efx->type->reconfigure_mac(efx);
- up_read(&efx->filter_sem);
-}
-
-/* Push loopback/power/transmit disable settings to the PHY, and reconfigure
- * the MAC appropriately. All other PHY configuration changes are pushed
- * through phy_op->set_settings(), and pushed asynchronously to the MAC
- * through efx_monitor().
- *
- * Callers must hold the mac_lock
- */
-int __efx_reconfigure_port(struct efx_nic *efx)
-{
- enum efx_phy_mode phy_mode;
- int rc;
-
- WARN_ON(!mutex_is_locked(&efx->mac_lock));
-
- /* Disable PHY transmit in mac level loopbacks */
- phy_mode = efx->phy_mode;
- if (LOOPBACK_INTERNAL(efx))
- efx->phy_mode |= PHY_MODE_TX_DISABLED;
- else
- efx->phy_mode &= ~PHY_MODE_TX_DISABLED;
-
- rc = efx->type->reconfigure_port(efx);
-
- if (rc)
- efx->phy_mode = phy_mode;
-
- return rc;
-}
-
-/* Reinitialise the MAC to pick up new PHY settings, even if the port is
- * disabled. */
-int efx_reconfigure_port(struct efx_nic *efx)
-{
- int rc;
-
- EFX_ASSERT_RESET_SERIALISED(efx);
-
- mutex_lock(&efx->mac_lock);
- rc = __efx_reconfigure_port(efx);
- mutex_unlock(&efx->mac_lock);
-
- return rc;
-}
-
-/* Asynchronous work item for changing MAC promiscuity and multicast
- * hash. Avoid a drain/rx_ingress enable by reconfiguring the current
- * MAC directly. */
-static void efx_mac_work(struct work_struct *data)
-{
- struct efx_nic *efx = container_of(data, struct efx_nic, mac_work);
-
- mutex_lock(&efx->mac_lock);
- if (efx->port_enabled)
- efx_mac_reconfigure(efx);
- mutex_unlock(&efx->mac_lock);
-}
-
static int efx_probe_port(struct efx_nic *efx)
{
int rc;
@@ -1155,44 +213,6 @@ fail1:
return rc;
}
-static void efx_start_port(struct efx_nic *efx)
-{
- netif_dbg(efx, ifup, efx->net_dev, "start port\n");
- BUG_ON(efx->port_enabled);
-
- mutex_lock(&efx->mac_lock);
- efx->port_enabled = true;
-
- /* Ensure MAC ingress/egress is enabled */
- efx_mac_reconfigure(efx);
-
- mutex_unlock(&efx->mac_lock);
-}
-
-/* Cancel work for MAC reconfiguration, periodic hardware monitoring
- * and the async self-test, wait for them to finish and prevent them
- * being scheduled again. This doesn't cover online resets, which
- * should only be cancelled when removing the device.
- */
-static void efx_stop_port(struct efx_nic *efx)
-{
- netif_dbg(efx, ifdown, efx->net_dev, "stop port\n");
-
- EFX_ASSERT_RESET_SERIALISED(efx);
-
- mutex_lock(&efx->mac_lock);
- efx->port_enabled = false;
- mutex_unlock(&efx->mac_lock);
-
- /* Serialise against efx_set_multicast_list() */
- netif_addr_lock_bh(efx->net_dev);
- netif_addr_unlock_bh(efx->net_dev);
-
- cancel_delayed_work_sync(&efx->monitor_work);
- efx_selftest_async_cancel(efx);
- cancel_work_sync(&efx->mac_work);
-}
-
static void efx_fini_port(struct efx_nic *efx)
{
netif_dbg(efx, drv, efx->net_dev, "shut down port\n");
@@ -1291,582 +311,6 @@ static void efx_dissociate(struct efx_nic *efx)
}
}
-/* This configures the PCI device to enable I/O and DMA. */
-static int efx_init_io(struct efx_nic *efx)
-{
- struct pci_dev *pci_dev = efx->pci_dev;
- dma_addr_t dma_mask = efx->type->max_dma_mask;
- unsigned int mem_map_size = efx->type->mem_map_size(efx);
- int rc, bar;
-
- netif_dbg(efx, probe, efx->net_dev, "initialising I/O\n");
-
- bar = efx->type->mem_bar(efx);
-
- rc = pci_enable_device(pci_dev);
- if (rc) {
- netif_err(efx, probe, efx->net_dev,
- "failed to enable PCI device\n");
- goto fail1;
- }
-
- pci_set_master(pci_dev);
-
- /* Set the PCI DMA mask. Try all possibilities from our genuine mask
- * down to 32 bits, because some architectures will allow 40 bit
-	 * masks even though they reject 46 bit masks.
- */
- while (dma_mask > 0x7fffffffUL) {
- rc = dma_set_mask_and_coherent(&pci_dev->dev, dma_mask);
- if (rc == 0)
- break;
- dma_mask >>= 1;
- }
- if (rc) {
- netif_err(efx, probe, efx->net_dev,
- "could not find a suitable DMA mask\n");
- goto fail2;
- }
- netif_dbg(efx, probe, efx->net_dev,
- "using DMA mask %llx\n", (unsigned long long) dma_mask);
-
- efx->membase_phys = pci_resource_start(efx->pci_dev, bar);
- rc = pci_request_region(pci_dev, bar, "sfc");
- if (rc) {
- netif_err(efx, probe, efx->net_dev,
- "request for memory BAR failed\n");
- rc = -EIO;
- goto fail3;
- }
- efx->membase = ioremap(efx->membase_phys, mem_map_size);
- if (!efx->membase) {
- netif_err(efx, probe, efx->net_dev,
- "could not map memory BAR at %llx+%x\n",
- (unsigned long long)efx->membase_phys, mem_map_size);
- rc = -ENOMEM;
- goto fail4;
- }
- netif_dbg(efx, probe, efx->net_dev,
- "memory BAR at %llx+%x (virtual %p)\n",
- (unsigned long long)efx->membase_phys, mem_map_size,
- efx->membase);
-
- return 0;
-
- fail4:
- pci_release_region(efx->pci_dev, bar);
- fail3:
- efx->membase_phys = 0;
- fail2:
- pci_disable_device(efx->pci_dev);
- fail1:
- return rc;
-}
-
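A standalone sketch of the DMA-mask fallback loop above: the mask is halved until the platform accepts it. The "platform" here is a stand-in that only accepts 40-bit masks; in the driver the acceptance test is dma_set_mask_and_coherent().

#include <stdio.h>

static int fake_dma_set_mask(unsigned long long mask)
{
	return mask <= 0xffffffffffULL ? 0 : -5;	/* -EIO-style failure */
}

int main(void)
{
	unsigned long long dma_mask = 0x3fffffffffffULL;	/* 46-bit mask */
	int rc = -1;

	while (dma_mask > 0x7fffffffUL) {
		rc = fake_dma_set_mask(dma_mask);
		if (rc == 0)
			break;
		dma_mask >>= 1;
	}
	printf("rc=%d, accepted mask %#llx\n", rc, dma_mask);
	return 0;
}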
-static void efx_fini_io(struct efx_nic *efx)
-{
- int bar;
-
- netif_dbg(efx, drv, efx->net_dev, "shutting down I/O\n");
-
- if (efx->membase) {
- iounmap(efx->membase);
- efx->membase = NULL;
- }
-
- if (efx->membase_phys) {
- bar = efx->type->mem_bar(efx);
- pci_release_region(efx->pci_dev, bar);
- efx->membase_phys = 0;
- }
-
- /* Don't disable bus-mastering if VFs are assigned */
- if (!pci_vfs_assigned(efx->pci_dev))
- pci_disable_device(efx->pci_dev);
-}
-
-void efx_set_default_rx_indir_table(struct efx_nic *efx,
- struct efx_rss_context *ctx)
-{
- size_t i;
-
- for (i = 0; i < ARRAY_SIZE(ctx->rx_indir_table); i++)
- ctx->rx_indir_table[i] =
- ethtool_rxfh_indir_default(i, efx->rss_spread);
-}
-
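The default indirection table above spreads entries round-robin over the RX channels (ethtool_rxfh_indir_default(i, n) is effectively i % n); a sketch with an assumed 16-entry table and 4 channels, both smaller than the real sizes.

#include <stdio.h>

int main(void)
{
	unsigned int rss_spread = 4;	/* assumed number of RX channels */

	for (unsigned int i = 0; i < 16; i++)
		printf("indir[%2u] -> RX queue %u\n", i, i % rss_spread);
	return 0;
}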
-static unsigned int efx_wanted_parallelism(struct efx_nic *efx)
-{
- cpumask_var_t thread_mask;
- unsigned int count;
- int cpu;
-
- if (rss_cpus) {
- count = rss_cpus;
- } else {
- if (unlikely(!zalloc_cpumask_var(&thread_mask, GFP_KERNEL))) {
- netif_warn(efx, probe, efx->net_dev,
- "RSS disabled due to allocation failure\n");
- return 1;
- }
-
- count = 0;
- for_each_online_cpu(cpu) {
- if (!cpumask_test_cpu(cpu, thread_mask)) {
- ++count;
- cpumask_or(thread_mask, thread_mask,
- topology_sibling_cpumask(cpu));
- }
- }
-
- free_cpumask_var(thread_mask);
- }
-
- if (count > EFX_MAX_RX_QUEUES) {
- netif_cond_dbg(efx, probe, efx->net_dev, !rss_cpus, warn,
- "Reducing number of rx queues from %u to %u.\n",
- count, EFX_MAX_RX_QUEUES);
- count = EFX_MAX_RX_QUEUES;
- }
-
- /* If RSS is requested for the PF *and* VFs then we can't write RSS
- * table entries that are inaccessible to VFs
- */
-#ifdef CONFIG_SFC_SRIOV
- if (efx->type->sriov_wanted) {
- if (efx->type->sriov_wanted(efx) && efx_vf_size(efx) > 1 &&
- count > efx_vf_size(efx)) {
- netif_warn(efx, probe, efx->net_dev,
- "Reducing number of RSS channels from %u to %u for "
- "VF support. Increase vf-msix-limit to use more "
- "channels on the PF.\n",
- count, efx_vf_size(efx));
- count = efx_vf_size(efx);
- }
- }
-#endif
-
- return count;
-}
-
-static int efx_allocate_msix_channels(struct efx_nic *efx,
- unsigned int max_channels,
- unsigned int extra_channels,
- unsigned int parallelism)
-{
- unsigned int n_channels = parallelism;
- int vec_count;
- int n_xdp_tx;
- int n_xdp_ev;
-
- if (efx_separate_tx_channels)
- n_channels *= 2;
- n_channels += extra_channels;
-
- /* To allow XDP transmit to happen from arbitrary NAPI contexts
- * we allocate a TX queue per CPU. We share event queues across
- * multiple tx queues, assuming tx and ev queues are both
- * maximum size.
- */
-
- n_xdp_tx = num_possible_cpus();
- n_xdp_ev = DIV_ROUND_UP(n_xdp_tx, EFX_TXQ_TYPES);
-
- vec_count = pci_msix_vec_count(efx->pci_dev);
- if (vec_count < 0)
- return vec_count;
-
- max_channels = min_t(unsigned int, vec_count, max_channels);
-
- /* Check resources.
- * We need a channel per event queue, plus a VI per tx queue.
- * This may be more pessimistic than it needs to be.
- */
- if (n_channels + n_xdp_ev > max_channels) {
- netif_err(efx, drv, efx->net_dev,
- "Insufficient resources for %d XDP event queues (%d other channels, max %d)\n",
- n_xdp_ev, n_channels, max_channels);
- efx->n_xdp_channels = 0;
- efx->xdp_tx_per_channel = 0;
- efx->xdp_tx_queue_count = 0;
- } else {
- efx->n_xdp_channels = n_xdp_ev;
- efx->xdp_tx_per_channel = EFX_TXQ_TYPES;
- efx->xdp_tx_queue_count = n_xdp_tx;
- n_channels += n_xdp_ev;
- netif_dbg(efx, drv, efx->net_dev,
- "Allocating %d TX and %d event queues for XDP\n",
- n_xdp_tx, n_xdp_ev);
- }
-
- if (vec_count < n_channels) {
- netif_err(efx, drv, efx->net_dev,
- "WARNING: Insufficient MSI-X vectors available (%d < %u).\n",
- vec_count, n_channels);
- netif_err(efx, drv, efx->net_dev,
- "WARNING: Performance may be reduced.\n");
- n_channels = vec_count;
- }
-
- n_channels = min(n_channels, max_channels);
-
- efx->n_channels = n_channels;
-
- /* Ignore XDP tx channels when creating rx channels. */
- n_channels -= efx->n_xdp_channels;
-
- if (efx_separate_tx_channels) {
- efx->n_tx_channels =
- min(max(n_channels / 2, 1U),
- efx->max_tx_channels);
- efx->tx_channel_offset =
- n_channels - efx->n_tx_channels;
- efx->n_rx_channels =
- max(n_channels -
- efx->n_tx_channels, 1U);
- } else {
- efx->n_tx_channels = min(n_channels, efx->max_tx_channels);
- efx->tx_channel_offset = 0;
- efx->n_rx_channels = n_channels;
- }
-
- efx->n_rx_channels = min(efx->n_rx_channels, parallelism);
- efx->n_tx_channels = min(efx->n_tx_channels, parallelism);
-
- efx->xdp_channel_offset = n_channels;
-
- netif_dbg(efx, drv, efx->net_dev,
- "Allocating %u RX channels\n",
- efx->n_rx_channels);
-
- return efx->n_channels;
-}
-
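A sketch of the XDP queue sizing above: one XDP TX queue per possible CPU, grouped EFX_TXQ_TYPES at a time onto extra event queues. The CPU count of 12 and EFX_TXQ_TYPES of 4 are assumed example values.

#include <stdio.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
	int n_xdp_tx = 12;		/* e.g. num_possible_cpus() */
	int txq_per_ev = 4;		/* assumed EFX_TXQ_TYPES */
	int n_xdp_ev = DIV_ROUND_UP(n_xdp_tx, txq_per_ev);

	printf("%d XDP TX queues spread over %d extra event queues\n",
	       n_xdp_tx, n_xdp_ev);
	return 0;
}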
-/* Probe the number and type of interrupts we are able to obtain, and
- * the resulting numbers of channels and RX queues.
- */
-static int efx_probe_interrupts(struct efx_nic *efx)
-{
- unsigned int extra_channels = 0;
- unsigned int rss_spread;
- unsigned int i, j;
- int rc;
-
- for (i = 0; i < EFX_MAX_EXTRA_CHANNELS; i++)
- if (efx->extra_channel_type[i])
- ++extra_channels;
-
- if (efx->interrupt_mode == EFX_INT_MODE_MSIX) {
- unsigned int parallelism = efx_wanted_parallelism(efx);
- struct msix_entry xentries[EFX_MAX_CHANNELS];
- unsigned int n_channels;
-
- rc = efx_allocate_msix_channels(efx, efx->max_channels,
- extra_channels, parallelism);
- if (rc >= 0) {
- n_channels = rc;
- for (i = 0; i < n_channels; i++)
- xentries[i].entry = i;
- rc = pci_enable_msix_range(efx->pci_dev, xentries, 1,
- n_channels);
- }
- if (rc < 0) {
- /* Fall back to single channel MSI */
- netif_err(efx, drv, efx->net_dev,
- "could not enable MSI-X\n");
- if (efx->type->min_interrupt_mode >= EFX_INT_MODE_MSI)
- efx->interrupt_mode = EFX_INT_MODE_MSI;
- else
- return rc;
- } else if (rc < n_channels) {
- netif_err(efx, drv, efx->net_dev,
- "WARNING: Insufficient MSI-X vectors"
- " available (%d < %u).\n", rc, n_channels);
- netif_err(efx, drv, efx->net_dev,
- "WARNING: Performance may be reduced.\n");
- n_channels = rc;
- }
-
- if (rc > 0) {
- for (i = 0; i < efx->n_channels; i++)
- efx_get_channel(efx, i)->irq =
- xentries[i].vector;
- }
- }
-
- /* Try single interrupt MSI */
- if (efx->interrupt_mode == EFX_INT_MODE_MSI) {
- efx->n_channels = 1;
- efx->n_rx_channels = 1;
- efx->n_tx_channels = 1;
- efx->n_xdp_channels = 0;
- efx->xdp_channel_offset = efx->n_channels;
- rc = pci_enable_msi(efx->pci_dev);
- if (rc == 0) {
- efx_get_channel(efx, 0)->irq = efx->pci_dev->irq;
- } else {
- netif_err(efx, drv, efx->net_dev,
- "could not enable MSI\n");
- if (efx->type->min_interrupt_mode >= EFX_INT_MODE_LEGACY)
- efx->interrupt_mode = EFX_INT_MODE_LEGACY;
- else
- return rc;
- }
- }
-
- /* Assume legacy interrupts */
- if (efx->interrupt_mode == EFX_INT_MODE_LEGACY) {
- efx->n_channels = 1 + (efx_separate_tx_channels ? 1 : 0);
- efx->n_rx_channels = 1;
- efx->n_tx_channels = 1;
- efx->n_xdp_channels = 0;
- efx->xdp_channel_offset = efx->n_channels;
- efx->legacy_irq = efx->pci_dev->irq;
- }
-
- /* Assign extra channels if possible, before XDP channels */
- efx->n_extra_tx_channels = 0;
- j = efx->xdp_channel_offset;
- for (i = 0; i < EFX_MAX_EXTRA_CHANNELS; i++) {
- if (!efx->extra_channel_type[i])
- continue;
- if (j <= efx->tx_channel_offset + efx->n_tx_channels) {
- efx->extra_channel_type[i]->handle_no_channel(efx);
- } else {
- --j;
- efx_get_channel(efx, j)->type =
- efx->extra_channel_type[i];
- if (efx_channel_has_tx_queues(efx_get_channel(efx, j)))
- efx->n_extra_tx_channels++;
- }
- }
-
- rss_spread = efx->n_rx_channels;
- /* RSS might be usable on VFs even if it is disabled on the PF */
-#ifdef CONFIG_SFC_SRIOV
- if (efx->type->sriov_wanted) {
- efx->rss_spread = ((rss_spread > 1 ||
- !efx->type->sriov_wanted(efx)) ?
- rss_spread : efx_vf_size(efx));
- return 0;
- }
-#endif
- efx->rss_spread = rss_spread;
-
- return 0;
-}
-
-#if defined(CONFIG_SMP)
-static void efx_set_interrupt_affinity(struct efx_nic *efx)
-{
- struct efx_channel *channel;
- unsigned int cpu;
-
- efx_for_each_channel(channel, efx) {
- cpu = cpumask_local_spread(channel->channel,
- pcibus_to_node(efx->pci_dev->bus));
- irq_set_affinity_hint(channel->irq, cpumask_of(cpu));
- }
-}
-
-static void efx_clear_interrupt_affinity(struct efx_nic *efx)
-{
- struct efx_channel *channel;
-
- efx_for_each_channel(channel, efx)
- irq_set_affinity_hint(channel->irq, NULL);
-}
-#else
-static void
-efx_set_interrupt_affinity(struct efx_nic *efx __attribute__ ((unused)))
-{
-}
-
-static void
-efx_clear_interrupt_affinity(struct efx_nic *efx __attribute__ ((unused)))
-{
-}
-#endif /* CONFIG_SMP */
-
-static int efx_soft_enable_interrupts(struct efx_nic *efx)
-{
- struct efx_channel *channel, *end_channel;
- int rc;
-
- BUG_ON(efx->state == STATE_DISABLED);
-
- efx->irq_soft_enabled = true;
- smp_wmb();
-
- efx_for_each_channel(channel, efx) {
- if (!channel->type->keep_eventq) {
- rc = efx_init_eventq(channel);
- if (rc)
- goto fail;
- }
- efx_start_eventq(channel);
- }
-
- efx_mcdi_mode_event(efx);
-
- return 0;
-fail:
- end_channel = channel;
- efx_for_each_channel(channel, efx) {
- if (channel == end_channel)
- break;
- efx_stop_eventq(channel);
- if (!channel->type->keep_eventq)
- efx_fini_eventq(channel);
- }
-
- return rc;
-}
-
-static void efx_soft_disable_interrupts(struct efx_nic *efx)
-{
- struct efx_channel *channel;
-
- if (efx->state == STATE_DISABLED)
- return;
-
- efx_mcdi_mode_poll(efx);
-
- efx->irq_soft_enabled = false;
- smp_wmb();
-
- if (efx->legacy_irq)
- synchronize_irq(efx->legacy_irq);
-
- efx_for_each_channel(channel, efx) {
- if (channel->irq)
- synchronize_irq(channel->irq);
-
- efx_stop_eventq(channel);
- if (!channel->type->keep_eventq)
- efx_fini_eventq(channel);
- }
-
- /* Flush the asynchronous MCDI request queue */
- efx_mcdi_flush_async(efx);
-}
-
-static int efx_enable_interrupts(struct efx_nic *efx)
-{
- struct efx_channel *channel, *end_channel;
- int rc;
-
- BUG_ON(efx->state == STATE_DISABLED);
-
- if (efx->eeh_disabled_legacy_irq) {
- enable_irq(efx->legacy_irq);
- efx->eeh_disabled_legacy_irq = false;
- }
-
- efx->type->irq_enable_master(efx);
-
- efx_for_each_channel(channel, efx) {
- if (channel->type->keep_eventq) {
- rc = efx_init_eventq(channel);
- if (rc)
- goto fail;
- }
- }
-
- rc = efx_soft_enable_interrupts(efx);
- if (rc)
- goto fail;
-
- return 0;
-
-fail:
- end_channel = channel;
- efx_for_each_channel(channel, efx) {
- if (channel == end_channel)
- break;
- if (channel->type->keep_eventq)
- efx_fini_eventq(channel);
- }
-
- efx->type->irq_disable_non_ev(efx);
-
- return rc;
-}
-
-static void efx_disable_interrupts(struct efx_nic *efx)
-{
- struct efx_channel *channel;
-
- efx_soft_disable_interrupts(efx);
-
- efx_for_each_channel(channel, efx) {
- if (channel->type->keep_eventq)
- efx_fini_eventq(channel);
- }
-
- efx->type->irq_disable_non_ev(efx);
-}
-
-static void efx_remove_interrupts(struct efx_nic *efx)
-{
- struct efx_channel *channel;
-
- /* Remove MSI/MSI-X interrupts */
- efx_for_each_channel(channel, efx)
- channel->irq = 0;
- pci_disable_msi(efx->pci_dev);
- pci_disable_msix(efx->pci_dev);
-
- /* Remove legacy interrupt */
- efx->legacy_irq = 0;
-}
-
-static int efx_set_channels(struct efx_nic *efx)
-{
- struct efx_channel *channel;
- struct efx_tx_queue *tx_queue;
- int xdp_queue_number;
-
- efx->tx_channel_offset =
- efx_separate_tx_channels ?
- efx->n_channels - efx->n_tx_channels : 0;
-
- if (efx->xdp_tx_queue_count) {
- EFX_WARN_ON_PARANOID(efx->xdp_tx_queues);
-
- /* Allocate array for XDP TX queue lookup. */
- efx->xdp_tx_queues = kcalloc(efx->xdp_tx_queue_count,
- sizeof(*efx->xdp_tx_queues),
- GFP_KERNEL);
- if (!efx->xdp_tx_queues)
- return -ENOMEM;
- }
-
- /* We need to mark which channels really have RX and TX
- * queues, and adjust the TX queue numbers if we have separate
- * RX-only and TX-only channels.
- */
- xdp_queue_number = 0;
- efx_for_each_channel(channel, efx) {
- if (channel->channel < efx->n_rx_channels)
- channel->rx_queue.core_index = channel->channel;
- else
- channel->rx_queue.core_index = -1;
-
- efx_for_each_channel_tx_queue(tx_queue, channel) {
- tx_queue->queue -= (efx->tx_channel_offset *
- EFX_TXQ_TYPES);
-
- if (efx_channel_is_xdp_tx(channel) &&
- xdp_queue_number < efx->xdp_tx_queue_count) {
- efx->xdp_tx_queues[xdp_queue_number] = tx_queue;
- xdp_queue_number++;
- }
- }
- }
- return 0;
-}
-
static int efx_probe_nic(struct efx_nic *efx)
{
int rc;
@@ -1939,70 +383,6 @@ static void efx_remove_nic(struct efx_nic *efx)
efx->type->remove(efx);
}
-static int efx_probe_filters(struct efx_nic *efx)
-{
- int rc;
-
- init_rwsem(&efx->filter_sem);
- mutex_lock(&efx->mac_lock);
- down_write(&efx->filter_sem);
- rc = efx->type->filter_table_probe(efx);
- if (rc)
- goto out_unlock;
-
-#ifdef CONFIG_RFS_ACCEL
- if (efx->type->offload_features & NETIF_F_NTUPLE) {
- struct efx_channel *channel;
- int i, success = 1;
-
- efx_for_each_channel(channel, efx) {
- channel->rps_flow_id =
- kcalloc(efx->type->max_rx_ip_filters,
- sizeof(*channel->rps_flow_id),
- GFP_KERNEL);
- if (!channel->rps_flow_id)
- success = 0;
- else
- for (i = 0;
- i < efx->type->max_rx_ip_filters;
- ++i)
- channel->rps_flow_id[i] =
- RPS_FLOW_ID_INVALID;
- channel->rfs_expire_index = 0;
- channel->rfs_filter_count = 0;
- }
-
- if (!success) {
- efx_for_each_channel(channel, efx)
- kfree(channel->rps_flow_id);
- efx->type->filter_table_remove(efx);
- rc = -ENOMEM;
- goto out_unlock;
- }
- }
-#endif
-out_unlock:
- up_write(&efx->filter_sem);
- mutex_unlock(&efx->mac_lock);
- return rc;
-}
-
-static void efx_remove_filters(struct efx_nic *efx)
-{
-#ifdef CONFIG_RFS_ACCEL
- struct efx_channel *channel;
-
- efx_for_each_channel(channel, efx) {
- cancel_delayed_work_sync(&channel->filter_work);
- kfree(channel->rps_flow_id);
- }
-#endif
- down_write(&efx->filter_sem);
- efx->type->filter_table_remove(efx);
- up_write(&efx->filter_sem);
-}
-
-
/**************************************************************************
*
* NIC startup/shutdown
@@ -2067,81 +447,6 @@ static int efx_probe_all(struct efx_nic *efx)
return rc;
}
-/* If the interface is supposed to be running but is not, start
- * the hardware and software data path, regular activity for the port
- * (MAC statistics, link polling, etc.) and schedule the port to be
- * reconfigured. Interrupts must already be enabled. This function
- * is safe to call multiple times, so long as the NIC is not disabled.
- * Requires the RTNL lock.
- */
-static void efx_start_all(struct efx_nic *efx)
-{
- EFX_ASSERT_RESET_SERIALISED(efx);
- BUG_ON(efx->state == STATE_DISABLED);
-
- /* Check that it is appropriate to restart the interface. All
- * of these flags are safe to read under just the rtnl lock */
- if (efx->port_enabled || !netif_running(efx->net_dev) ||
- efx->reset_pending)
- return;
-
- efx_start_port(efx);
- efx_start_datapath(efx);
-
- /* Start the hardware monitor if there is one */
- if (efx->type->monitor != NULL)
- queue_delayed_work(efx->workqueue, &efx->monitor_work,
- efx_monitor_interval);
-
- /* Link state detection is normally event-driven; we have
- * to poll now because we could have missed a change
- */
- mutex_lock(&efx->mac_lock);
- if (efx->phy_op->poll(efx))
- efx_link_status_changed(efx);
- mutex_unlock(&efx->mac_lock);
-
- efx->type->start_stats(efx);
- efx->type->pull_stats(efx);
- spin_lock_bh(&efx->stats_lock);
- efx->type->update_stats(efx, NULL, NULL);
- spin_unlock_bh(&efx->stats_lock);
-}
-
-/* Quiesce the hardware and software data path, and regular activity
- * for the port without bringing the link down. Safe to call multiple
- * times with the NIC in almost any state, but interrupts should be
- * enabled. Requires the RTNL lock.
- */
-static void efx_stop_all(struct efx_nic *efx)
-{
- EFX_ASSERT_RESET_SERIALISED(efx);
-
- /* port_enabled can be read safely under the rtnl lock */
- if (!efx->port_enabled)
- return;
-
- /* update stats before we go down so we can accurately count
- * rx_nodesc_drops
- */
- efx->type->pull_stats(efx);
- spin_lock_bh(&efx->stats_lock);
- efx->type->update_stats(efx, NULL, NULL);
- spin_unlock_bh(&efx->stats_lock);
- efx->type->stop_stats(efx);
- efx_stop_port(efx);
-
- /* Stop the kernel transmit interface. This is only valid if
- * the device is stopped or detached; otherwise the watchdog
- * may fire immediately.
- */
- WARN_ON(netif_running(efx->net_dev) &&
- netif_device_present(efx->net_dev));
- netif_tx_disable(efx->net_dev);
-
- efx_stop_datapath(efx);
-}
-
static void efx_remove_all(struct efx_nic *efx)
{
rtnl_lock();
@@ -2237,36 +542,6 @@ void efx_get_irq_moderation(struct efx_nic *efx, unsigned int *tx_usecs,
/**************************************************************************
*
- * Hardware monitor
- *
- **************************************************************************/
-
-/* Run periodically off the general workqueue */
-static void efx_monitor(struct work_struct *data)
-{
- struct efx_nic *efx = container_of(data, struct efx_nic,
- monitor_work.work);
-
- netif_vdbg(efx, timer, efx->net_dev,
- "hardware monitor executing on CPU %d\n",
- raw_smp_processor_id());
- BUG_ON(efx->type->monitor == NULL);
-
- /* If the mac_lock is already held then it is likely a port
- * reconfiguration is already in place, which will likely do
- * most of the work of monitor() anyway. */
- if (mutex_trylock(&efx->mac_lock)) {
- if (efx->port_enabled)
- efx->type->monitor(efx);
- mutex_unlock(&efx->mac_lock);
- }
-
- queue_delayed_work(efx->workqueue, &efx->monitor_work,
- efx_monitor_interval);
-}
-
-/**************************************************************************
- *
* ioctls
*
*************************************************************************/
@@ -2294,45 +569,6 @@ static int efx_ioctl(struct net_device *net_dev, struct ifreq *ifr, int cmd)
/**************************************************************************
*
- * NAPI interface
- *
- **************************************************************************/
-
-static void efx_init_napi_channel(struct efx_channel *channel)
-{
- struct efx_nic *efx = channel->efx;
-
- channel->napi_dev = efx->net_dev;
- netif_napi_add(channel->napi_dev, &channel->napi_str,
- efx_poll, napi_weight);
-}
-
-static void efx_init_napi(struct efx_nic *efx)
-{
- struct efx_channel *channel;
-
- efx_for_each_channel(channel, efx)
- efx_init_napi_channel(channel);
-}
-
-static void efx_fini_napi_channel(struct efx_channel *channel)
-{
- if (channel->napi_dev)
- netif_napi_del(&channel->napi_str);
-
- channel->napi_dev = NULL;
-}
-
-static void efx_fini_napi(struct efx_nic *efx)
-{
- struct efx_channel *channel;
-
- efx_for_each_channel(channel, efx)
- efx_fini_napi_channel(channel);
-}
-
-/**************************************************************************
- *
* Kernel net device interface
*
*************************************************************************/
@@ -2382,19 +618,8 @@ int efx_net_stop(struct net_device *net_dev)
return 0;
}
-/* Context: process, dev_base_lock or RTNL held, non-blocking. */
-static void efx_net_stats(struct net_device *net_dev,
- struct rtnl_link_stats64 *stats)
-{
- struct efx_nic *efx = netdev_priv(net_dev);
-
- spin_lock_bh(&efx->stats_lock);
- efx->type->update_stats(efx, NULL, stats);
- spin_unlock_bh(&efx->stats_lock);
-}
-
/* Context: netif_tx_lock held, BHs disabled. */
-static void efx_watchdog(struct net_device *net_dev)
+static void efx_watchdog(struct net_device *net_dev, unsigned int txqueue)
{
struct efx_nic *efx = netdev_priv(net_dev);
@@ -2405,51 +630,6 @@ static void efx_watchdog(struct net_device *net_dev)
efx_schedule_reset(efx, RESET_TYPE_TX_WATCHDOG);
}
-static unsigned int efx_xdp_max_mtu(struct efx_nic *efx)
-{
- /* The maximum MTU that we can fit in a single page, allowing for
- * framing, overhead and XDP headroom.
- */
- int overhead = EFX_MAX_FRAME_LEN(0) + sizeof(struct efx_rx_page_state) +
- efx->rx_prefix_size + efx->type->rx_buffer_padding +
- efx->rx_ip_align + XDP_PACKET_HEADROOM;
-
- return PAGE_SIZE - overhead;
-}
-
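A rough numerical illustration of the XDP MTU budget above; all of the overhead figures (frame overhead, RX prefix, padding, alignment, XDP headroom) are assumed example values rather than the driver's.

#include <stdio.h>

int main(void)
{
	int page_size = 4096;
	int frame_overhead = 18;	/* Ethernet header + FCS, roughly */
	int rx_prefix_and_padding = 64;	/* prefix + padding + IP alignment */
	int xdp_headroom = 256;		/* XDP_PACKET_HEADROOM */
	int overhead = frame_overhead + rx_prefix_and_padding + xdp_headroom;

	printf("max XDP MTU with one page per buffer: %d\n",
	       page_size - overhead);
	return 0;
}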
-/* Context: process, rtnl_lock() held. */
-static int efx_change_mtu(struct net_device *net_dev, int new_mtu)
-{
- struct efx_nic *efx = netdev_priv(net_dev);
- int rc;
-
- rc = efx_check_disabled(efx);
- if (rc)
- return rc;
-
- if (rtnl_dereference(efx->xdp_prog) &&
- new_mtu > efx_xdp_max_mtu(efx)) {
- netif_err(efx, drv, efx->net_dev,
- "Requested MTU of %d too big for XDP (max: %d)\n",
- new_mtu, efx_xdp_max_mtu(efx));
- return -EINVAL;
- }
-
- netif_dbg(efx, drv, efx->net_dev, "changing MTU to %d\n", new_mtu);
-
- efx_device_detach_sync(efx);
- efx_stop_all(efx);
-
- mutex_lock(&efx->mac_lock);
- net_dev->mtu = new_mtu;
- efx_mac_reconfigure(efx);
- mutex_unlock(&efx->mac_lock);
-
- efx_start_all(efx);
- efx_device_attach_if_not_resetting(efx);
- return 0;
-}
-
static int efx_set_mac_address(struct net_device *net_dev, void *data)
{
struct efx_nic *efx = netdev_priv(net_dev);
@@ -2726,28 +906,6 @@ show_phy_type(struct device *dev, struct device_attribute *attr, char *buf)
}
static DEVICE_ATTR(phy_type, 0444, show_phy_type, NULL);
-#ifdef CONFIG_SFC_MCDI_LOGGING
-static ssize_t show_mcdi_log(struct device *dev, struct device_attribute *attr,
- char *buf)
-{
- struct efx_nic *efx = dev_get_drvdata(dev);
- struct efx_mcdi_iface *mcdi = efx_mcdi(efx);
-
- return scnprintf(buf, PAGE_SIZE, "%d\n", mcdi->logging_enabled);
-}
-static ssize_t set_mcdi_log(struct device *dev, struct device_attribute *attr,
- const char *buf, size_t count)
-{
- struct efx_nic *efx = dev_get_drvdata(dev);
- struct efx_mcdi_iface *mcdi = efx_mcdi(efx);
- bool enable = count > 0 && *buf != '0';
-
- mcdi->logging_enabled = enable;
- return count;
-}
-static DEVICE_ATTR(mcdi_logging, 0644, show_mcdi_log, set_mcdi_log);
-#endif
-
static int efx_register_netdev(struct efx_nic *efx)
{
struct net_device *net_dev = efx->net_dev;
@@ -2807,21 +965,11 @@ static int efx_register_netdev(struct efx_nic *efx)
"failed to init net dev attributes\n");
goto fail_registered;
}
-#ifdef CONFIG_SFC_MCDI_LOGGING
- rc = device_create_file(&efx->pci_dev->dev, &dev_attr_mcdi_logging);
- if (rc) {
- netif_err(efx, drv, efx->net_dev,
- "failed to init net dev attributes\n");
- goto fail_attr_mcdi_logging;
- }
-#endif
+
+ efx_init_mcdi_logging(efx);
return 0;
-#ifdef CONFIG_SFC_MCDI_LOGGING
-fail_attr_mcdi_logging:
- device_remove_file(&efx->pci_dev->dev, &dev_attr_phy_type);
-#endif
fail_registered:
rtnl_lock();
efx_dissociate(efx);
@@ -2842,9 +990,7 @@ static void efx_unregister_netdev(struct efx_nic *efx)
if (efx_dev_registered(efx)) {
strlcpy(efx->name, pci_name(efx->pci_dev), sizeof(efx->name));
-#ifdef CONFIG_SFC_MCDI_LOGGING
- device_remove_file(&efx->pci_dev->dev, &dev_attr_mcdi_logging);
-#endif
+ efx_fini_mcdi_logging(efx);
device_remove_file(&efx->pci_dev->dev, &dev_attr_phy_type);
unregister_netdev(efx->net_dev);
}
@@ -2852,292 +998,6 @@ static void efx_unregister_netdev(struct efx_nic *efx)
/**************************************************************************
*
- * Device reset and suspend
- *
- **************************************************************************/
-
-/* Tears down the entire software state and most of the hardware state
- * before reset. */
-void efx_reset_down(struct efx_nic *efx, enum reset_type method)
-{
- EFX_ASSERT_RESET_SERIALISED(efx);
-
- if (method == RESET_TYPE_MCDI_TIMEOUT)
- efx->type->prepare_flr(efx);
-
- efx_stop_all(efx);
- efx_disable_interrupts(efx);
-
- mutex_lock(&efx->mac_lock);
- down_write(&efx->filter_sem);
- mutex_lock(&efx->rss_lock);
- if (efx->port_initialized && method != RESET_TYPE_INVISIBLE &&
- method != RESET_TYPE_DATAPATH)
- efx->phy_op->fini(efx);
- efx->type->fini(efx);
-}
-
-/* This function will always ensure that the locks acquired in
- * efx_reset_down() are released. A failure return code indicates
- * that we were unable to reinitialise the hardware, and the
- * driver should be disabled. If ok is false, then the rx and tx
- * engines are not restarted, pending a RESET_DISABLE. */
-int efx_reset_up(struct efx_nic *efx, enum reset_type method, bool ok)
-{
- int rc;
-
- EFX_ASSERT_RESET_SERIALISED(efx);
-
- if (method == RESET_TYPE_MCDI_TIMEOUT)
- efx->type->finish_flr(efx);
-
- /* Ensure that SRAM is initialised even if we're disabling the device */
- rc = efx->type->init(efx);
- if (rc) {
- netif_err(efx, drv, efx->net_dev, "failed to initialise NIC\n");
- goto fail;
- }
-
- if (!ok)
- goto fail;
-
- if (efx->port_initialized && method != RESET_TYPE_INVISIBLE &&
- method != RESET_TYPE_DATAPATH) {
- rc = efx->phy_op->init(efx);
- if (rc)
- goto fail;
- rc = efx->phy_op->reconfigure(efx);
- if (rc && rc != -EPERM)
- netif_err(efx, drv, efx->net_dev,
- "could not restore PHY settings\n");
- }
-
- rc = efx_enable_interrupts(efx);
- if (rc)
- goto fail;
-
-#ifdef CONFIG_SFC_SRIOV
- rc = efx->type->vswitching_restore(efx);
- if (rc) /* not fatal; the PF will still work fine */
- netif_warn(efx, probe, efx->net_dev,
- "failed to restore vswitching rc=%d;"
- " VFs may not function\n", rc);
-#endif
-
- if (efx->type->rx_restore_rss_contexts)
- efx->type->rx_restore_rss_contexts(efx);
- mutex_unlock(&efx->rss_lock);
- efx->type->filter_table_restore(efx);
- up_write(&efx->filter_sem);
- if (efx->type->sriov_reset)
- efx->type->sriov_reset(efx);
-
- mutex_unlock(&efx->mac_lock);
-
- efx_start_all(efx);
-
- if (efx->type->udp_tnl_push_ports)
- efx->type->udp_tnl_push_ports(efx);
-
- return 0;
-
-fail:
- efx->port_initialized = false;
-
- mutex_unlock(&efx->rss_lock);
- up_write(&efx->filter_sem);
- mutex_unlock(&efx->mac_lock);
-
- return rc;
-}
-
-/* Reset the NIC using the specified method. Note that the reset may
- * fail, in which case the card will be left in an unusable state.
- *
- * Caller must hold the rtnl_lock.
- */
-int efx_reset(struct efx_nic *efx, enum reset_type method)
-{
- int rc, rc2;
- bool disabled;
-
- netif_info(efx, drv, efx->net_dev, "resetting (%s)\n",
- RESET_TYPE(method));
-
- efx_device_detach_sync(efx);
- efx_reset_down(efx, method);
-
- rc = efx->type->reset(efx, method);
- if (rc) {
- netif_err(efx, drv, efx->net_dev, "failed to reset hardware\n");
- goto out;
- }
-
- /* Clear flags for the scopes we covered. We assume the NIC and
- * driver are now quiescent so that there is no race here.
- */
- if (method < RESET_TYPE_MAX_METHOD)
- efx->reset_pending &= -(1 << (method + 1));
- else /* it doesn't fit into the well-ordered scope hierarchy */
- __clear_bit(method, &efx->reset_pending);
-
- /* Reinitialise bus-mastering, which may have been turned off before
- * the reset was scheduled. This is still appropriate, even in the
-	 * RESET_TYPE_DISABLE case, since this driver generally assumes the
-	 * hardware can respond to requests. */
- pci_set_master(efx->pci_dev);
-
-out:
- /* Leave device stopped if necessary */
- disabled = rc ||
- method == RESET_TYPE_DISABLE ||
- method == RESET_TYPE_RECOVER_OR_DISABLE;
- rc2 = efx_reset_up(efx, method, !disabled);
- if (rc2) {
- disabled = true;
- if (!rc)
- rc = rc2;
- }
-
- if (disabled) {
- dev_close(efx->net_dev);
- netif_err(efx, drv, efx->net_dev, "has been disabled\n");
- efx->state = STATE_DISABLED;
- } else {
- netif_dbg(efx, drv, efx->net_dev, "reset complete\n");
- efx_device_attach_if_not_resetting(efx);
- }
- return rc;
-}
-
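A worked example of the reset_pending arithmetic above: pending &= -(1 << (method + 1)) clears the bit for the chosen method and every smaller-scoped method below it. The method numbers here are placeholders, not the driver's enum values.

#include <stdio.h>

int main(void)
{
	unsigned long pending = (1UL << 1) | (1UL << 3) | (1UL << 5);
	int method = 3;

	pending &= -(1UL << (method + 1));	/* keep only bits above 'method' */
	printf("reset_pending after handling method %d: %#lx\n",
	       method, pending);		/* prints 0x20 */
	return 0;
}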
-/* Try recovery mechanisms.
- * For now only EEH is supported.
- * Returns 0 if the recovery mechanisms are unsuccessful.
- * Returns a non-zero value otherwise.
- */
-int efx_try_recovery(struct efx_nic *efx)
-{
-#ifdef CONFIG_EEH
- /* A PCI error can occur and not be seen by EEH because nothing
- * happens on the PCI bus. In this case the driver may fail and
- * schedule a 'recover or reset', leading to this recovery handler.
- * Manually call the eeh failure check function.
- */
- struct eeh_dev *eehdev = pci_dev_to_eeh_dev(efx->pci_dev);
- if (eeh_dev_check_failure(eehdev)) {
- /* The EEH mechanisms will handle the error and reset the
- * device if necessary.
- */
- return 1;
- }
-#endif
- return 0;
-}
-
-static void efx_wait_for_bist_end(struct efx_nic *efx)
-{
- int i;
-
- for (i = 0; i < BIST_WAIT_DELAY_COUNT; ++i) {
- if (efx_mcdi_poll_reboot(efx))
- goto out;
- msleep(BIST_WAIT_DELAY_MS);
- }
-
- netif_err(efx, drv, efx->net_dev, "Warning: No MC reboot after BIST mode\n");
-out:
- /* Either way unset the BIST flag. If we found no reboot we probably
- * won't recover, but we should try.
- */
- efx->mc_bist_for_other_fn = false;
-}
-
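The BIST wait above follows a common poll-with-timeout shape; a minimal userspace sketch with made-up poll counts and delays.

#include <stdio.h>
#include <unistd.h>

static int mc_rebooted(int attempt)
{
	return attempt >= 3;	/* pretend the MC reboots on the fourth poll */
}

int main(void)
{
	int i, found = 0;

	for (i = 0; i < 10; i++) {	/* stand-in for BIST_WAIT_DELAY_COUNT */
		if (mc_rebooted(i)) {
			found = 1;
			break;
		}
		usleep(100 * 1000);	/* stand-in for BIST_WAIT_DELAY_MS */
	}
	if (found)
		printf("MC reboot seen after %d polls\n", i + 1);
	else
		printf("warning: no MC reboot seen, continuing anyway\n");
	return 0;
}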
-/* The worker thread exists so that code that cannot sleep can
- * schedule a reset for later.
- */
-static void efx_reset_work(struct work_struct *data)
-{
- struct efx_nic *efx = container_of(data, struct efx_nic, reset_work);
- unsigned long pending;
- enum reset_type method;
-
- pending = READ_ONCE(efx->reset_pending);
- method = fls(pending) - 1;
-
- if (method == RESET_TYPE_MC_BIST)
- efx_wait_for_bist_end(efx);
-
- if ((method == RESET_TYPE_RECOVER_OR_DISABLE ||
- method == RESET_TYPE_RECOVER_OR_ALL) &&
- efx_try_recovery(efx))
- return;
-
- if (!pending)
- return;
-
- rtnl_lock();
-
- /* We checked the state in efx_schedule_reset() but it may
- * have changed by now. Now that we have the RTNL lock,
- * it cannot change again.
- */
- if (efx->state == STATE_READY)
- (void)efx_reset(efx, method);
-
- rtnl_unlock();
-}
-
-void efx_schedule_reset(struct efx_nic *efx, enum reset_type type)
-{
- enum reset_type method;
-
- if (efx->state == STATE_RECOVERY) {
- netif_dbg(efx, drv, efx->net_dev,
- "recovering: skip scheduling %s reset\n",
- RESET_TYPE(type));
- return;
- }
-
- switch (type) {
- case RESET_TYPE_INVISIBLE:
- case RESET_TYPE_ALL:
- case RESET_TYPE_RECOVER_OR_ALL:
- case RESET_TYPE_WORLD:
- case RESET_TYPE_DISABLE:
- case RESET_TYPE_RECOVER_OR_DISABLE:
- case RESET_TYPE_DATAPATH:
- case RESET_TYPE_MC_BIST:
- case RESET_TYPE_MCDI_TIMEOUT:
- method = type;
- netif_dbg(efx, drv, efx->net_dev, "scheduling %s reset\n",
- RESET_TYPE(method));
- break;
- default:
- method = efx->type->map_reset_reason(type);
- netif_dbg(efx, drv, efx->net_dev,
- "scheduling %s reset for %s\n",
- RESET_TYPE(method), RESET_TYPE(type));
- break;
- }
-
- set_bit(method, &efx->reset_pending);
- smp_mb(); /* ensure we change reset_pending before checking state */
-
- /* If we're not READY then just leave the flags set as the cue
- * to abort probing or reschedule the reset later.
- */
- if (READ_ONCE(efx->state) != STATE_READY)
- return;
-
- /* efx_process_channel() will no longer read events once a
- * reset is scheduled. So switch back to poll'd MCDI completions. */
- efx_mcdi_mode_poll(efx);
-
- queue_work(reset_workqueue, &efx->reset_work);
-}
-
-/**************************************************************************
- *
* List of NICs we support
*
**************************************************************************/
@@ -3169,139 +1029,10 @@ static const struct pci_device_id efx_pci_table[] = {
/**************************************************************************
*
- * Dummy PHY/MAC operations
- *
- * Can be used for some unimplemented operations
- * Needed so all function pointers are valid and do not have to be tested
- * before use
- *
- **************************************************************************/
-int efx_port_dummy_op_int(struct efx_nic *efx)
-{
- return 0;
-}
-void efx_port_dummy_op_void(struct efx_nic *efx) {}
-
-static bool efx_port_dummy_op_poll(struct efx_nic *efx)
-{
- return false;
-}
-
-static const struct efx_phy_operations efx_dummy_phy_operations = {
- .init = efx_port_dummy_op_int,
- .reconfigure = efx_port_dummy_op_int,
- .poll = efx_port_dummy_op_poll,
- .fini = efx_port_dummy_op_void,
-};
-
-/**************************************************************************
- *
* Data housekeeping
*
**************************************************************************/
-/* This zeroes out and then fills in the invariants in a struct
- * efx_nic (including all sub-structures).
- */
-static int efx_init_struct(struct efx_nic *efx,
- struct pci_dev *pci_dev, struct net_device *net_dev)
-{
- int rc = -ENOMEM, i;
-
- /* Initialise common structures */
- INIT_LIST_HEAD(&efx->node);
- INIT_LIST_HEAD(&efx->secondary_list);
- spin_lock_init(&efx->biu_lock);
-#ifdef CONFIG_SFC_MTD
- INIT_LIST_HEAD(&efx->mtd_list);
-#endif
- INIT_WORK(&efx->reset_work, efx_reset_work);
- INIT_DELAYED_WORK(&efx->monitor_work, efx_monitor);
- INIT_DELAYED_WORK(&efx->selftest_work, efx_selftest_async_work);
- efx->pci_dev = pci_dev;
- efx->msg_enable = debug;
- efx->state = STATE_UNINIT;
- strlcpy(efx->name, pci_name(pci_dev), sizeof(efx->name));
-
- efx->net_dev = net_dev;
- efx->rx_prefix_size = efx->type->rx_prefix_size;
- efx->rx_ip_align =
- NET_IP_ALIGN ? (efx->rx_prefix_size + NET_IP_ALIGN) % 4 : 0;
- efx->rx_packet_hash_offset =
- efx->type->rx_hash_offset - efx->type->rx_prefix_size;
- efx->rx_packet_ts_offset =
- efx->type->rx_ts_offset - efx->type->rx_prefix_size;
- INIT_LIST_HEAD(&efx->rss_context.list);
- mutex_init(&efx->rss_lock);
- spin_lock_init(&efx->stats_lock);
- efx->vi_stride = EFX_DEFAULT_VI_STRIDE;
- efx->num_mac_stats = MC_CMD_MAC_NSTATS;
- BUILD_BUG_ON(MC_CMD_MAC_NSTATS - 1 != MC_CMD_MAC_GENERATION_END);
- mutex_init(&efx->mac_lock);
-#ifdef CONFIG_RFS_ACCEL
- mutex_init(&efx->rps_mutex);
- spin_lock_init(&efx->rps_hash_lock);
- /* Failure to allocate is not fatal, but may degrade ARFS performance */
- efx->rps_hash_table = kcalloc(EFX_ARFS_HASH_TABLE_SIZE,
- sizeof(*efx->rps_hash_table), GFP_KERNEL);
-#endif
- efx->phy_op = &efx_dummy_phy_operations;
- efx->mdio.dev = net_dev;
- INIT_WORK(&efx->mac_work, efx_mac_work);
- init_waitqueue_head(&efx->flush_wq);
-
- for (i = 0; i < EFX_MAX_CHANNELS; i++) {
- efx->channel[i] = efx_alloc_channel(efx, i, NULL);
- if (!efx->channel[i])
- goto fail;
- efx->msi_context[i].efx = efx;
- efx->msi_context[i].index = i;
- }
-
- /* Higher numbered interrupt modes are less capable! */
- if (WARN_ON_ONCE(efx->type->max_interrupt_mode >
- efx->type->min_interrupt_mode)) {
- rc = -EIO;
- goto fail;
- }
- efx->interrupt_mode = max(efx->type->max_interrupt_mode,
- interrupt_mode);
- efx->interrupt_mode = min(efx->type->min_interrupt_mode,
- interrupt_mode);
-
- /* Would be good to use the net_dev name, but we're too early */
- snprintf(efx->workqueue_name, sizeof(efx->workqueue_name), "sfc%s",
- pci_name(pci_dev));
- efx->workqueue = create_singlethread_workqueue(efx->workqueue_name);
- if (!efx->workqueue)
- goto fail;
-
- return 0;
-
-fail:
- efx_fini_struct(efx);
- return rc;
-}
-
-static void efx_fini_struct(struct efx_nic *efx)
-{
- int i;
-
-#ifdef CONFIG_RFS_ACCEL
- kfree(efx->rps_hash_table);
-#endif
-
- for (i = 0; i < EFX_MAX_CHANNELS; i++)
- kfree(efx->channel[i]);
-
- kfree(efx->vpd_sn);
-
- if (efx->workqueue) {
- destroy_workqueue(efx->workqueue);
- efx->workqueue = NULL;
- }
-}
-
void efx_update_sw_stats(struct efx_nic *efx, u64 *stats)
{
u64 n_rx_nodesc_trunc = 0;
@@ -3313,197 +1044,6 @@ void efx_update_sw_stats(struct efx_nic *efx, u64 *stats)
stats[GENERIC_STAT_rx_noskb_drops] = atomic_read(&efx->n_rx_noskb_drops);
}
-bool efx_filter_spec_equal(const struct efx_filter_spec *left,
- const struct efx_filter_spec *right)
-{
- if ((left->match_flags ^ right->match_flags) |
- ((left->flags ^ right->flags) &
- (EFX_FILTER_FLAG_RX | EFX_FILTER_FLAG_TX)))
- return false;
-
- return memcmp(&left->outer_vid, &right->outer_vid,
- sizeof(struct efx_filter_spec) -
- offsetof(struct efx_filter_spec, outer_vid)) == 0;
-}
-
-u32 efx_filter_spec_hash(const struct efx_filter_spec *spec)
-{
- BUILD_BUG_ON(offsetof(struct efx_filter_spec, outer_vid) & 3);
- return jhash2((const u32 *)&spec->outer_vid,
- (sizeof(struct efx_filter_spec) -
- offsetof(struct efx_filter_spec, outer_vid)) / 4,
- 0);
-}
-
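A self-contained sketch of the "everything from outer_vid onwards" compare/hash idea used above, on a stand-in structure. The real code hashes with jhash2(); the trivial hash here exists only to keep the example runnable.

#include <stddef.h>
#include <stdio.h>
#include <string.h>

struct example_spec {
	unsigned int flags;		/* excluded from hash/compare */
	unsigned short outer_vid;	/* everything from here on is covered */
	unsigned char dest_mac[6];
};

static unsigned int hash_tail(const struct example_spec *s)
{
	const unsigned char *p = (const unsigned char *)&s->outer_vid;
	size_t len = sizeof(*s) - offsetof(struct example_spec, outer_vid);
	unsigned int h = 0;

	while (len--)
		h = h * 31 + *p++;
	return h;
}

int main(void)
{
	struct example_spec a, b;

	memset(&a, 0, sizeof(a));
	memset(&b, 0, sizeof(b));
	a.flags = 1;		/* flags differ... */
	b.flags = 2;
	a.outer_vid = b.outer_vid = 100;

	/* ...but the hashed tails are identical, so this prints 1 */
	printf("tails match: %d\n", hash_tail(&a) == hash_tail(&b));
	return 0;
}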
-#ifdef CONFIG_RFS_ACCEL
-bool efx_rps_check_rule(struct efx_arfs_rule *rule, unsigned int filter_idx,
- bool *force)
-{
- if (rule->filter_id == EFX_ARFS_FILTER_ID_PENDING) {
- /* ARFS is currently updating this entry, leave it */
- return false;
- }
- if (rule->filter_id == EFX_ARFS_FILTER_ID_ERROR) {
- /* ARFS tried and failed to update this, so it's probably out
- * of date. Remove the filter and the ARFS rule entry.
- */
- rule->filter_id = EFX_ARFS_FILTER_ID_REMOVING;
- *force = true;
- return true;
- } else if (WARN_ON(rule->filter_id != filter_idx)) { /* can't happen */
- /* ARFS has moved on, so old filter is not needed. Since we did
- * not mark the rule with EFX_ARFS_FILTER_ID_REMOVING, it will
- * not be removed by efx_rps_hash_del() subsequently.
- */
- *force = true;
- return true;
- }
- /* Remove it iff ARFS wants to. */
- return true;
-}
-
-static
-struct hlist_head *efx_rps_hash_bucket(struct efx_nic *efx,
- const struct efx_filter_spec *spec)
-{
- u32 hash = efx_filter_spec_hash(spec);
-
- lockdep_assert_held(&efx->rps_hash_lock);
- if (!efx->rps_hash_table)
- return NULL;
- return &efx->rps_hash_table[hash % EFX_ARFS_HASH_TABLE_SIZE];
-}
-
-struct efx_arfs_rule *efx_rps_hash_find(struct efx_nic *efx,
- const struct efx_filter_spec *spec)
-{
- struct efx_arfs_rule *rule;
- struct hlist_head *head;
- struct hlist_node *node;
-
- head = efx_rps_hash_bucket(efx, spec);
- if (!head)
- return NULL;
- hlist_for_each(node, head) {
- rule = container_of(node, struct efx_arfs_rule, node);
- if (efx_filter_spec_equal(spec, &rule->spec))
- return rule;
- }
- return NULL;
-}
-
-struct efx_arfs_rule *efx_rps_hash_add(struct efx_nic *efx,
- const struct efx_filter_spec *spec,
- bool *new)
-{
- struct efx_arfs_rule *rule;
- struct hlist_head *head;
- struct hlist_node *node;
-
- head = efx_rps_hash_bucket(efx, spec);
- if (!head)
- return NULL;
- hlist_for_each(node, head) {
- rule = container_of(node, struct efx_arfs_rule, node);
- if (efx_filter_spec_equal(spec, &rule->spec)) {
- *new = false;
- return rule;
- }
- }
- rule = kmalloc(sizeof(*rule), GFP_ATOMIC);
- *new = true;
- if (rule) {
- memcpy(&rule->spec, spec, sizeof(rule->spec));
- hlist_add_head(&rule->node, head);
- }
- return rule;
-}
-
-void efx_rps_hash_del(struct efx_nic *efx, const struct efx_filter_spec *spec)
-{
- struct efx_arfs_rule *rule;
- struct hlist_head *head;
- struct hlist_node *node;
-
- head = efx_rps_hash_bucket(efx, spec);
- if (WARN_ON(!head))
- return;
- hlist_for_each(node, head) {
- rule = container_of(node, struct efx_arfs_rule, node);
- if (efx_filter_spec_equal(spec, &rule->spec)) {
- /* Someone already reused the entry. We know that if
- * this check doesn't fire (i.e. filter_id == REMOVING)
- * then the REMOVING mark was put there by our caller,
- * because caller is holding a lock on filter table and
- * only holders of that lock set REMOVING.
- */
- if (rule->filter_id != EFX_ARFS_FILTER_ID_REMOVING)
- return;
- hlist_del(node);
- kfree(rule);
- return;
- }
- }
- /* We didn't find it. */
- WARN_ON(1);
-}
-#endif
-
-/* RSS contexts. We're using linked lists and crappy O(n) algorithms, because
- * (a) this is an infrequent control-plane operation and (b) n is small (max 64)
- */
-struct efx_rss_context *efx_alloc_rss_context_entry(struct efx_nic *efx)
-{
- struct list_head *head = &efx->rss_context.list;
- struct efx_rss_context *ctx, *new;
- u32 id = 1; /* Don't use zero, that refers to the master RSS context */
-
- WARN_ON(!mutex_is_locked(&efx->rss_lock));
-
- /* Search for first gap in the numbering */
- list_for_each_entry(ctx, head, list) {
- if (ctx->user_id != id)
- break;
- id++;
- /* Check for wrap. If this happens, we have nearly 2^32
- * allocated RSS contexts, which seems unlikely.
- */
- if (WARN_ON_ONCE(!id))
- return NULL;
- }
-
- /* Create the new entry */
- new = kmalloc(sizeof(struct efx_rss_context), GFP_KERNEL);
- if (!new)
- return NULL;
- new->context_id = EFX_EF10_RSS_CONTEXT_INVALID;
- new->rx_hash_udp_4tuple = false;
-
- /* Insert the new entry into the gap */
- new->user_id = id;
- list_add_tail(&new->list, &ctx->list);
- return new;
-}
-
-struct efx_rss_context *efx_find_rss_context_entry(struct efx_nic *efx, u32 id)
-{
- struct list_head *head = &efx->rss_context.list;
- struct efx_rss_context *ctx;
-
- WARN_ON(!mutex_is_locked(&efx->rss_lock));
-
- list_for_each_entry(ctx, head, list)
- if (ctx->user_id == id)
- return ctx;
- return NULL;
-}
-
-void efx_free_rss_context_entry(struct efx_rss_context *ctx)
-{
- list_del(&ctx->list);
- kfree(ctx);
-}
-
/**************************************************************************
*
* PCI interface
@@ -3519,7 +1059,7 @@ static void efx_pci_remove_main(struct efx_nic *efx)
* are not READY.
*/
BUG_ON(efx->state == STATE_READY);
- cancel_work_sync(&efx->reset_work);
+ efx_flush_reset_workqueue(efx);
efx_disable_interrupts(efx);
efx_clear_interrupt_affinity(efx);
@@ -3559,7 +1099,7 @@ static void efx_pci_remove(struct pci_dev *pci_dev)
efx_pci_remove_main(efx);
- efx_fini_io(efx);
+ efx_fini_io(efx, efx->type->mem_bar(efx));
netif_dbg(efx, drv, efx->net_dev, "shutdown successful\n");
efx_fini_struct(efx);
@@ -3782,7 +1322,8 @@ static int efx_pci_probe(struct pci_dev *pci_dev,
efx_probe_vpd_strings(efx);
/* Set up basic I/O (BAR mappings etc) */
- rc = efx_init_io(efx);
+ rc = efx_init_io(efx, efx->type->mem_bar(efx), efx->type->max_dma_mask,
+ efx->type->mem_map_size(efx));
if (rc)
goto fail2;
@@ -3826,7 +1367,7 @@ static int efx_pci_probe(struct pci_dev *pci_dev,
return 0;
fail3:
- efx_fini_io(efx);
+ efx_fini_io(efx, efx->type->mem_bar(efx));
fail2:
efx_fini_struct(efx);
fail1:
@@ -3904,7 +1445,7 @@ static int efx_pm_thaw(struct device *dev)
rtnl_unlock();
/* Reschedule any quenched resets scheduled during efx_pm_freeze() */
- queue_work(reset_workqueue, &efx->reset_work);
+ efx_queue_reset_work(efx);
return 0;
@@ -4083,10 +1624,6 @@ static struct pci_driver efx_pci_driver = {
*
*************************************************************************/
-module_param(interrupt_mode, uint, 0444);
-MODULE_PARM_DESC(interrupt_mode,
- "Interrupt mode (0=>MSIX 1=>MSI 2=>legacy)");
-
static int __init efx_init_module(void)
{
int rc;
@@ -4103,11 +1640,9 @@ static int __init efx_init_module(void)
goto err_sriov;
#endif
- reset_workqueue = create_singlethread_workqueue("sfc_reset");
- if (!reset_workqueue) {
- rc = -ENOMEM;
+ rc = efx_create_reset_workqueue();
+ if (rc)
goto err_reset;
- }
rc = pci_register_driver(&efx_pci_driver);
if (rc < 0)
@@ -4116,7 +1651,7 @@ static int __init efx_init_module(void)
return 0;
err_pci:
- destroy_workqueue(reset_workqueue);
+ efx_destroy_reset_workqueue();
err_reset:
#ifdef CONFIG_SFC_SRIOV
efx_fini_sriov();
@@ -4132,7 +1667,7 @@ static void __exit efx_exit_module(void)
printk(KERN_INFO "Solarflare NET driver unloading\n");
pci_unregister_driver(&efx_pci_driver);
- destroy_workqueue(reset_workqueue);
+ efx_destroy_reset_workqueue();
#ifdef CONFIG_SFC_SRIOV
efx_fini_sriov();
#endif
diff --git a/drivers/net/ethernet/sfc/efx.h b/drivers/net/ethernet/sfc/efx.h
index 2dd8d5002315..f1bdb04efbe4 100644
--- a/drivers/net/ethernet/sfc/efx.h
+++ b/drivers/net/ethernet/sfc/efx.h
@@ -15,31 +15,17 @@ int efx_net_open(struct net_device *net_dev);
int efx_net_stop(struct net_device *net_dev);
/* TX */
-int efx_probe_tx_queue(struct efx_tx_queue *tx_queue);
-void efx_remove_tx_queue(struct efx_tx_queue *tx_queue);
-void efx_init_tx_queue(struct efx_tx_queue *tx_queue);
void efx_init_tx_queue_core_txq(struct efx_tx_queue *tx_queue);
-void efx_fini_tx_queue(struct efx_tx_queue *tx_queue);
netdev_tx_t efx_hard_start_xmit(struct sk_buff *skb,
struct net_device *net_dev);
netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb);
void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index);
int efx_setup_tc(struct net_device *net_dev, enum tc_setup_type type,
void *type_data);
-unsigned int efx_tx_max_skb_descs(struct efx_nic *efx);
extern unsigned int efx_piobuf_size;
extern bool efx_separate_tx_channels;
/* RX */
-void efx_set_default_rx_indir_table(struct efx_nic *efx,
- struct efx_rss_context *ctx);
-void efx_rx_config_page_split(struct efx_nic *efx);
-int efx_probe_rx_queue(struct efx_rx_queue *rx_queue);
-void efx_remove_rx_queue(struct efx_rx_queue *rx_queue);
-void efx_init_rx_queue(struct efx_rx_queue *rx_queue);
-void efx_fini_rx_queue(struct efx_rx_queue *rx_queue);
-void efx_fast_push_rx_descriptors(struct efx_rx_queue *rx_queue, bool atomic);
-void efx_rx_slow_fill(struct timer_list *t);
void __efx_rx_packet(struct efx_channel *channel);
void efx_rx_packet(struct efx_rx_queue *rx_queue, unsigned int index,
unsigned int n_frags, unsigned int len, u16 flags);
@@ -48,7 +34,6 @@ static inline void efx_rx_flush_packet(struct efx_channel *channel)
if (channel->rx_pkt_n_frags)
__efx_rx_packet(channel);
}
-void efx_schedule_slow_fill(struct efx_rx_queue *rx_queue);
#define EFX_MAX_DMAQ_SIZE 4096UL
#define EFX_DEFAULT_DMAQ_SIZE 1024UL
@@ -80,8 +65,6 @@ static inline bool efx_rss_enabled(struct efx_nic *efx)
/* Filters */
-void efx_mac_reconfigure(struct efx_nic *efx);
-
/**
* efx_filter_insert_filter - add or replace a filter
* @efx: NIC in which to insert the filter
@@ -186,58 +169,17 @@ static inline void efx_filter_rfs_expire(struct work_struct *data)
static inline void efx_filter_rfs_expire(struct work_struct *data) {}
#define efx_filter_rfs_enabled() 0
#endif
-bool efx_filter_is_mc_recipient(const struct efx_filter_spec *spec);
-
-bool efx_filter_spec_equal(const struct efx_filter_spec *left,
- const struct efx_filter_spec *right);
-u32 efx_filter_spec_hash(const struct efx_filter_spec *spec);
-
-#ifdef CONFIG_RFS_ACCEL
-bool efx_rps_check_rule(struct efx_arfs_rule *rule, unsigned int filter_idx,
- bool *force);
-
-struct efx_arfs_rule *efx_rps_hash_find(struct efx_nic *efx,
- const struct efx_filter_spec *spec);
-
-/* @new is written to indicate if entry was newly added (true) or if an old
- * entry was found and returned (false).
- */
-struct efx_arfs_rule *efx_rps_hash_add(struct efx_nic *efx,
- const struct efx_filter_spec *spec,
- bool *new);
-
-void efx_rps_hash_del(struct efx_nic *efx, const struct efx_filter_spec *spec);
-#endif
/* RSS contexts */
-struct efx_rss_context *efx_alloc_rss_context_entry(struct efx_nic *efx);
-struct efx_rss_context *efx_find_rss_context_entry(struct efx_nic *efx, u32 id);
-void efx_free_rss_context_entry(struct efx_rss_context *ctx);
static inline bool efx_rss_active(struct efx_rss_context *ctx)
{
- return ctx->context_id != EFX_EF10_RSS_CONTEXT_INVALID;
+ return ctx->context_id != EFX_MCDI_RSS_CONTEXT_INVALID;
}
-/* Channels */
-int efx_channel_dummy_op_int(struct efx_channel *channel);
-void efx_channel_dummy_op_void(struct efx_channel *channel);
-int efx_realloc_channels(struct efx_nic *efx, u32 rxq_entries, u32 txq_entries);
-
-/* Ports */
-int efx_reconfigure_port(struct efx_nic *efx);
-int __efx_reconfigure_port(struct efx_nic *efx);
-
/* Ethtool support */
extern const struct ethtool_ops efx_ethtool_ops;
-/* Reset handling */
-int efx_reset(struct efx_nic *efx, enum reset_type method);
-void efx_reset_down(struct efx_nic *efx, enum reset_type method);
-int efx_reset_up(struct efx_nic *efx, enum reset_type method, bool ok);
-int efx_try_recovery(struct efx_nic *efx);
-
/* Global */
-void efx_schedule_reset(struct efx_nic *efx, enum reset_type type);
unsigned int efx_usecs_to_ticks(struct efx_nic *efx, unsigned int usecs);
unsigned int efx_ticks_to_usecs(struct efx_nic *efx, unsigned int ticks);
int efx_init_irq_moderation(struct efx_nic *efx, unsigned int tx_usecs,
@@ -245,8 +187,6 @@ int efx_init_irq_moderation(struct efx_nic *efx, unsigned int tx_usecs,
bool rx_may_override_tx);
void efx_get_irq_moderation(struct efx_nic *efx, unsigned int *tx_usecs,
unsigned int *rx_usecs, bool *rx_adaptive);
-void efx_stop_eventq(struct efx_channel *channel);
-void efx_start_eventq(struct efx_channel *channel);
/* Dummy PHY ops for PHY drivers */
int efx_port_dummy_op_int(struct efx_nic *efx);
@@ -293,9 +233,6 @@ static inline void efx_schedule_channel_irq(struct efx_channel *channel)
efx_schedule_channel(channel);
}
-void efx_link_status_changed(struct efx_nic *efx);
-void efx_link_set_advertising(struct efx_nic *efx,
- const unsigned long *advertising);
void efx_link_clear_advertising(struct efx_nic *efx);
void efx_link_set_wanted_fc(struct efx_nic *efx, u8);
diff --git a/drivers/net/ethernet/sfc/efx_channels.c b/drivers/net/ethernet/sfc/efx_channels.c
new file mode 100644
index 000000000000..aeb5e8aa2f2a
--- /dev/null
+++ b/drivers/net/ethernet/sfc/efx_channels.c
@@ -0,0 +1,1234 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2018 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#include "net_driver.h"
+#include <linux/module.h>
+#include "efx_channels.h"
+#include "efx.h"
+#include "efx_common.h"
+#include "tx_common.h"
+#include "rx_common.h"
+#include "nic.h"
+#include "sriov.h"
+
+/* This is the first interrupt mode to try out of:
+ * 0 => MSI-X
+ * 1 => MSI
+ * 2 => legacy
+ */
+static unsigned int interrupt_mode;
+module_param(interrupt_mode, uint, 0444);
+MODULE_PARM_DESC(interrupt_mode,
+ "Interrupt mode (0=>MSIX 1=>MSI 2=>legacy)");
+
+/* This is the requested number of CPUs to use for Receive-Side Scaling (RSS),
+ * i.e. the number of CPUs among which we may distribute simultaneous
+ * interrupt handling.
+ *
+ * Cards without MSI-X will only target one CPU via legacy or MSI interrupt.
+ * The default (0) means to assign an interrupt to each core.
+ */
+static unsigned int rss_cpus;
+module_param(rss_cpus, uint, 0444);
+MODULE_PARM_DESC(rss_cpus, "Number of CPUs to use for Receive-Side Scaling");
+
+static unsigned int irq_adapt_low_thresh = 8000;
+module_param(irq_adapt_low_thresh, uint, 0644);
+MODULE_PARM_DESC(irq_adapt_low_thresh,
+ "Threshold score for reducing IRQ moderation");
+
+static unsigned int irq_adapt_high_thresh = 16000;
+module_param(irq_adapt_high_thresh, uint, 0644);
+MODULE_PARM_DESC(irq_adapt_high_thresh,
+ "Threshold score for increasing IRQ moderation");
+
+/* This is the weight assigned to each of the (per-channel) virtual
+ * NAPI devices.
+ */
+static int napi_weight = 64;
+
+/***************
+ * Housekeeping
+ ***************/
+
+int efx_channel_dummy_op_int(struct efx_channel *channel)
+{
+ return 0;
+}
+
+void efx_channel_dummy_op_void(struct efx_channel *channel)
+{
+}
+
+static const struct efx_channel_type efx_default_channel_type = {
+ .pre_probe = efx_channel_dummy_op_int,
+ .post_remove = efx_channel_dummy_op_void,
+ .get_name = efx_get_channel_name,
+ .copy = efx_copy_channel,
+ .want_txqs = efx_default_channel_want_txqs,
+ .keep_eventq = false,
+ .want_pio = true,
+};
+
+/*************
+ * INTERRUPTS
+ *************/
+
+static unsigned int efx_wanted_parallelism(struct efx_nic *efx)
+{
+ cpumask_var_t thread_mask;
+ unsigned int count;
+ int cpu;
+
+ if (rss_cpus) {
+ count = rss_cpus;
+ } else {
+ if (unlikely(!zalloc_cpumask_var(&thread_mask, GFP_KERNEL))) {
+ netif_warn(efx, probe, efx->net_dev,
+ "RSS disabled due to allocation failure\n");
+ return 1;
+ }
+
+ count = 0;
+ for_each_online_cpu(cpu) {
+ if (!cpumask_test_cpu(cpu, thread_mask)) {
+ ++count;
+ cpumask_or(thread_mask, thread_mask,
+ topology_sibling_cpumask(cpu));
+ }
+ }
+
+ free_cpumask_var(thread_mask);
+ }
+
+ if (count > EFX_MAX_RX_QUEUES) {
+ netif_cond_dbg(efx, probe, efx->net_dev, !rss_cpus, warn,
+ "Reducing number of rx queues from %u to %u.\n",
+ count, EFX_MAX_RX_QUEUES);
+ count = EFX_MAX_RX_QUEUES;
+ }
+
+ /* If RSS is requested for the PF *and* VFs then we can't write RSS
+ * table entries that are inaccessible to VFs
+ */
+#ifdef CONFIG_SFC_SRIOV
+ if (efx->type->sriov_wanted) {
+ if (efx->type->sriov_wanted(efx) && efx_vf_size(efx) > 1 &&
+ count > efx_vf_size(efx)) {
+ netif_warn(efx, probe, efx->net_dev,
+ "Reducing number of RSS channels from %u to %u for "
+ "VF support. Increase vf-msix-limit to use more "
+ "channels on the PF.\n",
+ count, efx_vf_size(efx));
+ count = efx_vf_size(efx);
+ }
+ }
+#endif
+
+ return count;
+}
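
A user-space sketch of the counting above: take one RSS channel per physical core by skipping CPUs whose core has already been seen. The real code walks topology_sibling_cpumask(); the 8-CPU, 2-way-SMT core map below is invented purely for illustration.

	#include <stdbool.h>
	#include <stdio.h>

	int main(void)
	{
		/* Hypothetical box: CPUs sharing a core_id are SMT siblings */
		int core_id[8] = { 0, 0, 1, 1, 2, 2, 3, 3 };
		bool seen[8] = { false };
		unsigned int count = 0;
		int cpu;

		for (cpu = 0; cpu < 8; cpu++) {
			if (!seen[core_id[cpu]]) {
				seen[core_id[cpu]] = true;
				count++;	/* one RSS channel per physical core */
			}
		}
		printf("wanted parallelism: %u\n", count);	/* prints 4 */
		return 0;
	}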
+
+static int efx_allocate_msix_channels(struct efx_nic *efx,
+ unsigned int max_channels,
+ unsigned int extra_channels,
+ unsigned int parallelism)
+{
+ unsigned int n_channels = parallelism;
+ int vec_count;
+ int n_xdp_tx;
+ int n_xdp_ev;
+
+ if (efx_separate_tx_channels)
+ n_channels *= 2;
+ n_channels += extra_channels;
+
+ /* To allow XDP transmit to happen from arbitrary NAPI contexts
+ * we allocate a TX queue per CPU. We share event queues across
+ * multiple tx queues, assuming tx and ev queues are both
+ * maximum size.
+ */
+
+ n_xdp_tx = num_possible_cpus();
+ n_xdp_ev = DIV_ROUND_UP(n_xdp_tx, EFX_TXQ_TYPES);
+
+ vec_count = pci_msix_vec_count(efx->pci_dev);
+ if (vec_count < 0)
+ return vec_count;
+
+ max_channels = min_t(unsigned int, vec_count, max_channels);
+
+ /* Check resources.
+ * We need a channel per event queue, plus a VI per tx queue.
+ * This may be more pessimistic than it needs to be.
+ */
+ if (n_channels + n_xdp_ev > max_channels) {
+ netif_err(efx, drv, efx->net_dev,
+ "Insufficient resources for %d XDP event queues (%d other channels, max %d)\n",
+ n_xdp_ev, n_channels, max_channels);
+ efx->n_xdp_channels = 0;
+ efx->xdp_tx_per_channel = 0;
+ efx->xdp_tx_queue_count = 0;
+ } else {
+ efx->n_xdp_channels = n_xdp_ev;
+ efx->xdp_tx_per_channel = EFX_TXQ_TYPES;
+ efx->xdp_tx_queue_count = n_xdp_tx;
+ n_channels += n_xdp_ev;
+ netif_dbg(efx, drv, efx->net_dev,
+ "Allocating %d TX and %d event queues for XDP\n",
+ n_xdp_tx, n_xdp_ev);
+ }
+
+ if (vec_count < n_channels) {
+ netif_err(efx, drv, efx->net_dev,
+ "WARNING: Insufficient MSI-X vectors available (%d < %u).\n",
+ vec_count, n_channels);
+ netif_err(efx, drv, efx->net_dev,
+ "WARNING: Performance may be reduced.\n");
+ n_channels = vec_count;
+ }
+
+ n_channels = min(n_channels, max_channels);
+
+ efx->n_channels = n_channels;
+
+ /* Ignore XDP tx channels when creating rx channels. */
+ n_channels -= efx->n_xdp_channels;
+
+ if (efx_separate_tx_channels) {
+ efx->n_tx_channels =
+ min(max(n_channels / 2, 1U),
+ efx->max_tx_channels);
+ efx->tx_channel_offset =
+ n_channels - efx->n_tx_channels;
+ efx->n_rx_channels =
+ max(n_channels -
+ efx->n_tx_channels, 1U);
+ } else {
+ efx->n_tx_channels = min(n_channels, efx->max_tx_channels);
+ efx->tx_channel_offset = 0;
+ efx->n_rx_channels = n_channels;
+ }
+
+ efx->n_rx_channels = min(efx->n_rx_channels, parallelism);
+ efx->n_tx_channels = min(efx->n_tx_channels, parallelism);
+
+ efx->xdp_channel_offset = n_channels;
+
+ netif_dbg(efx, drv, efx->net_dev,
+ "Allocating %u RX channels\n",
+ efx->n_rx_channels);
+
+ return efx->n_channels;
+}
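
For illustration, the XDP sizing above boils down to one XDP TX queue per possible CPU, with event queues shared between groups of TX queues. A minimal stand-alone sketch of that arithmetic, assuming 32 possible CPUs and an EFX_TXQ_TYPES of 4 (both values invented here, not taken from the driver):

	#include <stdio.h>

	#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

	int main(void)
	{
		unsigned int possible_cpus = 32;	/* hypothetical system */
		unsigned int txq_types = 4;		/* assumed EFX_TXQ_TYPES */
		unsigned int n_xdp_tx = possible_cpus;
		unsigned int n_xdp_ev = DIV_ROUND_UP(n_xdp_tx, txq_types);

		printf("XDP: %u TX queues shared over %u event queues\n",
		       n_xdp_tx, n_xdp_ev);	/* 32 TX queues, 8 event queues */
		return 0;
	}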
+
+/* Probe the number and type of interrupts we are able to obtain, and
+ * the resulting numbers of channels and RX queues.
+ */
+int efx_probe_interrupts(struct efx_nic *efx)
+{
+ unsigned int extra_channels = 0;
+ unsigned int rss_spread;
+ unsigned int i, j;
+ int rc;
+
+ for (i = 0; i < EFX_MAX_EXTRA_CHANNELS; i++)
+ if (efx->extra_channel_type[i])
+ ++extra_channels;
+
+ if (efx->interrupt_mode == EFX_INT_MODE_MSIX) {
+ unsigned int parallelism = efx_wanted_parallelism(efx);
+ struct msix_entry xentries[EFX_MAX_CHANNELS];
+ unsigned int n_channels;
+
+ rc = efx_allocate_msix_channels(efx, efx->max_channels,
+ extra_channels, parallelism);
+ if (rc >= 0) {
+ n_channels = rc;
+ for (i = 0; i < n_channels; i++)
+ xentries[i].entry = i;
+ rc = pci_enable_msix_range(efx->pci_dev, xentries, 1,
+ n_channels);
+ }
+ if (rc < 0) {
+ /* Fall back to single channel MSI */
+ netif_err(efx, drv, efx->net_dev,
+ "could not enable MSI-X\n");
+ if (efx->type->min_interrupt_mode >= EFX_INT_MODE_MSI)
+ efx->interrupt_mode = EFX_INT_MODE_MSI;
+ else
+ return rc;
+ } else if (rc < n_channels) {
+ netif_err(efx, drv, efx->net_dev,
+ "WARNING: Insufficient MSI-X vectors"
+ " available (%d < %u).\n", rc, n_channels);
+ netif_err(efx, drv, efx->net_dev,
+ "WARNING: Performance may be reduced.\n");
+ n_channels = rc;
+ }
+
+ if (rc > 0) {
+ for (i = 0; i < efx->n_channels; i++)
+ efx_get_channel(efx, i)->irq =
+ xentries[i].vector;
+ }
+ }
+
+ /* Try single interrupt MSI */
+ if (efx->interrupt_mode == EFX_INT_MODE_MSI) {
+ efx->n_channels = 1;
+ efx->n_rx_channels = 1;
+ efx->n_tx_channels = 1;
+ efx->n_xdp_channels = 0;
+ efx->xdp_channel_offset = efx->n_channels;
+ rc = pci_enable_msi(efx->pci_dev);
+ if (rc == 0) {
+ efx_get_channel(efx, 0)->irq = efx->pci_dev->irq;
+ } else {
+ netif_err(efx, drv, efx->net_dev,
+ "could not enable MSI\n");
+ if (efx->type->min_interrupt_mode >= EFX_INT_MODE_LEGACY)
+ efx->interrupt_mode = EFX_INT_MODE_LEGACY;
+ else
+ return rc;
+ }
+ }
+
+ /* Assume legacy interrupts */
+ if (efx->interrupt_mode == EFX_INT_MODE_LEGACY) {
+ efx->n_channels = 1 + (efx_separate_tx_channels ? 1 : 0);
+ efx->n_rx_channels = 1;
+ efx->n_tx_channels = 1;
+ efx->n_xdp_channels = 0;
+ efx->xdp_channel_offset = efx->n_channels;
+ efx->legacy_irq = efx->pci_dev->irq;
+ }
+
+ /* Assign extra channels if possible, before XDP channels */
+ efx->n_extra_tx_channels = 0;
+ j = efx->xdp_channel_offset;
+ for (i = 0; i < EFX_MAX_EXTRA_CHANNELS; i++) {
+ if (!efx->extra_channel_type[i])
+ continue;
+ if (j <= efx->tx_channel_offset + efx->n_tx_channels) {
+ efx->extra_channel_type[i]->handle_no_channel(efx);
+ } else {
+ --j;
+ efx_get_channel(efx, j)->type =
+ efx->extra_channel_type[i];
+ if (efx_channel_has_tx_queues(efx_get_channel(efx, j)))
+ efx->n_extra_tx_channels++;
+ }
+ }
+
+ rss_spread = efx->n_rx_channels;
+ /* RSS might be usable on VFs even if it is disabled on the PF */
+#ifdef CONFIG_SFC_SRIOV
+ if (efx->type->sriov_wanted) {
+ efx->rss_spread = ((rss_spread > 1 ||
+ !efx->type->sriov_wanted(efx)) ?
+ rss_spread : efx_vf_size(efx));
+ return 0;
+ }
+#endif
+ efx->rss_spread = rss_spread;
+
+ return 0;
+}
+
+#if defined(CONFIG_SMP)
+void efx_set_interrupt_affinity(struct efx_nic *efx)
+{
+ struct efx_channel *channel;
+ unsigned int cpu;
+
+ efx_for_each_channel(channel, efx) {
+ cpu = cpumask_local_spread(channel->channel,
+ pcibus_to_node(efx->pci_dev->bus));
+ irq_set_affinity_hint(channel->irq, cpumask_of(cpu));
+ }
+}
+
+void efx_clear_interrupt_affinity(struct efx_nic *efx)
+{
+ struct efx_channel *channel;
+
+ efx_for_each_channel(channel, efx)
+ irq_set_affinity_hint(channel->irq, NULL);
+}
+#else
+void
+efx_set_interrupt_affinity(struct efx_nic *efx __attribute__ ((unused)))
+{
+}
+
+void
+efx_clear_interrupt_affinity(struct efx_nic *efx __attribute__ ((unused)))
+{
+}
+#endif /* CONFIG_SMP */
+
+void efx_remove_interrupts(struct efx_nic *efx)
+{
+ struct efx_channel *channel;
+
+ /* Remove MSI/MSI-X interrupts */
+ efx_for_each_channel(channel, efx)
+ channel->irq = 0;
+ pci_disable_msi(efx->pci_dev);
+ pci_disable_msix(efx->pci_dev);
+
+ /* Remove legacy interrupt */
+ efx->legacy_irq = 0;
+}
+
+/***************
+ * EVENT QUEUES
+ ***************/
+
+/* Create event queue
+ * Event queue memory allocations are done only once. If the channel
+ * is reset, the memory buffer will be reused; this guards against
+ * errors during channel reset and also simplifies interrupt handling.
+ */
+int efx_probe_eventq(struct efx_channel *channel)
+{
+ struct efx_nic *efx = channel->efx;
+ unsigned long entries;
+
+ netif_dbg(efx, probe, efx->net_dev,
+ "chan %d create event queue\n", channel->channel);
+
+ /* Build an event queue with room for one event per tx and rx buffer,
+ * plus some extra for link state events and MCDI completions.
+ */
+ entries = roundup_pow_of_two(efx->rxq_entries + efx->txq_entries + 128);
+ EFX_WARN_ON_PARANOID(entries > EFX_MAX_EVQ_SIZE);
+ channel->eventq_mask = max(entries, EFX_MIN_EVQ_SIZE) - 1;
+
+ return efx_nic_probe_eventq(channel);
+}
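
Worked example of the sizing above, assuming both rings use the default size of 1024 (EFX_DEFAULT_DMAQ_SIZE) and ignoring the EFX_MIN_EVQ_SIZE clamp; roundup_pow_of_two() below is a simplified stand-in for the kernel helper:

	#include <stdio.h>

	/* Simplified stand-in for the kernel's roundup_pow_of_two() */
	static unsigned long roundup_pow_of_two(unsigned long n)
	{
		unsigned long r = 1;

		while (r < n)
			r <<= 1;
		return r;
	}

	int main(void)
	{
		unsigned long rxq_entries = 1024, txq_entries = 1024;
		unsigned long entries = roundup_pow_of_two(rxq_entries +
							   txq_entries + 128);

		/* 2176 rounds up to 4096, so the event queue mask is 0xfff */
		printf("event queue: %lu entries, mask %#lx\n",
		       entries, entries - 1);
		return 0;
	}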
+
+/* Prepare channel's event queue */
+int efx_init_eventq(struct efx_channel *channel)
+{
+ struct efx_nic *efx = channel->efx;
+ int rc;
+
+ EFX_WARN_ON_PARANOID(channel->eventq_init);
+
+ netif_dbg(efx, drv, efx->net_dev,
+ "chan %d init event queue\n", channel->channel);
+
+ rc = efx_nic_init_eventq(channel);
+ if (rc == 0) {
+ efx->type->push_irq_moderation(channel);
+ channel->eventq_read_ptr = 0;
+ channel->eventq_init = true;
+ }
+ return rc;
+}
+
+/* Enable event queue processing and NAPI */
+void efx_start_eventq(struct efx_channel *channel)
+{
+ netif_dbg(channel->efx, ifup, channel->efx->net_dev,
+ "chan %d start event queue\n", channel->channel);
+
+ /* Make sure the NAPI handler sees the enabled flag set */
+ channel->enabled = true;
+ smp_wmb();
+
+ napi_enable(&channel->napi_str);
+ efx_nic_eventq_read_ack(channel);
+}
+
+/* Disable event queue processing and NAPI */
+void efx_stop_eventq(struct efx_channel *channel)
+{
+ if (!channel->enabled)
+ return;
+
+ napi_disable(&channel->napi_str);
+ channel->enabled = false;
+}
+
+void efx_fini_eventq(struct efx_channel *channel)
+{
+ if (!channel->eventq_init)
+ return;
+
+ netif_dbg(channel->efx, drv, channel->efx->net_dev,
+ "chan %d fini event queue\n", channel->channel);
+
+ efx_nic_fini_eventq(channel);
+ channel->eventq_init = false;
+}
+
+void efx_remove_eventq(struct efx_channel *channel)
+{
+ netif_dbg(channel->efx, drv, channel->efx->net_dev,
+ "chan %d remove event queue\n", channel->channel);
+
+ efx_nic_remove_eventq(channel);
+}
+
+/**************************************************************************
+ *
+ * Channel handling
+ *
+ *************************************************************************/
+
+/* Allocate and initialise a channel structure. */
+struct efx_channel *
+efx_alloc_channel(struct efx_nic *efx, int i, struct efx_channel *old_channel)
+{
+ struct efx_rx_queue *rx_queue;
+ struct efx_tx_queue *tx_queue;
+ struct efx_channel *channel;
+ int j;
+
+ channel = kzalloc(sizeof(*channel), GFP_KERNEL);
+ if (!channel)
+ return NULL;
+
+ channel->efx = efx;
+ channel->channel = i;
+ channel->type = &efx_default_channel_type;
+
+ for (j = 0; j < EFX_TXQ_TYPES; j++) {
+ tx_queue = &channel->tx_queue[j];
+ tx_queue->efx = efx;
+ tx_queue->queue = i * EFX_TXQ_TYPES + j;
+ tx_queue->channel = channel;
+ }
+
+#ifdef CONFIG_RFS_ACCEL
+ INIT_DELAYED_WORK(&channel->filter_work, efx_filter_rfs_expire);
+#endif
+
+ rx_queue = &channel->rx_queue;
+ rx_queue->efx = efx;
+ timer_setup(&rx_queue->slow_fill, efx_rx_slow_fill, 0);
+
+ return channel;
+}
+
+int efx_init_channels(struct efx_nic *efx)
+{
+ unsigned int i;
+
+ for (i = 0; i < EFX_MAX_CHANNELS; i++) {
+ efx->channel[i] = efx_alloc_channel(efx, i, NULL);
+ if (!efx->channel[i])
+ return -ENOMEM;
+ efx->msi_context[i].efx = efx;
+ efx->msi_context[i].index = i;
+ }
+
+ /* Higher numbered interrupt modes are less capable! */
+ if (WARN_ON_ONCE(efx->type->max_interrupt_mode >
+ efx->type->min_interrupt_mode)) {
+ return -EIO;
+ }
+ efx->interrupt_mode = max(efx->type->max_interrupt_mode,
+ interrupt_mode);
+ efx->interrupt_mode = min(efx->type->min_interrupt_mode,
+ interrupt_mode);
+
+ return 0;
+}
+
+void efx_fini_channels(struct efx_nic *efx)
+{
+ unsigned int i;
+
+ for (i = 0; i < EFX_MAX_CHANNELS; i++)
+ if (efx->channel[i]) {
+ kfree(efx->channel[i]);
+ efx->channel[i] = NULL;
+ }
+}
+
+/* Allocate and initialise a channel structure, copying parameters
+ * (but not resources) from an old channel structure.
+ */
+struct efx_channel *efx_copy_channel(const struct efx_channel *old_channel)
+{
+ struct efx_rx_queue *rx_queue;
+ struct efx_tx_queue *tx_queue;
+ struct efx_channel *channel;
+ int j;
+
+ channel = kmalloc(sizeof(*channel), GFP_KERNEL);
+ if (!channel)
+ return NULL;
+
+ *channel = *old_channel;
+
+ channel->napi_dev = NULL;
+ INIT_HLIST_NODE(&channel->napi_str.napi_hash_node);
+ channel->napi_str.napi_id = 0;
+ channel->napi_str.state = 0;
+ memset(&channel->eventq, 0, sizeof(channel->eventq));
+
+ for (j = 0; j < EFX_TXQ_TYPES; j++) {
+ tx_queue = &channel->tx_queue[j];
+ if (tx_queue->channel)
+ tx_queue->channel = channel;
+ tx_queue->buffer = NULL;
+ memset(&tx_queue->txd, 0, sizeof(tx_queue->txd));
+ }
+
+ rx_queue = &channel->rx_queue;
+ rx_queue->buffer = NULL;
+ memset(&rx_queue->rxd, 0, sizeof(rx_queue->rxd));
+ timer_setup(&rx_queue->slow_fill, efx_rx_slow_fill, 0);
+#ifdef CONFIG_RFS_ACCEL
+ INIT_DELAYED_WORK(&channel->filter_work, efx_filter_rfs_expire);
+#endif
+
+ return channel;
+}
+
+static int efx_probe_channel(struct efx_channel *channel)
+{
+ struct efx_tx_queue *tx_queue;
+ struct efx_rx_queue *rx_queue;
+ int rc;
+
+ netif_dbg(channel->efx, probe, channel->efx->net_dev,
+ "creating channel %d\n", channel->channel);
+
+ rc = channel->type->pre_probe(channel);
+ if (rc)
+ goto fail;
+
+ rc = efx_probe_eventq(channel);
+ if (rc)
+ goto fail;
+
+ efx_for_each_channel_tx_queue(tx_queue, channel) {
+ rc = efx_probe_tx_queue(tx_queue);
+ if (rc)
+ goto fail;
+ }
+
+ efx_for_each_channel_rx_queue(rx_queue, channel) {
+ rc = efx_probe_rx_queue(rx_queue);
+ if (rc)
+ goto fail;
+ }
+
+ channel->rx_list = NULL;
+
+ return 0;
+
+fail:
+ efx_remove_channel(channel);
+ return rc;
+}
+
+void efx_get_channel_name(struct efx_channel *channel, char *buf, size_t len)
+{
+ struct efx_nic *efx = channel->efx;
+ const char *type;
+ int number;
+
+ number = channel->channel;
+
+ if (number >= efx->xdp_channel_offset &&
+ !WARN_ON_ONCE(!efx->n_xdp_channels)) {
+ type = "-xdp";
+ number -= efx->xdp_channel_offset;
+ } else if (efx->tx_channel_offset == 0) {
+ type = "";
+ } else if (number < efx->tx_channel_offset) {
+ type = "-rx";
+ } else {
+ type = "-tx";
+ number -= efx->tx_channel_offset;
+ }
+ snprintf(buf, len, "%s%s-%d", efx->name, type, number);
+}
+
+void efx_set_channel_names(struct efx_nic *efx)
+{
+ struct efx_channel *channel;
+
+ efx_for_each_channel(channel, efx)
+ channel->type->get_name(channel,
+ efx->msi_context[channel->channel].name,
+ sizeof(efx->msi_context[0].name));
+}
+
+int efx_probe_channels(struct efx_nic *efx)
+{
+ struct efx_channel *channel;
+ int rc;
+
+ /* Restart special buffer allocation */
+ efx->next_buffer_table = 0;
+
+ /* Probe channels in reverse, so that any 'extra' channels
+ * use the start of the buffer table. This allows the traffic
+ * channels to be resized without moving them or wasting the
+ * entries before them.
+ */
+ efx_for_each_channel_rev(channel, efx) {
+ rc = efx_probe_channel(channel);
+ if (rc) {
+ netif_err(efx, probe, efx->net_dev,
+ "failed to create channel %d\n",
+ channel->channel);
+ goto fail;
+ }
+ }
+ efx_set_channel_names(efx);
+
+ return 0;
+
+fail:
+ efx_remove_channels(efx);
+ return rc;
+}
+
+void efx_remove_channel(struct efx_channel *channel)
+{
+ struct efx_tx_queue *tx_queue;
+ struct efx_rx_queue *rx_queue;
+
+ netif_dbg(channel->efx, drv, channel->efx->net_dev,
+ "destroy chan %d\n", channel->channel);
+
+ efx_for_each_channel_rx_queue(rx_queue, channel)
+ efx_remove_rx_queue(rx_queue);
+ efx_for_each_possible_channel_tx_queue(tx_queue, channel)
+ efx_remove_tx_queue(tx_queue);
+ efx_remove_eventq(channel);
+ channel->type->post_remove(channel);
+}
+
+void efx_remove_channels(struct efx_nic *efx)
+{
+ struct efx_channel *channel;
+
+ efx_for_each_channel(channel, efx)
+ efx_remove_channel(channel);
+
+ kfree(efx->xdp_tx_queues);
+}
+
+int efx_realloc_channels(struct efx_nic *efx, u32 rxq_entries, u32 txq_entries)
+{
+ struct efx_channel *other_channel[EFX_MAX_CHANNELS], *channel;
+ unsigned int i, next_buffer_table = 0;
+ u32 old_rxq_entries, old_txq_entries;
+ int rc, rc2;
+
+ rc = efx_check_disabled(efx);
+ if (rc)
+ return rc;
+
+ /* Not all channels should be reallocated. We must avoid
+ * reallocating their buffer table entries.
+ */
+ efx_for_each_channel(channel, efx) {
+ struct efx_rx_queue *rx_queue;
+ struct efx_tx_queue *tx_queue;
+
+ if (channel->type->copy)
+ continue;
+ next_buffer_table = max(next_buffer_table,
+ channel->eventq.index +
+ channel->eventq.entries);
+ efx_for_each_channel_rx_queue(rx_queue, channel)
+ next_buffer_table = max(next_buffer_table,
+ rx_queue->rxd.index +
+ rx_queue->rxd.entries);
+ efx_for_each_channel_tx_queue(tx_queue, channel)
+ next_buffer_table = max(next_buffer_table,
+ tx_queue->txd.index +
+ tx_queue->txd.entries);
+ }
+
+ efx_device_detach_sync(efx);
+ efx_stop_all(efx);
+ efx_soft_disable_interrupts(efx);
+
+ /* Clone channels (where possible) */
+ memset(other_channel, 0, sizeof(other_channel));
+ for (i = 0; i < efx->n_channels; i++) {
+ channel = efx->channel[i];
+ if (channel->type->copy)
+ channel = channel->type->copy(channel);
+ if (!channel) {
+ rc = -ENOMEM;
+ goto out;
+ }
+ other_channel[i] = channel;
+ }
+
+ /* Swap entry counts and channel pointers */
+ old_rxq_entries = efx->rxq_entries;
+ old_txq_entries = efx->txq_entries;
+ efx->rxq_entries = rxq_entries;
+ efx->txq_entries = txq_entries;
+ for (i = 0; i < efx->n_channels; i++) {
+ channel = efx->channel[i];
+ efx->channel[i] = other_channel[i];
+ other_channel[i] = channel;
+ }
+
+ /* Restart buffer table allocation */
+ efx->next_buffer_table = next_buffer_table;
+
+ for (i = 0; i < efx->n_channels; i++) {
+ channel = efx->channel[i];
+ if (!channel->type->copy)
+ continue;
+ rc = efx_probe_channel(channel);
+ if (rc)
+ goto rollback;
+ efx_init_napi_channel(efx->channel[i]);
+ }
+
+out:
+ /* Destroy unused channel structures */
+ for (i = 0; i < efx->n_channels; i++) {
+ channel = other_channel[i];
+ if (channel && channel->type->copy) {
+ efx_fini_napi_channel(channel);
+ efx_remove_channel(channel);
+ kfree(channel);
+ }
+ }
+
+ rc2 = efx_soft_enable_interrupts(efx);
+ if (rc2) {
+ rc = rc ? rc : rc2;
+ netif_err(efx, drv, efx->net_dev,
+ "unable to restart interrupts on channel reallocation\n");
+ efx_schedule_reset(efx, RESET_TYPE_DISABLE);
+ } else {
+ efx_start_all(efx);
+ efx_device_attach_if_not_resetting(efx);
+ }
+ return rc;
+
+rollback:
+ /* Swap back */
+ efx->rxq_entries = old_rxq_entries;
+ efx->txq_entries = old_txq_entries;
+ for (i = 0; i < efx->n_channels; i++) {
+ channel = efx->channel[i];
+ efx->channel[i] = other_channel[i];
+ other_channel[i] = channel;
+ }
+ goto out;
+}
+
+int efx_set_channels(struct efx_nic *efx)
+{
+ struct efx_channel *channel;
+ struct efx_tx_queue *tx_queue;
+ int xdp_queue_number;
+
+ efx->tx_channel_offset =
+ efx_separate_tx_channels ?
+ efx->n_channels - efx->n_tx_channels : 0;
+
+ if (efx->xdp_tx_queue_count) {
+ EFX_WARN_ON_PARANOID(efx->xdp_tx_queues);
+
+ /* Allocate array for XDP TX queue lookup. */
+ efx->xdp_tx_queues = kcalloc(efx->xdp_tx_queue_count,
+ sizeof(*efx->xdp_tx_queues),
+ GFP_KERNEL);
+ if (!efx->xdp_tx_queues)
+ return -ENOMEM;
+ }
+
+ /* We need to mark which channels really have RX and TX
+ * queues, and adjust the TX queue numbers if we have separate
+ * RX-only and TX-only channels.
+ */
+ xdp_queue_number = 0;
+ efx_for_each_channel(channel, efx) {
+ if (channel->channel < efx->n_rx_channels)
+ channel->rx_queue.core_index = channel->channel;
+ else
+ channel->rx_queue.core_index = -1;
+
+ efx_for_each_channel_tx_queue(tx_queue, channel) {
+ tx_queue->queue -= (efx->tx_channel_offset *
+ EFX_TXQ_TYPES);
+
+ if (efx_channel_is_xdp_tx(channel) &&
+ xdp_queue_number < efx->xdp_tx_queue_count) {
+ efx->xdp_tx_queues[xdp_queue_number] = tx_queue;
+ xdp_queue_number++;
+ }
+ }
+ }
+ return 0;
+}
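
A sketch of the TX queue renumbering above, using invented numbers: 8 channels where the last 4 are TX-only (tx_channel_offset = 4) and EFX_TXQ_TYPES assumed to be 4, so a queue on channel 5 is renumbered from 21 down to 5:

	#include <stdio.h>

	int main(void)
	{
		unsigned int txq_types = 4;		/* assumed EFX_TXQ_TYPES */
		unsigned int tx_channel_offset = 4;	/* illustrative only */
		unsigned int channel = 5, type = 1;
		unsigned int global_queue = channel * txq_types + type;
		unsigned int adjusted = global_queue -
					tx_channel_offset * txq_types;

		printf("channel %u type %u: queue %u -> %u\n",
		       channel, type, global_queue, adjusted);	/* 21 -> 5 */
		return 0;
	}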
+
+bool efx_default_channel_want_txqs(struct efx_channel *channel)
+{
+ return channel->channel - channel->efx->tx_channel_offset <
+ channel->efx->n_tx_channels;
+}
+
+/*************
+ * START/STOP
+ *************/
+
+int efx_soft_enable_interrupts(struct efx_nic *efx)
+{
+ struct efx_channel *channel, *end_channel;
+ int rc;
+
+ BUG_ON(efx->state == STATE_DISABLED);
+
+ efx->irq_soft_enabled = true;
+ smp_wmb();
+
+ efx_for_each_channel(channel, efx) {
+ if (!channel->type->keep_eventq) {
+ rc = efx_init_eventq(channel);
+ if (rc)
+ goto fail;
+ }
+ efx_start_eventq(channel);
+ }
+
+ efx_mcdi_mode_event(efx);
+
+ return 0;
+fail:
+ end_channel = channel;
+ efx_for_each_channel(channel, efx) {
+ if (channel == end_channel)
+ break;
+ efx_stop_eventq(channel);
+ if (!channel->type->keep_eventq)
+ efx_fini_eventq(channel);
+ }
+
+ return rc;
+}
+
+void efx_soft_disable_interrupts(struct efx_nic *efx)
+{
+ struct efx_channel *channel;
+
+ if (efx->state == STATE_DISABLED)
+ return;
+
+ efx_mcdi_mode_poll(efx);
+
+ efx->irq_soft_enabled = false;
+ smp_wmb();
+
+ if (efx->legacy_irq)
+ synchronize_irq(efx->legacy_irq);
+
+ efx_for_each_channel(channel, efx) {
+ if (channel->irq)
+ synchronize_irq(channel->irq);
+
+ efx_stop_eventq(channel);
+ if (!channel->type->keep_eventq)
+ efx_fini_eventq(channel);
+ }
+
+ /* Flush the asynchronous MCDI request queue */
+ efx_mcdi_flush_async(efx);
+}
+
+int efx_enable_interrupts(struct efx_nic *efx)
+{
+ struct efx_channel *channel, *end_channel;
+ int rc;
+
+ /* TODO: Is this really a bug? */
+ BUG_ON(efx->state == STATE_DISABLED);
+
+ if (efx->eeh_disabled_legacy_irq) {
+ enable_irq(efx->legacy_irq);
+ efx->eeh_disabled_legacy_irq = false;
+ }
+
+ efx->type->irq_enable_master(efx);
+
+ efx_for_each_channel(channel, efx) {
+ if (channel->type->keep_eventq) {
+ rc = efx_init_eventq(channel);
+ if (rc)
+ goto fail;
+ }
+ }
+
+ rc = efx_soft_enable_interrupts(efx);
+ if (rc)
+ goto fail;
+
+ return 0;
+
+fail:
+ end_channel = channel;
+ efx_for_each_channel(channel, efx) {
+ if (channel == end_channel)
+ break;
+ if (channel->type->keep_eventq)
+ efx_fini_eventq(channel);
+ }
+
+ efx->type->irq_disable_non_ev(efx);
+
+ return rc;
+}
+
+void efx_disable_interrupts(struct efx_nic *efx)
+{
+ struct efx_channel *channel;
+
+ efx_soft_disable_interrupts(efx);
+
+ efx_for_each_channel(channel, efx) {
+ if (channel->type->keep_eventq)
+ efx_fini_eventq(channel);
+ }
+
+ efx->type->irq_disable_non_ev(efx);
+}
+
+void efx_start_channels(struct efx_nic *efx)
+{
+ struct efx_tx_queue *tx_queue;
+ struct efx_rx_queue *rx_queue;
+ struct efx_channel *channel;
+
+ efx_for_each_channel(channel, efx) {
+ efx_for_each_channel_tx_queue(tx_queue, channel) {
+ efx_init_tx_queue(tx_queue);
+ atomic_inc(&efx->active_queues);
+ }
+
+ efx_for_each_channel_rx_queue(rx_queue, channel) {
+ efx_init_rx_queue(rx_queue);
+ atomic_inc(&efx->active_queues);
+ efx_stop_eventq(channel);
+ efx_fast_push_rx_descriptors(rx_queue, false);
+ efx_start_eventq(channel);
+ }
+
+ WARN_ON(channel->rx_pkt_n_frags);
+ }
+}
+
+void efx_stop_channels(struct efx_nic *efx)
+{
+ struct efx_tx_queue *tx_queue;
+ struct efx_rx_queue *rx_queue;
+ struct efx_channel *channel;
+ int rc = 0;
+
+ /* Stop RX refill */
+ efx_for_each_channel(channel, efx) {
+ efx_for_each_channel_rx_queue(rx_queue, channel)
+ rx_queue->refill_enabled = false;
+ }
+
+ efx_for_each_channel(channel, efx) {
+ /* RX packet processing is pipelined, so wait for the
+ * NAPI handler to complete. At least event queue 0
+ * might be kept active by non-data events, so don't
+ * use napi_synchronize() but actually disable NAPI
+ * temporarily.
+ */
+ if (efx_channel_has_rx_queue(channel)) {
+ efx_stop_eventq(channel);
+ efx_start_eventq(channel);
+ }
+ }
+
+ if (efx->type->fini_dmaq)
+ rc = efx->type->fini_dmaq(efx);
+
+ if (rc) {
+ netif_err(efx, drv, efx->net_dev, "failed to flush queues\n");
+ } else {
+ netif_dbg(efx, drv, efx->net_dev,
+ "successfully flushed all queues\n");
+ }
+
+ efx_for_each_channel(channel, efx) {
+ efx_for_each_channel_rx_queue(rx_queue, channel)
+ efx_fini_rx_queue(rx_queue);
+ efx_for_each_possible_channel_tx_queue(tx_queue, channel)
+ efx_fini_tx_queue(tx_queue);
+ }
+}
+
+/**************************************************************************
+ *
+ * NAPI interface
+ *
+ *************************************************************************/
+
+/* Process channel's event queue
+ *
+ * This function is responsible for processing the event queue of a
+ * single channel. The caller must guarantee that this function will
+ * never be concurrently called more than once on the same channel,
+ * though different channels may be being processed concurrently.
+ */
+static int efx_process_channel(struct efx_channel *channel, int budget)
+{
+ struct efx_tx_queue *tx_queue;
+ struct list_head rx_list;
+ int spent;
+
+ if (unlikely(!channel->enabled))
+ return 0;
+
+ /* Prepare the batch receive list */
+ EFX_WARN_ON_PARANOID(channel->rx_list != NULL);
+ INIT_LIST_HEAD(&rx_list);
+ channel->rx_list = &rx_list;
+
+ efx_for_each_channel_tx_queue(tx_queue, channel) {
+ tx_queue->pkts_compl = 0;
+ tx_queue->bytes_compl = 0;
+ }
+
+ spent = efx_nic_process_eventq(channel, budget);
+ if (spent && efx_channel_has_rx_queue(channel)) {
+ struct efx_rx_queue *rx_queue =
+ efx_channel_get_rx_queue(channel);
+
+ efx_rx_flush_packet(channel);
+ efx_fast_push_rx_descriptors(rx_queue, true);
+ }
+
+ /* Update BQL */
+ efx_for_each_channel_tx_queue(tx_queue, channel) {
+ if (tx_queue->bytes_compl) {
+ netdev_tx_completed_queue(tx_queue->core_txq,
+ tx_queue->pkts_compl,
+ tx_queue->bytes_compl);
+ }
+ }
+
+ /* Receive any packets we queued up */
+ netif_receive_skb_list(channel->rx_list);
+ channel->rx_list = NULL;
+
+ return spent;
+}
+
+static void efx_update_irq_mod(struct efx_nic *efx, struct efx_channel *channel)
+{
+ int step = efx->irq_mod_step_us;
+
+ if (channel->irq_mod_score < irq_adapt_low_thresh) {
+ if (channel->irq_moderation_us > step) {
+ channel->irq_moderation_us -= step;
+ efx->type->push_irq_moderation(channel);
+ }
+ } else if (channel->irq_mod_score > irq_adapt_high_thresh) {
+ if (channel->irq_moderation_us <
+ efx->irq_rx_moderation_us) {
+ channel->irq_moderation_us += step;
+ efx->type->push_irq_moderation(channel);
+ }
+ }
+
+ channel->irq_count = 0;
+ channel->irq_mod_score = 0;
+}
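
A stand-alone sketch of the adaptive step above. The 8000/16000 thresholds mirror irq_adapt_low_thresh and irq_adapt_high_thresh declared at the top of this file; the step size and ceiling are invented stand-ins for irq_mod_step_us and irq_rx_moderation_us.

	#include <stdio.h>

	static unsigned int adapt_moderation(unsigned int score,
					     unsigned int cur_us)
	{
		const unsigned int step_us = 10, ceiling_us = 60;	/* invented */

		if (score < 8000 && cur_us > step_us)
			cur_us -= step_us;	/* light traffic: interrupt sooner */
		else if (score > 16000 && cur_us < ceiling_us)
			cur_us += step_us;	/* heavy traffic: batch more events */
		return cur_us;
	}

	int main(void)
	{
		printf("%u\n", adapt_moderation(2000, 30));	/* prints 20 */
		printf("%u\n", adapt_moderation(20000, 30));	/* prints 40 */
		return 0;
	}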
+
+/* NAPI poll handler
+ *
+ * NAPI guarantees serialisation of polls of the same device, which
+ * provides the guarantee required by efx_process_channel().
+ */
+static int efx_poll(struct napi_struct *napi, int budget)
+{
+ struct efx_channel *channel =
+ container_of(napi, struct efx_channel, napi_str);
+ struct efx_nic *efx = channel->efx;
+ int spent;
+
+ netif_vdbg(efx, intr, efx->net_dev,
+ "channel %d NAPI poll executing on CPU %d\n",
+ channel->channel, raw_smp_processor_id());
+
+ spent = efx_process_channel(channel, budget);
+
+ xdp_do_flush_map();
+
+ if (spent < budget) {
+ if (efx_channel_has_rx_queue(channel) &&
+ efx->irq_rx_adaptive &&
+ unlikely(++channel->irq_count == 1000)) {
+ efx_update_irq_mod(efx, channel);
+ }
+
+#ifdef CONFIG_RFS_ACCEL
+ /* Perhaps expire some ARFS filters */
+ mod_delayed_work(system_wq, &channel->filter_work, 0);
+#endif
+
+ /* There is no race here; although napi_disable() will
+ * only wait for napi_complete(), this isn't a problem
+ * since efx_nic_eventq_read_ack() will have no effect if
+ * interrupts have already been disabled.
+ */
+ if (napi_complete_done(napi, spent))
+ efx_nic_eventq_read_ack(channel);
+ }
+
+ return spent;
+}
+
+void efx_init_napi_channel(struct efx_channel *channel)
+{
+ struct efx_nic *efx = channel->efx;
+
+ channel->napi_dev = efx->net_dev;
+ netif_napi_add(channel->napi_dev, &channel->napi_str,
+ efx_poll, napi_weight);
+}
+
+void efx_init_napi(struct efx_nic *efx)
+{
+ struct efx_channel *channel;
+
+ efx_for_each_channel(channel, efx)
+ efx_init_napi_channel(channel);
+}
+
+void efx_fini_napi_channel(struct efx_channel *channel)
+{
+ if (channel->napi_dev)
+ netif_napi_del(&channel->napi_str);
+
+ channel->napi_dev = NULL;
+}
+
+void efx_fini_napi(struct efx_nic *efx)
+{
+ struct efx_channel *channel;
+
+ efx_for_each_channel(channel, efx)
+ efx_fini_napi_channel(channel);
+}
diff --git a/drivers/net/ethernet/sfc/efx_channels.h b/drivers/net/ethernet/sfc/efx_channels.h
new file mode 100644
index 000000000000..8d7b8c4142d7
--- /dev/null
+++ b/drivers/net/ethernet/sfc/efx_channels.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2018 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#ifndef EFX_CHANNELS_H
+#define EFX_CHANNELS_H
+
+int efx_probe_interrupts(struct efx_nic *efx);
+void efx_remove_interrupts(struct efx_nic *efx);
+int efx_soft_enable_interrupts(struct efx_nic *efx);
+void efx_soft_disable_interrupts(struct efx_nic *efx);
+int efx_enable_interrupts(struct efx_nic *efx);
+void efx_disable_interrupts(struct efx_nic *efx);
+
+void efx_set_interrupt_affinity(struct efx_nic *efx);
+void efx_clear_interrupt_affinity(struct efx_nic *efx);
+
+int efx_probe_eventq(struct efx_channel *channel);
+int efx_init_eventq(struct efx_channel *channel);
+void efx_start_eventq(struct efx_channel *channel);
+void efx_stop_eventq(struct efx_channel *channel);
+void efx_fini_eventq(struct efx_channel *channel);
+void efx_remove_eventq(struct efx_channel *channel);
+
+struct efx_channel *
+efx_alloc_channel(struct efx_nic *efx, int i, struct efx_channel *old_channel);
+int efx_realloc_channels(struct efx_nic *efx, u32 rxq_entries, u32 txq_entries);
+void efx_get_channel_name(struct efx_channel *channel, char *buf, size_t len);
+void efx_set_channel_names(struct efx_nic *efx);
+int efx_init_channels(struct efx_nic *efx);
+int efx_probe_channels(struct efx_nic *efx);
+int efx_set_channels(struct efx_nic *efx);
+bool efx_default_channel_want_txqs(struct efx_channel *channel);
+void efx_remove_channel(struct efx_channel *channel);
+void efx_remove_channels(struct efx_nic *efx);
+void efx_fini_channels(struct efx_nic *efx);
+struct efx_channel *efx_copy_channel(const struct efx_channel *old_channel);
+void efx_start_channels(struct efx_nic *efx);
+void efx_stop_channels(struct efx_nic *efx);
+
+void efx_init_napi_channel(struct efx_channel *channel);
+void efx_init_napi(struct efx_nic *efx);
+void efx_fini_napi_channel(struct efx_channel *channel);
+void efx_fini_napi(struct efx_nic *efx);
+
+int efx_channel_dummy_op_int(struct efx_channel *channel);
+void efx_channel_dummy_op_void(struct efx_channel *channel);
+
+#endif
diff --git a/drivers/net/ethernet/sfc/efx_common.c b/drivers/net/ethernet/sfc/efx_common.c
new file mode 100644
index 000000000000..b0d76bc19673
--- /dev/null
+++ b/drivers/net/ethernet/sfc/efx_common.c
@@ -0,0 +1,1102 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2018 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#include "net_driver.h"
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include "efx_common.h"
+#include "efx_channels.h"
+#include "efx.h"
+#include "mcdi.h"
+#include "selftest.h"
+#include "rx_common.h"
+#include "tx_common.h"
+#include "nic.h"
+#include "io.h"
+#include "mcdi_pcol.h"
+
+static unsigned int debug = (NETIF_MSG_DRV | NETIF_MSG_PROBE |
+ NETIF_MSG_LINK | NETIF_MSG_IFDOWN |
+ NETIF_MSG_IFUP | NETIF_MSG_RX_ERR |
+ NETIF_MSG_TX_ERR | NETIF_MSG_HW);
+module_param(debug, uint, 0);
+MODULE_PARM_DESC(debug, "Bitmapped debugging message enable value");
+
+/* This is the time (in jiffies) between invocations of the hardware
+ * monitor.
+ * On Falcon-based NICs, this will:
+ * - Check the on-board hardware monitor;
+ * - Poll the link state and reconfigure the hardware as necessary.
+ * On Siena-based NICs for power systems with EEH support, this will give EEH a
+ * chance to start.
+ */
+static unsigned int efx_monitor_interval = 1 * HZ;
+
+/* How often and how many times to poll for a reset while waiting for a
+ * BIST that another function started to complete.
+ */
+#define BIST_WAIT_DELAY_MS 100
+#define BIST_WAIT_DELAY_COUNT 100
+
+/* Default stats update time */
+#define STATS_PERIOD_MS_DEFAULT 1000
+
+const unsigned int efx_reset_type_max = RESET_TYPE_MAX;
+const char *const efx_reset_type_names[] = {
+ [RESET_TYPE_INVISIBLE] = "INVISIBLE",
+ [RESET_TYPE_ALL] = "ALL",
+ [RESET_TYPE_RECOVER_OR_ALL] = "RECOVER_OR_ALL",
+ [RESET_TYPE_WORLD] = "WORLD",
+ [RESET_TYPE_RECOVER_OR_DISABLE] = "RECOVER_OR_DISABLE",
+ [RESET_TYPE_DATAPATH] = "DATAPATH",
+ [RESET_TYPE_MC_BIST] = "MC_BIST",
+ [RESET_TYPE_DISABLE] = "DISABLE",
+ [RESET_TYPE_TX_WATCHDOG] = "TX_WATCHDOG",
+ [RESET_TYPE_INT_ERROR] = "INT_ERROR",
+ [RESET_TYPE_DMA_ERROR] = "DMA_ERROR",
+ [RESET_TYPE_TX_SKIP] = "TX_SKIP",
+ [RESET_TYPE_MC_FAILURE] = "MC_FAILURE",
+ [RESET_TYPE_MCDI_TIMEOUT] = "MCDI_TIMEOUT (FLR)",
+};
+
+#define RESET_TYPE(type) \
+ STRING_TABLE_LOOKUP(type, efx_reset_type)
+
+/* Loopback mode names (see LOOPBACK_MODE()) */
+const unsigned int efx_loopback_mode_max = LOOPBACK_MAX;
+const char *const efx_loopback_mode_names[] = {
+ [LOOPBACK_NONE] = "NONE",
+ [LOOPBACK_DATA] = "DATAPATH",
+ [LOOPBACK_GMAC] = "GMAC",
+ [LOOPBACK_XGMII] = "XGMII",
+ [LOOPBACK_XGXS] = "XGXS",
+ [LOOPBACK_XAUI] = "XAUI",
+ [LOOPBACK_GMII] = "GMII",
+ [LOOPBACK_SGMII] = "SGMII",
+ [LOOPBACK_XGBR] = "XGBR",
+ [LOOPBACK_XFI] = "XFI",
+ [LOOPBACK_XAUI_FAR] = "XAUI_FAR",
+ [LOOPBACK_GMII_FAR] = "GMII_FAR",
+ [LOOPBACK_SGMII_FAR] = "SGMII_FAR",
+ [LOOPBACK_XFI_FAR] = "XFI_FAR",
+ [LOOPBACK_GPHY] = "GPHY",
+ [LOOPBACK_PHYXS] = "PHYXS",
+ [LOOPBACK_PCS] = "PCS",
+ [LOOPBACK_PMAPMD] = "PMA/PMD",
+ [LOOPBACK_XPORT] = "XPORT",
+ [LOOPBACK_XGMII_WS] = "XGMII_WS",
+ [LOOPBACK_XAUI_WS] = "XAUI_WS",
+ [LOOPBACK_XAUI_WS_FAR] = "XAUI_WS_FAR",
+ [LOOPBACK_XAUI_WS_NEAR] = "XAUI_WS_NEAR",
+ [LOOPBACK_GMII_WS] = "GMII_WS",
+ [LOOPBACK_XFI_WS] = "XFI_WS",
+ [LOOPBACK_XFI_WS_FAR] = "XFI_WS_FAR",
+ [LOOPBACK_PHYXS_WS] = "PHYXS_WS",
+};
+
+/* Reset workqueue. If any NIC has a hardware failure then a reset will be
+ * queued onto this work queue. This is not a per-nic work queue, because
+ * efx_reset_work() acquires the rtnl lock, so resets are naturally serialised.
+ */
+static struct workqueue_struct *reset_workqueue;
+
+int efx_create_reset_workqueue(void)
+{
+ reset_workqueue = create_singlethread_workqueue("sfc_reset");
+ if (!reset_workqueue) {
+ printk(KERN_ERR "Failed to create reset workqueue\n");
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+void efx_queue_reset_work(struct efx_nic *efx)
+{
+ queue_work(reset_workqueue, &efx->reset_work);
+}
+
+void efx_flush_reset_workqueue(struct efx_nic *efx)
+{
+ cancel_work_sync(&efx->reset_work);
+}
+
+void efx_destroy_reset_workqueue(void)
+{
+ if (reset_workqueue) {
+ destroy_workqueue(reset_workqueue);
+ reset_workqueue = NULL;
+ }
+}
+
+/* We assume that efx->type->reconfigure_mac will always try to sync RX
+ * filters and therefore needs to read-lock the filter table against freeing
+ */
+void efx_mac_reconfigure(struct efx_nic *efx)
+{
+ if (efx->type->reconfigure_mac) {
+ down_read(&efx->filter_sem);
+ efx->type->reconfigure_mac(efx);
+ up_read(&efx->filter_sem);
+ }
+}
+
+/* Asynchronous work item for changing MAC promiscuity and multicast
+ * hash. Avoid a drain/rx_ingress enable by reconfiguring the current
+ * MAC directly.
+ */
+static void efx_mac_work(struct work_struct *data)
+{
+ struct efx_nic *efx = container_of(data, struct efx_nic, mac_work);
+
+ mutex_lock(&efx->mac_lock);
+ if (efx->port_enabled)
+ efx_mac_reconfigure(efx);
+ mutex_unlock(&efx->mac_lock);
+}
+
+/* This keeps the kernel informed (via netif_carrier_on/off) of the link
+ * status and, through the carrier state, keeps the port's TX queue
+ * stopped while the link is down.
+ */
+void efx_link_status_changed(struct efx_nic *efx)
+{
+ struct efx_link_state *link_state = &efx->link_state;
+
+ /* SFC Bug 5356: A net_dev notifier is registered, so we must ensure
+ * that no events are triggered between unregister_netdev() and the
+ * driver unloading. A more general condition is that NETDEV_CHANGE
+ * can only be generated between NETDEV_UP and NETDEV_DOWN
+ */
+ if (!netif_running(efx->net_dev))
+ return;
+
+ if (link_state->up != netif_carrier_ok(efx->net_dev)) {
+ efx->n_link_state_changes++;
+
+ if (link_state->up)
+ netif_carrier_on(efx->net_dev);
+ else
+ netif_carrier_off(efx->net_dev);
+ }
+
+ /* Status message for kernel log */
+ if (link_state->up)
+ netif_info(efx, link, efx->net_dev,
+ "link up at %uMbps %s-duplex (MTU %d)\n",
+ link_state->speed, link_state->fd ? "full" : "half",
+ efx->net_dev->mtu);
+ else
+ netif_info(efx, link, efx->net_dev, "link down\n");
+}
+
+unsigned int efx_xdp_max_mtu(struct efx_nic *efx)
+{
+ /* The maximum MTU that we can fit in a single page, allowing for
+ * framing, overhead and XDP headroom.
+ */
+ int overhead = EFX_MAX_FRAME_LEN(0) + sizeof(struct efx_rx_page_state) +
+ efx->rx_prefix_size + efx->type->rx_buffer_padding +
+ efx->rx_ip_align + XDP_PACKET_HEADROOM;
+
+ return PAGE_SIZE - overhead;
+}
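
Rough, illustration-only arithmetic for the calculation above; none of these numbers are the driver's real constants, they only show how the page budget is split:

	#include <stdio.h>

	int main(void)
	{
		unsigned int page_size = 4096;		/* assumed PAGE_SIZE */
		unsigned int xdp_headroom = 256;	/* assumed headroom */
		unsigned int framing_and_prefix = 150;	/* invented combined overhead */

		printf("max XDP MTU ~= %u\n",
		       page_size - xdp_headroom - framing_and_prefix);
		return 0;
	}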
+
+/* Context: process, rtnl_lock() held. */
+int efx_change_mtu(struct net_device *net_dev, int new_mtu)
+{
+ struct efx_nic *efx = netdev_priv(net_dev);
+ int rc;
+
+ rc = efx_check_disabled(efx);
+ if (rc)
+ return rc;
+
+ if (rtnl_dereference(efx->xdp_prog) &&
+ new_mtu > efx_xdp_max_mtu(efx)) {
+ netif_err(efx, drv, efx->net_dev,
+ "Requested MTU of %d too big for XDP (max: %d)\n",
+ new_mtu, efx_xdp_max_mtu(efx));
+ return -EINVAL;
+ }
+
+ netif_dbg(efx, drv, efx->net_dev, "changing MTU to %d\n", new_mtu);
+
+ efx_device_detach_sync(efx);
+ efx_stop_all(efx);
+
+ mutex_lock(&efx->mac_lock);
+ net_dev->mtu = new_mtu;
+ efx_mac_reconfigure(efx);
+ mutex_unlock(&efx->mac_lock);
+
+ efx_start_all(efx);
+ efx_device_attach_if_not_resetting(efx);
+ return 0;
+}
+
+/**************************************************************************
+ *
+ * Hardware monitor
+ *
+ **************************************************************************/
+
+/* Run periodically off the general workqueue */
+static void efx_monitor(struct work_struct *data)
+{
+ struct efx_nic *efx = container_of(data, struct efx_nic,
+ monitor_work.work);
+
+ netif_vdbg(efx, timer, efx->net_dev,
+ "hardware monitor executing on CPU %d\n",
+ raw_smp_processor_id());
+ BUG_ON(efx->type->monitor == NULL);
+
+ /* If the mac_lock is already held then it is likely a port
+ * reconfiguration is already in place, which will likely do
+ * most of the work of monitor() anyway.
+ */
+ if (mutex_trylock(&efx->mac_lock)) {
+ if (efx->port_enabled && efx->type->monitor)
+ efx->type->monitor(efx);
+ mutex_unlock(&efx->mac_lock);
+ }
+
+ efx_start_monitor(efx);
+}
+
+void efx_start_monitor(struct efx_nic *efx)
+{
+ if (efx->type->monitor)
+ queue_delayed_work(efx->workqueue, &efx->monitor_work,
+ efx_monitor_interval);
+}
+
+/**************************************************************************
+ *
+ * Event queue processing
+ *
+ *************************************************************************/
+
+/* Channels are shutdown and reinitialised whilst the NIC is running
+ * to propagate configuration changes (mtu, checksum offload), or
+ * to clear hardware error conditions
+ */
+static void efx_start_datapath(struct efx_nic *efx)
+{
+ netdev_features_t old_features = efx->net_dev->features;
+ bool old_rx_scatter = efx->rx_scatter;
+ size_t rx_buf_len;
+
+ /* Calculate the rx buffer allocation parameters required to
+ * support the current MTU, including padding for header
+ * alignment and overruns.
+ */
+ efx->rx_dma_len = (efx->rx_prefix_size +
+ EFX_MAX_FRAME_LEN(efx->net_dev->mtu) +
+ efx->type->rx_buffer_padding);
+ rx_buf_len = (sizeof(struct efx_rx_page_state) + XDP_PACKET_HEADROOM +
+ efx->rx_ip_align + efx->rx_dma_len);
+ if (rx_buf_len <= PAGE_SIZE) {
+ efx->rx_scatter = efx->type->always_rx_scatter;
+ efx->rx_buffer_order = 0;
+ } else if (efx->type->can_rx_scatter) {
+ BUILD_BUG_ON(EFX_RX_USR_BUF_SIZE % L1_CACHE_BYTES);
+ BUILD_BUG_ON(sizeof(struct efx_rx_page_state) +
+ 2 * ALIGN(NET_IP_ALIGN + EFX_RX_USR_BUF_SIZE,
+ EFX_RX_BUF_ALIGNMENT) >
+ PAGE_SIZE);
+ efx->rx_scatter = true;
+ efx->rx_dma_len = EFX_RX_USR_BUF_SIZE;
+ efx->rx_buffer_order = 0;
+ } else {
+ efx->rx_scatter = false;
+ efx->rx_buffer_order = get_order(rx_buf_len);
+ }
+
+ efx_rx_config_page_split(efx);
+ if (efx->rx_buffer_order)
+ netif_dbg(efx, drv, efx->net_dev,
+ "RX buf len=%u; page order=%u batch=%u\n",
+ efx->rx_dma_len, efx->rx_buffer_order,
+ efx->rx_pages_per_batch);
+ else
+ netif_dbg(efx, drv, efx->net_dev,
+ "RX buf len=%u step=%u bpp=%u; page batch=%u\n",
+ efx->rx_dma_len, efx->rx_page_buf_step,
+ efx->rx_bufs_per_page, efx->rx_pages_per_batch);
+
+ /* Restore previously fixed features in hw_features and remove
+ * features which are fixed now
+ */
+ efx->net_dev->hw_features |= efx->net_dev->features;
+ efx->net_dev->hw_features &= ~efx->fixed_features;
+ efx->net_dev->features |= efx->fixed_features;
+ if (efx->net_dev->features != old_features)
+ netdev_features_change(efx->net_dev);
+
+ /* RX filters may also have scatter-enabled flags */
+ if ((efx->rx_scatter != old_rx_scatter) &&
+ efx->type->filter_update_rx_scatter)
+ efx->type->filter_update_rx_scatter(efx);
+
+ /* We must keep at least one descriptor in a TX ring empty.
+ * We could avoid this when the queue size does not exactly
+ * match the hardware ring size, but it's not that important.
+ * Therefore we stop the queue when one more skb might fill
+ * the ring completely. We wake it when half way back to
+ * empty.
+ */
+ efx->txq_stop_thresh = efx->txq_entries - efx_tx_max_skb_descs(efx);
+ efx->txq_wake_thresh = efx->txq_stop_thresh / 2;
+
+ /* Initialise the channels */
+ efx_start_channels(efx);
+
+ efx_ptp_start_datapath(efx);
+
+ if (netif_device_present(efx->net_dev))
+ netif_tx_wake_all_queues(efx->net_dev);
+}
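
A sketch of the stop/wake thresholds above, assuming the default ring size of 1024 and an invented worst-case descriptor count per skb (the real value comes from efx_tx_max_skb_descs()):

	#include <stdio.h>

	int main(void)
	{
		unsigned int txq_entries = 1024;	/* default ring size */
		unsigned int max_skb_descs = 64;	/* invented worst case */
		unsigned int stop = txq_entries - max_skb_descs;
		unsigned int wake = stop / 2;

		/* stop once one more worst-case skb could fill the ring,
		 * wake when half way back to empty
		 */
		printf("stop at %u used descriptors, wake at %u\n", stop, wake);
		return 0;
	}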
+
+static void efx_stop_datapath(struct efx_nic *efx)
+{
+ EFX_ASSERT_RESET_SERIALISED(efx);
+ BUG_ON(efx->port_enabled);
+
+ efx_ptp_stop_datapath(efx);
+
+ efx_stop_channels(efx);
+}
+
+/**************************************************************************
+ *
+ * Port handling
+ *
+ **************************************************************************/
+
+static void efx_start_port(struct efx_nic *efx)
+{
+ netif_dbg(efx, ifup, efx->net_dev, "start port\n");
+ BUG_ON(efx->port_enabled);
+
+ mutex_lock(&efx->mac_lock);
+ efx->port_enabled = true;
+
+ /* Ensure MAC ingress/egress is enabled */
+ efx_mac_reconfigure(efx);
+
+ mutex_unlock(&efx->mac_lock);
+}
+
+/* Cancel work for MAC reconfiguration, periodic hardware monitoring
+ * and the async self-test, wait for them to finish and prevent them
+ * being scheduled again. This doesn't cover online resets, which
+ * should only be cancelled when removing the device.
+ */
+static void efx_stop_port(struct efx_nic *efx)
+{
+ netif_dbg(efx, ifdown, efx->net_dev, "stop port\n");
+
+ EFX_ASSERT_RESET_SERIALISED(efx);
+
+ mutex_lock(&efx->mac_lock);
+ efx->port_enabled = false;
+ mutex_unlock(&efx->mac_lock);
+
+ /* Serialise against efx_set_multicast_list() */
+ netif_addr_lock_bh(efx->net_dev);
+ netif_addr_unlock_bh(efx->net_dev);
+
+ cancel_delayed_work_sync(&efx->monitor_work);
+ efx_selftest_async_cancel(efx);
+ cancel_work_sync(&efx->mac_work);
+}
+
+/* If the interface is supposed to be running but is not, start
+ * the hardware and software data path, regular activity for the port
+ * (MAC statistics, link polling, etc.) and schedule the port to be
+ * reconfigured. Interrupts must already be enabled. This function
+ * is safe to call multiple times, so long as the NIC is not disabled.
+ * Requires the RTNL lock.
+ */
+void efx_start_all(struct efx_nic *efx)
+{
+ EFX_ASSERT_RESET_SERIALISED(efx);
+ BUG_ON(efx->state == STATE_DISABLED);
+
+ /* Check that it is appropriate to restart the interface. All
+ * of these flags are safe to read under just the rtnl lock
+ */
+ if (efx->port_enabled || !netif_running(efx->net_dev) ||
+ efx->reset_pending)
+ return;
+
+ efx_start_port(efx);
+ efx_start_datapath(efx);
+
+ /* Start the hardware monitor if there is one */
+ efx_start_monitor(efx);
+
+ /* Link state detection is normally event-driven; we have
+ * to poll now because we could have missed a change
+ */
+ mutex_lock(&efx->mac_lock);
+ if (efx->phy_op->poll(efx))
+ efx_link_status_changed(efx);
+ mutex_unlock(&efx->mac_lock);
+
+ if (efx->type->start_stats) {
+ efx->type->start_stats(efx);
+ efx->type->pull_stats(efx);
+ spin_lock_bh(&efx->stats_lock);
+ efx->type->update_stats(efx, NULL, NULL);
+ spin_unlock_bh(&efx->stats_lock);
+ }
+}
+
+/* Quiesce the hardware and software data path, and regular activity
+ * for the port without bringing the link down. Safe to call multiple
+ * times with the NIC in almost any state, but interrupts should be
+ * enabled. Requires the RTNL lock.
+ */
+void efx_stop_all(struct efx_nic *efx)
+{
+ EFX_ASSERT_RESET_SERIALISED(efx);
+
+ /* port_enabled can be read safely under the rtnl lock */
+ if (!efx->port_enabled)
+ return;
+
+ if (efx->type->update_stats) {
+ /* update stats before we go down so we can accurately count
+ * rx_nodesc_drops
+ */
+ efx->type->pull_stats(efx);
+ spin_lock_bh(&efx->stats_lock);
+ efx->type->update_stats(efx, NULL, NULL);
+ spin_unlock_bh(&efx->stats_lock);
+ efx->type->stop_stats(efx);
+ }
+
+ efx_stop_port(efx);
+
+ /* Stop the kernel transmit interface. This is only valid if
+ * the device is stopped or detached; otherwise the watchdog
+ * may fire immediately.
+ */
+ WARN_ON(netif_running(efx->net_dev) &&
+ netif_device_present(efx->net_dev));
+ netif_tx_disable(efx->net_dev);
+
+ efx_stop_datapath(efx);
+}
+
+/* Context: process, dev_base_lock or RTNL held, non-blocking. */
+void efx_net_stats(struct net_device *net_dev, struct rtnl_link_stats64 *stats)
+{
+ struct efx_nic *efx = netdev_priv(net_dev);
+
+ spin_lock_bh(&efx->stats_lock);
+ efx->type->update_stats(efx, NULL, stats);
+ spin_unlock_bh(&efx->stats_lock);
+}
+
+/* Push loopback/power/transmit disable settings to the PHY, and reconfigure
+ * the MAC appropriately. All other PHY configuration changes are pushed
+ * through phy_op->set_settings(), and pushed asynchronously to the MAC
+ * through efx_monitor().
+ *
+ * Callers must hold the mac_lock
+ */
+int __efx_reconfigure_port(struct efx_nic *efx)
+{
+ enum efx_phy_mode phy_mode;
+ int rc = 0;
+
+ WARN_ON(!mutex_is_locked(&efx->mac_lock));
+
+ /* Disable PHY transmit in mac level loopbacks */
+ phy_mode = efx->phy_mode;
+ if (LOOPBACK_INTERNAL(efx))
+ efx->phy_mode |= PHY_MODE_TX_DISABLED;
+ else
+ efx->phy_mode &= ~PHY_MODE_TX_DISABLED;
+
+ if (efx->type->reconfigure_port)
+ rc = efx->type->reconfigure_port(efx);
+
+ if (rc)
+ efx->phy_mode = phy_mode;
+
+ return rc;
+}
+
+/* Reinitialise the MAC to pick up new PHY settings, even if the port is
+ * disabled.
+ */
+int efx_reconfigure_port(struct efx_nic *efx)
+{
+ int rc;
+
+ EFX_ASSERT_RESET_SERIALISED(efx);
+
+ mutex_lock(&efx->mac_lock);
+ rc = __efx_reconfigure_port(efx);
+ mutex_unlock(&efx->mac_lock);
+
+ return rc;
+}
+
+/**************************************************************************
+ *
+ * Device reset and suspend
+ *
+ **************************************************************************/
+
+static void efx_wait_for_bist_end(struct efx_nic *efx)
+{
+ int i;
+
+ for (i = 0; i < BIST_WAIT_DELAY_COUNT; ++i) {
+ if (efx_mcdi_poll_reboot(efx))
+ goto out;
+ msleep(BIST_WAIT_DELAY_MS);
+ }
+
+ netif_err(efx, drv, efx->net_dev, "Warning: No MC reboot after BIST mode\n");
+out:
+ /* Either way unset the BIST flag. If we found no reboot we probably
+ * won't recover, but we should try.
+ */
+ efx->mc_bist_for_other_fn = false;
+}
+
+/* Try recovery mechanisms.
+ * For now only EEH is supported.
+ * Returns 0 if the recovery mechanisms are unsuccessful.
+ * Returns a non-zero value otherwise.
+ */
+int efx_try_recovery(struct efx_nic *efx)
+{
+#ifdef CONFIG_EEH
+ /* A PCI error can occur and not be seen by EEH because nothing
+ * happens on the PCI bus. In this case the driver may fail and
+ * schedule a 'recover or reset', leading to this recovery handler.
+ * Manually call the eeh failure check function.
+ */
+ struct eeh_dev *eehdev = pci_dev_to_eeh_dev(efx->pci_dev);
+ if (eeh_dev_check_failure(eehdev)) {
+ /* The EEH mechanisms will handle the error and reset the
+ * device if necessary.
+ */
+ return 1;
+ }
+#endif
+ return 0;
+}
+
+/* Tears down the entire software state and most of the hardware state
+ * before reset.
+ */
+void efx_reset_down(struct efx_nic *efx, enum reset_type method)
+{
+ EFX_ASSERT_RESET_SERIALISED(efx);
+
+ if (method == RESET_TYPE_MCDI_TIMEOUT)
+ efx->type->prepare_flr(efx);
+
+ efx_stop_all(efx);
+ efx_disable_interrupts(efx);
+
+ mutex_lock(&efx->mac_lock);
+ down_write(&efx->filter_sem);
+ mutex_lock(&efx->rss_lock);
+ if (efx->port_initialized && method != RESET_TYPE_INVISIBLE &&
+ method != RESET_TYPE_DATAPATH)
+ efx->phy_op->fini(efx);
+ efx->type->fini(efx);
+}
+
+/* This function will always ensure that the locks acquired in
+ * efx_reset_down() are released. A failure return code indicates
+ * that we were unable to reinitialise the hardware, and the
+ * driver should be disabled. If ok is false, then the rx and tx
+ * engines are not restarted, pending a RESET_DISABLE.
+ */
+int efx_reset_up(struct efx_nic *efx, enum reset_type method, bool ok)
+{
+ int rc;
+
+ EFX_ASSERT_RESET_SERIALISED(efx);
+
+ if (method == RESET_TYPE_MCDI_TIMEOUT)
+ efx->type->finish_flr(efx);
+
+ /* Ensure that SRAM is initialised even if we're disabling the device */
+ rc = efx->type->init(efx);
+ if (rc) {
+ netif_err(efx, drv, efx->net_dev, "failed to initialise NIC\n");
+ goto fail;
+ }
+
+ if (!ok)
+ goto fail;
+
+ if (efx->port_initialized && method != RESET_TYPE_INVISIBLE &&
+ method != RESET_TYPE_DATAPATH) {
+ rc = efx->phy_op->init(efx);
+ if (rc)
+ goto fail;
+ rc = efx->phy_op->reconfigure(efx);
+ if (rc && rc != -EPERM)
+ netif_err(efx, drv, efx->net_dev,
+ "could not restore PHY settings\n");
+ }
+
+ rc = efx_enable_interrupts(efx);
+ if (rc)
+ goto fail;
+
+#ifdef CONFIG_SFC_SRIOV
+ rc = efx->type->vswitching_restore(efx);
+ if (rc) /* not fatal; the PF will still work fine */
+ netif_warn(efx, probe, efx->net_dev,
+ "failed to restore vswitching rc=%d;"
+ " VFs may not function\n", rc);
+#endif
+
+ if (efx->type->rx_restore_rss_contexts)
+ efx->type->rx_restore_rss_contexts(efx);
+ mutex_unlock(&efx->rss_lock);
+ efx->type->filter_table_restore(efx);
+ up_write(&efx->filter_sem);
+ if (efx->type->sriov_reset)
+ efx->type->sriov_reset(efx);
+
+ mutex_unlock(&efx->mac_lock);
+
+ efx_start_all(efx);
+
+ if (efx->type->udp_tnl_push_ports)
+ efx->type->udp_tnl_push_ports(efx);
+
+ return 0;
+
+fail:
+ efx->port_initialized = false;
+
+ mutex_unlock(&efx->rss_lock);
+ up_write(&efx->filter_sem);
+ mutex_unlock(&efx->mac_lock);
+
+ return rc;
+}
+
+/* Reset the NIC using the specified method. Note that the reset may
+ * fail, in which case the card will be left in an unusable state.
+ *
+ * Caller must hold the rtnl_lock.
+ */
+int efx_reset(struct efx_nic *efx, enum reset_type method)
+{
+ bool disabled;
+ int rc, rc2;
+
+ netif_info(efx, drv, efx->net_dev, "resetting (%s)\n",
+ RESET_TYPE(method));
+
+ efx_device_detach_sync(efx);
+ efx_reset_down(efx, method);
+
+ rc = efx->type->reset(efx, method);
+ if (rc) {
+ netif_err(efx, drv, efx->net_dev, "failed to reset hardware\n");
+ goto out;
+ }
+
+ /* Clear flags for the scopes we covered. We assume the NIC and
+ * driver are now quiescent so that there is no race here.
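+	 *
+	 * -(1 << (method + 1)) is the two's-complement way of saying "all
+	 * bits at or above (method + 1)", so the AND below clears this
+	 * scope and every lesser one in a single step.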
+ */
+ if (method < RESET_TYPE_MAX_METHOD)
+ efx->reset_pending &= -(1 << (method + 1));
+ else /* it doesn't fit into the well-ordered scope hierarchy */
+ __clear_bit(method, &efx->reset_pending);
+
+ /* Reinitialise bus-mastering, which may have been turned off before
+ * the reset was scheduled. This is still appropriate, even in the
+ * RESET_TYPE_DISABLE since this driver generally assumes the hardware
+ * can respond to requests.
+ */
+ pci_set_master(efx->pci_dev);
+
+out:
+ /* Leave device stopped if necessary */
+ disabled = rc ||
+ method == RESET_TYPE_DISABLE ||
+ method == RESET_TYPE_RECOVER_OR_DISABLE;
+ rc2 = efx_reset_up(efx, method, !disabled);
+ if (rc2) {
+ disabled = true;
+ if (!rc)
+ rc = rc2;
+ }
+
+ if (disabled) {
+ dev_close(efx->net_dev);
+ netif_err(efx, drv, efx->net_dev, "has been disabled\n");
+ efx->state = STATE_DISABLED;
+ } else {
+ netif_dbg(efx, drv, efx->net_dev, "reset complete\n");
+ efx_device_attach_if_not_resetting(efx);
+ }
+ return rc;
+}
+
+/* The worker thread exists so that code that cannot sleep can
+ * schedule a reset for later.
+ */
+static void efx_reset_work(struct work_struct *data)
+{
+ struct efx_nic *efx = container_of(data, struct efx_nic, reset_work);
+ unsigned long pending;
+ enum reset_type method;
+
+ pending = READ_ONCE(efx->reset_pending);
+ method = fls(pending) - 1;
+
+ if (method == RESET_TYPE_MC_BIST)
+ efx_wait_for_bist_end(efx);
+
+ if ((method == RESET_TYPE_RECOVER_OR_DISABLE ||
+ method == RESET_TYPE_RECOVER_OR_ALL) &&
+ efx_try_recovery(efx))
+ return;
+
+ if (!pending)
+ return;
+
+ rtnl_lock();
+
+ /* We checked the state in efx_schedule_reset() but it may
+ * have changed by now. Now that we have the RTNL lock,
+ * it cannot change again.
+ */
+ if (efx->state == STATE_READY)
+ (void)efx_reset(efx, method);
+
+ rtnl_unlock();
+}
+
+void efx_schedule_reset(struct efx_nic *efx, enum reset_type type)
+{
+ enum reset_type method;
+
+ if (efx->state == STATE_RECOVERY) {
+ netif_dbg(efx, drv, efx->net_dev,
+ "recovering: skip scheduling %s reset\n",
+ RESET_TYPE(type));
+ return;
+ }
+
+ switch (type) {
+ case RESET_TYPE_INVISIBLE:
+ case RESET_TYPE_ALL:
+ case RESET_TYPE_RECOVER_OR_ALL:
+ case RESET_TYPE_WORLD:
+ case RESET_TYPE_DISABLE:
+ case RESET_TYPE_RECOVER_OR_DISABLE:
+ case RESET_TYPE_DATAPATH:
+ case RESET_TYPE_MC_BIST:
+ case RESET_TYPE_MCDI_TIMEOUT:
+ method = type;
+ netif_dbg(efx, drv, efx->net_dev, "scheduling %s reset\n",
+ RESET_TYPE(method));
+ break;
+ default:
+ method = efx->type->map_reset_reason(type);
+ netif_dbg(efx, drv, efx->net_dev,
+ "scheduling %s reset for %s\n",
+ RESET_TYPE(method), RESET_TYPE(type));
+ break;
+ }
+
+ set_bit(method, &efx->reset_pending);
+ smp_mb(); /* ensure we change reset_pending before checking state */
+
+ /* If we're not READY then just leave the flags set as the cue
+ * to abort probing or reschedule the reset later.
+ */
+ if (READ_ONCE(efx->state) != STATE_READY)
+ return;
+
+ /* efx_process_channel() will no longer read events once a
+ * reset is scheduled. So switch back to poll'd MCDI completions.
+ */
+ efx_mcdi_mode_poll(efx);
+
+ efx_queue_reset_work(efx);
+}
+
+/**************************************************************************
+ *
+ * Dummy PHY/MAC operations
+ *
+ * Can be used for some unimplemented operations
+ * Needed so all function pointers are valid and do not have to be tested
+ * before use
+ *
+ **************************************************************************/
+int efx_port_dummy_op_int(struct efx_nic *efx)
+{
+ return 0;
+}
+void efx_port_dummy_op_void(struct efx_nic *efx) {}
+
+static bool efx_port_dummy_op_poll(struct efx_nic *efx)
+{
+ return false;
+}
+
+static const struct efx_phy_operations efx_dummy_phy_operations = {
+ .init = efx_port_dummy_op_int,
+ .reconfigure = efx_port_dummy_op_int,
+ .poll = efx_port_dummy_op_poll,
+ .fini = efx_port_dummy_op_void,
+};
+
+/**************************************************************************
+ *
+ * Data housekeeping
+ *
+ **************************************************************************/
+
+/* This zeroes out and then fills in the invariants in a struct
+ * efx_nic (including all sub-structures).
+ */
+int efx_init_struct(struct efx_nic *efx,
+ struct pci_dev *pci_dev, struct net_device *net_dev)
+{
+ int rc = -ENOMEM;
+
+ /* Initialise common structures */
+ INIT_LIST_HEAD(&efx->node);
+ INIT_LIST_HEAD(&efx->secondary_list);
+ spin_lock_init(&efx->biu_lock);
+#ifdef CONFIG_SFC_MTD
+ INIT_LIST_HEAD(&efx->mtd_list);
+#endif
+ INIT_WORK(&efx->reset_work, efx_reset_work);
+ INIT_DELAYED_WORK(&efx->monitor_work, efx_monitor);
+ efx_selftest_async_init(efx);
+ efx->pci_dev = pci_dev;
+ efx->msg_enable = debug;
+ efx->state = STATE_UNINIT;
+ strlcpy(efx->name, pci_name(pci_dev), sizeof(efx->name));
+
+ efx->net_dev = net_dev;
+ efx->rx_prefix_size = efx->type->rx_prefix_size;
+ efx->rx_ip_align =
+ NET_IP_ALIGN ? (efx->rx_prefix_size + NET_IP_ALIGN) % 4 : 0;
+ efx->rx_packet_hash_offset =
+ efx->type->rx_hash_offset - efx->type->rx_prefix_size;
+ efx->rx_packet_ts_offset =
+ efx->type->rx_ts_offset - efx->type->rx_prefix_size;
+ INIT_LIST_HEAD(&efx->rss_context.list);
+ mutex_init(&efx->rss_lock);
+ spin_lock_init(&efx->stats_lock);
+ efx->vi_stride = EFX_DEFAULT_VI_STRIDE;
+ efx->num_mac_stats = MC_CMD_MAC_NSTATS;
+ BUILD_BUG_ON(MC_CMD_MAC_NSTATS - 1 != MC_CMD_MAC_GENERATION_END);
+ mutex_init(&efx->mac_lock);
+#ifdef CONFIG_RFS_ACCEL
+ mutex_init(&efx->rps_mutex);
+ spin_lock_init(&efx->rps_hash_lock);
+ /* Failure to allocate is not fatal, but may degrade ARFS performance */
+ efx->rps_hash_table = kcalloc(EFX_ARFS_HASH_TABLE_SIZE,
+ sizeof(*efx->rps_hash_table), GFP_KERNEL);
+#endif
+ efx->phy_op = &efx_dummy_phy_operations;
+ efx->mdio.dev = net_dev;
+ INIT_WORK(&efx->mac_work, efx_mac_work);
+ init_waitqueue_head(&efx->flush_wq);
+
+ rc = efx_init_channels(efx);
+ if (rc)
+ goto fail;
+
+ /* Would be good to use the net_dev name, but we're too early */
+ snprintf(efx->workqueue_name, sizeof(efx->workqueue_name), "sfc%s",
+ pci_name(pci_dev));
+ efx->workqueue = create_singlethread_workqueue(efx->workqueue_name);
+ if (!efx->workqueue) {
+ rc = -ENOMEM;
+ goto fail;
+ }
+
+ return 0;
+
+fail:
+ efx_fini_struct(efx);
+ return rc;
+}
+
+void efx_fini_struct(struct efx_nic *efx)
+{
+#ifdef CONFIG_RFS_ACCEL
+ kfree(efx->rps_hash_table);
+#endif
+
+ efx_fini_channels(efx);
+
+ kfree(efx->vpd_sn);
+
+ if (efx->workqueue) {
+ destroy_workqueue(efx->workqueue);
+ efx->workqueue = NULL;
+ }
+}
+
+/* This configures the PCI device to enable I/O and DMA. */
+int efx_init_io(struct efx_nic *efx, int bar, dma_addr_t dma_mask,
+ unsigned int mem_map_size)
+{
+ struct pci_dev *pci_dev = efx->pci_dev;
+ int rc;
+
+ netif_dbg(efx, probe, efx->net_dev, "initialising I/O\n");
+
+ rc = pci_enable_device(pci_dev);
+ if (rc) {
+ netif_err(efx, probe, efx->net_dev,
+ "failed to enable PCI device\n");
+ goto fail1;
+ }
+
+ pci_set_master(pci_dev);
+
+ /* Set the PCI DMA mask. Try all possibilities from our
+ * genuine mask down to 32 bits, because some architectures
+ * (e.g. x86_64 with iommu_sac_force set) will allow 40 bit
+	 * masks even though they reject 46 bit masks.
+ */
+ while (dma_mask > 0x7fffffffUL) {
+ rc = dma_set_mask_and_coherent(&pci_dev->dev, dma_mask);
+ if (rc == 0)
+ break;
+ dma_mask >>= 1;
+ }
+ if (rc) {
+ netif_err(efx, probe, efx->net_dev,
+ "could not find a suitable DMA mask\n");
+ goto fail2;
+ }
+ netif_dbg(efx, probe, efx->net_dev,
+ "using DMA mask %llx\n", (unsigned long long)dma_mask);
+
+ efx->membase_phys = pci_resource_start(efx->pci_dev, bar);
+ if (!efx->membase_phys) {
+ netif_err(efx, probe, efx->net_dev,
+ "ERROR: No BAR%d mapping from the BIOS. "
+ "Try pci=realloc on the kernel command line\n", bar);
+ rc = -ENODEV;
+ goto fail3;
+ }
+
+ rc = pci_request_region(pci_dev, bar, "sfc");
+ if (rc) {
+ netif_err(efx, probe, efx->net_dev,
+ "request for memory BAR failed\n");
+ rc = -EIO;
+ goto fail3;
+ }
+
+ efx->membase = ioremap(efx->membase_phys, mem_map_size);
+ if (!efx->membase) {
+ netif_err(efx, probe, efx->net_dev,
+ "could not map memory BAR at %llx+%x\n",
+ (unsigned long long)efx->membase_phys, mem_map_size);
+ rc = -ENOMEM;
+ goto fail4;
+ }
+ netif_dbg(efx, probe, efx->net_dev,
+ "memory BAR at %llx+%x (virtual %p)\n",
+ (unsigned long long)efx->membase_phys, mem_map_size,
+ efx->membase);
+
+ return 0;
+
+fail4:
+ pci_release_region(efx->pci_dev, bar);
+fail3:
+ efx->membase_phys = 0;
+fail2:
+ pci_disable_device(efx->pci_dev);
+fail1:
+ return rc;
+}
+
+void efx_fini_io(struct efx_nic *efx, int bar)
+{
+ netif_dbg(efx, drv, efx->net_dev, "shutting down I/O\n");
+
+ if (efx->membase) {
+ iounmap(efx->membase);
+ efx->membase = NULL;
+ }
+
+ if (efx->membase_phys) {
+ pci_release_region(efx->pci_dev, bar);
+ efx->membase_phys = 0;
+ }
+
+ /* Don't disable bus-mastering if VFs are assigned */
+ if (!pci_vfs_assigned(efx->pci_dev))
+ pci_disable_device(efx->pci_dev);
+}
+
+#ifdef CONFIG_SFC_MCDI_LOGGING
+static ssize_t show_mcdi_log(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct efx_nic *efx = dev_get_drvdata(dev);
+ struct efx_mcdi_iface *mcdi = efx_mcdi(efx);
+
+ return scnprintf(buf, PAGE_SIZE, "%d\n", mcdi->logging_enabled);
+}
+
+static ssize_t set_mcdi_log(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct efx_nic *efx = dev_get_drvdata(dev);
+ struct efx_mcdi_iface *mcdi = efx_mcdi(efx);
+ bool enable = count > 0 && *buf != '0';
+
+ mcdi->logging_enabled = enable;
+ return count;
+}
+
+static DEVICE_ATTR(mcdi_logging, 0644, show_mcdi_log, set_mcdi_log);
+
+void efx_init_mcdi_logging(struct efx_nic *efx)
+{
+ int rc = device_create_file(&efx->pci_dev->dev, &dev_attr_mcdi_logging);
+
+ if (rc) {
+ netif_warn(efx, drv, efx->net_dev,
+ "failed to init net dev attributes\n");
+ }
+}
+
+void efx_fini_mcdi_logging(struct efx_nic *efx)
+{
+ device_remove_file(&efx->pci_dev->dev, &dev_attr_mcdi_logging);
+}
+#endif
diff --git a/drivers/net/ethernet/sfc/efx_common.h b/drivers/net/ethernet/sfc/efx_common.h
new file mode 100644
index 000000000000..fa2fc681e7f9
--- /dev/null
+++ b/drivers/net/ethernet/sfc/efx_common.h
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2018 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#ifndef EFX_COMMON_H
+#define EFX_COMMON_H
+
+int efx_init_io(struct efx_nic *efx, int bar, dma_addr_t dma_mask,
+ unsigned int mem_map_size);
+void efx_fini_io(struct efx_nic *efx, int bar);
+int efx_init_struct(struct efx_nic *efx, struct pci_dev *pci_dev,
+ struct net_device *net_dev);
+void efx_fini_struct(struct efx_nic *efx);
+
+void efx_start_all(struct efx_nic *efx);
+void efx_stop_all(struct efx_nic *efx);
+
+void efx_net_stats(struct net_device *net_dev, struct rtnl_link_stats64 *stats);
+
+int efx_create_reset_workqueue(void);
+void efx_queue_reset_work(struct efx_nic *efx);
+void efx_flush_reset_workqueue(struct efx_nic *efx);
+void efx_destroy_reset_workqueue(void);
+
+void efx_start_monitor(struct efx_nic *efx);
+
+int __efx_reconfigure_port(struct efx_nic *efx);
+int efx_reconfigure_port(struct efx_nic *efx);
+
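+/* Reset serialisation is only asserted once the device has reached a
+ * registered state (READY, RECOVERY or DISABLED); before that, ordering
+ * comes from the probe/remove paths instead.
+ */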
+#define EFX_ASSERT_RESET_SERIALISED(efx) \
+ do { \
+ if ((efx->state == STATE_READY) || \
+ (efx->state == STATE_RECOVERY) || \
+ (efx->state == STATE_DISABLED)) \
+ ASSERT_RTNL(); \
+ } while (0)
+
+int efx_try_recovery(struct efx_nic *efx);
+void efx_reset_down(struct efx_nic *efx, enum reset_type method);
+int efx_reset_up(struct efx_nic *efx, enum reset_type method, bool ok);
+int efx_reset(struct efx_nic *efx, enum reset_type method);
+void efx_schedule_reset(struct efx_nic *efx, enum reset_type type);
+
+static inline int efx_check_disabled(struct efx_nic *efx)
+{
+ if (efx->state == STATE_DISABLED || efx->state == STATE_RECOVERY) {
+ netif_err(efx, drv, efx->net_dev,
+ "device is disabled due to earlier errors\n");
+ return -EIO;
+ }
+ return 0;
+}
+
+#ifdef CONFIG_SFC_MCDI_LOGGING
+void efx_init_mcdi_logging(struct efx_nic *efx);
+void efx_fini_mcdi_logging(struct efx_nic *efx);
+#else
+static inline void efx_init_mcdi_logging(struct efx_nic *efx) {}
+static inline void efx_fini_mcdi_logging(struct efx_nic *efx) {}
+#endif
+
+void efx_mac_reconfigure(struct efx_nic *efx);
+void efx_link_status_changed(struct efx_nic *efx);
+unsigned int efx_xdp_max_mtu(struct efx_nic *efx);
+int efx_change_mtu(struct net_device *net_dev, int new_mtu);
+
+#endif
diff --git a/drivers/net/ethernet/sfc/ethtool.c b/drivers/net/ethernet/sfc/ethtool.c
index b31032da4bcb..993b5769525b 100644
--- a/drivers/net/ethernet/sfc/ethtool.c
+++ b/drivers/net/ethernet/sfc/ethtool.c
@@ -13,92 +13,13 @@
#include "workarounds.h"
#include "selftest.h"
#include "efx.h"
+#include "efx_channels.h"
+#include "rx_common.h"
+#include "tx_common.h"
+#include "ethtool_common.h"
#include "filter.h"
#include "nic.h"
-struct efx_sw_stat_desc {
- const char *name;
- enum {
- EFX_ETHTOOL_STAT_SOURCE_nic,
- EFX_ETHTOOL_STAT_SOURCE_channel,
- EFX_ETHTOOL_STAT_SOURCE_tx_queue
- } source;
- unsigned offset;
- u64(*get_stat) (void *field); /* Reader function */
-};
-
-/* Initialiser for a struct efx_sw_stat_desc with type-checking */
-#define EFX_ETHTOOL_STAT(stat_name, source_name, field, field_type, \
- get_stat_function) { \
- .name = #stat_name, \
- .source = EFX_ETHTOOL_STAT_SOURCE_##source_name, \
- .offset = ((((field_type *) 0) == \
- &((struct efx_##source_name *)0)->field) ? \
- offsetof(struct efx_##source_name, field) : \
- offsetof(struct efx_##source_name, field)), \
- .get_stat = get_stat_function, \
-}
-
-static u64 efx_get_uint_stat(void *field)
-{
- return *(unsigned int *)field;
-}
-
-static u64 efx_get_atomic_stat(void *field)
-{
- return atomic_read((atomic_t *) field);
-}
-
-#define EFX_ETHTOOL_ATOMIC_NIC_ERROR_STAT(field) \
- EFX_ETHTOOL_STAT(field, nic, field, \
- atomic_t, efx_get_atomic_stat)
-
-#define EFX_ETHTOOL_UINT_CHANNEL_STAT(field) \
- EFX_ETHTOOL_STAT(field, channel, n_##field, \
- unsigned int, efx_get_uint_stat)
-#define EFX_ETHTOOL_UINT_CHANNEL_STAT_NO_N(field) \
- EFX_ETHTOOL_STAT(field, channel, field, \
- unsigned int, efx_get_uint_stat)
-
-#define EFX_ETHTOOL_UINT_TXQ_STAT(field) \
- EFX_ETHTOOL_STAT(tx_##field, tx_queue, field, \
- unsigned int, efx_get_uint_stat)
-
-static const struct efx_sw_stat_desc efx_sw_stat_desc[] = {
- EFX_ETHTOOL_UINT_TXQ_STAT(merge_events),
- EFX_ETHTOOL_UINT_TXQ_STAT(tso_bursts),
- EFX_ETHTOOL_UINT_TXQ_STAT(tso_long_headers),
- EFX_ETHTOOL_UINT_TXQ_STAT(tso_packets),
- EFX_ETHTOOL_UINT_TXQ_STAT(tso_fallbacks),
- EFX_ETHTOOL_UINT_TXQ_STAT(pushes),
- EFX_ETHTOOL_UINT_TXQ_STAT(pio_packets),
- EFX_ETHTOOL_UINT_TXQ_STAT(cb_packets),
- EFX_ETHTOOL_ATOMIC_NIC_ERROR_STAT(rx_reset),
- EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_tobe_disc),
- EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_ip_hdr_chksum_err),
- EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_tcp_udp_chksum_err),
- EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_inner_ip_hdr_chksum_err),
- EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_inner_tcp_udp_chksum_err),
- EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_outer_ip_hdr_chksum_err),
- EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_outer_tcp_udp_chksum_err),
- EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_eth_crc_err),
- EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_mcast_mismatch),
- EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_frm_trunc),
- EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_merge_events),
- EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_merge_packets),
- EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_xdp_drops),
- EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_xdp_bad_drops),
- EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_xdp_tx),
- EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_xdp_redirect),
-#ifdef CONFIG_RFS_ACCEL
- EFX_ETHTOOL_UINT_CHANNEL_STAT_NO_N(rfs_filter_count),
- EFX_ETHTOOL_UINT_CHANNEL_STAT(rfs_succeeded),
- EFX_ETHTOOL_UINT_CHANNEL_STAT(rfs_failed),
-#endif
-};
-
-#define EFX_ETHTOOL_SW_STAT_COUNT ARRAY_SIZE(efx_sw_stat_desc)
-
#define EFX_ETHTOOL_EEPROM_MAGIC 0xEFAB
/**************************************************************************
@@ -185,18 +106,6 @@ efx_ethtool_set_link_ksettings(struct net_device *net_dev,
return rc;
}
-static void efx_ethtool_get_drvinfo(struct net_device *net_dev,
- struct ethtool_drvinfo *info)
-{
- struct efx_nic *efx = netdev_priv(net_dev);
-
- strlcpy(info->driver, KBUILD_MODNAME, sizeof(info->driver));
- strlcpy(info->version, EFX_DRIVER_VERSION, sizeof(info->version));
- efx_mcdi_print_fwver(efx, info->fw_version,
- sizeof(info->fw_version));
- strlcpy(info->bus_info, pci_name(efx->pci_dev), sizeof(info->bus_info));
-}
-
static int efx_ethtool_get_regs_len(struct net_device *net_dev)
{
return efx_nic_get_regs_len(netdev_priv(net_dev));
@@ -211,341 +120,6 @@ static void efx_ethtool_get_regs(struct net_device *net_dev,
efx_nic_get_regs(efx, buf);
}
-static u32 efx_ethtool_get_msglevel(struct net_device *net_dev)
-{
- struct efx_nic *efx = netdev_priv(net_dev);
- return efx->msg_enable;
-}
-
-static void efx_ethtool_set_msglevel(struct net_device *net_dev, u32 msg_enable)
-{
- struct efx_nic *efx = netdev_priv(net_dev);
- efx->msg_enable = msg_enable;
-}
-
-/**
- * efx_fill_test - fill in an individual self-test entry
- * @test_index: Index of the test
- * @strings: Ethtool strings, or %NULL
- * @data: Ethtool test results, or %NULL
- * @test: Pointer to test result (used only if data != %NULL)
- * @unit_format: Unit name format (e.g. "chan\%d")
- * @unit_id: Unit id (e.g. 0 for "chan0")
- * @test_format: Test name format (e.g. "loopback.\%s.tx.sent")
- * @test_id: Test id (e.g. "PHYXS" for "loopback.PHYXS.tx_sent")
- *
- * Fill in an individual self-test entry.
- */
-static void efx_fill_test(unsigned int test_index, u8 *strings, u64 *data,
- int *test, const char *unit_format, int unit_id,
- const char *test_format, const char *test_id)
-{
- char unit_str[ETH_GSTRING_LEN], test_str[ETH_GSTRING_LEN];
-
- /* Fill data value, if applicable */
- if (data)
- data[test_index] = *test;
-
- /* Fill string, if applicable */
- if (strings) {
- if (strchr(unit_format, '%'))
- snprintf(unit_str, sizeof(unit_str),
- unit_format, unit_id);
- else
- strcpy(unit_str, unit_format);
- snprintf(test_str, sizeof(test_str), test_format, test_id);
- snprintf(strings + test_index * ETH_GSTRING_LEN,
- ETH_GSTRING_LEN,
- "%-6s %-24s", unit_str, test_str);
- }
-}
-
-#define EFX_CHANNEL_NAME(_channel) "chan%d", _channel->channel
-#define EFX_TX_QUEUE_NAME(_tx_queue) "txq%d", _tx_queue->queue
-#define EFX_RX_QUEUE_NAME(_rx_queue) "rxq%d", _rx_queue->queue
-#define EFX_LOOPBACK_NAME(_mode, _counter) \
- "loopback.%s." _counter, STRING_TABLE_LOOKUP(_mode, efx_loopback_mode)
-
-/**
- * efx_fill_loopback_test - fill in a block of loopback self-test entries
- * @efx: Efx NIC
- * @lb_tests: Efx loopback self-test results structure
- * @mode: Loopback test mode
- * @test_index: Starting index of the test
- * @strings: Ethtool strings, or %NULL
- * @data: Ethtool test results, or %NULL
- *
- * Fill in a block of loopback self-test entries. Return new test
- * index.
- */
-static int efx_fill_loopback_test(struct efx_nic *efx,
- struct efx_loopback_self_tests *lb_tests,
- enum efx_loopback_mode mode,
- unsigned int test_index,
- u8 *strings, u64 *data)
-{
- struct efx_channel *channel =
- efx_get_channel(efx, efx->tx_channel_offset);
- struct efx_tx_queue *tx_queue;
-
- efx_for_each_channel_tx_queue(tx_queue, channel) {
- efx_fill_test(test_index++, strings, data,
- &lb_tests->tx_sent[tx_queue->queue],
- EFX_TX_QUEUE_NAME(tx_queue),
- EFX_LOOPBACK_NAME(mode, "tx_sent"));
- efx_fill_test(test_index++, strings, data,
- &lb_tests->tx_done[tx_queue->queue],
- EFX_TX_QUEUE_NAME(tx_queue),
- EFX_LOOPBACK_NAME(mode, "tx_done"));
- }
- efx_fill_test(test_index++, strings, data,
- &lb_tests->rx_good,
- "rx", 0,
- EFX_LOOPBACK_NAME(mode, "rx_good"));
- efx_fill_test(test_index++, strings, data,
- &lb_tests->rx_bad,
- "rx", 0,
- EFX_LOOPBACK_NAME(mode, "rx_bad"));
-
- return test_index;
-}
-
-/**
- * efx_ethtool_fill_self_tests - get self-test details
- * @efx: Efx NIC
- * @tests: Efx self-test results structure, or %NULL
- * @strings: Ethtool strings, or %NULL
- * @data: Ethtool test results, or %NULL
- *
- * Get self-test number of strings, strings, and/or test results.
- * Return number of strings (== number of test results).
- *
- * The reason for merging these three functions is to make sure that
- * they can never be inconsistent.
- */
-static int efx_ethtool_fill_self_tests(struct efx_nic *efx,
- struct efx_self_tests *tests,
- u8 *strings, u64 *data)
-{
- struct efx_channel *channel;
- unsigned int n = 0, i;
- enum efx_loopback_mode mode;
-
- efx_fill_test(n++, strings, data, &tests->phy_alive,
- "phy", 0, "alive", NULL);
- efx_fill_test(n++, strings, data, &tests->nvram,
- "core", 0, "nvram", NULL);
- efx_fill_test(n++, strings, data, &tests->interrupt,
- "core", 0, "interrupt", NULL);
-
- /* Event queues */
- efx_for_each_channel(channel, efx) {
- efx_fill_test(n++, strings, data,
- &tests->eventq_dma[channel->channel],
- EFX_CHANNEL_NAME(channel),
- "eventq.dma", NULL);
- efx_fill_test(n++, strings, data,
- &tests->eventq_int[channel->channel],
- EFX_CHANNEL_NAME(channel),
- "eventq.int", NULL);
- }
-
- efx_fill_test(n++, strings, data, &tests->memory,
- "core", 0, "memory", NULL);
- efx_fill_test(n++, strings, data, &tests->registers,
- "core", 0, "registers", NULL);
-
- if (efx->phy_op->run_tests != NULL) {
- EFX_WARN_ON_PARANOID(efx->phy_op->test_name == NULL);
-
- for (i = 0; true; ++i) {
- const char *name;
-
- EFX_WARN_ON_PARANOID(i >= EFX_MAX_PHY_TESTS);
- name = efx->phy_op->test_name(efx, i);
- if (name == NULL)
- break;
-
- efx_fill_test(n++, strings, data, &tests->phy_ext[i],
- "phy", 0, name, NULL);
- }
- }
-
- /* Loopback tests */
- for (mode = LOOPBACK_NONE; mode <= LOOPBACK_TEST_MAX; mode++) {
- if (!(efx->loopback_modes & (1 << mode)))
- continue;
- n = efx_fill_loopback_test(efx,
- &tests->loopback[mode], mode, n,
- strings, data);
- }
-
- return n;
-}
-
-static size_t efx_describe_per_queue_stats(struct efx_nic *efx, u8 *strings)
-{
- size_t n_stats = 0;
- struct efx_channel *channel;
-
- efx_for_each_channel(channel, efx) {
- if (efx_channel_has_tx_queues(channel)) {
- n_stats++;
- if (strings != NULL) {
- snprintf(strings, ETH_GSTRING_LEN,
- "tx-%u.tx_packets",
- channel->tx_queue[0].queue /
- EFX_TXQ_TYPES);
-
- strings += ETH_GSTRING_LEN;
- }
- }
- }
- efx_for_each_channel(channel, efx) {
- if (efx_channel_has_rx_queue(channel)) {
- n_stats++;
- if (strings != NULL) {
- snprintf(strings, ETH_GSTRING_LEN,
- "rx-%d.rx_packets", channel->channel);
- strings += ETH_GSTRING_LEN;
- }
- }
- }
- if (efx->xdp_tx_queue_count && efx->xdp_tx_queues) {
- unsigned short xdp;
-
- for (xdp = 0; xdp < efx->xdp_tx_queue_count; xdp++) {
- n_stats++;
- if (strings) {
- snprintf(strings, ETH_GSTRING_LEN,
- "tx-xdp-cpu-%hu.tx_packets", xdp);
- strings += ETH_GSTRING_LEN;
- }
- }
- }
-
- return n_stats;
-}
-
-static int efx_ethtool_get_sset_count(struct net_device *net_dev,
- int string_set)
-{
- struct efx_nic *efx = netdev_priv(net_dev);
-
- switch (string_set) {
- case ETH_SS_STATS:
- return efx->type->describe_stats(efx, NULL) +
- EFX_ETHTOOL_SW_STAT_COUNT +
- efx_describe_per_queue_stats(efx, NULL) +
- efx_ptp_describe_stats(efx, NULL);
- case ETH_SS_TEST:
- return efx_ethtool_fill_self_tests(efx, NULL, NULL, NULL);
- default:
- return -EINVAL;
- }
-}
-
-static void efx_ethtool_get_strings(struct net_device *net_dev,
- u32 string_set, u8 *strings)
-{
- struct efx_nic *efx = netdev_priv(net_dev);
- int i;
-
- switch (string_set) {
- case ETH_SS_STATS:
- strings += (efx->type->describe_stats(efx, strings) *
- ETH_GSTRING_LEN);
- for (i = 0; i < EFX_ETHTOOL_SW_STAT_COUNT; i++)
- strlcpy(strings + i * ETH_GSTRING_LEN,
- efx_sw_stat_desc[i].name, ETH_GSTRING_LEN);
- strings += EFX_ETHTOOL_SW_STAT_COUNT * ETH_GSTRING_LEN;
- strings += (efx_describe_per_queue_stats(efx, strings) *
- ETH_GSTRING_LEN);
- efx_ptp_describe_stats(efx, strings);
- break;
- case ETH_SS_TEST:
- efx_ethtool_fill_self_tests(efx, NULL, strings, NULL);
- break;
- default:
- /* No other string sets */
- break;
- }
-}
-
-static void efx_ethtool_get_stats(struct net_device *net_dev,
- struct ethtool_stats *stats,
- u64 *data)
-{
- struct efx_nic *efx = netdev_priv(net_dev);
- const struct efx_sw_stat_desc *stat;
- struct efx_channel *channel;
- struct efx_tx_queue *tx_queue;
- struct efx_rx_queue *rx_queue;
- int i;
-
- spin_lock_bh(&efx->stats_lock);
-
- /* Get NIC statistics */
- data += efx->type->update_stats(efx, data, NULL);
-
- /* Get software statistics */
- for (i = 0; i < EFX_ETHTOOL_SW_STAT_COUNT; i++) {
- stat = &efx_sw_stat_desc[i];
- switch (stat->source) {
- case EFX_ETHTOOL_STAT_SOURCE_nic:
- data[i] = stat->get_stat((void *)efx + stat->offset);
- break;
- case EFX_ETHTOOL_STAT_SOURCE_channel:
- data[i] = 0;
- efx_for_each_channel(channel, efx)
- data[i] += stat->get_stat((void *)channel +
- stat->offset);
- break;
- case EFX_ETHTOOL_STAT_SOURCE_tx_queue:
- data[i] = 0;
- efx_for_each_channel(channel, efx) {
- efx_for_each_channel_tx_queue(tx_queue, channel)
- data[i] +=
- stat->get_stat((void *)tx_queue
- + stat->offset);
- }
- break;
- }
- }
- data += EFX_ETHTOOL_SW_STAT_COUNT;
-
- spin_unlock_bh(&efx->stats_lock);
-
- efx_for_each_channel(channel, efx) {
- if (efx_channel_has_tx_queues(channel)) {
- *data = 0;
- efx_for_each_channel_tx_queue(tx_queue, channel) {
- *data += tx_queue->tx_packets;
- }
- data++;
- }
- }
- efx_for_each_channel(channel, efx) {
- if (efx_channel_has_rx_queue(channel)) {
- *data = 0;
- efx_for_each_channel_rx_queue(rx_queue, channel) {
- *data += rx_queue->rx_packets;
- }
- data++;
- }
- }
- if (efx->xdp_tx_queue_count && efx->xdp_tx_queues) {
- int xdp;
-
- for (xdp = 0; xdp < efx->xdp_tx_queue_count; xdp++) {
- data[0] = efx->xdp_tx_queues[xdp]->tx_packets;
- data++;
- }
- }
-
- efx_ptp_update_stats(efx, data);
-}
-
static void efx_ethtool_self_test(struct net_device *net_dev,
struct ethtool_test *test, u64 *data)
{
@@ -787,16 +361,6 @@ out:
return rc;
}
-static void efx_ethtool_get_pauseparam(struct net_device *net_dev,
- struct ethtool_pauseparam *pause)
-{
- struct efx_nic *efx = netdev_priv(net_dev);
-
- pause->rx_pause = !!(efx->wanted_fc & EFX_FC_RX);
- pause->tx_pause = !!(efx->wanted_fc & EFX_FC_TX);
- pause->autoneg = !!(efx->wanted_fc & EFX_FC_AUTO);
-}
-
static void efx_ethtool_get_wol(struct net_device *net_dev,
struct ethtool_wolinfo *wol)
{
@@ -1456,7 +1020,7 @@ static int efx_ethtool_set_rxfh_context(struct net_device *net_dev,
rc = -ENOMEM;
goto out_unlock;
}
- ctx->context_id = EFX_EF10_RSS_CONTEXT_INVALID;
+ ctx->context_id = EFX_MCDI_RSS_CONTEXT_INVALID;
/* Initialise indir table and key to defaults */
efx_set_default_rx_indir_table(efx, ctx);
netdev_rss_key_fill(ctx->rx_hash_key, sizeof(ctx->rx_hash_key));
diff --git a/drivers/net/ethernet/sfc/ethtool_common.c b/drivers/net/ethernet/sfc/ethtool_common.c
new file mode 100644
index 000000000000..b8d281ab6c7a
--- /dev/null
+++ b/drivers/net/ethernet/sfc/ethtool_common.c
@@ -0,0 +1,457 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2019 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include "net_driver.h"
+#include "mcdi.h"
+#include "nic.h"
+#include "selftest.h"
+#include "ethtool_common.h"
+
+struct efx_sw_stat_desc {
+ const char *name;
+ enum {
+ EFX_ETHTOOL_STAT_SOURCE_nic,
+ EFX_ETHTOOL_STAT_SOURCE_channel,
+ EFX_ETHTOOL_STAT_SOURCE_tx_queue
+ } source;
+ unsigned int offset;
+ u64 (*get_stat)(void *field); /* Reader function */
+};
+
+/* Initialiser for a struct efx_sw_stat_desc with type-checking */
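+/* Both arms of the conditional below evaluate to the same offsetof(); the
+ * pointer comparison is there purely so the compiler emits a "comparison
+ * of distinct pointer types" warning if @field is not of type @field_type.
+ */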
+#define EFX_ETHTOOL_STAT(stat_name, source_name, field, field_type, \
+ get_stat_function) { \
+ .name = #stat_name, \
+ .source = EFX_ETHTOOL_STAT_SOURCE_##source_name, \
+ .offset = ((((field_type *) 0) == \
+ &((struct efx_##source_name *)0)->field) ? \
+ offsetof(struct efx_##source_name, field) : \
+ offsetof(struct efx_##source_name, field)), \
+ .get_stat = get_stat_function, \
+}
+
+static u64 efx_get_uint_stat(void *field)
+{
+ return *(unsigned int *)field;
+}
+
+static u64 efx_get_atomic_stat(void *field)
+{
+ return atomic_read((atomic_t *) field);
+}
+
+#define EFX_ETHTOOL_ATOMIC_NIC_ERROR_STAT(field) \
+ EFX_ETHTOOL_STAT(field, nic, field, \
+ atomic_t, efx_get_atomic_stat)
+
+#define EFX_ETHTOOL_UINT_CHANNEL_STAT(field) \
+ EFX_ETHTOOL_STAT(field, channel, n_##field, \
+ unsigned int, efx_get_uint_stat)
+#define EFX_ETHTOOL_UINT_CHANNEL_STAT_NO_N(field) \
+ EFX_ETHTOOL_STAT(field, channel, field, \
+ unsigned int, efx_get_uint_stat)
+
+#define EFX_ETHTOOL_UINT_TXQ_STAT(field) \
+ EFX_ETHTOOL_STAT(tx_##field, tx_queue, field, \
+ unsigned int, efx_get_uint_stat)
+
+static const struct efx_sw_stat_desc efx_sw_stat_desc[] = {
+ EFX_ETHTOOL_UINT_TXQ_STAT(merge_events),
+ EFX_ETHTOOL_UINT_TXQ_STAT(tso_bursts),
+ EFX_ETHTOOL_UINT_TXQ_STAT(tso_long_headers),
+ EFX_ETHTOOL_UINT_TXQ_STAT(tso_packets),
+ EFX_ETHTOOL_UINT_TXQ_STAT(tso_fallbacks),
+ EFX_ETHTOOL_UINT_TXQ_STAT(pushes),
+ EFX_ETHTOOL_UINT_TXQ_STAT(pio_packets),
+ EFX_ETHTOOL_UINT_TXQ_STAT(cb_packets),
+ EFX_ETHTOOL_ATOMIC_NIC_ERROR_STAT(rx_reset),
+ EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_tobe_disc),
+ EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_ip_hdr_chksum_err),
+ EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_tcp_udp_chksum_err),
+ EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_inner_ip_hdr_chksum_err),
+ EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_inner_tcp_udp_chksum_err),
+ EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_outer_ip_hdr_chksum_err),
+ EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_outer_tcp_udp_chksum_err),
+ EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_eth_crc_err),
+ EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_mcast_mismatch),
+ EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_frm_trunc),
+ EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_merge_events),
+ EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_merge_packets),
+ EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_xdp_drops),
+ EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_xdp_bad_drops),
+ EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_xdp_tx),
+ EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_xdp_redirect),
+#ifdef CONFIG_RFS_ACCEL
+ EFX_ETHTOOL_UINT_CHANNEL_STAT_NO_N(rfs_filter_count),
+ EFX_ETHTOOL_UINT_CHANNEL_STAT(rfs_succeeded),
+ EFX_ETHTOOL_UINT_CHANNEL_STAT(rfs_failed),
+#endif
+};
+
+#define EFX_ETHTOOL_SW_STAT_COUNT ARRAY_SIZE(efx_sw_stat_desc)
+
+void efx_ethtool_get_drvinfo(struct net_device *net_dev,
+ struct ethtool_drvinfo *info)
+{
+ struct efx_nic *efx = netdev_priv(net_dev);
+
+ strlcpy(info->driver, KBUILD_MODNAME, sizeof(info->driver));
+ strlcpy(info->version, EFX_DRIVER_VERSION, sizeof(info->version));
+ efx_mcdi_print_fwver(efx, info->fw_version,
+ sizeof(info->fw_version));
+ strlcpy(info->bus_info, pci_name(efx->pci_dev), sizeof(info->bus_info));
+}
+
+u32 efx_ethtool_get_msglevel(struct net_device *net_dev)
+{
+ struct efx_nic *efx = netdev_priv(net_dev);
+
+ return efx->msg_enable;
+}
+
+void efx_ethtool_set_msglevel(struct net_device *net_dev, u32 msg_enable)
+{
+ struct efx_nic *efx = netdev_priv(net_dev);
+
+ efx->msg_enable = msg_enable;
+}
+
+void efx_ethtool_get_pauseparam(struct net_device *net_dev,
+ struct ethtool_pauseparam *pause)
+{
+ struct efx_nic *efx = netdev_priv(net_dev);
+
+ pause->rx_pause = !!(efx->wanted_fc & EFX_FC_RX);
+ pause->tx_pause = !!(efx->wanted_fc & EFX_FC_TX);
+ pause->autoneg = !!(efx->wanted_fc & EFX_FC_AUTO);
+}
+
+/**
+ * efx_fill_test - fill in an individual self-test entry
+ * @test_index: Index of the test
+ * @strings: Ethtool strings, or %NULL
+ * @data: Ethtool test results, or %NULL
+ * @test: Pointer to test result (used only if data != %NULL)
+ * @unit_format: Unit name format (e.g. "chan\%d")
+ * @unit_id: Unit id (e.g. 0 for "chan0")
+ * @test_format: Test name format (e.g. "loopback.\%s.tx.sent")
+ * @test_id: Test id (e.g. "PHYXS" for "loopback.PHYXS.tx_sent")
+ *
+ * Fill in an individual self-test entry.
+ */
+static void efx_fill_test(unsigned int test_index, u8 *strings, u64 *data,
+ int *test, const char *unit_format, int unit_id,
+ const char *test_format, const char *test_id)
+{
+ char unit_str[ETH_GSTRING_LEN], test_str[ETH_GSTRING_LEN];
+
+ /* Fill data value, if applicable */
+ if (data)
+ data[test_index] = *test;
+
+ /* Fill string, if applicable */
+ if (strings) {
+ if (strchr(unit_format, '%'))
+ snprintf(unit_str, sizeof(unit_str),
+ unit_format, unit_id);
+ else
+ strcpy(unit_str, unit_format);
+ snprintf(test_str, sizeof(test_str), test_format, test_id);
+ snprintf(strings + test_index * ETH_GSTRING_LEN,
+ ETH_GSTRING_LEN,
+ "%-6s %-24s", unit_str, test_str);
+ }
+}
+
+#define EFX_CHANNEL_NAME(_channel) "chan%d", _channel->channel
+#define EFX_TX_QUEUE_NAME(_tx_queue) "txq%d", _tx_queue->queue
+#define EFX_RX_QUEUE_NAME(_rx_queue) "rxq%d", _rx_queue->queue
+#define EFX_LOOPBACK_NAME(_mode, _counter) \
+ "loopback.%s." _counter, STRING_TABLE_LOOKUP(_mode, efx_loopback_mode)
+
+/**
+ * efx_fill_loopback_test - fill in a block of loopback self-test entries
+ * @efx: Efx NIC
+ * @lb_tests: Efx loopback self-test results structure
+ * @mode: Loopback test mode
+ * @test_index: Starting index of the test
+ * @strings: Ethtool strings, or %NULL
+ * @data: Ethtool test results, or %NULL
+ *
+ * Fill in a block of loopback self-test entries. Return new test
+ * index.
+ */
+static int efx_fill_loopback_test(struct efx_nic *efx,
+ struct efx_loopback_self_tests *lb_tests,
+ enum efx_loopback_mode mode,
+ unsigned int test_index,
+ u8 *strings, u64 *data)
+{
+ struct efx_channel *channel =
+ efx_get_channel(efx, efx->tx_channel_offset);
+ struct efx_tx_queue *tx_queue;
+
+ efx_for_each_channel_tx_queue(tx_queue, channel) {
+ efx_fill_test(test_index++, strings, data,
+ &lb_tests->tx_sent[tx_queue->queue],
+ EFX_TX_QUEUE_NAME(tx_queue),
+ EFX_LOOPBACK_NAME(mode, "tx_sent"));
+ efx_fill_test(test_index++, strings, data,
+ &lb_tests->tx_done[tx_queue->queue],
+ EFX_TX_QUEUE_NAME(tx_queue),
+ EFX_LOOPBACK_NAME(mode, "tx_done"));
+ }
+ efx_fill_test(test_index++, strings, data,
+ &lb_tests->rx_good,
+ "rx", 0,
+ EFX_LOOPBACK_NAME(mode, "rx_good"));
+ efx_fill_test(test_index++, strings, data,
+ &lb_tests->rx_bad,
+ "rx", 0,
+ EFX_LOOPBACK_NAME(mode, "rx_bad"));
+
+ return test_index;
+}
+
+/**
+ * efx_ethtool_fill_self_tests - get self-test details
+ * @efx: Efx NIC
+ * @tests: Efx self-test results structure, or %NULL
+ * @strings: Ethtool strings, or %NULL
+ * @data: Ethtool test results, or %NULL
+ *
+ * Get self-test number of strings, strings, and/or test results.
+ * Return number of strings (== number of test results).
+ *
+ * The reason for merging these three functions is to make sure that
+ * they can never be inconsistent.
+ */
+int efx_ethtool_fill_self_tests(struct efx_nic *efx,
+ struct efx_self_tests *tests,
+ u8 *strings, u64 *data)
+{
+ struct efx_channel *channel;
+ unsigned int n = 0, i;
+ enum efx_loopback_mode mode;
+
+ efx_fill_test(n++, strings, data, &tests->phy_alive,
+ "phy", 0, "alive", NULL);
+ efx_fill_test(n++, strings, data, &tests->nvram,
+ "core", 0, "nvram", NULL);
+ efx_fill_test(n++, strings, data, &tests->interrupt,
+ "core", 0, "interrupt", NULL);
+
+ /* Event queues */
+ efx_for_each_channel(channel, efx) {
+ efx_fill_test(n++, strings, data,
+ &tests->eventq_dma[channel->channel],
+ EFX_CHANNEL_NAME(channel),
+ "eventq.dma", NULL);
+ efx_fill_test(n++, strings, data,
+ &tests->eventq_int[channel->channel],
+ EFX_CHANNEL_NAME(channel),
+ "eventq.int", NULL);
+ }
+
+ efx_fill_test(n++, strings, data, &tests->memory,
+ "core", 0, "memory", NULL);
+ efx_fill_test(n++, strings, data, &tests->registers,
+ "core", 0, "registers", NULL);
+
+ if (efx->phy_op->run_tests != NULL) {
+ EFX_WARN_ON_PARANOID(efx->phy_op->test_name == NULL);
+
+ for (i = 0; true; ++i) {
+ const char *name;
+
+ EFX_WARN_ON_PARANOID(i >= EFX_MAX_PHY_TESTS);
+ name = efx->phy_op->test_name(efx, i);
+ if (name == NULL)
+ break;
+
+ efx_fill_test(n++, strings, data, &tests->phy_ext[i],
+ "phy", 0, name, NULL);
+ }
+ }
+
+ /* Loopback tests */
+ for (mode = LOOPBACK_NONE; mode <= LOOPBACK_TEST_MAX; mode++) {
+ if (!(efx->loopback_modes & (1 << mode)))
+ continue;
+ n = efx_fill_loopback_test(efx,
+ &tests->loopback[mode], mode, n,
+ strings, data);
+ }
+
+ return n;
+}
+
+static size_t efx_describe_per_queue_stats(struct efx_nic *efx, u8 *strings)
+{
+ size_t n_stats = 0;
+ struct efx_channel *channel;
+
+ efx_for_each_channel(channel, efx) {
+ if (efx_channel_has_tx_queues(channel)) {
+ n_stats++;
+ if (strings != NULL) {
+ snprintf(strings, ETH_GSTRING_LEN,
+ "tx-%u.tx_packets",
+ channel->tx_queue[0].queue /
+ EFX_TXQ_TYPES);
+
+ strings += ETH_GSTRING_LEN;
+ }
+ }
+ }
+ efx_for_each_channel(channel, efx) {
+ if (efx_channel_has_rx_queue(channel)) {
+ n_stats++;
+ if (strings != NULL) {
+ snprintf(strings, ETH_GSTRING_LEN,
+ "rx-%d.rx_packets", channel->channel);
+ strings += ETH_GSTRING_LEN;
+ }
+ }
+ }
+ if (efx->xdp_tx_queue_count && efx->xdp_tx_queues) {
+ unsigned short xdp;
+
+ for (xdp = 0; xdp < efx->xdp_tx_queue_count; xdp++) {
+ n_stats++;
+ if (strings) {
+ snprintf(strings, ETH_GSTRING_LEN,
+ "tx-xdp-cpu-%hu.tx_packets", xdp);
+ strings += ETH_GSTRING_LEN;
+ }
+ }
+ }
+
+ return n_stats;
+}
+
+int efx_ethtool_get_sset_count(struct net_device *net_dev, int string_set)
+{
+ struct efx_nic *efx = netdev_priv(net_dev);
+
+ switch (string_set) {
+ case ETH_SS_STATS:
+ return efx->type->describe_stats(efx, NULL) +
+ EFX_ETHTOOL_SW_STAT_COUNT +
+ efx_describe_per_queue_stats(efx, NULL) +
+ efx_ptp_describe_stats(efx, NULL);
+ case ETH_SS_TEST:
+ return efx_ethtool_fill_self_tests(efx, NULL, NULL, NULL);
+ default:
+ return -EINVAL;
+ }
+}
+
+void efx_ethtool_get_strings(struct net_device *net_dev,
+ u32 string_set, u8 *strings)
+{
+ struct efx_nic *efx = netdev_priv(net_dev);
+ int i;
+
+ switch (string_set) {
+ case ETH_SS_STATS:
+ strings += (efx->type->describe_stats(efx, strings) *
+ ETH_GSTRING_LEN);
+ for (i = 0; i < EFX_ETHTOOL_SW_STAT_COUNT; i++)
+ strlcpy(strings + i * ETH_GSTRING_LEN,
+ efx_sw_stat_desc[i].name, ETH_GSTRING_LEN);
+ strings += EFX_ETHTOOL_SW_STAT_COUNT * ETH_GSTRING_LEN;
+ strings += (efx_describe_per_queue_stats(efx, strings) *
+ ETH_GSTRING_LEN);
+ efx_ptp_describe_stats(efx, strings);
+ break;
+ case ETH_SS_TEST:
+ efx_ethtool_fill_self_tests(efx, NULL, strings, NULL);
+ break;
+ default:
+ /* No other string sets */
+ break;
+ }
+}
+
+void efx_ethtool_get_stats(struct net_device *net_dev,
+ struct ethtool_stats *stats,
+ u64 *data)
+{
+ struct efx_nic *efx = netdev_priv(net_dev);
+ const struct efx_sw_stat_desc *stat;
+ struct efx_channel *channel;
+ struct efx_tx_queue *tx_queue;
+ struct efx_rx_queue *rx_queue;
+ int i;
+
+ spin_lock_bh(&efx->stats_lock);
+
+ /* Get NIC statistics */
+ data += efx->type->update_stats(efx, data, NULL);
+
+ /* Get software statistics */
+ for (i = 0; i < EFX_ETHTOOL_SW_STAT_COUNT; i++) {
+ stat = &efx_sw_stat_desc[i];
+ switch (stat->source) {
+ case EFX_ETHTOOL_STAT_SOURCE_nic:
+ data[i] = stat->get_stat((void *)efx + stat->offset);
+ break;
+ case EFX_ETHTOOL_STAT_SOURCE_channel:
+ data[i] = 0;
+ efx_for_each_channel(channel, efx)
+ data[i] += stat->get_stat((void *)channel +
+ stat->offset);
+ break;
+ case EFX_ETHTOOL_STAT_SOURCE_tx_queue:
+ data[i] = 0;
+ efx_for_each_channel(channel, efx) {
+ efx_for_each_channel_tx_queue(tx_queue, channel)
+ data[i] +=
+ stat->get_stat((void *)tx_queue
+ + stat->offset);
+ }
+ break;
+ }
+ }
+ data += EFX_ETHTOOL_SW_STAT_COUNT;
+
+ spin_unlock_bh(&efx->stats_lock);
+
+ efx_for_each_channel(channel, efx) {
+ if (efx_channel_has_tx_queues(channel)) {
+ *data = 0;
+ efx_for_each_channel_tx_queue(tx_queue, channel) {
+ *data += tx_queue->tx_packets;
+ }
+ data++;
+ }
+ }
+ efx_for_each_channel(channel, efx) {
+ if (efx_channel_has_rx_queue(channel)) {
+ *data = 0;
+ efx_for_each_channel_rx_queue(rx_queue, channel) {
+ *data += rx_queue->rx_packets;
+ }
+ data++;
+ }
+ }
+ if (efx->xdp_tx_queue_count && efx->xdp_tx_queues) {
+ int xdp;
+
+ for (xdp = 0; xdp < efx->xdp_tx_queue_count; xdp++) {
+ data[0] = efx->xdp_tx_queues[xdp]->tx_packets;
+ data++;
+ }
+ }
+
+ efx_ptp_update_stats(efx, data);
+}
diff --git a/drivers/net/ethernet/sfc/ethtool_common.h b/drivers/net/ethernet/sfc/ethtool_common.h
new file mode 100644
index 000000000000..fa624313f330
--- /dev/null
+++ b/drivers/net/ethernet/sfc/ethtool_common.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2019 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#ifndef EFX_ETHTOOL_COMMON_H
+#define EFX_ETHTOOL_COMMON_H
+
+void efx_ethtool_get_drvinfo(struct net_device *net_dev,
+ struct ethtool_drvinfo *info);
+u32 efx_ethtool_get_msglevel(struct net_device *net_dev);
+void efx_ethtool_set_msglevel(struct net_device *net_dev, u32 msg_enable);
+void efx_ethtool_get_pauseparam(struct net_device *net_dev,
+ struct ethtool_pauseparam *pause);
+int efx_ethtool_fill_self_tests(struct efx_nic *efx,
+ struct efx_self_tests *tests,
+ u8 *strings, u64 *data);
+int efx_ethtool_get_sset_count(struct net_device *net_dev, int string_set);
+void efx_ethtool_get_strings(struct net_device *net_dev, u32 string_set,
+ u8 *strings);
+void efx_ethtool_get_stats(struct net_device *net_dev,
+ struct ethtool_stats *stats __attribute__ ((unused)),
+ u64 *data);
+
+#endif
diff --git a/drivers/net/ethernet/sfc/falcon/efx.c b/drivers/net/ethernet/sfc/falcon/efx.c
index 53ae9faeb4c3..42bcd34fc508 100644
--- a/drivers/net/ethernet/sfc/falcon/efx.c
+++ b/drivers/net/ethernet/sfc/falcon/efx.c
@@ -2108,7 +2108,7 @@ static void ef4_net_stats(struct net_device *net_dev,
}
/* Context: netif_tx_lock held, BHs disabled. */
-static void ef4_watchdog(struct net_device *net_dev)
+static void ef4_watchdog(struct net_device *net_dev, unsigned int txqueue)
{
struct ef4_nic *efx = netdev_priv(net_dev);
diff --git a/drivers/net/ethernet/sfc/farch.c b/drivers/net/ethernet/sfc/farch.c
index eedd32e2bfcb..dbbb898adddb 100644
--- a/drivers/net/ethernet/sfc/farch.c
+++ b/drivers/net/ethernet/sfc/farch.c
@@ -15,6 +15,7 @@
#include "net_driver.h"
#include "bitfield.h"
#include "efx.h"
+#include "rx_common.h"
#include "nic.h"
#include "farch_regs.h"
#include "sriov.h"
diff --git a/drivers/net/ethernet/sfc/mcdi.h b/drivers/net/ethernet/sfc/mcdi.h
index 9081f84a2604..54a45010b576 100644
--- a/drivers/net/ethernet/sfc/mcdi.h
+++ b/drivers/net/ethernet/sfc/mcdi.h
@@ -346,11 +346,8 @@ int efx_mcdi_flush_rxqs(struct efx_nic *efx);
int efx_mcdi_port_probe(struct efx_nic *efx);
void efx_mcdi_port_remove(struct efx_nic *efx);
int efx_mcdi_port_reconfigure(struct efx_nic *efx);
-int efx_mcdi_port_get_number(struct efx_nic *efx);
u32 efx_mcdi_phy_get_caps(struct efx_nic *efx);
void efx_mcdi_process_link_change(struct efx_nic *efx, efx_qword_t *ev);
-int efx_mcdi_set_mac(struct efx_nic *efx);
-#define EFX_MC_STATS_GENERATION_INVALID ((__force __le64)(-1))
void efx_mcdi_mac_start_stats(struct efx_nic *efx);
void efx_mcdi_mac_stop_stats(struct efx_nic *efx);
void efx_mcdi_mac_pull_stats(struct efx_nic *efx);
diff --git a/drivers/net/ethernet/sfc/mcdi_filters.c b/drivers/net/ethernet/sfc/mcdi_filters.c
new file mode 100644
index 000000000000..4310ae5bd898
--- /dev/null
+++ b/drivers/net/ethernet/sfc/mcdi_filters.c
@@ -0,0 +1,2270 @@
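+// SPDX-License-Identifier: GPL-2.0-only
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2019 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */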
+#include "mcdi_filters.h"
+#include "mcdi.h"
+#include "nic.h"
+#include "rx_common.h"
+
+/* The maximum size of a shared RSS context */
+/* TODO: this should really be from the mcdi protocol export */
+#define EFX_EF10_MAX_SHARED_RSS_CONTEXT_SIZE 64UL
+
+#define EFX_EF10_FILTER_ID_INVALID 0xffff
+
+/* An arbitrary search limit for the software hash table */
+#define EFX_EF10_FILTER_SEARCH_LIMIT 200
+
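+/* Each table entry packs the filter spec pointer and its
+ * EFX_EF10_FILTER_FLAGS into a single word: the flags occupy the low bits
+ * left free by pointer alignment, and the two accessors below mask them
+ * apart again.
+ */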
+static struct efx_filter_spec *
+efx_mcdi_filter_entry_spec(const struct efx_mcdi_filter_table *table,
+ unsigned int filter_idx)
+{
+ return (struct efx_filter_spec *)(table->entry[filter_idx].spec &
+ ~EFX_EF10_FILTER_FLAGS);
+}
+
+static unsigned int
+efx_mcdi_filter_entry_flags(const struct efx_mcdi_filter_table *table,
+ unsigned int filter_idx)
+{
+ return table->entry[filter_idx].spec & EFX_EF10_FILTER_FLAGS;
+}
+
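+/* Filter IDs encode a priority in their high bits and a table row in their
+ * low bits: id = pri * EFX_MCDI_FILTER_TBL_ROWS * 2 + idx, which the
+ * "unsafe" helpers below decompose again.
+ */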
+static u32 efx_mcdi_filter_get_unsafe_id(u32 filter_id)
+{
+ WARN_ON_ONCE(filter_id == EFX_EF10_FILTER_ID_INVALID);
+ return filter_id & (EFX_MCDI_FILTER_TBL_ROWS - 1);
+}
+
+static unsigned int efx_mcdi_filter_get_unsafe_pri(u32 filter_id)
+{
+ return filter_id / (EFX_MCDI_FILTER_TBL_ROWS * 2);
+}
+
+static u32 efx_mcdi_filter_make_filter_id(unsigned int pri, u16 idx)
+{
+ return pri * EFX_MCDI_FILTER_TBL_ROWS * 2 + idx;
+}
+
+/*
+ * Decide whether a filter should be exclusive or else should allow
+ * delivery to additional recipients. Currently we decide that
+ * filters for specific local unicast MAC and IP addresses are
+ * exclusive.
+ */
+static bool efx_mcdi_filter_is_exclusive(const struct efx_filter_spec *spec)
+{
+ if (spec->match_flags & EFX_FILTER_MATCH_LOC_MAC &&
+ !is_multicast_ether_addr(spec->loc_mac))
+ return true;
+
+ if ((spec->match_flags &
+ (EFX_FILTER_MATCH_ETHER_TYPE | EFX_FILTER_MATCH_LOC_HOST)) ==
+ (EFX_FILTER_MATCH_ETHER_TYPE | EFX_FILTER_MATCH_LOC_HOST)) {
+ if (spec->ether_type == htons(ETH_P_IP) &&
+ !ipv4_is_multicast(spec->loc_host[0]))
+ return true;
+ if (spec->ether_type == htons(ETH_P_IPV6) &&
+ ((const u8 *)spec->loc_host)[0] != 0xff)
+ return true;
+ }
+
+ return false;
+}
+
+static void
+efx_mcdi_filter_set_entry(struct efx_mcdi_filter_table *table,
+ unsigned int filter_idx,
+ const struct efx_filter_spec *spec,
+ unsigned int flags)
+{
+ table->entry[filter_idx].spec = (unsigned long)spec | flags;
+}
+
+static void
+efx_mcdi_filter_push_prep_set_match_fields(struct efx_nic *efx,
+ const struct efx_filter_spec *spec,
+ efx_dword_t *inbuf)
+{
+ enum efx_encap_type encap_type = efx_filter_get_encap_type(spec);
+ u32 match_fields = 0, uc_match, mc_match;
+
+ MCDI_SET_DWORD(inbuf, FILTER_OP_IN_OP,
+ efx_mcdi_filter_is_exclusive(spec) ?
+ MC_CMD_FILTER_OP_IN_OP_INSERT :
+ MC_CMD_FILTER_OP_IN_OP_SUBSCRIBE);
+
+ /*
+ * Convert match flags and values. Unlike almost
+ * everything else in MCDI, these fields are in
+ * network byte order.
+ */
+#define COPY_VALUE(value, mcdi_field) \
+ do { \
+ match_fields |= \
+ 1 << MC_CMD_FILTER_OP_IN_MATCH_ ## \
+ mcdi_field ## _LBN; \
+ BUILD_BUG_ON( \
+ MC_CMD_FILTER_OP_IN_ ## mcdi_field ## _LEN < \
+ sizeof(value)); \
+ memcpy(MCDI_PTR(inbuf, FILTER_OP_IN_ ## mcdi_field), \
+ &value, sizeof(value)); \
+ } while (0)
+#define COPY_FIELD(gen_flag, gen_field, mcdi_field) \
+ if (spec->match_flags & EFX_FILTER_MATCH_ ## gen_flag) { \
+ COPY_VALUE(spec->gen_field, mcdi_field); \
+ }
+ /*
+ * Handle encap filters first. They will always be mismatch
+ * (unknown UC or MC) filters
+ */
+ if (encap_type) {
+ /*
+ * ether_type and outer_ip_proto need to be variables
+ * because COPY_VALUE wants to memcpy them
+ */
+ __be16 ether_type =
+ htons(encap_type & EFX_ENCAP_FLAG_IPV6 ?
+ ETH_P_IPV6 : ETH_P_IP);
+ u8 vni_type = MC_CMD_FILTER_OP_EXT_IN_VNI_TYPE_GENEVE;
+ u8 outer_ip_proto;
+
+ switch (encap_type & EFX_ENCAP_TYPES_MASK) {
+ case EFX_ENCAP_TYPE_VXLAN:
+ vni_type = MC_CMD_FILTER_OP_EXT_IN_VNI_TYPE_VXLAN;
+ /* fallthrough */
+ case EFX_ENCAP_TYPE_GENEVE:
+ COPY_VALUE(ether_type, ETHER_TYPE);
+ outer_ip_proto = IPPROTO_UDP;
+ COPY_VALUE(outer_ip_proto, IP_PROTO);
+ /*
+ * We always need to set the type field, even
+ * though we're not matching on the TNI.
+ */
+ MCDI_POPULATE_DWORD_1(inbuf,
+ FILTER_OP_EXT_IN_VNI_OR_VSID,
+ FILTER_OP_EXT_IN_VNI_TYPE,
+ vni_type);
+ break;
+ case EFX_ENCAP_TYPE_NVGRE:
+ COPY_VALUE(ether_type, ETHER_TYPE);
+ outer_ip_proto = IPPROTO_GRE;
+ COPY_VALUE(outer_ip_proto, IP_PROTO);
+ break;
+ default:
+ WARN_ON(1);
+ }
+
+ uc_match = MC_CMD_FILTER_OP_EXT_IN_MATCH_IFRM_UNKNOWN_UCAST_DST_LBN;
+ mc_match = MC_CMD_FILTER_OP_EXT_IN_MATCH_IFRM_UNKNOWN_MCAST_DST_LBN;
+ } else {
+ uc_match = MC_CMD_FILTER_OP_EXT_IN_MATCH_UNKNOWN_UCAST_DST_LBN;
+ mc_match = MC_CMD_FILTER_OP_EXT_IN_MATCH_UNKNOWN_MCAST_DST_LBN;
+ }
+
+ if (spec->match_flags & EFX_FILTER_MATCH_LOC_MAC_IG)
+ match_fields |=
+ is_multicast_ether_addr(spec->loc_mac) ?
+ 1 << mc_match :
+ 1 << uc_match;
+ COPY_FIELD(REM_HOST, rem_host, SRC_IP);
+ COPY_FIELD(LOC_HOST, loc_host, DST_IP);
+ COPY_FIELD(REM_MAC, rem_mac, SRC_MAC);
+ COPY_FIELD(REM_PORT, rem_port, SRC_PORT);
+ COPY_FIELD(LOC_MAC, loc_mac, DST_MAC);
+ COPY_FIELD(LOC_PORT, loc_port, DST_PORT);
+ COPY_FIELD(ETHER_TYPE, ether_type, ETHER_TYPE);
+ COPY_FIELD(INNER_VID, inner_vid, INNER_VLAN);
+ COPY_FIELD(OUTER_VID, outer_vid, OUTER_VLAN);
+ COPY_FIELD(IP_PROTO, ip_proto, IP_PROTO);
+#undef COPY_FIELD
+#undef COPY_VALUE
+ MCDI_SET_DWORD(inbuf, FILTER_OP_IN_MATCH_FIELDS,
+ match_fields);
+}
+
+static void efx_mcdi_filter_push_prep(struct efx_nic *efx,
+ const struct efx_filter_spec *spec,
+ efx_dword_t *inbuf, u64 handle,
+ struct efx_rss_context *ctx,
+ bool replacing)
+{
+ struct efx_ef10_nic_data *nic_data = efx->nic_data;
+ u32 flags = spec->flags;
+
+ memset(inbuf, 0, MC_CMD_FILTER_OP_EXT_IN_LEN);
+
+	/* If RSS filter, the caller must have given us an RSS context */
+ if (flags & EFX_FILTER_FLAG_RX_RSS) {
+ /*
+ * We don't have the ability to return an error, so we'll just
+ * log a warning and disable RSS for the filter.
+ */
+ if (WARN_ON_ONCE(!ctx))
+ flags &= ~EFX_FILTER_FLAG_RX_RSS;
+ else if (WARN_ON_ONCE(ctx->context_id == EFX_MCDI_RSS_CONTEXT_INVALID))
+ flags &= ~EFX_FILTER_FLAG_RX_RSS;
+ }
+
+ if (replacing) {
+ MCDI_SET_DWORD(inbuf, FILTER_OP_IN_OP,
+ MC_CMD_FILTER_OP_IN_OP_REPLACE);
+ MCDI_SET_QWORD(inbuf, FILTER_OP_IN_HANDLE, handle);
+ } else {
+ efx_mcdi_filter_push_prep_set_match_fields(efx, spec, inbuf);
+ }
+
+ MCDI_SET_DWORD(inbuf, FILTER_OP_IN_PORT_ID, nic_data->vport_id);
+ MCDI_SET_DWORD(inbuf, FILTER_OP_IN_RX_DEST,
+ spec->dmaq_id == EFX_FILTER_RX_DMAQ_ID_DROP ?
+ MC_CMD_FILTER_OP_IN_RX_DEST_DROP :
+ MC_CMD_FILTER_OP_IN_RX_DEST_HOST);
+ MCDI_SET_DWORD(inbuf, FILTER_OP_IN_TX_DOMAIN, 0);
+ MCDI_SET_DWORD(inbuf, FILTER_OP_IN_TX_DEST,
+ MC_CMD_FILTER_OP_IN_TX_DEST_DEFAULT);
+ MCDI_SET_DWORD(inbuf, FILTER_OP_IN_RX_QUEUE,
+ spec->dmaq_id == EFX_FILTER_RX_DMAQ_ID_DROP ?
+ 0 : spec->dmaq_id);
+ MCDI_SET_DWORD(inbuf, FILTER_OP_IN_RX_MODE,
+ (flags & EFX_FILTER_FLAG_RX_RSS) ?
+ MC_CMD_FILTER_OP_IN_RX_MODE_RSS :
+ MC_CMD_FILTER_OP_IN_RX_MODE_SIMPLE);
+ if (flags & EFX_FILTER_FLAG_RX_RSS)
+ MCDI_SET_DWORD(inbuf, FILTER_OP_IN_RX_CONTEXT, ctx->context_id);
+}
+
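+/*
+ * Build a FILTER_OP request for @spec and send it to the MC.  On
+ * success the hardware filter handle is returned through @handle.
+ * -ENOSPC from the MC is translated to -EBUSY to match the farch
+ * filter code.
+ */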
+static int efx_mcdi_filter_push(struct efx_nic *efx,
+ const struct efx_filter_spec *spec, u64 *handle,
+ struct efx_rss_context *ctx, bool replacing)
+{
+ MCDI_DECLARE_BUF(inbuf, MC_CMD_FILTER_OP_EXT_IN_LEN);
+ MCDI_DECLARE_BUF(outbuf, MC_CMD_FILTER_OP_EXT_OUT_LEN);
+ size_t outlen;
+ int rc;
+
+ efx_mcdi_filter_push_prep(efx, spec, inbuf, *handle, ctx, replacing);
+ rc = efx_mcdi_rpc_quiet(efx, MC_CMD_FILTER_OP, inbuf, sizeof(inbuf),
+ outbuf, sizeof(outbuf), &outlen);
+ if (rc && spec->priority != EFX_FILTER_PRI_HINT)
+ efx_mcdi_display_error(efx, MC_CMD_FILTER_OP, sizeof(inbuf),
+ outbuf, outlen, rc);
+ if (rc == 0)
+ *handle = MCDI_QWORD(outbuf, FILTER_OP_OUT_HANDLE);
+ if (rc == -ENOSPC)
+ rc = -EBUSY; /* to match efx_farch_filter_insert() */
+ return rc;
+}
+
+static u32 efx_mcdi_filter_mcdi_flags_from_spec(const struct efx_filter_spec *spec)
+{
+ enum efx_encap_type encap_type = efx_filter_get_encap_type(spec);
+ unsigned int match_flags = spec->match_flags;
+ unsigned int uc_match, mc_match;
+ u32 mcdi_flags = 0;
+
+#define MAP_FILTER_TO_MCDI_FLAG(gen_flag, mcdi_field, encap) { \
+ unsigned int old_match_flags = match_flags; \
+ match_flags &= ~EFX_FILTER_MATCH_ ## gen_flag; \
+ if (match_flags != old_match_flags) \
+ mcdi_flags |= \
+ (1 << ((encap) ? \
+ MC_CMD_FILTER_OP_EXT_IN_MATCH_IFRM_ ## \
+ mcdi_field ## _LBN : \
+ MC_CMD_FILTER_OP_EXT_IN_MATCH_ ##\
+ mcdi_field ## _LBN)); \
+ }
+ /* inner or outer based on encap type */
+ MAP_FILTER_TO_MCDI_FLAG(REM_HOST, SRC_IP, encap_type);
+ MAP_FILTER_TO_MCDI_FLAG(LOC_HOST, DST_IP, encap_type);
+ MAP_FILTER_TO_MCDI_FLAG(REM_MAC, SRC_MAC, encap_type);
+ MAP_FILTER_TO_MCDI_FLAG(REM_PORT, SRC_PORT, encap_type);
+ MAP_FILTER_TO_MCDI_FLAG(LOC_MAC, DST_MAC, encap_type);
+ MAP_FILTER_TO_MCDI_FLAG(LOC_PORT, DST_PORT, encap_type);
+ MAP_FILTER_TO_MCDI_FLAG(ETHER_TYPE, ETHER_TYPE, encap_type);
+ MAP_FILTER_TO_MCDI_FLAG(IP_PROTO, IP_PROTO, encap_type);
+ /* always outer */
+ MAP_FILTER_TO_MCDI_FLAG(INNER_VID, INNER_VLAN, false);
+ MAP_FILTER_TO_MCDI_FLAG(OUTER_VID, OUTER_VLAN, false);
+#undef MAP_FILTER_TO_MCDI_FLAG
+
+ /* special handling for encap type, and mismatch */
+ if (encap_type) {
+ match_flags &= ~EFX_FILTER_MATCH_ENCAP_TYPE;
+ mcdi_flags |=
+ (1 << MC_CMD_FILTER_OP_EXT_IN_MATCH_ETHER_TYPE_LBN);
+ mcdi_flags |= (1 << MC_CMD_FILTER_OP_EXT_IN_MATCH_IP_PROTO_LBN);
+
+ uc_match = MC_CMD_FILTER_OP_EXT_IN_MATCH_IFRM_UNKNOWN_UCAST_DST_LBN;
+ mc_match = MC_CMD_FILTER_OP_EXT_IN_MATCH_IFRM_UNKNOWN_MCAST_DST_LBN;
+ } else {
+ uc_match = MC_CMD_FILTER_OP_EXT_IN_MATCH_UNKNOWN_UCAST_DST_LBN;
+ mc_match = MC_CMD_FILTER_OP_EXT_IN_MATCH_UNKNOWN_MCAST_DST_LBN;
+ }
+
+ if (match_flags & EFX_FILTER_MATCH_LOC_MAC_IG) {
+ match_flags &= ~EFX_FILTER_MATCH_LOC_MAC_IG;
+ mcdi_flags |=
+ is_multicast_ether_addr(spec->loc_mac) ?
+ 1 << mc_match :
+ 1 << uc_match;
+ }
+
+ /* Did we map them all? */
+ WARN_ON_ONCE(match_flags);
+
+ return mcdi_flags;
+}
+
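+/*
+ * Find the match-priority index for @spec, i.e. the entry in
+ * table->rx_match_mcdi_flags with the same MCDI match flags.
+ * Returns -EPROTONOSUPPORT if the firmware does not support this
+ * combination of match fields.
+ */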
+static int efx_mcdi_filter_pri(struct efx_mcdi_filter_table *table,
+ const struct efx_filter_spec *spec)
+{
+ u32 mcdi_flags = efx_mcdi_filter_mcdi_flags_from_spec(spec);
+ unsigned int match_pri;
+
+ for (match_pri = 0;
+ match_pri < table->rx_match_count;
+ match_pri++)
+ if (table->rx_match_mcdi_flags[match_pri] == mcdi_flags)
+ return match_pri;
+
+ return -EPROTONOSUPPORT;
+}
+
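+/*
+ * Insert or replace a filter.  Caller must hold efx->filter_sem for
+ * read; the table lock is taken here.  Returns the new filter ID on
+ * success or a negative error code.
+ */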
+static s32 efx_mcdi_filter_insert_locked(struct efx_nic *efx,
+ struct efx_filter_spec *spec,
+ bool replace_equal)
+{
+ DECLARE_BITMAP(mc_rem_map, EFX_EF10_FILTER_SEARCH_LIMIT);
+ struct efx_ef10_nic_data *nic_data = efx->nic_data;
+ struct efx_mcdi_filter_table *table;
+ struct efx_filter_spec *saved_spec;
+ struct efx_rss_context *ctx = NULL;
+ unsigned int match_pri, hash;
+ unsigned int priv_flags;
+ bool rss_locked = false;
+ bool replacing = false;
+ unsigned int depth, i;
+ int ins_index = -1;
+ DEFINE_WAIT(wait);
+ bool is_mc_recip;
+ s32 rc;
+
+ WARN_ON(!rwsem_is_locked(&efx->filter_sem));
+ table = efx->filter_state;
+ down_write(&table->lock);
+
+ /* For now, only support RX filters */
+ if ((spec->flags & (EFX_FILTER_FLAG_RX | EFX_FILTER_FLAG_TX)) !=
+ EFX_FILTER_FLAG_RX) {
+ rc = -EINVAL;
+ goto out_unlock;
+ }
+
+ rc = efx_mcdi_filter_pri(table, spec);
+ if (rc < 0)
+ goto out_unlock;
+ match_pri = rc;
+
+ hash = efx_filter_spec_hash(spec);
+ is_mc_recip = efx_filter_is_mc_recipient(spec);
+ if (is_mc_recip)
+ bitmap_zero(mc_rem_map, EFX_EF10_FILTER_SEARCH_LIMIT);
+
+ if (spec->flags & EFX_FILTER_FLAG_RX_RSS) {
+ mutex_lock(&efx->rss_lock);
+ rss_locked = true;
+ if (spec->rss_context)
+ ctx = efx_find_rss_context_entry(efx, spec->rss_context);
+ else
+ ctx = &efx->rss_context;
+ if (!ctx) {
+ rc = -ENOENT;
+ goto out_unlock;
+ }
+ if (ctx->context_id == EFX_MCDI_RSS_CONTEXT_INVALID) {
+ rc = -EOPNOTSUPP;
+ goto out_unlock;
+ }
+ }
+
+ /* Find any existing filters with the same match tuple or
+ * else a free slot to insert at.
+ */
+ for (depth = 1; depth < EFX_EF10_FILTER_SEARCH_LIMIT; depth++) {
+ i = (hash + depth) & (EFX_MCDI_FILTER_TBL_ROWS - 1);
+ saved_spec = efx_mcdi_filter_entry_spec(table, i);
+
+ if (!saved_spec) {
+ if (ins_index < 0)
+ ins_index = i;
+ } else if (efx_filter_spec_equal(spec, saved_spec)) {
+ if (spec->priority < saved_spec->priority &&
+ spec->priority != EFX_FILTER_PRI_AUTO) {
+ rc = -EPERM;
+ goto out_unlock;
+ }
+ if (!is_mc_recip) {
+ /* This is the only one */
+ if (spec->priority ==
+ saved_spec->priority &&
+ !replace_equal) {
+ rc = -EEXIST;
+ goto out_unlock;
+ }
+ ins_index = i;
+ break;
+ } else if (spec->priority >
+ saved_spec->priority ||
+ (spec->priority ==
+ saved_spec->priority &&
+ replace_equal)) {
+ if (ins_index < 0)
+ ins_index = i;
+ else
+ __set_bit(depth, mc_rem_map);
+ }
+ }
+ }
+
+ /* Once we reach the maximum search depth, use the first suitable
+ * slot, or return -EBUSY if there was none
+ */
+ if (ins_index < 0) {
+ rc = -EBUSY;
+ goto out_unlock;
+ }
+
+ /* Create a software table entry if necessary. */
+ saved_spec = efx_mcdi_filter_entry_spec(table, ins_index);
+ if (saved_spec) {
+ if (spec->priority == EFX_FILTER_PRI_AUTO &&
+ saved_spec->priority >= EFX_FILTER_PRI_AUTO) {
+ /* Just make sure it won't be removed */
+ if (saved_spec->priority > EFX_FILTER_PRI_AUTO)
+ saved_spec->flags |= EFX_FILTER_FLAG_RX_OVER_AUTO;
+ table->entry[ins_index].spec &=
+ ~EFX_EF10_FILTER_FLAG_AUTO_OLD;
+ rc = ins_index;
+ goto out_unlock;
+ }
+ replacing = true;
+ priv_flags = efx_mcdi_filter_entry_flags(table, ins_index);
+ } else {
+ saved_spec = kmalloc(sizeof(*spec), GFP_ATOMIC);
+ if (!saved_spec) {
+ rc = -ENOMEM;
+ goto out_unlock;
+ }
+ *saved_spec = *spec;
+ priv_flags = 0;
+ }
+ efx_mcdi_filter_set_entry(table, ins_index, saved_spec, priv_flags);
+
+ /* Actually insert the filter on the HW */
+ rc = efx_mcdi_filter_push(efx, spec, &table->entry[ins_index].handle,
+ ctx, replacing);
+
+ if (rc == -EINVAL && nic_data->must_realloc_vis)
+ /* The MC rebooted under us, causing it to reject our filter
+ * insertion as pointing to an invalid VI (spec->dmaq_id).
+ */
+ rc = -EAGAIN;
+
+ /* Finalise the software table entry */
+ if (rc == 0) {
+ if (replacing) {
+ /* Update the fields that may differ */
+ if (saved_spec->priority == EFX_FILTER_PRI_AUTO)
+ saved_spec->flags |=
+ EFX_FILTER_FLAG_RX_OVER_AUTO;
+ saved_spec->priority = spec->priority;
+ saved_spec->flags &= EFX_FILTER_FLAG_RX_OVER_AUTO;
+ saved_spec->flags |= spec->flags;
+ saved_spec->rss_context = spec->rss_context;
+ saved_spec->dmaq_id = spec->dmaq_id;
+ }
+ } else if (!replacing) {
+ kfree(saved_spec);
+ saved_spec = NULL;
+ } else {
+ /* We failed to replace, so the old filter is still present.
+ * Roll back the software table to reflect this. In fact the
+ * efx_mcdi_filter_set_entry() call below will do the right
+ * thing, so nothing extra is needed here.
+ */
+ }
+ efx_mcdi_filter_set_entry(table, ins_index, saved_spec, priv_flags);
+
+ /* Remove and finalise entries for lower-priority multicast
+ * recipients
+ */
+ if (is_mc_recip) {
+ MCDI_DECLARE_BUF(inbuf, MC_CMD_FILTER_OP_EXT_IN_LEN);
+ unsigned int depth, i;
+
+ memset(inbuf, 0, sizeof(inbuf));
+
+ for (depth = 0; depth < EFX_EF10_FILTER_SEARCH_LIMIT; depth++) {
+ if (!test_bit(depth, mc_rem_map))
+ continue;
+
+ i = (hash + depth) & (EFX_MCDI_FILTER_TBL_ROWS - 1);
+ saved_spec = efx_mcdi_filter_entry_spec(table, i);
+ priv_flags = efx_mcdi_filter_entry_flags(table, i);
+
+ if (rc == 0) {
+ MCDI_SET_DWORD(inbuf, FILTER_OP_IN_OP,
+ MC_CMD_FILTER_OP_IN_OP_UNSUBSCRIBE);
+ MCDI_SET_QWORD(inbuf, FILTER_OP_IN_HANDLE,
+ table->entry[i].handle);
+ rc = efx_mcdi_rpc(efx, MC_CMD_FILTER_OP,
+ inbuf, sizeof(inbuf),
+ NULL, 0, NULL);
+ }
+
+ if (rc == 0) {
+ kfree(saved_spec);
+ saved_spec = NULL;
+ priv_flags = 0;
+ }
+ efx_mcdi_filter_set_entry(table, i, saved_spec,
+ priv_flags);
+ }
+ }
+
+ /* If successful, return the inserted filter ID */
+ if (rc == 0)
+ rc = efx_mcdi_filter_make_filter_id(match_pri, ins_index);
+
+out_unlock:
+ if (rss_locked)
+ mutex_unlock(&efx->rss_lock);
+ up_write(&table->lock);
+ return rc;
+}
+
+s32 efx_mcdi_filter_insert(struct efx_nic *efx, struct efx_filter_spec *spec,
+ bool replace_equal)
+{
+ s32 ret;
+
+ down_read(&efx->filter_sem);
+ ret = efx_mcdi_filter_insert_locked(efx, spec, replace_equal);
+ up_read(&efx->filter_sem);
+
+ return ret;
+}
+
+/*
+ * Remove a filter.
+ * If !by_index, remove by ID; if by_index, remove by table index.
+ * The filter ID may come from userland and must be range-checked.
+ * Caller must hold efx->filter_sem for read, and efx->filter_state->lock
+ * for write.
+ */
+static int efx_mcdi_filter_remove_internal(struct efx_nic *efx,
+ unsigned int priority_mask,
+ u32 filter_id, bool by_index)
+{
+ unsigned int filter_idx = efx_mcdi_filter_get_unsafe_id(filter_id);
+ struct efx_mcdi_filter_table *table = efx->filter_state;
+ MCDI_DECLARE_BUF(inbuf,
+ MC_CMD_FILTER_OP_IN_HANDLE_OFST +
+ MC_CMD_FILTER_OP_IN_HANDLE_LEN);
+ struct efx_filter_spec *spec;
+ DEFINE_WAIT(wait);
+ int rc;
+
+ spec = efx_mcdi_filter_entry_spec(table, filter_idx);
+ if (!spec ||
+ (!by_index &&
+ efx_mcdi_filter_pri(table, spec) !=
+ efx_mcdi_filter_get_unsafe_pri(filter_id)))
+ return -ENOENT;
+
+ if (spec->flags & EFX_FILTER_FLAG_RX_OVER_AUTO &&
+ priority_mask == (1U << EFX_FILTER_PRI_AUTO)) {
+ /* Just remove flags */
+ spec->flags &= ~EFX_FILTER_FLAG_RX_OVER_AUTO;
+ table->entry[filter_idx].spec &= ~EFX_EF10_FILTER_FLAG_AUTO_OLD;
+ return 0;
+ }
+
+ if (!(priority_mask & (1U << spec->priority)))
+ return -ENOENT;
+
+ if (spec->flags & EFX_FILTER_FLAG_RX_OVER_AUTO) {
+ /* Reset to an automatic filter */
+
+ struct efx_filter_spec new_spec = *spec;
+
+ new_spec.priority = EFX_FILTER_PRI_AUTO;
+ new_spec.flags = (EFX_FILTER_FLAG_RX |
+ (efx_rss_active(&efx->rss_context) ?
+ EFX_FILTER_FLAG_RX_RSS : 0));
+ new_spec.dmaq_id = 0;
+ new_spec.rss_context = 0;
+ rc = efx_mcdi_filter_push(efx, &new_spec,
+ &table->entry[filter_idx].handle,
+ &efx->rss_context,
+ true);
+
+ if (rc == 0)
+ *spec = new_spec;
+ } else {
+ /* Really remove the filter */
+
+ MCDI_SET_DWORD(inbuf, FILTER_OP_IN_OP,
+ efx_mcdi_filter_is_exclusive(spec) ?
+ MC_CMD_FILTER_OP_IN_OP_REMOVE :
+ MC_CMD_FILTER_OP_IN_OP_UNSUBSCRIBE);
+ MCDI_SET_QWORD(inbuf, FILTER_OP_IN_HANDLE,
+ table->entry[filter_idx].handle);
+ rc = efx_mcdi_rpc_quiet(efx, MC_CMD_FILTER_OP,
+ inbuf, sizeof(inbuf), NULL, 0, NULL);
+
+ if ((rc == 0) || (rc == -ENOENT)) {
+ /* Filter removed OK or didn't actually exist */
+ kfree(spec);
+ efx_mcdi_filter_set_entry(table, filter_idx, NULL, 0);
+ } else {
+ efx_mcdi_display_error(efx, MC_CMD_FILTER_OP,
+ MC_CMD_FILTER_OP_EXT_IN_LEN,
+ NULL, 0, rc);
+ }
+ }
+
+ return rc;
+}
+
+/* Remove filters that weren't renewed. */
+static void efx_mcdi_filter_remove_old(struct efx_nic *efx)
+{
+ struct efx_mcdi_filter_table *table = efx->filter_state;
+ int remove_failed = 0;
+ int remove_noent = 0;
+ int rc;
+ int i;
+
+ down_write(&table->lock);
+ for (i = 0; i < EFX_MCDI_FILTER_TBL_ROWS; i++) {
+ if (READ_ONCE(table->entry[i].spec) &
+ EFX_EF10_FILTER_FLAG_AUTO_OLD) {
+ rc = efx_mcdi_filter_remove_internal(efx,
+ 1U << EFX_FILTER_PRI_AUTO, i, true);
+ if (rc == -ENOENT)
+ remove_noent++;
+ else if (rc)
+ remove_failed++;
+ }
+ }
+ up_write(&table->lock);
+
+ if (remove_failed)
+ netif_info(efx, drv, efx->net_dev,
+ "%s: failed to remove %d filters\n",
+ __func__, remove_failed);
+ if (remove_noent)
+ netif_info(efx, drv, efx->net_dev,
+ "%s: failed to remove %d non-existent filters\n",
+ __func__, remove_noent);
+}
+
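+/*
+ * Remove the filter with the given ID, provided it is at the given
+ * priority.  Takes efx->filter_sem and the table lock itself.
+ */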
+int efx_mcdi_filter_remove_safe(struct efx_nic *efx,
+ enum efx_filter_priority priority,
+ u32 filter_id)
+{
+ struct efx_mcdi_filter_table *table;
+ int rc;
+
+ down_read(&efx->filter_sem);
+ table = efx->filter_state;
+ down_write(&table->lock);
+ rc = efx_mcdi_filter_remove_internal(efx, 1U << priority, filter_id,
+ false);
+ up_write(&table->lock);
+ up_read(&efx->filter_sem);
+ return rc;
+}
+
+/* Caller must hold efx->filter_sem for read */
+static void efx_mcdi_filter_remove_unsafe(struct efx_nic *efx,
+ enum efx_filter_priority priority,
+ u32 filter_id)
+{
+ struct efx_mcdi_filter_table *table = efx->filter_state;
+
+ if (filter_id == EFX_EF10_FILTER_ID_INVALID)
+ return;
+
+ down_write(&table->lock);
+ efx_mcdi_filter_remove_internal(efx, 1U << priority, filter_id,
+ true);
+ up_write(&table->lock);
+}
+
+int efx_mcdi_filter_get_safe(struct efx_nic *efx,
+ enum efx_filter_priority priority,
+ u32 filter_id, struct efx_filter_spec *spec)
+{
+ unsigned int filter_idx = efx_mcdi_filter_get_unsafe_id(filter_id);
+ const struct efx_filter_spec *saved_spec;
+ struct efx_mcdi_filter_table *table;
+ int rc;
+
+ down_read(&efx->filter_sem);
+ table = efx->filter_state;
+ down_read(&table->lock);
+ saved_spec = efx_mcdi_filter_entry_spec(table, filter_idx);
+ if (saved_spec && saved_spec->priority == priority &&
+ efx_mcdi_filter_pri(table, saved_spec) ==
+ efx_mcdi_filter_get_unsafe_pri(filter_id)) {
+ *spec = *saved_spec;
+ rc = 0;
+ } else {
+ rc = -ENOENT;
+ }
+ up_read(&table->lock);
+ up_read(&efx->filter_sem);
+ return rc;
+}
+
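+/*
+ * Insert/renew filters for one VLAN's unicast or multicast address
+ * list.  If @rollback is set, a failure removes the filters inserted
+ * so far and returns the error; otherwise insertion carries on and
+ * invalid IDs are left in place.
+ */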
+static int efx_mcdi_filter_insert_addr_list(struct efx_nic *efx,
+ struct efx_mcdi_filter_vlan *vlan,
+ bool multicast, bool rollback)
+{
+ struct efx_mcdi_filter_table *table = efx->filter_state;
+ struct efx_mcdi_dev_addr *addr_list;
+ enum efx_filter_flags filter_flags;
+ struct efx_filter_spec spec;
+ u8 baddr[ETH_ALEN];
+ unsigned int i, j;
+ int addr_count;
+ u16 *ids;
+ int rc;
+
+ if (multicast) {
+ addr_list = table->dev_mc_list;
+ addr_count = table->dev_mc_count;
+ ids = vlan->mc;
+ } else {
+ addr_list = table->dev_uc_list;
+ addr_count = table->dev_uc_count;
+ ids = vlan->uc;
+ }
+
+ filter_flags = efx_rss_active(&efx->rss_context) ? EFX_FILTER_FLAG_RX_RSS : 0;
+
+ /* Insert/renew filters */
+ for (i = 0; i < addr_count; i++) {
+ EFX_WARN_ON_PARANOID(ids[i] != EFX_EF10_FILTER_ID_INVALID);
+ efx_filter_init_rx(&spec, EFX_FILTER_PRI_AUTO, filter_flags, 0);
+ efx_filter_set_eth_local(&spec, vlan->vid, addr_list[i].addr);
+ rc = efx_mcdi_filter_insert_locked(efx, &spec, true);
+ if (rc < 0) {
+ if (rollback) {
+ netif_info(efx, drv, efx->net_dev,
+ "efx_mcdi_filter_insert failed rc=%d\n",
+ rc);
+ /* Fall back to promiscuous */
+ for (j = 0; j < i; j++) {
+ efx_mcdi_filter_remove_unsafe(
+ efx, EFX_FILTER_PRI_AUTO,
+ ids[j]);
+ ids[j] = EFX_EF10_FILTER_ID_INVALID;
+ }
+ return rc;
+ } else {
+ /* keep invalid ID, and carry on */
+ }
+ } else {
+ ids[i] = efx_mcdi_filter_get_unsafe_id(rc);
+ }
+ }
+
+ if (multicast && rollback) {
+ /* Also need an Ethernet broadcast filter */
+ EFX_WARN_ON_PARANOID(vlan->default_filters[EFX_EF10_BCAST] !=
+ EFX_EF10_FILTER_ID_INVALID);
+ efx_filter_init_rx(&spec, EFX_FILTER_PRI_AUTO, filter_flags, 0);
+ eth_broadcast_addr(baddr);
+ efx_filter_set_eth_local(&spec, vlan->vid, baddr);
+ rc = efx_mcdi_filter_insert_locked(efx, &spec, true);
+ if (rc < 0) {
+ netif_warn(efx, drv, efx->net_dev,
+ "Broadcast filter insert failed rc=%d\n", rc);
+ /* Fall back to promiscuous */
+ for (j = 0; j < i; j++) {
+ efx_mcdi_filter_remove_unsafe(
+ efx, EFX_FILTER_PRI_AUTO,
+ ids[j]);
+ ids[j] = EFX_EF10_FILTER_ID_INVALID;
+ }
+ return rc;
+ } else {
+ vlan->default_filters[EFX_EF10_BCAST] =
+ efx_mcdi_filter_get_unsafe_id(rc);
+ }
+ }
+
+ return 0;
+}
+
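+/*
+ * Insert a default (mismatch) filter for @vlan and @encap_type,
+ * recording the resulting ID in vlan->default_filters.  For
+ * non-encapsulated multicast without workaround_26807, an Ethernet
+ * broadcast filter is inserted as well.
+ */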
+static int efx_mcdi_filter_insert_def(struct efx_nic *efx,
+ struct efx_mcdi_filter_vlan *vlan,
+ enum efx_encap_type encap_type,
+ bool multicast, bool rollback)
+{
+ struct efx_ef10_nic_data *nic_data = efx->nic_data;
+ enum efx_filter_flags filter_flags;
+ struct efx_filter_spec spec;
+ u8 baddr[ETH_ALEN];
+ int rc;
+ u16 *id;
+
+ filter_flags = efx_rss_active(&efx->rss_context) ? EFX_FILTER_FLAG_RX_RSS : 0;
+
+ efx_filter_init_rx(&spec, EFX_FILTER_PRI_AUTO, filter_flags, 0);
+
+ if (multicast)
+ efx_filter_set_mc_def(&spec);
+ else
+ efx_filter_set_uc_def(&spec);
+
+ if (encap_type) {
+ if (nic_data->datapath_caps &
+ (1 << MC_CMD_GET_CAPABILITIES_OUT_VXLAN_NVGRE_LBN))
+ efx_filter_set_encap_type(&spec, encap_type);
+ else
+			/*
+			 * Don't insert encap filters on non-supporting
+			 * platforms; the ID will be left as INVALID.
+			 */
+ return 0;
+ }
+
+ if (vlan->vid != EFX_FILTER_VID_UNSPEC)
+ efx_filter_set_eth_local(&spec, vlan->vid, NULL);
+
+ rc = efx_mcdi_filter_insert_locked(efx, &spec, true);
+ if (rc < 0) {
+ const char *um = multicast ? "Multicast" : "Unicast";
+ const char *encap_name = "";
+ const char *encap_ipv = "";
+
+ if ((encap_type & EFX_ENCAP_TYPES_MASK) ==
+ EFX_ENCAP_TYPE_VXLAN)
+ encap_name = "VXLAN ";
+ else if ((encap_type & EFX_ENCAP_TYPES_MASK) ==
+ EFX_ENCAP_TYPE_NVGRE)
+ encap_name = "NVGRE ";
+ else if ((encap_type & EFX_ENCAP_TYPES_MASK) ==
+ EFX_ENCAP_TYPE_GENEVE)
+ encap_name = "GENEVE ";
+ if (encap_type & EFX_ENCAP_FLAG_IPV6)
+ encap_ipv = "IPv6 ";
+ else if (encap_type)
+ encap_ipv = "IPv4 ";
+
+ /*
+ * unprivileged functions can't insert mismatch filters
+ * for encapsulated or unicast traffic, so downgrade
+ * those warnings to debug.
+ */
+ netif_cond_dbg(efx, drv, efx->net_dev,
+ rc == -EPERM && (encap_type || !multicast), warn,
+ "%s%s%s mismatch filter insert failed rc=%d\n",
+ encap_name, encap_ipv, um, rc);
+ } else if (multicast) {
+ /* mapping from encap types to default filter IDs (multicast) */
+ static enum efx_mcdi_filter_default_filters map[] = {
+ [EFX_ENCAP_TYPE_NONE] = EFX_EF10_MCDEF,
+ [EFX_ENCAP_TYPE_VXLAN] = EFX_EF10_VXLAN4_MCDEF,
+ [EFX_ENCAP_TYPE_NVGRE] = EFX_EF10_NVGRE4_MCDEF,
+ [EFX_ENCAP_TYPE_GENEVE] = EFX_EF10_GENEVE4_MCDEF,
+ [EFX_ENCAP_TYPE_VXLAN | EFX_ENCAP_FLAG_IPV6] =
+ EFX_EF10_VXLAN6_MCDEF,
+ [EFX_ENCAP_TYPE_NVGRE | EFX_ENCAP_FLAG_IPV6] =
+ EFX_EF10_NVGRE6_MCDEF,
+ [EFX_ENCAP_TYPE_GENEVE | EFX_ENCAP_FLAG_IPV6] =
+ EFX_EF10_GENEVE6_MCDEF,
+ };
+
+ /* quick bounds check (BCAST result impossible) */
+ BUILD_BUG_ON(EFX_EF10_BCAST != 0);
+ if (encap_type >= ARRAY_SIZE(map) || map[encap_type] == 0) {
+ WARN_ON(1);
+ return -EINVAL;
+ }
+ /* then follow map */
+ id = &vlan->default_filters[map[encap_type]];
+
+ EFX_WARN_ON_PARANOID(*id != EFX_EF10_FILTER_ID_INVALID);
+ *id = efx_mcdi_filter_get_unsafe_id(rc);
+ if (!nic_data->workaround_26807 && !encap_type) {
+ /* Also need an Ethernet broadcast filter */
+ efx_filter_init_rx(&spec, EFX_FILTER_PRI_AUTO,
+ filter_flags, 0);
+ eth_broadcast_addr(baddr);
+ efx_filter_set_eth_local(&spec, vlan->vid, baddr);
+ rc = efx_mcdi_filter_insert_locked(efx, &spec, true);
+ if (rc < 0) {
+ netif_warn(efx, drv, efx->net_dev,
+ "Broadcast filter insert failed rc=%d\n",
+ rc);
+ if (rollback) {
+ /* Roll back the mc_def filter */
+ efx_mcdi_filter_remove_unsafe(
+ efx, EFX_FILTER_PRI_AUTO,
+ *id);
+ *id = EFX_EF10_FILTER_ID_INVALID;
+ return rc;
+ }
+ } else {
+ EFX_WARN_ON_PARANOID(
+ vlan->default_filters[EFX_EF10_BCAST] !=
+ EFX_EF10_FILTER_ID_INVALID);
+ vlan->default_filters[EFX_EF10_BCAST] =
+ efx_mcdi_filter_get_unsafe_id(rc);
+ }
+ }
+ rc = 0;
+ } else {
+ /* mapping from encap types to default filter IDs (unicast) */
+ static enum efx_mcdi_filter_default_filters map[] = {
+ [EFX_ENCAP_TYPE_NONE] = EFX_EF10_UCDEF,
+ [EFX_ENCAP_TYPE_VXLAN] = EFX_EF10_VXLAN4_UCDEF,
+ [EFX_ENCAP_TYPE_NVGRE] = EFX_EF10_NVGRE4_UCDEF,
+ [EFX_ENCAP_TYPE_GENEVE] = EFX_EF10_GENEVE4_UCDEF,
+ [EFX_ENCAP_TYPE_VXLAN | EFX_ENCAP_FLAG_IPV6] =
+ EFX_EF10_VXLAN6_UCDEF,
+ [EFX_ENCAP_TYPE_NVGRE | EFX_ENCAP_FLAG_IPV6] =
+ EFX_EF10_NVGRE6_UCDEF,
+ [EFX_ENCAP_TYPE_GENEVE | EFX_ENCAP_FLAG_IPV6] =
+ EFX_EF10_GENEVE6_UCDEF,
+ };
+
+ /* quick bounds check (BCAST result impossible) */
+ BUILD_BUG_ON(EFX_EF10_BCAST != 0);
+ if (encap_type >= ARRAY_SIZE(map) || map[encap_type] == 0) {
+ WARN_ON(1);
+ return -EINVAL;
+ }
+ /* then follow map */
+ id = &vlan->default_filters[map[encap_type]];
+ EFX_WARN_ON_PARANOID(*id != EFX_EF10_FILTER_ID_INVALID);
+ *id = rc;
+ rc = 0;
+ }
+ return rc;
+}
+
+/*
+ * Caller must hold efx->filter_sem for read if race against
+ * efx_mcdi_filter_table_remove() is possible
+ */
+static void efx_mcdi_filter_vlan_sync_rx_mode(struct efx_nic *efx,
+ struct efx_mcdi_filter_vlan *vlan)
+{
+ struct efx_mcdi_filter_table *table = efx->filter_state;
+ struct efx_ef10_nic_data *nic_data = efx->nic_data;
+
+	/*
+	 * Do not install filters for the unspecified VID if VLAN filtering
+	 * is enabled, and do not install filters for specific VIDs if VLAN
+	 * filtering is disabled.
+	 */
+ if ((vlan->vid == EFX_FILTER_VID_UNSPEC) == table->vlan_filter)
+ return;
+
+ /* Insert/renew unicast filters */
+ if (table->uc_promisc) {
+ efx_mcdi_filter_insert_def(efx, vlan, EFX_ENCAP_TYPE_NONE,
+ false, false);
+ efx_mcdi_filter_insert_addr_list(efx, vlan, false, false);
+ } else {
+ /*
+ * If any of the filters failed to insert, fall back to
+ * promiscuous mode - add in the uc_def filter. But keep
+ * our individual unicast filters.
+ */
+ if (efx_mcdi_filter_insert_addr_list(efx, vlan, false, false))
+ efx_mcdi_filter_insert_def(efx, vlan,
+ EFX_ENCAP_TYPE_NONE,
+ false, false);
+ }
+ efx_mcdi_filter_insert_def(efx, vlan, EFX_ENCAP_TYPE_VXLAN,
+ false, false);
+ efx_mcdi_filter_insert_def(efx, vlan, EFX_ENCAP_TYPE_VXLAN |
+ EFX_ENCAP_FLAG_IPV6,
+ false, false);
+ efx_mcdi_filter_insert_def(efx, vlan, EFX_ENCAP_TYPE_NVGRE,
+ false, false);
+ efx_mcdi_filter_insert_def(efx, vlan, EFX_ENCAP_TYPE_NVGRE |
+ EFX_ENCAP_FLAG_IPV6,
+ false, false);
+ efx_mcdi_filter_insert_def(efx, vlan, EFX_ENCAP_TYPE_GENEVE,
+ false, false);
+ efx_mcdi_filter_insert_def(efx, vlan, EFX_ENCAP_TYPE_GENEVE |
+ EFX_ENCAP_FLAG_IPV6,
+ false, false);
+
+ /*
+ * Insert/renew multicast filters
+ *
+ * If changing promiscuous state with cascaded multicast filters, remove
+ * old filters first, so that packets are dropped rather than duplicated
+ */
+ if (nic_data->workaround_26807 &&
+ table->mc_promisc_last != table->mc_promisc)
+ efx_mcdi_filter_remove_old(efx);
+ if (table->mc_promisc) {
+ if (nic_data->workaround_26807) {
+			/*
+			 * If we failed to insert promiscuous filters, roll
+			 * back and fall back to individual multicast filters.
+			 */
+ if (efx_mcdi_filter_insert_def(efx, vlan,
+ EFX_ENCAP_TYPE_NONE,
+ true, true)) {
+ /* Changing promisc state, so remove old filters */
+ efx_mcdi_filter_remove_old(efx);
+ efx_mcdi_filter_insert_addr_list(efx, vlan,
+ true, false);
+ }
+ } else {
+			/*
+			 * If we failed to insert promiscuous filters, don't
+			 * roll back. Regardless, also insert the mc_list,
+			 * unless it's incomplete due to overflow.
+			 */
+ efx_mcdi_filter_insert_def(efx, vlan,
+ EFX_ENCAP_TYPE_NONE,
+ true, false);
+ if (!table->mc_overflow)
+ efx_mcdi_filter_insert_addr_list(efx, vlan,
+ true, false);
+ }
+ } else {
+		/*
+		 * If any filters failed to insert, roll back and fall back to
+		 * promiscuous mode - mc_def filter and maybe broadcast. If
+		 * that fails, roll back again and insert as many of our
+		 * individual multicast filters as we can.
+		 */
+ if (efx_mcdi_filter_insert_addr_list(efx, vlan, true, true)) {
+ /* Changing promisc state, so remove old filters */
+ if (nic_data->workaround_26807)
+ efx_mcdi_filter_remove_old(efx);
+ if (efx_mcdi_filter_insert_def(efx, vlan,
+ EFX_ENCAP_TYPE_NONE,
+ true, true))
+ efx_mcdi_filter_insert_addr_list(efx, vlan,
+ true, false);
+ }
+ }
+ efx_mcdi_filter_insert_def(efx, vlan, EFX_ENCAP_TYPE_VXLAN,
+ true, false);
+ efx_mcdi_filter_insert_def(efx, vlan, EFX_ENCAP_TYPE_VXLAN |
+ EFX_ENCAP_FLAG_IPV6,
+ true, false);
+ efx_mcdi_filter_insert_def(efx, vlan, EFX_ENCAP_TYPE_NVGRE,
+ true, false);
+ efx_mcdi_filter_insert_def(efx, vlan, EFX_ENCAP_TYPE_NVGRE |
+ EFX_ENCAP_FLAG_IPV6,
+ true, false);
+ efx_mcdi_filter_insert_def(efx, vlan, EFX_ENCAP_TYPE_GENEVE,
+ true, false);
+ efx_mcdi_filter_insert_def(efx, vlan, EFX_ENCAP_TYPE_GENEVE |
+ EFX_ENCAP_FLAG_IPV6,
+ true, false);
+}
+
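+/* Remove all RX filters at or below @priority, except automatic filters */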
+int efx_mcdi_filter_clear_rx(struct efx_nic *efx,
+ enum efx_filter_priority priority)
+{
+ struct efx_mcdi_filter_table *table;
+ unsigned int priority_mask;
+ unsigned int i;
+ int rc;
+
+ priority_mask = (((1U << (priority + 1)) - 1) &
+ ~(1U << EFX_FILTER_PRI_AUTO));
+
+ down_read(&efx->filter_sem);
+ table = efx->filter_state;
+ down_write(&table->lock);
+ for (i = 0; i < EFX_MCDI_FILTER_TBL_ROWS; i++) {
+ rc = efx_mcdi_filter_remove_internal(efx, priority_mask,
+ i, true);
+ if (rc && rc != -ENOENT)
+ break;
+ rc = 0;
+ }
+
+ up_write(&table->lock);
+ up_read(&efx->filter_sem);
+ return rc;
+}
+
+u32 efx_mcdi_filter_count_rx_used(struct efx_nic *efx,
+ enum efx_filter_priority priority)
+{
+ struct efx_mcdi_filter_table *table;
+ unsigned int filter_idx;
+ s32 count = 0;
+
+ down_read(&efx->filter_sem);
+ table = efx->filter_state;
+ down_read(&table->lock);
+ for (filter_idx = 0; filter_idx < EFX_MCDI_FILTER_TBL_ROWS; filter_idx++) {
+ if (table->entry[filter_idx].spec &&
+ efx_mcdi_filter_entry_spec(table, filter_idx)->priority ==
+ priority)
+ ++count;
+ }
+ up_read(&table->lock);
+ up_read(&efx->filter_sem);
+ return count;
+}
+
+u32 efx_mcdi_filter_get_rx_id_limit(struct efx_nic *efx)
+{
+ struct efx_mcdi_filter_table *table = efx->filter_state;
+
+ return table->rx_match_count * EFX_MCDI_FILTER_TBL_ROWS * 2;
+}
+
+s32 efx_mcdi_filter_get_rx_ids(struct efx_nic *efx,
+ enum efx_filter_priority priority,
+ u32 *buf, u32 size)
+{
+ struct efx_mcdi_filter_table *table;
+ struct efx_filter_spec *spec;
+ unsigned int filter_idx;
+ s32 count = 0;
+
+ down_read(&efx->filter_sem);
+ table = efx->filter_state;
+ down_read(&table->lock);
+
+ for (filter_idx = 0; filter_idx < EFX_MCDI_FILTER_TBL_ROWS; filter_idx++) {
+ spec = efx_mcdi_filter_entry_spec(table, filter_idx);
+ if (spec && spec->priority == priority) {
+ if (count == size) {
+ count = -EMSGSIZE;
+ break;
+ }
+ buf[count++] =
+ efx_mcdi_filter_make_filter_id(
+ efx_mcdi_filter_pri(table, spec),
+ filter_idx);
+ }
+ }
+ up_read(&table->lock);
+ up_read(&efx->filter_sem);
+ return count;
+}
+
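+/*
+ * Convert MCDI match flags back into EFX_FILTER_MATCH_* flags.
+ * Returns -EINVAL if any MCDI flag is not recognised.
+ */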
+static int efx_mcdi_filter_match_flags_from_mcdi(bool encap, u32 mcdi_flags)
+{
+ int match_flags = 0;
+
+#define MAP_FLAG(gen_flag, mcdi_field) do { \
+ u32 old_mcdi_flags = mcdi_flags; \
+ mcdi_flags &= ~(1 << MC_CMD_FILTER_OP_EXT_IN_MATCH_ ## \
+ mcdi_field ## _LBN); \
+ if (mcdi_flags != old_mcdi_flags) \
+ match_flags |= EFX_FILTER_MATCH_ ## gen_flag; \
+ } while (0)
+
+ if (encap) {
+ /* encap filters must specify encap type */
+ match_flags |= EFX_FILTER_MATCH_ENCAP_TYPE;
+ /* and imply ethertype and ip proto */
+ mcdi_flags &=
+ ~(1 << MC_CMD_FILTER_OP_EXT_IN_MATCH_IP_PROTO_LBN);
+ mcdi_flags &=
+ ~(1 << MC_CMD_FILTER_OP_EXT_IN_MATCH_ETHER_TYPE_LBN);
+ /* VLAN tags refer to the outer packet */
+ MAP_FLAG(INNER_VID, INNER_VLAN);
+ MAP_FLAG(OUTER_VID, OUTER_VLAN);
+ /* everything else refers to the inner packet */
+ MAP_FLAG(LOC_MAC_IG, IFRM_UNKNOWN_UCAST_DST);
+ MAP_FLAG(LOC_MAC_IG, IFRM_UNKNOWN_MCAST_DST);
+ MAP_FLAG(REM_HOST, IFRM_SRC_IP);
+ MAP_FLAG(LOC_HOST, IFRM_DST_IP);
+ MAP_FLAG(REM_MAC, IFRM_SRC_MAC);
+ MAP_FLAG(REM_PORT, IFRM_SRC_PORT);
+ MAP_FLAG(LOC_MAC, IFRM_DST_MAC);
+ MAP_FLAG(LOC_PORT, IFRM_DST_PORT);
+ MAP_FLAG(ETHER_TYPE, IFRM_ETHER_TYPE);
+ MAP_FLAG(IP_PROTO, IFRM_IP_PROTO);
+ } else {
+ MAP_FLAG(LOC_MAC_IG, UNKNOWN_UCAST_DST);
+ MAP_FLAG(LOC_MAC_IG, UNKNOWN_MCAST_DST);
+ MAP_FLAG(REM_HOST, SRC_IP);
+ MAP_FLAG(LOC_HOST, DST_IP);
+ MAP_FLAG(REM_MAC, SRC_MAC);
+ MAP_FLAG(REM_PORT, SRC_PORT);
+ MAP_FLAG(LOC_MAC, DST_MAC);
+ MAP_FLAG(LOC_PORT, DST_PORT);
+ MAP_FLAG(ETHER_TYPE, ETHER_TYPE);
+ MAP_FLAG(INNER_VID, INNER_VLAN);
+ MAP_FLAG(OUTER_VID, OUTER_VLAN);
+ MAP_FLAG(IP_PROTO, IP_PROTO);
+ }
+#undef MAP_FLAG
+
+ /* Did we map them all? */
+ if (mcdi_flags)
+ return -EINVAL;
+
+ return match_flags;
+}
+
+bool efx_mcdi_filter_match_supported(struct efx_mcdi_filter_table *table,
+ bool encap,
+ enum efx_filter_match_flags match_flags)
+{
+ unsigned int match_pri;
+ int mf;
+
+ for (match_pri = 0;
+ match_pri < table->rx_match_count;
+ match_pri++) {
+ mf = efx_mcdi_filter_match_flags_from_mcdi(encap,
+ table->rx_match_mcdi_flags[match_pri]);
+ if (mf == match_flags)
+ return true;
+ }
+
+ return false;
+}
+
+static int
+efx_mcdi_filter_table_probe_matches(struct efx_nic *efx,
+ struct efx_mcdi_filter_table *table,
+ bool encap)
+{
+ MCDI_DECLARE_BUF(inbuf, MC_CMD_GET_PARSER_DISP_INFO_IN_LEN);
+ MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_PARSER_DISP_INFO_OUT_LENMAX);
+ unsigned int pd_match_pri, pd_match_count;
+ size_t outlen;
+ int rc;
+
+ /* Find out which RX filter types are supported, and their priorities */
+ MCDI_SET_DWORD(inbuf, GET_PARSER_DISP_INFO_IN_OP,
+ encap ?
+ MC_CMD_GET_PARSER_DISP_INFO_IN_OP_GET_SUPPORTED_ENCAP_RX_MATCHES :
+ MC_CMD_GET_PARSER_DISP_INFO_IN_OP_GET_SUPPORTED_RX_MATCHES);
+ rc = efx_mcdi_rpc(efx, MC_CMD_GET_PARSER_DISP_INFO,
+ inbuf, sizeof(inbuf), outbuf, sizeof(outbuf),
+ &outlen);
+ if (rc)
+ return rc;
+
+ pd_match_count = MCDI_VAR_ARRAY_LEN(
+ outlen, GET_PARSER_DISP_INFO_OUT_SUPPORTED_MATCHES);
+
+ for (pd_match_pri = 0; pd_match_pri < pd_match_count; pd_match_pri++) {
+ u32 mcdi_flags =
+ MCDI_ARRAY_DWORD(
+ outbuf,
+ GET_PARSER_DISP_INFO_OUT_SUPPORTED_MATCHES,
+ pd_match_pri);
+ rc = efx_mcdi_filter_match_flags_from_mcdi(encap, mcdi_flags);
+ if (rc < 0) {
+ netif_dbg(efx, probe, efx->net_dev,
+ "%s: fw flags %#x pri %u not supported in driver\n",
+ __func__, mcdi_flags, pd_match_pri);
+ } else {
+ netif_dbg(efx, probe, efx->net_dev,
+ "%s: fw flags %#x pri %u supported as driver flags %#x pri %u\n",
+ __func__, mcdi_flags, pd_match_pri,
+ rc, table->rx_match_count);
+ table->rx_match_mcdi_flags[table->rx_match_count] = mcdi_flags;
+ table->rx_match_count++;
+ }
+ }
+
+ return 0;
+}
+
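+/*
+ * Allocate the software filter table, query the firmware for the RX
+ * match types it supports and re-add any VLANs that were already
+ * configured.  Caller must hold efx->filter_sem for write.
+ */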
+int efx_mcdi_filter_table_probe(struct efx_nic *efx)
+{
+ struct efx_ef10_nic_data *nic_data = efx->nic_data;
+ struct net_device *net_dev = efx->net_dev;
+ struct efx_mcdi_filter_table *table;
+ struct efx_mcdi_filter_vlan *vlan;
+ int rc;
+
+ if (!efx_rwsem_assert_write_locked(&efx->filter_sem))
+ return -EINVAL;
+
+ if (efx->filter_state) /* already probed */
+ return 0;
+
+ table = kzalloc(sizeof(*table), GFP_KERNEL);
+ if (!table)
+ return -ENOMEM;
+
+ table->rx_match_count = 0;
+ rc = efx_mcdi_filter_table_probe_matches(efx, table, false);
+ if (rc)
+ goto fail;
+ if (nic_data->datapath_caps &
+ (1 << MC_CMD_GET_CAPABILITIES_OUT_VXLAN_NVGRE_LBN))
+ rc = efx_mcdi_filter_table_probe_matches(efx, table, true);
+ if (rc)
+ goto fail;
+ if ((efx_supported_features(efx) & NETIF_F_HW_VLAN_CTAG_FILTER) &&
+ !(efx_mcdi_filter_match_supported(table, false,
+ (EFX_FILTER_MATCH_OUTER_VID | EFX_FILTER_MATCH_LOC_MAC)) &&
+ efx_mcdi_filter_match_supported(table, false,
+ (EFX_FILTER_MATCH_OUTER_VID | EFX_FILTER_MATCH_LOC_MAC_IG)))) {
+ netif_info(efx, probe, net_dev,
+ "VLAN filters are not supported in this firmware variant\n");
+ net_dev->features &= ~NETIF_F_HW_VLAN_CTAG_FILTER;
+ efx->fixed_features &= ~NETIF_F_HW_VLAN_CTAG_FILTER;
+ net_dev->hw_features &= ~NETIF_F_HW_VLAN_CTAG_FILTER;
+ }
+
+ table->entry = vzalloc(array_size(EFX_MCDI_FILTER_TBL_ROWS,
+ sizeof(*table->entry)));
+ if (!table->entry) {
+ rc = -ENOMEM;
+ goto fail;
+ }
+
+ table->mc_promisc_last = false;
+ table->vlan_filter =
+ !!(efx->net_dev->features & NETIF_F_HW_VLAN_CTAG_FILTER);
+ INIT_LIST_HEAD(&table->vlan_list);
+ init_rwsem(&table->lock);
+
+ efx->filter_state = table;
+
+ list_for_each_entry(vlan, &nic_data->vlan_list, list) {
+ rc = efx_mcdi_filter_add_vlan(efx, vlan->vid);
+ if (rc)
+ goto fail_add_vlan;
+ }
+
+ return 0;
+
+fail_add_vlan:
+ efx_mcdi_filter_cleanup_vlans(efx);
+ efx->filter_state = NULL;
+fail:
+ kfree(table);
+ return rc;
+}
+
+/*
+ * Caller must hold efx->filter_sem for read if race against
+ * efx_mcdi_filter_table_remove() is possible
+ */
+void efx_mcdi_filter_table_restore(struct efx_nic *efx)
+{
+ struct efx_mcdi_filter_table *table = efx->filter_state;
+ struct efx_ef10_nic_data *nic_data = efx->nic_data;
+ unsigned int invalid_filters = 0, failed = 0;
+ struct efx_mcdi_filter_vlan *vlan;
+ struct efx_filter_spec *spec;
+ struct efx_rss_context *ctx;
+ unsigned int filter_idx;
+ u32 mcdi_flags;
+ int match_pri;
+ int rc, i;
+
+ WARN_ON(!rwsem_is_locked(&efx->filter_sem));
+
+ if (!nic_data->must_restore_filters)
+ return;
+
+ if (!table)
+ return;
+
+ down_write(&table->lock);
+ mutex_lock(&efx->rss_lock);
+
+ for (filter_idx = 0; filter_idx < EFX_MCDI_FILTER_TBL_ROWS; filter_idx++) {
+ spec = efx_mcdi_filter_entry_spec(table, filter_idx);
+ if (!spec)
+ continue;
+
+ mcdi_flags = efx_mcdi_filter_mcdi_flags_from_spec(spec);
+ match_pri = 0;
+ while (match_pri < table->rx_match_count &&
+ table->rx_match_mcdi_flags[match_pri] != mcdi_flags)
+ ++match_pri;
+ if (match_pri >= table->rx_match_count) {
+ invalid_filters++;
+ goto not_restored;
+ }
+ if (spec->rss_context)
+ ctx = efx_find_rss_context_entry(efx, spec->rss_context);
+ else
+ ctx = &efx->rss_context;
+ if (spec->flags & EFX_FILTER_FLAG_RX_RSS) {
+ if (!ctx) {
+ netif_warn(efx, drv, efx->net_dev,
+ "Warning: unable to restore a filter with nonexistent RSS context %u.\n",
+ spec->rss_context);
+ invalid_filters++;
+ goto not_restored;
+ }
+ if (ctx->context_id == EFX_MCDI_RSS_CONTEXT_INVALID) {
+ netif_warn(efx, drv, efx->net_dev,
+ "Warning: unable to restore a filter with RSS context %u as it was not created.\n",
+ spec->rss_context);
+ invalid_filters++;
+ goto not_restored;
+ }
+ }
+
+ rc = efx_mcdi_filter_push(efx, spec,
+ &table->entry[filter_idx].handle,
+ ctx, false);
+ if (rc)
+ failed++;
+
+ if (rc) {
+not_restored:
+ list_for_each_entry(vlan, &table->vlan_list, list)
+ for (i = 0; i < EFX_EF10_NUM_DEFAULT_FILTERS; ++i)
+ if (vlan->default_filters[i] == filter_idx)
+ vlan->default_filters[i] =
+ EFX_EF10_FILTER_ID_INVALID;
+
+ kfree(spec);
+ efx_mcdi_filter_set_entry(table, filter_idx, NULL, 0);
+ }
+ }
+
+ mutex_unlock(&efx->rss_lock);
+ up_write(&table->lock);
+
+	/*
+	 * This can happen validly if the MC's capabilities have changed, so
+	 * it is not an error.
+	 */
+ if (invalid_filters)
+ netif_dbg(efx, drv, efx->net_dev,
+ "Did not restore %u filters that are now unsupported.\n",
+ invalid_filters);
+
+ if (failed)
+ netif_err(efx, hw, efx->net_dev,
+ "unable to restore %u filters\n", failed);
+ else
+ nic_data->must_restore_filters = false;
+}
+
+void efx_mcdi_filter_table_remove(struct efx_nic *efx)
+{
+ struct efx_mcdi_filter_table *table = efx->filter_state;
+ MCDI_DECLARE_BUF(inbuf, MC_CMD_FILTER_OP_EXT_IN_LEN);
+ struct efx_filter_spec *spec;
+ unsigned int filter_idx;
+ int rc;
+
+ efx_mcdi_filter_cleanup_vlans(efx);
+ efx->filter_state = NULL;
+ /*
+ * If we were called without locking, then it's not safe to free
+ * the table as others might be using it. So we just WARN, leak
+ * the memory, and potentially get an inconsistent filter table
+ * state.
+ * This should never actually happen.
+ */
+ if (!efx_rwsem_assert_write_locked(&efx->filter_sem))
+ return;
+
+ if (!table)
+ return;
+
+ for (filter_idx = 0; filter_idx < EFX_MCDI_FILTER_TBL_ROWS; filter_idx++) {
+ spec = efx_mcdi_filter_entry_spec(table, filter_idx);
+ if (!spec)
+ continue;
+
+ MCDI_SET_DWORD(inbuf, FILTER_OP_IN_OP,
+ efx_mcdi_filter_is_exclusive(spec) ?
+ MC_CMD_FILTER_OP_IN_OP_REMOVE :
+ MC_CMD_FILTER_OP_IN_OP_UNSUBSCRIBE);
+ MCDI_SET_QWORD(inbuf, FILTER_OP_IN_HANDLE,
+ table->entry[filter_idx].handle);
+ rc = efx_mcdi_rpc_quiet(efx, MC_CMD_FILTER_OP, inbuf,
+ sizeof(inbuf), NULL, 0, NULL);
+ if (rc)
+ netif_info(efx, drv, efx->net_dev,
+ "%s: filter %04x remove failed\n",
+ __func__, filter_idx);
+ kfree(spec);
+ }
+
+ vfree(table->entry);
+ kfree(table);
+}
+
+static void efx_mcdi_filter_mark_one_old(struct efx_nic *efx, uint16_t *id)
+{
+ struct efx_mcdi_filter_table *table = efx->filter_state;
+ unsigned int filter_idx;
+
+ efx_rwsem_assert_write_locked(&table->lock);
+
+ if (*id != EFX_EF10_FILTER_ID_INVALID) {
+ filter_idx = efx_mcdi_filter_get_unsafe_id(*id);
+ if (!table->entry[filter_idx].spec)
+ netif_dbg(efx, drv, efx->net_dev,
+ "marked null spec old %04x:%04x\n", *id,
+ filter_idx);
+ table->entry[filter_idx].spec |= EFX_EF10_FILTER_FLAG_AUTO_OLD;
+ *id = EFX_EF10_FILTER_ID_INVALID;
+ }
+}
+
+/* Mark old per-VLAN filters that may need to be removed */
+static void _efx_mcdi_filter_vlan_mark_old(struct efx_nic *efx,
+ struct efx_mcdi_filter_vlan *vlan)
+{
+ struct efx_mcdi_filter_table *table = efx->filter_state;
+ unsigned int i;
+
+ for (i = 0; i < table->dev_uc_count; i++)
+ efx_mcdi_filter_mark_one_old(efx, &vlan->uc[i]);
+ for (i = 0; i < table->dev_mc_count; i++)
+ efx_mcdi_filter_mark_one_old(efx, &vlan->mc[i]);
+ for (i = 0; i < EFX_EF10_NUM_DEFAULT_FILTERS; i++)
+ efx_mcdi_filter_mark_one_old(efx, &vlan->default_filters[i]);
+}
+
+/*
+ * Mark old filters that may need to be removed.
+ * Caller must hold efx->filter_sem for read if race against
+ * efx_mcdi_filter_table_remove() is possible
+ */
+static void efx_mcdi_filter_mark_old(struct efx_nic *efx)
+{
+ struct efx_mcdi_filter_table *table = efx->filter_state;
+ struct efx_mcdi_filter_vlan *vlan;
+
+ down_write(&table->lock);
+ list_for_each_entry(vlan, &table->vlan_list, list)
+ _efx_mcdi_filter_vlan_mark_old(efx, vlan);
+ up_write(&table->lock);
+}
+
+int efx_mcdi_filter_add_vlan(struct efx_nic *efx, u16 vid)
+{
+ struct efx_mcdi_filter_table *table = efx->filter_state;
+ struct efx_mcdi_filter_vlan *vlan;
+ unsigned int i;
+
+ if (!efx_rwsem_assert_write_locked(&efx->filter_sem))
+ return -EINVAL;
+
+ vlan = efx_mcdi_filter_find_vlan(efx, vid);
+ if (WARN_ON(vlan)) {
+ netif_err(efx, drv, efx->net_dev,
+ "VLAN %u already added\n", vid);
+ return -EALREADY;
+ }
+
+ vlan = kzalloc(sizeof(*vlan), GFP_KERNEL);
+ if (!vlan)
+ return -ENOMEM;
+
+ vlan->vid = vid;
+
+ for (i = 0; i < ARRAY_SIZE(vlan->uc); i++)
+ vlan->uc[i] = EFX_EF10_FILTER_ID_INVALID;
+ for (i = 0; i < ARRAY_SIZE(vlan->mc); i++)
+ vlan->mc[i] = EFX_EF10_FILTER_ID_INVALID;
+ for (i = 0; i < EFX_EF10_NUM_DEFAULT_FILTERS; i++)
+ vlan->default_filters[i] = EFX_EF10_FILTER_ID_INVALID;
+
+ list_add_tail(&vlan->list, &table->vlan_list);
+
+ if (efx_dev_registered(efx))
+ efx_mcdi_filter_vlan_sync_rx_mode(efx, vlan);
+
+ return 0;
+}
+
+static void efx_mcdi_filter_del_vlan_internal(struct efx_nic *efx,
+ struct efx_mcdi_filter_vlan *vlan)
+{
+ unsigned int i;
+
+ /* See comment in efx_mcdi_filter_table_remove() */
+ if (!efx_rwsem_assert_write_locked(&efx->filter_sem))
+ return;
+
+ list_del(&vlan->list);
+
+ for (i = 0; i < ARRAY_SIZE(vlan->uc); i++)
+ efx_mcdi_filter_remove_unsafe(efx, EFX_FILTER_PRI_AUTO,
+ vlan->uc[i]);
+ for (i = 0; i < ARRAY_SIZE(vlan->mc); i++)
+ efx_mcdi_filter_remove_unsafe(efx, EFX_FILTER_PRI_AUTO,
+ vlan->mc[i]);
+ for (i = 0; i < EFX_EF10_NUM_DEFAULT_FILTERS; i++)
+ if (vlan->default_filters[i] != EFX_EF10_FILTER_ID_INVALID)
+ efx_mcdi_filter_remove_unsafe(efx, EFX_FILTER_PRI_AUTO,
+ vlan->default_filters[i]);
+
+ kfree(vlan);
+}
+
+void efx_mcdi_filter_del_vlan(struct efx_nic *efx, u16 vid)
+{
+ struct efx_mcdi_filter_vlan *vlan;
+
+ /* See comment in efx_mcdi_filter_table_remove() */
+ if (!efx_rwsem_assert_write_locked(&efx->filter_sem))
+ return;
+
+ vlan = efx_mcdi_filter_find_vlan(efx, vid);
+ if (!vlan) {
+ netif_err(efx, drv, efx->net_dev,
+ "VLAN %u not found in filter state\n", vid);
+ return;
+ }
+
+ efx_mcdi_filter_del_vlan_internal(efx, vlan);
+}
+
+struct efx_mcdi_filter_vlan *efx_mcdi_filter_find_vlan(struct efx_nic *efx,
+ u16 vid)
+{
+ struct efx_mcdi_filter_table *table = efx->filter_state;
+ struct efx_mcdi_filter_vlan *vlan;
+
+ WARN_ON(!rwsem_is_locked(&efx->filter_sem));
+
+ list_for_each_entry(vlan, &table->vlan_list, list) {
+ if (vlan->vid == vid)
+ return vlan;
+ }
+
+ return NULL;
+}
+
+void efx_mcdi_filter_cleanup_vlans(struct efx_nic *efx)
+{
+ struct efx_mcdi_filter_table *table = efx->filter_state;
+ struct efx_mcdi_filter_vlan *vlan, *next_vlan;
+
+ /* See comment in efx_mcdi_filter_table_remove() */
+ if (!efx_rwsem_assert_write_locked(&efx->filter_sem))
+ return;
+
+ if (!table)
+ return;
+
+ list_for_each_entry_safe(vlan, next_vlan, &table->vlan_list, list)
+ efx_mcdi_filter_del_vlan_internal(efx, vlan);
+}
+
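+/*
+ * Copy the device's unicast address list (plus the primary station
+ * address) into the filter table state, falling back to promiscuous
+ * mode if the list overflows.  efx_mcdi_filter_mc_addr_list() below
+ * does the same for multicast addresses.
+ */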
+static void efx_mcdi_filter_uc_addr_list(struct efx_nic *efx)
+{
+ struct efx_mcdi_filter_table *table = efx->filter_state;
+ struct net_device *net_dev = efx->net_dev;
+ struct netdev_hw_addr *uc;
+ unsigned int i;
+
+ table->uc_promisc = !!(net_dev->flags & IFF_PROMISC);
+ ether_addr_copy(table->dev_uc_list[0].addr, net_dev->dev_addr);
+ i = 1;
+ netdev_for_each_uc_addr(uc, net_dev) {
+ if (i >= EFX_EF10_FILTER_DEV_UC_MAX) {
+ table->uc_promisc = true;
+ break;
+ }
+ ether_addr_copy(table->dev_uc_list[i].addr, uc->addr);
+ i++;
+ }
+
+ table->dev_uc_count = i;
+}
+
+static void efx_mcdi_filter_mc_addr_list(struct efx_nic *efx)
+{
+ struct efx_mcdi_filter_table *table = efx->filter_state;
+ struct net_device *net_dev = efx->net_dev;
+ struct netdev_hw_addr *mc;
+ unsigned int i;
+
+ table->mc_overflow = false;
+ table->mc_promisc = !!(net_dev->flags & (IFF_PROMISC | IFF_ALLMULTI));
+
+ i = 0;
+ netdev_for_each_mc_addr(mc, net_dev) {
+ if (i >= EFX_EF10_FILTER_DEV_MC_MAX) {
+ table->mc_promisc = true;
+ table->mc_overflow = true;
+ break;
+ }
+ ether_addr_copy(table->dev_mc_list[i].addr, mc->addr);
+ i++;
+ }
+
+ table->dev_mc_count = i;
+}
+
+/*
+ * Caller must hold efx->filter_sem for read if race against
+ * efx_mcdi_filter_table_remove() is possible
+ */
+void efx_mcdi_filter_sync_rx_mode(struct efx_nic *efx)
+{
+ struct efx_mcdi_filter_table *table = efx->filter_state;
+ struct net_device *net_dev = efx->net_dev;
+ struct efx_mcdi_filter_vlan *vlan;
+ bool vlan_filter;
+
+ if (!efx_dev_registered(efx))
+ return;
+
+ if (!table)
+ return;
+
+ efx_mcdi_filter_mark_old(efx);
+
+ /*
+ * Copy/convert the address lists; add the primary station
+ * address and broadcast address
+ */
+ netif_addr_lock_bh(net_dev);
+ efx_mcdi_filter_uc_addr_list(efx);
+ efx_mcdi_filter_mc_addr_list(efx);
+ netif_addr_unlock_bh(net_dev);
+
+ /*
+ * If VLAN filtering changes, all old filters are finally removed.
+ * Do it in advance to avoid conflicts for unicast untagged and
+ * VLAN 0 tagged filters.
+ */
+ vlan_filter = !!(net_dev->features & NETIF_F_HW_VLAN_CTAG_FILTER);
+ if (table->vlan_filter != vlan_filter) {
+ table->vlan_filter = vlan_filter;
+ efx_mcdi_filter_remove_old(efx);
+ }
+
+ list_for_each_entry(vlan, &table->vlan_list, list)
+ efx_mcdi_filter_vlan_sync_rx_mode(efx, vlan);
+
+ efx_mcdi_filter_remove_old(efx);
+ table->mc_promisc_last = table->mc_promisc;
+}
+
+#ifdef CONFIG_RFS_ACCEL
+
+bool efx_mcdi_filter_rfs_expire_one(struct efx_nic *efx, u32 flow_id,
+ unsigned int filter_idx)
+{
+ struct efx_filter_spec *spec, saved_spec;
+ struct efx_mcdi_filter_table *table;
+ struct efx_arfs_rule *rule = NULL;
+ bool ret = true, force = false;
+ u16 arfs_id;
+
+ down_read(&efx->filter_sem);
+ table = efx->filter_state;
+ down_write(&table->lock);
+ spec = efx_mcdi_filter_entry_spec(table, filter_idx);
+
+ if (!spec || spec->priority != EFX_FILTER_PRI_HINT)
+ goto out_unlock;
+
+ spin_lock_bh(&efx->rps_hash_lock);
+ if (!efx->rps_hash_table) {
+ /* In the absence of the table, we always return 0 to ARFS. */
+ arfs_id = 0;
+ } else {
+ rule = efx_rps_hash_find(efx, spec);
+ if (!rule)
+ /* ARFS table doesn't know of this filter, so remove it */
+ goto expire;
+ arfs_id = rule->arfs_id;
+ ret = efx_rps_check_rule(rule, filter_idx, &force);
+ if (force)
+ goto expire;
+ if (!ret) {
+ spin_unlock_bh(&efx->rps_hash_lock);
+ goto out_unlock;
+ }
+ }
+ if (!rps_may_expire_flow(efx->net_dev, spec->dmaq_id, flow_id, arfs_id))
+ ret = false;
+ else if (rule)
+ rule->filter_id = EFX_ARFS_FILTER_ID_REMOVING;
+expire:
+ saved_spec = *spec; /* remove operation will kfree spec */
+ spin_unlock_bh(&efx->rps_hash_lock);
+ /*
+ * At this point (since we dropped the lock), another thread might queue
+ * up a fresh insertion request (but the actual insertion will be held
+ * up by our possession of the filter table lock). In that case, it
+ * will set rule->filter_id to EFX_ARFS_FILTER_ID_PENDING, meaning that
+ * the rule is not removed by efx_rps_hash_del() below.
+ */
+ if (ret)
+ ret = efx_mcdi_filter_remove_internal(efx, 1U << spec->priority,
+ filter_idx, true) == 0;
+ /*
+ * While we can't safely dereference rule (we dropped the lock), we can
+ * still test it for NULL.
+ */
+ if (ret && rule) {
+ /* Expiring, so remove entry from ARFS table */
+ spin_lock_bh(&efx->rps_hash_lock);
+ efx_rps_hash_del(efx, &saved_spec);
+ spin_unlock_bh(&efx->rps_hash_lock);
+ }
+out_unlock:
+ up_write(&table->lock);
+ up_read(&efx->filter_sem);
+ return ret;
+}
+
+#endif /* CONFIG_RFS_ACCEL */
+
+#define RSS_MODE_HASH_ADDRS (1 << RSS_MODE_HASH_SRC_ADDR_LBN |\
+ 1 << RSS_MODE_HASH_DST_ADDR_LBN)
+#define RSS_MODE_HASH_PORTS (1 << RSS_MODE_HASH_SRC_PORT_LBN |\
+ 1 << RSS_MODE_HASH_DST_PORT_LBN)
+#define RSS_CONTEXT_FLAGS_DEFAULT (1 << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_TOEPLITZ_IPV4_EN_LBN |\
+ 1 << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_TOEPLITZ_TCPV4_EN_LBN |\
+ 1 << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_TOEPLITZ_IPV6_EN_LBN |\
+ 1 << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_TOEPLITZ_TCPV6_EN_LBN |\
+ (RSS_MODE_HASH_ADDRS | RSS_MODE_HASH_PORTS) << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_TCP_IPV4_RSS_MODE_LBN |\
+ RSS_MODE_HASH_ADDRS << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_UDP_IPV4_RSS_MODE_LBN |\
+ RSS_MODE_HASH_ADDRS << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_OTHER_IPV4_RSS_MODE_LBN |\
+ (RSS_MODE_HASH_ADDRS | RSS_MODE_HASH_PORTS) << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_TCP_IPV6_RSS_MODE_LBN |\
+ RSS_MODE_HASH_ADDRS << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_UDP_IPV6_RSS_MODE_LBN |\
+ RSS_MODE_HASH_ADDRS << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_OTHER_IPV6_RSS_MODE_LBN)
+
+int efx_mcdi_get_rss_context_flags(struct efx_nic *efx, u32 context, u32 *flags)
+{
+ /*
+ * Firmware had a bug (sfc bug 61952) where it would not actually
+ * fill in the flags field in the response to MC_CMD_RSS_CONTEXT_GET_FLAGS.
+ * This meant that it would always contain whatever was previously
+ * in the MCDI buffer. Fortunately, all firmware versions with
+ * this bug have the same default flags value for a newly-allocated
+ * RSS context, and the only time we want to get the flags is just
+ * after allocating. Moreover, the response has a 32-bit hole
+ * where the context ID would be in the request, so we can use an
+ * overlength buffer in the request and pre-fill the flags field
+ * with what we believe the default to be. Thus if the firmware
+ * has the bug, it will leave our pre-filled value in the flags
+ * field of the response, and we will get the right answer.
+ *
+ * However, this does mean that this function should NOT be used if
+ * the RSS context flags might not be their defaults - it is ONLY
+ * reliably correct for a newly-allocated RSS context.
+ */
+ MCDI_DECLARE_BUF(inbuf, MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_LEN);
+ MCDI_DECLARE_BUF(outbuf, MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_LEN);
+ size_t outlen;
+ int rc;
+
+ /* Check we have a hole for the context ID */
+ BUILD_BUG_ON(MC_CMD_RSS_CONTEXT_GET_FLAGS_IN_LEN != MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_FLAGS_OFST);
+ MCDI_SET_DWORD(inbuf, RSS_CONTEXT_GET_FLAGS_IN_RSS_CONTEXT_ID, context);
+ MCDI_SET_DWORD(inbuf, RSS_CONTEXT_GET_FLAGS_OUT_FLAGS,
+ RSS_CONTEXT_FLAGS_DEFAULT);
+ rc = efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_GET_FLAGS, inbuf,
+ sizeof(inbuf), outbuf, sizeof(outbuf), &outlen);
+ if (rc == 0) {
+ if (outlen < MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_LEN)
+ rc = -EIO;
+ else
+ *flags = MCDI_DWORD(outbuf, RSS_CONTEXT_GET_FLAGS_OUT_FLAGS);
+ }
+ return rc;
+}
+
+/*
+ * Attempt to enable 4-tuple UDP hashing on the specified RSS context.
+ * If we fail, we just leave the RSS context at its default hash settings,
+ * which is safe but may slightly reduce performance.
+ * Defaults are 4-tuple for TCP and 2-tuple for UDP and other-IP, so we
+ * just need to set the UDP ports flags (for both IP versions).
+ */
+void efx_mcdi_set_rss_context_flags(struct efx_nic *efx,
+ struct efx_rss_context *ctx)
+{
+ MCDI_DECLARE_BUF(inbuf, MC_CMD_RSS_CONTEXT_SET_FLAGS_IN_LEN);
+ u32 flags;
+
+ BUILD_BUG_ON(MC_CMD_RSS_CONTEXT_SET_FLAGS_OUT_LEN != 0);
+
+ if (efx_mcdi_get_rss_context_flags(efx, ctx->context_id, &flags) != 0)
+ return;
+ MCDI_SET_DWORD(inbuf, RSS_CONTEXT_SET_FLAGS_IN_RSS_CONTEXT_ID,
+ ctx->context_id);
+ flags |= RSS_MODE_HASH_PORTS << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_UDP_IPV4_RSS_MODE_LBN;
+ flags |= RSS_MODE_HASH_PORTS << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_UDP_IPV6_RSS_MODE_LBN;
+ MCDI_SET_DWORD(inbuf, RSS_CONTEXT_SET_FLAGS_IN_FLAGS, flags);
+ if (!efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_SET_FLAGS, inbuf, sizeof(inbuf),
+ NULL, 0, NULL))
+ /* Succeeded, so UDP 4-tuple is now enabled */
+ ctx->rx_hash_udp_4tuple = true;
+}
+
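+/*
+ * Allocate an RSS context.  Shared contexts are limited to a
+ * power-of-two spread no larger than
+ * EFX_EF10_MAX_SHARED_RSS_CONTEXT_SIZE; exclusive contexts use the
+ * full rss_spread.  The new context ID is stored in @ctx and the
+ * actual spread is reported through @context_size if it is non-NULL.
+ */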
+static int efx_mcdi_filter_alloc_rss_context(struct efx_nic *efx, bool exclusive,
+ struct efx_rss_context *ctx,
+ unsigned *context_size)
+{
+ MCDI_DECLARE_BUF(inbuf, MC_CMD_RSS_CONTEXT_ALLOC_IN_LEN);
+ MCDI_DECLARE_BUF(outbuf, MC_CMD_RSS_CONTEXT_ALLOC_OUT_LEN);
+ struct efx_ef10_nic_data *nic_data = efx->nic_data;
+ size_t outlen;
+ int rc;
+ u32 alloc_type = exclusive ?
+ MC_CMD_RSS_CONTEXT_ALLOC_IN_TYPE_EXCLUSIVE :
+ MC_CMD_RSS_CONTEXT_ALLOC_IN_TYPE_SHARED;
+ unsigned rss_spread = exclusive ?
+ efx->rss_spread :
+ min(rounddown_pow_of_two(efx->rss_spread),
+ EFX_EF10_MAX_SHARED_RSS_CONTEXT_SIZE);
+
+ if (!exclusive && rss_spread == 1) {
+ ctx->context_id = EFX_MCDI_RSS_CONTEXT_INVALID;
+ if (context_size)
+ *context_size = 1;
+ return 0;
+ }
+
+ if (nic_data->datapath_caps &
+ 1 << MC_CMD_GET_CAPABILITIES_OUT_RX_RSS_LIMITED_LBN)
+ return -EOPNOTSUPP;
+
+ MCDI_SET_DWORD(inbuf, RSS_CONTEXT_ALLOC_IN_UPSTREAM_PORT_ID,
+ nic_data->vport_id);
+ MCDI_SET_DWORD(inbuf, RSS_CONTEXT_ALLOC_IN_TYPE, alloc_type);
+ MCDI_SET_DWORD(inbuf, RSS_CONTEXT_ALLOC_IN_NUM_QUEUES, rss_spread);
+
+ rc = efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_ALLOC, inbuf, sizeof(inbuf),
+ outbuf, sizeof(outbuf), &outlen);
+ if (rc != 0)
+ return rc;
+
+ if (outlen < MC_CMD_RSS_CONTEXT_ALLOC_OUT_LEN)
+ return -EIO;
+
+ ctx->context_id = MCDI_DWORD(outbuf, RSS_CONTEXT_ALLOC_OUT_RSS_CONTEXT_ID);
+
+ if (context_size)
+ *context_size = rss_spread;
+
+ if (nic_data->datapath_caps &
+ 1 << MC_CMD_GET_CAPABILITIES_OUT_ADDITIONAL_RSS_MODES_LBN)
+ efx_mcdi_set_rss_context_flags(efx, ctx);
+
+ return 0;
+}
+
+static int efx_mcdi_filter_free_rss_context(struct efx_nic *efx, u32 context)
+{
+ MCDI_DECLARE_BUF(inbuf, MC_CMD_RSS_CONTEXT_FREE_IN_LEN);
+
+ MCDI_SET_DWORD(inbuf, RSS_CONTEXT_FREE_IN_RSS_CONTEXT_ID,
+ context);
+ return efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_FREE, inbuf, sizeof(inbuf),
+ NULL, 0, NULL);
+}
+
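+/* Write the indirection table and Toeplitz hash key for an RSS context */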
+static int efx_mcdi_filter_populate_rss_table(struct efx_nic *efx, u32 context,
+ const u32 *rx_indir_table, const u8 *key)
+{
+ MCDI_DECLARE_BUF(tablebuf, MC_CMD_RSS_CONTEXT_SET_TABLE_IN_LEN);
+ MCDI_DECLARE_BUF(keybuf, MC_CMD_RSS_CONTEXT_SET_KEY_IN_LEN);
+ int i, rc;
+
+ MCDI_SET_DWORD(tablebuf, RSS_CONTEXT_SET_TABLE_IN_RSS_CONTEXT_ID,
+ context);
+ BUILD_BUG_ON(ARRAY_SIZE(efx->rss_context.rx_indir_table) !=
+ MC_CMD_RSS_CONTEXT_SET_TABLE_IN_INDIRECTION_TABLE_LEN);
+
+ /* This iterates over the length of efx->rss_context.rx_indir_table, but
+ * copies bytes from rx_indir_table. That's because the latter is a
+ * pointer rather than an array, but should have the same length.
+ * The efx->rss_context.rx_hash_key loop below is similar.
+ */
+ for (i = 0; i < ARRAY_SIZE(efx->rss_context.rx_indir_table); ++i)
+ MCDI_PTR(tablebuf,
+ RSS_CONTEXT_SET_TABLE_IN_INDIRECTION_TABLE)[i] =
+ (u8) rx_indir_table[i];
+
+ rc = efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_SET_TABLE, tablebuf,
+ sizeof(tablebuf), NULL, 0, NULL);
+ if (rc != 0)
+ return rc;
+
+ MCDI_SET_DWORD(keybuf, RSS_CONTEXT_SET_KEY_IN_RSS_CONTEXT_ID,
+ context);
+ BUILD_BUG_ON(ARRAY_SIZE(efx->rss_context.rx_hash_key) !=
+ MC_CMD_RSS_CONTEXT_SET_KEY_IN_TOEPLITZ_KEY_LEN);
+ for (i = 0; i < ARRAY_SIZE(efx->rss_context.rx_hash_key); ++i)
+ MCDI_PTR(keybuf, RSS_CONTEXT_SET_KEY_IN_TOEPLITZ_KEY)[i] = key[i];
+
+ return efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_SET_KEY, keybuf,
+ sizeof(keybuf), NULL, 0, NULL);
+}
+
+void efx_mcdi_rx_free_indir_table(struct efx_nic *efx)
+{
+ int rc;
+
+ if (efx->rss_context.context_id != EFX_MCDI_RSS_CONTEXT_INVALID) {
+ rc = efx_mcdi_filter_free_rss_context(efx, efx->rss_context.context_id);
+ WARN_ON(rc != 0);
+ }
+ efx->rss_context.context_id = EFX_MCDI_RSS_CONTEXT_INVALID;
+}
+
+static int efx_mcdi_filter_rx_push_shared_rss_config(struct efx_nic *efx,
+ unsigned *context_size)
+{
+ struct efx_ef10_nic_data *nic_data = efx->nic_data;
+ int rc = efx_mcdi_filter_alloc_rss_context(efx, false, &efx->rss_context,
+ context_size);
+
+ if (rc != 0)
+ return rc;
+
+ nic_data->rx_rss_context_exclusive = false;
+ efx_set_default_rx_indir_table(efx, &efx->rss_context);
+ return 0;
+}
+
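+/*
+ * Push the RSS configuration into an exclusive RSS context, allocating
+ * one if we do not already own one, and free the old context once the
+ * new configuration has been applied.
+ */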
+static int efx_mcdi_filter_rx_push_exclusive_rss_config(struct efx_nic *efx,
+ const u32 *rx_indir_table,
+ const u8 *key)
+{
+ u32 old_rx_rss_context = efx->rss_context.context_id;
+ struct efx_ef10_nic_data *nic_data = efx->nic_data;
+ int rc;
+
+ if (efx->rss_context.context_id == EFX_MCDI_RSS_CONTEXT_INVALID ||
+ !nic_data->rx_rss_context_exclusive) {
+ rc = efx_mcdi_filter_alloc_rss_context(efx, true, &efx->rss_context,
+ NULL);
+ if (rc == -EOPNOTSUPP)
+ return rc;
+ else if (rc != 0)
+ goto fail1;
+ }
+
+ rc = efx_mcdi_filter_populate_rss_table(efx, efx->rss_context.context_id,
+ rx_indir_table, key);
+ if (rc != 0)
+ goto fail2;
+
+ if (efx->rss_context.context_id != old_rx_rss_context &&
+ old_rx_rss_context != EFX_MCDI_RSS_CONTEXT_INVALID)
+ WARN_ON(efx_mcdi_filter_free_rss_context(efx, old_rx_rss_context) != 0);
+ nic_data->rx_rss_context_exclusive = true;
+ if (rx_indir_table != efx->rss_context.rx_indir_table)
+ memcpy(efx->rss_context.rx_indir_table, rx_indir_table,
+ sizeof(efx->rss_context.rx_indir_table));
+ if (key != efx->rss_context.rx_hash_key)
+ memcpy(efx->rss_context.rx_hash_key, key,
+ efx->type->rx_hash_key_size);
+
+ return 0;
+
+fail2:
+ if (old_rx_rss_context != efx->rss_context.context_id) {
+ WARN_ON(efx_mcdi_filter_free_rss_context(efx, efx->rss_context.context_id) != 0);
+ efx->rss_context.context_id = old_rx_rss_context;
+ }
+fail1:
+ netif_err(efx, hw, efx->net_dev, "%s: failed rc=%d\n", __func__, rc);
+ return rc;
+}
+
+int efx_mcdi_rx_push_rss_context_config(struct efx_nic *efx,
+ struct efx_rss_context *ctx,
+ const u32 *rx_indir_table,
+ const u8 *key)
+{
+ int rc;
+
+ WARN_ON(!mutex_is_locked(&efx->rss_lock));
+
+ if (ctx->context_id == EFX_MCDI_RSS_CONTEXT_INVALID) {
+ rc = efx_mcdi_filter_alloc_rss_context(efx, true, ctx, NULL);
+ if (rc)
+ return rc;
+ }
+
+ if (!rx_indir_table) /* Delete this context */
+ return efx_mcdi_filter_free_rss_context(efx, ctx->context_id);
+
+ rc = efx_mcdi_filter_populate_rss_table(efx, ctx->context_id,
+ rx_indir_table, key);
+ if (rc)
+ return rc;
+
+ memcpy(ctx->rx_indir_table, rx_indir_table,
+ sizeof(efx->rss_context.rx_indir_table));
+ memcpy(ctx->rx_hash_key, key, efx->type->rx_hash_key_size);
+
+ return 0;
+}
+
+int efx_mcdi_rx_pull_rss_context_config(struct efx_nic *efx,
+ struct efx_rss_context *ctx)
+{
+ MCDI_DECLARE_BUF(inbuf, MC_CMD_RSS_CONTEXT_GET_TABLE_IN_LEN);
+ MCDI_DECLARE_BUF(tablebuf, MC_CMD_RSS_CONTEXT_GET_TABLE_OUT_LEN);
+ MCDI_DECLARE_BUF(keybuf, MC_CMD_RSS_CONTEXT_GET_KEY_OUT_LEN);
+ size_t outlen;
+ int rc, i;
+
+ WARN_ON(!mutex_is_locked(&efx->rss_lock));
+
+ BUILD_BUG_ON(MC_CMD_RSS_CONTEXT_GET_TABLE_IN_LEN !=
+ MC_CMD_RSS_CONTEXT_GET_KEY_IN_LEN);
+
+ if (ctx->context_id == EFX_MCDI_RSS_CONTEXT_INVALID)
+ return -ENOENT;
+
+ MCDI_SET_DWORD(inbuf, RSS_CONTEXT_GET_TABLE_IN_RSS_CONTEXT_ID,
+ ctx->context_id);
+ BUILD_BUG_ON(ARRAY_SIZE(ctx->rx_indir_table) !=
+ MC_CMD_RSS_CONTEXT_GET_TABLE_OUT_INDIRECTION_TABLE_LEN);
+ rc = efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_GET_TABLE, inbuf, sizeof(inbuf),
+ tablebuf, sizeof(tablebuf), &outlen);
+ if (rc != 0)
+ return rc;
+
+ if (WARN_ON(outlen != MC_CMD_RSS_CONTEXT_GET_TABLE_OUT_LEN))
+ return -EIO;
+
+ for (i = 0; i < ARRAY_SIZE(ctx->rx_indir_table); i++)
+ ctx->rx_indir_table[i] = MCDI_PTR(tablebuf,
+ RSS_CONTEXT_GET_TABLE_OUT_INDIRECTION_TABLE)[i];
+
+ MCDI_SET_DWORD(inbuf, RSS_CONTEXT_GET_KEY_IN_RSS_CONTEXT_ID,
+ ctx->context_id);
+ BUILD_BUG_ON(ARRAY_SIZE(ctx->rx_hash_key) !=
+ MC_CMD_RSS_CONTEXT_SET_KEY_IN_TOEPLITZ_KEY_LEN);
+ rc = efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_GET_KEY, inbuf, sizeof(inbuf),
+ keybuf, sizeof(keybuf), &outlen);
+ if (rc != 0)
+ return rc;
+
+ if (WARN_ON(outlen != MC_CMD_RSS_CONTEXT_GET_KEY_OUT_LEN))
+ return -EIO;
+
+ for (i = 0; i < ARRAY_SIZE(ctx->rx_hash_key); ++i)
+ ctx->rx_hash_key[i] = MCDI_PTR(
+ keybuf, RSS_CONTEXT_GET_KEY_OUT_TOEPLITZ_KEY)[i];
+
+ return 0;
+}
+
+int efx_mcdi_rx_pull_rss_config(struct efx_nic *efx)
+{
+ int rc;
+
+ mutex_lock(&efx->rss_lock);
+ rc = efx_mcdi_rx_pull_rss_context_config(efx, &efx->rss_context);
+ mutex_unlock(&efx->rss_lock);
+ return rc;
+}
+
+void efx_mcdi_rx_restore_rss_contexts(struct efx_nic *efx)
+{
+ struct efx_ef10_nic_data *nic_data = efx->nic_data;
+ struct efx_rss_context *ctx;
+ int rc;
+
+ WARN_ON(!mutex_is_locked(&efx->rss_lock));
+
+ if (!nic_data->must_restore_rss_contexts)
+ return;
+
+ list_for_each_entry(ctx, &efx->rss_context.list, list) {
+ /* previous NIC RSS context is gone */
+ ctx->context_id = EFX_MCDI_RSS_CONTEXT_INVALID;
+ /* so try to allocate a new one */
+ rc = efx_mcdi_rx_push_rss_context_config(efx, ctx,
+ ctx->rx_indir_table,
+ ctx->rx_hash_key);
+ if (rc)
+ netif_warn(efx, probe, efx->net_dev,
+ "failed to restore RSS context %u, rc=%d"
+ "; RSS filters may fail to be applied\n",
+ ctx->user_id, rc);
+ }
+ nic_data->must_restore_rss_contexts = false;
+}
+
+int efx_mcdi_pf_rx_push_rss_config(struct efx_nic *efx, bool user,
+ const u32 *rx_indir_table,
+ const u8 *key)
+{
+ int rc;
+
+ if (efx->rss_spread == 1)
+ return 0;
+
+ if (!key)
+ key = efx->rss_context.rx_hash_key;
+
+ rc = efx_mcdi_filter_rx_push_exclusive_rss_config(efx, rx_indir_table, key);
+
+ if (rc == -ENOBUFS && !user) {
+ unsigned context_size;
+ bool mismatch = false;
+ size_t i;
+
+ for (i = 0;
+ i < ARRAY_SIZE(efx->rss_context.rx_indir_table) && !mismatch;
+ i++)
+ mismatch = rx_indir_table[i] !=
+ ethtool_rxfh_indir_default(i, efx->rss_spread);
+
+ rc = efx_mcdi_filter_rx_push_shared_rss_config(efx, &context_size);
+ if (rc == 0) {
+ if (context_size != efx->rss_spread)
+ netif_warn(efx, probe, efx->net_dev,
+ "Could not allocate an exclusive RSS"
+ " context; allocated a shared one of"
+ " different size."
+ " Wanted %u, got %u.\n",
+ efx->rss_spread, context_size);
+ else if (mismatch)
+ netif_warn(efx, probe, efx->net_dev,
+ "Could not allocate an exclusive RSS"
+ " context; allocated a shared one but"
+ " could not apply custom"
+ " indirection.\n");
+ else
+ netif_info(efx, probe, efx->net_dev,
+ "Could not allocate an exclusive RSS"
+ " context; allocated a shared one.\n");
+ }
+ }
+ return rc;
+}
+
+int efx_mcdi_vf_rx_push_rss_config(struct efx_nic *efx, bool user,
+ const u32 *rx_indir_table
+ __attribute__ ((unused)),
+ const u8 *key
+ __attribute__ ((unused)))
+{
+ if (user)
+ return -EOPNOTSUPP;
+ if (efx->rss_context.context_id != EFX_MCDI_RSS_CONTEXT_INVALID)
+ return 0;
+ return efx_mcdi_filter_rx_push_shared_rss_config(efx, NULL);
+}
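Reviewer note: efx_mcdi_rx_push_rss_context_config() above asserts that efx->rss_lock is held, and efx_mcdi_rx_pull_rss_config() shows the intended lock/call/unlock pattern. A minimal caller sketch, assuming a hypothetical example_push_rss() helper that is not part of this patch:

static int example_push_rss(struct efx_nic *efx, struct efx_rss_context *ctx,
			    const u32 *indir, const u8 *key)
{
	int rc;

	/* The push helper WARNs if rss_lock is not held, so take it here. */
	mutex_lock(&efx->rss_lock);
	rc = efx_mcdi_rx_push_rss_context_config(efx, ctx, indir, key);
	mutex_unlock(&efx->rss_lock);
	return rc;
}

Passing a NULL rx_indir_table instead frees the firmware context, as the early return in efx_mcdi_rx_push_rss_context_config() shows.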
diff --git a/drivers/net/ethernet/sfc/mcdi_filters.h b/drivers/net/ethernet/sfc/mcdi_filters.h
new file mode 100644
index 000000000000..1837f4f5d661
--- /dev/null
+++ b/drivers/net/ethernet/sfc/mcdi_filters.h
@@ -0,0 +1,159 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2019 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+#ifndef EFX_MCDI_FILTERS_H
+#define EFX_MCDI_FILTERS_H
+
+#include "net_driver.h"
+#include "filter.h"
+#include "mcdi_pcol.h"
+
+#define EFX_EF10_FILTER_DEV_UC_MAX 32
+#define EFX_EF10_FILTER_DEV_MC_MAX 256
+
+enum efx_mcdi_filter_default_filters {
+ EFX_EF10_BCAST,
+ EFX_EF10_UCDEF,
+ EFX_EF10_MCDEF,
+ EFX_EF10_VXLAN4_UCDEF,
+ EFX_EF10_VXLAN4_MCDEF,
+ EFX_EF10_VXLAN6_UCDEF,
+ EFX_EF10_VXLAN6_MCDEF,
+ EFX_EF10_NVGRE4_UCDEF,
+ EFX_EF10_NVGRE4_MCDEF,
+ EFX_EF10_NVGRE6_UCDEF,
+ EFX_EF10_NVGRE6_MCDEF,
+ EFX_EF10_GENEVE4_UCDEF,
+ EFX_EF10_GENEVE4_MCDEF,
+ EFX_EF10_GENEVE6_UCDEF,
+ EFX_EF10_GENEVE6_MCDEF,
+
+ EFX_EF10_NUM_DEFAULT_FILTERS
+};
+
+/* Per-VLAN filters information */
+struct efx_mcdi_filter_vlan {
+ struct list_head list;
+ u16 vid;
+ u16 uc[EFX_EF10_FILTER_DEV_UC_MAX];
+ u16 mc[EFX_EF10_FILTER_DEV_MC_MAX];
+ u16 default_filters[EFX_EF10_NUM_DEFAULT_FILTERS];
+};
+
+struct efx_mcdi_dev_addr {
+ u8 addr[ETH_ALEN];
+};
+
+struct efx_mcdi_filter_table {
+/* The MCDI match masks supported by this fw & hw, in order of priority */
+ u32 rx_match_mcdi_flags[
+ MC_CMD_GET_PARSER_DISP_INFO_OUT_SUPPORTED_MATCHES_MAXNUM * 2];
+ unsigned int rx_match_count;
+
+ struct rw_semaphore lock; /* Protects entries */
+ struct {
+ unsigned long spec; /* pointer to spec plus flag bits */
+/* AUTO_OLD is used to mark and sweep MAC filters for the device address lists. */
+/* unused flag 1UL */
+#define EFX_EF10_FILTER_FLAG_AUTO_OLD 2UL
+#define EFX_EF10_FILTER_FLAGS 3UL
+ u64 handle; /* firmware handle */
+ } *entry;
+/* Shadow of net_device address lists, guarded by mac_lock */
+ struct efx_mcdi_dev_addr dev_uc_list[EFX_EF10_FILTER_DEV_UC_MAX];
+ struct efx_mcdi_dev_addr dev_mc_list[EFX_EF10_FILTER_DEV_MC_MAX];
+ int dev_uc_count;
+ int dev_mc_count;
+ bool uc_promisc;
+ bool mc_promisc;
+/* Whether in multicast promiscuous mode when last changed */
+ bool mc_promisc_last;
+ bool mc_overflow; /* Too many MC addrs; should always imply mc_promisc */
+ bool vlan_filter;
+ struct list_head vlan_list;
+};
+
+int efx_mcdi_filter_table_probe(struct efx_nic *efx);
+void efx_mcdi_filter_table_remove(struct efx_nic *efx);
+void efx_mcdi_filter_table_restore(struct efx_nic *efx);
+
+/*
+ * The filter table(s) are managed by firmware and we have write-only
+ * access. When removing filters we must identify them to the
+ * firmware by a 64-bit handle, but this is too wide for Linux kernel
+ * interfaces (32-bit for RX NFC, 16-bit for RFS). Also, we need to
+ * be able to tell in advance whether a requested insertion will
+ * replace an existing filter. Therefore we maintain a software hash
+ * table, which should be at least as large as the hardware hash
+ * table.
+ *
+ * Huntington has a single 8K filter table shared between all filter
+ * types and both ports.
+ */
+#define EFX_MCDI_FILTER_TBL_ROWS 8192
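As the comment above explains, the driver keeps a software shadow of the firmware filter table with (at least) EFX_MCDI_FILTER_TBL_ROWS entries. A rough allocation sketch, assuming a probe-style function; the real efx_mcdi_filter_table_probe() lives in mcdi_filters.c and is not reproduced in this hunk:

	struct efx_mcdi_filter_table *table;

	table = kzalloc(sizeof(*table), GFP_KERNEL);
	if (!table)
		return -ENOMEM;
	/* One software entry per hardware hash-table row. */
	table->entry = vzalloc(array_size(EFX_MCDI_FILTER_TBL_ROWS,
					  sizeof(*table->entry)));
	if (!table->entry) {
		kfree(table);
		return -ENOMEM;
	}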
+
+bool efx_mcdi_filter_match_supported(struct efx_mcdi_filter_table *table,
+ bool encap,
+ enum efx_filter_match_flags match_flags);
+
+void efx_mcdi_filter_sync_rx_mode(struct efx_nic *efx);
+s32 efx_mcdi_filter_insert(struct efx_nic *efx, struct efx_filter_spec *spec,
+ bool replace_equal);
+int efx_mcdi_filter_remove_safe(struct efx_nic *efx,
+ enum efx_filter_priority priority,
+ u32 filter_id);
+int efx_mcdi_filter_get_safe(struct efx_nic *efx,
+ enum efx_filter_priority priority,
+ u32 filter_id, struct efx_filter_spec *spec);
+
+u32 efx_mcdi_filter_count_rx_used(struct efx_nic *efx,
+ enum efx_filter_priority priority);
+int efx_mcdi_filter_clear_rx(struct efx_nic *efx,
+ enum efx_filter_priority priority);
+u32 efx_mcdi_filter_get_rx_id_limit(struct efx_nic *efx);
+s32 efx_mcdi_filter_get_rx_ids(struct efx_nic *efx,
+ enum efx_filter_priority priority,
+ u32 *buf, u32 size);
+
+void efx_mcdi_filter_cleanup_vlans(struct efx_nic *efx);
+int efx_mcdi_filter_add_vlan(struct efx_nic *efx, u16 vid);
+struct efx_mcdi_filter_vlan *efx_mcdi_filter_find_vlan(struct efx_nic *efx, u16 vid);
+void efx_mcdi_filter_del_vlan(struct efx_nic *efx, u16 vid);
+
+void efx_mcdi_rx_free_indir_table(struct efx_nic *efx);
+int efx_mcdi_rx_push_rss_context_config(struct efx_nic *efx,
+ struct efx_rss_context *ctx,
+ const u32 *rx_indir_table,
+ const u8 *key);
+int efx_mcdi_pf_rx_push_rss_config(struct efx_nic *efx, bool user,
+ const u32 *rx_indir_table,
+ const u8 *key);
+int efx_mcdi_vf_rx_push_rss_config(struct efx_nic *efx, bool user,
+ const u32 *rx_indir_table
+ __attribute__ ((unused)),
+ const u8 *key
+ __attribute__ ((unused)));
+int efx_mcdi_rx_pull_rss_config(struct efx_nic *efx);
+int efx_mcdi_rx_pull_rss_context_config(struct efx_nic *efx,
+ struct efx_rss_context *ctx);
+int efx_mcdi_get_rss_context_flags(struct efx_nic *efx, u32 context,
+ u32 *flags);
+void efx_mcdi_set_rss_context_flags(struct efx_nic *efx,
+ struct efx_rss_context *ctx);
+void efx_mcdi_rx_restore_rss_contexts(struct efx_nic *efx);
+
+static inline void efx_mcdi_update_rx_scatter(struct efx_nic *efx)
+{
+ /* no need to do anything here */
+}
+
+bool efx_mcdi_filter_rfs_expire_one(struct efx_nic *efx, u32 flow_id,
+ unsigned int filter_idx);
+
+#endif
diff --git a/drivers/net/ethernet/sfc/mcdi_functions.c b/drivers/net/ethernet/sfc/mcdi_functions.c
new file mode 100644
index 000000000000..dcfe78b0fa5a
--- /dev/null
+++ b/drivers/net/ethernet/sfc/mcdi_functions.c
@@ -0,0 +1,386 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2019 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#include "net_driver.h"
+#include "efx.h"
+#include "nic.h"
+#include "mcdi_functions.h"
+#include "mcdi.h"
+#include "mcdi_pcol.h"
+
+int efx_mcdi_free_vis(struct efx_nic *efx)
+{
+ MCDI_DECLARE_BUF_ERR(outbuf);
+ size_t outlen;
+ int rc = efx_mcdi_rpc_quiet(efx, MC_CMD_FREE_VIS, NULL, 0,
+ outbuf, sizeof(outbuf), &outlen);
+
+ /* -EALREADY means nothing to free, so ignore */
+ if (rc == -EALREADY)
+ rc = 0;
+ if (rc)
+ efx_mcdi_display_error(efx, MC_CMD_FREE_VIS, 0, outbuf, outlen,
+ rc);
+ return rc;
+}
+
+int efx_mcdi_alloc_vis(struct efx_nic *efx, unsigned int min_vis,
+ unsigned int max_vis, unsigned int *vi_base,
+ unsigned int *allocated_vis)
+{
+ MCDI_DECLARE_BUF(outbuf, MC_CMD_ALLOC_VIS_OUT_LEN);
+ MCDI_DECLARE_BUF(inbuf, MC_CMD_ALLOC_VIS_IN_LEN);
+ size_t outlen;
+ int rc;
+
+ MCDI_SET_DWORD(inbuf, ALLOC_VIS_IN_MIN_VI_COUNT, min_vis);
+ MCDI_SET_DWORD(inbuf, ALLOC_VIS_IN_MAX_VI_COUNT, max_vis);
+ rc = efx_mcdi_rpc(efx, MC_CMD_ALLOC_VIS, inbuf, sizeof(inbuf),
+ outbuf, sizeof(outbuf), &outlen);
+ if (rc != 0)
+ return rc;
+
+ if (outlen < MC_CMD_ALLOC_VIS_OUT_LEN)
+ return -EIO;
+
+ netif_dbg(efx, drv, efx->net_dev, "base VI is A0x%03x\n",
+ MCDI_DWORD(outbuf, ALLOC_VIS_OUT_VI_BASE));
+
+ if (vi_base)
+ *vi_base = MCDI_DWORD(outbuf, ALLOC_VIS_OUT_VI_BASE);
+ if (allocated_vis)
+ *allocated_vis = MCDI_DWORD(outbuf, ALLOC_VIS_OUT_VI_COUNT);
+ return 0;
+}
+
+int efx_mcdi_ev_probe(struct efx_channel *channel)
+{
+ return efx_nic_alloc_buffer(channel->efx, &channel->eventq.buf,
+ (channel->eventq_mask + 1) *
+ sizeof(efx_qword_t),
+ GFP_KERNEL);
+}
+
+int efx_mcdi_ev_init(struct efx_channel *channel, bool v1_cut_thru, bool v2)
+{
+ MCDI_DECLARE_BUF(inbuf,
+ MC_CMD_INIT_EVQ_V2_IN_LEN(EFX_MAX_EVQ_SIZE * 8 /
+ EFX_BUF_SIZE));
+ MCDI_DECLARE_BUF(outbuf, MC_CMD_INIT_EVQ_V2_OUT_LEN);
+ size_t entries = channel->eventq.buf.len / EFX_BUF_SIZE;
+ struct efx_nic *efx = channel->efx;
+ size_t inlen, outlen;
+ dma_addr_t dma_addr;
+ int rc, i;
+
+ /* Fill event queue with all ones (i.e. empty events) */
+ memset(channel->eventq.buf.addr, 0xff, channel->eventq.buf.len);
+
+ MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_SIZE, channel->eventq_mask + 1);
+ MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_INSTANCE, channel->channel);
+ /* INIT_EVQ expects index in vector table, not absolute */
+ MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_IRQ_NUM, channel->channel);
+ MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_TMR_MODE,
+ MC_CMD_INIT_EVQ_IN_TMR_MODE_DIS);
+ MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_TMR_LOAD, 0);
+ MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_TMR_RELOAD, 0);
+ MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_COUNT_MODE,
+ MC_CMD_INIT_EVQ_IN_COUNT_MODE_DIS);
+ MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_COUNT_THRSHLD, 0);
+
+ if (v2) {
+ /* Use the new generic approach to specifying event queue
+ * configuration, requesting lower latency or higher throughput.
+ * The options that actually get used appear in the output.
+ */
+ MCDI_POPULATE_DWORD_2(inbuf, INIT_EVQ_V2_IN_FLAGS,
+ INIT_EVQ_V2_IN_FLAG_INTERRUPTING, 1,
+ INIT_EVQ_V2_IN_FLAG_TYPE,
+ MC_CMD_INIT_EVQ_V2_IN_FLAG_TYPE_AUTO);
+ } else {
+ MCDI_POPULATE_DWORD_4(inbuf, INIT_EVQ_IN_FLAGS,
+ INIT_EVQ_IN_FLAG_INTERRUPTING, 1,
+ INIT_EVQ_IN_FLAG_RX_MERGE, 1,
+ INIT_EVQ_IN_FLAG_TX_MERGE, 1,
+ INIT_EVQ_IN_FLAG_CUT_THRU, v1_cut_thru);
+ }
+
+ dma_addr = channel->eventq.buf.dma_addr;
+ for (i = 0; i < entries; ++i) {
+ MCDI_SET_ARRAY_QWORD(inbuf, INIT_EVQ_IN_DMA_ADDR, i, dma_addr);
+ dma_addr += EFX_BUF_SIZE;
+ }
+
+ inlen = MC_CMD_INIT_EVQ_IN_LEN(entries);
+
+ rc = efx_mcdi_rpc(efx, MC_CMD_INIT_EVQ, inbuf, inlen,
+ outbuf, sizeof(outbuf), &outlen);
+
+ if (outlen >= MC_CMD_INIT_EVQ_V2_OUT_LEN)
+ netif_dbg(efx, drv, efx->net_dev,
+ "Channel %d using event queue flags %08x\n",
+ channel->channel,
+ MCDI_DWORD(outbuf, INIT_EVQ_V2_OUT_FLAGS));
+
+ return rc;
+}
+
+void efx_mcdi_ev_remove(struct efx_channel *channel)
+{
+ efx_nic_free_buffer(channel->efx, &channel->eventq.buf);
+}
+
+void efx_mcdi_ev_fini(struct efx_channel *channel)
+{
+ MCDI_DECLARE_BUF(inbuf, MC_CMD_FINI_EVQ_IN_LEN);
+ MCDI_DECLARE_BUF_ERR(outbuf);
+ struct efx_nic *efx = channel->efx;
+ size_t outlen;
+ int rc;
+
+ MCDI_SET_DWORD(inbuf, FINI_EVQ_IN_INSTANCE, channel->channel);
+
+ rc = efx_mcdi_rpc_quiet(efx, MC_CMD_FINI_EVQ, inbuf, sizeof(inbuf),
+ outbuf, sizeof(outbuf), &outlen);
+
+ if (rc && rc != -EALREADY)
+ goto fail;
+
+ return;
+
+fail:
+ efx_mcdi_display_error(efx, MC_CMD_FINI_EVQ, MC_CMD_FINI_EVQ_IN_LEN,
+ outbuf, outlen, rc);
+}
+
+int efx_mcdi_tx_init(struct efx_tx_queue *tx_queue, bool tso_v2)
+{
+ MCDI_DECLARE_BUF(inbuf, MC_CMD_INIT_TXQ_IN_LEN(EFX_MAX_DMAQ_SIZE * 8 /
+ EFX_BUF_SIZE));
+ bool csum_offload = tx_queue->queue & EFX_TXQ_TYPE_OFFLOAD;
+ size_t entries = tx_queue->txd.buf.len / EFX_BUF_SIZE;
+ struct efx_channel *channel = tx_queue->channel;
+ struct efx_nic *efx = tx_queue->efx;
+ struct efx_ef10_nic_data *nic_data;
+ dma_addr_t dma_addr;
+ size_t inlen;
+ int rc, i;
+
+ BUILD_BUG_ON(MC_CMD_INIT_TXQ_OUT_LEN != 0);
+
+ nic_data = efx->nic_data;
+
+ MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_SIZE, tx_queue->ptr_mask + 1);
+ MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_TARGET_EVQ, channel->channel);
+ MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_LABEL, tx_queue->queue);
+ MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_INSTANCE, tx_queue->queue);
+ MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_OWNER_ID, 0);
+ MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_PORT_ID, nic_data->vport_id);
+
+ dma_addr = tx_queue->txd.buf.dma_addr;
+
+ netif_dbg(efx, hw, efx->net_dev, "pushing TXQ %d. %zu entries (%llx)\n",
+ tx_queue->queue, entries, (u64)dma_addr);
+
+ for (i = 0; i < entries; ++i) {
+ MCDI_SET_ARRAY_QWORD(inbuf, INIT_TXQ_IN_DMA_ADDR, i, dma_addr);
+ dma_addr += EFX_BUF_SIZE;
+ }
+
+ inlen = MC_CMD_INIT_TXQ_IN_LEN(entries);
+
+ do {
+ MCDI_POPULATE_DWORD_4(inbuf, INIT_TXQ_IN_FLAGS,
+ /* This flag was removed from mcdi_pcol.h for
+ * the non-_EXT version of INIT_TXQ. However,
+ * firmware still honours it.
+ */
+ INIT_TXQ_EXT_IN_FLAG_TSOV2_EN, tso_v2,
+ INIT_TXQ_IN_FLAG_IP_CSUM_DIS, !csum_offload,
+ INIT_TXQ_IN_FLAG_TCP_CSUM_DIS, !csum_offload,
+ INIT_TXQ_EXT_IN_FLAG_TIMESTAMP,
+ tx_queue->timestamping);
+
+ rc = efx_mcdi_rpc_quiet(efx, MC_CMD_INIT_TXQ, inbuf, inlen,
+ NULL, 0, NULL);
+ if (rc == -ENOSPC && tso_v2) {
+ /* Retry without TSOv2 if we're short on contexts. */
+ tso_v2 = false;
+ netif_warn(efx, probe, efx->net_dev,
+ "TSOv2 context not available to segment in "
+ "hardware. TCP performance may be reduced.\n"
+ );
+ } else if (rc) {
+ efx_mcdi_display_error(efx, MC_CMD_INIT_TXQ,
+ MC_CMD_INIT_TXQ_EXT_IN_LEN,
+ NULL, 0, rc);
+ goto fail;
+ }
+ } while (rc);
+
+ return 0;
+
+fail:
+ return rc;
+}
+
+void efx_mcdi_tx_remove(struct efx_tx_queue *tx_queue)
+{
+ efx_nic_free_buffer(tx_queue->efx, &tx_queue->txd.buf);
+}
+
+void efx_mcdi_tx_fini(struct efx_tx_queue *tx_queue)
+{
+ MCDI_DECLARE_BUF(inbuf, MC_CMD_FINI_TXQ_IN_LEN);
+ MCDI_DECLARE_BUF_ERR(outbuf);
+ struct efx_nic *efx = tx_queue->efx;
+ size_t outlen;
+ int rc;
+
+ MCDI_SET_DWORD(inbuf, FINI_TXQ_IN_INSTANCE,
+ tx_queue->queue);
+
+ rc = efx_mcdi_rpc_quiet(efx, MC_CMD_FINI_TXQ, inbuf, sizeof(inbuf),
+ outbuf, sizeof(outbuf), &outlen);
+
+ if (rc && rc != -EALREADY)
+ goto fail;
+
+ return;
+
+fail:
+ efx_mcdi_display_error(efx, MC_CMD_FINI_TXQ, MC_CMD_FINI_TXQ_IN_LEN,
+ outbuf, outlen, rc);
+}
+
+int efx_mcdi_rx_probe(struct efx_rx_queue *rx_queue)
+{
+ return efx_nic_alloc_buffer(rx_queue->efx, &rx_queue->rxd.buf,
+ (rx_queue->ptr_mask + 1) *
+ sizeof(efx_qword_t),
+ GFP_KERNEL);
+}
+
+void efx_mcdi_rx_init(struct efx_rx_queue *rx_queue)
+{
+ MCDI_DECLARE_BUF(inbuf,
+ MC_CMD_INIT_RXQ_IN_LEN(EFX_MAX_DMAQ_SIZE * 8 /
+ EFX_BUF_SIZE));
+ struct efx_channel *channel = efx_rx_queue_channel(rx_queue);
+ size_t entries = rx_queue->rxd.buf.len / EFX_BUF_SIZE;
+ struct efx_nic *efx = rx_queue->efx;
+ struct efx_ef10_nic_data *nic_data = efx->nic_data;
+ dma_addr_t dma_addr;
+ size_t inlen;
+ int rc;
+ int i;
+ BUILD_BUG_ON(MC_CMD_INIT_RXQ_OUT_LEN != 0);
+
+ rx_queue->scatter_n = 0;
+ rx_queue->scatter_len = 0;
+
+ MCDI_SET_DWORD(inbuf, INIT_RXQ_IN_SIZE, rx_queue->ptr_mask + 1);
+ MCDI_SET_DWORD(inbuf, INIT_RXQ_IN_TARGET_EVQ, channel->channel);
+ MCDI_SET_DWORD(inbuf, INIT_RXQ_IN_LABEL, efx_rx_queue_index(rx_queue));
+ MCDI_SET_DWORD(inbuf, INIT_RXQ_IN_INSTANCE,
+ efx_rx_queue_index(rx_queue));
+ MCDI_POPULATE_DWORD_2(inbuf, INIT_RXQ_IN_FLAGS,
+ INIT_RXQ_IN_FLAG_PREFIX, 1,
+ INIT_RXQ_IN_FLAG_TIMESTAMP, 1);
+ MCDI_SET_DWORD(inbuf, INIT_RXQ_IN_OWNER_ID, 0);
+ MCDI_SET_DWORD(inbuf, INIT_RXQ_IN_PORT_ID, nic_data->vport_id);
+
+ dma_addr = rx_queue->rxd.buf.dma_addr;
+
+ netif_dbg(efx, hw, efx->net_dev, "pushing RXQ %d. %zu entries (%llx)\n",
+ efx_rx_queue_index(rx_queue), entries, (u64)dma_addr);
+
+ for (i = 0; i < entries; ++i) {
+ MCDI_SET_ARRAY_QWORD(inbuf, INIT_RXQ_IN_DMA_ADDR, i, dma_addr);
+ dma_addr += EFX_BUF_SIZE;
+ }
+
+ inlen = MC_CMD_INIT_RXQ_IN_LEN(entries);
+
+ rc = efx_mcdi_rpc(efx, MC_CMD_INIT_RXQ, inbuf, inlen,
+ NULL, 0, NULL);
+ if (rc)
+ netdev_WARN(efx->net_dev, "failed to initialise RXQ %d\n",
+ efx_rx_queue_index(rx_queue));
+}
+
+void efx_mcdi_rx_remove(struct efx_rx_queue *rx_queue)
+{
+ efx_nic_free_buffer(rx_queue->efx, &rx_queue->rxd.buf);
+}
+
+void efx_mcdi_rx_fini(struct efx_rx_queue *rx_queue)
+{
+ MCDI_DECLARE_BUF(inbuf, MC_CMD_FINI_RXQ_IN_LEN);
+ MCDI_DECLARE_BUF_ERR(outbuf);
+ struct efx_nic *efx = rx_queue->efx;
+ size_t outlen;
+ int rc;
+
+ MCDI_SET_DWORD(inbuf, FINI_RXQ_IN_INSTANCE,
+ efx_rx_queue_index(rx_queue));
+
+ rc = efx_mcdi_rpc_quiet(efx, MC_CMD_FINI_RXQ, inbuf, sizeof(inbuf),
+ outbuf, sizeof(outbuf), &outlen);
+
+ if (rc && rc != -EALREADY)
+ goto fail;
+
+ return;
+
+fail:
+ efx_mcdi_display_error(efx, MC_CMD_FINI_RXQ, MC_CMD_FINI_RXQ_IN_LEN,
+ outbuf, outlen, rc);
+}
+
+int efx_mcdi_window_mode_to_stride(struct efx_nic *efx, u8 vi_window_mode)
+{
+ switch (vi_window_mode) {
+ case MC_CMD_GET_CAPABILITIES_V3_OUT_VI_WINDOW_MODE_8K:
+ efx->vi_stride = 8192;
+ break;
+ case MC_CMD_GET_CAPABILITIES_V3_OUT_VI_WINDOW_MODE_16K:
+ efx->vi_stride = 16384;
+ break;
+ case MC_CMD_GET_CAPABILITIES_V3_OUT_VI_WINDOW_MODE_64K:
+ efx->vi_stride = 65536;
+ break;
+ default:
+ netif_err(efx, probe, efx->net_dev,
+ "Unrecognised VI window mode %d\n",
+ vi_window_mode);
+ return -EIO;
+ }
+ netif_dbg(efx, probe, efx->net_dev, "vi_stride = %u\n",
+ efx->vi_stride);
+ return 0;
+}
+
+int efx_get_pf_index(struct efx_nic *efx, unsigned int *pf_index)
+{
+ MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_FUNCTION_INFO_OUT_LEN);
+ size_t outlen;
+ int rc;
+
+ rc = efx_mcdi_rpc(efx, MC_CMD_GET_FUNCTION_INFO, NULL, 0, outbuf,
+ sizeof(outbuf), &outlen);
+ if (rc)
+ return rc;
+ if (outlen < sizeof(outbuf))
+ return -EIO;
+
+ *pf_index = MCDI_DWORD(outbuf, GET_FUNCTION_INFO_OUT_PF);
+ return 0;
+}
diff --git a/drivers/net/ethernet/sfc/mcdi_functions.h b/drivers/net/ethernet/sfc/mcdi_functions.h
new file mode 100644
index 000000000000..ca4a5ac1a66b
--- /dev/null
+++ b/drivers/net/ethernet/sfc/mcdi_functions.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2018 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+#ifndef EFX_MCDI_FUNCTIONS_H
+#define EFX_MCDI_FUNCTIONS_H
+
+int efx_mcdi_alloc_vis(struct efx_nic *efx, unsigned int min_vis,
+ unsigned int max_vis, unsigned int *vi_base,
+ unsigned int *allocated_vis);
+int efx_mcdi_free_vis(struct efx_nic *efx);
+
+int efx_mcdi_ev_probe(struct efx_channel *channel);
+int efx_mcdi_ev_init(struct efx_channel *channel, bool v1_cut_thru, bool v2);
+void efx_mcdi_ev_remove(struct efx_channel *channel);
+void efx_mcdi_ev_fini(struct efx_channel *channel);
+int efx_mcdi_tx_init(struct efx_tx_queue *tx_queue, bool tso_v2);
+void efx_mcdi_tx_remove(struct efx_tx_queue *tx_queue);
+void efx_mcdi_tx_fini(struct efx_tx_queue *tx_queue);
+int efx_mcdi_rx_probe(struct efx_rx_queue *rx_queue);
+void efx_mcdi_rx_init(struct efx_rx_queue *rx_queue);
+void efx_mcdi_rx_remove(struct efx_rx_queue *rx_queue);
+void efx_mcdi_rx_fini(struct efx_rx_queue *rx_queue);
+int efx_mcdi_window_mode_to_stride(struct efx_nic *efx, u8 vi_window_mode);
+int efx_get_pf_index(struct efx_nic *efx, unsigned int *pf_index);
+
+#endif
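The declarations above pair up as probe/init and fini/remove, matching the implementations in mcdi_functions.c: *_probe() allocates the queue's DMA buffer, *_init() pushes the queue to the firmware over MCDI, *_fini() tears it down again and *_remove() frees the buffer. A minimal sketch for an event queue, assuming a hypothetical example_start_evq() caller that is not part of this patch:

static int example_start_evq(struct efx_channel *channel)
{
	int rc;

	rc = efx_mcdi_ev_probe(channel);	/* allocate the EVQ DMA buffer */
	if (rc)
		return rc;

	/* No v1 cut-through; request the v2 (generic) INIT_EVQ flags. */
	rc = efx_mcdi_ev_init(channel, false, true);
	if (rc)
		efx_mcdi_ev_remove(channel);	/* free the buffer on failure */
	return rc;
}

Teardown reverses this: efx_mcdi_ev_fini(channel) followed by efx_mcdi_ev_remove(channel).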
diff --git a/drivers/net/ethernet/sfc/mcdi_port.c b/drivers/net/ethernet/sfc/mcdi_port.c
index fb7cde4980ed..ab5227b13ae6 100644
--- a/drivers/net/ethernet/sfc/mcdi_port.c
+++ b/drivers/net/ethernet/sfc/mcdi_port.c
@@ -14,106 +14,7 @@
#include "mcdi_pcol.h"
#include "nic.h"
#include "selftest.h"
-
-struct efx_mcdi_phy_data {
- u32 flags;
- u32 type;
- u32 supported_cap;
- u32 channel;
- u32 port;
- u32 stats_mask;
- u8 name[20];
- u32 media;
- u32 mmd_mask;
- u8 revision[20];
- u32 forced_cap;
-};
-
-static int
-efx_mcdi_get_phy_cfg(struct efx_nic *efx, struct efx_mcdi_phy_data *cfg)
-{
- MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_PHY_CFG_OUT_LEN);
- size_t outlen;
- int rc;
-
- BUILD_BUG_ON(MC_CMD_GET_PHY_CFG_IN_LEN != 0);
- BUILD_BUG_ON(MC_CMD_GET_PHY_CFG_OUT_NAME_LEN != sizeof(cfg->name));
-
- rc = efx_mcdi_rpc(efx, MC_CMD_GET_PHY_CFG, NULL, 0,
- outbuf, sizeof(outbuf), &outlen);
- if (rc)
- goto fail;
-
- if (outlen < MC_CMD_GET_PHY_CFG_OUT_LEN) {
- rc = -EIO;
- goto fail;
- }
-
- cfg->flags = MCDI_DWORD(outbuf, GET_PHY_CFG_OUT_FLAGS);
- cfg->type = MCDI_DWORD(outbuf, GET_PHY_CFG_OUT_TYPE);
- cfg->supported_cap =
- MCDI_DWORD(outbuf, GET_PHY_CFG_OUT_SUPPORTED_CAP);
- cfg->channel = MCDI_DWORD(outbuf, GET_PHY_CFG_OUT_CHANNEL);
- cfg->port = MCDI_DWORD(outbuf, GET_PHY_CFG_OUT_PRT);
- cfg->stats_mask = MCDI_DWORD(outbuf, GET_PHY_CFG_OUT_STATS_MASK);
- memcpy(cfg->name, MCDI_PTR(outbuf, GET_PHY_CFG_OUT_NAME),
- sizeof(cfg->name));
- cfg->media = MCDI_DWORD(outbuf, GET_PHY_CFG_OUT_MEDIA_TYPE);
- cfg->mmd_mask = MCDI_DWORD(outbuf, GET_PHY_CFG_OUT_MMD_MASK);
- memcpy(cfg->revision, MCDI_PTR(outbuf, GET_PHY_CFG_OUT_REVISION),
- sizeof(cfg->revision));
-
- return 0;
-
-fail:
- netif_err(efx, hw, efx->net_dev, "%s: failed rc=%d\n", __func__, rc);
- return rc;
-}
-
-static int efx_mcdi_set_link(struct efx_nic *efx, u32 capabilities,
- u32 flags, u32 loopback_mode,
- u32 loopback_speed)
-{
- MCDI_DECLARE_BUF(inbuf, MC_CMD_SET_LINK_IN_LEN);
- int rc;
-
- BUILD_BUG_ON(MC_CMD_SET_LINK_OUT_LEN != 0);
-
- MCDI_SET_DWORD(inbuf, SET_LINK_IN_CAP, capabilities);
- MCDI_SET_DWORD(inbuf, SET_LINK_IN_FLAGS, flags);
- MCDI_SET_DWORD(inbuf, SET_LINK_IN_LOOPBACK_MODE, loopback_mode);
- MCDI_SET_DWORD(inbuf, SET_LINK_IN_LOOPBACK_SPEED, loopback_speed);
-
- rc = efx_mcdi_rpc(efx, MC_CMD_SET_LINK, inbuf, sizeof(inbuf),
- NULL, 0, NULL);
- return rc;
-}
-
-static int efx_mcdi_loopback_modes(struct efx_nic *efx, u64 *loopback_modes)
-{
- MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_LOOPBACK_MODES_OUT_LEN);
- size_t outlen;
- int rc;
-
- rc = efx_mcdi_rpc(efx, MC_CMD_GET_LOOPBACK_MODES, NULL, 0,
- outbuf, sizeof(outbuf), &outlen);
- if (rc)
- goto fail;
-
- if (outlen < (MC_CMD_GET_LOOPBACK_MODES_OUT_SUGGESTED_OFST +
- MC_CMD_GET_LOOPBACK_MODES_OUT_SUGGESTED_LEN)) {
- rc = -EIO;
- goto fail;
- }
-
- *loopback_modes = MCDI_QWORD(outbuf, GET_LOOPBACK_MODES_OUT_SUGGESTED);
-
- return 0;
-
-fail:
- netif_err(efx, hw, efx->net_dev, "%s: failed rc=%d\n", __func__, rc);
- return rc;
-}
+#include "mcdi_port_common.h"
static int efx_mcdi_mdio_read(struct net_device *net_dev,
int prtad, int devad, u16 addr)
@@ -168,246 +69,6 @@ static int efx_mcdi_mdio_write(struct net_device *net_dev,
return 0;
}
-static void mcdi_to_ethtool_linkset(u32 media, u32 cap, unsigned long *linkset)
-{
- #define SET_BIT(name) __set_bit(ETHTOOL_LINK_MODE_ ## name ## _BIT, \
- linkset)
-
- bitmap_zero(linkset, __ETHTOOL_LINK_MODE_MASK_NBITS);
- switch (media) {
- case MC_CMD_MEDIA_KX4:
- SET_BIT(Backplane);
- if (cap & (1 << MC_CMD_PHY_CAP_1000FDX_LBN))
- SET_BIT(1000baseKX_Full);
- if (cap & (1 << MC_CMD_PHY_CAP_10000FDX_LBN))
- SET_BIT(10000baseKX4_Full);
- if (cap & (1 << MC_CMD_PHY_CAP_40000FDX_LBN))
- SET_BIT(40000baseKR4_Full);
- break;
-
- case MC_CMD_MEDIA_XFP:
- case MC_CMD_MEDIA_SFP_PLUS:
- case MC_CMD_MEDIA_QSFP_PLUS:
- SET_BIT(FIBRE);
- if (cap & (1 << MC_CMD_PHY_CAP_1000FDX_LBN))
- SET_BIT(1000baseT_Full);
- if (cap & (1 << MC_CMD_PHY_CAP_10000FDX_LBN))
- SET_BIT(10000baseT_Full);
- if (cap & (1 << MC_CMD_PHY_CAP_40000FDX_LBN))
- SET_BIT(40000baseCR4_Full);
- if (cap & (1 << MC_CMD_PHY_CAP_100000FDX_LBN))
- SET_BIT(100000baseCR4_Full);
- if (cap & (1 << MC_CMD_PHY_CAP_25000FDX_LBN))
- SET_BIT(25000baseCR_Full);
- if (cap & (1 << MC_CMD_PHY_CAP_50000FDX_LBN))
- SET_BIT(50000baseCR2_Full);
- break;
-
- case MC_CMD_MEDIA_BASE_T:
- SET_BIT(TP);
- if (cap & (1 << MC_CMD_PHY_CAP_10HDX_LBN))
- SET_BIT(10baseT_Half);
- if (cap & (1 << MC_CMD_PHY_CAP_10FDX_LBN))
- SET_BIT(10baseT_Full);
- if (cap & (1 << MC_CMD_PHY_CAP_100HDX_LBN))
- SET_BIT(100baseT_Half);
- if (cap & (1 << MC_CMD_PHY_CAP_100FDX_LBN))
- SET_BIT(100baseT_Full);
- if (cap & (1 << MC_CMD_PHY_CAP_1000HDX_LBN))
- SET_BIT(1000baseT_Half);
- if (cap & (1 << MC_CMD_PHY_CAP_1000FDX_LBN))
- SET_BIT(1000baseT_Full);
- if (cap & (1 << MC_CMD_PHY_CAP_10000FDX_LBN))
- SET_BIT(10000baseT_Full);
- break;
- }
-
- if (cap & (1 << MC_CMD_PHY_CAP_PAUSE_LBN))
- SET_BIT(Pause);
- if (cap & (1 << MC_CMD_PHY_CAP_ASYM_LBN))
- SET_BIT(Asym_Pause);
- if (cap & (1 << MC_CMD_PHY_CAP_AN_LBN))
- SET_BIT(Autoneg);
-
- #undef SET_BIT
-}
-
-static u32 ethtool_linkset_to_mcdi_cap(const unsigned long *linkset)
-{
- u32 result = 0;
-
- #define TEST_BIT(name) test_bit(ETHTOOL_LINK_MODE_ ## name ## _BIT, \
- linkset)
-
- if (TEST_BIT(10baseT_Half))
- result |= (1 << MC_CMD_PHY_CAP_10HDX_LBN);
- if (TEST_BIT(10baseT_Full))
- result |= (1 << MC_CMD_PHY_CAP_10FDX_LBN);
- if (TEST_BIT(100baseT_Half))
- result |= (1 << MC_CMD_PHY_CAP_100HDX_LBN);
- if (TEST_BIT(100baseT_Full))
- result |= (1 << MC_CMD_PHY_CAP_100FDX_LBN);
- if (TEST_BIT(1000baseT_Half))
- result |= (1 << MC_CMD_PHY_CAP_1000HDX_LBN);
- if (TEST_BIT(1000baseT_Full) || TEST_BIT(1000baseKX_Full))
- result |= (1 << MC_CMD_PHY_CAP_1000FDX_LBN);
- if (TEST_BIT(10000baseT_Full) || TEST_BIT(10000baseKX4_Full))
- result |= (1 << MC_CMD_PHY_CAP_10000FDX_LBN);
- if (TEST_BIT(40000baseCR4_Full) || TEST_BIT(40000baseKR4_Full))
- result |= (1 << MC_CMD_PHY_CAP_40000FDX_LBN);
- if (TEST_BIT(100000baseCR4_Full))
- result |= (1 << MC_CMD_PHY_CAP_100000FDX_LBN);
- if (TEST_BIT(25000baseCR_Full))
- result |= (1 << MC_CMD_PHY_CAP_25000FDX_LBN);
- if (TEST_BIT(50000baseCR2_Full))
- result |= (1 << MC_CMD_PHY_CAP_50000FDX_LBN);
- if (TEST_BIT(Pause))
- result |= (1 << MC_CMD_PHY_CAP_PAUSE_LBN);
- if (TEST_BIT(Asym_Pause))
- result |= (1 << MC_CMD_PHY_CAP_ASYM_LBN);
- if (TEST_BIT(Autoneg))
- result |= (1 << MC_CMD_PHY_CAP_AN_LBN);
-
- #undef TEST_BIT
-
- return result;
-}
-
-static u32 efx_get_mcdi_phy_flags(struct efx_nic *efx)
-{
- struct efx_mcdi_phy_data *phy_cfg = efx->phy_data;
- enum efx_phy_mode mode, supported;
- u32 flags;
-
- /* TODO: Advertise the capabilities supported by this PHY */
- supported = 0;
- if (phy_cfg->flags & (1 << MC_CMD_GET_PHY_CFG_OUT_TXDIS_LBN))
- supported |= PHY_MODE_TX_DISABLED;
- if (phy_cfg->flags & (1 << MC_CMD_GET_PHY_CFG_OUT_LOWPOWER_LBN))
- supported |= PHY_MODE_LOW_POWER;
- if (phy_cfg->flags & (1 << MC_CMD_GET_PHY_CFG_OUT_POWEROFF_LBN))
- supported |= PHY_MODE_OFF;
-
- mode = efx->phy_mode & supported;
-
- flags = 0;
- if (mode & PHY_MODE_TX_DISABLED)
- flags |= (1 << MC_CMD_SET_LINK_IN_TXDIS_LBN);
- if (mode & PHY_MODE_LOW_POWER)
- flags |= (1 << MC_CMD_SET_LINK_IN_LOWPOWER_LBN);
- if (mode & PHY_MODE_OFF)
- flags |= (1 << MC_CMD_SET_LINK_IN_POWEROFF_LBN);
-
- return flags;
-}
-
-static u8 mcdi_to_ethtool_media(u32 media)
-{
- switch (media) {
- case MC_CMD_MEDIA_XAUI:
- case MC_CMD_MEDIA_CX4:
- case MC_CMD_MEDIA_KX4:
- return PORT_OTHER;
-
- case MC_CMD_MEDIA_XFP:
- case MC_CMD_MEDIA_SFP_PLUS:
- case MC_CMD_MEDIA_QSFP_PLUS:
- return PORT_FIBRE;
-
- case MC_CMD_MEDIA_BASE_T:
- return PORT_TP;
-
- default:
- return PORT_OTHER;
- }
-}
-
-static void efx_mcdi_phy_decode_link(struct efx_nic *efx,
- struct efx_link_state *link_state,
- u32 speed, u32 flags, u32 fcntl)
-{
- switch (fcntl) {
- case MC_CMD_FCNTL_AUTO:
- WARN_ON(1); /* This is not a link mode */
- link_state->fc = EFX_FC_AUTO | EFX_FC_TX | EFX_FC_RX;
- break;
- case MC_CMD_FCNTL_BIDIR:
- link_state->fc = EFX_FC_TX | EFX_FC_RX;
- break;
- case MC_CMD_FCNTL_RESPOND:
- link_state->fc = EFX_FC_RX;
- break;
- default:
- WARN_ON(1);
- /* Fall through */
- case MC_CMD_FCNTL_OFF:
- link_state->fc = 0;
- break;
- }
-
- link_state->up = !!(flags & (1 << MC_CMD_GET_LINK_OUT_LINK_UP_LBN));
- link_state->fd = !!(flags & (1 << MC_CMD_GET_LINK_OUT_FULL_DUPLEX_LBN));
- link_state->speed = speed;
-}
-
-/* The semantics of the ethtool FEC mode bitmask are not well defined,
- * particularly the meaning of combinations of bits. Which means we get to
- * define our own semantics, as follows:
- * OFF overrides any other bits, and means "disable all FEC" (with the
- * exception of 25G KR4/CR4, where it is not possible to reject it if AN
- * partner requests it).
- * AUTO on its own means use cable requirements and link partner autoneg with
- * fw-default preferences for the cable type.
- * AUTO and either RS or BASER means use the specified FEC type if cable and
- * link partner support it, otherwise autoneg/fw-default.
- * RS or BASER alone means use the specified FEC type if cable and link partner
- * support it and either requests it, otherwise no FEC.
- * Both RS and BASER (whether AUTO or not) means use FEC if cable and link
- * partner support it, preferring RS to BASER.
- */
-static u32 ethtool_fec_caps_to_mcdi(u32 ethtool_cap)
-{
- u32 ret = 0;
-
- if (ethtool_cap & ETHTOOL_FEC_OFF)
- return 0;
-
- if (ethtool_cap & ETHTOOL_FEC_AUTO)
- ret |= (1 << MC_CMD_PHY_CAP_BASER_FEC_LBN) |
- (1 << MC_CMD_PHY_CAP_25G_BASER_FEC_LBN) |
- (1 << MC_CMD_PHY_CAP_RS_FEC_LBN);
- if (ethtool_cap & ETHTOOL_FEC_RS)
- ret |= (1 << MC_CMD_PHY_CAP_RS_FEC_LBN) |
- (1 << MC_CMD_PHY_CAP_RS_FEC_REQUESTED_LBN);
- if (ethtool_cap & ETHTOOL_FEC_BASER)
- ret |= (1 << MC_CMD_PHY_CAP_BASER_FEC_LBN) |
- (1 << MC_CMD_PHY_CAP_25G_BASER_FEC_LBN) |
- (1 << MC_CMD_PHY_CAP_BASER_FEC_REQUESTED_LBN) |
- (1 << MC_CMD_PHY_CAP_25G_BASER_FEC_REQUESTED_LBN);
- return ret;
-}
-
-/* Invert ethtool_fec_caps_to_mcdi. There are two combinations that function
- * can never produce, (baser xor rs) and neither req; the implementation below
- * maps both of those to AUTO. This should never matter, and it's not clear
- * what a better mapping would be anyway.
- */
-static u32 mcdi_fec_caps_to_ethtool(u32 caps, bool is_25g)
-{
- bool rs = caps & (1 << MC_CMD_PHY_CAP_RS_FEC_LBN),
- rs_req = caps & (1 << MC_CMD_PHY_CAP_RS_FEC_REQUESTED_LBN),
- baser = is_25g ? caps & (1 << MC_CMD_PHY_CAP_25G_BASER_FEC_LBN)
- : caps & (1 << MC_CMD_PHY_CAP_BASER_FEC_LBN),
- baser_req = is_25g ? caps & (1 << MC_CMD_PHY_CAP_25G_BASER_FEC_REQUESTED_LBN)
- : caps & (1 << MC_CMD_PHY_CAP_BASER_FEC_REQUESTED_LBN);
-
- if (!baser && !rs)
- return ETHTOOL_FEC_OFF;
- return (rs_req ? ETHTOOL_FEC_RS : 0) |
- (baser_req ? ETHTOOL_FEC_BASER : 0) |
- (baser == baser_req && rs == rs_req ? 0 : ETHTOOL_FEC_AUTO);
-}
-
static int efx_mcdi_phy_probe(struct efx_nic *efx)
{
struct efx_mcdi_phy_data *phy_data;
@@ -527,58 +188,6 @@ int efx_mcdi_port_reconfigure(struct efx_nic *efx)
efx->loopback_mode, 0);
}
-/* Verify that the forced flow control settings (!EFX_FC_AUTO) are
- * supported by the link partner. Warn the user if this isn't the case
- */
-static void efx_mcdi_phy_check_fcntl(struct efx_nic *efx, u32 lpa)
-{
- struct efx_mcdi_phy_data *phy_cfg = efx->phy_data;
- u32 rmtadv;
-
- /* The link partner capabilities are only relevant if the
- * link supports flow control autonegotiation */
- if (~phy_cfg->supported_cap & (1 << MC_CMD_PHY_CAP_AN_LBN))
- return;
-
- /* If flow control autoneg is supported and enabled, then fine */
- if (efx->wanted_fc & EFX_FC_AUTO)
- return;
-
- rmtadv = 0;
- if (lpa & (1 << MC_CMD_PHY_CAP_PAUSE_LBN))
- rmtadv |= ADVERTISED_Pause;
- if (lpa & (1 << MC_CMD_PHY_CAP_ASYM_LBN))
- rmtadv |= ADVERTISED_Asym_Pause;
-
- if ((efx->wanted_fc & EFX_FC_TX) && rmtadv == ADVERTISED_Asym_Pause)
- netif_err(efx, link, efx->net_dev,
- "warning: link partner doesn't support pause frames");
-}
-
-static bool efx_mcdi_phy_poll(struct efx_nic *efx)
-{
- struct efx_link_state old_state = efx->link_state;
- MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_LINK_OUT_LEN);
- int rc;
-
- WARN_ON(!mutex_is_locked(&efx->mac_lock));
-
- BUILD_BUG_ON(MC_CMD_GET_LINK_IN_LEN != 0);
-
- rc = efx_mcdi_rpc(efx, MC_CMD_GET_LINK, NULL, 0,
- outbuf, sizeof(outbuf), NULL);
- if (rc)
- efx->link_state.up = false;
- else
- efx_mcdi_phy_decode_link(
- efx, &efx->link_state,
- MCDI_DWORD(outbuf, GET_LINK_OUT_LINK_SPEED),
- MCDI_DWORD(outbuf, GET_LINK_OUT_FLAGS),
- MCDI_DWORD(outbuf, GET_LINK_OUT_FCNTL));
-
- return !efx_link_state_equal(&efx->link_state, &old_state);
-}
-
static void efx_mcdi_phy_remove(struct efx_nic *efx)
{
struct efx_mcdi_phy_data *phy_data = efx->phy_data;
@@ -666,58 +275,6 @@ efx_mcdi_phy_set_link_ksettings(struct efx_nic *efx,
return 0;
}
-static int efx_mcdi_phy_get_fecparam(struct efx_nic *efx,
- struct ethtool_fecparam *fec)
-{
- MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_LINK_OUT_V2_LEN);
- u32 caps, active, speed; /* MCDI format */
- bool is_25g = false;
- size_t outlen;
- int rc;
-
- BUILD_BUG_ON(MC_CMD_GET_LINK_IN_LEN != 0);
- rc = efx_mcdi_rpc(efx, MC_CMD_GET_LINK, NULL, 0,
- outbuf, sizeof(outbuf), &outlen);
- if (rc)
- return rc;
- if (outlen < MC_CMD_GET_LINK_OUT_V2_LEN)
- return -EOPNOTSUPP;
-
- /* behaviour for 25G/50G links depends on 25G BASER bit */
- speed = MCDI_DWORD(outbuf, GET_LINK_OUT_V2_LINK_SPEED);
- is_25g = speed == 25000 || speed == 50000;
-
- caps = MCDI_DWORD(outbuf, GET_LINK_OUT_V2_CAP);
- fec->fec = mcdi_fec_caps_to_ethtool(caps, is_25g);
- /* BASER is never supported on 100G */
- if (speed == 100000)
- fec->fec &= ~ETHTOOL_FEC_BASER;
-
- active = MCDI_DWORD(outbuf, GET_LINK_OUT_V2_FEC_TYPE);
- switch (active) {
- case MC_CMD_FEC_NONE:
- fec->active_fec = ETHTOOL_FEC_OFF;
- break;
- case MC_CMD_FEC_BASER:
- fec->active_fec = ETHTOOL_FEC_BASER;
- break;
- case MC_CMD_FEC_RS:
- fec->active_fec = ETHTOOL_FEC_RS;
- break;
- default:
- netif_warn(efx, hw, efx->net_dev,
- "Firmware reports unrecognised FEC_TYPE %u\n",
- active);
- /* We don't know what firmware has picked. AUTO is as good a
- * "can't happen" value as any other.
- */
- fec->active_fec = ETHTOOL_FEC_AUTO;
- break;
- }
-
- return 0;
-}
-
static int efx_mcdi_phy_set_fecparam(struct efx_nic *efx,
const struct ethtool_fecparam *fec)
{
@@ -745,27 +302,6 @@ static int efx_mcdi_phy_set_fecparam(struct efx_nic *efx,
return 0;
}
-static int efx_mcdi_phy_test_alive(struct efx_nic *efx)
-{
- MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_PHY_STATE_OUT_LEN);
- size_t outlen;
- int rc;
-
- BUILD_BUG_ON(MC_CMD_GET_PHY_STATE_IN_LEN != 0);
-
- rc = efx_mcdi_rpc(efx, MC_CMD_GET_PHY_STATE, NULL, 0,
- outbuf, sizeof(outbuf), &outlen);
- if (rc)
- return rc;
-
- if (outlen < MC_CMD_GET_PHY_STATE_OUT_LEN)
- return -EIO;
- if (MCDI_DWORD(outbuf, GET_PHY_STATE_OUT_STATE) != MC_CMD_PHY_STATE_OK)
- return -EINVAL;
-
- return 0;
-}
-
static const char *const mcdi_sft9001_cable_diag_names[] = {
"cable.pairA.length",
"cable.pairB.length",
@@ -1139,84 +675,6 @@ u32 efx_mcdi_phy_get_caps(struct efx_nic *efx)
return phy_data->supported_cap;
}
-static unsigned int efx_mcdi_event_link_speed[] = {
- [MCDI_EVENT_LINKCHANGE_SPEED_100M] = 100,
- [MCDI_EVENT_LINKCHANGE_SPEED_1G] = 1000,
- [MCDI_EVENT_LINKCHANGE_SPEED_10G] = 10000,
- [MCDI_EVENT_LINKCHANGE_SPEED_40G] = 40000,
- [MCDI_EVENT_LINKCHANGE_SPEED_25G] = 25000,
- [MCDI_EVENT_LINKCHANGE_SPEED_50G] = 50000,
- [MCDI_EVENT_LINKCHANGE_SPEED_100G] = 100000,
-};
-
-void efx_mcdi_process_link_change(struct efx_nic *efx, efx_qword_t *ev)
-{
- u32 flags, fcntl, speed, lpa;
-
- speed = EFX_QWORD_FIELD(*ev, MCDI_EVENT_LINKCHANGE_SPEED);
- EFX_WARN_ON_PARANOID(speed >= ARRAY_SIZE(efx_mcdi_event_link_speed));
- speed = efx_mcdi_event_link_speed[speed];
-
- flags = EFX_QWORD_FIELD(*ev, MCDI_EVENT_LINKCHANGE_LINK_FLAGS);
- fcntl = EFX_QWORD_FIELD(*ev, MCDI_EVENT_LINKCHANGE_FCNTL);
- lpa = EFX_QWORD_FIELD(*ev, MCDI_EVENT_LINKCHANGE_LP_CAP);
-
- /* efx->link_state is only modified by efx_mcdi_phy_get_link(),
- * which is only run after flushing the event queues. Therefore, it
- * is safe to modify the link state outside of the mac_lock here.
- */
- efx_mcdi_phy_decode_link(efx, &efx->link_state, speed, flags, fcntl);
-
- efx_mcdi_phy_check_fcntl(efx, lpa);
-
- efx_link_status_changed(efx);
-}
-
-int efx_mcdi_set_mac(struct efx_nic *efx)
-{
- u32 fcntl;
- MCDI_DECLARE_BUF(cmdbytes, MC_CMD_SET_MAC_IN_LEN);
-
- BUILD_BUG_ON(MC_CMD_SET_MAC_OUT_LEN != 0);
-
- /* This has no effect on EF10 */
- ether_addr_copy(MCDI_PTR(cmdbytes, SET_MAC_IN_ADDR),
- efx->net_dev->dev_addr);
-
- MCDI_SET_DWORD(cmdbytes, SET_MAC_IN_MTU,
- EFX_MAX_FRAME_LEN(efx->net_dev->mtu));
- MCDI_SET_DWORD(cmdbytes, SET_MAC_IN_DRAIN, 0);
-
- /* Set simple MAC filter for Siena */
- MCDI_POPULATE_DWORD_1(cmdbytes, SET_MAC_IN_REJECT,
- SET_MAC_IN_REJECT_UNCST, efx->unicast_filter);
-
- MCDI_POPULATE_DWORD_1(cmdbytes, SET_MAC_IN_FLAGS,
- SET_MAC_IN_FLAG_INCLUDE_FCS,
- !!(efx->net_dev->features & NETIF_F_RXFCS));
-
- switch (efx->wanted_fc) {
- case EFX_FC_RX | EFX_FC_TX:
- fcntl = MC_CMD_FCNTL_BIDIR;
- break;
- case EFX_FC_RX:
- fcntl = MC_CMD_FCNTL_RESPOND;
- break;
- default:
- fcntl = MC_CMD_FCNTL_OFF;
- break;
- }
- if (efx->wanted_fc & EFX_FC_AUTO)
- fcntl = MC_CMD_FCNTL_AUTO;
- if (efx->fc_disable)
- fcntl = MC_CMD_FCNTL_OFF;
-
- MCDI_SET_DWORD(cmdbytes, SET_MAC_IN_FCNTL, fcntl);
-
- return efx_mcdi_rpc(efx, MC_CMD_SET_MAC, cmdbytes, sizeof(cmdbytes),
- NULL, 0, NULL);
-}
-
bool efx_mcdi_mac_check_fault(struct efx_nic *efx)
{
MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_LINK_OUT_LEN);
@@ -1348,17 +806,3 @@ void efx_mcdi_port_remove(struct efx_nic *efx)
efx->phy_op->remove(efx);
efx_nic_free_buffer(efx, &efx->stats_buffer);
}
-
-/* Get physical port number (EF10 only; on Siena it is same as PF number) */
-int efx_mcdi_port_get_number(struct efx_nic *efx)
-{
- MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_PORT_ASSIGNMENT_OUT_LEN);
- int rc;
-
- rc = efx_mcdi_rpc(efx, MC_CMD_GET_PORT_ASSIGNMENT, NULL, 0,
- outbuf, sizeof(outbuf), NULL);
- if (rc)
- return rc;
-
- return MCDI_DWORD(outbuf, GET_PORT_ASSIGNMENT_OUT_PORT);
-}
diff --git a/drivers/net/ethernet/sfc/mcdi_port_common.c b/drivers/net/ethernet/sfc/mcdi_port_common.c
new file mode 100644
index 000000000000..a6a072ba46d3
--- /dev/null
+++ b/drivers/net/ethernet/sfc/mcdi_port_common.c
@@ -0,0 +1,568 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2018 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#include "mcdi_port_common.h"
+#include "efx_common.h"
+
+int efx_mcdi_get_phy_cfg(struct efx_nic *efx, struct efx_mcdi_phy_data *cfg)
+{
+ MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_PHY_CFG_OUT_LEN);
+ size_t outlen;
+ int rc;
+
+ BUILD_BUG_ON(MC_CMD_GET_PHY_CFG_IN_LEN != 0);
+ BUILD_BUG_ON(MC_CMD_GET_PHY_CFG_OUT_NAME_LEN != sizeof(cfg->name));
+
+ rc = efx_mcdi_rpc(efx, MC_CMD_GET_PHY_CFG, NULL, 0,
+ outbuf, sizeof(outbuf), &outlen);
+ if (rc)
+ goto fail;
+
+ if (outlen < MC_CMD_GET_PHY_CFG_OUT_LEN) {
+ rc = -EIO;
+ goto fail;
+ }
+
+ cfg->flags = MCDI_DWORD(outbuf, GET_PHY_CFG_OUT_FLAGS);
+ cfg->type = MCDI_DWORD(outbuf, GET_PHY_CFG_OUT_TYPE);
+ cfg->supported_cap =
+ MCDI_DWORD(outbuf, GET_PHY_CFG_OUT_SUPPORTED_CAP);
+ cfg->channel = MCDI_DWORD(outbuf, GET_PHY_CFG_OUT_CHANNEL);
+ cfg->port = MCDI_DWORD(outbuf, GET_PHY_CFG_OUT_PRT);
+ cfg->stats_mask = MCDI_DWORD(outbuf, GET_PHY_CFG_OUT_STATS_MASK);
+ memcpy(cfg->name, MCDI_PTR(outbuf, GET_PHY_CFG_OUT_NAME),
+ sizeof(cfg->name));
+ cfg->media = MCDI_DWORD(outbuf, GET_PHY_CFG_OUT_MEDIA_TYPE);
+ cfg->mmd_mask = MCDI_DWORD(outbuf, GET_PHY_CFG_OUT_MMD_MASK);
+ memcpy(cfg->revision, MCDI_PTR(outbuf, GET_PHY_CFG_OUT_REVISION),
+ sizeof(cfg->revision));
+
+ return 0;
+
+fail:
+ netif_err(efx, hw, efx->net_dev, "%s: failed rc=%d\n", __func__, rc);
+ return rc;
+}
+
+void efx_link_set_advertising(struct efx_nic *efx,
+ const unsigned long *advertising)
+{
+ memcpy(efx->link_advertising, advertising,
+ sizeof(__ETHTOOL_DECLARE_LINK_MODE_MASK()));
+
+ efx->link_advertising[0] |= ADVERTISED_Autoneg;
+ if (advertising[0] & ADVERTISED_Pause)
+ efx->wanted_fc |= (EFX_FC_TX | EFX_FC_RX);
+ else
+ efx->wanted_fc &= ~(EFX_FC_TX | EFX_FC_RX);
+ if (advertising[0] & ADVERTISED_Asym_Pause)
+ efx->wanted_fc ^= EFX_FC_TX;
+}
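For review, the effect of the helper above on the EFX_FC_TX/EFX_FC_RX bits of efx->wanted_fc, traced from the code rather than stated anywhere in this patch:

	/* Pause only         -> EFX_FC_TX | EFX_FC_RX  (symmetric pause)
	 * Pause + Asym_Pause -> EFX_FC_RX              (receive-only pause)
	 * Asym_Pause only    -> EFX_FC_TX              (transmit-only pause)
	 * neither            -> both bits cleared
	 */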
+
+int efx_mcdi_set_link(struct efx_nic *efx, u32 capabilities,
+ u32 flags, u32 loopback_mode, u32 loopback_speed)
+{
+ MCDI_DECLARE_BUF(inbuf, MC_CMD_SET_LINK_IN_LEN);
+ int rc;
+
+ BUILD_BUG_ON(MC_CMD_SET_LINK_OUT_LEN != 0);
+
+ MCDI_SET_DWORD(inbuf, SET_LINK_IN_CAP, capabilities);
+ MCDI_SET_DWORD(inbuf, SET_LINK_IN_FLAGS, flags);
+ MCDI_SET_DWORD(inbuf, SET_LINK_IN_LOOPBACK_MODE, loopback_mode);
+ MCDI_SET_DWORD(inbuf, SET_LINK_IN_LOOPBACK_SPEED, loopback_speed);
+
+ rc = efx_mcdi_rpc(efx, MC_CMD_SET_LINK, inbuf, sizeof(inbuf),
+ NULL, 0, NULL);
+ return rc;
+}
+
+int efx_mcdi_loopback_modes(struct efx_nic *efx, u64 *loopback_modes)
+{
+ MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_LOOPBACK_MODES_OUT_LEN);
+ size_t outlen;
+ int rc;
+
+ rc = efx_mcdi_rpc(efx, MC_CMD_GET_LOOPBACK_MODES, NULL, 0,
+ outbuf, sizeof(outbuf), &outlen);
+ if (rc)
+ goto fail;
+
+ if (outlen < (MC_CMD_GET_LOOPBACK_MODES_OUT_SUGGESTED_OFST +
+ MC_CMD_GET_LOOPBACK_MODES_OUT_SUGGESTED_LEN)) {
+ rc = -EIO;
+ goto fail;
+ }
+
+ *loopback_modes = MCDI_QWORD(outbuf, GET_LOOPBACK_MODES_OUT_SUGGESTED);
+
+ return 0;
+
+fail:
+ netif_err(efx, hw, efx->net_dev, "%s: failed rc=%d\n", __func__, rc);
+ return rc;
+}
+
+void mcdi_to_ethtool_linkset(u32 media, u32 cap, unsigned long *linkset)
+{
+ #define SET_BIT(name) __set_bit(ETHTOOL_LINK_MODE_ ## name ## _BIT, \
+ linkset)
+
+ bitmap_zero(linkset, __ETHTOOL_LINK_MODE_MASK_NBITS);
+ switch (media) {
+ case MC_CMD_MEDIA_KX4:
+ SET_BIT(Backplane);
+ if (cap & (1 << MC_CMD_PHY_CAP_1000FDX_LBN))
+ SET_BIT(1000baseKX_Full);
+ if (cap & (1 << MC_CMD_PHY_CAP_10000FDX_LBN))
+ SET_BIT(10000baseKX4_Full);
+ if (cap & (1 << MC_CMD_PHY_CAP_40000FDX_LBN))
+ SET_BIT(40000baseKR4_Full);
+ break;
+
+ case MC_CMD_MEDIA_XFP:
+ case MC_CMD_MEDIA_SFP_PLUS:
+ case MC_CMD_MEDIA_QSFP_PLUS:
+ SET_BIT(FIBRE);
+ if (cap & (1 << MC_CMD_PHY_CAP_1000FDX_LBN))
+ SET_BIT(1000baseT_Full);
+ if (cap & (1 << MC_CMD_PHY_CAP_10000FDX_LBN))
+ SET_BIT(10000baseT_Full);
+ if (cap & (1 << MC_CMD_PHY_CAP_40000FDX_LBN))
+ SET_BIT(40000baseCR4_Full);
+ if (cap & (1 << MC_CMD_PHY_CAP_100000FDX_LBN))
+ SET_BIT(100000baseCR4_Full);
+ if (cap & (1 << MC_CMD_PHY_CAP_25000FDX_LBN))
+ SET_BIT(25000baseCR_Full);
+ if (cap & (1 << MC_CMD_PHY_CAP_50000FDX_LBN))
+ SET_BIT(50000baseCR2_Full);
+ break;
+
+ case MC_CMD_MEDIA_BASE_T:
+ SET_BIT(TP);
+ if (cap & (1 << MC_CMD_PHY_CAP_10HDX_LBN))
+ SET_BIT(10baseT_Half);
+ if (cap & (1 << MC_CMD_PHY_CAP_10FDX_LBN))
+ SET_BIT(10baseT_Full);
+ if (cap & (1 << MC_CMD_PHY_CAP_100HDX_LBN))
+ SET_BIT(100baseT_Half);
+ if (cap & (1 << MC_CMD_PHY_CAP_100FDX_LBN))
+ SET_BIT(100baseT_Full);
+ if (cap & (1 << MC_CMD_PHY_CAP_1000HDX_LBN))
+ SET_BIT(1000baseT_Half);
+ if (cap & (1 << MC_CMD_PHY_CAP_1000FDX_LBN))
+ SET_BIT(1000baseT_Full);
+ if (cap & (1 << MC_CMD_PHY_CAP_10000FDX_LBN))
+ SET_BIT(10000baseT_Full);
+ break;
+ }
+
+ if (cap & (1 << MC_CMD_PHY_CAP_PAUSE_LBN))
+ SET_BIT(Pause);
+ if (cap & (1 << MC_CMD_PHY_CAP_ASYM_LBN))
+ SET_BIT(Asym_Pause);
+ if (cap & (1 << MC_CMD_PHY_CAP_AN_LBN))
+ SET_BIT(Autoneg);
+
+ #undef SET_BIT
+}
+
+u32 ethtool_linkset_to_mcdi_cap(const unsigned long *linkset)
+{
+ u32 result = 0;
+
+ #define TEST_BIT(name) test_bit(ETHTOOL_LINK_MODE_ ## name ## _BIT, \
+ linkset)
+
+ if (TEST_BIT(10baseT_Half))
+ result |= (1 << MC_CMD_PHY_CAP_10HDX_LBN);
+ if (TEST_BIT(10baseT_Full))
+ result |= (1 << MC_CMD_PHY_CAP_10FDX_LBN);
+ if (TEST_BIT(100baseT_Half))
+ result |= (1 << MC_CMD_PHY_CAP_100HDX_LBN);
+ if (TEST_BIT(100baseT_Full))
+ result |= (1 << MC_CMD_PHY_CAP_100FDX_LBN);
+ if (TEST_BIT(1000baseT_Half))
+ result |= (1 << MC_CMD_PHY_CAP_1000HDX_LBN);
+ if (TEST_BIT(1000baseT_Full) || TEST_BIT(1000baseKX_Full))
+ result |= (1 << MC_CMD_PHY_CAP_1000FDX_LBN);
+ if (TEST_BIT(10000baseT_Full) || TEST_BIT(10000baseKX4_Full))
+ result |= (1 << MC_CMD_PHY_CAP_10000FDX_LBN);
+ if (TEST_BIT(40000baseCR4_Full) || TEST_BIT(40000baseKR4_Full))
+ result |= (1 << MC_CMD_PHY_CAP_40000FDX_LBN);
+ if (TEST_BIT(100000baseCR4_Full))
+ result |= (1 << MC_CMD_PHY_CAP_100000FDX_LBN);
+ if (TEST_BIT(25000baseCR_Full))
+ result |= (1 << MC_CMD_PHY_CAP_25000FDX_LBN);
+ if (TEST_BIT(50000baseCR2_Full))
+ result |= (1 << MC_CMD_PHY_CAP_50000FDX_LBN);
+ if (TEST_BIT(Pause))
+ result |= (1 << MC_CMD_PHY_CAP_PAUSE_LBN);
+ if (TEST_BIT(Asym_Pause))
+ result |= (1 << MC_CMD_PHY_CAP_ASYM_LBN);
+ if (TEST_BIT(Autoneg))
+ result |= (1 << MC_CMD_PHY_CAP_AN_LBN);
+
+ #undef TEST_BIT
+
+ return result;
+}
+
+u32 efx_get_mcdi_phy_flags(struct efx_nic *efx)
+{
+ struct efx_mcdi_phy_data *phy_cfg = efx->phy_data;
+ enum efx_phy_mode mode, supported;
+ u32 flags;
+
+ /* TODO: Advertise the capabilities supported by this PHY */
+ supported = 0;
+ if (phy_cfg->flags & (1 << MC_CMD_GET_PHY_CFG_OUT_TXDIS_LBN))
+ supported |= PHY_MODE_TX_DISABLED;
+ if (phy_cfg->flags & (1 << MC_CMD_GET_PHY_CFG_OUT_LOWPOWER_LBN))
+ supported |= PHY_MODE_LOW_POWER;
+ if (phy_cfg->flags & (1 << MC_CMD_GET_PHY_CFG_OUT_POWEROFF_LBN))
+ supported |= PHY_MODE_OFF;
+
+ mode = efx->phy_mode & supported;
+
+ flags = 0;
+ if (mode & PHY_MODE_TX_DISABLED)
+ flags |= (1 << MC_CMD_SET_LINK_IN_TXDIS_LBN);
+ if (mode & PHY_MODE_LOW_POWER)
+ flags |= (1 << MC_CMD_SET_LINK_IN_LOWPOWER_LBN);
+ if (mode & PHY_MODE_OFF)
+ flags |= (1 << MC_CMD_SET_LINK_IN_POWEROFF_LBN);
+
+ return flags;
+}
+
+u8 mcdi_to_ethtool_media(u32 media)
+{
+ switch (media) {
+ case MC_CMD_MEDIA_XAUI:
+ case MC_CMD_MEDIA_CX4:
+ case MC_CMD_MEDIA_KX4:
+ return PORT_OTHER;
+
+ case MC_CMD_MEDIA_XFP:
+ case MC_CMD_MEDIA_SFP_PLUS:
+ case MC_CMD_MEDIA_QSFP_PLUS:
+ return PORT_FIBRE;
+
+ case MC_CMD_MEDIA_BASE_T:
+ return PORT_TP;
+
+ default:
+ return PORT_OTHER;
+ }
+}
+
+void efx_mcdi_phy_decode_link(struct efx_nic *efx,
+ struct efx_link_state *link_state,
+ u32 speed, u32 flags, u32 fcntl)
+{
+ switch (fcntl) {
+ case MC_CMD_FCNTL_AUTO:
+ WARN_ON(1); /* This is not a link mode */
+ link_state->fc = EFX_FC_AUTO | EFX_FC_TX | EFX_FC_RX;
+ break;
+ case MC_CMD_FCNTL_BIDIR:
+ link_state->fc = EFX_FC_TX | EFX_FC_RX;
+ break;
+ case MC_CMD_FCNTL_RESPOND:
+ link_state->fc = EFX_FC_RX;
+ break;
+ default:
+ WARN_ON(1);
+ /* Fall through */
+ case MC_CMD_FCNTL_OFF:
+ link_state->fc = 0;
+ break;
+ }
+
+ link_state->up = !!(flags & (1 << MC_CMD_GET_LINK_OUT_LINK_UP_LBN));
+ link_state->fd = !!(flags & (1 << MC_CMD_GET_LINK_OUT_FULL_DUPLEX_LBN));
+ link_state->speed = speed;
+}
+
+/* The semantics of the ethtool FEC mode bitmask are not well defined,
+ * particularly the meaning of combinations of bits. Which means we get to
+ * define our own semantics, as follows:
+ * OFF overrides any other bits, and means "disable all FEC" (with the
+ * exception of 25G KR4/CR4, where it is not possible to reject it if AN
+ * partner requests it).
+ * AUTO on its own means use cable requirements and link partner autoneg with
+ * fw-default preferences for the cable type.
+ * AUTO and either RS or BASER means use the specified FEC type if cable and
+ * link partner support it, otherwise autoneg/fw-default.
+ * RS or BASER alone means use the specified FEC type if cable and link partner
+ * support it and either requests it, otherwise no FEC.
+ * Both RS and BASER (whether AUTO or not) means use FEC if cable and link
+ * partner support it, preferring RS to BASER.
+ */
+u32 ethtool_fec_caps_to_mcdi(u32 ethtool_cap)
+{
+ u32 ret = 0;
+
+ if (ethtool_cap & ETHTOOL_FEC_OFF)
+ return 0;
+
+ if (ethtool_cap & ETHTOOL_FEC_AUTO)
+ ret |= (1 << MC_CMD_PHY_CAP_BASER_FEC_LBN) |
+ (1 << MC_CMD_PHY_CAP_25G_BASER_FEC_LBN) |
+ (1 << MC_CMD_PHY_CAP_RS_FEC_LBN);
+ if (ethtool_cap & ETHTOOL_FEC_RS)
+ ret |= (1 << MC_CMD_PHY_CAP_RS_FEC_LBN) |
+ (1 << MC_CMD_PHY_CAP_RS_FEC_REQUESTED_LBN);
+ if (ethtool_cap & ETHTOOL_FEC_BASER)
+ ret |= (1 << MC_CMD_PHY_CAP_BASER_FEC_LBN) |
+ (1 << MC_CMD_PHY_CAP_25G_BASER_FEC_LBN) |
+ (1 << MC_CMD_PHY_CAP_BASER_FEC_REQUESTED_LBN) |
+ (1 << MC_CMD_PHY_CAP_25G_BASER_FEC_REQUESTED_LBN);
+ return ret;
+}
+
+/* Invert ethtool_fec_caps_to_mcdi. There are two combinations that function
+ * can never produce, (baser xor rs) and neither req; the implementation below
+ * maps both of those to AUTO. This should never matter, and it's not clear
+ * what a better mapping would be anyway.
+ */
+u32 mcdi_fec_caps_to_ethtool(u32 caps, bool is_25g)
+{
+ bool rs = caps & (1 << MC_CMD_PHY_CAP_RS_FEC_LBN),
+ rs_req = caps & (1 << MC_CMD_PHY_CAP_RS_FEC_REQUESTED_LBN),
+ baser = is_25g ? caps & (1 << MC_CMD_PHY_CAP_25G_BASER_FEC_LBN)
+ : caps & (1 << MC_CMD_PHY_CAP_BASER_FEC_LBN),
+ baser_req = is_25g ? caps & (1 << MC_CMD_PHY_CAP_25G_BASER_FEC_REQUESTED_LBN)
+ : caps & (1 << MC_CMD_PHY_CAP_BASER_FEC_REQUESTED_LBN);
+
+ if (!baser && !rs)
+ return ETHTOOL_FEC_OFF;
+ return (rs_req ? ETHTOOL_FEC_RS : 0) |
+ (baser_req ? ETHTOOL_FEC_BASER : 0) |
+ (baser == baser_req && rs == rs_req ? 0 : ETHTOOL_FEC_AUTO);
+}
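A worked example of the two FEC mappings above, traced from the code on a 10G link (is_25g = false); the snippet is illustrative only and not part of the driver:

	u32 caps;

	caps = ethtool_fec_caps_to_mcdi(ETHTOOL_FEC_RS);
	/* RS_FEC and RS_FEC_REQUESTED are set, nothing else, and this
	 * maps straight back to ETHTOOL_FEC_RS.
	 */
	WARN_ON(mcdi_fec_caps_to_ethtool(caps, false) != ETHTOOL_FEC_RS);

	caps = ethtool_fec_caps_to_mcdi(ETHTOOL_FEC_AUTO);
	/* Only the ability bits are set (no _REQUESTED bits), one of the
	 * patterns that maps back to ETHTOOL_FEC_AUTO.
	 */
	WARN_ON(mcdi_fec_caps_to_ethtool(caps, false) != ETHTOOL_FEC_AUTO);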
+
+/* Verify that the forced flow control settings (!EFX_FC_AUTO) are
+ * supported by the link partner. Warn the user if this isn't the case
+ */
+void efx_mcdi_phy_check_fcntl(struct efx_nic *efx, u32 lpa)
+{
+ struct efx_mcdi_phy_data *phy_cfg = efx->phy_data;
+ u32 rmtadv;
+
+ /* The link partner capabilities are only relevant if the
+ * link supports flow control autonegotiation
+ */
+ if (~phy_cfg->supported_cap & (1 << MC_CMD_PHY_CAP_AN_LBN))
+ return;
+
+ /* If flow control autoneg is supported and enabled, then fine */
+ if (efx->wanted_fc & EFX_FC_AUTO)
+ return;
+
+ rmtadv = 0;
+ if (lpa & (1 << MC_CMD_PHY_CAP_PAUSE_LBN))
+ rmtadv |= ADVERTISED_Pause;
+ if (lpa & (1 << MC_CMD_PHY_CAP_ASYM_LBN))
+ rmtadv |= ADVERTISED_Asym_Pause;
+
+ if ((efx->wanted_fc & EFX_FC_TX) && rmtadv == ADVERTISED_Asym_Pause)
+ netif_err(efx, link, efx->net_dev,
+ "warning: link partner doesn't support pause frames");
+}
+
+bool efx_mcdi_phy_poll(struct efx_nic *efx)
+{
+ struct efx_link_state old_state = efx->link_state;
+ MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_LINK_OUT_LEN);
+ int rc;
+
+ WARN_ON(!mutex_is_locked(&efx->mac_lock));
+
+ BUILD_BUG_ON(MC_CMD_GET_LINK_IN_LEN != 0);
+
+ rc = efx_mcdi_rpc(efx, MC_CMD_GET_LINK, NULL, 0,
+ outbuf, sizeof(outbuf), NULL);
+ if (rc)
+ efx->link_state.up = false;
+ else
+ efx_mcdi_phy_decode_link(
+ efx, &efx->link_state,
+ MCDI_DWORD(outbuf, GET_LINK_OUT_LINK_SPEED),
+ MCDI_DWORD(outbuf, GET_LINK_OUT_FLAGS),
+ MCDI_DWORD(outbuf, GET_LINK_OUT_FCNTL));
+
+ return !efx_link_state_equal(&efx->link_state, &old_state);
+}
+
+int efx_mcdi_phy_get_fecparam(struct efx_nic *efx, struct ethtool_fecparam *fec)
+{
+ MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_LINK_OUT_V2_LEN);
+ u32 caps, active, speed; /* MCDI format */
+ bool is_25g = false;
+ size_t outlen;
+ int rc;
+
+ BUILD_BUG_ON(MC_CMD_GET_LINK_IN_LEN != 0);
+ rc = efx_mcdi_rpc(efx, MC_CMD_GET_LINK, NULL, 0,
+ outbuf, sizeof(outbuf), &outlen);
+ if (rc)
+ return rc;
+ if (outlen < MC_CMD_GET_LINK_OUT_V2_LEN)
+ return -EOPNOTSUPP;
+
+ /* behaviour for 25G/50G links depends on 25G BASER bit */
+ speed = MCDI_DWORD(outbuf, GET_LINK_OUT_V2_LINK_SPEED);
+ is_25g = speed == 25000 || speed == 50000;
+
+ caps = MCDI_DWORD(outbuf, GET_LINK_OUT_V2_CAP);
+ fec->fec = mcdi_fec_caps_to_ethtool(caps, is_25g);
+ /* BASER is never supported on 100G */
+ if (speed == 100000)
+ fec->fec &= ~ETHTOOL_FEC_BASER;
+
+ active = MCDI_DWORD(outbuf, GET_LINK_OUT_V2_FEC_TYPE);
+ switch (active) {
+ case MC_CMD_FEC_NONE:
+ fec->active_fec = ETHTOOL_FEC_OFF;
+ break;
+ case MC_CMD_FEC_BASER:
+ fec->active_fec = ETHTOOL_FEC_BASER;
+ break;
+ case MC_CMD_FEC_RS:
+ fec->active_fec = ETHTOOL_FEC_RS;
+ break;
+ default:
+ netif_warn(efx, hw, efx->net_dev,
+ "Firmware reports unrecognised FEC_TYPE %u\n",
+ active);
+ /* We don't know what firmware has picked. AUTO is as good a
+ * "can't happen" value as any other.
+ */
+ fec->active_fec = ETHTOOL_FEC_AUTO;
+ break;
+ }
+
+ return 0;
+}
+
+int efx_mcdi_phy_test_alive(struct efx_nic *efx)
+{
+ MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_PHY_STATE_OUT_LEN);
+ size_t outlen;
+ int rc;
+
+ BUILD_BUG_ON(MC_CMD_GET_PHY_STATE_IN_LEN != 0);
+
+ rc = efx_mcdi_rpc(efx, MC_CMD_GET_PHY_STATE, NULL, 0,
+ outbuf, sizeof(outbuf), &outlen);
+ if (rc)
+ return rc;
+
+ if (outlen < MC_CMD_GET_PHY_STATE_OUT_LEN)
+ return -EIO;
+ if (MCDI_DWORD(outbuf, GET_PHY_STATE_OUT_STATE) != MC_CMD_PHY_STATE_OK)
+ return -EINVAL;
+
+ return 0;
+}
+
+int efx_mcdi_set_mac(struct efx_nic *efx)
+{
+ u32 fcntl;
+ MCDI_DECLARE_BUF(cmdbytes, MC_CMD_SET_MAC_IN_LEN);
+
+ BUILD_BUG_ON(MC_CMD_SET_MAC_OUT_LEN != 0);
+
+ /* This has no effect on EF10 */
+ ether_addr_copy(MCDI_PTR(cmdbytes, SET_MAC_IN_ADDR),
+ efx->net_dev->dev_addr);
+
+ MCDI_SET_DWORD(cmdbytes, SET_MAC_IN_MTU,
+ EFX_MAX_FRAME_LEN(efx->net_dev->mtu));
+ MCDI_SET_DWORD(cmdbytes, SET_MAC_IN_DRAIN, 0);
+
+ /* Set simple MAC filter for Siena */
+ MCDI_POPULATE_DWORD_1(cmdbytes, SET_MAC_IN_REJECT,
+ SET_MAC_IN_REJECT_UNCST, efx->unicast_filter);
+
+ MCDI_POPULATE_DWORD_1(cmdbytes, SET_MAC_IN_FLAGS,
+ SET_MAC_IN_FLAG_INCLUDE_FCS,
+ !!(efx->net_dev->features & NETIF_F_RXFCS));
+
+ switch (efx->wanted_fc) {
+ case EFX_FC_RX | EFX_FC_TX:
+ fcntl = MC_CMD_FCNTL_BIDIR;
+ break;
+ case EFX_FC_RX:
+ fcntl = MC_CMD_FCNTL_RESPOND;
+ break;
+ default:
+ fcntl = MC_CMD_FCNTL_OFF;
+ break;
+ }
+ if (efx->wanted_fc & EFX_FC_AUTO)
+ fcntl = MC_CMD_FCNTL_AUTO;
+ if (efx->fc_disable)
+ fcntl = MC_CMD_FCNTL_OFF;
+
+ MCDI_SET_DWORD(cmdbytes, SET_MAC_IN_FCNTL, fcntl);
+
+ return efx_mcdi_rpc(efx, MC_CMD_SET_MAC, cmdbytes, sizeof(cmdbytes),
+ NULL, 0, NULL);
+}
+
+/* Get physical port number (EF10 only; on Siena it is the same as the PF number) */
+int efx_mcdi_port_get_number(struct efx_nic *efx)
+{
+ MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_PORT_ASSIGNMENT_OUT_LEN);
+ int rc;
+
+ rc = efx_mcdi_rpc(efx, MC_CMD_GET_PORT_ASSIGNMENT, NULL, 0,
+ outbuf, sizeof(outbuf), NULL);
+ if (rc)
+ return rc;
+
+ return MCDI_DWORD(outbuf, GET_PORT_ASSIGNMENT_OUT_PORT);
+}
+
+static unsigned int efx_mcdi_event_link_speed[] = {
+ [MCDI_EVENT_LINKCHANGE_SPEED_100M] = 100,
+ [MCDI_EVENT_LINKCHANGE_SPEED_1G] = 1000,
+ [MCDI_EVENT_LINKCHANGE_SPEED_10G] = 10000,
+ [MCDI_EVENT_LINKCHANGE_SPEED_40G] = 40000,
+ [MCDI_EVENT_LINKCHANGE_SPEED_25G] = 25000,
+ [MCDI_EVENT_LINKCHANGE_SPEED_50G] = 50000,
+ [MCDI_EVENT_LINKCHANGE_SPEED_100G] = 100000,
+};
+
+void efx_mcdi_process_link_change(struct efx_nic *efx, efx_qword_t *ev)
+{
+ u32 flags, fcntl, speed, lpa;
+
+ speed = EFX_QWORD_FIELD(*ev, MCDI_EVENT_LINKCHANGE_SPEED);
+ EFX_WARN_ON_PARANOID(speed >= ARRAY_SIZE(efx_mcdi_event_link_speed));
+ speed = efx_mcdi_event_link_speed[speed];
+
+ flags = EFX_QWORD_FIELD(*ev, MCDI_EVENT_LINKCHANGE_LINK_FLAGS);
+ fcntl = EFX_QWORD_FIELD(*ev, MCDI_EVENT_LINKCHANGE_FCNTL);
+ lpa = EFX_QWORD_FIELD(*ev, MCDI_EVENT_LINKCHANGE_LP_CAP);
+
+ /* efx->link_state is only modified by efx_mcdi_phy_get_link(),
+ * which is only run after flushing the event queues. Therefore, it
+ * is safe to modify the link state outside of the mac_lock here.
+ */
+ efx_mcdi_phy_decode_link(efx, &efx->link_state, speed, flags, fcntl);
+
+ efx_mcdi_phy_check_fcntl(efx, lpa);
+
+ efx_link_status_changed(efx);
+}
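
A minimal sketch of the capability mapping implemented by mcdi_fec_caps_to_ethtool() above, rewritten over plain booleans so the rule can be checked by eye. The helper name fec_map() is hypothetical and not part of the driver; ETHTOOL_FEC_* are the usual values from <linux/ethtool.h>.

/* Sketch only: the same decision table as mcdi_fec_caps_to_ethtool().
 * "adv" bits are what the PHY advertises, "req" bits what was explicitly
 * requested; AUTO is reported whenever the two sets disagree.
 */
static u32 fec_map(bool baser_adv, bool baser_req, bool rs_adv, bool rs_req)
{
	if (!baser_adv && !rs_adv)
		return ETHTOOL_FEC_OFF;
	return (rs_req ? ETHTOOL_FEC_RS : 0) |
	       (baser_req ? ETHTOOL_FEC_BASER : 0) |
	       (baser_adv == baser_req && rs_adv == rs_req ?
			0 : ETHTOOL_FEC_AUTO);
}

/* e.g. adv = {BASER, RS}, req = {RS}  ->  ETHTOOL_FEC_RS | ETHTOOL_FEC_AUTO */
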
diff --git a/drivers/net/ethernet/sfc/mcdi_port_common.h b/drivers/net/ethernet/sfc/mcdi_port_common.h
new file mode 100644
index 000000000000..b16f11265269
--- /dev/null
+++ b/drivers/net/ethernet/sfc/mcdi_port_common.h
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2018 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+#ifndef EFX_MCDI_PORT_COMMON_H
+#define EFX_MCDI_PORT_COMMON_H
+
+#include "net_driver.h"
+#include "mcdi.h"
+#include "mcdi_pcol.h"
+
+struct efx_mcdi_phy_data {
+ u32 flags;
+ u32 type;
+ u32 supported_cap;
+ u32 channel;
+ u32 port;
+ u32 stats_mask;
+ u8 name[20];
+ u32 media;
+ u32 mmd_mask;
+ u8 revision[20];
+ u32 forced_cap;
+};
+
+#define EFX_MC_STATS_GENERATION_INVALID ((__force __le64)(-1))
+
+int efx_mcdi_get_phy_cfg(struct efx_nic *efx, struct efx_mcdi_phy_data *cfg);
+void efx_link_set_advertising(struct efx_nic *efx,
+ const unsigned long *advertising);
+int efx_mcdi_set_link(struct efx_nic *efx, u32 capabilities,
+ u32 flags, u32 loopback_mode, u32 loopback_speed);
+int efx_mcdi_loopback_modes(struct efx_nic *efx, u64 *loopback_modes);
+void mcdi_to_ethtool_linkset(u32 media, u32 cap, unsigned long *linkset);
+u32 ethtool_linkset_to_mcdi_cap(const unsigned long *linkset);
+u32 efx_get_mcdi_phy_flags(struct efx_nic *efx);
+u8 mcdi_to_ethtool_media(u32 media);
+void efx_mcdi_phy_decode_link(struct efx_nic *efx,
+ struct efx_link_state *link_state,
+ u32 speed, u32 flags, u32 fcntl);
+u32 ethtool_fec_caps_to_mcdi(u32 ethtool_cap);
+u32 mcdi_fec_caps_to_ethtool(u32 caps, bool is_25g);
+void efx_mcdi_phy_check_fcntl(struct efx_nic *efx, u32 lpa);
+bool efx_mcdi_phy_poll(struct efx_nic *efx);
+int efx_mcdi_phy_get_fecparam(struct efx_nic *efx,
+ struct ethtool_fecparam *fec);
+int efx_mcdi_phy_test_alive(struct efx_nic *efx);
+int efx_mcdi_set_mac(struct efx_nic *efx);
+int efx_mcdi_port_get_number(struct efx_nic *efx);
+void efx_mcdi_process_link_change(struct efx_nic *efx, efx_qword_t *ev);
+
+#endif
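
The declarations above are consumed from the ethtool layer; a caller of efx_mcdi_phy_get_fecparam() would look roughly like the sketch below. The wrapper name is a placeholder and the locking reflects the driver's usual convention of taking efx->mac_lock around PHY MCDI operations; this is an assumption-marked sketch, not the patch's own ethtool code.

/* Illustrative only: an ethtool .get_fecparam hook built on the helper
 * above.  Names other than efx_mcdi_phy_get_fecparam() are placeholders.
 */
static int example_get_fecparam(struct net_device *net_dev,
				struct ethtool_fecparam *fecparam)
{
	struct efx_nic *efx = netdev_priv(net_dev);
	int rc;

	mutex_lock(&efx->mac_lock);
	rc = efx_mcdi_phy_get_fecparam(efx, fecparam);
	mutex_unlock(&efx->mac_lock);

	return rc;
}
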
diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h
index dfd5182d9e47..9f9886f222c8 100644
--- a/drivers/net/ethernet/sfc/net_driver.h
+++ b/drivers/net/ethernet/sfc/net_driver.h
@@ -24,7 +24,6 @@
#include <linux/mutex.h>
#include <linux/rwsem.h>
#include <linux/vmalloc.h>
-#include <linux/i2c.h>
#include <linux/mtd/mtd.h>
#include <net/busy_poll.h>
#include <net/xdp.h>
@@ -139,6 +138,8 @@ struct efx_special_buffer {
* freed when descriptor completes
* @xdpf: When @flags & %EFX_TX_BUF_XDP, the XDP frame information; its @data
* member is the associated buffer to drop a page reference on.
+ * @option: When @flags & %EFX_TX_BUF_OPTION, an EF10-specific option
+ * descriptor.
* @dma_addr: DMA address of the fragment.
* @flags: Flags for allocation and DMA mapping type
* @len: Length of this fragment.
@@ -153,7 +154,7 @@ struct efx_tx_buffer {
struct xdp_frame *xdpf;
};
union {
- efx_qword_t option;
+ efx_qword_t option; /* EF10 */
dma_addr_t dma_addr;
};
unsigned short flags;
@@ -743,13 +744,13 @@ union efx_multicast_hash {
struct vfdi_status;
/* The reserved RSS context value */
-#define EFX_EF10_RSS_CONTEXT_INVALID 0xffffffff
+#define EFX_MCDI_RSS_CONTEXT_INVALID 0xffffffff
/**
* struct efx_rss_context - A user-defined RSS context for filtering
* @list: node of linked list on which this struct is stored
* @context_id: the RSS_CONTEXT_ID returned by MC firmware, or
- * %EFX_EF10_RSS_CONTEXT_INVALID if this context is not present on the NIC.
- * For Siena, 0 if RSS is active, else %EFX_EF10_RSS_CONTEXT_INVALID.
+ * %EFX_MCDI_RSS_CONTEXT_INVALID if this context is not present on the NIC.
+ * For Siena, 0 if RSS is active, else %EFX_MCDI_RSS_CONTEXT_INVALID.
* @user_id: the rss_context ID exposed to userspace over ethtool.
* @rx_hash_udp_4tuple: UDP 4-tuple hashing enabled
* @rx_hash_key: Toeplitz hash key for this RSS context
@@ -1611,6 +1612,15 @@ static inline struct efx_rx_buffer *efx_rx_buffer(struct efx_rx_queue *rx_queue,
return &rx_queue->buffer[index];
}
+static inline struct efx_rx_buffer *
+efx_rx_buf_next(struct efx_rx_queue *rx_queue, struct efx_rx_buffer *rx_buf)
+{
+ if (unlikely(rx_buf == efx_rx_buffer(rx_queue, rx_queue->ptr_mask)))
+ return efx_rx_buffer(rx_queue, 0);
+ else
+ return rx_buf + 1;
+}
+
/**
* EFX_MAX_FRAME_LEN - calculate maximum frame length
*
diff --git a/drivers/net/ethernet/sfc/nic.h b/drivers/net/ethernet/sfc/nic.h
index 1f7c5717de75..6670fda8f35a 100644
--- a/drivers/net/ethernet/sfc/nic.h
+++ b/drivers/net/ethernet/sfc/nic.h
@@ -9,9 +9,9 @@
#define EFX_NIC_H
#include <linux/net_tstamp.h>
-#include <linux/i2c-algo-bit.h>
#include "net_driver.h"
#include "efx.h"
+#include "efx_common.h"
#include "mcdi.h"
enum {
@@ -506,6 +506,9 @@ static inline void efx_nic_push_buffers(struct efx_tx_queue *tx_queue)
tx_queue->efx->type->tx_write(tx_queue);
}
+int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue, struct sk_buff *skb,
+ bool *data_mapped);
+
/* RX data path */
static inline int efx_nic_probe_rx(struct efx_rx_queue *rx_queue)
{
@@ -554,6 +557,7 @@ static inline void efx_nic_eventq_read_ack(struct efx_channel *channel)
{
channel->efx->type->ev_read_ack(channel);
}
+
void efx_nic_event_test_start(struct efx_channel *channel);
/* Falcon/Siena queue operations */
@@ -671,6 +675,7 @@ struct efx_farch_register_test {
unsigned address;
efx_oword_t mask;
};
+
int efx_farch_test_registers(struct efx_nic *efx,
const struct efx_farch_register_test *regs,
size_t n_regs);
diff --git a/drivers/net/ethernet/sfc/rx.c b/drivers/net/ethernet/sfc/rx.c
index c29bf862a94c..a2042f16babc 100644
--- a/drivers/net/ethernet/sfc/rx.c
+++ b/drivers/net/ethernet/sfc/rx.c
@@ -21,6 +21,7 @@
#include <linux/bpf_trace.h>
#include "net_driver.h"
#include "efx.h"
+#include "rx_common.h"
#include "filter.h"
#include "nic.h"
#include "selftest.h"
@@ -32,60 +33,13 @@
/* Maximum rx prefix used by any architecture. */
#define EFX_MAX_RX_PREFIX_SIZE 16
-/* Number of RX buffers to recycle pages for. When creating the RX page recycle
- * ring, this number is divided by the number of buffers per page to calculate
- * the number of pages to store in the RX page recycle ring.
- */
-#define EFX_RECYCLE_RING_SIZE_IOMMU 4096
-#define EFX_RECYCLE_RING_SIZE_NOIOMMU (2 * EFX_RX_PREFERRED_BATCH)
-
/* Size of buffer allocated for skb header area. */
#define EFX_SKB_HEADERS 128u
-/* This is the percentage fill level below which new RX descriptors
- * will be added to the RX descriptor ring.
- */
-static unsigned int rx_refill_threshold;
-
/* Each packet can consume up to ceil(max_frame_len / buffer_size) buffers */
#define EFX_RX_MAX_FRAGS DIV_ROUND_UP(EFX_MAX_FRAME_LEN(EFX_MAX_MTU), \
EFX_RX_USR_BUF_SIZE)
-/*
- * RX maximum head room required.
- *
- * This must be at least 1 to prevent overflow, plus one packet-worth
- * to allow pipelined receives.
- */
-#define EFX_RXD_HEAD_ROOM (1 + EFX_RX_MAX_FRAGS)
-
-static inline u8 *efx_rx_buf_va(struct efx_rx_buffer *buf)
-{
- return page_address(buf->page) + buf->page_offset;
-}
-
-static inline u32 efx_rx_buf_hash(struct efx_nic *efx, const u8 *eh)
-{
-#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
- return __le32_to_cpup((const __le32 *)(eh + efx->rx_packet_hash_offset));
-#else
- const u8 *data = eh + efx->rx_packet_hash_offset;
- return (u32)data[0] |
- (u32)data[1] << 8 |
- (u32)data[2] << 16 |
- (u32)data[3] << 24;
-#endif
-}
-
-static inline struct efx_rx_buffer *
-efx_rx_buf_next(struct efx_rx_queue *rx_queue, struct efx_rx_buffer *rx_buf)
-{
- if (unlikely(rx_buf == efx_rx_buffer(rx_queue, rx_queue->ptr_mask)))
- return efx_rx_buffer(rx_queue, 0);
- else
- return rx_buf + 1;
-}
-
static inline void efx_sync_rx_buffer(struct efx_nic *efx,
struct efx_rx_buffer *rx_buf,
unsigned int len)
@@ -94,301 +48,6 @@ static inline void efx_sync_rx_buffer(struct efx_nic *efx,
DMA_FROM_DEVICE);
}
-void efx_rx_config_page_split(struct efx_nic *efx)
-{
- efx->rx_page_buf_step = ALIGN(efx->rx_dma_len + efx->rx_ip_align +
- XDP_PACKET_HEADROOM,
- EFX_RX_BUF_ALIGNMENT);
- efx->rx_bufs_per_page = efx->rx_buffer_order ? 1 :
- ((PAGE_SIZE - sizeof(struct efx_rx_page_state)) /
- efx->rx_page_buf_step);
- efx->rx_buffer_truesize = (PAGE_SIZE << efx->rx_buffer_order) /
- efx->rx_bufs_per_page;
- efx->rx_pages_per_batch = DIV_ROUND_UP(EFX_RX_PREFERRED_BATCH,
- efx->rx_bufs_per_page);
-}
-
-/* Check the RX page recycle ring for a page that can be reused. */
-static struct page *efx_reuse_page(struct efx_rx_queue *rx_queue)
-{
- struct efx_nic *efx = rx_queue->efx;
- struct page *page;
- struct efx_rx_page_state *state;
- unsigned index;
-
- index = rx_queue->page_remove & rx_queue->page_ptr_mask;
- page = rx_queue->page_ring[index];
- if (page == NULL)
- return NULL;
-
- rx_queue->page_ring[index] = NULL;
- /* page_remove cannot exceed page_add. */
- if (rx_queue->page_remove != rx_queue->page_add)
- ++rx_queue->page_remove;
-
- /* If page_count is 1 then we hold the only reference to this page. */
- if (page_count(page) == 1) {
- ++rx_queue->page_recycle_count;
- return page;
- } else {
- state = page_address(page);
- dma_unmap_page(&efx->pci_dev->dev, state->dma_addr,
- PAGE_SIZE << efx->rx_buffer_order,
- DMA_FROM_DEVICE);
- put_page(page);
- ++rx_queue->page_recycle_failed;
- }
-
- return NULL;
-}
-
-/**
- * efx_init_rx_buffers - create EFX_RX_BATCH page-based RX buffers
- *
- * @rx_queue: Efx RX queue
- *
- * This allocates a batch of pages, maps them for DMA, and populates
- * struct efx_rx_buffers for each one. Return a negative error code or
- * 0 on success. If a single page can be used for multiple buffers,
- * then the page will either be inserted fully, or not at all.
- */
-static int efx_init_rx_buffers(struct efx_rx_queue *rx_queue, bool atomic)
-{
- struct efx_nic *efx = rx_queue->efx;
- struct efx_rx_buffer *rx_buf;
- struct page *page;
- unsigned int page_offset;
- struct efx_rx_page_state *state;
- dma_addr_t dma_addr;
- unsigned index, count;
-
- count = 0;
- do {
- page = efx_reuse_page(rx_queue);
- if (page == NULL) {
- page = alloc_pages(__GFP_COMP |
- (atomic ? GFP_ATOMIC : GFP_KERNEL),
- efx->rx_buffer_order);
- if (unlikely(page == NULL))
- return -ENOMEM;
- dma_addr =
- dma_map_page(&efx->pci_dev->dev, page, 0,
- PAGE_SIZE << efx->rx_buffer_order,
- DMA_FROM_DEVICE);
- if (unlikely(dma_mapping_error(&efx->pci_dev->dev,
- dma_addr))) {
- __free_pages(page, efx->rx_buffer_order);
- return -EIO;
- }
- state = page_address(page);
- state->dma_addr = dma_addr;
- } else {
- state = page_address(page);
- dma_addr = state->dma_addr;
- }
-
- dma_addr += sizeof(struct efx_rx_page_state);
- page_offset = sizeof(struct efx_rx_page_state);
-
- do {
- index = rx_queue->added_count & rx_queue->ptr_mask;
- rx_buf = efx_rx_buffer(rx_queue, index);
- rx_buf->dma_addr = dma_addr + efx->rx_ip_align +
- XDP_PACKET_HEADROOM;
- rx_buf->page = page;
- rx_buf->page_offset = page_offset + efx->rx_ip_align +
- XDP_PACKET_HEADROOM;
- rx_buf->len = efx->rx_dma_len;
- rx_buf->flags = 0;
- ++rx_queue->added_count;
- get_page(page);
- dma_addr += efx->rx_page_buf_step;
- page_offset += efx->rx_page_buf_step;
- } while (page_offset + efx->rx_page_buf_step <= PAGE_SIZE);
-
- rx_buf->flags = EFX_RX_BUF_LAST_IN_PAGE;
- } while (++count < efx->rx_pages_per_batch);
-
- return 0;
-}
-
-/* Unmap a DMA-mapped page. This function is only called for the final RX
- * buffer in a page.
- */
-static void efx_unmap_rx_buffer(struct efx_nic *efx,
- struct efx_rx_buffer *rx_buf)
-{
- struct page *page = rx_buf->page;
-
- if (page) {
- struct efx_rx_page_state *state = page_address(page);
- dma_unmap_page(&efx->pci_dev->dev,
- state->dma_addr,
- PAGE_SIZE << efx->rx_buffer_order,
- DMA_FROM_DEVICE);
- }
-}
-
-static void efx_free_rx_buffers(struct efx_rx_queue *rx_queue,
- struct efx_rx_buffer *rx_buf,
- unsigned int num_bufs)
-{
- do {
- if (rx_buf->page) {
- put_page(rx_buf->page);
- rx_buf->page = NULL;
- }
- rx_buf = efx_rx_buf_next(rx_queue, rx_buf);
- } while (--num_bufs);
-}
-
-/* Attempt to recycle the page if there is an RX recycle ring; the page can
- * only be added if this is the final RX buffer, to prevent pages being used in
- * the descriptor ring and appearing in the recycle ring simultaneously.
- */
-static void efx_recycle_rx_page(struct efx_channel *channel,
- struct efx_rx_buffer *rx_buf)
-{
- struct page *page = rx_buf->page;
- struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel);
- struct efx_nic *efx = rx_queue->efx;
- unsigned index;
-
- /* Only recycle the page after processing the final buffer. */
- if (!(rx_buf->flags & EFX_RX_BUF_LAST_IN_PAGE))
- return;
-
- index = rx_queue->page_add & rx_queue->page_ptr_mask;
- if (rx_queue->page_ring[index] == NULL) {
- unsigned read_index = rx_queue->page_remove &
- rx_queue->page_ptr_mask;
-
- /* The next slot in the recycle ring is available, but
- * increment page_remove if the read pointer currently
- * points here.
- */
- if (read_index == index)
- ++rx_queue->page_remove;
- rx_queue->page_ring[index] = page;
- ++rx_queue->page_add;
- return;
- }
- ++rx_queue->page_recycle_full;
- efx_unmap_rx_buffer(efx, rx_buf);
- put_page(rx_buf->page);
-}
-
-static void efx_fini_rx_buffer(struct efx_rx_queue *rx_queue,
- struct efx_rx_buffer *rx_buf)
-{
- /* Release the page reference we hold for the buffer. */
- if (rx_buf->page)
- put_page(rx_buf->page);
-
- /* If this is the last buffer in a page, unmap and free it. */
- if (rx_buf->flags & EFX_RX_BUF_LAST_IN_PAGE) {
- efx_unmap_rx_buffer(rx_queue->efx, rx_buf);
- efx_free_rx_buffers(rx_queue, rx_buf, 1);
- }
- rx_buf->page = NULL;
-}
-
-/* Recycle the pages that are used by buffers that have just been received. */
-static void efx_recycle_rx_pages(struct efx_channel *channel,
- struct efx_rx_buffer *rx_buf,
- unsigned int n_frags)
-{
- struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel);
-
- do {
- efx_recycle_rx_page(channel, rx_buf);
- rx_buf = efx_rx_buf_next(rx_queue, rx_buf);
- } while (--n_frags);
-}
-
-static void efx_discard_rx_packet(struct efx_channel *channel,
- struct efx_rx_buffer *rx_buf,
- unsigned int n_frags)
-{
- struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel);
-
- efx_recycle_rx_pages(channel, rx_buf, n_frags);
-
- efx_free_rx_buffers(rx_queue, rx_buf, n_frags);
-}
-
-/**
- * efx_fast_push_rx_descriptors - push new RX descriptors quickly
- * @rx_queue: RX descriptor queue
- *
- * This will aim to fill the RX descriptor queue up to
- * @rx_queue->@max_fill. If there is insufficient atomic
- * memory to do so, a slow fill will be scheduled.
- *
- * The caller must provide serialisation (none is used here). In practise,
- * this means this function must run from the NAPI handler, or be called
- * when NAPI is disabled.
- */
-void efx_fast_push_rx_descriptors(struct efx_rx_queue *rx_queue, bool atomic)
-{
- struct efx_nic *efx = rx_queue->efx;
- unsigned int fill_level, batch_size;
- int space, rc = 0;
-
- if (!rx_queue->refill_enabled)
- return;
-
- /* Calculate current fill level, and exit if we don't need to fill */
- fill_level = (rx_queue->added_count - rx_queue->removed_count);
- EFX_WARN_ON_ONCE_PARANOID(fill_level > rx_queue->efx->rxq_entries);
- if (fill_level >= rx_queue->fast_fill_trigger)
- goto out;
-
- /* Record minimum fill level */
- if (unlikely(fill_level < rx_queue->min_fill)) {
- if (fill_level)
- rx_queue->min_fill = fill_level;
- }
-
- batch_size = efx->rx_pages_per_batch * efx->rx_bufs_per_page;
- space = rx_queue->max_fill - fill_level;
- EFX_WARN_ON_ONCE_PARANOID(space < batch_size);
-
- netif_vdbg(rx_queue->efx, rx_status, rx_queue->efx->net_dev,
- "RX queue %d fast-filling descriptor ring from"
- " level %d to level %d\n",
- efx_rx_queue_index(rx_queue), fill_level,
- rx_queue->max_fill);
-
-
- do {
- rc = efx_init_rx_buffers(rx_queue, atomic);
- if (unlikely(rc)) {
- /* Ensure that we don't leave the rx queue empty */
- efx_schedule_slow_fill(rx_queue);
- goto out;
- }
- } while ((space -= batch_size) >= batch_size);
-
- netif_vdbg(rx_queue->efx, rx_status, rx_queue->efx->net_dev,
- "RX queue %d fast-filled descriptor ring "
- "to level %d\n", efx_rx_queue_index(rx_queue),
- rx_queue->added_count - rx_queue->removed_count);
-
- out:
- if (rx_queue->notified_count != rx_queue->added_count)
- efx_nic_notify_rx_desc(rx_queue);
-}
-
-void efx_rx_slow_fill(struct timer_list *t)
-{
- struct efx_rx_queue *rx_queue = from_timer(rx_queue, t, slow_fill);
-
- /* Post an event to cause NAPI to run and refill the queue */
- efx_nic_generate_fill_event(rx_queue);
- ++rx_queue->slow_fill_count;
-}
-
static void efx_rx_packet__check_len(struct efx_rx_queue *rx_queue,
struct efx_rx_buffer *rx_buf,
int len)
@@ -412,53 +71,6 @@ static void efx_rx_packet__check_len(struct efx_rx_queue *rx_queue,
efx_rx_queue_channel(rx_queue)->n_rx_overlength++;
}
-/* Pass a received packet up through GRO. GRO can handle pages
- * regardless of checksum state and skbs with a good checksum.
- */
-static void
-efx_rx_packet_gro(struct efx_channel *channel, struct efx_rx_buffer *rx_buf,
- unsigned int n_frags, u8 *eh)
-{
- struct napi_struct *napi = &channel->napi_str;
- struct efx_nic *efx = channel->efx;
- struct sk_buff *skb;
-
- skb = napi_get_frags(napi);
- if (unlikely(!skb)) {
- struct efx_rx_queue *rx_queue;
-
- rx_queue = efx_channel_get_rx_queue(channel);
- efx_free_rx_buffers(rx_queue, rx_buf, n_frags);
- return;
- }
-
- if (efx->net_dev->features & NETIF_F_RXHASH)
- skb_set_hash(skb, efx_rx_buf_hash(efx, eh),
- PKT_HASH_TYPE_L3);
- skb->ip_summed = ((rx_buf->flags & EFX_RX_PKT_CSUMMED) ?
- CHECKSUM_UNNECESSARY : CHECKSUM_NONE);
- skb->csum_level = !!(rx_buf->flags & EFX_RX_PKT_CSUM_LEVEL);
-
- for (;;) {
- skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
- rx_buf->page, rx_buf->page_offset,
- rx_buf->len);
- rx_buf->page = NULL;
- skb->len += rx_buf->len;
- if (skb_shinfo(skb)->nr_frags == n_frags)
- break;
-
- rx_buf = efx_rx_buf_next(&channel->rx_queue, rx_buf);
- }
-
- skb->data_len = skb->len;
- skb->truesize += n_frags * efx->rx_buffer_truesize;
-
- skb_record_rx_queue(skb, channel->rx_queue.core_index);
-
- napi_gro_frags(napi);
-}
-
/* Allocate and construct an SKB around page fragments */
static struct sk_buff *efx_rx_mk_skb(struct efx_channel *channel,
struct efx_rx_buffer *rx_buf,
@@ -805,174 +417,6 @@ out:
channel->rx_pkt_n_frags = 0;
}
-int efx_probe_rx_queue(struct efx_rx_queue *rx_queue)
-{
- struct efx_nic *efx = rx_queue->efx;
- unsigned int entries;
- int rc;
-
- /* Create the smallest power-of-two aligned ring */
- entries = max(roundup_pow_of_two(efx->rxq_entries), EFX_MIN_DMAQ_SIZE);
- EFX_WARN_ON_PARANOID(entries > EFX_MAX_DMAQ_SIZE);
- rx_queue->ptr_mask = entries - 1;
-
- netif_dbg(efx, probe, efx->net_dev,
- "creating RX queue %d size %#x mask %#x\n",
- efx_rx_queue_index(rx_queue), efx->rxq_entries,
- rx_queue->ptr_mask);
-
- /* Allocate RX buffers */
- rx_queue->buffer = kcalloc(entries, sizeof(*rx_queue->buffer),
- GFP_KERNEL);
- if (!rx_queue->buffer)
- return -ENOMEM;
-
- rc = efx_nic_probe_rx(rx_queue);
- if (rc) {
- kfree(rx_queue->buffer);
- rx_queue->buffer = NULL;
- }
-
- return rc;
-}
-
-static void efx_init_rx_recycle_ring(struct efx_nic *efx,
- struct efx_rx_queue *rx_queue)
-{
- unsigned int bufs_in_recycle_ring, page_ring_size;
-
- /* Set the RX recycle ring size */
-#ifdef CONFIG_PPC64
- bufs_in_recycle_ring = EFX_RECYCLE_RING_SIZE_IOMMU;
-#else
- if (iommu_present(&pci_bus_type))
- bufs_in_recycle_ring = EFX_RECYCLE_RING_SIZE_IOMMU;
- else
- bufs_in_recycle_ring = EFX_RECYCLE_RING_SIZE_NOIOMMU;
-#endif /* CONFIG_PPC64 */
-
- page_ring_size = roundup_pow_of_two(bufs_in_recycle_ring /
- efx->rx_bufs_per_page);
- rx_queue->page_ring = kcalloc(page_ring_size,
- sizeof(*rx_queue->page_ring), GFP_KERNEL);
- rx_queue->page_ptr_mask = page_ring_size - 1;
-}
-
-void efx_init_rx_queue(struct efx_rx_queue *rx_queue)
-{
- struct efx_nic *efx = rx_queue->efx;
- unsigned int max_fill, trigger, max_trigger;
- int rc = 0;
-
- netif_dbg(rx_queue->efx, drv, rx_queue->efx->net_dev,
- "initialising RX queue %d\n", efx_rx_queue_index(rx_queue));
-
- /* Initialise ptr fields */
- rx_queue->added_count = 0;
- rx_queue->notified_count = 0;
- rx_queue->removed_count = 0;
- rx_queue->min_fill = -1U;
- efx_init_rx_recycle_ring(efx, rx_queue);
-
- rx_queue->page_remove = 0;
- rx_queue->page_add = rx_queue->page_ptr_mask + 1;
- rx_queue->page_recycle_count = 0;
- rx_queue->page_recycle_failed = 0;
- rx_queue->page_recycle_full = 0;
-
- /* Initialise limit fields */
- max_fill = efx->rxq_entries - EFX_RXD_HEAD_ROOM;
- max_trigger =
- max_fill - efx->rx_pages_per_batch * efx->rx_bufs_per_page;
- if (rx_refill_threshold != 0) {
- trigger = max_fill * min(rx_refill_threshold, 100U) / 100U;
- if (trigger > max_trigger)
- trigger = max_trigger;
- } else {
- trigger = max_trigger;
- }
-
- rx_queue->max_fill = max_fill;
- rx_queue->fast_fill_trigger = trigger;
- rx_queue->refill_enabled = true;
-
- /* Initialise XDP queue information */
- rc = xdp_rxq_info_reg(&rx_queue->xdp_rxq_info, efx->net_dev,
- rx_queue->core_index);
-
- if (rc) {
- netif_err(efx, rx_err, efx->net_dev,
- "Failure to initialise XDP queue information rc=%d\n",
- rc);
- efx->xdp_rxq_info_failed = true;
- } else {
- rx_queue->xdp_rxq_info_valid = true;
- }
-
- /* Set up RX descriptor ring */
- efx_nic_init_rx(rx_queue);
-}
-
-void efx_fini_rx_queue(struct efx_rx_queue *rx_queue)
-{
- int i;
- struct efx_nic *efx = rx_queue->efx;
- struct efx_rx_buffer *rx_buf;
-
- netif_dbg(rx_queue->efx, drv, rx_queue->efx->net_dev,
- "shutting down RX queue %d\n", efx_rx_queue_index(rx_queue));
-
- del_timer_sync(&rx_queue->slow_fill);
-
- /* Release RX buffers from the current read ptr to the write ptr */
- if (rx_queue->buffer) {
- for (i = rx_queue->removed_count; i < rx_queue->added_count;
- i++) {
- unsigned index = i & rx_queue->ptr_mask;
- rx_buf = efx_rx_buffer(rx_queue, index);
- efx_fini_rx_buffer(rx_queue, rx_buf);
- }
- }
-
- /* Unmap and release the pages in the recycle ring. Remove the ring. */
- for (i = 0; i <= rx_queue->page_ptr_mask; i++) {
- struct page *page = rx_queue->page_ring[i];
- struct efx_rx_page_state *state;
-
- if (page == NULL)
- continue;
-
- state = page_address(page);
- dma_unmap_page(&efx->pci_dev->dev, state->dma_addr,
- PAGE_SIZE << efx->rx_buffer_order,
- DMA_FROM_DEVICE);
- put_page(page);
- }
- kfree(rx_queue->page_ring);
- rx_queue->page_ring = NULL;
-
- if (rx_queue->xdp_rxq_info_valid)
- xdp_rxq_info_unreg(&rx_queue->xdp_rxq_info);
-
- rx_queue->xdp_rxq_info_valid = false;
-}
-
-void efx_remove_rx_queue(struct efx_rx_queue *rx_queue)
-{
- netif_dbg(rx_queue->efx, drv, rx_queue->efx->net_dev,
- "destroying RX queue %d\n", efx_rx_queue_index(rx_queue));
-
- efx_nic_remove_rx(rx_queue);
-
- kfree(rx_queue->buffer);
- rx_queue->buffer = NULL;
-}
-
-
-module_param(rx_refill_threshold, uint, 0444);
-MODULE_PARM_DESC(rx_refill_threshold,
- "RX descriptor ring refill threshold (%)");
-
#ifdef CONFIG_RFS_ACCEL
static void efx_filter_rfs_work(struct work_struct *data)
@@ -1206,37 +650,3 @@ bool __efx_filter_rfs_expire(struct efx_channel *channel, unsigned int quota)
}
#endif /* CONFIG_RFS_ACCEL */
-
-/**
- * efx_filter_is_mc_recipient - test whether spec is a multicast recipient
- * @spec: Specification to test
- *
- * Return: %true if the specification is a non-drop RX filter that
- * matches a local MAC address I/G bit value of 1 or matches a local
- * IPv4 or IPv6 address value in the respective multicast address
- * range. Otherwise %false.
- */
-bool efx_filter_is_mc_recipient(const struct efx_filter_spec *spec)
-{
- if (!(spec->flags & EFX_FILTER_FLAG_RX) ||
- spec->dmaq_id == EFX_FILTER_RX_DMAQ_ID_DROP)
- return false;
-
- if (spec->match_flags &
- (EFX_FILTER_MATCH_LOC_MAC | EFX_FILTER_MATCH_LOC_MAC_IG) &&
- is_multicast_ether_addr(spec->loc_mac))
- return true;
-
- if ((spec->match_flags &
- (EFX_FILTER_MATCH_ETHER_TYPE | EFX_FILTER_MATCH_LOC_HOST)) ==
- (EFX_FILTER_MATCH_ETHER_TYPE | EFX_FILTER_MATCH_LOC_HOST)) {
- if (spec->ether_type == htons(ETH_P_IP) &&
- ipv4_is_multicast(spec->loc_host[0]))
- return true;
- if (spec->ether_type == htons(ETH_P_IPV6) &&
- ((const u8 *)spec->loc_host)[0] == 0xff)
- return true;
- }
-
- return false;
-}
diff --git a/drivers/net/ethernet/sfc/rx_common.c b/drivers/net/ethernet/sfc/rx_common.c
new file mode 100644
index 000000000000..ee8beb87bdc1
--- /dev/null
+++ b/drivers/net/ethernet/sfc/rx_common.c
@@ -0,0 +1,851 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2018 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#include "net_driver.h"
+#include <linux/module.h>
+#include <linux/iommu.h>
+#include "efx.h"
+#include "nic.h"
+#include "rx_common.h"
+
+/* This is the percentage fill level below which new RX descriptors
+ * will be added to the RX descriptor ring.
+ */
+static unsigned int rx_refill_threshold;
+module_param(rx_refill_threshold, uint, 0444);
+MODULE_PARM_DESC(rx_refill_threshold,
+ "RX descriptor ring refill threshold (%)");
+
+/* Number of RX buffers to recycle pages for. When creating the RX page recycle
+ * ring, this number is divided by the number of buffers per page to calculate
+ * the number of pages to store in the RX page recycle ring.
+ */
+#define EFX_RECYCLE_RING_SIZE_IOMMU 4096
+#define EFX_RECYCLE_RING_SIZE_NOIOMMU (2 * EFX_RX_PREFERRED_BATCH)
+
+/* RX maximum head room required.
+ *
+ * This must be at least 1 to prevent overflow, plus one packet-worth
+ * to allow pipelined receives.
+ */
+#define EFX_RXD_HEAD_ROOM (1 + EFX_RX_MAX_FRAGS)
+
+/* Check the RX page recycle ring for a page that can be reused. */
+static struct page *efx_reuse_page(struct efx_rx_queue *rx_queue)
+{
+ struct efx_nic *efx = rx_queue->efx;
+ struct efx_rx_page_state *state;
+ unsigned int index;
+ struct page *page;
+
+ index = rx_queue->page_remove & rx_queue->page_ptr_mask;
+ page = rx_queue->page_ring[index];
+ if (page == NULL)
+ return NULL;
+
+ rx_queue->page_ring[index] = NULL;
+ /* page_remove cannot exceed page_add. */
+ if (rx_queue->page_remove != rx_queue->page_add)
+ ++rx_queue->page_remove;
+
+ /* If page_count is 1 then we hold the only reference to this page. */
+ if (page_count(page) == 1) {
+ ++rx_queue->page_recycle_count;
+ return page;
+ } else {
+ state = page_address(page);
+ dma_unmap_page(&efx->pci_dev->dev, state->dma_addr,
+ PAGE_SIZE << efx->rx_buffer_order,
+ DMA_FROM_DEVICE);
+ put_page(page);
+ ++rx_queue->page_recycle_failed;
+ }
+
+ return NULL;
+}
+
+/* Attempt to recycle the page if there is an RX recycle ring; the page can
+ * only be added if this is the final RX buffer, to prevent pages being used in
+ * the descriptor ring and appearing in the recycle ring simultaneously.
+ */
+static void efx_recycle_rx_page(struct efx_channel *channel,
+ struct efx_rx_buffer *rx_buf)
+{
+ struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel);
+ struct efx_nic *efx = rx_queue->efx;
+ struct page *page = rx_buf->page;
+ unsigned int index;
+
+ /* Only recycle the page after processing the final buffer. */
+ if (!(rx_buf->flags & EFX_RX_BUF_LAST_IN_PAGE))
+ return;
+
+ index = rx_queue->page_add & rx_queue->page_ptr_mask;
+ if (rx_queue->page_ring[index] == NULL) {
+ unsigned int read_index = rx_queue->page_remove &
+ rx_queue->page_ptr_mask;
+
+ /* The next slot in the recycle ring is available, but
+ * increment page_remove if the read pointer currently
+ * points here.
+ */
+ if (read_index == index)
+ ++rx_queue->page_remove;
+ rx_queue->page_ring[index] = page;
+ ++rx_queue->page_add;
+ return;
+ }
+ ++rx_queue->page_recycle_full;
+ efx_unmap_rx_buffer(efx, rx_buf);
+ put_page(rx_buf->page);
+}
+
+/* Recycle the pages that are used by buffers that have just been received. */
+void efx_recycle_rx_pages(struct efx_channel *channel,
+ struct efx_rx_buffer *rx_buf,
+ unsigned int n_frags)
+{
+ struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel);
+
+ do {
+ efx_recycle_rx_page(channel, rx_buf);
+ rx_buf = efx_rx_buf_next(rx_queue, rx_buf);
+ } while (--n_frags);
+}
+
+void efx_discard_rx_packet(struct efx_channel *channel,
+ struct efx_rx_buffer *rx_buf,
+ unsigned int n_frags)
+{
+ struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel);
+
+ efx_recycle_rx_pages(channel, rx_buf, n_frags);
+
+ efx_free_rx_buffers(rx_queue, rx_buf, n_frags);
+}
+
+static void efx_init_rx_recycle_ring(struct efx_rx_queue *rx_queue)
+{
+ unsigned int bufs_in_recycle_ring, page_ring_size;
+ struct efx_nic *efx = rx_queue->efx;
+
+ /* Set the RX recycle ring size */
+#ifdef CONFIG_PPC64
+ bufs_in_recycle_ring = EFX_RECYCLE_RING_SIZE_IOMMU;
+#else
+ if (iommu_present(&pci_bus_type))
+ bufs_in_recycle_ring = EFX_RECYCLE_RING_SIZE_IOMMU;
+ else
+ bufs_in_recycle_ring = EFX_RECYCLE_RING_SIZE_NOIOMMU;
+#endif /* CONFIG_PPC64 */
+
+ page_ring_size = roundup_pow_of_two(bufs_in_recycle_ring /
+ efx->rx_bufs_per_page);
+ rx_queue->page_ring = kcalloc(page_ring_size,
+ sizeof(*rx_queue->page_ring), GFP_KERNEL);
+ rx_queue->page_ptr_mask = page_ring_size - 1;
+}
+
+static void efx_fini_rx_recycle_ring(struct efx_rx_queue *rx_queue)
+{
+ struct efx_nic *efx = rx_queue->efx;
+ int i;
+
+ /* Unmap and release the pages in the recycle ring. Remove the ring. */
+ for (i = 0; i <= rx_queue->page_ptr_mask; i++) {
+ struct page *page = rx_queue->page_ring[i];
+ struct efx_rx_page_state *state;
+
+ if (page == NULL)
+ continue;
+
+ state = page_address(page);
+ dma_unmap_page(&efx->pci_dev->dev, state->dma_addr,
+ PAGE_SIZE << efx->rx_buffer_order,
+ DMA_FROM_DEVICE);
+ put_page(page);
+ }
+ kfree(rx_queue->page_ring);
+ rx_queue->page_ring = NULL;
+}
+
+static void efx_fini_rx_buffer(struct efx_rx_queue *rx_queue,
+ struct efx_rx_buffer *rx_buf)
+{
+ /* Release the page reference we hold for the buffer. */
+ if (rx_buf->page)
+ put_page(rx_buf->page);
+
+ /* If this is the last buffer in a page, unmap and free it. */
+ if (rx_buf->flags & EFX_RX_BUF_LAST_IN_PAGE) {
+ efx_unmap_rx_buffer(rx_queue->efx, rx_buf);
+ efx_free_rx_buffers(rx_queue, rx_buf, 1);
+ }
+ rx_buf->page = NULL;
+}
+
+int efx_probe_rx_queue(struct efx_rx_queue *rx_queue)
+{
+ struct efx_nic *efx = rx_queue->efx;
+ unsigned int entries;
+ int rc;
+
+ /* Create the smallest power-of-two aligned ring */
+ entries = max(roundup_pow_of_two(efx->rxq_entries), EFX_MIN_DMAQ_SIZE);
+ EFX_WARN_ON_PARANOID(entries > EFX_MAX_DMAQ_SIZE);
+ rx_queue->ptr_mask = entries - 1;
+
+ netif_dbg(efx, probe, efx->net_dev,
+ "creating RX queue %d size %#x mask %#x\n",
+ efx_rx_queue_index(rx_queue), efx->rxq_entries,
+ rx_queue->ptr_mask);
+
+ /* Allocate RX buffers */
+ rx_queue->buffer = kcalloc(entries, sizeof(*rx_queue->buffer),
+ GFP_KERNEL);
+ if (!rx_queue->buffer)
+ return -ENOMEM;
+
+ rc = efx_nic_probe_rx(rx_queue);
+ if (rc) {
+ kfree(rx_queue->buffer);
+ rx_queue->buffer = NULL;
+ }
+
+ return rc;
+}
+
+void efx_init_rx_queue(struct efx_rx_queue *rx_queue)
+{
+ unsigned int max_fill, trigger, max_trigger;
+ struct efx_nic *efx = rx_queue->efx;
+ int rc = 0;
+
+ netif_dbg(rx_queue->efx, drv, rx_queue->efx->net_dev,
+ "initialising RX queue %d\n", efx_rx_queue_index(rx_queue));
+
+ /* Initialise ptr fields */
+ rx_queue->added_count = 0;
+ rx_queue->notified_count = 0;
+ rx_queue->removed_count = 0;
+ rx_queue->min_fill = -1U;
+ efx_init_rx_recycle_ring(rx_queue);
+
+ rx_queue->page_remove = 0;
+ rx_queue->page_add = rx_queue->page_ptr_mask + 1;
+ rx_queue->page_recycle_count = 0;
+ rx_queue->page_recycle_failed = 0;
+ rx_queue->page_recycle_full = 0;
+
+ /* Initialise limit fields */
+ max_fill = efx->rxq_entries - EFX_RXD_HEAD_ROOM;
+ max_trigger =
+ max_fill - efx->rx_pages_per_batch * efx->rx_bufs_per_page;
+ if (rx_refill_threshold != 0) {
+ trigger = max_fill * min(rx_refill_threshold, 100U) / 100U;
+ if (trigger > max_trigger)
+ trigger = max_trigger;
+ } else {
+ trigger = max_trigger;
+ }
+
+ rx_queue->max_fill = max_fill;
+ rx_queue->fast_fill_trigger = trigger;
+ rx_queue->refill_enabled = true;
+
+ /* Initialise XDP queue information */
+ rc = xdp_rxq_info_reg(&rx_queue->xdp_rxq_info, efx->net_dev,
+ rx_queue->core_index);
+
+ if (rc) {
+ netif_err(efx, rx_err, efx->net_dev,
+ "Failure to initialise XDP queue information rc=%d\n",
+ rc);
+ efx->xdp_rxq_info_failed = true;
+ } else {
+ rx_queue->xdp_rxq_info_valid = true;
+ }
+
+ /* Set up RX descriptor ring */
+ efx_nic_init_rx(rx_queue);
+}
+
+void efx_fini_rx_queue(struct efx_rx_queue *rx_queue)
+{
+ struct efx_rx_buffer *rx_buf;
+ int i;
+
+ netif_dbg(rx_queue->efx, drv, rx_queue->efx->net_dev,
+ "shutting down RX queue %d\n", efx_rx_queue_index(rx_queue));
+
+ del_timer_sync(&rx_queue->slow_fill);
+
+ /* Release RX buffers from the current read ptr to the write ptr */
+ if (rx_queue->buffer) {
+ for (i = rx_queue->removed_count; i < rx_queue->added_count;
+ i++) {
+ unsigned int index = i & rx_queue->ptr_mask;
+
+ rx_buf = efx_rx_buffer(rx_queue, index);
+ efx_fini_rx_buffer(rx_queue, rx_buf);
+ }
+ }
+
+ efx_fini_rx_recycle_ring(rx_queue);
+
+ if (rx_queue->xdp_rxq_info_valid)
+ xdp_rxq_info_unreg(&rx_queue->xdp_rxq_info);
+
+ rx_queue->xdp_rxq_info_valid = false;
+}
+
+void efx_remove_rx_queue(struct efx_rx_queue *rx_queue)
+{
+ netif_dbg(rx_queue->efx, drv, rx_queue->efx->net_dev,
+ "destroying RX queue %d\n", efx_rx_queue_index(rx_queue));
+
+ efx_nic_remove_rx(rx_queue);
+
+ kfree(rx_queue->buffer);
+ rx_queue->buffer = NULL;
+}
+
+/* Unmap a DMA-mapped page. This function is only called for the final RX
+ * buffer in a page.
+ */
+void efx_unmap_rx_buffer(struct efx_nic *efx,
+ struct efx_rx_buffer *rx_buf)
+{
+ struct page *page = rx_buf->page;
+
+ if (page) {
+ struct efx_rx_page_state *state = page_address(page);
+
+ dma_unmap_page(&efx->pci_dev->dev,
+ state->dma_addr,
+ PAGE_SIZE << efx->rx_buffer_order,
+ DMA_FROM_DEVICE);
+ }
+}
+
+void efx_free_rx_buffers(struct efx_rx_queue *rx_queue,
+ struct efx_rx_buffer *rx_buf,
+ unsigned int num_bufs)
+{
+ do {
+ if (rx_buf->page) {
+ put_page(rx_buf->page);
+ rx_buf->page = NULL;
+ }
+ rx_buf = efx_rx_buf_next(rx_queue, rx_buf);
+ } while (--num_bufs);
+}
+
+void efx_rx_slow_fill(struct timer_list *t)
+{
+ struct efx_rx_queue *rx_queue = from_timer(rx_queue, t, slow_fill);
+
+ /* Post an event to cause NAPI to run and refill the queue */
+ efx_nic_generate_fill_event(rx_queue);
+ ++rx_queue->slow_fill_count;
+}
+
+void efx_schedule_slow_fill(struct efx_rx_queue *rx_queue)
+{
+ mod_timer(&rx_queue->slow_fill, jiffies + msecs_to_jiffies(10));
+}
+
+/* efx_init_rx_buffers - create EFX_RX_BATCH page-based RX buffers
+ *
+ * @rx_queue: Efx RX queue
+ *
+ * This allocates a batch of pages, maps them for DMA, and populates
+ * struct efx_rx_buffers for each one. Return a negative error code or
+ * 0 on success. If a single page can be used for multiple buffers,
+ * then the page will either be inserted fully, or not at all.
+ */
+static int efx_init_rx_buffers(struct efx_rx_queue *rx_queue, bool atomic)
+{
+ unsigned int page_offset, index, count;
+ struct efx_nic *efx = rx_queue->efx;
+ struct efx_rx_page_state *state;
+ struct efx_rx_buffer *rx_buf;
+ dma_addr_t dma_addr;
+ struct page *page;
+
+ count = 0;
+ do {
+ page = efx_reuse_page(rx_queue);
+ if (page == NULL) {
+ page = alloc_pages(__GFP_COMP |
+ (atomic ? GFP_ATOMIC : GFP_KERNEL),
+ efx->rx_buffer_order);
+ if (unlikely(page == NULL))
+ return -ENOMEM;
+ dma_addr =
+ dma_map_page(&efx->pci_dev->dev, page, 0,
+ PAGE_SIZE << efx->rx_buffer_order,
+ DMA_FROM_DEVICE);
+ if (unlikely(dma_mapping_error(&efx->pci_dev->dev,
+ dma_addr))) {
+ __free_pages(page, efx->rx_buffer_order);
+ return -EIO;
+ }
+ state = page_address(page);
+ state->dma_addr = dma_addr;
+ } else {
+ state = page_address(page);
+ dma_addr = state->dma_addr;
+ }
+
+ dma_addr += sizeof(struct efx_rx_page_state);
+ page_offset = sizeof(struct efx_rx_page_state);
+
+ do {
+ index = rx_queue->added_count & rx_queue->ptr_mask;
+ rx_buf = efx_rx_buffer(rx_queue, index);
+ rx_buf->dma_addr = dma_addr + efx->rx_ip_align +
+ XDP_PACKET_HEADROOM;
+ rx_buf->page = page;
+ rx_buf->page_offset = page_offset + efx->rx_ip_align +
+ XDP_PACKET_HEADROOM;
+ rx_buf->len = efx->rx_dma_len;
+ rx_buf->flags = 0;
+ ++rx_queue->added_count;
+ get_page(page);
+ dma_addr += efx->rx_page_buf_step;
+ page_offset += efx->rx_page_buf_step;
+ } while (page_offset + efx->rx_page_buf_step <= PAGE_SIZE);
+
+ rx_buf->flags = EFX_RX_BUF_LAST_IN_PAGE;
+ } while (++count < efx->rx_pages_per_batch);
+
+ return 0;
+}
+
+void efx_rx_config_page_split(struct efx_nic *efx)
+{
+ efx->rx_page_buf_step = ALIGN(efx->rx_dma_len + efx->rx_ip_align +
+ XDP_PACKET_HEADROOM,
+ EFX_RX_BUF_ALIGNMENT);
+ efx->rx_bufs_per_page = efx->rx_buffer_order ? 1 :
+ ((PAGE_SIZE - sizeof(struct efx_rx_page_state)) /
+ efx->rx_page_buf_step);
+ efx->rx_buffer_truesize = (PAGE_SIZE << efx->rx_buffer_order) /
+ efx->rx_bufs_per_page;
+ efx->rx_pages_per_batch = DIV_ROUND_UP(EFX_RX_PREFERRED_BATCH,
+ efx->rx_bufs_per_page);
+}
+
+/* efx_fast_push_rx_descriptors - push new RX descriptors quickly
+ * @rx_queue: RX descriptor queue
+ *
+ * This will aim to fill the RX descriptor queue up to
+ * @rx_queue->max_fill. If there is insufficient atomic
+ * memory to do so, a slow fill will be scheduled.
+ *
+ * The caller must provide serialisation (none is used here). In practice,
+ * this means this function must run from the NAPI handler, or be called
+ * when NAPI is disabled.
+ */
+void efx_fast_push_rx_descriptors(struct efx_rx_queue *rx_queue, bool atomic)
+{
+ struct efx_nic *efx = rx_queue->efx;
+ unsigned int fill_level, batch_size;
+ int space, rc = 0;
+
+ if (!rx_queue->refill_enabled)
+ return;
+
+ /* Calculate current fill level, and exit if we don't need to fill */
+ fill_level = (rx_queue->added_count - rx_queue->removed_count);
+ EFX_WARN_ON_ONCE_PARANOID(fill_level > rx_queue->efx->rxq_entries);
+ if (fill_level >= rx_queue->fast_fill_trigger)
+ goto out;
+
+ /* Record minimum fill level */
+ if (unlikely(fill_level < rx_queue->min_fill)) {
+ if (fill_level)
+ rx_queue->min_fill = fill_level;
+ }
+
+ batch_size = efx->rx_pages_per_batch * efx->rx_bufs_per_page;
+ space = rx_queue->max_fill - fill_level;
+ EFX_WARN_ON_ONCE_PARANOID(space < batch_size);
+
+ netif_vdbg(rx_queue->efx, rx_status, rx_queue->efx->net_dev,
+ "RX queue %d fast-filling descriptor ring from"
+ " level %d to level %d\n",
+ efx_rx_queue_index(rx_queue), fill_level,
+ rx_queue->max_fill);
+
+ do {
+ rc = efx_init_rx_buffers(rx_queue, atomic);
+ if (unlikely(rc)) {
+ /* Ensure that we don't leave the rx queue empty */
+ efx_schedule_slow_fill(rx_queue);
+ goto out;
+ }
+ } while ((space -= batch_size) >= batch_size);
+
+ netif_vdbg(rx_queue->efx, rx_status, rx_queue->efx->net_dev,
+ "RX queue %d fast-filled descriptor ring "
+ "to level %d\n", efx_rx_queue_index(rx_queue),
+ rx_queue->added_count - rx_queue->removed_count);
+
+ out:
+ if (rx_queue->notified_count != rx_queue->added_count)
+ efx_nic_notify_rx_desc(rx_queue);
+}
+
+/* Pass a received packet up through GRO. GRO can handle pages
+ * regardless of checksum state and skbs with a good checksum.
+ */
+void
+efx_rx_packet_gro(struct efx_channel *channel, struct efx_rx_buffer *rx_buf,
+ unsigned int n_frags, u8 *eh)
+{
+ struct napi_struct *napi = &channel->napi_str;
+ struct efx_nic *efx = channel->efx;
+ struct sk_buff *skb;
+
+ skb = napi_get_frags(napi);
+ if (unlikely(!skb)) {
+ struct efx_rx_queue *rx_queue;
+
+ rx_queue = efx_channel_get_rx_queue(channel);
+ efx_free_rx_buffers(rx_queue, rx_buf, n_frags);
+ return;
+ }
+
+ if (efx->net_dev->features & NETIF_F_RXHASH)
+ skb_set_hash(skb, efx_rx_buf_hash(efx, eh),
+ PKT_HASH_TYPE_L3);
+ skb->ip_summed = ((rx_buf->flags & EFX_RX_PKT_CSUMMED) ?
+ CHECKSUM_UNNECESSARY : CHECKSUM_NONE);
+ skb->csum_level = !!(rx_buf->flags & EFX_RX_PKT_CSUM_LEVEL);
+
+ for (;;) {
+ skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
+ rx_buf->page, rx_buf->page_offset,
+ rx_buf->len);
+ rx_buf->page = NULL;
+ skb->len += rx_buf->len;
+ if (skb_shinfo(skb)->nr_frags == n_frags)
+ break;
+
+ rx_buf = efx_rx_buf_next(&channel->rx_queue, rx_buf);
+ }
+
+ skb->data_len = skb->len;
+ skb->truesize += n_frags * efx->rx_buffer_truesize;
+
+ skb_record_rx_queue(skb, channel->rx_queue.core_index);
+
+ napi_gro_frags(napi);
+}
+
+/* RSS contexts. We're using linked lists and crappy O(n) algorithms, because
+ * (a) this is an infrequent control-plane operation and (b) n is small (max 64)
+ */
+struct efx_rss_context *efx_alloc_rss_context_entry(struct efx_nic *efx)
+{
+ struct list_head *head = &efx->rss_context.list;
+ struct efx_rss_context *ctx, *new;
+ u32 id = 1; /* Don't use zero, that refers to the master RSS context */
+
+ WARN_ON(!mutex_is_locked(&efx->rss_lock));
+
+ /* Search for first gap in the numbering */
+ list_for_each_entry(ctx, head, list) {
+ if (ctx->user_id != id)
+ break;
+ id++;
+ /* Check for wrap. If this happens, we have nearly 2^32
+ * allocated RSS contexts, which seems unlikely.
+ */
+ if (WARN_ON_ONCE(!id))
+ return NULL;
+ }
+
+ /* Create the new entry */
+ new = kmalloc(sizeof(*new), GFP_KERNEL);
+ if (!new)
+ return NULL;
+ new->context_id = EFX_MCDI_RSS_CONTEXT_INVALID;
+ new->rx_hash_udp_4tuple = false;
+
+ /* Insert the new entry into the gap */
+ new->user_id = id;
+ list_add_tail(&new->list, &ctx->list);
+ return new;
+}
+
+struct efx_rss_context *efx_find_rss_context_entry(struct efx_nic *efx, u32 id)
+{
+ struct list_head *head = &efx->rss_context.list;
+ struct efx_rss_context *ctx;
+
+ WARN_ON(!mutex_is_locked(&efx->rss_lock));
+
+ list_for_each_entry(ctx, head, list)
+ if (ctx->user_id == id)
+ return ctx;
+ return NULL;
+}
+
+void efx_free_rss_context_entry(struct efx_rss_context *ctx)
+{
+ list_del(&ctx->list);
+ kfree(ctx);
+}
+
+void efx_set_default_rx_indir_table(struct efx_nic *efx,
+ struct efx_rss_context *ctx)
+{
+ size_t i;
+
+ for (i = 0; i < ARRAY_SIZE(ctx->rx_indir_table); i++)
+ ctx->rx_indir_table[i] =
+ ethtool_rxfh_indir_default(i, efx->rss_spread);
+}
+
+/**
+ * efx_filter_is_mc_recipient - test whether spec is a multicast recipient
+ * @spec: Specification to test
+ *
+ * Return: %true if the specification is a non-drop RX filter that
+ * matches a local MAC address I/G bit value of 1 or matches a local
+ * IPv4 or IPv6 address value in the respective multicast address
+ * range. Otherwise %false.
+ */
+bool efx_filter_is_mc_recipient(const struct efx_filter_spec *spec)
+{
+ if (!(spec->flags & EFX_FILTER_FLAG_RX) ||
+ spec->dmaq_id == EFX_FILTER_RX_DMAQ_ID_DROP)
+ return false;
+
+ if (spec->match_flags &
+ (EFX_FILTER_MATCH_LOC_MAC | EFX_FILTER_MATCH_LOC_MAC_IG) &&
+ is_multicast_ether_addr(spec->loc_mac))
+ return true;
+
+ if ((spec->match_flags &
+ (EFX_FILTER_MATCH_ETHER_TYPE | EFX_FILTER_MATCH_LOC_HOST)) ==
+ (EFX_FILTER_MATCH_ETHER_TYPE | EFX_FILTER_MATCH_LOC_HOST)) {
+ if (spec->ether_type == htons(ETH_P_IP) &&
+ ipv4_is_multicast(spec->loc_host[0]))
+ return true;
+ if (spec->ether_type == htons(ETH_P_IPV6) &&
+ ((const u8 *)spec->loc_host)[0] == 0xff)
+ return true;
+ }
+
+ return false;
+}
+
+bool efx_filter_spec_equal(const struct efx_filter_spec *left,
+ const struct efx_filter_spec *right)
+{
+ if ((left->match_flags ^ right->match_flags) |
+ ((left->flags ^ right->flags) &
+ (EFX_FILTER_FLAG_RX | EFX_FILTER_FLAG_TX)))
+ return false;
+
+ return memcmp(&left->outer_vid, &right->outer_vid,
+ sizeof(struct efx_filter_spec) -
+ offsetof(struct efx_filter_spec, outer_vid)) == 0;
+}
+
+u32 efx_filter_spec_hash(const struct efx_filter_spec *spec)
+{
+ BUILD_BUG_ON(offsetof(struct efx_filter_spec, outer_vid) & 3);
+ return jhash2((const u32 *)&spec->outer_vid,
+ (sizeof(struct efx_filter_spec) -
+ offsetof(struct efx_filter_spec, outer_vid)) / 4,
+ 0);
+}
+
+#ifdef CONFIG_RFS_ACCEL
+bool efx_rps_check_rule(struct efx_arfs_rule *rule, unsigned int filter_idx,
+ bool *force)
+{
+ if (rule->filter_id == EFX_ARFS_FILTER_ID_PENDING) {
+ /* ARFS is currently updating this entry, leave it */
+ return false;
+ }
+ if (rule->filter_id == EFX_ARFS_FILTER_ID_ERROR) {
+ /* ARFS tried and failed to update this, so it's probably out
+ * of date. Remove the filter and the ARFS rule entry.
+ */
+ rule->filter_id = EFX_ARFS_FILTER_ID_REMOVING;
+ *force = true;
+ return true;
+ } else if (WARN_ON(rule->filter_id != filter_idx)) { /* can't happen */
+ /* ARFS has moved on, so old filter is not needed. Since we did
+ * not mark the rule with EFX_ARFS_FILTER_ID_REMOVING, it will
+ * not be removed by efx_rps_hash_del() subsequently.
+ */
+ *force = true;
+ return true;
+ }
+ /* Remove it iff ARFS wants to. */
+ return true;
+}
+
+static
+struct hlist_head *efx_rps_hash_bucket(struct efx_nic *efx,
+ const struct efx_filter_spec *spec)
+{
+ u32 hash = efx_filter_spec_hash(spec);
+
+ lockdep_assert_held(&efx->rps_hash_lock);
+ if (!efx->rps_hash_table)
+ return NULL;
+ return &efx->rps_hash_table[hash % EFX_ARFS_HASH_TABLE_SIZE];
+}
+
+struct efx_arfs_rule *efx_rps_hash_find(struct efx_nic *efx,
+ const struct efx_filter_spec *spec)
+{
+ struct efx_arfs_rule *rule;
+ struct hlist_head *head;
+ struct hlist_node *node;
+
+ head = efx_rps_hash_bucket(efx, spec);
+ if (!head)
+ return NULL;
+ hlist_for_each(node, head) {
+ rule = container_of(node, struct efx_arfs_rule, node);
+ if (efx_filter_spec_equal(spec, &rule->spec))
+ return rule;
+ }
+ return NULL;
+}
+
+struct efx_arfs_rule *efx_rps_hash_add(struct efx_nic *efx,
+ const struct efx_filter_spec *spec,
+ bool *new)
+{
+ struct efx_arfs_rule *rule;
+ struct hlist_head *head;
+ struct hlist_node *node;
+
+ head = efx_rps_hash_bucket(efx, spec);
+ if (!head)
+ return NULL;
+ hlist_for_each(node, head) {
+ rule = container_of(node, struct efx_arfs_rule, node);
+ if (efx_filter_spec_equal(spec, &rule->spec)) {
+ *new = false;
+ return rule;
+ }
+ }
+ rule = kmalloc(sizeof(*rule), GFP_ATOMIC);
+ *new = true;
+ if (rule) {
+ memcpy(&rule->spec, spec, sizeof(rule->spec));
+ hlist_add_head(&rule->node, head);
+ }
+ return rule;
+}
+
+void efx_rps_hash_del(struct efx_nic *efx, const struct efx_filter_spec *spec)
+{
+ struct efx_arfs_rule *rule;
+ struct hlist_head *head;
+ struct hlist_node *node;
+
+ head = efx_rps_hash_bucket(efx, spec);
+ if (WARN_ON(!head))
+ return;
+ hlist_for_each(node, head) {
+ rule = container_of(node, struct efx_arfs_rule, node);
+ if (efx_filter_spec_equal(spec, &rule->spec)) {
+ /* Someone already reused the entry. We know that if
+ * this check doesn't fire (i.e. filter_id == REMOVING)
+ * then the REMOVING mark was put there by our caller,
+ * because the caller is holding a lock on the filter table and
+ * only holders of that lock set REMOVING.
+ */
+ if (rule->filter_id != EFX_ARFS_FILTER_ID_REMOVING)
+ return;
+ hlist_del(node);
+ kfree(rule);
+ return;
+ }
+ }
+ /* We didn't find it. */
+ WARN_ON(1);
+}
+#endif
+
+int efx_probe_filters(struct efx_nic *efx)
+{
+ int rc;
+
+ init_rwsem(&efx->filter_sem);
+ mutex_lock(&efx->mac_lock);
+ down_write(&efx->filter_sem);
+ rc = efx->type->filter_table_probe(efx);
+ if (rc)
+ goto out_unlock;
+
+#ifdef CONFIG_RFS_ACCEL
+ if (efx->type->offload_features & NETIF_F_NTUPLE) {
+ struct efx_channel *channel;
+ int i, success = 1;
+
+ efx_for_each_channel(channel, efx) {
+ channel->rps_flow_id =
+ kcalloc(efx->type->max_rx_ip_filters,
+ sizeof(*channel->rps_flow_id),
+ GFP_KERNEL);
+ if (!channel->rps_flow_id)
+ success = 0;
+ else
+ for (i = 0;
+ i < efx->type->max_rx_ip_filters;
+ ++i)
+ channel->rps_flow_id[i] =
+ RPS_FLOW_ID_INVALID;
+ channel->rfs_expire_index = 0;
+ channel->rfs_filter_count = 0;
+ }
+
+ if (!success) {
+ efx_for_each_channel(channel, efx)
+ kfree(channel->rps_flow_id);
+ efx->type->filter_table_remove(efx);
+ rc = -ENOMEM;
+ goto out_unlock;
+ }
+ }
+#endif
+out_unlock:
+ up_write(&efx->filter_sem);
+ mutex_unlock(&efx->mac_lock);
+ return rc;
+}
+
+void efx_remove_filters(struct efx_nic *efx)
+{
+#ifdef CONFIG_RFS_ACCEL
+ struct efx_channel *channel;
+
+ efx_for_each_channel(channel, efx) {
+ cancel_delayed_work_sync(&channel->filter_work);
+ kfree(channel->rps_flow_id);
+ }
+#endif
+ down_write(&efx->filter_sem);
+ efx->type->filter_table_remove(efx);
+ up_write(&efx->filter_sem);
+}
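
As a usage note for the RSS-context helpers above: efx_alloc_rss_context_entry() and efx_find_rss_context_entry() both assert that efx->rss_lock is held, and a new entry starts with context_id == EFX_MCDI_RSS_CONTEXT_INVALID until a hardware context is actually allocated. A rough caller sketch follows; the function name, error handling and the elided MCDI step are illustrative only.

/* Illustrative only: how ethtool-side code might create a user RSS context
 * with the helpers above.  MCDI programming of the context is elided.
 */
static int example_new_rss_context(struct efx_nic *efx, u32 *user_id)
{
	struct efx_rss_context *ctx;
	int rc = 0;

	mutex_lock(&efx->rss_lock);
	ctx = efx_alloc_rss_context_entry(efx);
	if (!ctx) {
		rc = -ENOMEM;
		goto out;
	}
	efx_set_default_rx_indir_table(efx, ctx);
	netdev_rss_key_fill(ctx->rx_hash_key, sizeof(ctx->rx_hash_key));
	/* ... allocate the hardware context via MCDI here, which fills in
	 * ctx->context_id; on failure call efx_free_rss_context_entry(ctx).
	 */
	*user_id = ctx->user_id;
out:
	mutex_unlock(&efx->rss_lock);
	return rc;
}
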
diff --git a/drivers/net/ethernet/sfc/rx_common.h b/drivers/net/ethernet/sfc/rx_common.h
new file mode 100644
index 000000000000..c41f12a89477
--- /dev/null
+++ b/drivers/net/ethernet/sfc/rx_common.h
@@ -0,0 +1,97 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2018 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#ifndef EFX_RX_COMMON_H
+#define EFX_RX_COMMON_H
+
+/* Preferred number of descriptors to fill at once */
+#define EFX_RX_PREFERRED_BATCH 8U
+
+/* Each packet can consume up to ceil(max_frame_len / buffer_size) buffers */
+#define EFX_RX_MAX_FRAGS DIV_ROUND_UP(EFX_MAX_FRAME_LEN(EFX_MAX_MTU), \
+ EFX_RX_USR_BUF_SIZE)
+
+static inline u8 *efx_rx_buf_va(struct efx_rx_buffer *buf)
+{
+ return page_address(buf->page) + buf->page_offset;
+}
+
+static inline u32 efx_rx_buf_hash(struct efx_nic *efx, const u8 *eh)
+{
+#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
+ return __le32_to_cpup((const __le32 *)(eh + efx->rx_packet_hash_offset));
+#else
+ const u8 *data = eh + efx->rx_packet_hash_offset;
+
+ return (u32)data[0] |
+ (u32)data[1] << 8 |
+ (u32)data[2] << 16 |
+ (u32)data[3] << 24;
+#endif
+}
+
+void efx_rx_slow_fill(struct timer_list *t);
+
+void efx_recycle_rx_pages(struct efx_channel *channel,
+ struct efx_rx_buffer *rx_buf,
+ unsigned int n_frags);
+void efx_discard_rx_packet(struct efx_channel *channel,
+ struct efx_rx_buffer *rx_buf,
+ unsigned int n_frags);
+
+int efx_probe_rx_queue(struct efx_rx_queue *rx_queue);
+void efx_init_rx_queue(struct efx_rx_queue *rx_queue);
+void efx_fini_rx_queue(struct efx_rx_queue *rx_queue);
+void efx_remove_rx_queue(struct efx_rx_queue *rx_queue);
+void efx_destroy_rx_queue(struct efx_rx_queue *rx_queue);
+
+void efx_init_rx_buffer(struct efx_rx_queue *rx_queue,
+ struct page *page,
+ unsigned int page_offset,
+ u16 flags);
+void efx_unmap_rx_buffer(struct efx_nic *efx, struct efx_rx_buffer *rx_buf);
+void efx_free_rx_buffers(struct efx_rx_queue *rx_queue,
+ struct efx_rx_buffer *rx_buf,
+ unsigned int num_bufs);
+
+void efx_schedule_slow_fill(struct efx_rx_queue *rx_queue);
+void efx_rx_config_page_split(struct efx_nic *efx);
+void efx_fast_push_rx_descriptors(struct efx_rx_queue *rx_queue, bool atomic);
+
+void
+efx_rx_packet_gro(struct efx_channel *channel, struct efx_rx_buffer *rx_buf,
+ unsigned int n_frags, u8 *eh);
+
+struct efx_rss_context *efx_alloc_rss_context_entry(struct efx_nic *efx);
+struct efx_rss_context *efx_find_rss_context_entry(struct efx_nic *efx, u32 id);
+void efx_free_rss_context_entry(struct efx_rss_context *ctx);
+void efx_set_default_rx_indir_table(struct efx_nic *efx,
+ struct efx_rss_context *ctx);
+
+bool efx_filter_is_mc_recipient(const struct efx_filter_spec *spec);
+bool efx_filter_spec_equal(const struct efx_filter_spec *left,
+ const struct efx_filter_spec *right);
+u32 efx_filter_spec_hash(const struct efx_filter_spec *spec);
+
+#ifdef CONFIG_RFS_ACCEL
+bool efx_rps_check_rule(struct efx_arfs_rule *rule, unsigned int filter_idx,
+ bool *force);
+struct efx_arfs_rule *efx_rps_hash_find(struct efx_nic *efx,
+ const struct efx_filter_spec *spec);
+struct efx_arfs_rule *efx_rps_hash_add(struct efx_nic *efx,
+ const struct efx_filter_spec *spec,
+ bool *new);
+void efx_rps_hash_del(struct efx_nic *efx, const struct efx_filter_spec *spec);
+#endif
+
+int efx_probe_filters(struct efx_nic *efx);
+void efx_remove_filters(struct efx_nic *efx);
+
+#endif
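
The refill entry points declared above are meant to be driven from NAPI: efx_fast_push_rx_descriptors() tops the ring up using atomic allocations, and on allocation failure efx_schedule_slow_fill() arms the slow_fill timer, whose handler posts a fill event so the refill is retried from the event path. A rough sketch of the calling pattern is below; the poll function name and the elided event processing are placeholders, not the driver's actual poll loop.

/* Placeholder poll loop showing where the fast refill is expected to run;
 * the efx_process_channel()-style event handling is elided.
 */
static int example_napi_poll(struct napi_struct *napi, int budget)
{
	struct efx_channel *channel =
		container_of(napi, struct efx_channel, napi_str);
	int spent;

	spent = 0; /* ... process up to @budget RX/TX completion events ... */

	/* Refill from softirq context: atomic allocations only */
	efx_fast_push_rx_descriptors(efx_channel_get_rx_queue(channel), true);

	if (spent < budget)
		napi_complete_done(napi, spent);
	return spent;
}
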
diff --git a/drivers/net/ethernet/sfc/selftest.c b/drivers/net/ethernet/sfc/selftest.c
index 8474cf8ea7d3..1ae369022d7d 100644
--- a/drivers/net/ethernet/sfc/selftest.c
+++ b/drivers/net/ethernet/sfc/selftest.c
@@ -18,6 +18,8 @@
#include <linux/slab.h>
#include "net_driver.h"
#include "efx.h"
+#include "efx_common.h"
+#include "efx_channels.h"
#include "nic.h"
#include "selftest.h"
#include "workarounds.h"
@@ -783,7 +785,7 @@ void efx_selftest_async_cancel(struct efx_nic *efx)
cancel_delayed_work_sync(&efx->selftest_work);
}
-void efx_selftest_async_work(struct work_struct *data)
+static void efx_selftest_async_work(struct work_struct *data)
{
struct efx_nic *efx = container_of(data, struct efx_nic,
selftest_work.work);
@@ -802,3 +804,8 @@ void efx_selftest_async_work(struct work_struct *data)
channel->channel, cpu);
}
}
+
+void efx_selftest_async_init(struct efx_nic *efx)
+{
+ INIT_DELAYED_WORK(&efx->selftest_work, efx_selftest_async_work);
+}
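efx_selftest_async_work() is now static, so callers initialise the deferred selftest through the new efx_selftest_async_init() helper instead of open-coding INIT_DELAYED_WORK() themselves. A hedged sketch of what a caller on the probe path might look like; example_init_struct() is hypothetical, only efx_selftest_async_init() comes from this patch.

/* Hypothetical probe-path caller: the per-NIC setup code calls the new
 * helper rather than INIT_DELAYED_WORK() on a work function it can no
 * longer see.
 */
static int example_init_struct(struct efx_nic *efx)
{
	/* ...other software-state initialisation... */
	efx_selftest_async_init(efx);
	return 0;
}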
diff --git a/drivers/net/ethernet/sfc/selftest.h b/drivers/net/ethernet/sfc/selftest.h
index a3553816d92c..ca88ebb4f6b1 100644
--- a/drivers/net/ethernet/sfc/selftest.h
+++ b/drivers/net/ethernet/sfc/selftest.h
@@ -45,8 +45,8 @@ void efx_loopback_rx_packet(struct efx_nic *efx, const char *buf_ptr,
int pkt_len);
int efx_selftest(struct efx_nic *efx, struct efx_self_tests *tests,
unsigned flags);
+void efx_selftest_async_init(struct efx_nic *efx);
void efx_selftest_async_start(struct efx_nic *efx);
void efx_selftest_async_cancel(struct efx_nic *efx);
-void efx_selftest_async_work(struct work_struct *data);
#endif /* EFX_SELFTEST_H */
diff --git a/drivers/net/ethernet/sfc/siena.c b/drivers/net/ethernet/sfc/siena.c
index 81499244a4b4..baa464161626 100644
--- a/drivers/net/ethernet/sfc/siena.c
+++ b/drivers/net/ethernet/sfc/siena.c
@@ -14,12 +14,14 @@
#include "net_driver.h"
#include "bitfield.h"
#include "efx.h"
+#include "efx_common.h"
#include "nic.h"
#include "farch_regs.h"
#include "io.h"
#include "workarounds.h"
#include "mcdi.h"
#include "mcdi_pcol.h"
+#include "mcdi_port_common.h"
#include "selftest.h"
#include "siena_sriov.h"
diff --git a/drivers/net/ethernet/sfc/siena_sriov.c b/drivers/net/ethernet/sfc/siena_sriov.c
index dfbdf05dcf79..83dcfcae3d4b 100644
--- a/drivers/net/ethernet/sfc/siena_sriov.c
+++ b/drivers/net/ethernet/sfc/siena_sriov.c
@@ -7,6 +7,7 @@
#include <linux/module.h>
#include "net_driver.h"
#include "efx.h"
+#include "efx_channels.h"
#include "nic.h"
#include "io.h"
#include "mcdi.h"
diff --git a/drivers/net/ethernet/sfc/tx.c b/drivers/net/ethernet/sfc/tx.c
index 00c1c4402451..04d7f41d7ed9 100644
--- a/drivers/net/ethernet/sfc/tx.c
+++ b/drivers/net/ethernet/sfc/tx.c
@@ -20,6 +20,7 @@
#include "io.h"
#include "nic.h"
#include "tx.h"
+#include "tx_common.h"
#include "workarounds.h"
#include "ef10_regs.h"
@@ -56,72 +57,6 @@ u8 *efx_tx_get_copy_buffer_limited(struct efx_tx_queue *tx_queue,
return efx_tx_get_copy_buffer(tx_queue, buffer);
}
-static void efx_dequeue_buffer(struct efx_tx_queue *tx_queue,
- struct efx_tx_buffer *buffer,
- unsigned int *pkts_compl,
- unsigned int *bytes_compl)
-{
- if (buffer->unmap_len) {
- struct device *dma_dev = &tx_queue->efx->pci_dev->dev;
- dma_addr_t unmap_addr = buffer->dma_addr - buffer->dma_offset;
- if (buffer->flags & EFX_TX_BUF_MAP_SINGLE)
- dma_unmap_single(dma_dev, unmap_addr, buffer->unmap_len,
- DMA_TO_DEVICE);
- else
- dma_unmap_page(dma_dev, unmap_addr, buffer->unmap_len,
- DMA_TO_DEVICE);
- buffer->unmap_len = 0;
- }
-
- if (buffer->flags & EFX_TX_BUF_SKB) {
- struct sk_buff *skb = (struct sk_buff *)buffer->skb;
-
- EFX_WARN_ON_PARANOID(!pkts_compl || !bytes_compl);
- (*pkts_compl)++;
- (*bytes_compl) += skb->len;
- if (tx_queue->timestamping &&
- (tx_queue->completed_timestamp_major ||
- tx_queue->completed_timestamp_minor)) {
- struct skb_shared_hwtstamps hwtstamp;
-
- hwtstamp.hwtstamp =
- efx_ptp_nic_to_kernel_time(tx_queue);
- skb_tstamp_tx(skb, &hwtstamp);
-
- tx_queue->completed_timestamp_major = 0;
- tx_queue->completed_timestamp_minor = 0;
- }
- dev_consume_skb_any((struct sk_buff *)buffer->skb);
- netif_vdbg(tx_queue->efx, tx_done, tx_queue->efx->net_dev,
- "TX queue %d transmission id %x complete\n",
- tx_queue->queue, tx_queue->read_count);
- } else if (buffer->flags & EFX_TX_BUF_XDP) {
- xdp_return_frame_rx_napi(buffer->xdpf);
- }
-
- buffer->len = 0;
- buffer->flags = 0;
-}
-
-unsigned int efx_tx_max_skb_descs(struct efx_nic *efx)
-{
- /* Header and payload descriptor for each output segment, plus
- * one for every input fragment boundary within a segment
- */
- unsigned int max_descs = EFX_TSO_MAX_SEGS * 2 + MAX_SKB_FRAGS;
-
- /* Possibly one more per segment for option descriptors */
- if (efx_nic_rev(efx) >= EFX_REV_HUNT_A0)
- max_descs += EFX_TSO_MAX_SEGS;
-
- /* Possibly more for PCIe page boundaries within input fragments */
- if (PAGE_SIZE > EFX_PAGE_SIZE)
- max_descs += max_t(unsigned int, MAX_SKB_FRAGS,
- DIV_ROUND_UP(GSO_MAX_SIZE, EFX_PAGE_SIZE));
-
- return max_descs;
-}
-
static void efx_tx_maybe_stop_queue(struct efx_tx_queue *txq1)
{
/* We need to consider both queues that the net core sees as one */
@@ -333,125 +268,6 @@ static int efx_enqueue_skb_pio(struct efx_tx_queue *tx_queue,
}
#endif /* EFX_USE_PIO */
-static struct efx_tx_buffer *efx_tx_map_chunk(struct efx_tx_queue *tx_queue,
- dma_addr_t dma_addr,
- size_t len)
-{
- const struct efx_nic_type *nic_type = tx_queue->efx->type;
- struct efx_tx_buffer *buffer;
- unsigned int dma_len;
-
- /* Map the fragment taking account of NIC-dependent DMA limits. */
- do {
- buffer = efx_tx_queue_get_insert_buffer(tx_queue);
- dma_len = nic_type->tx_limit_len(tx_queue, dma_addr, len);
-
- buffer->len = dma_len;
- buffer->dma_addr = dma_addr;
- buffer->flags = EFX_TX_BUF_CONT;
- len -= dma_len;
- dma_addr += dma_len;
- ++tx_queue->insert_count;
- } while (len);
-
- return buffer;
-}
-
-/* Map all data from an SKB for DMA and create descriptors on the queue.
- */
-static int efx_tx_map_data(struct efx_tx_queue *tx_queue, struct sk_buff *skb,
- unsigned int segment_count)
-{
- struct efx_nic *efx = tx_queue->efx;
- struct device *dma_dev = &efx->pci_dev->dev;
- unsigned int frag_index, nr_frags;
- dma_addr_t dma_addr, unmap_addr;
- unsigned short dma_flags;
- size_t len, unmap_len;
-
- nr_frags = skb_shinfo(skb)->nr_frags;
- frag_index = 0;
-
- /* Map header data. */
- len = skb_headlen(skb);
- dma_addr = dma_map_single(dma_dev, skb->data, len, DMA_TO_DEVICE);
- dma_flags = EFX_TX_BUF_MAP_SINGLE;
- unmap_len = len;
- unmap_addr = dma_addr;
-
- if (unlikely(dma_mapping_error(dma_dev, dma_addr)))
- return -EIO;
-
- if (segment_count) {
- /* For TSO we need to put the header in to a separate
- * descriptor. Map this separately if necessary.
- */
- size_t header_len = skb_transport_header(skb) - skb->data +
- (tcp_hdr(skb)->doff << 2u);
-
- if (header_len != len) {
- tx_queue->tso_long_headers++;
- efx_tx_map_chunk(tx_queue, dma_addr, header_len);
- len -= header_len;
- dma_addr += header_len;
- }
- }
-
- /* Add descriptors for each fragment. */
- do {
- struct efx_tx_buffer *buffer;
- skb_frag_t *fragment;
-
- buffer = efx_tx_map_chunk(tx_queue, dma_addr, len);
-
- /* The final descriptor for a fragment is responsible for
- * unmapping the whole fragment.
- */
- buffer->flags = EFX_TX_BUF_CONT | dma_flags;
- buffer->unmap_len = unmap_len;
- buffer->dma_offset = buffer->dma_addr - unmap_addr;
-
- if (frag_index >= nr_frags) {
- /* Store SKB details with the final buffer for
- * the completion.
- */
- buffer->skb = skb;
- buffer->flags = EFX_TX_BUF_SKB | dma_flags;
- return 0;
- }
-
- /* Move on to the next fragment. */
- fragment = &skb_shinfo(skb)->frags[frag_index++];
- len = skb_frag_size(fragment);
- dma_addr = skb_frag_dma_map(dma_dev, fragment,
- 0, len, DMA_TO_DEVICE);
- dma_flags = 0;
- unmap_len = len;
- unmap_addr = dma_addr;
-
- if (unlikely(dma_mapping_error(dma_dev, dma_addr)))
- return -EIO;
- } while (1);
-}
-
-/* Remove buffers put into a tx_queue for the current packet.
- * None of the buffers must have an skb attached.
- */
-static void efx_enqueue_unwind(struct efx_tx_queue *tx_queue,
- unsigned int insert_count)
-{
- struct efx_tx_buffer *buffer;
- unsigned int bytes_compl = 0;
- unsigned int pkts_compl = 0;
-
- /* Work backwards until we hit the original insert pointer value */
- while (tx_queue->insert_count != insert_count) {
- --tx_queue->insert_count;
- buffer = __efx_tx_queue_get_insert_buffer(tx_queue);
- efx_dequeue_buffer(tx_queue, buffer, &pkts_compl, &bytes_compl);
- }
-}
-
/*
* Fallback to software TSO.
*
@@ -473,12 +289,9 @@ static int efx_tx_tso_fallback(struct efx_tx_queue *tx_queue,
dev_consume_skb_any(skb);
skb = segments;
- while (skb) {
- next = skb->next;
- skb->next = NULL;
-
+ skb_list_walk_safe(skb, skb, next) {
+ skb_mark_not_on_list(skb);
efx_enqueue_skb(tx_queue, skb);
- skb = next;
}
return 0;
@@ -687,41 +500,6 @@ int efx_xdp_tx_buffers(struct efx_nic *efx, int n, struct xdp_frame **xdpfs,
return i;
}
-/* Remove packets from the TX queue
- *
- * This removes packets from the TX queue, up to and including the
- * specified index.
- */
-static void efx_dequeue_buffers(struct efx_tx_queue *tx_queue,
- unsigned int index,
- unsigned int *pkts_compl,
- unsigned int *bytes_compl)
-{
- struct efx_nic *efx = tx_queue->efx;
- unsigned int stop_index, read_ptr;
-
- stop_index = (index + 1) & tx_queue->ptr_mask;
- read_ptr = tx_queue->read_count & tx_queue->ptr_mask;
-
- while (read_ptr != stop_index) {
- struct efx_tx_buffer *buffer = &tx_queue->buffer[read_ptr];
-
- if (!(buffer->flags & EFX_TX_BUF_OPTION) &&
- unlikely(buffer->len == 0)) {
- netif_err(efx, tx_err, efx->net_dev,
- "TX queue %d spurious TX completion id %x\n",
- tx_queue->queue, read_ptr);
- efx_schedule_reset(efx, RESET_TYPE_TX_SKIP);
- return;
- }
-
- efx_dequeue_buffer(tx_queue, buffer, pkts_compl, bytes_compl);
-
- ++tx_queue->read_count;
- read_ptr = tx_queue->read_count & tx_queue->ptr_mask;
- }
-}
-
/* Initiate a packet transmission. We use one channel per CPU
* (sharing when we have more CPUs than channels). On Falcon, the TX
* completion events will be directed back to the CPU that transmitted
@@ -834,173 +612,3 @@ int efx_setup_tc(struct net_device *net_dev, enum tc_setup_type type,
net_dev->num_tc = num_tc;
return 0;
}
-
-void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index)
-{
- unsigned fill_level;
- struct efx_nic *efx = tx_queue->efx;
- struct efx_tx_queue *txq2;
- unsigned int pkts_compl = 0, bytes_compl = 0;
-
- EFX_WARN_ON_ONCE_PARANOID(index > tx_queue->ptr_mask);
-
- efx_dequeue_buffers(tx_queue, index, &pkts_compl, &bytes_compl);
- tx_queue->pkts_compl += pkts_compl;
- tx_queue->bytes_compl += bytes_compl;
-
- if (pkts_compl > 1)
- ++tx_queue->merge_events;
-
- /* See if we need to restart the netif queue. This memory
- * barrier ensures that we write read_count (inside
- * efx_dequeue_buffers()) before reading the queue status.
- */
- smp_mb();
- if (unlikely(netif_tx_queue_stopped(tx_queue->core_txq)) &&
- likely(efx->port_enabled) &&
- likely(netif_device_present(efx->net_dev))) {
- txq2 = efx_tx_queue_partner(tx_queue);
- fill_level = max(tx_queue->insert_count - tx_queue->read_count,
- txq2->insert_count - txq2->read_count);
- if (fill_level <= efx->txq_wake_thresh)
- netif_tx_wake_queue(tx_queue->core_txq);
- }
-
- /* Check whether the hardware queue is now empty */
- if ((int)(tx_queue->read_count - tx_queue->old_write_count) >= 0) {
- tx_queue->old_write_count = READ_ONCE(tx_queue->write_count);
- if (tx_queue->read_count == tx_queue->old_write_count) {
- smp_mb();
- tx_queue->empty_read_count =
- tx_queue->read_count | EFX_EMPTY_COUNT_VALID;
- }
- }
-}
-
-static unsigned int efx_tx_cb_page_count(struct efx_tx_queue *tx_queue)
-{
- return DIV_ROUND_UP(tx_queue->ptr_mask + 1, PAGE_SIZE >> EFX_TX_CB_ORDER);
-}
-
-int efx_probe_tx_queue(struct efx_tx_queue *tx_queue)
-{
- struct efx_nic *efx = tx_queue->efx;
- unsigned int entries;
- int rc;
-
- /* Create the smallest power-of-two aligned ring */
- entries = max(roundup_pow_of_two(efx->txq_entries), EFX_MIN_DMAQ_SIZE);
- EFX_WARN_ON_PARANOID(entries > EFX_MAX_DMAQ_SIZE);
- tx_queue->ptr_mask = entries - 1;
-
- netif_dbg(efx, probe, efx->net_dev,
- "creating TX queue %d size %#x mask %#x\n",
- tx_queue->queue, efx->txq_entries, tx_queue->ptr_mask);
-
- /* Allocate software ring */
- tx_queue->buffer = kcalloc(entries, sizeof(*tx_queue->buffer),
- GFP_KERNEL);
- if (!tx_queue->buffer)
- return -ENOMEM;
-
- tx_queue->cb_page = kcalloc(efx_tx_cb_page_count(tx_queue),
- sizeof(tx_queue->cb_page[0]), GFP_KERNEL);
- if (!tx_queue->cb_page) {
- rc = -ENOMEM;
- goto fail1;
- }
-
- /* Allocate hardware ring */
- rc = efx_nic_probe_tx(tx_queue);
- if (rc)
- goto fail2;
-
- return 0;
-
-fail2:
- kfree(tx_queue->cb_page);
- tx_queue->cb_page = NULL;
-fail1:
- kfree(tx_queue->buffer);
- tx_queue->buffer = NULL;
- return rc;
-}
-
-void efx_init_tx_queue(struct efx_tx_queue *tx_queue)
-{
- struct efx_nic *efx = tx_queue->efx;
-
- netif_dbg(efx, drv, efx->net_dev,
- "initialising TX queue %d\n", tx_queue->queue);
-
- tx_queue->insert_count = 0;
- tx_queue->write_count = 0;
- tx_queue->packet_write_count = 0;
- tx_queue->old_write_count = 0;
- tx_queue->read_count = 0;
- tx_queue->old_read_count = 0;
- tx_queue->empty_read_count = 0 | EFX_EMPTY_COUNT_VALID;
- tx_queue->xmit_more_available = false;
- tx_queue->timestamping = (efx_ptp_use_mac_tx_timestamps(efx) &&
- tx_queue->channel == efx_ptp_channel(efx));
- tx_queue->completed_desc_ptr = tx_queue->ptr_mask;
- tx_queue->completed_timestamp_major = 0;
- tx_queue->completed_timestamp_minor = 0;
-
- tx_queue->xdp_tx = efx_channel_is_xdp_tx(tx_queue->channel);
-
- /* Set up default function pointers. These may get replaced by
- * efx_nic_init_tx() based off NIC/queue capabilities.
- */
- tx_queue->handle_tso = efx_enqueue_skb_tso;
-
- /* Set up TX descriptor ring */
- efx_nic_init_tx(tx_queue);
-
- tx_queue->initialised = true;
-}
-
-void efx_fini_tx_queue(struct efx_tx_queue *tx_queue)
-{
- struct efx_tx_buffer *buffer;
-
- netif_dbg(tx_queue->efx, drv, tx_queue->efx->net_dev,
- "shutting down TX queue %d\n", tx_queue->queue);
-
- if (!tx_queue->buffer)
- return;
-
- /* Free any buffers left in the ring */
- while (tx_queue->read_count != tx_queue->write_count) {
- unsigned int pkts_compl = 0, bytes_compl = 0;
- buffer = &tx_queue->buffer[tx_queue->read_count & tx_queue->ptr_mask];
- efx_dequeue_buffer(tx_queue, buffer, &pkts_compl, &bytes_compl);
-
- ++tx_queue->read_count;
- }
- tx_queue->xmit_more_available = false;
- netdev_tx_reset_queue(tx_queue->core_txq);
-}
-
-void efx_remove_tx_queue(struct efx_tx_queue *tx_queue)
-{
- int i;
-
- if (!tx_queue->buffer)
- return;
-
- netif_dbg(tx_queue->efx, drv, tx_queue->efx->net_dev,
- "destroying TX queue %d\n", tx_queue->queue);
- efx_nic_remove_tx(tx_queue);
-
- if (tx_queue->cb_page) {
- for (i = 0; i < efx_tx_cb_page_count(tx_queue); i++)
- efx_nic_free_buffer(tx_queue->efx,
- &tx_queue->cb_page[i]);
- kfree(tx_queue->cb_page);
- tx_queue->cb_page = NULL;
- }
-
- kfree(tx_queue->buffer);
- tx_queue->buffer = NULL;
-}
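The software TSO fallback above drops its hand-rolled walk of the segment list returned by skb_gso_segment() in favour of skb_list_walk_safe() plus skb_mark_not_on_list(). A small sketch of that iteration pattern, with my_enqueue() standing in for the driver's real enqueue call:

/* Illustrative sketch: iterate the GSO segment list, detaching each skb
 * from the list before queuing it, so skb->next is never reused after
 * the segment has been handed on.
 */
static void example_send_segments(struct efx_tx_queue *tx_queue,
				  struct sk_buff *segments)
{
	struct sk_buff *skb, *next;

	skb_list_walk_safe(segments, skb, next) {
		skb_mark_not_on_list(skb);	/* clears skb->next */
		my_enqueue(tx_queue, skb);	/* placeholder for efx_enqueue_skb() */
	}
}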
diff --git a/drivers/net/ethernet/sfc/tx_common.c b/drivers/net/ethernet/sfc/tx_common.c
new file mode 100644
index 000000000000..b1571e9789d0
--- /dev/null
+++ b/drivers/net/ethernet/sfc/tx_common.c
@@ -0,0 +1,404 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2018 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#include "net_driver.h"
+#include "efx.h"
+#include "nic.h"
+#include "tx_common.h"
+
+static unsigned int efx_tx_cb_page_count(struct efx_tx_queue *tx_queue)
+{
+ return DIV_ROUND_UP(tx_queue->ptr_mask + 1,
+ PAGE_SIZE >> EFX_TX_CB_ORDER);
+}
+
+int efx_probe_tx_queue(struct efx_tx_queue *tx_queue)
+{
+ struct efx_nic *efx = tx_queue->efx;
+ unsigned int entries;
+ int rc;
+
+ /* Create the smallest power-of-two aligned ring */
+ entries = max(roundup_pow_of_two(efx->txq_entries), EFX_MIN_DMAQ_SIZE);
+ EFX_WARN_ON_PARANOID(entries > EFX_MAX_DMAQ_SIZE);
+ tx_queue->ptr_mask = entries - 1;
+
+ netif_dbg(efx, probe, efx->net_dev,
+ "creating TX queue %d size %#x mask %#x\n",
+ tx_queue->queue, efx->txq_entries, tx_queue->ptr_mask);
+
+ /* Allocate software ring */
+ tx_queue->buffer = kcalloc(entries, sizeof(*tx_queue->buffer),
+ GFP_KERNEL);
+ if (!tx_queue->buffer)
+ return -ENOMEM;
+
+ tx_queue->cb_page = kcalloc(efx_tx_cb_page_count(tx_queue),
+ sizeof(tx_queue->cb_page[0]), GFP_KERNEL);
+ if (!tx_queue->cb_page) {
+ rc = -ENOMEM;
+ goto fail1;
+ }
+
+ /* Allocate hardware ring */
+ rc = efx_nic_probe_tx(tx_queue);
+ if (rc)
+ goto fail2;
+
+ return 0;
+
+fail2:
+ kfree(tx_queue->cb_page);
+ tx_queue->cb_page = NULL;
+fail1:
+ kfree(tx_queue->buffer);
+ tx_queue->buffer = NULL;
+ return rc;
+}
+
+void efx_init_tx_queue(struct efx_tx_queue *tx_queue)
+{
+ struct efx_nic *efx = tx_queue->efx;
+
+ netif_dbg(efx, drv, efx->net_dev,
+ "initialising TX queue %d\n", tx_queue->queue);
+
+ tx_queue->insert_count = 0;
+ tx_queue->write_count = 0;
+ tx_queue->packet_write_count = 0;
+ tx_queue->old_write_count = 0;
+ tx_queue->read_count = 0;
+ tx_queue->old_read_count = 0;
+ tx_queue->empty_read_count = 0 | EFX_EMPTY_COUNT_VALID;
+ tx_queue->xmit_more_available = false;
+ tx_queue->timestamping = (efx_ptp_use_mac_tx_timestamps(efx) &&
+ tx_queue->channel == efx_ptp_channel(efx));
+ tx_queue->completed_desc_ptr = tx_queue->ptr_mask;
+ tx_queue->completed_timestamp_major = 0;
+ tx_queue->completed_timestamp_minor = 0;
+
+ tx_queue->xdp_tx = efx_channel_is_xdp_tx(tx_queue->channel);
+
+ /* Set up default function pointers. These may get replaced by
+ * efx_nic_init_tx() based off NIC/queue capabilities.
+ */
+ tx_queue->handle_tso = efx_enqueue_skb_tso;
+
+ /* Set up TX descriptor ring */
+ efx_nic_init_tx(tx_queue);
+
+ tx_queue->initialised = true;
+}
+
+void efx_fini_tx_queue(struct efx_tx_queue *tx_queue)
+{
+ struct efx_tx_buffer *buffer;
+
+ netif_dbg(tx_queue->efx, drv, tx_queue->efx->net_dev,
+ "shutting down TX queue %d\n", tx_queue->queue);
+
+ if (!tx_queue->buffer)
+ return;
+
+ /* Free any buffers left in the ring */
+ while (tx_queue->read_count != tx_queue->write_count) {
+ unsigned int pkts_compl = 0, bytes_compl = 0;
+
+ buffer = &tx_queue->buffer[tx_queue->read_count & tx_queue->ptr_mask];
+ efx_dequeue_buffer(tx_queue, buffer, &pkts_compl, &bytes_compl);
+
+ ++tx_queue->read_count;
+ }
+ tx_queue->xmit_more_available = false;
+ netdev_tx_reset_queue(tx_queue->core_txq);
+}
+
+void efx_remove_tx_queue(struct efx_tx_queue *tx_queue)
+{
+ int i;
+
+ if (!tx_queue->buffer)
+ return;
+
+ netif_dbg(tx_queue->efx, drv, tx_queue->efx->net_dev,
+ "destroying TX queue %d\n", tx_queue->queue);
+ efx_nic_remove_tx(tx_queue);
+
+ if (tx_queue->cb_page) {
+ for (i = 0; i < efx_tx_cb_page_count(tx_queue); i++)
+ efx_nic_free_buffer(tx_queue->efx,
+ &tx_queue->cb_page[i]);
+ kfree(tx_queue->cb_page);
+ tx_queue->cb_page = NULL;
+ }
+
+ kfree(tx_queue->buffer);
+ tx_queue->buffer = NULL;
+}
+
+void efx_dequeue_buffer(struct efx_tx_queue *tx_queue,
+ struct efx_tx_buffer *buffer,
+ unsigned int *pkts_compl,
+ unsigned int *bytes_compl)
+{
+ if (buffer->unmap_len) {
+ struct device *dma_dev = &tx_queue->efx->pci_dev->dev;
+ dma_addr_t unmap_addr = buffer->dma_addr - buffer->dma_offset;
+
+ if (buffer->flags & EFX_TX_BUF_MAP_SINGLE)
+ dma_unmap_single(dma_dev, unmap_addr, buffer->unmap_len,
+ DMA_TO_DEVICE);
+ else
+ dma_unmap_page(dma_dev, unmap_addr, buffer->unmap_len,
+ DMA_TO_DEVICE);
+ buffer->unmap_len = 0;
+ }
+
+ if (buffer->flags & EFX_TX_BUF_SKB) {
+ struct sk_buff *skb = (struct sk_buff *)buffer->skb;
+
+ EFX_WARN_ON_PARANOID(!pkts_compl || !bytes_compl);
+ (*pkts_compl)++;
+ (*bytes_compl) += skb->len;
+ if (tx_queue->timestamping &&
+ (tx_queue->completed_timestamp_major ||
+ tx_queue->completed_timestamp_minor)) {
+ struct skb_shared_hwtstamps hwtstamp;
+
+ hwtstamp.hwtstamp =
+ efx_ptp_nic_to_kernel_time(tx_queue);
+ skb_tstamp_tx(skb, &hwtstamp);
+
+ tx_queue->completed_timestamp_major = 0;
+ tx_queue->completed_timestamp_minor = 0;
+ }
+ dev_consume_skb_any((struct sk_buff *)buffer->skb);
+ netif_vdbg(tx_queue->efx, tx_done, tx_queue->efx->net_dev,
+ "TX queue %d transmission id %x complete\n",
+ tx_queue->queue, tx_queue->read_count);
+ } else if (buffer->flags & EFX_TX_BUF_XDP) {
+ xdp_return_frame_rx_napi(buffer->xdpf);
+ }
+
+ buffer->len = 0;
+ buffer->flags = 0;
+}
+
+/* Remove packets from the TX queue
+ *
+ * This removes packets from the TX queue, up to and including the
+ * specified index.
+ */
+static void efx_dequeue_buffers(struct efx_tx_queue *tx_queue,
+ unsigned int index,
+ unsigned int *pkts_compl,
+ unsigned int *bytes_compl)
+{
+ struct efx_nic *efx = tx_queue->efx;
+ unsigned int stop_index, read_ptr;
+
+ stop_index = (index + 1) & tx_queue->ptr_mask;
+ read_ptr = tx_queue->read_count & tx_queue->ptr_mask;
+
+ while (read_ptr != stop_index) {
+ struct efx_tx_buffer *buffer = &tx_queue->buffer[read_ptr];
+
+ if (!(buffer->flags & EFX_TX_BUF_OPTION) &&
+ unlikely(buffer->len == 0)) {
+ netif_err(efx, tx_err, efx->net_dev,
+ "TX queue %d spurious TX completion id %x\n",
+ tx_queue->queue, read_ptr);
+ efx_schedule_reset(efx, RESET_TYPE_TX_SKIP);
+ return;
+ }
+
+ efx_dequeue_buffer(tx_queue, buffer, pkts_compl, bytes_compl);
+
+ ++tx_queue->read_count;
+ read_ptr = tx_queue->read_count & tx_queue->ptr_mask;
+ }
+}
+
+void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index)
+{
+ unsigned int fill_level, pkts_compl = 0, bytes_compl = 0;
+ struct efx_nic *efx = tx_queue->efx;
+ struct efx_tx_queue *txq2;
+
+ EFX_WARN_ON_ONCE_PARANOID(index > tx_queue->ptr_mask);
+
+ efx_dequeue_buffers(tx_queue, index, &pkts_compl, &bytes_compl);
+ tx_queue->pkts_compl += pkts_compl;
+ tx_queue->bytes_compl += bytes_compl;
+
+ if (pkts_compl > 1)
+ ++tx_queue->merge_events;
+
+ /* See if we need to restart the netif queue. This memory
+ * barrier ensures that we write read_count (inside
+ * efx_dequeue_buffers()) before reading the queue status.
+ */
+ smp_mb();
+ if (unlikely(netif_tx_queue_stopped(tx_queue->core_txq)) &&
+ likely(efx->port_enabled) &&
+ likely(netif_device_present(efx->net_dev))) {
+ txq2 = efx_tx_queue_partner(tx_queue);
+ fill_level = max(tx_queue->insert_count - tx_queue->read_count,
+ txq2->insert_count - txq2->read_count);
+ if (fill_level <= efx->txq_wake_thresh)
+ netif_tx_wake_queue(tx_queue->core_txq);
+ }
+
+ /* Check whether the hardware queue is now empty */
+ if ((int)(tx_queue->read_count - tx_queue->old_write_count) >= 0) {
+ tx_queue->old_write_count = READ_ONCE(tx_queue->write_count);
+ if (tx_queue->read_count == tx_queue->old_write_count) {
+ smp_mb();
+ tx_queue->empty_read_count =
+ tx_queue->read_count | EFX_EMPTY_COUNT_VALID;
+ }
+ }
+}
+
+/* Remove buffers put into a tx_queue for the current packet.
+ * None of the buffers must have an skb attached.
+ */
+void efx_enqueue_unwind(struct efx_tx_queue *tx_queue,
+ unsigned int insert_count)
+{
+ struct efx_tx_buffer *buffer;
+ unsigned int bytes_compl = 0;
+ unsigned int pkts_compl = 0;
+
+ /* Work backwards until we hit the original insert pointer value */
+ while (tx_queue->insert_count != insert_count) {
+ --tx_queue->insert_count;
+ buffer = __efx_tx_queue_get_insert_buffer(tx_queue);
+ efx_dequeue_buffer(tx_queue, buffer, &pkts_compl, &bytes_compl);
+ }
+}
+
+struct efx_tx_buffer *efx_tx_map_chunk(struct efx_tx_queue *tx_queue,
+ dma_addr_t dma_addr, size_t len)
+{
+ const struct efx_nic_type *nic_type = tx_queue->efx->type;
+ struct efx_tx_buffer *buffer;
+ unsigned int dma_len;
+
+ /* Map the fragment taking account of NIC-dependent DMA limits. */
+ do {
+ buffer = efx_tx_queue_get_insert_buffer(tx_queue);
+ dma_len = nic_type->tx_limit_len(tx_queue, dma_addr, len);
+
+ buffer->len = dma_len;
+ buffer->dma_addr = dma_addr;
+ buffer->flags = EFX_TX_BUF_CONT;
+ len -= dma_len;
+ dma_addr += dma_len;
+ ++tx_queue->insert_count;
+ } while (len);
+
+ return buffer;
+}
+
+/* Map all data from an SKB for DMA and create descriptors on the queue. */
+int efx_tx_map_data(struct efx_tx_queue *tx_queue, struct sk_buff *skb,
+ unsigned int segment_count)
+{
+ struct efx_nic *efx = tx_queue->efx;
+ struct device *dma_dev = &efx->pci_dev->dev;
+ unsigned int frag_index, nr_frags;
+ dma_addr_t dma_addr, unmap_addr;
+ unsigned short dma_flags;
+ size_t len, unmap_len;
+
+ nr_frags = skb_shinfo(skb)->nr_frags;
+ frag_index = 0;
+
+ /* Map header data. */
+ len = skb_headlen(skb);
+ dma_addr = dma_map_single(dma_dev, skb->data, len, DMA_TO_DEVICE);
+ dma_flags = EFX_TX_BUF_MAP_SINGLE;
+ unmap_len = len;
+ unmap_addr = dma_addr;
+
+ if (unlikely(dma_mapping_error(dma_dev, dma_addr)))
+ return -EIO;
+
+ if (segment_count) {
+ /* For TSO we need to put the header in to a separate
+ * descriptor. Map this separately if necessary.
+ */
+ size_t header_len = skb_transport_header(skb) - skb->data +
+ (tcp_hdr(skb)->doff << 2u);
+
+ if (header_len != len) {
+ tx_queue->tso_long_headers++;
+ efx_tx_map_chunk(tx_queue, dma_addr, header_len);
+ len -= header_len;
+ dma_addr += header_len;
+ }
+ }
+
+ /* Add descriptors for each fragment. */
+ do {
+ struct efx_tx_buffer *buffer;
+ skb_frag_t *fragment;
+
+ buffer = efx_tx_map_chunk(tx_queue, dma_addr, len);
+
+ /* The final descriptor for a fragment is responsible for
+ * unmapping the whole fragment.
+ */
+ buffer->flags = EFX_TX_BUF_CONT | dma_flags;
+ buffer->unmap_len = unmap_len;
+ buffer->dma_offset = buffer->dma_addr - unmap_addr;
+
+ if (frag_index >= nr_frags) {
+ /* Store SKB details with the final buffer for
+ * the completion.
+ */
+ buffer->skb = skb;
+ buffer->flags = EFX_TX_BUF_SKB | dma_flags;
+ return 0;
+ }
+
+ /* Move on to the next fragment. */
+ fragment = &skb_shinfo(skb)->frags[frag_index++];
+ len = skb_frag_size(fragment);
+ dma_addr = skb_frag_dma_map(dma_dev, fragment, 0, len,
+ DMA_TO_DEVICE);
+ dma_flags = 0;
+ unmap_len = len;
+ unmap_addr = dma_addr;
+
+ if (unlikely(dma_mapping_error(dma_dev, dma_addr)))
+ return -EIO;
+ } while (1);
+}
+
+unsigned int efx_tx_max_skb_descs(struct efx_nic *efx)
+{
+ /* Header and payload descriptor for each output segment, plus
+ * one for every input fragment boundary within a segment
+ */
+ unsigned int max_descs = EFX_TSO_MAX_SEGS * 2 + MAX_SKB_FRAGS;
+
+ /* Possibly one more per segment for option descriptors */
+ if (efx_nic_rev(efx) >= EFX_REV_HUNT_A0)
+ max_descs += EFX_TSO_MAX_SEGS;
+
+ /* Possibly more for PCIe page boundaries within input fragments */
+ if (PAGE_SIZE > EFX_PAGE_SIZE)
+ max_descs += max_t(unsigned int, MAX_SKB_FRAGS,
+ DIV_ROUND_UP(GSO_MAX_SIZE, EFX_PAGE_SIZE));
+
+ return max_descs;
+}
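efx_tx_max_skb_descs() above is worst-case bookkeeping rather than anything hardware-specific. The standalone sketch below reproduces the same sums with assumed values for EFX_TSO_MAX_SEGS and MAX_SKB_FRAGS; the real constants depend on the kernel and NIC generation.

#include <stdio.h>

int main(void)
{
	/* Assumed values, for illustration only */
	unsigned int tso_max_segs = 100;	/* stand-in for EFX_TSO_MAX_SEGS */
	unsigned int max_skb_frags = 17;	/* stand-in for MAX_SKB_FRAGS */

	/* Header and payload descriptor per segment, plus one for every
	 * input fragment boundary within a segment.
	 */
	unsigned int max_descs = tso_max_segs * 2 + max_skb_frags;

	/* EF10-class NICs may add one option descriptor per segment */
	max_descs += tso_max_segs;

	printf("worst-case descriptors per TSO skb: %u\n", max_descs);	/* 317 */
	return 0;
}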
diff --git a/drivers/net/ethernet/sfc/tx_common.h b/drivers/net/ethernet/sfc/tx_common.h
new file mode 100644
index 000000000000..f92f1fe3a87f
--- /dev/null
+++ b/drivers/net/ethernet/sfc/tx_common.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2018 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#ifndef EFX_TX_COMMON_H
+#define EFX_TX_COMMON_H
+
+int efx_probe_tx_queue(struct efx_tx_queue *tx_queue);
+void efx_init_tx_queue(struct efx_tx_queue *tx_queue);
+void efx_fini_tx_queue(struct efx_tx_queue *tx_queue);
+void efx_remove_tx_queue(struct efx_tx_queue *tx_queue);
+
+void efx_dequeue_buffer(struct efx_tx_queue *tx_queue,
+ struct efx_tx_buffer *buffer,
+ unsigned int *pkts_compl,
+ unsigned int *bytes_compl);
+
+void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index);
+
+void efx_enqueue_unwind(struct efx_tx_queue *tx_queue,
+ unsigned int insert_count);
+
+struct efx_tx_buffer *efx_tx_map_chunk(struct efx_tx_queue *tx_queue,
+ dma_addr_t dma_addr, size_t len);
+int efx_tx_map_data(struct efx_tx_queue *tx_queue, struct sk_buff *skb,
+ unsigned int segment_count);
+
+unsigned int efx_tx_max_skb_descs(struct efx_nic *efx);
+
+#endif
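With efx_tx_map_data() and efx_enqueue_unwind() exported from tx_common, a transmit path can keep the usual map-then-roll-back-on-failure shape. The fragment below is an illustrative sketch of that pairing, not the driver's actual efx_enqueue_skb():

/* Illustrative only: pair efx_tx_map_data() with efx_enqueue_unwind()
 * so a failed DMA mapping leaves the ring exactly as it was before the
 * packet was attempted.
 */
static int example_enqueue(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
{
	unsigned int old_insert_count = tx_queue->insert_count;

	if (efx_tx_map_data(tx_queue, skb, 0))	/* 0: no TSO segments */
		goto err_unwind;

	/* ...push descriptors to the NIC, update netdev stats... */
	return 0;

err_unwind:
	/* Remove any buffers added for this packet; none has an skb
	 * attached yet, as efx_enqueue_unwind() requires.
	 */
	efx_enqueue_unwind(tx_queue, old_insert_count);
	dev_kfree_skb_any(skb);
	return -EIO;
}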