From 8c42350116fe4ad23a5a813eba367355cfe7cff5 Mon Sep 17 00:00:00 2001 From: Charles McLachlan Date: Thu, 31 Oct 2019 10:23:10 +0000 Subject: sfc: support encapsulation of xdp_frames in efx_tx_buffer Add a field to efx_tx_buffer so that we can track xdp_frames. Add a flag so that buffers that contain xdp_frames can be identified and passed to xdp_return_frame. Signed-off-by: Charles McLachlan Acked-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/net_driver.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'drivers/net/ethernet/sfc/net_driver.h') diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h index 284a1b047ac2..7394d901e021 100644 --- a/drivers/net/ethernet/sfc/net_driver.h +++ b/drivers/net/ethernet/sfc/net_driver.h @@ -27,6 +27,7 @@ #include #include #include +#include #include "enum.h" #include "bitfield.h" @@ -136,7 +137,8 @@ struct efx_special_buffer { * struct efx_tx_buffer - buffer state for a TX descriptor * @skb: When @flags & %EFX_TX_BUF_SKB, the associated socket buffer to be * freed when descriptor completes - * @option: When @flags & %EFX_TX_BUF_OPTION, a NIC-specific option descriptor. + * @xdpf: When @flags & %EFX_TX_BUF_XDP, the XDP frame information; its @data + * member is the associated buffer to drop a page reference on. * @dma_addr: DMA address of the fragment. * @flags: Flags for allocation and DMA mapping type * @len: Length of this fragment. @@ -146,7 +148,10 @@ struct efx_special_buffer { * Only valid if @unmap_len != 0. */ struct efx_tx_buffer { - const struct sk_buff *skb; + union { + const struct sk_buff *skb; + struct xdp_frame *xdpf; + }; union { efx_qword_t option; dma_addr_t dma_addr; @@ -160,6 +165,7 @@ struct efx_tx_buffer { #define EFX_TX_BUF_SKB 2 /* buffer is last part of skb */ #define EFX_TX_BUF_MAP_SINGLE 8 /* buffer was mapped with dma_map_single() */ #define EFX_TX_BUF_OPTION 0x10 /* empty buffer for option descriptor */ +#define EFX_TX_BUF_XDP 0x20 /* buffer was sent with XDP */ /** * struct efx_tx_queue - An Efx TX queue -- cgit v1.2.3-59-g8ed1b From eb9a36be7f3ec414700af9a616f035eda1f1e63e Mon Sep 17 00:00:00 2001 From: Charles McLachlan Date: Thu, 31 Oct 2019 10:23:23 +0000 Subject: sfc: perform XDP processing on received packets Adds a field to hold an attached xdp_prog, but never populates it (see following patch). Also, XDP_TX support is deferred to a later patch in the series. Track failures of xdp_rxq_info_reg() via per-queue xdp_rxq_info_valid flags and a per-nic xdp_rxq_info_failed flag. The per-queue flags are needed to prevent attempts to xdp_rxq_info_unreg() structs that failed to register. Possibly the API could be changed in the future to avoid the need for these flags. Signed-off-by: Charles McLachlan Acked-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/efx.c | 5 +- drivers/net/ethernet/sfc/net_driver.h | 12 ++++ drivers/net/ethernet/sfc/rx.c | 130 +++++++++++++++++++++++++++++++++- 3 files changed, 145 insertions(+), 2 deletions(-) (limited to 'drivers/net/ethernet/sfc/net_driver.h') diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index 2fef7402233e..bd0424414781 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -340,6 +340,8 @@ static int efx_poll(struct napi_struct *napi, int budget) spent = efx_process_channel(channel, budget); + xdp_do_flush_map(); + if (spent < budget) { if (efx_channel_has_rx_queue(channel) && efx->irq_rx_adaptive && @@ -651,7 +653,7 @@ static void efx_start_datapath(struct efx_nic *efx) efx->rx_dma_len = (efx->rx_prefix_size + EFX_MAX_FRAME_LEN(efx->net_dev->mtu) + efx->type->rx_buffer_padding); - rx_buf_len = (sizeof(struct efx_rx_page_state) + + rx_buf_len = (sizeof(struct efx_rx_page_state) + XDP_PACKET_HEADROOM + efx->rx_ip_align + efx->rx_dma_len); if (rx_buf_len <= PAGE_SIZE) { efx->rx_scatter = efx->type->always_rx_scatter; @@ -774,6 +776,7 @@ static void efx_stop_datapath(struct efx_nic *efx) efx_for_each_possible_channel_tx_queue(tx_queue, channel) efx_fini_tx_queue(tx_queue); } + efx->xdp_rxq_info_failed = false; } static void efx_remove_channel(struct efx_channel *channel) diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h index 7394d901e021..a5a5055969fb 100644 --- a/drivers/net/ethernet/sfc/net_driver.h +++ b/drivers/net/ethernet/sfc/net_driver.h @@ -369,6 +369,8 @@ struct efx_rx_page_state { * refill was triggered. * @recycle_count: RX buffer recycle counter. * @slow_fill: Timer used to defer efx_nic_generate_fill_event(). + * @xdp_rxq_info: XDP specific RX queue information. + * @xdp_rxq_info_valid: Is xdp_rxq_info valid data?. */ struct efx_rx_queue { struct efx_nic *efx; @@ -400,6 +402,8 @@ struct efx_rx_queue { unsigned int slow_fill_count; /* Statistics to supplement MAC stats */ unsigned long rx_packets; + struct xdp_rxq_info xdp_rxq_info; + bool xdp_rxq_info_valid; }; enum efx_sync_events_state { @@ -900,6 +904,7 @@ struct efx_async_filter_insertion { * @loopback_mode: Loopback status * @loopback_modes: Supported loopback mode bitmask * @loopback_selftest: Offline self-test private state + * @xdp_prog: Current XDP programme for this interface * @filter_sem: Filter table rw_semaphore, protects existence of @filter_state * @filter_state: Architecture-dependent filter table state * @rps_mutex: Protects RPS state of all channels @@ -925,6 +930,8 @@ struct efx_async_filter_insertion { * @ptp_data: PTP state data * @ptp_warned: has this NIC seen and warned about unexpected PTP events? * @vpd_sn: Serial number read from VPD + * @xdp_rxq_info_failed: Have any of the rx queues failed to initialise their + * xdp_rxq_info structures? * @monitor_work: Hardware monitor workitem * @biu_lock: BIU (bus interface unit) lock * @last_irq_cpu: Last CPU to handle a possible test interrupt. This @@ -1059,6 +1066,10 @@ struct efx_nic { u64 loopback_modes; void *loopback_selftest; + /* We access loopback_selftest immediately before running XDP, + * so we want them next to each other. + */ + struct bpf_prog __rcu *xdp_prog; struct rw_semaphore filter_sem; void *filter_state; @@ -1088,6 +1099,7 @@ struct efx_nic { bool ptp_warned; char *vpd_sn; + bool xdp_rxq_info_failed; /* The following fields may be written more often */ diff --git a/drivers/net/ethernet/sfc/rx.c b/drivers/net/ethernet/sfc/rx.c index 85ec07f5a674..644d157843c6 100644 --- a/drivers/net/ethernet/sfc/rx.c +++ b/drivers/net/ethernet/sfc/rx.c @@ -17,6 +17,8 @@ #include #include #include +#include +#include #include "net_driver.h" #include "efx.h" #include "filter.h" @@ -27,6 +29,9 @@ /* Preferred number of descriptors to fill at once */ #define EFX_RX_PREFERRED_BATCH 8U +/* Maximum rx prefix used by any architecture. */ +#define EFX_MAX_RX_PREFIX_SIZE 16 + /* Number of RX buffers to recycle pages for. When creating the RX page recycle * ring, this number is divided by the number of buffers per page to calculate * the number of pages to store in the RX page recycle ring. @@ -95,7 +100,7 @@ void efx_rx_config_page_split(struct efx_nic *efx) EFX_RX_BUF_ALIGNMENT); efx->rx_bufs_per_page = efx->rx_buffer_order ? 1 : ((PAGE_SIZE - sizeof(struct efx_rx_page_state)) / - efx->rx_page_buf_step); + (efx->rx_page_buf_step + XDP_PACKET_HEADROOM)); efx->rx_buffer_truesize = (PAGE_SIZE << efx->rx_buffer_order) / efx->rx_bufs_per_page; efx->rx_pages_per_batch = DIV_ROUND_UP(EFX_RX_PREFERRED_BATCH, @@ -185,6 +190,9 @@ static int efx_init_rx_buffers(struct efx_rx_queue *rx_queue, bool atomic) page_offset = sizeof(struct efx_rx_page_state); do { + page_offset += XDP_PACKET_HEADROOM; + dma_addr += XDP_PACKET_HEADROOM; + index = rx_queue->added_count & rx_queue->ptr_mask; rx_buf = efx_rx_buffer(rx_queue, index); rx_buf->dma_addr = dma_addr + efx->rx_ip_align; @@ -635,6 +643,104 @@ static void efx_rx_deliver(struct efx_channel *channel, u8 *eh, netif_receive_skb(skb); } +/** efx_do_xdp: perform XDP processing on a received packet + * + * Returns true if packet should still be delivered. + */ +static bool efx_do_xdp(struct efx_nic *efx, struct efx_channel *channel, + struct efx_rx_buffer *rx_buf, u8 **ehp) +{ + u8 rx_prefix[EFX_MAX_RX_PREFIX_SIZE]; + struct efx_rx_queue *rx_queue; + struct bpf_prog *xdp_prog; + struct xdp_buff xdp; + u32 xdp_act; + s16 offset; + int err; + + rcu_read_lock(); + xdp_prog = rcu_dereference(efx->xdp_prog); + if (!xdp_prog) { + rcu_read_unlock(); + return true; + } + + rx_queue = efx_channel_get_rx_queue(channel); + + if (unlikely(channel->rx_pkt_n_frags > 1)) { + /* We can't do XDP on fragmented packets - drop. */ + rcu_read_unlock(); + efx_free_rx_buffers(rx_queue, rx_buf, + channel->rx_pkt_n_frags); + if (net_ratelimit()) + netif_err(efx, rx_err, efx->net_dev, + "XDP is not possible with multiple receive fragments (%d)\n", + channel->rx_pkt_n_frags); + return false; + } + + dma_sync_single_for_cpu(&efx->pci_dev->dev, rx_buf->dma_addr, + rx_buf->len, DMA_FROM_DEVICE); + + /* Save the rx prefix. */ + EFX_WARN_ON_PARANOID(efx->rx_prefix_size > EFX_MAX_RX_PREFIX_SIZE); + memcpy(rx_prefix, *ehp - efx->rx_prefix_size, + efx->rx_prefix_size); + + xdp.data = *ehp; + xdp.data_hard_start = xdp.data - XDP_PACKET_HEADROOM; + + /* No support yet for XDP metadata */ + xdp_set_data_meta_invalid(&xdp); + xdp.data_end = xdp.data + rx_buf->len; + xdp.rxq = &rx_queue->xdp_rxq_info; + + xdp_act = bpf_prog_run_xdp(xdp_prog, &xdp); + rcu_read_unlock(); + + offset = (u8 *)xdp.data - *ehp; + + switch (xdp_act) { + case XDP_PASS: + /* Fix up rx prefix. */ + if (offset) { + *ehp += offset; + rx_buf->page_offset += offset; + rx_buf->len -= offset; + memcpy(*ehp - efx->rx_prefix_size, rx_prefix, + efx->rx_prefix_size); + } + break; + + case XDP_TX: + return -EOPNOTSUPP; + + case XDP_REDIRECT: + err = xdp_do_redirect(efx->net_dev, &xdp, xdp_prog); + if (unlikely(err)) { + efx_free_rx_buffers(rx_queue, rx_buf, 1); + if (net_ratelimit()) + netif_err(efx, rx_err, efx->net_dev, + "XDP redirect failed (%d)\n", err); + } + break; + + default: + bpf_warn_invalid_xdp_action(xdp_act); + efx_free_rx_buffers(rx_queue, rx_buf, 1); + break; + + case XDP_ABORTED: + trace_xdp_exception(efx->net_dev, xdp_prog, xdp_act); + /* Fall through */ + case XDP_DROP: + efx_free_rx_buffers(rx_queue, rx_buf, 1); + break; + } + + return xdp_act == XDP_PASS; +} + /* Handle a received packet. Second half: Touches packet payload. */ void __efx_rx_packet(struct efx_channel *channel) { @@ -663,6 +769,9 @@ void __efx_rx_packet(struct efx_channel *channel) goto out; } + if (!efx_do_xdp(efx, channel, rx_buf, &eh)) + goto out; + if (unlikely(!(efx->net_dev->features & NETIF_F_RXCSUM))) rx_buf->flags &= ~EFX_RX_PKT_CSUMMED; @@ -731,6 +840,7 @@ void efx_init_rx_queue(struct efx_rx_queue *rx_queue) { struct efx_nic *efx = rx_queue->efx; unsigned int max_fill, trigger, max_trigger; + int rc = 0; netif_dbg(rx_queue->efx, drv, rx_queue->efx->net_dev, "initialising RX queue %d\n", efx_rx_queue_index(rx_queue)); @@ -764,6 +874,19 @@ void efx_init_rx_queue(struct efx_rx_queue *rx_queue) rx_queue->fast_fill_trigger = trigger; rx_queue->refill_enabled = true; + /* Initialise XDP queue information */ + rc = xdp_rxq_info_reg(&rx_queue->xdp_rxq_info, efx->net_dev, + rx_queue->core_index); + + if (rc) { + netif_err(efx, rx_err, efx->net_dev, + "Failure to initialise XDP queue information rc=%d\n", + rc); + efx->xdp_rxq_info_failed = true; + } else { + rx_queue->xdp_rxq_info_valid = true; + } + /* Set up RX descriptor ring */ efx_nic_init_rx(rx_queue); } @@ -805,6 +928,11 @@ void efx_fini_rx_queue(struct efx_rx_queue *rx_queue) } kfree(rx_queue->page_ring); rx_queue->page_ring = NULL; + + if (rx_queue->xdp_rxq_info_valid) + xdp_rxq_info_unreg(&rx_queue->xdp_rxq_info); + + rx_queue->xdp_rxq_info_valid = false; } void efx_remove_rx_queue(struct efx_rx_queue *rx_queue) -- cgit v1.2.3-59-g8ed1b From 3990a8fffbdad5765f47ea593f9de66c91762059 Mon Sep 17 00:00:00 2001 From: Charles McLachlan Date: Thu, 31 Oct 2019 10:23:49 +0000 Subject: sfc: allocate channels for XDP tx queues Each CPU needs access to its own queue to allow uncontested transmission of XDP_TX packets. This means we need to allocate (up front) enough channels ("xdp transmit channels") to provide at least one extra tx queue per CPU. These tx queues should not do TSO. Signed-off-by: Charles McLachlan Acked-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/ef10.c | 14 ++- drivers/net/ethernet/sfc/efx.c | 180 ++++++++++++++++++++++++++++------ drivers/net/ethernet/sfc/net_driver.h | 34 ++++++- drivers/net/ethernet/sfc/tx.c | 2 + 4 files changed, 190 insertions(+), 40 deletions(-) (limited to 'drivers/net/ethernet/sfc/net_driver.h') diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index 0ec13f520e90..ad68eb0cb8fd 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -946,8 +946,10 @@ static int efx_ef10_link_piobufs(struct efx_nic *efx) /* Extra channels, even those with TXQs (PTP), do not require * PIO resources. */ - if (!channel->type->want_pio) + if (!channel->type->want_pio || + channel->channel >= efx->xdp_channel_offset) continue; + efx_for_each_channel_tx_queue(tx_queue, channel) { /* We assign the PIO buffers to queues in * reverse order to allow for the following @@ -1296,8 +1298,9 @@ static int efx_ef10_dimension_resources(struct efx_nic *efx) int rc; channel_vis = max(efx->n_channels, - (efx->n_tx_channels + efx->n_extra_tx_channels) * - EFX_TXQ_TYPES); + ((efx->n_tx_channels + efx->n_extra_tx_channels) * + EFX_TXQ_TYPES) + + efx->n_xdp_channels * efx->xdp_tx_per_channel); #ifdef EFX_USE_PIO /* Try to allocate PIO buffers if wanted and if the full @@ -2434,11 +2437,12 @@ static void efx_ef10_tx_init(struct efx_tx_queue *tx_queue) /* TSOv2 is a limited resource that can only be configured on a limited * number of queues. TSO without checksum offload is not really a thing, * so we only enable it for those queues. - * TSOv2 cannot be used with Hardware timestamping. + * TSOv2 cannot be used with Hardware timestamping, and is never needed + * for XDP tx. */ if (csum_offload && (nic_data->datapath_caps2 & (1 << MC_CMD_GET_CAPABILITIES_V2_OUT_TX_TSO_V2_LBN)) && - !tx_queue->timestamping) { + !tx_queue->timestamping && !tx_queue->xdp_tx) { tso_v2 = true; netif_dbg(efx, hw, efx->net_dev, "Using TSOv2 for channel %u\n", channel->channel); diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index 429fb268b0da..10c9b1ede799 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -583,9 +583,14 @@ efx_get_channel_name(struct efx_channel *channel, char *buf, size_t len) int number; number = channel->channel; - if (efx->tx_channel_offset == 0) { + + if (number >= efx->xdp_channel_offset && + !WARN_ON_ONCE(!efx->n_xdp_channels)) { + type = "-xdp"; + number -= efx->xdp_channel_offset; + } else if (efx->tx_channel_offset == 0) { type = ""; - } else if (channel->channel < efx->tx_channel_offset) { + } else if (number < efx->tx_channel_offset) { type = "-rx"; } else { type = "-tx"; @@ -803,6 +808,8 @@ static void efx_remove_channels(struct efx_nic *efx) efx_for_each_channel(channel, efx) efx_remove_channel(channel); + + kfree(efx->xdp_tx_queues); } int @@ -1440,6 +1447,101 @@ static unsigned int efx_wanted_parallelism(struct efx_nic *efx) return count; } +static int efx_allocate_msix_channels(struct efx_nic *efx, + unsigned int max_channels, + unsigned int extra_channels, + unsigned int parallelism) +{ + unsigned int n_channels = parallelism; + int vec_count; + int n_xdp_tx; + int n_xdp_ev; + + if (efx_separate_tx_channels) + n_channels *= 2; + n_channels += extra_channels; + + /* To allow XDP transmit to happen from arbitrary NAPI contexts + * we allocate a TX queue per CPU. We share event queues across + * multiple tx queues, assuming tx and ev queues are both + * maximum size. + */ + + n_xdp_tx = num_possible_cpus(); + n_xdp_ev = DIV_ROUND_UP(n_xdp_tx, EFX_TXQ_TYPES); + + /* Check resources. + * We need a channel per event queue, plus a VI per tx queue. + * This may be more pessimistic than it needs to be. + */ + if (n_channels + n_xdp_ev > max_channels) { + netif_err(efx, drv, efx->net_dev, + "Insufficient resources for %d XDP event queues (%d other channels, max %d)\n", + n_xdp_ev, n_channels, max_channels); + efx->n_xdp_channels = 0; + efx->xdp_tx_per_channel = 0; + efx->xdp_tx_queue_count = 0; + } else { + efx->n_xdp_channels = n_xdp_ev; + efx->xdp_tx_per_channel = EFX_TXQ_TYPES; + efx->xdp_tx_queue_count = n_xdp_tx; + n_channels += n_xdp_ev; + netif_dbg(efx, drv, efx->net_dev, + "Allocating %d TX and %d event queues for XDP\n", + n_xdp_tx, n_xdp_ev); + } + + n_channels = min(n_channels, max_channels); + + vec_count = pci_msix_vec_count(efx->pci_dev); + if (vec_count < 0) + return vec_count; + if (vec_count < n_channels) { + netif_err(efx, drv, efx->net_dev, + "WARNING: Insufficient MSI-X vectors available (%d < %u).\n", + vec_count, n_channels); + netif_err(efx, drv, efx->net_dev, + "WARNING: Performance may be reduced.\n"); + n_channels = vec_count; + } + + efx->n_channels = n_channels; + + /* Do not create the PTP TX queue(s) if PTP uses the MC directly. */ + if (extra_channels && !efx_ptp_use_mac_tx_timestamps(efx)) + n_channels--; + + /* Ignore XDP tx channels when creating rx channels. */ + n_channels -= efx->n_xdp_channels; + + if (efx_separate_tx_channels) { + efx->n_tx_channels = + min(max(n_channels / 2, 1U), + efx->max_tx_channels); + efx->tx_channel_offset = + n_channels - efx->n_tx_channels; + efx->n_rx_channels = + max(n_channels - + efx->n_tx_channels, 1U); + } else { + efx->n_tx_channels = min(n_channels, efx->max_tx_channels); + efx->tx_channel_offset = 0; + efx->n_rx_channels = n_channels; + } + + if (efx->n_xdp_channels) + efx->xdp_channel_offset = efx->tx_channel_offset + + efx->n_tx_channels; + else + efx->xdp_channel_offset = efx->n_channels; + + netif_dbg(efx, drv, efx->net_dev, + "Allocating %u RX channels\n", + efx->n_rx_channels); + + return efx->n_channels; +} + /* Probe the number and type of interrupts we are able to obtain, and * the resulting numbers of channels and RX queues. */ @@ -1454,19 +1556,19 @@ static int efx_probe_interrupts(struct efx_nic *efx) ++extra_channels; if (efx->interrupt_mode == EFX_INT_MODE_MSIX) { + unsigned int parallelism = efx_wanted_parallelism(efx); struct msix_entry xentries[EFX_MAX_CHANNELS]; unsigned int n_channels; - n_channels = efx_wanted_parallelism(efx); - if (efx_separate_tx_channels) - n_channels *= 2; - n_channels += extra_channels; - n_channels = min(n_channels, efx->max_channels); - - for (i = 0; i < n_channels; i++) - xentries[i].entry = i; - rc = pci_enable_msix_range(efx->pci_dev, - xentries, 1, n_channels); + rc = efx_allocate_msix_channels(efx, efx->max_channels, + extra_channels, parallelism); + if (rc >= 0) { + n_channels = rc; + for (i = 0; i < n_channels; i++) + xentries[i].entry = i; + rc = pci_enable_msix_range(efx->pci_dev, xentries, 1, + n_channels); + } if (rc < 0) { /* Fall back to single channel MSI */ netif_err(efx, drv, efx->net_dev, @@ -1485,21 +1587,6 @@ static int efx_probe_interrupts(struct efx_nic *efx) } if (rc > 0) { - efx->n_channels = n_channels; - if (n_channels > extra_channels) - n_channels -= extra_channels; - if (efx_separate_tx_channels) { - efx->n_tx_channels = min(max(n_channels / 2, - 1U), - efx->max_tx_channels); - efx->n_rx_channels = max(n_channels - - efx->n_tx_channels, - 1U); - } else { - efx->n_tx_channels = min(n_channels, - efx->max_tx_channels); - efx->n_rx_channels = n_channels; - } for (i = 0; i < efx->n_channels; i++) efx_get_channel(efx, i)->irq = xentries[i].vector; @@ -1511,6 +1598,8 @@ static int efx_probe_interrupts(struct efx_nic *efx) efx->n_channels = 1; efx->n_rx_channels = 1; efx->n_tx_channels = 1; + efx->n_xdp_channels = 0; + efx->xdp_channel_offset = efx->n_channels; rc = pci_enable_msi(efx->pci_dev); if (rc == 0) { efx_get_channel(efx, 0)->irq = efx->pci_dev->irq; @@ -1529,12 +1618,14 @@ static int efx_probe_interrupts(struct efx_nic *efx) efx->n_channels = 1 + (efx_separate_tx_channels ? 1 : 0); efx->n_rx_channels = 1; efx->n_tx_channels = 1; + efx->n_xdp_channels = 0; + efx->xdp_channel_offset = efx->n_channels; efx->legacy_irq = efx->pci_dev->irq; } - /* Assign extra channels if possible */ + /* Assign extra channels if possible, before XDP channels */ efx->n_extra_tx_channels = 0; - j = efx->n_channels; + j = efx->xdp_channel_offset; for (i = 0; i < EFX_MAX_EXTRA_CHANNELS; i++) { if (!efx->extra_channel_type[i]) continue; @@ -1729,29 +1820,50 @@ static void efx_remove_interrupts(struct efx_nic *efx) efx->legacy_irq = 0; } -static void efx_set_channels(struct efx_nic *efx) +static int efx_set_channels(struct efx_nic *efx) { struct efx_channel *channel; struct efx_tx_queue *tx_queue; + int xdp_queue_number; efx->tx_channel_offset = efx_separate_tx_channels ? efx->n_channels - efx->n_tx_channels : 0; + if (efx->xdp_tx_queue_count) { + EFX_WARN_ON_PARANOID(efx->xdp_tx_queues); + + /* Allocate array for XDP TX queue lookup. */ + efx->xdp_tx_queues = kcalloc(efx->xdp_tx_queue_count, + sizeof(*efx->xdp_tx_queues), + GFP_KERNEL); + if (!efx->xdp_tx_queues) + return -ENOMEM; + } + /* We need to mark which channels really have RX and TX * queues, and adjust the TX queue numbers if we have separate * RX-only and TX-only channels. */ + xdp_queue_number = 0; efx_for_each_channel(channel, efx) { if (channel->channel < efx->n_rx_channels) channel->rx_queue.core_index = channel->channel; else channel->rx_queue.core_index = -1; - efx_for_each_channel_tx_queue(tx_queue, channel) + efx_for_each_channel_tx_queue(tx_queue, channel) { tx_queue->queue -= (efx->tx_channel_offset * EFX_TXQ_TYPES); + + if (efx_channel_is_xdp_tx(channel) && + xdp_queue_number < efx->xdp_tx_queue_count) { + efx->xdp_tx_queues[xdp_queue_number] = tx_queue; + xdp_queue_number++; + } + } } + return 0; } static int efx_probe_nic(struct efx_nic *efx) @@ -1781,7 +1893,9 @@ static int efx_probe_nic(struct efx_nic *efx) if (rc) goto fail1; - efx_set_channels(efx); + rc = efx_set_channels(efx); + if (rc) + goto fail1; /* dimension_resources can fail with EAGAIN */ rc = efx->type->dimension_resources(efx); @@ -2091,6 +2205,8 @@ int efx_init_irq_moderation(struct efx_nic *efx, unsigned int tx_usecs, channel->irq_moderation_us = rx_usecs; else if (efx_channel_has_tx_queues(channel)) channel->irq_moderation_us = tx_usecs; + else if (efx_channel_is_xdp_tx(channel)) + channel->irq_moderation_us = tx_usecs; } return 0; diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h index a5a5055969fb..505ddc060e64 100644 --- a/drivers/net/ethernet/sfc/net_driver.h +++ b/drivers/net/ethernet/sfc/net_driver.h @@ -195,6 +195,7 @@ struct efx_tx_buffer { * @piobuf_offset: Buffer offset to be specified in PIO descriptors * @initialised: Has hardware queue been initialised? * @timestamping: Is timestamping enabled for this channel? + * @xdp_tx: Is this an XDP tx queue? * @handle_tso: TSO xmit preparation handler. Sets up the TSO metadata and * may also map tx data, depending on the nature of the TSO implementation. * @read_count: Current read pointer. @@ -256,6 +257,7 @@ struct efx_tx_queue { unsigned int piobuf_offset; bool initialised; bool timestamping; + bool xdp_tx; /* Function pointers used in the fast path. */ int (*handle_tso)(struct efx_tx_queue*, struct sk_buff*, bool *); @@ -828,6 +830,8 @@ struct efx_async_filter_insertion { * @msi_context: Context for each MSI * @extra_channel_types: Types of extra (non-traffic) channels that * should be allocated for this NIC + * @xdp_tx_queue_count: Number of entries in %xdp_tx_queues. + * @xdp_tx_queues: Array of pointers to tx queues used for XDP transmit. * @rxq_entries: Size of receive queues requested by user. * @txq_entries: Size of transmit queues requested by user. * @txq_stop_thresh: TX queue fill level at or above which we stop it. @@ -840,6 +844,9 @@ struct efx_async_filter_insertion { * @n_rx_channels: Number of channels used for RX (= number of RX queues) * @n_tx_channels: Number of channels used for TX * @n_extra_tx_channels: Number of extra channels with TX queues + * @n_xdp_channels: Number of channels used for XDP TX + * @xdp_channel_offset: Offset of zeroth channel used for XPD TX. + * @xdp_tx_per_channel: Max number of TX queues on an XDP TX channel. * @rx_ip_align: RX DMA address offset to have IP header aligned in * in accordance with NET_IP_ALIGN * @rx_dma_len: Current maximum RX DMA length @@ -979,6 +986,9 @@ struct efx_nic { const struct efx_channel_type * extra_channel_type[EFX_MAX_EXTRA_CHANNELS]; + unsigned int xdp_tx_queue_count; + struct efx_tx_queue **xdp_tx_queues; + unsigned rxq_entries; unsigned txq_entries; unsigned int txq_stop_thresh; @@ -997,6 +1007,9 @@ struct efx_nic { unsigned tx_channel_offset; unsigned n_tx_channels; unsigned n_extra_tx_channels; + unsigned int n_xdp_channels; + unsigned int xdp_channel_offset; + unsigned int xdp_tx_per_channel; unsigned int rx_ip_align; unsigned int rx_dma_len; unsigned int rx_buffer_order; @@ -1491,10 +1504,24 @@ efx_get_tx_queue(struct efx_nic *efx, unsigned index, unsigned type) return &efx->channel[efx->tx_channel_offset + index]->tx_queue[type]; } +static inline struct efx_channel * +efx_get_xdp_channel(struct efx_nic *efx, unsigned int index) +{ + EFX_WARN_ON_ONCE_PARANOID(index >= efx->n_xdp_channels); + return efx->channel[efx->xdp_channel_offset + index]; +} + +static inline bool efx_channel_is_xdp_tx(struct efx_channel *channel) +{ + return channel->channel - channel->efx->xdp_channel_offset < + channel->efx->n_xdp_channels; +} + static inline bool efx_channel_has_tx_queues(struct efx_channel *channel) { - return channel->type && channel->type->want_txqs && - channel->type->want_txqs(channel); + return efx_channel_is_xdp_tx(channel) || + (channel->type && channel->type->want_txqs && + channel->type->want_txqs(channel)); } static inline struct efx_tx_queue * @@ -1518,7 +1545,8 @@ static inline bool efx_tx_queue_used(struct efx_tx_queue *tx_queue) else \ for (_tx_queue = (_channel)->tx_queue; \ _tx_queue < (_channel)->tx_queue + EFX_TXQ_TYPES && \ - efx_tx_queue_used(_tx_queue); \ + (efx_tx_queue_used(_tx_queue) || \ + efx_channel_is_xdp_tx(_channel)); \ _tx_queue++) /* Iterate over all possible TX queues belonging to a channel */ diff --git a/drivers/net/ethernet/sfc/tx.c b/drivers/net/ethernet/sfc/tx.c index 204aafb3d96a..8a5bc500d2a1 100644 --- a/drivers/net/ethernet/sfc/tx.c +++ b/drivers/net/ethernet/sfc/tx.c @@ -859,6 +859,8 @@ void efx_init_tx_queue(struct efx_tx_queue *tx_queue) tx_queue->completed_timestamp_major = 0; tx_queue->completed_timestamp_minor = 0; + tx_queue->xdp_tx = efx_channel_is_xdp_tx(tx_queue->channel); + /* Set up default function pointers. These may get replaced by * efx_nic_init_tx() based off NIC/queue capabilities. */ -- cgit v1.2.3-59-g8ed1b From cd846bef2d7464b58db01ecd2c6cb20652c5a7a8 Mon Sep 17 00:00:00 2001 From: Charles McLachlan Date: Thu, 31 Oct 2019 10:24:23 +0000 Subject: sfc: add XDP counters to ethtool stats Count XDP packet drops, error drops, transmissions and redirects and expose these counters via the ethtool stats command. Signed-off-by: Charles McLachlan Acked-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/ethtool.c | 25 +++++++++++++++++++++++++ drivers/net/ethernet/sfc/net_driver.h | 8 ++++++++ drivers/net/ethernet/sfc/rx.c | 9 +++++++++ 3 files changed, 42 insertions(+) (limited to 'drivers/net/ethernet/sfc/net_driver.h') diff --git a/drivers/net/ethernet/sfc/ethtool.c b/drivers/net/ethernet/sfc/ethtool.c index 86b965875540..8db593fb9699 100644 --- a/drivers/net/ethernet/sfc/ethtool.c +++ b/drivers/net/ethernet/sfc/ethtool.c @@ -83,6 +83,10 @@ static const struct efx_sw_stat_desc efx_sw_stat_desc[] = { EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_frm_trunc), EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_merge_events), EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_merge_packets), + EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_xdp_drops), + EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_xdp_bad_drops), + EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_xdp_tx), + EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_xdp_redirect), }; #define EFX_ETHTOOL_SW_STAT_COUNT ARRAY_SIZE(efx_sw_stat_desc) @@ -399,6 +403,19 @@ static size_t efx_describe_per_queue_stats(struct efx_nic *efx, u8 *strings) } } } + if (efx->xdp_tx_queue_count && efx->xdp_tx_queues) { + unsigned short xdp; + + for (xdp = 0; xdp < efx->xdp_tx_queue_count; xdp++) { + n_stats++; + if (strings) { + snprintf(strings, ETH_GSTRING_LEN, + "tx-xdp-cpu-%hu.tx_packets", xdp); + strings += ETH_GSTRING_LEN; + } + } + } + return n_stats; } @@ -509,6 +526,14 @@ static void efx_ethtool_get_stats(struct net_device *net_dev, data++; } } + if (efx->xdp_tx_queue_count && efx->xdp_tx_queues) { + int xdp; + + for (xdp = 0; xdp < efx->xdp_tx_queue_count; xdp++) { + data[0] = efx->xdp_tx_queues[xdp]->tx_packets; + data++; + } + } efx_ptp_update_stats(efx, data); } diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h index 505ddc060e64..04e49eac7327 100644 --- a/drivers/net/ethernet/sfc/net_driver.h +++ b/drivers/net/ethernet/sfc/net_driver.h @@ -453,6 +453,10 @@ enum efx_sync_events_state { * lack of descriptors * @n_rx_merge_events: Number of RX merged completion events * @n_rx_merge_packets: Number of RX packets completed by merged events + * @n_rx_xdp_drops: Count of RX packets intentionally dropped due to XDP + * @n_rx_xdp_bad_drops: Count of RX packets dropped due to XDP errors + * @n_rx_xdp_tx: Count of RX packets retransmitted due to XDP + * @n_rx_xdp_redirect: Count of RX packets redirected to a different NIC by XDP * @rx_pkt_n_frags: Number of fragments in next packet to be delivered by * __efx_rx_packet(), or zero if there is none * @rx_pkt_index: Ring index of first buffer for next packet to be delivered @@ -506,6 +510,10 @@ struct efx_channel { unsigned int n_rx_nodesc_trunc; unsigned int n_rx_merge_events; unsigned int n_rx_merge_packets; + unsigned int n_rx_xdp_drops; + unsigned int n_rx_xdp_bad_drops; + unsigned int n_rx_xdp_tx; + unsigned int n_rx_xdp_redirect; unsigned int rx_pkt_n_frags; unsigned int rx_pkt_index; diff --git a/drivers/net/ethernet/sfc/rx.c b/drivers/net/ethernet/sfc/rx.c index 91f6d5b9ceac..a7d9841105d8 100644 --- a/drivers/net/ethernet/sfc/rx.c +++ b/drivers/net/ethernet/sfc/rx.c @@ -677,6 +677,7 @@ static bool efx_do_xdp(struct efx_nic *efx, struct efx_channel *channel, netif_err(efx, rx_err, efx->net_dev, "XDP is not possible with multiple receive fragments (%d)\n", channel->rx_pkt_n_frags); + channel->n_rx_xdp_bad_drops++; return false; } @@ -722,6 +723,9 @@ static bool efx_do_xdp(struct efx_nic *efx, struct efx_channel *channel, if (net_ratelimit()) netif_err(efx, rx_err, efx->net_dev, "XDP TX failed (%d)\n", err); + channel->n_rx_xdp_bad_drops++; + } else { + channel->n_rx_xdp_tx++; } break; @@ -732,12 +736,16 @@ static bool efx_do_xdp(struct efx_nic *efx, struct efx_channel *channel, if (net_ratelimit()) netif_err(efx, rx_err, efx->net_dev, "XDP redirect failed (%d)\n", err); + channel->n_rx_xdp_bad_drops++; + } else { + channel->n_rx_xdp_redirect++; } break; default: bpf_warn_invalid_xdp_action(xdp_act); efx_free_rx_buffers(rx_queue, rx_buf, 1); + channel->n_rx_xdp_bad_drops++; break; case XDP_ABORTED: @@ -745,6 +753,7 @@ static bool efx_do_xdp(struct efx_nic *efx, struct efx_channel *channel, /* Fall through */ case XDP_DROP: efx_free_rx_buffers(rx_queue, rx_buf, 1); + channel->n_rx_xdp_drops++; break; } -- cgit v1.2.3-59-g8ed1b From 8490e75cdbb734829d3b324c3a52492c2edbfbd6 Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Fri, 22 Nov 2019 17:57:03 +0000 Subject: sfc: change ARFS expiry mechanism The old rfs_filters_added method for determining the quota could potentially allow the NIC to become filled with old filters, which never get tested for expiry. Instead, explicitly make expiry check work depend on the number of filters installed, and don't count checking slots without filters in as doing work. This guarantees that each filter will be checked for expiry at least once every thirty seconds (assuming the channel to which it belongs is NAPI polling actively) regardless of fill level. Signed-off-by: Edward Cree Tested-by: David Ahern Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/sfc/efx.c | 8 +++--- drivers/net/ethernet/sfc/efx.h | 9 ++++--- drivers/net/ethernet/sfc/net_driver.h | 14 ++++++----- drivers/net/ethernet/sfc/rx.c | 46 ++++++++++++++++++++--------------- 4 files changed, 45 insertions(+), 32 deletions(-) (limited to 'drivers/net/ethernet/sfc/net_driver.h') diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index 0fa9972027db..38d186b949be 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -1969,6 +1969,8 @@ static int efx_probe_filters(struct efx_nic *efx) ++i) channel->rps_flow_id[i] = RPS_FLOW_ID_INVALID; + channel->rfs_expire_index = 0; + channel->rfs_filter_count = 0; } if (!success) { @@ -1978,8 +1980,6 @@ static int efx_probe_filters(struct efx_nic *efx) rc = -ENOMEM; goto out_unlock; } - - efx->rps_expire_index = efx->rps_expire_channel = 0; } #endif out_unlock: @@ -1993,8 +1993,10 @@ static void efx_remove_filters(struct efx_nic *efx) #ifdef CONFIG_RFS_ACCEL struct efx_channel *channel; - efx_for_each_channel(channel, efx) + efx_for_each_channel(channel, efx) { + flush_work(&channel->filter_work); kfree(channel->rps_flow_id); + } #endif down_write(&efx->filter_sem); efx->type->filter_table_remove(efx); diff --git a/drivers/net/ethernet/sfc/efx.h b/drivers/net/ethernet/sfc/efx.h index 45c7ae4114ec..e58c2b6d64d9 100644 --- a/drivers/net/ethernet/sfc/efx.h +++ b/drivers/net/ethernet/sfc/efx.h @@ -166,15 +166,16 @@ static inline s32 efx_filter_get_rx_ids(struct efx_nic *efx, #ifdef CONFIG_RFS_ACCEL int efx_filter_rfs(struct net_device *net_dev, const struct sk_buff *skb, u16 rxq_index, u32 flow_id); -bool __efx_filter_rfs_expire(struct efx_nic *efx, unsigned quota); +bool __efx_filter_rfs_expire(struct efx_channel *channel, unsigned int quota); static inline void efx_filter_rfs_expire(struct work_struct *data) { struct efx_channel *channel = container_of(data, struct efx_channel, filter_work); + unsigned int time = jiffies - channel->rfs_last_expiry, quota; - if (channel->rfs_filters_added >= 60 && - __efx_filter_rfs_expire(channel->efx, 100)) - channel->rfs_filters_added -= 60; + quota = channel->rfs_filter_count * time / (30 * HZ); + if (quota > 20 && __efx_filter_rfs_expire(channel, min(channel->rfs_filter_count, quota))) + channel->rfs_last_expiry += time; } #define efx_filter_rfs_enabled() 1 #else diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h index 04e49eac7327..5b1b882f6c67 100644 --- a/drivers/net/ethernet/sfc/net_driver.h +++ b/drivers/net/ethernet/sfc/net_driver.h @@ -439,6 +439,11 @@ enum efx_sync_events_state { * @event_test_cpu: Last CPU to handle interrupt or test event for this channel * @irq_count: Number of IRQs since last adaptive moderation decision * @irq_mod_score: IRQ moderation score + * @rfs_filter_count: number of accelerated RFS filters currently in place; + * equals the count of @rps_flow_id slots filled + * @rfs_last_expiry: value of jiffies last time some accelerated RFS filters + * were checked for expiry + * @rfs_expire_index: next accelerated RFS filter ID to check for expiry * @filter_work: Work item for efx_filter_rfs_expire() * @rps_flow_id: Flow IDs of filters allocated for accelerated RFS, * indexed by filter ID @@ -489,7 +494,9 @@ struct efx_channel { unsigned int irq_count; unsigned int irq_mod_score; #ifdef CONFIG_RFS_ACCEL - unsigned int rfs_filters_added; + unsigned int rfs_filter_count; + unsigned int rfs_last_expiry; + unsigned int rfs_expire_index; struct work_struct filter_work; #define RPS_FLOW_ID_INVALID 0xFFFFFFFF u32 *rps_flow_id; @@ -923,9 +930,6 @@ struct efx_async_filter_insertion { * @filter_sem: Filter table rw_semaphore, protects existence of @filter_state * @filter_state: Architecture-dependent filter table state * @rps_mutex: Protects RPS state of all channels - * @rps_expire_channel: Next channel to check for expiry - * @rps_expire_index: Next index to check for expiry in - * @rps_expire_channel's @rps_flow_id * @rps_slot_map: bitmap of in-flight entries in @rps_slot * @rps_slot: array of ARFS insertion requests for efx_filter_rfs_work() * @rps_hash_lock: Protects ARFS filter mapping state (@rps_hash_table and @@ -1096,8 +1100,6 @@ struct efx_nic { void *filter_state; #ifdef CONFIG_RFS_ACCEL struct mutex rps_mutex; - unsigned int rps_expire_channel; - unsigned int rps_expire_index; unsigned long rps_slot_map; struct efx_async_filter_insertion rps_slot[EFX_RPS_MAX_IN_FLIGHT]; spinlock_t rps_hash_lock; diff --git a/drivers/net/ethernet/sfc/rx.c b/drivers/net/ethernet/sfc/rx.c index bec261905530..bbf2393f7599 100644 --- a/drivers/net/ethernet/sfc/rx.c +++ b/drivers/net/ethernet/sfc/rx.c @@ -988,6 +988,7 @@ static void efx_filter_rfs_work(struct work_struct *data) rc = efx->type->filter_insert(efx, &req->spec, true); if (rc >= 0) + /* Discard 'priority' part of EF10+ filter ID (mcdi_filters) */ rc %= efx->type->max_rx_ip_filters; if (efx->rps_hash_table) { spin_lock_bh(&efx->rps_hash_lock); @@ -1012,8 +1013,9 @@ static void efx_filter_rfs_work(struct work_struct *data) * later. */ mutex_lock(&efx->rps_mutex); + if (channel->rps_flow_id[rc] == RPS_FLOW_ID_INVALID) + channel->rfs_filter_count++; channel->rps_flow_id[rc] = req->flow_id; - ++channel->rfs_filters_added; mutex_unlock(&efx->rps_mutex); if (req->spec.ether_type == htons(ETH_P_IP)) @@ -1139,38 +1141,44 @@ out_clear: return rc; } -bool __efx_filter_rfs_expire(struct efx_nic *efx, unsigned int quota) +bool __efx_filter_rfs_expire(struct efx_channel *channel, unsigned int quota) { bool (*expire_one)(struct efx_nic *efx, u32 flow_id, unsigned int index); - unsigned int channel_idx, index, size; + struct efx_nic *efx = channel->efx; + unsigned int index, size, start; u32 flow_id; if (!mutex_trylock(&efx->rps_mutex)) return false; expire_one = efx->type->filter_rfs_expire_one; - channel_idx = efx->rps_expire_channel; - index = efx->rps_expire_index; + index = channel->rfs_expire_index; + start = index; size = efx->type->max_rx_ip_filters; - while (quota--) { - struct efx_channel *channel = efx_get_channel(efx, channel_idx); + while (quota) { flow_id = channel->rps_flow_id[index]; - if (flow_id != RPS_FLOW_ID_INVALID && - expire_one(efx, flow_id, index)) { - netif_info(efx, rx_status, efx->net_dev, - "expired filter %d [queue %u flow %u]\n", - index, channel_idx, flow_id); - channel->rps_flow_id[index] = RPS_FLOW_ID_INVALID; + if (flow_id != RPS_FLOW_ID_INVALID) { + quota--; + if (expire_one(efx, flow_id, index)) { + netif_info(efx, rx_status, efx->net_dev, + "expired filter %d [channel %u flow %u]\n", + index, channel->channel, flow_id); + channel->rps_flow_id[index] = RPS_FLOW_ID_INVALID; + channel->rfs_filter_count--; + } } - if (++index == size) { - if (++channel_idx == efx->n_channels) - channel_idx = 0; + if (++index == size) index = 0; - } + /* If we were called with a quota that exceeds the total number + * of filters in the table (which should never happen), ensure + * that we don't loop forever - stop when we've examined every + * row of the table. + */ + if (WARN_ON(index == start && quota)) + break; } - efx->rps_expire_channel = channel_idx; - efx->rps_expire_index = index; + channel->rfs_expire_index = index; mutex_unlock(&efx->rps_mutex); return true; } -- cgit v1.2.3-59-g8ed1b From ca70bd423f10b004f1bf7b2424d34025a9408e54 Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Fri, 22 Nov 2019 17:57:27 +0000 Subject: sfc: add statistics for ARFS Report the number of successful and failed insertions, and also the current count of filters, to aid in tuning e.g. rps_flow_cnt. Signed-off-by: Edward Cree Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/sfc/ethtool.c | 6 ++++++ drivers/net/ethernet/sfc/net_driver.h | 4 ++++ drivers/net/ethernet/sfc/rx.c | 2 ++ 3 files changed, 12 insertions(+) (limited to 'drivers/net/ethernet/sfc/net_driver.h') diff --git a/drivers/net/ethernet/sfc/ethtool.c b/drivers/net/ethernet/sfc/ethtool.c index 8db593fb9699..6a9347cd67f3 100644 --- a/drivers/net/ethernet/sfc/ethtool.c +++ b/drivers/net/ethernet/sfc/ethtool.c @@ -56,6 +56,9 @@ static u64 efx_get_atomic_stat(void *field) #define EFX_ETHTOOL_UINT_CHANNEL_STAT(field) \ EFX_ETHTOOL_STAT(field, channel, n_##field, \ unsigned int, efx_get_uint_stat) +#define EFX_ETHTOOL_UINT_CHANNEL_STAT_NO_N(field) \ + EFX_ETHTOOL_STAT(field, channel, field, \ + unsigned int, efx_get_uint_stat) #define EFX_ETHTOOL_UINT_TXQ_STAT(field) \ EFX_ETHTOOL_STAT(tx_##field, tx_queue, field, \ @@ -87,6 +90,9 @@ static const struct efx_sw_stat_desc efx_sw_stat_desc[] = { EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_xdp_bad_drops), EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_xdp_tx), EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_xdp_redirect), + EFX_ETHTOOL_UINT_CHANNEL_STAT_NO_N(rfs_filter_count), + EFX_ETHTOOL_UINT_CHANNEL_STAT(rfs_succeeded), + EFX_ETHTOOL_UINT_CHANNEL_STAT(rfs_failed), }; #define EFX_ETHTOOL_SW_STAT_COUNT ARRAY_SIZE(efx_sw_stat_desc) diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h index 5b1b882f6c67..ccd480e699d3 100644 --- a/drivers/net/ethernet/sfc/net_driver.h +++ b/drivers/net/ethernet/sfc/net_driver.h @@ -444,6 +444,8 @@ enum efx_sync_events_state { * @rfs_last_expiry: value of jiffies last time some accelerated RFS filters * were checked for expiry * @rfs_expire_index: next accelerated RFS filter ID to check for expiry + * @n_rfs_succeeded: number of successful accelerated RFS filter insertions + * @n_rfs_failed; number of failed accelerated RFS filter insertions * @filter_work: Work item for efx_filter_rfs_expire() * @rps_flow_id: Flow IDs of filters allocated for accelerated RFS, * indexed by filter ID @@ -497,6 +499,8 @@ struct efx_channel { unsigned int rfs_filter_count; unsigned int rfs_last_expiry; unsigned int rfs_expire_index; + unsigned int n_rfs_succeeded; + unsigned int n_rfs_failed; struct work_struct filter_work; #define RPS_FLOW_ID_INVALID 0xFFFFFFFF u32 *rps_flow_id; diff --git a/drivers/net/ethernet/sfc/rx.c b/drivers/net/ethernet/sfc/rx.c index 252a5f10596d..ef52b24ad9e7 100644 --- a/drivers/net/ethernet/sfc/rx.c +++ b/drivers/net/ethernet/sfc/rx.c @@ -1032,6 +1032,7 @@ static void efx_filter_rfs_work(struct work_struct *data) req->spec.rem_host, ntohs(req->spec.rem_port), req->spec.loc_host, ntohs(req->spec.loc_port), req->rxq_index, req->flow_id, rc, arfs_id); + channel->n_rfs_succeeded++; } else { if (req->spec.ether_type == htons(ETH_P_IP)) netif_dbg(efx, rx_status, efx->net_dev, @@ -1047,6 +1048,7 @@ static void efx_filter_rfs_work(struct work_struct *data) req->spec.rem_host, ntohs(req->spec.rem_port), req->spec.loc_host, ntohs(req->spec.loc_port), req->rxq_index, req->flow_id, rc, arfs_id); + channel->n_rfs_failed++; /* We're overloading the NIC's filter tables, so let's do a * chunk of extra expiry work. */ -- cgit v1.2.3-59-g8ed1b From 6fbc05e59163e66795a2bbdb4068abd7f7ae3510 Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Fri, 22 Nov 2019 17:57:40 +0000 Subject: sfc: do ARFS expiry work occasionally even without NAPI poll If there's no traffic on a channel, its ARFS expiry work will never get scheduled by efx_poll() as that isn't being run. So make efx_filter_rfs_expire() reschedule itself to run after 30 seconds. Signed-off-by: Edward Cree Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/sfc/efx.c | 8 ++++---- drivers/net/ethernet/sfc/efx.h | 10 +++++++--- drivers/net/ethernet/sfc/net_driver.h | 2 +- 3 files changed, 12 insertions(+), 8 deletions(-) (limited to 'drivers/net/ethernet/sfc/net_driver.h') diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index 38d186b949be..992c773620ec 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -355,7 +355,7 @@ static int efx_poll(struct napi_struct *napi, int budget) #ifdef CONFIG_RFS_ACCEL /* Perhaps expire some ARFS filters */ - schedule_work(&channel->filter_work); + mod_delayed_work(system_wq, &channel->filter_work, 0); #endif /* There is no race here; although napi_disable() will @@ -487,7 +487,7 @@ efx_alloc_channel(struct efx_nic *efx, int i, struct efx_channel *old_channel) } #ifdef CONFIG_RFS_ACCEL - INIT_WORK(&channel->filter_work, efx_filter_rfs_expire); + INIT_DELAYED_WORK(&channel->filter_work, efx_filter_rfs_expire); #endif rx_queue = &channel->rx_queue; @@ -533,7 +533,7 @@ efx_copy_channel(const struct efx_channel *old_channel) memset(&rx_queue->rxd, 0, sizeof(rx_queue->rxd)); timer_setup(&rx_queue->slow_fill, efx_rx_slow_fill, 0); #ifdef CONFIG_RFS_ACCEL - INIT_WORK(&channel->filter_work, efx_filter_rfs_expire); + INIT_DELAYED_WORK(&channel->filter_work, efx_filter_rfs_expire); #endif return channel; @@ -1994,7 +1994,7 @@ static void efx_remove_filters(struct efx_nic *efx) struct efx_channel *channel; efx_for_each_channel(channel, efx) { - flush_work(&channel->filter_work); + cancel_delayed_work_sync(&channel->filter_work); kfree(channel->rps_flow_id); } #endif diff --git a/drivers/net/ethernet/sfc/efx.h b/drivers/net/ethernet/sfc/efx.h index e58c2b6d64d9..2dd8d5002315 100644 --- a/drivers/net/ethernet/sfc/efx.h +++ b/drivers/net/ethernet/sfc/efx.h @@ -169,13 +169,17 @@ int efx_filter_rfs(struct net_device *net_dev, const struct sk_buff *skb, bool __efx_filter_rfs_expire(struct efx_channel *channel, unsigned int quota); static inline void efx_filter_rfs_expire(struct work_struct *data) { - struct efx_channel *channel = container_of(data, struct efx_channel, - filter_work); - unsigned int time = jiffies - channel->rfs_last_expiry, quota; + struct delayed_work *dwork = to_delayed_work(data); + struct efx_channel *channel; + unsigned int time, quota; + channel = container_of(dwork, struct efx_channel, filter_work); + time = jiffies - channel->rfs_last_expiry; quota = channel->rfs_filter_count * time / (30 * HZ); if (quota > 20 && __efx_filter_rfs_expire(channel, min(channel->rfs_filter_count, quota))) channel->rfs_last_expiry += time; + /* Ensure we do more work eventually even if NAPI poll is not happening */ + schedule_delayed_work(dwork, 30 * HZ); } #define efx_filter_rfs_enabled() 1 #else diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h index ccd480e699d3..1f88212be085 100644 --- a/drivers/net/ethernet/sfc/net_driver.h +++ b/drivers/net/ethernet/sfc/net_driver.h @@ -501,7 +501,7 @@ struct efx_channel { unsigned int rfs_expire_index; unsigned int n_rfs_succeeded; unsigned int n_rfs_failed; - struct work_struct filter_work; + struct delayed_work filter_work; #define RPS_FLOW_ID_INVALID 0xFFFFFFFF u32 *rps_flow_id; #endif -- cgit v1.2.3-59-g8ed1b