diff options
Diffstat (limited to 'drivers/net/ethernet/sfc')
-rw-r--r-- | drivers/net/ethernet/sfc/ef10.c | 22 | ||||
-rw-r--r-- | drivers/net/ethernet/sfc/efx.c | 283 | ||||
-rw-r--r-- | drivers/net/ethernet/sfc/efx.h | 22 | ||||
-rw-r--r-- | drivers/net/ethernet/sfc/ethtool.c | 33 | ||||
-rw-r--r-- | drivers/net/ethernet/sfc/net_driver.h | 84 | ||||
-rw-r--r-- | drivers/net/ethernet/sfc/rx.c | 220 | ||||
-rw-r--r-- | drivers/net/ethernet/sfc/tx.c | 92 |
7 files changed, 671 insertions, 85 deletions
diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index 0ec13f520e90..4d9bbccc6f89 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -946,8 +946,10 @@ static int efx_ef10_link_piobufs(struct efx_nic *efx) /* Extra channels, even those with TXQs (PTP), do not require * PIO resources. */ - if (!channel->type->want_pio) + if (!channel->type->want_pio || + channel->channel >= efx->xdp_channel_offset) continue; + efx_for_each_channel_tx_queue(tx_queue, channel) { /* We assign the PIO buffers to queues in * reverse order to allow for the following @@ -1296,8 +1298,9 @@ static int efx_ef10_dimension_resources(struct efx_nic *efx) int rc; channel_vis = max(efx->n_channels, - (efx->n_tx_channels + efx->n_extra_tx_channels) * - EFX_TXQ_TYPES); + ((efx->n_tx_channels + efx->n_extra_tx_channels) * + EFX_TXQ_TYPES) + + efx->n_xdp_channels * efx->xdp_tx_per_channel); #ifdef EFX_USE_PIO /* Try to allocate PIO buffers if wanted and if the full @@ -2434,11 +2437,12 @@ static void efx_ef10_tx_init(struct efx_tx_queue *tx_queue) /* TSOv2 is a limited resource that can only be configured on a limited * number of queues. TSO without checksum offload is not really a thing, * so we only enable it for those queues. - * TSOv2 cannot be used with Hardware timestamping. + * TSOv2 cannot be used with Hardware timestamping, and is never needed + * for XDP tx. 
*/ if (csum_offload && (nic_data->datapath_caps2 & (1 << MC_CMD_GET_CAPABILITIES_V2_OUT_TX_TSO_V2_LBN)) && - !tx_queue->timestamping) { + !tx_queue->timestamping && !tx_queue->xdp_tx) { tso_v2 = true; netif_dbg(efx, hw, efx->net_dev, "Using TSOv2 for channel %u\n", channel->channel); @@ -4198,11 +4202,15 @@ static int efx_ef10_filter_push(struct efx_nic *efx, { MCDI_DECLARE_BUF(inbuf, MC_CMD_FILTER_OP_EXT_IN_LEN); MCDI_DECLARE_BUF(outbuf, MC_CMD_FILTER_OP_EXT_OUT_LEN); + size_t outlen; int rc; efx_ef10_filter_push_prep(efx, spec, inbuf, *handle, ctx, replacing); - rc = efx_mcdi_rpc(efx, MC_CMD_FILTER_OP, inbuf, sizeof(inbuf), - outbuf, sizeof(outbuf), NULL); + rc = efx_mcdi_rpc_quiet(efx, MC_CMD_FILTER_OP, inbuf, sizeof(inbuf), + outbuf, sizeof(outbuf), &outlen); + if (rc && spec->priority != EFX_FILTER_PRI_HINT) + efx_mcdi_display_error(efx, MC_CMD_FILTER_OP, sizeof(inbuf), + outbuf, outlen, rc); if (rc == 0) *handle = MCDI_QWORD(outbuf, FILTER_OP_OUT_HANDLE); if (rc == -ENOSPC) diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index 2fef7402233e..992c773620ec 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -226,6 +226,10 @@ static void efx_fini_napi_channel(struct efx_channel *channel); static void efx_fini_struct(struct efx_nic *efx); static void efx_start_all(struct efx_nic *efx); static void efx_stop_all(struct efx_nic *efx); +static int efx_xdp_setup_prog(struct efx_nic *efx, struct bpf_prog *prog); +static int efx_xdp(struct net_device *dev, struct netdev_bpf *xdp); +static int efx_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **xdpfs, + u32 flags); #define EFX_ASSERT_RESET_SERIALISED(efx) \ do { \ @@ -340,6 +344,8 @@ static int efx_poll(struct napi_struct *napi, int budget) spent = efx_process_channel(channel, budget); + xdp_do_flush_map(); + if (spent < budget) { if (efx_channel_has_rx_queue(channel) && efx->irq_rx_adaptive && @@ -349,7 +355,7 @@ static int efx_poll(struct 
napi_struct *napi, int budget) #ifdef CONFIG_RFS_ACCEL /* Perhaps expire some ARFS filters */ - schedule_work(&channel->filter_work); + mod_delayed_work(system_wq, &channel->filter_work, 0); #endif /* There is no race here; although napi_disable() will @@ -481,7 +487,7 @@ efx_alloc_channel(struct efx_nic *efx, int i, struct efx_channel *old_channel) } #ifdef CONFIG_RFS_ACCEL - INIT_WORK(&channel->filter_work, efx_filter_rfs_expire); + INIT_DELAYED_WORK(&channel->filter_work, efx_filter_rfs_expire); #endif rx_queue = &channel->rx_queue; @@ -527,7 +533,7 @@ efx_copy_channel(const struct efx_channel *old_channel) memset(&rx_queue->rxd, 0, sizeof(rx_queue->rxd)); timer_setup(&rx_queue->slow_fill, efx_rx_slow_fill, 0); #ifdef CONFIG_RFS_ACCEL - INIT_WORK(&channel->filter_work, efx_filter_rfs_expire); + INIT_DELAYED_WORK(&channel->filter_work, efx_filter_rfs_expire); #endif return channel; @@ -579,9 +585,14 @@ efx_get_channel_name(struct efx_channel *channel, char *buf, size_t len) int number; number = channel->channel; - if (efx->tx_channel_offset == 0) { + + if (number >= efx->xdp_channel_offset && + !WARN_ON_ONCE(!efx->n_xdp_channels)) { + type = "-xdp"; + number -= efx->xdp_channel_offset; + } else if (efx->tx_channel_offset == 0) { type = ""; - } else if (channel->channel < efx->tx_channel_offset) { + } else if (number < efx->tx_channel_offset) { type = "-rx"; } else { type = "-tx"; @@ -651,7 +662,7 @@ static void efx_start_datapath(struct efx_nic *efx) efx->rx_dma_len = (efx->rx_prefix_size + EFX_MAX_FRAME_LEN(efx->net_dev->mtu) + efx->type->rx_buffer_padding); - rx_buf_len = (sizeof(struct efx_rx_page_state) + + rx_buf_len = (sizeof(struct efx_rx_page_state) + XDP_PACKET_HEADROOM + efx->rx_ip_align + efx->rx_dma_len); if (rx_buf_len <= PAGE_SIZE) { efx->rx_scatter = efx->type->always_rx_scatter; @@ -774,6 +785,7 @@ static void efx_stop_datapath(struct efx_nic *efx) efx_for_each_possible_channel_tx_queue(tx_queue, channel) efx_fini_tx_queue(tx_queue); } + 
efx->xdp_rxq_info_failed = false; } static void efx_remove_channel(struct efx_channel *channel) @@ -798,6 +810,8 @@ static void efx_remove_channels(struct efx_nic *efx) efx_for_each_channel(channel, efx) efx_remove_channel(channel); + + kfree(efx->xdp_tx_queues); } int @@ -1435,6 +1449,101 @@ static unsigned int efx_wanted_parallelism(struct efx_nic *efx) return count; } +static int efx_allocate_msix_channels(struct efx_nic *efx, + unsigned int max_channels, + unsigned int extra_channels, + unsigned int parallelism) +{ + unsigned int n_channels = parallelism; + int vec_count; + int n_xdp_tx; + int n_xdp_ev; + + if (efx_separate_tx_channels) + n_channels *= 2; + n_channels += extra_channels; + + /* To allow XDP transmit to happen from arbitrary NAPI contexts + * we allocate a TX queue per CPU. We share event queues across + * multiple tx queues, assuming tx and ev queues are both + * maximum size. + */ + + n_xdp_tx = num_possible_cpus(); + n_xdp_ev = DIV_ROUND_UP(n_xdp_tx, EFX_TXQ_TYPES); + + /* Check resources. + * We need a channel per event queue, plus a VI per tx queue. + * This may be more pessimistic than it needs to be. 
+ */ + if (n_channels + n_xdp_ev > max_channels) { + netif_err(efx, drv, efx->net_dev, + "Insufficient resources for %d XDP event queues (%d other channels, max %d)\n", + n_xdp_ev, n_channels, max_channels); + efx->n_xdp_channels = 0; + efx->xdp_tx_per_channel = 0; + efx->xdp_tx_queue_count = 0; + } else { + efx->n_xdp_channels = n_xdp_ev; + efx->xdp_tx_per_channel = EFX_TXQ_TYPES; + efx->xdp_tx_queue_count = n_xdp_tx; + n_channels += n_xdp_ev; + netif_dbg(efx, drv, efx->net_dev, + "Allocating %d TX and %d event queues for XDP\n", + n_xdp_tx, n_xdp_ev); + } + + n_channels = min(n_channels, max_channels); + + vec_count = pci_msix_vec_count(efx->pci_dev); + if (vec_count < 0) + return vec_count; + if (vec_count < n_channels) { + netif_err(efx, drv, efx->net_dev, + "WARNING: Insufficient MSI-X vectors available (%d < %u).\n", + vec_count, n_channels); + netif_err(efx, drv, efx->net_dev, + "WARNING: Performance may be reduced.\n"); + n_channels = vec_count; + } + + efx->n_channels = n_channels; + + /* Do not create the PTP TX queue(s) if PTP uses the MC directly. */ + if (extra_channels && !efx_ptp_use_mac_tx_timestamps(efx)) + n_channels--; + + /* Ignore XDP tx channels when creating rx channels. 
*/ + n_channels -= efx->n_xdp_channels; + + if (efx_separate_tx_channels) { + efx->n_tx_channels = + min(max(n_channels / 2, 1U), + efx->max_tx_channels); + efx->tx_channel_offset = + n_channels - efx->n_tx_channels; + efx->n_rx_channels = + max(n_channels - + efx->n_tx_channels, 1U); + } else { + efx->n_tx_channels = min(n_channels, efx->max_tx_channels); + efx->tx_channel_offset = 0; + efx->n_rx_channels = n_channels; + } + + if (efx->n_xdp_channels) + efx->xdp_channel_offset = efx->tx_channel_offset + + efx->n_tx_channels; + else + efx->xdp_channel_offset = efx->n_channels; + + netif_dbg(efx, drv, efx->net_dev, + "Allocating %u RX channels\n", + efx->n_rx_channels); + + return efx->n_channels; +} + /* Probe the number and type of interrupts we are able to obtain, and * the resulting numbers of channels and RX queues. */ @@ -1449,19 +1558,19 @@ static int efx_probe_interrupts(struct efx_nic *efx) ++extra_channels; if (efx->interrupt_mode == EFX_INT_MODE_MSIX) { + unsigned int parallelism = efx_wanted_parallelism(efx); struct msix_entry xentries[EFX_MAX_CHANNELS]; unsigned int n_channels; - n_channels = efx_wanted_parallelism(efx); - if (efx_separate_tx_channels) - n_channels *= 2; - n_channels += extra_channels; - n_channels = min(n_channels, efx->max_channels); - - for (i = 0; i < n_channels; i++) - xentries[i].entry = i; - rc = pci_enable_msix_range(efx->pci_dev, - xentries, 1, n_channels); + rc = efx_allocate_msix_channels(efx, efx->max_channels, + extra_channels, parallelism); + if (rc >= 0) { + n_channels = rc; + for (i = 0; i < n_channels; i++) + xentries[i].entry = i; + rc = pci_enable_msix_range(efx->pci_dev, xentries, 1, + n_channels); + } if (rc < 0) { /* Fall back to single channel MSI */ netif_err(efx, drv, efx->net_dev, @@ -1480,21 +1589,6 @@ static int efx_probe_interrupts(struct efx_nic *efx) } if (rc > 0) { - efx->n_channels = n_channels; - if (n_channels > extra_channels) - n_channels -= extra_channels; - if (efx_separate_tx_channels) { - 
efx->n_tx_channels = min(max(n_channels / 2, - 1U), - efx->max_tx_channels); - efx->n_rx_channels = max(n_channels - - efx->n_tx_channels, - 1U); - } else { - efx->n_tx_channels = min(n_channels, - efx->max_tx_channels); - efx->n_rx_channels = n_channels; - } for (i = 0; i < efx->n_channels; i++) efx_get_channel(efx, i)->irq = xentries[i].vector; @@ -1506,6 +1600,8 @@ static int efx_probe_interrupts(struct efx_nic *efx) efx->n_channels = 1; efx->n_rx_channels = 1; efx->n_tx_channels = 1; + efx->n_xdp_channels = 0; + efx->xdp_channel_offset = efx->n_channels; rc = pci_enable_msi(efx->pci_dev); if (rc == 0) { efx_get_channel(efx, 0)->irq = efx->pci_dev->irq; @@ -1524,12 +1620,14 @@ static int efx_probe_interrupts(struct efx_nic *efx) efx->n_channels = 1 + (efx_separate_tx_channels ? 1 : 0); efx->n_rx_channels = 1; efx->n_tx_channels = 1; + efx->n_xdp_channels = 0; + efx->xdp_channel_offset = efx->n_channels; efx->legacy_irq = efx->pci_dev->irq; } - /* Assign extra channels if possible */ + /* Assign extra channels if possible, before XDP channels */ efx->n_extra_tx_channels = 0; - j = efx->n_channels; + j = efx->xdp_channel_offset; for (i = 0; i < EFX_MAX_EXTRA_CHANNELS; i++) { if (!efx->extra_channel_type[i]) continue; @@ -1724,29 +1822,50 @@ static void efx_remove_interrupts(struct efx_nic *efx) efx->legacy_irq = 0; } -static void efx_set_channels(struct efx_nic *efx) +static int efx_set_channels(struct efx_nic *efx) { struct efx_channel *channel; struct efx_tx_queue *tx_queue; + int xdp_queue_number; efx->tx_channel_offset = efx_separate_tx_channels ? efx->n_channels - efx->n_tx_channels : 0; + if (efx->xdp_tx_queue_count) { + EFX_WARN_ON_PARANOID(efx->xdp_tx_queues); + + /* Allocate array for XDP TX queue lookup. 
*/ + efx->xdp_tx_queues = kcalloc(efx->xdp_tx_queue_count, + sizeof(*efx->xdp_tx_queues), + GFP_KERNEL); + if (!efx->xdp_tx_queues) + return -ENOMEM; + } + /* We need to mark which channels really have RX and TX * queues, and adjust the TX queue numbers if we have separate * RX-only and TX-only channels. */ + xdp_queue_number = 0; efx_for_each_channel(channel, efx) { if (channel->channel < efx->n_rx_channels) channel->rx_queue.core_index = channel->channel; else channel->rx_queue.core_index = -1; - efx_for_each_channel_tx_queue(tx_queue, channel) + efx_for_each_channel_tx_queue(tx_queue, channel) { tx_queue->queue -= (efx->tx_channel_offset * EFX_TXQ_TYPES); + + if (efx_channel_is_xdp_tx(channel) && + xdp_queue_number < efx->xdp_tx_queue_count) { + efx->xdp_tx_queues[xdp_queue_number] = tx_queue; + xdp_queue_number++; + } + } } + return 0; } static int efx_probe_nic(struct efx_nic *efx) @@ -1776,7 +1895,9 @@ static int efx_probe_nic(struct efx_nic *efx) if (rc) goto fail1; - efx_set_channels(efx); + rc = efx_set_channels(efx); + if (rc) + goto fail1; /* dimension_resources can fail with EAGAIN */ rc = efx->type->dimension_resources(efx); @@ -1848,6 +1969,8 @@ static int efx_probe_filters(struct efx_nic *efx) ++i) channel->rps_flow_id[i] = RPS_FLOW_ID_INVALID; + channel->rfs_expire_index = 0; + channel->rfs_filter_count = 0; } if (!success) { @@ -1857,8 +1980,6 @@ static int efx_probe_filters(struct efx_nic *efx) rc = -ENOMEM; goto out_unlock; } - - efx->rps_expire_index = efx->rps_expire_channel = 0; } #endif out_unlock: @@ -1872,8 +1993,10 @@ static void efx_remove_filters(struct efx_nic *efx) #ifdef CONFIG_RFS_ACCEL struct efx_channel *channel; - efx_for_each_channel(channel, efx) + efx_for_each_channel(channel, efx) { + cancel_delayed_work_sync(&channel->filter_work); kfree(channel->rps_flow_id); + } #endif down_write(&efx->filter_sem); efx->type->filter_table_remove(efx); @@ -2022,6 +2145,10 @@ static void efx_stop_all(struct efx_nic *efx) static void 
efx_remove_all(struct efx_nic *efx) { + rtnl_lock(); + efx_xdp_setup_prog(efx, NULL); + rtnl_unlock(); + efx_remove_channels(efx); efx_remove_filters(efx); #ifdef CONFIG_SFC_SRIOV @@ -2082,6 +2209,8 @@ int efx_init_irq_moderation(struct efx_nic *efx, unsigned int tx_usecs, channel->irq_moderation_us = rx_usecs; else if (efx_channel_has_tx_queues(channel)) channel->irq_moderation_us = tx_usecs; + else if (efx_channel_is_xdp_tx(channel)) + channel->irq_moderation_us = tx_usecs; } return 0; @@ -2277,6 +2406,17 @@ static void efx_watchdog(struct net_device *net_dev) efx_schedule_reset(efx, RESET_TYPE_TX_WATCHDOG); } +static unsigned int efx_xdp_max_mtu(struct efx_nic *efx) +{ + /* The maximum MTU that we can fit in a single page, allowing for + * framing, overhead and XDP headroom. + */ + int overhead = EFX_MAX_FRAME_LEN(0) + sizeof(struct efx_rx_page_state) + + efx->rx_prefix_size + efx->type->rx_buffer_padding + + efx->rx_ip_align + XDP_PACKET_HEADROOM; + + return PAGE_SIZE - overhead; +} /* Context: process, rtnl_lock() held. 
*/ static int efx_change_mtu(struct net_device *net_dev, int new_mtu) @@ -2288,6 +2428,14 @@ static int efx_change_mtu(struct net_device *net_dev, int new_mtu) if (rc) return rc; + if (rtnl_dereference(efx->xdp_prog) && + new_mtu > efx_xdp_max_mtu(efx)) { + netif_err(efx, drv, efx->net_dev, + "Requested MTU of %d too big for XDP (max: %d)\n", + new_mtu, efx_xdp_max_mtu(efx)); + return -EINVAL; + } + netif_dbg(efx, drv, efx->net_dev, "changing MTU to %d\n", new_mtu); efx_device_detach_sync(efx); @@ -2489,8 +2637,65 @@ static const struct net_device_ops efx_netdev_ops = { #endif .ndo_udp_tunnel_add = efx_udp_tunnel_add, .ndo_udp_tunnel_del = efx_udp_tunnel_del, + .ndo_xdp_xmit = efx_xdp_xmit, + .ndo_bpf = efx_xdp }; +static int efx_xdp_setup_prog(struct efx_nic *efx, struct bpf_prog *prog) +{ + struct bpf_prog *old_prog; + + if (efx->xdp_rxq_info_failed) { + netif_err(efx, drv, efx->net_dev, + "Unable to bind XDP program due to previous failure of rxq_info\n"); + return -EINVAL; + } + + if (prog && efx->net_dev->mtu > efx_xdp_max_mtu(efx)) { + netif_err(efx, drv, efx->net_dev, + "Unable to configure XDP with MTU of %d (max: %d)\n", + efx->net_dev->mtu, efx_xdp_max_mtu(efx)); + return -EINVAL; + } + + old_prog = rtnl_dereference(efx->xdp_prog); + rcu_assign_pointer(efx->xdp_prog, prog); + /* Release the reference that was originally passed by the caller. */ + if (old_prog) + bpf_prog_put(old_prog); + + return 0; +} + +/* Context: process, rtnl_lock() held. */ +static int efx_xdp(struct net_device *dev, struct netdev_bpf *xdp) +{ + struct efx_nic *efx = netdev_priv(dev); + struct bpf_prog *xdp_prog; + + switch (xdp->command) { + case XDP_SETUP_PROG: + return efx_xdp_setup_prog(efx, xdp->prog); + case XDP_QUERY_PROG: + xdp_prog = rtnl_dereference(efx->xdp_prog); + xdp->prog_id = xdp_prog ? 
xdp_prog->aux->id : 0; + return 0; + default: + return -EINVAL; + } +} + +static int efx_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **xdpfs, + u32 flags) +{ + struct efx_nic *efx = netdev_priv(dev); + + if (!netif_running(dev)) + return -EINVAL; + + return efx_xdp_tx_buffers(efx, n, xdpfs, flags & XDP_XMIT_FLUSH); +} + static void efx_update_name(struct efx_nic *efx) { strcpy(efx->name, efx->net_dev->name); diff --git a/drivers/net/ethernet/sfc/efx.h b/drivers/net/ethernet/sfc/efx.h index 04fed7c06618..2dd8d5002315 100644 --- a/drivers/net/ethernet/sfc/efx.h +++ b/drivers/net/ethernet/sfc/efx.h @@ -166,15 +166,20 @@ static inline s32 efx_filter_get_rx_ids(struct efx_nic *efx, #ifdef CONFIG_RFS_ACCEL int efx_filter_rfs(struct net_device *net_dev, const struct sk_buff *skb, u16 rxq_index, u32 flow_id); -bool __efx_filter_rfs_expire(struct efx_nic *efx, unsigned quota); +bool __efx_filter_rfs_expire(struct efx_channel *channel, unsigned int quota); static inline void efx_filter_rfs_expire(struct work_struct *data) { - struct efx_channel *channel = container_of(data, struct efx_channel, - filter_work); - - if (channel->rfs_filters_added >= 60 && - __efx_filter_rfs_expire(channel->efx, 100)) - channel->rfs_filters_added -= 60; + struct delayed_work *dwork = to_delayed_work(data); + struct efx_channel *channel; + unsigned int time, quota; + + channel = container_of(dwork, struct efx_channel, filter_work); + time = jiffies - channel->rfs_last_expiry; + quota = channel->rfs_filter_count * time / (30 * HZ); + if (quota > 20 && __efx_filter_rfs_expire(channel, min(channel->rfs_filter_count, quota))) + channel->rfs_last_expiry += time; + /* Ensure we do more work eventually even if NAPI poll is not happening */ + schedule_delayed_work(dwork, 30 * HZ); } #define efx_filter_rfs_enabled() 1 #else @@ -322,4 +327,7 @@ static inline bool efx_rwsem_assert_write_locked(struct rw_semaphore *sem) return true; } +int efx_xdp_tx_buffers(struct efx_nic *efx, int n, struct 
xdp_frame **xdpfs, + bool flush); + #endif /* EFX_EFX_H */ diff --git a/drivers/net/ethernet/sfc/ethtool.c b/drivers/net/ethernet/sfc/ethtool.c index 86b965875540..b31032da4bcb 100644 --- a/drivers/net/ethernet/sfc/ethtool.c +++ b/drivers/net/ethernet/sfc/ethtool.c @@ -56,6 +56,9 @@ static u64 efx_get_atomic_stat(void *field) #define EFX_ETHTOOL_UINT_CHANNEL_STAT(field) \ EFX_ETHTOOL_STAT(field, channel, n_##field, \ unsigned int, efx_get_uint_stat) +#define EFX_ETHTOOL_UINT_CHANNEL_STAT_NO_N(field) \ + EFX_ETHTOOL_STAT(field, channel, field, \ + unsigned int, efx_get_uint_stat) #define EFX_ETHTOOL_UINT_TXQ_STAT(field) \ EFX_ETHTOOL_STAT(tx_##field, tx_queue, field, \ @@ -83,6 +86,15 @@ static const struct efx_sw_stat_desc efx_sw_stat_desc[] = { EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_frm_trunc), EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_merge_events), EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_merge_packets), + EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_xdp_drops), + EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_xdp_bad_drops), + EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_xdp_tx), + EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_xdp_redirect), +#ifdef CONFIG_RFS_ACCEL + EFX_ETHTOOL_UINT_CHANNEL_STAT_NO_N(rfs_filter_count), + EFX_ETHTOOL_UINT_CHANNEL_STAT(rfs_succeeded), + EFX_ETHTOOL_UINT_CHANNEL_STAT(rfs_failed), +#endif }; #define EFX_ETHTOOL_SW_STAT_COUNT ARRAY_SIZE(efx_sw_stat_desc) @@ -399,6 +411,19 @@ static size_t efx_describe_per_queue_stats(struct efx_nic *efx, u8 *strings) } } } + if (efx->xdp_tx_queue_count && efx->xdp_tx_queues) { + unsigned short xdp; + + for (xdp = 0; xdp < efx->xdp_tx_queue_count; xdp++) { + n_stats++; + if (strings) { + snprintf(strings, ETH_GSTRING_LEN, + "tx-xdp-cpu-%hu.tx_packets", xdp); + strings += ETH_GSTRING_LEN; + } + } + } + return n_stats; } @@ -509,6 +534,14 @@ static void efx_ethtool_get_stats(struct net_device *net_dev, data++; } } + if (efx->xdp_tx_queue_count && efx->xdp_tx_queues) { + int xdp; + + for (xdp = 0; xdp < efx->xdp_tx_queue_count; xdp++) { + data[0] = 
efx->xdp_tx_queues[xdp]->tx_packets; + data++; + } + } efx_ptp_update_stats(efx, data); } diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h index 284a1b047ac2..1f88212be085 100644 --- a/drivers/net/ethernet/sfc/net_driver.h +++ b/drivers/net/ethernet/sfc/net_driver.h @@ -27,6 +27,7 @@ #include <linux/i2c.h> #include <linux/mtd/mtd.h> #include <net/busy_poll.h> +#include <net/xdp.h> #include "enum.h" #include "bitfield.h" @@ -136,7 +137,8 @@ struct efx_special_buffer { * struct efx_tx_buffer - buffer state for a TX descriptor * @skb: When @flags & %EFX_TX_BUF_SKB, the associated socket buffer to be * freed when descriptor completes - * @option: When @flags & %EFX_TX_BUF_OPTION, a NIC-specific option descriptor. + * @xdpf: When @flags & %EFX_TX_BUF_XDP, the XDP frame information; its @data + * member is the associated buffer to drop a page reference on. * @dma_addr: DMA address of the fragment. * @flags: Flags for allocation and DMA mapping type * @len: Length of this fragment. @@ -146,7 +148,10 @@ struct efx_special_buffer { * Only valid if @unmap_len != 0. */ struct efx_tx_buffer { - const struct sk_buff *skb; + union { + const struct sk_buff *skb; + struct xdp_frame *xdpf; + }; union { efx_qword_t option; dma_addr_t dma_addr; @@ -160,6 +165,7 @@ struct efx_tx_buffer { #define EFX_TX_BUF_SKB 2 /* buffer is last part of skb */ #define EFX_TX_BUF_MAP_SINGLE 8 /* buffer was mapped with dma_map_single() */ #define EFX_TX_BUF_OPTION 0x10 /* empty buffer for option descriptor */ +#define EFX_TX_BUF_XDP 0x20 /* buffer was sent with XDP */ /** * struct efx_tx_queue - An Efx TX queue @@ -189,6 +195,7 @@ struct efx_tx_buffer { * @piobuf_offset: Buffer offset to be specified in PIO descriptors * @initialised: Has hardware queue been initialised? * @timestamping: Is timestamping enabled for this channel? + * @xdp_tx: Is this an XDP tx queue? * @handle_tso: TSO xmit preparation handler. 
Sets up the TSO metadata and * may also map tx data, depending on the nature of the TSO implementation. * @read_count: Current read pointer. @@ -250,6 +257,7 @@ struct efx_tx_queue { unsigned int piobuf_offset; bool initialised; bool timestamping; + bool xdp_tx; /* Function pointers used in the fast path. */ int (*handle_tso)(struct efx_tx_queue*, struct sk_buff*, bool *); @@ -363,6 +371,8 @@ struct efx_rx_page_state { * refill was triggered. * @recycle_count: RX buffer recycle counter. * @slow_fill: Timer used to defer efx_nic_generate_fill_event(). + * @xdp_rxq_info: XDP specific RX queue information. + * @xdp_rxq_info_valid: Is xdp_rxq_info valid data? */ struct efx_rx_queue { struct efx_nic *efx; @@ -394,6 +404,8 @@ struct efx_rx_queue { unsigned int slow_fill_count; /* Statistics to supplement MAC stats */ unsigned long rx_packets; + struct xdp_rxq_info xdp_rxq_info; + bool xdp_rxq_info_valid; }; enum efx_sync_events_state { @@ -427,6 +439,13 @@ enum efx_sync_events_state { * @event_test_cpu: Last CPU to handle interrupt or test event for this channel * @irq_count: Number of IRQs since last adaptive moderation decision * @irq_mod_score: IRQ moderation score + * @rfs_filter_count: number of accelerated RFS filters currently in place; + * equals the count of @rps_flow_id slots filled + * @rfs_last_expiry: value of jiffies last time some accelerated RFS filters + * were checked for expiry + * @rfs_expire_index: next accelerated RFS filter ID to check for expiry + * @n_rfs_succeeded: number of successful accelerated RFS filter insertions + * @n_rfs_failed: number of failed accelerated RFS filter insertions * @filter_work: Work item for efx_filter_rfs_expire() * @rps_flow_id: Flow IDs of filters allocated for accelerated RFS, * indexed by filter ID @@ -441,6 +460,10 @@ enum efx_sync_events_state { * lack of descriptors * @n_rx_merge_events: Number of RX merged completion events * @n_rx_merge_packets: Number of RX packets completed by merged events + *
@n_rx_xdp_drops: Count of RX packets intentionally dropped due to XDP + * @n_rx_xdp_bad_drops: Count of RX packets dropped due to XDP errors + * @n_rx_xdp_tx: Count of RX packets retransmitted due to XDP + * @n_rx_xdp_redirect: Count of RX packets redirected to a different NIC by XDP * @rx_pkt_n_frags: Number of fragments in next packet to be delivered by * __efx_rx_packet(), or zero if there is none * @rx_pkt_index: Ring index of first buffer for next packet to be delivered @@ -473,8 +496,12 @@ struct efx_channel { unsigned int irq_count; unsigned int irq_mod_score; #ifdef CONFIG_RFS_ACCEL - unsigned int rfs_filters_added; - struct work_struct filter_work; + unsigned int rfs_filter_count; + unsigned int rfs_last_expiry; + unsigned int rfs_expire_index; + unsigned int n_rfs_succeeded; + unsigned int n_rfs_failed; + struct delayed_work filter_work; #define RPS_FLOW_ID_INVALID 0xFFFFFFFF u32 *rps_flow_id; #endif @@ -494,6 +521,10 @@ struct efx_channel { unsigned int n_rx_nodesc_trunc; unsigned int n_rx_merge_events; unsigned int n_rx_merge_packets; + unsigned int n_rx_xdp_drops; + unsigned int n_rx_xdp_bad_drops; + unsigned int n_rx_xdp_tx; + unsigned int n_rx_xdp_redirect; unsigned int rx_pkt_n_frags; unsigned int rx_pkt_index; @@ -818,6 +849,8 @@ struct efx_async_filter_insertion { * @msi_context: Context for each MSI * @extra_channel_types: Types of extra (non-traffic) channels that * should be allocated for this NIC + * @xdp_tx_queue_count: Number of entries in %xdp_tx_queues. + * @xdp_tx_queues: Array of pointers to tx queues used for XDP transmit. * @rxq_entries: Size of receive queues requested by user. * @txq_entries: Size of transmit queues requested by user. * @txq_stop_thresh: TX queue fill level at or above which we stop it. 
@@ -830,6 +863,9 @@ struct efx_async_filter_insertion { * @n_rx_channels: Number of channels used for RX (= number of RX queues) * @n_tx_channels: Number of channels used for TX * @n_extra_tx_channels: Number of extra channels with TX queues + * @n_xdp_channels: Number of channels used for XDP TX + * @xdp_channel_offset: Offset of zeroth channel used for XDP TX. + * @xdp_tx_per_channel: Max number of TX queues on an XDP TX channel. * @rx_ip_align: RX DMA address offset to have IP header aligned in * in accordance with NET_IP_ALIGN * @rx_dma_len: Current maximum RX DMA length @@ -894,12 +930,10 @@ struct efx_async_filter_insertion { * @loopback_mode: Loopback status * @loopback_modes: Supported loopback mode bitmask * @loopback_selftest: Offline self-test private state + * @xdp_prog: Current XDP programme for this interface * @filter_sem: Filter table rw_semaphore, protects existence of @filter_state * @filter_state: Architecture-dependent filter table state * @rps_mutex: Protects RPS state of all channels - * @rps_expire_channel: Next channel to check for expiry - * @rps_expire_index: Next index to check for expiry in - * @rps_expire_channel's @rps_flow_id * @rps_slot_map: bitmap of in-flight entries in @rps_slot * @rps_slot: array of ARFS insertion requests for efx_filter_rfs_work() * @rps_hash_lock: Protects ARFS filter mapping state (@rps_hash_table and @@ -919,6 +953,8 @@ struct efx_async_filter_insertion { * @ptp_data: PTP state data * @ptp_warned: has this NIC seen and warned about unexpected PTP events? * @vpd_sn: Serial number read from VPD + * @xdp_rxq_info_failed: Have any of the rx queues failed to initialise their + * xdp_rxq_info structures? * @monitor_work: Hardware monitor workitem * @biu_lock: BIU (bus interface unit) lock * @last_irq_cpu: Last CPU to handle a possible test interrupt.
This @@ -966,6 +1002,9 @@ struct efx_nic { const struct efx_channel_type * extra_channel_type[EFX_MAX_EXTRA_CHANNELS]; + unsigned int xdp_tx_queue_count; + struct efx_tx_queue **xdp_tx_queues; + unsigned rxq_entries; unsigned txq_entries; unsigned int txq_stop_thresh; @@ -984,6 +1023,9 @@ struct efx_nic { unsigned tx_channel_offset; unsigned n_tx_channels; unsigned n_extra_tx_channels; + unsigned int n_xdp_channels; + unsigned int xdp_channel_offset; + unsigned int xdp_tx_per_channel; unsigned int rx_ip_align; unsigned int rx_dma_len; unsigned int rx_buffer_order; @@ -1053,13 +1095,15 @@ struct efx_nic { u64 loopback_modes; void *loopback_selftest; + /* We access loopback_selftest immediately before running XDP, + * so we want them next to each other. + */ + struct bpf_prog __rcu *xdp_prog; struct rw_semaphore filter_sem; void *filter_state; #ifdef CONFIG_RFS_ACCEL struct mutex rps_mutex; - unsigned int rps_expire_channel; - unsigned int rps_expire_index; unsigned long rps_slot_map; struct efx_async_filter_insertion rps_slot[EFX_RPS_MAX_IN_FLIGHT]; spinlock_t rps_hash_lock; @@ -1082,6 +1126,7 @@ struct efx_nic { bool ptp_warned; char *vpd_sn; + bool xdp_rxq_info_failed; /* The following fields may be written more often */ @@ -1473,10 +1518,24 @@ efx_get_tx_queue(struct efx_nic *efx, unsigned index, unsigned type) return &efx->channel[efx->tx_channel_offset + index]->tx_queue[type]; } +static inline struct efx_channel * +efx_get_xdp_channel(struct efx_nic *efx, unsigned int index) +{ + EFX_WARN_ON_ONCE_PARANOID(index >= efx->n_xdp_channels); + return efx->channel[efx->xdp_channel_offset + index]; +} + +static inline bool efx_channel_is_xdp_tx(struct efx_channel *channel) +{ + return channel->channel - channel->efx->xdp_channel_offset < + channel->efx->n_xdp_channels; +} + static inline bool efx_channel_has_tx_queues(struct efx_channel *channel) { - return channel->type && channel->type->want_txqs && - channel->type->want_txqs(channel); + return 
efx_channel_is_xdp_tx(channel) || + (channel->type && channel->type->want_txqs && + channel->type->want_txqs(channel)); } static inline struct efx_tx_queue * @@ -1500,7 +1559,8 @@ static inline bool efx_tx_queue_used(struct efx_tx_queue *tx_queue) else \ for (_tx_queue = (_channel)->tx_queue; \ _tx_queue < (_channel)->tx_queue + EFX_TXQ_TYPES && \ - efx_tx_queue_used(_tx_queue); \ + (efx_tx_queue_used(_tx_queue) || \ + efx_channel_is_xdp_tx(_channel)); \ _tx_queue++) /* Iterate over all possible TX queues belonging to a channel */ diff --git a/drivers/net/ethernet/sfc/rx.c b/drivers/net/ethernet/sfc/rx.c index 85ec07f5a674..ef52b24ad9e7 100644 --- a/drivers/net/ethernet/sfc/rx.c +++ b/drivers/net/ethernet/sfc/rx.c @@ -17,6 +17,8 @@ #include <linux/iommu.h> #include <net/ip.h> #include <net/checksum.h> +#include <net/xdp.h> +#include <linux/bpf_trace.h> #include "net_driver.h" #include "efx.h" #include "filter.h" @@ -27,6 +29,9 @@ /* Preferred number of descriptors to fill at once */ #define EFX_RX_PREFERRED_BATCH 8U +/* Maximum rx prefix used by any architecture. */ +#define EFX_MAX_RX_PREFIX_SIZE 16 + /* Number of RX buffers to recycle pages for. When creating the RX page recycle * ring, this number is divided by the number of buffers per page to calculate * the number of pages to store in the RX page recycle ring. @@ -95,7 +100,7 @@ void efx_rx_config_page_split(struct efx_nic *efx) EFX_RX_BUF_ALIGNMENT); efx->rx_bufs_per_page = efx->rx_buffer_order ? 
1 : ((PAGE_SIZE - sizeof(struct efx_rx_page_state)) / - efx->rx_page_buf_step); + (efx->rx_page_buf_step + XDP_PACKET_HEADROOM)); efx->rx_buffer_truesize = (PAGE_SIZE << efx->rx_buffer_order) / efx->rx_bufs_per_page; efx->rx_pages_per_batch = DIV_ROUND_UP(EFX_RX_PREFERRED_BATCH, @@ -185,6 +190,9 @@ static int efx_init_rx_buffers(struct efx_rx_queue *rx_queue, bool atomic) page_offset = sizeof(struct efx_rx_page_state); do { + page_offset += XDP_PACKET_HEADROOM; + dma_addr += XDP_PACKET_HEADROOM; + index = rx_queue->added_count & rx_queue->ptr_mask; rx_buf = efx_rx_buffer(rx_queue, index); rx_buf->dma_addr = dma_addr + efx->rx_ip_align; @@ -635,6 +643,126 @@ static void efx_rx_deliver(struct efx_channel *channel, u8 *eh, netif_receive_skb(skb); } +/** efx_do_xdp: perform XDP processing on a received packet + * + * Returns true if packet should still be delivered. + */ +static bool efx_do_xdp(struct efx_nic *efx, struct efx_channel *channel, + struct efx_rx_buffer *rx_buf, u8 **ehp) +{ + u8 rx_prefix[EFX_MAX_RX_PREFIX_SIZE]; + struct efx_rx_queue *rx_queue; + struct bpf_prog *xdp_prog; + struct xdp_frame *xdpf; + struct xdp_buff xdp; + u32 xdp_act; + s16 offset; + int err; + + rcu_read_lock(); + xdp_prog = rcu_dereference(efx->xdp_prog); + if (!xdp_prog) { + rcu_read_unlock(); + return true; + } + + rx_queue = efx_channel_get_rx_queue(channel); + + if (unlikely(channel->rx_pkt_n_frags > 1)) { + /* We can't do XDP on fragmented packets - drop. */ + rcu_read_unlock(); + efx_free_rx_buffers(rx_queue, rx_buf, + channel->rx_pkt_n_frags); + if (net_ratelimit()) + netif_err(efx, rx_err, efx->net_dev, + "XDP is not possible with multiple receive fragments (%d)\n", + channel->rx_pkt_n_frags); + channel->n_rx_xdp_bad_drops++; + return false; + } + + dma_sync_single_for_cpu(&efx->pci_dev->dev, rx_buf->dma_addr, + rx_buf->len, DMA_FROM_DEVICE); + + /* Save the rx prefix. 
*/ + EFX_WARN_ON_PARANOID(efx->rx_prefix_size > EFX_MAX_RX_PREFIX_SIZE); + memcpy(rx_prefix, *ehp - efx->rx_prefix_size, + efx->rx_prefix_size); + + xdp.data = *ehp; + xdp.data_hard_start = xdp.data - XDP_PACKET_HEADROOM; + + /* No support yet for XDP metadata */ + xdp_set_data_meta_invalid(&xdp); + xdp.data_end = xdp.data + rx_buf->len; + xdp.rxq = &rx_queue->xdp_rxq_info; + + xdp_act = bpf_prog_run_xdp(xdp_prog, &xdp); + rcu_read_unlock(); + + offset = (u8 *)xdp.data - *ehp; + + switch (xdp_act) { + case XDP_PASS: + /* Fix up rx prefix. */ + if (offset) { + *ehp += offset; + rx_buf->page_offset += offset; + rx_buf->len -= offset; + memcpy(*ehp - efx->rx_prefix_size, rx_prefix, + efx->rx_prefix_size); + } + break; + + case XDP_TX: + /* Buffer ownership passes to tx on success. */ + xdpf = convert_to_xdp_frame(&xdp); + err = efx_xdp_tx_buffers(efx, 1, &xdpf, true); + if (unlikely(err != 1)) { + efx_free_rx_buffers(rx_queue, rx_buf, 1); + if (net_ratelimit()) + netif_err(efx, rx_err, efx->net_dev, + "XDP TX failed (%d)\n", err); + channel->n_rx_xdp_bad_drops++; + trace_xdp_exception(efx->net_dev, xdp_prog, xdp_act); + } else { + channel->n_rx_xdp_tx++; + } + break; + + case XDP_REDIRECT: + err = xdp_do_redirect(efx->net_dev, &xdp, xdp_prog); + if (unlikely(err)) { + efx_free_rx_buffers(rx_queue, rx_buf, 1); + if (net_ratelimit()) + netif_err(efx, rx_err, efx->net_dev, + "XDP redirect failed (%d)\n", err); + channel->n_rx_xdp_bad_drops++; + trace_xdp_exception(efx->net_dev, xdp_prog, xdp_act); + } else { + channel->n_rx_xdp_redirect++; + } + break; + + default: + bpf_warn_invalid_xdp_action(xdp_act); + efx_free_rx_buffers(rx_queue, rx_buf, 1); + channel->n_rx_xdp_bad_drops++; + trace_xdp_exception(efx->net_dev, xdp_prog, xdp_act); + break; + + case XDP_ABORTED: + trace_xdp_exception(efx->net_dev, xdp_prog, xdp_act); + /* Fall through */ + case XDP_DROP: + efx_free_rx_buffers(rx_queue, rx_buf, 1); + channel->n_rx_xdp_drops++; + break; + } + + return xdp_act == 
XDP_PASS; +} + /* Handle a received packet. Second half: Touches packet payload. */ void __efx_rx_packet(struct efx_channel *channel) { @@ -663,6 +791,9 @@ void __efx_rx_packet(struct efx_channel *channel) goto out; } + if (!efx_do_xdp(efx, channel, rx_buf, &eh)) + goto out; + if (unlikely(!(efx->net_dev->features & NETIF_F_RXCSUM))) rx_buf->flags &= ~EFX_RX_PKT_CSUMMED; @@ -731,6 +862,7 @@ void efx_init_rx_queue(struct efx_rx_queue *rx_queue) { struct efx_nic *efx = rx_queue->efx; unsigned int max_fill, trigger, max_trigger; + int rc = 0; netif_dbg(rx_queue->efx, drv, rx_queue->efx->net_dev, "initialising RX queue %d\n", efx_rx_queue_index(rx_queue)); @@ -764,6 +896,19 @@ void efx_init_rx_queue(struct efx_rx_queue *rx_queue) rx_queue->fast_fill_trigger = trigger; rx_queue->refill_enabled = true; + /* Initialise XDP queue information */ + rc = xdp_rxq_info_reg(&rx_queue->xdp_rxq_info, efx->net_dev, + rx_queue->core_index); + + if (rc) { + netif_err(efx, rx_err, efx->net_dev, + "Failure to initialise XDP queue information rc=%d\n", + rc); + efx->xdp_rxq_info_failed = true; + } else { + rx_queue->xdp_rxq_info_valid = true; + } + /* Set up RX descriptor ring */ efx_nic_init_rx(rx_queue); } @@ -805,6 +950,11 @@ void efx_fini_rx_queue(struct efx_rx_queue *rx_queue) } kfree(rx_queue->page_ring); rx_queue->page_ring = NULL; + + if (rx_queue->xdp_rxq_info_valid) + xdp_rxq_info_unreg(&rx_queue->xdp_rxq_info); + + rx_queue->xdp_rxq_info_valid = false; } void efx_remove_rx_queue(struct efx_rx_queue *rx_queue) @@ -838,6 +988,7 @@ static void efx_filter_rfs_work(struct work_struct *data) rc = efx->type->filter_insert(efx, &req->spec, true); if (rc >= 0) + /* Discard 'priority' part of EF10+ filter ID (mcdi_filters) */ rc %= efx->type->max_rx_ip_filters; if (efx->rps_hash_table) { spin_lock_bh(&efx->rps_hash_lock); @@ -862,8 +1013,9 @@ static void efx_filter_rfs_work(struct work_struct *data) * later. 
*/ mutex_lock(&efx->rps_mutex); + if (channel->rps_flow_id[rc] == RPS_FLOW_ID_INVALID) + channel->rfs_filter_count++; channel->rps_flow_id[rc] = req->flow_id; - ++channel->rfs_filters_added; mutex_unlock(&efx->rps_mutex); if (req->spec.ether_type == htons(ETH_P_IP)) @@ -880,6 +1032,28 @@ static void efx_filter_rfs_work(struct work_struct *data) req->spec.rem_host, ntohs(req->spec.rem_port), req->spec.loc_host, ntohs(req->spec.loc_port), req->rxq_index, req->flow_id, rc, arfs_id); + channel->n_rfs_succeeded++; + } else { + if (req->spec.ether_type == htons(ETH_P_IP)) + netif_dbg(efx, rx_status, efx->net_dev, + "failed to steer %s %pI4:%u:%pI4:%u to queue %u [flow %u rc %d id %u]\n", + (req->spec.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP", + req->spec.rem_host, ntohs(req->spec.rem_port), + req->spec.loc_host, ntohs(req->spec.loc_port), + req->rxq_index, req->flow_id, rc, arfs_id); + else + netif_dbg(efx, rx_status, efx->net_dev, + "failed to steer %s [%pI6]:%u:[%pI6]:%u to queue %u [flow %u rc %d id %u]\n", + (req->spec.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP", + req->spec.rem_host, ntohs(req->spec.rem_port), + req->spec.loc_host, ntohs(req->spec.loc_port), + req->rxq_index, req->flow_id, rc, arfs_id); + channel->n_rfs_failed++; + /* We're overloading the NIC's filter tables, so let's do a + * chunk of extra expiry work. 
+ */ + __efx_filter_rfs_expire(channel, min(channel->rfs_filter_count, + 100u)); } /* Release references */ @@ -989,38 +1163,44 @@ out_clear: return rc; } -bool __efx_filter_rfs_expire(struct efx_nic *efx, unsigned int quota) +bool __efx_filter_rfs_expire(struct efx_channel *channel, unsigned int quota) { bool (*expire_one)(struct efx_nic *efx, u32 flow_id, unsigned int index); - unsigned int channel_idx, index, size; + struct efx_nic *efx = channel->efx; + unsigned int index, size, start; u32 flow_id; if (!mutex_trylock(&efx->rps_mutex)) return false; expire_one = efx->type->filter_rfs_expire_one; - channel_idx = efx->rps_expire_channel; - index = efx->rps_expire_index; + index = channel->rfs_expire_index; + start = index; size = efx->type->max_rx_ip_filters; - while (quota--) { - struct efx_channel *channel = efx_get_channel(efx, channel_idx); + while (quota) { flow_id = channel->rps_flow_id[index]; - if (flow_id != RPS_FLOW_ID_INVALID && - expire_one(efx, flow_id, index)) { - netif_info(efx, rx_status, efx->net_dev, - "expired filter %d [queue %u flow %u]\n", - index, channel_idx, flow_id); - channel->rps_flow_id[index] = RPS_FLOW_ID_INVALID; + if (flow_id != RPS_FLOW_ID_INVALID) { + quota--; + if (expire_one(efx, flow_id, index)) { + netif_info(efx, rx_status, efx->net_dev, + "expired filter %d [channel %u flow %u]\n", + index, channel->channel, flow_id); + channel->rps_flow_id[index] = RPS_FLOW_ID_INVALID; + channel->rfs_filter_count--; + } } - if (++index == size) { - if (++channel_idx == efx->n_channels) - channel_idx = 0; + if (++index == size) index = 0; - } + /* If we were called with a quota that exceeds the total number + * of filters in the table (which shouldn't happen, but could + * if two callers race), ensure that we don't loop forever - + * stop when we've examined every row of the table. 
+ */ + if (index == start) + break; } - efx->rps_expire_channel = channel_idx; - efx->rps_expire_index = index; + channel->rfs_expire_index = index; mutex_unlock(&efx->rps_mutex); return true; } diff --git a/drivers/net/ethernet/sfc/tx.c b/drivers/net/ethernet/sfc/tx.c index 65e81ec1b314..00c1c4402451 100644 --- a/drivers/net/ethernet/sfc/tx.c +++ b/drivers/net/ethernet/sfc/tx.c @@ -95,6 +95,8 @@ static void efx_dequeue_buffer(struct efx_tx_queue *tx_queue, netif_vdbg(tx_queue->efx, tx_done, tx_queue->efx->net_dev, "TX queue %d transmission id %x complete\n", tx_queue->queue, tx_queue->read_count); + } else if (buffer->flags & EFX_TX_BUF_XDP) { + xdp_return_frame_rx_napi(buffer->xdpf); } buffer->len = 0; @@ -597,6 +599,94 @@ err: return NETDEV_TX_OK; } +static void efx_xdp_return_frames(int n, struct xdp_frame **xdpfs) +{ + int i; + + for (i = 0; i < n; i++) + xdp_return_frame_rx_napi(xdpfs[i]); +} + +/* Transmit a packet from an XDP buffer + * + * Returns number of packets sent on success, error code otherwise. + * Runs in NAPI context, either in our poll (for XDP TX) or a different NIC + * (for XDP redirect). + */ +int efx_xdp_tx_buffers(struct efx_nic *efx, int n, struct xdp_frame **xdpfs, + bool flush) +{ + struct efx_tx_buffer *tx_buffer; + struct efx_tx_queue *tx_queue; + struct xdp_frame *xdpf; + dma_addr_t dma_addr; + unsigned int len; + int space; + int cpu; + int i; + + cpu = raw_smp_processor_id(); + + if (!efx->xdp_tx_queue_count || + unlikely(cpu >= efx->xdp_tx_queue_count)) + return -EINVAL; + + tx_queue = efx->xdp_tx_queues[cpu]; + if (unlikely(!tx_queue)) + return -EINVAL; + + if (unlikely(n && !xdpfs)) + return -EINVAL; + + if (!n) + return 0; + + /* Check for available space. We should never need multiple + * descriptors per frame. + */ + space = efx->txq_entries + + tx_queue->read_count - tx_queue->insert_count; + + for (i = 0; i < n; i++) { + xdpf = xdpfs[i]; + + if (i >= space) + break; + + /* We'll want a descriptor for this tx. 
*/ + prefetchw(__efx_tx_queue_get_insert_buffer(tx_queue)); + + len = xdpf->len; + + /* Map for DMA. */ + dma_addr = dma_map_single(&efx->pci_dev->dev, + xdpf->data, len, + DMA_TO_DEVICE); + if (dma_mapping_error(&efx->pci_dev->dev, dma_addr)) + break; + + /* Create descriptor and set up for unmapping DMA. */ + tx_buffer = efx_tx_map_chunk(tx_queue, dma_addr, len); + tx_buffer->xdpf = xdpf; + tx_buffer->flags = EFX_TX_BUF_XDP | + EFX_TX_BUF_MAP_SINGLE; + tx_buffer->dma_offset = 0; + tx_buffer->unmap_len = len; + tx_queue->tx_packets++; + } + + /* Pass mapped frames to hardware. */ + if (flush && i > 0) + efx_nic_push_buffers(tx_queue); + + if (i == 0) + return -EIO; + + efx_xdp_return_frames(n - i, xdpfs + i); + + return i; +} + /* Remove packets from the TX queue * * This removes packets from the TX queue, up to and including the @@ -857,6 +947,8 @@ void efx_init_tx_queue(struct efx_tx_queue *tx_queue) tx_queue->completed_timestamp_major = 0; tx_queue->completed_timestamp_minor = 0; + tx_queue->xdp_tx = efx_channel_is_xdp_tx(tx_queue->channel); + /* Set up default function pointers. These may get replaced by * efx_nic_init_tx() based off NIC/queue capabilities. */ |