Diffstat (limited to 'drivers/net/ethernet/ibm')
 drivers/net/ethernet/ibm/ehea/ehea.h         |    1
 drivers/net/ethernet/ibm/ehea/ehea_ethtool.c |    4
 drivers/net/ethernet/ibm/ehea/ehea_main.c    |    7
 drivers/net/ethernet/ibm/emac/core.c         |   19
 drivers/net/ethernet/ibm/emac/mal.c          |    4
 drivers/net/ethernet/ibm/ibmveth.c           |  309
 drivers/net/ethernet/ibm/ibmveth.h           |   24
 drivers/net/ethernet/ibm/ibmvnic.c           | 1002
 drivers/net/ethernet/ibm/ibmvnic.h           |   71
 9 files changed, 961 insertions(+), 480 deletions(-)
diff --git a/drivers/net/ethernet/ibm/ehea/ehea.h b/drivers/net/ethernet/ibm/ehea/ehea.h
index b140835d4c23..208c440a602b 100644
--- a/drivers/net/ethernet/ibm/ehea/ehea.h
+++ b/drivers/net/ethernet/ibm/ehea/ehea.h
@@ -19,6 +19,7 @@
 #include <linux/ethtool.h>
 #include <linux/vmalloc.h>
 #include <linux/if_vlan.h>
+#include <linux/platform_device.h>
 
 #include <asm/ibmebus.h>
 #include <asm/io.h>
diff --git a/drivers/net/ethernet/ibm/ehea/ehea_ethtool.c b/drivers/net/ethernet/ibm/ehea/ehea_ethtool.c
index 6cb86032ce46..1db5b6790a41 100644
--- a/drivers/net/ethernet/ibm/ehea/ehea_ethtool.c
+++ b/drivers/net/ethernet/ibm/ehea/ehea_ethtool.c
@@ -159,8 +159,8 @@ static int ehea_nway_reset(struct net_device *dev)
 static void ehea_get_drvinfo(struct net_device *dev,
 			     struct ethtool_drvinfo *info)
 {
-	strlcpy(info->driver, DRV_NAME, sizeof(info->driver));
-	strlcpy(info->version, DRV_VERSION, sizeof(info->version));
+	strscpy(info->driver, DRV_NAME, sizeof(info->driver));
+	strscpy(info->version, DRV_VERSION, sizeof(info->version));
 }
 
 static u32 ehea_get_msglevel(struct net_device *dev)
diff --git a/drivers/net/ethernet/ibm/ehea/ehea_main.c b/drivers/net/ethernet/ibm/ehea/ehea_main.c
index bad94e4d50f4..b4aff59b3eb4 100644
--- a/drivers/net/ethernet/ibm/ehea/ehea_main.c
+++ b/drivers/net/ethernet/ibm/ehea/ehea_main.c
@@ -29,6 +29,8 @@
 #include <asm/kexec.h>
 #include <linux/mutex.h>
 #include <linux/prefetch.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
 
 #include <net/ip.h>
 
@@ -1544,7 +1546,7 @@ static int ehea_init_port_res(struct ehea_port *port, struct ehea_port_res *pr,
 
 	kfree(init_attr);
 
-	netif_napi_add(pr->port->netdev, &pr->napi, ehea_poll, 64);
+	netif_napi_add(pr->port->netdev, &pr->napi, ehea_poll);
 
 	ret = 0;
 	goto out;
@@ -1615,7 +1617,7 @@ static void write_swqe2_immediate(struct sk_buff *skb, struct ehea_swqe *swqe,
 		 * For TSO packets we only copy the headers into the
 		 * immediate area.
 		 */
-		immediate_len = ETH_HLEN + ip_hdrlen(skb) + tcp_hdrlen(skb);
+		immediate_len = skb_tcp_all_headers(skb);
 	}
 
 	if (skb_is_gso(skb) || skb_data_size >= SWQE2_MAX_IMM) {
@@ -2898,6 +2900,7 @@ static struct device *ehea_register_port(struct ehea_port *port,
 	ret = of_device_register(&port->ofdev);
 	if (ret) {
 		pr_err("failed to register device. ret=%d\n", ret);
+		put_device(&port->ofdev.dev);
 		goto out;
 	}
diff --git a/drivers/net/ethernet/ibm/emac/core.c b/drivers/net/ethernet/ibm/emac/core.c
index 6b3fc8823c54..9b08e41ccc29 100644
--- a/drivers/net/ethernet/ibm/emac/core.c
+++ b/drivers/net/ethernet/ibm/emac/core.c
@@ -2137,8 +2137,11 @@ emac_ethtool_set_link_ksettings(struct net_device *ndev,
 	return 0;
 }
 
-static void emac_ethtool_get_ringparam(struct net_device *ndev,
-				       struct ethtool_ringparam *rp)
+static void
+emac_ethtool_get_ringparam(struct net_device *ndev,
+			   struct ethtool_ringparam *rp,
+			   struct kernel_ethtool_ringparam *kernel_rp,
+			   struct netlink_ext_ack *extack)
 {
 	rp->rx_max_pending = rp->rx_pending = NUM_RX_BUFF;
 	rp->tx_max_pending = rp->tx_pending = NUM_TX_BUFF;
@@ -2281,8 +2284,8 @@ static void emac_ethtool_get_drvinfo(struct net_device *ndev,
 {
 	struct emac_instance *dev = netdev_priv(ndev);
 
-	strlcpy(info->driver, "ibm_emac", sizeof(info->driver));
-	strlcpy(info->version, DRV_VERSION, sizeof(info->version));
+	strscpy(info->driver, "ibm_emac", sizeof(info->driver));
+	strscpy(info->version, DRV_VERSION, sizeof(info->version));
 	snprintf(info->bus_info, sizeof(info->bus_info), "PPC 4xx EMAC-%d %pOF",
 		 dev->cell_index, dev->ofdev->dev.of_node);
 }
@@ -2976,11 +2979,9 @@ static int emac_init_config(struct emac_instance *dev)
 
 	/* Read MAC-address */
 	err = of_get_ethdev_address(np, dev->ndev);
-	if (err) {
-		if (err != -EPROBE_DEFER)
-			dev_err(&dev->ofdev->dev, "Can't get valid [local-]mac-address from OF !\n");
-		return err;
-	}
+	if (err)
+		return dev_err_probe(&dev->ofdev->dev, err,
+				     "Can't get valid [local-]mac-address from OF !\n");
 
 	/* IAHT and GAHT filter parameterization */
 	if (emac_has_feature(dev, EMAC_FTR_EMAC4SYNC)) {
diff --git a/drivers/net/ethernet/ibm/emac/mal.c b/drivers/net/ethernet/ibm/emac/mal.c
index 075c07303f16..ff5487bbebe3 100644
--- a/drivers/net/ethernet/ibm/emac/mal.c
+++ b/drivers/net/ethernet/ibm/emac/mal.c
@@ -605,8 +605,8 @@ static int mal_probe(struct platform_device *ofdev)
 
 	init_dummy_netdev(&mal->dummy_dev);
 
-	netif_napi_add(&mal->dummy_dev, &mal->napi, mal_poll,
-		       CONFIG_IBM_EMAC_POLL_WEIGHT);
+	netif_napi_add_weight(&mal->dummy_dev, &mal->napi, mal_poll,
+			      CONFIG_IBM_EMAC_POLL_WEIGHT);
 
 	/* Load power-on reset defaults */
 	mal_reset(mal);
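The ehea and mal hunks above track the tree-wide netif_napi_add() change: the weight
argument is gone and the NAPI_POLL_WEIGHT default (64) is implied, while drivers that
genuinely need a non-default weight move to netif_napi_add_weight(). A minimal sketch of
the two registration styles; my_poll and my_register_napi are illustrative, not part of
this patch:

	#include <linux/netdevice.h>

	static int my_poll(struct napi_struct *napi, int budget)
	{
		/* process up to budget packets, return the number handled */
		return 0;
	}

	static void my_register_napi(struct net_device *dev, struct napi_struct *napi)
	{
		/* preferred: default weight (NAPI_POLL_WEIGHT) */
		netif_napi_add(dev, napi, my_poll);

		/* only for drivers with a proven need for a custom weight:
		 * netif_napi_add_weight(dev, napi, my_poll, 16);
		 */
	}
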
diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c
index 45ba40cf4d07..5b96cd94dcd2 100644
--- a/drivers/net/ethernet/ibm/ibmveth.c
+++ b/drivers/net/ethernet/ibm/ibmveth.c
@@ -141,6 +141,13 @@ static inline int ibmveth_rxq_csum_good(struct ibmveth_adapter *adapter)
 	return ibmveth_rxq_flags(adapter) & IBMVETH_RXQ_CSUM_GOOD;
 }
 
+static unsigned int ibmveth_real_max_tx_queues(void)
+{
+	unsigned int n_cpu = num_online_cpus();
+
+	return min(n_cpu, IBMVETH_MAX_QUEUES);
+}
+
 /* setup the initial settings for a buffer pool */
 static void ibmveth_init_buffer_pool(struct ibmveth_buff_pool *pool,
 				     u32 pool_index, u32 pool_size,
@@ -456,6 +463,38 @@ static void ibmveth_rxq_harvest_buffer(struct ibmveth_adapter *adapter)
 	}
 }
 
+static void ibmveth_free_tx_ltb(struct ibmveth_adapter *adapter, int idx)
+{
+	dma_unmap_single(&adapter->vdev->dev, adapter->tx_ltb_dma[idx],
+			 adapter->tx_ltb_size, DMA_TO_DEVICE);
+	kfree(adapter->tx_ltb_ptr[idx]);
+	adapter->tx_ltb_ptr[idx] = NULL;
+}
+
+static int ibmveth_allocate_tx_ltb(struct ibmveth_adapter *adapter, int idx)
+{
+	adapter->tx_ltb_ptr[idx] = kzalloc(adapter->tx_ltb_size,
+					   GFP_KERNEL);
+	if (!adapter->tx_ltb_ptr[idx]) {
+		netdev_err(adapter->netdev,
+			   "unable to allocate tx long term buffer\n");
+		return -ENOMEM;
+	}
+	adapter->tx_ltb_dma[idx] = dma_map_single(&adapter->vdev->dev,
+						  adapter->tx_ltb_ptr[idx],
+						  adapter->tx_ltb_size,
+						  DMA_TO_DEVICE);
+	if (dma_mapping_error(&adapter->vdev->dev, adapter->tx_ltb_dma[idx])) {
+		netdev_err(adapter->netdev,
+			   "unable to DMA map tx long term buffer\n");
+		kfree(adapter->tx_ltb_ptr[idx]);
+		adapter->tx_ltb_ptr[idx] = NULL;
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
 static int ibmveth_register_logical_lan(struct ibmveth_adapter *adapter,
 					union ibmveth_buf_desc rxq_desc,
 					u64 mac_address)
{
@@ -538,6 +577,11 @@ static int ibmveth_open(struct net_device *netdev)
 		goto out_unmap_buffer_list;
 	}
 
+	for (i = 0; i < netdev->real_num_tx_queues; i++) {
+		if (ibmveth_allocate_tx_ltb(adapter, i))
+			goto out_free_tx_ltb;
+	}
+
 	adapter->rx_queue.index = 0;
 	adapter->rx_queue.num_slots = rxq_entries;
 	adapter->rx_queue.toggle = 1;
@@ -595,25 +639,15 @@ static int ibmveth_open(struct net_device *netdev)
 	rc = -ENOMEM;
-	adapter->bounce_buffer = dma_alloc_coherent(&adapter->vdev->dev,
-						    netdev->mtu + IBMVETH_BUFF_OH,
-						    &adapter->bounce_buffer_dma, GFP_KERNEL);
-	if (!adapter->bounce_buffer) {
-		netdev_err(netdev, "unable to alloc bounce buffer\n");
-		goto out_free_irq;
-	}
-
 	netdev_dbg(netdev, "initial replenish cycle\n");
 	ibmveth_interrupt(netdev->irq, netdev);
 
-	netif_start_queue(netdev);
+	netif_tx_start_all_queues(netdev);
 
 	netdev_dbg(netdev, "open complete\n");
 
 	return 0;
 
-out_free_irq:
-	free_irq(netdev->irq, netdev);
 out_free_buffer_pools:
 	while (--i >= 0) {
 		if (adapter->rx_buff_pool[i].active)
@@ -623,6 +657,12 @@ out_free_buffer_pools:
 out_unmap_filter_list:
 	dma_unmap_single(dev, adapter->filter_list_dma, 4096,
 			 DMA_BIDIRECTIONAL);
+
+out_free_tx_ltb:
+	while (--i >= 0) {
+		ibmveth_free_tx_ltb(adapter, i);
+	}
+
 out_unmap_buffer_list:
 	dma_unmap_single(dev, adapter->buffer_list_dma, 4096,
 			 DMA_BIDIRECTIONAL);
@@ -651,7 +691,7 @@ static int ibmveth_close(struct net_device *netdev)
 	napi_disable(&adapter->napi);
 
 	if (!adapter->pool_config)
-		netif_stop_queue(netdev);
+		netif_tx_stop_all_queues(netdev);
 
 	h_vio_signal(adapter->vdev->unit_address, VIO_IRQ_DISABLE);
 
@@ -685,9 +725,8 @@ static int ibmveth_close(struct net_device *netdev)
 		ibmveth_free_buffer_pool(adapter, &adapter->rx_buff_pool[i]);
 
-	dma_free_coherent(&adapter->vdev->dev,
-			  adapter->netdev->mtu + IBMVETH_BUFF_OH,
-			  adapter->bounce_buffer, adapter->bounce_buffer_dma);
+	for (i = 0; i < netdev->real_num_tx_queues; i++)
+		ibmveth_free_tx_ltb(adapter, i);
 
 	netdev_dbg(netdev, "close complete\n");
 
@@ -727,8 +766,8 @@ static void ibmveth_init_link_settings(struct net_device *dev)
 static void netdev_get_drvinfo(struct net_device *dev,
 			       struct ethtool_drvinfo *info)
 {
-	strlcpy(info->driver, ibmveth_driver_name, sizeof(info->driver));
-	strlcpy(info->version, ibmveth_driver_version, sizeof(info->version));
+	strscpy(info->driver, ibmveth_driver_name, sizeof(info->driver));
+	strscpy(info->version, ibmveth_driver_version, sizeof(info->version));
 }
 
 static netdev_features_t ibmveth_fix_features(struct net_device *dev,
@@ -953,6 +992,69 @@ static void ibmveth_get_ethtool_stats(struct net_device *dev,
 		data[i] = IBMVETH_GET_STAT(adapter, ibmveth_stats[i].offset);
 }
 
+static void ibmveth_get_channels(struct net_device *netdev,
+				 struct ethtool_channels *channels)
+{
+	channels->max_tx = ibmveth_real_max_tx_queues();
+	channels->tx_count = netdev->real_num_tx_queues;
+
+	channels->max_rx = netdev->real_num_rx_queues;
+	channels->rx_count = netdev->real_num_rx_queues;
+}
+
+static int ibmveth_set_channels(struct net_device *netdev,
+				struct ethtool_channels *channels)
+{
+	struct ibmveth_adapter *adapter = netdev_priv(netdev);
+	unsigned int old = netdev->real_num_tx_queues,
+		     goal = channels->tx_count;
+	int rc, i;
+
+	/* If ndo_open has not been called yet then don't allocate, just set
+	 * desired netdev_queue's and return
+	 */
+	if (!(netdev->flags & IFF_UP))
+		return netif_set_real_num_tx_queues(netdev, goal);
+
+	/* We have IBMVETH_MAX_QUEUES netdev_queue's allocated
+	 * but we may need to alloc/free the ltb's.
+	 */
+	netif_tx_stop_all_queues(netdev);
+
+	/* Allocate any queue that we need */
+	for (i = old; i < goal; i++) {
+		if (adapter->tx_ltb_ptr[i])
+			continue;
+
+		rc = ibmveth_allocate_tx_ltb(adapter, i);
+		if (!rc)
+			continue;
+
+		/* if something goes wrong, free everything we just allocated */
+		netdev_err(netdev, "Failed to allocate more tx queues, returning to %d queues\n",
+			   old);
+		goal = old;
+		old = i;
+		break;
+	}
+	rc = netif_set_real_num_tx_queues(netdev, goal);
+	if (rc) {
+		netdev_err(netdev, "Failed to set real tx queues, returning to %d queues\n",
+			   old);
+		goal = old;
+		old = i;
+	}
+	/* Free any that are no longer needed */
+	for (i = old; i > goal; i--) {
+		if (adapter->tx_ltb_ptr[i - 1])
+			ibmveth_free_tx_ltb(adapter, i - 1);
+	}
+
+	netif_tx_wake_all_queues(netdev);
+
+	return rc;
+}
+
 static const struct ethtool_ops netdev_ethtool_ops = {
 	.get_drvinfo		= netdev_get_drvinfo,
 	.get_link		= ethtool_op_get_link,
@@ -961,6 +1063,8 @@ static const struct ethtool_ops netdev_ethtool_ops = {
 	.get_ethtool_stats	= ibmveth_get_ethtool_stats,
 	.get_link_ksettings	= ibmveth_get_link_ksettings,
 	.set_link_ksettings	= ibmveth_set_link_ksettings,
+	.get_channels		= ibmveth_get_channels,
+	.set_channels		= ibmveth_set_channels
 };
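For context, the get_channels/set_channels pair above only works because the netdev was
allocated with the maximum queue count up front; raising or lowering the active count
later is then just a bookkeeping change plus per-queue buffer alloc/free. A sketch of
that allocate-max-then-trim pattern, with illustrative names (MY_MAX_QUEUES, my_alloc):

	#include <linux/etherdevice.h>

	#define MY_MAX_QUEUES 16

	static struct net_device *my_alloc(void)
	{
		struct net_device *dev;

		/* reserve MY_MAX_QUEUES tx queues and 1 rx queue for the
		 * lifetime of the netdev; only some will be active
		 */
		dev = alloc_etherdev_mqs(0, MY_MAX_QUEUES, 1);
		if (!dev)
			return NULL;

		/* start with a smaller active count; ethtool -L can raise
		 * it later without reallocating the netdev
		 */
		if (netif_set_real_num_tx_queues(dev,
						 min(num_online_cpus(), 8u))) {
			free_netdev(dev);
			return NULL;
		}
		return dev;
	}

From userspace the active count is then adjusted with something like
"ethtool -L <iface> tx 8", which lands in the .set_channels handler.
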
 
 static int ibmveth_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
@@ -969,7 +1073,7 @@ static int ibmveth_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 }
 
 static int ibmveth_send(struct ibmveth_adapter *adapter,
-			union ibmveth_buf_desc *descs, unsigned long mss)
+			unsigned long desc, unsigned long mss)
 {
 	unsigned long correlator;
 	unsigned int retry_count;
@@ -982,12 +1086,9 @@ static int ibmveth_send(struct ibmveth_adapter *adapter,
 	retry_count = 1024;
 	correlator = 0;
 	do {
-		ret = h_send_logical_lan(adapter->vdev->unit_address,
-					 descs[0].desc, descs[1].desc,
-					 descs[2].desc, descs[3].desc,
-					 descs[4].desc, descs[5].desc,
-					 correlator, &correlator, mss,
-					 adapter->fw_large_send_support);
+		ret = h_send_logical_lan(adapter->vdev->unit_address, desc,
+					 correlator, &correlator, mss,
+					 adapter->fw_large_send_support);
 	} while ((ret == H_BUSY) && (retry_count--));
 
 	if (ret != H_SUCCESS && ret != H_DROPPED) {
@@ -1020,34 +1121,13 @@ static netdev_tx_t ibmveth_start_xmit(struct sk_buff *skb,
 				      struct net_device *netdev)
 {
 	struct ibmveth_adapter *adapter = netdev_priv(netdev);
-	unsigned int desc_flags;
-	union ibmveth_buf_desc descs[6];
-	int last, i;
-	int force_bounce = 0;
-	dma_addr_t dma_addr;
+	unsigned int desc_flags, total_bytes;
+	union ibmveth_buf_desc desc;
+	int i, queue_num = skb_get_queue_mapping(skb);
 	unsigned long mss = 0;
 
 	if (ibmveth_is_packet_unsupported(skb, netdev))
 		goto out;
-
-	/* veth doesn't handle frag_list, so linearize the skb.
-	 * When GRO is enabled SKB's can have frag_list.
-	 */
-	if (adapter->is_active_trunk &&
-	    skb_has_frag_list(skb) && __skb_linearize(skb)) {
-		netdev->stats.tx_dropped++;
-		goto out;
-	}
-
-	/*
-	 * veth handles a maximum of 6 segments including the header, so
-	 * we have to linearize the skb if there are more than this.
-	 */
-	if (skb_shinfo(skb)->nr_frags > 5 && __skb_linearize(skb)) {
-		netdev->stats.tx_dropped++;
-		goto out;
-	}
-
 	/* veth can't checksum offload UDP */
 	if (skb->ip_summed == CHECKSUM_PARTIAL &&
 	    ((skb->protocol == htons(ETH_P_IP) &&
@@ -1077,56 +1157,6 @@ static netdev_tx_t ibmveth_start_xmit(struct sk_buff *skb,
 		desc_flags |= IBMVETH_BUF_LRG_SND;
 	}
 
-retry_bounce:
-	memset(descs, 0, sizeof(descs));
-
-	/*
-	 * If a linear packet is below the rx threshold then
-	 * copy it into the static bounce buffer. This avoids the
-	 * cost of a TCE insert and remove.
-	 */
-	if (force_bounce || (!skb_is_nonlinear(skb) &&
-			     (skb->len < tx_copybreak))) {
-		skb_copy_from_linear_data(skb, adapter->bounce_buffer,
-					  skb->len);
-
-		descs[0].fields.flags_len = desc_flags | skb->len;
-		descs[0].fields.address = adapter->bounce_buffer_dma;
-
-		if (ibmveth_send(adapter, descs, 0)) {
-			adapter->tx_send_failed++;
-			netdev->stats.tx_dropped++;
-		} else {
-			netdev->stats.tx_packets++;
-			netdev->stats.tx_bytes += skb->len;
-		}
-
-		goto out;
-	}
-
-	/* Map the header */
-	dma_addr = dma_map_single(&adapter->vdev->dev, skb->data,
-				  skb_headlen(skb), DMA_TO_DEVICE);
-	if (dma_mapping_error(&adapter->vdev->dev, dma_addr))
-		goto map_failed;
-
-	descs[0].fields.flags_len = desc_flags | skb_headlen(skb);
-	descs[0].fields.address = dma_addr;
-
-	/* Map the frags */
-	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
-		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
-
-		dma_addr = skb_frag_dma_map(&adapter->vdev->dev, frag, 0,
-					    skb_frag_size(frag), DMA_TO_DEVICE);
-
-		if (dma_mapping_error(&adapter->vdev->dev, dma_addr))
-			goto map_failed_frags;
-
-		descs[i+1].fields.flags_len = desc_flags | skb_frag_size(frag);
-		descs[i+1].fields.address = dma_addr;
-	}
-
 	if (skb->ip_summed == CHECKSUM_PARTIAL && skb_is_gso(skb)) {
 		if (adapter->fw_large_send_support) {
 			mss = (unsigned long)skb_shinfo(skb)->gso_size;
@@ -1143,7 +1173,36 @@ retry_bounce:
 		}
 	}
 
-	if (ibmveth_send(adapter, descs, mss)) {
+	/* Copy header into mapped buffer */
+	if (unlikely(skb->len > adapter->tx_ltb_size)) {
+		netdev_err(adapter->netdev, "tx: packet size (%u) exceeds ltb (%u)\n",
+			   skb->len, adapter->tx_ltb_size);
+		netdev->stats.tx_dropped++;
+		goto out;
+	}
+	memcpy(adapter->tx_ltb_ptr[queue_num], skb->data, skb_headlen(skb));
+	total_bytes = skb_headlen(skb);
+	/* Copy frags into mapped buffers */
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+		memcpy(adapter->tx_ltb_ptr[queue_num] + total_bytes,
+		       skb_frag_address_safe(frag), skb_frag_size(frag));
+		total_bytes += skb_frag_size(frag);
+	}
+
+	if (unlikely(total_bytes != skb->len)) {
+		netdev_err(adapter->netdev, "tx: incorrect packet len copied into ltb (%u != %u)\n",
+			   skb->len, total_bytes);
+		netdev->stats.tx_dropped++;
+		goto out;
+	}
+	desc.fields.flags_len = desc_flags | skb->len;
+	desc.fields.address = adapter->tx_ltb_dma[queue_num];
+	/* finish writing to long_term_buff before VIOS accessing it */
+	dma_wmb();
+
+	if (ibmveth_send(adapter, desc.desc, mss)) {
 		adapter->tx_send_failed++;
 		netdev->stats.tx_dropped++;
 	} else {
@@ -1151,41 +1210,11 @@ retry_bounce:
 		netdev->stats.tx_bytes += skb->len;
 	}
 
-	dma_unmap_single(&adapter->vdev->dev,
-			 descs[0].fields.address,
-			 descs[0].fields.flags_len & IBMVETH_BUF_LEN_MASK,
-			 DMA_TO_DEVICE);
-
-	for (i = 1; i < skb_shinfo(skb)->nr_frags + 1; i++)
-		dma_unmap_page(&adapter->vdev->dev, descs[i].fields.address,
-			       descs[i].fields.flags_len & IBMVETH_BUF_LEN_MASK,
-			       DMA_TO_DEVICE);
-
 out:
 	dev_consume_skb_any(skb);
 	return NETDEV_TX_OK;
 
-map_failed_frags:
-	last = i+1;
-	for (i = 1; i < last; i++)
-		dma_unmap_page(&adapter->vdev->dev, descs[i].fields.address,
-			       descs[i].fields.flags_len & IBMVETH_BUF_LEN_MASK,
-			       DMA_TO_DEVICE);
-	dma_unmap_single(&adapter->vdev->dev,
-			 descs[0].fields.address,
-			 descs[0].fields.flags_len & IBMVETH_BUF_LEN_MASK,
-			 DMA_TO_DEVICE);
-map_failed:
-	if (!firmware_has_feature(FW_FEATURE_CMO))
-		netdev_err(netdev, "tx: unable to map xmit buffer\n");
-	adapter->tx_map_failed++;
-	if (skb_linearize(skb)) {
-		netdev->stats.tx_dropped++;
-		goto out;
-	}
-	force_bounce = 1;
-	goto retry_bounce;
 }
 
 static void ibmveth_rx_mss_helper(struct sk_buff *skb, u16 mss, int lrg_pkt)
@@ -1568,6 +1597,8 @@ static unsigned long ibmveth_get_desired_dma(struct vio_dev *vdev)
 	ret = IBMVETH_BUFF_LIST_SIZE + IBMVETH_FILT_LIST_SIZE;
 	ret += IOMMU_PAGE_ALIGN(netdev->mtu, tbl);
+	/* add size of mapped tx buffers */
+	ret += IOMMU_PAGE_ALIGN(IBMVETH_MAX_TX_BUF_SIZE, tbl);
 
 	for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) {
 		/* add the size of the active receive buffers */
@@ -1660,8 +1691,7 @@ static int ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id)
 		return -EINVAL;
 	}
 
-	netdev = alloc_etherdev(sizeof(struct ibmveth_adapter));
-
+	netdev = alloc_etherdev_mqs(sizeof(struct ibmveth_adapter), IBMVETH_MAX_QUEUES, 1);
 	if (!netdev)
 		return -ENOMEM;
 
@@ -1674,7 +1704,7 @@ static int ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id)
 	adapter->pool_config = 0;
 	ibmveth_init_link_settings(netdev);
 
-	netif_napi_add(netdev, &adapter->napi, ibmveth_poll, 16);
+	netif_napi_add_weight(netdev, &adapter->napi, ibmveth_poll, 16);
 
 	netdev->irq = dev->irq;
 	netdev->netdev_ops = &ibmveth_netdev_ops;
@@ -1727,6 +1757,18 @@ static int ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id)
 		kobject_uevent(kobj, KOBJ_ADD);
 	}
 
+	rc = netif_set_real_num_tx_queues(netdev, min(num_online_cpus(),
+						      IBMVETH_DEFAULT_QUEUES));
+	if (rc) {
+		netdev_dbg(netdev, "failed to set number of tx queues rc=%d\n",
+			   rc);
+		free_netdev(netdev);
+		return rc;
+	}
+	adapter->tx_ltb_size = PAGE_ALIGN(IBMVETH_MAX_TX_BUF_SIZE);
+	for (i = 0; i < IBMVETH_MAX_QUEUES; i++)
+		adapter->tx_ltb_ptr[i] = NULL;
+
 	netdev_dbg(netdev, "adapter @ 0x%p\n", adapter);
 	netdev_dbg(netdev, "registering netdev...\n");
 
@@ -1890,6 +1932,7 @@ static struct attribute *veth_pool_attrs[] = {
 	&veth_size_attr,
 	NULL,
 };
+ATTRIBUTE_GROUPS(veth_pool);
 
 static const struct sysfs_ops veth_pool_ops = {
 	.show   = veth_pool_show,
@@ -1899,7 +1942,7 @@ static const struct sysfs_ops veth_pool_ops = {
 static struct kobj_type ktype_veth_pool = {
 	.release        = NULL,
 	.sysfs_ops      = &veth_pool_ops,
-	.default_attrs  = veth_pool_attrs,
+	.default_groups = veth_pool_groups,
 };
 
 static int ibmveth_resume(struct device *dev)
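The rewritten ibmveth_start_xmit() above replaces per-packet dma_map_single() calls with
a memcpy into a per-queue buffer that was DMA-mapped once at open time, posting a single
descriptor per packet. A compact sketch of that copy-into-premapped-buffer pattern;
my_xmit_copy, tx_buf and tx_dma are illustrative stand-ins, not the driver's names:

	static int my_xmit_copy(u8 *tx_buf, dma_addr_t tx_dma, struct sk_buff *skb)
	{
		unsigned int off = skb_headlen(skb);
		int i;

		/* linear header first, then each page fragment */
		memcpy(tx_buf, skb->data, off);
		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
			const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

			memcpy(tx_buf + off, skb_frag_address(frag),
			       skb_frag_size(frag));
			off += skb_frag_size(frag);
		}
		/* order the copies before the hypervisor reads the buffer */
		dma_wmb();
		/* ... post one descriptor covering [tx_dma, tx_dma + skb->len) ... */
		return 0;
	}

The trade-off is an extra copy per packet in exchange for avoiding a TCE (IOMMU) insert
and remove on every transmit, which is expensive under a hypervisor.
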
diff --git a/drivers/net/ethernet/ibm/ibmveth.h b/drivers/net/ethernet/ibm/ibmveth.h
index 27dfff200166..115d4c45aa77 100644
--- a/drivers/net/ethernet/ibm/ibmveth.h
+++ b/drivers/net/ethernet/ibm/ibmveth.h
@@ -46,23 +46,23 @@
 #define h_add_logical_lan_buffer(ua, buf) \
 	plpar_hcall_norets(H_ADD_LOGICAL_LAN_BUFFER, ua, buf)
 
+/* FW allows us to send 6 descriptors but we only use one so mark
+ * the other 5 as unused (0)
+ */
 static inline long h_send_logical_lan(unsigned long unit_address,
-		unsigned long desc1, unsigned long desc2, unsigned long desc3,
-		unsigned long desc4, unsigned long desc5, unsigned long desc6,
-		unsigned long corellator_in, unsigned long *corellator_out,
-		unsigned long mss, unsigned long large_send_support)
+		unsigned long desc, unsigned long corellator_in,
+		unsigned long *corellator_out, unsigned long mss,
+		unsigned long large_send_support)
 {
 	long rc;
 	unsigned long retbuf[PLPAR_HCALL9_BUFSIZE];
 
 	if (large_send_support)
 		rc = plpar_hcall9(H_SEND_LOGICAL_LAN, retbuf, unit_address,
-				  desc1, desc2, desc3, desc4, desc5, desc6,
-				  corellator_in, mss);
+				  desc, 0, 0, 0, 0, 0, corellator_in, mss);
 	else
 		rc = plpar_hcall9(H_SEND_LOGICAL_LAN, retbuf, unit_address,
-				  desc1, desc2, desc3, desc4, desc5, desc6,
-				  corellator_in);
+				  desc, 0, 0, 0, 0, 0, corellator_in);
 
 	*corellator_out = retbuf[0];
@@ -98,6 +98,9 @@ static inline long h_illan_attributes(unsigned long unit_address,
 #define IBMVETH_BUFF_LIST_SIZE	4096
 #define IBMVETH_FILT_LIST_SIZE	4096
 #define IBMVETH_MAX_BUF_SIZE	(1024 * 128)
+#define IBMVETH_MAX_TX_BUF_SIZE	(1024 * 64)
+#define IBMVETH_MAX_QUEUES	16U
+#define IBMVETH_DEFAULT_QUEUES	8U
 
 static int pool_size[] = { 512, 1024 * 2, 1024 * 16, 1024 * 32, 1024 * 64 };
 static int pool_count[] = { 256, 512, 256, 256, 256 };
@@ -137,6 +140,9 @@ struct ibmveth_adapter {
 	unsigned int mcastFilterSize;
 	void * buffer_list_addr;
 	void * filter_list_addr;
+	void *tx_ltb_ptr[IBMVETH_MAX_QUEUES];
+	unsigned int tx_ltb_size;
+	dma_addr_t tx_ltb_dma[IBMVETH_MAX_QUEUES];
 	dma_addr_t buffer_list_dma;
 	dma_addr_t filter_list_dma;
 	struct ibmveth_buff_pool rx_buff_pool[IBMVETH_NUM_BUFF_POOLS];
@@ -145,8 +151,6 @@ struct ibmveth_adapter {
 	int rx_csum;
 	int large_send;
 	bool is_active_trunk;
-	void *bounce_buffer;
-	dma_addr_t bounce_buffer_dma;
 
 	u64 fw_ipv6_csum_support;
 	u64 fw_ipv4_csum_support;
diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 0bb3911dd014..9282381a438f 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -53,6 +53,7 @@
 #include <linux/ip.h>
 #include <linux/ipv6.h>
 #include <linux/irq.h>
+#include <linux/irqdomain.h>
 #include <linux/kthread.h>
 #include <linux/seq_file.h>
 #include <linux/interrupt.h>
@@ -60,6 +61,7 @@
 #include <asm/hvcall.h>
 #include <linux/atomic.h>
 #include <asm/vio.h>
+#include <asm/xive.h>
 #include <asm/iommu.h>
 #include <linux/uaccess.h>
 #include <asm/firmware.h>
@@ -110,6 +112,7 @@ static void ibmvnic_tx_scrq_clean_buffer(struct ibmvnic_adapter *adapter,
 					 struct ibmvnic_sub_crq_queue *tx_scrq);
 static void free_long_term_buff(struct ibmvnic_adapter *adapter,
 				struct ibmvnic_long_term_buff *ltb);
+static void ibmvnic_disable_irqs(struct ibmvnic_adapter *adapter);
 
 struct ibmvnic_stat {
 	char name[ETH_GSTRING_LEN];
@@ -255,12 +258,14 @@ static int alloc_long_term_buff(struct ibmvnic_adapter *adapter,
 				struct ibmvnic_long_term_buff *ltb, int size)
 {
 	struct device *dev = &adapter->vdev->dev;
+	u64 prev = 0;
 	int rc;
 
 	if (!reuse_ltb(ltb, size)) {
 		dev_dbg(dev,
 			"LTB size changed from 0x%llx to 0x%x, reallocating\n",
 			ltb->size, size);
+		prev = ltb->size;
 		free_long_term_buff(adapter, ltb);
 	}
 
@@ -281,8 +286,8 @@ static int alloc_long_term_buff(struct ibmvnic_adapter *adapter,
 		bitmap_set(adapter->map_ids, ltb->map_id, 1);
 
 		dev_dbg(dev,
-			"Allocated new LTB [map %d, size 0x%llx]\n",
-			ltb->map_id, ltb->size);
+			"Allocated new LTB [map %d, size 0x%llx was 0x%llx]\n",
+			ltb->map_id, ltb->size, prev);
 	}
 
 	/* Ensure ltb is zeroed - specially when reusing it. */
@@ -308,7 +313,7 @@ static int alloc_long_term_buff(struct ibmvnic_adapter *adapter,
 	if (adapter->fw_done_rc) {
 		dev_err(dev, "Couldn't map LTB, rc = %d\n",
 			adapter->fw_done_rc);
-		rc = -1;
+		rc = -EIO;
 		goto out;
 	}
 	rc = 0;
@@ -343,6 +348,208 @@ static void free_long_term_buff(struct ibmvnic_adapter *adapter,
 	ltb->map_id = 0;
 }
 
+/**
+ * free_ltb_set - free the given set of long term buffers (LTBS)
+ * @adapter: The ibmvnic adapter containing this ltb set
+ * @ltb_set: The ltb_set to be freed
+ *
+ * Free the set of LTBs in the given set.
+ */
+static void free_ltb_set(struct ibmvnic_adapter *adapter,
+			 struct ibmvnic_ltb_set *ltb_set)
+{
+	int i;
+
+	for (i = 0; i < ltb_set->num_ltbs; i++)
+		free_long_term_buff(adapter, &ltb_set->ltbs[i]);
+
+	kfree(ltb_set->ltbs);
+	ltb_set->ltbs = NULL;
+	ltb_set->num_ltbs = 0;
+}
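Before the allocator itself: the point of an LTB *set* is to replace one huge
physically contiguous allocation (which often fails after memory fragments) with
several capped chunks, each holding a whole number of buffers. A standalone sketch of
the chunking arithmetic that alloc_ltb_set() below performs; the 8 MB cap and the
sizes here are illustrative, not the driver's actual constants:

	#include <stdio.h>

	#define ONE_LTB_SIZE (8 << 20)	/* illustrative per-chunk cap */

	int main(void)
	{
		int num_buffs = 4096, buff_size = 2048;
		/* round the cap down to a whole number of buffers */
		int ltb_size = (ONE_LTB_SIZE / buff_size) * buff_size;
		int tot_size = num_buffs * buff_size;
		int nltbs;

		if (ltb_size > tot_size)
			ltb_size = tot_size;
		/* ceiling division: chunks needed to cover the pool */
		nltbs = tot_size / ltb_size + (tot_size % ltb_size ? 1 : 0);
		printf("%d LTBs of up to %d bytes cover %d bytes\n",
		       nltbs, ltb_size, tot_size);
		return 0;
	}
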
+/**
+ * alloc_ltb_set() - Allocate a set of long term buffers (LTBs)
+ *
+ * @adapter: ibmvnic adapter associated to the LTB
+ * @ltb_set: container object for the set of LTBs
+ * @num_buffs: Number of buffers in the LTB
+ * @buff_size: Size of each buffer in the LTB
+ *
+ * Allocate a set of LTBs to accommodate @num_buffs buffers of @buff_size
+ * each. We currently cap size each LTB to IBMVNIC_ONE_LTB_SIZE. If the
+ * new set of LTBs have fewer LTBs than the old set, free the excess LTBs.
+ * If new set needs more than in old set, allocate the remaining ones.
+ * Try and reuse as many LTBs as possible and avoid reallocation.
+ *
+ * Any changes to this allocation strategy must be reflected in
+ * map_rxpool_buff_to_ltb() and map_txpool_buff_to_ltb().
+ */
+static int alloc_ltb_set(struct ibmvnic_adapter *adapter,
+			 struct ibmvnic_ltb_set *ltb_set, int num_buffs,
+			 int buff_size)
+{
+	struct device *dev = &adapter->vdev->dev;
+	struct ibmvnic_ltb_set old_set;
+	struct ibmvnic_ltb_set new_set;
+	int rem_size;
+	int tot_size;		/* size of all ltbs */
+	int ltb_size;		/* size of one ltb */
+	int nltbs;
+	int rc;
+	int n;
+	int i;
+
+	dev_dbg(dev, "%s() num_buffs %d, buff_size %d\n", __func__, num_buffs,
+		buff_size);
+
+	ltb_size = rounddown(IBMVNIC_ONE_LTB_SIZE, buff_size);
+	tot_size = num_buffs * buff_size;
+
+	if (ltb_size > tot_size)
+		ltb_size = tot_size;
+
+	nltbs = tot_size / ltb_size;
+	if (tot_size % ltb_size)
+		nltbs++;
+
+	old_set = *ltb_set;
+
+	if (old_set.num_ltbs == nltbs) {
+		new_set = old_set;
+	} else {
+		int tmp = nltbs * sizeof(struct ibmvnic_long_term_buff);
+
+		new_set.ltbs = kzalloc(tmp, GFP_KERNEL);
+		if (!new_set.ltbs)
+			return -ENOMEM;
+
+		new_set.num_ltbs = nltbs;
+
+		/* Free any excess ltbs in old set */
+		for (i = new_set.num_ltbs; i < old_set.num_ltbs; i++)
+			free_long_term_buff(adapter, &old_set.ltbs[i]);
+
+		/* Copy remaining ltbs to new set. All LTBs except the
+		 * last one are of the same size. alloc_long_term_buff()
+		 * will realloc if the size changes.
+		 */
+		n = min(old_set.num_ltbs, new_set.num_ltbs);
+		for (i = 0; i < n; i++)
+			new_set.ltbs[i] = old_set.ltbs[i];
+
+		/* Any additional ltbs in new set will have NULL ltbs for
+		 * now and will be allocated in alloc_long_term_buff().
+		 */
+
+		/* We no longer need the old_set so free it. Note that we
+		 * may have reused some ltbs from old set and freed excess
+		 * ltbs above. So we only need to free the container now
+		 * not the LTBs themselves. (i.e. dont free_ltb_set()!)
+		 */
+		kfree(old_set.ltbs);
+		old_set.ltbs = NULL;
+		old_set.num_ltbs = 0;
+
+		/* Install the new set. If allocations fail below, we will
+		 * retry later and know what size LTBs we need.
+		 */
+		*ltb_set = new_set;
+	}
+
+	i = 0;
+	rem_size = tot_size;
+	while (rem_size) {
+		if (ltb_size > rem_size)
+			ltb_size = rem_size;
+
+		rem_size -= ltb_size;
+
+		rc = alloc_long_term_buff(adapter, &new_set.ltbs[i], ltb_size);
+		if (rc)
+			goto out;
+		i++;
+	}
+
+	WARN_ON(i != new_set.num_ltbs);
+
+	return 0;
+out:
+	/* We may have allocated one/more LTBs before failing and we
+	 * want to try and reuse on next reset. So don't free ltb set.
+	 */
+	return rc;
+}
+
+/**
+ * map_rxpool_buf_to_ltb - Map given rxpool buffer to offset in an LTB.
+ * @rxpool: The receive buffer pool containing buffer
+ * @bufidx: Index of buffer in rxpool
+ * @ltbp: (Output) pointer to the long term buffer containing the buffer
+ * @offset: (Output) offset of buffer in the LTB from @ltbp
+ *
+ * Map the given buffer identified by [rxpool, bufidx] to an LTB in the
+ * pool and its corresponding offset. Assume for now that each LTB is of
+ * different size but could possibly be optimized based on the allocation
+ * strategy in alloc_ltb_set().
+ */
+static void map_rxpool_buf_to_ltb(struct ibmvnic_rx_pool *rxpool,
+				  unsigned int bufidx,
+				  struct ibmvnic_long_term_buff **ltbp,
+				  unsigned int *offset)
+{
+	struct ibmvnic_long_term_buff *ltb;
+	int nbufs;	/* # of buffers in one ltb */
+	int i;
+
+	WARN_ON(bufidx >= rxpool->size);
+
+	for (i = 0; i < rxpool->ltb_set.num_ltbs; i++) {
+		ltb = &rxpool->ltb_set.ltbs[i];
+		nbufs = ltb->size / rxpool->buff_size;
+		if (bufidx < nbufs)
+			break;
+		bufidx -= nbufs;
+	}
+
+	*ltbp = ltb;
+	*offset = bufidx * rxpool->buff_size;
+}
+
+/**
+ * map_txpool_buf_to_ltb - Map given txpool buffer to offset in an LTB.
+ * @txpool: The transmit buffer pool containing buffer
+ * @bufidx: Index of buffer in txpool
+ * @ltbp: (Output) pointer to the long term buffer (LTB) containing the buffer
+ * @offset: (Output) offset of buffer in the LTB from @ltbp
+ *
+ * Map the given buffer identified by [txpool, bufidx] to an LTB in the
+ * pool and its corresponding offset.
+ */
+static void map_txpool_buf_to_ltb(struct ibmvnic_tx_pool *txpool,
+				  unsigned int bufidx,
+				  struct ibmvnic_long_term_buff **ltbp,
+				  unsigned int *offset)
+{
+	struct ibmvnic_long_term_buff *ltb;
+	int nbufs;	/* # of buffers in one ltb */
+	int i;
+
+	WARN_ON_ONCE(bufidx >= txpool->num_buffers);
+
+	for (i = 0; i < txpool->ltb_set.num_ltbs; i++) {
+		ltb = &txpool->ltb_set.ltbs[i];
+		nbufs = ltb->size / txpool->buf_size;
+		if (bufidx < nbufs)
+			break;
+		bufidx -= nbufs;
+	}
+
+	*ltbp = ltb;
+	*offset = bufidx * txpool->buf_size;
+}
+
 static void deactivate_rx_pools(struct ibmvnic_adapter *adapter)
 {
 	int i;
@@ -359,6 +566,7 @@ static void replenish_rx_pool(struct ibmvnic_adapter *adapter,
 	struct device *dev = &adapter->vdev->dev;
 	struct ibmvnic_ind_xmit_queue *ind_bufp;
 	struct ibmvnic_sub_crq_queue *rx_scrq;
+	struct ibmvnic_long_term_buff *ltb;
 	union sub_crq *sub_crq;
 	int buffers_added = 0;
 	unsigned long lpar_rc;
@@ -367,7 +575,7 @@ static void replenish_rx_pool(struct ibmvnic_adapter *adapter,
 	dma_addr_t dma_addr;
 	unsigned char *dst;
 	int shift = 0;
-	int index;
+	int bufidx;
 	int i;
 
 	if (!pool->active)
@@ -383,14 +591,14 @@ static void replenish_rx_pool(struct ibmvnic_adapter *adapter,
 	 * be 0.
 	 */
 	for (i = ind_bufp->index; i < count; ++i) {
-		index = pool->free_map[pool->next_free];
+		bufidx = pool->free_map[pool->next_free];
 
 		/* We maybe reusing the skb from earlier resets. Allocate
 		 * only if necessary. But since the LTB may have changed
 		 * during reset (see init_rx_pools()), update LTB below
 		 * even if reusing skb.
 		 */
-		skb = pool->rx_buff[index].skb;
+		skb = pool->rx_buff[bufidx].skb;
 		if (!skb) {
 			skb = netdev_alloc_skb(adapter->netdev,
 					       pool->buff_size);
@@ -405,26 +613,26 @@ static void replenish_rx_pool(struct ibmvnic_adapter *adapter,
 		pool->next_free = (pool->next_free + 1) % pool->size;
 
 		/* Copy the skb to the long term mapped DMA buffer */
-		offset = index * pool->buff_size;
-		dst = pool->long_term_buff.buff + offset;
+		map_rxpool_buf_to_ltb(pool, bufidx, &ltb, &offset);
+		dst = ltb->buff + offset;
 		memset(dst, 0, pool->buff_size);
-		dma_addr = pool->long_term_buff.addr + offset;
+		dma_addr = ltb->addr + offset;
 
 		/* add the skb to an rx_buff in the pool */
-		pool->rx_buff[index].data = dst;
-		pool->rx_buff[index].dma = dma_addr;
-		pool->rx_buff[index].skb = skb;
-		pool->rx_buff[index].pool_index = pool->index;
-		pool->rx_buff[index].size = pool->buff_size;
+		pool->rx_buff[bufidx].data = dst;
+		pool->rx_buff[bufidx].dma = dma_addr;
+		pool->rx_buff[bufidx].skb = skb;
+		pool->rx_buff[bufidx].pool_index = pool->index;
+		pool->rx_buff[bufidx].size = pool->buff_size;
 
 		/* queue the rx_buff for the next send_subcrq_indirect */
 		sub_crq = &ind_bufp->indir_arr[ind_bufp->index++];
 		memset(sub_crq, 0, sizeof(*sub_crq));
 		sub_crq->rx_add.first = IBMVNIC_CRQ_CMD;
 		sub_crq->rx_add.correlator =
-		    cpu_to_be64((u64)&pool->rx_buff[index]);
+		    cpu_to_be64((u64)&pool->rx_buff[bufidx]);
 		sub_crq->rx_add.ioba = cpu_to_be32(dma_addr);
-		sub_crq->rx_add.map_id = pool->long_term_buff.map_id;
+		sub_crq->rx_add.map_id = ltb->map_id;
 
 		/* The length field of the sCRQ is defined to be 24 bits so the
 		 * buffer size needs to be left shifted by a byte before it is
@@ -464,10 +672,10 @@ failure:
 		sub_crq = &ind_bufp->indir_arr[i];
 		rx_buff = (struct ibmvnic_rx_buff *)
 				be64_to_cpu(sub_crq->rx_add.correlator);
-		index = (int)(rx_buff - pool->rx_buff);
-		pool->free_map[pool->next_free] = index;
-		dev_kfree_skb_any(pool->rx_buff[index].skb);
-		pool->rx_buff[index].skb = NULL;
+		bufidx = (int)(rx_buff - pool->rx_buff);
+		pool->free_map[pool->next_free] = bufidx;
+		dev_kfree_skb_any(pool->rx_buff[bufidx].skb);
+		pool->rx_buff[bufidx].skb = NULL;
 	}
 	adapter->replenish_add_buff_failure += ind_bufp->index;
 	atomic_add(buffers_added, &pool->available);
@@ -540,13 +748,15 @@ static int init_stats_token(struct ibmvnic_adapter *adapter)
 {
 	struct device *dev = &adapter->vdev->dev;
 	dma_addr_t stok;
+	int rc;
 
 	stok = dma_map_single(dev, &adapter->stats,
 			      sizeof(struct ibmvnic_statistics),
 			      DMA_FROM_DEVICE);
-	if (dma_mapping_error(dev, stok)) {
-		dev_err(dev, "Couldn't map stats buffer\n");
-		return -1;
+	rc = dma_mapping_error(dev, stok);
+	if (rc) {
+		dev_err(dev, "Couldn't map stats buffer, rc = %d\n", rc);
+		return rc;
 	}
 
 	adapter->stats_token = stok;
@@ -575,7 +785,7 @@ static void release_rx_pools(struct ibmvnic_adapter *adapter)
 
 		kfree(rx_pool->free_map);
 
-		free_long_term_buff(adapter, &rx_pool->long_term_buff);
+		free_ltb_set(adapter, &rx_pool->ltb_set);
 
 		if (!rx_pool->rx_buff)
 			continue;
@@ -655,7 +865,7 @@ static int init_rx_pools(struct net_device *netdev)
 	u64 num_pools;
 	u64 pool_size;		/* # of buffers in one pool */
 	u64 buff_size;
-	int i, j;
+	int i, j, rc;
 
 	pool_size = adapter->req_rx_add_entries_per_subcrq;
 	num_pools = adapter->req_rx_queues;
@@ -674,7 +884,7 @@ static int init_rx_pools(struct net_device *netdev)
 				   GFP_KERNEL);
 	if (!adapter->rx_pool) {
 		dev_err(dev, "Failed to allocate rx pools\n");
-		return -1;
+		return -ENOMEM;
 	}
 
 	/* Set num_active_rx_pools early. If we fail below after partial
@@ -697,6 +907,7 @@ static int init_rx_pools(struct net_device *netdev)
 					    GFP_KERNEL);
 		if (!rx_pool->free_map) {
 			dev_err(dev, "Couldn't alloc free_map %d\n", i);
+			rc = -ENOMEM;
 			goto out_release;
 		}
 
@@ -705,6 +916,7 @@ static int init_rx_pools(struct net_device *netdev)
 					   GFP_KERNEL);
 		if (!rx_pool->rx_buff) {
 			dev_err(dev, "Couldn't alloc rx buffers\n");
			rc = -ENOMEM;
 			goto out_release;
 		}
 	}
@@ -718,8 +930,9 @@ update_ltb:
 		dev_dbg(dev, "Updating LTB for rx pool %d [%d, %d]\n",
 			i, rx_pool->size, rx_pool->buff_size);
 
-		if (alloc_long_term_buff(adapter, &rx_pool->long_term_buff,
-					 rx_pool->size * rx_pool->buff_size))
+		rc = alloc_ltb_set(adapter, &rx_pool->ltb_set,
+				   rx_pool->size, rx_pool->buff_size);
+		if (rc)
 			goto out;
 
 		for (j = 0; j < rx_pool->size; ++j) {
@@ -756,7 +969,7 @@ out:
 	/* We failed to allocate one or more LTBs or map them on the VIOS.
	 * Hold onto the pools and any LTBs that we did allocate/map.
	 */
-	return -1;
+	return rc;
 }
 
 static void release_vpd_data(struct ibmvnic_adapter *adapter)
@@ -775,7 +988,7 @@ static void release_one_tx_pool(struct ibmvnic_adapter *adapter,
 {
 	kfree(tx_pool->tx_buff);
 	kfree(tx_pool->free_map);
-	free_long_term_buff(adapter, &tx_pool->long_term_buff);
+	free_ltb_set(adapter, &tx_pool->ltb_set);
 }
 
 /**
@@ -817,13 +1030,13 @@ static int init_one_tx_pool(struct net_device *netdev,
 				   sizeof(struct ibmvnic_tx_buff),
 				   GFP_KERNEL);
 	if (!tx_pool->tx_buff)
-		return -1;
+		return -ENOMEM;
 
 	tx_pool->free_map = kcalloc(pool_size, sizeof(int), GFP_KERNEL);
 	if (!tx_pool->free_map) {
 		kfree(tx_pool->tx_buff);
 		tx_pool->tx_buff = NULL;
-		return -1;
+		return -ENOMEM;
 	}
 
 	for (i = 0; i < pool_size; i++)
@@ -914,7 +1127,7 @@ static int init_tx_pools(struct net_device *netdev)
 	adapter->tx_pool = kcalloc(num_pools,
 				   sizeof(struct ibmvnic_tx_pool), GFP_KERNEL);
 	if (!adapter->tx_pool)
-		return -1;
+		return -ENOMEM;
 
 	adapter->tso_pool = kcalloc(num_pools,
 				    sizeof(struct ibmvnic_tx_pool), GFP_KERNEL);
@@ -924,7 +1137,7 @@ static int init_tx_pools(struct net_device *netdev)
 	if (!adapter->tso_pool) {
 		kfree(adapter->tx_pool);
 		adapter->tx_pool = NULL;
-		return -1;
+		return -ENOMEM;
 	}
 
 	/* Set num_active_tx_pools early. If we fail below after partial
@@ -965,17 +1178,16 @@ update_ltb:
 	for (i = 0; i < num_pools; i++) {
 		struct ibmvnic_tx_pool *tso_pool;
 		struct ibmvnic_tx_pool *tx_pool;
-		u32 ltb_size;
 
 		tx_pool = &adapter->tx_pool[i];
-		ltb_size = tx_pool->num_buffers * tx_pool->buf_size;
-		if (alloc_long_term_buff(adapter, &tx_pool->long_term_buff,
-					 ltb_size))
-			goto out;
 
-		dev_dbg(dev, "Updated LTB for tx pool %d [%p, %d, %d]\n",
-			i, tx_pool->long_term_buff.buff,
-			tx_pool->num_buffers, tx_pool->buf_size);
+		dev_dbg(dev, "Updating LTB for tx pool %d [%d, %d]\n",
+			i, tx_pool->num_buffers, tx_pool->buf_size);
+
+		rc = alloc_ltb_set(adapter, &tx_pool->ltb_set,
+				   tx_pool->num_buffers, tx_pool->buf_size);
+		if (rc)
+			goto out;
 
 		tx_pool->consumer_index = 0;
 		tx_pool->producer_index = 0;
@@ -984,14 +1196,14 @@ update_ltb:
 			tx_pool->free_map[j] = j;
 
 		tso_pool = &adapter->tso_pool[i];
-		ltb_size = tso_pool->num_buffers * tso_pool->buf_size;
-		if (alloc_long_term_buff(adapter, &tso_pool->long_term_buff,
-					 ltb_size))
-			goto out;
-
-		dev_dbg(dev, "Updated LTB for tso pool %d [%p, %d, %d]\n",
-			i, tso_pool->long_term_buff.buff,
-			tso_pool->num_buffers, tso_pool->buf_size);
+		dev_dbg(dev, "Updating LTB for tso pool %d [%d, %d]\n",
+			i, tso_pool->num_buffers, tso_pool->buf_size);
+
+		rc = alloc_ltb_set(adapter, &tso_pool->ltb_set,
+				   tso_pool->num_buffers, tso_pool->buf_size);
+		if (rc)
+			goto out;
 
 		tso_pool->consumer_index = 0;
 		tso_pool->producer_index = 0;
@@ -1050,7 +1262,7 @@ static int init_napi(struct ibmvnic_adapter *adapter)
 	for (i = 0; i < adapter->req_rx_queues; i++) {
 		netdev_dbg(adapter->netdev, "Adding napi[%d]\n", i);
 		netif_napi_add(adapter->netdev, &adapter->napi[i],
-			       ibmvnic_poll, NAPI_POLL_WEIGHT);
+			       ibmvnic_poll);
 	}
 
 	adapter->num_active_rx_napi = adapter->req_rx_queues;
@@ -1113,7 +1325,7 @@ static int ibmvnic_login(struct net_device *netdev)
 		retry = false;
 		if (retry_count > retries) {
 			netdev_warn(netdev, "Login attempts exceeded\n");
-			return -1;
+			return -EACCES;
 		}
 
 		adapter->init_done_rc = 0;
@@ -1154,25 +1366,26 @@ static int ibmvnic_login(struct net_device *netdev)
 							 timeout)) {
 				netdev_warn(netdev,
 					    "Capabilities query timed out\n");
-				return -1;
+				return -ETIMEDOUT;
 			}
 
 			rc = init_sub_crqs(adapter);
 			if (rc) {
 				netdev_warn(netdev,
 					    "SCRQ initialization failed\n");
-				return -1;
+				return rc;
 			}
 
 			rc = init_sub_crq_irqs(adapter);
 			if (rc) {
 				netdev_warn(netdev,
 					    "SCRQ irq initialization failed\n");
-				return -1;
+				return rc;
 			}
 		} else if (adapter->init_done_rc) {
-			netdev_warn(netdev, "Adapter login failed\n");
-			return -1;
+			netdev_warn(netdev, "Adapter login failed, init_done_rc = %d\n",
+				    adapter->init_done_rc);
+			return -EIO;
 		}
 	} while (retry);
 
@@ -1231,7 +1444,7 @@ static int set_link_state(struct ibmvnic_adapter *adapter, u8 link_state)
 		if (!wait_for_completion_timeout(&adapter->init_done,
 						 timeout)) {
 			netdev_err(netdev, "timeout setting link state\n");
-			return -1;
+			return -ETIMEDOUT;
 		}
 
 		if (adapter->init_done_rc == PARTIALSUCCESS) {
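With pools now backed by LTB sets, a buffer index no longer maps to a single flat
offset; map_rxpool_buf_to_ltb()/map_txpool_buf_to_ltb() above walk the set, consuming
whole LTBs until the index falls inside one. A standalone sketch of that walk with
concrete (illustrative) numbers:

	#include <stdio.h>

	int main(void)
	{
		/* three chunks: 4096, 4096 and 1024 buffers of 2048 bytes */
		int ltb_sizes[] = { 4096 * 2048, 4096 * 2048, 1024 * 2048 };
		int buff_size = 2048, bufidx = 5000, i;

		for (i = 0; i < 3; i++) {
			int nbufs = ltb_sizes[i] / buff_size;

			if (bufidx < nbufs)
				break;	/* buffer lives in this chunk */
			bufidx -= nbufs;
		}
		printf("buffer lives in LTB %d at offset %d\n",
		       i, bufidx * buff_size);
		return 0;
	}
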
@@ -1418,10 +1631,19 @@ static int __ibmvnic_open(struct net_device *netdev)
 	rc = set_link_state(adapter, IBMVNIC_LOGICAL_LNK_UP);
 	if (rc) {
 		ibmvnic_napi_disable(adapter);
-		release_resources(adapter);
+		ibmvnic_disable_irqs(adapter);
 		return rc;
 	}
 
+	adapter->tx_queues_active = true;
+
+	/* Since queues were stopped until now, there shouldn't be any
+	 * one in ibmvnic_complete_tx() or ibmvnic_xmit() so maybe we
+	 * don't need the synchronize_rcu()? Leaving it for consistency
+	 * with setting ->tx_queues_active = false.
+	 */
+	synchronize_rcu();
+
 	netif_tx_start_all_queues(netdev);
 
 	if (prev_state == VNIC_CLOSED) {
@@ -1468,9 +1690,6 @@ static int ibmvnic_open(struct net_device *netdev)
 		rc = init_resources(adapter);
 		if (rc) {
 			netdev_err(netdev, "failed to initialize resources\n");
-			release_resources(adapter);
-			release_rx_pools(adapter);
-			release_tx_pools(adapter);
 			goto out;
 		}
 	}
@@ -1487,6 +1706,13 @@ out:
 		adapter->state = VNIC_OPEN;
 		rc = 0;
 	}
+
+	if (rc) {
+		release_resources(adapter);
+		release_rx_pools(adapter);
+		release_tx_pools(adapter);
+	}
+
 	return rc;
 }
 
@@ -1592,6 +1818,14 @@ static void ibmvnic_cleanup(struct net_device *netdev)
 	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
 
 	/* ensure that transmissions are stopped if called by do_reset */
+
+	adapter->tx_queues_active = false;
+
+	/* Ensure complete_tx() and ibmvnic_xmit() see ->tx_queues_active
+	 * update so they don't restart a queue after we stop it below.
+	 */
+	synchronize_rcu();
+
 	if (test_bit(0, &adapter->resetting))
 		netif_tx_disable(netdev);
 	else
@@ -1831,14 +2065,21 @@ static void ibmvnic_tx_scrq_clean_buffer(struct ibmvnic_adapter *adapter,
 		tx_buff->skb = NULL;
 		adapter->netdev->stats.tx_dropped++;
 	}
+
 	ind_bufp->index = 0;
+
 	if (atomic_sub_return(entries, &tx_scrq->used) <=
 	    (adapter->req_tx_entries_per_subcrq / 2) &&
-	    __netif_subqueue_stopped(adapter->netdev, queue_num) &&
-	    !test_bit(0, &adapter->resetting)) {
-		netif_wake_subqueue(adapter->netdev, queue_num);
-		netdev_dbg(adapter->netdev, "Started queue %d\n",
-			   queue_num);
+	    __netif_subqueue_stopped(adapter->netdev, queue_num)) {
+		rcu_read_lock();
+
+		if (adapter->tx_queues_active) {
+			netif_wake_subqueue(adapter->netdev, queue_num);
+			netdev_dbg(adapter->netdev, "Started queue %d\n",
+				   queue_num);
+		}
+
+		rcu_read_unlock();
 	}
 }
 
@@ -1875,6 +2116,7 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
 	struct ibmvnic_ind_xmit_queue *ind_bufp;
 	struct ibmvnic_tx_buff *tx_buff = NULL;
 	struct ibmvnic_sub_crq_queue *tx_scrq;
+	struct ibmvnic_long_term_buff *ltb;
 	struct ibmvnic_tx_pool *tx_pool;
 	unsigned int tx_send_failed = 0;
 	netdev_tx_t ret = NETDEV_TX_OK;
@@ -1890,14 +2132,15 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
 	unsigned int offset;
 	int num_entries = 1;
 	unsigned char *dst;
-	int index = 0;
+	int bufidx = 0;
 	u8 proto = 0;
 
-	tx_scrq = adapter->tx_scrq[queue_num];
-	txq = netdev_get_tx_queue(netdev, queue_num);
-	ind_bufp = &tx_scrq->ind_buf;
-
-	if (test_bit(0, &adapter->resetting)) {
+	/* If a reset is in progress, drop the packet since
+	 * the scrqs may get torn down. Otherwise use the
+	 * rcu to ensure reset waits for us to complete.
+	 */
+	rcu_read_lock();
+	if (!adapter->tx_queues_active) {
 		dev_kfree_skb_any(skb);
 
 		tx_send_failed++;
@@ -1906,6 +2149,10 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
 		goto out;
 	}
 
+	tx_scrq = adapter->tx_scrq[queue_num];
+	txq = netdev_get_tx_queue(netdev, queue_num);
+	ind_bufp = &tx_scrq->ind_buf;
+
 	if (ibmvnic_xmit_workarounds(skb, netdev)) {
 		tx_dropped++;
 		tx_send_failed++;
@@ -1913,14 +2160,15 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
 		ibmvnic_tx_scrq_flush(adapter, tx_scrq);
 		goto out;
 	}
+
 	if (skb_is_gso(skb))
 		tx_pool = &adapter->tso_pool[queue_num];
 	else
 		tx_pool = &adapter->tx_pool[queue_num];
 
-	index = tx_pool->free_map[tx_pool->consumer_index];
+	bufidx = tx_pool->free_map[tx_pool->consumer_index];
 
-	if (index == IBMVNIC_INVALID_MAP) {
+	if (bufidx == IBMVNIC_INVALID_MAP) {
 		dev_kfree_skb_any(skb);
 		tx_send_failed++;
 		tx_dropped++;
@@ -1931,10 +2179,11 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
 
 	tx_pool->free_map[tx_pool->consumer_index] = IBMVNIC_INVALID_MAP;
 
-	offset = index * tx_pool->buf_size;
-	dst = tx_pool->long_term_buff.buff + offset;
+	map_txpool_buf_to_ltb(tx_pool, bufidx, &ltb, &offset);
+
+	dst = ltb->buff + offset;
 	memset(dst, 0, tx_pool->buf_size);
-	data_dma_addr = tx_pool->long_term_buff.addr + offset;
+	data_dma_addr = ltb->addr + offset;
 
 	if (skb_shinfo(skb)->nr_frags) {
 		int cur, i;
@@ -1961,9 +2210,9 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
 	tx_pool->consumer_index =
 	    (tx_pool->consumer_index + 1) % tx_pool->num_buffers;
 
-	tx_buff = &tx_pool->tx_buff[index];
+	tx_buff = &tx_pool->tx_buff[bufidx];
 	tx_buff->skb = skb;
-	tx_buff->index = index;
+	tx_buff->index = bufidx;
 	tx_buff->pool_index = queue_num;
 
 	memset(&tx_crq, 0, sizeof(tx_crq));
@@ -1975,10 +2224,10 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
 
 	if (skb_is_gso(skb))
 		tx_crq.v1.correlator =
-			cpu_to_be32(index | IBMVNIC_TSO_POOL_MASK);
+			cpu_to_be32(bufidx | IBMVNIC_TSO_POOL_MASK);
 	else
-		tx_crq.v1.correlator = cpu_to_be32(index);
-	tx_crq.v1.dma_reg = cpu_to_be16(tx_pool->long_term_buff.map_id);
+		tx_crq.v1.correlator = cpu_to_be32(bufidx);
+	tx_crq.v1.dma_reg = cpu_to_be16(ltb->map_id);
 	tx_crq.v1.sge_len = cpu_to_be32(skb->len);
 	tx_crq.v1.ioba = cpu_to_be64(data_dma_addr);
 
@@ -2042,7 +2291,7 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
 	tx_packets++;
 	tx_bytes += skb->len;
-	txq->trans_start = jiffies;
+	txq_trans_cond_update(txq);
 	ret = NETDEV_TX_OK;
 	goto out;
 
@@ -2067,6 +2316,7 @@ tx_err:
 		netif_carrier_off(netdev);
 	}
 out:
+	rcu_read_unlock();
 	netdev->stats.tx_dropped += tx_dropped;
 	netdev->stats.tx_bytes += tx_bytes;
 	netdev->stats.tx_packets += tx_packets;
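The tx_queues_active/RCU scheme introduced above is worth spelling out: the hot paths
(xmit and tx completion) read the flag inside an RCU read-side section, and the reset
path clears the flag and calls synchronize_rcu() before tearing queues down, so no
in-flight sender can touch a dying sub-CRQ. A minimal sketch of the pattern; my_adapter
and the helper names are illustrative:

	#include <linux/rcupdate.h>
	#include <linux/netdevice.h>

	struct my_adapter {
		struct net_device *netdev;
		bool tx_queues_active;
	};

	static void my_try_wake(struct my_adapter *a, int q)
	{
		rcu_read_lock();
		/* a reset may be tearing the queues down right now */
		if (a->tx_queues_active)
			netif_wake_subqueue(a->netdev, q);
		rcu_read_unlock();
	}

	static void my_quiesce(struct my_adapter *a)
	{
		a->tx_queues_active = false;
		/* wait for every in-flight my_try_wake()/xmit to finish */
		synchronize_rcu();
		netif_tx_disable(a->netdev);
	}

This trades a cheap rcu_read_lock() on the fast path for an occasional
synchronize_rcu() on the (rare, already slow) reset path.
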
@@ -2202,6 +2452,19 @@ static const char *reset_reason_to_string(enum ibmvnic_reset_reason reason)
 }
 
 /*
+ * Initialize the init_done completion and return code values. We
+ * can get a transport event just after registering the CRQ and the
+ * tasklet will use this to communicate the transport event. To ensure
+ * we don't miss the notification/error, initialize these _before_
+ * regisering the CRQ.
+ */
+static inline void reinit_init_done(struct ibmvnic_adapter *adapter)
+{
+	reinit_completion(&adapter->init_done);
+	adapter->init_done_rc = 0;
+}
+
+/*
  * do_reset returns zero if we are able to keep processing reset events, or
  * non-zero if we hit a fatal error and must halt.
  */
@@ -2288,7 +2551,7 @@ static int do_reset(struct ibmvnic_adapter *adapter,
 			/* If someone else changed the adapter state
			 * when we dropped the rtnl, fail the reset
			 */
-			rc = -1;
+			rc = -EAGAIN;
 			goto out;
 		}
 		adapter->state = VNIC_CLOSED;
@@ -2307,6 +2570,8 @@ static int do_reset(struct ibmvnic_adapter *adapter,
 		 */
 		adapter->state = VNIC_PROBED;
 
+		reinit_init_done(adapter);
+
 		if (adapter->reset_reason == VNIC_RESET_CHANGE_PARAM) {
 			rc = init_crq_queue(adapter);
 		} else if (adapter->reset_reason == VNIC_RESET_MOBILITY) {
@@ -2330,10 +2595,8 @@ static int do_reset(struct ibmvnic_adapter *adapter,
 		}
 
 		rc = ibmvnic_reset_init(adapter, true);
-		if (rc) {
-			rc = IBMVNIC_INIT_FAILED;
+		if (rc)
 			goto out;
-		}
 
 		/* If the adapter was in PROBE or DOWN state prior to the reset,
		 * exit here.
@@ -2452,7 +2715,8 @@ static int do_hard_reset(struct ibmvnic_adapter *adapter,
 	 */
 	adapter->state = VNIC_PROBED;
 
-	reinit_completion(&adapter->init_done);
+	reinit_init_done(adapter);
+
 	rc = init_crq_queue(adapter);
 	if (rc) {
 		netdev_err(adapter->netdev,
@@ -2593,22 +2857,82 @@ out:
 static void __ibmvnic_reset(struct work_struct *work)
 {
 	struct ibmvnic_adapter *adapter;
-	bool saved_state = false;
+	unsigned int timeout = 5000;
 	struct ibmvnic_rwi *tmprwi;
+	bool saved_state = false;
 	struct ibmvnic_rwi *rwi;
 	unsigned long flags;
+	struct device *dev;
+	bool need_reset;
+	int num_fails = 0;
 	u32 reset_state;
 	int rc = 0;
 
 	adapter = container_of(work, struct ibmvnic_adapter, ibmvnic_reset);
+	dev = &adapter->vdev->dev;
 
-	if (test_and_set_bit_lock(0, &adapter->resetting)) {
+	/* Wait for ibmvnic_probe() to complete. If probe is taking too long
+	 * or if another reset is in progress, defer work for now. If probe
+	 * eventually fails it will flush and terminate our work.
+	 *
+	 * Three possibilities here:
+	 * 1. Adpater being removed  - just return
+	 * 2. Timed out on probe or another reset in progress - delay the work
+	 * 3. Completed probe - perform any resets in queue
+	 */
+	if (adapter->state == VNIC_PROBING &&
+	    !wait_for_completion_timeout(&adapter->probe_done, timeout)) {
+		dev_err(dev, "Reset thread timed out on probe");
 		queue_delayed_work(system_long_wq,
 				   &adapter->ibmvnic_delayed_reset,
 				   IBMVNIC_RESET_DELAY);
 		return;
 	}
 
+	/* adapter is done with probe (i.e state is never VNIC_PROBING now) */
+	if (adapter->state == VNIC_REMOVING)
+		return;
+
+	/* ->rwi_list is stable now (no one else is removing entries) */
+
+	/* ibmvnic_probe() may have purged the reset queue after we were
+	 * scheduled to process a reset so there maybe no resets to process.
+	 * Before setting the ->resetting bit though, we have to make sure
+	 * that there is infact a reset to process. Otherwise we may race
+	 * with ibmvnic_open() and end up leaving the vnic down:
+	 *
+	 *	__ibmvnic_reset()	    ibmvnic_open()
+	 *	-----------------	    --------------
+	 *
+	 * set ->resetting bit
+	 *				find ->resetting bit is set
+	 *				set ->state to IBMVNIC_OPEN (i.e
+	 *				assume reset will open device)
+	 *				return
+	 * find reset queue empty
+	 * return
+	 *
+	 *	Neither performed vnic login/open and vnic stays down
+	 *
+	 * If we hold the lock and conditionally set the bit, either we
+	 * or ibmvnic_open() will complete the open.
+	 */
+	need_reset = false;
+	spin_lock(&adapter->rwi_lock);
+	if (!list_empty(&adapter->rwi_list)) {
+		if (test_and_set_bit_lock(0, &adapter->resetting)) {
+			queue_delayed_work(system_long_wq,
+					   &adapter->ibmvnic_delayed_reset,
+					   IBMVNIC_RESET_DELAY);
+		} else {
+			need_reset = true;
+		}
+	}
+	spin_unlock(&adapter->rwi_lock);
+
+	if (!need_reset)
+		return;
+
 	rwi = get_next_rwi(adapter);
 	while (rwi) {
 		spin_lock_irqsave(&adapter->state_lock, flags);
@@ -2651,11 +2975,23 @@ static void __ibmvnic_reset(struct work_struct *work)
 				rc = do_hard_reset(adapter, rwi, reset_state);
 				rtnl_unlock();
 			}
-			if (rc) {
-				/* give backing device time to settle down */
+			if (rc)
+				num_fails++;
+			else
+				num_fails = 0;
+
+			/* If auto-priority-failover is enabled we can get
+			 * back to back failovers during resets, resulting
+			 * in at least two failed resets (from high-priority
+			 * backing device to low-priority one and then back)
+			 * If resets continue to fail beyond that, give the
+			 * adapter some time to settle down before retrying.
+			 */
+			if (num_fails >= 3) {
 				netdev_dbg(adapter->netdev,
-					   "[S:%s] Hard reset failed, waiting 60 secs\n",
-					   adapter_state_to_string(adapter->state));
+					   "[S:%s] Hard reset failed %d times, waiting 60 secs\n",
+					   adapter_state_to_string(adapter->state),
+					   num_fails);
 				set_current_state(TASK_UNINTERRUPTIBLE);
 				schedule_timeout(60 * HZ);
 			}
@@ -2671,19 +3007,19 @@ static void __ibmvnic_reset(struct work_struct *work)
 		rwi = get_next_rwi(adapter);
 
 		/*
-		 * If there is another reset queued, free the previous rwi
-		 * and process the new reset even if previous reset failed
-		 * (the previous reset could have failed because of a fail
-		 * over for instance, so process the fail over).
-		 *
		 * If there are no resets queued and the previous reset failed,
		 * the adapter would be in an undefined state. So retry the
		 * previous reset as a hard reset.
+		 *
+		 * Else, free the previous rwi and, if there is another reset
+		 * queued, process the new reset even if previous reset failed
+		 * (the previous reset could have failed because of a fail
+		 * over for instance, so process the fail over).
 		 */
-		if (rwi)
-			kfree(tmprwi);
-		else if (rc)
+		if (!rwi && rc)
 			rwi = tmprwi;
+		else
+			kfree(tmprwi);
 
 		if (rwi && (rwi->reset_reason == VNIC_RESET_FAILOVER ||
 			    rwi->reset_reason == VNIC_RESET_MOBILITY || rc))
@@ -2713,12 +3049,23 @@ static void __ibmvnic_delayed_reset(struct work_struct *work)
 	__ibmvnic_reset(&adapter->ibmvnic_reset);
 }
 
+static void flush_reset_queue(struct ibmvnic_adapter *adapter)
+{
+	struct list_head *entry, *tmp_entry;
+
+	if (!list_empty(&adapter->rwi_list)) {
+		list_for_each_safe(entry, tmp_entry, &adapter->rwi_list) {
+			list_del(entry);
+			kfree(list_entry(entry, struct ibmvnic_rwi, list));
+		}
+	}
+}
+
 static int ibmvnic_reset(struct ibmvnic_adapter *adapter,
 			 enum ibmvnic_reset_reason reason)
 {
-	struct list_head *entry, *tmp_entry;
-	struct ibmvnic_rwi *rwi, *tmp;
 	struct net_device *netdev = adapter->netdev;
+	struct ibmvnic_rwi *rwi, *tmp;
 	unsigned long flags;
 	int ret;
 
@@ -2737,13 +3084,6 @@ static int ibmvnic_reset(struct ibmvnic_adapter *adapter,
 		goto err;
 	}
 
-	if (adapter->state == VNIC_PROBING) {
-		netdev_warn(netdev, "Adapter reset during probe\n");
-		adapter->init_done_rc = -EAGAIN;
-		ret = EAGAIN;
-		goto err;
-	}
-
 	list_for_each_entry(tmp, &adapter->rwi_list, list) {
 		if (tmp->reset_reason == reason) {
 			netdev_dbg(netdev, "Skipping matching reset, reason=%s\n",
@@ -2761,10 +3101,9 @@ static int ibmvnic_reset(struct ibmvnic_adapter *adapter,
 	/* if we just received a transport event,
	 * flush reset queue and process this reset
	 */
-	if (adapter->force_reset_recovery && !list_empty(&adapter->rwi_list)) {
-		list_for_each_safe(entry, tmp_entry, &adapter->rwi_list)
-			list_del(entry);
-	}
+	if (adapter->force_reset_recovery)
+		flush_reset_queue(adapter);
+
 	rwi->reset_reason = reason;
 	list_add_tail(&rwi->list, &adapter->rwi_list);
 	netdev_dbg(adapter->netdev, "Scheduling reset (reason %s)\n",
@@ -3072,17 +3411,14 @@ static u32 ibmvnic_get_link(struct net_device *netdev)
 }
 
 static void ibmvnic_get_ringparam(struct net_device *netdev,
-				  struct ethtool_ringparam *ring)
+				  struct ethtool_ringparam *ring,
+				  struct kernel_ethtool_ringparam *kernel_ring,
+				  struct netlink_ext_ack *extack)
 {
 	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
 
-	if (adapter->priv_flags & IBMVNIC_USE_SERVER_MAXES) {
-		ring->rx_max_pending = adapter->max_rx_add_entries_per_subcrq;
-		ring->tx_max_pending = adapter->max_tx_entries_per_subcrq;
-	} else {
-		ring->rx_max_pending = IBMVNIC_MAX_QUEUE_SZ;
-		ring->tx_max_pending = IBMVNIC_MAX_QUEUE_SZ;
-	}
+	ring->rx_max_pending = adapter->max_rx_add_entries_per_subcrq;
+	ring->tx_max_pending = adapter->max_tx_entries_per_subcrq;
 	ring->rx_mini_max_pending = 0;
 	ring->rx_jumbo_max_pending = 0;
 	ring->rx_pending = adapter->req_rx_add_entries_per_subcrq;
@@ -3092,26 +3428,26 @@ static void ibmvnic_get_ringparam(struct net_device *netdev,
 }
 
 static int ibmvnic_set_ringparam(struct net_device *netdev,
-				 struct ethtool_ringparam *ring)
+				 struct ethtool_ringparam *ring,
+				 struct kernel_ethtool_ringparam *kernel_ring,
+				 struct netlink_ext_ack *extack)
 {
 	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
-	int ret;
 
-	ret = 0;
+	if (ring->rx_pending > adapter->max_rx_add_entries_per_subcrq ||
+	    ring->tx_pending > adapter->max_tx_entries_per_subcrq) {
+		netdev_err(netdev, "Invalid request.\n");
+		netdev_err(netdev, "Max tx buffers = %llu\n",
+			   adapter->max_rx_add_entries_per_subcrq);
+		netdev_err(netdev, "Max rx buffers = %llu\n",
+			   adapter->max_tx_entries_per_subcrq);
+		return -EINVAL;
+	}
+
 	adapter->desired.rx_entries = ring->rx_pending;
 	adapter->desired.tx_entries = ring->tx_pending;
 
-	ret = wait_for_reset(adapter);
-
-	if (!ret &&
-	    (adapter->req_rx_add_entries_per_subcrq != ring->rx_pending ||
-	     adapter->req_tx_entries_per_subcrq != ring->tx_pending))
-		netdev_info(netdev,
-			    "Could not match full ringsize request. Requested: RX %d, TX %d; Allowed: RX %llu, TX %llu\n",
-			    ring->rx_pending, ring->tx_pending,
-			    adapter->req_rx_add_entries_per_subcrq,
-			    adapter->req_tx_entries_per_subcrq);
-	return ret;
+	return wait_for_reset(adapter);
 }
 
 static void ibmvnic_get_channels(struct net_device *netdev,
@@ -3119,14 +3455,8 @@ static void ibmvnic_get_channels(struct net_device *netdev,
 {
 	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
 
-	if (adapter->priv_flags & IBMVNIC_USE_SERVER_MAXES) {
-		channels->max_rx = adapter->max_rx_queues;
-		channels->max_tx = adapter->max_tx_queues;
-	} else {
-		channels->max_rx = IBMVNIC_MAX_QUEUES;
-		channels->max_tx = IBMVNIC_MAX_QUEUES;
-	}
-
+	channels->max_rx = adapter->max_rx_queues;
+	channels->max_tx = adapter->max_tx_queues;
 	channels->max_other = 0;
 	channels->max_combined = 0;
 	channels->rx_count = adapter->req_rx_queues;
@@ -3139,22 +3469,11 @@ static int ibmvnic_set_channels(struct net_device *netdev,
 				struct ethtool_channels *channels)
 {
 	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
-	int ret;
 
-	ret = 0;
 	adapter->desired.rx_queues = channels->rx_count;
 	adapter->desired.tx_queues = channels->tx_count;
 
-	ret = wait_for_reset(adapter);
-
-	if (!ret &&
-	    (adapter->req_rx_queues != channels->rx_count ||
-	     adapter->req_tx_queues != channels->tx_count))
-		netdev_info(netdev,
-			    "Could not match full channels request. Requested: RX %d, TX %d; Allowed: RX %llu, TX %llu\n",
-			    channels->rx_count, channels->tx_count,
-			    adapter->req_rx_queues, adapter->req_tx_queues);
-	return ret;
+	return wait_for_reset(adapter);
 }
 
 static void ibmvnic_get_strings(struct net_device *dev, u32 stringset, u8 *data)
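The reworked handlers above follow a "record desired value, validate, then reset to
apply" pattern: the device renegotiates capabilities with firmware during the reset and
may still grant less than requested. A compressed sketch of that flow, with illustrative
types and names (my_adapter, my_wait_for_reset):

	struct my_adapter {
		unsigned long long max_rx, max_tx;
		struct { unsigned int rx_entries, tx_entries; } desired;
	};

	int my_wait_for_reset(struct my_adapter *a);	/* illustrative */

	static int my_set_ringparam(struct my_adapter *a,
				    unsigned int rx, unsigned int tx)
	{
		/* fail fast instead of letting firmware silently clamp */
		if (rx > a->max_rx || tx > a->max_tx)
			return -EINVAL;

		a->desired.rx_entries = rx;
		a->desired.tx_entries = tx;
		/* the reset path picks up a->desired and renegotiates */
		return my_wait_for_reset(a);
	}
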
ETH_SS_PRIV_FLAGS: - for (i = 0; i < ARRAY_SIZE(ibmvnic_priv_flags); i++) - strcpy(data + i * ETH_GSTRING_LEN, - ibmvnic_priv_flags[i]); - break; - default: - return; + snprintf(data, ETH_GSTRING_LEN, "rx%d_interrupts", i); + data += ETH_GSTRING_LEN; } } @@ -3211,8 +3519,6 @@ static int ibmvnic_get_sset_count(struct net_device *dev, int sset) return ARRAY_SIZE(ibmvnic_stats) + adapter->req_tx_queues * NUM_TX_STATS + adapter->req_rx_queues * NUM_RX_STATS; - case ETH_SS_PRIV_FLAGS: - return ARRAY_SIZE(ibmvnic_priv_flags); default: return -EOPNOTSUPP; } @@ -3265,26 +3571,6 @@ static void ibmvnic_get_ethtool_stats(struct net_device *dev, } } -static u32 ibmvnic_get_priv_flags(struct net_device *netdev) -{ - struct ibmvnic_adapter *adapter = netdev_priv(netdev); - - return adapter->priv_flags; -} - -static int ibmvnic_set_priv_flags(struct net_device *netdev, u32 flags) -{ - struct ibmvnic_adapter *adapter = netdev_priv(netdev); - bool which_maxes = !!(flags & IBMVNIC_USE_SERVER_MAXES); - - if (which_maxes) - adapter->priv_flags |= IBMVNIC_USE_SERVER_MAXES; - else - adapter->priv_flags &= ~IBMVNIC_USE_SERVER_MAXES; - - return 0; -} - static const struct ethtool_ops ibmvnic_ethtool_ops = { .get_drvinfo = ibmvnic_get_drvinfo, .get_msglevel = ibmvnic_get_msglevel, @@ -3298,8 +3584,6 @@ static const struct ethtool_ops ibmvnic_ethtool_ops = { .get_sset_count = ibmvnic_get_sset_count, .get_ethtool_stats = ibmvnic_get_ethtool_stats, .get_link_ksettings = ibmvnic_get_link_ksettings, - .get_priv_flags = ibmvnic_get_priv_flags, - .set_priv_flags = ibmvnic_set_priv_flags, }; /* Routines for managing CRQs/sCRQs */ @@ -3536,6 +3820,30 @@ static int disable_scrq_irq(struct ibmvnic_adapter *adapter, return rc; } +/* We can not use the IRQ chip EOI handler because that has the + * unintended effect of changing the interrupt priority. + */ +static void ibmvnic_xics_eoi(struct device *dev, struct ibmvnic_sub_crq_queue *scrq) +{ + u64 val = 0xff000000 | scrq->hw_irq; + unsigned long rc; + + rc = plpar_hcall_norets(H_EOI, val); + if (rc) + dev_err(dev, "H_EOI FAILED irq 0x%llx. rc=%ld\n", val, rc); +} + +/* Due to a firmware bug, the hypervisor can send an interrupt to a + * transmit or receive queue just prior to a partition migration. + * Force an EOI after migration. + */ +static void ibmvnic_clear_pending_interrupt(struct device *dev, + struct ibmvnic_sub_crq_queue *scrq) +{ + if (!xive_enabled()) + ibmvnic_xics_eoi(dev, scrq); +} + static int enable_scrq_irq(struct ibmvnic_adapter *adapter, struct ibmvnic_sub_crq_queue *scrq) { @@ -3549,15 +3857,7 @@ static int enable_scrq_irq(struct ibmvnic_adapter *adapter, if (test_bit(0, &adapter->resetting) && adapter->reset_reason == VNIC_RESET_MOBILITY) { - u64 val = (0xff000000) | scrq->hw_irq; - - rc = plpar_hcall_norets(H_EOI, val); - /* H_EOI would fail with rc = H_FUNCTION when running - * in XIVE mode which is expected, but not an error. - */ - if (rc && (rc != H_FUNCTION)) - dev_err(dev, "H_EOI FAILED irq 0x%llx. 
rc=%ld\n", - val, rc); + ibmvnic_clear_pending_interrupt(dev, scrq); } rc = plpar_hcall_norets(H_VIOCTL, adapter->vdev->unit_address, @@ -3628,9 +3928,15 @@ restart_loop: (adapter->req_tx_entries_per_subcrq / 2) && __netif_subqueue_stopped(adapter->netdev, scrq->pool_index)) { - netif_wake_subqueue(adapter->netdev, scrq->pool_index); - netdev_dbg(adapter->netdev, "Started queue %d\n", - scrq->pool_index); + rcu_read_lock(); + if (adapter->tx_queues_active) { + netif_wake_subqueue(adapter->netdev, + scrq->pool_index); + netdev_dbg(adapter->netdev, + "Started queue %d\n", + scrq->pool_index); + } + rcu_read_unlock(); } } @@ -3759,7 +4065,7 @@ static int init_sub_crqs(struct ibmvnic_adapter *adapter) allqueues = kcalloc(total_queues, sizeof(*allqueues), GFP_KERNEL); if (!allqueues) - return -1; + return -ENOMEM; for (i = 0; i < total_queues; i++) { allqueues[i] = init_sub_crq_queue(adapter); @@ -3828,7 +4134,7 @@ tx_failed: for (i = 0; i < registered_queues; i++) release_sub_crq_queue(adapter, allqueues[i], 1); kfree(allqueues); - return -1; + return -ENOMEM; } static void send_request_cap(struct ibmvnic_adapter *adapter, int retry) @@ -3836,11 +4142,25 @@ static void send_request_cap(struct ibmvnic_adapter *adapter, int retry) struct device *dev = &adapter->vdev->dev; union ibmvnic_crq crq; int max_entries; + int cap_reqs; + + /* We send out 6 or 7 REQUEST_CAPABILITY CRQs below (depending on + * the PROMISC flag). Initialize this count upfront. When the tasklet + * receives a response to all of these, it will send the next protocol + * message (QUERY_IP_OFFLOAD). + */ + if (!(adapter->netdev->flags & IFF_PROMISC) || + adapter->promisc_supported) + cap_reqs = 7; + else + cap_reqs = 6; if (!retry) { /* Sub-CRQ entries are 32 byte long */ int entries_page = 4 * PAGE_SIZE / (sizeof(u64) * 4); + atomic_set(&adapter->running_cap_crqs, cap_reqs); + if (adapter->min_tx_entries_per_subcrq > entries_page || adapter->min_rx_add_entries_per_subcrq > entries_page) { dev_err(dev, "Fatal, invalid entries per sub-crq\n"); @@ -3859,16 +4179,16 @@ static void send_request_cap(struct ibmvnic_adapter *adapter, int retry) adapter->desired.rx_entries = adapter->max_rx_add_entries_per_subcrq; - max_entries = IBMVNIC_MAX_LTB_SIZE / + max_entries = IBMVNIC_LTB_SET_SIZE / (adapter->req_mtu + IBMVNIC_BUFFER_HLEN); if ((adapter->req_mtu + IBMVNIC_BUFFER_HLEN) * - adapter->desired.tx_entries > IBMVNIC_MAX_LTB_SIZE) { + adapter->desired.tx_entries > IBMVNIC_LTB_SET_SIZE) { adapter->desired.tx_entries = max_entries; } if ((adapter->req_mtu + IBMVNIC_BUFFER_HLEN) * - adapter->desired.rx_entries > IBMVNIC_MAX_LTB_SIZE) { + adapter->desired.rx_entries > IBMVNIC_LTB_SET_SIZE) { adapter->desired.rx_entries = max_entries; } @@ -3901,44 +4221,45 @@ static void send_request_cap(struct ibmvnic_adapter *adapter, int retry) adapter->opt_rx_comp_queues; adapter->req_rx_add_queues = adapter->max_rx_add_queues; + } else { + atomic_add(cap_reqs, &adapter->running_cap_crqs); } - memset(&crq, 0, sizeof(crq)); crq.request_capability.first = IBMVNIC_CRQ_CMD; crq.request_capability.cmd = REQUEST_CAPABILITY; crq.request_capability.capability = cpu_to_be16(REQ_TX_QUEUES); crq.request_capability.number = cpu_to_be64(adapter->req_tx_queues); - atomic_inc(&adapter->running_cap_crqs); + cap_reqs--; ibmvnic_send_crq(adapter, &crq); crq.request_capability.capability = cpu_to_be16(REQ_RX_QUEUES); crq.request_capability.number = cpu_to_be64(adapter->req_rx_queues); - atomic_inc(&adapter->running_cap_crqs); + cap_reqs--; ibmvnic_send_crq(adapter, 
&crq);
 
 	crq.request_capability.capability = cpu_to_be16(REQ_RX_ADD_QUEUES);
 	crq.request_capability.number =
 	    cpu_to_be64(adapter->req_rx_add_queues);
-	atomic_inc(&adapter->running_cap_crqs);
+	cap_reqs--;
 	ibmvnic_send_crq(adapter, &crq);
 
 	crq.request_capability.capability =
 	    cpu_to_be16(REQ_TX_ENTRIES_PER_SUBCRQ);
 	crq.request_capability.number =
 	    cpu_to_be64(adapter->req_tx_entries_per_subcrq);
-	atomic_inc(&adapter->running_cap_crqs);
+	cap_reqs--;
 	ibmvnic_send_crq(adapter, &crq);
 
 	crq.request_capability.capability =
 	    cpu_to_be16(REQ_RX_ADD_ENTRIES_PER_SUBCRQ);
 	crq.request_capability.number =
 	    cpu_to_be64(adapter->req_rx_add_entries_per_subcrq);
-	atomic_inc(&adapter->running_cap_crqs);
+	cap_reqs--;
 	ibmvnic_send_crq(adapter, &crq);
 
 	crq.request_capability.capability = cpu_to_be16(REQ_MTU);
 	crq.request_capability.number = cpu_to_be64(adapter->req_mtu);
-	atomic_inc(&adapter->running_cap_crqs);
+	cap_reqs--;
 	ibmvnic_send_crq(adapter, &crq);
 
 	if (adapter->netdev->flags & IFF_PROMISC) {
@@ -3946,16 +4267,21 @@ static void send_request_cap(struct ibmvnic_adapter *adapter, int retry)
 			crq.request_capability.capability =
 			    cpu_to_be16(PROMISC_REQUESTED);
 			crq.request_capability.number = cpu_to_be64(1);
-			atomic_inc(&adapter->running_cap_crqs);
+			cap_reqs--;
 			ibmvnic_send_crq(adapter, &crq);
 		}
 	} else {
 		crq.request_capability.capability =
 		    cpu_to_be16(PROMISC_REQUESTED);
 		crq.request_capability.number = cpu_to_be64(0);
-		atomic_inc(&adapter->running_cap_crqs);
+		cap_reqs--;
 		ibmvnic_send_crq(adapter, &crq);
 	}
+
+	/* Keep at end to catch any discrepancy between expected and actual
+	 * CRQs sent.
+	 */
+	WARN_ON(cap_reqs != 0);
 }
 
 static int pending_scrq(struct ibmvnic_adapter *adapter,
@@ -4187,7 +4513,7 @@ static int send_login(struct ibmvnic_adapter *adapter)
 	if (!adapter->tx_scrq || !adapter->rx_scrq) {
 		netdev_err(adapter->netdev,
 			   "RX or TX queues are not allocated, device login failed\n");
-		return -1;
+		return -ENOMEM;
 	}
 
 	release_login_buffer(adapter);
@@ -4307,7 +4633,7 @@ buf_map_failed:
 	kfree(login_buffer);
 	adapter->login_buf = NULL;
 buf_alloc_failed:
-	return -1;
+	return -ENOMEM;
 }
 
 static int send_request_map(struct ibmvnic_adapter *adapter, dma_addr_t addr,
@@ -4349,118 +4675,132 @@ static void send_query_map(struct ibmvnic_adapter *adapter)
 static void send_query_cap(struct ibmvnic_adapter *adapter)
 {
 	union ibmvnic_crq crq;
+	int cap_reqs;
+
+	/* We send out 25 QUERY_CAPABILITY CRQs below. Initialize this count
+	 * upfront. When the tasklet receives a response to all of these, it
+	 * can send out the next protocol message (REQUEST_CAPABILITY).
+ */ + cap_reqs = 25; + + atomic_set(&adapter->running_cap_crqs, cap_reqs); - atomic_set(&adapter->running_cap_crqs, 0); memset(&crq, 0, sizeof(crq)); crq.query_capability.first = IBMVNIC_CRQ_CMD; crq.query_capability.cmd = QUERY_CAPABILITY; crq.query_capability.capability = cpu_to_be16(MIN_TX_QUEUES); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(MIN_RX_QUEUES); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(MIN_RX_ADD_QUEUES); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(MAX_TX_QUEUES); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(MAX_RX_QUEUES); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(MAX_RX_ADD_QUEUES); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(MIN_TX_ENTRIES_PER_SUBCRQ); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(MIN_RX_ADD_ENTRIES_PER_SUBCRQ); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(MAX_TX_ENTRIES_PER_SUBCRQ); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(MAX_RX_ADD_ENTRIES_PER_SUBCRQ); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(TCP_IP_OFFLOAD); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(PROMISC_SUPPORTED); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(MIN_MTU); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(MAX_MTU); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(MAX_MULTICAST_FILTERS); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(VLAN_HEADER_INSERTION); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(RX_VLAN_HEADER_INSERTION); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(MAX_TX_SG_ENTRIES); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(RX_SG_SUPPORTED); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(OPT_TX_COMP_SUB_QUEUES); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(OPT_RX_COMP_QUEUES); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = 
cpu_to_be16(OPT_RX_BUFADD_Q_PER_RX_COMP_Q); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(OPT_TX_ENTRIES_PER_SUBCRQ); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(OPT_RXBA_ENTRIES_PER_SUBCRQ); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(TX_RX_DESC_REQ); - atomic_inc(&adapter->running_cap_crqs); + ibmvnic_send_crq(adapter, &crq); + cap_reqs--; + + /* Keep at end to catch any discrepancy between expected and actual + * CRQs sent. + */ + WARN_ON(cap_reqs != 0); } static void send_query_ip_offload(struct ibmvnic_adapter *adapter) @@ -4764,6 +5104,8 @@ static void handle_request_cap_rsp(union ibmvnic_crq *crq, char *name; atomic_dec(&adapter->running_cap_crqs); + netdev_dbg(adapter->netdev, "Outstanding request-caps: %d\n", + atomic_read(&adapter->running_cap_crqs)); switch (be16_to_cpu(crq->request_capability_rsp.capability)) { case REQ_TX_QUEUES: req_value = &adapter->req_tx_queues; @@ -4827,10 +5169,8 @@ static void handle_request_cap_rsp(union ibmvnic_crq *crq, } /* Done receiving requested capabilities, query IP offload support */ - if (atomic_read(&adapter->running_cap_crqs) == 0) { - adapter->wait_capability = false; + if (atomic_read(&adapter->running_cap_crqs) == 0) send_query_ip_offload(adapter); - } } static int handle_login_rsp(union ibmvnic_crq *login_rsp_crq, @@ -5128,10 +5468,8 @@ static void handle_query_cap_rsp(union ibmvnic_crq *crq, } out: - if (atomic_read(&adapter->running_cap_crqs) == 0) { - adapter->wait_capability = false; + if (atomic_read(&adapter->running_cap_crqs) == 0) send_request_cap(adapter, 0); - } } static int send_query_phys_parms(struct ibmvnic_adapter *adapter) @@ -5263,9 +5601,9 @@ static void ibmvnic_handle_crq(union ibmvnic_crq *crq, } if (!completion_done(&adapter->init_done)) { - complete(&adapter->init_done); if (!adapter->init_done_rc) adapter->init_done_rc = -EAGAIN; + complete(&adapter->init_done); } break; @@ -5288,6 +5626,13 @@ static void ibmvnic_handle_crq(union ibmvnic_crq *crq, adapter->fw_done_rc = -EIO; complete(&adapter->fw_done); } + + /* if we got here during crq-init, retry crq-init */ + if (!completion_done(&adapter->init_done)) { + adapter->init_done_rc = -EAGAIN; + complete(&adapter->init_done); + } + if (!completion_done(&adapter->stats_done)) complete(&adapter->stats_done); if (test_bit(0, &adapter->resetting)) @@ -5427,33 +5772,21 @@ static void ibmvnic_tasklet(struct tasklet_struct *t) struct ibmvnic_crq_queue *queue = &adapter->crq; union ibmvnic_crq *crq; unsigned long flags; - bool done = false; spin_lock_irqsave(&queue->lock, flags); - while (!done) { - /* Pull all the valid messages off the CRQ */ - while ((crq = ibmvnic_next_crq(adapter)) != NULL) { - /* This barrier makes sure ibmvnic_next_crq()'s - * crq->generic.first & IBMVNIC_CRQ_CMD_RSP is loaded - * before ibmvnic_handle_crq()'s - * switch(gen_crq->first) and switch(gen_crq->cmd). 
- */ - dma_rmb(); - ibmvnic_handle_crq(crq, adapter); - crq->generic.first = 0; - } - /* remain in tasklet until all - * capabilities responses are received + /* Pull all the valid messages off the CRQ */ + while ((crq = ibmvnic_next_crq(adapter)) != NULL) { + /* This barrier makes sure ibmvnic_next_crq()'s + * crq->generic.first & IBMVNIC_CRQ_CMD_RSP is loaded + * before ibmvnic_handle_crq()'s + * switch(gen_crq->first) and switch(gen_crq->cmd). */ - if (!adapter->wait_capability) - done = true; + dma_rmb(); + ibmvnic_handle_crq(crq, adapter); + crq->generic.first = 0; } - /* if capabilities CRQ's were sent in this tasklet, the following - * tasklet must wait until all responses are received - */ - if (atomic_read(&adapter->running_cap_crqs) != 0) - adapter->wait_capability = true; + spin_unlock_irqrestore(&queue->lock, flags); } @@ -5616,10 +5949,6 @@ static int ibmvnic_reset_init(struct ibmvnic_adapter *adapter, bool reset) adapter->from_passive_init = false; - if (reset) - reinit_completion(&adapter->init_done); - - adapter->init_done_rc = 0; rc = ibmvnic_send_crq_init(adapter); if (rc) { dev_err(dev, "Send crq init failed with error %d\n", rc); @@ -5628,18 +5957,20 @@ static int ibmvnic_reset_init(struct ibmvnic_adapter *adapter, bool reset) if (!wait_for_completion_timeout(&adapter->init_done, timeout)) { dev_err(dev, "Initialization sequence timed out\n"); - return -1; + return -ETIMEDOUT; } if (adapter->init_done_rc) { release_crq_queue(adapter); + dev_err(dev, "CRQ-init failed, %d\n", adapter->init_done_rc); return adapter->init_done_rc; } if (adapter->from_passive_init) { adapter->state = VNIC_OPEN; adapter->from_passive_init = false; - return -1; + dev_err(dev, "CRQ-init failed, passive-init\n"); + return -EINVAL; } if (reset && @@ -5650,6 +5981,15 @@ static int ibmvnic_reset_init(struct ibmvnic_adapter *adapter, bool reset) release_sub_crqs(adapter, 0); rc = init_sub_crqs(adapter); } else { + /* no need to reinitialize completely, but we do + * need to clean up transmits that were in flight + * when we processed the reset. Failure to do so + * will confound the upper layer, usually TCP, by + * creating the illusion of transmits that are + * awaiting completion. + */ + clean_tx_pools(adapter); + rc = reset_sub_crq_queues(adapter); } } else { @@ -5678,6 +6018,7 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id) struct ibmvnic_adapter *adapter; struct net_device *netdev; unsigned char *mac_addr_p; + unsigned long flags; bool init_success; int rc; @@ -5722,6 +6063,7 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id) spin_lock_init(&adapter->rwi_lock); spin_lock_init(&adapter->state_lock); mutex_init(&adapter->fw_lock); + init_completion(&adapter->probe_done); init_completion(&adapter->init_done); init_completion(&adapter->fw_done); init_completion(&adapter->reset_done); @@ -5732,6 +6074,33 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id) init_success = false; do { + reinit_init_done(adapter); + + /* clear any failovers we got in the previous pass + * since we are reinitializing the CRQ + */ + adapter->failover_pending = false; + + /* If we had already initialized CRQ, we may have one or + * more resets queued already. Discard those and release + * the CRQ before initializing the CRQ again. + */ + release_crq_queue(adapter); + + /* Since we are still in PROBING state, __ibmvnic_reset() + * will not access the ->rwi_list and since we released CRQ, + * we won't get _new_ transport events. 
But there may be an
+		 * ongoing ibmvnic_reset() call. So serialize access to
+		 * rwi_list. If we win the race, ibmvnic_reset() could add
+		 * a reset after we purged but that's ok - we just may end
+		 * up with an extra reset (i.e. similar to having two or more
+		 * resets in the queue at once).
+		 */
+		spin_lock_irqsave(&adapter->rwi_lock, flags);
+		flush_reset_queue(adapter);
+		spin_unlock_irqrestore(&adapter->rwi_lock, flags);
+
 		rc = init_crq_queue(adapter);
 		if (rc) {
 			dev_err(&dev->dev, "Couldn't initialize crq. rc=%d\n",
@@ -5763,12 +6132,6 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
 		goto ibmvnic_dev_file_err;
 
 	netif_carrier_off(netdev);
-	rc = register_netdev(netdev);
-	if (rc) {
-		dev_err(&dev->dev, "failed to register netdev rc=%d\n", rc);
-		goto ibmvnic_register_fail;
-	}
-	dev_info(&dev->dev, "ibmvnic registered\n");
 
 	if (init_success) {
 		adapter->state = VNIC_PROBED;
@@ -5781,6 +6144,16 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
 	adapter->wait_for_reset = false;
 	adapter->last_reset_time = jiffies;
+
+	rc = register_netdev(netdev);
+	if (rc) {
+		dev_err(&dev->dev, "failed to register netdev rc=%d\n", rc);
+		goto ibmvnic_register_fail;
+	}
+	dev_info(&dev->dev, "ibmvnic registered\n");
+
+	complete(&adapter->probe_done);
+
 	return 0;
 
 ibmvnic_register_fail:
@@ -5795,6 +6168,17 @@ ibmvnic_stats_fail:
 ibmvnic_init_fail:
 	release_sub_crqs(adapter, 1);
 	release_crq_queue(adapter);
+
+	/* cleanup worker thread after releasing CRQ so we don't get
+	 * transport events (i.e. new work items for the worker thread).
+	 */
+	adapter->state = VNIC_REMOVING;
+	complete(&adapter->probe_done);
+	flush_work(&adapter->ibmvnic_reset);
+	flush_delayed_work(&adapter->ibmvnic_delayed_reset);
+
+	flush_reset_queue(adapter);
+
 	mutex_destroy(&adapter->fw_lock);
 	free_netdev(netdev);
 
@@ -5871,10 +6255,14 @@ static ssize_t failover_store(struct device *dev, struct device_attribute *attr,
 		   be64_to_cpu(session_token));
 	rc = plpar_hcall_norets(H_VIOCTL, adapter->vdev->unit_address,
 				H_SESSION_ERR_DETECTED, session_token, 0, 0);
-	if (rc)
+	if (rc) {
 		netdev_err(netdev,
 			   "H_VIOCTL initiated failover failed, rc %ld\n",
 			   rc);
+		goto last_resort;
+	}
+
+	return count;
 
 last_resort:
 	netdev_dbg(netdev, "Trying to send CRQ_CMD, the last resort\n");
diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h
index b8e42f67d897..e5c6ff3d0c47 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.h
+++ b/drivers/net/ethernet/ibm/ibmvnic.h
@@ -18,8 +18,6 @@
 #define IBMVNIC_NAME		"ibmvnic"
 #define IBMVNIC_DRIVER_VERSION	"1.0.1"
 #define IBMVNIC_INVALID_MAP	-1
-#define IBMVNIC_STATS_TIMEOUT	1
-#define IBMVNIC_INIT_FAILED	2
 #define IBMVNIC_OPEN_FAILED	3
 
 /* basic structures plus 100 2k buffers */
@@ -38,16 +36,52 @@
 #define IBMVNIC_TSO_BUFS	64
 #define IBMVNIC_TSO_POOL_MASK	0x80000000
 
-#define IBMVNIC_MAX_LTB_SIZE ((1 << (MAX_ORDER - 1)) * PAGE_SIZE)
-#define IBMVNIC_BUFFER_HLEN 500
+/* A VNIC adapter has a set of Rx and Tx pools (aka queues). Each Rx/Tx pool
+ * has a set of buffers. The size of each buffer is determined by the MTU.
+ *
+ * Each Rx/Tx pool is also associated with a DMA region that is shared
+ * with the "hardware" (VIOS) and used to send/receive packets. The DMA
+ * region is also referred to as a Long Term Buffer or LTB.
+ *
+ * The size of the DMA region required for an Rx/Tx pool depends on the
+ * number and size (MTU) of the buffers in the pool.
At the maximum
+ * of 4096 jumbo frames (MTU=9000) we will need about 9K*4K = 36MB plus
+ * some padding.
+ *
+ * But the size of a single DMA region is limited by MAX_ORDER in the
+ * kernel (about 16MB currently). To support, say, 4K jumbo frames, we
+ * use a set of LTBs (struct ltb_set) per pool.
+ *
+ * IBMVNIC_ONE_LTB_MAX  - max size of each LTB supported by kernel
+ * IBMVNIC_ONE_LTB_SIZE - current max size of each LTB in an ltb_set
+ *			  (must be <= IBMVNIC_ONE_LTB_MAX)
+ * IBMVNIC_LTB_SET_SIZE - current size of all LTBs in an ltb_set
+ *
+ * Each VNIC can have up to 16 Rx, 16 Tx and 16 TSO pools. The TSO pools
+ * are of fixed length (IBMVNIC_TSO_BUF_SZ * IBMVNIC_TSO_BUFS) of 4MB.
+ *
+ * The Rx and Tx pools can have up to 4096 buffers. The max size of these
+ * buffers is about 9588 (for jumbo frames, including IBMVNIC_BUFFER_HLEN).
+ * So we set IBMVNIC_LTB_SET_SIZE for a pool to 4096 * 9588 ~= 38MB.
+ *
+ * There is a trade-off in setting IBMVNIC_ONE_LTB_SIZE. If it is large,
+ * the allocation of the LTB can fail when the system is low on memory. If
+ * it is too small, we would need several mappings for each of the Rx/
+ * Tx/TSO pools but there is a limit of 255 mappings per vnic in the
+ * VNIC protocol.
+ *
+ * So we set IBMVNIC_ONE_LTB_SIZE to 8MB. With IBMVNIC_LTB_SET_SIZE set
+ * to 38MB, we will need 5 LTBs per Rx and Tx pool and 1 LTB per TSO
+ * pool for the 4MB. Thus the 16 Rx and Tx queues require 32 * 5 = 160
+ * plus 16 for the TSO pools for a total of 176 LTB mappings per VNIC.
+ */
+#define IBMVNIC_ONE_LTB_MAX	((u32)((1 << (MAX_ORDER - 1)) * PAGE_SIZE))
+#define IBMVNIC_ONE_LTB_SIZE	min((u32)(8 << 20), IBMVNIC_ONE_LTB_MAX)
+#define IBMVNIC_LTB_SET_SIZE	(38 << 20)
 
+#define IBMVNIC_BUFFER_HLEN	500
 #define IBMVNIC_RESET_DELAY 100
 
-static const char ibmvnic_priv_flags[][ETH_GSTRING_LEN] = {
-#define IBMVNIC_USE_SERVER_MAXES	0x1
-	"use-server-maxes"
-};
-
 struct ibmvnic_login_buffer {
 	__be32 len;
 	__be32 version;
@@ -800,6 +834,11 @@ struct ibmvnic_long_term_buff {
 	u8 map_id;
 };
 
+struct ibmvnic_ltb_set {
+	int num_ltbs;
+	struct ibmvnic_long_term_buff *ltbs;
+};
+
 struct ibmvnic_tx_buff {
 	struct sk_buff *skb;
 	int index;
@@ -812,7 +851,7 @@ struct ibmvnic_tx_pool {
 	int *free_map;
 	int consumer_index;
 	int producer_index;
-	struct ibmvnic_long_term_buff long_term_buff;
+	struct ibmvnic_ltb_set ltb_set;
 	int num_buffers;
 	int buf_size;
 } ____cacheline_aligned;
@@ -835,7 +874,7 @@ struct ibmvnic_rx_pool {
 	int next_free;
 	int next_alloc;
 	int active;
-	struct ibmvnic_long_term_buff long_term_buff;
+	struct ibmvnic_ltb_set ltb_set;
 } ____cacheline_aligned;
 
 struct ibmvnic_vpd {
@@ -885,7 +924,6 @@ struct ibmvnic_adapter {
 	struct ibmvnic_control_ip_offload_buffer ip_offload_ctrl;
 	dma_addr_t ip_offload_ctrl_tok;
 	u32 msg_enable;
-	u32 priv_flags;
 
 	/* Vital Product Data (VPD) */
 	struct ibmvnic_vpd *vpd;
@@ -921,7 +959,6 @@ struct ibmvnic_adapter {
 	int login_rsp_buf_sz;
 
 	atomic_t running_cap_crqs;
-	bool wait_capability;
 
 	struct ibmvnic_sub_crq_queue **tx_scrq ____cacheline_aligned;
 	struct ibmvnic_sub_crq_queue **rx_scrq ____cacheline_aligned;
@@ -933,6 +970,7 @@ struct ibmvnic_adapter {
 	struct ibmvnic_tx_pool *tx_pool;
 	struct ibmvnic_tx_pool *tso_pool;
 
+	struct completion probe_done;
 	struct completion init_done;
 	int init_done_rc;
 
@@ -1008,11 +1046,14 @@ struct ibmvnic_adapter {
 	struct work_struct ibmvnic_reset;
 	struct delayed_work ibmvnic_delayed_reset;
 	unsigned long resetting;
-	bool napi_enabled, from_passive_init;
-	bool login_pending;
 	/* last device reset time */
 	unsigned long
last_reset_time;
+	bool napi_enabled;
+	bool from_passive_init;
+	bool login_pending;
+	/* protected by rcu */
+	bool tx_queues_active;
 	bool failover_pending;
 	bool force_reset_recovery;
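
The tx_queues_active flag added above is re-checked under rcu_read_lock() in the tx-completion path before netif_wake_subqueue(), and the reset path clears it before waiting out a grace period, so no completion handler can wake a queue after a reset has stopped them. A minimal userspace sketch of the same discipline, assuming liburcu (build with -lurcu; the driver itself uses the kernel's RCU, and all names here are illustrative):

#include <stdbool.h>
#include <stdio.h>
#include <urcu.h>		/* liburcu; link with -lurcu */

static bool tx_queues_active;

/* completion path: re-check the flag under the read lock before waking */
static void tx_complete(int queue)
{
	rcu_read_lock();
	if (tx_queues_active)
		printf("wake queue %d\n", queue);
	rcu_read_unlock();
}

/* reset path: clear the flag, then wait out all in-flight readers */
static void quiesce_tx(void)
{
	tx_queues_active = false;
	synchronize_rcu();
	/* from here on, no completion handler can still wake a queue */
}

int main(void)
{
	rcu_register_thread();
	tx_queues_active = true;
	tx_complete(0);
	quiesce_tx();
	tx_complete(0);		/* prints nothing: queue stays stopped */
	rcu_unregister_thread();
	return 0;
}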
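
The LTB sizing comment added to ibmvnic.h can be checked with a few lines of arithmetic: an 8MB per-LTB cap against a 38MB per-pool set budget gives 5 LTBs per Rx/Tx pool and 176 total mappings, under the protocol's limit of 255. A standalone C sketch (the constants mirror the header's values but are local to this example):

#include <stdio.h>

#define ONE_LTB_SIZE	(8u << 20)	/* 8MB cap per DMA region */
#define LTB_SET_SIZE	(38u << 20)	/* per-pool budget, 4096 * ~9588 */

int main(void)
{
	/* round up, since a partial LTB still costs one mapping */
	unsigned int per_pool = (LTB_SET_SIZE + ONE_LTB_SIZE - 1) / ONE_LTB_SIZE;
	/* 16 Rx + 16 Tx pools at per_pool LTBs each, plus 1 LTB per TSO pool */
	unsigned int mappings = 32 * per_pool + 16;

	printf("LTBs per Rx/Tx pool: %u\n", per_pool);		/* 5 */
	printf("mappings per vnic:   %u (limit 255)\n", mappings);	/* 176 */
	return 0;
}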
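
send_query_cap() and send_request_cap() now set running_cap_crqs to the full expected count before the first CRQ goes out, instead of incrementing it per send while responses may already be arriving; a local cap_reqs countdown plus the trailing WARN_ON catches any mismatch. A minimal userspace model of that accounting, with illustrative names and C11 atomics standing in for the kernel's atomic_t:

#include <assert.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_int running_cap_crqs;

/* stand-in for ibmvnic_send_crq() */
static void send_one_request(int *cap_reqs)
{
	(*cap_reqs)--;
}

static void send_all_requests(bool promisc)
{
	int cap_reqs = promisc ? 7 : 6;	/* expected total, known up front */
	int i, n = cap_reqs;

	/* Publish the full count before the first send so a response
	 * handler can never observe a partially built counter. */
	atomic_store(&running_cap_crqs, cap_reqs);

	for (i = 0; i < n; i++)
		send_one_request(&cap_reqs);

	assert(cap_reqs == 0);	/* mirrors WARN_ON(cap_reqs != 0) */
}

static void handle_one_response(void)
{
	/* fetch_sub returns the old value: 1 means this was the last one */
	if (atomic_fetch_sub(&running_cap_crqs, 1) == 1)
		printf("all responses in; send next protocol message\n");
}

int main(void)
{
	int i;

	send_all_requests(false);
	for (i = 0; i < 6; i++)
		handle_one_response();
	return 0;
}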
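
flush_reset_queue() above walks the reset queue with list_for_each_safe(), whose second cursor lets each entry be freed mid-walk. The same lookahead idea in plain C, with a hypothetical singly-linked list standing in for the kernel's list_head:

#include <stdio.h>
#include <stdlib.h>

struct rwi {			/* stand-in for struct ibmvnic_rwi */
	int reset_reason;
	struct rwi *next;
};

/* free every queued entry; the next pointer is saved before each free,
 * which is exactly what list_for_each_safe() buys the kernel version */
static void flush_queue(struct rwi **head)
{
	struct rwi *entry = *head, *tmp;

	while (entry) {
		tmp = entry->next;
		free(entry);
		entry = tmp;
	}
	*head = NULL;
}

int main(void)
{
	struct rwi *head = NULL;

	for (int i = 0; i < 3; i++) {
		struct rwi *r = malloc(sizeof(*r));

		r->reset_reason = i;
		r->next = head;
		head = r;
	}
	flush_queue(&head);
	printf("queue empty: %s\n", head ? "no" : "yes");
	return 0;
}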